drc: merge Ari64's patch: 02_xor_zero
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
// Routines referenced by address from this file; they are not defined in
// the part of the file shown here.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
// One jump_vaddr entry per host register r0-r10 and r12
// (see jump_vaddr_reg, which has no entry for r11 and r13-r15).
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
f76eeef9 145void *kill_pointer(void *stub)
57871462 146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 153 return i_ptr;
57871462 154}
155
156int get_pointer(void *stub)
157{
158 //printf("get_pointer(%x)\n",(int)stub);
159 int *ptr=(int *)(stub+4);
160 assert((*ptr&0x0ff00000)==0x05900000);
161 u_int offset=*ptr&0xfff;
162 int **l_ptr=(void *)ptr+offset+8;
163 int *i_ptr=*l_ptr;
164 assert((*i_ptr&0x0f000000)==0x0a000000);
165 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
166}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
// Printable name of each host register number, used in assem_debug output.
// Register 11 is printed as "fp", 13-15 as "sp", "lr", "pc".
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
// Build the register fields of an instruction word: rn in bits 16-19,
// rd in bits 12-15, rm in bits 0-3.  Each register number must be < 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rn/rd fields plus an 8-bit immediate that is to be shifted
// left by `shift` bits (shift must be even).  The rotate field placed in
// bits 8-11 is ((32-shift)&30)/2.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success stores the 12-bit field (rotate/2 in bits 8-11, value in
// bits 0-7) in *encoded and returns 1.  Returns 0, leaving *encoded
// untouched, when no such encoding exists.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 827void genimm_checked(u_int imm,u_int *encoded)
828{
829 u_int ret=genimm(imm,encoded);
830 assert(ret);
831}
57871462 832u_int genjmp(u_int addr)
833{
834 int offset=addr-(int)out-8;
e80343e2 835 if(offset<-33554432||offset>=33554432) {
836 if (addr>2) {
837 printf("genjmp: out of range: %08x\n", offset);
838 exit(1);
839 }
840 return 0;
841 }
57871462 842 return ((u_int)offset>>2)&0xffffff;
843}
844
845void emit_mov(int rs,int rt)
846{
847 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_movs(int rs,int rt)
852{
853 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
854 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
855}
856
857void emit_add(int rs1,int rs2,int rt)
858{
859 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adds(int rs1,int rs2,int rt)
864{
865 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_adcs(int rs1,int rs2,int rt)
870{
871 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbc(int rs1,int rs2,int rt)
876{
877 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_sbcs(int rs1,int rs2,int rt)
882{
883 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
884 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
885}
886
887void emit_neg(int rs, int rt)
888{
889 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_negs(int rs, int rt)
894{
895 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
896 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
897}
898
899void emit_sub(int rs1,int rs2,int rt)
900{
901 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_subs(int rs1,int rs2,int rt)
906{
907 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
908 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
909}
910
911void emit_zeroreg(int rt)
912{
913 assem_debug("mov %s,#0\n",regname[rt]);
914 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
915}
916
790ee18e 917void emit_loadlp(u_int imm,u_int rt)
918{
919 add_literal((int)out,imm);
920 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
921 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
922}
923void emit_movw(u_int imm,u_int rt)
924{
925 assert(imm<65536);
926 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
927 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
928}
929void emit_movt(u_int imm,u_int rt)
930{
931 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
932 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
933}
934void emit_movimm(u_int imm,u_int rt)
935{
936 u_int armval;
937 if(genimm(imm,&armval)) {
938 assem_debug("mov %s,#%d\n",regname[rt],imm);
939 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
940 }else if(genimm(~imm,&armval)) {
941 assem_debug("mvn %s,#%d\n",regname[rt],imm);
942 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
943 }else if(imm<65536) {
944 #ifdef ARMv5_ONLY
945 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
946 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
947 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
948 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
949 #else
950 emit_movw(imm,rt);
951 #endif
952 }else{
953 #ifdef ARMv5_ONLY
954 emit_loadlp(imm,rt);
955 #else
956 emit_movw(imm&0x0000FFFF,rt);
957 emit_movt(imm&0xFFFF0000,rt);
958 #endif
959 }
960}
961void emit_pcreladdr(u_int rt)
962{
963 assem_debug("add %s,pc,#?\n",regname[rt]);
964 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
965}
966
57871462 967void emit_loadreg(int r, int hr)
968{
3d624f89 969#ifdef FORCE32
970 if(r&64) {
971 printf("64bit load in 32bit mode!\n");
972 exit(1);
973 }
974#endif
57871462 975 if((r&63)==0)
976 emit_zeroreg(hr);
977 else {
3d624f89 978 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 979 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
980 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
981 if(r==CCREG) addr=(int)&cycle_count;
982 if(r==CSREG) addr=(int)&Status;
983 if(r==FSREG) addr=(int)&FCR31;
984 if(r==INVCP) addr=(int)&invc_ptr;
985 u_int offset = addr-(u_int)&dynarec_local;
986 assert(offset<4096);
987 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
988 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
989 }
990}
991void emit_storereg(int r, int hr)
992{
3d624f89 993#ifdef FORCE32
994 if(r&64) {
995 printf("64bit store in 32bit mode!\n");
996 exit(1);
997 }
998#endif
999 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1000 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1001 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1002 if(r==CCREG) addr=(int)&cycle_count;
1003 if(r==FSREG) addr=(int)&FCR31;
1004 u_int offset = addr-(u_int)&dynarec_local;
1005 assert(offset<4096);
1006 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1007 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1008}
1009
1010void emit_test(int rs, int rt)
1011{
1012 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1013 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1014}
1015
1016void emit_testimm(int rs,int imm)
1017{
1018 u_int armval;
1019 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1020 genimm_checked(imm,&armval);
57871462 1021 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1022}
1023
b9b61529 1024void emit_testeqimm(int rs,int imm)
1025{
1026 u_int armval;
1027 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1028 genimm_checked(imm,&armval);
b9b61529 1029 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1030}
1031
57871462 1032void emit_not(int rs,int rt)
1033{
1034 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1035 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1036}
1037
b9b61529 1038void emit_mvnmi(int rs,int rt)
1039{
1040 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1041 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1042}
1043
57871462 1044void emit_and(u_int rs1,u_int rs2,u_int rt)
1045{
1046 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1047 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1048}
1049
1050void emit_or(u_int rs1,u_int rs2,u_int rt)
1051{
1052 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1053 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1054}
1055void emit_or_and_set_flags(int rs1,int rs2,int rt)
1056{
1057 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1058 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1059}
1060
f70d384d 1061void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1062{
1063 assert(rs<16);
1064 assert(rt<16);
1065 assert(imm<32);
1066 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1067 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1068}
1069
576bbd8f 1070void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1071{
1072 assert(rs<16);
1073 assert(rt<16);
1074 assert(imm<32);
1075 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1076 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1077}
1078
57871462 1079void emit_xor(u_int rs1,u_int rs2,u_int rt)
1080{
1081 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1082 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1083}
1084
57871462 1085void emit_addimm(u_int rs,int imm,u_int rt)
1086{
1087 assert(rs<16);
1088 assert(rt<16);
1089 if(imm!=0) {
1090 assert(imm>-65536&&imm<65536);
1091 u_int armval;
1092 if(genimm(imm,&armval)) {
1093 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1094 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1095 }else if(genimm(-imm,&armval)) {
1096 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1097 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1098 }else if(imm<0) {
1099 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1100 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1101 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1102 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1103 }else{
1104 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1105 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1106 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1107 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1108 }
1109 }
1110 else if(rs!=rt) emit_mov(rs,rt);
1111}
1112
1113void emit_addimm_and_set_flags(int imm,int rt)
1114{
1115 assert(imm>-65536&&imm<65536);
1116 u_int armval;
1117 if(genimm(imm,&armval)) {
1118 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1119 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1120 }else if(genimm(-imm,&armval)) {
1121 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1122 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1123 }else if(imm<0) {
1124 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1125 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1126 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1127 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1128 }else{
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1130 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1131 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1132 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1133 }
1134}
1135void emit_addimm_no_flags(u_int imm,u_int rt)
1136{
1137 emit_addimm(rt,imm,rt);
1138}
1139
1140void emit_addnop(u_int r)
1141{
1142 assert(r<16);
1143 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1144 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1145}
1146
1147void emit_adcimm(u_int rs,int imm,u_int rt)
1148{
1149 u_int armval;
cfbd3c6e 1150 genimm_checked(imm,&armval);
57871462 1151 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1152 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1153}
1154/*void emit_sbcimm(int imm,u_int rt)
1155{
1156 u_int armval;
cfbd3c6e 1157 genimm_checked(imm,&armval);
57871462 1158 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1159 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1160}*/
1161void emit_sbbimm(int imm,u_int rt)
1162{
1163 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1164 assert(rt<8);
1165 if(imm<128&&imm>=-128) {
1166 output_byte(0x83);
1167 output_modrm(3,rt,3);
1168 output_byte(imm);
1169 }
1170 else
1171 {
1172 output_byte(0x81);
1173 output_modrm(3,rt,3);
1174 output_w32(imm);
1175 }
1176}
1177void emit_rscimm(int rs,int imm,u_int rt)
1178{
1179 assert(0);
1180 u_int armval;
cfbd3c6e 1181 genimm_checked(imm,&armval);
57871462 1182 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1184}
1185
1186void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1187{
1188 // TODO: if(genimm(imm,&armval)) ...
1189 // else
1190 emit_movimm(imm,HOST_TEMPREG);
1191 emit_adds(HOST_TEMPREG,rsl,rtl);
1192 emit_adcimm(rsh,0,rth);
1193}
1194
1195void emit_sbb(int rs1,int rs2)
1196{
1197 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1198 output_byte(0x19);
1199 output_modrm(3,rs1,rs2);
1200}
1201
1202void emit_andimm(int rs,int imm,int rt)
1203{
1204 u_int armval;
790ee18e 1205 if(imm==0) {
1206 emit_zeroreg(rt);
1207 }else if(genimm(imm,&armval)) {
57871462 1208 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1209 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1210 }else if(genimm(~imm,&armval)) {
1211 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1212 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1213 }else if(imm==65535) {
1214 #ifdef ARMv5_ONLY
1215 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1216 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1217 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1218 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1219 #else
1220 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1221 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1222 #endif
1223 }else{
1224 assert(imm>0&&imm<65535);
1225 #ifdef ARMv5_ONLY
1226 assem_debug("mov r14,#%d\n",imm&0xFF00);
1227 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1228 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1229 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1230 #else
1231 emit_movw(imm,HOST_TEMPREG);
1232 #endif
1233 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1234 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1235 }
1236}
1237
1238void emit_orimm(int rs,int imm,int rt)
1239{
1240 u_int armval;
790ee18e 1241 if(imm==0) {
1242 if(rs!=rt) emit_mov(rs,rt);
1243 }else if(genimm(imm,&armval)) {
57871462 1244 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1246 }else{
1247 assert(imm>0&&imm<65536);
1248 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1249 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1250 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1251 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1252 }
1253}
1254
1255void emit_xorimm(int rs,int imm,int rt)
1256{
57871462 1257 u_int armval;
790ee18e 1258 if(imm==0) {
1259 if(rs!=rt) emit_mov(rs,rt);
1260 }else if(genimm(imm,&armval)) {
57871462 1261 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1262 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1263 }else{
514ed0d9 1264 assert(imm>0&&imm<65536);
57871462 1265 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1266 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1267 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1268 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1269 }
1270}
1271
1272void emit_shlimm(int rs,u_int imm,int rt)
1273{
1274 assert(imm>0);
1275 assert(imm<32);
1276 //if(imm==1) ...
1277 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1278 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1279}
1280
1281void emit_shrimm(int rs,u_int imm,int rt)
1282{
1283 assert(imm>0);
1284 assert(imm<32);
1285 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1286 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1287}
1288
1289void emit_sarimm(int rs,u_int imm,int rt)
1290{
1291 assert(imm>0);
1292 assert(imm<32);
1293 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1294 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1295}
1296
1297void emit_rorimm(int rs,u_int imm,int rt)
1298{
1299 assert(imm>0);
1300 assert(imm<32);
1301 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1302 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1303}
1304
1305void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1306{
1307 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1308 assert(imm>0);
1309 assert(imm<32);
1310 //if(imm==1) ...
1311 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1312 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1313 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1314 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1315}
1316
1317void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1318{
1319 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1320 assert(imm>0);
1321 assert(imm<32);
1322 //if(imm==1) ...
1323 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1325 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1326 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1327}
1328
b9b61529 1329void emit_signextend16(int rs,int rt)
1330{
1331 #ifdef ARMv5_ONLY
1332 emit_shlimm(rs,16,rt);
1333 emit_sarimm(rt,16,rt);
1334 #else
1335 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1336 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1337 #endif
1338}
1339
57871462 1340void emit_shl(u_int rs,u_int shift,u_int rt)
1341{
1342 assert(rs<16);
1343 assert(rt<16);
1344 assert(shift<16);
1345 //if(imm==1) ...
1346 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1347 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1348}
1349void emit_shr(u_int rs,u_int shift,u_int rt)
1350{
1351 assert(rs<16);
1352 assert(rt<16);
1353 assert(shift<16);
1354 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1355 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1356}
1357void emit_sar(u_int rs,u_int shift,u_int rt)
1358{
1359 assert(rs<16);
1360 assert(rt<16);
1361 assert(shift<16);
1362 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1363 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1364}
1365void emit_shlcl(int r)
1366{
1367 assem_debug("shl %%%s,%%cl\n",regname[r]);
1368 assert(0);
1369}
1370void emit_shrcl(int r)
1371{
1372 assem_debug("shr %%%s,%%cl\n",regname[r]);
1373 assert(0);
1374}
1375void emit_sarcl(int r)
1376{
1377 assem_debug("sar %%%s,%%cl\n",regname[r]);
1378 assert(0);
1379}
1380
1381void emit_shldcl(int r1,int r2)
1382{
1383 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1384 assert(0);
1385}
1386void emit_shrdcl(int r1,int r2)
1387{
1388 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1389 assert(0);
1390}
1391void emit_orrshl(u_int rs,u_int shift,u_int rt)
1392{
1393 assert(rs<16);
1394 assert(rt<16);
1395 assert(shift<16);
1396 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1397 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1398}
1399void emit_orrshr(u_int rs,u_int shift,u_int rt)
1400{
1401 assert(rs<16);
1402 assert(rt<16);
1403 assert(shift<16);
1404 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1405 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1406}
1407
1408void emit_cmpimm(int rs,int imm)
1409{
1410 u_int armval;
1411 if(genimm(imm,&armval)) {
1412 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1413 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1414 }else if(genimm(-imm,&armval)) {
1415 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1416 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1417 }else if(imm>0) {
1418 assert(imm<65536);
1419 #ifdef ARMv5_ONLY
1420 emit_movimm(imm,HOST_TEMPREG);
1421 #else
1422 emit_movw(imm,HOST_TEMPREG);
1423 #endif
1424 assem_debug("cmp %s,r14\n",regname[rs]);
1425 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1426 }else{
1427 assert(imm>-65536);
1428 #ifdef ARMv5_ONLY
1429 emit_movimm(-imm,HOST_TEMPREG);
1430 #else
1431 emit_movw(-imm,HOST_TEMPREG);
1432 #endif
1433 assem_debug("cmn %s,r14\n",regname[rs]);
1434 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1435 }
1436}
1437
1438void emit_cmovne(u_int *addr,int rt)
1439{
1440 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1441 assert(0);
1442}
1443void emit_cmovl(u_int *addr,int rt)
1444{
1445 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1446 assert(0);
1447}
1448void emit_cmovs(u_int *addr,int rt)
1449{
1450 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1451 assert(0);
1452}
1453void emit_cmovne_imm(int imm,int rt)
1454{
1455 assem_debug("movne %s,#%d\n",regname[rt],imm);
1456 u_int armval;
cfbd3c6e 1457 genimm_checked(imm,&armval);
57871462 1458 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1459}
1460void emit_cmovl_imm(int imm,int rt)
1461{
1462 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1463 u_int armval;
cfbd3c6e 1464 genimm_checked(imm,&armval);
57871462 1465 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1466}
1467void emit_cmovb_imm(int imm,int rt)
1468{
1469 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1470 u_int armval;
cfbd3c6e 1471 genimm_checked(imm,&armval);
57871462 1472 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1473}
1474void emit_cmovs_imm(int imm,int rt)
1475{
1476 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1477 u_int armval;
cfbd3c6e 1478 genimm_checked(imm,&armval);
57871462 1479 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1480}
1481void emit_cmove_reg(int rs,int rt)
1482{
1483 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1484 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1485}
1486void emit_cmovne_reg(int rs,int rt)
1487{
1488 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1489 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1490}
1491void emit_cmovl_reg(int rs,int rt)
1492{
1493 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1494 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1495}
1496void emit_cmovs_reg(int rs,int rt)
1497{
1498 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1499 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1500}
1501
// Emit code setting rt to 1 when rs < imm (the "lt" condition after the
// compare), else 0.  When rt is a different register it is zeroed before the
// compare; when rt aliases rs it can only be cleared after rs has been read.
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Emit code setting rt to 1 when the compare of rs with imm leaves the "cc"
// condition true, else 0.  When rt is a different register it is zeroed
// before the compare; when rt aliases rs it is cleared after the compare.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Emit code setting rt to 1 when the signed 64-bit value rsh:rsl is less than
// the sign-extended immediate imm, else 0.  rt must not alias rsh.
//
// The low words only decide the result when the high word equals the high
// word of the sign-extended immediate (0 for imm>=0, -1 for imm<0); in that
// case both operands have the same sign and the low words must be compared
// as unsigned quantities.  A signed low-word compare gives the wrong answer
// for e.g. rsh==0, rsl>=0x80000000, imm>=0.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm>=0)
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt); // high word != 0: not below unless negative
    emit_cmovs_imm(1,rt);  // high word negative: always below
  }
  else
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt); // high word != -1: not below unless < -1
    emit_cmovl_imm(1,rt);  // high word < -1: always below
  }
}
// 64-bit counterpart of emit_sltiu32(): rsh:rsl is compared against imm.
// The low-word result is computed first and then overridden according to
// the high word.  rt must not alias rsh.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // High word different from -1 forces the result to 1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // High word different from 0 forces the result to 0.
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1548
1549void emit_cmp(int rs,int rt)
1550{
1551 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1552 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1553}
// Emit code setting rt to 1 when rs is greater than zero, else 0:
// compare rs with 1, load 1, then replace it with 0 on "lt".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Emit code setting rt to 1 when rs is non-zero, else 0.
// The flags come either from copying rs into rt with emit_movs() or, when
// rt aliases rs, from testing rs against itself.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) {
    emit_test(rs,rs);
  }else{
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
// Emit code setting rt to 1 when the signed 64-bit value rsh:rsl is greater
// than zero, else 0.
//
// The low word only matters when the high word is zero, and then the value
// is positive exactly when the low word is non-zero.  A signed ">0" test on
// the low word wrongly reports 0 for rsh==0, rsl>=0x80000000, so the low
// word is tested for non-zero instead.
// NOTE(review): rt is written before rsh is read, so rt should not alias
// rsh - confirm callers guarantee this.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_nz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt); // high word != 0: positive unless negative
  emit_cmovs_imm(0,rt);  // high word negative: not greater than zero
}
// Emit code setting rt non-zero when either half of rsh:rsl is non-zero:
// OR the halves into rt (setting flags), then load 1 on "ne".
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);

  emit_cmovne_imm(1,rt);
}
// Emit code setting rt to 1 when the compare of rs1 with rs2 leaves the "lt"
// condition true, else 0.  If rt aliases a source it is cleared only after
// the compare; otherwise it is zeroed up front.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Emit code setting rt to 1 when the compare of rs1 with rs2 leaves the "cc"
// condition true, else 0.  If rt aliases a source it is cleared only after
// the compare; otherwise it is zeroed up front.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1598void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1599{
1600 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1601 assert(u1!=rt);
1602 assert(u2!=rt);
1603 emit_cmp(l1,l2);
1604 emit_movimm(0,rt);
1605 emit_sbcs(u1,u2,HOST_TEMPREG);
1606 emit_cmovl_imm(1,rt);
1607}
1608void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1609{
1610 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1611 assert(u1!=rt);
1612 assert(u2!=rt);
1613 emit_cmp(l1,l2);
1614 emit_movimm(0,rt);
1615 emit_sbcs(u1,u2,HOST_TEMPREG);
1616 emit_cmovb_imm(1,rt);
1617}
1618
1619void emit_call(int a)
1620{
1621 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1622 u_int offset=genjmp(a);
1623 output_w32(0xeb000000|offset);
1624}
1625void emit_jmp(int a)
1626{
1627 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1628 u_int offset=genjmp(a);
1629 output_w32(0xea000000|offset);
1630}
1631void emit_jne(int a)
1632{
1633 assem_debug("bne %x\n",a);
1634 u_int offset=genjmp(a);
1635 output_w32(0x1a000000|offset);
1636}
1637void emit_jeq(int a)
1638{
1639 assem_debug("beq %x\n",a);
1640 u_int offset=genjmp(a);
1641 output_w32(0x0a000000|offset);
1642}
1643void emit_js(int a)
1644{
1645 assem_debug("bmi %x\n",a);
1646 u_int offset=genjmp(a);
1647 output_w32(0x4a000000|offset);
1648}
1649void emit_jns(int a)
1650{
1651 assem_debug("bpl %x\n",a);
1652 u_int offset=genjmp(a);
1653 output_w32(0x5a000000|offset);
1654}
1655void emit_jl(int a)
1656{
1657 assem_debug("blt %x\n",a);
1658 u_int offset=genjmp(a);
1659 output_w32(0xba000000|offset);
1660}
1661void emit_jge(int a)
1662{
1663 assem_debug("bge %x\n",a);
1664 u_int offset=genjmp(a);
1665 output_w32(0xaa000000|offset);
1666}
1667void emit_jno(int a)
1668{
1669 assem_debug("bvc %x\n",a);
1670 u_int offset=genjmp(a);
1671 output_w32(0x7a000000|offset);
1672}
1673void emit_jc(int a)
1674{
1675 assem_debug("bcs %x\n",a);
1676 u_int offset=genjmp(a);
1677 output_w32(0x2a000000|offset);
1678}
1679void emit_jcc(int a)
1680{
1681 assem_debug("bcc %x\n",a);
1682 u_int offset=genjmp(a);
1683 output_w32(0x3a000000|offset);
1684}
1685
// Unimplemented here: logs a "push $imm" line, then asserts unconditionally.
// No instruction is emitted.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);

  assert(0);
}
// Unimplemented here: logs "pusha", then asserts unconditionally.
// No instruction is emitted.
void emit_pusha()
{
  assem_debug("pusha\n");

  assert(0);
}
// Unimplemented here: logs "popa", then asserts unconditionally.
// No instruction is emitted.
void emit_popa()
{
  assem_debug("popa\n");

  assert(0);
}
1701void emit_pushreg(u_int r)
1702{
1703 assem_debug("push %%%s\n",regname[r]);
1704 assert(0);
1705}
1706void emit_popreg(u_int r)
1707{
1708 assem_debug("pop %%%s\n",regname[r]);
1709 assert(0);
1710}
1711void emit_callreg(u_int r)
1712{
1713 assem_debug("call *%%%s\n",regname[r]);
1714 assert(0);
1715}
1716void emit_jmpreg(u_int r)
1717{
1718 assem_debug("mov pc,%s\n",regname[r]);
1719 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1720}
1721
1722void emit_readword_indexed(int offset, int rs, int rt)
1723{
1724 assert(offset>-4096&&offset<4096);
1725 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1726 if(offset>=0) {
1727 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1728 }else{
1729 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1730 }
1731}
1732void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1733{
1734 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1735 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1736}
// Word load that optionally goes through a map register.
// map<0 means no map register: plain base+offset load.  Otherwise addr must
// be 0 and the load uses rs + map*4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// 64-bit load into rh:rl, optionally through a map register.
// rh<0 skips the load of the first word.  Without a map register (map<0) the
// two words are read at addr and addr+4.  With one, map is incremented in
// place between the loads; because the mapped load scales map by 4 this
// moves the address forward by one word.  rh must not alias rs in that case.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map>=0) {
    assert(rh!=rs);
    if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if(rh>=0) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr+4, rs, rl);
}
1757void emit_movsbl_indexed(int offset, int rs, int rt)
1758{
1759 assert(offset>-256&&offset<256);
1760 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1761 if(offset>=0) {
1762 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1763 }else{
1764 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1765 }
1766}
1767void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1768{
1769 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1770 else {
1771 if(addr==0) {
1772 emit_shlimm(map,2,map);
1773 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1774 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1775 }else{
1776 assert(addr>-256&&addr<256);
1777 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1778 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1779 emit_movsbl_indexed(addr, rt, rt);
1780 }
1781 }
1782}
1783void emit_movswl_indexed(int offset, int rs, int rt)
1784{
1785 assert(offset>-256&&offset<256);
1786 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1787 if(offset>=0) {
1788 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1789 }else{
1790 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1791 }
1792}
1793void emit_movzbl_indexed(int offset, int rs, int rt)
1794{
1795 assert(offset>-4096&&offset<4096);
1796 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1797 if(offset>=0) {
1798 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1799 }else{
1800 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1801 }
1802}
1803void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1804{
1805 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1806 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1807}
// Zero-extending byte load, optionally through a map register.
// map<0: plain base+offset load.  Otherwise the load uses base + map*4,
// where the base is rs itself when addr==0, or rs+addr computed into rt.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs,addr,rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1820void emit_movzwl_indexed(int offset, int rs, int rt)
1821{
1822 assert(offset>-256&&offset<256);
1823 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1824 if(offset>=0) {
1825 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1826 }else{
1827 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1828 }
1829}
1830void emit_readword(int addr, int rt)
1831{
1832 u_int offset = addr-(u_int)&dynarec_local;
1833 assert(offset<4096);
1834 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1835 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1836}
1837void emit_movsbl(int addr, int rt)
1838{
1839 u_int offset = addr-(u_int)&dynarec_local;
1840 assert(offset<256);
1841 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1842 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1843}
1844void emit_movswl(int addr, int rt)
1845{
1846 u_int offset = addr-(u_int)&dynarec_local;
1847 assert(offset<256);
1848 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1849 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1850}
1851void emit_movzbl(int addr, int rt)
1852{
1853 u_int offset = addr-(u_int)&dynarec_local;
1854 assert(offset<4096);
1855 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1856 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1857}
1858void emit_movzwl(int addr, int rt)
1859{
1860 u_int offset = addr-(u_int)&dynarec_local;
1861 assert(offset<256);
1862 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1863 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1864}
1865void emit_movzwl_reg(int rs, int rt)
1866{
1867 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1868 assert(0);
1869}
1870
1871void emit_xchg(int rs, int rt)
1872{
1873 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1874 assert(0);
1875}
1876void emit_writeword_indexed(int rt, int offset, int rs)
1877{
1878 assert(offset>-4096&&offset<4096);
1879 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1880 if(offset>=0) {
1881 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1882 }else{
1883 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1884 }
1885}
1886void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1887{
1888 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1889 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1890}
// Word store that optionally goes through a map register.
// map<0 means no map register: plain base+offset store.  Otherwise addr must
// be 0 and the store targets rs + map*4.  temp is not used by this function.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// 64-bit store of rh:rl, optionally through a map register.
// Without a map register (map<0) rh goes to addr (skipped when rh<0) and rl
// to addr+4.  With one, rh is required; the second word is addressed either
// through temp = map+1 (when temp is distinct from rs) or by advancing rs
// itself by 4 bytes.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1915void emit_writehword_indexed(int rt, int offset, int rs)
1916{
1917 assert(offset>-256&&offset<256);
1918 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1919 if(offset>=0) {
1920 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1921 }else{
1922 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1923 }
1924}
1925void emit_writebyte_indexed(int rt, int offset, int rs)
1926{
1927 assert(offset>-4096&&offset<4096);
1928 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1929 if(offset>=0) {
1930 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1931 }else{
1932 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1933 }
1934}
1935void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1936{
1937 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1938 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1939}
// Byte store, optionally through a map register.
// map<0: plain base+offset store.  Otherwise the store targets base + map*4,
// where the base is rs itself when addr==0, or rs+addr computed into temp.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs,addr,temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
1952void emit_writeword(int rt, int addr)
1953{
1954 u_int offset = addr-(u_int)&dynarec_local;
1955 assert(offset<4096);
1956 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1957 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1958}
1959void emit_writehword(int rt, int addr)
1960{
1961 u_int offset = addr-(u_int)&dynarec_local;
1962 assert(offset<256);
1963 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1964 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1965}
1966void emit_writebyte(int rt, int addr)
1967{
1968 u_int offset = addr-(u_int)&dynarec_local;
1969 assert(offset<4096);
74426039 1970 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 1971 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1972}
// Unimplemented here: logs a "movl $imm,addr" line, then asserts
// unconditionally.  No instruction is emitted.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);

  assert(0);
}
// Unimplemented here: logs a "movb $imm,addr" line, then asserts
// unconditionally.  No instruction is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);

  assert(0);
}
1983
1984void emit_mul(int rs)
1985{
1986 assem_debug("mul %%%s\n",regname[rs]);
1987 assert(0);
1988}
1989void emit_imul(int rs)
1990{
1991 assem_debug("imul %%%s\n",regname[rs]);
1992 assert(0);
1993}
1994void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1995{
1996 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1997 assert(rs1<16);
1998 assert(rs2<16);
1999 assert(hi<16);
2000 assert(lo<16);
2001 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2002}
2003void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2004{
2005 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2006 assert(rs1<16);
2007 assert(rs2<16);
2008 assert(hi<16);
2009 assert(lo<16);
2010 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2011}
2012
2013void emit_div(int rs)
2014{
2015 assem_debug("div %%%s\n",regname[rs]);
2016 assert(0);
2017}
2018void emit_idiv(int rs)
2019{
2020 assem_debug("idiv %%%s\n",regname[rs]);
2021 assert(0);
2022}
// Unimplemented here: logs "cdq", then asserts unconditionally.
// No instruction is emitted.
void emit_cdq()
{
  assem_debug("cdq\n");

  assert(0);
}
2028
2029void emit_clz(int rs,int rt)
2030{
2031 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2032 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2033}
2034
2035void emit_subcs(int rs1,int rs2,int rt)
2036{
2037 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2038 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2039}
2040
2041void emit_shrcc_imm(int rs,u_int imm,int rt)
2042{
2043 assert(imm>0);
2044 assert(imm<32);
2045 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2046 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2047}
2048
2049void emit_negmi(int rs, int rt)
2050{
2051 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2052 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2053}
2054
2055void emit_negsmi(int rs, int rt)
2056{
2057 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2058 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2059}
2060
2061void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2062{
2063 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2064 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2065}
2066
2067void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2068{
2069 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2070 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2071}
2072
2073void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2074{
2075 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2076 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2077}
2078
2079void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2080{
2081 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2082 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2083}
2084
2085void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2086{
2087 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2088 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2089}
2090
2091void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2092{
2093 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2094 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2095}
2096
2097void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2098{
2099 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2100 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2101}
2102
2103void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2104{
2105 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2106 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2107}
2108
2109void emit_teq(int rs, int rt)
2110{
2111 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2112 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2113}
2114
2115void emit_rsbimm(int rs, int imm, int rt)
2116{
2117 u_int armval;
cfbd3c6e 2118 genimm_checked(imm,&armval);
57871462 2119 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2120 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2121}
2122
2123// Load 2 immediates optimizing for small code size
2124void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2125{
2126 emit_movimm(imm1,rt1);
2127 u_int armval;
2128 if(genimm(imm2-imm1,&armval)) {
2129 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2130 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2131 }else if(genimm(imm1-imm2,&armval)) {
2132 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2133 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2134 }
2135 else emit_movimm(imm2,rt2);
2136}
2137
2138// Conditionally select one of two immediates, optimizing for small code size
2139// This will only be called if HAVE_CMOV_IMM is defined
2140void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2141{
2142 u_int armval;
2143 if(genimm(imm2-imm1,&armval)) {
2144 emit_movimm(imm1,rt);
2145 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2146 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2147 }else if(genimm(imm1-imm2,&armval)) {
2148 emit_movimm(imm1,rt);
2149 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2150 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2151 }
2152 else {
2153 #ifdef ARMv5_ONLY
2154 emit_movimm(imm1,rt);
2155 add_literal((int)out,imm2);
2156 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2157 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2158 #else
2159 emit_movw(imm1&0x0000FFFF,rt);
2160 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2161 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2162 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2163 }
2164 emit_movt(imm1&0xFFFF0000,rt);
2165 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2166 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2167 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2168 }
2169 #endif
2170 }
2171}
2172
// Special case for checking invalid_code (immediate-base form).
// Not available in this backend: reaching it trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2178
2179// special case for checking invalid_code
2180void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2181{
2182 assert(imm<128&&imm>=0);
2183 assert(r>=0&&r<16);
2184 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2185 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2186 emit_cmpimm(HOST_TEMPREG,imm);
2187}
2188
2189// special case for tlb mapping
2190void emit_addsr12(int rs1,int rs2,int rt)
2191{
2192 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2193 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2194}
2195
// Used to preload hash table entries
// NOTE(review): the byte sequence written here (0x0F 0x18 + modrm + 32-bit
// address) looks like an x86 prefetch encoding rather than an ARM
// instruction - confirm whether this function is still reachable.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2205void emit_prefetchreg(int r)
2206{
2207 assem_debug("pld %s\n",regname[r]);
2208 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2209}
2210
2211// Special case for mini_ht
2212void emit_ldreq_indexed(int rs, u_int offset, int rt)
2213{
2214 assert(offset<4096);
2215 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2216 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2217}
2218
2219void emit_flds(int r,int sr)
2220{
2221 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2222 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2223}
2224
2225void emit_vldr(int r,int vr)
2226{
2227 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2228 output_w32(0xed900b00|(vr<<12)|(r<<16));
2229}
2230
2231void emit_fsts(int sr,int r)
2232{
2233 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2234 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2235}
2236
2237void emit_vstr(int vr,int r)
2238{
2239 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2240 output_w32(0xed800b00|(vr<<12)|(r<<16));
2241}
2242
2243void emit_ftosizs(int s,int d)
2244{
2245 assem_debug("ftosizs s%d,s%d\n",d,s);
2246 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2247}
2248
2249void emit_ftosizd(int s,int d)
2250{
2251 assem_debug("ftosizd s%d,d%d\n",d,s);
2252 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2253}
2254
2255void emit_fsitos(int s,int d)
2256{
2257 assem_debug("fsitos s%d,s%d\n",d,s);
2258 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2259}
2260
2261void emit_fsitod(int s,int d)
2262{
2263 assem_debug("fsitod d%d,s%d\n",d,s);
2264 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2265}
2266
2267void emit_fcvtds(int s,int d)
2268{
2269 assem_debug("fcvtds d%d,s%d\n",d,s);
2270 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2271}
2272
2273void emit_fcvtsd(int s,int d)
2274{
2275 assem_debug("fcvtsd s%d,d%d\n",d,s);
2276 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2277}
2278
/* Emit "fsqrts s<d>,s<s>": single-precision square root.
 * Both operands are single-precision registers, so the debug mnemonic
 * prints them as s<n> (it previously printed the destination as d<n>). */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2284
/* Emit "fsqrtd d<d>,d<s>": double-precision square root.
 * Both operands are double-precision registers, so the debug mnemonic
 * prints them as d<n> (it previously printed the destination as s<n>). */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2290
/* Emit "fabss s<d>,s<s>": single-precision absolute value.
 * The debug mnemonic now names the destination as s<n>, matching the
 * single-precision encoding (it previously printed d<n>). */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2296
/* Emit "fabsd d<d>,d<s>": double-precision absolute value.
 * The debug mnemonic now names the destination as d<n>, matching the
 * double-precision encoding (it previously printed s<n>). */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2302
/* Emit "fnegs s<d>,s<s>": single-precision negate.
 * The debug mnemonic now names the destination as s<n>, matching the
 * single-precision encoding (it previously printed d<n>). */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2308
/* Emit "fnegd d<d>,d<s>": double-precision negate.
 * The debug mnemonic now names the destination as d<n>, matching the
 * double-precision encoding (it previously printed s<n>). */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2314
2315void emit_fadds(int s1,int s2,int d)
2316{
2317 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2318 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2319}
2320
2321void emit_faddd(int s1,int s2,int d)
2322{
2323 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2324 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2325}
2326
2327void emit_fsubs(int s1,int s2,int d)
2328{
2329 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2330 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2331}
2332
2333void emit_fsubd(int s1,int s2,int d)
2334{
2335 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2336 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2337}
2338
2339void emit_fmuls(int s1,int s2,int d)
2340{
2341 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2342 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2343}
2344
2345void emit_fmuld(int s1,int s2,int d)
2346{
2347 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2348 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2349}
2350
2351void emit_fdivs(int s1,int s2,int d)
2352{
2353 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2354 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2355}
2356
2357void emit_fdivd(int s1,int s2,int d)
2358{
2359 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2360 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2361}
2362
2363void emit_fcmps(int x,int y)
2364{
2365 assem_debug("fcmps s14, s15\n");
2366 output_w32(0xeeb47a67);
2367}
2368
2369void emit_fcmpd(int x,int y)
2370{
2371 assem_debug("fcmpd d6, d7\n");
2372 output_w32(0xeeb46b47);
2373}
2374
2375void emit_fmstat()
2376{
2377 assem_debug("fmstat\n");
2378 output_w32(0xeef1fa10);
2379}
2380
2381void emit_bicne_imm(int rs,int imm,int rt)
2382{
2383 u_int armval;
cfbd3c6e 2384 genimm_checked(imm,&armval);
57871462 2385 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2386 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2387}
2388
2389void emit_biccs_imm(int rs,int imm,int rt)
2390{
2391 u_int armval;
cfbd3c6e 2392 genimm_checked(imm,&armval);
57871462 2393 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2394 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2395}
2396
2397void emit_bicvc_imm(int rs,int imm,int rt)
2398{
2399 u_int armval;
cfbd3c6e 2400 genimm_checked(imm,&armval);
57871462 2401 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2402 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2403}
2404
2405void emit_bichi_imm(int rs,int imm,int rt)
2406{
2407 u_int armval;
cfbd3c6e 2408 genimm_checked(imm,&armval);
57871462 2409 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2410 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2411}
2412
2413void emit_orrvs_imm(int rs,int imm,int rt)
2414{
2415 u_int armval;
cfbd3c6e 2416 genimm_checked(imm,&armval);
57871462 2417 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2418 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2419}
2420
b9b61529 2421void emit_orrne_imm(int rs,int imm,int rt)
2422{
2423 u_int armval;
cfbd3c6e 2424 genimm_checked(imm,&armval);
b9b61529 2425 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2426 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2427}
2428
2429void emit_andne_imm(int rs,int imm,int rt)
2430{
2431 u_int armval;
cfbd3c6e 2432 genimm_checked(imm,&armval);
b9b61529 2433 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2434 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2435}
2436
57871462 2437void emit_jno_unlikely(int a)
2438{
2439 //emit_jno(a);
2440 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2441 output_w32(0x72800000|rd_rn_rm(15,15,0));
2442}
2443
2444// Save registers before function call
2445void save_regs(u_int reglist)
2446{
2447 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2448 if(!reglist) return;
2449 assem_debug("stmia fp,{");
2450 if(reglist&1) assem_debug("r0, ");
2451 if(reglist&2) assem_debug("r1, ");
2452 if(reglist&4) assem_debug("r2, ");
2453 if(reglist&8) assem_debug("r3, ");
2454 if(reglist&0x1000) assem_debug("r12");
2455 assem_debug("}\n");
2456 output_w32(0xe88b0000|reglist);
2457}
2458// Restore registers after function call
2459void restore_regs(u_int reglist)
2460{
2461 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2462 if(!reglist) return;
2463 assem_debug("ldmia fp,{");
2464 if(reglist&1) assem_debug("r0, ");
2465 if(reglist&2) assem_debug("r1, ");
2466 if(reglist&4) assem_debug("r2, ");
2467 if(reglist&8) assem_debug("r3, ");
2468 if(reglist&0x1000) assem_debug("r12");
2469 assem_debug("}\n");
2470 output_w32(0xe89b0000|reglist);
2471}
2472
2473// Write back consts using r14 so we don't disturb the other registers
2474void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2475{
2476 int hr;
2477 for(hr=0;hr<HOST_REGS;hr++) {
2478 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2479 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2480 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2481 int value=constmap[i][hr];
2482 if(value==0) {
2483 emit_zeroreg(HOST_TEMPREG);
2484 }
2485 else {
2486 emit_movimm(value,HOST_TEMPREG);
2487 }
2488 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2489#ifndef FORCE32
57871462 2490 if((i_is32>>i_regmap[hr])&1) {
2491 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2492 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2493 }
24385cae 2494#endif
57871462 2495 }
2496 }
2497 }
2498 }
2499}
2500
2501/* Stubs/epilogue */
2502
2503void literal_pool(int n)
2504{
2505 if(!literalcount) return;
2506 if(n) {
2507 if((int)out-literals[0][0]<4096-n) return;
2508 }
2509 u_int *ptr;
2510 int i;
2511 for(i=0;i<literalcount;i++)
2512 {
2513 ptr=(u_int *)literals[i][0];
2514 u_int offset=(u_int)out-(u_int)ptr-8;
2515 assert(offset<4096);
2516 assert(!(offset&3));
2517 *ptr|=offset;
2518 output_w32(literals[i][1]);
2519 }
2520 literalcount=0;
2521}
2522
2523void literal_pool_jumpover(int n)
2524{
2525 if(!literalcount) return;
2526 if(n) {
2527 if((int)out-literals[0][0]<4096-n) return;
2528 }
2529 int jaddr=(int)out;
2530 emit_jmp(0);
2531 literal_pool(0);
2532 set_jump_target(jaddr,(int)out);
2533}
2534
2535emit_extjump2(int addr, int target, int linker)
2536{
2537 u_char *ptr=(u_char *)addr;
2538 assert((ptr[3]&0x0e)==0xa);
2539 emit_loadlp(target,0);
2540 emit_loadlp(addr,1);
24385cae 2541 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2542 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2543//DEBUG >
2544#ifdef DEBUG_CYCLE_COUNT
2545 emit_readword((int)&last_count,ECX);
2546 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2547 emit_readword((int)&next_interupt,ECX);
2548 emit_writeword(HOST_CCREG,(int)&Count);
2549 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2550 emit_writeword(ECX,(int)&last_count);
2551#endif
2552//DEBUG <
2553 emit_jmp(linker);
2554}
2555
2556emit_extjump(int addr, int target)
2557{
2558 emit_extjump2(addr, target, (int)dyna_linker);
2559}
2560emit_extjump_ds(int addr, int target)
2561{
2562 emit_extjump2(addr, target, (int)dyna_linker_ds);
2563}
2564
2565do_readstub(int n)
2566{
2567 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2568 literal_pool(256);
2569 set_jump_target(stubs[n][1],(int)out);
2570 int type=stubs[n][0];
2571 int i=stubs[n][3];
2572 int rs=stubs[n][4];
2573 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2574 u_int reglist=stubs[n][7];
2575 signed char *i_regmap=i_regs->regmap;
2576 int addr=get_reg(i_regmap,AGEN1+(i&1));
2577 int rth,rt;
2578 int ds;
b9b61529 2579 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2580 rth=get_reg(i_regmap,FTEMP|64);
2581 rt=get_reg(i_regmap,FTEMP);
2582 }else{
2583 rth=get_reg(i_regmap,rt1[i]|64);
2584 rt=get_reg(i_regmap,rt1[i]);
2585 }
2586 assert(rs>=0);
57871462 2587 if(addr<0) addr=rt;
f18c0f46 2588 if(addr<0)
2589 // assume dummy read, no alloced reg
2590 addr=get_reg(i_regmap,-1);
57871462 2591 assert(addr>=0);
2592 int ftable=0;
2593 if(type==LOADB_STUB||type==LOADBU_STUB)
2594 ftable=(int)readmemb;
2595 if(type==LOADH_STUB||type==LOADHU_STUB)
2596 ftable=(int)readmemh;
2597 if(type==LOADW_STUB)
2598 ftable=(int)readmem;
24385cae 2599#ifndef FORCE32
57871462 2600 if(type==LOADD_STUB)
2601 ftable=(int)readmemd;
24385cae 2602#endif
2603 assert(ftable!=0);
57871462 2604 emit_writeword(rs,(int)&address);
2605 //emit_pusha();
2606 save_regs(reglist);
2607 ds=i_regs!=&regs[i];
2608 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2609 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2610 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2611 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2612 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2613 emit_shrimm(rs,16,1);
2614 int cc=get_reg(i_regmap,CCREG);
2615 if(cc<0) {
2616 emit_loadreg(CCREG,2);
2617 }
2618 emit_movimm(ftable,0);
2619 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2620 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2621 //emit_readword((int)&last_count,temp);
2622 //emit_add(cc,temp,cc);
2623 //emit_writeword(cc,(int)&Count);
2624 //emit_mov(15,14);
2625 emit_call((int)&indirect_jump_indexed);
2626 //emit_callreg(rs);
2627 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2628 // We really shouldn't need to update the count here,
2629 // but not doing so causes random crashes...
2630 emit_readword((int)&Count,HOST_TEMPREG);
2631 emit_readword((int)&next_interupt,2);
2632 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2633 emit_writeword(2,(int)&last_count);
2634 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2635 if(cc<0) {
2636 emit_storereg(CCREG,HOST_TEMPREG);
2637 }
2638 //emit_popa();
2639 restore_regs(reglist);
2640 //if((cc=get_reg(regmap,CCREG))>=0) {
2641 // emit_loadreg(CCREG,cc);
2642 //}
f18c0f46 2643 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2644 assert(rt>=0);
2645 if(type==LOADB_STUB)
2646 emit_movsbl((int)&readmem_dword,rt);
2647 if(type==LOADBU_STUB)
2648 emit_movzbl((int)&readmem_dword,rt);
2649 if(type==LOADH_STUB)
2650 emit_movswl((int)&readmem_dword,rt);
2651 if(type==LOADHU_STUB)
2652 emit_movzwl((int)&readmem_dword,rt);
2653 if(type==LOADW_STUB)
2654 emit_readword((int)&readmem_dword,rt);
2655 if(type==LOADD_STUB) {
2656 emit_readword((int)&readmem_dword,rt);
2657 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2658 }
57871462 2659 }
2660 emit_jmp(stubs[n][2]); // return address
2661}
2662
2663inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2664{
2665 int rs=get_reg(regmap,target);
2666 int rth=get_reg(regmap,target|64);
2667 int rt=get_reg(regmap,target);
fd99c415 2668 // allow for PCSX dummy reads
2669 //assert(rt>=0);
2670 if(rs<0)
2671 rs=get_reg(regmap,-1);
57871462 2672 assert(rs>=0);
57871462 2673 int ftable=0;
2674 if(type==LOADB_STUB||type==LOADBU_STUB)
2675 ftable=(int)readmemb;
2676 if(type==LOADH_STUB||type==LOADHU_STUB)
2677 ftable=(int)readmemh;
2678 if(type==LOADW_STUB)
2679 ftable=(int)readmem;
24385cae 2680#ifndef FORCE32
57871462 2681 if(type==LOADD_STUB)
2682 ftable=(int)readmemd;
24385cae 2683#endif
2684 assert(ftable!=0);
fd99c415 2685 if(target==0)
2686 emit_movimm(addr,rs);
57871462 2687 emit_writeword(rs,(int)&address);
2688 //emit_pusha();
2689 save_regs(reglist);
2690 //emit_shrimm(rs,16,1);
2691 int cc=get_reg(regmap,CCREG);
2692 if(cc<0) {
2693 emit_loadreg(CCREG,2);
2694 }
2695 //emit_movimm(ftable,0);
2696 emit_movimm(((u_int *)ftable)[addr>>16],0);
2697 //emit_readword((int)&last_count,12);
2698 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2699 if((signed int)addr>=(signed int)0xC0000000) {
2700 // Pagefault address
2701 int ds=regmap!=regs[i].regmap;
2702 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2703 }
2704 //emit_add(12,2,2);
2705 //emit_writeword(2,(int)&Count);
2706 //emit_call(((u_int *)ftable)[addr>>16]);
2707 emit_call((int)&indirect_jump);
2708 // We really shouldn't need to update the count here,
2709 // but not doing so causes random crashes...
2710 emit_readword((int)&Count,HOST_TEMPREG);
2711 emit_readword((int)&next_interupt,2);
2712 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2713 emit_writeword(2,(int)&last_count);
2714 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2715 if(cc<0) {
2716 emit_storereg(CCREG,HOST_TEMPREG);
2717 }
2718 //emit_popa();
2719 restore_regs(reglist);
fd99c415 2720 if(rt>=0) {
2721 if(type==LOADB_STUB)
2722 emit_movsbl((int)&readmem_dword,rt);
2723 if(type==LOADBU_STUB)
2724 emit_movzbl((int)&readmem_dword,rt);
2725 if(type==LOADH_STUB)
2726 emit_movswl((int)&readmem_dword,rt);
2727 if(type==LOADHU_STUB)
2728 emit_movzwl((int)&readmem_dword,rt);
2729 if(type==LOADW_STUB)
2730 emit_readword((int)&readmem_dword,rt);
2731 if(type==LOADD_STUB) {
2732 emit_readword((int)&readmem_dword,rt);
2733 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2734 }
57871462 2735 }
2736}
2737
2738do_writestub(int n)
2739{
2740 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2741 literal_pool(256);
2742 set_jump_target(stubs[n][1],(int)out);
2743 int type=stubs[n][0];
2744 int i=stubs[n][3];
2745 int rs=stubs[n][4];
2746 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2747 u_int reglist=stubs[n][7];
2748 signed char *i_regmap=i_regs->regmap;
2749 int addr=get_reg(i_regmap,AGEN1+(i&1));
2750 int rth,rt,r;
2751 int ds;
b9b61529 2752 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2753 rth=get_reg(i_regmap,FTEMP|64);
2754 rt=get_reg(i_regmap,r=FTEMP);
2755 }else{
2756 rth=get_reg(i_regmap,rs2[i]|64);
2757 rt=get_reg(i_regmap,r=rs2[i]);
2758 }
2759 assert(rs>=0);
2760 assert(rt>=0);
2761 if(addr<0) addr=get_reg(i_regmap,-1);
2762 assert(addr>=0);
2763 int ftable=0;
2764 if(type==STOREB_STUB)
2765 ftable=(int)writememb;
2766 if(type==STOREH_STUB)
2767 ftable=(int)writememh;
2768 if(type==STOREW_STUB)
2769 ftable=(int)writemem;
24385cae 2770#ifndef FORCE32
57871462 2771 if(type==STORED_STUB)
2772 ftable=(int)writememd;
24385cae 2773#endif
2774 assert(ftable!=0);
57871462 2775 emit_writeword(rs,(int)&address);
2776 //emit_shrimm(rs,16,rs);
2777 //emit_movmem_indexedx4(ftable,rs,rs);
2778 if(type==STOREB_STUB)
2779 emit_writebyte(rt,(int)&byte);
2780 if(type==STOREH_STUB)
2781 emit_writehword(rt,(int)&hword);
2782 if(type==STOREW_STUB)
2783 emit_writeword(rt,(int)&word);
2784 if(type==STORED_STUB) {
3d624f89 2785#ifndef FORCE32
57871462 2786 emit_writeword(rt,(int)&dword);
2787 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2788#else
2789 printf("STORED_STUB\n");
2790#endif
57871462 2791 }
2792 //emit_pusha();
2793 save_regs(reglist);
2794 ds=i_regs!=&regs[i];
2795 int real_rs=get_reg(i_regmap,rs1[i]);
2796 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2797 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2798 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2799 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2800 emit_shrimm(rs,16,1);
2801 int cc=get_reg(i_regmap,CCREG);
2802 if(cc<0) {
2803 emit_loadreg(CCREG,2);
2804 }
2805 emit_movimm(ftable,0);
2806 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2807 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2808 //emit_readword((int)&last_count,temp);
2809 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2810 //emit_add(cc,temp,cc);
2811 //emit_writeword(cc,(int)&Count);
2812 emit_call((int)&indirect_jump_indexed);
2813 //emit_callreg(rs);
2814 emit_readword((int)&Count,HOST_TEMPREG);
2815 emit_readword((int)&next_interupt,2);
2816 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2817 emit_writeword(2,(int)&last_count);
2818 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2819 if(cc<0) {
2820 emit_storereg(CCREG,HOST_TEMPREG);
2821 }
2822 //emit_popa();
2823 restore_regs(reglist);
2824 //if((cc=get_reg(regmap,CCREG))>=0) {
2825 // emit_loadreg(CCREG,cc);
2826 //}
2827 emit_jmp(stubs[n][2]); // return address
2828}
2829
2830inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2831{
2832 int rs=get_reg(regmap,-1);
2833 int rth=get_reg(regmap,target|64);
2834 int rt=get_reg(regmap,target);
2835 assert(rs>=0);
2836 assert(rt>=0);
2837 int ftable=0;
2838 if(type==STOREB_STUB)
2839 ftable=(int)writememb;
2840 if(type==STOREH_STUB)
2841 ftable=(int)writememh;
2842 if(type==STOREW_STUB)
2843 ftable=(int)writemem;
24385cae 2844#ifndef FORCE32
57871462 2845 if(type==STORED_STUB)
2846 ftable=(int)writememd;
24385cae 2847#endif
2848 assert(ftable!=0);
57871462 2849 emit_writeword(rs,(int)&address);
2850 //emit_shrimm(rs,16,rs);
2851 //emit_movmem_indexedx4(ftable,rs,rs);
2852 if(type==STOREB_STUB)
2853 emit_writebyte(rt,(int)&byte);
2854 if(type==STOREH_STUB)
2855 emit_writehword(rt,(int)&hword);
2856 if(type==STOREW_STUB)
2857 emit_writeword(rt,(int)&word);
2858 if(type==STORED_STUB) {
3d624f89 2859#ifndef FORCE32
57871462 2860 emit_writeword(rt,(int)&dword);
2861 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2862#else
2863 printf("STORED_STUB\n");
2864#endif
57871462 2865 }
2866 //emit_pusha();
2867 save_regs(reglist);
2868 //emit_shrimm(rs,16,1);
2869 int cc=get_reg(regmap,CCREG);
2870 if(cc<0) {
2871 emit_loadreg(CCREG,2);
2872 }
2873 //emit_movimm(ftable,0);
2874 emit_movimm(((u_int *)ftable)[addr>>16],0);
2875 //emit_readword((int)&last_count,12);
2876 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2877 if((signed int)addr>=(signed int)0xC0000000) {
2878 // Pagefault address
2879 int ds=regmap!=regs[i].regmap;
2880 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2881 }
2882 //emit_add(12,2,2);
2883 //emit_writeword(2,(int)&Count);
2884 //emit_call(((u_int *)ftable)[addr>>16]);
2885 emit_call((int)&indirect_jump);
2886 emit_readword((int)&Count,HOST_TEMPREG);
2887 emit_readword((int)&next_interupt,2);
2888 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2889 emit_writeword(2,(int)&last_count);
2890 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2891 if(cc<0) {
2892 emit_storereg(CCREG,HOST_TEMPREG);
2893 }
2894 //emit_popa();
2895 restore_regs(reglist);
2896}
2897
2898do_unalignedwritestub(int n)
2899{
b7918751 2900 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2901 literal_pool(256);
57871462 2902 set_jump_target(stubs[n][1],(int)out);
b7918751 2903
2904 int i=stubs[n][3];
2905 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2906 int addr=stubs[n][5];
2907 u_int reglist=stubs[n][7];
2908 signed char *i_regmap=i_regs->regmap;
2909 int temp2=get_reg(i_regmap,FTEMP);
2910 int rt;
2911 int ds, real_rs;
2912 rt=get_reg(i_regmap,rs2[i]);
2913 assert(rt>=0);
2914 assert(addr>=0);
2915 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2916 reglist|=(1<<addr);
2917 reglist&=~(1<<temp2);
2918
2919 emit_andimm(addr,0xfffffffc,temp2);
2920 emit_writeword(temp2,(int)&address);
2921
2922 save_regs(reglist);
2923 ds=i_regs!=&regs[i];
2924 real_rs=get_reg(i_regmap,rs1[i]);
2925 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2926 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2927 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2928 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2929 emit_shrimm(addr,16,1);
2930 int cc=get_reg(i_regmap,CCREG);
2931 if(cc<0) {
2932 emit_loadreg(CCREG,2);
2933 }
2934 emit_movimm((u_int)readmem,0);
2935 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2936 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2937 emit_call((int)&indirect_jump_indexed);
2938 restore_regs(reglist);
2939
2940 emit_readword((int)&readmem_dword,temp2);
2941 int temp=addr; //hmh
2942 emit_shlimm(addr,3,temp);
2943 emit_andimm(temp,24,temp);
2944#ifdef BIG_ENDIAN_MIPS
2945 if (opcode[i]==0x2e) // SWR
2946#else
2947 if (opcode[i]==0x2a) // SWL
2948#endif
2949 emit_xorimm(temp,24,temp);
2950 emit_movimm(-1,HOST_TEMPREG);
55439448 2951 if (opcode[i]==0x2a) { // SWL
b7918751 2952 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2953 emit_orrshr(rt,temp,temp2);
2954 }else{
2955 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2956 emit_orrshl(rt,temp,temp2);
2957 }
2958 emit_readword((int)&address,addr);
2959 emit_writeword(temp2,(int)&word);
2960 //save_regs(reglist); // don't need to, no state changes
2961 emit_shrimm(addr,16,1);
2962 emit_movimm((u_int)writemem,0);
2963 //emit_call((int)&indirect_jump_indexed);
2964 emit_mov(15,14);
2965 emit_readword_dualindexedx4(0,1,15);
2966 emit_readword((int)&Count,HOST_TEMPREG);
2967 emit_readword((int)&next_interupt,2);
2968 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2969 emit_writeword(2,(int)&last_count);
2970 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2971 if(cc<0) {
2972 emit_storereg(CCREG,HOST_TEMPREG);
2973 }
2974 restore_regs(reglist);
57871462 2975 emit_jmp(stubs[n][2]); // return address
2976}
2977
/* Debug helper: print the register values passed in.
 * NOTE(review): (&edi)[-1] reads the int located just before the first
 * parameter, which is outside any object this function owns - this looks
 * like a leftover stack-peeking hack; confirm it is still wanted.
 * esp is accepted but not printed. */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int before_edi=(&edi)[-1];
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,before_edi);
}
2982
2983do_invstub(int n)
2984{
2985 literal_pool(20);
2986 u_int reglist=stubs[n][3];
2987 set_jump_target(stubs[n][1],(int)out);
2988 save_regs(reglist);
2989 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2990 emit_call((int)&invalidate_addr);
2991 restore_regs(reglist);
2992 emit_jmp(stubs[n][2]); // return address
2993}
2994
2995int do_dirty_stub(int i)
2996{
2997 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 2998 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2999 #ifdef PCSX
3000 addr=(u_int)source;
3001 #endif
57871462 3002 // Careful about the code output here, verify_dirty needs to parse it.
3003 #ifdef ARMv5_ONLY
ac545b3a 3004 emit_loadlp(addr,1);
57871462 3005 emit_loadlp((int)copy,2);
3006 emit_loadlp(slen*4,3);
3007 #else
ac545b3a 3008 emit_movw(addr&0x0000FFFF,1);
57871462 3009 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3010 emit_movt(addr&0xFFFF0000,1);
57871462 3011 emit_movt(((u_int)copy)&0xFFFF0000,2);
3012 emit_movw(slen*4,3);
3013 #endif
3014 emit_movimm(start+i*4,0);
3015 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3016 int entry=(int)out;
3017 load_regs_entry(i);
3018 if(entry==(int)out) entry=instr_addr[i];
3019 emit_jmp(instr_addr[i]);
3020 return entry;
3021}
3022
3023void do_dirty_stub_ds()
3024{
3025 // Careful about the code output here, verify_dirty needs to parse it.
3026 #ifdef ARMv5_ONLY
3027 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3028 emit_loadlp((int)copy,2);
3029 emit_loadlp(slen*4,3);
3030 #else
3031 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3032 emit_movw(((u_int)copy)&0x0000FFFF,2);
3033 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3034 emit_movt(((u_int)copy)&0xFFFF0000,2);
3035 emit_movw(slen*4,3);
3036 #endif
3037 emit_movimm(start+1,0);
3038 emit_call((int)&verify_code_ds);
3039}
3040
3041do_cop1stub(int n)
3042{
3043 literal_pool(256);
3044 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3045 set_jump_target(stubs[n][1],(int)out);
3046 int i=stubs[n][3];
3d624f89 3047// int rs=stubs[n][4];
57871462 3048 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3049 int ds=stubs[n][6];
3050 if(!ds) {
3051 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3052 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3053 }
3054 //else {printf("fp exception in delay slot\n");}
3055 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3056 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3057 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3058 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3059 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3060}
3061
3062/* TLB */
3063
3064int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3065{
3066 if(c) {
3067 if((signed int)addr>=(signed int)0xC0000000) {
3068 // address_generation already loaded the const
3069 emit_readword_dualindexedx4(FP,map,map);
3070 }
3071 else
3072 return -1; // No mapping
3073 }
3074 else {
3075 assert(s!=map);
3076 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3077 emit_addsr12(map,s,map);
3078 // Schedule this while we wait on the load
3079 //if(x) emit_xorimm(s,x,ar);
3080 if(shift>=0) emit_shlimm(s,3,shift);
3081 if(~a) emit_andimm(s,a,ar);
3082 emit_readword_dualindexedx4(FP,map,map);
3083 }
3084 return map;
3085}
3086int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3087{
3088 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3089 emit_test(map,map);
3090 *jaddr=(int)out;
3091 emit_js(0);
3092 }
3093 return map;
3094}
3095
3096int gen_tlb_addr_r(int ar, int map) {
3097 if(map>=0) {
3098 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3099 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3100 }
3101}
3102
3103int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3104{
3105 if(c) {
3106 if(addr<0x80800000||addr>=0xC0000000) {
3107 // address_generation already loaded the const
3108 emit_readword_dualindexedx4(FP,map,map);
3109 }
3110 else
3111 return -1; // No mapping
3112 }
3113 else {
3114 assert(s!=map);
3115 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3116 emit_addsr12(map,s,map);
3117 // Schedule this while we wait on the load
3118 //if(x) emit_xorimm(s,x,ar);
3119 emit_readword_dualindexedx4(FP,map,map);
3120 }
3121 return map;
3122}
3123int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3124{
3125 if(!c||addr<0x80800000||addr>=0xC0000000) {
3126 emit_testimm(map,0x40000000);
3127 *jaddr=(int)out;
3128 emit_jne(0);
3129 }
3130}
3131
3132int gen_tlb_addr_w(int ar, int map) {
3133 if(map>=0) {
3134 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3135 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3136 }
3137}
3138
3139// Generate the address of the memory_map entry, relative to dynarec_local
3140generate_map_const(u_int addr,int reg) {
3141 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3142 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3143}
3144
3145/* Special assem */
3146
3147void shift_assemble_arm(int i,struct regstat *i_regs)
3148{
3149 if(rt1[i]) {
3150 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3151 {
3152 signed char s,t,shift;
3153 t=get_reg(i_regs->regmap,rt1[i]);
3154 s=get_reg(i_regs->regmap,rs1[i]);
3155 shift=get_reg(i_regs->regmap,rs2[i]);
3156 if(t>=0){
3157 if(rs1[i]==0)
3158 {
3159 emit_zeroreg(t);
3160 }
3161 else if(rs2[i]==0)
3162 {
3163 assert(s>=0);
3164 if(s!=t) emit_mov(s,t);
3165 }
3166 else
3167 {
3168 emit_andimm(shift,31,HOST_TEMPREG);
3169 if(opcode2[i]==4) // SLLV
3170 {
3171 emit_shl(s,HOST_TEMPREG,t);
3172 }
3173 if(opcode2[i]==6) // SRLV
3174 {
3175 emit_shr(s,HOST_TEMPREG,t);
3176 }
3177 if(opcode2[i]==7) // SRAV
3178 {
3179 emit_sar(s,HOST_TEMPREG,t);
3180 }
3181 }
3182 }
3183 } else { // DSLLV/DSRLV/DSRAV
3184 signed char sh,sl,th,tl,shift;
3185 th=get_reg(i_regs->regmap,rt1[i]|64);
3186 tl=get_reg(i_regs->regmap,rt1[i]);
3187 sh=get_reg(i_regs->regmap,rs1[i]|64);
3188 sl=get_reg(i_regs->regmap,rs1[i]);
3189 shift=get_reg(i_regs->regmap,rs2[i]);
3190 if(tl>=0){
3191 if(rs1[i]==0)
3192 {
3193 emit_zeroreg(tl);
3194 if(th>=0) emit_zeroreg(th);
3195 }
3196 else if(rs2[i]==0)
3197 {
3198 assert(sl>=0);
3199 if(sl!=tl) emit_mov(sl,tl);
3200 if(th>=0&&sh!=th) emit_mov(sh,th);
3201 }
3202 else
3203 {
3204 // FIXME: What if shift==tl ?
3205 assert(shift!=tl);
3206 int temp=get_reg(i_regs->regmap,-1);
3207 int real_th=th;
3208 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3209 assert(sl>=0);
3210 assert(sh>=0);
3211 emit_andimm(shift,31,HOST_TEMPREG);
3212 if(opcode2[i]==0x14) // DSLLV
3213 {
3214 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3215 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3216 emit_orrshr(sl,HOST_TEMPREG,th);
3217 emit_andimm(shift,31,HOST_TEMPREG);
3218 emit_testimm(shift,32);
3219 emit_shl(sl,HOST_TEMPREG,tl);
3220 if(th>=0) emit_cmovne_reg(tl,th);
3221 emit_cmovne_imm(0,tl);
3222 }
3223 if(opcode2[i]==0x16) // DSRLV
3224 {
3225 assert(th>=0);
3226 emit_shr(sl,HOST_TEMPREG,tl);
3227 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3228 emit_orrshl(sh,HOST_TEMPREG,tl);
3229 emit_andimm(shift,31,HOST_TEMPREG);
3230 emit_testimm(shift,32);
3231 emit_shr(sh,HOST_TEMPREG,th);
3232 emit_cmovne_reg(th,tl);
3233 if(real_th>=0) emit_cmovne_imm(0,th);
3234 }
3235 if(opcode2[i]==0x17) // DSRAV
3236 {
3237 assert(th>=0);
3238 emit_shr(sl,HOST_TEMPREG,tl);
3239 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3240 if(real_th>=0) {
3241 assert(temp>=0);
3242 emit_sarimm(th,31,temp);
3243 }
3244 emit_orrshl(sh,HOST_TEMPREG,tl);
3245 emit_andimm(shift,31,HOST_TEMPREG);
3246 emit_testimm(shift,32);
3247 emit_sar(sh,HOST_TEMPREG,th);
3248 emit_cmovne_reg(th,tl);
3249 if(real_th>=0) emit_cmovne_reg(temp,th);
3250 }
3251 }
3252 }
3253 }
3254 }
3255}
3256#define shift_assemble shift_assemble_arm
3257
3258void loadlr_assemble_arm(int i,struct regstat *i_regs)
3259{
3260 int s,th,tl,temp,temp2,addr,map=-1;
3261 int offset;
3262 int jaddr=0;
3263 int memtarget,c=0;
3264 u_int hr,reglist=0;
3265 th=get_reg(i_regs->regmap,rt1[i]|64);
3266 tl=get_reg(i_regs->regmap,rt1[i]);
3267 s=get_reg(i_regs->regmap,rs1[i]);
3268 temp=get_reg(i_regs->regmap,-1);
3269 temp2=get_reg(i_regs->regmap,FTEMP);
3270 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3271 assert(addr<0);
3272 offset=imm[i];
3273 for(hr=0;hr<HOST_REGS;hr++) {
3274 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3275 }
3276 reglist|=1<<temp;
3277 if(offset||s<0||c) addr=temp2;
3278 else addr=s;
3279 if(s>=0) {
3280 c=(i_regs->wasconst>>s)&1;
4cb76aa4 3281 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
57871462 3282 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3283 }
3284 if(tl>=0) {
3285 //assert(tl>=0);
3286 //assert(rt1[i]);
3287 if(!using_tlb) {
3288 if(!c) {
3289 emit_shlimm(addr,3,temp);
3290 if (opcode[i]==0x22||opcode[i]==0x26) {
3291 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3292 }else{
3293 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3294 }
4cb76aa4 3295 emit_cmpimm(addr,RAM_SIZE);
57871462 3296 jaddr=(int)out;
3297 emit_jno(0);
3298 }
3299 else {
3300 if (opcode[i]==0x22||opcode[i]==0x26) {
3301 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3302 }else{
3303 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3304 }
3305 }
3306 }else{ // using tlb
3307 int a;
3308 if(c) {
3309 a=-1;
3310 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3311 a=0xFFFFFFFC; // LWL/LWR
3312 }else{
3313 a=0xFFFFFFF8; // LDL/LDR
3314 }
3315 map=get_reg(i_regs->regmap,TLREG);
3316 assert(map>=0);
3317 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3318 if(c) {
3319 if (opcode[i]==0x22||opcode[i]==0x26) {
3320 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3321 }else{
3322 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3323 }
3324 }
3325 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3326 }
3327 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3328 if(!c||memtarget) {
3329 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3330 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3331 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3332 }
3333 else
3334 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3335 emit_andimm(temp,24,temp);
2002a1db 3336#ifdef BIG_ENDIAN_MIPS
3337 if (opcode[i]==0x26) // LWR
3338#else
3339 if (opcode[i]==0x22) // LWL
3340#endif
3341 emit_xorimm(temp,24,temp);
57871462 3342 emit_movimm(-1,HOST_TEMPREG);
3343 if (opcode[i]==0x26) {
3344 emit_shr(temp2,temp,temp2);
3345 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3346 }else{
3347 emit_shl(temp2,temp,temp2);
3348 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3349 }
3350 emit_or(temp2,tl,tl);
3351 //emit_storereg(rt1[i],tl); // DEBUG
3352 }
3353 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3354 // FIXME: little endian
57871462 3355 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3356 if(!c||memtarget) {
3357 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3358 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3359 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3360 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3361 }
3362 else
3363 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3364 emit_testimm(temp,32);
3365 emit_andimm(temp,24,temp);
3366 if (opcode[i]==0x1A) { // LDL
3367 emit_rsbimm(temp,32,HOST_TEMPREG);
3368 emit_shl(temp2h,temp,temp2h);
3369 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3370 emit_movimm(-1,HOST_TEMPREG);
3371 emit_shl(temp2,temp,temp2);
3372 emit_cmove_reg(temp2h,th);
3373 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3374 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3375 emit_orreq(temp2,tl,tl);
3376 emit_orrne(temp2,th,th);
3377 }
3378 if (opcode[i]==0x1B) { // LDR
3379 emit_xorimm(temp,24,temp);
3380 emit_rsbimm(temp,32,HOST_TEMPREG);
3381 emit_shr(temp2,temp,temp2);
3382 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3383 emit_movimm(-1,HOST_TEMPREG);
3384 emit_shr(temp2h,temp,temp2h);
3385 emit_cmovne_reg(temp2,tl);
3386 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3387 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3388 emit_orrne(temp2h,th,th);
3389 emit_orreq(temp2h,tl,tl);
3390 }
3391 }
3392 }
3393}
3394#define loadlr_assemble loadlr_assemble_arm
3395
3396void cop0_assemble(int i,struct regstat *i_regs)
3397{
3398 if(opcode2[i]==0) // MFC0
3399 {
3400 signed char t=get_reg(i_regs->regmap,rt1[i]);
3401 char copr=(source[i]>>11)&0x1f;
3402 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3403 if(t>=0&&rt1[i]!=0) {
7139f3c8 3404#ifdef MUPEN64
57871462 3405 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3406 emit_movimm((source[i]>>11)&0x1f,1);
3407 emit_writeword(0,(int)&PC);
3408 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3409 if(copr==9) {
3410 emit_readword((int)&last_count,ECX);
3411 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3412 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3413 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3414 emit_writeword(HOST_CCREG,(int)&Count);
3415 }
3416 emit_call((int)MFC0);
3417 emit_readword((int)&readmem_dword,t);
7139f3c8 3418#else
3419 emit_readword((int)&reg_cop0+copr*4,t);
3420#endif
57871462 3421 }
3422 }
3423 else if(opcode2[i]==4) // MTC0
3424 {
3425 signed char s=get_reg(i_regs->regmap,rs1[i]);
3426 char copr=(source[i]>>11)&0x1f;
3427 assert(s>=0);
3428 emit_writeword(s,(int)&readmem_dword);
3429 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3430#ifdef MUPEN64
57871462 3431 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3432 emit_movimm((source[i]>>11)&0x1f,1);
3433 emit_writeword(0,(int)&PC);
3434 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3435#endif
3436 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3437 emit_readword((int)&last_count,ECX);
3438 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3439 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3440 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3441 emit_writeword(HOST_CCREG,(int)&Count);
3442 }
3443 // What a mess. The status register (12) can enable interrupts,
3444 // so needs a special case to handle a pending interrupt.
3445 // The interrupt must be taken immediately, because a subsequent
3446 // instruction might disable interrupts again.
7139f3c8 3447 if(copr==12||copr==13) {
fca1aef2 3448#ifdef PCSX
3449 if (is_delayslot) {
3450 // burn cycles to cause cc_interrupt, which will
3451 // reschedule next_interupt. Relies on CCREG from above.
3452 assem_debug("MTC0 DS %d\n", copr);
3453 emit_writeword(HOST_CCREG,(int)&last_count);
3454 emit_movimm(0,HOST_CCREG);
3455 emit_storereg(CCREG,HOST_CCREG);
3456 emit_movimm(copr,0);
3457 emit_call((int)pcsx_mtc0_ds);
3458 return;
3459 }
3460#endif
57871462 3461 emit_movimm(start+i*4+4,0);
3462 emit_movimm(0,1);
3463 emit_writeword(0,(int)&pcaddr);
3464 emit_writeword(1,(int)&pending_exception);
3465 }
3466 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3467 //else
fca1aef2 3468#ifdef PCSX
3469 emit_movimm(copr,0);
3470 emit_call((int)pcsx_mtc0);
3471#else
57871462 3472 emit_call((int)MTC0);
fca1aef2 3473#endif
7139f3c8 3474 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3475 emit_readword((int)&Count,HOST_CCREG);
3476 emit_readword((int)&next_interupt,ECX);
3477 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3478 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3479 emit_writeword(ECX,(int)&last_count);
3480 emit_storereg(CCREG,HOST_CCREG);
3481 }
7139f3c8 3482 if(copr==12||copr==13) {
57871462 3483 assert(!is_delayslot);
3484 emit_readword((int)&pending_exception,14);
3485 }
3486 emit_loadreg(rs1[i],s);
3487 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3488 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3489 if(copr==12||copr==13) {
57871462 3490 emit_test(14,14);
3491 emit_jne((int)&do_interrupt);
3492 }
3493 cop1_usable=0;
3494 }
3495 else
3496 {
3497 assert(opcode2[i]==0x10);
3d624f89 3498#ifndef DISABLE_TLB
57871462 3499 if((source[i]&0x3f)==0x01) // TLBR
3500 emit_call((int)TLBR);
3501 if((source[i]&0x3f)==0x02) // TLBWI
3502 emit_call((int)TLBWI_new);
3503 if((source[i]&0x3f)==0x06) { // TLBWR
3504 // The TLB entry written by TLBWR is dependent on the count,
3505 // so update the cycle count
3506 emit_readword((int)&last_count,ECX);
3507 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3508 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3509 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3510 emit_writeword(HOST_CCREG,(int)&Count);
3511 emit_call((int)TLBWR_new);
3512 }
3513 if((source[i]&0x3f)==0x08) // TLBP
3514 emit_call((int)TLBP);
3d624f89 3515#endif
576bbd8f 3516#ifdef PCSX
3517 if((source[i]&0x3f)==0x10) // RFE
3518 {
3519 emit_readword((int)&Status,0);
3520 emit_andimm(0,0x3c,1);
3521 emit_andimm(0,~0xf,0);
3522 emit_orrshr_imm(1,2,0);
3523 emit_writeword(0,(int)&Status);
3524 }
3525#else
57871462 3526 if((source[i]&0x3f)==0x18) // ERET
3527 {
3528 int count=ccadj[i];
3529 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3530 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3531 emit_jmp((int)jump_eret);
3532 }
576bbd8f 3533#endif
57871462 3534 }
3535}
3536
b9b61529 3537static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3538{
3539 switch (copr) {
3540 case 1:
3541 case 3:
3542 case 5:
3543 case 8:
3544 case 9:
3545 case 10:
3546 case 11:
3547 emit_readword((int)&reg_cop2d[copr],tl);
3548 emit_signextend16(tl,tl);
3549 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3550 break;
3551 case 7:
3552 case 16:
3553 case 17:
3554 case 18:
3555 case 19:
3556 emit_readword((int)&reg_cop2d[copr],tl);
3557 emit_andimm(tl,0xffff,tl);
3558 emit_writeword(tl,(int)&reg_cop2d[copr]);
3559 break;
3560 case 15:
3561 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3562 emit_writeword(tl,(int)&reg_cop2d[copr]);
3563 break;
3564 case 28:
b9b61529 3565 case 29:
3566 emit_readword((int)&reg_cop2d[9],temp);
3567 emit_testimm(temp,0x8000); // do we need this?
3568 emit_andimm(temp,0xf80,temp);
3569 emit_andne_imm(temp,0,temp);
f70d384d 3570 emit_shrimm(temp,7,tl);
b9b61529 3571 emit_readword((int)&reg_cop2d[10],temp);
3572 emit_testimm(temp,0x8000);
3573 emit_andimm(temp,0xf80,temp);
3574 emit_andne_imm(temp,0,temp);
f70d384d 3575 emit_orrshr_imm(temp,2,tl);
b9b61529 3576 emit_readword((int)&reg_cop2d[11],temp);
3577 emit_testimm(temp,0x8000);
3578 emit_andimm(temp,0xf80,temp);
3579 emit_andne_imm(temp,0,temp);
f70d384d 3580 emit_orrshl_imm(temp,3,tl);
b9b61529 3581 emit_writeword(tl,(int)&reg_cop2d[copr]);
3582 break;
3583 default:
3584 emit_readword((int)&reg_cop2d[copr],tl);
3585 break;
3586 }
3587}
3588
3589static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3590{
3591 switch (copr) {
3592 case 15:
3593 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3594 emit_writeword(sl,(int)&reg_cop2d[copr]);
3595 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3596 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3597 emit_writeword(sl,(int)&reg_cop2d[14]);
3598 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3599 break;
3600 case 28:
3601 emit_andimm(sl,0x001f,temp);
f70d384d 3602 emit_shlimm(temp,7,temp);
b9b61529 3603 emit_writeword(temp,(int)&reg_cop2d[9]);
3604 emit_andimm(sl,0x03e0,temp);
f70d384d 3605 emit_shlimm(temp,2,temp);
b9b61529 3606 emit_writeword(temp,(int)&reg_cop2d[10]);
3607 emit_andimm(sl,0x7c00,temp);
f70d384d 3608 emit_shrimm(temp,3,temp);
b9b61529 3609 emit_writeword(temp,(int)&reg_cop2d[11]);
3610 emit_writeword(sl,(int)&reg_cop2d[28]);
3611 break;
3612 case 30:
3613 emit_movs(sl,temp);
3614 emit_mvnmi(temp,temp);
3615 emit_clz(temp,temp);
3616 emit_writeword(sl,(int)&reg_cop2d[30]);
3617 emit_writeword(temp,(int)&reg_cop2d[31]);
3618 break;
b9b61529 3619 case 31:
3620 break;
3621 default:
3622 emit_writeword(sl,(int)&reg_cop2d[copr]);
3623 break;
3624 }
3625}
3626
3627void cop2_assemble(int i,struct regstat *i_regs)
3628{
3629 u_int copr=(source[i]>>11)&0x1f;
3630 signed char temp=get_reg(i_regs->regmap,-1);
3631 if (opcode2[i]==0) { // MFC2
3632 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3633 if(tl>=0&&rt1[i]!=0)
b9b61529 3634 cop2_get_dreg(copr,tl,temp);
3635 }
3636 else if (opcode2[i]==4) { // MTC2
3637 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3638 cop2_put_dreg(copr,sl,temp);
3639 }
3640 else if (opcode2[i]==2) // CFC2
3641 {
3642 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3643 if(tl>=0&&rt1[i]!=0)
b9b61529 3644 emit_readword((int)&reg_cop2c[copr],tl);
3645 }
3646 else if (opcode2[i]==6) // CTC2
3647 {
3648 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3649 switch(copr) {
3650 case 4:
3651 case 12:
3652 case 20:
3653 case 26:
3654 case 27:
3655 case 29:
3656 case 30:
3657 emit_signextend16(sl,temp);
3658 break;
3659 case 31:
3660 //value = value & 0x7ffff000;
3661 //if (value & 0x7f87e000) value |= 0x80000000;
3662 emit_shrimm(sl,12,temp);
3663 emit_shlimm(temp,12,temp);
3664 emit_testimm(temp,0x7f000000);
3665 emit_testeqimm(temp,0x00870000);
3666 emit_testeqimm(temp,0x0000e000);
3667 emit_orrne_imm(temp,0x80000000,temp);
3668 break;
3669 default:
3670 temp=sl;
3671 break;
3672 }
3673 emit_writeword(temp,(int)&reg_cop2c[copr]);
3674 assert(sl>=0);
3675 }
3676}
3677
3678void c2op_assemble(int i,struct regstat *i_regs)
3679{
3680 signed char temp=get_reg(i_regs->regmap,-1);
3681 u_int c2op=source[i]&0x3f;
3682 u_int hr,reglist=0;
3683 for(hr=0;hr<HOST_REGS;hr++) {
3684 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3685 }
3686 if(i==0||itype[i-1]!=C2OP)
3687 save_regs(reglist);
3688
3689 if (gte_handlers[c2op]!=NULL) {
3690 int cc=get_reg(i_regs->regmap,CCREG);
3691 emit_movimm(source[i],temp); // opcode
3692 if (cc>=0&&gte_cycletab[c2op])
3693 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3694 emit_writeword(temp,(int)&psxRegs.code);
3695 emit_call((int)gte_handlers[c2op]);
3696 }
3697
3698 if(i>=slen-1||itype[i+1]!=C2OP)
3699 restore_regs(reglist);
3700}
3701
3702void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3703{
3704 // XXX: should just just do the exception instead
3705 if(!cop1_usable) {
3706 int jaddr=(int)out;
3707 emit_jmp(0);
3708 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3709 cop1_usable=1;
3710 }
3711}
3712
57871462 3713void cop1_assemble(int i,struct regstat *i_regs)
3714{
3d624f89 3715#ifndef DISABLE_COP1
57871462 3716 // Check cop1 unusable
3717 if(!cop1_usable) {
3718 signed char rs=get_reg(i_regs->regmap,CSREG);
3719 assert(rs>=0);
3720 emit_testimm(rs,0x20000000);
3721 int jaddr=(int)out;
3722 emit_jeq(0);
3723 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3724 cop1_usable=1;
3725 }
3726 if (opcode2[i]==0) { // MFC1
3727 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3728 if(tl>=0) {
3729 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3730 emit_readword_indexed(0,tl,tl);
3731 }
3732 }
3733 else if (opcode2[i]==1) { // DMFC1
3734 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3735 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3736 if(tl>=0) {
3737 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3738 if(th>=0) emit_readword_indexed(4,tl,th);
3739 emit_readword_indexed(0,tl,tl);
3740 }
3741 }
3742 else if (opcode2[i]==4) { // MTC1
3743 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3744 signed char temp=get_reg(i_regs->regmap,-1);
3745 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3746 emit_writeword_indexed(sl,0,temp);
3747 }
3748 else if (opcode2[i]==5) { // DMTC1
3749 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3750 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3751 signed char temp=get_reg(i_regs->regmap,-1);
3752 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3753 emit_writeword_indexed(sh,4,temp);
3754 emit_writeword_indexed(sl,0,temp);
3755 }
3756 else if (opcode2[i]==2) // CFC1
3757 {
3758 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3759 if(tl>=0) {
3760 u_int copr=(source[i]>>11)&0x1f;
3761 if(copr==0) emit_readword((int)&FCR0,tl);
3762 if(copr==31) emit_readword((int)&FCR31,tl);
3763 }
3764 }
3765 else if (opcode2[i]==6) // CTC1
3766 {
3767 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3768 u_int copr=(source[i]>>11)&0x1f;
3769 assert(sl>=0);
3770 if(copr==31)
3771 {
3772 emit_writeword(sl,(int)&FCR31);
3773 // Set the rounding mode
3774 //FIXME
3775 //char temp=get_reg(i_regs->regmap,-1);
3776 //emit_andimm(sl,3,temp);
3777 //emit_fldcw_indexed((int)&rounding_modes,temp);
3778 }
3779 }
3d624f89 3780#else
3781 cop1_unusable(i, i_regs);
3782#endif
57871462 3783}
3784
3785void fconv_assemble_arm(int i,struct regstat *i_regs)
3786{
3d624f89 3787#ifndef DISABLE_COP1
57871462 3788 signed char temp=get_reg(i_regs->regmap,-1);
3789 assert(temp>=0);
3790 // Check cop1 unusable
3791 if(!cop1_usable) {
3792 signed char rs=get_reg(i_regs->regmap,CSREG);
3793 assert(rs>=0);
3794 emit_testimm(rs,0x20000000);
3795 int jaddr=(int)out;
3796 emit_jeq(0);
3797 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3798 cop1_usable=1;
3799 }
3800
3801 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3802 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3803 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3804 emit_flds(temp,15);
3805 emit_ftosizs(15,15); // float->int, truncate
3806 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3807 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3808 emit_fsts(15,temp);
3809 return;
3810 }
3811 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3812 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3813 emit_vldr(temp,7);
3814 emit_ftosizd(7,13); // double->int, truncate
3815 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3816 emit_fsts(13,temp);
3817 return;
3818 }
3819
3820 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3821 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3822 emit_flds(temp,13);
3823 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3824 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3825 emit_fsitos(13,15);
3826 emit_fsts(15,temp);
3827 return;
3828 }
3829 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3830 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3831 emit_flds(temp,13);
3832 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3833 emit_fsitod(13,7);
3834 emit_vstr(7,temp);
3835 return;
3836 }
3837
3838 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3839 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3840 emit_flds(temp,13);
3841 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3842 emit_fcvtds(13,7);
3843 emit_vstr(7,temp);
3844 return;
3845 }
3846 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3847 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3848 emit_vldr(temp,7);
3849 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3850 emit_fcvtsd(7,13);
3851 emit_fsts(13,temp);
3852 return;
3853 }
3854 #endif
3855
3856 // C emulation code
3857
3858 u_int hr,reglist=0;
3859 for(hr=0;hr<HOST_REGS;hr++) {
3860 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3861 }
3862 save_regs(reglist);
3863
3864 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3865 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3866 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3867 emit_call((int)cvt_s_w);
3868 }
3869 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3870 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3871 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3872 emit_call((int)cvt_d_w);
3873 }
3874 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3875 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3876 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3877 emit_call((int)cvt_s_l);
3878 }
3879 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3880 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3881 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3882 emit_call((int)cvt_d_l);
3883 }
3884
3885 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3886 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3887 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3888 emit_call((int)cvt_d_s);
3889 }
3890 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3891 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3892 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3893 emit_call((int)cvt_w_s);
3894 }
3895 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3896 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3897 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3898 emit_call((int)cvt_l_s);
3899 }
3900
3901 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3902 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3903 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3904 emit_call((int)cvt_s_d);
3905 }
3906 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3907 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3908 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3909 emit_call((int)cvt_w_d);
3910 }
3911 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3912 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3913 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3914 emit_call((int)cvt_l_d);
3915 }
3916
3917 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3918 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3919 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3920 emit_call((int)round_l_s);
3921 }
3922 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3923 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3924 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3925 emit_call((int)trunc_l_s);
3926 }
3927 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3928 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3929 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3930 emit_call((int)ceil_l_s);
3931 }
3932 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3933 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3934 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3935 emit_call((int)floor_l_s);
3936 }
3937 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3938 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3939 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3940 emit_call((int)round_w_s);
3941 }
3942 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3943 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3944 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3945 emit_call((int)trunc_w_s);
3946 }
3947 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3948 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3949 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3950 emit_call((int)ceil_w_s);
3951 }
3952 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3953 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3954 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3955 emit_call((int)floor_w_s);
3956 }
3957
3958 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3959 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3960 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3961 emit_call((int)round_l_d);
3962 }
3963 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3964 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3965 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3966 emit_call((int)trunc_l_d);
3967 }
3968 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3969 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3970 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3971 emit_call((int)ceil_l_d);
3972 }
3973 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3974 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3975 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3976 emit_call((int)floor_l_d);
3977 }
3978 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3979 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3980 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3981 emit_call((int)round_w_d);
3982 }
3983 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3984 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3985 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3986 emit_call((int)trunc_w_d);
3987 }
3988 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3989 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3990 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3991 emit_call((int)ceil_w_d);
3992 }
3993 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3994 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3995 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3996 emit_call((int)floor_w_d);
3997 }
3998
3999 restore_regs(reglist);
3d624f89 4000#else
4001 cop1_unusable(i, i_regs);
4002#endif
57871462 4003}
4004#define fconv_assemble fconv_assemble_arm
4005
4006void fcomp_assemble(int i,struct regstat *i_regs)
4007{
3d624f89 4008#ifndef DISABLE_COP1
57871462 4009 signed char fs=get_reg(i_regs->regmap,FSREG);
4010 signed char temp=get_reg(i_regs->regmap,-1);
4011 assert(temp>=0);
4012 // Check cop1 unusable
4013 if(!cop1_usable) {
4014 signed char cs=get_reg(i_regs->regmap,CSREG);
4015 assert(cs>=0);
4016 emit_testimm(cs,0x20000000);
4017 int jaddr=(int)out;
4018 emit_jeq(0);
4019 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4020 cop1_usable=1;
4021 }
4022
4023 if((source[i]&0x3f)==0x30) {
4024 emit_andimm(fs,~0x800000,fs);
4025 return;
4026 }
4027
4028 if((source[i]&0x3e)==0x38) {
4029 // sf/ngle - these should throw exceptions for NaNs
4030 emit_andimm(fs,~0x800000,fs);
4031 return;
4032 }
4033
4034 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4035 if(opcode2[i]==0x10) {
4036 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4037 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4038 emit_orimm(fs,0x800000,fs);
4039 emit_flds(temp,14);
4040 emit_flds(HOST_TEMPREG,15);
4041 emit_fcmps(14,15);
4042 emit_fmstat();
4043 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4044 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4045 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4046 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4047 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4048 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4049 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4050 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4051 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4052 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4053 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4054 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4055 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4056 return;
4057 }
4058 if(opcode2[i]==0x11) {
4059 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4060 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4061 emit_orimm(fs,0x800000,fs);
4062 emit_vldr(temp,6);
4063 emit_vldr(HOST_TEMPREG,7);
4064 emit_fcmpd(6,7);
4065 emit_fmstat();
4066 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4067 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4068 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4069 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4070 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4071 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4072 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4073 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4074 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4075 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4076 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4077 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4078 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4079 return;
4080 }
4081 #endif
4082
4083 // C only
4084
4085 u_int hr,reglist=0;
4086 for(hr=0;hr<HOST_REGS;hr++) {
4087 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4088 }
4089 reglist&=~(1<<fs);
4090 save_regs(reglist);
4091 if(opcode2[i]==0x10) {
4092 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4093 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4094 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4095 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4096 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4097 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4098 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4099 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4100 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4101 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4102 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4103 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4104 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4105 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4106 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4107 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4108 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4109 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4110 }
4111 if(opcode2[i]==0x11) {
4112 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4113 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4114 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4115 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4116 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4117 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4118 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4119 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4120 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4121 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4122 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4123 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4124 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4125 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4126 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4127 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4128 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4129 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4130 }
4131 restore_regs(reglist);
4132 emit_loadreg(FSREG,fs);
3d624f89 4133#else
4134 cop1_unusable(i, i_regs);
4135#endif
57871462 4136}
4137
4138void float_assemble(int i,struct regstat *i_regs)
4139{
3d624f89 4140#ifndef DISABLE_COP1
57871462 4141 signed char temp=get_reg(i_regs->regmap,-1);
4142 assert(temp>=0);
4143 // Check cop1 unusable
4144 if(!cop1_usable) {
4145 signed char cs=get_reg(i_regs->regmap,CSREG);
4146 assert(cs>=0);
4147 emit_testimm(cs,0x20000000);
4148 int jaddr=(int)out;
4149 emit_jeq(0);
4150 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4151 cop1_usable=1;
4152 }
4153
4154 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4155 if((source[i]&0x3f)==6) // mov
4156 {
4157 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4158 if(opcode2[i]==0x10) {
4159 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4160 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4161 emit_readword_indexed(0,temp,temp);
4162 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4163 }
4164 if(opcode2[i]==0x11) {
4165 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4166 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4167 emit_vldr(temp,7);
4168 emit_vstr(7,HOST_TEMPREG);
4169 }
4170 }
4171 return;
4172 }
4173
4174 if((source[i]&0x3f)>3)
4175 {
4176 if(opcode2[i]==0x10) {
4177 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4178 emit_flds(temp,15);
4179 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4180 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4181 }
4182 if((source[i]&0x3f)==4) // sqrt
4183 emit_fsqrts(15,15);
4184 if((source[i]&0x3f)==5) // abs
4185 emit_fabss(15,15);
4186 if((source[i]&0x3f)==7) // neg
4187 emit_fnegs(15,15);
4188 emit_fsts(15,temp);
4189 }
4190 if(opcode2[i]==0x11) {
4191 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4192 emit_vldr(temp,7);
4193 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4194 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4195 }
4196 if((source[i]&0x3f)==4) // sqrt
4197 emit_fsqrtd(7,7);
4198 if((source[i]&0x3f)==5) // abs
4199 emit_fabsd(7,7);
4200 if((source[i]&0x3f)==7) // neg
4201 emit_fnegd(7,7);
4202 emit_vstr(7,temp);
4203 }
4204 return;
4205 }
4206 if((source[i]&0x3f)<4)
4207 {
4208 if(opcode2[i]==0x10) {
4209 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4210 }
4211 if(opcode2[i]==0x11) {
4212 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4213 }
4214 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4215 if(opcode2[i]==0x10) {
4216 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4217 emit_flds(temp,15);
4218 emit_flds(HOST_TEMPREG,13);
4219 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4220 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4221 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4222 }
4223 }
4224 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4225 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4226 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4227 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4228 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4229 emit_fsts(15,HOST_TEMPREG);
4230 }else{
4231 emit_fsts(15,temp);
4232 }
4233 }
4234 else if(opcode2[i]==0x11) {
4235 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4236 emit_vldr(temp,7);
4237 emit_vldr(HOST_TEMPREG,6);
4238 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4239 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4240 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4241 }
4242 }
4243 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4244 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4245 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4246 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4247 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4248 emit_vstr(7,HOST_TEMPREG);
4249 }else{
4250 emit_vstr(7,temp);
4251 }
4252 }
4253 }
4254 else {
4255 if(opcode2[i]==0x10) {
4256 emit_flds(temp,15);
4257 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4258 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4259 }
4260 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4261 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4262 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4263 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4264 emit_fsts(15,temp);
4265 }
4266 else if(opcode2[i]==0x11) {
4267 emit_vldr(temp,7);
4268 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4269 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4270 }
4271 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4272 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4273 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4274 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4275 emit_vstr(7,temp);
4276 }
4277 }
4278 return;
4279 }
4280 #endif
4281
4282 u_int hr,reglist=0;
4283 for(hr=0;hr<HOST_REGS;hr++) {
4284 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4285 }
4286 if(opcode2[i]==0x10) { // Single precision
4287 save_regs(reglist);
4288 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4289 if((source[i]&0x3f)<4) {
4290 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4291 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4292 }else{
4293 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4294 }
4295 switch(source[i]&0x3f)
4296 {
4297 case 0x00: emit_call((int)add_s);break;
4298 case 0x01: emit_call((int)sub_s);break;
4299 case 0x02: emit_call((int)mul_s);break;
4300 case 0x03: emit_call((int)div_s);break;
4301 case 0x04: emit_call((int)sqrt_s);break;
4302 case 0x05: emit_call((int)abs_s);break;
4303 case 0x06: emit_call((int)mov_s);break;
4304 case 0x07: emit_call((int)neg_s);break;
4305 }
4306 restore_regs(reglist);
4307 }
4308 if(opcode2[i]==0x11) { // Double precision
4309 save_regs(reglist);
4310 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4311 if((source[i]&0x3f)<4) {
4312 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4313 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4314 }else{
4315 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4316 }
4317 switch(source[i]&0x3f)
4318 {
4319 case 0x00: emit_call((int)add_d);break;
4320 case 0x01: emit_call((int)sub_d);break;
4321 case 0x02: emit_call((int)mul_d);break;
4322 case 0x03: emit_call((int)div_d);break;
4323 case 0x04: emit_call((int)sqrt_d);break;
4324 case 0x05: emit_call((int)abs_d);break;
4325 case 0x06: emit_call((int)mov_d);break;
4326 case 0x07: emit_call((int)neg_d);break;
4327 }
4328 restore_regs(reglist);
4329 }
3d624f89 4330#else
4331 cop1_unusable(i, i_regs);
4332#endif
57871462 4333}
4334
4335void multdiv_assemble_arm(int i,struct regstat *i_regs)
4336{
4337 // case 0x18: MULT
4338 // case 0x19: MULTU
4339 // case 0x1A: DIV
4340 // case 0x1B: DIVU
4341 // case 0x1C: DMULT
4342 // case 0x1D: DMULTU
4343 // case 0x1E: DDIV
4344 // case 0x1F: DDIVU
4345 if(rs1[i]&&rs2[i])
4346 {
4347 if((opcode2[i]&4)==0) // 32-bit
4348 {
4349 if(opcode2[i]==0x18) // MULT
4350 {
4351 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4352 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4353 signed char hi=get_reg(i_regs->regmap,HIREG);
4354 signed char lo=get_reg(i_regs->regmap,LOREG);
4355 assert(m1>=0);
4356 assert(m2>=0);
4357 assert(hi>=0);
4358 assert(lo>=0);
4359 emit_smull(m1,m2,hi,lo);
4360 }
4361 if(opcode2[i]==0x19) // MULTU
4362 {
4363 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4364 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4365 signed char hi=get_reg(i_regs->regmap,HIREG);
4366 signed char lo=get_reg(i_regs->regmap,LOREG);
4367 assert(m1>=0);
4368 assert(m2>=0);
4369 assert(hi>=0);
4370 assert(lo>=0);
4371 emit_umull(m1,m2,hi,lo);
4372 }
4373 if(opcode2[i]==0x1A) // DIV
4374 {
4375 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4376 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4377 assert(d1>=0);
4378 assert(d2>=0);
4379 signed char quotient=get_reg(i_regs->regmap,LOREG);
4380 signed char remainder=get_reg(i_regs->regmap,HIREG);
4381 assert(quotient>=0);
4382 assert(remainder>=0);
4383 emit_movs(d1,remainder);
4384 emit_negmi(remainder,remainder);
4385 emit_movs(d2,HOST_TEMPREG);
4386 emit_jeq((int)out+52); // Division by zero
4387 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4388 emit_clz(HOST_TEMPREG,quotient);
4389 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4390 emit_orimm(quotient,1<<31,quotient);
4391 emit_shr(quotient,quotient,quotient);
4392 emit_cmp(remainder,HOST_TEMPREG);
4393 emit_subcs(remainder,HOST_TEMPREG,remainder);
4394 emit_adcs(quotient,quotient,quotient);
4395 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4396 emit_jcc((int)out-16); // -4
4397 emit_teq(d1,d2);
4398 emit_negmi(quotient,quotient);
4399 emit_test(d1,d1);
4400 emit_negmi(remainder,remainder);
4401 }
4402 if(opcode2[i]==0x1B) // DIVU
4403 {
4404 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4405 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4406 assert(d1>=0);
4407 assert(d2>=0);
4408 signed char quotient=get_reg(i_regs->regmap,LOREG);
4409 signed char remainder=get_reg(i_regs->regmap,HIREG);
4410 assert(quotient>=0);
4411 assert(remainder>=0);
4412 emit_test(d2,d2);
4413 emit_jeq((int)out+44); // Division by zero
4414 emit_clz(d2,HOST_TEMPREG);
4415 emit_movimm(1<<31,quotient);
4416 emit_shl(d2,HOST_TEMPREG,d2);
4417 emit_mov(d1,remainder);
4418 emit_shr(quotient,HOST_TEMPREG,quotient);
4419 emit_cmp(remainder,d2);
4420 emit_subcs(remainder,d2,remainder);
4421 emit_adcs(quotient,quotient,quotient);
4422 emit_shrcc_imm(d2,1,d2);
4423 emit_jcc((int)out-16); // -4
4424 }
4425 }
4426 else // 64-bit
4427 {
4428 if(opcode2[i]==0x1C) // DMULT
4429 {
4430 assert(opcode2[i]!=0x1C);
4431 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4432 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4433 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4434 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4435 assert(m1h>=0);
4436 assert(m2h>=0);
4437 assert(m1l>=0);
4438 assert(m2l>=0);
4439 emit_pushreg(m2h);
4440 emit_pushreg(m2l);
4441 emit_pushreg(m1h);
4442 emit_pushreg(m1l);
4443 emit_call((int)&mult64);
4444 emit_popreg(m1l);
4445 emit_popreg(m1h);
4446 emit_popreg(m2l);
4447 emit_popreg(m2h);
4448 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4449 signed char hil=get_reg(i_regs->regmap,HIREG);
4450 if(hih>=0) emit_loadreg(HIREG|64,hih);
4451 if(hil>=0) emit_loadreg(HIREG,hil);
4452 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4453 signed char lol=get_reg(i_regs->regmap,LOREG);
4454 if(loh>=0) emit_loadreg(LOREG|64,loh);
4455 if(lol>=0) emit_loadreg(LOREG,lol);
4456 }
4457 if(opcode2[i]==0x1D) // DMULTU
4458 {
4459 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4460 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4461 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4462 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4463 assert(m1h>=0);
4464 assert(m2h>=0);
4465 assert(m1l>=0);
4466 assert(m2l>=0);
4467 save_regs(0x100f);
4468 if(m1l!=0) emit_mov(m1l,0);
4469 if(m1h==0) emit_readword((int)&dynarec_local,1);
4470 else if(m1h>1) emit_mov(m1h,1);
4471 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4472 else if(m2l>2) emit_mov(m2l,2);
4473 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4474 else if(m2h>3) emit_mov(m2h,3);
4475 emit_call((int)&multu64);
4476 restore_regs(0x100f);
4477 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4478 signed char hil=get_reg(i_regs->regmap,HIREG);
4479 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4480 signed char lol=get_reg(i_regs->regmap,LOREG);
4481 /*signed char temp=get_reg(i_regs->regmap,-1);
4482 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4483 signed char rl=get_reg(i_regs->regmap,HIREG);
4484 assert(m1h>=0);
4485 assert(m2h>=0);
4486 assert(m1l>=0);
4487 assert(m2l>=0);
4488 assert(temp>=0);
4489 //emit_mov(m1l,EAX);
4490 //emit_mul(m2l);
4491 emit_umull(rl,rh,m1l,m2l);
4492 emit_storereg(LOREG,rl);
4493 emit_mov(rh,temp);
4494 //emit_mov(m1h,EAX);
4495 //emit_mul(m2l);
4496 emit_umull(rl,rh,m1h,m2l);
4497 emit_adds(rl,temp,temp);
4498 emit_adcimm(rh,0,rh);
4499 emit_storereg(HIREG,rh);
4500 //emit_mov(m2h,EAX);
4501 //emit_mul(m1l);
4502 emit_umull(rl,rh,m1l,m2h);
4503 emit_adds(rl,temp,temp);
4504 emit_adcimm(rh,0,rh);
4505 emit_storereg(LOREG|64,temp);
4506 emit_mov(rh,temp);
4507 //emit_mov(m2h,EAX);
4508 //emit_mul(m1h);
4509 emit_umull(rl,rh,m1h,m2h);
4510 emit_adds(rl,temp,rl);
4511 emit_loadreg(HIREG,temp);
4512 emit_adcimm(rh,0,rh);
4513 emit_adds(rl,temp,rl);
4514 emit_adcimm(rh,0,rh);
4515 // DEBUG
4516 /*
4517 emit_pushreg(m2h);
4518 emit_pushreg(m2l);
4519 emit_pushreg(m1h);
4520 emit_pushreg(m1l);
4521 emit_call((int)&multu64);
4522 emit_popreg(m1l);
4523 emit_popreg(m1h);
4524 emit_popreg(m2l);
4525 emit_popreg(m2h);
4526 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4527 signed char hil=get_reg(i_regs->regmap,HIREG);
4528 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4529 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4530 */
4531 // Shouldn't be necessary
4532 //char loh=get_reg(i_regs->regmap,LOREG|64);
4533 //char lol=get_reg(i_regs->regmap,LOREG);
4534 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4535 //if(lol>=0) emit_loadreg(LOREG,lol);
4536 }
4537 if(opcode2[i]==0x1E) // DDIV
4538 {
4539 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4540 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4541 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4542 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4543 assert(d1h>=0);
4544 assert(d2h>=0);
4545 assert(d1l>=0);
4546 assert(d2l>=0);
4547 save_regs(0x100f);
4548 if(d1l!=0) emit_mov(d1l,0);
4549 if(d1h==0) emit_readword((int)&dynarec_local,1);
4550 else if(d1h>1) emit_mov(d1h,1);
4551 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4552 else if(d2l>2) emit_mov(d2l,2);
4553 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4554 else if(d2h>3) emit_mov(d2h,3);
4555 emit_call((int)&div64);
4556 restore_regs(0x100f);
4557 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4558 signed char hil=get_reg(i_regs->regmap,HIREG);
4559 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4560 signed char lol=get_reg(i_regs->regmap,LOREG);
4561 if(hih>=0) emit_loadreg(HIREG|64,hih);
4562 if(hil>=0) emit_loadreg(HIREG,hil);
4563 if(loh>=0) emit_loadreg(LOREG|64,loh);
4564 if(lol>=0) emit_loadreg(LOREG,lol);
4565 }
4566 if(opcode2[i]==0x1F) // DDIVU
4567 {
4568 //u_int hr,reglist=0;
4569 //for(hr=0;hr<HOST_REGS;hr++) {
4570 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4571 //}
4572 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4573 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4574 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4575 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4576 assert(d1h>=0);
4577 assert(d2h>=0);
4578 assert(d1l>=0);
4579 assert(d2l>=0);
4580 save_regs(0x100f);
4581 if(d1l!=0) emit_mov(d1l,0);
4582 if(d1h==0) emit_readword((int)&dynarec_local,1);
4583 else if(d1h>1) emit_mov(d1h,1);
4584 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4585 else if(d2l>2) emit_mov(d2l,2);
4586 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4587 else if(d2h>3) emit_mov(d2h,3);
4588 emit_call((int)&divu64);
4589 restore_regs(0x100f);
4590 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4591 signed char hil=get_reg(i_regs->regmap,HIREG);
4592 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4593 signed char lol=get_reg(i_regs->regmap,LOREG);
4594 if(hih>=0) emit_loadreg(HIREG|64,hih);
4595 if(hil>=0) emit_loadreg(HIREG,hil);
4596 if(loh>=0) emit_loadreg(LOREG|64,loh);
4597 if(lol>=0) emit_loadreg(LOREG,lol);
4598 }
4599 }
4600 }
4601 else
4602 {
4603 // Multiply by zero is zero.
4604 // MIPS does not have a divide by zero exception.
4605 // The result is undefined, we return zero.
4606 signed char hr=get_reg(i_regs->regmap,HIREG);
4607 signed char lr=get_reg(i_regs->regmap,LOREG);
4608 if(hr>=0) emit_zeroreg(hr);
4609 if(lr>=0) emit_zeroreg(lr);
4610 }
4611}
4612#define multdiv_assemble multdiv_assemble_arm
4613
/*
 * Hook for preloading the return-address hash constant into host register r.
 * Intentionally emits nothing on ARM.
 */
void do_preload_rhash(int r) {
  // Don't need this for ARM.  On x86, this puts the value 0xf8 into the
  // register.  On ARM the hash can be done with a single instruction
  // (see do_rhash).
  (void)r;
}
4618
4619void do_preload_rhtbl(int ht) {
4620 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4621}
4622
/*
 * Emit the mini hash-table hash: rh = rs & 0xf8.
 * The mask keeps a multiple of 8 in the range 0..248, which matches the
 * byte offset of one of the 32 two-word entries of mini_ht.
 */
void do_rhash(int rs,int rh) {
  emit_andimm(rs,0xf8,rh);
}
4626
4627void do_miniht_load(int ht,int rh) {
4628 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4629 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4630}
4631
4632void do_miniht_jump(int rs,int rh,int ht) {
4633 emit_cmp(rh,rs);
4634 emit_ldreq_indexed(ht,4,15);
4635 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4636 emit_mov(rs,7);
4637 emit_jmp(jump_vaddr_reg[7]);
4638 #else
4639 emit_jmp(jump_vaddr_reg[rs]);
4640 #endif
4641}
4642
4643void do_miniht_insert(u_int return_address,int rt,int temp) {
4644 #ifdef ARMv5_ONLY
4645 emit_movimm(return_address,rt); // PC into link register
4646 add_to_linker((int)out,return_address,1);
4647 emit_pcreladdr(temp);
4648 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4649 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4650 #else
4651 emit_movw(return_address&0x0000FFFF,rt);
4652 add_to_linker((int)out,return_address,1);
4653 emit_pcreladdr(temp);
4654 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4655 emit_movt(return_address&0xFFFF0000,rt);
4656 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4657 #endif
4658}
4659
4660// Sign-extend to 64 bits and write out upper half of a register
4661// This is useful where we have a 32-bit value in a register, and want to
4662// keep it in a 32-bit register, but can't guarantee that it won't be read
4663// as a 64-bit value later.
4664void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4665{
24385cae 4666#ifndef FORCE32
57871462 4667 if(is32_pre==is32) return;
4668 int hr,reg;
4669 for(hr=0;hr<HOST_REGS;hr++) {
4670 if(hr!=EXCLUDE_REG) {
4671 //if(pre[hr]==entry[hr]) {
4672 if((reg=pre[hr])>=0) {
4673 if((dirty>>hr)&1) {
4674 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4675 emit_sarimm(hr,31,HOST_TEMPREG);
4676 emit_storereg(reg|64,HOST_TEMPREG);
4677 }
4678 }
4679 }
4680 //}
4681 }
4682 }
24385cae 4683#endif
57871462 4684}
4685
4686void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4687{
4688 //if(dirty_pre==dirty) return;
4689 int hr,reg,new_hr;
4690 for(hr=0;hr<HOST_REGS;hr++) {
4691 if(hr!=EXCLUDE_REG) {
4692 reg=pre[hr];
4693 if(((~u)>>(reg&63))&1) {
4694 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4695 if(((dirty_pre&~dirty)>>hr)&1) {
4696 if(reg>0&&reg<34) {
4697 emit_storereg(reg,hr);
4698 if( ((is32_pre&~uu)>>reg)&1 ) {
4699 emit_sarimm(hr,31,HOST_TEMPREG);
4700 emit_storereg(reg|64,HOST_TEMPREG);
4701 }
4702 }
4703 else if(reg>=64) {
4704 emit_storereg(reg,hr);
4705 }
4706 }
4707 }
4708 else // Check if register moved to a different register
4709 if((new_hr=get_reg(entry,reg))>=0) {
4710 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4711 if(reg>0&&reg<34) {
4712 emit_storereg(reg,hr);
4713 if( ((is32_pre&~uu)>>reg)&1 ) {
4714 emit_sarimm(hr,31,HOST_TEMPREG);
4715 emit_storereg(reg|64,HOST_TEMPREG);
4716 }
4717 }
4718 else if(reg>=64) {
4719 emit_storereg(reg,hr);
4720 }
4721 }
4722 }
4723 }
4724 }
4725 }
4726}
4727
4728
4729/* using strd could possibly help but you'd have to allocate registers in pairs
4730void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4731{
4732 int hr;
4733 int wrote=-1;
4734 for(hr=HOST_REGS-1;hr>=0;hr--) {
4735 if(hr!=EXCLUDE_REG) {
4736 if(pre[hr]!=entry[hr]) {
4737 if(pre[hr]>=0) {
4738 if((dirty>>hr)&1) {
4739 if(get_reg(entry,pre[hr])<0) {
4740 if(pre[hr]<64) {
4741 if(!((u>>pre[hr])&1)) {
4742 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4743 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4744 emit_sarimm(hr,31,hr+1);
4745 emit_strdreg(pre[hr],hr);
4746 }
4747 else
4748 emit_storereg(pre[hr],hr);
4749 }else{
4750 emit_storereg(pre[hr],hr);
4751 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4752 emit_sarimm(hr,31,hr);
4753 emit_storereg(pre[hr]|64,hr);
4754 }
4755 }
4756 }
4757 }else{
4758 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4759 emit_storereg(pre[hr],hr);
4760 }
4761 }
4762 wrote=hr;
4763 }
4764 }
4765 }
4766 }
4767 }
4768 }
4769 for(hr=0;hr<HOST_REGS;hr++) {
4770 if(hr!=EXCLUDE_REG) {
4771 if(pre[hr]!=entry[hr]) {
4772 if(pre[hr]>=0) {
4773 int nr;
4774 if((nr=get_reg(entry,pre[hr]))>=0) {
4775 emit_mov(hr,nr);
4776 }
4777 }
4778 }
4779 }
4780 }
4781}
4782#define wb_invalidate wb_invalidate_arm
4783*/
4784
4785// CPU-architecture-specific initialization
4786void arch_init() {
3d624f89 4787#ifndef DISABLE_COP1
57871462 4788 rounding_modes[0]=0x0<<22; // round
4789 rounding_modes[1]=0x3<<22; // trunc
4790 rounding_modes[2]=0x1<<22; // ceil
4791 rounding_modes[3]=0x2<<22; // floor
3d624f89 4792#endif
57871462 4793}
b9b61529 4794
4795// vim:shiftwidth=2:expandtab