drc: merge Ari64's patch: 05_dont_write_r0
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
f76eeef9 145void *kill_pointer(void *stub)
57871462 146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 153 return i_ptr;
57871462 154}
155
156int get_pointer(void *stub)
157{
158 //printf("get_pointer(%x)\n",(int)stub);
159 int *ptr=(int *)(stub+4);
160 assert((*ptr&0x0ff00000)==0x05900000);
161 u_int offset=*ptr&0xfff;
162 int **l_ptr=(void *)ptr+offset+8;
163 int *i_ptr=*l_ptr;
164 assert((*i_ptr&0x0f000000)==0x0a000000);
165 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
166}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
// Printable names of the 16 ARM core registers, indexed by register number
// (used by the assem_debug traces).
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
// Build the register fields of an ARM instruction word:
// rn in bits 19..16, rd in bits 15..12, rm in bits 3..0.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the operand fields of an ARM immediate instruction:
// rn in bits 19..16, rd in bits 15..12, an 8-bit immediate in bits 7..0 and
// the rotate field chosen so the immediate ends up shifted left by `shift`
// bits. `shift` must be even and imm must be below 256.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate_bits=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_bits|imm;
}
// Try to express imm as an ARM "modified immediate": an 8-bit value rotated
// right by an even amount. On success the 12-bit operand field (rotate in
// bits 11..8, value in bits 7..0) is stored in *encoded and 1 is returned.
// Returns 0, leaving *encoded untouched, when no such encoding exists.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // Rotate the candidate right two bits at a time until it fits in 8 bits
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 827void genimm_checked(u_int imm,u_int *encoded)
828{
829 u_int ret=genimm(imm,encoded);
830 assert(ret);
831}
57871462 832u_int genjmp(u_int addr)
833{
834 int offset=addr-(int)out-8;
e80343e2 835 if(offset<-33554432||offset>=33554432) {
836 if (addr>2) {
837 printf("genjmp: out of range: %08x\n", offset);
838 exit(1);
839 }
840 return 0;
841 }
57871462 842 return ((u_int)offset>>2)&0xffffff;
843}
844
845void emit_mov(int rs,int rt)
846{
847 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_movs(int rs,int rt)
852{
853 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
854 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
855}
856
857void emit_add(int rs1,int rs2,int rt)
858{
859 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adds(int rs1,int rs2,int rt)
864{
865 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_adcs(int rs1,int rs2,int rt)
870{
871 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbc(int rs1,int rs2,int rt)
876{
877 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_sbcs(int rs1,int rs2,int rt)
882{
883 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
884 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
885}
886
887void emit_neg(int rs, int rt)
888{
889 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_negs(int rs, int rt)
894{
895 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
896 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
897}
898
899void emit_sub(int rs1,int rs2,int rt)
900{
901 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_subs(int rs1,int rs2,int rt)
906{
907 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
908 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
909}
910
911void emit_zeroreg(int rt)
912{
913 assem_debug("mov %s,#0\n",regname[rt]);
914 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
915}
916
790ee18e 917void emit_loadlp(u_int imm,u_int rt)
918{
919 add_literal((int)out,imm);
920 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
921 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
922}
923void emit_movw(u_int imm,u_int rt)
924{
925 assert(imm<65536);
926 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
927 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
928}
929void emit_movt(u_int imm,u_int rt)
930{
931 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
932 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
933}
934void emit_movimm(u_int imm,u_int rt)
935{
936 u_int armval;
937 if(genimm(imm,&armval)) {
938 assem_debug("mov %s,#%d\n",regname[rt],imm);
939 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
940 }else if(genimm(~imm,&armval)) {
941 assem_debug("mvn %s,#%d\n",regname[rt],imm);
942 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
943 }else if(imm<65536) {
944 #ifdef ARMv5_ONLY
945 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
946 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
947 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
948 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
949 #else
950 emit_movw(imm,rt);
951 #endif
952 }else{
953 #ifdef ARMv5_ONLY
954 emit_loadlp(imm,rt);
955 #else
956 emit_movw(imm&0x0000FFFF,rt);
957 emit_movt(imm&0xFFFF0000,rt);
958 #endif
959 }
960}
961void emit_pcreladdr(u_int rt)
962{
963 assem_debug("add %s,pc,#?\n",regname[rt]);
964 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
965}
966
57871462 967void emit_loadreg(int r, int hr)
968{
3d624f89 969#ifdef FORCE32
970 if(r&64) {
971 printf("64bit load in 32bit mode!\n");
972 exit(1);
973 }
974#endif
57871462 975 if((r&63)==0)
976 emit_zeroreg(hr);
977 else {
3d624f89 978 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 979 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
980 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
981 if(r==CCREG) addr=(int)&cycle_count;
982 if(r==CSREG) addr=(int)&Status;
983 if(r==FSREG) addr=(int)&FCR31;
984 if(r==INVCP) addr=(int)&invc_ptr;
985 u_int offset = addr-(u_int)&dynarec_local;
986 assert(offset<4096);
987 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
988 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
989 }
990}
991void emit_storereg(int r, int hr)
992{
3d624f89 993#ifdef FORCE32
994 if(r&64) {
995 printf("64bit store in 32bit mode!\n");
996 exit(1);
997 }
998#endif
999 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1000 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1001 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1002 if(r==CCREG) addr=(int)&cycle_count;
1003 if(r==FSREG) addr=(int)&FCR31;
1004 u_int offset = addr-(u_int)&dynarec_local;
1005 assert(offset<4096);
1006 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1007 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1008}
1009
1010void emit_test(int rs, int rt)
1011{
1012 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1013 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1014}
1015
1016void emit_testimm(int rs,int imm)
1017{
1018 u_int armval;
1019 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1020 genimm_checked(imm,&armval);
57871462 1021 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1022}
1023
b9b61529 1024void emit_testeqimm(int rs,int imm)
1025{
1026 u_int armval;
1027 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1028 genimm_checked(imm,&armval);
b9b61529 1029 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1030}
1031
57871462 1032void emit_not(int rs,int rt)
1033{
1034 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1035 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1036}
1037
b9b61529 1038void emit_mvnmi(int rs,int rt)
1039{
1040 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1041 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1042}
1043
57871462 1044void emit_and(u_int rs1,u_int rs2,u_int rt)
1045{
1046 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1047 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1048}
1049
1050void emit_or(u_int rs1,u_int rs2,u_int rt)
1051{
1052 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1053 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1054}
1055void emit_or_and_set_flags(int rs1,int rs2,int rt)
1056{
1057 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1058 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1059}
1060
f70d384d 1061void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1062{
1063 assert(rs<16);
1064 assert(rt<16);
1065 assert(imm<32);
1066 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1067 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1068}
1069
576bbd8f 1070void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1071{
1072 assert(rs<16);
1073 assert(rt<16);
1074 assert(imm<32);
1075 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1076 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1077}
1078
57871462 1079void emit_xor(u_int rs1,u_int rs2,u_int rt)
1080{
1081 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1082 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1083}
1084
57871462 1085void emit_addimm(u_int rs,int imm,u_int rt)
1086{
1087 assert(rs<16);
1088 assert(rt<16);
1089 if(imm!=0) {
1090 assert(imm>-65536&&imm<65536);
1091 u_int armval;
1092 if(genimm(imm,&armval)) {
1093 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1094 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1095 }else if(genimm(-imm,&armval)) {
1096 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1097 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1098 }else if(imm<0) {
1099 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1100 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1101 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1102 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1103 }else{
1104 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1105 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1106 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1107 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1108 }
1109 }
1110 else if(rs!=rt) emit_mov(rs,rt);
1111}
1112
1113void emit_addimm_and_set_flags(int imm,int rt)
1114{
1115 assert(imm>-65536&&imm<65536);
1116 u_int armval;
1117 if(genimm(imm,&armval)) {
1118 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1119 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1120 }else if(genimm(-imm,&armval)) {
1121 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1122 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1123 }else if(imm<0) {
1124 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1125 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1126 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1127 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1128 }else{
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1130 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1131 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1132 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1133 }
1134}
1135void emit_addimm_no_flags(u_int imm,u_int rt)
1136{
1137 emit_addimm(rt,imm,rt);
1138}
1139
1140void emit_addnop(u_int r)
1141{
1142 assert(r<16);
1143 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1144 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1145}
1146
1147void emit_adcimm(u_int rs,int imm,u_int rt)
1148{
1149 u_int armval;
cfbd3c6e 1150 genimm_checked(imm,&armval);
57871462 1151 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1152 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1153}
1154/*void emit_sbcimm(int imm,u_int rt)
1155{
1156 u_int armval;
cfbd3c6e 1157 genimm_checked(imm,&armval);
57871462 1158 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1159 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1160}*/
1161void emit_sbbimm(int imm,u_int rt)
1162{
1163 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1164 assert(rt<8);
1165 if(imm<128&&imm>=-128) {
1166 output_byte(0x83);
1167 output_modrm(3,rt,3);
1168 output_byte(imm);
1169 }
1170 else
1171 {
1172 output_byte(0x81);
1173 output_modrm(3,rt,3);
1174 output_w32(imm);
1175 }
1176}
1177void emit_rscimm(int rs,int imm,u_int rt)
1178{
1179 assert(0);
1180 u_int armval;
cfbd3c6e 1181 genimm_checked(imm,&armval);
57871462 1182 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1184}
1185
1186void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1187{
1188 // TODO: if(genimm(imm,&armval)) ...
1189 // else
1190 emit_movimm(imm,HOST_TEMPREG);
1191 emit_adds(HOST_TEMPREG,rsl,rtl);
1192 emit_adcimm(rsh,0,rth);
1193}
1194
1195void emit_sbb(int rs1,int rs2)
1196{
1197 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1198 output_byte(0x19);
1199 output_modrm(3,rs1,rs2);
1200}
1201
1202void emit_andimm(int rs,int imm,int rt)
1203{
1204 u_int armval;
790ee18e 1205 if(imm==0) {
1206 emit_zeroreg(rt);
1207 }else if(genimm(imm,&armval)) {
57871462 1208 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1209 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1210 }else if(genimm(~imm,&armval)) {
1211 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1212 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1213 }else if(imm==65535) {
1214 #ifdef ARMv5_ONLY
1215 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1216 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1217 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1218 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1219 #else
1220 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1221 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1222 #endif
1223 }else{
1224 assert(imm>0&&imm<65535);
1225 #ifdef ARMv5_ONLY
1226 assem_debug("mov r14,#%d\n",imm&0xFF00);
1227 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1228 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1229 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1230 #else
1231 emit_movw(imm,HOST_TEMPREG);
1232 #endif
1233 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1234 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1235 }
1236}
1237
1238void emit_orimm(int rs,int imm,int rt)
1239{
1240 u_int armval;
790ee18e 1241 if(imm==0) {
1242 if(rs!=rt) emit_mov(rs,rt);
1243 }else if(genimm(imm,&armval)) {
57871462 1244 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1246 }else{
1247 assert(imm>0&&imm<65536);
1248 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1249 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1250 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1251 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1252 }
1253}
1254
1255void emit_xorimm(int rs,int imm,int rt)
1256{
57871462 1257 u_int armval;
790ee18e 1258 if(imm==0) {
1259 if(rs!=rt) emit_mov(rs,rt);
1260 }else if(genimm(imm,&armval)) {
57871462 1261 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1262 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1263 }else{
514ed0d9 1264 assert(imm>0&&imm<65536);
57871462 1265 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1266 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1267 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1268 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1269 }
1270}
1271
1272void emit_shlimm(int rs,u_int imm,int rt)
1273{
1274 assert(imm>0);
1275 assert(imm<32);
1276 //if(imm==1) ...
1277 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1278 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1279}
1280
1281void emit_shrimm(int rs,u_int imm,int rt)
1282{
1283 assert(imm>0);
1284 assert(imm<32);
1285 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1286 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1287}
1288
1289void emit_sarimm(int rs,u_int imm,int rt)
1290{
1291 assert(imm>0);
1292 assert(imm<32);
1293 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1294 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1295}
1296
1297void emit_rorimm(int rs,u_int imm,int rt)
1298{
1299 assert(imm>0);
1300 assert(imm<32);
1301 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1302 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1303}
1304
1305void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1306{
1307 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1308 assert(imm>0);
1309 assert(imm<32);
1310 //if(imm==1) ...
1311 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1312 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1313 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1314 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1315}
1316
1317void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1318{
1319 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1320 assert(imm>0);
1321 assert(imm<32);
1322 //if(imm==1) ...
1323 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1325 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1326 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1327}
1328
b9b61529 1329void emit_signextend16(int rs,int rt)
1330{
1331 #ifdef ARMv5_ONLY
1332 emit_shlimm(rs,16,rt);
1333 emit_sarimm(rt,16,rt);
1334 #else
1335 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1336 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1337 #endif
1338}
1339
57871462 1340void emit_shl(u_int rs,u_int shift,u_int rt)
1341{
1342 assert(rs<16);
1343 assert(rt<16);
1344 assert(shift<16);
1345 //if(imm==1) ...
1346 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1347 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1348}
1349void emit_shr(u_int rs,u_int shift,u_int rt)
1350{
1351 assert(rs<16);
1352 assert(rt<16);
1353 assert(shift<16);
1354 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1355 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1356}
1357void emit_sar(u_int rs,u_int shift,u_int rt)
1358{
1359 assert(rs<16);
1360 assert(rt<16);
1361 assert(shift<16);
1362 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1363 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1364}
1365void emit_shlcl(int r)
1366{
1367 assem_debug("shl %%%s,%%cl\n",regname[r]);
1368 assert(0);
1369}
1370void emit_shrcl(int r)
1371{
1372 assem_debug("shr %%%s,%%cl\n",regname[r]);
1373 assert(0);
1374}
1375void emit_sarcl(int r)
1376{
1377 assem_debug("sar %%%s,%%cl\n",regname[r]);
1378 assert(0);
1379}
1380
1381void emit_shldcl(int r1,int r2)
1382{
1383 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1384 assert(0);
1385}
1386void emit_shrdcl(int r1,int r2)
1387{
1388 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1389 assert(0);
1390}
1391void emit_orrshl(u_int rs,u_int shift,u_int rt)
1392{
1393 assert(rs<16);
1394 assert(rt<16);
1395 assert(shift<16);
1396 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1397 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1398}
1399void emit_orrshr(u_int rs,u_int shift,u_int rt)
1400{
1401 assert(rs<16);
1402 assert(rt<16);
1403 assert(shift<16);
1404 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1405 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1406}
1407
1408void emit_cmpimm(int rs,int imm)
1409{
1410 u_int armval;
1411 if(genimm(imm,&armval)) {
1412 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1413 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1414 }else if(genimm(-imm,&armval)) {
1415 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1416 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1417 }else if(imm>0) {
1418 assert(imm<65536);
1419 #ifdef ARMv5_ONLY
1420 emit_movimm(imm,HOST_TEMPREG);
1421 #else
1422 emit_movw(imm,HOST_TEMPREG);
1423 #endif
1424 assem_debug("cmp %s,r14\n",regname[rs]);
1425 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1426 }else{
1427 assert(imm>-65536);
1428 #ifdef ARMv5_ONLY
1429 emit_movimm(-imm,HOST_TEMPREG);
1430 #else
1431 emit_movw(-imm,HOST_TEMPREG);
1432 #endif
1433 assem_debug("cmn %s,r14\n",regname[rs]);
1434 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1435 }
1436}
1437
1438void emit_cmovne(u_int *addr,int rt)
1439{
1440 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1441 assert(0);
1442}
1443void emit_cmovl(u_int *addr,int rt)
1444{
1445 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1446 assert(0);
1447}
1448void emit_cmovs(u_int *addr,int rt)
1449{
1450 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1451 assert(0);
1452}
1453void emit_cmovne_imm(int imm,int rt)
1454{
1455 assem_debug("movne %s,#%d\n",regname[rt],imm);
1456 u_int armval;
cfbd3c6e 1457 genimm_checked(imm,&armval);
57871462 1458 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1459}
1460void emit_cmovl_imm(int imm,int rt)
1461{
1462 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1463 u_int armval;
cfbd3c6e 1464 genimm_checked(imm,&armval);
57871462 1465 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1466}
1467void emit_cmovb_imm(int imm,int rt)
1468{
1469 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1470 u_int armval;
cfbd3c6e 1471 genimm_checked(imm,&armval);
57871462 1472 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1473}
1474void emit_cmovs_imm(int imm,int rt)
1475{
1476 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1477 u_int armval;
cfbd3c6e 1478 genimm_checked(imm,&armval);
57871462 1479 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1480}
1481void emit_cmove_reg(int rs,int rt)
1482{
1483 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1484 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1485}
1486void emit_cmovne_reg(int rs,int rt)
1487{
1488 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1489 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1490}
1491void emit_cmovl_reg(int rs,int rt)
1492{
1493 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1494 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1495}
1496void emit_cmovs_reg(int rs,int rt)
1497{
1498 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1499 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1500}
1501
// Set rt to 1 when rs < imm (signed compare), else 0.
// When rs and rt are the same register the zeroing is deferred until after
// the compare so the source value is still intact when it is read.
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 when rs < imm (unsigned compare, via the carry-clear
// condition), else 0.
// When rs and rt are the same register the zeroing is deferred until after
// the compare so the source value is still intact when it is read.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Signed "set if less than immediate" for a 64-bit value in rsh:rsl,
// producing a 0/1 result in rt. rsh must differ from rt.
// The low word is compared first; the high word then overrides that result
// unless it equals the sign extension of imm (0 for imm>=0, -1 otherwise).
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // High word must be -1 for the low compare to stand
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  // High word must be 0 for the low compare to stand
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned "set if less than immediate" for a 64-bit value in rsh:rsl,
// producing a 0/1 result in rt. rsh must differ from rt.
// The low word is compared first; the high word then overrides that result
// unless it equals the sign extension of imm (0 for imm>=0, -1 otherwise).
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // Any high word other than -1 is below the sign-extended immediate
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // Any non-zero high word is above the immediate
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1548
1549void emit_cmp(int rs,int rt)
1550{
1551 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1552 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1553}
// Set rt to 1 when rs > 0 (signed), else 0.
// Implemented as: compare rs with 1, preload rt with 1, then replace it
// with 0 when the compare reported "less than".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);     // flags <- rs - 1
  emit_movimm(1,rt);     // assume rs >= 1
  emit_cmovl_imm(0,rt);  // rs < 1: result is 0
}
// Make rt non-zero exactly when rs is non-zero, normalising any non-zero
// value to 1. rs is either tested in place (rs==rt) or copied to rt with a
// flag-setting move, then rt is overwritten with 1 on NE.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 when the 64-bit value rsh:rsl is greater than zero, else 0.
// Starts from the 32-bit test of the low word, then lets the high word
// override it: non-zero high word forces 1, negative high word forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);  // high word non-zero
  emit_cmovs_imm(0,rt);   // high word negative
}
// Make rt non-zero exactly when the 64-bit value rsh:rsl is non-zero,
// normalising to 1: OR the two halves into rt with flags, then load 1 on NE.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 when rs1 < rs2 (signed), else 0.
// If rt aliases one of the sources it is cleared only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 when rs1 < rs2 (unsigned, via the carry-clear condition),
// else 0.
// If rt aliases one of the sources it is cleared only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1598void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1599{
1600 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1601 assert(u1!=rt);
1602 assert(u2!=rt);
1603 emit_cmp(l1,l2);
1604 emit_movimm(0,rt);
1605 emit_sbcs(u1,u2,HOST_TEMPREG);
1606 emit_cmovl_imm(1,rt);
1607}
1608void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1609{
1610 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1611 assert(u1!=rt);
1612 assert(u2!=rt);
1613 emit_cmp(l1,l2);
1614 emit_movimm(0,rt);
1615 emit_sbcs(u1,u2,HOST_TEMPREG);
1616 emit_cmovb_imm(1,rt);
1617}
1618
1619void emit_call(int a)
1620{
1621 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1622 u_int offset=genjmp(a);
1623 output_w32(0xeb000000|offset);
1624}
1625void emit_jmp(int a)
1626{
1627 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1628 u_int offset=genjmp(a);
1629 output_w32(0xea000000|offset);
1630}
// Emit "bne a": branch to address a when the NE condition holds.
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq a": branch to address a when the EQ condition holds.
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi a": branch to address a when the MI (negative) condition holds.
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl a": branch to address a when the PL (non-negative) condition
// holds.
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt a": branch to address a when the LT (signed less-than)
// condition holds.
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge a": branch to address a when the GE (signed greater-or-equal)
// condition holds.
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc a": branch to address a when the VC (no overflow) condition
// holds.
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs a": branch to address a when the CS (carry set) condition holds.
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc a": branch to address a when the CC (carry clear) condition
// holds.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1685
// Placeholder with no implementation in this file: traces the request and
// then fails assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0); // must not be reached
}
// Placeholder with no implementation in this file: traces the request and
// then fails assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0); // must not be reached
}
// Placeholder with no implementation in this file: traces the request and
// then fails assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0); // must not be reached
}
1701void emit_pushreg(u_int r)
1702{
1703 assem_debug("push %%%s\n",regname[r]);
1704 assert(0);
1705}
1706void emit_popreg(u_int r)
1707{
1708 assem_debug("pop %%%s\n",regname[r]);
1709 assert(0);
1710}
1711void emit_callreg(u_int r)
1712{
1713 assem_debug("call *%%%s\n",regname[r]);
1714 assert(0);
1715}
1716void emit_jmpreg(u_int r)
1717{
1718 assem_debug("mov pc,%s\n",regname[r]);
1719 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1720}
1721
1722void emit_readword_indexed(int offset, int rs, int rt)
1723{
1724 assert(offset>-4096&&offset<4096);
1725 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1726 if(offset>=0) {
1727 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1728 }else{
1729 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1730 }
1731}
1732void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1733{
1734 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1735 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1736}
// Word load that optionally goes through a mapping register.
//   map <  0 - no mapping: plain load from rs+addr
//   map >= 0 - load from rs + map*4; addr must be 0 in this case
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map>=0) {
    assert(addr==0);
    emit_readword_dualindexedx4(rs, map, rt);
    return;
  }
  emit_readword_indexed(addr, rs, rt);
}
// Doubleword load into the pair rh:rl, optionally through a mapping
// register. The word at addr goes to rh (skipped when rh<0), the word at
// addr+4 goes to rl.
//   map <  0 - two plain loads from rs
//   map >= 0 - mapped loads; map is incremented by 1 between them (one
//              extra word once scaled by 4), so the map register is
//              modified. rh must differ from rs.
// NOTE(review): in the unmapped path rh is loaded before rs is read a
// second time, so rh==rs would also corrupt the second address there; only
// the mapped path asserts against it - confirm callers never pass rh==rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map>=0) {
    assert(rh!=rs);
    if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if(rh>=0) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr+4, rs, rl);
}
1757void emit_movsbl_indexed(int offset, int rs, int rt)
1758{
1759 assert(offset>-256&&offset<256);
1760 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1761 if(offset>=0) {
1762 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1763 }else{
1764 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1765 }
1766}
1767void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1768{
1769 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1770 else {
1771 if(addr==0) {
1772 emit_shlimm(map,2,map);
1773 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1774 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1775 }else{
1776 assert(addr>-256&&addr<256);
1777 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1778 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1779 emit_movsbl_indexed(addr, rt, rt);
1780 }
1781 }
1782}
1783void emit_movswl_indexed(int offset, int rs, int rt)
1784{
1785 assert(offset>-256&&offset<256);
1786 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1787 if(offset>=0) {
1788 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1789 }else{
1790 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1791 }
1792}
1793void emit_movzbl_indexed(int offset, int rs, int rt)
1794{
1795 assert(offset>-4096&&offset<4096);
1796 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1797 if(offset>=0) {
1798 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1799 }else{
1800 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1801 }
1802}
1803void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1804{
1805 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1806 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1807}
// Zero-extended byte load that optionally goes through a mapping register.
//   map <  0           - plain load from rs+addr
//   map >= 0, addr==0  - load from rs + map*4
//   map >= 0, addr!=0  - rt = rs+addr is formed first, then the byte is
//                        loaded from rt + map*4
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1820void emit_movzwl_indexed(int offset, int rs, int rt)
1821{
1822 assert(offset>-256&&offset<256);
1823 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1824 if(offset>=0) {
1825 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1826 }else{
1827 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1828 }
1829}
1830void emit_readword(int addr, int rt)
1831{
1832 u_int offset = addr-(u_int)&dynarec_local;
1833 assert(offset<4096);
1834 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1835 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1836}
1837void emit_movsbl(int addr, int rt)
1838{
1839 u_int offset = addr-(u_int)&dynarec_local;
1840 assert(offset<256);
1841 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1842 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1843}
1844void emit_movswl(int addr, int rt)
1845{
1846 u_int offset = addr-(u_int)&dynarec_local;
1847 assert(offset<256);
1848 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1849 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1850}
1851void emit_movzbl(int addr, int rt)
1852{
1853 u_int offset = addr-(u_int)&dynarec_local;
1854 assert(offset<4096);
1855 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1856 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1857}
1858void emit_movzwl(int addr, int rt)
1859{
1860 u_int offset = addr-(u_int)&dynarec_local;
1861 assert(offset<256);
1862 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1863 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1864}
1865void emit_movzwl_reg(int rs, int rt)
1866{
1867 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1868 assert(0);
1869}
1870
1871void emit_xchg(int rs, int rt)
1872{
1873 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1874 assert(0);
1875}
1876void emit_writeword_indexed(int rt, int offset, int rs)
1877{
1878 assert(offset>-4096&&offset<4096);
1879 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1880 if(offset>=0) {
1881 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1882 }else{
1883 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1884 }
1885}
1886void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1887{
1888 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1889 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1890}
// Word store that optionally goes through a mapping register.
//   map <  0 - plain store to rs+addr
//   map >= 0 - store to rs + map*4; addr must be 0 in this case
// temp is accepted for signature parity with the other *_tlb stores and is
// not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map>=0) {
    assert(addr==0);
    emit_writeword_dualindexedx4(rt, rs, map);
    return;
  }
  emit_writeword_indexed(rt, addr, rs);
}
// Doubleword store of the pair rh:rl, optionally through a mapping
// register. rh is written at addr, rl at addr+4.
//   map <  0 - two plain stores relative to rs (rh skipped when rh<0)
//   map >= 0 - rh must be a valid register. When temp differs from rs,
//              temp = map+1 addresses the second word; otherwise rs itself
//              is advanced by 4 (and thereby modified) before the second
//              store.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int temp_usable=(temp!=rs);
  if(temp_usable) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(temp_usable) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1915void emit_writehword_indexed(int rt, int offset, int rs)
1916{
1917 assert(offset>-256&&offset<256);
1918 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1919 if(offset>=0) {
1920 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1921 }else{
1922 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1923 }
1924}
1925void emit_writebyte_indexed(int rt, int offset, int rs)
1926{
1927 assert(offset>-4096&&offset<4096);
1928 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1929 if(offset>=0) {
1930 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1931 }else{
1932 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1933 }
1934}
1935void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1936{
1937 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1938 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1939}
// Byte store that optionally goes through a mapping register.
//   map <  0           - plain store to rs+addr
//   map >= 0, addr==0  - store to rs + map*4
//   map >= 0, addr!=0  - temp = rs+addr is formed first, then the byte is
//                        stored to temp + map*4
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
1952void emit_writeword(int rt, int addr)
1953{
1954 u_int offset = addr-(u_int)&dynarec_local;
1955 assert(offset<4096);
1956 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1957 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1958}
1959void emit_writehword(int rt, int addr)
1960{
1961 u_int offset = addr-(u_int)&dynarec_local;
1962 assert(offset<256);
1963 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1964 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1965}
1966void emit_writebyte(int rt, int addr)
1967{
1968 u_int offset = addr-(u_int)&dynarec_local;
1969 assert(offset<4096);
74426039 1970 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 1971 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1972}
// Placeholder with no implementation in this file: traces the request and
// then fails assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0); // must not be reached
}
// Placeholder with no implementation in this file: traces the request and
// then fails assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0); // must not be reached
}
1983
1984void emit_mul(int rs)
1985{
1986 assem_debug("mul %%%s\n",regname[rs]);
1987 assert(0);
1988}
1989void emit_imul(int rs)
1990{
1991 assem_debug("imul %%%s\n",regname[rs]);
1992 assert(0);
1993}
1994void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1995{
1996 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1997 assert(rs1<16);
1998 assert(rs2<16);
1999 assert(hi<16);
2000 assert(lo<16);
2001 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2002}
2003void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2004{
2005 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2006 assert(rs1<16);
2007 assert(rs2<16);
2008 assert(hi<16);
2009 assert(lo<16);
2010 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2011}
2012
2013void emit_div(int rs)
2014{
2015 assem_debug("div %%%s\n",regname[rs]);
2016 assert(0);
2017}
2018void emit_idiv(int rs)
2019{
2020 assem_debug("idiv %%%s\n",regname[rs]);
2021 assert(0);
2022}
// Placeholder with no implementation in this file: traces the request and
// then fails assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0); // must not be reached
}
2028
2029void emit_clz(int rs,int rt)
2030{
2031 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2032 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2033}
2034
2035void emit_subcs(int rs1,int rs2,int rt)
2036{
2037 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2038 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2039}
2040
2041void emit_shrcc_imm(int rs,u_int imm,int rt)
2042{
2043 assert(imm>0);
2044 assert(imm<32);
2045 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2046 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2047}
2048
2049void emit_negmi(int rs, int rt)
2050{
2051 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2052 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2053}
2054
2055void emit_negsmi(int rs, int rt)
2056{
2057 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2058 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2059}
2060
2061void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2062{
2063 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2064 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2065}
2066
2067void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2068{
2069 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2070 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2071}
2072
2073void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2074{
2075 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2076 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2077}
2078
2079void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2080{
2081 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2082 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2083}
2084
2085void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2086{
2087 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2088 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2089}
2090
2091void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2092{
2093 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2094 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2095}
2096
2097void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2098{
2099 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2100 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2101}
2102
2103void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2104{
2105 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2106 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2107}
2108
2109void emit_teq(int rs, int rt)
2110{
2111 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2112 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2113}
2114
2115void emit_rsbimm(int rs, int imm, int rt)
2116{
2117 u_int armval;
cfbd3c6e 2118 genimm_checked(imm,&armval);
57871462 2119 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2120 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2121}
2122
2123// Load 2 immediates optimizing for small code size
2124void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2125{
2126 emit_movimm(imm1,rt1);
2127 u_int armval;
2128 if(genimm(imm2-imm1,&armval)) {
2129 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2130 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2131 }else if(genimm(imm1-imm2,&armval)) {
2132 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2133 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2134 }
2135 else emit_movimm(imm2,rt2);
2136}
2137
2138// Conditionally select one of two immediates, optimizing for small code size
2139// This will only be called if HAVE_CMOV_IMM is defined
2140void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2141{
2142 u_int armval;
2143 if(genimm(imm2-imm1,&armval)) {
2144 emit_movimm(imm1,rt);
2145 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2146 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2147 }else if(genimm(imm1-imm2,&armval)) {
2148 emit_movimm(imm1,rt);
2149 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2150 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2151 }
2152 else {
2153 #ifdef ARMv5_ONLY
2154 emit_movimm(imm1,rt);
2155 add_literal((int)out,imm2);
2156 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2157 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2158 #else
2159 emit_movw(imm1&0x0000FFFF,rt);
2160 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2161 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2162 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2163 }
2164 emit_movt(imm1&0xFFFF0000,rt);
2165 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2166 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2167 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2168 }
2169 #endif
2170 }
2171}
2172
// Special case for checking invalid_code (immediate base address form).
// Not implemented in this backend: reaching it trips an assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2178
2179// special case for checking invalid_code
2180void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2181{
2182 assert(imm<128&&imm>=0);
2183 assert(r>=0&&r<16);
2184 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2185 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2186 emit_cmpimm(HOST_TEMPREG,imm);
2187}
2188
2189// special case for tlb mapping
2190void emit_addsr12(int rs1,int rs2,int rt)
2191{
2192 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2193 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2194}
2195
// Used to preload hash table entries.
// NOTE(review): the byte sequence written here (0x0F 0x18 + modrm + 32-bit
// address) looks like an x86 prefetch encoding rather than an ARM
// instruction - confirm whether this function is ever called on ARM.
void emit_prefetch(void *addr)
{
  int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2205void emit_prefetchreg(int r)
2206{
2207 assem_debug("pld %s\n",regname[r]);
2208 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2209}
2210
2211// Special case for mini_ht
2212void emit_ldreq_indexed(int rs, u_int offset, int rt)
2213{
2214 assert(offset<4096);
2215 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2216 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2217}
2218
2219void emit_flds(int r,int sr)
2220{
2221 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2222 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2223}
2224
2225void emit_vldr(int r,int vr)
2226{
2227 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2228 output_w32(0xed900b00|(vr<<12)|(r<<16));
2229}
2230
2231void emit_fsts(int sr,int r)
2232{
2233 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2234 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2235}
2236
2237void emit_vstr(int vr,int r)
2238{
2239 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2240 output_w32(0xed800b00|(vr<<12)|(r<<16));
2241}
2242
2243void emit_ftosizs(int s,int d)
2244{
2245 assem_debug("ftosizs s%d,s%d\n",d,s);
2246 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2247}
2248
2249void emit_ftosizd(int s,int d)
2250{
2251 assem_debug("ftosizd s%d,d%d\n",d,s);
2252 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2253}
2254
2255void emit_fsitos(int s,int d)
2256{
2257 assem_debug("fsitos s%d,s%d\n",d,s);
2258 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2259}
2260
2261void emit_fsitod(int s,int d)
2262{
2263 assem_debug("fsitod d%d,s%d\n",d,s);
2264 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2265}
2266
2267void emit_fcvtds(int s,int d)
2268{
2269 assem_debug("fcvtds d%d,s%d\n",d,s);
2270 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2271}
2272
2273void emit_fcvtsd(int s,int d)
2274{
2275 assem_debug("fcvtsd s%d,d%d\n",d,s);
2276 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2277}
2278
// Emit "fsqrts s<d>,s<s>": single-precision square root of s<s> into s<d>.
// Both operands use the single-precision register encoding (only bits 0-3 of
// each register number are encoded), so the debug text names both as s-regs.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2284
// Emit "fsqrtd d<d>,d<s>": double-precision square root of d<s> into d<d>.
// Both operands use the double-precision register encoding (only bits 0-2 of
// each register number are encoded), so the debug text names both as d-regs.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2290
// Emit "fabss s<d>,s<s>": single-precision absolute value of s<s> into s<d>.
// Both operands use the single-precision register encoding (only bits 0-3 of
// each register number are encoded), so the debug text names both as s-regs.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2296
// Emit "fabsd d<d>,d<s>": double-precision absolute value of d<s> into d<d>.
// Both operands use the double-precision register encoding (only bits 0-2 of
// each register number are encoded), so the debug text names both as d-regs.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2302
// Emit "fnegs s<d>,s<s>": single-precision negation of s<s> into s<d>.
// Both operands use the single-precision register encoding (only bits 0-3 of
// each register number are encoded), so the debug text names both as s-regs.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2308
// Emit "fnegd d<d>,d<s>": double-precision negation of d<s> into d<d>.
// Both operands use the double-precision register encoding (only bits 0-2 of
// each register number are encoded), so the debug text names both as d-regs.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2314
2315void emit_fadds(int s1,int s2,int d)
2316{
2317 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2318 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2319}
2320
2321void emit_faddd(int s1,int s2,int d)
2322{
2323 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2324 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2325}
2326
2327void emit_fsubs(int s1,int s2,int d)
2328{
2329 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2330 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2331}
2332
2333void emit_fsubd(int s1,int s2,int d)
2334{
2335 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2336 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2337}
2338
2339void emit_fmuls(int s1,int s2,int d)
2340{
2341 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2342 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2343}
2344
2345void emit_fmuld(int s1,int s2,int d)
2346{
2347 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2348 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2349}
2350
2351void emit_fdivs(int s1,int s2,int d)
2352{
2353 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2354 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2355}
2356
2357void emit_fdivd(int s1,int s2,int d)
2358{
2359 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2360 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2361}
2362
// Emit a single-precision compare. The operands are fixed: the instruction
// word is a constant that the debug text describes as "fcmps s14, s15", and
// the x/y arguments are not used.
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(0xeeb47a67);
}
2368
// Emit a double-precision compare. The operands are fixed: the instruction
// word is a constant that the debug text describes as "fcmpd d6, d7", and
// the x/y arguments are not used.
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(0xeeb46b47);
}
2374
2375void emit_fmstat()
2376{
2377 assem_debug("fmstat\n");
2378 output_w32(0xeef1fa10);
2379}
2380
2381void emit_bicne_imm(int rs,int imm,int rt)
2382{
2383 u_int armval;
cfbd3c6e 2384 genimm_checked(imm,&armval);
57871462 2385 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2386 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2387}
2388
2389void emit_biccs_imm(int rs,int imm,int rt)
2390{
2391 u_int armval;
cfbd3c6e 2392 genimm_checked(imm,&armval);
57871462 2393 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2394 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2395}
2396
2397void emit_bicvc_imm(int rs,int imm,int rt)
2398{
2399 u_int armval;
cfbd3c6e 2400 genimm_checked(imm,&armval);
57871462 2401 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2402 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2403}
2404
2405void emit_bichi_imm(int rs,int imm,int rt)
2406{
2407 u_int armval;
cfbd3c6e 2408 genimm_checked(imm,&armval);
57871462 2409 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2410 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2411}
2412
2413void emit_orrvs_imm(int rs,int imm,int rt)
2414{
2415 u_int armval;
cfbd3c6e 2416 genimm_checked(imm,&armval);
57871462 2417 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2418 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2419}
2420
b9b61529 2421void emit_orrne_imm(int rs,int imm,int rt)
2422{
2423 u_int armval;
cfbd3c6e 2424 genimm_checked(imm,&armval);
b9b61529 2425 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2426 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2427}
2428
2429void emit_andne_imm(int rs,int imm,int rt)
2430{
2431 u_int armval;
cfbd3c6e 2432 genimm_checked(imm,&armval);
b9b61529 2433 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2434 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2435}
2436
57871462 2437void emit_jno_unlikely(int a)
2438{
2439 //emit_jno(a);
2440 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2441 output_w32(0x72800000|rd_rn_rm(15,15,0));
2442}
2443
2444// Save registers before function call
2445void save_regs(u_int reglist)
2446{
2447 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2448 if(!reglist) return;
2449 assem_debug("stmia fp,{");
2450 if(reglist&1) assem_debug("r0, ");
2451 if(reglist&2) assem_debug("r1, ");
2452 if(reglist&4) assem_debug("r2, ");
2453 if(reglist&8) assem_debug("r3, ");
2454 if(reglist&0x1000) assem_debug("r12");
2455 assem_debug("}\n");
2456 output_w32(0xe88b0000|reglist);
2457}
2458// Restore registers after function call
2459void restore_regs(u_int reglist)
2460{
2461 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2462 if(!reglist) return;
2463 assem_debug("ldmia fp,{");
2464 if(reglist&1) assem_debug("r0, ");
2465 if(reglist&2) assem_debug("r1, ");
2466 if(reglist&4) assem_debug("r2, ");
2467 if(reglist&8) assem_debug("r3, ");
2468 if(reglist&0x1000) assem_debug("r12");
2469 assem_debug("}\n");
2470 output_w32(0xe89b0000|reglist);
2471}
2472
2473// Write back consts using r14 so we don't disturb the other registers
2474void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2475{
2476 int hr;
2477 for(hr=0;hr<HOST_REGS;hr++) {
2478 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2479 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2480 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2481 int value=constmap[i][hr];
2482 if(value==0) {
2483 emit_zeroreg(HOST_TEMPREG);
2484 }
2485 else {
2486 emit_movimm(value,HOST_TEMPREG);
2487 }
2488 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2489#ifndef FORCE32
57871462 2490 if((i_is32>>i_regmap[hr])&1) {
2491 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2492 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2493 }
24385cae 2494#endif
57871462 2495 }
2496 }
2497 }
2498 }
2499}
2500
2501/* Stubs/epilogue */
2502
2503void literal_pool(int n)
2504{
2505 if(!literalcount) return;
2506 if(n) {
2507 if((int)out-literals[0][0]<4096-n) return;
2508 }
2509 u_int *ptr;
2510 int i;
2511 for(i=0;i<literalcount;i++)
2512 {
2513 ptr=(u_int *)literals[i][0];
2514 u_int offset=(u_int)out-(u_int)ptr-8;
2515 assert(offset<4096);
2516 assert(!(offset&3));
2517 *ptr|=offset;
2518 output_w32(literals[i][1]);
2519 }
2520 literalcount=0;
2521}
2522
2523void literal_pool_jumpover(int n)
2524{
2525 if(!literalcount) return;
2526 if(n) {
2527 if((int)out-literals[0][0]<4096-n) return;
2528 }
2529 int jaddr=(int)out;
2530 emit_jmp(0);
2531 literal_pool(0);
2532 set_jump_target(jaddr,(int)out);
2533}
2534
2535emit_extjump2(int addr, int target, int linker)
2536{
2537 u_char *ptr=(u_char *)addr;
2538 assert((ptr[3]&0x0e)==0xa);
2539 emit_loadlp(target,0);
2540 emit_loadlp(addr,1);
24385cae 2541 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2542 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2543//DEBUG >
2544#ifdef DEBUG_CYCLE_COUNT
2545 emit_readword((int)&last_count,ECX);
2546 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2547 emit_readword((int)&next_interupt,ECX);
2548 emit_writeword(HOST_CCREG,(int)&Count);
2549 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2550 emit_writeword(ECX,(int)&last_count);
2551#endif
2552//DEBUG <
2553 emit_jmp(linker);
2554}
2555
2556emit_extjump(int addr, int target)
2557{
2558 emit_extjump2(addr, target, (int)dyna_linker);
2559}
2560emit_extjump_ds(int addr, int target)
2561{
2562 emit_extjump2(addr, target, (int)dyna_linker_ds);
2563}
2564
2565do_readstub(int n)
2566{
2567 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2568 literal_pool(256);
2569 set_jump_target(stubs[n][1],(int)out);
2570 int type=stubs[n][0];
2571 int i=stubs[n][3];
2572 int rs=stubs[n][4];
2573 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2574 u_int reglist=stubs[n][7];
2575 signed char *i_regmap=i_regs->regmap;
2576 int addr=get_reg(i_regmap,AGEN1+(i&1));
2577 int rth,rt;
2578 int ds;
b9b61529 2579 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2580 rth=get_reg(i_regmap,FTEMP|64);
2581 rt=get_reg(i_regmap,FTEMP);
2582 }else{
2583 rth=get_reg(i_regmap,rt1[i]|64);
2584 rt=get_reg(i_regmap,rt1[i]);
2585 }
2586 assert(rs>=0);
57871462 2587 if(addr<0) addr=rt;
535d208a 2588 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2589 assert(addr>=0);
2590 int ftable=0;
2591 if(type==LOADB_STUB||type==LOADBU_STUB)
2592 ftable=(int)readmemb;
2593 if(type==LOADH_STUB||type==LOADHU_STUB)
2594 ftable=(int)readmemh;
2595 if(type==LOADW_STUB)
2596 ftable=(int)readmem;
24385cae 2597#ifndef FORCE32
57871462 2598 if(type==LOADD_STUB)
2599 ftable=(int)readmemd;
24385cae 2600#endif
2601 assert(ftable!=0);
57871462 2602 emit_writeword(rs,(int)&address);
2603 //emit_pusha();
2604 save_regs(reglist);
2605 ds=i_regs!=&regs[i];
2606 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2607 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2608 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2609 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2610 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2611 emit_shrimm(rs,16,1);
2612 int cc=get_reg(i_regmap,CCREG);
2613 if(cc<0) {
2614 emit_loadreg(CCREG,2);
2615 }
2616 emit_movimm(ftable,0);
2617 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2618 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2619 //emit_readword((int)&last_count,temp);
2620 //emit_add(cc,temp,cc);
2621 //emit_writeword(cc,(int)&Count);
2622 //emit_mov(15,14);
2623 emit_call((int)&indirect_jump_indexed);
2624 //emit_callreg(rs);
2625 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2626 // We really shouldn't need to update the count here,
2627 // but not doing so causes random crashes...
2628 emit_readword((int)&Count,HOST_TEMPREG);
2629 emit_readword((int)&next_interupt,2);
2630 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2631 emit_writeword(2,(int)&last_count);
2632 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2633 if(cc<0) {
2634 emit_storereg(CCREG,HOST_TEMPREG);
2635 }
2636 //emit_popa();
2637 restore_regs(reglist);
2638 //if((cc=get_reg(regmap,CCREG))>=0) {
2639 // emit_loadreg(CCREG,cc);
2640 //}
f18c0f46 2641 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2642 assert(rt>=0);
2643 if(type==LOADB_STUB)
2644 emit_movsbl((int)&readmem_dword,rt);
2645 if(type==LOADBU_STUB)
2646 emit_movzbl((int)&readmem_dword,rt);
2647 if(type==LOADH_STUB)
2648 emit_movswl((int)&readmem_dword,rt);
2649 if(type==LOADHU_STUB)
2650 emit_movzwl((int)&readmem_dword,rt);
2651 if(type==LOADW_STUB)
2652 emit_readword((int)&readmem_dword,rt);
2653 if(type==LOADD_STUB) {
2654 emit_readword((int)&readmem_dword,rt);
2655 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2656 }
57871462 2657 }
2658 emit_jmp(stubs[n][2]); // return address
2659}
2660
2661inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2662{
2663 int rs=get_reg(regmap,target);
2664 int rth=get_reg(regmap,target|64);
2665 int rt=get_reg(regmap,target);
535d208a 2666 if(rs<0) rs=get_reg(regmap,-1);
57871462 2667 assert(rs>=0);
57871462 2668 int ftable=0;
2669 if(type==LOADB_STUB||type==LOADBU_STUB)
2670 ftable=(int)readmemb;
2671 if(type==LOADH_STUB||type==LOADHU_STUB)
2672 ftable=(int)readmemh;
2673 if(type==LOADW_STUB)
2674 ftable=(int)readmem;
24385cae 2675#ifndef FORCE32
57871462 2676 if(type==LOADD_STUB)
2677 ftable=(int)readmemd;
24385cae 2678#endif
2679 assert(ftable!=0);
fd99c415 2680 if(target==0)
2681 emit_movimm(addr,rs);
57871462 2682 emit_writeword(rs,(int)&address);
2683 //emit_pusha();
2684 save_regs(reglist);
2685 //emit_shrimm(rs,16,1);
2686 int cc=get_reg(regmap,CCREG);
2687 if(cc<0) {
2688 emit_loadreg(CCREG,2);
2689 }
2690 //emit_movimm(ftable,0);
2691 emit_movimm(((u_int *)ftable)[addr>>16],0);
2692 //emit_readword((int)&last_count,12);
2693 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2694 if((signed int)addr>=(signed int)0xC0000000) {
2695 // Pagefault address
2696 int ds=regmap!=regs[i].regmap;
2697 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2698 }
2699 //emit_add(12,2,2);
2700 //emit_writeword(2,(int)&Count);
2701 //emit_call(((u_int *)ftable)[addr>>16]);
2702 emit_call((int)&indirect_jump);
2703 // We really shouldn't need to update the count here,
2704 // but not doing so causes random crashes...
2705 emit_readword((int)&Count,HOST_TEMPREG);
2706 emit_readword((int)&next_interupt,2);
2707 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2708 emit_writeword(2,(int)&last_count);
2709 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2710 if(cc<0) {
2711 emit_storereg(CCREG,HOST_TEMPREG);
2712 }
2713 //emit_popa();
2714 restore_regs(reglist);
fd99c415 2715 if(rt>=0) {
2716 if(type==LOADB_STUB)
2717 emit_movsbl((int)&readmem_dword,rt);
2718 if(type==LOADBU_STUB)
2719 emit_movzbl((int)&readmem_dword,rt);
2720 if(type==LOADH_STUB)
2721 emit_movswl((int)&readmem_dword,rt);
2722 if(type==LOADHU_STUB)
2723 emit_movzwl((int)&readmem_dword,rt);
2724 if(type==LOADW_STUB)
2725 emit_readword((int)&readmem_dword,rt);
2726 if(type==LOADD_STUB) {
2727 emit_readword((int)&readmem_dword,rt);
2728 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2729 }
57871462 2730 }
2731}
2732
2733do_writestub(int n)
2734{
2735 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2736 literal_pool(256);
2737 set_jump_target(stubs[n][1],(int)out);
2738 int type=stubs[n][0];
2739 int i=stubs[n][3];
2740 int rs=stubs[n][4];
2741 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2742 u_int reglist=stubs[n][7];
2743 signed char *i_regmap=i_regs->regmap;
2744 int addr=get_reg(i_regmap,AGEN1+(i&1));
2745 int rth,rt,r;
2746 int ds;
b9b61529 2747 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2748 rth=get_reg(i_regmap,FTEMP|64);
2749 rt=get_reg(i_regmap,r=FTEMP);
2750 }else{
2751 rth=get_reg(i_regmap,rs2[i]|64);
2752 rt=get_reg(i_regmap,r=rs2[i]);
2753 }
2754 assert(rs>=0);
2755 assert(rt>=0);
2756 if(addr<0) addr=get_reg(i_regmap,-1);
2757 assert(addr>=0);
2758 int ftable=0;
2759 if(type==STOREB_STUB)
2760 ftable=(int)writememb;
2761 if(type==STOREH_STUB)
2762 ftable=(int)writememh;
2763 if(type==STOREW_STUB)
2764 ftable=(int)writemem;
24385cae 2765#ifndef FORCE32
57871462 2766 if(type==STORED_STUB)
2767 ftable=(int)writememd;
24385cae 2768#endif
2769 assert(ftable!=0);
57871462 2770 emit_writeword(rs,(int)&address);
2771 //emit_shrimm(rs,16,rs);
2772 //emit_movmem_indexedx4(ftable,rs,rs);
2773 if(type==STOREB_STUB)
2774 emit_writebyte(rt,(int)&byte);
2775 if(type==STOREH_STUB)
2776 emit_writehword(rt,(int)&hword);
2777 if(type==STOREW_STUB)
2778 emit_writeword(rt,(int)&word);
2779 if(type==STORED_STUB) {
3d624f89 2780#ifndef FORCE32
57871462 2781 emit_writeword(rt,(int)&dword);
2782 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2783#else
2784 printf("STORED_STUB\n");
2785#endif
57871462 2786 }
2787 //emit_pusha();
2788 save_regs(reglist);
2789 ds=i_regs!=&regs[i];
2790 int real_rs=get_reg(i_regmap,rs1[i]);
2791 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2792 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2793 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2794 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2795 emit_shrimm(rs,16,1);
2796 int cc=get_reg(i_regmap,CCREG);
2797 if(cc<0) {
2798 emit_loadreg(CCREG,2);
2799 }
2800 emit_movimm(ftable,0);
2801 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2802 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2803 //emit_readword((int)&last_count,temp);
2804 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2805 //emit_add(cc,temp,cc);
2806 //emit_writeword(cc,(int)&Count);
2807 emit_call((int)&indirect_jump_indexed);
2808 //emit_callreg(rs);
2809 emit_readword((int)&Count,HOST_TEMPREG);
2810 emit_readword((int)&next_interupt,2);
2811 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2812 emit_writeword(2,(int)&last_count);
2813 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2814 if(cc<0) {
2815 emit_storereg(CCREG,HOST_TEMPREG);
2816 }
2817 //emit_popa();
2818 restore_regs(reglist);
2819 //if((cc=get_reg(regmap,CCREG))>=0) {
2820 // emit_loadreg(CCREG,cc);
2821 //}
2822 emit_jmp(stubs[n][2]); // return address
2823}
2824
2825inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2826{
2827 int rs=get_reg(regmap,-1);
2828 int rth=get_reg(regmap,target|64);
2829 int rt=get_reg(regmap,target);
2830 assert(rs>=0);
2831 assert(rt>=0);
2832 int ftable=0;
2833 if(type==STOREB_STUB)
2834 ftable=(int)writememb;
2835 if(type==STOREH_STUB)
2836 ftable=(int)writememh;
2837 if(type==STOREW_STUB)
2838 ftable=(int)writemem;
24385cae 2839#ifndef FORCE32
57871462 2840 if(type==STORED_STUB)
2841 ftable=(int)writememd;
24385cae 2842#endif
2843 assert(ftable!=0);
57871462 2844 emit_writeword(rs,(int)&address);
2845 //emit_shrimm(rs,16,rs);
2846 //emit_movmem_indexedx4(ftable,rs,rs);
2847 if(type==STOREB_STUB)
2848 emit_writebyte(rt,(int)&byte);
2849 if(type==STOREH_STUB)
2850 emit_writehword(rt,(int)&hword);
2851 if(type==STOREW_STUB)
2852 emit_writeword(rt,(int)&word);
2853 if(type==STORED_STUB) {
3d624f89 2854#ifndef FORCE32
57871462 2855 emit_writeword(rt,(int)&dword);
2856 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2857#else
2858 printf("STORED_STUB\n");
2859#endif
57871462 2860 }
2861 //emit_pusha();
2862 save_regs(reglist);
2863 //emit_shrimm(rs,16,1);
2864 int cc=get_reg(regmap,CCREG);
2865 if(cc<0) {
2866 emit_loadreg(CCREG,2);
2867 }
2868 //emit_movimm(ftable,0);
2869 emit_movimm(((u_int *)ftable)[addr>>16],0);
2870 //emit_readword((int)&last_count,12);
2871 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2872 if((signed int)addr>=(signed int)0xC0000000) {
2873 // Pagefault address
2874 int ds=regmap!=regs[i].regmap;
2875 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2876 }
2877 //emit_add(12,2,2);
2878 //emit_writeword(2,(int)&Count);
2879 //emit_call(((u_int *)ftable)[addr>>16]);
2880 emit_call((int)&indirect_jump);
2881 emit_readword((int)&Count,HOST_TEMPREG);
2882 emit_readword((int)&next_interupt,2);
2883 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2884 emit_writeword(2,(int)&last_count);
2885 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2886 if(cc<0) {
2887 emit_storereg(CCREG,HOST_TEMPREG);
2888 }
2889 //emit_popa();
2890 restore_regs(reglist);
2891}
2892
2893do_unalignedwritestub(int n)
2894{
b7918751 2895 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2896 literal_pool(256);
57871462 2897 set_jump_target(stubs[n][1],(int)out);
b7918751 2898
2899 int i=stubs[n][3];
2900 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2901 int addr=stubs[n][5];
2902 u_int reglist=stubs[n][7];
2903 signed char *i_regmap=i_regs->regmap;
2904 int temp2=get_reg(i_regmap,FTEMP);
2905 int rt;
2906 int ds, real_rs;
2907 rt=get_reg(i_regmap,rs2[i]);
2908 assert(rt>=0);
2909 assert(addr>=0);
2910 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2911 reglist|=(1<<addr);
2912 reglist&=~(1<<temp2);
2913
2914 emit_andimm(addr,0xfffffffc,temp2);
2915 emit_writeword(temp2,(int)&address);
2916
2917 save_regs(reglist);
2918 ds=i_regs!=&regs[i];
2919 real_rs=get_reg(i_regmap,rs1[i]);
2920 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2921 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2922 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2923 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2924 emit_shrimm(addr,16,1);
2925 int cc=get_reg(i_regmap,CCREG);
2926 if(cc<0) {
2927 emit_loadreg(CCREG,2);
2928 }
2929 emit_movimm((u_int)readmem,0);
2930 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2931 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2932 emit_call((int)&indirect_jump_indexed);
2933 restore_regs(reglist);
2934
2935 emit_readword((int)&readmem_dword,temp2);
2936 int temp=addr; //hmh
2937 emit_shlimm(addr,3,temp);
2938 emit_andimm(temp,24,temp);
2939#ifdef BIG_ENDIAN_MIPS
2940 if (opcode[i]==0x2e) // SWR
2941#else
2942 if (opcode[i]==0x2a) // SWL
2943#endif
2944 emit_xorimm(temp,24,temp);
2945 emit_movimm(-1,HOST_TEMPREG);
55439448 2946 if (opcode[i]==0x2a) { // SWL
b7918751 2947 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2948 emit_orrshr(rt,temp,temp2);
2949 }else{
2950 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2951 emit_orrshl(rt,temp,temp2);
2952 }
2953 emit_readword((int)&address,addr);
2954 emit_writeword(temp2,(int)&word);
2955 //save_regs(reglist); // don't need to, no state changes
2956 emit_shrimm(addr,16,1);
2957 emit_movimm((u_int)writemem,0);
2958 //emit_call((int)&indirect_jump_indexed);
2959 emit_mov(15,14);
2960 emit_readword_dualindexedx4(0,1,15);
2961 emit_readword((int)&Count,HOST_TEMPREG);
2962 emit_readword((int)&next_interupt,2);
2963 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2964 emit_writeword(2,(int)&last_count);
2965 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2966 if(cc<0) {
2967 emit_storereg(CCREG,HOST_TEMPREG);
2968 }
2969 restore_regs(reglist);
57871462 2970 emit_jmp(stubs[n][2]); // return address
2971}
2972
// Debug helper: print the eight register values passed in, followed by the
// word found just below the first parameter in memory.
// NOTE(review): the parameter names and the (&edi)[-1] access look like a
// leftover from the x86 backend - confirm this is still meaningful on ARM.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int below_first_arg=(&edi)[-1];
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,below_first_arg);
}
2977
2978do_invstub(int n)
2979{
2980 literal_pool(20);
2981 u_int reglist=stubs[n][3];
2982 set_jump_target(stubs[n][1],(int)out);
2983 save_regs(reglist);
2984 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2985 emit_call((int)&invalidate_addr);
2986 restore_regs(reglist);
2987 emit_jmp(stubs[n][2]); // return address
2988}
2989
2990int do_dirty_stub(int i)
2991{
2992 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 2993 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2994 #ifdef PCSX
2995 addr=(u_int)source;
2996 #endif
57871462 2997 // Careful about the code output here, verify_dirty needs to parse it.
2998 #ifdef ARMv5_ONLY
ac545b3a 2999 emit_loadlp(addr,1);
57871462 3000 emit_loadlp((int)copy,2);
3001 emit_loadlp(slen*4,3);
3002 #else
ac545b3a 3003 emit_movw(addr&0x0000FFFF,1);
57871462 3004 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3005 emit_movt(addr&0xFFFF0000,1);
57871462 3006 emit_movt(((u_int)copy)&0xFFFF0000,2);
3007 emit_movw(slen*4,3);
3008 #endif
3009 emit_movimm(start+i*4,0);
3010 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3011 int entry=(int)out;
3012 load_regs_entry(i);
3013 if(entry==(int)out) entry=instr_addr[i];
3014 emit_jmp(instr_addr[i]);
3015 return entry;
3016}
3017
3018void do_dirty_stub_ds()
3019{
3020 // Careful about the code output here, verify_dirty needs to parse it.
3021 #ifdef ARMv5_ONLY
3022 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3023 emit_loadlp((int)copy,2);
3024 emit_loadlp(slen*4,3);
3025 #else
3026 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3027 emit_movw(((u_int)copy)&0x0000FFFF,2);
3028 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3029 emit_movt(((u_int)copy)&0xFFFF0000,2);
3030 emit_movw(slen*4,3);
3031 #endif
3032 emit_movimm(start+1,0);
3033 emit_call((int)&verify_code_ds);
3034}
3035
3036do_cop1stub(int n)
3037{
3038 literal_pool(256);
3039 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3040 set_jump_target(stubs[n][1],(int)out);
3041 int i=stubs[n][3];
3d624f89 3042// int rs=stubs[n][4];
57871462 3043 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3044 int ds=stubs[n][6];
3045 if(!ds) {
3046 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3047 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3048 }
3049 //else {printf("fp exception in delay slot\n");}
3050 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3051 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3052 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3053 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3054 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3055}
3056
3057/* TLB */
3058
3059int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3060{
3061 if(c) {
3062 if((signed int)addr>=(signed int)0xC0000000) {
3063 // address_generation already loaded the const
3064 emit_readword_dualindexedx4(FP,map,map);
3065 }
3066 else
3067 return -1; // No mapping
3068 }
3069 else {
3070 assert(s!=map);
3071 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3072 emit_addsr12(map,s,map);
3073 // Schedule this while we wait on the load
3074 //if(x) emit_xorimm(s,x,ar);
3075 if(shift>=0) emit_shlimm(s,3,shift);
3076 if(~a) emit_andimm(s,a,ar);
3077 emit_readword_dualindexedx4(FP,map,map);
3078 }
3079 return map;
3080}
3081int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3082{
3083 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3084 emit_test(map,map);
3085 *jaddr=(int)out;
3086 emit_js(0);
3087 }
3088 return map;
3089}
3090
3091int gen_tlb_addr_r(int ar, int map) {
3092 if(map>=0) {
3093 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3094 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3095 }
3096}
3097
3098int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3099{
3100 if(c) {
3101 if(addr<0x80800000||addr>=0xC0000000) {
3102 // address_generation already loaded the const
3103 emit_readword_dualindexedx4(FP,map,map);
3104 }
3105 else
3106 return -1; // No mapping
3107 }
3108 else {
3109 assert(s!=map);
3110 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3111 emit_addsr12(map,s,map);
3112 // Schedule this while we wait on the load
3113 //if(x) emit_xorimm(s,x,ar);
3114 emit_readword_dualindexedx4(FP,map,map);
3115 }
3116 return map;
3117}
3118int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3119{
3120 if(!c||addr<0x80800000||addr>=0xC0000000) {
3121 emit_testimm(map,0x40000000);
3122 *jaddr=(int)out;
3123 emit_jne(0);
3124 }
3125}
3126
3127int gen_tlb_addr_w(int ar, int map) {
3128 if(map>=0) {
3129 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3130 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3131 }
3132}
3133
3134// Generate the address of the memory_map entry, relative to dynarec_local
3135generate_map_const(u_int addr,int reg) {
3136 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3137 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3138}
3139
3140/* Special assem */
3141
3142void shift_assemble_arm(int i,struct regstat *i_regs)
3143{
3144 if(rt1[i]) {
3145 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3146 {
3147 signed char s,t,shift;
3148 t=get_reg(i_regs->regmap,rt1[i]);
3149 s=get_reg(i_regs->regmap,rs1[i]);
3150 shift=get_reg(i_regs->regmap,rs2[i]);
3151 if(t>=0){
3152 if(rs1[i]==0)
3153 {
3154 emit_zeroreg(t);
3155 }
3156 else if(rs2[i]==0)
3157 {
3158 assert(s>=0);
3159 if(s!=t) emit_mov(s,t);
3160 }
3161 else
3162 {
3163 emit_andimm(shift,31,HOST_TEMPREG);
3164 if(opcode2[i]==4) // SLLV
3165 {
3166 emit_shl(s,HOST_TEMPREG,t);
3167 }
3168 if(opcode2[i]==6) // SRLV
3169 {
3170 emit_shr(s,HOST_TEMPREG,t);
3171 }
3172 if(opcode2[i]==7) // SRAV
3173 {
3174 emit_sar(s,HOST_TEMPREG,t);
3175 }
3176 }
3177 }
3178 } else { // DSLLV/DSRLV/DSRAV
3179 signed char sh,sl,th,tl,shift;
3180 th=get_reg(i_regs->regmap,rt1[i]|64);
3181 tl=get_reg(i_regs->regmap,rt1[i]);
3182 sh=get_reg(i_regs->regmap,rs1[i]|64);
3183 sl=get_reg(i_regs->regmap,rs1[i]);
3184 shift=get_reg(i_regs->regmap,rs2[i]);
3185 if(tl>=0){
3186 if(rs1[i]==0)
3187 {
3188 emit_zeroreg(tl);
3189 if(th>=0) emit_zeroreg(th);
3190 }
3191 else if(rs2[i]==0)
3192 {
3193 assert(sl>=0);
3194 if(sl!=tl) emit_mov(sl,tl);
3195 if(th>=0&&sh!=th) emit_mov(sh,th);
3196 }
3197 else
3198 {
3199 // FIXME: What if shift==tl ?
3200 assert(shift!=tl);
3201 int temp=get_reg(i_regs->regmap,-1);
3202 int real_th=th;
3203 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3204 assert(sl>=0);
3205 assert(sh>=0);
3206 emit_andimm(shift,31,HOST_TEMPREG);
3207 if(opcode2[i]==0x14) // DSLLV
3208 {
3209 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3210 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3211 emit_orrshr(sl,HOST_TEMPREG,th);
3212 emit_andimm(shift,31,HOST_TEMPREG);
3213 emit_testimm(shift,32);
3214 emit_shl(sl,HOST_TEMPREG,tl);
3215 if(th>=0) emit_cmovne_reg(tl,th);
3216 emit_cmovne_imm(0,tl);
3217 }
3218 if(opcode2[i]==0x16) // DSRLV
3219 {
3220 assert(th>=0);
3221 emit_shr(sl,HOST_TEMPREG,tl);
3222 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3223 emit_orrshl(sh,HOST_TEMPREG,tl);
3224 emit_andimm(shift,31,HOST_TEMPREG);
3225 emit_testimm(shift,32);
3226 emit_shr(sh,HOST_TEMPREG,th);
3227 emit_cmovne_reg(th,tl);
3228 if(real_th>=0) emit_cmovne_imm(0,th);
3229 }
3230 if(opcode2[i]==0x17) // DSRAV
3231 {
3232 assert(th>=0);
3233 emit_shr(sl,HOST_TEMPREG,tl);
3234 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3235 if(real_th>=0) {
3236 assert(temp>=0);
3237 emit_sarimm(th,31,temp);
3238 }
3239 emit_orrshl(sh,HOST_TEMPREG,tl);
3240 emit_andimm(shift,31,HOST_TEMPREG);
3241 emit_testimm(shift,32);
3242 emit_sar(sh,HOST_TEMPREG,th);
3243 emit_cmovne_reg(th,tl);
3244 if(real_th>=0) emit_cmovne_reg(temp,th);
3245 }
3246 }
3247 }
3248 }
3249 }
3250}
3251#define shift_assemble shift_assemble_arm
3252
3253void loadlr_assemble_arm(int i,struct regstat *i_regs)
3254{
3255 int s,th,tl,temp,temp2,addr,map=-1;
3256 int offset;
3257 int jaddr=0;
3258 int memtarget,c=0;
3259 u_int hr,reglist=0;
3260 th=get_reg(i_regs->regmap,rt1[i]|64);
3261 tl=get_reg(i_regs->regmap,rt1[i]);
3262 s=get_reg(i_regs->regmap,rs1[i]);
3263 temp=get_reg(i_regs->regmap,-1);
3264 temp2=get_reg(i_regs->regmap,FTEMP);
3265 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3266 assert(addr<0);
3267 offset=imm[i];
3268 for(hr=0;hr<HOST_REGS;hr++) {
3269 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3270 }
3271 reglist|=1<<temp;
3272 if(offset||s<0||c) addr=temp2;
3273 else addr=s;
3274 if(s>=0) {
3275 c=(i_regs->wasconst>>s)&1;
4cb76aa4 3276 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
57871462 3277 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3278 }
535d208a 3279 if(!using_tlb) {
3280 if(!c) {
3281 #ifdef RAM_OFFSET
3282 map=get_reg(i_regs->regmap,ROREG);
3283 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3284 #endif
3285 emit_shlimm(addr,3,temp);
3286 if (opcode[i]==0x22||opcode[i]==0x26) {
3287 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 3288 }else{
535d208a 3289 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 3290 }
535d208a 3291 emit_cmpimm(addr,RAM_SIZE);
3292 jaddr=(int)out;
3293 emit_jno(0);
3294 }
3295 else {
3296 if (opcode[i]==0x22||opcode[i]==0x26) {
3297 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3298 }else{
3299 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3300 }
57871462 3301 }
535d208a 3302 }else{ // using tlb
3303 int a;
3304 if(c) {
3305 a=-1;
3306 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3307 a=0xFFFFFFFC; // LWL/LWR
3308 }else{
3309 a=0xFFFFFFF8; // LDL/LDR
3310 }
3311 map=get_reg(i_regs->regmap,TLREG);
3312 assert(map>=0);
3313 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3314 if(c) {
3315 if (opcode[i]==0x22||opcode[i]==0x26) {
3316 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3317 }else{
3318 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 3319 }
535d208a 3320 }
3321 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3322 }
3323 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3324 if(!c||memtarget) {
3325 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3326 emit_readword_indexed_tlb(0,temp2,map,temp2);
3327 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3328 }
3329 else
3330 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3331 if(rt1[i]) {
3332 assert(tl>=0);
57871462 3333 emit_andimm(temp,24,temp);
2002a1db 3334#ifdef BIG_ENDIAN_MIPS
3335 if (opcode[i]==0x26) // LWR
3336#else
3337 if (opcode[i]==0x22) // LWL
3338#endif
3339 emit_xorimm(temp,24,temp);
57871462 3340 emit_movimm(-1,HOST_TEMPREG);
3341 if (opcode[i]==0x26) {
3342 emit_shr(temp2,temp,temp2);
3343 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3344 }else{
3345 emit_shl(temp2,temp,temp2);
3346 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3347 }
3348 emit_or(temp2,tl,tl);
57871462 3349 }
535d208a 3350 //emit_storereg(rt1[i],tl); // DEBUG
3351 }
3352 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3353 // FIXME: little endian
3354 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3355 if(!c||memtarget) {
3356 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3357 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3358 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3359 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3360 }
3361 else
3362 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3363 if(rt1[i]) {
3364 assert(th>=0);
3365 assert(tl>=0);
57871462 3366 emit_testimm(temp,32);
3367 emit_andimm(temp,24,temp);
3368 if (opcode[i]==0x1A) { // LDL
3369 emit_rsbimm(temp,32,HOST_TEMPREG);
3370 emit_shl(temp2h,temp,temp2h);
3371 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3372 emit_movimm(-1,HOST_TEMPREG);
3373 emit_shl(temp2,temp,temp2);
3374 emit_cmove_reg(temp2h,th);
3375 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3376 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3377 emit_orreq(temp2,tl,tl);
3378 emit_orrne(temp2,th,th);
3379 }
3380 if (opcode[i]==0x1B) { // LDR
3381 emit_xorimm(temp,24,temp);
3382 emit_rsbimm(temp,32,HOST_TEMPREG);
3383 emit_shr(temp2,temp,temp2);
3384 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3385 emit_movimm(-1,HOST_TEMPREG);
3386 emit_shr(temp2h,temp,temp2h);
3387 emit_cmovne_reg(temp2,tl);
3388 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3389 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3390 emit_orrne(temp2h,th,th);
3391 emit_orreq(temp2h,tl,tl);
3392 }
3393 }
3394 }
3395}
3396#define loadlr_assemble loadlr_assemble_arm
3397
3398void cop0_assemble(int i,struct regstat *i_regs)
3399{
3400 if(opcode2[i]==0) // MFC0
3401 {
3402 signed char t=get_reg(i_regs->regmap,rt1[i]);
3403 char copr=(source[i]>>11)&0x1f;
3404 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3405 if(t>=0&&rt1[i]!=0) {
7139f3c8 3406#ifdef MUPEN64
57871462 3407 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3408 emit_movimm((source[i]>>11)&0x1f,1);
3409 emit_writeword(0,(int)&PC);
3410 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3411 if(copr==9) {
3412 emit_readword((int)&last_count,ECX);
3413 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3414 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3415 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3416 emit_writeword(HOST_CCREG,(int)&Count);
3417 }
3418 emit_call((int)MFC0);
3419 emit_readword((int)&readmem_dword,t);
7139f3c8 3420#else
3421 emit_readword((int)&reg_cop0+copr*4,t);
3422#endif
57871462 3423 }
3424 }
3425 else if(opcode2[i]==4) // MTC0
3426 {
3427 signed char s=get_reg(i_regs->regmap,rs1[i]);
3428 char copr=(source[i]>>11)&0x1f;
3429 assert(s>=0);
3430 emit_writeword(s,(int)&readmem_dword);
3431 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
fca1aef2 3432#ifdef MUPEN64
57871462 3433 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3434 emit_movimm((source[i]>>11)&0x1f,1);
3435 emit_writeword(0,(int)&PC);
3436 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
7139f3c8 3437#endif
3438 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3439 emit_readword((int)&last_count,ECX);
3440 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3441 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3442 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3443 emit_writeword(HOST_CCREG,(int)&Count);
3444 }
3445 // What a mess. The status register (12) can enable interrupts,
3446 // so needs a special case to handle a pending interrupt.
3447 // The interrupt must be taken immediately, because a subsequent
3448 // instruction might disable interrupts again.
7139f3c8 3449 if(copr==12||copr==13) {
fca1aef2 3450#ifdef PCSX
3451 if (is_delayslot) {
3452 // burn cycles to cause cc_interrupt, which will
3453 // reschedule next_interupt. Relies on CCREG from above.
3454 assem_debug("MTC0 DS %d\n", copr);
3455 emit_writeword(HOST_CCREG,(int)&last_count);
3456 emit_movimm(0,HOST_CCREG);
3457 emit_storereg(CCREG,HOST_CCREG);
3458 emit_movimm(copr,0);
3459 emit_call((int)pcsx_mtc0_ds);
3460 return;
3461 }
3462#endif
57871462 3463 emit_movimm(start+i*4+4,0);
3464 emit_movimm(0,1);
3465 emit_writeword(0,(int)&pcaddr);
3466 emit_writeword(1,(int)&pending_exception);
3467 }
3468 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3469 //else
fca1aef2 3470#ifdef PCSX
3471 emit_movimm(copr,0);
3472 emit_call((int)pcsx_mtc0);
3473#else
57871462 3474 emit_call((int)MTC0);
fca1aef2 3475#endif
7139f3c8 3476 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3477 emit_readword((int)&Count,HOST_CCREG);
3478 emit_readword((int)&next_interupt,ECX);
3479 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3480 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3481 emit_writeword(ECX,(int)&last_count);
3482 emit_storereg(CCREG,HOST_CCREG);
3483 }
7139f3c8 3484 if(copr==12||copr==13) {
57871462 3485 assert(!is_delayslot);
3486 emit_readword((int)&pending_exception,14);
3487 }
3488 emit_loadreg(rs1[i],s);
3489 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3490 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3491 if(copr==12||copr==13) {
57871462 3492 emit_test(14,14);
3493 emit_jne((int)&do_interrupt);
3494 }
3495 cop1_usable=0;
3496 }
3497 else
3498 {
3499 assert(opcode2[i]==0x10);
3d624f89 3500#ifndef DISABLE_TLB
57871462 3501 if((source[i]&0x3f)==0x01) // TLBR
3502 emit_call((int)TLBR);
3503 if((source[i]&0x3f)==0x02) // TLBWI
3504 emit_call((int)TLBWI_new);
3505 if((source[i]&0x3f)==0x06) { // TLBWR
3506 // The TLB entry written by TLBWR is dependent on the count,
3507 // so update the cycle count
3508 emit_readword((int)&last_count,ECX);
3509 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3510 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3511 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3512 emit_writeword(HOST_CCREG,(int)&Count);
3513 emit_call((int)TLBWR_new);
3514 }
3515 if((source[i]&0x3f)==0x08) // TLBP
3516 emit_call((int)TLBP);
3d624f89 3517#endif
576bbd8f 3518#ifdef PCSX
3519 if((source[i]&0x3f)==0x10) // RFE
3520 {
3521 emit_readword((int)&Status,0);
3522 emit_andimm(0,0x3c,1);
3523 emit_andimm(0,~0xf,0);
3524 emit_orrshr_imm(1,2,0);
3525 emit_writeword(0,(int)&Status);
3526 }
3527#else
57871462 3528 if((source[i]&0x3f)==0x18) // ERET
3529 {
3530 int count=ccadj[i];
3531 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3532 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3533 emit_jmp((int)jump_eret);
3534 }
576bbd8f 3535#endif
57871462 3536 }
3537}
3538
b9b61529 3539static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3540{
3541 switch (copr) {
3542 case 1:
3543 case 3:
3544 case 5:
3545 case 8:
3546 case 9:
3547 case 10:
3548 case 11:
3549 emit_readword((int)&reg_cop2d[copr],tl);
3550 emit_signextend16(tl,tl);
3551 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3552 break;
3553 case 7:
3554 case 16:
3555 case 17:
3556 case 18:
3557 case 19:
3558 emit_readword((int)&reg_cop2d[copr],tl);
3559 emit_andimm(tl,0xffff,tl);
3560 emit_writeword(tl,(int)&reg_cop2d[copr]);
3561 break;
3562 case 15:
3563 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3564 emit_writeword(tl,(int)&reg_cop2d[copr]);
3565 break;
3566 case 28:
b9b61529 3567 case 29:
3568 emit_readword((int)&reg_cop2d[9],temp);
3569 emit_testimm(temp,0x8000); // do we need this?
3570 emit_andimm(temp,0xf80,temp);
3571 emit_andne_imm(temp,0,temp);
f70d384d 3572 emit_shrimm(temp,7,tl);
b9b61529 3573 emit_readword((int)&reg_cop2d[10],temp);
3574 emit_testimm(temp,0x8000);
3575 emit_andimm(temp,0xf80,temp);
3576 emit_andne_imm(temp,0,temp);
f70d384d 3577 emit_orrshr_imm(temp,2,tl);
b9b61529 3578 emit_readword((int)&reg_cop2d[11],temp);
3579 emit_testimm(temp,0x8000);
3580 emit_andimm(temp,0xf80,temp);
3581 emit_andne_imm(temp,0,temp);
f70d384d 3582 emit_orrshl_imm(temp,3,tl);
b9b61529 3583 emit_writeword(tl,(int)&reg_cop2d[copr]);
3584 break;
3585 default:
3586 emit_readword((int)&reg_cop2d[copr],tl);
3587 break;
3588 }
3589}
3590
3591static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3592{
3593 switch (copr) {
3594 case 15:
3595 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3596 emit_writeword(sl,(int)&reg_cop2d[copr]);
3597 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3598 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3599 emit_writeword(sl,(int)&reg_cop2d[14]);
3600 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3601 break;
3602 case 28:
3603 emit_andimm(sl,0x001f,temp);
f70d384d 3604 emit_shlimm(temp,7,temp);
b9b61529 3605 emit_writeword(temp,(int)&reg_cop2d[9]);
3606 emit_andimm(sl,0x03e0,temp);
f70d384d 3607 emit_shlimm(temp,2,temp);
b9b61529 3608 emit_writeword(temp,(int)&reg_cop2d[10]);
3609 emit_andimm(sl,0x7c00,temp);
f70d384d 3610 emit_shrimm(temp,3,temp);
b9b61529 3611 emit_writeword(temp,(int)&reg_cop2d[11]);
3612 emit_writeword(sl,(int)&reg_cop2d[28]);
3613 break;
3614 case 30:
3615 emit_movs(sl,temp);
3616 emit_mvnmi(temp,temp);
3617 emit_clz(temp,temp);
3618 emit_writeword(sl,(int)&reg_cop2d[30]);
3619 emit_writeword(temp,(int)&reg_cop2d[31]);
3620 break;
b9b61529 3621 case 31:
3622 break;
3623 default:
3624 emit_writeword(sl,(int)&reg_cop2d[copr]);
3625 break;
3626 }
3627}
3628
3629void cop2_assemble(int i,struct regstat *i_regs)
3630{
3631 u_int copr=(source[i]>>11)&0x1f;
3632 signed char temp=get_reg(i_regs->regmap,-1);
3633 if (opcode2[i]==0) { // MFC2
3634 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3635 if(tl>=0&&rt1[i]!=0)
b9b61529 3636 cop2_get_dreg(copr,tl,temp);
3637 }
3638 else if (opcode2[i]==4) { // MTC2
3639 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3640 cop2_put_dreg(copr,sl,temp);
3641 }
3642 else if (opcode2[i]==2) // CFC2
3643 {
3644 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3645 if(tl>=0&&rt1[i]!=0)
b9b61529 3646 emit_readword((int)&reg_cop2c[copr],tl);
3647 }
3648 else if (opcode2[i]==6) // CTC2
3649 {
3650 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3651 switch(copr) {
3652 case 4:
3653 case 12:
3654 case 20:
3655 case 26:
3656 case 27:
3657 case 29:
3658 case 30:
3659 emit_signextend16(sl,temp);
3660 break;
3661 case 31:
3662 //value = value & 0x7ffff000;
3663 //if (value & 0x7f87e000) value |= 0x80000000;
3664 emit_shrimm(sl,12,temp);
3665 emit_shlimm(temp,12,temp);
3666 emit_testimm(temp,0x7f000000);
3667 emit_testeqimm(temp,0x00870000);
3668 emit_testeqimm(temp,0x0000e000);
3669 emit_orrne_imm(temp,0x80000000,temp);
3670 break;
3671 default:
3672 temp=sl;
3673 break;
3674 }
3675 emit_writeword(temp,(int)&reg_cop2c[copr]);
3676 assert(sl>=0);
3677 }
3678}
3679
3680void c2op_assemble(int i,struct regstat *i_regs)
3681{
3682 signed char temp=get_reg(i_regs->regmap,-1);
3683 u_int c2op=source[i]&0x3f;
3684 u_int hr,reglist=0;
3685 for(hr=0;hr<HOST_REGS;hr++) {
3686 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3687 }
3688 if(i==0||itype[i-1]!=C2OP)
3689 save_regs(reglist);
3690
3691 if (gte_handlers[c2op]!=NULL) {
3692 int cc=get_reg(i_regs->regmap,CCREG);
3693 emit_movimm(source[i],temp); // opcode
3694 if (cc>=0&&gte_cycletab[c2op])
3695 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3696 emit_writeword(temp,(int)&psxRegs.code);
3697 emit_call((int)gte_handlers[c2op]);
3698 }
3699
3700 if(i>=slen-1||itype[i+1]!=C2OP)
3701 restore_regs(reglist);
3702}
3703
3704void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3705{
3706 // XXX: should just just do the exception instead
3707 if(!cop1_usable) {
3708 int jaddr=(int)out;
3709 emit_jmp(0);
3710 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3711 cop1_usable=1;
3712 }
3713}
3714
57871462 3715void cop1_assemble(int i,struct regstat *i_regs)
3716{
3d624f89 3717#ifndef DISABLE_COP1
57871462 3718 // Check cop1 unusable
3719 if(!cop1_usable) {
3720 signed char rs=get_reg(i_regs->regmap,CSREG);
3721 assert(rs>=0);
3722 emit_testimm(rs,0x20000000);
3723 int jaddr=(int)out;
3724 emit_jeq(0);
3725 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3726 cop1_usable=1;
3727 }
3728 if (opcode2[i]==0) { // MFC1
3729 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3730 if(tl>=0) {
3731 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3732 emit_readword_indexed(0,tl,tl);
3733 }
3734 }
3735 else if (opcode2[i]==1) { // DMFC1
3736 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3737 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3738 if(tl>=0) {
3739 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3740 if(th>=0) emit_readword_indexed(4,tl,th);
3741 emit_readword_indexed(0,tl,tl);
3742 }
3743 }
3744 else if (opcode2[i]==4) { // MTC1
3745 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3746 signed char temp=get_reg(i_regs->regmap,-1);
3747 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3748 emit_writeword_indexed(sl,0,temp);
3749 }
3750 else if (opcode2[i]==5) { // DMTC1
3751 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3752 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3753 signed char temp=get_reg(i_regs->regmap,-1);
3754 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3755 emit_writeword_indexed(sh,4,temp);
3756 emit_writeword_indexed(sl,0,temp);
3757 }
3758 else if (opcode2[i]==2) // CFC1
3759 {
3760 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3761 if(tl>=0) {
3762 u_int copr=(source[i]>>11)&0x1f;
3763 if(copr==0) emit_readword((int)&FCR0,tl);
3764 if(copr==31) emit_readword((int)&FCR31,tl);
3765 }
3766 }
3767 else if (opcode2[i]==6) // CTC1
3768 {
3769 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3770 u_int copr=(source[i]>>11)&0x1f;
3771 assert(sl>=0);
3772 if(copr==31)
3773 {
3774 emit_writeword(sl,(int)&FCR31);
3775 // Set the rounding mode
3776 //FIXME
3777 //char temp=get_reg(i_regs->regmap,-1);
3778 //emit_andimm(sl,3,temp);
3779 //emit_fldcw_indexed((int)&rounding_modes,temp);
3780 }
3781 }
3d624f89 3782#else
3783 cop1_unusable(i, i_regs);
3784#endif
57871462 3785}
3786
3787void fconv_assemble_arm(int i,struct regstat *i_regs)
3788{
3d624f89 3789#ifndef DISABLE_COP1
57871462 3790 signed char temp=get_reg(i_regs->regmap,-1);
3791 assert(temp>=0);
3792 // Check cop1 unusable
3793 if(!cop1_usable) {
3794 signed char rs=get_reg(i_regs->regmap,CSREG);
3795 assert(rs>=0);
3796 emit_testimm(rs,0x20000000);
3797 int jaddr=(int)out;
3798 emit_jeq(0);
3799 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3800 cop1_usable=1;
3801 }
3802
3803 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3804 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3805 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3806 emit_flds(temp,15);
3807 emit_ftosizs(15,15); // float->int, truncate
3808 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3809 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3810 emit_fsts(15,temp);
3811 return;
3812 }
3813 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3814 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3815 emit_vldr(temp,7);
3816 emit_ftosizd(7,13); // double->int, truncate
3817 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3818 emit_fsts(13,temp);
3819 return;
3820 }
3821
3822 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3823 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3824 emit_flds(temp,13);
3825 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3826 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3827 emit_fsitos(13,15);
3828 emit_fsts(15,temp);
3829 return;
3830 }
3831 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3832 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3833 emit_flds(temp,13);
3834 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3835 emit_fsitod(13,7);
3836 emit_vstr(7,temp);
3837 return;
3838 }
3839
3840 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3841 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3842 emit_flds(temp,13);
3843 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3844 emit_fcvtds(13,7);
3845 emit_vstr(7,temp);
3846 return;
3847 }
3848 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3849 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3850 emit_vldr(temp,7);
3851 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3852 emit_fcvtsd(7,13);
3853 emit_fsts(13,temp);
3854 return;
3855 }
3856 #endif
3857
3858 // C emulation code
3859
3860 u_int hr,reglist=0;
3861 for(hr=0;hr<HOST_REGS;hr++) {
3862 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3863 }
3864 save_regs(reglist);
3865
3866 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3867 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3868 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3869 emit_call((int)cvt_s_w);
3870 }
3871 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3872 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3873 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3874 emit_call((int)cvt_d_w);
3875 }
3876 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3877 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3878 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3879 emit_call((int)cvt_s_l);
3880 }
3881 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3882 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3883 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3884 emit_call((int)cvt_d_l);
3885 }
3886
3887 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3888 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3889 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3890 emit_call((int)cvt_d_s);
3891 }
3892 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3893 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3894 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3895 emit_call((int)cvt_w_s);
3896 }
3897 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3898 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3899 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3900 emit_call((int)cvt_l_s);
3901 }
3902
3903 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3904 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3905 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3906 emit_call((int)cvt_s_d);
3907 }
3908 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3909 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3910 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3911 emit_call((int)cvt_w_d);
3912 }
3913 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3914 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3915 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3916 emit_call((int)cvt_l_d);
3917 }
3918
3919 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3920 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3921 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3922 emit_call((int)round_l_s);
3923 }
3924 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3925 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3926 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3927 emit_call((int)trunc_l_s);
3928 }
3929 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3930 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3931 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3932 emit_call((int)ceil_l_s);
3933 }
3934 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3935 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3936 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3937 emit_call((int)floor_l_s);
3938 }
3939 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3940 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3941 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3942 emit_call((int)round_w_s);
3943 }
3944 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3945 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3946 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3947 emit_call((int)trunc_w_s);
3948 }
3949 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3950 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3951 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3952 emit_call((int)ceil_w_s);
3953 }
3954 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3955 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3956 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3957 emit_call((int)floor_w_s);
3958 }
3959
3960 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3961 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3962 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3963 emit_call((int)round_l_d);
3964 }
3965 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3966 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3967 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3968 emit_call((int)trunc_l_d);
3969 }
3970 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3971 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3972 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3973 emit_call((int)ceil_l_d);
3974 }
3975 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3976 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3977 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3978 emit_call((int)floor_l_d);
3979 }
3980 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3981 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3982 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3983 emit_call((int)round_w_d);
3984 }
3985 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3986 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3987 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3988 emit_call((int)trunc_w_d);
3989 }
3990 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3991 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3992 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3993 emit_call((int)ceil_w_d);
3994 }
3995 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3996 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3997 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3998 emit_call((int)floor_w_d);
3999 }
4000
4001 restore_regs(reglist);
3d624f89 4002#else
4003 cop1_unusable(i, i_regs);
4004#endif
57871462 4005}
4006#define fconv_assemble fconv_assemble_arm
4007
4008void fcomp_assemble(int i,struct regstat *i_regs)
4009{
3d624f89 4010#ifndef DISABLE_COP1
57871462 4011 signed char fs=get_reg(i_regs->regmap,FSREG);
4012 signed char temp=get_reg(i_regs->regmap,-1);
4013 assert(temp>=0);
4014 // Check cop1 unusable
4015 if(!cop1_usable) {
4016 signed char cs=get_reg(i_regs->regmap,CSREG);
4017 assert(cs>=0);
4018 emit_testimm(cs,0x20000000);
4019 int jaddr=(int)out;
4020 emit_jeq(0);
4021 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4022 cop1_usable=1;
4023 }
4024
4025 if((source[i]&0x3f)==0x30) {
4026 emit_andimm(fs,~0x800000,fs);
4027 return;
4028 }
4029
4030 if((source[i]&0x3e)==0x38) {
4031 // sf/ngle - these should throw exceptions for NaNs
4032 emit_andimm(fs,~0x800000,fs);
4033 return;
4034 }
4035
4036 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4037 if(opcode2[i]==0x10) {
4038 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4039 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4040 emit_orimm(fs,0x800000,fs);
4041 emit_flds(temp,14);
4042 emit_flds(HOST_TEMPREG,15);
4043 emit_fcmps(14,15);
4044 emit_fmstat();
4045 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4046 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4047 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4048 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4049 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4050 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4051 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4052 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4053 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4054 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4055 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4056 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4057 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4058 return;
4059 }
4060 if(opcode2[i]==0x11) {
4061 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4062 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4063 emit_orimm(fs,0x800000,fs);
4064 emit_vldr(temp,6);
4065 emit_vldr(HOST_TEMPREG,7);
4066 emit_fcmpd(6,7);
4067 emit_fmstat();
4068 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4069 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4070 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4071 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4072 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4073 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4074 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4075 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4076 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4077 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4078 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4079 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4080 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4081 return;
4082 }
4083 #endif
4084
4085 // C only
4086
4087 u_int hr,reglist=0;
4088 for(hr=0;hr<HOST_REGS;hr++) {
4089 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4090 }
4091 reglist&=~(1<<fs);
4092 save_regs(reglist);
4093 if(opcode2[i]==0x10) {
4094 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4095 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4096 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4097 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4098 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4099 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4100 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4101 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4102 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4103 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4104 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4105 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4106 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4107 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4108 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4109 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4110 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4111 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4112 }
4113 if(opcode2[i]==0x11) {
4114 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4115 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4116 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4117 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4118 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4119 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4120 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4121 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4122 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4123 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4124 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4125 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4126 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4127 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4128 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4129 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4130 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4131 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4132 }
4133 restore_regs(reglist);
4134 emit_loadreg(FSREG,fs);
3d624f89 4135#else
4136 cop1_unusable(i, i_regs);
4137#endif
57871462 4138}
4139
// Emit host code for the COP1 arithmetic instruction at index i.
//
// The operation is the low six bits of source[i]:
//   0=add 1=sub 2=mul 3=div 4=sqrt 5=abs 6=mov 7=neg
// opcode2[i]==0x10 selects single precision, 0x11 double precision.
// Register fields read from source[i]: bits 11-15 = first source,
// bits 16-20 = second source (binary operations only), bits 6-10 = destination.
// i_regs supplies the host register mapping (regmap) in effect at this
// instruction.
//
// After the coprocessor-usable check there are two code paths: inline VFP
// instructions when built with __VFP_FP__ and without __SOFTFP__, otherwise
// calls to C helpers (add_s, sub_d, ...).
// With DISABLE_COP1 defined the function only calls cop1_unusable().
4140void float_assemble(int i,struct regstat *i_regs)
4141{
3d624f89 4142#ifndef DISABLE_COP1
 // Host register whose regmap entry is -1; used below as a scratch pointer
 // register (presumably reserved as a temporary by the allocator - confirm).
57871462 4143 signed char temp=get_reg(i_regs->regmap,-1);
4144 assert(temp>=0);
4145 // Check cop1 unusable
 // Emitted only while cop1_usable is zero: test bit 0x20000000 of the value
 // mapped to CSREG and branch to an FP_STUB when the bit is clear.
4146 if(!cop1_usable) {
4147 signed char cs=get_reg(i_regs->regmap,CSREG);
4148 assert(cs>=0);
4149 emit_testimm(cs,0x20000000);
 // Remember where the branch is emitted so the stub can be attached to it.
4150 int jaddr=(int)out;
4151 emit_jeq(0);
4152 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4153 cop1_usable=1;
4154 }
4155
 // ---- Inline VFP path ----
 // reg_cop1_simple[]/reg_cop1_double[] entries are loaded into a host register
 // and then used as addresses, i.e. they hold pointers to the FP values.
4156 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4157 if((source[i]&0x3f)==6) // mov
4158 {
 // Nothing to emit when source and destination are the same register.
4159 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4160 if(opcode2[i]==0x10) {
 // Single: temp = source pointer, HOST_TEMPREG = destination pointer,
 // then copy one word through them.
4161 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4162 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4163 emit_readword_indexed(0,temp,temp);
4164 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4165 }
4166 if(opcode2[i]==0x11) {
 // Double: copy through VFP register 7.
4167 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4168 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4169 emit_vldr(temp,7);
4170 emit_vstr(7,HOST_TEMPREG);
4171 }
4172 }
4173 return;
4174 }
4175
 // Unary operations (sqrt/abs/neg; mov was handled above).
4176 if((source[i]&0x3f)>3)
4177 {
4178 if(opcode2[i]==0x10) {
 // Load the operand first, then reuse temp for the destination pointer
 // if the destination is a different register.
4179 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4180 emit_flds(temp,15);
4181 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4182 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4183 }
4184 if((source[i]&0x3f)==4) // sqrt
4185 emit_fsqrts(15,15);
4186 if((source[i]&0x3f)==5) // abs
4187 emit_fabss(15,15);
4188 if((source[i]&0x3f)==7) // neg
4189 emit_fnegs(15,15);
4190 emit_fsts(15,temp);
4191 }
4192 if(opcode2[i]==0x11) {
 // Same scheme as the single-precision case, using VFP register 7.
4193 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4194 emit_vldr(temp,7);
4195 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4196 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4197 }
4198 if((source[i]&0x3f)==4) // sqrt
4199 emit_fsqrtd(7,7);
4200 if((source[i]&0x3f)==5) // abs
4201 emit_fabsd(7,7);
4202 if((source[i]&0x3f)==7) // neg
4203 emit_fnegd(7,7);
4204 emit_vstr(7,temp);
4205 }
4206 return;
4207 }
 // Binary operations (add/sub/mul/div).
4208 if((source[i]&0x3f)<4)
4209 {
 // temp = pointer to the first source operand.
4210 if(opcode2[i]==0x10) {
4211 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4212 }
4213 if(opcode2[i]==0x11) {
4214 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4215 }
 // Two distinct source registers.
4216 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4217 if(opcode2[i]==0x10) {
 // HOST_TEMPREG = pointer to the second source operand.
4218 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4219 emit_flds(temp,15);
4220 emit_flds(HOST_TEMPREG,13);
 // Reload temp with the destination pointer only when the destination
 // differs from both sources; otherwise temp (first source) or
 // HOST_TEMPREG (second source) already points at the destination.
4221 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4222 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4223 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4224 }
4225 }
4226 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4227 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4228 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4229 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
 // Destination == second source: store through HOST_TEMPREG.
4230 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4231 emit_fsts(15,HOST_TEMPREG);
4232 }else{
4233 emit_fsts(15,temp);
4234 }
4235 }
4236 else if(opcode2[i]==0x11) {
 // Double precision: same pointer handling, operands in VFP registers 7 and 6.
4237 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4238 emit_vldr(temp,7);
4239 emit_vldr(HOST_TEMPREG,6);
4240 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4241 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4242 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4243 }
4244 }
4245 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4246 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4247 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4248 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4249 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4250 emit_vstr(7,HOST_TEMPREG);
4251 }else{
4252 emit_vstr(7,temp);
4253 }
4254 }
4255 }
 // Both sources are the same register: load it once and operate on itself.
4256 else {
4257 if(opcode2[i]==0x10) {
4258 emit_flds(temp,15);
4259 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4260 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4261 }
4262 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4263 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4264 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4265 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4266 emit_fsts(15,temp);
4267 }
4268 else if(opcode2[i]==0x11) {
4269 emit_vldr(temp,7);
4270 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4271 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4272 }
4273 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4274 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4275 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4276 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4277 emit_vstr(7,temp);
4278 }
4279 }
4280 return;
4281 }
4282 #endif
4283
 // ---- C helper path ----
 // Build a bit mask of every host register that currently has a mapping
 // (regmap[hr]>=0); those registers are saved around the helper call.
4284 u_int hr,reglist=0;
4285 for(hr=0;hr<HOST_REGS;hr++) {
4286 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4287 }
4288 if(opcode2[i]==0x10) { // Single precision
4289 save_regs(reglist);
 // Helper arguments are operand pointers: binary operations get
 // (source1, source2, destination), unary ones get (source, destination).
4290 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4291 if((source[i]&0x3f)<4) {
4292 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4293 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4294 }else{
4295 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4296 }
 // Function codes outside 0..7 emit no call at all.
4297 switch(source[i]&0x3f)
4298 {
4299 case 0x00: emit_call((int)add_s);break;
4300 case 0x01: emit_call((int)sub_s);break;
4301 case 0x02: emit_call((int)mul_s);break;
4302 case 0x03: emit_call((int)div_s);break;
4303 case 0x04: emit_call((int)sqrt_s);break;
4304 case 0x05: emit_call((int)abs_s);break;
4305 case 0x06: emit_call((int)mov_s);break;
4306 case 0x07: emit_call((int)neg_s);break;
4307 }
4308 restore_regs(reglist);
4309 }
4310 if(opcode2[i]==0x11) { // Double precision
4311 save_regs(reglist);
4312 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4313 if((source[i]&0x3f)<4) {
4314 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4315 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4316 }else{
4317 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4318 }
4319 switch(source[i]&0x3f)
4320 {
4321 case 0x00: emit_call((int)add_d);break;
4322 case 0x01: emit_call((int)sub_d);break;
4323 case 0x02: emit_call((int)mul_d);break;
4324 case 0x03: emit_call((int)div_d);break;
4325 case 0x04: emit_call((int)sqrt_d);break;
4326 case 0x05: emit_call((int)abs_d);break;
4327 case 0x06: emit_call((int)mov_d);break;
4328 case 0x07: emit_call((int)neg_d);break;
4329 }
4330 restore_regs(reglist);
4331 }
3d624f89 4332#else
4333 cop1_unusable(i, i_regs);
4334#endif
57871462 4335}
4336
// Emit host code for the multiply/divide instruction at index i; the
// operation is selected by opcode2[i] (list below). 32-bit operations work
// directly on the host registers mapped to the sources and to HIREG/LOREG;
// the 64-bit operations that are implemented call C helpers
// (multu64, div64, divu64).
// If either source register number is zero (rs1[i] or rs2[i]) no arithmetic
// is emitted and the registers mapped to HI and LO are simply zeroed.
4337void multdiv_assemble_arm(int i,struct regstat *i_regs)
4338{
4339 // case 0x18: MULT
4340 // case 0x19: MULTU
4341 // case 0x1A: DIV
4342 // case 0x1B: DIVU
4343 // case 0x1C: DMULT
4344 // case 0x1D: DMULTU
4345 // case 0x1E: DDIV
4346 // case 0x1F: DDIVU
4347 if(rs1[i]&&rs2[i])
4348 {
4349 if((opcode2[i]&4)==0) // 32-bit
4350 {
4351 if(opcode2[i]==0x18) // MULT
4352 {
 // Signed 32x32 multiply; all four registers must be mapped.
4353 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4354 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4355 signed char hi=get_reg(i_regs->regmap,HIREG);
4356 signed char lo=get_reg(i_regs->regmap,LOREG);
4357 assert(m1>=0);
4358 assert(m2>=0);
4359 assert(hi>=0);
4360 assert(lo>=0);
4361 emit_smull(m1,m2,hi,lo);
4362 }
4363 if(opcode2[i]==0x19) // MULTU
4364 {
 // Unsigned variant of the case above.
4365 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4366 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4367 signed char hi=get_reg(i_regs->regmap,HIREG);
4368 signed char lo=get_reg(i_regs->regmap,LOREG);
4369 assert(m1>=0);
4370 assert(m2>=0);
4371 assert(hi>=0);
4372 assert(lo>=0);
4373 emit_umull(m1,m2,hi,lo);
4374 }
4375 if(opcode2[i]==0x1A) // DIV
4376 {
 // Signed divide emitted inline: quotient goes to the register mapped to
 // LOREG, remainder to the one mapped to HIREG.
4377 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4378 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4379 assert(d1>=0);
4380 assert(d2>=0);
4381 signed char quotient=get_reg(i_regs->regmap,LOREG);
4382 signed char remainder=get_reg(i_regs->regmap,HIREG);
4383 assert(quotient>=0);
4384 assert(remainder>=0);
 // Going by the ARM condition suffixes: remainder=|d1|, HOST_TEMPREG=|d2|.
4385 emit_movs(d1,remainder);
4386 emit_negmi(remainder,remainder);
4387 emit_movs(d2,HOST_TEMPREG);
 // NOTE(review): hard-coded byte offset from the current output pointer. It
 // depends on the number and size of the instructions emitted below, so it
 // must be kept in sync with any change to this sequence.
4388 emit_jeq((int)out+52); // Division by zero
4389 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
 // Normalise the divisor with clz and seed the quotient with a marker bit,
 // then run a compare/subtract/shift loop.
4390 emit_clz(HOST_TEMPREG,quotient);
4391 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4392 emit_orimm(quotient,1<<31,quotient);
4393 emit_shr(quotient,quotient,quotient);
4394 emit_cmp(remainder,HOST_TEMPREG);
4395 emit_subcs(remainder,HOST_TEMPREG,remainder);
4396 emit_adcs(quotient,quotient,quotient);
4397 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
 // Loop branch: 16 bytes back (four instructions if each emit_* above
 // produces exactly one 4-byte instruction - TODO confirm).
4398 emit_jcc((int)out-16); // -4
 // Apply signs: quotient is negated when the operand signs differ,
 // remainder when the dividend is negative.
4399 emit_teq(d1,d2);
4400 emit_negmi(quotient,quotient);
4401 emit_test(d1,d1);
4402 emit_negmi(remainder,remainder);
4403 }
4404 if(opcode2[i]==0x1B) // DIVU
4405 {
 // Unsigned divide emitted inline. Note that the divisor's host register
 // (d2) is shifted in place by the emitted code.
4406 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4407 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4408 assert(d1>=0);
4409 assert(d2>=0);
4410 signed char quotient=get_reg(i_regs->regmap,LOREG);
4411 signed char remainder=get_reg(i_regs->regmap,HIREG);
4412 assert(quotient>=0);
4413 assert(remainder>=0);
4414 emit_test(d2,d2);
 // NOTE(review): hard-coded byte offset, tied to the length of the sequence
 // emitted below.
4415 emit_jeq((int)out+44); // Division by zero
4416 emit_clz(d2,HOST_TEMPREG);
4417 emit_movimm(1<<31,quotient);
4418 emit_shl(d2,HOST_TEMPREG,d2);
4419 emit_mov(d1,remainder);
4420 emit_shr(quotient,HOST_TEMPREG,quotient);
4421 emit_cmp(remainder,d2);
4422 emit_subcs(remainder,d2,remainder);
4423 emit_adcs(quotient,quotient,quotient);
4424 emit_shrcc_imm(d2,1,d2);
 // Loop branch, same hard-coded distance as in DIV.
4425 emit_jcc((int)out-16); // -4
4426 }
4427 }
4428 else // 64-bit
4429 {
4430 if(opcode2[i]==0x1C) // DMULT
4431 {
 // NOTE(review): this assert contradicts the enclosing condition, so it
 // always fires when assertions are enabled - DMULT appears to be
 // deliberately unsupported here.
4432 assert(opcode2[i]!=0x1C);
4433 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4434 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4435 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4436 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4437 assert(m1h>=0);
4438 assert(m2h>=0);
4439 assert(m1l>=0);
4440 assert(m2l>=0);
4441 emit_pushreg(m2h);
4442 emit_pushreg(m2l);
4443 emit_pushreg(m1h);
4444 emit_pushreg(m1l);
4445 emit_call((int)&mult64);
4446 emit_popreg(m1l);
4447 emit_popreg(m1h);
4448 emit_popreg(m2l);
4449 emit_popreg(m2h);
4450 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4451 signed char hil=get_reg(i_regs->regmap,HIREG);
4452 if(hih>=0) emit_loadreg(HIREG|64,hih);
4453 if(hil>=0) emit_loadreg(HIREG,hil);
4454 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4455 signed char lol=get_reg(i_regs->regmap,LOREG);
4456 if(loh>=0) emit_loadreg(LOREG|64,loh);
4457 if(lol>=0) emit_loadreg(LOREG,lol);
4458 }
4459 if(opcode2[i]==0x1D) // DMULTU
4460 {
 // Register index |64 selects the upper 32-bit half of a 64-bit register.
4461 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4462 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4463 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4464 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4465 assert(m1h>=0);
4466 assert(m2h>=0);
4467 assert(m1l>=0);
4468 assert(m2l>=0);
 // Mask 0x100f = host registers 0-3 and 12 (bit n = host register n).
4469 save_regs(0x100f);
 // Marshal the operands into host registers 0..3 (low/high of the first
 // operand, then low/high of the second). A value that lived in a register
 // which may already have been overwritten is read back from
 // dynarec_local+4*reg (presumably where save_regs stored it - confirm).
4470 if(m1l!=0) emit_mov(m1l,0);
4471 if(m1h==0) emit_readword((int)&dynarec_local,1);
4472 else if(m1h>1) emit_mov(m1h,1);
4473 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4474 else if(m2l>2) emit_mov(m2l,2);
4475 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4476 else if(m2h>3) emit_mov(m2h,3);
4477 emit_call((int)&multu64);
4478 restore_regs(0x100f);
 // NOTE(review): the four lookups below are never used in this branch;
 // unlike DDIV/DDIVU, HI/LO are not reloaded after the call. Everything from
 // the next comment opener up to the first comment terminator is disabled
 // code.
4479 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4480 signed char hil=get_reg(i_regs->regmap,HIREG);
4481 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4482 signed char lol=get_reg(i_regs->regmap,LOREG);
4483 /*signed char temp=get_reg(i_regs->regmap,-1);
4484 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4485 signed char rl=get_reg(i_regs->regmap,HIREG);
4486 assert(m1h>=0);
4487 assert(m2h>=0);
4488 assert(m1l>=0);
4489 assert(m2l>=0);
4490 assert(temp>=0);
4491 //emit_mov(m1l,EAX);
4492 //emit_mul(m2l);
4493 emit_umull(rl,rh,m1l,m2l);
4494 emit_storereg(LOREG,rl);
4495 emit_mov(rh,temp);
4496 //emit_mov(m1h,EAX);
4497 //emit_mul(m2l);
4498 emit_umull(rl,rh,m1h,m2l);
4499 emit_adds(rl,temp,temp);
4500 emit_adcimm(rh,0,rh);
4501 emit_storereg(HIREG,rh);
4502 //emit_mov(m2h,EAX);
4503 //emit_mul(m1l);
4504 emit_umull(rl,rh,m1l,m2h);
4505 emit_adds(rl,temp,temp);
4506 emit_adcimm(rh,0,rh);
4507 emit_storereg(LOREG|64,temp);
4508 emit_mov(rh,temp);
4509 //emit_mov(m2h,EAX);
4510 //emit_mul(m1h);
4511 emit_umull(rl,rh,m1h,m2h);
4512 emit_adds(rl,temp,rl);
4513 emit_loadreg(HIREG,temp);
4514 emit_adcimm(rh,0,rh);
4515 emit_adds(rl,temp,rl);
4516 emit_adcimm(rh,0,rh);
4517 // DEBUG
4518 /*
4519 emit_pushreg(m2h);
4520 emit_pushreg(m2l);
4521 emit_pushreg(m1h);
4522 emit_pushreg(m1l);
4523 emit_call((int)&multu64);
4524 emit_popreg(m1l);
4525 emit_popreg(m1h);
4526 emit_popreg(m2l);
4527 emit_popreg(m2h);
4528 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4529 signed char hil=get_reg(i_regs->regmap,HIREG);
4530 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4531 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4532 */
4533 // Shouldn't be necessary
4534 //char loh=get_reg(i_regs->regmap,LOREG|64);
4535 //char lol=get_reg(i_regs->regmap,LOREG);
4536 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4537 //if(lol>=0) emit_loadreg(LOREG,lol);
4538 }
4539 if(opcode2[i]==0x1E) // DDIV
4540 {
 // Same save / marshal / call / restore scheme as DMULTU, calling div64.
4541 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4542 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4543 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4544 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4545 assert(d1h>=0);
4546 assert(d2h>=0);
4547 assert(d1l>=0);
4548 assert(d2l>=0);
4549 save_regs(0x100f);
4550 if(d1l!=0) emit_mov(d1l,0);
4551 if(d1h==0) emit_readword((int)&dynarec_local,1);
4552 else if(d1h>1) emit_mov(d1h,1);
4553 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4554 else if(d2l>2) emit_mov(d2l,2);
4555 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4556 else if(d2h>3) emit_mov(d2h,3);
4557 emit_call((int)&div64);
4558 restore_regs(0x100f);
 // Reload whichever halves of HI/LO are mapped to host registers.
4559 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4560 signed char hil=get_reg(i_regs->regmap,HIREG);
4561 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4562 signed char lol=get_reg(i_regs->regmap,LOREG);
4563 if(hih>=0) emit_loadreg(HIREG|64,hih);
4564 if(hil>=0) emit_loadreg(HIREG,hil);
4565 if(loh>=0) emit_loadreg(LOREG|64,loh);
4566 if(lol>=0) emit_loadreg(LOREG,lol);
4567 }
4568 if(opcode2[i]==0x1F) // DDIVU
4569 {
 // Identical to DDIV except that divu64 is called.
4570 //u_int hr,reglist=0;
4571 //for(hr=0;hr<HOST_REGS;hr++) {
4572 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4573 //}
4574 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4575 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4576 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4577 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4578 assert(d1h>=0);
4579 assert(d2h>=0);
4580 assert(d1l>=0);
4581 assert(d2l>=0);
4582 save_regs(0x100f);
4583 if(d1l!=0) emit_mov(d1l,0);
4584 if(d1h==0) emit_readword((int)&dynarec_local,1);
4585 else if(d1h>1) emit_mov(d1h,1);
4586 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4587 else if(d2l>2) emit_mov(d2l,2);
4588 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4589 else if(d2h>3) emit_mov(d2h,3);
4590 emit_call((int)&divu64);
4591 restore_regs(0x100f);
4592 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4593 signed char hil=get_reg(i_regs->regmap,HIREG);
4594 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4595 signed char lol=get_reg(i_regs->regmap,LOREG);
4596 if(hih>=0) emit_loadreg(HIREG|64,hih);
4597 if(hil>=0) emit_loadreg(HIREG,hil);
4598 if(loh>=0) emit_loadreg(LOREG|64,loh);
4599 if(lol>=0) emit_loadreg(LOREG,lol);
4600 }
4601 }
4602 }
4603 else
4604 {
4605 // Multiply by zero is zero.
4606 // MIPS does not have a divide by zero exception.
4607 // The result is undefined, we return zero.
 // Only the halves of HI/LO looked up here (HIREG, LOREG) are zeroed, and
 // only if they are currently mapped to a host register.
4608 signed char hr=get_reg(i_regs->regmap,HIREG);
4609 signed char lr=get_reg(i_regs->regmap,LOREG);
4610 if(hr>=0) emit_zeroreg(hr);
4611 if(lr>=0) emit_zeroreg(lr);
4612 }
4613}
// Expose the ARM implementation under the generic name multdiv_assemble.
4614#define multdiv_assemble multdiv_assemble_arm
4615
// Deliberately empty on ARM. The x86 backend uses this hook to put the
// value 0xf8 into a register; on ARM the hash is produced by a single
// instruction instead (see do_rhash), so nothing has to be preloaded.
void do_preload_rhash(int r)
{
  (void)r;
}
4620
4621void do_preload_rhtbl(int ht) {
4622 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4623}
4624
// Emit the mini hash table index computation: rh = rs & 0xf8.
void do_rhash(int rs,int rh)
{
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4628
4629void do_miniht_load(int ht,int rh) {
4630 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4631 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4632}
4633
4634void do_miniht_jump(int rs,int rh,int ht) {
4635 emit_cmp(rh,rs);
4636 emit_ldreq_indexed(ht,4,15);
4637 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4638 emit_mov(rs,7);
4639 emit_jmp(jump_vaddr_reg[7]);
4640 #else
4641 emit_jmp(jump_vaddr_reg[rs]);
4642 #endif
4643}
4644
4645void do_miniht_insert(u_int return_address,int rt,int temp) {
4646 #ifdef ARMv5_ONLY
4647 emit_movimm(return_address,rt); // PC into link register
4648 add_to_linker((int)out,return_address,1);
4649 emit_pcreladdr(temp);
4650 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4651 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4652 #else
4653 emit_movw(return_address&0x0000FFFF,rt);
4654 add_to_linker((int)out,return_address,1);
4655 emit_pcreladdr(temp);
4656 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4657 emit_movt(return_address&0xFFFF0000,rt);
4658 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4659 #endif
4660}
4661
4662// Sign-extend to 64 bits and write out upper half of a register
4663// This is useful where we have a 32-bit value in a register, and want to
4664// keep it in a 32-bit register, but can't guarantee that it won't be read
4665// as a 64-bit value later.
4666void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4667{
24385cae 4668#ifndef FORCE32
57871462 4669 if(is32_pre==is32) return;
4670 int hr,reg;
4671 for(hr=0;hr<HOST_REGS;hr++) {
4672 if(hr!=EXCLUDE_REG) {
4673 //if(pre[hr]==entry[hr]) {
4674 if((reg=pre[hr])>=0) {
4675 if((dirty>>hr)&1) {
4676 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4677 emit_sarimm(hr,31,HOST_TEMPREG);
4678 emit_storereg(reg|64,HOST_TEMPREG);
4679 }
4680 }
4681 }
4682 //}
4683 }
4684 }
24385cae 4685#endif
57871462 4686}
4687
4688void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4689{
4690 //if(dirty_pre==dirty) return;
4691 int hr,reg,new_hr;
4692 for(hr=0;hr<HOST_REGS;hr++) {
4693 if(hr!=EXCLUDE_REG) {
4694 reg=pre[hr];
4695 if(((~u)>>(reg&63))&1) {
4696 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4697 if(((dirty_pre&~dirty)>>hr)&1) {
4698 if(reg>0&&reg<34) {
4699 emit_storereg(reg,hr);
4700 if( ((is32_pre&~uu)>>reg)&1 ) {
4701 emit_sarimm(hr,31,HOST_TEMPREG);
4702 emit_storereg(reg|64,HOST_TEMPREG);
4703 }
4704 }
4705 else if(reg>=64) {
4706 emit_storereg(reg,hr);
4707 }
4708 }
4709 }
4710 else // Check if register moved to a different register
4711 if((new_hr=get_reg(entry,reg))>=0) {
4712 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4713 if(reg>0&&reg<34) {
4714 emit_storereg(reg,hr);
4715 if( ((is32_pre&~uu)>>reg)&1 ) {
4716 emit_sarimm(hr,31,HOST_TEMPREG);
4717 emit_storereg(reg|64,HOST_TEMPREG);
4718 }
4719 }
4720 else if(reg>=64) {
4721 emit_storereg(reg,hr);
4722 }
4723 }
4724 }
4725 }
4726 }
4727 }
4728}
4729
4730
4731/* using strd could possibly help but you'd have to allocate registers in pairs
4732void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4733{
4734 int hr;
4735 int wrote=-1;
4736 for(hr=HOST_REGS-1;hr>=0;hr--) {
4737 if(hr!=EXCLUDE_REG) {
4738 if(pre[hr]!=entry[hr]) {
4739 if(pre[hr]>=0) {
4740 if((dirty>>hr)&1) {
4741 if(get_reg(entry,pre[hr])<0) {
4742 if(pre[hr]<64) {
4743 if(!((u>>pre[hr])&1)) {
4744 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4745 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4746 emit_sarimm(hr,31,hr+1);
4747 emit_strdreg(pre[hr],hr);
4748 }
4749 else
4750 emit_storereg(pre[hr],hr);
4751 }else{
4752 emit_storereg(pre[hr],hr);
4753 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4754 emit_sarimm(hr,31,hr);
4755 emit_storereg(pre[hr]|64,hr);
4756 }
4757 }
4758 }
4759 }else{
4760 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4761 emit_storereg(pre[hr],hr);
4762 }
4763 }
4764 wrote=hr;
4765 }
4766 }
4767 }
4768 }
4769 }
4770 }
4771 for(hr=0;hr<HOST_REGS;hr++) {
4772 if(hr!=EXCLUDE_REG) {
4773 if(pre[hr]!=entry[hr]) {
4774 if(pre[hr]>=0) {
4775 int nr;
4776 if((nr=get_reg(entry,pre[hr]))>=0) {
4777 emit_mov(hr,nr);
4778 }
4779 }
4780 }
4781 }
4782 }
4783}
4784#define wb_invalidate wb_invalidate_arm
4785*/
4786
4787// CPU-architecture-specific initialization
4788void arch_init() {
3d624f89 4789#ifndef DISABLE_COP1
57871462 4790 rounding_modes[0]=0x0<<22; // round
4791 rounding_modes[1]=0x3<<22; // trunc
4792 rounding_modes[2]=0x1<<22; // ceil
4793 rounding_modes[3]=0x2<<22; // floor
3d624f89 4794#endif
57871462 4795}
b9b61529 4796
4797// vim:shiftwidth=2:expandtab