drc: merge Ari64's patch: 10_unnecessary_invalidate
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
dd3a91a1 71unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
72
57871462 73/* Linker */
74
75void set_jump_target(int addr,u_int target)
76{
77 u_char *ptr=(u_char *)addr;
78 u_int *ptr2=(u_int *)ptr;
79 if(ptr[3]==0xe2) {
80 assert((target-(u_int)ptr2-8)<1024);
81 assert((addr&3)==0);
82 assert((target&3)==0);
83 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
84 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
85 }
86 else if(ptr[3]==0x72) {
87 // generated by emit_jno_unlikely
88 if((target-(u_int)ptr2-8)<1024) {
89 assert((addr&3)==0);
90 assert((target&3)==0);
91 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
92 }
93 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
94 assert((addr&3)==0);
95 assert((target&3)==0);
96 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
97 }
98 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
99 }
100 else {
101 assert((ptr[3]&0x0e)==0xa);
102 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
103 }
104}
105
106// This optionally copies the instruction from the target of the branch into
107// the space before the branch. Works, but the difference in speed is
108// usually insignificant.
109void set_jump_target_fillslot(int addr,u_int target,int copy)
110{
111 u_char *ptr=(u_char *)addr;
112 u_int *ptr2=(u_int *)ptr;
113 assert(!copy||ptr2[-1]==0xe28dd000);
114 if(ptr[3]==0xe2) {
115 assert(!copy);
116 assert((target-(u_int)ptr2-8)<4096);
117 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
118 }
119 else {
120 assert((ptr[3]&0x0e)==0xa);
121 u_int target_insn=*(u_int *)target;
122 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
123 copy=0;
124 }
125 if((target_insn&0x0c100000)==0x04100000) { // Load
126 copy=0;
127 }
128 if(target_insn&0x08000000) {
129 copy=0;
130 }
131 if(copy) {
132 ptr2[-1]=target_insn;
133 target+=4;
134 }
135 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
136 }
137}
138
139/* Literal pool */
140add_literal(int addr,int val)
141{
142 literals[literalcount][0]=addr;
143 literals[literalcount][1]=val;
144 literalcount++;
145}
146
f76eeef9 147void *kill_pointer(void *stub)
57871462 148{
149 int *ptr=(int *)(stub+4);
150 assert((*ptr&0x0ff00000)==0x05900000);
151 u_int offset=*ptr&0xfff;
152 int **l_ptr=(void *)ptr+offset+8;
153 int *i_ptr=*l_ptr;
154 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 155 return i_ptr;
57871462 156}
157
158int get_pointer(void *stub)
159{
160 //printf("get_pointer(%x)\n",(int)stub);
161 int *ptr=(int *)(stub+4);
162 assert((*ptr&0x0ff00000)==0x05900000);
163 u_int offset=*ptr&0xfff;
164 int **l_ptr=(void *)ptr+offset+8;
165 int *i_ptr=*l_ptr;
166 assert((*i_ptr&0x0f000000)==0x0a000000);
167 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
168}
169
170// Find the "clean" entry point from a "dirty" entry point
171// by skipping past the call to verify_code
172u_int get_clean_addr(int addr)
173{
174 int *ptr=(int *)addr;
175 #ifdef ARMv5_ONLY
176 ptr+=4;
177 #else
178 ptr+=6;
179 #endif
180 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
181 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
182 ptr++;
183 if((*ptr&0xFF000000)==0xea000000) {
184 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
185 }
186 return (u_int)ptr;
187}
188
189int verify_dirty(int addr)
190{
191 u_int *ptr=(u_int *)addr;
192 #ifdef ARMv5_ONLY
193 // get from literal pool
194 assert((*ptr&0xFFF00000)==0xe5900000);
195 u_int offset=*ptr&0xfff;
196 u_int *l_ptr=(void *)ptr+offset+8;
197 u_int source=l_ptr[0];
198 u_int copy=l_ptr[1];
199 u_int len=l_ptr[2];
200 ptr+=4;
201 #else
202 // ARMv7 movw/movt
203 assert((*ptr&0xFFF00000)==0xe3000000);
204 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
205 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
206 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
207 ptr+=6;
208 #endif
209 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
210 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 211 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 212 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
213 unsigned int page=source>>12;
214 unsigned int map_value=memory_map[page];
215 if(map_value>=0x80000000) return 0;
216 while(page<((source+len-1)>>12)) {
217 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
218 }
219 source = source+(map_value<<2);
220 }
221 //printf("verify_dirty: %x %x %x\n",source,copy,len);
222 return !memcmp((void *)source,(void *)copy,len);
223}
224
225// This doesn't necessarily find all clean entry points, just
226// guarantees that it's not dirty
227int isclean(int addr)
228{
229 #ifdef ARMv5_ONLY
230 int *ptr=((u_int *)addr)+4;
231 #else
232 int *ptr=((u_int *)addr)+6;
233 #endif
234 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
235 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
237 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
238 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
239 return 1;
240}
241
242void get_bounds(int addr,u_int *start,u_int *end)
243{
244 u_int *ptr=(u_int *)addr;
245 #ifdef ARMv5_ONLY
246 // get from literal pool
247 assert((*ptr&0xFFF00000)==0xe5900000);
248 u_int offset=*ptr&0xfff;
249 u_int *l_ptr=(void *)ptr+offset+8;
250 u_int source=l_ptr[0];
251 //u_int copy=l_ptr[1];
252 u_int len=l_ptr[2];
253 ptr+=4;
254 #else
255 // ARMv7 movw/movt
256 assert((*ptr&0xFFF00000)==0xe3000000);
257 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
258 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
259 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
260 ptr+=6;
261 #endif
262 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
263 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 264 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 265 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
266 if(memory_map[source>>12]>=0x80000000) source = 0;
267 else source = source+(memory_map[source>>12]<<2);
268 }
269 *start=source;
270 *end=source+len;
271}
272
273/* Register allocation */
274
275// Note: registers are allocated clean (unmodified state)
276// if you intend to modify the register, you must call dirty_reg().
// Map guest register `reg` onto a host register in cur->regmap for the
// instruction at index i.
//
// cur->regmap[hr] holds the guest register mapped to host register hr, or
// -1 when hr is free; values >= 64 are tested against cur->uu (upper
// halves) rather than cur->u.
//
// Steps, in order:
//   1. return if reg's bit is set in cur->u, or reg is already mapped
//   2. take the preferred host register if it is free or holds a guest
//      register marked in cur->u / cur->uu
//   3. drop one mapping that is marked unneeded, then take any free host
//      register other than EXCLUDE_REG (first preferring ones that did not
//      hold an operand of instruction i-1)
//   4. otherwise evict a mapping chosen from the hsn[] ranking, highest
//      value first
// Each path that installs the mapping also clears the host register's bit
// in cur->dirty and cur->isconst.  If no path succeeds the function prints
// a message and calls exit(1).
277void alloc_reg(struct regstat *cur,int i,signed char reg)
278{
279 int r,hr;
// Default preference: low three bits of the guest register number;
// CCREG, PTEMP and FTEMP have fixed preferences.
280 int preferred_reg = (reg&7);
281 if(reg==CCREG) preferred_reg=HOST_CCREG;
282 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
283
284 // Don't allocate unused registers
285 if((cur->u>>reg)&1) return;
286
287 // see if it's already allocated
288 for(hr=0;hr<HOST_REGS;hr++)
289 {
290 if(cur->regmap[hr]==reg) return;
291 }
292
293 // Keep the same mapping if the register was already allocated in a loop
294 preferred_reg = loop_reg(i,reg,preferred_reg);
295
296 // Try to allocate the preferred register
297 if(cur->regmap[preferred_reg]==-1) {
298 cur->regmap[preferred_reg]=reg;
299 cur->dirty&=~(1<<preferred_reg);
300 cur->isconst&=~(1<<preferred_reg);
301 return;
302 }
// Preferred register is occupied: reuse it if its occupant is marked
// unneeded (cur->u for values below 64, cur->uu for values 64 and up).
303 r=cur->regmap[preferred_reg];
304 if(r<64&&((cur->u>>r)&1)) {
305 cur->regmap[preferred_reg]=reg;
306 cur->dirty&=~(1<<preferred_reg);
307 cur->isconst&=~(1<<preferred_reg);
308 return;
309 }
310 if(r>=64&&((cur->uu>>(r&63))&1)) {
311 cur->regmap[preferred_reg]=reg;
312 cur->dirty&=~(1<<preferred_reg);
313 cur->isconst&=~(1<<preferred_reg);
314 return;
315 }
316
317 // Clear any unneeded registers
318 // We try to keep the mapping consistent, if possible, because it
319 // makes branches easier (especially loops). So we try to allocate
320 // first (see above) before removing old mappings. If this is not
321 // possible then go ahead and clear out the registers that are no
322 // longer needed.
// Only the first unneeded mapping found (scanning upward) is cleared.
323 for(hr=0;hr<HOST_REGS;hr++)
324 {
325 r=cur->regmap[hr];
326 if(r>=0) {
327 if(r<64) {
328 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
329 }
330 else
331 {
332 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
333 }
334 }
335 }
336 // Try to allocate any available register, but prefer
337 // registers that have not been used recently.
// "Recently" here means: held rs1/rs2/rt1/rt2 of instruction i-1.
338 if(i>0) {
339 for(hr=0;hr<HOST_REGS;hr++) {
340 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
341 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
342 cur->regmap[hr]=reg;
343 cur->dirty&=~(1<<hr);
344 cur->isconst&=~(1<<hr);
345 return;
346 }
347 }
348 }
349 }
350 // Try to allocate any available register
351 for(hr=0;hr<HOST_REGS;hr++) {
352 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
353 cur->regmap[hr]=reg;
354 cur->dirty&=~(1<<hr);
355 cur->isconst&=~(1<<hr);
356 return;
357 }
358 }
359
360 // Ok, now we have to evict someone
361 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then adjusted by
// lsn(), which may also update preferred_reg.
// NOTE(review): presumably lsn() lowers hsn[] for registers needed sooner;
// confirm in lsn().
362 u_char hsn[MAXREG+1];
363 memset(hsn,10,sizeof(hsn));
364 int j;
365 lsn(hsn,i,&preferred_reg);
366 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
367 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
368 if(i>0) {
369 // Don't evict the cycle count at entry points, otherwise the entry
370 // stub will have to write it.
371 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
372 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: ranks 10 down to 3, skipping operands of instruction i-1.
373 for(j=10;j>=3;j--)
374 {
375 // Alloc preferred register if available
376 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
377 for(hr=0;hr<HOST_REGS;hr++) {
378 // Evict both parts of a 64-bit register
379 if((cur->regmap[hr]&63)==r) {
380 cur->regmap[hr]=-1;
381 cur->dirty&=~(1<<hr);
382 cur->isconst&=~(1<<hr);
383 }
384 }
385 cur->regmap[preferred_reg]=reg;
386 return;
387 }
388 for(r=1;r<=MAXREG;r++)
389 {
390 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// The upper half (r+64) is tried before the lower half (r).
// HOST_CCREG is only taken when j is below hsn[CCREG].
391 for(hr=0;hr<HOST_REGS;hr++) {
392 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
393 if(cur->regmap[hr]==r+64) {
394 cur->regmap[hr]=reg;
395 cur->dirty&=~(1<<hr);
396 cur->isconst&=~(1<<hr);
397 return;
398 }
399 }
400 }
401 for(hr=0;hr<HOST_REGS;hr++) {
402 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
403 if(cur->regmap[hr]==r) {
404 cur->regmap[hr]=reg;
405 cur->dirty&=~(1<<hr);
406 cur->isconst&=~(1<<hr);
407 return;
408 }
409 }
410 }
411 }
412 }
413 }
414 }
// Last resort: all ranks 10 down to 0, with no operand or HOST_CCREG
// restrictions.
415 for(j=10;j>=0;j--)
416 {
417 for(r=1;r<=MAXREG;r++)
418 {
419 if(hsn[r]==j) {
420 for(hr=0;hr<HOST_REGS;hr++) {
421 if(cur->regmap[hr]==r+64) {
422 cur->regmap[hr]=reg;
423 cur->dirty&=~(1<<hr);
424 cur->isconst&=~(1<<hr);
425 return;
426 }
427 }
428 for(hr=0;hr<HOST_REGS;hr++) {
429 if(cur->regmap[hr]==r) {
430 cur->regmap[hr]=reg;
431 cur->dirty&=~(1<<hr);
432 cur->isconst&=~(1<<hr);
433 return;
434 }
435 }
436 }
437 }
438 }
439 printf("This shouldn't happen (alloc_reg)");exit(1);
440}
441
// Allocate host registers for both halves of guest register `reg` at
// instruction index i.  The lower half is handled by alloc_reg(); the rest
// of this function maps the upper half, stored in cur->regmap as reg|64.
//
// The upper half is skipped when reg's bit is set in cur->uu or when
// reg+64 is already mapped.  The search order then mirrors alloc_reg():
// preferred host register, one unneeded mapping cleared (scanning from the
// highest host register downward here), any free host register other than
// EXCLUDE_REG, and finally eviction driven by the hsn[] ranking.
// Each path that installs the mapping also clears the host register's bit
// in cur->dirty and cur->isconst.  If no path succeeds the function prints
// a message and calls exit(1).
442void alloc_reg64(struct regstat *cur,int i,signed char reg)
443{
// Default preference for the upper half: host register 8 or 9.
444 int preferred_reg = 8+(reg&1);
445 int r,hr;
446
447 // allocate the lower 32 bits
448 alloc_reg(cur,i,reg);
449
450 // Don't allocate unused registers
451 if((cur->uu>>reg)&1) return;
452
453 // see if the upper half is already allocated
454 for(hr=0;hr<HOST_REGS;hr++)
455 {
456 if(cur->regmap[hr]==reg+64) return;
457 }
458
459 // Keep the same mapping if the register was already allocated in a loop
460 preferred_reg = loop_reg(i,reg,preferred_reg);
461
462 // Try to allocate the preferred register
463 if(cur->regmap[preferred_reg]==-1) {
464 cur->regmap[preferred_reg]=reg|64;
465 cur->dirty&=~(1<<preferred_reg);
466 cur->isconst&=~(1<<preferred_reg);
467 return;
468 }
// Preferred register is occupied: reuse it if its occupant is marked
// unneeded (cur->u for values below 64, cur->uu for values 64 and up).
469 r=cur->regmap[preferred_reg];
470 if(r<64&&((cur->u>>r)&1)) {
471 cur->regmap[preferred_reg]=reg|64;
472 cur->dirty&=~(1<<preferred_reg);
473 cur->isconst&=~(1<<preferred_reg);
474 return;
475 }
476 if(r>=64&&((cur->uu>>(r&63))&1)) {
477 cur->regmap[preferred_reg]=reg|64;
478 cur->dirty&=~(1<<preferred_reg);
479 cur->isconst&=~(1<<preferred_reg);
480 return;
481 }
482
483 // Clear any unneeded registers
484 // We try to keep the mapping consistent, if possible, because it
485 // makes branches easier (especially loops). So we try to allocate
486 // first (see above) before removing old mappings. If this is not
487 // possible then go ahead and clear out the registers that are no
488 // longer needed.
// Only the first unneeded mapping found (scanning downward) is cleared.
489 for(hr=HOST_REGS-1;hr>=0;hr--)
490 {
491 r=cur->regmap[hr];
492 if(r>=0) {
493 if(r<64) {
494 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
495 }
496 else
497 {
498 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
499 }
500 }
501 }
502 // Try to allocate any available register, but prefer
503 // registers that have not been used recently.
// "Recently" here means: held rs1/rs2/rt1/rt2 of instruction i-1.
504 if(i>0) {
505 for(hr=0;hr<HOST_REGS;hr++) {
506 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
507 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
508 cur->regmap[hr]=reg|64;
509 cur->dirty&=~(1<<hr);
510 cur->isconst&=~(1<<hr);
511 return;
512 }
513 }
514 }
515 }
516 // Try to allocate any available register
517 for(hr=0;hr<HOST_REGS;hr++) {
518 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
519 cur->regmap[hr]=reg|64;
520 cur->dirty&=~(1<<hr);
521 cur->isconst&=~(1<<hr);
522 return;
523 }
524 }
525
526 // Ok, now we have to evict someone
527 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then adjusted by
// lsn(), which may also update preferred_reg.
// NOTE(review): presumably lsn() lowers hsn[] for registers needed sooner;
// confirm in lsn().
528 u_char hsn[MAXREG+1];
529 memset(hsn,10,sizeof(hsn));
530 int j;
531 lsn(hsn,i,&preferred_reg);
532 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
533 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
534 if(i>0) {
535 // Don't evict the cycle count at entry points, otherwise the entry
536 // stub will have to write it.
537 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
538 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: ranks 10 down to 3, skipping operands of instruction i-1.
539 for(j=10;j>=3;j--)
540 {
541 // Alloc preferred register if available
542 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
543 for(hr=0;hr<HOST_REGS;hr++) {
544 // Evict both parts of a 64-bit register
545 if((cur->regmap[hr]&63)==r) {
546 cur->regmap[hr]=-1;
547 cur->dirty&=~(1<<hr);
548 cur->isconst&=~(1<<hr);
549 }
550 }
551 cur->regmap[preferred_reg]=reg|64;
552 return;
553 }
554 for(r=1;r<=MAXREG;r++)
555 {
556 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// The upper half (r+64) is tried before the lower half (r).
// HOST_CCREG is only taken when j is below hsn[CCREG].
557 for(hr=0;hr<HOST_REGS;hr++) {
558 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
559 if(cur->regmap[hr]==r+64) {
560 cur->regmap[hr]=reg|64;
561 cur->dirty&=~(1<<hr);
562 cur->isconst&=~(1<<hr);
563 return;
564 }
565 }
566 }
567 for(hr=0;hr<HOST_REGS;hr++) {
568 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
569 if(cur->regmap[hr]==r) {
570 cur->regmap[hr]=reg|64;
571 cur->dirty&=~(1<<hr);
572 cur->isconst&=~(1<<hr);
573 return;
574 }
575 }
576 }
577 }
578 }
579 }
580 }
// Last resort: all ranks 10 down to 0, with no operand or HOST_CCREG
// restrictions.
581 for(j=10;j>=0;j--)
582 {
583 for(r=1;r<=MAXREG;r++)
584 {
585 if(hsn[r]==j) {
586 for(hr=0;hr<HOST_REGS;hr++) {
587 if(cur->regmap[hr]==r+64) {
588 cur->regmap[hr]=reg|64;
589 cur->dirty&=~(1<<hr);
590 cur->isconst&=~(1<<hr);
591 return;
592 }
593 }
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(cur->regmap[hr]==r) {
596 cur->regmap[hr]=reg|64;
597 cur->dirty&=~(1<<hr);
598 cur->isconst&=~(1<<hr);
599 return;
600 }
601 }
602 }
603 }
604 }
605 printf("This shouldn't happen");exit(1);
606}
607
608// Allocate a temporary register. This is done without regard to
609// dirty status or whether the register we request is on the unneeded list
610// Note: This will only allocate one register, even if called multiple times
// Map the temporary guest register `reg` onto a host register in
// cur->regmap for the instruction at index i.
//
// Unlike alloc_reg() there is no check of reg against cur->u and no
// preferred host register (preferred_reg starts at -1 and is only passed to
// lsn()).  Search order: already mapped -> any free host register other
// than EXCLUDE_REG, scanning from the highest downward -> a host register
// whose occupant is unneeded both in cur and in the previous instruction ->
// eviction driven by the hsn[] ranking.
// Each path that installs the mapping also clears the host register's bit
// in cur->dirty and cur->isconst.  If no path succeeds the function prints
// a message and calls exit(1).
611void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
612{
613 int r,hr;
614 int preferred_reg = -1;
615
616 // see if it's already allocated
617 for(hr=0;hr<HOST_REGS;hr++)
618 {
619 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
620 }
621
622 // Try to allocate any available register
623 for(hr=HOST_REGS-1;hr>=0;hr--) {
624 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
625 cur->regmap[hr]=reg;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631
632 // Find an unneeded register
// The occupant must be marked in cur->u / cur->uu and, unless i==0, also
// in unneeded_reg[i-1] / unneeded_reg_upper[i-1].
633 for(hr=HOST_REGS-1;hr>=0;hr--)
634 {
635 r=cur->regmap[hr];
636 if(r>=0) {
637 if(r<64) {
638 if((cur->u>>r)&1) {
639 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
640 cur->regmap[hr]=reg;
641 cur->dirty&=~(1<<hr);
642 cur->isconst&=~(1<<hr);
643 return;
644 }
645 }
646 }
647 else
648 {
649 if((cur->uu>>(r&63))&1) {
650 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
651 cur->regmap[hr]=reg;
652 cur->dirty&=~(1<<hr);
653 cur->isconst&=~(1<<hr);
654 return;
655 }
656 }
657 }
658 }
659 }
660
661 // Ok, now we have to evict someone
662 // Pick a register we hopefully won't need soon
663 // TODO: we might want to follow unconditional jumps here
664 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 for every guest register and is then adjusted by lsn().
// NOTE(review): presumably lsn() lowers hsn[] for registers needed sooner;
// confirm in lsn().
665 u_char hsn[MAXREG+1];
666 memset(hsn,10,sizeof(hsn));
667 int j;
668 lsn(hsn,i,&preferred_reg);
669 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
670 if(i>0) {
671 // Don't evict the cycle count at entry points, otherwise the entry
672 // stub will have to write it.
673 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
674 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: ranks 10 down to 3, skipping operands of instruction i-1.
// HOST_CCREG is only taken when hsn[CCREG] is above 2 (alloc_reg and
// alloc_reg64 compare against j instead).
675 for(j=10;j>=3;j--)
676 {
677 for(r=1;r<=MAXREG;r++)
678 {
679 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
680 for(hr=0;hr<HOST_REGS;hr++) {
681 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
682 if(cur->regmap[hr]==r+64) {
683 cur->regmap[hr]=reg;
684 cur->dirty&=~(1<<hr);
685 cur->isconst&=~(1<<hr);
686 return;
687 }
688 }
689 }
690 for(hr=0;hr<HOST_REGS;hr++) {
691 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
692 if(cur->regmap[hr]==r) {
693 cur->regmap[hr]=reg;
694 cur->dirty&=~(1<<hr);
695 cur->isconst&=~(1<<hr);
696 return;
697 }
698 }
699 }
700 }
701 }
702 }
703 }
// Last resort: all ranks 10 down to 0, with no operand or HOST_CCREG
// restrictions.
704 for(j=10;j>=0;j--)
705 {
706 for(r=1;r<=MAXREG;r++)
707 {
708 if(hsn[r]==j) {
709 for(hr=0;hr<HOST_REGS;hr++) {
710 if(cur->regmap[hr]==r+64) {
711 cur->regmap[hr]=reg;
712 cur->dirty&=~(1<<hr);
713 cur->isconst&=~(1<<hr);
714 return;
715 }
716 }
717 for(hr=0;hr<HOST_REGS;hr++) {
718 if(cur->regmap[hr]==r) {
719 cur->regmap[hr]=reg;
720 cur->dirty&=~(1<<hr);
721 cur->isconst&=~(1<<hr);
722 return;
723 }
724 }
725 }
726 }
727 }
728 printf("This shouldn't happen");exit(1);
729}
730// Allocate a specific ARM register.
731void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
732{
733 int n;
734
735 // see if it's already allocated (and dealloc it)
736 for(n=0;n<HOST_REGS;n++)
737 {
738 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
739 }
740
741 cur->regmap[hr]=reg;
742 cur->dirty&=~(1<<hr);
743 cur->isconst&=~(1<<hr);
744}
745
746// Alloc cycle count into dedicated register
747alloc_cc(struct regstat *cur,int i)
748{
749 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
750}
751
752/* Special alloc */
753
754
755/* Assembler */
756
// Printable names of the 16 ARM registers, indexed by register number.
// Used by the assem_debug() trace output.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
774
775void output_byte(u_char byte)
776{
777 *(out++)=byte;
778}
779void output_modrm(u_char mod,u_char rm,u_char ext)
780{
781 assert(mod<4);
782 assert(rm<8);
783 assert(ext<8);
784 u_char byte=(mod<<6)|(ext<<3)|rm;
785 *(out++)=byte;
786}
787void output_sib(u_char scale,u_char index,u_char base)
788{
789 assert(scale<4);
790 assert(index<8);
791 assert(base<8);
792 u_char byte=(scale<<6)|(index<<3)|base;
793 *(out++)=byte;
794}
795void output_w32(u_int word)
796{
797 *((u_int *)out)=word;
798 out+=4;
799}
// Build the register fields of an ARM instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rd/rn fields plus an 8-bit immediate that is shifted left by
// `shift` bits (shift must be even).  The shift is stored in bits 8-11 as
// a rotate-right count of (32-shift)/2.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success writes the 12-bit rotate:imm8 field to *encoded and returns 1;
// returns 0 (leaving *encoded untouched) when no such encoding exists.
u_int genimm(u_int imm,u_int *encoded)
{
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // rot counts down from 32; imm is rotated right by two bits per step
  for(int rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 829void genimm_checked(u_int imm,u_int *encoded)
830{
831 u_int ret=genimm(imm,encoded);
832 assert(ret);
833}
57871462 834u_int genjmp(u_int addr)
835{
836 int offset=addr-(int)out-8;
e80343e2 837 if(offset<-33554432||offset>=33554432) {
838 if (addr>2) {
839 printf("genjmp: out of range: %08x\n", offset);
840 exit(1);
841 }
842 return 0;
843 }
57871462 844 return ((u_int)offset>>2)&0xffffff;
845}
846
847void emit_mov(int rs,int rt)
848{
849 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
850 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
851}
852
853void emit_movs(int rs,int rt)
854{
855 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
856 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
857}
858
859void emit_add(int rs1,int rs2,int rt)
860{
861 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
862 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
863}
864
865void emit_adds(int rs1,int rs2,int rt)
866{
867 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
868 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
869}
870
871void emit_adcs(int rs1,int rs2,int rt)
872{
873 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
874 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
875}
876
877void emit_sbc(int rs1,int rs2,int rt)
878{
879 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
880 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
881}
882
883void emit_sbcs(int rs1,int rs2,int rt)
884{
885 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
886 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
887}
888
889void emit_neg(int rs, int rt)
890{
891 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
892 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
893}
894
895void emit_negs(int rs, int rt)
896{
897 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
898 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
899}
900
901void emit_sub(int rs1,int rs2,int rt)
902{
903 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
904 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
905}
906
907void emit_subs(int rs1,int rs2,int rt)
908{
909 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
910 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
911}
912
913void emit_zeroreg(int rt)
914{
915 assem_debug("mov %s,#0\n",regname[rt]);
916 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
917}
918
790ee18e 919void emit_loadlp(u_int imm,u_int rt)
920{
921 add_literal((int)out,imm);
922 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
923 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
924}
925void emit_movw(u_int imm,u_int rt)
926{
927 assert(imm<65536);
928 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
929 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
930}
931void emit_movt(u_int imm,u_int rt)
932{
933 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
934 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
935}
936void emit_movimm(u_int imm,u_int rt)
937{
938 u_int armval;
939 if(genimm(imm,&armval)) {
940 assem_debug("mov %s,#%d\n",regname[rt],imm);
941 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
942 }else if(genimm(~imm,&armval)) {
943 assem_debug("mvn %s,#%d\n",regname[rt],imm);
944 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
945 }else if(imm<65536) {
946 #ifdef ARMv5_ONLY
947 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
948 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
949 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
950 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
951 #else
952 emit_movw(imm,rt);
953 #endif
954 }else{
955 #ifdef ARMv5_ONLY
956 emit_loadlp(imm,rt);
957 #else
958 emit_movw(imm&0x0000FFFF,rt);
959 emit_movt(imm&0xFFFF0000,rt);
960 #endif
961 }
962}
963void emit_pcreladdr(u_int rt)
964{
965 assem_debug("add %s,pc,#?\n",regname[rt]);
966 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
967}
968
57871462 969void emit_loadreg(int r, int hr)
970{
3d624f89 971#ifdef FORCE32
972 if(r&64) {
973 printf("64bit load in 32bit mode!\n");
974 exit(1);
975 }
976#endif
57871462 977 if((r&63)==0)
978 emit_zeroreg(hr);
979 else {
3d624f89 980 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 981 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
982 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
983 if(r==CCREG) addr=(int)&cycle_count;
984 if(r==CSREG) addr=(int)&Status;
985 if(r==FSREG) addr=(int)&FCR31;
986 if(r==INVCP) addr=(int)&invc_ptr;
987 u_int offset = addr-(u_int)&dynarec_local;
988 assert(offset<4096);
989 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
990 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
991 }
992}
993void emit_storereg(int r, int hr)
994{
3d624f89 995#ifdef FORCE32
996 if(r&64) {
997 printf("64bit store in 32bit mode!\n");
998 exit(1);
999 }
1000#endif
1001 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1002 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1003 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1004 if(r==CCREG) addr=(int)&cycle_count;
1005 if(r==FSREG) addr=(int)&FCR31;
1006 u_int offset = addr-(u_int)&dynarec_local;
1007 assert(offset<4096);
1008 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1009 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1010}
1011
1012void emit_test(int rs, int rt)
1013{
1014 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1015 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1016}
1017
1018void emit_testimm(int rs,int imm)
1019{
1020 u_int armval;
1021 assem_debug("tst %s,$%d\n",regname[rs],imm);
cfbd3c6e 1022 genimm_checked(imm,&armval);
57871462 1023 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1024}
1025
b9b61529 1026void emit_testeqimm(int rs,int imm)
1027{
1028 u_int armval;
1029 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1030 genimm_checked(imm,&armval);
b9b61529 1031 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1032}
1033
57871462 1034void emit_not(int rs,int rt)
1035{
1036 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1037 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1038}
1039
b9b61529 1040void emit_mvnmi(int rs,int rt)
1041{
1042 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1043 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1044}
1045
57871462 1046void emit_and(u_int rs1,u_int rs2,u_int rt)
1047{
1048 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1049 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1050}
1051
1052void emit_or(u_int rs1,u_int rs2,u_int rt)
1053{
1054 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1055 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1056}
1057void emit_or_and_set_flags(int rs1,int rs2,int rt)
1058{
1059 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1060 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1061}
1062
f70d384d 1063void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1064{
1065 assert(rs<16);
1066 assert(rt<16);
1067 assert(imm<32);
1068 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1069 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1070}
1071
576bbd8f 1072void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1073{
1074 assert(rs<16);
1075 assert(rt<16);
1076 assert(imm<32);
1077 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1078 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1079}
1080
57871462 1081void emit_xor(u_int rs1,u_int rs2,u_int rt)
1082{
1083 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1084 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1085}
1086
57871462 1087void emit_addimm(u_int rs,int imm,u_int rt)
1088{
1089 assert(rs<16);
1090 assert(rt<16);
1091 if(imm!=0) {
1092 assert(imm>-65536&&imm<65536);
1093 u_int armval;
1094 if(genimm(imm,&armval)) {
1095 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1096 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1097 }else if(genimm(-imm,&armval)) {
1098 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1099 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1100 }else if(imm<0) {
1101 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1102 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1103 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1104 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1105 }else{
1106 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1107 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1108 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1109 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1110 }
1111 }
1112 else if(rs!=rt) emit_mov(rs,rt);
1113}
1114
1115void emit_addimm_and_set_flags(int imm,int rt)
1116{
1117 assert(imm>-65536&&imm<65536);
1118 u_int armval;
1119 if(genimm(imm,&armval)) {
1120 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1121 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1122 }else if(genimm(-imm,&armval)) {
1123 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1124 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1125 }else if(imm<0) {
1126 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1127 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1128 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1129 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1130 }else{
1131 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1132 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1133 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1134 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1135 }
1136}
1137void emit_addimm_no_flags(u_int imm,u_int rt)
1138{
1139 emit_addimm(rt,imm,rt);
1140}
1141
1142void emit_addnop(u_int r)
1143{
1144 assert(r<16);
1145 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1146 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1147}
1148
1149void emit_adcimm(u_int rs,int imm,u_int rt)
1150{
1151 u_int armval;
cfbd3c6e 1152 genimm_checked(imm,&armval);
57871462 1153 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1154 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1155}
1156/*void emit_sbcimm(int imm,u_int rt)
1157{
1158 u_int armval;
cfbd3c6e 1159 genimm_checked(imm,&armval);
57871462 1160 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1161 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1162}*/
1163void emit_sbbimm(int imm,u_int rt)
1164{
1165 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1166 assert(rt<8);
1167 if(imm<128&&imm>=-128) {
1168 output_byte(0x83);
1169 output_modrm(3,rt,3);
1170 output_byte(imm);
1171 }
1172 else
1173 {
1174 output_byte(0x81);
1175 output_modrm(3,rt,3);
1176 output_w32(imm);
1177 }
1178}
1179void emit_rscimm(int rs,int imm,u_int rt)
1180{
1181 assert(0);
1182 u_int armval;
cfbd3c6e 1183 genimm_checked(imm,&armval);
57871462 1184 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1185 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1186}
1187
1188void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1189{
1190 // TODO: if(genimm(imm,&armval)) ...
1191 // else
1192 emit_movimm(imm,HOST_TEMPREG);
1193 emit_adds(HOST_TEMPREG,rsl,rtl);
1194 emit_adcimm(rsh,0,rth);
1195}
1196
1197void emit_sbb(int rs1,int rs2)
1198{
1199 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1200 output_byte(0x19);
1201 output_modrm(3,rs1,rs2);
1202}
1203
1204void emit_andimm(int rs,int imm,int rt)
1205{
1206 u_int armval;
790ee18e 1207 if(imm==0) {
1208 emit_zeroreg(rt);
1209 }else if(genimm(imm,&armval)) {
57871462 1210 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1211 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1212 }else if(genimm(~imm,&armval)) {
1213 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1214 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1215 }else if(imm==65535) {
1216 #ifdef ARMv5_ONLY
1217 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1218 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1219 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1220 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1221 #else
1222 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1223 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1224 #endif
1225 }else{
1226 assert(imm>0&&imm<65535);
1227 #ifdef ARMv5_ONLY
1228 assem_debug("mov r14,#%d\n",imm&0xFF00);
1229 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1230 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1231 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1232 #else
1233 emit_movw(imm,HOST_TEMPREG);
1234 #endif
1235 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1236 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1237 }
1238}
1239
1240void emit_orimm(int rs,int imm,int rt)
1241{
1242 u_int armval;
790ee18e 1243 if(imm==0) {
1244 if(rs!=rt) emit_mov(rs,rt);
1245 }else if(genimm(imm,&armval)) {
57871462 1246 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1247 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1248 }else{
1249 assert(imm>0&&imm<65536);
1250 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1251 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1252 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1253 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1254 }
1255}
1256
1257void emit_xorimm(int rs,int imm,int rt)
1258{
57871462 1259 u_int armval;
790ee18e 1260 if(imm==0) {
1261 if(rs!=rt) emit_mov(rs,rt);
1262 }else if(genimm(imm,&armval)) {
57871462 1263 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1264 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1265 }else{
514ed0d9 1266 assert(imm>0&&imm<65536);
57871462 1267 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1268 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1269 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1270 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1271 }
1272}
1273
1274void emit_shlimm(int rs,u_int imm,int rt)
1275{
1276 assert(imm>0);
1277 assert(imm<32);
1278 //if(imm==1) ...
1279 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1281}
1282
1283void emit_shrimm(int rs,u_int imm,int rt)
1284{
1285 assert(imm>0);
1286 assert(imm<32);
1287 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1288 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1289}
1290
1291void emit_sarimm(int rs,u_int imm,int rt)
1292{
1293 assert(imm>0);
1294 assert(imm<32);
1295 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1296 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1297}
1298
1299void emit_rorimm(int rs,u_int imm,int rt)
1300{
1301 assert(imm>0);
1302 assert(imm<32);
1303 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1304 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1305}
1306
1307void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1308{
1309 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1310 assert(imm>0);
1311 assert(imm<32);
1312 //if(imm==1) ...
1313 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1314 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1315 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1316 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1317}
1318
1319void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1320{
1321 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1322 assert(imm>0);
1323 assert(imm<32);
1324 //if(imm==1) ...
1325 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1326 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1327 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1328 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1329}
1330
b9b61529 1331void emit_signextend16(int rs,int rt)
1332{
1333 #ifdef ARMv5_ONLY
1334 emit_shlimm(rs,16,rt);
1335 emit_sarimm(rt,16,rt);
1336 #else
1337 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1338 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1339 #endif
1340}
1341
57871462 1342void emit_shl(u_int rs,u_int shift,u_int rt)
1343{
1344 assert(rs<16);
1345 assert(rt<16);
1346 assert(shift<16);
1347 //if(imm==1) ...
1348 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1349 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1350}
1351void emit_shr(u_int rs,u_int shift,u_int rt)
1352{
1353 assert(rs<16);
1354 assert(rt<16);
1355 assert(shift<16);
1356 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1357 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1358}
1359void emit_sar(u_int rs,u_int shift,u_int rt)
1360{
1361 assert(rs<16);
1362 assert(rt<16);
1363 assert(shift<16);
1364 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1365 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1366}
1367void emit_shlcl(int r)
1368{
1369 assem_debug("shl %%%s,%%cl\n",regname[r]);
1370 assert(0);
1371}
1372void emit_shrcl(int r)
1373{
1374 assem_debug("shr %%%s,%%cl\n",regname[r]);
1375 assert(0);
1376}
1377void emit_sarcl(int r)
1378{
1379 assem_debug("sar %%%s,%%cl\n",regname[r]);
1380 assert(0);
1381}
1382
1383void emit_shldcl(int r1,int r2)
1384{
1385 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1386 assert(0);
1387}
1388void emit_shrdcl(int r1,int r2)
1389{
1390 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1391 assert(0);
1392}
1393void emit_orrshl(u_int rs,u_int shift,u_int rt)
1394{
1395 assert(rs<16);
1396 assert(rt<16);
1397 assert(shift<16);
1398 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1399 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1400}
1401void emit_orrshr(u_int rs,u_int shift,u_int rt)
1402{
1403 assert(rs<16);
1404 assert(rt<16);
1405 assert(shift<16);
1406 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1407 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1408}
1409
1410void emit_cmpimm(int rs,int imm)
1411{
1412 u_int armval;
1413 if(genimm(imm,&armval)) {
1414 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1415 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1416 }else if(genimm(-imm,&armval)) {
1417 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1418 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1419 }else if(imm>0) {
1420 assert(imm<65536);
1421 #ifdef ARMv5_ONLY
1422 emit_movimm(imm,HOST_TEMPREG);
1423 #else
1424 emit_movw(imm,HOST_TEMPREG);
1425 #endif
1426 assem_debug("cmp %s,r14\n",regname[rs]);
1427 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1428 }else{
1429 assert(imm>-65536);
1430 #ifdef ARMv5_ONLY
1431 emit_movimm(-imm,HOST_TEMPREG);
1432 #else
1433 emit_movw(-imm,HOST_TEMPREG);
1434 #endif
1435 assem_debug("cmn %s,r14\n",regname[rs]);
1436 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1437 }
1438}
1439
1440void emit_cmovne(u_int *addr,int rt)
1441{
1442 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1443 assert(0);
1444}
1445void emit_cmovl(u_int *addr,int rt)
1446{
1447 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1448 assert(0);
1449}
1450void emit_cmovs(u_int *addr,int rt)
1451{
1452 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1453 assert(0);
1454}
1455void emit_cmovne_imm(int imm,int rt)
1456{
1457 assem_debug("movne %s,#%d\n",regname[rt],imm);
1458 u_int armval;
cfbd3c6e 1459 genimm_checked(imm,&armval);
57871462 1460 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1461}
1462void emit_cmovl_imm(int imm,int rt)
1463{
1464 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1465 u_int armval;
cfbd3c6e 1466 genimm_checked(imm,&armval);
57871462 1467 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1468}
1469void emit_cmovb_imm(int imm,int rt)
1470{
1471 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1472 u_int armval;
cfbd3c6e 1473 genimm_checked(imm,&armval);
57871462 1474 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1475}
1476void emit_cmovs_imm(int imm,int rt)
1477{
1478 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1479 u_int armval;
cfbd3c6e 1480 genimm_checked(imm,&armval);
57871462 1481 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1482}
1483void emit_cmove_reg(int rs,int rt)
1484{
1485 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1486 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1487}
1488void emit_cmovne_reg(int rs,int rt)
1489{
1490 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1491 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1492}
1493void emit_cmovl_reg(int rs,int rt)
1494{
1495 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1496 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1497}
1498void emit_cmovs_reg(int rs,int rt)
1499{
1500 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1501 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1502}
1503
// Set rt to 1 if rs < imm (using the "lt" conditional move), else 0.
// When rt is the same register as rs, rt is cleared only after the compare.
void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if rs is below imm (using the "cc" conditional move), else 0.
// When rt is the same register as rs, rt is cleared only after the compare.
void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
// Less-than-immediate on the register pair rsh:rsl, result in rt.
// The low-word result is computed first, then corrected from the high word:
// compared against 0 for a non-negative imm, against -1 for a negative imm.
// rsh must not be the same register as rt.
void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
// Below-immediate on the register pair rsh:rsl, result in rt.
// The low-word result is computed first; a high word that differs from 0
// (non-negative imm) forces 0, one that differs from -1 (negative imm)
// forces 1. rsh must not be the same register as rt.
void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1550
1551void emit_cmp(int rs,int rt)
1552{
1553 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1554 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1555}
// Set rt to 1 if rs > 0, else 0: compare rs with 1, load 1, and
// overwrite with 0 under the "lt" condition.
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Set rt to 1 if rs is non-zero. Flags come from a flag-setting move into
// rt, or from a test of rs when rs and rt are the same register; rt then
// receives 1 under the "ne" condition.
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt) {
    emit_test(rs, rs);
  } else {
    emit_movs(rs, rt);
  }
  emit_cmovne_imm(1, rt);
}
// Greater-than-zero on the register pair rsh:rsl, result in rt.
// Starts from the low-word result, then a non-zero high word forces 1
// and a negative high word forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// Non-zero test on the register pair rsh:rsl: OR both halves into rt with
// flags set, then load 1 into rt under the "ne" condition.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Set rt to 1 if rs1 < rs2 (using the "lt" conditional move), else 0.
// If rt is the same register as either source, rt is cleared only after
// the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt) || (rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// Set rt to 1 if rs1 is below rs2 (using the "cc" conditional move), else 0.
// If rt is the same register as either source, rt is cleared only after
// the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt) || (rs2 == rt);
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1600void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1601{
1602 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1603 assert(u1!=rt);
1604 assert(u2!=rt);
1605 emit_cmp(l1,l2);
1606 emit_movimm(0,rt);
1607 emit_sbcs(u1,u2,HOST_TEMPREG);
1608 emit_cmovl_imm(1,rt);
1609}
1610void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1611{
1612 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1613 assert(u1!=rt);
1614 assert(u2!=rt);
1615 emit_cmp(l1,l2);
1616 emit_movimm(0,rt);
1617 emit_sbcs(u1,u2,HOST_TEMPREG);
1618 emit_cmovb_imm(1,rt);
1619}
1620
1621void emit_call(int a)
1622{
1623 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1624 u_int offset=genjmp(a);
1625 output_w32(0xeb000000|offset);
1626}
1627void emit_jmp(int a)
1628{
1629 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1630 u_int offset=genjmp(a);
1631 output_w32(0xea000000|offset);
1632}
1633void emit_jne(int a)
1634{
1635 assem_debug("bne %x\n",a);
1636 u_int offset=genjmp(a);
1637 output_w32(0x1a000000|offset);
1638}
1639void emit_jeq(int a)
1640{
1641 assem_debug("beq %x\n",a);
1642 u_int offset=genjmp(a);
1643 output_w32(0x0a000000|offset);
1644}
1645void emit_js(int a)
1646{
1647 assem_debug("bmi %x\n",a);
1648 u_int offset=genjmp(a);
1649 output_w32(0x4a000000|offset);
1650}
1651void emit_jns(int a)
1652{
1653 assem_debug("bpl %x\n",a);
1654 u_int offset=genjmp(a);
1655 output_w32(0x5a000000|offset);
1656}
1657void emit_jl(int a)
1658{
1659 assem_debug("blt %x\n",a);
1660 u_int offset=genjmp(a);
1661 output_w32(0xba000000|offset);
1662}
1663void emit_jge(int a)
1664{
1665 assem_debug("bge %x\n",a);
1666 u_int offset=genjmp(a);
1667 output_w32(0xaa000000|offset);
1668}
1669void emit_jno(int a)
1670{
1671 assem_debug("bvc %x\n",a);
1672 u_int offset=genjmp(a);
1673 output_w32(0x7a000000|offset);
1674}
1675void emit_jc(int a)
1676{
1677 assem_debug("bcs %x\n",a);
1678 u_int offset=genjmp(a);
1679 output_w32(0x2a000000|offset);
1680}
1681void emit_jcc(int a)
1682{
1683 assem_debug("bcc %x\n",a);
1684 u_int offset=genjmp(a);
1685 output_w32(0x3a000000|offset);
1686}
1687
// Not implemented here: logs a "push" line, then trips assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Not implemented here: logs a "pusha" line, then trips assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Not implemented here: logs a "popa" line, then trips assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1703void emit_pushreg(u_int r)
1704{
1705 assem_debug("push %%%s\n",regname[r]);
1706 assert(0);
1707}
1708void emit_popreg(u_int r)
1709{
1710 assem_debug("pop %%%s\n",regname[r]);
1711 assert(0);
1712}
1713void emit_callreg(u_int r)
1714{
1715 assem_debug("call *%%%s\n",regname[r]);
1716 assert(0);
1717}
1718void emit_jmpreg(u_int r)
1719{
1720 assem_debug("mov pc,%s\n",regname[r]);
1721 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1722}
1723
1724void emit_readword_indexed(int offset, int rs, int rt)
1725{
1726 assert(offset>-4096&&offset<4096);
1727 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1728 if(offset>=0) {
1729 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1730 }else{
1731 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1732 }
1733}
1734void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1735{
1736 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1737 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1738}
// Word load that optionally goes through a map register. With map < 0 it
// is a plain offset load; otherwise addr must be 0 and the load is indexed
// by map scaled by 4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map >= 0) {
    assert(addr == 0);
    emit_readword_dualindexedx4(rs, map, rt);
    return;
  }
  emit_readword_indexed(addr, rs, rt);
}
// Load a 64-bit value as two words: rh from addr (skipped when rh < 0) and
// rl from the following word. With a map register (map >= 0) the second
// word is reached by incrementing map by one, which modifies map; rh must
// not be the same register as rs in that case.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);
  if (map < 0) {
    if (want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }
  assert(rh != rs);
  if (want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1759void emit_movsbl_indexed(int offset, int rs, int rt)
1760{
1761 assert(offset>-256&&offset<256);
1762 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1763 if(offset>=0) {
1764 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1765 }else{
1766 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1767 }
1768}
1769void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1770{
1771 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1772 else {
1773 if(addr==0) {
1774 emit_shlimm(map,2,map);
1775 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1776 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1777 }else{
1778 assert(addr>-256&&addr<256);
1779 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1780 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1781 emit_movsbl_indexed(addr, rt, rt);
1782 }
1783 }
1784}
1785void emit_movswl_indexed(int offset, int rs, int rt)
1786{
1787 assert(offset>-256&&offset<256);
1788 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1789 if(offset>=0) {
1790 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1791 }else{
1792 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1793 }
1794}
1795void emit_movzbl_indexed(int offset, int rs, int rt)
1796{
1797 assert(offset>-4096&&offset<4096);
1798 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1799 if(offset>=0) {
1800 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1801 }else{
1802 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1803 }
1804}
1805void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1806{
1807 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1808 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1809}
// Zero-extending byte load that optionally goes through a map register.
// With map < 0 it is a plain offset load. Otherwise a non-zero addr is
// first added to rs into rt, and the load is indexed by map scaled by 4.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base = rs;
  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    base = rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1822void emit_movzwl_indexed(int offset, int rs, int rt)
1823{
1824 assert(offset>-256&&offset<256);
1825 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1826 if(offset>=0) {
1827 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1828 }else{
1829 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1830 }
1831}
1832void emit_readword(int addr, int rt)
1833{
1834 u_int offset = addr-(u_int)&dynarec_local;
1835 assert(offset<4096);
1836 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1837 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1838}
1839void emit_movsbl(int addr, int rt)
1840{
1841 u_int offset = addr-(u_int)&dynarec_local;
1842 assert(offset<256);
1843 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1844 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1845}
1846void emit_movswl(int addr, int rt)
1847{
1848 u_int offset = addr-(u_int)&dynarec_local;
1849 assert(offset<256);
1850 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1851 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1852}
1853void emit_movzbl(int addr, int rt)
1854{
1855 u_int offset = addr-(u_int)&dynarec_local;
1856 assert(offset<4096);
1857 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1858 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1859}
1860void emit_movzwl(int addr, int rt)
1861{
1862 u_int offset = addr-(u_int)&dynarec_local;
1863 assert(offset<256);
1864 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1865 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1866}
1867void emit_movzwl_reg(int rs, int rt)
1868{
1869 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1870 assert(0);
1871}
1872
1873void emit_xchg(int rs, int rt)
1874{
1875 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1876 assert(0);
1877}
1878void emit_writeword_indexed(int rt, int offset, int rs)
1879{
1880 assert(offset>-4096&&offset<4096);
1881 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1882 if(offset>=0) {
1883 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1884 }else{
1885 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1886 }
1887}
1888void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1889{
1890 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1891 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1892}
// Word store that optionally goes through a map register. With map < 0 it
// is a plain offset store; otherwise addr must be 0 and the store is
// indexed by map scaled by 4. The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map >= 0) {
    assert(addr == 0);
    emit_writeword_dualindexedx4(rt, rs, map);
    return;
  }
  emit_writeword_indexed(rt, addr, rs);
}
// Store a 64-bit value as two words: rh at addr, rl at the following word.
//   map < 0     : two plain offset stores (rh skipped when rh < 0).
//   map >= 0    : rh is required. If temp is distinct from rs, temp = map+1
//                 indexes the second word; otherwise rs itself is advanced
//                 by 4 (modifying rs) before the second store.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }
  assert(rh >= 0);
  const int temp_usable = (temp != rs);
  if (temp_usable) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (temp_usable) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
    return;
  }
  emit_addimm(rs, 4, rs);
  emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
}
1917void emit_writehword_indexed(int rt, int offset, int rs)
1918{
1919 assert(offset>-256&&offset<256);
1920 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1921 if(offset>=0) {
1922 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1923 }else{
1924 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1925 }
1926}
1927void emit_writebyte_indexed(int rt, int offset, int rs)
1928{
1929 assert(offset>-4096&&offset<4096);
1930 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1931 if(offset>=0) {
1932 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1933 }else{
1934 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1935 }
1936}
1937void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1938{
1939 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1940 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1941}
// Byte store that optionally goes through a map register. With map < 0 it
// is a plain offset store. Otherwise a non-zero addr is first added to rs
// into temp, and the store is indexed by map scaled by 4.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base = rs;
  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    base = temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1954void emit_writeword(int rt, int addr)
1955{
1956 u_int offset = addr-(u_int)&dynarec_local;
1957 assert(offset<4096);
1958 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1959 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1960}
1961void emit_writehword(int rt, int addr)
1962{
1963 u_int offset = addr-(u_int)&dynarec_local;
1964 assert(offset<256);
1965 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1966 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1967}
1968void emit_writebyte(int rt, int addr)
1969{
1970 u_int offset = addr-(u_int)&dynarec_local;
1971 assert(offset<4096);
74426039 1972 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 1973 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1974}
// Not implemented here: logs a "movl" line, then trips assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Not implemented here: logs a "movb" line, then trips assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1985
1986void emit_mul(int rs)
1987{
1988 assem_debug("mul %%%s\n",regname[rs]);
1989 assert(0);
1990}
1991void emit_imul(int rs)
1992{
1993 assem_debug("imul %%%s\n",regname[rs]);
1994 assert(0);
1995}
1996void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1997{
1998 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1999 assert(rs1<16);
2000 assert(rs2<16);
2001 assert(hi<16);
2002 assert(lo<16);
2003 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2004}
2005void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2006{
2007 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2008 assert(rs1<16);
2009 assert(rs2<16);
2010 assert(hi<16);
2011 assert(lo<16);
2012 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2013}
2014
2015void emit_div(int rs)
2016{
2017 assem_debug("div %%%s\n",regname[rs]);
2018 assert(0);
2019}
2020void emit_idiv(int rs)
2021{
2022 assem_debug("idiv %%%s\n",regname[rs]);
2023 assert(0);
2024}
// Not implemented here: logs a "cdq" line, then trips assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2030
2031void emit_clz(int rs,int rt)
2032{
2033 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2034 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2035}
2036
2037void emit_subcs(int rs1,int rs2,int rt)
2038{
2039 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2040 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2041}
2042
2043void emit_shrcc_imm(int rs,u_int imm,int rt)
2044{
2045 assert(imm>0);
2046 assert(imm<32);
2047 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2048 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2049}
2050
2051void emit_negmi(int rs, int rt)
2052{
2053 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2054 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2055}
2056
2057void emit_negsmi(int rs, int rt)
2058{
2059 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2060 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2061}
2062
2063void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2064{
2065 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2066 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2067}
2068
2069void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2070{
2071 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2072 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2073}
2074
2075void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2076{
2077 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2078 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2079}
2080
2081void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2082{
2083 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2084 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2085}
2086
2087void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2088{
2089 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2090 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2091}
2092
2093void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2094{
2095 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2096 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2097}
2098
2099void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2100{
2101 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2102 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2103}
2104
2105void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2106{
2107 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2108 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2109}
2110
2111void emit_teq(int rs, int rt)
2112{
2113 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2114 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2115}
2116
2117void emit_rsbimm(int rs, int imm, int rt)
2118{
2119 u_int armval;
cfbd3c6e 2120 genimm_checked(imm,&armval);
57871462 2121 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2122 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2123}
2124
2125// Load 2 immediates optimizing for small code size
2126void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2127{
2128 emit_movimm(imm1,rt1);
2129 u_int armval;
2130 if(genimm(imm2-imm1,&armval)) {
2131 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2132 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2133 }else if(genimm(imm1-imm2,&armval)) {
2134 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2135 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2136 }
2137 else emit_movimm(imm2,rt2);
2138}
2139
2140// Conditionally select one of two immediates, optimizing for small code size
2141// This will only be called if HAVE_CMOV_IMM is defined
2142void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2143{
2144 u_int armval;
2145 if(genimm(imm2-imm1,&armval)) {
2146 emit_movimm(imm1,rt);
2147 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2148 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2149 }else if(genimm(imm1-imm2,&armval)) {
2150 emit_movimm(imm1,rt);
2151 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2152 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2153 }
2154 else {
2155 #ifdef ARMv5_ONLY
2156 emit_movimm(imm1,rt);
2157 add_literal((int)out,imm2);
2158 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2159 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2160 #else
2161 emit_movw(imm1&0x0000FFFF,rt);
2162 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2163 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2164 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2165 }
2166 emit_movt(imm1&0xFFFF0000,rt);
2167 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2168 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2169 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2170 }
2171 #endif
2172 }
2173}
2174
// Special case for checking invalid_code (immediate base address form).
// Not available in this backend: reaching it trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2180
2181// special case for checking invalid_code
2182void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2183{
2184 assert(imm<128&&imm>=0);
2185 assert(r>=0&&r<16);
2186 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2187 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2188 emit_cmpimm(HOST_TEMPREG,imm);
2189}
2190
2191// special case for tlb mapping
2192void emit_addsr12(int rs1,int rs2,int rt)
2193{
2194 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2195 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2196}
2197
// Used to preload hash table entries.
// NOTE(review): unlike the other emitters around it, this one writes two
// single bytes, a modrm field and then the address as a 32-bit word. That
// byte-oriented sequence looks like it was carried over from a different
// backend - confirm whether it is ever reached here.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2207void emit_prefetchreg(int r)
2208{
2209 assem_debug("pld %s\n",regname[r]);
2210 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2211}
2212
2213// Special case for mini_ht
2214void emit_ldreq_indexed(int rs, u_int offset, int rt)
2215{
2216 assert(offset<4096);
2217 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2218 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2219}
2220
2221void emit_flds(int r,int sr)
2222{
2223 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2224 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2225}
2226
2227void emit_vldr(int r,int vr)
2228{
2229 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2230 output_w32(0xed900b00|(vr<<12)|(r<<16));
2231}
2232
2233void emit_fsts(int sr,int r)
2234{
2235 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2236 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2237}
2238
2239void emit_vstr(int vr,int r)
2240{
2241 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2242 output_w32(0xed800b00|(vr<<12)|(r<<16));
2243}
2244
2245void emit_ftosizs(int s,int d)
2246{
2247 assem_debug("ftosizs s%d,s%d\n",d,s);
2248 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2249}
2250
2251void emit_ftosizd(int s,int d)
2252{
2253 assem_debug("ftosizd s%d,d%d\n",d,s);
2254 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2255}
2256
2257void emit_fsitos(int s,int d)
2258{
2259 assem_debug("fsitos s%d,s%d\n",d,s);
2260 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2261}
2262
2263void emit_fsitod(int s,int d)
2264{
2265 assem_debug("fsitod d%d,s%d\n",d,s);
2266 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2267}
2268
2269void emit_fcvtds(int s,int d)
2270{
2271 assem_debug("fcvtds d%d,s%d\n",d,s);
2272 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2273}
2274
2275void emit_fcvtsd(int s,int d)
2276{
2277 assem_debug("fcvtsd s%d,d%d\n",d,s);
2278 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2279}
2280
/*
 * Emit the single-precision square root "fsqrts s<d>,s<s>".
 *
 * s: source single-precision register number.
 * d: destination single-precision register number.
 *
 * Both operands are packed with the split single-precision layout (bits 1-3
 * plus a separate low bit), so the debug line names both registers with the
 * "s" prefix instead of labelling the destination as a "d" register.
 */
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2286
/*
 * Emit the double-precision square root "fsqrtd d<d>,d<s>".
 *
 * s: source double-precision register number (low three bits encoded).
 * d: destination double-precision register number (low three bits encoded).
 *
 * Both operands are encoded as plain three-bit double-register fields, so
 * the debug line names both with the "d" prefix instead of labelling the
 * destination as an "s" register.
 */
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2292
/*
 * Emit the single-precision absolute value "fabss s<d>,s<s>".
 *
 * s: source single-precision register number.
 * d: destination single-precision register number.
 *
 * Both operands use the split single-precision register layout, so the
 * debug line names both with the "s" prefix instead of labelling the
 * destination as a "d" register.
 */
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2298
/*
 * Emit the double-precision absolute value "fabsd d<d>,d<s>".
 *
 * s: source double-precision register number (low three bits encoded).
 * d: destination double-precision register number (low three bits encoded).
 *
 * Both operands are three-bit double-register fields, so the debug line
 * names both with the "d" prefix instead of labelling the destination as an
 * "s" register.
 */
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2304
/*
 * Emit the single-precision negate "fnegs s<d>,s<s>".
 *
 * s: source single-precision register number.
 * d: destination single-precision register number.
 *
 * Both operands use the split single-precision register layout, so the
 * debug line names both with the "s" prefix instead of labelling the
 * destination as a "d" register.
 */
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2310
/*
 * Emit the double-precision negate "fnegd d<d>,d<s>".
 *
 * s: source double-precision register number (low three bits encoded).
 * d: destination double-precision register number (low three bits encoded).
 *
 * Both operands are three-bit double-register fields, so the debug line
 * names both with the "d" prefix instead of labelling the destination as an
 * "s" register.
 */
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2316
2317void emit_fadds(int s1,int s2,int d)
2318{
2319 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2320 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2321}
2322
2323void emit_faddd(int s1,int s2,int d)
2324{
2325 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2326 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2327}
2328
2329void emit_fsubs(int s1,int s2,int d)
2330{
2331 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2332 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2333}
2334
2335void emit_fsubd(int s1,int s2,int d)
2336{
2337 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2338 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2339}
2340
2341void emit_fmuls(int s1,int s2,int d)
2342{
2343 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2344 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2345}
2346
2347void emit_fmuld(int s1,int s2,int d)
2348{
2349 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2350 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2351}
2352
2353void emit_fdivs(int s1,int s2,int d)
2354{
2355 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2356 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2357}
2358
2359void emit_fdivd(int s1,int s2,int d)
2360{
2361 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2362 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2363}
2364
2365void emit_fcmps(int x,int y)
2366{
2367 assem_debug("fcmps s14, s15\n");
2368 output_w32(0xeeb47a67);
2369}
2370
2371void emit_fcmpd(int x,int y)
2372{
2373 assem_debug("fcmpd d6, d7\n");
2374 output_w32(0xeeb46b47);
2375}
2376
2377void emit_fmstat()
2378{
2379 assem_debug("fmstat\n");
2380 output_w32(0xeef1fa10);
2381}
2382
2383void emit_bicne_imm(int rs,int imm,int rt)
2384{
2385 u_int armval;
cfbd3c6e 2386 genimm_checked(imm,&armval);
57871462 2387 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2388 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2389}
2390
2391void emit_biccs_imm(int rs,int imm,int rt)
2392{
2393 u_int armval;
cfbd3c6e 2394 genimm_checked(imm,&armval);
57871462 2395 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2396 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2397}
2398
2399void emit_bicvc_imm(int rs,int imm,int rt)
2400{
2401 u_int armval;
cfbd3c6e 2402 genimm_checked(imm,&armval);
57871462 2403 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2404 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2405}
2406
2407void emit_bichi_imm(int rs,int imm,int rt)
2408{
2409 u_int armval;
cfbd3c6e 2410 genimm_checked(imm,&armval);
57871462 2411 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2412 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2413}
2414
2415void emit_orrvs_imm(int rs,int imm,int rt)
2416{
2417 u_int armval;
cfbd3c6e 2418 genimm_checked(imm,&armval);
57871462 2419 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2420 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2421}
2422
b9b61529 2423void emit_orrne_imm(int rs,int imm,int rt)
2424{
2425 u_int armval;
cfbd3c6e 2426 genimm_checked(imm,&armval);
b9b61529 2427 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2428 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2429}
2430
2431void emit_andne_imm(int rs,int imm,int rt)
2432{
2433 u_int armval;
cfbd3c6e 2434 genimm_checked(imm,&armval);
b9b61529 2435 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2436 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2437}
2438
57871462 2439void emit_jno_unlikely(int a)
2440{
2441 //emit_jno(a);
2442 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2443 output_w32(0x72800000|rd_rn_rm(15,15,0));
2444}
2445
2446// Save registers before function call
2447void save_regs(u_int reglist)
2448{
2449 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2450 if(!reglist) return;
2451 assem_debug("stmia fp,{");
2452 if(reglist&1) assem_debug("r0, ");
2453 if(reglist&2) assem_debug("r1, ");
2454 if(reglist&4) assem_debug("r2, ");
2455 if(reglist&8) assem_debug("r3, ");
2456 if(reglist&0x1000) assem_debug("r12");
2457 assem_debug("}\n");
2458 output_w32(0xe88b0000|reglist);
2459}
2460// Restore registers after function call
2461void restore_regs(u_int reglist)
2462{
2463 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2464 if(!reglist) return;
2465 assem_debug("ldmia fp,{");
2466 if(reglist&1) assem_debug("r0, ");
2467 if(reglist&2) assem_debug("r1, ");
2468 if(reglist&4) assem_debug("r2, ");
2469 if(reglist&8) assem_debug("r3, ");
2470 if(reglist&0x1000) assem_debug("r12");
2471 assem_debug("}\n");
2472 output_w32(0xe89b0000|reglist);
2473}
2474
2475// Write back consts using r14 so we don't disturb the other registers
2476void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2477{
2478 int hr;
2479 for(hr=0;hr<HOST_REGS;hr++) {
2480 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2481 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2482 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2483 int value=constmap[i][hr];
2484 if(value==0) {
2485 emit_zeroreg(HOST_TEMPREG);
2486 }
2487 else {
2488 emit_movimm(value,HOST_TEMPREG);
2489 }
2490 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2491#ifndef FORCE32
57871462 2492 if((i_is32>>i_regmap[hr])&1) {
2493 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2494 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2495 }
24385cae 2496#endif
57871462 2497 }
2498 }
2499 }
2500 }
2501}
2502
2503/* Stubs/epilogue */
2504
2505void literal_pool(int n)
2506{
2507 if(!literalcount) return;
2508 if(n) {
2509 if((int)out-literals[0][0]<4096-n) return;
2510 }
2511 u_int *ptr;
2512 int i;
2513 for(i=0;i<literalcount;i++)
2514 {
2515 ptr=(u_int *)literals[i][0];
2516 u_int offset=(u_int)out-(u_int)ptr-8;
2517 assert(offset<4096);
2518 assert(!(offset&3));
2519 *ptr|=offset;
2520 output_w32(literals[i][1]);
2521 }
2522 literalcount=0;
2523}
2524
2525void literal_pool_jumpover(int n)
2526{
2527 if(!literalcount) return;
2528 if(n) {
2529 if((int)out-literals[0][0]<4096-n) return;
2530 }
2531 int jaddr=(int)out;
2532 emit_jmp(0);
2533 literal_pool(0);
2534 set_jump_target(jaddr,(int)out);
2535}
2536
2537emit_extjump2(int addr, int target, int linker)
2538{
2539 u_char *ptr=(u_char *)addr;
2540 assert((ptr[3]&0x0e)==0xa);
2541 emit_loadlp(target,0);
2542 emit_loadlp(addr,1);
24385cae 2543 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2544 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2545//DEBUG >
2546#ifdef DEBUG_CYCLE_COUNT
2547 emit_readword((int)&last_count,ECX);
2548 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2549 emit_readword((int)&next_interupt,ECX);
2550 emit_writeword(HOST_CCREG,(int)&Count);
2551 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2552 emit_writeword(ECX,(int)&last_count);
2553#endif
2554//DEBUG <
2555 emit_jmp(linker);
2556}
2557
2558emit_extjump(int addr, int target)
2559{
2560 emit_extjump2(addr, target, (int)dyna_linker);
2561}
2562emit_extjump_ds(int addr, int target)
2563{
2564 emit_extjump2(addr, target, (int)dyna_linker_ds);
2565}
2566
2567do_readstub(int n)
2568{
2569 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2570 literal_pool(256);
2571 set_jump_target(stubs[n][1],(int)out);
2572 int type=stubs[n][0];
2573 int i=stubs[n][3];
2574 int rs=stubs[n][4];
2575 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2576 u_int reglist=stubs[n][7];
2577 signed char *i_regmap=i_regs->regmap;
2578 int addr=get_reg(i_regmap,AGEN1+(i&1));
2579 int rth,rt;
2580 int ds;
b9b61529 2581 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2582 rth=get_reg(i_regmap,FTEMP|64);
2583 rt=get_reg(i_regmap,FTEMP);
2584 }else{
2585 rth=get_reg(i_regmap,rt1[i]|64);
2586 rt=get_reg(i_regmap,rt1[i]);
2587 }
2588 assert(rs>=0);
57871462 2589 if(addr<0) addr=rt;
535d208a 2590 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2591 assert(addr>=0);
2592 int ftable=0;
2593 if(type==LOADB_STUB||type==LOADBU_STUB)
2594 ftable=(int)readmemb;
2595 if(type==LOADH_STUB||type==LOADHU_STUB)
2596 ftable=(int)readmemh;
2597 if(type==LOADW_STUB)
2598 ftable=(int)readmem;
24385cae 2599#ifndef FORCE32
57871462 2600 if(type==LOADD_STUB)
2601 ftable=(int)readmemd;
24385cae 2602#endif
2603 assert(ftable!=0);
57871462 2604 emit_writeword(rs,(int)&address);
2605 //emit_pusha();
2606 save_regs(reglist);
2607 ds=i_regs!=&regs[i];
2608 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2609 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2610 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2611 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2612 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2613 emit_shrimm(rs,16,1);
2614 int cc=get_reg(i_regmap,CCREG);
2615 if(cc<0) {
2616 emit_loadreg(CCREG,2);
2617 }
2618 emit_movimm(ftable,0);
2619 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2620 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2621 //emit_readword((int)&last_count,temp);
2622 //emit_add(cc,temp,cc);
2623 //emit_writeword(cc,(int)&Count);
2624 //emit_mov(15,14);
2625 emit_call((int)&indirect_jump_indexed);
2626 //emit_callreg(rs);
2627 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2628 // We really shouldn't need to update the count here,
2629 // but not doing so causes random crashes...
2630 emit_readword((int)&Count,HOST_TEMPREG);
2631 emit_readword((int)&next_interupt,2);
2632 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2633 emit_writeword(2,(int)&last_count);
2634 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2635 if(cc<0) {
2636 emit_storereg(CCREG,HOST_TEMPREG);
2637 }
2638 //emit_popa();
2639 restore_regs(reglist);
2640 //if((cc=get_reg(regmap,CCREG))>=0) {
2641 // emit_loadreg(CCREG,cc);
2642 //}
f18c0f46 2643 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2644 assert(rt>=0);
2645 if(type==LOADB_STUB)
2646 emit_movsbl((int)&readmem_dword,rt);
2647 if(type==LOADBU_STUB)
2648 emit_movzbl((int)&readmem_dword,rt);
2649 if(type==LOADH_STUB)
2650 emit_movswl((int)&readmem_dword,rt);
2651 if(type==LOADHU_STUB)
2652 emit_movzwl((int)&readmem_dword,rt);
2653 if(type==LOADW_STUB)
2654 emit_readword((int)&readmem_dword,rt);
2655 if(type==LOADD_STUB) {
2656 emit_readword((int)&readmem_dword,rt);
2657 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2658 }
57871462 2659 }
2660 emit_jmp(stubs[n][2]); // return address
2661}
2662
2663inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2664{
2665 int rs=get_reg(regmap,target);
2666 int rth=get_reg(regmap,target|64);
2667 int rt=get_reg(regmap,target);
535d208a 2668 if(rs<0) rs=get_reg(regmap,-1);
57871462 2669 assert(rs>=0);
57871462 2670 int ftable=0;
2671 if(type==LOADB_STUB||type==LOADBU_STUB)
2672 ftable=(int)readmemb;
2673 if(type==LOADH_STUB||type==LOADHU_STUB)
2674 ftable=(int)readmemh;
2675 if(type==LOADW_STUB)
2676 ftable=(int)readmem;
24385cae 2677#ifndef FORCE32
57871462 2678 if(type==LOADD_STUB)
2679 ftable=(int)readmemd;
24385cae 2680#endif
2681 assert(ftable!=0);
fd99c415 2682 if(target==0)
2683 emit_movimm(addr,rs);
57871462 2684 emit_writeword(rs,(int)&address);
2685 //emit_pusha();
2686 save_regs(reglist);
2687 //emit_shrimm(rs,16,1);
2688 int cc=get_reg(regmap,CCREG);
2689 if(cc<0) {
2690 emit_loadreg(CCREG,2);
2691 }
2692 //emit_movimm(ftable,0);
2693 emit_movimm(((u_int *)ftable)[addr>>16],0);
2694 //emit_readword((int)&last_count,12);
2695 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2696 if((signed int)addr>=(signed int)0xC0000000) {
2697 // Pagefault address
2698 int ds=regmap!=regs[i].regmap;
2699 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2700 }
2701 //emit_add(12,2,2);
2702 //emit_writeword(2,(int)&Count);
2703 //emit_call(((u_int *)ftable)[addr>>16]);
2704 emit_call((int)&indirect_jump);
2705 // We really shouldn't need to update the count here,
2706 // but not doing so causes random crashes...
2707 emit_readword((int)&Count,HOST_TEMPREG);
2708 emit_readword((int)&next_interupt,2);
2709 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2710 emit_writeword(2,(int)&last_count);
2711 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2712 if(cc<0) {
2713 emit_storereg(CCREG,HOST_TEMPREG);
2714 }
2715 //emit_popa();
2716 restore_regs(reglist);
fd99c415 2717 if(rt>=0) {
2718 if(type==LOADB_STUB)
2719 emit_movsbl((int)&readmem_dword,rt);
2720 if(type==LOADBU_STUB)
2721 emit_movzbl((int)&readmem_dword,rt);
2722 if(type==LOADH_STUB)
2723 emit_movswl((int)&readmem_dword,rt);
2724 if(type==LOADHU_STUB)
2725 emit_movzwl((int)&readmem_dword,rt);
2726 if(type==LOADW_STUB)
2727 emit_readword((int)&readmem_dword,rt);
2728 if(type==LOADD_STUB) {
2729 emit_readword((int)&readmem_dword,rt);
2730 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2731 }
57871462 2732 }
2733}
2734
2735do_writestub(int n)
2736{
2737 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2738 literal_pool(256);
2739 set_jump_target(stubs[n][1],(int)out);
2740 int type=stubs[n][0];
2741 int i=stubs[n][3];
2742 int rs=stubs[n][4];
2743 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2744 u_int reglist=stubs[n][7];
2745 signed char *i_regmap=i_regs->regmap;
2746 int addr=get_reg(i_regmap,AGEN1+(i&1));
2747 int rth,rt,r;
2748 int ds;
b9b61529 2749 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2750 rth=get_reg(i_regmap,FTEMP|64);
2751 rt=get_reg(i_regmap,r=FTEMP);
2752 }else{
2753 rth=get_reg(i_regmap,rs2[i]|64);
2754 rt=get_reg(i_regmap,r=rs2[i]);
2755 }
2756 assert(rs>=0);
2757 assert(rt>=0);
2758 if(addr<0) addr=get_reg(i_regmap,-1);
2759 assert(addr>=0);
2760 int ftable=0;
2761 if(type==STOREB_STUB)
2762 ftable=(int)writememb;
2763 if(type==STOREH_STUB)
2764 ftable=(int)writememh;
2765 if(type==STOREW_STUB)
2766 ftable=(int)writemem;
24385cae 2767#ifndef FORCE32
57871462 2768 if(type==STORED_STUB)
2769 ftable=(int)writememd;
24385cae 2770#endif
2771 assert(ftable!=0);
57871462 2772 emit_writeword(rs,(int)&address);
2773 //emit_shrimm(rs,16,rs);
2774 //emit_movmem_indexedx4(ftable,rs,rs);
2775 if(type==STOREB_STUB)
2776 emit_writebyte(rt,(int)&byte);
2777 if(type==STOREH_STUB)
2778 emit_writehword(rt,(int)&hword);
2779 if(type==STOREW_STUB)
2780 emit_writeword(rt,(int)&word);
2781 if(type==STORED_STUB) {
3d624f89 2782#ifndef FORCE32
57871462 2783 emit_writeword(rt,(int)&dword);
2784 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2785#else
2786 printf("STORED_STUB\n");
2787#endif
57871462 2788 }
2789 //emit_pusha();
2790 save_regs(reglist);
2791 ds=i_regs!=&regs[i];
2792 int real_rs=get_reg(i_regmap,rs1[i]);
2793 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2794 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2795 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2796 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2797 emit_shrimm(rs,16,1);
2798 int cc=get_reg(i_regmap,CCREG);
2799 if(cc<0) {
2800 emit_loadreg(CCREG,2);
2801 }
2802 emit_movimm(ftable,0);
2803 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2804 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2805 //emit_readword((int)&last_count,temp);
2806 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2807 //emit_add(cc,temp,cc);
2808 //emit_writeword(cc,(int)&Count);
2809 emit_call((int)&indirect_jump_indexed);
2810 //emit_callreg(rs);
2811 emit_readword((int)&Count,HOST_TEMPREG);
2812 emit_readword((int)&next_interupt,2);
2813 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2814 emit_writeword(2,(int)&last_count);
2815 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2816 if(cc<0) {
2817 emit_storereg(CCREG,HOST_TEMPREG);
2818 }
2819 //emit_popa();
2820 restore_regs(reglist);
2821 //if((cc=get_reg(regmap,CCREG))>=0) {
2822 // emit_loadreg(CCREG,cc);
2823 //}
2824 emit_jmp(stubs[n][2]); // return address
2825}
2826
2827inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2828{
2829 int rs=get_reg(regmap,-1);
2830 int rth=get_reg(regmap,target|64);
2831 int rt=get_reg(regmap,target);
2832 assert(rs>=0);
2833 assert(rt>=0);
2834 int ftable=0;
2835 if(type==STOREB_STUB)
2836 ftable=(int)writememb;
2837 if(type==STOREH_STUB)
2838 ftable=(int)writememh;
2839 if(type==STOREW_STUB)
2840 ftable=(int)writemem;
24385cae 2841#ifndef FORCE32
57871462 2842 if(type==STORED_STUB)
2843 ftable=(int)writememd;
24385cae 2844#endif
2845 assert(ftable!=0);
57871462 2846 emit_writeword(rs,(int)&address);
2847 //emit_shrimm(rs,16,rs);
2848 //emit_movmem_indexedx4(ftable,rs,rs);
2849 if(type==STOREB_STUB)
2850 emit_writebyte(rt,(int)&byte);
2851 if(type==STOREH_STUB)
2852 emit_writehword(rt,(int)&hword);
2853 if(type==STOREW_STUB)
2854 emit_writeword(rt,(int)&word);
2855 if(type==STORED_STUB) {
3d624f89 2856#ifndef FORCE32
57871462 2857 emit_writeword(rt,(int)&dword);
2858 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2859#else
2860 printf("STORED_STUB\n");
2861#endif
57871462 2862 }
2863 //emit_pusha();
2864 save_regs(reglist);
2865 //emit_shrimm(rs,16,1);
2866 int cc=get_reg(regmap,CCREG);
2867 if(cc<0) {
2868 emit_loadreg(CCREG,2);
2869 }
2870 //emit_movimm(ftable,0);
2871 emit_movimm(((u_int *)ftable)[addr>>16],0);
2872 //emit_readword((int)&last_count,12);
2873 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2874 if((signed int)addr>=(signed int)0xC0000000) {
2875 // Pagefault address
2876 int ds=regmap!=regs[i].regmap;
2877 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2878 }
2879 //emit_add(12,2,2);
2880 //emit_writeword(2,(int)&Count);
2881 //emit_call(((u_int *)ftable)[addr>>16]);
2882 emit_call((int)&indirect_jump);
2883 emit_readword((int)&Count,HOST_TEMPREG);
2884 emit_readword((int)&next_interupt,2);
2885 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2886 emit_writeword(2,(int)&last_count);
2887 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2888 if(cc<0) {
2889 emit_storereg(CCREG,HOST_TEMPREG);
2890 }
2891 //emit_popa();
2892 restore_regs(reglist);
2893}
2894
2895do_unalignedwritestub(int n)
2896{
b7918751 2897 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2898 literal_pool(256);
57871462 2899 set_jump_target(stubs[n][1],(int)out);
b7918751 2900
2901 int i=stubs[n][3];
2902 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2903 int addr=stubs[n][5];
2904 u_int reglist=stubs[n][7];
2905 signed char *i_regmap=i_regs->regmap;
2906 int temp2=get_reg(i_regmap,FTEMP);
2907 int rt;
2908 int ds, real_rs;
2909 rt=get_reg(i_regmap,rs2[i]);
2910 assert(rt>=0);
2911 assert(addr>=0);
2912 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2913 reglist|=(1<<addr);
2914 reglist&=~(1<<temp2);
2915
2916 emit_andimm(addr,0xfffffffc,temp2);
2917 emit_writeword(temp2,(int)&address);
2918
2919 save_regs(reglist);
2920 ds=i_regs!=&regs[i];
2921 real_rs=get_reg(i_regmap,rs1[i]);
2922 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2923 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2924 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2925 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2926 emit_shrimm(addr,16,1);
2927 int cc=get_reg(i_regmap,CCREG);
2928 if(cc<0) {
2929 emit_loadreg(CCREG,2);
2930 }
2931 emit_movimm((u_int)readmem,0);
2932 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2933 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2934 emit_call((int)&indirect_jump_indexed);
2935 restore_regs(reglist);
2936
2937 emit_readword((int)&readmem_dword,temp2);
2938 int temp=addr; //hmh
2939 emit_shlimm(addr,3,temp);
2940 emit_andimm(temp,24,temp);
2941#ifdef BIG_ENDIAN_MIPS
2942 if (opcode[i]==0x2e) // SWR
2943#else
2944 if (opcode[i]==0x2a) // SWL
2945#endif
2946 emit_xorimm(temp,24,temp);
2947 emit_movimm(-1,HOST_TEMPREG);
55439448 2948 if (opcode[i]==0x2a) { // SWL
b7918751 2949 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2950 emit_orrshr(rt,temp,temp2);
2951 }else{
2952 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2953 emit_orrshl(rt,temp,temp2);
2954 }
2955 emit_readword((int)&address,addr);
2956 emit_writeword(temp2,(int)&word);
2957 //save_regs(reglist); // don't need to, no state changes
2958 emit_shrimm(addr,16,1);
2959 emit_movimm((u_int)writemem,0);
2960 //emit_call((int)&indirect_jump_indexed);
2961 emit_mov(15,14);
2962 emit_readword_dualindexedx4(0,1,15);
2963 emit_readword((int)&Count,HOST_TEMPREG);
2964 emit_readword((int)&next_interupt,2);
2965 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2966 emit_writeword(2,(int)&last_count);
2967 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2968 if(cc<0) {
2969 emit_storereg(CCREG,HOST_TEMPREG);
2970 }
2971 restore_regs(reglist);
57871462 2972 emit_jmp(stubs[n][2]); // return address
2973}
2974
/* Debug helper: print seven of the eight arguments in hex, followed in
 * parentheses by the int located just before edi in memory.
 * NOTE(review): that last read goes outside the parameter object and depends
 * on how arguments are laid out - confirm it is still meaningful here.
 * esp is accepted but never printed. */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  (void)esp;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,
         ebp,esi,edi,
         (&edi)[-1]);
}
2979
2980do_invstub(int n)
2981{
2982 literal_pool(20);
2983 u_int reglist=stubs[n][3];
2984 set_jump_target(stubs[n][1],(int)out);
2985 save_regs(reglist);
2986 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2987 emit_call((int)&invalidate_addr);
2988 restore_regs(reglist);
2989 emit_jmp(stubs[n][2]); // return address
2990}
2991
2992int do_dirty_stub(int i)
2993{
2994 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 2995 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2996 #ifdef PCSX
2997 addr=(u_int)source;
2998 #endif
57871462 2999 // Careful about the code output here, verify_dirty needs to parse it.
3000 #ifdef ARMv5_ONLY
ac545b3a 3001 emit_loadlp(addr,1);
57871462 3002 emit_loadlp((int)copy,2);
3003 emit_loadlp(slen*4,3);
3004 #else
ac545b3a 3005 emit_movw(addr&0x0000FFFF,1);
57871462 3006 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3007 emit_movt(addr&0xFFFF0000,1);
57871462 3008 emit_movt(((u_int)copy)&0xFFFF0000,2);
3009 emit_movw(slen*4,3);
3010 #endif
3011 emit_movimm(start+i*4,0);
3012 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3013 int entry=(int)out;
3014 load_regs_entry(i);
3015 if(entry==(int)out) entry=instr_addr[i];
3016 emit_jmp(instr_addr[i]);
3017 return entry;
3018}
3019
3020void do_dirty_stub_ds()
3021{
3022 // Careful about the code output here, verify_dirty needs to parse it.
3023 #ifdef ARMv5_ONLY
3024 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3025 emit_loadlp((int)copy,2);
3026 emit_loadlp(slen*4,3);
3027 #else
3028 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3029 emit_movw(((u_int)copy)&0x0000FFFF,2);
3030 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3031 emit_movt(((u_int)copy)&0xFFFF0000,2);
3032 emit_movw(slen*4,3);
3033 #endif
3034 emit_movimm(start+1,0);
3035 emit_call((int)&verify_code_ds);
3036}
3037
3038do_cop1stub(int n)
3039{
3040 literal_pool(256);
3041 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3042 set_jump_target(stubs[n][1],(int)out);
3043 int i=stubs[n][3];
3d624f89 3044// int rs=stubs[n][4];
57871462 3045 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3046 int ds=stubs[n][6];
3047 if(!ds) {
3048 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3049 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3050 }
3051 //else {printf("fp exception in delay slot\n");}
3052 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3053 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3054 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3055 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3056 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3057}
3058
3059/* TLB */
3060
3061int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3062{
3063 if(c) {
3064 if((signed int)addr>=(signed int)0xC0000000) {
3065 // address_generation already loaded the const
3066 emit_readword_dualindexedx4(FP,map,map);
3067 }
3068 else
3069 return -1; // No mapping
3070 }
3071 else {
3072 assert(s!=map);
3073 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3074 emit_addsr12(map,s,map);
3075 // Schedule this while we wait on the load
3076 //if(x) emit_xorimm(s,x,ar);
3077 if(shift>=0) emit_shlimm(s,3,shift);
3078 if(~a) emit_andimm(s,a,ar);
3079 emit_readword_dualindexedx4(FP,map,map);
3080 }
3081 return map;
3082}
3083int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3084{
3085 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3086 emit_test(map,map);
3087 *jaddr=(int)out;
3088 emit_js(0);
3089 }
3090 return map;
3091}
3092
3093int gen_tlb_addr_r(int ar, int map) {
3094 if(map>=0) {
3095 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3096 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3097 }
3098}
3099
3100int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3101{
3102 if(c) {
3103 if(addr<0x80800000||addr>=0xC0000000) {
3104 // address_generation already loaded the const
3105 emit_readword_dualindexedx4(FP,map,map);
3106 }
3107 else
3108 return -1; // No mapping
3109 }
3110 else {
3111 assert(s!=map);
3112 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3113 emit_addsr12(map,s,map);
3114 // Schedule this while we wait on the load
3115 //if(x) emit_xorimm(s,x,ar);
3116 emit_readword_dualindexedx4(FP,map,map);
3117 }
3118 return map;
3119}
3120int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3121{
3122 if(!c||addr<0x80800000||addr>=0xC0000000) {
3123 emit_testimm(map,0x40000000);
3124 *jaddr=(int)out;
3125 emit_jne(0);
3126 }
3127}
3128
3129int gen_tlb_addr_w(int ar, int map) {
3130 if(map>=0) {
3131 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3132 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3133 }
3134}
3135
3136// Generate the address of the memory_map entry, relative to dynarec_local
3137generate_map_const(u_int addr,int reg) {
3138 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3139 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3140}
3141
3142/* Special assem */
3143
3144void shift_assemble_arm(int i,struct regstat *i_regs)
3145{
3146 if(rt1[i]) {
3147 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3148 {
3149 signed char s,t,shift;
3150 t=get_reg(i_regs->regmap,rt1[i]);
3151 s=get_reg(i_regs->regmap,rs1[i]);
3152 shift=get_reg(i_regs->regmap,rs2[i]);
3153 if(t>=0){
3154 if(rs1[i]==0)
3155 {
3156 emit_zeroreg(t);
3157 }
3158 else if(rs2[i]==0)
3159 {
3160 assert(s>=0);
3161 if(s!=t) emit_mov(s,t);
3162 }
3163 else
3164 {
3165 emit_andimm(shift,31,HOST_TEMPREG);
3166 if(opcode2[i]==4) // SLLV
3167 {
3168 emit_shl(s,HOST_TEMPREG,t);
3169 }
3170 if(opcode2[i]==6) // SRLV
3171 {
3172 emit_shr(s,HOST_TEMPREG,t);
3173 }
3174 if(opcode2[i]==7) // SRAV
3175 {
3176 emit_sar(s,HOST_TEMPREG,t);
3177 }
3178 }
3179 }
3180 } else { // DSLLV/DSRLV/DSRAV
3181 signed char sh,sl,th,tl,shift;
3182 th=get_reg(i_regs->regmap,rt1[i]|64);
3183 tl=get_reg(i_regs->regmap,rt1[i]);
3184 sh=get_reg(i_regs->regmap,rs1[i]|64);
3185 sl=get_reg(i_regs->regmap,rs1[i]);
3186 shift=get_reg(i_regs->regmap,rs2[i]);
3187 if(tl>=0){
3188 if(rs1[i]==0)
3189 {
3190 emit_zeroreg(tl);
3191 if(th>=0) emit_zeroreg(th);
3192 }
3193 else if(rs2[i]==0)
3194 {
3195 assert(sl>=0);
3196 if(sl!=tl) emit_mov(sl,tl);
3197 if(th>=0&&sh!=th) emit_mov(sh,th);
3198 }
3199 else
3200 {
3201 // FIXME: What if shift==tl ?
3202 assert(shift!=tl);
3203 int temp=get_reg(i_regs->regmap,-1);
3204 int real_th=th;
3205 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3206 assert(sl>=0);
3207 assert(sh>=0);
3208 emit_andimm(shift,31,HOST_TEMPREG);
3209 if(opcode2[i]==0x14) // DSLLV
3210 {
3211 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3212 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3213 emit_orrshr(sl,HOST_TEMPREG,th);
3214 emit_andimm(shift,31,HOST_TEMPREG);
3215 emit_testimm(shift,32);
3216 emit_shl(sl,HOST_TEMPREG,tl);
3217 if(th>=0) emit_cmovne_reg(tl,th);
3218 emit_cmovne_imm(0,tl);
3219 }
3220 if(opcode2[i]==0x16) // DSRLV
3221 {
3222 assert(th>=0);
3223 emit_shr(sl,HOST_TEMPREG,tl);
3224 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3225 emit_orrshl(sh,HOST_TEMPREG,tl);
3226 emit_andimm(shift,31,HOST_TEMPREG);
3227 emit_testimm(shift,32);
3228 emit_shr(sh,HOST_TEMPREG,th);
3229 emit_cmovne_reg(th,tl);
3230 if(real_th>=0) emit_cmovne_imm(0,th);
3231 }
3232 if(opcode2[i]==0x17) // DSRAV
3233 {
3234 assert(th>=0);
3235 emit_shr(sl,HOST_TEMPREG,tl);
3236 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3237 if(real_th>=0) {
3238 assert(temp>=0);
3239 emit_sarimm(th,31,temp);
3240 }
3241 emit_orrshl(sh,HOST_TEMPREG,tl);
3242 emit_andimm(shift,31,HOST_TEMPREG);
3243 emit_testimm(shift,32);
3244 emit_sar(sh,HOST_TEMPREG,th);
3245 emit_cmovne_reg(th,tl);
3246 if(real_th>=0) emit_cmovne_reg(temp,th);
3247 }
3248 }
3249 }
3250 }
3251 }
3252}
3253#define shift_assemble shift_assemble_arm
3254
/*
 * Assemble an unaligned load: LWL (0x22), LWR (0x26), LDL (0x1A) or
 * LDR (0x1B), selected by opcode[i].
 *
 * The aligned word/doubleword containing the address is read into the
 * FTEMP register(s), then shifted and merged into the target register(s)
 * using masks built at run time from the low address bits.
 *
 * i       index of the instruction being assembled
 * i_regs  register state (regmap, wasconst) for this instruction
 */
void loadlr_assemble_arm(int i,struct regstat *i_regs)
{
  int s,th,tl,temp,temp2,addr,map=-1;
  int offset;
  int jaddr=0;
  int memtarget,c=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64);
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  temp2=get_reg(i_regs->regmap,FTEMP);
  addr=get_reg(i_regs->regmap,AGEN1+(i&1));
  // The address-generation register is expected to be unallocated here
  assert(addr<0);
  offset=imm[i];
  // Bitmask of host registers currently mapped, handed to the stub helpers
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  // NOTE(review): temp is not checked for >=0 before this shift - confirm
  // the allocator always provides a temporary for these instructions.
  reglist|=1<<temp;
  // c is still 0 at this point, so only offset and s decide the register
  if(offset||s<0||c) addr=temp2;
  else addr=s;
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
  }
  // memtarget is only assigned when s>=0; it is only read below after
  // "!c||", and c stays 0 when s<0.
  if(!using_tlb) {
    if(!c) {
      #ifdef RAM_OFFSET
      map=get_reg(i_regs->regmap,ROREG);
      if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
      #endif
      // temp = address*8 (bit offset), temp2 = aligned address
      emit_shlimm(addr,3,temp);
      if (opcode[i]==0x22||opcode[i]==0x26) {
        emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
      }else{
        emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
      }
      // Compare against RAM_SIZE; the branch location is kept in jaddr and
      // later passed to add_stub
      emit_cmpimm(addr,RAM_SIZE);
      jaddr=(int)out;
      emit_jno(0);
    }
    else {
      // Constant address: the bit offset is known at assembly time
      if (opcode[i]==0x22||opcode[i]==0x26) {
        emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
      }else{
        emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
      }
    }
  }else{ // using tlb
    int a;
    // Alignment mask passed to do_tlb_r (-1 for a constant address)
    if(c) {
      a=-1;
    }else if (opcode[i]==0x22||opcode[i]==0x26) {
      a=0xFFFFFFFC; // LWL/LWR
    }else{
      a=0xFFFFFFF8; // LDL/LDR
    }
    map=get_reg(i_regs->regmap,TLREG);
    assert(map>=0);
    // NOTE(review): constmap[i][s] is evaluated here even when s<0 - confirm
    // that cannot happen on the tlb path.
    map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
    if(c) {
      if (opcode[i]==0x22||opcode[i]==0x26) {
        emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
      }else{
        emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
      }
    }
    do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
  }
  if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
    if(!c||memtarget) {
      //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
      emit_readword_indexed_tlb(0,temp2,map,temp2);
      if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(tl>=0);
      // Reduce the bit offset to 0/8/16/24
      emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
      if (opcode[i]==0x26) // LWR
#else
      if (opcode[i]==0x22) // LWL
#endif
        emit_xorimm(temp,24,temp);
      // HOST_TEMPREG = all ones; shifted by temp it is the mask of the
      // target bits that get replaced by the loaded word
      emit_movimm(-1,HOST_TEMPREG);
      if (opcode[i]==0x26) {
        emit_shr(temp2,temp,temp2);
        emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
      }else{
        emit_shl(temp2,temp,temp2);
        emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
      }
      emit_or(temp2,tl,tl);
    }
    //emit_storereg(rt1[i],tl); // DEBUG
  }
  if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
    // FIXME: little endian
    int temp2h=get_reg(i_regs->regmap,FTEMP|64);
    if(!c||memtarget) {
      //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
      //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
      emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
      if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(th>=0);
      assert(tl>=0);
      // The flags from testing bit 5 of the bit offset select between the
      // eq/ne conditional instructions below
      emit_testimm(temp,32);
      emit_andimm(temp,24,temp);
      if (opcode[i]==0x1A) { // LDL
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shl(temp2h,temp,temp2h);
        emit_orrshr(temp2,HOST_TEMPREG,temp2h);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shl(temp2,temp,temp2);
        emit_cmove_reg(temp2h,th);
        emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
        emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
        emit_orreq(temp2,tl,tl);
        emit_orrne(temp2,th,th);
      }
      if (opcode[i]==0x1B) { // LDR
        emit_xorimm(temp,24,temp);
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shr(temp2,temp,temp2);
        emit_orrshl(temp2h,HOST_TEMPREG,temp2);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shr(temp2h,temp,temp2h);
        emit_cmovne_reg(temp2,tl);
        emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
        emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
        emit_orrne(temp2h,th,th);
        emit_orreq(temp2h,tl,tl);
      }
    }
  }
}
#define loadlr_assemble loadlr_assemble_arm
3399
/*
 * Assemble a coprocessor 0 instruction.
 *
 * opcode2[i]==0    MFC0: read a cop0 register into the target register
 * opcode2[i]==4    MTC0: write a register to cop0 through a C helper
 * opcode2[i]==0x10 function-field ops: TLBR/TLBWI/TLBWR/TLBP (unless
 *                  DISABLE_TLB), RFE (PCSX builds) or ERET (other builds)
 *
 * i       index of the instruction being assembled
 * i_regs  register state (regmap, dirty, is32) for this instruction
 */
void cop0_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]==0) // MFC0
  {
    signed char t=get_reg(i_regs->regmap,rt1[i]);
    // cop0 register number, bits 11..15 of the instruction word
    char copr=(source[i]>>11)&0x1f;
    //assert(t>=0); // Why does this happen?  OOT is weird
    if(t>=0&&rt1[i]!=0) {
#ifdef MUPEN64
      // Set up fake_pc/PC for the MFC0 helper, which is called with host
      // registers 0 and 1 used as scratch
      emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
      emit_movimm((source[i]>>11)&0x1f,1);
      emit_writeword(0,(int)&PC);
      emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
      if(copr==9) {
        // Bring Count up to date before it is read
        emit_readword((int)&last_count,ECX);
        emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
      }
      emit_call((int)MFC0);
      emit_readword((int)&readmem_dword,t);
#else
      // Read the register straight out of the reg_cop0 array
      emit_readword((int)&reg_cop0+copr*4,t);
#endif
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    signed char s=get_reg(i_regs->regmap,rs1[i]);
    char copr=(source[i]>>11)&0x1f;
    assert(s>=0);
    // The value to write is handed to the helper through readmem_dword
    emit_writeword(s,(int)&readmem_dword);
    wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
#ifdef MUPEN64
    emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
    emit_movimm((source[i]>>11)&0x1f,1);
    emit_writeword(0,(int)&PC);
    emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
#endif
    if(copr==9||copr==11||copr==12||copr==13) {
      // Store the current cycle count in Count before calling the helper
      emit_readword((int)&last_count,ECX);
      emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
      emit_add(HOST_CCREG,ECX,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
      emit_writeword(HOST_CCREG,(int)&Count);
    }
    // What a mess.  The status register (12) can enable interrupts,
    // so needs a special case to handle a pending interrupt.
    // The interrupt must be taken immediately, because a subsequent
    // instruction might disable interrupts again.
    if(copr==12||copr==13) {
#ifdef PCSX
      if (is_delayslot) {
        // burn cycles to cause cc_interrupt, which will
        // reschedule next_interupt. Relies on CCREG from above.
        assem_debug("MTC0 DS %d\n", copr);
        emit_writeword(HOST_CCREG,(int)&last_count);
        emit_movimm(0,HOST_CCREG);
        emit_storereg(CCREG,HOST_CCREG);
        emit_movimm(copr,0);
        emit_call((int)pcsx_mtc0_ds);
        // Nothing below (register reload, interrupt check) is emitted for
        // the delay-slot case
        return;
      }
#endif
      // pcaddr = address of the next instruction, pending_exception = 0
      emit_movimm(start+i*4+4,0);
      emit_movimm(0,1);
      emit_writeword(0,(int)&pcaddr);
      emit_writeword(1,(int)&pending_exception);
    }
    //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
    //else
#ifdef PCSX
    emit_movimm(copr,0);
    emit_call((int)pcsx_mtc0);
#else
    emit_call((int)MTC0);
#endif
    if(copr==9||copr==11||copr==12||copr==13) {
      // Recompute the cycle counter relative to next_interupt and make
      // that the new last_count
      emit_readword((int)&Count,HOST_CCREG);
      emit_readword((int)&next_interupt,ECX);
      emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
      emit_sub(HOST_CCREG,ECX,HOST_CCREG);
      emit_writeword(ECX,(int)&last_count);
      emit_storereg(CCREG,HOST_CCREG);
    }
    if(copr==12||copr==13) {
      assert(!is_delayslot);
      // Host register 14 holds pending_exception until the test below
      emit_readword((int)&pending_exception,14);
    }
    // Reload the source register(s) after the call
    emit_loadreg(rs1[i],s);
    if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
      emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
    if(copr==12||copr==13) {
      // Jump to do_interrupt when the helper flagged a pending exception
      emit_test(14,14);
      emit_jne((int)&do_interrupt);
    }
    // Force the next cop1 instruction to re-check usability
    cop1_usable=0;
  }
  else
  {
    assert(opcode2[i]==0x10);
#ifndef DISABLE_TLB
    if((source[i]&0x3f)==0x01) // TLBR
      emit_call((int)TLBR);
    if((source[i]&0x3f)==0x02) // TLBWI
      emit_call((int)TLBWI_new);
    if((source[i]&0x3f)==0x06) { // TLBWR
      // The TLB entry written by TLBWR is dependent on the count,
      // so update the cycle count
      emit_readword((int)&last_count,ECX);
      if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
      emit_add(HOST_CCREG,ECX,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
      emit_writeword(HOST_CCREG,(int)&Count);
      emit_call((int)TLBWR_new);
    }
    if((source[i]&0x3f)==0x08) // TLBP
      emit_call((int)TLBP);
#endif
#ifdef PCSX
    if((source[i]&0x3f)==0x10) // RFE
    {
      // Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
      emit_readword((int)&Status,0);
      emit_andimm(0,0x3c,1);
      emit_andimm(0,~0xf,0);
      emit_orrshr_imm(1,2,0);
      emit_writeword(0,(int)&Status);
    }
#else
    if((source[i]&0x3f)==0x18) // ERET
    {
      int count=ccadj[i];
      if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
      emit_jmp((int)jump_eret);
    }
#endif
  }
}
3540
b9b61529 3541static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3542{
3543 switch (copr) {
3544 case 1:
3545 case 3:
3546 case 5:
3547 case 8:
3548 case 9:
3549 case 10:
3550 case 11:
3551 emit_readword((int)&reg_cop2d[copr],tl);
3552 emit_signextend16(tl,tl);
3553 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3554 break;
3555 case 7:
3556 case 16:
3557 case 17:
3558 case 18:
3559 case 19:
3560 emit_readword((int)&reg_cop2d[copr],tl);
3561 emit_andimm(tl,0xffff,tl);
3562 emit_writeword(tl,(int)&reg_cop2d[copr]);
3563 break;
3564 case 15:
3565 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3566 emit_writeword(tl,(int)&reg_cop2d[copr]);
3567 break;
3568 case 28:
b9b61529 3569 case 29:
3570 emit_readword((int)&reg_cop2d[9],temp);
3571 emit_testimm(temp,0x8000); // do we need this?
3572 emit_andimm(temp,0xf80,temp);
3573 emit_andne_imm(temp,0,temp);
f70d384d 3574 emit_shrimm(temp,7,tl);
b9b61529 3575 emit_readword((int)&reg_cop2d[10],temp);
3576 emit_testimm(temp,0x8000);
3577 emit_andimm(temp,0xf80,temp);
3578 emit_andne_imm(temp,0,temp);
f70d384d 3579 emit_orrshr_imm(temp,2,tl);
b9b61529 3580 emit_readword((int)&reg_cop2d[11],temp);
3581 emit_testimm(temp,0x8000);
3582 emit_andimm(temp,0xf80,temp);
3583 emit_andne_imm(temp,0,temp);
f70d384d 3584 emit_orrshl_imm(temp,3,tl);
b9b61529 3585 emit_writeword(tl,(int)&reg_cop2d[copr]);
3586 break;
3587 default:
3588 emit_readword((int)&reg_cop2d[copr],tl);
3589 break;
3590 }
3591}
3592
3593static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3594{
3595 switch (copr) {
3596 case 15:
3597 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3598 emit_writeword(sl,(int)&reg_cop2d[copr]);
3599 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3600 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3601 emit_writeword(sl,(int)&reg_cop2d[14]);
3602 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3603 break;
3604 case 28:
3605 emit_andimm(sl,0x001f,temp);
f70d384d 3606 emit_shlimm(temp,7,temp);
b9b61529 3607 emit_writeword(temp,(int)&reg_cop2d[9]);
3608 emit_andimm(sl,0x03e0,temp);
f70d384d 3609 emit_shlimm(temp,2,temp);
b9b61529 3610 emit_writeword(temp,(int)&reg_cop2d[10]);
3611 emit_andimm(sl,0x7c00,temp);
f70d384d 3612 emit_shrimm(temp,3,temp);
b9b61529 3613 emit_writeword(temp,(int)&reg_cop2d[11]);
3614 emit_writeword(sl,(int)&reg_cop2d[28]);
3615 break;
3616 case 30:
3617 emit_movs(sl,temp);
3618 emit_mvnmi(temp,temp);
3619 emit_clz(temp,temp);
3620 emit_writeword(sl,(int)&reg_cop2d[30]);
3621 emit_writeword(temp,(int)&reg_cop2d[31]);
3622 break;
b9b61529 3623 case 31:
3624 break;
3625 default:
3626 emit_writeword(sl,(int)&reg_cop2d[copr]);
3627 break;
3628 }
3629}
3630
3631void cop2_assemble(int i,struct regstat *i_regs)
3632{
3633 u_int copr=(source[i]>>11)&0x1f;
3634 signed char temp=get_reg(i_regs->regmap,-1);
3635 if (opcode2[i]==0) { // MFC2
3636 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3637 if(tl>=0&&rt1[i]!=0)
b9b61529 3638 cop2_get_dreg(copr,tl,temp);
3639 }
3640 else if (opcode2[i]==4) { // MTC2
3641 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3642 cop2_put_dreg(copr,sl,temp);
3643 }
3644 else if (opcode2[i]==2) // CFC2
3645 {
3646 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3647 if(tl>=0&&rt1[i]!=0)
b9b61529 3648 emit_readword((int)&reg_cop2c[copr],tl);
3649 }
3650 else if (opcode2[i]==6) // CTC2
3651 {
3652 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3653 switch(copr) {
3654 case 4:
3655 case 12:
3656 case 20:
3657 case 26:
3658 case 27:
3659 case 29:
3660 case 30:
3661 emit_signextend16(sl,temp);
3662 break;
3663 case 31:
3664 //value = value & 0x7ffff000;
3665 //if (value & 0x7f87e000) value |= 0x80000000;
3666 emit_shrimm(sl,12,temp);
3667 emit_shlimm(temp,12,temp);
3668 emit_testimm(temp,0x7f000000);
3669 emit_testeqimm(temp,0x00870000);
3670 emit_testeqimm(temp,0x0000e000);
3671 emit_orrne_imm(temp,0x80000000,temp);
3672 break;
3673 default:
3674 temp=sl;
3675 break;
3676 }
3677 emit_writeword(temp,(int)&reg_cop2c[copr]);
3678 assert(sl>=0);
3679 }
3680}
3681
3682void c2op_assemble(int i,struct regstat *i_regs)
3683{
3684 signed char temp=get_reg(i_regs->regmap,-1);
3685 u_int c2op=source[i]&0x3f;
3686 u_int hr,reglist=0;
3687 for(hr=0;hr<HOST_REGS;hr++) {
3688 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3689 }
3690 if(i==0||itype[i-1]!=C2OP)
3691 save_regs(reglist);
3692
3693 if (gte_handlers[c2op]!=NULL) {
3694 int cc=get_reg(i_regs->regmap,CCREG);
3695 emit_movimm(source[i],temp); // opcode
3696 if (cc>=0&&gte_cycletab[c2op])
3697 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3698 emit_writeword(temp,(int)&psxRegs.code);
3699 emit_call((int)gte_handlers[c2op]);
3700 }
3701
3702 if(i>=slen-1||itype[i+1]!=C2OP)
3703 restore_regs(reglist);
3704}
3705
3706void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3707{
3708 // XXX: should just just do the exception instead
3709 if(!cop1_usable) {
3710 int jaddr=(int)out;
3711 emit_jmp(0);
3712 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3713 cop1_usable=1;
3714 }
3715}
3716
57871462 3717void cop1_assemble(int i,struct regstat *i_regs)
3718{
3d624f89 3719#ifndef DISABLE_COP1
57871462 3720 // Check cop1 unusable
3721 if(!cop1_usable) {
3722 signed char rs=get_reg(i_regs->regmap,CSREG);
3723 assert(rs>=0);
3724 emit_testimm(rs,0x20000000);
3725 int jaddr=(int)out;
3726 emit_jeq(0);
3727 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3728 cop1_usable=1;
3729 }
3730 if (opcode2[i]==0) { // MFC1
3731 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3732 if(tl>=0) {
3733 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3734 emit_readword_indexed(0,tl,tl);
3735 }
3736 }
3737 else if (opcode2[i]==1) { // DMFC1
3738 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3739 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3740 if(tl>=0) {
3741 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3742 if(th>=0) emit_readword_indexed(4,tl,th);
3743 emit_readword_indexed(0,tl,tl);
3744 }
3745 }
3746 else if (opcode2[i]==4) { // MTC1
3747 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3748 signed char temp=get_reg(i_regs->regmap,-1);
3749 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3750 emit_writeword_indexed(sl,0,temp);
3751 }
3752 else if (opcode2[i]==5) { // DMTC1
3753 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3754 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3755 signed char temp=get_reg(i_regs->regmap,-1);
3756 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3757 emit_writeword_indexed(sh,4,temp);
3758 emit_writeword_indexed(sl,0,temp);
3759 }
3760 else if (opcode2[i]==2) // CFC1
3761 {
3762 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3763 if(tl>=0) {
3764 u_int copr=(source[i]>>11)&0x1f;
3765 if(copr==0) emit_readword((int)&FCR0,tl);
3766 if(copr==31) emit_readword((int)&FCR31,tl);
3767 }
3768 }
3769 else if (opcode2[i]==6) // CTC1
3770 {
3771 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3772 u_int copr=(source[i]>>11)&0x1f;
3773 assert(sl>=0);
3774 if(copr==31)
3775 {
3776 emit_writeword(sl,(int)&FCR31);
3777 // Set the rounding mode
3778 //FIXME
3779 //char temp=get_reg(i_regs->regmap,-1);
3780 //emit_andimm(sl,3,temp);
3781 //emit_fldcw_indexed((int)&rounding_modes,temp);
3782 }
3783 }
3d624f89 3784#else
3785 cop1_unusable(i, i_regs);
3786#endif
57871462 3787}
3788
3789void fconv_assemble_arm(int i,struct regstat *i_regs)
3790{
3d624f89 3791#ifndef DISABLE_COP1
57871462 3792 signed char temp=get_reg(i_regs->regmap,-1);
3793 assert(temp>=0);
3794 // Check cop1 unusable
3795 if(!cop1_usable) {
3796 signed char rs=get_reg(i_regs->regmap,CSREG);
3797 assert(rs>=0);
3798 emit_testimm(rs,0x20000000);
3799 int jaddr=(int)out;
3800 emit_jeq(0);
3801 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3802 cop1_usable=1;
3803 }
3804
3805 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3806 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3807 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3808 emit_flds(temp,15);
3809 emit_ftosizs(15,15); // float->int, truncate
3810 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3811 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3812 emit_fsts(15,temp);
3813 return;
3814 }
3815 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3816 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3817 emit_vldr(temp,7);
3818 emit_ftosizd(7,13); // double->int, truncate
3819 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3820 emit_fsts(13,temp);
3821 return;
3822 }
3823
3824 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3825 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3826 emit_flds(temp,13);
3827 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3828 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3829 emit_fsitos(13,15);
3830 emit_fsts(15,temp);
3831 return;
3832 }
3833 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3834 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3835 emit_flds(temp,13);
3836 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3837 emit_fsitod(13,7);
3838 emit_vstr(7,temp);
3839 return;
3840 }
3841
3842 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3843 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3844 emit_flds(temp,13);
3845 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3846 emit_fcvtds(13,7);
3847 emit_vstr(7,temp);
3848 return;
3849 }
3850 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3851 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3852 emit_vldr(temp,7);
3853 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3854 emit_fcvtsd(7,13);
3855 emit_fsts(13,temp);
3856 return;
3857 }
3858 #endif
3859
3860 // C emulation code
3861
3862 u_int hr,reglist=0;
3863 for(hr=0;hr<HOST_REGS;hr++) {
3864 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3865 }
3866 save_regs(reglist);
3867
3868 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3869 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3870 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3871 emit_call((int)cvt_s_w);
3872 }
3873 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3874 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3875 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3876 emit_call((int)cvt_d_w);
3877 }
3878 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3879 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3880 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3881 emit_call((int)cvt_s_l);
3882 }
3883 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3884 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3885 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3886 emit_call((int)cvt_d_l);
3887 }
3888
3889 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3890 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3891 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3892 emit_call((int)cvt_d_s);
3893 }
3894 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3895 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3896 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3897 emit_call((int)cvt_w_s);
3898 }
3899 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3900 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3901 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3902 emit_call((int)cvt_l_s);
3903 }
3904
3905 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3906 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3907 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3908 emit_call((int)cvt_s_d);
3909 }
3910 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3911 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3912 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3913 emit_call((int)cvt_w_d);
3914 }
3915 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3916 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3917 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3918 emit_call((int)cvt_l_d);
3919 }
3920
3921 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3922 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3923 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3924 emit_call((int)round_l_s);
3925 }
3926 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3927 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3928 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3929 emit_call((int)trunc_l_s);
3930 }
3931 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3932 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3933 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3934 emit_call((int)ceil_l_s);
3935 }
3936 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3937 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3938 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3939 emit_call((int)floor_l_s);
3940 }
3941 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3942 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3943 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3944 emit_call((int)round_w_s);
3945 }
3946 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3947 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3948 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3949 emit_call((int)trunc_w_s);
3950 }
3951 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3952 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3953 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3954 emit_call((int)ceil_w_s);
3955 }
3956 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3957 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3958 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3959 emit_call((int)floor_w_s);
3960 }
3961
3962 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3963 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3964 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3965 emit_call((int)round_l_d);
3966 }
3967 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3968 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3969 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3970 emit_call((int)trunc_l_d);
3971 }
3972 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3973 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3974 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3975 emit_call((int)ceil_l_d);
3976 }
3977 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3978 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3979 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3980 emit_call((int)floor_l_d);
3981 }
3982 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3983 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3984 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3985 emit_call((int)round_w_d);
3986 }
3987 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3988 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3989 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3990 emit_call((int)trunc_w_d);
3991 }
3992 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3993 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3994 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3995 emit_call((int)ceil_w_d);
3996 }
3997 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3998 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3999 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4000 emit_call((int)floor_w_d);
4001 }
4002
4003 restore_regs(reglist);
3d624f89 4004#else
4005 cop1_unusable(i, i_regs);
4006#endif
57871462 4007}
4008#define fconv_assemble fconv_assemble_arm
4009
4010void fcomp_assemble(int i,struct regstat *i_regs)
4011{
3d624f89 4012#ifndef DISABLE_COP1
57871462 4013 signed char fs=get_reg(i_regs->regmap,FSREG);
4014 signed char temp=get_reg(i_regs->regmap,-1);
4015 assert(temp>=0);
4016 // Check cop1 unusable
4017 if(!cop1_usable) {
4018 signed char cs=get_reg(i_regs->regmap,CSREG);
4019 assert(cs>=0);
4020 emit_testimm(cs,0x20000000);
4021 int jaddr=(int)out;
4022 emit_jeq(0);
4023 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4024 cop1_usable=1;
4025 }
4026
4027 if((source[i]&0x3f)==0x30) {
4028 emit_andimm(fs,~0x800000,fs);
4029 return;
4030 }
4031
4032 if((source[i]&0x3e)==0x38) {
4033 // sf/ngle - these should throw exceptions for NaNs
4034 emit_andimm(fs,~0x800000,fs);
4035 return;
4036 }
4037
4038 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4039 if(opcode2[i]==0x10) {
4040 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4041 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4042 emit_orimm(fs,0x800000,fs);
4043 emit_flds(temp,14);
4044 emit_flds(HOST_TEMPREG,15);
4045 emit_fcmps(14,15);
4046 emit_fmstat();
4047 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4048 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4049 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4050 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4051 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4052 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4053 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4054 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4055 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4056 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4057 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4058 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4059 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4060 return;
4061 }
4062 if(opcode2[i]==0x11) {
4063 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4064 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4065 emit_orimm(fs,0x800000,fs);
4066 emit_vldr(temp,6);
4067 emit_vldr(HOST_TEMPREG,7);
4068 emit_fcmpd(6,7);
4069 emit_fmstat();
4070 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4071 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4072 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4073 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4074 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4075 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4076 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4077 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4078 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4079 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4080 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4081 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4082 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4083 return;
4084 }
4085 #endif
4086
4087 // C only
4088
4089 u_int hr,reglist=0;
4090 for(hr=0;hr<HOST_REGS;hr++) {
4091 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4092 }
4093 reglist&=~(1<<fs);
4094 save_regs(reglist);
4095 if(opcode2[i]==0x10) {
4096 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4097 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4098 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4099 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4100 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4101 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4102 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4103 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4104 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4105 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4106 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4107 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4108 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4109 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4110 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4111 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4112 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4113 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4114 }
4115 if(opcode2[i]==0x11) {
4116 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4117 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4118 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4119 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4120 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4121 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4122 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4123 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4124 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4125 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4126 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4127 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4128 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4129 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4130 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4131 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4132 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4133 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4134 }
4135 restore_regs(reglist);
4136 emit_loadreg(FSREG,fs);
3d624f89 4137#else
4138 cop1_unusable(i, i_regs);
4139#endif
57871462 4140}
4141
4142void float_assemble(int i,struct regstat *i_regs)
4143{
3d624f89 4144#ifndef DISABLE_COP1
57871462 4145 signed char temp=get_reg(i_regs->regmap,-1);
4146 assert(temp>=0);
4147 // Check cop1 unusable
4148 if(!cop1_usable) {
4149 signed char cs=get_reg(i_regs->regmap,CSREG);
4150 assert(cs>=0);
4151 emit_testimm(cs,0x20000000);
4152 int jaddr=(int)out;
4153 emit_jeq(0);
4154 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4155 cop1_usable=1;
4156 }
4157
4158 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4159 if((source[i]&0x3f)==6) // mov
4160 {
4161 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4162 if(opcode2[i]==0x10) {
4163 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4164 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4165 emit_readword_indexed(0,temp,temp);
4166 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4167 }
4168 if(opcode2[i]==0x11) {
4169 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4170 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4171 emit_vldr(temp,7);
4172 emit_vstr(7,HOST_TEMPREG);
4173 }
4174 }
4175 return;
4176 }
4177
4178 if((source[i]&0x3f)>3)
4179 {
4180 if(opcode2[i]==0x10) {
4181 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4182 emit_flds(temp,15);
4183 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4184 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4185 }
4186 if((source[i]&0x3f)==4) // sqrt
4187 emit_fsqrts(15,15);
4188 if((source[i]&0x3f)==5) // abs
4189 emit_fabss(15,15);
4190 if((source[i]&0x3f)==7) // neg
4191 emit_fnegs(15,15);
4192 emit_fsts(15,temp);
4193 }
4194 if(opcode2[i]==0x11) {
4195 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4196 emit_vldr(temp,7);
4197 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4198 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4199 }
4200 if((source[i]&0x3f)==4) // sqrt
4201 emit_fsqrtd(7,7);
4202 if((source[i]&0x3f)==5) // abs
4203 emit_fabsd(7,7);
4204 if((source[i]&0x3f)==7) // neg
4205 emit_fnegd(7,7);
4206 emit_vstr(7,temp);
4207 }
4208 return;
4209 }
4210 if((source[i]&0x3f)<4)
4211 {
4212 if(opcode2[i]==0x10) {
4213 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4214 }
4215 if(opcode2[i]==0x11) {
4216 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4217 }
4218 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4219 if(opcode2[i]==0x10) {
4220 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4221 emit_flds(temp,15);
4222 emit_flds(HOST_TEMPREG,13);
4223 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4224 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4225 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4226 }
4227 }
4228 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4229 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4230 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4231 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4232 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4233 emit_fsts(15,HOST_TEMPREG);
4234 }else{
4235 emit_fsts(15,temp);
4236 }
4237 }
4238 else if(opcode2[i]==0x11) {
4239 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4240 emit_vldr(temp,7);
4241 emit_vldr(HOST_TEMPREG,6);
4242 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4243 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4244 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4245 }
4246 }
4247 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4248 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4249 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4250 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4251 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4252 emit_vstr(7,HOST_TEMPREG);
4253 }else{
4254 emit_vstr(7,temp);
4255 }
4256 }
4257 }
4258 else {
4259 if(opcode2[i]==0x10) {
4260 emit_flds(temp,15);
4261 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4262 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4263 }
4264 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4265 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4266 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4267 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4268 emit_fsts(15,temp);
4269 }
4270 else if(opcode2[i]==0x11) {
4271 emit_vldr(temp,7);
4272 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4273 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4274 }
4275 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4276 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4277 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4278 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4279 emit_vstr(7,temp);
4280 }
4281 }
4282 return;
4283 }
4284 #endif
4285
4286 u_int hr,reglist=0;
4287 for(hr=0;hr<HOST_REGS;hr++) {
4288 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4289 }
4290 if(opcode2[i]==0x10) { // Single precision
4291 save_regs(reglist);
4292 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4293 if((source[i]&0x3f)<4) {
4294 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4295 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4296 }else{
4297 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4298 }
4299 switch(source[i]&0x3f)
4300 {
4301 case 0x00: emit_call((int)add_s);break;
4302 case 0x01: emit_call((int)sub_s);break;
4303 case 0x02: emit_call((int)mul_s);break;
4304 case 0x03: emit_call((int)div_s);break;
4305 case 0x04: emit_call((int)sqrt_s);break;
4306 case 0x05: emit_call((int)abs_s);break;
4307 case 0x06: emit_call((int)mov_s);break;
4308 case 0x07: emit_call((int)neg_s);break;
4309 }
4310 restore_regs(reglist);
4311 }
4312 if(opcode2[i]==0x11) { // Double precision
4313 save_regs(reglist);
4314 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4315 if((source[i]&0x3f)<4) {
4316 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4317 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4318 }else{
4319 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4320 }
4321 switch(source[i]&0x3f)
4322 {
4323 case 0x00: emit_call((int)add_d);break;
4324 case 0x01: emit_call((int)sub_d);break;
4325 case 0x02: emit_call((int)mul_d);break;
4326 case 0x03: emit_call((int)div_d);break;
4327 case 0x04: emit_call((int)sqrt_d);break;
4328 case 0x05: emit_call((int)abs_d);break;
4329 case 0x06: emit_call((int)mov_d);break;
4330 case 0x07: emit_call((int)neg_d);break;
4331 }
4332 restore_regs(reglist);
4333 }
3d624f89 4334#else
4335 cop1_unusable(i, i_regs);
4336#endif
57871462 4337}
4338
// Emit host code for the multiply/divide instruction at index i.
// opcode2[i] selects the operation (see the case list below); bit 2 of
// opcode2[i] separates the 32-bit forms (0x18-0x1B) from the 64-bit forms
// (0x1C-0x1F).  Results go to the host registers mapped to HIREG/LOREG in
// i_regs->regmap.  If either source register number (rs1[i], rs2[i]) is
// zero, the mapped HI/LO host registers are simply cleared.
4339void multdiv_assemble_arm(int i,struct regstat *i_regs)
4340{
4341 // case 0x18: MULT
4342 // case 0x19: MULTU
4343 // case 0x1A: DIV
4344 // case 0x1B: DIVU
4345 // case 0x1C: DMULT
4346 // case 0x1D: DMULTU
4347 // case 0x1E: DDIV
4348 // case 0x1F: DDIVU
4349 if(rs1[i]&&rs2[i])
4350 {
4351 if((opcode2[i]&4)==0) // 32-bit
4352 {
4353 if(opcode2[i]==0x18) // MULT
4354 {
4355 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4356 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4357 signed char hi=get_reg(i_regs->regmap,HIREG);
4358 signed char lo=get_reg(i_regs->regmap,LOREG);
4359 assert(m1>=0);
4360 assert(m2>=0);
4361 assert(hi>=0);
4362 assert(lo>=0);
4363 emit_smull(m1,m2,hi,lo);
4364 }
4365 if(opcode2[i]==0x19) // MULTU
4366 {
4367 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4368 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4369 signed char hi=get_reg(i_regs->regmap,HIREG);
4370 signed char lo=get_reg(i_regs->regmap,LOREG);
4371 assert(m1>=0);
4372 assert(m2>=0);
4373 assert(hi>=0);
4374 assert(lo>=0);
4375 emit_umull(m1,m2,hi,lo);
4376 }
4377 if(opcode2[i]==0x1A) // DIV
4378 {
4379 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4380 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4381 assert(d1>=0);
4382 assert(d2>=0);
4383 signed char quotient=get_reg(i_regs->regmap,LOREG);
4384 signed char remainder=get_reg(i_regs->regmap,HIREG);
4385 assert(quotient>=0);
4386 assert(remainder>=0);
// Inline signed division: quotient goes to the LOREG host register and
// remainder to the HIREG host register.
// The branch targets below are byte offsets from the output pointer at the
// time of each call, so they are tied to the exact sequence emitted here.
// NOTE(review): presumably every emit_* call produces one 4-byte
// instruction, in which case out+52 lands on emit_test(d1,d1) and out-16
// loops back to emit_cmp -- confirm before inserting or removing anything.
4387 emit_movs(d1,remainder);
4388 emit_negmi(remainder,remainder);
4389 emit_movs(d2,HOST_TEMPREG);
4390 emit_jeq((int)out+52); // Division by zero
4391 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4392 emit_clz(HOST_TEMPREG,quotient);
4393 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4394 emit_orimm(quotient,1<<31,quotient);
4395 emit_shr(quotient,quotient,quotient);
4396 emit_cmp(remainder,HOST_TEMPREG);
4397 emit_subcs(remainder,HOST_TEMPREG,remainder);
4398 emit_adcs(quotient,quotient,quotient);
4399 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4400 emit_jcc((int)out-16); // -4
4401 emit_teq(d1,d2);
4402 emit_negmi(quotient,quotient);
4403 emit_test(d1,d1);
4404 emit_negmi(remainder,remainder);
4405 }
4406 if(opcode2[i]==0x1B) // DIVU
4407 {
4408 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4409 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4410 assert(d1>=0);
4411 assert(d2>=0);
4412 signed char quotient=get_reg(i_regs->regmap,LOREG);
4413 signed char remainder=get_reg(i_regs->regmap,HIREG);
4414 assert(quotient>=0);
4415 assert(remainder>=0);
// Inline unsigned division.  The emitted code shifts the divisor host
// register d2 in place.  As in DIV, the out+44 / out-16 targets depend on
// the exact number of instructions emitted below.
4416 emit_test(d2,d2);
4417 emit_jeq((int)out+44); // Division by zero
4418 emit_clz(d2,HOST_TEMPREG);
4419 emit_movimm(1<<31,quotient);
4420 emit_shl(d2,HOST_TEMPREG,d2);
4421 emit_mov(d1,remainder);
4422 emit_shr(quotient,HOST_TEMPREG,quotient);
4423 emit_cmp(remainder,d2);
4424 emit_subcs(remainder,d2,remainder);
4425 emit_adcs(quotient,quotient,quotient);
4426 emit_shrcc_imm(d2,1,d2);
4427 emit_jcc((int)out-16); // -4
4428 }
4429 }
4430 else // 64-bit
4431 {
4432 if(opcode2[i]==0x1C) // DMULT
4433 {
// This assert can never hold inside the enclosing if, so DMULT aborts here
// whenever asserts are enabled.
// NOTE(review): looks like a deliberate "not implemented" marker -- confirm.
4434 assert(opcode2[i]!=0x1C);
4435 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4436 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4437 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4438 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4439 assert(m1h>=0);
4440 assert(m2h>=0);
4441 assert(m1l>=0);
4442 assert(m2l>=0);
4443 emit_pushreg(m2h);
4444 emit_pushreg(m2l);
4445 emit_pushreg(m1h);
4446 emit_pushreg(m1l);
4447 emit_call((int)&mult64);
4448 emit_popreg(m1l);
4449 emit_popreg(m1h);
4450 emit_popreg(m2l);
4451 emit_popreg(m2h);
4452 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4453 signed char hil=get_reg(i_regs->regmap,HIREG);
4454 if(hih>=0) emit_loadreg(HIREG|64,hih);
4455 if(hil>=0) emit_loadreg(HIREG,hil);
4456 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4457 signed char lol=get_reg(i_regs->regmap,LOREG);
4458 if(loh>=0) emit_loadreg(LOREG|64,loh);
4459 if(lol>=0) emit_loadreg(LOREG,lol);
4460 }
4461 if(opcode2[i]==0x1D) // DMULTU
4462 {
4463 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4464 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4465 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4466 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4467 assert(m1h>=0);
4468 assert(m2h>=0);
4469 assert(m1l>=0);
4470 assert(m2l>=0);
// Marshal the four operand halves into host registers 0-3 for the call.
// An operand whose host register number is below its slot is re-read from
// dynarec_local+reg*4 instead of being moved.
// NOTE(review): presumably save_regs(0x100f) stores r0-r3 and r12 there, so
// this recovers a value an earlier move may have overwritten -- confirm.
4471 save_regs(0x100f);
4472 if(m1l!=0) emit_mov(m1l,0);
4473 if(m1h==0) emit_readword((int)&dynarec_local,1);
4474 else if(m1h>1) emit_mov(m1h,1);
4475 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4476 else if(m2l>2) emit_mov(m2l,2);
4477 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4478 else if(m2h>3) emit_mov(m2h,3);
4479 emit_call((int)&multu64);
4480 restore_regs(0x100f);
// NOTE(review): the four lookups below are never used in live code; every
// emit_loadreg for them sits inside the commented-out region that follows.
// DDIV and DDIVU reload HI/LO after their calls -- confirm whether DMULTU
// needs the same reload.
4481 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4482 signed char hil=get_reg(i_regs->regmap,HIREG);
4483 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4484 signed char lol=get_reg(i_regs->regmap,LOREG);
4485 /*signed char temp=get_reg(i_regs->regmap,-1);
4486 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4487 signed char rl=get_reg(i_regs->regmap,HIREG);
4488 assert(m1h>=0);
4489 assert(m2h>=0);
4490 assert(m1l>=0);
4491 assert(m2l>=0);
4492 assert(temp>=0);
4493 //emit_mov(m1l,EAX);
4494 //emit_mul(m2l);
4495 emit_umull(rl,rh,m1l,m2l);
4496 emit_storereg(LOREG,rl);
4497 emit_mov(rh,temp);
4498 //emit_mov(m1h,EAX);
4499 //emit_mul(m2l);
4500 emit_umull(rl,rh,m1h,m2l);
4501 emit_adds(rl,temp,temp);
4502 emit_adcimm(rh,0,rh);
4503 emit_storereg(HIREG,rh);
4504 //emit_mov(m2h,EAX);
4505 //emit_mul(m1l);
4506 emit_umull(rl,rh,m1l,m2h);
4507 emit_adds(rl,temp,temp);
4508 emit_adcimm(rh,0,rh);
4509 emit_storereg(LOREG|64,temp);
4510 emit_mov(rh,temp);
4511 //emit_mov(m2h,EAX);
4512 //emit_mul(m1h);
4513 emit_umull(rl,rh,m1h,m2h);
4514 emit_adds(rl,temp,rl);
4515 emit_loadreg(HIREG,temp);
4516 emit_adcimm(rh,0,rh);
4517 emit_adds(rl,temp,rl);
4518 emit_adcimm(rh,0,rh);
4519 // DEBUG
4520 /*
4521 emit_pushreg(m2h);
4522 emit_pushreg(m2l);
4523 emit_pushreg(m1h);
4524 emit_pushreg(m1l);
4525 emit_call((int)&multu64);
4526 emit_popreg(m1l);
4527 emit_popreg(m1h);
4528 emit_popreg(m2l);
4529 emit_popreg(m2h);
4530 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4531 signed char hil=get_reg(i_regs->regmap,HIREG);
4532 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4533 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4534 */
4535 // Shouldn't be necessary
4536 //char loh=get_reg(i_regs->regmap,LOREG|64);
4537 //char lol=get_reg(i_regs->regmap,LOREG);
4538 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4539 //if(lol>=0) emit_loadreg(LOREG,lol);
4540 }
4541 if(opcode2[i]==0x1E) // DDIV
4542 {
4543 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4544 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4545 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4546 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4547 assert(d1h>=0);
4548 assert(d2h>=0);
4549 assert(d1l>=0);
4550 assert(d2l>=0);
// Same operand marshalling as DMULTU, followed by a call to div64.
4551 save_regs(0x100f);
4552 if(d1l!=0) emit_mov(d1l,0);
4553 if(d1h==0) emit_readword((int)&dynarec_local,1);
4554 else if(d1h>1) emit_mov(d1h,1);
4555 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4556 else if(d2l>2) emit_mov(d2l,2);
4557 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4558 else if(d2h>3) emit_mov(d2h,3);
4559 emit_call((int)&div64);
4560 restore_regs(0x100f);
// Reload whichever HI/LO halves are mapped to host registers.
4561 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4562 signed char hil=get_reg(i_regs->regmap,HIREG);
4563 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4564 signed char lol=get_reg(i_regs->regmap,LOREG);
4565 if(hih>=0) emit_loadreg(HIREG|64,hih);
4566 if(hil>=0) emit_loadreg(HIREG,hil);
4567 if(loh>=0) emit_loadreg(LOREG|64,loh);
4568 if(lol>=0) emit_loadreg(LOREG,lol);
4569 }
4570 if(opcode2[i]==0x1F) // DDIVU
4571 {
4572 //u_int hr,reglist=0;
4573 //for(hr=0;hr<HOST_REGS;hr++) {
4574 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4575 //}
4576 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4577 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4578 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4579 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4580 assert(d1h>=0);
4581 assert(d2h>=0);
4582 assert(d1l>=0);
4583 assert(d2l>=0);
// Same operand marshalling as DDIV, followed by a call to divu64.
4584 save_regs(0x100f);
4585 if(d1l!=0) emit_mov(d1l,0);
4586 if(d1h==0) emit_readword((int)&dynarec_local,1);
4587 else if(d1h>1) emit_mov(d1h,1);
4588 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4589 else if(d2l>2) emit_mov(d2l,2);
4590 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4591 else if(d2h>3) emit_mov(d2h,3);
4592 emit_call((int)&divu64);
4593 restore_regs(0x100f);
4594 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4595 signed char hil=get_reg(i_regs->regmap,HIREG);
4596 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4597 signed char lol=get_reg(i_regs->regmap,LOREG);
4598 if(hih>=0) emit_loadreg(HIREG|64,hih);
4599 if(hil>=0) emit_loadreg(HIREG,hil);
4600 if(loh>=0) emit_loadreg(LOREG|64,loh);
4601 if(lol>=0) emit_loadreg(LOREG,lol);
4602 }
4603 }
4604 }
4605 else
4606 {
4607 // Multiply by zero is zero.
4608 // MIPS does not have a divide by zero exception.
4609 // The result is undefined, we return zero.
// Only the host registers mapped to HIREG and LOREG are cleared here; any
// mapping of HIREG|64 / LOREG|64 is left untouched.
4610 signed char hr=get_reg(i_regs->regmap,HIREG);
4611 signed char lr=get_reg(i_regs->regmap,LOREG);
4612 if(hr>=0) emit_zeroreg(hr);
4613 if(lr>=0) emit_zeroreg(lr);
4614 }
4615}
4616#define multdiv_assemble multdiv_assemble_arm
4617
// Hash-register preload hook.  Intentionally emits nothing on ARM: on x86
// this step puts the value 0xf8 into the register, whereas on ARM the hash
// is computed with a single instruction (see do_rhash).
void do_preload_rhash(int r)
{
  (void)r; // unused on this backend
}
4622
4623void do_preload_rhtbl(int ht) {
4624 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4625}
4626
// Emit the hash computation: rh = rs & 0xf8.
void do_rhash(int rs,int rh)
{
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4630
4631void do_miniht_load(int ht,int rh) {
4632 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4633 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4634}
4635
4636void do_miniht_jump(int rs,int rh,int ht) {
4637 emit_cmp(rh,rs);
4638 emit_ldreq_indexed(ht,4,15);
4639 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4640 emit_mov(rs,7);
4641 emit_jmp(jump_vaddr_reg[7]);
4642 #else
4643 emit_jmp(jump_vaddr_reg[rs]);
4644 #endif
4645}
4646
4647void do_miniht_insert(u_int return_address,int rt,int temp) {
4648 #ifdef ARMv5_ONLY
4649 emit_movimm(return_address,rt); // PC into link register
4650 add_to_linker((int)out,return_address,1);
4651 emit_pcreladdr(temp);
4652 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4653 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4654 #else
4655 emit_movw(return_address&0x0000FFFF,rt);
4656 add_to_linker((int)out,return_address,1);
4657 emit_pcreladdr(temp);
4658 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4659 emit_movt(return_address&0xFFFF0000,rt);
4660 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4661 #endif
4662}
4663
4664// Sign-extend to 64 bits and write out upper half of a register
4665// This is useful where we have a 32-bit value in a register, and want to
4666// keep it in a 32-bit register, but can't guarantee that it won't be read
4667// as a 64-bit value later.
4668void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4669{
24385cae 4670#ifndef FORCE32
57871462 4671 if(is32_pre==is32) return;
4672 int hr,reg;
4673 for(hr=0;hr<HOST_REGS;hr++) {
4674 if(hr!=EXCLUDE_REG) {
4675 //if(pre[hr]==entry[hr]) {
4676 if((reg=pre[hr])>=0) {
4677 if((dirty>>hr)&1) {
4678 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4679 emit_sarimm(hr,31,HOST_TEMPREG);
4680 emit_storereg(reg|64,HOST_TEMPREG);
4681 }
4682 }
4683 }
4684 //}
4685 }
4686 }
24385cae 4687#endif
57871462 4688}
4689
4690void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4691{
4692 //if(dirty_pre==dirty) return;
4693 int hr,reg,new_hr;
4694 for(hr=0;hr<HOST_REGS;hr++) {
4695 if(hr!=EXCLUDE_REG) {
4696 reg=pre[hr];
4697 if(((~u)>>(reg&63))&1) {
4698 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4699 if(((dirty_pre&~dirty)>>hr)&1) {
4700 if(reg>0&&reg<34) {
4701 emit_storereg(reg,hr);
4702 if( ((is32_pre&~uu)>>reg)&1 ) {
4703 emit_sarimm(hr,31,HOST_TEMPREG);
4704 emit_storereg(reg|64,HOST_TEMPREG);
4705 }
4706 }
4707 else if(reg>=64) {
4708 emit_storereg(reg,hr);
4709 }
4710 }
4711 }
4712 else // Check if register moved to a different register
4713 if((new_hr=get_reg(entry,reg))>=0) {
4714 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4715 if(reg>0&&reg<34) {
4716 emit_storereg(reg,hr);
4717 if( ((is32_pre&~uu)>>reg)&1 ) {
4718 emit_sarimm(hr,31,HOST_TEMPREG);
4719 emit_storereg(reg|64,HOST_TEMPREG);
4720 }
4721 }
4722 else if(reg>=64) {
4723 emit_storereg(reg,hr);
4724 }
4725 }
4726 }
4727 }
4728 }
4729 }
4730}
4731
4732
4733/* using strd could possibly help but you'd have to allocate registers in pairs
4734void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4735{
4736 int hr;
4737 int wrote=-1;
4738 for(hr=HOST_REGS-1;hr>=0;hr--) {
4739 if(hr!=EXCLUDE_REG) {
4740 if(pre[hr]!=entry[hr]) {
4741 if(pre[hr]>=0) {
4742 if((dirty>>hr)&1) {
4743 if(get_reg(entry,pre[hr])<0) {
4744 if(pre[hr]<64) {
4745 if(!((u>>pre[hr])&1)) {
4746 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4747 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4748 emit_sarimm(hr,31,hr+1);
4749 emit_strdreg(pre[hr],hr);
4750 }
4751 else
4752 emit_storereg(pre[hr],hr);
4753 }else{
4754 emit_storereg(pre[hr],hr);
4755 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4756 emit_sarimm(hr,31,hr);
4757 emit_storereg(pre[hr]|64,hr);
4758 }
4759 }
4760 }
4761 }else{
4762 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4763 emit_storereg(pre[hr],hr);
4764 }
4765 }
4766 wrote=hr;
4767 }
4768 }
4769 }
4770 }
4771 }
4772 }
4773 for(hr=0;hr<HOST_REGS;hr++) {
4774 if(hr!=EXCLUDE_REG) {
4775 if(pre[hr]!=entry[hr]) {
4776 if(pre[hr]>=0) {
4777 int nr;
4778 if((nr=get_reg(entry,pre[hr]))>=0) {
4779 emit_mov(hr,nr);
4780 }
4781 }
4782 }
4783 }
4784 }
4785}
4786#define wb_invalidate wb_invalidate_arm
4787*/
4788
dd3a91a1 4789// Clearing the cache is rather slow on ARM Linux, so mark the areas
4790// that need to be cleared, and then only clear these areas once.
4791void do_clear_cache()
4792{
4793 int i,j;
4794 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4795 {
4796 u_int bitmap=needs_clear_cache[i];
4797 if(bitmap) {
4798 u_int start,end;
4799 for(j=0;j<32;j++)
4800 {
4801 if(bitmap&(1<<j)) {
4802 start=BASE_ADDR+i*131072+j*4096;
4803 end=start+4095;
4804 j++;
4805 while(j<32) {
4806 if(bitmap&(1<<j)) {
4807 end+=4096;
4808 j++;
4809 }else{
4810 __clear_cache((void *)start,(void *)end);
4811 break;
4812 }
4813 }
4814 }
4815 }
4816 needs_clear_cache[i]=0;
4817 }
4818 }
4819}
4820
57871462 4821// CPU-architecture-specific initialization
4822void arch_init() {
3d624f89 4823#ifndef DISABLE_COP1
57871462 4824 rounding_modes[0]=0x0<<22; // round
4825 rounding_modes[1]=0x3<<22; // trunc
4826 rounding_modes[2]=0x1<<22; // ceil
4827 rounding_modes[3]=0x2<<22; // floor
3d624f89 4828#endif
57871462 4829}
b9b61529 4830
4831// vim:shiftwidth=2:expandtab