drc: merge Ari64's patch: 14_dont_save_or_restore_temporary
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// Globals defined outside this file.
extern int cycle_count, last_count, pcaddr, pending_exception, branch_target;
extern uint64_t readmem_dword;
#ifdef MUPEN64
extern precomp_instr fake_pc;
#endif
extern void *dynarec_local;
// One entry per 4 KiB page: indexed by (address>>12) in verify_dirty/get_bounds
extern u_int memory_map[1048576];
extern u_int mini_ht[32][2];
extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69void invalidate_addr_r0();
70void invalidate_addr_r1();
71void invalidate_addr_r2();
72void invalidate_addr_r3();
73void invalidate_addr_r4();
74void invalidate_addr_r5();
75void invalidate_addr_r6();
76void invalidate_addr_r7();
77void invalidate_addr_r8();
78void invalidate_addr_r9();
79void invalidate_addr_r10();
80void invalidate_addr_r12();
81
82const u_int invalidate_addr_reg[16] = {
83 (int)invalidate_addr_r0,
84 (int)invalidate_addr_r1,
85 (int)invalidate_addr_r2,
86 (int)invalidate_addr_r3,
87 (int)invalidate_addr_r4,
88 (int)invalidate_addr_r5,
89 (int)invalidate_addr_r6,
90 (int)invalidate_addr_r7,
91 (int)invalidate_addr_r8,
92 (int)invalidate_addr_r9,
93 (int)invalidate_addr_r10,
94 0,
95 (int)invalidate_addr_r12,
96 0,
97 0,
98 0};
99
100#include "fpu.h"
101
102unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
103
104/* Linker */
105
106void set_jump_target(int addr,u_int target)
107{
108 u_char *ptr=(u_char *)addr;
109 u_int *ptr2=(u_int *)ptr;
110 if(ptr[3]==0xe2) {
111 assert((target-(u_int)ptr2-8)<1024);
112 assert((addr&3)==0);
113 assert((target&3)==0);
114 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
115 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
116 }
117 else if(ptr[3]==0x72) {
118 // generated by emit_jno_unlikely
119 if((target-(u_int)ptr2-8)<1024) {
120 assert((addr&3)==0);
121 assert((target&3)==0);
122 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
123 }
124 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
125 assert((addr&3)==0);
126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
128 }
129 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
130 }
131 else {
132 assert((ptr[3]&0x0e)==0xa);
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137// This optionally copies the instruction from the target of the branch into
138// the space before the branch. Works, but the difference in speed is
139// usually insignificant.
140void set_jump_target_fillslot(int addr,u_int target,int copy)
141{
142 u_char *ptr=(u_char *)addr;
143 u_int *ptr2=(u_int *)ptr;
144 assert(!copy||ptr2[-1]==0xe28dd000);
145 if(ptr[3]==0xe2) {
146 assert(!copy);
147 assert((target-(u_int)ptr2-8)<4096);
148 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
149 }
150 else {
151 assert((ptr[3]&0x0e)==0xa);
152 u_int target_insn=*(u_int *)target;
153 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
154 copy=0;
155 }
156 if((target_insn&0x0c100000)==0x04100000) { // Load
157 copy=0;
158 }
159 if(target_insn&0x08000000) {
160 copy=0;
161 }
162 if(copy) {
163 ptr2[-1]=target_insn;
164 target+=4;
165 }
166 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
167 }
168}
169
170/* Literal pool */
171add_literal(int addr,int val)
172{
173 literals[literalcount][0]=addr;
174 literals[literalcount][1]=val;
175 literalcount++;
176}
177
178void *kill_pointer(void *stub)
179{
180 int *ptr=(int *)(stub+4);
181 assert((*ptr&0x0ff00000)==0x05900000);
182 u_int offset=*ptr&0xfff;
183 int **l_ptr=(void *)ptr+offset+8;
184 int *i_ptr=*l_ptr;
185 set_jump_target((int)i_ptr,(int)stub);
186 return i_ptr;
187}
188
189int get_pointer(void *stub)
190{
191 //printf("get_pointer(%x)\n",(int)stub);
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 assert((*i_ptr&0x0f000000)==0x0a000000);
198 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
199}
200
201// Find the "clean" entry point from a "dirty" entry point
202// by skipping past the call to verify_code
203u_int get_clean_addr(int addr)
204{
205 int *ptr=(int *)addr;
206 #ifdef ARMv5_ONLY
207 ptr+=4;
208 #else
209 ptr+=6;
210 #endif
211 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
212 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
213 ptr++;
214 if((*ptr&0xFF000000)==0xea000000) {
215 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
216 }
217 return (u_int)ptr;
218}
219
220int verify_dirty(int addr)
221{
222 u_int *ptr=(u_int *)addr;
223 #ifdef ARMv5_ONLY
224 // get from literal pool
225 assert((*ptr&0xFFF00000)==0xe5900000);
226 u_int offset=*ptr&0xfff;
227 u_int *l_ptr=(void *)ptr+offset+8;
228 u_int source=l_ptr[0];
229 u_int copy=l_ptr[1];
230 u_int len=l_ptr[2];
231 ptr+=4;
232 #else
233 // ARMv7 movw/movt
234 assert((*ptr&0xFFF00000)==0xe3000000);
235 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
236 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
237 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
242 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
243 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
244 unsigned int page=source>>12;
245 unsigned int map_value=memory_map[page];
246 if(map_value>=0x80000000) return 0;
247 while(page<((source+len-1)>>12)) {
248 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
249 }
250 source = source+(map_value<<2);
251 }
252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
258int isclean(int addr)
259{
260 #ifdef ARMv5_ONLY
261 int *ptr=((u_int *)addr)+4;
262 #else
263 int *ptr=((u_int *)addr)+6;
264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
270 return 1;
271}
272
273void get_bounds(int addr,u_int *start,u_int *end)
274{
275 u_int *ptr=(u_int *)addr;
276 #ifdef ARMv5_ONLY
277 // get from literal pool
278 assert((*ptr&0xFFF00000)==0xe5900000);
279 u_int offset=*ptr&0xfff;
280 u_int *l_ptr=(void *)ptr+offset+8;
281 u_int source=l_ptr[0];
282 //u_int copy=l_ptr[1];
283 u_int len=l_ptr[2];
284 ptr+=4;
285 #else
286 // ARMv7 movw/movt
287 assert((*ptr&0xFFF00000)==0xe3000000);
288 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
289 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
290 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
291 ptr+=6;
292 #endif
293 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
294 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
295 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
296 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
297 if(memory_map[source>>12]>=0x80000000) source = 0;
298 else source = source+(memory_map[source>>12]<<2);
299 }
300 *start=source;
301 *end=source+len;
302}
303
304/* Register allocation */
305
306// Note: registers are allocated clean (unmodified state)
307// if you intend to modify the register, you must call dirty_reg().
308void alloc_reg(struct regstat *cur,int i,signed char reg)
309{
310 int r,hr;
311 int preferred_reg = (reg&7);
312 if(reg==CCREG) preferred_reg=HOST_CCREG;
313 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
314
315 // Don't allocate unused registers
316 if((cur->u>>reg)&1) return;
317
318 // see if it's already allocated
319 for(hr=0;hr<HOST_REGS;hr++)
320 {
321 if(cur->regmap[hr]==reg) return;
322 }
323
324 // Keep the same mapping if the register was already allocated in a loop
325 preferred_reg = loop_reg(i,reg,preferred_reg);
326
327 // Try to allocate the preferred register
328 if(cur->regmap[preferred_reg]==-1) {
329 cur->regmap[preferred_reg]=reg;
330 cur->dirty&=~(1<<preferred_reg);
331 cur->isconst&=~(1<<preferred_reg);
332 return;
333 }
334 r=cur->regmap[preferred_reg];
335 if(r<64&&((cur->u>>r)&1)) {
336 cur->regmap[preferred_reg]=reg;
337 cur->dirty&=~(1<<preferred_reg);
338 cur->isconst&=~(1<<preferred_reg);
339 return;
340 }
341 if(r>=64&&((cur->uu>>(r&63))&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347
348 // Clear any unneeded registers
349 // We try to keep the mapping consistent, if possible, because it
350 // makes branches easier (especially loops). So we try to allocate
351 // first (see above) before removing old mappings. If this is not
352 // possible then go ahead and clear out the registers that are no
353 // longer needed.
354 for(hr=0;hr<HOST_REGS;hr++)
355 {
356 r=cur->regmap[hr];
357 if(r>=0) {
358 if(r<64) {
359 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
360 }
361 else
362 {
363 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
364 }
365 }
366 }
367 // Try to allocate any available register, but prefer
368 // registers that have not been used recently.
369 if(i>0) {
370 for(hr=0;hr<HOST_REGS;hr++) {
371 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
372 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
373 cur->regmap[hr]=reg;
374 cur->dirty&=~(1<<hr);
375 cur->isconst&=~(1<<hr);
376 return;
377 }
378 }
379 }
380 }
381 // Try to allocate any available register
382 for(hr=0;hr<HOST_REGS;hr++) {
383 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
384 cur->regmap[hr]=reg;
385 cur->dirty&=~(1<<hr);
386 cur->isconst&=~(1<<hr);
387 return;
388 }
389 }
390
391 // Ok, now we have to evict someone
392 // Pick a register we hopefully won't need soon
393 u_char hsn[MAXREG+1];
394 memset(hsn,10,sizeof(hsn));
395 int j;
396 lsn(hsn,i,&preferred_reg);
397 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
398 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
399 if(i>0) {
400 // Don't evict the cycle count at entry points, otherwise the entry
401 // stub will have to write it.
402 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
403 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
404 for(j=10;j>=3;j--)
405 {
406 // Alloc preferred register if available
407 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
408 for(hr=0;hr<HOST_REGS;hr++) {
409 // Evict both parts of a 64-bit register
410 if((cur->regmap[hr]&63)==r) {
411 cur->regmap[hr]=-1;
412 cur->dirty&=~(1<<hr);
413 cur->isconst&=~(1<<hr);
414 }
415 }
416 cur->regmap[preferred_reg]=reg;
417 return;
418 }
419 for(r=1;r<=MAXREG;r++)
420 {
421 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
422 for(hr=0;hr<HOST_REGS;hr++) {
423 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
424 if(cur->regmap[hr]==r+64) {
425 cur->regmap[hr]=reg;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
428 return;
429 }
430 }
431 }
432 for(hr=0;hr<HOST_REGS;hr++) {
433 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
434 if(cur->regmap[hr]==r) {
435 cur->regmap[hr]=reg;
436 cur->dirty&=~(1<<hr);
437 cur->isconst&=~(1<<hr);
438 return;
439 }
440 }
441 }
442 }
443 }
444 }
445 }
446 for(j=10;j>=0;j--)
447 {
448 for(r=1;r<=MAXREG;r++)
449 {
450 if(hsn[r]==j) {
451 for(hr=0;hr<HOST_REGS;hr++) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 printf("This shouldn't happen (alloc_reg)");exit(1);
471}
472
473void alloc_reg64(struct regstat *cur,int i,signed char reg)
474{
475 int preferred_reg = 8+(reg&1);
476 int r,hr;
477
478 // allocate the lower 32 bits
479 alloc_reg(cur,i,reg);
480
481 // Don't allocate unused registers
482 if((cur->uu>>reg)&1) return;
483
484 // see if the upper half is already allocated
485 for(hr=0;hr<HOST_REGS;hr++)
486 {
487 if(cur->regmap[hr]==reg+64) return;
488 }
489
490 // Keep the same mapping if the register was already allocated in a loop
491 preferred_reg = loop_reg(i,reg,preferred_reg);
492
493 // Try to allocate the preferred register
494 if(cur->regmap[preferred_reg]==-1) {
495 cur->regmap[preferred_reg]=reg|64;
496 cur->dirty&=~(1<<preferred_reg);
497 cur->isconst&=~(1<<preferred_reg);
498 return;
499 }
500 r=cur->regmap[preferred_reg];
501 if(r<64&&((cur->u>>r)&1)) {
502 cur->regmap[preferred_reg]=reg|64;
503 cur->dirty&=~(1<<preferred_reg);
504 cur->isconst&=~(1<<preferred_reg);
505 return;
506 }
507 if(r>=64&&((cur->uu>>(r&63))&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513
514 // Clear any unneeded registers
515 // We try to keep the mapping consistent, if possible, because it
516 // makes branches easier (especially loops). So we try to allocate
517 // first (see above) before removing old mappings. If this is not
518 // possible then go ahead and clear out the registers that are no
519 // longer needed.
520 for(hr=HOST_REGS-1;hr>=0;hr--)
521 {
522 r=cur->regmap[hr];
523 if(r>=0) {
524 if(r<64) {
525 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
526 }
527 else
528 {
529 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
530 }
531 }
532 }
533 // Try to allocate any available register, but prefer
534 // registers that have not been used recently.
535 if(i>0) {
536 for(hr=0;hr<HOST_REGS;hr++) {
537 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
538 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
539 cur->regmap[hr]=reg|64;
540 cur->dirty&=~(1<<hr);
541 cur->isconst&=~(1<<hr);
542 return;
543 }
544 }
545 }
546 }
547 // Try to allocate any available register
548 for(hr=0;hr<HOST_REGS;hr++) {
549 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
550 cur->regmap[hr]=reg|64;
551 cur->dirty&=~(1<<hr);
552 cur->isconst&=~(1<<hr);
553 return;
554 }
555 }
556
557 // Ok, now we have to evict someone
558 // Pick a register we hopefully won't need soon
559 u_char hsn[MAXREG+1];
560 memset(hsn,10,sizeof(hsn));
561 int j;
562 lsn(hsn,i,&preferred_reg);
563 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
564 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
565 if(i>0) {
566 // Don't evict the cycle count at entry points, otherwise the entry
567 // stub will have to write it.
568 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
569 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
570 for(j=10;j>=3;j--)
571 {
572 // Alloc preferred register if available
573 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
574 for(hr=0;hr<HOST_REGS;hr++) {
575 // Evict both parts of a 64-bit register
576 if((cur->regmap[hr]&63)==r) {
577 cur->regmap[hr]=-1;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 }
581 }
582 cur->regmap[preferred_reg]=reg|64;
583 return;
584 }
585 for(r=1;r<=MAXREG;r++)
586 {
587 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
590 if(cur->regmap[hr]==r+64) {
591 cur->regmap[hr]=reg|64;
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
594 return;
595 }
596 }
597 }
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
600 if(cur->regmap[hr]==r) {
601 cur->regmap[hr]=reg|64;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 }
609 }
610 }
611 }
612 for(j=10;j>=0;j--)
613 {
614 for(r=1;r<=MAXREG;r++)
615 {
616 if(hsn[r]==j) {
617 for(hr=0;hr<HOST_REGS;hr++) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 printf("This shouldn't happen");exit(1);
637}
638
639// Allocate a temporary register. This is done without regard to
640// dirty status or whether the register we request is on the unneeded list
641// Note: This will only allocate one register, even if called multiple times
642void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
643{
644 int r,hr;
645 int preferred_reg = -1;
646
647 // see if it's already allocated
648 for(hr=0;hr<HOST_REGS;hr++)
649 {
650 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
651 }
652
653 // Try to allocate any available register
654 for(hr=HOST_REGS-1;hr>=0;hr--) {
655 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
656 cur->regmap[hr]=reg;
657 cur->dirty&=~(1<<hr);
658 cur->isconst&=~(1<<hr);
659 return;
660 }
661 }
662
663 // Find an unneeded register
664 for(hr=HOST_REGS-1;hr>=0;hr--)
665 {
666 r=cur->regmap[hr];
667 if(r>=0) {
668 if(r<64) {
669 if((cur->u>>r)&1) {
670 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
671 cur->regmap[hr]=reg;
672 cur->dirty&=~(1<<hr);
673 cur->isconst&=~(1<<hr);
674 return;
675 }
676 }
677 }
678 else
679 {
680 if((cur->uu>>(r&63))&1) {
681 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688 }
689 }
690 }
691
692 // Ok, now we have to evict someone
693 // Pick a register we hopefully won't need soon
694 // TODO: we might want to follow unconditional jumps here
695 // TODO: get rid of dupe code and make this into a function
696 u_char hsn[MAXREG+1];
697 memset(hsn,10,sizeof(hsn));
698 int j;
699 lsn(hsn,i,&preferred_reg);
700 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
701 if(i>0) {
702 // Don't evict the cycle count at entry points, otherwise the entry
703 // stub will have to write it.
704 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
705 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
706 for(j=10;j>=3;j--)
707 {
708 for(r=1;r<=MAXREG;r++)
709 {
710 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
711 for(hr=0;hr<HOST_REGS;hr++) {
712 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
713 if(cur->regmap[hr]==r+64) {
714 cur->regmap[hr]=reg;
715 cur->dirty&=~(1<<hr);
716 cur->isconst&=~(1<<hr);
717 return;
718 }
719 }
720 }
721 for(hr=0;hr<HOST_REGS;hr++) {
722 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
723 if(cur->regmap[hr]==r) {
724 cur->regmap[hr]=reg;
725 cur->dirty&=~(1<<hr);
726 cur->isconst&=~(1<<hr);
727 return;
728 }
729 }
730 }
731 }
732 }
733 }
734 }
735 for(j=10;j>=0;j--)
736 {
737 for(r=1;r<=MAXREG;r++)
738 {
739 if(hsn[r]==j) {
740 for(hr=0;hr<HOST_REGS;hr++) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 printf("This shouldn't happen");exit(1);
760}
761// Allocate a specific ARM register.
762void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
763{
764 int n;
765
766 // see if it's already allocated (and dealloc it)
767 for(n=0;n<HOST_REGS;n++)
768 {
769 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
770 }
771
772 cur->regmap[hr]=reg;
773 cur->dirty&=~(1<<hr);
774 cur->isconst&=~(1<<hr);
775}
776
777// Alloc cycle count into dedicated register
778alloc_cc(struct regstat *cur,int i)
779{
780 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
781}
782
783/* Special alloc */
784
785
786/* Assembler */
787
// Printable names of the 16 ARM registers, indexed by register number.
// Registers 11, 13, 14 and 15 use their aliases fp, sp, lr and pc.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
805
806void output_byte(u_char byte)
807{
808 *(out++)=byte;
809}
810void output_modrm(u_char mod,u_char rm,u_char ext)
811{
812 assert(mod<4);
813 assert(rm<8);
814 assert(ext<8);
815 u_char byte=(mod<<6)|(ext<<3)|rm;
816 *(out++)=byte;
817}
818void output_sib(u_char scale,u_char index,u_char base)
819{
820 assert(scale<4);
821 assert(index<8);
822 assert(base<8);
823 u_char byte=(scale<<6)|(index<<3)|base;
824 *(out++)=byte;
825}
826void output_w32(u_int word)
827{
828 *((u_int *)out)=word;
829 out+=4;
830}
// Build the register fields of an instruction word:
// rn in bits 19-16, rd in bits 15-12, rm in bits 3-0.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rd/rn fields plus an 8-bit immediate that is to be shifted
// left by `shift` bits (shift must be even). The shift is stored as
// ((32-shift)&30) placed at bit 7 upward.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field=((32-shift)&30)<<7;
  const u_int reg_fields=(rn<<16)|(rd<<12);
  return(reg_fields|rotate_field|imm);
}
// Try to express imm as an 8-bit value plus an even rotation.
// On success returns 1 and stores the 12-bit field (rotation in bits 11-8,
// value in bits 7-0) in *encoded. On failure returns 0 with *encoded == 0.
u_int genimm(u_int imm,u_int *encoded)
{
  u_int rot;
  *encoded=0;
  if(imm==0) return 1;
  // Rotate imm right two bits at a time until it fits in 8 bits; rot is
  // the rotation that turns the 8-bit value back into imm.
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
861void genimm_checked(u_int imm,u_int *encoded)
862{
863 u_int ret=genimm(imm,encoded);
864 assert(ret);
865}
866u_int genjmp(u_int addr)
867{
868 int offset=addr-(int)out-8;
869 if(offset<-33554432||offset>=33554432) {
870 if (addr>2) {
871 printf("genjmp: out of range: %08x\n", offset);
872 exit(1);
873 }
874 return 0;
875 }
876 return ((u_int)offset>>2)&0xffffff;
877}
878
879void emit_mov(int rs,int rt)
880{
881 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
882 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
883}
884
885void emit_movs(int rs,int rt)
886{
887 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
888 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
889}
890
891void emit_add(int rs1,int rs2,int rt)
892{
893 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
894 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
895}
896
897void emit_adds(int rs1,int rs2,int rt)
898{
899 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
900 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
901}
902
903void emit_adcs(int rs1,int rs2,int rt)
904{
905 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
906 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
907}
908
909void emit_sbc(int rs1,int rs2,int rt)
910{
911 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
912 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
913}
914
915void emit_sbcs(int rs1,int rs2,int rt)
916{
917 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
918 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
919}
920
921void emit_neg(int rs, int rt)
922{
923 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
924 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
925}
926
927void emit_negs(int rs, int rt)
928{
929 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
930 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
931}
932
933void emit_sub(int rs1,int rs2,int rt)
934{
935 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
936 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
937}
938
939void emit_subs(int rs1,int rs2,int rt)
940{
941 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
942 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
943}
944
945void emit_zeroreg(int rt)
946{
947 assem_debug("mov %s,#0\n",regname[rt]);
948 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
949}
950
951void emit_loadlp(u_int imm,u_int rt)
952{
953 add_literal((int)out,imm);
954 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
955 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
956}
957void emit_movw(u_int imm,u_int rt)
958{
959 assert(imm<65536);
960 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
961 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
962}
963void emit_movt(u_int imm,u_int rt)
964{
965 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
966 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
967}
968void emit_movimm(u_int imm,u_int rt)
969{
970 u_int armval;
971 if(genimm(imm,&armval)) {
972 assem_debug("mov %s,#%d\n",regname[rt],imm);
973 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
974 }else if(genimm(~imm,&armval)) {
975 assem_debug("mvn %s,#%d\n",regname[rt],imm);
976 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
977 }else if(imm<65536) {
978 #ifdef ARMv5_ONLY
979 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
980 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
981 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
982 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
983 #else
984 emit_movw(imm,rt);
985 #endif
986 }else{
987 #ifdef ARMv5_ONLY
988 emit_loadlp(imm,rt);
989 #else
990 emit_movw(imm&0x0000FFFF,rt);
991 emit_movt(imm&0xFFFF0000,rt);
992 #endif
993 }
994}
995void emit_pcreladdr(u_int rt)
996{
997 assem_debug("add %s,pc,#?\n",regname[rt]);
998 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
999}
1000
1001void emit_loadreg(int r, int hr)
1002{
1003#ifdef FORCE32
1004 if(r&64) {
1005 printf("64bit load in 32bit mode!\n");
1006 assert(0);
1007 return;
1008 }
1009#endif
1010 if((r&63)==0)
1011 emit_zeroreg(hr);
1012 else {
1013 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1014 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1015 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1016 if(r==CCREG) addr=(int)&cycle_count;
1017 if(r==CSREG) addr=(int)&Status;
1018 if(r==FSREG) addr=(int)&FCR31;
1019 if(r==INVCP) addr=(int)&invc_ptr;
1020 u_int offset = addr-(u_int)&dynarec_local;
1021 assert(offset<4096);
1022 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1023 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1024 }
1025}
1026void emit_storereg(int r, int hr)
1027{
1028#ifdef FORCE32
1029 if(r&64) {
1030 printf("64bit store in 32bit mode!\n");
1031 assert(0);
1032 return;
1033 }
1034#endif
1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 u_int offset = addr-(u_int)&dynarec_local;
1041 assert(offset<4096);
1042 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1043 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1044}
1045
1046void emit_test(int rs, int rt)
1047{
1048 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1049 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1050}
1051
1052void emit_testimm(int rs,int imm)
1053{
1054 u_int armval;
1055 assem_debug("tst %s,$%d\n",regname[rs],imm);
1056 genimm_checked(imm,&armval);
1057 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1058}
1059
1060void emit_testeqimm(int rs,int imm)
1061{
1062 u_int armval;
1063 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1064 genimm_checked(imm,&armval);
1065 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1066}
1067
1068void emit_not(int rs,int rt)
1069{
1070 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1071 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1072}
1073
1074void emit_mvnmi(int rs,int rt)
1075{
1076 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1077 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1078}
1079
1080void emit_and(u_int rs1,u_int rs2,u_int rt)
1081{
1082 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1083 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1084}
1085
1086void emit_or(u_int rs1,u_int rs2,u_int rt)
1087{
1088 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1089 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1090}
1091void emit_or_and_set_flags(int rs1,int rs2,int rt)
1092{
1093 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
1097void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1098{
1099 assert(rs<16);
1100 assert(rt<16);
1101 assert(imm<32);
1102 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1103 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1104}
1105
1106void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1107{
1108 assert(rs<16);
1109 assert(rt<16);
1110 assert(imm<32);
1111 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1112 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1113}
1114
1115void emit_xor(u_int rs1,u_int rs2,u_int rt)
1116{
1117 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1118 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1119}
1120
1121void emit_addimm(u_int rs,int imm,u_int rt)
1122{
1123 assert(rs<16);
1124 assert(rt<16);
1125 if(imm!=0) {
1126 assert(imm>-65536&&imm<65536);
1127 u_int armval;
1128 if(genimm(imm,&armval)) {
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1130 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1131 }else if(genimm(-imm,&armval)) {
1132 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1133 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1134 }else if(imm<0) {
1135 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1136 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1137 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1138 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1139 }else{
1140 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1141 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1142 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1143 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1144 }
1145 }
1146 else if(rs!=rt) emit_mov(rs,rt);
1147}
1148
1149void emit_addimm_and_set_flags(int imm,int rt)
1150{
1151 assert(imm>-65536&&imm<65536);
1152 u_int armval;
1153 if(genimm(imm,&armval)) {
1154 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1155 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1156 }else if(genimm(-imm,&armval)) {
1157 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1158 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1159 }else if(imm<0) {
1160 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1161 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1162 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1163 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1164 }else{
1165 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1166 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1167 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1168 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1169 }
1170}
1171void emit_addimm_no_flags(u_int imm,u_int rt)
1172{
1173 emit_addimm(rt,imm,rt);
1174}
1175
1176void emit_addnop(u_int r)
1177{
1178 assert(r<16);
1179 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1180 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1181}
1182
1183void emit_adcimm(u_int rs,int imm,u_int rt)
1184{
1185 u_int armval;
1186 genimm_checked(imm,&armval);
1187 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1188 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1189}
1190/*void emit_sbcimm(int imm,u_int rt)
1191{
1192 u_int armval;
1193 genimm_checked(imm,&armval);
1194 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1195 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1196}*/
1197void emit_sbbimm(int imm,u_int rt)
1198{
1199 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1200 assert(rt<8);
1201 if(imm<128&&imm>=-128) {
1202 output_byte(0x83);
1203 output_modrm(3,rt,3);
1204 output_byte(imm);
1205 }
1206 else
1207 {
1208 output_byte(0x81);
1209 output_modrm(3,rt,3);
1210 output_w32(imm);
1211 }
1212}
1213void emit_rscimm(int rs,int imm,u_int rt)
1214{
1215 assert(0);
1216 u_int armval;
1217 genimm_checked(imm,&armval);
1218 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1219 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1220}
1221
1222void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1223{
1224 // TODO: if(genimm(imm,&armval)) ...
1225 // else
1226 emit_movimm(imm,HOST_TEMPREG);
1227 emit_adds(HOST_TEMPREG,rsl,rtl);
1228 emit_adcimm(rsh,0,rth);
1229}
1230
1231void emit_sbb(int rs1,int rs2)
1232{
1233 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1234 output_byte(0x19);
1235 output_modrm(3,rs1,rs2);
1236}
1237
1238void emit_andimm(int rs,int imm,int rt)
1239{
1240 u_int armval;
1241 if(imm==0) {
1242 emit_zeroreg(rt);
1243 }else if(genimm(imm,&armval)) {
1244 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1246 }else if(genimm(~imm,&armval)) {
1247 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1248 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1249 }else if(imm==65535) {
1250 #ifdef ARMv5_ONLY
1251 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1252 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1253 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1254 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1255 #else
1256 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1257 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1258 #endif
1259 }else{
1260 assert(imm>0&&imm<65535);
1261 #ifdef ARMv5_ONLY
1262 assem_debug("mov r14,#%d\n",imm&0xFF00);
1263 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1264 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1265 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1266 #else
1267 emit_movw(imm,HOST_TEMPREG);
1268 #endif
1269 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1270 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1271 }
1272}
1273
1274void emit_orimm(int rs,int imm,int rt)
1275{
1276 u_int armval;
1277 if(imm==0) {
1278 if(rs!=rt) emit_mov(rs,rt);
1279 }else if(genimm(imm,&armval)) {
1280 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1281 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1282 }else{
1283 assert(imm>0&&imm<65536);
1284 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1285 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1286 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1287 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1288 }
1289}
1290
1291void emit_xorimm(int rs,int imm,int rt)
1292{
1293 u_int armval;
1294 if(imm==0) {
1295 if(rs!=rt) emit_mov(rs,rt);
1296 }else if(genimm(imm,&armval)) {
1297 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1298 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1299 }else{
1300 assert(imm>0&&imm<65536);
1301 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1302 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1303 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1304 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1305 }
1306}
1307
1308void emit_shlimm(int rs,u_int imm,int rt)
1309{
1310 assert(imm>0);
1311 assert(imm<32);
1312 //if(imm==1) ...
1313 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1314 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1315}
1316
1317void emit_shrimm(int rs,u_int imm,int rt)
1318{
1319 assert(imm>0);
1320 assert(imm<32);
1321 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1322 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1323}
1324
1325void emit_sarimm(int rs,u_int imm,int rt)
1326{
1327 assert(imm>0);
1328 assert(imm<32);
1329 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1330 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1331}
1332
1333void emit_rorimm(int rs,u_int imm,int rt)
1334{
1335 assert(imm>0);
1336 assert(imm<32);
1337 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1338 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1339}
1340
1341void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1342{
1343 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1344 assert(imm>0);
1345 assert(imm<32);
1346 //if(imm==1) ...
1347 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1349 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1350 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1351}
1352
1353void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1354{
1355 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1356 assert(imm>0);
1357 assert(imm<32);
1358 //if(imm==1) ...
1359 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1361 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1362 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1363}
1364
1365void emit_signextend16(int rs,int rt)
1366{
1367 #ifdef ARMv5_ONLY
1368 emit_shlimm(rs,16,rt);
1369 emit_sarimm(rt,16,rt);
1370 #else
1371 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1372 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1373 #endif
1374}
1375
1376void emit_shl(u_int rs,u_int shift,u_int rt)
1377{
1378 assert(rs<16);
1379 assert(rt<16);
1380 assert(shift<16);
1381 //if(imm==1) ...
1382 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1383 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1384}
1385void emit_shr(u_int rs,u_int shift,u_int rt)
1386{
1387 assert(rs<16);
1388 assert(rt<16);
1389 assert(shift<16);
1390 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1391 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1392}
1393void emit_sar(u_int rs,u_int shift,u_int rt)
1394{
1395 assert(rs<16);
1396 assert(rt<16);
1397 assert(shift<16);
1398 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1399 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1400}
1401void emit_shlcl(int r)
1402{
1403 assem_debug("shl %%%s,%%cl\n",regname[r]);
1404 assert(0);
1405}
1406void emit_shrcl(int r)
1407{
1408 assem_debug("shr %%%s,%%cl\n",regname[r]);
1409 assert(0);
1410}
1411void emit_sarcl(int r)
1412{
1413 assem_debug("sar %%%s,%%cl\n",regname[r]);
1414 assert(0);
1415}
1416
1417void emit_shldcl(int r1,int r2)
1418{
1419 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1420 assert(0);
1421}
1422void emit_shrdcl(int r1,int r2)
1423{
1424 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1425 assert(0);
1426}
1427void emit_orrshl(u_int rs,u_int shift,u_int rt)
1428{
1429 assert(rs<16);
1430 assert(rt<16);
1431 assert(shift<16);
1432 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1433 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1434}
1435void emit_orrshr(u_int rs,u_int shift,u_int rt)
1436{
1437 assert(rs<16);
1438 assert(rt<16);
1439 assert(shift<16);
1440 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1441 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1442}
1443
1444void emit_cmpimm(int rs,int imm)
1445{
1446 u_int armval;
1447 if(genimm(imm,&armval)) {
1448 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1449 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1450 }else if(genimm(-imm,&armval)) {
1451 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1452 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1453 }else if(imm>0) {
1454 assert(imm<65536);
1455 #ifdef ARMv5_ONLY
1456 emit_movimm(imm,HOST_TEMPREG);
1457 #else
1458 emit_movw(imm,HOST_TEMPREG);
1459 #endif
1460 assem_debug("cmp %s,r14\n",regname[rs]);
1461 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1462 }else{
1463 assert(imm>-65536);
1464 #ifdef ARMv5_ONLY
1465 emit_movimm(-imm,HOST_TEMPREG);
1466 #else
1467 emit_movw(-imm,HOST_TEMPREG);
1468 #endif
1469 assem_debug("cmn %s,r14\n",regname[rs]);
1470 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1471 }
1472}
1473
1474void emit_cmovne(u_int *addr,int rt)
1475{
1476 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1477 assert(0);
1478}
1479void emit_cmovl(u_int *addr,int rt)
1480{
1481 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1482 assert(0);
1483}
1484void emit_cmovs(u_int *addr,int rt)
1485{
1486 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1487 assert(0);
1488}
1489void emit_cmovne_imm(int imm,int rt)
1490{
1491 assem_debug("movne %s,#%d\n",regname[rt],imm);
1492 u_int armval;
1493 genimm_checked(imm,&armval);
1494 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1495}
1496void emit_cmovl_imm(int imm,int rt)
1497{
1498 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1499 u_int armval;
1500 genimm_checked(imm,&armval);
1501 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1502}
1503void emit_cmovb_imm(int imm,int rt)
1504{
1505 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1506 u_int armval;
1507 genimm_checked(imm,&armval);
1508 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1509}
1510void emit_cmovs_imm(int imm,int rt)
1511{
1512 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1513 u_int armval;
1514 genimm_checked(imm,&armval);
1515 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1516}
1517void emit_cmove_reg(int rs,int rt)
1518{
1519 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1520 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1521}
1522void emit_cmovne_reg(int rs,int rt)
1523{
1524 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1525 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1526}
1527void emit_cmovl_reg(int rs,int rt)
1528{
1529 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1531}
1532void emit_cmovs_reg(int rs,int rt)
1533{
1534 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1535 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1536}
1537
// Set rt to 1 if rs < imm (signed, via movlt), else 0.
// When rt aliases rs the zeroing must happen after the compare, so it is
// done with emit_movimm instead of emit_zeroreg.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs < imm (unsigned, via movcc), else 0.
// When rt aliases rs the zeroing must happen after the compare, so it is
// done with emit_movimm instead of emit_zeroreg.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Signed "set if less than immediate" for a 64-bit value in rsh:rsl.
// The low-word result is computed first, then corrected from the high word:
// the high word is compared against the sign extension of imm (0 or -1);
// if it differs, rt is cleared and then set to 1 when the high word is lower.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
// Unsigned "set if less than immediate" for a 64-bit value in rsh:rsl.
// The low-word result is computed first; the high word then overrides it
// when it differs from the sign extension of imm (0 or -1).
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1584
1585void emit_cmp(int rs,int rt)
1586{
1587 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1588 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1589}
// Set rt to 1 unless rs < 1 (signed), in which case rt is 0:
// compare rs with 1, load 1, then conditionally (LT) replace it with 0.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Make rt 1 when rs is non-zero. rs is first copied to rt with flags set
// (or just tested when they are the same register), then rt is
// conditionally (NE) overwritten with 1.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// 64-bit "greater than zero" for rsh:rsl. Starts from the low-word result
// of emit_set_gz32, then lets the high word override it: non-zero high word
// forces 1, negative high word forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// 64-bit "non-zero" for rsh:rsl: rt = rsh | rsl with flags set, then rt is
// conditionally (NE) overwritten with 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (signed, via movlt), else 0.
// If rt aliases either source, it is cleared only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (unsigned, via movcc), else 0.
// If rt aliases either source, it is cleared only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1634void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1635{
1636 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1637 assert(u1!=rt);
1638 assert(u2!=rt);
1639 emit_cmp(l1,l2);
1640 emit_movimm(0,rt);
1641 emit_sbcs(u1,u2,HOST_TEMPREG);
1642 emit_cmovl_imm(1,rt);
1643}
1644void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1645{
1646 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1647 assert(u1!=rt);
1648 assert(u2!=rt);
1649 emit_cmp(l1,l2);
1650 emit_movimm(0,rt);
1651 emit_sbcs(u1,u2,HOST_TEMPREG);
1652 emit_cmovb_imm(1,rt);
1653}
1654
1655void emit_call(int a)
1656{
1657 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1658 u_int offset=genjmp(a);
1659 output_w32(0xeb000000|offset);
1660}
1661void emit_jmp(int a)
1662{
1663 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1664 u_int offset=genjmp(a);
1665 output_w32(0xea000000|offset);
1666}
1667void emit_jne(int a)
1668{
1669 assem_debug("bne %x\n",a);
1670 u_int offset=genjmp(a);
1671 output_w32(0x1a000000|offset);
1672}
1673void emit_jeq(int a)
1674{
1675 assem_debug("beq %x\n",a);
1676 u_int offset=genjmp(a);
1677 output_w32(0x0a000000|offset);
1678}
1679void emit_js(int a)
1680{
1681 assem_debug("bmi %x\n",a);
1682 u_int offset=genjmp(a);
1683 output_w32(0x4a000000|offset);
1684}
1685void emit_jns(int a)
1686{
1687 assem_debug("bpl %x\n",a);
1688 u_int offset=genjmp(a);
1689 output_w32(0x5a000000|offset);
1690}
1691void emit_jl(int a)
1692{
1693 assem_debug("blt %x\n",a);
1694 u_int offset=genjmp(a);
1695 output_w32(0xba000000|offset);
1696}
1697void emit_jge(int a)
1698{
1699 assem_debug("bge %x\n",a);
1700 u_int offset=genjmp(a);
1701 output_w32(0xaa000000|offset);
1702}
1703void emit_jno(int a)
1704{
1705 assem_debug("bvc %x\n",a);
1706 u_int offset=genjmp(a);
1707 output_w32(0x7a000000|offset);
1708}
1709void emit_jc(int a)
1710{
1711 assem_debug("bcs %x\n",a);
1712 u_int offset=genjmp(a);
1713 output_w32(0x2a000000|offset);
1714}
1715void emit_jcc(int a)
1716{
1717 assem_debug("bcc %x\n",a);
1718 u_int offset=genjmp(a);
1719 output_w32(0x3a000000|offset);
1720}
1721
// x86-style "push immediate" stub: logs the request and asserts; no ARM code
// is emitted.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// x86-style "pusha" stub: logs the request and asserts; no ARM code is
// emitted.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// x86-style "popa" stub: logs the request and asserts; no ARM code is
// emitted.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1737void emit_pushreg(u_int r)
1738{
1739 assem_debug("push %%%s\n",regname[r]);
1740 assert(0);
1741}
1742void emit_popreg(u_int r)
1743{
1744 assem_debug("pop %%%s\n",regname[r]);
1745 assert(0);
1746}
1747void emit_callreg(u_int r)
1748{
1749 assem_debug("call *%%%s\n",regname[r]);
1750 assert(0);
1751}
1752void emit_jmpreg(u_int r)
1753{
1754 assem_debug("mov pc,%s\n",regname[r]);
1755 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1756}
1757
1758void emit_readword_indexed(int offset, int rs, int rt)
1759{
1760 assert(offset>-4096&&offset<4096);
1761 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1762 if(offset>=0) {
1763 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1764 }else{
1765 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1766 }
1767}
1768void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1769{
1770 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1771 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1772}
// Word load that optionally goes through a mapping register.
// map<0: plain offset load from rs. Otherwise addr must be 0 and the load
// uses rs indexed by map*4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// 64-bit load into rh:rl, optionally through a mapping register.
// The high word is skipped when rh<0. In the mapped case the map register
// is incremented by 1 (i.e. +4 bytes after the x4 scaling) between the two
// loads, so map is modified; rh must not alias rs there.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map>=0) {
    assert(rh!=rs);
    if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if(want_high) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr+4, rs, rl);
}
1793void emit_movsbl_indexed(int offset, int rs, int rt)
1794{
1795 assert(offset>-256&&offset<256);
1796 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1797 if(offset>=0) {
1798 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1799 }else{
1800 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1801 }
1802}
1803void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1804{
1805 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1806 else {
1807 if(addr==0) {
1808 emit_shlimm(map,2,map);
1809 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1810 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1811 }else{
1812 assert(addr>-256&&addr<256);
1813 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1814 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1815 emit_movsbl_indexed(addr, rt, rt);
1816 }
1817 }
1818}
1819void emit_movswl_indexed(int offset, int rs, int rt)
1820{
1821 assert(offset>-256&&offset<256);
1822 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1823 if(offset>=0) {
1824 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1825 }else{
1826 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1827 }
1828}
1829void emit_movzbl_indexed(int offset, int rs, int rt)
1830{
1831 assert(offset>-4096&&offset<4096);
1832 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1833 if(offset>=0) {
1834 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1835 }else{
1836 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1837 }
1838}
1839void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1840{
1841 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1842 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1843}
// Zero-extending byte load, optionally through a mapping register.
// map<0: plain offset load. Otherwise the load is indexed by map*4; a
// non-zero addr is first added to rs, using rt as the temporary base.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1856void emit_movzwl_indexed(int offset, int rs, int rt)
1857{
1858 assert(offset>-256&&offset<256);
1859 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1860 if(offset>=0) {
1861 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1862 }else{
1863 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1864 }
1865}
1866void emit_readword(int addr, int rt)
1867{
1868 u_int offset = addr-(u_int)&dynarec_local;
1869 assert(offset<4096);
1870 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1871 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1872}
1873void emit_movsbl(int addr, int rt)
1874{
1875 u_int offset = addr-(u_int)&dynarec_local;
1876 assert(offset<256);
1877 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1878 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1879}
1880void emit_movswl(int addr, int rt)
1881{
1882 u_int offset = addr-(u_int)&dynarec_local;
1883 assert(offset<256);
1884 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1885 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1886}
1887void emit_movzbl(int addr, int rt)
1888{
1889 u_int offset = addr-(u_int)&dynarec_local;
1890 assert(offset<4096);
1891 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1892 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1893}
1894void emit_movzwl(int addr, int rt)
1895{
1896 u_int offset = addr-(u_int)&dynarec_local;
1897 assert(offset<256);
1898 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1899 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1900}
1901void emit_movzwl_reg(int rs, int rt)
1902{
1903 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1904 assert(0);
1905}
1906
1907void emit_xchg(int rs, int rt)
1908{
1909 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1910 assert(0);
1911}
1912void emit_writeword_indexed(int rt, int offset, int rs)
1913{
1914 assert(offset>-4096&&offset<4096);
1915 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1916 if(offset>=0) {
1917 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1918 }else{
1919 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1920 }
1921}
1922void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1923{
1924 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1925 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1926}
// Word store that optionally goes through a mapping register.
// map<0: plain offset store to rs. Otherwise addr must be 0 and the store
// uses rs indexed by map*4. The temp parameter is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// 64-bit store of rh:rl, optionally through a mapping register.
//  - map<0: two plain offset stores (the high word is skipped when rh<0).
//  - mapped, temp!=rs: temp = map+1 (i.e. +4 bytes after the x4 scaling)
//    is prepared first and used as the index for the low word.
//  - mapped, temp==rs: no spare register, so rs itself is advanced by 4
//    after the high-word store (rs is modified).
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1951void emit_writehword_indexed(int rt, int offset, int rs)
1952{
1953 assert(offset>-256&&offset<256);
1954 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1955 if(offset>=0) {
1956 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1957 }else{
1958 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1959 }
1960}
1961void emit_writebyte_indexed(int rt, int offset, int rs)
1962{
1963 assert(offset>-4096&&offset<4096);
1964 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1965 if(offset>=0) {
1966 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1967 }else{
1968 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1969 }
1970}
1971void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1972{
1973 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1974 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1975}
// Byte store, optionally through a mapping register.
// map<0: plain offset store. Otherwise the store is indexed by map*4; a
// non-zero addr is first added to rs, with the sum placed in temp.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1988void emit_writeword(int rt, int addr)
1989{
1990 u_int offset = addr-(u_int)&dynarec_local;
1991 assert(offset<4096);
1992 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1993 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1994}
1995void emit_writehword(int rt, int addr)
1996{
1997 u_int offset = addr-(u_int)&dynarec_local;
1998 assert(offset<256);
1999 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2000 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2001}
2002void emit_writebyte(int rt, int addr)
2003{
2004 u_int offset = addr-(u_int)&dynarec_local;
2005 assert(offset<4096);
2006 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
2007 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2008}
// x86-style "store immediate word to memory" stub: logs the request and
// asserts; no ARM code is emitted.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// x86-style "store immediate byte to memory" stub: logs the request and
// asserts; no ARM code is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2019
2020void emit_mul(int rs)
2021{
2022 assem_debug("mul %%%s\n",regname[rs]);
2023 assert(0);
2024}
2025void emit_imul(int rs)
2026{
2027 assem_debug("imul %%%s\n",regname[rs]);
2028 assert(0);
2029}
2030void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2031{
2032 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2033 assert(rs1<16);
2034 assert(rs2<16);
2035 assert(hi<16);
2036 assert(lo<16);
2037 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2038}
2039void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2040{
2041 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2042 assert(rs1<16);
2043 assert(rs2<16);
2044 assert(hi<16);
2045 assert(lo<16);
2046 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2047}
2048
2049void emit_div(int rs)
2050{
2051 assem_debug("div %%%s\n",regname[rs]);
2052 assert(0);
2053}
2054void emit_idiv(int rs)
2055{
2056 assem_debug("idiv %%%s\n",regname[rs]);
2057 assert(0);
2058}
// x86-style "cdq" stub: logs the request and asserts; no ARM code is
// emitted.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2064
2065void emit_clz(int rs,int rt)
2066{
2067 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2068 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2069}
2070
2071void emit_subcs(int rs1,int rs2,int rt)
2072{
2073 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2074 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2075}
2076
2077void emit_shrcc_imm(int rs,u_int imm,int rt)
2078{
2079 assert(imm>0);
2080 assert(imm<32);
2081 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2082 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2083}
2084
2085void emit_negmi(int rs, int rt)
2086{
2087 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2088 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2089}
2090
2091void emit_negsmi(int rs, int rt)
2092{
2093 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2094 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2095}
2096
2097void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2098{
2099 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2100 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2101}
2102
2103void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2104{
2105 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2106 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2107}
2108
2109void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2110{
2111 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2112 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2113}
2114
2115void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2116{
2117 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2118 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2119}
2120
2121void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2122{
2123 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2124 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2125}
2126
2127void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2128{
2129 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2130 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2131}
2132
2133void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2134{
2135 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2136 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2137}
2138
2139void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2140{
2141 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2142 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2143}
2144
2145void emit_teq(int rs, int rt)
2146{
2147 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2148 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2149}
2150
2151void emit_rsbimm(int rs, int imm, int rt)
2152{
2153 u_int armval;
2154 genimm_checked(imm,&armval);
2155 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2156 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2157}
2158
2159// Load 2 immediates optimizing for small code size
2160void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2161{
2162 emit_movimm(imm1,rt1);
2163 u_int armval;
2164 if(genimm(imm2-imm1,&armval)) {
2165 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2166 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2167 }else if(genimm(imm1-imm2,&armval)) {
2168 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2169 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2170 }
2171 else emit_movimm(imm2,rt2);
2172}
2173
2174// Conditionally select one of two immediates, optimizing for small code size
2175// This will only be called if HAVE_CMOV_IMM is defined
2176void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2177{
2178 u_int armval;
2179 if(genimm(imm2-imm1,&armval)) {
2180 emit_movimm(imm1,rt);
2181 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2182 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2183 }else if(genimm(imm1-imm2,&armval)) {
2184 emit_movimm(imm1,rt);
2185 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2186 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2187 }
2188 else {
2189 #ifdef ARMv5_ONLY
2190 emit_movimm(imm1,rt);
2191 add_literal((int)out,imm2);
2192 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2193 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2194 #else
2195 emit_movw(imm1&0x0000FFFF,rt);
2196 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2197 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2198 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2199 }
2200 emit_movt(imm1&0xFFFF0000,rt);
2201 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2202 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2203 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2204 }
2205 #endif
2206 }
2207}
2208
// special case for checking invalid_code
// Not implemented in this backend: any call trips the assertion.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  (void)addr;
  (void)r;
  (void)imm;
  assert(0);
}
2214
2215// special case for checking invalid_code
2216void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2217{
2218 assert(imm<128&&imm>=0);
2219 assert(r>=0&&r<16);
2220 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2221 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2222 emit_cmpimm(HOST_TEMPREG,imm);
2223}
2224
2225// special case for tlb mapping
2226void emit_addsr12(int rs1,int rs2,int rt)
2227{
2228 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2229 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2230}
2231
2232void emit_callne(int a)
2233{
2234 assem_debug("blne %x\n",a);
2235 u_int offset=genjmp(a);
2236 output_w32(0x1b000000|offset);
2237}
2238
// Used to preload hash table entries
// NOTE(review): the byte sequence below (0x0F 0x18 + modrm + 32-bit address)
// looks like an x86 prefetch encoding carried over from another backend;
// confirm whether this is ever called on ARM (emit_prefetchreg emits "pld").
void emit_prefetch(void *addr)
{
  int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2248void emit_prefetchreg(int r)
2249{
2250 assem_debug("pld %s\n",regname[r]);
2251 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2252}
2253
2254// Special case for mini_ht
2255void emit_ldreq_indexed(int rs, u_int offset, int rt)
2256{
2257 assert(offset<4096);
2258 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2259 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2260}
2261
2262void emit_flds(int r,int sr)
2263{
2264 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2265 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2266}
2267
2268void emit_vldr(int r,int vr)
2269{
2270 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2271 output_w32(0xed900b00|(vr<<12)|(r<<16));
2272}
2273
2274void emit_fsts(int sr,int r)
2275{
2276 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2277 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2278}
2279
2280void emit_vstr(int vr,int r)
2281{
2282 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2283 output_w32(0xed800b00|(vr<<12)|(r<<16));
2284}
2285
2286void emit_ftosizs(int s,int d)
2287{
2288 assem_debug("ftosizs s%d,s%d\n",d,s);
2289 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2290}
2291
2292void emit_ftosizd(int s,int d)
2293{
2294 assem_debug("ftosizd s%d,d%d\n",d,s);
2295 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2296}
2297
2298void emit_fsitos(int s,int d)
2299{
2300 assem_debug("fsitos s%d,s%d\n",d,s);
2301 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2302}
2303
2304void emit_fsitod(int s,int d)
2305{
2306 assem_debug("fsitod d%d,s%d\n",d,s);
2307 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2308}
2309
2310void emit_fcvtds(int s,int d)
2311{
2312 assem_debug("fcvtds d%d,s%d\n",d,s);
2313 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2314}
2315
2316void emit_fcvtsd(int s,int d)
2317{
2318 assem_debug("fcvtsd s%d,d%d\n",d,s);
2319 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2320}
2321
// Emit "fsqrts s<d>,s<s>": single-precision square root.
// Both operands use the single-precision register encoding, so the debug
// listing names them both as s-registers (it previously printed "d%d,s%d").
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2327
// Emit "fsqrtd d<d>,d<s>": double-precision square root.
// Both operands use the double-precision register encoding, so the debug
// listing names them both as d-registers (it previously printed "s%d,d%d").
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2333
// Emit "fabss s<d>,s<s>": single-precision absolute value.
// Both operands use the single-precision register encoding, so the debug
// listing names them both as s-registers (it previously printed "d%d,s%d").
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2339
// Emit "fabsd d<d>,d<s>": double-precision absolute value.
// Both operands use the double-precision register encoding, so the debug
// listing names them both as d-registers (it previously printed "s%d,d%d").
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2345
// Emit "fnegs s<d>,s<s>": single-precision negate.
// Both operands use the single-precision register encoding, so the debug
// listing names them both as s-registers (it previously printed "d%d,s%d").
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2351
// Emit "fnegd d<d>,d<s>": double-precision negate.
// Both operands use the double-precision register encoding, so the debug
// listing names them both as d-registers (it previously printed "s%d,d%d").
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2357
2358void emit_fadds(int s1,int s2,int d)
2359{
2360 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2361 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2362}
2363
2364void emit_faddd(int s1,int s2,int d)
2365{
2366 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2367 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2368}
2369
2370void emit_fsubs(int s1,int s2,int d)
2371{
2372 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2373 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2374}
2375
2376void emit_fsubd(int s1,int s2,int d)
2377{
2378 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2379 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2380}
2381
2382void emit_fmuls(int s1,int s2,int d)
2383{
2384 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2385 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2386}
2387
2388void emit_fmuld(int s1,int s2,int d)
2389{
2390 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2391 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2392}
2393
2394void emit_fdivs(int s1,int s2,int d)
2395{
2396 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2397 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2398}
2399
2400void emit_fdivd(int s1,int s2,int d)
2401{
2402 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2403 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2404}
2405
2406void emit_fcmps(int x,int y)
2407{
2408 assem_debug("fcmps s14, s15\n");
2409 output_w32(0xeeb47a67);
2410}
2411
2412void emit_fcmpd(int x,int y)
2413{
2414 assem_debug("fcmpd d6, d7\n");
2415 output_w32(0xeeb46b47);
2416}
2417
2418void emit_fmstat()
2419{
2420 assem_debug("fmstat\n");
2421 output_w32(0xeef1fa10);
2422}
2423
2424void emit_bicne_imm(int rs,int imm,int rt)
2425{
2426 u_int armval;
2427 genimm_checked(imm,&armval);
2428 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2429 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2430}
2431
2432void emit_biccs_imm(int rs,int imm,int rt)
2433{
2434 u_int armval;
2435 genimm_checked(imm,&armval);
2436 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2437 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2438}
2439
2440void emit_bicvc_imm(int rs,int imm,int rt)
2441{
2442 u_int armval;
2443 genimm_checked(imm,&armval);
2444 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2445 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2446}
2447
2448void emit_bichi_imm(int rs,int imm,int rt)
2449{
2450 u_int armval;
2451 genimm_checked(imm,&armval);
2452 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2453 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2454}
2455
2456void emit_orrvs_imm(int rs,int imm,int rt)
2457{
2458 u_int armval;
2459 genimm_checked(imm,&armval);
2460 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2461 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2462}
2463
2464void emit_orrne_imm(int rs,int imm,int rt)
2465{
2466 u_int armval;
2467 genimm_checked(imm,&armval);
2468 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2469 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2470}
2471
2472void emit_andne_imm(int rs,int imm,int rt)
2473{
2474 u_int armval;
2475 genimm_checked(imm,&armval);
2476 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2477 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2478}
2479
2480void emit_jno_unlikely(int a)
2481{
2482 //emit_jno(a);
2483 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2484 output_w32(0x72800000|rd_rn_rm(15,15,0));
2485}
2486
2487// Save registers before function call
2488void save_regs(u_int reglist)
2489{
2490 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2491 if(!reglist) return;
2492 assem_debug("stmia fp,{");
2493 if(reglist&1) assem_debug("r0, ");
2494 if(reglist&2) assem_debug("r1, ");
2495 if(reglist&4) assem_debug("r2, ");
2496 if(reglist&8) assem_debug("r3, ");
2497 if(reglist&0x1000) assem_debug("r12");
2498 assem_debug("}\n");
2499 output_w32(0xe88b0000|reglist);
2500}
2501// Restore registers after function call
2502void restore_regs(u_int reglist)
2503{
2504 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2505 if(!reglist) return;
2506 assem_debug("ldmia fp,{");
2507 if(reglist&1) assem_debug("r0, ");
2508 if(reglist&2) assem_debug("r1, ");
2509 if(reglist&4) assem_debug("r2, ");
2510 if(reglist&8) assem_debug("r3, ");
2511 if(reglist&0x1000) assem_debug("r12");
2512 assem_debug("}\n");
2513 output_w32(0xe89b0000|reglist);
2514}
2515
2516// Write back consts using r14 so we don't disturb the other registers
2517void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2518{
2519 int hr;
2520 for(hr=0;hr<HOST_REGS;hr++) {
2521 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2522 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2523 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2524 int value=constmap[i][hr];
2525 if(value==0) {
2526 emit_zeroreg(HOST_TEMPREG);
2527 }
2528 else {
2529 emit_movimm(value,HOST_TEMPREG);
2530 }
2531 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2532#ifndef FORCE32
2533 if((i_is32>>i_regmap[hr])&1) {
2534 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2535 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2536 }
2537#endif
2538 }
2539 }
2540 }
2541 }
2542}
2543
2544/* Stubs/epilogue */
2545
2546void literal_pool(int n)
2547{
2548 if(!literalcount) return;
2549 if(n) {
2550 if((int)out-literals[0][0]<4096-n) return;
2551 }
2552 u_int *ptr;
2553 int i;
2554 for(i=0;i<literalcount;i++)
2555 {
2556 ptr=(u_int *)literals[i][0];
2557 u_int offset=(u_int)out-(u_int)ptr-8;
2558 assert(offset<4096);
2559 assert(!(offset&3));
2560 *ptr|=offset;
2561 output_w32(literals[i][1]);
2562 }
2563 literalcount=0;
2564}
2565
2566void literal_pool_jumpover(int n)
2567{
2568 if(!literalcount) return;
2569 if(n) {
2570 if((int)out-literals[0][0]<4096-n) return;
2571 }
2572 int jaddr=(int)out;
2573 emit_jmp(0);
2574 literal_pool(0);
2575 set_jump_target(jaddr,(int)out);
2576}
2577
2578emit_extjump2(int addr, int target, int linker)
2579{
2580 u_char *ptr=(u_char *)addr;
2581 assert((ptr[3]&0x0e)==0xa);
2582 emit_loadlp(target,0);
2583 emit_loadlp(addr,1);
2584 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2585 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2586//DEBUG >
2587#ifdef DEBUG_CYCLE_COUNT
2588 emit_readword((int)&last_count,ECX);
2589 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2590 emit_readword((int)&next_interupt,ECX);
2591 emit_writeword(HOST_CCREG,(int)&Count);
2592 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2593 emit_writeword(ECX,(int)&last_count);
2594#endif
2595//DEBUG <
2596 emit_jmp(linker);
2597}
2598
2599emit_extjump(int addr, int target)
2600{
2601 emit_extjump2(addr, target, (int)dyna_linker);
2602}
2603emit_extjump_ds(int addr, int target)
2604{
2605 emit_extjump2(addr, target, (int)dyna_linker_ds);
2606}
2607
2608#ifdef PCSX
2609#include "pcsxmem_inline.c"
2610#endif
2611
2612do_readstub(int n)
2613{
2614 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2615 literal_pool(256);
2616 set_jump_target(stubs[n][1],(int)out);
2617 int type=stubs[n][0];
2618 int i=stubs[n][3];
2619 int rs=stubs[n][4];
2620 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2621 u_int reglist=stubs[n][7];
2622 signed char *i_regmap=i_regs->regmap;
2623 int addr=get_reg(i_regmap,AGEN1+(i&1));
2624 int rth,rt;
2625 int ds;
2626 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2627 rth=get_reg(i_regmap,FTEMP|64);
2628 rt=get_reg(i_regmap,FTEMP);
2629 }else{
2630 rth=get_reg(i_regmap,rt1[i]|64);
2631 rt=get_reg(i_regmap,rt1[i]);
2632 }
2633 assert(rs>=0);
2634 if(addr<0) addr=rt;
2635 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
2636 assert(addr>=0);
2637 int ftable=0;
2638 if(type==LOADB_STUB||type==LOADBU_STUB)
2639 ftable=(int)readmemb;
2640 if(type==LOADH_STUB||type==LOADHU_STUB)
2641 ftable=(int)readmemh;
2642 if(type==LOADW_STUB)
2643 ftable=(int)readmem;
2644#ifndef FORCE32
2645 if(type==LOADD_STUB)
2646 ftable=(int)readmemd;
2647#endif
2648 assert(ftable!=0);
2649 emit_writeword(rs,(int)&address);
2650 //emit_pusha();
2651 save_regs(reglist);
2652#ifndef PCSX
2653 ds=i_regs!=&regs[i];
2654 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2655 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2656 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2657 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2658 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2659#endif
2660 emit_shrimm(rs,16,1);
2661 int cc=get_reg(i_regmap,CCREG);
2662 if(cc<0) {
2663 emit_loadreg(CCREG,2);
2664 }
2665 emit_movimm(ftable,0);
2666 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2667#ifndef PCSX
2668 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2669#endif
2670 //emit_readword((int)&last_count,temp);
2671 //emit_add(cc,temp,cc);
2672 //emit_writeword(cc,(int)&Count);
2673 //emit_mov(15,14);
2674 emit_call((int)&indirect_jump_indexed);
2675 //emit_callreg(rs);
2676 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2677#ifndef PCSX
2678 // We really shouldn't need to update the count here,
2679 // but not doing so causes random crashes...
2680 emit_readword((int)&Count,HOST_TEMPREG);
2681 emit_readword((int)&next_interupt,2);
2682 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2683 emit_writeword(2,(int)&last_count);
2684 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2685 if(cc<0) {
2686 emit_storereg(CCREG,HOST_TEMPREG);
2687 }
2688#endif
2689 //emit_popa();
2690 restore_regs(reglist);
2691 //if((cc=get_reg(regmap,CCREG))>=0) {
2692 // emit_loadreg(CCREG,cc);
2693 //}
2694 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2695 assert(rt>=0);
2696 if(type==LOADB_STUB)
2697 emit_movsbl((int)&readmem_dword,rt);
2698 if(type==LOADBU_STUB)
2699 emit_movzbl((int)&readmem_dword,rt);
2700 if(type==LOADH_STUB)
2701 emit_movswl((int)&readmem_dword,rt);
2702 if(type==LOADHU_STUB)
2703 emit_movzwl((int)&readmem_dword,rt);
2704 if(type==LOADW_STUB)
2705 emit_readword((int)&readmem_dword,rt);
2706 if(type==LOADD_STUB) {
2707 emit_readword((int)&readmem_dword,rt);
2708 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2709 }
2710 }
2711 emit_jmp(stubs[n][2]); // return address
2712}
2713
2714inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2715{
2716 int rs=get_reg(regmap,target);
2717 int rth=get_reg(regmap,target|64);
2718 int rt=get_reg(regmap,target);
2719 if(rs<0) rs=get_reg(regmap,-1);
2720 assert(rs>=0);
2721 int ftable=0;
2722 if(type==LOADB_STUB||type==LOADBU_STUB)
2723 ftable=(int)readmemb;
2724 if(type==LOADH_STUB||type==LOADHU_STUB)
2725 ftable=(int)readmemh;
2726 if(type==LOADW_STUB)
2727 ftable=(int)readmem;
2728#ifndef FORCE32
2729 if(type==LOADD_STUB)
2730 ftable=(int)readmemd;
2731#endif
2732 assert(ftable!=0);
2733#ifdef PCSX
2734 if(pcsx_direct_read(type,addr,target?rs:-1,rt))
2735 return;
2736#endif
2737 if(target==0)
2738 emit_movimm(addr,rs);
2739 emit_writeword(rs,(int)&address);
2740 //emit_pusha();
2741 save_regs(reglist);
2742#ifndef PCSX
2743 if((signed int)addr>=(signed int)0xC0000000) {
2744 // Theoretically we can have a pagefault here, if the TLB has never
2745 // been enabled and the address is outside the range 80000000..BFFFFFFF
2746 // Write out the registers so the pagefault can be handled. This is
2747 // a very rare case and likely represents a bug.
2748 int ds=regmap!=regs[i].regmap;
2749 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2750 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2751 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2752 }
2753#endif
2754 //emit_shrimm(rs,16,1);
2755 int cc=get_reg(regmap,CCREG);
2756 if(cc<0) {
2757 emit_loadreg(CCREG,2);
2758 }
2759 //emit_movimm(ftable,0);
2760 emit_movimm(((u_int *)ftable)[addr>>16],0);
2761 //emit_readword((int)&last_count,12);
2762 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2763#ifndef PCSX
2764 if((signed int)addr>=(signed int)0xC0000000) {
2765 // Pagefault address
2766 int ds=regmap!=regs[i].regmap;
2767 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2768 }
2769#endif
2770 //emit_add(12,2,2);
2771 //emit_writeword(2,(int)&Count);
2772 //emit_call(((u_int *)ftable)[addr>>16]);
2773 emit_call((int)&indirect_jump);
2774#ifndef PCSX
2775 // We really shouldn't need to update the count here,
2776 // but not doing so causes random crashes...
2777 emit_readword((int)&Count,HOST_TEMPREG);
2778 emit_readword((int)&next_interupt,2);
2779 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2780 emit_writeword(2,(int)&last_count);
2781 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2782 if(cc<0) {
2783 emit_storereg(CCREG,HOST_TEMPREG);
2784 }
2785#endif
2786 //emit_popa();
2787 restore_regs(reglist);
2788 if(rt>=0) {
2789 if(type==LOADB_STUB)
2790 emit_movsbl((int)&readmem_dword,rt);
2791 if(type==LOADBU_STUB)
2792 emit_movzbl((int)&readmem_dword,rt);
2793 if(type==LOADH_STUB)
2794 emit_movswl((int)&readmem_dword,rt);
2795 if(type==LOADHU_STUB)
2796 emit_movzwl((int)&readmem_dword,rt);
2797 if(type==LOADW_STUB)
2798 emit_readword((int)&readmem_dword,rt);
2799 if(type==LOADD_STUB) {
2800 emit_readword((int)&readmem_dword,rt);
2801 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2802 }
2803 }
2804}
2805
2806do_writestub(int n)
2807{
2808 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2809 literal_pool(256);
2810 set_jump_target(stubs[n][1],(int)out);
2811 int type=stubs[n][0];
2812 int i=stubs[n][3];
2813 int rs=stubs[n][4];
2814 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2815 u_int reglist=stubs[n][7];
2816 signed char *i_regmap=i_regs->regmap;
2817 int addr=get_reg(i_regmap,AGEN1+(i&1));
2818 int rth,rt,r;
2819 int ds;
2820 if(itype[i]==C1LS||itype[i]==C2LS) {
2821 rth=get_reg(i_regmap,FTEMP|64);
2822 rt=get_reg(i_regmap,r=FTEMP);
2823 }else{
2824 rth=get_reg(i_regmap,rs2[i]|64);
2825 rt=get_reg(i_regmap,r=rs2[i]);
2826 }
2827 assert(rs>=0);
2828 assert(rt>=0);
2829 if(addr<0) addr=get_reg(i_regmap,-1);
2830 assert(addr>=0);
2831 int ftable=0;
2832 if(type==STOREB_STUB)
2833 ftable=(int)writememb;
2834 if(type==STOREH_STUB)
2835 ftable=(int)writememh;
2836 if(type==STOREW_STUB)
2837 ftable=(int)writemem;
2838#ifndef FORCE32
2839 if(type==STORED_STUB)
2840 ftable=(int)writememd;
2841#endif
2842 assert(ftable!=0);
2843 emit_writeword(rs,(int)&address);
2844 //emit_shrimm(rs,16,rs);
2845 //emit_movmem_indexedx4(ftable,rs,rs);
2846 if(type==STOREB_STUB)
2847 emit_writebyte(rt,(int)&byte);
2848 if(type==STOREH_STUB)
2849 emit_writehword(rt,(int)&hword);
2850 if(type==STOREW_STUB)
2851 emit_writeword(rt,(int)&word);
2852 if(type==STORED_STUB) {
2853#ifndef FORCE32
2854 emit_writeword(rt,(int)&dword);
2855 emit_writeword(r?rth:rt,(int)&dword+4);
2856#else
2857 printf("STORED_STUB\n");
2858#endif
2859 }
2860 //emit_pusha();
2861 save_regs(reglist);
2862#ifndef PCSX
2863 ds=i_regs!=&regs[i];
2864 int real_rs=get_reg(i_regmap,rs1[i]);
2865 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2866 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2867 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2868 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2869#endif
2870 emit_shrimm(rs,16,1);
2871 int cc=get_reg(i_regmap,CCREG);
2872 if(cc<0) {
2873 emit_loadreg(CCREG,2);
2874 }
2875 emit_movimm(ftable,0);
2876 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2877#ifndef PCSX
2878 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2879#endif
2880 //emit_readword((int)&last_count,temp);
2881 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2882 //emit_add(cc,temp,cc);
2883 //emit_writeword(cc,(int)&Count);
2884 emit_call((int)&indirect_jump_indexed);
2885 //emit_callreg(rs);
2886 emit_readword((int)&Count,HOST_TEMPREG);
2887 emit_readword((int)&next_interupt,2);
2888 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2889 emit_writeword(2,(int)&last_count);
2890 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2891 if(cc<0) {
2892 emit_storereg(CCREG,HOST_TEMPREG);
2893 }
2894 //emit_popa();
2895 restore_regs(reglist);
2896 //if((cc=get_reg(regmap,CCREG))>=0) {
2897 // emit_loadreg(CCREG,cc);
2898 //}
2899 emit_jmp(stubs[n][2]); // return address
2900}
2901
2902inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2903{
2904 int rs=get_reg(regmap,-1);
2905 int rth=get_reg(regmap,target|64);
2906 int rt=get_reg(regmap,target);
2907 assert(rs>=0);
2908 assert(rt>=0);
2909#ifdef PCSX
2910 if(pcsx_direct_write(type,addr,rs,rt,regmap))
2911 return;
2912#endif
2913 int ftable=0;
2914 if(type==STOREB_STUB)
2915 ftable=(int)writememb;
2916 if(type==STOREH_STUB)
2917 ftable=(int)writememh;
2918 if(type==STOREW_STUB)
2919 ftable=(int)writemem;
2920#ifndef FORCE32
2921 if(type==STORED_STUB)
2922 ftable=(int)writememd;
2923#endif
2924 assert(ftable!=0);
2925 emit_writeword(rs,(int)&address);
2926 //emit_shrimm(rs,16,rs);
2927 //emit_movmem_indexedx4(ftable,rs,rs);
2928 if(type==STOREB_STUB)
2929 emit_writebyte(rt,(int)&byte);
2930 if(type==STOREH_STUB)
2931 emit_writehword(rt,(int)&hword);
2932 if(type==STOREW_STUB)
2933 emit_writeword(rt,(int)&word);
2934 if(type==STORED_STUB) {
2935#ifndef FORCE32
2936 emit_writeword(rt,(int)&dword);
2937 emit_writeword(target?rth:rt,(int)&dword+4);
2938#else
2939 printf("STORED_STUB\n");
2940#endif
2941 }
2942 //emit_pusha();
2943 save_regs(reglist);
2944#ifndef PCSX
2945 // rearmed note: load_all_consts prevents BIOS boot, some bug?
2946 if((signed int)addr>=(signed int)0xC0000000) {
2947 // Theoretically we can have a pagefault here, if the TLB has never
2948 // been enabled and the address is outside the range 80000000..BFFFFFFF
2949 // Write out the registers so the pagefault can be handled. This is
2950 // a very rare case and likely represents a bug.
2951 int ds=regmap!=regs[i].regmap;
2952 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2953 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2954 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2955 }
2956#endif
2957 //emit_shrimm(rs,16,1);
2958 int cc=get_reg(regmap,CCREG);
2959 if(cc<0) {
2960 emit_loadreg(CCREG,2);
2961 }
2962 //emit_movimm(ftable,0);
2963 emit_movimm(((u_int *)ftable)[addr>>16],0);
2964 //emit_readword((int)&last_count,12);
2965 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2966#ifndef PCSX
2967 if((signed int)addr>=(signed int)0xC0000000) {
2968 // Pagefault address
2969 int ds=regmap!=regs[i].regmap;
2970 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2971 }
2972#endif
2973 //emit_add(12,2,2);
2974 //emit_writeword(2,(int)&Count);
2975 //emit_call(((u_int *)ftable)[addr>>16]);
2976 emit_call((int)&indirect_jump);
2977 emit_readword((int)&Count,HOST_TEMPREG);
2978 emit_readword((int)&next_interupt,2);
2979 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2980 emit_writeword(2,(int)&last_count);
2981 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2982 if(cc<0) {
2983 emit_storereg(CCREG,HOST_TEMPREG);
2984 }
2985 //emit_popa();
2986 restore_regs(reglist);
2987}
2988
2989do_unalignedwritestub(int n)
2990{
2991 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2992 literal_pool(256);
2993 set_jump_target(stubs[n][1],(int)out);
2994
2995 int i=stubs[n][3];
2996 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2997 int addr=stubs[n][5];
2998 u_int reglist=stubs[n][7];
2999 signed char *i_regmap=i_regs->regmap;
3000 int temp2=get_reg(i_regmap,FTEMP);
3001 int rt;
3002 int ds, real_rs;
3003 rt=get_reg(i_regmap,rs2[i]);
3004 assert(rt>=0);
3005 assert(addr>=0);
3006 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3007 reglist|=(1<<addr);
3008 reglist&=~(1<<temp2);
3009
3010 emit_andimm(addr,0xfffffffc,temp2);
3011 emit_writeword(temp2,(int)&address);
3012
3013 save_regs(reglist);
3014#ifndef PCSX
3015 ds=i_regs!=&regs[i];
3016 real_rs=get_reg(i_regmap,rs1[i]);
3017 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3018 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3019 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3020 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
3021#endif
3022 emit_shrimm(addr,16,1);
3023 int cc=get_reg(i_regmap,CCREG);
3024 if(cc<0) {
3025 emit_loadreg(CCREG,2);
3026 }
3027 emit_movimm((u_int)readmem,0);
3028 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
3029#ifndef PCSX
3030 // pagefault address
3031 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3032#endif
3033 emit_call((int)&indirect_jump_indexed);
3034 restore_regs(reglist);
3035
3036 emit_readword((int)&readmem_dword,temp2);
3037 int temp=addr; //hmh
3038 emit_shlimm(addr,3,temp);
3039 emit_andimm(temp,24,temp);
3040#ifdef BIG_ENDIAN_MIPS
3041 if (opcode[i]==0x2e) // SWR
3042#else
3043 if (opcode[i]==0x2a) // SWL
3044#endif
3045 emit_xorimm(temp,24,temp);
3046 emit_movimm(-1,HOST_TEMPREG);
3047 if (opcode[i]==0x2a) { // SWL
3048 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3049 emit_orrshr(rt,temp,temp2);
3050 }else{
3051 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3052 emit_orrshl(rt,temp,temp2);
3053 }
3054 emit_readword((int)&address,addr);
3055 emit_writeword(temp2,(int)&word);
3056 //save_regs(reglist); // don't need to, no state changes
3057 emit_shrimm(addr,16,1);
3058 emit_movimm((u_int)writemem,0);
3059 //emit_call((int)&indirect_jump_indexed);
3060 emit_mov(15,14);
3061 emit_readword_dualindexedx4(0,1,15);
3062 emit_readword((int)&Count,HOST_TEMPREG);
3063 emit_readword((int)&next_interupt,2);
3064 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3065 emit_writeword(2,(int)&last_count);
3066 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3067 if(cc<0) {
3068 emit_storereg(CCREG,HOST_TEMPREG);
3069 }
3070 restore_regs(reglist);
3071 emit_jmp(stubs[n][2]); // return address
3072}
3073
// Debug helper: print the eight register values passed in, followed in
// parentheses by the word found just below the first argument in memory.
// NOTE(review): (&edi)[-1] reads outside the parameter object; the value
// printed depends on the calling convention and stack layout.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_arg=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
3078
3079do_invstub(int n)
3080{
3081 literal_pool(20);
3082 u_int reglist=stubs[n][3];
3083 set_jump_target(stubs[n][1],(int)out);
3084 save_regs(reglist);
3085 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3086 emit_call((int)&invalidate_addr);
3087 restore_regs(reglist);
3088 emit_jmp(stubs[n][2]); // return address
3089}
3090
3091int do_dirty_stub(int i)
3092{
3093 assem_debug("do_dirty_stub %x\n",start+i*4);
3094 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3095 #ifdef PCSX
3096 addr=(u_int)source;
3097 #endif
3098 // Careful about the code output here, verify_dirty needs to parse it.
3099 #ifdef ARMv5_ONLY
3100 emit_loadlp(addr,1);
3101 emit_loadlp((int)copy,2);
3102 emit_loadlp(slen*4,3);
3103 #else
3104 emit_movw(addr&0x0000FFFF,1);
3105 emit_movw(((u_int)copy)&0x0000FFFF,2);
3106 emit_movt(addr&0xFFFF0000,1);
3107 emit_movt(((u_int)copy)&0xFFFF0000,2);
3108 emit_movw(slen*4,3);
3109 #endif
3110 emit_movimm(start+i*4,0);
3111 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3112 int entry=(int)out;
3113 load_regs_entry(i);
3114 if(entry==(int)out) entry=instr_addr[i];
3115 emit_jmp(instr_addr[i]);
3116 return entry;
3117}
3118
3119void do_dirty_stub_ds()
3120{
3121 // Careful about the code output here, verify_dirty needs to parse it.
3122 #ifdef ARMv5_ONLY
3123 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3124 emit_loadlp((int)copy,2);
3125 emit_loadlp(slen*4,3);
3126 #else
3127 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3128 emit_movw(((u_int)copy)&0x0000FFFF,2);
3129 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3130 emit_movt(((u_int)copy)&0xFFFF0000,2);
3131 emit_movw(slen*4,3);
3132 #endif
3133 emit_movimm(start+1,0);
3134 emit_call((int)&verify_code_ds);
3135}
3136
3137do_cop1stub(int n)
3138{
3139 literal_pool(256);
3140 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3141 set_jump_target(stubs[n][1],(int)out);
3142 int i=stubs[n][3];
3143// int rs=stubs[n][4];
3144 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3145 int ds=stubs[n][6];
3146 if(!ds) {
3147 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3148 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3149 }
3150 //else {printf("fp exception in delay slot\n");}
3151 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3152 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3153 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3154 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3155 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3156}
3157
3158/* TLB */
3159
3160int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3161{
3162 if(c) {
3163 if((signed int)addr>=(signed int)0xC0000000) {
3164 // address_generation already loaded the const
3165 emit_readword_dualindexedx4(FP,map,map);
3166 }
3167 else
3168 return -1; // No mapping
3169 }
3170 else {
3171 assert(s!=map);
3172 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3173 emit_addsr12(map,s,map);
3174 // Schedule this while we wait on the load
3175 //if(x) emit_xorimm(s,x,ar);
3176 if(shift>=0) emit_shlimm(s,3,shift);
3177 if(~a) emit_andimm(s,a,ar);
3178 emit_readword_dualindexedx4(FP,map,map);
3179 }
3180 return map;
3181}
3182int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3183{
3184 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3185 emit_test(map,map);
3186 *jaddr=(int)out;
3187 emit_js(0);
3188 }
3189 return map;
3190}
3191
3192int gen_tlb_addr_r(int ar, int map) {
3193 if(map>=0) {
3194 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3195 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3196 }
3197}
3198
3199int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3200{
3201 if(c) {
3202 if(addr<0x80800000||addr>=0xC0000000) {
3203 // address_generation already loaded the const
3204 emit_readword_dualindexedx4(FP,map,map);
3205 }
3206 else
3207 return -1; // No mapping
3208 }
3209 else {
3210 assert(s!=map);
3211 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3212 emit_addsr12(map,s,map);
3213 // Schedule this while we wait on the load
3214 //if(x) emit_xorimm(s,x,ar);
3215 emit_readword_dualindexedx4(FP,map,map);
3216 }
3217 return map;
3218}
3219int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3220{
3221 if(!c||addr<0x80800000||addr>=0xC0000000) {
3222 emit_testimm(map,0x40000000);
3223 *jaddr=(int)out;
3224 emit_jne(0);
3225 }
3226}
3227
3228int gen_tlb_addr_w(int ar, int map) {
3229 if(map>=0) {
3230 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3231 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3232 }
3233}
3234
3235// Generate the address of the memory_map entry, relative to dynarec_local
3236generate_map_const(u_int addr,int reg) {
3237 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3238 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3239}
3240
3241/* Special assem */
3242
3243void shift_assemble_arm(int i,struct regstat *i_regs)
3244{
3245 if(rt1[i]) {
3246 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3247 {
3248 signed char s,t,shift;
3249 t=get_reg(i_regs->regmap,rt1[i]);
3250 s=get_reg(i_regs->regmap,rs1[i]);
3251 shift=get_reg(i_regs->regmap,rs2[i]);
3252 if(t>=0){
3253 if(rs1[i]==0)
3254 {
3255 emit_zeroreg(t);
3256 }
3257 else if(rs2[i]==0)
3258 {
3259 assert(s>=0);
3260 if(s!=t) emit_mov(s,t);
3261 }
3262 else
3263 {
3264 emit_andimm(shift,31,HOST_TEMPREG);
3265 if(opcode2[i]==4) // SLLV
3266 {
3267 emit_shl(s,HOST_TEMPREG,t);
3268 }
3269 if(opcode2[i]==6) // SRLV
3270 {
3271 emit_shr(s,HOST_TEMPREG,t);
3272 }
3273 if(opcode2[i]==7) // SRAV
3274 {
3275 emit_sar(s,HOST_TEMPREG,t);
3276 }
3277 }
3278 }
3279 } else { // DSLLV/DSRLV/DSRAV
3280 signed char sh,sl,th,tl,shift;
3281 th=get_reg(i_regs->regmap,rt1[i]|64);
3282 tl=get_reg(i_regs->regmap,rt1[i]);
3283 sh=get_reg(i_regs->regmap,rs1[i]|64);
3284 sl=get_reg(i_regs->regmap,rs1[i]);
3285 shift=get_reg(i_regs->regmap,rs2[i]);
3286 if(tl>=0){
3287 if(rs1[i]==0)
3288 {
3289 emit_zeroreg(tl);
3290 if(th>=0) emit_zeroreg(th);
3291 }
3292 else if(rs2[i]==0)
3293 {
3294 assert(sl>=0);
3295 if(sl!=tl) emit_mov(sl,tl);
3296 if(th>=0&&sh!=th) emit_mov(sh,th);
3297 }
3298 else
3299 {
3300 // FIXME: What if shift==tl ?
3301 assert(shift!=tl);
3302 int temp=get_reg(i_regs->regmap,-1);
3303 int real_th=th;
3304 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3305 assert(sl>=0);
3306 assert(sh>=0);
3307 emit_andimm(shift,31,HOST_TEMPREG);
3308 if(opcode2[i]==0x14) // DSLLV
3309 {
3310 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3311 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3312 emit_orrshr(sl,HOST_TEMPREG,th);
3313 emit_andimm(shift,31,HOST_TEMPREG);
3314 emit_testimm(shift,32);
3315 emit_shl(sl,HOST_TEMPREG,tl);
3316 if(th>=0) emit_cmovne_reg(tl,th);
3317 emit_cmovne_imm(0,tl);
3318 }
3319 if(opcode2[i]==0x16) // DSRLV
3320 {
3321 assert(th>=0);
3322 emit_shr(sl,HOST_TEMPREG,tl);
3323 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3324 emit_orrshl(sh,HOST_TEMPREG,tl);
3325 emit_andimm(shift,31,HOST_TEMPREG);
3326 emit_testimm(shift,32);
3327 emit_shr(sh,HOST_TEMPREG,th);
3328 emit_cmovne_reg(th,tl);
3329 if(real_th>=0) emit_cmovne_imm(0,th);
3330 }
3331 if(opcode2[i]==0x17) // DSRAV
3332 {
3333 assert(th>=0);
3334 emit_shr(sl,HOST_TEMPREG,tl);
3335 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3336 if(real_th>=0) {
3337 assert(temp>=0);
3338 emit_sarimm(th,31,temp);
3339 }
3340 emit_orrshl(sh,HOST_TEMPREG,tl);
3341 emit_andimm(shift,31,HOST_TEMPREG);
3342 emit_testimm(shift,32);
3343 emit_sar(sh,HOST_TEMPREG,th);
3344 emit_cmovne_reg(th,tl);
3345 if(real_th>=0) emit_cmovne_reg(temp,th);
3346 }
3347 }
3348 }
3349 }
3350 }
3351}
3352#define shift_assemble shift_assemble_arm
3353
3354void loadlr_assemble_arm(int i,struct regstat *i_regs)
3355{
3356 int s,th,tl,temp,temp2,addr,map=-1;
3357 int offset;
3358 int jaddr=0;
3359 int memtarget=0,c=0;
3360 u_int hr,reglist=0;
3361 th=get_reg(i_regs->regmap,rt1[i]|64);
3362 tl=get_reg(i_regs->regmap,rt1[i]);
3363 s=get_reg(i_regs->regmap,rs1[i]);
3364 temp=get_reg(i_regs->regmap,-1);
3365 temp2=get_reg(i_regs->regmap,FTEMP);
3366 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3367 assert(addr<0);
3368 offset=imm[i];
3369 for(hr=0;hr<HOST_REGS;hr++) {
3370 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3371 }
3372 reglist|=1<<temp;
3373 if(offset||s<0||c) addr=temp2;
3374 else addr=s;
3375 if(s>=0) {
3376 c=(i_regs->wasconst>>s)&1;
3377 if(c) {
3378 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3379 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3380 }
3381 }
3382 if(!using_tlb) {
3383 if(!c) {
3384 #ifdef RAM_OFFSET
3385 map=get_reg(i_regs->regmap,ROREG);
3386 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3387 #endif
3388 emit_shlimm(addr,3,temp);
3389 if (opcode[i]==0x22||opcode[i]==0x26) {
3390 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3391 }else{
3392 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3393 }
3394 emit_cmpimm(addr,RAM_SIZE);
3395 jaddr=(int)out;
3396 emit_jno(0);
3397 }
3398 else {
3399 if (opcode[i]==0x22||opcode[i]==0x26) {
3400 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3401 }else{
3402 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3403 }
3404 }
3405 }else{ // using tlb
3406 int a;
3407 if(c) {
3408 a=-1;
3409 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3410 a=0xFFFFFFFC; // LWL/LWR
3411 }else{
3412 a=0xFFFFFFF8; // LDL/LDR
3413 }
3414 map=get_reg(i_regs->regmap,TLREG);
3415 assert(map>=0);
3416 reglist&=~(1<<map);
3417 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3418 if(c) {
3419 if (opcode[i]==0x22||opcode[i]==0x26) {
3420 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3421 }else{
3422 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3423 }
3424 }
3425 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3426 }
3427 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3428 if(!c||memtarget) {
3429 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3430 emit_readword_indexed_tlb(0,temp2,map,temp2);
3431 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3432 }
3433 else
3434 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3435 if(rt1[i]) {
3436 assert(tl>=0);
3437 emit_andimm(temp,24,temp);
3438#ifdef BIG_ENDIAN_MIPS
3439 if (opcode[i]==0x26) // LWR
3440#else
3441 if (opcode[i]==0x22) // LWL
3442#endif
3443 emit_xorimm(temp,24,temp);
3444 emit_movimm(-1,HOST_TEMPREG);
3445 if (opcode[i]==0x26) {
3446 emit_shr(temp2,temp,temp2);
3447 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3448 }else{
3449 emit_shl(temp2,temp,temp2);
3450 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3451 }
3452 emit_or(temp2,tl,tl);
3453 }
3454 //emit_storereg(rt1[i],tl); // DEBUG
3455 }
3456 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3457 // FIXME: little endian
3458 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3459 if(!c||memtarget) {
3460 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3461 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3462 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3463 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3464 }
3465 else
3466 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3467 if(rt1[i]) {
3468 assert(th>=0);
3469 assert(tl>=0);
3470 emit_testimm(temp,32);
3471 emit_andimm(temp,24,temp);
3472 if (opcode[i]==0x1A) { // LDL
3473 emit_rsbimm(temp,32,HOST_TEMPREG);
3474 emit_shl(temp2h,temp,temp2h);
3475 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3476 emit_movimm(-1,HOST_TEMPREG);
3477 emit_shl(temp2,temp,temp2);
3478 emit_cmove_reg(temp2h,th);
3479 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3480 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3481 emit_orreq(temp2,tl,tl);
3482 emit_orrne(temp2,th,th);
3483 }
3484 if (opcode[i]==0x1B) { // LDR
3485 emit_xorimm(temp,24,temp);
3486 emit_rsbimm(temp,32,HOST_TEMPREG);
3487 emit_shr(temp2,temp,temp2);
3488 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3489 emit_movimm(-1,HOST_TEMPREG);
3490 emit_shr(temp2h,temp,temp2h);
3491 emit_cmovne_reg(temp2,tl);
3492 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3493 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3494 emit_orrne(temp2h,th,th);
3495 emit_orreq(temp2h,tl,tl);
3496 }
3497 }
3498 }
3499}
3500#define loadlr_assemble loadlr_assemble_arm
3501
3502void cop0_assemble(int i,struct regstat *i_regs)
3503{
3504 if(opcode2[i]==0) // MFC0
3505 {
3506 signed char t=get_reg(i_regs->regmap,rt1[i]);
3507 char copr=(source[i]>>11)&0x1f;
3508 //assert(t>=0); // Why does this happen? OOT is weird
3509 if(t>=0&&rt1[i]!=0) {
3510#ifdef MUPEN64
3511 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3512 emit_movimm((source[i]>>11)&0x1f,1);
3513 emit_writeword(0,(int)&PC);
3514 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3515 if(copr==9) {
3516 emit_readword((int)&last_count,ECX);
3517 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3518 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3519 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3520 emit_writeword(HOST_CCREG,(int)&Count);
3521 }
3522 emit_call((int)MFC0);
3523 emit_readword((int)&readmem_dword,t);
3524#else
3525 emit_readword((int)&reg_cop0+copr*4,t);
3526#endif
3527 }
3528 }
3529 else if(opcode2[i]==4) // MTC0
3530 {
3531 signed char s=get_reg(i_regs->regmap,rs1[i]);
3532 char copr=(source[i]>>11)&0x1f;
3533 assert(s>=0);
3534 emit_writeword(s,(int)&readmem_dword);
3535 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3536#ifdef MUPEN64
3537 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3538 emit_movimm((source[i]>>11)&0x1f,1);
3539 emit_writeword(0,(int)&PC);
3540 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3541#endif
3542 if(copr==9||copr==11||copr==12||copr==13) {
3543 emit_readword((int)&last_count,ECX);
3544 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3545 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3546 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3547 emit_writeword(HOST_CCREG,(int)&Count);
3548 }
3549 // What a mess. The status register (12) can enable interrupts,
3550 // so needs a special case to handle a pending interrupt.
3551 // The interrupt must be taken immediately, because a subsequent
3552 // instruction might disable interrupts again.
3553 if(copr==12||copr==13) {
3554#ifdef PCSX
3555 if (is_delayslot) {
3556 // burn cycles to cause cc_interrupt, which will
3557 // reschedule next_interupt. Relies on CCREG from above.
3558 assem_debug("MTC0 DS %d\n", copr);
3559 emit_writeword(HOST_CCREG,(int)&last_count);
3560 emit_movimm(0,HOST_CCREG);
3561 emit_storereg(CCREG,HOST_CCREG);
3562 emit_movimm(copr,0);
3563 emit_call((int)pcsx_mtc0_ds);
3564 return;
3565 }
3566#endif
3567 emit_movimm(start+i*4+4,0);
3568 emit_movimm(0,1);
3569 emit_writeword(0,(int)&pcaddr);
3570 emit_writeword(1,(int)&pending_exception);
3571 }
3572 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3573 //else
3574#ifdef PCSX
3575 emit_movimm(copr,0);
3576 emit_call((int)pcsx_mtc0);
3577#else
3578 emit_call((int)MTC0);
3579#endif
3580 if(copr==9||copr==11||copr==12||copr==13) {
3581 emit_readword((int)&Count,HOST_CCREG);
3582 emit_readword((int)&next_interupt,ECX);
3583 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3584 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3585 emit_writeword(ECX,(int)&last_count);
3586 emit_storereg(CCREG,HOST_CCREG);
3587 }
3588 if(copr==12||copr==13) {
3589 assert(!is_delayslot);
3590 emit_readword((int)&pending_exception,14);
3591 }
3592 emit_loadreg(rs1[i],s);
3593 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3594 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3595 if(copr==12||copr==13) {
3596 emit_test(14,14);
3597 emit_jne((int)&do_interrupt);
3598 }
3599 cop1_usable=0;
3600 }
3601 else
3602 {
3603 assert(opcode2[i]==0x10);
3604#ifndef DISABLE_TLB
3605 if((source[i]&0x3f)==0x01) // TLBR
3606 emit_call((int)TLBR);
3607 if((source[i]&0x3f)==0x02) // TLBWI
3608 emit_call((int)TLBWI_new);
3609 if((source[i]&0x3f)==0x06) { // TLBWR
3610 // The TLB entry written by TLBWR is dependent on the count,
3611 // so update the cycle count
3612 emit_readword((int)&last_count,ECX);
3613 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3614 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3615 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3616 emit_writeword(HOST_CCREG,(int)&Count);
3617 emit_call((int)TLBWR_new);
3618 }
3619 if((source[i]&0x3f)==0x08) // TLBP
3620 emit_call((int)TLBP);
3621#endif
3622#ifdef PCSX
3623 if((source[i]&0x3f)==0x10) // RFE
3624 {
3625 emit_readword((int)&Status,0);
3626 emit_andimm(0,0x3c,1);
3627 emit_andimm(0,~0xf,0);
3628 emit_orrshr_imm(1,2,0);
3629 emit_writeword(0,(int)&Status);
3630 }
3631#else
3632 if((source[i]&0x3f)==0x18) // ERET
3633 {
3634 int count=ccadj[i];
3635 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3636 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3637 emit_jmp((int)jump_eret);
3638 }
3639#endif
3640 }
3641}
3642
3643static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3644{
3645 switch (copr) {
3646 case 1:
3647 case 3:
3648 case 5:
3649 case 8:
3650 case 9:
3651 case 10:
3652 case 11:
3653 emit_readword((int)&reg_cop2d[copr],tl);
3654 emit_signextend16(tl,tl);
3655 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3656 break;
3657 case 7:
3658 case 16:
3659 case 17:
3660 case 18:
3661 case 19:
3662 emit_readword((int)&reg_cop2d[copr],tl);
3663 emit_andimm(tl,0xffff,tl);
3664 emit_writeword(tl,(int)&reg_cop2d[copr]);
3665 break;
3666 case 15:
3667 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3668 emit_writeword(tl,(int)&reg_cop2d[copr]);
3669 break;
3670 case 28:
3671 case 29:
3672 emit_readword((int)&reg_cop2d[9],temp);
3673 emit_testimm(temp,0x8000); // do we need this?
3674 emit_andimm(temp,0xf80,temp);
3675 emit_andne_imm(temp,0,temp);
3676 emit_shrimm(temp,7,tl);
3677 emit_readword((int)&reg_cop2d[10],temp);
3678 emit_testimm(temp,0x8000);
3679 emit_andimm(temp,0xf80,temp);
3680 emit_andne_imm(temp,0,temp);
3681 emit_orrshr_imm(temp,2,tl);
3682 emit_readword((int)&reg_cop2d[11],temp);
3683 emit_testimm(temp,0x8000);
3684 emit_andimm(temp,0xf80,temp);
3685 emit_andne_imm(temp,0,temp);
3686 emit_orrshl_imm(temp,3,tl);
3687 emit_writeword(tl,(int)&reg_cop2d[copr]);
3688 break;
3689 default:
3690 emit_readword((int)&reg_cop2d[copr],tl);
3691 break;
3692 }
3693}
3694
3695static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3696{
3697 switch (copr) {
3698 case 15:
3699 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3700 emit_writeword(sl,(int)&reg_cop2d[copr]);
3701 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3702 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3703 emit_writeword(sl,(int)&reg_cop2d[14]);
3704 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3705 break;
3706 case 28:
3707 emit_andimm(sl,0x001f,temp);
3708 emit_shlimm(temp,7,temp);
3709 emit_writeword(temp,(int)&reg_cop2d[9]);
3710 emit_andimm(sl,0x03e0,temp);
3711 emit_shlimm(temp,2,temp);
3712 emit_writeword(temp,(int)&reg_cop2d[10]);
3713 emit_andimm(sl,0x7c00,temp);
3714 emit_shrimm(temp,3,temp);
3715 emit_writeword(temp,(int)&reg_cop2d[11]);
3716 emit_writeword(sl,(int)&reg_cop2d[28]);
3717 break;
3718 case 30:
3719 emit_movs(sl,temp);
3720 emit_mvnmi(temp,temp);
3721 emit_clz(temp,temp);
3722 emit_writeword(sl,(int)&reg_cop2d[30]);
3723 emit_writeword(temp,(int)&reg_cop2d[31]);
3724 break;
3725 case 31:
3726 break;
3727 default:
3728 emit_writeword(sl,(int)&reg_cop2d[copr]);
3729 break;
3730 }
3731}
3732
3733void cop2_assemble(int i,struct regstat *i_regs)
3734{
3735 u_int copr=(source[i]>>11)&0x1f;
3736 signed char temp=get_reg(i_regs->regmap,-1);
3737 if (opcode2[i]==0) { // MFC2
3738 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3739 if(tl>=0&&rt1[i]!=0)
3740 cop2_get_dreg(copr,tl,temp);
3741 }
3742 else if (opcode2[i]==4) { // MTC2
3743 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3744 cop2_put_dreg(copr,sl,temp);
3745 }
3746 else if (opcode2[i]==2) // CFC2
3747 {
3748 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3749 if(tl>=0&&rt1[i]!=0)
3750 emit_readword((int)&reg_cop2c[copr],tl);
3751 }
3752 else if (opcode2[i]==6) // CTC2
3753 {
3754 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3755 switch(copr) {
3756 case 4:
3757 case 12:
3758 case 20:
3759 case 26:
3760 case 27:
3761 case 29:
3762 case 30:
3763 emit_signextend16(sl,temp);
3764 break;
3765 case 31:
3766 //value = value & 0x7ffff000;
3767 //if (value & 0x7f87e000) value |= 0x80000000;
3768 emit_shrimm(sl,12,temp);
3769 emit_shlimm(temp,12,temp);
3770 emit_testimm(temp,0x7f000000);
3771 emit_testeqimm(temp,0x00870000);
3772 emit_testeqimm(temp,0x0000e000);
3773 emit_orrne_imm(temp,0x80000000,temp);
3774 break;
3775 default:
3776 temp=sl;
3777 break;
3778 }
3779 emit_writeword(temp,(int)&reg_cop2c[copr]);
3780 assert(sl>=0);
3781 }
3782}
3783
3784void c2op_assemble(int i,struct regstat *i_regs)
3785{
3786 signed char temp=get_reg(i_regs->regmap,-1);
3787 u_int c2op=source[i]&0x3f;
3788 u_int hr,reglist=0;
3789 for(hr=0;hr<HOST_REGS;hr++) {
3790 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3791 }
3792 if(i==0||itype[i-1]!=C2OP)
3793 save_regs(reglist);
3794
3795 if (gte_handlers[c2op]!=NULL) {
3796 int cc=get_reg(i_regs->regmap,CCREG);
3797 emit_movimm(source[i],1); // opcode
3798 if (cc>=0&&gte_cycletab[c2op])
3799 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: could just adjust ccadj?
3800 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3801 emit_writeword(1,(int)&psxRegs.code);
3802 emit_call((int)gte_handlers[c2op]);
3803 }
3804
3805 if(i>=slen-1||itype[i+1]!=C2OP)
3806 restore_regs(reglist);
3807}
3808
3809void cop1_unusable(int i,struct regstat *i_regs)
3810{
3811 // XXX: should just just do the exception instead
3812 if(!cop1_usable) {
3813 int jaddr=(int)out;
3814 emit_jmp(0);
3815 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3816 cop1_usable=1;
3817 }
3818}
3819
3820void cop1_assemble(int i,struct regstat *i_regs)
3821{
3822#ifndef DISABLE_COP1
3823 // Check cop1 unusable
3824 if(!cop1_usable) {
3825 signed char rs=get_reg(i_regs->regmap,CSREG);
3826 assert(rs>=0);
3827 emit_testimm(rs,0x20000000);
3828 int jaddr=(int)out;
3829 emit_jeq(0);
3830 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3831 cop1_usable=1;
3832 }
3833 if (opcode2[i]==0) { // MFC1
3834 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3835 if(tl>=0) {
3836 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3837 emit_readword_indexed(0,tl,tl);
3838 }
3839 }
3840 else if (opcode2[i]==1) { // DMFC1
3841 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3842 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3843 if(tl>=0) {
3844 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3845 if(th>=0) emit_readword_indexed(4,tl,th);
3846 emit_readword_indexed(0,tl,tl);
3847 }
3848 }
3849 else if (opcode2[i]==4) { // MTC1
3850 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3851 signed char temp=get_reg(i_regs->regmap,-1);
3852 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3853 emit_writeword_indexed(sl,0,temp);
3854 }
3855 else if (opcode2[i]==5) { // DMTC1
3856 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3857 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3858 signed char temp=get_reg(i_regs->regmap,-1);
3859 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3860 emit_writeword_indexed(sh,4,temp);
3861 emit_writeword_indexed(sl,0,temp);
3862 }
3863 else if (opcode2[i]==2) // CFC1
3864 {
3865 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3866 if(tl>=0) {
3867 u_int copr=(source[i]>>11)&0x1f;
3868 if(copr==0) emit_readword((int)&FCR0,tl);
3869 if(copr==31) emit_readword((int)&FCR31,tl);
3870 }
3871 }
3872 else if (opcode2[i]==6) // CTC1
3873 {
3874 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3875 u_int copr=(source[i]>>11)&0x1f;
3876 assert(sl>=0);
3877 if(copr==31)
3878 {
3879 emit_writeword(sl,(int)&FCR31);
3880 // Set the rounding mode
3881 //FIXME
3882 //char temp=get_reg(i_regs->regmap,-1);
3883 //emit_andimm(sl,3,temp);
3884 //emit_fldcw_indexed((int)&rounding_modes,temp);
3885 }
3886 }
3887#else
3888 cop1_unusable(i, i_regs);
3889#endif
3890}
3891
3892void fconv_assemble_arm(int i,struct regstat *i_regs)
3893{
3894#ifndef DISABLE_COP1
3895 signed char temp=get_reg(i_regs->regmap,-1);
3896 assert(temp>=0);
3897 // Check cop1 unusable
3898 if(!cop1_usable) {
3899 signed char rs=get_reg(i_regs->regmap,CSREG);
3900 assert(rs>=0);
3901 emit_testimm(rs,0x20000000);
3902 int jaddr=(int)out;
3903 emit_jeq(0);
3904 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3905 cop1_usable=1;
3906 }
3907
3908 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3909 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3910 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3911 emit_flds(temp,15);
3912 emit_ftosizs(15,15); // float->int, truncate
3913 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3914 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3915 emit_fsts(15,temp);
3916 return;
3917 }
3918 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3919 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3920 emit_vldr(temp,7);
3921 emit_ftosizd(7,13); // double->int, truncate
3922 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3923 emit_fsts(13,temp);
3924 return;
3925 }
3926
3927 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3928 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3929 emit_flds(temp,13);
3930 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3931 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3932 emit_fsitos(13,15);
3933 emit_fsts(15,temp);
3934 return;
3935 }
3936 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3937 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3938 emit_flds(temp,13);
3939 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3940 emit_fsitod(13,7);
3941 emit_vstr(7,temp);
3942 return;
3943 }
3944
3945 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3946 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3947 emit_flds(temp,13);
3948 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3949 emit_fcvtds(13,7);
3950 emit_vstr(7,temp);
3951 return;
3952 }
3953 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3954 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3955 emit_vldr(temp,7);
3956 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3957 emit_fcvtsd(7,13);
3958 emit_fsts(13,temp);
3959 return;
3960 }
3961 #endif
3962
3963 // C emulation code
3964
3965 u_int hr,reglist=0;
3966 for(hr=0;hr<HOST_REGS;hr++) {
3967 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3968 }
3969 save_regs(reglist);
3970
3971 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3972 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3973 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3974 emit_call((int)cvt_s_w);
3975 }
3976 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3977 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3978 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3979 emit_call((int)cvt_d_w);
3980 }
3981 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3982 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3983 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3984 emit_call((int)cvt_s_l);
3985 }
3986 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3987 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3988 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3989 emit_call((int)cvt_d_l);
3990 }
3991
3992 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3993 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3994 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3995 emit_call((int)cvt_d_s);
3996 }
3997 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3998 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3999 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4000 emit_call((int)cvt_w_s);
4001 }
4002 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4003 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4004 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4005 emit_call((int)cvt_l_s);
4006 }
4007
4008 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4009 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4010 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4011 emit_call((int)cvt_s_d);
4012 }
4013 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4014 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4015 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4016 emit_call((int)cvt_w_d);
4017 }
4018 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4019 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4020 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4021 emit_call((int)cvt_l_d);
4022 }
4023
4024 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4025 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4026 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4027 emit_call((int)round_l_s);
4028 }
4029 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4030 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4031 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4032 emit_call((int)trunc_l_s);
4033 }
4034 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4035 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4036 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4037 emit_call((int)ceil_l_s);
4038 }
4039 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4040 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4041 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4042 emit_call((int)floor_l_s);
4043 }
4044 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4045 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4046 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4047 emit_call((int)round_w_s);
4048 }
4049 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4050 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4051 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4052 emit_call((int)trunc_w_s);
4053 }
4054 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4055 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4056 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4057 emit_call((int)ceil_w_s);
4058 }
4059 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4060 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4061 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4062 emit_call((int)floor_w_s);
4063 }
4064
4065 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4066 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4067 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4068 emit_call((int)round_l_d);
4069 }
4070 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4071 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4072 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4073 emit_call((int)trunc_l_d);
4074 }
4075 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4076 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4077 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4078 emit_call((int)ceil_l_d);
4079 }
4080 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4081 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4082 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4083 emit_call((int)floor_l_d);
4084 }
4085 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4086 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4087 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4088 emit_call((int)round_w_d);
4089 }
4090 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4091 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4092 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4093 emit_call((int)trunc_w_d);
4094 }
4095 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4096 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4097 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4098 emit_call((int)ceil_w_d);
4099 }
4100 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4101 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4102 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4103 emit_call((int)floor_w_d);
4104 }
4105
4106 restore_regs(reglist);
4107#else
4108 cop1_unusable(i, i_regs);
4109#endif
4110}
4111#define fconv_assemble fconv_assemble_arm
4112
4113void fcomp_assemble(int i,struct regstat *i_regs)
4114{
4115#ifndef DISABLE_COP1
4116 signed char fs=get_reg(i_regs->regmap,FSREG);
4117 signed char temp=get_reg(i_regs->regmap,-1);
4118 assert(temp>=0);
4119 // Check cop1 unusable
4120 if(!cop1_usable) {
4121 signed char cs=get_reg(i_regs->regmap,CSREG);
4122 assert(cs>=0);
4123 emit_testimm(cs,0x20000000);
4124 int jaddr=(int)out;
4125 emit_jeq(0);
4126 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4127 cop1_usable=1;
4128 }
4129
4130 if((source[i]&0x3f)==0x30) {
4131 emit_andimm(fs,~0x800000,fs);
4132 return;
4133 }
4134
4135 if((source[i]&0x3e)==0x38) {
4136 // sf/ngle - these should throw exceptions for NaNs
4137 emit_andimm(fs,~0x800000,fs);
4138 return;
4139 }
4140
4141 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4142 if(opcode2[i]==0x10) {
4143 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4144 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4145 emit_orimm(fs,0x800000,fs);
4146 emit_flds(temp,14);
4147 emit_flds(HOST_TEMPREG,15);
4148 emit_fcmps(14,15);
4149 emit_fmstat();
4150 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4151 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4152 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4153 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4154 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4155 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4156 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4157 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4158 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4159 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4160 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4161 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4162 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4163 return;
4164 }
4165 if(opcode2[i]==0x11) {
4166 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4167 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4168 emit_orimm(fs,0x800000,fs);
4169 emit_vldr(temp,6);
4170 emit_vldr(HOST_TEMPREG,7);
4171 emit_fcmpd(6,7);
4172 emit_fmstat();
4173 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4174 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4175 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4176 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4177 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4178 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4179 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4180 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4181 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4182 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4183 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4184 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4185 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4186 return;
4187 }
4188 #endif
4189
4190 // C only
4191
4192 u_int hr,reglist=0;
4193 for(hr=0;hr<HOST_REGS;hr++) {
4194 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4195 }
4196 reglist&=~(1<<fs);
4197 save_regs(reglist);
4198 if(opcode2[i]==0x10) {
4199 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4200 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4201 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4202 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4203 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4204 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4205 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4206 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4207 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4208 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4209 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4210 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4211 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4212 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4213 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4214 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4215 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4216 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4217 }
4218 if(opcode2[i]==0x11) {
4219 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4220 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4221 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4222 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4223 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4224 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4225 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4226 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4227 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4228 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4229 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4230 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4231 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4232 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4233 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4234 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4235 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4236 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4237 }
4238 restore_regs(reglist);
4239 emit_loadreg(FSREG,fs);
4240#else
4241 cop1_unusable(i, i_regs);
4242#endif
4243}
4244
4245void float_assemble(int i,struct regstat *i_regs)
4246{
4247#ifndef DISABLE_COP1
4248 signed char temp=get_reg(i_regs->regmap,-1);
4249 assert(temp>=0);
4250 // Check cop1 unusable
4251 if(!cop1_usable) {
4252 signed char cs=get_reg(i_regs->regmap,CSREG);
4253 assert(cs>=0);
4254 emit_testimm(cs,0x20000000);
4255 int jaddr=(int)out;
4256 emit_jeq(0);
4257 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4258 cop1_usable=1;
4259 }
4260
4261 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4262 if((source[i]&0x3f)==6) // mov
4263 {
4264 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4265 if(opcode2[i]==0x10) {
4266 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4267 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4268 emit_readword_indexed(0,temp,temp);
4269 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4270 }
4271 if(opcode2[i]==0x11) {
4272 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4273 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4274 emit_vldr(temp,7);
4275 emit_vstr(7,HOST_TEMPREG);
4276 }
4277 }
4278 return;
4279 }
4280
4281 if((source[i]&0x3f)>3)
4282 {
4283 if(opcode2[i]==0x10) {
4284 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4285 emit_flds(temp,15);
4286 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4287 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4288 }
4289 if((source[i]&0x3f)==4) // sqrt
4290 emit_fsqrts(15,15);
4291 if((source[i]&0x3f)==5) // abs
4292 emit_fabss(15,15);
4293 if((source[i]&0x3f)==7) // neg
4294 emit_fnegs(15,15);
4295 emit_fsts(15,temp);
4296 }
4297 if(opcode2[i]==0x11) {
4298 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4299 emit_vldr(temp,7);
4300 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4301 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4302 }
4303 if((source[i]&0x3f)==4) // sqrt
4304 emit_fsqrtd(7,7);
4305 if((source[i]&0x3f)==5) // abs
4306 emit_fabsd(7,7);
4307 if((source[i]&0x3f)==7) // neg
4308 emit_fnegd(7,7);
4309 emit_vstr(7,temp);
4310 }
4311 return;
4312 }
4313 if((source[i]&0x3f)<4)
4314 {
4315 if(opcode2[i]==0x10) {
4316 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4317 }
4318 if(opcode2[i]==0x11) {
4319 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4320 }
4321 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4322 if(opcode2[i]==0x10) {
4323 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4324 emit_flds(temp,15);
4325 emit_flds(HOST_TEMPREG,13);
4326 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4327 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4328 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4329 }
4330 }
4331 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4332 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4333 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4334 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4335 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4336 emit_fsts(15,HOST_TEMPREG);
4337 }else{
4338 emit_fsts(15,temp);
4339 }
4340 }
4341 else if(opcode2[i]==0x11) {
4342 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4343 emit_vldr(temp,7);
4344 emit_vldr(HOST_TEMPREG,6);
4345 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4346 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4347 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4348 }
4349 }
4350 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4351 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4352 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4353 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4354 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4355 emit_vstr(7,HOST_TEMPREG);
4356 }else{
4357 emit_vstr(7,temp);
4358 }
4359 }
4360 }
4361 else {
4362 if(opcode2[i]==0x10) {
4363 emit_flds(temp,15);
4364 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4365 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4366 }
4367 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4368 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4369 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4370 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4371 emit_fsts(15,temp);
4372 }
4373 else if(opcode2[i]==0x11) {
4374 emit_vldr(temp,7);
4375 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4376 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4377 }
4378 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4379 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4380 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4381 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4382 emit_vstr(7,temp);
4383 }
4384 }
4385 return;
4386 }
4387 #endif
4388
4389 u_int hr,reglist=0;
4390 for(hr=0;hr<HOST_REGS;hr++) {
4391 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4392 }
4393 if(opcode2[i]==0x10) { // Single precision
4394 save_regs(reglist);
4395 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4396 if((source[i]&0x3f)<4) {
4397 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4398 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4399 }else{
4400 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4401 }
4402 switch(source[i]&0x3f)
4403 {
4404 case 0x00: emit_call((int)add_s);break;
4405 case 0x01: emit_call((int)sub_s);break;
4406 case 0x02: emit_call((int)mul_s);break;
4407 case 0x03: emit_call((int)div_s);break;
4408 case 0x04: emit_call((int)sqrt_s);break;
4409 case 0x05: emit_call((int)abs_s);break;
4410 case 0x06: emit_call((int)mov_s);break;
4411 case 0x07: emit_call((int)neg_s);break;
4412 }
4413 restore_regs(reglist);
4414 }
4415 if(opcode2[i]==0x11) { // Double precision
4416 save_regs(reglist);
4417 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4418 if((source[i]&0x3f)<4) {
4419 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4420 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4421 }else{
4422 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4423 }
4424 switch(source[i]&0x3f)
4425 {
4426 case 0x00: emit_call((int)add_d);break;
4427 case 0x01: emit_call((int)sub_d);break;
4428 case 0x02: emit_call((int)mul_d);break;
4429 case 0x03: emit_call((int)div_d);break;
4430 case 0x04: emit_call((int)sqrt_d);break;
4431 case 0x05: emit_call((int)abs_d);break;
4432 case 0x06: emit_call((int)mov_d);break;
4433 case 0x07: emit_call((int)neg_d);break;
4434 }
4435 restore_regs(reglist);
4436 }
4437#else
4438 cop1_unusable(i, i_regs);
4439#endif
4440}
4441
4442void multdiv_assemble_arm(int i,struct regstat *i_regs)
4443{
4444 // case 0x18: MULT
4445 // case 0x19: MULTU
4446 // case 0x1A: DIV
4447 // case 0x1B: DIVU
4448 // case 0x1C: DMULT
4449 // case 0x1D: DMULTU
4450 // case 0x1E: DDIV
4451 // case 0x1F: DDIVU
4452 if(rs1[i]&&rs2[i])
4453 {
4454 if((opcode2[i]&4)==0) // 32-bit
4455 {
4456 if(opcode2[i]==0x18) // MULT
4457 {
4458 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4459 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4460 signed char hi=get_reg(i_regs->regmap,HIREG);
4461 signed char lo=get_reg(i_regs->regmap,LOREG);
4462 assert(m1>=0);
4463 assert(m2>=0);
4464 assert(hi>=0);
4465 assert(lo>=0);
4466 emit_smull(m1,m2,hi,lo);
4467 }
4468 if(opcode2[i]==0x19) // MULTU
4469 {
4470 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4471 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4472 signed char hi=get_reg(i_regs->regmap,HIREG);
4473 signed char lo=get_reg(i_regs->regmap,LOREG);
4474 assert(m1>=0);
4475 assert(m2>=0);
4476 assert(hi>=0);
4477 assert(lo>=0);
4478 emit_umull(m1,m2,hi,lo);
4479 }
4480 if(opcode2[i]==0x1A) // DIV
4481 {
4482 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4483 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4484 assert(d1>=0);
4485 assert(d2>=0);
4486 signed char quotient=get_reg(i_regs->regmap,LOREG);
4487 signed char remainder=get_reg(i_regs->regmap,HIREG);
4488 assert(quotient>=0);
4489 assert(remainder>=0);
4490 emit_movs(d1,remainder);
4491 emit_negmi(remainder,remainder);
4492 emit_movs(d2,HOST_TEMPREG);
4493 emit_jeq((int)out+52); // Division by zero
4494 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4495 emit_clz(HOST_TEMPREG,quotient);
4496 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4497 emit_orimm(quotient,1<<31,quotient);
4498 emit_shr(quotient,quotient,quotient);
4499 emit_cmp(remainder,HOST_TEMPREG);
4500 emit_subcs(remainder,HOST_TEMPREG,remainder);
4501 emit_adcs(quotient,quotient,quotient);
4502 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4503 emit_jcc((int)out-16); // -4
4504 emit_teq(d1,d2);
4505 emit_negmi(quotient,quotient);
4506 emit_test(d1,d1);
4507 emit_negmi(remainder,remainder);
4508 }
4509 if(opcode2[i]==0x1B) // DIVU
4510 {
4511 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4512 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4513 assert(d1>=0);
4514 assert(d2>=0);
4515 signed char quotient=get_reg(i_regs->regmap,LOREG);
4516 signed char remainder=get_reg(i_regs->regmap,HIREG);
4517 assert(quotient>=0);
4518 assert(remainder>=0);
4519 emit_test(d2,d2);
4520 emit_jeq((int)out+44); // Division by zero
4521 emit_clz(d2,HOST_TEMPREG);
4522 emit_movimm(1<<31,quotient);
4523 emit_shl(d2,HOST_TEMPREG,d2);
4524 emit_mov(d1,remainder);
4525 emit_shr(quotient,HOST_TEMPREG,quotient);
4526 emit_cmp(remainder,d2);
4527 emit_subcs(remainder,d2,remainder);
4528 emit_adcs(quotient,quotient,quotient);
4529 emit_shrcc_imm(d2,1,d2);
4530 emit_jcc((int)out-16); // -4
4531 }
4532 }
4533 else // 64-bit
4534 {
4535 if(opcode2[i]==0x1C) // DMULT
4536 {
4537 assert(opcode2[i]!=0x1C);
4538 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4539 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4540 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4541 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4542 assert(m1h>=0);
4543 assert(m2h>=0);
4544 assert(m1l>=0);
4545 assert(m2l>=0);
4546 emit_pushreg(m2h);
4547 emit_pushreg(m2l);
4548 emit_pushreg(m1h);
4549 emit_pushreg(m1l);
4550 emit_call((int)&mult64);
4551 emit_popreg(m1l);
4552 emit_popreg(m1h);
4553 emit_popreg(m2l);
4554 emit_popreg(m2h);
4555 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4556 signed char hil=get_reg(i_regs->regmap,HIREG);
4557 if(hih>=0) emit_loadreg(HIREG|64,hih);
4558 if(hil>=0) emit_loadreg(HIREG,hil);
4559 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4560 signed char lol=get_reg(i_regs->regmap,LOREG);
4561 if(loh>=0) emit_loadreg(LOREG|64,loh);
4562 if(lol>=0) emit_loadreg(LOREG,lol);
4563 }
4564 if(opcode2[i]==0x1D) // DMULTU
4565 {
4566 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4567 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4568 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4569 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4570 assert(m1h>=0);
4571 assert(m2h>=0);
4572 assert(m1l>=0);
4573 assert(m2l>=0);
4574 save_regs(0x100f);
4575 if(m1l!=0) emit_mov(m1l,0);
4576 if(m1h==0) emit_readword((int)&dynarec_local,1);
4577 else if(m1h>1) emit_mov(m1h,1);
4578 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4579 else if(m2l>2) emit_mov(m2l,2);
4580 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4581 else if(m2h>3) emit_mov(m2h,3);
4582 emit_call((int)&multu64);
4583 restore_regs(0x100f);
4584 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4585 signed char hil=get_reg(i_regs->regmap,HIREG);
4586 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4587 signed char lol=get_reg(i_regs->regmap,LOREG);
4588 /*signed char temp=get_reg(i_regs->regmap,-1);
4589 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4590 signed char rl=get_reg(i_regs->regmap,HIREG);
4591 assert(m1h>=0);
4592 assert(m2h>=0);
4593 assert(m1l>=0);
4594 assert(m2l>=0);
4595 assert(temp>=0);
4596 //emit_mov(m1l,EAX);
4597 //emit_mul(m2l);
4598 emit_umull(rl,rh,m1l,m2l);
4599 emit_storereg(LOREG,rl);
4600 emit_mov(rh,temp);
4601 //emit_mov(m1h,EAX);
4602 //emit_mul(m2l);
4603 emit_umull(rl,rh,m1h,m2l);
4604 emit_adds(rl,temp,temp);
4605 emit_adcimm(rh,0,rh);
4606 emit_storereg(HIREG,rh);
4607 //emit_mov(m2h,EAX);
4608 //emit_mul(m1l);
4609 emit_umull(rl,rh,m1l,m2h);
4610 emit_adds(rl,temp,temp);
4611 emit_adcimm(rh,0,rh);
4612 emit_storereg(LOREG|64,temp);
4613 emit_mov(rh,temp);
4614 //emit_mov(m2h,EAX);
4615 //emit_mul(m1h);
4616 emit_umull(rl,rh,m1h,m2h);
4617 emit_adds(rl,temp,rl);
4618 emit_loadreg(HIREG,temp);
4619 emit_adcimm(rh,0,rh);
4620 emit_adds(rl,temp,rl);
4621 emit_adcimm(rh,0,rh);
4622 // DEBUG
4623 /*
4624 emit_pushreg(m2h);
4625 emit_pushreg(m2l);
4626 emit_pushreg(m1h);
4627 emit_pushreg(m1l);
4628 emit_call((int)&multu64);
4629 emit_popreg(m1l);
4630 emit_popreg(m1h);
4631 emit_popreg(m2l);
4632 emit_popreg(m2h);
4633 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4634 signed char hil=get_reg(i_regs->regmap,HIREG);
4635 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4636 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4637 */
4638 // Shouldn't be necessary
4639 //char loh=get_reg(i_regs->regmap,LOREG|64);
4640 //char lol=get_reg(i_regs->regmap,LOREG);
4641 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4642 //if(lol>=0) emit_loadreg(LOREG,lol);
4643 }
4644 if(opcode2[i]==0x1E) // DDIV
4645 {
4646 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4647 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4648 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4649 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4650 assert(d1h>=0);
4651 assert(d2h>=0);
4652 assert(d1l>=0);
4653 assert(d2l>=0);
4654 save_regs(0x100f);
4655 if(d1l!=0) emit_mov(d1l,0);
4656 if(d1h==0) emit_readword((int)&dynarec_local,1);
4657 else if(d1h>1) emit_mov(d1h,1);
4658 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4659 else if(d2l>2) emit_mov(d2l,2);
4660 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4661 else if(d2h>3) emit_mov(d2h,3);
4662 emit_call((int)&div64);
4663 restore_regs(0x100f);
4664 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4665 signed char hil=get_reg(i_regs->regmap,HIREG);
4666 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4667 signed char lol=get_reg(i_regs->regmap,LOREG);
4668 if(hih>=0) emit_loadreg(HIREG|64,hih);
4669 if(hil>=0) emit_loadreg(HIREG,hil);
4670 if(loh>=0) emit_loadreg(LOREG|64,loh);
4671 if(lol>=0) emit_loadreg(LOREG,lol);
4672 }
4673 if(opcode2[i]==0x1F) // DDIVU
4674 {
4675 //u_int hr,reglist=0;
4676 //for(hr=0;hr<HOST_REGS;hr++) {
4677 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4678 //}
4679 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4680 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4681 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4682 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4683 assert(d1h>=0);
4684 assert(d2h>=0);
4685 assert(d1l>=0);
4686 assert(d2l>=0);
4687 save_regs(0x100f);
4688 if(d1l!=0) emit_mov(d1l,0);
4689 if(d1h==0) emit_readword((int)&dynarec_local,1);
4690 else if(d1h>1) emit_mov(d1h,1);
4691 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4692 else if(d2l>2) emit_mov(d2l,2);
4693 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4694 else if(d2h>3) emit_mov(d2h,3);
4695 emit_call((int)&divu64);
4696 restore_regs(0x100f);
4697 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4698 signed char hil=get_reg(i_regs->regmap,HIREG);
4699 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4700 signed char lol=get_reg(i_regs->regmap,LOREG);
4701 if(hih>=0) emit_loadreg(HIREG|64,hih);
4702 if(hil>=0) emit_loadreg(HIREG,hil);
4703 if(loh>=0) emit_loadreg(LOREG|64,loh);
4704 if(lol>=0) emit_loadreg(LOREG,lol);
4705 }
4706 }
4707 }
4708 else
4709 {
4710 // Multiply by zero is zero.
4711 // MIPS does not have a divide by zero exception.
4712 // The result is undefined, we return zero.
4713 signed char hr=get_reg(i_regs->regmap,HIREG);
4714 signed char lr=get_reg(i_regs->regmap,LOREG);
4715 if(hr>=0) emit_zeroreg(hr);
4716 if(lr>=0) emit_zeroreg(lr);
4717 }
4718}
4719#define multdiv_assemble multdiv_assemble_arm
4720
// Hook for preloading the return-address hash constant into register r.
// Deliberately emits nothing on ARM: x86 has to place the value 0xf8 in a
// register first, whereas here the hash is a single instruction
// (see do_rhash).
void do_preload_rhash(int r) {
  (void)r; // nothing to preload
}
4725
4726void do_preload_rhtbl(int ht) {
4727 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4728}
4729
// Emit the return-address hash: rh = rs & 0xf8.
// The result is used as a byte offset into mini_ht by do_miniht_load.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4733
4734void do_miniht_load(int ht,int rh) {
4735 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4736 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4737}
4738
4739void do_miniht_jump(int rs,int rh,int ht) {
4740 emit_cmp(rh,rs);
4741 emit_ldreq_indexed(ht,4,15);
4742 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4743 emit_mov(rs,7);
4744 emit_jmp(jump_vaddr_reg[7]);
4745 #else
4746 emit_jmp(jump_vaddr_reg[rs]);
4747 #endif
4748}
4749
4750void do_miniht_insert(u_int return_address,int rt,int temp) {
4751 #ifdef ARMv5_ONLY
4752 emit_movimm(return_address,rt); // PC into link register
4753 add_to_linker((int)out,return_address,1);
4754 emit_pcreladdr(temp);
4755 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4756 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4757 #else
4758 emit_movw(return_address&0x0000FFFF,rt);
4759 add_to_linker((int)out,return_address,1);
4760 emit_pcreladdr(temp);
4761 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4762 emit_movt(return_address&0xFFFF0000,rt);
4763 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4764 #endif
4765}
4766
4767// Sign-extend to 64 bits and write out upper half of a register
4768// This is useful where we have a 32-bit value in a register, and want to
4769// keep it in a 32-bit register, but can't guarantee that it won't be read
4770// as a 64-bit value later.
4771void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4772{
4773#ifndef FORCE32
4774 if(is32_pre==is32) return;
4775 int hr,reg;
4776 for(hr=0;hr<HOST_REGS;hr++) {
4777 if(hr!=EXCLUDE_REG) {
4778 //if(pre[hr]==entry[hr]) {
4779 if((reg=pre[hr])>=0) {
4780 if((dirty>>hr)&1) {
4781 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4782 emit_sarimm(hr,31,HOST_TEMPREG);
4783 emit_storereg(reg|64,HOST_TEMPREG);
4784 }
4785 }
4786 }
4787 //}
4788 }
4789 }
4790#endif
4791}
4792
4793void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4794{
4795 //if(dirty_pre==dirty) return;
4796 int hr,reg,new_hr;
4797 for(hr=0;hr<HOST_REGS;hr++) {
4798 if(hr!=EXCLUDE_REG) {
4799 reg=pre[hr];
4800 if(((~u)>>(reg&63))&1) {
4801 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4802 if(((dirty_pre&~dirty)>>hr)&1) {
4803 if(reg>0&&reg<34) {
4804 emit_storereg(reg,hr);
4805 if( ((is32_pre&~uu)>>reg)&1 ) {
4806 emit_sarimm(hr,31,HOST_TEMPREG);
4807 emit_storereg(reg|64,HOST_TEMPREG);
4808 }
4809 }
4810 else if(reg>=64) {
4811 emit_storereg(reg,hr);
4812 }
4813 }
4814 }
4815 else // Check if register moved to a different register
4816 if((new_hr=get_reg(entry,reg))>=0) {
4817 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4818 if(reg>0&&reg<34) {
4819 emit_storereg(reg,hr);
4820 if( ((is32_pre&~uu)>>reg)&1 ) {
4821 emit_sarimm(hr,31,HOST_TEMPREG);
4822 emit_storereg(reg|64,HOST_TEMPREG);
4823 }
4824 }
4825 else if(reg>=64) {
4826 emit_storereg(reg,hr);
4827 }
4828 }
4829 }
4830 }
4831 }
4832 }
4833}
4834
4835
4836/* using strd could possibly help but you'd have to allocate registers in pairs
4837void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4838{
4839 int hr;
4840 int wrote=-1;
4841 for(hr=HOST_REGS-1;hr>=0;hr--) {
4842 if(hr!=EXCLUDE_REG) {
4843 if(pre[hr]!=entry[hr]) {
4844 if(pre[hr]>=0) {
4845 if((dirty>>hr)&1) {
4846 if(get_reg(entry,pre[hr])<0) {
4847 if(pre[hr]<64) {
4848 if(!((u>>pre[hr])&1)) {
4849 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4850 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4851 emit_sarimm(hr,31,hr+1);
4852 emit_strdreg(pre[hr],hr);
4853 }
4854 else
4855 emit_storereg(pre[hr],hr);
4856 }else{
4857 emit_storereg(pre[hr],hr);
4858 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4859 emit_sarimm(hr,31,hr);
4860 emit_storereg(pre[hr]|64,hr);
4861 }
4862 }
4863 }
4864 }else{
4865 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4866 emit_storereg(pre[hr],hr);
4867 }
4868 }
4869 wrote=hr;
4870 }
4871 }
4872 }
4873 }
4874 }
4875 }
4876 for(hr=0;hr<HOST_REGS;hr++) {
4877 if(hr!=EXCLUDE_REG) {
4878 if(pre[hr]!=entry[hr]) {
4879 if(pre[hr]>=0) {
4880 int nr;
4881 if((nr=get_reg(entry,pre[hr]))>=0) {
4882 emit_mov(hr,nr);
4883 }
4884 }
4885 }
4886 }
4887 }
4888}
4889#define wb_invalidate wb_invalidate_arm
4890*/
4891
4892// Clearing the cache is rather slow on ARM Linux, so mark the areas
4893// that need to be cleared, and then only clear these areas once.
4894void do_clear_cache()
4895{
4896 int i,j;
4897 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4898 {
4899 u_int bitmap=needs_clear_cache[i];
4900 if(bitmap) {
4901 u_int start,end;
4902 for(j=0;j<32;j++)
4903 {
4904 if(bitmap&(1<<j)) {
4905 start=BASE_ADDR+i*131072+j*4096;
4906 end=start+4095;
4907 j++;
4908 while(j<32) {
4909 if(bitmap&(1<<j)) {
4910 end+=4096;
4911 j++;
4912 }else{
4913 __clear_cache((void *)start,(void *)end);
4914 break;
4915 }
4916 }
4917 }
4918 }
4919 needs_clear_cache[i]=0;
4920 }
4921 }
4922}
4923
4924// CPU-architecture-specific initialization
4925void arch_init() {
4926#ifndef DISABLE_COP1
4927 rounding_modes[0]=0x0<<22; // round
4928 rounding_modes[1]=0x3<<22; // trunc
4929 rounding_modes[2]=0x1<<22; // ceil
4930 rounding_modes[3]=0x2<<22; // floor
4931#endif
4932}
4933
4934// vim:shiftwidth=2:expandtab