drc: get rid of pass 7/provisional_r32 too
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
27#ifdef MUPEN64
28extern precomp_instr fake_pc;
29#endif
30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
// Routines referenced by this file but defined elsewhere.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

// One jump_vaddr routine per host register r0-r10 and r12.
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
145void *kill_pointer(void *stub)
146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
153 return i_ptr;
154}
155
// Return the current destination of the branch that is linked through 'stub'.
// The second word of the stub must be a load whose 12-bit offset (relative
// to that word plus 8) locates a literal holding the address of the branch
// instruction. The branch's 24-bit field is sign-extended, multiplied by 4
// and added to the branch address plus 8.
int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  // Byte arithmetic goes through char*: arithmetic on void* is a GNU extension.
  int *ptr=(int *)((char *)stub+4);
  assert((*ptr&0x0ff00000)==0x05900000);
  u_int offset=*ptr&0xfff;
  int **l_ptr=(int **)((char *)ptr+offset+8);
  int *i_ptr=*l_ptr;
  assert((*i_ptr&0x0f000000)==0x0a000000);
  // Shift left as unsigned (shifting a signed value into the sign bit is
  // undefined), then shift right as signed to sign-extend the offset.
  int displacement=(int)((u_int)*i_ptr<<8)>>6;
  return (int)i_ptr+displacement+8;
}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
// Printable names of the 16 host registers, indexed by register number.
// Used for the assem_debug disassembly output.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
// Pack three register numbers into their instruction fields:
// rn at bits 16-19, rd at bits 12-15 and rm at bits 0-3.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Pack rd (bits 12-15), rn (bits 16-19) and an 8-bit immediate that is to be
// shifted left by 'shift' bits. 'shift' must be even; it is stored in the
// rotate field as ((32-shift)&30)<<7, so a shift of 0 stores 0.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express 'imm' as an 8-bit value rotated by an even amount.
// On success the 12-bit field (rotate count in bits 8-11, value in bits 0-7)
// is stored in *encoded and 1 is returned. Returns 0, leaving *encoded
// untouched, when no such encoding exists.
u_int genimm(u_int imm,u_int *encoded)
{
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // Rotate right two bits at a time; 'rot' counts down from 32 so that
  // (rot&30)<<7 is the rotate field for the current position.
  for(int rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
827void genimm_checked(u_int imm,u_int *encoded)
828{
829 u_int ret=genimm(imm,encoded);
830 assert(ret);
831}
832u_int genjmp(u_int addr)
833{
834 int offset=addr-(int)out-8;
835 if(offset<-33554432||offset>=33554432) {
836 if (addr>2) {
837 printf("genjmp: out of range: %08x\n", offset);
838 exit(1);
839 }
840 return 0;
841 }
842 return ((u_int)offset>>2)&0xffffff;
843}
844
845void emit_mov(int rs,int rt)
846{
847 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_movs(int rs,int rt)
852{
853 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
854 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
855}
856
857void emit_add(int rs1,int rs2,int rt)
858{
859 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adds(int rs1,int rs2,int rt)
864{
865 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_adcs(int rs1,int rs2,int rt)
870{
871 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbc(int rs1,int rs2,int rt)
876{
877 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_sbcs(int rs1,int rs2,int rt)
882{
883 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
884 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
885}
886
887void emit_neg(int rs, int rt)
888{
889 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_negs(int rs, int rt)
894{
895 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
896 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
897}
898
899void emit_sub(int rs1,int rs2,int rt)
900{
901 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_subs(int rs1,int rs2,int rt)
906{
907 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
908 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
909}
910
911void emit_zeroreg(int rt)
912{
913 assem_debug("mov %s,#0\n",regname[rt]);
914 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
915}
916
917void emit_loadreg(int r, int hr)
918{
919#ifdef FORCE32
920 if(r&64) {
921 printf("64bit load in 32bit mode!\n");
922 exit(1);
923 }
924#endif
925 if((r&63)==0)
926 emit_zeroreg(hr);
927 else {
928 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
929 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
930 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
931 if(r==CCREG) addr=(int)&cycle_count;
932 if(r==CSREG) addr=(int)&Status;
933 if(r==FSREG) addr=(int)&FCR31;
934 if(r==INVCP) addr=(int)&invc_ptr;
935 u_int offset = addr-(u_int)&dynarec_local;
936 assert(offset<4096);
937 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
938 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
939 }
940}
941void emit_storereg(int r, int hr)
942{
943#ifdef FORCE32
944 if(r&64) {
945 printf("64bit store in 32bit mode!\n");
946 exit(1);
947 }
948#endif
949 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
950 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
951 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
952 if(r==CCREG) addr=(int)&cycle_count;
953 if(r==FSREG) addr=(int)&FCR31;
954 u_int offset = addr-(u_int)&dynarec_local;
955 assert(offset<4096);
956 assem_debug("str %s,fp+%d\n",regname[hr],offset);
957 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
958}
959
960void emit_test(int rs, int rt)
961{
962 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
963 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
964}
965
966void emit_testimm(int rs,int imm)
967{
968 u_int armval;
969 assem_debug("tst %s,$%d\n",regname[rs],imm);
970 genimm_checked(imm,&armval);
971 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
972}
973
974void emit_testeqimm(int rs,int imm)
975{
976 u_int armval;
977 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
978 genimm_checked(imm,&armval);
979 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
980}
981
982void emit_not(int rs,int rt)
983{
984 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
986}
987
988void emit_mvnmi(int rs,int rt)
989{
990 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
991 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
992}
993
994void emit_and(u_int rs1,u_int rs2,u_int rt)
995{
996 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
997 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
998}
999
1000void emit_or(u_int rs1,u_int rs2,u_int rt)
1001{
1002 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1003 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1004}
1005void emit_or_and_set_flags(int rs1,int rs2,int rt)
1006{
1007 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1008 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1009}
1010
1011void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1012{
1013 assert(rs<16);
1014 assert(rt<16);
1015 assert(imm<32);
1016 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1017 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1018}
1019
1020void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1021{
1022 assert(rs<16);
1023 assert(rt<16);
1024 assert(imm<32);
1025 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1026 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1027}
1028
1029void emit_xor(u_int rs1,u_int rs2,u_int rt)
1030{
1031 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1032 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1033}
1034
1035void emit_loadlp(u_int imm,u_int rt)
1036{
1037 add_literal((int)out,imm);
1038 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1039 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1040}
1041void emit_movw(u_int imm,u_int rt)
1042{
1043 assert(imm<65536);
1044 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1045 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1046}
1047void emit_movt(u_int imm,u_int rt)
1048{
1049 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1050 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1051}
1052void emit_movimm(u_int imm,u_int rt)
1053{
1054 u_int armval;
1055 if(genimm(imm,&armval)) {
1056 assem_debug("mov %s,#%d\n",regname[rt],imm);
1057 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1058 }else if(genimm(~imm,&armval)) {
1059 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1060 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1061 }else if(imm<65536) {
1062 #ifdef ARMv5_ONLY
1063 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1064 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1065 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1066 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1067 #else
1068 emit_movw(imm,rt);
1069 #endif
1070 }else{
1071 #ifdef ARMv5_ONLY
1072 emit_loadlp(imm,rt);
1073 #else
1074 emit_movw(imm&0x0000FFFF,rt);
1075 emit_movt(imm&0xFFFF0000,rt);
1076 #endif
1077 }
1078}
1079void emit_pcreladdr(u_int rt)
1080{
1081 assem_debug("add %s,pc,#?\n",regname[rt]);
1082 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1083}
1084
1085void emit_addimm(u_int rs,int imm,u_int rt)
1086{
1087 assert(rs<16);
1088 assert(rt<16);
1089 if(imm!=0) {
1090 assert(imm>-65536&&imm<65536);
1091 u_int armval;
1092 if(genimm(imm,&armval)) {
1093 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1094 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1095 }else if(genimm(-imm,&armval)) {
1096 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1097 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1098 }else if(imm<0) {
1099 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1100 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1101 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1102 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1103 }else{
1104 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1105 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1106 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1107 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1108 }
1109 }
1110 else if(rs!=rt) emit_mov(rs,rt);
1111}
1112
1113void emit_addimm_and_set_flags(int imm,int rt)
1114{
1115 assert(imm>-65536&&imm<65536);
1116 u_int armval;
1117 if(genimm(imm,&armval)) {
1118 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1119 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1120 }else if(genimm(-imm,&armval)) {
1121 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1122 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1123 }else if(imm<0) {
1124 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1125 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1126 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1127 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1128 }else{
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1130 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1131 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1132 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1133 }
1134}
1135void emit_addimm_no_flags(u_int imm,u_int rt)
1136{
1137 emit_addimm(rt,imm,rt);
1138}
1139
1140void emit_addnop(u_int r)
1141{
1142 assert(r<16);
1143 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1144 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1145}
1146
1147void emit_adcimm(u_int rs,int imm,u_int rt)
1148{
1149 u_int armval;
1150 genimm_checked(imm,&armval);
1151 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1152 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1153}
1154/*void emit_sbcimm(int imm,u_int rt)
1155{
1156 u_int armval;
1157 genimm_checked(imm,&armval);
1158 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1159 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1160}*/
1161void emit_sbbimm(int imm,u_int rt)
1162{
1163 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1164 assert(rt<8);
1165 if(imm<128&&imm>=-128) {
1166 output_byte(0x83);
1167 output_modrm(3,rt,3);
1168 output_byte(imm);
1169 }
1170 else
1171 {
1172 output_byte(0x81);
1173 output_modrm(3,rt,3);
1174 output_w32(imm);
1175 }
1176}
1177void emit_rscimm(int rs,int imm,u_int rt)
1178{
1179 assert(0);
1180 u_int armval;
1181 genimm_checked(imm,&armval);
1182 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1184}
1185
1186void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1187{
1188 // TODO: if(genimm(imm,&armval)) ...
1189 // else
1190 emit_movimm(imm,HOST_TEMPREG);
1191 emit_adds(HOST_TEMPREG,rsl,rtl);
1192 emit_adcimm(rsh,0,rth);
1193}
1194
1195void emit_sbb(int rs1,int rs2)
1196{
1197 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1198 output_byte(0x19);
1199 output_modrm(3,rs1,rs2);
1200}
1201
1202void emit_andimm(int rs,int imm,int rt)
1203{
1204 u_int armval;
1205 if(genimm(imm,&armval)) {
1206 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1207 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1208 }else if(genimm(~imm,&armval)) {
1209 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1210 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1211 }else if(imm==65535) {
1212 #ifdef ARMv5_ONLY
1213 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1214 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1215 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1216 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1217 #else
1218 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1219 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1220 #endif
1221 }else{
1222 assert(imm>0&&imm<65535);
1223 #ifdef ARMv5_ONLY
1224 assem_debug("mov r14,#%d\n",imm&0xFF00);
1225 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1226 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1227 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1228 #else
1229 emit_movw(imm,HOST_TEMPREG);
1230 #endif
1231 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1232 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1233 }
1234}
1235
1236void emit_orimm(int rs,int imm,int rt)
1237{
1238 u_int armval;
1239 if(genimm(imm,&armval)) {
1240 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1241 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1242 }else{
1243 assert(imm>0&&imm<65536);
1244 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1245 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1246 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1247 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1248 }
1249}
1250
1251void emit_xorimm(int rs,int imm,int rt)
1252{
1253 u_int armval;
1254 if(genimm(imm,&armval)) {
1255 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1256 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1257 }else{
1258 assert(imm>0&&imm<65536);
1259 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1260 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1261 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1262 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1263 }
1264}
1265
1266void emit_shlimm(int rs,u_int imm,int rt)
1267{
1268 assert(imm>0);
1269 assert(imm<32);
1270 //if(imm==1) ...
1271 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1272 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1273}
1274
1275void emit_shrimm(int rs,u_int imm,int rt)
1276{
1277 assert(imm>0);
1278 assert(imm<32);
1279 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1281}
1282
1283void emit_sarimm(int rs,u_int imm,int rt)
1284{
1285 assert(imm>0);
1286 assert(imm<32);
1287 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1288 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1289}
1290
1291void emit_rorimm(int rs,u_int imm,int rt)
1292{
1293 assert(imm>0);
1294 assert(imm<32);
1295 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1296 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1297}
1298
1299void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1300{
1301 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1302 assert(imm>0);
1303 assert(imm<32);
1304 //if(imm==1) ...
1305 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1306 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1307 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1308 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1309}
1310
1311void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1312{
1313 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1314 assert(imm>0);
1315 assert(imm<32);
1316 //if(imm==1) ...
1317 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1318 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1319 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1320 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1321}
1322
1323void emit_signextend16(int rs,int rt)
1324{
1325 #ifdef ARMv5_ONLY
1326 emit_shlimm(rs,16,rt);
1327 emit_sarimm(rt,16,rt);
1328 #else
1329 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1330 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1331 #endif
1332}
1333
1334void emit_shl(u_int rs,u_int shift,u_int rt)
1335{
1336 assert(rs<16);
1337 assert(rt<16);
1338 assert(shift<16);
1339 //if(imm==1) ...
1340 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1341 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1342}
1343void emit_shr(u_int rs,u_int shift,u_int rt)
1344{
1345 assert(rs<16);
1346 assert(rt<16);
1347 assert(shift<16);
1348 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1349 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1350}
1351void emit_sar(u_int rs,u_int shift,u_int rt)
1352{
1353 assert(rs<16);
1354 assert(rt<16);
1355 assert(shift<16);
1356 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1357 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1358}
1359void emit_shlcl(int r)
1360{
1361 assem_debug("shl %%%s,%%cl\n",regname[r]);
1362 assert(0);
1363}
1364void emit_shrcl(int r)
1365{
1366 assem_debug("shr %%%s,%%cl\n",regname[r]);
1367 assert(0);
1368}
1369void emit_sarcl(int r)
1370{
1371 assem_debug("sar %%%s,%%cl\n",regname[r]);
1372 assert(0);
1373}
1374
1375void emit_shldcl(int r1,int r2)
1376{
1377 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1378 assert(0);
1379}
1380void emit_shrdcl(int r1,int r2)
1381{
1382 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1383 assert(0);
1384}
1385void emit_orrshl(u_int rs,u_int shift,u_int rt)
1386{
1387 assert(rs<16);
1388 assert(rt<16);
1389 assert(shift<16);
1390 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1391 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1392}
1393void emit_orrshr(u_int rs,u_int shift,u_int rt)
1394{
1395 assert(rs<16);
1396 assert(rt<16);
1397 assert(shift<16);
1398 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1399 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1400}
1401
1402void emit_cmpimm(int rs,int imm)
1403{
1404 u_int armval;
1405 if(genimm(imm,&armval)) {
1406 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1407 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1408 }else if(genimm(-imm,&armval)) {
1409 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1410 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1411 }else if(imm>0) {
1412 assert(imm<65536);
1413 #ifdef ARMv5_ONLY
1414 emit_movimm(imm,HOST_TEMPREG);
1415 #else
1416 emit_movw(imm,HOST_TEMPREG);
1417 #endif
1418 assem_debug("cmp %s,r14\n",regname[rs]);
1419 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1420 }else{
1421 assert(imm>-65536);
1422 #ifdef ARMv5_ONLY
1423 emit_movimm(-imm,HOST_TEMPREG);
1424 #else
1425 emit_movw(-imm,HOST_TEMPREG);
1426 #endif
1427 assem_debug("cmn %s,r14\n",regname[rs]);
1428 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1429 }
1430}
1431
1432void emit_cmovne(u_int *addr,int rt)
1433{
1434 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1435 assert(0);
1436}
1437void emit_cmovl(u_int *addr,int rt)
1438{
1439 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1440 assert(0);
1441}
1442void emit_cmovs(u_int *addr,int rt)
1443{
1444 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1445 assert(0);
1446}
1447void emit_cmovne_imm(int imm,int rt)
1448{
1449 assem_debug("movne %s,#%d\n",regname[rt],imm);
1450 u_int armval;
1451 genimm_checked(imm,&armval);
1452 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1453}
1454void emit_cmovl_imm(int imm,int rt)
1455{
1456 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1457 u_int armval;
1458 genimm_checked(imm,&armval);
1459 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1460}
1461void emit_cmovb_imm(int imm,int rt)
1462{
1463 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1464 u_int armval;
1465 genimm_checked(imm,&armval);
1466 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1467}
1468void emit_cmovs_imm(int imm,int rt)
1469{
1470 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1471 u_int armval;
1472 genimm_checked(imm,&armval);
1473 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1474}
1475void emit_cmove_reg(int rs,int rt)
1476{
1477 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1478 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1479}
1480void emit_cmovne_reg(int rs,int rt)
1481{
1482 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1483 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1484}
1485void emit_cmovl_reg(int rs,int rt)
1486{
1487 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1488 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1489}
1490void emit_cmovs_reg(int rs,int rt)
1491{
1492 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1493 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1494}
1495
// Set rt to 1 if rs < imm (movlt after the compare), else 0.
// When rs and rt are the same register the zeroing is deferred until after
// the compare so the operand is not destroyed first.
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if the compare of rs against imm leaves carry clear (movcc),
// else 0.  When rs and rt are the same register the zeroing is deferred
// until after the compare so the operand is not destroyed first.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-less-than-immediate for a value held in the register pair rsh:rsl.
// The low-word result from emit_slti32() is then corrected by the high word:
// for imm>=0 the high word is tested against zero (non-zero -> 0, negative
// -> 1); for imm<0 it is compared with -1 (different -> 0, less -> 1).
// rt must not alias rsh.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Carry-based set-less-than-immediate for a value held in rsh:rsl.
// The low-word result from emit_sltiu32() is then corrected by the high
// word: for imm>=0 a non-zero high word forces 0; for imm<0 a high word
// different from -1 forces 1.  rt must not alias rsh.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1542
1543void emit_cmp(int rs,int rt)
1544{
1545 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1546 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1547}
// Set rt to 1 when rs is greater than zero, else 0: compare rs with 1,
// load 1, then overwrite with 0 under the LT condition.
void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs,1);   // flags first, in case rs and rt are the same register
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
// Set rt to 1 when rs is non-zero.  rs is copied to rt with emit_movs()
// (or just tested when they are the same register), then "movne rt,#1"
// replaces any non-zero value with 1.
void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) {
    emit_test(rs,rs);
  }else{
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
// Greater-than-zero test for the register pair rsh:rsl, result in rt.
// Starts from the low-word result of emit_set_gz32(), then tests the high
// word: non-zero forces 1, negative (MI) forces 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);

  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// Non-zero test for the register pair rsh:rsl: the two halves are combined
// into rt by emit_or_and_set_flags(), then "movne rt,#1" replaces any
// non-zero result with 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);

  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (movlt after the compare), else 0.
// If rt aliases either operand, the zeroing is deferred until after the
// compare so the operand is not destroyed first.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if comparing rs1 with rs2 leaves carry clear (movcc), else 0.
// If rt aliases either operand, the zeroing is deferred until after the
// compare so the operand is not destroyed first.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1592void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1593{
1594 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1595 assert(u1!=rt);
1596 assert(u2!=rt);
1597 emit_cmp(l1,l2);
1598 emit_movimm(0,rt);
1599 emit_sbcs(u1,u2,HOST_TEMPREG);
1600 emit_cmovl_imm(1,rt);
1601}
1602void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1603{
1604 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1605 assert(u1!=rt);
1606 assert(u2!=rt);
1607 emit_cmp(l1,l2);
1608 emit_movimm(0,rt);
1609 emit_sbcs(u1,u2,HOST_TEMPREG);
1610 emit_cmovb_imm(1,rt);
1611}
1612
1613void emit_call(int a)
1614{
1615 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1616 u_int offset=genjmp(a);
1617 output_w32(0xeb000000|offset);
1618}
1619void emit_jmp(int a)
1620{
1621 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1622 u_int offset=genjmp(a);
1623 output_w32(0xea000000|offset);
1624}
// Emit "bne" to address a; genjmp() supplies the offset field.
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq" to address a; genjmp() supplies the offset field.
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi" to address a; genjmp() supplies the offset field.
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl" to address a; genjmp() supplies the offset field.
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt" to address a; genjmp() supplies the offset field.
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge" to address a; genjmp() supplies the offset field.
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc" to address a; genjmp() supplies the offset field.
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs" to address a; genjmp() supplies the offset field.
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc" to address a; genjmp() supplies the offset field.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1679
// Unimplemented in this backend: logs a "push $imm" and then trips
// assert(0).  No code is emitted.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);

  assert(0);
}
// Unimplemented in this backend: logs "pusha" and then trips assert(0).
// No code is emitted.
void emit_pusha()
{
  assem_debug("pusha\n");

  assert(0);
}
// Unimplemented in this backend: logs "popa" and then trips assert(0).
// No code is emitted.
void emit_popa()
{
  assem_debug("popa\n");

  assert(0);
}
1695void emit_pushreg(u_int r)
1696{
1697 assem_debug("push %%%s\n",regname[r]);
1698 assert(0);
1699}
1700void emit_popreg(u_int r)
1701{
1702 assem_debug("pop %%%s\n",regname[r]);
1703 assert(0);
1704}
1705void emit_callreg(u_int r)
1706{
1707 assem_debug("call *%%%s\n",regname[r]);
1708 assert(0);
1709}
1710void emit_jmpreg(u_int r)
1711{
1712 assem_debug("mov pc,%s\n",regname[r]);
1713 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1714}
1715
1716void emit_readword_indexed(int offset, int rs, int rt)
1717{
1718 assert(offset>-4096&&offset<4096);
1719 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1720 if(offset>=0) {
1721 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1722 }else{
1723 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1724 }
1725}
1726void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1727{
1728 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1729 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1730}
// Word load that optionally goes through a map register.
// map<0 means no map: a plain offset load.  Otherwise the scaled-index form
// is used, which has no room for a displacement, so addr must be 0.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load a 64-bit value as two words: rh from addr (skipped when rh<0) and
// rl from the following word.
// Without a map (map<0) the second load simply uses addr+4.  With a map,
// the map register is incremented by one between the two loads (the index
// is scaled by four in the load itself); rh must not alias rs there.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1751void emit_movsbl_indexed(int offset, int rs, int rt)
1752{
1753 assert(offset>-256&&offset<256);
1754 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1755 if(offset>=0) {
1756 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1757 }else{
1758 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1759 }
1760}
1761void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1762{
1763 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1764 else {
1765 if(addr==0) {
1766 emit_shlimm(map,2,map);
1767 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1768 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1769 }else{
1770 assert(addr>-256&&addr<256);
1771 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1772 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1773 emit_movsbl_indexed(addr, rt, rt);
1774 }
1775 }
1776}
1777void emit_movswl_indexed(int offset, int rs, int rt)
1778{
1779 assert(offset>-256&&offset<256);
1780 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1781 if(offset>=0) {
1782 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1783 }else{
1784 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1785 }
1786}
1787void emit_movzbl_indexed(int offset, int rs, int rt)
1788{
1789 assert(offset>-4096&&offset<4096);
1790 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1791 if(offset>=0) {
1792 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1793 }else{
1794 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1795 }
1796}
1797void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1798{
1799 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1800 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1801}
// Zero-extending byte load that optionally goes through a map register.
// map<0 is a plain offset load.  With a map, a non-zero addr is first added
// to rs into rt, and the scaled-index load then uses rt as its base.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1814void emit_movzwl_indexed(int offset, int rs, int rt)
1815{
1816 assert(offset>-256&&offset<256);
1817 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1818 if(offset>=0) {
1819 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1820 }else{
1821 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1822 }
1823}
1824void emit_readword(int addr, int rt)
1825{
1826 u_int offset = addr-(u_int)&dynarec_local;
1827 assert(offset<4096);
1828 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1829 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1830}
1831void emit_movsbl(int addr, int rt)
1832{
1833 u_int offset = addr-(u_int)&dynarec_local;
1834 assert(offset<256);
1835 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1836 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1837}
1838void emit_movswl(int addr, int rt)
1839{
1840 u_int offset = addr-(u_int)&dynarec_local;
1841 assert(offset<256);
1842 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1843 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1844}
1845void emit_movzbl(int addr, int rt)
1846{
1847 u_int offset = addr-(u_int)&dynarec_local;
1848 assert(offset<4096);
1849 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1850 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1851}
1852void emit_movzwl(int addr, int rt)
1853{
1854 u_int offset = addr-(u_int)&dynarec_local;
1855 assert(offset<256);
1856 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1857 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1858}
1859void emit_movzwl_reg(int rs, int rt)
1860{
1861 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1862 assert(0);
1863}
1864
1865void emit_xchg(int rs, int rt)
1866{
1867 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1868 assert(0);
1869}
1870void emit_writeword_indexed(int rt, int offset, int rs)
1871{
1872 assert(offset>-4096&&offset<4096);
1873 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1874 if(offset>=0) {
1875 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1876 }else{
1877 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1878 }
1879}
1880void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1881{
1882 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1883 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1884}
// Word store that optionally goes through a map register.
// map<0 means no map: a plain offset store.  Otherwise the scaled-index
// form is used and addr must be 0.  'temp' is not used by this function.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store a 64-bit value as two words: rh at addr, rl at the following word.
// Without a map (map<0) rh is optional (skipped when rh<0) and rl goes to
// addr+4.  With a map, rh is required; the second store either uses temp as
// map+1 (when temp is distinct from rs) or, when temp aliases rs, advances
// rs itself by 4 and reuses the original map.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int temp_free=(temp!=rs);
  if(temp_free) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(!temp_free) {
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
    return;
  }
  emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
}
1909void emit_writehword_indexed(int rt, int offset, int rs)
1910{
1911 assert(offset>-256&&offset<256);
1912 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1913 if(offset>=0) {
1914 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1915 }else{
1916 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1917 }
1918}
1919void emit_writebyte_indexed(int rt, int offset, int rs)
1920{
1921 assert(offset>-4096&&offset<4096);
1922 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1923 if(offset>=0) {
1924 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1925 }else{
1926 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1927 }
1928}
1929void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1930{
1931 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1932 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1933}
// Byte store that optionally goes through a map register.
// map<0 is a plain offset store.  With a map, a non-zero addr is first
// added to rs into temp, and the scaled-index store then uses temp as its
// base.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
1946void emit_writeword(int rt, int addr)
1947{
1948 u_int offset = addr-(u_int)&dynarec_local;
1949 assert(offset<4096);
1950 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1951 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1952}
1953void emit_writehword(int rt, int addr)
1954{
1955 u_int offset = addr-(u_int)&dynarec_local;
1956 assert(offset<256);
1957 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1958 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1959}
1960void emit_writebyte(int rt, int addr)
1961{
1962 u_int offset = addr-(u_int)&dynarec_local;
1963 assert(offset<4096);
1964 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
1965 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1966}
// Unimplemented in this backend: logs a store of an immediate word to an
// address and then trips assert(0).  No code is emitted.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);

  assert(0);
}
// Unimplemented in this backend: logs a store of an immediate byte to an
// address and then trips assert(0).  No code is emitted.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);

  assert(0);
}
1977
1978void emit_mul(int rs)
1979{
1980 assem_debug("mul %%%s\n",regname[rs]);
1981 assert(0);
1982}
1983void emit_imul(int rs)
1984{
1985 assem_debug("imul %%%s\n",regname[rs]);
1986 assert(0);
1987}
1988void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1989{
1990 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1991 assert(rs1<16);
1992 assert(rs2<16);
1993 assert(hi<16);
1994 assert(lo<16);
1995 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1996}
1997void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1998{
1999 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2000 assert(rs1<16);
2001 assert(rs2<16);
2002 assert(hi<16);
2003 assert(lo<16);
2004 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2005}
2006
2007void emit_div(int rs)
2008{
2009 assem_debug("div %%%s\n",regname[rs]);
2010 assert(0);
2011}
2012void emit_idiv(int rs)
2013{
2014 assem_debug("idiv %%%s\n",regname[rs]);
2015 assert(0);
2016}
// Unimplemented in this backend: logs "cdq" and then trips assert(0).
// No code is emitted.
void emit_cdq()
{
  assem_debug("cdq\n");

  assert(0);
}
2022
2023void emit_clz(int rs,int rt)
2024{
2025 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2026 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2027}
2028
2029void emit_subcs(int rs1,int rs2,int rt)
2030{
2031 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2032 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2033}
2034
2035void emit_shrcc_imm(int rs,u_int imm,int rt)
2036{
2037 assert(imm>0);
2038 assert(imm<32);
2039 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2040 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2041}
2042
2043void emit_negmi(int rs, int rt)
2044{
2045 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2046 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2047}
2048
2049void emit_negsmi(int rs, int rt)
2050{
2051 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2052 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2053}
2054
2055void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2056{
2057 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2058 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2059}
2060
2061void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2062{
2063 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2064 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2065}
2066
2067void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2068{
2069 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2070 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2071}
2072
2073void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2074{
2075 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2076 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2077}
2078
2079void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2080{
2081 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2082 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2083}
2084
2085void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2086{
2087 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2088 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2089}
2090
2091void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2092{
2093 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2094 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2095}
2096
2097void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2098{
2099 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2100 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2101}
2102
2103void emit_teq(int rs, int rt)
2104{
2105 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2106 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2107}
2108
2109void emit_rsbimm(int rs, int imm, int rt)
2110{
2111 u_int armval;
2112 genimm_checked(imm,&armval);
2113 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2114 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2115}
2116
2117// Load 2 immediates optimizing for small code size
2118void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2119{
2120 emit_movimm(imm1,rt1);
2121 u_int armval;
2122 if(genimm(imm2-imm1,&armval)) {
2123 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2124 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2125 }else if(genimm(imm1-imm2,&armval)) {
2126 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2127 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2128 }
2129 else emit_movimm(imm2,rt2);
2130}
2131
2132// Conditionally select one of two immediates, optimizing for small code size
2133// This will only be called if HAVE_CMOV_IMM is defined
2134void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2135{
2136 u_int armval;
2137 if(genimm(imm2-imm1,&armval)) {
2138 emit_movimm(imm1,rt);
2139 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2140 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2141 }else if(genimm(imm1-imm2,&armval)) {
2142 emit_movimm(imm1,rt);
2143 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2144 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2145 }
2146 else {
2147 #ifdef ARMv5_ONLY
2148 emit_movimm(imm1,rt);
2149 add_literal((int)out,imm2);
2150 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2151 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2152 #else
2153 emit_movw(imm1&0x0000FFFF,rt);
2154 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2155 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2156 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2157 }
2158 emit_movt(imm1&0xFFFF0000,rt);
2159 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2160 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2161 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2162 }
2163 #endif
2164 }
2165}
2166
// Special case for checking invalid_code (immediate-address form).
// Not implemented in this backend: it trips assert(0) and emits nothing.
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2172
2173// special case for checking invalid_code
2174void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2175{
2176 assert(imm<128&&imm>=0);
2177 assert(r>=0&&r<16);
2178 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2179 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2180 emit_cmpimm(HOST_TEMPREG,imm);
2181}
2182
2183// special case for tlb mapping
2184void emit_addsr12(int rs1,int rs2,int rt)
2185{
2186 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2187 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2188}
2189
// Used to preload hash table entries.
// NOTE(review): this writes the bytes 0x0F 0x18, a modrm byte and a 32-bit
// absolute address, which looks like an x86 encoding rather than an ARM
// one - confirm whether this function is ever called in the ARM backend.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2199void emit_prefetchreg(int r)
2200{
2201 assem_debug("pld %s\n",regname[r]);
2202 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2203}
2204
2205// Special case for mini_ht
2206void emit_ldreq_indexed(int rs, u_int offset, int rt)
2207{
2208 assert(offset<4096);
2209 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2210 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2211}
2212
2213void emit_flds(int r,int sr)
2214{
2215 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2216 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2217}
2218
2219void emit_vldr(int r,int vr)
2220{
2221 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2222 output_w32(0xed900b00|(vr<<12)|(r<<16));
2223}
2224
2225void emit_fsts(int sr,int r)
2226{
2227 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2228 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2229}
2230
2231void emit_vstr(int vr,int r)
2232{
2233 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2234 output_w32(0xed800b00|(vr<<12)|(r<<16));
2235}
2236
2237void emit_ftosizs(int s,int d)
2238{
2239 assem_debug("ftosizs s%d,s%d\n",d,s);
2240 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2241}
2242
2243void emit_ftosizd(int s,int d)
2244{
2245 assem_debug("ftosizd s%d,d%d\n",d,s);
2246 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2247}
2248
2249void emit_fsitos(int s,int d)
2250{
2251 assem_debug("fsitos s%d,s%d\n",d,s);
2252 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2253}
2254
2255void emit_fsitod(int s,int d)
2256{
2257 assem_debug("fsitod d%d,s%d\n",d,s);
2258 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2259}
2260
2261void emit_fcvtds(int s,int d)
2262{
2263 assem_debug("fcvtds d%d,s%d\n",d,s);
2264 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2265}
2266
2267void emit_fcvtsd(int s,int d)
2268{
2269 assem_debug("fcvtsd s%d,d%d\n",d,s);
2270 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2271}
2272
// Emit FSQRTS: s<d> = sqrt(s<s>), single precision.
// The debug text now names both operands as single-precision registers,
// matching the encoding that is emitted.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2278
// Emit FSQRTD: d<d> = sqrt(d<s>), double precision.
// Only the low three bits of each register number are encoded.
// The debug text now names both operands as double-precision registers,
// matching the encoding that is emitted.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2284
// Emit FABSS: s<d> = |s<s>|, single precision.
// The debug text now names both operands as single-precision registers,
// matching the encoding that is emitted.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2290
// Emit FABSD: d<d> = |d<s>|, double precision.
// Only the low three bits of each register number are encoded.
// The debug text now names both operands as double-precision registers,
// matching the encoding that is emitted.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2296
// Emit FNEGS: s<d> = -s<s>, single precision.
// The debug text now names both operands as single-precision registers,
// matching the encoding that is emitted.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2302
// Emit FNEGD: d<d> = -d<s>, double precision.
// Only the low three bits of each register number are encoded.
// The debug text now names both operands as double-precision registers,
// matching the encoding that is emitted.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2308
2309void emit_fadds(int s1,int s2,int d)
2310{
2311 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2312 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2313}
2314
2315void emit_faddd(int s1,int s2,int d)
2316{
2317 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2318 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2319}
2320
2321void emit_fsubs(int s1,int s2,int d)
2322{
2323 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2324 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2325}
2326
2327void emit_fsubd(int s1,int s2,int d)
2328{
2329 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2330 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2331}
2332
2333void emit_fmuls(int s1,int s2,int d)
2334{
2335 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2336 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2337}
2338
2339void emit_fmuld(int s1,int s2,int d)
2340{
2341 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2342 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2343}
2344
2345void emit_fdivs(int s1,int s2,int d)
2346{
2347 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2348 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2349}
2350
2351void emit_fdivd(int s1,int s2,int d)
2352{
2353 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2354 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2355}
2356
2357void emit_fcmps(int x,int y)
2358{
2359 assem_debug("fcmps s14, s15\n");
2360 output_w32(0xeeb47a67);
2361}
2362
2363void emit_fcmpd(int x,int y)
2364{
2365 assem_debug("fcmpd d6, d7\n");
2366 output_w32(0xeeb46b47);
2367}
2368
2369void emit_fmstat()
2370{
2371 assem_debug("fmstat\n");
2372 output_w32(0xeef1fa10);
2373}
2374
2375void emit_bicne_imm(int rs,int imm,int rt)
2376{
2377 u_int armval;
2378 genimm_checked(imm,&armval);
2379 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2380 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2381}
2382
2383void emit_biccs_imm(int rs,int imm,int rt)
2384{
2385 u_int armval;
2386 genimm_checked(imm,&armval);
2387 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2388 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2389}
2390
2391void emit_bicvc_imm(int rs,int imm,int rt)
2392{
2393 u_int armval;
2394 genimm_checked(imm,&armval);
2395 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2396 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2397}
2398
2399void emit_bichi_imm(int rs,int imm,int rt)
2400{
2401 u_int armval;
2402 genimm_checked(imm,&armval);
2403 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2404 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2405}
2406
2407void emit_orrvs_imm(int rs,int imm,int rt)
2408{
2409 u_int armval;
2410 genimm_checked(imm,&armval);
2411 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2412 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2413}
2414
2415void emit_orrne_imm(int rs,int imm,int rt)
2416{
2417 u_int armval;
2418 genimm_checked(imm,&armval);
2419 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2420 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2421}
2422
2423void emit_andne_imm(int rs,int imm,int rt)
2424{
2425 u_int armval;
2426 genimm_checked(imm,&armval);
2427 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2428 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2429}
2430
2431void emit_jno_unlikely(int a)
2432{
2433 //emit_jno(a);
2434 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2435 output_w32(0x72800000|rd_rn_rm(15,15,0));
2436}
2437
2438// Save registers before function call
2439void save_regs(u_int reglist)
2440{
2441 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2442 if(!reglist) return;
2443 assem_debug("stmia fp,{");
2444 if(reglist&1) assem_debug("r0, ");
2445 if(reglist&2) assem_debug("r1, ");
2446 if(reglist&4) assem_debug("r2, ");
2447 if(reglist&8) assem_debug("r3, ");
2448 if(reglist&0x1000) assem_debug("r12");
2449 assem_debug("}\n");
2450 output_w32(0xe88b0000|reglist);
2451}
2452// Restore registers after function call
2453void restore_regs(u_int reglist)
2454{
2455 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2456 if(!reglist) return;
2457 assem_debug("ldmia fp,{");
2458 if(reglist&1) assem_debug("r0, ");
2459 if(reglist&2) assem_debug("r1, ");
2460 if(reglist&4) assem_debug("r2, ");
2461 if(reglist&8) assem_debug("r3, ");
2462 if(reglist&0x1000) assem_debug("r12");
2463 assem_debug("}\n");
2464 output_w32(0xe89b0000|reglist);
2465}
2466
2467// Write back consts using r14 so we don't disturb the other registers
2468void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2469{
2470 int hr;
2471 for(hr=0;hr<HOST_REGS;hr++) {
2472 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2473 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2474 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2475 int value=constmap[i][hr];
2476 if(value==0) {
2477 emit_zeroreg(HOST_TEMPREG);
2478 }
2479 else {
2480 emit_movimm(value,HOST_TEMPREG);
2481 }
2482 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2483#ifndef FORCE32
2484 if((i_is32>>i_regmap[hr])&1) {
2485 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2486 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2487 }
2488#endif
2489 }
2490 }
2491 }
2492 }
2493}
2494
2495/* Stubs/epilogue */
2496
2497void literal_pool(int n)
2498{
2499 if(!literalcount) return;
2500 if(n) {
2501 if((int)out-literals[0][0]<4096-n) return;
2502 }
2503 u_int *ptr;
2504 int i;
2505 for(i=0;i<literalcount;i++)
2506 {
2507 ptr=(u_int *)literals[i][0];
2508 u_int offset=(u_int)out-(u_int)ptr-8;
2509 assert(offset<4096);
2510 assert(!(offset&3));
2511 *ptr|=offset;
2512 output_w32(literals[i][1]);
2513 }
2514 literalcount=0;
2515}
2516
2517void literal_pool_jumpover(int n)
2518{
2519 if(!literalcount) return;
2520 if(n) {
2521 if((int)out-literals[0][0]<4096-n) return;
2522 }
2523 int jaddr=(int)out;
2524 emit_jmp(0);
2525 literal_pool(0);
2526 set_jump_target(jaddr,(int)out);
2527}
2528
2529emit_extjump2(int addr, int target, int linker)
2530{
2531 u_char *ptr=(u_char *)addr;
2532 assert((ptr[3]&0x0e)==0xa);
2533 emit_loadlp(target,0);
2534 emit_loadlp(addr,1);
2535 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2536 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2537//DEBUG >
2538#ifdef DEBUG_CYCLE_COUNT
2539 emit_readword((int)&last_count,ECX);
2540 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2541 emit_readword((int)&next_interupt,ECX);
2542 emit_writeword(HOST_CCREG,(int)&Count);
2543 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2544 emit_writeword(ECX,(int)&last_count);
2545#endif
2546//DEBUG <
2547 emit_jmp(linker);
2548}
2549
2550emit_extjump(int addr, int target)
2551{
2552 emit_extjump2(addr, target, (int)dyna_linker);
2553}
2554emit_extjump_ds(int addr, int target)
2555{
2556 emit_extjump2(addr, target, (int)dyna_linker_ds);
2557}
2558
2559do_readstub(int n)
2560{
2561 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2562 literal_pool(256);
2563 set_jump_target(stubs[n][1],(int)out);
2564 int type=stubs[n][0];
2565 int i=stubs[n][3];
2566 int rs=stubs[n][4];
2567 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2568 u_int reglist=stubs[n][7];
2569 signed char *i_regmap=i_regs->regmap;
2570 int addr=get_reg(i_regmap,AGEN1+(i&1));
2571 int rth,rt;
2572 int ds;
2573 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2574 rth=get_reg(i_regmap,FTEMP|64);
2575 rt=get_reg(i_regmap,FTEMP);
2576 }else{
2577 rth=get_reg(i_regmap,rt1[i]|64);
2578 rt=get_reg(i_regmap,rt1[i]);
2579 }
2580 assert(rs>=0);
2581 if(addr<0) addr=rt;
2582 if(addr<0)
2583 // assume dummy read, no alloced reg
2584 addr=get_reg(i_regmap,-1);
2585 assert(addr>=0);
2586 int ftable=0;
2587 if(type==LOADB_STUB||type==LOADBU_STUB)
2588 ftable=(int)readmemb;
2589 if(type==LOADH_STUB||type==LOADHU_STUB)
2590 ftable=(int)readmemh;
2591 if(type==LOADW_STUB)
2592 ftable=(int)readmem;
2593#ifndef FORCE32
2594 if(type==LOADD_STUB)
2595 ftable=(int)readmemd;
2596#endif
2597 assert(ftable!=0);
2598 emit_writeword(rs,(int)&address);
2599 //emit_pusha();
2600 save_regs(reglist);
2601 ds=i_regs!=&regs[i];
2602 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2603 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2604 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2605 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2606 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2607 emit_shrimm(rs,16,1);
2608 int cc=get_reg(i_regmap,CCREG);
2609 if(cc<0) {
2610 emit_loadreg(CCREG,2);
2611 }
2612 emit_movimm(ftable,0);
2613 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2614 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2615 //emit_readword((int)&last_count,temp);
2616 //emit_add(cc,temp,cc);
2617 //emit_writeword(cc,(int)&Count);
2618 //emit_mov(15,14);
2619 emit_call((int)&indirect_jump_indexed);
2620 //emit_callreg(rs);
2621 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2622 // We really shouldn't need to update the count here,
2623 // but not doing so causes random crashes...
2624 emit_readword((int)&Count,HOST_TEMPREG);
2625 emit_readword((int)&next_interupt,2);
2626 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2627 emit_writeword(2,(int)&last_count);
2628 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2629 if(cc<0) {
2630 emit_storereg(CCREG,HOST_TEMPREG);
2631 }
2632 //emit_popa();
2633 restore_regs(reglist);
2634 //if((cc=get_reg(regmap,CCREG))>=0) {
2635 // emit_loadreg(CCREG,cc);
2636 //}
2637 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2638 assert(rt>=0);
2639 if(type==LOADB_STUB)
2640 emit_movsbl((int)&readmem_dword,rt);
2641 if(type==LOADBU_STUB)
2642 emit_movzbl((int)&readmem_dword,rt);
2643 if(type==LOADH_STUB)
2644 emit_movswl((int)&readmem_dword,rt);
2645 if(type==LOADHU_STUB)
2646 emit_movzwl((int)&readmem_dword,rt);
2647 if(type==LOADW_STUB)
2648 emit_readword((int)&readmem_dword,rt);
2649 if(type==LOADD_STUB) {
2650 emit_readword((int)&readmem_dword,rt);
2651 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2652 }
2653 }
2654 emit_jmp(stubs[n][2]); // return address
2655}
2656
2657inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2658{
2659 int rs=get_reg(regmap,target);
2660 int rth=get_reg(regmap,target|64);
2661 int rt=get_reg(regmap,target);
2662 // allow for PCSX dummy reads
2663 //assert(rt>=0);
2664 if(rs<0)
2665 rs=get_reg(regmap,-1);
2666 assert(rs>=0);
2667 int ftable=0;
2668 if(type==LOADB_STUB||type==LOADBU_STUB)
2669 ftable=(int)readmemb;
2670 if(type==LOADH_STUB||type==LOADHU_STUB)
2671 ftable=(int)readmemh;
2672 if(type==LOADW_STUB)
2673 ftable=(int)readmem;
2674#ifndef FORCE32
2675 if(type==LOADD_STUB)
2676 ftable=(int)readmemd;
2677#endif
2678 assert(ftable!=0);
2679 if(target==0)
2680 emit_movimm(addr,rs);
2681 emit_writeword(rs,(int)&address);
2682 //emit_pusha();
2683 save_regs(reglist);
2684 //emit_shrimm(rs,16,1);
2685 int cc=get_reg(regmap,CCREG);
2686 if(cc<0) {
2687 emit_loadreg(CCREG,2);
2688 }
2689 //emit_movimm(ftable,0);
2690 emit_movimm(((u_int *)ftable)[addr>>16],0);
2691 //emit_readword((int)&last_count,12);
2692 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2693 if((signed int)addr>=(signed int)0xC0000000) {
2694 // Pagefault address
2695 int ds=regmap!=regs[i].regmap;
2696 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2697 }
2698 //emit_add(12,2,2);
2699 //emit_writeword(2,(int)&Count);
2700 //emit_call(((u_int *)ftable)[addr>>16]);
2701 emit_call((int)&indirect_jump);
2702 // We really shouldn't need to update the count here,
2703 // but not doing so causes random crashes...
2704 emit_readword((int)&Count,HOST_TEMPREG);
2705 emit_readword((int)&next_interupt,2);
2706 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2707 emit_writeword(2,(int)&last_count);
2708 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2709 if(cc<0) {
2710 emit_storereg(CCREG,HOST_TEMPREG);
2711 }
2712 //emit_popa();
2713 restore_regs(reglist);
2714 if(rt>=0) {
2715 if(type==LOADB_STUB)
2716 emit_movsbl((int)&readmem_dword,rt);
2717 if(type==LOADBU_STUB)
2718 emit_movzbl((int)&readmem_dword,rt);
2719 if(type==LOADH_STUB)
2720 emit_movswl((int)&readmem_dword,rt);
2721 if(type==LOADHU_STUB)
2722 emit_movzwl((int)&readmem_dword,rt);
2723 if(type==LOADW_STUB)
2724 emit_readword((int)&readmem_dword,rt);
2725 if(type==LOADD_STUB) {
2726 emit_readword((int)&readmem_dword,rt);
2727 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2728 }
2729 }
2730}
2731
2732do_writestub(int n)
2733{
2734 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2735 literal_pool(256);
2736 set_jump_target(stubs[n][1],(int)out);
2737 int type=stubs[n][0];
2738 int i=stubs[n][3];
2739 int rs=stubs[n][4];
2740 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2741 u_int reglist=stubs[n][7];
2742 signed char *i_regmap=i_regs->regmap;
2743 int addr=get_reg(i_regmap,AGEN1+(i&1));
2744 int rth,rt,r;
2745 int ds;
2746 if(itype[i]==C1LS||itype[i]==C2LS) {
2747 rth=get_reg(i_regmap,FTEMP|64);
2748 rt=get_reg(i_regmap,r=FTEMP);
2749 }else{
2750 rth=get_reg(i_regmap,rs2[i]|64);
2751 rt=get_reg(i_regmap,r=rs2[i]);
2752 }
2753 assert(rs>=0);
2754 assert(rt>=0);
2755 if(addr<0) addr=get_reg(i_regmap,-1);
2756 assert(addr>=0);
2757 int ftable=0;
2758 if(type==STOREB_STUB)
2759 ftable=(int)writememb;
2760 if(type==STOREH_STUB)
2761 ftable=(int)writememh;
2762 if(type==STOREW_STUB)
2763 ftable=(int)writemem;
2764#ifndef FORCE32
2765 if(type==STORED_STUB)
2766 ftable=(int)writememd;
2767#endif
2768 assert(ftable!=0);
2769 emit_writeword(rs,(int)&address);
2770 //emit_shrimm(rs,16,rs);
2771 //emit_movmem_indexedx4(ftable,rs,rs);
2772 if(type==STOREB_STUB)
2773 emit_writebyte(rt,(int)&byte);
2774 if(type==STOREH_STUB)
2775 emit_writehword(rt,(int)&hword);
2776 if(type==STOREW_STUB)
2777 emit_writeword(rt,(int)&word);
2778 if(type==STORED_STUB) {
2779#ifndef FORCE32
2780 emit_writeword(rt,(int)&dword);
2781 emit_writeword(r?rth:rt,(int)&dword+4);
2782#else
2783 printf("STORED_STUB\n");
2784#endif
2785 }
2786 //emit_pusha();
2787 save_regs(reglist);
2788 ds=i_regs!=&regs[i];
2789 int real_rs=get_reg(i_regmap,rs1[i]);
2790 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2791 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2792 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2793 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2794 emit_shrimm(rs,16,1);
2795 int cc=get_reg(i_regmap,CCREG);
2796 if(cc<0) {
2797 emit_loadreg(CCREG,2);
2798 }
2799 emit_movimm(ftable,0);
2800 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2801 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2802 //emit_readword((int)&last_count,temp);
2803 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2804 //emit_add(cc,temp,cc);
2805 //emit_writeword(cc,(int)&Count);
2806 emit_call((int)&indirect_jump_indexed);
2807 //emit_callreg(rs);
2808 emit_readword((int)&Count,HOST_TEMPREG);
2809 emit_readword((int)&next_interupt,2);
2810 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2811 emit_writeword(2,(int)&last_count);
2812 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2813 if(cc<0) {
2814 emit_storereg(CCREG,HOST_TEMPREG);
2815 }
2816 //emit_popa();
2817 restore_regs(reglist);
2818 //if((cc=get_reg(regmap,CCREG))>=0) {
2819 // emit_loadreg(CCREG,cc);
2820 //}
2821 emit_jmp(stubs[n][2]); // return address
2822}
2823
2824inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2825{
2826 int rs=get_reg(regmap,-1);
2827 int rth=get_reg(regmap,target|64);
2828 int rt=get_reg(regmap,target);
2829 assert(rs>=0);
2830 assert(rt>=0);
2831 int ftable=0;
2832 if(type==STOREB_STUB)
2833 ftable=(int)writememb;
2834 if(type==STOREH_STUB)
2835 ftable=(int)writememh;
2836 if(type==STOREW_STUB)
2837 ftable=(int)writemem;
2838#ifndef FORCE32
2839 if(type==STORED_STUB)
2840 ftable=(int)writememd;
2841#endif
2842 assert(ftable!=0);
2843 emit_writeword(rs,(int)&address);
2844 //emit_shrimm(rs,16,rs);
2845 //emit_movmem_indexedx4(ftable,rs,rs);
2846 if(type==STOREB_STUB)
2847 emit_writebyte(rt,(int)&byte);
2848 if(type==STOREH_STUB)
2849 emit_writehword(rt,(int)&hword);
2850 if(type==STOREW_STUB)
2851 emit_writeword(rt,(int)&word);
2852 if(type==STORED_STUB) {
2853#ifndef FORCE32
2854 emit_writeword(rt,(int)&dword);
2855 emit_writeword(target?rth:rt,(int)&dword+4);
2856#else
2857 printf("STORED_STUB\n");
2858#endif
2859 }
2860 //emit_pusha();
2861 save_regs(reglist);
2862 //emit_shrimm(rs,16,1);
2863 int cc=get_reg(regmap,CCREG);
2864 if(cc<0) {
2865 emit_loadreg(CCREG,2);
2866 }
2867 //emit_movimm(ftable,0);
2868 emit_movimm(((u_int *)ftable)[addr>>16],0);
2869 //emit_readword((int)&last_count,12);
2870 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2871 if((signed int)addr>=(signed int)0xC0000000) {
2872 // Pagefault address
2873 int ds=regmap!=regs[i].regmap;
2874 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2875 }
2876 //emit_add(12,2,2);
2877 //emit_writeword(2,(int)&Count);
2878 //emit_call(((u_int *)ftable)[addr>>16]);
2879 emit_call((int)&indirect_jump);
2880 emit_readword((int)&Count,HOST_TEMPREG);
2881 emit_readword((int)&next_interupt,2);
2882 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2883 emit_writeword(2,(int)&last_count);
2884 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2885 if(cc<0) {
2886 emit_storereg(CCREG,HOST_TEMPREG);
2887 }
2888 //emit_popa();
2889 restore_regs(reglist);
2890}
2891
2892do_unalignedwritestub(int n)
2893{
2894 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2895 literal_pool(256);
2896 set_jump_target(stubs[n][1],(int)out);
2897
2898 int i=stubs[n][3];
2899 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2900 int addr=stubs[n][5];
2901 u_int reglist=stubs[n][7];
2902 signed char *i_regmap=i_regs->regmap;
2903 int temp2=get_reg(i_regmap,FTEMP);
2904 int rt;
2905 int ds, real_rs;
2906 rt=get_reg(i_regmap,rs2[i]);
2907 assert(rt>=0);
2908 assert(addr>=0);
2909 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2910 reglist|=(1<<addr);
2911 reglist&=~(1<<temp2);
2912
2913 emit_andimm(addr,0xfffffffc,temp2);
2914 emit_writeword(temp2,(int)&address);
2915
2916 save_regs(reglist);
2917 ds=i_regs!=&regs[i];
2918 real_rs=get_reg(i_regmap,rs1[i]);
2919 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2920 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2921 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2922 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2923 emit_shrimm(addr,16,1);
2924 int cc=get_reg(i_regmap,CCREG);
2925 if(cc<0) {
2926 emit_loadreg(CCREG,2);
2927 }
2928 emit_movimm((u_int)readmem,0);
2929 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2930 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2931 emit_call((int)&indirect_jump_indexed);
2932 restore_regs(reglist);
2933
2934 emit_readword((int)&readmem_dword,temp2);
2935 int temp=addr; //hmh
2936 emit_shlimm(addr,3,temp);
2937 emit_andimm(temp,24,temp);
2938#ifdef BIG_ENDIAN_MIPS
2939 if (opcode[i]==0x2e) // SWR
2940#else
2941 if (opcode[i]==0x2a) // SWL
2942#endif
2943 emit_xorimm(temp,24,temp);
2944 emit_movimm(-1,HOST_TEMPREG);
2945 if (opcode[i]==0x2a) { // SWL
2946 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2947 emit_orrshr(rt,temp,temp2);
2948 }else{
2949 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2950 emit_orrshl(rt,temp,temp2);
2951 }
2952 emit_readword((int)&address,addr);
2953 emit_writeword(temp2,(int)&word);
2954 //save_regs(reglist); // don't need to, no state changes
2955 emit_shrimm(addr,16,1);
2956 emit_movimm((u_int)writemem,0);
2957 //emit_call((int)&indirect_jump_indexed);
2958 emit_mov(15,14);
2959 emit_readword_dualindexedx4(0,1,15);
2960 emit_readword((int)&Count,HOST_TEMPREG);
2961 emit_readword((int)&next_interupt,2);
2962 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2963 emit_writeword(2,(int)&last_count);
2964 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2965 if(cc<0) {
2966 emit_storereg(CCREG,HOST_TEMPREG);
2967 }
2968 restore_regs(reglist);
2969 emit_jmp(stubs[n][2]); // return address
2970}
2971
// Debug helper: print the eight values passed in, plus the word stored
// just below the first argument.
// NOTE(review): reading below &edi relies on a particular stack layout
// (the parameter names suggest this was written for x86) - confirm before
// relying on the last value.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int *below_first_arg=&edi-1;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,*below_first_arg);
}
2976
2977do_invstub(int n)
2978{
2979 literal_pool(20);
2980 u_int reglist=stubs[n][3];
2981 set_jump_target(stubs[n][1],(int)out);
2982 save_regs(reglist);
2983 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2984 emit_call((int)&invalidate_addr);
2985 restore_regs(reglist);
2986 emit_jmp(stubs[n][2]); // return address
2987}
2988
2989int do_dirty_stub(int i)
2990{
2991 assem_debug("do_dirty_stub %x\n",start+i*4);
2992 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2993 #ifdef PCSX
2994 addr=(u_int)source;
2995 #endif
2996 // Careful about the code output here, verify_dirty needs to parse it.
2997 #ifdef ARMv5_ONLY
2998 emit_loadlp(addr,1);
2999 emit_loadlp((int)copy,2);
3000 emit_loadlp(slen*4,3);
3001 #else
3002 emit_movw(addr&0x0000FFFF,1);
3003 emit_movw(((u_int)copy)&0x0000FFFF,2);
3004 emit_movt(addr&0xFFFF0000,1);
3005 emit_movt(((u_int)copy)&0xFFFF0000,2);
3006 emit_movw(slen*4,3);
3007 #endif
3008 emit_movimm(start+i*4,0);
3009 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3010 int entry=(int)out;
3011 load_regs_entry(i);
3012 if(entry==(int)out) entry=instr_addr[i];
3013 emit_jmp(instr_addr[i]);
3014 return entry;
3015}
3016
3017void do_dirty_stub_ds()
3018{
3019 // Careful about the code output here, verify_dirty needs to parse it.
3020 #ifdef ARMv5_ONLY
3021 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3022 emit_loadlp((int)copy,2);
3023 emit_loadlp(slen*4,3);
3024 #else
3025 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3026 emit_movw(((u_int)copy)&0x0000FFFF,2);
3027 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3028 emit_movt(((u_int)copy)&0xFFFF0000,2);
3029 emit_movw(slen*4,3);
3030 #endif
3031 emit_movimm(start+1,0);
3032 emit_call((int)&verify_code_ds);
3033}
3034
3035do_cop1stub(int n)
3036{
3037 literal_pool(256);
3038 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3039 set_jump_target(stubs[n][1],(int)out);
3040 int i=stubs[n][3];
3041// int rs=stubs[n][4];
3042 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3043 int ds=stubs[n][6];
3044 if(!ds) {
3045 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3046 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3047 }
3048 //else {printf("fp exception in delay slot\n");}
3049 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3050 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3051 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3052 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3053 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3054}
3055
3056/* TLB */
3057
3058int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3059{
3060 if(c) {
3061 if((signed int)addr>=(signed int)0xC0000000) {
3062 // address_generation already loaded the const
3063 emit_readword_dualindexedx4(FP,map,map);
3064 }
3065 else
3066 return -1; // No mapping
3067 }
3068 else {
3069 assert(s!=map);
3070 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3071 emit_addsr12(map,s,map);
3072 // Schedule this while we wait on the load
3073 //if(x) emit_xorimm(s,x,ar);
3074 if(shift>=0) emit_shlimm(s,3,shift);
3075 if(~a) emit_andimm(s,a,ar);
3076 emit_readword_dualindexedx4(FP,map,map);
3077 }
3078 return map;
3079}
3080int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3081{
3082 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3083 emit_test(map,map);
3084 *jaddr=(int)out;
3085 emit_js(0);
3086 }
3087 return map;
3088}
3089
3090int gen_tlb_addr_r(int ar, int map) {
3091 if(map>=0) {
3092 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3093 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3094 }
3095}
3096
3097int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3098{
3099 if(c) {
3100 if(addr<0x80800000||addr>=0xC0000000) {
3101 // address_generation already loaded the const
3102 emit_readword_dualindexedx4(FP,map,map);
3103 }
3104 else
3105 return -1; // No mapping
3106 }
3107 else {
3108 assert(s!=map);
3109 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3110 emit_addsr12(map,s,map);
3111 // Schedule this while we wait on the load
3112 //if(x) emit_xorimm(s,x,ar);
3113 emit_readword_dualindexedx4(FP,map,map);
3114 }
3115 return map;
3116}
3117int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3118{
3119 if(!c||addr<0x80800000||addr>=0xC0000000) {
3120 emit_testimm(map,0x40000000);
3121 *jaddr=(int)out;
3122 emit_jne(0);
3123 }
3124}
3125
3126int gen_tlb_addr_w(int ar, int map) {
3127 if(map>=0) {
3128 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3129 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3130 }
3131}
3132
3133// Generate the address of the memory_map entry, relative to dynarec_local
3134generate_map_const(u_int addr,int reg) {
3135 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3136 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3137}
3138
3139/* Special assem */
3140
3141void shift_assemble_arm(int i,struct regstat *i_regs)
3142{
3143 if(rt1[i]) {
3144 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3145 {
3146 signed char s,t,shift;
3147 t=get_reg(i_regs->regmap,rt1[i]);
3148 s=get_reg(i_regs->regmap,rs1[i]);
3149 shift=get_reg(i_regs->regmap,rs2[i]);
3150 if(t>=0){
3151 if(rs1[i]==0)
3152 {
3153 emit_zeroreg(t);
3154 }
3155 else if(rs2[i]==0)
3156 {
3157 assert(s>=0);
3158 if(s!=t) emit_mov(s,t);
3159 }
3160 else
3161 {
3162 emit_andimm(shift,31,HOST_TEMPREG);
3163 if(opcode2[i]==4) // SLLV
3164 {
3165 emit_shl(s,HOST_TEMPREG,t);
3166 }
3167 if(opcode2[i]==6) // SRLV
3168 {
3169 emit_shr(s,HOST_TEMPREG,t);
3170 }
3171 if(opcode2[i]==7) // SRAV
3172 {
3173 emit_sar(s,HOST_TEMPREG,t);
3174 }
3175 }
3176 }
3177 } else { // DSLLV/DSRLV/DSRAV
3178 signed char sh,sl,th,tl,shift;
3179 th=get_reg(i_regs->regmap,rt1[i]|64);
3180 tl=get_reg(i_regs->regmap,rt1[i]);
3181 sh=get_reg(i_regs->regmap,rs1[i]|64);
3182 sl=get_reg(i_regs->regmap,rs1[i]);
3183 shift=get_reg(i_regs->regmap,rs2[i]);
3184 if(tl>=0){
3185 if(rs1[i]==0)
3186 {
3187 emit_zeroreg(tl);
3188 if(th>=0) emit_zeroreg(th);
3189 }
3190 else if(rs2[i]==0)
3191 {
3192 assert(sl>=0);
3193 if(sl!=tl) emit_mov(sl,tl);
3194 if(th>=0&&sh!=th) emit_mov(sh,th);
3195 }
3196 else
3197 {
3198 // FIXME: What if shift==tl ?
3199 assert(shift!=tl);
3200 int temp=get_reg(i_regs->regmap,-1);
3201 int real_th=th;
3202 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3203 assert(sl>=0);
3204 assert(sh>=0);
3205 emit_andimm(shift,31,HOST_TEMPREG);
3206 if(opcode2[i]==0x14) // DSLLV
3207 {
3208 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3209 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3210 emit_orrshr(sl,HOST_TEMPREG,th);
3211 emit_andimm(shift,31,HOST_TEMPREG);
3212 emit_testimm(shift,32);
3213 emit_shl(sl,HOST_TEMPREG,tl);
3214 if(th>=0) emit_cmovne_reg(tl,th);
3215 emit_cmovne_imm(0,tl);
3216 }
3217 if(opcode2[i]==0x16) // DSRLV
3218 {
3219 assert(th>=0);
3220 emit_shr(sl,HOST_TEMPREG,tl);
3221 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3222 emit_orrshl(sh,HOST_TEMPREG,tl);
3223 emit_andimm(shift,31,HOST_TEMPREG);
3224 emit_testimm(shift,32);
3225 emit_shr(sh,HOST_TEMPREG,th);
3226 emit_cmovne_reg(th,tl);
3227 if(real_th>=0) emit_cmovne_imm(0,th);
3228 }
3229 if(opcode2[i]==0x17) // DSRAV
3230 {
3231 assert(th>=0);
3232 emit_shr(sl,HOST_TEMPREG,tl);
3233 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3234 if(real_th>=0) {
3235 assert(temp>=0);
3236 emit_sarimm(th,31,temp);
3237 }
3238 emit_orrshl(sh,HOST_TEMPREG,tl);
3239 emit_andimm(shift,31,HOST_TEMPREG);
3240 emit_testimm(shift,32);
3241 emit_sar(sh,HOST_TEMPREG,th);
3242 emit_cmovne_reg(th,tl);
3243 if(real_th>=0) emit_cmovne_reg(temp,th);
3244 }
3245 }
3246 }
3247 }
3248 }
3249}
3250#define shift_assemble shift_assemble_arm
3251
3252void loadlr_assemble_arm(int i,struct regstat *i_regs)
3253{
3254 int s,th,tl,temp,temp2,addr,map=-1;
3255 int offset;
3256 int jaddr=0;
3257 int memtarget,c=0;
3258 u_int hr,reglist=0;
3259 th=get_reg(i_regs->regmap,rt1[i]|64);
3260 tl=get_reg(i_regs->regmap,rt1[i]);
3261 s=get_reg(i_regs->regmap,rs1[i]);
3262 temp=get_reg(i_regs->regmap,-1);
3263 temp2=get_reg(i_regs->regmap,FTEMP);
3264 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3265 assert(addr<0);
3266 offset=imm[i];
3267 for(hr=0;hr<HOST_REGS;hr++) {
3268 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3269 }
3270 reglist|=1<<temp;
3271 if(offset||s<0||c) addr=temp2;
3272 else addr=s;
3273 if(s>=0) {
3274 c=(i_regs->wasconst>>s)&1;
3275 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3276 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3277 }
3278 if(tl>=0) {
3279 //assert(tl>=0);
3280 //assert(rt1[i]);
3281 if(!using_tlb) {
3282 if(!c) {
3283 emit_shlimm(addr,3,temp);
3284 if (opcode[i]==0x22||opcode[i]==0x26) {
3285 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3286 }else{
3287 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3288 }
3289 emit_cmpimm(addr,RAM_SIZE);
3290 jaddr=(int)out;
3291 emit_jno(0);
3292 }
3293 else {
3294 if (opcode[i]==0x22||opcode[i]==0x26) {
3295 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3296 }else{
3297 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3298 }
3299 }
3300 }else{ // using tlb
3301 int a;
3302 if(c) {
3303 a=-1;
3304 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3305 a=0xFFFFFFFC; // LWL/LWR
3306 }else{
3307 a=0xFFFFFFF8; // LDL/LDR
3308 }
3309 map=get_reg(i_regs->regmap,TLREG);
3310 assert(map>=0);
3311 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3312 if(c) {
3313 if (opcode[i]==0x22||opcode[i]==0x26) {
3314 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3315 }else{
3316 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3317 }
3318 }
3319 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3320 }
3321 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3322 if(!c||memtarget) {
3323 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3324 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3325 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3326 }
3327 else
3328 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3329 emit_andimm(temp,24,temp);
3330#ifdef BIG_ENDIAN_MIPS
3331 if (opcode[i]==0x26) // LWR
3332#else
3333 if (opcode[i]==0x22) // LWL
3334#endif
3335 emit_xorimm(temp,24,temp);
3336 emit_movimm(-1,HOST_TEMPREG);
3337 if (opcode[i]==0x26) {
3338 emit_shr(temp2,temp,temp2);
3339 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3340 }else{
3341 emit_shl(temp2,temp,temp2);
3342 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3343 }
3344 emit_or(temp2,tl,tl);
3345 //emit_storereg(rt1[i],tl); // DEBUG
3346 }
3347 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3348 // FIXME: little endian
3349 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3350 if(!c||memtarget) {
3351 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3352 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3353 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3354 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3355 }
3356 else
3357 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3358 emit_testimm(temp,32);
3359 emit_andimm(temp,24,temp);
3360 if (opcode[i]==0x1A) { // LDL
3361 emit_rsbimm(temp,32,HOST_TEMPREG);
3362 emit_shl(temp2h,temp,temp2h);
3363 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3364 emit_movimm(-1,HOST_TEMPREG);
3365 emit_shl(temp2,temp,temp2);
3366 emit_cmove_reg(temp2h,th);
3367 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3368 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3369 emit_orreq(temp2,tl,tl);
3370 emit_orrne(temp2,th,th);
3371 }
3372 if (opcode[i]==0x1B) { // LDR
3373 emit_xorimm(temp,24,temp);
3374 emit_rsbimm(temp,32,HOST_TEMPREG);
3375 emit_shr(temp2,temp,temp2);
3376 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3377 emit_movimm(-1,HOST_TEMPREG);
3378 emit_shr(temp2h,temp,temp2h);
3379 emit_cmovne_reg(temp2,tl);
3380 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3381 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3382 emit_orrne(temp2h,th,th);
3383 emit_orreq(temp2h,tl,tl);
3384 }
3385 }
3386 }
3387}
3388#define loadlr_assemble loadlr_assemble_arm
3389
3390void cop0_assemble(int i,struct regstat *i_regs)
3391{
3392 if(opcode2[i]==0) // MFC0
3393 {
3394 signed char t=get_reg(i_regs->regmap,rt1[i]);
3395 char copr=(source[i]>>11)&0x1f;
3396 //assert(t>=0); // Why does this happen? OOT is weird
3397 if(t>=0&&rt1[i]!=0) {
3398#ifdef MUPEN64
3399 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3400 emit_movimm((source[i]>>11)&0x1f,1);
3401 emit_writeword(0,(int)&PC);
3402 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3403 if(copr==9) {
3404 emit_readword((int)&last_count,ECX);
3405 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3406 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3407 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3408 emit_writeword(HOST_CCREG,(int)&Count);
3409 }
3410 emit_call((int)MFC0);
3411 emit_readword((int)&readmem_dword,t);
3412#else
3413 emit_readword((int)&reg_cop0+copr*4,t);
3414#endif
3415 }
3416 }
3417 else if(opcode2[i]==4) // MTC0
3418 {
3419 signed char s=get_reg(i_regs->regmap,rs1[i]);
3420 char copr=(source[i]>>11)&0x1f;
3421 assert(s>=0);
3422 emit_writeword(s,(int)&readmem_dword);
3423 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3424#ifdef MUPEN64
3425 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3426 emit_movimm((source[i]>>11)&0x1f,1);
3427 emit_writeword(0,(int)&PC);
3428 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3429#endif
3430 if(copr==9||copr==11||copr==12||copr==13) {
3431 emit_readword((int)&last_count,ECX);
3432 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3433 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3434 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3435 emit_writeword(HOST_CCREG,(int)&Count);
3436 }
3437 // What a mess. The status register (12) can enable interrupts,
3438 // so needs a special case to handle a pending interrupt.
3439 // The interrupt must be taken immediately, because a subsequent
3440 // instruction might disable interrupts again.
3441 if(copr==12||copr==13) {
3442#ifdef PCSX
3443 if (is_delayslot) {
3444 // burn cycles to cause cc_interrupt, which will
3445 // reschedule next_interupt. Relies on CCREG from above.
3446 assem_debug("MTC0 DS %d\n", copr);
3447 emit_writeword(HOST_CCREG,(int)&last_count);
3448 emit_movimm(0,HOST_CCREG);
3449 emit_storereg(CCREG,HOST_CCREG);
3450 emit_movimm(copr,0);
3451 emit_call((int)pcsx_mtc0_ds);
3452 return;
3453 }
3454#endif
3455 emit_movimm(start+i*4+4,0);
3456 emit_movimm(0,1);
3457 emit_writeword(0,(int)&pcaddr);
3458 emit_writeword(1,(int)&pending_exception);
3459 }
3460 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3461 //else
3462#ifdef PCSX
3463 emit_movimm(copr,0);
3464 emit_call((int)pcsx_mtc0);
3465#else
3466 emit_call((int)MTC0);
3467#endif
3468 if(copr==9||copr==11||copr==12||copr==13) {
3469 emit_readword((int)&Count,HOST_CCREG);
3470 emit_readword((int)&next_interupt,ECX);
3471 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3472 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3473 emit_writeword(ECX,(int)&last_count);
3474 emit_storereg(CCREG,HOST_CCREG);
3475 }
3476 if(copr==12||copr==13) {
3477 assert(!is_delayslot);
3478 emit_readword((int)&pending_exception,14);
3479 }
3480 emit_loadreg(rs1[i],s);
3481 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3482 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3483 if(copr==12||copr==13) {
3484 emit_test(14,14);
3485 emit_jne((int)&do_interrupt);
3486 }
3487 cop1_usable=0;
3488 }
3489 else
3490 {
3491 assert(opcode2[i]==0x10);
3492#ifndef DISABLE_TLB
3493 if((source[i]&0x3f)==0x01) // TLBR
3494 emit_call((int)TLBR);
3495 if((source[i]&0x3f)==0x02) // TLBWI
3496 emit_call((int)TLBWI_new);
3497 if((source[i]&0x3f)==0x06) { // TLBWR
3498 // The TLB entry written by TLBWR is dependent on the count,
3499 // so update the cycle count
3500 emit_readword((int)&last_count,ECX);
3501 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3502 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3503 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3504 emit_writeword(HOST_CCREG,(int)&Count);
3505 emit_call((int)TLBWR_new);
3506 }
3507 if((source[i]&0x3f)==0x08) // TLBP
3508 emit_call((int)TLBP);
3509#endif
3510#ifdef PCSX
3511 if((source[i]&0x3f)==0x10) // RFE
3512 {
3513 emit_readword((int)&Status,0);
3514 emit_andimm(0,0x3c,1);
3515 emit_andimm(0,~0xf,0);
3516 emit_orrshr_imm(1,2,0);
3517 emit_writeword(0,(int)&Status);
3518 }
3519#else
3520 if((source[i]&0x3f)==0x18) // ERET
3521 {
3522 int count=ccadj[i];
3523 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3524 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3525 emit_jmp((int)jump_eret);
3526 }
3527#endif
3528 }
3529}
3530
3531static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3532{
3533 switch (copr) {
3534 case 1:
3535 case 3:
3536 case 5:
3537 case 8:
3538 case 9:
3539 case 10:
3540 case 11:
3541 emit_readword((int)&reg_cop2d[copr],tl);
3542 emit_signextend16(tl,tl);
3543 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3544 break;
3545 case 7:
3546 case 16:
3547 case 17:
3548 case 18:
3549 case 19:
3550 emit_readword((int)&reg_cop2d[copr],tl);
3551 emit_andimm(tl,0xffff,tl);
3552 emit_writeword(tl,(int)&reg_cop2d[copr]);
3553 break;
3554 case 15:
3555 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3556 emit_writeword(tl,(int)&reg_cop2d[copr]);
3557 break;
3558 case 28:
3559 case 29:
3560 emit_readword((int)&reg_cop2d[9],temp);
3561 emit_testimm(temp,0x8000); // do we need this?
3562 emit_andimm(temp,0xf80,temp);
3563 emit_andne_imm(temp,0,temp);
3564 emit_shrimm(temp,7,tl);
3565 emit_readword((int)&reg_cop2d[10],temp);
3566 emit_testimm(temp,0x8000);
3567 emit_andimm(temp,0xf80,temp);
3568 emit_andne_imm(temp,0,temp);
3569 emit_orrshr_imm(temp,2,tl);
3570 emit_readword((int)&reg_cop2d[11],temp);
3571 emit_testimm(temp,0x8000);
3572 emit_andimm(temp,0xf80,temp);
3573 emit_andne_imm(temp,0,temp);
3574 emit_orrshl_imm(temp,3,tl);
3575 emit_writeword(tl,(int)&reg_cop2d[copr]);
3576 break;
3577 default:
3578 emit_readword((int)&reg_cop2d[copr],tl);
3579 break;
3580 }
3581}
3582
3583static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3584{
3585 switch (copr) {
3586 case 15:
3587 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3588 emit_writeword(sl,(int)&reg_cop2d[copr]);
3589 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3590 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3591 emit_writeword(sl,(int)&reg_cop2d[14]);
3592 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3593 break;
3594 case 28:
3595 emit_andimm(sl,0x001f,temp);
3596 emit_shlimm(temp,7,temp);
3597 emit_writeword(temp,(int)&reg_cop2d[9]);
3598 emit_andimm(sl,0x03e0,temp);
3599 emit_shlimm(temp,2,temp);
3600 emit_writeword(temp,(int)&reg_cop2d[10]);
3601 emit_andimm(sl,0x7c00,temp);
3602 emit_shrimm(temp,3,temp);
3603 emit_writeword(temp,(int)&reg_cop2d[11]);
3604 emit_writeword(sl,(int)&reg_cop2d[28]);
3605 break;
3606 case 30:
3607 emit_movs(sl,temp);
3608 emit_mvnmi(temp,temp);
3609 emit_clz(temp,temp);
3610 emit_writeword(sl,(int)&reg_cop2d[30]);
3611 emit_writeword(temp,(int)&reg_cop2d[31]);
3612 break;
3613 case 31:
3614 break;
3615 default:
3616 emit_writeword(sl,(int)&reg_cop2d[copr]);
3617 break;
3618 }
3619}
3620
3621void cop2_assemble(int i,struct regstat *i_regs)
3622{
3623 u_int copr=(source[i]>>11)&0x1f;
3624 signed char temp=get_reg(i_regs->regmap,-1);
3625 if (opcode2[i]==0) { // MFC2
3626 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3627 if(tl>=0&&rt1[i]!=0)
3628 cop2_get_dreg(copr,tl,temp);
3629 }
3630 else if (opcode2[i]==4) { // MTC2
3631 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3632 cop2_put_dreg(copr,sl,temp);
3633 }
3634 else if (opcode2[i]==2) // CFC2
3635 {
3636 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3637 if(tl>=0&&rt1[i]!=0)
3638 emit_readword((int)&reg_cop2c[copr],tl);
3639 }
3640 else if (opcode2[i]==6) // CTC2
3641 {
3642 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3643 switch(copr) {
3644 case 4:
3645 case 12:
3646 case 20:
3647 case 26:
3648 case 27:
3649 case 29:
3650 case 30:
3651 emit_signextend16(sl,temp);
3652 break;
3653 case 31:
3654 //value = value & 0x7ffff000;
3655 //if (value & 0x7f87e000) value |= 0x80000000;
3656 emit_shrimm(sl,12,temp);
3657 emit_shlimm(temp,12,temp);
3658 emit_testimm(temp,0x7f000000);
3659 emit_testeqimm(temp,0x00870000);
3660 emit_testeqimm(temp,0x0000e000);
3661 emit_orrne_imm(temp,0x80000000,temp);
3662 break;
3663 default:
3664 temp=sl;
3665 break;
3666 }
3667 emit_writeword(temp,(int)&reg_cop2c[copr]);
3668 assert(sl>=0);
3669 }
3670}
3671
3672void c2op_assemble(int i,struct regstat *i_regs)
3673{
3674 signed char temp=get_reg(i_regs->regmap,-1);
3675 u_int c2op=source[i]&0x3f;
3676 u_int hr,reglist=0;
3677 for(hr=0;hr<HOST_REGS;hr++) {
3678 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3679 }
3680 if(i==0||itype[i-1]!=C2OP)
3681 save_regs(reglist);
3682
3683 if (gte_handlers[c2op]!=NULL) {
3684 int cc=get_reg(i_regs->regmap,CCREG);
3685 emit_movimm(source[i],temp); // opcode
3686 if (cc>=0&&gte_cycletab[c2op])
3687 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3688 emit_writeword(temp,(int)&psxRegs.code);
3689 emit_call((int)gte_handlers[c2op]);
3690 }
3691
3692 if(i>=slen-1||itype[i+1]!=C2OP)
3693 restore_regs(reglist);
3694}
3695
3696void cop1_unusable(int i,struct regstat *i_regs)
3697{
3698 // XXX: should just just do the exception instead
3699 if(!cop1_usable) {
3700 int jaddr=(int)out;
3701 emit_jmp(0);
3702 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3703 cop1_usable=1;
3704 }
3705}
3706
3707void cop1_assemble(int i,struct regstat *i_regs)
3708{
3709#ifndef DISABLE_COP1
3710 // Check cop1 unusable
3711 if(!cop1_usable) {
3712 signed char rs=get_reg(i_regs->regmap,CSREG);
3713 assert(rs>=0);
3714 emit_testimm(rs,0x20000000);
3715 int jaddr=(int)out;
3716 emit_jeq(0);
3717 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3718 cop1_usable=1;
3719 }
3720 if (opcode2[i]==0) { // MFC1
3721 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3722 if(tl>=0) {
3723 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3724 emit_readword_indexed(0,tl,tl);
3725 }
3726 }
3727 else if (opcode2[i]==1) { // DMFC1
3728 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3729 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3730 if(tl>=0) {
3731 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3732 if(th>=0) emit_readword_indexed(4,tl,th);
3733 emit_readword_indexed(0,tl,tl);
3734 }
3735 }
3736 else if (opcode2[i]==4) { // MTC1
3737 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3738 signed char temp=get_reg(i_regs->regmap,-1);
3739 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3740 emit_writeword_indexed(sl,0,temp);
3741 }
3742 else if (opcode2[i]==5) { // DMTC1
3743 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3744 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3745 signed char temp=get_reg(i_regs->regmap,-1);
3746 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3747 emit_writeword_indexed(sh,4,temp);
3748 emit_writeword_indexed(sl,0,temp);
3749 }
3750 else if (opcode2[i]==2) // CFC1
3751 {
3752 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3753 if(tl>=0) {
3754 u_int copr=(source[i]>>11)&0x1f;
3755 if(copr==0) emit_readword((int)&FCR0,tl);
3756 if(copr==31) emit_readword((int)&FCR31,tl);
3757 }
3758 }
3759 else if (opcode2[i]==6) // CTC1
3760 {
3761 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3762 u_int copr=(source[i]>>11)&0x1f;
3763 assert(sl>=0);
3764 if(copr==31)
3765 {
3766 emit_writeword(sl,(int)&FCR31);
3767 // Set the rounding mode
3768 //FIXME
3769 //char temp=get_reg(i_regs->regmap,-1);
3770 //emit_andimm(sl,3,temp);
3771 //emit_fldcw_indexed((int)&rounding_modes,temp);
3772 }
3773 }
3774#else
3775 cop1_unusable(i, i_regs);
3776#endif
3777}
3778
3779void fconv_assemble_arm(int i,struct regstat *i_regs)
3780{
3781#ifndef DISABLE_COP1
3782 signed char temp=get_reg(i_regs->regmap,-1);
3783 assert(temp>=0);
3784 // Check cop1 unusable
3785 if(!cop1_usable) {
3786 signed char rs=get_reg(i_regs->regmap,CSREG);
3787 assert(rs>=0);
3788 emit_testimm(rs,0x20000000);
3789 int jaddr=(int)out;
3790 emit_jeq(0);
3791 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3792 cop1_usable=1;
3793 }
3794
3795 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3796 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3797 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3798 emit_flds(temp,15);
3799 emit_ftosizs(15,15); // float->int, truncate
3800 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3801 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3802 emit_fsts(15,temp);
3803 return;
3804 }
3805 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3806 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3807 emit_vldr(temp,7);
3808 emit_ftosizd(7,13); // double->int, truncate
3809 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3810 emit_fsts(13,temp);
3811 return;
3812 }
3813
3814 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3815 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3816 emit_flds(temp,13);
3817 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3818 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3819 emit_fsitos(13,15);
3820 emit_fsts(15,temp);
3821 return;
3822 }
3823 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3824 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3825 emit_flds(temp,13);
3826 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3827 emit_fsitod(13,7);
3828 emit_vstr(7,temp);
3829 return;
3830 }
3831
3832 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3833 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3834 emit_flds(temp,13);
3835 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3836 emit_fcvtds(13,7);
3837 emit_vstr(7,temp);
3838 return;
3839 }
3840 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3841 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3842 emit_vldr(temp,7);
3843 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3844 emit_fcvtsd(7,13);
3845 emit_fsts(13,temp);
3846 return;
3847 }
3848 #endif
3849
3850 // C emulation code
3851
3852 u_int hr,reglist=0;
3853 for(hr=0;hr<HOST_REGS;hr++) {
3854 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3855 }
3856 save_regs(reglist);
3857
3858 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3859 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3860 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3861 emit_call((int)cvt_s_w);
3862 }
3863 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3864 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3865 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3866 emit_call((int)cvt_d_w);
3867 }
3868 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3869 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3870 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3871 emit_call((int)cvt_s_l);
3872 }
3873 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3874 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3875 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3876 emit_call((int)cvt_d_l);
3877 }
3878
3879 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3880 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3881 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3882 emit_call((int)cvt_d_s);
3883 }
3884 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3885 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3886 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3887 emit_call((int)cvt_w_s);
3888 }
3889 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3890 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3891 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3892 emit_call((int)cvt_l_s);
3893 }
3894
3895 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3896 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3897 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3898 emit_call((int)cvt_s_d);
3899 }
3900 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3901 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3902 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3903 emit_call((int)cvt_w_d);
3904 }
3905 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3906 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3907 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3908 emit_call((int)cvt_l_d);
3909 }
3910
3911 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3912 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3913 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3914 emit_call((int)round_l_s);
3915 }
3916 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3917 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3918 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3919 emit_call((int)trunc_l_s);
3920 }
3921 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3922 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3923 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3924 emit_call((int)ceil_l_s);
3925 }
3926 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3927 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3928 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3929 emit_call((int)floor_l_s);
3930 }
3931 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3932 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3933 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3934 emit_call((int)round_w_s);
3935 }
3936 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3937 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3938 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3939 emit_call((int)trunc_w_s);
3940 }
3941 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3942 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3943 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3944 emit_call((int)ceil_w_s);
3945 }
3946 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3947 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3948 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3949 emit_call((int)floor_w_s);
3950 }
3951
3952 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3953 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3954 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3955 emit_call((int)round_l_d);
3956 }
3957 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3958 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3959 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3960 emit_call((int)trunc_l_d);
3961 }
3962 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3963 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3964 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3965 emit_call((int)ceil_l_d);
3966 }
3967 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3968 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3969 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3970 emit_call((int)floor_l_d);
3971 }
3972 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3973 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3974 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3975 emit_call((int)round_w_d);
3976 }
3977 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3978 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3979 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3980 emit_call((int)trunc_w_d);
3981 }
3982 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3983 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3984 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3985 emit_call((int)ceil_w_d);
3986 }
3987 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3988 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3989 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3990 emit_call((int)floor_w_d);
3991 }
3992
3993 restore_regs(reglist);
3994#else
3995 cop1_unusable(i, i_regs);
3996#endif
3997}
3998#define fconv_assemble fconv_assemble_arm
3999
4000void fcomp_assemble(int i,struct regstat *i_regs)
4001{
4002#ifndef DISABLE_COP1
4003 signed char fs=get_reg(i_regs->regmap,FSREG);
4004 signed char temp=get_reg(i_regs->regmap,-1);
4005 assert(temp>=0);
4006 // Check cop1 unusable
4007 if(!cop1_usable) {
4008 signed char cs=get_reg(i_regs->regmap,CSREG);
4009 assert(cs>=0);
4010 emit_testimm(cs,0x20000000);
4011 int jaddr=(int)out;
4012 emit_jeq(0);
4013 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4014 cop1_usable=1;
4015 }
4016
4017 if((source[i]&0x3f)==0x30) {
4018 emit_andimm(fs,~0x800000,fs);
4019 return;
4020 }
4021
4022 if((source[i]&0x3e)==0x38) {
4023 // sf/ngle - these should throw exceptions for NaNs
4024 emit_andimm(fs,~0x800000,fs);
4025 return;
4026 }
4027
4028 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4029 if(opcode2[i]==0x10) {
4030 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4031 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4032 emit_orimm(fs,0x800000,fs);
4033 emit_flds(temp,14);
4034 emit_flds(HOST_TEMPREG,15);
4035 emit_fcmps(14,15);
4036 emit_fmstat();
4037 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4038 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4039 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4040 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4041 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4042 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4043 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4044 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4045 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4046 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4047 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4048 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4049 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4050 return;
4051 }
4052 if(opcode2[i]==0x11) {
4053 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4054 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4055 emit_orimm(fs,0x800000,fs);
4056 emit_vldr(temp,6);
4057 emit_vldr(HOST_TEMPREG,7);
4058 emit_fcmpd(6,7);
4059 emit_fmstat();
4060 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4061 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4062 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4063 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4064 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4065 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4066 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4067 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4068 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4069 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4070 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4071 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4072 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4073 return;
4074 }
4075 #endif
4076
4077 // C only
4078
4079 u_int hr,reglist=0;
4080 for(hr=0;hr<HOST_REGS;hr++) {
4081 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4082 }
4083 reglist&=~(1<<fs);
4084 save_regs(reglist);
4085 if(opcode2[i]==0x10) {
4086 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4087 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4088 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4089 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4090 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4091 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4092 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4093 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4094 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4095 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4096 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4097 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4098 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4099 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4100 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4101 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4102 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4103 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4104 }
4105 if(opcode2[i]==0x11) {
4106 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4107 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4108 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4109 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4110 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4111 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4112 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4113 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4114 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4115 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4116 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4117 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4118 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4119 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4120 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4121 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4122 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4123 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4124 }
4125 restore_regs(reglist);
4126 emit_loadreg(FSREG,fs);
4127#else
4128 cop1_unusable(i, i_regs);
4129#endif
4130}
4131
4132void float_assemble(int i,struct regstat *i_regs)
4133{
4134#ifndef DISABLE_COP1
4135 signed char temp=get_reg(i_regs->regmap,-1);
4136 assert(temp>=0);
4137 // Check cop1 unusable
4138 if(!cop1_usable) {
4139 signed char cs=get_reg(i_regs->regmap,CSREG);
4140 assert(cs>=0);
4141 emit_testimm(cs,0x20000000);
4142 int jaddr=(int)out;
4143 emit_jeq(0);
4144 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4145 cop1_usable=1;
4146 }
4147
4148 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4149 if((source[i]&0x3f)==6) // mov
4150 {
4151 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4152 if(opcode2[i]==0x10) {
4153 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4154 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4155 emit_readword_indexed(0,temp,temp);
4156 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4157 }
4158 if(opcode2[i]==0x11) {
4159 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4160 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4161 emit_vldr(temp,7);
4162 emit_vstr(7,HOST_TEMPREG);
4163 }
4164 }
4165 return;
4166 }
4167
4168 if((source[i]&0x3f)>3)
4169 {
4170 if(opcode2[i]==0x10) {
4171 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4172 emit_flds(temp,15);
4173 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4174 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4175 }
4176 if((source[i]&0x3f)==4) // sqrt
4177 emit_fsqrts(15,15);
4178 if((source[i]&0x3f)==5) // abs
4179 emit_fabss(15,15);
4180 if((source[i]&0x3f)==7) // neg
4181 emit_fnegs(15,15);
4182 emit_fsts(15,temp);
4183 }
4184 if(opcode2[i]==0x11) {
4185 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4186 emit_vldr(temp,7);
4187 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4188 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4189 }
4190 if((source[i]&0x3f)==4) // sqrt
4191 emit_fsqrtd(7,7);
4192 if((source[i]&0x3f)==5) // abs
4193 emit_fabsd(7,7);
4194 if((source[i]&0x3f)==7) // neg
4195 emit_fnegd(7,7);
4196 emit_vstr(7,temp);
4197 }
4198 return;
4199 }
4200 if((source[i]&0x3f)<4)
4201 {
4202 if(opcode2[i]==0x10) {
4203 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4204 }
4205 if(opcode2[i]==0x11) {
4206 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4207 }
4208 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4209 if(opcode2[i]==0x10) {
4210 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4211 emit_flds(temp,15);
4212 emit_flds(HOST_TEMPREG,13);
4213 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4214 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4215 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4216 }
4217 }
4218 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4219 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4220 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4221 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4222 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4223 emit_fsts(15,HOST_TEMPREG);
4224 }else{
4225 emit_fsts(15,temp);
4226 }
4227 }
4228 else if(opcode2[i]==0x11) {
4229 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4230 emit_vldr(temp,7);
4231 emit_vldr(HOST_TEMPREG,6);
4232 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4233 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4234 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4235 }
4236 }
4237 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4238 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4239 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4240 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4241 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4242 emit_vstr(7,HOST_TEMPREG);
4243 }else{
4244 emit_vstr(7,temp);
4245 }
4246 }
4247 }
4248 else {
4249 if(opcode2[i]==0x10) {
4250 emit_flds(temp,15);
4251 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4252 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4253 }
4254 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4255 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4256 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4257 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4258 emit_fsts(15,temp);
4259 }
4260 else if(opcode2[i]==0x11) {
4261 emit_vldr(temp,7);
4262 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4263 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4264 }
4265 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4266 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4267 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4268 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4269 emit_vstr(7,temp);
4270 }
4271 }
4272 return;
4273 }
4274 #endif
4275
4276 u_int hr,reglist=0;
4277 for(hr=0;hr<HOST_REGS;hr++) {
4278 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4279 }
4280 if(opcode2[i]==0x10) { // Single precision
4281 save_regs(reglist);
4282 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4283 if((source[i]&0x3f)<4) {
4284 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4285 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4286 }else{
4287 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4288 }
4289 switch(source[i]&0x3f)
4290 {
4291 case 0x00: emit_call((int)add_s);break;
4292 case 0x01: emit_call((int)sub_s);break;
4293 case 0x02: emit_call((int)mul_s);break;
4294 case 0x03: emit_call((int)div_s);break;
4295 case 0x04: emit_call((int)sqrt_s);break;
4296 case 0x05: emit_call((int)abs_s);break;
4297 case 0x06: emit_call((int)mov_s);break;
4298 case 0x07: emit_call((int)neg_s);break;
4299 }
4300 restore_regs(reglist);
4301 }
4302 if(opcode2[i]==0x11) { // Double precision
4303 save_regs(reglist);
4304 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4305 if((source[i]&0x3f)<4) {
4306 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4307 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4308 }else{
4309 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4310 }
4311 switch(source[i]&0x3f)
4312 {
4313 case 0x00: emit_call((int)add_d);break;
4314 case 0x01: emit_call((int)sub_d);break;
4315 case 0x02: emit_call((int)mul_d);break;
4316 case 0x03: emit_call((int)div_d);break;
4317 case 0x04: emit_call((int)sqrt_d);break;
4318 case 0x05: emit_call((int)abs_d);break;
4319 case 0x06: emit_call((int)mov_d);break;
4320 case 0x07: emit_call((int)neg_d);break;
4321 }
4322 restore_regs(reglist);
4323 }
4324#else
4325 cop1_unusable(i, i_regs);
4326#endif
4327}
4328
4329void multdiv_assemble_arm(int i,struct regstat *i_regs)
4330{
4331 // case 0x18: MULT
4332 // case 0x19: MULTU
4333 // case 0x1A: DIV
4334 // case 0x1B: DIVU
4335 // case 0x1C: DMULT
4336 // case 0x1D: DMULTU
4337 // case 0x1E: DDIV
4338 // case 0x1F: DDIVU
4339 if(rs1[i]&&rs2[i])
4340 {
4341 if((opcode2[i]&4)==0) // 32-bit
4342 {
4343 if(opcode2[i]==0x18) // MULT
4344 {
4345 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4346 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4347 signed char hi=get_reg(i_regs->regmap,HIREG);
4348 signed char lo=get_reg(i_regs->regmap,LOREG);
4349 assert(m1>=0);
4350 assert(m2>=0);
4351 assert(hi>=0);
4352 assert(lo>=0);
4353 emit_smull(m1,m2,hi,lo);
4354 }
4355 if(opcode2[i]==0x19) // MULTU
4356 {
4357 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4358 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4359 signed char hi=get_reg(i_regs->regmap,HIREG);
4360 signed char lo=get_reg(i_regs->regmap,LOREG);
4361 assert(m1>=0);
4362 assert(m2>=0);
4363 assert(hi>=0);
4364 assert(lo>=0);
4365 emit_umull(m1,m2,hi,lo);
4366 }
4367 if(opcode2[i]==0x1A) // DIV
4368 {
4369 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4370 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4371 assert(d1>=0);
4372 assert(d2>=0);
4373 signed char quotient=get_reg(i_regs->regmap,LOREG);
4374 signed char remainder=get_reg(i_regs->regmap,HIREG);
4375 assert(quotient>=0);
4376 assert(remainder>=0);
4377 emit_movs(d1,remainder);
4378 emit_negmi(remainder,remainder);
4379 emit_movs(d2,HOST_TEMPREG);
4380 emit_jeq((int)out+52); // Division by zero
4381 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4382 emit_clz(HOST_TEMPREG,quotient);
4383 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4384 emit_orimm(quotient,1<<31,quotient);
4385 emit_shr(quotient,quotient,quotient);
4386 emit_cmp(remainder,HOST_TEMPREG);
4387 emit_subcs(remainder,HOST_TEMPREG,remainder);
4388 emit_adcs(quotient,quotient,quotient);
4389 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4390 emit_jcc((int)out-16); // -4
4391 emit_teq(d1,d2);
4392 emit_negmi(quotient,quotient);
4393 emit_test(d1,d1);
4394 emit_negmi(remainder,remainder);
4395 }
4396 if(opcode2[i]==0x1B) // DIVU
4397 {
4398 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4399 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4400 assert(d1>=0);
4401 assert(d2>=0);
4402 signed char quotient=get_reg(i_regs->regmap,LOREG);
4403 signed char remainder=get_reg(i_regs->regmap,HIREG);
4404 assert(quotient>=0);
4405 assert(remainder>=0);
4406 emit_test(d2,d2);
4407 emit_jeq((int)out+44); // Division by zero
4408 emit_clz(d2,HOST_TEMPREG);
4409 emit_movimm(1<<31,quotient);
4410 emit_shl(d2,HOST_TEMPREG,d2);
4411 emit_mov(d1,remainder);
4412 emit_shr(quotient,HOST_TEMPREG,quotient);
4413 emit_cmp(remainder,d2);
4414 emit_subcs(remainder,d2,remainder);
4415 emit_adcs(quotient,quotient,quotient);
4416 emit_shrcc_imm(d2,1,d2);
4417 emit_jcc((int)out-16); // -4
4418 }
4419 }
4420 else // 64-bit
4421 {
4422 if(opcode2[i]==0x1C) // DMULT
4423 {
4424 assert(opcode2[i]!=0x1C);
4425 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4426 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4427 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4428 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4429 assert(m1h>=0);
4430 assert(m2h>=0);
4431 assert(m1l>=0);
4432 assert(m2l>=0);
4433 emit_pushreg(m2h);
4434 emit_pushreg(m2l);
4435 emit_pushreg(m1h);
4436 emit_pushreg(m1l);
4437 emit_call((int)&mult64);
4438 emit_popreg(m1l);
4439 emit_popreg(m1h);
4440 emit_popreg(m2l);
4441 emit_popreg(m2h);
4442 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4443 signed char hil=get_reg(i_regs->regmap,HIREG);
4444 if(hih>=0) emit_loadreg(HIREG|64,hih);
4445 if(hil>=0) emit_loadreg(HIREG,hil);
4446 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4447 signed char lol=get_reg(i_regs->regmap,LOREG);
4448 if(loh>=0) emit_loadreg(LOREG|64,loh);
4449 if(lol>=0) emit_loadreg(LOREG,lol);
4450 }
4451 if(opcode2[i]==0x1D) // DMULTU
4452 {
4453 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4454 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4455 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4456 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4457 assert(m1h>=0);
4458 assert(m2h>=0);
4459 assert(m1l>=0);
4460 assert(m2l>=0);
4461 save_regs(0x100f);
4462 if(m1l!=0) emit_mov(m1l,0);
4463 if(m1h==0) emit_readword((int)&dynarec_local,1);
4464 else if(m1h>1) emit_mov(m1h,1);
4465 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4466 else if(m2l>2) emit_mov(m2l,2);
4467 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4468 else if(m2h>3) emit_mov(m2h,3);
4469 emit_call((int)&multu64);
4470 restore_regs(0x100f);
4471 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4472 signed char hil=get_reg(i_regs->regmap,HIREG);
4473 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4474 signed char lol=get_reg(i_regs->regmap,LOREG);
4475 /*signed char temp=get_reg(i_regs->regmap,-1);
4476 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4477 signed char rl=get_reg(i_regs->regmap,HIREG);
4478 assert(m1h>=0);
4479 assert(m2h>=0);
4480 assert(m1l>=0);
4481 assert(m2l>=0);
4482 assert(temp>=0);
4483 //emit_mov(m1l,EAX);
4484 //emit_mul(m2l);
4485 emit_umull(rl,rh,m1l,m2l);
4486 emit_storereg(LOREG,rl);
4487 emit_mov(rh,temp);
4488 //emit_mov(m1h,EAX);
4489 //emit_mul(m2l);
4490 emit_umull(rl,rh,m1h,m2l);
4491 emit_adds(rl,temp,temp);
4492 emit_adcimm(rh,0,rh);
4493 emit_storereg(HIREG,rh);
4494 //emit_mov(m2h,EAX);
4495 //emit_mul(m1l);
4496 emit_umull(rl,rh,m1l,m2h);
4497 emit_adds(rl,temp,temp);
4498 emit_adcimm(rh,0,rh);
4499 emit_storereg(LOREG|64,temp);
4500 emit_mov(rh,temp);
4501 //emit_mov(m2h,EAX);
4502 //emit_mul(m1h);
4503 emit_umull(rl,rh,m1h,m2h);
4504 emit_adds(rl,temp,rl);
4505 emit_loadreg(HIREG,temp);
4506 emit_adcimm(rh,0,rh);
4507 emit_adds(rl,temp,rl);
4508 emit_adcimm(rh,0,rh);
4509 // DEBUG
4510 /*
4511 emit_pushreg(m2h);
4512 emit_pushreg(m2l);
4513 emit_pushreg(m1h);
4514 emit_pushreg(m1l);
4515 emit_call((int)&multu64);
4516 emit_popreg(m1l);
4517 emit_popreg(m1h);
4518 emit_popreg(m2l);
4519 emit_popreg(m2h);
4520 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4521 signed char hil=get_reg(i_regs->regmap,HIREG);
4522 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4523 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4524 */
4525 // Shouldn't be necessary
4526 //char loh=get_reg(i_regs->regmap,LOREG|64);
4527 //char lol=get_reg(i_regs->regmap,LOREG);
4528 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4529 //if(lol>=0) emit_loadreg(LOREG,lol);
4530 }
4531 if(opcode2[i]==0x1E) // DDIV
4532 {
4533 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4534 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4535 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4536 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4537 assert(d1h>=0);
4538 assert(d2h>=0);
4539 assert(d1l>=0);
4540 assert(d2l>=0);
4541 save_regs(0x100f);
4542 if(d1l!=0) emit_mov(d1l,0);
4543 if(d1h==0) emit_readword((int)&dynarec_local,1);
4544 else if(d1h>1) emit_mov(d1h,1);
4545 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4546 else if(d2l>2) emit_mov(d2l,2);
4547 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4548 else if(d2h>3) emit_mov(d2h,3);
4549 emit_call((int)&div64);
4550 restore_regs(0x100f);
4551 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4552 signed char hil=get_reg(i_regs->regmap,HIREG);
4553 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4554 signed char lol=get_reg(i_regs->regmap,LOREG);
4555 if(hih>=0) emit_loadreg(HIREG|64,hih);
4556 if(hil>=0) emit_loadreg(HIREG,hil);
4557 if(loh>=0) emit_loadreg(LOREG|64,loh);
4558 if(lol>=0) emit_loadreg(LOREG,lol);
4559 }
4560 if(opcode2[i]==0x1F) // DDIVU
4561 {
4562 //u_int hr,reglist=0;
4563 //for(hr=0;hr<HOST_REGS;hr++) {
4564 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4565 //}
4566 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4567 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4568 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4569 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4570 assert(d1h>=0);
4571 assert(d2h>=0);
4572 assert(d1l>=0);
4573 assert(d2l>=0);
4574 save_regs(0x100f);
4575 if(d1l!=0) emit_mov(d1l,0);
4576 if(d1h==0) emit_readword((int)&dynarec_local,1);
4577 else if(d1h>1) emit_mov(d1h,1);
4578 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4579 else if(d2l>2) emit_mov(d2l,2);
4580 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4581 else if(d2h>3) emit_mov(d2h,3);
4582 emit_call((int)&divu64);
4583 restore_regs(0x100f);
4584 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4585 signed char hil=get_reg(i_regs->regmap,HIREG);
4586 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4587 signed char lol=get_reg(i_regs->regmap,LOREG);
4588 if(hih>=0) emit_loadreg(HIREG|64,hih);
4589 if(hil>=0) emit_loadreg(HIREG,hil);
4590 if(loh>=0) emit_loadreg(LOREG|64,loh);
4591 if(lol>=0) emit_loadreg(LOREG,lol);
4592 }
4593 }
4594 }
4595 else
4596 {
4597 // Multiply by zero is zero.
4598 // MIPS does not have a divide by zero exception.
4599 // The result is undefined, we return zero.
4600 signed char hr=get_reg(i_regs->regmap,HIREG);
4601 signed char lr=get_reg(i_regs->regmap,LOREG);
4602 if(hr>=0) emit_zeroreg(hr);
4603 if(lr>=0) emit_zeroreg(lr);
4604 }
4605}
4606#define multdiv_assemble multdiv_assemble_arm
4607
// Intentionally emits nothing on ARM.
// On x86 this step puts the value 0xf8 into register r; on ARM the hash is
// done with a single instruction in do_rhash(), so no preload is needed.
void do_preload_rhash(int r) {
  (void)r;
}
4612
4613void do_preload_rhtbl(int ht) {
4614 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4615}
4616
// Emit the hash computation: rh = rs & 0xf8.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4620
4621void do_miniht_load(int ht,int rh) {
4622 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4623 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4624}
4625
4626void do_miniht_jump(int rs,int rh,int ht) {
4627 emit_cmp(rh,rs);
4628 emit_ldreq_indexed(ht,4,15);
4629 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4630 emit_mov(rs,7);
4631 emit_jmp(jump_vaddr_reg[7]);
4632 #else
4633 emit_jmp(jump_vaddr_reg[rs]);
4634 #endif
4635}
4636
4637void do_miniht_insert(u_int return_address,int rt,int temp) {
4638 #ifdef ARMv5_ONLY
4639 emit_movimm(return_address,rt); // PC into link register
4640 add_to_linker((int)out,return_address,1);
4641 emit_pcreladdr(temp);
4642 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4643 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4644 #else
4645 emit_movw(return_address&0x0000FFFF,rt);
4646 add_to_linker((int)out,return_address,1);
4647 emit_pcreladdr(temp);
4648 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4649 emit_movt(return_address&0xFFFF0000,rt);
4650 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4651 #endif
4652}
4653
4654// Sign-extend to 64 bits and write out upper half of a register
4655// This is useful where we have a 32-bit value in a register, and want to
4656// keep it in a 32-bit register, but can't guarantee that it won't be read
4657// as a 64-bit value later.
4658void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4659{
4660#ifndef FORCE32
4661 if(is32_pre==is32) return;
4662 int hr,reg;
4663 for(hr=0;hr<HOST_REGS;hr++) {
4664 if(hr!=EXCLUDE_REG) {
4665 //if(pre[hr]==entry[hr]) {
4666 if((reg=pre[hr])>=0) {
4667 if((dirty>>hr)&1) {
4668 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4669 emit_sarimm(hr,31,HOST_TEMPREG);
4670 emit_storereg(reg|64,HOST_TEMPREG);
4671 }
4672 }
4673 }
4674 //}
4675 }
4676 }
4677#endif
4678}
4679
4680void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4681{
4682 //if(dirty_pre==dirty) return;
4683 int hr,reg,new_hr;
4684 for(hr=0;hr<HOST_REGS;hr++) {
4685 if(hr!=EXCLUDE_REG) {
4686 reg=pre[hr];
4687 if(((~u)>>(reg&63))&1) {
4688 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4689 if(((dirty_pre&~dirty)>>hr)&1) {
4690 if(reg>0&&reg<34) {
4691 emit_storereg(reg,hr);
4692 if( ((is32_pre&~uu)>>reg)&1 ) {
4693 emit_sarimm(hr,31,HOST_TEMPREG);
4694 emit_storereg(reg|64,HOST_TEMPREG);
4695 }
4696 }
4697 else if(reg>=64) {
4698 emit_storereg(reg,hr);
4699 }
4700 }
4701 }
4702 else // Check if register moved to a different register
4703 if((new_hr=get_reg(entry,reg))>=0) {
4704 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4705 if(reg>0&&reg<34) {
4706 emit_storereg(reg,hr);
4707 if( ((is32_pre&~uu)>>reg)&1 ) {
4708 emit_sarimm(hr,31,HOST_TEMPREG);
4709 emit_storereg(reg|64,HOST_TEMPREG);
4710 }
4711 }
4712 else if(reg>=64) {
4713 emit_storereg(reg,hr);
4714 }
4715 }
4716 }
4717 }
4718 }
4719 }
4720}
4721
4722
4723/* using strd could possibly help but you'd have to allocate registers in pairs
4724void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4725{
4726 int hr;
4727 int wrote=-1;
4728 for(hr=HOST_REGS-1;hr>=0;hr--) {
4729 if(hr!=EXCLUDE_REG) {
4730 if(pre[hr]!=entry[hr]) {
4731 if(pre[hr]>=0) {
4732 if((dirty>>hr)&1) {
4733 if(get_reg(entry,pre[hr])<0) {
4734 if(pre[hr]<64) {
4735 if(!((u>>pre[hr])&1)) {
4736 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4737 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4738 emit_sarimm(hr,31,hr+1);
4739 emit_strdreg(pre[hr],hr);
4740 }
4741 else
4742 emit_storereg(pre[hr],hr);
4743 }else{
4744 emit_storereg(pre[hr],hr);
4745 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4746 emit_sarimm(hr,31,hr);
4747 emit_storereg(pre[hr]|64,hr);
4748 }
4749 }
4750 }
4751 }else{
4752 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4753 emit_storereg(pre[hr],hr);
4754 }
4755 }
4756 wrote=hr;
4757 }
4758 }
4759 }
4760 }
4761 }
4762 }
4763 for(hr=0;hr<HOST_REGS;hr++) {
4764 if(hr!=EXCLUDE_REG) {
4765 if(pre[hr]!=entry[hr]) {
4766 if(pre[hr]>=0) {
4767 int nr;
4768 if((nr=get_reg(entry,pre[hr]))>=0) {
4769 emit_mov(hr,nr);
4770 }
4771 }
4772 }
4773 }
4774 }
4775}
4776#define wb_invalidate wb_invalidate_arm
4777*/
4778
4779// CPU-architecture-specific initialization
4780void arch_init() {
4781#ifndef DISABLE_COP1
4782 rounding_modes[0]=0x0<<22; // round
4783 rounding_modes[1]=0x3<<22; // trunc
4784 rounding_modes[2]=0x1<<22; // ceil
4785 rounding_modes[3]=0x2<<22; // floor
4786#endif
4787}
4788
4789// vim:shiftwidth=2:expandtab