merge from libretro
[pcsx_rearmed.git] / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
27#ifdef MUPEN64
28extern precomp_instr fake_pc;
29#endif
30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
145void *kill_pointer(void *stub)
146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
153 return i_ptr;
154}
155
156int get_pointer(void *stub)
157{
158 //printf("get_pointer(%x)\n",(int)stub);
159 int *ptr=(int *)(stub+4);
160 assert((*ptr&0x0ff00000)==0x05900000);
161 u_int offset=*ptr&0xfff;
162 int **l_ptr=(void *)ptr+offset+8;
163 int *i_ptr=*l_ptr;
164 assert((*i_ptr&0x0f000000)==0x0a000000);
165 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
166}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
// Map guest register `reg` to a host register slot in cur->regmap while
// processing instruction index i.
//
// Order of attempts, each returning as soon as it succeeds:
//   1. nothing to do if reg is flagged in cur->u or is already mapped;
//   2. the preferred host register, if free or holding a register that is
//      flagged in cur->u / cur->uu;
//   3. any free host register (other than EXCLUDE_REG), first trying ones
//      that did not hold an operand of instruction i-1;
//   4. eviction, guided by the hsn[] table filled in by lsn().
// Every successful assignment also clears the host register's bit in
// cur->dirty and cur->isconst. If nothing can be evicted the process exits.
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
// Default preference is the low three bits of reg; CCREG and the
// PTEMP/FTEMP temporaries have fixed preferences.
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
// The preferred slot is occupied; it can still be taken over when its
// occupant is flagged in cur->u (values below 64) or cur->uu (64 and up).
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
// Only the first such mapping found (scanning upward) is dropped.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register and is then updated by lsn().
// The loops below scan j from 10 downward, so higher values are evicted
// first. NOTE(review): the exact meaning of the values is defined by
// lsn(), which is not visible here.
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
// Otherwise evict a register with this hsn value that is not an operand
// of instruction i-1; the upper half (r+64) is looked for before the
// lower half. HOST_CCREG is only eligible while j<hsn[CCREG].
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
// Last resort: the same scan without the operand and HOST_CCREG
// restrictions, continuing down to j==0.
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
// Map both halves of guest register `reg`: the lower half via alloc_reg(),
// then the upper half, which is stored in cur->regmap as reg|64.
//
// The upper half follows the same sequence of attempts as alloc_reg()
// (already mapped / preferred host register / any free register /
// eviction), except that it is skipped when reg is flagged in cur->uu,
// the default preferred host register is 8+(reg&1), and the
// "clear one unneeded mapping" pass scans host registers downward.
// Every successful assignment clears the host register's bit in
// cur->dirty and cur->isconst. If nothing can be evicted the process exits.
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
// The preferred slot is occupied; it can still be taken over when its
// occupant is flagged in cur->u (values below 64) or cur->uu (64 and up).
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
// Only the first such mapping found (scanning downward) is dropped.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register and is then updated by lsn().
// The loops below scan j from 10 downward, so higher values are evicted
// first. NOTE(review): the exact meaning of the values is defined by
// lsn(), which is not visible here.
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
// Otherwise evict a register with this hsn value that is not an operand
// of instruction i-1; the upper half (r+64) is looked for before the
// lower half. HOST_CCREG is only eligible while j<hsn[CCREG].
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
// Last resort: the same scan without the operand and HOST_CCREG
// restrictions, continuing down to j==0.
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
// Map `reg` (used as a temporary) to a host register slot in cur->regmap
// while processing instruction index i.
//
// Order of attempts, each returning as soon as it succeeds:
//   1. nothing to do if reg is already mapped (ignoring EXCLUDE_REG);
//   2. any free host register, scanning from the highest index down;
//   3. a host register whose occupant is flagged in cur->u / cur->uu and,
//      for i>0, also in unneeded_reg[i-1] / unneeded_reg_upper[i-1];
//   4. eviction, guided by the hsn[] table filled in by lsn().
// Unlike alloc_reg(), cur->u is not consulted for reg itself and there is
// no preferred host register (preferred_reg starts at -1 and is only
// passed to lsn()).
// Every successful assignment clears the host register's bit in
// cur->dirty and cur->isconst. If nothing can be evicted the process exits.
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
// The occupant must be flagged for the current state and (when i>0)
// for the previous instruction as well before its slot is reused.
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 for every register and is then updated by lsn().
// The loops below scan j from 10 downward, so higher values are evicted
// first. NOTE(review): the exact meaning of the values is defined by
// lsn(), which is not visible here.
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// Evict a register that is not an operand of instruction i-1; the upper
// half (r+64) is looked for before the lower half. HOST_CCREG is only
// eligible while hsn[CCREG]>2.
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
// Last resort: the same scan without the operand and HOST_CCREG
// restrictions, continuing down to j==0.
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
/* Printable name of each ARM register, indexed by register number. */
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
798u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
799{
800 assert(rd<16);
801 assert(rn<16);
802 assert(rm<16);
803 return((rn<<16)|(rd<<12)|rm);
804}
805u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
806{
807 assert(rd<16);
808 assert(rn<16);
809 assert(imm<256);
810 assert((shift&1)==0);
811 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
812}
813u_int genimm(u_int imm,u_int *encoded)
814{
815 if(imm==0) {*encoded=0;return 1;}
816 int i=32;
817 while(i>0)
818 {
819 if(imm<256) {
820 *encoded=((i&30)<<7)|imm;
821 return 1;
822 }
823 imm=(imm>>2)|(imm<<30);i-=2;
824 }
825 return 0;
826}
827void genimm_checked(u_int imm,u_int *encoded)
828{
829 u_int ret=genimm(imm,encoded);
830 assert(ret);
831}
832u_int genjmp(u_int addr)
833{
834 int offset=addr-(int)out-8;
835 if(offset<-33554432||offset>=33554432) {
836 if (addr>2) {
837 printf("genjmp: out of range: %08x\n", offset);
838 exit(1);
839 }
840 return 0;
841 }
842 return ((u_int)offset>>2)&0xffffff;
843}
844
845void emit_mov(int rs,int rt)
846{
847 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_movs(int rs,int rt)
852{
853 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
854 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
855}
856
857void emit_add(int rs1,int rs2,int rt)
858{
859 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adds(int rs1,int rs2,int rt)
864{
865 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_adcs(int rs1,int rs2,int rt)
870{
871 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbc(int rs1,int rs2,int rt)
876{
877 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_sbcs(int rs1,int rs2,int rt)
882{
883 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
884 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
885}
886
887void emit_neg(int rs, int rt)
888{
889 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_negs(int rs, int rt)
894{
895 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
896 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
897}
898
899void emit_sub(int rs1,int rs2,int rt)
900{
901 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_subs(int rs1,int rs2,int rt)
906{
907 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
908 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
909}
910
911void emit_zeroreg(int rt)
912{
913 assem_debug("mov %s,#0\n",regname[rt]);
914 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
915}
916
917void emit_loadreg(int r, int hr)
918{
919#ifdef FORCE32
920 if(r&64) {
921 printf("64bit load in 32bit mode!\n");
922 exit(1);
923 }
924#endif
925 if((r&63)==0)
926 emit_zeroreg(hr);
927 else {
928 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
929 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
930 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
931 if(r==CCREG) addr=(int)&cycle_count;
932 if(r==CSREG) addr=(int)&Status;
933 if(r==FSREG) addr=(int)&FCR31;
934 if(r==INVCP) addr=(int)&invc_ptr;
935 u_int offset = addr-(u_int)&dynarec_local;
936 assert(offset<4096);
937 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
938 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
939 }
940}
941void emit_storereg(int r, int hr)
942{
943#ifdef FORCE32
944 if(r&64) {
945 printf("64bit store in 32bit mode!\n");
946 exit(1);
947 }
948#endif
949 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
950 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
951 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
952 if(r==CCREG) addr=(int)&cycle_count;
953 if(r==FSREG) addr=(int)&FCR31;
954 u_int offset = addr-(u_int)&dynarec_local;
955 assert(offset<4096);
956 assem_debug("str %s,fp+%d\n",regname[hr],offset);
957 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
958}
959
960void emit_test(int rs, int rt)
961{
962 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
963 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
964}
965
966void emit_testimm(int rs,int imm)
967{
968 u_int armval;
969 assem_debug("tst %s,$%d\n",regname[rs],imm);
970 genimm_checked(imm,&armval);
971 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
972}
973
974void emit_testeqimm(int rs,int imm)
975{
976 u_int armval;
977 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
978 genimm_checked(imm,&armval);
979 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
980}
981
982void emit_not(int rs,int rt)
983{
984 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
986}
987
988void emit_mvnmi(int rs,int rt)
989{
990 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
991 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
992}
993
994void emit_and(u_int rs1,u_int rs2,u_int rt)
995{
996 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
997 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
998}
999
1000void emit_or(u_int rs1,u_int rs2,u_int rt)
1001{
1002 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1003 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1004}
1005void emit_or_and_set_flags(int rs1,int rs2,int rt)
1006{
1007 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1008 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1009}
1010
1011void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1012{
1013 assert(rs<16);
1014 assert(rt<16);
1015 assert(imm<32);
1016 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1017 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1018}
1019
1020void emit_xor(u_int rs1,u_int rs2,u_int rt)
1021{
1022 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1023 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1024}
1025
1026void emit_loadlp(u_int imm,u_int rt)
1027{
1028 add_literal((int)out,imm);
1029 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1030 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1031}
1032void emit_movw(u_int imm,u_int rt)
1033{
1034 assert(imm<65536);
1035 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1036 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1037}
1038void emit_movt(u_int imm,u_int rt)
1039{
1040 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1041 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1042}
1043void emit_movimm(u_int imm,u_int rt)
1044{
1045 u_int armval;
1046 if(genimm(imm,&armval)) {
1047 assem_debug("mov %s,#%d\n",regname[rt],imm);
1048 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1049 }else if(genimm(~imm,&armval)) {
1050 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1051 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1052 }else if(imm<65536) {
1053 #ifdef ARMv5_ONLY
1054 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1055 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1056 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1057 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1058 #else
1059 emit_movw(imm,rt);
1060 #endif
1061 }else{
1062 #ifdef ARMv5_ONLY
1063 emit_loadlp(imm,rt);
1064 #else
1065 emit_movw(imm&0x0000FFFF,rt);
1066 emit_movt(imm&0xFFFF0000,rt);
1067 #endif
1068 }
1069}
1070void emit_pcreladdr(u_int rt)
1071{
1072 assem_debug("add %s,pc,#?\n",regname[rt]);
1073 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1074}
1075
1076void emit_addimm(u_int rs,int imm,u_int rt)
1077{
1078 assert(rs<16);
1079 assert(rt<16);
1080 if(imm!=0) {
1081 assert(imm>-65536&&imm<65536);
1082 u_int armval;
1083 if(genimm(imm,&armval)) {
1084 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1085 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1086 }else if(genimm(-imm,&armval)) {
1087 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1088 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1089 }else if(imm<0) {
1090 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1091 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1092 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1093 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1094 }else{
1095 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1096 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1097 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1098 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1099 }
1100 }
1101 else if(rs!=rt) emit_mov(rs,rt);
1102}
1103
1104void emit_addimm_and_set_flags(int imm,int rt)
1105{
1106 assert(imm>-65536&&imm<65536);
1107 u_int armval;
1108 if(genimm(imm,&armval)) {
1109 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1110 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1111 }else if(genimm(-imm,&armval)) {
1112 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1113 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1114 }else if(imm<0) {
1115 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1116 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1117 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1118 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1119 }else{
1120 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1121 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1122 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1123 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1124 }
1125}
1126void emit_addimm_no_flags(u_int imm,u_int rt)
1127{
1128 emit_addimm(rt,imm,rt);
1129}
1130
1131void emit_addnop(u_int r)
1132{
1133 assert(r<16);
1134 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1135 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1136}
1137
1138void emit_adcimm(u_int rs,int imm,u_int rt)
1139{
1140 u_int armval;
1141 genimm_checked(imm,&armval);
1142 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1143 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1144}
1145/*void emit_sbcimm(int imm,u_int rt)
1146{
1147 u_int armval;
1148 genimm_checked(imm,&armval);
1149 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1150 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1151}*/
1152void emit_sbbimm(int imm,u_int rt)
1153{
1154 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1155 assert(rt<8);
1156 if(imm<128&&imm>=-128) {
1157 output_byte(0x83);
1158 output_modrm(3,rt,3);
1159 output_byte(imm);
1160 }
1161 else
1162 {
1163 output_byte(0x81);
1164 output_modrm(3,rt,3);
1165 output_w32(imm);
1166 }
1167}
1168void emit_rscimm(int rs,int imm,u_int rt)
1169{
1170 assert(0);
1171 u_int armval;
1172 genimm_checked(imm,&armval);
1173 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1174 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1175}
1176
1177void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1178{
1179 // TODO: if(genimm(imm,&armval)) ...
1180 // else
1181 emit_movimm(imm,HOST_TEMPREG);
1182 emit_adds(HOST_TEMPREG,rsl,rtl);
1183 emit_adcimm(rsh,0,rth);
1184}
1185
1186void emit_sbb(int rs1,int rs2)
1187{
1188 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1189 output_byte(0x19);
1190 output_modrm(3,rs1,rs2);
1191}
1192
1193void emit_andimm(int rs,int imm,int rt)
1194{
1195 u_int armval;
1196 if(genimm(imm,&armval)) {
1197 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1198 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1199 }else if(genimm(~imm,&armval)) {
1200 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1201 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1202 }else if(imm==65535) {
1203 #ifdef ARMv5_ONLY
1204 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1205 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1206 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1207 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1208 #else
1209 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1210 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1211 #endif
1212 }else{
1213 assert(imm>0&&imm<65535);
1214 #ifdef ARMv5_ONLY
1215 assem_debug("mov r14,#%d\n",imm&0xFF00);
1216 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1217 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1218 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1219 #else
1220 emit_movw(imm,HOST_TEMPREG);
1221 #endif
1222 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1223 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1224 }
1225}
1226
1227void emit_orimm(int rs,int imm,int rt)
1228{
1229 u_int armval;
1230 if(genimm(imm,&armval)) {
1231 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1232 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1233 }else{
1234 assert(imm>0&&imm<65536);
1235 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1236 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1237 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1238 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1239 }
1240}
1241
1242void emit_xorimm(int rs,int imm,int rt)
1243{
1244 u_int armval;
1245 if(genimm(imm,&armval)) {
1246 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1247 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1248 }else{
1249 assert(imm>0&&imm<65536);
1250 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1251 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1252 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1253 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1254 }
1255}
1256
1257void emit_shlimm(int rs,u_int imm,int rt)
1258{
1259 assert(imm>0);
1260 assert(imm<32);
1261 //if(imm==1) ...
1262 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1263 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1264}
1265
1266void emit_shrimm(int rs,u_int imm,int rt)
1267{
1268 assert(imm>0);
1269 assert(imm<32);
1270 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1271 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1272}
1273
1274void emit_sarimm(int rs,u_int imm,int rt)
1275{
1276 assert(imm>0);
1277 assert(imm<32);
1278 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1279 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1280}
1281
1282void emit_rorimm(int rs,u_int imm,int rt)
1283{
1284 assert(imm>0);
1285 assert(imm<32);
1286 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1287 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1288}
1289
1290void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1291{
1292 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1293 assert(imm>0);
1294 assert(imm<32);
1295 //if(imm==1) ...
1296 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1297 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1298 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1299 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1300}
1301
1302void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1303{
1304 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1305 assert(imm>0);
1306 assert(imm<32);
1307 //if(imm==1) ...
1308 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1309 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1310 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1311 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1312}
1313
1314void emit_signextend16(int rs,int rt)
1315{
1316 #ifdef ARMv5_ONLY
1317 emit_shlimm(rs,16,rt);
1318 emit_sarimm(rt,16,rt);
1319 #else
1320 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1321 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1322 #endif
1323}
1324
1325void emit_shl(u_int rs,u_int shift,u_int rt)
1326{
1327 assert(rs<16);
1328 assert(rt<16);
1329 assert(shift<16);
1330 //if(imm==1) ...
1331 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1332 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1333}
1334void emit_shr(u_int rs,u_int shift,u_int rt)
1335{
1336 assert(rs<16);
1337 assert(rt<16);
1338 assert(shift<16);
1339 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1340 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1341}
1342void emit_sar(u_int rs,u_int shift,u_int rt)
1343{
1344 assert(rs<16);
1345 assert(rt<16);
1346 assert(shift<16);
1347 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1348 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1349}
1350void emit_shlcl(int r)
1351{
1352 assem_debug("shl %%%s,%%cl\n",regname[r]);
1353 assert(0);
1354}
1355void emit_shrcl(int r)
1356{
1357 assem_debug("shr %%%s,%%cl\n",regname[r]);
1358 assert(0);
1359}
1360void emit_sarcl(int r)
1361{
1362 assem_debug("sar %%%s,%%cl\n",regname[r]);
1363 assert(0);
1364}
1365
1366void emit_shldcl(int r1,int r2)
1367{
1368 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1369 assert(0);
1370}
1371void emit_shrdcl(int r1,int r2)
1372{
1373 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1374 assert(0);
1375}
1376void emit_orrshl(u_int rs,u_int shift,u_int rt)
1377{
1378 assert(rs<16);
1379 assert(rt<16);
1380 assert(shift<16);
1381 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1382 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1383}
1384void emit_orrshr(u_int rs,u_int shift,u_int rt)
1385{
1386 assert(rs<16);
1387 assert(rt<16);
1388 assert(shift<16);
1389 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1390 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1391}
1392
1393void emit_cmpimm(int rs,int imm)
1394{
1395 u_int armval;
1396 if(genimm(imm,&armval)) {
1397 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1398 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1399 }else if(genimm(-imm,&armval)) {
1400 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1401 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1402 }else if(imm>0) {
1403 assert(imm<65536);
1404 #ifdef ARMv5_ONLY
1405 emit_movimm(imm,HOST_TEMPREG);
1406 #else
1407 emit_movw(imm,HOST_TEMPREG);
1408 #endif
1409 assem_debug("cmp %s,r14\n",regname[rs]);
1410 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1411 }else{
1412 assert(imm>-65536);
1413 #ifdef ARMv5_ONLY
1414 emit_movimm(-imm,HOST_TEMPREG);
1415 #else
1416 emit_movw(-imm,HOST_TEMPREG);
1417 #endif
1418 assem_debug("cmn %s,r14\n",regname[rs]);
1419 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1420 }
1421}
1422
1423void emit_cmovne(u_int *addr,int rt)
1424{
1425 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1426 assert(0);
1427}
1428void emit_cmovl(u_int *addr,int rt)
1429{
1430 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1431 assert(0);
1432}
1433void emit_cmovs(u_int *addr,int rt)
1434{
1435 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1436 assert(0);
1437}
1438void emit_cmovne_imm(int imm,int rt)
1439{
1440 assem_debug("movne %s,#%d\n",regname[rt],imm);
1441 u_int armval;
1442 genimm_checked(imm,&armval);
1443 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1444}
1445void emit_cmovl_imm(int imm,int rt)
1446{
1447 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1448 u_int armval;
1449 genimm_checked(imm,&armval);
1450 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1451}
1452void emit_cmovb_imm(int imm,int rt)
1453{
1454 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1455 u_int armval;
1456 genimm_checked(imm,&armval);
1457 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1458}
1459void emit_cmovs_imm(int imm,int rt)
1460{
1461 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1462 u_int armval;
1463 genimm_checked(imm,&armval);
1464 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1465}
1466void emit_cmove_reg(int rs,int rt)
1467{
1468 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1469 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1470}
1471void emit_cmovne_reg(int rs,int rt)
1472{
1473 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1474 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1475}
1476void emit_cmovl_reg(int rs,int rt)
1477{
1478 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1479 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1480}
1481void emit_cmovs_reg(int rs,int rt)
1482{
1483 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1484 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1485}
1486
// Set rt to 1 if rs < imm (signed, "movlt"), else 0.
// When rs and rt are the same register the zeroing has to wait until after
// the compare, otherwise it would destroy the value being compared.
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs < imm (unsigned, "movcc"), else 0.
// When rs and rt are the same register the zeroing has to wait until after
// the compare, otherwise it would destroy the value being compared.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set rt to 1 if the signed 64-bit value rsh:rsl is less than the
// sign-extended 32-bit immediate, else 0.  rsh must not be rt.
//
// The lower words are compared first and that result stands only when the
// upper word equals the immediate's sign extension (0 or -1); any other
// upper word decides the outcome by its own sign/magnitude.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  // With equal upper words the ordering is decided by the lower words as
  // unsigned quantities; a signed compare misorders lower words with bit 31
  // set (e.g. 0x00000000_80000000 against 5).
  emit_sltiu32(rsl,imm,rt);
  if(imm>=0)
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);  // upper word non-zero: not decided by low word
    emit_cmovs_imm(1,rt);   // upper word negative: always less
  }
  else
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);  // upper word differs from -1
    emit_cmovl_imm(1,rt);   // upper word below -1: always less
  }
}
// Set rt to 1 if the unsigned 64-bit value rsh:rsl is below the
// sign-extended immediate, else 0.  rsh must not be rt.
// The low-word result from emit_sltiu32 is overridden whenever the upper
// word differs from the immediate's sign extension.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // Upper word of the immediate is all ones: anything else is smaller
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // Upper word of the immediate is zero: anything else is larger
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1533
1534void emit_cmp(int rs,int rt)
1535{
1536 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1537 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1538}
// Set rt to 1 if rs > 0 as a signed 32-bit value, else 0: rs is compared
// with 1, rt is preloaded with 1 and cleared when rs < 1.
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int one=1;
  emit_cmpimm(rs,one);
  emit_movimm(one,rt);
  emit_cmovl_imm(0,rt);
}
// Set rt to 1 if rs is non-zero, else 0.  rs is either copied into rt with a
// flag-setting move or, when it already lives in rt, just tested; a non-zero
// value is then replaced by 1.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// Set rt to 1 if the signed 64-bit value rsh:rsl is greater than zero,
// else 0.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  // With a zero upper word the value is positive exactly when the lower word
  // is non-zero; a signed ">0" test on the lower word would report 0 for
  // lower words with bit 31 set.
  emit_set_nz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);  // non-zero upper word: positive unless...
  emit_cmovs_imm(0,rt);   // ...the upper word is negative
}
// Set rt to 1 if the 64-bit value rsh:rsl is non-zero: the two halves are
// OR-ed into rt with flags set, and a non-zero result is replaced by 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  const int truth=1;
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(truth,rt);
}
// Set rt to 1 if rs1 < rs2 (signed, "movlt"), else 0.
// If rt aliases one of the inputs it is cleared only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs1 < rs2 (unsigned, "movcc"), else 0.
// If rt aliases one of the inputs it is cleared only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1583void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1584{
1585 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1586 assert(u1!=rt);
1587 assert(u2!=rt);
1588 emit_cmp(l1,l2);
1589 emit_movimm(0,rt);
1590 emit_sbcs(u1,u2,HOST_TEMPREG);
1591 emit_cmovl_imm(1,rt);
1592}
1593void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1594{
1595 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1596 assert(u1!=rt);
1597 assert(u2!=rt);
1598 emit_cmp(l1,l2);
1599 emit_movimm(0,rt);
1600 emit_sbcs(u1,u2,HOST_TEMPREG);
1601 emit_cmovb_imm(1,rt);
1602}
1603
1604void emit_call(int a)
1605{
1606 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1607 u_int offset=genjmp(a);
1608 output_w32(0xeb000000|offset);
1609}
1610void emit_jmp(int a)
1611{
1612 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1613 u_int offset=genjmp(a);
1614 output_w32(0xea000000|offset);
1615}
// Emit "bne a": branch to address a when the NE condition holds.
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq a": branch to address a when the EQ condition holds.
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi a": branch to address a when the MI (negative) condition holds.
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl a": branch to address a when the PL (non-negative) condition holds.
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt a": branch to address a when the LT condition holds.
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge a": branch to address a when the GE condition holds.
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc a": branch to address a when the VC (no overflow) condition holds.
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs a": branch to address a when the CS (carry set) condition holds.
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc a": branch to address a when the CC (carry clear) condition holds.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1670
// Not implemented for ARM: logs an x86-style "push" line, then always fails
// assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0);
}
// Not implemented for ARM: logs "pusha", then always fails assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Not implemented for ARM: logs "popa", then always fails assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1686void emit_pushreg(u_int r)
1687{
1688 assem_debug("push %%%s\n",regname[r]);
1689 assert(0);
1690}
1691void emit_popreg(u_int r)
1692{
1693 assem_debug("pop %%%s\n",regname[r]);
1694 assert(0);
1695}
1696void emit_callreg(u_int r)
1697{
1698 assem_debug("call *%%%s\n",regname[r]);
1699 assert(0);
1700}
1701void emit_jmpreg(u_int r)
1702{
1703 assem_debug("mov pc,%s\n",regname[r]);
1704 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1705}
1706
1707void emit_readword_indexed(int offset, int rs, int rt)
1708{
1709 assert(offset>-4096&&offset<4096);
1710 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1711 if(offset>=0) {
1712 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1713 }else{
1714 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1715 }
1716}
1717void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1718{
1719 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1720 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1721}
// Word load that optionally goes through a map register: with map<0 a plain
// immediate-offset load is emitted; otherwise addr must be 0 and the load
// uses rs + map*4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load a 64-bit value as two words: the word at addr into rh (skipped when
// rh<0) and the word 4 bytes further into rl.  In the mapped case the second
// word is reached by incrementing map (scaled by 4 in the load), which
// modifies the map register.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map>=0) {
    assert(rh!=rs);
    if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
  }else{
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
  }
}
1742void emit_movsbl_indexed(int offset, int rs, int rt)
1743{
1744 assert(offset>-256&&offset<256);
1745 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1746 if(offset>=0) {
1747 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1748 }else{
1749 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1750 }
1751}
1752void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1753{
1754 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1755 else {
1756 if(addr==0) {
1757 emit_shlimm(map,2,map);
1758 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1759 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1760 }else{
1761 assert(addr>-256&&addr<256);
1762 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1763 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1764 emit_movsbl_indexed(addr, rt, rt);
1765 }
1766 }
1767}
1768void emit_movswl_indexed(int offset, int rs, int rt)
1769{
1770 assert(offset>-256&&offset<256);
1771 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1772 if(offset>=0) {
1773 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1774 }else{
1775 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1776 }
1777}
1778void emit_movzbl_indexed(int offset, int rs, int rt)
1779{
1780 assert(offset>-4096&&offset<4096);
1781 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1782 if(offset>=0) {
1783 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1784 }else{
1785 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1786 }
1787}
1788void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1789{
1790 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1791 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1792}
// Zero-extending byte load that optionally goes through a map register.
// map<0: plain immediate-offset load.  Otherwise the load uses base + map*4,
// where the base is rs itself (addr==0) or rs+addr computed into rt first.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1805void emit_movzwl_indexed(int offset, int rs, int rt)
1806{
1807 assert(offset>-256&&offset<256);
1808 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1809 if(offset>=0) {
1810 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1811 }else{
1812 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1813 }
1814}
1815void emit_readword(int addr, int rt)
1816{
1817 u_int offset = addr-(u_int)&dynarec_local;
1818 assert(offset<4096);
1819 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1820 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1821}
1822void emit_movsbl(int addr, int rt)
1823{
1824 u_int offset = addr-(u_int)&dynarec_local;
1825 assert(offset<256);
1826 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1827 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1828}
1829void emit_movswl(int addr, int rt)
1830{
1831 u_int offset = addr-(u_int)&dynarec_local;
1832 assert(offset<256);
1833 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1834 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1835}
1836void emit_movzbl(int addr, int rt)
1837{
1838 u_int offset = addr-(u_int)&dynarec_local;
1839 assert(offset<4096);
1840 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1841 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1842}
1843void emit_movzwl(int addr, int rt)
1844{
1845 u_int offset = addr-(u_int)&dynarec_local;
1846 assert(offset<256);
1847 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1848 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1849}
1850void emit_movzwl_reg(int rs, int rt)
1851{
1852 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1853 assert(0);
1854}
1855
1856void emit_xchg(int rs, int rt)
1857{
1858 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1859 assert(0);
1860}
1861void emit_writeword_indexed(int rt, int offset, int rs)
1862{
1863 assert(offset>-4096&&offset<4096);
1864 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1865 if(offset>=0) {
1866 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1867 }else{
1868 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1869 }
1870}
1871void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1872{
1873 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1874 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1875}
// Word store that optionally goes through a map register: with map<0 a plain
// immediate-offset store is emitted; otherwise addr must be 0 and the store
// targets rs + map*4.  The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store a 64-bit value as two words: rh at addr and rl 4 bytes further.
// Unmapped (map<0): two immediate-offset stores, the high one skipped when
// rh<0.  Mapped: rh is required; the second word is reached either through
// temp = map+1 (scaled by 4 in the store) or, when temp is the same register
// as rs, by advancing rs itself by 4.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int temp_free=(temp!=rs);
  if(temp_free) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(temp_free) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1900void emit_writehword_indexed(int rt, int offset, int rs)
1901{
1902 assert(offset>-256&&offset<256);
1903 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1904 if(offset>=0) {
1905 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1906 }else{
1907 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1908 }
1909}
1910void emit_writebyte_indexed(int rt, int offset, int rs)
1911{
1912 assert(offset>-4096&&offset<4096);
1913 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1914 if(offset>=0) {
1915 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1916 }else{
1917 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1918 }
1919}
1920void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1921{
1922 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1923 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1924}
// Byte store that optionally goes through a map register.
// map<0: plain immediate-offset store.  Otherwise the store targets
// base + map*4, where the base is rs itself (addr==0) or rs+addr computed
// into temp first.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1937void emit_writeword(int rt, int addr)
1938{
1939 u_int offset = addr-(u_int)&dynarec_local;
1940 assert(offset<4096);
1941 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1942 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1943}
1944void emit_writehword(int rt, int addr)
1945{
1946 u_int offset = addr-(u_int)&dynarec_local;
1947 assert(offset<256);
1948 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1949 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1950}
1951void emit_writebyte(int rt, int addr)
1952{
1953 u_int offset = addr-(u_int)&dynarec_local;
1954 assert(offset<4096);
1955 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
1956 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1957}
// Not implemented for ARM: logs an x86-style "movl" line, then always fails
// assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Not implemented for ARM: logs an x86-style "movb" line, then always fails
// assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
1968
1969void emit_mul(int rs)
1970{
1971 assem_debug("mul %%%s\n",regname[rs]);
1972 assert(0);
1973}
1974void emit_imul(int rs)
1975{
1976 assem_debug("imul %%%s\n",regname[rs]);
1977 assert(0);
1978}
1979void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1980{
1981 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1982 assert(rs1<16);
1983 assert(rs2<16);
1984 assert(hi<16);
1985 assert(lo<16);
1986 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1987}
1988void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1989{
1990 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1991 assert(rs1<16);
1992 assert(rs2<16);
1993 assert(hi<16);
1994 assert(lo<16);
1995 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1996}
1997
1998void emit_div(int rs)
1999{
2000 assem_debug("div %%%s\n",regname[rs]);
2001 assert(0);
2002}
2003void emit_idiv(int rs)
2004{
2005 assem_debug("idiv %%%s\n",regname[rs]);
2006 assert(0);
2007}
// Not implemented for ARM: logs "cdq", then always fails assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2013
2014void emit_clz(int rs,int rt)
2015{
2016 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2017 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2018}
2019
2020void emit_subcs(int rs1,int rs2,int rt)
2021{
2022 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2023 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2024}
2025
2026void emit_shrcc_imm(int rs,u_int imm,int rt)
2027{
2028 assert(imm>0);
2029 assert(imm<32);
2030 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2031 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2032}
2033
2034void emit_negmi(int rs, int rt)
2035{
2036 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2037 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2038}
2039
2040void emit_negsmi(int rs, int rt)
2041{
2042 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2043 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2044}
2045
2046void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2047{
2048 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2049 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2050}
2051
2052void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2053{
2054 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2055 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2056}
2057
2058void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2059{
2060 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2061 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2062}
2063
2064void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2065{
2066 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2067 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2068}
2069
2070void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2071{
2072 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2073 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2074}
2075
2076void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2077{
2078 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2079 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2080}
2081
2082void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2083{
2084 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2085 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2086}
2087
2088void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2089{
2090 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2091 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2092}
2093
2094void emit_teq(int rs, int rt)
2095{
2096 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2097 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2098}
2099
2100void emit_rsbimm(int rs, int imm, int rt)
2101{
2102 u_int armval;
2103 genimm_checked(imm,&armval);
2104 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2105 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2106}
2107
2108// Load 2 immediates optimizing for small code size
2109void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2110{
2111 emit_movimm(imm1,rt1);
2112 u_int armval;
2113 if(genimm(imm2-imm1,&armval)) {
2114 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2115 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2116 }else if(genimm(imm1-imm2,&armval)) {
2117 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2118 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2119 }
2120 else emit_movimm(imm2,rt2);
2121}
2122
2123// Conditionally select one of two immediates, optimizing for small code size
2124// This will only be called if HAVE_CMOV_IMM is defined
2125void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2126{
2127 u_int armval;
2128 if(genimm(imm2-imm1,&armval)) {
2129 emit_movimm(imm1,rt);
2130 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2131 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2132 }else if(genimm(imm1-imm2,&armval)) {
2133 emit_movimm(imm1,rt);
2134 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2135 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2136 }
2137 else {
2138 #ifdef ARMv5_ONLY
2139 emit_movimm(imm1,rt);
2140 add_literal((int)out,imm2);
2141 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2142 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2143 #else
2144 emit_movw(imm1&0x0000FFFF,rt);
2145 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2146 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2147 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2148 }
2149 emit_movt(imm1&0xFFFF0000,rt);
2150 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2151 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2152 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2153 }
2154 #endif
2155 }
2156}
2157
// Special case for checking invalid_code (immediate-address form).
// Not implemented for ARM: always fails assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2163
2164// special case for checking invalid_code
2165void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2166{
2167 assert(imm<128&&imm>=0);
2168 assert(r>=0&&r<16);
2169 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2170 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2171 emit_cmpimm(HOST_TEMPREG,imm);
2172}
2173
2174// special case for tlb mapping
2175void emit_addsr12(int rs1,int rs2,int rt)
2176{
2177 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2178 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2179}
2180
2181// Used to preload hash table entries
void emit_prefetch(void *addr)
{
  // NOTE(review): unlike the other emitters in this file, this one writes two
  // single bytes and a ModRM byte before the 32-bit address (0x0F 0x18 looks
  // like an x86 prefetch encoding). It does not appear to produce an ARM
  // instruction - confirm that nothing calls it in the ARM build.
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2190void emit_prefetchreg(int r)
2191{
2192 assem_debug("pld %s\n",regname[r]);
2193 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2194}
2195
2196// Special case for mini_ht
2197void emit_ldreq_indexed(int rs, u_int offset, int rt)
2198{
2199 assert(offset<4096);
2200 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2201 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2202}
2203
2204void emit_flds(int r,int sr)
2205{
2206 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2207 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2208}
2209
2210void emit_vldr(int r,int vr)
2211{
2212 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2213 output_w32(0xed900b00|(vr<<12)|(r<<16));
2214}
2215
2216void emit_fsts(int sr,int r)
2217{
2218 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2219 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2220}
2221
2222void emit_vstr(int vr,int r)
2223{
2224 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2225 output_w32(0xed800b00|(vr<<12)|(r<<16));
2226}
2227
2228void emit_ftosizs(int s,int d)
2229{
2230 assem_debug("ftosizs s%d,s%d\n",d,s);
2231 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2232}
2233
2234void emit_ftosizd(int s,int d)
2235{
2236 assem_debug("ftosizd s%d,d%d\n",d,s);
2237 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2238}
2239
2240void emit_fsitos(int s,int d)
2241{
2242 assem_debug("fsitos s%d,s%d\n",d,s);
2243 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2244}
2245
2246void emit_fsitod(int s,int d)
2247{
2248 assem_debug("fsitod d%d,s%d\n",d,s);
2249 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2250}
2251
2252void emit_fcvtds(int s,int d)
2253{
2254 assem_debug("fcvtds d%d,s%d\n",d,s);
2255 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2256}
2257
2258void emit_fcvtsd(int s,int d)
2259{
2260 assem_debug("fcvtsd s%d,d%d\n",d,s);
2261 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2262}
2263
void emit_fsqrts(int s,int d)
{
  // Emits the single-precision square root s<d> = sqrt(s<s>).
  //   s - source single register number, d - destination single register number
  // Both operands are encoded with the split single-register field layout, so
  // the debug text names the destination as an s register (it used to print
  // "d%d", which did not match the emitted encoding).
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2269
void emit_fsqrtd(int s,int d)
{
  // Emits the double-precision square root d<d> = sqrt(d<s>).
  //   s - source double register number, d - destination double register number
  // Both operands are masked to 0..7 and encoded as double registers, so the
  // debug text names the destination as a d register (it used to print
  // "s%d", which did not match the emitted encoding).
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2275
void emit_fabss(int s,int d)
{
  // Emits the single-precision absolute value s<d> = |s<s>|.
  //   s - source single register number, d - destination single register number
  // The destination is encoded as a single register, so the debug text now
  // prints it as "s%d" instead of the previous, mismatching "d%d".
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2281
void emit_fabsd(int s,int d)
{
  // Emits the double-precision absolute value d<d> = |d<s>|.
  //   s - source double register number, d - destination double register number
  // The destination is encoded as a double register, so the debug text now
  // prints it as "d%d" instead of the previous, mismatching "s%d".
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2287
void emit_fnegs(int s,int d)
{
  // Emits the single-precision negation s<d> = -s<s>.
  //   s - source single register number, d - destination single register number
  // The destination is encoded as a single register, so the debug text now
  // prints it as "s%d" instead of the previous, mismatching "d%d".
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2293
void emit_fnegd(int s,int d)
{
  // Emits the double-precision negation d<d> = -d<s>.
  //   s - source double register number, d - destination double register number
  // The destination is encoded as a double register, so the debug text now
  // prints it as "d%d" instead of the previous, mismatching "s%d".
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2299
2300void emit_fadds(int s1,int s2,int d)
2301{
2302 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2303 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2304}
2305
2306void emit_faddd(int s1,int s2,int d)
2307{
2308 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2309 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2310}
2311
2312void emit_fsubs(int s1,int s2,int d)
2313{
2314 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2315 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2316}
2317
2318void emit_fsubd(int s1,int s2,int d)
2319{
2320 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2321 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2322}
2323
2324void emit_fmuls(int s1,int s2,int d)
2325{
2326 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2327 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2328}
2329
2330void emit_fmuld(int s1,int s2,int d)
2331{
2332 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2333 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2334}
2335
2336void emit_fdivs(int s1,int s2,int d)
2337{
2338 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2339 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2340}
2341
2342void emit_fdivd(int s1,int s2,int d)
2343{
2344 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2345 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2346}
2347
2348void emit_fcmps(int x,int y)
2349{
2350 assem_debug("fcmps s14, s15\n");
2351 output_w32(0xeeb47a67);
2352}
2353
2354void emit_fcmpd(int x,int y)
2355{
2356 assem_debug("fcmpd d6, d7\n");
2357 output_w32(0xeeb46b47);
2358}
2359
2360void emit_fmstat()
2361{
2362 assem_debug("fmstat\n");
2363 output_w32(0xeef1fa10);
2364}
2365
2366void emit_bicne_imm(int rs,int imm,int rt)
2367{
2368 u_int armval;
2369 genimm_checked(imm,&armval);
2370 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2371 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2372}
2373
2374void emit_biccs_imm(int rs,int imm,int rt)
2375{
2376 u_int armval;
2377 genimm_checked(imm,&armval);
2378 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2379 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2380}
2381
2382void emit_bicvc_imm(int rs,int imm,int rt)
2383{
2384 u_int armval;
2385 genimm_checked(imm,&armval);
2386 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2387 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2388}
2389
2390void emit_bichi_imm(int rs,int imm,int rt)
2391{
2392 u_int armval;
2393 genimm_checked(imm,&armval);
2394 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2395 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2396}
2397
2398void emit_orrvs_imm(int rs,int imm,int rt)
2399{
2400 u_int armval;
2401 genimm_checked(imm,&armval);
2402 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2403 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2404}
2405
2406void emit_orrne_imm(int rs,int imm,int rt)
2407{
2408 u_int armval;
2409 genimm_checked(imm,&armval);
2410 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2411 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2412}
2413
2414void emit_andne_imm(int rs,int imm,int rt)
2415{
2416 u_int armval;
2417 genimm_checked(imm,&armval);
2418 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2419 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2420}
2421
2422void emit_jno_unlikely(int a)
2423{
2424 //emit_jno(a);
2425 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2426 output_w32(0x72800000|rd_rn_rm(15,15,0));
2427}
2428
2429// Save registers before function call
2430void save_regs(u_int reglist)
2431{
2432 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2433 if(!reglist) return;
2434 assem_debug("stmia fp,{");
2435 if(reglist&1) assem_debug("r0, ");
2436 if(reglist&2) assem_debug("r1, ");
2437 if(reglist&4) assem_debug("r2, ");
2438 if(reglist&8) assem_debug("r3, ");
2439 if(reglist&0x1000) assem_debug("r12");
2440 assem_debug("}\n");
2441 output_w32(0xe88b0000|reglist);
2442}
2443// Restore registers after function call
2444void restore_regs(u_int reglist)
2445{
2446 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2447 if(!reglist) return;
2448 assem_debug("ldmia fp,{");
2449 if(reglist&1) assem_debug("r0, ");
2450 if(reglist&2) assem_debug("r1, ");
2451 if(reglist&4) assem_debug("r2, ");
2452 if(reglist&8) assem_debug("r3, ");
2453 if(reglist&0x1000) assem_debug("r12");
2454 assem_debug("}\n");
2455 output_w32(0xe89b0000|reglist);
2456}
2457
2458// Write back consts using r14 so we don't disturb the other registers
2459void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2460{
2461 int hr;
2462 for(hr=0;hr<HOST_REGS;hr++) {
2463 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2464 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2465 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2466 int value=constmap[i][hr];
2467 if(value==0) {
2468 emit_zeroreg(HOST_TEMPREG);
2469 }
2470 else {
2471 emit_movimm(value,HOST_TEMPREG);
2472 }
2473 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2474#ifndef FORCE32
2475 if((i_is32>>i_regmap[hr])&1) {
2476 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2477 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2478 }
2479#endif
2480 }
2481 }
2482 }
2483 }
2484}
2485
2486/* Stubs/epilogue */
2487
2488void literal_pool(int n)
2489{
2490 if(!literalcount) return;
2491 if(n) {
2492 if((int)out-literals[0][0]<4096-n) return;
2493 }
2494 u_int *ptr;
2495 int i;
2496 for(i=0;i<literalcount;i++)
2497 {
2498 ptr=(u_int *)literals[i][0];
2499 u_int offset=(u_int)out-(u_int)ptr-8;
2500 assert(offset<4096);
2501 assert(!(offset&3));
2502 *ptr|=offset;
2503 output_w32(literals[i][1]);
2504 }
2505 literalcount=0;
2506}
2507
2508void literal_pool_jumpover(int n)
2509{
2510 if(!literalcount) return;
2511 if(n) {
2512 if((int)out-literals[0][0]<4096-n) return;
2513 }
2514 int jaddr=(int)out;
2515 emit_jmp(0);
2516 literal_pool(0);
2517 set_jump_target(jaddr,(int)out);
2518}
2519
// Emit the trampoline for a jump that leaves the current block: loads target
// into r0 and addr into r1 from the literal pool, then jumps to linker.
//   addr   - address of the instruction being linked; its top byte must
//            satisfy (byte&0x0e)==0xa (presumably a branch opcode - confirm)
//            and it must lie inside [BASE_ADDR, BASE_ADDR+(1<<TARGET_SIZE_2))
//   target - value passed to the linker in r0
//   linker - address of the routine to jump to
// NOTE(review): declared with implicit int and returns no value.
emit_extjump2(int addr, int target, int linker)
{
  u_char *ptr=(u_char *)addr;
  assert((ptr[3]&0x0e)==0xa);
  emit_loadlp(target,0);
  emit_loadlp(addr,1);
  assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
  //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
//DEBUG >
#ifdef DEBUG_CYCLE_COUNT
  // Debug-only cycle bookkeeping: Count = cycle+last_count, then rebase the
  // cycle register and last_count on next_interupt.
  emit_readword((int)&last_count,ECX);
  emit_add(HOST_CCREG,ECX,HOST_CCREG);
  emit_readword((int)&next_interupt,ECX);
  emit_writeword(HOST_CCREG,(int)&Count);
  emit_sub(HOST_CCREG,ECX,HOST_CCREG);
  emit_writeword(ECX,(int)&last_count);
#endif
//DEBUG <
  emit_jmp(linker);
}
2540
2541emit_extjump(int addr, int target)
2542{
2543 emit_extjump2(addr, target, (int)dyna_linker);
2544}
2545emit_extjump_ds(int addr, int target)
2546{
2547 emit_extjump2(addr, target, (int)dyna_linker_ds);
2548}
2549
// Emit the out-of-line slow path for load stub n.
// Fields of stubs[n] as read below:
//   [0] stub type (LOADB/LOADBU/LOADH/LOADHU/LOADW/LOADD_STUB)
//   [1] address of the branch that is retargeted to this stub
//   [2] address to jump back to when the stub is done
//   [3] instruction index i
//   [4] host register holding the address to read
//   [5] pointer to the struct regstat in effect
//   [6] cycle adjustment (used as 2*stubs[n][6]+2)
//   [7] host register list handed to save_regs/restore_regs
// The emitted code stores the address to `address`, writes back register
// state, calls indirect_jump_indexed with the handler table in r0 and
// address>>16 in r1, re-syncs the cycle counter, and finally loads the result
// from readmem_dword into the destination register.
// NOTE(review): declared with implicit int and returns no value.
do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt;
  int ds;
  // Coprocessor loads and LOADLR deliver into FTEMP rather than rt1[i]
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rth=get_reg(i_regmap,rt1[i]|64);
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
  // Fall back to the destination register, then to a temporary (-1)
  if(addr<0) addr=rt;
  if(addr<0)
    // assume dummy read, no alloced reg
    addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Select the handler table matching the access width
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
  // ds is nonzero when the regstat passed in is not regs[i] itself
  ds=i_regs!=&regs[i];
  int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
  // Write back dirty registers, leaving out addr and real_rs; constants in
  // r0-r3/r12 (0x100f) are loaded first, the remaining ones go via wb_consts
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
  // r1 = address>>16, r0 = table, r2 = adjusted cycle count, r3 = PC + flags
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  //emit_readword((int)&last_count,temp);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  //emit_mov(15,14);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  // cycle register = Count - adjustment - next_interupt; last_count = next_interupt
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  // Fetch the result with the extension that matches the stub type
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    assert(rt>=0);
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
  emit_jmp(stubs[n][2]); // return address
}
2647
// Emit an inline (not out-of-line) read through the handler table for an
// address known at compile time.
//   type    - LOAD*_STUB access type, selects the handler table
//   i       - instruction index
//   addr    - the address; addr>>16 indexes the table at compile time
//   regmap  - current host register map
//   target  - guest register receiving the result (also used to find rs)
//   adj     - cycle adjustment, applied as CLOCK_DIVIDER*(adj+1)
//   reglist - host register list handed to save_regs/restore_regs
// NOTE(review): declared with implicit int and returns no value.
inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  assert(rs>=0);
  assert(rt>=0);
  // Select the handler table matching the access width
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The handler is looked up now, while generating code, and loaded into r0
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  // cycle register = Count - adjustment - next_interupt; last_count = next_interupt
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
  // Fetch the result from readmem_dword with the matching extension
  if(type==LOADB_STUB)
    emit_movsbl((int)&readmem_dword,rt);
  if(type==LOADBU_STUB)
    emit_movzbl((int)&readmem_dword,rt);
  if(type==LOADH_STUB)
    emit_movswl((int)&readmem_dword,rt);
  if(type==LOADHU_STUB)
    emit_movzwl((int)&readmem_dword,rt);
  if(type==LOADW_STUB)
    emit_readword((int)&readmem_dword,rt);
  if(type==LOADD_STUB) {
    emit_readword((int)&readmem_dword,rt);
    if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
  }
}
2715
// Emit the out-of-line slow path for store stub n.
// Fields of stubs[n] as read below:
//   [0] stub type (STOREB/STOREH/STOREW/STORED_STUB)
//   [1] address of the branch that is retargeted to this stub
//   [2] address to jump back to when the stub is done
//   [3] instruction index i
//   [4] host register holding the address to write
//   [5] pointer to the struct regstat in effect
//   [6] cycle adjustment (used as 2*stubs[n][6]+2)
//   [7] host register list handed to save_regs/restore_regs
// The emitted code stores the address to `address` and the data to
// byte/hword/word/dword, writes back register state, calls
// indirect_jump_indexed with the handler table in r0 and address>>16 in r1,
// and re-syncs the cycle counter before jumping back.
// NOTE(review): declared with implicit int and returns no value.
do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt,r;
  int ds;
  // Coprocessor stores take their data from FTEMP, others from rs2[i];
  // r remembers which guest register that was
  if(itype[i]==C1LS||itype[i]==C2LS) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rth=get_reg(i_regmap,rs2[i]|64);
    rt=get_reg(i_regmap,r=rs2[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
  if(addr<0) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Select the handler table matching the access width
  int ftable=0;
  if(type==STOREB_STUB)
    ftable=(int)writememb;
  if(type==STOREH_STUB)
    ftable=(int)writememh;
  if(type==STOREW_STUB)
    ftable=(int)writemem;
#ifndef FORCE32
  if(type==STORED_STUB)
    ftable=(int)writememd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_shrimm(rs,16,rs);
  //emit_movmem_indexedx4(ftable,rs,rs);
  // Hand the data to the handler through the width-specific global
  if(type==STOREB_STUB)
    emit_writebyte(rt,(int)&byte);
  if(type==STOREH_STUB)
    emit_writehword(rt,(int)&hword);
  if(type==STOREW_STUB)
    emit_writeword(rt,(int)&word);
  if(type==STORED_STUB) {
#ifndef FORCE32
    emit_writeword(rt,(int)&dword);
    // When r is guest register 0 the low register is stored for both halves
    emit_writeword(r?rth:rt,(int)&dword+4);
#else
    printf("STORED_STUB\n");
#endif
  }
  //emit_pusha();
  save_regs(reglist);
  // ds is nonzero when the regstat passed in is not regs[i] itself
  ds=i_regs!=&regs[i];
  int real_rs=get_reg(i_regmap,rs1[i]);
  // Write back dirty registers, leaving out addr and real_rs; constants in
  // r0-r3/r12 (0x100f) are loaded first, the remaining ones go via wb_consts
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
  // r1 = address>>16, r0 = table, r2 = adjusted cycle count, r3 = PC + flags
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  //emit_readword((int)&last_count,temp);
  //emit_addimm(cc,2*stubs[n][5]+2,cc);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  // cycle register = Count - adjustment - next_interupt; last_count = next_interupt
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  emit_jmp(stubs[n][2]); // return address
}
2807
// Emit an inline (not out-of-line) write through the handler table for an
// address known at compile time.
//   type    - STORE*_STUB access type, selects the handler table
//   i       - instruction index
//   addr    - the address; addr>>16 indexes the table at compile time
//   regmap  - current host register map
//   target  - guest register holding the data to store
//   adj     - cycle adjustment, applied as CLOCK_DIVIDER*(adj+1)
//   reglist - host register list handed to save_regs/restore_regs
// The address itself is taken from the temporary register (guest reg -1).
// NOTE(review): declared with implicit int and returns no value.
inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,-1);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  assert(rs>=0);
  assert(rt>=0);
  // Select the handler table matching the access width
  int ftable=0;
  if(type==STOREB_STUB)
    ftable=(int)writememb;
  if(type==STOREH_STUB)
    ftable=(int)writememh;
  if(type==STOREW_STUB)
    ftable=(int)writemem;
#ifndef FORCE32
  if(type==STORED_STUB)
    ftable=(int)writememd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_shrimm(rs,16,rs);
  //emit_movmem_indexedx4(ftable,rs,rs);
  // Hand the data to the handler through the width-specific global
  if(type==STOREB_STUB)
    emit_writebyte(rt,(int)&byte);
  if(type==STOREH_STUB)
    emit_writehword(rt,(int)&hword);
  if(type==STOREW_STUB)
    emit_writeword(rt,(int)&word);
  if(type==STORED_STUB) {
#ifndef FORCE32
    emit_writeword(rt,(int)&dword);
    // When target is guest register 0 the low register is stored for both halves
    emit_writeword(target?rth:rt,(int)&dword+4);
#else
    printf("STORED_STUB\n");
#endif
  }
  //emit_pusha();
  save_regs(reglist);
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The handler is looked up now, while generating code, and loaded into r0
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
  // cycle register = Count - adjustment - next_interupt; last_count = next_interupt
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  //emit_popa();
  restore_regs(reglist);
}
2875
// Emit the out-of-line slow path for an unaligned store (SWL/SWR only, see the
// opcode assertion). The emitted code does a read-modify-write: it reads the
// aligned word through the readmem table, merges the register data into it
// with a shift derived from the low address bits, and writes the word back
// through the writemem table.
// Fields of stubs[n] as read below:
//   [1] address of the branch that is retargeted to this stub
//   [2] address to jump back to when the stub is done
//   [3] instruction index i
//   [4] pointer to the struct regstat in effect
//   [5] host register holding the (unaligned) address
//   [6] cycle adjustment (used as 2*stubs[n][6]+2)
//   [7] host register list handed to save_regs/restore_regs
// NOTE(review): declared with implicit int and returns no value.
do_unalignedwritestub(int n)
{
  assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  set_jump_target(stubs[n][1],(int)out);

  int i=stubs[n][3];
  struct regstat *i_regs=(struct regstat *)stubs[n][4];
  int addr=stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int temp2=get_reg(i_regmap,FTEMP);
  int rt;
  int ds, real_rs;
  rt=get_reg(i_regmap,rs2[i]);
  assert(rt>=0);
  assert(addr>=0);
  assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
  // addr must survive the calls; temp2 is scratch and need not be preserved
  reglist|=(1<<addr);
  reglist&=~(1<<temp2);

  // The handlers work on the word-aligned address
  emit_andimm(addr,0xfffffffc,temp2);
  emit_writeword(temp2,(int)&address);

  save_regs(reglist);
  // ds is nonzero when the regstat passed in is not regs[i] itself
  ds=i_regs!=&regs[i];
  real_rs=get_reg(i_regmap,rs1[i]);
  // Write back dirty registers, leaving out addr and real_rs; constants in
  // r0-r3/r12 (0x100f) are loaded first, the remaining ones go via wb_consts
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
  // Read phase: r1 = address>>16, r0 = readmem table, r2 = adjusted cycles
  emit_shrimm(addr,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm((u_int)readmem,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
  emit_call((int)&indirect_jump_indexed);
  restore_regs(reglist);

  // Merge phase: temp2 = word just read; temp (aliases addr) = bit shift
  emit_readword((int)&readmem_dword,temp2);
  int temp=addr; //hmh
  emit_shlimm(addr,3,temp);
  emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
  if (opcode[i]==0x2e) // SWR
#else
  if (opcode[i]==0x2a) // SWL
#endif
    emit_xorimm(temp,24,temp);
  // Clear the bytes being replaced (all-ones mask shifted by temp), then OR
  // in the shifted register data
  emit_movimm(-1,HOST_TEMPREG);
  if (opcode[i]==0x2a) { // SWL
    emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshr(rt,temp,temp2);
  }else{
    emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshl(rt,temp,temp2);
  }
  // Write phase: reload the aligned address (addr was used as temp above)
  emit_readword((int)&address,addr);
  emit_writeword(temp2,(int)&word);
  //save_regs(reglist); // don't need to, no state changes
  emit_shrimm(addr,16,1);
  emit_movimm((u_int)writemem,0);
  //emit_call((int)&indirect_jump_indexed);
  // Set lr = pc, then load pc from table[r1]: calls the handler directly
  emit_mov(15,14);
  emit_readword_dualindexedx4(0,1,15);
  // cycle register = Count - adjustment - next_interupt; last_count = next_interupt
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
}
2955
// Debug helper: prints the eight values it is given in the order
// a,b,c,d,ebp,esi,edi, followed in parentheses by the int stored just below
// the edi parameter in memory.
// NOTE(review): (&edi)[-1] reads outside the parameter object, so what it
// yields depends on the calling convention and stack layout; the parameter
// names suggest this was written for an x86 register dump - confirm before
// relying on it here.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
}
2960
2961do_invstub(int n)
2962{
2963 literal_pool(20);
2964 u_int reglist=stubs[n][3];
2965 set_jump_target(stubs[n][1],(int)out);
2966 save_regs(reglist);
2967 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2968 emit_call((int)&invalidate_addr);
2969 restore_regs(reglist);
2970 emit_jmp(stubs[n][2]); // return address
2971}
2972
// Emit the verification stub for instruction i: loads r1 = address of the
// original code, r2 = copy, r3 = slen*4 (length in bytes) and r0 = start+i*4,
// then calls verify_code (or verify_code_vm when start is at or above
// 0xC0000000 as a signed comparison).
// Returns the address of the code emitted by load_regs_entry(i), or
// instr_addr[i] when load_regs_entry emitted nothing.
int do_dirty_stub(int i)
{
  assem_debug("do_dirty_stub %x\n",start+i*4);
  u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
  #ifdef PCSX
  // PCSX build: always compare against source
  addr=(u_int)source;
  #endif
  // Careful about the code output here, verify_dirty needs to parse it.
  #ifdef ARMv5_ONLY
  emit_loadlp(addr,1);
  emit_loadlp((int)copy,2);
  emit_loadlp(slen*4,3);
  #else
  emit_movw(addr&0x0000FFFF,1);
  emit_movw(((u_int)copy)&0x0000FFFF,2);
  emit_movt(addr&0xFFFF0000,1);
  emit_movt(((u_int)copy)&0xFFFF0000,2);
  emit_movw(slen*4,3);
  #endif
  emit_movimm(start+i*4,0);
  emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
  int entry=(int)out;
  load_regs_entry(i);
  // Nothing was emitted for register loading: enter the instruction directly
  if(entry==(int)out) entry=instr_addr[i];
  emit_jmp(instr_addr[i]);
  return entry;
}
3000
3001void do_dirty_stub_ds()
3002{
3003 // Careful about the code output here, verify_dirty needs to parse it.
3004 #ifdef ARMv5_ONLY
3005 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3006 emit_loadlp((int)copy,2);
3007 emit_loadlp(slen*4,3);
3008 #else
3009 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3010 emit_movw(((u_int)copy)&0x0000FFFF,2);
3011 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3012 emit_movt(((u_int)copy)&0xFFFF0000,2);
3013 emit_movw(slen*4,3);
3014 #endif
3015 emit_movimm(start+1,0);
3016 emit_call((int)&verify_code_ds);
3017}
3018
3019do_cop1stub(int n)
3020{
3021 literal_pool(256);
3022 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3023 set_jump_target(stubs[n][1],(int)out);
3024 int i=stubs[n][3];
3025// int rs=stubs[n][4];
3026 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3027 int ds=stubs[n][6];
3028 if(!ds) {
3029 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3030 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3031 }
3032 //else {printf("fp exception in delay slot\n");}
3033 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3034 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3035 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3036 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3037 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3038}
3039
3040/* TLB */
3041
3042int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3043{
3044 if(c) {
3045 if((signed int)addr>=(signed int)0xC0000000) {
3046 // address_generation already loaded the const
3047 emit_readword_dualindexedx4(FP,map,map);
3048 }
3049 else
3050 return -1; // No mapping
3051 }
3052 else {
3053 assert(s!=map);
3054 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3055 emit_addsr12(map,s,map);
3056 // Schedule this while we wait on the load
3057 //if(x) emit_xorimm(s,x,ar);
3058 if(shift>=0) emit_shlimm(s,3,shift);
3059 if(~a) emit_andimm(s,a,ar);
3060 emit_readword_dualindexedx4(FP,map,map);
3061 }
3062 return map;
3063}
3064int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3065{
3066 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3067 emit_test(map,map);
3068 *jaddr=(int)out;
3069 emit_js(0);
3070 }
3071 return map;
3072}
3073
3074int gen_tlb_addr_r(int ar, int map) {
3075 if(map>=0) {
3076 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3077 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3078 }
3079}
3080
3081int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3082{
3083 if(c) {
3084 if(addr<0x80800000||addr>=0xC0000000) {
3085 // address_generation already loaded the const
3086 emit_readword_dualindexedx4(FP,map,map);
3087 }
3088 else
3089 return -1; // No mapping
3090 }
3091 else {
3092 assert(s!=map);
3093 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3094 emit_addsr12(map,s,map);
3095 // Schedule this while we wait on the load
3096 //if(x) emit_xorimm(s,x,ar);
3097 emit_readword_dualindexedx4(FP,map,map);
3098 }
3099 return map;
3100}
3101int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3102{
3103 if(!c||addr<0x80800000||addr>=0xC0000000) {
3104 emit_testimm(map,0x40000000);
3105 *jaddr=(int)out;
3106 emit_jne(0);
3107 }
3108}
3109
3110int gen_tlb_addr_w(int ar, int map) {
3111 if(map>=0) {
3112 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3113 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3114 }
3115}
3116
3117// Generate the address of the memory_map entry, relative to dynarec_local
3118generate_map_const(u_int addr,int reg) {
3119 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3120 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3121}
3122
3123/* Special assem */
3124
3125void shift_assemble_arm(int i,struct regstat *i_regs)
3126{
3127 if(rt1[i]) {
3128 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3129 {
3130 signed char s,t,shift;
3131 t=get_reg(i_regs->regmap,rt1[i]);
3132 s=get_reg(i_regs->regmap,rs1[i]);
3133 shift=get_reg(i_regs->regmap,rs2[i]);
3134 if(t>=0){
3135 if(rs1[i]==0)
3136 {
3137 emit_zeroreg(t);
3138 }
3139 else if(rs2[i]==0)
3140 {
3141 assert(s>=0);
3142 if(s!=t) emit_mov(s,t);
3143 }
3144 else
3145 {
3146 emit_andimm(shift,31,HOST_TEMPREG);
3147 if(opcode2[i]==4) // SLLV
3148 {
3149 emit_shl(s,HOST_TEMPREG,t);
3150 }
3151 if(opcode2[i]==6) // SRLV
3152 {
3153 emit_shr(s,HOST_TEMPREG,t);
3154 }
3155 if(opcode2[i]==7) // SRAV
3156 {
3157 emit_sar(s,HOST_TEMPREG,t);
3158 }
3159 }
3160 }
3161 } else { // DSLLV/DSRLV/DSRAV
3162 signed char sh,sl,th,tl,shift;
3163 th=get_reg(i_regs->regmap,rt1[i]|64);
3164 tl=get_reg(i_regs->regmap,rt1[i]);
3165 sh=get_reg(i_regs->regmap,rs1[i]|64);
3166 sl=get_reg(i_regs->regmap,rs1[i]);
3167 shift=get_reg(i_regs->regmap,rs2[i]);
3168 if(tl>=0){
3169 if(rs1[i]==0)
3170 {
3171 emit_zeroreg(tl);
3172 if(th>=0) emit_zeroreg(th);
3173 }
3174 else if(rs2[i]==0)
3175 {
3176 assert(sl>=0);
3177 if(sl!=tl) emit_mov(sl,tl);
3178 if(th>=0&&sh!=th) emit_mov(sh,th);
3179 }
3180 else
3181 {
3182 // FIXME: What if shift==tl ?
3183 assert(shift!=tl);
3184 int temp=get_reg(i_regs->regmap,-1);
3185 int real_th=th;
3186 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3187 assert(sl>=0);
3188 assert(sh>=0);
3189 emit_andimm(shift,31,HOST_TEMPREG);
3190 if(opcode2[i]==0x14) // DSLLV
3191 {
3192 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3193 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3194 emit_orrshr(sl,HOST_TEMPREG,th);
3195 emit_andimm(shift,31,HOST_TEMPREG);
3196 emit_testimm(shift,32);
3197 emit_shl(sl,HOST_TEMPREG,tl);
3198 if(th>=0) emit_cmovne_reg(tl,th);
3199 emit_cmovne_imm(0,tl);
3200 }
3201 if(opcode2[i]==0x16) // DSRLV
3202 {
3203 assert(th>=0);
3204 emit_shr(sl,HOST_TEMPREG,tl);
3205 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3206 emit_orrshl(sh,HOST_TEMPREG,tl);
3207 emit_andimm(shift,31,HOST_TEMPREG);
3208 emit_testimm(shift,32);
3209 emit_shr(sh,HOST_TEMPREG,th);
3210 emit_cmovne_reg(th,tl);
3211 if(real_th>=0) emit_cmovne_imm(0,th);
3212 }
3213 if(opcode2[i]==0x17) // DSRAV
3214 {
3215 assert(th>=0);
3216 emit_shr(sl,HOST_TEMPREG,tl);
3217 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3218 if(real_th>=0) {
3219 assert(temp>=0);
3220 emit_sarimm(th,31,temp);
3221 }
3222 emit_orrshl(sh,HOST_TEMPREG,tl);
3223 emit_andimm(shift,31,HOST_TEMPREG);
3224 emit_testimm(shift,32);
3225 emit_sar(sh,HOST_TEMPREG,th);
3226 emit_cmovne_reg(th,tl);
3227 if(real_th>=0) emit_cmovne_reg(temp,th);
3228 }
3229 }
3230 }
3231 }
3232 }
3233}
3234#define shift_assemble shift_assemble_arm
3235
3236void loadlr_assemble_arm(int i,struct regstat *i_regs)
3237{
3238 int s,th,tl,temp,temp2,addr,map=-1;
3239 int offset;
3240 int jaddr=0;
3241 int memtarget,c=0;
3242 u_int hr,reglist=0;
3243 th=get_reg(i_regs->regmap,rt1[i]|64);
3244 tl=get_reg(i_regs->regmap,rt1[i]);
3245 s=get_reg(i_regs->regmap,rs1[i]);
3246 temp=get_reg(i_regs->regmap,-1);
3247 temp2=get_reg(i_regs->regmap,FTEMP);
3248 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3249 assert(addr<0);
3250 offset=imm[i];
3251 for(hr=0;hr<HOST_REGS;hr++) {
3252 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3253 }
3254 reglist|=1<<temp;
3255 if(offset||s<0||c) addr=temp2;
3256 else addr=s;
3257 if(s>=0) {
3258 c=(i_regs->wasconst>>s)&1;
3259 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3260 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3261 }
3262 if(tl>=0) {
3263 //assert(tl>=0);
3264 //assert(rt1[i]);
3265 if(!using_tlb) {
3266 if(!c) {
3267 emit_shlimm(addr,3,temp);
3268 if (opcode[i]==0x22||opcode[i]==0x26) {
3269 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3270 }else{
3271 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3272 }
3273 emit_cmpimm(addr,RAM_SIZE);
3274 jaddr=(int)out;
3275 emit_jno(0);
3276 }
3277 else {
3278 if (opcode[i]==0x22||opcode[i]==0x26) {
3279 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3280 }else{
3281 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3282 }
3283 }
3284 }else{ // using tlb
3285 int a;
3286 if(c) {
3287 a=-1;
3288 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3289 a=0xFFFFFFFC; // LWL/LWR
3290 }else{
3291 a=0xFFFFFFF8; // LDL/LDR
3292 }
3293 map=get_reg(i_regs->regmap,TLREG);
3294 assert(map>=0);
3295 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3296 if(c) {
3297 if (opcode[i]==0x22||opcode[i]==0x26) {
3298 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3299 }else{
3300 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3301 }
3302 }
3303 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3304 }
3305 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3306 if(!c||memtarget) {
3307 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3308 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3309 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3310 }
3311 else
3312 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3313 emit_andimm(temp,24,temp);
3314#ifdef BIG_ENDIAN_MIPS
3315 if (opcode[i]==0x26) // LWR
3316#else
3317 if (opcode[i]==0x22) // LWL
3318#endif
3319 emit_xorimm(temp,24,temp);
3320 emit_movimm(-1,HOST_TEMPREG);
3321 if (opcode[i]==0x26) {
3322 emit_shr(temp2,temp,temp2);
3323 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3324 }else{
3325 emit_shl(temp2,temp,temp2);
3326 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3327 }
3328 emit_or(temp2,tl,tl);
3329 //emit_storereg(rt1[i],tl); // DEBUG
3330 }
3331 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3332 // FIXME: little endian
3333 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3334 if(!c||memtarget) {
3335 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3336 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3337 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3338 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3339 }
3340 else
3341 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3342 emit_testimm(temp,32);
3343 emit_andimm(temp,24,temp);
3344 if (opcode[i]==0x1A) { // LDL
3345 emit_rsbimm(temp,32,HOST_TEMPREG);
3346 emit_shl(temp2h,temp,temp2h);
3347 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3348 emit_movimm(-1,HOST_TEMPREG);
3349 emit_shl(temp2,temp,temp2);
3350 emit_cmove_reg(temp2h,th);
3351 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3352 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3353 emit_orreq(temp2,tl,tl);
3354 emit_orrne(temp2,th,th);
3355 }
3356 if (opcode[i]==0x1B) { // LDR
3357 emit_xorimm(temp,24,temp);
3358 emit_rsbimm(temp,32,HOST_TEMPREG);
3359 emit_shr(temp2,temp,temp2);
3360 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3361 emit_movimm(-1,HOST_TEMPREG);
3362 emit_shr(temp2h,temp,temp2h);
3363 emit_cmovne_reg(temp2,tl);
3364 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3365 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3366 emit_orrne(temp2h,th,th);
3367 emit_orreq(temp2h,tl,tl);
3368 }
3369 }
3370 }
3371}
3372#define loadlr_assemble loadlr_assemble_arm
3373
3374void cop0_assemble(int i,struct regstat *i_regs)
3375{
3376 if(opcode2[i]==0) // MFC0
3377 {
3378 signed char t=get_reg(i_regs->regmap,rt1[i]);
3379 char copr=(source[i]>>11)&0x1f;
3380 //assert(t>=0); // Why does this happen? OOT is weird
3381 if(t>=0&&rt1[i]!=0) {
3382#ifdef MUPEN64
3383 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3384 emit_movimm((source[i]>>11)&0x1f,1);
3385 emit_writeword(0,(int)&PC);
3386 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3387 if(copr==9) {
3388 emit_readword((int)&last_count,ECX);
3389 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3390 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3391 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3392 emit_writeword(HOST_CCREG,(int)&Count);
3393 }
3394 emit_call((int)MFC0);
3395 emit_readword((int)&readmem_dword,t);
3396#else
3397 emit_readword((int)&reg_cop0+copr*4,t);
3398#endif
3399 }
3400 }
3401 else if(opcode2[i]==4) // MTC0
3402 {
3403 signed char s=get_reg(i_regs->regmap,rs1[i]);
3404 char copr=(source[i]>>11)&0x1f;
3405 assert(s>=0);
3406 emit_writeword(s,(int)&readmem_dword);
3407 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3408#ifdef MUPEN64 /// FIXME
3409 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3410 emit_movimm((source[i]>>11)&0x1f,1);
3411 emit_writeword(0,(int)&PC);
3412 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3413#endif
3414#ifdef PCSX
3415 emit_movimm(source[i],0);
3416 emit_writeword(0,(int)&psxRegs.code);
3417#endif
3418 if(copr==9||copr==11||copr==12||copr==13) {
3419 emit_readword((int)&last_count,ECX);
3420 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3421 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3422 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3423 emit_writeword(HOST_CCREG,(int)&Count);
3424 }
3425 // What a mess. The status register (12) can enable interrupts,
3426 // so needs a special case to handle a pending interrupt.
3427 // The interrupt must be taken immediately, because a subsequent
3428 // instruction might disable interrupts again.
3429 if(copr==12||copr==13) {
3430 emit_movimm(start+i*4+4,0);
3431 emit_movimm(0,1);
3432 emit_writeword(0,(int)&pcaddr);
3433 emit_writeword(1,(int)&pending_exception);
3434 }
3435 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3436 //else
3437 emit_call((int)MTC0);
3438 if(copr==9||copr==11||copr==12||copr==13) {
3439 emit_readword((int)&Count,HOST_CCREG);
3440 emit_readword((int)&next_interupt,ECX);
3441 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3442 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3443 emit_writeword(ECX,(int)&last_count);
3444 emit_storereg(CCREG,HOST_CCREG);
3445 }
3446 if(copr==12||copr==13) {
3447 assert(!is_delayslot);
3448 emit_readword((int)&pending_exception,14);
3449 }
3450 emit_loadreg(rs1[i],s);
3451 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3452 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3453 if(copr==12||copr==13) {
3454 emit_test(14,14);
3455 emit_jne((int)&do_interrupt);
3456 }
3457 cop1_usable=0;
3458 }
3459 else
3460 {
3461 assert(opcode2[i]==0x10);
3462#ifndef DISABLE_TLB
3463 if((source[i]&0x3f)==0x01) // TLBR
3464 emit_call((int)TLBR);
3465 if((source[i]&0x3f)==0x02) // TLBWI
3466 emit_call((int)TLBWI_new);
3467 if((source[i]&0x3f)==0x06) { // TLBWR
3468 // The TLB entry written by TLBWR is dependent on the count,
3469 // so update the cycle count
3470 emit_readword((int)&last_count,ECX);
3471 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3472 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3473 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3474 emit_writeword(HOST_CCREG,(int)&Count);
3475 emit_call((int)TLBWR_new);
3476 }
3477 if((source[i]&0x3f)==0x08) // TLBP
3478 emit_call((int)TLBP);
3479#endif
3480#ifdef PCSX
3481 if((source[i]&0x3f)==0x10) // RFE
3482 {
3483 emit_readword((int)&Status,0);
3484 emit_andimm(0,0x3c,1);
3485 emit_andimm(0,~0xf,0);
3486 emit_orrshr_imm(1,2,0);
3487 emit_writeword(0,(int)&Status);
3488 }
3489#else
3490 if((source[i]&0x3f)==0x18) // ERET
3491 {
3492 int count=ccadj[i];
3493 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3494 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3495 emit_jmp((int)jump_eret);
3496 }
3497#endif
3498 }
3499}
3500
3501static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3502{
3503 switch (copr) {
3504 case 1:
3505 case 3:
3506 case 5:
3507 case 8:
3508 case 9:
3509 case 10:
3510 case 11:
3511 emit_readword((int)&reg_cop2d[copr],tl);
3512 emit_signextend16(tl,tl);
3513 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3514 break;
3515 case 7:
3516 case 16:
3517 case 17:
3518 case 18:
3519 case 19:
3520 emit_readword((int)&reg_cop2d[copr],tl);
3521 emit_andimm(tl,0xffff,tl);
3522 emit_writeword(tl,(int)&reg_cop2d[copr]);
3523 break;
3524 case 15:
3525 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3526 emit_writeword(tl,(int)&reg_cop2d[copr]);
3527 break;
3528 case 28:
3529 case 30:
3530 emit_movimm(0,tl);
3531 break;
3532 case 29:
3533 emit_readword((int)&reg_cop2d[9],temp);
3534 emit_testimm(temp,0x8000); // do we need this?
3535 emit_andimm(temp,0xf80,temp);
3536 emit_andne_imm(temp,0,temp);
3537 emit_shr(temp,7,tl);
3538 emit_readword((int)&reg_cop2d[10],temp);
3539 emit_testimm(temp,0x8000);
3540 emit_andimm(temp,0xf80,temp);
3541 emit_andne_imm(temp,0,temp);
3542 emit_orrshr(temp,2,tl);
3543 emit_readword((int)&reg_cop2d[11],temp);
3544 emit_testimm(temp,0x8000);
3545 emit_andimm(temp,0xf80,temp);
3546 emit_andne_imm(temp,0,temp);
3547 emit_orrshl(temp,3,tl);
3548 emit_writeword(tl,(int)&reg_cop2d[copr]);
3549 break;
3550 default:
3551 emit_readword((int)&reg_cop2d[copr],tl);
3552 break;
3553 }
3554}
3555
3556static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3557{
3558 switch (copr) {
3559 case 15:
3560 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3561 emit_writeword(sl,(int)&reg_cop2d[copr]);
3562 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3563 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3564 emit_writeword(sl,(int)&reg_cop2d[14]);
3565 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3566 break;
3567 case 28:
3568 emit_andimm(sl,0x001f,temp);
3569 emit_shl(temp,7,temp);
3570 emit_writeword(temp,(int)&reg_cop2d[9]);
3571 emit_andimm(sl,0x03e0,temp);
3572 emit_shl(temp,2,temp);
3573 emit_writeword(temp,(int)&reg_cop2d[10]);
3574 emit_andimm(sl,0x7c00,temp);
3575 emit_shr(temp,3,temp);
3576 emit_writeword(temp,(int)&reg_cop2d[11]);
3577 emit_writeword(sl,(int)&reg_cop2d[28]);
3578 break;
3579 case 30:
3580 emit_movs(sl,temp);
3581 emit_mvnmi(temp,temp);
3582 emit_clz(temp,temp);
3583 emit_writeword(sl,(int)&reg_cop2d[30]);
3584 emit_writeword(temp,(int)&reg_cop2d[31]);
3585 break;
3586 case 7:
3587 case 29:
3588 case 31:
3589 break;
3590 default:
3591 emit_writeword(sl,(int)&reg_cop2d[copr]);
3592 break;
3593 }
3594}
3595
3596void cop2_assemble(int i,struct regstat *i_regs)
3597{
3598 u_int copr=(source[i]>>11)&0x1f;
3599 signed char temp=get_reg(i_regs->regmap,-1);
3600 if (opcode2[i]==0) { // MFC2
3601 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3602 if(tl>=0&&rt1[i]!=0)
3603 cop2_get_dreg(copr,tl,temp);
3604 }
3605 else if (opcode2[i]==4) { // MTC2
3606 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3607 cop2_put_dreg(copr,sl,temp);
3608 }
3609 else if (opcode2[i]==2) // CFC2
3610 {
3611 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3612 if(tl>=0&&rt1[i]!=0)
3613 emit_readword((int)&reg_cop2c[copr],tl);
3614 }
3615 else if (opcode2[i]==6) // CTC2
3616 {
3617 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3618 switch(copr) {
3619 case 4:
3620 case 12:
3621 case 20:
3622 case 26:
3623 case 27:
3624 case 29:
3625 case 30:
3626 emit_signextend16(sl,temp);
3627 break;
3628 case 31:
3629 //value = value & 0x7ffff000;
3630 //if (value & 0x7f87e000) value |= 0x80000000;
3631 emit_shrimm(sl,12,temp);
3632 emit_shlimm(temp,12,temp);
3633 emit_testimm(temp,0x7f000000);
3634 emit_testeqimm(temp,0x00870000);
3635 emit_testeqimm(temp,0x0000e000);
3636 emit_orrne_imm(temp,0x80000000,temp);
3637 break;
3638 default:
3639 temp=sl;
3640 break;
3641 }
3642 emit_writeword(temp,(int)&reg_cop2c[copr]);
3643 assert(sl>=0);
3644 }
3645}
3646
3647void c2op_assemble(int i,struct regstat *i_regs)
3648{
3649 signed char temp=get_reg(i_regs->regmap,-1);
3650 u_int c2op=source[i]&0x3f;
3651 u_int hr,reglist=0;
3652 for(hr=0;hr<HOST_REGS;hr++) {
3653 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3654 }
3655 if(i==0||itype[i-1]!=C2OP)
3656 save_regs(reglist);
3657
3658 if (gte_handlers[c2op]!=NULL) {
3659 int cc=get_reg(i_regs->regmap,CCREG);
3660 emit_movimm(source[i],temp); // opcode
3661 if (cc>=0&&gte_cycletab[c2op])
3662 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3663 emit_writeword(temp,(int)&psxRegs.code);
3664 emit_call((int)gte_handlers[c2op]);
3665 }
3666
3667 if(i>=slen-1||itype[i+1]!=C2OP)
3668 restore_regs(reglist);
3669}
3670
3671void cop1_unusable(int i,struct regstat *i_regs)
3672{
3673 // XXX: should just just do the exception instead
3674 if(!cop1_usable) {
3675 int jaddr=(int)out;
3676 emit_jmp(0);
3677 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3678 cop1_usable=1;
3679 }
3680}
3681
3682void cop1_assemble(int i,struct regstat *i_regs)
3683{
3684#ifndef DISABLE_COP1
3685 // Check cop1 unusable
3686 if(!cop1_usable) {
3687 signed char rs=get_reg(i_regs->regmap,CSREG);
3688 assert(rs>=0);
3689 emit_testimm(rs,0x20000000);
3690 int jaddr=(int)out;
3691 emit_jeq(0);
3692 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3693 cop1_usable=1;
3694 }
3695 if (opcode2[i]==0) { // MFC1
3696 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3697 if(tl>=0) {
3698 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3699 emit_readword_indexed(0,tl,tl);
3700 }
3701 }
3702 else if (opcode2[i]==1) { // DMFC1
3703 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3704 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3705 if(tl>=0) {
3706 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3707 if(th>=0) emit_readword_indexed(4,tl,th);
3708 emit_readword_indexed(0,tl,tl);
3709 }
3710 }
3711 else if (opcode2[i]==4) { // MTC1
3712 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3713 signed char temp=get_reg(i_regs->regmap,-1);
3714 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3715 emit_writeword_indexed(sl,0,temp);
3716 }
3717 else if (opcode2[i]==5) { // DMTC1
3718 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3719 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3720 signed char temp=get_reg(i_regs->regmap,-1);
3721 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3722 emit_writeword_indexed(sh,4,temp);
3723 emit_writeword_indexed(sl,0,temp);
3724 }
3725 else if (opcode2[i]==2) // CFC1
3726 {
3727 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3728 if(tl>=0) {
3729 u_int copr=(source[i]>>11)&0x1f;
3730 if(copr==0) emit_readword((int)&FCR0,tl);
3731 if(copr==31) emit_readword((int)&FCR31,tl);
3732 }
3733 }
3734 else if (opcode2[i]==6) // CTC1
3735 {
3736 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3737 u_int copr=(source[i]>>11)&0x1f;
3738 assert(sl>=0);
3739 if(copr==31)
3740 {
3741 emit_writeword(sl,(int)&FCR31);
3742 // Set the rounding mode
3743 //FIXME
3744 //char temp=get_reg(i_regs->regmap,-1);
3745 //emit_andimm(sl,3,temp);
3746 //emit_fldcw_indexed((int)&rounding_modes,temp);
3747 }
3748 }
3749#else
3750 cop1_unusable(i, i_regs);
3751#endif
3752}
3753
3754void fconv_assemble_arm(int i,struct regstat *i_regs)
3755{
3756#ifndef DISABLE_COP1
3757 signed char temp=get_reg(i_regs->regmap,-1);
3758 assert(temp>=0);
3759 // Check cop1 unusable
3760 if(!cop1_usable) {
3761 signed char rs=get_reg(i_regs->regmap,CSREG);
3762 assert(rs>=0);
3763 emit_testimm(rs,0x20000000);
3764 int jaddr=(int)out;
3765 emit_jeq(0);
3766 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3767 cop1_usable=1;
3768 }
3769
3770 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3771 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3772 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3773 emit_flds(temp,15);
3774 emit_ftosizs(15,15); // float->int, truncate
3775 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3776 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3777 emit_fsts(15,temp);
3778 return;
3779 }
3780 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3781 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3782 emit_vldr(temp,7);
3783 emit_ftosizd(7,13); // double->int, truncate
3784 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3785 emit_fsts(13,temp);
3786 return;
3787 }
3788
3789 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3790 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3791 emit_flds(temp,13);
3792 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3793 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3794 emit_fsitos(13,15);
3795 emit_fsts(15,temp);
3796 return;
3797 }
3798 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3799 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3800 emit_flds(temp,13);
3801 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3802 emit_fsitod(13,7);
3803 emit_vstr(7,temp);
3804 return;
3805 }
3806
3807 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3808 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3809 emit_flds(temp,13);
3810 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3811 emit_fcvtds(13,7);
3812 emit_vstr(7,temp);
3813 return;
3814 }
3815 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3816 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3817 emit_vldr(temp,7);
3818 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3819 emit_fcvtsd(7,13);
3820 emit_fsts(13,temp);
3821 return;
3822 }
3823 #endif
3824
3825 // C emulation code
3826
3827 u_int hr,reglist=0;
3828 for(hr=0;hr<HOST_REGS;hr++) {
3829 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3830 }
3831 save_regs(reglist);
3832
3833 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3834 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3835 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3836 emit_call((int)cvt_s_w);
3837 }
3838 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3839 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3840 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3841 emit_call((int)cvt_d_w);
3842 }
3843 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3844 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3845 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3846 emit_call((int)cvt_s_l);
3847 }
3848 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3849 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3850 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3851 emit_call((int)cvt_d_l);
3852 }
3853
3854 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3855 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3856 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3857 emit_call((int)cvt_d_s);
3858 }
3859 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3860 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3861 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3862 emit_call((int)cvt_w_s);
3863 }
3864 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3865 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3866 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3867 emit_call((int)cvt_l_s);
3868 }
3869
3870 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3871 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3872 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3873 emit_call((int)cvt_s_d);
3874 }
3875 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3876 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3877 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3878 emit_call((int)cvt_w_d);
3879 }
3880 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3881 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3882 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3883 emit_call((int)cvt_l_d);
3884 }
3885
3886 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3887 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3888 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3889 emit_call((int)round_l_s);
3890 }
3891 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3892 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3893 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3894 emit_call((int)trunc_l_s);
3895 }
3896 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3897 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3898 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3899 emit_call((int)ceil_l_s);
3900 }
3901 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3902 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3903 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3904 emit_call((int)floor_l_s);
3905 }
3906 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3907 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3908 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3909 emit_call((int)round_w_s);
3910 }
3911 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3912 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3913 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3914 emit_call((int)trunc_w_s);
3915 }
3916 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3917 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3918 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3919 emit_call((int)ceil_w_s);
3920 }
3921 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3922 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3923 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3924 emit_call((int)floor_w_s);
3925 }
3926
3927 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3928 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3929 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3930 emit_call((int)round_l_d);
3931 }
3932 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3933 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3934 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3935 emit_call((int)trunc_l_d);
3936 }
3937 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3938 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3939 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3940 emit_call((int)ceil_l_d);
3941 }
3942 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3943 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3944 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3945 emit_call((int)floor_l_d);
3946 }
3947 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3948 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3949 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3950 emit_call((int)round_w_d);
3951 }
3952 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3953 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3954 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3955 emit_call((int)trunc_w_d);
3956 }
3957 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3958 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3959 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3960 emit_call((int)ceil_w_d);
3961 }
3962 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3963 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3964 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3965 emit_call((int)floor_w_d);
3966 }
3967
3968 restore_regs(reglist);
3969#else
3970 cop1_unusable(i, i_regs);
3971#endif
3972}
3973#define fconv_assemble fconv_assemble_arm
3974
3975void fcomp_assemble(int i,struct regstat *i_regs)
3976{
3977#ifndef DISABLE_COP1
3978 signed char fs=get_reg(i_regs->regmap,FSREG);
3979 signed char temp=get_reg(i_regs->regmap,-1);
3980 assert(temp>=0);
3981 // Check cop1 unusable
3982 if(!cop1_usable) {
3983 signed char cs=get_reg(i_regs->regmap,CSREG);
3984 assert(cs>=0);
3985 emit_testimm(cs,0x20000000);
3986 int jaddr=(int)out;
3987 emit_jeq(0);
3988 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3989 cop1_usable=1;
3990 }
3991
3992 if((source[i]&0x3f)==0x30) {
3993 emit_andimm(fs,~0x800000,fs);
3994 return;
3995 }
3996
3997 if((source[i]&0x3e)==0x38) {
3998 // sf/ngle - these should throw exceptions for NaNs
3999 emit_andimm(fs,~0x800000,fs);
4000 return;
4001 }
4002
4003 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4004 if(opcode2[i]==0x10) {
4005 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4006 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4007 emit_orimm(fs,0x800000,fs);
4008 emit_flds(temp,14);
4009 emit_flds(HOST_TEMPREG,15);
4010 emit_fcmps(14,15);
4011 emit_fmstat();
4012 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4013 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4014 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4015 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4016 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4017 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4018 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4019 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4020 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4021 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4022 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4023 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4024 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4025 return;
4026 }
4027 if(opcode2[i]==0x11) {
4028 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4029 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4030 emit_orimm(fs,0x800000,fs);
4031 emit_vldr(temp,6);
4032 emit_vldr(HOST_TEMPREG,7);
4033 emit_fcmpd(6,7);
4034 emit_fmstat();
4035 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4036 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4037 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4038 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4039 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4040 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4041 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4042 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4043 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4044 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4045 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4046 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4047 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4048 return;
4049 }
4050 #endif
4051
4052 // C only
4053
4054 u_int hr,reglist=0;
4055 for(hr=0;hr<HOST_REGS;hr++) {
4056 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4057 }
4058 reglist&=~(1<<fs);
4059 save_regs(reglist);
4060 if(opcode2[i]==0x10) {
4061 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4062 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4063 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4064 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4065 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4066 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4067 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4068 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4069 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4070 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4071 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4072 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4073 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4074 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4075 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4076 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4077 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4078 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4079 }
4080 if(opcode2[i]==0x11) {
4081 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4082 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4083 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4084 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4085 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4086 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4087 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4088 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4089 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4090 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4091 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4092 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4093 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4094 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4095 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4096 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4097 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4098 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4099 }
4100 restore_regs(reglist);
4101 emit_loadreg(FSREG,fs);
4102#else
4103 cop1_unusable(i, i_regs);
4104#endif
4105}
4106
4107void float_assemble(int i,struct regstat *i_regs)
4108{
4109#ifndef DISABLE_COP1
4110 signed char temp=get_reg(i_regs->regmap,-1);
4111 assert(temp>=0);
4112 // Check cop1 unusable
4113 if(!cop1_usable) {
4114 signed char cs=get_reg(i_regs->regmap,CSREG);
4115 assert(cs>=0);
4116 emit_testimm(cs,0x20000000);
4117 int jaddr=(int)out;
4118 emit_jeq(0);
4119 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4120 cop1_usable=1;
4121 }
4122
4123 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4124 if((source[i]&0x3f)==6) // mov
4125 {
4126 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4127 if(opcode2[i]==0x10) {
4128 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4129 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4130 emit_readword_indexed(0,temp,temp);
4131 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4132 }
4133 if(opcode2[i]==0x11) {
4134 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4135 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4136 emit_vldr(temp,7);
4137 emit_vstr(7,HOST_TEMPREG);
4138 }
4139 }
4140 return;
4141 }
4142
4143 if((source[i]&0x3f)>3)
4144 {
4145 if(opcode2[i]==0x10) {
4146 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4147 emit_flds(temp,15);
4148 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4149 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4150 }
4151 if((source[i]&0x3f)==4) // sqrt
4152 emit_fsqrts(15,15);
4153 if((source[i]&0x3f)==5) // abs
4154 emit_fabss(15,15);
4155 if((source[i]&0x3f)==7) // neg
4156 emit_fnegs(15,15);
4157 emit_fsts(15,temp);
4158 }
4159 if(opcode2[i]==0x11) {
4160 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4161 emit_vldr(temp,7);
4162 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4163 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4164 }
4165 if((source[i]&0x3f)==4) // sqrt
4166 emit_fsqrtd(7,7);
4167 if((source[i]&0x3f)==5) // abs
4168 emit_fabsd(7,7);
4169 if((source[i]&0x3f)==7) // neg
4170 emit_fnegd(7,7);
4171 emit_vstr(7,temp);
4172 }
4173 return;
4174 }
4175 if((source[i]&0x3f)<4)
4176 {
4177 if(opcode2[i]==0x10) {
4178 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4179 }
4180 if(opcode2[i]==0x11) {
4181 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4182 }
4183 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4184 if(opcode2[i]==0x10) {
4185 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4186 emit_flds(temp,15);
4187 emit_flds(HOST_TEMPREG,13);
4188 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4189 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4190 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4191 }
4192 }
4193 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4194 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4195 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4196 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4197 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4198 emit_fsts(15,HOST_TEMPREG);
4199 }else{
4200 emit_fsts(15,temp);
4201 }
4202 }
4203 else if(opcode2[i]==0x11) {
4204 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4205 emit_vldr(temp,7);
4206 emit_vldr(HOST_TEMPREG,6);
4207 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4208 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4209 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4210 }
4211 }
4212 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4213 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4214 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4215 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4216 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4217 emit_vstr(7,HOST_TEMPREG);
4218 }else{
4219 emit_vstr(7,temp);
4220 }
4221 }
4222 }
4223 else {
4224 if(opcode2[i]==0x10) {
4225 emit_flds(temp,15);
4226 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4227 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4228 }
4229 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4230 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4231 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4232 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4233 emit_fsts(15,temp);
4234 }
4235 else if(opcode2[i]==0x11) {
4236 emit_vldr(temp,7);
4237 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4238 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4239 }
4240 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4241 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4242 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4243 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4244 emit_vstr(7,temp);
4245 }
4246 }
4247 return;
4248 }
4249 #endif
4250
4251 u_int hr,reglist=0;
4252 for(hr=0;hr<HOST_REGS;hr++) {
4253 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4254 }
4255 if(opcode2[i]==0x10) { // Single precision
4256 save_regs(reglist);
4257 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4258 if((source[i]&0x3f)<4) {
4259 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4260 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4261 }else{
4262 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4263 }
4264 switch(source[i]&0x3f)
4265 {
4266 case 0x00: emit_call((int)add_s);break;
4267 case 0x01: emit_call((int)sub_s);break;
4268 case 0x02: emit_call((int)mul_s);break;
4269 case 0x03: emit_call((int)div_s);break;
4270 case 0x04: emit_call((int)sqrt_s);break;
4271 case 0x05: emit_call((int)abs_s);break;
4272 case 0x06: emit_call((int)mov_s);break;
4273 case 0x07: emit_call((int)neg_s);break;
4274 }
4275 restore_regs(reglist);
4276 }
4277 if(opcode2[i]==0x11) { // Double precision
4278 save_regs(reglist);
4279 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4280 if((source[i]&0x3f)<4) {
4281 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4282 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4283 }else{
4284 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4285 }
4286 switch(source[i]&0x3f)
4287 {
4288 case 0x00: emit_call((int)add_d);break;
4289 case 0x01: emit_call((int)sub_d);break;
4290 case 0x02: emit_call((int)mul_d);break;
4291 case 0x03: emit_call((int)div_d);break;
4292 case 0x04: emit_call((int)sqrt_d);break;
4293 case 0x05: emit_call((int)abs_d);break;
4294 case 0x06: emit_call((int)mov_d);break;
4295 case 0x07: emit_call((int)neg_d);break;
4296 }
4297 restore_regs(reglist);
4298 }
4299#else
4300 cop1_unusable(i, i_regs);
4301#endif
4302}
4303
4304void multdiv_assemble_arm(int i,struct regstat *i_regs)
4305{
4306 // case 0x18: MULT
4307 // case 0x19: MULTU
4308 // case 0x1A: DIV
4309 // case 0x1B: DIVU
4310 // case 0x1C: DMULT
4311 // case 0x1D: DMULTU
4312 // case 0x1E: DDIV
4313 // case 0x1F: DDIVU
4314 if(rs1[i]&&rs2[i])
4315 {
4316 if((opcode2[i]&4)==0) // 32-bit
4317 {
4318 if(opcode2[i]==0x18) // MULT
4319 {
4320 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4321 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4322 signed char hi=get_reg(i_regs->regmap,HIREG);
4323 signed char lo=get_reg(i_regs->regmap,LOREG);
4324 assert(m1>=0);
4325 assert(m2>=0);
4326 assert(hi>=0);
4327 assert(lo>=0);
4328 emit_smull(m1,m2,hi,lo);
4329 }
4330 if(opcode2[i]==0x19) // MULTU
4331 {
4332 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4333 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4334 signed char hi=get_reg(i_regs->regmap,HIREG);
4335 signed char lo=get_reg(i_regs->regmap,LOREG);
4336 assert(m1>=0);
4337 assert(m2>=0);
4338 assert(hi>=0);
4339 assert(lo>=0);
4340 emit_umull(m1,m2,hi,lo);
4341 }
4342 if(opcode2[i]==0x1A) // DIV
4343 {
4344 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4345 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4346 assert(d1>=0);
4347 assert(d2>=0);
4348 signed char quotient=get_reg(i_regs->regmap,LOREG);
4349 signed char remainder=get_reg(i_regs->regmap,HIREG);
4350 assert(quotient>=0);
4351 assert(remainder>=0);
4352 emit_movs(d1,remainder);
4353 emit_negmi(remainder,remainder);
4354 emit_movs(d2,HOST_TEMPREG);
4355 emit_jeq((int)out+52); // Division by zero
4356 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4357 emit_clz(HOST_TEMPREG,quotient);
4358 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4359 emit_orimm(quotient,1<<31,quotient);
4360 emit_shr(quotient,quotient,quotient);
4361 emit_cmp(remainder,HOST_TEMPREG);
4362 emit_subcs(remainder,HOST_TEMPREG,remainder);
4363 emit_adcs(quotient,quotient,quotient);
4364 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4365 emit_jcc((int)out-16); // -4
4366 emit_teq(d1,d2);
4367 emit_negmi(quotient,quotient);
4368 emit_test(d1,d1);
4369 emit_negmi(remainder,remainder);
4370 }
4371 if(opcode2[i]==0x1B) // DIVU
4372 {
4373 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4374 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4375 assert(d1>=0);
4376 assert(d2>=0);
4377 signed char quotient=get_reg(i_regs->regmap,LOREG);
4378 signed char remainder=get_reg(i_regs->regmap,HIREG);
4379 assert(quotient>=0);
4380 assert(remainder>=0);
4381 emit_test(d2,d2);
4382 emit_jeq((int)out+44); // Division by zero
4383 emit_clz(d2,HOST_TEMPREG);
4384 emit_movimm(1<<31,quotient);
4385 emit_shl(d2,HOST_TEMPREG,d2);
4386 emit_mov(d1,remainder);
4387 emit_shr(quotient,HOST_TEMPREG,quotient);
4388 emit_cmp(remainder,d2);
4389 emit_subcs(remainder,d2,remainder);
4390 emit_adcs(quotient,quotient,quotient);
4391 emit_shrcc_imm(d2,1,d2);
4392 emit_jcc((int)out-16); // -4
4393 }
4394 }
4395 else // 64-bit
4396 {
4397 if(opcode2[i]==0x1C) // DMULT
4398 {
4399 assert(opcode2[i]!=0x1C);
4400 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4401 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4402 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4403 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4404 assert(m1h>=0);
4405 assert(m2h>=0);
4406 assert(m1l>=0);
4407 assert(m2l>=0);
4408 emit_pushreg(m2h);
4409 emit_pushreg(m2l);
4410 emit_pushreg(m1h);
4411 emit_pushreg(m1l);
4412 emit_call((int)&mult64);
4413 emit_popreg(m1l);
4414 emit_popreg(m1h);
4415 emit_popreg(m2l);
4416 emit_popreg(m2h);
4417 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4418 signed char hil=get_reg(i_regs->regmap,HIREG);
4419 if(hih>=0) emit_loadreg(HIREG|64,hih);
4420 if(hil>=0) emit_loadreg(HIREG,hil);
4421 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4422 signed char lol=get_reg(i_regs->regmap,LOREG);
4423 if(loh>=0) emit_loadreg(LOREG|64,loh);
4424 if(lol>=0) emit_loadreg(LOREG,lol);
4425 }
4426 if(opcode2[i]==0x1D) // DMULTU
4427 {
4428 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4429 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4430 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4431 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4432 assert(m1h>=0);
4433 assert(m2h>=0);
4434 assert(m1l>=0);
4435 assert(m2l>=0);
4436 save_regs(0x100f);
4437 if(m1l!=0) emit_mov(m1l,0);
4438 if(m1h==0) emit_readword((int)&dynarec_local,1);
4439 else if(m1h>1) emit_mov(m1h,1);
4440 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4441 else if(m2l>2) emit_mov(m2l,2);
4442 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4443 else if(m2h>3) emit_mov(m2h,3);
4444 emit_call((int)&multu64);
4445 restore_regs(0x100f);
4446 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4447 signed char hil=get_reg(i_regs->regmap,HIREG);
4448 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4449 signed char lol=get_reg(i_regs->regmap,LOREG);
4450 /*signed char temp=get_reg(i_regs->regmap,-1);
4451 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4452 signed char rl=get_reg(i_regs->regmap,HIREG);
4453 assert(m1h>=0);
4454 assert(m2h>=0);
4455 assert(m1l>=0);
4456 assert(m2l>=0);
4457 assert(temp>=0);
4458 //emit_mov(m1l,EAX);
4459 //emit_mul(m2l);
4460 emit_umull(rl,rh,m1l,m2l);
4461 emit_storereg(LOREG,rl);
4462 emit_mov(rh,temp);
4463 //emit_mov(m1h,EAX);
4464 //emit_mul(m2l);
4465 emit_umull(rl,rh,m1h,m2l);
4466 emit_adds(rl,temp,temp);
4467 emit_adcimm(rh,0,rh);
4468 emit_storereg(HIREG,rh);
4469 //emit_mov(m2h,EAX);
4470 //emit_mul(m1l);
4471 emit_umull(rl,rh,m1l,m2h);
4472 emit_adds(rl,temp,temp);
4473 emit_adcimm(rh,0,rh);
4474 emit_storereg(LOREG|64,temp);
4475 emit_mov(rh,temp);
4476 //emit_mov(m2h,EAX);
4477 //emit_mul(m1h);
4478 emit_umull(rl,rh,m1h,m2h);
4479 emit_adds(rl,temp,rl);
4480 emit_loadreg(HIREG,temp);
4481 emit_adcimm(rh,0,rh);
4482 emit_adds(rl,temp,rl);
4483 emit_adcimm(rh,0,rh);
4484 // DEBUG
4485 /*
4486 emit_pushreg(m2h);
4487 emit_pushreg(m2l);
4488 emit_pushreg(m1h);
4489 emit_pushreg(m1l);
4490 emit_call((int)&multu64);
4491 emit_popreg(m1l);
4492 emit_popreg(m1h);
4493 emit_popreg(m2l);
4494 emit_popreg(m2h);
4495 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4496 signed char hil=get_reg(i_regs->regmap,HIREG);
4497 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4498 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4499 */
4500 // Shouldn't be necessary
4501 //char loh=get_reg(i_regs->regmap,LOREG|64);
4502 //char lol=get_reg(i_regs->regmap,LOREG);
4503 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4504 //if(lol>=0) emit_loadreg(LOREG,lol);
4505 }
4506 if(opcode2[i]==0x1E) // DDIV
4507 {
4508 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4509 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4510 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4511 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4512 assert(d1h>=0);
4513 assert(d2h>=0);
4514 assert(d1l>=0);
4515 assert(d2l>=0);
4516 save_regs(0x100f);
4517 if(d1l!=0) emit_mov(d1l,0);
4518 if(d1h==0) emit_readword((int)&dynarec_local,1);
4519 else if(d1h>1) emit_mov(d1h,1);
4520 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4521 else if(d2l>2) emit_mov(d2l,2);
4522 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4523 else if(d2h>3) emit_mov(d2h,3);
4524 emit_call((int)&div64);
4525 restore_regs(0x100f);
4526 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4527 signed char hil=get_reg(i_regs->regmap,HIREG);
4528 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4529 signed char lol=get_reg(i_regs->regmap,LOREG);
4530 if(hih>=0) emit_loadreg(HIREG|64,hih);
4531 if(hil>=0) emit_loadreg(HIREG,hil);
4532 if(loh>=0) emit_loadreg(LOREG|64,loh);
4533 if(lol>=0) emit_loadreg(LOREG,lol);
4534 }
4535 if(opcode2[i]==0x1F) // DDIVU
4536 {
4537 //u_int hr,reglist=0;
4538 //for(hr=0;hr<HOST_REGS;hr++) {
4539 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4540 //}
4541 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4542 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4543 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4544 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4545 assert(d1h>=0);
4546 assert(d2h>=0);
4547 assert(d1l>=0);
4548 assert(d2l>=0);
4549 save_regs(0x100f);
4550 if(d1l!=0) emit_mov(d1l,0);
4551 if(d1h==0) emit_readword((int)&dynarec_local,1);
4552 else if(d1h>1) emit_mov(d1h,1);
4553 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4554 else if(d2l>2) emit_mov(d2l,2);
4555 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4556 else if(d2h>3) emit_mov(d2h,3);
4557 emit_call((int)&divu64);
4558 restore_regs(0x100f);
4559 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4560 signed char hil=get_reg(i_regs->regmap,HIREG);
4561 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4562 signed char lol=get_reg(i_regs->regmap,LOREG);
4563 if(hih>=0) emit_loadreg(HIREG|64,hih);
4564 if(hil>=0) emit_loadreg(HIREG,hil);
4565 if(loh>=0) emit_loadreg(LOREG|64,loh);
4566 if(lol>=0) emit_loadreg(LOREG,lol);
4567 }
4568 }
4569 }
4570 else
4571 {
4572 // Multiply by zero is zero.
4573 // MIPS does not have a divide by zero exception.
4574 // The result is undefined, we return zero.
4575 signed char hr=get_reg(i_regs->regmap,HIREG);
4576 signed char lr=get_reg(i_regs->regmap,LOREG);
4577 if(hr>=0) emit_zeroreg(hr);
4578 if(lr>=0) emit_zeroreg(lr);
4579 }
4580}
4581#define multdiv_assemble multdiv_assemble_arm
4582
// Intentionally emits nothing on ARM.  The x86 backend uses this hook to
// load the value 0xf8 into register r; on ARM the hash is computed with a
// single instruction in do_rhash(), so no preload is required.
void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
4587
4588void do_preload_rhtbl(int ht) {
4589 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4590}
4591
// Compute the mini hash table offset for the address in rs: rh = rs & 0xf8.
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4595
4596void do_miniht_load(int ht,int rh) {
4597 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4598 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4599}
4600
4601void do_miniht_jump(int rs,int rh,int ht) {
4602 emit_cmp(rh,rs);
4603 emit_ldreq_indexed(ht,4,15);
4604 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4605 emit_mov(rs,7);
4606 emit_jmp(jump_vaddr_reg[7]);
4607 #else
4608 emit_jmp(jump_vaddr_reg[rs]);
4609 #endif
4610}
4611
4612void do_miniht_insert(u_int return_address,int rt,int temp) {
4613 #ifdef ARMv5_ONLY
4614 emit_movimm(return_address,rt); // PC into link register
4615 add_to_linker((int)out,return_address,1);
4616 emit_pcreladdr(temp);
4617 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4618 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4619 #else
4620 emit_movw(return_address&0x0000FFFF,rt);
4621 add_to_linker((int)out,return_address,1);
4622 emit_pcreladdr(temp);
4623 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4624 emit_movt(return_address&0xFFFF0000,rt);
4625 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4626 #endif
4627}
4628
4629// Sign-extend to 64 bits and write out upper half of a register
4630// This is useful where we have a 32-bit value in a register, and want to
4631// keep it in a 32-bit register, but can't guarantee that it won't be read
4632// as a 64-bit value later.
4633void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4634{
4635#ifndef FORCE32
4636 if(is32_pre==is32) return;
4637 int hr,reg;
4638 for(hr=0;hr<HOST_REGS;hr++) {
4639 if(hr!=EXCLUDE_REG) {
4640 //if(pre[hr]==entry[hr]) {
4641 if((reg=pre[hr])>=0) {
4642 if((dirty>>hr)&1) {
4643 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4644 emit_sarimm(hr,31,HOST_TEMPREG);
4645 emit_storereg(reg|64,HOST_TEMPREG);
4646 }
4647 }
4648 }
4649 //}
4650 }
4651 }
4652#endif
4653}
4654
4655void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4656{
4657 //if(dirty_pre==dirty) return;
4658 int hr,reg,new_hr;
4659 for(hr=0;hr<HOST_REGS;hr++) {
4660 if(hr!=EXCLUDE_REG) {
4661 reg=pre[hr];
4662 if(((~u)>>(reg&63))&1) {
4663 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4664 if(((dirty_pre&~dirty)>>hr)&1) {
4665 if(reg>0&&reg<34) {
4666 emit_storereg(reg,hr);
4667 if( ((is32_pre&~uu)>>reg)&1 ) {
4668 emit_sarimm(hr,31,HOST_TEMPREG);
4669 emit_storereg(reg|64,HOST_TEMPREG);
4670 }
4671 }
4672 else if(reg>=64) {
4673 emit_storereg(reg,hr);
4674 }
4675 }
4676 }
4677 else // Check if register moved to a different register
4678 if((new_hr=get_reg(entry,reg))>=0) {
4679 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4680 if(reg>0&&reg<34) {
4681 emit_storereg(reg,hr);
4682 if( ((is32_pre&~uu)>>reg)&1 ) {
4683 emit_sarimm(hr,31,HOST_TEMPREG);
4684 emit_storereg(reg|64,HOST_TEMPREG);
4685 }
4686 }
4687 else if(reg>=64) {
4688 emit_storereg(reg,hr);
4689 }
4690 }
4691 }
4692 }
4693 }
4694 }
4695}
4696
4697
4698/* using strd could possibly help but you'd have to allocate registers in pairs
4699void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4700{
4701 int hr;
4702 int wrote=-1;
4703 for(hr=HOST_REGS-1;hr>=0;hr--) {
4704 if(hr!=EXCLUDE_REG) {
4705 if(pre[hr]!=entry[hr]) {
4706 if(pre[hr]>=0) {
4707 if((dirty>>hr)&1) {
4708 if(get_reg(entry,pre[hr])<0) {
4709 if(pre[hr]<64) {
4710 if(!((u>>pre[hr])&1)) {
4711 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4712 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4713 emit_sarimm(hr,31,hr+1);
4714 emit_strdreg(pre[hr],hr);
4715 }
4716 else
4717 emit_storereg(pre[hr],hr);
4718 }else{
4719 emit_storereg(pre[hr],hr);
4720 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4721 emit_sarimm(hr,31,hr);
4722 emit_storereg(pre[hr]|64,hr);
4723 }
4724 }
4725 }
4726 }else{
4727 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4728 emit_storereg(pre[hr],hr);
4729 }
4730 }
4731 wrote=hr;
4732 }
4733 }
4734 }
4735 }
4736 }
4737 }
4738 for(hr=0;hr<HOST_REGS;hr++) {
4739 if(hr!=EXCLUDE_REG) {
4740 if(pre[hr]!=entry[hr]) {
4741 if(pre[hr]>=0) {
4742 int nr;
4743 if((nr=get_reg(entry,pre[hr]))>=0) {
4744 emit_mov(hr,nr);
4745 }
4746 }
4747 }
4748 }
4749 }
4750}
4751#define wb_invalidate wb_invalidate_arm
4752*/
4753
4754// CPU-architecture-specific initialization
4755void arch_init() {
4756#ifndef DISABLE_COP1
4757 rounding_modes[0]=0x0<<22; // round
4758 rounding_modes[1]=0x3<<22; // trunc
4759 rounding_modes[2]=0x1<<22; // ceil
4760 rounding_modes[3]=0x2<<22; // floor
4761#endif
4762}
4763
4764// vim:shiftwidth=2:expandtab