drc: fix: storelr should also use AGR
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
f76eeef9 145void *kill_pointer(void *stub)
57871462 146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 153 return i_ptr;
57871462 154}
155
156int get_pointer(void *stub)
157{
158 //printf("get_pointer(%x)\n",(int)stub);
159 int *ptr=(int *)(stub+4);
160 assert((*ptr&0x0ff00000)==0x05900000);
161 u_int offset=*ptr&0xfff;
162 int **l_ptr=(void *)ptr+offset+8;
163 int *i_ptr=*l_ptr;
164 assert((*i_ptr&0x0f000000)==0x0a000000);
165 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
166}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
// Printable names of the 16 ARM registers, indexed by register number
// (used in the assem_debug listings).
char regname[16][4] = {
  "r0",  "r1", "r2",  "r3",
  "r4",  "r5", "r6",  "r7",
  "r8",  "r9", "r10", "fp",
  "r12", "sp", "lr",  "pc"
};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
798u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
799{
800 assert(rd<16);
801 assert(rn<16);
802 assert(rm<16);
803 return((rn<<16)|(rd<<12)|rm);
804}
805u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
806{
807 assert(rd<16);
808 assert(rn<16);
809 assert(imm<256);
810 assert((shift&1)==0);
811 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
812}
813u_int genimm(u_int imm,u_int *encoded)
814{
815 if(imm==0) {*encoded=0;return 1;}
816 int i=32;
817 while(i>0)
818 {
819 if(imm<256) {
820 *encoded=((i&30)<<7)|imm;
821 return 1;
822 }
823 imm=(imm>>2)|(imm<<30);i-=2;
824 }
825 return 0;
826}
827u_int genjmp(u_int addr)
828{
829 int offset=addr-(int)out-8;
e80343e2 830 if(offset<-33554432||offset>=33554432) {
831 if (addr>2) {
832 printf("genjmp: out of range: %08x\n", offset);
833 exit(1);
834 }
835 return 0;
836 }
57871462 837 return ((u_int)offset>>2)&0xffffff;
838}
839
840void emit_mov(int rs,int rt)
841{
842 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
843 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
844}
845
846void emit_movs(int rs,int rt)
847{
848 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
849 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
850}
851
852void emit_add(int rs1,int rs2,int rt)
853{
854 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
855 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
856}
857
858void emit_adds(int rs1,int rs2,int rt)
859{
860 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
861 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
862}
863
864void emit_adcs(int rs1,int rs2,int rt)
865{
866 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
867 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
868}
869
870void emit_sbc(int rs1,int rs2,int rt)
871{
872 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
873 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
874}
875
876void emit_sbcs(int rs1,int rs2,int rt)
877{
878 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
879 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
880}
881
882void emit_neg(int rs, int rt)
883{
884 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
885 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
886}
887
888void emit_negs(int rs, int rt)
889{
890 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
891 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
892}
893
894void emit_sub(int rs1,int rs2,int rt)
895{
896 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
897 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
898}
899
900void emit_subs(int rs1,int rs2,int rt)
901{
902 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
903 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
904}
905
906void emit_zeroreg(int rt)
907{
908 assem_debug("mov %s,#0\n",regname[rt]);
909 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
910}
911
912void emit_loadreg(int r, int hr)
913{
3d624f89 914#ifdef FORCE32
915 if(r&64) {
916 printf("64bit load in 32bit mode!\n");
917 exit(1);
918 }
919#endif
57871462 920 if((r&63)==0)
921 emit_zeroreg(hr);
922 else {
3d624f89 923 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 924 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
925 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
926 if(r==CCREG) addr=(int)&cycle_count;
927 if(r==CSREG) addr=(int)&Status;
928 if(r==FSREG) addr=(int)&FCR31;
929 if(r==INVCP) addr=(int)&invc_ptr;
930 u_int offset = addr-(u_int)&dynarec_local;
931 assert(offset<4096);
932 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
933 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
934 }
935}
936void emit_storereg(int r, int hr)
937{
3d624f89 938#ifdef FORCE32
939 if(r&64) {
940 printf("64bit store in 32bit mode!\n");
941 exit(1);
942 }
943#endif
944 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 945 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
946 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
947 if(r==CCREG) addr=(int)&cycle_count;
948 if(r==FSREG) addr=(int)&FCR31;
949 u_int offset = addr-(u_int)&dynarec_local;
950 assert(offset<4096);
951 assem_debug("str %s,fp+%d\n",regname[hr],offset);
952 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
953}
954
955void emit_test(int rs, int rt)
956{
957 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
958 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
959}
960
961void emit_testimm(int rs,int imm)
962{
963 u_int armval;
964 assem_debug("tst %s,$%d\n",regname[rs],imm);
965 assert(genimm(imm,&armval));
966 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
967}
968
b9b61529 969void emit_testeqimm(int rs,int imm)
970{
971 u_int armval;
972 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
973 assert(genimm(imm,&armval));
974 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
975}
976
57871462 977void emit_not(int rs,int rt)
978{
979 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
980 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
981}
982
b9b61529 983void emit_mvnmi(int rs,int rt)
984{
985 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
986 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
987}
988
57871462 989void emit_and(u_int rs1,u_int rs2,u_int rt)
990{
991 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
992 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
993}
994
995void emit_or(u_int rs1,u_int rs2,u_int rt)
996{
997 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
998 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
999}
1000void emit_or_and_set_flags(int rs1,int rs2,int rt)
1001{
1002 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1003 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1004}
1005
1006void emit_xor(u_int rs1,u_int rs2,u_int rt)
1007{
1008 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1009 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1010}
1011
1012void emit_loadlp(u_int imm,u_int rt)
1013{
1014 add_literal((int)out,imm);
1015 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1016 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1017}
1018void emit_movw(u_int imm,u_int rt)
1019{
1020 assert(imm<65536);
1021 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1022 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1023}
1024void emit_movt(u_int imm,u_int rt)
1025{
1026 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1027 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1028}
1029void emit_movimm(u_int imm,u_int rt)
1030{
1031 u_int armval;
1032 if(genimm(imm,&armval)) {
1033 assem_debug("mov %s,#%d\n",regname[rt],imm);
1034 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1035 }else if(genimm(~imm,&armval)) {
1036 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1037 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1038 }else if(imm<65536) {
1039 #ifdef ARMv5_ONLY
1040 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1041 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1042 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1043 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1044 #else
1045 emit_movw(imm,rt);
1046 #endif
1047 }else{
1048 #ifdef ARMv5_ONLY
1049 emit_loadlp(imm,rt);
1050 #else
1051 emit_movw(imm&0x0000FFFF,rt);
1052 emit_movt(imm&0xFFFF0000,rt);
1053 #endif
1054 }
1055}
1056void emit_pcreladdr(u_int rt)
1057{
1058 assem_debug("add %s,pc,#?\n",regname[rt]);
1059 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1060}
1061
1062void emit_addimm(u_int rs,int imm,u_int rt)
1063{
1064 assert(rs<16);
1065 assert(rt<16);
1066 if(imm!=0) {
1067 assert(imm>-65536&&imm<65536);
1068 u_int armval;
1069 if(genimm(imm,&armval)) {
1070 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1071 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1072 }else if(genimm(-imm,&armval)) {
1073 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1074 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1075 }else if(imm<0) {
1076 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1077 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1078 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1079 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1080 }else{
1081 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1082 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1083 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1084 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1085 }
1086 }
1087 else if(rs!=rt) emit_mov(rs,rt);
1088}
1089
1090void emit_addimm_and_set_flags(int imm,int rt)
1091{
1092 assert(imm>-65536&&imm<65536);
1093 u_int armval;
1094 if(genimm(imm,&armval)) {
1095 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1096 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1097 }else if(genimm(-imm,&armval)) {
1098 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1099 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1100 }else if(imm<0) {
1101 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1102 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1103 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1104 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1105 }else{
1106 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1107 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1108 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1109 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1110 }
1111}
1112void emit_addimm_no_flags(u_int imm,u_int rt)
1113{
1114 emit_addimm(rt,imm,rt);
1115}
1116
1117void emit_addnop(u_int r)
1118{
1119 assert(r<16);
1120 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1121 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1122}
1123
1124void emit_adcimm(u_int rs,int imm,u_int rt)
1125{
1126 u_int armval;
1127 assert(genimm(imm,&armval));
1128 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1129 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1130}
1131/*void emit_sbcimm(int imm,u_int rt)
1132{
1133 u_int armval;
1134 assert(genimm(imm,&armval));
1135 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1136 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1137}*/
1138void emit_sbbimm(int imm,u_int rt)
1139{
1140 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1141 assert(rt<8);
1142 if(imm<128&&imm>=-128) {
1143 output_byte(0x83);
1144 output_modrm(3,rt,3);
1145 output_byte(imm);
1146 }
1147 else
1148 {
1149 output_byte(0x81);
1150 output_modrm(3,rt,3);
1151 output_w32(imm);
1152 }
1153}
1154void emit_rscimm(int rs,int imm,u_int rt)
1155{
1156 assert(0);
1157 u_int armval;
1158 assert(genimm(imm,&armval));
1159 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1160 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1161}
1162
1163void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1164{
1165 // TODO: if(genimm(imm,&armval)) ...
1166 // else
1167 emit_movimm(imm,HOST_TEMPREG);
1168 emit_adds(HOST_TEMPREG,rsl,rtl);
1169 emit_adcimm(rsh,0,rth);
1170}
1171
1172void emit_sbb(int rs1,int rs2)
1173{
1174 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1175 output_byte(0x19);
1176 output_modrm(3,rs1,rs2);
1177}
1178
1179void emit_andimm(int rs,int imm,int rt)
1180{
1181 u_int armval;
1182 if(genimm(imm,&armval)) {
1183 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1184 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1185 }else if(genimm(~imm,&armval)) {
1186 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1187 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1188 }else if(imm==65535) {
1189 #ifdef ARMv5_ONLY
1190 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1191 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1192 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1193 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1194 #else
1195 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1196 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1197 #endif
1198 }else{
1199 assert(imm>0&&imm<65535);
1200 #ifdef ARMv5_ONLY
1201 assem_debug("mov r14,#%d\n",imm&0xFF00);
1202 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1203 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1204 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1205 #else
1206 emit_movw(imm,HOST_TEMPREG);
1207 #endif
1208 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1209 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1210 }
1211}
1212
1213void emit_orimm(int rs,int imm,int rt)
1214{
1215 u_int armval;
1216 if(genimm(imm,&armval)) {
1217 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1218 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1219 }else{
1220 assert(imm>0&&imm<65536);
1221 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1222 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1223 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1224 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1225 }
1226}
1227
1228void emit_xorimm(int rs,int imm,int rt)
1229{
57871462 1230 u_int armval;
1231 if(genimm(imm,&armval)) {
1232 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1234 }else{
514ed0d9 1235 assert(imm>0&&imm<65536);
57871462 1236 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1237 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1238 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1239 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1240 }
1241}
1242
1243void emit_shlimm(int rs,u_int imm,int rt)
1244{
1245 assert(imm>0);
1246 assert(imm<32);
1247 //if(imm==1) ...
1248 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1249 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1250}
1251
1252void emit_shrimm(int rs,u_int imm,int rt)
1253{
1254 assert(imm>0);
1255 assert(imm<32);
1256 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1257 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1258}
1259
1260void emit_sarimm(int rs,u_int imm,int rt)
1261{
1262 assert(imm>0);
1263 assert(imm<32);
1264 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1265 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1266}
1267
1268void emit_rorimm(int rs,u_int imm,int rt)
1269{
1270 assert(imm>0);
1271 assert(imm<32);
1272 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1273 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1274}
1275
1276void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1277{
1278 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1279 assert(imm>0);
1280 assert(imm<32);
1281 //if(imm==1) ...
1282 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1283 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1284 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1285 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1286}
1287
1288void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1289{
1290 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1291 assert(imm>0);
1292 assert(imm<32);
1293 //if(imm==1) ...
1294 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1295 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1296 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1297 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1298}
1299
b9b61529 1300void emit_signextend16(int rs,int rt)
1301{
1302 #ifdef ARMv5_ONLY
1303 emit_shlimm(rs,16,rt);
1304 emit_sarimm(rt,16,rt);
1305 #else
1306 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1307 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1308 #endif
1309}
1310
57871462 1311void emit_shl(u_int rs,u_int shift,u_int rt)
1312{
1313 assert(rs<16);
1314 assert(rt<16);
1315 assert(shift<16);
1316 //if(imm==1) ...
1317 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1318 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1319}
1320void emit_shr(u_int rs,u_int shift,u_int rt)
1321{
1322 assert(rs<16);
1323 assert(rt<16);
1324 assert(shift<16);
1325 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1326 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1327}
1328void emit_sar(u_int rs,u_int shift,u_int rt)
1329{
1330 assert(rs<16);
1331 assert(rt<16);
1332 assert(shift<16);
1333 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1334 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1335}
1336void emit_shlcl(int r)
1337{
1338 assem_debug("shl %%%s,%%cl\n",regname[r]);
1339 assert(0);
1340}
1341void emit_shrcl(int r)
1342{
1343 assem_debug("shr %%%s,%%cl\n",regname[r]);
1344 assert(0);
1345}
1346void emit_sarcl(int r)
1347{
1348 assem_debug("sar %%%s,%%cl\n",regname[r]);
1349 assert(0);
1350}
1351
1352void emit_shldcl(int r1,int r2)
1353{
1354 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1355 assert(0);
1356}
1357void emit_shrdcl(int r1,int r2)
1358{
1359 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1360 assert(0);
1361}
1362void emit_orrshl(u_int rs,u_int shift,u_int rt)
1363{
1364 assert(rs<16);
1365 assert(rt<16);
1366 assert(shift<16);
1367 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1368 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1369}
1370void emit_orrshr(u_int rs,u_int shift,u_int rt)
1371{
1372 assert(rs<16);
1373 assert(rt<16);
1374 assert(shift<16);
1375 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1376 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1377}
1378
1379void emit_cmpimm(int rs,int imm)
1380{
1381 u_int armval;
1382 if(genimm(imm,&armval)) {
1383 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1384 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1385 }else if(genimm(-imm,&armval)) {
1386 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1387 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1388 }else if(imm>0) {
1389 assert(imm<65536);
1390 #ifdef ARMv5_ONLY
1391 emit_movimm(imm,HOST_TEMPREG);
1392 #else
1393 emit_movw(imm,HOST_TEMPREG);
1394 #endif
1395 assem_debug("cmp %s,r14\n",regname[rs]);
1396 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1397 }else{
1398 assert(imm>-65536);
1399 #ifdef ARMv5_ONLY
1400 emit_movimm(-imm,HOST_TEMPREG);
1401 #else
1402 emit_movw(-imm,HOST_TEMPREG);
1403 #endif
1404 assem_debug("cmn %s,r14\n",regname[rs]);
1405 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1406 }
1407}
1408
1409void emit_cmovne(u_int *addr,int rt)
1410{
1411 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1412 assert(0);
1413}
1414void emit_cmovl(u_int *addr,int rt)
1415{
1416 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1417 assert(0);
1418}
1419void emit_cmovs(u_int *addr,int rt)
1420{
1421 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1422 assert(0);
1423}
1424void emit_cmovne_imm(int imm,int rt)
1425{
1426 assem_debug("movne %s,#%d\n",regname[rt],imm);
1427 u_int armval;
1428 assert(genimm(imm,&armval));
1429 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1430}
1431void emit_cmovl_imm(int imm,int rt)
1432{
1433 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1434 u_int armval;
1435 assert(genimm(imm,&armval));
1436 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1437}
1438void emit_cmovb_imm(int imm,int rt)
1439{
1440 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1441 u_int armval;
1442 assert(genimm(imm,&armval));
1443 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1444}
1445void emit_cmovs_imm(int imm,int rt)
1446{
1447 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1448 u_int armval;
1449 assert(genimm(imm,&armval));
1450 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1451}
1452void emit_cmove_reg(int rs,int rt)
1453{
1454 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1455 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1456}
1457void emit_cmovne_reg(int rs,int rt)
1458{
1459 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1460 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1461}
1462void emit_cmovl_reg(int rs,int rt)
1463{
1464 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1465 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1466}
1467void emit_cmovs_reg(int rs,int rt)
1468{
1469 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1470 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1471}
1472
// rt = (rs < imm) ? 1 : 0, using the "lt" condition after the compare.
// When rt is distinct from rs it is cleared before the compare; when they are
// the same register the clear has to wait until after the compare has read
// rs (presumably emit_movimm leaves the flags intact - verify).
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs < imm) ? 1 : 0, using the "cc" condition after the compare.
// Same register-aliasing handling as emit_slti32.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set rt to the result of comparing the 64-bit pair (rsh:rsl) against imm
// with the "less than" emitters.  The low-word result from emit_slti32() is
// computed first and then overridden based on the high word: compared with
// -1 for negative imm, tested against itself otherwise.  rsh must not be rt.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // High word other than -1 decides the result on its own.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  // Non-zero high word decides the result on its own.
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Carry-condition counterpart of emit_slti64_32: the low-word result from
// emit_sltiu32() is overridden when the high word is not the value implied by
// imm (0 for imm>=0, -1 for imm<0).  rsh must not be rt.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // High word differs from -1: force result to 1.
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // High word non-zero: force result to 0.
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1519
1520void emit_cmp(int rs,int rt)
1521{
1522 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1523 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1524}
// rt = 1, then replaced by 0 under the "lt" condition of "cmp rs,#1".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);      // flags from rs - 1
  emit_movimm(1,rt);      // default result
  emit_cmovl_imm(0,rt);   // overwrite with 0 when "lt"
}
// Copy or test rs to set the flags, then rt = 1 under the "ne" condition.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// 64-bit variant of emit_set_gz32 for the pair (rsh:rsl): the low-word result
// is overridden to 1 when the high word tests "ne", then to 0 when it tests
// "mi".
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// 64-bit variant of emit_set_nz32: combines rsh and rsl into rt via
// emit_or_and_set_flags(), then rt = 1 under the "ne" condition.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
// rt = 1 under the "lt" condition of "cmp rs1,rs2", else 0.
// If rt aliases either source it can only be cleared after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = 1 under the "cc" condition of "cmp rs1,rs2", else 0.
// If rt aliases either source it can only be cleared after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1569void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1570{
1571 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1572 assert(u1!=rt);
1573 assert(u2!=rt);
1574 emit_cmp(l1,l2);
1575 emit_movimm(0,rt);
1576 emit_sbcs(u1,u2,HOST_TEMPREG);
1577 emit_cmovl_imm(1,rt);
1578}
1579void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1580{
1581 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1582 assert(u1!=rt);
1583 assert(u2!=rt);
1584 emit_cmp(l1,l2);
1585 emit_movimm(0,rt);
1586 emit_sbcs(u1,u2,HOST_TEMPREG);
1587 emit_cmovb_imm(1,rt);
1588}
1589
1590void emit_call(int a)
1591{
1592 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1593 u_int offset=genjmp(a);
1594 output_w32(0xeb000000|offset);
1595}
1596void emit_jmp(int a)
1597{
1598 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1599 u_int offset=genjmp(a);
1600 output_w32(0xea000000|offset);
1601}
// Conditional branches to address a.  Each combines a condition-specific
// opcode with the offset field returned by genjmp().

// bne
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// beq
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// bmi
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// bpl
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// blt
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// bge
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// bvc
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// bcs
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// bcc
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1656
1657void emit_pushimm(int imm)
1658{
1659 assem_debug("push $%x\n",imm);
1660 assert(0);
1661}
1662void emit_pusha()
1663{
1664 assem_debug("pusha\n");
1665 assert(0);
1666}
1667void emit_popa()
1668{
1669 assem_debug("popa\n");
1670 assert(0);
1671}
1672void emit_pushreg(u_int r)
1673{
1674 assem_debug("push %%%s\n",regname[r]);
1675 assert(0);
1676}
1677void emit_popreg(u_int r)
1678{
1679 assem_debug("pop %%%s\n",regname[r]);
1680 assert(0);
1681}
1682void emit_callreg(u_int r)
1683{
1684 assem_debug("call *%%%s\n",regname[r]);
1685 assert(0);
1686}
1687void emit_jmpreg(u_int r)
1688{
1689 assem_debug("mov pc,%s\n",regname[r]);
1690 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1691}
1692
1693void emit_readword_indexed(int offset, int rs, int rt)
1694{
1695 assert(offset>-4096&&offset<4096);
1696 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1697 if(offset>=0) {
1698 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1699 }else{
1700 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1701 }
1702}
1703void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1704{
1705 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1706 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1707}
// Word load through an optional map register.
// map<0: plain offset load from rs+addr.  Otherwise addr must be 0 and the
// load is from rs + map*4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map>=0) {
    assert(addr==0);
    emit_readword_dualindexedx4(rs, map, rt);
    return;
  }
  emit_readword_indexed(addr, rs, rt);
}
// Load two consecutive words into rh (first word, skipped when rh<0) and rl
// (second word), optionally through a map register.
// In the mapped case the map register itself is incremented by 1 between the
// loads (4 bytes after the lsl #2 scaling), so it is modified as a side
// effect; rh must not be rs there.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map>=0) {
    assert(rh!=rs);
    if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if(rh>=0) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr+4, rs, rl);
}
1728void emit_movsbl_indexed(int offset, int rs, int rt)
1729{
1730 assert(offset>-256&&offset<256);
1731 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1732 if(offset>=0) {
1733 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1734 }else{
1735 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1736 }
1737}
1738void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1739{
1740 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1741 else {
1742 if(addr==0) {
1743 emit_shlimm(map,2,map);
1744 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1745 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1746 }else{
1747 assert(addr>-256&&addr<256);
1748 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1749 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1750 emit_movsbl_indexed(addr, rt, rt);
1751 }
1752 }
1753}
1754void emit_movswl_indexed(int offset, int rs, int rt)
1755{
1756 assert(offset>-256&&offset<256);
1757 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1758 if(offset>=0) {
1759 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1760 }else{
1761 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1762 }
1763}
1764void emit_movzbl_indexed(int offset, int rs, int rt)
1765{
1766 assert(offset>-4096&&offset<4096);
1767 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1768 if(offset>=0) {
1769 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1770 }else{
1771 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1772 }
1773}
1774void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1775{
1776 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1777 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1778}
// Unsigned byte load through an optional map register.
// map<0: plain offset load.  Otherwise the load is from base + map*4, where
// base is rs when addr==0, or rt after rt = rs + addr has been emitted.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    // Fold the constant offset into rt first and index from there.
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1791void emit_movzwl_indexed(int offset, int rs, int rt)
1792{
1793 assert(offset>-256&&offset<256);
1794 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1795 if(offset>=0) {
1796 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1797 }else{
1798 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1799 }
1800}
1801void emit_readword(int addr, int rt)
1802{
1803 u_int offset = addr-(u_int)&dynarec_local;
1804 assert(offset<4096);
1805 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1806 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1807}
1808void emit_movsbl(int addr, int rt)
1809{
1810 u_int offset = addr-(u_int)&dynarec_local;
1811 assert(offset<256);
1812 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1813 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1814}
1815void emit_movswl(int addr, int rt)
1816{
1817 u_int offset = addr-(u_int)&dynarec_local;
1818 assert(offset<256);
1819 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1820 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1821}
1822void emit_movzbl(int addr, int rt)
1823{
1824 u_int offset = addr-(u_int)&dynarec_local;
1825 assert(offset<4096);
1826 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1827 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1828}
1829void emit_movzwl(int addr, int rt)
1830{
1831 u_int offset = addr-(u_int)&dynarec_local;
1832 assert(offset<256);
1833 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1834 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1835}
1836void emit_movzwl_reg(int rs, int rt)
1837{
1838 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1839 assert(0);
1840}
1841
1842void emit_xchg(int rs, int rt)
1843{
1844 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1845 assert(0);
1846}
1847void emit_writeword_indexed(int rt, int offset, int rs)
1848{
1849 assert(offset>-4096&&offset<4096);
1850 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1851 if(offset>=0) {
1852 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1853 }else{
1854 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1855 }
1856}
1857void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1858{
1859 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1860 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1861}
// Word store through an optional map register.
// map<0: plain offset store to rs+addr.  Otherwise addr must be 0 and the
// store goes to rs + map*4.  The temp argument is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map>=0) {
    assert(addr==0);
    emit_writeword_dualindexedx4(rt, rs, map);
    return;
  }
  emit_writeword_indexed(rt, addr, rs);
}
// Store rh (first word) and rl (second word) to consecutive words, optionally
// through a map register.
// Unmapped (map<0): two offset stores; rh is skipped when negative.
// Mapped: rh is required.  When temp is a register other than rs, temp is set
// to map+1 and used as the map for the second store.  When temp==rs, rs
// itself is advanced by 4 before the second store (so rs is modified).
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1886void emit_writehword_indexed(int rt, int offset, int rs)
1887{
1888 assert(offset>-256&&offset<256);
1889 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1892 }else{
1893 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1894 }
1895}
1896void emit_writebyte_indexed(int rt, int offset, int rs)
1897{
1898 assert(offset>-4096&&offset<4096);
1899 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1900 if(offset>=0) {
1901 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1902 }else{
1903 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1904 }
1905}
1906void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1907{
1908 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1909 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1910}
// Byte store through an optional map register.
// map<0: plain offset store.  Otherwise the store goes to base + map*4, where
// base is rs when addr==0, or temp after temp = rs + addr has been emitted.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    // Fold the constant offset into temp first and index from there.
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1923void emit_writeword(int rt, int addr)
1924{
1925 u_int offset = addr-(u_int)&dynarec_local;
1926 assert(offset<4096);
1927 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1928 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1929}
1930void emit_writehword(int rt, int addr)
1931{
1932 u_int offset = addr-(u_int)&dynarec_local;
1933 assert(offset<256);
1934 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1935 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1936}
1937void emit_writebyte(int rt, int addr)
1938{
1939 u_int offset = addr-(u_int)&dynarec_local;
1940 assert(offset<4096);
1941 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1942 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1943}
// Unimplemented placeholder: logs a store of an immediate word to addr and
// fails via assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
// Unimplemented placeholder: logs a store of an immediate byte to addr and
// fails via assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
1954
1955void emit_mul(int rs)
1956{
1957 assem_debug("mul %%%s\n",regname[rs]);
1958 assert(0);
1959}
1960void emit_imul(int rs)
1961{
1962 assem_debug("imul %%%s\n",regname[rs]);
1963 assert(0);
1964}
1965void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1966{
1967 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1968 assert(rs1<16);
1969 assert(rs2<16);
1970 assert(hi<16);
1971 assert(lo<16);
1972 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1973}
1974void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1975{
1976 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1977 assert(rs1<16);
1978 assert(rs2<16);
1979 assert(hi<16);
1980 assert(lo<16);
1981 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1982}
1983
1984void emit_div(int rs)
1985{
1986 assem_debug("div %%%s\n",regname[rs]);
1987 assert(0);
1988}
1989void emit_idiv(int rs)
1990{
1991 assem_debug("idiv %%%s\n",regname[rs]);
1992 assert(0);
1993}
1994void emit_cdq()
1995{
1996 assem_debug("cdq\n");
1997 assert(0);
1998}
1999
2000void emit_clz(int rs,int rt)
2001{
2002 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2003 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2004}
2005
2006void emit_subcs(int rs1,int rs2,int rt)
2007{
2008 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2009 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2010}
2011
2012void emit_shrcc_imm(int rs,u_int imm,int rt)
2013{
2014 assert(imm>0);
2015 assert(imm<32);
2016 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2017 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2018}
2019
2020void emit_negmi(int rs, int rt)
2021{
2022 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2023 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2024}
2025
2026void emit_negsmi(int rs, int rt)
2027{
2028 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2029 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2030}
2031
2032void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2033{
2034 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2035 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2036}
2037
2038void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2039{
2040 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2041 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2042}
2043
2044void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2045{
2046 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2047 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2048}
2049
2050void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2051{
2052 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2053 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2054}
2055
2056void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2057{
2058 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2059 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2060}
2061
2062void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2063{
2064 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2065 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2066}
2067
2068void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2069{
2070 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2071 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2072}
2073
2074void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2075{
2076 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2077 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2078}
2079
2080void emit_teq(int rs, int rt)
2081{
2082 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2083 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2084}
2085
2086void emit_rsbimm(int rs, int imm, int rt)
2087{
2088 u_int armval;
2089 assert(genimm(imm,&armval));
2090 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2091 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2092}
2093
2094// Load 2 immediates optimizing for small code size
2095void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2096{
2097 emit_movimm(imm1,rt1);
2098 u_int armval;
2099 if(genimm(imm2-imm1,&armval)) {
2100 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2101 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2102 }else if(genimm(imm1-imm2,&armval)) {
2103 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2104 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2105 }
2106 else emit_movimm(imm2,rt2);
2107}
2108
2109// Conditionally select one of two immediates, optimizing for small code size
2110// This will only be called if HAVE_CMOV_IMM is defined
2111void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2112{
2113 u_int armval;
2114 if(genimm(imm2-imm1,&armval)) {
2115 emit_movimm(imm1,rt);
2116 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2117 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2118 }else if(genimm(imm1-imm2,&armval)) {
2119 emit_movimm(imm1,rt);
2120 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2121 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2122 }
2123 else {
2124 #ifdef ARMv5_ONLY
2125 emit_movimm(imm1,rt);
2126 add_literal((int)out,imm2);
2127 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2128 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2129 #else
2130 emit_movw(imm1&0x0000FFFF,rt);
2131 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2132 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2133 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2134 }
2135 emit_movt(imm1&0xFFFF0000,rt);
2136 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2137 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2138 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2139 }
2140 #endif
2141 }
2142}
2143
// special case for checking invalid_code
// Not implemented in this emitter: any call fails via assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2149
2150// special case for checking invalid_code
2151void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2152{
2153 assert(imm<128&&imm>=0);
2154 assert(r>=0&&r<16);
2155 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2156 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2157 emit_cmpimm(HOST_TEMPREG,imm);
2158}
2159
2160// special case for tlb mapping
2161void emit_addsr12(int rs1,int rs2,int rt)
2162{
2163 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2164 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2165}
2166
// Used to preload hash table entries.
// NOTE(review): this emits two single bytes, a modrm byte and a 32-bit
// address, which looks like an x86 encoding rather than an ARM one --
// presumably left over from another backend; confirm before relying on it.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2176void emit_prefetchreg(int r)
2177{
2178 assem_debug("pld %s\n",regname[r]);
2179 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2180}
2181
2182// Special case for mini_ht
2183void emit_ldreq_indexed(int rs, u_int offset, int rt)
2184{
2185 assert(offset<4096);
2186 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2187 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2188}
2189
2190void emit_flds(int r,int sr)
2191{
2192 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2193 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2194}
2195
2196void emit_vldr(int r,int vr)
2197{
2198 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2199 output_w32(0xed900b00|(vr<<12)|(r<<16));
2200}
2201
2202void emit_fsts(int sr,int r)
2203{
2204 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2205 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2206}
2207
2208void emit_vstr(int vr,int r)
2209{
2210 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2211 output_w32(0xed800b00|(vr<<12)|(r<<16));
2212}
2213
// Emit the conversion traced as "ftosizs s<d>,s<s>".
//   s - source single-precision register
//   d - destination single-precision register
void emit_ftosizs(int s,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("ftosizs s%d,s%d\n",d,s);
  output_w32(0xeebd0ac0|dst_field|src_field);
}
2219
// Emit the conversion traced as "ftosizd s<d>,d<s>".
//   s - source double-precision register (low three bits encoded)
//   d - destination single-precision register
void emit_ftosizd(int s,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int src_field=s&7;
  assem_debug("ftosizd s%d,d%d\n",d,s);
  output_w32(0xeebd0bc0|dst_field|src_field);
}
2225
// Emit the conversion traced as "fsitos s<d>,s<s>".
//   s - source single-precision register
//   d - destination single-precision register
void emit_fsitos(int s,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fsitos s%d,s%d\n",d,s);
  output_w32(0xeeb80ac0|dst_field|src_field);
}
2231
// Emit the conversion traced as "fsitod d<d>,s<s>".
//   s - source single-precision register
//   d - destination double-precision register (low three bits encoded)
void emit_fsitod(int s,int d)
{
  const int dst_field=(d&7)<<12;
  const int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fsitod d%d,s%d\n",d,s);
  output_w32(0xeeb80bc0|dst_field|src_field);
}
2237
// Emit the conversion traced as "fcvtds d<d>,s<s>".
//   s - source single-precision register
//   d - destination double-precision register (low three bits encoded)
void emit_fcvtds(int s,int d)
{
  const int dst_field=(d&7)<<12;
  const int src_field=((s&14)>>1)|((s&1)<<5);
  assem_debug("fcvtds d%d,s%d\n",d,s);
  output_w32(0xeeb70ac0|dst_field|src_field);
}
2243
// Emit the conversion traced as "fcvtsd s<d>,d<s>".
//   s - source double-precision register (low three bits encoded)
//   d - destination single-precision register
void emit_fcvtsd(int s,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int src_field=s&7;
  assem_debug("fcvtsd s%d,d%d\n",d,s);
  output_w32(0xeeb70bc0|dst_field|src_field);
}
2249
// Emit a single-precision square root: s<d> = sqrt(s<s>).
//   s - source single-precision register
//   d - destination single-precision register
// The debug trace now names the destination as an s-register; the emitted
// word encodes d with the single-precision field layout used by the other
// single-precision emitters in this file.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2255
// Emit a double-precision square root: d<d> = sqrt(d<s>).
//   s - source double-precision register (low three bits encoded)
//   d - destination double-precision register (low three bits encoded)
// The debug trace now names the destination as a d-register, matching the
// double-precision field layout ((d&7)<<12) that is actually emitted.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2261
// Emit a single-precision absolute value: s<d> = |s<s>|.
//   s - source single-precision register
//   d - destination single-precision register
// The debug trace now names the destination as an s-register, matching the
// single-precision destination field that is emitted.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2267
// Emit a double-precision absolute value: d<d> = |d<s>|.
//   s - source double-precision register (low three bits encoded)
//   d - destination double-precision register (low three bits encoded)
// The debug trace now names the destination as a d-register, matching the
// double-precision destination field that is emitted.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2273
// Emit a single-precision negate: s<d> = -s<s>.
//   s - source single-precision register
//   d - destination single-precision register
// The debug trace now names the destination as an s-register, matching the
// single-precision destination field that is emitted.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2279
// Emit a double-precision negate: d<d> = -d<s>.
//   s - source double-precision register (low three bits encoded)
//   d - destination double-precision register (low three bits encoded)
// The debug trace now names the destination as a d-register, matching the
// double-precision destination field that is emitted.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2285
// Emit the single-precision add traced as "fadds s<d>,s<s1>,s<s2>".
void emit_fadds(int s1,int s2,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a00|dst_field|lhs_field|rhs_field);
}
2291
// Emit the double-precision add traced as "faddd d<d>,d<s1>,d<s2>".
// Only the low three bits of each register number are encoded.
void emit_faddd(int s1,int s2,int d)
{
  const int dst_field=(d&7)<<12;
  const int lhs_field=(s1&7)<<16;
  const int rhs_field=s2&7;
  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b00|dst_field|lhs_field|rhs_field);
}
2297
// Emit the single-precision subtract traced as "fsubs s<d>,s<s1>,s<s2>".
void emit_fsubs(int s1,int s2,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a40|dst_field|lhs_field|rhs_field);
}
2303
// Emit the double-precision subtract traced as "fsubd d<d>,d<s1>,d<s2>".
// Only the low three bits of each register number are encoded.
void emit_fsubd(int s1,int s2,int d)
{
  const int dst_field=(d&7)<<12;
  const int lhs_field=(s1&7)<<16;
  const int rhs_field=s2&7;
  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b40|dst_field|lhs_field|rhs_field);
}
2309
// Emit the single-precision multiply traced as "fmuls s<d>,s<s1>,s<s2>".
void emit_fmuls(int s1,int s2,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee200a00|dst_field|lhs_field|rhs_field);
}
2315
// Emit the double-precision multiply traced as "fmuld d<d>,d<s1>,d<s2>".
// Only the low three bits of each register number are encoded.
void emit_fmuld(int s1,int s2,int d)
{
  const int dst_field=(d&7)<<12;
  const int lhs_field=(s1&7)<<16;
  const int rhs_field=s2&7;
  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee200b00|dst_field|lhs_field|rhs_field);
}
2321
// Emit the single-precision divide traced as "fdivs s<d>,s<s1>,s<s2>".
void emit_fdivs(int s1,int s2,int d)
{
  const int dst_field=((d&14)<<11)|((d&1)<<22);
  const int lhs_field=((s1&14)<<15)|((s1&1)<<7);
  const int rhs_field=((s2&14)>>1)|((s2&1)<<5);
  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee800a00|dst_field|lhs_field|rhs_field);
}
2327
// Emit the double-precision divide traced as "fdivd d<d>,d<s1>,d<s2>".
// Only the low three bits of each register number are encoded.
void emit_fdivd(int s1,int s2,int d)
{
  const int dst_field=(d&7)<<12;
  const int lhs_field=(s1&7)<<16;
  const int rhs_field=s2&7;
  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee800b00|dst_field|lhs_field|rhs_field);
}
2333
// Emit a single-precision compare.
// x and y are not consulted: one fixed opcode word is emitted, traced as
// "fcmps s14, s15".
void emit_fcmps(int x,int y)
{
  const unsigned int insn=0xeeb47a67;
  assem_debug("fcmps s14, s15\n");
  output_w32(insn);
}
2339
// Emit a double-precision compare.
// x and y are not consulted: one fixed opcode word is emitted, traced as
// "fcmpd d6, d7".
void emit_fcmpd(int x,int y)
{
  const unsigned int insn=0xeeb46b47;
  assem_debug("fcmpd d6, d7\n");
  output_w32(insn);
}
2345
// Emit the fixed opcode word traced as "fmstat".
void emit_fmstat()
{
  const unsigned int insn=0xeef1fa10;
  assem_debug("fmstat\n");
  output_w32(insn);
}
2351
2352void emit_bicne_imm(int rs,int imm,int rt)
2353{
2354 u_int armval;
2355 assert(genimm(imm,&armval));
2356 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2357 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2358}
2359
2360void emit_biccs_imm(int rs,int imm,int rt)
2361{
2362 u_int armval;
2363 assert(genimm(imm,&armval));
2364 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2365 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2366}
2367
2368void emit_bicvc_imm(int rs,int imm,int rt)
2369{
2370 u_int armval;
2371 assert(genimm(imm,&armval));
2372 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2373 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2374}
2375
2376void emit_bichi_imm(int rs,int imm,int rt)
2377{
2378 u_int armval;
2379 assert(genimm(imm,&armval));
2380 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2381 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2382}
2383
2384void emit_orrvs_imm(int rs,int imm,int rt)
2385{
2386 u_int armval;
2387 assert(genimm(imm,&armval));
2388 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2389 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2390}
2391
b9b61529 2392void emit_orrne_imm(int rs,int imm,int rt)
2393{
2394 u_int armval;
2395 assert(genimm(imm,&armval));
2396 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2397 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2398}
2399
2400void emit_andne_imm(int rs,int imm,int rt)
2401{
2402 u_int armval;
2403 assert(genimm(imm,&armval));
2404 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2405 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2406}
2407
// Emit the VC-conditional "add pc,pc,#?" with a zero immediate field.
// The argument a is only printed in the debug trace; no offset derived from
// it is encoded here (presumably patched in later -- verify against callers).
void emit_jno_unlikely(int a)
{
  const int pc_fields=rd_rn_rm(15,15,0);
  assem_debug("addvc pc,pc,#? (%x)\n",a);
  output_w32(0x72800000|pc_fields);
}
2414
2415// Save registers before function call
2416void save_regs(u_int reglist)
2417{
2418 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2419 if(!reglist) return;
2420 assem_debug("stmia fp,{");
2421 if(reglist&1) assem_debug("r0, ");
2422 if(reglist&2) assem_debug("r1, ");
2423 if(reglist&4) assem_debug("r2, ");
2424 if(reglist&8) assem_debug("r3, ");
2425 if(reglist&0x1000) assem_debug("r12");
2426 assem_debug("}\n");
2427 output_w32(0xe88b0000|reglist);
2428}
2429// Restore registers after function call
2430void restore_regs(u_int reglist)
2431{
2432 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2433 if(!reglist) return;
2434 assem_debug("ldmia fp,{");
2435 if(reglist&1) assem_debug("r0, ");
2436 if(reglist&2) assem_debug("r1, ");
2437 if(reglist&4) assem_debug("r2, ");
2438 if(reglist&8) assem_debug("r3, ");
2439 if(reglist&0x1000) assem_debug("r12");
2440 assem_debug("}\n");
2441 output_w32(0xe89b0000|reglist);
2442}
2443
2444// Write back consts using r14 so we don't disturb the other registers
2445void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2446{
2447 int hr;
2448 for(hr=0;hr<HOST_REGS;hr++) {
2449 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2450 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2451 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2452 int value=constmap[i][hr];
2453 if(value==0) {
2454 emit_zeroreg(HOST_TEMPREG);
2455 }
2456 else {
2457 emit_movimm(value,HOST_TEMPREG);
2458 }
2459 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2460#ifndef FORCE32
57871462 2461 if((i_is32>>i_regmap[hr])&1) {
2462 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2463 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2464 }
24385cae 2465#endif
57871462 2466 }
2467 }
2468 }
2469 }
2470}
2471
2472/* Stubs/epilogue */
2473
2474void literal_pool(int n)
2475{
2476 if(!literalcount) return;
2477 if(n) {
2478 if((int)out-literals[0][0]<4096-n) return;
2479 }
2480 u_int *ptr;
2481 int i;
2482 for(i=0;i<literalcount;i++)
2483 {
2484 ptr=(u_int *)literals[i][0];
2485 u_int offset=(u_int)out-(u_int)ptr-8;
2486 assert(offset<4096);
2487 assert(!(offset&3));
2488 *ptr|=offset;
2489 output_w32(literals[i][1]);
2490 }
2491 literalcount=0;
2492}
2493
2494void literal_pool_jumpover(int n)
2495{
2496 if(!literalcount) return;
2497 if(n) {
2498 if((int)out-literals[0][0]<4096-n) return;
2499 }
2500 int jaddr=(int)out;
2501 emit_jmp(0);
2502 literal_pool(0);
2503 set_jump_target(jaddr,(int)out);
2504}
2505
2506emit_extjump2(int addr, int target, int linker)
2507{
2508 u_char *ptr=(u_char *)addr;
2509 assert((ptr[3]&0x0e)==0xa);
2510 emit_loadlp(target,0);
2511 emit_loadlp(addr,1);
24385cae 2512 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2513 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2514//DEBUG >
2515#ifdef DEBUG_CYCLE_COUNT
2516 emit_readword((int)&last_count,ECX);
2517 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2518 emit_readword((int)&next_interupt,ECX);
2519 emit_writeword(HOST_CCREG,(int)&Count);
2520 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2521 emit_writeword(ECX,(int)&last_count);
2522#endif
2523//DEBUG <
2524 emit_jmp(linker);
2525}
2526
2527emit_extjump(int addr, int target)
2528{
2529 emit_extjump2(addr, target, (int)dyna_linker);
2530}
2531emit_extjump_ds(int addr, int target)
2532{
2533 emit_extjump2(addr, target, (int)dyna_linker_ds);
2534}
2535
2536do_readstub(int n)
2537{
2538 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2539 literal_pool(256);
2540 set_jump_target(stubs[n][1],(int)out);
2541 int type=stubs[n][0];
2542 int i=stubs[n][3];
2543 int rs=stubs[n][4];
2544 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2545 u_int reglist=stubs[n][7];
2546 signed char *i_regmap=i_regs->regmap;
2547 int addr=get_reg(i_regmap,AGEN1+(i&1));
2548 int rth,rt;
2549 int ds;
b9b61529 2550 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2551 rth=get_reg(i_regmap,FTEMP|64);
2552 rt=get_reg(i_regmap,FTEMP);
2553 }else{
2554 rth=get_reg(i_regmap,rt1[i]|64);
2555 rt=get_reg(i_regmap,rt1[i]);
2556 }
5bf843dc 2557#ifdef PCSX
2558 if(rt<0)
2559 // assume forced dummy read
2560 rt=get_reg(i_regmap,-1);
2561#endif
57871462 2562 assert(rs>=0);
2563 assert(rt>=0);
2564 if(addr<0) addr=rt;
2565 assert(addr>=0);
2566 int ftable=0;
2567 if(type==LOADB_STUB||type==LOADBU_STUB)
2568 ftable=(int)readmemb;
2569 if(type==LOADH_STUB||type==LOADHU_STUB)
2570 ftable=(int)readmemh;
2571 if(type==LOADW_STUB)
2572 ftable=(int)readmem;
24385cae 2573#ifndef FORCE32
57871462 2574 if(type==LOADD_STUB)
2575 ftable=(int)readmemd;
24385cae 2576#endif
2577 assert(ftable!=0);
57871462 2578 emit_writeword(rs,(int)&address);
2579 //emit_pusha();
2580 save_regs(reglist);
2581 ds=i_regs!=&regs[i];
2582 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2583 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2584 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2585 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2586 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2587 emit_shrimm(rs,16,1);
2588 int cc=get_reg(i_regmap,CCREG);
2589 if(cc<0) {
2590 emit_loadreg(CCREG,2);
2591 }
2592 emit_movimm(ftable,0);
2593 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2594 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2595 //emit_readword((int)&last_count,temp);
2596 //emit_add(cc,temp,cc);
2597 //emit_writeword(cc,(int)&Count);
2598 //emit_mov(15,14);
2599 emit_call((int)&indirect_jump_indexed);
2600 //emit_callreg(rs);
2601 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2602 // We really shouldn't need to update the count here,
2603 // but not doing so causes random crashes...
2604 emit_readword((int)&Count,HOST_TEMPREG);
2605 emit_readword((int)&next_interupt,2);
2606 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2607 emit_writeword(2,(int)&last_count);
2608 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2609 if(cc<0) {
2610 emit_storereg(CCREG,HOST_TEMPREG);
2611 }
2612 //emit_popa();
2613 restore_regs(reglist);
2614 //if((cc=get_reg(regmap,CCREG))>=0) {
2615 // emit_loadreg(CCREG,cc);
2616 //}
2617 if(type==LOADB_STUB)
2618 emit_movsbl((int)&readmem_dword,rt);
2619 if(type==LOADBU_STUB)
2620 emit_movzbl((int)&readmem_dword,rt);
2621 if(type==LOADH_STUB)
2622 emit_movswl((int)&readmem_dword,rt);
2623 if(type==LOADHU_STUB)
2624 emit_movzwl((int)&readmem_dword,rt);
2625 if(type==LOADW_STUB)
2626 emit_readword((int)&readmem_dword,rt);
2627 if(type==LOADD_STUB) {
2628 emit_readword((int)&readmem_dword,rt);
2629 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2630 }
2631 emit_jmp(stubs[n][2]); // return address
2632}
2633
2634inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2635{
2636 int rs=get_reg(regmap,target);
2637 int rth=get_reg(regmap,target|64);
2638 int rt=get_reg(regmap,target);
2639 assert(rs>=0);
2640 assert(rt>=0);
2641 int ftable=0;
2642 if(type==LOADB_STUB||type==LOADBU_STUB)
2643 ftable=(int)readmemb;
2644 if(type==LOADH_STUB||type==LOADHU_STUB)
2645 ftable=(int)readmemh;
2646 if(type==LOADW_STUB)
2647 ftable=(int)readmem;
24385cae 2648#ifndef FORCE32
57871462 2649 if(type==LOADD_STUB)
2650 ftable=(int)readmemd;
24385cae 2651#endif
2652 assert(ftable!=0);
57871462 2653 emit_writeword(rs,(int)&address);
2654 //emit_pusha();
2655 save_regs(reglist);
2656 //emit_shrimm(rs,16,1);
2657 int cc=get_reg(regmap,CCREG);
2658 if(cc<0) {
2659 emit_loadreg(CCREG,2);
2660 }
2661 //emit_movimm(ftable,0);
2662 emit_movimm(((u_int *)ftable)[addr>>16],0);
2663 //emit_readword((int)&last_count,12);
2664 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2665 if((signed int)addr>=(signed int)0xC0000000) {
2666 // Pagefault address
2667 int ds=regmap!=regs[i].regmap;
2668 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2669 }
2670 //emit_add(12,2,2);
2671 //emit_writeword(2,(int)&Count);
2672 //emit_call(((u_int *)ftable)[addr>>16]);
2673 emit_call((int)&indirect_jump);
2674 // We really shouldn't need to update the count here,
2675 // but not doing so causes random crashes...
2676 emit_readword((int)&Count,HOST_TEMPREG);
2677 emit_readword((int)&next_interupt,2);
2678 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2679 emit_writeword(2,(int)&last_count);
2680 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2681 if(cc<0) {
2682 emit_storereg(CCREG,HOST_TEMPREG);
2683 }
2684 //emit_popa();
2685 restore_regs(reglist);
2686 if(type==LOADB_STUB)
2687 emit_movsbl((int)&readmem_dword,rt);
2688 if(type==LOADBU_STUB)
2689 emit_movzbl((int)&readmem_dword,rt);
2690 if(type==LOADH_STUB)
2691 emit_movswl((int)&readmem_dword,rt);
2692 if(type==LOADHU_STUB)
2693 emit_movzwl((int)&readmem_dword,rt);
2694 if(type==LOADW_STUB)
2695 emit_readword((int)&readmem_dword,rt);
2696 if(type==LOADD_STUB) {
2697 emit_readword((int)&readmem_dword,rt);
2698 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2699 }
2700}
2701
2702do_writestub(int n)
2703{
2704 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2705 literal_pool(256);
2706 set_jump_target(stubs[n][1],(int)out);
2707 int type=stubs[n][0];
2708 int i=stubs[n][3];
2709 int rs=stubs[n][4];
2710 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2711 u_int reglist=stubs[n][7];
2712 signed char *i_regmap=i_regs->regmap;
2713 int addr=get_reg(i_regmap,AGEN1+(i&1));
2714 int rth,rt,r;
2715 int ds;
b9b61529 2716 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2717 rth=get_reg(i_regmap,FTEMP|64);
2718 rt=get_reg(i_regmap,r=FTEMP);
2719 }else{
2720 rth=get_reg(i_regmap,rs2[i]|64);
2721 rt=get_reg(i_regmap,r=rs2[i]);
2722 }
2723 assert(rs>=0);
2724 assert(rt>=0);
2725 if(addr<0) addr=get_reg(i_regmap,-1);
2726 assert(addr>=0);
2727 int ftable=0;
2728 if(type==STOREB_STUB)
2729 ftable=(int)writememb;
2730 if(type==STOREH_STUB)
2731 ftable=(int)writememh;
2732 if(type==STOREW_STUB)
2733 ftable=(int)writemem;
24385cae 2734#ifndef FORCE32
57871462 2735 if(type==STORED_STUB)
2736 ftable=(int)writememd;
24385cae 2737#endif
2738 assert(ftable!=0);
57871462 2739 emit_writeword(rs,(int)&address);
2740 //emit_shrimm(rs,16,rs);
2741 //emit_movmem_indexedx4(ftable,rs,rs);
2742 if(type==STOREB_STUB)
2743 emit_writebyte(rt,(int)&byte);
2744 if(type==STOREH_STUB)
2745 emit_writehword(rt,(int)&hword);
2746 if(type==STOREW_STUB)
2747 emit_writeword(rt,(int)&word);
2748 if(type==STORED_STUB) {
3d624f89 2749#ifndef FORCE32
57871462 2750 emit_writeword(rt,(int)&dword);
2751 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2752#else
2753 printf("STORED_STUB\n");
2754#endif
57871462 2755 }
2756 //emit_pusha();
2757 save_regs(reglist);
2758 ds=i_regs!=&regs[i];
2759 int real_rs=get_reg(i_regmap,rs1[i]);
2760 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2761 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2762 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2763 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2764 emit_shrimm(rs,16,1);
2765 int cc=get_reg(i_regmap,CCREG);
2766 if(cc<0) {
2767 emit_loadreg(CCREG,2);
2768 }
2769 emit_movimm(ftable,0);
2770 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2771 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2772 //emit_readword((int)&last_count,temp);
2773 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2774 //emit_add(cc,temp,cc);
2775 //emit_writeword(cc,(int)&Count);
2776 emit_call((int)&indirect_jump_indexed);
2777 //emit_callreg(rs);
2778 emit_readword((int)&Count,HOST_TEMPREG);
2779 emit_readword((int)&next_interupt,2);
2780 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2781 emit_writeword(2,(int)&last_count);
2782 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2783 if(cc<0) {
2784 emit_storereg(CCREG,HOST_TEMPREG);
2785 }
2786 //emit_popa();
2787 restore_regs(reglist);
2788 //if((cc=get_reg(regmap,CCREG))>=0) {
2789 // emit_loadreg(CCREG,cc);
2790 //}
2791 emit_jmp(stubs[n][2]); // return address
2792}
2793
2794inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2795{
2796 int rs=get_reg(regmap,-1);
2797 int rth=get_reg(regmap,target|64);
2798 int rt=get_reg(regmap,target);
2799 assert(rs>=0);
2800 assert(rt>=0);
2801 int ftable=0;
2802 if(type==STOREB_STUB)
2803 ftable=(int)writememb;
2804 if(type==STOREH_STUB)
2805 ftable=(int)writememh;
2806 if(type==STOREW_STUB)
2807 ftable=(int)writemem;
24385cae 2808#ifndef FORCE32
57871462 2809 if(type==STORED_STUB)
2810 ftable=(int)writememd;
24385cae 2811#endif
2812 assert(ftable!=0);
57871462 2813 emit_writeword(rs,(int)&address);
2814 //emit_shrimm(rs,16,rs);
2815 //emit_movmem_indexedx4(ftable,rs,rs);
2816 if(type==STOREB_STUB)
2817 emit_writebyte(rt,(int)&byte);
2818 if(type==STOREH_STUB)
2819 emit_writehword(rt,(int)&hword);
2820 if(type==STOREW_STUB)
2821 emit_writeword(rt,(int)&word);
2822 if(type==STORED_STUB) {
3d624f89 2823#ifndef FORCE32
57871462 2824 emit_writeword(rt,(int)&dword);
2825 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2826#else
2827 printf("STORED_STUB\n");
2828#endif
57871462 2829 }
2830 //emit_pusha();
2831 save_regs(reglist);
2832 //emit_shrimm(rs,16,1);
2833 int cc=get_reg(regmap,CCREG);
2834 if(cc<0) {
2835 emit_loadreg(CCREG,2);
2836 }
2837 //emit_movimm(ftable,0);
2838 emit_movimm(((u_int *)ftable)[addr>>16],0);
2839 //emit_readword((int)&last_count,12);
2840 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2841 if((signed int)addr>=(signed int)0xC0000000) {
2842 // Pagefault address
2843 int ds=regmap!=regs[i].regmap;
2844 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2845 }
2846 //emit_add(12,2,2);
2847 //emit_writeword(2,(int)&Count);
2848 //emit_call(((u_int *)ftable)[addr>>16]);
2849 emit_call((int)&indirect_jump);
2850 emit_readword((int)&Count,HOST_TEMPREG);
2851 emit_readword((int)&next_interupt,2);
2852 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2853 emit_writeword(2,(int)&last_count);
2854 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2855 if(cc<0) {
2856 emit_storereg(CCREG,HOST_TEMPREG);
2857 }
2858 //emit_popa();
2859 restore_regs(reglist);
2860}
2861
2862do_unalignedwritestub(int n)
2863{
b7918751 2864 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2865 literal_pool(256);
57871462 2866 set_jump_target(stubs[n][1],(int)out);
b7918751 2867
2868 int i=stubs[n][3];
2869 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2870 int addr=stubs[n][5];
2871 u_int reglist=stubs[n][7];
2872 signed char *i_regmap=i_regs->regmap;
2873 int temp2=get_reg(i_regmap,FTEMP);
2874 int rt;
2875 int ds, real_rs;
2876 rt=get_reg(i_regmap,rs2[i]);
2877 assert(rt>=0);
2878 assert(addr>=0);
2879 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2880 reglist|=(1<<addr);
2881 reglist&=~(1<<temp2);
2882
2883 emit_andimm(addr,0xfffffffc,temp2);
2884 emit_writeword(temp2,(int)&address);
2885
2886 save_regs(reglist);
2887 ds=i_regs!=&regs[i];
2888 real_rs=get_reg(i_regmap,rs1[i]);
2889 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2890 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2891 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2892 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2893 emit_shrimm(addr,16,1);
2894 int cc=get_reg(i_regmap,CCREG);
2895 if(cc<0) {
2896 emit_loadreg(CCREG,2);
2897 }
2898 emit_movimm((u_int)readmem,0);
2899 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2900 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2901 emit_call((int)&indirect_jump_indexed);
2902 restore_regs(reglist);
2903
2904 emit_readword((int)&readmem_dword,temp2);
2905 int temp=addr; //hmh
2906 emit_shlimm(addr,3,temp);
2907 emit_andimm(temp,24,temp);
2908#ifdef BIG_ENDIAN_MIPS
2909 if (opcode[i]==0x2e) // SWR
2910#else
2911 if (opcode[i]==0x2a) // SWL
2912#endif
2913 emit_xorimm(temp,24,temp);
2914 emit_movimm(-1,HOST_TEMPREG);
2915 if (opcode[i]==0x2e) { // SWR
2916 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2917 emit_orrshr(rt,temp,temp2);
2918 }else{
2919 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2920 emit_orrshl(rt,temp,temp2);
2921 }
2922 emit_readword((int)&address,addr);
2923 emit_writeword(temp2,(int)&word);
2924 //save_regs(reglist); // don't need to, no state changes
2925 emit_shrimm(addr,16,1);
2926 emit_movimm((u_int)writemem,0);
2927 //emit_call((int)&indirect_jump_indexed);
2928 emit_mov(15,14);
2929 emit_readword_dualindexedx4(0,1,15);
2930 emit_readword((int)&Count,HOST_TEMPREG);
2931 emit_readword((int)&next_interupt,2);
2932 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2933 emit_writeword(2,(int)&last_count);
2934 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2935 if(cc<0) {
2936 emit_storereg(CCREG,HOST_TEMPREG);
2937 }
2938 restore_regs(reglist);
57871462 2939 emit_jmp(stubs[n][2]); // return address
2940}
2941
// Debug helper: print the eight register values passed in, followed in
// parentheses by the word located just below the first argument in memory.
// NOTE(review): (&edi)[-1] reads outside the parameter object; what lives
// there depends on the calling convention -- confirm before relying on it.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,(&edi)[-1]);
}
2946
2947do_invstub(int n)
2948{
2949 literal_pool(20);
2950 u_int reglist=stubs[n][3];
2951 set_jump_target(stubs[n][1],(int)out);
2952 save_regs(reglist);
2953 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2954 emit_call((int)&invalidate_addr);
2955 restore_regs(reglist);
2956 emit_jmp(stubs[n][2]); // return address
2957}
2958
2959int do_dirty_stub(int i)
2960{
2961 assem_debug("do_dirty_stub %x\n",start+i*4);
2962 // Careful about the code output here, verify_dirty needs to parse it.
2963 #ifdef ARMv5_ONLY
2964 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2965 emit_loadlp((int)copy,2);
2966 emit_loadlp(slen*4,3);
2967 #else
2968 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2969 emit_movw(((u_int)copy)&0x0000FFFF,2);
2970 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2971 emit_movt(((u_int)copy)&0xFFFF0000,2);
2972 emit_movw(slen*4,3);
2973 #endif
2974 emit_movimm(start+i*4,0);
2975 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2976 int entry=(int)out;
2977 load_regs_entry(i);
2978 if(entry==(int)out) entry=instr_addr[i];
2979 emit_jmp(instr_addr[i]);
2980 return entry;
2981}
2982
2983void do_dirty_stub_ds()
2984{
2985 // Careful about the code output here, verify_dirty needs to parse it.
2986 #ifdef ARMv5_ONLY
2987 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2988 emit_loadlp((int)copy,2);
2989 emit_loadlp(slen*4,3);
2990 #else
2991 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2992 emit_movw(((u_int)copy)&0x0000FFFF,2);
2993 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2994 emit_movt(((u_int)copy)&0xFFFF0000,2);
2995 emit_movw(slen*4,3);
2996 #endif
2997 emit_movimm(start+1,0);
2998 emit_call((int)&verify_code_ds);
2999}
3000
3001do_cop1stub(int n)
3002{
3003 literal_pool(256);
3004 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3005 set_jump_target(stubs[n][1],(int)out);
3006 int i=stubs[n][3];
3d624f89 3007// int rs=stubs[n][4];
57871462 3008 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3009 int ds=stubs[n][6];
3010 if(!ds) {
3011 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3012 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3013 }
3014 //else {printf("fp exception in delay slot\n");}
3015 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3016 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3017 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3018 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3019 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3020}
3021
3022/* TLB */
3023
3024int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3025{
3026 if(c) {
3027 if((signed int)addr>=(signed int)0xC0000000) {
3028 // address_generation already loaded the const
3029 emit_readword_dualindexedx4(FP,map,map);
3030 }
3031 else
3032 return -1; // No mapping
3033 }
3034 else {
3035 assert(s!=map);
3036 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3037 emit_addsr12(map,s,map);
3038 // Schedule this while we wait on the load
3039 //if(x) emit_xorimm(s,x,ar);
3040 if(shift>=0) emit_shlimm(s,3,shift);
3041 if(~a) emit_andimm(s,a,ar);
3042 emit_readword_dualindexedx4(FP,map,map);
3043 }
3044 return map;
3045}
3046int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3047{
3048 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3049 emit_test(map,map);
3050 *jaddr=(int)out;
3051 emit_js(0);
3052 }
3053 return map;
3054}
3055
3056int gen_tlb_addr_r(int ar, int map) {
3057 if(map>=0) {
3058 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3059 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3060 }
3061}
3062
3063int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3064{
3065 if(c) {
3066 if(addr<0x80800000||addr>=0xC0000000) {
3067 // address_generation already loaded the const
3068 emit_readword_dualindexedx4(FP,map,map);
3069 }
3070 else
3071 return -1; // No mapping
3072 }
3073 else {
3074 assert(s!=map);
3075 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3076 emit_addsr12(map,s,map);
3077 // Schedule this while we wait on the load
3078 //if(x) emit_xorimm(s,x,ar);
3079 emit_readword_dualindexedx4(FP,map,map);
3080 }
3081 return map;
3082}
3083int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3084{
3085 if(!c||addr<0x80800000||addr>=0xC0000000) {
3086 emit_testimm(map,0x40000000);
3087 *jaddr=(int)out;
3088 emit_jne(0);
3089 }
3090}
3091
3092int gen_tlb_addr_w(int ar, int map) {
3093 if(map>=0) {
3094 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3095 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3096 }
3097}
3098
3099// Generate the address of the memory_map entry, relative to dynarec_local
3100generate_map_const(u_int addr,int reg) {
3101 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3102 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3103}
3104
3105/* Special assem */
3106
3107void shift_assemble_arm(int i,struct regstat *i_regs)
3108{
3109 if(rt1[i]) {
3110 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3111 {
3112 signed char s,t,shift;
3113 t=get_reg(i_regs->regmap,rt1[i]);
3114 s=get_reg(i_regs->regmap,rs1[i]);
3115 shift=get_reg(i_regs->regmap,rs2[i]);
3116 if(t>=0){
3117 if(rs1[i]==0)
3118 {
3119 emit_zeroreg(t);
3120 }
3121 else if(rs2[i]==0)
3122 {
3123 assert(s>=0);
3124 if(s!=t) emit_mov(s,t);
3125 }
3126 else
3127 {
3128 emit_andimm(shift,31,HOST_TEMPREG);
3129 if(opcode2[i]==4) // SLLV
3130 {
3131 emit_shl(s,HOST_TEMPREG,t);
3132 }
3133 if(opcode2[i]==6) // SRLV
3134 {
3135 emit_shr(s,HOST_TEMPREG,t);
3136 }
3137 if(opcode2[i]==7) // SRAV
3138 {
3139 emit_sar(s,HOST_TEMPREG,t);
3140 }
3141 }
3142 }
3143 } else { // DSLLV/DSRLV/DSRAV
3144 signed char sh,sl,th,tl,shift;
3145 th=get_reg(i_regs->regmap,rt1[i]|64);
3146 tl=get_reg(i_regs->regmap,rt1[i]);
3147 sh=get_reg(i_regs->regmap,rs1[i]|64);
3148 sl=get_reg(i_regs->regmap,rs1[i]);
3149 shift=get_reg(i_regs->regmap,rs2[i]);
3150 if(tl>=0){
3151 if(rs1[i]==0)
3152 {
3153 emit_zeroreg(tl);
3154 if(th>=0) emit_zeroreg(th);
3155 }
3156 else if(rs2[i]==0)
3157 {
3158 assert(sl>=0);
3159 if(sl!=tl) emit_mov(sl,tl);
3160 if(th>=0&&sh!=th) emit_mov(sh,th);
3161 }
3162 else
3163 {
3164 // FIXME: What if shift==tl ?
3165 assert(shift!=tl);
3166 int temp=get_reg(i_regs->regmap,-1);
3167 int real_th=th;
3168 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3169 assert(sl>=0);
3170 assert(sh>=0);
3171 emit_andimm(shift,31,HOST_TEMPREG);
3172 if(opcode2[i]==0x14) // DSLLV
3173 {
3174 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3175 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3176 emit_orrshr(sl,HOST_TEMPREG,th);
3177 emit_andimm(shift,31,HOST_TEMPREG);
3178 emit_testimm(shift,32);
3179 emit_shl(sl,HOST_TEMPREG,tl);
3180 if(th>=0) emit_cmovne_reg(tl,th);
3181 emit_cmovne_imm(0,tl);
3182 }
3183 if(opcode2[i]==0x16) // DSRLV
3184 {
3185 assert(th>=0);
3186 emit_shr(sl,HOST_TEMPREG,tl);
3187 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3188 emit_orrshl(sh,HOST_TEMPREG,tl);
3189 emit_andimm(shift,31,HOST_TEMPREG);
3190 emit_testimm(shift,32);
3191 emit_shr(sh,HOST_TEMPREG,th);
3192 emit_cmovne_reg(th,tl);
3193 if(real_th>=0) emit_cmovne_imm(0,th);
3194 }
3195 if(opcode2[i]==0x17) // DSRAV
3196 {
3197 assert(th>=0);
3198 emit_shr(sl,HOST_TEMPREG,tl);
3199 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3200 if(real_th>=0) {
3201 assert(temp>=0);
3202 emit_sarimm(th,31,temp);
3203 }
3204 emit_orrshl(sh,HOST_TEMPREG,tl);
3205 emit_andimm(shift,31,HOST_TEMPREG);
3206 emit_testimm(shift,32);
3207 emit_sar(sh,HOST_TEMPREG,th);
3208 emit_cmovne_reg(th,tl);
3209 if(real_th>=0) emit_cmovne_reg(temp,th);
3210 }
3211 }
3212 }
3213 }
3214 }
3215}
3216#define shift_assemble shift_assemble_arm
3217
3218void loadlr_assemble_arm(int i,struct regstat *i_regs)
3219{
3220 int s,th,tl,temp,temp2,addr,map=-1;
3221 int offset;
3222 int jaddr=0;
3223 int memtarget,c=0;
3224 u_int hr,reglist=0;
3225 th=get_reg(i_regs->regmap,rt1[i]|64);
3226 tl=get_reg(i_regs->regmap,rt1[i]);
3227 s=get_reg(i_regs->regmap,rs1[i]);
3228 temp=get_reg(i_regs->regmap,-1);
3229 temp2=get_reg(i_regs->regmap,FTEMP);
3230 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3231 assert(addr<0);
3232 offset=imm[i];
3233 for(hr=0;hr<HOST_REGS;hr++) {
3234 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3235 }
3236 reglist|=1<<temp;
3237 if(offset||s<0||c) addr=temp2;
3238 else addr=s;
3239 if(s>=0) {
3240 c=(i_regs->wasconst>>s)&1;
3241 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3242 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3243 }
3244 if(tl>=0) {
3245 //assert(tl>=0);
3246 //assert(rt1[i]);
3247 if(!using_tlb) {
3248 if(!c) {
3249 emit_shlimm(addr,3,temp);
3250 if (opcode[i]==0x22||opcode[i]==0x26) {
3251 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3252 }else{
3253 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3254 }
3255 emit_cmpimm(addr,0x800000);
3256 jaddr=(int)out;
3257 emit_jno(0);
3258 }
3259 else {
3260 if (opcode[i]==0x22||opcode[i]==0x26) {
3261 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3262 }else{
3263 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3264 }
3265 }
3266 }else{ // using tlb
3267 int a;
3268 if(c) {
3269 a=-1;
3270 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3271 a=0xFFFFFFFC; // LWL/LWR
3272 }else{
3273 a=0xFFFFFFF8; // LDL/LDR
3274 }
3275 map=get_reg(i_regs->regmap,TLREG);
3276 assert(map>=0);
3277 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3278 if(c) {
3279 if (opcode[i]==0x22||opcode[i]==0x26) {
3280 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3281 }else{
3282 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3283 }
3284 }
3285 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3286 }
3287 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3288 if(!c||memtarget) {
3289 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3290 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3291 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3292 }
3293 else
3294 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3295 emit_andimm(temp,24,temp);
2002a1db 3296#ifdef BIG_ENDIAN_MIPS
3297 if (opcode[i]==0x26) // LWR
3298#else
3299 if (opcode[i]==0x22) // LWL
3300#endif
3301 emit_xorimm(temp,24,temp);
57871462 3302 emit_movimm(-1,HOST_TEMPREG);
3303 if (opcode[i]==0x26) {
3304 emit_shr(temp2,temp,temp2);
3305 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3306 }else{
3307 emit_shl(temp2,temp,temp2);
3308 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3309 }
3310 emit_or(temp2,tl,tl);
3311 //emit_storereg(rt1[i],tl); // DEBUG
3312 }
3313 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3314 // FIXME: little endian
57871462 3315 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3316 if(!c||memtarget) {
3317 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3318 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3319 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3320 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3321 }
3322 else
3323 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3324 emit_testimm(temp,32);
3325 emit_andimm(temp,24,temp);
3326 if (opcode[i]==0x1A) { // LDL
3327 emit_rsbimm(temp,32,HOST_TEMPREG);
3328 emit_shl(temp2h,temp,temp2h);
3329 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3330 emit_movimm(-1,HOST_TEMPREG);
3331 emit_shl(temp2,temp,temp2);
3332 emit_cmove_reg(temp2h,th);
3333 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3334 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3335 emit_orreq(temp2,tl,tl);
3336 emit_orrne(temp2,th,th);
3337 }
3338 if (opcode[i]==0x1B) { // LDR
3339 emit_xorimm(temp,24,temp);
3340 emit_rsbimm(temp,32,HOST_TEMPREG);
3341 emit_shr(temp2,temp,temp2);
3342 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3343 emit_movimm(-1,HOST_TEMPREG);
3344 emit_shr(temp2h,temp,temp2h);
3345 emit_cmovne_reg(temp2,tl);
3346 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3347 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3348 emit_orrne(temp2h,th,th);
3349 emit_orreq(temp2h,tl,tl);
3350 }
3351 }
3352 }
3353}
3354#define loadlr_assemble loadlr_assemble_arm
3355
3356void cop0_assemble(int i,struct regstat *i_regs)
3357{
3358 if(opcode2[i]==0) // MFC0
3359 {
3360 signed char t=get_reg(i_regs->regmap,rt1[i]);
3361 char copr=(source[i]>>11)&0x1f;
3362 //assert(t>=0); // Why does this happen? OOT is weird
3363 if(t>=0) {
7139f3c8 3364#ifdef MUPEN64
57871462 3365 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3366 emit_movimm((source[i]>>11)&0x1f,1);
3367 emit_writeword(0,(int)&PC);
3368 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3369 if(copr==9) {
3370 emit_readword((int)&last_count,ECX);
3371 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3372 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3373 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3374 emit_writeword(HOST_CCREG,(int)&Count);
3375 }
3376 emit_call((int)MFC0);
3377 emit_readword((int)&readmem_dword,t);
7139f3c8 3378#else
3379 emit_readword((int)&reg_cop0+copr*4,t);
3380#endif
57871462 3381 }
3382 }
3383 else if(opcode2[i]==4) // MTC0
3384 {
3385 signed char s=get_reg(i_regs->regmap,rs1[i]);
3386 char copr=(source[i]>>11)&0x1f;
3387 assert(s>=0);
3388 emit_writeword(s,(int)&readmem_dword);
3389 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3d624f89 3390#ifdef MUPEN64 /// FIXME
57871462 3391 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3392 emit_movimm((source[i]>>11)&0x1f,1);
3393 emit_writeword(0,(int)&PC);
3394 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3395#endif
7139f3c8 3396#ifdef PCSX
3397 emit_movimm(source[i],0);
3398 emit_writeword(0,(int)&psxRegs.code);
3399#endif
3400 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3401 emit_readword((int)&last_count,ECX);
3402 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3403 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3404 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3405 emit_writeword(HOST_CCREG,(int)&Count);
3406 }
3407 // What a mess. The status register (12) can enable interrupts,
3408 // so needs a special case to handle a pending interrupt.
3409 // The interrupt must be taken immediately, because a subsequent
3410 // instruction might disable interrupts again.
7139f3c8 3411 if(copr==12||copr==13) {
57871462 3412 emit_movimm(start+i*4+4,0);
3413 emit_movimm(0,1);
3414 emit_writeword(0,(int)&pcaddr);
3415 emit_writeword(1,(int)&pending_exception);
3416 }
3417 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3418 //else
3419 emit_call((int)MTC0);
7139f3c8 3420 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3421 emit_readword((int)&Count,HOST_CCREG);
3422 emit_readword((int)&next_interupt,ECX);
3423 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3424 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3425 emit_writeword(ECX,(int)&last_count);
3426 emit_storereg(CCREG,HOST_CCREG);
3427 }
7139f3c8 3428 if(copr==12||copr==13) {
57871462 3429 assert(!is_delayslot);
3430 emit_readword((int)&pending_exception,14);
3431 }
3432 emit_loadreg(rs1[i],s);
3433 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3434 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3435 if(copr==12||copr==13) {
57871462 3436 emit_test(14,14);
3437 emit_jne((int)&do_interrupt);
3438 }
3439 cop1_usable=0;
3440 }
3441 else
3442 {
3443 assert(opcode2[i]==0x10);
3d624f89 3444#ifndef DISABLE_TLB
57871462 3445 if((source[i]&0x3f)==0x01) // TLBR
3446 emit_call((int)TLBR);
3447 if((source[i]&0x3f)==0x02) // TLBWI
3448 emit_call((int)TLBWI_new);
3449 if((source[i]&0x3f)==0x06) { // TLBWR
3450 // The TLB entry written by TLBWR is dependent on the count,
3451 // so update the cycle count
3452 emit_readword((int)&last_count,ECX);
3453 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3454 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3455 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3456 emit_writeword(HOST_CCREG,(int)&Count);
3457 emit_call((int)TLBWR_new);
3458 }
3459 if((source[i]&0x3f)==0x08) // TLBP
3460 emit_call((int)TLBP);
3d624f89 3461#endif
57871462 3462 if((source[i]&0x3f)==0x18) // ERET
3463 {
3464 int count=ccadj[i];
3465 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3466 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3467 emit_jmp((int)jump_eret);
3468 }
3469 }
3470}
3471
b9b61529 3472static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3473{
3474 switch (copr) {
3475 case 1:
3476 case 3:
3477 case 5:
3478 case 8:
3479 case 9:
3480 case 10:
3481 case 11:
3482 emit_readword((int)&reg_cop2d[copr],tl);
3483 emit_signextend16(tl,tl);
3484 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3485 break;
3486 case 7:
3487 case 16:
3488 case 17:
3489 case 18:
3490 case 19:
3491 emit_readword((int)&reg_cop2d[copr],tl);
3492 emit_andimm(tl,0xffff,tl);
3493 emit_writeword(tl,(int)&reg_cop2d[copr]);
3494 break;
3495 case 15:
3496 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3497 emit_writeword(tl,(int)&reg_cop2d[copr]);
3498 break;
3499 case 28:
3500 case 30:
3501 emit_movimm(0,tl);
3502 break;
3503 case 29:
3504 emit_readword((int)&reg_cop2d[9],temp);
3505 emit_testimm(temp,0x8000); // do we need this?
3506 emit_andimm(temp,0xf80,temp);
3507 emit_andne_imm(temp,0,temp);
3508 emit_shr(temp,7,tl);
3509 emit_readword((int)&reg_cop2d[10],temp);
3510 emit_testimm(temp,0x8000);
3511 emit_andimm(temp,0xf80,temp);
3512 emit_andne_imm(temp,0,temp);
3513 emit_orrshr(temp,2,tl);
3514 emit_readword((int)&reg_cop2d[11],temp);
3515 emit_testimm(temp,0x8000);
3516 emit_andimm(temp,0xf80,temp);
3517 emit_andne_imm(temp,0,temp);
3518 emit_orrshl(temp,3,tl);
3519 emit_writeword(tl,(int)&reg_cop2d[copr]);
3520 break;
3521 default:
3522 emit_readword((int)&reg_cop2d[copr],tl);
3523 break;
3524 }
3525}
3526
3527static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3528{
3529 switch (copr) {
3530 case 15:
3531 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3532 emit_writeword(sl,(int)&reg_cop2d[copr]);
3533 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3534 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3535 emit_writeword(sl,(int)&reg_cop2d[14]);
3536 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3537 break;
3538 case 28:
3539 emit_andimm(sl,0x001f,temp);
3540 emit_shl(temp,7,temp);
3541 emit_writeword(temp,(int)&reg_cop2d[9]);
3542 emit_andimm(sl,0x03e0,temp);
3543 emit_shl(temp,2,temp);
3544 emit_writeword(temp,(int)&reg_cop2d[10]);
3545 emit_andimm(sl,0x7c00,temp);
3546 emit_shr(temp,3,temp);
3547 emit_writeword(temp,(int)&reg_cop2d[11]);
3548 emit_writeword(sl,(int)&reg_cop2d[28]);
3549 break;
3550 case 30:
3551 emit_movs(sl,temp);
3552 emit_mvnmi(temp,temp);
3553 emit_clz(temp,temp);
3554 emit_writeword(sl,(int)&reg_cop2d[30]);
3555 emit_writeword(temp,(int)&reg_cop2d[31]);
3556 break;
3557 case 7:
3558 case 29:
3559 case 31:
3560 break;
3561 default:
3562 emit_writeword(sl,(int)&reg_cop2d[copr]);
3563 break;
3564 }
3565}
3566
3567void cop2_assemble(int i,struct regstat *i_regs)
3568{
3569 u_int copr=(source[i]>>11)&0x1f;
3570 signed char temp=get_reg(i_regs->regmap,-1);
3571 if (opcode2[i]==0) { // MFC2
3572 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3573 if(tl>=0)
3574 cop2_get_dreg(copr,tl,temp);
3575 }
3576 else if (opcode2[i]==4) { // MTC2
3577 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3578 cop2_put_dreg(copr,sl,temp);
3579 }
3580 else if (opcode2[i]==2) // CFC2
3581 {
3582 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3583 if(tl>=0)
3584 emit_readword((int)&reg_cop2c[copr],tl);
3585 }
3586 else if (opcode2[i]==6) // CTC2
3587 {
3588 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3589 switch(copr) {
3590 case 4:
3591 case 12:
3592 case 20:
3593 case 26:
3594 case 27:
3595 case 29:
3596 case 30:
3597 emit_signextend16(sl,temp);
3598 break;
3599 case 31:
3600 //value = value & 0x7ffff000;
3601 //if (value & 0x7f87e000) value |= 0x80000000;
3602 emit_shrimm(sl,12,temp);
3603 emit_shlimm(temp,12,temp);
3604 emit_testimm(temp,0x7f000000);
3605 emit_testeqimm(temp,0x00870000);
3606 emit_testeqimm(temp,0x0000e000);
3607 emit_orrne_imm(temp,0x80000000,temp);
3608 break;
3609 default:
3610 temp=sl;
3611 break;
3612 }
3613 emit_writeword(temp,(int)&reg_cop2c[copr]);
3614 assert(sl>=0);
3615 }
3616}
3617
3618void c2op_assemble(int i,struct regstat *i_regs)
3619{
3620 signed char temp=get_reg(i_regs->regmap,-1);
3621 u_int c2op=source[i]&0x3f;
3622 u_int hr,reglist=0;
3623 for(hr=0;hr<HOST_REGS;hr++) {
3624 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3625 }
3626 if(i==0||itype[i-1]!=C2OP)
3627 save_regs(reglist);
3628
3629 if (gte_handlers[c2op]!=NULL) {
3630 int cc=get_reg(i_regs->regmap,CCREG);
3631 emit_movimm(source[i],temp); // opcode
3632 if (cc>=0&&gte_cycletab[c2op])
3633 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3634 emit_writeword(temp,(int)&psxRegs.code);
3635 emit_call((int)gte_handlers[c2op]);
3636 }
3637
3638 if(i>=slen-1||itype[i+1]!=C2OP)
3639 restore_regs(reglist);
3640}
3641
3642void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3643{
3644 // XXX: should just just do the exception instead
3645 if(!cop1_usable) {
3646 int jaddr=(int)out;
3647 emit_jmp(0);
3648 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3649 cop1_usable=1;
3650 }
3651}
3652
57871462 3653void cop1_assemble(int i,struct regstat *i_regs)
3654{
3d624f89 3655#ifndef DISABLE_COP1
57871462 3656 // Check cop1 unusable
3657 if(!cop1_usable) {
3658 signed char rs=get_reg(i_regs->regmap,CSREG);
3659 assert(rs>=0);
3660 emit_testimm(rs,0x20000000);
3661 int jaddr=(int)out;
3662 emit_jeq(0);
3663 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3664 cop1_usable=1;
3665 }
3666 if (opcode2[i]==0) { // MFC1
3667 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3668 if(tl>=0) {
3669 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3670 emit_readword_indexed(0,tl,tl);
3671 }
3672 }
3673 else if (opcode2[i]==1) { // DMFC1
3674 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3675 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3676 if(tl>=0) {
3677 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3678 if(th>=0) emit_readword_indexed(4,tl,th);
3679 emit_readword_indexed(0,tl,tl);
3680 }
3681 }
3682 else if (opcode2[i]==4) { // MTC1
3683 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3684 signed char temp=get_reg(i_regs->regmap,-1);
3685 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3686 emit_writeword_indexed(sl,0,temp);
3687 }
3688 else if (opcode2[i]==5) { // DMTC1
3689 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3690 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3691 signed char temp=get_reg(i_regs->regmap,-1);
3692 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3693 emit_writeword_indexed(sh,4,temp);
3694 emit_writeword_indexed(sl,0,temp);
3695 }
3696 else if (opcode2[i]==2) // CFC1
3697 {
3698 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3699 if(tl>=0) {
3700 u_int copr=(source[i]>>11)&0x1f;
3701 if(copr==0) emit_readword((int)&FCR0,tl);
3702 if(copr==31) emit_readword((int)&FCR31,tl);
3703 }
3704 }
3705 else if (opcode2[i]==6) // CTC1
3706 {
3707 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3708 u_int copr=(source[i]>>11)&0x1f;
3709 assert(sl>=0);
3710 if(copr==31)
3711 {
3712 emit_writeword(sl,(int)&FCR31);
3713 // Set the rounding mode
3714 //FIXME
3715 //char temp=get_reg(i_regs->regmap,-1);
3716 //emit_andimm(sl,3,temp);
3717 //emit_fldcw_indexed((int)&rounding_modes,temp);
3718 }
3719 }
3d624f89 3720#else
3721 cop1_unusable(i, i_regs);
3722#endif
57871462 3723}
3724
3725void fconv_assemble_arm(int i,struct regstat *i_regs)
3726{
3d624f89 3727#ifndef DISABLE_COP1
57871462 3728 signed char temp=get_reg(i_regs->regmap,-1);
3729 assert(temp>=0);
3730 // Check cop1 unusable
3731 if(!cop1_usable) {
3732 signed char rs=get_reg(i_regs->regmap,CSREG);
3733 assert(rs>=0);
3734 emit_testimm(rs,0x20000000);
3735 int jaddr=(int)out;
3736 emit_jeq(0);
3737 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3738 cop1_usable=1;
3739 }
3740
3741 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3742 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3743 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3744 emit_flds(temp,15);
3745 emit_ftosizs(15,15); // float->int, truncate
3746 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3747 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3748 emit_fsts(15,temp);
3749 return;
3750 }
3751 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3752 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3753 emit_vldr(temp,7);
3754 emit_ftosizd(7,13); // double->int, truncate
3755 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3756 emit_fsts(13,temp);
3757 return;
3758 }
3759
3760 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3761 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3762 emit_flds(temp,13);
3763 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3764 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3765 emit_fsitos(13,15);
3766 emit_fsts(15,temp);
3767 return;
3768 }
3769 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3770 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3771 emit_flds(temp,13);
3772 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3773 emit_fsitod(13,7);
3774 emit_vstr(7,temp);
3775 return;
3776 }
3777
3778 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3779 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3780 emit_flds(temp,13);
3781 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3782 emit_fcvtds(13,7);
3783 emit_vstr(7,temp);
3784 return;
3785 }
3786 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3787 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3788 emit_vldr(temp,7);
3789 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3790 emit_fcvtsd(7,13);
3791 emit_fsts(13,temp);
3792 return;
3793 }
3794 #endif
3795
3796 // C emulation code
3797
3798 u_int hr,reglist=0;
3799 for(hr=0;hr<HOST_REGS;hr++) {
3800 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3801 }
3802 save_regs(reglist);
3803
3804 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3805 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3806 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3807 emit_call((int)cvt_s_w);
3808 }
3809 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3810 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3811 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3812 emit_call((int)cvt_d_w);
3813 }
3814 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3815 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3816 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3817 emit_call((int)cvt_s_l);
3818 }
3819 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3820 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3821 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3822 emit_call((int)cvt_d_l);
3823 }
3824
3825 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3826 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3827 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3828 emit_call((int)cvt_d_s);
3829 }
3830 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3831 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3832 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3833 emit_call((int)cvt_w_s);
3834 }
3835 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3836 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3837 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3838 emit_call((int)cvt_l_s);
3839 }
3840
3841 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3842 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3843 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3844 emit_call((int)cvt_s_d);
3845 }
3846 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3847 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3848 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3849 emit_call((int)cvt_w_d);
3850 }
3851 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3852 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3853 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3854 emit_call((int)cvt_l_d);
3855 }
3856
3857 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3858 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3859 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3860 emit_call((int)round_l_s);
3861 }
3862 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3863 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3864 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3865 emit_call((int)trunc_l_s);
3866 }
3867 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3868 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3869 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3870 emit_call((int)ceil_l_s);
3871 }
3872 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3873 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3874 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3875 emit_call((int)floor_l_s);
3876 }
3877 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3878 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3879 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3880 emit_call((int)round_w_s);
3881 }
3882 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3883 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3884 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3885 emit_call((int)trunc_w_s);
3886 }
3887 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3888 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3889 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3890 emit_call((int)ceil_w_s);
3891 }
3892 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3893 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3894 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3895 emit_call((int)floor_w_s);
3896 }
3897
3898 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3899 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3900 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3901 emit_call((int)round_l_d);
3902 }
3903 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3904 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3905 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3906 emit_call((int)trunc_l_d);
3907 }
3908 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3909 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3910 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3911 emit_call((int)ceil_l_d);
3912 }
3913 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3914 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3915 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3916 emit_call((int)floor_l_d);
3917 }
3918 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3919 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3920 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3921 emit_call((int)round_w_d);
3922 }
3923 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3924 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3925 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3926 emit_call((int)trunc_w_d);
3927 }
3928 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3929 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3930 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3931 emit_call((int)ceil_w_d);
3932 }
3933 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3934 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3935 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3936 emit_call((int)floor_w_d);
3937 }
3938
3939 restore_regs(reglist);
3d624f89 3940#else
3941 cop1_unusable(i, i_regs);
3942#endif
57871462 3943}
3944#define fconv_assemble fconv_assemble_arm
3945
3946void fcomp_assemble(int i,struct regstat *i_regs)
3947{
3d624f89 3948#ifndef DISABLE_COP1
57871462 3949 signed char fs=get_reg(i_regs->regmap,FSREG);
3950 signed char temp=get_reg(i_regs->regmap,-1);
3951 assert(temp>=0);
3952 // Check cop1 unusable
3953 if(!cop1_usable) {
3954 signed char cs=get_reg(i_regs->regmap,CSREG);
3955 assert(cs>=0);
3956 emit_testimm(cs,0x20000000);
3957 int jaddr=(int)out;
3958 emit_jeq(0);
3959 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3960 cop1_usable=1;
3961 }
3962
3963 if((source[i]&0x3f)==0x30) {
3964 emit_andimm(fs,~0x800000,fs);
3965 return;
3966 }
3967
3968 if((source[i]&0x3e)==0x38) {
3969 // sf/ngle - these should throw exceptions for NaNs
3970 emit_andimm(fs,~0x800000,fs);
3971 return;
3972 }
3973
3974 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3975 if(opcode2[i]==0x10) {
3976 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3977 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3978 emit_orimm(fs,0x800000,fs);
3979 emit_flds(temp,14);
3980 emit_flds(HOST_TEMPREG,15);
3981 emit_fcmps(14,15);
3982 emit_fmstat();
3983 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
3984 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
3985 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
3986 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
3987 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
3988 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
3989 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
3990 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
3991 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
3992 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
3993 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
3994 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
3995 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
3996 return;
3997 }
3998 if(opcode2[i]==0x11) {
3999 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4000 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4001 emit_orimm(fs,0x800000,fs);
4002 emit_vldr(temp,6);
4003 emit_vldr(HOST_TEMPREG,7);
4004 emit_fcmpd(6,7);
4005 emit_fmstat();
4006 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4007 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4008 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4009 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4010 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4011 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4012 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4013 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4014 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4015 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4016 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4017 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4018 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4019 return;
4020 }
4021 #endif
4022
4023 // C only
4024
4025 u_int hr,reglist=0;
4026 for(hr=0;hr<HOST_REGS;hr++) {
4027 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4028 }
4029 reglist&=~(1<<fs);
4030 save_regs(reglist);
4031 if(opcode2[i]==0x10) {
4032 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4033 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4034 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4035 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4036 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4037 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4038 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4039 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4040 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4041 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4042 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4043 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4044 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4045 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4046 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4047 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4048 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4049 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4050 }
4051 if(opcode2[i]==0x11) {
4052 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4053 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4054 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4055 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4056 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4057 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4058 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4059 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4060 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4061 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4062 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4063 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4064 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4065 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4066 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4067 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4068 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4069 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4070 }
4071 restore_regs(reglist);
4072 emit_loadreg(FSREG,fs);
3d624f89 4073#else
4074 cop1_unusable(i, i_regs);
4075#endif
57871462 4076}
4077
4078void float_assemble(int i,struct regstat *i_regs)
4079{
3d624f89 4080#ifndef DISABLE_COP1
57871462 4081 signed char temp=get_reg(i_regs->regmap,-1);
4082 assert(temp>=0);
4083 // Check cop1 unusable
4084 if(!cop1_usable) {
4085 signed char cs=get_reg(i_regs->regmap,CSREG);
4086 assert(cs>=0);
4087 emit_testimm(cs,0x20000000);
4088 int jaddr=(int)out;
4089 emit_jeq(0);
4090 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4091 cop1_usable=1;
4092 }
4093
4094 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4095 if((source[i]&0x3f)==6) // mov
4096 {
4097 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4098 if(opcode2[i]==0x10) {
4099 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4100 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4101 emit_readword_indexed(0,temp,temp);
4102 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4103 }
4104 if(opcode2[i]==0x11) {
4105 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4106 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4107 emit_vldr(temp,7);
4108 emit_vstr(7,HOST_TEMPREG);
4109 }
4110 }
4111 return;
4112 }
4113
4114 if((source[i]&0x3f)>3)
4115 {
4116 if(opcode2[i]==0x10) {
4117 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4118 emit_flds(temp,15);
4119 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4120 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4121 }
4122 if((source[i]&0x3f)==4) // sqrt
4123 emit_fsqrts(15,15);
4124 if((source[i]&0x3f)==5) // abs
4125 emit_fabss(15,15);
4126 if((source[i]&0x3f)==7) // neg
4127 emit_fnegs(15,15);
4128 emit_fsts(15,temp);
4129 }
4130 if(opcode2[i]==0x11) {
4131 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4132 emit_vldr(temp,7);
4133 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4134 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4135 }
4136 if((source[i]&0x3f)==4) // sqrt
4137 emit_fsqrtd(7,7);
4138 if((source[i]&0x3f)==5) // abs
4139 emit_fabsd(7,7);
4140 if((source[i]&0x3f)==7) // neg
4141 emit_fnegd(7,7);
4142 emit_vstr(7,temp);
4143 }
4144 return;
4145 }
4146 if((source[i]&0x3f)<4)
4147 {
4148 if(opcode2[i]==0x10) {
4149 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4150 }
4151 if(opcode2[i]==0x11) {
4152 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4153 }
4154 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4155 if(opcode2[i]==0x10) {
4156 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4157 emit_flds(temp,15);
4158 emit_flds(HOST_TEMPREG,13);
4159 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4160 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4161 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4162 }
4163 }
4164 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4165 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4166 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4167 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4168 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4169 emit_fsts(15,HOST_TEMPREG);
4170 }else{
4171 emit_fsts(15,temp);
4172 }
4173 }
4174 else if(opcode2[i]==0x11) {
4175 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4176 emit_vldr(temp,7);
4177 emit_vldr(HOST_TEMPREG,6);
4178 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4179 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4180 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4181 }
4182 }
4183 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4184 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4185 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4186 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4187 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4188 emit_vstr(7,HOST_TEMPREG);
4189 }else{
4190 emit_vstr(7,temp);
4191 }
4192 }
4193 }
4194 else {
4195 if(opcode2[i]==0x10) {
4196 emit_flds(temp,15);
4197 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4198 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4199 }
4200 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4201 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4202 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4203 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4204 emit_fsts(15,temp);
4205 }
4206 else if(opcode2[i]==0x11) {
4207 emit_vldr(temp,7);
4208 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4209 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4210 }
4211 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4212 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4213 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4214 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4215 emit_vstr(7,temp);
4216 }
4217 }
4218 return;
4219 }
4220 #endif
4221
4222 u_int hr,reglist=0;
4223 for(hr=0;hr<HOST_REGS;hr++) {
4224 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4225 }
4226 if(opcode2[i]==0x10) { // Single precision
4227 save_regs(reglist);
4228 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4229 if((source[i]&0x3f)<4) {
4230 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4231 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4232 }else{
4233 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4234 }
4235 switch(source[i]&0x3f)
4236 {
4237 case 0x00: emit_call((int)add_s);break;
4238 case 0x01: emit_call((int)sub_s);break;
4239 case 0x02: emit_call((int)mul_s);break;
4240 case 0x03: emit_call((int)div_s);break;
4241 case 0x04: emit_call((int)sqrt_s);break;
4242 case 0x05: emit_call((int)abs_s);break;
4243 case 0x06: emit_call((int)mov_s);break;
4244 case 0x07: emit_call((int)neg_s);break;
4245 }
4246 restore_regs(reglist);
4247 }
4248 if(opcode2[i]==0x11) { // Double precision
4249 save_regs(reglist);
4250 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4251 if((source[i]&0x3f)<4) {
4252 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4253 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4254 }else{
4255 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4256 }
4257 switch(source[i]&0x3f)
4258 {
4259 case 0x00: emit_call((int)add_d);break;
4260 case 0x01: emit_call((int)sub_d);break;
4261 case 0x02: emit_call((int)mul_d);break;
4262 case 0x03: emit_call((int)div_d);break;
4263 case 0x04: emit_call((int)sqrt_d);break;
4264 case 0x05: emit_call((int)abs_d);break;
4265 case 0x06: emit_call((int)mov_d);break;
4266 case 0x07: emit_call((int)neg_d);break;
4267 }
4268 restore_regs(reglist);
4269 }
3d624f89 4270#else
4271 cop1_unusable(i, i_regs);
4272#endif
57871462 4273}
4274
4275void multdiv_assemble_arm(int i,struct regstat *i_regs)
4276{
4277 // case 0x18: MULT
4278 // case 0x19: MULTU
4279 // case 0x1A: DIV
4280 // case 0x1B: DIVU
4281 // case 0x1C: DMULT
4282 // case 0x1D: DMULTU
4283 // case 0x1E: DDIV
4284 // case 0x1F: DDIVU
4285 if(rs1[i]&&rs2[i])
4286 {
4287 if((opcode2[i]&4)==0) // 32-bit
4288 {
4289 if(opcode2[i]==0x18) // MULT
4290 {
4291 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4292 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4293 signed char hi=get_reg(i_regs->regmap,HIREG);
4294 signed char lo=get_reg(i_regs->regmap,LOREG);
4295 assert(m1>=0);
4296 assert(m2>=0);
4297 assert(hi>=0);
4298 assert(lo>=0);
4299 emit_smull(m1,m2,hi,lo);
4300 }
4301 if(opcode2[i]==0x19) // MULTU
4302 {
4303 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4304 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4305 signed char hi=get_reg(i_regs->regmap,HIREG);
4306 signed char lo=get_reg(i_regs->regmap,LOREG);
4307 assert(m1>=0);
4308 assert(m2>=0);
4309 assert(hi>=0);
4310 assert(lo>=0);
4311 emit_umull(m1,m2,hi,lo);
4312 }
4313 if(opcode2[i]==0x1A) // DIV
4314 {
4315 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4316 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4317 assert(d1>=0);
4318 assert(d2>=0);
4319 signed char quotient=get_reg(i_regs->regmap,LOREG);
4320 signed char remainder=get_reg(i_regs->regmap,HIREG);
4321 assert(quotient>=0);
4322 assert(remainder>=0);
4323 emit_movs(d1,remainder);
4324 emit_negmi(remainder,remainder);
4325 emit_movs(d2,HOST_TEMPREG);
4326 emit_jeq((int)out+52); // Division by zero
4327 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4328 emit_clz(HOST_TEMPREG,quotient);
4329 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4330 emit_orimm(quotient,1<<31,quotient);
4331 emit_shr(quotient,quotient,quotient);
4332 emit_cmp(remainder,HOST_TEMPREG);
4333 emit_subcs(remainder,HOST_TEMPREG,remainder);
4334 emit_adcs(quotient,quotient,quotient);
4335 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4336 emit_jcc((int)out-16); // -4
4337 emit_teq(d1,d2);
4338 emit_negmi(quotient,quotient);
4339 emit_test(d1,d1);
4340 emit_negmi(remainder,remainder);
4341 }
4342 if(opcode2[i]==0x1B) // DIVU
4343 {
4344 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4345 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4346 assert(d1>=0);
4347 assert(d2>=0);
4348 signed char quotient=get_reg(i_regs->regmap,LOREG);
4349 signed char remainder=get_reg(i_regs->regmap,HIREG);
4350 assert(quotient>=0);
4351 assert(remainder>=0);
4352 emit_test(d2,d2);
4353 emit_jeq((int)out+44); // Division by zero
4354 emit_clz(d2,HOST_TEMPREG);
4355 emit_movimm(1<<31,quotient);
4356 emit_shl(d2,HOST_TEMPREG,d2);
4357 emit_mov(d1,remainder);
4358 emit_shr(quotient,HOST_TEMPREG,quotient);
4359 emit_cmp(remainder,d2);
4360 emit_subcs(remainder,d2,remainder);
4361 emit_adcs(quotient,quotient,quotient);
4362 emit_shrcc_imm(d2,1,d2);
4363 emit_jcc((int)out-16); // -4
4364 }
4365 }
4366 else // 64-bit
4367 {
4368 if(opcode2[i]==0x1C) // DMULT
4369 {
4370 assert(opcode2[i]!=0x1C);
4371 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4372 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4373 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4374 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4375 assert(m1h>=0);
4376 assert(m2h>=0);
4377 assert(m1l>=0);
4378 assert(m2l>=0);
4379 emit_pushreg(m2h);
4380 emit_pushreg(m2l);
4381 emit_pushreg(m1h);
4382 emit_pushreg(m1l);
4383 emit_call((int)&mult64);
4384 emit_popreg(m1l);
4385 emit_popreg(m1h);
4386 emit_popreg(m2l);
4387 emit_popreg(m2h);
4388 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4389 signed char hil=get_reg(i_regs->regmap,HIREG);
4390 if(hih>=0) emit_loadreg(HIREG|64,hih);
4391 if(hil>=0) emit_loadreg(HIREG,hil);
4392 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4393 signed char lol=get_reg(i_regs->regmap,LOREG);
4394 if(loh>=0) emit_loadreg(LOREG|64,loh);
4395 if(lol>=0) emit_loadreg(LOREG,lol);
4396 }
4397 if(opcode2[i]==0x1D) // DMULTU
4398 {
4399 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4400 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4401 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4402 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4403 assert(m1h>=0);
4404 assert(m2h>=0);
4405 assert(m1l>=0);
4406 assert(m2l>=0);
4407 save_regs(0x100f);
4408 if(m1l!=0) emit_mov(m1l,0);
4409 if(m1h==0) emit_readword((int)&dynarec_local,1);
4410 else if(m1h>1) emit_mov(m1h,1);
4411 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4412 else if(m2l>2) emit_mov(m2l,2);
4413 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4414 else if(m2h>3) emit_mov(m2h,3);
4415 emit_call((int)&multu64);
4416 restore_regs(0x100f);
4417 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4418 signed char hil=get_reg(i_regs->regmap,HIREG);
4419 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4420 signed char lol=get_reg(i_regs->regmap,LOREG);
4421 /*signed char temp=get_reg(i_regs->regmap,-1);
4422 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4423 signed char rl=get_reg(i_regs->regmap,HIREG);
4424 assert(m1h>=0);
4425 assert(m2h>=0);
4426 assert(m1l>=0);
4427 assert(m2l>=0);
4428 assert(temp>=0);
4429 //emit_mov(m1l,EAX);
4430 //emit_mul(m2l);
4431 emit_umull(rl,rh,m1l,m2l);
4432 emit_storereg(LOREG,rl);
4433 emit_mov(rh,temp);
4434 //emit_mov(m1h,EAX);
4435 //emit_mul(m2l);
4436 emit_umull(rl,rh,m1h,m2l);
4437 emit_adds(rl,temp,temp);
4438 emit_adcimm(rh,0,rh);
4439 emit_storereg(HIREG,rh);
4440 //emit_mov(m2h,EAX);
4441 //emit_mul(m1l);
4442 emit_umull(rl,rh,m1l,m2h);
4443 emit_adds(rl,temp,temp);
4444 emit_adcimm(rh,0,rh);
4445 emit_storereg(LOREG|64,temp);
4446 emit_mov(rh,temp);
4447 //emit_mov(m2h,EAX);
4448 //emit_mul(m1h);
4449 emit_umull(rl,rh,m1h,m2h);
4450 emit_adds(rl,temp,rl);
4451 emit_loadreg(HIREG,temp);
4452 emit_adcimm(rh,0,rh);
4453 emit_adds(rl,temp,rl);
4454 emit_adcimm(rh,0,rh);
4455 // DEBUG
4456 /*
4457 emit_pushreg(m2h);
4458 emit_pushreg(m2l);
4459 emit_pushreg(m1h);
4460 emit_pushreg(m1l);
4461 emit_call((int)&multu64);
4462 emit_popreg(m1l);
4463 emit_popreg(m1h);
4464 emit_popreg(m2l);
4465 emit_popreg(m2h);
4466 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4467 signed char hil=get_reg(i_regs->regmap,HIREG);
4468 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4469 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4470 */
4471 // Shouldn't be necessary
4472 //char loh=get_reg(i_regs->regmap,LOREG|64);
4473 //char lol=get_reg(i_regs->regmap,LOREG);
4474 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4475 //if(lol>=0) emit_loadreg(LOREG,lol);
4476 }
4477 if(opcode2[i]==0x1E) // DDIV
4478 {
4479 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4480 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4481 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4482 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4483 assert(d1h>=0);
4484 assert(d2h>=0);
4485 assert(d1l>=0);
4486 assert(d2l>=0);
4487 save_regs(0x100f);
4488 if(d1l!=0) emit_mov(d1l,0);
4489 if(d1h==0) emit_readword((int)&dynarec_local,1);
4490 else if(d1h>1) emit_mov(d1h,1);
4491 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4492 else if(d2l>2) emit_mov(d2l,2);
4493 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4494 else if(d2h>3) emit_mov(d2h,3);
4495 emit_call((int)&div64);
4496 restore_regs(0x100f);
4497 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4498 signed char hil=get_reg(i_regs->regmap,HIREG);
4499 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4500 signed char lol=get_reg(i_regs->regmap,LOREG);
4501 if(hih>=0) emit_loadreg(HIREG|64,hih);
4502 if(hil>=0) emit_loadreg(HIREG,hil);
4503 if(loh>=0) emit_loadreg(LOREG|64,loh);
4504 if(lol>=0) emit_loadreg(LOREG,lol);
4505 }
4506 if(opcode2[i]==0x1F) // DDIVU
4507 {
4508 //u_int hr,reglist=0;
4509 //for(hr=0;hr<HOST_REGS;hr++) {
4510 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4511 //}
4512 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4513 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4514 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4515 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4516 assert(d1h>=0);
4517 assert(d2h>=0);
4518 assert(d1l>=0);
4519 assert(d2l>=0);
4520 save_regs(0x100f);
4521 if(d1l!=0) emit_mov(d1l,0);
4522 if(d1h==0) emit_readword((int)&dynarec_local,1);
4523 else if(d1h>1) emit_mov(d1h,1);
4524 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4525 else if(d2l>2) emit_mov(d2l,2);
4526 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4527 else if(d2h>3) emit_mov(d2h,3);
4528 emit_call((int)&divu64);
4529 restore_regs(0x100f);
4530 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4531 signed char hil=get_reg(i_regs->regmap,HIREG);
4532 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4533 signed char lol=get_reg(i_regs->regmap,LOREG);
4534 if(hih>=0) emit_loadreg(HIREG|64,hih);
4535 if(hil>=0) emit_loadreg(HIREG,hil);
4536 if(loh>=0) emit_loadreg(LOREG|64,loh);
4537 if(lol>=0) emit_loadreg(LOREG,lol);
4538 }
4539 }
4540 }
4541 else
4542 {
4543 // Multiply by zero is zero.
4544 // MIPS does not have a divide by zero exception.
4545 // The result is undefined, we return zero.
4546 signed char hr=get_reg(i_regs->regmap,HIREG);
4547 signed char lr=get_reg(i_regs->regmap,LOREG);
4548 if(hr>=0) emit_zeroreg(hr);
4549 if(lr>=0) emit_zeroreg(lr);
4550 }
4551}
4552#define multdiv_assemble multdiv_assemble_arm
4553
/*
 * Architecture hook that emits nothing on ARM.
 *
 * Don't need this for ARM.  On x86, this puts the value 0xf8 into the
 * register.  On ARM the hash can be done with a single instruction
 * (see do_rhash).
 */
void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
4558
4559void do_preload_rhtbl(int ht) {
4560 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4561}
4562
/*
 * Emit code that derives the mini hash table byte offset from the address
 * in rs: rh = rs & 0xf8.
 */
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4566
4567void do_miniht_load(int ht,int rh) {
4568 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4569 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4570}
4571
4572void do_miniht_jump(int rs,int rh,int ht) {
4573 emit_cmp(rh,rs);
4574 emit_ldreq_indexed(ht,4,15);
4575 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4576 emit_mov(rs,7);
4577 emit_jmp(jump_vaddr_reg[7]);
4578 #else
4579 emit_jmp(jump_vaddr_reg[rs]);
4580 #endif
4581}
4582
4583void do_miniht_insert(u_int return_address,int rt,int temp) {
4584 #ifdef ARMv5_ONLY
4585 emit_movimm(return_address,rt); // PC into link register
4586 add_to_linker((int)out,return_address,1);
4587 emit_pcreladdr(temp);
4588 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4589 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4590 #else
4591 emit_movw(return_address&0x0000FFFF,rt);
4592 add_to_linker((int)out,return_address,1);
4593 emit_pcreladdr(temp);
4594 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4595 emit_movt(return_address&0xFFFF0000,rt);
4596 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4597 #endif
4598}
4599
4600// Sign-extend to 64 bits and write out upper half of a register
4601// This is useful where we have a 32-bit value in a register, and want to
4602// keep it in a 32-bit register, but can't guarantee that it won't be read
4603// as a 64-bit value later.
4604void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4605{
24385cae 4606#ifndef FORCE32
57871462 4607 if(is32_pre==is32) return;
4608 int hr,reg;
4609 for(hr=0;hr<HOST_REGS;hr++) {
4610 if(hr!=EXCLUDE_REG) {
4611 //if(pre[hr]==entry[hr]) {
4612 if((reg=pre[hr])>=0) {
4613 if((dirty>>hr)&1) {
4614 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4615 emit_sarimm(hr,31,HOST_TEMPREG);
4616 emit_storereg(reg|64,HOST_TEMPREG);
4617 }
4618 }
4619 }
4620 //}
4621 }
4622 }
24385cae 4623#endif
57871462 4624}
4625
4626void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4627{
4628 //if(dirty_pre==dirty) return;
4629 int hr,reg,new_hr;
4630 for(hr=0;hr<HOST_REGS;hr++) {
4631 if(hr!=EXCLUDE_REG) {
4632 reg=pre[hr];
4633 if(((~u)>>(reg&63))&1) {
4634 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4635 if(((dirty_pre&~dirty)>>hr)&1) {
4636 if(reg>0&&reg<34) {
4637 emit_storereg(reg,hr);
4638 if( ((is32_pre&~uu)>>reg)&1 ) {
4639 emit_sarimm(hr,31,HOST_TEMPREG);
4640 emit_storereg(reg|64,HOST_TEMPREG);
4641 }
4642 }
4643 else if(reg>=64) {
4644 emit_storereg(reg,hr);
4645 }
4646 }
4647 }
4648 else // Check if register moved to a different register
4649 if((new_hr=get_reg(entry,reg))>=0) {
4650 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4651 if(reg>0&&reg<34) {
4652 emit_storereg(reg,hr);
4653 if( ((is32_pre&~uu)>>reg)&1 ) {
4654 emit_sarimm(hr,31,HOST_TEMPREG);
4655 emit_storereg(reg|64,HOST_TEMPREG);
4656 }
4657 }
4658 else if(reg>=64) {
4659 emit_storereg(reg,hr);
4660 }
4661 }
4662 }
4663 }
4664 }
4665 }
4666}
4667
4668
4669/* using strd could possibly help but you'd have to allocate registers in pairs
4670void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4671{
4672 int hr;
4673 int wrote=-1;
4674 for(hr=HOST_REGS-1;hr>=0;hr--) {
4675 if(hr!=EXCLUDE_REG) {
4676 if(pre[hr]!=entry[hr]) {
4677 if(pre[hr]>=0) {
4678 if((dirty>>hr)&1) {
4679 if(get_reg(entry,pre[hr])<0) {
4680 if(pre[hr]<64) {
4681 if(!((u>>pre[hr])&1)) {
4682 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4683 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4684 emit_sarimm(hr,31,hr+1);
4685 emit_strdreg(pre[hr],hr);
4686 }
4687 else
4688 emit_storereg(pre[hr],hr);
4689 }else{
4690 emit_storereg(pre[hr],hr);
4691 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4692 emit_sarimm(hr,31,hr);
4693 emit_storereg(pre[hr]|64,hr);
4694 }
4695 }
4696 }
4697 }else{
4698 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4699 emit_storereg(pre[hr],hr);
4700 }
4701 }
4702 wrote=hr;
4703 }
4704 }
4705 }
4706 }
4707 }
4708 }
4709 for(hr=0;hr<HOST_REGS;hr++) {
4710 if(hr!=EXCLUDE_REG) {
4711 if(pre[hr]!=entry[hr]) {
4712 if(pre[hr]>=0) {
4713 int nr;
4714 if((nr=get_reg(entry,pre[hr]))>=0) {
4715 emit_mov(hr,nr);
4716 }
4717 }
4718 }
4719 }
4720 }
4721}
4722#define wb_invalidate wb_invalidate_arm
4723*/
4724
4725// CPU-architecture-specific initialization
4726void arch_init() {
3d624f89 4727#ifndef DISABLE_COP1
57871462 4728 rounding_modes[0]=0x0<<22; // round
4729 rounding_modes[1]=0x3<<22; // trunc
4730 rounding_modes[2]=0x1<<22; // ceil
4731 rounding_modes[3]=0x2<<22; // floor
3d624f89 4732#endif
57871462 4733}
b9b61529 4734
4735// vim:shiftwidth=2:expandtab