drc: fix SWL/SWR confusion in do_unalignedwritestub
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// Emulator/dynarec state that is defined outside this file.
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
// emit_loadreg()/emit_storereg() encode variable addresses as offsets
// from &dynarec_local (printed as "fp+offset" in the debug output).
57871462 30extern void *dynarec_local;
// Indexed by (address>>12) in verify_dirty() and get_bounds().
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
// Routines defined outside this file.  The jump_vaddr_rN entries are
// collected in the jump_vaddr_reg[] table below; there is no variant
// for r11 or r13-r15.
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
// Addresses of the jump_vaddr_rN routines, one slot per register number
// (presumably indexed by the host register holding the target address --
// confirm against the users of this table).  Slots 11 and 13-15 are 0
// because no routine is declared for those registers.
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
f76eeef9 145void *kill_pointer(void *stub)
57871462 146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 153 return i_ptr;
57871462 154}
155
// Return the current destination of the branch instruction that a stub
// refers to.
//
// The second word of the stub must match 0x05900000 under the mask
// 0x0ff00000 (asserted).  Its low 12 bits are added, plus 8, to that
// word's own address to locate a literal holding the address of a branch
// instruction ((insn&0x0f000000)==0x0a000000, asserted).  The result is
// the branch address + sign-extended 24-bit offset*4 + 8.
//
// Byte offsets are applied through char* (arithmetic on void* is a GNU
// extension), and the offset field is shifted left as an unsigned value
// before being sign-extended, the same way verify_dirty() decodes its
// bl instruction, so no signed value is ever shifted left.
int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *ptr=(int *)((char *)stub+4);
  assert((*ptr&0x0ff00000)==0x05900000);
  u_int offset=*ptr&0xfff;
  int **l_ptr=(int **)((char *)ptr+offset+8);
  int *i_ptr=*l_ptr;
  assert((*i_ptr&0x0f000000)==0x0a000000);
  int branch_offset=(signed int)((u_int)*i_ptr<<8)>>6;
  return (int)i_ptr+branch_offset+8;
}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
// Check whether the code a dirty entry stub was compiled from is still
// unchanged.  Decodes the (source, copy, len) triple embedded in the stub
// at addr and returns non-zero when the len bytes at source equal the len
// bytes at copy, zero otherwise.
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
// Each 32-bit value is reassembled from the split 12+4 bit immediate
// fields of a low-half word (ptr[0], ptr[1]) and a high-half word
// (ptr[2], ptr[3]); len only has a low half (ptr[4]).
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
// The bl may be one word further on.
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
// For the _vm and _ds verifiers the source address is translated through
// memory_map first.  The check fails (returns 0) if the first page's
// entry is >= 0x80000000 or if the pages covered by [source,source+len)
// do not all have the same entry once shifted left by 2.
57871462 210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
// Report the source range covered by the dirty entry stub at addr.
// Decodes source and len from the stub the same way verify_dirty() does
// and stores source in *start and source+len in *end.
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
// The bl may be one word further on.
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
// For the _vm and _ds verifiers, source is translated through memory_map;
// an entry >= 0x80000000 makes the reported range start at 0.
57871462 263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
// Map guest register reg to a host register in cur->regmap for
// instruction index i.  Whichever host register is chosen has its dirty
// and isconst bits cleared (except on the "preferred register" eviction
// path, which clears them for every slot it unmaps).
//
// Order of attempts:
//   1. nothing to do if reg is flagged unneeded in cur->u or is already
//      mapped;
//   2. the preferred host register (reg&7, HOST_CCREG for CCREG, 12 for
//      PTEMP/FTEMP, then adjusted by loop_reg()) if it is free or holds
//      an unneeded register;
//   3. after unmapping at most one unneeded register, any free host
//      register -- first one that the previous instruction's mapping did
//      not use for its rs1/rs2/rt1/rt2, then any;
//   4. eviction, driven by the hsn[] table filled in by lsn(); higher
//      hsn values are evicted first.
// Exits the program if nothing could be allocated.
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
// The preferred register is occupied: take it over if its current
// occupant is unneeded (regmap values >= 64 are checked against cur->uu,
// values below 64 against cur->u).
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
// Note: the loop stops after unmapping the first unneeded register.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register; lsn() then fills it in (and may
// update preferred_reg).
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: only hsn values 10..3, and never a register the previous
// instruction reads or writes.
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// The slot holding r+64 is tried before the slot holding r.
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
// Last resort: every hsn value 10..0, without the restrictions above.
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
// Allocate host registers for both halves of guest register reg at
// instruction index i.  The lower half is allocated by alloc_reg(); the
// upper half is then mapped into cur->regmap as reg|64, following the
// same sequence of attempts as alloc_reg() but with cur->uu as the
// "unneeded" mask and 8+(reg&1) as the initial preferred host register.
// Exits the program if nothing could be allocated.
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
// The preferred register is occupied: take it over if its current
// occupant is unneeded.
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
// Note: unlike alloc_reg(), this scan runs from the highest host register
// down; it also stops after unmapping the first unneeded register.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every register; lsn() then fills it in (and may
// update preferred_reg).
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: only hsn values 10..3, and never a register the previous
// instruction reads or writes.
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
// Last resort: every hsn value 10..0, without the restrictions above.
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
// Map the temporary register reg to a host register in cur->regmap for
// instruction index i, clearing that host register's dirty and isconst
// bits.  Order of attempts: already mapped -> any free host register
// (scanning from the highest number down) -> a host register whose
// occupant is unneeded both now and at instruction i-1 -> eviction driven
// by the hsn[] table from lsn().  Exits the program if all of these fail.
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
// For i>0 the occupant must also be flagged in unneeded_reg[i-1] (or
// unneeded_reg_upper[i-1] for regmap values >= 64).
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 for every register; lsn() then fills it in.
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: only hsn values 10..3, never a register the previous
// instruction reads or writes, and HOST_CCREG only while hsn[CCREG]>2.
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
// Last resort: every hsn value 10..0, without the restrictions above.
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
// Printable names of the 16 ARM registers, indexed by register number.
// Used only for the assem_debug() output of the emit_* functions.
// Register 11 is printed as "fp", 13-15 as "sp", "lr" and "pc".
755char regname[16][4] = {
756 "r0",
757 "r1",
758 "r2",
759 "r3",
760 "r4",
761 "r5",
762 "r6",
763 "r7",
764 "r8",
765 "r9",
766 "r10",
767 "fp",
768 "r12",
769 "sp",
770 "lr",
771 "pc"};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
// Pack three register numbers into their instruction fields:
// rn at bits 19-16, rd at bits 15-12, rm at bits 3-0.
// Each number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Pack rn (bits 19-16), rd (bits 15-12) and an 8-bit immediate that is to
// be shifted left by an even amount.  The shift is stored as
// ((32-shift)&30) starting at bit 7.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=(32-shift)&30;
  return (rn<<16)|(rd<<12)|(rotate<<7)|imm;
}
// Try to express imm as an 8-bit value rotated by an even amount.
// On success stores the 12-bit encoding (rotation starting at bit 7,
// 8-bit value in bits 7-0) in *encoded and returns 1; returns 0 and
// leaves *encoded untouched when no such form exists.
u_int genimm(u_int imm,u_int *encoded)
{
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  // Rotate imm right two bits at a time until it fits in 8 bits.
  for(int rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
827u_int genjmp(u_int addr)
828{
829 int offset=addr-(int)out-8;
e80343e2 830 if(offset<-33554432||offset>=33554432) {
831 if (addr>2) {
832 printf("genjmp: out of range: %08x\n", offset);
833 exit(1);
834 }
835 return 0;
836 }
57871462 837 return ((u_int)offset>>2)&0xffffff;
838}
839
840void emit_mov(int rs,int rt)
841{
842 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
843 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
844}
845
846void emit_movs(int rs,int rt)
847{
848 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
849 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
850}
851
852void emit_add(int rs1,int rs2,int rt)
853{
854 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
855 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
856}
857
858void emit_adds(int rs1,int rs2,int rt)
859{
860 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
861 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
862}
863
864void emit_adcs(int rs1,int rs2,int rt)
865{
866 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
867 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
868}
869
870void emit_sbc(int rs1,int rs2,int rt)
871{
872 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
873 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
874}
875
876void emit_sbcs(int rs1,int rs2,int rt)
877{
878 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
879 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
880}
881
882void emit_neg(int rs, int rt)
883{
884 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
885 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
886}
887
888void emit_negs(int rs, int rt)
889{
890 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
891 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
892}
893
894void emit_sub(int rs1,int rs2,int rt)
895{
896 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
897 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
898}
899
900void emit_subs(int rs1,int rs2,int rt)
901{
902 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
903 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
904}
905
906void emit_zeroreg(int rt)
907{
908 assem_debug("mov %s,#0\n",regname[rt]);
909 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
910}
911
912void emit_loadreg(int r, int hr)
913{
3d624f89 914#ifdef FORCE32
915 if(r&64) {
916 printf("64bit load in 32bit mode!\n");
917 exit(1);
918 }
919#endif
57871462 920 if((r&63)==0)
921 emit_zeroreg(hr);
922 else {
3d624f89 923 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 924 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
925 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
926 if(r==CCREG) addr=(int)&cycle_count;
927 if(r==CSREG) addr=(int)&Status;
928 if(r==FSREG) addr=(int)&FCR31;
929 if(r==INVCP) addr=(int)&invc_ptr;
930 u_int offset = addr-(u_int)&dynarec_local;
931 assert(offset<4096);
932 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
933 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
934 }
935}
936void emit_storereg(int r, int hr)
937{
3d624f89 938#ifdef FORCE32
939 if(r&64) {
940 printf("64bit store in 32bit mode!\n");
941 exit(1);
942 }
943#endif
944 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 945 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
946 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
947 if(r==CCREG) addr=(int)&cycle_count;
948 if(r==FSREG) addr=(int)&FCR31;
949 u_int offset = addr-(u_int)&dynarec_local;
950 assert(offset<4096);
951 assem_debug("str %s,fp+%d\n",regname[hr],offset);
952 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
953}
954
955void emit_test(int rs, int rt)
956{
957 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
958 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
959}
960
961void emit_testimm(int rs,int imm)
962{
963 u_int armval;
964 assem_debug("tst %s,$%d\n",regname[rs],imm);
965 assert(genimm(imm,&armval));
966 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
967}
968
b9b61529 969void emit_testeqimm(int rs,int imm)
970{
971 u_int armval;
972 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
973 assert(genimm(imm,&armval));
974 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
975}
976
57871462 977void emit_not(int rs,int rt)
978{
979 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
980 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
981}
982
b9b61529 983void emit_mvnmi(int rs,int rt)
984{
985 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
986 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
987}
988
57871462 989void emit_and(u_int rs1,u_int rs2,u_int rt)
990{
991 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
992 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
993}
994
995void emit_or(u_int rs1,u_int rs2,u_int rt)
996{
997 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
998 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
999}
1000void emit_or_and_set_flags(int rs1,int rs2,int rt)
1001{
1002 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1003 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1004}
1005
1006void emit_xor(u_int rs1,u_int rs2,u_int rt)
1007{
1008 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1009 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1010}
1011
1012void emit_loadlp(u_int imm,u_int rt)
1013{
1014 add_literal((int)out,imm);
1015 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1016 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1017}
1018void emit_movw(u_int imm,u_int rt)
1019{
1020 assert(imm<65536);
1021 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1022 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1023}
1024void emit_movt(u_int imm,u_int rt)
1025{
1026 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1027 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1028}
1029void emit_movimm(u_int imm,u_int rt)
1030{
1031 u_int armval;
1032 if(genimm(imm,&armval)) {
1033 assem_debug("mov %s,#%d\n",regname[rt],imm);
1034 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1035 }else if(genimm(~imm,&armval)) {
1036 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1037 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1038 }else if(imm<65536) {
1039 #ifdef ARMv5_ONLY
1040 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1041 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1042 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1043 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1044 #else
1045 emit_movw(imm,rt);
1046 #endif
1047 }else{
1048 #ifdef ARMv5_ONLY
1049 emit_loadlp(imm,rt);
1050 #else
1051 emit_movw(imm&0x0000FFFF,rt);
1052 emit_movt(imm&0xFFFF0000,rt);
1053 #endif
1054 }
1055}
1056void emit_pcreladdr(u_int rt)
1057{
1058 assem_debug("add %s,pc,#?\n",regname[rt]);
1059 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1060}
1061
1062void emit_addimm(u_int rs,int imm,u_int rt)
1063{
1064 assert(rs<16);
1065 assert(rt<16);
1066 if(imm!=0) {
1067 assert(imm>-65536&&imm<65536);
1068 u_int armval;
1069 if(genimm(imm,&armval)) {
1070 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1071 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1072 }else if(genimm(-imm,&armval)) {
1073 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1074 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1075 }else if(imm<0) {
1076 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1077 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1078 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1079 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1080 }else{
1081 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1082 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1083 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1084 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1085 }
1086 }
1087 else if(rs!=rt) emit_mov(rs,rt);
1088}
1089
1090void emit_addimm_and_set_flags(int imm,int rt)
1091{
1092 assert(imm>-65536&&imm<65536);
1093 u_int armval;
1094 if(genimm(imm,&armval)) {
1095 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1096 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1097 }else if(genimm(-imm,&armval)) {
1098 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1099 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1100 }else if(imm<0) {
1101 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1102 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1103 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1104 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1105 }else{
1106 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1107 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1108 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1109 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1110 }
1111}
1112void emit_addimm_no_flags(u_int imm,u_int rt)
1113{
1114 emit_addimm(rt,imm,rt);
1115}
1116
1117void emit_addnop(u_int r)
1118{
1119 assert(r<16);
1120 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1121 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1122}
1123
1124void emit_adcimm(u_int rs,int imm,u_int rt)
1125{
1126 u_int armval;
1127 assert(genimm(imm,&armval));
1128 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1129 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1130}
1131/*void emit_sbcimm(int imm,u_int rt)
1132{
1133 u_int armval;
1134 assert(genimm(imm,&armval));
1135 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1136 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1137}*/
1138void emit_sbbimm(int imm,u_int rt)
1139{
1140 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1141 assert(rt<8);
1142 if(imm<128&&imm>=-128) {
1143 output_byte(0x83);
1144 output_modrm(3,rt,3);
1145 output_byte(imm);
1146 }
1147 else
1148 {
1149 output_byte(0x81);
1150 output_modrm(3,rt,3);
1151 output_w32(imm);
1152 }
1153}
1154void emit_rscimm(int rs,int imm,u_int rt)
1155{
1156 assert(0);
1157 u_int armval;
1158 assert(genimm(imm,&armval));
1159 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1160 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1161}
1162
1163void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1164{
1165 // TODO: if(genimm(imm,&armval)) ...
1166 // else
1167 emit_movimm(imm,HOST_TEMPREG);
1168 emit_adds(HOST_TEMPREG,rsl,rtl);
1169 emit_adcimm(rsh,0,rth);
1170}
1171
1172void emit_sbb(int rs1,int rs2)
1173{
1174 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1175 output_byte(0x19);
1176 output_modrm(3,rs1,rs2);
1177}
1178
1179void emit_andimm(int rs,int imm,int rt)
1180{
1181 u_int armval;
1182 if(genimm(imm,&armval)) {
1183 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1184 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1185 }else if(genimm(~imm,&armval)) {
1186 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1187 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1188 }else if(imm==65535) {
1189 #ifdef ARMv5_ONLY
1190 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1191 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1192 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1193 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1194 #else
1195 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1196 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1197 #endif
1198 }else{
1199 assert(imm>0&&imm<65535);
1200 #ifdef ARMv5_ONLY
1201 assem_debug("mov r14,#%d\n",imm&0xFF00);
1202 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1203 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1204 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1205 #else
1206 emit_movw(imm,HOST_TEMPREG);
1207 #endif
1208 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1209 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1210 }
1211}
1212
1213void emit_orimm(int rs,int imm,int rt)
1214{
1215 u_int armval;
1216 if(genimm(imm,&armval)) {
1217 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1218 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1219 }else{
1220 assert(imm>0&&imm<65536);
1221 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1222 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1223 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1224 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1225 }
1226}
1227
1228void emit_xorimm(int rs,int imm,int rt)
1229{
57871462 1230 u_int armval;
1231 if(genimm(imm,&armval)) {
1232 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1234 }else{
514ed0d9 1235 assert(imm>0&&imm<65536);
57871462 1236 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1237 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1238 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1239 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1240 }
1241}
1242
1243void emit_shlimm(int rs,u_int imm,int rt)
1244{
1245 assert(imm>0);
1246 assert(imm<32);
1247 //if(imm==1) ...
1248 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1249 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1250}
1251
1252void emit_shrimm(int rs,u_int imm,int rt)
1253{
1254 assert(imm>0);
1255 assert(imm<32);
1256 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1257 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1258}
1259
1260void emit_sarimm(int rs,u_int imm,int rt)
1261{
1262 assert(imm>0);
1263 assert(imm<32);
1264 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1265 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1266}
1267
1268void emit_rorimm(int rs,u_int imm,int rt)
1269{
1270 assert(imm>0);
1271 assert(imm<32);
1272 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1273 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1274}
1275
1276void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1277{
1278 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1279 assert(imm>0);
1280 assert(imm<32);
1281 //if(imm==1) ...
1282 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1283 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1284 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1285 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1286}
1287
1288void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1289{
1290 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1291 assert(imm>0);
1292 assert(imm<32);
1293 //if(imm==1) ...
1294 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1295 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1296 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1297 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1298}
1299
b9b61529 1300void emit_signextend16(int rs,int rt)
1301{
1302 #ifdef ARMv5_ONLY
1303 emit_shlimm(rs,16,rt);
1304 emit_sarimm(rt,16,rt);
1305 #else
1306 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1307 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1308 #endif
1309}
1310
57871462 1311void emit_shl(u_int rs,u_int shift,u_int rt)
1312{
1313 assert(rs<16);
1314 assert(rt<16);
1315 assert(shift<16);
1316 //if(imm==1) ...
1317 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1318 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1319}
1320void emit_shr(u_int rs,u_int shift,u_int rt)
1321{
1322 assert(rs<16);
1323 assert(rt<16);
1324 assert(shift<16);
1325 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1326 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1327}
1328void emit_sar(u_int rs,u_int shift,u_int rt)
1329{
1330 assert(rs<16);
1331 assert(rt<16);
1332 assert(shift<16);
1333 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1334 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1335}
1336void emit_shlcl(int r)
1337{
1338 assem_debug("shl %%%s,%%cl\n",regname[r]);
1339 assert(0);
1340}
1341void emit_shrcl(int r)
1342{
1343 assem_debug("shr %%%s,%%cl\n",regname[r]);
1344 assert(0);
1345}
1346void emit_sarcl(int r)
1347{
1348 assem_debug("sar %%%s,%%cl\n",regname[r]);
1349 assert(0);
1350}
1351
1352void emit_shldcl(int r1,int r2)
1353{
1354 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1355 assert(0);
1356}
1357void emit_shrdcl(int r1,int r2)
1358{
1359 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1360 assert(0);
1361}
1362void emit_orrshl(u_int rs,u_int shift,u_int rt)
1363{
1364 assert(rs<16);
1365 assert(rt<16);
1366 assert(shift<16);
1367 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1368 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1369}
1370void emit_orrshr(u_int rs,u_int shift,u_int rt)
1371{
1372 assert(rs<16);
1373 assert(rt<16);
1374 assert(shift<16);
1375 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1376 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1377}
1378
1379void emit_cmpimm(int rs,int imm)
1380{
1381 u_int armval;
1382 if(genimm(imm,&armval)) {
1383 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1384 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1385 }else if(genimm(-imm,&armval)) {
1386 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1387 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1388 }else if(imm>0) {
1389 assert(imm<65536);
1390 #ifdef ARMv5_ONLY
1391 emit_movimm(imm,HOST_TEMPREG);
1392 #else
1393 emit_movw(imm,HOST_TEMPREG);
1394 #endif
1395 assem_debug("cmp %s,r14\n",regname[rs]);
1396 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1397 }else{
1398 assert(imm>-65536);
1399 #ifdef ARMv5_ONLY
1400 emit_movimm(-imm,HOST_TEMPREG);
1401 #else
1402 emit_movw(-imm,HOST_TEMPREG);
1403 #endif
1404 assem_debug("cmn %s,r14\n",regname[rs]);
1405 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1406 }
1407}
1408
1409void emit_cmovne(u_int *addr,int rt)
1410{
1411 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1412 assert(0);
1413}
1414void emit_cmovl(u_int *addr,int rt)
1415{
1416 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1417 assert(0);
1418}
1419void emit_cmovs(u_int *addr,int rt)
1420{
1421 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1422 assert(0);
1423}
1424void emit_cmovne_imm(int imm,int rt)
1425{
1426 assem_debug("movne %s,#%d\n",regname[rt],imm);
1427 u_int armval;
1428 assert(genimm(imm,&armval));
1429 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1430}
1431void emit_cmovl_imm(int imm,int rt)
1432{
1433 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1434 u_int armval;
1435 assert(genimm(imm,&armval));
1436 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1437}
1438void emit_cmovb_imm(int imm,int rt)
1439{
1440 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1441 u_int armval;
1442 assert(genimm(imm,&armval));
1443 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1444}
1445void emit_cmovs_imm(int imm,int rt)
1446{
1447 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1448 u_int armval;
1449 assert(genimm(imm,&armval));
1450 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1451}
1452void emit_cmove_reg(int rs,int rt)
1453{
1454 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1455 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1456}
1457void emit_cmovne_reg(int rs,int rt)
1458{
1459 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1460 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1461}
1462void emit_cmovl_reg(int rs,int rt)
1463{
1464 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1465 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1466}
1467void emit_cmovs_reg(int rs,int rt)
1468{
1469 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1470 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1471}
1472
// rt = 1 if the compare of rs with imm yields "lt", else 0.
// When rt is the same register as rs it is cleared after the compare
// instead of before it.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// rt = 1 if the compare of rs with imm yields "cc" (movcc), else 0.
// When rt is the same register as rs it is cleared after the compare
// instead of before it.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
// Signed "set if less than immediate" for a 64-bit value held in rsh:rsl.
// The low-word result from emit_slti32() is then corrected from the high
// word, which is checked against 0 (imm >= 0) or -1 (imm < 0).
// rt must not be the same register as rsh.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);

  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
// Unsigned "set if less than immediate" for a 64-bit value held in
// rsh:rsl. The low-word result from emit_sltiu32() is overridden when the
// high word differs from 0 (imm >= 0, result 0) or from -1 (imm < 0,
// result 1). rt must not be the same register as rsh.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);

  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1519
1520void emit_cmp(int rs,int rt)
1521{
1522 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1523 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1524}
// rt = 1, replaced by 0 when the compare of rs with 1 yields "lt".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);      // default result
  emit_cmovl_imm(0, rt);   // rs < 1
}
// Set rt to 1 when rs is non-zero. rs is first either tested in place
// (rs == rt) or copied into rt with emit_movs(); "movne rt,#1" follows.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
// "Greater than zero" for a 64-bit value in rsh:rsl: start from the
// low-word result of emit_set_gz32(), then let the high word override it
// (non-zero -> 1, negative -> 0).
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// ORs rsh and rsl into rt with flags set, then emits "movne rt,#1".
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// rt = 1 if the compare of rs1 with rs2 yields "lt", else 0.
// When rt is one of the sources it is cleared after the compare instead
// of before it.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
// rt = 1 if the compare of rs1 with rs2 yields "cc" (movcc), else 0.
// When rt is one of the sources it is cleared after the compare instead
// of before it.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
1569void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1570{
1571 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1572 assert(u1!=rt);
1573 assert(u2!=rt);
1574 emit_cmp(l1,l2);
1575 emit_movimm(0,rt);
1576 emit_sbcs(u1,u2,HOST_TEMPREG);
1577 emit_cmovl_imm(1,rt);
1578}
1579void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1580{
1581 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1582 assert(u1!=rt);
1583 assert(u2!=rt);
1584 emit_cmp(l1,l2);
1585 emit_movimm(0,rt);
1586 emit_sbcs(u1,u2,HOST_TEMPREG);
1587 emit_cmovb_imm(1,rt);
1588}
1589
1590void emit_call(int a)
1591{
1592 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1593 u_int offset=genjmp(a);
1594 output_w32(0xeb000000|offset);
1595}
1596void emit_jmp(int a)
1597{
1598 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1599 u_int offset=genjmp(a);
1600 output_w32(0xea000000|offset);
1601}
1602void emit_jne(int a)
1603{
1604 assem_debug("bne %x\n",a);
1605 u_int offset=genjmp(a);
1606 output_w32(0x1a000000|offset);
1607}
1608void emit_jeq(int a)
1609{
1610 assem_debug("beq %x\n",a);
1611 u_int offset=genjmp(a);
1612 output_w32(0x0a000000|offset);
1613}
1614void emit_js(int a)
1615{
1616 assem_debug("bmi %x\n",a);
1617 u_int offset=genjmp(a);
1618 output_w32(0x4a000000|offset);
1619}
1620void emit_jns(int a)
1621{
1622 assem_debug("bpl %x\n",a);
1623 u_int offset=genjmp(a);
1624 output_w32(0x5a000000|offset);
1625}
1626void emit_jl(int a)
1627{
1628 assem_debug("blt %x\n",a);
1629 u_int offset=genjmp(a);
1630 output_w32(0xba000000|offset);
1631}
1632void emit_jge(int a)
1633{
1634 assem_debug("bge %x\n",a);
1635 u_int offset=genjmp(a);
1636 output_w32(0xaa000000|offset);
1637}
1638void emit_jno(int a)
1639{
1640 assem_debug("bvc %x\n",a);
1641 u_int offset=genjmp(a);
1642 output_w32(0x7a000000|offset);
1643}
1644void emit_jc(int a)
1645{
1646 assem_debug("bcs %x\n",a);
1647 u_int offset=genjmp(a);
1648 output_w32(0x2a000000|offset);
1649}
1650void emit_jcc(int a)
1651{
1652 assem_debug("bcc %x\n",a);
1653 u_int offset=genjmp(a);
1654 output_w32(0x3a000000|offset);
1655}
1656
// Unimplemented: logs the mnemonic, then unconditionally fails assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Unimplemented: logs the mnemonic, then unconditionally fails assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Unimplemented: logs the mnemonic, then unconditionally fails assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1672void emit_pushreg(u_int r)
1673{
1674 assem_debug("push %%%s\n",regname[r]);
1675 assert(0);
1676}
1677void emit_popreg(u_int r)
1678{
1679 assem_debug("pop %%%s\n",regname[r]);
1680 assert(0);
1681}
1682void emit_callreg(u_int r)
1683{
1684 assem_debug("call *%%%s\n",regname[r]);
1685 assert(0);
1686}
1687void emit_jmpreg(u_int r)
1688{
1689 assem_debug("mov pc,%s\n",regname[r]);
1690 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1691}
1692
1693void emit_readword_indexed(int offset, int rs, int rt)
1694{
1695 assert(offset>-4096&&offset<4096);
1696 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1697 if(offset>=0) {
1698 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1699 }else{
1700 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1701 }
1702}
1703void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1704{
1705 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1706 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1707}
// Word load with an optional map register. map < 0 means no mapping, so a
// plain offset load is emitted; otherwise addr must be 0 and the load is
// indexed by map scaled by 4.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load a doubleword as two words: rh from addr (skipped when rh < 0) and
// rl from the following word. Without a map register (map < 0) the second
// word is at addr+4; with one, map is incremented by 1 in place before the
// second load (the mapped loads scale map by 4). rh must differ from rs in
// the mapped case.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);

  if (map < 0) {
    if (want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }

  assert(rh != rs);
  if (want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1728void emit_movsbl_indexed(int offset, int rs, int rt)
1729{
1730 assert(offset>-256&&offset<256);
1731 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1732 if(offset>=0) {
1733 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1734 }else{
1735 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1736 }
1737}
1738void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1739{
1740 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1741 else {
1742 if(addr==0) {
1743 emit_shlimm(map,2,map);
1744 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1745 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1746 }else{
1747 assert(addr>-256&&addr<256);
1748 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1749 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1750 emit_movsbl_indexed(addr, rt, rt);
1751 }
1752 }
1753}
1754void emit_movswl_indexed(int offset, int rs, int rt)
1755{
1756 assert(offset>-256&&offset<256);
1757 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1758 if(offset>=0) {
1759 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1760 }else{
1761 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1762 }
1763}
1764void emit_movzbl_indexed(int offset, int rs, int rt)
1765{
1766 assert(offset>-4096&&offset<4096);
1767 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1768 if(offset>=0) {
1769 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1770 }else{
1771 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1772 }
1773}
1774void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1775{
1776 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1777 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1778}
// Zero-extending byte load with an optional map register (map < 0: none).
// With a map, a non-zero addr is first added to rs into rt, and rt then
// serves as the base of the map-indexed load.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }

  int base = rs;
  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    base = rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1791void emit_movzwl_indexed(int offset, int rs, int rt)
1792{
1793 assert(offset>-256&&offset<256);
1794 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1795 if(offset>=0) {
1796 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1797 }else{
1798 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1799 }
1800}
1801void emit_readword(int addr, int rt)
1802{
1803 u_int offset = addr-(u_int)&dynarec_local;
1804 assert(offset<4096);
1805 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1806 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1807}
1808void emit_movsbl(int addr, int rt)
1809{
1810 u_int offset = addr-(u_int)&dynarec_local;
1811 assert(offset<256);
1812 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1813 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1814}
1815void emit_movswl(int addr, int rt)
1816{
1817 u_int offset = addr-(u_int)&dynarec_local;
1818 assert(offset<256);
1819 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1820 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1821}
1822void emit_movzbl(int addr, int rt)
1823{
1824 u_int offset = addr-(u_int)&dynarec_local;
1825 assert(offset<4096);
1826 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1827 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1828}
1829void emit_movzwl(int addr, int rt)
1830{
1831 u_int offset = addr-(u_int)&dynarec_local;
1832 assert(offset<256);
1833 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1834 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1835}
1836void emit_movzwl_reg(int rs, int rt)
1837{
1838 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1839 assert(0);
1840}
1841
1842void emit_xchg(int rs, int rt)
1843{
1844 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1845 assert(0);
1846}
1847void emit_writeword_indexed(int rt, int offset, int rs)
1848{
1849 assert(offset>-4096&&offset<4096);
1850 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1851 if(offset>=0) {
1852 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1853 }else{
1854 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1855 }
1856}
1857void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1858{
1859 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1860 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1861}
// Word store with an optional map register. map < 0 means no mapping, so
// a plain offset store is emitted; otherwise addr must be 0 and the store
// is indexed by map scaled by 4. temp is not used by this function.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store a doubleword as two words (rh first, then rl).
//  - No map (map < 0): rh goes to addr (skipped when rh < 0), rl to addr+4.
//  - With a map: rh is required. If temp is a register distinct from rs it
//    receives map+1 and indexes the second store; otherwise rs itself is
//    advanced by 4 between the two stores.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }

  assert(rh >= 0);
  const int have_temp = (temp != rs);

  if (have_temp) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1886void emit_writehword_indexed(int rt, int offset, int rs)
1887{
1888 assert(offset>-256&&offset<256);
1889 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1892 }else{
1893 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1894 }
1895}
1896void emit_writebyte_indexed(int rt, int offset, int rs)
1897{
1898 assert(offset>-4096&&offset<4096);
1899 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1900 if(offset>=0) {
1901 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1902 }else{
1903 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1904 }
1905}
1906void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1907{
1908 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1909 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1910}
// Byte store with an optional map register (map < 0: none). With a map, a
// non-zero addr is first added to rs into temp, and temp then serves as
// the base of the map-indexed store.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }

  int base = rs;
  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    base = temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1923void emit_writeword(int rt, int addr)
1924{
1925 u_int offset = addr-(u_int)&dynarec_local;
1926 assert(offset<4096);
1927 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1928 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1929}
1930void emit_writehword(int rt, int addr)
1931{
1932 u_int offset = addr-(u_int)&dynarec_local;
1933 assert(offset<256);
1934 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1935 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1936}
1937void emit_writebyte(int rt, int addr)
1938{
1939 u_int offset = addr-(u_int)&dynarec_local;
1940 assert(offset<4096);
1941 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1942 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1943}
// Unimplemented: logs the mnemonic, then unconditionally fails assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Unimplemented: logs the mnemonic, then unconditionally fails assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1954
1955void emit_mul(int rs)
1956{
1957 assem_debug("mul %%%s\n",regname[rs]);
1958 assert(0);
1959}
1960void emit_imul(int rs)
1961{
1962 assem_debug("imul %%%s\n",regname[rs]);
1963 assert(0);
1964}
1965void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1966{
1967 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1968 assert(rs1<16);
1969 assert(rs2<16);
1970 assert(hi<16);
1971 assert(lo<16);
1972 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1973}
1974void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1975{
1976 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1977 assert(rs1<16);
1978 assert(rs2<16);
1979 assert(hi<16);
1980 assert(lo<16);
1981 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1982}
1983
1984void emit_div(int rs)
1985{
1986 assem_debug("div %%%s\n",regname[rs]);
1987 assert(0);
1988}
1989void emit_idiv(int rs)
1990{
1991 assem_debug("idiv %%%s\n",regname[rs]);
1992 assert(0);
1993}
// Unimplemented: logs the mnemonic, then unconditionally fails assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
1999
2000void emit_clz(int rs,int rt)
2001{
2002 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2003 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2004}
2005
2006void emit_subcs(int rs1,int rs2,int rt)
2007{
2008 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2009 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2010}
2011
2012void emit_shrcc_imm(int rs,u_int imm,int rt)
2013{
2014 assert(imm>0);
2015 assert(imm<32);
2016 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2017 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2018}
2019
2020void emit_negmi(int rs, int rt)
2021{
2022 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2023 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2024}
2025
2026void emit_negsmi(int rs, int rt)
2027{
2028 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2029 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2030}
2031
2032void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2033{
2034 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2035 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2036}
2037
2038void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2039{
2040 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2041 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2042}
2043
2044void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2045{
2046 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2047 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2048}
2049
2050void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2051{
2052 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2053 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2054}
2055
2056void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2057{
2058 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2059 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2060}
2061
2062void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2063{
2064 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2065 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2066}
2067
2068void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2069{
2070 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2071 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2072}
2073
2074void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2075{
2076 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2077 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2078}
2079
2080void emit_teq(int rs, int rt)
2081{
2082 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2083 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2084}
2085
2086void emit_rsbimm(int rs, int imm, int rt)
2087{
2088 u_int armval;
2089 assert(genimm(imm,&armval));
2090 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2091 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2092}
2093
2094// Load 2 immediates optimizing for small code size
2095void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2096{
2097 emit_movimm(imm1,rt1);
2098 u_int armval;
2099 if(genimm(imm2-imm1,&armval)) {
2100 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2101 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2102 }else if(genimm(imm1-imm2,&armval)) {
2103 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2104 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2105 }
2106 else emit_movimm(imm2,rt2);
2107}
2108
2109// Conditionally select one of two immediates, optimizing for small code size
2110// This will only be called if HAVE_CMOV_IMM is defined
2111void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2112{
2113 u_int armval;
2114 if(genimm(imm2-imm1,&armval)) {
2115 emit_movimm(imm1,rt);
2116 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2117 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2118 }else if(genimm(imm1-imm2,&armval)) {
2119 emit_movimm(imm1,rt);
2120 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2121 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2122 }
2123 else {
2124 #ifdef ARMv5_ONLY
2125 emit_movimm(imm1,rt);
2126 add_literal((int)out,imm2);
2127 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2128 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2129 #else
2130 emit_movw(imm1&0x0000FFFF,rt);
2131 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2132 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2133 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2134 }
2135 emit_movt(imm1&0xFFFF0000,rt);
2136 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2137 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2138 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2139 }
2140 #endif
2141 }
2142}
2143
// special case for checking invalid_code
// Not implemented here: unconditionally fails assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2149
2150// special case for checking invalid_code
2151void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2152{
2153 assert(imm<128&&imm>=0);
2154 assert(r>=0&&r<16);
2155 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2156 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2157 emit_cmpimm(HOST_TEMPREG,imm);
2158}
2159
2160// special case for tlb mapping
2161void emit_addsr12(int rs1,int rs2,int rt)
2162{
2163 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2164 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2165}
2166
// Used to preload hash table entries.
// NOTE(review): every other emitter in this area writes a single 32-bit word
// with output_w32; this one writes two separate bytes and a modrm byte before
// the address word. It looks like a carry-over from another backend -- confirm
// whether anything still calls it before relying on its output.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2176void emit_prefetchreg(int r)
2177{
2178 assem_debug("pld %s\n",regname[r]);
2179 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2180}
2181
2182// Special case for mini_ht
2183void emit_ldreq_indexed(int rs, u_int offset, int rt)
2184{
2185 assert(offset<4096);
2186 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2187 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2188}
2189
2190void emit_flds(int r,int sr)
2191{
2192 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2193 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2194}
2195
2196void emit_vldr(int r,int vr)
2197{
2198 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2199 output_w32(0xed900b00|(vr<<12)|(r<<16));
2200}
2201
2202void emit_fsts(int sr,int r)
2203{
2204 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2205 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2206}
2207
2208void emit_vstr(int vr,int r)
2209{
2210 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2211 output_w32(0xed800b00|(vr<<12)|(r<<16));
2212}
2213
2214void emit_ftosizs(int s,int d)
2215{
2216 assem_debug("ftosizs s%d,s%d\n",d,s);
2217 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2218}
2219
2220void emit_ftosizd(int s,int d)
2221{
2222 assem_debug("ftosizd s%d,d%d\n",d,s);
2223 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2224}
2225
2226void emit_fsitos(int s,int d)
2227{
2228 assem_debug("fsitos s%d,s%d\n",d,s);
2229 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2230}
2231
2232void emit_fsitod(int s,int d)
2233{
2234 assem_debug("fsitod d%d,s%d\n",d,s);
2235 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2236}
2237
2238void emit_fcvtds(int s,int d)
2239{
2240 assem_debug("fcvtds d%d,s%d\n",d,s);
2241 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2242}
2243
2244void emit_fcvtsd(int s,int d)
2245{
2246 assem_debug("fcvtsd s%d,d%d\n",d,s);
2247 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2248}
2249
// Emit "fsqrts s<d>,s<s>" (source s, destination d).
// Both operands are encoded with the split single-register layout, the same
// way emit_ftosizs/emit_fsitos encode theirs, so the trace prints both as
// s-registers (it previously labelled the destination as a d-register).
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2255
// Emit "fsqrtd d<d>,d<s>" (source s, destination d).
// Both operands are encoded as 3-bit fields, the same way emit_faddd encodes
// its d-registers, so the trace prints both as d-registers (it previously
// labelled the destination as an s-register).
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2261
// Emit "fabss s<d>,s<s>" (source s, destination d).
// Both operands use the split single-register layout, so the trace prints
// both as s-registers (it previously labelled the destination as a
// d-register).
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2267
// Emit "fabsd d<d>,d<s>" (source s, destination d).
// Both operands are encoded as 3-bit d-register fields, so the trace prints
// both as d-registers (it previously labelled the destination as an
// s-register).
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2273
// Emit "fnegs s<d>,s<s>" (source s, destination d).
// Both operands use the split single-register layout, so the trace prints
// both as s-registers (it previously labelled the destination as a
// d-register).
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2279
// Emit "fnegd d<d>,d<s>" (source s, destination d).
// Both operands are encoded as 3-bit d-register fields, so the trace prints
// both as d-registers (it previously labelled the destination as an
// s-register).
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2285
2286void emit_fadds(int s1,int s2,int d)
2287{
2288 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2289 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2290}
2291
2292void emit_faddd(int s1,int s2,int d)
2293{
2294 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2295 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2296}
2297
2298void emit_fsubs(int s1,int s2,int d)
2299{
2300 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2301 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2302}
2303
2304void emit_fsubd(int s1,int s2,int d)
2305{
2306 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2307 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2308}
2309
2310void emit_fmuls(int s1,int s2,int d)
2311{
2312 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2313 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2314}
2315
2316void emit_fmuld(int s1,int s2,int d)
2317{
2318 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2319 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2320}
2321
2322void emit_fdivs(int s1,int s2,int d)
2323{
2324 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2325 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2326}
2327
2328void emit_fdivd(int s1,int s2,int d)
2329{
2330 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2331 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2332}
2333
// Emit a fixed "fcmps s14, s15".
// The x and y arguments are not used: both the trace text and the
// instruction word are constants.
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(0xeeb47a67);
}
2339
// Emit a fixed "fcmpd d6, d7".
// The x and y arguments are not used: both the trace text and the
// instruction word are constants.
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(0xeeb46b47);
}
2345
2346void emit_fmstat()
2347{
2348 assem_debug("fmstat\n");
2349 output_w32(0xeef1fa10);
2350}
2351
2352void emit_bicne_imm(int rs,int imm,int rt)
2353{
2354 u_int armval;
2355 assert(genimm(imm,&armval));
2356 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2357 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2358}
2359
2360void emit_biccs_imm(int rs,int imm,int rt)
2361{
2362 u_int armval;
2363 assert(genimm(imm,&armval));
2364 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2365 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2366}
2367
2368void emit_bicvc_imm(int rs,int imm,int rt)
2369{
2370 u_int armval;
2371 assert(genimm(imm,&armval));
2372 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2373 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2374}
2375
2376void emit_bichi_imm(int rs,int imm,int rt)
2377{
2378 u_int armval;
2379 assert(genimm(imm,&armval));
2380 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2381 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2382}
2383
2384void emit_orrvs_imm(int rs,int imm,int rt)
2385{
2386 u_int armval;
2387 assert(genimm(imm,&armval));
2388 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2389 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2390}
2391
b9b61529 2392void emit_orrne_imm(int rs,int imm,int rt)
2393{
2394 u_int armval;
2395 assert(genimm(imm,&armval));
2396 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2397 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2398}
2399
2400void emit_andne_imm(int rs,int imm,int rt)
2401{
2402 u_int armval;
2403 assert(genimm(imm,&armval));
2404 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2405 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2406}
2407
// Emit a VC-conditional "add pc,pc,#imm" with the immediate field left zero.
// The argument a only appears in the debug trace here.
// NOTE(review): the "#?" in the trace suggests the offset is filled in later
// by other code -- confirm against the callers.
void emit_jno_unlikely(int a)
{
  const int pc_reg=15;
  //emit_jno(a);
  assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
  output_w32(0x72800000|rd_rn_rm(pc_reg,pc_reg,0));
}
2414
2415// Save registers before function call
2416void save_regs(u_int reglist)
2417{
2418 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2419 if(!reglist) return;
2420 assem_debug("stmia fp,{");
2421 if(reglist&1) assem_debug("r0, ");
2422 if(reglist&2) assem_debug("r1, ");
2423 if(reglist&4) assem_debug("r2, ");
2424 if(reglist&8) assem_debug("r3, ");
2425 if(reglist&0x1000) assem_debug("r12");
2426 assem_debug("}\n");
2427 output_w32(0xe88b0000|reglist);
2428}
2429// Restore registers after function call
2430void restore_regs(u_int reglist)
2431{
2432 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2433 if(!reglist) return;
2434 assem_debug("ldmia fp,{");
2435 if(reglist&1) assem_debug("r0, ");
2436 if(reglist&2) assem_debug("r1, ");
2437 if(reglist&4) assem_debug("r2, ");
2438 if(reglist&8) assem_debug("r3, ");
2439 if(reglist&0x1000) assem_debug("r12");
2440 assem_debug("}\n");
2441 output_w32(0xe89b0000|reglist);
2442}
2443
2444// Write back consts using r14 so we don't disturb the other registers
2445void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2446{
2447 int hr;
2448 for(hr=0;hr<HOST_REGS;hr++) {
2449 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2450 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2451 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2452 int value=constmap[i][hr];
2453 if(value==0) {
2454 emit_zeroreg(HOST_TEMPREG);
2455 }
2456 else {
2457 emit_movimm(value,HOST_TEMPREG);
2458 }
2459 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2460#ifndef FORCE32
57871462 2461 if((i_is32>>i_regmap[hr])&1) {
2462 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2463 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2464 }
24385cae 2465#endif
57871462 2466 }
2467 }
2468 }
2469 }
2470}
2471
2472/* Stubs/epilogue */
2473
2474void literal_pool(int n)
2475{
2476 if(!literalcount) return;
2477 if(n) {
2478 if((int)out-literals[0][0]<4096-n) return;
2479 }
2480 u_int *ptr;
2481 int i;
2482 for(i=0;i<literalcount;i++)
2483 {
2484 ptr=(u_int *)literals[i][0];
2485 u_int offset=(u_int)out-(u_int)ptr-8;
2486 assert(offset<4096);
2487 assert(!(offset&3));
2488 *ptr|=offset;
2489 output_w32(literals[i][1]);
2490 }
2491 literalcount=0;
2492}
2493
2494void literal_pool_jumpover(int n)
2495{
2496 if(!literalcount) return;
2497 if(n) {
2498 if((int)out-literals[0][0]<4096-n) return;
2499 }
2500 int jaddr=(int)out;
2501 emit_jmp(0);
2502 literal_pool(0);
2503 set_jump_target(jaddr,(int)out);
2504}
2505
2506emit_extjump2(int addr, int target, int linker)
2507{
2508 u_char *ptr=(u_char *)addr;
2509 assert((ptr[3]&0x0e)==0xa);
2510 emit_loadlp(target,0);
2511 emit_loadlp(addr,1);
24385cae 2512 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2513 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2514//DEBUG >
2515#ifdef DEBUG_CYCLE_COUNT
2516 emit_readword((int)&last_count,ECX);
2517 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2518 emit_readword((int)&next_interupt,ECX);
2519 emit_writeword(HOST_CCREG,(int)&Count);
2520 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2521 emit_writeword(ECX,(int)&last_count);
2522#endif
2523//DEBUG <
2524 emit_jmp(linker);
2525}
2526
2527emit_extjump(int addr, int target)
2528{
2529 emit_extjump2(addr, target, (int)dyna_linker);
2530}
2531emit_extjump_ds(int addr, int target)
2532{
2533 emit_extjump2(addr, target, (int)dyna_linker_ds);
2534}
2535
// Emit the out-of-line slow path for a load whose fast path branched away.
// Fields of stubs[n] as used here: [0]=stub type, [1]=address of the branch
// to patch so it lands on this stub, [2]=address to jump back to, [3]=index
// of the instruction, [4]=host register holding the address, [5]=pointer to
// the struct regstat in effect, [6]=cycle adjustment, [7]=mask of host
// registers to preserve.
// The stub stores the address to `address`, calls the handler through
// indirect_jump_indexed, recomputes the cycle counter, and then loads the
// result from readmem_dword into the destination register.
2536do_readstub(int n)
2537{
2538 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2539 literal_pool(256);
2540 set_jump_target(stubs[n][1],(int)out);
2541 int type=stubs[n][0];
2542 int i=stubs[n][3];
2543 int rs=stubs[n][4];
2544 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2545 u_int reglist=stubs[n][7];
2546 signed char *i_regmap=i_regs->regmap;
2547 int addr=get_reg(i_regmap,AGEN1+(i&1));
2548 int rth,rt;
2549 int ds;
// Coprocessor loads and LOADLR deliver their result in FTEMP; all other
// loads deliver it in the instruction's rt1 register.
b9b61529 2550 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2551 rth=get_reg(i_regmap,FTEMP|64);
2552 rt=get_reg(i_regmap,FTEMP);
2553 }else{
2554 rth=get_reg(i_regmap,rt1[i]|64);
2555 rt=get_reg(i_regmap,rt1[i]);
2556 }
5bf843dc 2557#ifdef PCSX
2558 if(rt<0)
2559 // assume forced dummy read
2560 rt=get_reg(i_regmap,-1);
2561#endif
57871462 2562 assert(rs>=0);
2563 assert(rt>=0);
2564 if(addr<0) addr=rt;
2565 assert(addr>=0);
// Select the handler table that matches the access width.
2566 int ftable=0;
2567 if(type==LOADB_STUB||type==LOADBU_STUB)
2568 ftable=(int)readmemb;
2569 if(type==LOADH_STUB||type==LOADHU_STUB)
2570 ftable=(int)readmemh;
2571 if(type==LOADW_STUB)
2572 ftable=(int)readmem;
24385cae 2573#ifndef FORCE32
57871462 2574 if(type==LOADD_STUB)
2575 ftable=(int)readmemd;
24385cae 2576#endif
2577 assert(ftable!=0);
57871462 2578 emit_writeword(rs,(int)&address);
2579 //emit_pusha();
2580 save_regs(reglist);
// ds is nonzero when i_regs is not regs[i] (per the variable name, the
// delay-slot case).
2581 ds=i_regs!=&regs[i];
2582 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
// Write back dirty registers before the call; the address register and the
// real base register are masked out of every dirty set used here.
2583 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2584 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2585 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2586 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
// Call arguments: register 1 = address>>16, register 0 = handler table,
// register 2 = cycle count plus adjustment, register 3 = value built from the
// instruction address, a was32 flag bit and ds.
2587 emit_shrimm(rs,16,1);
2588 int cc=get_reg(i_regmap,CCREG);
2589 if(cc<0) {
2590 emit_loadreg(CCREG,2);
2591 }
2592 emit_movimm(ftable,0);
2593 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2594 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2595 //emit_readword((int)&last_count,temp);
2596 //emit_add(cc,temp,cc);
2597 //emit_writeword(cc,(int)&Count);
2598 //emit_mov(15,14);
2599 emit_call((int)&indirect_jump_indexed);
2600 //emit_callreg(rs);
2601 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2602 // We really shouldn't need to update the count here,
2603 // but not doing so causes random crashes...
// Rebuild the cycle counter as Count - adjustment - next_interupt and
// refresh last_count from next_interupt.
2604 emit_readword((int)&Count,HOST_TEMPREG);
2605 emit_readword((int)&next_interupt,2);
2606 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2607 emit_writeword(2,(int)&last_count);
2608 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2609 if(cc<0) {
2610 emit_storereg(CCREG,HOST_TEMPREG);
2611 }
2612 //emit_popa();
2613 restore_regs(reglist);
2614 //if((cc=get_reg(regmap,CCREG))>=0) {
2615 // emit_loadreg(CCREG,cc);
2616 //}
// Fetch the result from readmem_dword with the extension matching the type.
2617 if(type==LOADB_STUB)
2618 emit_movsbl((int)&readmem_dword,rt);
2619 if(type==LOADBU_STUB)
2620 emit_movzbl((int)&readmem_dword,rt);
2621 if(type==LOADH_STUB)
2622 emit_movswl((int)&readmem_dword,rt);
2623 if(type==LOADHU_STUB)
2624 emit_movzwl((int)&readmem_dword,rt);
2625 if(type==LOADW_STUB)
2626 emit_readword((int)&readmem_dword,rt);
2627 if(type==LOADD_STUB) {
2628 emit_readword((int)&readmem_dword,rt);
2629 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2630 }
2631 emit_jmp(stubs[n][2]); // return address
2632}
2633
// Emit an inline (not out-of-line) slow-path read for a load whose address
// addr is known at translation time. The handler is looked up now, from the
// table for the access width indexed by addr>>16, and called through
// indirect_jump. The result is then loaded from readmem_dword into the host
// register mapped to `target`.
// adj is the cycle adjustment (scaled by CLOCK_DIVIDER); reglist is the mask
// of host registers to preserve across the call.
2634inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2635{
// rs and rt are both looked up from `target`, so they name the same register.
2636 int rs=get_reg(regmap,target);
2637 int rth=get_reg(regmap,target|64);
2638 int rt=get_reg(regmap,target);
2639 assert(rs>=0);
2640 assert(rt>=0);
2641 int ftable=0;
2642 if(type==LOADB_STUB||type==LOADBU_STUB)
2643 ftable=(int)readmemb;
2644 if(type==LOADH_STUB||type==LOADHU_STUB)
2645 ftable=(int)readmemh;
2646 if(type==LOADW_STUB)
2647 ftable=(int)readmem;
24385cae 2648#ifndef FORCE32
57871462 2649 if(type==LOADD_STUB)
2650 ftable=(int)readmemd;
24385cae 2651#endif
2652 assert(ftable!=0);
57871462 2653 emit_writeword(rs,(int)&address);
2654 //emit_pusha();
2655 save_regs(reglist);
2656 //emit_shrimm(rs,16,1);
2657 int cc=get_reg(regmap,CCREG);
2658 if(cc<0) {
2659 emit_loadreg(CCREG,2);
2660 }
2661 //emit_movimm(ftable,0);
// The handler address is read from the table at translation time and
// embedded as an immediate.
2662 emit_movimm(((u_int *)ftable)[addr>>16],0);
2663 //emit_readword((int)&last_count,12);
2664 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2665 if((signed int)addr>=(signed int)0xC0000000) {
2666 // Pagefault address
2667 int ds=regmap!=regs[i].regmap;
2668 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2669 }
2670 //emit_add(12,2,2);
2671 //emit_writeword(2,(int)&Count);
2672 //emit_call(((u_int *)ftable)[addr>>16]);
2673 emit_call((int)&indirect_jump);
2674 // We really shouldn't need to update the count here,
2675 // but not doing so causes random crashes...
// Rebuild the cycle counter as Count - adjustment - next_interupt and
// refresh last_count from next_interupt.
2676 emit_readword((int)&Count,HOST_TEMPREG);
2677 emit_readword((int)&next_interupt,2);
2678 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2679 emit_writeword(2,(int)&last_count);
2680 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2681 if(cc<0) {
2682 emit_storereg(CCREG,HOST_TEMPREG);
2683 }
2684 //emit_popa();
2685 restore_regs(reglist);
// Fetch the result from readmem_dword with the extension matching the type.
2686 if(type==LOADB_STUB)
2687 emit_movsbl((int)&readmem_dword,rt);
2688 if(type==LOADBU_STUB)
2689 emit_movzbl((int)&readmem_dword,rt);
2690 if(type==LOADH_STUB)
2691 emit_movswl((int)&readmem_dword,rt);
2692 if(type==LOADHU_STUB)
2693 emit_movzwl((int)&readmem_dword,rt);
2694 if(type==LOADW_STUB)
2695 emit_readword((int)&readmem_dword,rt);
2696 if(type==LOADD_STUB) {
2697 emit_readword((int)&readmem_dword,rt);
2698 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2699 }
2700}
2701
// Emit the out-of-line slow path for a store whose fast path branched away.
// stubs[n] fields are used the same way as in do_readstub: [0]=type,
// [1]=branch to patch, [2]=return address, [3]=instruction index,
// [4]=host register holding the address, [5]=struct regstat pointer,
// [6]=cycle adjustment, [7]=registers to preserve.
// The address goes to `address` and the value to byte/hword/word/dword, then
// the handler is called through indirect_jump_indexed.
2702do_writestub(int n)
2703{
2704 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2705 literal_pool(256);
2706 set_jump_target(stubs[n][1],(int)out);
2707 int type=stubs[n][0];
2708 int i=stubs[n][3];
2709 int rs=stubs[n][4];
2710 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2711 u_int reglist=stubs[n][7];
2712 signed char *i_regmap=i_regs->regmap;
2713 int addr=get_reg(i_regmap,AGEN1+(i&1));
2714 int rth,rt,r;
2715 int ds;
// Coprocessor stores take the value from FTEMP; other stores take it from
// the instruction's rs2 register. r remembers which guest register that is.
b9b61529 2716 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2717 rth=get_reg(i_regmap,FTEMP|64);
2718 rt=get_reg(i_regmap,r=FTEMP);
2719 }else{
2720 rth=get_reg(i_regmap,rs2[i]|64);
2721 rt=get_reg(i_regmap,r=rs2[i]);
2722 }
2723 assert(rs>=0);
2724 assert(rt>=0);
2725 if(addr<0) addr=get_reg(i_regmap,-1);
2726 assert(addr>=0);
// Select the handler table that matches the access width.
2727 int ftable=0;
2728 if(type==STOREB_STUB)
2729 ftable=(int)writememb;
2730 if(type==STOREH_STUB)
2731 ftable=(int)writememh;
2732 if(type==STOREW_STUB)
2733 ftable=(int)writemem;
24385cae 2734#ifndef FORCE32
57871462 2735 if(type==STORED_STUB)
2736 ftable=(int)writememd;
24385cae 2737#endif
2738 assert(ftable!=0);
57871462 2739 emit_writeword(rs,(int)&address);
2740 //emit_shrimm(rs,16,rs);
2741 //emit_movmem_indexedx4(ftable,rs,rs);
// Stage the value to be written in the global that matches the width.
2742 if(type==STOREB_STUB)
2743 emit_writebyte(rt,(int)&byte);
2744 if(type==STOREH_STUB)
2745 emit_writehword(rt,(int)&hword);
2746 if(type==STOREW_STUB)
2747 emit_writeword(rt,(int)&word);
2748 if(type==STORED_STUB) {
3d624f89 2749#ifndef FORCE32
57871462 2750 emit_writeword(rt,(int)&dword);
// When r is guest register 0 the low register is reused for the upper word.
2751 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2752#else
2753 printf("STORED_STUB\n");
2754#endif
57871462 2755 }
2756 //emit_pusha();
2757 save_regs(reglist);
// ds is nonzero when i_regs is not regs[i] (per the variable name, the
// delay-slot case).
2758 ds=i_regs!=&regs[i];
2759 int real_rs=get_reg(i_regmap,rs1[i]);
// Write back dirty registers before the call; the address register and the
// real base register are masked out of every dirty set used here.
2760 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2761 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2762 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2763 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
// Call arguments: register 1 = address>>16, register 0 = handler table,
// register 2 = cycle count plus adjustment, register 3 = value built from the
// instruction address, a was32 flag bit and ds.
2764 emit_shrimm(rs,16,1);
2765 int cc=get_reg(i_regmap,CCREG);
2766 if(cc<0) {
2767 emit_loadreg(CCREG,2);
2768 }
2769 emit_movimm(ftable,0);
2770 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2771 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2772 //emit_readword((int)&last_count,temp);
2773 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2774 //emit_add(cc,temp,cc);
2775 //emit_writeword(cc,(int)&Count);
2776 emit_call((int)&indirect_jump_indexed);
2777 //emit_callreg(rs);
// Rebuild the cycle counter as Count - adjustment - next_interupt and
// refresh last_count from next_interupt.
2778 emit_readword((int)&Count,HOST_TEMPREG);
2779 emit_readword((int)&next_interupt,2);
2780 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2781 emit_writeword(2,(int)&last_count);
2782 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2783 if(cc<0) {
2784 emit_storereg(CCREG,HOST_TEMPREG);
2785 }
2786 //emit_popa();
2787 restore_regs(reglist);
2788 //if((cc=get_reg(regmap,CCREG))>=0) {
2789 // emit_loadreg(CCREG,cc);
2790 //}
2791 emit_jmp(stubs[n][2]); // return address
2792}
2793
// Emit an inline slow-path write for a store whose address addr is known at
// translation time. The handler is looked up now, from the table for the
// access width indexed by addr>>16, and called through indirect_jump.
// The address is taken from the host register mapped to -1, the value from
// the register mapped to `target`. adj is the cycle adjustment (scaled by
// CLOCK_DIVIDER); reglist is the mask of host registers to preserve.
2794inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2795{
2796 int rs=get_reg(regmap,-1);
2797 int rth=get_reg(regmap,target|64);
2798 int rt=get_reg(regmap,target);
2799 assert(rs>=0);
2800 assert(rt>=0);
2801 int ftable=0;
2802 if(type==STOREB_STUB)
2803 ftable=(int)writememb;
2804 if(type==STOREH_STUB)
2805 ftable=(int)writememh;
2806 if(type==STOREW_STUB)
2807 ftable=(int)writemem;
24385cae 2808#ifndef FORCE32
57871462 2809 if(type==STORED_STUB)
2810 ftable=(int)writememd;
24385cae 2811#endif
2812 assert(ftable!=0);
57871462 2813 emit_writeword(rs,(int)&address);
2814 //emit_shrimm(rs,16,rs);
2815 //emit_movmem_indexedx4(ftable,rs,rs);
// Stage the value to be written in the global that matches the width.
2816 if(type==STOREB_STUB)
2817 emit_writebyte(rt,(int)&byte);
2818 if(type==STOREH_STUB)
2819 emit_writehword(rt,(int)&hword);
2820 if(type==STOREW_STUB)
2821 emit_writeword(rt,(int)&word);
2822 if(type==STORED_STUB) {
3d624f89 2823#ifndef FORCE32
57871462 2824 emit_writeword(rt,(int)&dword);
// When target is guest register 0 the low register is reused for the upper
// word.
2825 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2826#else
2827 printf("STORED_STUB\n");
2828#endif
57871462 2829 }
2830 //emit_pusha();
2831 save_regs(reglist);
2832 //emit_shrimm(rs,16,1);
2833 int cc=get_reg(regmap,CCREG);
2834 if(cc<0) {
2835 emit_loadreg(CCREG,2);
2836 }
2837 //emit_movimm(ftable,0);
// The handler address is read from the table at translation time and
// embedded as an immediate.
2838 emit_movimm(((u_int *)ftable)[addr>>16],0);
2839 //emit_readword((int)&last_count,12);
2840 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2841 if((signed int)addr>=(signed int)0xC0000000) {
2842 // Pagefault address
2843 int ds=regmap!=regs[i].regmap;
2844 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2845 }
2846 //emit_add(12,2,2);
2847 //emit_writeword(2,(int)&Count);
2848 //emit_call(((u_int *)ftable)[addr>>16]);
2849 emit_call((int)&indirect_jump);
// Rebuild the cycle counter as Count - adjustment - next_interupt and
// refresh last_count from next_interupt.
2850 emit_readword((int)&Count,HOST_TEMPREG);
2851 emit_readword((int)&next_interupt,2);
2852 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2853 emit_writeword(2,(int)&last_count);
2854 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2855 if(cc<0) {
2856 emit_storereg(CCREG,HOST_TEMPREG);
2857 }
2858 //emit_popa();
2859 restore_regs(reglist);
2860}
2861
// Emit the out-of-line slow path for SWL/SWR (opcodes 0x2a/0x2e only).
// The store is done as a read-modify-write of the containing aligned word:
//   1. read the word at (addr & ~3) through the readmem handler table,
//   2. merge in the bytes of rt selected by the low two address bits,
//   3. write the merged word back through the writemem handler table.
// Note the stubs[n] layout differs from do_readstub/do_writestub here:
// [4] is the struct regstat pointer and [5] is the host register holding
// the address.
2862do_unalignedwritestub(int n)
2863{
b7918751 2864 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2865 literal_pool(256);
57871462 2866 set_jump_target(stubs[n][1],(int)out);
b7918751 2867
2868 int i=stubs[n][3];
2869 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2870 int addr=stubs[n][5];
2871 u_int reglist=stubs[n][7];
2872 signed char *i_regmap=i_regs->regmap;
2873 int temp2=get_reg(i_regmap,FTEMP);
2874 int rt;
2875 int ds, real_rs;
2876 rt=get_reg(i_regmap,rs2[i]);
2877 assert(rt>=0);
2878 assert(addr>=0);
2879 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// The address register is added to the save set (it is still needed after
// the read call); temp2 is removed from it because it is overwritten below.
2880 reglist|=(1<<addr);
2881 reglist&=~(1<<temp2);
2882
// Word-aligned address goes to `address` for the handlers.
2883 emit_andimm(addr,0xfffffffc,temp2);
2884 emit_writeword(temp2,(int)&address);
2885
2886 save_regs(reglist);
2887 ds=i_regs!=&regs[i];
2888 real_rs=get_reg(i_regmap,rs1[i]);
2889 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2890 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2891 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2892 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
// Step 1: read call. Register 1 = address>>16, register 0 = readmem table,
// register 2 = cycle count plus adjustment.
2893 emit_shrimm(addr,16,1);
2894 int cc=get_reg(i_regmap,CCREG);
2895 if(cc<0) {
2896 emit_loadreg(CCREG,2);
2897 }
2898 emit_movimm((u_int)readmem,0);
2899 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2900 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3); // XXX: can be rm'd?
2901 emit_call((int)&indirect_jump_indexed);
2902 restore_regs(reglist);
2903
// Step 2: merge. temp2 = word just read; temp (aliasing addr, which is
// clobbered from here on) = (addr & 3) * 8, i.e. the bit shift for the
// byte offset.
2904 emit_readword((int)&readmem_dword,temp2);
2905 int temp=addr; //hmh
2906 emit_shlimm(addr,3,temp);
2907 emit_andimm(temp,24,temp);
// The shift is flipped (xor 24) for SWL on little-endian builds and for SWR
// when BIG_ENDIAN_MIPS is defined.
2908#ifdef BIG_ENDIAN_MIPS
2909 if (opcode[i]==0x2e) // SWR
2910#else
2911 if (opcode[i]==0x2a) // SWL
2912#endif
2913 emit_xorimm(temp,24,temp);
// HOST_TEMPREG = all ones; shifted by temp it forms the mask of bytes to
// replace. Those bits are cleared in temp2, then rt shifted the same way is
// OR-ed in (right shifts for SWL, left shifts for SWR).
2914 emit_movimm(-1,HOST_TEMPREG);
55439448 2915 if (opcode[i]==0x2a) { // SWL
b7918751 2916 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2917 emit_orrshr(rt,temp,temp2);
2918 }else{
2919 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2920 emit_orrshl(rt,temp,temp2);
2921 }
// Step 3: write call. Reload the aligned address stored earlier, stage the
// merged word in `word`, and call the handler selected from the writemem
// table.
2922 emit_readword((int)&address,addr);
2923 emit_writeword(temp2,(int)&word);
2924 //save_regs(reglist); // don't need to, no state changes
2925 emit_shrimm(addr,16,1);
2926 emit_movimm((u_int)writemem,0);
2927 //emit_call((int)&indirect_jump_indexed);
// Hand-rolled indirect call: register 14 is loaded from register 15, then
// register 15 is loaded from the table entry indexed by registers 0 and 1.
2928 emit_mov(15,14);
2929 emit_readword_dualindexedx4(0,1,15);
// Rebuild the cycle counter as Count - adjustment - next_interupt and
// refresh last_count from next_interupt.
2930 emit_readword((int)&Count,HOST_TEMPREG);
2931 emit_readword((int)&next_interupt,2);
2932 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2933 emit_writeword(2,(int)&last_count);
2934 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2935 if(cc<0) {
2936 emit_storereg(CCREG,HOST_TEMPREG);
2937 }
2938 restore_regs(reglist);
57871462 2939 emit_jmp(stubs[n][2]); // return address
2940}
2941
// Debug helper: prints the eight register values passed in, followed by the
// int stored immediately below the edi parameter.
// NOTE(review): that last read goes outside any declared object; what it
// yields depends on the calling convention -- treat it as a debugging hack.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int *below_edi=&edi-1;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,*below_edi);
}
2946
2947do_invstub(int n)
2948{
2949 literal_pool(20);
2950 u_int reglist=stubs[n][3];
2951 set_jump_target(stubs[n][1],(int)out);
2952 save_regs(reglist);
2953 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2954 emit_call((int)&invalidate_addr);
2955 restore_regs(reglist);
2956 emit_jmp(stubs[n][2]); // return address
2957}
2958
// Emit the "dirty" entry stub for instruction i: it loads the arguments for
// the code-verification routine, calls it, then reloads the entry registers
// and jumps to the compiled instruction.
// Arguments placed in registers: 0 = start+i*4, 1 = address of the source
// code, 2 = copy, 3 = slen*4.
// Returns the address following the verify call, or instr_addr[i] when
// load_regs_entry() emitted nothing.
2959int do_dirty_stub(int i)
2960{
2961 assem_debug("do_dirty_stub %x\n",start+i*4);
// Below 0xC0000000 (signed compare) the source pointer is used, otherwise
// start itself; PCSX builds always use source.
ac545b3a 2962 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
2963 #ifdef PCSX
2964 addr=(u_int)source;
2965 #endif
57871462 2966 // Careful about the code output here, verify_dirty needs to parse it.
2967 #ifdef ARMv5_ONLY
ac545b3a 2968 emit_loadlp(addr,1);
57871462 2969 emit_loadlp((int)copy,2);
2970 emit_loadlp(slen*4,3);
2971 #else
ac545b3a 2972 emit_movw(addr&0x0000FFFF,1);
57871462 2973 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2974 emit_movt(addr&0xFFFF0000,1);
57871462 2975 emit_movt(((u_int)copy)&0xFFFF0000,2);
2976 emit_movw(slen*4,3);
2977 #endif
2978 emit_movimm(start+i*4,0);
2979 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2980 int entry=(int)out;
2981 load_regs_entry(i);
// If no register loads were emitted, the clean entry is the instruction
// itself.
2982 if(entry==(int)out) entry=instr_addr[i];
2983 emit_jmp(instr_addr[i]);
2984 return entry;
2985}
2986
// Variant of do_dirty_stub that calls verify_code_ds: same register setup
// (1 = source address, 2 = copy, 3 = slen*4) but register 0 receives start+1,
// and nothing is emitted after the call.
// NOTE(review): unlike do_dirty_stub there is no PCSX override forcing
// `source` here -- confirm whether that difference is intentional.
2987void do_dirty_stub_ds()
2988{
2989 // Careful about the code output here, verify_dirty needs to parse it.
2990 #ifdef ARMv5_ONLY
2991 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2992 emit_loadlp((int)copy,2);
2993 emit_loadlp(slen*4,3);
2994 #else
2995 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2996 emit_movw(((u_int)copy)&0x0000FFFF,2);
2997 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2998 emit_movt(((u_int)copy)&0xFFFF0000,2);
2999 emit_movw(slen*4,3);
3000 #endif
3001 emit_movimm(start+1,0);
3002 emit_call((int)&verify_code_ds);
3003}
3004
3005do_cop1stub(int n)
3006{
3007 literal_pool(256);
3008 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3009 set_jump_target(stubs[n][1],(int)out);
3010 int i=stubs[n][3];
3d624f89 3011// int rs=stubs[n][4];
57871462 3012 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3013 int ds=stubs[n][6];
3014 if(!ds) {
3015 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3016 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3017 }
3018 //else {printf("fp exception in delay slot\n");}
3019 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3020 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3021 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3022 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3023 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3024}
3025
3026/* TLB */
3027
3028int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3029{
3030 if(c) {
3031 if((signed int)addr>=(signed int)0xC0000000) {
3032 // address_generation already loaded the const
3033 emit_readword_dualindexedx4(FP,map,map);
3034 }
3035 else
3036 return -1; // No mapping
3037 }
3038 else {
3039 assert(s!=map);
3040 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3041 emit_addsr12(map,s,map);
3042 // Schedule this while we wait on the load
3043 //if(x) emit_xorimm(s,x,ar);
3044 if(shift>=0) emit_shlimm(s,3,shift);
3045 if(~a) emit_andimm(s,a,ar);
3046 emit_readword_dualindexedx4(FP,map,map);
3047 }
3048 return map;
3049}
3050int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3051{
3052 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3053 emit_test(map,map);
3054 *jaddr=(int)out;
3055 emit_js(0);
3056 }
3057 return map;
3058}
3059
3060int gen_tlb_addr_r(int ar, int map) {
3061 if(map>=0) {
3062 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3063 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3064 }
3065}
3066
3067int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3068{
3069 if(c) {
3070 if(addr<0x80800000||addr>=0xC0000000) {
3071 // address_generation already loaded the const
3072 emit_readword_dualindexedx4(FP,map,map);
3073 }
3074 else
3075 return -1; // No mapping
3076 }
3077 else {
3078 assert(s!=map);
3079 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3080 emit_addsr12(map,s,map);
3081 // Schedule this while we wait on the load
3082 //if(x) emit_xorimm(s,x,ar);
3083 emit_readword_dualindexedx4(FP,map,map);
3084 }
3085 return map;
3086}
3087int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3088{
3089 if(!c||addr<0x80800000||addr>=0xC0000000) {
3090 emit_testimm(map,0x40000000);
3091 *jaddr=(int)out;
3092 emit_jne(0);
3093 }
3094}
3095
3096int gen_tlb_addr_w(int ar, int map) {
3097 if(map>=0) {
3098 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3099 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3100 }
3101}
3102
3103// Generate the address of the memory_map entry, relative to dynarec_local
3104generate_map_const(u_int addr,int reg) {
3105 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3106 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3107}
3108
3109/* Special assem */
3110
// Assemble a variable shift (shift amount taken from a register) for
// instruction i using the register mapping in i_regs.
// opcode2 <= 0x07 selects the 32-bit forms (SLLV/SRLV/SRAV); anything else is
// treated as a 64-bit form (DSLLV/DSRLV/DSRAV) built from 32-bit halves.
// Nothing is emitted when the destination is guest register 0 or is not
// mapped to a host register.
3111void shift_assemble_arm(int i,struct regstat *i_regs)
3112{
3113 if(rt1[i]) {
3114 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3115 {
3116 signed char s,t,shift;
3117 t=get_reg(i_regs->regmap,rt1[i]);
3118 s=get_reg(i_regs->regmap,rs1[i]);
3119 shift=get_reg(i_regs->regmap,rs2[i]);
3120 if(t>=0){
// Source is guest register 0: the result is 0 whatever the shift.
3121 if(rs1[i]==0)
3122 {
3123 emit_zeroreg(t);
3124 }
// Shift amount is guest register 0: plain move.
3125 else if(rs2[i]==0)
3126 {
3127 assert(s>=0);
3128 if(s!=t) emit_mov(s,t);
3129 }
3130 else
3131 {
// Only the low 5 bits of the shift register are used.
3132 emit_andimm(shift,31,HOST_TEMPREG);
3133 if(opcode2[i]==4) // SLLV
3134 {
3135 emit_shl(s,HOST_TEMPREG,t);
3136 }
3137 if(opcode2[i]==6) // SRLV
3138 {
3139 emit_shr(s,HOST_TEMPREG,t);
3140 }
3141 if(opcode2[i]==7) // SRAV
3142 {
3143 emit_sar(s,HOST_TEMPREG,t);
3144 }
3145 }
3146 }
3147 } else { // DSLLV/DSRLV/DSRAV
// sh/sl and th/tl are the upper/lower host registers of the 64-bit source
// and destination.
3148 signed char sh,sl,th,tl,shift;
3149 th=get_reg(i_regs->regmap,rt1[i]|64);
3150 tl=get_reg(i_regs->regmap,rt1[i]);
3151 sh=get_reg(i_regs->regmap,rs1[i]|64);
3152 sl=get_reg(i_regs->regmap,rs1[i]);
3153 shift=get_reg(i_regs->regmap,rs2[i]);
3154 if(tl>=0){
3155 if(rs1[i]==0)
3156 {
3157 emit_zeroreg(tl);
3158 if(th>=0) emit_zeroreg(th);
3159 }
3160 else if(rs2[i]==0)
3161 {
3162 assert(sl>=0);
3163 if(sl!=tl) emit_mov(sl,tl);
3164 if(th>=0&&sh!=th) emit_mov(sh,th);
3165 }
3166 else
3167 {
3168 // FIXME: What if shift==tl ?
3169 assert(shift!=tl);
3170 int temp=get_reg(i_regs->regmap,-1);
// real_th remembers whether the upper destination half is really mapped;
// th may be redirected to a scratch register below.
3171 int real_th=th;
3172 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3173 assert(sl>=0);
3174 assert(sh>=0);
// Each 64-bit shift is first computed for an amount of 0..31 (low 5 bits),
// then bit 5 of the shift register is tested and conditional moves fix up
// the halves for amounts of 32 and above.
3175 emit_andimm(shift,31,HOST_TEMPREG);
3176 if(opcode2[i]==0x14) // DSLLV
3177 {
3178 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3179 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
// NOTE(review): unlike the neighbouring lines, this use of th is not guarded
// by th>=0, although th can be negative on this path -- confirm.
3180 emit_orrshr(sl,HOST_TEMPREG,th);
3181 emit_andimm(shift,31,HOST_TEMPREG);
3182 emit_testimm(shift,32);
3183 emit_shl(sl,HOST_TEMPREG,tl);
3184 if(th>=0) emit_cmovne_reg(tl,th);
3185 emit_cmovne_imm(0,tl);
3186 }
3187 if(opcode2[i]==0x16) // DSRLV
3188 {
3189 assert(th>=0);
3190 emit_shr(sl,HOST_TEMPREG,tl);
3191 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3192 emit_orrshl(sh,HOST_TEMPREG,tl);
3193 emit_andimm(shift,31,HOST_TEMPREG);
3194 emit_testimm(shift,32);
3195 emit_shr(sh,HOST_TEMPREG,th);
3196 emit_cmovne_reg(th,tl);
3197 if(real_th>=0) emit_cmovne_imm(0,th);
3198 }
3199 if(opcode2[i]==0x17) // DSRAV
3200 {
3201 assert(th>=0);
3202 emit_shr(sl,HOST_TEMPREG,tl);
3203 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
// temp receives th arithmetically shifted right by 31; it is moved into th
// by the conditional move at the end.
3204 if(real_th>=0) {
3205 assert(temp>=0);
3206 emit_sarimm(th,31,temp);
3207 }
3208 emit_orrshl(sh,HOST_TEMPREG,tl);
3209 emit_andimm(shift,31,HOST_TEMPREG);
3210 emit_testimm(shift,32);
3211 emit_sar(sh,HOST_TEMPREG,th);
3212 emit_cmovne_reg(th,tl);
3213 if(real_th>=0) emit_cmovne_reg(temp,th);
3214 }
3215 }
3216 }
3217 }
3218 }
3219}
3220#define shift_assemble shift_assemble_arm
3221
3222void loadlr_assemble_arm(int i,struct regstat *i_regs)
3223{
3224 int s,th,tl,temp,temp2,addr,map=-1;
3225 int offset;
3226 int jaddr=0;
3227 int memtarget,c=0;
3228 u_int hr,reglist=0;
3229 th=get_reg(i_regs->regmap,rt1[i]|64);
3230 tl=get_reg(i_regs->regmap,rt1[i]);
3231 s=get_reg(i_regs->regmap,rs1[i]);
3232 temp=get_reg(i_regs->regmap,-1);
3233 temp2=get_reg(i_regs->regmap,FTEMP);
3234 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3235 assert(addr<0);
3236 offset=imm[i];
3237 for(hr=0;hr<HOST_REGS;hr++) {
3238 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3239 }
3240 reglist|=1<<temp;
3241 if(offset||s<0||c) addr=temp2;
3242 else addr=s;
3243 if(s>=0) {
3244 c=(i_regs->wasconst>>s)&1;
4cb76aa4 3245 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
57871462 3246 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3247 }
3248 if(tl>=0) {
3249 //assert(tl>=0);
3250 //assert(rt1[i]);
3251 if(!using_tlb) {
3252 if(!c) {
3253 emit_shlimm(addr,3,temp);
3254 if (opcode[i]==0x22||opcode[i]==0x26) {
3255 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3256 }else{
3257 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3258 }
4cb76aa4 3259 emit_cmpimm(addr,RAM_SIZE);
57871462 3260 jaddr=(int)out;
3261 emit_jno(0);
3262 }
3263 else {
3264 if (opcode[i]==0x22||opcode[i]==0x26) {
3265 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3266 }else{
3267 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3268 }
3269 }
3270 }else{ // using tlb
3271 int a;
3272 if(c) {
3273 a=-1;
3274 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3275 a=0xFFFFFFFC; // LWL/LWR
3276 }else{
3277 a=0xFFFFFFF8; // LDL/LDR
3278 }
3279 map=get_reg(i_regs->regmap,TLREG);
3280 assert(map>=0);
3281 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3282 if(c) {
3283 if (opcode[i]==0x22||opcode[i]==0x26) {
3284 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3285 }else{
3286 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3287 }
3288 }
3289 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3290 }
3291 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3292 if(!c||memtarget) {
3293 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3294 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3295 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3296 }
3297 else
3298 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3299 emit_andimm(temp,24,temp);
2002a1db 3300#ifdef BIG_ENDIAN_MIPS
3301 if (opcode[i]==0x26) // LWR
3302#else
3303 if (opcode[i]==0x22) // LWL
3304#endif
3305 emit_xorimm(temp,24,temp);
57871462 3306 emit_movimm(-1,HOST_TEMPREG);
3307 if (opcode[i]==0x26) {
3308 emit_shr(temp2,temp,temp2);
3309 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3310 }else{
3311 emit_shl(temp2,temp,temp2);
3312 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3313 }
3314 emit_or(temp2,tl,tl);
3315 //emit_storereg(rt1[i],tl); // DEBUG
3316 }
3317 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3318 // FIXME: little endian
57871462 3319 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3320 if(!c||memtarget) {
3321 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3322 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3323 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3324 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3325 }
3326 else
3327 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3328 emit_testimm(temp,32);
3329 emit_andimm(temp,24,temp);
3330 if (opcode[i]==0x1A) { // LDL
3331 emit_rsbimm(temp,32,HOST_TEMPREG);
3332 emit_shl(temp2h,temp,temp2h);
3333 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3334 emit_movimm(-1,HOST_TEMPREG);
3335 emit_shl(temp2,temp,temp2);
3336 emit_cmove_reg(temp2h,th);
3337 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3338 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3339 emit_orreq(temp2,tl,tl);
3340 emit_orrne(temp2,th,th);
3341 }
3342 if (opcode[i]==0x1B) { // LDR
3343 emit_xorimm(temp,24,temp);
3344 emit_rsbimm(temp,32,HOST_TEMPREG);
3345 emit_shr(temp2,temp,temp2);
3346 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3347 emit_movimm(-1,HOST_TEMPREG);
3348 emit_shr(temp2h,temp,temp2h);
3349 emit_cmovne_reg(temp2,tl);
3350 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3351 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3352 emit_orrne(temp2h,th,th);
3353 emit_orreq(temp2h,tl,tl);
3354 }
3355 }
3356 }
3357}
3358#define loadlr_assemble loadlr_assemble_arm
3359
3360void cop0_assemble(int i,struct regstat *i_regs)
3361{
3362 if(opcode2[i]==0) // MFC0
3363 {
3364 signed char t=get_reg(i_regs->regmap,rt1[i]);
3365 char copr=(source[i]>>11)&0x1f;
3366 //assert(t>=0); // Why does this happen? OOT is weird
3367 if(t>=0) {
7139f3c8 3368#ifdef MUPEN64
57871462 3369 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3370 emit_movimm((source[i]>>11)&0x1f,1);
3371 emit_writeword(0,(int)&PC);
3372 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3373 if(copr==9) {
3374 emit_readword((int)&last_count,ECX);
3375 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3376 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3377 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3378 emit_writeword(HOST_CCREG,(int)&Count);
3379 }
3380 emit_call((int)MFC0);
3381 emit_readword((int)&readmem_dword,t);
7139f3c8 3382#else
3383 emit_readword((int)&reg_cop0+copr*4,t);
3384#endif
57871462 3385 }
3386 }
3387 else if(opcode2[i]==4) // MTC0
3388 {
3389 signed char s=get_reg(i_regs->regmap,rs1[i]);
3390 char copr=(source[i]>>11)&0x1f;
3391 assert(s>=0);
3392 emit_writeword(s,(int)&readmem_dword);
3393 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3d624f89 3394#ifdef MUPEN64 /// FIXME
57871462 3395 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3396 emit_movimm((source[i]>>11)&0x1f,1);
3397 emit_writeword(0,(int)&PC);
3398 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3d624f89 3399#endif
7139f3c8 3400#ifdef PCSX
3401 emit_movimm(source[i],0);
3402 emit_writeword(0,(int)&psxRegs.code);
3403#endif
3404 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3405 emit_readword((int)&last_count,ECX);
3406 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3407 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3408 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3409 emit_writeword(HOST_CCREG,(int)&Count);
3410 }
3411 // What a mess. The status register (12) can enable interrupts,
3412 // so needs a special case to handle a pending interrupt.
3413 // The interrupt must be taken immediately, because a subsequent
3414 // instruction might disable interrupts again.
7139f3c8 3415 if(copr==12||copr==13) {
57871462 3416 emit_movimm(start+i*4+4,0);
3417 emit_movimm(0,1);
3418 emit_writeword(0,(int)&pcaddr);
3419 emit_writeword(1,(int)&pending_exception);
3420 }
3421 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3422 //else
3423 emit_call((int)MTC0);
7139f3c8 3424 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3425 emit_readword((int)&Count,HOST_CCREG);
3426 emit_readword((int)&next_interupt,ECX);
3427 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3428 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3429 emit_writeword(ECX,(int)&last_count);
3430 emit_storereg(CCREG,HOST_CCREG);
3431 }
7139f3c8 3432 if(copr==12||copr==13) {
57871462 3433 assert(!is_delayslot);
3434 emit_readword((int)&pending_exception,14);
3435 }
3436 emit_loadreg(rs1[i],s);
3437 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3438 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 3439 if(copr==12||copr==13) {
57871462 3440 emit_test(14,14);
3441 emit_jne((int)&do_interrupt);
3442 }
3443 cop1_usable=0;
3444 }
3445 else
3446 {
3447 assert(opcode2[i]==0x10);
3d624f89 3448#ifndef DISABLE_TLB
57871462 3449 if((source[i]&0x3f)==0x01) // TLBR
3450 emit_call((int)TLBR);
3451 if((source[i]&0x3f)==0x02) // TLBWI
3452 emit_call((int)TLBWI_new);
3453 if((source[i]&0x3f)==0x06) { // TLBWR
3454 // The TLB entry written by TLBWR is dependent on the count,
3455 // so update the cycle count
3456 emit_readword((int)&last_count,ECX);
3457 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3458 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3459 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3460 emit_writeword(HOST_CCREG,(int)&Count);
3461 emit_call((int)TLBWR_new);
3462 }
3463 if((source[i]&0x3f)==0x08) // TLBP
3464 emit_call((int)TLBP);
3d624f89 3465#endif
57871462 3466 if((source[i]&0x3f)==0x18) // ERET
3467 {
3468 int count=ccadj[i];
3469 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3470 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3471 emit_jmp((int)jump_eret);
3472 }
3473 }
3474}
3475
b9b61529 3476static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3477{
3478 switch (copr) {
3479 case 1:
3480 case 3:
3481 case 5:
3482 case 8:
3483 case 9:
3484 case 10:
3485 case 11:
3486 emit_readword((int)&reg_cop2d[copr],tl);
3487 emit_signextend16(tl,tl);
3488 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3489 break;
3490 case 7:
3491 case 16:
3492 case 17:
3493 case 18:
3494 case 19:
3495 emit_readword((int)&reg_cop2d[copr],tl);
3496 emit_andimm(tl,0xffff,tl);
3497 emit_writeword(tl,(int)&reg_cop2d[copr]);
3498 break;
3499 case 15:
3500 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3501 emit_writeword(tl,(int)&reg_cop2d[copr]);
3502 break;
3503 case 28:
3504 case 30:
3505 emit_movimm(0,tl);
3506 break;
3507 case 29:
3508 emit_readword((int)&reg_cop2d[9],temp);
3509 emit_testimm(temp,0x8000); // do we need this?
3510 emit_andimm(temp,0xf80,temp);
3511 emit_andne_imm(temp,0,temp);
3512 emit_shr(temp,7,tl);
3513 emit_readword((int)&reg_cop2d[10],temp);
3514 emit_testimm(temp,0x8000);
3515 emit_andimm(temp,0xf80,temp);
3516 emit_andne_imm(temp,0,temp);
3517 emit_orrshr(temp,2,tl);
3518 emit_readword((int)&reg_cop2d[11],temp);
3519 emit_testimm(temp,0x8000);
3520 emit_andimm(temp,0xf80,temp);
3521 emit_andne_imm(temp,0,temp);
3522 emit_orrshl(temp,3,tl);
3523 emit_writeword(tl,(int)&reg_cop2d[copr]);
3524 break;
3525 default:
3526 emit_readword((int)&reg_cop2d[copr],tl);
3527 break;
3528 }
3529}
3530
3531static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3532{
3533 switch (copr) {
3534 case 15:
3535 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3536 emit_writeword(sl,(int)&reg_cop2d[copr]);
3537 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3538 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3539 emit_writeword(sl,(int)&reg_cop2d[14]);
3540 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3541 break;
3542 case 28:
3543 emit_andimm(sl,0x001f,temp);
3544 emit_shl(temp,7,temp);
3545 emit_writeword(temp,(int)&reg_cop2d[9]);
3546 emit_andimm(sl,0x03e0,temp);
3547 emit_shl(temp,2,temp);
3548 emit_writeword(temp,(int)&reg_cop2d[10]);
3549 emit_andimm(sl,0x7c00,temp);
3550 emit_shr(temp,3,temp);
3551 emit_writeword(temp,(int)&reg_cop2d[11]);
3552 emit_writeword(sl,(int)&reg_cop2d[28]);
3553 break;
3554 case 30:
3555 emit_movs(sl,temp);
3556 emit_mvnmi(temp,temp);
3557 emit_clz(temp,temp);
3558 emit_writeword(sl,(int)&reg_cop2d[30]);
3559 emit_writeword(temp,(int)&reg_cop2d[31]);
3560 break;
3561 case 7:
3562 case 29:
3563 case 31:
3564 break;
3565 default:
3566 emit_writeword(sl,(int)&reg_cop2d[copr]);
3567 break;
3568 }
3569}
3570
3571void cop2_assemble(int i,struct regstat *i_regs)
3572{
3573 u_int copr=(source[i]>>11)&0x1f;
3574 signed char temp=get_reg(i_regs->regmap,-1);
3575 if (opcode2[i]==0) { // MFC2
3576 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3577 if(tl>=0)
3578 cop2_get_dreg(copr,tl,temp);
3579 }
3580 else if (opcode2[i]==4) { // MTC2
3581 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3582 cop2_put_dreg(copr,sl,temp);
3583 }
3584 else if (opcode2[i]==2) // CFC2
3585 {
3586 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3587 if(tl>=0)
3588 emit_readword((int)&reg_cop2c[copr],tl);
3589 }
3590 else if (opcode2[i]==6) // CTC2
3591 {
3592 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3593 switch(copr) {
3594 case 4:
3595 case 12:
3596 case 20:
3597 case 26:
3598 case 27:
3599 case 29:
3600 case 30:
3601 emit_signextend16(sl,temp);
3602 break;
3603 case 31:
3604 //value = value & 0x7ffff000;
3605 //if (value & 0x7f87e000) value |= 0x80000000;
3606 emit_shrimm(sl,12,temp);
3607 emit_shlimm(temp,12,temp);
3608 emit_testimm(temp,0x7f000000);
3609 emit_testeqimm(temp,0x00870000);
3610 emit_testeqimm(temp,0x0000e000);
3611 emit_orrne_imm(temp,0x80000000,temp);
3612 break;
3613 default:
3614 temp=sl;
3615 break;
3616 }
3617 emit_writeword(temp,(int)&reg_cop2c[copr]);
3618 assert(sl>=0);
3619 }
3620}
3621
3622void c2op_assemble(int i,struct regstat *i_regs)
3623{
3624 signed char temp=get_reg(i_regs->regmap,-1);
3625 u_int c2op=source[i]&0x3f;
3626 u_int hr,reglist=0;
3627 for(hr=0;hr<HOST_REGS;hr++) {
3628 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3629 }
3630 if(i==0||itype[i-1]!=C2OP)
3631 save_regs(reglist);
3632
3633 if (gte_handlers[c2op]!=NULL) {
3634 int cc=get_reg(i_regs->regmap,CCREG);
3635 emit_movimm(source[i],temp); // opcode
3636 if (cc>=0&&gte_cycletab[c2op])
3637 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3638 emit_writeword(temp,(int)&psxRegs.code);
3639 emit_call((int)gte_handlers[c2op]);
3640 }
3641
3642 if(i>=slen-1||itype[i+1]!=C2OP)
3643 restore_regs(reglist);
3644}
3645
3646void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3647{
3648 // XXX: should just just do the exception instead
3649 if(!cop1_usable) {
3650 int jaddr=(int)out;
3651 emit_jmp(0);
3652 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3653 cop1_usable=1;
3654 }
3655}
3656
57871462 3657void cop1_assemble(int i,struct regstat *i_regs)
3658{
3d624f89 3659#ifndef DISABLE_COP1
57871462 3660 // Check cop1 unusable
3661 if(!cop1_usable) {
3662 signed char rs=get_reg(i_regs->regmap,CSREG);
3663 assert(rs>=0);
3664 emit_testimm(rs,0x20000000);
3665 int jaddr=(int)out;
3666 emit_jeq(0);
3667 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3668 cop1_usable=1;
3669 }
3670 if (opcode2[i]==0) { // MFC1
3671 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3672 if(tl>=0) {
3673 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3674 emit_readword_indexed(0,tl,tl);
3675 }
3676 }
3677 else if (opcode2[i]==1) { // DMFC1
3678 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3679 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3680 if(tl>=0) {
3681 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3682 if(th>=0) emit_readword_indexed(4,tl,th);
3683 emit_readword_indexed(0,tl,tl);
3684 }
3685 }
3686 else if (opcode2[i]==4) { // MTC1
3687 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3688 signed char temp=get_reg(i_regs->regmap,-1);
3689 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3690 emit_writeword_indexed(sl,0,temp);
3691 }
3692 else if (opcode2[i]==5) { // DMTC1
3693 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3694 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3695 signed char temp=get_reg(i_regs->regmap,-1);
3696 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3697 emit_writeword_indexed(sh,4,temp);
3698 emit_writeword_indexed(sl,0,temp);
3699 }
3700 else if (opcode2[i]==2) // CFC1
3701 {
3702 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3703 if(tl>=0) {
3704 u_int copr=(source[i]>>11)&0x1f;
3705 if(copr==0) emit_readword((int)&FCR0,tl);
3706 if(copr==31) emit_readword((int)&FCR31,tl);
3707 }
3708 }
3709 else if (opcode2[i]==6) // CTC1
3710 {
3711 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3712 u_int copr=(source[i]>>11)&0x1f;
3713 assert(sl>=0);
3714 if(copr==31)
3715 {
3716 emit_writeword(sl,(int)&FCR31);
3717 // Set the rounding mode
3718 //FIXME
3719 //char temp=get_reg(i_regs->regmap,-1);
3720 //emit_andimm(sl,3,temp);
3721 //emit_fldcw_indexed((int)&rounding_modes,temp);
3722 }
3723 }
3d624f89 3724#else
3725 cop1_unusable(i, i_regs);
3726#endif
57871462 3727}
3728
3729void fconv_assemble_arm(int i,struct regstat *i_regs)
3730{
3d624f89 3731#ifndef DISABLE_COP1
57871462 3732 signed char temp=get_reg(i_regs->regmap,-1);
3733 assert(temp>=0);
3734 // Check cop1 unusable
3735 if(!cop1_usable) {
3736 signed char rs=get_reg(i_regs->regmap,CSREG);
3737 assert(rs>=0);
3738 emit_testimm(rs,0x20000000);
3739 int jaddr=(int)out;
3740 emit_jeq(0);
3741 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3742 cop1_usable=1;
3743 }
3744
3745 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3746 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3747 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3748 emit_flds(temp,15);
3749 emit_ftosizs(15,15); // float->int, truncate
3750 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3751 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3752 emit_fsts(15,temp);
3753 return;
3754 }
3755 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3756 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3757 emit_vldr(temp,7);
3758 emit_ftosizd(7,13); // double->int, truncate
3759 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3760 emit_fsts(13,temp);
3761 return;
3762 }
3763
3764 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3765 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3766 emit_flds(temp,13);
3767 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3768 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3769 emit_fsitos(13,15);
3770 emit_fsts(15,temp);
3771 return;
3772 }
3773 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3774 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3775 emit_flds(temp,13);
3776 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3777 emit_fsitod(13,7);
3778 emit_vstr(7,temp);
3779 return;
3780 }
3781
3782 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3783 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3784 emit_flds(temp,13);
3785 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3786 emit_fcvtds(13,7);
3787 emit_vstr(7,temp);
3788 return;
3789 }
3790 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3791 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3792 emit_vldr(temp,7);
3793 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3794 emit_fcvtsd(7,13);
3795 emit_fsts(13,temp);
3796 return;
3797 }
3798 #endif
3799
3800 // C emulation code
3801
3802 u_int hr,reglist=0;
3803 for(hr=0;hr<HOST_REGS;hr++) {
3804 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3805 }
3806 save_regs(reglist);
3807
3808 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3809 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3810 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3811 emit_call((int)cvt_s_w);
3812 }
3813 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3814 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3815 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3816 emit_call((int)cvt_d_w);
3817 }
3818 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3819 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3820 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3821 emit_call((int)cvt_s_l);
3822 }
3823 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3824 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3825 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3826 emit_call((int)cvt_d_l);
3827 }
3828
3829 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3830 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3831 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3832 emit_call((int)cvt_d_s);
3833 }
3834 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3835 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3836 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3837 emit_call((int)cvt_w_s);
3838 }
3839 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3840 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3841 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3842 emit_call((int)cvt_l_s);
3843 }
3844
3845 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3846 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3847 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3848 emit_call((int)cvt_s_d);
3849 }
3850 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3851 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3852 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3853 emit_call((int)cvt_w_d);
3854 }
3855 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3856 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3857 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3858 emit_call((int)cvt_l_d);
3859 }
3860
3861 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3862 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3863 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3864 emit_call((int)round_l_s);
3865 }
3866 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3867 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3868 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3869 emit_call((int)trunc_l_s);
3870 }
3871 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3872 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3873 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3874 emit_call((int)ceil_l_s);
3875 }
3876 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3877 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3878 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3879 emit_call((int)floor_l_s);
3880 }
3881 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3882 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3883 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3884 emit_call((int)round_w_s);
3885 }
3886 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3887 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3888 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3889 emit_call((int)trunc_w_s);
3890 }
3891 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3892 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3893 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3894 emit_call((int)ceil_w_s);
3895 }
3896 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3897 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3898 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3899 emit_call((int)floor_w_s);
3900 }
3901
3902 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3903 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3904 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3905 emit_call((int)round_l_d);
3906 }
3907 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3908 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3909 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3910 emit_call((int)trunc_l_d);
3911 }
3912 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3913 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3914 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3915 emit_call((int)ceil_l_d);
3916 }
3917 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3918 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3919 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3920 emit_call((int)floor_l_d);
3921 }
3922 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3923 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3924 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3925 emit_call((int)round_w_d);
3926 }
3927 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3928 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3929 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3930 emit_call((int)trunc_w_d);
3931 }
3932 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3933 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3934 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3935 emit_call((int)ceil_w_d);
3936 }
3937 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3938 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3939 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3940 emit_call((int)floor_w_d);
3941 }
3942
3943 restore_regs(reglist);
3d624f89 3944#else
3945 cop1_unusable(i, i_regs);
3946#endif
57871462 3947}
3948#define fconv_assemble fconv_assemble_arm
3949
3950void fcomp_assemble(int i,struct regstat *i_regs)
3951{
3d624f89 3952#ifndef DISABLE_COP1
57871462 3953 signed char fs=get_reg(i_regs->regmap,FSREG);
3954 signed char temp=get_reg(i_regs->regmap,-1);
3955 assert(temp>=0);
3956 // Check cop1 unusable
3957 if(!cop1_usable) {
3958 signed char cs=get_reg(i_regs->regmap,CSREG);
3959 assert(cs>=0);
3960 emit_testimm(cs,0x20000000);
3961 int jaddr=(int)out;
3962 emit_jeq(0);
3963 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3964 cop1_usable=1;
3965 }
3966
3967 if((source[i]&0x3f)==0x30) {
3968 emit_andimm(fs,~0x800000,fs);
3969 return;
3970 }
3971
3972 if((source[i]&0x3e)==0x38) {
3973 // sf/ngle - these should throw exceptions for NaNs
3974 emit_andimm(fs,~0x800000,fs);
3975 return;
3976 }
3977
3978 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3979 if(opcode2[i]==0x10) {
3980 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3981 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3982 emit_orimm(fs,0x800000,fs);
3983 emit_flds(temp,14);
3984 emit_flds(HOST_TEMPREG,15);
3985 emit_fcmps(14,15);
3986 emit_fmstat();
3987 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
3988 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
3989 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
3990 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
3991 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
3992 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
3993 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
3994 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
3995 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
3996 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
3997 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
3998 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
3999 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4000 return;
4001 }
4002 if(opcode2[i]==0x11) {
4003 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4004 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4005 emit_orimm(fs,0x800000,fs);
4006 emit_vldr(temp,6);
4007 emit_vldr(HOST_TEMPREG,7);
4008 emit_fcmpd(6,7);
4009 emit_fmstat();
4010 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4011 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4012 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4013 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4014 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4015 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4016 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4017 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4018 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4019 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4020 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4021 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4022 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4023 return;
4024 }
4025 #endif
4026
4027 // C only
4028
4029 u_int hr,reglist=0;
4030 for(hr=0;hr<HOST_REGS;hr++) {
4031 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4032 }
4033 reglist&=~(1<<fs);
4034 save_regs(reglist);
4035 if(opcode2[i]==0x10) {
4036 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4037 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4038 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4039 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4040 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4041 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4042 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4043 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4044 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4045 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4046 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4047 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4048 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4049 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4050 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4051 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4052 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4053 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
4054 }
4055 if(opcode2[i]==0x11) {
4056 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4057 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4058 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
4059 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
4060 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
4061 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
4062 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
4063 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
4064 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
4065 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
4066 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
4067 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
4068 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
4069 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
4070 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
4071 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
4072 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
4073 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
4074 }
4075 restore_regs(reglist);
4076 emit_loadreg(FSREG,fs);
3d624f89 4077#else
4078 cop1_unusable(i, i_regs);
4079#endif
57871462 4080}
4081
4082void float_assemble(int i,struct regstat *i_regs)
4083{
3d624f89 4084#ifndef DISABLE_COP1
57871462 4085 signed char temp=get_reg(i_regs->regmap,-1);
4086 assert(temp>=0);
4087 // Check cop1 unusable
4088 if(!cop1_usable) {
4089 signed char cs=get_reg(i_regs->regmap,CSREG);
4090 assert(cs>=0);
4091 emit_testimm(cs,0x20000000);
4092 int jaddr=(int)out;
4093 emit_jeq(0);
4094 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4095 cop1_usable=1;
4096 }
4097
4098 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4099 if((source[i]&0x3f)==6) // mov
4100 {
4101 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4102 if(opcode2[i]==0x10) {
4103 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4104 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4105 emit_readword_indexed(0,temp,temp);
4106 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4107 }
4108 if(opcode2[i]==0x11) {
4109 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4110 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4111 emit_vldr(temp,7);
4112 emit_vstr(7,HOST_TEMPREG);
4113 }
4114 }
4115 return;
4116 }
4117
4118 if((source[i]&0x3f)>3)
4119 {
4120 if(opcode2[i]==0x10) {
4121 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4122 emit_flds(temp,15);
4123 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4124 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4125 }
4126 if((source[i]&0x3f)==4) // sqrt
4127 emit_fsqrts(15,15);
4128 if((source[i]&0x3f)==5) // abs
4129 emit_fabss(15,15);
4130 if((source[i]&0x3f)==7) // neg
4131 emit_fnegs(15,15);
4132 emit_fsts(15,temp);
4133 }
4134 if(opcode2[i]==0x11) {
4135 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4136 emit_vldr(temp,7);
4137 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4138 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4139 }
4140 if((source[i]&0x3f)==4) // sqrt
4141 emit_fsqrtd(7,7);
4142 if((source[i]&0x3f)==5) // abs
4143 emit_fabsd(7,7);
4144 if((source[i]&0x3f)==7) // neg
4145 emit_fnegd(7,7);
4146 emit_vstr(7,temp);
4147 }
4148 return;
4149 }
4150 if((source[i]&0x3f)<4)
4151 {
4152 if(opcode2[i]==0x10) {
4153 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4154 }
4155 if(opcode2[i]==0x11) {
4156 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4157 }
4158 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4159 if(opcode2[i]==0x10) {
4160 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4161 emit_flds(temp,15);
4162 emit_flds(HOST_TEMPREG,13);
4163 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4164 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4165 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4166 }
4167 }
4168 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4169 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4170 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4171 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4172 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4173 emit_fsts(15,HOST_TEMPREG);
4174 }else{
4175 emit_fsts(15,temp);
4176 }
4177 }
4178 else if(opcode2[i]==0x11) {
4179 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4180 emit_vldr(temp,7);
4181 emit_vldr(HOST_TEMPREG,6);
4182 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4183 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4184 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4185 }
4186 }
4187 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4188 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4189 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4190 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4191 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4192 emit_vstr(7,HOST_TEMPREG);
4193 }else{
4194 emit_vstr(7,temp);
4195 }
4196 }
4197 }
4198 else {
4199 if(opcode2[i]==0x10) {
4200 emit_flds(temp,15);
4201 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4202 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4203 }
4204 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4205 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4206 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4207 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4208 emit_fsts(15,temp);
4209 }
4210 else if(opcode2[i]==0x11) {
4211 emit_vldr(temp,7);
4212 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4213 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4214 }
4215 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4216 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4217 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4218 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4219 emit_vstr(7,temp);
4220 }
4221 }
4222 return;
4223 }
4224 #endif
4225
4226 u_int hr,reglist=0;
4227 for(hr=0;hr<HOST_REGS;hr++) {
4228 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4229 }
4230 if(opcode2[i]==0x10) { // Single precision
4231 save_regs(reglist);
4232 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4233 if((source[i]&0x3f)<4) {
4234 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4235 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4236 }else{
4237 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4238 }
4239 switch(source[i]&0x3f)
4240 {
4241 case 0x00: emit_call((int)add_s);break;
4242 case 0x01: emit_call((int)sub_s);break;
4243 case 0x02: emit_call((int)mul_s);break;
4244 case 0x03: emit_call((int)div_s);break;
4245 case 0x04: emit_call((int)sqrt_s);break;
4246 case 0x05: emit_call((int)abs_s);break;
4247 case 0x06: emit_call((int)mov_s);break;
4248 case 0x07: emit_call((int)neg_s);break;
4249 }
4250 restore_regs(reglist);
4251 }
4252 if(opcode2[i]==0x11) { // Double precision
4253 save_regs(reglist);
4254 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4255 if((source[i]&0x3f)<4) {
4256 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4257 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4258 }else{
4259 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4260 }
4261 switch(source[i]&0x3f)
4262 {
4263 case 0x00: emit_call((int)add_d);break;
4264 case 0x01: emit_call((int)sub_d);break;
4265 case 0x02: emit_call((int)mul_d);break;
4266 case 0x03: emit_call((int)div_d);break;
4267 case 0x04: emit_call((int)sqrt_d);break;
4268 case 0x05: emit_call((int)abs_d);break;
4269 case 0x06: emit_call((int)mov_d);break;
4270 case 0x07: emit_call((int)neg_d);break;
4271 }
4272 restore_regs(reglist);
4273 }
3d624f89 4274#else
4275 cop1_unusable(i, i_regs);
4276#endif
57871462 4277}
4278
4279void multdiv_assemble_arm(int i,struct regstat *i_regs)
4280{
4281 // case 0x18: MULT
4282 // case 0x19: MULTU
4283 // case 0x1A: DIV
4284 // case 0x1B: DIVU
4285 // case 0x1C: DMULT
4286 // case 0x1D: DMULTU
4287 // case 0x1E: DDIV
4288 // case 0x1F: DDIVU
4289 if(rs1[i]&&rs2[i])
4290 {
4291 if((opcode2[i]&4)==0) // 32-bit
4292 {
4293 if(opcode2[i]==0x18) // MULT
4294 {
4295 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4296 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4297 signed char hi=get_reg(i_regs->regmap,HIREG);
4298 signed char lo=get_reg(i_regs->regmap,LOREG);
4299 assert(m1>=0);
4300 assert(m2>=0);
4301 assert(hi>=0);
4302 assert(lo>=0);
4303 emit_smull(m1,m2,hi,lo);
4304 }
4305 if(opcode2[i]==0x19) // MULTU
4306 {
4307 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4308 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4309 signed char hi=get_reg(i_regs->regmap,HIREG);
4310 signed char lo=get_reg(i_regs->regmap,LOREG);
4311 assert(m1>=0);
4312 assert(m2>=0);
4313 assert(hi>=0);
4314 assert(lo>=0);
4315 emit_umull(m1,m2,hi,lo);
4316 }
4317 if(opcode2[i]==0x1A) // DIV
4318 {
4319 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4320 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4321 assert(d1>=0);
4322 assert(d2>=0);
4323 signed char quotient=get_reg(i_regs->regmap,LOREG);
4324 signed char remainder=get_reg(i_regs->regmap,HIREG);
4325 assert(quotient>=0);
4326 assert(remainder>=0);
4327 emit_movs(d1,remainder);
4328 emit_negmi(remainder,remainder);
4329 emit_movs(d2,HOST_TEMPREG);
4330 emit_jeq((int)out+52); // Division by zero
4331 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4332 emit_clz(HOST_TEMPREG,quotient);
4333 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4334 emit_orimm(quotient,1<<31,quotient);
4335 emit_shr(quotient,quotient,quotient);
4336 emit_cmp(remainder,HOST_TEMPREG);
4337 emit_subcs(remainder,HOST_TEMPREG,remainder);
4338 emit_adcs(quotient,quotient,quotient);
4339 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4340 emit_jcc((int)out-16); // -4
4341 emit_teq(d1,d2);
4342 emit_negmi(quotient,quotient);
4343 emit_test(d1,d1);
4344 emit_negmi(remainder,remainder);
4345 }
4346 if(opcode2[i]==0x1B) // DIVU
4347 {
4348 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4349 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4350 assert(d1>=0);
4351 assert(d2>=0);
4352 signed char quotient=get_reg(i_regs->regmap,LOREG);
4353 signed char remainder=get_reg(i_regs->regmap,HIREG);
4354 assert(quotient>=0);
4355 assert(remainder>=0);
4356 emit_test(d2,d2);
4357 emit_jeq((int)out+44); // Division by zero
4358 emit_clz(d2,HOST_TEMPREG);
4359 emit_movimm(1<<31,quotient);
4360 emit_shl(d2,HOST_TEMPREG,d2);
4361 emit_mov(d1,remainder);
4362 emit_shr(quotient,HOST_TEMPREG,quotient);
4363 emit_cmp(remainder,d2);
4364 emit_subcs(remainder,d2,remainder);
4365 emit_adcs(quotient,quotient,quotient);
4366 emit_shrcc_imm(d2,1,d2);
4367 emit_jcc((int)out-16); // -4
4368 }
4369 }
4370 else // 64-bit
4371 {
4372 if(opcode2[i]==0x1C) // DMULT
4373 {
4374 assert(opcode2[i]!=0x1C);
4375 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4376 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4377 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4378 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4379 assert(m1h>=0);
4380 assert(m2h>=0);
4381 assert(m1l>=0);
4382 assert(m2l>=0);
4383 emit_pushreg(m2h);
4384 emit_pushreg(m2l);
4385 emit_pushreg(m1h);
4386 emit_pushreg(m1l);
4387 emit_call((int)&mult64);
4388 emit_popreg(m1l);
4389 emit_popreg(m1h);
4390 emit_popreg(m2l);
4391 emit_popreg(m2h);
4392 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4393 signed char hil=get_reg(i_regs->regmap,HIREG);
4394 if(hih>=0) emit_loadreg(HIREG|64,hih);
4395 if(hil>=0) emit_loadreg(HIREG,hil);
4396 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4397 signed char lol=get_reg(i_regs->regmap,LOREG);
4398 if(loh>=0) emit_loadreg(LOREG|64,loh);
4399 if(lol>=0) emit_loadreg(LOREG,lol);
4400 }
4401 if(opcode2[i]==0x1D) // DMULTU
4402 {
4403 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4404 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4405 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4406 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4407 assert(m1h>=0);
4408 assert(m2h>=0);
4409 assert(m1l>=0);
4410 assert(m2l>=0);
4411 save_regs(0x100f);
4412 if(m1l!=0) emit_mov(m1l,0);
4413 if(m1h==0) emit_readword((int)&dynarec_local,1);
4414 else if(m1h>1) emit_mov(m1h,1);
4415 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4416 else if(m2l>2) emit_mov(m2l,2);
4417 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4418 else if(m2h>3) emit_mov(m2h,3);
4419 emit_call((int)&multu64);
4420 restore_regs(0x100f);
4421 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4422 signed char hil=get_reg(i_regs->regmap,HIREG);
4423 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4424 signed char lol=get_reg(i_regs->regmap,LOREG);
4425 /*signed char temp=get_reg(i_regs->regmap,-1);
4426 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4427 signed char rl=get_reg(i_regs->regmap,HIREG);
4428 assert(m1h>=0);
4429 assert(m2h>=0);
4430 assert(m1l>=0);
4431 assert(m2l>=0);
4432 assert(temp>=0);
4433 //emit_mov(m1l,EAX);
4434 //emit_mul(m2l);
4435 emit_umull(rl,rh,m1l,m2l);
4436 emit_storereg(LOREG,rl);
4437 emit_mov(rh,temp);
4438 //emit_mov(m1h,EAX);
4439 //emit_mul(m2l);
4440 emit_umull(rl,rh,m1h,m2l);
4441 emit_adds(rl,temp,temp);
4442 emit_adcimm(rh,0,rh);
4443 emit_storereg(HIREG,rh);
4444 //emit_mov(m2h,EAX);
4445 //emit_mul(m1l);
4446 emit_umull(rl,rh,m1l,m2h);
4447 emit_adds(rl,temp,temp);
4448 emit_adcimm(rh,0,rh);
4449 emit_storereg(LOREG|64,temp);
4450 emit_mov(rh,temp);
4451 //emit_mov(m2h,EAX);
4452 //emit_mul(m1h);
4453 emit_umull(rl,rh,m1h,m2h);
4454 emit_adds(rl,temp,rl);
4455 emit_loadreg(HIREG,temp);
4456 emit_adcimm(rh,0,rh);
4457 emit_adds(rl,temp,rl);
4458 emit_adcimm(rh,0,rh);
4459 // DEBUG
4460 /*
4461 emit_pushreg(m2h);
4462 emit_pushreg(m2l);
4463 emit_pushreg(m1h);
4464 emit_pushreg(m1l);
4465 emit_call((int)&multu64);
4466 emit_popreg(m1l);
4467 emit_popreg(m1h);
4468 emit_popreg(m2l);
4469 emit_popreg(m2h);
4470 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4471 signed char hil=get_reg(i_regs->regmap,HIREG);
4472 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4473 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4474 */
4475 // Shouldn't be necessary
4476 //char loh=get_reg(i_regs->regmap,LOREG|64);
4477 //char lol=get_reg(i_regs->regmap,LOREG);
4478 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4479 //if(lol>=0) emit_loadreg(LOREG,lol);
4480 }
4481 if(opcode2[i]==0x1E) // DDIV
4482 {
4483 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4484 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4485 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4486 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4487 assert(d1h>=0);
4488 assert(d2h>=0);
4489 assert(d1l>=0);
4490 assert(d2l>=0);
4491 save_regs(0x100f);
4492 if(d1l!=0) emit_mov(d1l,0);
4493 if(d1h==0) emit_readword((int)&dynarec_local,1);
4494 else if(d1h>1) emit_mov(d1h,1);
4495 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4496 else if(d2l>2) emit_mov(d2l,2);
4497 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4498 else if(d2h>3) emit_mov(d2h,3);
4499 emit_call((int)&div64);
4500 restore_regs(0x100f);
4501 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4502 signed char hil=get_reg(i_regs->regmap,HIREG);
4503 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4504 signed char lol=get_reg(i_regs->regmap,LOREG);
4505 if(hih>=0) emit_loadreg(HIREG|64,hih);
4506 if(hil>=0) emit_loadreg(HIREG,hil);
4507 if(loh>=0) emit_loadreg(LOREG|64,loh);
4508 if(lol>=0) emit_loadreg(LOREG,lol);
4509 }
4510 if(opcode2[i]==0x1F) // DDIVU
4511 {
4512 //u_int hr,reglist=0;
4513 //for(hr=0;hr<HOST_REGS;hr++) {
4514 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4515 //}
4516 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4517 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4518 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4519 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4520 assert(d1h>=0);
4521 assert(d2h>=0);
4522 assert(d1l>=0);
4523 assert(d2l>=0);
4524 save_regs(0x100f);
4525 if(d1l!=0) emit_mov(d1l,0);
4526 if(d1h==0) emit_readword((int)&dynarec_local,1);
4527 else if(d1h>1) emit_mov(d1h,1);
4528 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4529 else if(d2l>2) emit_mov(d2l,2);
4530 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4531 else if(d2h>3) emit_mov(d2h,3);
4532 emit_call((int)&divu64);
4533 restore_regs(0x100f);
4534 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4535 signed char hil=get_reg(i_regs->regmap,HIREG);
4536 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4537 signed char lol=get_reg(i_regs->regmap,LOREG);
4538 if(hih>=0) emit_loadreg(HIREG|64,hih);
4539 if(hil>=0) emit_loadreg(HIREG,hil);
4540 if(loh>=0) emit_loadreg(LOREG|64,loh);
4541 if(lol>=0) emit_loadreg(LOREG,lol);
4542 }
4543 }
4544 }
4545 else
4546 {
4547 // Multiply by zero is zero.
4548 // MIPS does not have a divide by zero exception.
4549 // The result is undefined, we return zero.
4550 signed char hr=get_reg(i_regs->regmap,HIREG);
4551 signed char lr=get_reg(i_regs->regmap,LOREG);
4552 if(hr>=0) emit_zeroreg(hr);
4553 if(lr>=0) emit_zeroreg(lr);
4554 }
4555}
4556#define multdiv_assemble multdiv_assemble_arm
4557
/*
 * Hash-constant preload hook.  Deliberately emits nothing on ARM: the x86
 * backend loads the value 0xf8 into register r here, whereas on ARM the
 * hash is computed with a single instruction (see do_rhash).
 */
void do_preload_rhash(int r)
{
  (void)r; // unused on this architecture
}
4562
4563void do_preload_rhtbl(int ht) {
4564 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4565}
4566
/*
 * Emit the mini hash table hash of the address in rs into rh.
 * The mask keeps bits 3..7, matching the (addr&0xFF)>>3 bucket index used
 * by do_miniht_insert scaled by the 8-byte size of a mini_ht entry.
 */
void do_rhash(int rs,int rh)
{
  const int bucket_offset_mask=0xf8;
  emit_andimm(rs,bucket_offset_mask,rh);
}
4570
4571void do_miniht_load(int ht,int rh) {
4572 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4573 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4574}
4575
4576void do_miniht_jump(int rs,int rh,int ht) {
4577 emit_cmp(rh,rs);
4578 emit_ldreq_indexed(ht,4,15);
4579 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4580 emit_mov(rs,7);
4581 emit_jmp(jump_vaddr_reg[7]);
4582 #else
4583 emit_jmp(jump_vaddr_reg[rs]);
4584 #endif
4585}
4586
4587void do_miniht_insert(u_int return_address,int rt,int temp) {
4588 #ifdef ARMv5_ONLY
4589 emit_movimm(return_address,rt); // PC into link register
4590 add_to_linker((int)out,return_address,1);
4591 emit_pcreladdr(temp);
4592 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4593 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4594 #else
4595 emit_movw(return_address&0x0000FFFF,rt);
4596 add_to_linker((int)out,return_address,1);
4597 emit_pcreladdr(temp);
4598 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4599 emit_movt(return_address&0xFFFF0000,rt);
4600 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4601 #endif
4602}
4603
4604// Sign-extend to 64 bits and write out upper half of a register
4605// This is useful where we have a 32-bit value in a register, and want to
4606// keep it in a 32-bit register, but can't guarantee that it won't be read
4607// as a 64-bit value later.
4608void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4609{
24385cae 4610#ifndef FORCE32
57871462 4611 if(is32_pre==is32) return;
4612 int hr,reg;
4613 for(hr=0;hr<HOST_REGS;hr++) {
4614 if(hr!=EXCLUDE_REG) {
4615 //if(pre[hr]==entry[hr]) {
4616 if((reg=pre[hr])>=0) {
4617 if((dirty>>hr)&1) {
4618 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4619 emit_sarimm(hr,31,HOST_TEMPREG);
4620 emit_storereg(reg|64,HOST_TEMPREG);
4621 }
4622 }
4623 }
4624 //}
4625 }
4626 }
24385cae 4627#endif
57871462 4628}
4629
4630void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4631{
4632 //if(dirty_pre==dirty) return;
4633 int hr,reg,new_hr;
4634 for(hr=0;hr<HOST_REGS;hr++) {
4635 if(hr!=EXCLUDE_REG) {
4636 reg=pre[hr];
4637 if(((~u)>>(reg&63))&1) {
4638 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4639 if(((dirty_pre&~dirty)>>hr)&1) {
4640 if(reg>0&&reg<34) {
4641 emit_storereg(reg,hr);
4642 if( ((is32_pre&~uu)>>reg)&1 ) {
4643 emit_sarimm(hr,31,HOST_TEMPREG);
4644 emit_storereg(reg|64,HOST_TEMPREG);
4645 }
4646 }
4647 else if(reg>=64) {
4648 emit_storereg(reg,hr);
4649 }
4650 }
4651 }
4652 else // Check if register moved to a different register
4653 if((new_hr=get_reg(entry,reg))>=0) {
4654 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4655 if(reg>0&&reg<34) {
4656 emit_storereg(reg,hr);
4657 if( ((is32_pre&~uu)>>reg)&1 ) {
4658 emit_sarimm(hr,31,HOST_TEMPREG);
4659 emit_storereg(reg|64,HOST_TEMPREG);
4660 }
4661 }
4662 else if(reg>=64) {
4663 emit_storereg(reg,hr);
4664 }
4665 }
4666 }
4667 }
4668 }
4669 }
4670}
4671
4672
4673/* using strd could possibly help but you'd have to allocate registers in pairs
4674void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4675{
4676 int hr;
4677 int wrote=-1;
4678 for(hr=HOST_REGS-1;hr>=0;hr--) {
4679 if(hr!=EXCLUDE_REG) {
4680 if(pre[hr]!=entry[hr]) {
4681 if(pre[hr]>=0) {
4682 if((dirty>>hr)&1) {
4683 if(get_reg(entry,pre[hr])<0) {
4684 if(pre[hr]<64) {
4685 if(!((u>>pre[hr])&1)) {
4686 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4687 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4688 emit_sarimm(hr,31,hr+1);
4689 emit_strdreg(pre[hr],hr);
4690 }
4691 else
4692 emit_storereg(pre[hr],hr);
4693 }else{
4694 emit_storereg(pre[hr],hr);
4695 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4696 emit_sarimm(hr,31,hr);
4697 emit_storereg(pre[hr]|64,hr);
4698 }
4699 }
4700 }
4701 }else{
4702 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4703 emit_storereg(pre[hr],hr);
4704 }
4705 }
4706 wrote=hr;
4707 }
4708 }
4709 }
4710 }
4711 }
4712 }
4713 for(hr=0;hr<HOST_REGS;hr++) {
4714 if(hr!=EXCLUDE_REG) {
4715 if(pre[hr]!=entry[hr]) {
4716 if(pre[hr]>=0) {
4717 int nr;
4718 if((nr=get_reg(entry,pre[hr]))>=0) {
4719 emit_mov(hr,nr);
4720 }
4721 }
4722 }
4723 }
4724 }
4725}
4726#define wb_invalidate wb_invalidate_arm
4727*/
4728
4729// CPU-architecture-specific initialization
4730void arch_init() {
3d624f89 4731#ifndef DISABLE_COP1
57871462 4732 rounding_modes[0]=0x0<<22; // round
4733 rounding_modes[1]=0x3<<22; // trunc
4734 rounding_modes[2]=0x1<<22; // ceil
4735 rounding_modes[3]=0x2<<22; // floor
3d624f89 4736#endif
57871462 4737}
b9b61529 4738
4739// vim:shiftwidth=2:expandtab