drc: don't clear ARM caches on whole translation cache - it's very slow
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
3d624f89 27#ifdef MUPEN64
57871462 28extern precomp_instr fake_pc;
3d624f89 29#endif
57871462 30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
/* Routines implemented outside this file. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

/* One jump_vaddr stub per host register r0-r10 and r12; their addresses
 * populate jump_vaddr_reg[] below. */
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
f76eeef9 145void *kill_pointer(void *stub)
57871462 146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 153 return i_ptr;
57871462 154}
155
156int get_pointer(void *stub)
157{
158 //printf("get_pointer(%x)\n",(int)stub);
159 int *ptr=(int *)(stub+4);
160 assert((*ptr&0x0ff00000)==0x05900000);
161 u_int offset=*ptr&0xfff;
162 int **l_ptr=(void *)ptr+offset+8;
163 int *i_ptr=*l_ptr;
164 assert((*i_ptr&0x0f000000)==0x0a000000);
165 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
166}
167
168// Find the "clean" entry point from a "dirty" entry point
169// by skipping past the call to verify_code
170u_int get_clean_addr(int addr)
171{
172 int *ptr=(int *)addr;
173 #ifdef ARMv5_ONLY
174 ptr+=4;
175 #else
176 ptr+=6;
177 #endif
178 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
179 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
180 ptr++;
181 if((*ptr&0xFF000000)==0xea000000) {
182 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
183 }
184 return (u_int)ptr;
185}
186
187int verify_dirty(int addr)
188{
189 u_int *ptr=(u_int *)addr;
190 #ifdef ARMv5_ONLY
191 // get from literal pool
192 assert((*ptr&0xFFF00000)==0xe5900000);
193 u_int offset=*ptr&0xfff;
194 u_int *l_ptr=(void *)ptr+offset+8;
195 u_int source=l_ptr[0];
196 u_int copy=l_ptr[1];
197 u_int len=l_ptr[2];
198 ptr+=4;
199 #else
200 // ARMv7 movw/movt
201 assert((*ptr&0xFFF00000)==0xe3000000);
202 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
203 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
204 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
205 ptr+=6;
206 #endif
207 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
208 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 209 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 210 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
211 unsigned int page=source>>12;
212 unsigned int map_value=memory_map[page];
213 if(map_value>=0x80000000) return 0;
214 while(page<((source+len-1)>>12)) {
215 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
216 }
217 source = source+(map_value<<2);
218 }
219 //printf("verify_dirty: %x %x %x\n",source,copy,len);
220 return !memcmp((void *)source,(void *)copy,len);
221}
222
223// This doesn't necessarily find all clean entry points, just
224// guarantees that it's not dirty
225int isclean(int addr)
226{
227 #ifdef ARMv5_ONLY
228 int *ptr=((u_int *)addr)+4;
229 #else
230 int *ptr=((u_int *)addr)+6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
236 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
237 return 1;
238}
239
240void get_bounds(int addr,u_int *start,u_int *end)
241{
242 u_int *ptr=(u_int *)addr;
243 #ifdef ARMv5_ONLY
244 // get from literal pool
245 assert((*ptr&0xFFF00000)==0xe5900000);
246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 //u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
cfcba99a 262 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 263 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
264 if(memory_map[source>>12]>=0x80000000) source = 0;
265 else source = source+(memory_map[source>>12]<<2);
266 }
267 *start=source;
268 *end=source+len;
269}
270
271/* Register allocation */
272
273// Note: registers are allocated clean (unmodified state)
274// if you intend to modify the register, you must call dirty_reg().
275void alloc_reg(struct regstat *cur,int i,signed char reg)
276{
277 int r,hr;
278 int preferred_reg = (reg&7);
279 if(reg==CCREG) preferred_reg=HOST_CCREG;
280 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
281
282 // Don't allocate unused registers
283 if((cur->u>>reg)&1) return;
284
285 // see if it's already allocated
286 for(hr=0;hr<HOST_REGS;hr++)
287 {
288 if(cur->regmap[hr]==reg) return;
289 }
290
291 // Keep the same mapping if the register was already allocated in a loop
292 preferred_reg = loop_reg(i,reg,preferred_reg);
293
294 // Try to allocate the preferred register
295 if(cur->regmap[preferred_reg]==-1) {
296 cur->regmap[preferred_reg]=reg;
297 cur->dirty&=~(1<<preferred_reg);
298 cur->isconst&=~(1<<preferred_reg);
299 return;
300 }
301 r=cur->regmap[preferred_reg];
302 if(r<64&&((cur->u>>r)&1)) {
303 cur->regmap[preferred_reg]=reg;
304 cur->dirty&=~(1<<preferred_reg);
305 cur->isconst&=~(1<<preferred_reg);
306 return;
307 }
308 if(r>=64&&((cur->uu>>(r&63))&1)) {
309 cur->regmap[preferred_reg]=reg;
310 cur->dirty&=~(1<<preferred_reg);
311 cur->isconst&=~(1<<preferred_reg);
312 return;
313 }
314
315 // Clear any unneeded registers
316 // We try to keep the mapping consistent, if possible, because it
317 // makes branches easier (especially loops). So we try to allocate
318 // first (see above) before removing old mappings. If this is not
319 // possible then go ahead and clear out the registers that are no
320 // longer needed.
321 for(hr=0;hr<HOST_REGS;hr++)
322 {
323 r=cur->regmap[hr];
324 if(r>=0) {
325 if(r<64) {
326 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
327 }
328 else
329 {
330 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
331 }
332 }
333 }
334 // Try to allocate any available register, but prefer
335 // registers that have not been used recently.
336 if(i>0) {
337 for(hr=0;hr<HOST_REGS;hr++) {
338 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
339 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
340 cur->regmap[hr]=reg;
341 cur->dirty&=~(1<<hr);
342 cur->isconst&=~(1<<hr);
343 return;
344 }
345 }
346 }
347 }
348 // Try to allocate any available register
349 for(hr=0;hr<HOST_REGS;hr++) {
350 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
351 cur->regmap[hr]=reg;
352 cur->dirty&=~(1<<hr);
353 cur->isconst&=~(1<<hr);
354 return;
355 }
356 }
357
358 // Ok, now we have to evict someone
359 // Pick a register we hopefully won't need soon
360 u_char hsn[MAXREG+1];
361 memset(hsn,10,sizeof(hsn));
362 int j;
363 lsn(hsn,i,&preferred_reg);
364 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
365 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
366 if(i>0) {
367 // Don't evict the cycle count at entry points, otherwise the entry
368 // stub will have to write it.
369 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
370 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
371 for(j=10;j>=3;j--)
372 {
373 // Alloc preferred register if available
374 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
375 for(hr=0;hr<HOST_REGS;hr++) {
376 // Evict both parts of a 64-bit register
377 if((cur->regmap[hr]&63)==r) {
378 cur->regmap[hr]=-1;
379 cur->dirty&=~(1<<hr);
380 cur->isconst&=~(1<<hr);
381 }
382 }
383 cur->regmap[preferred_reg]=reg;
384 return;
385 }
386 for(r=1;r<=MAXREG;r++)
387 {
388 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
391 if(cur->regmap[hr]==r+64) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
398 }
399 for(hr=0;hr<HOST_REGS;hr++) {
400 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
401 if(cur->regmap[hr]==r) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
408 }
409 }
410 }
411 }
412 }
413 for(j=10;j>=0;j--)
414 {
415 for(r=1;r<=MAXREG;r++)
416 {
417 if(hsn[r]==j) {
418 for(hr=0;hr<HOST_REGS;hr++) {
419 if(cur->regmap[hr]==r+64) {
420 cur->regmap[hr]=reg;
421 cur->dirty&=~(1<<hr);
422 cur->isconst&=~(1<<hr);
423 return;
424 }
425 }
426 for(hr=0;hr<HOST_REGS;hr++) {
427 if(cur->regmap[hr]==r) {
428 cur->regmap[hr]=reg;
429 cur->dirty&=~(1<<hr);
430 cur->isconst&=~(1<<hr);
431 return;
432 }
433 }
434 }
435 }
436 }
437 printf("This shouldn't happen (alloc_reg)");exit(1);
438}
439
440void alloc_reg64(struct regstat *cur,int i,signed char reg)
441{
442 int preferred_reg = 8+(reg&1);
443 int r,hr;
444
445 // allocate the lower 32 bits
446 alloc_reg(cur,i,reg);
447
448 // Don't allocate unused registers
449 if((cur->uu>>reg)&1) return;
450
451 // see if the upper half is already allocated
452 for(hr=0;hr<HOST_REGS;hr++)
453 {
454 if(cur->regmap[hr]==reg+64) return;
455 }
456
457 // Keep the same mapping if the register was already allocated in a loop
458 preferred_reg = loop_reg(i,reg,preferred_reg);
459
460 // Try to allocate the preferred register
461 if(cur->regmap[preferred_reg]==-1) {
462 cur->regmap[preferred_reg]=reg|64;
463 cur->dirty&=~(1<<preferred_reg);
464 cur->isconst&=~(1<<preferred_reg);
465 return;
466 }
467 r=cur->regmap[preferred_reg];
468 if(r<64&&((cur->u>>r)&1)) {
469 cur->regmap[preferred_reg]=reg|64;
470 cur->dirty&=~(1<<preferred_reg);
471 cur->isconst&=~(1<<preferred_reg);
472 return;
473 }
474 if(r>=64&&((cur->uu>>(r&63))&1)) {
475 cur->regmap[preferred_reg]=reg|64;
476 cur->dirty&=~(1<<preferred_reg);
477 cur->isconst&=~(1<<preferred_reg);
478 return;
479 }
480
481 // Clear any unneeded registers
482 // We try to keep the mapping consistent, if possible, because it
483 // makes branches easier (especially loops). So we try to allocate
484 // first (see above) before removing old mappings. If this is not
485 // possible then go ahead and clear out the registers that are no
486 // longer needed.
487 for(hr=HOST_REGS-1;hr>=0;hr--)
488 {
489 r=cur->regmap[hr];
490 if(r>=0) {
491 if(r<64) {
492 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
493 }
494 else
495 {
496 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
497 }
498 }
499 }
500 // Try to allocate any available register, but prefer
501 // registers that have not been used recently.
502 if(i>0) {
503 for(hr=0;hr<HOST_REGS;hr++) {
504 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
505 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
506 cur->regmap[hr]=reg|64;
507 cur->dirty&=~(1<<hr);
508 cur->isconst&=~(1<<hr);
509 return;
510 }
511 }
512 }
513 }
514 // Try to allocate any available register
515 for(hr=0;hr<HOST_REGS;hr++) {
516 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
517 cur->regmap[hr]=reg|64;
518 cur->dirty&=~(1<<hr);
519 cur->isconst&=~(1<<hr);
520 return;
521 }
522 }
523
524 // Ok, now we have to evict someone
525 // Pick a register we hopefully won't need soon
526 u_char hsn[MAXREG+1];
527 memset(hsn,10,sizeof(hsn));
528 int j;
529 lsn(hsn,i,&preferred_reg);
530 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
531 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
532 if(i>0) {
533 // Don't evict the cycle count at entry points, otherwise the entry
534 // stub will have to write it.
535 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
536 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
537 for(j=10;j>=3;j--)
538 {
539 // Alloc preferred register if available
540 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
541 for(hr=0;hr<HOST_REGS;hr++) {
542 // Evict both parts of a 64-bit register
543 if((cur->regmap[hr]&63)==r) {
544 cur->regmap[hr]=-1;
545 cur->dirty&=~(1<<hr);
546 cur->isconst&=~(1<<hr);
547 }
548 }
549 cur->regmap[preferred_reg]=reg|64;
550 return;
551 }
552 for(r=1;r<=MAXREG;r++)
553 {
554 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
557 if(cur->regmap[hr]==r+64) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 for(hr=0;hr<HOST_REGS;hr++) {
566 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
567 if(cur->regmap[hr]==r) {
568 cur->regmap[hr]=reg|64;
569 cur->dirty&=~(1<<hr);
570 cur->isconst&=~(1<<hr);
571 return;
572 }
573 }
574 }
575 }
576 }
577 }
578 }
579 for(j=10;j>=0;j--)
580 {
581 for(r=1;r<=MAXREG;r++)
582 {
583 if(hsn[r]==j) {
584 for(hr=0;hr<HOST_REGS;hr++) {
585 if(cur->regmap[hr]==r+64) {
586 cur->regmap[hr]=reg|64;
587 cur->dirty&=~(1<<hr);
588 cur->isconst&=~(1<<hr);
589 return;
590 }
591 }
592 for(hr=0;hr<HOST_REGS;hr++) {
593 if(cur->regmap[hr]==r) {
594 cur->regmap[hr]=reg|64;
595 cur->dirty&=~(1<<hr);
596 cur->isconst&=~(1<<hr);
597 return;
598 }
599 }
600 }
601 }
602 }
603 printf("This shouldn't happen");exit(1);
604}
605
606// Allocate a temporary register. This is done without regard to
607// dirty status or whether the register we request is on the unneeded list
608// Note: This will only allocate one register, even if called multiple times
609void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
610{
611 int r,hr;
612 int preferred_reg = -1;
613
614 // see if it's already allocated
615 for(hr=0;hr<HOST_REGS;hr++)
616 {
617 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
618 }
619
620 // Try to allocate any available register
621 for(hr=HOST_REGS-1;hr>=0;hr--) {
622 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
625 cur->isconst&=~(1<<hr);
626 return;
627 }
628 }
629
630 // Find an unneeded register
631 for(hr=HOST_REGS-1;hr>=0;hr--)
632 {
633 r=cur->regmap[hr];
634 if(r>=0) {
635 if(r<64) {
636 if((cur->u>>r)&1) {
637 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
638 cur->regmap[hr]=reg;
639 cur->dirty&=~(1<<hr);
640 cur->isconst&=~(1<<hr);
641 return;
642 }
643 }
644 }
645 else
646 {
647 if((cur->uu>>(r&63))&1) {
648 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652 return;
653 }
654 }
655 }
656 }
657 }
658
659 // Ok, now we have to evict someone
660 // Pick a register we hopefully won't need soon
661 // TODO: we might want to follow unconditional jumps here
662 // TODO: get rid of dupe code and make this into a function
663 u_char hsn[MAXREG+1];
664 memset(hsn,10,sizeof(hsn));
665 int j;
666 lsn(hsn,i,&preferred_reg);
667 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
668 if(i>0) {
669 // Don't evict the cycle count at entry points, otherwise the entry
670 // stub will have to write it.
671 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
672 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
673 for(j=10;j>=3;j--)
674 {
675 for(r=1;r<=MAXREG;r++)
676 {
677 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
678 for(hr=0;hr<HOST_REGS;hr++) {
679 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
680 if(cur->regmap[hr]==r+64) {
681 cur->regmap[hr]=reg;
682 cur->dirty&=~(1<<hr);
683 cur->isconst&=~(1<<hr);
684 return;
685 }
686 }
687 }
688 for(hr=0;hr<HOST_REGS;hr++) {
689 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
690 if(cur->regmap[hr]==r) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 }
699 }
700 }
701 }
702 for(j=10;j>=0;j--)
703 {
704 for(r=1;r<=MAXREG;r++)
705 {
706 if(hsn[r]==j) {
707 for(hr=0;hr<HOST_REGS;hr++) {
708 if(cur->regmap[hr]==r+64) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 for(hr=0;hr<HOST_REGS;hr++) {
716 if(cur->regmap[hr]==r) {
717 cur->regmap[hr]=reg;
718 cur->dirty&=~(1<<hr);
719 cur->isconst&=~(1<<hr);
720 return;
721 }
722 }
723 }
724 }
725 }
726 printf("This shouldn't happen");exit(1);
727}
728// Allocate a specific ARM register.
729void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
730{
731 int n;
732
733 // see if it's already allocated (and dealloc it)
734 for(n=0;n<HOST_REGS;n++)
735 {
736 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
737 }
738
739 cur->regmap[hr]=reg;
740 cur->dirty&=~(1<<hr);
741 cur->isconst&=~(1<<hr);
742}
743
744// Alloc cycle count into dedicated register
745alloc_cc(struct regstat *cur,int i)
746{
747 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
748}
749
750/* Special alloc */
751
752
753/* Assembler */
754
// Printable names of the 16 host registers, indexed by register number;
// used by the assem_debug() listings.
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
772
773void output_byte(u_char byte)
774{
775 *(out++)=byte;
776}
777void output_modrm(u_char mod,u_char rm,u_char ext)
778{
779 assert(mod<4);
780 assert(rm<8);
781 assert(ext<8);
782 u_char byte=(mod<<6)|(ext<<3)|rm;
783 *(out++)=byte;
784}
785void output_sib(u_char scale,u_char index,u_char base)
786{
787 assert(scale<4);
788 assert(index<8);
789 assert(base<8);
790 u_char byte=(scale<<6)|(index<<3)|base;
791 *(out++)=byte;
792}
793void output_w32(u_int word)
794{
795 *((u_int *)out)=word;
796 out+=4;
797}
// Build the register fields of an instruction word:
// rn in bits 19-16, rd in bits 15-12, rm in bits 3-0.
// All three register numbers must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rn/rd fields plus an 8-bit immediate that is to be shifted
// left by `shift` bits. shift must be even; it is stored in bits 11-8 as
// ((32-shift)&30)/2, and imm (below 256) occupies bits 7-0.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate_field|imm;
}
// Try to express imm as an 8-bit value combined with an even rotation.
// On success the 12-bit field (rotation in bits 11-8, value in bits 7-0)
// is stored in *encoded and 1 is returned. When no such form exists the
// function returns 0 and leaves *encoded untouched.
u_int genimm(u_int imm,u_int *encoded)
{
  if(imm==0) {
    *encoded=0;
    return 1;
  }
  u_int value=imm;
  int rot;
  // Rotate right two bits at a time until the value fits in 8 bits
  for(rot=32;rot>0;rot-=2) {
    if(value<256) {
      *encoded=((rot&30)<<7)|value;
      return 1;
    }
    value=(value>>2)|(value<<30);
  }
  return 0;
}
827u_int genjmp(u_int addr)
828{
829 int offset=addr-(int)out-8;
e80343e2 830 if(offset<-33554432||offset>=33554432) {
831 if (addr>2) {
832 printf("genjmp: out of range: %08x\n", offset);
833 exit(1);
834 }
835 return 0;
836 }
57871462 837 return ((u_int)offset>>2)&0xffffff;
838}
839
840void emit_mov(int rs,int rt)
841{
842 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
843 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
844}
845
846void emit_movs(int rs,int rt)
847{
848 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
849 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
850}
851
852void emit_add(int rs1,int rs2,int rt)
853{
854 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
855 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
856}
857
858void emit_adds(int rs1,int rs2,int rt)
859{
860 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
861 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
862}
863
864void emit_adcs(int rs1,int rs2,int rt)
865{
866 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
867 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
868}
869
870void emit_sbc(int rs1,int rs2,int rt)
871{
872 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
873 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
874}
875
876void emit_sbcs(int rs1,int rs2,int rt)
877{
878 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
879 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
880}
881
882void emit_neg(int rs, int rt)
883{
884 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
885 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
886}
887
888void emit_negs(int rs, int rt)
889{
890 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
891 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
892}
893
894void emit_sub(int rs1,int rs2,int rt)
895{
896 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
897 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
898}
899
900void emit_subs(int rs1,int rs2,int rt)
901{
902 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
903 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
904}
905
906void emit_zeroreg(int rt)
907{
908 assem_debug("mov %s,#0\n",regname[rt]);
909 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
910}
911
912void emit_loadreg(int r, int hr)
913{
3d624f89 914#ifdef FORCE32
915 if(r&64) {
916 printf("64bit load in 32bit mode!\n");
917 exit(1);
918 }
919#endif
57871462 920 if((r&63)==0)
921 emit_zeroreg(hr);
922 else {
3d624f89 923 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 924 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
925 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
926 if(r==CCREG) addr=(int)&cycle_count;
927 if(r==CSREG) addr=(int)&Status;
928 if(r==FSREG) addr=(int)&FCR31;
929 if(r==INVCP) addr=(int)&invc_ptr;
930 u_int offset = addr-(u_int)&dynarec_local;
931 assert(offset<4096);
932 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
933 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
934 }
935}
936void emit_storereg(int r, int hr)
937{
3d624f89 938#ifdef FORCE32
939 if(r&64) {
940 printf("64bit store in 32bit mode!\n");
941 exit(1);
942 }
943#endif
944 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 945 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
946 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
947 if(r==CCREG) addr=(int)&cycle_count;
948 if(r==FSREG) addr=(int)&FCR31;
949 u_int offset = addr-(u_int)&dynarec_local;
950 assert(offset<4096);
951 assem_debug("str %s,fp+%d\n",regname[hr],offset);
952 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
953}
954
955void emit_test(int rs, int rt)
956{
957 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
958 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
959}
960
961void emit_testimm(int rs,int imm)
962{
963 u_int armval;
964 assem_debug("tst %s,$%d\n",regname[rs],imm);
965 assert(genimm(imm,&armval));
966 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
967}
968
b9b61529 969void emit_testeqimm(int rs,int imm)
970{
971 u_int armval;
972 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
973 assert(genimm(imm,&armval));
974 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
975}
976
57871462 977void emit_not(int rs,int rt)
978{
979 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
980 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
981}
982
b9b61529 983void emit_mvnmi(int rs,int rt)
984{
985 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
986 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
987}
988
57871462 989void emit_and(u_int rs1,u_int rs2,u_int rt)
990{
991 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
992 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
993}
994
995void emit_or(u_int rs1,u_int rs2,u_int rt)
996{
997 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
998 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
999}
1000void emit_or_and_set_flags(int rs1,int rs2,int rt)
1001{
1002 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1003 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1004}
1005
1006void emit_xor(u_int rs1,u_int rs2,u_int rt)
1007{
1008 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1009 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1010}
1011
1012void emit_loadlp(u_int imm,u_int rt)
1013{
1014 add_literal((int)out,imm);
1015 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1016 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1017}
1018void emit_movw(u_int imm,u_int rt)
1019{
1020 assert(imm<65536);
1021 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1022 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1023}
1024void emit_movt(u_int imm,u_int rt)
1025{
1026 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1027 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1028}
1029void emit_movimm(u_int imm,u_int rt)
1030{
1031 u_int armval;
1032 if(genimm(imm,&armval)) {
1033 assem_debug("mov %s,#%d\n",regname[rt],imm);
1034 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1035 }else if(genimm(~imm,&armval)) {
1036 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1037 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1038 }else if(imm<65536) {
1039 #ifdef ARMv5_ONLY
1040 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1041 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1042 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1043 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1044 #else
1045 emit_movw(imm,rt);
1046 #endif
1047 }else{
1048 #ifdef ARMv5_ONLY
1049 emit_loadlp(imm,rt);
1050 #else
1051 emit_movw(imm&0x0000FFFF,rt);
1052 emit_movt(imm&0xFFFF0000,rt);
1053 #endif
1054 }
1055}
1056void emit_pcreladdr(u_int rt)
1057{
1058 assem_debug("add %s,pc,#?\n",regname[rt]);
1059 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1060}
1061
1062void emit_addimm(u_int rs,int imm,u_int rt)
1063{
1064 assert(rs<16);
1065 assert(rt<16);
1066 if(imm!=0) {
1067 assert(imm>-65536&&imm<65536);
1068 u_int armval;
1069 if(genimm(imm,&armval)) {
1070 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1071 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1072 }else if(genimm(-imm,&armval)) {
1073 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1074 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1075 }else if(imm<0) {
1076 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1077 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1078 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1079 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1080 }else{
1081 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1082 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1083 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1084 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1085 }
1086 }
1087 else if(rs!=rt) emit_mov(rs,rt);
1088}
1089
1090void emit_addimm_and_set_flags(int imm,int rt)
1091{
1092 assert(imm>-65536&&imm<65536);
1093 u_int armval;
1094 if(genimm(imm,&armval)) {
1095 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1096 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1097 }else if(genimm(-imm,&armval)) {
1098 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1099 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1100 }else if(imm<0) {
1101 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1102 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1103 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1104 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1105 }else{
1106 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1107 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1108 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1109 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1110 }
1111}
1112void emit_addimm_no_flags(u_int imm,u_int rt)
1113{
1114 emit_addimm(rt,imm,rt);
1115}
1116
1117void emit_addnop(u_int r)
1118{
1119 assert(r<16);
1120 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1121 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1122}
1123
1124void emit_adcimm(u_int rs,int imm,u_int rt)
1125{
1126 u_int armval;
1127 assert(genimm(imm,&armval));
1128 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1129 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1130}
1131/*void emit_sbcimm(int imm,u_int rt)
1132{
1133 u_int armval;
1134 assert(genimm(imm,&armval));
1135 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1136 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1137}*/
1138void emit_sbbimm(int imm,u_int rt)
1139{
1140 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1141 assert(rt<8);
1142 if(imm<128&&imm>=-128) {
1143 output_byte(0x83);
1144 output_modrm(3,rt,3);
1145 output_byte(imm);
1146 }
1147 else
1148 {
1149 output_byte(0x81);
1150 output_modrm(3,rt,3);
1151 output_w32(imm);
1152 }
1153}
1154void emit_rscimm(int rs,int imm,u_int rt)
1155{
1156 assert(0);
1157 u_int armval;
1158 assert(genimm(imm,&armval));
1159 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1160 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1161}
1162
1163void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1164{
1165 // TODO: if(genimm(imm,&armval)) ...
1166 // else
1167 emit_movimm(imm,HOST_TEMPREG);
1168 emit_adds(HOST_TEMPREG,rsl,rtl);
1169 emit_adcimm(rsh,0,rth);
1170}
1171
1172void emit_sbb(int rs1,int rs2)
1173{
1174 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1175 output_byte(0x19);
1176 output_modrm(3,rs1,rs2);
1177}
1178
1179void emit_andimm(int rs,int imm,int rt)
1180{
1181 u_int armval;
1182 if(genimm(imm,&armval)) {
1183 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1184 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1185 }else if(genimm(~imm,&armval)) {
1186 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1187 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1188 }else if(imm==65535) {
1189 #ifdef ARMv5_ONLY
1190 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1191 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1192 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1193 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1194 #else
1195 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1196 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1197 #endif
1198 }else{
1199 assert(imm>0&&imm<65535);
1200 #ifdef ARMv5_ONLY
1201 assem_debug("mov r14,#%d\n",imm&0xFF00);
1202 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1203 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1204 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1205 #else
1206 emit_movw(imm,HOST_TEMPREG);
1207 #endif
1208 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1209 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1210 }
1211}
1212
1213void emit_orimm(int rs,int imm,int rt)
1214{
1215 u_int armval;
1216 if(genimm(imm,&armval)) {
1217 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1218 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1219 }else{
1220 assert(imm>0&&imm<65536);
1221 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1222 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1223 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1224 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1225 }
1226}
1227
1228void emit_xorimm(int rs,int imm,int rt)
1229{
57871462 1230 u_int armval;
1231 if(genimm(imm,&armval)) {
1232 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1234 }else{
514ed0d9 1235 assert(imm>0&&imm<65536);
57871462 1236 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1237 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1238 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1239 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1240 }
1241}
1242
1243void emit_shlimm(int rs,u_int imm,int rt)
1244{
1245 assert(imm>0);
1246 assert(imm<32);
1247 //if(imm==1) ...
1248 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1249 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1250}
1251
1252void emit_shrimm(int rs,u_int imm,int rt)
1253{
1254 assert(imm>0);
1255 assert(imm<32);
1256 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1257 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1258}
1259
1260void emit_sarimm(int rs,u_int imm,int rt)
1261{
1262 assert(imm>0);
1263 assert(imm<32);
1264 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1265 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1266}
1267
1268void emit_rorimm(int rs,u_int imm,int rt)
1269{
1270 assert(imm>0);
1271 assert(imm<32);
1272 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1273 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1274}
1275
1276void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1277{
1278 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1279 assert(imm>0);
1280 assert(imm<32);
1281 //if(imm==1) ...
1282 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1283 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1284 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1285 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1286}
1287
1288void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1289{
1290 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1291 assert(imm>0);
1292 assert(imm<32);
1293 //if(imm==1) ...
1294 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1295 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1296 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1297 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1298}
1299
b9b61529 1300void emit_signextend16(int rs,int rt)
1301{
1302 #ifdef ARMv5_ONLY
1303 emit_shlimm(rs,16,rt);
1304 emit_sarimm(rt,16,rt);
1305 #else
1306 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1307 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1308 #endif
1309}
1310
57871462 1311void emit_shl(u_int rs,u_int shift,u_int rt)
1312{
1313 assert(rs<16);
1314 assert(rt<16);
1315 assert(shift<16);
1316 //if(imm==1) ...
1317 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1318 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1319}
1320void emit_shr(u_int rs,u_int shift,u_int rt)
1321{
1322 assert(rs<16);
1323 assert(rt<16);
1324 assert(shift<16);
1325 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1326 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1327}
1328void emit_sar(u_int rs,u_int shift,u_int rt)
1329{
1330 assert(rs<16);
1331 assert(rt<16);
1332 assert(shift<16);
1333 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1334 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1335}
1336void emit_shlcl(int r)
1337{
1338 assem_debug("shl %%%s,%%cl\n",regname[r]);
1339 assert(0);
1340}
1341void emit_shrcl(int r)
1342{
1343 assem_debug("shr %%%s,%%cl\n",regname[r]);
1344 assert(0);
1345}
1346void emit_sarcl(int r)
1347{
1348 assem_debug("sar %%%s,%%cl\n",regname[r]);
1349 assert(0);
1350}
1351
1352void emit_shldcl(int r1,int r2)
1353{
1354 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1355 assert(0);
1356}
1357void emit_shrdcl(int r1,int r2)
1358{
1359 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1360 assert(0);
1361}
1362void emit_orrshl(u_int rs,u_int shift,u_int rt)
1363{
1364 assert(rs<16);
1365 assert(rt<16);
1366 assert(shift<16);
1367 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1368 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1369}
1370void emit_orrshr(u_int rs,u_int shift,u_int rt)
1371{
1372 assert(rs<16);
1373 assert(rt<16);
1374 assert(shift<16);
1375 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1376 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1377}
1378
1379void emit_cmpimm(int rs,int imm)
1380{
1381 u_int armval;
1382 if(genimm(imm,&armval)) {
1383 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1384 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1385 }else if(genimm(-imm,&armval)) {
1386 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1387 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1388 }else if(imm>0) {
1389 assert(imm<65536);
1390 #ifdef ARMv5_ONLY
1391 emit_movimm(imm,HOST_TEMPREG);
1392 #else
1393 emit_movw(imm,HOST_TEMPREG);
1394 #endif
1395 assem_debug("cmp %s,r14\n",regname[rs]);
1396 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1397 }else{
1398 assert(imm>-65536);
1399 #ifdef ARMv5_ONLY
1400 emit_movimm(-imm,HOST_TEMPREG);
1401 #else
1402 emit_movw(-imm,HOST_TEMPREG);
1403 #endif
1404 assem_debug("cmn %s,r14\n",regname[rs]);
1405 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1406 }
1407}
1408
1409void emit_cmovne(u_int *addr,int rt)
1410{
1411 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1412 assert(0);
1413}
1414void emit_cmovl(u_int *addr,int rt)
1415{
1416 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1417 assert(0);
1418}
1419void emit_cmovs(u_int *addr,int rt)
1420{
1421 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1422 assert(0);
1423}
1424void emit_cmovne_imm(int imm,int rt)
1425{
1426 assem_debug("movne %s,#%d\n",regname[rt],imm);
1427 u_int armval;
1428 assert(genimm(imm,&armval));
1429 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1430}
1431void emit_cmovl_imm(int imm,int rt)
1432{
1433 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1434 u_int armval;
1435 assert(genimm(imm,&armval));
1436 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1437}
1438void emit_cmovb_imm(int imm,int rt)
1439{
1440 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1441 u_int armval;
1442 assert(genimm(imm,&armval));
1443 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1444}
1445void emit_cmovs_imm(int imm,int rt)
1446{
1447 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1448 u_int armval;
1449 assert(genimm(imm,&armval));
1450 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1451}
1452void emit_cmove_reg(int rs,int rt)
1453{
1454 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1455 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1456}
1457void emit_cmovne_reg(int rs,int rt)
1458{
1459 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1460 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1461}
1462void emit_cmovl_reg(int rs,int rt)
1463{
1464 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1465 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1466}
1467void emit_cmovs_reg(int rs,int rt)
1468{
1469 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1470 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1471}
1472
/*
 * Emit code setting rt to 1 when rs < imm (signed, "movlt"), else 0.
 * When rt and rs are the same register, rt is cleared only after the compare
 * has been emitted, so the compared value is not overwritten first.
 */
void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) {
    emit_zeroreg(rt);
  }
  emit_cmpimm(rs, imm);
  if (aliased) {
    emit_movimm(0, rt);
  }
  emit_cmovl_imm(1, rt);
}
/*
 * Emit code setting rt to 1 when rs < imm (unsigned, "movcc"), else 0.
 * When rt and rs are the same register, rt is cleared only after the compare
 * has been emitted, so the compared value is not overwritten first.
 */
void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased) {
    emit_zeroreg(rt);
  }
  emit_cmpimm(rs, imm);
  if (aliased) {
    emit_movimm(0, rt);
  }
  emit_cmovb_imm(1, rt);
}
/*
 * Signed "set if less than immediate" for a 64-bit value held in rsh:rsl,
 * result in the 32-bit register rt (asserted to differ from rsh).
 *
 * The low-word result from emit_slti32() is corrected by inspecting the high
 * word: it is tested against 0 for a non-negative imm and compared with -1
 * for a negative imm.
 */
void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
/*
 * Unsigned "set if less than immediate" for a 64-bit value held in rsh:rsl,
 * result in the 32-bit register rt (asserted to differ from rsh).
 *
 * The low-word result from emit_sltiu32() is corrected by inspecting the high
 * word: forced to 0 when rsh is non-zero (imm >= 0), or forced to 1 when rsh
 * differs from -1 (imm < 0).
 */
void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1519
1520void emit_cmp(int rs,int rt)
1521{
1522 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1523 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1524}
/*
 * Emit code setting rt to 1 when rs > 0 (signed), else 0:
 * compare rs with 1, load 1 into rt, then "movlt rt,#0".
 */
void emit_set_gz32(int rs, int rt)
{
  const int threshold = 1;
  emit_cmpimm(rs, threshold);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
/*
 * Emit code that leaves rt non-zero exactly when rs is non-zero: the value is
 * copied with emit_movs() (or just tested when rs and rt are the same
 * register) and then "movne rt,#1" is applied.
 */
void emit_set_nz32(int rs, int rt)
{
  if (rs == rt) {
    emit_test(rs, rs);
  } else {
    emit_movs(rs, rt);
  }
  emit_cmovne_imm(1, rt);
}
/*
 * 64-bit "greater than zero" test on rsh:rsl with the result in rt.
 * Starts from the 32-bit result for the low word, then tests the high word:
 * "movne rt,#1" followed by "movmi rt,#0".
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
/*
 * 64-bit "non-zero" test on rsh:rsl with the result in rt: the two halves are
 * combined with emit_or_and_set_flags() and "movne rt,#1" is applied.
 */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  const int nonzero_value = 1;
  emit_cmovne_imm(nonzero_value, rt);
}
/*
 * Emit code setting rt to 1 when rs1 < rs2 (signed, "movlt"), else 0.
 * When rt is also one of the operands, rt is cleared only after the compare
 * has been emitted.
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) {
    emit_zeroreg(rt);
  }
  emit_cmp(rs1, rs2);
  if (aliased) {
    emit_movimm(0, rt);
  }
  emit_cmovl_imm(1, rt);
}
/*
 * Emit code setting rt to 1 when rs1 < rs2 (unsigned, "movcc"), else 0.
 * When rt is also one of the operands, rt is cleared only after the compare
 * has been emitted.
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased) {
    emit_zeroreg(rt);
  }
  emit_cmp(rs1, rs2);
  if (aliased) {
    emit_movimm(0, rt);
  }
  emit_cmovb_imm(1, rt);
}
1569void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1570{
1571 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1572 assert(u1!=rt);
1573 assert(u2!=rt);
1574 emit_cmp(l1,l2);
1575 emit_movimm(0,rt);
1576 emit_sbcs(u1,u2,HOST_TEMPREG);
1577 emit_cmovl_imm(1,rt);
1578}
1579void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1580{
1581 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1582 assert(u1!=rt);
1583 assert(u2!=rt);
1584 emit_cmp(l1,l2);
1585 emit_movimm(0,rt);
1586 emit_sbcs(u1,u2,HOST_TEMPREG);
1587 emit_cmovb_imm(1,rt);
1588}
1589
1590void emit_call(int a)
1591{
1592 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1593 u_int offset=genjmp(a);
1594 output_w32(0xeb000000|offset);
1595}
1596void emit_jmp(int a)
1597{
1598 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1599 u_int offset=genjmp(a);
1600 output_w32(0xea000000|offset);
1601}
/* Emit "bne" to address a; the offset field comes from genjmp(). */
void emit_jne(int a)
{
  assem_debug("bne %x\n", a);
  output_w32(0x1a000000 | genjmp(a));
}
/* Emit "beq" to address a; the offset field comes from genjmp(). */
void emit_jeq(int a)
{
  assem_debug("beq %x\n", a);
  output_w32(0x0a000000 | genjmp(a));
}
/* Emit "bmi" to address a; the offset field comes from genjmp(). */
void emit_js(int a)
{
  assem_debug("bmi %x\n", a);
  output_w32(0x4a000000 | genjmp(a));
}
/* Emit "bpl" to address a; the offset field comes from genjmp(). */
void emit_jns(int a)
{
  assem_debug("bpl %x\n", a);
  output_w32(0x5a000000 | genjmp(a));
}
/* Emit "blt" to address a; the offset field comes from genjmp(). */
void emit_jl(int a)
{
  assem_debug("blt %x\n", a);
  output_w32(0xba000000 | genjmp(a));
}
/* Emit "bge" to address a; the offset field comes from genjmp(). */
void emit_jge(int a)
{
  assem_debug("bge %x\n", a);
  output_w32(0xaa000000 | genjmp(a));
}
/* Emit "bvc" to address a; the offset field comes from genjmp(). */
void emit_jno(int a)
{
  assem_debug("bvc %x\n", a);
  output_w32(0x7a000000 | genjmp(a));
}
/* Emit "bcs" to address a; the offset field comes from genjmp(). */
void emit_jc(int a)
{
  assem_debug("bcs %x\n", a);
  output_w32(0x2a000000 | genjmp(a));
}
/* Emit "bcc" to address a; the offset field comes from genjmp(). */
void emit_jcc(int a)
{
  assem_debug("bcc %x\n", a);
  output_w32(0x3a000000 | genjmp(a));
}
1656
/* Stub: logs an x86-style "push $imm" and asserts unconditionally;
 * no code is emitted. */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
/* Stub: logs "pusha" and asserts unconditionally; no code is emitted. */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* Stub: logs "popa" and asserts unconditionally; no code is emitted. */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1672void emit_pushreg(u_int r)
1673{
1674 assem_debug("push %%%s\n",regname[r]);
1675 assert(0);
1676}
1677void emit_popreg(u_int r)
1678{
1679 assem_debug("pop %%%s\n",regname[r]);
1680 assert(0);
1681}
1682void emit_callreg(u_int r)
1683{
1684 assem_debug("call *%%%s\n",regname[r]);
1685 assert(0);
1686}
1687void emit_jmpreg(u_int r)
1688{
1689 assem_debug("mov pc,%s\n",regname[r]);
1690 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1691}
1692
1693void emit_readword_indexed(int offset, int rs, int rt)
1694{
1695 assert(offset>-4096&&offset<4096);
1696 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1697 if(offset>=0) {
1698 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1699 }else{
1700 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1701 }
1702}
1703void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1704{
1705 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1706 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1707}
/*
 * Word load that optionally goes through a map register.
 * map < 0: plain base+offset load.  Otherwise addr is asserted to be 0 and the
 * load is indexed by the map register scaled by 4.
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * Doubleword load into rh (skipped when rh < 0) and rl.
 * map < 0: two base+offset loads at addr and addr+4.
 * Otherwise: rh is asserted to differ from rs, the first word is loaded
 * through map, map is incremented by 1 in place, and the second word is
 * loaded through the updated map.
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);
  if (map < 0) {
    if (want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if (want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1728void emit_movsbl_indexed(int offset, int rs, int rt)
1729{
1730 assert(offset>-256&&offset<256);
1731 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1732 if(offset>=0) {
1733 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1734 }else{
1735 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1736 }
1737}
1738void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1739{
1740 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1741 else {
1742 if(addr==0) {
1743 emit_shlimm(map,2,map);
1744 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1745 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1746 }else{
1747 assert(addr>-256&&addr<256);
1748 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1749 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1750 emit_movsbl_indexed(addr, rt, rt);
1751 }
1752 }
1753}
1754void emit_movswl_indexed(int offset, int rs, int rt)
1755{
1756 assert(offset>-256&&offset<256);
1757 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1758 if(offset>=0) {
1759 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1760 }else{
1761 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1762 }
1763}
1764void emit_movzbl_indexed(int offset, int rs, int rt)
1765{
1766 assert(offset>-4096&&offset<4096);
1767 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1768 if(offset>=0) {
1769 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1770 }else{
1771 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1772 }
1773}
1774void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1775{
1776 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1777 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1778}
/*
 * Zero-extending byte load that optionally goes through a map register.
 * map < 0: plain base+offset load.  Otherwise the load is indexed by map
 * scaled by 4; a non-zero addr is first added to rs, using rt as the
 * temporary base register.
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base = rs;
  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    base = rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1791void emit_movzwl_indexed(int offset, int rs, int rt)
1792{
1793 assert(offset>-256&&offset<256);
1794 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1795 if(offset>=0) {
1796 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1797 }else{
1798 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1799 }
1800}
1801void emit_readword(int addr, int rt)
1802{
1803 u_int offset = addr-(u_int)&dynarec_local;
1804 assert(offset<4096);
1805 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1806 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1807}
1808void emit_movsbl(int addr, int rt)
1809{
1810 u_int offset = addr-(u_int)&dynarec_local;
1811 assert(offset<256);
1812 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1813 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1814}
1815void emit_movswl(int addr, int rt)
1816{
1817 u_int offset = addr-(u_int)&dynarec_local;
1818 assert(offset<256);
1819 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1820 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1821}
1822void emit_movzbl(int addr, int rt)
1823{
1824 u_int offset = addr-(u_int)&dynarec_local;
1825 assert(offset<4096);
1826 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1827 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1828}
1829void emit_movzwl(int addr, int rt)
1830{
1831 u_int offset = addr-(u_int)&dynarec_local;
1832 assert(offset<256);
1833 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1834 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1835}
1836void emit_movzwl_reg(int rs, int rt)
1837{
1838 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1839 assert(0);
1840}
1841
1842void emit_xchg(int rs, int rt)
1843{
1844 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1845 assert(0);
1846}
1847void emit_writeword_indexed(int rt, int offset, int rs)
1848{
1849 assert(offset>-4096&&offset<4096);
1850 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1851 if(offset>=0) {
1852 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1853 }else{
1854 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1855 }
1856}
1857void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1858{
1859 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1860 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1861}
/*
 * Word store that optionally goes through a map register.
 * map < 0: plain base+offset store.  Otherwise addr is asserted to be 0 and
 * the store is indexed by map scaled by 4.  temp is not used here.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * Doubleword store of rh:rl.
 * map < 0: two base+offset stores at addr (skipped when rh < 0) and addr+4.
 * Otherwise rh is asserted >= 0 and the high word is stored through map.
 * The low word then goes either through temp = map+1 (when temp is a register
 * distinct from rs) or, when temp == rs, through map after rs itself has been
 * advanced by 4.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp = (temp != rs);
  if (have_temp) emit_addimm(map, 1, temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if (have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1886void emit_writehword_indexed(int rt, int offset, int rs)
1887{
1888 assert(offset>-256&&offset<256);
1889 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1892 }else{
1893 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1894 }
1895}
1896void emit_writebyte_indexed(int rt, int offset, int rs)
1897{
1898 assert(offset>-4096&&offset<4096);
1899 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1900 if(offset>=0) {
1901 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1902 }else{
1903 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1904 }
1905}
1906void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1907{
1908 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1909 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1910}
/*
 * Byte store that optionally goes through a map register.
 * map < 0: plain base+offset store.  Otherwise the store is indexed by map
 * scaled by 4; a non-zero addr is first added to rs with the sum placed in
 * temp, which then serves as the base register.
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base = rs;
  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    base = temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1923void emit_writeword(int rt, int addr)
1924{
1925 u_int offset = addr-(u_int)&dynarec_local;
1926 assert(offset<4096);
1927 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1928 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1929}
1930void emit_writehword(int rt, int addr)
1931{
1932 u_int offset = addr-(u_int)&dynarec_local;
1933 assert(offset<256);
1934 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1935 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1936}
1937void emit_writebyte(int rt, int addr)
1938{
1939 u_int offset = addr-(u_int)&dynarec_local;
1940 assert(offset<4096);
1941 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1942 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1943}
/* Stub: logs an x86-style "movl $imm,addr" and asserts unconditionally;
 * no code is emitted. */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
/* Stub: logs an x86-style "movb $imm,addr" and asserts unconditionally;
 * no code is emitted. */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1954
1955void emit_mul(int rs)
1956{
1957 assem_debug("mul %%%s\n",regname[rs]);
1958 assert(0);
1959}
1960void emit_imul(int rs)
1961{
1962 assem_debug("imul %%%s\n",regname[rs]);
1963 assert(0);
1964}
1965void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1966{
1967 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1968 assert(rs1<16);
1969 assert(rs2<16);
1970 assert(hi<16);
1971 assert(lo<16);
1972 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1973}
1974void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1975{
1976 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1977 assert(rs1<16);
1978 assert(rs2<16);
1979 assert(hi<16);
1980 assert(lo<16);
1981 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1982}
1983
1984void emit_div(int rs)
1985{
1986 assem_debug("div %%%s\n",regname[rs]);
1987 assert(0);
1988}
1989void emit_idiv(int rs)
1990{
1991 assem_debug("idiv %%%s\n",regname[rs]);
1992 assert(0);
1993}
/* Stub: logs "cdq" and asserts unconditionally; no code is emitted. */
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
1999
2000void emit_clz(int rs,int rt)
2001{
2002 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2003 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2004}
2005
2006void emit_subcs(int rs1,int rs2,int rt)
2007{
2008 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2009 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2010}
2011
2012void emit_shrcc_imm(int rs,u_int imm,int rt)
2013{
2014 assert(imm>0);
2015 assert(imm<32);
2016 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2017 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2018}
2019
2020void emit_negmi(int rs, int rt)
2021{
2022 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2023 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2024}
2025
2026void emit_negsmi(int rs, int rt)
2027{
2028 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2029 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2030}
2031
2032void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2033{
2034 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2035 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2036}
2037
2038void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2039{
2040 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2041 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2042}
2043
2044void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2045{
2046 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2047 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2048}
2049
2050void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2051{
2052 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2053 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2054}
2055
2056void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2057{
2058 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2059 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2060}
2061
2062void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2063{
2064 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2065 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2066}
2067
2068void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2069{
2070 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2071 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2072}
2073
2074void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2075{
2076 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2077 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2078}
2079
2080void emit_teq(int rs, int rt)
2081{
2082 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2083 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2084}
2085
2086void emit_rsbimm(int rs, int imm, int rt)
2087{
2088 u_int armval;
2089 assert(genimm(imm,&armval));
2090 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2091 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2092}
2093
2094// Load 2 immediates optimizing for small code size
2095void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2096{
2097 emit_movimm(imm1,rt1);
2098 u_int armval;
2099 if(genimm(imm2-imm1,&armval)) {
2100 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2101 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2102 }else if(genimm(imm1-imm2,&armval)) {
2103 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2104 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2105 }
2106 else emit_movimm(imm2,rt2);
2107}
2108
2109// Conditionally select one of two immediates, optimizing for small code size
2110// This will only be called if HAVE_CMOV_IMM is defined
2111void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2112{
2113 u_int armval;
2114 if(genimm(imm2-imm1,&armval)) {
2115 emit_movimm(imm1,rt);
2116 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2117 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2118 }else if(genimm(imm1-imm2,&armval)) {
2119 emit_movimm(imm1,rt);
2120 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2121 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2122 }
2123 else {
2124 #ifdef ARMv5_ONLY
2125 emit_movimm(imm1,rt);
2126 add_literal((int)out,imm2);
2127 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2128 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2129 #else
2130 emit_movw(imm1&0x0000FFFF,rt);
2131 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2132 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2133 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2134 }
2135 emit_movt(imm1&0xFFFF0000,rt);
2136 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2137 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2138 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2139 }
2140 #endif
2141 }
2142}
2143
// Special case for checking invalid_code.
// Stub: asserts unconditionally; no code is emitted.
void emit_cmpmem_indexedsr12_imm(int addr, int r, int imm)
{
  assert(0);
}
2149
2150// special case for checking invalid_code
2151void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2152{
2153 assert(imm<128&&imm>=0);
2154 assert(r>=0&&r<16);
2155 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2156 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2157 emit_cmpimm(HOST_TEMPREG,imm);
2158}
2159
2160// special case for tlb mapping
2161void emit_addsr12(int rs1,int rs2,int rt)
2162{
2163 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2164 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2165}
2166
// Used to preload hash table entries.
// Emits the bytes 0x0F 0x18, a modrm byte and the 32-bit address.
// NOTE(review): this byte sequence looks like an x86 PREFETCH encoding
// carried over from another backend rather than an ARM instruction --
// confirm that nothing calls this on ARM (emit_prefetchreg emits "pld").
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2176void emit_prefetchreg(int r)
2177{
2178 assem_debug("pld %s\n",regname[r]);
2179 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2180}
2181
2182// Special case for mini_ht
2183void emit_ldreq_indexed(int rs, u_int offset, int rt)
2184{
2185 assert(offset<4096);
2186 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2187 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2188}
2189
2190void emit_flds(int r,int sr)
2191{
2192 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2193 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2194}
2195
2196void emit_vldr(int r,int vr)
2197{
2198 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2199 output_w32(0xed900b00|(vr<<12)|(r<<16));
2200}
2201
2202void emit_fsts(int sr,int r)
2203{
2204 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2205 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2206}
2207
2208void emit_vstr(int vr,int r)
2209{
2210 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2211 output_w32(0xed800b00|(vr<<12)|(r<<16));
2212}
2213
2214void emit_ftosizs(int s,int d)
2215{
2216 assem_debug("ftosizs s%d,s%d\n",d,s);
2217 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2218}
2219
2220void emit_ftosizd(int s,int d)
2221{
2222 assem_debug("ftosizd s%d,d%d\n",d,s);
2223 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2224}
2225
2226void emit_fsitos(int s,int d)
2227{
2228 assem_debug("fsitos s%d,s%d\n",d,s);
2229 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2230}
2231
2232void emit_fsitod(int s,int d)
2233{
2234 assem_debug("fsitod d%d,s%d\n",d,s);
2235 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2236}
2237
2238void emit_fcvtds(int s,int d)
2239{
2240 assem_debug("fcvtds d%d,s%d\n",d,s);
2241 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2242}
2243
2244void emit_fcvtsd(int s,int d)
2245{
2246 assem_debug("fcvtsd s%d,d%d\n",d,s);
2247 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2248}
2249
// Emit "fsqrts s<d>,s<s>": single-precision square root.
// Both operands use the single-precision register field layout (the same
// one emit_fadds uses), so the trace now prints the destination as s<d>
// instead of the misleading d<d>.  Only s0..s15 are encodable.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2255
// Emit "fsqrtd d<d>,d<s>": double-precision square root.
// Both operands use the double-precision register field layout (the same
// one emit_faddd uses), so the trace now prints the destination as d<d>
// instead of the misleading s<d>.  Only d0..d7 are encodable.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2261
// Emit "fabss s<d>,s<s>": single-precision absolute value.
// Both operands use the single-precision register field layout, so the
// trace now prints the destination as s<d> instead of d<d>.
// Only s0..s15 are encodable.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2267
// Emit "fabsd d<d>,d<s>": double-precision absolute value.
// Both operands use the double-precision register field layout, so the
// trace now prints the destination as d<d> instead of s<d>.
// Only d0..d7 are encodable.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2273
// Emit "fnegs s<d>,s<s>": single-precision negate.
// Both operands use the single-precision register field layout, so the
// trace now prints the destination as s<d> instead of d<d>.
// Only s0..s15 are encodable.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2279
// Emit "fnegd d<d>,d<s>": double-precision negate.
// Both operands use the double-precision register field layout, so the
// trace now prints the destination as d<d> instead of s<d>.
// Only d0..d7 are encodable.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2285
2286void emit_fadds(int s1,int s2,int d)
2287{
2288 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2289 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2290}
2291
2292void emit_faddd(int s1,int s2,int d)
2293{
2294 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2295 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2296}
2297
2298void emit_fsubs(int s1,int s2,int d)
2299{
2300 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2301 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2302}
2303
2304void emit_fsubd(int s1,int s2,int d)
2305{
2306 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2307 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2308}
2309
2310void emit_fmuls(int s1,int s2,int d)
2311{
2312 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2313 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2314}
2315
2316void emit_fmuld(int s1,int s2,int d)
2317{
2318 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2319 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2320}
2321
2322void emit_fdivs(int s1,int s2,int d)
2323{
2324 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2325 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2326}
2327
2328void emit_fdivd(int s1,int s2,int d)
2329{
2330 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2331 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2332}
2333
// Emit a fixed single-precision compare, traced as "fcmps s14, s15".
// The x and y parameters are ignored: the instruction word is a constant.
void emit_fcmps(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmps s14, s15\n");
  output_w32(0xeeb47a67);
}
2339
// Emit a fixed double-precision compare, traced as "fcmpd d6, d7".
// The x and y parameters are ignored: the instruction word is a constant.
void emit_fcmpd(int x,int y)
{
  (void)x;
  (void)y;
  assem_debug("fcmpd d6, d7\n");
  output_w32(0xeeb46b47);
}
2345
2346void emit_fmstat()
2347{
2348 assem_debug("fmstat\n");
2349 output_w32(0xeef1fa10);
2350}
2351
2352void emit_bicne_imm(int rs,int imm,int rt)
2353{
2354 u_int armval;
2355 assert(genimm(imm,&armval));
2356 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2357 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2358}
2359
2360void emit_biccs_imm(int rs,int imm,int rt)
2361{
2362 u_int armval;
2363 assert(genimm(imm,&armval));
2364 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2365 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2366}
2367
2368void emit_bicvc_imm(int rs,int imm,int rt)
2369{
2370 u_int armval;
2371 assert(genimm(imm,&armval));
2372 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2373 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2374}
2375
2376void emit_bichi_imm(int rs,int imm,int rt)
2377{
2378 u_int armval;
2379 assert(genimm(imm,&armval));
2380 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2381 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2382}
2383
2384void emit_orrvs_imm(int rs,int imm,int rt)
2385{
2386 u_int armval;
2387 assert(genimm(imm,&armval));
2388 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2389 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2390}
2391
b9b61529 2392void emit_orrne_imm(int rs,int imm,int rt)
2393{
2394 u_int armval;
2395 assert(genimm(imm,&armval));
2396 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2397 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2398}
2399
2400void emit_andne_imm(int rs,int imm,int rt)
2401{
2402 u_int armval;
2403 assert(genimm(imm,&armval));
2404 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2405 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2406}
2407
57871462 2408void emit_jno_unlikely(int a)
2409{
2410 //emit_jno(a);
2411 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2412 output_w32(0x72800000|rd_rn_rm(15,15,0));
2413}
2414
2415// Save registers before function call
2416void save_regs(u_int reglist)
2417{
2418 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2419 if(!reglist) return;
2420 assem_debug("stmia fp,{");
2421 if(reglist&1) assem_debug("r0, ");
2422 if(reglist&2) assem_debug("r1, ");
2423 if(reglist&4) assem_debug("r2, ");
2424 if(reglist&8) assem_debug("r3, ");
2425 if(reglist&0x1000) assem_debug("r12");
2426 assem_debug("}\n");
2427 output_w32(0xe88b0000|reglist);
2428}
2429// Restore registers after function call
2430void restore_regs(u_int reglist)
2431{
2432 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2433 if(!reglist) return;
2434 assem_debug("ldmia fp,{");
2435 if(reglist&1) assem_debug("r0, ");
2436 if(reglist&2) assem_debug("r1, ");
2437 if(reglist&4) assem_debug("r2, ");
2438 if(reglist&8) assem_debug("r3, ");
2439 if(reglist&0x1000) assem_debug("r12");
2440 assem_debug("}\n");
2441 output_w32(0xe89b0000|reglist);
2442}
2443
2444// Write back consts using r14 so we don't disturb the other registers
2445void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2446{
2447 int hr;
2448 for(hr=0;hr<HOST_REGS;hr++) {
2449 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2450 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2451 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2452 int value=constmap[i][hr];
2453 if(value==0) {
2454 emit_zeroreg(HOST_TEMPREG);
2455 }
2456 else {
2457 emit_movimm(value,HOST_TEMPREG);
2458 }
2459 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2460#ifndef FORCE32
57871462 2461 if((i_is32>>i_regmap[hr])&1) {
2462 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2463 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2464 }
24385cae 2465#endif
57871462 2466 }
2467 }
2468 }
2469 }
2470}
2471
2472/* Stubs/epilogue */
2473
2474void literal_pool(int n)
2475{
2476 if(!literalcount) return;
2477 if(n) {
2478 if((int)out-literals[0][0]<4096-n) return;
2479 }
2480 u_int *ptr;
2481 int i;
2482 for(i=0;i<literalcount;i++)
2483 {
2484 ptr=(u_int *)literals[i][0];
2485 u_int offset=(u_int)out-(u_int)ptr-8;
2486 assert(offset<4096);
2487 assert(!(offset&3));
2488 *ptr|=offset;
2489 output_w32(literals[i][1]);
2490 }
2491 literalcount=0;
2492}
2493
2494void literal_pool_jumpover(int n)
2495{
2496 if(!literalcount) return;
2497 if(n) {
2498 if((int)out-literals[0][0]<4096-n) return;
2499 }
2500 int jaddr=(int)out;
2501 emit_jmp(0);
2502 literal_pool(0);
2503 set_jump_target(jaddr,(int)out);
2504}
2505
2506emit_extjump2(int addr, int target, int linker)
2507{
2508 u_char *ptr=(u_char *)addr;
2509 assert((ptr[3]&0x0e)==0xa);
2510 emit_loadlp(target,0);
2511 emit_loadlp(addr,1);
24385cae 2512 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2513 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2514//DEBUG >
2515#ifdef DEBUG_CYCLE_COUNT
2516 emit_readword((int)&last_count,ECX);
2517 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2518 emit_readword((int)&next_interupt,ECX);
2519 emit_writeword(HOST_CCREG,(int)&Count);
2520 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2521 emit_writeword(ECX,(int)&last_count);
2522#endif
2523//DEBUG <
2524 emit_jmp(linker);
2525}
2526
2527emit_extjump(int addr, int target)
2528{
2529 emit_extjump2(addr, target, (int)dyna_linker);
2530}
2531emit_extjump_ds(int addr, int target)
2532{
2533 emit_extjump2(addr, target, (int)dyna_linker_ds);
2534}
2535
2536do_readstub(int n)
2537{
2538 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2539 literal_pool(256);
2540 set_jump_target(stubs[n][1],(int)out);
2541 int type=stubs[n][0];
2542 int i=stubs[n][3];
2543 int rs=stubs[n][4];
2544 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2545 u_int reglist=stubs[n][7];
2546 signed char *i_regmap=i_regs->regmap;
2547 int addr=get_reg(i_regmap,AGEN1+(i&1));
2548 int rth,rt;
2549 int ds;
b9b61529 2550 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2551 rth=get_reg(i_regmap,FTEMP|64);
2552 rt=get_reg(i_regmap,FTEMP);
2553 }else{
2554 rth=get_reg(i_regmap,rt1[i]|64);
2555 rt=get_reg(i_regmap,rt1[i]);
2556 }
5bf843dc 2557#ifdef PCSX
2558 if(rt<0)
2559 // assume forced dummy read
2560 rt=get_reg(i_regmap,-1);
2561#endif
57871462 2562 assert(rs>=0);
2563 assert(rt>=0);
2564 if(addr<0) addr=rt;
2565 assert(addr>=0);
2566 int ftable=0;
2567 if(type==LOADB_STUB||type==LOADBU_STUB)
2568 ftable=(int)readmemb;
2569 if(type==LOADH_STUB||type==LOADHU_STUB)
2570 ftable=(int)readmemh;
2571 if(type==LOADW_STUB)
2572 ftable=(int)readmem;
24385cae 2573#ifndef FORCE32
57871462 2574 if(type==LOADD_STUB)
2575 ftable=(int)readmemd;
24385cae 2576#endif
2577 assert(ftable!=0);
57871462 2578 emit_writeword(rs,(int)&address);
2579 //emit_pusha();
2580 save_regs(reglist);
2581 ds=i_regs!=&regs[i];
2582 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2583 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2584 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2585 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2586 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2587 emit_shrimm(rs,16,1);
2588 int cc=get_reg(i_regmap,CCREG);
2589 if(cc<0) {
2590 emit_loadreg(CCREG,2);
2591 }
2592 emit_movimm(ftable,0);
2593 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2594 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2595 //emit_readword((int)&last_count,temp);
2596 //emit_add(cc,temp,cc);
2597 //emit_writeword(cc,(int)&Count);
2598 //emit_mov(15,14);
2599 emit_call((int)&indirect_jump_indexed);
2600 //emit_callreg(rs);
2601 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2602 // We really shouldn't need to update the count here,
2603 // but not doing so causes random crashes...
2604 emit_readword((int)&Count,HOST_TEMPREG);
2605 emit_readword((int)&next_interupt,2);
2606 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2607 emit_writeword(2,(int)&last_count);
2608 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2609 if(cc<0) {
2610 emit_storereg(CCREG,HOST_TEMPREG);
2611 }
2612 //emit_popa();
2613 restore_regs(reglist);
2614 //if((cc=get_reg(regmap,CCREG))>=0) {
2615 // emit_loadreg(CCREG,cc);
2616 //}
2617 if(type==LOADB_STUB)
2618 emit_movsbl((int)&readmem_dword,rt);
2619 if(type==LOADBU_STUB)
2620 emit_movzbl((int)&readmem_dword,rt);
2621 if(type==LOADH_STUB)
2622 emit_movswl((int)&readmem_dword,rt);
2623 if(type==LOADHU_STUB)
2624 emit_movzwl((int)&readmem_dword,rt);
2625 if(type==LOADW_STUB)
2626 emit_readword((int)&readmem_dword,rt);
2627 if(type==LOADD_STUB) {
2628 emit_readword((int)&readmem_dword,rt);
2629 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2630 }
2631 emit_jmp(stubs[n][2]); // return address
2632}
2633
2634inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2635{
2636 int rs=get_reg(regmap,target);
2637 int rth=get_reg(regmap,target|64);
2638 int rt=get_reg(regmap,target);
2639 assert(rs>=0);
2640 assert(rt>=0);
2641 int ftable=0;
2642 if(type==LOADB_STUB||type==LOADBU_STUB)
2643 ftable=(int)readmemb;
2644 if(type==LOADH_STUB||type==LOADHU_STUB)
2645 ftable=(int)readmemh;
2646 if(type==LOADW_STUB)
2647 ftable=(int)readmem;
24385cae 2648#ifndef FORCE32
57871462 2649 if(type==LOADD_STUB)
2650 ftable=(int)readmemd;
24385cae 2651#endif
2652 assert(ftable!=0);
57871462 2653 emit_writeword(rs,(int)&address);
2654 //emit_pusha();
2655 save_regs(reglist);
2656 //emit_shrimm(rs,16,1);
2657 int cc=get_reg(regmap,CCREG);
2658 if(cc<0) {
2659 emit_loadreg(CCREG,2);
2660 }
2661 //emit_movimm(ftable,0);
2662 emit_movimm(((u_int *)ftable)[addr>>16],0);
2663 //emit_readword((int)&last_count,12);
2664 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2665 if((signed int)addr>=(signed int)0xC0000000) {
2666 // Pagefault address
2667 int ds=regmap!=regs[i].regmap;
2668 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2669 }
2670 //emit_add(12,2,2);
2671 //emit_writeword(2,(int)&Count);
2672 //emit_call(((u_int *)ftable)[addr>>16]);
2673 emit_call((int)&indirect_jump);
2674 // We really shouldn't need to update the count here,
2675 // but not doing so causes random crashes...
2676 emit_readword((int)&Count,HOST_TEMPREG);
2677 emit_readword((int)&next_interupt,2);
2678 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2679 emit_writeword(2,(int)&last_count);
2680 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2681 if(cc<0) {
2682 emit_storereg(CCREG,HOST_TEMPREG);
2683 }
2684 //emit_popa();
2685 restore_regs(reglist);
2686 if(type==LOADB_STUB)
2687 emit_movsbl((int)&readmem_dword,rt);
2688 if(type==LOADBU_STUB)
2689 emit_movzbl((int)&readmem_dword,rt);
2690 if(type==LOADH_STUB)
2691 emit_movswl((int)&readmem_dword,rt);
2692 if(type==LOADHU_STUB)
2693 emit_movzwl((int)&readmem_dword,rt);
2694 if(type==LOADW_STUB)
2695 emit_readword((int)&readmem_dword,rt);
2696 if(type==LOADD_STUB) {
2697 emit_readword((int)&readmem_dword,rt);
2698 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2699 }
2700}
2701
2702do_writestub(int n)
2703{
2704 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2705 literal_pool(256);
2706 set_jump_target(stubs[n][1],(int)out);
2707 int type=stubs[n][0];
2708 int i=stubs[n][3];
2709 int rs=stubs[n][4];
2710 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2711 u_int reglist=stubs[n][7];
2712 signed char *i_regmap=i_regs->regmap;
2713 int addr=get_reg(i_regmap,AGEN1+(i&1));
2714 int rth,rt,r;
2715 int ds;
b9b61529 2716 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2717 rth=get_reg(i_regmap,FTEMP|64);
2718 rt=get_reg(i_regmap,r=FTEMP);
2719 }else{
2720 rth=get_reg(i_regmap,rs2[i]|64);
2721 rt=get_reg(i_regmap,r=rs2[i]);
2722 }
2723 assert(rs>=0);
2724 assert(rt>=0);
2725 if(addr<0) addr=get_reg(i_regmap,-1);
2726 assert(addr>=0);
2727 int ftable=0;
2728 if(type==STOREB_STUB)
2729 ftable=(int)writememb;
2730 if(type==STOREH_STUB)
2731 ftable=(int)writememh;
2732 if(type==STOREW_STUB)
2733 ftable=(int)writemem;
24385cae 2734#ifndef FORCE32
57871462 2735 if(type==STORED_STUB)
2736 ftable=(int)writememd;
24385cae 2737#endif
2738 assert(ftable!=0);
57871462 2739 emit_writeword(rs,(int)&address);
2740 //emit_shrimm(rs,16,rs);
2741 //emit_movmem_indexedx4(ftable,rs,rs);
2742 if(type==STOREB_STUB)
2743 emit_writebyte(rt,(int)&byte);
2744 if(type==STOREH_STUB)
2745 emit_writehword(rt,(int)&hword);
2746 if(type==STOREW_STUB)
2747 emit_writeword(rt,(int)&word);
2748 if(type==STORED_STUB) {
3d624f89 2749#ifndef FORCE32
57871462 2750 emit_writeword(rt,(int)&dword);
2751 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 2752#else
2753 printf("STORED_STUB\n");
2754#endif
57871462 2755 }
2756 //emit_pusha();
2757 save_regs(reglist);
2758 ds=i_regs!=&regs[i];
2759 int real_rs=get_reg(i_regmap,rs1[i]);
2760 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2761 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2762 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2763 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2764 emit_shrimm(rs,16,1);
2765 int cc=get_reg(i_regmap,CCREG);
2766 if(cc<0) {
2767 emit_loadreg(CCREG,2);
2768 }
2769 emit_movimm(ftable,0);
2770 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2771 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2772 //emit_readword((int)&last_count,temp);
2773 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2774 //emit_add(cc,temp,cc);
2775 //emit_writeword(cc,(int)&Count);
2776 emit_call((int)&indirect_jump_indexed);
2777 //emit_callreg(rs);
2778 emit_readword((int)&Count,HOST_TEMPREG);
2779 emit_readword((int)&next_interupt,2);
2780 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2781 emit_writeword(2,(int)&last_count);
2782 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2783 if(cc<0) {
2784 emit_storereg(CCREG,HOST_TEMPREG);
2785 }
2786 //emit_popa();
2787 restore_regs(reglist);
2788 //if((cc=get_reg(regmap,CCREG))>=0) {
2789 // emit_loadreg(CCREG,cc);
2790 //}
2791 emit_jmp(stubs[n][2]); // return address
2792}
2793
2794inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2795{
2796 int rs=get_reg(regmap,-1);
2797 int rth=get_reg(regmap,target|64);
2798 int rt=get_reg(regmap,target);
2799 assert(rs>=0);
2800 assert(rt>=0);
2801 int ftable=0;
2802 if(type==STOREB_STUB)
2803 ftable=(int)writememb;
2804 if(type==STOREH_STUB)
2805 ftable=(int)writememh;
2806 if(type==STOREW_STUB)
2807 ftable=(int)writemem;
24385cae 2808#ifndef FORCE32
57871462 2809 if(type==STORED_STUB)
2810 ftable=(int)writememd;
24385cae 2811#endif
2812 assert(ftable!=0);
57871462 2813 emit_writeword(rs,(int)&address);
2814 //emit_shrimm(rs,16,rs);
2815 //emit_movmem_indexedx4(ftable,rs,rs);
2816 if(type==STOREB_STUB)
2817 emit_writebyte(rt,(int)&byte);
2818 if(type==STOREH_STUB)
2819 emit_writehword(rt,(int)&hword);
2820 if(type==STOREW_STUB)
2821 emit_writeword(rt,(int)&word);
2822 if(type==STORED_STUB) {
3d624f89 2823#ifndef FORCE32
57871462 2824 emit_writeword(rt,(int)&dword);
2825 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 2826#else
2827 printf("STORED_STUB\n");
2828#endif
57871462 2829 }
2830 //emit_pusha();
2831 save_regs(reglist);
2832 //emit_shrimm(rs,16,1);
2833 int cc=get_reg(regmap,CCREG);
2834 if(cc<0) {
2835 emit_loadreg(CCREG,2);
2836 }
2837 //emit_movimm(ftable,0);
2838 emit_movimm(((u_int *)ftable)[addr>>16],0);
2839 //emit_readword((int)&last_count,12);
2840 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2841 if((signed int)addr>=(signed int)0xC0000000) {
2842 // Pagefault address
2843 int ds=regmap!=regs[i].regmap;
2844 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2845 }
2846 //emit_add(12,2,2);
2847 //emit_writeword(2,(int)&Count);
2848 //emit_call(((u_int *)ftable)[addr>>16]);
2849 emit_call((int)&indirect_jump);
2850 emit_readword((int)&Count,HOST_TEMPREG);
2851 emit_readword((int)&next_interupt,2);
2852 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2853 emit_writeword(2,(int)&last_count);
2854 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2855 if(cc<0) {
2856 emit_storereg(CCREG,HOST_TEMPREG);
2857 }
2858 //emit_popa();
2859 restore_regs(reglist);
2860}
2861
2862do_unalignedwritestub(int n)
2863{
2864 set_jump_target(stubs[n][1],(int)out);
2865 output_w32(0xef000000);
2866 emit_jmp(stubs[n][2]); // return address
2867}
2868
// Debug helper: print seven of the eight arguments in the order
// a,b,c,d,ebp,esi,edi (esp is accepted but not printed), followed in
// parentheses by the int located just below edi in memory.
// The parameter names follow x86 register naming.
// NOTE(review): (&edi)[-1] reads outside the parameter object; what it
// yields depends on the frame layout, so the expression is deliberately
// left exactly as it was.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
}
2873
2874do_invstub(int n)
2875{
2876 literal_pool(20);
2877 u_int reglist=stubs[n][3];
2878 set_jump_target(stubs[n][1],(int)out);
2879 save_regs(reglist);
2880 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2881 emit_call((int)&invalidate_addr);
2882 restore_regs(reglist);
2883 emit_jmp(stubs[n][2]); // return address
2884}
2885
2886int do_dirty_stub(int i)
2887{
2888 assem_debug("do_dirty_stub %x\n",start+i*4);
2889 // Careful about the code output here, verify_dirty needs to parse it.
2890 #ifdef ARMv5_ONLY
2891 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2892 emit_loadlp((int)copy,2);
2893 emit_loadlp(slen*4,3);
2894 #else
2895 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2896 emit_movw(((u_int)copy)&0x0000FFFF,2);
2897 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2898 emit_movt(((u_int)copy)&0xFFFF0000,2);
2899 emit_movw(slen*4,3);
2900 #endif
2901 emit_movimm(start+i*4,0);
2902 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2903 int entry=(int)out;
2904 load_regs_entry(i);
2905 if(entry==(int)out) entry=instr_addr[i];
2906 emit_jmp(instr_addr[i]);
2907 return entry;
2908}
2909
2910void do_dirty_stub_ds()
2911{
2912 // Careful about the code output here, verify_dirty needs to parse it.
2913 #ifdef ARMv5_ONLY
2914 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2915 emit_loadlp((int)copy,2);
2916 emit_loadlp(slen*4,3);
2917 #else
2918 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2919 emit_movw(((u_int)copy)&0x0000FFFF,2);
2920 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2921 emit_movt(((u_int)copy)&0xFFFF0000,2);
2922 emit_movw(slen*4,3);
2923 #endif
2924 emit_movimm(start+1,0);
2925 emit_call((int)&verify_code_ds);
2926}
2927
2928do_cop1stub(int n)
2929{
2930 literal_pool(256);
2931 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2932 set_jump_target(stubs[n][1],(int)out);
2933 int i=stubs[n][3];
3d624f89 2934// int rs=stubs[n][4];
57871462 2935 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2936 int ds=stubs[n][6];
2937 if(!ds) {
2938 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2939 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2940 }
2941 //else {printf("fp exception in delay slot\n");}
2942 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2943 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2944 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2945 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2946 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2947}
2948
2949/* TLB */
2950
2951int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
2952{
2953 if(c) {
2954 if((signed int)addr>=(signed int)0xC0000000) {
2955 // address_generation already loaded the const
2956 emit_readword_dualindexedx4(FP,map,map);
2957 }
2958 else
2959 return -1; // No mapping
2960 }
2961 else {
2962 assert(s!=map);
2963 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2964 emit_addsr12(map,s,map);
2965 // Schedule this while we wait on the load
2966 //if(x) emit_xorimm(s,x,ar);
2967 if(shift>=0) emit_shlimm(s,3,shift);
2968 if(~a) emit_andimm(s,a,ar);
2969 emit_readword_dualindexedx4(FP,map,map);
2970 }
2971 return map;
2972}
2973int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
2974{
2975 if(!c||(signed int)addr>=(signed int)0xC0000000) {
2976 emit_test(map,map);
2977 *jaddr=(int)out;
2978 emit_js(0);
2979 }
2980 return map;
2981}
2982
2983int gen_tlb_addr_r(int ar, int map) {
2984 if(map>=0) {
2985 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2986 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2987 }
2988}
2989
2990int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
2991{
2992 if(c) {
2993 if(addr<0x80800000||addr>=0xC0000000) {
2994 // address_generation already loaded the const
2995 emit_readword_dualindexedx4(FP,map,map);
2996 }
2997 else
2998 return -1; // No mapping
2999 }
3000 else {
3001 assert(s!=map);
3002 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3003 emit_addsr12(map,s,map);
3004 // Schedule this while we wait on the load
3005 //if(x) emit_xorimm(s,x,ar);
3006 emit_readword_dualindexedx4(FP,map,map);
3007 }
3008 return map;
3009}
3010int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3011{
3012 if(!c||addr<0x80800000||addr>=0xC0000000) {
3013 emit_testimm(map,0x40000000);
3014 *jaddr=(int)out;
3015 emit_jne(0);
3016 }
3017}
3018
3019int gen_tlb_addr_w(int ar, int map) {
3020 if(map>=0) {
3021 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3022 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3023 }
3024}
3025
3026// Generate the address of the memory_map entry, relative to dynarec_local
3027generate_map_const(u_int addr,int reg) {
3028 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3029 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3030}
3031
3032/* Special assem */
3033
3034void shift_assemble_arm(int i,struct regstat *i_regs)
3035{
3036 if(rt1[i]) {
3037 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3038 {
3039 signed char s,t,shift;
3040 t=get_reg(i_regs->regmap,rt1[i]);
3041 s=get_reg(i_regs->regmap,rs1[i]);
3042 shift=get_reg(i_regs->regmap,rs2[i]);
3043 if(t>=0){
3044 if(rs1[i]==0)
3045 {
3046 emit_zeroreg(t);
3047 }
3048 else if(rs2[i]==0)
3049 {
3050 assert(s>=0);
3051 if(s!=t) emit_mov(s,t);
3052 }
3053 else
3054 {
3055 emit_andimm(shift,31,HOST_TEMPREG);
3056 if(opcode2[i]==4) // SLLV
3057 {
3058 emit_shl(s,HOST_TEMPREG,t);
3059 }
3060 if(opcode2[i]==6) // SRLV
3061 {
3062 emit_shr(s,HOST_TEMPREG,t);
3063 }
3064 if(opcode2[i]==7) // SRAV
3065 {
3066 emit_sar(s,HOST_TEMPREG,t);
3067 }
3068 }
3069 }
3070 } else { // DSLLV/DSRLV/DSRAV
3071 signed char sh,sl,th,tl,shift;
3072 th=get_reg(i_regs->regmap,rt1[i]|64);
3073 tl=get_reg(i_regs->regmap,rt1[i]);
3074 sh=get_reg(i_regs->regmap,rs1[i]|64);
3075 sl=get_reg(i_regs->regmap,rs1[i]);
3076 shift=get_reg(i_regs->regmap,rs2[i]);
3077 if(tl>=0){
3078 if(rs1[i]==0)
3079 {
3080 emit_zeroreg(tl);
3081 if(th>=0) emit_zeroreg(th);
3082 }
3083 else if(rs2[i]==0)
3084 {
3085 assert(sl>=0);
3086 if(sl!=tl) emit_mov(sl,tl);
3087 if(th>=0&&sh!=th) emit_mov(sh,th);
3088 }
3089 else
3090 {
3091 // FIXME: What if shift==tl ?
3092 assert(shift!=tl);
3093 int temp=get_reg(i_regs->regmap,-1);
3094 int real_th=th;
3095 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3096 assert(sl>=0);
3097 assert(sh>=0);
3098 emit_andimm(shift,31,HOST_TEMPREG);
3099 if(opcode2[i]==0x14) // DSLLV
3100 {
3101 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3102 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3103 emit_orrshr(sl,HOST_TEMPREG,th);
3104 emit_andimm(shift,31,HOST_TEMPREG);
3105 emit_testimm(shift,32);
3106 emit_shl(sl,HOST_TEMPREG,tl);
3107 if(th>=0) emit_cmovne_reg(tl,th);
3108 emit_cmovne_imm(0,tl);
3109 }
3110 if(opcode2[i]==0x16) // DSRLV
3111 {
3112 assert(th>=0);
3113 emit_shr(sl,HOST_TEMPREG,tl);
3114 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3115 emit_orrshl(sh,HOST_TEMPREG,tl);
3116 emit_andimm(shift,31,HOST_TEMPREG);
3117 emit_testimm(shift,32);
3118 emit_shr(sh,HOST_TEMPREG,th);
3119 emit_cmovne_reg(th,tl);
3120 if(real_th>=0) emit_cmovne_imm(0,th);
3121 }
3122 if(opcode2[i]==0x17) // DSRAV
3123 {
3124 assert(th>=0);
3125 emit_shr(sl,HOST_TEMPREG,tl);
3126 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3127 if(real_th>=0) {
3128 assert(temp>=0);
3129 emit_sarimm(th,31,temp);
3130 }
3131 emit_orrshl(sh,HOST_TEMPREG,tl);
3132 emit_andimm(shift,31,HOST_TEMPREG);
3133 emit_testimm(shift,32);
3134 emit_sar(sh,HOST_TEMPREG,th);
3135 emit_cmovne_reg(th,tl);
3136 if(real_th>=0) emit_cmovne_reg(temp,th);
3137 }
3138 }
3139 }
3140 }
3141 }
3142}
3143#define shift_assemble shift_assemble_arm
3144
3145void loadlr_assemble_arm(int i,struct regstat *i_regs)
3146{
3147 int s,th,tl,temp,temp2,addr,map=-1;
3148 int offset;
3149 int jaddr=0;
3150 int memtarget,c=0;
3151 u_int hr,reglist=0;
3152 th=get_reg(i_regs->regmap,rt1[i]|64);
3153 tl=get_reg(i_regs->regmap,rt1[i]);
3154 s=get_reg(i_regs->regmap,rs1[i]);
3155 temp=get_reg(i_regs->regmap,-1);
3156 temp2=get_reg(i_regs->regmap,FTEMP);
3157 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3158 assert(addr<0);
3159 offset=imm[i];
3160 for(hr=0;hr<HOST_REGS;hr++) {
3161 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3162 }
3163 reglist|=1<<temp;
3164 if(offset||s<0||c) addr=temp2;
3165 else addr=s;
3166 if(s>=0) {
3167 c=(i_regs->wasconst>>s)&1;
3168 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3169 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3170 }
3171 if(tl>=0) {
3172 //assert(tl>=0);
3173 //assert(rt1[i]);
3174 if(!using_tlb) {
3175 if(!c) {
3176 emit_shlimm(addr,3,temp);
3177 if (opcode[i]==0x22||opcode[i]==0x26) {
3178 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3179 }else{
3180 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3181 }
3182 emit_cmpimm(addr,0x800000);
3183 jaddr=(int)out;
3184 emit_jno(0);
3185 }
3186 else {
3187 if (opcode[i]==0x22||opcode[i]==0x26) {
3188 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3189 }else{
3190 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3191 }
3192 }
3193 }else{ // using tlb
3194 int a;
3195 if(c) {
3196 a=-1;
3197 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3198 a=0xFFFFFFFC; // LWL/LWR
3199 }else{
3200 a=0xFFFFFFF8; // LDL/LDR
3201 }
3202 map=get_reg(i_regs->regmap,TLREG);
3203 assert(map>=0);
3204 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3205 if(c) {
3206 if (opcode[i]==0x22||opcode[i]==0x26) {
3207 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3208 }else{
3209 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3210 }
3211 }
3212 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3213 }
3214 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3215 if(!c||memtarget) {
3216 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3217 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3218 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3219 }
3220 else
3221 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3222 emit_andimm(temp,24,temp);
2002a1db 3223#ifdef BIG_ENDIAN_MIPS
3224 if (opcode[i]==0x26) // LWR
3225#else
3226 if (opcode[i]==0x22) // LWL
3227#endif
3228 emit_xorimm(temp,24,temp);
57871462 3229 emit_movimm(-1,HOST_TEMPREG);
3230 if (opcode[i]==0x26) {
3231 emit_shr(temp2,temp,temp2);
3232 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3233 }else{
3234 emit_shl(temp2,temp,temp2);
3235 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3236 }
3237 emit_or(temp2,tl,tl);
3238 //emit_storereg(rt1[i],tl); // DEBUG
3239 }
3240 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
2002a1db 3241 // FIXME: little endian
57871462 3242 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3243 if(!c||memtarget) {
3244 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3245 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3246 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3247 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3248 }
3249 else
3250 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3251 emit_testimm(temp,32);
3252 emit_andimm(temp,24,temp);
3253 if (opcode[i]==0x1A) { // LDL
3254 emit_rsbimm(temp,32,HOST_TEMPREG);
3255 emit_shl(temp2h,temp,temp2h);
3256 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3257 emit_movimm(-1,HOST_TEMPREG);
3258 emit_shl(temp2,temp,temp2);
3259 emit_cmove_reg(temp2h,th);
3260 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3261 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3262 emit_orreq(temp2,tl,tl);
3263 emit_orrne(temp2,th,th);
3264 }
3265 if (opcode[i]==0x1B) { // LDR
3266 emit_xorimm(temp,24,temp);
3267 emit_rsbimm(temp,32,HOST_TEMPREG);
3268 emit_shr(temp2,temp,temp2);
3269 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3270 emit_movimm(-1,HOST_TEMPREG);
3271 emit_shr(temp2h,temp,temp2h);
3272 emit_cmovne_reg(temp2,tl);
3273 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3274 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3275 emit_orrne(temp2h,th,th);
3276 emit_orreq(temp2h,tl,tl);
3277 }
3278 }
3279 }
3280}
3281#define loadlr_assemble loadlr_assemble_arm
3282
/*
 * Emit code for a coprocessor 0 instruction at index i:
 * MFC0 (opcode2 0), MTC0 (opcode2 4) or the opcode2 0x10 group
 * (TLBR/TLBWI/TLBWR/TLBP when the TLB is compiled in, and ERET).
 *
 * i       index of the instruction inside the block being compiled
 * i_regs  register allocation state for this instruction
 */
void cop0_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]==0) // MFC0
  {
    signed char t=get_reg(i_regs->regmap,rt1[i]);
    char copr=(source[i]>>11)&0x1f;
    //assert(t>=0); // Why does this happen?  OOT is weird
    if(t>=0) {
#ifdef MUPEN64
      // Hand the register number to the C handler through fake_pc
      emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
      emit_movimm((source[i]>>11)&0x1f,1);
      emit_writeword(0,(int)&PC);
      emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
      if(copr==9) {
        // Register 9: bring Count up to date before the handler reads it
        emit_readword((int)&last_count,ECX);
        emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
      }
      emit_call((int)MFC0);
      emit_readword((int)&readmem_dword,t);
#else
      // Read the register straight out of the reg_cop0 array
      emit_readword((int)&reg_cop0+copr*4,t);
#endif
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    signed char s=get_reg(i_regs->regmap,rs1[i]);
    char copr=(source[i]>>11)&0x1f;
    assert(s>=0);
    // The value to write is passed to the handler via readmem_dword
    emit_writeword(s,(int)&readmem_dword);
    wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
#ifdef MUPEN64 /// FIXME
    emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
    emit_movimm((source[i]>>11)&0x1f,1);
    emit_writeword(0,(int)&PC);
    emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
#endif
#ifdef PCSX
    // Store the raw instruction word in psxRegs.code
    emit_movimm(source[i],0);
    emit_writeword(0,(int)&psxRegs.code);
#endif
    if(copr==9||copr==11||copr==12||copr==13) {
      // Store the current cycle count into Count before calling out
      emit_readword((int)&last_count,ECX);
      emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
      emit_add(HOST_CCREG,ECX,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
      emit_writeword(HOST_CCREG,(int)&Count);
    }
    // What a mess.  The status register (12) can enable interrupts,
    // so needs a special case to handle a pending interrupt.
    // The interrupt must be taken immediately, because a subsequent
    // instruction might disable interrupts again.
    // (The code below applies the same handling to register 13.)
    if(copr==12||copr==13) {
      // Record the address of the next instruction and clear the flag
      emit_movimm(start+i*4+4,0);
      emit_movimm(0,1);
      emit_writeword(0,(int)&pcaddr);
      emit_writeword(1,(int)&pending_exception);
    }
    //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
    //else
    emit_call((int)MTC0);
    if(copr==9||copr==11||copr==12||copr==13) {
      // Re-derive the cycle counter relative to next_interupt after the call
      emit_readword((int)&Count,HOST_CCREG);
      emit_readword((int)&next_interupt,ECX);
      emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
      emit_sub(HOST_CCREG,ECX,HOST_CCREG);
      emit_writeword(ECX,(int)&last_count);
      emit_storereg(CCREG,HOST_CCREG);
    }
    if(copr==12||copr==13) {
      assert(!is_delayslot);
      // Host register 14 holds pending_exception until the test below
      emit_readword((int)&pending_exception,14);
    }
    // Reload the source register (both halves if the upper half is mapped)
    emit_loadreg(rs1[i],s);
    if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
      emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
    if(copr==12||copr==13) {
      // Non-zero pending_exception: branch to do_interrupt
      emit_test(14,14);
      emit_jne((int)&do_interrupt);
    }
    // Force the next COP1 instruction to re-check usability
    cop1_usable=0;
  }
  else
  {
    assert(opcode2[i]==0x10);
#ifndef DISABLE_TLB
    if((source[i]&0x3f)==0x01) // TLBR
      emit_call((int)TLBR);
    if((source[i]&0x3f)==0x02) // TLBWI
      emit_call((int)TLBWI_new);
    if((source[i]&0x3f)==0x06) { // TLBWR
      // The TLB entry written by TLBWR is dependent on the count,
      // so update the cycle count
      emit_readword((int)&last_count,ECX);
      if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
      emit_add(HOST_CCREG,ECX,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
      emit_writeword(HOST_CCREG,(int)&Count);
      emit_call((int)TLBWR_new);
    }
    if((source[i]&0x3f)==0x08) // TLBP
      emit_call((int)TLBP);
#endif
    if((source[i]&0x3f)==0x18) // ERET
    {
      int count=ccadj[i];
      if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
      emit_jmp((int)jump_eret);
    }
  }
}
3398
b9b61529 3399static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3400{
3401 switch (copr) {
3402 case 1:
3403 case 3:
3404 case 5:
3405 case 8:
3406 case 9:
3407 case 10:
3408 case 11:
3409 emit_readword((int)&reg_cop2d[copr],tl);
3410 emit_signextend16(tl,tl);
3411 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3412 break;
3413 case 7:
3414 case 16:
3415 case 17:
3416 case 18:
3417 case 19:
3418 emit_readword((int)&reg_cop2d[copr],tl);
3419 emit_andimm(tl,0xffff,tl);
3420 emit_writeword(tl,(int)&reg_cop2d[copr]);
3421 break;
3422 case 15:
3423 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3424 emit_writeword(tl,(int)&reg_cop2d[copr]);
3425 break;
3426 case 28:
3427 case 30:
3428 emit_movimm(0,tl);
3429 break;
3430 case 29:
3431 emit_readword((int)&reg_cop2d[9],temp);
3432 emit_testimm(temp,0x8000); // do we need this?
3433 emit_andimm(temp,0xf80,temp);
3434 emit_andne_imm(temp,0,temp);
3435 emit_shr(temp,7,tl);
3436 emit_readword((int)&reg_cop2d[10],temp);
3437 emit_testimm(temp,0x8000);
3438 emit_andimm(temp,0xf80,temp);
3439 emit_andne_imm(temp,0,temp);
3440 emit_orrshr(temp,2,tl);
3441 emit_readword((int)&reg_cop2d[11],temp);
3442 emit_testimm(temp,0x8000);
3443 emit_andimm(temp,0xf80,temp);
3444 emit_andne_imm(temp,0,temp);
3445 emit_orrshl(temp,3,tl);
3446 emit_writeword(tl,(int)&reg_cop2d[copr]);
3447 break;
3448 default:
3449 emit_readword((int)&reg_cop2d[copr],tl);
3450 break;
3451 }
3452}
3453
3454static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3455{
3456 switch (copr) {
3457 case 15:
3458 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3459 emit_writeword(sl,(int)&reg_cop2d[copr]);
3460 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3461 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3462 emit_writeword(sl,(int)&reg_cop2d[14]);
3463 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3464 break;
3465 case 28:
3466 emit_andimm(sl,0x001f,temp);
3467 emit_shl(temp,7,temp);
3468 emit_writeword(temp,(int)&reg_cop2d[9]);
3469 emit_andimm(sl,0x03e0,temp);
3470 emit_shl(temp,2,temp);
3471 emit_writeword(temp,(int)&reg_cop2d[10]);
3472 emit_andimm(sl,0x7c00,temp);
3473 emit_shr(temp,3,temp);
3474 emit_writeword(temp,(int)&reg_cop2d[11]);
3475 emit_writeword(sl,(int)&reg_cop2d[28]);
3476 break;
3477 case 30:
3478 emit_movs(sl,temp);
3479 emit_mvnmi(temp,temp);
3480 emit_clz(temp,temp);
3481 emit_writeword(sl,(int)&reg_cop2d[30]);
3482 emit_writeword(temp,(int)&reg_cop2d[31]);
3483 break;
3484 case 7:
3485 case 29:
3486 case 31:
3487 break;
3488 default:
3489 emit_writeword(sl,(int)&reg_cop2d[copr]);
3490 break;
3491 }
3492}
3493
3494void cop2_assemble(int i,struct regstat *i_regs)
3495{
3496 u_int copr=(source[i]>>11)&0x1f;
3497 signed char temp=get_reg(i_regs->regmap,-1);
3498 if (opcode2[i]==0) { // MFC2
3499 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3500 if(tl>=0)
3501 cop2_get_dreg(copr,tl,temp);
3502 }
3503 else if (opcode2[i]==4) { // MTC2
3504 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3505 cop2_put_dreg(copr,sl,temp);
3506 }
3507 else if (opcode2[i]==2) // CFC2
3508 {
3509 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3510 if(tl>=0)
3511 emit_readword((int)&reg_cop2c[copr],tl);
3512 }
3513 else if (opcode2[i]==6) // CTC2
3514 {
3515 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3516 switch(copr) {
3517 case 4:
3518 case 12:
3519 case 20:
3520 case 26:
3521 case 27:
3522 case 29:
3523 case 30:
3524 emit_signextend16(sl,temp);
3525 break;
3526 case 31:
3527 //value = value & 0x7ffff000;
3528 //if (value & 0x7f87e000) value |= 0x80000000;
3529 emit_shrimm(sl,12,temp);
3530 emit_shlimm(temp,12,temp);
3531 emit_testimm(temp,0x7f000000);
3532 emit_testeqimm(temp,0x00870000);
3533 emit_testeqimm(temp,0x0000e000);
3534 emit_orrne_imm(temp,0x80000000,temp);
3535 break;
3536 default:
3537 temp=sl;
3538 break;
3539 }
3540 emit_writeword(temp,(int)&reg_cop2c[copr]);
3541 assert(sl>=0);
3542 }
3543}
3544
3545void c2op_assemble(int i,struct regstat *i_regs)
3546{
3547 signed char temp=get_reg(i_regs->regmap,-1);
3548 u_int c2op=source[i]&0x3f;
3549 u_int hr,reglist=0;
3550 for(hr=0;hr<HOST_REGS;hr++) {
3551 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3552 }
3553 if(i==0||itype[i-1]!=C2OP)
3554 save_regs(reglist);
3555
3556 if (gte_handlers[c2op]!=NULL) {
3557 int cc=get_reg(i_regs->regmap,CCREG);
3558 emit_movimm(source[i],temp); // opcode
3559 if (cc>=0&&gte_cycletab[c2op])
3560 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3561 emit_writeword(temp,(int)&psxRegs.code);
3562 emit_call((int)gte_handlers[c2op]);
3563 }
3564
3565 if(i>=slen-1||itype[i+1]!=C2OP)
3566 restore_regs(reglist);
3567}
3568
3569void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3570{
3571 // XXX: should just just do the exception instead
3572 if(!cop1_usable) {
3573 int jaddr=(int)out;
3574 emit_jmp(0);
3575 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3576 cop1_usable=1;
3577 }
3578}
3579
57871462 3580void cop1_assemble(int i,struct regstat *i_regs)
3581{
3d624f89 3582#ifndef DISABLE_COP1
57871462 3583 // Check cop1 unusable
3584 if(!cop1_usable) {
3585 signed char rs=get_reg(i_regs->regmap,CSREG);
3586 assert(rs>=0);
3587 emit_testimm(rs,0x20000000);
3588 int jaddr=(int)out;
3589 emit_jeq(0);
3590 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3591 cop1_usable=1;
3592 }
3593 if (opcode2[i]==0) { // MFC1
3594 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3595 if(tl>=0) {
3596 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3597 emit_readword_indexed(0,tl,tl);
3598 }
3599 }
3600 else if (opcode2[i]==1) { // DMFC1
3601 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3602 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3603 if(tl>=0) {
3604 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3605 if(th>=0) emit_readword_indexed(4,tl,th);
3606 emit_readword_indexed(0,tl,tl);
3607 }
3608 }
3609 else if (opcode2[i]==4) { // MTC1
3610 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3611 signed char temp=get_reg(i_regs->regmap,-1);
3612 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3613 emit_writeword_indexed(sl,0,temp);
3614 }
3615 else if (opcode2[i]==5) { // DMTC1
3616 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3617 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3618 signed char temp=get_reg(i_regs->regmap,-1);
3619 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3620 emit_writeword_indexed(sh,4,temp);
3621 emit_writeword_indexed(sl,0,temp);
3622 }
3623 else if (opcode2[i]==2) // CFC1
3624 {
3625 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3626 if(tl>=0) {
3627 u_int copr=(source[i]>>11)&0x1f;
3628 if(copr==0) emit_readword((int)&FCR0,tl);
3629 if(copr==31) emit_readword((int)&FCR31,tl);
3630 }
3631 }
3632 else if (opcode2[i]==6) // CTC1
3633 {
3634 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3635 u_int copr=(source[i]>>11)&0x1f;
3636 assert(sl>=0);
3637 if(copr==31)
3638 {
3639 emit_writeword(sl,(int)&FCR31);
3640 // Set the rounding mode
3641 //FIXME
3642 //char temp=get_reg(i_regs->regmap,-1);
3643 //emit_andimm(sl,3,temp);
3644 //emit_fldcw_indexed((int)&rounding_modes,temp);
3645 }
3646 }
3d624f89 3647#else
3648 cop1_unusable(i, i_regs);
3649#endif
57871462 3650}
3651
3652void fconv_assemble_arm(int i,struct regstat *i_regs)
3653{
3d624f89 3654#ifndef DISABLE_COP1
57871462 3655 signed char temp=get_reg(i_regs->regmap,-1);
3656 assert(temp>=0);
3657 // Check cop1 unusable
3658 if(!cop1_usable) {
3659 signed char rs=get_reg(i_regs->regmap,CSREG);
3660 assert(rs>=0);
3661 emit_testimm(rs,0x20000000);
3662 int jaddr=(int)out;
3663 emit_jeq(0);
3664 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3665 cop1_usable=1;
3666 }
3667
3668 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3669 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3670 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3671 emit_flds(temp,15);
3672 emit_ftosizs(15,15); // float->int, truncate
3673 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3674 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3675 emit_fsts(15,temp);
3676 return;
3677 }
3678 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3679 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3680 emit_vldr(temp,7);
3681 emit_ftosizd(7,13); // double->int, truncate
3682 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3683 emit_fsts(13,temp);
3684 return;
3685 }
3686
3687 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3688 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3689 emit_flds(temp,13);
3690 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3691 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3692 emit_fsitos(13,15);
3693 emit_fsts(15,temp);
3694 return;
3695 }
3696 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3697 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3698 emit_flds(temp,13);
3699 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3700 emit_fsitod(13,7);
3701 emit_vstr(7,temp);
3702 return;
3703 }
3704
3705 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3706 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3707 emit_flds(temp,13);
3708 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3709 emit_fcvtds(13,7);
3710 emit_vstr(7,temp);
3711 return;
3712 }
3713 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3714 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3715 emit_vldr(temp,7);
3716 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3717 emit_fcvtsd(7,13);
3718 emit_fsts(13,temp);
3719 return;
3720 }
3721 #endif
3722
3723 // C emulation code
3724
3725 u_int hr,reglist=0;
3726 for(hr=0;hr<HOST_REGS;hr++) {
3727 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3728 }
3729 save_regs(reglist);
3730
3731 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3732 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3733 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3734 emit_call((int)cvt_s_w);
3735 }
3736 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3737 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3738 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3739 emit_call((int)cvt_d_w);
3740 }
3741 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3742 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3743 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3744 emit_call((int)cvt_s_l);
3745 }
3746 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3747 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3748 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3749 emit_call((int)cvt_d_l);
3750 }
3751
3752 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3753 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3754 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3755 emit_call((int)cvt_d_s);
3756 }
3757 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3758 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3759 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3760 emit_call((int)cvt_w_s);
3761 }
3762 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3763 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3764 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3765 emit_call((int)cvt_l_s);
3766 }
3767
3768 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3769 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3770 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3771 emit_call((int)cvt_s_d);
3772 }
3773 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3774 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3775 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3776 emit_call((int)cvt_w_d);
3777 }
3778 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3779 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3780 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3781 emit_call((int)cvt_l_d);
3782 }
3783
3784 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3785 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3786 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3787 emit_call((int)round_l_s);
3788 }
3789 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3790 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3791 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3792 emit_call((int)trunc_l_s);
3793 }
3794 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3795 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3796 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3797 emit_call((int)ceil_l_s);
3798 }
3799 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3800 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3801 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3802 emit_call((int)floor_l_s);
3803 }
3804 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3805 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3806 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3807 emit_call((int)round_w_s);
3808 }
3809 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3810 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3811 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3812 emit_call((int)trunc_w_s);
3813 }
3814 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3815 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3816 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3817 emit_call((int)ceil_w_s);
3818 }
3819 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3820 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3821 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3822 emit_call((int)floor_w_s);
3823 }
3824
3825 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3826 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3827 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3828 emit_call((int)round_l_d);
3829 }
3830 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3831 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3832 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3833 emit_call((int)trunc_l_d);
3834 }
3835 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3836 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3837 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3838 emit_call((int)ceil_l_d);
3839 }
3840 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3841 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3842 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3843 emit_call((int)floor_l_d);
3844 }
3845 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3846 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3847 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3848 emit_call((int)round_w_d);
3849 }
3850 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3851 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3852 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3853 emit_call((int)trunc_w_d);
3854 }
3855 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3856 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3857 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3858 emit_call((int)ceil_w_d);
3859 }
3860 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3861 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3862 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3863 emit_call((int)floor_w_d);
3864 }
3865
3866 restore_regs(reglist);
3d624f89 3867#else
3868 cop1_unusable(i, i_regs);
3869#endif
57871462 3870}
3871#define fconv_assemble fconv_assemble_arm
3872
3873void fcomp_assemble(int i,struct regstat *i_regs)
3874{
3d624f89 3875#ifndef DISABLE_COP1
57871462 3876 signed char fs=get_reg(i_regs->regmap,FSREG);
3877 signed char temp=get_reg(i_regs->regmap,-1);
3878 assert(temp>=0);
3879 // Check cop1 unusable
3880 if(!cop1_usable) {
3881 signed char cs=get_reg(i_regs->regmap,CSREG);
3882 assert(cs>=0);
3883 emit_testimm(cs,0x20000000);
3884 int jaddr=(int)out;
3885 emit_jeq(0);
3886 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3887 cop1_usable=1;
3888 }
3889
3890 if((source[i]&0x3f)==0x30) {
3891 emit_andimm(fs,~0x800000,fs);
3892 return;
3893 }
3894
3895 if((source[i]&0x3e)==0x38) {
3896 // sf/ngle - these should throw exceptions for NaNs
3897 emit_andimm(fs,~0x800000,fs);
3898 return;
3899 }
3900
3901 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3902 if(opcode2[i]==0x10) {
3903 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3904 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3905 emit_orimm(fs,0x800000,fs);
3906 emit_flds(temp,14);
3907 emit_flds(HOST_TEMPREG,15);
3908 emit_fcmps(14,15);
3909 emit_fmstat();
3910 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
3911 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
3912 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
3913 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
3914 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
3915 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
3916 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
3917 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
3918 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
3919 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
3920 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
3921 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
3922 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
3923 return;
3924 }
3925 if(opcode2[i]==0x11) {
3926 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3927 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3928 emit_orimm(fs,0x800000,fs);
3929 emit_vldr(temp,6);
3930 emit_vldr(HOST_TEMPREG,7);
3931 emit_fcmpd(6,7);
3932 emit_fmstat();
3933 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
3934 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
3935 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
3936 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
3937 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
3938 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
3939 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
3940 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
3941 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
3942 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
3943 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
3944 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
3945 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
3946 return;
3947 }
3948 #endif
3949
3950 // C only
3951
3952 u_int hr,reglist=0;
3953 for(hr=0;hr<HOST_REGS;hr++) {
3954 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3955 }
3956 reglist&=~(1<<fs);
3957 save_regs(reglist);
3958 if(opcode2[i]==0x10) {
3959 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3960 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
3961 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3962 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3963 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3964 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3965 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3966 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3967 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3968 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3969 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3970 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3971 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3972 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3973 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3974 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3975 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3976 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3977 }
3978 if(opcode2[i]==0x11) {
3979 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3980 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
3981 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3982 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3983 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3984 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3985 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3986 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3987 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3988 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3989 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3990 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3991 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3992 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3993 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3994 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3995 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3996 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
3997 }
3998 restore_regs(reglist);
3999 emit_loadreg(FSREG,fs);
3d624f89 4000#else
4001 cop1_unusable(i, i_regs);
4002#endif
57871462 4003}
4004
4005void float_assemble(int i,struct regstat *i_regs)
4006{
3d624f89 4007#ifndef DISABLE_COP1
57871462 4008 signed char temp=get_reg(i_regs->regmap,-1);
4009 assert(temp>=0);
4010 // Check cop1 unusable
4011 if(!cop1_usable) {
4012 signed char cs=get_reg(i_regs->regmap,CSREG);
4013 assert(cs>=0);
4014 emit_testimm(cs,0x20000000);
4015 int jaddr=(int)out;
4016 emit_jeq(0);
4017 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4018 cop1_usable=1;
4019 }
4020
4021 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4022 if((source[i]&0x3f)==6) // mov
4023 {
4024 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4025 if(opcode2[i]==0x10) {
4026 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4027 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4028 emit_readword_indexed(0,temp,temp);
4029 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4030 }
4031 if(opcode2[i]==0x11) {
4032 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4033 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4034 emit_vldr(temp,7);
4035 emit_vstr(7,HOST_TEMPREG);
4036 }
4037 }
4038 return;
4039 }
4040
4041 if((source[i]&0x3f)>3)
4042 {
4043 if(opcode2[i]==0x10) {
4044 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4045 emit_flds(temp,15);
4046 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4047 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4048 }
4049 if((source[i]&0x3f)==4) // sqrt
4050 emit_fsqrts(15,15);
4051 if((source[i]&0x3f)==5) // abs
4052 emit_fabss(15,15);
4053 if((source[i]&0x3f)==7) // neg
4054 emit_fnegs(15,15);
4055 emit_fsts(15,temp);
4056 }
4057 if(opcode2[i]==0x11) {
4058 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4059 emit_vldr(temp,7);
4060 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4061 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4062 }
4063 if((source[i]&0x3f)==4) // sqrt
4064 emit_fsqrtd(7,7);
4065 if((source[i]&0x3f)==5) // abs
4066 emit_fabsd(7,7);
4067 if((source[i]&0x3f)==7) // neg
4068 emit_fnegd(7,7);
4069 emit_vstr(7,temp);
4070 }
4071 return;
4072 }
4073 if((source[i]&0x3f)<4)
4074 {
4075 if(opcode2[i]==0x10) {
4076 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4077 }
4078 if(opcode2[i]==0x11) {
4079 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4080 }
4081 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4082 if(opcode2[i]==0x10) {
4083 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4084 emit_flds(temp,15);
4085 emit_flds(HOST_TEMPREG,13);
4086 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4087 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4088 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4089 }
4090 }
4091 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4092 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4093 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4094 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4095 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4096 emit_fsts(15,HOST_TEMPREG);
4097 }else{
4098 emit_fsts(15,temp);
4099 }
4100 }
4101 else if(opcode2[i]==0x11) {
4102 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4103 emit_vldr(temp,7);
4104 emit_vldr(HOST_TEMPREG,6);
4105 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4106 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4107 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4108 }
4109 }
4110 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4111 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4112 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4113 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4114 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4115 emit_vstr(7,HOST_TEMPREG);
4116 }else{
4117 emit_vstr(7,temp);
4118 }
4119 }
4120 }
4121 else {
4122 if(opcode2[i]==0x10) {
4123 emit_flds(temp,15);
4124 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4125 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4126 }
4127 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4128 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4129 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4130 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4131 emit_fsts(15,temp);
4132 }
4133 else if(opcode2[i]==0x11) {
4134 emit_vldr(temp,7);
4135 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4136 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4137 }
4138 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4139 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4140 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4141 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4142 emit_vstr(7,temp);
4143 }
4144 }
4145 return;
4146 }
4147 #endif
4148
4149 u_int hr,reglist=0;
4150 for(hr=0;hr<HOST_REGS;hr++) {
4151 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4152 }
4153 if(opcode2[i]==0x10) { // Single precision
4154 save_regs(reglist);
4155 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4156 if((source[i]&0x3f)<4) {
4157 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4158 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4159 }else{
4160 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4161 }
4162 switch(source[i]&0x3f)
4163 {
4164 case 0x00: emit_call((int)add_s);break;
4165 case 0x01: emit_call((int)sub_s);break;
4166 case 0x02: emit_call((int)mul_s);break;
4167 case 0x03: emit_call((int)div_s);break;
4168 case 0x04: emit_call((int)sqrt_s);break;
4169 case 0x05: emit_call((int)abs_s);break;
4170 case 0x06: emit_call((int)mov_s);break;
4171 case 0x07: emit_call((int)neg_s);break;
4172 }
4173 restore_regs(reglist);
4174 }
4175 if(opcode2[i]==0x11) { // Double precision
4176 save_regs(reglist);
4177 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4178 if((source[i]&0x3f)<4) {
4179 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4180 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4181 }else{
4182 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4183 }
4184 switch(source[i]&0x3f)
4185 {
4186 case 0x00: emit_call((int)add_d);break;
4187 case 0x01: emit_call((int)sub_d);break;
4188 case 0x02: emit_call((int)mul_d);break;
4189 case 0x03: emit_call((int)div_d);break;
4190 case 0x04: emit_call((int)sqrt_d);break;
4191 case 0x05: emit_call((int)abs_d);break;
4192 case 0x06: emit_call((int)mov_d);break;
4193 case 0x07: emit_call((int)neg_d);break;
4194 }
4195 restore_regs(reglist);
4196 }
3d624f89 4197#else
4198 cop1_unusable(i, i_regs);
4199#endif
57871462 4200}
4201
4202void multdiv_assemble_arm(int i,struct regstat *i_regs)
4203{
4204 // case 0x18: MULT
4205 // case 0x19: MULTU
4206 // case 0x1A: DIV
4207 // case 0x1B: DIVU
4208 // case 0x1C: DMULT
4209 // case 0x1D: DMULTU
4210 // case 0x1E: DDIV
4211 // case 0x1F: DDIVU
4212 if(rs1[i]&&rs2[i])
4213 {
4214 if((opcode2[i]&4)==0) // 32-bit
4215 {
4216 if(opcode2[i]==0x18) // MULT
4217 {
4218 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4219 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4220 signed char hi=get_reg(i_regs->regmap,HIREG);
4221 signed char lo=get_reg(i_regs->regmap,LOREG);
4222 assert(m1>=0);
4223 assert(m2>=0);
4224 assert(hi>=0);
4225 assert(lo>=0);
4226 emit_smull(m1,m2,hi,lo);
4227 }
4228 if(opcode2[i]==0x19) // MULTU
4229 {
4230 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4231 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4232 signed char hi=get_reg(i_regs->regmap,HIREG);
4233 signed char lo=get_reg(i_regs->regmap,LOREG);
4234 assert(m1>=0);
4235 assert(m2>=0);
4236 assert(hi>=0);
4237 assert(lo>=0);
4238 emit_umull(m1,m2,hi,lo);
4239 }
4240 if(opcode2[i]==0x1A) // DIV
4241 {
4242 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4243 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4244 assert(d1>=0);
4245 assert(d2>=0);
4246 signed char quotient=get_reg(i_regs->regmap,LOREG);
4247 signed char remainder=get_reg(i_regs->regmap,HIREG);
4248 assert(quotient>=0);
4249 assert(remainder>=0);
4250 emit_movs(d1,remainder);
4251 emit_negmi(remainder,remainder);
4252 emit_movs(d2,HOST_TEMPREG);
4253 emit_jeq((int)out+52); // Division by zero
4254 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4255 emit_clz(HOST_TEMPREG,quotient);
4256 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4257 emit_orimm(quotient,1<<31,quotient);
4258 emit_shr(quotient,quotient,quotient);
4259 emit_cmp(remainder,HOST_TEMPREG);
4260 emit_subcs(remainder,HOST_TEMPREG,remainder);
4261 emit_adcs(quotient,quotient,quotient);
4262 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4263 emit_jcc((int)out-16); // -4
4264 emit_teq(d1,d2);
4265 emit_negmi(quotient,quotient);
4266 emit_test(d1,d1);
4267 emit_negmi(remainder,remainder);
4268 }
4269 if(opcode2[i]==0x1B) // DIVU
4270 {
4271 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4272 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4273 assert(d1>=0);
4274 assert(d2>=0);
4275 signed char quotient=get_reg(i_regs->regmap,LOREG);
4276 signed char remainder=get_reg(i_regs->regmap,HIREG);
4277 assert(quotient>=0);
4278 assert(remainder>=0);
4279 emit_test(d2,d2);
4280 emit_jeq((int)out+44); // Division by zero
4281 emit_clz(d2,HOST_TEMPREG);
4282 emit_movimm(1<<31,quotient);
4283 emit_shl(d2,HOST_TEMPREG,d2);
4284 emit_mov(d1,remainder);
4285 emit_shr(quotient,HOST_TEMPREG,quotient);
4286 emit_cmp(remainder,d2);
4287 emit_subcs(remainder,d2,remainder);
4288 emit_adcs(quotient,quotient,quotient);
4289 emit_shrcc_imm(d2,1,d2);
4290 emit_jcc((int)out-16); // -4
4291 }
4292 }
4293 else // 64-bit
4294 {
4295 if(opcode2[i]==0x1C) // DMULT
4296 {
4297 assert(opcode2[i]!=0x1C);
4298 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4299 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4300 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4301 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4302 assert(m1h>=0);
4303 assert(m2h>=0);
4304 assert(m1l>=0);
4305 assert(m2l>=0);
4306 emit_pushreg(m2h);
4307 emit_pushreg(m2l);
4308 emit_pushreg(m1h);
4309 emit_pushreg(m1l);
4310 emit_call((int)&mult64);
4311 emit_popreg(m1l);
4312 emit_popreg(m1h);
4313 emit_popreg(m2l);
4314 emit_popreg(m2h);
4315 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4316 signed char hil=get_reg(i_regs->regmap,HIREG);
4317 if(hih>=0) emit_loadreg(HIREG|64,hih);
4318 if(hil>=0) emit_loadreg(HIREG,hil);
4319 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4320 signed char lol=get_reg(i_regs->regmap,LOREG);
4321 if(loh>=0) emit_loadreg(LOREG|64,loh);
4322 if(lol>=0) emit_loadreg(LOREG,lol);
4323 }
4324 if(opcode2[i]==0x1D) // DMULTU
4325 {
4326 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4327 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4328 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4329 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4330 assert(m1h>=0);
4331 assert(m2h>=0);
4332 assert(m1l>=0);
4333 assert(m2l>=0);
4334 save_regs(0x100f);
4335 if(m1l!=0) emit_mov(m1l,0);
4336 if(m1h==0) emit_readword((int)&dynarec_local,1);
4337 else if(m1h>1) emit_mov(m1h,1);
4338 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4339 else if(m2l>2) emit_mov(m2l,2);
4340 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4341 else if(m2h>3) emit_mov(m2h,3);
4342 emit_call((int)&multu64);
4343 restore_regs(0x100f);
4344 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4345 signed char hil=get_reg(i_regs->regmap,HIREG);
4346 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4347 signed char lol=get_reg(i_regs->regmap,LOREG);
4348 /*signed char temp=get_reg(i_regs->regmap,-1);
4349 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4350 signed char rl=get_reg(i_regs->regmap,HIREG);
4351 assert(m1h>=0);
4352 assert(m2h>=0);
4353 assert(m1l>=0);
4354 assert(m2l>=0);
4355 assert(temp>=0);
4356 //emit_mov(m1l,EAX);
4357 //emit_mul(m2l);
4358 emit_umull(rl,rh,m1l,m2l);
4359 emit_storereg(LOREG,rl);
4360 emit_mov(rh,temp);
4361 //emit_mov(m1h,EAX);
4362 //emit_mul(m2l);
4363 emit_umull(rl,rh,m1h,m2l);
4364 emit_adds(rl,temp,temp);
4365 emit_adcimm(rh,0,rh);
4366 emit_storereg(HIREG,rh);
4367 //emit_mov(m2h,EAX);
4368 //emit_mul(m1l);
4369 emit_umull(rl,rh,m1l,m2h);
4370 emit_adds(rl,temp,temp);
4371 emit_adcimm(rh,0,rh);
4372 emit_storereg(LOREG|64,temp);
4373 emit_mov(rh,temp);
4374 //emit_mov(m2h,EAX);
4375 //emit_mul(m1h);
4376 emit_umull(rl,rh,m1h,m2h);
4377 emit_adds(rl,temp,rl);
4378 emit_loadreg(HIREG,temp);
4379 emit_adcimm(rh,0,rh);
4380 emit_adds(rl,temp,rl);
4381 emit_adcimm(rh,0,rh);
4382 // DEBUG
4383 /*
4384 emit_pushreg(m2h);
4385 emit_pushreg(m2l);
4386 emit_pushreg(m1h);
4387 emit_pushreg(m1l);
4388 emit_call((int)&multu64);
4389 emit_popreg(m1l);
4390 emit_popreg(m1h);
4391 emit_popreg(m2l);
4392 emit_popreg(m2h);
4393 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4394 signed char hil=get_reg(i_regs->regmap,HIREG);
4395 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4396 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4397 */
4398 // Shouldn't be necessary
4399 //char loh=get_reg(i_regs->regmap,LOREG|64);
4400 //char lol=get_reg(i_regs->regmap,LOREG);
4401 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4402 //if(lol>=0) emit_loadreg(LOREG,lol);
4403 }
4404 if(opcode2[i]==0x1E) // DDIV
4405 {
4406 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4407 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4408 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4409 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4410 assert(d1h>=0);
4411 assert(d2h>=0);
4412 assert(d1l>=0);
4413 assert(d2l>=0);
4414 save_regs(0x100f);
4415 if(d1l!=0) emit_mov(d1l,0);
4416 if(d1h==0) emit_readword((int)&dynarec_local,1);
4417 else if(d1h>1) emit_mov(d1h,1);
4418 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4419 else if(d2l>2) emit_mov(d2l,2);
4420 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4421 else if(d2h>3) emit_mov(d2h,3);
4422 emit_call((int)&div64);
4423 restore_regs(0x100f);
4424 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4425 signed char hil=get_reg(i_regs->regmap,HIREG);
4426 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4427 signed char lol=get_reg(i_regs->regmap,LOREG);
4428 if(hih>=0) emit_loadreg(HIREG|64,hih);
4429 if(hil>=0) emit_loadreg(HIREG,hil);
4430 if(loh>=0) emit_loadreg(LOREG|64,loh);
4431 if(lol>=0) emit_loadreg(LOREG,lol);
4432 }
4433 if(opcode2[i]==0x1F) // DDIVU
4434 {
4435 //u_int hr,reglist=0;
4436 //for(hr=0;hr<HOST_REGS;hr++) {
4437 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4438 //}
4439 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4440 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4441 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4442 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4443 assert(d1h>=0);
4444 assert(d2h>=0);
4445 assert(d1l>=0);
4446 assert(d2l>=0);
4447 save_regs(0x100f);
4448 if(d1l!=0) emit_mov(d1l,0);
4449 if(d1h==0) emit_readword((int)&dynarec_local,1);
4450 else if(d1h>1) emit_mov(d1h,1);
4451 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4452 else if(d2l>2) emit_mov(d2l,2);
4453 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4454 else if(d2h>3) emit_mov(d2h,3);
4455 emit_call((int)&divu64);
4456 restore_regs(0x100f);
4457 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4458 signed char hil=get_reg(i_regs->regmap,HIREG);
4459 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4460 signed char lol=get_reg(i_regs->regmap,LOREG);
4461 if(hih>=0) emit_loadreg(HIREG|64,hih);
4462 if(hil>=0) emit_loadreg(HIREG,hil);
4463 if(loh>=0) emit_loadreg(LOREG|64,loh);
4464 if(lol>=0) emit_loadreg(LOREG,lol);
4465 }
4466 }
4467 }
4468 else
4469 {
4470 // Multiply by zero is zero.
4471 // MIPS does not have a divide by zero exception.
4472 // The result is undefined, we return zero.
4473 signed char hr=get_reg(i_regs->regmap,HIREG);
4474 signed char lr=get_reg(i_regs->regmap,LOREG);
4475 if(hr>=0) emit_zeroreg(hr);
4476 if(lr>=0) emit_zeroreg(lr);
4477 }
4478}
4479#define multdiv_assemble multdiv_assemble_arm
4480
// Deliberately empty on ARM.  The x86 backend uses this hook to place the
// hash mask 0xf8 in register r; here the hash is computed with a single
// instruction in do_rhash, so nothing has to be preloaded.
void do_preload_rhash(int r)
{
  (void)r;
}
4485
4486void do_preload_rhtbl(int ht) {
4487 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4488}
4489
// Emit the mini hash: rh = rs & 0xf8.  The mask keeps bits 3..7, the same
// bits do_miniht_insert uses to pick a mini_ht entry.
void do_rhash(int rs,int rh)
{
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4493
4494void do_miniht_load(int ht,int rh) {
4495 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4496 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4497}
4498
4499void do_miniht_jump(int rs,int rh,int ht) {
4500 emit_cmp(rh,rs);
4501 emit_ldreq_indexed(ht,4,15);
4502 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4503 emit_mov(rs,7);
4504 emit_jmp(jump_vaddr_reg[7]);
4505 #else
4506 emit_jmp(jump_vaddr_reg[rs]);
4507 #endif
4508}
4509
4510void do_miniht_insert(u_int return_address,int rt,int temp) {
4511 #ifdef ARMv5_ONLY
4512 emit_movimm(return_address,rt); // PC into link register
4513 add_to_linker((int)out,return_address,1);
4514 emit_pcreladdr(temp);
4515 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4516 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4517 #else
4518 emit_movw(return_address&0x0000FFFF,rt);
4519 add_to_linker((int)out,return_address,1);
4520 emit_pcreladdr(temp);
4521 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4522 emit_movt(return_address&0xFFFF0000,rt);
4523 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4524 #endif
4525}
4526
4527// Sign-extend to 64 bits and write out upper half of a register
4528// This is useful where we have a 32-bit value in a register, and want to
4529// keep it in a 32-bit register, but can't guarantee that it won't be read
4530// as a 64-bit value later.
4531void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4532{
24385cae 4533#ifndef FORCE32
57871462 4534 if(is32_pre==is32) return;
4535 int hr,reg;
4536 for(hr=0;hr<HOST_REGS;hr++) {
4537 if(hr!=EXCLUDE_REG) {
4538 //if(pre[hr]==entry[hr]) {
4539 if((reg=pre[hr])>=0) {
4540 if((dirty>>hr)&1) {
4541 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4542 emit_sarimm(hr,31,HOST_TEMPREG);
4543 emit_storereg(reg|64,HOST_TEMPREG);
4544 }
4545 }
4546 }
4547 //}
4548 }
4549 }
24385cae 4550#endif
57871462 4551}
4552
4553void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4554{
4555 //if(dirty_pre==dirty) return;
4556 int hr,reg,new_hr;
4557 for(hr=0;hr<HOST_REGS;hr++) {
4558 if(hr!=EXCLUDE_REG) {
4559 reg=pre[hr];
4560 if(((~u)>>(reg&63))&1) {
4561 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4562 if(((dirty_pre&~dirty)>>hr)&1) {
4563 if(reg>0&&reg<34) {
4564 emit_storereg(reg,hr);
4565 if( ((is32_pre&~uu)>>reg)&1 ) {
4566 emit_sarimm(hr,31,HOST_TEMPREG);
4567 emit_storereg(reg|64,HOST_TEMPREG);
4568 }
4569 }
4570 else if(reg>=64) {
4571 emit_storereg(reg,hr);
4572 }
4573 }
4574 }
4575 else // Check if register moved to a different register
4576 if((new_hr=get_reg(entry,reg))>=0) {
4577 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4578 if(reg>0&&reg<34) {
4579 emit_storereg(reg,hr);
4580 if( ((is32_pre&~uu)>>reg)&1 ) {
4581 emit_sarimm(hr,31,HOST_TEMPREG);
4582 emit_storereg(reg|64,HOST_TEMPREG);
4583 }
4584 }
4585 else if(reg>=64) {
4586 emit_storereg(reg,hr);
4587 }
4588 }
4589 }
4590 }
4591 }
4592 }
4593}
4594
4595
4596/* using strd could possibly help but you'd have to allocate registers in pairs
4597void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4598{
4599 int hr;
4600 int wrote=-1;
4601 for(hr=HOST_REGS-1;hr>=0;hr--) {
4602 if(hr!=EXCLUDE_REG) {
4603 if(pre[hr]!=entry[hr]) {
4604 if(pre[hr]>=0) {
4605 if((dirty>>hr)&1) {
4606 if(get_reg(entry,pre[hr])<0) {
4607 if(pre[hr]<64) {
4608 if(!((u>>pre[hr])&1)) {
4609 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4610 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4611 emit_sarimm(hr,31,hr+1);
4612 emit_strdreg(pre[hr],hr);
4613 }
4614 else
4615 emit_storereg(pre[hr],hr);
4616 }else{
4617 emit_storereg(pre[hr],hr);
4618 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4619 emit_sarimm(hr,31,hr);
4620 emit_storereg(pre[hr]|64,hr);
4621 }
4622 }
4623 }
4624 }else{
4625 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4626 emit_storereg(pre[hr],hr);
4627 }
4628 }
4629 wrote=hr;
4630 }
4631 }
4632 }
4633 }
4634 }
4635 }
4636 for(hr=0;hr<HOST_REGS;hr++) {
4637 if(hr!=EXCLUDE_REG) {
4638 if(pre[hr]!=entry[hr]) {
4639 if(pre[hr]>=0) {
4640 int nr;
4641 if((nr=get_reg(entry,pre[hr]))>=0) {
4642 emit_mov(hr,nr);
4643 }
4644 }
4645 }
4646 }
4647 }
4648}
4649#define wb_invalidate wb_invalidate_arm
4650*/
4651
4652// CPU-architecture-specific initialization
4653void arch_init() {
3d624f89 4654#ifndef DISABLE_COP1
57871462 4655 rounding_modes[0]=0x0<<22; // round
4656 rounding_modes[1]=0x3<<22; // trunc
4657 rounding_modes[2]=0x1<<22; // ceil
4658 rounding_modes[3]=0x2<<22; // floor
3d624f89 4659#endif
57871462 4660}
b9b61529 4661
4662// vim:shiftwidth=2:expandtab