drc: initial cop2/gte implementation (works, mostly)
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_arm.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21extern int cycle_count;
22extern int last_count;
23extern int pcaddr;
24extern int pending_exception;
25extern int branch_target;
26extern uint64_t readmem_dword;
27#ifdef MUPEN64
28extern precomp_instr fake_pc;
29#endif
30extern void *dynarec_local;
31extern u_int memory_map[1048576];
32extern u_int mini_ht[32][2];
33extern u_int rounding_modes[4];
34
35void indirect_jump_indexed();
36void indirect_jump();
37void do_interrupt();
38void jump_vaddr_r0();
39void jump_vaddr_r1();
40void jump_vaddr_r2();
41void jump_vaddr_r3();
42void jump_vaddr_r4();
43void jump_vaddr_r5();
44void jump_vaddr_r6();
45void jump_vaddr_r7();
46void jump_vaddr_r8();
47void jump_vaddr_r9();
48void jump_vaddr_r10();
49void jump_vaddr_r12();
50
51const u_int jump_vaddr_reg[16] = {
52 (int)jump_vaddr_r0,
53 (int)jump_vaddr_r1,
54 (int)jump_vaddr_r2,
55 (int)jump_vaddr_r3,
56 (int)jump_vaddr_r4,
57 (int)jump_vaddr_r5,
58 (int)jump_vaddr_r6,
59 (int)jump_vaddr_r7,
60 (int)jump_vaddr_r8,
61 (int)jump_vaddr_r9,
62 (int)jump_vaddr_r10,
63 0,
64 (int)jump_vaddr_r12,
65 0,
66 0,
67 0};
68
69#include "fpu.h"
70
71/* Linker */
72
73void set_jump_target(int addr,u_int target)
74{
75 u_char *ptr=(u_char *)addr;
76 u_int *ptr2=(u_int *)ptr;
77 if(ptr[3]==0xe2) {
78 assert((target-(u_int)ptr2-8)<1024);
79 assert((addr&3)==0);
80 assert((target&3)==0);
81 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
82 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
83 }
84 else if(ptr[3]==0x72) {
85 // generated by emit_jno_unlikely
86 if((target-(u_int)ptr2-8)<1024) {
87 assert((addr&3)==0);
88 assert((target&3)==0);
89 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
90 }
91 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
92 assert((addr&3)==0);
93 assert((target&3)==0);
94 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
95 }
96 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
97 }
98 else {
99 assert((ptr[3]&0x0e)==0xa);
100 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
101 }
102}
103
104// This optionally copies the instruction from the target of the branch into
105// the space before the branch. Works, but the difference in speed is
106// usually insignificant.
107void set_jump_target_fillslot(int addr,u_int target,int copy)
108{
109 u_char *ptr=(u_char *)addr;
110 u_int *ptr2=(u_int *)ptr;
111 assert(!copy||ptr2[-1]==0xe28dd000);
112 if(ptr[3]==0xe2) {
113 assert(!copy);
114 assert((target-(u_int)ptr2-8)<4096);
115 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
116 }
117 else {
118 assert((ptr[3]&0x0e)==0xa);
119 u_int target_insn=*(u_int *)target;
120 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
121 copy=0;
122 }
123 if((target_insn&0x0c100000)==0x04100000) { // Load
124 copy=0;
125 }
126 if(target_insn&0x08000000) {
127 copy=0;
128 }
129 if(copy) {
130 ptr2[-1]=target_insn;
131 target+=4;
132 }
133 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
134 }
135}
136
137/* Literal pool */
138add_literal(int addr,int val)
139{
140 literals[literalcount][0]=addr;
141 literals[literalcount][1]=val;
142 literalcount++;
143}
144
145void kill_pointer(void *stub)
146{
147 int *ptr=(int *)(stub+4);
148 assert((*ptr&0x0ff00000)==0x05900000);
149 u_int offset=*ptr&0xfff;
150 int **l_ptr=(void *)ptr+offset+8;
151 int *i_ptr=*l_ptr;
152 set_jump_target((int)i_ptr,(int)stub);
153}
154
// Return the address currently targeted by the branch instruction that a
// stub refers to.
// The second word of the stub must be a load whose 12-bit offset (plus 8)
// leads to a stored pointer to the branch instruction.  The branch's 24-bit
// displacement is sign-extended, scaled by 4 and added to the branch
// address plus 8.
// The displacement is extracted with an unsigned left shift: shifting a
// negative int left is undefined behaviour, and branch words with the usual
// 0xe condition field are negative as ints.
int get_pointer(void *stub)
{
  int *ptr=(int *)((char *)stub+4);
  assert((*ptr&0x0ff00000)==0x05900000);
  unsigned int offset=*ptr&0xfff;
  int **l_ptr=(int **)((char *)ptr+offset+8);
  int *i_ptr=*l_ptr;
  assert((*i_ptr&0x0f000000)==0x0a000000);
  // Move imm24 to the top of the word, then shift back arithmetically so the
  // sign is kept and the value ends up multiplied by 4.
  int disp=(int)((unsigned int)*i_ptr<<8)>>6;
  return (int)((unsigned int)i_ptr+(unsigned int)disp+8u);
}
166
167// Find the "clean" entry point from a "dirty" entry point
168// by skipping past the call to verify_code
169u_int get_clean_addr(int addr)
170{
171 int *ptr=(int *)addr;
172 #ifdef ARMv5_ONLY
173 ptr+=4;
174 #else
175 ptr+=6;
176 #endif
177 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
178 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
179 ptr++;
180 if((*ptr&0xFF000000)==0xea000000) {
181 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
182 }
183 return (u_int)ptr;
184}
185
186int verify_dirty(int addr)
187{
188 u_int *ptr=(u_int *)addr;
189 #ifdef ARMv5_ONLY
190 // get from literal pool
191 assert((*ptr&0xFFF00000)==0xe5900000);
192 u_int offset=*ptr&0xfff;
193 u_int *l_ptr=(void *)ptr+offset+8;
194 u_int source=l_ptr[0];
195 u_int copy=l_ptr[1];
196 u_int len=l_ptr[2];
197 ptr+=4;
198 #else
199 // ARMv7 movw/movt
200 assert((*ptr&0xFFF00000)==0xe3000000);
201 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
202 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
203 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
204 ptr+=6;
205 #endif
206 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
207 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
208 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
209 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
210 unsigned int page=source>>12;
211 unsigned int map_value=memory_map[page];
212 if(map_value>=0x80000000) return 0;
213 while(page<((source+len-1)>>12)) {
214 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
215 }
216 source = source+(map_value<<2);
217 }
218 //printf("verify_dirty: %x %x %x\n",source,copy,len);
219 return !memcmp((void *)source,(void *)copy,len);
220}
221
222// This doesn't necessarily find all clean entry points, just
223// guarantees that it's not dirty
224int isclean(int addr)
225{
226 #ifdef ARMv5_ONLY
227 int *ptr=((u_int *)addr)+4;
228 #else
229 int *ptr=((u_int *)addr)+6;
230 #endif
231 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
232 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
233 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
234 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
235 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
236 return 1;
237}
238
239void get_bounds(int addr,u_int *start,u_int *end)
240{
241 u_int *ptr=(u_int *)addr;
242 #ifdef ARMv5_ONLY
243 // get from literal pool
244 assert((*ptr&0xFFF00000)==0xe5900000);
245 u_int offset=*ptr&0xfff;
246 u_int *l_ptr=(void *)ptr+offset+8;
247 u_int source=l_ptr[0];
248 //u_int copy=l_ptr[1];
249 u_int len=l_ptr[2];
250 ptr+=4;
251 #else
252 // ARMv7 movw/movt
253 assert((*ptr&0xFFF00000)==0xe3000000);
254 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
255 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
256 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
257 ptr+=6;
258 #endif
259 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
260 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
261 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
262 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
263 if(memory_map[source>>12]>=0x80000000) source = 0;
264 else source = source+(memory_map[source>>12]<<2);
265 }
266 *start=source;
267 *end=source+len;
268}
269
270/* Register allocation */
271
272// Note: registers are allocated clean (unmodified state)
273// if you intend to modify the register, you must call dirty_reg().
274void alloc_reg(struct regstat *cur,int i,signed char reg)
275{
276 int r,hr;
277 int preferred_reg = (reg&7);
278 if(reg==CCREG) preferred_reg=HOST_CCREG;
279 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
280
281 // Don't allocate unused registers
282 if((cur->u>>reg)&1) return;
283
284 // see if it's already allocated
285 for(hr=0;hr<HOST_REGS;hr++)
286 {
287 if(cur->regmap[hr]==reg) return;
288 }
289
290 // Keep the same mapping if the register was already allocated in a loop
291 preferred_reg = loop_reg(i,reg,preferred_reg);
292
293 // Try to allocate the preferred register
294 if(cur->regmap[preferred_reg]==-1) {
295 cur->regmap[preferred_reg]=reg;
296 cur->dirty&=~(1<<preferred_reg);
297 cur->isconst&=~(1<<preferred_reg);
298 return;
299 }
300 r=cur->regmap[preferred_reg];
301 if(r<64&&((cur->u>>r)&1)) {
302 cur->regmap[preferred_reg]=reg;
303 cur->dirty&=~(1<<preferred_reg);
304 cur->isconst&=~(1<<preferred_reg);
305 return;
306 }
307 if(r>=64&&((cur->uu>>(r&63))&1)) {
308 cur->regmap[preferred_reg]=reg;
309 cur->dirty&=~(1<<preferred_reg);
310 cur->isconst&=~(1<<preferred_reg);
311 return;
312 }
313
314 // Clear any unneeded registers
315 // We try to keep the mapping consistent, if possible, because it
316 // makes branches easier (especially loops). So we try to allocate
317 // first (see above) before removing old mappings. If this is not
318 // possible then go ahead and clear out the registers that are no
319 // longer needed.
320 for(hr=0;hr<HOST_REGS;hr++)
321 {
322 r=cur->regmap[hr];
323 if(r>=0) {
324 if(r<64) {
325 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
326 }
327 else
328 {
329 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
330 }
331 }
332 }
333 // Try to allocate any available register, but prefer
334 // registers that have not been used recently.
335 if(i>0) {
336 for(hr=0;hr<HOST_REGS;hr++) {
337 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
338 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
339 cur->regmap[hr]=reg;
340 cur->dirty&=~(1<<hr);
341 cur->isconst&=~(1<<hr);
342 return;
343 }
344 }
345 }
346 }
347 // Try to allocate any available register
348 for(hr=0;hr<HOST_REGS;hr++) {
349 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
350 cur->regmap[hr]=reg;
351 cur->dirty&=~(1<<hr);
352 cur->isconst&=~(1<<hr);
353 return;
354 }
355 }
356
357 // Ok, now we have to evict someone
358 // Pick a register we hopefully won't need soon
359 u_char hsn[MAXREG+1];
360 memset(hsn,10,sizeof(hsn));
361 int j;
362 lsn(hsn,i,&preferred_reg);
363 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
364 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
365 if(i>0) {
366 // Don't evict the cycle count at entry points, otherwise the entry
367 // stub will have to write it.
368 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
369 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
370 for(j=10;j>=3;j--)
371 {
372 // Alloc preferred register if available
373 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
374 for(hr=0;hr<HOST_REGS;hr++) {
375 // Evict both parts of a 64-bit register
376 if((cur->regmap[hr]&63)==r) {
377 cur->regmap[hr]=-1;
378 cur->dirty&=~(1<<hr);
379 cur->isconst&=~(1<<hr);
380 }
381 }
382 cur->regmap[preferred_reg]=reg;
383 return;
384 }
385 for(r=1;r<=MAXREG;r++)
386 {
387 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
390 if(cur->regmap[hr]==r+64) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
400 if(cur->regmap[hr]==r) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 }
410 }
411 }
412 for(j=10;j>=0;j--)
413 {
414 for(r=1;r<=MAXREG;r++)
415 {
416 if(hsn[r]==j) {
417 for(hr=0;hr<HOST_REGS;hr++) {
418 if(cur->regmap[hr]==r+64) {
419 cur->regmap[hr]=reg;
420 cur->dirty&=~(1<<hr);
421 cur->isconst&=~(1<<hr);
422 return;
423 }
424 }
425 for(hr=0;hr<HOST_REGS;hr++) {
426 if(cur->regmap[hr]==r) {
427 cur->regmap[hr]=reg;
428 cur->dirty&=~(1<<hr);
429 cur->isconst&=~(1<<hr);
430 return;
431 }
432 }
433 }
434 }
435 }
436 printf("This shouldn't happen (alloc_reg)");exit(1);
437}
438
439void alloc_reg64(struct regstat *cur,int i,signed char reg)
440{
441 int preferred_reg = 8+(reg&1);
442 int r,hr;
443
444 // allocate the lower 32 bits
445 alloc_reg(cur,i,reg);
446
447 // Don't allocate unused registers
448 if((cur->uu>>reg)&1) return;
449
450 // see if the upper half is already allocated
451 for(hr=0;hr<HOST_REGS;hr++)
452 {
453 if(cur->regmap[hr]==reg+64) return;
454 }
455
456 // Keep the same mapping if the register was already allocated in a loop
457 preferred_reg = loop_reg(i,reg,preferred_reg);
458
459 // Try to allocate the preferred register
460 if(cur->regmap[preferred_reg]==-1) {
461 cur->regmap[preferred_reg]=reg|64;
462 cur->dirty&=~(1<<preferred_reg);
463 cur->isconst&=~(1<<preferred_reg);
464 return;
465 }
466 r=cur->regmap[preferred_reg];
467 if(r<64&&((cur->u>>r)&1)) {
468 cur->regmap[preferred_reg]=reg|64;
469 cur->dirty&=~(1<<preferred_reg);
470 cur->isconst&=~(1<<preferred_reg);
471 return;
472 }
473 if(r>=64&&((cur->uu>>(r&63))&1)) {
474 cur->regmap[preferred_reg]=reg|64;
475 cur->dirty&=~(1<<preferred_reg);
476 cur->isconst&=~(1<<preferred_reg);
477 return;
478 }
479
480 // Clear any unneeded registers
481 // We try to keep the mapping consistent, if possible, because it
482 // makes branches easier (especially loops). So we try to allocate
483 // first (see above) before removing old mappings. If this is not
484 // possible then go ahead and clear out the registers that are no
485 // longer needed.
486 for(hr=HOST_REGS-1;hr>=0;hr--)
487 {
488 r=cur->regmap[hr];
489 if(r>=0) {
490 if(r<64) {
491 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
492 }
493 else
494 {
495 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
496 }
497 }
498 }
499 // Try to allocate any available register, but prefer
500 // registers that have not been used recently.
501 if(i>0) {
502 for(hr=0;hr<HOST_REGS;hr++) {
503 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
504 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
505 cur->regmap[hr]=reg|64;
506 cur->dirty&=~(1<<hr);
507 cur->isconst&=~(1<<hr);
508 return;
509 }
510 }
511 }
512 }
513 // Try to allocate any available register
514 for(hr=0;hr<HOST_REGS;hr++) {
515 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
516 cur->regmap[hr]=reg|64;
517 cur->dirty&=~(1<<hr);
518 cur->isconst&=~(1<<hr);
519 return;
520 }
521 }
522
523 // Ok, now we have to evict someone
524 // Pick a register we hopefully won't need soon
525 u_char hsn[MAXREG+1];
526 memset(hsn,10,sizeof(hsn));
527 int j;
528 lsn(hsn,i,&preferred_reg);
529 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
530 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
531 if(i>0) {
532 // Don't evict the cycle count at entry points, otherwise the entry
533 // stub will have to write it.
534 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
535 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
536 for(j=10;j>=3;j--)
537 {
538 // Alloc preferred register if available
539 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
540 for(hr=0;hr<HOST_REGS;hr++) {
541 // Evict both parts of a 64-bit register
542 if((cur->regmap[hr]&63)==r) {
543 cur->regmap[hr]=-1;
544 cur->dirty&=~(1<<hr);
545 cur->isconst&=~(1<<hr);
546 }
547 }
548 cur->regmap[preferred_reg]=reg|64;
549 return;
550 }
551 for(r=1;r<=MAXREG;r++)
552 {
553 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
556 if(cur->regmap[hr]==r+64) {
557 cur->regmap[hr]=reg|64;
558 cur->dirty&=~(1<<hr);
559 cur->isconst&=~(1<<hr);
560 return;
561 }
562 }
563 }
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
566 if(cur->regmap[hr]==r) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 }
576 }
577 }
578 for(j=10;j>=0;j--)
579 {
580 for(r=1;r<=MAXREG;r++)
581 {
582 if(hsn[r]==j) {
583 for(hr=0;hr<HOST_REGS;hr++) {
584 if(cur->regmap[hr]==r+64) {
585 cur->regmap[hr]=reg|64;
586 cur->dirty&=~(1<<hr);
587 cur->isconst&=~(1<<hr);
588 return;
589 }
590 }
591 for(hr=0;hr<HOST_REGS;hr++) {
592 if(cur->regmap[hr]==r) {
593 cur->regmap[hr]=reg|64;
594 cur->dirty&=~(1<<hr);
595 cur->isconst&=~(1<<hr);
596 return;
597 }
598 }
599 }
600 }
601 }
602 printf("This shouldn't happen");exit(1);
603}
604
605// Allocate a temporary register. This is done without regard to
606// dirty status or whether the register we request is on the unneeded list
607// Note: This will only allocate one register, even if called multiple times
608void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
609{
610 int r,hr;
611 int preferred_reg = -1;
612
613 // see if it's already allocated
614 for(hr=0;hr<HOST_REGS;hr++)
615 {
616 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
617 }
618
619 // Try to allocate any available register
620 for(hr=HOST_REGS-1;hr>=0;hr--) {
621 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
622 cur->regmap[hr]=reg;
623 cur->dirty&=~(1<<hr);
624 cur->isconst&=~(1<<hr);
625 return;
626 }
627 }
628
629 // Find an unneeded register
630 for(hr=HOST_REGS-1;hr>=0;hr--)
631 {
632 r=cur->regmap[hr];
633 if(r>=0) {
634 if(r<64) {
635 if((cur->u>>r)&1) {
636 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
637 cur->regmap[hr]=reg;
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
640 return;
641 }
642 }
643 }
644 else
645 {
646 if((cur->uu>>(r&63))&1) {
647 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
648 cur->regmap[hr]=reg;
649 cur->dirty&=~(1<<hr);
650 cur->isconst&=~(1<<hr);
651 return;
652 }
653 }
654 }
655 }
656 }
657
658 // Ok, now we have to evict someone
659 // Pick a register we hopefully won't need soon
660 // TODO: we might want to follow unconditional jumps here
661 // TODO: get rid of dupe code and make this into a function
662 u_char hsn[MAXREG+1];
663 memset(hsn,10,sizeof(hsn));
664 int j;
665 lsn(hsn,i,&preferred_reg);
666 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
667 if(i>0) {
668 // Don't evict the cycle count at entry points, otherwise the entry
669 // stub will have to write it.
670 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
671 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
672 for(j=10;j>=3;j--)
673 {
674 for(r=1;r<=MAXREG;r++)
675 {
676 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
677 for(hr=0;hr<HOST_REGS;hr++) {
678 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
679 if(cur->regmap[hr]==r+64) {
680 cur->regmap[hr]=reg;
681 cur->dirty&=~(1<<hr);
682 cur->isconst&=~(1<<hr);
683 return;
684 }
685 }
686 }
687 for(hr=0;hr<HOST_REGS;hr++) {
688 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
689 if(cur->regmap[hr]==r) {
690 cur->regmap[hr]=reg;
691 cur->dirty&=~(1<<hr);
692 cur->isconst&=~(1<<hr);
693 return;
694 }
695 }
696 }
697 }
698 }
699 }
700 }
701 for(j=10;j>=0;j--)
702 {
703 for(r=1;r<=MAXREG;r++)
704 {
705 if(hsn[r]==j) {
706 for(hr=0;hr<HOST_REGS;hr++) {
707 if(cur->regmap[hr]==r+64) {
708 cur->regmap[hr]=reg;
709 cur->dirty&=~(1<<hr);
710 cur->isconst&=~(1<<hr);
711 return;
712 }
713 }
714 for(hr=0;hr<HOST_REGS;hr++) {
715 if(cur->regmap[hr]==r) {
716 cur->regmap[hr]=reg;
717 cur->dirty&=~(1<<hr);
718 cur->isconst&=~(1<<hr);
719 return;
720 }
721 }
722 }
723 }
724 }
725 printf("This shouldn't happen");exit(1);
726}
727// Allocate a specific ARM register.
728void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
729{
730 int n;
731
732 // see if it's already allocated (and dealloc it)
733 for(n=0;n<HOST_REGS;n++)
734 {
735 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
736 }
737
738 cur->regmap[hr]=reg;
739 cur->dirty&=~(1<<hr);
740 cur->isconst&=~(1<<hr);
741}
742
743// Alloc cycle count into dedicated register
744alloc_cc(struct regstat *cur,int i)
745{
746 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
747}
748
749/* Special alloc */
750
751
752/* Assembler */
753
// Printable names of the 16 host registers, indexed by register number;
// used by the assem_debug traces.
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
771
772void output_byte(u_char byte)
773{
774 *(out++)=byte;
775}
776void output_modrm(u_char mod,u_char rm,u_char ext)
777{
778 assert(mod<4);
779 assert(rm<8);
780 assert(ext<8);
781 u_char byte=(mod<<6)|(ext<<3)|rm;
782 *(out++)=byte;
783}
784void output_sib(u_char scale,u_char index,u_char base)
785{
786 assert(scale<4);
787 assert(index<8);
788 assert(base<8);
789 u_char byte=(scale<<6)|(index<<3)|base;
790 *(out++)=byte;
791}
792void output_w32(u_int word)
793{
794 *((u_int *)out)=word;
795 out+=4;
796}
797u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
798{
799 assert(rd<16);
800 assert(rn<16);
801 assert(rm<16);
802 return((rn<<16)|(rd<<12)|rm);
803}
804u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
805{
806 assert(rd<16);
807 assert(rn<16);
808 assert(imm<256);
809 assert((shift&1)==0);
810 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
811}
812u_int genimm(u_int imm,u_int *encoded)
813{
814 if(imm==0) {*encoded=0;return 1;}
815 int i=32;
816 while(i>0)
817 {
818 if(imm<256) {
819 *encoded=((i&30)<<7)|imm;
820 return 1;
821 }
822 imm=(imm>>2)|(imm<<30);i-=2;
823 }
824 return 0;
825}
826u_int genjmp(u_int addr)
827{
828 int offset=addr-(int)out-8;
829 if(offset<-33554432||offset>=33554432) {
830 if (addr>2) {
831 printf("genjmp: out of range: %08x\n", offset);
832 exit(1);
833 }
834 return 0;
835 }
836 return ((u_int)offset>>2)&0xffffff;
837}
838
839void emit_mov(int rs,int rt)
840{
841 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
842 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
843}
844
845void emit_movs(int rs,int rt)
846{
847 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
848 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
849}
850
851void emit_add(int rs1,int rs2,int rt)
852{
853 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
854 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
855}
856
857void emit_adds(int rs1,int rs2,int rt)
858{
859 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
860 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
861}
862
863void emit_adcs(int rs1,int rs2,int rt)
864{
865 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
866 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
867}
868
869void emit_sbc(int rs1,int rs2,int rt)
870{
871 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
872 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
873}
874
875void emit_sbcs(int rs1,int rs2,int rt)
876{
877 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
878 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
879}
880
881void emit_neg(int rs, int rt)
882{
883 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
884 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
885}
886
887void emit_negs(int rs, int rt)
888{
889 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
890 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
891}
892
893void emit_sub(int rs1,int rs2,int rt)
894{
895 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
896 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
897}
898
899void emit_subs(int rs1,int rs2,int rt)
900{
901 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
903}
904
905void emit_zeroreg(int rt)
906{
907 assem_debug("mov %s,#0\n",regname[rt]);
908 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
909}
910
911void emit_loadreg(int r, int hr)
912{
913#ifdef FORCE32
914 if(r&64) {
915 printf("64bit load in 32bit mode!\n");
916 exit(1);
917 }
918#endif
919 if((r&63)==0)
920 emit_zeroreg(hr);
921 else {
922 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
923 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
924 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
925 if(r==CCREG) addr=(int)&cycle_count;
926 if(r==CSREG) addr=(int)&Status;
927 if(r==FSREG) addr=(int)&FCR31;
928 if(r==INVCP) addr=(int)&invc_ptr;
929 u_int offset = addr-(u_int)&dynarec_local;
930 assert(offset<4096);
931 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
932 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
933 }
934}
935void emit_storereg(int r, int hr)
936{
937#ifdef FORCE32
938 if(r&64) {
939 printf("64bit store in 32bit mode!\n");
940 exit(1);
941 }
942#endif
943 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
944 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
945 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
946 if(r==CCREG) addr=(int)&cycle_count;
947 if(r==FSREG) addr=(int)&FCR31;
948 u_int offset = addr-(u_int)&dynarec_local;
949 assert(offset<4096);
950 assem_debug("str %s,fp+%d\n",regname[hr],offset);
951 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
952}
953
954void emit_test(int rs, int rt)
955{
956 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
957 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
958}
959
960void emit_testimm(int rs,int imm)
961{
962 u_int armval;
963 assem_debug("tst %s,$%d\n",regname[rs],imm);
964 assert(genimm(imm,&armval));
965 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
966}
967
968void emit_testeqimm(int rs,int imm)
969{
970 u_int armval;
971 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
972 assert(genimm(imm,&armval));
973 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
974}
975
976void emit_not(int rs,int rt)
977{
978 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
979 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
980}
981
982void emit_mvnmi(int rs,int rt)
983{
984 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
985 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
986}
987
988void emit_and(u_int rs1,u_int rs2,u_int rt)
989{
990 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
991 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
992}
993
994void emit_or(u_int rs1,u_int rs2,u_int rt)
995{
996 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
997 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
998}
999void emit_or_and_set_flags(int rs1,int rs2,int rt)
1000{
1001 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1002 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1003}
1004
1005void emit_xor(u_int rs1,u_int rs2,u_int rt)
1006{
1007 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1008 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1009}
1010
1011void emit_loadlp(u_int imm,u_int rt)
1012{
1013 add_literal((int)out,imm);
1014 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
1015 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
1016}
1017void emit_movw(u_int imm,u_int rt)
1018{
1019 assert(imm<65536);
1020 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
1021 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
1022}
1023void emit_movt(u_int imm,u_int rt)
1024{
1025 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
1026 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
1027}
1028void emit_movimm(u_int imm,u_int rt)
1029{
1030 u_int armval;
1031 if(genimm(imm,&armval)) {
1032 assem_debug("mov %s,#%d\n",regname[rt],imm);
1033 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1034 }else if(genimm(~imm,&armval)) {
1035 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1036 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1037 }else if(imm<65536) {
1038 #ifdef ARMv5_ONLY
1039 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1040 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1041 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1042 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1043 #else
1044 emit_movw(imm,rt);
1045 #endif
1046 }else{
1047 #ifdef ARMv5_ONLY
1048 emit_loadlp(imm,rt);
1049 #else
1050 emit_movw(imm&0x0000FFFF,rt);
1051 emit_movt(imm&0xFFFF0000,rt);
1052 #endif
1053 }
1054}
1055void emit_pcreladdr(u_int rt)
1056{
1057 assem_debug("add %s,pc,#?\n",regname[rt]);
1058 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1059}
1060
1061void emit_addimm(u_int rs,int imm,u_int rt)
1062{
1063 assert(rs<16);
1064 assert(rt<16);
1065 if(imm!=0) {
1066 assert(imm>-65536&&imm<65536);
1067 u_int armval;
1068 if(genimm(imm,&armval)) {
1069 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1070 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1071 }else if(genimm(-imm,&armval)) {
1072 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm);
1073 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1074 }else if(imm<0) {
1075 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1076 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1077 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1078 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1079 }else{
1080 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1081 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1082 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1083 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1084 }
1085 }
1086 else if(rs!=rt) emit_mov(rs,rt);
1087}
1088
1089void emit_addimm_and_set_flags(int imm,int rt)
1090{
1091 assert(imm>-65536&&imm<65536);
1092 u_int armval;
1093 if(genimm(imm,&armval)) {
1094 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1095 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1096 }else if(genimm(-imm,&armval)) {
1097 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1098 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1099 }else if(imm<0) {
1100 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1101 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1102 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1103 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1104 }else{
1105 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1106 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1107 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1108 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1109 }
1110}
1111void emit_addimm_no_flags(u_int imm,u_int rt)
1112{
1113 emit_addimm(rt,imm,rt);
1114}
1115
1116void emit_addnop(u_int r)
1117{
1118 assert(r<16);
1119 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1120 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1121}
1122
1123void emit_adcimm(u_int rs,int imm,u_int rt)
1124{
1125 u_int armval;
1126 assert(genimm(imm,&armval));
1127 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1128 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1129}
1130/*void emit_sbcimm(int imm,u_int rt)
1131{
1132 u_int armval;
1133 assert(genimm(imm,&armval));
1134 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1135 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1136}*/
1137void emit_sbbimm(int imm,u_int rt)
1138{
1139 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1140 assert(rt<8);
1141 if(imm<128&&imm>=-128) {
1142 output_byte(0x83);
1143 output_modrm(3,rt,3);
1144 output_byte(imm);
1145 }
1146 else
1147 {
1148 output_byte(0x81);
1149 output_modrm(3,rt,3);
1150 output_w32(imm);
1151 }
1152}
1153void emit_rscimm(int rs,int imm,u_int rt)
1154{
1155 assert(0);
1156 u_int armval;
1157 assert(genimm(imm,&armval));
1158 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1159 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1160}
1161
1162void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1163{
1164 // TODO: if(genimm(imm,&armval)) ...
1165 // else
1166 emit_movimm(imm,HOST_TEMPREG);
1167 emit_adds(HOST_TEMPREG,rsl,rtl);
1168 emit_adcimm(rsh,0,rth);
1169}
1170
1171void emit_sbb(int rs1,int rs2)
1172{
1173 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1174 output_byte(0x19);
1175 output_modrm(3,rs1,rs2);
1176}
1177
1178void emit_andimm(int rs,int imm,int rt)
1179{
1180 u_int armval;
1181 if(genimm(imm,&armval)) {
1182 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1183 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1184 }else if(genimm(~imm,&armval)) {
1185 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1186 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1187 }else if(imm==65535) {
1188 #ifdef ARMv5_ONLY
1189 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1190 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1191 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1192 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1193 #else
1194 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1195 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1196 #endif
1197 }else{
1198 assert(imm>0&&imm<65535);
1199 #ifdef ARMv5_ONLY
1200 assem_debug("mov r14,#%d\n",imm&0xFF00);
1201 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1202 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1203 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1204 #else
1205 emit_movw(imm,HOST_TEMPREG);
1206 #endif
1207 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1208 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1209 }
1210}
1211
1212void emit_orimm(int rs,int imm,int rt)
1213{
1214 u_int armval;
1215 if(genimm(imm,&armval)) {
1216 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1217 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1218 }else{
1219 assert(imm>0&&imm<65536);
1220 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1221 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1222 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1223 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1224 }
1225}
1226
1227void emit_xorimm(int rs,int imm,int rt)
1228{
1229 assert(imm>0&&imm<65536);
1230 u_int armval;
1231 if(genimm(imm,&armval)) {
1232 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1234 }else{
1235 assert(imm>0);
1236 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1237 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1238 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1239 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1240 }
1241}
1242
1243void emit_shlimm(int rs,u_int imm,int rt)
1244{
1245 assert(imm>0);
1246 assert(imm<32);
1247 //if(imm==1) ...
1248 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1249 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1250}
1251
1252void emit_shrimm(int rs,u_int imm,int rt)
1253{
1254 assert(imm>0);
1255 assert(imm<32);
1256 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1257 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1258}
1259
1260void emit_sarimm(int rs,u_int imm,int rt)
1261{
1262 assert(imm>0);
1263 assert(imm<32);
1264 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1265 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1266}
1267
1268void emit_rorimm(int rs,u_int imm,int rt)
1269{
1270 assert(imm>0);
1271 assert(imm<32);
1272 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1273 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1274}
1275
1276void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1277{
1278 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1279 assert(imm>0);
1280 assert(imm<32);
1281 //if(imm==1) ...
1282 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1283 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1284 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1285 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1286}
1287
1288void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1289{
1290 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1291 assert(imm>0);
1292 assert(imm<32);
1293 //if(imm==1) ...
1294 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1295 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1296 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1297 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1298}
1299
1300void emit_signextend16(int rs,int rt)
1301{
1302 #ifdef ARMv5_ONLY
1303 emit_shlimm(rs,16,rt);
1304 emit_sarimm(rt,16,rt);
1305 #else
1306 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1307 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1308 #endif
1309}
1310
1311void emit_shl(u_int rs,u_int shift,u_int rt)
1312{
1313 assert(rs<16);
1314 assert(rt<16);
1315 assert(shift<16);
1316 //if(imm==1) ...
1317 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1318 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1319}
1320void emit_shr(u_int rs,u_int shift,u_int rt)
1321{
1322 assert(rs<16);
1323 assert(rt<16);
1324 assert(shift<16);
1325 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1326 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1327}
1328void emit_sar(u_int rs,u_int shift,u_int rt)
1329{
1330 assert(rs<16);
1331 assert(rt<16);
1332 assert(shift<16);
1333 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1334 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1335}
1336void emit_shlcl(int r)
1337{
1338 assem_debug("shl %%%s,%%cl\n",regname[r]);
1339 assert(0);
1340}
1341void emit_shrcl(int r)
1342{
1343 assem_debug("shr %%%s,%%cl\n",regname[r]);
1344 assert(0);
1345}
1346void emit_sarcl(int r)
1347{
1348 assem_debug("sar %%%s,%%cl\n",regname[r]);
1349 assert(0);
1350}
1351
1352void emit_shldcl(int r1,int r2)
1353{
1354 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1355 assert(0);
1356}
1357void emit_shrdcl(int r1,int r2)
1358{
1359 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1360 assert(0);
1361}
1362void emit_orrshl(u_int rs,u_int shift,u_int rt)
1363{
1364 assert(rs<16);
1365 assert(rt<16);
1366 assert(shift<16);
1367 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1368 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1369}
1370void emit_orrshr(u_int rs,u_int shift,u_int rt)
1371{
1372 assert(rs<16);
1373 assert(rt<16);
1374 assert(shift<16);
1375 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1376 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1377}
1378
1379void emit_cmpimm(int rs,int imm)
1380{
1381 u_int armval;
1382 if(genimm(imm,&armval)) {
1383 assem_debug("cmp %s,$%d\n",regname[rs],imm);
1384 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1385 }else if(genimm(-imm,&armval)) {
1386 assem_debug("cmn %s,$%d\n",regname[rs],imm);
1387 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1388 }else if(imm>0) {
1389 assert(imm<65536);
1390 #ifdef ARMv5_ONLY
1391 emit_movimm(imm,HOST_TEMPREG);
1392 #else
1393 emit_movw(imm,HOST_TEMPREG);
1394 #endif
1395 assem_debug("cmp %s,r14\n",regname[rs]);
1396 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1397 }else{
1398 assert(imm>-65536);
1399 #ifdef ARMv5_ONLY
1400 emit_movimm(-imm,HOST_TEMPREG);
1401 #else
1402 emit_movw(-imm,HOST_TEMPREG);
1403 #endif
1404 assem_debug("cmn %s,r14\n",regname[rs]);
1405 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1406 }
1407}
1408
1409void emit_cmovne(u_int *addr,int rt)
1410{
1411 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1412 assert(0);
1413}
1414void emit_cmovl(u_int *addr,int rt)
1415{
1416 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1417 assert(0);
1418}
1419void emit_cmovs(u_int *addr,int rt)
1420{
1421 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1422 assert(0);
1423}
1424void emit_cmovne_imm(int imm,int rt)
1425{
1426 assem_debug("movne %s,#%d\n",regname[rt],imm);
1427 u_int armval;
1428 assert(genimm(imm,&armval));
1429 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1430}
1431void emit_cmovl_imm(int imm,int rt)
1432{
1433 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1434 u_int armval;
1435 assert(genimm(imm,&armval));
1436 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1437}
1438void emit_cmovb_imm(int imm,int rt)
1439{
1440 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1441 u_int armval;
1442 assert(genimm(imm,&armval));
1443 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1444}
1445void emit_cmovs_imm(int imm,int rt)
1446{
1447 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1448 u_int armval;
1449 assert(genimm(imm,&armval));
1450 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1451}
1452void emit_cmove_reg(int rs,int rt)
1453{
1454 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1455 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1456}
1457void emit_cmovne_reg(int rs,int rt)
1458{
1459 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1460 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1461}
1462void emit_cmovl_reg(int rs,int rt)
1463{
1464 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1465 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1466}
1467void emit_cmovs_reg(int rs,int rt)
1468{
1469 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1470 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1471}
1472
// Set rt to 1 if rs < imm (via movlt), else 0.
// When rs and rt are the same register, rt is cleared only after the
// compare so the operand is still intact when it is read.
void emit_slti32(int rs,int imm,int rt)
{
  int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if rs is below imm (via movcc after the compare), else 0.
// When rs and rt are the same register, rt is cleared only after the
// compare so the operand is still intact when it is read.
void emit_sltiu32(int rs,int imm,int rt)
{
  int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-less-than-immediate on a 64-bit value held in rsh:rsl, result in rt.
// The low-word result is computed first and then overridden according to
// the high word: compared against 0 for imm>=0 and against -1 for imm<0.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Unsigned set-less-than-immediate on a 64-bit value held in rsh:rsl,
// result in rt. The low-word result is computed first; for imm>=0 it is
// forced to 0 when the high word is non-zero, for imm<0 it is forced to 1
// when the high word differs from -1.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1519
1520void emit_cmp(int rs,int rt)
1521{
1522 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1523 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1524}
// Set rt to 1 if rs > 0, else 0: compare rs with 1, preload 1 and
// replace it with 0 when the comparison was "less than".
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
// Set rt to 1 if rs is non-zero; a zero value is left in rt as is.
// The flags come from emit_test() when rs==rt, otherwise from the
// flag-setting move of rs into rt.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
// "Greater than zero" test for a 64-bit value in rsh:rsl, result in rt.
// Starts from the 32-bit result for the low word, then overrides it from
// the high word: 1 if rsh is non-zero, 0 if rsh is negative.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
// "Non-zero" test for a 64-bit value in rsh:rsl: OR both halves into rt
// with flags set, then replace a non-zero result with 1.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
// Set rt to 1 if rs1 < rs2 (via movlt), else 0.
// If rt aliases one of the operands it is cleared only after the compare.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// Set rt to 1 if comparing rs1 with rs2 leaves carry clear (movcc), else 0.
// If rt aliases one of the operands it is cleared only after the compare.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1569void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1570{
1571 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1572 assert(u1!=rt);
1573 assert(u2!=rt);
1574 emit_cmp(l1,l2);
1575 emit_movimm(0,rt);
1576 emit_sbcs(u1,u2,HOST_TEMPREG);
1577 emit_cmovl_imm(1,rt);
1578}
1579void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1580{
1581 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1582 assert(u1!=rt);
1583 assert(u2!=rt);
1584 emit_cmp(l1,l2);
1585 emit_movimm(0,rt);
1586 emit_sbcs(u1,u2,HOST_TEMPREG);
1587 emit_cmovb_imm(1,rt);
1588}
1589
1590void emit_call(int a)
1591{
1592 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1593 u_int offset=genjmp(a);
1594 output_w32(0xeb000000|offset);
1595}
1596void emit_jmp(int a)
1597{
1598 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1599 u_int offset=genjmp(a);
1600 output_w32(0xea000000|offset);
1601}
// Emit "bne a": branch to a if not equal.
void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
// Emit "beq a": branch to a if equal.
void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
// Emit "bmi a": branch to a if negative.
void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
// Emit "bpl a": branch to a if positive or zero.
void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
// Emit "blt a": branch to a if signed less-than.
void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
// Emit "bge a": branch to a if signed greater-or-equal.
void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
// Emit "bvc a": branch to a if overflow is clear.
void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
// Emit "bcs a": branch to a if carry is set.
void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
// Emit "bcc a": branch to a if carry is clear.
void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1656
// Not implemented here: logs an x86-style "push $imm" and then fails
// via assert(0).
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n", imm);
  assert(0);
}
// Not implemented here: logs "pusha" and then fails via assert(0).
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
// Not implemented here: logs "popa" and then fails via assert(0).
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1672void emit_pushreg(u_int r)
1673{
1674 assem_debug("push %%%s\n",regname[r]);
1675 assert(0);
1676}
1677void emit_popreg(u_int r)
1678{
1679 assem_debug("pop %%%s\n",regname[r]);
1680 assert(0);
1681}
1682void emit_callreg(u_int r)
1683{
1684 assem_debug("call *%%%s\n",regname[r]);
1685 assert(0);
1686}
1687void emit_jmpreg(u_int r)
1688{
1689 assem_debug("mov pc,%s\n",regname[r]);
1690 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1691}
1692
1693void emit_readword_indexed(int offset, int rs, int rt)
1694{
1695 assert(offset>-4096&&offset<4096);
1696 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1697 if(offset>=0) {
1698 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1699 }else{
1700 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1701 }
1702}
1703void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1704{
1705 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1706 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1707}
// Word load that goes through a map register when one is given.
// map<0 means no map: plain [rs,#addr] load. Otherwise addr must be 0 and
// the load uses [rs,map,lsl #2].
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load a 64-bit value as two words: rh from addr and rl from addr+4.
// rh<0 skips the first load. With a map register (map>=0) the second
// word is reached by incrementing map, which is modified in place.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map<0) {
    if(rh>=0) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  // rh is loaded first, so it must not overwrite the base register
  assert(rh!=rs);
  if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1728void emit_movsbl_indexed(int offset, int rs, int rt)
1729{
1730 assert(offset>-256&&offset<256);
1731 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1732 if(offset>=0) {
1733 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1734 }else{
1735 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1736 }
1737}
1738void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1739{
1740 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1741 else {
1742 if(addr==0) {
1743 emit_shlimm(map,2,map);
1744 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1745 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1746 }else{
1747 assert(addr>-256&&addr<256);
1748 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1749 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1750 emit_movsbl_indexed(addr, rt, rt);
1751 }
1752 }
1753}
1754void emit_movswl_indexed(int offset, int rs, int rt)
1755{
1756 assert(offset>-256&&offset<256);
1757 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1758 if(offset>=0) {
1759 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1760 }else{
1761 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1762 }
1763}
1764void emit_movzbl_indexed(int offset, int rs, int rt)
1765{
1766 assert(offset>-4096&&offset<4096);
1767 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1768 if(offset>=0) {
1769 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1770 }else{
1771 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1772 }
1773}
1774void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1775{
1776 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1777 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1778}
// Zero-extending byte load that goes through a map register when given.
// map<0: plain [rs,#addr] load. Otherwise a non-zero addr is first added
// to rs (into rt) and the load uses [base,map,lsl #2].
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base=rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
1791void emit_movzwl_indexed(int offset, int rs, int rt)
1792{
1793 assert(offset>-256&&offset<256);
1794 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1795 if(offset>=0) {
1796 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1797 }else{
1798 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1799 }
1800}
1801void emit_readword(int addr, int rt)
1802{
1803 u_int offset = addr-(u_int)&dynarec_local;
1804 assert(offset<4096);
1805 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1806 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1807}
1808void emit_movsbl(int addr, int rt)
1809{
1810 u_int offset = addr-(u_int)&dynarec_local;
1811 assert(offset<256);
1812 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1813 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1814}
1815void emit_movswl(int addr, int rt)
1816{
1817 u_int offset = addr-(u_int)&dynarec_local;
1818 assert(offset<256);
1819 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1820 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1821}
1822void emit_movzbl(int addr, int rt)
1823{
1824 u_int offset = addr-(u_int)&dynarec_local;
1825 assert(offset<4096);
1826 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1827 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1828}
1829void emit_movzwl(int addr, int rt)
1830{
1831 u_int offset = addr-(u_int)&dynarec_local;
1832 assert(offset<256);
1833 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1834 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1835}
1836void emit_movzwl_reg(int rs, int rt)
1837{
1838 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1839 assert(0);
1840}
1841
1842void emit_xchg(int rs, int rt)
1843{
1844 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1845 assert(0);
1846}
1847void emit_writeword_indexed(int rt, int offset, int rs)
1848{
1849 assert(offset>-4096&&offset<4096);
1850 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1851 if(offset>=0) {
1852 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1853 }else{
1854 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1855 }
1856}
1857void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
1858{
1859 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1860 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1861}
// Word store that goes through a map register when one is given.
// map<0 means no map: plain [rs,#addr] store. Otherwise addr must be 0 and
// the store uses [rs,map,lsl #2]. temp is not used here.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store a 64-bit value as two words: rh at addr and rl at addr+4.
// Without a map (map<0), rh<0 skips the first store. With a map, rh is
// required; the second word is reached either through temp=map+1 or,
// when temp aliases rs, by advancing rs by 4 in place.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    // temp indexes the following word
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
1886void emit_writehword_indexed(int rt, int offset, int rs)
1887{
1888 assert(offset>-256&&offset<256);
1889 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1890 if(offset>=0) {
1891 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1892 }else{
1893 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1894 }
1895}
1896void emit_writebyte_indexed(int rt, int offset, int rs)
1897{
1898 assert(offset>-4096&&offset<4096);
1899 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1900 if(offset>=0) {
1901 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1902 }else{
1903 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1904 }
1905}
1906void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
1907{
1908 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1909 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1910}
// Byte store that goes through a map register when one is given.
// map<0: plain [rs,#addr] store. Otherwise a non-zero addr is first added
// to rs (into temp) and the store uses [base,map,lsl #2].
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
1923void emit_writeword(int rt, int addr)
1924{
1925 u_int offset = addr-(u_int)&dynarec_local;
1926 assert(offset<4096);
1927 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1928 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1929}
1930void emit_writehword(int rt, int addr)
1931{
1932 u_int offset = addr-(u_int)&dynarec_local;
1933 assert(offset<256);
1934 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
1935 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1936}
1937void emit_writebyte(int rt, int addr)
1938{
1939 u_int offset = addr-(u_int)&dynarec_local;
1940 assert(offset<4096);
1941 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1942 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
1943}
// Not implemented here: logs an x86-style "movl $imm,addr" and then
// fails via assert(0).
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n", imm, addr);
  assert(0);
}
// Not implemented here: logs an x86-style "movb $imm,addr" and then
// fails via assert(0).
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n", imm, addr);
  assert(0);
}
1954
1955void emit_mul(int rs)
1956{
1957 assem_debug("mul %%%s\n",regname[rs]);
1958 assert(0);
1959}
1960void emit_imul(int rs)
1961{
1962 assem_debug("imul %%%s\n",regname[rs]);
1963 assert(0);
1964}
1965void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1966{
1967 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1968 assert(rs1<16);
1969 assert(rs2<16);
1970 assert(hi<16);
1971 assert(lo<16);
1972 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1973}
1974void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1975{
1976 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1977 assert(rs1<16);
1978 assert(rs2<16);
1979 assert(hi<16);
1980 assert(lo<16);
1981 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1982}
1983
1984void emit_div(int rs)
1985{
1986 assem_debug("div %%%s\n",regname[rs]);
1987 assert(0);
1988}
1989void emit_idiv(int rs)
1990{
1991 assem_debug("idiv %%%s\n",regname[rs]);
1992 assert(0);
1993}
// Not implemented here: logs "cdq" and then fails via assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
1999
2000void emit_clz(int rs,int rt)
2001{
2002 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2003 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2004}
2005
2006void emit_subcs(int rs1,int rs2,int rt)
2007{
2008 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2009 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2010}
2011
2012void emit_shrcc_imm(int rs,u_int imm,int rt)
2013{
2014 assert(imm>0);
2015 assert(imm<32);
2016 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2017 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2018}
2019
2020void emit_negmi(int rs, int rt)
2021{
2022 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2023 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2024}
2025
2026void emit_negsmi(int rs, int rt)
2027{
2028 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2029 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2030}
2031
2032void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2033{
2034 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2035 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2036}
2037
2038void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2039{
2040 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2041 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2042}
2043
2044void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2045{
2046 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2047 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2048}
2049
2050void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2051{
2052 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2053 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2054}
2055
2056void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2057{
2058 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2059 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2060}
2061
2062void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2063{
2064 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2065 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2066}
2067
2068void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2069{
2070 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2071 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2072}
2073
2074void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2075{
2076 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2077 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2078}
2079
2080void emit_teq(int rs, int rt)
2081{
2082 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2083 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2084}
2085
2086void emit_rsbimm(int rs, int imm, int rt)
2087{
2088 u_int armval;
2089 assert(genimm(imm,&armval));
2090 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2091 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2092}
2093
2094// Load 2 immediates optimizing for small code size
2095void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2096{
2097 emit_movimm(imm1,rt1);
2098 u_int armval;
2099 if(genimm(imm2-imm1,&armval)) {
2100 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2101 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2102 }else if(genimm(imm1-imm2,&armval)) {
2103 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2104 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2105 }
2106 else emit_movimm(imm2,rt2);
2107}
2108
2109// Conditionally select one of two immediates, optimizing for small code size
2110// This will only be called if HAVE_CMOV_IMM is defined
2111void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2112{
2113 u_int armval;
2114 if(genimm(imm2-imm1,&armval)) {
2115 emit_movimm(imm1,rt);
2116 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2117 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2118 }else if(genimm(imm1-imm2,&armval)) {
2119 emit_movimm(imm1,rt);
2120 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2121 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2122 }
2123 else {
2124 #ifdef ARMv5_ONLY
2125 emit_movimm(imm1,rt);
2126 add_literal((int)out,imm2);
2127 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2128 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2129 #else
2130 emit_movw(imm1&0x0000FFFF,rt);
2131 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2132 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2133 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2134 }
2135 emit_movt(imm1&0xFFFF0000,rt);
2136 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2137 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2138 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2139 }
2140 #endif
2141 }
2142}
2143
// special case for checking invalid_code
// Not implemented here: always fails via assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2149
2150// special case for checking invalid_code
2151void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2152{
2153 assert(imm<128&&imm>=0);
2154 assert(r>=0&&r<16);
2155 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2156 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2157 emit_cmpimm(HOST_TEMPREG,imm);
2158}
2159
2160// special case for tlb mapping
2161void emit_addsr12(int rs1,int rs2,int rt)
2162{
2163 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2164 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2165}
2166
// Used to preload hash table entries
// Emits the byte sequence 0x0F 0x18 + modrm followed by the 32-bit address.
// NOTE(review): this looks like a leftover from an x86 backend - confirm
// whether anything still calls it.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0, 5, 1);
  output_w32((int)addr);
}
2176void emit_prefetchreg(int r)
2177{
2178 assem_debug("pld %s\n",regname[r]);
2179 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2180}
2181
2182// Special case for mini_ht
2183void emit_ldreq_indexed(int rs, u_int offset, int rt)
2184{
2185 assert(offset<4096);
2186 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2187 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2188}
2189
2190void emit_flds(int r,int sr)
2191{
2192 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2193 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2194}
2195
2196void emit_vldr(int r,int vr)
2197{
2198 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2199 output_w32(0xed900b00|(vr<<12)|(r<<16));
2200}
2201
2202void emit_fsts(int sr,int r)
2203{
2204 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2205 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2206}
2207
2208void emit_vstr(int vr,int r)
2209{
2210 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2211 output_w32(0xed800b00|(vr<<12)|(r<<16));
2212}
2213
// Emit "ftosizs s<d>,s<s>": single-precision source s converted into
// single-width register d (mnemonic as printed by the debug trace).
void emit_ftosizs(int s,int d)
{
  int sd_field=((d&14)<<11)|((d&1)<<22); // destination S register
  int sm_field=((s&14)>>1)|((s&1)<<5);   // source S register

  assem_debug("ftosizs s%d,s%d\n",d,s);
  output_w32(0xeebd0ac0|sd_field|sm_field);
}
2219
// Emit "ftosizd s<d>,d<s>": double-precision source s converted into
// single-width register d (mnemonic as printed by the debug trace).
void emit_ftosizd(int s,int d)
{
  int sd_field=((d&14)<<11)|((d&1)<<22); // destination S register
  int dm_field=s&7;                      // source D register

  assem_debug("ftosizd s%d,d%d\n",d,s);
  output_w32(0xeebd0bc0|sd_field|dm_field);
}
2225
// Emit "fsitos s<d>,s<s>": the value in S register s converted into
// S register d (mnemonic as printed by the debug trace).
void emit_fsitos(int s,int d)
{
  int sd_field=((d&14)<<11)|((d&1)<<22); // destination S register
  int sm_field=((s&14)>>1)|((s&1)<<5);   // source S register

  assem_debug("fsitos s%d,s%d\n",d,s);
  output_w32(0xeeb80ac0|sd_field|sm_field);
}
2231
// Emit "fsitod d<d>,s<s>": the value in S register s converted into
// D register d (mnemonic as printed by the debug trace).
void emit_fsitod(int s,int d)
{
  int dd_field=(d&7)<<12;              // destination D register
  int sm_field=((s&14)>>1)|((s&1)<<5); // source S register

  assem_debug("fsitod d%d,s%d\n",d,s);
  output_w32(0xeeb80bc0|dd_field|sm_field);
}
2237
// Emit "fcvtds d<d>,s<s>": convert single-precision s to double-precision d.
void emit_fcvtds(int s,int d)
{
  int dd_field=(d&7)<<12;              // destination D register
  int sm_field=((s&14)>>1)|((s&1)<<5); // source S register

  assem_debug("fcvtds d%d,s%d\n",d,s);
  output_w32(0xeeb70ac0|dd_field|sm_field);
}
2243
// Emit "fcvtsd s<d>,d<s>": convert double-precision s to single-precision d.
void emit_fcvtsd(int s,int d)
{
  int sd_field=((d&14)<<11)|((d&1)<<22); // destination S register
  int dm_field=s&7;                      // source D register

  assem_debug("fcvtsd s%d,d%d\n",d,s);
  output_w32(0xeeb70bc0|sd_field|dm_field);
}
2249
// Emit "fsqrts s<d>,s<s>": single-precision square root of s into d.
// Both operands are encoded as S registers, so the debug trace prints both
// with the "s" prefix (it used to print the destination as "d").
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2255
// Emit "fsqrtd d<d>,d<s>": double-precision square root of s into d.
// Both operands are encoded as D registers, so the debug trace prints both
// with the "d" prefix (it used to print the destination as "s").
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2261
// Emit "fabss s<d>,s<s>": single-precision absolute value of s into d.
// Both operands are encoded as S registers, so the debug trace prints both
// with the "s" prefix (it used to print the destination as "d").
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2267
// Emit "fabsd d<d>,d<s>": double-precision absolute value of s into d.
// Both operands are encoded as D registers, so the debug trace prints both
// with the "d" prefix (it used to print the destination as "s").
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2273
// Emit "fnegs s<d>,s<s>": single-precision negation of s into d.
// Both operands are encoded as S registers, so the debug trace prints both
// with the "s" prefix (it used to print the destination as "d").
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2279
// Emit "fnegd d<d>,d<s>": double-precision negation of s into d.
// Both operands are encoded as D registers, so the debug trace prints both
// with the "d" prefix (it used to print the destination as "s").
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2285
// Emit "fadds s<d>,s<s1>,s<s2>": single-precision d = s1 + s2.
void emit_fadds(int s1,int s2,int d)
{
  int sd_field=((d&14)<<11)|((d&1)<<22);
  int sn_field=((s1&14)<<15)|((s1&1)<<7);
  int sm_field=((s2&14)>>1)|((s2&1)<<5);

  assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a00|sd_field|sn_field|sm_field);
}
2291
// Emit "faddd d<d>,d<s1>,d<s2>": double-precision d = s1 + s2.
void emit_faddd(int s1,int s2,int d)
{
  int dd_field=(d&7)<<12;
  int dn_field=(s1&7)<<16;
  int dm_field=s2&7;

  assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b00|dd_field|dn_field|dm_field);
}
2297
// Emit "fsubs s<d>,s<s1>,s<s2>": single-precision d = s1 - s2.
void emit_fsubs(int s1,int s2,int d)
{
  int sd_field=((d&14)<<11)|((d&1)<<22);
  int sn_field=((s1&14)<<15)|((s1&1)<<7);
  int sm_field=((s2&14)>>1)|((s2&1)<<5);

  assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee300a40|sd_field|sn_field|sm_field);
}
2303
// Emit "fsubd d<d>,d<s1>,d<s2>": double-precision d = s1 - s2.
void emit_fsubd(int s1,int s2,int d)
{
  int dd_field=(d&7)<<12;
  int dn_field=(s1&7)<<16;
  int dm_field=s2&7;

  assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee300b40|dd_field|dn_field|dm_field);
}
2309
// Emit "fmuls s<d>,s<s1>,s<s2>": single-precision d = s1 * s2.
void emit_fmuls(int s1,int s2,int d)
{
  int sd_field=((d&14)<<11)|((d&1)<<22);
  int sn_field=((s1&14)<<15)|((s1&1)<<7);
  int sm_field=((s2&14)>>1)|((s2&1)<<5);

  assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee200a00|sd_field|sn_field|sm_field);
}
2315
// Emit "fmuld d<d>,d<s1>,d<s2>": double-precision d = s1 * s2.
void emit_fmuld(int s1,int s2,int d)
{
  int dd_field=(d&7)<<12;
  int dn_field=(s1&7)<<16;
  int dm_field=s2&7;

  assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee200b00|dd_field|dn_field|dm_field);
}
2321
// Emit "fdivs s<d>,s<s1>,s<s2>": single-precision d = s1 / s2.
void emit_fdivs(int s1,int s2,int d)
{
  int sd_field=((d&14)<<11)|((d&1)<<22);
  int sn_field=((s1&14)<<15)|((s1&1)<<7);
  int sm_field=((s2&14)>>1)|((s2&1)<<5);

  assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
  output_w32(0xee800a00|sd_field|sn_field|sm_field);
}
2327
// Emit "fdivd d<d>,d<s1>,d<s2>": double-precision d = s1 / s2.
void emit_fdivd(int s1,int s2,int d)
{
  int dd_field=(d&7)<<12;
  int dn_field=(s1&7)<<16;
  int dm_field=s2&7;

  assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
  output_w32(0xee800b00|dd_field|dn_field|dm_field);
}
2333
2334void emit_fcmps(int x,int y)
2335{
2336 assem_debug("fcmps s14, s15\n");
2337 output_w32(0xeeb47a67);
2338}
2339
2340void emit_fcmpd(int x,int y)
2341{
2342 assem_debug("fcmpd d6, d7\n");
2343 output_w32(0xeeb46b47);
2344}
2345
2346void emit_fmstat()
2347{
2348 assem_debug("fmstat\n");
2349 output_w32(0xeef1fa10);
2350}
2351
2352void emit_bicne_imm(int rs,int imm,int rt)
2353{
2354 u_int armval;
2355 assert(genimm(imm,&armval));
2356 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2357 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2358}
2359
2360void emit_biccs_imm(int rs,int imm,int rt)
2361{
2362 u_int armval;
2363 assert(genimm(imm,&armval));
2364 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2365 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2366}
2367
2368void emit_bicvc_imm(int rs,int imm,int rt)
2369{
2370 u_int armval;
2371 assert(genimm(imm,&armval));
2372 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2373 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2374}
2375
2376void emit_bichi_imm(int rs,int imm,int rt)
2377{
2378 u_int armval;
2379 assert(genimm(imm,&armval));
2380 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2381 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2382}
2383
2384void emit_orrvs_imm(int rs,int imm,int rt)
2385{
2386 u_int armval;
2387 assert(genimm(imm,&armval));
2388 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2389 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2390}
2391
2392void emit_orrne_imm(int rs,int imm,int rt)
2393{
2394 u_int armval;
2395 assert(genimm(imm,&armval));
2396 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2397 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2398}
2399
2400void emit_andne_imm(int rs,int imm,int rt)
2401{
2402 u_int armval;
2403 assert(genimm(imm,&armval));
2404 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2405 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2406}
2407
2408void emit_jno_unlikely(int a)
2409{
2410 //emit_jno(a);
2411 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2412 output_w32(0x72800000|rd_rn_rm(15,15,0));
2413}
2414
2415// Save registers before function call
2416void save_regs(u_int reglist)
2417{
2418 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2419 if(!reglist) return;
2420 assem_debug("stmia fp,{");
2421 if(reglist&1) assem_debug("r0, ");
2422 if(reglist&2) assem_debug("r1, ");
2423 if(reglist&4) assem_debug("r2, ");
2424 if(reglist&8) assem_debug("r3, ");
2425 if(reglist&0x1000) assem_debug("r12");
2426 assem_debug("}\n");
2427 output_w32(0xe88b0000|reglist);
2428}
2429// Restore registers after function call
2430void restore_regs(u_int reglist)
2431{
2432 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2433 if(!reglist) return;
2434 assem_debug("ldmia fp,{");
2435 if(reglist&1) assem_debug("r0, ");
2436 if(reglist&2) assem_debug("r1, ");
2437 if(reglist&4) assem_debug("r2, ");
2438 if(reglist&8) assem_debug("r3, ");
2439 if(reglist&0x1000) assem_debug("r12");
2440 assem_debug("}\n");
2441 output_w32(0xe89b0000|reglist);
2442}
2443
2444// Write back consts using r14 so we don't disturb the other registers
2445void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2446{
2447 int hr;
2448 for(hr=0;hr<HOST_REGS;hr++) {
2449 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2450 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2451 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2452 int value=constmap[i][hr];
2453 if(value==0) {
2454 emit_zeroreg(HOST_TEMPREG);
2455 }
2456 else {
2457 emit_movimm(value,HOST_TEMPREG);
2458 }
2459 emit_storereg(i_regmap[hr],HOST_TEMPREG);
2460#ifndef FORCE32
2461 if((i_is32>>i_regmap[hr])&1) {
2462 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2463 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2464 }
2465#endif
2466 }
2467 }
2468 }
2469 }
2470}
2471
2472/* Stubs/epilogue */
2473
2474void literal_pool(int n)
2475{
2476 if(!literalcount) return;
2477 if(n) {
2478 if((int)out-literals[0][0]<4096-n) return;
2479 }
2480 u_int *ptr;
2481 int i;
2482 for(i=0;i<literalcount;i++)
2483 {
2484 ptr=(u_int *)literals[i][0];
2485 u_int offset=(u_int)out-(u_int)ptr-8;
2486 assert(offset<4096);
2487 assert(!(offset&3));
2488 *ptr|=offset;
2489 output_w32(literals[i][1]);
2490 }
2491 literalcount=0;
2492}
2493
2494void literal_pool_jumpover(int n)
2495{
2496 if(!literalcount) return;
2497 if(n) {
2498 if((int)out-literals[0][0]<4096-n) return;
2499 }
2500 int jaddr=(int)out;
2501 emit_jmp(0);
2502 literal_pool(0);
2503 set_jump_target(jaddr,(int)out);
2504}
2505
2506emit_extjump2(int addr, int target, int linker)
2507{
2508 u_char *ptr=(u_char *)addr;
2509 assert((ptr[3]&0x0e)==0xa);
2510 emit_loadlp(target,0);
2511 emit_loadlp(addr,1);
2512 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
2513 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2514//DEBUG >
2515#ifdef DEBUG_CYCLE_COUNT
2516 emit_readword((int)&last_count,ECX);
2517 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2518 emit_readword((int)&next_interupt,ECX);
2519 emit_writeword(HOST_CCREG,(int)&Count);
2520 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2521 emit_writeword(ECX,(int)&last_count);
2522#endif
2523//DEBUG <
2524 emit_jmp(linker);
2525}
2526
2527emit_extjump(int addr, int target)
2528{
2529 emit_extjump2(addr, target, (int)dyna_linker);
2530}
2531emit_extjump_ds(int addr, int target)
2532{
2533 emit_extjump2(addr, target, (int)dyna_linker_ds);
2534}
2535
2536do_readstub(int n)
2537{
2538 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2539 literal_pool(256);
2540 set_jump_target(stubs[n][1],(int)out);
2541 int type=stubs[n][0];
2542 int i=stubs[n][3];
2543 int rs=stubs[n][4];
2544 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2545 u_int reglist=stubs[n][7];
2546 signed char *i_regmap=i_regs->regmap;
2547 int addr=get_reg(i_regmap,AGEN1+(i&1));
2548 int rth,rt;
2549 int ds;
2550 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2551 rth=get_reg(i_regmap,FTEMP|64);
2552 rt=get_reg(i_regmap,FTEMP);
2553 }else{
2554 rth=get_reg(i_regmap,rt1[i]|64);
2555 rt=get_reg(i_regmap,rt1[i]);
2556 }
2557 assert(rs>=0);
2558 assert(rt>=0);
2559 if(addr<0) addr=rt;
2560 assert(addr>=0);
2561 int ftable=0;
2562 if(type==LOADB_STUB||type==LOADBU_STUB)
2563 ftable=(int)readmemb;
2564 if(type==LOADH_STUB||type==LOADHU_STUB)
2565 ftable=(int)readmemh;
2566 if(type==LOADW_STUB)
2567 ftable=(int)readmem;
2568#ifndef FORCE32
2569 if(type==LOADD_STUB)
2570 ftable=(int)readmemd;
2571#endif
2572 assert(ftable!=0);
2573 emit_writeword(rs,(int)&address);
2574 //emit_pusha();
2575 save_regs(reglist);
2576 ds=i_regs!=&regs[i];
2577 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2578 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2579 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2580 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2581 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2582 emit_shrimm(rs,16,1);
2583 int cc=get_reg(i_regmap,CCREG);
2584 if(cc<0) {
2585 emit_loadreg(CCREG,2);
2586 }
2587 emit_movimm(ftable,0);
2588 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2589 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2590 //emit_readword((int)&last_count,temp);
2591 //emit_add(cc,temp,cc);
2592 //emit_writeword(cc,(int)&Count);
2593 //emit_mov(15,14);
2594 emit_call((int)&indirect_jump_indexed);
2595 //emit_callreg(rs);
2596 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
2597 // We really shouldn't need to update the count here,
2598 // but not doing so causes random crashes...
2599 emit_readword((int)&Count,HOST_TEMPREG);
2600 emit_readword((int)&next_interupt,2);
2601 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2602 emit_writeword(2,(int)&last_count);
2603 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2604 if(cc<0) {
2605 emit_storereg(CCREG,HOST_TEMPREG);
2606 }
2607 //emit_popa();
2608 restore_regs(reglist);
2609 //if((cc=get_reg(regmap,CCREG))>=0) {
2610 // emit_loadreg(CCREG,cc);
2611 //}
2612 if(type==LOADB_STUB)
2613 emit_movsbl((int)&readmem_dword,rt);
2614 if(type==LOADBU_STUB)
2615 emit_movzbl((int)&readmem_dword,rt);
2616 if(type==LOADH_STUB)
2617 emit_movswl((int)&readmem_dword,rt);
2618 if(type==LOADHU_STUB)
2619 emit_movzwl((int)&readmem_dword,rt);
2620 if(type==LOADW_STUB)
2621 emit_readword((int)&readmem_dword,rt);
2622 if(type==LOADD_STUB) {
2623 emit_readword((int)&readmem_dword,rt);
2624 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2625 }
2626 emit_jmp(stubs[n][2]); // return address
2627}
2628
2629inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2630{
2631 int rs=get_reg(regmap,target);
2632 int rth=get_reg(regmap,target|64);
2633 int rt=get_reg(regmap,target);
2634 assert(rs>=0);
2635 assert(rt>=0);
2636 int ftable=0;
2637 if(type==LOADB_STUB||type==LOADBU_STUB)
2638 ftable=(int)readmemb;
2639 if(type==LOADH_STUB||type==LOADHU_STUB)
2640 ftable=(int)readmemh;
2641 if(type==LOADW_STUB)
2642 ftable=(int)readmem;
2643#ifndef FORCE32
2644 if(type==LOADD_STUB)
2645 ftable=(int)readmemd;
2646#endif
2647 assert(ftable!=0);
2648 emit_writeword(rs,(int)&address);
2649 //emit_pusha();
2650 save_regs(reglist);
2651 //emit_shrimm(rs,16,1);
2652 int cc=get_reg(regmap,CCREG);
2653 if(cc<0) {
2654 emit_loadreg(CCREG,2);
2655 }
2656 //emit_movimm(ftable,0);
2657 emit_movimm(((u_int *)ftable)[addr>>16],0);
2658 //emit_readword((int)&last_count,12);
2659 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2660 if((signed int)addr>=(signed int)0xC0000000) {
2661 // Pagefault address
2662 int ds=regmap!=regs[i].regmap;
2663 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2664 }
2665 //emit_add(12,2,2);
2666 //emit_writeword(2,(int)&Count);
2667 //emit_call(((u_int *)ftable)[addr>>16]);
2668 emit_call((int)&indirect_jump);
2669 // We really shouldn't need to update the count here,
2670 // but not doing so causes random crashes...
2671 emit_readword((int)&Count,HOST_TEMPREG);
2672 emit_readword((int)&next_interupt,2);
2673 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2674 emit_writeword(2,(int)&last_count);
2675 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2676 if(cc<0) {
2677 emit_storereg(CCREG,HOST_TEMPREG);
2678 }
2679 //emit_popa();
2680 restore_regs(reglist);
2681 if(type==LOADB_STUB)
2682 emit_movsbl((int)&readmem_dword,rt);
2683 if(type==LOADBU_STUB)
2684 emit_movzbl((int)&readmem_dword,rt);
2685 if(type==LOADH_STUB)
2686 emit_movswl((int)&readmem_dword,rt);
2687 if(type==LOADHU_STUB)
2688 emit_movzwl((int)&readmem_dword,rt);
2689 if(type==LOADW_STUB)
2690 emit_readword((int)&readmem_dword,rt);
2691 if(type==LOADD_STUB) {
2692 emit_readword((int)&readmem_dword,rt);
2693 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2694 }
2695}
2696
2697do_writestub(int n)
2698{
2699 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2700 literal_pool(256);
2701 set_jump_target(stubs[n][1],(int)out);
2702 int type=stubs[n][0];
2703 int i=stubs[n][3];
2704 int rs=stubs[n][4];
2705 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2706 u_int reglist=stubs[n][7];
2707 signed char *i_regmap=i_regs->regmap;
2708 int addr=get_reg(i_regmap,AGEN1+(i&1));
2709 int rth,rt,r;
2710 int ds;
2711 if(itype[i]==C1LS||itype[i]==C2LS) {
2712 rth=get_reg(i_regmap,FTEMP|64);
2713 rt=get_reg(i_regmap,r=FTEMP);
2714 }else{
2715 rth=get_reg(i_regmap,rs2[i]|64);
2716 rt=get_reg(i_regmap,r=rs2[i]);
2717 }
2718 assert(rs>=0);
2719 assert(rt>=0);
2720 if(addr<0) addr=get_reg(i_regmap,-1);
2721 assert(addr>=0);
2722 int ftable=0;
2723 if(type==STOREB_STUB)
2724 ftable=(int)writememb;
2725 if(type==STOREH_STUB)
2726 ftable=(int)writememh;
2727 if(type==STOREW_STUB)
2728 ftable=(int)writemem;
2729#ifndef FORCE32
2730 if(type==STORED_STUB)
2731 ftable=(int)writememd;
2732#endif
2733 assert(ftable!=0);
2734 emit_writeword(rs,(int)&address);
2735 //emit_shrimm(rs,16,rs);
2736 //emit_movmem_indexedx4(ftable,rs,rs);
2737 if(type==STOREB_STUB)
2738 emit_writebyte(rt,(int)&byte);
2739 if(type==STOREH_STUB)
2740 emit_writehword(rt,(int)&hword);
2741 if(type==STOREW_STUB)
2742 emit_writeword(rt,(int)&word);
2743 if(type==STORED_STUB) {
2744#ifndef FORCE32
2745 emit_writeword(rt,(int)&dword);
2746 emit_writeword(r?rth:rt,(int)&dword+4);
2747#else
2748 printf("STORED_STUB\n");
2749#endif
2750 }
2751 //emit_pusha();
2752 save_regs(reglist);
2753 ds=i_regs!=&regs[i];
2754 int real_rs=get_reg(i_regmap,rs1[i]);
2755 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2756 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2757 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2758 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
2759 emit_shrimm(rs,16,1);
2760 int cc=get_reg(i_regmap,CCREG);
2761 if(cc<0) {
2762 emit_loadreg(CCREG,2);
2763 }
2764 emit_movimm(ftable,0);
2765 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
2766 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2767 //emit_readword((int)&last_count,temp);
2768 //emit_addimm(cc,2*stubs[n][5]+2,cc);
2769 //emit_add(cc,temp,cc);
2770 //emit_writeword(cc,(int)&Count);
2771 emit_call((int)&indirect_jump_indexed);
2772 //emit_callreg(rs);
2773 emit_readword((int)&Count,HOST_TEMPREG);
2774 emit_readword((int)&next_interupt,2);
2775 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2776 emit_writeword(2,(int)&last_count);
2777 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2778 if(cc<0) {
2779 emit_storereg(CCREG,HOST_TEMPREG);
2780 }
2781 //emit_popa();
2782 restore_regs(reglist);
2783 //if((cc=get_reg(regmap,CCREG))>=0) {
2784 // emit_loadreg(CCREG,cc);
2785 //}
2786 emit_jmp(stubs[n][2]); // return address
2787}
2788
2789inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2790{
2791 int rs=get_reg(regmap,-1);
2792 int rth=get_reg(regmap,target|64);
2793 int rt=get_reg(regmap,target);
2794 assert(rs>=0);
2795 assert(rt>=0);
2796 int ftable=0;
2797 if(type==STOREB_STUB)
2798 ftable=(int)writememb;
2799 if(type==STOREH_STUB)
2800 ftable=(int)writememh;
2801 if(type==STOREW_STUB)
2802 ftable=(int)writemem;
2803#ifndef FORCE32
2804 if(type==STORED_STUB)
2805 ftable=(int)writememd;
2806#endif
2807 assert(ftable!=0);
2808 emit_writeword(rs,(int)&address);
2809 //emit_shrimm(rs,16,rs);
2810 //emit_movmem_indexedx4(ftable,rs,rs);
2811 if(type==STOREB_STUB)
2812 emit_writebyte(rt,(int)&byte);
2813 if(type==STOREH_STUB)
2814 emit_writehword(rt,(int)&hword);
2815 if(type==STOREW_STUB)
2816 emit_writeword(rt,(int)&word);
2817 if(type==STORED_STUB) {
2818#ifndef FORCE32
2819 emit_writeword(rt,(int)&dword);
2820 emit_writeword(target?rth:rt,(int)&dword+4);
2821#else
2822 printf("STORED_STUB\n");
2823#endif
2824 }
2825 //emit_pusha();
2826 save_regs(reglist);
2827 //emit_shrimm(rs,16,1);
2828 int cc=get_reg(regmap,CCREG);
2829 if(cc<0) {
2830 emit_loadreg(CCREG,2);
2831 }
2832 //emit_movimm(ftable,0);
2833 emit_movimm(((u_int *)ftable)[addr>>16],0);
2834 //emit_readword((int)&last_count,12);
2835 emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2);
2836 if((signed int)addr>=(signed int)0xC0000000) {
2837 // Pagefault address
2838 int ds=regmap!=regs[i].regmap;
2839 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
2840 }
2841 //emit_add(12,2,2);
2842 //emit_writeword(2,(int)&Count);
2843 //emit_call(((u_int *)ftable)[addr>>16]);
2844 emit_call((int)&indirect_jump);
2845 emit_readword((int)&Count,HOST_TEMPREG);
2846 emit_readword((int)&next_interupt,2);
2847 emit_addimm(HOST_TEMPREG,-CLOCK_DIVIDER*(adj+1),HOST_TEMPREG);
2848 emit_writeword(2,(int)&last_count);
2849 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2850 if(cc<0) {
2851 emit_storereg(CCREG,HOST_TEMPREG);
2852 }
2853 //emit_popa();
2854 restore_regs(reglist);
2855}
2856
2857do_unalignedwritestub(int n)
2858{
2859 set_jump_target(stubs[n][1],(int)out);
2860 output_w32(0xef000000);
2861 emit_jmp(stubs[n][2]); // return address
2862}
2863
// Debug helper: print the register values passed as arguments, plus the
// word located just below the first argument in memory.  esp is accepted
// but not printed.
// NOTE(review): reading below &edi is outside the object; the parameter
// names suggest this was written for the x86 backend -- confirm it is
// meaningful (or used at all) on ARM.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *first_arg=&edi;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,first_arg[-1]);
}
2868
2869do_invstub(int n)
2870{
2871 literal_pool(20);
2872 u_int reglist=stubs[n][3];
2873 set_jump_target(stubs[n][1],(int)out);
2874 save_regs(reglist);
2875 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2876 emit_call((int)&invalidate_addr);
2877 restore_regs(reglist);
2878 emit_jmp(stubs[n][2]); // return address
2879}
2880
2881int do_dirty_stub(int i)
2882{
2883 assem_debug("do_dirty_stub %x\n",start+i*4);
2884 // Careful about the code output here, verify_dirty needs to parse it.
2885 #ifdef ARMv5_ONLY
2886 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2887 emit_loadlp((int)copy,2);
2888 emit_loadlp(slen*4,3);
2889 #else
2890 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2891 emit_movw(((u_int)copy)&0x0000FFFF,2);
2892 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2893 emit_movt(((u_int)copy)&0xFFFF0000,2);
2894 emit_movw(slen*4,3);
2895 #endif
2896 emit_movimm(start+i*4,0);
2897 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2898 int entry=(int)out;
2899 load_regs_entry(i);
2900 if(entry==(int)out) entry=instr_addr[i];
2901 emit_jmp(instr_addr[i]);
2902 return entry;
2903}
2904
2905void do_dirty_stub_ds()
2906{
2907 // Careful about the code output here, verify_dirty needs to parse it.
2908 #ifdef ARMv5_ONLY
2909 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2910 emit_loadlp((int)copy,2);
2911 emit_loadlp(slen*4,3);
2912 #else
2913 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2914 emit_movw(((u_int)copy)&0x0000FFFF,2);
2915 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2916 emit_movt(((u_int)copy)&0xFFFF0000,2);
2917 emit_movw(slen*4,3);
2918 #endif
2919 emit_movimm(start+1,0);
2920 emit_call((int)&verify_code_ds);
2921}
2922
2923do_cop1stub(int n)
2924{
2925 literal_pool(256);
2926 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2927 set_jump_target(stubs[n][1],(int)out);
2928 int i=stubs[n][3];
2929// int rs=stubs[n][4];
2930 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2931 int ds=stubs[n][6];
2932 if(!ds) {
2933 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2934 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2935 }
2936 //else {printf("fp exception in delay slot\n");}
2937 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2938 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2939 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2940 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2941 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2942}
2943
2944/* TLB */
2945
2946int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
2947{
2948 if(c) {
2949 if((signed int)addr>=(signed int)0xC0000000) {
2950 // address_generation already loaded the const
2951 emit_readword_dualindexedx4(FP,map,map);
2952 }
2953 else
2954 return -1; // No mapping
2955 }
2956 else {
2957 assert(s!=map);
2958 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2959 emit_addsr12(map,s,map);
2960 // Schedule this while we wait on the load
2961 //if(x) emit_xorimm(s,x,ar);
2962 if(shift>=0) emit_shlimm(s,3,shift);
2963 if(~a) emit_andimm(s,a,ar);
2964 emit_readword_dualindexedx4(FP,map,map);
2965 }
2966 return map;
2967}
2968int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
2969{
2970 if(!c||(signed int)addr>=(signed int)0xC0000000) {
2971 emit_test(map,map);
2972 *jaddr=(int)out;
2973 emit_js(0);
2974 }
2975 return map;
2976}
2977
2978int gen_tlb_addr_r(int ar, int map) {
2979 if(map>=0) {
2980 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
2981 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
2982 }
2983}
2984
2985int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
2986{
2987 if(c) {
2988 if(addr<0x80800000||addr>=0xC0000000) {
2989 // address_generation already loaded the const
2990 emit_readword_dualindexedx4(FP,map,map);
2991 }
2992 else
2993 return -1; // No mapping
2994 }
2995 else {
2996 assert(s!=map);
2997 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
2998 emit_addsr12(map,s,map);
2999 // Schedule this while we wait on the load
3000 //if(x) emit_xorimm(s,x,ar);
3001 emit_readword_dualindexedx4(FP,map,map);
3002 }
3003 return map;
3004}
3005int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3006{
3007 if(!c||addr<0x80800000||addr>=0xC0000000) {
3008 emit_testimm(map,0x40000000);
3009 *jaddr=(int)out;
3010 emit_jne(0);
3011 }
3012}
3013
3014int gen_tlb_addr_w(int ar, int map) {
3015 if(map>=0) {
3016 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3017 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3018 }
3019}
3020
3021// Generate the address of the memory_map entry, relative to dynarec_local
3022generate_map_const(u_int addr,int reg) {
3023 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3024 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3025}
3026
3027/* Special assem */
3028
3029void shift_assemble_arm(int i,struct regstat *i_regs)
3030{
3031 if(rt1[i]) {
3032 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3033 {
3034 signed char s,t,shift;
3035 t=get_reg(i_regs->regmap,rt1[i]);
3036 s=get_reg(i_regs->regmap,rs1[i]);
3037 shift=get_reg(i_regs->regmap,rs2[i]);
3038 if(t>=0){
3039 if(rs1[i]==0)
3040 {
3041 emit_zeroreg(t);
3042 }
3043 else if(rs2[i]==0)
3044 {
3045 assert(s>=0);
3046 if(s!=t) emit_mov(s,t);
3047 }
3048 else
3049 {
3050 emit_andimm(shift,31,HOST_TEMPREG);
3051 if(opcode2[i]==4) // SLLV
3052 {
3053 emit_shl(s,HOST_TEMPREG,t);
3054 }
3055 if(opcode2[i]==6) // SRLV
3056 {
3057 emit_shr(s,HOST_TEMPREG,t);
3058 }
3059 if(opcode2[i]==7) // SRAV
3060 {
3061 emit_sar(s,HOST_TEMPREG,t);
3062 }
3063 }
3064 }
3065 } else { // DSLLV/DSRLV/DSRAV
3066 signed char sh,sl,th,tl,shift;
3067 th=get_reg(i_regs->regmap,rt1[i]|64);
3068 tl=get_reg(i_regs->regmap,rt1[i]);
3069 sh=get_reg(i_regs->regmap,rs1[i]|64);
3070 sl=get_reg(i_regs->regmap,rs1[i]);
3071 shift=get_reg(i_regs->regmap,rs2[i]);
3072 if(tl>=0){
3073 if(rs1[i]==0)
3074 {
3075 emit_zeroreg(tl);
3076 if(th>=0) emit_zeroreg(th);
3077 }
3078 else if(rs2[i]==0)
3079 {
3080 assert(sl>=0);
3081 if(sl!=tl) emit_mov(sl,tl);
3082 if(th>=0&&sh!=th) emit_mov(sh,th);
3083 }
3084 else
3085 {
3086 // FIXME: What if shift==tl ?
3087 assert(shift!=tl);
3088 int temp=get_reg(i_regs->regmap,-1);
3089 int real_th=th;
3090 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3091 assert(sl>=0);
3092 assert(sh>=0);
3093 emit_andimm(shift,31,HOST_TEMPREG);
3094 if(opcode2[i]==0x14) // DSLLV
3095 {
3096 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3097 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3098 emit_orrshr(sl,HOST_TEMPREG,th);
3099 emit_andimm(shift,31,HOST_TEMPREG);
3100 emit_testimm(shift,32);
3101 emit_shl(sl,HOST_TEMPREG,tl);
3102 if(th>=0) emit_cmovne_reg(tl,th);
3103 emit_cmovne_imm(0,tl);
3104 }
3105 if(opcode2[i]==0x16) // DSRLV
3106 {
3107 assert(th>=0);
3108 emit_shr(sl,HOST_TEMPREG,tl);
3109 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3110 emit_orrshl(sh,HOST_TEMPREG,tl);
3111 emit_andimm(shift,31,HOST_TEMPREG);
3112 emit_testimm(shift,32);
3113 emit_shr(sh,HOST_TEMPREG,th);
3114 emit_cmovne_reg(th,tl);
3115 if(real_th>=0) emit_cmovne_imm(0,th);
3116 }
3117 if(opcode2[i]==0x17) // DSRAV
3118 {
3119 assert(th>=0);
3120 emit_shr(sl,HOST_TEMPREG,tl);
3121 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3122 if(real_th>=0) {
3123 assert(temp>=0);
3124 emit_sarimm(th,31,temp);
3125 }
3126 emit_orrshl(sh,HOST_TEMPREG,tl);
3127 emit_andimm(shift,31,HOST_TEMPREG);
3128 emit_testimm(shift,32);
3129 emit_sar(sh,HOST_TEMPREG,th);
3130 emit_cmovne_reg(th,tl);
3131 if(real_th>=0) emit_cmovne_reg(temp,th);
3132 }
3133 }
3134 }
3135 }
3136 }
3137}
3138#define shift_assemble shift_assemble_arm
3139
3140void loadlr_assemble_arm(int i,struct regstat *i_regs)
3141{
3142 int s,th,tl,temp,temp2,addr,map=-1;
3143 int offset;
3144 int jaddr=0;
3145 int memtarget,c=0;
3146 u_int hr,reglist=0;
3147 th=get_reg(i_regs->regmap,rt1[i]|64);
3148 tl=get_reg(i_regs->regmap,rt1[i]);
3149 s=get_reg(i_regs->regmap,rs1[i]);
3150 temp=get_reg(i_regs->regmap,-1);
3151 temp2=get_reg(i_regs->regmap,FTEMP);
3152 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3153 assert(addr<0);
3154 offset=imm[i];
3155 for(hr=0;hr<HOST_REGS;hr++) {
3156 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3157 }
3158 reglist|=1<<temp;
3159 if(offset||s<0||c) addr=temp2;
3160 else addr=s;
3161 if(s>=0) {
3162 c=(i_regs->wasconst>>s)&1;
3163 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3164 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3165 }
3166 if(tl>=0) {
3167 //assert(tl>=0);
3168 //assert(rt1[i]);
3169 if(!using_tlb) {
3170 if(!c) {
3171 emit_shlimm(addr,3,temp);
3172 if (opcode[i]==0x22||opcode[i]==0x26) {
3173 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3174 }else{
3175 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3176 }
3177 emit_cmpimm(addr,0x800000);
3178 jaddr=(int)out;
3179 emit_jno(0);
3180 }
3181 else {
3182 if (opcode[i]==0x22||opcode[i]==0x26) {
3183 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3184 }else{
3185 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3186 }
3187 }
3188 }else{ // using tlb
3189 int a;
3190 if(c) {
3191 a=-1;
3192 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3193 a=0xFFFFFFFC; // LWL/LWR
3194 }else{
3195 a=0xFFFFFFF8; // LDL/LDR
3196 }
3197 map=get_reg(i_regs->regmap,TLREG);
3198 assert(map>=0);
3199 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3200 if(c) {
3201 if (opcode[i]==0x22||opcode[i]==0x26) {
3202 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3203 }else{
3204 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3205 }
3206 }
3207 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3208 }
3209 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3210 if(!c||memtarget) {
3211 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3212 emit_readword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2);
3213 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3214 }
3215 else
3216 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3217 emit_andimm(temp,24,temp);
3218#ifdef BIG_ENDIAN_MIPS
3219 if (opcode[i]==0x26) // LWR
3220#else
3221 if (opcode[i]==0x22) // LWL
3222#endif
3223 emit_xorimm(temp,24,temp);
3224 emit_movimm(-1,HOST_TEMPREG);
3225 if (opcode[i]==0x26) {
3226 emit_shr(temp2,temp,temp2);
3227 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3228 }else{
3229 emit_shl(temp2,temp,temp2);
3230 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3231 }
3232 emit_or(temp2,tl,tl);
3233 //emit_storereg(rt1[i],tl); // DEBUG
3234 }
3235 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3236 // FIXME: little endian
3237 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3238 if(!c||memtarget) {
3239 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3240 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3241 emit_readdword_indexed_tlb((int)rdram-0x80000000,temp2,map,temp2h,temp2);
3242 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3243 }
3244 else
3245 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3246 emit_testimm(temp,32);
3247 emit_andimm(temp,24,temp);
3248 if (opcode[i]==0x1A) { // LDL
3249 emit_rsbimm(temp,32,HOST_TEMPREG);
3250 emit_shl(temp2h,temp,temp2h);
3251 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3252 emit_movimm(-1,HOST_TEMPREG);
3253 emit_shl(temp2,temp,temp2);
3254 emit_cmove_reg(temp2h,th);
3255 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3256 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3257 emit_orreq(temp2,tl,tl);
3258 emit_orrne(temp2,th,th);
3259 }
3260 if (opcode[i]==0x1B) { // LDR
3261 emit_xorimm(temp,24,temp);
3262 emit_rsbimm(temp,32,HOST_TEMPREG);
3263 emit_shr(temp2,temp,temp2);
3264 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3265 emit_movimm(-1,HOST_TEMPREG);
3266 emit_shr(temp2h,temp,temp2h);
3267 emit_cmovne_reg(temp2,tl);
3268 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3269 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3270 emit_orrne(temp2h,th,th);
3271 emit_orreq(temp2h,tl,tl);
3272 }
3273 }
3274 }
3275}
3276#define loadlr_assemble loadlr_assemble_arm
3277
3278void cop0_assemble(int i,struct regstat *i_regs)
3279{
3280 if(opcode2[i]==0) // MFC0
3281 {
3282 signed char t=get_reg(i_regs->regmap,rt1[i]);
3283 char copr=(source[i]>>11)&0x1f;
3284 //assert(t>=0); // Why does this happen? OOT is weird
3285 if(t>=0) {
3286#ifdef MUPEN64
3287 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3288 emit_movimm((source[i]>>11)&0x1f,1);
3289 emit_writeword(0,(int)&PC);
3290 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3291 if(copr==9) {
3292 emit_readword((int)&last_count,ECX);
3293 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3294 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3295 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3296 emit_writeword(HOST_CCREG,(int)&Count);
3297 }
3298 emit_call((int)MFC0);
3299 emit_readword((int)&readmem_dword,t);
3300#else
3301 emit_readword((int)&reg_cop0+copr*4,t);
3302#endif
3303 }
3304 }
3305 else if(opcode2[i]==4) // MTC0
3306 {
3307 signed char s=get_reg(i_regs->regmap,rs1[i]);
3308 char copr=(source[i]>>11)&0x1f;
3309 assert(s>=0);
3310 emit_writeword(s,(int)&readmem_dword);
3311 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3312#ifdef MUPEN64 /// FIXME
3313 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
3314 emit_movimm((source[i]>>11)&0x1f,1);
3315 emit_writeword(0,(int)&PC);
3316 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
3317#endif
3318#ifdef PCSX
3319 emit_movimm(source[i],0);
3320 emit_writeword(0,(int)&psxRegs.code);
3321#endif
3322 if(copr==9||copr==11||copr==12||copr==13) {
3323 emit_readword((int)&last_count,ECX);
3324 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3325 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3326 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3327 emit_writeword(HOST_CCREG,(int)&Count);
3328 }
3329 // What a mess. The status register (12) can enable interrupts,
3330 // so needs a special case to handle a pending interrupt.
3331 // The interrupt must be taken immediately, because a subsequent
3332 // instruction might disable interrupts again.
3333 if(copr==12||copr==13) {
3334 emit_movimm(start+i*4+4,0);
3335 emit_movimm(0,1);
3336 emit_writeword(0,(int)&pcaddr);
3337 emit_writeword(1,(int)&pending_exception);
3338 }
3339 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3340 //else
3341 emit_call((int)MTC0);
3342 if(copr==9||copr==11||copr==12||copr==13) {
3343 emit_readword((int)&Count,HOST_CCREG);
3344 emit_readword((int)&next_interupt,ECX);
3345 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3346 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3347 emit_writeword(ECX,(int)&last_count);
3348 emit_storereg(CCREG,HOST_CCREG);
3349 }
3350 if(copr==12||copr==13) {
3351 assert(!is_delayslot);
3352 emit_readword((int)&pending_exception,14);
3353 }
3354 emit_loadreg(rs1[i],s);
3355 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3356 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3357 if(copr==12||copr==13) {
3358 emit_test(14,14);
3359 emit_jne((int)&do_interrupt);
3360 }
3361 cop1_usable=0;
3362 }
3363 else
3364 {
3365 assert(opcode2[i]==0x10);
3366#ifndef DISABLE_TLB
3367 if((source[i]&0x3f)==0x01) // TLBR
3368 emit_call((int)TLBR);
3369 if((source[i]&0x3f)==0x02) // TLBWI
3370 emit_call((int)TLBWI_new);
3371 if((source[i]&0x3f)==0x06) { // TLBWR
3372 // The TLB entry written by TLBWR is dependent on the count,
3373 // so update the cycle count
3374 emit_readword((int)&last_count,ECX);
3375 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3376 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3377 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3378 emit_writeword(HOST_CCREG,(int)&Count);
3379 emit_call((int)TLBWR_new);
3380 }
3381 if((source[i]&0x3f)==0x08) // TLBP
3382 emit_call((int)TLBP);
3383#endif
3384 if((source[i]&0x3f)==0x18) // ERET
3385 {
3386 int count=ccadj[i];
3387 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3388 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3389 emit_jmp((int)jump_eret);
3390 }
3391 }
3392}
3393
3394static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3395{
3396 switch (copr) {
3397 case 1:
3398 case 3:
3399 case 5:
3400 case 8:
3401 case 9:
3402 case 10:
3403 case 11:
3404 emit_readword((int)&reg_cop2d[copr],tl);
3405 emit_signextend16(tl,tl);
3406 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3407 break;
3408 case 7:
3409 case 16:
3410 case 17:
3411 case 18:
3412 case 19:
3413 emit_readword((int)&reg_cop2d[copr],tl);
3414 emit_andimm(tl,0xffff,tl);
3415 emit_writeword(tl,(int)&reg_cop2d[copr]);
3416 break;
3417 case 15:
3418 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3419 emit_writeword(tl,(int)&reg_cop2d[copr]);
3420 break;
3421 case 28:
3422 case 30:
3423 emit_movimm(0,tl);
3424 break;
3425 case 29:
3426 emit_readword((int)&reg_cop2d[9],temp);
3427 emit_testimm(temp,0x8000); // do we need this?
3428 emit_andimm(temp,0xf80,temp);
3429 emit_andne_imm(temp,0,temp);
3430 emit_shr(temp,7,tl);
3431 emit_readword((int)&reg_cop2d[10],temp);
3432 emit_testimm(temp,0x8000);
3433 emit_andimm(temp,0xf80,temp);
3434 emit_andne_imm(temp,0,temp);
3435 emit_orrshr(temp,2,tl);
3436 emit_readword((int)&reg_cop2d[11],temp);
3437 emit_testimm(temp,0x8000);
3438 emit_andimm(temp,0xf80,temp);
3439 emit_andne_imm(temp,0,temp);
3440 emit_orrshl(temp,3,tl);
3441 emit_writeword(tl,(int)&reg_cop2d[copr]);
3442 break;
3443 default:
3444 emit_readword((int)&reg_cop2d[copr],tl);
3445 break;
3446 }
3447}
3448
3449static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3450{
3451 switch (copr) {
3452 case 15:
3453 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3454 emit_writeword(sl,(int)&reg_cop2d[copr]);
3455 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3456 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3457 emit_writeword(sl,(int)&reg_cop2d[14]);
3458 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3459 break;
3460 case 28:
3461 emit_andimm(sl,0x001f,temp);
3462 emit_shl(temp,7,temp);
3463 emit_writeword(temp,(int)&reg_cop2d[9]);
3464 emit_andimm(sl,0x03e0,temp);
3465 emit_shl(temp,2,temp);
3466 emit_writeword(temp,(int)&reg_cop2d[10]);
3467 emit_andimm(sl,0x7c00,temp);
3468 emit_shr(temp,3,temp);
3469 emit_writeword(temp,(int)&reg_cop2d[11]);
3470 emit_writeword(sl,(int)&reg_cop2d[28]);
3471 break;
3472 case 30:
3473 emit_movs(sl,temp);
3474 emit_mvnmi(temp,temp);
3475 emit_clz(temp,temp);
3476 emit_writeword(sl,(int)&reg_cop2d[30]);
3477 emit_writeword(temp,(int)&reg_cop2d[31]);
3478 break;
3479 case 7:
3480 case 29:
3481 case 31:
3482 break;
3483 default:
3484 emit_writeword(sl,(int)&reg_cop2d[copr]);
3485 break;
3486 }
3487}
3488
3489void cop2_assemble(int i,struct regstat *i_regs)
3490{
3491 u_int copr=(source[i]>>11)&0x1f;
3492 signed char temp=get_reg(i_regs->regmap,-1);
3493 if (opcode2[i]==0) { // MFC2
3494 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3495 if(tl>=0)
3496 cop2_get_dreg(copr,tl,temp);
3497 }
3498 else if (opcode2[i]==4) { // MTC2
3499 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3500 cop2_put_dreg(copr,sl,temp);
3501 }
3502 else if (opcode2[i]==2) // CFC2
3503 {
3504 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3505 if(tl>=0)
3506 emit_readword((int)&reg_cop2c[copr],tl);
3507 }
3508 else if (opcode2[i]==6) // CTC2
3509 {
3510 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3511 switch(copr) {
3512 case 4:
3513 case 12:
3514 case 20:
3515 case 26:
3516 case 27:
3517 case 29:
3518 case 30:
3519 emit_signextend16(sl,temp);
3520 break;
3521 case 31:
3522 //value = value & 0x7ffff000;
3523 //if (value & 0x7f87e000) value |= 0x80000000;
3524 emit_shrimm(sl,12,temp);
3525 emit_shlimm(temp,12,temp);
3526 emit_testimm(temp,0x7f000000);
3527 emit_testeqimm(temp,0x00870000);
3528 emit_testeqimm(temp,0x0000e000);
3529 emit_orrne_imm(temp,0x80000000,temp);
3530 break;
3531 default:
3532 temp=sl;
3533 break;
3534 }
3535 emit_writeword(temp,(int)&reg_cop2c[copr]);
3536 assert(sl>=0);
3537 }
3538}
3539
3540void c2op_assemble(int i,struct regstat *i_regs)
3541{
3542 signed char temp=get_reg(i_regs->regmap,-1);
3543 u_int c2op=source[i]&0x3f;
3544 u_int hr,reglist=0;
3545 for(hr=0;hr<HOST_REGS;hr++) {
3546 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3547 }
3548 if(i==0||itype[i-1]!=C2OP)
3549 save_regs(reglist);
3550
3551 if (gte_handlers[c2op]!=NULL) {
3552 int cc=get_reg(i_regs->regmap,CCREG);
3553 emit_movimm(source[i],temp); // opcode
3554 if (cc>=0&&gte_cycletab[c2op])
3555 emit_addimm(cc,gte_cycletab[c2op]/2,cc); // XXX: cound just adjust ccadj?
3556 emit_writeword(temp,(int)&psxRegs.code);
3557 emit_call((int)gte_handlers[c2op]);
3558 }
3559
3560 if(i>=slen-1||itype[i+1]!=C2OP)
3561 restore_regs(reglist);
3562}
3563
3564void cop1_unusable(int i,struct regstat *i_regs)
3565{
3566 // XXX: should just just do the exception instead
3567 if(!cop1_usable) {
3568 int jaddr=(int)out;
3569 emit_jmp(0);
3570 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3571 cop1_usable=1;
3572 }
3573}
3574
3575void cop1_assemble(int i,struct regstat *i_regs)
3576{
3577#ifndef DISABLE_COP1
3578 // Check cop1 unusable
3579 if(!cop1_usable) {
3580 signed char rs=get_reg(i_regs->regmap,CSREG);
3581 assert(rs>=0);
3582 emit_testimm(rs,0x20000000);
3583 int jaddr=(int)out;
3584 emit_jeq(0);
3585 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3586 cop1_usable=1;
3587 }
3588 if (opcode2[i]==0) { // MFC1
3589 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3590 if(tl>=0) {
3591 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3592 emit_readword_indexed(0,tl,tl);
3593 }
3594 }
3595 else if (opcode2[i]==1) { // DMFC1
3596 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3597 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3598 if(tl>=0) {
3599 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3600 if(th>=0) emit_readword_indexed(4,tl,th);
3601 emit_readword_indexed(0,tl,tl);
3602 }
3603 }
3604 else if (opcode2[i]==4) { // MTC1
3605 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3606 signed char temp=get_reg(i_regs->regmap,-1);
3607 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3608 emit_writeword_indexed(sl,0,temp);
3609 }
3610 else if (opcode2[i]==5) { // DMTC1
3611 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3612 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3613 signed char temp=get_reg(i_regs->regmap,-1);
3614 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3615 emit_writeword_indexed(sh,4,temp);
3616 emit_writeword_indexed(sl,0,temp);
3617 }
3618 else if (opcode2[i]==2) // CFC1
3619 {
3620 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3621 if(tl>=0) {
3622 u_int copr=(source[i]>>11)&0x1f;
3623 if(copr==0) emit_readword((int)&FCR0,tl);
3624 if(copr==31) emit_readword((int)&FCR31,tl);
3625 }
3626 }
3627 else if (opcode2[i]==6) // CTC1
3628 {
3629 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3630 u_int copr=(source[i]>>11)&0x1f;
3631 assert(sl>=0);
3632 if(copr==31)
3633 {
3634 emit_writeword(sl,(int)&FCR31);
3635 // Set the rounding mode
3636 //FIXME
3637 //char temp=get_reg(i_regs->regmap,-1);
3638 //emit_andimm(sl,3,temp);
3639 //emit_fldcw_indexed((int)&rounding_modes,temp);
3640 }
3641 }
3642#else
3643 cop1_unusable(i, i_regs);
3644#endif
3645}
3646
3647void fconv_assemble_arm(int i,struct regstat *i_regs)
3648{
3649#ifndef DISABLE_COP1
3650 signed char temp=get_reg(i_regs->regmap,-1);
3651 assert(temp>=0);
3652 // Check cop1 unusable
3653 if(!cop1_usable) {
3654 signed char rs=get_reg(i_regs->regmap,CSREG);
3655 assert(rs>=0);
3656 emit_testimm(rs,0x20000000);
3657 int jaddr=(int)out;
3658 emit_jeq(0);
3659 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3660 cop1_usable=1;
3661 }
3662
3663 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3664 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3665 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3666 emit_flds(temp,15);
3667 emit_ftosizs(15,15); // float->int, truncate
3668 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3669 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3670 emit_fsts(15,temp);
3671 return;
3672 }
3673 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3674 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3675 emit_vldr(temp,7);
3676 emit_ftosizd(7,13); // double->int, truncate
3677 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3678 emit_fsts(13,temp);
3679 return;
3680 }
3681
3682 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3683 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3684 emit_flds(temp,13);
3685 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3686 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3687 emit_fsitos(13,15);
3688 emit_fsts(15,temp);
3689 return;
3690 }
3691 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3692 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3693 emit_flds(temp,13);
3694 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3695 emit_fsitod(13,7);
3696 emit_vstr(7,temp);
3697 return;
3698 }
3699
3700 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3701 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3702 emit_flds(temp,13);
3703 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3704 emit_fcvtds(13,7);
3705 emit_vstr(7,temp);
3706 return;
3707 }
3708 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3709 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3710 emit_vldr(temp,7);
3711 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3712 emit_fcvtsd(7,13);
3713 emit_fsts(13,temp);
3714 return;
3715 }
3716 #endif
3717
3718 // C emulation code
3719
3720 u_int hr,reglist=0;
3721 for(hr=0;hr<HOST_REGS;hr++) {
3722 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3723 }
3724 save_regs(reglist);
3725
3726 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3727 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3728 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3729 emit_call((int)cvt_s_w);
3730 }
3731 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3732 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3733 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3734 emit_call((int)cvt_d_w);
3735 }
3736 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3737 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3738 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3739 emit_call((int)cvt_s_l);
3740 }
3741 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3742 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3743 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3744 emit_call((int)cvt_d_l);
3745 }
3746
3747 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3748 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3749 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3750 emit_call((int)cvt_d_s);
3751 }
3752 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3753 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3754 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3755 emit_call((int)cvt_w_s);
3756 }
3757 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3758 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3759 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3760 emit_call((int)cvt_l_s);
3761 }
3762
3763 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3764 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3765 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3766 emit_call((int)cvt_s_d);
3767 }
3768 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3769 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3770 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3771 emit_call((int)cvt_w_d);
3772 }
3773 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3774 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3775 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3776 emit_call((int)cvt_l_d);
3777 }
3778
3779 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3780 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3781 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3782 emit_call((int)round_l_s);
3783 }
3784 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3785 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3786 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3787 emit_call((int)trunc_l_s);
3788 }
3789 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3790 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3791 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3792 emit_call((int)ceil_l_s);
3793 }
3794 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3795 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3796 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3797 emit_call((int)floor_l_s);
3798 }
3799 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3800 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3801 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3802 emit_call((int)round_w_s);
3803 }
3804 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3805 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3806 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3807 emit_call((int)trunc_w_s);
3808 }
3809 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3810 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3811 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3812 emit_call((int)ceil_w_s);
3813 }
3814 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3815 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3816 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3817 emit_call((int)floor_w_s);
3818 }
3819
3820 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3821 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3822 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3823 emit_call((int)round_l_d);
3824 }
3825 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3826 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3827 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3828 emit_call((int)trunc_l_d);
3829 }
3830 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3831 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3832 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3833 emit_call((int)ceil_l_d);
3834 }
3835 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3836 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3837 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
3838 emit_call((int)floor_l_d);
3839 }
3840 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3841 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3842 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3843 emit_call((int)round_w_d);
3844 }
3845 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3846 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3847 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3848 emit_call((int)trunc_w_d);
3849 }
3850 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3851 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3852 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3853 emit_call((int)ceil_w_d);
3854 }
3855 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3856 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3857 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
3858 emit_call((int)floor_w_d);
3859 }
3860
3861 restore_regs(reglist);
3862#else
3863 cop1_unusable(i, i_regs);
3864#endif
3865}
3866#define fconv_assemble fconv_assemble_arm
3867
3868void fcomp_assemble(int i,struct regstat *i_regs)
3869{
3870#ifndef DISABLE_COP1
3871 signed char fs=get_reg(i_regs->regmap,FSREG);
3872 signed char temp=get_reg(i_regs->regmap,-1);
3873 assert(temp>=0);
3874 // Check cop1 unusable
3875 if(!cop1_usable) {
3876 signed char cs=get_reg(i_regs->regmap,CSREG);
3877 assert(cs>=0);
3878 emit_testimm(cs,0x20000000);
3879 int jaddr=(int)out;
3880 emit_jeq(0);
3881 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3882 cop1_usable=1;
3883 }
3884
3885 if((source[i]&0x3f)==0x30) {
3886 emit_andimm(fs,~0x800000,fs);
3887 return;
3888 }
3889
3890 if((source[i]&0x3e)==0x38) {
3891 // sf/ngle - these should throw exceptions for NaNs
3892 emit_andimm(fs,~0x800000,fs);
3893 return;
3894 }
3895
3896 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
3897 if(opcode2[i]==0x10) {
3898 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3899 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
3900 emit_orimm(fs,0x800000,fs);
3901 emit_flds(temp,14);
3902 emit_flds(HOST_TEMPREG,15);
3903 emit_fcmps(14,15);
3904 emit_fmstat();
3905 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
3906 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
3907 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
3908 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
3909 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
3910 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
3911 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
3912 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
3913 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
3914 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
3915 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
3916 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
3917 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
3918 return;
3919 }
3920 if(opcode2[i]==0x11) {
3921 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3922 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
3923 emit_orimm(fs,0x800000,fs);
3924 emit_vldr(temp,6);
3925 emit_vldr(HOST_TEMPREG,7);
3926 emit_fcmpd(6,7);
3927 emit_fmstat();
3928 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
3929 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
3930 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
3931 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
3932 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
3933 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
3934 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
3935 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
3936 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
3937 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
3938 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
3939 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
3940 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
3941 return;
3942 }
3943 #endif
3944
3945 // C only
3946
3947 u_int hr,reglist=0;
3948 for(hr=0;hr<HOST_REGS;hr++) {
3949 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3950 }
3951 reglist&=~(1<<fs);
3952 save_regs(reglist);
3953 if(opcode2[i]==0x10) {
3954 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
3955 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
3956 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3957 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3958 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3959 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3960 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3961 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3962 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3963 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3964 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3965 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3966 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3967 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3968 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3969 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3970 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3971 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3972 }
3973 if(opcode2[i]==0x11) {
3974 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
3975 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
3976 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3977 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3978 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3979 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3980 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3981 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3982 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3983 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3984 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3985 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3986 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3987 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3988 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3989 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3990 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3991 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
3992 }
3993 restore_regs(reglist);
3994 emit_loadreg(FSREG,fs);
3995#else
3996 cop1_unusable(i, i_regs);
3997#endif
3998}
3999
4000void float_assemble(int i,struct regstat *i_regs)
4001{
4002#ifndef DISABLE_COP1
4003 signed char temp=get_reg(i_regs->regmap,-1);
4004 assert(temp>=0);
4005 // Check cop1 unusable
4006 if(!cop1_usable) {
4007 signed char cs=get_reg(i_regs->regmap,CSREG);
4008 assert(cs>=0);
4009 emit_testimm(cs,0x20000000);
4010 int jaddr=(int)out;
4011 emit_jeq(0);
4012 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4013 cop1_usable=1;
4014 }
4015
4016 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4017 if((source[i]&0x3f)==6) // mov
4018 {
4019 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4020 if(opcode2[i]==0x10) {
4021 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4022 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
4023 emit_readword_indexed(0,temp,temp);
4024 emit_writeword_indexed(temp,0,HOST_TEMPREG);
4025 }
4026 if(opcode2[i]==0x11) {
4027 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4028 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
4029 emit_vldr(temp,7);
4030 emit_vstr(7,HOST_TEMPREG);
4031 }
4032 }
4033 return;
4034 }
4035
4036 if((source[i]&0x3f)>3)
4037 {
4038 if(opcode2[i]==0x10) {
4039 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4040 emit_flds(temp,15);
4041 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4042 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4043 }
4044 if((source[i]&0x3f)==4) // sqrt
4045 emit_fsqrts(15,15);
4046 if((source[i]&0x3f)==5) // abs
4047 emit_fabss(15,15);
4048 if((source[i]&0x3f)==7) // neg
4049 emit_fnegs(15,15);
4050 emit_fsts(15,temp);
4051 }
4052 if(opcode2[i]==0x11) {
4053 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4054 emit_vldr(temp,7);
4055 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4056 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4057 }
4058 if((source[i]&0x3f)==4) // sqrt
4059 emit_fsqrtd(7,7);
4060 if((source[i]&0x3f)==5) // abs
4061 emit_fabsd(7,7);
4062 if((source[i]&0x3f)==7) // neg
4063 emit_fnegd(7,7);
4064 emit_vstr(7,temp);
4065 }
4066 return;
4067 }
4068 if((source[i]&0x3f)<4)
4069 {
4070 if(opcode2[i]==0x10) {
4071 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4072 }
4073 if(opcode2[i]==0x11) {
4074 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4075 }
4076 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4077 if(opcode2[i]==0x10) {
4078 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4079 emit_flds(temp,15);
4080 emit_flds(HOST_TEMPREG,13);
4081 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4082 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4083 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4084 }
4085 }
4086 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
4087 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
4088 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
4089 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
4090 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4091 emit_fsts(15,HOST_TEMPREG);
4092 }else{
4093 emit_fsts(15,temp);
4094 }
4095 }
4096 else if(opcode2[i]==0x11) {
4097 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4098 emit_vldr(temp,7);
4099 emit_vldr(HOST_TEMPREG,6);
4100 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4101 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4102 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4103 }
4104 }
4105 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
4106 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
4107 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
4108 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
4109 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
4110 emit_vstr(7,HOST_TEMPREG);
4111 }else{
4112 emit_vstr(7,temp);
4113 }
4114 }
4115 }
4116 else {
4117 if(opcode2[i]==0x10) {
4118 emit_flds(temp,15);
4119 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4120 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4121 }
4122 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
4123 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
4124 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
4125 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
4126 emit_fsts(15,temp);
4127 }
4128 else if(opcode2[i]==0x11) {
4129 emit_vldr(temp,7);
4130 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
4131 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4132 }
4133 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
4134 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
4135 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
4136 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
4137 emit_vstr(7,temp);
4138 }
4139 }
4140 return;
4141 }
4142 #endif
4143
4144 u_int hr,reglist=0;
4145 for(hr=0;hr<HOST_REGS;hr++) {
4146 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4147 }
4148 if(opcode2[i]==0x10) { // Single precision
4149 save_regs(reglist);
4150 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4151 if((source[i]&0x3f)<4) {
4152 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4153 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
4154 }else{
4155 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4156 }
4157 switch(source[i]&0x3f)
4158 {
4159 case 0x00: emit_call((int)add_s);break;
4160 case 0x01: emit_call((int)sub_s);break;
4161 case 0x02: emit_call((int)mul_s);break;
4162 case 0x03: emit_call((int)div_s);break;
4163 case 0x04: emit_call((int)sqrt_s);break;
4164 case 0x05: emit_call((int)abs_s);break;
4165 case 0x06: emit_call((int)mov_s);break;
4166 case 0x07: emit_call((int)neg_s);break;
4167 }
4168 restore_regs(reglist);
4169 }
4170 if(opcode2[i]==0x11) { // Double precision
4171 save_regs(reglist);
4172 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4173 if((source[i]&0x3f)<4) {
4174 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
4175 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
4176 }else{
4177 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4178 }
4179 switch(source[i]&0x3f)
4180 {
4181 case 0x00: emit_call((int)add_d);break;
4182 case 0x01: emit_call((int)sub_d);break;
4183 case 0x02: emit_call((int)mul_d);break;
4184 case 0x03: emit_call((int)div_d);break;
4185 case 0x04: emit_call((int)sqrt_d);break;
4186 case 0x05: emit_call((int)abs_d);break;
4187 case 0x06: emit_call((int)mov_d);break;
4188 case 0x07: emit_call((int)neg_d);break;
4189 }
4190 restore_regs(reglist);
4191 }
4192#else
4193 cop1_unusable(i, i_regs);
4194#endif
4195}
4196
4197void multdiv_assemble_arm(int i,struct regstat *i_regs)
4198{
4199 // case 0x18: MULT
4200 // case 0x19: MULTU
4201 // case 0x1A: DIV
4202 // case 0x1B: DIVU
4203 // case 0x1C: DMULT
4204 // case 0x1D: DMULTU
4205 // case 0x1E: DDIV
4206 // case 0x1F: DDIVU
4207 if(rs1[i]&&rs2[i])
4208 {
4209 if((opcode2[i]&4)==0) // 32-bit
4210 {
4211 if(opcode2[i]==0x18) // MULT
4212 {
4213 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4214 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4215 signed char hi=get_reg(i_regs->regmap,HIREG);
4216 signed char lo=get_reg(i_regs->regmap,LOREG);
4217 assert(m1>=0);
4218 assert(m2>=0);
4219 assert(hi>=0);
4220 assert(lo>=0);
4221 emit_smull(m1,m2,hi,lo);
4222 }
4223 if(opcode2[i]==0x19) // MULTU
4224 {
4225 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4226 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4227 signed char hi=get_reg(i_regs->regmap,HIREG);
4228 signed char lo=get_reg(i_regs->regmap,LOREG);
4229 assert(m1>=0);
4230 assert(m2>=0);
4231 assert(hi>=0);
4232 assert(lo>=0);
4233 emit_umull(m1,m2,hi,lo);
4234 }
4235 if(opcode2[i]==0x1A) // DIV
4236 {
4237 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4238 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4239 assert(d1>=0);
4240 assert(d2>=0);
4241 signed char quotient=get_reg(i_regs->regmap,LOREG);
4242 signed char remainder=get_reg(i_regs->regmap,HIREG);
4243 assert(quotient>=0);
4244 assert(remainder>=0);
4245 emit_movs(d1,remainder);
4246 emit_negmi(remainder,remainder);
4247 emit_movs(d2,HOST_TEMPREG);
4248 emit_jeq((int)out+52); // Division by zero
4249 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
4250 emit_clz(HOST_TEMPREG,quotient);
4251 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
4252 emit_orimm(quotient,1<<31,quotient);
4253 emit_shr(quotient,quotient,quotient);
4254 emit_cmp(remainder,HOST_TEMPREG);
4255 emit_subcs(remainder,HOST_TEMPREG,remainder);
4256 emit_adcs(quotient,quotient,quotient);
4257 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
4258 emit_jcc((int)out-16); // -4
4259 emit_teq(d1,d2);
4260 emit_negmi(quotient,quotient);
4261 emit_test(d1,d1);
4262 emit_negmi(remainder,remainder);
4263 }
4264 if(opcode2[i]==0x1B) // DIVU
4265 {
4266 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
4267 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
4268 assert(d1>=0);
4269 assert(d2>=0);
4270 signed char quotient=get_reg(i_regs->regmap,LOREG);
4271 signed char remainder=get_reg(i_regs->regmap,HIREG);
4272 assert(quotient>=0);
4273 assert(remainder>=0);
4274 emit_test(d2,d2);
4275 emit_jeq((int)out+44); // Division by zero
4276 emit_clz(d2,HOST_TEMPREG);
4277 emit_movimm(1<<31,quotient);
4278 emit_shl(d2,HOST_TEMPREG,d2);
4279 emit_mov(d1,remainder);
4280 emit_shr(quotient,HOST_TEMPREG,quotient);
4281 emit_cmp(remainder,d2);
4282 emit_subcs(remainder,d2,remainder);
4283 emit_adcs(quotient,quotient,quotient);
4284 emit_shrcc_imm(d2,1,d2);
4285 emit_jcc((int)out-16); // -4
4286 }
4287 }
4288 else // 64-bit
4289 {
4290 if(opcode2[i]==0x1C) // DMULT
4291 {
4292 assert(opcode2[i]!=0x1C);
4293 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4294 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4295 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4296 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4297 assert(m1h>=0);
4298 assert(m2h>=0);
4299 assert(m1l>=0);
4300 assert(m2l>=0);
4301 emit_pushreg(m2h);
4302 emit_pushreg(m2l);
4303 emit_pushreg(m1h);
4304 emit_pushreg(m1l);
4305 emit_call((int)&mult64);
4306 emit_popreg(m1l);
4307 emit_popreg(m1h);
4308 emit_popreg(m2l);
4309 emit_popreg(m2h);
4310 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4311 signed char hil=get_reg(i_regs->regmap,HIREG);
4312 if(hih>=0) emit_loadreg(HIREG|64,hih);
4313 if(hil>=0) emit_loadreg(HIREG,hil);
4314 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4315 signed char lol=get_reg(i_regs->regmap,LOREG);
4316 if(loh>=0) emit_loadreg(LOREG|64,loh);
4317 if(lol>=0) emit_loadreg(LOREG,lol);
4318 }
4319 if(opcode2[i]==0x1D) // DMULTU
4320 {
4321 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4322 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4323 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4324 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4325 assert(m1h>=0);
4326 assert(m2h>=0);
4327 assert(m1l>=0);
4328 assert(m2l>=0);
4329 save_regs(0x100f);
4330 if(m1l!=0) emit_mov(m1l,0);
4331 if(m1h==0) emit_readword((int)&dynarec_local,1);
4332 else if(m1h>1) emit_mov(m1h,1);
4333 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
4334 else if(m2l>2) emit_mov(m2l,2);
4335 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
4336 else if(m2h>3) emit_mov(m2h,3);
4337 emit_call((int)&multu64);
4338 restore_regs(0x100f);
4339 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4340 signed char hil=get_reg(i_regs->regmap,HIREG);
4341 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4342 signed char lol=get_reg(i_regs->regmap,LOREG);
4343 /*signed char temp=get_reg(i_regs->regmap,-1);
4344 signed char rh=get_reg(i_regs->regmap,HIREG|64);
4345 signed char rl=get_reg(i_regs->regmap,HIREG);
4346 assert(m1h>=0);
4347 assert(m2h>=0);
4348 assert(m1l>=0);
4349 assert(m2l>=0);
4350 assert(temp>=0);
4351 //emit_mov(m1l,EAX);
4352 //emit_mul(m2l);
4353 emit_umull(rl,rh,m1l,m2l);
4354 emit_storereg(LOREG,rl);
4355 emit_mov(rh,temp);
4356 //emit_mov(m1h,EAX);
4357 //emit_mul(m2l);
4358 emit_umull(rl,rh,m1h,m2l);
4359 emit_adds(rl,temp,temp);
4360 emit_adcimm(rh,0,rh);
4361 emit_storereg(HIREG,rh);
4362 //emit_mov(m2h,EAX);
4363 //emit_mul(m1l);
4364 emit_umull(rl,rh,m1l,m2h);
4365 emit_adds(rl,temp,temp);
4366 emit_adcimm(rh,0,rh);
4367 emit_storereg(LOREG|64,temp);
4368 emit_mov(rh,temp);
4369 //emit_mov(m2h,EAX);
4370 //emit_mul(m1h);
4371 emit_umull(rl,rh,m1h,m2h);
4372 emit_adds(rl,temp,rl);
4373 emit_loadreg(HIREG,temp);
4374 emit_adcimm(rh,0,rh);
4375 emit_adds(rl,temp,rl);
4376 emit_adcimm(rh,0,rh);
4377 // DEBUG
4378 /*
4379 emit_pushreg(m2h);
4380 emit_pushreg(m2l);
4381 emit_pushreg(m1h);
4382 emit_pushreg(m1l);
4383 emit_call((int)&multu64);
4384 emit_popreg(m1l);
4385 emit_popreg(m1h);
4386 emit_popreg(m2l);
4387 emit_popreg(m2h);
4388 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4389 signed char hil=get_reg(i_regs->regmap,HIREG);
4390 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4391 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4392 */
4393 // Shouldn't be necessary
4394 //char loh=get_reg(i_regs->regmap,LOREG|64);
4395 //char lol=get_reg(i_regs->regmap,LOREG);
4396 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4397 //if(lol>=0) emit_loadreg(LOREG,lol);
4398 }
4399 if(opcode2[i]==0x1E) // DDIV
4400 {
4401 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4402 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4403 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4404 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4405 assert(d1h>=0);
4406 assert(d2h>=0);
4407 assert(d1l>=0);
4408 assert(d2l>=0);
4409 save_regs(0x100f);
4410 if(d1l!=0) emit_mov(d1l,0);
4411 if(d1h==0) emit_readword((int)&dynarec_local,1);
4412 else if(d1h>1) emit_mov(d1h,1);
4413 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4414 else if(d2l>2) emit_mov(d2l,2);
4415 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4416 else if(d2h>3) emit_mov(d2h,3);
4417 emit_call((int)&div64);
4418 restore_regs(0x100f);
4419 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4420 signed char hil=get_reg(i_regs->regmap,HIREG);
4421 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4422 signed char lol=get_reg(i_regs->regmap,LOREG);
4423 if(hih>=0) emit_loadreg(HIREG|64,hih);
4424 if(hil>=0) emit_loadreg(HIREG,hil);
4425 if(loh>=0) emit_loadreg(LOREG|64,loh);
4426 if(lol>=0) emit_loadreg(LOREG,lol);
4427 }
4428 if(opcode2[i]==0x1F) // DDIVU
4429 {
4430 //u_int hr,reglist=0;
4431 //for(hr=0;hr<HOST_REGS;hr++) {
4432 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
4433 //}
4434 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4435 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4436 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4437 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4438 assert(d1h>=0);
4439 assert(d2h>=0);
4440 assert(d1l>=0);
4441 assert(d2l>=0);
4442 save_regs(0x100f);
4443 if(d1l!=0) emit_mov(d1l,0);
4444 if(d1h==0) emit_readword((int)&dynarec_local,1);
4445 else if(d1h>1) emit_mov(d1h,1);
4446 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
4447 else if(d2l>2) emit_mov(d2l,2);
4448 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
4449 else if(d2h>3) emit_mov(d2h,3);
4450 emit_call((int)&divu64);
4451 restore_regs(0x100f);
4452 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4453 signed char hil=get_reg(i_regs->regmap,HIREG);
4454 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4455 signed char lol=get_reg(i_regs->regmap,LOREG);
4456 if(hih>=0) emit_loadreg(HIREG|64,hih);
4457 if(hil>=0) emit_loadreg(HIREG,hil);
4458 if(loh>=0) emit_loadreg(LOREG|64,loh);
4459 if(lol>=0) emit_loadreg(LOREG,lol);
4460 }
4461 }
4462 }
4463 else
4464 {
4465 // Multiply by zero is zero.
4466 // MIPS does not have a divide by zero exception.
4467 // The result is undefined, we return zero.
4468 signed char hr=get_reg(i_regs->regmap,HIREG);
4469 signed char lr=get_reg(i_regs->regmap,LOREG);
4470 if(hr>=0) emit_zeroreg(hr);
4471 if(lr>=0) emit_zeroreg(lr);
4472 }
4473}
4474#define multdiv_assemble multdiv_assemble_arm
4475
/*
 * Hash preload hook; intentionally a no-op on ARM.
 *
 * On x86 this step places the value 0xf8 into register r. The ARM backend
 * needs no preloaded value because the hash is computed with a single
 * instruction (see do_rhash).
 */
void do_preload_rhash(int r) {
  (void)r; // nothing to emit on this architecture
}
4480
4481void do_preload_rhtbl(int ht) {
4482 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4483}
4484
/*
 * Emit the mini hash table hash: rh = rs & 0xf8.
 * The mask keeps bits 3..7 of the address in rs.
 */
void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4488
4489void do_miniht_load(int ht,int rh) {
4490 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4491 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4492}
4493
4494void do_miniht_jump(int rs,int rh,int ht) {
4495 emit_cmp(rh,rs);
4496 emit_ldreq_indexed(ht,4,15);
4497 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4498 emit_mov(rs,7);
4499 emit_jmp(jump_vaddr_reg[7]);
4500 #else
4501 emit_jmp(jump_vaddr_reg[rs]);
4502 #endif
4503}
4504
4505void do_miniht_insert(u_int return_address,int rt,int temp) {
4506 #ifdef ARMv5_ONLY
4507 emit_movimm(return_address,rt); // PC into link register
4508 add_to_linker((int)out,return_address,1);
4509 emit_pcreladdr(temp);
4510 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4511 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4512 #else
4513 emit_movw(return_address&0x0000FFFF,rt);
4514 add_to_linker((int)out,return_address,1);
4515 emit_pcreladdr(temp);
4516 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4517 emit_movt(return_address&0xFFFF0000,rt);
4518 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4519 #endif
4520}
4521
4522// Sign-extend to 64 bits and write out upper half of a register
4523// This is useful where we have a 32-bit value in a register, and want to
4524// keep it in a 32-bit register, but can't guarantee that it won't be read
4525// as a 64-bit value later.
4526void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
4527{
4528#ifndef FORCE32
4529 if(is32_pre==is32) return;
4530 int hr,reg;
4531 for(hr=0;hr<HOST_REGS;hr++) {
4532 if(hr!=EXCLUDE_REG) {
4533 //if(pre[hr]==entry[hr]) {
4534 if((reg=pre[hr])>=0) {
4535 if((dirty>>hr)&1) {
4536 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
4537 emit_sarimm(hr,31,HOST_TEMPREG);
4538 emit_storereg(reg|64,HOST_TEMPREG);
4539 }
4540 }
4541 }
4542 //}
4543 }
4544 }
4545#endif
4546}
4547
4548void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
4549{
4550 //if(dirty_pre==dirty) return;
4551 int hr,reg,new_hr;
4552 for(hr=0;hr<HOST_REGS;hr++) {
4553 if(hr!=EXCLUDE_REG) {
4554 reg=pre[hr];
4555 if(((~u)>>(reg&63))&1) {
4556 if(reg==entry[hr]||(reg>0&&entry[hr]<0)) {
4557 if(((dirty_pre&~dirty)>>hr)&1) {
4558 if(reg>0&&reg<34) {
4559 emit_storereg(reg,hr);
4560 if( ((is32_pre&~uu)>>reg)&1 ) {
4561 emit_sarimm(hr,31,HOST_TEMPREG);
4562 emit_storereg(reg|64,HOST_TEMPREG);
4563 }
4564 }
4565 else if(reg>=64) {
4566 emit_storereg(reg,hr);
4567 }
4568 }
4569 }
4570 else // Check if register moved to a different register
4571 if((new_hr=get_reg(entry,reg))>=0) {
4572 if((dirty_pre>>hr)&(~dirty>>new_hr)&1) {
4573 if(reg>0&&reg<34) {
4574 emit_storereg(reg,hr);
4575 if( ((is32_pre&~uu)>>reg)&1 ) {
4576 emit_sarimm(hr,31,HOST_TEMPREG);
4577 emit_storereg(reg|64,HOST_TEMPREG);
4578 }
4579 }
4580 else if(reg>=64) {
4581 emit_storereg(reg,hr);
4582 }
4583 }
4584 }
4585 }
4586 }
4587 }
4588}
4589
4590
4591/* using strd could possibly help but you'd have to allocate registers in pairs
4592void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
4593{
4594 int hr;
4595 int wrote=-1;
4596 for(hr=HOST_REGS-1;hr>=0;hr--) {
4597 if(hr!=EXCLUDE_REG) {
4598 if(pre[hr]!=entry[hr]) {
4599 if(pre[hr]>=0) {
4600 if((dirty>>hr)&1) {
4601 if(get_reg(entry,pre[hr])<0) {
4602 if(pre[hr]<64) {
4603 if(!((u>>pre[hr])&1)) {
4604 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4605 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4606 emit_sarimm(hr,31,hr+1);
4607 emit_strdreg(pre[hr],hr);
4608 }
4609 else
4610 emit_storereg(pre[hr],hr);
4611 }else{
4612 emit_storereg(pre[hr],hr);
4613 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4614 emit_sarimm(hr,31,hr);
4615 emit_storereg(pre[hr]|64,hr);
4616 }
4617 }
4618 }
4619 }else{
4620 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4621 emit_storereg(pre[hr],hr);
4622 }
4623 }
4624 wrote=hr;
4625 }
4626 }
4627 }
4628 }
4629 }
4630 }
4631 for(hr=0;hr<HOST_REGS;hr++) {
4632 if(hr!=EXCLUDE_REG) {
4633 if(pre[hr]!=entry[hr]) {
4634 if(pre[hr]>=0) {
4635 int nr;
4636 if((nr=get_reg(entry,pre[hr]))>=0) {
4637 emit_mov(hr,nr);
4638 }
4639 }
4640 }
4641 }
4642 }
4643}
4644#define wb_invalidate wb_invalidate_arm
4645*/
4646
4647// CPU-architecture-specific initialization
4648void arch_init() {
4649#ifndef DISABLE_COP1
4650 rounding_modes[0]=0x0<<22; // round
4651 rounding_modes[1]=0x3<<22; // trunc
4652 rounding_modes[2]=0x1<<22; // ceil
4653 rounding_modes[3]=0x2<<22; // floor
4654#endif
4655}
4656
4657// vim:shiftwidth=2:expandtab