1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_x86.c *
3 * Copyright (C) 2009-2011 Ari64 *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
// Global state shared by the x86 backend.
24 int pending_exception;
26 uint64_t readmem_dword;
27 static precomp_instr fake_pc;
// 1048576 u_int entries; verify_dirty() and get_bounds() index it with (address>>12).
28 u_int memory_map[1048576];
// 32 pairs of u_int, 8-byte aligned.
29 static u_int mini_ht[32][2] __attribute__((aligned(8)));
// 512 bytes, 4-byte aligned.
30 u_char restore_candidate[512] __attribute__((aligned(4)));
// Forward declarations of the jump_vaddr_* routines (one per host register name).
33 void jump_vaddr_eax();
34 void jump_vaddr_ecx();
35 void jump_vaddr_edx();
36 void jump_vaddr_ebx();
37 void jump_vaddr_ebp();
38 void jump_vaddr_edi();
// Eight-slot table holding routine addresses as u_int.
// NOTE(review): only the last initializer is visible here; confirm the order of the
// remaining entries against the full file.
40 static const u_int jump_vaddr_reg[8] = {
48 (int)jump_vaddr_edi };
// Forward declarations of the invalidate_block_* routines (one per host register name).
50 void invalidate_block_eax();
51 void invalidate_block_ecx();
52 void invalidate_block_edx();
53 void invalidate_block_ebx();
54 void invalidate_block_ebp();
55 void invalidate_block_esi();
56 void invalidate_block_edi();
// Eight-slot table holding the addresses of the routines above as u_int.
// NOTE(review): seven initializers are visible for eight slots; presumably one entry
// (between ebx and ebp) is on a line not shown here — confirm.
58 static const u_int invalidate_block_reg[8] = {
59 (int)invalidate_block_eax,
60 (int)invalidate_block_ecx,
61 (int)invalidate_block_edx,
62 (int)invalidate_block_ebx,
64 (int)invalidate_block_ebp,
65 (int)invalidate_block_esi,
66 (int)invalidate_block_edi };
// Table of four 16-bit values named for rounding modes.
// NOTE(review): the initializer list is not visible here.
68 static const u_short rounding_modes[4] = {
76 // We need these for cmovcc instructions on x86
// Their addresses are passed to emit_cmovne()/emit_cmovl()/emit_cmovs(), which emit a
// conditional move whose source is the 32-bit word at that absolute address.
77 static const u_int const_zero=0;
78 static const u_int const_one=1;
// Patch the already-emitted instruction at `addr` so that it refers to `target`.
// For the relative forms visible below the stored value is
// target-(address of the 4-byte field)-4, i.e. relative to the end of that field.
82 static void set_jump_target(int addr,int target)
84 u_char *ptr=(u_char *)addr;
// Form whose second opcode byte is 0x80..0x8f: the 32-bit field starts at ptr+2.
87 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
88 u_int *ptr2=(u_int *)(ptr+2);
89 *ptr2=target-(int)ptr2-4;
// Opcode 0xe8 or 0xe9: the 32-bit field starts at ptr+1.
91 else if(*ptr==0xe8||*ptr==0xe9) {
92 u_int *ptr2=(u_int *)(ptr+1);
93 *ptr2=target-(int)ptr2-4;
// Remaining case: the opcode must be 0xc7 and its 32-bit operand is located at ptr+6.
// NOTE(review): the store for this case is not visible here.
97 assert(*ptr==0xc7); /* mov immediate (store address) */
98 u_int *ptr2=(u_int *)(ptr+6);
// Reads an int pointer stored at stub+6 and overwrites the word it points to with
// (stub - i_ptr - 4): a 32-bit displacement, measured from the end of that word, to `stub`.
// NOTE(review): stub+6 is arithmetic on void * (a compiler extension), and the return
// statement of this void * function is not visible here — confirm.
103 static void *kill_pointer(void *stub)
105 int *i_ptr=*((int **)(stub+6));
106 *i_ptr=(int)stub-(int)i_ptr-4;
// Reads an int pointer stored at stub+6 and returns *i_ptr + i_ptr + 4: the absolute
// address obtained by adding the stored displacement to the end of the 4-byte word.
109 static int get_pointer(void *stub)
111 int *i_ptr=*((int **)(stub+6));
112 return *i_ptr+(int)i_ptr+4;
115 // Find the "clean" entry point from a "dirty" entry point
116 // by skipping past the call to verify_code
// Layout relied on: byte 20 is 0xE8 and byte 25 is 0x83 (both asserted). If byte 28 is
// 0xE9, the 32-bit displacement at ptr+29 is added to addr+33 (the end of that 5-byte
// jump) and the result is returned; otherwise addr+28 is returned.
117 static u_int get_clean_addr(int addr)
119 u_char *ptr=(u_char *)addr;
120 assert(ptr[20]==0xE8); // call instruction
121 assert(ptr[25]==0x83); // pop (add esp,4) instruction
122 if(ptr[28]==0xE9) return *(u_int *)(ptr+29)+addr+33; // follow jmp
123 else return(addr+28);
// Checks whether the code recorded by the stub at `addr` still matches its saved copy.
// Returns nonzero when memcmp() finds the `len` bytes at `source` and `copy` equal,
// and 0 when they differ or when the source cannot be translated (see below).
126 static int verify_dirty(void *addr)
128 u_char *ptr=(u_char *)addr;
129 assert(ptr[5]==0xB8);
// Immediates read from the stub: source at +6, copy at +11, length at +16.
130 u_int source=*(u_int *)(ptr+6);
131 u_int copy=*(u_int *)(ptr+11);
132 u_int len=*(u_int *)(ptr+16);
133 assert(ptr[20]==0xE8); // call instruction
// Absolute call target: rel32 at +21 plus the address following the call (ptr+25).
134 u_int verifier=*(u_int *)(ptr+21)+(u_int)ptr+25;
// For these two verifiers `source` is translated through memory_map first.
135 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
136 unsigned int page=source>>12;
137 unsigned int map_value=memory_map[page];
// Entries >= 0x80000000 are treated as "no usable mapping".
138 if(map_value>=0x80000000) return 0;
// Every further page covered by [source, source+len) must have the same mapping
// (compared after <<2, which discards the top two bits).
139 while(page<((source+len-1)>>12)) {
140 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
142 source = source+(map_value<<2);
144 //DebugMessage(M64MSG_VERBOSE, "verify_dirty: %x %x %x",source,copy,len);
145 return !memcmp((void *)source,(void *)copy,len);
148 // This doesn't necessarily find all clean entry points, just
149 // guarantees that it's not dirty
// Returns 1 as soon as any of the bytes expected at offsets 5, 10, 15, 20 and 25 is
// missing. NOTE(review): the return for the case where all bytes match is not visible here.
150 static int isclean(int addr)
152 u_char *ptr=(u_char *)addr;
153 if(ptr[5]!=0xB8) return 1; // mov imm,%eax
154 if(ptr[10]!=0xBB) return 1; // mov imm,%ebx
155 if(ptr[15]!=0xB9) return 1; // mov imm,%ecx
156 if(ptr[20]!=0xE8) return 1; // call instruction
157 if(ptr[25]!=0x83) return 1; // pop (add esp,4) instruction
// Reports the source range recorded in the stub at `addr`.
// *start receives the (possibly translated) source address and *end receives
// source+len; either output pointer may be NULL, in which case it is skipped.
161 static void get_bounds(int addr,u_int *start,u_int *end)
163 u_char *ptr=(u_char *)addr;
164 assert(ptr[5]==0xB8);
165 u_int source=*(u_int *)(ptr+6);
166 //u_int copy=*(u_int *)(ptr+11);
167 u_int len=*(u_int *)(ptr+16);
168 assert(ptr[20]==0xE8); // call instruction
// Absolute call target: rel32 at +21 plus the address following the call (ptr+25).
169 u_int verifier=*(u_int *)(ptr+21)+(u_int)ptr+25;
// For these two verifiers the source is translated through memory_map; an entry
// >= 0x80000000 makes the reported source 0.
170 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
171 if(memory_map[source>>12]>=0x80000000) source = 0;
172 else source = source+(memory_map[source>>12]<<2);
174 if(start) *start=source;
175 if(end) *end=source+len;
178 /* Register allocation */
// Map guest register `reg` into some host register slot of cur->regmap.
//   cur - register state being edited (regmap, dirty, isconst, u, uu are touched below)
//   i   - index of the current instruction (used for bt[i], rs1[i-1], unneeded_reg[i-1], ...)
//   reg - guest register number to allocate
// Whenever a slot is (re)assigned, its dirty and isconst bits are cleared.
// The strategies are tried in the order of the section comments below.
180 // Note: registers are allocated clean (unmodified state)
181 // if you intend to modify the register, you must call dirty_reg().
182 static void alloc_reg(struct regstat *cur,int i,signed char reg)
// Initial preference derived from the guest register number; may be replaced by loop_reg().
185 int preferred_reg = (reg&3)+(reg>28)*4-(reg==32)+2*(reg==36)-(reg==40);
187 // Don't allocate unused registers
188 if((cur->u>>reg)&1) return;
190 // see if it's already allocated
191 for(hr=0;hr<HOST_REGS;hr++)
193 if(cur->regmap[hr]==reg) return;
196 // Keep the same mapping if the register was already allocated in a loop
197 preferred_reg = loop_reg(i,reg,preferred_reg);
199 // Try to allocate the preferred register
200 if(cur->regmap[preferred_reg]==-1) {
201 cur->regmap[preferred_reg]=reg;
202 cur->dirty&=~(1<<preferred_reg);
203 cur->isconst&=~(1<<preferred_reg);
// The preferred slot is occupied: take it over if its occupant is flagged in
// cur->u (values < 64) or in cur->uu (values >= 64, i.e. upper halves).
206 r=cur->regmap[preferred_reg];
207 if(r<64&&((cur->u>>r)&1)) {
208 cur->regmap[preferred_reg]=reg;
209 cur->dirty&=~(1<<preferred_reg);
210 cur->isconst&=~(1<<preferred_reg);
213 if(r>=64&&((cur->uu>>(r&63))&1)) {
214 cur->regmap[preferred_reg]=reg;
215 cur->dirty&=~(1<<preferred_reg);
216 cur->isconst&=~(1<<preferred_reg);
220 // Try to allocate EAX, EBX, ECX, or EDX
221 // We prefer these because they can do byte and halfword loads
222 for(hr=0;hr<4;hr++) {
223 if(cur->regmap[hr]==-1) {
225 cur->dirty&=~(1<<hr);
226 cur->isconst&=~(1<<hr);
231 // Clear any unneeded registers
232 // We try to keep the mapping consistent, if possible, because it
233 // makes branches easier (especially loops). So we try to allocate
234 // first (see above) before removing old mappings. If this is not
235 // possible then go ahead and clear out the registers that are no longer needed.
237 for(hr=0;hr<HOST_REGS;hr++)
243 if(i==0||(unneeded_reg[i-1]>>r)&1) {cur->regmap[hr]=-1;break;}
247 if((cur->uu>>(r&63))&1)
248 if(i==0||(unneeded_reg_upper[i-1]>>(r&63))&1) {cur->regmap[hr]=-1;break;}
252 // Try to allocate any available register, but prefer
253 // registers that have not been used recently.
255 for(hr=0;hr<HOST_REGS;hr++) {
256 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
// NOTE(review): `&®s` on the next line looks like a character-encoding artifact of
// `&&regs` — confirm against the upstream file.
257 if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) {
259 cur->dirty&=~(1<<hr);
260 cur->isconst&=~(1<<hr);
266 // Try to allocate any available register
267 for(hr=0;hr<HOST_REGS;hr++) {
268 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
270 cur->dirty&=~(1<<hr);
271 cur->isconst&=~(1<<hr);
276 // Ok, now we have to evict someone
277 // Pick a register we hopefully won't need soon
// hsn[] holds one score per guest register, initialised to 10 and then filled in by lsn().
278 u_char hsn[MAXREG+1];
279 memset(hsn,10,sizeof(hsn));
281 lsn(hsn,i,&preferred_reg);
282 //DebugMessage(M64MSG_VERBOSE, "hsn(%x): %d %d %d %d %d %d %d",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
284 // Don't evict the cycle count at entry points, otherwise the entry
285 // stub will have to write it.
// Both lines clamp the CCREG score to at most 2: at branch targets (bt[i]) and when
// the instruction two slots back is one of the jump types.
286 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
287 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
290 // Alloc preferred register if available
291 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
292 for(hr=0;hr<HOST_REGS;hr++) {
293 // Evict both parts of a 64-bit register
294 if((cur->regmap[hr]&63)==r) {
296 cur->dirty&=~(1<<hr);
297 cur->isconst&=~(1<<hr);
300 cur->regmap[preferred_reg]=reg;
// Candidates with score j that the previous instruction neither reads nor writes.
// The slot holding r+64 (upper half) is looked for first, then the slot holding r.
// HOST_CCREG is only eligible while j is below the CCREG score.
303 for(r=1;r<=MAXREG;r++)
305 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
306 for(hr=0;hr<HOST_REGS;hr++) {
307 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
308 if(cur->regmap[hr]==r+64) {
310 cur->dirty&=~(1<<hr);
311 cur->isconst&=~(1<<hr);
316 for(hr=0;hr<HOST_REGS;hr++) {
317 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
318 if(cur->regmap[hr]==r) {
320 cur->dirty&=~(1<<hr);
321 cur->isconst&=~(1<<hr);
// Second scan over all guest registers, this time without the rs/rt and HOST_CCREG
// restrictions visible above.
332 for(r=1;r<=MAXREG;r++)
335 for(hr=0;hr<HOST_REGS;hr++) {
336 if(cur->regmap[hr]==r+64) {
338 cur->dirty&=~(1<<hr);
339 cur->isconst&=~(1<<hr);
343 for(hr=0;hr<HOST_REGS;hr++) {
344 if(cur->regmap[hr]==r) {
346 cur->dirty&=~(1<<hr);
347 cur->isconst&=~(1<<hr);
// Nothing could be allocated or evicted: log the error and terminate the process.
354 DebugMessage(M64MSG_ERROR, "This shouldn't happen (alloc_reg)");exit(1);
// Allocate both halves of 64-bit guest register `reg`.
// The lower half is delegated to alloc_reg(); the upper half is stored in cur->regmap
// as reg|64 (equivalently reg+64 in the lookups below).
//   cur - register state being edited
//   i   - index of the current instruction
//   reg - guest register number
// Whenever a slot is (re)assigned, its dirty and isconst bits are cleared.
357 static void alloc_reg64(struct regstat *cur,int i,signed char reg)
// Initial preference is one of host slots 5..7; may be replaced by loop_reg().
359 int preferred_reg = 5+reg%3;
362 // allocate the lower 32 bits
363 alloc_reg(cur,i,reg);
365 // Don't allocate unused registers
366 if((cur->uu>>reg)&1) return;
368 // see if the upper half is already allocated
369 for(hr=0;hr<HOST_REGS;hr++)
371 if(cur->regmap[hr]==reg+64) return;
374 // Keep the same mapping if the register was already allocated in a loop
375 preferred_reg = loop_reg(i,reg,preferred_reg);
377 // Try to allocate the preferred register
378 if(cur->regmap[preferred_reg]==-1) {
379 cur->regmap[preferred_reg]=reg|64;
380 cur->dirty&=~(1<<preferred_reg);
381 cur->isconst&=~(1<<preferred_reg);
// The preferred slot is occupied: take it over if its occupant is flagged in
// cur->u (values < 64) or in cur->uu (values >= 64).
384 r=cur->regmap[preferred_reg];
385 if(r<64&&((cur->u>>r)&1)) {
386 cur->regmap[preferred_reg]=reg|64;
387 cur->dirty&=~(1<<preferred_reg);
388 cur->isconst&=~(1<<preferred_reg);
391 if(r>=64&&((cur->uu>>(r&63))&1)) {
392 cur->regmap[preferred_reg]=reg|64;
393 cur->dirty&=~(1<<preferred_reg);
394 cur->isconst&=~(1<<preferred_reg);
398 // Try to allocate EBP, ESI or EDI
399 for(hr=5;hr<8;hr++) {
400 if(cur->regmap[hr]==-1) {
401 cur->regmap[hr]=reg|64;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
408 // Clear any unneeded registers
409 // We try to keep the mapping consistent, if possible, because it
410 // makes branches easier (especially loops). So we try to allocate
411 // first (see above) before removing old mappings. If this is not
412 // possible then go ahead and clear out the registers that are no longer needed.
// Unlike alloc_reg(), this scan runs from the highest host slot downwards.
414 for(hr=HOST_REGS-1;hr>=0;hr--)
419 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
423 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
427 // Try to allocate any available register, but prefer
428 // registers that have not been used recently.
430 for(hr=0;hr<HOST_REGS;hr++) {
431 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
// NOTE(review): `&®s` on the next line looks like a character-encoding artifact of
// `&&regs` — confirm against the upstream file.
432 if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) {
433 cur->regmap[hr]=reg|64;
434 cur->dirty&=~(1<<hr);
435 cur->isconst&=~(1<<hr);
441 // Try to allocate any available register
442 for(hr=0;hr<HOST_REGS;hr++) {
443 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
444 cur->regmap[hr]=reg|64;
445 cur->dirty&=~(1<<hr);
446 cur->isconst&=~(1<<hr);
451 // Ok, now we have to evict someone
452 // Pick a register we hopefully won't need soon
// hsn[] holds one score per guest register, initialised to 10 and then filled in by lsn().
453 u_char hsn[MAXREG+1];
454 memset(hsn,10,sizeof(hsn));
456 lsn(hsn,i,&preferred_reg);
457 //DebugMessage(M64MSG_VERBOSE, "eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
458 //DebugMessage(M64MSG_VERBOSE, "hsn(%x): %d %d %d %d %d %d %d",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
460 // Don't evict the cycle count at entry points, otherwise the entry
461 // stub will have to write it.
462 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
463 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
466 // Alloc preferred register if available
467 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
468 for(hr=0;hr<HOST_REGS;hr++) {
469 // Evict both parts of a 64-bit register
470 if((cur->regmap[hr]&63)==r) {
472 cur->dirty&=~(1<<hr);
473 cur->isconst&=~(1<<hr);
476 cur->regmap[preferred_reg]=reg|64;
// Candidates with score j that the previous instruction neither reads nor writes.
// The slot holding r+64 is looked for first, then the slot holding r.
// HOST_CCREG is only eligible while j is below the CCREG score.
479 for(r=1;r<=MAXREG;r++)
481 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
482 for(hr=0;hr<HOST_REGS;hr++) {
483 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
484 if(cur->regmap[hr]==r+64) {
485 cur->regmap[hr]=reg|64;
486 cur->dirty&=~(1<<hr);
487 cur->isconst&=~(1<<hr);
492 for(hr=0;hr<HOST_REGS;hr++) {
493 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
494 if(cur->regmap[hr]==r) {
495 cur->regmap[hr]=reg|64;
496 cur->dirty&=~(1<<hr);
497 cur->isconst&=~(1<<hr);
// Second scan over all guest registers, without the rs/rt and HOST_CCREG restrictions
// visible above.
508 for(r=1;r<=MAXREG;r++)
511 for(hr=0;hr<HOST_REGS;hr++) {
512 if(cur->regmap[hr]==r+64) {
513 cur->regmap[hr]=reg|64;
514 cur->dirty&=~(1<<hr);
515 cur->isconst&=~(1<<hr);
519 for(hr=0;hr<HOST_REGS;hr++) {
520 if(cur->regmap[hr]==r) {
521 cur->regmap[hr]=reg|64;
522 cur->dirty&=~(1<<hr);
523 cur->isconst&=~(1<<hr);
// Nothing could be allocated or evicted: log the error and terminate the process.
530 DebugMessage(M64MSG_ERROR, "This shouldn't happen");exit(1);
533 // Allocate a temporary register. This is done without regard to
534 // dirty status or whether the register we request is on the unneeded list
535 // Note: This will only allocate one register, even if called multiple times
//   cur - register state being edited
//   i   - index of the current instruction
//   reg - identifier under which the temporary is recorded in cur->regmap
536 static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
// No preferred slot: -1 is handed to lsn() below.
539 int preferred_reg = -1;
541 // see if it's already allocated
542 for(hr=0;hr<HOST_REGS;hr++)
544 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
547 // Try to allocate any available register, starting with EDI, ESI, EBP...
548 // We prefer EDI, ESI, EBP since the others are used for byte/halfword stores
549 for(hr=HOST_REGS-1;hr>=0;hr--) {
550 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
552 cur->dirty&=~(1<<hr);
553 cur->isconst&=~(1<<hr);
558 // Find an unneeded register
559 for(hr=HOST_REGS-1;hr>=0;hr--)
565 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
567 cur->dirty&=~(1<<hr);
568 cur->isconst&=~(1<<hr);
575 if((cur->uu>>(r&63))&1) {
576 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
587 // Ok, now we have to evict someone
588 // Pick a register we hopefully won't need soon
589 // TODO: we might want to follow unconditional jumps here
590 // TODO: get rid of dupe code and make this into a function
// hsn[] holds one score per guest register, initialised to 10 and then filled in by lsn().
591 u_char hsn[MAXREG+1];
592 memset(hsn,10,sizeof(hsn));
594 lsn(hsn,i,&preferred_reg);
595 //DebugMessage(M64MSG_VERBOSE, "hsn: %d %d %d %d %d %d %d",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
597 // Don't evict the cycle count at entry points, otherwise the entry
598 // stub will have to write it.
599 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
600 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// Candidates with score j that the previous instruction neither reads nor writes.
// Here HOST_CCREG is eligible only when the CCREG score is above 2 (alloc_reg() and
// alloc_reg64() compare j against the CCREG score instead).
603 for(r=1;r<=MAXREG;r++)
605 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
606 for(hr=0;hr<HOST_REGS;hr++) {
607 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
608 if(cur->regmap[hr]==r+64) {
610 cur->dirty&=~(1<<hr);
611 cur->isconst&=~(1<<hr);
616 for(hr=0;hr<HOST_REGS;hr++) {
617 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
618 if(cur->regmap[hr]==r) {
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
// Second scan over all guest registers, without the rs/rt and HOST_CCREG restrictions
// visible above.
632 for(r=1;r<=MAXREG;r++)
635 for(hr=0;hr<HOST_REGS;hr++) {
636 if(cur->regmap[hr]==r+64) {
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
643 for(hr=0;hr<HOST_REGS;hr++) {
644 if(cur->regmap[hr]==r) {
646 cur->dirty&=~(1<<hr);
647 cur->isconst&=~(1<<hr);
// Nothing could be allocated or evicted: log the error and terminate the process.
654 DebugMessage(M64MSG_ERROR, "This shouldn't happen");exit(1);
656 // Allocate a specific x86 register.
//   cur - register state being edited
//   i   - index of the current instruction
//   reg - guest register to place
//   hr  - host register slot that must hold it
// If `reg` already lives in some slot (ESP is skipped), that slot's dirty bit is
// captured and carried over to `hr`; the isconst bit of `hr` is cleared.
657 static void alloc_x86_reg(struct regstat *cur,int i,signed char reg,int hr)
662 // see if it's already allocated (and dealloc it)
663 for(n=0;n<HOST_REGS;n++)
665 if(n!=ESP&&cur->regmap[n]==reg) {
666 dirty=(cur->dirty>>n)&1;
// Replace hr's dirty bit with the captured one.
672 cur->dirty&=~(1<<hr);
673 cur->dirty|=dirty<<hr;
674 cur->isconst&=~(1<<hr);
677 // Alloc cycle count into dedicated register
// CCREG is always placed in ESI via alloc_x86_reg().
678 static void alloc_cc(struct regstat *cur,int i)
680 alloc_x86_reg(cur,i,CCREG,ESI);
// Register allocation for the multiply/divide instruction at index i.
// Three groups of statements are visible below; the conditions selecting between the
// first two and the last one are not visible here.
685 static void multdiv_alloc_x86(struct regstat *current,int i)
// The sources are no longer treated as known constants.
695 clear_const(current,rs1[i]);
696 clear_const(current,rs2[i]);
// 32-bit form (bit 2 of opcode2 clear): HI is pinned to EDX and LO to EAX, both are
// marked needed (u bits cleared), 32-bit (is32 bits set) and dirty.
699 if((opcode2[i]&4)==0) // 32-bit
701 current->u&=~(1LL<<HIREG);
702 current->u&=~(1LL<<LOREG);
703 alloc_x86_reg(current,i,HIREG,EDX);
704 alloc_x86_reg(current,i,LOREG,EAX);
705 alloc_reg(current,i,rs1[i]);
706 alloc_reg(current,i,rs2[i]);
707 current->is32|=1LL<<HIREG;
708 current->is32|=1LL<<LOREG;
709 dirty_reg(current,HIREG);
710 dirty_reg(current,LOREG);
// Other form: upper half of HI in EDX, lower half of HI in EAX, 64-bit sources,
// then alloc_all(); HI and LO are marked as not 32-bit and dirty.
714 alloc_x86_reg(current,i,HIREG|64,EDX);
715 alloc_x86_reg(current,i,HIREG,EAX);
716 alloc_reg64(current,i,rs1[i]);
717 alloc_reg64(current,i,rs2[i]);
718 alloc_all(current,i);
719 current->is32&=~(1LL<<HIREG);
720 current->is32&=~(1LL<<LOREG);
721 dirty_reg(current,HIREG);
722 dirty_reg(current,LOREG);
727 // Multiply by zero is zero.
728 // MIPS does not have a divide by zero exception.
729 // The result is undefined, we return zero.
730 alloc_reg(current,i,HIREG);
731 alloc_reg(current,i,LOREG);
732 current->is32|=1LL<<HIREG;
733 current->is32|=1LL<<LOREG;
734 dirty_reg(current,HIREG);
735 dirty_reg(current,LOREG);
// Name alias for this x86-specific implementation.
738 #define multdiv_alloc multdiv_alloc_x86
// Eight register-name strings of up to 3 characters plus terminator, indexed by host
// register number in the assem_debug() traces.
// NOTE(review): `const` appears twice in the declaration (redundant qualifier), and the
// initializer list is not visible here.
742 static const char const regname[8][4] = {
// Emits a single byte of machine code.
// NOTE(review): the body is not visible here; presumably it appends `byte` at the
// output cursor `out` — confirm.
752 static void output_byte(u_char byte)
// Builds an x86 ModRM byte: mod in bits 7..6, ext (reg/opcode-extension field) in
// bits 5..3, rm in bits 2..0. Note the parameter order is (mod, rm, ext).
756 static void output_modrm(u_char mod,u_char rm,u_char ext)
761 u_char byte=(mod<<6)|(ext<<3)|rm;
// Builds an x86 SIB byte: scale in bits 7..6, index in bits 5..3, base in bits 2..0.
764 static void output_sib(u_char scale,u_char index,u_char base)
769 u_char byte=(scale<<6)|(index<<3)|base;
// Stores a 32-bit word at the output cursor `out` (in host byte order).
// NOTE(review): advancing `out` is not visible here.
772 static void output_w32(u_int word)
774 *((u_int *)out)=word;
// Emit a register-to-register move from rs to rt (see the trace string).
// ModRM: mod=3 (register direct), rm=rt, reg field=rs.
778 static void emit_mov(int rs,int rt)
780 assem_debug("mov %%%s,%%%s",regname[rs],regname[rt]);
782 output_modrm(3,rt,rs);
// Emit rt = rs1 + rs2. Three encodings are visible: add rs2 into rs1, add rs1 into rs2,
// and a three-operand lea. NOTE(review): the conditions selecting between them are
// not visible here.
785 static void emit_add(int rs1,int rs2,int rt)
788 assem_debug("add %%%s,%%%s",regname[rs2],regname[rs1]);
790 output_modrm(3,rs1,rs2);
792 assem_debug("add %%%s,%%%s",regname[rs1],regname[rs2]);
794 output_modrm(3,rs2,rs1);
796 assem_debug("lea (%%%s,%%%s),%%%s",regname[rs1],regname[rs2],regname[rt]);
// lea variants: ModRM rm=4 announces a SIB byte; the two SIB forms swap which source
// is the base and which is the index.
799 output_modrm(0,4,rt);
800 output_sib(0,rs2,rs1);
802 output_modrm(0,4,rt);
803 output_sib(0,rs1,rs2);
804 }else /* lea 0(,%ebp,2) */{
805 output_modrm(0,4,rt);
// Same as emit_add(): forwards its arguments unchanged.
812 static void emit_adds(int rs1,int rs2,int rt)
814 emit_add(rs1,rs2,rt);
// Emit a lea that loads rs1 scaled by 8 into rt (see the trace string).
// ModRM rm=4 announces a SIB byte; NOTE(review): the SIB/displacement bytes are not visible here.
817 static void emit_lea8(int rs1,int rt)
819 assem_debug("lea 0(%%%s,8),%%%s",regname[rs1],regname[rt]);
821 output_modrm(0,4,rt);
// Emit lea imm(rs1,rs2,1),rt: rt = rs1 + rs2 + imm.
// SIB: scale field 0, index=rs2, base=rs1.
825 static void emit_leairrx1(int imm,int rs1,int rs2,int rt)
827 assem_debug("lea %x(%%%s,%%%s,1),%%%s",imm,regname[rs1],regname[rs2],regname[rt]);
// mod=2 (with displacement) is used when imm is nonzero or the base is EBP;
// otherwise mod=0 without displacement.
829 if(imm!=0||rs1==EBP) {
830 output_modrm(2,4,rt);
831 output_sib(0,rs2,rs1);
834 output_modrm(0,4,rt);
835 output_sib(0,rs2,rs1);
// Emit lea imm(rs1,rs2,4),rt: rt = rs1 + rs2*4 + imm.
// SIB: scale field 2 (x4), index=rs2, base=rs1.
838 static void emit_leairrx4(int imm,int rs1,int rs2,int rt)
840 assem_debug("lea %x(%%%s,%%%s,4),%%%s",imm,regname[rs1],regname[rs2],regname[rt]);
// mod=2 (with displacement) is used when imm is nonzero or the base is EBP;
// otherwise mod=0 without displacement.
842 if(imm!=0||rs1==EBP) {
843 output_modrm(2,4,rt);
844 output_sib(2,rs2,rs1);
847 output_modrm(0,4,rt);
848 output_sib(2,rs2,rs1);
// Emit rt = -rs: copies rs into rt when they differ, then negates rt in place
// (ModRM opcode-extension field 3).
852 static void emit_neg(int rs, int rt)
854 if(rs!=rt) emit_mov(rs,rt);
855 assem_debug("neg %%%s",regname[rt]);
857 output_modrm(3,rt,3);
// NOTE(review): body not visible here; by its name presumably the flag-setting
// counterpart of emit_neg() — confirm.
860 static void emit_negs(int rs, int rt)
// Emit rt = rs1 - rs2. Two paths are visible: a direct sub of rs2 from rs1, and a
// path ending in emit_add(rs2,rs1,rs2). NOTE(review): the conditions selecting them
// and any preceding steps are not visible here.
865 static void emit_sub(int rs1,int rs2,int rt)
868 assem_debug("sub %%%s,%%%s",regname[rs2],regname[rs1]);
870 output_modrm(3,rs1,rs2);
873 emit_add(rs2,rs1,rs2);
// Same as emit_sub(): forwards its arguments unchanged.
880 static void emit_subs(int rs1,int rs2,int rt)
882 emit_sub(rs1,rs2,rt);
// Zero rt by xor-ing it with itself (both ModRM register fields are rt).
885 static void emit_zeroreg(int rt)
888 output_modrm(3,rt,rt);
889 assem_debug("xor %%%s,%%%s",regname[rt],regname[rt]);
// Emit a load of guest register r from its memory home into host register hr.
// Bit 6 of r selects the upper 32-bit half of a 64-bit register.
892 static void emit_loadreg(int r, int hr)
// Default home: reg[] entry (8 bytes each, index r&63), plus 4 when bit 6 is set.
897 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
// Registers with a dedicated variable override the default address.
898 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
899 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
900 if(r==CCREG) addr=(int)&cycle_count;
901 if(r==CSREG) addr=(int)&Status;
902 if(r==FSREG) addr=(int)&FCR31;
903 assem_debug("mov %x+%d,%%%s",addr,r,regname[hr]);
// mod=0, rm=5: absolute 32-bit address operand; reg field = hr.
905 output_modrm(0,5,hr);
// Emit a store of host register hr to the memory home of guest register r.
// Bit 6 of r selects the upper 32-bit half of a 64-bit register.
// Unlike emit_loadreg(), no CSREG override is visible here.
909 static void emit_storereg(int r, int hr)
// Default home: reg[] entry (8 bytes each, index r&63), plus 4 when bit 6 is set.
911 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
912 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
913 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
914 if(r==CCREG) addr=(int)&cycle_count;
915 if(r==FSREG) addr=(int)&FCR31;
916 assem_debug("mov %%%s,%x+%d",regname[hr],addr,r);
// mod=0, rm=5: absolute 32-bit address operand; reg field = hr.
918 output_modrm(0,5,hr);
// Emit a register-register test of rs and rt (ModRM mod=3, rm=rs, reg field=rt).
922 static void emit_test(int rs, int rt)
924 assem_debug("test %%%s,%%%s",regname[rs],regname[rt]);
926 output_modrm(3,rs,rt);
// Emit a test of register rs against immediate imm (opcode-extension field 0).
929 static void emit_testimm(int rs,int imm)
931 assem_debug("test $0x%x,%%%s",imm,regname[rs]);
// First path only when imm fits in a signed byte and rs is one of host registers 0..3;
// otherwise the second path. NOTE(review): opcode and immediate bytes are not visible here.
932 if(imm<128&&imm>=-128&&rs<4) {
934 output_modrm(3,rs,0);
940 output_modrm(3,rs,0);
// Emit rt = ~rs: copies rs into rt when they differ, then inverts rt in place
// (ModRM opcode-extension field 2).
945 static void emit_not(int rs,int rt)
947 if(rs!=rt) emit_mov(rs,rt);
948 assem_debug("not %%%s",regname[rt]);
950 output_modrm(3,rt,2);
// Emit rt = rs1 & rs2. Two in-place encodings are visible (and rs2 into rt, and rs1
// into rt). NOTE(review): the conditions selecting them are not visible here.
953 static void emit_and(u_int rs1,u_int rs2,u_int rt)
959 assem_debug("and %%%s,%%%s",regname[rs2],regname[rt]);
961 output_modrm(3,rs1,rs2);
965 assem_debug("and %%%s,%%%s",regname[rs1],regname[rt]);
967 output_modrm(3,rs2,rs1);
// Emit rt = rs1 | rs2. Two in-place encodings are visible (or rs2 into rt, or rs1
// into rt). NOTE(review): the conditions selecting them are not visible here.
975 static void emit_or(u_int rs1,u_int rs2,u_int rt)
981 assem_debug("or %%%s,%%%s",regname[rs2],regname[rt]);
983 output_modrm(3,rs1,rs2);
987 assem_debug("or %%%s,%%%s",regname[rs1],regname[rt]);
989 output_modrm(3,rs2,rs1);
// NOTE(review): body not visible here. emit_set_nz64_32() calls this and then emits a
// cmovne, so the flags it leaves behind are relied upon by that caller.
996 static void emit_or_and_set_flags(int rs1,int rs2,int rt)
// Emit rt = rs1 ^ rs2. Two in-place encodings are visible, plus a path that finishes
// by calling itself with rt as first source. NOTE(review): the selecting conditions and
// the step preceding the recursive call are not visible here.
1001 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1007 assem_debug("xor %%%s,%%%s",regname[rs2],regname[rt]);
1009 output_modrm(3,rs1,rs2);
1013 assem_debug("xor %%%s,%%%s",regname[rs1],regname[rt]);
1015 output_modrm(3,rs2,rs1);
1019 emit_xor(rt,rs2,rt);
// Emit a load of immediate imm into rt; the opcode byte is 0xB8 plus the register number.
// NOTE(review): emission of the immediate itself is not visible here.
1023 static void emit_movimm(int imm,u_int rt)
1025 assem_debug("mov $%d,%%%s",imm,regname[rt]);
1027 output_byte(0xB8+rt);
// Emit rt = rs + imm. Two forms are visible: an in-place add on rt and a lea from rs
// into rt. NOTE(review): the condition choosing between them is not visible here.
1031 static void emit_addimm(int rs,int imm,int rt)
1035 assem_debug("add $%d,%%%s",imm,regname[rt]);
// imm fitting in a signed byte selects the first encoding path.
1036 if(imm<128&&imm>=-128) {
1038 output_modrm(3,rt,0);
1044 output_modrm(3,rt,0);
1051 assem_debug("lea %d(%%%s),%%%s",imm,regname[rs],regname[rt]);
// lea: mod=1 when imm fits in a signed byte, mod=2 otherwise.
1053 if(imm<128&&imm>=-128) {
1054 output_modrm(1,rs,rt);
1057 output_modrm(2,rs,rt);
// Emit an in-place add of imm to rt using the add instruction (opcode-extension 0).
1066 static void emit_addimm_and_set_flags(int imm,int rt)
1068 assem_debug("add $%d,%%%s",imm,regname[rt]);
// imm fitting in a signed byte selects the first encoding path.
1069 if(imm<128&&imm>=-128) {
1071 output_modrm(3,rt,0);
1077 output_modrm(3,rt,0);
// Emit rt += imm using lea with rt as both base and destination.
1081 static void emit_addimm_no_flags(int imm,int rt)
1084 assem_debug("lea %d(%%%s),%%%s",imm,regname[rt],regname[rt]);
// mod=1 when imm fits in a signed byte, mod=2 otherwise.
1086 if(imm<128&&imm>=-128) {
1087 output_modrm(1,rt,rt);
1090 output_modrm(2,rt,rt);
// Emit an add-with-carry of immediate imm into rt (opcode-extension field 2).
1096 static void emit_adcimm(int imm,u_int rt)
1098 assem_debug("adc $%d,%%%s",imm,regname[rt]);
// imm fitting in a signed byte selects the first encoding path.
1100 if(imm<128&&imm>=-128) {
1102 output_modrm(3,rt,2);
1108 output_modrm(3,rt,2);
// Emit a subtract-with-borrow of immediate imm from rt (opcode-extension field 3).
1112 static void emit_sbbimm(int imm,u_int rt)
1114 assem_debug("sbb $%d,%%%s",imm,regname[rt]);
// imm fitting in a signed byte selects the first encoding path.
1116 if(imm<128&&imm>=-128) {
1118 output_modrm(3,rt,3);
1124 output_modrm(3,rt,3);
// Emit a 64-bit add of a 32-bit immediate to the register pair (rsh:rsl), result in (rth:rtl).
1129 static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
// In-place case: add imm to the low half, then adc into the high half.
1131 if(rsh==rth&&rsl==rtl) {
1132 assem_debug("add $%d,%%%s",imm,regname[rtl]);
1133 if(imm<128&&imm>=-128) {
1135 output_modrm(3,rtl,0);
1141 output_modrm(3,rtl,0);
// The high-half immediate is imm>>31, emitted as one byte (0 or -1 when the shift is
// arithmetic, i.e. the sign extension of imm).
1144 assem_debug("adc $%d,%%%s",imm>>31,regname[rth]);
1146 output_modrm(3,rth,2);
1147 output_byte(imm>>31);
// Other case ends by redoing the operation in place on (rth:rtl).
// NOTE(review): the preceding copy into rth/rtl is not visible here.
1152 emit_addimm64_32(rth,rtl,imm,rth,rtl);
// Emit a register-register subtract-with-borrow (ModRM mod=3, rm=rs2, reg field=rs1).
1156 static void emit_sbb(int rs1,int rs2)
1158 assem_debug("sbb %%%s,%%%s",regname[rs1],regname[rs2]);
1160 output_modrm(3,rs2,rs1);
// Emit rt = rs & imm. The visible in-place form uses opcode-extension field 4; another
// path finishes by calling itself in place on rt.
// NOTE(review): the selecting conditions are not visible here.
1163 static void emit_andimm(int rs,int imm,int rt)
1169 assem_debug("and $%d,%%%s",imm,regname[rt]);
// imm fitting in a signed byte selects the first encoding path.
1170 if(imm<128&&imm>=-128) {
1172 output_modrm(3,rt,4);
1178 output_modrm(3,rt,4);
1184 emit_andimm(rt,imm,rt);
// Emit rt = rs | imm. The visible in-place form uses opcode-extension field 1; another
// path finishes by calling itself in place on rt.
// NOTE(review): the selecting conditions are not visible here.
1188 static void emit_orimm(int rs,int imm,int rt)
1192 assem_debug("or $%d,%%%s",imm,regname[rt]);
// imm fitting in a signed byte selects the first encoding path.
1193 if(imm<128&&imm>=-128) {
1195 output_modrm(3,rt,1);
1201 output_modrm(3,rt,1);
1208 emit_orimm(rt,imm,rt);
// Emit rt = rs ^ imm. The visible in-place form uses opcode-extension field 6; another
// path finishes by calling itself in place on rt.
// NOTE(review): the selecting conditions are not visible here.
1212 static void emit_xorimm(int rs,int imm,int rt)
1216 assem_debug("xor $%d,%%%s",imm,regname[rt]);
// imm fitting in a signed byte selects the first encoding path.
1217 if(imm<128&&imm>=-128) {
1219 output_modrm(3,rt,6);
1225 output_modrm(3,rt,6);
1232 emit_xorimm(rt,imm,rt);
// Emit rt = rs << imm (opcode-extension field 4).
1236 static void emit_shlimm(int rs,u_int imm,int rt)
1239 assem_debug("shl %%%s,%d",regname[rt],imm);
// A count of 1 uses opcode 0xD1 with no count byte; other counts use 0xC1 followed by
// the count byte (appended only when imm>1).
1241 if(imm==1) output_byte(0xD1);
1242 else output_byte(0xC1);
1243 output_modrm(3,rt,4);
1244 if(imm>1) output_byte(imm);
// Other path: finish by shifting rt in place.
1248 emit_shlimm(rt,imm,rt);
// Emit rt = rs >> imm, logical shift (opcode-extension field 5).
1252 static void emit_shrimm(int rs,u_int imm,int rt)
1255 assem_debug("shr %%%s,%d",regname[rt],imm);
// A count of 1 uses opcode 0xD1 with no count byte; other counts use 0xC1 followed by
// the count byte (appended only when imm>1).
1257 if(imm==1) output_byte(0xD1);
1258 else output_byte(0xC1);
1259 output_modrm(3,rt,5);
1260 if(imm>1) output_byte(imm);
// Other path: finish by shifting rt in place.
1264 emit_shrimm(rt,imm,rt);
// Emit rt = rs >> imm, arithmetic shift (opcode-extension field 7).
1268 static void emit_sarimm(int rs,u_int imm,int rt)
1271 assem_debug("sar %%%s,%d",regname[rt],imm);
// A count of 1 uses opcode 0xD1 with no count byte; other counts use 0xC1 followed by
// the count byte (appended only when imm>1).
1273 if(imm==1) output_byte(0xD1);
1274 else output_byte(0xC1);
1275 output_modrm(3,rt,7);
1276 if(imm>1) output_byte(imm);
// Other path: finish by shifting rt in place.
1280 emit_sarimm(rt,imm,rt);
// Emit rt = rs rotated right by imm (opcode-extension field 1).
1284 static void emit_rorimm(int rs,u_int imm,int rt)
1287 assem_debug("ror %%%s,%d",regname[rt],imm);
// A count of 1 uses opcode 0xD1 with no count byte; other counts use 0xC1 followed by
// the count byte (appended only when imm>1).
1289 if(imm==1) output_byte(0xD1);
1290 else output_byte(0xC1);
1291 output_modrm(3,rt,1);
1292 if(imm>1) output_byte(imm);
// Other path: finish by rotating rt in place.
1296 emit_rorimm(rt,imm,rt);
// Emit a double-precision left shift of rt by imm, with bits supplied from rs2
// (ModRM mod=3, rm=rt, reg field=rs2). Another path finishes by operating in place on rt.
1300 static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1303 assem_debug("shld %%%s,%%%s,%d",regname[rt],regname[rs2],imm);
1307 output_modrm(3,rt,rs2);
1312 emit_shldimm(rt,rs2,imm,rt);
// Emit a double-precision right shift of rt by imm, with bits supplied from rs2
// (ModRM mod=3, rm=rt, reg field=rs2). Another path finishes by operating in place on rt.
1316 static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1319 assem_debug("shrd %%%s,%%%s,%d",regname[rt],regname[rs2],imm);
1323 output_modrm(3,rt,rs2);
1328 emit_shrdimm(rt,rs2,imm,rt);
// Emit a left shift of r by the count in %cl (opcode-extension field 4).
1332 static void emit_shlcl(int r)
1334 assem_debug("shl %%%s,%%cl",regname[r]);
1336 output_modrm(3,r,4);
// Emit a logical right shift of r by the count in %cl (opcode-extension field 5).
1338 static void emit_shrcl(int r)
1340 assem_debug("shr %%%s,%%cl",regname[r]);
1342 output_modrm(3,r,5);
// Emit an arithmetic right shift of r by the count in %cl (opcode-extension field 7).
1344 static void emit_sarcl(int r)
1346 assem_debug("sar %%%s,%%cl",regname[r]);
1348 output_modrm(3,r,7);
// Emit a double-precision left shift of r1 by %cl with bits supplied from r2
// (ModRM mod=3, rm=r1, reg field=r2).
1351 static void emit_shldcl(int r1,int r2)
1353 assem_debug("shld %%%s,%%%s,%%cl",regname[r1],regname[r2]);
1356 output_modrm(3,r1,r2);
// Emit a double-precision right shift of r1 by %cl with bits supplied from r2
// (ModRM mod=3, rm=r1, reg field=r2).
1358 static void emit_shrdcl(int r1,int r2)
1360 assem_debug("shrd %%%s,%%%s,%%cl",regname[r1],regname[r2]);
1363 output_modrm(3,r1,r2);
// Emit a compare of register rs against immediate imm (opcode-extension field 7).
1366 static void emit_cmpimm(int rs,int imm)
1368 assem_debug("cmp $%d,%%%s",imm,regname[rs]);
// imm fitting in a signed byte selects the first encoding path.
1369 if(imm<128&&imm>=-128) {
1371 output_modrm(3,rs,7);
1377 output_modrm(3,rs,7);
// Emit a conditional move (not-equal) into rt from the 32-bit word at absolute address addr.
1382 static void emit_cmovne(const u_int *addr,int rt)
1384 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
// Trace annotation only: names the well-known constants.
1385 if(addr==&const_zero) assem_debug(" [zero]");
1386 else if(addr==&const_one) assem_debug(" [one]");
1387 else assem_debug("");
// mod=0, rm=5: absolute 32-bit address operand, followed by the address itself.
1390 output_modrm(0,5,rt);
1391 output_w32((int)addr);
// Emit a conditional move (signed less-than) into rt from the 32-bit word at absolute address addr.
1393 static void emit_cmovl(const u_int *addr,int rt)
1395 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
// Trace annotation only: names the well-known constants.
1396 if(addr==&const_zero) assem_debug(" [zero]");
1397 else if(addr==&const_one) assem_debug(" [one]");
1398 else assem_debug("");
// mod=0, rm=5: absolute 32-bit address operand, followed by the address itself.
1401 output_modrm(0,5,rt);
1402 output_w32((int)addr);
// Emit a conditional move (sign flag set) into rt from the 32-bit word at absolute address addr.
1404 static void emit_cmovs(const u_int *addr,int rt)
1406 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
// Trace annotation only: names the well-known constants.
1407 if(addr==&const_zero) assem_debug(" [zero]");
1408 else if(addr==&const_one) assem_debug(" [one]");
1409 else assem_debug("");
// mod=0, rm=5: absolute 32-bit address operand, followed by the address itself.
1412 output_modrm(0,5,rt);
1413 output_w32((int)addr);
// Emit a register-register conditional move (not-equal) from rs to rt.
1415 static void emit_cmovne_reg(int rs,int rt)
1417 assem_debug("cmovne %%%s,%%%s",regname[rs],regname[rt]);
1420 output_modrm(3,rs,rt);
// Emit a register-register conditional move (signed less-than) from rs to rt.
1422 static void emit_cmovl_reg(int rs,int rt)
1424 assem_debug("cmovl %%%s,%%%s",regname[rs],regname[rt]);
1427 output_modrm(3,rs,rt);
// Emit a register-register conditional move (sign flag set) from rs to rt.
1429 static void emit_cmovs_reg(int rs,int rt)
1431 assem_debug("cmovs %%%s,%%%s",regname[rs],regname[rt]);
1434 output_modrm(3,rs,rt);
// Emit a register-register conditional move (no carry; traced as "cmovae") from rs to rt.
1436 static void emit_cmovnc_reg(int rs,int rt)
1438 assem_debug("cmovae %%%s,%%%s",regname[rs],regname[rt]);
1441 output_modrm(3,rs,rt);
// Emit a register-register conditional move (unsigned above) from rs to rt.
1443 static void emit_cmova_reg(int rs,int rt)
1445 assem_debug("cmova %%%s,%%%s",regname[rs],regname[rt]);
1448 output_modrm(3,rs,rt);
// Emit a register-register conditional move (parity flag set) from rs to rt.
1450 static void emit_cmovp_reg(int rs,int rt)
1452 assem_debug("cmovp %%%s,%%%s",regname[rs],regname[rt]);
1455 output_modrm(3,rs,rt);
// Emit a register-register conditional move (parity flag clear) from rs to rt.
1457 static void emit_cmovnp_reg(int rs,int rt)
1459 assem_debug("cmovnp %%%s,%%%s",regname[rs],regname[rt]);
1462 output_modrm(3,rs,rt);
// Emit a set-on-less-than into rt (ModRM mod=3, rm=rt, opcode-extension field 2).
1464 static void emit_setl(int rt)
1466 assem_debug("setl %%%s",regname[rt]);
1469 output_modrm(3,rt,2);
// Emit a zero-extending byte move from rs into rt.
// The trace prints the source register name without its first character (regname[rs]+1).
1471 static void emit_movzbl_reg(int rs, int rt)
1473 assem_debug("movzbl %%%s,%%%s",regname[rs]+1,regname[rt]);
1476 output_modrm(3,rs,rt);
// Emit rt = (rs < imm) for a signed 32-bit compare.
// Two result-materialising sequences are visible (one ending in a zero-extending byte
// move, one using cmovl from const_one). NOTE(review): the condition choosing between
// them is not visible here.
1479 static void emit_slti32(int rs,int imm,int rt)
// rt can be zeroed before the compare only when it is not also the source.
1481 if(rs!=rt) emit_zeroreg(rt);
1482 emit_cmpimm(rs,imm);
1485 if(rs==rt) emit_movzbl_reg(rt,rt);
// When rt is the source it is cleared after the compare, using a mov instead of an xor.
1489 if(rs==rt) emit_movimm(0,rt);
1490 emit_cmovl(&const_one,rt);
// Emit rt = (rs < imm) for an unsigned 32-bit compare.
// NOTE(review): the instruction that turns the compare result into 0/1 is not visible here.
1493 static void emit_sltiu32(int rs,int imm,int rt)
// rt can be zeroed before the compare only when it is not also the source.
1495 if(rs!=rt) emit_zeroreg(rt);
1496 emit_cmpimm(rs,imm);
1497 if(rs==rt) emit_movimm(0,rt);
// Emit rt = ((rsh:rsl) < imm) for a signed 64-bit register pair against a 32-bit immediate.
// The low-half result comes from emit_slti32(); it is then corrected from the high half.
// NOTE(review): the branch conditions and the first high-half test are not visible here.
1500 static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
1503 emit_slti32(rsl,imm,rt);
// First visible correction: not-equal -> 0, then sign set -> 1.
1507 emit_cmovne(&const_zero,rt);
1508 emit_cmovs(&const_one,rt);
// Second visible correction: high half compared against -1; not-equal -> 0, less -> 1.
1512 emit_cmpimm(rsh,-1);
1513 emit_cmovne(&const_zero,rt);
1514 emit_cmovl(&const_one,rt);
// Emit rt = ((rsh:rsl) < imm) for an unsigned 64-bit register pair against a 32-bit immediate.
// The low-half result comes from emit_sltiu32(); it is then corrected from the high half.
// NOTE(review): the branch conditions and the first high-half test are not visible here.
1517 static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
1520 emit_sltiu32(rsl,imm,rt);
// First visible correction: not-equal -> 0.
1524 emit_cmovne(&const_zero,rt);
// Second visible correction: high half compared against -1; not-equal -> 1.
1528 emit_cmpimm(rsh,-1);
1529 emit_cmovne(&const_one,rt);
// Emit a register-register compare (ModRM mod=3, rm=rs, reg field=rt).
1533 static void emit_cmp(int rs,int rt)
1535 assem_debug("cmp %%%s,%%%s",regname[rt],regname[rs]);
1537 output_modrm(3,rs,rt);
// Emit rt = (rs > 0), by name. The visible tail forces rt to 0 when the preceding
// comparison reports "less". NOTE(review): the preceding steps are not visible here.
1539 static void emit_set_gz32(int rs, int rt)
1541 //assem_debug("set_gz32");
1544 emit_cmovl(&const_zero,rt);
// Emit rt = (rs != 0), by name. NOTE(review): the body is not visible here.
1546 static void emit_set_nz32(int rs, int rt)
1548 //assem_debug("set_nz32");
// Emit rt = ((rsh:rsl) > 0) for a 64-bit register pair.
// Starts from the low-half result of emit_set_gz32(), then overrides it from flags:
// not-equal -> 1, sign set -> 0. NOTE(review): the high-half test producing those flags
// is not visible here.
1553 static void emit_set_gz64_32(int rsh, int rsl, int rt)
1555 //assem_debug("set_gz64");
1556 emit_set_gz32(rsl,rt);
1558 emit_cmovne(&const_one,rt);
1559 emit_cmovs(&const_zero,rt);
// Emit rt = ((rsh:rsl) != 0): combines both halves with emit_or_and_set_flags() into rt,
// then loads 1 into rt when the flags report not-equal (nonzero).
1561 static void emit_set_nz64_32(int rsh, int rsl, int rt)
1563 //assem_debug("set_nz64");
1564 emit_or_and_set_flags(rsh,rsl,rt);
1565 emit_cmovne(&const_one,rt);
// Emit rt = (rs1 < rs2), signed 32-bit. NOTE(review): the compare itself is not visible here.
1567 static void emit_set_if_less32(int rs1, int rs2, int rt)
1569 //assem_debug("set if less (%%%s,%%%s),%%%s",regname[rs1],regname[rs2],regname[rt]);
// rt is zeroed before the compare only when it is not one of the sources; otherwise
// it is cleared afterwards with a mov.
1570 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1572 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1573 emit_cmovl(&const_one,rt);
// Emit rt = carry of (rs1 compared with rs2), by name.
// NOTE(review): the compare and the carry-consuming instruction are not visible here.
1575 static void emit_set_if_carry32(int rs1, int rs2, int rt)
1577 //assem_debug("set if carry (%%%s,%%%s),%%%s",regname[rs1],regname[rs2],regname[rt]);
// rt is zeroed before the compare only when it is not one of the sources; otherwise
// it is cleared afterwards with a mov.
1578 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1580 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
// Emit rt = ((u1:l1) < (u2:l2)), signed 64-bit, by name. The visible tail loads 1 into rt
// on "less". NOTE(review): the preceding comparison sequence is not visible here.
1583 static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1585 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1592 emit_cmovl(&const_one,rt);
// 64-bit counterpart of emit_set_if_carry32(), by name.
// NOTE(review): the body is not visible here.
1594 static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1596 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
// Emit a relative call to absolute address a.
// The trace is printed before any byte is emitted, hence the +5/-5 (full instruction
// length); the stored rel32 is a-(out)-4 because `out` then points at the 4-byte field.
1606 static void emit_call(int a)
1608 assem_debug("call %x (%x+%x)",a,(int)out+5,a-(int)out-5);
1610 output_w32(a-(int)out-4);
// Emit a relative jump to absolute address a.
// The trace is printed before any byte is emitted, hence the +5/-5 (full instruction
// length); the stored rel32 is a-(out)-4 because `out` then points at the 4-byte field.
1612 static void emit_jmp(int a)
1614 assem_debug("jmp %x (%x+%x)",a,(int)out+5,a-(int)out-5);
1616 output_w32(a-(int)out-4);
// Emit a conditional jump (not-equal) to absolute address a; the rel32 is taken
// relative to the end of the 4-byte displacement field.
1618 static void emit_jne(int a)
1620 assem_debug("jne %x",a);
1623 output_w32(a-(int)out-4);
// Emit a conditional jump (equal) to absolute address a; the rel32 is taken
// relative to the end of the 4-byte displacement field.
1625 static void emit_jeq(int a)
1627 assem_debug("jeq %x",a);
1630 output_w32(a-(int)out-4);
// Emit a conditional jump (sign flag set) to absolute address a; the rel32 is taken
// relative to the end of the 4-byte displacement field.
1632 static void emit_js(int a)
1634 assem_debug("js %x",a);
1637 output_w32(a-(int)out-4);
// Emit a conditional jump (sign flag clear) to absolute address a; the rel32 is taken
// relative to the end of the 4-byte displacement field.
1639 static void emit_jns(int a)
1641 assem_debug("jns %x",a);
1644 output_w32(a-(int)out-4);
// Emit a conditional jump (signed less-than) to absolute address a; the rel32 is taken
// relative to the end of the 4-byte displacement field.
1646 static void emit_jl(int a)
1648 assem_debug("jl %x",a);
1651 output_w32(a-(int)out-4);
// Emit a conditional jump (signed greater-or-equal) to absolute address a; the rel32 is
// taken relative to the end of the 4-byte displacement field.
1653 static void emit_jge(int a)
1655 assem_debug("jge %x",a);
1658 output_w32(a-(int)out-4);
// Emit a conditional jump (overflow flag clear) to absolute address a; the rel32 is
// taken relative to the end of the 4-byte displacement field.
1660 static void emit_jno(int a)
1662 assem_debug("jno %x",a);
1665 output_w32(a-(int)out-4);
// Emit a conditional jump (carry flag set) to absolute address a; the rel32 is taken
// relative to the end of the 4-byte displacement field.
1667 static void emit_jc(int a)
1669 assem_debug("jc %x",a);
1672 output_w32(a-(int)out-4);
// Emit a push of the immediate value imm.
// NOTE(review): the encoding bytes are not visible here.
1675 static void emit_pushimm(int imm)
1677 assem_debug("push $%x",imm);
// Push the 32-bit word stored at absolute address addr. ModRM uses mod=0,
// rm=5 (absolute disp32 addressing) with opcode extension 6.
1681 static void emit_pushmem(int addr)
1683 assem_debug("push *%x",addr);
1685 output_modrm(0,5,6);
// Emit pusha (per the debug text): push all general-purpose registers.
1688 static void emit_pusha()
1690 assem_debug("pusha");
// Emit popa (per the debug text): pop all general-purpose registers.
1693 static void emit_popa()
1695 assem_debug("popa");
// Push host register r using the one-byte encoding 0x50+r.
1698 static void emit_pushreg(u_int r)
1700 assem_debug("push %%%s",regname[r]);
1702 output_byte(0x50+r);
// Pop into host register r using the one-byte encoding 0x58+r.
1704 static void emit_popreg(u_int r)
1706 assem_debug("pop %%%s",regname[r]);
1708 output_byte(0x58+r);
// Indirect call through host register r: register-direct ModRM (mod=3) with
// opcode extension 2.
1710 static void emit_callreg(u_int r)
1712 assem_debug("call *%%%s",regname[r]);
1715 output_modrm(3,r,2);
1717 /*static void emit_jmpreg(u_int r)
1719 assem_debug("jmp *%%%s",regname[r]);
1722 output_modrm(3,r,4);
// Indirect jump through the pointer stored at addr+r: ModRM mod=2 (register
// base plus disp32) with opcode extension 4.
1724 static void emit_jmpmem_indexed(u_int addr,u_int r)
1726 assem_debug("jmp *%x(%%%s)",addr,regname[r]);
1729 output_modrm(2,r,4);
// Load the 32-bit word at absolute address addr into host register rt
// (mod=0, rm=5: absolute disp32 addressing).
1733 static void emit_readword(int addr, int rt)
1735 assem_debug("mov %x,%%%s",addr,regname[rt]);
1737 output_modrm(0,5,rt);
// Load the 32-bit word at addr+rs into rt. Offsets that fit in a signed byte
// use the disp8 form (mod=1); the mod=2 lines are the disp32 form (the else
// separating them is not visible here). A SIB byte is added when the base is
// ESP, which cannot be encoded directly in the ModRM rm field.
1740 static void emit_readword_indexed(int addr, int rs, int rt)
1742 assem_debug("mov %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1744 if(addr<128&&addr>=-128) {
1745 output_modrm(1,rs,rt);
1746 if(rs==ESP) output_sib(0,4,4);
1751 output_modrm(2,rs,rt);
1752 if(rs==ESP) output_sib(0,4,4);
// Load a 32-bit word from guest address addr into rt. With no map register
// (map<0) the load goes straight to rdram, rebased from 0x80000000; the
// remaining lines encode mov (addr,map,4),rt - presumably the else path,
// using map*4 as the index with no base register (SIB base=5).
1756 static void emit_readword_tlb(int addr, int map, int rt)
1758 if(map<0) emit_readword(addr+(int)rdram-0x80000000, rt);
1761 assem_debug("mov (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1763 output_modrm(0,4,rt);
1764 output_sib(2,map,5);
// Load a 32-bit word from addr(rs,map,4) into rt; with map<0 it defers to
// emit_readword_indexed on rdram (rebased from 0x80000000).
// Three displacement sizes are chosen: none when addr==0 (not possible with
// EBP as base, since mod=0 with base 5 means "disp32, no base"), disp8 when
// addr fits a signed byte, otherwise the mod=2 (disp32) form.
1768 static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
1770 if(map<0) emit_readword_indexed(addr+(int)rdram-0x80000000, rs, rt);
1772 assem_debug("mov %x(%%%s,%%%s,4),%%%s",addr,regname[rs],regname[map],regname[rt]);
1775 if(addr==0&&rs!=EBP) {
1776 output_modrm(0,4,rt);
1777 output_sib(2,map,rs);
1779 else if(addr<128&&addr>=-128) {
1780 output_modrm(1,4,rt);
1781 output_sib(2,map,rs);
1786 output_modrm(2,4,rt);
1787 output_sib(2,map,rs);
// Load the 32-bit word at addr+rs*4 into rt (table lookup form); ModRM rm=4
// announces a SIB byte, which is emitted on lines not visible here.
1792 static void emit_movmem_indexedx4(int addr, int rs, int rt)
1794 assem_debug("mov (%x,%%%s,4),%%%s",addr,regname[rs],regname[rt]);
1796 output_modrm(0,4,rt);
// Load a 64-bit value as two 32-bit words: the word at addr goes to rh
// (skipped when rh<0) and the word at addr+4 goes to rl. The first pair of
// loads addresses rdram directly (0x7FFFFFFC is 0x80000000-4, i.e. addr+4);
// the second pair goes through the map register.
// NOTE(review): the condition selecting between the two pairs is not visible
// here - presumably map<0, as in emit_readword_tlb.
1800 static void emit_readdword_tlb(int addr, int map, int rh, int rl)
1803 if(rh>=0) emit_readword(addr+(int)rdram-0x80000000, rh);
1804 emit_readword(addr+(int)rdram-0x7FFFFFFC, rl);
1807 if(rh>=0) emit_movmem_indexedx4(addr, map, rh);
1808 emit_movmem_indexedx4(addr+4, map, rl);
// Indexed 64-bit load built from two emit_readword_indexed_tlb calls: the
// word at addr goes to rh (skipped when rh<0), the word at addr+4 to rl.
1811 static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
1814 if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
1815 emit_readword_indexed_tlb(addr+4, rs, map, rl);
// Sign-extending byte load (movsbl) from absolute address addr into rt.
1817 static void emit_movsbl(int addr, int rt)
1819 assem_debug("movsbl %x,%%%s",addr,regname[rt]);
1822 output_modrm(0,5,rt);
// Sign-extending byte load (movsbl) from addr+rs into rt; always uses the
// disp32 form (mod=2).
1825 static void emit_movsbl_indexed(int addr, int rs, int rt)
1827 assem_debug("movsbl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1830 output_modrm(2,rs,rt);
// Sign-extending byte load from guest address addr into rt. With map<0 it
// reads rdram directly (rebased from 0x80000000); the remaining lines encode
// movsbl (addr,map,4),rt - presumably the else path.
1833 static void emit_movsbl_tlb(int addr, int map, int rt)
1835 if(map<0) emit_movsbl(addr+(int)rdram-0x80000000, rt);
1838 assem_debug("movsbl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1841 output_modrm(0,4,rt);
1842 output_sib(2,map,5);
// Sign-extending byte load from addr(rs,map,4) into rt; with map<0 it defers
// to emit_movsbl_indexed on rdram. Displacement size is picked as: none when
// addr==0 and the base is not EBP, disp8 when addr fits a signed byte,
// otherwise the mod=2 (disp32) form.
1846 static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1848 if(map<0) emit_movsbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1850 assem_debug("movsbl %x(%%%s,%%%s,4),%%%s",addr,regname[rs],regname[map],regname[rt]);
1854 if(addr==0&&rs!=EBP) {
1855 output_modrm(0,4,rt);
1856 output_sib(2,map,rs);
1858 else if(addr<128&&addr>=-128) {
1859 output_modrm(1,4,rt);
1860 output_sib(2,map,rs);
1865 output_modrm(2,4,rt);
1866 output_sib(2,map,rs);
// Sign-extending 16-bit load (movswl) from absolute address addr into rt.
1871 static void emit_movswl(int addr, int rt)
1873 assem_debug("movswl %x,%%%s",addr,regname[rt]);
1876 output_modrm(0,5,rt);
// Sign-extending 16-bit load (movswl) from addr+rs into rt; always uses the
// disp32 form (mod=2).
1879 static void emit_movswl_indexed(int addr, int rs, int rt)
1881 assem_debug("movswl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1884 output_modrm(2,rs,rt);
// Sign-extending 16-bit load from guest address addr into rt. With map<0 it
// reads rdram directly (rebased from 0x80000000); the remaining lines encode
// movswl (addr,map,4),rt - presumably the else path.
1887 static void emit_movswl_tlb(int addr, int map, int rt)
1889 if(map<0) emit_movswl(addr+(int)rdram-0x80000000, rt);
1892 assem_debug("movswl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1895 output_modrm(0,4,rt);
1896 output_sib(2,map,5);
// Zero-extending byte load (movzbl) from absolute address addr into rt.
1900 static void emit_movzbl(int addr, int rt)
1902 assem_debug("movzbl %x,%%%s",addr,regname[rt]);
1905 output_modrm(0,5,rt);
// Zero-extending byte load (movzbl) from addr+rs into rt; always uses the
// disp32 form (mod=2).
1908 static void emit_movzbl_indexed(int addr, int rs, int rt)
1910 assem_debug("movzbl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1913 output_modrm(2,rs,rt);
// Zero-extending byte load from guest address addr into rt. With map<0 it
// reads rdram directly (rebased from 0x80000000); the remaining lines encode
// movzbl (addr,map,4),rt - presumably the else path.
1916 static void emit_movzbl_tlb(int addr, int map, int rt)
1918 if(map<0) emit_movzbl(addr+(int)rdram-0x80000000, rt);
1921 assem_debug("movzbl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1924 output_modrm(0,4,rt);
1925 output_sib(2,map,5);
// Zero-extending byte load from addr(rs,map,4) into rt; with map<0 it defers
// to emit_movzbl_indexed on rdram. Displacement size is picked as: none when
// addr==0 and the base is not EBP, disp8 when addr fits a signed byte,
// otherwise the mod=2 (disp32) form.
1929 static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
1931 if(map<0) emit_movzbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1933 assem_debug("movzbl %x(%%%s,%%%s,4),%%%s",addr,regname[rs],regname[map],regname[rt]);
1937 if(addr==0&&rs!=EBP) {
1938 output_modrm(0,4,rt);
1939 output_sib(2,map,rs);
1941 else if(addr<128&&addr>=-128) {
1942 output_modrm(1,4,rt);
1943 output_sib(2,map,rs);
1948 output_modrm(2,4,rt);
1949 output_sib(2,map,rs);
// Zero-extending 16-bit load (movzwl) from absolute address addr into rt.
1954 static void emit_movzwl(int addr, int rt)
1956 assem_debug("movzwl %x,%%%s",addr,regname[rt]);
1959 output_modrm(0,5,rt);
// Zero-extending 16-bit load (movzwl) from addr+rs into rt; always uses the
// disp32 form (mod=2).
1962 static void emit_movzwl_indexed(int addr, int rs, int rt)
1964 assem_debug("movzwl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1967 output_modrm(2,rs,rt);
// Zero-extending 16-bit load from guest address addr into rt. With map<0 it
// reads rdram directly (rebased from 0x80000000); the remaining lines encode
// movzwl (addr,map,4),rt - presumably the else path.
1970 static void emit_movzwl_tlb(int addr, int map, int rt)
1972 if(map<0) emit_movzwl(addr+(int)rdram-0x80000000, rt);
1975 assem_debug("movzwl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1978 output_modrm(0,4,rt);
1979 output_sib(2,map,5);
// Register-to-register zero-extension of the low 16 bits of rs into rt.
// The debug text prints regname[rs]+1, i.e. the register name minus its
// first character (presumably turning "eax" into "ax").
1984 static void emit_movzwl_reg(int rs, int rt)
1986 assem_debug("movzwl %%%s,%%%s",regname[rs]+1,regname[rt]);
1989 output_modrm(3,rs,rt);
// Exchange host registers rs and rt. Two encodings appear: the one-byte
// 0x90+rt form and a register-direct ModRM form.
// NOTE(review): the condition choosing between them is not visible here;
// presumably the short form is used when rs is EAX - confirm.
1992 static void emit_xchg(int rs, int rt)
1994 assem_debug("xchg %%%s,%%%s",regname[rs],regname[rt]);
1996 output_byte(0x90+rt);
2001 output_modrm(3,rs,rt);
// Store the 32-bit host register rt to absolute address addr.
2004 static void emit_writeword(int rt, int addr)
2006 assem_debug("movl %%%s,%x",regname[rt],addr);
2008 output_modrm(0,5,rt);
// Store the 32-bit register rt to addr+rs. Offsets that fit a signed byte use
// the disp8 form (mod=1); the mod=2 lines are the disp32 form. A SIB byte is
// added when the base register is ESP.
2011 static void emit_writeword_indexed(int rt, int addr, int rs)
2013 assem_debug("mov %%%s,%x+%%%s",regname[rt],addr,regname[rs]);
2015 if(addr<128&&addr>=-128) {
2016 output_modrm(1,rs,rt);
2017 if(rs==ESP) output_sib(0,4,4);
2022 output_modrm(2,rs,rt);
2023 if(rs==ESP) output_sib(0,4,4);
// Store the 32-bit register rt to addr(rs,map,1); with map<0 it defers to
// emit_writeword_indexed on rdram (rebased from 0x80000000). Unlike the read
// helpers the map register is used unscaled (SIB scale 0). Displacement size
// is none / disp8 / disp32 depending on addr, with EBP excluded from the
// no-displacement form. temp is not used in the visible lines.
2027 static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2029 if(map<0) emit_writeword_indexed(rt, addr+(int)rdram-0x80000000, rs);
2031 assem_debug("mov %%%s,%x(%%%s,%%%s,1)",regname[rt],addr,regname[rs],regname[map]);
2034 if(addr==0&&rs!=EBP) {
2035 output_modrm(0,4,rt);
2036 output_sib(0,map,rs);
2038 else if(addr<128&&addr>=-128) {
2039 output_modrm(1,4,rt);
2040 output_sib(0,map,rs);
2045 output_modrm(2,4,rt);
2046 output_sib(0,map,rs);
// 64-bit store built from two 32-bit stores: rh is written at addr and rl at
// addr+4, both through emit_writeword_indexed_tlb.
2051 static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
2054 emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
2055 emit_writeword_indexed_tlb(rl, addr+4, rs, map, temp);
// Store the low 16 bits of rt to absolute address addr (movw). The debug text
// prints regname[rt]+1, presumably the 16-bit register name.
2057 static void emit_writehword(int rt, int addr)
2059 assem_debug("movw %%%s,%x",regname[rt]+1,addr);
2062 output_modrm(0,5,rt);
// Store the low 16 bits of rt to addr+rs (movw), using disp8 (mod=1) when
// addr fits a signed byte and the mod=2 (disp32) form otherwise.
2065 static void emit_writehword_indexed(int rt, int addr, int rs)
2067 assem_debug("movw %%%s,%x+%%%s",regname[rt]+1,addr,regname[rs]);
2070 if(addr<128&&addr>=-128) {
2071 output_modrm(1,rs,rt);
2076 output_modrm(2,rs,rt);
// Store the low byte of rt to absolute address addr (movb).
// NOTE(review): the trailing recursive call with EAX is presumably the
// fallback for registers that have no byte sub-register, wrapped in a swap
// with EAX; the guard and the swaps are not visible here - confirm.
2080 static void emit_writebyte(int rt, int addr)
2083 assem_debug("movb %%%cl,%x",regname[rt][1],addr);
2085 output_modrm(0,5,rt);
2091 emit_writebyte(EAX,addr);
// Store the low byte of rt to addr+rs (movb), disp8 or disp32 by offset size.
// The trailing recursive call stores from EAX instead and substitutes rt for
// the base when the base itself was EAX.
// NOTE(review): presumably this is the fallback for rt without a byte
// sub-register, after rt and EAX have been swapped - guard not visible here.
2095 static void emit_writebyte_indexed(int rt, int addr, int rs)
2098 assem_debug("movb %%%cl,%x+%%%s",regname[rt][1],addr,regname[rs]);
2100 if(addr<128&&addr>=-128) {
2101 output_modrm(1,rs,rt);
2106 output_modrm(2,rs,rt);
2113 emit_writebyte_indexed(EAX,addr,rs==EAX?rt:rs);
// Store the low byte of rt to addr(rs,map,1); with map<0 it defers to
// emit_writebyte_indexed on rdram (rebased from 0x80000000). Displacement
// size is none / disp8 / disp32 depending on addr, with EBP excluded from the
// no-displacement form. The trailing recursive call stores from EAX and
// substitutes rt for whichever of rs/map was EAX.
// NOTE(review): presumably the fallback for rt without a byte sub-register,
// after a swap with EAX - guard not visible here.
2117 static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2119 if(map<0) emit_writebyte_indexed(rt, addr+(int)rdram-0x80000000, rs);
2122 assem_debug("movb %%%cl,%x(%%%s,%%%s,1)",regname[rt][1],addr,regname[rs],regname[map]);
2125 if(addr==0&&rs!=EBP) {
2126 output_modrm(0,4,rt);
2127 output_sib(0,map,rs);
2129 else if(addr<128&&addr>=-128) {
2130 output_modrm(1,4,rt);
2131 output_sib(0,map,rs);
2136 output_modrm(2,4,rt);
2137 output_sib(0,map,rs);
2144 emit_writebyte_indexed_tlb(EAX,addr,rs==EAX?rt:rs,map==EAX?rt:map,temp);
// Store the 32-bit immediate imm to absolute address addr.
2148 static void emit_writeword_imm(int imm, int addr)
2150 assem_debug("movl $%x,%x",imm,addr);
2152 output_modrm(0,5,0);
// Store the immediate imm to addr(%esp). Only the disp8 form is generated, so
// addr must fit in a signed byte (asserted).
2156 static void emit_writeword_imm_esp(int imm, int addr)
2158 assem_debug("mov $%x,%x(%%esp)",imm,addr);
2159 assert(addr>=-128&&addr<128);
2161 output_modrm(1,4,0);
// Store the byte immediate imm to absolute address addr; imm must fit in a
// signed byte (asserted).
2166 static void emit_writebyte_imm(int imm, int addr)
2168 assem_debug("movb $%x,%x",imm,addr);
2169 assert(imm>=-128&&imm<128);
2171 output_modrm(0,5,0);
// Unsigned multiply by host register rs (register-direct ModRM, opcode
// extension 4).
2176 static void emit_mul(int rs)
2178 assem_debug("mul %%%s",regname[rs]);
2180 output_modrm(3,rs,4);
// Signed multiply by host register rs (register-direct ModRM, opcode
// extension 5).
2182 static void emit_imul(int rs)
2184 assem_debug("imul %%%s",regname[rs]);
2186 output_modrm(3,rs,5);
// Unsigned divide by host register rs (register-direct ModRM, opcode
// extension 6).
2188 static void emit_div(int rs)
2190 assem_debug("div %%%s",regname[rs]);
2192 output_modrm(3,rs,6);
// Signed divide by host register rs (register-direct ModRM, opcode
// extension 7).
2194 static void emit_idiv(int rs)
2196 assem_debug("idiv %%%s",regname[rs]);
2198 output_modrm(3,rs,7);
// Emit cdq (going by the function name; the body is not visible here).
2200 static void emit_cdq()
2206 // Load 2 immediates optimizing for small code size
// Loads imm1 into rt1, then produces imm2 in rt2: when the difference
// imm2-imm1 fits in a signed byte it is derived from rt1 with emit_addimm
// (short immediate), otherwise it is loaded directly.
// NOTE(review): imm2-imm1 is evaluated in int and could overflow for
// operands of opposite sign near the int limits.
2207 static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2209 emit_movimm(imm1,rt1);
2210 if(imm2-imm1<128&&imm2-imm1>=-128) emit_addimm(rt1,imm2-imm1,rt2);
2211 else emit_movimm(imm2,rt2);
2214 // special case for checking pending_exception
// Compare the byte at absolute address addr with the immediate imm (cmpb).
// NOTE(review): the assert accepts -127..127, so -128 is rejected although it
// fits a signed byte - confirm whether that is intentional.
2215 static void emit_cmpmem_imm_byte(int addr,int imm)
2217 assert(imm<128&&imm>=-127);
2218 assem_debug("cmpb $%d,%x",imm,addr);
2220 output_modrm(0,5,7);
2225 // special case for checking invalid_code
// Shifts r right by 12 in place (r is clobbered) and compares the memory
// operand at addr+r with the immediate imm; addr is encoded as a disp32
// (mod=2). The assert restricts imm to -127..127.
2226 static void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
2228 assert(imm<128&&imm>=-127);
2230 emit_shrimm(r,12,r);
2231 assem_debug("cmp $%d,%x+%%%s",imm,addr,regname[r]);
2233 output_modrm(2,r,7);
2238 // special case for checking hash_table
// Compare host register rt with the 32-bit word at addr+rs (disp32 form).
// Both register numbers are asserted to be in 0..7.
2239 static void emit_cmpmem_indexed(int addr,int rs,int rt)
2241 assert(rs>=0&&rs<8);
2242 assert(rt>=0&&rt<8);
2243 assem_debug("cmp %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
2245 output_modrm(2,rs,rt);
2249 // Used to preload hash table entries
// Emit a prefetch of absolute address addr: ModRM mod=0/rm=5 with extension
// 1, followed by the 32-bit address.
2251 static void emit_prefetch(void *addr)
2253 assem_debug("prefetch %x",(int)addr);
2256 output_modrm(0,5,1);
2257 output_w32((int)addr);
2261 /*void emit_submem(int r,int addr)
2264 assem_debug("sub %x,%%%s",addr,regname[r]);
2266 output_modrm(0,5,r);
2267 output_w32((int)addr);
// Subtract host register r from the 32-bit word at absolute address addr
// (memory is the destination, per the debug text).
2269 static void emit_subfrommem(int addr,int r)
2272 assem_debug("sub %%%s,%x",regname[r],addr);
2274 output_modrm(0,5,r);
2275 output_w32((int)addr);
// x87 load of the single-precision value at (r). EBP cannot be used as a
// base with mod=0 (that encoding means absolute disp32), so it is encoded
// with mod=1 and an explicit zero disp8.
2278 static void emit_flds(int r)
2280 assem_debug("flds (%%%s)",regname[r]);
2282 if(r!=EBP) output_modrm(0,r,0);
2283 else {output_modrm(1,EBP,0);output_byte(0);}
// x87 load of the double-precision value at (r); EBP is encoded with mod=1
// and a zero disp8 because it has no mod=0 form.
2285 static void emit_fldl(int r)
2287 assem_debug("fldl (%%%s)",regname[r]);
2289 if(r!=EBP) output_modrm(0,r,0);
2290 else {output_modrm(1,EBP,0);output_byte(0);}
// x87 fucomip against stack register r (second opcode byte 0xe8+r).
2292 static void emit_fucomip(u_int r)
2294 assem_debug("fucomip %d",r);
2297 output_byte(0xe8+r);
// Emit x87 fchs (per the debug text).
2299 static void emit_fchs()
2301 assem_debug("fchs");
// Emit x87 fabs (per the debug text).
2305 static void emit_fabs()
2307 assem_debug("fabs");
// Emit x87 fsqrt (per the debug text).
2311 static void emit_fsqrt()
2313 assem_debug("fsqrt");
// x87 add of the single-precision value at (r) (opcode extension 0); EBP is
// encoded with mod=1 and a zero disp8.
2317 static void emit_fadds(int r)
2319 assem_debug("fadds (%%%s)",regname[r]);
2321 if(r!=EBP) output_modrm(0,r,0);
2322 else {output_modrm(1,EBP,0);output_byte(0);}
// x87 add of the double-precision value at (r) (opcode extension 0); EBP is
// encoded with mod=1 and a zero disp8.
2324 static void emit_faddl(int r)
2326 assem_debug("faddl (%%%s)",regname[r]);
2328 if(r!=EBP) output_modrm(0,r,0);
2329 else {output_modrm(1,EBP,0);output_byte(0);}
// x87 add with stack register r (second opcode byte 0xc0+r).
2331 static void emit_fadd(int r)
2333 assem_debug("fadd st%d",r);
2335 output_byte(0xc0+r);
// x87 subtract of the single-precision value at (r) (opcode extension 4);
// EBP is encoded with mod=1 and a zero disp8.
2337 static void emit_fsubs(int r)
2339 assem_debug("fsubs (%%%s)",regname[r]);
2341 if(r!=EBP) output_modrm(0,r,4);
2342 else {output_modrm(1,EBP,4);output_byte(0);}
// x87 subtract of the double-precision value at (r) (opcode extension 4);
// EBP is encoded with mod=1 and a zero disp8.
2344 static void emit_fsubl(int r)
2346 assem_debug("fsubl (%%%s)",regname[r]);
2348 if(r!=EBP) output_modrm(0,r,4);
2349 else {output_modrm(1,EBP,4);output_byte(0);}
// x87 subtract with stack register r (second opcode byte 0xe0+r).
2351 static void emit_fsub(int r)
2353 assem_debug("fsub st%d",r);
2355 output_byte(0xe0+r);
// x87 multiply by the single-precision value at (r) (opcode extension 1);
// EBP is encoded with mod=1 and a zero disp8.
2357 static void emit_fmuls(int r)
2359 assem_debug("fmuls (%%%s)",regname[r]);
2361 if(r!=EBP) output_modrm(0,r,1);
2362 else {output_modrm(1,EBP,1);output_byte(0);}
// x87 multiply by the double-precision value at (r) (opcode extension 1);
// EBP is encoded with mod=1 and a zero disp8.
2364 static void emit_fmull(int r)
2366 assem_debug("fmull (%%%s)",regname[r]);
2368 if(r!=EBP) output_modrm(0,r,1);
2369 else {output_modrm(1,EBP,1);output_byte(0);}
// x87 multiply with stack register r (second opcode byte 0xc8+r).
2371 static void emit_fmul(int r)
2373 assem_debug("fmul st%d",r);
2375 output_byte(0xc8+r);
// x87 divide by the single-precision value at (r) (opcode extension 6);
// EBP is encoded with mod=1 and a zero disp8.
2377 static void emit_fdivs(int r)
2379 assem_debug("fdivs (%%%s)",regname[r]);
2381 if(r!=EBP) output_modrm(0,r,6);
2382 else {output_modrm(1,EBP,6);output_byte(0);}
// x87 divide by the double-precision value at (r) (opcode extension 6);
// EBP is encoded with mod=1 and a zero disp8.
2384 static void emit_fdivl(int r)
2386 assem_debug("fdivl (%%%s)",regname[r]);
2388 if(r!=EBP) output_modrm(0,r,6);
2389 else {output_modrm(1,EBP,6);output_byte(0);}
// x87 divide with stack register r (second opcode byte 0xf0+r).
2391 static void emit_fdiv(int r)
2393 assem_debug("fdiv st%d",r);
2395 output_byte(0xf0+r);
// Logged as "fpop": presumably discards the top of the x87 stack - the
// encoding is not visible here, confirm.
2397 static void emit_fpop()
2400 assem_debug("fpop");
// x87 load of the 32-bit integer at (r) (opcode extension 0); EBP is encoded
// with mod=1 and a zero disp8.
2404 static void emit_fildl(int r)
2406 assem_debug("fildl (%%%s)",regname[r]);
2408 if(r!=EBP) output_modrm(0,r,0);
2409 else {output_modrm(1,EBP,0);output_byte(0);}
// x87 load of the 64-bit integer at (r) (opcode extension 5); EBP is encoded
// with mod=1 and a zero disp8.
2411 static void emit_fildll(int r)
2413 assem_debug("fildll (%%%s)",regname[r]);
2415 if(r!=EBP) output_modrm(0,r,5);
2416 else {output_modrm(1,EBP,5);output_byte(0);}
// x87 store-and-pop as a 32-bit integer to (r) (opcode extension 3); EBP is
// encoded with mod=1 and a zero disp8.
2418 static void emit_fistpl(int r)
2420 assem_debug("fistpl (%%%s)",regname[r]);
2422 if(r!=EBP) output_modrm(0,r,3);
2423 else {output_modrm(1,EBP,3);output_byte(0);}
// x87 store-and-pop as a 64-bit integer to (r) (opcode extension 7); EBP is
// encoded with mod=1 and a zero disp8.
2425 static void emit_fistpll(int r)
2427 assem_debug("fistpll (%%%s)",regname[r]);
2429 if(r!=EBP) output_modrm(0,r,7);
2430 else {output_modrm(1,EBP,7);output_byte(0);}
// x87 store-and-pop as single precision to (r) (opcode extension 3); EBP is
// encoded with mod=1 and a zero disp8.
2432 static void emit_fstps(int r)
2434 assem_debug("fstps (%%%s)",regname[r]);
2436 if(r!=EBP) output_modrm(0,r,3);
2437 else {output_modrm(1,EBP,3);output_byte(0);}
// x87 store-and-pop as double precision to (r) (opcode extension 3); EBP is
// encoded with mod=1 and a zero disp8.
2439 static void emit_fstpl(int r)
2441 assem_debug("fstpl (%%%s)",regname[r]);
2443 if(r!=EBP) output_modrm(0,r,3);
2444 else {output_modrm(1,EBP,3);output_byte(0);}
// Store the x87 control word to (%esp): ModRM rm=4 (SIB follows), opcode
// extension 7.
2446 static void emit_fnstcw_stack()
2448 assem_debug("fnstcw (%%esp)");
2450 output_modrm(0,4,7);
// Load the x87 control word from (%esp): ModRM rm=4 (SIB follows), opcode
// extension 5.
2453 static void emit_fldcw_stack()
2455 assem_debug("fldcw (%%esp)");
2457 output_modrm(0,4,5);
// Load the x87 control word from a memory operand built from addr and
// register r (per the debug text); ModRM rm=4 means a SIB byte follows on
// lines not visible here.
2460 static void emit_fldcw_indexed(int addr,int r)
2462 assem_debug("fldcw %x(%%%s)",addr,regname[r]);
2464 output_modrm(0,4,5);
// Load the x87 control word from absolute address addr.
2468 static void emit_fldcw(int addr)
2470 assem_debug("fldcw %x",addr);
2472 output_modrm(0,5,5);
// SSE scalar single load into xmm<ssereg>. Despite its name, addr is a host
// register number holding the pointer (it indexes regname and is used as the
// ModRM rm field). EBP is encoded with mod=1 and a zero disp8.
2476 static void emit_movss_load(u_int addr,u_int ssereg)
2478 assem_debug("movss (%%%s),xmm%d",regname[addr],ssereg);
2483 if(addr!=EBP) output_modrm(0,addr,ssereg);
2484 else {output_modrm(1,EBP,ssereg);output_byte(0);}
// SSE scalar double load into xmm<ssereg>; addr is a host register number
// holding the pointer. EBP is encoded with mod=1 and a zero disp8.
2486 static void emit_movsd_load(u_int addr,u_int ssereg)
2488 assem_debug("movsd (%%%s),xmm%d",regname[addr],ssereg);
2493 if(addr!=EBP) output_modrm(0,addr,ssereg);
2494 else {output_modrm(1,EBP,ssereg);output_byte(0);}
// Store the low 32 bits of xmm<ssereg> to the memory pointed to by host
// register addr (movd). EBP is encoded with mod=1 and a zero disp8.
2496 static void emit_movd_store(u_int ssereg,u_int addr)
2498 assem_debug("movd xmm%d,(%%%s)",ssereg,regname[addr]);
2503 if(addr!=EBP) output_modrm(0,addr,ssereg);
2504 else {output_modrm(1,EBP,ssereg);output_byte(0);}
// Register-to-register cvttps2dq (truncating float-to-int32 conversion);
// ssereg1 goes in the ModRM rm field and ssereg2 in the reg field.
2506 static void emit_cvttps2dq(u_int ssereg1,u_int ssereg2)
2508 assem_debug("cvttps2dq xmm%d,xmm%d",ssereg1,ssereg2);
2514 output_modrm(3,ssereg1,ssereg2);
// Register-to-register cvttpd2dq (truncating double-to-int32 conversion);
// ssereg1 goes in the ModRM rm field and ssereg2 in the reg field.
2516 static void emit_cvttpd2dq(u_int ssereg1,u_int ssereg2)
2518 assem_debug("cvttpd2dq xmm%d,xmm%d",ssereg1,ssereg2);
2524 output_modrm(3,ssereg1,ssereg2);
2528 /* Stubs/epilogue */
// Emit the stub used for a jump that leaves the current block.
//   addr   - address of the already-emitted branch instruction to be linked
//   target - guest address the branch goes to
//   linker - address of the linker routine (see emit_extjump/_ds callers)
// The asserts check that addr really points at a branch: a second byte in
// 0x80..0x8f (two-byte conditional jump) or a first byte of 0xe8/0xe9
// (call/jmp rel32). The stub then loads target into EAX and addr into EBX.
// NOTE(review): the code that adjusts ptr between the asserts and the final
// transfer to linker are not visible here.
2530 static void emit_extjump2(int addr, int target, int linker)
2532 u_char *ptr=(u_char *)addr;
2535 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
2540 assert(*ptr==0xe8||*ptr==0xe9);
2543 emit_movimm(target,EAX);
2544 emit_movimm(addr,EBX);
2545 //assert(addr>=0x7000000&&addr<0x7FFFFFF);
2546 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
// Debug-only: resynchronise Count/last_count with next_interupt before
// leaving the block.
2548 #ifdef DEBUG_CYCLE_COUNT
2549 emit_readword((int)&last_count,ECX);
2550 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2551 emit_readword((int)&next_interupt,ECX);
2552 emit_writeword(HOST_CCREG,(int)&Count);
2553 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2554 emit_writeword(ECX,(int)&last_count);
// External-jump stub that links through dyna_linker.
2560 static void emit_extjump(int addr, int target)
2562 emit_extjump2(addr, target, (int)dyna_linker);
// External-jump stub that links through dyna_linker_ds (presumably the
// delay-slot variant of the linker).
2564 static void emit_extjump_ds(int addr, int target)
2566 emit_extjump2(addr, target, (int)dyna_linker_ds);
// Out-of-line slow path for a load that cannot use the fast memory path.
// Reads stub record n: [0] stub type, [1] address of the branch to patch so
// it lands here, [2] return address, [3] instruction index (used for the
// debug PC), [5] pointer to the regstat in effect, [6] cycle adjustment.
// The emitted code stores the guest address in `address`, looks up the
// handler in the readmem* table by address>>16, writes back dirty registers,
// brings Count up to date, and after the handler has run reloads the cycle
// counter and fetches the result from readmem_dword with the extension that
// matches the stub type.
// NOTE(review): several short lines (locals, the handler call, braces) are
// not visible here; comments describe only what is shown.
2569 static void do_readstub(int n)
2571 assem_debug("do_readstub %x",start+stubs[n][3]*4);
2572 set_jump_target(stubs[n][1],(int)out);
2573 int type=stubs[n][0];
2576 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2577 signed char *i_regmap=i_regs->regmap;
2578 int addr=get_reg(i_regmap,AGEN1+(i&1));
// FP loads and LWL/LWR-style loads deliver into FTEMP rather than rt1[i].
2581 if(itype[i]==C1LS||itype[i]==LOADLR) {
2582 rth=get_reg(i_regmap,FTEMP|64);
2583 rt=get_reg(i_regmap,FTEMP);
2585 rth=get_reg(i_regmap,rt1[i]|64);
2586 rt=get_reg(i_regmap,rt1[i]);
2590 if(addr<0&&itype[i]!=C1LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
// Pick the handler table that matches the access width.
2593 if(type==LOADB_STUB||type==LOADBU_STUB)
2594 ftable=(int)readmemb;
2595 if(type==LOADH_STUB||type==LOADHU_STUB)
2596 ftable=(int)readmemh;
2597 if(type==LOADW_STUB)
2598 ftable=(int)readmem;
2599 if(type==LOADD_STUB)
2600 ftable=(int)readmemd;
// Publish the address, then fetch the handler pointer: table[address>>16].
2601 emit_writeword(rs,(int)&address);
2602 emit_shrimm(rs,16,addr);
2603 emit_movmem_indexedx4(ftable,addr,addr);
// ds is nonzero when the stub was recorded with a regstat other than
// regs[i] (presumably a delay-slot instruction - confirm).
2605 ds=i_regs!=&regs[i];
2606 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
// addr (and real_rs, when allocated) are masked out of the dirty set so
// they are not written back here.
2607 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2608 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2611 int cc=get_reg(i_regmap,CCREG);
2613 if(addr==HOST_CCREG)
2616 assert(cc!=HOST_CCREG);
2617 assert(temp!=HOST_CCREG);
2618 emit_loadreg(CCREG,cc);
2623 emit_loadreg(CCREG,cc);
// Count = cc + CLOCK_DIVIDER*(adjust+1) + last_count. The word stored at
// 32(%esp) is the instruction's PC with extra low bits folded in (was32 bit
// of rs1 shifted to bit 1, ds in bit 0).
2631 emit_readword((int)&last_count,temp);
2632 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
2633 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2634 emit_add(cc,temp,cc);
2635 emit_writeword(cc,(int)&Count);
2637 // We really shouldn't need to update the count here,
2638 // but not doing so causes random crashes...
2639 emit_readword((int)&Count,HOST_CCREG);
2640 emit_readword((int)&next_interupt,ECX);
2641 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2642 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2643 emit_writeword(ECX,(int)&last_count);
2644 emit_storereg(CCREG,HOST_CCREG);
2646 if((cc=get_reg(i_regmap,CCREG))>=0) {
2647 emit_loadreg(CCREG,cc);
// Fetch the result with the sign/zero extension implied by the stub type.
2650 if(type==LOADB_STUB)
2651 emit_movsbl((int)&readmem_dword,rt);
2652 if(type==LOADBU_STUB)
2653 emit_movzbl((int)&readmem_dword,rt);
2654 if(type==LOADH_STUB)
2655 emit_movswl((int)&readmem_dword,rt);
2656 if(type==LOADHU_STUB)
2657 emit_movzwl((int)&readmem_dword,rt);
2658 if(type==LOADW_STUB)
2659 emit_readword((int)&readmem_dword,rt);
2660 if(type==LOADD_STUB) {
2661 emit_readword((int)&readmem_dword,rt);
2662 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2665 emit_jmp(stubs[n][2]); // return address
// Emit the slow-path read inline for a load whose address is known at
// compile time (addr is passed in as a value, not read from a register).
//   type   - LOAD*_STUB kind selecting handler table and result extension
//   i      - instruction index
//   regmap - host register map in effect
//   target - guest register receiving the result
//   adj    - cycle adjustment, used as CLOCK_DIVIDER*(adj+1)
// The handler is picked directly from the table entry for addr>>16 and
// called. For addresses at or above 0xC0000000 (compared as signed) the
// registers are written back first and a PC word is stored at 32(%esp), so
// that a page fault can be handled.
// NOTE(review): reglist is not referenced in the visible lines; some short
// lines (locals, else/#else/#endif, braces) are not visible here.
2668 static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2670 assem_debug("inline_readstub");
2671 int rs=get_reg(regmap,target);
2672 int rth=get_reg(regmap,target|64);
2673 int rt=get_reg(regmap,target);
2674 if(rs<0) rs=get_reg(regmap,-1);
2677 if(type==LOADB_STUB||type==LOADBU_STUB)
2678 ftable=(int)readmemb;
2679 if(type==LOADH_STUB||type==LOADHU_STUB)
2680 ftable=(int)readmemh;
2681 if(type==LOADW_STUB)
2682 ftable=(int)readmem;
2683 if(type==LOADD_STUB)
2684 ftable=(int)readmemd;
// With HOST_IMM_ADDR32 the constant address is stored as an immediate;
// the emit_writeword form is presumably the #else branch.
2685 #ifdef HOST_IMM_ADDR32
2686 emit_writeword_imm(addr,(int)&address);
2688 emit_writeword(rs,(int)&address);
2691 if((signed int)addr>=(signed int)0xC0000000) {
2692 // Theoretically we can have a pagefault here, if the TLB has never
2693 // been enabled and the address is outside the range 80000000..BFFFFFFF
2694 // Write out the registers so the pagefault can be handled. This is
2695 // a very rare case and likely represents a bug.
2696 int ds=regmap!=regs[i].regmap;
2697 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2698 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2699 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2701 int cc=get_reg(regmap,CCREG);
2707 assert(cc!=HOST_CCREG);
2708 assert(temp!=HOST_CCREG);
2709 emit_loadreg(CCREG,cc);
2714 emit_loadreg(CCREG,cc);
// Count = cc + CLOCK_DIVIDER*(adj+1) + last_count.
2722 emit_readword((int)&last_count,temp);
2723 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2724 emit_add(cc,temp,cc);
2725 emit_writeword(cc,(int)&Count);
2726 if((signed int)addr>=(signed int)0xC0000000) {
2727 // Pagefault address
2728 int ds=regmap!=regs[i].regmap;
2729 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
// The handler is resolved now, at code-generation time, from the table.
2731 emit_call(((u_int *)ftable)[addr>>16]);
2732 // We really shouldn't need to update the count here,
2733 // but not doing so causes random crashes...
2734 emit_readword((int)&Count,HOST_CCREG);
2735 emit_readword((int)&next_interupt,ECX);
2736 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2737 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2738 emit_writeword(ECX,(int)&last_count);
2739 emit_storereg(CCREG,HOST_CCREG);
2741 if((cc=get_reg(regmap,CCREG))>=0) {
2742 emit_loadreg(CCREG,cc);
// Fetch the result with the sign/zero extension implied by the stub type.
2745 if(type==LOADB_STUB)
2746 emit_movsbl((int)&readmem_dword,rt);
2747 if(type==LOADBU_STUB)
2748 emit_movzbl((int)&readmem_dword,rt);
2749 if(type==LOADH_STUB)
2750 emit_movswl((int)&readmem_dword,rt);
2751 if(type==LOADHU_STUB)
2752 emit_movzwl((int)&readmem_dword,rt);
2753 if(type==LOADW_STUB)
2754 emit_readword((int)&readmem_dword,rt);
2755 if(type==LOADD_STUB) {
2756 emit_readword((int)&readmem_dword,rt);
2757 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
// Out-of-line slow path for a store that cannot use the fast memory path.
// Reads stub record n: [0] stub type, [1] address of the branch to patch so
// it lands here, [2] return address, [3] instruction index (used for the
// debug PC), [5] pointer to the regstat in effect, [6] cycle adjustment.
// The emitted code stores the guest address in `address`, looks up the
// handler in the writemem* table by address>>16, places the value in the
// width-specific global (cpu_byte/hword/word/dword), writes back dirty
// registers, brings Count up to date, and after the handler has run reloads
// the cycle counter and returns.
// NOTE(review): several short lines (locals, the handler call, braces) are
// not visible here; comments describe only what is shown.
2762 static void do_writestub(int n)
2764 assem_debug("do_writestub %x",start+stubs[n][3]*4);
2765 set_jump_target(stubs[n][1],(int)out);
2766 int type=stubs[n][0];
2769 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2770 signed char *i_regmap=i_regs->regmap;
2771 int addr=get_reg(i_regmap,AGEN1+(i&1));
// FP stores take their value from FTEMP, other stores from rs2[i]; r keeps
// the guest register number for the zero-register check below.
2774 if(itype[i]==C1LS) {
2775 rth=get_reg(i_regmap,FTEMP|64);
2776 rt=get_reg(i_regmap,r=FTEMP);
2778 rth=get_reg(i_regmap,rs2[i]|64);
2779 rt=get_reg(i_regmap,r=rs2[i]);
2783 if(addr<0) addr=get_reg(i_regmap,-1);
// Pick the handler table that matches the access width.
2786 if(type==STOREB_STUB)
2787 ftable=(int)writememb;
2788 if(type==STOREH_STUB)
2789 ftable=(int)writememh;
2790 if(type==STOREW_STUB)
2791 ftable=(int)writemem;
2792 if(type==STORED_STUB)
2793 ftable=(int)writememd;
// Publish the address, then fetch the handler pointer: table[address>>16].
2794 emit_writeword(rs,(int)&address);
2795 emit_shrimm(rs,16,addr);
2796 emit_movmem_indexedx4(ftable,addr,addr);
2797 if(type==STOREB_STUB)
2798 emit_writebyte(rt,(int)&cpu_byte);
2799 if(type==STOREH_STUB)
2800 emit_writehword(rt,(int)&hword);
2801 if(type==STOREW_STUB)
2802 emit_writeword(rt,(int)&word);
2803 if(type==STORED_STUB) {
// rt goes to dword and the other half to dword+4; when the source is guest
// register 0 (r==0) rt is written for both halves.
2804 emit_writeword(rt,(int)&dword);
2805 emit_writeword(r?rth:rt,(int)&dword+4);
// ds is nonzero when the stub was recorded with a regstat other than
// regs[i] (presumably a delay-slot instruction - confirm).
2808 ds=i_regs!=&regs[i];
2809 int real_rs=get_reg(i_regmap,rs1[i]);
// addr (and real_rs, when allocated) are masked out of the dirty set so
// they are not written back here.
2810 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2811 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2814 int cc=get_reg(i_regmap,CCREG);
2816 if(addr==HOST_CCREG)
2819 assert(cc!=HOST_CCREG);
2820 assert(temp!=HOST_CCREG);
2821 emit_loadreg(CCREG,cc);
2826 emit_loadreg(CCREG,cc);
// Count = cc + CLOCK_DIVIDER*(adjust+1) + last_count. The word stored at
// 32(%esp) is the instruction's PC with extra low bits folded in (was32 bit
// of rs1 shifted to bit 1, ds in bit 0).
2834 emit_readword((int)&last_count,temp);
2835 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
2836 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2837 emit_add(cc,temp,cc);
2838 emit_writeword(cc,(int)&Count);
// After the handler: rebuild the cycle register relative to next_interupt.
2840 emit_readword((int)&Count,HOST_CCREG);
2841 emit_readword((int)&next_interupt,ECX);
2842 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2843 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2844 emit_writeword(ECX,(int)&last_count);
2845 emit_storereg(CCREG,HOST_CCREG);
2847 if((cc=get_reg(i_regmap,CCREG))>=0) {
2848 emit_loadreg(CCREG,cc);
2850 emit_jmp(stubs[n][2]); // return address
// Emit the slow-path write inline for a store whose address is known at
// compile time (addr is passed in as a value).
//   type   - STORE*_STUB kind selecting handler table and value global
//   i      - instruction index
//   regmap - host register map in effect
//   target - guest register holding the value to store
//   adj    - cycle adjustment, used as CLOCK_DIVIDER*(adj+1)
// The value is placed in cpu_byte/hword/word/dword, Count is brought up to
// date, and the handler taken from the table entry for addr>>16 is called.
// For addresses at or above 0xC0000000 (compared as signed) the registers
// are written back first and a PC word is stored at 32(%esp).
// NOTE(review): reglist is not referenced in the visible lines; some short
// lines (locals, else, braces) are not visible here.
2853 static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2855 assem_debug("inline_writestub");
2856 int rs=get_reg(regmap,-1);
2857 int rth=get_reg(regmap,target|64);
2858 int rt=get_reg(regmap,target);
2862 if(type==STOREB_STUB)
2863 ftable=(int)writememb;
2864 if(type==STOREH_STUB)
2865 ftable=(int)writememh;
2866 if(type==STOREW_STUB)
2867 ftable=(int)writemem;
2868 if(type==STORED_STUB)
2869 ftable=(int)writememd;
2870 emit_writeword(rs,(int)&address);
2871 if(type==STOREB_STUB)
2872 emit_writebyte(rt,(int)&cpu_byte);
2873 if(type==STOREH_STUB)
2874 emit_writehword(rt,(int)&hword);
2875 if(type==STOREW_STUB)
2876 emit_writeword(rt,(int)&word);
2877 if(type==STORED_STUB) {
// rt goes to dword and the other half to dword+4; when target is guest
// register 0 rt is written for both halves.
2878 emit_writeword(rt,(int)&dword);
2879 emit_writeword(target?rth:rt,(int)&dword+4);
2882 if((signed int)addr>=(signed int)0xC0000000) {
2883 // Theoretically we can have a pagefault here, if the TLB has never
2884 // been enabled and the address is outside the range 80000000..BFFFFFFF
2885 // Write out the registers so the pagefault can be handled. This is
2886 // a very rare case and likely represents a bug.
2887 int ds=regmap!=regs[i].regmap;
2888 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2889 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2890 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2892 int cc=get_reg(regmap,CCREG);
2898 assert(cc!=HOST_CCREG);
2899 assert(temp!=HOST_CCREG);
2900 emit_loadreg(CCREG,cc);
2905 emit_loadreg(CCREG,cc);
// Count = cc + CLOCK_DIVIDER*(adj+1) + last_count.
2913 emit_readword((int)&last_count,temp);
2914 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2915 emit_add(cc,temp,cc);
2916 emit_writeword(cc,(int)&Count);
2917 if((signed int)addr>=(signed int)0xC0000000) {
2918 // Pagefault address
2919 int ds=regmap!=regs[i].regmap;
2920 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
// The handler is resolved now, at code-generation time, from the table.
2922 emit_call(((u_int *)ftable)[addr>>16]);
2923 emit_readword((int)&Count,HOST_CCREG);
2924 emit_readword((int)&next_interupt,ECX);
2925 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2926 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2927 emit_writeword(ECX,(int)&last_count);
2928 emit_storereg(CCREG,HOST_CCREG);
2930 if((cc=get_reg(regmap,CCREG))>=0) {
2931 emit_loadreg(CCREG,cc);
// Stub for unaligned writes: patches the recorded branch (stubs[n][1]) to
// land here and jumps back to the return address (stubs[n][2]).
// NOTE(review): anything emitted between those two steps is not visible here.
2935 static void do_unalignedwritestub(int n)
2937 set_jump_target(stubs[n][1],(int)out);
2939 emit_jmp(stubs[n][2]); // return address
// Code-invalidation stub: patches the recorded branch to land here, calls
// the invalidate_block_<reg> helper selected by the host register number in
// stubs[n][4], and jumps back to the return address.
2942 static void do_invstub(int n)
2944 set_jump_target(stubs[n][1],(int)out);
2945 emit_call(invalidate_block_reg[stubs[n][4]]);
2946 emit_jmp(stubs[n][2]); // return address
// Emit the "dirty" entry stub for instruction i: it checks that the guest
// code is unchanged before entering the compiled block.
// Emitted sequence: push the guest PC (start+i*4); EAX = source when start
// is below 0xC0000000 as a signed compare, else start itself; EBX = copy;
// ECX = slen*4 (length in bytes); call verify_code or verify_code_vm under
// the same condition; add 4 to ESP to drop the pushed PC; jump to
// instr_addr[i]. If the clean entry would coincide with the current output
// position, entry is redirected to instr_addr[i].
// NOTE(review): the declaration of entry and the return statement are not
// visible here; presumably entry is what is returned.
2949 static int do_dirty_stub(int i)
2951 assem_debug("do_dirty_stub %x",start+i*4);
2952 emit_pushimm(start+i*4);
2953 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
2954 emit_movimm((int)copy,EBX);
2955 emit_movimm(slen*4,ECX);
2956 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2957 emit_addimm(ESP,4,ESP);
2960 if(entry==(int)out) entry=instr_addr[i];
2961 emit_jmp(instr_addr[i]);
// Variant of do_dirty_stub that pushes start+1 and always calls
// verify_code_ds; EAX/EBX/ECX are loaded the same way and the pushed word is
// dropped afterwards by adding 4 to ESP.
// NOTE(review): start+1 presumably tags the PC as a delay-slot entry - confirm.
2965 static void do_dirty_stub_ds()
2967 emit_pushimm(start+1);
2968 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
2969 emit_movimm((int)copy,EBX);
2970 emit_movimm(slen*4,ECX);
2971 emit_call((int)&verify_code_ds);
2972 emit_addimm(ESP,4,ESP);
// Stub taken when a coprocessor-1 instruction raises an exception: patches
// the recorded branch to land here, reloads constants, writes back dirty
// registers, makes sure the cycle count is in HOST_CCREG, loads the
// instruction's PC into EAX (start+(i-ds)*4), adds the cycles accumulated so
// far and jumps to fp_exception or fp_exception_ds depending on ds.
// NOTE(review): the declarations of i and ds and the condition guarding
// load_all_consts are not visible here.
2975 static void do_cop1stub(int n)
2977 assem_debug("do_cop1stub %x",start+stubs[n][3]*4);
2978 set_jump_target(stubs[n][1],(int)out);
2980 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2983 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2984 //if(i_regs!=&regs[i]) DebugMessage(M64MSG_VERBOSE, "oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2986 //else {DebugMessage(M64MSG_VERBOSE, "fp exception in delay slot");}
2987 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2988 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2989 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2990 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2991 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
// Emit the memory_map lookup for a read.
// Constant case: an address at or above 0xC0000000 (signed compare) has its
// memory_map entry loaded straight into map; the other visible outcome is
// returning -1, meaning no mapping is needed.
// Dynamic case: map = s>>12, optionally emit_lea8(s,shift) when shift>=0 and
// ar = s & a when a is not all ones, then map = memory_map[map].
// NOTE(review): cache, x and c are not referenced in the visible lines, and
// the guards separating the two cases plus the final return are not shown.
2996 static int do_tlb_r(int s,int ar,int map,int cache,int x,int a,int shift,int c,u_int addr)
2999 if((signed int)addr>=(signed int)0xC0000000) {
3000 emit_readword((int)(memory_map+(addr>>12)),map);
3003 return -1; // No mapping
3006 if(s!=map) emit_mov(s,map);
3007 emit_shrimm(map,12,map);
3008 // Schedule this while we wait on the load
3009 //if(x) emit_xorimm(addr,x,addr);
3010 if(shift>=0) emit_lea8(s,shift);
3011 if(~a) emit_andimm(s,a,ar);
3012 emit_movmem_indexedx4((int)memory_map,map,map);
// Emits the check that follows a read lookup; it applies when the address is
// not a known constant (!c) or lies at or above 0xC0000000 (signed compare).
// NOTE(review): the body (presumably a test of map and a branch recorded
// through jaddr) and the return value are not visible here.
3016 static int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3018 if(!c||(signed int)addr>=(signed int)0xC0000000) {
// Turn the guest address in ar into a host address for reads by combining it
// with the map register through emit_leairrx4 (presumably ar = ar + map*4).
// NOTE(review): any guard on map is not visible here.
3026 static void gen_tlb_addr_r(int ar, int map) {
3028 emit_leairrx4(0,ar,map,ar);
// Emit the memory_map lookup for a write.
// Constant case: an address below 0x80800000 or at/above 0xC0000000
// (unsigned compares) has its memory_map entry loaded into map; the other
// visible outcome is returning -1, meaning no mapping is needed.
// Dynamic case: map = memory_map[s>>12]. The trailing shift left by 2 scales
// the map value, matching the unscaled (x1) use in gen_tlb_addr_w.
// NOTE(review): ar, cache, x and c are not referenced in the visible lines;
// guards and the final return are not shown.
3032 static int do_tlb_w(int s,int ar,int map,int cache,int x,int c,u_int addr)
3035 if(addr<0x80800000||addr>=0xC0000000) {
3036 emit_readword((int)(memory_map+(addr>>12)),map);
3039 return -1; // No mapping
3042 if(s!=map) emit_mov(s,map);
3043 //if(s!=ar) emit_mov(s,ar);
3044 emit_shrimm(map,12,map);
3045 // Schedule this while we wait on the load
3046 //if(x) emit_xorimm(s,x,addr);
3047 emit_movmem_indexedx4((int)memory_map,map,map);
3049 emit_shlimm(map,2,map);
// Emits the check that follows a write lookup; it applies when the address
// is not a known constant (!c) or lies outside 0x80800000..0xBFFFFFFF.
// NOTE(review): the body (presumably a branch recorded through jaddr) is not
// visible here.
3052 static void do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3054 if(!c||addr<0x80800000||addr>=0xC0000000) {
// Turn the guest address in ar into a host address for writes by combining
// it with the map register through emit_leairrx1 (presumably ar = ar + map).
// NOTE(review): any guard on map is not visible here.
3060 static void gen_tlb_addr_w(int ar, int map) {
3062 emit_leairrx1(0,ar,map,ar);
3066 // We don't need this for x86
// No visible effect on x86: the only line shown in the body is commented out.
3067 static void generate_map_const(u_int addr,int reg) {
3068 // void *mapaddr=memory_map+(addr>>12);
// Assemble the variable-shift instructions for instruction i.
// 32-bit forms (opcode2 <= 0x07: SLLV/SRLV/SRAV) and 64-bit forms
// (DSLLV/DSRLV/DSRAV) are handled separately. x86 takes a variable shift
// count only in CL, so most of the work is getting the count into ECX
// without losing whatever ECX holds: when ECX is unallocated the count is
// copied in, otherwise the count register and ECX are exchanged and
// exchanged back afterwards. While they are swapped, any operand that lives
// in ECX is found in `shift` instead, hence the repeated x==ECX?shift:x.
// NOTE(review): many short lines (guards, declarations, braces) are not
// visible here; comments describe only what is shown.
3073 static void shift_assemble_x86(int i,struct regstat *i_regs)
3076 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3079 t=get_reg(i_regs->regmap,rt1[i]);
3080 s=get_reg(i_regs->regmap,rs1[i]);
3081 shift=get_reg(i_regs->regmap,rs2[i]);
3090 if(s!=t) emit_mov(s,t);
3094 char temp=get_reg(i_regs->regmap,-1);
// Destination is ECX: move the count into ECX and the source into the old
// count register (or temp when the count register is also the destination).
3096 if(t==ECX&&s!=ECX) {
3097 if(shift!=ECX) emit_mov(shift,ECX);
3098 if(rt1[i]==rs2[i]) {shift=temp;}
3099 if(s!=shift) emit_mov(s,shift);
3103 if(rt1[i]==rs2[i]) {emit_mov(shift,temp);shift=temp;}
3104 if(s!=t) emit_mov(s,t);
3106 if(i_regs->regmap[ECX]<0)
3107 emit_mov(shift,ECX);
3109 emit_xchg(shift,ECX);
3112 if(opcode2[i]==4) // SLLV
3114 emit_shlcl(t==ECX?shift:t);
3116 if(opcode2[i]==6) // SRLV
3118 emit_shrcl(t==ECX?shift:t);
3120 if(opcode2[i]==7) // SRAV
3122 emit_sarcl(t==ECX?shift:t);
// Undo the exchange if ECX was in use.
3124 if(shift!=ECX&&i_regs->regmap[ECX]>=0) emit_xchg(shift,ECX);
3127 } else { // DSLLV/DSRLV/DSRAV
3128 char sh,sl,th,tl,shift;
3129 th=get_reg(i_regs->regmap,rt1[i]|64);
3130 tl=get_reg(i_regs->regmap,rt1[i]);
3131 sh=get_reg(i_regs->regmap,rs1[i]|64);
3132 sl=get_reg(i_regs->regmap,rs1[i]);
3133 shift=get_reg(i_regs->regmap,rs2[i]);
3138 if(th>=0) emit_zeroreg(th);
3143 if(sl!=tl) emit_mov(sl,tl);
3144 if(th>=0&&sh!=th) emit_mov(sh,th);
3148 // FIXME: What if shift==tl ?
3150 int temp=get_reg(i_regs->regmap,-1);
3152 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3155 if(tl==ECX&&sl!=ECX) {
3156 if(shift!=ECX) emit_mov(shift,ECX);
3157 if(sl!=shift) emit_mov(sl,shift);
3158 if(th>=0 && sh!=th) emit_mov(sh,th);
3160 else if(th==ECX&&sh!=ECX) {
3161 if(shift!=ECX) emit_mov(shift,ECX);
3162 if(sh!=shift) emit_mov(sh,shift);
3163 if(sl!=tl) emit_mov(sl,tl);
3167 if(sl!=tl) emit_mov(sl,tl);
3168 if(th>=0 && sh!=th) emit_mov(sh,th);
3170 if(i_regs->regmap[ECX]<0)
3171 emit_mov(shift,ECX);
3173 emit_xchg(shift,ECX);
// The double-register shift is followed by a test of bit 5 of the count:
// when it is set (count >= 32) the cmovne pair moves one half into the other
// and replaces the vacated half (zero for the logical shifts).
3176 if(opcode2[i]==0x14) // DSLLV
3178 if(th>=0) emit_shldcl(th==ECX?shift:th,tl==ECX?shift:tl);
3179 emit_shlcl(tl==ECX?shift:tl);
3180 emit_testimm(ECX,32);
3181 if(th>=0) emit_cmovne_reg(tl==ECX?shift:tl,th==ECX?shift:th);
3182 emit_cmovne(&const_zero,tl==ECX?shift:tl);
3184 if(opcode2[i]==0x16) // DSRLV
3187 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3188 emit_shrcl(th==ECX?shift:th);
3189 emit_testimm(ECX,32);
3190 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3191 if(real_th>=0) emit_cmovne(&const_zero,th==ECX?shift:th);
3193 if(opcode2[i]==0x17) // DSRAV
3196 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
// temp receives a copy of the upper half and is turned into its sign fill
// (arithmetic shift by 31), used as the new upper half when count >= 32.
3199 emit_mov(th==ECX?shift:th,temp==ECX?shift:temp);
3201 emit_sarcl(th==ECX?shift:th);
3202 if(real_th>=0) emit_sarimm(temp==ECX?shift:temp,31,temp==ECX?shift:temp);
3203 emit_testimm(ECX,32);
3204 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3205 if(real_th>=0) emit_cmovne_reg(temp==ECX?shift:temp,th==ECX?shift:th);
// Undo the exchange if ECX was in use (or served as the temporary).
3207 if(shift!=ECX&&(i_regs->regmap[ECX]>=0||temp==ECX)) emit_xchg(shift,ECX);
// Make the name shift_assemble expand to the x86 implementation shift_assemble_x86.
3213 #define shift_assemble shift_assemble_x86
// Emits x86 code for the unaligned load instructions. The opcode tests below
// cover 0x22/0x26 (annotated LWL/LWR) and 0x1A/0x1B (annotated LDL/LDR).
//   i      - index of the instruction being assembled; used to index
//            opcode[], rt1[], rs1[], constmap[] and ccadj[]
//   i_regs - register state for this instruction (regmap, wasconst and
//            wasdirty are read here)
// NOTE(review): hr, reglist, offset, c, memtarget, jaddr, a and temp3 are used
// below but their declarations are not visible in this listing, so the
// comments describe only the statements that are present.
3215 static void loadlr_assemble_x86(int i,struct regstat *i_regs)
3217 int s,th,tl,temp,temp2,addr,map=-1;
// Host register lookups: rt1|64 (th) and rt1 (tl), rs1 (s), the -1 entry
// (temp), FTEMP (temp2) and AGEN1+(i&1) (addr).
3222 th=get_reg(i_regs->regmap,rt1[i]|64);
3223 tl=get_reg(i_regs->regmap,rt1[i]);
3224 s=get_reg(i_regs->regmap,rs1[i]);
3225 temp=get_reg(i_regs->regmap,-1);
3226 temp2=get_reg(i_regs->regmap,FTEMP);
3227 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
// Collect a bitmask of the host registers whose regmap entry is >=0.
3230 for(hr=0;hr<HOST_REGS;hr++) {
3231 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
// Use temp2 as the address register when there is an offset, rs1 has no host
// register, or c is set.
3234 if(offset||s<0||c) addr=temp2;
// c is bit s of wasconst; memtarget is derived from the constant address
// (constmap[i][s]+offset) compared against 0x80800000, and is forced to 1
// when using_tlb is set and the address is >= 0xC0000000 (signed compare).
3237 c=(i_regs->wasconst>>s)&1;
3238 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3239 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
// NOTE(review): emit_lea8 presumably loads addr*8 into temp (a bit shift
// amount) - confirm against its definition.
3243 emit_lea8(addr,temp);
// Align the address into temp2: clear the low 2 bits for LWL/LWR, the low
// 3 bits for LDL/LDR.
3244 if (opcode[i]==0x22||opcode[i]==0x26) {
3245 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3247 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
// Compare addr with 0x800000; the branch consuming this compare is not
// visible here.
3249 emit_cmpimm(addr,0x800000);
// Constant address: load (address<<3)&24 or (address<<3)&56 into temp, i.e.
// eight times the byte position within the word or doubleword.
3254 if (opcode[i]==0x22||opcode[i]==0x26) {
3255 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3257 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
// Alignment mask passed to do_tlb_r below.
3264 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3265 a=0xFFFFFFFC; // LWL/LWR
3267 a=0xFFFFFFF8; // LDL/LDR
// TLB path: fetch the TLREG host register and let do_tlb_r / do_tlb_r_branch
// emit the lookup; jaddr receives the branch location from do_tlb_r_branch.
3269 map=get_reg(i_regs->regmap,TLREG);
3272 map=do_tlb_r(addr,temp2,map,-1,0,a,c?-1:temp,c,constmap[i][s]+offset);
3274 if (opcode[i]==0x22||opcode[i]==0x26) {
3275 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3277 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3280 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3282 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3284 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
// Read the aligned word through the map register into temp2, and register a
// LOADW_STUB when jaddr is nonzero.
3285 emit_readword_indexed_tlb(0,temp2,map,temp2);
3286 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
// Alternative: inline read stub for the aligned constant address.
3289 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
// Reduce temp to a multiple of 8 in 0..24; for LWR (0x26) it is flipped with
// xor 24.
3292 emit_andimm(temp,24,temp);
3293 if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
// temp3 is stepped past temp2, saved on the stack and loaded with -1
// (all ones).
3297 if(temp3==temp2) temp3++;
3298 emit_pushreg(temp3);
3299 emit_movimm(-1,temp3);
3300 if (opcode[i]==0x26) {
3307 emit_mov(temp3,ECX);
// Second variant: temp3 is stepped past both temp and temp2, temp is
// exchanged with ECX so the shift count is in CL, then temp3 is saved and
// loaded with -1.
3314 if(temp3==temp) temp3++;
3315 if(temp3==temp2) temp3++;
3316 if(temp3==temp) temp3++;
3317 emit_xchg(ECX,temp);
3318 emit_pushreg(temp3);
3319 emit_movimm(-1,temp3);
// Shift by CL; because of the exchange above, temp stands in for temp2 when
// temp2 is ECX. A right shift is emitted under the 0x26 (LWR) test; the left
// shift presumably belongs to the other opcode (the else is not visible).
3320 if (opcode[i]==0x26) {
3322 emit_shrcl(temp2==ECX?temp:temp2);
3325 emit_shlcl(temp2==ECX?temp:temp2);
3327 emit_not(temp3,temp3);
3329 emit_mov(temp3,temp);
// Merge: and tl with temp, then or in temp2 (NOTE(review): assumes the last
// operand is the destination - confirm).
3332 emit_and(temp,tl,tl);
3333 emit_or(temp2,tl,tl);
3334 //emit_storereg(rt1[i],tl); // DEBUG
// NOTE(review): the run below ends with a stray "*/", which suggests these
// statements are commented-out debug code whose opening "/*" is not visible
// in this listing - confirm before treating them as live code.
3336 //save_regs(0x100f);
3337 emit_readword((int)&last_count,ECX);
3338 if(get_reg(i_regs->regmap,CCREG)<0)
3339 emit_loadreg(CCREG,HOST_CCREG);
3340 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3341 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3342 emit_writeword(HOST_CCREG,(int)&Count);
3343 emit_call((int)memdebug);
3345 //restore_regs(0x100f);*/
3348 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
// Store rs1 (and rs1|64 when it has a host register) with emit_storereg if
// its wasdirty bit is set.
3350 if((i_regs->wasdirty>>s)&1)
3351 emit_storereg(rs1[i],s);
3352 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3353 if((i_regs->wasdirty>>get_reg(i_regs->regmap,rs1[i]|64))&1)
3354 emit_storereg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
// Host register mapped to FTEMP|64; used together with temp2 for the
// doubleword read.
3355 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3357 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3358 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
// Read the aligned doubleword through the map register into temp2h/temp2,
// and register a LOADD_STUB when jaddr is nonzero.
3359 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3360 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
// Alternative: inline read stub for the 8-byte-aligned constant address.
3363 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
// Reduce temp to a multiple of 8 in 0..56.
3367 emit_andimm(temp,56,temp);
// Push the loaded doubleword and call the C merge helper: ldl_merge for
// opcode 0x1A, ldr_merge for 0x1B. Afterwards 20 bytes are released from the
// stack (only two of the pushes are visible here).
3369 emit_pushreg(temp2h);
3370 emit_pushreg(temp2);
3373 if(opcode[i]==0x1A) emit_call((int)ldl_merge);
3374 if(opcode[i]==0x1B) emit_call((int)ldr_merge);
3375 emit_addimm(ESP,20,ESP);
// Copy EAX/EDX into tl/th. Two orderings of the moves appear; the condition
// choosing between them is not visible (presumably it avoids overwriting a
// source register before it has been read).
3377 if(tl!=EAX) emit_mov(EAX,tl);
3378 if(th!=EDX) emit_mov(EDX,th);
3381 if(th!=EDX) emit_mov(EDX,th);
3382 if(tl!=EAX) emit_mov(EAX,tl);
// Reload rs1 (and rs1|64 when it has a host register) with emit_loadreg.
3386 if(s>=0) emit_loadreg(rs1[i],s);
3387 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3388 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
// Make the name loadlr_assemble expand to the x86 implementation above.
3392 #define loadlr_assemble loadlr_assemble_x86
// Emits code for coprocessor 0 instructions. The visible cases are opcode2 0
// (MFC0), opcode2 4 (MTC0) and opcode2 0x10, where the low six bits of
// source[i] select TLBR, TLBWI, TLBWR, TLBP or ERET.
//   i      - index of the instruction being assembled
//   i_regs - register state for this instruction
// NOTE(review): not every line of this function is visible in this listing
// (braces and some conditions are absent); comments describe only what is
// shown.
3394 static void cop0_assemble(int i,struct regstat *i_regs)
3396 if(opcode2[i]==0) // MFC0
3399 signed char t=get_reg(i_regs->regmap,rt1[i]);
// Coprocessor register number: bits 11..15 of the instruction word.
3400 char copr=(source[i]>>11)&0x1f;
// Store the address of fake_pc to PC and the register number in fake_pc's
// nrd field before calling the interpreter's MFC0 handler.
3402 emit_writeword_imm((int)&fake_pc,(int)&PC);
3403 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
// Write a cycle count to Count, built from last_count, CCREG and
// CLOCK_DIVIDER*ccadj[i].
3405 emit_readword((int)&last_count,ECX);
3406 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3407 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3408 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3409 emit_writeword(HOST_CCREG,(int)&Count);
// Call the interpreter handler, then load readmem_dword into t.
3411 emit_call((int)cached_interpreter_table.MFC0);
3412 emit_readword((int)&readmem_dword,t);
3416 else if(opcode2[i]==4) // MTC0
3418 signed char s=get_reg(i_regs->regmap,rs1[i]);
3419 char copr=(source[i]>>11)&0x1f;
// Store the source register's value in readmem_dword, then set up PC and
// fake_pc's nrd field as in the MFC0 case.
3421 emit_writeword(s,(int)&readmem_dword);
3423 emit_writeword_imm((int)&fake_pc,(int)&PC);
3424 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
// Registers 9, 11 and 12 get extra handling: Count is brought up to date
// before the call, and for register 12 outside a delay slot rs1 is written
// back first.
3425 if(copr==9||copr==11||copr==12) {
3426 if(copr==12&&!is_delayslot) {
3427 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3429 emit_readword((int)&last_count,ECX);
3430 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3431 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3432 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3433 emit_writeword(HOST_CCREG,(int)&Count);
3435 // What a mess. The status register (12) can enable interrupts,
3436 // so needs a special case to handle a pending interrupt.
3437 // The interrupt must be taken immediately, because a subsequent
3438 // instruction might disable interrupts again.
// Record the address of the following instruction (start+i*4+4) in pcaddr
// and clear pending_exception before the call.
3439 if(copr==12&&!is_delayslot) {
3440 emit_writeword_imm(start+i*4+4,(int)&pcaddr);
3441 emit_writebyte_imm(0,(int)&pending_exception);
3443 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3445 emit_call((int)cached_interpreter_table.MTC0);
// After the call, rebuild the cycle counter from Count, next_interupt and
// CLOCK_DIVIDER*ccadj[i]; next_interupt is stored as the new last_count and
// the counter is stored back as CCREG.
3446 if(copr==9||copr==11||copr==12) {
3447 emit_readword((int)&Count,HOST_CCREG);
3448 emit_readword((int)&next_interupt,ECX);
3449 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3450 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3451 emit_writeword(ECX,(int)&last_count);
3452 emit_storereg(CCREG,HOST_CCREG);
// Compare pending_exception with 0 and jump to do_interrupt when it is
// nonzero; this path asserts that it is not in a delay slot.
3456 assert(!is_delayslot);
3457 //if(is_delayslot) output_byte(0xcc);
3458 emit_cmpmem_imm_byte((int)&pending_exception,0);
3459 emit_jne((int)&do_interrupt);
// Remaining case: opcode2 must be 0x10; the low six bits of the instruction
// word select the operation.
3465 assert(opcode2[i]==0x10);
3466 if((source[i]&0x3f)==0x01) // TLBR
3467 emit_call((int)cached_interpreter_table.TLBR);
3468 if((source[i]&0x3f)==0x02) // TLBWI
3469 emit_call((int)TLBWI_new);
3470 if((source[i]&0x3f)==0x06) { // TLBWR
3471 // The TLB entry written by TLBWR is dependent on the count,
3472 // so update the cycle count
// CCREG is loaded only when HOST_CCREG does not already hold it.
3473 emit_readword((int)&last_count,ECX);
3474 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3475 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3476 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3477 emit_writeword(HOST_CCREG,(int)&Count);
3478 emit_call((int)TLBWR_new);
3480 if((source[i]&0x3f)==0x08) // TLBP
3481 emit_call((int)cached_interpreter_table.TLBP);
3482 if((source[i]&0x3f)==0x18) // ERET
// Load CCREG if needed, add CLOCK_DIVIDER*count to it (count is declared on
// a line not visible here) and jump to jump_eret.
3485 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3486 emit_addimm_and_set_flags(CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3487 emit_jmp((int)jump_eret);
// Emits code for coprocessor 1 register transfers, selected by opcode2:
// 0 MFC1, 1 DMFC1, 4 MTC1, 5 DMTC1, 2 CFC1, 6 CTC1.
//   i      - index of the instruction being assembled
//   i_regs - register state for this instruction
// NOTE(review): the operands spelled "®_cop1_simple" / "®_cop1_double" look
// like a mis-decoded "&reg_cop1_simple" / "&reg_cop1_double" (HTML "&reg"
// entity) - confirm and restore. Some lines of this function (e.g. the
// declaration of jaddr) are not visible in this listing.
3492 static void cop1_assemble(int i,struct regstat *i_regs)
3494 // Check cop1 unusable
// Test bit 0x20000000 of the CSREG host register and register an FP_STUB
// for this instruction.
3496 signed char rs=get_reg(i_regs->regmap,CSREG);
3498 emit_testimm(rs,0x20000000);
3501 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3504 if (opcode2[i]==0) { // MFC1
3505 signed char tl=get_reg(i_regs->regmap,rt1[i]);
// Read the table entry for FP register (source>>11)&0x1f into tl, then read
// the word at offset 0 from that value.
3507 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],tl);
3508 emit_readword_indexed(0,tl,tl);
3511 else if (opcode2[i]==1) { // DMFC1
3512 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3513 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
// Same as MFC1 using the double table; the word at offset 4 goes to th when
// th has a host register. th is read first because tl still holds the
// table entry at that point.
3515 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],tl);
3516 if(th>=0) emit_readword_indexed(4,tl,th);
3517 emit_readword_indexed(0,tl,tl);
3520 else if (opcode2[i]==4) { // MTC1
3521 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3522 signed char temp=get_reg(i_regs->regmap,-1);
// Load the table entry into temp and write sl at offset 0 from it.
3523 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3524 emit_writeword_indexed(sl,0,temp);
3526 else if (opcode2[i]==5) { // DMTC1
3527 signed char sl=get_reg(i_regs->regmap,rs1[i]);
// For rs1==0 the lower-half register is reused for the upper half.
3528 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3529 signed char temp=get_reg(i_regs->regmap,-1);
// Load the double table entry into temp and write sh at offset 4 and sl at
// offset 0 from it.
3530 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3531 emit_writeword_indexed(sh,4,temp);
3532 emit_writeword_indexed(sl,0,temp);
3534 else if (opcode2[i]==2) // CFC1
3536 signed char tl=get_reg(i_regs->regmap,rt1[i]);
// Only control registers 0 (FCR0) and 31 (FCR31) are read.
3538 u_int copr=(source[i]>>11)&0x1f;
3539 if(copr==0) emit_readword((int)&FCR0,tl);
3540 if(copr==31) emit_readword((int)&FCR31,tl);
3543 else if (opcode2[i]==6) // CTC1
3545 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3546 u_int copr=(source[i]>>11)&0x1f;
// Store the new value to FCR31.
3550 emit_writeword(sl,(int)&FCR31);
3551 // Set the rounding mode
// temp = sl & 3 selects an entry of rounding_modes, which is loaded as the
// x87 control word.
3552 char temp=get_reg(i_regs->regmap,-1);
3553 emit_movimm(3,temp);
3554 emit_and(sl,temp,temp);
3555 emit_fldcw_indexed((int)&rounding_modes,temp);
// Emits code for floating-point conversion instructions (the trunc/cvt/
// round/ceil/floor operations named in the comments and helper calls below).
// opcode2 selects the source format and the low six bits of source[i] select
// the operation. In the visible trunc_w_s case the FP register in bits
// 11..15 is read and the one in bits 6..10 is written.
//   i      - index of the instruction being assembled
//   i_regs - register state for this instruction
// NOTE(review): the operands spelled "®_cop1_simple" / "®_cop1_double" look
// like a mis-decoded "&reg_cop1_simple" / "&reg_cop1_double" (HTML "&reg"
// entity) - confirm and restore. Many lines of this function (braces,
// preprocessor conditionals, load/store emitters, the declaration of jaddr)
// are not visible in this listing; comments describe only what is shown.
3560 static void fconv_assemble_x86(int i,struct regstat *i_regs)
3562 signed char temp=get_reg(i_regs->regmap,-1);
3564 // Check cop1 unusable
// Test bit 0x20000000 of the CSREG host register and register an FP_STUB
// for this instruction.
3566 signed char rs=get_reg(i_regs->regmap,CSREG);
3568 emit_testimm(rs,0x20000000);
3571 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
// Truncating conversions done with SSE-style emitters: load the source
// value, convert with truncation, store the 32-bit result through the
// destination table entry. temp is reloaded only when the destination
// register differs from the source.
3575 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3576 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3577 emit_movss_load(temp,0);
3578 emit_cvttps2dq(0,0); // float->int, truncate
3579 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3580 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3581 emit_movd_store(0,temp);
3584 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3585 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3586 emit_movsd_load(temp,0);
3587 emit_cvttpd2dq(0,0); // double->int, truncate
3588 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3589 emit_movd_store(0,temp);
// Integer/float and float/float conversions: each case reads the source
// table entry into temp, then (re)loads temp with the destination table
// entry. The emitters that load and store the value itself are not visible
// in this listing.
3594 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3595 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3597 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3598 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3602 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3603 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3605 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
3609 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) { // cvt_s_l
3610 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3612 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3616 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) { // cvt_d_l
3617 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3619 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3620 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
3625 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3626 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3628 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
3632 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3633 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3635 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
// Float-to-integer conversions: fetch the source table entry for the single
// or double source format.
3640 if(opcode2[i]==0x10) { // cvt_*_s
3641 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3644 if(opcode2[i]==0x11) { // cvt_*_d
3645 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
// Function codes below 0x10 carry an explicit rounding mode in their low two
// bits: save the x87 control word on the stack and load the matching mode.
3648 if((source[i]&0x3f)<0x10) {
3649 emit_fnstcw_stack();
3650 if((source[i]&3)==0) emit_fldcw((int)&round_mode); //DebugMessage(M64MSG_VERBOSE, "round");
3651 if((source[i]&3)==1) emit_fldcw((int)&trunc_mode); //DebugMessage(M64MSG_VERBOSE, "trunc");
3652 if((source[i]&3)==2) emit_fldcw((int)&ceil_mode); //DebugMessage(M64MSG_VERBOSE, "ceil");
3653 if((source[i]&3)==3) emit_fldcw((int)&floor_mode); //DebugMessage(M64MSG_VERBOSE, "floor");
// Load the destination table entry unless temp already holds it (same
// format and same register as the source).
3655 if((source[i]&0x3f)==0x24||(source[i]&0x3c)==0x0c) { // cvt_w_*
3656 if(opcode2[i]!=0x10||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3657 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3660 if((source[i]&0x3f)==0x25||(source[i]&0x3c)==0x08) { // cvt_l_*
3661 if(opcode2[i]!=0x11||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3662 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
// NOTE(review): the body of this test is not visible; presumably it restores
// the control word saved above - confirm.
3665 if((source[i]&0x3f)<0x10) {
3670 // C emulation code for debugging
// Each case below pushes the destination table entry (bits 6..10) and then
// the source table entry (bits 11..15) and calls the C helper named after
// the operation.
3674 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3675 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3676 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3677 emit_call((int)cvt_s_w);
3679 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3680 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3681 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3682 emit_call((int)cvt_d_w);
3684 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3685 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3686 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3687 emit_call((int)cvt_s_l);
3689 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3690 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3691 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3692 emit_call((int)cvt_d_l);
// Single-precision source (opcode2 0x10): cvt_d_s, cvt_w_s, cvt_l_s.
3695 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3696 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3697 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3698 emit_call((int)cvt_d_s);
3700 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3701 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3702 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3703 emit_call((int)cvt_w_s);
3705 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3706 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3707 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3708 emit_call((int)cvt_l_s);
// Double-precision source (opcode2 0x11): cvt_s_d, cvt_w_d, cvt_l_d.
3711 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3712 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3713 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3714 emit_call((int)cvt_s_d);
3716 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3717 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3718 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3719 emit_call((int)cvt_w_d);
3721 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3722 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3723 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3724 emit_call((int)cvt_l_d);
// Single-precision source, explicit rounding: function codes 0x08..0x0b
// produce a 64-bit result (round/trunc/ceil/floor _l_s), 0x0c..0x0f a 32-bit
// result (_w_s).
3727 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3728 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3729 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3730 emit_call((int)round_l_s);
3732 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3733 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3734 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3735 emit_call((int)trunc_l_s);
3737 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3738 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3739 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3740 emit_call((int)ceil_l_s);
3742 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3743 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3744 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3745 emit_call((int)floor_l_s);
3747 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3748 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3749 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3750 emit_call((int)round_w_s);
3752 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3753 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3754 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3755 emit_call((int)trunc_w_s);
3757 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3758 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3759 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3760 emit_call((int)ceil_w_s);
3762 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3763 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3764 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3765 emit_call((int)floor_w_s);
// Double-precision source, explicit rounding: same layout as above with the
// _l_d and _w_d helpers.
3768 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3769 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3770 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3771 emit_call((int)round_l_d);
3773 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3774 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3775 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3776 emit_call((int)trunc_l_d);
3778 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3779 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3780 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3781 emit_call((int)ceil_l_d);
3783 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3784 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
3785 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3786 emit_call((int)floor_l_d);
3788 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3789 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3790 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3791 emit_call((int)round_w_d);
3793 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3794 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3795 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3796 emit_call((int)trunc_w_d);
3798 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3799 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3800 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3801 emit_call((int)ceil_w_d);
3803 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3804 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
3805 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3806 emit_call((int)floor_w_d);
// Release the two 4-byte arguments pushed for the helper call.
3809 emit_addimm(ESP,8,ESP);
3811 //emit_loadreg(CSREG,rs);
// Make the name fconv_assemble expand to the x86 implementation above.
3814 #define fconv_assemble fconv_assemble_x86
// Emits code for the floating-point compare instructions (c_*_s when opcode2
// is 0x10, c_*_d when it is 0x11). The result is kept in bit 0x800000 of the
// host register mapped to FSREG.
//   i      - index of the instruction being assembled
//   i_regs - register state for this instruction
// NOTE(review): the operands spelled "®_cop1_simple" / "®_cop1_double" look
// like a mis-decoded "&reg_cop1_simple" / "&reg_cop1_double" (HTML "&reg"
// entity) - confirm and restore. Several lines of this function (braces,
// the FP load/compare emitters, the declaration of jaddr) are not visible in
// this listing; comments describe only what is shown.
3816 static void fcomp_assemble(int i,struct regstat *i_regs)
3818 signed char fs=get_reg(i_regs->regmap,FSREG);
3819 signed char temp=get_reg(i_regs->regmap,-1);
3821 // Check cop1 unusable
// Test bit 0x20000000 of the CSREG host register and register an FP_STUB
// for this instruction.
3823 signed char cs=get_reg(i_regs->regmap,CSREG);
3825 emit_testimm(cs,0x20000000);
3828 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
// Function code 0x30 simply clears bit 0x800000 of fs.
3832 if((source[i]&0x3f)==0x30) {
3833 emit_andimm(fs,~0x800000,fs);
// Function codes 0x38 and 0x39 also just clear the bit.
3837 if((source[i]&0x3e)==0x38) {
3838 // sf/ngle - these should throw exceptions for NaNs
3839 emit_andimm(fs,~0x800000,fs);
// Single precision: fetch the table entries for the registers in bits 16..20
// and 11..15 (the emitters that load and compare the values are not visible
// here).
3843 if(opcode2[i]==0x10) {
3844 emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],temp);
3846 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
// Set bit 0x800000 in fs and leave a copy of fs with that bit cleared in
// temp; the conditional moves below then pick one of the two depending on
// the flags. NOTE(review): assumes the emitters take the destination as
// their last operand - confirm.
3848 emit_movimm(0x800000,temp);
3849 emit_or(fs,temp,fs);
3850 emit_xor(temp,fs,temp);
3853 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_s
3854 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_s
3855 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_s
3856 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_s
3857 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_s
3858 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_s
3859 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_s
3860 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_s
3861 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_s
3862 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_s
3863 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_s
3864 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_s
3865 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_s
// Double precision: same sequence using the double table.
3868 if(opcode2[i]==0x11) {
3869 emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],temp);
3871 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3873 emit_movimm(0x800000,temp);
3874 emit_or(fs,temp,fs);
3875 emit_xor(temp,fs,temp);
3878 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_d
3879 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_d
3880 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_d
3881 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_d
3882 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_d
3883 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_d
3884 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_d
3885 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_d
3886 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_d
3887 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_d
3888 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_d
3889 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_d
3890 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_d
// Helper-call variant: push the two operand table entries (bits 16..20, then
// bits 11..15) and call the C function matching the function code.
3895 if(opcode2[i]==0x10) {
3896 emit_pushmem((int)®_cop1_simple[(source[i]>>16)&0x1f]);
3897 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
3898 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3899 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3900 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3901 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3902 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3903 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3904 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3905 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3906 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3907 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3908 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3909 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3910 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3911 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3912 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3913 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3915 if(opcode2[i]==0x11) {
3916 emit_pushmem((int)®_cop1_double[(source[i]>>16)&0x1f]);
3917 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
3918 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3919 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3920 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3921 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3922 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3923 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3924 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3925 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3926 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3927 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3928 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3929 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3930 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3931 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3932 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3933 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
// Release the two pushed arguments, then reload FSREG into fs.
3935 emit_addimm(ESP,8,ESP);
3937 emit_loadreg(FSREG,fs);
// Emits code for floating-point arithmetic, in single precision when
// opcode2 is 0x10 and double precision when it is 0x11. The low six bits of
// source[i] select the operation: 0 add, 1 sub, 2 mul, 3 div, 4 sqrt, 5 abs,
// 6 mov, 7 neg (names as used by the emitters and C helpers below).
//   i      - index of the instruction being assembled
//   i_regs - register state for this instruction
// NOTE(review): the operands spelled "®_cop1_simple" / "®_cop1_double" look
// like a mis-decoded "&reg_cop1_simple" / "&reg_cop1_double" (HTML "&reg"
// entity) - confirm and restore. Many lines of this function (braces, the
// x87 load/store emitters, the declaration of jaddr) are not visible in this
// listing; comments describe only what is shown.
3941 static void float_assemble(int i,struct regstat *i_regs)
3943 signed char temp=get_reg(i_regs->regmap,-1);
3945 // Check cop1 unusable
// Test bit 0x20000000 of the CSREG host register and register an FP_STUB
// for this instruction.
3947 signed char cs=get_reg(i_regs->regmap,CSREG);
3949 emit_testimm(cs,0x20000000);
3952 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
// mov: nothing is emitted when source (bits 11..15) and destination
// (bits 6..10) are the same register.
3956 if((source[i]&0x3f)==6) // mov
3958 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3959 if(opcode2[i]==0x10) {
3960 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3962 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3965 if(opcode2[i]==0x11) {
3966 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3968 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
// One-operand operations (function codes above 3): fetch the source table
// entry, and the destination entry when it is a different register.
3975 if((source[i]&0x3f)>3)
3977 if(opcode2[i]==0x10) {
3978 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
3980 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3981 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
3984 if(opcode2[i]==0x11) {
3985 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
3987 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3988 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
// The emitters for these three operations are not visible in this listing.
3991 if((source[i]&0x3f)==4) // sqrt
3993 if((source[i]&0x3f)==5) // abs
3995 if((source[i]&0x3f)==7) // neg
3997 if(opcode2[i]==0x10) {
4000 if(opcode2[i]==0x11) {
// Two-operand operations (function codes 0..3): fetch the first operand's
// table entry (bits 11..15).
4005 if((source[i]&0x3f)<4)
4007 if(opcode2[i]==0x10) {
4008 emit_readword((int)®_cop1_simple[(source[i]>>11)&0x1f],temp);
4011 if(opcode2[i]==0x11) {
4012 emit_readword((int)®_cop1_double[(source[i]>>11)&0x1f],temp);
// When the second operand (bits 16..20) is a different register, fetch its
// table entry and apply the operation with a memory operand; the
// register-form emitters further down (emit_fadd(0) etc.) presumably cover
// the same-register case - the else is not visible here.
4015 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4016 if(opcode2[i]==0x10) {
4017 emit_readword((int)®_cop1_simple[(source[i]>>16)&0x1f],temp);
4018 if((source[i]&0x3f)==0) emit_fadds(temp);
4019 if((source[i]&0x3f)==1) emit_fsubs(temp);
4020 if((source[i]&0x3f)==2) emit_fmuls(temp);
4021 if((source[i]&0x3f)==3) emit_fdivs(temp);
4023 else if(opcode2[i]==0x11) {
4024 emit_readword((int)®_cop1_double[(source[i]>>16)&0x1f],temp);
4025 if((source[i]&0x3f)==0) emit_faddl(temp);
4026 if((source[i]&0x3f)==1) emit_fsubl(temp);
4027 if((source[i]&0x3f)==2) emit_fmull(temp);
4028 if((source[i]&0x3f)==3) emit_fdivl(temp);
4032 if((source[i]&0x3f)==0) emit_fadd(0);
4033 if((source[i]&0x3f)==1) emit_fsub(0);
4034 if((source[i]&0x3f)==2) emit_fmul(0);
4035 if((source[i]&0x3f)==3) emit_fdiv(0);
// Fetch the destination table entry (bits 6..10) unless temp already holds
// it because the destination equals the second operand.
4037 if(opcode2[i]==0x10) {
4038 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4039 emit_readword((int)®_cop1_simple[(source[i]>>6)&0x1f],temp);
4043 if(opcode2[i]==0x11) {
4044 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4045 emit_readword((int)®_cop1_double[(source[i]>>6)&0x1f],temp);
// Helper-call variant: push the destination entry, the second operand entry
// (two-operand operations only) and the first operand entry, call the C
// helper, then release 12 or 8 bytes of stack accordingly.
4052 if(opcode2[i]==0x10) { // Single precision
4054 emit_pushmem((int)®_cop1_simple[(source[i]>> 6)&0x1f]);
4055 if((source[i]&0x3f)<4)
4056 emit_pushmem((int)®_cop1_simple[(source[i]>>16)&0x1f]);
4057 emit_pushmem((int)®_cop1_simple[(source[i]>>11)&0x1f]);
4058 switch(source[i]&0x3f)
4060 case 0x00: emit_call((int)add_s);break;
4061 case 0x01: emit_call((int)sub_s);break;
4062 case 0x02: emit_call((int)mul_s);break;
4063 case 0x03: emit_call((int)div_s);break;
4064 case 0x04: emit_call((int)sqrt_s);break;
4065 case 0x05: emit_call((int)abs_s);break;
4066 case 0x06: emit_call((int)mov_s);break;
4067 case 0x07: emit_call((int)neg_s);break;
4069 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
4072 if(opcode2[i]==0x11) { // Double precision
4074 emit_pushmem((int)®_cop1_double[(source[i]>> 6)&0x1f]);
4075 if((source[i]&0x3f)<4)
4076 emit_pushmem((int)®_cop1_double[(source[i]>>16)&0x1f]);
4077 emit_pushmem((int)®_cop1_double[(source[i]>>11)&0x1f]);
4078 switch(source[i]&0x3f)
4080 case 0x00: emit_call((int)add_d);break;
4081 case 0x01: emit_call((int)sub_d);break;
4082 case 0x02: emit_call((int)mul_d);break;
4083 case 0x03: emit_call((int)div_d);break;
4084 case 0x04: emit_call((int)sqrt_d);break;
4085 case 0x05: emit_call((int)abs_d);break;
4086 case 0x06: emit_call((int)mov_d);break;
4087 case 0x07: emit_call((int)neg_d);break;
4089 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
4094 static void multdiv_assemble_x86(int i,struct regstat *i_regs)
4101 // case 0x1D: DMULTU
4106 if((opcode2[i]&4)==0) // 32-bit
4108 if(opcode2[i]==0x18) // MULT
4110 char m1=get_reg(i_regs->regmap,rs1[i]);
4111 char m2=get_reg(i_regs->regmap,rs2[i]);
4117 if(opcode2[i]==0x19) // MULTU
4119 char m1=get_reg(i_regs->regmap,rs1[i]);
4120 char m2=get_reg(i_regs->regmap,rs2[i]);
4126 if(opcode2[i]==0x1A) // DIV
4128 char d1=get_reg(i_regs->regmap,rs1[i]);
4129 char d2=get_reg(i_regs->regmap,rs2[i]);
4135 emit_jeq((int)out+8);
4138 if(opcode2[i]==0x1B) // DIVU
4140 char d1=get_reg(i_regs->regmap,rs1[i]);
4141 char d2=get_reg(i_regs->regmap,rs2[i]);
4147 emit_jeq((int)out+8);
4153 if(opcode2[i]==0x1C) // DMULT
4155 char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4156 char m1l=get_reg(i_regs->regmap,rs1[i]);
4157 char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4158 char m2l=get_reg(i_regs->regmap,rs2[i]);
4167 emit_call((int)&mult64);
4172 char hih=get_reg(i_regs->regmap,HIREG|64);
4173 char hil=get_reg(i_regs->regmap,HIREG);
4174 if(hih>=0) emit_loadreg(HIREG|64,hih);
4175 if(hil>=0) emit_loadreg(HIREG,hil);
4176 char loh=get_reg(i_regs->regmap,LOREG|64);
4177 char lol=get_reg(i_regs->regmap,LOREG);
4178 if(loh>=0) emit_loadreg(LOREG|64,loh);
4179 if(lol>=0) emit_loadreg(LOREG,lol);
4181 if(opcode2[i]==0x1D) // DMULTU
4183 char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4184 char m1l=get_reg(i_regs->regmap,rs1[i]);
4185 char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4186 char m2l=get_reg(i_regs->regmap,rs2[i]);
4187 char temp=get_reg(i_regs->regmap,-1);
4195 emit_storereg(LOREG,EAX);
4199 emit_add(EAX,temp,temp);
4201 emit_storereg(HIREG,EDX);
4204 emit_add(EAX,temp,temp);
4206 emit_storereg(LOREG|64,temp);
4210 emit_add(EAX,temp,EAX);
4211 emit_loadreg(HIREG,temp);
4213 emit_add(EAX,temp,EAX);
4221 emit_call((int)&multu64);
4226 char hih=get_reg(i_regs->regmap,HIREG|64);
4227 char hil=get_reg(i_regs->regmap,HIREG);
4228 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4229 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4231 // Shouldn't be necessary
4232 //char loh=get_reg(i_regs->regmap,LOREG|64);
4233 //char lol=get_reg(i_regs->regmap,LOREG);
4234 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4235 //if(lol>=0) emit_loadreg(LOREG,lol);
4237 if(opcode2[i]==0x1E) // DDIV
4239 char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4240 char d1l=get_reg(i_regs->regmap,rs1[i]);
4241 char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4242 char d2l=get_reg(i_regs->regmap,rs2[i]);
4247 //emit_pushreg(d2h);
4248 //emit_pushreg(d2l);
4249 //emit_pushreg(d1h);
4250 //emit_pushreg(d1l);
4251 emit_addimm(ESP,-16,ESP);
4252 emit_writeword_indexed(d2h,12,ESP);
4253 emit_writeword_indexed(d2l,8,ESP);
4254 emit_writeword_indexed(d1h,4,ESP);
4255 emit_writeword_indexed(d1l,0,ESP);
4256 emit_call((int)&div64);
4261 emit_readword_indexed(0,ESP,d1l);
4262 emit_readword_indexed(4,ESP,d1h);
4263 emit_readword_indexed(8,ESP,d2l);
4264 emit_readword_indexed(12,ESP,d2h);
4265 emit_addimm(ESP,16,ESP);
4266 char hih=get_reg(i_regs->regmap,HIREG|64);
4267 char hil=get_reg(i_regs->regmap,HIREG);
4268 char loh=get_reg(i_regs->regmap,LOREG|64);
4269 char lol=get_reg(i_regs->regmap,LOREG);
4270 if(hih>=0) emit_loadreg(HIREG|64,hih);
4271 if(hil>=0) emit_loadreg(HIREG,hil);
4272 if(loh>=0) emit_loadreg(LOREG|64,loh);
4273 if(lol>=0) emit_loadreg(LOREG,lol);
4275 if(opcode2[i]==0x1F) // DDIVU
4277 char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4278 char d1l=get_reg(i_regs->regmap,rs1[i]);
4279 char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4280 char d2l=get_reg(i_regs->regmap,rs2[i]);
4285 //emit_pushreg(d2h);
4286 //emit_pushreg(d2l);
4287 //emit_pushreg(d1h);
4288 //emit_pushreg(d1l);
4289 emit_addimm(ESP,-16,ESP);
4290 emit_writeword_indexed(d2h,12,ESP);
4291 emit_writeword_indexed(d2l,8,ESP);
4292 emit_writeword_indexed(d1h,4,ESP);
4293 emit_writeword_indexed(d1l,0,ESP);
4294 emit_call((int)&divu64);
4299 emit_readword_indexed(0,ESP,d1l);
4300 emit_readword_indexed(4,ESP,d1h);
4301 emit_readword_indexed(8,ESP,d2l);
4302 emit_readword_indexed(12,ESP,d2h);
4303 emit_addimm(ESP,16,ESP);
4304 char hih=get_reg(i_regs->regmap,HIREG|64);
4305 char hil=get_reg(i_regs->regmap,HIREG);
4306 char loh=get_reg(i_regs->regmap,LOREG|64);
4307 char lol=get_reg(i_regs->regmap,LOREG);
4308 if(hih>=0) emit_loadreg(HIREG|64,hih);
4309 if(hil>=0) emit_loadreg(HIREG,hil);
4310 if(loh>=0) emit_loadreg(LOREG|64,loh);
4311 if(lol>=0) emit_loadreg(LOREG,lol);
4317 // Multiply by zero is zero.
4318 // MIPS does not have a divide by zero exception.
4319 // The result is undefined, we return zero.
4320 char hr=get_reg(i_regs->regmap,HIREG);
4321 char lr=get_reg(i_regs->regmap,LOREG);
4322 if(hr>=0) emit_zeroreg(hr);
4323 if(lr>=0) emit_zeroreg(lr);
// Alias the generic name multdiv_assemble to the x86 implementation, so any
// code that refers to multdiv_assemble resolves to multdiv_assemble_x86.
4326 #define multdiv_assemble multdiv_assemble_x86
// Emit a move of the immediate 0xf8 into register r.
// NOTE(review): 0xf8 looks like the byte-offset mask for the 32 eight-byte
// mini_ht entries (see do_miniht_insert, which indexes with
// (addr&0xFF)>>3) -- confirm.
static void do_preload_rhash(int r)
{
  const int rhash_mask = 0xf8;

  emit_movimm(rhash_mask, r);
}
// No-op on x86: this preload step is not needed for this architecture.
// The register argument is accepted and ignored.
static void do_preload_rhtbl(int r)
{
  (void)r; // nothing to emit
}
// Hash step that sits alongside the mini hash table helpers; presumably rs
// is the register holding the value to hash and rh the register receiving
// the hash -- TODO confirm.
// NOTE(review): only the opening line of this function is present in this
// view; its statements and closing brace are not shown, so what it emits
// cannot be described here -- check against the full file.
4336 static void do_rhash(int rs,int rh) {
// No-op on x86: a separate load is unnecessary because the load and the
// compare are combined into a single instruction, emitted by
// do_miniht_jump.  Both arguments are ignored.
static void do_miniht_load(int ht,int rh)
{
  (void)ht;
  (void)rh;
}
4345 static void do_miniht_jump(int rs,int rh,int ht) {
4346 emit_cmpmem_indexed((int)mini_ht,rh,rs);
4347 emit_jne(jump_vaddr_reg[rs]);
4348 emit_jmpmem_indexed((int)mini_ht+4,rh);
4351 static void do_miniht_insert(int return_address,int rt,int temp) {
4352 emit_movimm(return_address,rt); // PC into link register
4353 //emit_writeword_imm(return_address,(int)&mini_ht[(return_address&0xFF)>>8][0]);
4354 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4355 add_to_linker((int)out,return_address,1);
4356 emit_writeword_imm(0,(int)&mini_ht[(return_address&0xFF)>>3][1]);
// Not needed on x86: intentionally does nothing.  The argument is ignored.
static void literal_pool(int n)
{
  (void)n;
}
// Not needed on x86: intentionally does nothing.  The argument is ignored.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
// Hook for CPU-architecture-specific initialization.  x86 requires none,
// so the body is empty.
static void arch_init()
{
  return;
}