1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_x86.c *
3 * Copyright (C) 2009-2010 Ari64 *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 int pending_exception;
26 uint64_t readmem_dword;
27 precomp_instr fake_pc;
28 u_int memory_map[1048576];
29 u_int mini_ht[32][2] __attribute__((aligned(8)));
30 u_char restore_candidate[512] __attribute__((aligned(4)));
33 void jump_vaddr_eax();
34 void jump_vaddr_ecx();
35 void jump_vaddr_edx();
36 void jump_vaddr_ebx();
37 void jump_vaddr_ebp();
38 void jump_vaddr_edi();
40 const u_int jump_vaddr_reg[8] = {
48 (int)jump_vaddr_edi };
50 const u_short rounding_modes[4] = {
58 // We need these for cmovcc instructions on x86
/*
 * Patch the instruction at addr so that it refers to target.
 *   addr   - address of the already-emitted instruction to patch
 *   target - absolute address the instruction should refer to
 * Three instruction shapes are recognised from the bytes at addr; for the
 * relative forms the stored value is target minus the address just past the
 * 4-byte displacement field.
 */
64 void set_jump_target(int addr,int target)
66 u_char *ptr=(u_char *)addr;
// Two-byte form: the second byte must be 0x80..0x8f and the 32-bit
// displacement follows at offset 2.
// NOTE(review): the test selecting this branch is not visible here;
// presumably it checks for a 0x0f first byte (x86 Jcc rel32) - confirm.
69 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
70 u_int *ptr2=(u_int *)(ptr+2);
71 *ptr2=target-(int)ptr2-4;
// One-byte opcodes 0xe8/0xe9: the displacement field starts at offset 1.
73 else if(*ptr==0xe8||*ptr==0xe9) {
74 u_int *ptr2=(u_int *)(ptr+1);
75 *ptr2=target-(int)ptr2-4;
// Anything else must be opcode 0xc7; the field of interest is at offset 6.
// NOTE(review): the store through ptr2 is on a line not visible here.
79 assert(*ptr==0xc7); /* mov immediate (store address) */
80 u_int *ptr2=(u_int *)(ptr+6);
/*
 * Redirect a patched branch back to its stub.
 * The pointer stored at byte offset 6 of stub is taken as the address of a
 * 32-bit relative displacement; that displacement is overwritten so that it
 * resolves to stub itself (stub minus the address just past the field).
 * Note: stub+6 is arithmetic on void *, which relies on the GNU C extension
 * that treats void as having size 1.
 */
85 void kill_pointer(void *stub)
87 int *i_ptr=*((int **)(stub+6));
88 *i_ptr=(int)stub-(int)i_ptr-4;
/*
 * Return the absolute address currently encoded by the displacement field
 * that a stub refers to.
 * The pointer stored at byte offset 6 of stub locates a 32-bit relative
 * displacement; the result is that displacement plus the address just past
 * the 4-byte field.
 */
90 int get_pointer(void *stub)
92 int *i_ptr=*((int **)(stub+6));
93 return *i_ptr+(int)i_ptr+4;
96 // Find the "clean" entry point from a "dirty" entry point
97 // by skipping past the call to verify_code
/*
 * addr: address of a "dirty" entry point. The asserts require byte 20 to be
 *       0xE8 and byte 25 to be 0x83.
 * Returns addr+28, unless the byte at offset 28 is 0xE9, in which case the
 * 32-bit displacement at offset 29 is followed (relative to addr+33, the end
 * of that 5-byte instruction) and the destination is returned instead.
 */
98 u_int get_clean_addr(int addr)
100 u_char *ptr=(u_char *)addr;
101 assert(ptr[20]==0xE8); // call instruction
102 assert(ptr[25]==0x83); // pop (add esp,4) instruction
103 if(ptr[28]==0xE9) return *(u_int *)(ptr+29)+addr+33; // follow jmp
104 else return(addr+28);
/*
 * Check whether the code a "dirty" entry stub was built from still matches
 * its saved copy.
 *   addr - address of the stub; byte 5 must be 0xB8 and byte 20 must be 0xE8,
 *          and 32-bit operands are read from offsets 6 (source), 11 (copy)
 *          and 16 (len).
 * Returns nonzero when the len bytes at source and copy are identical, and 0
 * when they differ or when source cannot be translated through memory_map.
 */
107 int verify_dirty(int addr)
109 u_char *ptr=(u_char *)addr;
110 assert(ptr[5]==0xB8);
111 u_int source=*(u_int *)(ptr+6);
112 u_int copy=*(u_int *)(ptr+11);
113 u_int len=*(u_int *)(ptr+16);
114 assert(ptr[20]==0xE8); // call instruction
// Call target = displacement at offset 21 + address of the next instruction.
115 u_int verifier=*(u_int *)(ptr+21)+(u_int)ptr+25;
// Only for these two verifiers is source translated through memory_map.
116 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
117 unsigned int page=source>>12;
118 unsigned int map_value=memory_map[page];
// Map entries with the top bit set are treated as unusable: report a mismatch.
119 if(map_value>=0x80000000) return 0;
// Every further 4K page covered by [source, source+len) must have the same
// map entry, ignoring the top two bits (discarded by the <<2).
120 while(page<((source+len-1)>>12)) {
121 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
// Translated address = source plus the map entry scaled by 4.
123 source = source+(map_value<<2);
125 //printf("verify_dirty: %x %x %x\n",source,copy,len);
126 return !memcmp((void *)source,(void *)copy,len);
129 // This doesn't necessarily find all clean entry points, just
130 // guarantees that it's not dirty
/*
 * addr: address of a candidate entry point.
 * Returns 1 as soon as any one of the five signature bytes checked below
 * (offsets 5, 10, 15, 20 and 25) is not the expected opcode byte.
 * NOTE(review): the result when all five bytes match is on a line not
 * visible here - presumably 0 (dirty); confirm.
 */
131 int isclean(int addr)
133 u_char *ptr=(u_char *)addr;
134 if(ptr[5]!=0xB8) return 1; // mov imm,%eax
135 if(ptr[10]!=0xBB) return 1; // mov imm,%ebx
136 if(ptr[15]!=0xB9) return 1; // mov imm,%ecx
137 if(ptr[20]!=0xE8) return 1; // call instruction
138 if(ptr[25]!=0x83) return 1; // pop (add esp,4) instruction
/*
 * Report the source range covered by a "dirty" entry stub.
 *   addr  - address of the stub; byte 5 must be 0xB8 and byte 20 must be 0xE8
 *   start - if non-NULL, receives the (possibly translated) source address
 *   end   - if non-NULL, receives that address plus the length read from
 *           offset 16
 * When the stub calls verify_code_vm or verify_code_ds, the source address is
 * translated through memory_map, or set to 0 if the map entry has its top bit
 * set.
 */
142 void get_bounds(int addr,u_int *start,u_int *end)
144 u_char *ptr=(u_char *)addr;
145 assert(ptr[5]==0xB8);
146 u_int source=*(u_int *)(ptr+6);
147 //u_int copy=*(u_int *)(ptr+11);
148 u_int len=*(u_int *)(ptr+16);
149 assert(ptr[20]==0xE8); // call instruction
// Call target = displacement at offset 21 + address of the next instruction.
150 u_int verifier=*(u_int *)(ptr+21)+(u_int)ptr+25;
151 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
// Unusable map entry (top bit set): report the range as starting at 0.
152 if(memory_map[source>>12]>=0x80000000) source = 0;
// Otherwise add the map entry scaled by 4 to obtain the translated address.
153 else source = source+(memory_map[source>>12]<<2);
155 if(start) *start=source;
156 if(end) *end=source+len;
159 /* Register allocation */
161 // Note: registers are allocated clean (unmodified state)
162 // if you intend to modify the register, you must call dirty_reg().
163 void alloc_reg(struct regstat *cur,int i,signed char reg)
166 int preferred_reg = (reg&3)+(reg>28)*4-(reg==32)+2*(reg==36)-(reg==40);
168 // Don't allocate unused registers
169 if((cur->u>>reg)&1) return;
171 // see if it's already allocated
172 for(hr=0;hr<HOST_REGS;hr++)
174 if(cur->regmap[hr]==reg) return;
177 // Keep the same mapping if the register was already allocated in a loop
178 preferred_reg = loop_reg(i,reg,preferred_reg);
180 // Try to allocate the preferred register
181 if(cur->regmap[preferred_reg]==-1) {
182 cur->regmap[preferred_reg]=reg;
183 cur->dirty&=~(1<<preferred_reg);
184 cur->isconst&=~(1<<preferred_reg);
187 r=cur->regmap[preferred_reg];
188 if(r<64&&((cur->u>>r)&1)) {
189 cur->regmap[preferred_reg]=reg;
190 cur->dirty&=~(1<<preferred_reg);
191 cur->isconst&=~(1<<preferred_reg);
194 if(r>=64&&((cur->uu>>(r&63))&1)) {
195 cur->regmap[preferred_reg]=reg;
196 cur->dirty&=~(1<<preferred_reg);
197 cur->isconst&=~(1<<preferred_reg);
201 // Try to allocate EAX, EBX, ECX, or EDX
202 // We prefer these because they can do byte and halfword loads
203 for(hr=0;hr<4;hr++) {
204 if(cur->regmap[hr]==-1) {
206 cur->dirty&=~(1<<hr);
207 cur->isconst&=~(1<<hr);
212 // Clear any unneeded registers
213 // We try to keep the mapping consistent, if possible, because it
214 // makes branches easier (especially loops). So we try to allocate
215 // first (see above) before removing old mappings. If this is not
216 // possible then go ahead and clear out the registers that are no
218 for(hr=0;hr<HOST_REGS;hr++)
224 if(i==0||(unneeded_reg[i-1]>>r)&1) {cur->regmap[hr]=-1;break;}
228 if((cur->uu>>(r&63))&1)
229 if(i==0||(unneeded_reg_upper[i-1]>>(r&63))&1) {cur->regmap[hr]=-1;break;}
233 // Try to allocate any available register, but prefer
234 // registers that have not been used recently.
236 for(hr=0;hr<HOST_REGS;hr++) {
237 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
238 if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) {
240 cur->dirty&=~(1<<hr);
241 cur->isconst&=~(1<<hr);
247 // Try to allocate any available register
248 for(hr=0;hr<HOST_REGS;hr++) {
249 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
251 cur->dirty&=~(1<<hr);
252 cur->isconst&=~(1<<hr);
257 // Ok, now we have to evict someone
258 // Pick a register we hopefully won't need soon
259 u_char hsn[MAXREG+1];
260 memset(hsn,10,sizeof(hsn));
262 lsn(hsn,i,&preferred_reg);
263 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
265 // Don't evict the cycle count at entry points, otherwise the entry
266 // stub will have to write it.
267 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
268 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
271 // Alloc preferred register if available
272 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
273 for(hr=0;hr<HOST_REGS;hr++) {
274 // Evict both parts of a 64-bit register
275 if((cur->regmap[hr]&63)==r) {
277 cur->dirty&=~(1<<hr);
278 cur->isconst&=~(1<<hr);
281 cur->regmap[preferred_reg]=reg;
284 for(r=1;r<=MAXREG;r++)
286 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
287 for(hr=0;hr<HOST_REGS;hr++) {
288 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
289 if(cur->regmap[hr]==r+64) {
291 cur->dirty&=~(1<<hr);
292 cur->isconst&=~(1<<hr);
297 for(hr=0;hr<HOST_REGS;hr++) {
298 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
299 if(cur->regmap[hr]==r) {
301 cur->dirty&=~(1<<hr);
302 cur->isconst&=~(1<<hr);
313 for(r=1;r<=MAXREG;r++)
316 for(hr=0;hr<HOST_REGS;hr++) {
317 if(cur->regmap[hr]==r+64) {
319 cur->dirty&=~(1<<hr);
320 cur->isconst&=~(1<<hr);
324 for(hr=0;hr<HOST_REGS;hr++) {
325 if(cur->regmap[hr]==r) {
327 cur->dirty&=~(1<<hr);
328 cur->isconst&=~(1<<hr);
335 printf("This shouldn't happen (alloc_reg)");exit(1);
338 void alloc_reg64(struct regstat *cur,int i,signed char reg)
340 int preferred_reg = 5+reg%3;
343 // allocate the lower 32 bits
344 alloc_reg(cur,i,reg);
346 // Don't allocate unused registers
347 if((cur->uu>>reg)&1) return;
349 // see if the upper half is already allocated
350 for(hr=0;hr<HOST_REGS;hr++)
352 if(cur->regmap[hr]==reg+64) return;
355 // Keep the same mapping if the register was already allocated in a loop
356 preferred_reg = loop_reg(i,reg,preferred_reg);
358 // Try to allocate the preferred register
359 if(cur->regmap[preferred_reg]==-1) {
360 cur->regmap[preferred_reg]=reg|64;
361 cur->dirty&=~(1<<preferred_reg);
362 cur->isconst&=~(1<<preferred_reg);
365 r=cur->regmap[preferred_reg];
366 if(r<64&&((cur->u>>r)&1)) {
367 cur->regmap[preferred_reg]=reg|64;
368 cur->dirty&=~(1<<preferred_reg);
369 cur->isconst&=~(1<<preferred_reg);
372 if(r>=64&&((cur->uu>>(r&63))&1)) {
373 cur->regmap[preferred_reg]=reg|64;
374 cur->dirty&=~(1<<preferred_reg);
375 cur->isconst&=~(1<<preferred_reg);
379 // Try to allocate EBP, ESI or EDI
380 for(hr=5;hr<8;hr++) {
381 if(cur->regmap[hr]==-1) {
382 cur->regmap[hr]=reg|64;
383 cur->dirty&=~(1<<hr);
384 cur->isconst&=~(1<<hr);
389 // Clear any unneeded registers
390 // We try to keep the mapping consistent, if possible, because it
391 // makes branches easier (especially loops). So we try to allocate
392 // first (see above) before removing old mappings. If this is not
393 // possible then go ahead and clear out the registers that are no
395 for(hr=HOST_REGS-1;hr>=0;hr--)
400 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
404 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
408 // Try to allocate any available register, but prefer
409 // registers that have not been used recently.
411 for(hr=0;hr<HOST_REGS;hr++) {
412 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
413 if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) {
414 cur->regmap[hr]=reg|64;
415 cur->dirty&=~(1<<hr);
416 cur->isconst&=~(1<<hr);
422 // Try to allocate any available register
423 for(hr=0;hr<HOST_REGS;hr++) {
424 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
425 cur->regmap[hr]=reg|64;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
432 // Ok, now we have to evict someone
433 // Pick a register we hopefully won't need soon
434 u_char hsn[MAXREG+1];
435 memset(hsn,10,sizeof(hsn));
437 lsn(hsn,i,&preferred_reg);
438 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
439 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
441 // Don't evict the cycle count at entry points, otherwise the entry
442 // stub will have to write it.
443 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
444 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
447 // Alloc preferred register if available
448 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
449 for(hr=0;hr<HOST_REGS;hr++) {
450 // Evict both parts of a 64-bit register
451 if((cur->regmap[hr]&63)==r) {
453 cur->dirty&=~(1<<hr);
454 cur->isconst&=~(1<<hr);
457 cur->regmap[preferred_reg]=reg|64;
460 for(r=1;r<=MAXREG;r++)
462 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
463 for(hr=0;hr<HOST_REGS;hr++) {
464 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
465 if(cur->regmap[hr]==r+64) {
466 cur->regmap[hr]=reg|64;
467 cur->dirty&=~(1<<hr);
468 cur->isconst&=~(1<<hr);
473 for(hr=0;hr<HOST_REGS;hr++) {
474 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
475 if(cur->regmap[hr]==r) {
476 cur->regmap[hr]=reg|64;
477 cur->dirty&=~(1<<hr);
478 cur->isconst&=~(1<<hr);
489 for(r=1;r<=MAXREG;r++)
492 for(hr=0;hr<HOST_REGS;hr++) {
493 if(cur->regmap[hr]==r+64) {
494 cur->regmap[hr]=reg|64;
495 cur->dirty&=~(1<<hr);
496 cur->isconst&=~(1<<hr);
500 for(hr=0;hr<HOST_REGS;hr++) {
501 if(cur->regmap[hr]==r) {
502 cur->regmap[hr]=reg|64;
503 cur->dirty&=~(1<<hr);
504 cur->isconst&=~(1<<hr);
511 printf("This shouldn't happen");exit(1);
514 // Allocate a temporary register. This is done without regard to
515 // dirty status or whether the register we request is on the unneeded list
516 // Note: This will only allocate one register, even if called multiple times
517 void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
520 int preferred_reg = -1;
522 // see if it's already allocated
523 for(hr=0;hr<HOST_REGS;hr++)
525 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
528 // Try to allocate any available register, starting with EDI, ESI, EBP...
529 // We prefer EDI, ESI, EBP since the others are used for byte/halfword stores
530 for(hr=HOST_REGS-1;hr>=0;hr--) {
531 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
533 cur->dirty&=~(1<<hr);
534 cur->isconst&=~(1<<hr);
539 // Find an unneeded register
540 for(hr=HOST_REGS-1;hr>=0;hr--)
546 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
548 cur->dirty&=~(1<<hr);
549 cur->isconst&=~(1<<hr);
556 if((cur->uu>>(r&63))&1) {
557 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
568 // Ok, now we have to evict someone
569 // Pick a register we hopefully won't need soon
570 // TODO: we might want to follow unconditional jumps here
571 // TODO: get rid of dupe code and make this into a function
572 u_char hsn[MAXREG+1];
573 memset(hsn,10,sizeof(hsn));
575 lsn(hsn,i,&preferred_reg);
576 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
578 // Don't evict the cycle count at entry points, otherwise the entry
579 // stub will have to write it.
580 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
581 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
584 for(r=1;r<=MAXREG;r++)
586 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
587 for(hr=0;hr<HOST_REGS;hr++) {
588 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
589 if(cur->regmap[hr]==r+64) {
591 cur->dirty&=~(1<<hr);
592 cur->isconst&=~(1<<hr);
597 for(hr=0;hr<HOST_REGS;hr++) {
598 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
599 if(cur->regmap[hr]==r) {
601 cur->dirty&=~(1<<hr);
602 cur->isconst&=~(1<<hr);
613 for(r=1;r<=MAXREG;r++)
616 for(hr=0;hr<HOST_REGS;hr++) {
617 if(cur->regmap[hr]==r+64) {
619 cur->dirty&=~(1<<hr);
620 cur->isconst&=~(1<<hr);
624 for(hr=0;hr<HOST_REGS;hr++) {
625 if(cur->regmap[hr]==r) {
627 cur->dirty&=~(1<<hr);
628 cur->isconst&=~(1<<hr);
635 printf("This shouldn't happen");exit(1);
637 // Allocate a specific x86 register.
638 void alloc_x86_reg(struct regstat *cur,int i,signed char reg,char hr)
642 // see if it's already allocated (and dealloc it)
643 for(n=0;n<HOST_REGS;n++)
645 if(n!=ESP&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
649 cur->dirty&=~(1<<hr);
650 cur->isconst&=~(1<<hr);
653 // Alloc cycle count into dedicated register
/*
 * Bind the cycle counter (CCREG) to host register ESI in the register state.
 *   cur - register allocation state to update
 *   i   - index of the instruction being processed (forwarded unchanged)
 * Declared void: the definition previously had no return type (implicit int,
 * which C99 removed) and the visible body returns no value.
 */
654 void alloc_cc(struct regstat *cur,int i)
656 alloc_x86_reg(cur,i,CCREG,ESI);
661 void multdiv_alloc_x86(struct regstat *current,int i)
671 clear_const(current,rs1[i]);
672 clear_const(current,rs2[i]);
675 if((opcode2[i]&4)==0) // 32-bit
677 current->u&=~(1LL<<HIREG);
678 current->u&=~(1LL<<LOREG);
679 alloc_x86_reg(current,i,HIREG,EDX);
680 alloc_x86_reg(current,i,LOREG,EAX);
681 alloc_reg(current,i,rs1[i]);
682 alloc_reg(current,i,rs2[i]);
683 current->is32|=1LL<<HIREG;
684 current->is32|=1LL<<LOREG;
685 dirty_reg(current,HIREG);
686 dirty_reg(current,LOREG);
690 alloc_x86_reg(current,i,HIREG|64,EDX);
691 alloc_x86_reg(current,i,HIREG,EAX);
692 alloc_reg64(current,i,rs1[i]);
693 alloc_reg64(current,i,rs2[i]);
694 alloc_all(current,i);
695 current->is32&=~(1LL<<HIREG);
696 current->is32&=~(1LL<<LOREG);
697 dirty_reg(current,HIREG);
698 dirty_reg(current,LOREG);
703 // Multiply by zero is zero.
704 // MIPS does not have a divide by zero exception.
705 // The result is undefined, we return zero.
706 alloc_reg(current,i,HIREG);
707 alloc_reg(current,i,LOREG);
708 current->is32|=1LL<<HIREG;
709 current->is32|=1LL<<LOREG;
710 dirty_reg(current,HIREG);
711 dirty_reg(current,LOREG);
714 #define multdiv_alloc multdiv_alloc_x86
718 char regname[8][4] = {
728 void output_byte(u_char byte)
732 void output_modrm(u_char mod,u_char rm,u_char ext)
737 u_char byte=(mod<<6)|(ext<<3)|rm;
740 void output_sib(u_char scale,u_char index,u_char base)
745 u_char byte=(scale<<6)|(index<<3)|base;
748 void output_w32(u_int word)
750 *((u_int *)out)=word;
754 void emit_mov(int rs,int rt)
756 assem_debug("mov %%%s,%%%s\n",regname[rs],regname[rt]);
758 output_modrm(3,rt,rs);
761 void emit_add(int rs1,int rs2,int rt)
764 assem_debug("add %%%s,%%%s\n",regname[rs2],regname[rs1]);
766 output_modrm(3,rs1,rs2);
768 assem_debug("add %%%s,%%%s\n",regname[rs1],regname[rs2]);
770 output_modrm(3,rs2,rs1);
772 assem_debug("lea (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
775 output_modrm(0,4,rt);
776 output_sib(0,rs2,rs1);
778 output_modrm(0,4,rt);
779 output_sib(0,rs1,rs2);
780 }else /* lea 0(,%ebp,2) */{
781 output_modrm(0,4,rt);
788 void emit_adds(int rs1,int rs2,int rt)
790 emit_add(rs1,rs2,rt);
793 void emit_lea8(int rs1,int rt)
795 assem_debug("lea 0(%%%s,8),%%%s\n",regname[rs1],regname[rt]);
797 output_modrm(0,4,rt);
801 void emit_leairrx1(int imm,int rs1,int rs2,int rt)
803 assem_debug("lea %x(%%%s,%%%s,1),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
805 if(imm!=0||rs1==EBP) {
806 output_modrm(2,4,rt);
807 output_sib(0,rs2,rs1);
810 output_modrm(0,4,rt);
811 output_sib(0,rs2,rs1);
814 void emit_leairrx4(int imm,int rs1,int rs2,int rt)
816 assem_debug("lea %x(%%%s,%%%s,4),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
818 if(imm!=0||rs1==EBP) {
819 output_modrm(2,4,rt);
820 output_sib(2,rs2,rs1);
823 output_modrm(0,4,rt);
824 output_sib(2,rs2,rs1);
828 void emit_neg(int rs, int rt)
830 if(rs!=rt) emit_mov(rs,rt);
831 assem_debug("neg %%%s\n",regname[rt]);
833 output_modrm(3,rt,3);
836 void emit_negs(int rs, int rt)
841 void emit_sub(int rs1,int rs2,int rt)
844 assem_debug("sub %%%s,%%%s\n",regname[rs2],regname[rs1]);
846 output_modrm(3,rs1,rs2);
849 emit_add(rs2,rs1,rs2);
856 void emit_subs(int rs1,int rs2,int rt)
858 emit_sub(rs1,rs2,rt);
861 void emit_zeroreg(int rt)
864 output_modrm(3,rt,rt);
865 assem_debug("xor %%%s,%%%s\n",regname[rt],regname[rt]);
/*
 * Emit a load of emulated register r into host register hr.
 *   r  - emulated register number; bit 6 (value 64) selects the upper half
 *   hr - host register index (used to index regname[] and as the ModRM reg
 *        field)
 * NOTE(review): some lines of this function are not visible here, including
 * the opcode byte and the address operand; the comments cover only what is
 * shown.
 */
868 void emit_loadreg(int r, int hr)
// Default location: base of reg[] plus 8 bytes per register index (r&63),
// plus 4 when bit 6 of r is set.
873 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
// HI and LO live in their own variables; the same +4 rule applies.
874 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
875 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
// These registers are each backed by a dedicated variable.
876 if(r==CCREG) addr=(int)&cycle_count;
877 if(r==CSREG) addr=(int)&Status;
878 if(r==FSREG) addr=(int)&FCR31;
879 assem_debug("mov %x+%d,%%%s\n",addr,r,regname[hr]);
// ModRM with mod=0, rm=5: absolute 32-bit address operand, reg field = hr.
881 output_modrm(0,5,hr);
/*
 * Emit a store of host register hr into the memory slot of emulated
 * register r.
 *   r  - emulated register number; bit 6 (value 64) selects the upper half
 *   hr - host register index (used to index regname[] and as the ModRM reg
 *        field)
 * The address is chosen as in emit_loadreg, except that no CSREG case is
 * visible here.
 * NOTE(review): the opcode byte and address operand are emitted on lines not
 * visible here.
 */
885 void emit_storereg(int r, int hr)
// Default location: base of reg[] plus 8 bytes per register index (r&63),
// plus 4 when bit 6 of r is set.
887 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
// HI and LO live in their own variables; the same +4 rule applies.
888 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
889 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
890 if(r==CCREG) addr=(int)&cycle_count;
891 if(r==FSREG) addr=(int)&FCR31;
892 assem_debug("mov %%%s,%x+%d\n",regname[hr],addr,r);
// ModRM with mod=0, rm=5: absolute 32-bit address operand, reg field = hr.
894 output_modrm(0,5,hr);
898 void emit_test(int rs, int rt)
900 assem_debug("test %%%s,%%%s\n",regname[rs],regname[rt]);
902 output_modrm(3,rs,rt);
/*
 * Emit "test $imm, %rs" (AND without storing the result; only flags change).
 *   rs  - host register index
 *   imm - 32-bit mask to test against
 * A shorter byte-sized form is used when that is equivalent to the 32-bit
 * test; otherwise the full 32-bit immediate form is emitted.
 */
905 void emit_testimm(int rs,int imm)
907 assem_debug("test $0x%x,%%%s\n",imm,regname[rs]);
// The byte form tests only the low 8 bits of the register, so it matches the
// 32-bit test only for masks 0..127. A negative imm sign-extends to a mask
// with bits 8..31 set, which the byte form would silently ignore, so such
// values must take the 32-bit path (previously imm>=-128 was accepted here).
// rs<4 limits the byte form to registers that have a low-byte encoding.
908 if(imm<128&&imm>=0&&rs<4) {
910 output_modrm(3,rs,0);
916 output_modrm(3,rs,0);
921 void emit_not(int rs,int rt)
923 if(rs!=rt) emit_mov(rs,rt);
924 assem_debug("not %%%s\n",regname[rt]);
926 output_modrm(3,rt,2);
929 void emit_and(u_int rs1,u_int rs2,u_int rt)
935 assem_debug("and %%%s,%%%s\n",regname[rs2],regname[rt]);
937 output_modrm(3,rs1,rs2);
941 assem_debug("and %%%s,%%%s\n",regname[rs1],regname[rt]);
943 output_modrm(3,rs2,rs1);
951 void emit_or(u_int rs1,u_int rs2,u_int rt)
957 assem_debug("or %%%s,%%%s\n",regname[rs2],regname[rt]);
959 output_modrm(3,rs1,rs2);
963 assem_debug("or %%%s,%%%s\n",regname[rs1],regname[rt]);
965 output_modrm(3,rs2,rs1);
972 void emit_or_and_set_flags(int rs1,int rs2,int rt)
977 void emit_xor(u_int rs1,u_int rs2,u_int rt)
983 assem_debug("xor %%%s,%%%s\n",regname[rs2],regname[rt]);
985 output_modrm(3,rs1,rs2);
989 assem_debug("xor %%%s,%%%s\n",regname[rs1],regname[rt]);
991 output_modrm(3,rs2,rs1);
999 void emit_movimm(int imm,u_int rt)
1001 assem_debug("mov $%d,%%%s\n",imm,regname[rt]);
1003 output_byte(0xB8+rt);
1007 void emit_addimm(int rs,int imm,int rt)
1011 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1012 if(imm<128&&imm>=-128) {
1014 output_modrm(3,rt,0);
1020 output_modrm(3,rt,0);
1027 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rs],regname[rt]);
1029 if(imm<128&&imm>=-128) {
1030 output_modrm(1,rs,rt);
1033 output_modrm(2,rs,rt);
1042 void emit_addimm_and_set_flags(int imm,int rt)
1044 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1045 if(imm<128&&imm>=-128) {
1047 output_modrm(3,rt,0);
1053 output_modrm(3,rt,0);
1057 void emit_addimm_no_flags(int imm,int rt)
1060 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rt],regname[rt]);
1062 if(imm<128&&imm>=-128) {
1063 output_modrm(1,rt,rt);
1066 output_modrm(2,rt,rt);
1072 void emit_adcimm(int imm,u_int rt)
1074 assem_debug("adc $%d,%%%s\n",imm,regname[rt]);
1076 if(imm<128&&imm>=-128) {
1078 output_modrm(3,rt,2);
1084 output_modrm(3,rt,2);
1088 void emit_sbbimm(int imm,u_int rt)
1090 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1092 if(imm<128&&imm>=-128) {
1094 output_modrm(3,rt,3);
1100 output_modrm(3,rt,3);
1105 void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1107 if(rsh==rth&&rsl==rtl) {
1108 assem_debug("add $%d,%%%s\n",imm,regname[rtl]);
1109 if(imm<128&&imm>=-128) {
1111 output_modrm(3,rtl,0);
1117 output_modrm(3,rtl,0);
1120 assem_debug("adc $%d,%%%s\n",imm>>31,regname[rth]);
1122 output_modrm(3,rth,2);
1123 output_byte(imm>>31);
1128 emit_addimm64_32(rth,rtl,imm,rth,rtl);
1132 void emit_sbb(int rs1,int rs2)
1134 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1136 output_modrm(3,rs1,rs2);
1139 void emit_andimm(int rs,int imm,int rt)
1142 assem_debug("and $%d,%%%s\n",imm,regname[rt]);
1143 if(imm<128&&imm>=-128) {
1145 output_modrm(3,rt,4);
1151 output_modrm(3,rt,4);
1157 emit_andimm(rt,imm,rt);
1161 void emit_orimm(int rs,int imm,int rt)
1164 assem_debug("or $%d,%%%s\n",imm,regname[rt]);
1165 if(imm<128&&imm>=-128) {
1167 output_modrm(3,rt,1);
1173 output_modrm(3,rt,1);
1179 emit_orimm(rt,imm,rt);
1183 void emit_xorimm(int rs,int imm,int rt)
1186 assem_debug("xor $%d,%%%s\n",imm,regname[rt]);
1187 if(imm<128&&imm>=-128) {
1189 output_modrm(3,rt,6);
1195 output_modrm(3,rt,6);
1201 emit_xorimm(rt,imm,rt);
1205 void emit_shlimm(int rs,u_int imm,int rt)
1208 assem_debug("shl %%%s,%d\n",regname[rt],imm);
1210 if(imm==1) output_byte(0xD1);
1211 else output_byte(0xC1);
1212 output_modrm(3,rt,4);
1213 if(imm>1) output_byte(imm);
1217 emit_shlimm(rt,imm,rt);
1221 void emit_shrimm(int rs,u_int imm,int rt)
1224 assem_debug("shr %%%s,%d\n",regname[rt],imm);
1226 if(imm==1) output_byte(0xD1);
1227 else output_byte(0xC1);
1228 output_modrm(3,rt,5);
1229 if(imm>1) output_byte(imm);
1233 emit_shrimm(rt,imm,rt);
1237 void emit_sarimm(int rs,u_int imm,int rt)
1240 assem_debug("sar %%%s,%d\n",regname[rt],imm);
1242 if(imm==1) output_byte(0xD1);
1243 else output_byte(0xC1);
1244 output_modrm(3,rt,7);
1245 if(imm>1) output_byte(imm);
1249 emit_sarimm(rt,imm,rt);
/*
 * Emit a 32-bit rotate-right of rs by the constant imm, result in rt.
 *   rs  - source host register
 *   imm - rotate count
 *   rt  - destination host register
 */
1253 void emit_rorimm(int rs,u_int imm,int rt)
1256 assem_debug("ror %%%s,%d\n",regname[rt],imm);
// 0xD1 is the rotate-by-one encoding; 0xC1 takes an 8-bit count afterwards.
1258 if(imm==1) output_byte(0xD1);
1259 else output_byte(0xC1);
// ModRM extension field 1 selects ROR within this opcode group.
1260 output_modrm(3,rt,1);
1261 if(imm>1) output_byte(imm);
// Re-invoke with source == destination. This must be the rotate itself: the
// sibling helpers (emit_shlimm, emit_shrimm, emit_sarimm) each call
// themselves at this point, whereas this call previously went to emit_sarimm
// and so produced an arithmetic shift instead of a rotate.
1265 emit_rorimm(rt,imm,rt);
1269 void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1272 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1276 output_modrm(3,rt,rs2);
1281 emit_shldimm(rt,rs2,imm,rt);
1285 void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1288 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1292 output_modrm(3,rt,rs2);
1297 emit_shrdimm(rt,rs2,imm,rt);
1301 void emit_shlcl(int r)
1303 assem_debug("shl %%%s,%%cl\n",regname[r]);
1305 output_modrm(3,r,4);
1307 void emit_shrcl(int r)
1309 assem_debug("shr %%%s,%%cl\n",regname[r]);
1311 output_modrm(3,r,5);
1313 void emit_sarcl(int r)
1315 assem_debug("sar %%%s,%%cl\n",regname[r]);
1317 output_modrm(3,r,7);
1320 void emit_shldcl(int r1,int r2)
1322 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1325 output_modrm(3,r1,r2);
1327 void emit_shrdcl(int r1,int r2)
1329 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1332 output_modrm(3,r1,r2);
1335 void emit_cmpimm(int rs,int imm)
1337 assem_debug("cmp $%d,%%%s\n",imm,regname[rs]);
1338 if(imm<128&&imm>=-128) {
1340 output_modrm(3,rs,7);
1346 output_modrm(3,rs,7);
1351 void emit_cmovne(u_int *addr,int rt)
1353 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1354 if(addr==&const_zero) assem_debug(" [zero]\n");
1355 else if(addr==&const_one) assem_debug(" [one]\n");
1356 else assem_debug("\n");
1359 output_modrm(0,5,rt);
1360 output_w32((int)addr);
1362 void emit_cmovl(u_int *addr,int rt)
1364 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1365 if(addr==&const_zero) assem_debug(" [zero]\n");
1366 else if(addr==&const_one) assem_debug(" [one]\n");
1367 else assem_debug("\n");
1370 output_modrm(0,5,rt);
1371 output_w32((int)addr);
1373 void emit_cmovs(u_int *addr,int rt)
1375 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1376 if(addr==&const_zero) assem_debug(" [zero]\n");
1377 else if(addr==&const_one) assem_debug(" [one]\n");
1378 else assem_debug("\n");
1381 output_modrm(0,5,rt);
1382 output_w32((int)addr);
1384 void emit_cmovne_reg(int rs,int rt)
1386 assem_debug("cmovne %%%s,%%%s\n",regname[rs],regname[rt]);
1389 output_modrm(3,rs,rt);
1391 void emit_cmovl_reg(int rs,int rt)
1393 assem_debug("cmovl %%%s,%%%s\n",regname[rs],regname[rt]);
1396 output_modrm(3,rs,rt);
1398 void emit_cmovs_reg(int rs,int rt)
1400 assem_debug("cmovs %%%s,%%%s\n",regname[rs],regname[rt]);
1403 output_modrm(3,rs,rt);
1405 void emit_cmovnc_reg(int rs,int rt)
1407 assem_debug("cmovae %%%s,%%%s\n",regname[rs],regname[rt]);
1410 output_modrm(3,rs,rt);
1412 void emit_cmova_reg(int rs,int rt)
1414 assem_debug("cmova %%%s,%%%s\n",regname[rs],regname[rt]);
1417 output_modrm(3,rs,rt);
1419 void emit_cmovp_reg(int rs,int rt)
1421 assem_debug("cmovp %%%s,%%%s\n",regname[rs],regname[rt]);
1424 output_modrm(3,rs,rt);
1426 void emit_cmovnp_reg(int rs,int rt)
1428 assem_debug("cmovnp %%%s,%%%s\n",regname[rs],regname[rt]);
1431 output_modrm(3,rs,rt);
1433 void emit_setl(int rt)
1435 assem_debug("setl %%%s\n",regname[rt]);
1438 output_modrm(3,rt,2);
1440 void emit_movzbl_reg(int rs, int rt)
1442 assem_debug("movzbl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1445 output_modrm(3,rs,rt);
1448 void emit_slti32(int rs,int imm,int rt)
1450 if(rs!=rt) emit_zeroreg(rt);
1451 emit_cmpimm(rs,imm);
1454 if(rs==rt) emit_movzbl_reg(rt,rt);
1458 if(rs==rt) emit_movimm(0,rt);
1459 emit_cmovl(&const_one,rt);
1462 void emit_sltiu32(int rs,int imm,int rt)
1464 if(rs!=rt) emit_zeroreg(rt);
1465 emit_cmpimm(rs,imm);
1466 if(rs==rt) emit_movimm(0,rt);
1469 void emit_slti64_32(int rsh,int rsl,int imm,int rt)
1472 emit_slti32(rsl,imm,rt);
1476 emit_cmovne(&const_zero,rt);
1477 emit_cmovs(&const_one,rt);
1481 emit_cmpimm(rsh,-1);
1482 emit_cmovne(&const_zero,rt);
1483 emit_cmovl(&const_one,rt);
1486 void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
1489 emit_sltiu32(rsl,imm,rt);
1493 emit_cmovne(&const_zero,rt);
1497 emit_cmpimm(rsh,-1);
1498 emit_cmovne(&const_one,rt);
1502 void emit_cmp(int rs,int rt)
1504 assem_debug("cmp %%%s,%%%s\n",regname[rt],regname[rs]);
1506 output_modrm(3,rs,rt);
1508 void emit_set_gz32(int rs, int rt)
1510 //assem_debug("set_gz32\n");
1513 emit_cmovl(&const_zero,rt);
1515 void emit_set_nz32(int rs, int rt)
1517 //assem_debug("set_nz32\n");
1522 void emit_set_gz64_32(int rsh, int rsl, int rt)
1524 //assem_debug("set_gz64\n");
1525 emit_set_gz32(rsl,rt);
1527 emit_cmovne(&const_one,rt);
1528 emit_cmovs(&const_zero,rt);
1530 void emit_set_nz64_32(int rsh, int rsl, int rt)
1532 //assem_debug("set_nz64\n");
1533 emit_or_and_set_flags(rsh,rsl,rt);
1534 emit_cmovne(&const_one,rt);
1536 void emit_set_if_less32(int rs1, int rs2, int rt)
1538 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
1539 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1541 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1542 emit_cmovl(&const_one,rt);
1544 void emit_set_if_carry32(int rs1, int rs2, int rt)
1546 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
1547 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1549 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1552 void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1554 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1561 emit_cmovl(&const_one,rt);
1563 void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1565 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1575 void emit_call(int a)
1577 assem_debug("call %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1579 output_w32(a-(int)out-4);
1581 void emit_jmp(int a)
1583 assem_debug("jmp %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1585 output_w32(a-(int)out-4);
1587 void emit_jne(int a)
1589 assem_debug("jne %x\n",a);
1592 output_w32(a-(int)out-4);
1594 void emit_jeq(int a)
1596 assem_debug("jeq %x\n",a);
1599 output_w32(a-(int)out-4);
1603 assem_debug("js %x\n",a);
1606 output_w32(a-(int)out-4);
1608 void emit_jns(int a)
1610 assem_debug("jns %x\n",a);
1613 output_w32(a-(int)out-4);
1617 assem_debug("jl %x\n",a);
1620 output_w32(a-(int)out-4);
1622 void emit_jge(int a)
1624 assem_debug("jge %x\n",a);
1627 output_w32(a-(int)out-4);
1629 void emit_jno(int a)
1631 assem_debug("jno %x\n",a);
1634 output_w32(a-(int)out-4);
1638 assem_debug("jc %x\n",a);
1641 output_w32(a-(int)out-4);
1644 void emit_pushimm(int imm)
1646 assem_debug("push $%x\n",imm);
1650 void emit_pushmem(int addr)
1652 assem_debug("push *%x\n",addr);
1654 output_modrm(0,5,6);
1659 assem_debug("pusha\n");
1664 assem_debug("popa\n");
1667 void emit_pushreg(u_int r)
1669 assem_debug("push %%%s\n",regname[r]);
1671 output_byte(0x50+r);
1673 void emit_popreg(u_int r)
1675 assem_debug("pop %%%s\n",regname[r]);
1677 output_byte(0x58+r);
1679 void emit_callreg(u_int r)
1681 assem_debug("call *%%%s\n",regname[r]);
1684 output_modrm(3,r,2);
1686 void emit_jmpreg(u_int r)
1688 assem_debug("jmp *%%%s\n",regname[r]);
1691 output_modrm(3,r,4);
1693 void emit_jmpmem_indexed(u_int addr,u_int r)
1695 assem_debug("jmp *%x(%%%s)\n",addr,regname[r]);
1698 output_modrm(2,r,4);
1702 void emit_readword(int addr, int rt)
1704 assem_debug("mov %x,%%%s\n",addr,regname[rt]);
1706 output_modrm(0,5,rt);
1709 void emit_readword_indexed(int addr, int rs, int rt)
1711 assem_debug("mov %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1713 if(addr<128&&addr>=-128) {
1714 output_modrm(1,rs,rt);
1715 if(rs==ESP) output_sib(0,4,4);
1720 output_modrm(2,rs,rt);
1721 if(rs==ESP) output_sib(0,4,4);
// emit_readword_tlb: word load with an optional mapping register.
// With map<0 the access is a plain absolute load from
// addr+(int)rdram-0x80000000; otherwise the same displacement is used with
// the map register as an index scaled by 4 (see the debug format).
1725 void emit_readword_tlb(int addr, int map, int rt)
1727 if(map<0) emit_readword(addr+(int)rdram-0x80000000, rt);
1730 assem_debug("mov (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1732 output_modrm(0,4,rt);
1733 output_sib(2,map,5);
1734 output_w32(addr+(int)rdram-0x80000000);
// emit_readword_indexed_tlb: word load from addr(%rs,%map,4) into rt.
// With map<0 it falls back to emit_readword_indexed with the rdram-relative
// offset. Otherwise the ModRM mode is chosen by displacement size: mode 0
// for a zero offset (not used when the base is EBP), mode 1 for offsets in
// [-128,127], mode 2 for the rest.
1737 void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
1739 if(map<0) emit_readword_indexed(addr+(int)rdram-0x80000000, rs, rt);
1741 assem_debug("mov %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1744 if(addr==0&&rs!=EBP) {
1745 output_modrm(0,4,rt);
1746 output_sib(2,map,rs);
1748 else if(addr<128&&addr>=-128) {
1749 output_modrm(1,4,rt);
1750 output_sib(2,map,rs);
1755 output_modrm(2,4,rt);
1756 output_sib(2,map,rs);
// emit_movmem_indexedx4: load the word at addr+rs*4 into rt (table lookup).
1761 void emit_movmem_indexedx4(int addr, int rs, int rt)
1763 assem_debug("mov (%x,%%%s,4),%%%s\n",addr,regname[rs],regname[rt]);
1765 output_modrm(0,4,rt);
// emit_readdword_tlb: 64-bit load as two word loads. rh receives the word
// at the rdram-relative address and is skipped when rh<0; rl receives the
// word 4 bytes higher (rdram-0x7FFFFFFC == rdram-0x80000000+4). The first
// pair of calls is the absolute form, the second pair indexes by map*4.
1769 void emit_readdword_tlb(int addr, int map, int rh, int rl)
1772 if(rh>=0) emit_readword(addr+(int)rdram-0x80000000, rh);
1773 emit_readword(addr+(int)rdram-0x7FFFFFFC, rl);
1776 if(rh>=0) emit_movmem_indexedx4(addr+(int)rdram-0x80000000, map, rh);
1777 emit_movmem_indexedx4(addr+(int)rdram-0x7FFFFFFC, map, rl);
// emit_readdword_indexed_tlb: 64-bit indexed load; rh (optional, skipped
// when negative) from addr and rl from addr+4.
1780 void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
1783 if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
1784 emit_readword_indexed_tlb(addr+4, rs, map, rl);
// Sign-extending byte loads (movsbl).
// emit_movsbl: load from absolute address addr into rt.
1786 void emit_movsbl(int addr, int rt)
1788 assem_debug("movsbl %x,%%%s\n",addr,regname[rt]);
1791 output_modrm(0,5,rt);
// emit_movsbl_indexed: load from addr+%rs into rt (mode-2 ModRM form).
1794 void emit_movsbl_indexed(int addr, int rs, int rt)
1796 assem_debug("movsbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1799 output_modrm(2,rs,rt);
// emit_movsbl_tlb: absolute load when map<0, otherwise indexed by map*4
// with the rdram-relative displacement.
1802 void emit_movsbl_tlb(int addr, int map, int rt)
1804 if(map<0) emit_movsbl(addr+(int)rdram-0x80000000, rt);
1807 assem_debug("movsbl (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1810 output_modrm(0,4,rt);
1811 output_sib(2,map,5);
1812 output_w32(addr+(int)rdram-0x80000000);
// emit_movsbl_indexed_tlb: load from addr(%rs,%map,4). Falls back to
// emit_movsbl_indexed when map<0; otherwise picks ModRM mode 0, 1 or 2 by
// displacement size (mode 0 is not used with an EBP base).
1815 void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1817 if(map<0) emit_movsbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1819 assem_debug("movsbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1823 if(addr==0&&rs!=EBP) {
1824 output_modrm(0,4,rt);
1825 output_sib(2,map,rs);
1827 else if(addr<128&&addr>=-128) {
1828 output_modrm(1,4,rt);
1829 output_sib(2,map,rs);
1834 output_modrm(2,4,rt);
1835 output_sib(2,map,rs);
// Sign-extending halfword loads (movswl).
// emit_movswl: load from absolute address addr into rt.
1840 void emit_movswl(int addr, int rt)
1842 assem_debug("movswl %x,%%%s\n",addr,regname[rt]);
1845 output_modrm(0,5,rt);
// emit_movswl_indexed: load from addr+%rs into rt (mode-2 ModRM form).
1848 void emit_movswl_indexed(int addr, int rs, int rt)
1850 assem_debug("movswl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1853 output_modrm(2,rs,rt);
// emit_movswl_tlb: absolute load when map<0, otherwise indexed by map*4
// with the rdram-relative displacement.
1856 void emit_movswl_tlb(int addr, int map, int rt)
1858 if(map<0) emit_movswl(addr+(int)rdram-0x80000000, rt);
1861 assem_debug("movswl (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1864 output_modrm(0,4,rt);
1865 output_sib(2,map,5);
1866 output_w32(addr+(int)rdram-0x80000000);
// Zero-extending byte loads (movzbl).
// emit_movzbl: load from absolute address addr into rt.
1869 void emit_movzbl(int addr, int rt)
1871 assem_debug("movzbl %x,%%%s\n",addr,regname[rt]);
1874 output_modrm(0,5,rt);
// emit_movzbl_indexed: load from addr+%rs into rt (mode-2 ModRM form).
1877 void emit_movzbl_indexed(int addr, int rs, int rt)
1879 assem_debug("movzbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1882 output_modrm(2,rs,rt);
// emit_movzbl_tlb: absolute load when map<0, otherwise indexed by map*4
// with the rdram-relative displacement.
1885 void emit_movzbl_tlb(int addr, int map, int rt)
1887 if(map<0) emit_movzbl(addr+(int)rdram-0x80000000, rt);
1890 assem_debug("movzbl (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1893 output_modrm(0,4,rt);
1894 output_sib(2,map,5);
1895 output_w32(addr+(int)rdram-0x80000000);
// emit_movzbl_indexed_tlb: load from addr(%rs,%map,4). Falls back to
// emit_movzbl_indexed when map<0; otherwise picks ModRM mode 0, 1 or 2 by
// displacement size (mode 0 is not used with an EBP base).
1898 void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
1900 if(map<0) emit_movzbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1902 assem_debug("movzbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1906 if(addr==0&&rs!=EBP) {
1907 output_modrm(0,4,rt);
1908 output_sib(2,map,rs);
1910 else if(addr<128&&addr>=-128) {
1911 output_modrm(1,4,rt);
1912 output_sib(2,map,rs);
1917 output_modrm(2,4,rt);
1918 output_sib(2,map,rs);
// Zero-extending halfword loads (movzwl).
// emit_movzwl: load from absolute address addr into rt.
1923 void emit_movzwl(int addr, int rt)
1925 assem_debug("movzwl %x,%%%s\n",addr,regname[rt]);
1928 output_modrm(0,5,rt);
// emit_movzwl_indexed: load from addr+%rs into rt (mode-2 ModRM form).
1931 void emit_movzwl_indexed(int addr, int rs, int rt)
1933 assem_debug("movzwl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1936 output_modrm(2,rs,rt);
// emit_movzwl_tlb: absolute load when map<0, otherwise indexed by map*4
// with the rdram-relative displacement.
1939 void emit_movzwl_tlb(int addr, int map, int rt)
1941 if(map<0) emit_movzwl(addr+(int)rdram-0x80000000, rt);
1944 assem_debug("movzwl (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1947 output_modrm(0,4,rt);
1948 output_sib(2,map,5);
1949 output_w32(addr+(int)rdram-0x80000000);
// emit_movzwl_reg: register-to-register zero extension of the low 16 bits
// of rs into rt. The debug line prints regname[rs]+1, i.e. the register
// name without its first character (presumably the 16-bit name).
1952 void emit_movzwl_reg(int rs, int rt)
1954 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1957 output_modrm(3,rs,rt);
// emit_xchg: exchange the contents of host registers rs and rt. Emits
// either the single byte 0x90+rt or a register-to-register ModRM form.
1960 void emit_xchg(int rs, int rt)
1962 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1964 output_byte(0x90+rt);
1969 output_modrm(3,rs,rt);
// Word and doubleword store emitters.
// emit_writeword: store host register rt to absolute address addr.
1972 void emit_writeword(int rt, int addr)
1974 assem_debug("movl %%%s,%x\n",regname[rt],addr);
1976 output_modrm(0,5,rt);
// emit_writeword_indexed: store rt to addr+%rs. Offsets in [-128,127] use
// ModRM mode 1, others mode 2; an ESP base gets an extra SIB byte.
1979 void emit_writeword_indexed(int rt, int addr, int rs)
1981 assem_debug("mov %%%s,%x+%%%s\n",regname[rt],addr,regname[rs]);
1983 if(addr<128&&addr>=-128) {
1984 output_modrm(1,rs,rt);
1985 if(rs==ESP) output_sib(0,4,4);
1990 output_modrm(2,rs,rt);
1991 if(rs==ESP) output_sib(0,4,4);
// emit_writeword_tlb: store rt at the rdram-relative address, either
// absolutely or with map as the base register.
1995 void emit_writeword_tlb(int rt, int addr, int map)
1998 emit_writeword(rt, addr+(int)rdram-0x80000000);
2000 emit_writeword_indexed(rt, addr+(int)rdram-0x80000000, map);
// emit_writeword_indexed_tlb: store rt to addr(%rs,%map,1). Unlike the
// load variants, the index scale shown in the debug format is 1. Falls back
// to emit_writeword_indexed when map<0; otherwise the ModRM mode (0, 1 or 2)
// follows the displacement size, with mode 0 not used for an EBP base.
2003 void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2005 if(map<0) emit_writeword_indexed(rt, addr+(int)rdram-0x80000000, rs);
2007 assem_debug("mov %%%s,%x(%%%s,%%%s,1)\n",regname[rt],addr,regname[rs],regname[map]);
2010 if(addr==0&&rs!=EBP) {
2011 output_modrm(0,4,rt);
2012 output_sib(0,map,rs);
2014 else if(addr<128&&addr>=-128) {
2015 output_modrm(1,4,rt);
2016 output_sib(0,map,rs);
2021 output_modrm(2,4,rt);
2022 output_sib(0,map,rs);
// emit_writedword_tlb: 64-bit store as two word stores; rh goes to the
// rdram-relative address and rl 4 bytes above it
// (rdram-0x7FFFFFFC == rdram-0x80000000+4). The first pair is the absolute
// form, the second pair uses map as the base register.
2027 void emit_writedword_tlb(int rh, int rl, int addr, int map)
2031 emit_writeword(rh, addr+(int)rdram-0x80000000);
2032 emit_writeword(rl, addr+(int)rdram-0x7FFFFFFC);
2035 emit_writeword_indexed(rh, addr+(int)rdram-0x80000000, map);
2036 emit_writeword_indexed(rl, addr+(int)rdram-0x7FFFFFFC, map);
// emit_writedword_indexed_tlb: 64-bit indexed store; rh at addr, rl at
// addr+4.
2039 void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
2042 emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
2043 emit_writeword_indexed_tlb(rl, addr+4, rs, map, temp);
// Halfword store emitters. The debug lines print regname[rt]+1, i.e. the
// register name without its first character (presumably the 16-bit name).
// emit_writehword: store the low 16 bits of rt to absolute address addr.
2045 void emit_writehword(int rt, int addr)
2047 assem_debug("movw %%%s,%x\n",regname[rt]+1,addr);
2050 output_modrm(0,5,rt);
// emit_writehword_indexed: store to addr+%rs; ModRM mode 1 for offsets in
// [-128,127], mode 2 otherwise.
2053 void emit_writehword_indexed(int rt, int addr, int rs)
2055 assem_debug("movw %%%s,%x+%%%s\n",regname[rt]+1,addr,regname[rs]);
2058 if(addr<128&&addr>=-128) {
2059 output_modrm(1,rs,rt);
2064 output_modrm(2,rs,rt);
// emit_writehword_tlb: store at the rdram-relative address, either
// absolutely or with map as the base register.
2068 void emit_writehword_tlb(int rt, int addr, int map)
2071 emit_writehword(rt, addr+(int)rdram-0x80000000);
2073 emit_writehword_indexed(rt, addr+(int)rdram-0x80000000, map);
// Byte store emitters. The debug lines build the byte-register name from
// the second character of regname[rt] followed by 'l'.
// emit_writebyte: store the low byte of rt to absolute address addr. The
// trailing call re-issues the store through EAX; presumably that is the
// path for registers without a byte form -- TODO confirm.
2076 void emit_writebyte(int rt, int addr)
2079 assem_debug("movb %%%cl,%x\n",regname[rt][1],addr);
2081 output_modrm(0,5,rt);
2087 emit_writebyte(EAX,addr);
// emit_writebyte_indexed: store the low byte of rt to addr+%rs; ModRM
// mode 1 for offsets in [-128,127], mode 2 otherwise. The trailing call
// re-issues the store through EAX, substituting rt for the base when the
// base itself was EAX.
2091 void emit_writebyte_indexed(int rt, int addr, int rs)
2094 assem_debug("movb %%%cl,%x+%%%s\n",regname[rt][1],addr,regname[rs]);
2096 if(addr<128&&addr>=-128) {
2097 output_modrm(1,rs,rt);
2102 output_modrm(2,rs,rt);
2109 emit_writebyte_indexed(EAX,addr,rs==EAX?rt:rs);
// emit_writebyte_tlb: store at the rdram-relative address, either
// absolutely or with map as the base register.
2113 void emit_writebyte_tlb(int rt, int addr, int map)
2116 emit_writebyte(rt, addr+(int)rdram-0x80000000);
2118 emit_writebyte_indexed(rt, addr+(int)rdram-0x80000000, map);
// emit_writebyte_indexed_tlb: store the low byte of rt to
// addr(%rs,%map,1). Falls back to emit_writebyte_indexed when map<0;
// otherwise the ModRM mode (0, 1 or 2) follows the displacement size. The
// trailing call re-issues the store through EAX, substituting rt wherever
// the base or the map register was EAX.
2121 void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2123 if(map<0) emit_writebyte_indexed(rt, addr+(int)rdram-0x80000000, rs);
2126 assem_debug("movb %%%cl,%x(%%%s,%%%s,1)\n",regname[rt][1],addr,regname[rs],regname[map]);
2129 if(addr==0&&rs!=EBP) {
2130 output_modrm(0,4,rt);
2131 output_sib(0,map,rs);
2133 else if(addr<128&&addr>=-128) {
2134 output_modrm(1,4,rt);
2135 output_sib(0,map,rs);
2140 output_modrm(2,4,rt);
2141 output_sib(0,map,rs);
2148 emit_writebyte_indexed_tlb(EAX,addr,rs==EAX?rt:rs,map==EAX?rt:map,temp);
// Immediate-to-memory store emitters.
// emit_writeword_imm: store the 32-bit immediate imm at absolute address
// addr.
2152 void emit_writeword_imm(int imm, int addr)
2154 assem_debug("movl $%x,%x\n",imm,addr);
2156 output_modrm(0,5,0);
// emit_writeword_imm_esp: store imm at addr(%esp); addr must fit in a
// signed byte (asserted).
2160 void emit_writeword_imm_esp(int imm, int addr)
2162 assem_debug("mov $%x,%x(%%esp)\n",imm,addr);
2163 assert(addr>=-128&&addr<128);
2165 output_modrm(1,4,0);
// emit_writebyte_imm: store the byte immediate imm at absolute address
// addr; imm must fit in a signed byte (asserted).
2170 void emit_writebyte_imm(int imm, int addr)
2172 assem_debug("movb $%x,%x\n",imm,addr);
2173 assert(imm>=-128&&imm<128);
2175 output_modrm(0,5,0);
// emit_writebyte_imm_esp: store the byte immediate imm at addr(%esp);
// only addr is range-checked here.
2179 void emit_writebyte_imm_esp(int imm, int addr)
2181 assem_debug("movb $%x,%x(%%esp)\n",imm,addr);
2182 assert(addr>=-128&&addr<128);
2184 output_modrm(1,4,0);
// One-operand multiply/divide emitters. All four use a register-direct
// ModRM byte and differ only in its third argument (4, 5, 6, 7).
// emit_mul: unsigned multiply by rs.
2190 void emit_mul(int rs)
2192 assem_debug("mul %%%s\n",regname[rs]);
2194 output_modrm(3,rs,4);
// emit_imul: signed multiply by rs.
2196 void emit_imul(int rs)
2198 assem_debug("imul %%%s\n",regname[rs]);
2200 output_modrm(3,rs,5);
// emit_div: unsigned divide by rs.
2202 void emit_div(int rs)
2204 assem_debug("div %%%s\n",regname[rs]);
2206 output_modrm(3,rs,6);
// emit_idiv: signed divide by rs.
2208 void emit_idiv(int rs)
2210 assem_debug("idiv %%%s\n",regname[rs]);
2212 output_modrm(3,rs,7);
// cdq: trace output of the cdq emitter.
2216 assem_debug("cdq\n");
2220 // Load 2 immediates optimizing for small code size
// imm1 is always loaded into rt1 with emit_movimm. If imm2-imm1 fits in a
// signed byte, rt2 is derived from rt1 with emit_addimm instead of a second
// full immediate load; otherwise imm2 is loaded directly.
2221 void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2223 emit_movimm(imm1,rt1);
2224 if(imm2-imm1<128&&imm2-imm1>=-128) emit_addimm(rt1,imm2-imm1,rt2);
2225 else emit_movimm(imm2,rt2);
2228 // special case for checking pending_exception
// Compares the byte at absolute address addr with imm. The assert accepts
// imm in [-127,127].
2229 void emit_cmpmem_imm_byte(int addr,int imm)
2231 assert(imm<128&&imm>=-127);
2232 assem_debug("cmpb $%d,%x\n",imm,addr);
2234 output_modrm(0,5,7);
2239 // special case for checking invalid_code
// Shifts r right by 12 in place (r is modified), then compares imm with
// the memory operand at addr+%r. The assert accepts imm in [-127,127].
2240 void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
2242 assert(imm<128&&imm>=-127);
2244 emit_shrimm(r,12,r);
2245 assem_debug("cmp $%d,%x+%%%s\n",imm,addr,regname[r]);
2247 output_modrm(2,r,7);
2252 // special case for checking hash_table
// Compares host register rt with the word at addr+%rs. Both register
// indices are asserted to be in [0,7].
2253 void emit_cmpmem_indexed(int addr,int rs,int rt)
2255 assert(rs>=0&&rs<8);
2256 assert(rt>=0&&rt<8);
2257 assem_debug("cmp %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
2259 output_modrm(2,rs,rt);
2263 // special case for checking memory_map in verify_mapping
// Compares host register rt (asserted to be in [0,7]) with the word at
// absolute address addr.
2264 void emit_cmpmem(int addr,int rt)
2266 assert(rt>=0&&rt<8);
2267 assem_debug("cmp %x,%%%s\n",addr,regname[rt]);
2269 output_modrm(0,5,rt);
2273 // Used to preload hash table entries
// Emits a prefetch of the absolute address addr; the address is written as
// a 32-bit displacement after the ModRM byte.
2274 void emit_prefetch(void *addr)
2276 assem_debug("prefetch %x\n",(int)addr);
2279 output_modrm(0,5,1);
2280 output_w32((int)addr);
2283 /*void emit_submem(int r,int addr)
2286 assem_debug("sub %x,%%%s\n",addr,regname[r]);
2288 output_modrm(0,5,r);
2289 output_w32((int)addr);
2291 void emit_subfrommem(int addr,int r)
2294 assem_debug("sub %%%s,%x\n",regname[r],addr);
2296 output_modrm(0,5,r);
2297 output_w32((int)addr);
// x87 load, compare and unary emitters. For the memory forms r is the host
// register holding the operand address. A plain (%ebp) operand cannot use
// ModRM mode 0, so EBP is encoded as mode 1 with an explicit zero
// displacement byte.
// emit_flds: push the single-precision value at (%r).
2300 void emit_flds(int r)
2302 assem_debug("flds (%%%s)\n",regname[r]);
2304 if(r!=EBP) output_modrm(0,r,0);
2305 else {output_modrm(1,EBP,0);output_byte(0);}
// emit_fldl: push the double-precision value at (%r).
2307 void emit_fldl(int r)
2309 assem_debug("fldl (%%%s)\n",regname[r]);
2311 if(r!=EBP) output_modrm(0,r,0);
2312 else {output_modrm(1,EBP,0);output_byte(0);}
// emit_fucomip: here r is an x87 stack index, not a host register; the
// second opcode byte is 0xe8+r.
2314 void emit_fucomip(u_int r)
2316 assem_debug("fucomip %d\n",r);
2319 output_byte(0xe8+r);
// fchs / fabs / fsqrt: trace output of the corresponding unary emitters.
2323 assem_debug("fchs\n");
2329 assem_debug("fabs\n");
2335 assem_debug("fsqrt\n");
// x87 arithmetic emitters. The "s"/"l" variants take a host register r
// holding the address of a single/double operand; EBP is encoded as mode 1
// with a zero displacement byte because mode 0 cannot express (%ebp). The
// suffix-less variants take an x87 stack index r and add it to a base
// opcode byte.
// Addition: ModRM third argument 0, stack form 0xc0+r.
2339 void emit_fadds(int r)
2341 assem_debug("fadds (%%%s)\n",regname[r]);
2343 if(r!=EBP) output_modrm(0,r,0);
2344 else {output_modrm(1,EBP,0);output_byte(0);}
2346 void emit_faddl(int r)
2348 assem_debug("faddl (%%%s)\n",regname[r]);
2350 if(r!=EBP) output_modrm(0,r,0);
2351 else {output_modrm(1,EBP,0);output_byte(0);}
2353 void emit_fadd(int r)
2355 assem_debug("fadd st%d\n",r);
2357 output_byte(0xc0+r);
// Subtraction: ModRM third argument 4, stack form 0xe0+r.
2359 void emit_fsubs(int r)
2361 assem_debug("fsubs (%%%s)\n",regname[r]);
2363 if(r!=EBP) output_modrm(0,r,4);
2364 else {output_modrm(1,EBP,4);output_byte(0);}
2366 void emit_fsubl(int r)
2368 assem_debug("fsubl (%%%s)\n",regname[r]);
2370 if(r!=EBP) output_modrm(0,r,4);
2371 else {output_modrm(1,EBP,4);output_byte(0);}
2373 void emit_fsub(int r)
2375 assem_debug("fsub st%d\n",r);
2377 output_byte(0xe0+r);
// Multiplication: ModRM third argument 1, stack form 0xc8+r.
2379 void emit_fmuls(int r)
2381 assem_debug("fmuls (%%%s)\n",regname[r]);
2383 if(r!=EBP) output_modrm(0,r,1);
2384 else {output_modrm(1,EBP,1);output_byte(0);}
2386 void emit_fmull(int r)
2388 assem_debug("fmull (%%%s)\n",regname[r]);
2390 if(r!=EBP) output_modrm(0,r,1);
2391 else {output_modrm(1,EBP,1);output_byte(0);}
2393 void emit_fmul(int r)
2395 assem_debug("fmul st%d\n",r);
2397 output_byte(0xc8+r);
// Division: ModRM third argument 6, stack form 0xf0+r.
2399 void emit_fdivs(int r)
2401 assem_debug("fdivs (%%%s)\n",regname[r]);
2403 if(r!=EBP) output_modrm(0,r,6);
2404 else {output_modrm(1,EBP,6);output_byte(0);}
2406 void emit_fdivl(int r)
2408 assem_debug("fdivl (%%%s)\n",regname[r]);
2410 if(r!=EBP) output_modrm(0,r,6);
2411 else {output_modrm(1,EBP,6);output_byte(0);}
2413 void emit_fdiv(int r)
2415 assem_debug("fdiv st%d\n",r);
2417 output_byte(0xf0+r);
// fpop: trace output of the emitter that pops the x87 stack.
2422 assem_debug("fpop\n");
// x87 integer load/store and floating-point store emitters. r is the host
// register holding the operand address; EBP is encoded as mode 1 with a
// zero displacement byte because mode 0 cannot express (%ebp).
// emit_fildl: push the 32-bit integer at (%r).
2426 void emit_fildl(int r)
2428 assem_debug("fildl (%%%s)\n",regname[r]);
2430 if(r!=EBP) output_modrm(0,r,0);
2431 else {output_modrm(1,EBP,0);output_byte(0);}
// emit_fildll: push the 64-bit integer at (%r).
2433 void emit_fildll(int r)
2435 assem_debug("fildll (%%%s)\n",regname[r]);
2437 if(r!=EBP) output_modrm(0,r,5);
2438 else {output_modrm(1,EBP,5);output_byte(0);}
// emit_fistpl: store as a 32-bit integer to (%r) and pop.
2440 void emit_fistpl(int r)
2442 assem_debug("fistpl (%%%s)\n",regname[r]);
2444 if(r!=EBP) output_modrm(0,r,3);
2445 else {output_modrm(1,EBP,3);output_byte(0);}
// emit_fistpll: store as a 64-bit integer to (%r) and pop.
2447 void emit_fistpll(int r)
2449 assem_debug("fistpll (%%%s)\n",regname[r]);
2451 if(r!=EBP) output_modrm(0,r,7);
2452 else {output_modrm(1,EBP,7);output_byte(0);}
// emit_fstps: store as single precision to (%r) and pop.
2454 void emit_fstps(int r)
2456 assem_debug("fstps (%%%s)\n",regname[r]);
2458 if(r!=EBP) output_modrm(0,r,3);
2459 else {output_modrm(1,EBP,3);output_byte(0);}
// emit_fstpl: store as double precision to (%r) and pop.
2461 void emit_fstpl(int r)
2463 assem_debug("fstpl (%%%s)\n",regname[r]);
2465 if(r!=EBP) output_modrm(0,r,3);
2466 else {output_modrm(1,EBP,3);output_byte(0);}
// x87 control-word emitters.
// emit_fnstcw_stack: store the control word to (%esp).
2468 void emit_fnstcw_stack()
2470 assem_debug("fnstcw (%%esp)\n");
2472 output_modrm(0,4,7);
// emit_fldcw_stack: load the control word from (%esp).
2475 void emit_fldcw_stack()
2477 assem_debug("fldcw (%%esp)\n");
2479 output_modrm(0,4,5);
// emit_fldcw_indexed: load the control word from addr(%r).
2482 void emit_fldcw_indexed(int addr,int r)
2484 assem_debug("fldcw %x(%%%s)\n",addr,regname[r]);
2486 output_modrm(0,4,5);
// emit_fldcw: load the control word from absolute address addr.
2490 void emit_fldcw(int addr)
2492 assem_debug("fldcw %x\n",addr);
2494 output_modrm(0,5,5);
// SSE emitters. In the load/store forms `addr` is a host register index
// (it is used to index regname), not a memory address; EBP is encoded as
// mode 1 with a zero displacement byte because mode 0 cannot express
// (%ebp).
// emit_movss_load: load the scalar single at (%addr) into xmm<ssereg>.
2497 void emit_movss_load(u_int addr,u_int ssereg)
2499 assem_debug("movss (%%%s),xmm%d\n",regname[addr],ssereg);
2504 if(addr!=EBP) output_modrm(0,addr,ssereg);
2505 else {output_modrm(1,EBP,ssereg);output_byte(0);}
// emit_movsd_load: load the scalar double at (%addr) into xmm<ssereg>.
2507 void emit_movsd_load(u_int addr,u_int ssereg)
2509 assem_debug("movsd (%%%s),xmm%d\n",regname[addr],ssereg);
2514 if(addr!=EBP) output_modrm(0,addr,ssereg);
2515 else {output_modrm(1,EBP,ssereg);output_byte(0);}
// emit_movd_store: store the low 32 bits of xmm<ssereg> to (%addr).
2517 void emit_movd_store(u_int ssereg,u_int addr)
2519 assem_debug("movd xmm%d,(%%%s)\n",ssereg,regname[addr]);
2524 if(addr!=EBP) output_modrm(0,addr,ssereg);
2525 else {output_modrm(1,EBP,ssereg);output_byte(0);}
// emit_cvttps2dq: truncating single-to-int32 conversion between two xmm
// registers.
2527 void emit_cvttps2dq(u_int ssereg1,u_int ssereg2)
2529 assem_debug("cvttps2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
2535 output_modrm(3,ssereg1,ssereg2);
// emit_cvttpd2dq: truncating double-to-int32 conversion between two xmm
// registers.
2537 void emit_cvttpd2dq(u_int ssereg1,u_int ssereg2)
2539 assem_debug("cvttpd2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
2545 output_modrm(3,ssereg1,ssereg2);
2548 /* Stubs/epilogue */
// emit_extjump2: emit the stub for a jump that leaves the current block.
//   addr   - address of the already-emitted jump instruction
//   target - jump target, loaded into EAX
//   linker - linker routine the stub hands off to
// The asserts check that the instruction at addr is a two-byte conditional
// jump (second byte 0x80..0x8f) or starts with 0xe8/0xe9. addr itself is
// loaded into EBX.
2550 emit_extjump2(int addr, int target, int linker)
2552 u_char *ptr=(u_char *)addr;
2555 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
2560 assert(*ptr==0xe8||*ptr==0xe9);
2563 emit_movimm(target,EAX);
2564 emit_movimm(addr,EBX);
2565 //assert(addr>=0x7000000&&addr<0x7FFFFFF);
2566 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
// Debug-only: bring Count and last_count up to date from next_interupt
// before leaving the block.
2568 #ifdef DEBUG_CYCLE_COUNT
2569 emit_readword((int)&last_count,ECX);
2570 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2571 emit_readword((int)&next_interupt,ECX);
2572 emit_writeword(HOST_CCREG,(int)&Count);
2573 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2574 emit_writeword(ECX,(int)&last_count);
// emit_extjump: external jump stub that uses dyna_linker.
2580 emit_extjump(int addr, int target)
2582 emit_extjump2(addr, target, (int)dyna_linker);
// emit_extjump_ds: external jump stub that uses dyna_linker_ds.
2584 emit_extjump_ds(int addr, int target)
2586 emit_extjump2(addr, target, (int)dyna_linker_ds);
// do_readstub: emit the out-of-line slow path for load stub n.
// Stub fields used here: [0] stub type, [1] the jump to patch so it lands
// on this stub, [2] return address, [3] instruction index (for the trace),
// [5] pointer to the register state, [6] cycle adjustment.
2591 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2592 set_jump_target(stubs[n][1],(int)out);
2593 int type=stubs[n][0];
2596 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2597 signed char *i_regmap=i_regs->regmap;
2598 int addr=get_reg(i_regmap,AGEN1+(i&1));
// Destination registers: FTEMP for C1LS/LOADLR, otherwise the
// instruction's rt1. The |64 lookup is the upper-half register.
2601 if(itype[i]==C1LS||itype[i]==LOADLR) {
2602 rth=get_reg(i_regmap,FTEMP|64);
2603 rt=get_reg(i_regmap,FTEMP);
2605 rth=get_reg(i_regmap,rt1[i]|64);
2606 rt=get_reg(i_regmap,rt1[i]);
// Pick the handler table matching the access width.
2613 if(type==LOADB_STUB||type==LOADBU_STUB)
2614 ftable=(int)readmemb;
2615 if(type==LOADH_STUB||type==LOADHU_STUB)
2616 ftable=(int)readmemh;
2617 if(type==LOADW_STUB)
2618 ftable=(int)readmem;
2619 if(type==LOADD_STUB)
2620 ftable=(int)readmemd;
// Publish the address, then index the handler table by its top 16 bits.
2621 emit_writeword(rs,(int)&address);
2622 emit_shrimm(rs,16,addr);
2623 emit_movmem_indexedx4(ftable,addr,addr);
// ds is nonzero when the stub was recorded with a register state other
// than regs[i] (presumably a delay-slot instruction -- TODO confirm).
// Fixed: the address-of operator had been corrupted into the character
// sequence "(R)egs" by an HTML-entity decode of "&reg".
2625 ds=i_regs!=&regs[i];
2626 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
// Write back dirty registers, excluding addr and the real base register.
2627 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2628 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2631 int cc=get_reg(i_regmap,CCREG);
2633 if(addr==HOST_CCREG)
2636 assert(cc!=HOST_CCREG);
2637 assert(temp!=HOST_CCREG);
2638 emit_loadreg(CCREG,cc);
2643 emit_loadreg(CCREG,cc);
// Count = cycle counter + adjustment + last_count. The word stored at
// 32(%esp) encodes the instruction address plus the was32 bit (shifted
// left by one) and the ds flag.
2651 emit_readword((int)&last_count,temp);
2652 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
2653 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2654 emit_add(cc,temp,cc);
2655 emit_writeword(cc,(int)&Count);
2657 // We really shouldn't need to update the count here,
2658 // but not doing so causes random crashes...
2659 emit_readword((int)&Count,HOST_CCREG);
2660 emit_readword((int)&next_interupt,ECX);
2661 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2662 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2663 emit_writeword(ECX,(int)&last_count);
2664 emit_storereg(CCREG,HOST_CCREG);
2666 if((cc=get_reg(i_regmap,CCREG))>=0) {
2667 emit_loadreg(CCREG,cc);
// Fetch the result from readmem_dword with the extension the stub type
// requires; the upper word of a doubleword is loaded only when rth>=0.
2669 if(type==LOADB_STUB)
2670 emit_movsbl((int)&readmem_dword,rt);
2671 if(type==LOADBU_STUB)
2672 emit_movzbl((int)&readmem_dword,rt);
2673 if(type==LOADH_STUB)
2674 emit_movswl((int)&readmem_dword,rt);
2675 if(type==LOADHU_STUB)
2676 emit_movzwl((int)&readmem_dword,rt);
2677 if(type==LOADW_STUB)
2678 emit_readword((int)&readmem_dword,rt);
2679 if(type==LOADD_STUB) {
2680 emit_readword((int)&readmem_dword,rt);
2681 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2683 emit_jmp(stubs[n][2]); // return address
// inline_readstub: emit a call to a memory read handler inline, for a load
// whose address `addr` is known at compile time. The handler is looked up
// at compile time as ftable[addr>>16]. `target` selects the destination
// register and `adj` is the cycle adjustment.
2686 inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2688 assem_debug("inline_readstub\n");
2689 int rs=get_reg(regmap,target);
2690 int rth=get_reg(regmap,target|64);
2691 int rt=get_reg(regmap,target);
// Pick the handler table matching the access width.
2695 if(type==LOADB_STUB||type==LOADBU_STUB)
2696 ftable=(int)readmemb;
2697 if(type==LOADH_STUB||type==LOADHU_STUB)
2698 ftable=(int)readmemh;
2699 if(type==LOADW_STUB)
2700 ftable=(int)readmem;
2701 if(type==LOADD_STUB)
2702 ftable=(int)readmemd;
// Publish the address: as an immediate when HOST_IMM_ADDR32 is defined,
// from register rs in the other variant.
2703 #ifdef HOST_IMM_ADDR32
2704 emit_writeword_imm(addr,(int)&address);
2706 emit_writeword(rs,(int)&address);
2709 int cc=get_reg(regmap,CCREG);
2715 assert(cc!=HOST_CCREG);
2716 assert(temp!=HOST_CCREG);
2717 emit_loadreg(CCREG,cc);
2722 emit_loadreg(CCREG,cc);
// Count = cycle counter + adjustment + last_count.
2730 emit_readword((int)&last_count,temp);
2731 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2732 emit_add(cc,temp,cc);
2733 emit_writeword(cc,(int)&Count);
// Only for addresses at or above 0xC0000000 (signed compare): store the
// instruction address, was32 bit and ds flag at 32(%esp).
2734 if((signed int)addr>=(signed int)0xC0000000) {
2735 // Pagefault address
2736 int ds=regmap!=regs[i].regmap;
2737 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2739 emit_call(((u_int *)ftable)[addr>>16]);
2740 // We really shouldn't need to update the count here,
2741 // but not doing so causes random crashes...
2742 emit_readword((int)&Count,HOST_CCREG);
2743 emit_readword((int)&next_interupt,ECX);
2744 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2745 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2746 emit_writeword(ECX,(int)&last_count);
2747 emit_storereg(CCREG,HOST_CCREG);
2749 if((cc=get_reg(regmap,CCREG))>=0) {
2750 emit_loadreg(CCREG,cc);
// Fetch the result from readmem_dword with the extension the stub type
// requires; the upper word of a doubleword is loaded only when rth>=0.
2752 if(type==LOADB_STUB)
2753 emit_movsbl((int)&readmem_dword,rt);
2754 if(type==LOADBU_STUB)
2755 emit_movzbl((int)&readmem_dword,rt);
2756 if(type==LOADH_STUB)
2757 emit_movswl((int)&readmem_dword,rt);
2758 if(type==LOADHU_STUB)
2759 emit_movzwl((int)&readmem_dword,rt);
2760 if(type==LOADW_STUB)
2761 emit_readword((int)&readmem_dword,rt);
2762 if(type==LOADD_STUB) {
2763 emit_readword((int)&readmem_dword,rt);
2764 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
// do_writestub: emit the out-of-line slow path for store stub n.
// Stub fields used here: [0] stub type, [1] the jump to patch so it lands
// on this stub, [2] return address, [3] instruction index (for the trace),
// [5] pointer to the register state, [6] cycle adjustment.
2770 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2771 set_jump_target(stubs[n][1],(int)out);
2772 int type=stubs[n][0];
2775 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2776 signed char *i_regmap=i_regs->regmap;
2777 int addr=get_reg(i_regmap,AGEN1+(i&1));
// Source registers: FTEMP for C1LS, otherwise the instruction's rs2. r
// remembers which guest register that was.
2780 if(itype[i]==C1LS) {
2781 rth=get_reg(i_regmap,FTEMP|64);
2782 rt=get_reg(i_regmap,r=FTEMP);
2784 rth=get_reg(i_regmap,rs2[i]|64);
2785 rt=get_reg(i_regmap,r=rs2[i]);
// No address-generation register allocated: use the register mapped to -1.
2789 if(addr<0) addr=get_reg(i_regmap,-1);
// Pick the handler table matching the access width.
2792 if(type==STOREB_STUB)
2793 ftable=(int)writememb;
2794 if(type==STOREH_STUB)
2795 ftable=(int)writememh;
2796 if(type==STOREW_STUB)
2797 ftable=(int)writemem;
2798 if(type==STORED_STUB)
2799 ftable=(int)writememd;
// Publish the address, then index the handler table by its top 16 bits.
2800 emit_writeword(rs,(int)&address);
2801 emit_shrimm(rs,16,addr);
2802 emit_movmem_indexedx4(ftable,addr,addr);
// Publish the value to store in the global matching the access width.
2803 if(type==STOREB_STUB)
2804 emit_writebyte(rt,(int)&byte);
2805 if(type==STOREH_STUB)
2806 emit_writehword(rt,(int)&hword);
2807 if(type==STOREW_STUB)
2808 emit_writeword(rt,(int)&word);
// Doubleword: rt goes to dword, and dword+4 gets rth, or rt again when r
// is 0.
2809 if(type==STORED_STUB) {
2810 emit_writeword(rt,(int)&dword);
2811 emit_writeword(r?rth:rt,(int)&dword+4);
// ds is nonzero when the stub was recorded with a register state other
// than regs[i] (presumably a delay-slot instruction -- TODO confirm).
// Fixed: the address-of operator had been corrupted into the character
// sequence "(R)egs" by an HTML-entity decode of "&reg".
2814 ds=i_regs!=&regs[i];
2815 int real_rs=get_reg(i_regmap,rs1[i]);
// Write back dirty registers, excluding addr and the real base register.
2816 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2817 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2820 int cc=get_reg(i_regmap,CCREG);
2822 if(addr==HOST_CCREG)
2825 assert(cc!=HOST_CCREG);
2826 assert(temp!=HOST_CCREG);
2827 emit_loadreg(CCREG,cc);
2832 emit_loadreg(CCREG,cc);
// Count = cycle counter + adjustment + last_count. The word stored at
// 32(%esp) encodes the instruction address plus the was32 bit (shifted
// left by one) and the ds flag.
2840 emit_readword((int)&last_count,temp);
2841 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
2842 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2843 emit_add(cc,temp,cc);
2844 emit_writeword(cc,(int)&Count);
// Re-derive the cycle counter from Count and next_interupt and store it.
2846 emit_readword((int)&Count,HOST_CCREG);
2847 emit_readword((int)&next_interupt,ECX);
2848 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2849 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2850 emit_writeword(ECX,(int)&last_count);
2851 emit_storereg(CCREG,HOST_CCREG);
2853 if((cc=get_reg(i_regmap,CCREG))>=0) {
2854 emit_loadreg(CCREG,cc);
2856 emit_jmp(stubs[n][2]); // return address
// inline_writestub: emit a call to a memory write handler inline, for a
// store whose address `addr` is known at compile time. The handler is
// looked up at compile time as ftable[addr>>16]. `target` selects the
// source register and `adj` is the cycle adjustment.
2859 inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2861 assem_debug("inline_writestub\n");
2862 int rs=get_reg(regmap,-1);
2863 int rth=get_reg(regmap,target|64);
2864 int rt=get_reg(regmap,target);
// Pick the handler table matching the access width.
2868 if(type==STOREB_STUB)
2869 ftable=(int)writememb;
2870 if(type==STOREH_STUB)
2871 ftable=(int)writememh;
2872 if(type==STOREW_STUB)
2873 ftable=(int)writemem;
2874 if(type==STORED_STUB)
2875 ftable=(int)writememd;
// Publish the address and the value to store in the global matching the
// access width. For a doubleword, dword+4 gets rth, or rt again when
// target is 0.
2876 emit_writeword(rs,(int)&address);
2877 if(type==STOREB_STUB)
2878 emit_writebyte(rt,(int)&byte);
2879 if(type==STOREH_STUB)
2880 emit_writehword(rt,(int)&hword);
2881 if(type==STOREW_STUB)
2882 emit_writeword(rt,(int)&word);
2883 if(type==STORED_STUB) {
2884 emit_writeword(rt,(int)&dword);
2885 emit_writeword(target?rth:rt,(int)&dword+4);
2888 int cc=get_reg(regmap,CCREG);
2894 assert(cc!=HOST_CCREG);
2895 assert(temp!=HOST_CCREG);
2896 emit_loadreg(CCREG,cc);
2901 emit_loadreg(CCREG,cc);
// Count = cycle counter + adjustment + last_count.
2909 emit_readword((int)&last_count,temp);
2910 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2911 emit_add(cc,temp,cc);
2912 emit_writeword(cc,(int)&Count);
// Only for addresses at or above 0xC0000000 (signed compare): store the
// instruction address, was32 bit and ds flag at 32(%esp).
2913 if((signed int)addr>=(signed int)0xC0000000) {
2914 // Pagefault address
2915 int ds=regmap!=regs[i].regmap;
2916 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2918 emit_call(((u_int *)ftable)[addr>>16]);
// Re-derive the cycle counter from Count and next_interupt and store it.
2919 emit_readword((int)&Count,HOST_CCREG);
2920 emit_readword((int)&next_interupt,ECX);
2921 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2922 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2923 emit_writeword(ECX,(int)&last_count);
2924 emit_storereg(CCREG,HOST_CCREG);
2926 if((cc=get_reg(regmap,CCREG))>=0) {
2927 emit_loadreg(CCREG,cc);
// do_unalignedwritestub: patches the jump recorded in stubs[n][1] to land
// here and jumps straight back to the stub's return address.
2931 do_unalignedwritestub(int n)
2933 set_jump_target(stubs[n][1],(int)out);
2935 emit_jmp(stubs[n][2]); // return address
// printregs: debugging aid that prints the register values passed as
// arguments. The last value printed, (&edi)[-1], is the word just below the
// first argument in memory (presumably the return address -- TODO confirm).
2938 void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
2940 printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
// Invalidate stub: patches the jump recorded in stubs[n][1] to land here,
// calls invalidate_block and returns to stubs[n][2]. When the register in
// stubs[n][4] is not EDI it is exchanged with EDI before the call and
// exchanged back afterwards.
2945 set_jump_target(stubs[n][1],(int)out);
2946 if(stubs[n][4]!=EDI) emit_xchg(stubs[n][4],EDI);
2948 emit_call((int)&invalidate_block);
2950 if(stubs[n][4]!=EDI) emit_xchg(stubs[n][4],EDI);
2951 emit_jmp(stubs[n][2]); // return address
// do_dirty_stub: emit the code-verification stub for instruction i.
// Pushes the instruction address, then loads EAX with `source` (when start
// is below 0xC0000000 as a signed compare) or `start`, EBX with `copy` and
// ECX with the length in bytes (slen*4). It calls verify_code, or
// verify_code_vm for the high range, drops the pushed word and jumps to
// instr_addr[i].
2954 int do_dirty_stub(int i)
2956 assem_debug("do_dirty_stub %x\n",start+i*4);
2957 emit_pushimm(start+i*4);
2958 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
2959 emit_movimm((int)copy,EBX);
2960 emit_movimm(slen*4,ECX);
2961 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2962 emit_addimm(ESP,4,ESP);
// If nothing was emitted since `entry` was taken, use the instruction's own
// address as the entry point.
2965 if(entry==(int)out) entry=instr_addr[i];
2966 emit_jmp(instr_addr[i]);
// do_dirty_stub_ds: variant that pushes start+1 and calls verify_code_ds.
2970 void do_dirty_stub_ds()
2972 emit_pushimm(start+1);
2973 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
2974 emit_movimm((int)copy,EBX);
2975 emit_movimm(slen*4,ECX);
2976 emit_call((int)&verify_code_ds);
2977 emit_addimm(ESP,4,ESP);
// do_cop1stub: emit the stub that jumps to the floating-point exception
// handler. It patches the jump recorded in stubs[n][1] to land here, writes
// back dirty registers, makes sure HOST_CCREG holds CCREG, loads the PC
// into EAX, adds the cycle adjustment and jumps to fp_exception (or
// fp_exception_ds when ds is set).
2982 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2983 set_jump_target(stubs[n][1],(int)out);
2986 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2989 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2990 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2992 //else {printf("fp exception in delay slot\n");}
2993 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2994 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2995 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2996 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2997 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
// do_tlb_r: set up the memory_map lookup for a read.
// For an address at or above 0xC0000000 (signed compare) the map entry is
// read directly from memory_map+(addr>>12). The -1 return means no mapping
// is needed. In the remaining path the source register s is copied to map
// and shifted right by 12, an optional lea8 of s into `shift` is issued
// (when shift>=0), s is masked with `a` into ar unless `a` is all ones, and
// the map entry is loaded from memory_map indexed by map*4.
3002 int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3005 if((signed int)addr>=(signed int)0xC0000000) {
3006 emit_readword((int)(memory_map+(addr>>12)),map);
3009 return -1; // No mapping
3012 if(s!=map) emit_mov(s,map);
3013 emit_shrimm(map,12,map);
3014 // Schedule this while we wait on the load
3015 //if(x) emit_xorimm(addr,x,addr);
3016 if(shift>=0) emit_lea8(s,shift);
3017 if(~a) emit_andimm(s,a,ar);
3018 emit_movmem_indexedx4((int)memory_map,map,map);
// do_tlb_r_branch: the guarded part applies when the address is not a
// compile-time constant (c==0) or lies at or above 0xC0000000.
3022 int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3024 if(!c||(signed int)addr>=(signed int)0xC0000000) {
// gen_tlb_addr_r: combines ar and map (map scaled by 4) into ar with a lea.
3032 int gen_tlb_addr_r(int ar, int map) {
3034 emit_leairrx4(0,ar,map,ar);
// do_tlb_w: set up the memory_map lookup for a write.
// For an address below 0x80800000 or at/above 0xC0000000 (unsigned
// compares) the map entry is read directly from memory_map+(addr>>12). The
// -1 return means no mapping is needed. In the remaining path s is copied
// to map, shifted right by 12 and used to load the entry from memory_map.
// The entry is then shifted left by 2.
3038 int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3041 if(addr<0x80800000||addr>=0xC0000000) {
3042 emit_readword((int)(memory_map+(addr>>12)),map);
3045 return -1; // No mapping
3048 if(s!=map) emit_mov(s,map);
3049 //if(s!=ar) emit_mov(s,ar);
3050 emit_shrimm(map,12,map);
3051 // Schedule this while we wait on the load
3052 //if(x) emit_xorimm(s,x,addr);
3053 emit_movmem_indexedx4((int)memory_map,map,map);
3055 emit_shlimm(map,2,map);
// do_tlb_w_branch: the guarded part applies when the address is not a
// compile-time constant (c==0) or falls in the ranges checked above.
3058 int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3060 if(!c||addr<0x80800000||addr>=0xC0000000) {
// gen_tlb_addr_w: combines ar and map (scale 1) into ar with a lea.
3066 int gen_tlb_addr_w(int ar, int map) {
3068 emit_leairrx1(0,ar,map,ar);
3072 // We don't need this for x86
// generate_map_const: takes a constant address and a register; the only
// visible body line is the commented-out map-address computation below.
3073 generate_map_const(u_int addr,int reg) {
3074 // void *mapaddr=memory_map+(addr>>12);
// shift_assemble_x86: assemble the variable-shift instructions for
// instruction i using the register allocation in i_regs. The x86 variable
// shift takes its count in CL, so most of the work is getting the shift
// amount into ECX without destroying a live value that is already there.
3079 void shift_assemble_x86(int i,struct regstat *i_regs)
3082 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3085 t=get_reg(i_regs->regmap,rt1[i]);
3086 s=get_reg(i_regs->regmap,rs1[i]);
3087 shift=get_reg(i_regs->regmap,rs2[i]);
3096 if(s!=t) emit_mov(s,t);
3100 char temp=get_reg(i_regs->regmap,-1);
// Destination is ECX: put the count in ECX and shift the value in the
// register that held the count (or temp when rt and rs2 are the same guest
// register).
3102 if(t==ECX&&s!=ECX) {
3103 if(shift!=ECX) emit_mov(shift,ECX);
3104 if(rt1[i]==rs2[i]) {shift=temp;}
3105 if(s!=shift) emit_mov(s,shift);
3109 if(rt1[i]==rs2[i]) {emit_mov(shift,temp);shift=temp;}
3110 if(s!=t) emit_mov(s,t);
// ECX unallocated: overwrite it; otherwise exchange so its old value can
// be restored afterwards.
3112 if(i_regs->regmap[ECX]<0)
3113 emit_mov(shift,ECX);
3115 emit_xchg(shift,ECX);
// After the moves above, a destination of ECX lives in `shift`.
3118 if(opcode2[i]==4) // SLLV
3120 emit_shlcl(t==ECX?shift:t);
3122 if(opcode2[i]==6) // SRLV
3124 emit_shrcl(t==ECX?shift:t);
3126 if(opcode2[i]==7) // SRAV
3128 emit_sarcl(t==ECX?shift:t);
// Undo the exchange when ECX was holding a live value.
3130 if(shift!=ECX&&i_regs->regmap[ECX]>=0) emit_xchg(shift,ECX);
3133 } else { // DSLLV/DSRLV/DSRAV
// 64-bit shifts work on a high/low register pair (th:tl from sh:sl).
3134 char sh,sl,th,tl,shift;
3135 th=get_reg(i_regs->regmap,rt1[i]|64);
3136 tl=get_reg(i_regs->regmap,rt1[i]);
3137 sh=get_reg(i_regs->regmap,rs1[i]|64);
3138 sl=get_reg(i_regs->regmap,rs1[i]);
3139 shift=get_reg(i_regs->regmap,rs2[i]);
3144 if(th>=0) emit_zeroreg(th);
3149 if(sl!=tl) emit_mov(sl,tl);
3150 if(th>=0&&sh!=th) emit_mov(sh,th);
3154 // FIXME: What if shift==tl ?
3156 int temp=get_reg(i_regs->regmap,-1);
3158 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
// Same ECX juggling as the 32-bit case, for whichever half of the
// destination pair is ECX.
3161 if(tl==ECX&&sl!=ECX) {
3162 if(shift!=ECX) emit_mov(shift,ECX);
3163 if(sl!=shift) emit_mov(sl,shift);
3164 if(th>=0 && sh!=th) emit_mov(sh,th);
3166 else if(th==ECX&&sh!=ECX) {
3167 if(shift!=ECX) emit_mov(shift,ECX);
3168 if(sh!=shift) emit_mov(sh,shift);
3169 if(sl!=tl) emit_mov(sl,tl);
3173 if(sl!=tl) emit_mov(sl,tl);
3174 if(th>=0 && sh!=th) emit_mov(sh,th);
3176 if(i_regs->regmap[ECX]<0)
3177 emit_mov(shift,ECX);
3179 emit_xchg(shift,ECX);
// Each 64-bit shift is a double-precision shift plus a single shift,
// followed by a test of bit 5 of the count: when it is set (count >= 32)
// conditional moves transfer one half into the other and fill the vacated
// half with zero (or with the sign for DSRAV).
3182 if(opcode2[i]==0x14) // DSLLV
3184 if(th>=0) emit_shldcl(th==ECX?shift:th,tl==ECX?shift:tl);
3185 emit_shlcl(tl==ECX?shift:tl);
3186 emit_testimm(ECX,32);
3187 if(th>=0) emit_cmovne_reg(tl==ECX?shift:tl,th==ECX?shift:th);
3188 emit_cmovne(&const_zero,tl==ECX?shift:tl);
3190 if(opcode2[i]==0x16) // DSRLV
3193 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3194 emit_shrcl(th==ECX?shift:th);
3195 emit_testimm(ECX,32);
3196 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3197 if(real_th>=0) emit_cmovne(&const_zero,th==ECX?shift:th);
3199 if(opcode2[i]==0x17) // DSRAV
3202 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3205 emit_mov(th==ECX?shift:th,temp==ECX?shift:temp);
3207 emit_sarcl(th==ECX?shift:th);
3208 if(real_th>=0) emit_sarimm(temp==ECX?shift:temp,31,temp==ECX?shift:temp);
3209 emit_testimm(ECX,32);
3210 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3211 if(real_th>=0) emit_cmovne_reg(temp==ECX?shift:temp,th==ECX?shift:th);
// Undo the exchange when ECX held a live value or was used as temp.
3213 if(shift!=ECX&&(i_regs->regmap[ECX]>=0||temp==ECX)) emit_xchg(shift,ECX);
3219 #define shift_assemble shift_assemble_x86
// Emit x86 code for the unaligned loads: opcode 0x22/0x26 (LWL/LWR) and
// 0x1A/0x1B (LDL/LDR), as tagged by the inline opcode comments below.
//   i      - index of the instruction in the per-instruction tables
//            (opcode[], rt1[], rs1[], constmap[], ccadj[]).
//   i_regs - register state for this instruction (regmap, wasconst, wasdirty).
// NOTE(review): this listing omits some short lines (declarations, braces,
// else branches), so the comments describe only the statements visible here.
3221 void loadlr_assemble_x86(int i,struct regstat *i_regs)
3223 int s,th,tl,temp,temp2,addr,map=-1;
// Host registers: th/tl map rt1|64 and rt1 (names suggest high/low halves),
// s maps the base rs1, temp is a scratch (-1), temp2 maps FTEMP, and addr
// maps one of the two address-generation registers (AGEN1 + parity of i).
3228 th=get_reg(i_regs->regmap,rt1[i]|64);
3229 tl=get_reg(i_regs->regmap,rt1[i]);
3230 s=get_reg(i_regs->regmap,rs1[i]);
3231 temp=get_reg(i_regs->regmap,-1);
3232 temp2=get_reg(i_regs->regmap,FTEMP);
3233 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
// Collect a bitmask of host registers that currently have a mapping; it is
// handed to add_stub/inline_readstub below.
3236 for(hr=0;hr<HOST_REGS;hr++) {
3237 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
// With a non-zero offset, an unmapped base, or a constant base, temp2 is used
// as the address register.
3240 if(offset||s<0||c) addr=temp2;
// Constant base: classify the target at assemble time. memtarget is set when
// the address is below 0x80800000 (signed compare), or, with using_tlb,
// when it is at or above 0xC0000000.
3243 c=(i_regs->wasconst>>s)&1;
3244 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3245 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
// temp2 receives the address aligned down to 4 bytes (LWL/LWR) or 8 bytes
// (LDL/LDR). emit_lea8 presumably puts addr*8 into temp for the later bit
// shift - TODO confirm against emit_lea8.
3252 emit_lea8(addr,temp);
3253 if (opcode[i]==0x22||opcode[i]==0x26) {
3254 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3256 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3258 emit_cmpimm(addr,0x800000);
// Known address: the bit shift is computed now, (address*8)&24 for words and
// (address*8)&56 for doublewords.
3263 if (opcode[i]==0x22||opcode[i]==0x26) {
3264 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3266 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
// a is the alignment mask passed to do_tlb_r on the TLB path.
3273 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3274 a=0xFFFFFFFC; // LWL/LWR
3276 a=0xFFFFFFF8; // LDL/LDR
3278 map=get_reg(i_regs->regmap,TLREG);
3280 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3282 if (opcode[i]==0x22||opcode[i]==0x26) {
3283 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3285 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3288 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3290 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3292 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
// Load the aligned word into temp2; when a branch address was recorded in
// jaddr, register a LOADW_STUB for it.
3293 emit_readword_indexed_tlb(0,temp2,map,temp2);
3294 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
// Alternative visible here: read through inline_readstub at the constant,
// word-aligned address (the selecting condition is not visible).
3297 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
// Reduce temp to the byte shift in bits (0, 8, 16 or 24); LWR inverts it.
3298 emit_andimm(temp,24,temp);
3299 if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
// temp3 is an extra scratch register saved with a push and loaded with -1;
// the increments step past registers already used as temp/temp2.
// NOTE(review): temp3's declaration and the shift/pop lines of this variant
// are not visible in this listing.
3303 if(temp3==temp2) temp3++;
3304 emit_pushreg(temp3);
3305 emit_movimm(-1,temp3);
3306 if (opcode[i]==0x26) {
3313 emit_mov(temp3,ECX);
// Second variant: temp is exchanged with ECX before the CL-count shifts, so
// temp2 is redirected to temp whenever temp2 itself is ECX.
3320 if(temp3==temp) temp3++;
3321 if(temp3==temp2) temp3++;
3322 if(temp3==temp) temp3++;
3323 emit_xchg(ECX,temp);
3324 emit_pushreg(temp3);
3325 emit_movimm(-1,temp3);
3326 if (opcode[i]==0x26) {
3328 emit_shrcl(temp2==ECX?temp:temp2);
3331 emit_shlcl(temp2==ECX?temp:temp2);
3333 emit_not(temp3,temp3);
3335 emit_mov(temp3,temp);
// Merge into the target: and tl with temp, then or in temp2 (assuming the
// last argument of emit_and/emit_or is the destination - TODO confirm).
3338 emit_and(temp,tl,tl);
3339 emit_or(temp2,tl,tl);
3340 //emit_storereg(rt1[i],tl); // DEBUG
// NOTE(review): the trailing "*/" on the restore_regs line suggests the
// memdebug sequence below sits inside a block comment whose opening line is
// not visible in this listing - confirm before relying on it.
3342 //save_regs(0x100f);
3343 emit_readword((int)&last_count,ECX);
3344 if(get_reg(i_regs->regmap,CCREG)<0)
3345 emit_loadreg(CCREG,HOST_CCREG);
3346 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3347 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3348 emit_writeword(HOST_CCREG,(int)&Count);
3349 emit_call((int)memdebug);
3351 //restore_regs(0x100f);*/
3353 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
// Store the base register (and its |64 counterpart, when mapped) if it is
// flagged in wasdirty; both are reloaded at the end of this function.
3355 if((i_regs->wasdirty>>s)&1)
3356 emit_storereg(rs1[i],s);
3357 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3358 if((i_regs->wasdirty>>get_reg(i_regs->regmap,rs1[i]|64))&1)
3359 emit_storereg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3360 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3362 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3363 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
// Load the aligned doubleword into temp2h:temp2, with a LOADD_STUB when a
// branch address was recorded in jaddr.
3364 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3365 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3368 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
// Reduce temp to the byte shift in bits (multiple of 8, at most 56).
3369 emit_andimm(temp,56,temp);
// Push the loaded doubleword, call ldl_merge (0x1A) or ldr_merge (0x1B),
// then release 20 bytes of stack. Only two pushes are visible here; the
// remaining argument pushes are presumably on omitted lines.
3371 emit_pushreg(temp2h);
3372 emit_pushreg(temp2);
3375 if(opcode[i]==0x1A) emit_call((int)ldl_merge);
3376 if(opcode[i]==0x1B) emit_call((int)ldr_merge);
3377 emit_addimm(ESP,20,ESP);
// Copy the result from EAX/EDX into tl/th. Two orderings appear; the
// condition that selects between them is not visible in this listing.
3379 if(tl!=EAX) emit_mov(EAX,tl);
3380 if(th!=EDX) emit_mov(EDX,th);
3383 if(th!=EDX) emit_mov(EDX,th);
3384 if(tl!=EAX) emit_mov(EAX,tl);
// Reload the base register (and its |64 counterpart, when mapped).
3388 if(s>=0) emit_loadreg(rs1[i],s);
3389 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3390 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
// Map the generic name loadlr_assemble onto the x86 implementation.
3394 #define loadlr_assemble loadlr_assemble_x86
// Emit code for coprocessor-0 instructions, selected by opcode2[i]:
// 0 = MFC0, 4 = MTC0, and 0x10 = the group decoded by source[i]&0x3f
// (TLBR, TLBWI, TLBWR, TLBP, ERET).
//   i      - instruction index into source[], opcode2[], rt1[], rs1[], ccadj[].
//   i_regs - register state; regmap supplies host register assignments.
// The work is done by calling the C handlers (MFC0, MTC0, TLBR, ...); values
// are exchanged through the global readmem_dword.
// NOTE(review): this listing omits some short lines (braces, else, short
// conditions), so the comments describe only the statements visible here.
3396 void cop0_assemble(int i,struct regstat *i_regs)
3398 if(opcode2[i]==0) // MFC0
3400 signed char t=get_reg(i_regs->regmap,rt1[i]);
// copr is the register number taken from bits 11..15 of the instruction.
3401 char copr=(source[i]>>11)&0x1f;
3402 //assert(t>=0); // Why does this happen? OOT is weird
// Point PC at fake_pc and record the register number in fake_pc.f.r.nrd
// before the handler call (presumably how MFC0 learns the register).
3404 emit_writeword_imm((int)&fake_pc,(int)&PC);
3405 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
// Bring Count up to date: cycle register + last_count +
// CLOCK_DIVIDER*ccadj[i]. The condition guarding this sequence is not
// visible here.
3407 emit_readword((int)&last_count,ECX);
3408 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3409 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3410 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3411 emit_writeword(HOST_CCREG,(int)&Count);
// Call the handler and load readmem_dword into the target register.
3413 emit_call((int)MFC0);
3414 emit_readword((int)&readmem_dword,t);
3417 else if(opcode2[i]==4) // MTC0
3419 signed char s=get_reg(i_regs->regmap,rs1[i]);
3420 char copr=(source[i]>>11)&0x1f;
// Write the source register to readmem_dword ahead of the MTC0 call.
3422 emit_writeword(s,(int)&readmem_dword);
3424 emit_writeword_imm((int)&fake_pc,(int)&PC);
3425 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
// Registers 9, 11 and 12 get an up-to-date Count before the call.
3426 if(copr==9||copr==11||copr==12) {
3427 if(copr==12&&!is_delayslot) {
3428 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3430 emit_readword((int)&last_count,ECX);
3431 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3432 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3433 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3434 emit_writeword(HOST_CCREG,(int)&Count);
3436 // What a mess. The status register (12) can enable interrupts,
3437 // so needs a special case to handle a pending interrupt.
3438 // The interrupt must be taken immediately, because a subsequent
3439 // instruction might disable interrupts again.
// For register 12 outside a delay slot: record the address of the next
// instruction in pcaddr and clear pending_exception before the call.
3440 if(copr==12&&!is_delayslot) {
3441 emit_writeword_imm(start+i*4+4,(int)&pcaddr);
3442 emit_writebyte_imm(0,(int)&pending_exception);
3444 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3446 emit_call((int)MTC0);
// After the call, rebuild the cycle register from Count and next_interupt
// (Count - CLOCK_DIVIDER*ccadj[i], then emit_sub with next_interupt in ECX),
// and store next_interupt as the new last_count.
3447 if(copr==9||copr==11||copr==12) {
3448 emit_readword((int)&Count,HOST_CCREG);
3449 emit_readword((int)&next_interupt,ECX);
3450 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3451 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3452 emit_writeword(ECX,(int)&last_count);
3453 emit_storereg(CCREG,HOST_CCREG);
// If the handler set pending_exception, jump to do_interrupt. Not supported
// in a delay slot (asserted). The enclosing condition is not visible.
3457 assert(!is_delayslot);
3458 //if(is_delayslot) output_byte(0xcc);
3459 emit_cmpmem_imm_byte((int)&pending_exception,0);
3460 emit_jne((int)&do_interrupt);
// Remaining case: opcode2 must be 0x10; the low six bits pick the operation.
3466 assert(opcode2[i]==0x10);
3467 if((source[i]&0x3f)==0x01) // TLBR
3468 emit_call((int)TLBR);
3469 if((source[i]&0x3f)==0x02) // TLBWI
3470 emit_call((int)TLBWI_new);
3471 if((source[i]&0x3f)==0x06) { // TLBWR
3472 // The TLB entry written by TLBWR is dependent on the count,
3473 // so update the cycle count
3474 emit_readword((int)&last_count,ECX);
3475 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3476 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3477 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3478 emit_writeword(HOST_CCREG,(int)&Count);
3479 emit_call((int)TLBWR_new);
3481 if((source[i]&0x3f)==0x08) // TLBP
3482 emit_call((int)TLBP);
3483 if((source[i]&0x3f)==0x18) // ERET
// ERET: make sure the cycle register is loaded, add CLOCK_DIVIDER*count
// (count is declared on a line not visible here) and jump to jump_eret.
3486 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3487 emit_addimm_and_set_flags(CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3488 emit_jmp((int)jump_eret);
// Emit code for the COP1 register-move instructions, selected by opcode2[i]:
// 0 = MFC1, 1 = DMFC1, 4 = MTC1, 5 = DMTC1, 2 = CFC1, 6 = CTC1.
//   i      - instruction index into source[], opcode2[], rt1[], rs1[].
//   i_regs - register state; regmap supplies host register assignments.
// FPU registers are reached through the pointer tables reg_cop1_simple[] and
// reg_cop1_double[], indexed by bits 11..15 of the instruction word: the
// table entry (a pointer) is loaded first, then the value is read or written
// through it.
// NOTE(review): this listing omits some short lines (braces, asserts, short
// declarations), so the comments describe only the statements visible here.
3493 void cop1_assemble(int i,struct regstat *i_regs)
3495 // Check cop1 unusable
// Test bit 0x20000000 of the register mapped to CSREG and register an
// FP_STUB for this instruction (the branch that sets jaddr is not visible).
3497 signed char rs=get_reg(i_regs->regmap,CSREG);
3499 emit_testimm(rs,0x20000000);
3502 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3505 if (opcode2[i]==0) { // MFC1
3506 signed char tl=get_reg(i_regs->regmap,rt1[i]);
// tl = pointer from the table, then tl = the 32-bit word it points to.
3508 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3509 emit_readword_indexed(0,tl,tl);
3512 else if (opcode2[i]==1) { // DMFC1
3513 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3514 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
// The word at offset 4 is read into th first (when mapped) because tl still
// holds the pointer; tl is overwritten last with the word at offset 0.
3516 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3517 if(th>=0) emit_readword_indexed(4,tl,th);
3518 emit_readword_indexed(0,tl,tl);
3521 else if (opcode2[i]==4) { // MTC1
3522 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3523 signed char temp=get_reg(i_regs->regmap,-1);
// temp = pointer from the table; store sl through it.
3524 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3525 emit_writeword_indexed(sl,0,temp);
3527 else if (opcode2[i]==5) { // DMTC1
3528 signed char sl=get_reg(i_regs->regmap,rs1[i]);
// When rs1 is register 0 the same host register is used for both words.
3529 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3530 signed char temp=get_reg(i_regs->regmap,-1);
3531 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3532 emit_writeword_indexed(sh,4,temp);
3533 emit_writeword_indexed(sl,0,temp);
3535 else if (opcode2[i]==2) // CFC1
3537 signed char tl=get_reg(i_regs->regmap,rt1[i]);
// Only control registers 0 (FCR0) and 31 (FCR31) are read.
3539 u_int copr=(source[i]>>11)&0x1f;
3540 if(copr==0) emit_readword((int)&FCR0,tl);
3541 if(copr==31) emit_readword((int)&FCR31,tl);
3544 else if (opcode2[i]==6) // CTC1
3546 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3547 u_int copr=(source[i]>>11)&0x1f;
// Store the new FCR31 value (the guarding condition on copr is not visible).
3551 emit_writeword(sl,(int)&FCR31);
3552 // Set the rounding mode
// temp = sl & 3 selects an entry of rounding_modes[] for emit_fldcw_indexed.
// signed char keeps this consistent with the other get_reg results above.
3553 signed char temp=get_reg(i_regs->regmap,-1);
3554 emit_movimm(3,temp);
3555 emit_and(sl,temp,temp);
3556 emit_fldcw_indexed((int)&rounding_modes,temp);
// Emit code for the COP1 conversion instructions (cvt.*, round.*, trunc.*,
// ceil.*, floor.*), selected by opcode2[i] (0x10 single, 0x11 double,
// 0x14 word, 0x15 long source format) and the low six bits of source[i].
//   i      - instruction index into source[] and opcode2[].
//   i_regs - register state; regmap supplies host register assignments.
// Operands are reached through the pointer tables reg_cop1_simple[] and
// reg_cop1_double[]: the value is loaded through the entry indexed by bits
// 11..15 of the instruction and stored through the entry indexed by bits
// 6..10.
// NOTE(review): this listing omits some short lines (braces, returns and the
// x87 load/store emitters between the pointer loads), so the comments
// describe only the statements visible here.
3561 void fconv_assemble_x86(int i,struct regstat *i_regs)
3563 signed char temp=get_reg(i_regs->regmap,-1);
3565 // Check cop1 unusable
// Test bit 0x20000000 of the register mapped to CSREG and register an
// FP_STUB for this instruction (the branch that sets jaddr is not visible).
3567 signed char rs=get_reg(i_regs->regmap,CSREG);
3569 emit_testimm(rs,0x20000000);
3572 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
// Truncating conversions to word use the SSE emitters: load the operand,
// convert with truncation, then store the 32-bit result. For trunc_w_s the
// destination pointer is reloaded only when the two register fields differ.
3575 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3576 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3577 emit_movss_load(temp,0);
3578 emit_cvttps2dq(0,0); // float->int, truncate
3579 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3580 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3581 emit_movd_store(0,temp);
3584 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3585 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3586 emit_movsd_load(temp,0);
3587 emit_cvttpd2dq(0,0); // double->int, truncate
3588 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3589 emit_movd_store(0,temp);
// Integer/float format conversions: temp is loaded with the source pointer
// and then with the destination pointer. The destination load is skipped
// when both fields name the same register in the same table.
3593 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3594 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3596 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3597 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3601 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3602 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3604 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3608 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) { // cvt_s_l
3609 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3611 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3615 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) { // cvt_d_l
3616 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3618 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3619 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3624 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3625 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3627 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3631 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3632 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3634 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
// Float -> integer conversions: load the source pointer for the operand.
3639 if(opcode2[i]==0x10) { // cvt_*_s
3640 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3643 if(opcode2[i]==0x11) { // cvt_*_d
3644 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
// Function codes below 0x10 carry an explicit rounding mode in their low two
// bits: emit_fnstcw_stack() is emitted first (presumably saving the current
// control word), then the matching mode word is loaded.
3647 if((source[i]&0x3f)<0x10) {
3648 emit_fnstcw_stack();
3649 if((source[i]&3)==0) emit_fldcw((int)&round_mode); //printf("round\n");
3650 if((source[i]&3)==1) emit_fldcw((int)&trunc_mode); //printf("trunc\n");
3651 if((source[i]&3)==2) emit_fldcw((int)&ceil_mode); //printf("ceil\n");
3652 if((source[i]&3)==3) emit_fldcw((int)&floor_mode); //printf("floor\n");
// Word results (0x24 and 0x0c..0x0f) go through reg_cop1_simple, long
// results (0x25 and 0x08..0x0b) through reg_cop1_double. The pointer load
// is skipped only when temp already holds the same table entry.
3654 if((source[i]&0x3f)==0x24||(source[i]&0x3c)==0x0c) { // cvt_w_*
3655 if(opcode2[i]!=0x10||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3656 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3659 if((source[i]&0x3f)==0x25||(source[i]&0x3c)==0x08) { // cvt_l_*
3660 if(opcode2[i]!=0x11||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3661 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3664 if((source[i]&0x3f)<0x10) {
3669 // C emulation code for debugging
// Each case below pushes the destination pointer, then the source pointer,
// and calls the matching C helper; the 8 bytes of arguments are released by
// the final emit_addimm(ESP,8,ESP).
3673 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3674 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3675 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3676 emit_call((int)cvt_s_w);
3678 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3679 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3680 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3681 emit_call((int)cvt_d_w);
3683 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3684 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3685 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3686 emit_call((int)cvt_s_l);
3688 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3689 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3690 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3691 emit_call((int)cvt_d_l);
3694 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3695 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3696 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3697 emit_call((int)cvt_d_s);
3699 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3700 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3701 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3702 emit_call((int)cvt_w_s);
3704 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3705 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3706 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3707 emit_call((int)cvt_l_s);
3710 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3711 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3712 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3713 emit_call((int)cvt_s_d);
3715 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3716 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3717 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3718 emit_call((int)cvt_w_d);
3720 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3721 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3722 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3723 emit_call((int)cvt_l_d);
3726 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3727 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3728 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3729 emit_call((int)round_l_s);
3731 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3732 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3733 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3734 emit_call((int)trunc_l_s);
3736 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3737 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3738 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3739 emit_call((int)ceil_l_s);
3741 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3742 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3743 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3744 emit_call((int)floor_l_s);
3746 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3747 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3748 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3749 emit_call((int)round_w_s);
3751 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3752 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3753 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3754 emit_call((int)trunc_w_s);
3756 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3757 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3758 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3759 emit_call((int)ceil_w_s);
3761 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3762 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3763 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3764 emit_call((int)floor_w_s);
3767 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3768 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3769 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3770 emit_call((int)round_l_d);
3772 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3773 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3774 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3775 emit_call((int)trunc_l_d);
3777 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3778 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3779 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3780 emit_call((int)ceil_l_d);
3782 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3783 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3784 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3785 emit_call((int)floor_l_d);
3787 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3788 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3789 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3790 emit_call((int)round_w_d);
3792 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3793 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3794 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3795 emit_call((int)trunc_w_d);
3797 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3798 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3799 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3800 emit_call((int)ceil_w_d);
3802 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3803 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3804 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3805 emit_call((int)floor_w_d);
// Release the two pushed pointer arguments.
3808 emit_addimm(ESP,8,ESP);
3810 //emit_loadreg(CSREG,rs);
// Map the generic name fconv_assemble onto the x86 implementation.
3813 #define fconv_assemble fconv_assemble_x86
// Emit code for the COP1 compare instructions (c.cond.s when opcode2[i] is
// 0x10, c.cond.d when it is 0x11); the condition is source[i]&0x3f
// (0x30..0x3f). The result is kept as bit 0x800000 of the host register
// mapped to FSREG.
//   i      - instruction index into source[] and opcode2[].
//   i_regs - register state; regmap supplies host register assignments.
// Operands are reached through reg_cop1_simple[] / reg_cop1_double[],
// indexed by bits 16..20 and bits 11..15 of the instruction word.
// NOTE(review): this listing omits some short lines (braces, returns and the
// x87 load/compare emitters), so the comments describe only the statements
// visible here.
3815 void fcomp_assemble(int i,struct regstat *i_regs)
3817 signed char fs=get_reg(i_regs->regmap,FSREG);
3818 signed char temp=get_reg(i_regs->regmap,-1);
3820 // Check cop1 unusable
// Test bit 0x20000000 of the register mapped to CSREG and register an
// FP_STUB for this instruction (the branch that sets jaddr is not visible).
3822 signed char cs=get_reg(i_regs->regmap,CSREG);
3824 emit_testimm(cs,0x20000000);
3827 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
// Condition 0x30: just clear bit 0x800000.
3831 if((source[i]&0x3f)==0x30) {
3832 emit_andimm(fs,~0x800000,fs);
// Conditions 0x38 and 0x39 are handled the same way.
3836 if((source[i]&0x3e)==0x38) {
3837 // sf/ngle - these should throw exceptions for NaNs
3838 emit_andimm(fs,~0x800000,fs);
// Single precision: load both operand pointers, then prepare two candidate
// values: fs with bit 0x800000 set, and temp = fs with the bit cleared
// (assuming the last argument of emit_or/emit_xor is the destination).
// The conditional moves copy temp into fs when the compare fails.
3842 if(opcode2[i]==0x10) {
3843 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
3845 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3847 emit_movimm(0x800000,temp);
3848 emit_or(fs,temp,fs);
3849 emit_xor(temp,fs,temp);
3852 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_s
3853 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_s
3854 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_s
3855 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_s
3856 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_s
3857 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_s
3858 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_s
3859 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_s
3860 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_s
3861 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_s
3862 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_s
3863 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_s
3864 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_s
// Double precision: same scheme, using the reg_cop1_double pointer table.
3867 if(opcode2[i]==0x11) {
3868 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
3870 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3872 emit_movimm(0x800000,temp);
3873 emit_or(fs,temp,fs);
3874 emit_xor(temp,fs,temp);
3877 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_d
3878 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_d
3879 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_d
3880 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_d
3881 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_d
3882 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_d
3883 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_d
3884 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_d
3885 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_d
3886 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_d
3887 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_d
3888 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_d
3889 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_d
// C helper variant: push the two operand pointers (bits 16..20 first, then
// bits 11..15) and call the helper that matches the condition code.
// NOTE(review): how this variant is selected is not visible in this listing.
3894 if(opcode2[i]==0x10) {
3895 emit_pushmem((int)&reg_cop1_simple[(source[i]>>16)&0x1f]);
3896 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3897 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3898 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3899 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3900 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3901 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3902 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3903 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3904 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3905 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3906 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3907 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3908 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3909 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3910 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3911 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3912 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3914 if(opcode2[i]==0x11) {
3915 emit_pushmem((int)&reg_cop1_double[(source[i]>>16)&0x1f]);
3916 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3917 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3918 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3919 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3920 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3921 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3922 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3923 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3924 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3925 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3926 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3927 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3928 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3929 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3930 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3931 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3932 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
// Release the two pushed pointers and reload FSREG into its host register
// after the helper call.
3934 emit_addimm(ESP,8,ESP);
3936 emit_loadreg(FSREG,fs);
// Emit code for the COP1 arithmetic instructions, selected by
// source[i]&0x3f: 0 add, 1 sub, 2 mul, 3 div, 4 sqrt, 5 abs, 6 mov, 7 neg.
// opcode2[i] is 0x10 for single precision and 0x11 for double.
//   i      - instruction index into source[] and opcode2[].
//   i_regs - register state; regmap supplies host register assignments.
// Operands are reached through reg_cop1_simple[] / reg_cop1_double[]: the
// first operand is indexed by bits 11..15 of the instruction, the second
// (for codes 0..3) by bits 16..20, and the result by bits 6..10.
// NOTE(review): this listing omits some short lines (braces, returns and the
// x87 load/store emitters between the pointer loads), so the comments
// describe only the statements visible here.
3940 void float_assemble(int i,struct regstat *i_regs)
3942 signed char temp=get_reg(i_regs->regmap,-1);
3944 // Check cop1 unusable
// Test bit 0x20000000 of the register mapped to CSREG and register an
// FP_STUB for this instruction (the branch that sets jaddr is not visible).
3946 signed char cs=get_reg(i_regs->regmap,CSREG);
3948 emit_testimm(cs,0x20000000);
3951 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
// mov: nothing is emitted here when the two register fields are equal.
3955 if((source[i]&0x3f)==6) // mov
3957 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3958 if(opcode2[i]==0x10) {
3959 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3961 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3964 if(opcode2[i]==0x11) {
3965 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3967 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
// Function codes above 3: load the operand pointer, and reload temp with
// the result pointer only when the two register fields differ.
3974 if((source[i]&0x3f)>3)
3976 if(opcode2[i]==0x10) {
3977 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3979 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3980 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3983 if(opcode2[i]==0x11) {
3984 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3986 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3987 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3990 if((source[i]&0x3f)==4) // sqrt
3992 if((source[i]&0x3f)==5) // abs
3994 if((source[i]&0x3f)==7) // neg
3996 if(opcode2[i]==0x10) {
3999 if(opcode2[i]==0x11) {
// Binary operations (codes 0..3): load the first operand pointer, then
// apply the second operand.
4004 if((source[i]&0x3f)<4)
4006 if(opcode2[i]==0x10) {
4007 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4010 if(opcode2[i]==0x11) {
4011 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
// Different operand registers: the memory-operand emitters take the second
// operand's pointer in temp.
4014 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4015 if(opcode2[i]==0x10) {
4016 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
4017 if((source[i]&0x3f)==0) emit_fadds(temp);
4018 if((source[i]&0x3f)==1) emit_fsubs(temp);
4019 if((source[i]&0x3f)==2) emit_fmuls(temp);
4020 if((source[i]&0x3f)==3) emit_fdivs(temp);
4022 else if(opcode2[i]==0x11) {
4023 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
4024 if((source[i]&0x3f)==0) emit_faddl(temp);
4025 if((source[i]&0x3f)==1) emit_fsubl(temp);
4026 if((source[i]&0x3f)==2) emit_fmull(temp);
4027 if((source[i]&0x3f)==3) emit_fdivl(temp);
// Remaining visible case (presumably both operands are the same register -
// the else line is not visible): the emitters take argument 0.
4031 if((source[i]&0x3f)==0) emit_fadd(0);
4032 if((source[i]&0x3f)==1) emit_fsub(0);
4033 if((source[i]&0x3f)==2) emit_fmul(0);
4034 if((source[i]&0x3f)==3) emit_fdiv(0);
// Load the result pointer unless temp already holds the same table entry.
4036 if(opcode2[i]==0x10) {
4037 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4038 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4042 if(opcode2[i]==0x11) {
4043 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4044 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
// C helper variant: push result pointer, second operand pointer (binary
// operations only) and first operand pointer, call the helper, then release
// 12 or 8 bytes of stack to match the number of pushes.
// NOTE(review): how this variant is selected is not visible in this listing.
4051 if(opcode2[i]==0x10) { // Single precision
4053 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
4054 if((source[i]&0x3f)<4)
4055 emit_pushmem((int)&reg_cop1_simple[(source[i]>>16)&0x1f]);
4056 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
4057 switch(source[i]&0x3f)
4059 case 0x00: emit_call((int)add_s);break;
4060 case 0x01: emit_call((int)sub_s);break;
4061 case 0x02: emit_call((int)mul_s);break;
4062 case 0x03: emit_call((int)div_s);break;
4063 case 0x04: emit_call((int)sqrt_s);break;
4064 case 0x05: emit_call((int)abs_s);break;
4065 case 0x06: emit_call((int)mov_s);break;
4066 case 0x07: emit_call((int)neg_s);break;
4068 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
4071 if(opcode2[i]==0x11) { // Double precision
4073 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
4074 if((source[i]&0x3f)<4)
4075 emit_pushmem((int)&reg_cop1_double[(source[i]>>16)&0x1f]);
4076 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
4077 switch(source[i]&0x3f)
4079 case 0x00: emit_call((int)add_d);break;
4080 case 0x01: emit_call((int)sub_d);break;
4081 case 0x02: emit_call((int)mul_d);break;
4082 case 0x03: emit_call((int)div_d);break;
4083 case 0x04: emit_call((int)sqrt_d);break;
4084 case 0x05: emit_call((int)abs_d);break;
4085 case 0x06: emit_call((int)mov_d);break;
4086 case 0x07: emit_call((int)neg_d);break;
4088 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
// Emit code for the multiply/divide instructions, selected by opcode2[i]:
// 0x18 MULT, 0x19 MULTU, 0x1A DIV, 0x1B DIVU (32-bit, (opcode2&4)==0) and
// 0x1C DMULT, 0x1D DMULTU, 0x1E DDIV, 0x1F DDIVU (64-bit).
//   i      - instruction index into opcode2[], rs1[], rs2[].
//   i_regs - register state; regmap supplies host register assignments.
// Host register numbers returned by get_reg are held in signed char: the
// value is tested with >=0 below, and plain char has implementation-defined
// signedness. This also matches the other assemblers in this file.
// NOTE(review): this listing omits many short lines (braces, asserts and
// most of the actual multiply/divide emitters), so the comments describe
// only the statements visible here.
4093 void multdiv_assemble_x86(int i,struct regstat *i_regs)
4100 // case 0x1D: DMULTU
4105 if((opcode2[i]&4)==0) // 32-bit
4107 if(opcode2[i]==0x18) // MULT
4109 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4110 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4116 if(opcode2[i]==0x19) // MULTU
4118 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4119 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4125 if(opcode2[i]==0x1A) // DIV
4127 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4128 signed char d2=get_reg(i_regs->regmap,rs2[i]);
// Conditional jump 8 bytes past the current output position (presumably
// skipping the divide when the divisor tested zero; the test itself is not
// visible here).
4134 emit_jeq((int)out+8);
4137 if(opcode2[i]==0x1B) // DIVU
4139 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4140 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4146 emit_jeq((int)out+8);
4152 if(opcode2[i]==0x1C) // DMULT
4154 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4155 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4156 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4157 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
// The 64-bit signed multiply is done by the C helper mult64; afterwards any
// host registers mapped to HI/LO (both halves) are reloaded.
4166 emit_call((int)&mult64);
4171 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4172 signed char hil=get_reg(i_regs->regmap,HIREG);
4173 if(hih>=0) emit_loadreg(HIREG|64,hih);
4174 if(hil>=0) emit_loadreg(HIREG,hil);
4175 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4176 signed char lol=get_reg(i_regs->regmap,LOREG);
4177 if(loh>=0) emit_loadreg(LOREG|64,loh);
4178 if(lol>=0) emit_loadreg(LOREG,lol);
4180 if(opcode2[i]==0x1D) // DMULTU
4182 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4183 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4184 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4185 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4186 signed char temp=get_reg(i_regs->regmap,-1);
// Inline sequence: intermediate values are stored to LOREG/HIREG and
// accumulated through temp and EAX (the multiply and carry emitters between
// these lines are not visible).
4194 emit_storereg(LOREG,EAX);
4198 emit_add(EAX,temp,temp);
4200 emit_storereg(HIREG,EDX);
4203 emit_add(EAX,temp,temp);
4205 emit_storereg(LOREG|64,temp);
4209 emit_add(EAX,temp,EAX);
4210 emit_loadreg(HIREG,temp);
4212 emit_add(EAX,temp,EAX);
// NOTE(review): a call to the C helper multu64 also appears here; whether
// it is an alternative to the inline sequence above cannot be told from
// this listing.
4220 emit_call((int)&multu64);
4225 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4226 signed char hil=get_reg(i_regs->regmap,HIREG);
4227 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4228 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4230 // Shouldn't be necessary
4231 //char loh=get_reg(i_regs->regmap,LOREG|64);
4232 //char lol=get_reg(i_regs->regmap,LOREG);
4233 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4234 //if(lol>=0) emit_loadreg(LOREG,lol);
4236 if(opcode2[i]==0x1E) // DDIV
4238 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4239 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4240 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4241 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4246 //emit_pushreg(d2h);
4247 //emit_pushreg(d2l);
4248 //emit_pushreg(d1h);
4249 //emit_pushreg(d1l);
// Reserve 16 bytes of stack and store the operands there (d1 at offsets 0
// and 4, d2 at offsets 8 and 12), call the C helper div64, then read the
// operands back into their host registers and release the stack space.
4250 emit_addimm(ESP,-16,ESP);
4251 emit_writeword_indexed(d2h,12,ESP);
4252 emit_writeword_indexed(d2l,8,ESP);
4253 emit_writeword_indexed(d1h,4,ESP);
4254 emit_writeword_indexed(d1l,0,ESP);
4255 emit_call((int)&div64);
4260 emit_readword_indexed(0,ESP,d1l);
4261 emit_readword_indexed(4,ESP,d1h);
4262 emit_readword_indexed(8,ESP,d2l);
4263 emit_readword_indexed(12,ESP,d2h);
4264 emit_addimm(ESP,16,ESP);
// Reload any host registers mapped to HI/LO (both halves).
4265 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4266 signed char hil=get_reg(i_regs->regmap,HIREG);
4267 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4268 signed char lol=get_reg(i_regs->regmap,LOREG);
4269 if(hih>=0) emit_loadreg(HIREG|64,hih);
4270 if(hil>=0) emit_loadreg(HIREG,hil);
4271 if(loh>=0) emit_loadreg(LOREG|64,loh);
4272 if(lol>=0) emit_loadreg(LOREG,lol);
4274 if(opcode2[i]==0x1F) // DDIVU
4276 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4277 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4278 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4279 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4284 //emit_pushreg(d2h);
4285 //emit_pushreg(d2l);
4286 //emit_pushreg(d1h);
4287 //emit_pushreg(d1l);
// Same stack-based calling sequence as DDIV, with the helper divu64.
4288 emit_addimm(ESP,-16,ESP);
4289 emit_writeword_indexed(d2h,12,ESP);
4290 emit_writeword_indexed(d2l,8,ESP);
4291 emit_writeword_indexed(d1h,4,ESP);
4292 emit_writeword_indexed(d1l,0,ESP);
4293 emit_call((int)&divu64);
4298 emit_readword_indexed(0,ESP,d1l);
4299 emit_readword_indexed(4,ESP,d1h);
4300 emit_readword_indexed(8,ESP,d2l);
4301 emit_readword_indexed(12,ESP,d2h);
4302 emit_addimm(ESP,16,ESP);
4303 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4304 signed char hil=get_reg(i_regs->regmap,HIREG);
4305 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4306 signed char lol=get_reg(i_regs->regmap,LOREG);
4307 if(hih>=0) emit_loadreg(HIREG|64,hih);
4308 if(hil>=0) emit_loadreg(HIREG,hil);
4309 if(loh>=0) emit_loadreg(LOREG|64,loh);
4310 if(lol>=0) emit_loadreg(LOREG,lol);
// Final case (its condition is not visible in this listing): zero whichever
// host registers are mapped to HI and LO.
4316 // Multiply by zero is zero.
4317 // MIPS does not have a divide by zero exception.
4318 // The result is undefined, we return zero.
4319 signed char hr=get_reg(i_regs->regmap,HIREG);
4320 signed char lr=get_reg(i_regs->regmap,LOREG);
4321 if(hr>=0) emit_zeroreg(hr);
4322 if(lr>=0) emit_zeroreg(lr);
// Map the generic name multdiv_assemble onto the x86 implementation.
4325 #define multdiv_assemble multdiv_assemble_x86
// Emit a load of the constant 0xf8 into host register r.
// NOTE(review): presumably the mask for the mini hash table index (the
// table is declared as mini_ht[32][2] with 8-byte rows) - confirm at the
// call site.
4327 void do_preload_rhash(int r) {
4328 emit_movimm(0xf8,r);
// Hook that emits nothing on x86; r is unused in the visible lines.
4331 void do_preload_rhtbl(int r) {
4332 // Don't need this for x86
// Hash step for the mini hash table lookup; takes host registers rs and rh.
// NOTE(review): the body of this function is not visible in this listing,
// so what it emits cannot be stated here.
4335 void do_rhash(int rs,int rh) {
// Hook that emits nothing on x86: do_miniht_jump compares against the table
// entry directly in memory, so no separate load is needed.
4339 void do_miniht_load(int ht,int rh) {
4340 // Don't need this for x86. The load and compare can be combined into
4341 // a single instruction (below)
// Emit the mini hash table dispatch for a jump through host register rs.
//   rs - host register holding the target address.
//   rh - host register used to index mini_ht.
//   ht - not used in the visible lines.
// Emitted sequence: compare the mini_ht entry indexed by rh with rs; on a
// mismatch jump to the jump_vaddr_reg[] routine for rs, otherwise jump
// through the word 4 bytes further into the same entry.
4344 void do_miniht_jump(int rs,int rh,int ht) {
4345 emit_cmpmem_indexed((int)mini_ht,rh,rs);
4346 emit_jne(jump_vaddr_reg[rs]);
4347 emit_jmpmem_indexed((int)mini_ht+4,rh);
// Emit code that loads return_address into rt and records it in the mini
// hash table slot selected by (return_address&0xFF)>>3.
//   return_address - address placed in the link register.
//   rt             - host register that receives return_address.
//   temp           - not used in the visible lines.
// Word [0] of the slot receives the address. Word [1] is written with an
// immediate 0 at an output position registered through add_to_linker
// (presumably patched with the compiled target later - TODO confirm).
4350 void do_miniht_insert(int return_address,int rt,int temp) {
4351 emit_movimm(return_address,rt); // PC into link register
4352 //emit_writeword_imm(return_address,(int)&mini_ht[(return_address&0xFF)>>8][0]);
4353 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4354 add_to_linker((int)out,return_address,1);
4355 emit_writeword_imm(0,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4358 // We don't need this for x86
// Empty hook: does nothing on x86; n is ignored.
4359 void literal_pool(int n) {}
// Empty hook: does nothing on x86; n is ignored.
4360 void literal_pool_jumpover(int n) {}
4362 // CPU-architecture-specific initialization, not needed for x86