1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_x64.c *
3 * Copyright (C) 2009-2010 Ari64 *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 int pending_exception;
26 uint64_t readmem_dword;
27 precomp_instr fake_pc;
28 u_int memory_map[1048576];
29 u_int mini_ht[32][2] __attribute__((aligned(8)));
30 u_char restore_candidate[512] __attribute__((aligned(4)));
33 void jump_vaddr_eax();
34 void jump_vaddr_ecx();
35 void jump_vaddr_edx();
36 void jump_vaddr_ebx();
37 void jump_vaddr_ebp();
38 void jump_vaddr_edi();
40 const void * jump_vaddr_reg[8] = {
50 const u_short rounding_modes[4] = {
58 // We need these for cmovcc instructions on x86
64 void set_jump_target(int addr,int target)
66 u_char *ptr=(u_char *)addr;
69 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
70 u_int *ptr2=(u_int *)(ptr+2);
71 *ptr2=target-(int)ptr2-4;
73 else if(*ptr==0xe8||*ptr==0xe9) {
74 u_int *ptr2=(u_int *)(ptr+1);
75 *ptr2=target-(int)ptr2-4;
79 assert(*ptr==0xc7); /* mov immediate (store address) */
80 u_int *ptr2=(u_int *)(ptr+6);
// Re-aim a patched 32-bit field back at its stub.
//
// The 32-bit value stored 6 bytes into the stub is taken as the address of a
// 32-bit field.  That field is overwritten with the distance from the end of
// the field (its address + 4) to the stub itself.
// NOTE(review): presumably the field is a rel32 branch displacement, like the
// ones patched by set_jump_target - confirm against the stub layout.
//
// Addresses are carried in 32-bit ints here, so both the stub and the field
// have to be representable in an int.
void kill_pointer(void *stub)
{
  int field_addr = *(int *)((char *)stub + 6);
  int *field = (int *)field_addr;

  *field = (int)stub - field_addr - 4;
}
// Return the absolute address currently encoded in the field a stub refers to.
//
// The 32-bit value stored 6 bytes into the stub is taken as the address of a
// 32-bit field.  The result is the field's contents plus the address just past
// the field (field address + 4), i.e. the inverse of what kill_pointer writes.
//
// Addresses are carried in 32-bit ints, so the result is an int as well.
int get_pointer(void *stub)
{
  int field_addr = *(int *)((char *)stub + 6);
  int displacement = *(int *)field_addr;

  return displacement + field_addr + 4;
}
96 // Find the "clean" entry point from a "dirty" entry point
97 // by skipping past the call to verify_code
98 u_int get_clean_addr(int addr)
100 u_char *ptr=(u_char *)addr;
101 assert(ptr[21]==0xE8); // call instruction
102 if(ptr[26]==0xE9) return *(u_int *)(ptr+27)+addr+31; // follow jmp
103 else return(addr+26);
106 int verify_dirty(int addr)
108 u_char *ptr=(u_char *)addr;
109 assert(ptr[0]==0xB8);
110 u_int source=*(u_int *)(ptr+1);
111 u_int copy=*(u_int *)(ptr+6);
112 u_int len=*(u_int *)(ptr+11);
113 //printf("source=%x source-rdram=%x\n",source,source-(int)rdram);
114 assert(ptr[21]==0xE8); // call instruction
115 u_int verifier=*(u_int *)(ptr+22)+(u_int)ptr+26;
116 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
117 unsigned int page=source>>12;
118 unsigned int map_value=memory_map[page];
119 if(map_value>=0x80000000) return 0;
120 while(page<((source+len-1)>>12)) {
121 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
123 source = source+(map_value<<2);
125 //printf("verify_dirty: %x %x %x\n",source,copy,len);
126 return !memcmp((void *)source,(void *)copy,len);
129 // This doesn't necessarily find all clean entry points, just
130 // guarantees that it's not dirty
131 int isclean(int addr)
133 u_char *ptr=(u_char *)addr;
134 if(ptr[0]!=0xB8) return 1; // mov imm,%eax
135 if(ptr[5]!=0xBB) return 1; // mov imm,%ebx
136 if(ptr[10]!=0xB9) return 1; // mov imm,%ecx
137 if(ptr[15]!=0x41) return 1; // rex prefix
138 if(ptr[16]!=0xBC) return 1; // mov imm,%r12d
139 if(ptr[21]!=0xE8) return 1; // call instruction
143 void get_bounds(int addr,u_int *start,u_int *end)
145 u_char *ptr=(u_char *)addr;
146 assert(ptr[0]==0xB8);
147 u_int source=*(u_int *)(ptr+1);
148 //u_int copy=*(u_int *)(ptr+6);
149 u_int len=*(u_int *)(ptr+11);
150 assert(ptr[21]==0xE8); // call instruction
151 u_int verifier=*(u_int *)(ptr+22)+(u_int)ptr+26;
152 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
153 if(memory_map[source>>12]>=0x80000000) source = 0;
154 else source = source+(memory_map[source>>12]<<2);
160 /* Register allocation */
162 // Note: registers are allocated clean (unmodified state)
163 // if you intend to modify the register, you must call dirty_reg().
164 void alloc_reg(struct regstat *cur,int i,signed char reg)
167 int preferred_reg = (reg&3)+(reg>28)*4-(reg==32)+2*(reg==36)-(reg==40);
169 // Don't allocate unused registers
170 if((cur->u>>reg)&1) return;
172 // see if it's already allocated
173 for(hr=0;hr<HOST_REGS;hr++)
175 if(cur->regmap[hr]==reg) return;
178 // Keep the same mapping if the register was already allocated in a loop
179 preferred_reg = loop_reg(i,reg,preferred_reg);
181 // Try to allocate the preferred register
182 if(cur->regmap[preferred_reg]==-1) {
183 cur->regmap[preferred_reg]=reg;
184 cur->dirty&=~(1<<preferred_reg);
185 cur->isconst&=~(1<<preferred_reg);
188 r=cur->regmap[preferred_reg];
189 if(r<64&&((cur->u>>r)&1)) {
190 cur->regmap[preferred_reg]=reg;
191 cur->dirty&=~(1<<preferred_reg);
192 cur->isconst&=~(1<<preferred_reg);
195 if(r>=64&&((cur->uu>>(r&63))&1)) {
196 cur->regmap[preferred_reg]=reg;
197 cur->dirty&=~(1<<preferred_reg);
198 cur->isconst&=~(1<<preferred_reg);
202 // Try to allocate EAX, EBX, ECX, or EDX
203 // We prefer these because they can do byte and halfword loads
204 for(hr=0;hr<4;hr++) {
205 if(cur->regmap[hr]==-1) {
207 cur->dirty&=~(1<<hr);
208 cur->isconst&=~(1<<hr);
213 // Clear any unneeded registers
214 // We try to keep the mapping consistent, if possible, because it
215 // makes branches easier (especially loops). So we try to allocate
216 // first (see above) before removing old mappings. If this is not
217 // possible then go ahead and clear out the registers that are no
219 for(hr=0;hr<HOST_REGS;hr++)
225 if(i==0||(unneeded_reg[i-1]>>r)&1) {cur->regmap[hr]=-1;break;}
229 if((cur->uu>>(r&63))&1)
230 if(i==0||(unneeded_reg_upper[i-1]>>(r&63))&1) {cur->regmap[hr]=-1;break;}
234 // Try to allocate any available register, but prefer
235 // registers that have not been used recently.
237 for(hr=0;hr<HOST_REGS;hr++) {
238 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
239 if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) {
241 cur->dirty&=~(1<<hr);
242 cur->isconst&=~(1<<hr);
248 // Try to allocate any available register
249 for(hr=0;hr<HOST_REGS;hr++) {
250 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
252 cur->dirty&=~(1<<hr);
253 cur->isconst&=~(1<<hr);
258 // Ok, now we have to evict someone
259 // Pick a register we hopefully won't need soon
260 u_char hsn[MAXREG+1];
261 memset(hsn,10,sizeof(hsn));
263 lsn(hsn,i,&preferred_reg);
264 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
266 // Don't evict the cycle count at entry points, otherwise the entry
267 // stub will have to write it.
268 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
269 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
272 // Alloc preferred register if available
273 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
274 for(hr=0;hr<HOST_REGS;hr++) {
275 // Evict both parts of a 64-bit register
276 if((cur->regmap[hr]&63)==r) {
278 cur->dirty&=~(1<<hr);
279 cur->isconst&=~(1<<hr);
282 cur->regmap[preferred_reg]=reg;
285 for(r=1;r<=MAXREG;r++)
287 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
288 for(hr=0;hr<HOST_REGS;hr++) {
289 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
290 if(cur->regmap[hr]==r+64) {
292 cur->dirty&=~(1<<hr);
293 cur->isconst&=~(1<<hr);
298 for(hr=0;hr<HOST_REGS;hr++) {
299 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
300 if(cur->regmap[hr]==r) {
302 cur->dirty&=~(1<<hr);
303 cur->isconst&=~(1<<hr);
314 for(r=1;r<=MAXREG;r++)
317 for(hr=0;hr<HOST_REGS;hr++) {
318 if(cur->regmap[hr]==r+64) {
320 cur->dirty&=~(1<<hr);
321 cur->isconst&=~(1<<hr);
325 for(hr=0;hr<HOST_REGS;hr++) {
326 if(cur->regmap[hr]==r) {
328 cur->dirty&=~(1<<hr);
329 cur->isconst&=~(1<<hr);
336 printf("This shouldn't happen (alloc_reg)");exit(1);
339 void alloc_reg64(struct regstat *cur,int i,signed char reg)
341 int preferred_reg = 5+reg%3;
344 // allocate the lower 32 bits
345 alloc_reg(cur,i,reg);
347 // Don't allocate unused registers
348 if((cur->uu>>reg)&1) return;
350 // see if the upper half is already allocated
351 for(hr=0;hr<HOST_REGS;hr++)
353 if(cur->regmap[hr]==reg+64) return;
356 // Keep the same mapping if the register was already allocated in a loop
357 preferred_reg = loop_reg(i,reg,preferred_reg);
359 // Try to allocate the preferred register
360 if(cur->regmap[preferred_reg]==-1) {
361 cur->regmap[preferred_reg]=reg|64;
362 cur->dirty&=~(1<<preferred_reg);
363 cur->isconst&=~(1<<preferred_reg);
366 r=cur->regmap[preferred_reg];
367 if(r<64&&((cur->u>>r)&1)) {
368 cur->regmap[preferred_reg]=reg|64;
369 cur->dirty&=~(1<<preferred_reg);
370 cur->isconst&=~(1<<preferred_reg);
373 if(r>=64&&((cur->uu>>(r&63))&1)) {
374 cur->regmap[preferred_reg]=reg|64;
375 cur->dirty&=~(1<<preferred_reg);
376 cur->isconst&=~(1<<preferred_reg);
380 // Try to allocate EBP, ESI or EDI
381 for(hr=5;hr<8;hr++) {
382 if(cur->regmap[hr]==-1) {
383 cur->regmap[hr]=reg|64;
384 cur->dirty&=~(1<<hr);
385 cur->isconst&=~(1<<hr);
390 // Clear any unneeded registers
391 // We try to keep the mapping consistent, if possible, because it
392 // makes branches easier (especially loops). So we try to allocate
393 // first (see above) before removing old mappings. If this is not
394 // possible then go ahead and clear out the registers that are no
396 for(hr=HOST_REGS-1;hr>=0;hr--)
401 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
405 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
409 // Try to allocate any available register, but prefer
410 // registers that have not been used recently.
412 for(hr=0;hr<HOST_REGS;hr++) {
413 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
414 if(regs[i-1].regmap[hr]!=rs1[i-1]&®s[i-1].regmap[hr]!=rs2[i-1]&®s[i-1].regmap[hr]!=rt1[i-1]&®s[i-1].regmap[hr]!=rt2[i-1]) {
415 cur->regmap[hr]=reg|64;
416 cur->dirty&=~(1<<hr);
417 cur->isconst&=~(1<<hr);
423 // Try to allocate any available register
424 for(hr=0;hr<HOST_REGS;hr++) {
425 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
426 cur->regmap[hr]=reg|64;
427 cur->dirty&=~(1<<hr);
428 cur->isconst&=~(1<<hr);
433 // Ok, now we have to evict someone
434 // Pick a register we hopefully won't need soon
435 u_char hsn[MAXREG+1];
436 memset(hsn,10,sizeof(hsn));
438 lsn(hsn,i,&preferred_reg);
439 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
440 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
442 // Don't evict the cycle count at entry points, otherwise the entry
443 // stub will have to write it.
444 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
445 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
448 // Alloc preferred register if available
449 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
450 for(hr=0;hr<HOST_REGS;hr++) {
451 // Evict both parts of a 64-bit register
452 if((cur->regmap[hr]&63)==r) {
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
458 cur->regmap[preferred_reg]=reg|64;
461 for(r=1;r<=MAXREG;r++)
463 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
464 for(hr=0;hr<HOST_REGS;hr++) {
465 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
466 if(cur->regmap[hr]==r+64) {
467 cur->regmap[hr]=reg|64;
468 cur->dirty&=~(1<<hr);
469 cur->isconst&=~(1<<hr);
474 for(hr=0;hr<HOST_REGS;hr++) {
475 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
476 if(cur->regmap[hr]==r) {
477 cur->regmap[hr]=reg|64;
478 cur->dirty&=~(1<<hr);
479 cur->isconst&=~(1<<hr);
490 for(r=1;r<=MAXREG;r++)
493 for(hr=0;hr<HOST_REGS;hr++) {
494 if(cur->regmap[hr]==r+64) {
495 cur->regmap[hr]=reg|64;
496 cur->dirty&=~(1<<hr);
497 cur->isconst&=~(1<<hr);
501 for(hr=0;hr<HOST_REGS;hr++) {
502 if(cur->regmap[hr]==r) {
503 cur->regmap[hr]=reg|64;
504 cur->dirty&=~(1<<hr);
505 cur->isconst&=~(1<<hr);
512 printf("This shouldn't happen");exit(1);
515 // Allocate a temporary register. This is done without regard to
516 // dirty status or whether the register we request is on the unneeded list
517 // Note: This will only allocate one register, even if called multiple times
518 void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
521 int preferred_reg = -1;
523 // see if it's already allocated
524 for(hr=0;hr<HOST_REGS;hr++)
526 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
529 // Try to allocate any available register, starting with EDI, ESI, EBP...
530 // We prefer EDI, ESI, EBP since the others are used for byte/halfword stores
531 for(hr=HOST_REGS-1;hr>=0;hr--) {
532 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
534 cur->dirty&=~(1<<hr);
535 cur->isconst&=~(1<<hr);
540 // Find an unneeded register
541 for(hr=HOST_REGS-1;hr>=0;hr--)
547 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
549 cur->dirty&=~(1<<hr);
550 cur->isconst&=~(1<<hr);
557 if((cur->uu>>(r&63))&1) {
558 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
560 cur->dirty&=~(1<<hr);
561 cur->isconst&=~(1<<hr);
569 // Ok, now we have to evict someone
570 // Pick a register we hopefully won't need soon
571 // TODO: we might want to follow unconditional jumps here
572 // TODO: get rid of dupe code and make this into a function
573 u_char hsn[MAXREG+1];
574 memset(hsn,10,sizeof(hsn));
576 lsn(hsn,i,&preferred_reg);
577 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
579 // Don't evict the cycle count at entry points, otherwise the entry
580 // stub will have to write it.
581 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
582 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
585 for(r=1;r<=MAXREG;r++)
587 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
590 if(cur->regmap[hr]==r+64) {
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
600 if(cur->regmap[hr]==r) {
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
614 for(r=1;r<=MAXREG;r++)
617 for(hr=0;hr<HOST_REGS;hr++) {
618 if(cur->regmap[hr]==r+64) {
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r) {
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
636 printf("This shouldn't happen");exit(1);
638 // Allocate a specific x86 register.
639 void alloc_x86_reg(struct regstat *cur,int i,signed char reg,char hr)
643 // see if it's already allocated (and dealloc it)
644 for(n=0;n<HOST_REGS;n++)
646 if(n!=ESP&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
654 // Alloc cycle count into dedicated register
655 alloc_cc(struct regstat *cur,int i)
657 alloc_x86_reg(cur,i,CCREG,ESI);
662 void multdiv_alloc_x86(struct regstat *current,int i)
672 clear_const(current,rs1[i]);
673 clear_const(current,rs2[i]);
676 if((opcode2[i]&4)==0) // 32-bit
678 current->u&=~(1LL<<HIREG);
679 current->u&=~(1LL<<LOREG);
680 alloc_x86_reg(current,i,HIREG,EDX);
681 alloc_x86_reg(current,i,LOREG,EAX);
682 alloc_reg(current,i,rs1[i]);
683 alloc_reg(current,i,rs2[i]);
684 current->is32|=1LL<<HIREG;
685 current->is32|=1LL<<LOREG;
686 dirty_reg(current,HIREG);
687 dirty_reg(current,LOREG);
691 alloc_x86_reg(current,i,HIREG|64,EDX);
692 alloc_x86_reg(current,i,HIREG,EAX);
693 alloc_reg64(current,i,rs1[i]);
694 alloc_reg64(current,i,rs2[i]);
695 alloc_all(current,i);
696 current->is32&=~(1LL<<HIREG);
697 current->is32&=~(1LL<<LOREG);
698 dirty_reg(current,HIREG);
699 dirty_reg(current,LOREG);
704 // Multiply by zero is zero.
705 // MIPS does not have a divide by zero exception.
706 // The result is undefined, we return zero.
707 alloc_reg(current,i,HIREG);
708 alloc_reg(current,i,LOREG);
709 current->is32|=1LL<<HIREG;
710 current->is32|=1LL<<LOREG;
711 dirty_reg(current,HIREG);
712 dirty_reg(current,LOREG);
715 #define multdiv_alloc multdiv_alloc_x86
719 char regname[16][4] = {
737 void output_byte(u_char byte)
741 void output_modrm(u_char mod,u_char rm,u_char ext)
746 u_char byte=(mod<<6)|(ext<<3)|rm;
749 void output_sib(u_char scale,u_char index,u_char base)
754 u_char byte=(scale<<6)|(index<<3)|base;
757 void output_rex(u_char w,u_char r,u_char x,u_char b)
763 u_char byte=0x40|(w<<3)|(r<<2)|(x<<1)|b;
766 void output_w32(u_int word)
768 *((u_int *)out)=word;
772 void emit_mov(int rs,int rt)
774 assem_debug("mov %%%s,%%%s\n",regname[rs],regname[rt]);
776 output_modrm(3,rt,rs);
779 void emit_mov64(int rs,int rt)
781 assem_debug("mov %%%s,%%%s\n",regname[rs],regname[rt]);
782 output_rex(1,0,0,rt>>3);
784 output_modrm(3,rt,rs);
787 void emit_add(int rs1,int rs2,int rt)
790 assem_debug("add %%%s,%%%s\n",regname[rs2],regname[rs1]);
792 output_modrm(3,rs1,rs2);
794 assem_debug("add %%%s,%%%s\n",regname[rs1],regname[rs2]);
796 output_modrm(3,rs2,rs1);
798 assem_debug("lea (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
801 output_modrm(0,4,rt);
802 output_sib(0,rs2,rs1);
804 output_modrm(0,4,rt);
805 output_sib(0,rs1,rs2);
806 }else /* lea 0(,%ebp,2) */{
807 output_modrm(0,4,rt);
// Emit rt = rs1 + rs2.
// In this backend the call is passed straight through to emit_add with the
// same operands.
void emit_adds(int rs1, int rs2, int rt)
{
  emit_add(rs1, rs2, rt);
}
819 void emit_lea8(int rs1,int rt)
821 assem_debug("lea 0(%%%s,8),%%%s\n",regname[rs1],regname[rt]);
823 output_modrm(0,4,rt);
827 void emit_leairrx1(int imm,int rs1,int rs2,int rt)
829 assem_debug("lea %x(%%%s,%%%s,1),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
831 if(imm!=0||rs1==EBP) {
832 output_modrm(2,4,rt);
833 output_sib(0,rs2,rs1);
836 output_modrm(0,4,rt);
837 output_sib(0,rs2,rs1);
840 void emit_leairrx4(int imm,int rs1,int rs2,int rt)
842 assem_debug("lea %x(%%%s,%%%s,4),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
844 if(imm!=0||rs1==EBP) {
845 output_modrm(2,4,rt);
846 output_sib(2,rs2,rs1);
849 output_modrm(0,4,rt);
850 output_sib(2,rs2,rs1);
854 void emit_neg(int rs, int rt)
856 if(rs!=rt) emit_mov(rs,rt);
857 assem_debug("neg %%%s\n",regname[rt]);
859 output_modrm(3,rt,3);
862 void emit_negs(int rs, int rt)
867 void emit_sub(int rs1,int rs2,int rt)
870 assem_debug("sub %%%s,%%%s\n",regname[rs2],regname[rs1]);
872 output_modrm(3,rs1,rs2);
875 emit_add(rs2,rs1,rs2);
// Emit rt = rs1 - rs2.
// In this backend the call is passed straight through to emit_sub with the
// same operands.
void emit_subs(int rs1, int rs2, int rt)
{
  emit_sub(rs1, rs2, rt);
}
887 void emit_zeroreg(int rt)
890 output_modrm(3,rt,rt);
891 assem_debug("xor %%%s,%%%s\n",regname[rt],regname[rt]);
894 void emit_loadreg(int r, int hr)
899 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
900 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
901 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
902 if(r==CCREG) addr=(int)&cycle_count;
903 if(r==CSREG) addr=(int)&Status;
904 if(r==FSREG) addr=(int)&FCR31;
905 assem_debug("mov %x+%d,%%%s\n",addr,r,regname[hr]);
907 output_modrm(0,5,hr);
908 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
911 void emit_storereg(int r, int hr)
913 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
914 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
915 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
916 if(r==CCREG) addr=(int)&cycle_count;
917 if(r==FSREG) addr=(int)&FCR31;
918 assem_debug("mov %%%s,%x+%d\n",regname[hr],addr,r);
920 output_modrm(0,5,hr);
921 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
924 void emit_test(int rs, int rt)
926 assem_debug("test %%%s,%%%s\n",regname[rs],regname[rt]);
928 output_modrm(3,rs,rt);
931 void emit_testimm(int rs,int imm)
933 assem_debug("test $0x%x,%%%s\n",imm,regname[rs]);
934 if(imm<128&&imm>=-128&&rs<4) {
936 output_modrm(3,rs,0);
942 output_modrm(3,rs,0);
947 void emit_not(int rs,int rt)
949 if(rs!=rt) emit_mov(rs,rt);
950 assem_debug("not %%%s\n",regname[rt]);
952 output_modrm(3,rt,2);
955 void emit_and(u_int rs1,u_int rs2,u_int rt)
961 assem_debug("and %%%s,%%%s\n",regname[rs2],regname[rt]);
963 output_modrm(3,rs1,rs2);
967 assem_debug("and %%%s,%%%s\n",regname[rs1],regname[rt]);
969 output_modrm(3,rs2,rs1);
977 void emit_or(u_int rs1,u_int rs2,u_int rt)
983 assem_debug("or %%%s,%%%s\n",regname[rs2],regname[rt]);
985 output_modrm(3,rs1,rs2);
989 assem_debug("or %%%s,%%%s\n",regname[rs1],regname[rt]);
991 output_modrm(3,rs2,rs1);
// Emit rt = rs1 | rs2 for callers that go on to test the resulting flags.
// In this backend the call is passed straight through to emit_or with the
// same operands.
void emit_or_and_set_flags(int rs1, int rs2, int rt)
{
  emit_or(rs1, rs2, rt);
}
1003 void emit_xor(u_int rs1,u_int rs2,u_int rt)
1009 assem_debug("xor %%%s,%%%s\n",regname[rs2],regname[rt]);
1011 output_modrm(3,rs1,rs2);
1015 assem_debug("xor %%%s,%%%s\n",regname[rs1],regname[rt]);
1017 output_modrm(3,rs2,rs1);
1021 emit_xor(rt,rs2,rt);
1025 void emit_movimm(int imm,u_int rt)
1027 assem_debug("mov $%d,%%%s\n",imm,regname[rt]);
1029 if(rt>=8) output_rex(0,0,0,1);
1030 output_byte(0xB8+(rt&7));
1034 void emit_addimm(int rs,int imm,int rt)
1038 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1039 if(imm<128&&imm>=-128) {
1041 output_modrm(3,rt,0);
1047 output_modrm(3,rt,0);
1054 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rs],regname[rt]);
1056 if(imm<128&&imm>=-128) {
1057 output_modrm(1,rs,rt);
1060 output_modrm(2,rs,rt);
1069 void emit_addimm64(int rs,int imm,int rt)
1073 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1074 if(imm<128&&imm>=-128) {
1075 output_rex(1,0,0,rt>>3);
1077 output_modrm(3,rt&7,0);
1082 output_rex(1,0,0,rt>>3);
1084 output_modrm(3,rt&7,0);
1091 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rs],regname[rt]);
1092 output_rex(1,rt>>3,0,rs>>3);
1094 if(imm<128&&imm>=-128) {
1095 output_modrm(1,rs&7,rt&7);
1098 output_modrm(2,rs&7,rt&7);
1107 void emit_addimm_and_set_flags(int imm,int rt)
1109 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1110 if(imm<128&&imm>=-128) {
1112 output_modrm(3,rt,0);
1118 output_modrm(3,rt,0);
1122 void emit_addimm_no_flags(int imm,int rt)
1125 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rt],regname[rt]);
1127 if(imm<128&&imm>=-128) {
1128 output_modrm(1,rt,rt);
1131 output_modrm(2,rt,rt);
1137 void emit_adcimm(int imm,u_int rt)
1139 assem_debug("adc $%d,%%%s\n",imm,regname[rt]);
1141 if(imm<128&&imm>=-128) {
1143 output_modrm(3,rt,2);
1149 output_modrm(3,rt,2);
1153 void emit_sbbimm(int imm,u_int rt)
1155 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1157 if(imm<128&&imm>=-128) {
1159 output_modrm(3,rt,3);
1165 output_modrm(3,rt,3);
1170 void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1172 if(rsh==rth&&rsl==rtl) {
1173 assem_debug("add $%d,%%%s\n",imm,regname[rtl]);
1174 if(imm<128&&imm>=-128) {
1176 output_modrm(3,rtl,0);
1182 output_modrm(3,rtl,0);
1185 assem_debug("adc $%d,%%%s\n",imm>>31,regname[rth]);
1187 output_modrm(3,rth,2);
1188 output_byte(imm>>31);
1193 emit_addimm64_32(rth,rtl,imm,rth,rtl);
1197 void emit_sbb(int rs1,int rs2)
1199 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1201 output_modrm(3,rs1,rs2);
1204 void emit_andimm(int rs,int imm,int rt)
1207 assem_debug("and $%d,%%%s\n",imm,regname[rt]);
1208 if(imm<128&&imm>=-128) {
1210 output_modrm(3,rt,4);
1216 output_modrm(3,rt,4);
1222 emit_andimm(rt,imm,rt);
1226 void emit_orimm(int rs,int imm,int rt)
1229 assem_debug("or $%d,%%%s\n",imm,regname[rt]);
1230 if(imm<128&&imm>=-128) {
1232 output_modrm(3,rt,1);
1238 output_modrm(3,rt,1);
1244 emit_orimm(rt,imm,rt);
1248 void emit_xorimm(int rs,int imm,int rt)
1251 assem_debug("xor $%d,%%%s\n",imm,regname[rt]);
1252 if(imm<128&&imm>=-128) {
1254 output_modrm(3,rt,6);
1260 output_modrm(3,rt,6);
1266 emit_xorimm(rt,imm,rt);
1270 void emit_shlimm(int rs,u_int imm,int rt)
1273 assem_debug("shl %%%s,%d\n",regname[rt],imm);
1275 if(imm==1) output_byte(0xD1);
1276 else output_byte(0xC1);
1277 output_modrm(3,rt,4);
1278 if(imm>1) output_byte(imm);
1282 emit_shlimm(rt,imm,rt);
1286 void emit_shrimm(int rs,u_int imm,int rt)
1289 assem_debug("shr %%%s,%d\n",regname[rt],imm);
1291 if(imm==1) output_byte(0xD1);
1292 else output_byte(0xC1);
1293 output_modrm(3,rt,5);
1294 if(imm>1) output_byte(imm);
1298 emit_shrimm(rt,imm,rt);
1302 void emit_shrimm64(int rs,u_int imm,int rt)
1306 assem_debug("shr %%%s,%d\n",regname[rt],imm);
1308 output_rex(1,0,0,rt>>3);
1309 if(imm==1) output_byte(0xD1);
1310 else output_byte(0xC1);
1311 output_modrm(3,rt,5);
1312 if(imm>1) output_byte(imm);
1316 emit_shrimm(rt,imm,rt);
1320 void emit_sarimm(int rs,u_int imm,int rt)
1323 assem_debug("sar %%%s,%d\n",regname[rt],imm);
1325 if(imm==1) output_byte(0xD1);
1326 else output_byte(0xC1);
1327 output_modrm(3,rt,7);
1328 if(imm>1) output_byte(imm);
1332 emit_sarimm(rt,imm,rt);
1336 void emit_rorimm(int rs,u_int imm,int rt)
1339 assem_debug("ror %%%s,%d\n",regname[rt],imm);
1341 if(imm==1) output_byte(0xD1);
1342 else output_byte(0xC1);
1343 output_modrm(3,rt,1);
1344 if(imm>1) output_byte(imm);
1348 emit_sarimm(rt,imm,rt);
1352 void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1355 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1359 output_modrm(3,rt,rs2);
1364 emit_shldimm(rt,rs2,imm,rt);
1368 void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1371 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1375 output_modrm(3,rt,rs2);
1380 emit_shrdimm(rt,rs2,imm,rt);
1384 void emit_shlcl(int r)
1386 assem_debug("shl %%%s,%%cl\n",regname[r]);
1388 output_modrm(3,r,4);
1390 void emit_shrcl(int r)
1392 assem_debug("shr %%%s,%%cl\n",regname[r]);
1394 output_modrm(3,r,5);
1396 void emit_sarcl(int r)
1398 assem_debug("sar %%%s,%%cl\n",regname[r]);
1400 output_modrm(3,r,7);
1403 void emit_shldcl(int r1,int r2)
1405 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1408 output_modrm(3,r1,r2);
1410 void emit_shrdcl(int r1,int r2)
1412 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1415 output_modrm(3,r1,r2);
1418 void emit_cmpimm(int rs,int imm)
1420 assem_debug("cmp $%d,%%%s\n",imm,regname[rs]);
1421 if(imm<128&&imm>=-128) {
1423 output_modrm(3,rs,7);
1429 output_modrm(3,rs,7);
1434 void emit_cmovne(u_int *addr,int rt)
1436 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1437 if(addr==&const_zero) assem_debug(" [zero]\n");
1438 else if(addr==&const_one) assem_debug(" [one]\n");
1439 else assem_debug("\n");
1442 output_modrm(0,5,rt);
1443 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
1445 void emit_cmovl(u_int *addr,int rt)
1447 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1448 if(addr==&const_zero) assem_debug(" [zero]\n");
1449 else if(addr==&const_one) assem_debug(" [one]\n");
1450 else assem_debug("\n");
1453 output_modrm(0,5,rt);
1454 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
1456 void emit_cmovs(u_int *addr,int rt)
1458 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1459 if(addr==&const_zero) assem_debug(" [zero]\n");
1460 else if(addr==&const_one) assem_debug(" [one]\n");
1461 else assem_debug("\n");
1464 output_modrm(0,5,rt);
1465 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
1467 void emit_cmovne_reg(int rs,int rt)
1469 assem_debug("cmovne %%%s,%%%s\n",regname[rs],regname[rt]);
1472 output_modrm(3,rs,rt);
1474 void emit_cmovl_reg(int rs,int rt)
1476 assem_debug("cmovl %%%s,%%%s\n",regname[rs],regname[rt]);
1479 output_modrm(3,rs,rt);
1481 void emit_cmovs_reg(int rs,int rt)
1483 assem_debug("cmovs %%%s,%%%s\n",regname[rs],regname[rt]);
1486 output_modrm(3,rs,rt);
1488 void emit_cmovnc_reg(int rs,int rt)
1490 assem_debug("cmovae %%%s,%%%s\n",regname[rs],regname[rt]);
1493 output_modrm(3,rs,rt);
1495 void emit_cmova_reg(int rs,int rt)
1497 assem_debug("cmova %%%s,%%%s\n",regname[rs],regname[rt]);
1500 output_modrm(3,rs,rt);
1502 void emit_cmovp_reg(int rs,int rt)
1504 assem_debug("cmovp %%%s,%%%s\n",regname[rs],regname[rt]);
1507 output_modrm(3,rs,rt);
1509 void emit_cmovnp_reg(int rs,int rt)
1511 assem_debug("cmovnp %%%s,%%%s\n",regname[rs],regname[rt]);
1514 output_modrm(3,rs,rt);
1516 void emit_setl(int rt)
1518 assem_debug("setl %%%s\n",regname[rt]);
1521 output_modrm(3,rt,2);
1523 void emit_movzbl_reg(int rs, int rt)
1525 assem_debug("movzbl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1528 output_modrm(3,rs,rt);
1531 void emit_slti32(int rs,int imm,int rt)
1533 if(rs!=rt) emit_zeroreg(rt);
1534 emit_cmpimm(rs,imm);
1537 if(rs==rt) emit_movzbl_reg(rt,rt);
1541 if(rs==rt) emit_movimm(0,rt);
1542 emit_cmovl(&const_one,rt);
1545 void emit_sltiu32(int rs,int imm,int rt)
1547 if(rs!=rt) emit_zeroreg(rt);
1548 emit_cmpimm(rs,imm);
1549 if(rs==rt) emit_movimm(0,rt);
1552 void emit_slti64_32(int rsh,int rsl,int imm,int rt)
1555 emit_slti32(rsl,imm,rt);
1559 emit_cmovne(&const_zero,rt);
1560 emit_cmovs(&const_one,rt);
1564 emit_cmpimm(rsh,-1);
1565 emit_cmovne(&const_zero,rt);
1566 emit_cmovl(&const_one,rt);
1569 void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
1572 emit_sltiu32(rsl,imm,rt);
1576 emit_cmovne(&const_zero,rt);
1580 emit_cmpimm(rsh,-1);
1581 emit_cmovne(&const_one,rt);
1585 void emit_cmp(int rs,int rt)
1587 assem_debug("cmp %%%s,%%%s\n",regname[rt],regname[rs]);
1589 output_modrm(3,rs,rt);
1591 void emit_set_gz32(int rs, int rt)
1593 //assem_debug("set_gz32\n");
1596 emit_cmovl(&const_zero,rt);
1598 void emit_set_nz32(int rs, int rt)
1600 //assem_debug("set_nz32\n");
1605 void emit_set_gz64_32(int rsh, int rsl, int rt)
1607 //assem_debug("set_gz64\n");
1608 emit_set_gz32(rsl,rt);
1610 emit_cmovne(&const_one,rt);
1611 emit_cmovs(&const_zero,rt);
1613 void emit_set_nz64_32(int rsh, int rsl, int rt)
1615 //assem_debug("set_nz64\n");
1616 emit_or_and_set_flags(rsh,rsl,rt);
1617 emit_cmovne(&const_one,rt);
1619 void emit_set_if_less32(int rs1, int rs2, int rt)
1621 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
1622 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1624 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1625 emit_cmovl(&const_one,rt);
1627 void emit_set_if_carry32(int rs1, int rs2, int rt)
1629 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
1630 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1632 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1635 void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1637 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1644 emit_cmovl(&const_one,rt);
1646 void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1648 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1658 void emit_call(int a)
1660 assem_debug("call %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1662 output_w32(a-(int)out-4);
1664 void emit_jmp(int a)
1666 assem_debug("jmp %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1668 output_w32(a-(int)out-4);
1670 void emit_jne(int a)
1672 assem_debug("jne %x\n",a);
1675 output_w32(a-(int)out-4);
1677 void emit_jeq(int a)
1679 assem_debug("jeq %x\n",a);
1682 output_w32(a-(int)out-4);
1686 assem_debug("js %x\n",a);
1689 output_w32(a-(int)out-4);
1691 void emit_jns(int a)
1693 assem_debug("jns %x\n",a);
1696 output_w32(a-(int)out-4);
1700 assem_debug("jl %x\n",a);
1703 output_w32(a-(int)out-4);
1705 void emit_jge(int a)
1707 assem_debug("jge %x\n",a);
1710 output_w32(a-(int)out-4);
1712 void emit_jno(int a)
1714 assem_debug("jno %x\n",a);
1717 output_w32(a-(int)out-4);
1721 assem_debug("jc %x\n",a);
1724 output_w32(a-(int)out-4);
1727 void emit_pushimm(int imm)
1729 assem_debug("push $%x\n",imm);
1735 // assem_debug("pusha\n");
1736 // output_byte(0x60);
1740 // assem_debug("popa\n");
1741 // output_byte(0x61);
// Emit `push %r` using the one-byte form 0x50+r.
1743 void emit_pushreg(u_int r)
1745 assem_debug("push %%%s\n",regname[r]);
1747 output_byte(0x50+r);
// Emit `pop %r` using the one-byte form 0x58+r.
1749 void emit_popreg(u_int r)
1751 assem_debug("pop %%%s\n",regname[r]);
1753 output_byte(0x58+r);
// Emit an indirect `call *%r`: register-direct ModRM (mod=3) with opcode
// extension 2.
1755 void emit_callreg(u_int r)
1757 assem_debug("call *%%%s\n",regname[r]);
1760 output_modrm(3,r,2);
// Emit an indirect `jmp *%r`: register-direct ModRM (mod=3) with opcode
// extension 4.
1762 void emit_jmpreg(u_int r)
1764 assem_debug("jmp *%%%s\n",regname[r]);
1767 output_modrm(3,r,4);
// Emit an indirect `jmp *addr(%r)` through memory, using the mod=2
// (base + 32-bit displacement) form with opcode extension 4.
// NOTE(review): the displacement write is not visible here.
1769 void emit_jmpmem_indexed(u_int addr,u_int r)
1771 assem_debug("jmp *%x(%%%s)\n",addr,regname[r]);
1774 output_modrm(2,r,4);
// Emit a 32-bit load from absolute address addr into rt. The operand is
// encoded rip-relative, so the stored displacement is addr minus the address
// just past the displacement field.
1778 void emit_readword(int addr, int rt)
1780 assem_debug("mov %x,%%%s\n",addr,regname[rt]);
1782 output_modrm(0,5,rt);
1783 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
// Emit a 32-bit load `mov addr(%rs),%rt`.
// Displacements in [-128,127] use the mod=1 form, all others mod=2.
1785 void emit_readword_indexed(int addr, int rs, int rt)
1787 assem_debug("mov %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1789 if(addr<128&&addr>=-128) {
1790 output_modrm(1,rs,rt);
// ESP as base requires a SIB byte.
1791 if(rs==ESP) output_sib(0,4,4);
1796 output_modrm(2,rs,rt);
1797 if(rs==ESP) output_sib(0,4,4);
// 32-bit load at constant address addr, optionally through a mapping register.
// map<0: no mapping register; addr is rebased from 0x80000000 onto rdram and
//        read directly.
// otherwise: the trace shows `addr32 mov addr(,%map,4),%rt`, i.e. the map
//        register scaled by 4 plus addr.
1801 void emit_readword_tlb(int addr, int map, int rt)
1803 if(map<0) emit_readword(addr+(int)rdram-0x80000000, rt);
1806 assem_debug("addr32 mov %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1809 output_modrm(0,4,rt);
1810 output_sib(2,map,5);
// 32-bit load from addr(%rs), optionally through a mapping register.
// map<0: falls back to emit_readword_indexed with addr rebased onto rdram.
// otherwise: the trace shows `addr32 mov addr(%rs,%map,4),%rt`.
1814 void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
1816 if(map<0) emit_readword_indexed(addr+(int)rdram-0x80000000, rs, rt);
1818 assem_debug("addr32 mov %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
// Three encodings by displacement size. The no-displacement form is not
// used when the base is EBP.
1822 if(addr==0&&rs!=EBP) {
1823 output_modrm(0,4,rt);
1824 output_sib(2,map,rs);
1826 else if(addr<128&&addr>=-128) {
1827 output_modrm(1,4,rt);
1828 output_sib(2,map,rs);
1833 output_modrm(2,4,rt);
1834 output_sib(2,map,rs);
// Emit `mov (addr,%rs,4),%rt` per the trace: a load indexed by rs scaled by 4.
// NOTE(review): the SIB byte and displacement are not visible here.
1839 void emit_movmem_indexedx4(int addr, int rs, int rt)
1841 assem_debug("mov (%x,%%%s,4),%%%s\n",addr,regname[rs],regname[rt]);
1843 output_modrm(0,4,rt);
// Same access as emit_movmem_indexedx4, but traced with an `addr32` prefix.
// NOTE(review): the prefix byte, SIB byte and displacement are not visible
// here.
1847 void emit_movmem_indexedx4_addr32(int addr, int rs, int rt)
1849 assem_debug("addr32 mov (%x,%%%s,4),%%%s\n",addr,regname[rs],regname[rt]);
1852 output_modrm(0,4,rt);
// Emit `mov (addr,%rs,8),%rt` per the trace: a load indexed by rs scaled by 8.
// NOTE(review): the SIB byte and displacement are not visible here.
1856 void emit_movmem_indexedx8(int addr, int rs, int rt)
1858 assem_debug("mov (%x,%%%s,8),%%%s\n",addr,regname[rs],regname[rt]);
1860 output_modrm(0,4,rt);
// 64-bit load as two 32-bit halves: rh from addr, rl from addr+4.
// The rh load is skipped when rh<0. One pair of loads reads rdram directly
// (0x7FFFFFFC is 0x80000000-4, i.e. addr+4 after rebasing); the other pair
// goes through the map register.
// NOTE(review): the condition selecting between the pairs is not visible
// here - presumably map<0; confirm.
1864 void emit_readdword_tlb(int addr, int map, int rh, int rl)
1867 if(rh>=0) emit_readword(addr+(int)rdram-0x80000000, rh);
1868 emit_readword(addr+(int)rdram-0x7FFFFFFC, rl);
1871 if(rh>=0) emit_movmem_indexedx4_addr32(addr, map, rh);
1872 emit_movmem_indexedx4_addr32(addr+4, map, rl);
// Emit a load `mov addr(%rs),%rt` with a REX prefix whose first argument is 1
// (presumably REX.W, i.e. a 64-bit operand). The high bit of each register
// number goes into the REX prefix and the low three bits into ModRM.
1875 void emit_readdword_indexed(int addr, int rs, int rt)
1877 assem_debug("mov %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1878 output_rex(1,rt>>3,0,rs>>3);
// Displacements in [-128,127] use the mod=1 form, all others mod=2.
1880 if(addr<128&&addr>=-128) {
1881 output_modrm(1,rs&7,rt&7);
1882 if(rs==ESP) output_sib(0,4,4);
1887 output_modrm(2,rs&7,rt&7);
1888 if(rs==ESP) output_sib(0,4,4);
// 64-bit indexed load as two 32-bit halves: rh from addr (skipped when rh<0),
// then rl from addr+4.
1892 void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
1895 if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
1896 emit_readword_indexed_tlb(addr+4, rs, map, rl);
// Emit `movsbl addr,%rt`: sign-extending byte load from an absolute address,
// encoded rip-relative.
1898 void emit_movsbl(int addr, int rt)
1900 assem_debug("movsbl %x,%%%s\n",addr,regname[rt]);
1903 output_modrm(0,5,rt);
1904 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
// Emit `movsbl addr(%rs),%rt`, always using the mod=2 (32-bit displacement)
// form.
1906 void emit_movsbl_indexed(int addr, int rs, int rt)
1908 assem_debug("movsbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1911 output_modrm(2,rs,rt);
// Sign-extending byte load at constant address addr.
// map<0: addr is rebased from 0x80000000 onto rdram and read directly.
// otherwise: the trace shows `addr32 movsbl addr(,%map,4),%rt`.
1914 void emit_movsbl_tlb(int addr, int map, int rt)
1916 if(map<0) emit_movsbl(addr+(int)rdram-0x80000000, rt);
1919 assem_debug("addr32 movsbl %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1923 output_modrm(0,4,rt);
1924 output_sib(2,map,5);
// Sign-extending byte load from addr(%rs), optionally through a map register.
// map<0: falls back to emit_movsbl_indexed with addr rebased onto rdram.
// otherwise: the trace shows `addr32 movsbl addr(%rs,%map,4),%rt`.
1928 void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1930 if(map<0) emit_movsbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1932 assem_debug("addr32 movsbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
// Three encodings by displacement size. The no-displacement form is not
// used when the base is EBP.
1937 if(addr==0&&rs!=EBP) {
1938 output_modrm(0,4,rt);
1939 output_sib(2,map,rs);
1941 else if(addr<128&&addr>=-128) {
1942 output_modrm(1,4,rt);
1943 output_sib(2,map,rs);
1948 output_modrm(2,4,rt);
1949 output_sib(2,map,rs);
// Emit `movswl addr,%rt`: sign-extending 16-bit load from an absolute
// address, encoded rip-relative.
1954 void emit_movswl(int addr, int rt)
1956 assem_debug("movswl %x,%%%s\n",addr,regname[rt]);
1959 output_modrm(0,5,rt);
1960 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
// Emit `movswl addr(%rs),%rt`, always using the mod=2 (32-bit displacement)
// form.
1962 void emit_movswl_indexed(int addr, int rs, int rt)
1964 assem_debug("movswl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1967 output_modrm(2,rs,rt);
// Sign-extending 16-bit load at constant address addr.
// map<0: addr is rebased from 0x80000000 onto rdram and read directly.
// otherwise: the trace shows `addr32 movswl addr(,%map,4),%rt`.
1970 void emit_movswl_tlb(int addr, int map, int rt)
1972 if(map<0) emit_movswl(addr+(int)rdram-0x80000000, rt);
1975 assem_debug("addr32 movswl %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1979 output_modrm(0,4,rt);
1980 output_sib(2,map,5);
// Emit `movzbl addr,%rt`: zero-extending byte load from an absolute address,
// encoded rip-relative.
1984 void emit_movzbl(int addr, int rt)
1986 assem_debug("movzbl %x,%%%s\n",addr,regname[rt]);
1989 output_modrm(0,5,rt);
1990 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
// Emit `movzbl addr(%rs),%rt`, always using the mod=2 (32-bit displacement)
// form.
1992 void emit_movzbl_indexed(int addr, int rs, int rt)
1994 assem_debug("movzbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1997 output_modrm(2,rs,rt);
// Zero-extending byte load at constant address addr.
// map<0: addr is rebased from 0x80000000 onto rdram and read directly.
// otherwise: the trace shows `addr32 movzbl addr(,%map,4),%rt`.
2000 void emit_movzbl_tlb(int addr, int map, int rt)
2002 if(map<0) emit_movzbl(addr+(int)rdram-0x80000000, rt);
2005 assem_debug("addr32 movzbl %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
2009 output_modrm(0,4,rt);
2010 output_sib(2,map,5);
// Zero-extending byte load from addr(%rs), optionally through a map register.
// map<0: falls back to emit_movzbl_indexed with addr rebased onto rdram.
// otherwise: the trace shows `addr32 movzbl addr(%rs,%map,4),%rt`.
2014 void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
2016 if(map<0) emit_movzbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
2018 assem_debug("addr32 movzbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
// Three encodings by displacement size. The no-displacement form is not
// used when the base is EBP.
2023 if(addr==0&&rs!=EBP) {
2024 output_modrm(0,4,rt);
2025 output_sib(2,map,rs);
2027 else if(addr<128&&addr>=-128) {
2028 output_modrm(1,4,rt);
2029 output_sib(2,map,rs);
2034 output_modrm(2,4,rt);
2035 output_sib(2,map,rs);
// Emit `movzwl addr,%rt`: zero-extending 16-bit load from an absolute
// address, encoded rip-relative.
2040 void emit_movzwl(int addr, int rt)
2042 assem_debug("movzwl %x,%%%s\n",addr,regname[rt]);
2045 output_modrm(0,5,rt);
2046 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
// Emit `movzwl addr(%rs),%rt`, always using the mod=2 (32-bit displacement)
// form.
2048 void emit_movzwl_indexed(int addr, int rs, int rt)
2050 assem_debug("movzwl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
2053 output_modrm(2,rs,rt);
// Zero-extending 16-bit load at constant address addr.
// map<0: addr is rebased from 0x80000000 onto rdram and read directly.
// otherwise: the trace shows `addr32 movzwl addr(,%map,4),%rt`.
2056 void emit_movzwl_tlb(int addr, int map, int rt)
2058 if(map<0) emit_movzwl(addr+(int)rdram-0x80000000, rt);
2061 assem_debug("addr32 movzwl %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
2065 output_modrm(0,4,rt);
2066 output_sib(2,map,5);
// Emit register-to-register `movzwl`: zero-extend the low 16 bits of rs
// into rt.
// The trace skips the first character of the source register's name
// (regname[rs]+1) to print its 16-bit name.
2070 void emit_movzwl_reg(int rs, int rt)
2072 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
2075 output_modrm(3,rs,rt);
// Emit `xchg %rs,%rt`. Two encodings are visible: a one-byte form 0x90+rt
// and a register-direct ModRM form.
// NOTE(review): the condition choosing between them is not visible here -
// presumably the short form applies when rs is EAX; confirm.
2078 void emit_xchg(int rs, int rt)
2080 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
2082 output_byte(0x90+rt);
2087 output_modrm(3,rs,rt);
// Emit a 32-bit store of rt to absolute address addr, encoded rip-relative.
2090 void emit_writeword(int rt, int addr)
2092 assem_debug("movl %%%s,%x\n",regname[rt],addr);
2094 output_modrm(0,5,rt);
2095 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
// Emit a 32-bit store `mov %rt,addr(%rs)`.
// Displacements in [-128,127] use the mod=1 form, all others mod=2.
2097 void emit_writeword_indexed(int rt, int addr, int rs)
2099 assem_debug("mov %%%s,%x+%%%s\n",regname[rt],addr,regname[rs]);
2101 if(addr<128&&addr>=-128) {
2102 output_modrm(1,rs,rt);
// ESP as base requires a SIB byte.
2103 if(rs==ESP) output_sib(0,4,4);
2108 output_modrm(2,rs,rt);
2109 if(rs==ESP) output_sib(0,4,4);
// 32-bit store of rt at constant address addr. One path writes rdram
// directly (addr rebased from 0x80000000); the other uses the map register
// as the base.
// NOTE(review): the condition selecting between the two is not visible
// here - presumably map<0; confirm.
2113 void emit_writeword_tlb(int rt, int addr, int map)
2116 emit_writeword(rt, addr+(int)rdram-0x80000000);
2118 emit_writeword_indexed(rt, addr, map);
// 32-bit store of rt to addr(%rs), optionally through a mapping register.
// map<0: falls back to emit_writeword_indexed with addr rebased onto rdram.
// otherwise: the trace shows `addr32 mov %rt,addr(%rs,%map,1)`. Unlike the
// read variants, the map register is used unscaled (SIB scale 0).
// `temp` is not used by any line visible here.
2121 void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2123 if(map<0) emit_writeword_indexed(rt, addr+(int)rdram-0x80000000, rs);
2125 assem_debug("addr32 mov %%%s,%x(%%%s,%%%s,1)\n",regname[rt],addr,regname[rs],regname[map]);
2129 if(addr==0&&rs!=EBP) {
2130 output_modrm(0,4,rt);
2131 output_sib(0,map,rs);
2133 else if(addr<128&&addr>=-128) {
2134 output_modrm(1,4,rt);
2135 output_sib(0,map,rs);
2140 output_modrm(2,4,rt);
2141 output_sib(0,map,rs);
// 64-bit store as two 32-bit halves: rh at addr, rl at addr+4.
// One pair of stores writes rdram directly (0x7FFFFFFC is 0x80000000-4, i.e.
// addr+4 after rebasing); the other pair uses the map register as the base.
// NOTE(review): the condition selecting between the pairs is not visible
// here - presumably map<0; confirm.
2146 void emit_writedword_tlb(int rh, int rl, int addr, int map)
2150 emit_writeword(rh, addr+(int)rdram-0x80000000);
2151 emit_writeword(rl, addr+(int)rdram-0x7FFFFFFC);
2154 emit_writeword_indexed(rh, addr, map);
2155 emit_writeword_indexed(rl, addr+4, map);
// 64-bit indexed store as two 32-bit halves: rh at addr, then rl at addr+4.
2158 void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
2161 emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
2162 emit_writeword_indexed_tlb(rl, addr+4, rs, map, temp);
// Emit a 16-bit store (`movw`) of rt to absolute address addr, encoded
// rip-relative.
// The trace prints the 16-bit register name by skipping the first character
// of regname[rt].
2164 void emit_writehword(int rt, int addr)
2166 assem_debug("movw %%%s,%x\n",regname[rt]+1,addr);
2169 output_modrm(0,5,rt);
2170 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
// Emit a 16-bit store `movw %rt,addr(%rs)`.
// Displacements in [-128,127] use the mod=1 form, all others mod=2.
2172 void emit_writehword_indexed(int rt, int addr, int rs)
2174 assem_debug("movw %%%s,%x+%%%s\n",regname[rt]+1,addr,regname[rs]);
2177 if(addr<128&&addr>=-128) {
2178 output_modrm(1,rs,rt);
2183 output_modrm(2,rs,rt);
// 16-bit store of rt at constant address addr. One path writes rdram
// directly (addr rebased from 0x80000000); the other uses the map register
// as the base.
// NOTE(review): the selecting condition is not visible here - presumably
// map<0; confirm.
2187 void emit_writehword_tlb(int rt, int addr, int map)
2190 emit_writehword(rt, addr+(int)rdram-0x80000000);
2192 emit_writehword_indexed(rt, addr, map);
// Emit a byte store (`movb`) of rt's low byte to absolute address addr,
// encoded rip-relative.
// There is also a recursive path that stores from EAX instead.
// NOTE(review): the condition guarding that path and the code around the
// recursive call are not visible here - presumably rt is swapped into EAX
// when it has no directly encodable low byte; confirm.
2195 void emit_writebyte(int rt, int addr)
2198 assem_debug("movb %%%cl,%x\n",regname[rt][1],addr);
2200 output_modrm(0,5,rt);
2201 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
2206 emit_writebyte(EAX,addr);
// Emit a byte store `movb %rt(low byte),addr(%rs)`.
// Displacements in [-128,127] use the mod=1 form, all others mod=2.
2210 void emit_writebyte_indexed(int rt, int addr, int rs)
2213 assem_debug("movb %%%cl,%x+%%%s\n",regname[rt][1],addr,regname[rs]);
2215 if(addr<128&&addr>=-128) {
2216 output_modrm(1,rs,rt);
2221 output_modrm(2,rs,rt);
// Fallback path: store from EAX instead. If the base register was EAX, the
// base becomes rt.
// NOTE(review): this implies rt and EAX were exchanged just before, but that
// step is not visible here; confirm.
2228 emit_writebyte_indexed(EAX,addr,rs==EAX?rt:rs);
// Byte store of rt at constant address addr. One path writes rdram directly
// (addr rebased from 0x80000000); the other uses the map register as the
// base.
// NOTE(review): the selecting condition is not visible here - presumably
// map<0; confirm.
2232 void emit_writebyte_tlb(int rt, int addr, int map)
2235 emit_writebyte(rt, addr+(int)rdram-0x80000000);
2237 emit_writebyte_indexed(rt, addr, map);
// Byte store of rt's low byte to addr(%rs), optionally through a map register.
// map<0: falls back to emit_writebyte_indexed with addr rebased onto rdram.
// otherwise: the trace shows `addr32 movb %rt,addr(%rs,%map,1)` with the map
// register unscaled (SIB scale 0).
2240 void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2242 if(map<0) emit_writebyte_indexed(rt, addr+(int)rdram-0x80000000, rs);
2245 assem_debug("addr32 movb %%%cl,%x(%%%s,%%%s,1)\n",regname[rt][1],addr,regname[rs],regname[map]);
2249 if(addr==0&&rs!=EBP) {
2250 output_modrm(0,4,rt);
2251 output_sib(0,map,rs);
2253 else if(addr<128&&addr>=-128) {
2254 output_modrm(1,4,rt);
2255 output_sib(0,map,rs);
2260 output_modrm(2,4,rt);
2261 output_sib(0,map,rs);
// Fallback path: store from EAX instead. Any base or map register that was
// EAX is replaced by rt.
// NOTE(review): this implies rt and EAX were exchanged just before, but that
// step is not visible here; confirm.
2268 emit_writebyte_indexed_tlb(EAX,addr,rs==EAX?rt:rs,map==EAX?rt:map,temp);
// Emit `movl $imm,addr`: store a 32-bit immediate to an absolute address,
// encoded rip-relative.
// The displacement is biased by 8 instead of 4 - presumably because a 4-byte
// immediate follows the displacement (that write is not visible here).
2272 void emit_writeword_imm(int imm, int addr)
2274 assem_debug("movl $%x,%x\n",imm,addr);
2276 output_modrm(0,5,0);
2277 output_w32(addr-(int)out-8); // Note: rip-relative in 64-bit mode
// Emit `mov $imm,addr(%esp)`: store an immediate at a small stack offset.
// addr must fit in a signed byte. The ModRM mod field is 0 when addr is zero
// and 1 (disp8) otherwise, and the displacement byte is written only when
// addr is nonzero.
// NOTE(review): the opcode, SIB byte and immediate writes are not visible
// here.
2280 void emit_writeword_imm_esp(int imm, int addr)
2282 assem_debug("mov $%x,%x(%%esp)\n",imm,addr);
2283 assert(addr>=-128&&addr<128);
2285 output_modrm(!!addr,4,0);
2287 if(addr) output_byte(addr);
// Emit `movq $imm,addr`: store a sign-extended 32-bit immediate as a 64-bit
// value to an absolute address, encoded rip-relative.
// The displacement is biased by 8 because the 4-byte immediate follows the
// 4-byte displacement, and rip-relative offsets count from the end of the
// instruction.
2290 void emit_writedword_imm32(int imm, int addr)
2292 assem_debug("movq $%x,%x\n",imm,addr);
2293 output_rex(1,0,0,0);
2295 output_modrm(0,5,0);
2296 output_w32(addr-(int)out-8); // Note: rip-relative in 64-bit mode
2297 output_w32(imm); // Note: This 32-bit value will be sign extended
// Emit `movb $imm,addr`: store a byte immediate to an absolute address,
// encoded rip-relative. imm must fit in a signed byte.
// The displacement is biased by 5 instead of 4 - presumably because a 1-byte
// immediate follows the displacement (that write is not visible here).
2299 void emit_writebyte_imm(int imm, int addr)
2301 assem_debug("movb $%x,%x\n",imm,addr);
2302 assert(imm>=-128&&imm<128);
2304 output_modrm(0,5,0);
2305 output_w32(addr-(int)out-5); // Note: rip-relative in 64-bit mode
// Emit `mul %rs`: register-direct ModRM with opcode extension 4.
2309 void emit_mul(int rs)
2311 assem_debug("mul %%%s\n",regname[rs]);
2313 output_modrm(3,rs,4);
// Emit `imul %rs`: register-direct ModRM with opcode extension 5.
2315 void emit_imul(int rs)
2317 assem_debug("imul %%%s\n",regname[rs]);
2319 output_modrm(3,rs,5);
// Emit `div %rs`: register-direct ModRM with opcode extension 6.
2321 void emit_div(int rs)
2323 assem_debug("div %%%s\n",regname[rs]);
2325 output_modrm(3,rs,6);
// Emit `idiv %rs`: register-direct ModRM with opcode extension 7.
2327 void emit_idiv(int rs)
2329 assem_debug("idiv %%%s\n",regname[rs]);
2331 output_modrm(3,rs,7);
// Trace line of the `cdq` emitter. Its signature and opcode write are not
// visible here.
2335 assem_debug("cdq\n");
2339 // Load 2 immediates optimizing for small code size
2340 void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2342 emit_movimm(imm1,rt1);
2343 if(imm2-imm1<128&&imm2-imm1>=-128) emit_addimm(rt1,imm2-imm1,rt2);
2344 else emit_movimm(imm2,rt2);
2347 // special case for checking pending_exception
// Emit `cmpb $imm,addr`: compare the byte at an absolute address with a
// byte immediate, encoded rip-relative with opcode extension 7.
// The displacement is biased by 5 instead of 4 - presumably because a 1-byte
// immediate follows the displacement (that write is not visible here).
// NOTE(review): the assert rejects imm==-128 although it fits in a signed
// byte; confirm whether the -127 bound is intentional.
2348 void emit_cmpmem_imm_byte(int addr,int imm)
2350 assert(imm<128&&imm>=-127);
2351 assem_debug("cmpb $%d,%x\n",imm,addr);
2353 output_modrm(0,5,7);
2354 output_w32(addr-(int)out-5); // Note: rip-relative in 64-bit mode
2358 // special case for checking invalid_code
// Shift r right by 12 in place, then emit `cmp $imm,addr(%r)`, i.e. compare
// table entry addr[r>>12] with a byte-sized immediate. r is clobbered.
// NOTE(review): the assert rejects imm==-128 although it fits in a signed
// byte; confirm whether the -127 bound is intentional.
2359 void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
2361 assert(imm<128&&imm>=-127);
2363 emit_shrimm(r,12,r);
2364 assem_debug("cmp $%d,%x+%%%s\n",imm,addr,regname[r]);
2366 output_modrm(2,r,7);
2371 // special case for checking hash_table
// Emit `cmp addr(%rs),%rt` using the mod=2 (32-bit displacement) form.
// Both registers must be in 0..7.
2372 void emit_cmpmem_indexed(int addr,int rs,int rt)
2374 assert(rs>=0&&rs<8);
2375 assert(rt>=0&&rt<8);
2376 assem_debug("cmp %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
2378 output_modrm(2,rs,rt);
2382 // special case for checking memory_map in verify_mapping
// Emit `cmp addr,%rt`: compare rt with the word at an absolute address,
// encoded rip-relative. rt must be in 0..7.
2383 void emit_cmpmem(int addr,int rt)
2385 assert(rt>=0&&rt<8);
2386 assem_debug("cmp %x,%%%s\n",addr,regname[rt]);
2388 output_modrm(0,5,rt);
2389 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
2392 // Used to preload hash table entries
// Emit a prefetch of the absolute address addr, encoded rip-relative with
// ModRM reg field 1.
// NOTE(review): (int)addr narrows the pointer to 32 bits - presumably the
// target is expected in the low 4GB; confirm.
2393 void emit_prefetch(void *addr)
2395 assem_debug("prefetch %x\n",(int)addr);
2398 output_modrm(0,5,1);
2399 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
2402 /*void emit_submem(int r,int addr)
2405 assem_debug("sub %x,%%%s\n",addr,regname[r]);
2407 output_modrm(0,5,r);
2408 output_w32((int)addr);
// Emit x87 `flds (%r)`: load through the address held in register r.
// EBP cannot be encoded as a bare base with mod=0, so that case uses the
// disp8 form with a zero displacement.
2411 void emit_flds(int r)
2413 assem_debug("flds (%%%s)\n",regname[r]);
2415 if(r!=EBP) output_modrm(0,r,0);
2416 else {output_modrm(1,EBP,0);output_byte(0);}
// Emit x87 `fldl (%r)`: load through the address held in register r.
// EBP as base uses the disp8 form with a zero displacement.
2418 void emit_fldl(int r)
2420 assem_debug("fldl (%%%s)\n",regname[r]);
2422 if(r!=EBP) output_modrm(0,r,0);
2423 else {output_modrm(1,EBP,0);output_byte(0);}
// Emit x87 `fucomip` against stack register r. The second opcode byte is
// 0xe8+r.
2425 void emit_fucomip(u_int r)
2427 assem_debug("fucomip %d\n",r);
2430 output_byte(0xe8+r);
// Trace line of the `fchs` emitter. Its signature and opcode bytes are not
// visible here.
2434 assem_debug("fchs\n");
// Trace line of the `fabs` emitter. Its signature and opcode bytes are not
// visible here.
2440 assem_debug("fabs\n");
// Trace line of the `fsqrt` emitter. Its signature and opcode bytes are not
// visible here.
2446 assem_debug("fsqrt\n");
// Emit x87 `fadds (%r)` with opcode extension 0.
// EBP as base uses the disp8 form with a zero displacement.
2450 void emit_fadds(int r)
2452 assem_debug("fadds (%%%s)\n",regname[r]);
2454 if(r!=EBP) output_modrm(0,r,0);
2455 else {output_modrm(1,EBP,0);output_byte(0);}
// Emit x87 `faddl (%r)` with opcode extension 0.
// EBP as base uses the disp8 form with a zero displacement.
2457 void emit_faddl(int r)
2459 assem_debug("faddl (%%%s)\n",regname[r]);
2461 if(r!=EBP) output_modrm(0,r,0);
2462 else {output_modrm(1,EBP,0);output_byte(0);}
// Emit x87 register-form `fadd st(r)`. The second opcode byte is 0xc0+r.
2464 void emit_fadd(int r)
2466 assem_debug("fadd st%d\n",r);
2468 output_byte(0xc0+r);
// Emit x87 `fsubs (%r)` with opcode extension 4.
// EBP as base uses the disp8 form with a zero displacement.
2470 void emit_fsubs(int r)
2472 assem_debug("fsubs (%%%s)\n",regname[r]);
2474 if(r!=EBP) output_modrm(0,r,4);
2475 else {output_modrm(1,EBP,4);output_byte(0);}
// Emit x87 `fsubl (%r)` with opcode extension 4.
// EBP as base uses the disp8 form with a zero displacement.
2477 void emit_fsubl(int r)
2479 assem_debug("fsubl (%%%s)\n",regname[r]);
2481 if(r!=EBP) output_modrm(0,r,4);
2482 else {output_modrm(1,EBP,4);output_byte(0);}
// Emit x87 register-form `fsub st(r)`. The second opcode byte is 0xe0+r.
2484 void emit_fsub(int r)
2486 assem_debug("fsub st%d\n",r);
2488 output_byte(0xe0+r);
// Emit x87 `fmuls (%r)` with opcode extension 1.
// EBP as base uses the disp8 form with a zero displacement.
2490 void emit_fmuls(int r)
2492 assem_debug("fmuls (%%%s)\n",regname[r]);
2494 if(r!=EBP) output_modrm(0,r,1);
2495 else {output_modrm(1,EBP,1);output_byte(0);}
// Emit x87 `fmull (%r)` with opcode extension 1.
// EBP as base uses the disp8 form with a zero displacement.
2497 void emit_fmull(int r)
2499 assem_debug("fmull (%%%s)\n",regname[r]);
2501 if(r!=EBP) output_modrm(0,r,1);
2502 else {output_modrm(1,EBP,1);output_byte(0);}
// Emit x87 register-form `fmul st(r)`. The second opcode byte is 0xc8+r.
2504 void emit_fmul(int r)
2506 assem_debug("fmul st%d\n",r);
2508 output_byte(0xc8+r);
// Emit x87 `fdivs (%r)` with opcode extension 6.
// EBP as base uses the disp8 form with a zero displacement.
2510 void emit_fdivs(int r)
2512 assem_debug("fdivs (%%%s)\n",regname[r]);
2514 if(r!=EBP) output_modrm(0,r,6);
2515 else {output_modrm(1,EBP,6);output_byte(0);}
// Emit x87 `fdivl (%r)` with opcode extension 6.
// EBP as base uses the disp8 form with a zero displacement.
2517 void emit_fdivl(int r)
2519 assem_debug("fdivl (%%%s)\n",regname[r]);
2521 if(r!=EBP) output_modrm(0,r,6);
2522 else {output_modrm(1,EBP,6);output_byte(0);}
// Emit x87 register-form `fdiv st(r)`. The second opcode byte is 0xf0+r.
2524 void emit_fdiv(int r)
2526 assem_debug("fdiv st%d\n",r);
2528 output_byte(0xf0+r);
// Trace line of the "fpop" emitter. Its signature and opcode bytes are not
// visible here.
2533 assem_debug("fpop\n");
// Emit x87 `fildl (%r)` with opcode extension 0.
// EBP as base uses the disp8 form with a zero displacement.
2537 void emit_fildl(int r)
2539 assem_debug("fildl (%%%s)\n",regname[r]);
2541 if(r!=EBP) output_modrm(0,r,0);
2542 else {output_modrm(1,EBP,0);output_byte(0);}
// Emit x87 `fildll (%r)` with opcode extension 5.
// EBP as base uses the disp8 form with a zero displacement.
2544 void emit_fildll(int r)
2546 assem_debug("fildll (%%%s)\n",regname[r]);
2548 if(r!=EBP) output_modrm(0,r,5);
2549 else {output_modrm(1,EBP,5);output_byte(0);}
// Emit x87 `fistpl (%r)` with opcode extension 3.
// EBP as base uses the disp8 form with a zero displacement.
2551 void emit_fistpl(int r)
2553 assem_debug("fistpl (%%%s)\n",regname[r]);
2555 if(r!=EBP) output_modrm(0,r,3);
2556 else {output_modrm(1,EBP,3);output_byte(0);}
// Emit x87 `fistpll (%r)` with opcode extension 7.
// EBP as base uses the disp8 form with a zero displacement.
2558 void emit_fistpll(int r)
2560 assem_debug("fistpll (%%%s)\n",regname[r]);
2562 if(r!=EBP) output_modrm(0,r,7);
2563 else {output_modrm(1,EBP,7);output_byte(0);}
// Emit x87 `fstps (%r)` with opcode extension 3.
// EBP as base uses the disp8 form with a zero displacement.
2565 void emit_fstps(int r)
2567 assem_debug("fstps (%%%s)\n",regname[r]);
2569 if(r!=EBP) output_modrm(0,r,3);
2570 else {output_modrm(1,EBP,3);output_byte(0);}
// Emit x87 `fstpl (%r)` with opcode extension 3.
// EBP as base uses the disp8 form with a zero displacement.
2572 void emit_fstpl(int r)
2574 assem_debug("fstpl (%%%s)\n",regname[r]);
2576 if(r!=EBP) output_modrm(0,r,3);
2577 else {output_modrm(1,EBP,3);output_byte(0);}
// Emit `fnstcw (%esp)` per the trace, with opcode extension 7.
// NOTE(review): the opcode and SIB bytes are not visible here.
2579 void emit_fnstcw_stack()
2581 assem_debug("fnstcw (%%esp)\n");
2583 output_modrm(0,4,7);
// Emit `fldcw (%esp)` per the trace, with opcode extension 5.
// NOTE(review): the opcode and SIB bytes are not visible here.
2586 void emit_fldcw_stack()
2588 assem_debug("fldcw (%%esp)\n");
2590 output_modrm(0,4,5);
// Emit `fldcw addr(%r)` per the trace, with opcode extension 5. The ModRM
// uses r/m=4, so a SIB byte is expected to follow.
// NOTE(review): the SIB byte and displacement are not visible here, so how r
// and addr are combined cannot be confirmed from this view.
2593 void emit_fldcw_indexed(int addr,int r)
2595 assem_debug("fldcw %x(%%%s)\n",addr,regname[r]);
2597 output_modrm(0,4,5);
// Emit `fldcw addr`: load the x87 control word from an absolute address,
// encoded rip-relative with opcode extension 5.
2601 void emit_fldcw(int addr)
2603 assem_debug("fldcw %x\n",addr);
2605 output_modrm(0,5,5);
2606 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
// Emit `movss (%addr),xmmN`. Here `addr` is a host register number holding
// the address, not an address itself.
// EBP as base uses the disp8 form with a zero displacement.
2608 void emit_movss_load(u_int addr,u_int ssereg)
2610 assem_debug("movss (%%%s),xmm%d\n",regname[addr],ssereg);
2615 if(addr!=EBP) output_modrm(0,addr,ssereg);
2616 else {output_modrm(1,EBP,ssereg);output_byte(0);}
// Emit `movsd (%addr),xmmN`. Here `addr` is a host register number holding
// the address.
// EBP as base uses the disp8 form with a zero displacement.
2618 void emit_movsd_load(u_int addr,u_int ssereg)
2620 assem_debug("movsd (%%%s),xmm%d\n",regname[addr],ssereg);
2625 if(addr!=EBP) output_modrm(0,addr,ssereg);
2626 else {output_modrm(1,EBP,ssereg);output_byte(0);}
// Emit `movd xmmN,(%addr)`. Here `addr` is a host register number holding
// the destination address.
// EBP as base uses the disp8 form with a zero displacement.
2628 void emit_movd_store(u_int ssereg,u_int addr)
2630 assem_debug("movd xmm%d,(%%%s)\n",ssereg,regname[addr]);
2635 if(addr!=EBP) output_modrm(0,addr,ssereg);
2636 else {output_modrm(1,EBP,ssereg);output_byte(0);}
// Emit register-to-register `cvttps2dq xmm<ssereg1>,xmm<ssereg2>`.
2638 void emit_cvttps2dq(u_int ssereg1,u_int ssereg2)
2640 assem_debug("cvttps2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
2646 output_modrm(3,ssereg1,ssereg2);
// Emit register-to-register `cvttpd2dq xmm<ssereg1>,xmm<ssereg2>`.
2648 void emit_cvttpd2dq(u_int ssereg1,u_int ssereg2)
2650 assem_debug("cvttpd2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
2656 output_modrm(3,ssereg1,ssereg2);
// Presumably returns the number of set bits in reglist; save_regs and
// restore_regs use the result as a register count.
// NOTE(review): the body is not visible here.
2659 unsigned int count_bits(u_int reglist)
2670 // Save registers before function call
2671 // This code is executed infrequently so we try to minimize code size
2672 // by pushing registers onto the stack instead of writing them to their
// Walks the host registers in ascending order, skipping EXCLUDE_REG, and
// acts on each one whose bit is set in reglist, then lowers ESP by
// (8-count)*8 bytes, where count is the number of registers in reglist.
// NOTE(review): the per-register action is not visible here - presumably a
// push, which would make the total stack adjustment constant; confirm.
2674 void save_regs(u_int reglist)
2677 int count=count_bits(reglist);
2679 for(hr=0;hr<HOST_REGS;hr++) {
2680 if(hr!=EXCLUDE_REG) {
2681 if((reglist>>hr)&1) {
2687 emit_addimm(ESP,-(8-count)*8,ESP);
2689 // Restore registers after function call
// Mirror of save_regs: first raises ESP by (8-count)*8 bytes, then walks the
// host registers in descending order, skipping EXCLUDE_REG, acting on each
// one whose bit is set in reglist.
// NOTE(review): the per-register action is not visible here - presumably a
// pop; confirm.
2690 void restore_regs(u_int reglist)
2693 int count=count_bits(reglist);
2694 emit_addimm(ESP,(8-count)*8,ESP);
2696 for(hr=HOST_REGS-1;hr>=0;hr--) {
2697 if(hr!=EXCLUDE_REG) {
2698 if((reglist>>hr)&1) {
2706 /* Stubs/epilogue */
// Emit the stub for a jump that leaves the current block.
//   addr   - address of the branch instruction that refers to this stub; it
//            must be a two-byte-opcode conditional jump (second byte
//            0x80..0x8f) or a call/jmp (0xe8/0xe9)
//   target - loaded into EAX for the linker
//   linker - routine that resolves the jump (not used by the lines visible
//            here)
// EBX receives addr so the linker can find the branch again.
// NOTE(review): the function has no explicit return type (implicit int).
2708 emit_extjump2(int addr, int target, int linker)
2710 u_char *ptr=(u_char *)addr;
2713 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
2718 assert(*ptr==0xe8||*ptr==0xe9);
2721 emit_movimm(target,EAX);
2722 emit_movimm(addr,EBX);
2723 //assert(addr>=0x7000000&&addr<0x7FFFFFF);
2724 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
// Debug-only: recompute Count and rebase the cycle counter against
// next_interupt before leaving the block.
2726 #ifdef DEBUG_CYCLE_COUNT
2727 emit_readword((int)&last_count,ECX);
2728 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2729 emit_readword((int)&next_interupt,ECX);
2730 emit_writeword(HOST_CCREG,(int)&Count);
2731 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2732 emit_writeword(ECX,(int)&last_count);
// External-jump stub that resolves through dyna_linker.
2738 emit_extjump(int addr, int target)
2740 emit_extjump2(addr, target, (int)dyna_linker);
// External-jump stub that resolves through dyna_linker_ds.
2742 emit_extjump_ds(int addr, int target)
2744 emit_extjump2(addr, target, (int)dyna_linker_ds);
// Body of the out-of-line load stub for stub entry n (signature not visible
// here).
// Outline of the visible steps:
//   1. patch the branch recorded in stubs[n][1] so it lands on this stub
//   2. pick the handler table for the access size, store the address in
//      `address`, and fetch the handler pointer indexed by address>>16
//   3. write back dirty registers and bring Count up to date
//   4. after the handler has run, rebase the cycle counter, restore
//      registers, move the result from readmem_dword into rt (and rth for
//      64-bit loads), and jump back to stubs[n][2]
// NOTE(review): the handler call itself and several declarations (i, rs, rt,
// rth, ds, temp, ftable) are not visible here.
2749 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2750 set_jump_target(stubs[n][1],(int)out);
2751 int type=stubs[n][0];
2754 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2755 u_int reglist=stubs[n][7];
2756 signed char *i_regmap=i_regs->regmap;
2757 int addr=get_reg(i_regmap,AGEN1+(i&1));
// FP loads and LWL/LWR-style loads go to FTEMP instead of the instruction's
// own target register.
2760 if(itype[i]==C1LS||itype[i]==LOADLR) {
2761 rth=get_reg(i_regmap,FTEMP|64);
2762 rt=get_reg(i_regmap,FTEMP);
2764 rth=get_reg(i_regmap,rt1[i]|64);
2765 rt=get_reg(i_regmap,rt1[i]);
2772 if(type==LOADB_STUB||type==LOADBU_STUB)
2773 ftable=(int)readmemb;
2774 if(type==LOADH_STUB||type==LOADHU_STUB)
2775 ftable=(int)readmemh;
2776 if(type==LOADW_STUB)
2777 ftable=(int)readmem;
2778 if(type==LOADD_STUB)
2779 ftable=(int)readmemd;
2780 emit_writeword(rs,(int)&address);
// addr = handler table entry for (rs>>16); entries are 8 bytes wide.
2781 emit_shrimm(rs,16,addr);
2782 emit_movmem_indexedx8(ftable,addr,addr);
2784 ds=i_regs!=®s[i];
2785 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
// The address register and the real base register are masked out of the
// dirty set before write-back.
2786 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2787 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2790 int cc=get_reg(i_regmap,CCREG);
2792 if(addr==HOST_CCREG)
2795 assert(cc!=HOST_CCREG);
2796 assert(temp!=HOST_CCREG);
2797 emit_loadreg(CCREG,cc);
2802 emit_loadreg(CCREG,cc);
// Count = cycle register + pending cycles for this stub + last_count.
2810 emit_readword((int)&last_count,temp);
2811 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
// Store the instruction address at 0(%esp), tagged in its low bits with the
// was32 bit of rs1 (bit 1) and the ds flag (bit 0).
2812 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,0);
2813 emit_add(cc,temp,cc);
2814 emit_writeword(cc,(int)&Count);
2816 // We really shouldn't need to update the count here,
2817 // but not doing so causes random crashes...
2818 emit_readword((int)&Count,HOST_CCREG);
2819 emit_readword((int)&next_interupt,ECX);
2820 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2821 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2822 emit_writeword(ECX,(int)&last_count);
2823 emit_storereg(CCREG,HOST_CCREG);
2824 restore_regs(reglist);
2825 if((cc=get_reg(i_regmap,CCREG))>=0) {
2826 emit_loadreg(CCREG,cc);
// Fetch the loaded value from readmem_dword with the extension matching the
// access type.
2828 if(type==LOADB_STUB)
2829 emit_movsbl((int)&readmem_dword,rt);
2830 if(type==LOADBU_STUB)
2831 emit_movzbl((int)&readmem_dword,rt);
2832 if(type==LOADH_STUB)
2833 emit_movswl((int)&readmem_dword,rt);
2834 if(type==LOADHU_STUB)
2835 emit_movzwl((int)&readmem_dword,rt);
2836 if(type==LOADW_STUB)
2837 emit_readword((int)&readmem_dword,rt);
2838 if(type==LOADD_STUB) {
2839 emit_readword((int)&readmem_dword,rt);
2840 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2842 emit_jmp(stubs[n][2]); // return address
// Emit an inline slow-path load for a compile-time-known address.
//   type    - LOAD*_STUB kind selecting the handler table and result extension
//   i       - instruction index (used for the pagefault PC and was32 lookup)
//   addr    - constant guest address; the handler is ftable[addr>>16]
//   regmap  - current host register map
//   target  - guest register receiving the result
//   adj     - cycle adjustment; Count is advanced by CLOCK_DIVIDER*(adj+1)
//   reglist - host registers to preserve (restored via restore_regs)
// NOTE(review): rs and rt are both looked up from `target`, so they are the
// same host register - confirm that is intended.
// NOTE(review): ftable holds a pointer narrowed to int and the 64-bit table
// entry is passed to emit_call(int) - presumably relies on everything living
// in the low 4GB; confirm.
2845 inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2847 assem_debug("inline_readstub\n");
2848 int rs=get_reg(regmap,target);
2849 int rth=get_reg(regmap,target|64);
2850 int rt=get_reg(regmap,target);
2854 if(type==LOADB_STUB||type==LOADBU_STUB)
2855 ftable=(int)readmemb;
2856 if(type==LOADH_STUB||type==LOADHU_STUB)
2857 ftable=(int)readmemh;
2858 if(type==LOADW_STUB)
2859 ftable=(int)readmem;
2860 if(type==LOADD_STUB)
2861 ftable=(int)readmemd;
// With HOST_IMM_ADDR32 the constant address is stored directly as an
// immediate.
2862 #ifdef HOST_IMM_ADDR32
2863 emit_writeword_imm(addr,(int)&address);
2865 emit_writeword(rs,(int)&address);
2868 int cc=get_reg(regmap,CCREG);
2874 assert(cc!=HOST_CCREG);
2875 assert(temp!=HOST_CCREG);
2876 emit_loadreg(CCREG,cc);
2881 emit_loadreg(CCREG,cc);
// Count = cycle register + pending cycles + last_count.
2889 emit_readword((int)&last_count,temp);
2890 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2891 emit_add(cc,temp,cc);
2892 emit_writeword(cc,(int)&Count);
2893 if((signed int)addr>=(signed int)0xC0000000) {
2894 // Pagefault address
2895 int ds=regmap!=regs[i].regmap;
2896 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,0);
2898 emit_call(((uint64_t *)ftable)[addr>>16]);
2899 // We really shouldn't need to update the count here,
2900 // but not doing so causes random crashes...
2901 emit_readword((int)&Count,HOST_CCREG);
2902 emit_readword((int)&next_interupt,ECX);
2903 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2904 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2905 emit_writeword(ECX,(int)&last_count);
2906 emit_storereg(CCREG,HOST_CCREG);
2907 restore_regs(reglist);
2908 if((cc=get_reg(regmap,CCREG))>=0) {
2909 emit_loadreg(CCREG,cc);
// Fetch the loaded value from readmem_dword with the extension matching the
// access type.
2911 if(type==LOADB_STUB)
2912 emit_movsbl((int)&readmem_dword,rt);
2913 if(type==LOADBU_STUB)
2914 emit_movzbl((int)&readmem_dword,rt);
2915 if(type==LOADH_STUB)
2916 emit_movswl((int)&readmem_dword,rt);
2917 if(type==LOADHU_STUB)
2918 emit_movzwl((int)&readmem_dword,rt);
2919 if(type==LOADW_STUB)
2920 emit_readword((int)&readmem_dword,rt);
2921 if(type==LOADD_STUB) {
2922 emit_readword((int)&readmem_dword,rt);
2923 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
// Body of the out-of-line store stub for stub entry n (signature not visible
// here).
// Outline of the visible steps:
//   1. patch the branch recorded in stubs[n][1] so it lands on this stub
//   2. pick the handler table for the access size, store the address in
//      `address`, and fetch the handler pointer indexed by address>>16
//   3. copy the value to be stored into byte/hword/word/dword
//   4. write back dirty registers and bring Count up to date
//   5. after the handler has run, rebase the cycle counter, restore
//      registers, and jump back to stubs[n][2]
// NOTE(review): the handler call itself and several declarations (i, rs, rt,
// rth, r, ds, temp, ftable) are not visible here.
2929 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2930 set_jump_target(stubs[n][1],(int)out);
2931 int type=stubs[n][0];
2934 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2935 u_int reglist=stubs[n][7];
2936 signed char *i_regmap=i_regs->regmap;
2937 int addr=get_reg(i_regmap,AGEN1+(i&1));
// FP stores take their value from FTEMP; other stores take it from rs2.
2940 if(itype[i]==C1LS) {
2941 rth=get_reg(i_regmap,FTEMP|64);
2942 rt=get_reg(i_regmap,r=FTEMP);
2944 rth=get_reg(i_regmap,rs2[i]|64);
2945 rt=get_reg(i_regmap,r=rs2[i]);
// Fall back to a temporary register when no address register was allocated.
2949 if(addr<0) addr=get_reg(i_regmap,-1);
2952 if(type==STOREB_STUB)
2953 ftable=(int)writememb;
2954 if(type==STOREH_STUB)
2955 ftable=(int)writememh;
2956 if(type==STOREW_STUB)
2957 ftable=(int)writemem;
2958 if(type==STORED_STUB)
2959 ftable=(int)writememd;
2960 emit_writeword(rs,(int)&address);
// addr = handler table entry for (rs>>16); entries are 8 bytes wide.
2961 emit_shrimm(rs,16,addr);
2962 emit_movmem_indexedx8(ftable,addr,addr);
2963 if(type==STOREB_STUB)
2964 emit_writebyte(rt,(int)&byte);
2965 if(type==STOREH_STUB)
2966 emit_writehword(rt,(int)&hword);
2967 if(type==STOREW_STUB)
2968 emit_writeword(rt,(int)&word);
2969 if(type==STORED_STUB) {
2970 emit_writeword(rt,(int)&dword);
// Upper half: when r is 0 the low register is stored again instead of rth.
2971 emit_writeword(r?rth:rt,(int)&dword+4);
2974 ds=i_regs!=®s[i];
2975 int real_rs=get_reg(i_regmap,rs1[i]);
2976 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2977 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2980 int cc=get_reg(i_regmap,CCREG);
2982 if(addr==HOST_CCREG)
2985 assert(cc!=HOST_CCREG);
2986 assert(temp!=HOST_CCREG);
2987 emit_loadreg(CCREG,cc);
2992 emit_loadreg(CCREG,cc);
// Count = cycle register + pending cycles for this stub + last_count.
3000 emit_readword((int)&last_count,temp);
3001 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
// Store the instruction address at 0(%esp), tagged in its low bits with the
// was32 bit of rs1 (bit 1) and the ds flag (bit 0).
3002 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,0);
3003 emit_add(cc,temp,cc);
3004 emit_writeword(cc,(int)&Count);
3006 emit_readword((int)&Count,HOST_CCREG);
3007 emit_readword((int)&next_interupt,ECX);
3008 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
3009 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3010 emit_writeword(ECX,(int)&last_count);
3011 emit_storereg(CCREG,HOST_CCREG);
3012 restore_regs(reglist);
3013 if((cc=get_reg(i_regmap,CCREG))>=0) {
3014 emit_loadreg(CCREG,cc);
3016 emit_jmp(stubs[n][2]); // return address
// Emit an inline slow-path store for a compile-time-known address.
//   type    - STORE*_STUB kind selecting the handler table and staging variable
//   i       - instruction index (used for the pagefault PC and was32 lookup)
//   addr    - constant guest address; the handler is ftable[addr>>16]
//   regmap  - current host register map
//   target  - guest register holding the value to store
//   adj     - cycle adjustment; Count is advanced by CLOCK_DIVIDER*(adj+1)
//   reglist - host registers to preserve (restored via restore_regs)
// NOTE(review): ftable holds a pointer narrowed to int and the 64-bit table
// entry is passed to emit_call(int) - presumably relies on everything living
// in the low 4GB; confirm.
3019 inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3021 assem_debug("inline_writestub\n");
// The address comes from the temporary register (guest register -1).
3022 int rs=get_reg(regmap,-1);
3023 int rth=get_reg(regmap,target|64);
3024 int rt=get_reg(regmap,target);
3028 if(type==STOREB_STUB)
3029 ftable=(int)writememb;
3030 if(type==STOREH_STUB)
3031 ftable=(int)writememh;
3032 if(type==STOREW_STUB)
3033 ftable=(int)writemem;
3034 if(type==STORED_STUB)
3035 ftable=(int)writememd;
3036 emit_writeword(rs,(int)&address);
3037 if(type==STOREB_STUB)
3038 emit_writebyte(rt,(int)&byte);
3039 if(type==STOREH_STUB)
3040 emit_writehword(rt,(int)&hword);
3041 if(type==STOREW_STUB)
3042 emit_writeword(rt,(int)&word);
3043 if(type==STORED_STUB) {
3044 emit_writeword(rt,(int)&dword);
// Upper half: when target is 0 the low register is stored again instead of
// rth.
3045 emit_writeword(target?rth:rt,(int)&dword+4);
3048 int cc=get_reg(regmap,CCREG);
3054 assert(cc!=HOST_CCREG);
3055 assert(temp!=HOST_CCREG);
3056 emit_loadreg(CCREG,cc);
3061 emit_loadreg(CCREG,cc);
// Count = cycle register + pending cycles + last_count.
3069 emit_readword((int)&last_count,temp);
3070 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
3071 emit_add(cc,temp,cc);
3072 emit_writeword(cc,(int)&Count);
3073 if((signed int)addr>=(signed int)0xC0000000) {
3074 // Pagefault address
3075 int ds=regmap!=regs[i].regmap;
3076 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,0);
3078 emit_call(((uint64_t *)ftable)[addr>>16]);
// Rebase the cycle counter against next_interupt after the handler returns.
3079 emit_readword((int)&Count,HOST_CCREG);
3080 emit_readword((int)&next_interupt,ECX);
3081 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
3082 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3083 emit_writeword(ECX,(int)&last_count);
3084 emit_storereg(CCREG,HOST_CCREG);
3085 restore_regs(reglist);
3086 if((cc=get_reg(regmap,CCREG))>=0) {
3087 emit_loadreg(CCREG,cc);
// Stub for unaligned writes: patches the branch recorded in stubs[n][1] to
// land here and ends with a jump back to stubs[n][2].
// NOTE(review): whatever is emitted between those two steps is not visible
// here.
3091 do_unalignedwritestub(int n)
3093 set_jump_target(stubs[n][1],(int)out);
3095 emit_jmp(stubs[n][2]); // return address
// Debug helper: prints the register values passed as arguments in the order
// a,b,c,d,ebp,esi,edi, followed in parentheses by the int located just below
// the first argument in memory.
// NOTE(review): (&edi)[-1] reads outside the parameter object - presumably
// meant to pick up the return address from the stack; this relies on the
// calling convention and is not portable. The esp parameter is unused.
3098 void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
3100 printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
// Body of the block-invalidation stub for stub entry n (signature not visible
// here).
// Patches the branch recorded in stubs[n][1] to land here, moves the register
// named by stubs[n][4] into EDI as the argument, calls invalidate_block,
// restores the registers in stubs[n][3], and jumps back to stubs[n][2].
// NOTE(review): the matching register save is not visible here.
3105 u_int reglist=stubs[n][3];
3106 set_jump_target(stubs[n][1],(int)out);
3108 if(stubs[n][4]!=EDI) emit_mov(stubs[n][4],EDI);
3109 emit_call((int)&invalidate_block);
3110 restore_regs(reglist);
3111 emit_jmp(stubs[n][2]); // return address
// Emit the code that verifies the block at instruction i before it is entered.
// Loads four arguments and calls a verifier:
//   EAX = `source` when start is below 0xC0000000 (signed compare), else
//         `start` itself
//   EBX = copy
//   ECX = slen*4 (length in bytes)
//   host register 12 = start+i*4
// The verifier is verify_code for addresses below 0xC0000000 and
// verify_code_vm otherwise. Execution then continues at instr_addr[i].
// NOTE(review): the declaration and return of `entry` are not visible here.
3114 int do_dirty_stub(int i)
3116 assem_debug("do_dirty_stub %x\n",start+i*4);
3117 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
3118 emit_movimm((int)copy,EBX);
3119 emit_movimm(slen*4,ECX);
3120 emit_movimm(start+i*4,12);
3121 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
// If nothing was emitted since `entry` was taken, use the instruction's own
// address as the entry point.
3124 if(entry==(int)out) entry=instr_addr[i];
3125 emit_jmp(instr_addr[i]);
// Variant of do_dirty_stub that calls verify_code_ds. It loads the same
// arguments, except that host register 12 receives start+1.
3129 void do_dirty_stub_ds()
3131 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
3132 emit_movimm((int)copy,EBX);
3133 emit_movimm(slen*4,ECX);
3134 emit_movimm(start+1,12);
3135 emit_call((int)&verify_code_ds);
// Body of the coprocessor-1 stub for stub entry n (signature not visible
// here).
// Patches the branch recorded in stubs[n][1] to land here, reloads constants
// and writes back dirty registers, makes sure the cycle count is in
// HOST_CCREG, puts the faulting PC in EAX, adds the cycles consumed so far,
// and jumps to fp_exception (or fp_exception_ds when ds is set).
// NOTE(review): the declarations of i and ds are not visible here.
3140 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3141 set_jump_target(stubs[n][1],(int)out);
3144 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3147 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3148 //if(i_regs!=®s[i]) printf("oops: regs[i]=%x i_regs=%x",(int)®s[i],(int)i_regs);
3150 //else {printf("fp exception in delay slot\n");}
3151 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3152 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
// The PC is backed up by one instruction when ds is set.
3153 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3154 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3155 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
// Emit the memory_map lookup for a read.
//   s     - host register holding the guest address
//   ar    - host register receiving the masked address
//   map   - host register receiving the memory_map entry
//   a     - AND mask applied to the address (skipped when a is all ones)
//   shift - if >=0, passed with s to emit_lea8
//   addr  - constant address, consulted in the constant path
// Constant path: for addresses at or above 0xC0000000 (signed compare) the
// entry memory_map[addr>>12] is loaded directly; there is also a path that
// returns -1 for "no mapping".
// Dynamic path: map = memory_map[s>>12].
// NOTE(review): the condition on c and the final return value are not
// visible here; x is only referenced in commented-out code.
3160 int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3163 if((signed int)addr>=(signed int)0xC0000000) {
3164 emit_readword((int)(memory_map+(addr>>12)),map);
3167 return -1; // No mapping
3170 if(s!=map) emit_mov(s,map);
3171 emit_shrimm(map,12,map);
3172 // Schedule this while we wait on the load
3173 //if(x) emit_xorimm(addr,x,addr);
3174 if(shift>=0) emit_lea8(s,shift);
3175 if(~a) emit_andimm(s,a,ar);
3176 emit_movmem_indexedx4((int)memory_map,map,map);
// Emits the validity check for a read mapping. The guarded part runs when
// the address is not constant (!c) or lies at or above 0xC0000000 (signed
// compare).
// NOTE(review): the guarded body - presumably a test of map and a branch
// recorded through jaddr - is not visible here.
3180 int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3182 if(!c||(signed int)addr>=(signed int)0xC0000000) {
// Combine the address register with the map entry for a read:
// ar = ar + map*4 (lea with scale 4, displacement 0).
// NOTE(review): any guard on map is not visible here.
3190 int gen_tlb_addr_r(int ar, int map) {
3192 emit_leairrx4(0,ar,map,ar);
// Emit the memory_map lookup for a write.
//   s    - host register holding the guest address
//   map  - host register receiving the memory_map entry
//   addr - constant address, consulted in the constant path
// Constant path: for addresses below 0x80800000 or at/above 0xC0000000
// (unsigned compares) the entry memory_map[addr>>12] is loaded directly;
// there is also a path that returns -1 for "no mapping".
// Dynamic path: map = memory_map[s>>12].
// In the visible code the entry is finally shifted left by 2.
// NOTE(review): the condition on c and the final return value are not
// visible here; ar and x appear only in commented-out code.
3196 int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3199 if(addr<0x80800000||addr>=0xC0000000) {
3200 emit_readword((int)(memory_map+(addr>>12)),map);
3203 return -1; // No mapping
3206 if(s!=map) emit_mov(s,map);
3207 //if(s!=ar) emit_mov(s,ar);
3208 emit_shrimm(map,12,map);
3209 // Schedule this while we wait on the load
3210 //if(x) emit_xorimm(s,x,addr);
3211 emit_movmem_indexedx4((int)memory_map,map,map);
3213 emit_shlimm(map,2,map);
// Emits the validity check for a write mapping. The guarded part runs when
// the address is not constant (!c), or is below 0x80800000, or is at/above
// 0xC0000000.
// NOTE(review): the guarded body - presumably a branch recorded through
// jaddr - is not visible here.
3216 int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3218 if(!c||addr<0x80800000||addr>=0xC0000000) {
// Combine the address register with the (already shifted) map entry for a
// write: ar = ar + map (lea with scale 1, displacement 0).
// NOTE(review): any guard on map is not visible here.
3224 int gen_tlb_addr_w(int ar, int map) {
3226 emit_leairrx1(0,ar,map,ar);
3230 // We don't need this for x86
// Placeholder kept for interface parity with other backends. The only
// visible statement is commented out, so nothing is emitted for addr or reg.
3231 generate_map_const(u_int addr,int reg) {
3232 // void *mapaddr=memory_map+(addr>>12);
// Emits x86 code for the MIPS variable shifts: SLLV/SRLV/SRAV when
// opcode2[i]<=0x07, otherwise the 64-bit DSLLV/DSRLV/DSRAV forms.
//   i      - index of the instruction being assembled
//   i_regs - register state; regmap maps host registers to guest registers
// The *cl shift helpers are always given a non-ECX operand: the shift amount
// is moved or exchanged into ECX first, and `shift` is substituted wherever
// an operand would otherwise have been ECX. If ECX held a live value
// (regmap[ECX]>=0) it is exchanged back afterwards.
// Host-register variables are signed char because get_reg results are tested
// against the negative "not allocated" value with <0 / >=0.
// NOTE(review): short lines (braces, some declarations and branch headers)
// are missing from this listing.
3237 void shift_assemble_x86(int i,struct regstat *i_regs)
3240 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3243 t=get_reg(i_regs->regmap,rt1[i]);
3244 s=get_reg(i_regs->regmap,rs1[i]);
3245 shift=get_reg(i_regs->regmap,rs2[i]);
3254 if(s!=t) emit_mov(s,t);
3258 signed char temp=get_reg(i_regs->regmap,-1);
// Target is ECX: put the count in ECX and shift the value in `shift` instead.
3260 if(t==ECX&&s!=ECX) {
3261 if(shift!=ECX) emit_mov(shift,ECX);
// rt==rs2: the shift register must survive, so work in the temporary.
3262 if(rt1[i]==rs2[i]) {shift=temp;}
3263 if(s!=shift) emit_mov(s,shift);
3267 if(rt1[i]==rs2[i]) {emit_mov(shift,temp);shift=temp;}
3268 if(s!=t) emit_mov(s,t);
// ECX free: plain move. Otherwise exchange so the old ECX value is kept in `shift`.
3270 if(i_regs->regmap[ECX]<0)
3271 emit_mov(shift,ECX);
3273 emit_xchg(shift,ECX);
3276 if(opcode2[i]==4) // SLLV
3278 emit_shlcl(t==ECX?shift:t);
3280 if(opcode2[i]==6) // SRLV
3282 emit_shrcl(t==ECX?shift:t);
3284 if(opcode2[i]==7) // SRAV
3286 emit_sarcl(t==ECX?shift:t);
// Undo the exchange when ECX was holding a live guest register.
3288 if(shift!=ECX&&i_regs->regmap[ECX]>=0) emit_xchg(shift,ECX);
3291 } else { // DSLLV/DSRLV/DSRAV
// h/l suffixes: upper (guest register | 64) and lower 32-bit halves.
3292 signed char sh,sl,th,tl,shift;
3293 th=get_reg(i_regs->regmap,rt1[i]|64);
3294 tl=get_reg(i_regs->regmap,rt1[i]);
3295 sh=get_reg(i_regs->regmap,rs1[i]|64);
3296 sl=get_reg(i_regs->regmap,rs1[i]);
3297 shift=get_reg(i_regs->regmap,rs2[i]);
3302 if(th>=0) emit_zeroreg(th);
3307 if(sl!=tl) emit_mov(sl,tl);
3308 if(th>=0&&sh!=th) emit_mov(sh,th);
3312 // FIXME: What if shift==tl ?
3314 int temp=get_reg(i_regs->regmap,-1);
3316 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3319 if(tl==ECX&&sl!=ECX) {
3320 if(shift!=ECX) emit_mov(shift,ECX);
3321 if(sl!=shift) emit_mov(sl,shift);
3322 if(th>=0 && sh!=th) emit_mov(sh,th);
3324 else if(th==ECX&&sh!=ECX) {
3325 if(shift!=ECX) emit_mov(shift,ECX);
3326 if(sh!=shift) emit_mov(sh,shift);
3327 if(sl!=tl) emit_mov(sl,tl);
3331 if(sl!=tl) emit_mov(sl,tl);
3332 if(th>=0 && sh!=th) emit_mov(sh,th);
3334 if(i_regs->regmap[ECX]<0)
3335 emit_mov(shift,ECX);
3337 emit_xchg(shift,ECX);
3340 if(opcode2[i]==0x14) // DSLLV
// Double-width shift, then fix up counts with bit 5 set (>=32):
// the low half moves into the high half and the low half becomes zero.
3342 if(th>=0) emit_shldcl(th==ECX?shift:th,tl==ECX?shift:tl);
3343 emit_shlcl(tl==ECX?shift:tl);
3344 emit_testimm(ECX,32);
3345 if(th>=0) emit_cmovne_reg(tl==ECX?shift:tl,th==ECX?shift:th);
3346 emit_cmovne(&const_zero,tl==ECX?shift:tl);
3348 if(opcode2[i]==0x16) // DSRLV
// Mirror image: for counts >=32 the high half moves down and is then zeroed.
3351 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3352 emit_shrcl(th==ECX?shift:th);
3353 emit_testimm(ECX,32);
3354 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3355 if(real_th>=0) emit_cmovne(&const_zero,th==ECX?shift:th);
3357 if(opcode2[i]==0x17) // DSRAV
// Arithmetic variant: temp receives the high half shifted right by 31 (the
// sign fill), which replaces the high half for counts >=32.
3360 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3363 emit_mov(th==ECX?shift:th,temp==ECX?shift:temp);
3365 emit_sarcl(th==ECX?shift:th);
3366 if(real_th>=0) emit_sarimm(temp==ECX?shift:temp,31,temp==ECX?shift:temp);
3367 emit_testimm(ECX,32);
3368 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3369 if(real_th>=0) emit_cmovne_reg(temp==ECX?shift:temp,th==ECX?shift:th);
3371 if(shift!=ECX&&(i_regs->regmap[ECX]>=0||temp==ECX)) emit_xchg(shift,ECX);
3377 #define shift_assemble shift_assemble_x86
// Emits x86 code for the unaligned loads: LWL/LWR (opcode 0x22/0x26) and
// LDL/LDR (opcode 0x1A/0x1B).
//   i      - index of the instruction being assembled
//   i_regs - register state; regmap maps host registers to guest registers
// NOTE(review): many short lines (braces, branch headers, declarations such
// as hr, reglist, offset, c, memtarget, jaddr, a, temp3) are missing from
// this listing, so the control structure around the visible calls is not
// fully known.
3379 void loadlr_assemble_x86(int i,struct regstat *i_regs)
3381 int s,th,tl,temp,temp2,addr,map=-1;
// th/tl: upper (guest register | 64) and lower halves of the target register.
3386 th=get_reg(i_regs->regmap,rt1[i]|64);
3387 tl=get_reg(i_regs->regmap,rt1[i]);
3388 s=get_reg(i_regs->regmap,rs1[i]);
3389 temp=get_reg(i_regs->regmap,-1);
3390 temp2=get_reg(i_regs->regmap,FTEMP);
3391 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
// Bitmask of host registers that currently hold a guest value.
3394 for(hr=0;hr<HOST_REGS;hr++) {
3395 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3398 if(offset||s<0||c) addr=temp2;
3401 c=(i_regs->wasconst>>s)&1;
3402 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3403 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
// Non-constant address: temp gets the value from emit_lea8 and temp2 the
// address aligned down to 4 (word) or 8 (doubleword) bytes.
3410 emit_lea8(addr,temp);
3411 if (opcode[i]==0x22||opcode[i]==0x26) {
3412 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3414 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3416 emit_cmpimm(addr,0x800000);
// Constant address: temp = bit offset within the word (&24) or doubleword (&56).
3421 if (opcode[i]==0x22||opcode[i]==0x26) {
3422 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3424 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
// TLB path: a is the alignment mask handed to do_tlb_r.
3431 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3432 a=0xFFFFFFFC; // LWL/LWR
3434 a=0xFFFFFFF8; // LDL/LDR
3436 map=get_reg(i_regs->regmap,TLREG);
3438 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3440 if (opcode[i]==0x22||opcode[i]==0x26) {
3441 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3443 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3446 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3448 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3450 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
// Load the aligned word into temp2; the stub handles the slow path when a
// branch was recorded in jaddr.
3451 emit_readword_indexed_tlb(0,temp2,map,temp2);
3452 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3455 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3456 emit_andimm(temp,24,temp);
3457 if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
// temp3 is a scratch register chosen to differ from temp/temp2; it is pushed
// before use and loaded with all ones to build the merge mask.
3461 if(temp3==temp2) temp3++;
3462 emit_pushreg(temp3);
3463 emit_movimm(-1,temp3);
3464 if (opcode[i]==0x26) {
3471 emit_mov(temp3,ECX);
3478 if(temp3==temp) temp3++;
3479 if(temp3==temp2) temp3++;
3480 if(temp3==temp) temp3++;
// The shift count has to be in ECX, so exchange it with temp.
3481 emit_xchg(ECX,temp);
3482 emit_pushreg(temp3);
3483 emit_movimm(-1,temp3);
3484 if (opcode[i]==0x26) {
3486 emit_shrcl(temp2==ECX?temp:temp2);
3489 emit_shlcl(temp2==ECX?temp:temp2);
3491 emit_not(temp3,temp3);
3493 emit_mov(temp3,temp);
// Merge: keep the masked bits of the old target, OR in the shifted data.
3496 emit_and(temp,tl,tl);
3497 emit_or(temp2,tl,tl);
3498 //emit_storereg(rt1[i],tl); // DEBUG
3500 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
// Write back rs1 (both halves) if dirty; it is reloaded after the merge call.
3502 if((i_regs->wasdirty>>s)&1)
3503 emit_storereg(rs1[i],s);
3504 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3505 if((i_regs->wasdirty>>get_reg(i_regs->regmap,rs1[i]|64))&1)
3506 emit_storereg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3507 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3509 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3510 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3511 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3512 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3515 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3516 emit_andimm(temp,56,temp);
3517 //output_byte(0xCC);
3518 //emit_pushreg(temp);
3519 //emit_pushreg(temp2h);
3520 //emit_pushreg(temp2);
// 20-byte stack scratch area: tl/th at 0/4, temp2/temp2h at 8/12, temp at 16.
// The two register pairs are then read back as 64-bit values into ARG1_REG
// and ARG2_REG for the merge helper.
// NOTE(review): how the value stored at offset 16 reaches the helper is not
// visible in this listing.
3523 emit_addimm64(ESP,-20,ESP);
3524 emit_writeword_indexed(temp,16,ESP);
3525 emit_writeword_indexed(temp2h,12,ESP);
3526 emit_writeword_indexed(temp2,8,ESP);
3527 emit_writeword_indexed(th,4,ESP);
3528 emit_writeword_indexed(tl,0,ESP);
3530 emit_readdword_indexed(0,ESP,ARG1_REG);
3531 emit_readdword_indexed(8,ESP,ARG2_REG);
3532 if(opcode[i]==0x1A) emit_call((int)ldl_merge);
3533 if(opcode[i]==0x1B) emit_call((int)ldr_merge);
3534 emit_addimm64(ESP,20,ESP);
3539 emit_shrimm64(th,32,th);
// Restore rs1 after the call.
3540 if(s>=0) emit_loadreg(rs1[i],s);
3541 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3542 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3546 #define loadlr_assemble loadlr_assemble_x86
// Emits x86 code for coprocessor 0 instructions: MFC0 (opcode2 0), MTC0
// (opcode2 4) and the opcode2 0x10 group (TLBR, TLBWI, TLBWR, TLBP, ERET,
// selected by the low six bits of the instruction word).
//   i      - index of the instruction being assembled
//   i_regs - register state; regmap maps host registers to guest registers
// MFC0 and MTC0 go through helper calls that receive their operands through
// globals (PC -> fake_pc, fake_pc.f.r.nrd, readmem_dword).
// NOTE(review): short lines (braces, some branch headers) are missing from
// this listing.
3548 void cop0_assemble(int i,struct regstat *i_regs)
3550 if(opcode2[i]==0) // MFC0
3552 signed char t=get_reg(i_regs->regmap,rt1[i]);
// copr: cop0 register number, bits 11..15 of the instruction word.
3553 char copr=(source[i]>>11)&0x1f;
3554 //assert(t>=0); // Why does this happen? OOT is weird
// Point PC at fake_pc and store the register number in it for the helper.
3556 emit_writedword_imm32((int)&fake_pc,(int)&PC);
3557 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
// Bring Count up to date: last_count + cycle counter + CLOCK_DIVIDER*ccadj[i].
// NOTE(review): the condition guarding this sequence is not visible here.
3559 emit_readword((int)&last_count,ECX);
3560 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3561 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3562 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3563 emit_writeword(HOST_CCREG,(int)&Count);
// The helper's result is picked up from readmem_dword.
3565 emit_call((int)MFC0);
3566 emit_readword((int)&readmem_dword,t);
3569 else if(opcode2[i]==4) // MTC0
3571 signed char s=get_reg(i_regs->regmap,rs1[i]);
3572 char copr=(source[i]>>11)&0x1f;
// The value to write is handed to the helper through readmem_dword.
3574 emit_writeword(s,(int)&readmem_dword);
3575 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->was32); // FIXME
3576 emit_writedword_imm32((int)&fake_pc,(int)&PC);
3577 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
// For cop0 registers 9, 11 and 12, Count is brought up to date before the call.
3578 if(copr==9||copr==11||copr==12) {
3579 emit_readword((int)&last_count,ECX);
3580 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3581 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3582 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3583 emit_writeword(HOST_CCREG,(int)&Count);
3585 // What a mess. The status register (12) can enable interrupts,
3586 // so needs a special case to handle a pending interrupt.
3587 // The interrupt must be taken immediately, because a subsequent
3588 // instruction might disable interrupts again.
// Record the address of the next instruction and clear pending_exception so
// the check after the call only sees what the helper set.
3589 if(copr==12&&!is_delayslot) {
3590 emit_writeword_imm(start+i*4+4,(int)&pcaddr);
3591 emit_writebyte_imm(0,(int)&pending_exception);
3593 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3595 emit_call((int)MTC0);
// After the call, rebuild the cycle counter from Count and next_interupt and
// make next_interupt the new last_count.
3596 if(copr==9||copr==11||copr==12) {
3597 emit_readword((int)&Count,HOST_CCREG);
3598 emit_readword((int)&next_interupt,ECX);
3599 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3600 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3601 emit_writeword(ECX,(int)&last_count);
3602 emit_storereg(CCREG,HOST_CCREG);
3604 emit_loadreg(rs1[i],s);
// Leave through do_interrupt if the helper flagged a pending exception.
3606 assert(!is_delayslot);
3607 //if(is_delayslot) output_byte(0xcc);
3608 emit_cmpmem_imm_byte((int)&pending_exception,0);
3609 emit_jne((int)&do_interrupt);
3615 assert(opcode2[i]==0x10);
3616 if((source[i]&0x3f)==0x01) // TLBR
3617 emit_call((int)TLBR);
3618 if((source[i]&0x3f)==0x02) // TLBWI
3619 emit_call((int)TLBWI_new);
3620 if((source[i]&0x3f)==0x06) { // TLBWR
3621 // The TLB entry written by TLBWR is dependent on the count,
3622 // so update the cycle count
3623 emit_readword((int)&last_count,ECX);
3624 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3625 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3626 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3627 emit_writeword(HOST_CCREG,(int)&Count);
3628 emit_call((int)TLBWR_new);
3630 if((source[i]&0x3f)==0x08) // TLBP
3631 emit_call((int)TLBP);
3632 if((source[i]&0x3f)==0x18) // ERET
// Add the cycles for this block to the counter, then leave via jump_eret.
// NOTE(review): `count` is set on a line that is not visible in this listing.
3635 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3636 emit_addimm_and_set_flags(CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3637 emit_jmp((int)jump_eret);
// Emits x86 code for the coprocessor 1 move instructions, selected by
// opcode2[i]: MFC1 (0), DMFC1 (1), CFC1 (2), MTC1 (4), DMTC1 (5), CTC1 (6).
//   i      - index of the instruction being assembled
//   i_regs - register state; regmap maps host registers to guest registers
// reg_cop1_simple / reg_cop1_double are indexed by the FPU register number
// (bits 11..15 of the instruction word); each entry is loaded and then
// dereferenced, i.e. the tables hold pointers to the register storage.
// NOTE(review): short lines (braces, some branch headers and declarations
// such as jaddr) are missing from this listing.
3642 void cop1_assemble(int i,struct regstat *i_regs)
3644 // Check cop1 unusable
// Test bit 0x20000000 of the register allocated for CSREG; the FP_STUB
// handles the failing case.
3646 signed char rs=get_reg(i_regs->regmap,CSREG);
3648 emit_testimm(rs,0x20000000);
3651 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3654 if (opcode2[i]==0) { // MFC1
3655 signed char tl=get_reg(i_regs->regmap,rt1[i]);
// Load the pointer, then the 32-bit value it points to.
3657 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3658 emit_readword_indexed(0,tl,tl);
3661 else if (opcode2[i]==1) { // DMFC1
3662 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3663 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
// The high word (offset 4) is read first because tl, which holds the
// pointer, is overwritten by the low-word load.
3665 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3666 if(th>=0) emit_readword_indexed(4,tl,th);
3667 emit_readword_indexed(0,tl,tl);
3670 else if (opcode2[i]==4) { // MTC1
3671 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3672 signed char temp=get_reg(i_regs->regmap,-1);
3673 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3674 emit_writeword_indexed(sl,0,temp);
3676 else if (opcode2[i]==5) { // DMTC1
3677 signed char sl=get_reg(i_regs->regmap,rs1[i]);
// For guest register 0 the low-half register is reused as the high half.
3678 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3679 signed char temp=get_reg(i_regs->regmap,-1);
3680 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3681 emit_writeword_indexed(sh,4,temp);
3682 emit_writeword_indexed(sl,0,temp);
3684 else if (opcode2[i]==2) // CFC1
3686 signed char tl=get_reg(i_regs->regmap,rt1[i]);
// Only control registers 0 (FCR0) and 31 (FCR31) are read.
3688 u_int copr=(source[i]>>11)&0x1f;
3689 if(copr==0) emit_readword((int)&FCR0,tl);
3690 if(copr==31) emit_readword((int)&FCR31,tl);
3693 else if (opcode2[i]==6) // CTC1
3695 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3696 u_int copr=(source[i]>>11)&0x1f;
3700 emit_writeword(sl,(int)&FCR31);
3701 // Set the rounding mode
// The low two bits of the written value index the rounding_modes table.
3702 signed char temp=get_reg(i_regs->regmap,-1);
3703 emit_movimm(3,temp);
3704 emit_and(sl,temp,temp);
3705 emit_fldcw_indexed((int)&rounding_modes,temp);
// Emits x86 code for the floating-point conversion instructions.
//   i      - index of the instruction being assembled
//   i_regs - register state; regmap maps host registers to guest registers
// opcode2[i] gives the source format (0x10 single, 0x11 double, 0x14 word,
// 0x15 long, per the comments on each case) and the low six bits of the
// instruction word give the operation. The source register number is bits
// 11..15 and the destination register number bits 6..10; each selects an
// entry of reg_cop1_simple or reg_cop1_double whose value is loaded into
// temp. When source and destination use the same table and the same
// register number, the second table load is skipped.
// NOTE(review): short lines (braces, the actual load/convert/store emitters
// between the table reads, early returns) are missing from this listing.
3710 void fconv_assemble_x86(int i,struct regstat *i_regs)
3712 signed char temp=get_reg(i_regs->regmap,-1);
3714 // Check cop1 unusable
3716 signed char rs=get_reg(i_regs->regmap,CSREG);
3718 emit_testimm(rs,0x20000000);
3721 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3724 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3725 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3726 emit_movss_load(temp,0);
3727 emit_cvttps2dq(0,0); // float->int, truncate
3728 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3729 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3730 emit_movd_store(0,temp);
3733 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3734 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3735 emit_movsd_load(temp,0);
3736 emit_cvttpd2dq(0,0); // double->int, truncate
3737 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3738 emit_movd_store(0,temp);
3742 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3743 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3745 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3746 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3750 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3751 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3753 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3757 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) { // cvt_s_l
3758 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3760 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3764 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) { // cvt_d_l
3765 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3767 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3768 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3773 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3774 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3776 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3780 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3781 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3783 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
// Remaining conversions (to word / long, with or without explicit rounding).
3788 if(opcode2[i]==0x10) { // cvt_*_s
3789 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3792 if(opcode2[i]==0x11) { // cvt_*_d
3793 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
// Operations below 0x10 carry their own rounding mode in the low two bits:
// the current control word is saved on the stack and the matching mode loaded.
3796 if((source[i]&0x3f)<0x10) {
3797 emit_fnstcw_stack();
3798 if((source[i]&3)==0) emit_fldcw((int)&round_mode); //printf("round\n");
3799 if((source[i]&3)==1) emit_fldcw((int)&trunc_mode); //printf("trunc\n");
3800 if((source[i]&3)==2) emit_fldcw((int)&ceil_mode); //printf("ceil\n");
3801 if((source[i]&3)==3) emit_fldcw((int)&floor_mode); //printf("floor\n");
3803 if((source[i]&0x3f)==0x24||(source[i]&0x3c)==0x0c) { // cvt_w_*
3804 if(opcode2[i]!=0x10||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3805 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3808 if((source[i]&0x3f)==0x25||(source[i]&0x3c)==0x08) { // cvt_l_*
3809 if(opcode2[i]!=0x11||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3810 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
// NOTE(review): presumably restores the control word saved above; the body
// of this branch is not visible in this listing.
3813 if((source[i]&0x3f)<0x10) {
3818 #define fconv_assemble fconv_assemble_x86
// Emits x86 code for the floating-point compare instructions (single
// precision when opcode2[i]==0x10, double when 0x11). The condition is
// selected by the low six bits of the instruction word.
//   i      - index of the instruction being assembled
//   i_regs - register state; regmap maps host registers to guest registers
// The result is bit 0x800000 of the register allocated for FSREG: the visible
// sequence sets the bit in fs and prepares temp as the same value with the
// bit flipped, then conditional moves copy temp into fs when the compare
// fails. NOTE(review): this reading assumes the emit_or / emit_xor argument
// order is (source, source, destination) - confirm.
// The two operands are table entries selected by bits 16..20 and bits 11..15.
// NOTE(review): short lines (braces, the load and compare emitters, early
// returns) are missing from this listing.
3820 void fcomp_assemble(int i,struct regstat *i_regs)
3822 signed char fs=get_reg(i_regs->regmap,FSREG);
3823 signed char temp=get_reg(i_regs->regmap,-1);
3825 // Check cop1 unusable
3827 signed char cs=get_reg(i_regs->regmap,CSREG);
3829 emit_testimm(cs,0x20000000);
3832 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
// Condition 0x30: the result bit is simply cleared.
3836 if((source[i]&0x3f)==0x30) {
3837 emit_andimm(fs,~0x800000,fs);
3841 if((source[i]&0x3e)==0x38) {
3842 // sf/ngle - these should throw exceptions for NaNs
3843 emit_andimm(fs,~0x800000,fs);
3847 if(opcode2[i]==0x10) {
3848 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
3850 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3852 emit_movimm(0x800000,temp);
3853 emit_or(fs,temp,fs);
3854 emit_xor(temp,fs,temp);
// Ordered conditions add a parity-flag move so an unordered compare fails.
3857 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_s
3858 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_s
3859 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_s
3860 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_s
3861 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_s
3862 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_s
3863 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_s
3864 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_s
3865 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_s
3866 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_s
3867 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_s
3868 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_s
3869 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_s
// Double precision: same structure, using the reg_cop1_double table.
3872 if(opcode2[i]==0x11) {
3873 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
3875 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3877 emit_movimm(0x800000,temp);
3878 emit_or(fs,temp,fs);
3879 emit_xor(temp,fs,temp);
3882 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_d
3883 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_d
3884 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_d
3885 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_d
3886 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_d
3887 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_d
3888 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_d
3889 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_d
3890 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_d
3891 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_d
3892 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_d
3893 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_d
3894 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_d
// Emits x86 code for floating-point arithmetic. The operation is the low six
// bits of the instruction word: 0..3 are add/sub/mul/div, 4 sqrt, 5 abs,
// 6 mov, 7 neg. opcode2[i] selects single (0x10, reg_cop1_simple) or double
// (0x11, reg_cop1_double) precision.
//   i      - index of the instruction being assembled
//   i_regs - register state; regmap maps host registers to guest registers
// Register numbers come from the instruction word: first source bits 11..15,
// second source bits 16..20, destination bits 6..10. Each selects a table
// entry whose value is loaded into temp.
// NOTE(review): short lines (braces, the load/store emitters between the
// table reads, early returns) are missing from this listing.
3899 void float_assemble(int i,struct regstat *i_regs)
3901 signed char temp=get_reg(i_regs->regmap,-1);
3903 // Check cop1 unusable
3905 signed char cs=get_reg(i_regs->regmap,CSREG);
3907 emit_testimm(cs,0x20000000);
3910 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3914 if((source[i]&0x3f)==6) // mov
// Nothing to emit when source and destination are the same register.
3916 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3917 if(opcode2[i]==0x10) {
3918 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3920 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3923 if(opcode2[i]==0x11) {
3924 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3926 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
// Unary operations: sqrt, abs, neg.
3933 if((source[i]&0x3f)>3)
3935 if(opcode2[i]==0x10) {
3936 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3938 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3939 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3942 if(opcode2[i]==0x11) {
3943 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3945 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3946 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3949 if((source[i]&0x3f)==4) // sqrt
3951 if((source[i]&0x3f)==5) // abs
3953 if((source[i]&0x3f)==7) // neg
3955 if(opcode2[i]==0x10) {
3958 if(opcode2[i]==0x11) {
// Binary operations: add, sub, mul, div.
3963 if((source[i]&0x3f)<4)
3965 if(opcode2[i]==0x10) {
3966 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3969 if(opcode2[i]==0x11) {
3970 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
// Different source registers: the second operand is addressed through temp.
3973 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
3974 if(opcode2[i]==0x10) {
3975 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
3976 if((source[i]&0x3f)==0) emit_fadds(temp);
3977 if((source[i]&0x3f)==1) emit_fsubs(temp);
3978 if((source[i]&0x3f)==2) emit_fmuls(temp);
3979 if((source[i]&0x3f)==3) emit_fdivs(temp);
3981 else if(opcode2[i]==0x11) {
3982 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
3983 if((source[i]&0x3f)==0) emit_faddl(temp);
3984 if((source[i]&0x3f)==1) emit_fsubl(temp);
3985 if((source[i]&0x3f)==2) emit_fmull(temp);
3986 if((source[i]&0x3f)==3) emit_fdivl(temp);
// NOTE(review): presumably the same-register case, operating on the value
// already loaded - the branch header is not visible in this listing.
3990 if((source[i]&0x3f)==0) emit_fadd(0);
3991 if((source[i]&0x3f)==1) emit_fsub(0);
3992 if((source[i]&0x3f)==2) emit_fmul(0);
3993 if((source[i]&0x3f)==3) emit_fdiv(0);
// Reload temp with the destination pointer unless it already points there.
3995 if(opcode2[i]==0x10) {
3996 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
3997 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4001 if(opcode2[i]==0x11) {
4002 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4003 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
// Emits x86 code for the MIPS multiply and divide instructions, selected by
// opcode2[i]: MULT 0x18, MULTU 0x19, DIV 0x1A, DIVU 0x1B, DMULT 0x1C,
// DMULTU 0x1D, DDIV 0x1E, DDIVU 0x1F. Results go to the HIREG / LOREG guest
// registers (with |64 naming the upper 32-bit half).
//   i      - index of the instruction being assembled
//   i_regs - register state; regmap maps host registers to guest registers
// The 64-bit forms call the helpers mult64, multu64, div64 and divu64 and
// then reload whichever HI/LO halves have a host register allocated.
// Host-register variables are signed char because get_reg results are tested
// against the negative "not allocated" value with >=0.
// NOTE(review): short lines (braces, asserts, the 32-bit multiply/divide
// emitters, some branch headers) are missing from this listing.
4011 void multdiv_assemble_x86(int i,struct regstat *i_regs)
4018 // case 0x1D: DMULTU
4023 if((opcode2[i]&4)==0) // 32-bit
4025 if(opcode2[i]==0x18) // MULT
4027 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4028 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4034 if(opcode2[i]==0x19) // MULTU
4036 signed char m1=get_reg(i_regs->regmap,rs1[i]);
4037 signed char m2=get_reg(i_regs->regmap,rs2[i]);
4043 if(opcode2[i]==0x1A) // DIV
4045 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4046 signed char d2=get_reg(i_regs->regmap,rs2[i]);
// Conditional jump to 8 bytes past the current output position.
// NOTE(review): presumably skips the divide when the divisor is zero; the
// test that sets the flags is not visible in this listing.
4052 emit_jeq((int)out+8);
4055 if(opcode2[i]==0x1B) // DIVU
4057 signed char d1=get_reg(i_regs->regmap,rs1[i]);
4058 signed char d2=get_reg(i_regs->regmap,rs2[i]);
4064 emit_jeq((int)out+8);
4070 if(opcode2[i]==0x1C) // DMULT
4072 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4073 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4074 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4075 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4085 emit_call((int)&mult64);
// Reload the HI/LO halves that have a host register.
4090 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4091 signed char hil=get_reg(i_regs->regmap,HIREG);
4092 if(hih>=0) emit_loadreg(HIREG|64,hih);
4093 if(hil>=0) emit_loadreg(HIREG,hil);
4094 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4095 signed char lol=get_reg(i_regs->regmap,LOREG);
4096 if(loh>=0) emit_loadreg(LOREG|64,loh);
4097 if(lol>=0) emit_loadreg(LOREG,lol);
4099 if(opcode2[i]==0x1D) // DMULTU
4101 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4102 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
4103 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4104 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
4105 signed char temp=get_reg(i_regs->regmap,-1);
// NOTE(review): both an inline multiply sequence and a call to multu64 are
// visible here; the lines that would show whether one of them is disabled
// are missing from this listing.
4113 emit_storereg(LOREG,EAX);
4117 emit_add(EAX,temp,temp);
4119 emit_storereg(HIREG,EDX);
4122 emit_add(EAX,temp,temp);
4124 emit_storereg(LOREG|64,temp);
4128 emit_add(EAX,temp,EAX);
4129 emit_loadreg(HIREG,temp);
4131 emit_add(EAX,temp,EAX);
4139 emit_call((int)&multu64);
4144 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4145 signed char hil=get_reg(i_regs->regmap,HIREG);
4146 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4147 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4149 // Shouldn't be necessary
4150 //char loh=get_reg(i_regs->regmap,LOREG|64);
4151 //char lol=get_reg(i_regs->regmap,LOREG);
4152 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4153 //if(lol>=0) emit_loadreg(LOREG,lol);
4155 if(opcode2[i]==0x1E) // DDIV
4157 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4158 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4159 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4160 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4165 //emit_pushreg(d2h);
4166 //emit_pushreg(d2l);
4167 //emit_pushreg(d1h);
4168 //emit_pushreg(d1l);
// 16-byte stack scratch area: dividend halves at 0/4, divisor halves at
// 8/12. They are read back as 64-bit values into ARG1_REG and ARG2_REG.
4169 emit_addimm64(ESP,-16,ESP);
4170 emit_writeword_indexed(d2h,12,ESP);
4171 emit_writeword_indexed(d2l,8,ESP);
4172 emit_writeword_indexed(d1h,4,ESP);
4173 emit_writeword_indexed(d1l,0,ESP);
4174 emit_readdword_indexed(0,ESP,ARG1_REG);
4175 emit_readdword_indexed(8,ESP,ARG2_REG);
4176 emit_call((int)&div64);
// Restore the operand registers from the scratch area, then release it.
4181 emit_readword_indexed(0,ESP,d1l);
4182 emit_readword_indexed(4,ESP,d1h);
4183 emit_readword_indexed(8,ESP,d2l);
4184 emit_readword_indexed(12,ESP,d2h);
4185 emit_addimm64(ESP,16,ESP);
4186 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4187 signed char hil=get_reg(i_regs->regmap,HIREG);
4188 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4189 signed char lol=get_reg(i_regs->regmap,LOREG);
4190 if(hih>=0) emit_loadreg(HIREG|64,hih);
4191 if(hil>=0) emit_loadreg(HIREG,hil);
4192 if(loh>=0) emit_loadreg(LOREG|64,loh);
4193 if(lol>=0) emit_loadreg(LOREG,lol);
4195 if(opcode2[i]==0x1F) // DDIVU
4197 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4198 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
4199 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4200 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
4205 //emit_pushreg(d2h);
4206 //emit_pushreg(d2l);
4207 //emit_pushreg(d1h);
4208 //emit_pushreg(d1l);
// Same scratch-area layout as DDIV, calling the unsigned helper.
4209 emit_addimm64(ESP,-16,ESP);
4210 emit_writeword_indexed(d2h,12,ESP);
4211 emit_writeword_indexed(d2l,8,ESP);
4212 emit_writeword_indexed(d1h,4,ESP);
4213 emit_writeword_indexed(d1l,0,ESP);
4214 emit_readdword_indexed(0,ESP,ARG1_REG);
4215 emit_readdword_indexed(8,ESP,ARG2_REG);
4216 emit_call((int)&divu64);
4221 emit_readword_indexed(0,ESP,d1l);
4222 emit_readword_indexed(4,ESP,d1h);
4223 emit_readword_indexed(8,ESP,d2l);
4224 emit_readword_indexed(12,ESP,d2h);
4225 emit_addimm64(ESP,16,ESP);
4226 signed char hih=get_reg(i_regs->regmap,HIREG|64);
4227 signed char hil=get_reg(i_regs->regmap,HIREG);
4228 signed char loh=get_reg(i_regs->regmap,LOREG|64);
4229 signed char lol=get_reg(i_regs->regmap,LOREG);
4230 if(hih>=0) emit_loadreg(HIREG|64,hih);
4231 if(hil>=0) emit_loadreg(HIREG,hil);
4232 if(loh>=0) emit_loadreg(LOREG|64,loh);
4233 if(lol>=0) emit_loadreg(LOREG,lol);
4239 // Multiply by zero is zero.
4240 // MIPS does not have a divide by zero exception.
4241 // The result is undefined, we return zero.
4242 signed char hr=get_reg(i_regs->regmap,HIREG);
4243 signed char lr=get_reg(i_regs->regmap,LOREG);
4244 if(hr>=0) emit_zeroreg(hr);
4245 if(lr>=0) emit_zeroreg(lr);
4248 #define multdiv_assemble multdiv_assemble_x86
// Loads the constant 0xf8 into host register r.
// NOTE(review): presumably the mask that turns an address into a byte offset
// into mini_ht (32 rows of two u_int each) - confirm against do_rhash.
4250 void do_preload_rhash(int r) {
4251 emit_movimm(0xf8,r);
// No code is emitted in the visible lines; r is unused.
4254 void do_preload_rhtbl(int r) {
4255 // Don't need this for x86
// NOTE(review): only the signature is visible in this listing; the body that
// combines rs with the preloaded value in rh is missing.
4258 void do_rhash(int rs,int rh) {
// No code is emitted in the visible lines; ht and rh are unused here.
4262 void do_miniht_load(int ht,int rh) {
4263 // Don't need this for x86. The load and compare can be combined into
4264 // a single instruction (below)
// Emits the mini hash table probe for an indirect jump.
//   rs - host register holding the target address
//   rh - host register holding the offset into mini_ht
//   ht - unused in the visible lines
// The entry at mini_ht+rh is compared with rs. On a mismatch control goes to
// the jump_vaddr_reg handler for host register rs; otherwise the word at
// offset 4 of the same row is loaded into rh.
// NOTE(review): the final jump through rh is not visible in this listing.
4267 void do_miniht_jump(int rs,int rh,int ht) {
4268 emit_cmpmem_indexed((int)mini_ht,rh,rs);
// jump_vaddr_reg holds pointers; cast like every other emit_jne call site.
4269 emit_jne((int)jump_vaddr_reg[rs]);
4270 emit_readword_indexed((int)mini_ht+4,rh,rh);
// Emits code that loads return_address into host register rt and records it
// in the mini hash table row selected by (return_address&0xFF)>>3.
//   return_address - address placed in the link register
//   rt             - host register for the link register
//   temp           - unused in the visible lines
4274 void do_miniht_insert(int return_address,int rt,int temp) {
4275 emit_movimm(return_address,rt); // PC into link register
4276 //emit_writeword_imm(return_address,(int)&mini_ht[(return_address&0xFF)>>8][0]);
// Column 0 of the row receives the address itself.
4277 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
// Column 1 is written as 0 right after the current output position has been
// registered with add_to_linker.
// NOTE(review): presumably the linker later patches that immediate with the
// compiled code address - confirm against add_to_linker.
4278 add_to_linker((int)out,return_address,1);
4279 emit_writeword_imm(0,(int)&mini_ht[(return_address&0xFF)>>3][1]);
// We don't need this for x86
// Empty on this backend: nothing is emitted and n is ignored.
4283 void literal_pool(int n) {}
// Empty on this backend: nothing is emitted and n is ignored.
4284 void literal_pool_jumpover(int n) {}
4286 // CPU-architecture-specific initialization, not needed for x86