fix some alignment issues
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_x86.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_x86.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
/* Global recompiler state.
 * NOTE(review): apart from the two annotated entries, the role of each
 * variable is not visible in this file — confirm against its users. */
int cycle_count;           // backing store for CCREG (see emit_loadreg/emit_storereg)
int last_count;
int pcaddr;
int pending_exception;
int branch_target;
uint64_t readmem_dword;
precomp_instr fake_pc;
u_int memory_map[1048576]; // indexed by (address>>12) in verify_dirty/get_bounds
u_int mini_ht[32][2] __attribute__((aligned(8)));
u_char restore_candidate[512] __attribute__((aligned(4)));
31
// Entry points defined outside this file.
// NOTE(review): presumably assembly stubs from the linkage file — confirm.
void do_interrupt();
void jump_vaddr_eax();
void jump_vaddr_ecx();
void jump_vaddr_edx();
void jump_vaddr_ebx();
void jump_vaddr_ebp();
void jump_vaddr_edi();

// Addresses of the jump_vaddr_* entry points, indexed by x86 register number
// in the same order as regname[] (eax, ecx, edx, ebx, esp, ebp, esi, edi).
// Slots 4 (esp) and 6 (esi) are 0: no entry point exists for them.
const u_int jump_vaddr_reg[8] = {
  (int)jump_vaddr_eax,
  (int)jump_vaddr_ecx,
  (int)jump_vaddr_edx,
  (int)jump_vaddr_ebx,
  0,
  (int)jump_vaddr_ebp,
  0,
  (int)jump_vaddr_edi };
49
// One 16-bit value per rounding mode.
// NOTE(review): the values look like x87 FPU control words that differ only
// in the rounding-control field (bits 10-11) — confirm against the code that
// loads them.
const u_short rounding_modes[4] = {
  0x33F, // round
  0xF3F, // trunc
  0xB3F, // ceil
  0x73F};// floor
55
56#include "fpu.h"
57
// We need these for cmovcc instructions on x86: the emitters take the
// address of a constant as the cmov source operand (see emit_cmovne).
u_int const_zero=0;
u_int const_one=1;
61
62/* Linker */
63
64void set_jump_target(int addr,int target)
65{
66 u_char *ptr=(u_char *)addr;
67 if(*ptr==0x0f)
68 {
69 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
70 u_int *ptr2=(u_int *)(ptr+2);
71 *ptr2=target-(int)ptr2-4;
72 }
73 else if(*ptr==0xe8||*ptr==0xe9) {
74 u_int *ptr2=(u_int *)(ptr+1);
75 *ptr2=target-(int)ptr2-4;
76 }
77 else
78 {
79 assert(*ptr==0xc7); /* mov immediate (store address) */
80 u_int *ptr2=(u_int *)(ptr+6);
81 *ptr2=target;
82 }
83}
84
/* Redirect the jump referenced by `stub` back to the stub itself.
 * The stub stores, at byte offset 6, the address of a 32-bit relative
 * displacement; that displacement is rewritten so it resolves to `stub`. */
void kill_pointer(void *stub)
{
  int **slot=(int **)(stub+6);
  int *disp=*slot;
  *disp=(int)stub-(int)disp-4;
}
/* Return the absolute address currently targeted by the jump that `stub`
 * references (the inverse of the calculation done in kill_pointer). */
int get_pointer(void *stub)
{
  int **slot=(int **)(stub+6);
  int *disp=*slot;
  return *disp+(int)disp+4;
}
95
96// Find the "clean" entry point from a "dirty" entry point
97// by skipping past the call to verify_code
98u_int get_clean_addr(int addr)
99{
100 u_char *ptr=(u_char *)addr;
101 assert(ptr[20]==0xE8); // call instruction
102 assert(ptr[25]==0x83); // pop (add esp,4) instruction
103 if(ptr[28]==0xE9) return *(u_int *)(ptr+29)+addr+33; // follow jmp
104 else return(addr+28);
105}
106
107int verify_dirty(int addr)
108{
109 u_char *ptr=(u_char *)addr;
110 assert(ptr[5]==0xB8);
111 u_int source=*(u_int *)(ptr+6);
112 u_int copy=*(u_int *)(ptr+11);
113 u_int len=*(u_int *)(ptr+16);
114 assert(ptr[20]==0xE8); // call instruction
115 u_int verifier=*(u_int *)(ptr+21)+(u_int)ptr+25;
116 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
117 unsigned int page=source>>12;
118 unsigned int map_value=memory_map[page];
119 if(map_value>=0x80000000) return 0;
120 while(page<((source+len-1)>>12)) {
121 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
122 }
123 source = source+(map_value<<2);
124 }
125 //printf("verify_dirty: %x %x %x\n",source,copy,len);
126 return !memcmp((void *)source,(void *)copy,len);
127}
128
129// This doesn't necessarily find all clean entry points, just
130// guarantees that it's not dirty
131int isclean(int addr)
132{
133 u_char *ptr=(u_char *)addr;
134 if(ptr[5]!=0xB8) return 1; // mov imm,%eax
135 if(ptr[10]!=0xBB) return 1; // mov imm,%ebx
136 if(ptr[15]!=0xB9) return 1; // mov imm,%ecx
137 if(ptr[20]!=0xE8) return 1; // call instruction
138 if(ptr[25]!=0x83) return 1; // pop (add esp,4) instruction
139 return 0;
140}
141
142void get_bounds(int addr,u_int *start,u_int *end)
143{
144 u_char *ptr=(u_char *)addr;
145 assert(ptr[5]==0xB8);
146 u_int source=*(u_int *)(ptr+6);
147 //u_int copy=*(u_int *)(ptr+11);
148 u_int len=*(u_int *)(ptr+16);
149 assert(ptr[20]==0xE8); // call instruction
150 u_int verifier=*(u_int *)(ptr+21)+(u_int)ptr+25;
151 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
152 if(memory_map[source>>12]>=0x80000000) source = 0;
153 else source = source+(memory_map[source>>12]<<2);
154 }
155 if(start) *start=source;
156 if(end) *end=source+len;
157}
158
159/* Register allocation */
160
161// Note: registers are allocated clean (unmodified state)
162// if you intend to modify the register, you must call dirty_reg().
163void alloc_reg(struct regstat *cur,int i,signed char reg)
164{
165 int r,hr;
166 int preferred_reg = (reg&3)+(reg>28)*4-(reg==32)+2*(reg==36)-(reg==40);
167
168 // Don't allocate unused registers
169 if((cur->u>>reg)&1) return;
170
171 // see if it's already allocated
172 for(hr=0;hr<HOST_REGS;hr++)
173 {
174 if(cur->regmap[hr]==reg) return;
175 }
176
177 // Keep the same mapping if the register was already allocated in a loop
178 preferred_reg = loop_reg(i,reg,preferred_reg);
179
180 // Try to allocate the preferred register
181 if(cur->regmap[preferred_reg]==-1) {
182 cur->regmap[preferred_reg]=reg;
183 cur->dirty&=~(1<<preferred_reg);
184 cur->isconst&=~(1<<preferred_reg);
185 return;
186 }
187 r=cur->regmap[preferred_reg];
188 if(r<64&&((cur->u>>r)&1)) {
189 cur->regmap[preferred_reg]=reg;
190 cur->dirty&=~(1<<preferred_reg);
191 cur->isconst&=~(1<<preferred_reg);
192 return;
193 }
194 if(r>=64&&((cur->uu>>(r&63))&1)) {
195 cur->regmap[preferred_reg]=reg;
196 cur->dirty&=~(1<<preferred_reg);
197 cur->isconst&=~(1<<preferred_reg);
198 return;
199 }
200
201 // Try to allocate EAX, EBX, ECX, or EDX
202 // We prefer these because they can do byte and halfword loads
203 for(hr=0;hr<4;hr++) {
204 if(cur->regmap[hr]==-1) {
205 cur->regmap[hr]=reg;
206 cur->dirty&=~(1<<hr);
207 cur->isconst&=~(1<<hr);
208 return;
209 }
210 }
211
212 // Clear any unneeded registers
213 // We try to keep the mapping consistent, if possible, because it
214 // makes branches easier (especially loops). So we try to allocate
215 // first (see above) before removing old mappings. If this is not
216 // possible then go ahead and clear out the registers that are no
217 // longer needed.
218 for(hr=0;hr<HOST_REGS;hr++)
219 {
220 r=cur->regmap[hr];
221 if(r>=0) {
222 if(r<64) {
223 if((cur->u>>r)&1)
224 if(i==0||(unneeded_reg[i-1]>>r)&1) {cur->regmap[hr]=-1;break;}
225 }
226 else
227 {
228 if((cur->uu>>(r&63))&1)
229 if(i==0||(unneeded_reg_upper[i-1]>>(r&63))&1) {cur->regmap[hr]=-1;break;}
230 }
231 }
232 }
233 // Try to allocate any available register, but prefer
234 // registers that have not been used recently.
235 if(i>0) {
236 for(hr=0;hr<HOST_REGS;hr++) {
237 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
238 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
239 cur->regmap[hr]=reg;
240 cur->dirty&=~(1<<hr);
241 cur->isconst&=~(1<<hr);
242 return;
243 }
244 }
245 }
246 }
247 // Try to allocate any available register
248 for(hr=0;hr<HOST_REGS;hr++) {
249 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
250 cur->regmap[hr]=reg;
251 cur->dirty&=~(1<<hr);
252 cur->isconst&=~(1<<hr);
253 return;
254 }
255 }
256
257 // Ok, now we have to evict someone
258 // Pick a register we hopefully won't need soon
259 u_char hsn[MAXREG+1];
260 memset(hsn,10,sizeof(hsn));
261 int j;
262 lsn(hsn,i,&preferred_reg);
263 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
264 if(i>0) {
265 // Don't evict the cycle count at entry points, otherwise the entry
266 // stub will have to write it.
267 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
268 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
269 for(j=10;j>=3;j--)
270 {
271 // Alloc preferred register if available
272 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
273 for(hr=0;hr<HOST_REGS;hr++) {
274 // Evict both parts of a 64-bit register
275 if((cur->regmap[hr]&63)==r) {
276 cur->regmap[hr]=-1;
277 cur->dirty&=~(1<<hr);
278 cur->isconst&=~(1<<hr);
279 }
280 }
281 cur->regmap[preferred_reg]=reg;
282 return;
283 }
284 for(r=1;r<=MAXREG;r++)
285 {
286 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
287 for(hr=0;hr<HOST_REGS;hr++) {
288 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
289 if(cur->regmap[hr]==r+64) {
290 cur->regmap[hr]=reg;
291 cur->dirty&=~(1<<hr);
292 cur->isconst&=~(1<<hr);
293 return;
294 }
295 }
296 }
297 for(hr=0;hr<HOST_REGS;hr++) {
298 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
299 if(cur->regmap[hr]==r) {
300 cur->regmap[hr]=reg;
301 cur->dirty&=~(1<<hr);
302 cur->isconst&=~(1<<hr);
303 return;
304 }
305 }
306 }
307 }
308 }
309 }
310 }
311 for(j=10;j>=0;j--)
312 {
313 for(r=1;r<=MAXREG;r++)
314 {
315 if(hsn[r]==j) {
316 for(hr=0;hr<HOST_REGS;hr++) {
317 if(cur->regmap[hr]==r+64) {
318 cur->regmap[hr]=reg;
319 cur->dirty&=~(1<<hr);
320 cur->isconst&=~(1<<hr);
321 return;
322 }
323 }
324 for(hr=0;hr<HOST_REGS;hr++) {
325 if(cur->regmap[hr]==r) {
326 cur->regmap[hr]=reg;
327 cur->dirty&=~(1<<hr);
328 cur->isconst&=~(1<<hr);
329 return;
330 }
331 }
332 }
333 }
334 }
335 printf("This shouldn't happen (alloc_reg)");exit(1);
336}
337
338void alloc_reg64(struct regstat *cur,int i,signed char reg)
339{
340 int preferred_reg = 5+reg%3;
341 int r,hr;
342
343 // allocate the lower 32 bits
344 alloc_reg(cur,i,reg);
345
346 // Don't allocate unused registers
347 if((cur->uu>>reg)&1) return;
348
349 // see if the upper half is already allocated
350 for(hr=0;hr<HOST_REGS;hr++)
351 {
352 if(cur->regmap[hr]==reg+64) return;
353 }
354
355 // Keep the same mapping if the register was already allocated in a loop
356 preferred_reg = loop_reg(i,reg,preferred_reg);
357
358 // Try to allocate the preferred register
359 if(cur->regmap[preferred_reg]==-1) {
360 cur->regmap[preferred_reg]=reg|64;
361 cur->dirty&=~(1<<preferred_reg);
362 cur->isconst&=~(1<<preferred_reg);
363 return;
364 }
365 r=cur->regmap[preferred_reg];
366 if(r<64&&((cur->u>>r)&1)) {
367 cur->regmap[preferred_reg]=reg|64;
368 cur->dirty&=~(1<<preferred_reg);
369 cur->isconst&=~(1<<preferred_reg);
370 return;
371 }
372 if(r>=64&&((cur->uu>>(r&63))&1)) {
373 cur->regmap[preferred_reg]=reg|64;
374 cur->dirty&=~(1<<preferred_reg);
375 cur->isconst&=~(1<<preferred_reg);
376 return;
377 }
378
379 // Try to allocate EBP, ESI or EDI
380 for(hr=5;hr<8;hr++) {
381 if(cur->regmap[hr]==-1) {
382 cur->regmap[hr]=reg|64;
383 cur->dirty&=~(1<<hr);
384 cur->isconst&=~(1<<hr);
385 return;
386 }
387 }
388
389 // Clear any unneeded registers
390 // We try to keep the mapping consistent, if possible, because it
391 // makes branches easier (especially loops). So we try to allocate
392 // first (see above) before removing old mappings. If this is not
393 // possible then go ahead and clear out the registers that are no
394 // longer needed.
395 for(hr=HOST_REGS-1;hr>=0;hr--)
396 {
397 r=cur->regmap[hr];
398 if(r>=0) {
399 if(r<64) {
400 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
401 }
402 else
403 {
404 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
405 }
406 }
407 }
408 // Try to allocate any available register, but prefer
409 // registers that have not been used recently.
410 if(i>0) {
411 for(hr=0;hr<HOST_REGS;hr++) {
412 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
413 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
414 cur->regmap[hr]=reg|64;
415 cur->dirty&=~(1<<hr);
416 cur->isconst&=~(1<<hr);
417 return;
418 }
419 }
420 }
421 }
422 // Try to allocate any available register
423 for(hr=0;hr<HOST_REGS;hr++) {
424 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
425 cur->regmap[hr]=reg|64;
426 cur->dirty&=~(1<<hr);
427 cur->isconst&=~(1<<hr);
428 return;
429 }
430 }
431
432 // Ok, now we have to evict someone
433 // Pick a register we hopefully won't need soon
434 u_char hsn[MAXREG+1];
435 memset(hsn,10,sizeof(hsn));
436 int j;
437 lsn(hsn,i,&preferred_reg);
438 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
439 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
440 if(i>0) {
441 // Don't evict the cycle count at entry points, otherwise the entry
442 // stub will have to write it.
443 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
444 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
445 for(j=10;j>=3;j--)
446 {
447 // Alloc preferred register if available
448 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
449 for(hr=0;hr<HOST_REGS;hr++) {
450 // Evict both parts of a 64-bit register
451 if((cur->regmap[hr]&63)==r) {
452 cur->regmap[hr]=-1;
453 cur->dirty&=~(1<<hr);
454 cur->isconst&=~(1<<hr);
455 }
456 }
457 cur->regmap[preferred_reg]=reg|64;
458 return;
459 }
460 for(r=1;r<=MAXREG;r++)
461 {
462 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
463 for(hr=0;hr<HOST_REGS;hr++) {
464 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
465 if(cur->regmap[hr]==r+64) {
466 cur->regmap[hr]=reg|64;
467 cur->dirty&=~(1<<hr);
468 cur->isconst&=~(1<<hr);
469 return;
470 }
471 }
472 }
473 for(hr=0;hr<HOST_REGS;hr++) {
474 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
475 if(cur->regmap[hr]==r) {
476 cur->regmap[hr]=reg|64;
477 cur->dirty&=~(1<<hr);
478 cur->isconst&=~(1<<hr);
479 return;
480 }
481 }
482 }
483 }
484 }
485 }
486 }
487 for(j=10;j>=0;j--)
488 {
489 for(r=1;r<=MAXREG;r++)
490 {
491 if(hsn[r]==j) {
492 for(hr=0;hr<HOST_REGS;hr++) {
493 if(cur->regmap[hr]==r+64) {
494 cur->regmap[hr]=reg|64;
495 cur->dirty&=~(1<<hr);
496 cur->isconst&=~(1<<hr);
497 return;
498 }
499 }
500 for(hr=0;hr<HOST_REGS;hr++) {
501 if(cur->regmap[hr]==r) {
502 cur->regmap[hr]=reg|64;
503 cur->dirty&=~(1<<hr);
504 cur->isconst&=~(1<<hr);
505 return;
506 }
507 }
508 }
509 }
510 }
511 printf("This shouldn't happen");exit(1);
512}
513
514// Allocate a temporary register. This is done without regard to
515// dirty status or whether the register we request is on the unneeded list
516// Note: This will only allocate one register, even if called multiple times
517void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
518{
519 int r,hr;
520 int preferred_reg = -1;
521
522 // see if it's already allocated
523 for(hr=0;hr<HOST_REGS;hr++)
524 {
525 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
526 }
527
528 // Try to allocate any available register, starting with EDI, ESI, EBP...
529 // We prefer EDI, ESI, EBP since the others are used for byte/halfword stores
530 for(hr=HOST_REGS-1;hr>=0;hr--) {
531 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
532 cur->regmap[hr]=reg;
533 cur->dirty&=~(1<<hr);
534 cur->isconst&=~(1<<hr);
535 return;
536 }
537 }
538
539 // Find an unneeded register
540 for(hr=HOST_REGS-1;hr>=0;hr--)
541 {
542 r=cur->regmap[hr];
543 if(r>=0) {
544 if(r<64) {
545 if((cur->u>>r)&1) {
546 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
547 cur->regmap[hr]=reg;
548 cur->dirty&=~(1<<hr);
549 cur->isconst&=~(1<<hr);
550 return;
551 }
552 }
553 }
554 else
555 {
556 if((cur->uu>>(r&63))&1) {
557 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
558 cur->regmap[hr]=reg;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
564 }
565 }
566 }
567
568 // Ok, now we have to evict someone
569 // Pick a register we hopefully won't need soon
570 // TODO: we might want to follow unconditional jumps here
571 // TODO: get rid of dupe code and make this into a function
572 u_char hsn[MAXREG+1];
573 memset(hsn,10,sizeof(hsn));
574 int j;
575 lsn(hsn,i,&preferred_reg);
576 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
577 if(i>0) {
578 // Don't evict the cycle count at entry points, otherwise the entry
579 // stub will have to write it.
580 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
581 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
582 for(j=10;j>=3;j--)
583 {
584 for(r=1;r<=MAXREG;r++)
585 {
586 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
587 for(hr=0;hr<HOST_REGS;hr++) {
588 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
589 if(cur->regmap[hr]==r+64) {
590 cur->regmap[hr]=reg;
591 cur->dirty&=~(1<<hr);
592 cur->isconst&=~(1<<hr);
593 return;
594 }
595 }
596 }
597 for(hr=0;hr<HOST_REGS;hr++) {
598 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
599 if(cur->regmap[hr]==r) {
600 cur->regmap[hr]=reg;
601 cur->dirty&=~(1<<hr);
602 cur->isconst&=~(1<<hr);
603 return;
604 }
605 }
606 }
607 }
608 }
609 }
610 }
611 for(j=10;j>=0;j--)
612 {
613 for(r=1;r<=MAXREG;r++)
614 {
615 if(hsn[r]==j) {
616 for(hr=0;hr<HOST_REGS;hr++) {
617 if(cur->regmap[hr]==r+64) {
618 cur->regmap[hr]=reg;
619 cur->dirty&=~(1<<hr);
620 cur->isconst&=~(1<<hr);
621 return;
622 }
623 }
624 for(hr=0;hr<HOST_REGS;hr++) {
625 if(cur->regmap[hr]==r) {
626 cur->regmap[hr]=reg;
627 cur->dirty&=~(1<<hr);
628 cur->isconst&=~(1<<hr);
629 return;
630 }
631 }
632 }
633 }
634 }
635 printf("This shouldn't happen");exit(1);
636}
637// Allocate a specific x86 register.
638void alloc_x86_reg(struct regstat *cur,int i,signed char reg,char hr)
639{
640 int n;
641
642 // see if it's already allocated (and dealloc it)
643 for(n=0;n<HOST_REGS;n++)
644 {
645 if(n!=ESP&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
646 }
647
648 cur->regmap[hr]=reg;
649 cur->dirty&=~(1<<hr);
650 cur->isconst&=~(1<<hr);
651}
652
653// Alloc cycle count into dedicated register
654alloc_cc(struct regstat *cur,int i)
655{
656 alloc_x86_reg(cur,i,CCREG,ESI);
657}
658
659/* Special alloc */
660
661void multdiv_alloc_x86(struct regstat *current,int i)
662{
663 // case 0x18: MULT
664 // case 0x19: MULTU
665 // case 0x1A: DIV
666 // case 0x1B: DIVU
667 // case 0x1C: DMULT
668 // case 0x1D: DMULTU
669 // case 0x1E: DDIV
670 // case 0x1F: DDIVU
671 clear_const(current,rs1[i]);
672 clear_const(current,rs2[i]);
673 if(rs1[i]&&rs2[i])
674 {
675 if((opcode2[i]&4)==0) // 32-bit
676 {
677 current->u&=~(1LL<<HIREG);
678 current->u&=~(1LL<<LOREG);
679 alloc_x86_reg(current,i,HIREG,EDX);
680 alloc_x86_reg(current,i,LOREG,EAX);
681 alloc_reg(current,i,rs1[i]);
682 alloc_reg(current,i,rs2[i]);
683 current->is32|=1LL<<HIREG;
684 current->is32|=1LL<<LOREG;
685 dirty_reg(current,HIREG);
686 dirty_reg(current,LOREG);
687 }
688 else // 64-bit
689 {
690 alloc_x86_reg(current,i,HIREG|64,EDX);
691 alloc_x86_reg(current,i,HIREG,EAX);
692 alloc_reg64(current,i,rs1[i]);
693 alloc_reg64(current,i,rs2[i]);
694 alloc_all(current,i);
695 current->is32&=~(1LL<<HIREG);
696 current->is32&=~(1LL<<LOREG);
697 dirty_reg(current,HIREG);
698 dirty_reg(current,LOREG);
699 }
700 }
701 else
702 {
703 // Multiply by zero is zero.
704 // MIPS does not have a divide by zero exception.
705 // The result is undefined, we return zero.
706 alloc_reg(current,i,HIREG);
707 alloc_reg(current,i,LOREG);
708 current->is32|=1LL<<HIREG;
709 current->is32|=1LL<<LOREG;
710 dirty_reg(current,HIREG);
711 dirty_reg(current,LOREG);
712 }
713}
714#define multdiv_alloc multdiv_alloc_x86
715
716/* Assembler */
717
// Printable names of the eight 32-bit general-purpose registers, indexed by
// the register number used in the ModRM/SIB fields emitted below.
char regname[8][4] = {"eax","ecx","edx","ebx","esp","ebp","esi","edi"};
727
728void output_byte(u_char byte)
729{
730 *(out++)=byte;
731}
732void output_modrm(u_char mod,u_char rm,u_char ext)
733{
734 assert(mod<4);
735 assert(rm<8);
736 assert(ext<8);
737 u_char byte=(mod<<6)|(ext<<3)|rm;
738 *(out++)=byte;
739}
740void output_sib(u_char scale,u_char index,u_char base)
741{
742 assert(scale<4);
743 assert(index<8);
744 assert(base<8);
745 u_char byte=(scale<<6)|(index<<3)|base;
746 *(out++)=byte;
747}
748void output_w32(u_int word)
749{
750 *((u_int *)out)=word;
751 out+=4;
752}
753
754void emit_mov(int rs,int rt)
755{
756 assem_debug("mov %%%s,%%%s\n",regname[rs],regname[rt]);
757 output_byte(0x89);
758 output_modrm(3,rt,rs);
759}
760
761void emit_add(int rs1,int rs2,int rt)
762{
763 if(rs1==rt) {
764 assem_debug("add %%%s,%%%s\n",regname[rs2],regname[rs1]);
765 output_byte(0x01);
766 output_modrm(3,rs1,rs2);
767 }else if(rs2==rt) {
768 assem_debug("add %%%s,%%%s\n",regname[rs1],regname[rs2]);
769 output_byte(0x01);
770 output_modrm(3,rs2,rs1);
771 }else {
772 assem_debug("lea (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
773 output_byte(0x8D);
774 if(rs1!=EBP) {
775 output_modrm(0,4,rt);
776 output_sib(0,rs2,rs1);
777 }else if(rs2!=EBP) {
778 output_modrm(0,4,rt);
779 output_sib(0,rs1,rs2);
780 }else /* lea 0(,%ebp,2) */{
781 output_modrm(0,4,rt);
782 output_sib(1,EBP,5);
783 output_w32(0);
784 }
785 }
786}
787
// Same as emit_add() on this backend.
// NOTE(review): emit_add() uses LEA when rt differs from both sources, and
// LEA leaves EFLAGS untouched — confirm no caller relies on the carry flag
// after this call.
void emit_adds(int rs1,int rs2,int rt)
{
  emit_add(rs1, rs2, rt);
}
792
793void emit_lea8(int rs1,int rt)
794{
795 assem_debug("lea 0(%%%s,8),%%%s\n",regname[rs1],regname[rt]);
796 output_byte(0x8D);
797 output_modrm(0,4,rt);
798 output_sib(3,rs1,5);
799 output_w32(0);
800}
801void emit_leairrx1(int imm,int rs1,int rs2,int rt)
802{
803 assem_debug("lea %x(%%%s,%%%s,1),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
804 output_byte(0x8D);
805 if(imm!=0||rs1==EBP) {
806 output_modrm(2,4,rt);
807 output_sib(0,rs2,rs1);
808 output_w32(imm);
809 }else{
810 output_modrm(0,4,rt);
811 output_sib(0,rs2,rs1);
812 }
813}
814void emit_leairrx4(int imm,int rs1,int rs2,int rt)
815{
816 assem_debug("lea %x(%%%s,%%%s,4),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
817 output_byte(0x8D);
818 if(imm!=0||rs1==EBP) {
819 output_modrm(2,4,rt);
820 output_sib(2,rs2,rs1);
821 output_w32(imm);
822 }else{
823 output_modrm(0,4,rt);
824 output_sib(2,rs2,rs1);
825 }
826}
827
828void emit_neg(int rs, int rt)
829{
830 if(rs!=rt) emit_mov(rs,rt);
831 assem_debug("neg %%%s\n",regname[rt]);
832 output_byte(0xF7);
833 output_modrm(3,rt,3);
834}
835
// Same as emit_neg() on this backend (the emitted NEG updates the flags).
void emit_negs(int rs, int rt)
{
  emit_neg(rs, rt);
}
840
841void emit_sub(int rs1,int rs2,int rt)
842{
843 if(rs1==rt) {
844 assem_debug("sub %%%s,%%%s\n",regname[rs2],regname[rs1]);
845 output_byte(0x29);
846 output_modrm(3,rs1,rs2);
847 } else if(rs2==rt) {
848 emit_neg(rs2,rs2);
849 emit_add(rs2,rs1,rs2);
850 } else {
851 emit_mov(rs1,rt);
852 emit_sub(rt,rs2,rt);
853 }
854}
855
// Same as emit_sub() on this backend.
// NOTE(review): when rt aliases rs2, emit_sub() emits NEG+ADD, whose carry
// flag is not the borrow of a real SUB — confirm no caller relies on it.
void emit_subs(int rs1,int rs2,int rt)
{
  emit_sub(rs1, rs2, rt);
}
860
861void emit_zeroreg(int rt)
862{
863 output_byte(0x31);
864 output_modrm(3,rt,rt);
865 assem_debug("xor %%%s,%%%s\n",regname[rt],regname[rt]);
866}
867
868void emit_loadreg(int r, int hr)
869{
870 if((r&63)==0)
871 emit_zeroreg(hr);
872 else {
873 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
874 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
875 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
876 if(r==CCREG) addr=(int)&cycle_count;
877 if(r==CSREG) addr=(int)&Status;
878 if(r==FSREG) addr=(int)&FCR31;
879 assem_debug("mov %x+%d,%%%s\n",addr,r,regname[hr]);
880 output_byte(0x8B);
881 output_modrm(0,5,hr);
882 output_w32(addr);
883 }
884}
885void emit_storereg(int r, int hr)
886{
887 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
888 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
889 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
890 if(r==CCREG) addr=(int)&cycle_count;
891 if(r==FSREG) addr=(int)&FCR31;
892 assem_debug("mov %%%s,%x+%d\n",regname[hr],addr,r);
893 output_byte(0x89);
894 output_modrm(0,5,hr);
895 output_w32(addr);
896}
897
898void emit_test(int rs, int rt)
899{
900 assem_debug("test %%%s,%%%s\n",regname[rs],regname[rt]);
901 output_byte(0x85);
902 output_modrm(3,rs,rt);
903}
904
905void emit_testimm(int rs,int imm)
906{
907 assem_debug("test $0x%x,%%%s\n",imm,regname[rs]);
908 if(imm<128&&imm>=-128&&rs<4) {
909 output_byte(0xF6);
910 output_modrm(3,rs,0);
911 output_byte(imm);
912 }
913 else
914 {
915 output_byte(0xF7);
916 output_modrm(3,rs,0);
917 output_w32(imm);
918 }
919}
920
921void emit_not(int rs,int rt)
922{
923 if(rs!=rt) emit_mov(rs,rt);
924 assem_debug("not %%%s\n",regname[rt]);
925 output_byte(0xF7);
926 output_modrm(3,rt,2);
927}
928
929void emit_and(u_int rs1,u_int rs2,u_int rt)
930{
931 assert(rs1<8);
932 assert(rs2<8);
933 assert(rt<8);
934 if(rs1==rt) {
935 assem_debug("and %%%s,%%%s\n",regname[rs2],regname[rt]);
936 output_byte(0x21);
937 output_modrm(3,rs1,rs2);
938 }
939 else
940 if(rs2==rt) {
941 assem_debug("and %%%s,%%%s\n",regname[rs1],regname[rt]);
942 output_byte(0x21);
943 output_modrm(3,rs2,rs1);
944 }
945 else {
946 emit_mov(rs1,rt);
947 emit_and(rt,rs2,rt);
948 }
949}
950
951void emit_or(u_int rs1,u_int rs2,u_int rt)
952{
953 assert(rs1<8);
954 assert(rs2<8);
955 assert(rt<8);
956 if(rs1==rt) {
957 assem_debug("or %%%s,%%%s\n",regname[rs2],regname[rt]);
958 output_byte(0x09);
959 output_modrm(3,rs1,rs2);
960 }
961 else
962 if(rs2==rt) {
963 assem_debug("or %%%s,%%%s\n",regname[rs1],regname[rt]);
964 output_byte(0x09);
965 output_modrm(3,rs2,rs1);
966 }
967 else {
968 emit_mov(rs1,rt);
969 emit_or(rt,rs2,rt);
970 }
971}
// Same as emit_or() on this backend: every path through emit_or() ends in
// an OR instruction, which updates the flags.
void emit_or_and_set_flags(int rs1,int rs2,int rt)
{
  emit_or(rs1, rs2, rt);
}
976
977void emit_xor(u_int rs1,u_int rs2,u_int rt)
978{
979 assert(rs1<8);
980 assert(rs2<8);
981 assert(rt<8);
982 if(rs1==rt) {
983 assem_debug("xor %%%s,%%%s\n",regname[rs2],regname[rt]);
984 output_byte(0x31);
985 output_modrm(3,rs1,rs2);
986 }
987 else
988 if(rs2==rt) {
989 assem_debug("xor %%%s,%%%s\n",regname[rs1],regname[rt]);
990 output_byte(0x31);
991 output_modrm(3,rs2,rs1);
992 }
993 else {
994 emit_mov(rs1,rt);
995 emit_xor(rt,rs2,rt);
996 }
997}
998
999void emit_movimm(int imm,u_int rt)
1000{
1001 assem_debug("mov $%d,%%%s\n",imm,regname[rt]);
1002 assert(rt<8);
1003 output_byte(0xB8+rt);
1004 output_w32(imm);
1005}
1006
1007void emit_addimm(int rs,int imm,int rt)
1008{
1009 if(rs==rt) {
1010 if(imm!=0) {
1011 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1012 if(imm<128&&imm>=-128) {
1013 output_byte(0x83);
1014 output_modrm(3,rt,0);
1015 output_byte(imm);
1016 }
1017 else
1018 {
1019 output_byte(0x81);
1020 output_modrm(3,rt,0);
1021 output_w32(imm);
1022 }
1023 }
1024 }
1025 else {
1026 if(imm!=0) {
1027 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rs],regname[rt]);
1028 output_byte(0x8D);
1029 if(imm<128&&imm>=-128) {
1030 output_modrm(1,rs,rt);
1031 output_byte(imm);
1032 }else{
1033 output_modrm(2,rs,rt);
1034 output_w32(imm);
1035 }
1036 }else{
1037 emit_mov(rs,rt);
1038 }
1039 }
1040}
1041
1042void emit_addimm_and_set_flags(int imm,int rt)
1043{
1044 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1045 if(imm<128&&imm>=-128) {
1046 output_byte(0x83);
1047 output_modrm(3,rt,0);
1048 output_byte(imm);
1049 }
1050 else
1051 {
1052 output_byte(0x81);
1053 output_modrm(3,rt,0);
1054 output_w32(imm);
1055 }
1056}
1057void emit_addimm_no_flags(int imm,int rt)
1058{
1059 if(imm!=0) {
1060 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rt],regname[rt]);
1061 output_byte(0x8D);
1062 if(imm<128&&imm>=-128) {
1063 output_modrm(1,rt,rt);
1064 output_byte(imm);
1065 }else{
1066 output_modrm(2,rt,rt);
1067 output_w32(imm);
1068 }
1069 }
1070}
1071
1072void emit_adcimm(int imm,u_int rt)
1073{
1074 assem_debug("adc $%d,%%%s\n",imm,regname[rt]);
1075 assert(rt<8);
1076 if(imm<128&&imm>=-128) {
1077 output_byte(0x83);
1078 output_modrm(3,rt,2);
1079 output_byte(imm);
1080 }
1081 else
1082 {
1083 output_byte(0x81);
1084 output_modrm(3,rt,2);
1085 output_w32(imm);
1086 }
1087}
1088void emit_sbbimm(int imm,u_int rt)
1089{
1090 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1091 assert(rt<8);
1092 if(imm<128&&imm>=-128) {
1093 output_byte(0x83);
1094 output_modrm(3,rt,3);
1095 output_byte(imm);
1096 }
1097 else
1098 {
1099 output_byte(0x81);
1100 output_modrm(3,rt,3);
1101 output_w32(imm);
1102 }
1103}
1104
1105void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1106{
1107 if(rsh==rth&&rsl==rtl) {
1108 assem_debug("add $%d,%%%s\n",imm,regname[rtl]);
1109 if(imm<128&&imm>=-128) {
1110 output_byte(0x83);
1111 output_modrm(3,rtl,0);
1112 output_byte(imm);
1113 }
1114 else
1115 {
1116 output_byte(0x81);
1117 output_modrm(3,rtl,0);
1118 output_w32(imm);
1119 }
1120 assem_debug("adc $%d,%%%s\n",imm>>31,regname[rth]);
1121 output_byte(0x83);
1122 output_modrm(3,rth,2);
1123 output_byte(imm>>31);
1124 }
1125 else {
1126 emit_mov(rsh,rth);
1127 emit_mov(rsl,rtl);
1128 emit_addimm64_32(rth,rtl,imm,rth,rtl);
1129 }
1130}
1131
1132void emit_sbb(int rs1,int rs2)
1133{
1134 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1135 output_byte(0x19);
1136 output_modrm(3,rs1,rs2);
1137}
1138
1139void emit_andimm(int rs,int imm,int rt)
1140{
1141 if(rs==rt) {
1142 assem_debug("and $%d,%%%s\n",imm,regname[rt]);
1143 if(imm<128&&imm>=-128) {
1144 output_byte(0x83);
1145 output_modrm(3,rt,4);
1146 output_byte(imm);
1147 }
1148 else
1149 {
1150 output_byte(0x81);
1151 output_modrm(3,rt,4);
1152 output_w32(imm);
1153 }
1154 }
1155 else {
1156 emit_mov(rs,rt);
1157 emit_andimm(rt,imm,rt);
1158 }
1159}
1160
1161void emit_orimm(int rs,int imm,int rt)
1162{
1163 if(rs==rt) {
1164 assem_debug("or $%d,%%%s\n",imm,regname[rt]);
1165 if(imm<128&&imm>=-128) {
1166 output_byte(0x83);
1167 output_modrm(3,rt,1);
1168 output_byte(imm);
1169 }
1170 else
1171 {
1172 output_byte(0x81);
1173 output_modrm(3,rt,1);
1174 output_w32(imm);
1175 }
1176 }
1177 else {
1178 emit_mov(rs,rt);
1179 emit_orimm(rt,imm,rt);
1180 }
1181}
1182
1183void emit_xorimm(int rs,int imm,int rt)
1184{
1185 if(rs==rt) {
1186 assem_debug("xor $%d,%%%s\n",imm,regname[rt]);
1187 if(imm<128&&imm>=-128) {
1188 output_byte(0x83);
1189 output_modrm(3,rt,6);
1190 output_byte(imm);
1191 }
1192 else
1193 {
1194 output_byte(0x81);
1195 output_modrm(3,rt,6);
1196 output_w32(imm);
1197 }
1198 }
1199 else {
1200 emit_mov(rs,rt);
1201 emit_xorimm(rt,imm,rt);
1202 }
1203}
1204
1205void emit_shlimm(int rs,u_int imm,int rt)
1206{
1207 if(rs==rt) {
1208 assem_debug("shl %%%s,%d\n",regname[rt],imm);
1209 assert(imm>0);
1210 if(imm==1) output_byte(0xD1);
1211 else output_byte(0xC1);
1212 output_modrm(3,rt,4);
1213 if(imm>1) output_byte(imm);
1214 }
1215 else {
1216 emit_mov(rs,rt);
1217 emit_shlimm(rt,imm,rt);
1218 }
1219}
1220
1221void emit_shrimm(int rs,u_int imm,int rt)
1222{
1223 if(rs==rt) {
1224 assem_debug("shr %%%s,%d\n",regname[rt],imm);
1225 assert(imm>0);
1226 if(imm==1) output_byte(0xD1);
1227 else output_byte(0xC1);
1228 output_modrm(3,rt,5);
1229 if(imm>1) output_byte(imm);
1230 }
1231 else {
1232 emit_mov(rs,rt);
1233 emit_shrimm(rt,imm,rt);
1234 }
1235}
1236
1237void emit_sarimm(int rs,u_int imm,int rt)
1238{
1239 if(rs==rt) {
1240 assem_debug("sar %%%s,%d\n",regname[rt],imm);
1241 assert(imm>0);
1242 if(imm==1) output_byte(0xD1);
1243 else output_byte(0xC1);
1244 output_modrm(3,rt,7);
1245 if(imm>1) output_byte(imm);
1246 }
1247 else {
1248 emit_mov(rs,rt);
1249 emit_sarimm(rt,imm,rt);
1250 }
1251}
1252
1253void emit_rorimm(int rs,u_int imm,int rt)
1254{
1255 if(rs==rt) {
1256 assem_debug("ror %%%s,%d\n",regname[rt],imm);
1257 assert(imm>0);
1258 if(imm==1) output_byte(0xD1);
1259 else output_byte(0xC1);
1260 output_modrm(3,rt,1);
1261 if(imm>1) output_byte(imm);
1262 }
1263 else {
1264 emit_mov(rs,rt);
1265 emit_sarimm(rt,imm,rt);
1266 }
1267}
1268
1269void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1270{
1271 if(rs==rt) {
1272 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1273 assert(imm>0);
1274 output_byte(0x0F);
1275 output_byte(0xA4);
1276 output_modrm(3,rt,rs2);
1277 output_byte(imm);
1278 }
1279 else {
1280 emit_mov(rs,rt);
1281 emit_shldimm(rt,rs2,imm,rt);
1282 }
1283}
1284
1285void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1286{
1287 if(rs==rt) {
1288 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1289 assert(imm>0);
1290 output_byte(0x0F);
1291 output_byte(0xAC);
1292 output_modrm(3,rt,rs2);
1293 output_byte(imm);
1294 }
1295 else {
1296 emit_mov(rs,rt);
1297 emit_shrdimm(rt,rs2,imm,rt);
1298 }
1299}
1300
1301void emit_shlcl(int r)
1302{
1303 assem_debug("shl %%%s,%%cl\n",regname[r]);
1304 output_byte(0xD3);
1305 output_modrm(3,r,4);
1306}
1307void emit_shrcl(int r)
1308{
1309 assem_debug("shr %%%s,%%cl\n",regname[r]);
1310 output_byte(0xD3);
1311 output_modrm(3,r,5);
1312}
1313void emit_sarcl(int r)
1314{
1315 assem_debug("sar %%%s,%%cl\n",regname[r]);
1316 output_byte(0xD3);
1317 output_modrm(3,r,7);
1318}
1319
1320void emit_shldcl(int r1,int r2)
1321{
1322 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1323 output_byte(0x0F);
1324 output_byte(0xA5);
1325 output_modrm(3,r1,r2);
1326}
1327void emit_shrdcl(int r1,int r2)
1328{
1329 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1330 output_byte(0x0F);
1331 output_byte(0xAD);
1332 output_modrm(3,r1,r2);
1333}
1334
1335void emit_cmpimm(int rs,int imm)
1336{
1337 assem_debug("cmp $%d,%%%s\n",imm,regname[rs]);
1338 if(imm<128&&imm>=-128) {
1339 output_byte(0x83);
1340 output_modrm(3,rs,7);
1341 output_byte(imm);
1342 }
1343 else
1344 {
1345 output_byte(0x81);
1346 output_modrm(3,rs,7);
1347 output_w32(imm);
1348 }
1349}
1350
1351void emit_cmovne(u_int *addr,int rt)
1352{
1353 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1354 if(addr==&const_zero) assem_debug(" [zero]\n");
1355 else if(addr==&const_one) assem_debug(" [one]\n");
1356 else assem_debug("\n");
1357 output_byte(0x0F);
1358 output_byte(0x45);
1359 output_modrm(0,5,rt);
1360 output_w32((int)addr);
1361}
1362void emit_cmovl(u_int *addr,int rt)
1363{
1364 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1365 if(addr==&const_zero) assem_debug(" [zero]\n");
1366 else if(addr==&const_one) assem_debug(" [one]\n");
1367 else assem_debug("\n");
1368 output_byte(0x0F);
1369 output_byte(0x4C);
1370 output_modrm(0,5,rt);
1371 output_w32((int)addr);
1372}
1373void emit_cmovs(u_int *addr,int rt)
1374{
1375 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1376 if(addr==&const_zero) assem_debug(" [zero]\n");
1377 else if(addr==&const_one) assem_debug(" [one]\n");
1378 else assem_debug("\n");
1379 output_byte(0x0F);
1380 output_byte(0x48);
1381 output_modrm(0,5,rt);
1382 output_w32((int)addr);
1383}
1384void emit_cmovne_reg(int rs,int rt)
1385{
1386 assem_debug("cmovne %%%s,%%%s\n",regname[rs],regname[rt]);
1387 output_byte(0x0F);
1388 output_byte(0x45);
1389 output_modrm(3,rs,rt);
1390}
1391void emit_cmovl_reg(int rs,int rt)
1392{
1393 assem_debug("cmovl %%%s,%%%s\n",regname[rs],regname[rt]);
1394 output_byte(0x0F);
1395 output_byte(0x4C);
1396 output_modrm(3,rs,rt);
1397}
1398void emit_cmovs_reg(int rs,int rt)
1399{
1400 assem_debug("cmovs %%%s,%%%s\n",regname[rs],regname[rt]);
1401 output_byte(0x0F);
1402 output_byte(0x48);
1403 output_modrm(3,rs,rt);
1404}
1405void emit_cmovnc_reg(int rs,int rt)
1406{
1407 assem_debug("cmovae %%%s,%%%s\n",regname[rs],regname[rt]);
1408 output_byte(0x0F);
1409 output_byte(0x43);
1410 output_modrm(3,rs,rt);
1411}
1412void emit_cmova_reg(int rs,int rt)
1413{
1414 assem_debug("cmova %%%s,%%%s\n",regname[rs],regname[rt]);
1415 output_byte(0x0F);
1416 output_byte(0x47);
1417 output_modrm(3,rs,rt);
1418}
1419void emit_cmovp_reg(int rs,int rt)
1420{
1421 assem_debug("cmovp %%%s,%%%s\n",regname[rs],regname[rt]);
1422 output_byte(0x0F);
1423 output_byte(0x4A);
1424 output_modrm(3,rs,rt);
1425}
1426void emit_cmovnp_reg(int rs,int rt)
1427{
1428 assem_debug("cmovnp %%%s,%%%s\n",regname[rs],regname[rt]);
1429 output_byte(0x0F);
1430 output_byte(0x4B);
1431 output_modrm(3,rs,rt);
1432}
1433void emit_setl(int rt)
1434{
1435 assem_debug("setl %%%s\n",regname[rt]);
1436 output_byte(0x0F);
1437 output_byte(0x9C);
1438 output_modrm(3,rt,2);
1439}
1440void emit_movzbl_reg(int rs, int rt)
1441{
1442 assem_debug("movzbl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1443 output_byte(0x0F);
1444 output_byte(0xB6);
1445 output_modrm(3,rs,rt);
1446}
1447
1448void emit_slti32(int rs,int imm,int rt)
1449{
1450 if(rs!=rt) emit_zeroreg(rt);
1451 emit_cmpimm(rs,imm);
1452 if(rt<4) {
1453 emit_setl(rt);
1454 if(rs==rt) emit_movzbl_reg(rt,rt);
1455 }
1456 else
1457 {
1458 if(rs==rt) emit_movimm(0,rt);
1459 emit_cmovl(&const_one,rt);
1460 }
1461}
// Emit rt = (rs < imm) as an unsigned 32-bit comparison, result 0 or 1.
// The compare leaves the borrow in the carry flag; adding it to a zeroed rt
// with adc produces the result.
void emit_sltiu32(int rs,int imm,int rt)
{
  if(rs==rt) {
    // rt still holds the operand, so it can only be cleared after the compare.
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }
  else {
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }
  emit_adcimm(0,rt);
}
1469void emit_slti64_32(int rsh,int rsl,int imm,int rt)
1470{
1471 assert(rsh!=rt);
1472 emit_slti32(rsl,imm,rt);
1473 if(imm>=0)
1474 {
1475 emit_test(rsh,rsh);
1476 emit_cmovne(&const_zero,rt);
1477 emit_cmovs(&const_one,rt);
1478 }
1479 else
1480 {
1481 emit_cmpimm(rsh,-1);
1482 emit_cmovne(&const_zero,rt);
1483 emit_cmovl(&const_one,rt);
1484 }
1485}
1486void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
1487{
1488 assert(rsh!=rt);
1489 emit_sltiu32(rsl,imm,rt);
1490 if(imm>=0)
1491 {
1492 emit_test(rsh,rsh);
1493 emit_cmovne(&const_zero,rt);
1494 }
1495 else
1496 {
1497 emit_cmpimm(rsh,-1);
1498 emit_cmovne(&const_one,rt);
1499 }
1500}
1501
1502void emit_cmp(int rs,int rt)
1503{
1504 assem_debug("cmp %%%s,%%%s\n",regname[rt],regname[rs]);
1505 output_byte(0x39);
1506 output_modrm(3,rs,rt);
1507}
1508void emit_set_gz32(int rs, int rt)
1509{
1510 //assem_debug("set_gz32\n");
1511 emit_cmpimm(rs,1);
1512 emit_movimm(1,rt);
1513 emit_cmovl(&const_zero,rt);
1514}
// Emit rt = (rs != 0): "cmp $1,rs" borrows only when rs is zero, and that
// borrow is then subtracted from a preloaded 1.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);   // loaded after the compare, so rs may equal rt
  emit_sbbimm(0,rt);
}
1522void emit_set_gz64_32(int rsh, int rsl, int rt)
1523{
1524 //assem_debug("set_gz64\n");
1525 emit_set_gz32(rsl,rt);
1526 emit_test(rsh,rsh);
1527 emit_cmovne(&const_one,rt);
1528 emit_cmovs(&const_zero,rt);
1529}
1530void emit_set_nz64_32(int rsh, int rsl, int rt)
1531{
1532 //assem_debug("set_nz64\n");
1533 emit_or_and_set_flags(rsh,rsl,rt);
1534 emit_cmovne(&const_one,rt);
1535}
1536void emit_set_if_less32(int rs1, int rs2, int rt)
1537{
1538 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
1539 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1540 emit_cmp(rs1,rs2);
1541 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1542 emit_cmovl(&const_one,rt);
1543}
// Emit rt = carry flag of (rs1 - rs2), i.e. the unsigned rs1 < rs2 test.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int rt_is_operand=(rs1==rt||rs2==rt);
  // Clear rt up front when possible; otherwise only after the compare has
  // consumed the operand it holds.
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_adcimm(0,rt);   // rt = 0 + carry
}
1552void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1553{
1554 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1555 assert(u1!=rt);
1556 assert(u2!=rt);
1557 emit_cmp(l1,l2);
1558 emit_mov(u1,rt);
1559 emit_sbb(rt,u2);
1560 emit_movimm(0,rt);
1561 emit_cmovl(&const_one,rt);
1562}
// Emit rt = carry of (u1:l1 - u2:l2), i.e. the unsigned 64-bit less-than.
// rt is used as scratch for the high-word subtraction and therefore must not
// alias either upper half.
void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
{
  //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
  assert(u1!=rt);
  assert(u2!=rt);
  emit_cmp(l1,l2);              // low words: produces the borrow
  emit_mov(u1,rt);
  emit_sbb(rt,u2);              // high words with borrow, result discarded
  emit_movimm(0,rt);
  emit_adcimm(0,rt);            // rt = 0 + carry
}
1574
1575void emit_call(int a)
1576{
1577 assem_debug("call %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1578 output_byte(0xe8);
1579 output_w32(a-(int)out-4);
1580}
1581void emit_jmp(int a)
1582{
1583 assem_debug("jmp %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1584 output_byte(0xe9);
1585 output_w32(a-(int)out-4);
1586}
1587void emit_jne(int a)
1588{
1589 assem_debug("jne %x\n",a);
1590 output_byte(0x0f);
1591 output_byte(0x85);
1592 output_w32(a-(int)out-4);
1593}
1594void emit_jeq(int a)
1595{
1596 assem_debug("jeq %x\n",a);
1597 output_byte(0x0f);
1598 output_byte(0x84);
1599 output_w32(a-(int)out-4);
1600}
1601void emit_js(int a)
1602{
1603 assem_debug("js %x\n",a);
1604 output_byte(0x0f);
1605 output_byte(0x88);
1606 output_w32(a-(int)out-4);
1607}
1608void emit_jns(int a)
1609{
1610 assem_debug("jns %x\n",a);
1611 output_byte(0x0f);
1612 output_byte(0x89);
1613 output_w32(a-(int)out-4);
1614}
1615void emit_jl(int a)
1616{
1617 assem_debug("jl %x\n",a);
1618 output_byte(0x0f);
1619 output_byte(0x8c);
1620 output_w32(a-(int)out-4);
1621}
1622void emit_jge(int a)
1623{
1624 assem_debug("jge %x\n",a);
1625 output_byte(0x0f);
1626 output_byte(0x8d);
1627 output_w32(a-(int)out-4);
1628}
1629void emit_jno(int a)
1630{
1631 assem_debug("jno %x\n",a);
1632 output_byte(0x0f);
1633 output_byte(0x81);
1634 output_w32(a-(int)out-4);
1635}
1636void emit_jc(int a)
1637{
1638 assem_debug("jc %x\n",a);
1639 output_byte(0x0f);
1640 output_byte(0x82);
1641 output_w32(a-(int)out-4);
1642}
1643
// Emit "push $imm" with a 32-bit immediate (68 id).
void emit_pushimm(int imm)
{
  const int opcode=0x68;
  assem_debug("push $%x\n",imm);
  output_byte(opcode);
  output_w32(imm);
}
// Emit a push of the 32-bit word stored at absolute address addr (FF /6).
void emit_pushmem(int addr)
{
  const int ext=6; // ModRM reg field selects PUSH within the FF group
  assem_debug("push *%x\n",addr);
  output_byte(0xFF);
  output_modrm(0,5,ext); // mod=0, rm=5: absolute disp32 follows
  output_w32(addr);
}
// Emit "pusha" (opcode 60): push all general-purpose registers.
void emit_pusha()
{
  const int opcode=0x60;
  assem_debug("pusha\n");
  output_byte(opcode);
}
// Emit "popa" (opcode 61): pop all general-purpose registers.
void emit_popa()
{
  const int opcode=0x61;
  assem_debug("popa\n");
  output_byte(opcode);
}
1667void emit_pushreg(u_int r)
1668{
1669 assem_debug("push %%%s\n",regname[r]);
1670 assert(r<8);
1671 output_byte(0x50+r);
1672}
1673void emit_popreg(u_int r)
1674{
1675 assem_debug("pop %%%s\n",regname[r]);
1676 assert(r<8);
1677 output_byte(0x58+r);
1678}
1679void emit_callreg(u_int r)
1680{
1681 assem_debug("call *%%%s\n",regname[r]);
1682 assert(r<8);
1683 output_byte(0xFF);
1684 output_modrm(3,r,2);
1685}
1686void emit_jmpreg(u_int r)
1687{
1688 assem_debug("jmp *%%%s\n",regname[r]);
1689 assert(r<8);
1690 output_byte(0xFF);
1691 output_modrm(3,r,4);
1692}
1693void emit_jmpmem_indexed(u_int addr,u_int r)
1694{
1695 assem_debug("jmp *%x(%%%s)\n",addr,regname[r]);
1696 assert(r<8);
1697 output_byte(0xFF);
1698 output_modrm(2,r,4);
1699 output_w32(addr);
1700}
1701
1702void emit_readword(int addr, int rt)
1703{
1704 assem_debug("mov %x,%%%s\n",addr,regname[rt]);
1705 output_byte(0x8B);
1706 output_modrm(0,5,rt);
1707 output_w32(addr);
1708}
1709void emit_readword_indexed(int addr, int rs, int rt)
1710{
1711 assem_debug("mov %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1712 output_byte(0x8B);
1713 if(addr<128&&addr>=-128) {
1714 output_modrm(1,rs,rt);
1715 if(rs==ESP) output_sib(0,4,4);
1716 output_byte(addr);
1717 }
1718 else
1719 {
1720 output_modrm(2,rs,rt);
1721 if(rs==ESP) output_sib(0,4,4);
1722 output_w32(addr);
1723 }
1724}
1725void emit_readword_tlb(int addr, int map, int rt)
1726{
1727 if(map<0) emit_readword(addr+(int)rdram-0x80000000, rt);
1728 else
1729 {
1730 assem_debug("mov (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1731 output_byte(0x8B);
1732 output_modrm(0,4,rt);
1733 output_sib(2,map,5);
1734 output_w32(addr+(int)rdram-0x80000000);
1735 }
1736}
1737void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
1738{
1739 if(map<0) emit_readword_indexed(addr+(int)rdram-0x80000000, rs, rt);
1740 else {
1741 assem_debug("mov %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1742 assert(rs!=ESP);
1743 output_byte(0x8B);
1744 if(addr==0&&rs!=EBP) {
1745 output_modrm(0,4,rt);
1746 output_sib(2,map,rs);
1747 }
1748 else if(addr<128&&addr>=-128) {
1749 output_modrm(1,4,rt);
1750 output_sib(2,map,rs);
1751 output_byte(addr);
1752 }
1753 else
1754 {
1755 output_modrm(2,4,rt);
1756 output_sib(2,map,rs);
1757 output_w32(addr);
1758 }
1759 }
1760}
1761void emit_movmem_indexedx4(int addr, int rs, int rt)
1762{
1763 assem_debug("mov (%x,%%%s,4),%%%s\n",addr,regname[rs],regname[rt]);
1764 output_byte(0x8B);
1765 output_modrm(0,4,rt);
1766 output_sib(2,rs,5);
1767 output_w32(addr);
1768}
1769void emit_readdword_tlb(int addr, int map, int rh, int rl)
1770{
1771 if(map<0) {
1772 if(rh>=0) emit_readword(addr+(int)rdram-0x80000000, rh);
1773 emit_readword(addr+(int)rdram-0x7FFFFFFC, rl);
1774 }
1775 else {
1776 if(rh>=0) emit_movmem_indexedx4(addr+(int)rdram-0x80000000, map, rh);
1777 emit_movmem_indexedx4(addr+(int)rdram-0x7FFFFFFC, map, rl);
1778 }
1779}
// Emit a 64-bit indexed load as two 32-bit loads: [rs+addr] into rh (skipped
// when rh<0) and [rs+addr+4] into rl.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  // rh is written first, so it must not be the base register still needed
  // by the second load.
  assert(rh!=rs);
  if(rh>=0) {
    emit_readword_indexed_tlb(addr, rs, map, rh);
  }
  emit_readword_indexed_tlb(addr+4, rs, map, rl);
}
1786void emit_movsbl(int addr, int rt)
1787{
1788 assem_debug("movsbl %x,%%%s\n",addr,regname[rt]);
1789 output_byte(0x0F);
1790 output_byte(0xBE);
1791 output_modrm(0,5,rt);
1792 output_w32(addr);
1793}
1794void emit_movsbl_indexed(int addr, int rs, int rt)
1795{
1796 assem_debug("movsbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1797 output_byte(0x0F);
1798 output_byte(0xBE);
1799 output_modrm(2,rs,rt);
1800 output_w32(addr);
1801}
1802void emit_movsbl_tlb(int addr, int map, int rt)
1803{
1804 if(map<0) emit_movsbl(addr+(int)rdram-0x80000000, rt);
1805 else
1806 {
1807 assem_debug("movsbl (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1808 output_byte(0x0F);
1809 output_byte(0xBE);
1810 output_modrm(0,4,rt);
1811 output_sib(2,map,5);
1812 output_w32(addr+(int)rdram-0x80000000);
1813 }
1814}
1815void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1816{
1817 if(map<0) emit_movsbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1818 else {
1819 assem_debug("movsbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1820 assert(rs!=ESP);
1821 output_byte(0x0F);
1822 output_byte(0xBE);
1823 if(addr==0&&rs!=EBP) {
1824 output_modrm(0,4,rt);
1825 output_sib(2,map,rs);
1826 }
1827 else if(addr<128&&addr>=-128) {
1828 output_modrm(1,4,rt);
1829 output_sib(2,map,rs);
1830 output_byte(addr);
1831 }
1832 else
1833 {
1834 output_modrm(2,4,rt);
1835 output_sib(2,map,rs);
1836 output_w32(addr);
1837 }
1838 }
1839}
1840void emit_movswl(int addr, int rt)
1841{
1842 assem_debug("movswl %x,%%%s\n",addr,regname[rt]);
1843 output_byte(0x0F);
1844 output_byte(0xBF);
1845 output_modrm(0,5,rt);
1846 output_w32(addr);
1847}
1848void emit_movswl_indexed(int addr, int rs, int rt)
1849{
1850 assem_debug("movswl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1851 output_byte(0x0F);
1852 output_byte(0xBF);
1853 output_modrm(2,rs,rt);
1854 output_w32(addr);
1855}
1856void emit_movswl_tlb(int addr, int map, int rt)
1857{
1858 if(map<0) emit_movswl(addr+(int)rdram-0x80000000, rt);
1859 else
1860 {
1861 assem_debug("movswl (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1862 output_byte(0x0F);
1863 output_byte(0xBF);
1864 output_modrm(0,4,rt);
1865 output_sib(2,map,5);
1866 output_w32(addr+(int)rdram-0x80000000);
1867 }
1868}
1869void emit_movzbl(int addr, int rt)
1870{
1871 assem_debug("movzbl %x,%%%s\n",addr,regname[rt]);
1872 output_byte(0x0F);
1873 output_byte(0xB6);
1874 output_modrm(0,5,rt);
1875 output_w32(addr);
1876}
1877void emit_movzbl_indexed(int addr, int rs, int rt)
1878{
1879 assem_debug("movzbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1880 output_byte(0x0F);
1881 output_byte(0xB6);
1882 output_modrm(2,rs,rt);
1883 output_w32(addr);
1884}
1885void emit_movzbl_tlb(int addr, int map, int rt)
1886{
1887 if(map<0) emit_movzbl(addr+(int)rdram-0x80000000, rt);
1888 else
1889 {
1890 assem_debug("movzbl (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1891 output_byte(0x0F);
1892 output_byte(0xB6);
1893 output_modrm(0,4,rt);
1894 output_sib(2,map,5);
1895 output_w32(addr+(int)rdram-0x80000000);
1896 }
1897}
1898void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
1899{
1900 if(map<0) emit_movzbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1901 else {
1902 assem_debug("movzbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1903 assert(rs!=ESP);
1904 output_byte(0x0F);
1905 output_byte(0xB6);
1906 if(addr==0&&rs!=EBP) {
1907 output_modrm(0,4,rt);
1908 output_sib(2,map,rs);
1909 }
1910 else if(addr<128&&addr>=-128) {
1911 output_modrm(1,4,rt);
1912 output_sib(2,map,rs);
1913 output_byte(addr);
1914 }
1915 else
1916 {
1917 output_modrm(2,4,rt);
1918 output_sib(2,map,rs);
1919 output_w32(addr);
1920 }
1921 }
1922}
1923void emit_movzwl(int addr, int rt)
1924{
1925 assem_debug("movzwl %x,%%%s\n",addr,regname[rt]);
1926 output_byte(0x0F);
1927 output_byte(0xB7);
1928 output_modrm(0,5,rt);
1929 output_w32(addr);
1930}
1931void emit_movzwl_indexed(int addr, int rs, int rt)
1932{
1933 assem_debug("movzwl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1934 output_byte(0x0F);
1935 output_byte(0xB7);
1936 output_modrm(2,rs,rt);
1937 output_w32(addr);
1938}
1939void emit_movzwl_tlb(int addr, int map, int rt)
1940{
1941 if(map<0) emit_movzwl(addr+(int)rdram-0x80000000, rt);
1942 else
1943 {
1944 assem_debug("movzwl (%x,%%%s,4),%%%s\n",addr+(int)rdram-0x80000000,regname[map],regname[rt]);
1945 output_byte(0x0F);
1946 output_byte(0xB7);
1947 output_modrm(0,4,rt);
1948 output_sib(2,map,5);
1949 output_w32(addr+(int)rdram-0x80000000);
1950 }
1951}
1952void emit_movzwl_reg(int rs, int rt)
1953{
1954 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1955 output_byte(0x0F);
1956 output_byte(0xB7);
1957 output_modrm(3,rs,rt);
1958}
1959
1960void emit_xchg(int rs, int rt)
1961{
1962 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1963 if(rs==EAX) {
1964 output_byte(0x90+rt);
1965 }
1966 else
1967 {
1968 output_byte(0x87);
1969 output_modrm(3,rs,rt);
1970 }
1971}
1972void emit_writeword(int rt, int addr)
1973{
1974 assem_debug("movl %%%s,%x\n",regname[rt],addr);
1975 output_byte(0x89);
1976 output_modrm(0,5,rt);
1977 output_w32(addr);
1978}
1979void emit_writeword_indexed(int rt, int addr, int rs)
1980{
1981 assem_debug("mov %%%s,%x+%%%s\n",regname[rt],addr,regname[rs]);
1982 output_byte(0x89);
1983 if(addr<128&&addr>=-128) {
1984 output_modrm(1,rs,rt);
1985 if(rs==ESP) output_sib(0,4,4);
1986 output_byte(addr);
1987 }
1988 else
1989 {
1990 output_modrm(2,rs,rt);
1991 if(rs==ESP) output_sib(0,4,4);
1992 output_w32(addr);
1993 }
1994}
1995void emit_writeword_tlb(int rt, int addr, int map)
1996{
1997 if(map<0) {
1998 emit_writeword(rt, addr+(int)rdram-0x80000000);
1999 } else {
2000 emit_writeword_indexed(rt, addr+(int)rdram-0x80000000, map);
2001 }
2002}
2003void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2004{
2005 if(map<0) emit_writeword_indexed(rt, addr+(int)rdram-0x80000000, rs);
2006 else {
2007 assem_debug("mov %%%s,%x(%%%s,%%%s,1)\n",regname[rt],addr,regname[rs],regname[map]);
2008 assert(rs!=ESP);
2009 output_byte(0x89);
2010 if(addr==0&&rs!=EBP) {
2011 output_modrm(0,4,rt);
2012 output_sib(0,map,rs);
2013 }
2014 else if(addr<128&&addr>=-128) {
2015 output_modrm(1,4,rt);
2016 output_sib(0,map,rs);
2017 output_byte(addr);
2018 }
2019 else
2020 {
2021 output_modrm(2,4,rt);
2022 output_sib(0,map,rs);
2023 output_w32(addr);
2024 }
2025 }
2026}
2027void emit_writedword_tlb(int rh, int rl, int addr, int map)
2028{
2029 assert(rh>=0);
2030 if(map<0) {
2031 emit_writeword(rh, addr+(int)rdram-0x80000000);
2032 emit_writeword(rl, addr+(int)rdram-0x7FFFFFFC);
2033 }
2034 else {
2035 emit_writeword_indexed(rh, addr+(int)rdram-0x80000000, map);
2036 emit_writeword_indexed(rl, addr+(int)rdram-0x7FFFFFFC, map);
2037 }
2038}
// Emit a 64-bit indexed store as two 32-bit stores: rh at offset addr and rl
// at offset addr+4, both through emit_writeword_indexed_tlb.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  const int low_offset=addr+4;
  assert(rh>=0);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  emit_writeword_indexed_tlb(rl, low_offset, rs, map, temp);
}
2045void emit_writehword(int rt, int addr)
2046{
2047 assem_debug("movw %%%s,%x\n",regname[rt]+1,addr);
2048 output_byte(0x66);
2049 output_byte(0x89);
2050 output_modrm(0,5,rt);
2051 output_w32(addr);
2052}
2053void emit_writehword_indexed(int rt, int addr, int rs)
2054{
2055 assem_debug("movw %%%s,%x+%%%s\n",regname[rt]+1,addr,regname[rs]);
2056 output_byte(0x66);
2057 output_byte(0x89);
2058 if(addr<128&&addr>=-128) {
2059 output_modrm(1,rs,rt);
2060 output_byte(addr);
2061 }
2062 else
2063 {
2064 output_modrm(2,rs,rt);
2065 output_w32(addr);
2066 }
2067}
2068void emit_writehword_tlb(int rt, int addr, int map)
2069{
2070 if(map<0) {
2071 emit_writehword(rt, addr+(int)rdram-0x80000000);
2072 } else {
2073 emit_writehword_indexed(rt, addr+(int)rdram-0x80000000, map);
2074 }
2075}
2076void emit_writebyte(int rt, int addr)
2077{
2078 if(rt<4) {
2079 assem_debug("movb %%%cl,%x\n",regname[rt][1],addr);
2080 output_byte(0x88);
2081 output_modrm(0,5,rt);
2082 output_w32(addr);
2083 }
2084 else
2085 {
2086 emit_xchg(EAX,rt);
2087 emit_writebyte(EAX,addr);
2088 emit_xchg(EAX,rt);
2089 }
2090}
2091void emit_writebyte_indexed(int rt, int addr, int rs)
2092{
2093 if(rt<4) {
2094 assem_debug("movb %%%cl,%x+%%%s\n",regname[rt][1],addr,regname[rs]);
2095 output_byte(0x88);
2096 if(addr<128&&addr>=-128) {
2097 output_modrm(1,rs,rt);
2098 output_byte(addr);
2099 }
2100 else
2101 {
2102 output_modrm(2,rs,rt);
2103 output_w32(addr);
2104 }
2105 }
2106 else
2107 {
2108 emit_xchg(EAX,rt);
2109 emit_writebyte_indexed(EAX,addr,rs==EAX?rt:rs);
2110 emit_xchg(EAX,rt);
2111 }
2112}
2113void emit_writebyte_tlb(int rt, int addr, int map)
2114{
2115 if(map<0) {
2116 emit_writebyte(rt, addr+(int)rdram-0x80000000);
2117 } else {
2118 emit_writebyte_indexed(rt, addr+(int)rdram-0x80000000, map);
2119 }
2120}
2121void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2122{
2123 if(map<0) emit_writebyte_indexed(rt, addr+(int)rdram-0x80000000, rs);
2124 else
2125 if(rt<4) {
2126 assem_debug("movb %%%cl,%x(%%%s,%%%s,1)\n",regname[rt][1],addr,regname[rs],regname[map]);
2127 assert(rs!=ESP);
2128 output_byte(0x88);
2129 if(addr==0&&rs!=EBP) {
2130 output_modrm(0,4,rt);
2131 output_sib(0,map,rs);
2132 }
2133 else if(addr<128&&addr>=-128) {
2134 output_modrm(1,4,rt);
2135 output_sib(0,map,rs);
2136 output_byte(addr);
2137 }
2138 else
2139 {
2140 output_modrm(2,4,rt);
2141 output_sib(0,map,rs);
2142 output_w32(addr);
2143 }
2144 }
2145 else
2146 {
2147 emit_xchg(EAX,rt);
2148 emit_writebyte_indexed_tlb(EAX,addr,rs==EAX?rt:rs,map==EAX?rt:map,temp);
2149 emit_xchg(EAX,rt);
2150 }
2151}
// Emit a store of the 32-bit constant imm to absolute address addr
// (C7 /0, disp32 then imm32).
void emit_writeword_imm(int imm, int addr)
{
  const int opcode=0xC7; // MOV r/m32, imm32
  assem_debug("movl $%x,%x\n",imm,addr);
  output_byte(opcode);
  output_modrm(0,5,0); // mod=0, rm=5: absolute disp32 follows
  output_w32(addr);
  output_w32(imm);
}
// Emit a store of the 32-bit constant imm to the stack slot addr(%esp).
// addr must fit in a signed byte (C7 /0 with SIB and disp8).
void emit_writeword_imm_esp(int imm, int addr)
{
  const int opcode=0xC7; // MOV r/m32, imm32
  assem_debug("mov $%x,%x(%%esp)\n",imm,addr);
  assert(addr>=-128&&addr<128);
  output_byte(opcode);
  output_modrm(1,4,0);  // disp8, rm=4: SIB byte follows
  output_sib(0,4,4);    // base=ESP, no index
  output_byte(addr);
  output_w32(imm);
}
// Emit a store of the byte constant imm to absolute address addr
// (C6 /0, disp32 then imm8).  imm must fit in a signed byte.
void emit_writebyte_imm(int imm, int addr)
{
  const int opcode=0xC6; // MOV r/m8, imm8
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(imm>=-128&&imm<128);
  output_byte(opcode);
  output_modrm(0,5,0); // mod=0, rm=5: absolute disp32 follows
  output_w32(addr);
  output_byte(imm);
}
// Emit a store of the byte constant imm to the stack slot addr(%esp).
// addr must fit in a signed byte (C6 /0 with SIB and disp8).
void emit_writebyte_imm_esp(int imm, int addr)
{
  const int opcode=0xC6; // MOV r/m8, imm8
  assem_debug("movb $%x,%x(%%esp)\n",imm,addr);
  assert(addr>=-128&&addr<128);
  output_byte(opcode);
  output_modrm(1,4,0);  // disp8, rm=4: SIB byte follows
  output_sib(0,4,4);    // base=ESP, no index
  output_byte(addr);
  output_byte(imm);
}
2189
2190void emit_mul(int rs)
2191{
2192 assem_debug("mul %%%s\n",regname[rs]);
2193 output_byte(0xF7);
2194 output_modrm(3,rs,4);
2195}
2196void emit_imul(int rs)
2197{
2198 assem_debug("imul %%%s\n",regname[rs]);
2199 output_byte(0xF7);
2200 output_modrm(3,rs,5);
2201}
2202void emit_div(int rs)
2203{
2204 assem_debug("div %%%s\n",regname[rs]);
2205 output_byte(0xF7);
2206 output_modrm(3,rs,6);
2207}
2208void emit_idiv(int rs)
2209{
2210 assem_debug("idiv %%%s\n",regname[rs]);
2211 output_byte(0xF7);
2212 output_modrm(3,rs,7);
2213}
// Emit "cdq" (opcode 99): sign-extend EAX into EDX:EAX.
void emit_cdq()
{
  const int opcode=0x99;
  assem_debug("cdq\n");
  output_byte(opcode);
}
2219
2220// Load 2 immediates optimizing for small code size
2221void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2222{
2223 emit_movimm(imm1,rt1);
2224 if(imm2-imm1<128&&imm2-imm1>=-128) emit_addimm(rt1,imm2-imm1,rt2);
2225 else emit_movimm(imm2,rt2);
2226}
2227
// special case for checking pending_exception
// Emits "cmpb $imm,addr": compare the byte at absolute address addr with an
// 8-bit immediate (80 /7, disp32 then imm8).
void emit_cmpmem_imm_byte(int addr,int imm)
{
  const int opcode=0x80; // group 1, r/m8 with imm8
  assert(imm<128&&imm>=-127);
  assem_debug("cmpb $%d,%x\n",imm,addr);
  output_byte(opcode);
  output_modrm(0,5,7); // mod=0, rm=5: absolute disp32 follows; /7 = CMP
  output_w32(addr);
  output_byte(imm);
}
2238
2239// special case for checking invalid_code
2240void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
2241{
2242 assert(imm<128&&imm>=-127);
2243 assert(r>=0&&r<8);
2244 emit_shrimm(r,12,r);
2245 assem_debug("cmp $%d,%x+%%%s\n",imm,addr,regname[r]);
2246 output_byte(0x80);
2247 output_modrm(2,r,7);
2248 output_w32(addr);
2249 output_byte(imm);
2250}
2251
2252// special case for checking hash_table
2253void emit_cmpmem_indexed(int addr,int rs,int rt)
2254{
2255 assert(rs>=0&&rs<8);
2256 assert(rt>=0&&rt<8);
2257 assem_debug("cmp %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
2258 output_byte(0x39);
2259 output_modrm(2,rs,rt);
2260 output_w32(addr);
2261}
2262
2263// special case for checking memory_map in verify_mapping
2264void emit_cmpmem(int addr,int rt)
2265{
2266 assert(rt>=0&&rt<8);
2267 assem_debug("cmp %x,%%%s\n",addr,regname[rt]);
2268 output_byte(0x39);
2269 output_modrm(0,5,rt);
2270 output_w32(addr);
2271}
2272
// Used to preload hash table entries
// Emits a prefetch hint for the absolute address addr (0F 18 /1, disp32).
void emit_prefetch(void *addr)
{
  const int hint=1; // ModRM reg field of the 0F 18 prefetch group
  assem_debug("prefetch %x\n",(int)addr);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,hint); // mod=0, rm=5: absolute disp32 follows
  output_w32((int)addr);
}
2282
2283/*void emit_submem(int r,int addr)
2284{
2285 assert(r>=0&&r<8);
2286 assem_debug("sub %x,%%%s\n",addr,regname[r]);
2287 output_byte(0x2B);
2288 output_modrm(0,5,r);
2289 output_w32((int)addr);
2290}*/
2291void emit_subfrommem(int addr,int r)
2292{
2293 assert(r>=0&&r<8);
2294 assem_debug("sub %%%s,%x\n",regname[r],addr);
2295 output_byte(0x29);
2296 output_modrm(0,5,r);
2297 output_w32((int)addr);
2298}
2299
2300void emit_flds(int r)
2301{
2302 assem_debug("flds (%%%s)\n",regname[r]);
2303 output_byte(0xd9);
2304 if(r!=EBP) output_modrm(0,r,0);
2305 else {output_modrm(1,EBP,0);output_byte(0);}
2306}
2307void emit_fldl(int r)
2308{
2309 assem_debug("fldl (%%%s)\n",regname[r]);
2310 output_byte(0xdd);
2311 if(r!=EBP) output_modrm(0,r,0);
2312 else {output_modrm(1,EBP,0);output_byte(0);}
2313}
2314void emit_fucomip(u_int r)
2315{
2316 assem_debug("fucomip %d\n",r);
2317 assert(r<8);
2318 output_byte(0xdf);
2319 output_byte(0xe8+r);
2320}
// Emit "fchs" (D9 E0): negate st(0).
void emit_fchs()
{
  const int opcode2=0xe0;
  assem_debug("fchs\n");
  output_byte(0xd9);
  output_byte(opcode2);
}
// Emit "fabs" (D9 E1): replace st(0) with its absolute value.
void emit_fabs()
{
  const int opcode2=0xe1;
  assem_debug("fabs\n");
  output_byte(0xd9);
  output_byte(opcode2);
}
// Emit "fsqrt" (D9 FA): replace st(0) with its square root.
void emit_fsqrt()
{
  const int opcode2=0xfa;
  assem_debug("fsqrt\n");
  output_byte(0xd9);
  output_byte(opcode2);
}
2339void emit_fadds(int r)
2340{
2341 assem_debug("fadds (%%%s)\n",regname[r]);
2342 output_byte(0xd8);
2343 if(r!=EBP) output_modrm(0,r,0);
2344 else {output_modrm(1,EBP,0);output_byte(0);}
2345}
2346void emit_faddl(int r)
2347{
2348 assem_debug("faddl (%%%s)\n",regname[r]);
2349 output_byte(0xdc);
2350 if(r!=EBP) output_modrm(0,r,0);
2351 else {output_modrm(1,EBP,0);output_byte(0);}
2352}
// Emit "fadd st(r)": st(0) = st(0) + st(r) (D8 C0+r).
void emit_fadd(int r)
{
  const int base=0xc0; // second byte is base + stack index
  assem_debug("fadd st%d\n",r);
  output_byte(0xd8);
  output_byte(base+r);
}
2359void emit_fsubs(int r)
2360{
2361 assem_debug("fsubs (%%%s)\n",regname[r]);
2362 output_byte(0xd8);
2363 if(r!=EBP) output_modrm(0,r,4);
2364 else {output_modrm(1,EBP,4);output_byte(0);}
2365}
2366void emit_fsubl(int r)
2367{
2368 assem_debug("fsubl (%%%s)\n",regname[r]);
2369 output_byte(0xdc);
2370 if(r!=EBP) output_modrm(0,r,4);
2371 else {output_modrm(1,EBP,4);output_byte(0);}
2372}
// Emit "fsub st(r)": st(0) = st(0) - st(r) (D8 E0+r).
void emit_fsub(int r)
{
  const int base=0xe0; // second byte is base + stack index
  assem_debug("fsub st%d\n",r);
  output_byte(0xd8);
  output_byte(base+r);
}
2379void emit_fmuls(int r)
2380{
2381 assem_debug("fmuls (%%%s)\n",regname[r]);
2382 output_byte(0xd8);
2383 if(r!=EBP) output_modrm(0,r,1);
2384 else {output_modrm(1,EBP,1);output_byte(0);}
2385}
2386void emit_fmull(int r)
2387{
2388 assem_debug("fmull (%%%s)\n",regname[r]);
2389 output_byte(0xdc);
2390 if(r!=EBP) output_modrm(0,r,1);
2391 else {output_modrm(1,EBP,1);output_byte(0);}
2392}
// Emit "fmul st(r)": st(0) = st(0) * st(r) (D8 C8+r).
void emit_fmul(int r)
{
  const int base=0xc8; // second byte is base + stack index
  assem_debug("fmul st%d\n",r);
  output_byte(0xd8);
  output_byte(base+r);
}
2399void emit_fdivs(int r)
2400{
2401 assem_debug("fdivs (%%%s)\n",regname[r]);
2402 output_byte(0xd8);
2403 if(r!=EBP) output_modrm(0,r,6);
2404 else {output_modrm(1,EBP,6);output_byte(0);}
2405}
2406void emit_fdivl(int r)
2407{
2408 assem_debug("fdivl (%%%s)\n",regname[r]);
2409 output_byte(0xdc);
2410 if(r!=EBP) output_modrm(0,r,6);
2411 else {output_modrm(1,EBP,6);output_byte(0);}
2412}
// Emit "fdiv st(r)": st(0) = st(0) / st(r) (D8 F0+r).
void emit_fdiv(int r)
{
  const int base=0xf0; // second byte is base + stack index
  assem_debug("fdiv st%d\n",r);
  output_byte(0xd8);
  output_byte(base+r);
}
// Pop the x87 stack by emitting "fstp st(0)" (DD D8), which stores st(0)
// onto itself and discards it.
void emit_fpop()
{
  const int opcode2=0xd8; // FSTP st(0)
  assem_debug("fpop\n");
  output_byte(0xdd);
  output_byte(opcode2);
}
2426void emit_fildl(int r)
2427{
2428 assem_debug("fildl (%%%s)\n",regname[r]);
2429 output_byte(0xdb);
2430 if(r!=EBP) output_modrm(0,r,0);
2431 else {output_modrm(1,EBP,0);output_byte(0);}
2432}
2433void emit_fildll(int r)
2434{
2435 assem_debug("fildll (%%%s)\n",regname[r]);
2436 output_byte(0xdf);
2437 if(r!=EBP) output_modrm(0,r,5);
2438 else {output_modrm(1,EBP,5);output_byte(0);}
2439}
2440void emit_fistpl(int r)
2441{
2442 assem_debug("fistpl (%%%s)\n",regname[r]);
2443 output_byte(0xdb);
2444 if(r!=EBP) output_modrm(0,r,3);
2445 else {output_modrm(1,EBP,3);output_byte(0);}
2446}
2447void emit_fistpll(int r)
2448{
2449 assem_debug("fistpll (%%%s)\n",regname[r]);
2450 output_byte(0xdf);
2451 if(r!=EBP) output_modrm(0,r,7);
2452 else {output_modrm(1,EBP,7);output_byte(0);}
2453}
2454void emit_fstps(int r)
2455{
2456 assem_debug("fstps (%%%s)\n",regname[r]);
2457 output_byte(0xd9);
2458 if(r!=EBP) output_modrm(0,r,3);
2459 else {output_modrm(1,EBP,3);output_byte(0);}
2460}
2461void emit_fstpl(int r)
2462{
2463 assem_debug("fstpl (%%%s)\n",regname[r]);
2464 output_byte(0xdd);
2465 if(r!=EBP) output_modrm(0,r,3);
2466 else {output_modrm(1,EBP,3);output_byte(0);}
2467}
// Emit "fnstcw (%esp)": store the x87 control word at the top of the stack
// (D9 /7 with a SIB byte for the ESP base).
void emit_fnstcw_stack()
{
  const int ext=7; // ModRM reg field selects FNSTCW within the D9 group
  assem_debug("fnstcw (%%esp)\n");
  output_byte(0xd9);
  output_modrm(0,4,ext);
  output_sib(0,4,4); // base=ESP, no index
}
// Emit "fldcw (%esp)": load the x87 control word from the top of the stack
// (D9 /5 with a SIB byte for the ESP base).
void emit_fldcw_stack()
{
  const int ext=5; // ModRM reg field selects FLDCW within the D9 group
  assem_debug("fldcw (%%esp)\n");
  output_byte(0xd9);
  output_modrm(0,4,ext);
  output_sib(0,4,4); // base=ESP, no index
}
2482void emit_fldcw_indexed(int addr,int r)
2483{
2484 assem_debug("fldcw %x(%%%s)\n",addr,regname[r]);
2485 output_byte(0xd9);
2486 output_modrm(0,4,5);
2487 output_sib(1,r,5);
2488 output_w32(addr);
2489}
// Emit "fldcw addr": load the x87 control word from absolute address addr
// (D9 /5, disp32).
void emit_fldcw(int addr)
{
  const int ext=5; // ModRM reg field selects FLDCW within the D9 group
  assem_debug("fldcw %x\n",addr);
  output_byte(0xd9);
  output_modrm(0,5,ext); // mod=0, rm=5: absolute disp32 follows
  output_w32(addr);
}
2497void emit_movss_load(u_int addr,u_int ssereg)
2498{
2499 assem_debug("movss (%%%s),xmm%d\n",regname[addr],ssereg);
2500 assert(ssereg<8);
2501 output_byte(0xf3);
2502 output_byte(0x0f);
2503 output_byte(0x10);
2504 if(addr!=EBP) output_modrm(0,addr,ssereg);
2505 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2506}
2507void emit_movsd_load(u_int addr,u_int ssereg)
2508{
2509 assem_debug("movsd (%%%s),xmm%d\n",regname[addr],ssereg);
2510 assert(ssereg<8);
2511 output_byte(0xf2);
2512 output_byte(0x0f);
2513 output_byte(0x10);
2514 if(addr!=EBP) output_modrm(0,addr,ssereg);
2515 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2516}
2517void emit_movd_store(u_int ssereg,u_int addr)
2518{
2519 assem_debug("movd xmm%d,(%%%s)\n",ssereg,regname[addr]);
2520 assert(ssereg<8);
2521 output_byte(0x66);
2522 output_byte(0x0f);
2523 output_byte(0x7e);
2524 if(addr!=EBP) output_modrm(0,addr,ssereg);
2525 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2526}
2527void emit_cvttps2dq(u_int ssereg1,u_int ssereg2)
2528{
2529 assem_debug("cvttps2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
2530 assert(ssereg1<8);
2531 assert(ssereg2<8);
2532 output_byte(0xf3);
2533 output_byte(0x0f);
2534 output_byte(0x5b);
2535 output_modrm(3,ssereg1,ssereg2);
2536}
2537void emit_cvttpd2dq(u_int ssereg1,u_int ssereg2)
2538{
2539 assem_debug("cvttpd2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
2540 assert(ssereg1<8);
2541 assert(ssereg2<8);
2542 output_byte(0x66);
2543 output_byte(0x0f);
2544 output_byte(0xe6);
2545 output_modrm(3,ssereg1,ssereg2);
2546}
2547
2548/* Stubs/epilogue */
2549
2550emit_extjump2(int addr, int target, int linker)
2551{
2552 u_char *ptr=(u_char *)addr;
2553 if(*ptr==0x0f)
2554 {
2555 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
2556 addr+=2;
2557 }
2558 else
2559 {
2560 assert(*ptr==0xe8||*ptr==0xe9);
2561 addr++;
2562 }
2563 emit_movimm(target,EAX);
2564 emit_movimm(addr,EBX);
2565 //assert(addr>=0x7000000&&addr<0x7FFFFFF);
2566 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2567//DEBUG >
2568#ifdef DEBUG_CYCLE_COUNT
2569 emit_readword((int)&last_count,ECX);
2570 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2571 emit_readword((int)&next_interupt,ECX);
2572 emit_writeword(HOST_CCREG,(int)&Count);
2573 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2574 emit_writeword(ECX,(int)&last_count);
2575#endif
2576//DEBUG <
2577 emit_jmp(linker);
2578}
2579
2580emit_extjump(int addr, int target)
2581{
2582 emit_extjump2(addr, target, (int)dyna_linker);
2583}
2584emit_extjump_ds(int addr, int target)
2585{
2586 emit_extjump2(addr, target, (int)dyna_linker_ds);
2587}
2588
// Emit the out-of-line slow path for a load.
// n indexes stubs[]; the fields read here are [0] stub type, [1] location of
// the branch that is patched to land on this stub, [2] return address,
// [3] instruction index i, [4] host register holding the address,
// [5] struct regstat pointer and [6] cycle adjustment.
// The generated code stores the address, fetches a handler from the readmem*
// table matching the access size, calls it with the cycle count written to
// Count, and finally loads the result from readmem_dword into the target.
2589do_readstub(int n)
2590{
2591 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2592 set_jump_target(stubs[n][1],(int)out);
2593 int type=stubs[n][0];
2594 int i=stubs[n][3];
2595 int rs=stubs[n][4];
2596 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2597 signed char *i_regmap=i_regs->regmap;
2598 int addr=get_reg(i_regmap,AGEN1+(i&1));
2599 int rth,rt;
2600 int ds;
// C1LS and LOADLR load into FTEMP rather than directly into rt1[i]
2601 if(itype[i]==C1LS||itype[i]==LOADLR) {
2602 rth=get_reg(i_regmap,FTEMP|64);
2603 rt=get_reg(i_regmap,FTEMP);
2604 }else{
2605 rth=get_reg(i_regmap,rt1[i]|64);
2606 rt=get_reg(i_regmap,rt1[i]);
2607 }
2608 assert(rs>=0);
2609 assert(rt>=0);
// No address-generation register allocated: reuse the target register as scratch
2610 if(addr<0) addr=rt;
2611 assert(addr>=0);
// Pick the handler table for the access width
2612 int ftable=0;
2613 if(type==LOADB_STUB||type==LOADBU_STUB)
2614 ftable=(int)readmemb;
2615 if(type==LOADH_STUB||type==LOADHU_STUB)
2616 ftable=(int)readmemh;
2617 if(type==LOADW_STUB)
2618 ftable=(int)readmem;
2619 if(type==LOADD_STUB)
2620 ftable=(int)readmemd;
// Publish the address, then index the table by (address>>16) so that addr
// ends up holding the handler pointer
2621 emit_writeword(rs,(int)&address);
2622 emit_shrimm(rs,16,addr);
2623 emit_movmem_indexedx4(ftable,addr,addr);
2624 emit_pusha();
// ds is nonzero when the stub was registered with a regstat other than
// regs[i] (presumably the delay-slot case -- confirm against callers)
2625 ds=i_regs!=&regs[i];
2626 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
// addr (and real_rs, when allocated) are masked out of the dirty set
2627 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2628 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2629
// Choose a register for the cycle count (cc) and a scratch (temp), neither
// of which may be addr.  temp=!addr yields host register 0 or 1.
2630 int temp;
2631 int cc=get_reg(i_regmap,CCREG);
2632 if(cc<0) {
2633 if(addr==HOST_CCREG)
2634 {
2635 cc=0;temp=1;
2636 assert(cc!=HOST_CCREG);
2637 assert(temp!=HOST_CCREG);
2638 emit_loadreg(CCREG,cc);
2639 }
2640 else
2641 {
2642 cc=HOST_CCREG;
2643 emit_loadreg(CCREG,cc);
2644 temp=!addr;
2645 }
2646 }
2647 else
2648 {
2649 temp=!addr;
2650 }
// Count = cc + CLOCK_DIVIDER*(adjust+1) + last_count
2651 emit_readword((int)&last_count,temp);
2652 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
// Word stored at 32(%esp): instruction address, with the was32 bit of rs1
// in bit 1 and ds in bit 0
2653 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2654 emit_add(cc,temp,cc);
2655 emit_writeword(cc,(int)&Count);
2656 emit_callreg(addr);
2657 // We really shouldn't need to update the count here,
2658 // but not doing so causes random crashes...
// Recompute the cycle register from Count and next_interupt, undoing the
// adjustment added above, and refresh last_count
2659 emit_readword((int)&Count,HOST_CCREG);
2660 emit_readword((int)&next_interupt,ECX);
2661 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2662 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2663 emit_writeword(ECX,(int)&last_count);
2664 emit_storereg(CCREG,HOST_CCREG);
2665 emit_popa();
2666 if((cc=get_reg(i_regmap,CCREG))>=0) {
2667 emit_loadreg(CCREG,cc);
2668 }
// Fetch the handler's result from readmem_dword with the extension that
// matches the stub type
2669 if(type==LOADB_STUB)
2670 emit_movsbl((int)&readmem_dword,rt);
2671 if(type==LOADBU_STUB)
2672 emit_movzbl((int)&readmem_dword,rt);
2673 if(type==LOADH_STUB)
2674 emit_movswl((int)&readmem_dword,rt);
2675 if(type==LOADHU_STUB)
2676 emit_movzwl((int)&readmem_dword,rt);
2677 if(type==LOADW_STUB)
2678 emit_readword((int)&readmem_dword,rt);
2679 if(type==LOADD_STUB) {
2680 emit_readword((int)&readmem_dword,rt);
2681 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2682 }
2683 emit_jmp(stubs[n][2]); // return address
2684}
2685
// Emit, inline, a call to the read handler for a load whose address is known
// when the code is generated.
//   type   - LOAD*_STUB constant selecting access width and extension
//   i      - instruction index
//   addr   - the constant address; the handler is looked up right here as
//            ftable[addr>>16] rather than at run time
//   regmap - register map used for all get_reg lookups
//   target - guest register that receives the result
//   adj    - cycle adjustment (CLOCK_DIVIDER*(adj+1) is added around the call)
//   reglist- not referenced in this function
2686inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2687{
2688 assem_debug("inline_readstub\n");
// rs and rt are both looked up from target, so they name the same register
2689 int rs=get_reg(regmap,target);
2690 int rth=get_reg(regmap,target|64);
2691 int rt=get_reg(regmap,target);
2692 assert(rs>=0);
2693 assert(rt>=0);
// Pick the handler table for the access width
2694 int ftable=0;
2695 if(type==LOADB_STUB||type==LOADBU_STUB)
2696 ftable=(int)readmemb;
2697 if(type==LOADH_STUB||type==LOADHU_STUB)
2698 ftable=(int)readmemh;
2699 if(type==LOADW_STUB)
2700 ftable=(int)readmem;
2701 if(type==LOADD_STUB)
2702 ftable=(int)readmemd;
// Publish the address: as an immediate when HOST_IMM_ADDR32 is defined,
// otherwise from register rs
2703 #ifdef HOST_IMM_ADDR32
2704 emit_writeword_imm(addr,(int)&address);
2705 #else
2706 emit_writeword(rs,(int)&address);
2707 #endif
2708 emit_pusha();
// Choose a register for the cycle count (cc) and a scratch (temp) that do not
// collide with rs.  temp=!rs yields host register 0 or 1.
2709 int cc=get_reg(regmap,CCREG);
2710 int temp;
2711 if(cc<0) {
2712 if(rs==HOST_CCREG)
2713 {
2714 cc=0;temp=1;
2715 assert(cc!=HOST_CCREG);
2716 assert(temp!=HOST_CCREG);
2717 emit_loadreg(CCREG,cc);
2718 }
2719 else
2720 {
2721 cc=HOST_CCREG;
2722 emit_loadreg(CCREG,cc);
2723 temp=!rs;
2724 }
2725 }
2726 else
2727 {
2728 temp=!rs;
2729 }
// Count = cc + CLOCK_DIVIDER*(adj+1) + last_count
2730 emit_readword((int)&last_count,temp);
2731 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2732 emit_add(cc,temp,cc);
2733 emit_writeword(cc,(int)&Count);
2734 if((signed int)addr>=(signed int)0xC0000000) {
2735 // Pagefault address
// Word stored at 32(%esp): instruction address, with the was32 bit of rs1
// in bit 1 and ds in bit 0
2736 int ds=regmap!=regs[i].regmap;
2737 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2738 }
2739 emit_call(((u_int *)ftable)[addr>>16]);
2740 // We really shouldn't need to update the count here,
2741 // but not doing so causes random crashes...
// Recompute the cycle register from Count and next_interupt, undoing the
// adjustment added above, and refresh last_count
2742 emit_readword((int)&Count,HOST_CCREG);
2743 emit_readword((int)&next_interupt,ECX);
2744 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2745 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2746 emit_writeword(ECX,(int)&last_count);
2747 emit_storereg(CCREG,HOST_CCREG);
2748 emit_popa();
2749 if((cc=get_reg(regmap,CCREG))>=0) {
2750 emit_loadreg(CCREG,cc);
2751 }
// Fetch the handler's result from readmem_dword with the extension that
// matches the stub type
2752 if(type==LOADB_STUB)
2753 emit_movsbl((int)&readmem_dword,rt);
2754 if(type==LOADBU_STUB)
2755 emit_movzbl((int)&readmem_dword,rt);
2756 if(type==LOADH_STUB)
2757 emit_movswl((int)&readmem_dword,rt);
2758 if(type==LOADHU_STUB)
2759 emit_movzwl((int)&readmem_dword,rt);
2760 if(type==LOADW_STUB)
2761 emit_readword((int)&readmem_dword,rt);
2762 if(type==LOADD_STUB) {
2763 emit_readword((int)&readmem_dword,rt);
2764 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2765 }
2766}
2767
// Emit the out-of-line slow path for a store.
// n indexes stubs[]; the fields read here are [0] stub type, [1] location of
// the branch that is patched to land on this stub, [2] return address,
// [3] instruction index i, [4] host register holding the address,
// [5] struct regstat pointer and [6] cycle adjustment.
// The generated code stores the address and the value to the globals
// address and byte/hword/word/dword, fetches a handler from the writemem*
// table matching the access size, and calls it with the cycle count written
// to Count.
2768do_writestub(int n)
2769{
2770 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2771 set_jump_target(stubs[n][1],(int)out);
2772 int type=stubs[n][0];
2773 int i=stubs[n][3];
2774 int rs=stubs[n][4];
2775 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2776 signed char *i_regmap=i_regs->regmap;
2777 int addr=get_reg(i_regmap,AGEN1+(i&1));
2778 int rth,rt,r;
2779 int ds;
// r remembers which guest register supplies the data (FTEMP for C1LS,
// otherwise rs2[i]); it is tested below for the 64-bit store
2780 if(itype[i]==C1LS) {
2781 rth=get_reg(i_regmap,FTEMP|64);
2782 rt=get_reg(i_regmap,r=FTEMP);
2783 }else{
2784 rth=get_reg(i_regmap,rs2[i]|64);
2785 rt=get_reg(i_regmap,r=rs2[i]);
2786 }
2787 assert(rs>=0);
2788 assert(rt>=0);
// No address-generation register allocated: fall back to the register
// mapped to -1
2789 if(addr<0) addr=get_reg(i_regmap,-1);
2790 assert(addr>=0);
// Pick the handler table for the access width
2791 int ftable=0;
2792 if(type==STOREB_STUB)
2793 ftable=(int)writememb;
2794 if(type==STOREH_STUB)
2795 ftable=(int)writememh;
2796 if(type==STOREW_STUB)
2797 ftable=(int)writemem;
2798 if(type==STORED_STUB)
2799 ftable=(int)writememd;
// Publish the address, then index the table by (address>>16) so that addr
// ends up holding the handler pointer
2800 emit_writeword(rs,(int)&address);
2801 emit_shrimm(rs,16,addr);
2802 emit_movmem_indexedx4(ftable,addr,addr);
// Publish the value to be stored
2803 if(type==STOREB_STUB)
2804 emit_writebyte(rt,(int)&byte);
2805 if(type==STOREH_STUB)
2806 emit_writehword(rt,(int)&hword);
2807 if(type==STOREW_STUB)
2808 emit_writeword(rt,(int)&word);
2809 if(type==STORED_STUB) {
2810 emit_writeword(rt,(int)&dword);
// When r is guest register 0 the low register rt is written to both halves
2811 emit_writeword(r?rth:rt,(int)&dword+4);
2812 }
2813 emit_pusha();
// ds is nonzero when the stub was registered with a regstat other than
// regs[i] (presumably the delay-slot case -- confirm against callers)
2814 ds=i_regs!=&regs[i];
2815 int real_rs=get_reg(i_regmap,rs1[i]);
// addr (and real_rs, when allocated) are masked out of the dirty set
2816 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2817 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2818
// Choose a register for the cycle count (cc) and a scratch (temp), neither
// of which may be addr.  temp=!addr yields host register 0 or 1.
2819 int temp;
2820 int cc=get_reg(i_regmap,CCREG);
2821 if(cc<0) {
2822 if(addr==HOST_CCREG)
2823 {
2824 cc=0;temp=1;
2825 assert(cc!=HOST_CCREG);
2826 assert(temp!=HOST_CCREG);
2827 emit_loadreg(CCREG,cc);
2828 }
2829 else
2830 {
2831 cc=HOST_CCREG;
2832 emit_loadreg(CCREG,cc);
2833 temp=!addr;
2834 }
2835 }
2836 else
2837 {
2838 temp=!addr;
2839 }
// Count = cc + CLOCK_DIVIDER*(adjust+1) + last_count
2840 emit_readword((int)&last_count,temp);
2841 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
// Word stored at 32(%esp): instruction address, with the was32 bit of rs1
// in bit 1 and ds in bit 0
2842 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2843 emit_add(cc,temp,cc);
2844 emit_writeword(cc,(int)&Count);
2845 emit_callreg(addr);
// Recompute the cycle register from Count and next_interupt, undoing the
// adjustment added above, and refresh last_count
2846 emit_readword((int)&Count,HOST_CCREG);
2847 emit_readword((int)&next_interupt,ECX);
2848 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2849 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2850 emit_writeword(ECX,(int)&last_count);
2851 emit_storereg(CCREG,HOST_CCREG);
2852 emit_popa();
2853 if((cc=get_reg(i_regmap,CCREG))>=0) {
2854 emit_loadreg(CCREG,cc);
2855 }
2856 emit_jmp(stubs[n][2]); // return address
2857}
2858
// Emit, inline, a call to the write handler for a store whose address is
// known when the code is generated.
//   type   - STORE*_STUB constant selecting the access width
//   i      - instruction index
//   addr   - the constant address; the handler is looked up right here as
//            ftable[addr>>16] rather than at run time
//   regmap - register map used for all get_reg lookups
//   target - guest register that supplies the value to store
//   adj    - cycle adjustment (CLOCK_DIVIDER*(adj+1) is added around the call)
//   reglist- not referenced in this function
2859inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2860{
2861 assem_debug("inline_writestub\n");
// The address is taken from the host register mapped to -1
2862 int rs=get_reg(regmap,-1);
2863 int rth=get_reg(regmap,target|64);
2864 int rt=get_reg(regmap,target);
2865 assert(rs>=0);
2866 assert(rt>=0);
// Pick the handler table for the access width
2867 int ftable=0;
2868 if(type==STOREB_STUB)
2869 ftable=(int)writememb;
2870 if(type==STOREH_STUB)
2871 ftable=(int)writememh;
2872 if(type==STOREW_STUB)
2873 ftable=(int)writemem;
2874 if(type==STORED_STUB)
2875 ftable=(int)writememd;
// Publish the address and the value to be stored
2876 emit_writeword(rs,(int)&address);
2877 if(type==STOREB_STUB)
2878 emit_writebyte(rt,(int)&byte);
2879 if(type==STOREH_STUB)
2880 emit_writehword(rt,(int)&hword);
2881 if(type==STOREW_STUB)
2882 emit_writeword(rt,(int)&word);
2883 if(type==STORED_STUB) {
2884 emit_writeword(rt,(int)&dword);
// When target is guest register 0 the low register rt is written to both halves
2885 emit_writeword(target?rth:rt,(int)&dword+4);
2886 }
2887 emit_pusha();
// Choose a register for the cycle count (cc) and a scratch (temp) that do not
// collide with rs.  temp=!rs yields host register 0 or 1.
2888 int cc=get_reg(regmap,CCREG);
2889 int temp;
2890 if(cc<0) {
2891 if(rs==HOST_CCREG)
2892 {
2893 cc=0;temp=1;
2894 assert(cc!=HOST_CCREG);
2895 assert(temp!=HOST_CCREG);
2896 emit_loadreg(CCREG,cc);
2897 }
2898 else
2899 {
2900 cc=HOST_CCREG;
2901 emit_loadreg(CCREG,cc);
2902 temp=!rs;
2903 }
2904 }
2905 else
2906 {
2907 temp=!rs;
2908 }
// Count = cc + CLOCK_DIVIDER*(adj+1) + last_count
2909 emit_readword((int)&last_count,temp);
2910 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2911 emit_add(cc,temp,cc);
2912 emit_writeword(cc,(int)&Count);
2913 if((signed int)addr>=(signed int)0xC0000000) {
2914 // Pagefault address
// Word stored at 32(%esp): instruction address, with the was32 bit of rs1
// in bit 1 and ds in bit 0
2915 int ds=regmap!=regs[i].regmap;
2916 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2917 }
2918 emit_call(((u_int *)ftable)[addr>>16]);
// Recompute the cycle register from Count and next_interupt, undoing the
// adjustment added above, and refresh last_count
2919 emit_readword((int)&Count,HOST_CCREG);
2920 emit_readword((int)&next_interupt,ECX);
2921 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2922 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2923 emit_writeword(ECX,(int)&last_count);
2924 emit_storereg(CCREG,HOST_CCREG);
2925 emit_popa();
2926 if((cc=get_reg(regmap,CCREG))>=0) {
2927 emit_loadreg(CCREG,cc);
2928 }
2929}
2930
2931do_unalignedwritestub(int n)
2932{
2933 set_jump_target(stubs[n][1],(int)out);
2934 output_byte(0xCC);
2935 emit_jmp(stubs[n][2]); // return address
2936}
2937
/* Debug helper: print the register values passed in, in the order
 * a b c d ebp esi edi, followed by the word located just below the first
 * argument in memory.  esp is accepted but not printed.
 * NOTE(review): the parameter order matches what an x86 pusha leaves on the
 * stack, so this is presumably called from generated code right after
 * pusha -- confirm.  Reading (&edi)[-1] steps outside the argument and is
 * only meaningful with that stack layout. */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  const int *args=&edi;
  int below_args=args[-1];

  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,below_args);
}
2942
2943do_invstub(int n)
2944{
2945 set_jump_target(stubs[n][1],(int)out);
2946 if(stubs[n][4]!=EDI) emit_xchg(stubs[n][4],EDI);
2947 emit_pusha();
2948 emit_call((int)&invalidate_block);
2949 emit_popa();
2950 if(stubs[n][4]!=EDI) emit_xchg(stubs[n][4],EDI);
2951 emit_jmp(stubs[n][2]); // return address
2952}
2953
2954int do_dirty_stub(int i)
2955{
2956 assem_debug("do_dirty_stub %x\n",start+i*4);
2957 emit_pushimm(start+i*4);
2958 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
2959 emit_movimm((int)copy,EBX);
2960 emit_movimm(slen*4,ECX);
2961 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2962 emit_addimm(ESP,4,ESP);
2963 int entry=(int)out;
2964 load_regs_entry(i);
2965 if(entry==(int)out) entry=instr_addr[i];
2966 emit_jmp(instr_addr[i]);
2967 return entry;
2968}
2969
2970void do_dirty_stub_ds()
2971{
2972 emit_pushimm(start+1);
2973 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
2974 emit_movimm((int)copy,EBX);
2975 emit_movimm(slen*4,ECX);
2976 emit_call((int)&verify_code_ds);
2977 emit_addimm(ESP,4,ESP);
2978}
2979
2980do_cop1stub(int n)
2981{
2982 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2983 set_jump_target(stubs[n][1],(int)out);
2984 int i=stubs[n][3];
2985 int rs=stubs[n][4];
2986 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2987 int ds=stubs[n][6];
2988 if(!ds) {
2989 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2990 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2991 }
2992 //else {printf("fp exception in delay slot\n");}
2993 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2994 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2995 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2996 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2997 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2998}
2999
3000/* TLB */
3001
3002int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3003{
3004 if(c) {
3005 if((signed int)addr>=(signed int)0xC0000000) {
3006 emit_readword((int)(memory_map+(addr>>12)),map);
3007 }
3008 else
3009 return -1; // No mapping
3010 }
3011 else {
3012 if(s!=map) emit_mov(s,map);
3013 emit_shrimm(map,12,map);
3014 // Schedule this while we wait on the load
3015 //if(x) emit_xorimm(addr,x,addr);
3016 if(shift>=0) emit_lea8(s,shift);
3017 if(~a) emit_andimm(s,a,ar);
3018 emit_movmem_indexedx4((int)memory_map,map,map);
3019 }
3020 return map;
3021}
3022int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3023{
3024 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3025 emit_test(map,map);
3026 *jaddr=(int)out;
3027 emit_js(0);
3028 }
3029 return map;
3030}
3031
/* Combine an address register with a memory_map register for a read.
 * When map>=0, emits emit_leairrx4(0,ar,map,ar), which writes its result
 * back into ar (presumably ar + map*4, going by the helper's name --
 * confirm).  When map<0 nothing is emitted.
 * Returns ar.  The function is declared int but previously fell off the end
 * without a return statement, so any caller reading the result had undefined
 * behaviour; callers that ignore the result are unaffected. */
int gen_tlb_addr_r(int ar, int map) {
  if(map>=0) {
    emit_leairrx4(0,ar,map,ar);
  }
  return ar;
}
3037
3038int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3039{
3040 if(c) {
3041 if(addr<0x80800000||addr>=0xC0000000) {
3042 emit_readword((int)(memory_map+(addr>>12)),map);
3043 }
3044 else
3045 return -1; // No mapping
3046 }
3047 else {
3048 if(s!=map) emit_mov(s,map);
3049 //if(s!=ar) emit_mov(s,ar);
3050 emit_shrimm(map,12,map);
3051 // Schedule this while we wait on the load
3052 //if(x) emit_xorimm(s,x,addr);
3053 emit_movmem_indexedx4((int)memory_map,map,map);
3054 }
3055 emit_shlimm(map,2,map);
3056 return map;
3057}
3058int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3059{
3060 if(!c||addr<0x80800000||addr>=0xC0000000) {
3061 *jaddr=(int)out;
3062 emit_jc(0);
3063 }
3064}
3065
/* Combine an address register with a memory_map register for a write.
 * When map>=0, emits emit_leairrx1(0,ar,map,ar), which writes its result
 * back into ar (presumably ar + map, going by the helper's name -- confirm).
 * When map<0 nothing is emitted.
 * Returns ar.  The function is declared int but previously fell off the end
 * without a return statement, so any caller reading the result had undefined
 * behaviour; callers that ignore the result are unaffected. */
int gen_tlb_addr_w(int ar, int map) {
  if(map>=0) {
    emit_leairrx1(0,ar,map,ar);
  }
  return ar;
}
3071
3072// We don't need this for x86
3073generate_map_const(u_int addr,int reg) {
3074 // void *mapaddr=memory_map+(addr>>12);
3075}
3076
3077/* Special assem */
3078
// Assemble the variable-count shifts: SLLV/SRLV/SRAV (opcode2 <= 0x07) and
// DSLLV/DSRLV/DSRAV (otherwise).  Nothing is emitted when rt1[i] is 0.
// The shift emitters used here (emit_shlcl, emit_shrcl, emit_sarcl,
// emit_shldcl, emit_shrdcl) take the count in CL, so most of the code moves
// the shift amount into ECX, redirects operands that live in ECX to the
// register that held the shift amount, and swaps ECX back at the end.
// NOTE(review): several register locals are plain char and are compared with
// >=0; that relies on char being signed on the build target.
3079void shift_assemble_x86(int i,struct regstat *i_regs)
3080{
3081 if(rt1[i]) {
3082 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3083 {
3084 char s,t,shift;
3085 t=get_reg(i_regs->regmap,rt1[i]);
3086 s=get_reg(i_regs->regmap,rs1[i]);
3087 shift=get_reg(i_regs->regmap,rs2[i]);
3088 if(t>=0){
// Source is guest register 0: the result is zero
3089 if(rs1[i]==0)
3090 {
3091 emit_zeroreg(t);
3092 }
// Shift amount is guest register 0: plain copy
3093 else if(rs2[i]==0)
3094 {
3095 assert(s>=0);
3096 if(s!=t) emit_mov(s,t);
3097 }
3098 else
3099 {
3100 char temp=get_reg(i_regs->regmap,-1);
3101 assert(s>=0);
// Target is ECX itself: put the count in ECX and shift the value in the
// register that held the count (or temp when target and count are the same
// guest register)
3102 if(t==ECX&&s!=ECX) {
3103 if(shift!=ECX) emit_mov(shift,ECX);
3104 if(rt1[i]==rs2[i]) {shift=temp;}
3105 if(s!=shift) emit_mov(s,shift);
3106 }
3107 else
3108 {
// Target and count are the same guest register: save the count in temp
// before the target is overwritten
3109 if(rt1[i]==rs2[i]) {emit_mov(shift,temp);shift=temp;}
3110 if(s!=t) emit_mov(s,t);
// Get the count into ECX; exchange instead of move when ECX is allocated
3111 if(shift!=ECX) {
3112 if(i_regs->regmap[ECX]<0)
3113 emit_mov(shift,ECX);
3114 else
3115 emit_xchg(shift,ECX);
3116 }
3117 }
3118 if(opcode2[i]==4) // SLLV
3119 {
3120 emit_shlcl(t==ECX?shift:t);
3121 }
3122 if(opcode2[i]==6) // SRLV
3123 {
3124 emit_shrcl(t==ECX?shift:t);
3125 }
3126 if(opcode2[i]==7) // SRAV
3127 {
3128 emit_sarcl(t==ECX?shift:t);
3129 }
// Undo the exchange when ECX is allocated
3130 if(shift!=ECX&&i_regs->regmap[ECX]>=0) emit_xchg(shift,ECX);
3131 }
3132 }
3133 } else { // DSLLV/DSRLV/DSRAV
3134 char sh,sl,th,tl,shift;
3135 th=get_reg(i_regs->regmap,rt1[i]|64);
3136 tl=get_reg(i_regs->regmap,rt1[i]);
3137 sh=get_reg(i_regs->regmap,rs1[i]|64);
3138 sl=get_reg(i_regs->regmap,rs1[i]);
3139 shift=get_reg(i_regs->regmap,rs2[i]);
3140 if(tl>=0){
// Source is guest register 0: both halves of the result are zero
3141 if(rs1[i]==0)
3142 {
3143 emit_zeroreg(tl);
3144 if(th>=0) emit_zeroreg(th);
3145 }
// Shift amount is guest register 0: plain 64-bit copy
3146 else if(rs2[i]==0)
3147 {
3148 assert(sl>=0);
3149 if(sl!=tl) emit_mov(sl,tl);
3150 if(th>=0&&sh!=th) emit_mov(sh,th);
3151 }
3152 else
3153 {
3154 // FIXME: What if shift==tl ?
3155 assert(shift!=tl);
3156 int temp=get_reg(i_regs->regmap,-1);
// real_th keeps the allocated upper register (or -1); th may be replaced by
// temp below so the right shifts still have somewhere to hold the upper half
3157 int real_th=th;
3158 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3159 assert(sl>=0);
3160 assert(sh>=0);
// As in the 32-bit case, whichever target half lives in ECX is moved to the
// register that held the count, and the count goes into ECX
3161 if(tl==ECX&&sl!=ECX) {
3162 if(shift!=ECX) emit_mov(shift,ECX);
3163 if(sl!=shift) emit_mov(sl,shift);
3164 if(th>=0 && sh!=th) emit_mov(sh,th);
3165 }
3166 else if(th==ECX&&sh!=ECX) {
3167 if(shift!=ECX) emit_mov(shift,ECX);
3168 if(sh!=shift) emit_mov(sh,shift);
3169 if(sl!=tl) emit_mov(sl,tl);
3170 }
3171 else
3172 {
3173 if(sl!=tl) emit_mov(sl,tl);
3174 if(th>=0 && sh!=th) emit_mov(sh,th);
3175 if(shift!=ECX) {
3176 if(i_regs->regmap[ECX]<0)
3177 emit_mov(shift,ECX);
3178 else
3179 emit_xchg(shift,ECX);
3180 }
3181 }
// Each 64-bit case does a double-precision shift plus a plain shift, then
// tests bit 5 of the count (value 32) and uses cmovne to fix up the halves
// when it is set
3182 if(opcode2[i]==0x14) // DSLLV
3183 {
3184 if(th>=0) emit_shldcl(th==ECX?shift:th,tl==ECX?shift:tl);
3185 emit_shlcl(tl==ECX?shift:tl);
3186 emit_testimm(ECX,32);
3187 if(th>=0) emit_cmovne_reg(tl==ECX?shift:tl,th==ECX?shift:th);
3188 emit_cmovne(&const_zero,tl==ECX?shift:tl);
3189 }
3190 if(opcode2[i]==0x16) // DSRLV
3191 {
3192 assert(th>=0);
3193 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3194 emit_shrcl(th==ECX?shift:th);
3195 emit_testimm(ECX,32);
3196 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3197 if(real_th>=0) emit_cmovne(&const_zero,th==ECX?shift:th);
3198 }
3199 if(opcode2[i]==0x17) // DSRAV
3200 {
3201 assert(th>=0);
3202 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
// temp receives a copy of the upper half, later shifted right by 31, and is
// used as the upper half when bit 5 of the count is set
3203 if(real_th>=0) {
3204 assert(temp>=0);
3205 emit_mov(th==ECX?shift:th,temp==ECX?shift:temp);
3206 }
3207 emit_sarcl(th==ECX?shift:th);
3208 if(real_th>=0) emit_sarimm(temp==ECX?shift:temp,31,temp==ECX?shift:temp);
3209 emit_testimm(ECX,32);
3210 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3211 if(real_th>=0) emit_cmovne_reg(temp==ECX?shift:temp,th==ECX?shift:th);
3212 }
// Undo the exchange when ECX is allocated or served as temp
3213 if(shift!=ECX&&(i_regs->regmap[ECX]>=0||temp==ECX)) emit_xchg(shift,ECX);
3214 }
3215 }
3216 }
3217 }
3218}
3219#define shift_assemble shift_assemble_x86
3220
// Assemble the unaligned loads LWL/LWR (opcode 0x22/0x26) and LDL/LDR
// (opcode 0x1A/0x1B).
// The aligned word or doubleword containing the address is loaded into
// FTEMP; temp holds the bit offset derived from the low address bits.
// LWL/LWR merge inline using shifts and a mask; LDL/LDR push the operands and
// call ldl_merge / ldr_merge.
// NOTE(review): c is still 0 when addr is chosen below (it is only assigned
// afterward), memtarget is assigned only when s>=0, and constmap[i][s] is
// indexed in the TLB path without checking s>=0 -- confirm these are benign.
3221void loadlr_assemble_x86(int i,struct regstat *i_regs)
3222{
3223 int s,th,tl,temp,temp2,addr,map=-1;
3224 int offset;
3225 int jaddr=0;
3226 int memtarget,c=0;
3227 u_int hr,reglist=0;
3228 th=get_reg(i_regs->regmap,rt1[i]|64);
3229 tl=get_reg(i_regs->regmap,rt1[i]);
3230 s=get_reg(i_regs->regmap,rs1[i]);
3231 temp=get_reg(i_regs->regmap,-1);
3232 temp2=get_reg(i_regs->regmap,FTEMP);
3233 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3234 assert(addr<0);
3235 offset=imm[i];
// reglist: bitmask of host registers that are currently allocated, plus temp
3236 for(hr=0;hr<HOST_REGS;hr++) {
3237 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3238 }
3239 reglist|=1<<temp;
3240 if(offset||s<0||c) addr=temp2;
3241 else addr=s;
3242 if(s>=0) {
3243 c=(i_regs->wasconst>>s)&1;
3244 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3245 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3246 }
3247 if(tl>=0) {
3248 //assert(tl>=0);
3249 //assert(rt1[i]);
3250 if(!using_tlb) {
3251 if(!c) {
// Non-constant address: emit_lea8(addr,temp), align the address into temp2,
// and emit a compare against 0x800000 followed by a branch (patched via
// jaddr) to the slow-path stub
3252 emit_lea8(addr,temp);
3253 if (opcode[i]==0x22||opcode[i]==0x26) {
3254 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3255 }else{
3256 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3257 }
3258 emit_cmpimm(addr,0x800000);
3259 jaddr=(int)out;
3260 emit_jno(0);
3261 }
3262 else {
// Constant address: the bit offset is known now
3263 if (opcode[i]==0x22||opcode[i]==0x26) {
3264 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3265 }else{
3266 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3267 }
3268 }
3269 }else{ // using tlb
// a is the alignment mask handed to do_tlb_r (-1 means no masking)
3270 int a;
3271 if(c) {
3272 a=-1;
3273 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3274 a=0xFFFFFFFC; // LWL/LWR
3275 }else{
3276 a=0xFFFFFFF8; // LDL/LDR
3277 }
3278 map=get_reg(i_regs->regmap,TLREG);
3279 assert(map>=0);
3280 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
3281 if(c) {
3282 if (opcode[i]==0x22||opcode[i]==0x26) {
3283 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3284 }else{
3285 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3286 }
3287 }
3288 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3289 }
3290 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
// Load the aligned word into temp2, either directly or through the inline
// read handler when the constant address is not a memory target
3291 if(!c||memtarget) {
3292 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3293 emit_readword_indexed_tlb(0,temp2,map,temp2);
3294 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3295 }
3296 else
3297 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3298 emit_andimm(temp,24,temp);
3299 if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
// Build the merge: an all-ones value and the loaded word are shifted by the
// same count (in CL); the complement of the shifted ones becomes the mask of
// target bits to keep
3300 if(temp==ECX)
3301 {
// The count is already in ECX; temp3 is a scratch register saved and
// restored around its use
3302 int temp3=EDX;
3303 if(temp3==temp2) temp3++;
3304 emit_pushreg(temp3);
3305 emit_movimm(-1,temp3);
3306 if (opcode[i]==0x26) {
3307 emit_shrcl(temp3);
3308 emit_shrcl(temp2);
3309 }else{
3310 emit_shlcl(temp3);
3311 emit_shlcl(temp2);
3312 }
3313 emit_mov(temp3,ECX);
3314 emit_not(ECX,ECX);
3315 emit_popreg(temp3);
3316 }
3317 else
3318 {
// The count is exchanged into ECX; temp3 is a scratch register distinct from
// temp and temp2, saved and restored around its use
3319 int temp3=EBP;
3320 if(temp3==temp) temp3++;
3321 if(temp3==temp2) temp3++;
3322 if(temp3==temp) temp3++;
3323 emit_xchg(ECX,temp);
3324 emit_pushreg(temp3);
3325 emit_movimm(-1,temp3);
3326 if (opcode[i]==0x26) {
3327 emit_shrcl(temp3);
3328 emit_shrcl(temp2==ECX?temp:temp2);
3329 }else{
3330 emit_shlcl(temp3);
3331 emit_shlcl(temp2==ECX?temp:temp2);
3332 }
3333 emit_not(temp3,temp3);
3334 emit_mov(temp,ECX);
3335 emit_mov(temp3,temp);
3336 emit_popreg(temp3);
3337 }
// tl = (tl & mask) | shifted word
3338 emit_and(temp,tl,tl);
3339 emit_or(temp2,tl,tl);
3340 //emit_storereg(rt1[i],tl); // DEBUG
3341 /*emit_pusha();
3342 //save_regs(0x100f);
3343 emit_readword((int)&last_count,ECX);
3344 if(get_reg(i_regs->regmap,CCREG)<0)
3345 emit_loadreg(CCREG,HOST_CCREG);
3346 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3347 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3348 emit_writeword(HOST_CCREG,(int)&Count);
3349 emit_call((int)memdebug);
3350 emit_popa();
3351 //restore_regs(0x100f);*/
3352 }
3353 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
// The base register (both halves) is stored if dirty here and reloaded after
// the merge call below
3354 if(s>=0)
3355 if((i_regs->wasdirty>>s)&1)
3356 emit_storereg(rs1[i],s);
3357 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3358 if((i_regs->wasdirty>>get_reg(i_regs->regmap,rs1[i]|64))&1)
3359 emit_storereg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3360 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3361 if(!c||memtarget) {
3362 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3363 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3364 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3365 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3366 }
3367 else
3368 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3369 emit_andimm(temp,56,temp);
// Five words are pushed for the merge helper (bit offset, loaded high/low,
// target high/low) and dropped again with the add of 20 to ESP
3370 emit_pushreg(temp);
3371 emit_pushreg(temp2h);
3372 emit_pushreg(temp2);
3373 emit_pushreg(th);
3374 emit_pushreg(tl);
3375 if(opcode[i]==0x1A) emit_call((int)ldl_merge);
3376 if(opcode[i]==0x1B) emit_call((int)ldr_merge);
3377 emit_addimm(ESP,20,ESP);
// Move the result from EAX (to tl) and EDX (to th), ordering the moves so
// neither value is overwritten before it is read
3378 if(tl!=EDX) {
3379 if(tl!=EAX) emit_mov(EAX,tl);
3380 if(th!=EDX) emit_mov(EDX,th);
3381 } else
3382 if(th!=EAX) {
3383 if(th!=EDX) emit_mov(EDX,th);
3384 if(tl!=EAX) emit_mov(EAX,tl);
3385 } else {
3386 emit_xchg(EAX,EDX);
3387 }
3388 if(s>=0) emit_loadreg(rs1[i],s);
3389 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3390 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3391 }
3392 }
3393}
3394#define loadlr_assemble loadlr_assemble_x86
3395
// Assemble coprocessor-0 instructions: MFC0 (opcode2 0), MTC0 (opcode2 4)
// and, for opcode2 0x10, TLBR/TLBWI/TLBWR/TLBP/ERET selected by the low six
// bits of the instruction word.
// MFC0 and MTC0 are done by calling the MFC0 / MTC0 helpers; before the call
// the generated code points PC at fake_pc and stores the coprocessor register
// number (bits 11..15 of the instruction) in fake_pc.f.r.nrd.  Data is passed
// through readmem_dword.
3396void cop0_assemble(int i,struct regstat *i_regs)
3397{
3398 if(opcode2[i]==0) // MFC0
3399 {
3400 signed char t=get_reg(i_regs->regmap,rt1[i]);
3401 char copr=(source[i]>>11)&0x1f;
3402 //assert(t>=0); // Why does this happen? OOT is weird
3403 if(t>=0) {
3404 emit_writeword_imm((int)&fake_pc,(int)&PC);
3405 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
// Register 9: bring the Count global up to date before the helper runs
3406 if(copr==9) {
3407 emit_readword((int)&last_count,ECX);
3408 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3409 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3410 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3411 emit_writeword(HOST_CCREG,(int)&Count);
3412 }
3413 emit_call((int)MFC0);
3414 emit_readword((int)&readmem_dword,t);
3415 }
3416 }
3417 else if(opcode2[i]==4) // MTC0
3418 {
3419 signed char s=get_reg(i_regs->regmap,rs1[i]);
3420 char copr=(source[i]>>11)&0x1f;
3421 assert(s>=0);
// The value to write is handed to the helper through readmem_dword
3422 emit_writeword(s,(int)&readmem_dword);
3423 emit_pusha();
3424 emit_writeword_imm((int)&fake_pc,(int)&PC);
3425 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
// Registers 9, 11 and 12: bring the Count global up to date before the call
3426 if(copr==9||copr==11||copr==12) {
3427 if(copr==12&&!is_delayslot) {
3428 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3429 }
3430 emit_readword((int)&last_count,ECX);
3431 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3432 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3433 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3434 emit_writeword(HOST_CCREG,(int)&Count);
3435 }
3436 // What a mess. The status register (12) can enable interrupts,
3437 // so needs a special case to handle a pending interrupt.
3438 // The interrupt must be taken immediately, because a subsequent
3439 // instruction might disable interrupts again.
3440 if(copr==12&&!is_delayslot) {
// Record the address of the following instruction in pcaddr and clear
// pending_exception before the call
3441 emit_writeword_imm(start+i*4+4,(int)&pcaddr);
3442 emit_writebyte_imm(0,(int)&pending_exception);
3443 }
3444 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3445 //else
3446 emit_call((int)MTC0);
// Recompute the cycle register from Count and next_interupt after the call
// and refresh last_count
3447 if(copr==9||copr==11||copr==12) {
3448 emit_readword((int)&Count,HOST_CCREG);
3449 emit_readword((int)&next_interupt,ECX);
3450 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3451 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3452 emit_writeword(ECX,(int)&last_count);
3453 emit_storereg(CCREG,HOST_CCREG);
3454 }
3455 emit_popa();
3456 if(copr==12) {
3457 assert(!is_delayslot);
3458 //if(is_delayslot) output_byte(0xcc);
// If pending_exception is nonzero after the call, jump to do_interrupt
3459 emit_cmpmem_imm_byte((int)&pending_exception,0);
3460 emit_jne((int)&do_interrupt);
3461 }
// After any MTC0 the next COP1 instruction re-checks usability
3462 cop1_usable=0;
3463 }
3464 else
3465 {
3466 assert(opcode2[i]==0x10);
3467 if((source[i]&0x3f)==0x01) // TLBR
3468 emit_call((int)TLBR);
3469 if((source[i]&0x3f)==0x02) // TLBWI
3470 emit_call((int)TLBWI_new);
3471 if((source[i]&0x3f)==0x06) { // TLBWR
3472 // The TLB entry written by TLBWR is dependent on the count,
3473 // so update the cycle count
3474 emit_readword((int)&last_count,ECX);
3475 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3476 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3477 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3478 emit_writeword(HOST_CCREG,(int)&Count);
3479 emit_call((int)TLBWR_new);
3480 }
3481 if((source[i]&0x3f)==0x08) // TLBP
3482 emit_call((int)TLBP);
3483 if((source[i]&0x3f)==0x18) // ERET
3484 {
// Add the cycle adjustment to the cycle register and jump to jump_eret
3485 int count=ccadj[i];
3486 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3487 emit_addimm_and_set_flags(CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3488 emit_jmp((int)jump_eret);
3489 }
3490 }
3491}
3492
// Assemble coprocessor-1 register moves: MFC1 (opcode2 0), DMFC1 (1),
// CFC1 (2), MTC1 (4), DMTC1 (5) and CTC1 (6).
// FPU register values are reached through pointer tables: the generated code
// first loads a pointer from reg_cop1_simple[] or reg_cop1_double[] (indexed
// by bits 11..15 of the instruction) and then reads or writes through it.
3493void cop1_assemble(int i,struct regstat *i_regs)
3494{
3495 // Check cop1 unusable
// Only emitted until cop1_usable is set: test bit 0x20000000 of the register
// holding CSREG and branch to an FP_STUB when it is clear
3496 if(!cop1_usable) {
3497 signed char rs=get_reg(i_regs->regmap,CSREG);
3498 assert(rs>=0);
3499 emit_testimm(rs,0x20000000);
3500 int jaddr=(int)out;
3501 emit_jeq(0);
3502 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3503 cop1_usable=1;
3504 }
3505 if (opcode2[i]==0) { // MFC1
3506 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3507 if(tl>=0) {
// tl first holds the pointer, then the word it points to
3508 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3509 emit_readword_indexed(0,tl,tl);
3510 }
3511 }
3512 else if (opcode2[i]==1) { // DMFC1
3513 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3514 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3515 if(tl>=0) {
3516 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
// Read the upper word first: the lower read overwrites the pointer in tl
3517 if(th>=0) emit_readword_indexed(4,tl,th);
3518 emit_readword_indexed(0,tl,tl);
3519 }
3520 }
3521 else if (opcode2[i]==4) { // MTC1
3522 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3523 signed char temp=get_reg(i_regs->regmap,-1);
3524 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3525 emit_writeword_indexed(sl,0,temp);
3526 }
3527 else if (opcode2[i]==5) { // DMTC1
3528 signed char sl=get_reg(i_regs->regmap,rs1[i]);
// For guest register 0 the low register is also used as the upper half
3529 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3530 signed char temp=get_reg(i_regs->regmap,-1);
3531 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3532 emit_writeword_indexed(sh,4,temp);
3533 emit_writeword_indexed(sl,0,temp);
3534 }
3535 else if (opcode2[i]==2) // CFC1
3536 {
3537 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3538 if(tl>=0) {
// Only control registers 0 (FCR0) and 31 (FCR31) are read; any other number
// emits nothing
3539 u_int copr=(source[i]>>11)&0x1f;
3540 if(copr==0) emit_readword((int)&FCR0,tl);
3541 if(copr==31) emit_readword((int)&FCR31,tl);
3542 }
3543 }
3544 else if (opcode2[i]==6) // CTC1
3545 {
3546 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3547 u_int copr=(source[i]>>11)&0x1f;
3548 assert(sl>=0);
// Only control register 31 is written; any other number emits nothing
3549 if(copr==31)
3550 {
3551 emit_writeword(sl,(int)&FCR31);
3552 // Set the rounding mode
// The low two bits of the written value index rounding_modes[], and the
// selected entry is loaded as the x87 control word
3553 char temp=get_reg(i_regs->regmap,-1);
3554 emit_movimm(3,temp);
3555 emit_and(sl,temp,temp);
3556 emit_fldcw_indexed((int)&rounding_modes,temp);
3557 }
3558 }
3559}
3560
3561void fconv_assemble_x86(int i,struct regstat *i_regs)
3562{
3563 signed char temp=get_reg(i_regs->regmap,-1);
3564 assert(temp>=0);
3565 // Check cop1 unusable
3566 if(!cop1_usable) {
3567 signed char rs=get_reg(i_regs->regmap,CSREG);
3568 assert(rs>=0);
3569 emit_testimm(rs,0x20000000);
3570 int jaddr=(int)out;
3571 emit_jeq(0);
3572 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3573 cop1_usable=1;
3574 }
3575 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3576 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3577 emit_movss_load(temp,0);
3578 emit_cvttps2dq(0,0); // float->int, truncate
3579 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3580 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3581 emit_movd_store(0,temp);
3582 return;
3583 }
3584 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3585 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3586 emit_movsd_load(temp,0);
3587 emit_cvttpd2dq(0,0); // double->int, truncate
3588 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3589 emit_movd_store(0,temp);
3590 return;
3591 }
3592
3593 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3594 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3595 emit_fildl(temp);
3596 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3597 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3598 emit_fstps(temp);
3599 return;
3600 }
3601 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3602 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3603 emit_fildl(temp);
3604 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3605 emit_fstpl(temp);
3606 return;
3607 }
3608 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) { // cvt_s_l
3609 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3610 emit_fildll(temp);
3611 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3612 emit_fstps(temp);
3613 return;
3614 }
3615 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) { // cvt_d_l
3616 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3617 emit_fildll(temp);
3618 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3619 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3620 emit_fstpl(temp);
3621 return;
3622 }
3623
3624 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3625 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3626 emit_flds(temp);
3627 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3628 emit_fstpl(temp);
3629 return;
3630 }
3631 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3632 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3633 emit_fldl(temp);
3634 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3635 emit_fstps(temp);
3636 return;
3637 }
3638
3639 if(opcode2[i]==0x10) { // cvt_*_s
3640 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3641 emit_flds(temp);
3642 }
3643 if(opcode2[i]==0x11) { // cvt_*_d
3644 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3645 emit_fldl(temp);
3646 }
3647 if((source[i]&0x3f)<0x10) {
3648 emit_fnstcw_stack();
3649 if((source[i]&3)==0) emit_fldcw((int)&round_mode); //printf("round\n");
3650 if((source[i]&3)==1) emit_fldcw((int)&trunc_mode); //printf("trunc\n");
3651 if((source[i]&3)==2) emit_fldcw((int)&ceil_mode); //printf("ceil\n");
3652 if((source[i]&3)==3) emit_fldcw((int)&floor_mode); //printf("floor\n");
3653 }
3654 if((source[i]&0x3f)==0x24||(source[i]&0x3c)==0x0c) { // cvt_w_*
3655 if(opcode2[i]!=0x10||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3656 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3657 emit_fistpl(temp);
3658 }
3659 if((source[i]&0x3f)==0x25||(source[i]&0x3c)==0x08) { // cvt_l_*
3660 if(opcode2[i]!=0x11||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3661 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3662 emit_fistpll(temp);
3663 }
3664 if((source[i]&0x3f)<0x10) {
3665 emit_fldcw_stack();
3666 }
3667 return;
3668
3669 // C emulation code for debugging
3670
3671 emit_pusha();
3672
3673 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3674 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3675 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3676 emit_call((int)cvt_s_w);
3677 }
3678 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3679 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3680 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3681 emit_call((int)cvt_d_w);
3682 }
3683 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3684 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3685 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3686 emit_call((int)cvt_s_l);
3687 }
3688 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3689 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3690 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3691 emit_call((int)cvt_d_l);
3692 }
3693
3694 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3695 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3696 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3697 emit_call((int)cvt_d_s);
3698 }
3699 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3700 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3701 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3702 emit_call((int)cvt_w_s);
3703 }
3704 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3705 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3706 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3707 emit_call((int)cvt_l_s);
3708 }
3709
3710 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3711 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3712 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3713 emit_call((int)cvt_s_d);
3714 }
3715 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3716 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3717 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3718 emit_call((int)cvt_w_d);
3719 }
3720 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3721 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3722 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3723 emit_call((int)cvt_l_d);
3724 }
3725
3726 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3727 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3728 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3729 emit_call((int)round_l_s);
3730 }
3731 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3732 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3733 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3734 emit_call((int)trunc_l_s);
3735 }
3736 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3737 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3738 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3739 emit_call((int)ceil_l_s);
3740 }
3741 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3742 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3743 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3744 emit_call((int)floor_l_s);
3745 }
3746 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3747 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3748 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3749 emit_call((int)round_w_s);
3750 }
3751 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3752 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3753 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3754 emit_call((int)trunc_w_s);
3755 }
3756 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3757 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3758 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3759 emit_call((int)ceil_w_s);
3760 }
3761 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3762 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3763 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3764 emit_call((int)floor_w_s);
3765 }
3766
3767 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3768 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3769 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3770 emit_call((int)round_l_d);
3771 }
3772 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3773 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3774 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3775 emit_call((int)trunc_l_d);
3776 }
3777 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3778 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3779 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3780 emit_call((int)ceil_l_d);
3781 }
3782 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3783 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3784 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3785 emit_call((int)floor_l_d);
3786 }
3787 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3788 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3789 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3790 emit_call((int)round_w_d);
3791 }
3792 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3793 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3794 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3795 emit_call((int)trunc_w_d);
3796 }
3797 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3798 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3799 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3800 emit_call((int)ceil_w_d);
3801 }
3802 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3803 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3804 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3805 emit_call((int)floor_w_d);
3806 }
3807
3808 emit_addimm(ESP,8,ESP);
3809 emit_popa();
3810 //emit_loadreg(CSREG,rs);
3811 return;
3812}
// Make the name fconv_assemble refer to the x86-specific implementation
#define fconv_assemble fconv_assemble_x86
3814
3815void fcomp_assemble(int i,struct regstat *i_regs)
3816{
3817 signed char fs=get_reg(i_regs->regmap,FSREG);
3818 signed char temp=get_reg(i_regs->regmap,-1);
3819 assert(temp>=0);
3820 // Check cop1 unusable
3821 if(!cop1_usable) {
3822 signed char cs=get_reg(i_regs->regmap,CSREG);
3823 assert(cs>=0);
3824 emit_testimm(cs,0x20000000);
3825 int jaddr=(int)out;
3826 emit_jeq(0);
3827 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3828 cop1_usable=1;
3829 }
3830
3831 if((source[i]&0x3f)==0x30) {
3832 emit_andimm(fs,~0x800000,fs);
3833 return;
3834 }
3835
3836 if((source[i]&0x3e)==0x38) {
3837 // sf/ngle - these should throw exceptions for NaNs
3838 emit_andimm(fs,~0x800000,fs);
3839 return;
3840 }
3841
3842 if(opcode2[i]==0x10) {
3843 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
3844 emit_flds(temp);
3845 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3846 emit_flds(temp);
3847 emit_movimm(0x800000,temp);
3848 emit_or(fs,temp,fs);
3849 emit_xor(temp,fs,temp);
3850 emit_fucomip(1);
3851 emit_fpop();
3852 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_s
3853 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_s
3854 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_s
3855 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_s
3856 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_s
3857 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_s
3858 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_s
3859 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_s
3860 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_s
3861 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_s
3862 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_s
3863 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_s
3864 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_s
3865 return;
3866 }
3867 if(opcode2[i]==0x11) {
3868 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
3869 emit_fldl(temp);
3870 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3871 emit_fldl(temp);
3872 emit_movimm(0x800000,temp);
3873 emit_or(fs,temp,fs);
3874 emit_xor(temp,fs,temp);
3875 emit_fucomip(1);
3876 emit_fpop();
3877 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_d
3878 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_d
3879 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_d
3880 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_d
3881 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_d
3882 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_d
3883 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_d
3884 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_d
3885 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_d
3886 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_d
3887 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_d
3888 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_d
3889 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_d
3890 return;
3891 }
3892
3893 emit_pusha();
3894 if(opcode2[i]==0x10) {
3895 emit_pushmem((int)&reg_cop1_simple[(source[i]>>16)&0x1f]);
3896 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3897 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3898 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3899 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3900 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3901 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3902 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3903 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3904 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3905 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3906 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3907 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3908 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3909 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3910 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3911 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3912 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3913 }
3914 if(opcode2[i]==0x11) {
3915 emit_pushmem((int)&reg_cop1_double[(source[i]>>16)&0x1f]);
3916 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3917 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3918 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3919 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3920 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3921 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3922 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3923 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3924 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3925 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3926 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3927 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3928 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3929 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3930 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3931 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3932 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
3933 }
3934 emit_addimm(ESP,8,ESP);
3935 emit_popa();
3936 emit_loadreg(FSREG,fs);
3937 return;
3938}
3939
3940void float_assemble(int i,struct regstat *i_regs)
3941{
3942 signed char temp=get_reg(i_regs->regmap,-1);
3943 assert(temp>=0);
3944 // Check cop1 unusable
3945 if(!cop1_usable) {
3946 signed char cs=get_reg(i_regs->regmap,CSREG);
3947 assert(cs>=0);
3948 emit_testimm(cs,0x20000000);
3949 int jaddr=(int)out;
3950 emit_jeq(0);
3951 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3952 cop1_usable=1;
3953 }
3954
3955 if((source[i]&0x3f)==6) // mov
3956 {
3957 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3958 if(opcode2[i]==0x10) {
3959 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3960 emit_flds(temp);
3961 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3962 emit_fstps(temp);
3963 }
3964 if(opcode2[i]==0x11) {
3965 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3966 emit_fldl(temp);
3967 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3968 emit_fstpl(temp);
3969 }
3970 }
3971 return;
3972 }
3973
3974 if((source[i]&0x3f)>3)
3975 {
3976 if(opcode2[i]==0x10) {
3977 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3978 emit_flds(temp);
3979 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3980 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3981 }
3982 }
3983 if(opcode2[i]==0x11) {
3984 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3985 emit_fldl(temp);
3986 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3987 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3988 }
3989 }
3990 if((source[i]&0x3f)==4) // sqrt
3991 emit_fsqrt();
3992 if((source[i]&0x3f)==5) // abs
3993 emit_fabs();
3994 if((source[i]&0x3f)==7) // neg
3995 emit_fchs();
3996 if(opcode2[i]==0x10) {
3997 emit_fstps(temp);
3998 }
3999 if(opcode2[i]==0x11) {
4000 emit_fstpl(temp);
4001 }
4002 return;
4003 }
4004 if((source[i]&0x3f)<4)
4005 {
4006 if(opcode2[i]==0x10) {
4007 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4008 emit_flds(temp);
4009 }
4010 if(opcode2[i]==0x11) {
4011 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4012 emit_fldl(temp);
4013 }
4014 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4015 if(opcode2[i]==0x10) {
4016 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
4017 if((source[i]&0x3f)==0) emit_fadds(temp);
4018 if((source[i]&0x3f)==1) emit_fsubs(temp);
4019 if((source[i]&0x3f)==2) emit_fmuls(temp);
4020 if((source[i]&0x3f)==3) emit_fdivs(temp);
4021 }
4022 else if(opcode2[i]==0x11) {
4023 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
4024 if((source[i]&0x3f)==0) emit_faddl(temp);
4025 if((source[i]&0x3f)==1) emit_fsubl(temp);
4026 if((source[i]&0x3f)==2) emit_fmull(temp);
4027 if((source[i]&0x3f)==3) emit_fdivl(temp);
4028 }
4029 }
4030 else {
4031 if((source[i]&0x3f)==0) emit_fadd(0);
4032 if((source[i]&0x3f)==1) emit_fsub(0);
4033 if((source[i]&0x3f)==2) emit_fmul(0);
4034 if((source[i]&0x3f)==3) emit_fdiv(0);
4035 }
4036 if(opcode2[i]==0x10) {
4037 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4038 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4039 }
4040 emit_fstps(temp);
4041 }
4042 if(opcode2[i]==0x11) {
4043 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4044 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4045 }
4046 emit_fstpl(temp);
4047 }
4048 return;
4049 }
4050
4051 if(opcode2[i]==0x10) { // Single precision
4052 emit_pusha();
4053 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
4054 if((source[i]&0x3f)<4)
4055 emit_pushmem((int)&reg_cop1_simple[(source[i]>>16)&0x1f]);
4056 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
4057 switch(source[i]&0x3f)
4058 {
4059 case 0x00: emit_call((int)add_s);break;
4060 case 0x01: emit_call((int)sub_s);break;
4061 case 0x02: emit_call((int)mul_s);break;
4062 case 0x03: emit_call((int)div_s);break;
4063 case 0x04: emit_call((int)sqrt_s);break;
4064 case 0x05: emit_call((int)abs_s);break;
4065 case 0x06: emit_call((int)mov_s);break;
4066 case 0x07: emit_call((int)neg_s);break;
4067 }
4068 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
4069 emit_popa();
4070 }
4071 if(opcode2[i]==0x11) { // Double precision
4072 emit_pusha();
4073 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
4074 if((source[i]&0x3f)<4)
4075 emit_pushmem((int)&reg_cop1_double[(source[i]>>16)&0x1f]);
4076 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
4077 switch(source[i]&0x3f)
4078 {
4079 case 0x00: emit_call((int)add_d);break;
4080 case 0x01: emit_call((int)sub_d);break;
4081 case 0x02: emit_call((int)mul_d);break;
4082 case 0x03: emit_call((int)div_d);break;
4083 case 0x04: emit_call((int)sqrt_d);break;
4084 case 0x05: emit_call((int)abs_d);break;
4085 case 0x06: emit_call((int)mov_d);break;
4086 case 0x07: emit_call((int)neg_d);break;
4087 }
4088 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
4089 emit_popa();
4090 }
4091}
4092
4093void multdiv_assemble_x86(int i,struct regstat *i_regs)
4094{
4095 // case 0x18: MULT
4096 // case 0x19: MULTU
4097 // case 0x1A: DIV
4098 // case 0x1B: DIVU
4099 // case 0x1C: DMULT
4100 // case 0x1D: DMULTU
4101 // case 0x1E: DDIV
4102 // case 0x1F: DDIVU
4103 if(rs1[i]&&rs2[i])
4104 {
4105 if((opcode2[i]&4)==0) // 32-bit
4106 {
4107 if(opcode2[i]==0x18) // MULT
4108 {
4109 char m1=get_reg(i_regs->regmap,rs1[i]);
4110 char m2=get_reg(i_regs->regmap,rs2[i]);
4111 assert(m1>=0);
4112 assert(m2>=0);
4113 emit_mov(m1,EAX);
4114 emit_imul(m2);
4115 }
4116 if(opcode2[i]==0x19) // MULTU
4117 {
4118 char m1=get_reg(i_regs->regmap,rs1[i]);
4119 char m2=get_reg(i_regs->regmap,rs2[i]);
4120 assert(m1>=0);
4121 assert(m2>=0);
4122 emit_mov(m1,EAX);
4123 emit_mul(m2);
4124 }
4125 if(opcode2[i]==0x1A) // DIV
4126 {
4127 char d1=get_reg(i_regs->regmap,rs1[i]);
4128 char d2=get_reg(i_regs->regmap,rs2[i]);
4129 assert(d1>=0);
4130 assert(d2>=0);
4131 emit_mov(d1,EAX);
4132 emit_cdq();
4133 emit_test(d2,d2);
4134 emit_jeq((int)out+8);
4135 emit_idiv(d2);
4136 }
4137 if(opcode2[i]==0x1B) // DIVU
4138 {
4139 char d1=get_reg(i_regs->regmap,rs1[i]);
4140 char d2=get_reg(i_regs->regmap,rs2[i]);
4141 assert(d1>=0);
4142 assert(d2>=0);
4143 emit_mov(d1,EAX);
4144 emit_zeroreg(EDX);
4145 emit_test(d2,d2);
4146 emit_jeq((int)out+8);
4147 emit_div(d2);
4148 }
4149 }
4150 else // 64-bit
4151 {
4152 if(opcode2[i]==0x1C) // DMULT
4153 {
4154 char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4155 char m1l=get_reg(i_regs->regmap,rs1[i]);
4156 char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4157 char m2l=get_reg(i_regs->regmap,rs2[i]);
4158 assert(m1h>=0);
4159 assert(m2h>=0);
4160 assert(m1l>=0);
4161 assert(m2l>=0);
4162 emit_pushreg(m2h);
4163 emit_pushreg(m2l);
4164 emit_pushreg(m1h);
4165 emit_pushreg(m1l);
4166 emit_call((int)&mult64);
4167 emit_popreg(m1l);
4168 emit_popreg(m1h);
4169 emit_popreg(m2l);
4170 emit_popreg(m2h);
4171 char hih=get_reg(i_regs->regmap,HIREG|64);
4172 char hil=get_reg(i_regs->regmap,HIREG);
4173 if(hih>=0) emit_loadreg(HIREG|64,hih);
4174 if(hil>=0) emit_loadreg(HIREG,hil);
4175 char loh=get_reg(i_regs->regmap,LOREG|64);
4176 char lol=get_reg(i_regs->regmap,LOREG);
4177 if(loh>=0) emit_loadreg(LOREG|64,loh);
4178 if(lol>=0) emit_loadreg(LOREG,lol);
4179 }
4180 if(opcode2[i]==0x1D) // DMULTU
4181 {
4182 char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4183 char m1l=get_reg(i_regs->regmap,rs1[i]);
4184 char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4185 char m2l=get_reg(i_regs->regmap,rs2[i]);
4186 char temp=get_reg(i_regs->regmap,-1);
4187 assert(m1h>=0);
4188 assert(m2h>=0);
4189 assert(m1l>=0);
4190 assert(m2l>=0);
4191 assert(temp>=0);
4192 emit_mov(m1l,EAX);
4193 emit_mul(m2l);
4194 emit_storereg(LOREG,EAX);
4195 emit_mov(EDX,temp);
4196 emit_mov(m1h,EAX);
4197 emit_mul(m2l);
4198 emit_add(EAX,temp,temp);
4199 emit_adcimm(0,EDX);
4200 emit_storereg(HIREG,EDX);
4201 emit_mov(m2h,EAX);
4202 emit_mul(m1l);
4203 emit_add(EAX,temp,temp);
4204 emit_adcimm(0,EDX);
4205 emit_storereg(LOREG|64,temp);
4206 emit_mov(EDX,temp);
4207 emit_mov(m2h,EAX);
4208 emit_mul(m1h);
4209 emit_add(EAX,temp,EAX);
4210 emit_loadreg(HIREG,temp);
4211 emit_adcimm(0,EDX);
4212 emit_add(EAX,temp,EAX);
4213 emit_adcimm(0,EDX);
4214 // DEBUG
4215 /*
4216 emit_pushreg(m2h);
4217 emit_pushreg(m2l);
4218 emit_pushreg(m1h);
4219 emit_pushreg(m1l);
4220 emit_call((int)&multu64);
4221 emit_popreg(m1l);
4222 emit_popreg(m1h);
4223 emit_popreg(m2l);
4224 emit_popreg(m2h);
4225 char hih=get_reg(i_regs->regmap,HIREG|64);
4226 char hil=get_reg(i_regs->regmap,HIREG);
4227 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4228 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4229 */
4230 // Shouldn't be necessary
4231 //char loh=get_reg(i_regs->regmap,LOREG|64);
4232 //char lol=get_reg(i_regs->regmap,LOREG);
4233 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4234 //if(lol>=0) emit_loadreg(LOREG,lol);
4235 }
4236 if(opcode2[i]==0x1E) // DDIV
4237 {
4238 char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4239 char d1l=get_reg(i_regs->regmap,rs1[i]);
4240 char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4241 char d2l=get_reg(i_regs->regmap,rs2[i]);
4242 assert(d1h>=0);
4243 assert(d2h>=0);
4244 assert(d1l>=0);
4245 assert(d2l>=0);
4246 //emit_pushreg(d2h);
4247 //emit_pushreg(d2l);
4248 //emit_pushreg(d1h);
4249 //emit_pushreg(d1l);
4250 emit_addimm(ESP,-16,ESP);
4251 emit_writeword_indexed(d2h,12,ESP);
4252 emit_writeword_indexed(d2l,8,ESP);
4253 emit_writeword_indexed(d1h,4,ESP);
4254 emit_writeword_indexed(d1l,0,ESP);
4255 emit_call((int)&div64);
4256 //emit_popreg(d1l);
4257 //emit_popreg(d1h);
4258 //emit_popreg(d2l);
4259 //emit_popreg(d2h);
4260 emit_readword_indexed(0,ESP,d1l);
4261 emit_readword_indexed(4,ESP,d1h);
4262 emit_readword_indexed(8,ESP,d2l);
4263 emit_readword_indexed(12,ESP,d2h);
4264 emit_addimm(ESP,16,ESP);
4265 char hih=get_reg(i_regs->regmap,HIREG|64);
4266 char hil=get_reg(i_regs->regmap,HIREG);
4267 char loh=get_reg(i_regs->regmap,LOREG|64);
4268 char lol=get_reg(i_regs->regmap,LOREG);
4269 if(hih>=0) emit_loadreg(HIREG|64,hih);
4270 if(hil>=0) emit_loadreg(HIREG,hil);
4271 if(loh>=0) emit_loadreg(LOREG|64,loh);
4272 if(lol>=0) emit_loadreg(LOREG,lol);
4273 }
4274 if(opcode2[i]==0x1F) // DDIVU
4275 {
4276 char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4277 char d1l=get_reg(i_regs->regmap,rs1[i]);
4278 char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4279 char d2l=get_reg(i_regs->regmap,rs2[i]);
4280 assert(d1h>=0);
4281 assert(d2h>=0);
4282 assert(d1l>=0);
4283 assert(d2l>=0);
4284 //emit_pushreg(d2h);
4285 //emit_pushreg(d2l);
4286 //emit_pushreg(d1h);
4287 //emit_pushreg(d1l);
4288 emit_addimm(ESP,-16,ESP);
4289 emit_writeword_indexed(d2h,12,ESP);
4290 emit_writeword_indexed(d2l,8,ESP);
4291 emit_writeword_indexed(d1h,4,ESP);
4292 emit_writeword_indexed(d1l,0,ESP);
4293 emit_call((int)&divu64);
4294 //emit_popreg(d1l);
4295 //emit_popreg(d1h);
4296 //emit_popreg(d2l);
4297 //emit_popreg(d2h);
4298 emit_readword_indexed(0,ESP,d1l);
4299 emit_readword_indexed(4,ESP,d1h);
4300 emit_readword_indexed(8,ESP,d2l);
4301 emit_readword_indexed(12,ESP,d2h);
4302 emit_addimm(ESP,16,ESP);
4303 char hih=get_reg(i_regs->regmap,HIREG|64);
4304 char hil=get_reg(i_regs->regmap,HIREG);
4305 char loh=get_reg(i_regs->regmap,LOREG|64);
4306 char lol=get_reg(i_regs->regmap,LOREG);
4307 if(hih>=0) emit_loadreg(HIREG|64,hih);
4308 if(hil>=0) emit_loadreg(HIREG,hil);
4309 if(loh>=0) emit_loadreg(LOREG|64,loh);
4310 if(lol>=0) emit_loadreg(LOREG,lol);
4311 }
4312 }
4313 }
4314 else
4315 {
4316 // Multiply by zero is zero.
4317 // MIPS does not have a divide by zero exception.
4318 // The result is undefined, we return zero.
4319 char hr=get_reg(i_regs->regmap,HIREG);
4320 char lr=get_reg(i_regs->regmap,LOREG);
4321 if(hr>=0) emit_zeroreg(hr);
4322 if(lr>=0) emit_zeroreg(lr);
4323 }
4324}
// Make the name multdiv_assemble refer to the x86-specific implementation
#define multdiv_assemble multdiv_assemble_x86
4326
// Load the hash mask 0xf8 into host register r, for use by do_rhash().
void do_preload_rhash(int r)
{
  const int hash_mask=0xf8;
  emit_movimm(hash_mask,r);
}
4330
// Nothing to emit on x86; kept so the x86 backend still provides this hook.
void do_preload_rhtbl(int r)
{
  (void)r; // unused on x86
}
4334
// Emit an AND of rs with rh, result in rh.  rh is expected to already hold
// the mask loaded by do_preload_rhash().
void do_rhash(int rs,int rh)
{
  emit_and(rs,rh,rh);
}
4338
// Nothing to emit on x86: the load and the compare are combined into a
// single instruction by do_miniht_jump().
void do_miniht_load(int ht,int rh)
{
  (void)ht; // unused on x86
  (void)rh; // unused on x86
}
4343
4344void do_miniht_jump(int rs,int rh,int ht) {
4345 emit_cmpmem_indexed((int)mini_ht,rh,rs);
4346 emit_jne(jump_vaddr_reg[rs]);
4347 emit_jmpmem_indexed((int)mini_ht+4,rh);
4348}
4349
4350void do_miniht_insert(int return_address,int rt,int temp) {
4351 emit_movimm(return_address,rt); // PC into link register
4352 //emit_writeword_imm(return_address,(int)&mini_ht[(return_address&0xFF)>>8][0]);
4353 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4354 add_to_linker((int)out,return_address,1);
4355 emit_writeword_imm(0,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4356}
4357
4358// We don't need this for x86
// No literal pool is emitted on x86
void literal_pool(int n)
{
  (void)n; // unused on x86
}
// No literal pool on x86, so there is nothing to jump over
void literal_pool_jumpover(int n)
{
  (void)n; // unused on x86
}
4361
4362// CPU-architecture-specific initialization, not needed for x86
// Architecture-specific setup hook; x86 needs none
void arch_init()
{
}