ALL: Huge upstream synch + PerRom DelaySI & CountPerOp parameters
[mupen64plus-pandora.git] / source / mupen64plus-core / src / r4300 / new_dynarec / assem_x86.c
CommitLineData
451ab91e 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_x86.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
// Shared state for the x86 recompiler backend.
// Only cycle_count and memory_map are referenced in the code that follows
// in this part of the file; the remaining variables are used elsewhere.
int cycle_count;            // memory home of CCREG (see emit_loadreg/emit_storereg)
int last_count;
int pcaddr;
int pending_exception;
int branch_target;
uint64_t readmem_dword;
static precomp_instr fake_pc;
// One entry per 4 KiB page (indexed by address>>12; 2^20 entries).
// verify_dirty/get_bounds treat an entry >= 0x80000000 as "no direct mapping";
// otherwise (entry<<2) is added to the address to locate the data.
u_int memory_map[1048576];
static u_int mini_ht[32][2] __attribute__((aligned(8)));
u_char restore_candidate[512] __attribute__((aligned(4)));
31
32void do_interrupt();
33void jump_vaddr_eax();
34void jump_vaddr_ecx();
35void jump_vaddr_edx();
36void jump_vaddr_ebx();
37void jump_vaddr_ebp();
38void jump_vaddr_edi();
39
40static const u_int jump_vaddr_reg[8] = {
41 (int)jump_vaddr_eax,
42 (int)jump_vaddr_ecx,
43 (int)jump_vaddr_edx,
44 (int)jump_vaddr_ebx,
45 0,
46 (int)jump_vaddr_ebp,
47 0,
48 (int)jump_vaddr_edi };
49
50void invalidate_block_eax();
51void invalidate_block_ecx();
52void invalidate_block_edx();
53void invalidate_block_ebx();
54void invalidate_block_ebp();
55void invalidate_block_esi();
56void invalidate_block_edi();
57
58static const u_int invalidate_block_reg[8] = {
59 (int)invalidate_block_eax,
60 (int)invalidate_block_ecx,
61 (int)invalidate_block_edx,
62 (int)invalidate_block_ebx,
63 0,
64 (int)invalidate_block_ebp,
65 (int)invalidate_block_esi,
66 (int)invalidate_block_edi };
67
68static const u_short rounding_modes[4] = {
69 0x33F, // round
70 0xF3F, // trunc
71 0xB3F, // ceil
72 0x73F};// floor
73
74#include "../fpu.h"
75
// We need these for cmovcc instructions on x86.
// cmovcc cannot take an immediate operand, so the values 0 and 1 are kept
// in memory (presumably to be used as cmovcc source operands - the users
// are not in this part of the file).
static const u_int const_zero=0;
static const u_int const_one=1;
79
80/* Linker */
81
82static void set_jump_target(int addr,int target)
83{
84 u_char *ptr=(u_char *)addr;
85 if(*ptr==0x0f)
86 {
87 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
88 u_int *ptr2=(u_int *)(ptr+2);
89 *ptr2=target-(int)ptr2-4;
90 }
91 else if(*ptr==0xe8||*ptr==0xe9) {
92 u_int *ptr2=(u_int *)(ptr+1);
93 *ptr2=target-(int)ptr2-4;
94 }
95 else
96 {
97 assert(*ptr==0xc7); /* mov immediate (store address) */
98 u_int *ptr2=(u_int *)(ptr+6);
99 *ptr2=target;
100 }
101}
102
103static void *kill_pointer(void *stub)
104{
105 int *i_ptr=*((int **)(stub+6));
106 *i_ptr=(int)stub-(int)i_ptr-4;
107 return i_ptr;
108}
109static int get_pointer(void *stub)
110{
111 int *i_ptr=*((int **)(stub+6));
112 return *i_ptr+(int)i_ptr+4;
113}
114
115// Find the "clean" entry point from a "dirty" entry point
116// by skipping past the call to verify_code
117static u_int get_clean_addr(int addr)
118{
119 u_char *ptr=(u_char *)addr;
120 assert(ptr[20]==0xE8); // call instruction
121 assert(ptr[25]==0x83); // pop (add esp,4) instruction
122 if(ptr[28]==0xE9) return *(u_int *)(ptr+29)+addr+33; // follow jmp
123 else return(addr+28);
124}
125
126static int verify_dirty(void *addr)
127{
128 u_char *ptr=(u_char *)addr;
129 assert(ptr[5]==0xB8);
130 u_int source=*(u_int *)(ptr+6);
131 u_int copy=*(u_int *)(ptr+11);
132 u_int len=*(u_int *)(ptr+16);
133 assert(ptr[20]==0xE8); // call instruction
134 u_int verifier=*(u_int *)(ptr+21)+(u_int)ptr+25;
135 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
136 unsigned int page=source>>12;
137 unsigned int map_value=memory_map[page];
138 if(map_value>=0x80000000) return 0;
139 while(page<((source+len-1)>>12)) {
140 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
141 }
142 source = source+(map_value<<2);
143 }
144 //DebugMessage(M64MSG_VERBOSE, "verify_dirty: %x %x %x",source,copy,len);
145 return !memcmp((void *)source,(void *)copy,len);
146}
147
148// This doesn't necessarily find all clean entry points, just
149// guarantees that it's not dirty
150static int isclean(int addr)
151{
152 u_char *ptr=(u_char *)addr;
153 if(ptr[5]!=0xB8) return 1; // mov imm,%eax
154 if(ptr[10]!=0xBB) return 1; // mov imm,%ebx
155 if(ptr[15]!=0xB9) return 1; // mov imm,%ecx
156 if(ptr[20]!=0xE8) return 1; // call instruction
157 if(ptr[25]!=0x83) return 1; // pop (add esp,4) instruction
158 return 0;
159}
160
161static void get_bounds(int addr,u_int *start,u_int *end)
162{
163 u_char *ptr=(u_char *)addr;
164 assert(ptr[5]==0xB8);
165 u_int source=*(u_int *)(ptr+6);
166 //u_int copy=*(u_int *)(ptr+11);
167 u_int len=*(u_int *)(ptr+16);
168 assert(ptr[20]==0xE8); // call instruction
169 u_int verifier=*(u_int *)(ptr+21)+(u_int)ptr+25;
170 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
171 if(memory_map[source>>12]>=0x80000000) source = 0;
172 else source = source+(memory_map[source>>12]<<2);
173 }
174 if(start) *start=source;
175 if(end) *end=source+len;
176}
177
178/* Register allocation */
179
180// Note: registers are allocated clean (unmodified state)
181// if you intend to modify the register, you must call dirty_reg().
182static void alloc_reg(struct regstat *cur,int i,signed char reg)
183{
184 int r,hr;
185 int preferred_reg = (reg&3)+(reg>28)*4-(reg==32)+2*(reg==36)-(reg==40);
186
187 // Don't allocate unused registers
188 if((cur->u>>reg)&1) return;
189
190 // see if it's already allocated
191 for(hr=0;hr<HOST_REGS;hr++)
192 {
193 if(cur->regmap[hr]==reg) return;
194 }
195
196 // Keep the same mapping if the register was already allocated in a loop
197 preferred_reg = loop_reg(i,reg,preferred_reg);
198
199 // Try to allocate the preferred register
200 if(cur->regmap[preferred_reg]==-1) {
201 cur->regmap[preferred_reg]=reg;
202 cur->dirty&=~(1<<preferred_reg);
203 cur->isconst&=~(1<<preferred_reg);
204 return;
205 }
206 r=cur->regmap[preferred_reg];
207 if(r<64&&((cur->u>>r)&1)) {
208 cur->regmap[preferred_reg]=reg;
209 cur->dirty&=~(1<<preferred_reg);
210 cur->isconst&=~(1<<preferred_reg);
211 return;
212 }
213 if(r>=64&&((cur->uu>>(r&63))&1)) {
214 cur->regmap[preferred_reg]=reg;
215 cur->dirty&=~(1<<preferred_reg);
216 cur->isconst&=~(1<<preferred_reg);
217 return;
218 }
219
220 // Try to allocate EAX, EBX, ECX, or EDX
221 // We prefer these because they can do byte and halfword loads
222 for(hr=0;hr<4;hr++) {
223 if(cur->regmap[hr]==-1) {
224 cur->regmap[hr]=reg;
225 cur->dirty&=~(1<<hr);
226 cur->isconst&=~(1<<hr);
227 return;
228 }
229 }
230
231 // Clear any unneeded registers
232 // We try to keep the mapping consistent, if possible, because it
233 // makes branches easier (especially loops). So we try to allocate
234 // first (see above) before removing old mappings. If this is not
235 // possible then go ahead and clear out the registers that are no
236 // longer needed.
237 for(hr=0;hr<HOST_REGS;hr++)
238 {
239 r=cur->regmap[hr];
240 if(r>=0) {
241 if(r<64) {
242 if((cur->u>>r)&1)
243 if(i==0||(unneeded_reg[i-1]>>r)&1) {cur->regmap[hr]=-1;break;}
244 }
245 else
246 {
247 if((cur->uu>>(r&63))&1)
248 if(i==0||(unneeded_reg_upper[i-1]>>(r&63))&1) {cur->regmap[hr]=-1;break;}
249 }
250 }
251 }
252 // Try to allocate any available register, but prefer
253 // registers that have not been used recently.
254 if(i>0) {
255 for(hr=0;hr<HOST_REGS;hr++) {
256 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
257 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
258 cur->regmap[hr]=reg;
259 cur->dirty&=~(1<<hr);
260 cur->isconst&=~(1<<hr);
261 return;
262 }
263 }
264 }
265 }
266 // Try to allocate any available register
267 for(hr=0;hr<HOST_REGS;hr++) {
268 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
269 cur->regmap[hr]=reg;
270 cur->dirty&=~(1<<hr);
271 cur->isconst&=~(1<<hr);
272 return;
273 }
274 }
275
276 // Ok, now we have to evict someone
277 // Pick a register we hopefully won't need soon
278 u_char hsn[MAXREG+1];
279 memset(hsn,10,sizeof(hsn));
280 int j;
281 lsn(hsn,i,&preferred_reg);
282 //DebugMessage(M64MSG_VERBOSE, "hsn(%x): %d %d %d %d %d %d %d",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
283 if(i>0) {
284 // Don't evict the cycle count at entry points, otherwise the entry
285 // stub will have to write it.
286 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
287 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
288 for(j=10;j>=3;j--)
289 {
290 // Alloc preferred register if available
291 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
292 for(hr=0;hr<HOST_REGS;hr++) {
293 // Evict both parts of a 64-bit register
294 if((cur->regmap[hr]&63)==r) {
295 cur->regmap[hr]=-1;
296 cur->dirty&=~(1<<hr);
297 cur->isconst&=~(1<<hr);
298 }
299 }
300 cur->regmap[preferred_reg]=reg;
301 return;
302 }
303 for(r=1;r<=MAXREG;r++)
304 {
305 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
306 for(hr=0;hr<HOST_REGS;hr++) {
307 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
308 if(cur->regmap[hr]==r+64) {
309 cur->regmap[hr]=reg;
310 cur->dirty&=~(1<<hr);
311 cur->isconst&=~(1<<hr);
312 return;
313 }
314 }
315 }
316 for(hr=0;hr<HOST_REGS;hr++) {
317 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
318 if(cur->regmap[hr]==r) {
319 cur->regmap[hr]=reg;
320 cur->dirty&=~(1<<hr);
321 cur->isconst&=~(1<<hr);
322 return;
323 }
324 }
325 }
326 }
327 }
328 }
329 }
330 for(j=10;j>=0;j--)
331 {
332 for(r=1;r<=MAXREG;r++)
333 {
334 if(hsn[r]==j) {
335 for(hr=0;hr<HOST_REGS;hr++) {
336 if(cur->regmap[hr]==r+64) {
337 cur->regmap[hr]=reg;
338 cur->dirty&=~(1<<hr);
339 cur->isconst&=~(1<<hr);
340 return;
341 }
342 }
343 for(hr=0;hr<HOST_REGS;hr++) {
344 if(cur->regmap[hr]==r) {
345 cur->regmap[hr]=reg;
346 cur->dirty&=~(1<<hr);
347 cur->isconst&=~(1<<hr);
348 return;
349 }
350 }
351 }
352 }
353 }
354 DebugMessage(M64MSG_ERROR, "This shouldn't happen (alloc_reg)");exit(1);
355}
356
357static void alloc_reg64(struct regstat *cur,int i,signed char reg)
358{
359 int preferred_reg = 5+reg%3;
360 int r,hr;
361
362 // allocate the lower 32 bits
363 alloc_reg(cur,i,reg);
364
365 // Don't allocate unused registers
366 if((cur->uu>>reg)&1) return;
367
368 // see if the upper half is already allocated
369 for(hr=0;hr<HOST_REGS;hr++)
370 {
371 if(cur->regmap[hr]==reg+64) return;
372 }
373
374 // Keep the same mapping if the register was already allocated in a loop
375 preferred_reg = loop_reg(i,reg,preferred_reg);
376
377 // Try to allocate the preferred register
378 if(cur->regmap[preferred_reg]==-1) {
379 cur->regmap[preferred_reg]=reg|64;
380 cur->dirty&=~(1<<preferred_reg);
381 cur->isconst&=~(1<<preferred_reg);
382 return;
383 }
384 r=cur->regmap[preferred_reg];
385 if(r<64&&((cur->u>>r)&1)) {
386 cur->regmap[preferred_reg]=reg|64;
387 cur->dirty&=~(1<<preferred_reg);
388 cur->isconst&=~(1<<preferred_reg);
389 return;
390 }
391 if(r>=64&&((cur->uu>>(r&63))&1)) {
392 cur->regmap[preferred_reg]=reg|64;
393 cur->dirty&=~(1<<preferred_reg);
394 cur->isconst&=~(1<<preferred_reg);
395 return;
396 }
397
398 // Try to allocate EBP, ESI or EDI
399 for(hr=5;hr<8;hr++) {
400 if(cur->regmap[hr]==-1) {
401 cur->regmap[hr]=reg|64;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407
408 // Clear any unneeded registers
409 // We try to keep the mapping consistent, if possible, because it
410 // makes branches easier (especially loops). So we try to allocate
411 // first (see above) before removing old mappings. If this is not
412 // possible then go ahead and clear out the registers that are no
413 // longer needed.
414 for(hr=HOST_REGS-1;hr>=0;hr--)
415 {
416 r=cur->regmap[hr];
417 if(r>=0) {
418 if(r<64) {
419 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
420 }
421 else
422 {
423 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
424 }
425 }
426 }
427 // Try to allocate any available register, but prefer
428 // registers that have not been used recently.
429 if(i>0) {
430 for(hr=0;hr<HOST_REGS;hr++) {
431 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
432 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
433 cur->regmap[hr]=reg|64;
434 cur->dirty&=~(1<<hr);
435 cur->isconst&=~(1<<hr);
436 return;
437 }
438 }
439 }
440 }
441 // Try to allocate any available register
442 for(hr=0;hr<HOST_REGS;hr++) {
443 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
444 cur->regmap[hr]=reg|64;
445 cur->dirty&=~(1<<hr);
446 cur->isconst&=~(1<<hr);
447 return;
448 }
449 }
450
451 // Ok, now we have to evict someone
452 // Pick a register we hopefully won't need soon
453 u_char hsn[MAXREG+1];
454 memset(hsn,10,sizeof(hsn));
455 int j;
456 lsn(hsn,i,&preferred_reg);
457 //DebugMessage(M64MSG_VERBOSE, "eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
458 //DebugMessage(M64MSG_VERBOSE, "hsn(%x): %d %d %d %d %d %d %d",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
459 if(i>0) {
460 // Don't evict the cycle count at entry points, otherwise the entry
461 // stub will have to write it.
462 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
463 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
464 for(j=10;j>=3;j--)
465 {
466 // Alloc preferred register if available
467 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
468 for(hr=0;hr<HOST_REGS;hr++) {
469 // Evict both parts of a 64-bit register
470 if((cur->regmap[hr]&63)==r) {
471 cur->regmap[hr]=-1;
472 cur->dirty&=~(1<<hr);
473 cur->isconst&=~(1<<hr);
474 }
475 }
476 cur->regmap[preferred_reg]=reg|64;
477 return;
478 }
479 for(r=1;r<=MAXREG;r++)
480 {
481 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
482 for(hr=0;hr<HOST_REGS;hr++) {
483 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
484 if(cur->regmap[hr]==r+64) {
485 cur->regmap[hr]=reg|64;
486 cur->dirty&=~(1<<hr);
487 cur->isconst&=~(1<<hr);
488 return;
489 }
490 }
491 }
492 for(hr=0;hr<HOST_REGS;hr++) {
493 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
494 if(cur->regmap[hr]==r) {
495 cur->regmap[hr]=reg|64;
496 cur->dirty&=~(1<<hr);
497 cur->isconst&=~(1<<hr);
498 return;
499 }
500 }
501 }
502 }
503 }
504 }
505 }
506 for(j=10;j>=0;j--)
507 {
508 for(r=1;r<=MAXREG;r++)
509 {
510 if(hsn[r]==j) {
511 for(hr=0;hr<HOST_REGS;hr++) {
512 if(cur->regmap[hr]==r+64) {
513 cur->regmap[hr]=reg|64;
514 cur->dirty&=~(1<<hr);
515 cur->isconst&=~(1<<hr);
516 return;
517 }
518 }
519 for(hr=0;hr<HOST_REGS;hr++) {
520 if(cur->regmap[hr]==r) {
521 cur->regmap[hr]=reg|64;
522 cur->dirty&=~(1<<hr);
523 cur->isconst&=~(1<<hr);
524 return;
525 }
526 }
527 }
528 }
529 }
530 DebugMessage(M64MSG_ERROR, "This shouldn't happen");exit(1);
531}
532
533// Allocate a temporary register. This is done without regard to
534// dirty status or whether the register we request is on the unneeded list
535// Note: This will only allocate one register, even if called multiple times
536static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
537{
538 int r,hr;
539 int preferred_reg = -1;
540
541 // see if it's already allocated
542 for(hr=0;hr<HOST_REGS;hr++)
543 {
544 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
545 }
546
547 // Try to allocate any available register, starting with EDI, ESI, EBP...
548 // We prefer EDI, ESI, EBP since the others are used for byte/halfword stores
549 for(hr=HOST_REGS-1;hr>=0;hr--) {
550 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
551 cur->regmap[hr]=reg;
552 cur->dirty&=~(1<<hr);
553 cur->isconst&=~(1<<hr);
554 return;
555 }
556 }
557
558 // Find an unneeded register
559 for(hr=HOST_REGS-1;hr>=0;hr--)
560 {
561 r=cur->regmap[hr];
562 if(r>=0) {
563 if(r<64) {
564 if((cur->u>>r)&1) {
565 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
566 cur->regmap[hr]=reg;
567 cur->dirty&=~(1<<hr);
568 cur->isconst&=~(1<<hr);
569 return;
570 }
571 }
572 }
573 else
574 {
575 if((cur->uu>>(r&63))&1) {
576 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
577 cur->regmap[hr]=reg;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 return;
581 }
582 }
583 }
584 }
585 }
586
587 // Ok, now we have to evict someone
588 // Pick a register we hopefully won't need soon
589 // TODO: we might want to follow unconditional jumps here
590 // TODO: get rid of dupe code and make this into a function
591 u_char hsn[MAXREG+1];
592 memset(hsn,10,sizeof(hsn));
593 int j;
594 lsn(hsn,i,&preferred_reg);
595 //DebugMessage(M64MSG_VERBOSE, "hsn: %d %d %d %d %d %d %d",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
596 if(i>0) {
597 // Don't evict the cycle count at entry points, otherwise the entry
598 // stub will have to write it.
599 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
600 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
601 for(j=10;j>=3;j--)
602 {
603 for(r=1;r<=MAXREG;r++)
604 {
605 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
606 for(hr=0;hr<HOST_REGS;hr++) {
607 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
608 if(cur->regmap[hr]==r+64) {
609 cur->regmap[hr]=reg;
610 cur->dirty&=~(1<<hr);
611 cur->isconst&=~(1<<hr);
612 return;
613 }
614 }
615 }
616 for(hr=0;hr<HOST_REGS;hr++) {
617 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
618 if(cur->regmap[hr]==r) {
619 cur->regmap[hr]=reg;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 }
626 }
627 }
628 }
629 }
630 for(j=10;j>=0;j--)
631 {
632 for(r=1;r<=MAXREG;r++)
633 {
634 if(hsn[r]==j) {
635 for(hr=0;hr<HOST_REGS;hr++) {
636 if(cur->regmap[hr]==r+64) {
637 cur->regmap[hr]=reg;
638 cur->dirty&=~(1<<hr);
639 cur->isconst&=~(1<<hr);
640 return;
641 }
642 }
643 for(hr=0;hr<HOST_REGS;hr++) {
644 if(cur->regmap[hr]==r) {
645 cur->regmap[hr]=reg;
646 cur->dirty&=~(1<<hr);
647 cur->isconst&=~(1<<hr);
648 return;
649 }
650 }
651 }
652 }
653 }
654 DebugMessage(M64MSG_ERROR, "This shouldn't happen");exit(1);
655}
656// Allocate a specific x86 register.
657static void alloc_x86_reg(struct regstat *cur,int i,signed char reg,int hr)
658{
659 int n;
660 int dirty=0;
661
662 // see if it's already allocated (and dealloc it)
663 for(n=0;n<HOST_REGS;n++)
664 {
665 if(n!=ESP&&cur->regmap[n]==reg) {
666 dirty=(cur->dirty>>n)&1;
667 cur->regmap[n]=-1;
668 }
669 }
670
671 cur->regmap[hr]=reg;
672 cur->dirty&=~(1<<hr);
673 cur->dirty|=dirty<<hr;
674 cur->isconst&=~(1<<hr);
675}
676
// Alloc cycle count into dedicated register:
// CCREG is always placed in ESI via alloc_x86_reg().
static void alloc_cc(struct regstat *cur,int i)
{
  alloc_x86_reg(cur,i,CCREG,ESI);
}
682
683/* Special alloc */
684
685static void multdiv_alloc_x86(struct regstat *current,int i)
686{
687 // case 0x18: MULT
688 // case 0x19: MULTU
689 // case 0x1A: DIV
690 // case 0x1B: DIVU
691 // case 0x1C: DMULT
692 // case 0x1D: DMULTU
693 // case 0x1E: DDIV
694 // case 0x1F: DDIVU
695 clear_const(current,rs1[i]);
696 clear_const(current,rs2[i]);
697 if(rs1[i]&&rs2[i])
698 {
699 if((opcode2[i]&4)==0) // 32-bit
700 {
701 current->u&=~(1LL<<HIREG);
702 current->u&=~(1LL<<LOREG);
703 alloc_x86_reg(current,i,HIREG,EDX);
704 alloc_x86_reg(current,i,LOREG,EAX);
705 alloc_reg(current,i,rs1[i]);
706 alloc_reg(current,i,rs2[i]);
707 current->is32|=1LL<<HIREG;
708 current->is32|=1LL<<LOREG;
709 dirty_reg(current,HIREG);
710 dirty_reg(current,LOREG);
711 }
712 else // 64-bit
713 {
714 alloc_x86_reg(current,i,HIREG|64,EDX);
715 alloc_x86_reg(current,i,HIREG,EAX);
716 alloc_reg64(current,i,rs1[i]);
717 alloc_reg64(current,i,rs2[i]);
718 alloc_all(current,i);
719 current->is32&=~(1LL<<HIREG);
720 current->is32&=~(1LL<<LOREG);
721 dirty_reg(current,HIREG);
722 dirty_reg(current,LOREG);
723 }
724 }
725 else
726 {
727 // Multiply by zero is zero.
728 // MIPS does not have a divide by zero exception.
729 // The result is undefined, we return zero.
730 alloc_reg(current,i,HIREG);
731 alloc_reg(current,i,LOREG);
732 current->is32|=1LL<<HIREG;
733 current->is32|=1LL<<LOREG;
734 dirty_reg(current,HIREG);
735 dirty_reg(current,LOREG);
736 }
737}
738#define multdiv_alloc multdiv_alloc_x86
739
740/* Assembler */
741
// Printable names of the eight 32-bit x86 registers, indexed by the
// register number used in instruction encodings (0=eax ... 7=edi).
// Each name is three characters plus the terminating NUL.
// (The element type carried a duplicated 'const' qualifier, which compilers
// warn about; a single qualifier has the same meaning.)
static const char regname[8][4] = {
  "eax",
  "ecx",
  "edx",
  "ebx",
  "esp",
  "ebp",
  "esi",
  "edi"};
751
752static void output_byte(u_char byte)
753{
754 *(out++)=byte;
755}
756static void output_modrm(u_char mod,u_char rm,u_char ext)
757{
758 assert(mod<4);
759 assert(rm<8);
760 assert(ext<8);
761 u_char byte=(mod<<6)|(ext<<3)|rm;
762 *(out++)=byte;
763}
764static void output_sib(u_char scale,u_char index,u_char base)
765{
766 assert(scale<4);
767 assert(index<8);
768 assert(base<8);
769 u_char byte=(scale<<6)|(index<<3)|base;
770 *(out++)=byte;
771}
772static void output_w32(u_int word)
773{
774 *((u_int *)out)=word;
775 out+=4;
776}
777
778static void emit_mov(int rs,int rt)
779{
780 assem_debug("mov %%%s,%%%s",regname[rs],regname[rt]);
781 output_byte(0x89);
782 output_modrm(3,rt,rs);
783}
784
785static void emit_add(int rs1,int rs2,int rt)
786{
787 if(rs1==rt) {
788 assem_debug("add %%%s,%%%s",regname[rs2],regname[rs1]);
789 output_byte(0x01);
790 output_modrm(3,rs1,rs2);
791 }else if(rs2==rt) {
792 assem_debug("add %%%s,%%%s",regname[rs1],regname[rs2]);
793 output_byte(0x01);
794 output_modrm(3,rs2,rs1);
795 }else {
796 assem_debug("lea (%%%s,%%%s),%%%s",regname[rs1],regname[rs2],regname[rt]);
797 output_byte(0x8D);
798 if(rs1!=EBP) {
799 output_modrm(0,4,rt);
800 output_sib(0,rs2,rs1);
801 }else if(rs2!=EBP) {
802 output_modrm(0,4,rt);
803 output_sib(0,rs1,rs2);
804 }else /* lea 0(,%ebp,2) */{
805 output_modrm(0,4,rt);
806 output_sib(1,EBP,5);
807 output_w32(0);
808 }
809 }
810}
811
// Emit rt = rs1 + rs2 using a real "add" instruction so that the CPU flags
// reflect the addition.
// emit_add() falls back to "lea" when rt differs from both sources, and lea
// leaves the flags untouched. To avoid that, the first source is copied to
// rt beforehand ("mov" does not modify flags), which steers emit_add() onto
// its two-operand "add" path.
// NOTE(review): that callers of the "s" variant rely on the carry flag is
// inferred from the naming (cf. emit_addimm_and_set_flags) - confirm.
static void emit_adds(int rs1,int rs2,int rt)
{
  if(rs1!=rt&&rs2!=rt) {
    emit_mov(rs1,rt);
    rs1=rt;
  }
  emit_add(rs1,rs2,rt);
}
816
817static void emit_lea8(int rs1,int rt)
818{
819 assem_debug("lea 0(%%%s,8),%%%s",regname[rs1],regname[rt]);
820 output_byte(0x8D);
821 output_modrm(0,4,rt);
822 output_sib(3,rs1,5);
823 output_w32(0);
824}
825static void emit_leairrx1(int imm,int rs1,int rs2,int rt)
826{
827 assem_debug("lea %x(%%%s,%%%s,1),%%%s",imm,regname[rs1],regname[rs2],regname[rt]);
828 output_byte(0x8D);
829 if(imm!=0||rs1==EBP) {
830 output_modrm(2,4,rt);
831 output_sib(0,rs2,rs1);
832 output_w32(imm);
833 }else{
834 output_modrm(0,4,rt);
835 output_sib(0,rs2,rs1);
836 }
837}
838static void emit_leairrx4(int imm,int rs1,int rs2,int rt)
839{
840 assem_debug("lea %x(%%%s,%%%s,4),%%%s",imm,regname[rs1],regname[rs2],regname[rt]);
841 output_byte(0x8D);
842 if(imm!=0||rs1==EBP) {
843 output_modrm(2,4,rt);
844 output_sib(2,rs2,rs1);
845 output_w32(imm);
846 }else{
847 output_modrm(0,4,rt);
848 output_sib(2,rs2,rs1);
849 }
850}
851
852static void emit_neg(int rs, int rt)
853{
854 if(rs!=rt) emit_mov(rs,rt);
855 assem_debug("neg %%%s",regname[rt]);
856 output_byte(0xF7);
857 output_modrm(3,rt,3);
858}
859
// Negate variant that simply forwards to emit_neg(); the sequence emitted
// there always ends with a "neg" instruction.
static void emit_negs(int rs, int rt)
{
  emit_neg(rs,rt);
}
864
865static void emit_sub(int rs1,int rs2,int rt)
866{
867 if(rs1==rt) {
868 assem_debug("sub %%%s,%%%s",regname[rs2],regname[rs1]);
869 output_byte(0x29);
870 output_modrm(3,rs1,rs2);
871 } else if(rs2==rt) {
872 emit_neg(rs2,rs2);
873 emit_add(rs2,rs1,rs2);
874 } else {
875 emit_mov(rs1,rt);
876 emit_sub(rt,rs2,rt);
877 }
878}
879
// Subtract variant that simply forwards to emit_sub().
// NOTE(review): when rt == rs2, emit_sub() emits neg+add rather than a
// "sub", so the resulting carry flag is that of an addition, not a borrow.
// If callers depend on the borrow (as the name may suggest), that path
// needs attention - confirm against the callers.
static void emit_subs(int rs1,int rs2,int rt)
{
  emit_sub(rs1,rs2,rt);
}
884
885static void emit_zeroreg(int rt)
886{
887 output_byte(0x31);
888 output_modrm(3,rt,rt);
889 assem_debug("xor %%%s,%%%s",regname[rt],regname[rt]);
890}
891
892static void emit_loadreg(int r, int hr)
893{
894 if((r&63)==0)
895 emit_zeroreg(hr);
896 else {
897 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
898 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
899 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
900 if(r==CCREG) addr=(int)&cycle_count;
901 if(r==CSREG) addr=(int)&Status;
902 if(r==FSREG) addr=(int)&FCR31;
903 assem_debug("mov %x+%d,%%%s",addr,r,regname[hr]);
904 output_byte(0x8B);
905 output_modrm(0,5,hr);
906 output_w32(addr);
907 }
908}
909static void emit_storereg(int r, int hr)
910{
911 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
912 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
913 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
914 if(r==CCREG) addr=(int)&cycle_count;
915 if(r==FSREG) addr=(int)&FCR31;
916 assem_debug("mov %%%s,%x+%d",regname[hr],addr,r);
917 output_byte(0x89);
918 output_modrm(0,5,hr);
919 output_w32(addr);
920}
921
922static void emit_test(int rs, int rt)
923{
924 assem_debug("test %%%s,%%%s",regname[rs],regname[rt]);
925 output_byte(0x85);
926 output_modrm(3,rs,rt);
927}
928
929static void emit_testimm(int rs,int imm)
930{
931 assem_debug("test $0x%x,%%%s",imm,regname[rs]);
932 if(imm<128&&imm>=-128&&rs<4) {
933 output_byte(0xF6);
934 output_modrm(3,rs,0);
935 output_byte(imm);
936 }
937 else
938 {
939 output_byte(0xF7);
940 output_modrm(3,rs,0);
941 output_w32(imm);
942 }
943}
944
945static void emit_not(int rs,int rt)
946{
947 if(rs!=rt) emit_mov(rs,rt);
948 assem_debug("not %%%s",regname[rt]);
949 output_byte(0xF7);
950 output_modrm(3,rt,2);
951}
952
953static void emit_and(u_int rs1,u_int rs2,u_int rt)
954{
955 assert(rs1<8);
956 assert(rs2<8);
957 assert(rt<8);
958 if(rs1==rt) {
959 assem_debug("and %%%s,%%%s",regname[rs2],regname[rt]);
960 output_byte(0x21);
961 output_modrm(3,rs1,rs2);
962 }
963 else
964 if(rs2==rt) {
965 assem_debug("and %%%s,%%%s",regname[rs1],regname[rt]);
966 output_byte(0x21);
967 output_modrm(3,rs2,rs1);
968 }
969 else {
970 emit_mov(rs1,rt);
971 emit_and(rt,rs2,rt);
972 }
973}
974
975static void emit_or(u_int rs1,u_int rs2,u_int rt)
976{
977 assert(rs1<8);
978 assert(rs2<8);
979 assert(rt<8);
980 if(rs1==rt) {
981 assem_debug("or %%%s,%%%s",regname[rs2],regname[rt]);
982 output_byte(0x09);
983 output_modrm(3,rs1,rs2);
984 }
985 else
986 if(rs2==rt) {
987 assem_debug("or %%%s,%%%s",regname[rs1],regname[rt]);
988 output_byte(0x09);
989 output_modrm(3,rs2,rs1);
990 }
991 else {
992 emit_mov(rs1,rt);
993 emit_or(rt,rs2,rt);
994 }
995}
// OR variant that simply forwards to emit_or(); every sequence emitted
// there ends with an "or" instruction.
static void emit_or_and_set_flags(int rs1,int rs2,int rt)
{
  emit_or(rs1,rs2,rt);
}
1000
1001static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1002{
1003 assert(rs1<8);
1004 assert(rs2<8);
1005 assert(rt<8);
1006 if(rs1==rt) {
1007 assem_debug("xor %%%s,%%%s",regname[rs2],regname[rt]);
1008 output_byte(0x31);
1009 output_modrm(3,rs1,rs2);
1010 }
1011 else
1012 if(rs2==rt) {
1013 assem_debug("xor %%%s,%%%s",regname[rs1],regname[rt]);
1014 output_byte(0x31);
1015 output_modrm(3,rs2,rs1);
1016 }
1017 else {
1018 emit_mov(rs1,rt);
1019 emit_xor(rt,rs2,rt);
1020 }
1021}
1022
1023static void emit_movimm(int imm,u_int rt)
1024{
1025 assem_debug("mov $%d,%%%s",imm,regname[rt]);
1026 assert(rt<8);
1027 output_byte(0xB8+rt);
1028 output_w32(imm);
1029}
1030
1031static void emit_addimm(int rs,int imm,int rt)
1032{
1033 if(rs==rt) {
1034 if(imm!=0) {
1035 assem_debug("add $%d,%%%s",imm,regname[rt]);
1036 if(imm<128&&imm>=-128) {
1037 output_byte(0x83);
1038 output_modrm(3,rt,0);
1039 output_byte(imm);
1040 }
1041 else
1042 {
1043 output_byte(0x81);
1044 output_modrm(3,rt,0);
1045 output_w32(imm);
1046 }
1047 }
1048 }
1049 else {
1050 if(imm!=0) {
1051 assem_debug("lea %d(%%%s),%%%s",imm,regname[rs],regname[rt]);
1052 output_byte(0x8D);
1053 if(imm<128&&imm>=-128) {
1054 output_modrm(1,rs,rt);
1055 output_byte(imm);
1056 }else{
1057 output_modrm(2,rs,rt);
1058 output_w32(imm);
1059 }
1060 }else{
1061 emit_mov(rs,rt);
1062 }
1063 }
1064}
1065
1066static void emit_addimm_and_set_flags(int imm,int rt)
1067{
1068 assem_debug("add $%d,%%%s",imm,regname[rt]);
1069 if(imm<128&&imm>=-128) {
1070 output_byte(0x83);
1071 output_modrm(3,rt,0);
1072 output_byte(imm);
1073 }
1074 else
1075 {
1076 output_byte(0x81);
1077 output_modrm(3,rt,0);
1078 output_w32(imm);
1079 }
1080}
1081static void emit_addimm_no_flags(int imm,int rt)
1082{
1083 if(imm!=0) {
1084 assem_debug("lea %d(%%%s),%%%s",imm,regname[rt],regname[rt]);
1085 output_byte(0x8D);
1086 if(imm<128&&imm>=-128) {
1087 output_modrm(1,rt,rt);
1088 output_byte(imm);
1089 }else{
1090 output_modrm(2,rt,rt);
1091 output_w32(imm);
1092 }
1093 }
1094}
1095
1096static void emit_adcimm(int imm,u_int rt)
1097{
1098 assem_debug("adc $%d,%%%s",imm,regname[rt]);
1099 assert(rt<8);
1100 if(imm<128&&imm>=-128) {
1101 output_byte(0x83);
1102 output_modrm(3,rt,2);
1103 output_byte(imm);
1104 }
1105 else
1106 {
1107 output_byte(0x81);
1108 output_modrm(3,rt,2);
1109 output_w32(imm);
1110 }
1111}
1112static void emit_sbbimm(int imm,u_int rt)
1113{
1114 assem_debug("sbb $%d,%%%s",imm,regname[rt]);
1115 assert(rt<8);
1116 if(imm<128&&imm>=-128) {
1117 output_byte(0x83);
1118 output_modrm(3,rt,3);
1119 output_byte(imm);
1120 }
1121 else
1122 {
1123 output_byte(0x81);
1124 output_modrm(3,rt,3);
1125 output_w32(imm);
1126 }
1127}
1128
1129static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1130{
1131 if(rsh==rth&&rsl==rtl) {
1132 assem_debug("add $%d,%%%s",imm,regname[rtl]);
1133 if(imm<128&&imm>=-128) {
1134 output_byte(0x83);
1135 output_modrm(3,rtl,0);
1136 output_byte(imm);
1137 }
1138 else
1139 {
1140 output_byte(0x81);
1141 output_modrm(3,rtl,0);
1142 output_w32(imm);
1143 }
1144 assem_debug("adc $%d,%%%s",imm>>31,regname[rth]);
1145 output_byte(0x83);
1146 output_modrm(3,rth,2);
1147 output_byte(imm>>31);
1148 }
1149 else {
1150 emit_mov(rsh,rth);
1151 emit_mov(rsl,rtl);
1152 emit_addimm64_32(rth,rtl,imm,rth,rtl);
1153 }
1154}
1155
1156static void emit_sbb(int rs1,int rs2)
1157{
1158 assem_debug("sbb %%%s,%%%s",regname[rs1],regname[rs2]);
1159 output_byte(0x19);
1160 output_modrm(3,rs2,rs1);
1161}
1162
1163static void emit_andimm(int rs,int imm,int rt)
1164{
1165 if(imm==0) {
1166 emit_zeroreg(rt);
1167 }
1168 else if(rs==rt) {
1169 assem_debug("and $%d,%%%s",imm,regname[rt]);
1170 if(imm<128&&imm>=-128) {
1171 output_byte(0x83);
1172 output_modrm(3,rt,4);
1173 output_byte(imm);
1174 }
1175 else
1176 {
1177 output_byte(0x81);
1178 output_modrm(3,rt,4);
1179 output_w32(imm);
1180 }
1181 }
1182 else {
1183 emit_mov(rs,rt);
1184 emit_andimm(rt,imm,rt);
1185 }
1186}
1187
1188static void emit_orimm(int rs,int imm,int rt)
1189{
1190 if(rs==rt) {
1191 if(imm!=0) {
1192 assem_debug("or $%d,%%%s",imm,regname[rt]);
1193 if(imm<128&&imm>=-128) {
1194 output_byte(0x83);
1195 output_modrm(3,rt,1);
1196 output_byte(imm);
1197 }
1198 else
1199 {
1200 output_byte(0x81);
1201 output_modrm(3,rt,1);
1202 output_w32(imm);
1203 }
1204 }
1205 }
1206 else {
1207 emit_mov(rs,rt);
1208 emit_orimm(rt,imm,rt);
1209 }
1210}
1211
1212static void emit_xorimm(int rs,int imm,int rt)
1213{
1214 if(rs==rt) {
1215 if(imm!=0) {
1216 assem_debug("xor $%d,%%%s",imm,regname[rt]);
1217 if(imm<128&&imm>=-128) {
1218 output_byte(0x83);
1219 output_modrm(3,rt,6);
1220 output_byte(imm);
1221 }
1222 else
1223 {
1224 output_byte(0x81);
1225 output_modrm(3,rt,6);
1226 output_w32(imm);
1227 }
1228 }
1229 }
1230 else {
1231 emit_mov(rs,rt);
1232 emit_xorimm(rt,imm,rt);
1233 }
1234}
1235
1236static void emit_shlimm(int rs,u_int imm,int rt)
1237{
1238 if(rs==rt) {
1239 assem_debug("shl %%%s,%d",regname[rt],imm);
1240 assert(imm>0);
1241 if(imm==1) output_byte(0xD1);
1242 else output_byte(0xC1);
1243 output_modrm(3,rt,4);
1244 if(imm>1) output_byte(imm);
1245 }
1246 else {
1247 emit_mov(rs,rt);
1248 emit_shlimm(rt,imm,rt);
1249 }
1250}
1251
1252static void emit_shrimm(int rs,u_int imm,int rt)
1253{
1254 if(rs==rt) {
1255 assem_debug("shr %%%s,%d",regname[rt],imm);
1256 assert(imm>0);
1257 if(imm==1) output_byte(0xD1);
1258 else output_byte(0xC1);
1259 output_modrm(3,rt,5);
1260 if(imm>1) output_byte(imm);
1261 }
1262 else {
1263 emit_mov(rs,rt);
1264 emit_shrimm(rt,imm,rt);
1265 }
1266}
1267
1268static void emit_sarimm(int rs,u_int imm,int rt)
1269{
1270 if(rs==rt) {
1271 assem_debug("sar %%%s,%d",regname[rt],imm);
1272 assert(imm>0);
1273 if(imm==1) output_byte(0xD1);
1274 else output_byte(0xC1);
1275 output_modrm(3,rt,7);
1276 if(imm>1) output_byte(imm);
1277 }
1278 else {
1279 emit_mov(rs,rt);
1280 emit_sarimm(rt,imm,rt);
1281 }
1282}
1283
1284static void emit_rorimm(int rs,u_int imm,int rt)
1285{
1286 if(rs==rt) {
1287 assem_debug("ror %%%s,%d",regname[rt],imm);
1288 assert(imm>0);
1289 if(imm==1) output_byte(0xD1);
1290 else output_byte(0xC1);
1291 output_modrm(3,rt,1);
1292 if(imm>1) output_byte(imm);
1293 }
1294 else {
1295 emit_mov(rs,rt);
1296 emit_rorimm(rt,imm,rt);
1297 }
1298}
1299
1300static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1301{
1302 if(rs==rt) {
1303 assem_debug("shld %%%s,%%%s,%d",regname[rt],regname[rs2],imm);
1304 assert(imm>0);
1305 output_byte(0x0F);
1306 output_byte(0xA4);
1307 output_modrm(3,rt,rs2);
1308 output_byte(imm);
1309 }
1310 else {
1311 emit_mov(rs,rt);
1312 emit_shldimm(rt,rs2,imm,rt);
1313 }
1314}
1315
1316static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1317{
1318 if(rs==rt) {
1319 assem_debug("shrd %%%s,%%%s,%d",regname[rt],regname[rs2],imm);
1320 assert(imm>0);
1321 output_byte(0x0F);
1322 output_byte(0xAC);
1323 output_modrm(3,rt,rs2);
1324 output_byte(imm);
1325 }
1326 else {
1327 emit_mov(rs,rt);
1328 emit_shrdimm(rt,rs2,imm,rt);
1329 }
1330}
1331
1332static void emit_shlcl(int r)
1333{
1334 assem_debug("shl %%%s,%%cl",regname[r]);
1335 output_byte(0xD3);
1336 output_modrm(3,r,4);
1337}
1338static void emit_shrcl(int r)
1339{
1340 assem_debug("shr %%%s,%%cl",regname[r]);
1341 output_byte(0xD3);
1342 output_modrm(3,r,5);
1343}
1344static void emit_sarcl(int r)
1345{
1346 assem_debug("sar %%%s,%%cl",regname[r]);
1347 output_byte(0xD3);
1348 output_modrm(3,r,7);
1349}
1350
1351static void emit_shldcl(int r1,int r2)
1352{
1353 assem_debug("shld %%%s,%%%s,%%cl",regname[r1],regname[r2]);
1354 output_byte(0x0F);
1355 output_byte(0xA5);
1356 output_modrm(3,r1,r2);
1357}
1358static void emit_shrdcl(int r1,int r2)
1359{
1360 assem_debug("shrd %%%s,%%%s,%%cl",regname[r1],regname[r2]);
1361 output_byte(0x0F);
1362 output_byte(0xAD);
1363 output_modrm(3,r1,r2);
1364}
1365
1366static void emit_cmpimm(int rs,int imm)
1367{
1368 assem_debug("cmp $%d,%%%s",imm,regname[rs]);
1369 if(imm<128&&imm>=-128) {
1370 output_byte(0x83);
1371 output_modrm(3,rs,7);
1372 output_byte(imm);
1373 }
1374 else
1375 {
1376 output_byte(0x81);
1377 output_modrm(3,rs,7);
1378 output_w32(imm);
1379 }
1380}
1381
1382static void emit_cmovne(const u_int *addr,int rt)
1383{
1384 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1385 if(addr==&const_zero) assem_debug(" [zero]");
1386 else if(addr==&const_one) assem_debug(" [one]");
1387 else assem_debug("");
1388 output_byte(0x0F);
1389 output_byte(0x45);
1390 output_modrm(0,5,rt);
1391 output_w32((int)addr);
1392}
1393static void emit_cmovl(const u_int *addr,int rt)
1394{
1395 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1396 if(addr==&const_zero) assem_debug(" [zero]");
1397 else if(addr==&const_one) assem_debug(" [one]");
1398 else assem_debug("");
1399 output_byte(0x0F);
1400 output_byte(0x4C);
1401 output_modrm(0,5,rt);
1402 output_w32((int)addr);
1403}
1404static void emit_cmovs(const u_int *addr,int rt)
1405{
1406 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1407 if(addr==&const_zero) assem_debug(" [zero]");
1408 else if(addr==&const_one) assem_debug(" [one]");
1409 else assem_debug("");
1410 output_byte(0x0F);
1411 output_byte(0x48);
1412 output_modrm(0,5,rt);
1413 output_w32((int)addr);
1414}
1415static void emit_cmovne_reg(int rs,int rt)
1416{
1417 assem_debug("cmovne %%%s,%%%s",regname[rs],regname[rt]);
1418 output_byte(0x0F);
1419 output_byte(0x45);
1420 output_modrm(3,rs,rt);
1421}
1422static void emit_cmovl_reg(int rs,int rt)
1423{
1424 assem_debug("cmovl %%%s,%%%s",regname[rs],regname[rt]);
1425 output_byte(0x0F);
1426 output_byte(0x4C);
1427 output_modrm(3,rs,rt);
1428}
1429static void emit_cmovs_reg(int rs,int rt)
1430{
1431 assem_debug("cmovs %%%s,%%%s",regname[rs],regname[rt]);
1432 output_byte(0x0F);
1433 output_byte(0x48);
1434 output_modrm(3,rs,rt);
1435}
1436static void emit_cmovnc_reg(int rs,int rt)
1437{
1438 assem_debug("cmovae %%%s,%%%s",regname[rs],regname[rt]);
1439 output_byte(0x0F);
1440 output_byte(0x43);
1441 output_modrm(3,rs,rt);
1442}
1443static void emit_cmova_reg(int rs,int rt)
1444{
1445 assem_debug("cmova %%%s,%%%s",regname[rs],regname[rt]);
1446 output_byte(0x0F);
1447 output_byte(0x47);
1448 output_modrm(3,rs,rt);
1449}
1450static void emit_cmovp_reg(int rs,int rt)
1451{
1452 assem_debug("cmovp %%%s,%%%s",regname[rs],regname[rt]);
1453 output_byte(0x0F);
1454 output_byte(0x4A);
1455 output_modrm(3,rs,rt);
1456}
1457static void emit_cmovnp_reg(int rs,int rt)
1458{
1459 assem_debug("cmovnp %%%s,%%%s",regname[rs],regname[rt]);
1460 output_byte(0x0F);
1461 output_byte(0x4B);
1462 output_modrm(3,rs,rt);
1463}
1464static void emit_setl(int rt)
1465{
1466 assem_debug("setl %%%s",regname[rt]);
1467 output_byte(0x0F);
1468 output_byte(0x9C);
1469 output_modrm(3,rt,2);
1470}
1471static void emit_movzbl_reg(int rs, int rt)
1472{
1473 assem_debug("movzbl %%%s,%%%s",regname[rs]+1,regname[rt]);
1474 output_byte(0x0F);
1475 output_byte(0xB6);
1476 output_modrm(3,rs,rt);
1477}
1478
1479static void emit_slti32(int rs,int imm,int rt)
1480{
1481 if(rs!=rt) emit_zeroreg(rt);
1482 emit_cmpimm(rs,imm);
1483 if(rt<4) {
1484 emit_setl(rt);
1485 if(rs==rt) emit_movzbl_reg(rt,rt);
1486 }
1487 else
1488 {
1489 if(rs==rt) emit_movimm(0,rt);
1490 emit_cmovl(&const_one,rt);
1491 }
1492}
/* rt = 0 plus the carry left by "cmp $imm,%rs" (added via emit_adcimm).
 * rt is zeroed before the compare when it is distinct from rs, or after the
 * compare when the two alias. */
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_adcimm(0,rt);
}
1500static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
1501{
1502 assert(rsh!=rt);
1503 emit_slti32(rsl,imm,rt);
1504 if(imm>=0)
1505 {
1506 emit_test(rsh,rsh);
1507 emit_cmovne(&const_zero,rt);
1508 emit_cmovs(&const_one,rt);
1509 }
1510 else
1511 {
1512 emit_cmpimm(rsh,-1);
1513 emit_cmovne(&const_zero,rt);
1514 emit_cmovl(&const_one,rt);
1515 }
1516}
1517static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
1518{
1519 assert(rsh!=rt);
1520 emit_sltiu32(rsl,imm,rt);
1521 if(imm>=0)
1522 {
1523 emit_test(rsh,rsh);
1524 emit_cmovne(&const_zero,rt);
1525 }
1526 else
1527 {
1528 emit_cmpimm(rsh,-1);
1529 emit_cmovne(&const_one,rt);
1530 }
1531}
1532
1533static void emit_cmp(int rs,int rt)
1534{
1535 assem_debug("cmp %%%s,%%%s",regname[rt],regname[rs]);
1536 output_byte(0x39);
1537 output_modrm(3,rs,rt);
1538}
1539static void emit_set_gz32(int rs, int rt)
1540{
1541 //assem_debug("set_gz32");
1542 emit_cmpimm(rs,1);
1543 emit_movimm(1,rt);
1544 emit_cmovl(&const_zero,rt);
1545}
/* rt = 1 minus the borrow left by "cmp $1,%rs" (subtracted via
 * emit_sbbimm), i.e. a non-zero test of rs. */
static void emit_set_nz32(int rs, int rt)
{
  const int threshold=1;
  //assem_debug("set_nz32");
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_sbbimm(0,rt);
}
1553static void emit_set_gz64_32(int rsh, int rsl, int rt)
1554{
1555 //assem_debug("set_gz64");
1556 emit_set_gz32(rsl,rt);
1557 emit_test(rsh,rsh);
1558 emit_cmovne(&const_one,rt);
1559 emit_cmovs(&const_zero,rt);
1560}
1561static void emit_set_nz64_32(int rsh, int rsl, int rt)
1562{
1563 //assem_debug("set_nz64");
1564 emit_or_and_set_flags(rsh,rsl,rt);
1565 emit_cmovne(&const_one,rt);
1566}
1567static void emit_set_if_less32(int rs1, int rs2, int rt)
1568{
1569 //assem_debug("set if less (%%%s,%%%s),%%%s",regname[rs1],regname[rs2],regname[rt]);
1570 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1571 emit_cmp(rs1,rs2);
1572 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1573 emit_cmovl(&const_one,rt);
1574}
/* rt = carry produced by comparing rs1 with rs2 (0 or 1).  rt is cleared
 * before the compare unless it aliases an input, in which case it is reset
 * afterwards. */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  //assem_debug("set if carry (%%%s,%%%s),%%%s",regname[rs1],regname[rs2],regname[rt]);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_adcimm(0,rt);
}
1583static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1584{
1585 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1586 assert(u1!=rt);
1587 assert(u2!=rt);
1588 emit_cmp(l1,l2);
1589 emit_mov(u1,rt);
1590 emit_sbb(u2,rt);
1591 emit_movimm(0,rt);
1592 emit_cmovl(&const_one,rt);
1593}
/* 64-bit "set if carry": rt = carry (0 or 1) of (u1:l1) against (u2:l2).
 * The low words are compared, the borrow is propagated through an sbb on
 * the high words (using rt as scratch), and the final carry is added into
 * a zeroed rt.  Neither high-word register may alias rt. */
static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
{
  //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
  assert(u1!=rt);
  assert(u2!=rt);
  // low words first, then the high words with borrow
  emit_cmp(l1,l2);
  emit_mov(u1,rt);
  emit_sbb(u2,rt);
  // only the carry from the sbb is needed; rebuild rt as 0/1
  emit_movimm(0,rt);
  emit_adcimm(0,rt);
}
1605
1606static void emit_call(int a)
1607{
1608 assem_debug("call %x (%x+%x)",a,(int)out+5,a-(int)out-5);
1609 output_byte(0xe8);
1610 output_w32(a-(int)out-4);
1611}
1612static void emit_jmp(int a)
1613{
1614 assem_debug("jmp %x (%x+%x)",a,(int)out+5,a-(int)out-5);
1615 output_byte(0xe9);
1616 output_w32(a-(int)out-4);
1617}
1618static void emit_jne(int a)
1619{
1620 assem_debug("jne %x",a);
1621 output_byte(0x0f);
1622 output_byte(0x85);
1623 output_w32(a-(int)out-4);
1624}
1625static void emit_jeq(int a)
1626{
1627 assem_debug("jeq %x",a);
1628 output_byte(0x0f);
1629 output_byte(0x84);
1630 output_w32(a-(int)out-4);
1631}
1632static void emit_js(int a)
1633{
1634 assem_debug("js %x",a);
1635 output_byte(0x0f);
1636 output_byte(0x88);
1637 output_w32(a-(int)out-4);
1638}
1639static void emit_jns(int a)
1640{
1641 assem_debug("jns %x",a);
1642 output_byte(0x0f);
1643 output_byte(0x89);
1644 output_w32(a-(int)out-4);
1645}
1646static void emit_jl(int a)
1647{
1648 assem_debug("jl %x",a);
1649 output_byte(0x0f);
1650 output_byte(0x8c);
1651 output_w32(a-(int)out-4);
1652}
1653static void emit_jge(int a)
1654{
1655 assem_debug("jge %x",a);
1656 output_byte(0x0f);
1657 output_byte(0x8d);
1658 output_w32(a-(int)out-4);
1659}
1660static void emit_jno(int a)
1661{
1662 assem_debug("jno %x",a);
1663 output_byte(0x0f);
1664 output_byte(0x81);
1665 output_w32(a-(int)out-4);
1666}
1667static void emit_jc(int a)
1668{
1669 assem_debug("jc %x",a);
1670 output_byte(0x0f);
1671 output_byte(0x82);
1672 output_w32(a-(int)out-4);
1673}
1674
/* Emit "push $imm" (opcode 0x68 followed by the 32-bit immediate). */
static void emit_pushimm(int imm)
{
  const int opcode=0x68;
  assem_debug("push $%x",imm);
  output_byte(opcode);
  output_w32(imm);
}
/* Emit a push of the 32-bit word stored at absolute address addr
 * (opcode FF, ModRM extension 6, disp32). */
static void emit_pushmem(int addr)
{
  const int ext=6;
  assem_debug("push *%x",addr);
  output_byte(0xFF);
  output_modrm(0,5,ext);
  output_w32(addr);
}
/* Emit "pusha" (single byte 0x60). */
static void emit_pusha()
{
  const int opcode=0x60;
  assem_debug("pusha");
  output_byte(opcode);
}
/* Emit "popa" (single byte 0x61). */
static void emit_popa()
{
  const int opcode=0x61;
  assem_debug("popa");
  output_byte(opcode);
}
1698static void emit_pushreg(u_int r)
1699{
1700 assem_debug("push %%%s",regname[r]);
1701 assert(r<8);
1702 output_byte(0x50+r);
1703}
1704static void emit_popreg(u_int r)
1705{
1706 assem_debug("pop %%%s",regname[r]);
1707 assert(r<8);
1708 output_byte(0x58+r);
1709}
1710static void emit_callreg(u_int r)
1711{
1712 assem_debug("call *%%%s",regname[r]);
1713 assert(r<8);
1714 output_byte(0xFF);
1715 output_modrm(3,r,2);
1716}
1717/*static void emit_jmpreg(u_int r)
1718{
1719 assem_debug("jmp *%%%s",regname[r]);
1720 assert(r<8);
1721 output_byte(0xFF);
1722 output_modrm(3,r,4);
1723}*/
1724static void emit_jmpmem_indexed(u_int addr,u_int r)
1725{
1726 assem_debug("jmp *%x(%%%s)",addr,regname[r]);
1727 assert(r<8);
1728 output_byte(0xFF);
1729 output_modrm(2,r,4);
1730 output_w32(addr);
1731}
1732
1733static void emit_readword(int addr, int rt)
1734{
1735 assem_debug("mov %x,%%%s",addr,regname[rt]);
1736 output_byte(0x8B);
1737 output_modrm(0,5,rt);
1738 output_w32(addr);
1739}
1740static void emit_readword_indexed(int addr, int rs, int rt)
1741{
1742 assem_debug("mov %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1743 output_byte(0x8B);
1744 if(addr<128&&addr>=-128) {
1745 output_modrm(1,rs,rt);
1746 if(rs==ESP) output_sib(0,4,4);
1747 output_byte(addr);
1748 }
1749 else
1750 {
1751 output_modrm(2,rs,rt);
1752 if(rs==ESP) output_sib(0,4,4);
1753 output_w32(addr);
1754 }
1755}
1756static void emit_readword_tlb(int addr, int map, int rt)
1757{
1758 if(map<0) emit_readword(addr+(int)rdram-0x80000000, rt);
1759 else
1760 {
1761 assem_debug("mov (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1762 output_byte(0x8B);
1763 output_modrm(0,4,rt);
1764 output_sib(2,map,5);
1765 output_w32(addr);
1766 }
1767}
1768static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
1769{
1770 if(map<0) emit_readword_indexed(addr+(int)rdram-0x80000000, rs, rt);
1771 else {
1772 assem_debug("mov %x(%%%s,%%%s,4),%%%s",addr,regname[rs],regname[map],regname[rt]);
1773 assert(rs!=ESP);
1774 output_byte(0x8B);
1775 if(addr==0&&rs!=EBP) {
1776 output_modrm(0,4,rt);
1777 output_sib(2,map,rs);
1778 }
1779 else if(addr<128&&addr>=-128) {
1780 output_modrm(1,4,rt);
1781 output_sib(2,map,rs);
1782 output_byte(addr);
1783 }
1784 else
1785 {
1786 output_modrm(2,4,rt);
1787 output_sib(2,map,rs);
1788 output_w32(addr);
1789 }
1790 }
1791}
1792static void emit_movmem_indexedx4(int addr, int rs, int rt)
1793{
1794 assem_debug("mov (%x,%%%s,4),%%%s",addr,regname[rs],regname[rt]);
1795 output_byte(0x8B);
1796 output_modrm(0,4,rt);
1797 output_sib(2,rs,5);
1798 output_w32(addr);
1799}
1800static void emit_readdword_tlb(int addr, int map, int rh, int rl)
1801{
1802 if(map<0) {
1803 if(rh>=0) emit_readword(addr+(int)rdram-0x80000000, rh);
1804 emit_readword(addr+(int)rdram-0x7FFFFFFC, rl);
1805 }
1806 else {
1807 if(rh>=0) emit_movmem_indexedx4(addr, map, rh);
1808 emit_movmem_indexedx4(addr+4, map, rl);
1809 }
1810}
/* Load a 64-bit value at addr+%rs as two 32-bit loads: addr into rh
 * (skipped when rh<0) and addr+4 into rl.  rh must differ from rs because
 * rs is still needed for the second load. */
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int low_addr=addr+4;
  assert(rh!=rs);
  if(rh>=0) {
    emit_readword_indexed_tlb(addr, rs, map, rh);
  }
  emit_readword_indexed_tlb(low_addr, rs, map, rl);
}
1817static void emit_movsbl(int addr, int rt)
1818{
1819 assem_debug("movsbl %x,%%%s",addr,regname[rt]);
1820 output_byte(0x0F);
1821 output_byte(0xBE);
1822 output_modrm(0,5,rt);
1823 output_w32(addr);
1824}
1825static void emit_movsbl_indexed(int addr, int rs, int rt)
1826{
1827 assem_debug("movsbl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1828 output_byte(0x0F);
1829 output_byte(0xBE);
1830 output_modrm(2,rs,rt);
1831 output_w32(addr);
1832}
1833static void emit_movsbl_tlb(int addr, int map, int rt)
1834{
1835 if(map<0) emit_movsbl(addr+(int)rdram-0x80000000, rt);
1836 else
1837 {
1838 assem_debug("movsbl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1839 output_byte(0x0F);
1840 output_byte(0xBE);
1841 output_modrm(0,4,rt);
1842 output_sib(2,map,5);
1843 output_w32(addr);
1844 }
1845}
1846static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1847{
1848 if(map<0) emit_movsbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1849 else {
1850 assem_debug("movsbl %x(%%%s,%%%s,4),%%%s",addr,regname[rs],regname[map],regname[rt]);
1851 assert(rs!=ESP);
1852 output_byte(0x0F);
1853 output_byte(0xBE);
1854 if(addr==0&&rs!=EBP) {
1855 output_modrm(0,4,rt);
1856 output_sib(2,map,rs);
1857 }
1858 else if(addr<128&&addr>=-128) {
1859 output_modrm(1,4,rt);
1860 output_sib(2,map,rs);
1861 output_byte(addr);
1862 }
1863 else
1864 {
1865 output_modrm(2,4,rt);
1866 output_sib(2,map,rs);
1867 output_w32(addr);
1868 }
1869 }
1870}
1871static void emit_movswl(int addr, int rt)
1872{
1873 assem_debug("movswl %x,%%%s",addr,regname[rt]);
1874 output_byte(0x0F);
1875 output_byte(0xBF);
1876 output_modrm(0,5,rt);
1877 output_w32(addr);
1878}
1879static void emit_movswl_indexed(int addr, int rs, int rt)
1880{
1881 assem_debug("movswl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1882 output_byte(0x0F);
1883 output_byte(0xBF);
1884 output_modrm(2,rs,rt);
1885 output_w32(addr);
1886}
1887static void emit_movswl_tlb(int addr, int map, int rt)
1888{
1889 if(map<0) emit_movswl(addr+(int)rdram-0x80000000, rt);
1890 else
1891 {
1892 assem_debug("movswl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1893 output_byte(0x0F);
1894 output_byte(0xBF);
1895 output_modrm(0,4,rt);
1896 output_sib(2,map,5);
1897 output_w32(addr);
1898 }
1899}
1900static void emit_movzbl(int addr, int rt)
1901{
1902 assem_debug("movzbl %x,%%%s",addr,regname[rt]);
1903 output_byte(0x0F);
1904 output_byte(0xB6);
1905 output_modrm(0,5,rt);
1906 output_w32(addr);
1907}
1908static void emit_movzbl_indexed(int addr, int rs, int rt)
1909{
1910 assem_debug("movzbl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1911 output_byte(0x0F);
1912 output_byte(0xB6);
1913 output_modrm(2,rs,rt);
1914 output_w32(addr);
1915}
1916static void emit_movzbl_tlb(int addr, int map, int rt)
1917{
1918 if(map<0) emit_movzbl(addr+(int)rdram-0x80000000, rt);
1919 else
1920 {
1921 assem_debug("movzbl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1922 output_byte(0x0F);
1923 output_byte(0xB6);
1924 output_modrm(0,4,rt);
1925 output_sib(2,map,5);
1926 output_w32(addr);
1927 }
1928}
1929static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
1930{
1931 if(map<0) emit_movzbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1932 else {
1933 assem_debug("movzbl %x(%%%s,%%%s,4),%%%s",addr,regname[rs],regname[map],regname[rt]);
1934 assert(rs!=ESP);
1935 output_byte(0x0F);
1936 output_byte(0xB6);
1937 if(addr==0&&rs!=EBP) {
1938 output_modrm(0,4,rt);
1939 output_sib(2,map,rs);
1940 }
1941 else if(addr<128&&addr>=-128) {
1942 output_modrm(1,4,rt);
1943 output_sib(2,map,rs);
1944 output_byte(addr);
1945 }
1946 else
1947 {
1948 output_modrm(2,4,rt);
1949 output_sib(2,map,rs);
1950 output_w32(addr);
1951 }
1952 }
1953}
1954static void emit_movzwl(int addr, int rt)
1955{
1956 assem_debug("movzwl %x,%%%s",addr,regname[rt]);
1957 output_byte(0x0F);
1958 output_byte(0xB7);
1959 output_modrm(0,5,rt);
1960 output_w32(addr);
1961}
1962static void emit_movzwl_indexed(int addr, int rs, int rt)
1963{
1964 assem_debug("movzwl %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
1965 output_byte(0x0F);
1966 output_byte(0xB7);
1967 output_modrm(2,rs,rt);
1968 output_w32(addr);
1969}
1970static void emit_movzwl_tlb(int addr, int map, int rt)
1971{
1972 if(map<0) emit_movzwl(addr+(int)rdram-0x80000000, rt);
1973 else
1974 {
1975 assem_debug("movzwl (%x,%%%s,4),%%%s",addr,regname[map],regname[rt]);
1976 output_byte(0x0F);
1977 output_byte(0xB7);
1978 output_modrm(0,4,rt);
1979 output_sib(2,map,5);
1980 output_w32(addr);
1981 }
1982}
1983/*
1984static void emit_movzwl_reg(int rs, int rt)
1985{
1986 assem_debug("movzwl %%%s,%%%s",regname[rs]+1,regname[rt]);
1987 output_byte(0x0F);
1988 output_byte(0xB7);
1989 output_modrm(3,rs,rt);
1990}*/
1991
1992static void emit_xchg(int rs, int rt)
1993{
1994 assem_debug("xchg %%%s,%%%s",regname[rs],regname[rt]);
1995 if(rs==EAX) {
1996 output_byte(0x90+rt);
1997 }
1998 else
1999 {
2000 output_byte(0x87);
2001 output_modrm(3,rs,rt);
2002 }
2003}
2004static void emit_writeword(int rt, int addr)
2005{
2006 assem_debug("movl %%%s,%x",regname[rt],addr);
2007 output_byte(0x89);
2008 output_modrm(0,5,rt);
2009 output_w32(addr);
2010}
2011static void emit_writeword_indexed(int rt, int addr, int rs)
2012{
2013 assem_debug("mov %%%s,%x+%%%s",regname[rt],addr,regname[rs]);
2014 output_byte(0x89);
2015 if(addr<128&&addr>=-128) {
2016 output_modrm(1,rs,rt);
2017 if(rs==ESP) output_sib(0,4,4);
2018 output_byte(addr);
2019 }
2020 else
2021 {
2022 output_modrm(2,rs,rt);
2023 if(rs==ESP) output_sib(0,4,4);
2024 output_w32(addr);
2025 }
2026}
2027static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2028{
2029 if(map<0) emit_writeword_indexed(rt, addr+(int)rdram-0x80000000, rs);
2030 else {
2031 assem_debug("mov %%%s,%x(%%%s,%%%s,1)",regname[rt],addr,regname[rs],regname[map]);
2032 assert(rs!=ESP);
2033 output_byte(0x89);
2034 if(addr==0&&rs!=EBP) {
2035 output_modrm(0,4,rt);
2036 output_sib(0,map,rs);
2037 }
2038 else if(addr<128&&addr>=-128) {
2039 output_modrm(1,4,rt);
2040 output_sib(0,map,rs);
2041 output_byte(addr);
2042 }
2043 else
2044 {
2045 output_modrm(2,4,rt);
2046 output_sib(0,map,rs);
2047 output_w32(addr);
2048 }
2049 }
2050}
/* Store a 64-bit value as two 32-bit stores: rh at addr and rl at addr+4,
 * both relative to %rs and the optional map register.  rh must be a valid
 * (non-negative) register. */
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  const int low_addr=addr+4;
  assert(rh>=0);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  emit_writeword_indexed_tlb(rl, low_addr, rs, map, temp);
}
2057static void emit_writehword(int rt, int addr)
2058{
2059 assem_debug("movw %%%s,%x",regname[rt]+1,addr);
2060 output_byte(0x66);
2061 output_byte(0x89);
2062 output_modrm(0,5,rt);
2063 output_w32(addr);
2064}
2065static void emit_writehword_indexed(int rt, int addr, int rs)
2066{
2067 assem_debug("movw %%%s,%x+%%%s",regname[rt]+1,addr,regname[rs]);
2068 output_byte(0x66);
2069 output_byte(0x89);
2070 if(addr<128&&addr>=-128) {
2071 output_modrm(1,rs,rt);
2072 output_byte(addr);
2073 }
2074 else
2075 {
2076 output_modrm(2,rs,rt);
2077 output_w32(addr);
2078 }
2079}
2080static void emit_writebyte(int rt, int addr)
2081{
2082 if(rt<4) {
2083 assem_debug("movb %%%cl,%x",regname[rt][1],addr);
2084 output_byte(0x88);
2085 output_modrm(0,5,rt);
2086 output_w32(addr);
2087 }
2088 else
2089 {
2090 emit_xchg(EAX,rt);
2091 emit_writebyte(EAX,addr);
2092 emit_xchg(EAX,rt);
2093 }
2094}
2095static void emit_writebyte_indexed(int rt, int addr, int rs)
2096{
2097 if(rt<4) {
2098 assem_debug("movb %%%cl,%x+%%%s",regname[rt][1],addr,regname[rs]);
2099 output_byte(0x88);
2100 if(addr<128&&addr>=-128) {
2101 output_modrm(1,rs,rt);
2102 output_byte(addr);
2103 }
2104 else
2105 {
2106 output_modrm(2,rs,rt);
2107 output_w32(addr);
2108 }
2109 }
2110 else
2111 {
2112 emit_xchg(EAX,rt);
2113 emit_writebyte_indexed(EAX,addr,rs==EAX?rt:rs);
2114 emit_xchg(EAX,rt);
2115 }
2116}
2117static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2118{
2119 if(map<0) emit_writebyte_indexed(rt, addr+(int)rdram-0x80000000, rs);
2120 else
2121 if(rt<4) {
2122 assem_debug("movb %%%cl,%x(%%%s,%%%s,1)",regname[rt][1],addr,regname[rs],regname[map]);
2123 assert(rs!=ESP);
2124 output_byte(0x88);
2125 if(addr==0&&rs!=EBP) {
2126 output_modrm(0,4,rt);
2127 output_sib(0,map,rs);
2128 }
2129 else if(addr<128&&addr>=-128) {
2130 output_modrm(1,4,rt);
2131 output_sib(0,map,rs);
2132 output_byte(addr);
2133 }
2134 else
2135 {
2136 output_modrm(2,4,rt);
2137 output_sib(0,map,rs);
2138 output_w32(addr);
2139 }
2140 }
2141 else
2142 {
2143 emit_xchg(EAX,rt);
2144 emit_writebyte_indexed_tlb(EAX,addr,rs==EAX?rt:rs,map==EAX?rt:map,temp);
2145 emit_xchg(EAX,rt);
2146 }
2147}
/* Emit "movl $imm,addr": opcode C7, a disp32-only ModRM, the 32-bit
 * address and then the 32-bit immediate. */
static void emit_writeword_imm(int imm, int addr)
{
  const int opcode=0xC7;
  assem_debug("movl $%x,%x",imm,addr);
  output_byte(opcode);
  output_modrm(0,5,0);
  output_w32(addr);
  output_w32(imm);
}
/* Emit "mov $imm,addr(%esp)".  Only the disp8 form is generated, so addr
 * must fit in a signed byte. */
static void emit_writeword_imm_esp(int imm, int addr)
{
  const int opcode=0xC7;
  assem_debug("mov $%x,%x(%%esp)",imm,addr);
  assert(addr>=-128&&addr<128);
  output_byte(opcode);
  output_modrm(1,4,0);
  output_sib(0,4,4);
  output_byte(addr);
  output_w32(imm);
}
/* Emit "movb $imm,addr": opcode C6, a disp32-only ModRM, the 32-bit
 * address and a one-byte immediate (imm must fit in a signed byte). */
static void emit_writebyte_imm(int imm, int addr)
{
  const int opcode=0xC6;
  assem_debug("movb $%x,%x",imm,addr);
  assert(imm>=-128&&imm<128);
  output_byte(opcode);
  output_modrm(0,5,0);
  output_w32(addr);
  output_byte(imm);
}
2175
2176static void emit_mul(int rs)
2177{
2178 assem_debug("mul %%%s",regname[rs]);
2179 output_byte(0xF7);
2180 output_modrm(3,rs,4);
2181}
2182static void emit_imul(int rs)
2183{
2184 assem_debug("imul %%%s",regname[rs]);
2185 output_byte(0xF7);
2186 output_modrm(3,rs,5);
2187}
2188static void emit_div(int rs)
2189{
2190 assem_debug("div %%%s",regname[rs]);
2191 output_byte(0xF7);
2192 output_modrm(3,rs,6);
2193}
2194static void emit_idiv(int rs)
2195{
2196 assem_debug("idiv %%%s",regname[rs]);
2197 output_byte(0xF7);
2198 output_modrm(3,rs,7);
2199}
/* Emit "cdq" (single byte 0x99). */
static void emit_cdq()
{
  const int opcode=0x99;
  assem_debug("cdq");
  output_byte(opcode);
}
2205
2206// Load 2 immediates optimizing for small code size
2207static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2208{
2209 emit_movimm(imm1,rt1);
2210 if(imm2-imm1<128&&imm2-imm1>=-128) emit_addimm(rt1,imm2-imm1,rt2);
2211 else emit_movimm(imm2,rt2);
2212}
2213
// special case for checking pending_exception
// Emits "cmpb $imm,addr": opcode 80, ModRM extension 7 with a disp32-only
// address, then a one-byte immediate.
static void emit_cmpmem_imm_byte(int addr,int imm)
{
  // Full signed-byte range; -128 is encodable and matches the imm8 range
  // checks used by the other emitters in this file.
  assert(imm<128&&imm>=-128);
  assem_debug("cmpb $%d,%x",imm,addr);
  output_byte(0x80);
  output_modrm(0,5,7);
  output_w32(addr);
  output_byte(imm);
}
2224
2225// special case for checking invalid_code
2226static void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
2227{
2228 assert(imm<128&&imm>=-127);
2229 assert(r>=0&&r<8);
2230 emit_shrimm(r,12,r);
2231 assem_debug("cmp $%d,%x+%%%s",imm,addr,regname[r]);
2232 output_byte(0x80);
2233 output_modrm(2,r,7);
2234 output_w32(addr);
2235 output_byte(imm);
2236}
2237
2238// special case for checking hash_table
2239static void emit_cmpmem_indexed(int addr,int rs,int rt)
2240{
2241 assert(rs>=0&&rs<8);
2242 assert(rt>=0&&rt<8);
2243 assem_debug("cmp %x+%%%s,%%%s",addr,regname[rs],regname[rt]);
2244 output_byte(0x39);
2245 output_modrm(2,rs,rt);
2246 output_w32(addr);
2247}
2248
2249// Used to preload hash table entries
2250#ifdef IMM_PREFETCH
/* Emit a prefetch (0F 18, ModRM extension 1) of the absolute address addr. */
static void emit_prefetch(void *addr)
{
  const int abs_addr=(int)addr;
  assem_debug("prefetch %x",abs_addr);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(abs_addr);
}
2259#endif
2260
2261/*void emit_submem(int r,int addr)
2262{
2263 assert(r>=0&&r<8);
2264 assem_debug("sub %x,%%%s",addr,regname[r]);
2265 output_byte(0x2B);
2266 output_modrm(0,5,r);
2267 output_w32((int)addr);
2268}
2269static void emit_subfrommem(int addr,int r)
2270{
2271 assert(r>=0&&r<8);
2272 assem_debug("sub %%%s,%x",regname[r],addr);
2273 output_byte(0x29);
2274 output_modrm(0,5,r);
2275 output_w32((int)addr);
2276}*/
2277
2278static void emit_flds(int r)
2279{
2280 assem_debug("flds (%%%s)",regname[r]);
2281 output_byte(0xd9);
2282 if(r!=EBP) output_modrm(0,r,0);
2283 else {output_modrm(1,EBP,0);output_byte(0);}
2284}
2285static void emit_fldl(int r)
2286{
2287 assem_debug("fldl (%%%s)",regname[r]);
2288 output_byte(0xdd);
2289 if(r!=EBP) output_modrm(0,r,0);
2290 else {output_modrm(1,EBP,0);output_byte(0);}
2291}
2292static void emit_fucomip(u_int r)
2293{
2294 assem_debug("fucomip %d",r);
2295 assert(r<8);
2296 output_byte(0xdf);
2297 output_byte(0xe8+r);
2298}
/* Emit "fchs" (bytes D9 E0). */
static void emit_fchs()
{
  const int second=0xe0;
  assem_debug("fchs");
  output_byte(0xd9);
  output_byte(second);
}
/* Emit "fabs" (bytes D9 E1). */
static void emit_fabs()
{
  const int second=0xe1;
  assem_debug("fabs");
  output_byte(0xd9);
  output_byte(second);
}
/* Emit "fsqrt" (bytes D9 FA). */
static void emit_fsqrt()
{
  const int second=0xfa;
  assem_debug("fsqrt");
  output_byte(0xd9);
  output_byte(second);
}
2317static void emit_fadds(int r)
2318{
2319 assem_debug("fadds (%%%s)",regname[r]);
2320 output_byte(0xd8);
2321 if(r!=EBP) output_modrm(0,r,0);
2322 else {output_modrm(1,EBP,0);output_byte(0);}
2323}
2324static void emit_faddl(int r)
2325{
2326 assem_debug("faddl (%%%s)",regname[r]);
2327 output_byte(0xdc);
2328 if(r!=EBP) output_modrm(0,r,0);
2329 else {output_modrm(1,EBP,0);output_byte(0);}
2330}
/* Emit "fadd" with x87 stack slot r (bytes D8, C0+r). */
static void emit_fadd(int r)
{
  const int base=0xc0;
  assem_debug("fadd st%d",r);
  output_byte(0xd8);
  output_byte(base+r);
}
2337static void emit_fsubs(int r)
2338{
2339 assem_debug("fsubs (%%%s)",regname[r]);
2340 output_byte(0xd8);
2341 if(r!=EBP) output_modrm(0,r,4);
2342 else {output_modrm(1,EBP,4);output_byte(0);}
2343}
2344static void emit_fsubl(int r)
2345{
2346 assem_debug("fsubl (%%%s)",regname[r]);
2347 output_byte(0xdc);
2348 if(r!=EBP) output_modrm(0,r,4);
2349 else {output_modrm(1,EBP,4);output_byte(0);}
2350}
/* Emit "fsub" with x87 stack slot r (bytes D8, E0+r). */
static void emit_fsub(int r)
{
  const int base=0xe0;
  assem_debug("fsub st%d",r);
  output_byte(0xd8);
  output_byte(base+r);
}
2357static void emit_fmuls(int r)
2358{
2359 assem_debug("fmuls (%%%s)",regname[r]);
2360 output_byte(0xd8);
2361 if(r!=EBP) output_modrm(0,r,1);
2362 else {output_modrm(1,EBP,1);output_byte(0);}
2363}
2364static void emit_fmull(int r)
2365{
2366 assem_debug("fmull (%%%s)",regname[r]);
2367 output_byte(0xdc);
2368 if(r!=EBP) output_modrm(0,r,1);
2369 else {output_modrm(1,EBP,1);output_byte(0);}
2370}
/* Emit "fmul" with x87 stack slot r (bytes D8, C8+r). */
static void emit_fmul(int r)
{
  const int base=0xc8;
  assem_debug("fmul st%d",r);
  output_byte(0xd8);
  output_byte(base+r);
}
2377static void emit_fdivs(int r)
2378{
2379 assem_debug("fdivs (%%%s)",regname[r]);
2380 output_byte(0xd8);
2381 if(r!=EBP) output_modrm(0,r,6);
2382 else {output_modrm(1,EBP,6);output_byte(0);}
2383}
2384static void emit_fdivl(int r)
2385{
2386 assem_debug("fdivl (%%%s)",regname[r]);
2387 output_byte(0xdc);
2388 if(r!=EBP) output_modrm(0,r,6);
2389 else {output_modrm(1,EBP,6);output_byte(0);}
2390}
/* Emit "fdiv" with x87 stack slot r (bytes D8, F0+r). */
static void emit_fdiv(int r)
{
  const int base=0xf0;
  assem_debug("fdiv st%d",r);
  output_byte(0xd8);
  output_byte(base+r);
}
/* Pop the x87 stack by emitting "fstp st(0)" (bytes DD D8). */
static void emit_fpop()
{
  const int second=0xd8;
  assem_debug("fpop");
  output_byte(0xdd);
  output_byte(second);
}
2404static void emit_fildl(int r)
2405{
2406 assem_debug("fildl (%%%s)",regname[r]);
2407 output_byte(0xdb);
2408 if(r!=EBP) output_modrm(0,r,0);
2409 else {output_modrm(1,EBP,0);output_byte(0);}
2410}
2411static void emit_fildll(int r)
2412{
2413 assem_debug("fildll (%%%s)",regname[r]);
2414 output_byte(0xdf);
2415 if(r!=EBP) output_modrm(0,r,5);
2416 else {output_modrm(1,EBP,5);output_byte(0);}
2417}
2418static void emit_fistpl(int r)
2419{
2420 assem_debug("fistpl (%%%s)",regname[r]);
2421 output_byte(0xdb);
2422 if(r!=EBP) output_modrm(0,r,3);
2423 else {output_modrm(1,EBP,3);output_byte(0);}
2424}
2425static void emit_fistpll(int r)
2426{
2427 assem_debug("fistpll (%%%s)",regname[r]);
2428 output_byte(0xdf);
2429 if(r!=EBP) output_modrm(0,r,7);
2430 else {output_modrm(1,EBP,7);output_byte(0);}
2431}
2432static void emit_fstps(int r)
2433{
2434 assem_debug("fstps (%%%s)",regname[r]);
2435 output_byte(0xd9);
2436 if(r!=EBP) output_modrm(0,r,3);
2437 else {output_modrm(1,EBP,3);output_byte(0);}
2438}
2439static void emit_fstpl(int r)
2440{
2441 assem_debug("fstpl (%%%s)",regname[r]);
2442 output_byte(0xdd);
2443 if(r!=EBP) output_modrm(0,r,3);
2444 else {output_modrm(1,EBP,3);output_byte(0);}
2445}
/* Emit "fnstcw (%esp)" (opcode D9, ModRM extension 7, plus the SIB byte
 * used for an ESP base). */
static void emit_fnstcw_stack()
{
  const int ext=7;
  assem_debug("fnstcw (%%esp)");
  output_byte(0xd9);
  output_modrm(0,4,ext);
  output_sib(0,4,4);
}
/* Emit "fldcw (%esp)" (opcode D9, ModRM extension 5, plus the SIB byte
 * used for an ESP base). */
static void emit_fldcw_stack()
{
  const int ext=5;
  assem_debug("fldcw (%%esp)");
  output_byte(0xd9);
  output_modrm(0,4,ext);
  output_sib(0,4,4);
}
2460static void emit_fldcw_indexed(int addr,int r)
2461{
2462 assem_debug("fldcw %x(%%%s)",addr,regname[r]);
2463 output_byte(0xd9);
2464 output_modrm(0,4,5);
2465 output_sib(1,r,5);
2466 output_w32(addr);
2467}
/* Emit "fldcw" from absolute address addr (opcode D9, ModRM extension 5,
 * disp32). */
static void emit_fldcw(int addr)
{
  const int ext=5;
  assem_debug("fldcw %x",addr);
  output_byte(0xd9);
  output_modrm(0,5,ext);
  output_w32(addr);
}
2475#ifdef __SSE__
2476static void emit_movss_load(u_int addr,u_int ssereg)
2477{
2478 assem_debug("movss (%%%s),xmm%d",regname[addr],ssereg);
2479 assert(ssereg<8);
2480 output_byte(0xf3);
2481 output_byte(0x0f);
2482 output_byte(0x10);
2483 if(addr!=EBP) output_modrm(0,addr,ssereg);
2484 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2485}
2486static void emit_movsd_load(u_int addr,u_int ssereg)
2487{
2488 assem_debug("movsd (%%%s),xmm%d",regname[addr],ssereg);
2489 assert(ssereg<8);
2490 output_byte(0xf2);
2491 output_byte(0x0f);
2492 output_byte(0x10);
2493 if(addr!=EBP) output_modrm(0,addr,ssereg);
2494 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2495}
2496static void emit_movd_store(u_int ssereg,u_int addr)
2497{
2498 assem_debug("movd xmm%d,(%%%s)",ssereg,regname[addr]);
2499 assert(ssereg<8);
2500 output_byte(0x66);
2501 output_byte(0x0f);
2502 output_byte(0x7e);
2503 if(addr!=EBP) output_modrm(0,addr,ssereg);
2504 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2505}
2506static void emit_cvttps2dq(u_int ssereg1,u_int ssereg2)
2507{
2508 assem_debug("cvttps2dq xmm%d,xmm%d",ssereg1,ssereg2);
2509 assert(ssereg1<8);
2510 assert(ssereg2<8);
2511 output_byte(0xf3);
2512 output_byte(0x0f);
2513 output_byte(0x5b);
2514 output_modrm(3,ssereg1,ssereg2);
2515}
2516static void emit_cvttpd2dq(u_int ssereg1,u_int ssereg2)
2517{
2518 assem_debug("cvttpd2dq xmm%d,xmm%d",ssereg1,ssereg2);
2519 assert(ssereg1<8);
2520 assert(ssereg2<8);
2521 output_byte(0x66);
2522 output_byte(0x0f);
2523 output_byte(0xe6);
2524 output_modrm(3,ssereg1,ssereg2);
2525}
2526#endif
2527
2528/* Stubs/epilogue */
2529
2530static void emit_extjump2(int addr, int target, int linker)
2531{
2532 u_char *ptr=(u_char *)addr;
2533 if(*ptr==0x0f)
2534 {
2535 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
2536 addr+=2;
2537 }
2538 else
2539 {
2540 assert(*ptr==0xe8||*ptr==0xe9);
2541 addr++;
2542 }
2543 emit_movimm(target,EAX);
2544 emit_movimm(addr,EBX);
2545 //assert(addr>=0x7000000&&addr<0x7FFFFFF);
2546 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2547//DEBUG >
2548#ifdef DEBUG_CYCLE_COUNT
2549 emit_readword((int)&last_count,ECX);
2550 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2551 emit_readword((int)&next_interupt,ECX);
2552 emit_writeword(HOST_CCREG,(int)&Count);
2553 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2554 emit_writeword(ECX,(int)&last_count);
2555#endif
2556//DEBUG <
2557 emit_jmp(linker);
2558}
2559
2560static void emit_extjump(int addr, int target)
2561{
2562 emit_extjump2(addr, target, (int)dyna_linker);
2563}
2564static void emit_extjump_ds(int addr, int target)
2565{
2566 emit_extjump2(addr, target, (int)dyna_linker_ds);
2567}
2568
2569static void do_readstub(int n)
2570{
2571 assem_debug("do_readstub %x",start+stubs[n][3]*4);
2572 set_jump_target(stubs[n][1],(int)out);
2573 int type=stubs[n][0];
2574 int i=stubs[n][3];
2575 int rs=stubs[n][4];
2576 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2577 signed char *i_regmap=i_regs->regmap;
2578 int addr=get_reg(i_regmap,AGEN1+(i&1));
2579 int rth,rt;
2580 int ds;
2581 if(itype[i]==C1LS||itype[i]==LOADLR) {
2582 rth=get_reg(i_regmap,FTEMP|64);
2583 rt=get_reg(i_regmap,FTEMP);
2584 }else{
2585 rth=get_reg(i_regmap,rt1[i]|64);
2586 rt=get_reg(i_regmap,rt1[i]);
2587 }
2588 assert(rs>=0);
2589 if(addr<0) addr=rt;
2590 if(addr<0&&itype[i]!=C1LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
2591 assert(addr>=0);
2592 int ftable=0;
2593 if(type==LOADB_STUB||type==LOADBU_STUB)
2594 ftable=(int)readmemb;
2595 if(type==LOADH_STUB||type==LOADHU_STUB)
2596 ftable=(int)readmemh;
2597 if(type==LOADW_STUB)
2598 ftable=(int)readmem;
2599 if(type==LOADD_STUB)
2600 ftable=(int)readmemd;
2601 emit_writeword(rs,(int)&address);
2602 emit_shrimm(rs,16,addr);
2603 emit_movmem_indexedx4(ftable,addr,addr);
2604 emit_pusha();
2605 ds=i_regs!=&regs[i];
2606 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2607 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2608 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2609
2610 int temp;
2611 int cc=get_reg(i_regmap,CCREG);
2612 if(cc<0) {
2613 if(addr==HOST_CCREG)
2614 {
2615 cc=0;temp=1;
2616 assert(cc!=HOST_CCREG);
2617 assert(temp!=HOST_CCREG);
2618 emit_loadreg(CCREG,cc);
2619 }
2620 else
2621 {
2622 cc=HOST_CCREG;
2623 emit_loadreg(CCREG,cc);
2624 temp=!addr;
2625 }
2626 }
2627 else
2628 {
2629 temp=!addr;
2630 }
2631 emit_readword((int)&last_count,temp);
2632 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
2633 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2634 emit_add(cc,temp,cc);
2635 emit_writeword(cc,(int)&Count);
2636 emit_callreg(addr);
2637 // We really shouldn't need to update the count here,
2638 // but not doing so causes random crashes...
2639 emit_readword((int)&Count,HOST_CCREG);
2640 emit_readword((int)&next_interupt,ECX);
2641 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2642 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2643 emit_writeword(ECX,(int)&last_count);
2644 emit_storereg(CCREG,HOST_CCREG);
2645 emit_popa();
2646 if((cc=get_reg(i_regmap,CCREG))>=0) {
2647 emit_loadreg(CCREG,cc);
2648 }
2649 if(rt>=0) {
2650 if(type==LOADB_STUB)
2651 emit_movsbl((int)&readmem_dword,rt);
2652 if(type==LOADBU_STUB)
2653 emit_movzbl((int)&readmem_dword,rt);
2654 if(type==LOADH_STUB)
2655 emit_movswl((int)&readmem_dword,rt);
2656 if(type==LOADHU_STUB)
2657 emit_movzwl((int)&readmem_dword,rt);
2658 if(type==LOADW_STUB)
2659 emit_readword((int)&readmem_dword,rt);
2660 if(type==LOADD_STUB) {
2661 emit_readword((int)&readmem_dword,rt);
2662 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2663 }
2664 }
2665 emit_jmp(stubs[n][2]); // return address
2666}
2667
2668static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2669{
2670 assem_debug("inline_readstub");
2671 int rs=get_reg(regmap,target);
2672 int rth=get_reg(regmap,target|64);
2673 int rt=get_reg(regmap,target);
2674 if(rs<0) rs=get_reg(regmap,-1);
2675 assert(rs>=0);
2676 int ftable=0;
2677 if(type==LOADB_STUB||type==LOADBU_STUB)
2678 ftable=(int)readmemb;
2679 if(type==LOADH_STUB||type==LOADHU_STUB)
2680 ftable=(int)readmemh;
2681 if(type==LOADW_STUB)
2682 ftable=(int)readmem;
2683 if(type==LOADD_STUB)
2684 ftable=(int)readmemd;
2685 #ifdef HOST_IMM_ADDR32
2686 emit_writeword_imm(addr,(int)&address);
2687 #else
2688 emit_writeword(rs,(int)&address);
2689 #endif
2690 emit_pusha();
2691 if((signed int)addr>=(signed int)0xC0000000) {
2692 // Theoretically we can have a pagefault here, if the TLB has never
2693 // been enabled and the address is outside the range 80000000..BFFFFFFF
2694 // Write out the registers so the pagefault can be handled. This is
2695 // a very rare case and likely represents a bug.
2696 int ds=regmap!=regs[i].regmap;
2697 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2698 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2699 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2700 }
2701 int cc=get_reg(regmap,CCREG);
2702 int temp;
2703 if(cc<0) {
2704 if(rs==HOST_CCREG)
2705 {
2706 cc=0;temp=1;
2707 assert(cc!=HOST_CCREG);
2708 assert(temp!=HOST_CCREG);
2709 emit_loadreg(CCREG,cc);
2710 }
2711 else
2712 {
2713 cc=HOST_CCREG;
2714 emit_loadreg(CCREG,cc);
2715 temp=!rs;
2716 }
2717 }
2718 else
2719 {
2720 temp=!rs;
2721 }
2722 emit_readword((int)&last_count,temp);
2723 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2724 emit_add(cc,temp,cc);
2725 emit_writeword(cc,(int)&Count);
2726 if((signed int)addr>=(signed int)0xC0000000) {
2727 // Pagefault address
2728 int ds=regmap!=regs[i].regmap;
2729 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2730 }
2731 emit_call(((u_int *)ftable)[addr>>16]);
2732 // We really shouldn't need to update the count here,
2733 // but not doing so causes random crashes...
2734 emit_readword((int)&Count,HOST_CCREG);
2735 emit_readword((int)&next_interupt,ECX);
2736 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2737 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2738 emit_writeword(ECX,(int)&last_count);
2739 emit_storereg(CCREG,HOST_CCREG);
2740 emit_popa();
2741 if((cc=get_reg(regmap,CCREG))>=0) {
2742 emit_loadreg(CCREG,cc);
2743 }
2744 if(rt>=0) {
2745 if(type==LOADB_STUB)
2746 emit_movsbl((int)&readmem_dword,rt);
2747 if(type==LOADBU_STUB)
2748 emit_movzbl((int)&readmem_dword,rt);
2749 if(type==LOADH_STUB)
2750 emit_movswl((int)&readmem_dword,rt);
2751 if(type==LOADHU_STUB)
2752 emit_movzwl((int)&readmem_dword,rt);
2753 if(type==LOADW_STUB)
2754 emit_readword((int)&readmem_dword,rt);
2755 if(type==LOADD_STUB) {
2756 emit_readword((int)&readmem_dword,rt);
2757 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2758 }
2759 }
2760}
2761
2762static void do_writestub(int n)
2763{
2764 assem_debug("do_writestub %x",start+stubs[n][3]*4);
2765 set_jump_target(stubs[n][1],(int)out);
2766 int type=stubs[n][0];
2767 int i=stubs[n][3];
2768 int rs=stubs[n][4];
2769 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2770 signed char *i_regmap=i_regs->regmap;
2771 int addr=get_reg(i_regmap,AGEN1+(i&1));
2772 int rth,rt,r;
2773 int ds;
2774 if(itype[i]==C1LS) {
2775 rth=get_reg(i_regmap,FTEMP|64);
2776 rt=get_reg(i_regmap,r=FTEMP);
2777 }else{
2778 rth=get_reg(i_regmap,rs2[i]|64);
2779 rt=get_reg(i_regmap,r=rs2[i]);
2780 }
2781 assert(rs>=0);
2782 assert(rt>=0);
2783 if(addr<0) addr=get_reg(i_regmap,-1);
2784 assert(addr>=0);
2785 int ftable=0;
2786 if(type==STOREB_STUB)
2787 ftable=(int)writememb;
2788 if(type==STOREH_STUB)
2789 ftable=(int)writememh;
2790 if(type==STOREW_STUB)
2791 ftable=(int)writemem;
2792 if(type==STORED_STUB)
2793 ftable=(int)writememd;
2794 emit_writeword(rs,(int)&address);
2795 emit_shrimm(rs,16,addr);
2796 emit_movmem_indexedx4(ftable,addr,addr);
2797 if(type==STOREB_STUB)
2798 emit_writebyte(rt,(int)&cpu_byte);
2799 if(type==STOREH_STUB)
2800 emit_writehword(rt,(int)&hword);
2801 if(type==STOREW_STUB)
2802 emit_writeword(rt,(int)&word);
2803 if(type==STORED_STUB) {
2804 emit_writeword(rt,(int)&dword);
2805 emit_writeword(r?rth:rt,(int)&dword+4);
2806 }
2807 emit_pusha();
2808 ds=i_regs!=&regs[i];
2809 int real_rs=get_reg(i_regmap,rs1[i]);
2810 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2811 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2812
2813 int temp;
2814 int cc=get_reg(i_regmap,CCREG);
2815 if(cc<0) {
2816 if(addr==HOST_CCREG)
2817 {
2818 cc=0;temp=1;
2819 assert(cc!=HOST_CCREG);
2820 assert(temp!=HOST_CCREG);
2821 emit_loadreg(CCREG,cc);
2822 }
2823 else
2824 {
2825 cc=HOST_CCREG;
2826 emit_loadreg(CCREG,cc);
2827 temp=!addr;
2828 }
2829 }
2830 else
2831 {
2832 temp=!addr;
2833 }
2834 emit_readword((int)&last_count,temp);
2835 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
2836 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2837 emit_add(cc,temp,cc);
2838 emit_writeword(cc,(int)&Count);
2839 emit_callreg(addr);
2840 emit_readword((int)&Count,HOST_CCREG);
2841 emit_readword((int)&next_interupt,ECX);
2842 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2843 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2844 emit_writeword(ECX,(int)&last_count);
2845 emit_storereg(CCREG,HOST_CCREG);
2846 emit_popa();
2847 if((cc=get_reg(i_regmap,CCREG))>=0) {
2848 emit_loadreg(CCREG,cc);
2849 }
2850 emit_jmp(stubs[n][2]); // return address
2851}
2852
2853static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2854{
2855 assem_debug("inline_writestub");
2856 int rs=get_reg(regmap,-1);
2857 int rth=get_reg(regmap,target|64);
2858 int rt=get_reg(regmap,target);
2859 assert(rs>=0);
2860 assert(rt>=0);
2861 int ftable=0;
2862 if(type==STOREB_STUB)
2863 ftable=(int)writememb;
2864 if(type==STOREH_STUB)
2865 ftable=(int)writememh;
2866 if(type==STOREW_STUB)
2867 ftable=(int)writemem;
2868 if(type==STORED_STUB)
2869 ftable=(int)writememd;
2870 emit_writeword(rs,(int)&address);
2871 if(type==STOREB_STUB)
2872 emit_writebyte(rt,(int)&cpu_byte);
2873 if(type==STOREH_STUB)
2874 emit_writehword(rt,(int)&hword);
2875 if(type==STOREW_STUB)
2876 emit_writeword(rt,(int)&word);
2877 if(type==STORED_STUB) {
2878 emit_writeword(rt,(int)&dword);
2879 emit_writeword(target?rth:rt,(int)&dword+4);
2880 }
2881 emit_pusha();
2882 if((signed int)addr>=(signed int)0xC0000000) {
2883 // Theoretically we can have a pagefault here, if the TLB has never
2884 // been enabled and the address is outside the range 80000000..BFFFFFFF
2885 // Write out the registers so the pagefault can be handled. This is
2886 // a very rare case and likely represents a bug.
2887 int ds=regmap!=regs[i].regmap;
2888 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2889 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2890 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
2891 }
2892 int cc=get_reg(regmap,CCREG);
2893 int temp;
2894 if(cc<0) {
2895 if(rs==HOST_CCREG)
2896 {
2897 cc=0;temp=1;
2898 assert(cc!=HOST_CCREG);
2899 assert(temp!=HOST_CCREG);
2900 emit_loadreg(CCREG,cc);
2901 }
2902 else
2903 {
2904 cc=HOST_CCREG;
2905 emit_loadreg(CCREG,cc);
2906 temp=!rs;
2907 }
2908 }
2909 else
2910 {
2911 temp=!rs;
2912 }
2913 emit_readword((int)&last_count,temp);
2914 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2915 emit_add(cc,temp,cc);
2916 emit_writeword(cc,(int)&Count);
2917 if((signed int)addr>=(signed int)0xC0000000) {
2918 // Pagefault address
2919 int ds=regmap!=regs[i].regmap;
2920 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,32);
2921 }
2922 emit_call(((u_int *)ftable)[addr>>16]);
2923 emit_readword((int)&Count,HOST_CCREG);
2924 emit_readword((int)&next_interupt,ECX);
2925 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2926 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2927 emit_writeword(ECX,(int)&last_count);
2928 emit_storereg(CCREG,HOST_CCREG);
2929 emit_popa();
2930 if((cc=get_reg(regmap,CCREG))>=0) {
2931 emit_loadreg(CCREG,cc);
2932 }
2933}
2934
2935static void do_unalignedwritestub(int n)
2936{
2937 set_jump_target(stubs[n][1],(int)out);
2938 output_byte(0xCC);
2939 emit_jmp(stubs[n][2]); // return address
2940}
2941
2942static void do_invstub(int n)
2943{
2944 set_jump_target(stubs[n][1],(int)out);
2945 emit_call(invalidate_block_reg[stubs[n][4]]);
2946 emit_jmp(stubs[n][2]); // return address
2947}
2948
2949static int do_dirty_stub(int i)
2950{
2951 assem_debug("do_dirty_stub %x",start+i*4);
2952 emit_pushimm(start+i*4);
2953 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
2954 emit_movimm((int)copy,EBX);
2955 emit_movimm(slen*4,ECX);
2956 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2957 emit_addimm(ESP,4,ESP);
2958 int entry=(int)out;
2959 load_regs_entry(i);
2960 if(entry==(int)out) entry=instr_addr[i];
2961 emit_jmp(instr_addr[i]);
2962 return entry;
2963}
2964
2965static void do_dirty_stub_ds()
2966{
2967 emit_pushimm(start+1);
2968 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
2969 emit_movimm((int)copy,EBX);
2970 emit_movimm(slen*4,ECX);
2971 emit_call((int)&verify_code_ds);
2972 emit_addimm(ESP,4,ESP);
2973}
2974
2975static void do_cop1stub(int n)
2976{
2977 assem_debug("do_cop1stub %x",start+stubs[n][3]*4);
2978 set_jump_target(stubs[n][1],(int)out);
2979 int i=stubs[n][3];
2980 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2981 int ds=stubs[n][6];
2982 if(!ds) {
2983 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2984 //if(i_regs!=&regs[i]) DebugMessage(M64MSG_VERBOSE, "oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2985 }
2986 //else {DebugMessage(M64MSG_VERBOSE, "fp exception in delay slot");}
2987 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2988 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2989 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2990 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2991 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2992}
2993
2994/* TLB */
2995
2996static int do_tlb_r(int s,int ar,int map,int cache,int x,int a,int shift,int c,u_int addr)
2997{
2998 if(c) {
2999 if((signed int)addr>=(signed int)0xC0000000) {
3000 emit_readword((int)(memory_map+(addr>>12)),map);
3001 }
3002 else
3003 return -1; // No mapping
3004 }
3005 else {
3006 if(s!=map) emit_mov(s,map);
3007 emit_shrimm(map,12,map);
3008 // Schedule this while we wait on the load
3009 //if(x) emit_xorimm(addr,x,addr);
3010 if(shift>=0) emit_lea8(s,shift);
3011 if(~a) emit_andimm(s,a,ar);
3012 emit_movmem_indexedx4((int)memory_map,map,map);
3013 }
3014 return map;
3015}
3016static int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3017{
3018 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3019 emit_test(map,map);
3020 *jaddr=(int)out;
3021 emit_js(0);
3022 }
3023 return map;
3024}
3025
/* Combine the address register with the map register for a load (via
 * emit_leairrx4) when a mapping register is present; do nothing otherwise. */
static void gen_tlb_addr_r(int ar, int map) {
  if(map<0) return;
  emit_leairrx4(0,ar,map,ar);
}
3031
3032static int do_tlb_w(int s,int ar,int map,int cache,int x,int c,u_int addr)
3033{
3034 if(c) {
3035 if(addr<0x80800000||addr>=0xC0000000) {
3036 emit_readword((int)(memory_map+(addr>>12)),map);
3037 }
3038 else
3039 return -1; // No mapping
3040 }
3041 else {
3042 if(s!=map) emit_mov(s,map);
3043 //if(s!=ar) emit_mov(s,ar);
3044 emit_shrimm(map,12,map);
3045 // Schedule this while we wait on the load
3046 //if(x) emit_xorimm(s,x,addr);
3047 emit_movmem_indexedx4((int)memory_map,map,map);
3048 }
3049 emit_shlimm(map,2,map);
3050 return map;
3051}
3052static void do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3053{
3054 if(!c||addr<0x80800000||addr>=0xC0000000) {
3055 *jaddr=(int)out;
3056 emit_jc(0);
3057 }
3058}
3059
/* Combine the address register with the map register for a store (via
 * emit_leairrx1) when a mapping register is present; do nothing otherwise. */
static void gen_tlb_addr_w(int ar, int map) {
  if(map<0) return;
  emit_leairrx1(0,ar,map,ar);
}
3065
3066// We don't need this for x86
3067static void generate_map_const(u_int addr,int reg) {
3068 // void *mapaddr=memory_map+(addr>>12);
3069}
3070
3071/* Special assem */
3072
3073static void shift_assemble_x86(int i,struct regstat *i_regs)
3074{
3075 if(rt1[i]) {
3076 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3077 {
3078 char s,t,shift;
3079 t=get_reg(i_regs->regmap,rt1[i]);
3080 s=get_reg(i_regs->regmap,rs1[i]);
3081 shift=get_reg(i_regs->regmap,rs2[i]);
3082 if(t>=0){
3083 if(rs1[i]==0)
3084 {
3085 emit_zeroreg(t);
3086 }
3087 else if(rs2[i]==0)
3088 {
3089 assert(s>=0);
3090 if(s!=t) emit_mov(s,t);
3091 }
3092 else
3093 {
3094 char temp=get_reg(i_regs->regmap,-1);
3095 assert(s>=0);
3096 if(t==ECX&&s!=ECX) {
3097 if(shift!=ECX) emit_mov(shift,ECX);
3098 if(rt1[i]==rs2[i]) {shift=temp;}
3099 if(s!=shift) emit_mov(s,shift);
3100 }
3101 else
3102 {
3103 if(rt1[i]==rs2[i]) {emit_mov(shift,temp);shift=temp;}
3104 if(s!=t) emit_mov(s,t);
3105 if(shift!=ECX) {
3106 if(i_regs->regmap[ECX]<0)
3107 emit_mov(shift,ECX);
3108 else
3109 emit_xchg(shift,ECX);
3110 }
3111 }
3112 if(opcode2[i]==4) // SLLV
3113 {
3114 emit_shlcl(t==ECX?shift:t);
3115 }
3116 if(opcode2[i]==6) // SRLV
3117 {
3118 emit_shrcl(t==ECX?shift:t);
3119 }
3120 if(opcode2[i]==7) // SRAV
3121 {
3122 emit_sarcl(t==ECX?shift:t);
3123 }
3124 if(shift!=ECX&&i_regs->regmap[ECX]>=0) emit_xchg(shift,ECX);
3125 }
3126 }
3127 } else { // DSLLV/DSRLV/DSRAV
3128 char sh,sl,th,tl,shift;
3129 th=get_reg(i_regs->regmap,rt1[i]|64);
3130 tl=get_reg(i_regs->regmap,rt1[i]);
3131 sh=get_reg(i_regs->regmap,rs1[i]|64);
3132 sl=get_reg(i_regs->regmap,rs1[i]);
3133 shift=get_reg(i_regs->regmap,rs2[i]);
3134 if(tl>=0){
3135 if(rs1[i]==0)
3136 {
3137 emit_zeroreg(tl);
3138 if(th>=0) emit_zeroreg(th);
3139 }
3140 else if(rs2[i]==0)
3141 {
3142 assert(sl>=0);
3143 if(sl!=tl) emit_mov(sl,tl);
3144 if(th>=0&&sh!=th) emit_mov(sh,th);
3145 }
3146 else
3147 {
3148 // FIXME: What if shift==tl ?
3149 assert(shift!=tl);
3150 int temp=get_reg(i_regs->regmap,-1);
3151 int real_th=th;
3152 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3153 assert(sl>=0);
3154 assert(sh>=0);
3155 if(tl==ECX&&sl!=ECX) {
3156 if(shift!=ECX) emit_mov(shift,ECX);
3157 if(sl!=shift) emit_mov(sl,shift);
3158 if(th>=0 && sh!=th) emit_mov(sh,th);
3159 }
3160 else if(th==ECX&&sh!=ECX) {
3161 if(shift!=ECX) emit_mov(shift,ECX);
3162 if(sh!=shift) emit_mov(sh,shift);
3163 if(sl!=tl) emit_mov(sl,tl);
3164 }
3165 else
3166 {
3167 if(sl!=tl) emit_mov(sl,tl);
3168 if(th>=0 && sh!=th) emit_mov(sh,th);
3169 if(shift!=ECX) {
3170 if(i_regs->regmap[ECX]<0)
3171 emit_mov(shift,ECX);
3172 else
3173 emit_xchg(shift,ECX);
3174 }
3175 }
3176 if(opcode2[i]==0x14) // DSLLV
3177 {
3178 if(th>=0) emit_shldcl(th==ECX?shift:th,tl==ECX?shift:tl);
3179 emit_shlcl(tl==ECX?shift:tl);
3180 emit_testimm(ECX,32);
3181 if(th>=0) emit_cmovne_reg(tl==ECX?shift:tl,th==ECX?shift:th);
3182 emit_cmovne(&const_zero,tl==ECX?shift:tl);
3183 }
3184 if(opcode2[i]==0x16) // DSRLV
3185 {
3186 assert(th>=0);
3187 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3188 emit_shrcl(th==ECX?shift:th);
3189 emit_testimm(ECX,32);
3190 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3191 if(real_th>=0) emit_cmovne(&const_zero,th==ECX?shift:th);
3192 }
3193 if(opcode2[i]==0x17) // DSRAV
3194 {
3195 assert(th>=0);
3196 emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
3197 if(real_th>=0) {
3198 assert(temp>=0);
3199 emit_mov(th==ECX?shift:th,temp==ECX?shift:temp);
3200 }
3201 emit_sarcl(th==ECX?shift:th);
3202 if(real_th>=0) emit_sarimm(temp==ECX?shift:temp,31,temp==ECX?shift:temp);
3203 emit_testimm(ECX,32);
3204 emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
3205 if(real_th>=0) emit_cmovne_reg(temp==ECX?shift:temp,th==ECX?shift:th);
3206 }
3207 if(shift!=ECX&&(i_regs->regmap[ECX]>=0||temp==ECX)) emit_xchg(shift,ECX);
3208 }
3209 }
3210 }
3211 }
3212}
3213#define shift_assemble shift_assemble_x86
3214
3215static void loadlr_assemble_x86(int i,struct regstat *i_regs)
3216{
3217 int s,th,tl,temp,temp2,addr,map=-1;
3218 int offset;
3219 int jaddr=0;
3220 int memtarget,c=0;
3221 u_int hr,reglist=0;
3222 th=get_reg(i_regs->regmap,rt1[i]|64);
3223 tl=get_reg(i_regs->regmap,rt1[i]);
3224 s=get_reg(i_regs->regmap,rs1[i]);
3225 temp=get_reg(i_regs->regmap,-1);
3226 temp2=get_reg(i_regs->regmap,FTEMP);
3227 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3228 assert(addr<0);
3229 offset=imm[i];
3230 for(hr=0;hr<HOST_REGS;hr++) {
3231 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3232 }
3233 reglist|=1<<temp;
3234 if(offset||s<0||c) addr=temp2;
3235 else addr=s;
3236 if(s>=0) {
3237 c=(i_regs->wasconst>>s)&1;
3238 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
3239 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3240 }
3241 if(!using_tlb) {
3242 if(!c) {
3243 emit_lea8(addr,temp);
3244 if (opcode[i]==0x22||opcode[i]==0x26) {
3245 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3246 }else{
3247 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3248 }
3249 emit_cmpimm(addr,0x800000);
3250 jaddr=(int)out;
3251 emit_jno(0);
3252 }
3253 else {
3254 if (opcode[i]==0x22||opcode[i]==0x26) {
3255 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3256 }else{
3257 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3258 }
3259 }
3260 }else{ // using tlb
3261 int a;
3262 if(c) {
3263 a=-1;
3264 }else if (opcode[i]==0x22||opcode[i]==0x26) {
3265 a=0xFFFFFFFC; // LWL/LWR
3266 }else{
3267 a=0xFFFFFFF8; // LDL/LDR
3268 }
3269 map=get_reg(i_regs->regmap,TLREG);
3270 assert(map>=0);
3271 reglist&=~(1<<map);
3272 map=do_tlb_r(addr,temp2,map,-1,0,a,c?-1:temp,c,constmap[i][s]+offset);
3273 if(c) {
3274 if (opcode[i]==0x22||opcode[i]==0x26) {
3275 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3276 }else{
3277 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3278 }
3279 }
3280 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
3281 }
3282 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3283 if(!c||memtarget) {
3284 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
3285 emit_readword_indexed_tlb(0,temp2,map,temp2);
3286 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3287 }
3288 else
3289 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3290 if(rt1[i]) {
3291 assert(tl>=0);
3292 emit_andimm(temp,24,temp);
3293 if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
3294 if(temp==ECX)
3295 {
3296 int temp3=EDX;
3297 if(temp3==temp2) temp3++;
3298 emit_pushreg(temp3);
3299 emit_movimm(-1,temp3);
3300 if (opcode[i]==0x26) {
3301 emit_shrcl(temp3);
3302 emit_shrcl(temp2);
3303 }else{
3304 emit_shlcl(temp3);
3305 emit_shlcl(temp2);
3306 }
3307 emit_mov(temp3,ECX);
3308 emit_not(ECX,ECX);
3309 emit_popreg(temp3);
3310 }
3311 else
3312 {
3313 int temp3=EBP;
3314 if(temp3==temp) temp3++;
3315 if(temp3==temp2) temp3++;
3316 if(temp3==temp) temp3++;
3317 emit_xchg(ECX,temp);
3318 emit_pushreg(temp3);
3319 emit_movimm(-1,temp3);
3320 if (opcode[i]==0x26) {
3321 emit_shrcl(temp3);
3322 emit_shrcl(temp2==ECX?temp:temp2);
3323 }else{
3324 emit_shlcl(temp3);
3325 emit_shlcl(temp2==ECX?temp:temp2);
3326 }
3327 emit_not(temp3,temp3);
3328 emit_mov(temp,ECX);
3329 emit_mov(temp3,temp);
3330 emit_popreg(temp3);
3331 }
3332 emit_and(temp,tl,tl);
3333 emit_or(temp2,tl,tl);
3334 //emit_storereg(rt1[i],tl); // DEBUG
3335 /*emit_pusha();
3336 //save_regs(0x100f);
3337 emit_readword((int)&last_count,ECX);
3338 if(get_reg(i_regs->regmap,CCREG)<0)
3339 emit_loadreg(CCREG,HOST_CCREG);
3340 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3341 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3342 emit_writeword(HOST_CCREG,(int)&Count);
3343 emit_call((int)memdebug);
3344 emit_popa();
3345 //restore_regs(0x100f);*/
3346 }
3347 }
3348 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3349 if(s>=0)
3350 if((i_regs->wasdirty>>s)&1)
3351 emit_storereg(rs1[i],s);
3352 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3353 if((i_regs->wasdirty>>get_reg(i_regs->regmap,rs1[i]|64))&1)
3354 emit_storereg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3355 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3356 if(!c||memtarget) {
3357 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3358 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3359 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3360 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3361 }
3362 else
3363 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3364 if(rt1[i]) {
3365 assert(th>=0);
3366 assert(tl>=0);
3367 emit_andimm(temp,56,temp);
3368 emit_pushreg(temp);
3369 emit_pushreg(temp2h);
3370 emit_pushreg(temp2);
3371 emit_pushreg(th);
3372 emit_pushreg(tl);
3373 if(opcode[i]==0x1A) emit_call((int)ldl_merge);
3374 if(opcode[i]==0x1B) emit_call((int)ldr_merge);
3375 emit_addimm(ESP,20,ESP);
3376 if(tl!=EDX) {
3377 if(tl!=EAX) emit_mov(EAX,tl);
3378 if(th!=EDX) emit_mov(EDX,th);
3379 } else
3380 if(th!=EAX) {
3381 if(th!=EDX) emit_mov(EDX,th);
3382 if(tl!=EAX) emit_mov(EAX,tl);
3383 } else {
3384 emit_xchg(EAX,EDX);
3385 }
3386 if(s>=0) emit_loadreg(rs1[i],s);
3387 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3388 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3389 }
3390 }
3391}
3392#define loadlr_assemble loadlr_assemble_x86
3393
3394static void cop0_assemble(int i,struct regstat *i_regs)
3395{
3396 if(opcode2[i]==0) // MFC0
3397 {
3398 if(rt1[i]) {
3399 signed char t=get_reg(i_regs->regmap,rt1[i]);
3400 char copr=(source[i]>>11)&0x1f;
3401 if(t>=0) {
3402 emit_writeword_imm((int)&fake_pc,(int)&PC);
3403 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
3404 if(copr==9) {
3405 emit_readword((int)&last_count,ECX);
3406 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3407 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3408 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3409 emit_writeword(HOST_CCREG,(int)&Count);
3410 }
3411 emit_call((int)cached_interpreter_table.MFC0);
3412 emit_readword((int)&readmem_dword,t);
3413 }
3414 }
3415 }
3416 else if(opcode2[i]==4) // MTC0
3417 {
3418 signed char s=get_reg(i_regs->regmap,rs1[i]);
3419 char copr=(source[i]>>11)&0x1f;
3420 assert(s>=0);
3421 emit_writeword(s,(int)&readmem_dword);
3422 emit_pusha();
3423 emit_writeword_imm((int)&fake_pc,(int)&PC);
3424 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
3425 if(copr==9||copr==11||copr==12) {
3426 if(copr==12&&!is_delayslot) {
3427 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
3428 }
3429 emit_readword((int)&last_count,ECX);
3430 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3431 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3432 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3433 emit_writeword(HOST_CCREG,(int)&Count);
3434 }
3435 // What a mess. The status register (12) can enable interrupts,
3436 // so needs a special case to handle a pending interrupt.
3437 // The interrupt must be taken immediately, because a subsequent
3438 // instruction might disable interrupts again.
3439 if(copr==12&&!is_delayslot) {
3440 emit_writeword_imm(start+i*4+4,(int)&pcaddr);
3441 emit_writebyte_imm(0,(int)&pending_exception);
3442 }
3443 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3444 //else
3445 emit_call((int)cached_interpreter_table.MTC0);
3446 if(copr==9||copr==11||copr==12) {
3447 emit_readword((int)&Count,HOST_CCREG);
3448 emit_readword((int)&next_interupt,ECX);
3449 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3450 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3451 emit_writeword(ECX,(int)&last_count);
3452 emit_storereg(CCREG,HOST_CCREG);
3453 }
3454 emit_popa();
3455 if(copr==12) {
3456 assert(!is_delayslot);
3457 //if(is_delayslot) output_byte(0xcc);
3458 emit_cmpmem_imm_byte((int)&pending_exception,0);
3459 emit_jne((int)&do_interrupt);
3460 }
3461 cop1_usable=0;
3462 }
3463 else
3464 {
3465 assert(opcode2[i]==0x10);
3466 if((source[i]&0x3f)==0x01) // TLBR
3467 emit_call((int)cached_interpreter_table.TLBR);
3468 if((source[i]&0x3f)==0x02) // TLBWI
3469 emit_call((int)TLBWI_new);
3470 if((source[i]&0x3f)==0x06) { // TLBWR
3471 // The TLB entry written by TLBWR is dependent on the count,
3472 // so update the cycle count
3473 emit_readword((int)&last_count,ECX);
3474 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3475 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3476 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3477 emit_writeword(HOST_CCREG,(int)&Count);
3478 emit_call((int)TLBWR_new);
3479 }
3480 if((source[i]&0x3f)==0x08) // TLBP
3481 emit_call((int)cached_interpreter_table.TLBP);
3482 if((source[i]&0x3f)==0x18) // ERET
3483 {
3484 int count=ccadj[i];
3485 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3486 emit_addimm_and_set_flags(CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3487 emit_jmp((int)jump_eret);
3488 }
3489 }
3490}
3491
3492static void cop1_assemble(int i,struct regstat *i_regs)
3493{
3494 // Check cop1 unusable
3495 if(!cop1_usable) {
3496 signed char rs=get_reg(i_regs->regmap,CSREG);
3497 assert(rs>=0);
3498 emit_testimm(rs,0x20000000);
3499 int jaddr=(int)out;
3500 emit_jeq(0);
3501 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3502 cop1_usable=1;
3503 }
3504 if (opcode2[i]==0) { // MFC1
3505 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3506 if(tl>=0) {
3507 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
3508 emit_readword_indexed(0,tl,tl);
3509 }
3510 }
3511 else if (opcode2[i]==1) { // DMFC1
3512 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3513 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
3514 if(tl>=0) {
3515 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
3516 if(th>=0) emit_readword_indexed(4,tl,th);
3517 emit_readword_indexed(0,tl,tl);
3518 }
3519 }
3520 else if (opcode2[i]==4) { // MTC1
3521 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3522 signed char temp=get_reg(i_regs->regmap,-1);
3523 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3524 emit_writeword_indexed(sl,0,temp);
3525 }
3526 else if (opcode2[i]==5) { // DMTC1
3527 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3528 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
3529 signed char temp=get_reg(i_regs->regmap,-1);
3530 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3531 emit_writeword_indexed(sh,4,temp);
3532 emit_writeword_indexed(sl,0,temp);
3533 }
3534 else if (opcode2[i]==2) // CFC1
3535 {
3536 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3537 if(tl>=0) {
3538 u_int copr=(source[i]>>11)&0x1f;
3539 if(copr==0) emit_readword((int)&FCR0,tl);
3540 if(copr==31) emit_readword((int)&FCR31,tl);
3541 }
3542 }
3543 else if (opcode2[i]==6) // CTC1
3544 {
3545 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3546 u_int copr=(source[i]>>11)&0x1f;
3547 assert(sl>=0);
3548 if(copr==31)
3549 {
3550 emit_writeword(sl,(int)&FCR31);
3551 // Set the rounding mode
3552 char temp=get_reg(i_regs->regmap,-1);
3553 emit_movimm(3,temp);
3554 emit_and(sl,temp,temp);
3555 emit_fldcw_indexed((int)&rounding_modes,temp);
3556 }
3557 }
3558}
3559
3560static void fconv_assemble_x86(int i,struct regstat *i_regs)
3561{
3562 signed char temp=get_reg(i_regs->regmap,-1);
3563 assert(temp>=0);
3564 // Check cop1 unusable
3565 if(!cop1_usable) {
3566 signed char rs=get_reg(i_regs->regmap,CSREG);
3567 assert(rs>=0);
3568 emit_testimm(rs,0x20000000);
3569 int jaddr=(int)out;
3570 emit_jeq(0);
3571 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3572 cop1_usable=1;
3573 }
3574#ifdef __SSE__
3575 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
3576 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3577 emit_movss_load(temp,0);
3578 emit_cvttps2dq(0,0); // float->int, truncate
3579 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3580 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3581 emit_movd_store(0,temp);
3582 return;
3583 }
3584 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
3585 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3586 emit_movsd_load(temp,0);
3587 emit_cvttpd2dq(0,0); // double->int, truncate
3588 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3589 emit_movd_store(0,temp);
3590 return;
3591 }
3592#endif
3593
3594 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
3595 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3596 emit_fildl(temp);
3597 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3598 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3599 emit_fstps(temp);
3600 return;
3601 }
3602 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
3603 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3604 emit_fildl(temp);
3605 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3606 emit_fstpl(temp);
3607 return;
3608 }
3609 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) { // cvt_s_l
3610 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3611 emit_fildll(temp);
3612 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3613 emit_fstps(temp);
3614 return;
3615 }
3616 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) { // cvt_d_l
3617 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3618 emit_fildll(temp);
3619 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3620 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3621 emit_fstpl(temp);
3622 return;
3623 }
3624
3625 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
3626 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3627 emit_flds(temp);
3628 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3629 emit_fstpl(temp);
3630 return;
3631 }
3632 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
3633 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3634 emit_fldl(temp);
3635 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3636 emit_fstps(temp);
3637 return;
3638 }
3639
3640 if(opcode2[i]==0x10) { // cvt_*_s
3641 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3642 emit_flds(temp);
3643 }
3644 if(opcode2[i]==0x11) { // cvt_*_d
3645 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3646 emit_fldl(temp);
3647 }
3648 if((source[i]&0x3f)<0x10) {
3649 emit_fnstcw_stack();
3650 if((source[i]&3)==0) emit_fldcw((int)&round_mode); //DebugMessage(M64MSG_VERBOSE, "round");
3651 if((source[i]&3)==1) emit_fldcw((int)&trunc_mode); //DebugMessage(M64MSG_VERBOSE, "trunc");
3652 if((source[i]&3)==2) emit_fldcw((int)&ceil_mode); //DebugMessage(M64MSG_VERBOSE, "ceil");
3653 if((source[i]&3)==3) emit_fldcw((int)&floor_mode); //DebugMessage(M64MSG_VERBOSE, "floor");
3654 }
3655 if((source[i]&0x3f)==0x24||(source[i]&0x3c)==0x0c) { // cvt_w_*
3656 if(opcode2[i]!=0x10||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3657 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3658 emit_fistpl(temp);
3659 }
3660 if((source[i]&0x3f)==0x25||(source[i]&0x3c)==0x08) { // cvt_l_*
3661 if(opcode2[i]!=0x11||((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
3662 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3663 emit_fistpll(temp);
3664 }
3665 if((source[i]&0x3f)<0x10) {
3666 emit_fldcw_stack();
3667 }
3668 return;
3669
3670 // C emulation code for debugging
3671
3672 emit_pusha();
3673
3674 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
3675 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3676 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3677 emit_call((int)cvt_s_w);
3678 }
3679 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
3680 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3681 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3682 emit_call((int)cvt_d_w);
3683 }
3684 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
3685 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3686 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3687 emit_call((int)cvt_s_l);
3688 }
3689 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
3690 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3691 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3692 emit_call((int)cvt_d_l);
3693 }
3694
3695 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
3696 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3697 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3698 emit_call((int)cvt_d_s);
3699 }
3700 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
3701 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3702 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3703 emit_call((int)cvt_w_s);
3704 }
3705 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
3706 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3707 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3708 emit_call((int)cvt_l_s);
3709 }
3710
3711 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
3712 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3713 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3714 emit_call((int)cvt_s_d);
3715 }
3716 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
3717 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3718 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3719 emit_call((int)cvt_w_d);
3720 }
3721 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
3722 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3723 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3724 emit_call((int)cvt_l_d);
3725 }
3726
3727 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
3728 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3729 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3730 emit_call((int)round_l_s);
3731 }
3732 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
3733 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3734 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3735 emit_call((int)trunc_l_s);
3736 }
3737 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
3738 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3739 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3740 emit_call((int)ceil_l_s);
3741 }
3742 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
3743 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3744 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3745 emit_call((int)floor_l_s);
3746 }
3747 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
3748 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3749 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3750 emit_call((int)round_w_s);
3751 }
3752 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
3753 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3754 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3755 emit_call((int)trunc_w_s);
3756 }
3757 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
3758 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3759 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3760 emit_call((int)ceil_w_s);
3761 }
3762 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
3763 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3764 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3765 emit_call((int)floor_w_s);
3766 }
3767
3768 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
3769 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3770 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3771 emit_call((int)round_l_d);
3772 }
3773 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
3774 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3775 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3776 emit_call((int)trunc_l_d);
3777 }
3778 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
3779 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3780 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3781 emit_call((int)ceil_l_d);
3782 }
3783 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
3784 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
3785 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3786 emit_call((int)floor_l_d);
3787 }
3788 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
3789 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3790 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3791 emit_call((int)round_w_d);
3792 }
3793 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
3794 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3795 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3796 emit_call((int)trunc_w_d);
3797 }
3798 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
3799 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3800 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3801 emit_call((int)ceil_w_d);
3802 }
3803 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
3804 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
3805 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3806 emit_call((int)floor_w_d);
3807 }
3808
3809 emit_addimm(ESP,8,ESP);
3810 emit_popa();
3811 //emit_loadreg(CSREG,rs);
3812 return;
3813}
3814#define fconv_assemble fconv_assemble_x86
3815
3816static void fcomp_assemble(int i,struct regstat *i_regs)
3817{
3818 signed char fs=get_reg(i_regs->regmap,FSREG);
3819 signed char temp=get_reg(i_regs->regmap,-1);
3820 assert(temp>=0);
3821 // Check cop1 unusable
3822 if(!cop1_usable) {
3823 signed char cs=get_reg(i_regs->regmap,CSREG);
3824 assert(cs>=0);
3825 emit_testimm(cs,0x20000000);
3826 int jaddr=(int)out;
3827 emit_jeq(0);
3828 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3829 cop1_usable=1;
3830 }
3831
3832 if((source[i]&0x3f)==0x30) {
3833 emit_andimm(fs,~0x800000,fs);
3834 return;
3835 }
3836
3837 if((source[i]&0x3e)==0x38) {
3838 // sf/ngle - these should throw exceptions for NaNs
3839 emit_andimm(fs,~0x800000,fs);
3840 return;
3841 }
3842
3843 if(opcode2[i]==0x10) {
3844 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
3845 emit_flds(temp);
3846 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3847 emit_flds(temp);
3848 emit_movimm(0x800000,temp);
3849 emit_or(fs,temp,fs);
3850 emit_xor(temp,fs,temp);
3851 emit_fucomip(1);
3852 emit_fpop();
3853 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_s
3854 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_s
3855 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_s
3856 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_s
3857 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_s
3858 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_s
3859 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_s
3860 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_s
3861 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_s
3862 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_s
3863 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_s
3864 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_s
3865 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_s
3866 return;
3867 }
3868 if(opcode2[i]==0x11) {
3869 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
3870 emit_fldl(temp);
3871 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3872 emit_fldl(temp);
3873 emit_movimm(0x800000,temp);
3874 emit_or(fs,temp,fs);
3875 emit_xor(temp,fs,temp);
3876 emit_fucomip(1);
3877 emit_fpop();
3878 if((source[i]&0x3f)==0x31) emit_cmovnp_reg(temp,fs); // c_un_d
3879 if((source[i]&0x3f)==0x32) {emit_cmovne_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_eq_d
3880 if((source[i]&0x3f)==0x33) emit_cmovne_reg(temp,fs); // c_ueq_d
3881 if((source[i]&0x3f)==0x34) {emit_cmovnc_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_olt_d
3882 if((source[i]&0x3f)==0x35) emit_cmovnc_reg(temp,fs); // c_ult_d
3883 if((source[i]&0x3f)==0x36) {emit_cmova_reg(temp,fs);emit_cmovp_reg(temp,fs);} // c_ole_d
3884 if((source[i]&0x3f)==0x37) emit_cmova_reg(temp,fs); // c_ule_d
3885 if((source[i]&0x3f)==0x3a) emit_cmovne_reg(temp,fs); // c_seq_d
3886 if((source[i]&0x3f)==0x3b) emit_cmovne_reg(temp,fs); // c_ngl_d
3887 if((source[i]&0x3f)==0x3c) emit_cmovnc_reg(temp,fs); // c_lt_d
3888 if((source[i]&0x3f)==0x3d) emit_cmovnc_reg(temp,fs); // c_nge_d
3889 if((source[i]&0x3f)==0x3e) emit_cmova_reg(temp,fs); // c_le_d
3890 if((source[i]&0x3f)==0x3f) emit_cmova_reg(temp,fs); // c_ngt_d
3891 return;
3892 }
3893
3894 emit_pusha();
3895 if(opcode2[i]==0x10) {
3896 emit_pushmem((int)&reg_cop1_simple[(source[i]>>16)&0x1f]);
3897 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
3898 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
3899 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
3900 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
3901 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
3902 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
3903 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
3904 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
3905 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
3906 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
3907 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
3908 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
3909 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
3910 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
3911 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
3912 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
3913 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
3914 }
3915 if(opcode2[i]==0x11) {
3916 emit_pushmem((int)&reg_cop1_double[(source[i]>>16)&0x1f]);
3917 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
3918 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
3919 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
3920 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
3921 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
3922 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
3923 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
3924 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
3925 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
3926 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
3927 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
3928 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
3929 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
3930 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
3931 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
3932 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
3933 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
3934 }
3935 emit_addimm(ESP,8,ESP);
3936 emit_popa();
3937 emit_loadreg(FSREG,fs);
3938 return;
3939}
3940
3941static void float_assemble(int i,struct regstat *i_regs)
3942{
3943 signed char temp=get_reg(i_regs->regmap,-1);
3944 assert(temp>=0);
3945 // Check cop1 unusable
3946 if(!cop1_usable) {
3947 signed char cs=get_reg(i_regs->regmap,CSREG);
3948 assert(cs>=0);
3949 emit_testimm(cs,0x20000000);
3950 int jaddr=(int)out;
3951 emit_jeq(0);
3952 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
3953 cop1_usable=1;
3954 }
3955
3956 if((source[i]&0x3f)==6) // mov
3957 {
3958 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3959 if(opcode2[i]==0x10) {
3960 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3961 emit_flds(temp);
3962 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3963 emit_fstps(temp);
3964 }
3965 if(opcode2[i]==0x11) {
3966 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3967 emit_fldl(temp);
3968 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3969 emit_fstpl(temp);
3970 }
3971 }
3972 return;
3973 }
3974
3975 if((source[i]&0x3f)>3)
3976 {
3977 if(opcode2[i]==0x10) {
3978 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
3979 emit_flds(temp);
3980 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3981 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
3982 }
3983 }
3984 if(opcode2[i]==0x11) {
3985 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
3986 emit_fldl(temp);
3987 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
3988 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
3989 }
3990 }
3991 if((source[i]&0x3f)==4) // sqrt
3992 emit_fsqrt();
3993 if((source[i]&0x3f)==5) // abs
3994 emit_fabs();
3995 if((source[i]&0x3f)==7) // neg
3996 emit_fchs();
3997 if(opcode2[i]==0x10) {
3998 emit_fstps(temp);
3999 }
4000 if(opcode2[i]==0x11) {
4001 emit_fstpl(temp);
4002 }
4003 return;
4004 }
4005 if((source[i]&0x3f)<4)
4006 {
4007 if(opcode2[i]==0x10) {
4008 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4009 emit_flds(temp);
4010 }
4011 if(opcode2[i]==0x11) {
4012 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4013 emit_fldl(temp);
4014 }
4015 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
4016 if(opcode2[i]==0x10) {
4017 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
4018 if((source[i]&0x3f)==0) emit_fadds(temp);
4019 if((source[i]&0x3f)==1) emit_fsubs(temp);
4020 if((source[i]&0x3f)==2) emit_fmuls(temp);
4021 if((source[i]&0x3f)==3) emit_fdivs(temp);
4022 }
4023 else if(opcode2[i]==0x11) {
4024 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
4025 if((source[i]&0x3f)==0) emit_faddl(temp);
4026 if((source[i]&0x3f)==1) emit_fsubl(temp);
4027 if((source[i]&0x3f)==2) emit_fmull(temp);
4028 if((source[i]&0x3f)==3) emit_fdivl(temp);
4029 }
4030 }
4031 else {
4032 if((source[i]&0x3f)==0) emit_fadd(0);
4033 if((source[i]&0x3f)==1) emit_fsub(0);
4034 if((source[i]&0x3f)==2) emit_fmul(0);
4035 if((source[i]&0x3f)==3) emit_fdiv(0);
4036 }
4037 if(opcode2[i]==0x10) {
4038 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4039 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4040 }
4041 emit_fstps(temp);
4042 }
4043 if(opcode2[i]==0x11) {
4044 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
4045 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4046 }
4047 emit_fstpl(temp);
4048 }
4049 return;
4050 }
4051
4052 if(opcode2[i]==0x10) { // Single precision
4053 emit_pusha();
4054 emit_pushmem((int)&reg_cop1_simple[(source[i]>> 6)&0x1f]);
4055 if((source[i]&0x3f)<4)
4056 emit_pushmem((int)&reg_cop1_simple[(source[i]>>16)&0x1f]);
4057 emit_pushmem((int)&reg_cop1_simple[(source[i]>>11)&0x1f]);
4058 switch(source[i]&0x3f)
4059 {
4060 case 0x00: emit_call((int)add_s);break;
4061 case 0x01: emit_call((int)sub_s);break;
4062 case 0x02: emit_call((int)mul_s);break;
4063 case 0x03: emit_call((int)div_s);break;
4064 case 0x04: emit_call((int)sqrt_s);break;
4065 case 0x05: emit_call((int)abs_s);break;
4066 case 0x06: emit_call((int)mov_s);break;
4067 case 0x07: emit_call((int)neg_s);break;
4068 }
4069 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
4070 emit_popa();
4071 }
4072 if(opcode2[i]==0x11) { // Double precision
4073 emit_pusha();
4074 emit_pushmem((int)&reg_cop1_double[(source[i]>> 6)&0x1f]);
4075 if((source[i]&0x3f)<4)
4076 emit_pushmem((int)&reg_cop1_double[(source[i]>>16)&0x1f]);
4077 emit_pushmem((int)&reg_cop1_double[(source[i]>>11)&0x1f]);
4078 switch(source[i]&0x3f)
4079 {
4080 case 0x00: emit_call((int)add_d);break;
4081 case 0x01: emit_call((int)sub_d);break;
4082 case 0x02: emit_call((int)mul_d);break;
4083 case 0x03: emit_call((int)div_d);break;
4084 case 0x04: emit_call((int)sqrt_d);break;
4085 case 0x05: emit_call((int)abs_d);break;
4086 case 0x06: emit_call((int)mov_d);break;
4087 case 0x07: emit_call((int)neg_d);break;
4088 }
4089 emit_addimm(ESP,(source[i]&0x3f)<4?12:8,ESP);
4090 emit_popa();
4091 }
4092}
4093
4094static void multdiv_assemble_x86(int i,struct regstat *i_regs)
4095{
4096 // case 0x18: MULT
4097 // case 0x19: MULTU
4098 // case 0x1A: DIV
4099 // case 0x1B: DIVU
4100 // case 0x1C: DMULT
4101 // case 0x1D: DMULTU
4102 // case 0x1E: DDIV
4103 // case 0x1F: DDIVU
4104 if(rs1[i]&&rs2[i])
4105 {
4106 if((opcode2[i]&4)==0) // 32-bit
4107 {
4108 if(opcode2[i]==0x18) // MULT
4109 {
4110 char m1=get_reg(i_regs->regmap,rs1[i]);
4111 char m2=get_reg(i_regs->regmap,rs2[i]);
4112 assert(m1>=0);
4113 assert(m2>=0);
4114 emit_mov(m1,EAX);
4115 emit_imul(m2);
4116 }
4117 if(opcode2[i]==0x19) // MULTU
4118 {
4119 char m1=get_reg(i_regs->regmap,rs1[i]);
4120 char m2=get_reg(i_regs->regmap,rs2[i]);
4121 assert(m1>=0);
4122 assert(m2>=0);
4123 emit_mov(m1,EAX);
4124 emit_mul(m2);
4125 }
4126 if(opcode2[i]==0x1A) // DIV
4127 {
4128 char d1=get_reg(i_regs->regmap,rs1[i]);
4129 char d2=get_reg(i_regs->regmap,rs2[i]);
4130 assert(d1>=0);
4131 assert(d2>=0);
4132 emit_mov(d1,EAX);
4133 emit_cdq();
4134 emit_test(d2,d2);
4135 emit_jeq((int)out+8);
4136 emit_idiv(d2);
4137 }
4138 if(opcode2[i]==0x1B) // DIVU
4139 {
4140 char d1=get_reg(i_regs->regmap,rs1[i]);
4141 char d2=get_reg(i_regs->regmap,rs2[i]);
4142 assert(d1>=0);
4143 assert(d2>=0);
4144 emit_mov(d1,EAX);
4145 emit_zeroreg(EDX);
4146 emit_test(d2,d2);
4147 emit_jeq((int)out+8);
4148 emit_div(d2);
4149 }
4150 }
4151 else // 64-bit
4152 {
4153 if(opcode2[i]==0x1C) // DMULT
4154 {
4155 char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4156 char m1l=get_reg(i_regs->regmap,rs1[i]);
4157 char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4158 char m2l=get_reg(i_regs->regmap,rs2[i]);
4159 assert(m1h>=0);
4160 assert(m2h>=0);
4161 assert(m1l>=0);
4162 assert(m2l>=0);
4163 emit_pushreg(m2h);
4164 emit_pushreg(m2l);
4165 emit_pushreg(m1h);
4166 emit_pushreg(m1l);
4167 emit_call((int)&mult64);
4168 emit_popreg(m1l);
4169 emit_popreg(m1h);
4170 emit_popreg(m2l);
4171 emit_popreg(m2h);
4172 char hih=get_reg(i_regs->regmap,HIREG|64);
4173 char hil=get_reg(i_regs->regmap,HIREG);
4174 if(hih>=0) emit_loadreg(HIREG|64,hih);
4175 if(hil>=0) emit_loadreg(HIREG,hil);
4176 char loh=get_reg(i_regs->regmap,LOREG|64);
4177 char lol=get_reg(i_regs->regmap,LOREG);
4178 if(loh>=0) emit_loadreg(LOREG|64,loh);
4179 if(lol>=0) emit_loadreg(LOREG,lol);
4180 }
4181 if(opcode2[i]==0x1D) // DMULTU
4182 {
4183 char m1h=get_reg(i_regs->regmap,rs1[i]|64);
4184 char m1l=get_reg(i_regs->regmap,rs1[i]);
4185 char m2h=get_reg(i_regs->regmap,rs2[i]|64);
4186 char m2l=get_reg(i_regs->regmap,rs2[i]);
4187 char temp=get_reg(i_regs->regmap,-1);
4188 assert(m1h>=0);
4189 assert(m2h>=0);
4190 assert(m1l>=0);
4191 assert(m2l>=0);
4192 assert(temp>=0);
4193 emit_mov(m1l,EAX);
4194 emit_mul(m2l);
4195 emit_storereg(LOREG,EAX);
4196 emit_mov(EDX,temp);
4197 emit_mov(m1h,EAX);
4198 emit_mul(m2l);
4199 emit_add(EAX,temp,temp);
4200 emit_adcimm(0,EDX);
4201 emit_storereg(HIREG,EDX);
4202 emit_mov(m2h,EAX);
4203 emit_mul(m1l);
4204 emit_add(EAX,temp,temp);
4205 emit_adcimm(0,EDX);
4206 emit_storereg(LOREG|64,temp);
4207 emit_mov(EDX,temp);
4208 emit_mov(m2h,EAX);
4209 emit_mul(m1h);
4210 emit_add(EAX,temp,EAX);
4211 emit_loadreg(HIREG,temp);
4212 emit_adcimm(0,EDX);
4213 emit_add(EAX,temp,EAX);
4214 emit_adcimm(0,EDX);
4215 // DEBUG
4216 /*
4217 emit_pushreg(m2h);
4218 emit_pushreg(m2l);
4219 emit_pushreg(m1h);
4220 emit_pushreg(m1l);
4221 emit_call((int)&multu64);
4222 emit_popreg(m1l);
4223 emit_popreg(m1h);
4224 emit_popreg(m2l);
4225 emit_popreg(m2h);
4226 char hih=get_reg(i_regs->regmap,HIREG|64);
4227 char hil=get_reg(i_regs->regmap,HIREG);
4228 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
4229 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
4230 */
4231 // Shouldn't be necessary
4232 //char loh=get_reg(i_regs->regmap,LOREG|64);
4233 //char lol=get_reg(i_regs->regmap,LOREG);
4234 //if(loh>=0) emit_loadreg(LOREG|64,loh);
4235 //if(lol>=0) emit_loadreg(LOREG,lol);
4236 }
4237 if(opcode2[i]==0x1E) // DDIV
4238 {
4239 char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4240 char d1l=get_reg(i_regs->regmap,rs1[i]);
4241 char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4242 char d2l=get_reg(i_regs->regmap,rs2[i]);
4243 assert(d1h>=0);
4244 assert(d2h>=0);
4245 assert(d1l>=0);
4246 assert(d2l>=0);
4247 //emit_pushreg(d2h);
4248 //emit_pushreg(d2l);
4249 //emit_pushreg(d1h);
4250 //emit_pushreg(d1l);
4251 emit_addimm(ESP,-16,ESP);
4252 emit_writeword_indexed(d2h,12,ESP);
4253 emit_writeword_indexed(d2l,8,ESP);
4254 emit_writeword_indexed(d1h,4,ESP);
4255 emit_writeword_indexed(d1l,0,ESP);
4256 emit_call((int)&div64);
4257 //emit_popreg(d1l);
4258 //emit_popreg(d1h);
4259 //emit_popreg(d2l);
4260 //emit_popreg(d2h);
4261 emit_readword_indexed(0,ESP,d1l);
4262 emit_readword_indexed(4,ESP,d1h);
4263 emit_readword_indexed(8,ESP,d2l);
4264 emit_readword_indexed(12,ESP,d2h);
4265 emit_addimm(ESP,16,ESP);
4266 char hih=get_reg(i_regs->regmap,HIREG|64);
4267 char hil=get_reg(i_regs->regmap,HIREG);
4268 char loh=get_reg(i_regs->regmap,LOREG|64);
4269 char lol=get_reg(i_regs->regmap,LOREG);
4270 if(hih>=0) emit_loadreg(HIREG|64,hih);
4271 if(hil>=0) emit_loadreg(HIREG,hil);
4272 if(loh>=0) emit_loadreg(LOREG|64,loh);
4273 if(lol>=0) emit_loadreg(LOREG,lol);
4274 }
4275 if(opcode2[i]==0x1F) // DDIVU
4276 {
4277 char d1h=get_reg(i_regs->regmap,rs1[i]|64);
4278 char d1l=get_reg(i_regs->regmap,rs1[i]);
4279 char d2h=get_reg(i_regs->regmap,rs2[i]|64);
4280 char d2l=get_reg(i_regs->regmap,rs2[i]);
4281 assert(d1h>=0);
4282 assert(d2h>=0);
4283 assert(d1l>=0);
4284 assert(d2l>=0);
4285 //emit_pushreg(d2h);
4286 //emit_pushreg(d2l);
4287 //emit_pushreg(d1h);
4288 //emit_pushreg(d1l);
4289 emit_addimm(ESP,-16,ESP);
4290 emit_writeword_indexed(d2h,12,ESP);
4291 emit_writeword_indexed(d2l,8,ESP);
4292 emit_writeword_indexed(d1h,4,ESP);
4293 emit_writeword_indexed(d1l,0,ESP);
4294 emit_call((int)&divu64);
4295 //emit_popreg(d1l);
4296 //emit_popreg(d1h);
4297 //emit_popreg(d2l);
4298 //emit_popreg(d2h);
4299 emit_readword_indexed(0,ESP,d1l);
4300 emit_readword_indexed(4,ESP,d1h);
4301 emit_readword_indexed(8,ESP,d2l);
4302 emit_readword_indexed(12,ESP,d2h);
4303 emit_addimm(ESP,16,ESP);
4304 char hih=get_reg(i_regs->regmap,HIREG|64);
4305 char hil=get_reg(i_regs->regmap,HIREG);
4306 char loh=get_reg(i_regs->regmap,LOREG|64);
4307 char lol=get_reg(i_regs->regmap,LOREG);
4308 if(hih>=0) emit_loadreg(HIREG|64,hih);
4309 if(hil>=0) emit_loadreg(HIREG,hil);
4310 if(loh>=0) emit_loadreg(LOREG|64,loh);
4311 if(lol>=0) emit_loadreg(LOREG,lol);
4312 }
4313 }
4314 }
4315 else
4316 {
4317 // Multiply by zero is zero.
4318 // MIPS does not have a divide by zero exception.
4319 // The result is undefined, we return zero.
4320 char hr=get_reg(i_regs->regmap,HIREG);
4321 char lr=get_reg(i_regs->regmap,LOREG);
4322 if(hr>=0) emit_zeroreg(hr);
4323 if(lr>=0) emit_zeroreg(lr);
4324 }
4325}
4326#define multdiv_assemble multdiv_assemble_x86
4327
// Emit a load of the mini hash table index mask into host register r.
// 0xf8 keeps bits 3..7, i.e. one of 32 offsets in steps of 8 bytes,
// matching the 32 two-word entries of mini_ht.
static void do_preload_rhash(int r)
{
  const int index_mask=0xf8;
  emit_movimm(index_mask,r);
}
4331
// Preload the hash table base address into host register r.
// Don't need this for x86, so nothing is emitted.
static void do_preload_rhtbl(int r)
{
  (void)r; // intentionally unused
}
4335
// Emit the hash step: rh <- rs AND rh.  rh is expected to hold the mask
// loaded by do_preload_rhash, so afterwards it holds the table offset
// derived from the address in rs.
static void do_rhash(int rs,int rh)
{
  emit_and(rs,rh,rh);
}
4339
// Load the mini hash table entry selected by rh.
// Don't need this for x86.  The load and compare can be combined into
// a single instruction (see do_miniht_jump), so nothing is emitted.
static void do_miniht_load(int ht,int rh)
{
  (void)ht; // intentionally unused
  (void)rh; // intentionally unused
}
4344
4345static void do_miniht_jump(int rs,int rh,int ht) {
4346 emit_cmpmem_indexed((int)mini_ht,rh,rs);
4347 emit_jne(jump_vaddr_reg[rs]);
4348 emit_jmpmem_indexed((int)mini_ht+4,rh);
4349}
4350
4351static void do_miniht_insert(int return_address,int rt,int temp) {
4352 emit_movimm(return_address,rt); // PC into link register
4353 //emit_writeword_imm(return_address,(int)&mini_ht[(return_address&0xFF)>>8][0]);
4354 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4355 add_to_linker((int)out,return_address,1);
4356 emit_writeword_imm(0,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4357}
4358
// Literal pool hook.  We don't need this for x86, so it is a no-op.
static void literal_pool(int n)
{
  (void)n; // intentionally unused
}
// Literal pool jump-over hook.  Not needed for x86, so it is a no-op.
static void literal_pool_jumpover(int n)
{
  (void)n; // intentionally unused
}
4362
// CPU-architecture-specific initialization, not needed for x86.
// Declared with (void) so the definition is a proper prototype.
static void arch_init(void)
{
}