add unmodified Ari64 drc to track its changes
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_x64.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - assem_x64.c *
3 * Copyright (C) 2009-2010 Ari64 *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21int cycle_count;
22int last_count;
23int pcaddr;
24int pending_exception;
25int branch_target;
26uint64_t readmem_dword;
27precomp_instr fake_pc;
28u_int memory_map[1048576];
29u_int mini_ht[32][2] __attribute__((aligned(8)));
30u_char restore_candidate[512] __attribute__((aligned(4)));
31
// Entry points that are not defined in this file.
void do_interrupt();
void jump_vaddr_eax();
void jump_vaddr_ecx();
void jump_vaddr_edx();
void jump_vaddr_ebx();
void jump_vaddr_ebp();
void jump_vaddr_edi();

// jump_vaddr_* entry point for each host register number (same numbering
// as regname[]).  Slots 4 (esp) and 6 (esi) have no entry and stay null.
const void * jump_vaddr_reg[8] = {
  [0] = jump_vaddr_eax,
  [1] = jump_vaddr_ecx,
  [2] = jump_vaddr_edx,
  [3] = jump_vaddr_ebx,
  [5] = jump_vaddr_ebp,
  [7] = jump_vaddr_edi,
};
49
50const u_short rounding_modes[4] = {
51 0x33F, // round
52 0xF3F, // trunc
53 0xB3F, // ceil
54 0x73F};// floor
55
56#include "fpu.h"
57
58// We need these for cmovcc instructions on x86
59u_int const_zero=0;
60u_int const_one=1;
61
62/* Linker */
63
64void set_jump_target(int addr,int target)
65{
66 u_char *ptr=(u_char *)addr;
67 if(*ptr==0x0f)
68 {
69 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
70 u_int *ptr2=(u_int *)(ptr+2);
71 *ptr2=target-(int)ptr2-4;
72 }
73 else if(*ptr==0xe8||*ptr==0xe9) {
74 u_int *ptr2=(u_int *)(ptr+1);
75 *ptr2=target-(int)ptr2-4;
76 }
77 else
78 {
79 assert(*ptr==0xc7); /* mov immediate (store address) */
80 u_int *ptr2=(u_int *)(ptr+6);
81 *ptr2=target;
82 }
83}
84
// The 32-bit value stored 6 bytes into `stub` is the address of a rel32
// field.  Rewrite that field so that it resolves to `stub` itself
// (displacement measured from the end of the 4-byte field).
void kill_pointer(void *stub)
{
  int field_addr=*(int *)((char *)stub+6);
  int *field=(int *)field_addr;
  *field=(int)stub-field_addr-4;
}
// The 32-bit value stored 6 bytes into `stub` is the address of a rel32
// field.  Return the absolute address that field currently resolves to
// (displacement + address of the field + 4).
int get_pointer(void *stub)
{
  int field_addr=*(int *)((char *)stub+6);
  int rel32=*(int *)field_addr;
  return rel32+field_addr+4;
}
95
96// Find the "clean" entry point from a "dirty" entry point
97// by skipping past the call to verify_code
98u_int get_clean_addr(int addr)
99{
100 u_char *ptr=(u_char *)addr;
101 assert(ptr[21]==0xE8); // call instruction
102 if(ptr[26]==0xE9) return *(u_int *)(ptr+27)+addr+31; // follow jmp
103 else return(addr+26);
104}
105
106int verify_dirty(int addr)
107{
108 u_char *ptr=(u_char *)addr;
109 assert(ptr[0]==0xB8);
110 u_int source=*(u_int *)(ptr+1);
111 u_int copy=*(u_int *)(ptr+6);
112 u_int len=*(u_int *)(ptr+11);
113 //printf("source=%x source-rdram=%x\n",source,source-(int)rdram);
114 assert(ptr[21]==0xE8); // call instruction
115 u_int verifier=*(u_int *)(ptr+22)+(u_int)ptr+26;
116 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
117 unsigned int page=source>>12;
118 unsigned int map_value=memory_map[page];
119 if(map_value>=0x80000000) return 0;
120 while(page<((source+len-1)>>12)) {
121 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
122 }
123 source = source+(map_value<<2);
124 }
125 //printf("verify_dirty: %x %x %x\n",source,copy,len);
126 return !memcmp((void *)source,(void *)copy,len);
127}
128
129// This doesn't necessarily find all clean entry points, just
130// guarantees that it's not dirty
131int isclean(int addr)
132{
133 u_char *ptr=(u_char *)addr;
134 if(ptr[0]!=0xB8) return 1; // mov imm,%eax
135 if(ptr[5]!=0xBB) return 1; // mov imm,%ebx
136 if(ptr[10]!=0xB9) return 1; // mov imm,%ecx
137 if(ptr[15]!=0x41) return 1; // rex prefix
138 if(ptr[16]!=0xBC) return 1; // mov imm,%r12d
139 if(ptr[21]!=0xE8) return 1; // call instruction
140 return 0;
141}
142
143void get_bounds(int addr,u_int *start,u_int *end)
144{
145 u_char *ptr=(u_char *)addr;
146 assert(ptr[0]==0xB8);
147 u_int source=*(u_int *)(ptr+1);
148 //u_int copy=*(u_int *)(ptr+6);
149 u_int len=*(u_int *)(ptr+11);
150 assert(ptr[21]==0xE8); // call instruction
151 u_int verifier=*(u_int *)(ptr+22)+(u_int)ptr+26;
152 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
153 if(memory_map[source>>12]>=0x80000000) source = 0;
154 else source = source+(memory_map[source>>12]<<2);
155 }
156 *start=source;
157 *end=source+len;
158}
159
160/* Register allocation */
161
162// Note: registers are allocated clean (unmodified state)
163// if you intend to modify the register, you must call dirty_reg().
164void alloc_reg(struct regstat *cur,int i,signed char reg)
165{
166 int r,hr;
167 int preferred_reg = (reg&3)+(reg>28)*4-(reg==32)+2*(reg==36)-(reg==40);
168
169 // Don't allocate unused registers
170 if((cur->u>>reg)&1) return;
171
172 // see if it's already allocated
173 for(hr=0;hr<HOST_REGS;hr++)
174 {
175 if(cur->regmap[hr]==reg) return;
176 }
177
178 // Keep the same mapping if the register was already allocated in a loop
179 preferred_reg = loop_reg(i,reg,preferred_reg);
180
181 // Try to allocate the preferred register
182 if(cur->regmap[preferred_reg]==-1) {
183 cur->regmap[preferred_reg]=reg;
184 cur->dirty&=~(1<<preferred_reg);
185 cur->isconst&=~(1<<preferred_reg);
186 return;
187 }
188 r=cur->regmap[preferred_reg];
189 if(r<64&&((cur->u>>r)&1)) {
190 cur->regmap[preferred_reg]=reg;
191 cur->dirty&=~(1<<preferred_reg);
192 cur->isconst&=~(1<<preferred_reg);
193 return;
194 }
195 if(r>=64&&((cur->uu>>(r&63))&1)) {
196 cur->regmap[preferred_reg]=reg;
197 cur->dirty&=~(1<<preferred_reg);
198 cur->isconst&=~(1<<preferred_reg);
199 return;
200 }
201
202 // Try to allocate EAX, EBX, ECX, or EDX
203 // We prefer these because they can do byte and halfword loads
204 for(hr=0;hr<4;hr++) {
205 if(cur->regmap[hr]==-1) {
206 cur->regmap[hr]=reg;
207 cur->dirty&=~(1<<hr);
208 cur->isconst&=~(1<<hr);
209 return;
210 }
211 }
212
213 // Clear any unneeded registers
214 // We try to keep the mapping consistent, if possible, because it
215 // makes branches easier (especially loops). So we try to allocate
216 // first (see above) before removing old mappings. If this is not
217 // possible then go ahead and clear out the registers that are no
218 // longer needed.
219 for(hr=0;hr<HOST_REGS;hr++)
220 {
221 r=cur->regmap[hr];
222 if(r>=0) {
223 if(r<64) {
224 if((cur->u>>r)&1)
225 if(i==0||(unneeded_reg[i-1]>>r)&1) {cur->regmap[hr]=-1;break;}
226 }
227 else
228 {
229 if((cur->uu>>(r&63))&1)
230 if(i==0||(unneeded_reg_upper[i-1]>>(r&63))&1) {cur->regmap[hr]=-1;break;}
231 }
232 }
233 }
234 // Try to allocate any available register, but prefer
235 // registers that have not been used recently.
236 if(i>0) {
237 for(hr=0;hr<HOST_REGS;hr++) {
238 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
239 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
240 cur->regmap[hr]=reg;
241 cur->dirty&=~(1<<hr);
242 cur->isconst&=~(1<<hr);
243 return;
244 }
245 }
246 }
247 }
248 // Try to allocate any available register
249 for(hr=0;hr<HOST_REGS;hr++) {
250 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
251 cur->regmap[hr]=reg;
252 cur->dirty&=~(1<<hr);
253 cur->isconst&=~(1<<hr);
254 return;
255 }
256 }
257
258 // Ok, now we have to evict someone
259 // Pick a register we hopefully won't need soon
260 u_char hsn[MAXREG+1];
261 memset(hsn,10,sizeof(hsn));
262 int j;
263 lsn(hsn,i,&preferred_reg);
264 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
265 if(i>0) {
266 // Don't evict the cycle count at entry points, otherwise the entry
267 // stub will have to write it.
268 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
269 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
270 for(j=10;j>=3;j--)
271 {
272 // Alloc preferred register if available
273 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
274 for(hr=0;hr<HOST_REGS;hr++) {
275 // Evict both parts of a 64-bit register
276 if((cur->regmap[hr]&63)==r) {
277 cur->regmap[hr]=-1;
278 cur->dirty&=~(1<<hr);
279 cur->isconst&=~(1<<hr);
280 }
281 }
282 cur->regmap[preferred_reg]=reg;
283 return;
284 }
285 for(r=1;r<=MAXREG;r++)
286 {
287 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
288 for(hr=0;hr<HOST_REGS;hr++) {
289 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
290 if(cur->regmap[hr]==r+64) {
291 cur->regmap[hr]=reg;
292 cur->dirty&=~(1<<hr);
293 cur->isconst&=~(1<<hr);
294 return;
295 }
296 }
297 }
298 for(hr=0;hr<HOST_REGS;hr++) {
299 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
300 if(cur->regmap[hr]==r) {
301 cur->regmap[hr]=reg;
302 cur->dirty&=~(1<<hr);
303 cur->isconst&=~(1<<hr);
304 return;
305 }
306 }
307 }
308 }
309 }
310 }
311 }
312 for(j=10;j>=0;j--)
313 {
314 for(r=1;r<=MAXREG;r++)
315 {
316 if(hsn[r]==j) {
317 for(hr=0;hr<HOST_REGS;hr++) {
318 if(cur->regmap[hr]==r+64) {
319 cur->regmap[hr]=reg;
320 cur->dirty&=~(1<<hr);
321 cur->isconst&=~(1<<hr);
322 return;
323 }
324 }
325 for(hr=0;hr<HOST_REGS;hr++) {
326 if(cur->regmap[hr]==r) {
327 cur->regmap[hr]=reg;
328 cur->dirty&=~(1<<hr);
329 cur->isconst&=~(1<<hr);
330 return;
331 }
332 }
333 }
334 }
335 }
336 printf("This shouldn't happen (alloc_reg)");exit(1);
337}
338
339void alloc_reg64(struct regstat *cur,int i,signed char reg)
340{
341 int preferred_reg = 5+reg%3;
342 int r,hr;
343
344 // allocate the lower 32 bits
345 alloc_reg(cur,i,reg);
346
347 // Don't allocate unused registers
348 if((cur->uu>>reg)&1) return;
349
350 // see if the upper half is already allocated
351 for(hr=0;hr<HOST_REGS;hr++)
352 {
353 if(cur->regmap[hr]==reg+64) return;
354 }
355
356 // Keep the same mapping if the register was already allocated in a loop
357 preferred_reg = loop_reg(i,reg,preferred_reg);
358
359 // Try to allocate the preferred register
360 if(cur->regmap[preferred_reg]==-1) {
361 cur->regmap[preferred_reg]=reg|64;
362 cur->dirty&=~(1<<preferred_reg);
363 cur->isconst&=~(1<<preferred_reg);
364 return;
365 }
366 r=cur->regmap[preferred_reg];
367 if(r<64&&((cur->u>>r)&1)) {
368 cur->regmap[preferred_reg]=reg|64;
369 cur->dirty&=~(1<<preferred_reg);
370 cur->isconst&=~(1<<preferred_reg);
371 return;
372 }
373 if(r>=64&&((cur->uu>>(r&63))&1)) {
374 cur->regmap[preferred_reg]=reg|64;
375 cur->dirty&=~(1<<preferred_reg);
376 cur->isconst&=~(1<<preferred_reg);
377 return;
378 }
379
380 // Try to allocate EBP, ESI or EDI
381 for(hr=5;hr<8;hr++) {
382 if(cur->regmap[hr]==-1) {
383 cur->regmap[hr]=reg|64;
384 cur->dirty&=~(1<<hr);
385 cur->isconst&=~(1<<hr);
386 return;
387 }
388 }
389
390 // Clear any unneeded registers
391 // We try to keep the mapping consistent, if possible, because it
392 // makes branches easier (especially loops). So we try to allocate
393 // first (see above) before removing old mappings. If this is not
394 // possible then go ahead and clear out the registers that are no
395 // longer needed.
396 for(hr=HOST_REGS-1;hr>=0;hr--)
397 {
398 r=cur->regmap[hr];
399 if(r>=0) {
400 if(r<64) {
401 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
402 }
403 else
404 {
405 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
406 }
407 }
408 }
409 // Try to allocate any available register, but prefer
410 // registers that have not been used recently.
411 if(i>0) {
412 for(hr=0;hr<HOST_REGS;hr++) {
413 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
414 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
415 cur->regmap[hr]=reg|64;
416 cur->dirty&=~(1<<hr);
417 cur->isconst&=~(1<<hr);
418 return;
419 }
420 }
421 }
422 }
423 // Try to allocate any available register
424 for(hr=0;hr<HOST_REGS;hr++) {
425 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
426 cur->regmap[hr]=reg|64;
427 cur->dirty&=~(1<<hr);
428 cur->isconst&=~(1<<hr);
429 return;
430 }
431 }
432
433 // Ok, now we have to evict someone
434 // Pick a register we hopefully won't need soon
435 u_char hsn[MAXREG+1];
436 memset(hsn,10,sizeof(hsn));
437 int j;
438 lsn(hsn,i,&preferred_reg);
439 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
440 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
441 if(i>0) {
442 // Don't evict the cycle count at entry points, otherwise the entry
443 // stub will have to write it.
444 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
445 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
446 for(j=10;j>=3;j--)
447 {
448 // Alloc preferred register if available
449 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
450 for(hr=0;hr<HOST_REGS;hr++) {
451 // Evict both parts of a 64-bit register
452 if((cur->regmap[hr]&63)==r) {
453 cur->regmap[hr]=-1;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 }
457 }
458 cur->regmap[preferred_reg]=reg|64;
459 return;
460 }
461 for(r=1;r<=MAXREG;r++)
462 {
463 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
464 for(hr=0;hr<HOST_REGS;hr++) {
465 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
466 if(cur->regmap[hr]==r+64) {
467 cur->regmap[hr]=reg|64;
468 cur->dirty&=~(1<<hr);
469 cur->isconst&=~(1<<hr);
470 return;
471 }
472 }
473 }
474 for(hr=0;hr<HOST_REGS;hr++) {
475 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
476 if(cur->regmap[hr]==r) {
477 cur->regmap[hr]=reg|64;
478 cur->dirty&=~(1<<hr);
479 cur->isconst&=~(1<<hr);
480 return;
481 }
482 }
483 }
484 }
485 }
486 }
487 }
488 for(j=10;j>=0;j--)
489 {
490 for(r=1;r<=MAXREG;r++)
491 {
492 if(hsn[r]==j) {
493 for(hr=0;hr<HOST_REGS;hr++) {
494 if(cur->regmap[hr]==r+64) {
495 cur->regmap[hr]=reg|64;
496 cur->dirty&=~(1<<hr);
497 cur->isconst&=~(1<<hr);
498 return;
499 }
500 }
501 for(hr=0;hr<HOST_REGS;hr++) {
502 if(cur->regmap[hr]==r) {
503 cur->regmap[hr]=reg|64;
504 cur->dirty&=~(1<<hr);
505 cur->isconst&=~(1<<hr);
506 return;
507 }
508 }
509 }
510 }
511 }
512 printf("This shouldn't happen");exit(1);
513}
514
515// Allocate a temporary register. This is done without regard to
516// dirty status or whether the register we request is on the unneeded list
517// Note: This will only allocate one register, even if called multiple times
518void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
519{
520 int r,hr;
521 int preferred_reg = -1;
522
523 // see if it's already allocated
524 for(hr=0;hr<HOST_REGS;hr++)
525 {
526 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
527 }
528
529 // Try to allocate any available register, starting with EDI, ESI, EBP...
530 // We prefer EDI, ESI, EBP since the others are used for byte/halfword stores
531 for(hr=HOST_REGS-1;hr>=0;hr--) {
532 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
533 cur->regmap[hr]=reg;
534 cur->dirty&=~(1<<hr);
535 cur->isconst&=~(1<<hr);
536 return;
537 }
538 }
539
540 // Find an unneeded register
541 for(hr=HOST_REGS-1;hr>=0;hr--)
542 {
543 r=cur->regmap[hr];
544 if(r>=0) {
545 if(r<64) {
546 if((cur->u>>r)&1) {
547 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
548 cur->regmap[hr]=reg;
549 cur->dirty&=~(1<<hr);
550 cur->isconst&=~(1<<hr);
551 return;
552 }
553 }
554 }
555 else
556 {
557 if((cur->uu>>(r&63))&1) {
558 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
559 cur->regmap[hr]=reg;
560 cur->dirty&=~(1<<hr);
561 cur->isconst&=~(1<<hr);
562 return;
563 }
564 }
565 }
566 }
567 }
568
569 // Ok, now we have to evict someone
570 // Pick a register we hopefully won't need soon
571 // TODO: we might want to follow unconditional jumps here
572 // TODO: get rid of dupe code and make this into a function
573 u_char hsn[MAXREG+1];
574 memset(hsn,10,sizeof(hsn));
575 int j;
576 lsn(hsn,i,&preferred_reg);
577 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
578 if(i>0) {
579 // Don't evict the cycle count at entry points, otherwise the entry
580 // stub will have to write it.
581 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
582 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
583 for(j=10;j>=3;j--)
584 {
585 for(r=1;r<=MAXREG;r++)
586 {
587 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
590 if(cur->regmap[hr]==r+64) {
591 cur->regmap[hr]=reg;
592 cur->dirty&=~(1<<hr);
593 cur->isconst&=~(1<<hr);
594 return;
595 }
596 }
597 }
598 for(hr=0;hr<HOST_REGS;hr++) {
599 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
600 if(cur->regmap[hr]==r) {
601 cur->regmap[hr]=reg;
602 cur->dirty&=~(1<<hr);
603 cur->isconst&=~(1<<hr);
604 return;
605 }
606 }
607 }
608 }
609 }
610 }
611 }
612 for(j=10;j>=0;j--)
613 {
614 for(r=1;r<=MAXREG;r++)
615 {
616 if(hsn[r]==j) {
617 for(hr=0;hr<HOST_REGS;hr++) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 printf("This shouldn't happen");exit(1);
637}
638// Allocate a specific x86 register.
639void alloc_x86_reg(struct regstat *cur,int i,signed char reg,char hr)
640{
641 int n;
642
643 // see if it's already allocated (and dealloc it)
644 for(n=0;n<HOST_REGS;n++)
645 {
646 if(n!=ESP&&cur->regmap[n]==reg) {cur->regmap[n]=-1;}
647 }
648
649 cur->regmap[hr]=reg;
650 cur->dirty&=~(1<<hr);
651 cur->isconst&=~(1<<hr);
652}
653
654// Alloc cycle count into dedicated register
655alloc_cc(struct regstat *cur,int i)
656{
657 alloc_x86_reg(cur,i,CCREG,ESI);
658}
659
660/* Special alloc */
661
662void multdiv_alloc_x86(struct regstat *current,int i)
663{
664 // case 0x18: MULT
665 // case 0x19: MULTU
666 // case 0x1A: DIV
667 // case 0x1B: DIVU
668 // case 0x1C: DMULT
669 // case 0x1D: DMULTU
670 // case 0x1E: DDIV
671 // case 0x1F: DDIVU
672 clear_const(current,rs1[i]);
673 clear_const(current,rs2[i]);
674 if(rs1[i]&&rs2[i])
675 {
676 if((opcode2[i]&4)==0) // 32-bit
677 {
678 current->u&=~(1LL<<HIREG);
679 current->u&=~(1LL<<LOREG);
680 alloc_x86_reg(current,i,HIREG,EDX);
681 alloc_x86_reg(current,i,LOREG,EAX);
682 alloc_reg(current,i,rs1[i]);
683 alloc_reg(current,i,rs2[i]);
684 current->is32|=1LL<<HIREG;
685 current->is32|=1LL<<LOREG;
686 dirty_reg(current,HIREG);
687 dirty_reg(current,LOREG);
688 }
689 else // 64-bit
690 {
691 alloc_x86_reg(current,i,HIREG|64,EDX);
692 alloc_x86_reg(current,i,HIREG,EAX);
693 alloc_reg64(current,i,rs1[i]);
694 alloc_reg64(current,i,rs2[i]);
695 alloc_all(current,i);
696 current->is32&=~(1LL<<HIREG);
697 current->is32&=~(1LL<<LOREG);
698 dirty_reg(current,HIREG);
699 dirty_reg(current,LOREG);
700 }
701 }
702 else
703 {
704 // Multiply by zero is zero.
705 // MIPS does not have a divide by zero exception.
706 // The result is undefined, we return zero.
707 alloc_reg(current,i,HIREG);
708 alloc_reg(current,i,LOREG);
709 current->is32|=1LL<<HIREG;
710 current->is32|=1LL<<LOREG;
711 dirty_reg(current,HIREG);
712 dirty_reg(current,LOREG);
713 }
714}
715#define multdiv_alloc multdiv_alloc_x86
716
717/* Assembler */
718
// Register names used in assem_debug output, indexed by host register number.
char regname[16][4] = {
  "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
  "r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15"
};
736
737void output_byte(u_char byte)
738{
739 *(out++)=byte;
740}
741void output_modrm(u_char mod,u_char rm,u_char ext)
742{
743 assert(mod<4);
744 assert(rm<8);
745 assert(ext<8);
746 u_char byte=(mod<<6)|(ext<<3)|rm;
747 *(out++)=byte;
748}
749void output_sib(u_char scale,u_char index,u_char base)
750{
751 assert(scale<4);
752 assert(index<8);
753 assert(base<8);
754 u_char byte=(scale<<6)|(index<<3)|base;
755 *(out++)=byte;
756}
757void output_rex(u_char w,u_char r,u_char x,u_char b)
758{
759 assert(w<2);
760 assert(r<2);
761 assert(x<2);
762 assert(b<2);
763 u_char byte=0x40|(w<<3)|(r<<2)|(x<<1)|b;
764 *(out++)=byte;
765}
766void output_w32(u_int word)
767{
768 *((u_int *)out)=word;
769 out+=4;
770}
771
772void emit_mov(int rs,int rt)
773{
774 assem_debug("mov %%%s,%%%s\n",regname[rs],regname[rt]);
775 output_byte(0x89);
776 output_modrm(3,rt,rs);
777}
778
779void emit_mov64(int rs,int rt)
780{
781 assem_debug("mov %%%s,%%%s\n",regname[rs],regname[rt]);
782 output_rex(1,0,0,rt>>3);
783 output_byte(0x89);
784 output_modrm(3,rt,rs);
785}
786
787void emit_add(int rs1,int rs2,int rt)
788{
789 if(rs1==rt) {
790 assem_debug("add %%%s,%%%s\n",regname[rs2],regname[rs1]);
791 output_byte(0x01);
792 output_modrm(3,rs1,rs2);
793 }else if(rs2==rt) {
794 assem_debug("add %%%s,%%%s\n",regname[rs1],regname[rs2]);
795 output_byte(0x01);
796 output_modrm(3,rs2,rs1);
797 }else {
798 assem_debug("lea (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
799 output_byte(0x8D);
800 if(rs1!=EBP) {
801 output_modrm(0,4,rt);
802 output_sib(0,rs2,rs1);
803 }else if(rs2!=EBP) {
804 output_modrm(0,4,rt);
805 output_sib(0,rs1,rs2);
806 }else /* lea 0(,%ebp,2) */{
807 output_modrm(0,4,rt);
808 output_sib(1,EBP,5);
809 output_w32(0);
810 }
811 }
812}
813
// Flag-setting add; on this backend it simply forwards to emit_add.
void emit_adds(int rs1,int rs2,int rt)
{
  emit_add(rs1,rs2,rt);
}
818
819void emit_lea8(int rs1,int rt)
820{
821 assem_debug("lea 0(%%%s,8),%%%s\n",regname[rs1],regname[rt]);
822 output_byte(0x8D);
823 output_modrm(0,4,rt);
824 output_sib(3,rs1,5);
825 output_w32(0);
826}
827void emit_leairrx1(int imm,int rs1,int rs2,int rt)
828{
829 assem_debug("lea %x(%%%s,%%%s,1),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
830 output_byte(0x8D);
831 if(imm!=0||rs1==EBP) {
832 output_modrm(2,4,rt);
833 output_sib(0,rs2,rs1);
834 output_w32(imm);
835 }else{
836 output_modrm(0,4,rt);
837 output_sib(0,rs2,rs1);
838 }
839}
840void emit_leairrx4(int imm,int rs1,int rs2,int rt)
841{
842 assem_debug("lea %x(%%%s,%%%s,4),%%%s\n",imm,regname[rs1],regname[rs2],regname[rt]);
843 output_byte(0x8D);
844 if(imm!=0||rs1==EBP) {
845 output_modrm(2,4,rt);
846 output_sib(2,rs2,rs1);
847 output_w32(imm);
848 }else{
849 output_modrm(0,4,rt);
850 output_sib(2,rs2,rs1);
851 }
852}
853
854void emit_neg(int rs, int rt)
855{
856 if(rs!=rt) emit_mov(rs,rt);
857 assem_debug("neg %%%s\n",regname[rt]);
858 output_byte(0xF7);
859 output_modrm(3,rt,3);
860}
861
// Flag-setting negate; on this backend it simply forwards to emit_neg.
void emit_negs(int rs, int rt)
{
  emit_neg(rs,rt);
}
866
867void emit_sub(int rs1,int rs2,int rt)
868{
869 if(rs1==rt) {
870 assem_debug("sub %%%s,%%%s\n",regname[rs2],regname[rs1]);
871 output_byte(0x29);
872 output_modrm(3,rs1,rs2);
873 } else if(rs2==rt) {
874 emit_neg(rs2,rs2);
875 emit_add(rs2,rs1,rs2);
876 } else {
877 emit_mov(rs1,rt);
878 emit_sub(rt,rs2,rt);
879 }
880}
881
// Flag-setting subtract; on this backend it simply forwards to emit_sub.
void emit_subs(int rs1,int rs2,int rt)
{
  emit_sub(rs1,rs2,rt);
}
886
887void emit_zeroreg(int rt)
888{
889 output_byte(0x31);
890 output_modrm(3,rt,rt);
891 assem_debug("xor %%%s,%%%s\n",regname[rt],regname[rt]);
892}
893
894void emit_loadreg(int r, int hr)
895{
896 if((r&63)==0)
897 emit_zeroreg(hr);
898 else {
899 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
900 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
901 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
902 if(r==CCREG) addr=(int)&cycle_count;
903 if(r==CSREG) addr=(int)&Status;
904 if(r==FSREG) addr=(int)&FCR31;
905 assem_debug("mov %x+%d,%%%s\n",addr,r,regname[hr]);
906 output_byte(0x8B);
907 output_modrm(0,5,hr);
908 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
909 }
910}
911void emit_storereg(int r, int hr)
912{
913 int addr=((int)reg)+((r&63)<<3)+((r&64)>>4);
914 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
915 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
916 if(r==CCREG) addr=(int)&cycle_count;
917 if(r==FSREG) addr=(int)&FCR31;
918 assem_debug("mov %%%s,%x+%d\n",regname[hr],addr,r);
919 output_byte(0x89);
920 output_modrm(0,5,hr);
921 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
922}
923
924void emit_test(int rs, int rt)
925{
926 assem_debug("test %%%s,%%%s\n",regname[rs],regname[rt]);
927 output_byte(0x85);
928 output_modrm(3,rs,rt);
929}
930
931void emit_testimm(int rs,int imm)
932{
933 assem_debug("test $0x%x,%%%s\n",imm,regname[rs]);
934 if(imm<128&&imm>=-128&&rs<4) {
935 output_byte(0xF6);
936 output_modrm(3,rs,0);
937 output_byte(imm);
938 }
939 else
940 {
941 output_byte(0xF7);
942 output_modrm(3,rs,0);
943 output_w32(imm);
944 }
945}
946
947void emit_not(int rs,int rt)
948{
949 if(rs!=rt) emit_mov(rs,rt);
950 assem_debug("not %%%s\n",regname[rt]);
951 output_byte(0xF7);
952 output_modrm(3,rt,2);
953}
954
955void emit_and(u_int rs1,u_int rs2,u_int rt)
956{
957 assert(rs1<8);
958 assert(rs2<8);
959 assert(rt<8);
960 if(rs1==rt) {
961 assem_debug("and %%%s,%%%s\n",regname[rs2],regname[rt]);
962 output_byte(0x21);
963 output_modrm(3,rs1,rs2);
964 }
965 else
966 if(rs2==rt) {
967 assem_debug("and %%%s,%%%s\n",regname[rs1],regname[rt]);
968 output_byte(0x21);
969 output_modrm(3,rs2,rs1);
970 }
971 else {
972 emit_mov(rs1,rt);
973 emit_and(rt,rs2,rt);
974 }
975}
976
977void emit_or(u_int rs1,u_int rs2,u_int rt)
978{
979 assert(rs1<8);
980 assert(rs2<8);
981 assert(rt<8);
982 if(rs1==rt) {
983 assem_debug("or %%%s,%%%s\n",regname[rs2],regname[rt]);
984 output_byte(0x09);
985 output_modrm(3,rs1,rs2);
986 }
987 else
988 if(rs2==rt) {
989 assem_debug("or %%%s,%%%s\n",regname[rs1],regname[rt]);
990 output_byte(0x09);
991 output_modrm(3,rs2,rs1);
992 }
993 else {
994 emit_mov(rs1,rt);
995 emit_or(rt,rs2,rt);
996 }
997}
// Flag-setting OR; on this backend it simply forwards to emit_or.
void emit_or_and_set_flags(int rs1,int rs2,int rt)
{
  emit_or(rs1,rs2,rt);
}
1002
1003void emit_xor(u_int rs1,u_int rs2,u_int rt)
1004{
1005 assert(rs1<8);
1006 assert(rs2<8);
1007 assert(rt<8);
1008 if(rs1==rt) {
1009 assem_debug("xor %%%s,%%%s\n",regname[rs2],regname[rt]);
1010 output_byte(0x31);
1011 output_modrm(3,rs1,rs2);
1012 }
1013 else
1014 if(rs2==rt) {
1015 assem_debug("xor %%%s,%%%s\n",regname[rs1],regname[rt]);
1016 output_byte(0x31);
1017 output_modrm(3,rs2,rs1);
1018 }
1019 else {
1020 emit_mov(rs1,rt);
1021 emit_xor(rt,rs2,rt);
1022 }
1023}
1024
1025void emit_movimm(int imm,u_int rt)
1026{
1027 assem_debug("mov $%d,%%%s\n",imm,regname[rt]);
1028 assert(rt<16);
1029 if(rt>=8) output_rex(0,0,0,1);
1030 output_byte(0xB8+(rt&7));
1031 output_w32(imm);
1032}
1033
1034void emit_addimm(int rs,int imm,int rt)
1035{
1036 if(rs==rt) {
1037 if(imm!=0) {
1038 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1039 if(imm<128&&imm>=-128) {
1040 output_byte(0x83);
1041 output_modrm(3,rt,0);
1042 output_byte(imm);
1043 }
1044 else
1045 {
1046 output_byte(0x81);
1047 output_modrm(3,rt,0);
1048 output_w32(imm);
1049 }
1050 }
1051 }
1052 else {
1053 if(imm!=0) {
1054 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rs],regname[rt]);
1055 output_byte(0x8D);
1056 if(imm<128&&imm>=-128) {
1057 output_modrm(1,rs,rt);
1058 output_byte(imm);
1059 }else{
1060 output_modrm(2,rs,rt);
1061 output_w32(imm);
1062 }
1063 }else{
1064 emit_mov(rs,rt);
1065 }
1066 }
1067}
1068
1069void emit_addimm64(int rs,int imm,int rt)
1070{
1071 if(rs==rt) {
1072 if(imm!=0) {
1073 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1074 if(imm<128&&imm>=-128) {
1075 output_rex(1,0,0,rt>>3);
1076 output_byte(0x83);
1077 output_modrm(3,rt&7,0);
1078 output_byte(imm);
1079 }
1080 else
1081 {
1082 output_rex(1,0,0,rt>>3);
1083 output_byte(0x81);
1084 output_modrm(3,rt&7,0);
1085 output_w32(imm);
1086 }
1087 }
1088 }
1089 else {
1090 if(imm!=0) {
1091 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rs],regname[rt]);
1092 output_rex(1,rt>>3,0,rs>>3);
1093 output_byte(0x8D);
1094 if(imm<128&&imm>=-128) {
1095 output_modrm(1,rs&7,rt&7);
1096 output_byte(imm);
1097 }else{
1098 output_modrm(2,rs&7,rt&7);
1099 output_w32(imm);
1100 }
1101 }else{
1102 emit_mov(rs,rt);
1103 }
1104 }
1105}
1106
1107void emit_addimm_and_set_flags(int imm,int rt)
1108{
1109 assem_debug("add $%d,%%%s\n",imm,regname[rt]);
1110 if(imm<128&&imm>=-128) {
1111 output_byte(0x83);
1112 output_modrm(3,rt,0);
1113 output_byte(imm);
1114 }
1115 else
1116 {
1117 output_byte(0x81);
1118 output_modrm(3,rt,0);
1119 output_w32(imm);
1120 }
1121}
1122void emit_addimm_no_flags(int imm,int rt)
1123{
1124 if(imm!=0) {
1125 assem_debug("lea %d(%%%s),%%%s\n",imm,regname[rt],regname[rt]);
1126 output_byte(0x8D);
1127 if(imm<128&&imm>=-128) {
1128 output_modrm(1,rt,rt);
1129 output_byte(imm);
1130 }else{
1131 output_modrm(2,rt,rt);
1132 output_w32(imm);
1133 }
1134 }
1135}
1136
1137void emit_adcimm(int imm,u_int rt)
1138{
1139 assem_debug("adc $%d,%%%s\n",imm,regname[rt]);
1140 assert(rt<8);
1141 if(imm<128&&imm>=-128) {
1142 output_byte(0x83);
1143 output_modrm(3,rt,2);
1144 output_byte(imm);
1145 }
1146 else
1147 {
1148 output_byte(0x81);
1149 output_modrm(3,rt,2);
1150 output_w32(imm);
1151 }
1152}
1153void emit_sbbimm(int imm,u_int rt)
1154{
1155 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1156 assert(rt<8);
1157 if(imm<128&&imm>=-128) {
1158 output_byte(0x83);
1159 output_modrm(3,rt,3);
1160 output_byte(imm);
1161 }
1162 else
1163 {
1164 output_byte(0x81);
1165 output_modrm(3,rt,3);
1166 output_w32(imm);
1167 }
1168}
1169
1170void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1171{
1172 if(rsh==rth&&rsl==rtl) {
1173 assem_debug("add $%d,%%%s\n",imm,regname[rtl]);
1174 if(imm<128&&imm>=-128) {
1175 output_byte(0x83);
1176 output_modrm(3,rtl,0);
1177 output_byte(imm);
1178 }
1179 else
1180 {
1181 output_byte(0x81);
1182 output_modrm(3,rtl,0);
1183 output_w32(imm);
1184 }
1185 assem_debug("adc $%d,%%%s\n",imm>>31,regname[rth]);
1186 output_byte(0x83);
1187 output_modrm(3,rth,2);
1188 output_byte(imm>>31);
1189 }
1190 else {
1191 emit_mov(rsh,rth);
1192 emit_mov(rsl,rtl);
1193 emit_addimm64_32(rth,rtl,imm,rth,rtl);
1194 }
1195}
1196
1197void emit_sbb(int rs1,int rs2)
1198{
1199 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1200 output_byte(0x19);
1201 output_modrm(3,rs1,rs2);
1202}
1203
1204void emit_andimm(int rs,int imm,int rt)
1205{
1206 if(rs==rt) {
1207 assem_debug("and $%d,%%%s\n",imm,regname[rt]);
1208 if(imm<128&&imm>=-128) {
1209 output_byte(0x83);
1210 output_modrm(3,rt,4);
1211 output_byte(imm);
1212 }
1213 else
1214 {
1215 output_byte(0x81);
1216 output_modrm(3,rt,4);
1217 output_w32(imm);
1218 }
1219 }
1220 else {
1221 emit_mov(rs,rt);
1222 emit_andimm(rt,imm,rt);
1223 }
1224}
1225
1226void emit_orimm(int rs,int imm,int rt)
1227{
1228 if(rs==rt) {
1229 assem_debug("or $%d,%%%s\n",imm,regname[rt]);
1230 if(imm<128&&imm>=-128) {
1231 output_byte(0x83);
1232 output_modrm(3,rt,1);
1233 output_byte(imm);
1234 }
1235 else
1236 {
1237 output_byte(0x81);
1238 output_modrm(3,rt,1);
1239 output_w32(imm);
1240 }
1241 }
1242 else {
1243 emit_mov(rs,rt);
1244 emit_orimm(rt,imm,rt);
1245 }
1246}
1247
1248void emit_xorimm(int rs,int imm,int rt)
1249{
1250 if(rs==rt) {
1251 assem_debug("xor $%d,%%%s\n",imm,regname[rt]);
1252 if(imm<128&&imm>=-128) {
1253 output_byte(0x83);
1254 output_modrm(3,rt,6);
1255 output_byte(imm);
1256 }
1257 else
1258 {
1259 output_byte(0x81);
1260 output_modrm(3,rt,6);
1261 output_w32(imm);
1262 }
1263 }
1264 else {
1265 emit_mov(rs,rt);
1266 emit_xorimm(rt,imm,rt);
1267 }
1268}
1269
1270void emit_shlimm(int rs,u_int imm,int rt)
1271{
1272 if(rs==rt) {
1273 assem_debug("shl %%%s,%d\n",regname[rt],imm);
1274 assert(imm>0);
1275 if(imm==1) output_byte(0xD1);
1276 else output_byte(0xC1);
1277 output_modrm(3,rt,4);
1278 if(imm>1) output_byte(imm);
1279 }
1280 else {
1281 emit_mov(rs,rt);
1282 emit_shlimm(rt,imm,rt);
1283 }
1284}
1285
1286void emit_shrimm(int rs,u_int imm,int rt)
1287{
1288 if(rs==rt) {
1289 assem_debug("shr %%%s,%d\n",regname[rt],imm);
1290 assert(imm>0);
1291 if(imm==1) output_byte(0xD1);
1292 else output_byte(0xC1);
1293 output_modrm(3,rt,5);
1294 if(imm>1) output_byte(imm);
1295 }
1296 else {
1297 emit_mov(rs,rt);
1298 emit_shrimm(rt,imm,rt);
1299 }
1300}
1301
1302void emit_shrimm64(int rs,u_int imm,int rt)
1303{
1304 assert(rs==rt);
1305 if(rs==rt) {
1306 assem_debug("shr %%%s,%d\n",regname[rt],imm);
1307 assert(imm>0);
1308 output_rex(1,0,0,rt>>3);
1309 if(imm==1) output_byte(0xD1);
1310 else output_byte(0xC1);
1311 output_modrm(3,rt,5);
1312 if(imm>1) output_byte(imm);
1313 }
1314 else {
1315 emit_mov(rs,rt);
1316 emit_shrimm(rt,imm,rt);
1317 }
1318}
1319
1320void emit_sarimm(int rs,u_int imm,int rt)
1321{
1322 if(rs==rt) {
1323 assem_debug("sar %%%s,%d\n",regname[rt],imm);
1324 assert(imm>0);
1325 if(imm==1) output_byte(0xD1);
1326 else output_byte(0xC1);
1327 output_modrm(3,rt,7);
1328 if(imm>1) output_byte(imm);
1329 }
1330 else {
1331 emit_mov(rs,rt);
1332 emit_sarimm(rt,imm,rt);
1333 }
1334}
1335
1336void emit_rorimm(int rs,u_int imm,int rt)
1337{
1338 if(rs==rt) {
1339 assem_debug("ror %%%s,%d\n",regname[rt],imm);
1340 assert(imm>0);
1341 if(imm==1) output_byte(0xD1);
1342 else output_byte(0xC1);
1343 output_modrm(3,rt,1);
1344 if(imm>1) output_byte(imm);
1345 }
1346 else {
1347 emit_mov(rs,rt);
1348 emit_sarimm(rt,imm,rt);
1349 }
1350}
1351
1352void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1353{
1354 if(rs==rt) {
1355 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1356 assert(imm>0);
1357 output_byte(0x0F);
1358 output_byte(0xA4);
1359 output_modrm(3,rt,rs2);
1360 output_byte(imm);
1361 }
1362 else {
1363 emit_mov(rs,rt);
1364 emit_shldimm(rt,rs2,imm,rt);
1365 }
1366}
1367
1368void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1369{
1370 if(rs==rt) {
1371 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1372 assert(imm>0);
1373 output_byte(0x0F);
1374 output_byte(0xAC);
1375 output_modrm(3,rt,rs2);
1376 output_byte(imm);
1377 }
1378 else {
1379 emit_mov(rs,rt);
1380 emit_shrdimm(rt,rs2,imm,rt);
1381 }
1382}
1383
1384void emit_shlcl(int r)
1385{
1386 assem_debug("shl %%%s,%%cl\n",regname[r]);
1387 output_byte(0xD3);
1388 output_modrm(3,r,4);
1389}
1390void emit_shrcl(int r)
1391{
1392 assem_debug("shr %%%s,%%cl\n",regname[r]);
1393 output_byte(0xD3);
1394 output_modrm(3,r,5);
1395}
1396void emit_sarcl(int r)
1397{
1398 assem_debug("sar %%%s,%%cl\n",regname[r]);
1399 output_byte(0xD3);
1400 output_modrm(3,r,7);
1401}
1402
1403void emit_shldcl(int r1,int r2)
1404{
1405 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1406 output_byte(0x0F);
1407 output_byte(0xA5);
1408 output_modrm(3,r1,r2);
1409}
1410void emit_shrdcl(int r1,int r2)
1411{
1412 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1413 output_byte(0x0F);
1414 output_byte(0xAD);
1415 output_modrm(3,r1,r2);
1416}
1417
1418void emit_cmpimm(int rs,int imm)
1419{
1420 assem_debug("cmp $%d,%%%s\n",imm,regname[rs]);
1421 if(imm<128&&imm>=-128) {
1422 output_byte(0x83);
1423 output_modrm(3,rs,7);
1424 output_byte(imm);
1425 }
1426 else
1427 {
1428 output_byte(0x81);
1429 output_modrm(3,rs,7);
1430 output_w32(imm);
1431 }
1432}
1433
1434void emit_cmovne(u_int *addr,int rt)
1435{
1436 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1437 if(addr==&const_zero) assem_debug(" [zero]\n");
1438 else if(addr==&const_one) assem_debug(" [one]\n");
1439 else assem_debug("\n");
1440 output_byte(0x0F);
1441 output_byte(0x45);
1442 output_modrm(0,5,rt);
1443 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
1444}
1445void emit_cmovl(u_int *addr,int rt)
1446{
1447 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1448 if(addr==&const_zero) assem_debug(" [zero]\n");
1449 else if(addr==&const_one) assem_debug(" [one]\n");
1450 else assem_debug("\n");
1451 output_byte(0x0F);
1452 output_byte(0x4C);
1453 output_modrm(0,5,rt);
1454 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
1455}
1456void emit_cmovs(u_int *addr,int rt)
1457{
1458 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1459 if(addr==&const_zero) assem_debug(" [zero]\n");
1460 else if(addr==&const_one) assem_debug(" [one]\n");
1461 else assem_debug("\n");
1462 output_byte(0x0F);
1463 output_byte(0x48);
1464 output_modrm(0,5,rt);
1465 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
1466}
1467void emit_cmovne_reg(int rs,int rt)
1468{
1469 assem_debug("cmovne %%%s,%%%s\n",regname[rs],regname[rt]);
1470 output_byte(0x0F);
1471 output_byte(0x45);
1472 output_modrm(3,rs,rt);
1473}
1474void emit_cmovl_reg(int rs,int rt)
1475{
1476 assem_debug("cmovl %%%s,%%%s\n",regname[rs],regname[rt]);
1477 output_byte(0x0F);
1478 output_byte(0x4C);
1479 output_modrm(3,rs,rt);
1480}
1481void emit_cmovs_reg(int rs,int rt)
1482{
1483 assem_debug("cmovs %%%s,%%%s\n",regname[rs],regname[rt]);
1484 output_byte(0x0F);
1485 output_byte(0x48);
1486 output_modrm(3,rs,rt);
1487}
1488void emit_cmovnc_reg(int rs,int rt)
1489{
1490 assem_debug("cmovae %%%s,%%%s\n",regname[rs],regname[rt]);
1491 output_byte(0x0F);
1492 output_byte(0x43);
1493 output_modrm(3,rs,rt);
1494}
1495void emit_cmova_reg(int rs,int rt)
1496{
1497 assem_debug("cmova %%%s,%%%s\n",regname[rs],regname[rt]);
1498 output_byte(0x0F);
1499 output_byte(0x47);
1500 output_modrm(3,rs,rt);
1501}
1502void emit_cmovp_reg(int rs,int rt)
1503{
1504 assem_debug("cmovp %%%s,%%%s\n",regname[rs],regname[rt]);
1505 output_byte(0x0F);
1506 output_byte(0x4A);
1507 output_modrm(3,rs,rt);
1508}
1509void emit_cmovnp_reg(int rs,int rt)
1510{
1511 assem_debug("cmovnp %%%s,%%%s\n",regname[rs],regname[rt]);
1512 output_byte(0x0F);
1513 output_byte(0x4B);
1514 output_modrm(3,rs,rt);
1515}
1516void emit_setl(int rt)
1517{
1518 assem_debug("setl %%%s\n",regname[rt]);
1519 output_byte(0x0F);
1520 output_byte(0x9C);
1521 output_modrm(3,rt,2);
1522}
1523void emit_movzbl_reg(int rs, int rt)
1524{
1525 assem_debug("movzbl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1526 output_byte(0x0F);
1527 output_byte(0xB6);
1528 output_modrm(3,rs,rt);
1529}
1530
1531void emit_slti32(int rs,int imm,int rt)
1532{
1533 if(rs!=rt) emit_zeroreg(rt);
1534 emit_cmpimm(rs,imm);
1535 if(rt<4) {
1536 emit_setl(rt);
1537 if(rs==rt) emit_movzbl_reg(rt,rt);
1538 }
1539 else
1540 {
1541 if(rs==rt) emit_movimm(0,rt);
1542 emit_cmovl(&const_one,rt);
1543 }
1544}
// Set rt from the carry produced by comparing rs with imm: rt is made
// zero (before the compare when it does not alias rs, after it otherwise)
// and then "adc $0" is applied to it.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place=(rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_adcimm(0,rt);
}
1552void emit_slti64_32(int rsh,int rsl,int imm,int rt)
1553{
1554 assert(rsh!=rt);
1555 emit_slti32(rsl,imm,rt);
1556 if(imm>=0)
1557 {
1558 emit_test(rsh,rsh);
1559 emit_cmovne(&const_zero,rt);
1560 emit_cmovs(&const_one,rt);
1561 }
1562 else
1563 {
1564 emit_cmpimm(rsh,-1);
1565 emit_cmovne(&const_zero,rt);
1566 emit_cmovl(&const_one,rt);
1567 }
1568}
1569void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
1570{
1571 assert(rsh!=rt);
1572 emit_sltiu32(rsl,imm,rt);
1573 if(imm>=0)
1574 {
1575 emit_test(rsh,rsh);
1576 emit_cmovne(&const_zero,rt);
1577 }
1578 else
1579 {
1580 emit_cmpimm(rsh,-1);
1581 emit_cmovne(&const_one,rt);
1582 }
1583}
1584
1585void emit_cmp(int rs,int rt)
1586{
1587 assem_debug("cmp %%%s,%%%s\n",regname[rt],regname[rs]);
1588 output_byte(0x39);
1589 output_modrm(3,rs,rt);
1590}
1591void emit_set_gz32(int rs, int rt)
1592{
1593 //assem_debug("set_gz32\n");
1594 emit_cmpimm(rs,1);
1595 emit_movimm(1,rt);
1596 emit_cmovl(&const_zero,rt);
1597}
// Compare rs with 1, load 1 into rt, then apply "sbb $0" to rt so the
// borrow from the compare is subtracted from it.
void emit_set_nz32(int rs, int rt)
{
  emit_cmpimm(rs,1);   // flags for rs-1
  emit_movimm(1,rt);
  emit_sbbimm(0,rt);   // rt -= borrow
}
1605void emit_set_gz64_32(int rsh, int rsl, int rt)
1606{
1607 //assem_debug("set_gz64\n");
1608 emit_set_gz32(rsl,rt);
1609 emit_test(rsh,rsh);
1610 emit_cmovne(&const_one,rt);
1611 emit_cmovs(&const_zero,rt);
1612}
1613void emit_set_nz64_32(int rsh, int rsl, int rt)
1614{
1615 //assem_debug("set_nz64\n");
1616 emit_or_and_set_flags(rsh,rsl,rt);
1617 emit_cmovne(&const_one,rt);
1618}
1619void emit_set_if_less32(int rs1, int rs2, int rt)
1620{
1621 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
1622 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
1623 emit_cmp(rs1,rs2);
1624 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
1625 emit_cmovl(&const_one,rt);
1626}
// rt = carry produced by comparing rs1 with rs2.  rt is cleared before
// the compare when it aliases neither source, and after it (with a mov,
// not emit_zeroreg) when it does; "adc $0" then adds the carry to it.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_adcimm(0,rt);
}
1635void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1636{
1637 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1638 assert(u1!=rt);
1639 assert(u2!=rt);
1640 emit_cmp(l1,l2);
1641 emit_mov(u1,rt);
1642 emit_sbb(rt,u2);
1643 emit_movimm(0,rt);
1644 emit_cmovl(&const_one,rt);
1645}
// Carry-based compare on two register pairs (u1:l1 versus u2:l2), result
// in rt.  The low words are compared, then the high words are subtracted
// with borrow using rt as scratch; rt must differ from both high
// registers.  The final carry is added into a zeroed rt.
void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
{
  assert(u1!=rt);
  assert(u2!=rt);
  emit_cmp(l1,l2);              // borrow out of the low words
  emit_mov(u1,rt);
  emit_sbb(rt,u2);              // rt = u1 - u2 - borrow
  emit_movimm(0,rt);
  emit_adcimm(0,rt);
}
1657
1658void emit_call(int a)
1659{
1660 assem_debug("call %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1661 output_byte(0xe8);
1662 output_w32(a-(int)out-4);
1663}
1664void emit_jmp(int a)
1665{
1666 assem_debug("jmp %x (%x+%x)\n",a,(int)out+5,a-(int)out-5);
1667 output_byte(0xe9);
1668 output_w32(a-(int)out-4);
1669}
1670void emit_jne(int a)
1671{
1672 assem_debug("jne %x\n",a);
1673 output_byte(0x0f);
1674 output_byte(0x85);
1675 output_w32(a-(int)out-4);
1676}
1677void emit_jeq(int a)
1678{
1679 assem_debug("jeq %x\n",a);
1680 output_byte(0x0f);
1681 output_byte(0x84);
1682 output_w32(a-(int)out-4);
1683}
1684void emit_js(int a)
1685{
1686 assem_debug("js %x\n",a);
1687 output_byte(0x0f);
1688 output_byte(0x88);
1689 output_w32(a-(int)out-4);
1690}
1691void emit_jns(int a)
1692{
1693 assem_debug("jns %x\n",a);
1694 output_byte(0x0f);
1695 output_byte(0x89);
1696 output_w32(a-(int)out-4);
1697}
1698void emit_jl(int a)
1699{
1700 assem_debug("jl %x\n",a);
1701 output_byte(0x0f);
1702 output_byte(0x8c);
1703 output_w32(a-(int)out-4);
1704}
1705void emit_jge(int a)
1706{
1707 assem_debug("jge %x\n",a);
1708 output_byte(0x0f);
1709 output_byte(0x8d);
1710 output_w32(a-(int)out-4);
1711}
1712void emit_jno(int a)
1713{
1714 assem_debug("jno %x\n",a);
1715 output_byte(0x0f);
1716 output_byte(0x81);
1717 output_w32(a-(int)out-4);
1718}
1719void emit_jc(int a)
1720{
1721 assem_debug("jc %x\n",a);
1722 output_byte(0x0f);
1723 output_byte(0x82);
1724 output_w32(a-(int)out-4);
1725}
1726
// Emit "push $imm" with a 32-bit immediate (opcode 0x68).
void emit_pushimm(int imm)
{
  enum { X86_PUSH_IMM32=0x68 };
  assem_debug("push $%x\n",imm);
  output_byte(X86_PUSH_IMM32);
  output_w32(imm);
}
1733//void emit_pusha()
1734//{
1735// assem_debug("pusha\n");
1736// output_byte(0x60);
1737//}
1738//void emit_popa()
1739//{
1740// assem_debug("popa\n");
1741// output_byte(0x61);
1742//}
1743void emit_pushreg(u_int r)
1744{
1745 assem_debug("push %%%s\n",regname[r]);
1746 assert(r<8);
1747 output_byte(0x50+r);
1748}
1749void emit_popreg(u_int r)
1750{
1751 assem_debug("pop %%%s\n",regname[r]);
1752 assert(r<8);
1753 output_byte(0x58+r);
1754}
1755void emit_callreg(u_int r)
1756{
1757 assem_debug("call *%%%s\n",regname[r]);
1758 assert(r<8);
1759 output_byte(0xFF);
1760 output_modrm(3,r,2);
1761}
1762void emit_jmpreg(u_int r)
1763{
1764 assem_debug("jmp *%%%s\n",regname[r]);
1765 assert(r<8);
1766 output_byte(0xFF);
1767 output_modrm(3,r,4);
1768}
1769void emit_jmpmem_indexed(u_int addr,u_int r)
1770{
1771 assem_debug("jmp *%x(%%%s)\n",addr,regname[r]);
1772 assert(r<8);
1773 output_byte(0xFF);
1774 output_modrm(2,r,4);
1775 output_w32(addr);
1776}
1777
1778void emit_readword(int addr, int rt)
1779{
1780 assem_debug("mov %x,%%%s\n",addr,regname[rt]);
1781 output_byte(0x8B);
1782 output_modrm(0,5,rt);
1783 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
1784}
1785void emit_readword_indexed(int addr, int rs, int rt)
1786{
1787 assem_debug("mov %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1788 output_byte(0x8B);
1789 if(addr<128&&addr>=-128) {
1790 output_modrm(1,rs,rt);
1791 if(rs==ESP) output_sib(0,4,4);
1792 output_byte(addr);
1793 }
1794 else
1795 {
1796 output_modrm(2,rs,rt);
1797 if(rs==ESP) output_sib(0,4,4);
1798 output_w32(addr);
1799 }
1800}
1801void emit_readword_tlb(int addr, int map, int rt)
1802{
1803 if(map<0) emit_readword(addr+(int)rdram-0x80000000, rt);
1804 else
1805 {
1806 assem_debug("addr32 mov %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1807 output_byte(0x67);
1808 output_byte(0x8B);
1809 output_modrm(0,4,rt);
1810 output_sib(2,map,5);
1811 output_w32(addr);
1812 }
1813}
1814void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
1815{
1816 if(map<0) emit_readword_indexed(addr+(int)rdram-0x80000000, rs, rt);
1817 else {
1818 assem_debug("addr32 mov %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1819 assert(rs!=ESP);
1820 output_byte(0x67);
1821 output_byte(0x8B);
1822 if(addr==0&&rs!=EBP) {
1823 output_modrm(0,4,rt);
1824 output_sib(2,map,rs);
1825 }
1826 else if(addr<128&&addr>=-128) {
1827 output_modrm(1,4,rt);
1828 output_sib(2,map,rs);
1829 output_byte(addr);
1830 }
1831 else
1832 {
1833 output_modrm(2,4,rt);
1834 output_sib(2,map,rs);
1835 output_w32(addr);
1836 }
1837 }
1838}
1839void emit_movmem_indexedx4(int addr, int rs, int rt)
1840{
1841 assem_debug("mov (%x,%%%s,4),%%%s\n",addr,regname[rs],regname[rt]);
1842 output_byte(0x8B);
1843 output_modrm(0,4,rt);
1844 output_sib(2,rs,5);
1845 output_w32(addr);
1846}
1847void emit_movmem_indexedx4_addr32(int addr, int rs, int rt)
1848{
1849 assem_debug("addr32 mov (%x,%%%s,4),%%%s\n",addr,regname[rs],regname[rt]);
1850 output_byte(0x67);
1851 output_byte(0x8B);
1852 output_modrm(0,4,rt);
1853 output_sib(2,rs,5);
1854 output_w32(addr);
1855}
1856void emit_movmem_indexedx8(int addr, int rs, int rt)
1857{
1858 assem_debug("mov (%x,%%%s,8),%%%s\n",addr,regname[rs],regname[rt]);
1859 output_byte(0x8B);
1860 output_modrm(0,4,rt);
1861 output_sib(3,rs,5);
1862 output_w32(addr);
1863}
1864void emit_readdword_tlb(int addr, int map, int rh, int rl)
1865{
1866 if(map<0) {
1867 if(rh>=0) emit_readword(addr+(int)rdram-0x80000000, rh);
1868 emit_readword(addr+(int)rdram-0x7FFFFFFC, rl);
1869 }
1870 else {
1871 if(rh>=0) emit_movmem_indexedx4_addr32(addr, map, rh);
1872 emit_movmem_indexedx4_addr32(addr+4, map, rl);
1873 }
1874}
1875void emit_readdword_indexed(int addr, int rs, int rt)
1876{
1877 assem_debug("mov %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1878 output_rex(1,rt>>3,0,rs>>3);
1879 output_byte(0x8B);
1880 if(addr<128&&addr>=-128) {
1881 output_modrm(1,rs&7,rt&7);
1882 if(rs==ESP) output_sib(0,4,4);
1883 output_byte(addr);
1884 }
1885 else
1886 {
1887 output_modrm(2,rs&7,rt&7);
1888 if(rs==ESP) output_sib(0,4,4);
1889 output_w32(addr);
1890 }
1891}
// Load two consecutive 32-bit words from addr+rs: the first into rh
// (skipped when rh is negative) and the one at addr+4 into rl.  rh must
// not be rs, since rs is still needed for the second load.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  assert(rh!=rs);
  if(rh>=0) {
    emit_readword_indexed_tlb(addr, rs, map, rh);
  }
  emit_readword_indexed_tlb(addr+4, rs, map, rl);
}
1898void emit_movsbl(int addr, int rt)
1899{
1900 assem_debug("movsbl %x,%%%s\n",addr,regname[rt]);
1901 output_byte(0x0F);
1902 output_byte(0xBE);
1903 output_modrm(0,5,rt);
1904 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
1905}
1906void emit_movsbl_indexed(int addr, int rs, int rt)
1907{
1908 assem_debug("movsbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1909 output_byte(0x0F);
1910 output_byte(0xBE);
1911 output_modrm(2,rs,rt);
1912 output_w32(addr);
1913}
1914void emit_movsbl_tlb(int addr, int map, int rt)
1915{
1916 if(map<0) emit_movsbl(addr+(int)rdram-0x80000000, rt);
1917 else
1918 {
1919 assem_debug("addr32 movsbl %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1920 output_byte(0x67);
1921 output_byte(0x0F);
1922 output_byte(0xBE);
1923 output_modrm(0,4,rt);
1924 output_sib(2,map,5);
1925 output_w32(addr);
1926 }
1927}
1928void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1929{
1930 if(map<0) emit_movsbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
1931 else {
1932 assem_debug("addr32 movsbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
1933 assert(rs!=ESP);
1934 output_byte(0x67);
1935 output_byte(0x0F);
1936 output_byte(0xBE);
1937 if(addr==0&&rs!=EBP) {
1938 output_modrm(0,4,rt);
1939 output_sib(2,map,rs);
1940 }
1941 else if(addr<128&&addr>=-128) {
1942 output_modrm(1,4,rt);
1943 output_sib(2,map,rs);
1944 output_byte(addr);
1945 }
1946 else
1947 {
1948 output_modrm(2,4,rt);
1949 output_sib(2,map,rs);
1950 output_w32(addr);
1951 }
1952 }
1953}
1954void emit_movswl(int addr, int rt)
1955{
1956 assem_debug("movswl %x,%%%s\n",addr,regname[rt]);
1957 output_byte(0x0F);
1958 output_byte(0xBF);
1959 output_modrm(0,5,rt);
1960 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
1961}
1962void emit_movswl_indexed(int addr, int rs, int rt)
1963{
1964 assem_debug("movswl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1965 output_byte(0x0F);
1966 output_byte(0xBF);
1967 output_modrm(2,rs,rt);
1968 output_w32(addr);
1969}
1970void emit_movswl_tlb(int addr, int map, int rt)
1971{
1972 if(map<0) emit_movswl(addr+(int)rdram-0x80000000, rt);
1973 else
1974 {
1975 assem_debug("addr32 movswl %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
1976 output_byte(0x67);
1977 output_byte(0x0F);
1978 output_byte(0xBF);
1979 output_modrm(0,4,rt);
1980 output_sib(2,map,5);
1981 output_w32(addr);
1982 }
1983}
1984void emit_movzbl(int addr, int rt)
1985{
1986 assem_debug("movzbl %x,%%%s\n",addr,regname[rt]);
1987 output_byte(0x0F);
1988 output_byte(0xB6);
1989 output_modrm(0,5,rt);
1990 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
1991}
1992void emit_movzbl_indexed(int addr, int rs, int rt)
1993{
1994 assem_debug("movzbl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
1995 output_byte(0x0F);
1996 output_byte(0xB6);
1997 output_modrm(2,rs,rt);
1998 output_w32(addr);
1999}
2000void emit_movzbl_tlb(int addr, int map, int rt)
2001{
2002 if(map<0) emit_movzbl(addr+(int)rdram-0x80000000, rt);
2003 else
2004 {
2005 assem_debug("addr32 movzbl %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
2006 output_byte(0x67);
2007 output_byte(0x0F);
2008 output_byte(0xB6);
2009 output_modrm(0,4,rt);
2010 output_sib(2,map,5);
2011 output_w32(addr);
2012 }
2013}
2014void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
2015{
2016 if(map<0) emit_movzbl_indexed(addr+(int)rdram-0x80000000, rs, rt);
2017 else {
2018 assem_debug("addr32 movzbl %x(%%%s,%%%s,4),%%%s\n",addr,regname[rs],regname[map],regname[rt]);
2019 assert(rs!=ESP);
2020 output_byte(0x67);
2021 output_byte(0x0F);
2022 output_byte(0xB6);
2023 if(addr==0&&rs!=EBP) {
2024 output_modrm(0,4,rt);
2025 output_sib(2,map,rs);
2026 }
2027 else if(addr<128&&addr>=-128) {
2028 output_modrm(1,4,rt);
2029 output_sib(2,map,rs);
2030 output_byte(addr);
2031 }
2032 else
2033 {
2034 output_modrm(2,4,rt);
2035 output_sib(2,map,rs);
2036 output_w32(addr);
2037 }
2038 }
2039}
2040void emit_movzwl(int addr, int rt)
2041{
2042 assem_debug("movzwl %x,%%%s\n",addr,regname[rt]);
2043 output_byte(0x0F);
2044 output_byte(0xB7);
2045 output_modrm(0,5,rt);
2046 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
2047}
2048void emit_movzwl_indexed(int addr, int rs, int rt)
2049{
2050 assem_debug("movzwl %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
2051 output_byte(0x0F);
2052 output_byte(0xB7);
2053 output_modrm(2,rs,rt);
2054 output_w32(addr);
2055}
2056void emit_movzwl_tlb(int addr, int map, int rt)
2057{
2058 if(map<0) emit_movzwl(addr+(int)rdram-0x80000000, rt);
2059 else
2060 {
2061 assem_debug("addr32 movzwl %x(,%%%s,4),%%%s\n",addr,regname[map],regname[rt]);
2062 output_byte(0x67);
2063 output_byte(0x0F);
2064 output_byte(0xB7);
2065 output_modrm(0,4,rt);
2066 output_sib(2,map,5);
2067 output_w32(addr);
2068 }
2069}
2070void emit_movzwl_reg(int rs, int rt)
2071{
2072 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
2073 output_byte(0x0F);
2074 output_byte(0xB7);
2075 output_modrm(3,rs,rt);
2076}
2077
2078void emit_xchg(int rs, int rt)
2079{
2080 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
2081 if(rs==EAX) {
2082 output_byte(0x90+rt);
2083 }
2084 else
2085 {
2086 output_byte(0x87);
2087 output_modrm(3,rs,rt);
2088 }
2089}
2090void emit_writeword(int rt, int addr)
2091{
2092 assem_debug("movl %%%s,%x\n",regname[rt],addr);
2093 output_byte(0x89);
2094 output_modrm(0,5,rt);
2095 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
2096}
2097void emit_writeword_indexed(int rt, int addr, int rs)
2098{
2099 assem_debug("mov %%%s,%x+%%%s\n",regname[rt],addr,regname[rs]);
2100 output_byte(0x89);
2101 if(addr<128&&addr>=-128) {
2102 output_modrm(1,rs,rt);
2103 if(rs==ESP) output_sib(0,4,4);
2104 output_byte(addr);
2105 }
2106 else
2107 {
2108 output_modrm(2,rs,rt);
2109 if(rs==ESP) output_sib(0,4,4);
2110 output_w32(addr);
2111 }
2112}
2113void emit_writeword_tlb(int rt, int addr, int map)
2114{
2115 if(map<0) {
2116 emit_writeword(rt, addr+(int)rdram-0x80000000);
2117 } else {
2118 emit_writeword_indexed(rt, addr, map);
2119 }
2120}
2121void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2122{
2123 if(map<0) emit_writeword_indexed(rt, addr+(int)rdram-0x80000000, rs);
2124 else {
2125 assem_debug("addr32 mov %%%s,%x(%%%s,%%%s,1)\n",regname[rt],addr,regname[rs],regname[map]);
2126 assert(rs!=ESP);
2127 output_byte(0x67);
2128 output_byte(0x89);
2129 if(addr==0&&rs!=EBP) {
2130 output_modrm(0,4,rt);
2131 output_sib(0,map,rs);
2132 }
2133 else if(addr<128&&addr>=-128) {
2134 output_modrm(1,4,rt);
2135 output_sib(0,map,rs);
2136 output_byte(addr);
2137 }
2138 else
2139 {
2140 output_modrm(2,4,rt);
2141 output_sib(0,map,rs);
2142 output_w32(addr);
2143 }
2144 }
2145}
2146void emit_writedword_tlb(int rh, int rl, int addr, int map)
2147{
2148 assert(rh>=0);
2149 if(map<0) {
2150 emit_writeword(rh, addr+(int)rdram-0x80000000);
2151 emit_writeword(rl, addr+(int)rdram-0x7FFFFFFC);
2152 }
2153 else {
2154 emit_writeword_indexed(rh, addr, map);
2155 emit_writeword_indexed(rl, addr+4, map);
2156 }
2157}
// Store two consecutive 32-bit words at addr+rs: rh first, then rl at
// addr+4.  rh must be a valid (non-negative) register.
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  const int low_addr=addr+4;
  assert(rh>=0);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  emit_writeword_indexed_tlb(rl, low_addr, rs, map, temp);
}
2164void emit_writehword(int rt, int addr)
2165{
2166 assem_debug("movw %%%s,%x\n",regname[rt]+1,addr);
2167 output_byte(0x66);
2168 output_byte(0x89);
2169 output_modrm(0,5,rt);
2170 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
2171}
2172void emit_writehword_indexed(int rt, int addr, int rs)
2173{
2174 assem_debug("movw %%%s,%x+%%%s\n",regname[rt]+1,addr,regname[rs]);
2175 output_byte(0x66);
2176 output_byte(0x89);
2177 if(addr<128&&addr>=-128) {
2178 output_modrm(1,rs,rt);
2179 output_byte(addr);
2180 }
2181 else
2182 {
2183 output_modrm(2,rs,rt);
2184 output_w32(addr);
2185 }
2186}
2187void emit_writehword_tlb(int rt, int addr, int map)
2188{
2189 if(map<0) {
2190 emit_writehword(rt, addr+(int)rdram-0x80000000);
2191 } else {
2192 emit_writehword_indexed(rt, addr, map);
2193 }
2194}
2195void emit_writebyte(int rt, int addr)
2196{
2197 if(rt<4) {
2198 assem_debug("movb %%%cl,%x\n",regname[rt][1],addr);
2199 output_byte(0x88);
2200 output_modrm(0,5,rt);
2201 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
2202 }
2203 else
2204 {
2205 emit_xchg(EAX,rt);
2206 emit_writebyte(EAX,addr);
2207 emit_xchg(EAX,rt);
2208 }
2209}
2210void emit_writebyte_indexed(int rt, int addr, int rs)
2211{
2212 if(rt<4) {
2213 assem_debug("movb %%%cl,%x+%%%s\n",regname[rt][1],addr,regname[rs]);
2214 output_byte(0x88);
2215 if(addr<128&&addr>=-128) {
2216 output_modrm(1,rs,rt);
2217 output_byte(addr);
2218 }
2219 else
2220 {
2221 output_modrm(2,rs,rt);
2222 output_w32(addr);
2223 }
2224 }
2225 else
2226 {
2227 emit_xchg(EAX,rt);
2228 emit_writebyte_indexed(EAX,addr,rs==EAX?rt:rs);
2229 emit_xchg(EAX,rt);
2230 }
2231}
2232void emit_writebyte_tlb(int rt, int addr, int map)
2233{
2234 if(map<0) {
2235 emit_writebyte(rt, addr+(int)rdram-0x80000000);
2236 } else {
2237 emit_writebyte_indexed(rt, addr, map);
2238 }
2239}
2240void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
2241{
2242 if(map<0) emit_writebyte_indexed(rt, addr+(int)rdram-0x80000000, rs);
2243 else
2244 if(rt<4) {
2245 assem_debug("addr32 movb %%%cl,%x(%%%s,%%%s,1)\n",regname[rt][1],addr,regname[rs],regname[map]);
2246 assert(rs!=ESP);
2247 output_byte(0x67);
2248 output_byte(0x88);
2249 if(addr==0&&rs!=EBP) {
2250 output_modrm(0,4,rt);
2251 output_sib(0,map,rs);
2252 }
2253 else if(addr<128&&addr>=-128) {
2254 output_modrm(1,4,rt);
2255 output_sib(0,map,rs);
2256 output_byte(addr);
2257 }
2258 else
2259 {
2260 output_modrm(2,4,rt);
2261 output_sib(0,map,rs);
2262 output_w32(addr);
2263 }
2264 }
2265 else
2266 {
2267 emit_xchg(EAX,rt);
2268 emit_writebyte_indexed_tlb(EAX,addr,rs==EAX?rt:rs,map==EAX?rt:map,temp);
2269 emit_xchg(EAX,rt);
2270 }
2271}
2272void emit_writeword_imm(int imm, int addr)
2273{
2274 assem_debug("movl $%x,%x\n",imm,addr);
2275 output_byte(0xC7);
2276 output_modrm(0,5,0);
2277 output_w32(addr-(int)out-8); // Note: rip-relative in 64-bit mode
2278 output_w32(imm);
2279}
// Emit "mov $imm,addr(%esp)" (0xC7 /0 with a SIB byte).  addr must fit
// in a signed byte; the displacement byte is omitted when addr is zero.
void emit_writeword_imm_esp(int imm, int addr)
{
  int has_disp;
  assem_debug("mov $%x,%x(%%esp)\n",imm,addr);
  assert(addr>=-128&&addr<128);
  has_disp=(addr!=0);
  output_byte(0xC7);
  output_modrm(has_disp,4,0);
  output_sib(0,4,4);
  if(has_disp) output_byte(addr);
  output_w32(imm);
}
2290void emit_writedword_imm32(int imm, int addr)
2291{
2292 assem_debug("movq $%x,%x\n",imm,addr);
2293 output_rex(1,0,0,0);
2294 output_byte(0xC7);
2295 output_modrm(0,5,0);
2296 output_w32(addr-(int)out-8); // Note: rip-relative in 64-bit mode
2297 output_w32(imm); // Note: This 32-bit value will be sign extended
2298}
2299void emit_writebyte_imm(int imm, int addr)
2300{
2301 assem_debug("movb $%x,%x\n",imm,addr);
2302 assert(imm>=-128&&imm<128);
2303 output_byte(0xC6);
2304 output_modrm(0,5,0);
2305 output_w32(addr-(int)out-5); // Note: rip-relative in 64-bit mode
2306 output_byte(imm);
2307}
2308
2309void emit_mul(int rs)
2310{
2311 assem_debug("mul %%%s\n",regname[rs]);
2312 output_byte(0xF7);
2313 output_modrm(3,rs,4);
2314}
2315void emit_imul(int rs)
2316{
2317 assem_debug("imul %%%s\n",regname[rs]);
2318 output_byte(0xF7);
2319 output_modrm(3,rs,5);
2320}
2321void emit_div(int rs)
2322{
2323 assem_debug("div %%%s\n",regname[rs]);
2324 output_byte(0xF7);
2325 output_modrm(3,rs,6);
2326}
2327void emit_idiv(int rs)
2328{
2329 assem_debug("idiv %%%s\n",regname[rs]);
2330 output_byte(0xF7);
2331 output_modrm(3,rs,7);
2332}
// Emit "cdq" (single byte 0x99).
void emit_cdq()
{
  enum { X86_CDQ=0x99 };
  assem_debug("cdq\n");
  output_byte(X86_CDQ);
}
2338
2339// Load 2 immediates optimizing for small code size
2340void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2341{
2342 emit_movimm(imm1,rt1);
2343 if(imm2-imm1<128&&imm2-imm1>=-128) emit_addimm(rt1,imm2-imm1,rt2);
2344 else emit_movimm(imm2,rt2);
2345}
2346
2347// special case for checking pending_exception
2348void emit_cmpmem_imm_byte(int addr,int imm)
2349{
2350 assert(imm<128&&imm>=-127);
2351 assem_debug("cmpb $%d,%x\n",imm,addr);
2352 output_byte(0x80);
2353 output_modrm(0,5,7);
2354 output_w32(addr-(int)out-5); // Note: rip-relative in 64-bit mode
2355 output_byte(imm);
2356}
2357
2358// special case for checking invalid_code
2359void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
2360{
2361 assert(imm<128&&imm>=-127);
2362 assert(r>=0&&r<8);
2363 emit_shrimm(r,12,r);
2364 assem_debug("cmp $%d,%x+%%%s\n",imm,addr,regname[r]);
2365 output_byte(0x80);
2366 output_modrm(2,r,7);
2367 output_w32(addr);
2368 output_byte(imm);
2369}
2370
2371// special case for checking hash_table
2372void emit_cmpmem_indexed(int addr,int rs,int rt)
2373{
2374 assert(rs>=0&&rs<8);
2375 assert(rt>=0&&rt<8);
2376 assem_debug("cmp %x+%%%s,%%%s\n",addr,regname[rs],regname[rt]);
2377 output_byte(0x39);
2378 output_modrm(2,rs,rt);
2379 output_w32(addr);
2380}
2381
2382// special case for checking memory_map in verify_mapping
2383void emit_cmpmem(int addr,int rt)
2384{
2385 assert(rt>=0&&rt<8);
2386 assem_debug("cmp %x,%%%s\n",addr,regname[rt]);
2387 output_byte(0x39);
2388 output_modrm(0,5,rt);
2389 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
2390}
2391
2392// Used to preload hash table entries
2393void emit_prefetch(void *addr)
2394{
2395 assem_debug("prefetch %x\n",(int)addr);
2396 output_byte(0x0F);
2397 output_byte(0x18);
2398 output_modrm(0,5,1);
2399 output_w32((int)addr-(int)out-4); // Note: rip-relative in 64-bit mode
2400}
2401
2402/*void emit_submem(int r,int addr)
2403{
2404 assert(r>=0&&r<8);
2405 assem_debug("sub %x,%%%s\n",addr,regname[r]);
2406 output_byte(0x2B);
2407 output_modrm(0,5,r);
2408 output_w32((int)addr);
2409}*/
2410
2411void emit_flds(int r)
2412{
2413 assem_debug("flds (%%%s)\n",regname[r]);
2414 output_byte(0xd9);
2415 if(r!=EBP) output_modrm(0,r,0);
2416 else {output_modrm(1,EBP,0);output_byte(0);}
2417}
2418void emit_fldl(int r)
2419{
2420 assem_debug("fldl (%%%s)\n",regname[r]);
2421 output_byte(0xdd);
2422 if(r!=EBP) output_modrm(0,r,0);
2423 else {output_modrm(1,EBP,0);output_byte(0);}
2424}
2425void emit_fucomip(u_int r)
2426{
2427 assem_debug("fucomip %d\n",r);
2428 assert(r<8);
2429 output_byte(0xdf);
2430 output_byte(0xe8+r);
2431}
// Emit "fchs" (bytes 0xD9 0xE0).
void emit_fchs()
{
  enum { X87_ESC_D9=0xd9, X87_FCHS=0xe0 };
  assem_debug("fchs\n");
  output_byte(X87_ESC_D9);
  output_byte(X87_FCHS);
}
// Emit "fabs" (bytes 0xD9 0xE1).
void emit_fabs()
{
  enum { X87_ESC_D9=0xd9, X87_FABS=0xe1 };
  assem_debug("fabs\n");
  output_byte(X87_ESC_D9);
  output_byte(X87_FABS);
}
// Emit "fsqrt" (bytes 0xD9 0xFA).
void emit_fsqrt()
{
  enum { X87_ESC_D9=0xd9, X87_FSQRT=0xfa };
  assem_debug("fsqrt\n");
  output_byte(X87_ESC_D9);
  output_byte(X87_FSQRT);
}
2450void emit_fadds(int r)
2451{
2452 assem_debug("fadds (%%%s)\n",regname[r]);
2453 output_byte(0xd8);
2454 if(r!=EBP) output_modrm(0,r,0);
2455 else {output_modrm(1,EBP,0);output_byte(0);}
2456}
2457void emit_faddl(int r)
2458{
2459 assem_debug("faddl (%%%s)\n",regname[r]);
2460 output_byte(0xdc);
2461 if(r!=EBP) output_modrm(0,r,0);
2462 else {output_modrm(1,EBP,0);output_byte(0);}
2463}
// Emit "fadd" with stack register r (bytes 0xD8, 0xC0+r).
void emit_fadd(int r)
{
  enum { X87_ESC_D8=0xd8, X87_FADD_BASE=0xc0 };
  assem_debug("fadd st%d\n",r);
  output_byte(X87_ESC_D8);
  output_byte(X87_FADD_BASE+r);
}
2470void emit_fsubs(int r)
2471{
2472 assem_debug("fsubs (%%%s)\n",regname[r]);
2473 output_byte(0xd8);
2474 if(r!=EBP) output_modrm(0,r,4);
2475 else {output_modrm(1,EBP,4);output_byte(0);}
2476}
2477void emit_fsubl(int r)
2478{
2479 assem_debug("fsubl (%%%s)\n",regname[r]);
2480 output_byte(0xdc);
2481 if(r!=EBP) output_modrm(0,r,4);
2482 else {output_modrm(1,EBP,4);output_byte(0);}
2483}
// Emit "fsub" with stack register r (bytes 0xD8, 0xE0+r).
void emit_fsub(int r)
{
  enum { X87_ESC_D8=0xd8, X87_FSUB_BASE=0xe0 };
  assem_debug("fsub st%d\n",r);
  output_byte(X87_ESC_D8);
  output_byte(X87_FSUB_BASE+r);
}
2490void emit_fmuls(int r)
2491{
2492 assem_debug("fmuls (%%%s)\n",regname[r]);
2493 output_byte(0xd8);
2494 if(r!=EBP) output_modrm(0,r,1);
2495 else {output_modrm(1,EBP,1);output_byte(0);}
2496}
2497void emit_fmull(int r)
2498{
2499 assem_debug("fmull (%%%s)\n",regname[r]);
2500 output_byte(0xdc);
2501 if(r!=EBP) output_modrm(0,r,1);
2502 else {output_modrm(1,EBP,1);output_byte(0);}
2503}
// Emit "fmul st(r)" (opcode D8 C8+r): st(0) *= st(r).
// r is the x87 stack index and must be in 0..7; a larger value would change
// the opcode bits, so it is asserted like emit_fucomip's operand.
void emit_fmul(int r)
{
  assem_debug("fmul st%d\n",r);
  assert(r>=0&&r<8);
  output_byte(0xd8);
  output_byte(0xc8+r);
}
2510void emit_fdivs(int r)
2511{
2512 assem_debug("fdivs (%%%s)\n",regname[r]);
2513 output_byte(0xd8);
2514 if(r!=EBP) output_modrm(0,r,6);
2515 else {output_modrm(1,EBP,6);output_byte(0);}
2516}
2517void emit_fdivl(int r)
2518{
2519 assem_debug("fdivl (%%%s)\n",regname[r]);
2520 output_byte(0xdc);
2521 if(r!=EBP) output_modrm(0,r,6);
2522 else {output_modrm(1,EBP,6);output_byte(0);}
2523}
// Emit "fdiv st(r)" (opcode D8 F0+r): st(0) /= st(r).
// r is the x87 stack index and must be in 0..7; a larger value would overflow
// the opcode byte, so it is asserted like emit_fucomip's operand.
void emit_fdiv(int r)
{
  assem_debug("fdiv st%d\n",r);
  assert(r>=0&&r<8);
  output_byte(0xd8);
  output_byte(0xf0+r);
}
2530void emit_fpop()
2531{
2532 // fstp st(0)
2533 assem_debug("fpop\n");
2534 output_byte(0xdd);
2535 output_byte(0xd8);
2536}
2537void emit_fildl(int r)
2538{
2539 assem_debug("fildl (%%%s)\n",regname[r]);
2540 output_byte(0xdb);
2541 if(r!=EBP) output_modrm(0,r,0);
2542 else {output_modrm(1,EBP,0);output_byte(0);}
2543}
2544void emit_fildll(int r)
2545{
2546 assem_debug("fildll (%%%s)\n",regname[r]);
2547 output_byte(0xdf);
2548 if(r!=EBP) output_modrm(0,r,5);
2549 else {output_modrm(1,EBP,5);output_byte(0);}
2550}
2551void emit_fistpl(int r)
2552{
2553 assem_debug("fistpl (%%%s)\n",regname[r]);
2554 output_byte(0xdb);
2555 if(r!=EBP) output_modrm(0,r,3);
2556 else {output_modrm(1,EBP,3);output_byte(0);}
2557}
2558void emit_fistpll(int r)
2559{
2560 assem_debug("fistpll (%%%s)\n",regname[r]);
2561 output_byte(0xdf);
2562 if(r!=EBP) output_modrm(0,r,7);
2563 else {output_modrm(1,EBP,7);output_byte(0);}
2564}
2565void emit_fstps(int r)
2566{
2567 assem_debug("fstps (%%%s)\n",regname[r]);
2568 output_byte(0xd9);
2569 if(r!=EBP) output_modrm(0,r,3);
2570 else {output_modrm(1,EBP,3);output_byte(0);}
2571}
2572void emit_fstpl(int r)
2573{
2574 assem_debug("fstpl (%%%s)\n",regname[r]);
2575 output_byte(0xdd);
2576 if(r!=EBP) output_modrm(0,r,3);
2577 else {output_modrm(1,EBP,3);output_byte(0);}
2578}
// Emit "fnstcw (%esp)" (opcode D9 /7): store the x87 control word at the top
// of the stack.
void emit_fnstcw_stack()
{
  const int sib_follows=4; // rm=4 with mod=0: operand is described by a SIB byte
  const int no_index=4;    // SIB index field 4: no index register
  const int base_esp=4;    // SIB base field 4: stack pointer
  assem_debug("fnstcw (%%esp)\n");
  output_byte(0xd9);
  output_modrm(0,sib_follows,7);
  output_sib(0,no_index,base_esp);
}
// Emit "fldcw (%esp)" (opcode D9 /5): load the x87 control word from the top
// of the stack.
void emit_fldcw_stack()
{
  const int sib_follows=4; // rm=4 with mod=0: operand is described by a SIB byte
  const int no_index=4;    // SIB index field 4: no index register
  const int base_esp=4;    // SIB base field 4: stack pointer
  assem_debug("fldcw (%%esp)\n");
  output_byte(0xd9);
  output_modrm(0,sib_follows,5);
  output_sib(0,no_index,base_esp);
}
2593void emit_fldcw_indexed(int addr,int r)
2594{
2595 assem_debug("fldcw %x(%%%s)\n",addr,regname[r]);
2596 output_byte(0xd9);
2597 output_modrm(0,4,5);
2598 output_sib(1,r,5);
2599 output_w32(addr);
2600}
2601void emit_fldcw(int addr)
2602{
2603 assem_debug("fldcw %x\n",addr);
2604 output_byte(0xd9);
2605 output_modrm(0,5,5);
2606 output_w32(addr-(int)out-4); // Note: rip-relative in 64-bit mode
2607}
2608void emit_movss_load(u_int addr,u_int ssereg)
2609{
2610 assem_debug("movss (%%%s),xmm%d\n",regname[addr],ssereg);
2611 assert(ssereg<8);
2612 output_byte(0xf3);
2613 output_byte(0x0f);
2614 output_byte(0x10);
2615 if(addr!=EBP) output_modrm(0,addr,ssereg);
2616 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2617}
2618void emit_movsd_load(u_int addr,u_int ssereg)
2619{
2620 assem_debug("movsd (%%%s),xmm%d\n",regname[addr],ssereg);
2621 assert(ssereg<8);
2622 output_byte(0xf2);
2623 output_byte(0x0f);
2624 output_byte(0x10);
2625 if(addr!=EBP) output_modrm(0,addr,ssereg);
2626 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2627}
2628void emit_movd_store(u_int ssereg,u_int addr)
2629{
2630 assem_debug("movd xmm%d,(%%%s)\n",ssereg,regname[addr]);
2631 assert(ssereg<8);
2632 output_byte(0x66);
2633 output_byte(0x0f);
2634 output_byte(0x7e);
2635 if(addr!=EBP) output_modrm(0,addr,ssereg);
2636 else {output_modrm(1,EBP,ssereg);output_byte(0);}
2637}
2638void emit_cvttps2dq(u_int ssereg1,u_int ssereg2)
2639{
2640 assem_debug("cvttps2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
2641 assert(ssereg1<8);
2642 assert(ssereg2<8);
2643 output_byte(0xf3);
2644 output_byte(0x0f);
2645 output_byte(0x5b);
2646 output_modrm(3,ssereg1,ssereg2);
2647}
2648void emit_cvttpd2dq(u_int ssereg1,u_int ssereg2)
2649{
2650 assem_debug("cvttpd2dq xmm%d,xmm%d\n",ssereg1,ssereg2);
2651 assert(ssereg1<8);
2652 assert(ssereg2<8);
2653 output_byte(0x66);
2654 output_byte(0x0f);
2655 output_byte(0xe6);
2656 output_modrm(3,ssereg1,ssereg2);
2657}
2658
// Return the number of set bits in reglist.
unsigned int count_bits(u_int reglist)
{
  int total=0;
  // Each iteration clears the lowest set bit, so the loop runs once per 1-bit.
  for(;reglist!=0;reglist&=reglist-1)
    total++;
  return total;
}
2669
2670// Save registers before function call
2671// This code is executed infrequently so we try to minimize code size
2672// by pushing registers onto the stack instead of writing them to their
2673// usual locations
2674void save_regs(u_int reglist)
2675{
2676 int hr;
2677 int count=count_bits(reglist);
2678 if(count) {
2679 for(hr=0;hr<HOST_REGS;hr++) {
2680 if(hr!=EXCLUDE_REG) {
2681 if((reglist>>hr)&1) {
2682 emit_pushreg(hr);
2683 }
2684 }
2685 }
2686 }
2687 emit_addimm(ESP,-(8-count)*8,ESP);
2688}
2689// Restore registers after function call
2690void restore_regs(u_int reglist)
2691{
2692 int hr;
2693 int count=count_bits(reglist);
2694 emit_addimm(ESP,(8-count)*8,ESP);
2695 if(count) {
2696 for(hr=HOST_REGS-1;hr>=0;hr--) {
2697 if(hr!=EXCLUDE_REG) {
2698 if((reglist>>hr)&1) {
2699 emit_popreg(hr);
2700 }
2701 }
2702 }
2703 }
2704}
2705
2706/* Stubs/epilogue */
2707
2708emit_extjump2(int addr, int target, int linker)
2709{
2710 u_char *ptr=(u_char *)addr;
2711 if(*ptr==0x0f)
2712 {
2713 assert(ptr[1]>=0x80&&ptr[1]<=0x8f);
2714 addr+=2;
2715 }
2716 else
2717 {
2718 assert(*ptr==0xe8||*ptr==0xe9);
2719 addr++;
2720 }
2721 emit_movimm(target,EAX);
2722 emit_movimm(addr,EBX);
2723 //assert(addr>=0x7000000&&addr<0x7FFFFFF);
2724 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2725//DEBUG >
2726#ifdef DEBUG_CYCLE_COUNT
2727 emit_readword((int)&last_count,ECX);
2728 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2729 emit_readword((int)&next_interupt,ECX);
2730 emit_writeword(HOST_CCREG,(int)&Count);
2731 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2732 emit_writeword(ECX,(int)&last_count);
2733#endif
2734//DEBUG <
2735 emit_jmp(linker);
2736}
2737
2738emit_extjump(int addr, int target)
2739{
2740 emit_extjump2(addr, target, (int)dyna_linker);
2741}
2742emit_extjump_ds(int addr, int target)
2743{
2744 emit_extjump2(addr, target, (int)dyna_linker_ds);
2745}
2746
2747do_readstub(int n)
2748{
2749 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2750 set_jump_target(stubs[n][1],(int)out);
2751 int type=stubs[n][0];
2752 int i=stubs[n][3];
2753 int rs=stubs[n][4];
2754 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2755 u_int reglist=stubs[n][7];
2756 signed char *i_regmap=i_regs->regmap;
2757 int addr=get_reg(i_regmap,AGEN1+(i&1));
2758 int rth,rt;
2759 int ds;
2760 if(itype[i]==C1LS||itype[i]==LOADLR) {
2761 rth=get_reg(i_regmap,FTEMP|64);
2762 rt=get_reg(i_regmap,FTEMP);
2763 }else{
2764 rth=get_reg(i_regmap,rt1[i]|64);
2765 rt=get_reg(i_regmap,rt1[i]);
2766 }
2767 assert(rs>=0);
2768 assert(rt>=0);
2769 if(addr<0) addr=rt;
2770 assert(addr>=0);
2771 int ftable=0;
2772 if(type==LOADB_STUB||type==LOADBU_STUB)
2773 ftable=(int)readmemb;
2774 if(type==LOADH_STUB||type==LOADHU_STUB)
2775 ftable=(int)readmemh;
2776 if(type==LOADW_STUB)
2777 ftable=(int)readmem;
2778 if(type==LOADD_STUB)
2779 ftable=(int)readmemd;
2780 emit_writeword(rs,(int)&address);
2781 emit_shrimm(rs,16,addr);
2782 emit_movmem_indexedx8(ftable,addr,addr);
2783 save_regs(reglist);
2784 ds=i_regs!=&regs[i];
2785 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2786 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2787 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2788
2789 int temp;
2790 int cc=get_reg(i_regmap,CCREG);
2791 if(cc<0) {
2792 if(addr==HOST_CCREG)
2793 {
2794 cc=0;temp=1;
2795 assert(cc!=HOST_CCREG);
2796 assert(temp!=HOST_CCREG);
2797 emit_loadreg(CCREG,cc);
2798 }
2799 else
2800 {
2801 cc=HOST_CCREG;
2802 emit_loadreg(CCREG,cc);
2803 temp=!addr;
2804 }
2805 }
2806 else
2807 {
2808 temp=!addr;
2809 }
2810 emit_readword((int)&last_count,temp);
2811 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
2812 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,0);
2813 emit_add(cc,temp,cc);
2814 emit_writeword(cc,(int)&Count);
2815 emit_callreg(addr);
2816 // We really shouldn't need to update the count here,
2817 // but not doing so causes random crashes...
2818 emit_readword((int)&Count,HOST_CCREG);
2819 emit_readword((int)&next_interupt,ECX);
2820 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
2821 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2822 emit_writeword(ECX,(int)&last_count);
2823 emit_storereg(CCREG,HOST_CCREG);
2824 restore_regs(reglist);
2825 if((cc=get_reg(i_regmap,CCREG))>=0) {
2826 emit_loadreg(CCREG,cc);
2827 }
2828 if(type==LOADB_STUB)
2829 emit_movsbl((int)&readmem_dword,rt);
2830 if(type==LOADBU_STUB)
2831 emit_movzbl((int)&readmem_dword,rt);
2832 if(type==LOADH_STUB)
2833 emit_movswl((int)&readmem_dword,rt);
2834 if(type==LOADHU_STUB)
2835 emit_movzwl((int)&readmem_dword,rt);
2836 if(type==LOADW_STUB)
2837 emit_readword((int)&readmem_dword,rt);
2838 if(type==LOADD_STUB) {
2839 emit_readword((int)&readmem_dword,rt);
2840 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2841 }
2842 emit_jmp(stubs[n][2]); // return address
2843}
2844
2845inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2846{
2847 assem_debug("inline_readstub\n");
2848 int rs=get_reg(regmap,target);
2849 int rth=get_reg(regmap,target|64);
2850 int rt=get_reg(regmap,target);
2851 assert(rs>=0);
2852 assert(rt>=0);
2853 int ftable=0;
2854 if(type==LOADB_STUB||type==LOADBU_STUB)
2855 ftable=(int)readmemb;
2856 if(type==LOADH_STUB||type==LOADHU_STUB)
2857 ftable=(int)readmemh;
2858 if(type==LOADW_STUB)
2859 ftable=(int)readmem;
2860 if(type==LOADD_STUB)
2861 ftable=(int)readmemd;
2862 #ifdef HOST_IMM_ADDR32
2863 emit_writeword_imm(addr,(int)&address);
2864 #else
2865 emit_writeword(rs,(int)&address);
2866 #endif
2867 save_regs(reglist);
2868 int cc=get_reg(regmap,CCREG);
2869 int temp;
2870 if(cc<0) {
2871 if(rs==HOST_CCREG)
2872 {
2873 cc=0;temp=1;
2874 assert(cc!=HOST_CCREG);
2875 assert(temp!=HOST_CCREG);
2876 emit_loadreg(CCREG,cc);
2877 }
2878 else
2879 {
2880 cc=HOST_CCREG;
2881 emit_loadreg(CCREG,cc);
2882 temp=!rs;
2883 }
2884 }
2885 else
2886 {
2887 temp=!rs;
2888 }
2889 emit_readword((int)&last_count,temp);
2890 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
2891 emit_add(cc,temp,cc);
2892 emit_writeword(cc,(int)&Count);
2893 if((signed int)addr>=(signed int)0xC0000000) {
2894 // Pagefault address
2895 int ds=regmap!=regs[i].regmap;
2896 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,0);
2897 }
2898 emit_call(((uint64_t *)ftable)[addr>>16]);
2899 // We really shouldn't need to update the count here,
2900 // but not doing so causes random crashes...
2901 emit_readword((int)&Count,HOST_CCREG);
2902 emit_readword((int)&next_interupt,ECX);
2903 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
2904 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2905 emit_writeword(ECX,(int)&last_count);
2906 emit_storereg(CCREG,HOST_CCREG);
2907 restore_regs(reglist);
2908 if((cc=get_reg(regmap,CCREG))>=0) {
2909 emit_loadreg(CCREG,cc);
2910 }
2911 if(type==LOADB_STUB)
2912 emit_movsbl((int)&readmem_dword,rt);
2913 if(type==LOADBU_STUB)
2914 emit_movzbl((int)&readmem_dword,rt);
2915 if(type==LOADH_STUB)
2916 emit_movswl((int)&readmem_dword,rt);
2917 if(type==LOADHU_STUB)
2918 emit_movzwl((int)&readmem_dword,rt);
2919 if(type==LOADW_STUB)
2920 emit_readword((int)&readmem_dword,rt);
2921 if(type==LOADD_STUB) {
2922 emit_readword((int)&readmem_dword,rt);
2923 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2924 }
2925}
2926
2927do_writestub(int n)
2928{
2929 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2930 set_jump_target(stubs[n][1],(int)out);
2931 int type=stubs[n][0];
2932 int i=stubs[n][3];
2933 int rs=stubs[n][4];
2934 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2935 u_int reglist=stubs[n][7];
2936 signed char *i_regmap=i_regs->regmap;
2937 int addr=get_reg(i_regmap,AGEN1+(i&1));
2938 int rth,rt,r;
2939 int ds;
2940 if(itype[i]==C1LS) {
2941 rth=get_reg(i_regmap,FTEMP|64);
2942 rt=get_reg(i_regmap,r=FTEMP);
2943 }else{
2944 rth=get_reg(i_regmap,rs2[i]|64);
2945 rt=get_reg(i_regmap,r=rs2[i]);
2946 }
2947 assert(rs>=0);
2948 assert(rt>=0);
2949 if(addr<0) addr=get_reg(i_regmap,-1);
2950 assert(addr>=0);
2951 int ftable=0;
2952 if(type==STOREB_STUB)
2953 ftable=(int)writememb;
2954 if(type==STOREH_STUB)
2955 ftable=(int)writememh;
2956 if(type==STOREW_STUB)
2957 ftable=(int)writemem;
2958 if(type==STORED_STUB)
2959 ftable=(int)writememd;
2960 emit_writeword(rs,(int)&address);
2961 emit_shrimm(rs,16,addr);
2962 emit_movmem_indexedx8(ftable,addr,addr);
2963 if(type==STOREB_STUB)
2964 emit_writebyte(rt,(int)&byte);
2965 if(type==STOREH_STUB)
2966 emit_writehword(rt,(int)&hword);
2967 if(type==STOREW_STUB)
2968 emit_writeword(rt,(int)&word);
2969 if(type==STORED_STUB) {
2970 emit_writeword(rt,(int)&dword);
2971 emit_writeword(r?rth:rt,(int)&dword+4);
2972 }
2973 save_regs(reglist);
2974 ds=i_regs!=&regs[i];
2975 int real_rs=get_reg(i_regmap,rs1[i]);
2976 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)),i);
2977 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2978
2979 int temp;
2980 int cc=get_reg(i_regmap,CCREG);
2981 if(cc<0) {
2982 if(addr==HOST_CCREG)
2983 {
2984 cc=0;temp=1;
2985 assert(cc!=HOST_CCREG);
2986 assert(temp!=HOST_CCREG);
2987 emit_loadreg(CCREG,cc);
2988 }
2989 else
2990 {
2991 cc=HOST_CCREG;
2992 emit_loadreg(CCREG,cc);
2993 temp=!addr;
2994 }
2995 }
2996 else
2997 {
2998 temp=!addr;
2999 }
3000 emit_readword((int)&last_count,temp);
3001 emit_addimm(cc,CLOCK_DIVIDER*(stubs[n][6]+1),cc);
3002 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,0);
3003 emit_add(cc,temp,cc);
3004 emit_writeword(cc,(int)&Count);
3005 emit_callreg(addr);
3006 emit_readword((int)&Count,HOST_CCREG);
3007 emit_readword((int)&next_interupt,ECX);
3008 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(stubs[n][6]+1),HOST_CCREG);
3009 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3010 emit_writeword(ECX,(int)&last_count);
3011 emit_storereg(CCREG,HOST_CCREG);
3012 restore_regs(reglist);
3013 if((cc=get_reg(i_regmap,CCREG))>=0) {
3014 emit_loadreg(CCREG,cc);
3015 }
3016 emit_jmp(stubs[n][2]); // return address
3017}
3018
3019inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3020{
3021 assem_debug("inline_writestub\n");
3022 int rs=get_reg(regmap,-1);
3023 int rth=get_reg(regmap,target|64);
3024 int rt=get_reg(regmap,target);
3025 assert(rs>=0);
3026 assert(rt>=0);
3027 int ftable=0;
3028 if(type==STOREB_STUB)
3029 ftable=(int)writememb;
3030 if(type==STOREH_STUB)
3031 ftable=(int)writememh;
3032 if(type==STOREW_STUB)
3033 ftable=(int)writemem;
3034 if(type==STORED_STUB)
3035 ftable=(int)writememd;
3036 emit_writeword(rs,(int)&address);
3037 if(type==STOREB_STUB)
3038 emit_writebyte(rt,(int)&byte);
3039 if(type==STOREH_STUB)
3040 emit_writehword(rt,(int)&hword);
3041 if(type==STOREW_STUB)
3042 emit_writeword(rt,(int)&word);
3043 if(type==STORED_STUB) {
3044 emit_writeword(rt,(int)&dword);
3045 emit_writeword(target?rth:rt,(int)&dword+4);
3046 }
3047 save_regs(reglist);
3048 int cc=get_reg(regmap,CCREG);
3049 int temp;
3050 if(cc<0) {
3051 if(rs==HOST_CCREG)
3052 {
3053 cc=0;temp=1;
3054 assert(cc!=HOST_CCREG);
3055 assert(temp!=HOST_CCREG);
3056 emit_loadreg(CCREG,cc);
3057 }
3058 else
3059 {
3060 cc=HOST_CCREG;
3061 emit_loadreg(CCREG,cc);
3062 temp=!rs;
3063 }
3064 }
3065 else
3066 {
3067 temp=!rs;
3068 }
3069 emit_readword((int)&last_count,temp);
3070 emit_addimm(cc,CLOCK_DIVIDER*(adj+1),cc);
3071 emit_add(cc,temp,cc);
3072 emit_writeword(cc,(int)&Count);
3073 if((signed int)addr>=(signed int)0xC0000000) {
3074 // Pagefault address
3075 int ds=regmap!=regs[i].regmap;
3076 emit_writeword_imm_esp(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,0);
3077 }
3078 emit_call(((uint64_t *)ftable)[addr>>16]);
3079 emit_readword((int)&Count,HOST_CCREG);
3080 emit_readword((int)&next_interupt,ECX);
3081 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*(adj+1),HOST_CCREG);
3082 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3083 emit_writeword(ECX,(int)&last_count);
3084 emit_storereg(CCREG,HOST_CCREG);
3085 restore_regs(reglist);
3086 if((cc=get_reg(regmap,CCREG))>=0) {
3087 emit_loadreg(CCREG,cc);
3088 }
3089}
3090
3091do_unalignedwritestub(int n)
3092{
3093 set_jump_target(stubs[n][1],(int)out);
3094 output_byte(0xCC);
3095 emit_jmp(stubs[n][2]); // return address
3096}
3097
// Debug helper: print the register values passed as arguments.
// They are printed in the order a, b, c, d, ebp, esi, edi; the esp argument
// is not printed.
// The final value in parentheses is read from the int just below the first
// parameter in memory.
// NOTE(review): (&edi)[-1] steps outside the parameter object; this looks
// like a peek at a 32-bit stack slot (e.g. a return address) and is not
// meaningful under the x86-64 calling convention -- confirm before relying
// on it.
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
}
3102
3103do_invstub(int n)
3104{
3105 u_int reglist=stubs[n][3];
3106 set_jump_target(stubs[n][1],(int)out);
3107 save_regs(reglist);
3108 if(stubs[n][4]!=EDI) emit_mov(stubs[n][4],EDI);
3109 emit_call((int)&invalidate_block);
3110 restore_regs(reglist);
3111 emit_jmp(stubs[n][2]); // return address
3112}
3113
3114int do_dirty_stub(int i)
3115{
3116 assem_debug("do_dirty_stub %x\n",start+i*4);
3117 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
3118 emit_movimm((int)copy,EBX);
3119 emit_movimm(slen*4,ECX);
3120 emit_movimm(start+i*4,12);
3121 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3122 int entry=(int)out;
3123 load_regs_entry(i);
3124 if(entry==(int)out) entry=instr_addr[i];
3125 emit_jmp(instr_addr[i]);
3126 return entry;
3127}
3128
3129void do_dirty_stub_ds()
3130{
3131 emit_movimm((int)start<(int)0xC0000000?(int)source:(int)start,EAX);
3132 emit_movimm((int)copy,EBX);
3133 emit_movimm(slen*4,ECX);
3134 emit_movimm(start+1,12);
3135 emit_call((int)&verify_code_ds);
3136}
3137
3138do_cop1stub(int n)
3139{
3140 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3141 set_jump_target(stubs[n][1],(int)out);
3142 int i=stubs[n][3];
3143 int rs=stubs[n][4];
3144 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3145 int ds=stubs[n][6];
3146 if(!ds) {
3147 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3148 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3149 }
3150 //else {printf("fp exception in delay slot\n");}
3151 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3152 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3153 emit_movimm(start+(i-ds)*4,EAX); // Get PC
3154 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
3155 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3156}
3157
3158/* TLB */
3159
3160int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3161{
3162 if(c) {
3163 if((signed int)addr>=(signed int)0xC0000000) {
3164 emit_readword((int)(memory_map+(addr>>12)),map);
3165 }
3166 else
3167 return -1; // No mapping
3168 }
3169 else {
3170 if(s!=map) emit_mov(s,map);
3171 emit_shrimm(map,12,map);
3172 // Schedule this while we wait on the load
3173 //if(x) emit_xorimm(addr,x,addr);
3174 if(shift>=0) emit_lea8(s,shift);
3175 if(~a) emit_andimm(s,a,ar);
3176 emit_movmem_indexedx4((int)memory_map,map,map);
3177 }
3178 return map;
3179}
3180int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3181{
3182 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3183 emit_test(map,map);
3184 *jaddr=(int)out;
3185 emit_js(0);
3186 }
3187 return map;
3188}
3189
// Combine the address register with the memory_map entry for a read:
// when map is a valid register (>=0), emit a lea with displacement 0 that
// combines ar with map scaled by 4 and writes the result back to ar.
// With map<0 nothing is emitted.
//
// The function is declared int but used to fall off the end without a
// value; it now returns 0 so a caller reading the result is well defined.
int gen_tlb_addr_r(int ar, int map) {
  if(map>=0) {
    emit_leairrx4(0,ar,map,ar);
  }
  return 0;
}
3195
3196int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3197{
3198 if(c) {
3199 if(addr<0x80800000||addr>=0xC0000000) {
3200 emit_readword((int)(memory_map+(addr>>12)),map);
3201 }
3202 else
3203 return -1; // No mapping
3204 }
3205 else {
3206 if(s!=map) emit_mov(s,map);
3207 //if(s!=ar) emit_mov(s,ar);
3208 emit_shrimm(map,12,map);
3209 // Schedule this while we wait on the load
3210 //if(x) emit_xorimm(s,x,addr);
3211 emit_movmem_indexedx4((int)memory_map,map,map);
3212 }
3213 emit_shlimm(map,2,map);
3214 return map;
3215}
3216int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3217{
3218 if(!c||addr<0x80800000||addr>=0xC0000000) {
3219 *jaddr=(int)out;
3220 emit_jc(0);
3221 }
3222}
3223
// Combine the address register with the memory_map entry for a write:
// when map is a valid register (>=0), emit a lea with displacement 0 that
// combines ar with map (scale 1) and writes the result back to ar.
// With map<0 nothing is emitted.
//
// The function is declared int but used to fall off the end without a
// value; it now returns 0 so a caller reading the result is well defined.
int gen_tlb_addr_w(int ar, int map) {
  if(map>=0) {
    emit_leairrx1(0,ar,map,ar);
  }
  return 0;
}
3229
// We don't need this for x86: the function intentionally emits nothing.
// (Elsewhere this would presumably materialise memory_map+(addr>>12) in
// 'reg' -- see the original commented-out line; not needed here.)
//
// The return type used to be implicit int (no longer valid C); it is now
// explicit and the function always returns 0.
int generate_map_const(u_int addr,int reg) {
  (void)addr;
  (void)reg;
  return 0;
}
3234
3235/* Special assem */
3236
// Assemble a variable shift (shift amount taken from a register) for
// instruction i.
//
// opcode2<=0x07 handles the 32-bit SLLV/SRLV/SRAV; everything else is
// treated as the 64-bit DSLLV/DSRLV/DSRAV, operating on a low/high pair of
// 32-bit host registers.
//
// The x86 shift-by-register instructions used here (emit_shlcl & co.) take
// their count from CL, so most of this function is about getting the shift
// amount into ECX without losing whatever ECX currently holds: when ECX is
// allocated to something else it is swapped with emit_xchg and swapped back
// at the end.
//
// NOTE(review): register numbers are kept in plain 'char' and compared with
// >=0, which relies on char being signed on this target.
void shift_assemble_x86(int i,struct regstat *i_regs)
{
  // A shift whose destination is guest register 0 produces nothing.
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      char s,t,shift;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(t>=0){
        if(rs1[i]==0)
        {
          // Shifting guest register 0: result is zero.
          emit_zeroreg(t);
        }
        else if(rs2[i]==0)
        {
          // Shift amount is guest register 0: plain move.
          assert(s>=0);
          if(s!=t) emit_mov(s,t);
        }
        else
        {
          char temp=get_reg(i_regs->regmap,-1);
          assert(s>=0);
          if(t==ECX&&s!=ECX) {
            // The target is ECX itself: put the count in ECX and do the
            // shift in the register 'shift' (or temp), see the
            // t==ECX?shift:t selections below.
            if(shift!=ECX) emit_mov(shift,ECX);
            if(rt1[i]==rs2[i]) {shift=temp;}
            if(s!=shift) emit_mov(s,shift);
          }
          else
          {
            // Target and shift amount are the same guest register: keep a
            // copy of the amount in temp before the target is overwritten.
            if(rt1[i]==rs2[i]) {emit_mov(shift,temp);shift=temp;}
            if(s!=t) emit_mov(s,t);
            if(shift!=ECX) {
              if(i_regs->regmap[ECX]<0)
                emit_mov(shift,ECX);
              else
                emit_xchg(shift,ECX); // ECX is live: swap, undo below
            }
          }
          if(opcode2[i]==4) // SLLV
          {
            emit_shlcl(t==ECX?shift:t);
          }
          if(opcode2[i]==6) // SRLV
          {
            emit_shrcl(t==ECX?shift:t);
          }
          if(opcode2[i]==7) // SRAV
          {
            emit_sarcl(t==ECX?shift:t);
          }
          // Swap back so ECX (and, when t==ECX, the result) end up in place.
          if(shift!=ECX&&i_regs->regmap[ECX]>=0) emit_xchg(shift,ECX);
        }
      }
    } else { // DSLLV/DSRLV/DSRAV
      char sh,sl,th,tl,shift;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(tl>=0){
        if(rs1[i]==0)
        {
          // Shifting guest register 0: result is zero.
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else if(rs2[i]==0)
        {
          // Shift amount is guest register 0: plain 64-bit move.
          assert(sl>=0);
          if(sl!=tl) emit_mov(sl,tl);
          if(th>=0&&sh!=th) emit_mov(sh,th);
        }
        else
        {
          // FIXME: What if shift==tl ?
          assert(shift!=tl);
          int temp=get_reg(i_regs->regmap,-1);
          // real_th remembers whether the upper half is actually wanted;
          // th may be redirected to temp just to compute the lower half.
          int real_th=th;
          if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
          assert(sl>=0);
          assert(sh>=0);
          if(tl==ECX&&sl!=ECX) {
            // Low target is ECX: count goes to ECX, low half is computed
            // in 'shift' (see the tl==ECX?shift:tl selections below).
            if(shift!=ECX) emit_mov(shift,ECX);
            if(sl!=shift) emit_mov(sl,shift);
            if(th>=0 && sh!=th) emit_mov(sh,th);
          }
          else if(th==ECX&&sh!=ECX) {
            // High target is ECX: same trick for the high half.
            if(shift!=ECX) emit_mov(shift,ECX);
            if(sh!=shift) emit_mov(sh,shift);
            if(sl!=tl) emit_mov(sl,tl);
          }
          else
          {
            if(sl!=tl) emit_mov(sl,tl);
            if(th>=0 && sh!=th) emit_mov(sh,th);
            if(shift!=ECX) {
              if(i_regs->regmap[ECX]<0)
                emit_mov(shift,ECX);
              else
                emit_xchg(shift,ECX); // ECX is live: swap, undo below
            }
          }
          // Each 64-bit shift is a double-register shift plus a single
          // shift, followed by a fix-up selected by bit 5 of the count
          // (emit_testimm(ECX,32)) for counts of 32 and above.
          if(opcode2[i]==0x14) // DSLLV
          {
            if(th>=0) emit_shldcl(th==ECX?shift:th,tl==ECX?shift:tl);
            emit_shlcl(tl==ECX?shift:tl);
            emit_testimm(ECX,32);
            // count>=32: high = shifted low, low = 0
            if(th>=0) emit_cmovne_reg(tl==ECX?shift:tl,th==ECX?shift:th);
            emit_cmovne(&const_zero,tl==ECX?shift:tl);
          }
          if(opcode2[i]==0x16) // DSRLV
          {
            assert(th>=0);
            emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
            emit_shrcl(th==ECX?shift:th);
            emit_testimm(ECX,32);
            // count>=32: low = shifted high, high = 0
            emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
            if(real_th>=0) emit_cmovne(&const_zero,th==ECX?shift:th);
          }
          if(opcode2[i]==0x17) // DSRAV
          {
            assert(th>=0);
            emit_shrdcl(tl==ECX?shift:tl,th==ECX?shift:th);
            if(real_th>=0) {
              assert(temp>=0);
              emit_mov(th==ECX?shift:th,temp==ECX?shift:temp);
            }
            emit_sarcl(th==ECX?shift:th);
            // temp = high half shifted right arithmetically by 31 (sign fill)
            if(real_th>=0) emit_sarimm(temp==ECX?shift:temp,31,temp==ECX?shift:temp);
            emit_testimm(ECX,32);
            // count>=32: low = shifted high, high = sign fill
            emit_cmovne_reg(th==ECX?shift:th,tl==ECX?shift:tl);
            if(real_th>=0) emit_cmovne_reg(temp==ECX?shift:temp,th==ECX?shift:th);
          }
          // Swap back so ECX and the results end up in their own registers.
          if(shift!=ECX&&(i_regs->regmap[ECX]>=0||temp==ECX)) emit_xchg(shift,ECX);
        }
      }
    }
  }
}
#define shift_assemble shift_assemble_x86
3378
// Assemble an unaligned-load instruction for instruction i:
// LWL/LWR (opcode 0x22/0x26) or LDL/LDR (opcode 0x1A/0x1B).
//
// Outline of the emitted code:
//   1. Compute the aligned address (in temp2) and the bit offset of the
//      access within the word/doubleword (in temp).
//   2. Load the aligned word/doubleword into FTEMP, either directly, through
//      a stub (run-time address outside RAM / unmapped) or through an inline
//      handler call (constant address that is not a memory target).
//   3. LWL/LWR: build a mask with a variable shift and merge the loaded word
//      into the target register.
//      LDL/LDR: spill the operands to the stack and call ldl_merge /
//      ldr_merge to do the merge.
void loadlr_assemble_x86(int i,struct regstat *i_regs)
{
  int s,th,tl,temp,temp2,addr,map=-1;
  int offset;
  int jaddr=0;
  int memtarget,c=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64);
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  temp2=get_reg(i_regs->regmap,FTEMP);
  addr=get_reg(i_regs->regmap,AGEN1+(i&1));
  assert(addr<0);
  offset=imm[i];
  // reglist: every allocated host register, plus temp, must survive a
  // handler call.
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  reglist|=1<<temp;
  // NOTE(review): c is still 0 at this point (it is only set below), so the
  // '||c' term has no effect here.
  if(offset||s<0||c) addr=temp2;
  else addr=s;
  if(s>=0) {
    // c: base register held a known constant on entry.
    c=(i_regs->wasconst>>s)&1;
    memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
    if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
  }
  if(tl>=0) {
    //assert(tl>=0);
    //assert(rt1[i]);
    if(!using_tlb) {
      if(!c) {
        // temp = bit offset source, temp2 = aligned address
        emit_lea8(addr,temp);
        if (opcode[i]==0x22||opcode[i]==0x26) {
          emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
        }else{
          emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
        }
        // Range check; the branch at jaddr is attached to a stub below.
        emit_cmpimm(addr,0x800000);
        jaddr=(int)out;
        emit_jno(0);
      }
      else {
        // Constant address: the bit offset is known at compile time.
        if (opcode[i]==0x22||opcode[i]==0x26) {
          emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
        }else{
          emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
        }
      }
    }else{ // using tlb
      int a;
      if(c) {
        a=-1;
      }else if (opcode[i]==0x22||opcode[i]==0x26) {
        a=0xFFFFFFFC; // LWL/LWR
      }else{
        a=0xFFFFFFF8; // LDL/LDR
      }
      map=get_reg(i_regs->regmap,TLREG);
      assert(map>=0);
      map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
      if(c) {
        if (opcode[i]==0x22||opcode[i]==0x26) {
          emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
        }else{
          emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
        }
      }
      do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
    }
    if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
      if(!c||memtarget) {
        //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
        emit_readword_indexed_tlb(0,temp2,map,temp2);
        if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
      // temp = shift count in bits (0, 8, 16 or 24); inverted for LWR.
      emit_andimm(temp,24,temp);
      if (opcode[i]==0x26) emit_xorimm(temp,24,temp); // LWR
      // Build a mask by shifting an all-ones value (temp3) by the same
      // count as the loaded word; its complement selects the bits of tl
      // that are kept. The shift count must be in ECX, hence two variants.
      if(temp==ECX)
      {
        // temp3: a scratch register that is not temp2, preserved by push/pop.
        int temp3=EDX;
        if(temp3==temp2) temp3++;
        emit_pushreg(temp3);
        emit_movimm(-1,temp3);
        if (opcode[i]==0x26) {
          emit_shrcl(temp3);
          emit_shrcl(temp2);
        }else{
          emit_shlcl(temp3);
          emit_shlcl(temp2);
        }
        // ECX (== temp) = ~mask
        emit_mov(temp3,ECX);
        emit_not(ECX,ECX);
        emit_popreg(temp3);
      }
      else
      {
        // temp3: a scratch register distinct from temp and temp2,
        // preserved by push/pop.
        int temp3=EBP;
        if(temp3==temp) temp3++;
        if(temp3==temp2) temp3++;
        if(temp3==temp) temp3++;
        // Swap the count into ECX; ECX's old value sits in temp meanwhile.
        emit_xchg(ECX,temp);
        emit_pushreg(temp3);
        emit_movimm(-1,temp3);
        if (opcode[i]==0x26) {
          emit_shrcl(temp3);
          emit_shrcl(temp2==ECX?temp:temp2);
        }else{
          emit_shlcl(temp3);
          emit_shlcl(temp2==ECX?temp:temp2);
        }
        emit_not(temp3,temp3);
        // Restore ECX, then temp = ~mask
        emit_mov(temp,ECX);
        emit_mov(temp3,temp);
        emit_popreg(temp3);
      }
      // tl = (tl & ~mask) | shifted word
      emit_and(temp,tl,tl);
      emit_or(temp2,tl,tl);
      //emit_storereg(rt1[i],tl); // DEBUG
    }
    if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
      // Write back the base register halves if they were dirty; they are
      // reloaded at the end, after the merge helper has been called.
      if(s>=0)
        if((i_regs->wasdirty>>s)&1)
          emit_storereg(rs1[i],s);
      if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
        if((i_regs->wasdirty>>get_reg(i_regs->regmap,rs1[i]|64))&1)
          emit_storereg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
      int temp2h=get_reg(i_regs->regmap,FTEMP|64);
      if(!c||memtarget) {
        //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
        //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
        emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
        if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
      // temp = shift count in bits (multiple of 8, 0..56)
      emit_andimm(temp,56,temp);
      //output_byte(0xCC);
      //emit_pushreg(temp);
      //emit_pushreg(temp2h);
      //emit_pushreg(temp2);
      //emit_pushreg(th);
      //emit_pushreg(tl);
      // Stack frame (20 bytes): [0] tl, [4] th, [8] temp2, [12] temp2h,
      // [16] temp. The two 64-bit values are then read back as the first
      // two arguments and the shift count is passed in EDX.
      emit_addimm64(ESP,-20,ESP);
      emit_writeword_indexed(temp,16,ESP);
      emit_writeword_indexed(temp2h,12,ESP);
      emit_writeword_indexed(temp2,8,ESP);
      emit_writeword_indexed(th,4,ESP);
      emit_writeword_indexed(tl,0,ESP);
      emit_mov(temp,EDX);
      emit_readdword_indexed(0,ESP,ARG1_REG);
      emit_readdword_indexed(8,ESP,ARG2_REG);
      if(opcode[i]==0x1A) emit_call((int)ldl_merge);
      if(opcode[i]==0x1B) emit_call((int)ldr_merge);
      emit_addimm64(ESP,20,ESP);
      // The 64-bit result comes back in EAX: low half to tl, upper 32 bits
      // to th.
      if(th!=EAX) {
        emit_mov64(EAX,th);
      }
      emit_mov(EAX,tl);
      emit_shrimm64(th,32,th);
      if(s>=0) emit_loadreg(rs1[i],s);
      if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
        emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
    }
  }
}
#define loadlr_assemble loadlr_assemble_x86
3547
3548void cop0_assemble(int i,struct regstat *i_regs)
3549{
3550 if(opcode2[i]==0) // MFC0
3551 {
3552 signed char t=get_reg(i_regs->regmap,rt1[i]);
3553 char copr=(source[i]>>11)&0x1f;
3554 //assert(t>=0); // Why does this happen? OOT is weird
3555 if(t>=0) {
3556 emit_writedword_imm32((int)&fake_pc,(int)&PC);
3557 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
3558 if(copr==9) {
3559 emit_readword((int)&last_count,ECX);
3560 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3561 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3562 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3563 emit_writeword(HOST_CCREG,(int)&Count);
3564 }
3565 emit_call((int)MFC0);
3566 emit_readword((int)&readmem_dword,t);
3567 }
3568 }
3569 else if(opcode2[i]==4) // MTC0
3570 {
3571 signed char s=get_reg(i_regs->regmap,rs1[i]);
3572 char copr=(source[i]>>11)&0x1f;
3573 assert(s>=0);
3574 emit_writeword(s,(int)&readmem_dword);
3575 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->was32); // FIXME
3576 emit_writedword_imm32((int)&fake_pc,(int)&PC);
3577 emit_writebyte_imm((source[i]>>11)&0x1f,(int)&(fake_pc.f.r.nrd));
3578 if(copr==9||copr==11||copr==12) {
3579 emit_readword((int)&last_count,ECX);
3580 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3581 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3582 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3583 emit_writeword(HOST_CCREG,(int)&Count);
3584 }
3585 // What a mess. The status register (12) can enable interrupts,
3586 // so needs a special case to handle a pending interrupt.
3587 // The interrupt must be taken immediately, because a subsequent
3588 // instruction might disable interrupts again.
3589 if(copr==12&&!is_delayslot) {
3590 emit_writeword_imm(start+i*4+4,(int)&pcaddr);
3591 emit_writebyte_imm(0,(int)&pending_exception);
3592 }
3593 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3594 //else
3595 emit_call((int)MTC0);
3596 if(copr==9||copr==11||copr==12) {
3597 emit_readword((int)&Count,HOST_CCREG);
3598 emit_readword((int)&next_interupt,ECX);
3599 emit_addimm(HOST_CCREG,-CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3600 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
3601 emit_writeword(ECX,(int)&last_count);
3602 emit_storereg(CCREG,HOST_CCREG);
3603 }
3604 emit_loadreg(rs1[i],s);
3605 if(copr==12) {
3606 assert(!is_delayslot);
3607 //if(is_delayslot) output_byte(0xcc);
3608 emit_cmpmem_imm_byte((int)&pending_exception,0);
3609 emit_jne((int)&do_interrupt);
3610 }
3611 cop1_usable=0;
3612 }
3613 else
3614 {
3615 assert(opcode2[i]==0x10);
3616 if((source[i]&0x3f)==0x01) // TLBR
3617 emit_call((int)TLBR);
3618 if((source[i]&0x3f)==0x02) // TLBWI
3619 emit_call((int)TLBWI_new);
3620 if((source[i]&0x3f)==0x06) { // TLBWR
3621 // The TLB entry written by TLBWR is dependent on the count,
3622 // so update the cycle count
3623 emit_readword((int)&last_count,ECX);
3624 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3625 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3626 emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG);
3627 emit_writeword(HOST_CCREG,(int)&Count);
3628 emit_call((int)TLBWR_new);
3629 }
3630 if((source[i]&0x3f)==0x08) // TLBP
3631 emit_call((int)TLBP);
3632 if((source[i]&0x3f)==0x18) // ERET
3633 {
3634 int count=ccadj[i];
3635 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3636 emit_addimm_and_set_flags(CLOCK_DIVIDER*count,HOST_CCREG); // TODO: Should there be an extra cycle here?
3637 emit_jmp((int)jump_eret);
3638 }
3639 }