drc: ujump DS $ra overwrite fix?
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  *   Mupen64plus - new_dynarec.c                                           *
3  *   Copyright (C) 2009-2010 Ari64                                         *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
19  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21 #include <stdlib.h>
22 #include <stdint.h> //include for uint64_t
23 #include <assert.h>
24
25 #include "emu_if.h" //emulator interface
26
27 #include <sys/mman.h>
28
29 #ifdef __i386__
30 #include "assem_x86.h"
31 #endif
32 #ifdef __x86_64__
33 #include "assem_x64.h"
34 #endif
35 #ifdef __arm__
36 #include "assem_arm.h"
37 #endif
38
39 #define MAXBLOCK 4096
40 #define MAX_OUTPUT_BLOCK_SIZE 262144
41 #define CLOCK_DIVIDER 2
42
43 struct regstat
44 {
45   signed char regmap_entry[HOST_REGS];
46   signed char regmap[HOST_REGS];
47   uint64_t was32;
48   uint64_t is32;
49   uint64_t wasdirty;
50   uint64_t dirty;
51   uint64_t u;
52   uint64_t uu;
53   u_int wasconst;
54   u_int isconst;
55   uint64_t constmap[HOST_REGS];
56 };
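/* Tracks the recompiler's view of register state at one instruction
   (a summary inferred from how the fields are used later in this file):
   regmap[] maps each host register to the guest register it holds (-1 =
   free); is32/was32 are per-guest-register masks of values known to be
   sign-extended 32-bit; dirty/wasdirty mark host registers that still need
   to be written back; u/uu are the "unneeded" masks for the lower and upper
   halves; isconst/constmap record host registers holding known constants. */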
57
58 struct ll_entry
59 {
60   u_int vaddr;
61   u_int reg32;
62   void *addr;
63   struct ll_entry *next;
64 };
65
66   u_int start;
67   u_int *source;
68   u_int pagelimit;
69   char insn[MAXBLOCK][10];
70   u_char itype[MAXBLOCK];
71   u_char opcode[MAXBLOCK];
72   u_char opcode2[MAXBLOCK];
73   u_char bt[MAXBLOCK];
74   u_char rs1[MAXBLOCK];
75   u_char rs2[MAXBLOCK];
76   u_char rt1[MAXBLOCK];
77   u_char rt2[MAXBLOCK];
78   u_char us1[MAXBLOCK];
79   u_char us2[MAXBLOCK];
80   u_char dep1[MAXBLOCK];
81   u_char dep2[MAXBLOCK];
82   u_char lt1[MAXBLOCK];
83   int imm[MAXBLOCK];
84   u_int ba[MAXBLOCK];
85   char likely[MAXBLOCK];
86   char is_ds[MAXBLOCK];
87   char ooo[MAXBLOCK];
88   uint64_t unneeded_reg[MAXBLOCK];
89   uint64_t unneeded_reg_upper[MAXBLOCK];
90   uint64_t branch_unneeded_reg[MAXBLOCK];
91   uint64_t branch_unneeded_reg_upper[MAXBLOCK];
92   uint64_t p32[MAXBLOCK];
93   uint64_t pr32[MAXBLOCK];
94   signed char regmap_pre[MAXBLOCK][HOST_REGS];
95   signed char regmap[MAXBLOCK][HOST_REGS];
96   signed char regmap_entry[MAXBLOCK][HOST_REGS];
97   uint64_t constmap[MAXBLOCK][HOST_REGS];
98   struct regstat regs[MAXBLOCK];
99   struct regstat branch_regs[MAXBLOCK];
100   signed char minimum_free_regs[MAXBLOCK];
101   u_int needed_reg[MAXBLOCK];
102   uint64_t requires_32bit[MAXBLOCK];
103   u_int wont_dirty[MAXBLOCK];
104   u_int will_dirty[MAXBLOCK];
105   int ccadj[MAXBLOCK];
106   int slen;
107   u_int instr_addr[MAXBLOCK];
108   u_int link_addr[MAXBLOCK][3];
109   int linkcount;
110   u_int stubs[MAXBLOCK*3][8];
111   int stubcount;
112   u_int literals[1024][2];
113   int literalcount;
114   int is_delayslot;
115   int cop1_usable;
116   u_char *out;
117   struct ll_entry *jump_in[4096];
118   struct ll_entry *jump_out[4096];
119   struct ll_entry *jump_dirty[4096];
120   u_int hash_table[65536][4]  __attribute__((aligned(16)));
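  /* Each 16-byte hash_table bin holds two {vaddr, compiled address} pairs:
     bin[0]/bin[1] and bin[2]/bin[3].  Lookups (get_addr_ht below) check
     both pairs; when a new translation is installed (get_addr), the
     previous pair is shifted into slots 2/3 so the most recently used entry
     stays in slots 0/1.  A value of -1 marks an empty slot. */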
121   char shadow[1048576]  __attribute__((aligned(16)));
122   void *copy;
123   int expirep;
124 #ifndef PCSX
125   u_int using_tlb;
126 #else
127   static const u_int using_tlb=0;
128 #endif
129   static u_int sp_in_mirror;
130   u_int stop_after_jal;
131   extern u_char restore_candidate[512];
132   extern int cycle_count;
133
134   /* registers that may be allocated */
135   /* 1-31 gpr */
136 #define HIREG 32 // hi
137 #define LOREG 33 // lo
138 #define FSREG 34 // FPU status (FCSR)
139 #define CSREG 35 // Coprocessor status
140 #define CCREG 36 // Cycle count
141 #define INVCP 37 // Pointer to invalid_code
142 #define MMREG 38 // Pointer to memory_map
143 #define ROREG 39 // ram offset (if rdram!=0x80000000)
144 #define TEMPREG 40
145 #define FTEMP 40 // FPU temporary register
146 #define PTEMP 41 // Prefetch temporary register
147 #define TLREG 42 // TLB mapping offset
148 #define RHASH 43 // Return address hash
149 #define RHTBL 44 // Return address hash table address
150 #define RTEMP 45 // JR/JALR address register
151 #define MAXREG 45
152 #define AGEN1 46 // Address generation temporary register
153 #define AGEN2 47 // Address generation temporary register
154 #define MGEN1 48 // Maptable address generation temporary register
155 #define MGEN2 49 // Maptable address generation temporary register
156 #define BTREG 50 // Branch target temporary register
157
158   /* instruction types */
159 #define NOP 0     // No operation
160 #define LOAD 1    // Load
161 #define STORE 2   // Store
162 #define LOADLR 3  // Unaligned load
163 #define STORELR 4 // Unaligned store
164 #define MOV 5     // Move 
165 #define ALU 6     // Arithmetic/logic
166 #define MULTDIV 7 // Multiply/divide
167 #define SHIFT 8   // Shift by register
168 #define SHIFTIMM 9// Shift by immediate
169 #define IMM16 10  // 16-bit immediate
170 #define RJUMP 11  // Unconditional jump to register
171 #define UJUMP 12  // Unconditional jump
172 #define CJUMP 13  // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
173 #define SJUMP 14  // Conditional branch (regimm format)
174 #define COP0 15   // Coprocessor 0
175 #define COP1 16   // Coprocessor 1
176 #define C1LS 17   // Coprocessor 1 load/store
177 #define FJUMP 18  // Conditional branch (floating point)
178 #define FLOAT 19  // Floating point unit
179 #define FCONV 20  // Convert integer to float
180 #define FCOMP 21  // Floating point compare (sets FSREG)
181 #define SYSCALL 22// SYSCALL
182 #define OTHER 23  // Other
183 #define SPAN 24   // Branch/delay slot spans 2 pages
184 #define NI 25     // Not implemented
185 #define HLECALL 26// PCSX fake opcodes for HLE
186 #define COP2 27   // Coprocessor 2 move
187 #define C2LS 28   // Coprocessor 2 load/store
188 #define C2OP 29   // Coprocessor 2 operation
189 #define INTCALL 30// Call interpreter to handle rare corner cases
190
191   /* stubs */
192 #define CC_STUB 1
193 #define FP_STUB 2
194 #define LOADB_STUB 3
195 #define LOADH_STUB 4
196 #define LOADW_STUB 5
197 #define LOADD_STUB 6
198 #define LOADBU_STUB 7
199 #define LOADHU_STUB 8
200 #define STOREB_STUB 9
201 #define STOREH_STUB 10
202 #define STOREW_STUB 11
203 #define STORED_STUB 12
204 #define STORELR_STUB 13
205 #define INVCODE_STUB 14
206
207   /* branch codes */
208 #define TAKEN 1
209 #define NOTTAKEN 2
210 #define NULLDS 3
211
212 // asm linkage
213 int new_recompile_block(int addr);
214 void *get_addr_ht(u_int vaddr);
215 void invalidate_block(u_int block);
216 void invalidate_addr(u_int addr);
217 void remove_hash(int vaddr);
218 void jump_vaddr();
219 void dyna_linker();
220 void dyna_linker_ds();
221 void verify_code();
222 void verify_code_vm();
223 void verify_code_ds();
224 void cc_interrupt();
225 void fp_exception();
226 void fp_exception_ds();
227 void jump_syscall();
228 void jump_syscall_hle();
229 void jump_eret();
230 void jump_hlecall();
231 void jump_intcall();
232 void new_dyna_leave();
233
234 // TLB
235 void TLBWI_new();
236 void TLBWR_new();
237 void read_nomem_new();
238 void read_nomemb_new();
239 void read_nomemh_new();
240 void read_nomemd_new();
241 void write_nomem_new();
242 void write_nomemb_new();
243 void write_nomemh_new();
244 void write_nomemd_new();
245 void write_rdram_new();
246 void write_rdramb_new();
247 void write_rdramh_new();
248 void write_rdramd_new();
249 extern u_int memory_map[1048576];
250
251 // Needed by assembler
252 void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
253 void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
254 void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
255 void load_all_regs(signed char i_regmap[]);
256 void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
257 void load_regs_entry(int t);
258 void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
259
260 int tracedebug=0;
261
262 //#define DEBUG_CYCLE_COUNT 1
263
264 void nullf() {}
265 //#define assem_debug printf
266 //#define inv_debug printf
267 #define assem_debug nullf
268 #define inv_debug nullf
269
270 static void tlb_hacks()
271 {
272 #ifndef DISABLE_TLB
273   // Goldeneye hack
274   if (strncmp((char *) ROM_HEADER->nom, "GOLDENEYE",9) == 0)
275   {
276     u_int addr;
277     int n;
278     switch (ROM_HEADER->Country_code&0xFF) 
279     {
280       case 0x45: // U
281         addr=0x34b30;
282         break;                   
283       case 0x4A: // J 
284         addr=0x34b70;    
285         break;    
286       case 0x50: // E 
287         addr=0x329f0;
288         break;                        
289       default: 
290         // Unknown country code
291         addr=0;
292         break;
293     }
294     u_int rom_addr=(u_int)rom;
295     #ifdef ROM_COPY
296     // Since memory_map is 32-bit, on 64-bit systems the rom needs to be
297     // in the lower 4G of memory to use this hack.  Copy it if necessary.
298     if((void *)rom>(void *)0xffffffff) {
299       munmap(ROM_COPY, 67108864);
300       if(mmap(ROM_COPY, 12582912,
301               PROT_READ | PROT_WRITE,
302               MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
303               -1, 0) <= 0) {printf("mmap() failed\n");}
304       memcpy(ROM_COPY,rom,12582912);
305       rom_addr=(u_int)ROM_COPY;
306     }
307     #endif
308     if(addr) {
309       for(n=0x7F000;n<0x80000;n++) {
310         memory_map[n]=(((u_int)(rom_addr+addr-0x7F000000))>>2)|0x40000000;
311       }
312     }
313   }
314 #endif
315 }
316
317 static u_int get_page(u_int vaddr)
318 {
319 #ifndef PCSX
320   u_int page=(vaddr^0x80000000)>>12;
321 #else
322   u_int page=vaddr&~0xe0000000;
323   if (page < 0x1000000)
324     page &= ~0x0e00000; // RAM mirrors
325   page>>=12;
326 #endif
327 #ifndef DISABLE_TLB
328   if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12;
329 #endif
330   if(page>2048) page=2048+(page&2047);
331   return page;
332 }
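/* Worked example for the PCSX path above: vaddr 0x80123456 -> strip the
   segment bits (0x00123456), clear the RAM mirror bits (no change here),
   then >>12 gives page 0x123.  Anything beyond page 2048 is folded into the
   2048..4095 range, so the jump_in/jump_out/jump_dirty tables above never
   need more than 4096 entries. */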
333
334 static u_int get_vpage(u_int vaddr)
335 {
336   u_int vpage=(vaddr^0x80000000)>>12;
337 #ifndef DISABLE_TLB
338   if(vpage>262143&&tlb_LUT_r[vaddr>>12]) vpage&=2047; // jump_dirty uses a hash of the virtual address instead
339 #endif
340   if(vpage>2048) vpage=2048+(vpage&2047);
341   return vpage;
342 }
343
344 // Get address from virtual address
345 // This is called from the recompiled JR/JALR instructions
346 void *get_addr(u_int vaddr)
347 {
348   u_int page=get_page(vaddr);
349   u_int vpage=get_vpage(vaddr);
350   struct ll_entry *head;
351   //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
352   head=jump_in[page];
353   while(head!=NULL) {
354     if(head->vaddr==vaddr&&head->reg32==0) {
355   //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
356       int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
357       ht_bin[3]=ht_bin[1];
358       ht_bin[2]=ht_bin[0];
359       ht_bin[1]=(int)head->addr;
360       ht_bin[0]=vaddr;
361       return head->addr;
362     }
363     head=head->next;
364   }
365   head=jump_dirty[vpage];
366   while(head!=NULL) {
367     if(head->vaddr==vaddr&&head->reg32==0) {
368       //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
369       // Don't restore blocks which are about to expire from the cache
370       if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
371       if(verify_dirty(head->addr)) {
372         //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
373         invalid_code[vaddr>>12]=0;
374         memory_map[vaddr>>12]|=0x40000000;
375         if(vpage<2048) {
376 #ifndef DISABLE_TLB
377           if(tlb_LUT_r[vaddr>>12]) {
378             invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
379             memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
380           }
381 #endif
382           restore_candidate[vpage>>3]|=1<<(vpage&7);
383         }
384         else restore_candidate[page>>3]|=1<<(page&7);
385         int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
386         if(ht_bin[0]==vaddr) {
387           ht_bin[1]=(int)head->addr; // Replace existing entry
388         }
389         else
390         {
391           ht_bin[3]=ht_bin[1];
392           ht_bin[2]=ht_bin[0];
393           ht_bin[1]=(int)head->addr;
394           ht_bin[0]=vaddr;
395         }
396         return head->addr;
397       }
398     }
399     head=head->next;
400   }
401   //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
402   int r=new_recompile_block(vaddr);
403   if(r==0) return get_addr(vaddr);
404   // Execute in unmapped page, generate pagefault exception
405   Status|=2;
406   Cause=(vaddr<<31)|0x8;
407   EPC=(vaddr&1)?vaddr-5:vaddr;
408   BadVAddr=(vaddr&~1);
409   Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
410   EntryHi=BadVAddr&0xFFFFE000;
411   return get_addr_ht(0x80000000);
412 }
413 // Look up address in hash table first
414 void *get_addr_ht(u_int vaddr)
415 {
416   //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
417   int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
418   if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
419   if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
420   return get_addr(vaddr);
421 }
422
423 void *get_addr_32(u_int vaddr,u_int flags)
424 {
425 #ifdef FORCE32
426   return get_addr(vaddr);
427 #else
428   //printf("TRACE: count=%d next=%d (get_addr_32 %x,flags %x)\n",Count,next_interupt,vaddr,flags);
429   int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
430   if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
431   if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
432   u_int page=get_page(vaddr);
433   u_int vpage=get_vpage(vaddr);
434   struct ll_entry *head;
435   head=jump_in[page];
436   while(head!=NULL) {
437     if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
438       //printf("TRACE: count=%d next=%d (get_addr_32 match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
439       if(head->reg32==0) {
440         int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
441         if(ht_bin[0]==-1) {
442           ht_bin[1]=(int)head->addr;
443           ht_bin[0]=vaddr;
444         }else if(ht_bin[2]==-1) {
445           ht_bin[3]=(int)head->addr;
446           ht_bin[2]=vaddr;
447         }
448         //ht_bin[3]=ht_bin[1];
449         //ht_bin[2]=ht_bin[0];
450         //ht_bin[1]=(int)head->addr;
451         //ht_bin[0]=vaddr;
452       }
453       return head->addr;
454     }
455     head=head->next;
456   }
457   head=jump_dirty[vpage];
458   while(head!=NULL) {
459     if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
460       //printf("TRACE: count=%d next=%d (get_addr_32 match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
461       // Don't restore blocks which are about to expire from the cache
462       if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
463       if(verify_dirty(head->addr)) {
464         //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
465         invalid_code[vaddr>>12]=0;
466         memory_map[vaddr>>12]|=0x40000000;
467         if(vpage<2048) {
468 #ifndef DISABLE_TLB
469           if(tlb_LUT_r[vaddr>>12]) {
470             invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
471             memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
472           }
473 #endif
474           restore_candidate[vpage>>3]|=1<<(vpage&7);
475         }
476         else restore_candidate[page>>3]|=1<<(page&7);
477         if(head->reg32==0) {
478           int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
479           if(ht_bin[0]==-1) {
480             ht_bin[1]=(int)head->addr;
481             ht_bin[0]=vaddr;
482           }else if(ht_bin[2]==-1) {
483             ht_bin[3]=(int)head->addr;
484             ht_bin[2]=vaddr;
485           }
486           //ht_bin[3]=ht_bin[1];
487           //ht_bin[2]=ht_bin[0];
488           //ht_bin[1]=(int)head->addr;
489           //ht_bin[0]=vaddr;
490         }
491         return head->addr;
492       }
493     }
494     head=head->next;
495   }
496   //printf("TRACE: count=%d next=%d (get_addr_32 no-match %x,flags %x)\n",Count,next_interupt,vaddr,flags);
497   int r=new_recompile_block(vaddr);
498   if(r==0) return get_addr(vaddr);
499   // Execute in unmapped page, generate pagefault exception
500   Status|=2;
501   Cause=(vaddr<<31)|0x8;
502   EPC=(vaddr&1)?vaddr-5:vaddr;
503   BadVAddr=(vaddr&~1);
504   Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
505   EntryHi=BadVAddr&0xFFFFE000;
506   return get_addr_ht(0x80000000);
507 #endif
508 }
509
510 void clear_all_regs(signed char regmap[])
511 {
512   int hr;
513   for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
514 }
515
516 signed char get_reg(signed char regmap[],int r)
517 {
518   int hr;
519   for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
520   return -1;
521 }
522
523 // Find a register that is available for two consecutive cycles
524 signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
525 {
526   int hr;
527   for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
528   return -1;
529 }
530
531 int count_free_regs(signed char regmap[])
532 {
533   int count=0;
534   int hr;
535   for(hr=0;hr<HOST_REGS;hr++)
536   {
537     if(hr!=EXCLUDE_REG) {
538       if(regmap[hr]<0) count++;
539     }
540   }
541   return count;
542 }
543
544 void dirty_reg(struct regstat *cur,signed char reg)
545 {
546   int hr;
547   if(!reg) return;
548   for (hr=0;hr<HOST_REGS;hr++) {
549     if((cur->regmap[hr]&63)==reg) {
550       cur->dirty|=1<<hr;
551     }
552   }
553 }
554
555 // If we dirty the lower half of a 64-bit register which is now being
556 // sign-extended, we need to dump the upper half.
557 // Note: Do this only after completion of the instruction, because
558 // some instructions may need to read the full 64-bit value even if
559 // overwriting it (eg SLTI, DSRA32).
560 static void flush_dirty_uppers(struct regstat *cur)
561 {
562   int hr,reg;
563   for (hr=0;hr<HOST_REGS;hr++) {
564     if((cur->dirty>>hr)&1) {
565       reg=cur->regmap[hr];
566       if(reg>=64) 
567         if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
568     }
569   }
570 }
571
572 void set_const(struct regstat *cur,signed char reg,uint64_t value)
573 {
574   int hr;
575   if(!reg) return;
576   for (hr=0;hr<HOST_REGS;hr++) {
577     if(cur->regmap[hr]==reg) {
578       cur->isconst|=1<<hr;
579       cur->constmap[hr]=value;
580     }
581     else if((cur->regmap[hr]^64)==reg) {
582       cur->isconst|=1<<hr;
583       cur->constmap[hr]=value>>32;
584     }
585   }
586 }
587
588 void clear_const(struct regstat *cur,signed char reg)
589 {
590   int hr;
591   if(!reg) return;
592   for (hr=0;hr<HOST_REGS;hr++) {
593     if((cur->regmap[hr]&63)==reg) {
594       cur->isconst&=~(1<<hr);
595     }
596   }
597 }
598
599 int is_const(struct regstat *cur,signed char reg)
600 {
601   int hr;
602   if(!reg) return 1;
603   for (hr=0;hr<HOST_REGS;hr++) {
604     if((cur->regmap[hr]&63)==reg) {
605       return (cur->isconst>>hr)&1;
606     }
607   }
608   return 0;
609 }
610 uint64_t get_const(struct regstat *cur,signed char reg)
611 {
612   int hr;
613   if(!reg) return 0;
614   for (hr=0;hr<HOST_REGS;hr++) {
615     if(cur->regmap[hr]==reg) {
616       return cur->constmap[hr];
617     }
618   }
619   printf("Unknown constant in r%d\n",reg);
620   exit(1);
621 }
622
623 // Least soon needed registers
624 // Look at the next ten instructions and see which registers
625 // will be used.  Try not to reallocate these.
626 void lsn(u_char hsn[], int i, int *preferred_reg)
627 {
628   int j;
629   int b=-1;
630   for(j=0;j<9;j++)
631   {
632     if(i+j>=slen) {
633       j=slen-i-1;
634       break;
635     }
636     if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
637     {
638       // Don't go past an unconditional jump
639       j++;
640       break;
641     }
642   }
643   for(;j>=0;j--)
644   {
645     if(rs1[i+j]) hsn[rs1[i+j]]=j;
646     if(rs2[i+j]) hsn[rs2[i+j]]=j;
647     if(rt1[i+j]) hsn[rt1[i+j]]=j;
648     if(rt2[i+j]) hsn[rt2[i+j]]=j;
649     if(itype[i+j]==STORE || itype[i+j]==STORELR) {
650       // Stores can allocate zero
651       hsn[rs1[i+j]]=j;
652       hsn[rs2[i+j]]=j;
653     }
654     // On some architectures stores need invc_ptr
655     #if defined(HOST_IMM8)
656     if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
657       hsn[INVCP]=j;
658     }
659     #endif
660     if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
661     {
662       hsn[CCREG]=j;
663       b=j;
664     }
665   }
666   if(b>=0)
667   {
668     if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
669     {
670       // Follow first branch
671       int t=(ba[i+b]-start)>>2;
672       j=7-b;if(t+j>=slen) j=slen-t-1;
673       for(;j>=0;j--)
674       {
675         if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
676         if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
677         //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
678         //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
679       }
680     }
681     // TODO: preferred register based on backward branch
682   }
683   // Delay slot should preferably not overwrite branch conditions or cycle count
684   if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
685     if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
686     if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
687     hsn[CCREG]=1;
688     // ...or hash tables
689     hsn[RHASH]=1;
690     hsn[RHTBL]=1;
691   }
692   // Coprocessor load/store needs FTEMP, even if not declared
693   if(itype[i]==C1LS||itype[i]==C2LS) {
694     hsn[FTEMP]=0;
695   }
696   // Load L/R also uses FTEMP as a temporary register
697   if(itype[i]==LOADLR) {
698     hsn[FTEMP]=0;
699   }
700   // Also SWL/SWR/SDL/SDR
701   if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
702     hsn[FTEMP]=0;
703   }
704   // Don't remove the TLB registers either
705   if(itype[i]==LOAD || itype[i]==LOADLR || itype[i]==STORE || itype[i]==STORELR || itype[i]==C1LS || itype[i]==C2LS) {
706     hsn[TLREG]=0;
707   }
708   // Don't remove the miniht registers
709   if(itype[i]==UJUMP||itype[i]==RJUMP)
710   {
711     hsn[RHASH]=0;
712     hsn[RHTBL]=0;
713   }
714 }
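/* After lsn() returns, hsn[r] holds the distance (in instructions, within a
   short lookahead window of about ten) to the next use of guest register r:
   0 means needed by the current instruction, larger values mean the register
   is safer to reallocate.  needed_again() below seeds the array with 10,
   i.e. "not needed soon", and pinned temporaries (FTEMP, TLREG, RHASH,
   RHTBL) are forced to 0 so they are not stolen. */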
715
716 // We only want to allocate registers if we're going to use them again soon
717 int needed_again(int r, int i)
718 {
719   int j;
720   int b=-1;
721   int rn=10;
722   int hr;
723   u_char hsn[MAXREG+1];
724   int preferred_reg;
725   
726   memset(hsn,10,sizeof(hsn));
727   lsn(hsn,i,&preferred_reg);
728   
729   if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
730   {
731     if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
732       return 0; // Don't need any registers if exiting the block
733   }
734   for(j=0;j<9;j++)
735   {
736     if(i+j>=slen) {
737       j=slen-i-1;
738       break;
739     }
740     if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
741     {
742       // Don't go past an unconditional jump
743       j++;
744       break;
745     }
746     if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
747     {
748       break;
749     }
750   }
751   for(;j>=1;j--)
752   {
753     if(rs1[i+j]==r) rn=j;
754     if(rs2[i+j]==r) rn=j;
755     if((unneeded_reg[i+j]>>r)&1) rn=10;
756     if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
757     {
758       b=j;
759     }
760   }
761   /*
762   if(b>=0)
763   {
764     if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
765     {
766       // Follow first branch
767       int o=rn;
768       int t=(ba[i+b]-start)>>2;
769       j=7-b;if(t+j>=slen) j=slen-t-1;
770       for(;j>=0;j--)
771       {
772         if(!((unneeded_reg[t+j]>>r)&1)) {
773           if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
774           if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
775         }
776         else rn=o;
777       }
778     }
779   }*/
780   for(hr=0;hr<HOST_REGS;hr++) {
781     if(hr!=EXCLUDE_REG) {
782       if(rn<hsn[hr]) return 1;
783     }
784   }
785   return 0;
786 }
787
788 // Try to match register allocations at the end of a loop with those
789 // at the beginning
790 int loop_reg(int i, int r, int hr)
791 {
792   int j,k;
793   for(j=0;j<9;j++)
794   {
795     if(i+j>=slen) {
796       j=slen-i-1;
797       break;
798     }
799     if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
800     {
801       // Don't go past an unconditional jump
802       j++;
803       break;
804     }
805   }
806   k=0;
807   if(i>0){
808     if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
809       k--;
810   }
811   for(;k<j;k++)
812   {
813     if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
814     if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
815     if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
816     {
817       if(ba[i+k]>=start && ba[i+k]<(start+i*4))
818       {
819         int t=(ba[i+k]-start)>>2;
820         int reg=get_reg(regs[t].regmap_entry,r);
821         if(reg>=0) return reg;
822         //reg=get_reg(regs[t+1].regmap_entry,r);
823         //if(reg>=0) return reg;
824       }
825     }
826   }
827   return hr;
828 }
829
830
831 // Allocate every register, preserving source/target regs
832 void alloc_all(struct regstat *cur,int i)
833 {
834   int hr;
835   
836   for(hr=0;hr<HOST_REGS;hr++) {
837     if(hr!=EXCLUDE_REG) {
838       if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
839          ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
840       {
841         cur->regmap[hr]=-1;
842         cur->dirty&=~(1<<hr);
843       }
844       // Don't need zeros
845       if((cur->regmap[hr]&63)==0)
846       {
847         cur->regmap[hr]=-1;
848         cur->dirty&=~(1<<hr);
849       }
850     }
851   }
852 }
853
854
855 void div64(int64_t dividend,int64_t divisor)
856 {
857   lo=dividend/divisor;
858   hi=dividend%divisor;
859   //printf("TRACE: ddiv %8x%8x %8x%8x\n" ,(int)reg[HIREG],(int)(reg[HIREG]>>32)
860   //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
861 }
862 void divu64(uint64_t dividend,uint64_t divisor)
863 {
864   lo=dividend/divisor;
865   hi=dividend%divisor;
866   //printf("TRACE: ddivu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
867   //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
868 }
869
870 void mult64(int64_t m1,int64_t m2)
871 {
872    unsigned long long int op1, op2, op3, op4;
873    unsigned long long int result1, result2, result3, result4;
874    unsigned long long int temp1, temp2, temp3, temp4;
875    int sign = 0;
876    
877    if (m1 < 0)
878      {
879     op2 = -m1;
880     sign = 1 - sign;
881      }
882    else op2 = m1;
883    if (m2 < 0)
884      {
885     op4 = -m2;
886     sign = 1 - sign;
887      }
888    else op4 = m2;
889    
890    op1 = op2 & 0xFFFFFFFF;
891    op2 = (op2 >> 32) & 0xFFFFFFFF;
892    op3 = op4 & 0xFFFFFFFF;
893    op4 = (op4 >> 32) & 0xFFFFFFFF;
894    
895    temp1 = op1 * op3;
896    temp2 = (temp1 >> 32) + op1 * op4;
897    temp3 = op2 * op3;
898    temp4 = (temp3 >> 32) + op2 * op4;
899    
900    result1 = temp1 & 0xFFFFFFFF;
901    result2 = temp2 + (temp3 & 0xFFFFFFFF);
902    result3 = (result2 >> 32) + temp4;
903    result4 = (result3 >> 32);
904    
905    lo = result1 | (result2 << 32);
906    hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
907    if (sign)
908      {
909     hi = ~hi;
910     if (!lo) hi++;
911     else lo = ~lo + 1;
912      }
913 }
914
915 void multu64(uint64_t m1,uint64_t m2)
916 {
917    unsigned long long int op1, op2, op3, op4;
918    unsigned long long int result1, result2, result3, result4;
919    unsigned long long int temp1, temp2, temp3, temp4;
920    
921    op1 = m1 & 0xFFFFFFFF;
922    op2 = (m1 >> 32) & 0xFFFFFFFF;
923    op3 = m2 & 0xFFFFFFFF;
924    op4 = (m2 >> 32) & 0xFFFFFFFF;
925    
926    temp1 = op1 * op3;
927    temp2 = (temp1 >> 32) + op1 * op4;
928    temp3 = op2 * op3;
929    temp4 = (temp3 >> 32) + op2 * op4;
930    
931    result1 = temp1 & 0xFFFFFFFF;
932    result2 = temp2 + (temp3 & 0xFFFFFFFF);
933    result3 = (result2 >> 32) + temp4;
934    result4 = (result3 >> 32);
935    
936    lo = result1 | (result2 << 32);
937    hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
938    
939   //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
940   //                                      ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
941 }
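/* mult64/multu64 above build the full 128-bit product of two 64-bit
   operands from four 32x32-bit partial products (schoolbook long
   multiplication), since a single 64-bit multiply only yields the low half.
   A minimal sanity-check sketch, assuming a compiler that provides unsigned
   __int128 (the helper name below is hypothetical, not part of this file): */
#if 0
static void multu64_check(uint64_t m1, uint64_t m2)
{
  unsigned __int128 p = (unsigned __int128)m1 * m2; /* reference product */
  multu64(m1, m2);                   /* sets the global lo/hi pair */
  assert(lo == (uint64_t)p);         /* low 64 bits match */
  assert(hi == (uint64_t)(p >> 64)); /* high 64 bits match */
}
#endif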
942
943 uint64_t ldl_merge(uint64_t original,uint64_t loaded,u_int bits)
944 {
945   if(bits) {
946     original<<=64-bits;
947     original>>=64-bits;
948     loaded<<=bits;
949     original|=loaded;
950   }
951   else original=loaded;
952   return original;
953 }
954 uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits)
955 {
956   if(bits^56) {
957     original>>=64-(bits^56);
958     original<<=64-(bits^56);
959     loaded>>=bits^56;
960     original|=loaded;
961   }
962   else original=loaded;
963   return original;
964 }
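/* Example: ldl_merge(old, loaded, 8) keeps the low 8 bits of the old
   register value and places the loaded data above them, i.e.
   (loaded<<8)|(old&0xff); with bits==0 the loaded value replaces the old
   one entirely.  ldr_merge is the mirror case, keeping the high bits of the
   old value instead (its shift count arrives pre-xored with 56). */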
965
966 #ifdef __i386__
967 #include "assem_x86.c"
968 #endif
969 #ifdef __x86_64__
970 #include "assem_x64.c"
971 #endif
972 #ifdef __arm__
973 #include "assem_arm.c"
974 #endif
975
976 // Add virtual address mapping to linked list
977 void ll_add(struct ll_entry **head,int vaddr,void *addr)
978 {
979   struct ll_entry *new_entry;
980   new_entry=malloc(sizeof(struct ll_entry));
981   assert(new_entry!=NULL);
982   new_entry->vaddr=vaddr;
983   new_entry->reg32=0;
984   new_entry->addr=addr;
985   new_entry->next=*head;
986   *head=new_entry;
987 }
988
989 // Add virtual address mapping for 32-bit compiled block
990 void ll_add_32(struct ll_entry **head,int vaddr,u_int reg32,void *addr)
991 {
992   ll_add(head,vaddr,addr);
993 #ifndef FORCE32
994   (*head)->reg32=reg32;
995 #endif
996 }
997
998 // Check if an address is already compiled
999 // but don't return addresses which are about to expire from the cache
1000 void *check_addr(u_int vaddr)
1001 {
1002   u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
1003   if(ht_bin[0]==vaddr) {
1004     if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
1005       if(isclean(ht_bin[1])) return (void *)ht_bin[1];
1006   }
1007   if(ht_bin[2]==vaddr) {
1008     if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
1009       if(isclean(ht_bin[3])) return (void *)ht_bin[3];
1010   }
1011   u_int page=get_page(vaddr);
1012   struct ll_entry *head;
1013   head=jump_in[page];
1014   while(head!=NULL) {
1015     if(head->vaddr==vaddr&&head->reg32==0) {
1016       if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
1017         // Update existing entry with current address
1018         if(ht_bin[0]==vaddr) {
1019           ht_bin[1]=(int)head->addr;
1020           return head->addr;
1021         }
1022         if(ht_bin[2]==vaddr) {
1023           ht_bin[3]=(int)head->addr;
1024           return head->addr;
1025         }
1026         // Insert into hash table with low priority.
1027         // Don't evict existing entries, as they are probably
1028         // addresses that are being accessed frequently.
1029         if(ht_bin[0]==-1) {
1030           ht_bin[1]=(int)head->addr;
1031           ht_bin[0]=vaddr;
1032         }else if(ht_bin[2]==-1) {
1033           ht_bin[3]=(int)head->addr;
1034           ht_bin[2]=vaddr;
1035         }
1036         return head->addr;
1037       }
1038     }
1039     head=head->next;
1040   }
1041   return 0;
1042 }
1043
1044 void remove_hash(int vaddr)
1045 {
1046   //printf("remove hash: %x\n",vaddr);
1047   int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
1048   if(ht_bin[2]==vaddr) {
1049     ht_bin[2]=ht_bin[3]=-1;
1050   }
1051   if(ht_bin[0]==vaddr) {
1052     ht_bin[0]=ht_bin[2];
1053     ht_bin[1]=ht_bin[3];
1054     ht_bin[2]=ht_bin[3]=-1;
1055   }
1056 }
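/* remove_hash drops one vaddr from its 2-entry hash bin: slot 2/3 is simply
   cleared to -1, while removing slot 0/1 promotes the remaining pair into
   the first position so it keeps the fast slot. */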
1057
1058 void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
1059 {
1060   struct ll_entry *next;
1061   while(*head) {
1062     if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || 
1063        ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
1064     {
1065       inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
1066       remove_hash((*head)->vaddr);
1067       next=(*head)->next;
1068       free(*head);
1069       *head=next;
1070     }
1071     else
1072     {
1073       head=&((*head)->next);
1074     }
1075   }
1076 }
1077
1078 // Remove all entries from linked list
1079 void ll_clear(struct ll_entry **head)
1080 {
1081   struct ll_entry *cur;
1082   struct ll_entry *next;
1083   if(cur=*head) {
1084     *head=0;
1085     while(cur) {
1086       next=cur->next;
1087       free(cur);
1088       cur=next;
1089     }
1090   }
1091 }
1092
1093 // Dereference the pointers and kill those that point into the given range
1094 void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
1095 {
1096   while(head) {
1097     int ptr=get_pointer(head->addr);
1098     inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
1099     if(((ptr>>shift)==(addr>>shift)) ||
1100        (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
1101     {
1102       inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
1103       u_int host_addr=(u_int)kill_pointer(head->addr);
1104       #ifdef __arm__
1105         needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
1106       #endif
1107     }
1108     head=head->next;
1109   }
1110 }
1111
1112 // This is called when we write to a compiled block (see do_invstub)
1113 void invalidate_page(u_int page)
1114 {
1115   struct ll_entry *head;
1116   struct ll_entry *next;
1117   head=jump_in[page];
1118   jump_in[page]=0;
1119   while(head!=NULL) {
1120     inv_debug("INVALIDATE: %x\n",head->vaddr);
1121     remove_hash(head->vaddr);
1122     next=head->next;
1123     free(head);
1124     head=next;
1125   }
1126   head=jump_out[page];
1127   jump_out[page]=0;
1128   while(head!=NULL) {
1129     inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
1130     u_int host_addr=(u_int)kill_pointer(head->addr);
1131     #ifdef __arm__
1132       needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
1133     #endif
1134     next=head->next;
1135     free(head);
1136     head=next;
1137   }
1138 }
1139 void invalidate_block(u_int block)
1140 {
1141   u_int page=get_page(block<<12);
1142   u_int vpage=get_vpage(block<<12);
1143   inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
1144   //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
1145   u_int first,last;
1146   first=last=page;
1147   struct ll_entry *head;
1148   head=jump_dirty[vpage];
1149   //printf("page=%d vpage=%d\n",page,vpage);
1150   while(head!=NULL) {
1151     u_int start,end;
1152     if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
1153       get_bounds((int)head->addr,&start,&end);
1154       //printf("start: %x end: %x\n",start,end);
1155       if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) {
1156         if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
1157           if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
1158           if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
1159         }
1160       }
1161 #ifndef DISABLE_TLB
1162       if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) {
1163         if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) {
1164           if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)<first) first=((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047;
1165           if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047;
1166         }
1167       }
1168 #endif
1169     }
1170     head=head->next;
1171   }
1172   //printf("first=%d last=%d\n",first,last);
1173   invalidate_page(page);
1174   assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
1175   assert(last<page+5);
1176   // Invalidate the adjacent pages if a block crosses a 4K boundary
1177   while(first<page) {
1178     invalidate_page(first);
1179     first++;
1180   }
1181   for(first=page+1;first<last;first++) {
1182     invalidate_page(first);
1183   }
1184   #ifdef __arm__
1185     do_clear_cache();
1186   #endif
1187   
1188   // Don't trap writes
1189   invalid_code[block]=1;
1190 #ifdef PCSX
1191   invalid_code[((u_int)0x80000000>>12)|page]=1;
1192 #endif
1193 #ifndef DISABLE_TLB
1194   // If there is a valid TLB entry for this page, remove write protect
1195   if(tlb_LUT_w[block]) {
1196     assert(tlb_LUT_r[block]==tlb_LUT_w[block]);
1197     // CHECK: Is this right?
1198     memory_map[block]=((tlb_LUT_w[block]&0xFFFFF000)-(block<<12)+(unsigned int)rdram-0x80000000)>>2;
1199     u_int real_block=tlb_LUT_w[block]>>12;
1200     invalid_code[real_block]=1;
1201     if(real_block>=0x80000&&real_block<0x80800) memory_map[real_block]=((u_int)rdram-0x80000000)>>2;
1202   }
1203   else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2;
1204 #endif
1205
1206   #ifdef USE_MINI_HT
1207   memset(mini_ht,-1,sizeof(mini_ht));
1208   #endif
1209 }
1210 void invalidate_addr(u_int addr)
1211 {
1212   invalidate_block(addr>>12);
1213 }
1214 // This is called when loading a save state.
1215 // Anything could have changed, so invalidate everything.
1216 void invalidate_all_pages()
1217 {
1218   u_int page,n;
1219   for(page=0;page<4096;page++)
1220     invalidate_page(page);
1221   for(page=0;page<1048576;page++)
1222     if(!invalid_code[page]) {
1223       restore_candidate[(page&2047)>>3]|=1<<(page&7);
1224       restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1225     }
1226   #ifdef __arm__
1227   __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
1228   #endif
1229   #ifdef USE_MINI_HT
1230   memset(mini_ht,-1,sizeof(mini_ht));
1231   #endif
1232   #ifndef DISABLE_TLB
1233   // TLB
1234   for(page=0;page<0x100000;page++) {
1235     if(tlb_LUT_r[page]) {
1236       memory_map[page]=((tlb_LUT_r[page]&0xFFFFF000)-(page<<12)+(unsigned int)rdram-0x80000000)>>2;
1237       if(!tlb_LUT_w[page]||!invalid_code[page])
1238         memory_map[page]|=0x40000000; // Write protect
1239     }
1240     else memory_map[page]=-1;
1241     if(page==0x80000) page=0xC0000;
1242   }
1243   tlb_hacks();
1244   #endif
1245 }
1246
1247 // Add an entry to jump_out after making a link
1248 void add_link(u_int vaddr,void *src)
1249 {
1250   u_int page=get_page(vaddr);
1251   inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
1252   ll_add(jump_out+page,vaddr,src);
1253   //int ptr=get_pointer(src);
1254   //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
1255 }
1256
1257 // If a code block was found to be unmodified (bit was set in
1258 // restore_candidate) and it remains unmodified (bit is clear
1259 // in invalid_code) then move the entries for that 4K page from
1260 // the dirty list to the clean list.
1261 void clean_blocks(u_int page)
1262 {
1263   struct ll_entry *head;
1264   inv_debug("INV: clean_blocks page=%d\n",page);
1265   head=jump_dirty[page];
1266   while(head!=NULL) {
1267     if(!invalid_code[head->vaddr>>12]) {
1268       // Don't restore blocks which are about to expire from the cache
1269       if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
1270         u_int start,end;
1271         if(verify_dirty((int)head->addr)) {
1272           //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
1273           u_int i;
1274           u_int inv=0;
1275           get_bounds((int)head->addr,&start,&end);
1276           if(start-(u_int)rdram<RAM_SIZE) {
1277             for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
1278               inv|=invalid_code[i];
1279             }
1280           }
1281           if((signed int)head->vaddr>=(signed int)0xC0000000) {
1282             u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2));
1283             //printf("addr=%x start=%x end=%x\n",addr,start,end);
1284             if(addr<start||addr>=end) inv=1;
1285           }
1286           else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
1287             inv=1;
1288           }
1289           if(!inv) {
1290             void * clean_addr=(void *)get_clean_addr((int)head->addr);
1291             if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
1292               u_int ppage=page;
1293 #ifndef DISABLE_TLB
1294               if(page<2048&&tlb_LUT_r[head->vaddr>>12]) ppage=(tlb_LUT_r[head->vaddr>>12]^0x80000000)>>12;
1295 #endif
1296               inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
1297               //printf("page=%x, addr=%x\n",page,head->vaddr);
1298               //assert(head->vaddr>>12==(page|0x80000));
1299               ll_add_32(jump_in+ppage,head->vaddr,head->reg32,clean_addr);
1300               int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
1301               if(!head->reg32) {
1302                 if(ht_bin[0]==head->vaddr) {
1303                   ht_bin[1]=(int)clean_addr; // Replace existing entry
1304                 }
1305                 if(ht_bin[2]==head->vaddr) {
1306                   ht_bin[3]=(int)clean_addr; // Replace existing entry
1307                 }
1308               }
1309             }
1310           }
1311         }
1312       }
1313     }
1314     head=head->next;
1315   }
1316 }
1317
1318
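/* The *_alloc routines below run once per decoded instruction during
   recompilation: each reserves host registers for the instruction's sources
   and target, updates is32 to reflect whether the result is known to be a
   sign-extended 32-bit value, clears or propagates constants, and marks the
   target dirty so it gets written back.  (Summary inferred from the
   routines themselves.) */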
1319 void mov_alloc(struct regstat *current,int i)
1320 {
1321   // Note: Don't need to actually alloc the source registers
1322   if((~current->is32>>rs1[i])&1) {
1323     //alloc_reg64(current,i,rs1[i]);
1324     alloc_reg64(current,i,rt1[i]);
1325     current->is32&=~(1LL<<rt1[i]);
1326   } else {
1327     //alloc_reg(current,i,rs1[i]);
1328     alloc_reg(current,i,rt1[i]);
1329     current->is32|=(1LL<<rt1[i]);
1330   }
1331   clear_const(current,rs1[i]);
1332   clear_const(current,rt1[i]);
1333   dirty_reg(current,rt1[i]);
1334 }
1335
1336 void shiftimm_alloc(struct regstat *current,int i)
1337 {
1338   clear_const(current,rs1[i]);
1339   clear_const(current,rt1[i]);
1340   if(opcode2[i]<=0x3) // SLL/SRL/SRA
1341   {
1342     if(rt1[i]) {
1343       if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1344       else lt1[i]=rs1[i];
1345       alloc_reg(current,i,rt1[i]);
1346       current->is32|=1LL<<rt1[i];
1347       dirty_reg(current,rt1[i]);
1348     }
1349   }
1350   if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
1351   {
1352     if(rt1[i]) {
1353       if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1354       alloc_reg64(current,i,rt1[i]);
1355       current->is32&=~(1LL<<rt1[i]);
1356       dirty_reg(current,rt1[i]);
1357     }
1358   }
1359   if(opcode2[i]==0x3c) // DSLL32
1360   {
1361     if(rt1[i]) {
1362       if(rs1[i]) alloc_reg(current,i,rs1[i]);
1363       alloc_reg64(current,i,rt1[i]);
1364       current->is32&=~(1LL<<rt1[i]);
1365       dirty_reg(current,rt1[i]);
1366     }
1367   }
1368   if(opcode2[i]==0x3e) // DSRL32
1369   {
1370     if(rt1[i]) {
1371       alloc_reg64(current,i,rs1[i]);
1372       if(imm[i]==32) {
1373         alloc_reg64(current,i,rt1[i]);
1374         current->is32&=~(1LL<<rt1[i]);
1375       } else {
1376         alloc_reg(current,i,rt1[i]);
1377         current->is32|=1LL<<rt1[i];
1378       }
1379       dirty_reg(current,rt1[i]);
1380     }
1381   }
1382   if(opcode2[i]==0x3f) // DSRA32
1383   {
1384     if(rt1[i]) {
1385       alloc_reg64(current,i,rs1[i]);
1386       alloc_reg(current,i,rt1[i]);
1387       current->is32|=1LL<<rt1[i];
1388       dirty_reg(current,rt1[i]);
1389     }
1390   }
1391 }
1392
1393 void shift_alloc(struct regstat *current,int i)
1394 {
1395   if(rt1[i]) {
1396     if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
1397     {
1398       if(rs1[i]) alloc_reg(current,i,rs1[i]);
1399       if(rs2[i]) alloc_reg(current,i,rs2[i]);
1400       alloc_reg(current,i,rt1[i]);
1401       if(rt1[i]==rs2[i]) {
1402         alloc_reg_temp(current,i,-1);
1403         minimum_free_regs[i]=1;
1404       }
1405       current->is32|=1LL<<rt1[i];
1406     } else { // DSLLV/DSRLV/DSRAV
1407       if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1408       if(rs2[i]) alloc_reg(current,i,rs2[i]);
1409       alloc_reg64(current,i,rt1[i]);
1410       current->is32&=~(1LL<<rt1[i]);
1411       if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
1412       {
1413         alloc_reg_temp(current,i,-1);
1414         minimum_free_regs[i]=1;
1415       }
1416     }
1417     clear_const(current,rs1[i]);
1418     clear_const(current,rs2[i]);
1419     clear_const(current,rt1[i]);
1420     dirty_reg(current,rt1[i]);
1421   }
1422 }
1423
1424 void alu_alloc(struct regstat *current,int i)
1425 {
1426   if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
1427     if(rt1[i]) {
1428       if(rs1[i]&&rs2[i]) {
1429         alloc_reg(current,i,rs1[i]);
1430         alloc_reg(current,i,rs2[i]);
1431       }
1432       else {
1433         if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1434         if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
1435       }
1436       alloc_reg(current,i,rt1[i]);
1437     }
1438     current->is32|=1LL<<rt1[i];
1439   }
1440   if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
1441     if(rt1[i]) {
1442       if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
1443       {
1444         alloc_reg64(current,i,rs1[i]);
1445         alloc_reg64(current,i,rs2[i]);
1446         alloc_reg(current,i,rt1[i]);
1447       } else {
1448         alloc_reg(current,i,rs1[i]);
1449         alloc_reg(current,i,rs2[i]);
1450         alloc_reg(current,i,rt1[i]);
1451       }
1452     }
1453     current->is32|=1LL<<rt1[i];
1454   }
1455   if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
1456     if(rt1[i]) {
1457       if(rs1[i]&&rs2[i]) {
1458         alloc_reg(current,i,rs1[i]);
1459         alloc_reg(current,i,rs2[i]);
1460       }
1461       else
1462       {
1463         if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1464         if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
1465       }
1466       alloc_reg(current,i,rt1[i]);
1467       if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
1468       {
1469         if(!((current->uu>>rt1[i])&1)) {
1470           alloc_reg64(current,i,rt1[i]);
1471         }
1472         if(get_reg(current->regmap,rt1[i]|64)>=0) {
1473           if(rs1[i]&&rs2[i]) {
1474             alloc_reg64(current,i,rs1[i]);
1475             alloc_reg64(current,i,rs2[i]);
1476           }
1477           else
1478           {
1479             // Is it really worth it to keep 64-bit values in registers?
1480             #ifdef NATIVE_64BIT
1481             if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
1482             if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
1483             #endif
1484           }
1485         }
1486         current->is32&=~(1LL<<rt1[i]);
1487       } else {
1488         current->is32|=1LL<<rt1[i];
1489       }
1490     }
1491   }
1492   if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
1493     if(rt1[i]) {
1494       if(rs1[i]&&rs2[i]) {
1495         if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
1496           alloc_reg64(current,i,rs1[i]);
1497           alloc_reg64(current,i,rs2[i]);
1498           alloc_reg64(current,i,rt1[i]);
1499         } else {
1500           alloc_reg(current,i,rs1[i]);
1501           alloc_reg(current,i,rs2[i]);
1502           alloc_reg(current,i,rt1[i]);
1503         }
1504       }
1505       else {
1506         alloc_reg(current,i,rt1[i]);
1507         if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
1508           // DADD used as move, or zeroing
1509           // If we have a 64-bit source, then make the target 64 bits too
1510           if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
1511             if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
1512             alloc_reg64(current,i,rt1[i]);
1513           } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
1514             if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
1515             alloc_reg64(current,i,rt1[i]);
1516           }
1517           if(opcode2[i]>=0x2e&&rs2[i]) {
1518             // DSUB used as negation - 64-bit result
1519             // If we have a 32-bit register, extend it to 64 bits
1520             if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
1521             alloc_reg64(current,i,rt1[i]);
1522           }
1523         }
1524       }
1525       if(rs1[i]&&rs2[i]) {
1526         current->is32&=~(1LL<<rt1[i]);
1527       } else if(rs1[i]) {
1528         current->is32&=~(1LL<<rt1[i]);
1529         if((current->is32>>rs1[i])&1)
1530           current->is32|=1LL<<rt1[i];
1531       } else if(rs2[i]) {
1532         current->is32&=~(1LL<<rt1[i]);
1533         if((current->is32>>rs2[i])&1)
1534           current->is32|=1LL<<rt1[i];
1535       } else {
1536         current->is32|=1LL<<rt1[i];
1537       }
1538     }
1539   }
1540   clear_const(current,rs1[i]);
1541   clear_const(current,rs2[i]);
1542   clear_const(current,rt1[i]);
1543   dirty_reg(current,rt1[i]);
1544 }
1545
1546 void imm16_alloc(struct regstat *current,int i)
1547 {
1548   if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1549   else lt1[i]=rs1[i];
1550   if(rt1[i]) alloc_reg(current,i,rt1[i]);
1551   if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
1552     current->is32&=~(1LL<<rt1[i]);
1553     if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
1554       // TODO: Could preserve the 32-bit flag if the immediate is zero
1555       alloc_reg64(current,i,rt1[i]);
1556       alloc_reg64(current,i,rs1[i]);
1557     }
1558     clear_const(current,rs1[i]);
1559     clear_const(current,rt1[i]);
1560   }
1561   else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
1562     if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
1563     current->is32|=1LL<<rt1[i];
1564     clear_const(current,rs1[i]);
1565     clear_const(current,rt1[i]);
1566   }
1567   else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
1568     if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
1569       if(rs1[i]!=rt1[i]) {
1570         if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
1571         alloc_reg64(current,i,rt1[i]);
1572         current->is32&=~(1LL<<rt1[i]);
1573       }
1574     }
1575     else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
1576     if(is_const(current,rs1[i])) {
1577       int v=get_const(current,rs1[i]);
1578       if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
1579       if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
1580       if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
1581     }
1582     else clear_const(current,rt1[i]);
1583   }
1584   else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
1585     if(is_const(current,rs1[i])) {
1586       int v=get_const(current,rs1[i]);
1587       set_const(current,rt1[i],v+imm[i]);
1588     }
1589     else clear_const(current,rt1[i]);
1590     current->is32|=1LL<<rt1[i];
1591   }
1592   else {
1593     set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
1594     current->is32|=1LL<<rt1[i];
1595   }
1596   dirty_reg(current,rt1[i]);
1597 }
1598
1599 void load_alloc(struct regstat *current,int i)
1600 {
1601   clear_const(current,rt1[i]);
1602   //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
1603   if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
1604   if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1605   if(rt1[i]) {
1606     alloc_reg(current,i,rt1[i]);
1607     if(get_reg(current->regmap,rt1[i])<0) {
1608       // dummy load, but we still need a register to calculate the address
1609       alloc_reg_temp(current,i,-1);
1610       minimum_free_regs[i]=1;
1611     }
1612     if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
1613     {
1614       current->is32&=~(1LL<<rt1[i]);
1615       alloc_reg64(current,i,rt1[i]);
1616     }
1617     else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
1618     {
1619       current->is32&=~(1LL<<rt1[i]);
1620       alloc_reg64(current,i,rt1[i]);
1621       alloc_all(current,i);
1622       alloc_reg64(current,i,FTEMP);
1623       minimum_free_regs[i]=HOST_REGS;
1624     }
1625     else current->is32|=1LL<<rt1[i];
1626     dirty_reg(current,rt1[i]);
1627     // If using TLB, need a register for pointer to the mapping table
1628     if(using_tlb) alloc_reg(current,i,TLREG);
1629     // LWL/LWR need a temporary register for the old value
1630     if(opcode[i]==0x22||opcode[i]==0x26)
1631     {
1632       alloc_reg(current,i,FTEMP);
1633       alloc_reg_temp(current,i,-1);
1634       minimum_free_regs[i]=1;
1635     }
1636   }
1637   else
1638   {
1639     // Load to r0 (dummy load)
1640     // but we still need a register to calculate the address
1641     if(opcode[i]==0x22||opcode[i]==0x26)
1642     {
1643       alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
1644     }
1645     alloc_reg_temp(current,i,-1);
1646     minimum_free_regs[i]=1;
1647     if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
1648     {
1649       alloc_all(current,i);
1650       alloc_reg64(current,i,FTEMP);
1651       minimum_free_regs[i]=HOST_REGS;
1652     }
1653   }
1654 }
1655
1656 void store_alloc(struct regstat *current,int i)
1657 {
1658   clear_const(current,rs2[i]);
1659   if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
1660   if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1661   alloc_reg(current,i,rs2[i]);
1662   if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
1663     alloc_reg64(current,i,rs2[i]);
1664     if(rs2[i]) alloc_reg(current,i,FTEMP);
1665   }
1666   // If using TLB, need a register for pointer to the mapping table
1667   if(using_tlb) alloc_reg(current,i,TLREG);
1668   #if defined(HOST_IMM8)
1669   // On CPUs without 32-bit immediates we need a pointer to invalid_code
1670   else alloc_reg(current,i,INVCP);
1671   #endif
1672   if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWR/SDL/SDR
1673     alloc_reg(current,i,FTEMP);
1674   }
1675   // We need a temporary register for address generation
1676   alloc_reg_temp(current,i,-1);
1677   minimum_free_regs[i]=1;
1678 }
1679
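// Register allocation for COP1 loads/stores (LWC1/SWC1/LDC1/SDC1):
// needs the COP1 status register (CSREG), FTEMP for the data, and an
// address temporary.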
1680 void c1ls_alloc(struct regstat *current,int i)
1681 {
1682   //clear_const(current,rs1[i]); // FIXME
1683   clear_const(current,rt1[i]);
1684   if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1685   alloc_reg(current,i,CSREG); // Status
1686   alloc_reg(current,i,FTEMP);
1687   if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
1688     alloc_reg64(current,i,FTEMP);
1689   }
1690   // If using TLB, need a register for pointer to the mapping table
1691   if(using_tlb) alloc_reg(current,i,TLREG);
1692   #if defined(HOST_IMM8)
1693   // On CPUs without 32-bit immediates we need a pointer to invalid_code
1694   else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
1695     alloc_reg(current,i,INVCP);
1696   #endif
1697   // We need a temporary register for address generation
1698   alloc_reg_temp(current,i,-1);
1699 }
1700
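// Register allocation for COP2/GTE loads/stores (LWC2/SWC2):
// FTEMP for the data and a temporary for address generation.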
1701 void c2ls_alloc(struct regstat *current,int i)
1702 {
1703   clear_const(current,rt1[i]);
1704   if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1705   alloc_reg(current,i,FTEMP);
1706   // If using TLB, need a register for pointer to the mapping table
1707   if(using_tlb) alloc_reg(current,i,TLREG);
1708   #if defined(HOST_IMM8)
1709   // On CPUs without 32-bit immediates we need a pointer to invalid_code
1710   else if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
1711     alloc_reg(current,i,INVCP);
1712   #endif
1713   // We need a temporary register for address generation
1714   alloc_reg_temp(current,i,-1);
1715   minimum_free_regs[i]=1;
1716 }
1717
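// Default register allocation for MULT/MULTU/DIV/DIVU and the 64-bit
// DMULT/DMULTU/DDIV/DDIVU forms; an architecture-specific assembler may
// supply its own version by defining multdiv_alloc.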
1718 #ifndef multdiv_alloc
1719 void multdiv_alloc(struct regstat *current,int i)
1720 {
1721   //  case 0x18: MULT
1722   //  case 0x19: MULTU
1723   //  case 0x1A: DIV
1724   //  case 0x1B: DIVU
1725   //  case 0x1C: DMULT
1726   //  case 0x1D: DMULTU
1727   //  case 0x1E: DDIV
1728   //  case 0x1F: DDIVU
1729   clear_const(current,rs1[i]);
1730   clear_const(current,rs2[i]);
1731   if(rs1[i]&&rs2[i])
1732   {
1733     if((opcode2[i]&4)==0) // 32-bit
1734     {
1735       current->u&=~(1LL<<HIREG);
1736       current->u&=~(1LL<<LOREG);
1737       alloc_reg(current,i,HIREG);
1738       alloc_reg(current,i,LOREG);
1739       alloc_reg(current,i,rs1[i]);
1740       alloc_reg(current,i,rs2[i]);
1741       current->is32|=1LL<<HIREG;
1742       current->is32|=1LL<<LOREG;
1743       dirty_reg(current,HIREG);
1744       dirty_reg(current,LOREG);
1745     }
1746     else // 64-bit
1747     {
1748       current->u&=~(1LL<<HIREG);
1749       current->u&=~(1LL<<LOREG);
1750       current->uu&=~(1LL<<HIREG);
1751       current->uu&=~(1LL<<LOREG);
1752       alloc_reg64(current,i,HIREG);
1753       //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
1754       alloc_reg64(current,i,rs1[i]);
1755       alloc_reg64(current,i,rs2[i]);
1756       alloc_all(current,i);
1757       current->is32&=~(1LL<<HIREG);
1758       current->is32&=~(1LL<<LOREG);
1759       dirty_reg(current,HIREG);
1760       dirty_reg(current,LOREG);
1761       minimum_free_regs[i]=HOST_REGS;
1762     }
1763   }
1764   else
1765   {
1766     // Multiply by zero is zero.
1767     // MIPS does not have a divide by zero exception.
1768     // The result is undefined, so we return zero.
1769     alloc_reg(current,i,HIREG);
1770     alloc_reg(current,i,LOREG);
1771     current->is32|=1LL<<HIREG;
1772     current->is32|=1LL<<LOREG;
1773     dirty_reg(current,HIREG);
1774     dirty_reg(current,LOREG);
1775   }
1776 }
1777 #endif
1778
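// Register allocation for COP0 (MFC0/MTC0 and TLBR/TLBWI/TLBWR/TLBP/ERET):
// these are handled conservatively, with all host registers freed
// (alloc_all, minimum_free_regs=HOST_REGS).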
1779 void cop0_alloc(struct regstat *current,int i)
1780 {
1781   if(opcode2[i]==0) // MFC0
1782   {
1783     if(rt1[i]) {
1784       clear_const(current,rt1[i]);
1785       alloc_all(current,i);
1786       alloc_reg(current,i,rt1[i]);
1787       current->is32|=1LL<<rt1[i];
1788       dirty_reg(current,rt1[i]);
1789     }
1790   }
1791   else if(opcode2[i]==4) // MTC0
1792   {
1793     if(rs1[i]){
1794       clear_const(current,rs1[i]);
1795       alloc_reg(current,i,rs1[i]);
1796       alloc_all(current,i);
1797     }
1798     else {
1799       alloc_all(current,i); // FIXME: Keep r0
1800       current->u&=~1LL;
1801       alloc_reg(current,i,0);
1802     }
1803   }
1804   else
1805   {
1806     // TLBR/TLBWI/TLBWR/TLBP/ERET
1807     assert(opcode2[i]==0x10);
1808     alloc_all(current,i);
1809   }
1810   minimum_free_regs[i]=HOST_REGS;
1811 }
1812
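// Register allocation for COP1 moves (MFC1/DMFC1/CFC1, MTC1/DMTC1/CTC1):
// needs the COP1 status register (CSREG) plus a temporary.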
1813 void cop1_alloc(struct regstat *current,int i)
1814 {
1815   alloc_reg(current,i,CSREG); // Load status
1816   if(opcode2[i]<3) // MFC1/DMFC1/CFC1
1817   {
1818     if(rt1[i]){
1819       clear_const(current,rt1[i]);
1820       if(opcode2[i]==1) {
1821         alloc_reg64(current,i,rt1[i]); // DMFC1
1822         current->is32&=~(1LL<<rt1[i]);
1823       }else{
1824         alloc_reg(current,i,rt1[i]); // MFC1/CFC1
1825         current->is32|=1LL<<rt1[i];
1826       }
1827       dirty_reg(current,rt1[i]);
1828     }
1829     alloc_reg_temp(current,i,-1);
1830   }
1831   else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
1832   {
1833     if(rs1[i]){
1834       clear_const(current,rs1[i]);
1835       if(opcode2[i]==5)
1836         alloc_reg64(current,i,rs1[i]); // DMTC1
1837       else
1838         alloc_reg(current,i,rs1[i]); // MTC1/CTC1
1839       alloc_reg_temp(current,i,-1);
1840     }
1841     else {
1842       current->u&=~1LL;
1843       alloc_reg(current,i,0);
1844       alloc_reg_temp(current,i,-1);
1845     }
1846   }
1847   minimum_free_regs[i]=1;
1848 }
1849 void fconv_alloc(struct regstat *current,int i)
1850 {
1851   alloc_reg(current,i,CSREG); // Load status
1852   alloc_reg_temp(current,i,-1);
1853   minimum_free_regs[i]=1;
1854 }
1855 void float_alloc(struct regstat *current,int i)
1856 {
1857   alloc_reg(current,i,CSREG); // Load status
1858   alloc_reg_temp(current,i,-1);
1859   minimum_free_regs[i]=1;
1860 }
1861 void c2op_alloc(struct regstat *current,int i)
1862 {
1863   alloc_reg_temp(current,i,-1);
1864 }
1865 void fcomp_alloc(struct regstat *current,int i)
1866 {
1867   alloc_reg(current,i,CSREG); // Load status
1868   alloc_reg(current,i,FSREG); // Load flags
1869   dirty_reg(current,FSREG); // Flag will be modified
1870   alloc_reg_temp(current,i,-1);
1871   minimum_free_regs[i]=1;
1872 }
1873
1874 void syscall_alloc(struct regstat *current,int i)
1875 {
1876   alloc_cc(current,i);
1877   dirty_reg(current,CCREG);
1878   alloc_all(current,i);
1879   minimum_free_regs[i]=HOST_REGS;
1880   current->isconst=0;
1881 }
1882
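// Register allocation for the instruction in a branch delay slot,
// dispatched by instruction type. A jump in the delay slot is not
// supported and disables speculative precompilation.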
1883 void delayslot_alloc(struct regstat *current,int i)
1884 {
1885   switch(itype[i]) {
1886     case UJUMP:
1887     case CJUMP:
1888     case SJUMP:
1889     case RJUMP:
1890     case FJUMP:
1891     case SYSCALL:
1892     case HLECALL:
1893     case SPAN:
1894       assem_debug("jump in the delay slot.  this shouldn't happen.\n");//exit(1);
1895       printf("Disabled speculative precompilation\n");
1896       stop_after_jal=1;
1897       break;
1898     case IMM16:
1899       imm16_alloc(current,i);
1900       break;
1901     case LOAD:
1902     case LOADLR:
1903       load_alloc(current,i);
1904       break;
1905     case STORE:
1906     case STORELR:
1907       store_alloc(current,i);
1908       break;
1909     case ALU:
1910       alu_alloc(current,i);
1911       break;
1912     case SHIFT:
1913       shift_alloc(current,i);
1914       break;
1915     case MULTDIV:
1916       multdiv_alloc(current,i);
1917       break;
1918     case SHIFTIMM:
1919       shiftimm_alloc(current,i);
1920       break;
1921     case MOV:
1922       mov_alloc(current,i);
1923       break;
1924     case COP0:
1925       cop0_alloc(current,i);
1926       break;
1927     case COP1:
1928     case COP2:
1929       cop1_alloc(current,i);
1930       break;
1931     case C1LS:
1932       c1ls_alloc(current,i);
1933       break;
1934     case C2LS:
1935       c2ls_alloc(current,i);
1936       break;
1937     case FCONV:
1938       fconv_alloc(current,i);
1939       break;
1940     case FLOAT:
1941       float_alloc(current,i);
1942       break;
1943     case FCOMP:
1944       fcomp_alloc(current,i);
1945       break;
1946     case C2OP:
1947       c2op_alloc(current,i);
1948       break;
1949   }
1950 }
1951
1952 // Special case where a branch and delay slot span two pages in virtual memory
1953 static void pagespan_alloc(struct regstat *current,int i)
1954 {
1955   current->isconst=0;
1956   current->wasconst=0;
1957   regs[i].wasconst=0;
1958   minimum_free_regs[i]=HOST_REGS;
1959   alloc_all(current,i);
1960   alloc_cc(current,i);
1961   dirty_reg(current,CCREG);
1962   if(opcode[i]==3) // JAL
1963   {
1964     alloc_reg(current,i,31);
1965     dirty_reg(current,31);
1966   }
1967   if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
1968   {
1969     alloc_reg(current,i,rs1[i]);
1970     if (rt1[i]!=0) {
1971       alloc_reg(current,i,rt1[i]);
1972       dirty_reg(current,rt1[i]);
1973     }
1974   }
1975   if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
1976   {
1977     if(rs1[i]) alloc_reg(current,i,rs1[i]);
1978     if(rs2[i]) alloc_reg(current,i,rs2[i]);
1979     if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
1980     {
1981       if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1982       if(rs2[i]) alloc_reg64(current,i,rs2[i]);
1983     }
1984   }
1985   else
1986   if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
1987   {
1988     if(rs1[i]) alloc_reg(current,i,rs1[i]);
1989     if(!((current->is32>>rs1[i])&1))
1990     {
1991       if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1992     }
1993   }
1994   else
1995   if(opcode[i]==0x11) // BC1
1996   {
1997     alloc_reg(current,i,FSREG);
1998     alloc_reg(current,i,CSREG);
1999   }
2000   //else ...
2001 }
2002
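// Queue a stub to be emitted after the main block: the stub type, the
// address of the branch to patch, the return address, and up to five
// parameters, e.g. add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,
// ccadj[i],reglist) as used by load_assemble below.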
2003 void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
2004 {
2005   stubs[stubcount][0]=type;
2006   stubs[stubcount][1]=addr;
2007   stubs[stubcount][2]=retaddr;
2008   stubs[stubcount][3]=a;
2009   stubs[stubcount][4]=b;
2010   stubs[stubcount][5]=c;
2011   stubs[stubcount][6]=d;
2012   stubs[stubcount][7]=e;
2013   stubcount++;
2014 }
2015
2016 // Write out a single register
2017 void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
2018 {
2019   int hr;
2020   for(hr=0;hr<HOST_REGS;hr++) {
2021     if(hr!=EXCLUDE_REG) {
2022       if((regmap[hr]&63)==r) {
2023         if((dirty>>hr)&1) {
2024           if(regmap[hr]<64) {
2025             emit_storereg(r,hr);
2026 #ifndef FORCE32
2027             if((is32>>regmap[hr])&1) {
2028               emit_sarimm(hr,31,hr);
2029               emit_storereg(r|64,hr);
2030             }
2031 #endif
2032           }else{
2033             emit_storereg(r|64,hr);
2034           }
2035         }
2036       }
2037     }
2038   }
2039 }
2040
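// Debugging helpers: checksum RDRAM and the register file, and dump the
// register state; used by memdebug() for tracing.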
2041 int mchecksum()
2042 {
2043   //if(!tracedebug) return 0;
2044   int i;
2045   int sum=0;
2046   for(i=0;i<2097152;i++) {
2047     unsigned int temp=sum;
2048     sum<<=1;
2049     sum|=(~temp)>>31;
2050     sum^=((u_int *)rdram)[i];
2051   }
2052   return sum;
2053 }
2054 int rchecksum()
2055 {
2056   int i;
2057   int sum=0;
2058   for(i=0;i<64;i++)
2059     sum^=((u_int *)reg)[i];
2060   return sum;
2061 }
2062 void rlist()
2063 {
2064   int i;
2065   printf("TRACE: ");
2066   for(i=0;i<32;i++)
2067     printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
2068   printf("\n");
2069 #ifndef DISABLE_COP1
2070   printf("TRACE: ");
2071   for(i=0;i<32;i++)
2072     printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i]));
2073   printf("\n");
2074 #endif
2075 }
2076
2077 void enabletrace()
2078 {
2079   tracedebug=1;
2080 }
2081
2082 void memdebug(int i)
2083 {
2084   //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]);
2085   //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum());
2086   //rlist();
2087   //if(tracedebug) {
2088   //if(Count>=-2084597794) {
2089   if((signed int)Count>=-2084597794&&(signed int)Count<0) {
2090   //if(0) {
2091     printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
2092     //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status);
2093     //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]);
2094     rlist();
2095     #ifdef __i386__
2096     printf("TRACE: %x\n",(&i)[-1]);
2097     #endif
2098     #ifdef __arm__
2099     int j;
2100     printf("TRACE: %x \n",(&j)[10]);
2101     printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
2102     #endif
2103     //fflush(stdout);
2104   }
2105   //printf("TRACE: %x\n",(&i)[-1]);
2106 }
2107
2108 void tlb_debug(u_int cause, u_int addr, u_int iaddr)
2109 {
2110   printf("TLB Exception: instruction=%x addr=%x cause=%x\n",iaddr, addr, cause);
2111 }
2112
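// Emit code for register-register ALU ops: ADD/ADDU/SUB/SUBU, the 64-bit
// DADD/DSUB forms, SLT/SLTU and AND/OR/XOR/NOR, with special handling
// when a source register is r0 or not mapped to a host register.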
2113 void alu_assemble(int i,struct regstat *i_regs)
2114 {
2115   if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
2116     if(rt1[i]) {
2117       signed char s1,s2,t;
2118       t=get_reg(i_regs->regmap,rt1[i]);
2119       if(t>=0) {
2120         s1=get_reg(i_regs->regmap,rs1[i]);
2121         s2=get_reg(i_regs->regmap,rs2[i]);
2122         if(rs1[i]&&rs2[i]) {
2123           assert(s1>=0);
2124           assert(s2>=0);
2125           if(opcode2[i]&2) emit_sub(s1,s2,t);
2126           else emit_add(s1,s2,t);
2127         }
2128         else if(rs1[i]) {
2129           if(s1>=0) emit_mov(s1,t);
2130           else emit_loadreg(rs1[i],t);
2131         }
2132         else if(rs2[i]) {
2133           if(s2>=0) {
2134             if(opcode2[i]&2) emit_neg(s2,t);
2135             else emit_mov(s2,t);
2136           }
2137           else {
2138             emit_loadreg(rs2[i],t);
2139             if(opcode2[i]&2) emit_neg(t,t);
2140           }
2141         }
2142         else emit_zeroreg(t);
2143       }
2144     }
2145   }
2146   if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
2147     if(rt1[i]) {
2148       signed char s1l,s2l,s1h,s2h,tl,th;
2149       tl=get_reg(i_regs->regmap,rt1[i]);
2150       th=get_reg(i_regs->regmap,rt1[i]|64);
2151       if(tl>=0) {
2152         s1l=get_reg(i_regs->regmap,rs1[i]);
2153         s2l=get_reg(i_regs->regmap,rs2[i]);
2154         s1h=get_reg(i_regs->regmap,rs1[i]|64);
2155         s2h=get_reg(i_regs->regmap,rs2[i]|64);
2156         if(rs1[i]&&rs2[i]) {
2157           assert(s1l>=0);
2158           assert(s2l>=0);
2159           if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
2160           else emit_adds(s1l,s2l,tl);
2161           if(th>=0) {
2162             #ifdef INVERTED_CARRY
2163             if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
2164             #else
2165             if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
2166             #endif
2167             else emit_add(s1h,s2h,th);
2168           }
2169         }
2170         else if(rs1[i]) {
2171           if(s1l>=0) emit_mov(s1l,tl);
2172           else emit_loadreg(rs1[i],tl);
2173           if(th>=0) {
2174             if(s1h>=0) emit_mov(s1h,th);
2175             else emit_loadreg(rs1[i]|64,th);
2176           }
2177         }
2178         else if(rs2[i]) {
2179           if(s2l>=0) {
2180             if(opcode2[i]&2) emit_negs(s2l,tl);
2181             else emit_mov(s2l,tl);
2182           }
2183           else {
2184             emit_loadreg(rs2[i],tl);
2185             if(opcode2[i]&2) emit_negs(tl,tl);
2186           }
2187           if(th>=0) {
2188             #ifdef INVERTED_CARRY
2189             if(s2h>=0) emit_mov(s2h,th);
2190             else emit_loadreg(rs2[i]|64,th);
2191             if(opcode2[i]&2) {
2192               emit_adcimm(-1,th); // x86 has inverted carry flag
2193               emit_not(th,th);
2194             }
2195             #else
2196             if(opcode2[i]&2) {
2197               if(s2h>=0) emit_rscimm(s2h,0,th);
2198               else {
2199                 emit_loadreg(rs2[i]|64,th);
2200                 emit_rscimm(th,0,th);
2201               }
2202             }else{
2203               if(s2h>=0) emit_mov(s2h,th);
2204               else emit_loadreg(rs2[i]|64,th);
2205             }
2206             #endif
2207           }
2208         }
2209         else {
2210           emit_zeroreg(tl);
2211           if(th>=0) emit_zeroreg(th);
2212         }
2213       }
2214     }
2215   }
2216   if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
2217     if(rt1[i]) {
2218       signed char s1l,s1h,s2l,s2h,t;
2219       if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
2220       {
2221         t=get_reg(i_regs->regmap,rt1[i]);
2222         //assert(t>=0);
2223         if(t>=0) {
2224           s1l=get_reg(i_regs->regmap,rs1[i]);
2225           s1h=get_reg(i_regs->regmap,rs1[i]|64);
2226           s2l=get_reg(i_regs->regmap,rs2[i]);
2227           s2h=get_reg(i_regs->regmap,rs2[i]|64);
2228           if(rs2[i]==0) // rx<r0
2229           {
2230             assert(s1h>=0);
2231             if(opcode2[i]==0x2a) // SLT
2232               emit_shrimm(s1h,31,t);
2233             else // SLTU (unsigned can not be less than zero)
2234               emit_zeroreg(t);
2235           }
2236           else if(rs1[i]==0) // r0<rx
2237           {
2238             assert(s2h>=0);
2239             if(opcode2[i]==0x2a) // SLT
2240               emit_set_gz64_32(s2h,s2l,t);
2241             else // SLTU (set if not zero)
2242               emit_set_nz64_32(s2h,s2l,t);
2243           }
2244           else {
2245             assert(s1l>=0);assert(s1h>=0);
2246             assert(s2l>=0);assert(s2h>=0);
2247             if(opcode2[i]==0x2a) // SLT
2248               emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
2249             else // SLTU
2250               emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
2251           }
2252         }
2253       } else {
2254         t=get_reg(i_regs->regmap,rt1[i]);
2255         //assert(t>=0);
2256         if(t>=0) {
2257           s1l=get_reg(i_regs->regmap,rs1[i]);
2258           s2l=get_reg(i_regs->regmap,rs2[i]);
2259           if(rs2[i]==0) // rx<r0
2260           {
2261             assert(s1l>=0);
2262             if(opcode2[i]==0x2a) // SLT
2263               emit_shrimm(s1l,31,t);
2264             else // SLTU (unsigned can not be less than zero)
2265               emit_zeroreg(t);
2266           }
2267           else if(rs1[i]==0) // r0<rx
2268           {
2269             assert(s2l>=0);
2270             if(opcode2[i]==0x2a) // SLT
2271               emit_set_gz32(s2l,t);
2272             else // SLTU (set if not zero)
2273               emit_set_nz32(s2l,t);
2274           }
2275           else{
2276             assert(s1l>=0);assert(s2l>=0);
2277             if(opcode2[i]==0x2a) // SLT
2278               emit_set_if_less32(s1l,s2l,t);
2279             else // SLTU
2280               emit_set_if_carry32(s1l,s2l,t);
2281           }
2282         }
2283       }
2284     }
2285   }
2286   if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
2287     if(rt1[i]) {
2288       signed char s1l,s1h,s2l,s2h,th,tl;
2289       tl=get_reg(i_regs->regmap,rt1[i]);
2290       th=get_reg(i_regs->regmap,rt1[i]|64);
2291       if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
2292       {
2293         assert(tl>=0);
2294         if(tl>=0) {
2295           s1l=get_reg(i_regs->regmap,rs1[i]);
2296           s1h=get_reg(i_regs->regmap,rs1[i]|64);
2297           s2l=get_reg(i_regs->regmap,rs2[i]);
2298           s2h=get_reg(i_regs->regmap,rs2[i]|64);
2299           if(rs1[i]&&rs2[i]) {
2300             assert(s1l>=0);assert(s1h>=0);
2301             assert(s2l>=0);assert(s2h>=0);
2302             if(opcode2[i]==0x24) { // AND
2303               emit_and(s1l,s2l,tl);
2304               emit_and(s1h,s2h,th);
2305             } else
2306             if(opcode2[i]==0x25) { // OR
2307               emit_or(s1l,s2l,tl);
2308               emit_or(s1h,s2h,th);
2309             } else
2310             if(opcode2[i]==0x26) { // XOR
2311               emit_xor(s1l,s2l,tl);
2312               emit_xor(s1h,s2h,th);
2313             } else
2314             if(opcode2[i]==0x27) { // NOR
2315               emit_or(s1l,s2l,tl);
2316               emit_or(s1h,s2h,th);
2317               emit_not(tl,tl);
2318               emit_not(th,th);
2319             }
2320           }
2321           else
2322           {
2323             if(opcode2[i]==0x24) { // AND
2324               emit_zeroreg(tl);
2325               emit_zeroreg(th);
2326             } else
2327             if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2328               if(rs1[i]){
2329                 if(s1l>=0) emit_mov(s1l,tl);
2330                 else emit_loadreg(rs1[i],tl);
2331                 if(s1h>=0) emit_mov(s1h,th);
2332                 else emit_loadreg(rs1[i]|64,th);
2333               }
2334               else
2335               if(rs2[i]){
2336                 if(s2l>=0) emit_mov(s2l,tl);
2337                 else emit_loadreg(rs2[i],tl);
2338                 if(s2h>=0) emit_mov(s2h,th);
2339                 else emit_loadreg(rs2[i]|64,th);
2340               }
2341               else{
2342                 emit_zeroreg(tl);
2343                 emit_zeroreg(th);
2344               }
2345             } else
2346             if(opcode2[i]==0x27) { // NOR
2347               if(rs1[i]){
2348                 if(s1l>=0) emit_not(s1l,tl);
2349                 else{
2350                   emit_loadreg(rs1[i],tl);
2351                   emit_not(tl,tl);
2352                 }
2353                 if(s1h>=0) emit_not(s1h,th);
2354                 else{
2355                   emit_loadreg(rs1[i]|64,th);
2356                   emit_not(th,th);
2357                 }
2358               }
2359               else
2360               if(rs2[i]){
2361                 if(s2l>=0) emit_not(s2l,tl);
2362                 else{
2363                   emit_loadreg(rs2[i],tl);
2364                   emit_not(tl,tl);
2365                 }
2366                 if(s2h>=0) emit_not(s2h,th);
2367                 else{
2368                   emit_loadreg(rs2[i]|64,th);
2369                   emit_not(th,th);
2370                 }
2371               }
2372               else {
2373                 emit_movimm(-1,tl);
2374                 emit_movimm(-1,th);
2375               }
2376             }
2377           }
2378         }
2379       }
2380       else
2381       {
2382         // 32 bit
2383         if(tl>=0) {
2384           s1l=get_reg(i_regs->regmap,rs1[i]);
2385           s2l=get_reg(i_regs->regmap,rs2[i]);
2386           if(rs1[i]&&rs2[i]) {
2387             assert(s1l>=0);
2388             assert(s2l>=0);
2389             if(opcode2[i]==0x24) { // AND
2390               emit_and(s1l,s2l,tl);
2391             } else
2392             if(opcode2[i]==0x25) { // OR
2393               emit_or(s1l,s2l,tl);
2394             } else
2395             if(opcode2[i]==0x26) { // XOR
2396               emit_xor(s1l,s2l,tl);
2397             } else
2398             if(opcode2[i]==0x27) { // NOR
2399               emit_or(s1l,s2l,tl);
2400               emit_not(tl,tl);
2401             }
2402           }
2403           else
2404           {
2405             if(opcode2[i]==0x24) { // AND
2406               emit_zeroreg(tl);
2407             } else
2408             if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2409               if(rs1[i]){
2410                 if(s1l>=0) emit_mov(s1l,tl);
2411                 else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
2412               }
2413               else
2414               if(rs2[i]){
2415                 if(s2l>=0) emit_mov(s2l,tl);
2416                 else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
2417               }
2418               else emit_zeroreg(tl);
2419             } else
2420             if(opcode2[i]==0x27) { // NOR
2421               if(rs1[i]){
2422                 if(s1l>=0) emit_not(s1l,tl);
2423                 else {
2424                   emit_loadreg(rs1[i],tl);
2425                   emit_not(tl,tl);
2426                 }
2427               }
2428               else
2429               if(rs2[i]){
2430                 if(s2l>=0) emit_not(s2l,tl);
2431                 else {
2432                   emit_loadreg(rs2[i],tl);
2433                   emit_not(tl,tl);
2434                 }
2435               }
2436               else emit_movimm(-1,tl);
2437             }
2438           }
2439         }
2440       }
2441     }
2442   }
2443 }
2444
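// Emit code for immediate-operand ops: LUI, ADDI/ADDIU, DADDI/DADDIU,
// SLTI/SLTIU and ANDI/ORI/XORI, using propagated constants where available.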
2445 void imm16_assemble(int i,struct regstat *i_regs)
2446 {
2447   if (opcode[i]==0x0f) { // LUI
2448     if(rt1[i]) {
2449       signed char t;
2450       t=get_reg(i_regs->regmap,rt1[i]);
2451       //assert(t>=0);
2452       if(t>=0) {
2453         if(!((i_regs->isconst>>t)&1))
2454           emit_movimm(imm[i]<<16,t);
2455       }
2456     }
2457   }
2458   if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
2459     if(rt1[i]) {
2460       signed char s,t;
2461       t=get_reg(i_regs->regmap,rt1[i]);
2462       s=get_reg(i_regs->regmap,rs1[i]);
2463       if(rs1[i]) {
2464         //assert(t>=0);
2465         //assert(s>=0);
2466         if(t>=0) {
2467           if(!((i_regs->isconst>>t)&1)) {
2468             if(s<0) {
2469               if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2470               emit_addimm(t,imm[i],t);
2471             }else{
2472               if(!((i_regs->wasconst>>s)&1))
2473                 emit_addimm(s,imm[i],t);
2474               else
2475                 emit_movimm(constmap[i][s]+imm[i],t);
2476             }
2477           }
2478         }
2479       } else {
2480         if(t>=0) {
2481           if(!((i_regs->isconst>>t)&1))
2482             emit_movimm(imm[i],t);
2483         }
2484       }
2485     }
2486   }
2487   if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
2488     if(rt1[i]) {
2489       signed char sh,sl,th,tl;
2490       th=get_reg(i_regs->regmap,rt1[i]|64);
2491       tl=get_reg(i_regs->regmap,rt1[i]);
2492       sh=get_reg(i_regs->regmap,rs1[i]|64);
2493       sl=get_reg(i_regs->regmap,rs1[i]);
2494       if(tl>=0) {
2495         if(rs1[i]) {
2496           assert(sh>=0);
2497           assert(sl>=0);
2498           if(th>=0) {
2499             emit_addimm64_32(sh,sl,imm[i],th,tl);
2500           }
2501           else {
2502             emit_addimm(sl,imm[i],tl);
2503           }
2504         } else {
2505           emit_movimm(imm[i],tl);
2506           if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
2507         }
2508       }
2509     }
2510   }
2511   else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
2512     if(rt1[i]) {
2513       //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
2514       signed char sh,sl,t;
2515       t=get_reg(i_regs->regmap,rt1[i]);
2516       sh=get_reg(i_regs->regmap,rs1[i]|64);
2517       sl=get_reg(i_regs->regmap,rs1[i]);
2518       //assert(t>=0);
2519       if(t>=0) {
2520         if(rs1[i]>0) {
2521           if(sh<0) assert((i_regs->was32>>rs1[i])&1);
2522           if(sh<0||((i_regs->was32>>rs1[i])&1)) {
2523             if(opcode[i]==0x0a) { // SLTI
2524               if(sl<0) {
2525                 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2526                 emit_slti32(t,imm[i],t);
2527               }else{
2528                 emit_slti32(sl,imm[i],t);
2529               }
2530             }
2531             else { // SLTIU
2532               if(sl<0) {
2533                 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2534                 emit_sltiu32(t,imm[i],t);
2535               }else{
2536                 emit_sltiu32(sl,imm[i],t);
2537               }
2538             }
2539           }else{ // 64-bit
2540             assert(sl>=0);
2541             if(opcode[i]==0x0a) // SLTI
2542               emit_slti64_32(sh,sl,imm[i],t);
2543             else // SLTIU
2544               emit_sltiu64_32(sh,sl,imm[i],t);
2545           }
2546         }else{
2547           // SLTI(U) with r0 is just stupid,
2548           // nonetheless examples can be found
2549           if(opcode[i]==0x0a) { // SLTI
2550             if(0<imm[i]) emit_movimm(1,t);
2551             else emit_zeroreg(t);
2552           } else // SLTIU
2553           {
2554             if(imm[i]) emit_movimm(1,t);
2555             else emit_zeroreg(t);
2556           }
2557         }
2558       }
2559     }
2560   }
2561   else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
2562     if(rt1[i]) {
2563       signed char sh,sl,th,tl;
2564       th=get_reg(i_regs->regmap,rt1[i]|64);
2565       tl=get_reg(i_regs->regmap,rt1[i]);
2566       sh=get_reg(i_regs->regmap,rs1[i]|64);
2567       sl=get_reg(i_regs->regmap,rs1[i]);
2568       if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
2569         if(opcode[i]==0x0c) //ANDI
2570         {
2571           if(rs1[i]) {
2572             if(sl<0) {
2573               if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
2574               emit_andimm(tl,imm[i],tl);
2575             }else{
2576               if(!((i_regs->wasconst>>sl)&1))
2577                 emit_andimm(sl,imm[i],tl);
2578               else
2579                 emit_movimm(constmap[i][sl]&imm[i],tl);
2580             }
2581           }
2582           else
2583             emit_zeroreg(tl);
2584           if(th>=0) emit_zeroreg(th);
2585         }
2586         else
2587         {
2588           if(rs1[i]) {
2589             if(sl<0) {
2590               if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
2591             }
2592             if(th>=0) {
2593               if(sh<0) {
2594                 emit_loadreg(rs1[i]|64,th);
2595               }else{
2596                 emit_mov(sh,th);
2597               }
2598             }
2599             if(opcode[i]==0x0d) //ORI
2600             if(sl<0) {
2601               emit_orimm(tl,imm[i],tl);
2602             }else{
2603               if(!((i_regs->wasconst>>sl)&1))
2604                 emit_orimm(sl,imm[i],tl);
2605               else
2606                 emit_movimm(constmap[i][sl]|imm[i],tl);
2607             }
2608             if(opcode[i]==0x0e) //XORI
2609             if(sl<0) {
2610               emit_xorimm(tl,imm[i],tl);
2611             }else{
2612               if(!((i_regs->wasconst>>sl)&1))
2613                 emit_xorimm(sl,imm[i],tl);
2614               else
2615                 emit_movimm(constmap[i][sl]^imm[i],tl);
2616             }
2617           }
2618           else {
2619             emit_movimm(imm[i],tl);
2620             if(th>=0) emit_zeroreg(th);
2621           }
2622         }
2623       }
2624     }
2625   }
2626 }
2627
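// Emit code for shift-by-immediate ops: SLL/SRL/SRA and the doubleword
// DSLL/DSRL/DSRA variants, including the +32 forms.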
2628 void shiftimm_assemble(int i,struct regstat *i_regs)
2629 {
2630   if(opcode2[i]<=0x3) // SLL/SRL/SRA
2631   {
2632     if(rt1[i]) {
2633       signed char s,t;
2634       t=get_reg(i_regs->regmap,rt1[i]);
2635       s=get_reg(i_regs->regmap,rs1[i]);
2636       //assert(t>=0);
2637       if(t>=0){
2638         if(rs1[i]==0)
2639         {
2640           emit_zeroreg(t);
2641         }
2642         else
2643         {
2644           if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2645           if(imm[i]) {
2646             if(opcode2[i]==0) // SLL
2647             {
2648               emit_shlimm(s<0?t:s,imm[i],t);
2649             }
2650             if(opcode2[i]==2) // SRL
2651             {
2652               emit_shrimm(s<0?t:s,imm[i],t);
2653             }
2654             if(opcode2[i]==3) // SRA
2655             {
2656               emit_sarimm(s<0?t:s,imm[i],t);
2657             }
2658           }else{
2659             // Shift by zero
2660             if(s>=0 && s!=t) emit_mov(s,t);
2661           }
2662         }
2663       }
2664       //emit_storereg(rt1[i],t); //DEBUG
2665     }
2666   }
2667   if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
2668   {
2669     if(rt1[i]) {
2670       signed char sh,sl,th,tl;
2671       th=get_reg(i_regs->regmap,rt1[i]|64);
2672       tl=get_reg(i_regs->regmap,rt1[i]);
2673       sh=get_reg(i_regs->regmap,rs1[i]|64);
2674       sl=get_reg(i_regs->regmap,rs1[i]);
2675       if(tl>=0) {
2676         if(rs1[i]==0)
2677         {
2678           emit_zeroreg(tl);
2679           if(th>=0) emit_zeroreg(th);
2680         }
2681         else
2682         {
2683           assert(sl>=0);
2684           assert(sh>=0);
2685           if(imm[i]) {
2686             if(opcode2[i]==0x38) // DSLL
2687             {
2688               if(th>=0) emit_shldimm(sh,sl,imm[i],th);
2689               emit_shlimm(sl,imm[i],tl);
2690             }
2691             if(opcode2[i]==0x3a) // DSRL
2692             {
2693               emit_shrdimm(sl,sh,imm[i],tl);
2694               if(th>=0) emit_shrimm(sh,imm[i],th);
2695             }
2696             if(opcode2[i]==0x3b) // DSRA
2697             {
2698               emit_shrdimm(sl,sh,imm[i],tl);
2699               if(th>=0) emit_sarimm(sh,imm[i],th);
2700             }
2701           }else{
2702             // Shift by zero
2703             if(sl!=tl) emit_mov(sl,tl);
2704             if(th>=0&&sh!=th) emit_mov(sh,th);
2705           }
2706         }
2707       }
2708     }
2709   }
2710   if(opcode2[i]==0x3c) // DSLL32
2711   {
2712     if(rt1[i]) {
2713       signed char sl,tl,th;
2714       tl=get_reg(i_regs->regmap,rt1[i]);
2715       th=get_reg(i_regs->regmap,rt1[i]|64);
2716       sl=get_reg(i_regs->regmap,rs1[i]);
2717       if(th>=0||tl>=0){
2718         assert(tl>=0);
2719         assert(th>=0);
2720         assert(sl>=0);
2721         emit_mov(sl,th);
2722         emit_zeroreg(tl);
2723         if(imm[i]>32)
2724         {
2725           emit_shlimm(th,imm[i]&31,th);
2726         }
2727       }
2728     }
2729   }
2730   if(opcode2[i]==0x3e) // DSRL32
2731   {
2732     if(rt1[i]) {
2733       signed char sh,tl,th;
2734       tl=get_reg(i_regs->regmap,rt1[i]);
2735       th=get_reg(i_regs->regmap,rt1[i]|64);
2736       sh=get_reg(i_regs->regmap,rs1[i]|64);
2737       if(tl>=0){
2738         assert(sh>=0);
2739         emit_mov(sh,tl);
2740         if(th>=0) emit_zeroreg(th);
2741         if(imm[i]>32)
2742         {
2743           emit_shrimm(tl,imm[i]&31,tl);
2744         }
2745       }
2746     }
2747   }
2748   if(opcode2[i]==0x3f) // DSRA32
2749   {
2750     if(rt1[i]) {
2751       signed char sh,tl;
2752       tl=get_reg(i_regs->regmap,rt1[i]);
2753       sh=get_reg(i_regs->regmap,rs1[i]|64);
2754       if(tl>=0){
2755         assert(sh>=0);
2756         emit_mov(sh,tl);
2757         if(imm[i]>32)
2758         {
2759           emit_sarimm(tl,imm[i]&31,tl);
2760         }
2761       }
2762     }
2763   }
2764 }
2765
2766 #ifndef shift_assemble
2767 void shift_assemble(int i,struct regstat *i_regs)
2768 {
2769   printf("Need shift_assemble for this architecture.\n");
2770   exit(1);
2771 }
2772 #endif
2773
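// Emit code for loads: compute the address, range-check it against RAM
// (or translate it through the TLB), perform the access inline for RAM,
// and fall back to a stub (add_stub) or inline_readstub for everything
// else, e.g. hardware I/O.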
2774 void load_assemble(int i,struct regstat *i_regs)
2775 {
2776   int s,th,tl,addr,map=-1;
2777   int offset;
2778   int jaddr=0;
2779   int memtarget=0,c=0;
2780   u_int hr,reglist=0;
2781   th=get_reg(i_regs->regmap,rt1[i]|64);
2782   tl=get_reg(i_regs->regmap,rt1[i]);
2783   s=get_reg(i_regs->regmap,rs1[i]);
2784   offset=imm[i];
2785   for(hr=0;hr<HOST_REGS;hr++) {
2786     if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2787   }
2788   if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2789   if(s>=0) {
2790     c=(i_regs->wasconst>>s)&1;
2791     if (c) {
2792       memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2793       if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
2794     }
2795   }
2796   //printf("load_assemble: c=%d\n",c);
2797   //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
2798   // FIXME: Even if the load is a NOP, we should check for pagefaults...
2799 #ifdef PCSX
2800   if(tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80)
2801     ||rt1[i]==0) {
2802       // the address could be hardware I/O (e.g. a FIFO), so the read must be performed anyway
2803       // (likewise for dummy reads to r0, which can also have side effects)
2804       assem_debug("(forced read)\n");
2805       tl=get_reg(i_regs->regmap,-1);
2806       assert(tl>=0);
2807   }
2808 #endif
2809   if(offset||s<0||c) addr=tl;
2810   else addr=s;
2811   //if(tl<0) tl=get_reg(i_regs->regmap,-1);
2812  if(tl>=0) {
2813   //printf("load_assemble: c=%d\n",c);
2814   //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
2815   assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
2816   reglist&=~(1<<tl);
2817   if(th>=0) reglist&=~(1<<th);
2818   if(!using_tlb) {
2819     if(!c) {
2820       #ifdef RAM_OFFSET
2821       map=get_reg(i_regs->regmap,ROREG);
2822       if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
2823       #endif
2824 //#define R29_HACK 1
2825       #ifdef R29_HACK
2826       // Strmnnrmn's speed hack
2827       if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
2828       #endif
2829       {
2830         #ifdef PCSX
2831         if(sp_in_mirror&&rs1[i]==29) {
2832           emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
2833           emit_cmpimm(HOST_TEMPREG,RAM_SIZE);
2834         }
2835         else
2836         #endif
2837         emit_cmpimm(addr,RAM_SIZE);
2838         jaddr=(int)out;
2839         #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2840         // Hint to branch predictor that the branch is unlikely to be taken
2841         if(rs1[i]>=28)
2842           emit_jno_unlikely(0);
2843         else
2844         #endif
2845         emit_jno(0);
2846       }
2847     }
2848   }else{ // using tlb
2849     int x=0;
2850     if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
2851     if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
2852     map=get_reg(i_regs->regmap,TLREG);
2853     assert(map>=0);
2854     map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
2855     do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
2856   }
2857   int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
2858   if (opcode[i]==0x20) { // LB
2859     if(!c||memtarget) {
2860       if(!dummy) {
2861         #ifdef HOST_IMM_ADDR32
2862         if(c)
2863           emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
2864         else
2865         #endif
2866         {
2867           //emit_xorimm(addr,3,tl);
2868           //gen_tlb_addr_r(tl,map);
2869           //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
2870           int x=0,a=tl;
2871 #ifdef BIG_ENDIAN_MIPS
2872           if(!c) emit_xorimm(addr,3,tl);
2873           else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2874 #else
2875           if(!c) a=addr;
2876 #endif
2877 #ifdef PCSX
2878           if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
2879 #endif
2880           emit_movsbl_indexed_tlb(x,a,map,tl);
2881         }
2882       }
2883       if(jaddr)
2884         add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2885     }
2886     else
2887       inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2888   }
2889   if (opcode[i]==0x21) { // LH
2890     if(!c||memtarget) {
2891       if(!dummy) {
2892         #ifdef HOST_IMM_ADDR32
2893         if(c)
2894           emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
2895         else
2896         #endif
2897         {
2898           int x=0,a=tl;
2899 #ifdef BIG_ENDIAN_MIPS
2900           if(!c) emit_xorimm(addr,2,tl);
2901           else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2902 #else
2903           if(!c) a=addr;
2904 #endif
2905 #ifdef PCSX
2906           if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
2907 #endif
2908           //#ifdef
2909           //emit_movswl_indexed_tlb(x,tl,map,tl);
2910           //else
2911           if(map>=0) {
2912             gen_tlb_addr_r(a,map);
2913             emit_movswl_indexed(x,a,tl);
2914           }else{
2915             #ifdef RAM_OFFSET
2916             emit_movswl_indexed(x,a,tl);
2917             #else
2918             emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
2919             #endif
2920           }
2921         }
2922       }
2923       if(jaddr)
2924         add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2925     }
2926     else
2927       inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2928   }
2929   if (opcode[i]==0x23) { // LW
2930     if(!c||memtarget) {
2931       if(!dummy) {
2932         int a=addr;
2933 #ifdef PCSX
2934         if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
2935 #endif
2936         //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
2937         #ifdef HOST_IMM_ADDR32
2938         if(c)
2939           emit_readword_tlb(constmap[i][s]+offset,map,tl);
2940         else
2941         #endif
2942         emit_readword_indexed_tlb(0,a,map,tl);
2943       }
2944       if(jaddr)
2945         add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2946     }
2947     else
2948       inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2949   }
2950   if (opcode[i]==0x24) { // LBU
2951     if(!c||memtarget) {
2952       if(!dummy) {
2953         #ifdef HOST_IMM_ADDR32
2954         if(c)
2955           emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
2956         else
2957         #endif
2958         {
2959           //emit_xorimm(addr,3,tl);
2960           //gen_tlb_addr_r(tl,map);
2961           //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
2962           int x=0,a=tl;
2963 #ifdef BIG_ENDIAN_MIPS
2964           if(!c) emit_xorimm(addr,3,tl);
2965           else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2966 #else
2967           if(!c) a=addr;
2968 #endif
2969 #ifdef PCSX
2970           if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
2971 #endif
2972           emit_movzbl_indexed_tlb(x,a,map,tl);
2973         }
2974       }
2975       if(jaddr)
2976         add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2977     }
2978     else
2979       inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2980   }
2981   if (opcode[i]==0x25) { // LHU
2982     if(!c||memtarget) {
2983       if(!dummy) {
2984         #ifdef HOST_IMM_ADDR32
2985         if(c)
2986           emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
2987         else
2988         #endif
2989         {
2990           int x=0,a=tl;
2991 #ifdef BIG_ENDIAN_MIPS
2992           if(!c) emit_xorimm(addr,2,tl);
2993           else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2994 #else
2995           if(!c) a=addr;
2996 #endif
2997 #ifdef PCSX
2998           if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
2999 #endif
3000           //#ifdef
3001           //emit_movzwl_indexed_tlb(x,tl,map,tl);
3002           //#else
3003           if(map>=0) {
3004             gen_tlb_addr_r(a,map);
3005             emit_movzwl_indexed(x,a,tl);
3006           }else{
3007             #ifdef RAM_OFFSET
3008             emit_movzwl_indexed(x,a,tl);
3009             #else
3010             emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
3011             #endif
3012           }
3013         }
3014       }
3015       if(jaddr)
3016         add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3017     }
3018     else
3019       inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3020   }
3021   if (opcode[i]==0x27) { // LWU
3022     assert(th>=0);
3023     if(!c||memtarget) {
3024       if(!dummy) {
3025         int a=addr;
3026 #ifdef PCSX
3027         if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
3028 #endif
3029         //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
3030         #ifdef HOST_IMM_ADDR32
3031         if(c)
3032           emit_readword_tlb(constmap[i][s]+offset,map,tl);
3033         else
3034         #endif
3035         emit_readword_indexed_tlb(0,a,map,tl);
3036       }
3037       if(jaddr)
3038         add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3039     }
3040     else {
3041       inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3042     }
3043     emit_zeroreg(th);
3044   }
3045   if (opcode[i]==0x37) { // LD
3046     if(!c||memtarget) {
3047       if(!dummy) {
3048         int a=addr;
3049 #ifdef PCSX
3050         if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
3051 #endif
3052         //gen_tlb_addr_r(tl,map);
3053         //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
3054         //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
3055         #ifdef HOST_IMM_ADDR32
3056         if(c)
3057           emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
3058         else
3059         #endif
3060         emit_readdword_indexed_tlb(0,a,map,th,tl);
3061       }
3062       if(jaddr)
3063         add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3064     }
3065     else
3066       inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3067   }
3068  }
3069   //emit_storereg(rt1[i],tl); // DEBUG
3070   //if(opcode[i]==0x23)
3071   //if(opcode[i]==0x24)
3072   //if(opcode[i]==0x23||opcode[i]==0x24)
3073   /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
3074   {
3075     //emit_pusha();
3076     save_regs(0x100f);
3077         emit_readword((int)&last_count,ECX);
3078         #ifdef __i386__
3079         if(get_reg(i_regs->regmap,CCREG)<0)
3080           emit_loadreg(CCREG,HOST_CCREG);
3081         emit_add(HOST_CCREG,ECX,HOST_CCREG);
3082         emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3083         emit_writeword(HOST_CCREG,(int)&Count);
3084         #endif
3085         #ifdef __arm__
3086         if(get_reg(i_regs->regmap,CCREG)<0)
3087           emit_loadreg(CCREG,0);
3088         else
3089           emit_mov(HOST_CCREG,0);
3090         emit_add(0,ECX,0);
3091         emit_addimm(0,2*ccadj[i],0);
3092         emit_writeword(0,(int)&Count);
3093         #endif
3094     emit_call((int)memdebug);
3095     //emit_popa();
3096     restore_regs(0x100f);
3097   }/**/
3098 }
3099
3100 #ifndef loadlr_assemble
3101 void loadlr_assemble(int i,struct regstat *i_regs)
3102 {
3103   printf("Need loadlr_assemble for this architecture.\n");
3104   exit(1);
3105 }
3106 #endif
3107
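// Emit code for stores: like load_assemble, but also checks invalid_code
// (INVCP/INVCODE_STUB) so that writes to pages containing translated code
// invalidate the affected blocks.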
3108 void store_assemble(int i,struct regstat *i_regs)
3109 {
3110   int s,th,tl,map=-1;
3111   int addr,temp;
3112   int offset;
3113   int jaddr=0,jaddr2,type;
3114   int memtarget=0,c=0;
3115   int agr=AGEN1+(i&1);
3116   u_int hr,reglist=0;
3117   th=get_reg(i_regs->regmap,rs2[i]|64);
3118   tl=get_reg(i_regs->regmap,rs2[i]);
3119   s=get_reg(i_regs->regmap,rs1[i]);
3120   temp=get_reg(i_regs->regmap,agr);
3121   if(temp<0) temp=get_reg(i_regs->regmap,-1);
3122   offset=imm[i];
3123   if(s>=0) {
3124     c=(i_regs->wasconst>>s)&1;
3125     if(c) {
3126       memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3127       if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3128     }
3129   }
3130   assert(tl>=0);
3131   assert(temp>=0);
3132   for(hr=0;hr<HOST_REGS;hr++) {
3133     if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3134   }
3135   if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
3136   if(offset||s<0||c) addr=temp;
3137   else addr=s;
3138   if(!using_tlb) {
3139     if(!c) {
3140       #ifdef PCSX
3141       if(sp_in_mirror&&rs1[i]==29) {
3142         emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3143         emit_cmpimm(HOST_TEMPREG,RAM_SIZE);
3144       }
3145       else
3146       #endif
3147       #ifdef R29_HACK
3148       // Strmnnrmn's speed hack
3149       if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
3150       #endif
3151       emit_cmpimm(addr,RAM_SIZE);
3152       #ifdef DESTRUCTIVE_SHIFT
3153       if(s==addr) emit_mov(s,temp);
3154       #endif
3155       #ifdef R29_HACK
3156       memtarget=1;
3157       if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
3158       #endif
3159       {
3160         jaddr=(int)out;
3161         #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3162         // Hint to branch predictor that the branch is unlikely to be taken
3163         if(rs1[i]>=28)
3164           emit_jno_unlikely(0);
3165         else
3166         #endif
3167         emit_jno(0);
3168       }
3169     }
3170   }else{ // using tlb
3171     int x=0;
3172     if (opcode[i]==0x28) x=3; // SB
3173     if (opcode[i]==0x29) x=2; // SH
3174     map=get_reg(i_regs->regmap,TLREG);
3175     assert(map>=0);
3176     map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
3177     do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
3178   }
3179
3180   if (opcode[i]==0x28) { // SB
3181     if(!c||memtarget) {
3182       int x=0,a=temp;
3183 #ifdef BIG_ENDIAN_MIPS
3184       if(!c) emit_xorimm(addr,3,temp);
3185       else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
3186 #else
3187       if(!c) a=addr;
3188 #endif
3189 #ifdef PCSX
3190       if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
3191 #endif
3192       //gen_tlb_addr_w(temp,map);
3193       //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
3194       emit_writebyte_indexed_tlb(tl,x,a,map,a);
3195     }
3196     type=STOREB_STUB;
3197   }
3198   if (opcode[i]==0x29) { // SH
3199     if(!c||memtarget) {
3200       int x=0,a=temp;
3201 #ifdef BIG_ENDIAN_MIPS
3202       if(!c) emit_xorimm(addr,2,temp);
3203       else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
3204 #else
3205       if(!c) a=addr;
3206 #endif
3207 #ifdef PCSX
3208       if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
3209 #endif
3210       //#ifdef
3211       //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
3212       //#else
3213       if(map>=0) {
3214         gen_tlb_addr_w(a,map);
3215         emit_writehword_indexed(tl,x,a);
3216       }else
3217         emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
3218     }
3219     type=STOREH_STUB;
3220   }
3221   if (opcode[i]==0x2B) { // SW
3222     if(!c||memtarget) {
3223       int a=addr;
3224 #ifdef PCSX
3225       if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
3226 #endif
3227       //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
3228       emit_writeword_indexed_tlb(tl,0,a,map,temp);
3229     }
3230     type=STOREW_STUB;
3231   }
3232   if (opcode[i]==0x3F) { // SD
3233     if(!c||memtarget) {
3234       int a=addr;
3235 #ifdef PCSX
3236       if(sp_in_mirror&&rs1[i]==29) a=HOST_TEMPREG;
3237 #endif
3238       if(rs2[i]) {
3239         assert(th>=0);
3240         //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
3241         //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
3242         emit_writedword_indexed_tlb(th,tl,0,a,map,temp);
3243       }else{
3244         // Store zero
3245         //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
3246         //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
3247         emit_writedword_indexed_tlb(tl,tl,0,a,map,temp);
3248       }
3249     }
3250     type=STORED_STUB;
3251   }
3252   if(!using_tlb) {
3253     if(!c||memtarget) {
3254       #ifdef DESTRUCTIVE_SHIFT
3255       // The x86 shift operation is 'destructive'; it overwrites the
3256       // source register, so we need to make a copy first and use that.
3257       addr=temp;
3258       #endif
3259       #if defined(HOST_IMM8)
3260       int ir=get_reg(i_regs->regmap,INVCP);
3261       assert(ir>=0);
3262       emit_cmpmem_indexedsr12_reg(ir,addr,1);
3263       #else
3264       emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1);
3265       #endif
3266       #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
3267       emit_callne(invalidate_addr_reg[addr]);
3268       #else
3269       jaddr2=(int)out;
3270       emit_jne(0);
3271       add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),addr,0,0,0);
3272       #endif
3273     }
3274   }
3275   if(jaddr) {
3276     add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3277   } else if(c&&!memtarget) {
3278     inline_writestub(type,i,constmap[i][s]+offset,i_regs->regmap,rs2[i],ccadj[i],reglist);
3279   }
3280   //if(opcode[i]==0x2B || opcode[i]==0x3F)
3281   //if(opcode[i]==0x2B || opcode[i]==0x28)
3282   //if(opcode[i]==0x2B || opcode[i]==0x29)
3283   //if(opcode[i]==0x2B)
3284   /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F)
3285   {
3286     //emit_pusha();
3287     save_regs(0x100f);
3288         emit_readword((int)&last_count,ECX);
3289         #ifdef __i386__
3290         if(get_reg(i_regs->regmap,CCREG)<0)
3291           emit_loadreg(CCREG,HOST_CCREG);
3292         emit_add(HOST_CCREG,ECX,HOST_CCREG);
3293         emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3294         emit_writeword(HOST_CCREG,(int)&Count);
3295         #endif
3296         #ifdef __arm__
3297         if(get_reg(i_regs->regmap,CCREG)<0)
3298           emit_loadreg(CCREG,0);
3299         else
3300           emit_mov(HOST_CCREG,0);
3301         emit_add(0,ECX,0);
3302         emit_addimm(0,2*ccadj[i],0);
3303         emit_writeword(0,(int)&Count);
3304         #endif
3305     emit_call((int)memdebug);
3306     //emit_popa();
3307     restore_regs(0x100f);
3308   }/**/
3309 }
3310
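// Emit code for the unaligned store instructions SWL/SWR/SDL/SDR, which
// write only part of the register depending on the low address bits
// (handled by the case1/case2/case3 paths below).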
3311 void storelr_assemble(int i,struct regstat *i_regs)
3312 {
3313   int s,th,tl;
3314   int temp;
3315   int temp2;
3316   int offset;
3317   int jaddr=0,jaddr2;
3318   int case1,case2,case3;
3319   int done0,done1,done2;
3320   int memtarget=0,c=0;
3321   int agr=AGEN1+(i&1);
3322   u_int hr,reglist=0;
3323   th=get_reg(i_regs->regmap,rs2[i]|64);
3324   tl=get_reg(i_regs->regmap,rs2[i]);
3325   s=get_reg(i_regs->regmap,rs1[i]);
3326   temp=get_reg(i_regs->regmap,agr);
3327   if(temp<0) temp=get_reg(i_regs->regmap,-1);
3328   offset=imm[i];
3329   if(s>=0) {
3330     c=(i_regs->isconst>>s)&1;
3331     if(c) {
3332       memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3333       if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3334     }
3335   }
3336   assert(tl>=0);
3337   for(hr=0;hr<HOST_REGS;hr++) {
3338     if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3339   }
3340   assert(temp>=0);
3341   if(!using_tlb) {
3342     if(!c) {
3343       emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
3344       if(!offset&&s!=temp) emit_mov(s,temp);
3345       jaddr=(int)out;
3346       emit_jno(0);
3347     }
3348     else
3349     {
3350       if(!memtarget||!rs1[i]) {
3351         jaddr=(int)out;
3352         emit_jmp(0);
3353       }
3354     }
3355     #ifdef RAM_OFFSET
3356     int map=get_reg(i_regs->regmap,ROREG);
3357     if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3358     gen_tlb_addr_w(temp,map);
3359     #else
3360     if((u_int)rdram!=0x80000000) 
3361       emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
3362     #endif
3363   }else{ // using tlb
3364     int map=get_reg(i_regs->regmap,TLREG);
3365     assert(map>=0);
3366     map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
3367     if(!c&&!offset&&s>=0) emit_mov(s,temp);
3368     do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
3369     if(!jaddr&&!memtarget) {
3370       jaddr=(int)out;
3371       emit_jmp(0);
3372     }
3373     gen_tlb_addr_w(temp,map);
3374   }
3375
3376   if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
3377     temp2=get_reg(i_regs->regmap,FTEMP);
3378     if(!rs2[i]) temp2=th=tl;
3379   }
3380
3381 #ifndef BIG_ENDIAN_MIPS