drc: implement block addr list saving
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  *   Mupen64plus - new_dynarec.c                                           *
3  *   Copyright (C) 2009-2011 Ari64                                         *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
19  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21 #include <stdlib.h>
22 #include <stdint.h> //include for uint64_t
23 #include <assert.h>
24 #include <errno.h>
25 #include <sys/mman.h>
26
27 #include "emu_if.h" //emulator interface
28
29 //#define DISASM
30 //#define assem_debug printf
31 //#define inv_debug printf
32 #define assem_debug(...)
33 #define inv_debug(...)
34
35 #ifdef __i386__
36 #include "assem_x86.h"
37 #endif
38 #ifdef __x86_64__
39 #include "assem_x64.h"
40 #endif
41 #ifdef __arm__
42 #include "assem_arm.h"
43 #endif
44
45 #ifdef __BLACKBERRY_QNX__
46 #undef __clear_cache
47 #define __clear_cache(start,end) msync(start, (size_t)((void*)end - (void*)start), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
48 #elif defined(__MACH__)
49 #include <libkern/OSCacheControl.h>
50 #define __clear_cache mach_clear_cache
51 static void __clear_cache(void *start, void *end) {
52   size_t len = (char *)end - (char *)start;
53   sys_dcache_flush(start, len);
54   sys_icache_invalidate(start, len);
55 }
56 #endif
57
58 #define MAXBLOCK 4096
59 #define MAX_OUTPUT_BLOCK_SIZE 262144
60
// Per-instruction register-allocation state tracked by the recompiler.
struct regstat
{
  signed char regmap_entry[HOST_REGS]; // host->MIPS mapping expected on entry to this insn
  signed char regmap[HOST_REGS]; // host->MIPS mapping (-1 = free; &63 = reg index, +64 = upper half)
  uint64_t was32;                // per-MIPS-reg: value was 32-bit (sign-extended) before the insn
  uint64_t is32;                 // per-MIPS-reg: value is 32-bit after the insn (see flush_dirty_uppers)
  uint64_t wasdirty;             // per-host-reg: held an unwritten-back value before the insn
  uint64_t dirty;                // per-host-reg: holds an unwritten-back value after (see dirty_reg)
  uint64_t u;                    // presumably per-MIPS-reg "value unneeded" mask — confirm against unneeded_reg pass
  uint64_t uu;                   // presumably upper-half "unneeded" mask — confirm
  u_int wasconst;                // per-host-reg: held a known constant before the insn
  u_int isconst;                 // per-host-reg: holds a known constant (value in current_constmap)
  u_int loadedconst;             // host regs that have constants loaded
  u_int waswritten;              // MIPS regs that were used as store base before
};
76
// note: asm depends on this layout
// One node in the per-page linked lists (jump_in/jump_out/jump_dirty)
// mapping a guest virtual address to translated code.
struct ll_entry
{
  u_int vaddr;             // guest virtual address of the block entry point
  u_int reg_sv_flags;      // per-register flags recorded at entry (set via ll_add_flags) — semantics defined by callers
  void *addr;              // pointer into the translation cache
  struct ll_entry *next;   // next entry in the same page's chain
};
85
86   u_int start;
87   u_int *source;
88   char insn[MAXBLOCK][10];
89   u_char itype[MAXBLOCK];
90   u_char opcode[MAXBLOCK];
91   u_char opcode2[MAXBLOCK];
92   u_char bt[MAXBLOCK];
93   u_char rs1[MAXBLOCK];
94   u_char rs2[MAXBLOCK];
95   u_char rt1[MAXBLOCK];
96   u_char rt2[MAXBLOCK];
97   u_char us1[MAXBLOCK];
98   u_char us2[MAXBLOCK];
99   u_char dep1[MAXBLOCK];
100   u_char dep2[MAXBLOCK];
101   u_char lt1[MAXBLOCK];
102   static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
103   static uint64_t gte_rt[MAXBLOCK];
104   static uint64_t gte_unneeded[MAXBLOCK];
105   static u_int smrv[32]; // speculated MIPS register values
106   static u_int smrv_strong; // mask or regs that are likely to have correct values
107   static u_int smrv_weak; // same, but somewhat less likely
108   static u_int smrv_strong_next; // same, but after current insn executes
109   static u_int smrv_weak_next;
110   int imm[MAXBLOCK];
111   u_int ba[MAXBLOCK];
112   char likely[MAXBLOCK];
113   char is_ds[MAXBLOCK];
114   char ooo[MAXBLOCK];
115   uint64_t unneeded_reg[MAXBLOCK];
116   uint64_t unneeded_reg_upper[MAXBLOCK];
117   uint64_t branch_unneeded_reg[MAXBLOCK];
118   uint64_t branch_unneeded_reg_upper[MAXBLOCK];
119   uint64_t p32[MAXBLOCK];
120   uint64_t pr32[MAXBLOCK];
121   signed char regmap_pre[MAXBLOCK][HOST_REGS];
122   static uint64_t current_constmap[HOST_REGS];
123   static uint64_t constmap[MAXBLOCK][HOST_REGS];
124   static struct regstat regs[MAXBLOCK];
125   static struct regstat branch_regs[MAXBLOCK];
126   signed char minimum_free_regs[MAXBLOCK];
127   u_int needed_reg[MAXBLOCK];
128   uint64_t requires_32bit[MAXBLOCK];
129   u_int wont_dirty[MAXBLOCK];
130   u_int will_dirty[MAXBLOCK];
131   int ccadj[MAXBLOCK];
132   int slen;
133   u_int instr_addr[MAXBLOCK];
134   u_int link_addr[MAXBLOCK][3];
135   int linkcount;
136   u_int stubs[MAXBLOCK*3][8];
137   int stubcount;
138   u_int literals[1024][2];
139   int literalcount;
140   int is_delayslot;
141   int cop1_usable;
142   u_char *out;
143   struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
144   struct ll_entry *jump_out[4096];
145   struct ll_entry *jump_dirty[4096];
146   u_int hash_table[65536][4]  __attribute__((aligned(16)));
147   char shadow[1048576]  __attribute__((aligned(16)));
148   void *copy;
149   int expirep;
150 #ifndef PCSX
151   u_int using_tlb;
152 #else
153   static const u_int using_tlb=0;
154 #endif
155   int new_dynarec_did_compile;
156   int new_dynarec_hacks;
157   u_int stop_after_jal;
158 #ifndef RAM_FIXED
159   static u_int ram_offset;
160 #else
161   static const u_int ram_offset=0;
162 #endif
163   extern u_char restore_candidate[512];
164   extern int cycle_count;
165
166   /* registers that may be allocated */
167   /* 1-31 gpr */
168 #define HIREG 32 // hi
169 #define LOREG 33 // lo
170 #define FSREG 34 // FPU status (FCSR)
171 #define CSREG 35 // Coprocessor status
172 #define CCREG 36 // Cycle count
173 #define INVCP 37 // Pointer to invalid_code
174 #define MMREG 38 // Pointer to memory_map
175 #define ROREG 39 // ram offset (if rdram!=0x80000000)
176 #define TEMPREG 40
177 #define FTEMP 40 // FPU temporary register
178 #define PTEMP 41 // Prefetch temporary register
179 #define TLREG 42 // TLB mapping offset
180 #define RHASH 43 // Return address hash
181 #define RHTBL 44 // Return address hash table address
182 #define RTEMP 45 // JR/JALR address register
183 #define MAXREG 45
184 #define AGEN1 46 // Address generation temporary register
185 #define AGEN2 47 // Address generation temporary register
186 #define MGEN1 48 // Maptable address generation temporary register
187 #define MGEN2 49 // Maptable address generation temporary register
188 #define BTREG 50 // Branch target temporary register
189
190   /* instruction types */
191 #define NOP 0     // No operation
192 #define LOAD 1    // Load
193 #define STORE 2   // Store
194 #define LOADLR 3  // Unaligned load
195 #define STORELR 4 // Unaligned store
196 #define MOV 5     // Move 
197 #define ALU 6     // Arithmetic/logic
198 #define MULTDIV 7 // Multiply/divide
199 #define SHIFT 8   // Shift by register
200 #define SHIFTIMM 9// Shift by immediate
201 #define IMM16 10  // 16-bit immediate
202 #define RJUMP 11  // Unconditional jump to register
203 #define UJUMP 12  // Unconditional jump
204 #define CJUMP 13  // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
205 #define SJUMP 14  // Conditional branch (regimm format)
206 #define COP0 15   // Coprocessor 0
207 #define COP1 16   // Coprocessor 1
208 #define C1LS 17   // Coprocessor 1 load/store
209 #define FJUMP 18  // Conditional branch (floating point)
210 #define FLOAT 19  // Floating point unit
211 #define FCONV 20  // Convert integer to float
212 #define FCOMP 21  // Floating point compare (sets FSREG)
213 #define SYSCALL 22// SYSCALL
214 #define OTHER 23  // Other
215 #define SPAN 24   // Branch/delay slot spans 2 pages
216 #define NI 25     // Not implemented
217 #define HLECALL 26// PCSX fake opcodes for HLE
218 #define COP2 27   // Coprocessor 2 move
219 #define C2LS 28   // Coprocessor 2 load/store
220 #define C2OP 29   // Coprocessor 2 operation
221 #define INTCALL 30// Call interpreter to handle rare corner cases
222
223   /* stubs */
224 #define CC_STUB 1
225 #define FP_STUB 2
226 #define LOADB_STUB 3
227 #define LOADH_STUB 4
228 #define LOADW_STUB 5
229 #define LOADD_STUB 6
230 #define LOADBU_STUB 7
231 #define LOADHU_STUB 8
232 #define STOREB_STUB 9
233 #define STOREH_STUB 10
234 #define STOREW_STUB 11
235 #define STORED_STUB 12
236 #define STORELR_STUB 13
237 #define INVCODE_STUB 14
238
239   /* branch codes */
240 #define TAKEN 1
241 #define NOTTAKEN 2
242 #define NULLDS 3
243
244 // asm linkage
245 int new_recompile_block(int addr);
246 void *get_addr_ht(u_int vaddr);
247 void invalidate_block(u_int block);
248 void invalidate_addr(u_int addr);
249 void remove_hash(int vaddr);
250 void jump_vaddr();
251 void dyna_linker();
252 void dyna_linker_ds();
253 void verify_code();
254 void verify_code_vm();
255 void verify_code_ds();
256 void cc_interrupt();
257 void fp_exception();
258 void fp_exception_ds();
259 void jump_syscall();
260 void jump_syscall_hle();
261 void jump_eret();
262 void jump_hlecall();
263 void jump_intcall();
264 void new_dyna_leave();
265
266 // TLB
267 void TLBWI_new();
268 void TLBWR_new();
269 void read_nomem_new();
270 void read_nomemb_new();
271 void read_nomemh_new();
272 void read_nomemd_new();
273 void write_nomem_new();
274 void write_nomemb_new();
275 void write_nomemh_new();
276 void write_nomemd_new();
277 void write_rdram_new();
278 void write_rdramb_new();
279 void write_rdramh_new();
280 void write_rdramd_new();
281 extern u_int memory_map[1048576];
282
283 // Needed by assembler
284 void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
285 void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
286 void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
287 void load_all_regs(signed char i_regmap[]);
288 void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
289 void load_regs_entry(int t);
290 void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
291
292 int tracedebug=0;
293
294 //#define DEBUG_CYCLE_COUNT 1
295
296 #define NO_CYCLE_PENALTY_THR 12
297
298 int cycle_multiplier; // 100 for 1.0
299
300 static int CLOCK_ADJUST(int x)
301 {
302   int s=(x>>31)|1;
303   return (x * cycle_multiplier + s * 50) / 100;
304 }
305
// Game-specific TLB workarounds (N64 build only; compiled out with DISABLE_TLB).
static void tlb_hacks()
{
#ifndef DISABLE_TLB
  // Goldeneye hack
  if (strncmp((char *) ROM_HEADER->nom, "GOLDENEYE",9) == 0)
  {
    u_int addr;
    int n;
    // Per-region ROM offset of the code the game expects mapped at 0x7F000000
    switch (ROM_HEADER->Country_code&0xFF) 
    {
      case 0x45: // U
        addr=0x34b30;
        break;                   
      case 0x4A: // J 
        addr=0x34b70;    
        break;    
      case 0x50: // E 
        addr=0x329f0;
        break;                        
      default: 
        // Unknown country code
        addr=0;
        break;
    }
    u_int rom_addr=(u_int)rom;
    #ifdef ROM_COPY
    // Since memory_map is 32-bit, on 64-bit systems the rom needs to be
    // in the lower 4G of memory to use this hack.  Copy it if necessary.
    // NOTE(review): the mmap() result is compared with <= 0, but mmap
    // returns MAP_FAILED ((void *)-1) on error — confirm this check.
    if((void *)rom>(void *)0xffffffff) {
      munmap(ROM_COPY, 67108864);
      if(mmap(ROM_COPY, 12582912,
              PROT_READ | PROT_WRITE,
              MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
              -1, 0) <= 0) {printf("mmap() failed\n");}
      memcpy(ROM_COPY,rom,12582912);
      rom_addr=(u_int)ROM_COPY;
    }
    #endif
    // Point guest pages 0x7F000-0x7FFFF straight at the ROM region
    if(addr) {
      for(n=0x7F000;n<0x80000;n++) {
        memory_map[n]=(((u_int)(rom_addr+addr-0x7F000000))>>2)|0x40000000;
      }
    }
  }
#endif
}
352
// Map a guest virtual address to the page index used by the
// jump_in/jump_out lookup tables.
static u_int get_page(u_int vaddr)
{
#ifndef PCSX
  u_int page=(vaddr^0x80000000)>>12;
#else
  // PSX: strip segment bits (kuseg/kseg0/kseg1 alias the same memory)
  u_int page=vaddr&~0xe0000000;
  if (page < 0x1000000)
    page &= ~0x0e00000; // RAM mirrors
  page>>=12;
#endif
#ifndef DISABLE_TLB
  // TLB-mapped addresses: use the translated physical page instead
  if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12;
#endif
  // Fold everything above the first 2048 pages into pages 2048..4095
  if(page>2048) page=2048+(page&2047);
  return page;
}
369
#ifndef PCSX
// Page index for the jump_dirty table, keyed by the *virtual* address
// (unlike get_page, which may substitute the physical page for TLB areas).
static u_int get_vpage(u_int vaddr)
{
  u_int vpage=(vaddr^0x80000000)>>12;
#ifndef DISABLE_TLB
  if(vpage>262143&&tlb_LUT_r[vaddr>>12]) vpage&=2047; // jump_dirty uses a hash of the virtual address instead
#endif
  if(vpage>2048) vpage=2048+(vpage&2047);
  return vpage;
}
#else
// no virtual mem in PCSX
static u_int get_vpage(u_int vaddr)
{
  return get_page(vaddr);
}
#endif
387
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
// Search order: clean compiled blocks, then revalidatable dirty blocks,
// then compile on demand.  Successful lookups are promoted into the
// 2-way hash table (ways at [0]/[1] and [2]/[3]).
void *get_addr(u_int vaddr)
{
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  // 1) Clean compiled blocks for this page
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
  //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Promote into way 0, demoting the previous way-0 entry to way 1
      int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
      ht_bin[3]=ht_bin[1];
      ht_bin[2]=ht_bin[0];
      ht_bin[1]=(int)head->addr;
      ht_bin[0]=vaddr;
      return head->addr;
    }
    head=head->next;
  }
  // 2) Dirty blocks: reuse if the source code is unchanged
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(verify_dirty(head->addr)) {
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        // Source matched: mark the page executable again
        invalid_code[vaddr>>12]=0;
        inv_code_start=inv_code_end=~0;
#ifndef DISABLE_TLB
        memory_map[vaddr>>12]|=0x40000000;
#endif
        if(vpage<2048) {
#ifndef DISABLE_TLB
          if(tlb_LUT_r[vaddr>>12]) {
            invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
            memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
          }
#endif
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        // Refresh or insert the hash table entry
        int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr; // Replace existing entry
        }
        else
        {
          ht_bin[3]=ht_bin[1];
          ht_bin[2]=ht_bin[0];
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
  // 3) Not compiled (or stale): compile now; 0 means success, retry lookup
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault execption
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
460 // Look up address in hash table first
461 void *get_addr_ht(u_int vaddr)
462 {
463   //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
464   int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
465   if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
466   if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
467   return get_addr(vaddr);
468 }
469
470 void clear_all_regs(signed char regmap[])
471 {
472   int hr;
473   for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
474 }
475
476 signed char get_reg(signed char regmap[],int r)
477 {
478   int hr;
479   for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
480   return -1;
481 }
482
483 // Find a register that is available for two consecutive cycles
484 signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
485 {
486   int hr;
487   for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
488   return -1;
489 }
490
491 int count_free_regs(signed char regmap[])
492 {
493   int count=0;
494   int hr;
495   for(hr=0;hr<HOST_REGS;hr++)
496   {
497     if(hr!=EXCLUDE_REG) {
498       if(regmap[hr]<0) count++;
499     }
500   }
501   return count;
502 }
503
504 void dirty_reg(struct regstat *cur,signed char reg)
505 {
506   int hr;
507   if(!reg) return;
508   for (hr=0;hr<HOST_REGS;hr++) {
509     if((cur->regmap[hr]&63)==reg) {
510       cur->dirty|=1<<hr;
511     }
512   }
513 }
514
515 // If we dirty the lower half of a 64 bit register which is now being
516 // sign-extended, we need to dump the upper half.
517 // Note: Do this only after completion of the instruction, because
518 // some instructions may need to read the full 64-bit value even if
519 // overwriting it (eg SLTI, DSRA32).
520 static void flush_dirty_uppers(struct regstat *cur)
521 {
522   int hr,reg;
523   for (hr=0;hr<HOST_REGS;hr++) {
524     if((cur->dirty>>hr)&1) {
525       reg=cur->regmap[hr];
526       if(reg>=64) 
527         if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
528     }
529   }
530 }
531
532 void set_const(struct regstat *cur,signed char reg,uint64_t value)
533 {
534   int hr;
535   if(!reg) return;
536   for (hr=0;hr<HOST_REGS;hr++) {
537     if(cur->regmap[hr]==reg) {
538       cur->isconst|=1<<hr;
539       current_constmap[hr]=value;
540     }
541     else if((cur->regmap[hr]^64)==reg) {
542       cur->isconst|=1<<hr;
543       current_constmap[hr]=value>>32;
544     }
545   }
546 }
547
548 void clear_const(struct regstat *cur,signed char reg)
549 {
550   int hr;
551   if(!reg) return;
552   for (hr=0;hr<HOST_REGS;hr++) {
553     if((cur->regmap[hr]&63)==reg) {
554       cur->isconst&=~(1<<hr);
555     }
556   }
557 }
558
559 int is_const(struct regstat *cur,signed char reg)
560 {
561   int hr;
562   if(reg<0) return 0;
563   if(!reg) return 1;
564   for (hr=0;hr<HOST_REGS;hr++) {
565     if((cur->regmap[hr]&63)==reg) {
566       return (cur->isconst>>hr)&1;
567     }
568   }
569   return 0;
570 }
571 uint64_t get_const(struct regstat *cur,signed char reg)
572 {
573   int hr;
574   if(!reg) return 0;
575   for (hr=0;hr<HOST_REGS;hr++) {
576     if(cur->regmap[hr]==reg) {
577       return current_constmap[hr];
578     }
579   }
580   SysPrintf("Unknown constant in r%d\n",reg);
581   exit(1);
582 }
583
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used.  Try not to reallocate these.
// On return, hsn[reg] holds the distance (in instructions) to the next
// use of reg — smaller means needed sooner.
void lsn(u_char hsn[], int i, int *preferred_reg)
{
  int j;
  int b=-1;
  // Determine the scan horizon: block end, or just past an
  // unconditional jump (UJUMP/RJUMP or microcode 0x1000 pattern)
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  // Walk backwards so nearer uses overwrite farther distances
  for(;j>=0;j--)
  {
    if(rs1[i+j]) hsn[rs1[i+j]]=j;
    if(rs2[i+j]) hsn[rs2[i+j]]=j;
    if(rt1[i+j]) hsn[rt1[i+j]]=j;
    if(rt2[i+j]) hsn[rt2[i+j]]=j;
    if(itype[i+j]==STORE || itype[i+j]==STORELR) {
      // Stores can allocate zero
      hsn[rs1[i+j]]=j;
      hsn[rs2[i+j]]=j;
    }
    // On some architectures stores need invc_ptr
    #if defined(HOST_IMM8)
    if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
      hsn[INVCP]=j;
    }
    #endif
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      // Branches need the cycle count; remember the nearest branch
      hsn[CCREG]=j;
      b=j;
    }
  }
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        // Uses past the branch count as farther away (offset by b+2)
        if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
        if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
        //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
        //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
      }
    }
    // TODO: preferred register based on backward branch
  }
  // Delay slot should preferably not overwrite branch conditions or cycle count
  if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
    if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
    if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
    hsn[CCREG]=1;
    // ...or hash tables
    hsn[RHASH]=1;
    hsn[RHTBL]=1;
  }
  // Coprocessor load/store needs FTEMP, even if not declared
  if(itype[i]==C1LS||itype[i]==C2LS) {
    hsn[FTEMP]=0;
  }
  // Load L/R also uses FTEMP as a temporary register
  if(itype[i]==LOADLR) {
    hsn[FTEMP]=0;
  }
  // Also SWL/SWR/SDL/SDR
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
    hsn[FTEMP]=0;
  }
  // Don't remove the TLB registers either
  if(itype[i]==LOAD || itype[i]==LOADLR || itype[i]==STORE || itype[i]==STORELR || itype[i]==C1LS || itype[i]==C2LS) {
    hsn[TLREG]=0;
  }
  // Don't remove the miniht registers
  if(itype[i]==UJUMP||itype[i]==RJUMP)
  {
    hsn[RHASH]=0;
    hsn[RHTBL]=0;
  }
}
676
// We only want to allocate registers if we're going to use them again soon
// Returns 1 if MIPS register r is read within the next few instructions
// (and not marked unneeded before that point), 0 otherwise.
int needed_again(int r, int i)
{
  int j;
  int b=-1;
  int rn=10; // distance to next use; 10 = "not used in window"
  
  // If the previous insn unconditionally leaves the block, nothing is needed
  if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
  {
    if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
      return 0; // Don't need any registers if exiting the block
  }
  // Determine the scan horizon: block end, unconditional jump,
  // or a syscall-like instruction
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
    if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
    {
      break;
    }
  }
  // Walk backwards recording the nearest read of r;
  // an "unneeded" marker before the read cancels it
  for(;j>=1;j--)
  {
    if(rs1[i+j]==r) rn=j;
    if(rs2[i+j]==r) rn=j;
    if((unneeded_reg[i+j]>>r)&1) rn=10;
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      b=j;
    }
  }
  /*
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int o=rn;
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(!((unneeded_reg[t+j]>>r)&1)) {
          if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
          if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
        }
        else rn=o;
      }
    }
  }*/
  if(rn<10) return 1;
  return 0;
}
738
739 // Try to match register allocations at the end of a loop with those
740 // at the beginning
741 int loop_reg(int i, int r, int hr)
742 {
743   int j,k;
744   for(j=0;j<9;j++)
745   {
746     if(i+j>=slen) {
747       j=slen-i-1;
748       break;
749     }
750     if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
751     {
752       // Don't go past an unconditonal jump
753       j++;
754       break;
755     }
756   }
757   k=0;
758   if(i>0){
759     if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
760       k--;
761   }
762   for(;k<j;k++)
763   {
764     if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
765     if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
766     if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
767     {
768       if(ba[i+k]>=start && ba[i+k]<(start+i*4))
769       {
770         int t=(ba[i+k]-start)>>2;
771         int reg=get_reg(regs[t].regmap_entry,r);
772         if(reg>=0) return reg;
773         //reg=get_reg(regs[t+1].regmap_entry,r);
774         //if(reg>=0) return reg;
775       }
776     }
777   }
778   return hr;
779 }
780
781
782 // Allocate every register, preserving source/target regs
783 void alloc_all(struct regstat *cur,int i)
784 {
785   int hr;
786   
787   for(hr=0;hr<HOST_REGS;hr++) {
788     if(hr!=EXCLUDE_REG) {
789       if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
790          ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
791       {
792         cur->regmap[hr]=-1;
793         cur->dirty&=~(1<<hr);
794       }
795       // Don't need zeros
796       if((cur->regmap[hr]&63)==0)
797       {
798         cur->regmap[hr]=-1;
799         cur->dirty&=~(1<<hr);
800       }
801     }
802   }
803 }
804
805 #ifndef FORCE32
// DDIV: signed 64-bit divide; quotient -> lo, remainder -> hi.
// NOTE(review): divisor==0 and INT64_MIN/-1 are undefined behavior in C;
// presumably the recompiler never calls this for those cases — confirm.
void div64(int64_t dividend,int64_t divisor)
{
  lo=dividend/divisor;
  hi=dividend%divisor;
  //printf("TRACE: ddiv %8x%8x %8x%8x\n" ,(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
// DDIVU: unsigned 64-bit divide; quotient -> lo, remainder -> hi.
// NOTE(review): divisor==0 is undefined behavior in C — confirm callers
// guard against it.
void divu64(uint64_t dividend,uint64_t divisor)
{
  lo=dividend/divisor;
  hi=dividend%divisor;
  //printf("TRACE: ddivu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
820
821 void mult64(uint64_t m1,uint64_t m2)
822 {
823    unsigned long long int op1, op2, op3, op4;
824    unsigned long long int result1, result2, result3, result4;
825    unsigned long long int temp1, temp2, temp3, temp4;
826    int sign = 0;
827    
828    if (m1 < 0)
829      {
830     op2 = -m1;
831     sign = 1 - sign;
832      }
833    else op2 = m1;
834    if (m2 < 0)
835      {
836     op4 = -m2;
837     sign = 1 - sign;
838      }
839    else op4 = m2;
840    
841    op1 = op2 & 0xFFFFFFFF;
842    op2 = (op2 >> 32) & 0xFFFFFFFF;
843    op3 = op4 & 0xFFFFFFFF;
844    op4 = (op4 >> 32) & 0xFFFFFFFF;
845    
846    temp1 = op1 * op3;
847    temp2 = (temp1 >> 32) + op1 * op4;
848    temp3 = op2 * op3;
849    temp4 = (temp3 >> 32) + op2 * op4;
850    
851    result1 = temp1 & 0xFFFFFFFF;
852    result2 = temp2 + (temp3 & 0xFFFFFFFF);
853    result3 = (result2 >> 32) + temp4;
854    result4 = (result3 >> 32);
855    
856    lo = result1 | (result2 << 32);
857    hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
858    if (sign)
859      {
860     hi = ~hi;
861     if (!lo) hi++;
862     else lo = ~lo + 1;
863      }
864 }
865
866 void multu64(uint64_t m1,uint64_t m2)
867 {
868    unsigned long long int op1, op2, op3, op4;
869    unsigned long long int result1, result2, result3, result4;
870    unsigned long long int temp1, temp2, temp3, temp4;
871    
872    op1 = m1 & 0xFFFFFFFF;
873    op2 = (m1 >> 32) & 0xFFFFFFFF;
874    op3 = m2 & 0xFFFFFFFF;
875    op4 = (m2 >> 32) & 0xFFFFFFFF;
876    
877    temp1 = op1 * op3;
878    temp2 = (temp1 >> 32) + op1 * op4;
879    temp3 = op2 * op3;
880    temp4 = (temp3 >> 32) + op2 * op4;
881    
882    result1 = temp1 & 0xFFFFFFFF;
883    result2 = temp2 + (temp3 & 0xFFFFFFFF);
884    result3 = (result2 >> 32) + temp4;
885    result4 = (result3 >> 32);
886    
887    lo = result1 | (result2 << 32);
888    hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
889    
890   //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
891   //                                      ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
892 }
893
894 uint64_t ldl_merge(uint64_t original,uint64_t loaded,u_int bits)
895 {
896   if(bits) {
897     original<<=64-bits;
898     original>>=64-bits;
899     loaded<<=bits;
900     original|=loaded;
901   }
902   else original=loaded;
903   return original;
904 }
905 uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits)
906 {
907   if(bits^56) {
908     original>>=64-(bits^56);
909     original<<=64-(bits^56);
910     loaded>>=bits^56;
911     original|=loaded;
912   }
913   else original=loaded;
914   return original;
915 }
916 #endif
917
918 #ifdef __i386__
919 #include "assem_x86.c"
920 #endif
921 #ifdef __x86_64__
922 #include "assem_x64.c"
923 #endif
924 #ifdef __arm__
925 #include "assem_arm.c"
926 #endif
927
928 // Add virtual address mapping to linked list
929 void ll_add(struct ll_entry **head,int vaddr,void *addr)
930 {
931   struct ll_entry *new_entry;
932   new_entry=malloc(sizeof(struct ll_entry));
933   assert(new_entry!=NULL);
934   new_entry->vaddr=vaddr;
935   new_entry->reg_sv_flags=0;
936   new_entry->addr=addr;
937   new_entry->next=*head;
938   *head=new_entry;
939 }
940
941 void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr)
942 {
943   ll_add(head,vaddr,addr);
944   (*head)->reg_sv_flags=reg_sv_flags;
945 }
946
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
// Fast path probes both ways of the hash table; slow path searches the
// per-page jump_in list and opportunistically refreshes the hash table.
void *check_addr(u_int vaddr)
{
  u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) {
    // Expiry-distance check: reject blocks near the cache write pointer
    if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[1])) return (void *)ht_bin[1];
  }
  if(ht_bin[2]==vaddr) {
    if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[3])) return (void *)ht_bin[3];
  }
  // Slow path: search the page's list of compiled entry points
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        // Update existing entry with current address
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr;
          return head->addr;
        }
        if(ht_bin[2]==vaddr) {
          ht_bin[3]=(int)head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
992
993 void remove_hash(int vaddr)
994 {
995   //printf("remove hash: %x\n",vaddr);
996   int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
997   if(ht_bin[2]==vaddr) {
998     ht_bin[2]=ht_bin[3]=-1;
999   }
1000   if(ht_bin[0]==vaddr) {
1001     ht_bin[0]=ht_bin[2];
1002     ht_bin[1]=ht_bin[3];
1003     ht_bin[2]=ht_bin[3]=-1;
1004   }
1005 }
1006
// Remove (and free) every list entry whose compiled code lies in the
// (addr>>shift) output-cache bucket, or whose block may straddle into
// it from up to MAX_OUTPUT_BLOCK_SIZE below.  Used when translation
// cache space is reclaimed.  Also purges matching hash table entries.
void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
{
  struct ll_entry *next;
  while(*head) {
    if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || 
       ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
    {
      inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
      remove_hash((*head)->vaddr);
      // Unlink and free; *head now points at the successor
      next=(*head)->next;
      free(*head);
      *head=next;
    }
    else
    {
      head=&((*head)->next);
    }
  }
}
1026
1027 // Remove all entries from linked list
1028 void ll_clear(struct ll_entry **head)
1029 {
1030   struct ll_entry *cur;
1031   struct ll_entry *next;
1032   if(cur=*head) {
1033     *head=0;
1034     while(cur) {
1035       next=cur->next;
1036       free(cur);
1037       cur=next;
1038     }
1039   }
1040 }
1041
// Dereference the pointers and remove if it matches
// Scans the list and, for each entry whose branch target lies in the
// (addr>>shift) region about to be reused, un-links the direct branch
// via kill_pointer() (presumably redirecting it back to its stub so the
// target is looked up again -- defined elsewhere; confirm there).
void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
{
  while(head) {
    int ptr=get_pointer(head->addr);
    inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
    // Match the region itself, or a block starting up to
    // MAX_OUTPUT_BLOCK_SIZE before it (blocks can span into the region)
    if(((ptr>>shift)==(addr>>shift)) ||
       (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
    {
      inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
      u_int host_addr=(u_int)kill_pointer(head->addr);
      #ifdef __arm__
        // Record which 4K regions of the translation cache were patched
        // so do_clear_cache() can flush the icache for them later
        needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
      #endif
    }
    head=head->next;
  }
}
1060
1061 // This is called when we write to a compiled block (see do_invstub)
1062 void invalidate_page(u_int page)
1063 {
1064   struct ll_entry *head;
1065   struct ll_entry *next;
1066   head=jump_in[page];
1067   jump_in[page]=0;
1068   while(head!=NULL) {
1069     inv_debug("INVALIDATE: %x\n",head->vaddr);
1070     remove_hash(head->vaddr);
1071     next=head->next;
1072     free(head);
1073     head=next;
1074   }
1075   head=jump_out[page];
1076   jump_out[page]=0;
1077   while(head!=NULL) {
1078     inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
1079     u_int host_addr=(u_int)kill_pointer(head->addr);
1080     #ifdef __arm__
1081       needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
1082     #endif
1083     next=head->next;
1084     free(head);
1085     head=next;
1086   }
1087 }
1088
// Invalidate the translated code for page 'block' plus the adjacent
// pages in [first..last] that overlapping blocks extend into.
// 'first'/'last' are page indices the caller derived from the bounds of
// dirty blocks touching this page.
static void invalidate_block_range(u_int block, u_int first, u_int last)
{
  u_int page=get_page(block<<12);
  //printf("first=%d last=%d\n",first,last);
  invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  // NOTE(review): this loop stops at last-1; page 'last' itself is not
  // invalidated here -- verify against upstream whether that is intended
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  #ifdef __arm__
    // Flush icache for the regions collected in needs_clear_cache
    do_clear_cache();
  #endif
  
  // Don't trap writes
  invalid_code[block]=1;
#ifndef DISABLE_TLB
  // If there is a valid TLB entry for this page, remove write protect
  if(tlb_LUT_w[block]) {
    assert(tlb_LUT_r[block]==tlb_LUT_w[block]);
    // CHECK: Is this right?
    memory_map[block]=((tlb_LUT_w[block]&0xFFFFF000)-(block<<12)+(unsigned int)rdram-0x80000000)>>2;
    u_int real_block=tlb_LUT_w[block]>>12;
    invalid_code[real_block]=1;
    // Also un-protect the physical RAM alias of the mapped page
    if(real_block>=0x80000&&real_block<0x80800) memory_map[real_block]=((u_int)rdram-0x80000000)>>2;
  }
  else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2;
#endif

  #ifdef USE_MINI_HT
  // The mini hash table may hold stale block addresses; wipe it
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
1127
// Invalidate all translated code for the 4K page containing address
// (block<<12).  Scans the dirty list to find how far blocks that
// overlap this page extend, then invalidates that whole page range.
void invalidate_block(u_int block)
{
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;  // range of pages to invalidate, widened below
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    u_int start,end;
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      // Source-address bounds of the code this block was compiled from
      get_bounds((int)head->addr,&start,&end);
      //printf("start: %x end: %x\n",start,end);
      if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) {
        // Block compiled from RAM: widen [first,last] if it spills
        // outside this page
        if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
          if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
          if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
        }
      }
#ifndef DISABLE_TLB
      // Same widening for blocks reached through TLB-mapped addresses
      // (memory_map translates the virtual page to its RAM backing)
      if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) {
        if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) {
          if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)<first) first=((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047;
          if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047;
        }
      }
#endif
    }
    head=head->next;
  }
  invalidate_block_range(block,first,last);
}
1163
// Invalidate translated code after a write to 'addr'.
// PCSX fast path: scan the dirty lists for the written RAM page; if a
// block overlaps the write, invalidate the affected page range.
// Otherwise grow the inv_code_start/inv_code_end "known code-free"
// window so the caller can skip this scan for nearby writes.
void invalidate_addr(u_int addr)
{
#ifdef PCSX
  //static int rhits;
  // this check is done by the caller
  //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
  u_int page=get_vpage(addr);
  if(page<2048) { // RAM
    struct ll_entry *head;
    u_int addr_min=~0, addr_max=0;   // union of bounds of blocks hit
    u_int mask=RAM_SIZE-1;
    u_int addr_main=0x80000000|(addr&mask);  // canonical mirror of addr
    int pg1;
    inv_code_start=addr_main&~0xfff;
    inv_code_end=addr_main|0xfff;
    pg1=page;
    if (pg1>0) {
      // must check previous page too because of spans..
      pg1--;
      inv_code_start-=0x1000;
    }
    for(;pg1<=page;pg1++) {
      for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
        u_int start,end;
        get_bounds((int)head->addr,&start,&end);
        if(ram_offset) {
          start-=ram_offset;
          end-=ram_offset;
        }
        if(start<=addr_main&&addr_main<end) {
          // Block overlaps the write; widen the hit range
          if(start<addr_min) addr_min=start;
          if(end>addr_max) addr_max=end;
        }
        else if(addr_main<start) {
          // Block entirely above the write: clip the code-free window
          if(start<inv_code_end)
            inv_code_end=start-1;
        }
        else {
          // Block entirely below the write: clip the code-free window
          if(end>inv_code_start)
            inv_code_start=end;
        }
      }
    }
    if (addr_min!=~0) {
      inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
      // A block was hit; the window is no longer valid
      inv_code_start=inv_code_end=~0;
      invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
      return;
    }
    else {
      // No code here; publish the window in the caller's address mirror
      // so the caller-side range check matches future writes
      inv_code_start=(addr&~mask)|(inv_code_start&mask);
      inv_code_end=(addr&~mask)|(inv_code_end&mask);
      inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
      return;
    }
  }
#endif
  invalidate_block(addr>>12);
}
1223
1224 // This is called when loading a save state.
1225 // Anything could have changed, so invalidate everything.
1226 void invalidate_all_pages()
1227 {
1228   u_int page,n;
1229   for(page=0;page<4096;page++)
1230     invalidate_page(page);
1231   for(page=0;page<1048576;page++)
1232     if(!invalid_code[page]) {
1233       restore_candidate[(page&2047)>>3]|=1<<(page&7);
1234       restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1235     }
1236   #ifdef __arm__
1237   __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
1238   #endif
1239   #ifdef USE_MINI_HT
1240   memset(mini_ht,-1,sizeof(mini_ht));
1241   #endif
1242   #ifndef DISABLE_TLB
1243   // TLB
1244   for(page=0;page<0x100000;page++) {
1245     if(tlb_LUT_r[page]) {
1246       memory_map[page]=((tlb_LUT_r[page]&0xFFFFF000)-(page<<12)+(unsigned int)rdram-0x80000000)>>2;
1247       if(!tlb_LUT_w[page]||!invalid_code[page])
1248         memory_map[page]|=0x40000000; // Write protect
1249     }
1250     else memory_map[page]=-1;
1251     if(page==0x80000) page=0xC0000;
1252   }
1253   tlb_hacks();
1254   #endif
1255 }
1256
// Add an entry to jump_out after making a link
// 'src' is the host address of the branch that was just patched to jump
// directly into the block for 'vaddr'; recording it lets the link be
// undone (kill_pointer) when that block is invalidated.
void add_link(u_int vaddr,void *src)
{
  u_int page=get_page(vaddr);
  inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
  int *ptr=(int *)(src+4);
  // Sanity check: the word after the branch should be an ARM
  // PC-relative load (0x059fxxxx pattern) -- NOTE(review): this assert
  // is ARM-encoding specific; confirm for other targets
  assert((*ptr&0x0fff0000)==0x059f0000);
  ll_add(jump_out+page,vaddr,src);
  //int ptr=get_pointer(src);
  //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
}
1268
// If a code block was found to be unmodified (bit was set in
// restore_candidate) and it remains unmodified (bit is clear
// in invalid_code) then move the entries for that 4K page from
// the dirty list to the clean list.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        u_int start,end;
        // verify_dirty checks the source code still matches what was
        // compiled (defined elsewhere)
        if(verify_dirty((int)head->addr)) {
          //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
          u_int i;
          u_int inv=0;
          get_bounds((int)head->addr,&start,&end);
          if(start-(u_int)rdram<RAM_SIZE) {
            // Any source page marked invalid vetoes the restore
            for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
              inv|=invalid_code[i];
            }
          }
#ifndef DISABLE_TLB
          if((signed int)head->vaddr>=(signed int)0xC0000000) {
            u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2));
            //printf("addr=%x start=%x end=%x\n",addr,start,end);
            if(addr<start||addr>=end) inv=1;
          }
#endif
          // NOTE(review): when DISABLE_TLB is defined this 'else'
          // attaches to the if(start-rdram<RAM_SIZE) above instead --
          // preprocessor-dependent pairing; preserved as-is
          else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
            inv=1;
          }
          if(!inv) {
            void * clean_addr=(void *)get_clean_addr((int)head->addr);
            // Again skip blocks close to expiring from the cache
            if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
              u_int ppage=page;
#ifndef DISABLE_TLB
              if(page<2048&&tlb_LUT_r[head->vaddr>>12]) ppage=(tlb_LUT_r[head->vaddr>>12]^0x80000000)>>12;
#endif
              inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
              int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
              if(ht_bin[0]==head->vaddr) {
                ht_bin[1]=(int)clean_addr; // Replace existing entry
              }
              if(ht_bin[2]==head->vaddr) {
                ht_bin[3]=(int)clean_addr; // Replace existing entry
              }
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1329
1330
1331 void mov_alloc(struct regstat *current,int i)
1332 {
1333   // Note: Don't need to actually alloc the source registers
1334   if((~current->is32>>rs1[i])&1) {
1335     //alloc_reg64(current,i,rs1[i]);
1336     alloc_reg64(current,i,rt1[i]);
1337     current->is32&=~(1LL<<rt1[i]);
1338   } else {
1339     //alloc_reg(current,i,rs1[i]);
1340     alloc_reg(current,i,rt1[i]);
1341     current->is32|=(1LL<<rt1[i]);
1342   }
1343   clear_const(current,rs1[i]);
1344   clear_const(current,rt1[i]);
1345   dirty_reg(current,rt1[i]);
1346 }
1347
// Register allocation for shift-by-immediate instructions
// (SLL/SRL/SRA plus the 64-bit DSLL/DSRL/DSRA/...32 variants).
// 32-bit results set the is32 flag for the destination; 64-bit results
// clear it.
void shiftimm_alloc(struct regstat *current,int i)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
      else lt1[i]=rs1[i];
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
      // Constant-propagate through the shift when the source is known
      if(is_const(current,rs1[i])) {
        int v=get_const(current,rs1[i]);
        if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
        if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
        if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
      }
      else clear_const(current,rt1[i]);
    }
  }
  else
  {
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }

  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      // Only the low word of the source is needed for a >=32 left shift
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      // NOTE(review): the imm==32 case is treated as needing a 64-bit
      // destination -- confirm against the corresponding assembler code
      if(imm[i]==32) {
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      } else {
        alloc_reg(current,i,rt1[i]);
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
}
1415
1416 void shift_alloc(struct regstat *current,int i)
1417 {
1418   if(rt1[i]) {
1419     if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
1420     {
1421       if(rs1[i]) alloc_reg(current,i,rs1[i]);
1422       if(rs2[i]) alloc_reg(current,i,rs2[i]);
1423       alloc_reg(current,i,rt1[i]);
1424       if(rt1[i]==rs2[i]) {
1425         alloc_reg_temp(current,i,-1);
1426         minimum_free_regs[i]=1;
1427       }
1428       current->is32|=1LL<<rt1[i];
1429     } else { // DSLLV/DSRLV/DSRAV
1430       if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1431       if(rs2[i]) alloc_reg(current,i,rs2[i]);
1432       alloc_reg64(current,i,rt1[i]);
1433       current->is32&=~(1LL<<rt1[i]);
1434       if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
1435       {
1436         alloc_reg_temp(current,i,-1);
1437         minimum_free_regs[i]=1;
1438       }
1439     }
1440     clear_const(current,rs1[i]);
1441     clear_const(current,rs2[i]);
1442     clear_const(current,rt1[i]);
1443     dirty_reg(current,rt1[i]);
1444   }
1445 }
1446
// Register allocation for three-register ALU operations (SPECIAL
// opcodes): ADD/ADDU/SUB/SUBU, SLT/SLTU, AND/OR/XOR/NOR, and the
// 64-bit DADD..DSUBU group.  Tracks whether each result fits in 32
// bits via current->is32.
void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        // One operand is r0; only allocate sources still needed later
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
    current->is32|=1LL<<rt1[i]; // 32-bit (sign-extended) result
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      // Comparison needs full 64-bit sources if either is 64-bit
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      } else {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      }
    }
    current->is32|=1LL<<rt1[i]; // result is 0 or 1
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
      // If either source is 64-bit the result is 64-bit too
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        if(!((current->uu>>rt1[i])&1)) {
          // Upper word of the result is actually needed
          alloc_reg64(current,i,rt1[i]);
        }
        if(get_reg(current->regmap,rt1[i]|64)>=0) {
          if(rs1[i]&&rs2[i]) {
            alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rs2[i]);
          }
          else
          {
            // Is is really worth it to keep 64-bit values in registers?
            #ifdef NATIVE_64BIT
            if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
            if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
            #endif
          }
        }
        current->is32&=~(1LL<<rt1[i]);
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        // Full 64-bit op only if the upper word of the result is used
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          alloc_reg64(current,i,rs1[i]);
          alloc_reg64(current,i,rs2[i]);
          alloc_reg64(current,i,rt1[i]);
        } else {
          alloc_reg(current,i,rs1[i]);
          alloc_reg(current,i,rs2[i]);
          alloc_reg(current,i,rt1[i]);
        }
      }
      else {
        alloc_reg(current,i,rt1[i]);
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          // DADD used as move, or zeroing
          // If we have a 64-bit source, then make the target 64 bits too
          if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
            if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rt1[i]);
          } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
          if(opcode2[i]>=0x2e&&rs2[i]) {
            // DSUB used as negation - 64-bit result
            // If we have a 32-bit register, extend it to 64 bits
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
        }
      }
      // Track 32-bit-ness of the result: a copy inherits the source's
      // width, a true two-operand op is conservatively 64-bit
      if(rs1[i]&&rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
      } else if(rs1[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs1[i])&1)
          current->is32|=1LL<<rt1[i];
      } else if(rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs2[i])&1)
          current->is32|=1LL<<rt1[i];
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1568
// Register allocation for immediate-operand instructions:
// DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI, ADDI/ADDIU and LUI.
// Performs constant propagation where the source value is known.
void imm16_alloc(struct regstat *current,int i)
{
  if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  else lt1[i]=rs1[i];
  if(rt1[i]) alloc_reg(current,i,rt1[i]);
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    current->is32&=~(1LL<<rt1[i]); // 64-bit result
    if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
      // TODO: Could preserve the 32-bit flag if the immediate is zero
      alloc_reg64(current,i,rt1[i]);
      alloc_reg64(current,i,rs1[i]);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    // 64-bit source needs a 64-bit comparison
    if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
    current->is32|=1LL<<rt1[i]; // result is 0 or 1
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    // ORI/XORI on a 64-bit source keeps the upper word; ANDI with a
    // 16-bit immediate always clears it
    if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
      if(rs1[i]!=rt1[i]) {
        if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      }
    }
    else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
      if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
      if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
    }
    else clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      set_const(current,rt1[i],v+imm[i]);
    }
    else clear_const(current,rt1[i]);
    current->is32|=1LL<<rt1[i]; // 32-bit (sign-extended) result
  }
  else {
    // LUI always produces a known constant
    set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
    current->is32|=1LL<<rt1[i];
  }
  dirty_reg(current,rt1[i]);
}
1621
// Register allocation for loads (LB/LH/LW/LWL/LWR/LWU/LD/LDL/LDR).
// Even a dummy load (destination r0 or unneeded) still allocates a
// temporary for address generation.
void load_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
  if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  if(rt1[i]&&!((current->u>>rt1[i])&1)) {
    alloc_reg(current,i,rt1[i]);
    assert(get_reg(current->regmap,rt1[i])>=0);
    if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
    {
      current->is32&=~(1LL<<rt1[i]); // 64-bit result
      alloc_reg64(current,i,rt1[i]);
    }
    else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
      // Unaligned 64-bit loads clobber everything
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
    else current->is32|=1LL<<rt1[i]; // sign-extended 32-bit result
    dirty_reg(current,rt1[i]);
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    // LWL/LWR need a temporary register for the old value
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
      minimum_free_regs[i]=1;
    }
  }
  else
  {
    // Load to r0 or unneeded register (dummy load)
    // but we still need a register to calculate the address
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
    }
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    alloc_reg_temp(current,i,-1);
    minimum_free_regs[i]=1;
    if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
}
1676
// Register allocation for stores (SB/SH/SW/SWL/SWR/SD/SDL/SDR).
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    alloc_reg64(current,i,rs2[i]);
    if(rs2[i]) alloc_reg(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  // (note: this 'else' pairs with the if(using_tlb) above, across the
  // preprocessor conditional)
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWL/SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1700
// Register allocation for FPU loads/stores (LWC1/SWC1/LDC1/SDC1).
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,rs1[i]); // FIXME
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP);
  if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
    alloc_reg64(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  // NOTE(review): unlike c2ls_alloc, minimum_free_regs[i] is not set
  // here despite the temp allocation -- verify whether this is intended
}
1721
// Register allocation for GTE loads/stores (LWC2/SWC2).
void c2ls_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,FTEMP);
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1738
#ifndef multdiv_alloc
// Register allocation for multiply/divide instructions; results go to
// HI and LO.  A target may override this with its own multdiv_alloc
// macro, hence the #ifndef guard.
void multdiv_alloc(struct regstat *current,int i)
{
  //  case 0x18: MULT
  //  case 0x19: MULTU
  //  case 0x1A: DIV
  //  case 0x1B: DIVU
  //  case 0x1C: DMULT
  //  case 0x1D: DMULTU
  //  case 0x1E: DDIV
  //  case 0x1F: DDIVU
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      // Mark HI/LO live before allocating them
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      current->is32|=1LL<<HIREG;
      current->is32|=1LL<<LOREG;
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      current->uu&=~(1LL<<HIREG);
      current->uu&=~(1LL<<LOREG);
      alloc_reg64(current,i,HIREG);
      //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
      alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rs2[i]);
      // 64-bit mult/div is done out of line and clobbers everything
      alloc_all(current,i);
      current->is32&=~(1LL<<HIREG);
      current->is32&=~(1LL<<LOREG);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    current->is32|=1LL<<HIREG;
    current->is32|=1LL<<LOREG;
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
#endif
1799
// Register allocation for coprocessor 0 instructions
// (MFC0/MTC0 and the TLB/ERET group).  All of these may call out to C
// code, so every host register must be flushable.
void cop0_alloc(struct regstat *current,int i)
{
  if(opcode2[i]==0) // MFC0
  {
    if(rt1[i]) {
      clear_const(current,rt1[i]);
      alloc_all(current,i);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
      alloc_all(current,i);
    }
    else {
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(opcode2[i]==0x10);
    alloc_all(current,i);
  }
  minimum_free_regs[i]=HOST_REGS;
}
1833
// Register allocation for coprocessor moves (MFC1/DMFC1/CFC1 and
// MTC1/DMTC1/CTC1); also used for COP2 moves (see delayslot_alloc).
void cop1_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  if(opcode2[i]<3) // MFC1/DMFC1/CFC1
  {
    if(rt1[i]){
      clear_const(current,rt1[i]);
      if(opcode2[i]==1) {
        alloc_reg64(current,i,rt1[i]); // DMFC1
        current->is32&=~(1LL<<rt1[i]);
      }else{
        alloc_reg(current,i,rt1[i]); // MFC1/CFC1
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
    alloc_reg_temp(current,i,-1);
  }
  else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      if(opcode2[i]==5)
        alloc_reg64(current,i,rs1[i]); // DMTC1
      else
        alloc_reg(current,i,rs1[i]); // MTC1/CTC1
      alloc_reg_temp(current,i,-1);
    }
    else {
      // Source is r0; allow allocating it
      current->u&=~1LL;
      alloc_reg(current,i,0);
      alloc_reg_temp(current,i,-1);
    }
  }
  minimum_free_regs[i]=1;
}
// Register allocation for FP conversion ops: only the coprocessor
// status register and one scratch register are needed.
void fconv_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
// Register allocation for FP arithmetic ops: same needs as fconv_alloc
// (status register plus one scratch register).
void float_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
// Register allocation for GTE operations: a single scratch register.
// NOTE(review): unlike the other temp-using allocators this does not
// set minimum_free_regs[i] -- verify whether that is intended.
void c2op_alloc(struct regstat *current,int i)
{
  alloc_reg_temp(current,i,-1);
}
// Register allocation for FP compare ops: needs the status register,
// the FP condition-flag register (written), and one scratch register.
void fcomp_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg(current,i,FSREG); // Load flags
  dirty_reg(current,FSREG); // Flag will be modified
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1894
// Register allocation for SYSCALL: the exception handler runs in C, so
// the cycle count must be up to date and every register flushable.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  minimum_free_regs[i]=HOST_REGS;
  current->isconst=0; // no constants survive the call-out
}
1903
// Dispatch register allocation for the instruction sitting in a branch
// delay slot, based on its decoded itype.  A branch/syscall-type
// instruction in a delay slot is not expected; when one is seen,
// speculative precompilation is disabled (stop_after_jal) instead of
// aborting.
void delayslot_alloc(struct regstat *current,int i)
{
  switch(itype[i]) {
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case FJUMP:
    case SYSCALL:
    case HLECALL:
    case SPAN:
      assem_debug("jump in the delay slot.  this shouldn't happen.\n");//exit(1);
      SysPrintf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    // COP2 shares the COP1 allocator
    case COP1:
    case COP2:
      cop1_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case C2LS:
      c2ls_alloc(current,i);
      break;
    case FCONV:
      fconv_alloc(current,i);
      break;
    case FLOAT:
      float_alloc(current,i);
      break;
    case FCOMP:
      fcomp_alloc(current,i);
      break;
    case C2OP:
      c2op_alloc(current,i);
      break;
  }
}
1972
// Special case where a branch and delay slot span two pages in virtual memory.
// All constant tracking is discarded, every host register is made
// available, and only the registers the branch itself reads/writes are
// explicitly allocated.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  minimum_free_regs[i]=HOST_REGS;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    // Link register r31 receives the return address.
    alloc_reg(current,i,31);
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]);
    if (rt1[i]!=0) {
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
    // If either operand is not known to be 32-bit, the compare needs the
    // upper halves too.
    if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg64(current,i,rs2[i]);
    }
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(!((current->is32>>rs1[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
    }
  }
  else
  if(opcode[i]==0x11) // BC1
  {
    alloc_reg(current,i,FSREG);
    alloc_reg(current,i,CSREG);
  }
  //else ...
}
2023
2024 add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
2025 {
2026   stubs[stubcount][0]=type;
2027   stubs[stubcount][1]=addr;
2028   stubs[stubcount][2]=retaddr;
2029   stubs[stubcount][3]=a;
2030   stubs[stubcount][4]=b;
2031   stubs[stubcount][5]=c;
2032   stubs[stubcount][6]=d;
2033   stubs[stubcount][7]=e;
2034   stubcount++;
2035 }
2036
2037 // Write out a single register
2038 void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
2039 {
2040   int hr;
2041   for(hr=0;hr<HOST_REGS;hr++) {
2042     if(hr!=EXCLUDE_REG) {
2043       if((regmap[hr]&63)==r) {
2044         if((dirty>>hr)&1) {
2045           if(regmap[hr]<64) {
2046             emit_storereg(r,hr);
2047 #ifndef FORCE32
2048             if((is32>>regmap[hr])&1) {
2049               emit_sarimm(hr,31,hr);
2050               emit_storereg(r|64,hr);
2051             }
2052 #endif
2053           }else{
2054             emit_storereg(r|64,hr);
2055           }
2056         }
2057       }
2058     }
2059   }
2060 }
2061
2062 int mchecksum()
2063 {
2064   //if(!tracedebug) return 0;
2065   int i;
2066   int sum=0;
2067   for(i=0;i<2097152;i++) {
2068     unsigned int temp=sum;
2069     sum<<=1;
2070     sum|=(~temp)>>31;
2071     sum^=((u_int *)rdram)[i];
2072   }
2073   return sum;
2074 }
2075 int rchecksum()
2076 {
2077   int i;
2078   int sum=0;
2079   for(i=0;i<64;i++)
2080     sum^=((u_int *)reg)[i];
2081   return sum;
2082 }
2083 void rlist()
2084 {
2085   int i;
2086   printf("TRACE: ");
2087   for(i=0;i<32;i++)
2088     printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
2089   printf("\n");
2090 #ifndef DISABLE_COP1
2091   printf("TRACE: ");
2092   for(i=0;i<32;i++)
2093     printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i]));
2094   printf("\n");
2095 #endif
2096 }
2097
// Turn on the global trace flag (most consumers of tracedebug in this
// file are currently commented out).
void enabletrace()
{
  tracedebug=1;
}
2102
// Debug trace hook: when Count falls inside a hard-coded window, print a
// RAM checksum and the register list, plus a peek at nearby stack words.
// NOTE(review): the (&i)[-1] and (&j)[N] expressions read outside the
// local variable (undefined behavior); they appear intended to dump the
// caller's stack frame / return address for debugging only — do not rely
// on them.
void memdebug(int i)
{
  //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]);
  //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum());
  //rlist();
  //if(tracedebug) {
  //if(Count>=-2084597794) {
  if((signed int)Count>=-2084597794&&(signed int)Count<0) {
  //if(0) {
    printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
    //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status);
    //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]);
    rlist();
    #ifdef __i386__
    printf("TRACE: %x\n",(&i)[-1]);
    #endif
    #ifdef __arm__
    int j;
    printf("TRACE: %x \n",(&j)[10]);
    printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
    #endif
    //fflush(stdout);
  }
  //printf("TRACE: %x\n",(&i)[-1]);
}
2128
// Print a TLB exception trace message (instruction address, faulting
// address, cause code).
void tlb_debug(u_int cause, u_int addr, u_int iaddr)
{
  printf("TLB Exception: instruction=%x addr=%x cause=%x\n",iaddr, addr, cause);
}
2133
// Emit host code for an R-type ALU instruction at index i:
// ADD/ADDU/SUB/SUBU, the 64-bit DADD/DADDU/DSUB/DSUBU, SLT/SLTU, and
// AND/OR/XOR/NOR.  i_regs gives the host-register assignment; a source
// with no host register (s<0) means the operand is r0 or must be loaded
// from the register file.  If the target has no host register (t<0) the
// result is dead and nothing is emitted.
void alu_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      signed char s1,s2,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      if(t>=0) {
        s1=get_reg(i_regs->regmap,rs1[i]);
        s2=get_reg(i_regs->regmap,rs2[i]);
        if(rs1[i]&&rs2[i]) {
          assert(s1>=0);
          assert(s2>=0);
          // Bit 1 of the function code distinguishes SUB from ADD.
          if(opcode2[i]&2) emit_sub(s1,s2,t);
          else emit_add(s1,s2,t);
        }
        else if(rs1[i]) {
          // Other operand is r0: plain move/load.
          if(s1>=0) emit_mov(s1,t);
          else emit_loadreg(rs1[i],t);
        }
        else if(rs2[i]) {
          // r0 - rs2 is a negate; r0 + rs2 is a move.
          if(s2>=0) {
            if(opcode2[i]&2) emit_neg(s2,t);
            else emit_mov(s2,t);
          }
          else {
            emit_loadreg(rs2[i],t);
            if(opcode2[i]&2) emit_neg(t,t);
          }
        }
        else emit_zeroreg(t);
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      signed char s1l,s2l,s1h,s2h,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      if(tl>=0) {
        s1l=get_reg(i_regs->regmap,rs1[i]);
        s2l=get_reg(i_regs->regmap,rs2[i]);
        s1h=get_reg(i_regs->regmap,rs1[i]|64);
        s2h=get_reg(i_regs->regmap,rs2[i]|64);
        if(rs1[i]&&rs2[i]) {
          assert(s1l>=0);
          assert(s2l>=0);
          // Low halves first (setting carry), then the high halves with
          // carry/borrow if the upper result is live.
          if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
          else emit_adds(s1l,s2l,tl);
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
            #else
            if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
            #endif
            else emit_add(s1h,s2h,th);
          }
        }
        else if(rs1[i]) {
          if(s1l>=0) emit_mov(s1l,tl);
          else emit_loadreg(rs1[i],tl);
          if(th>=0) {
            if(s1h>=0) emit_mov(s1h,th);
            else emit_loadreg(rs1[i]|64,th);
          }
        }
        else if(rs2[i]) {
          if(s2l>=0) {
            if(opcode2[i]&2) emit_negs(s2l,tl);
            else emit_mov(s2l,tl);
          }
          else {
            emit_loadreg(rs2[i],tl);
            if(opcode2[i]&2) emit_negs(tl,tl);
          }
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(s2h>=0) emit_mov(s2h,th);
            else emit_loadreg(rs2[i]|64,th);
            if(opcode2[i]&2) {
              emit_adcimm(-1,th); // x86 has inverted carry flag
              emit_not(th,th);
            }
            #else
            if(opcode2[i]&2) {
              if(s2h>=0) emit_rscimm(s2h,0,th);
              else {
                emit_loadreg(rs2[i]|64,th);
                emit_rscimm(th,0,th);
              }
            }else{
              if(s2h>=0) emit_mov(s2h,th);
              else emit_loadreg(rs2[i]|64,th);
            }
            #endif
          }
        }
        else {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
      }
    }
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,t;
      // If either operand was not known 32-bit, do a 64-bit compare.
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
      {
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1h,31,t);
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz64_32(s2h,s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz64_32(s2h,s2l,t);
          }
          else {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
            else // SLTU
              emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
          }
        }
      } else {
        // Both operands known 32-bit: cheaper 32-bit compare forms.
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1l,31,t);
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz32(s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz32(s2l,t);
          }
          else{
            assert(s1l>=0);assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less32(s1l,s2l,t);
            else // SLTU
              emit_set_if_carry32(s1l,s2l,t);
          }
        }
      }
    }
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,th,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      // 64-bit path only when an upper-half target register exists.
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
      {
        assert(tl>=0);
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
              emit_and(s1h,s2h,th);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
              emit_xor(s1h,s2h,th);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
              emit_not(tl,tl);
              emit_not(th,th);
            }
          }
          else
          {
            // One (or both) operand is r0.
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
              emit_zeroreg(th);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl);
                if(s1h>=0) emit_mov(s1h,th);
                else emit_loadreg(rs1[i]|64,th);
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl);
                if(s2h>=0) emit_mov(s2h,th);
                else emit_loadreg(rs2[i]|64,th);
              }
              else{
                emit_zeroreg(tl);
                emit_zeroreg(th);
              }
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else{
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
                if(s1h>=0) emit_not(s1h,th);
                else{
                  emit_loadreg(rs1[i]|64,th);
                  emit_not(th,th);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else{
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
                if(s2h>=0) emit_not(s2h,th);
                else{
                  emit_loadreg(rs2[i]|64,th);
                  emit_not(th,th);
                }
              }
              else {
                // NOR r0,r0 = all ones.
                emit_movimm(-1,tl);
                emit_movimm(-1,th);
              }
            }
          }
        }
      }
      else
      {
        // 32 bit
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);
            assert(s2l>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_not(tl,tl);
            }
          }
          else
          {
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
              }
              else emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else {
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else {
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
              }
              else emit_movimm(-1,tl);
            }
          }
        }
      }
    }
  }
}
2465
// Emit host code for an I-type (16-bit immediate) ALU instruction:
// LUI, ADDI/ADDIU, DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI.
// Targets that already hold the right constant (isconst bit set) are
// skipped; sources whose constant is known (wasconst) are folded into a
// single emit_movimm using constmap.
void imm16_assemble(int i,struct regstat *i_regs)
{
  if (opcode[i]==0x0f) { // LUI
    if(rt1[i]) {
      signed char t;
      t=get_reg(i_regs->regmap,rt1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(!((i_regs->isconst>>t)&1))
          emit_movimm(imm[i]<<16,t);
      }
    }
  }
  if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      if(rs1[i]) {
        //assert(t>=0);
        //assert(s>=0);
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1)) {
            if(s<0) {
              // Source not in a host reg: load into the target first
              // (unless it is already there from block entry).
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_addimm(t,imm[i],t);
            }else{
              if(!((i_regs->wasconst>>s)&1))
                emit_addimm(s,imm[i],t);
              else
                emit_movimm(constmap[i][s]+imm[i],t);
            }
          }
        }
      } else {
        // rs is r0: result is just the immediate.
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1))
            emit_movimm(imm[i],t);
        }
      }
    }
  }
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]) {
          assert(sh>=0);
          assert(sl>=0);
          if(th>=0) {
            emit_addimm64_32(sh,sl,imm[i],th,tl);
          }
          else {
            emit_addimm(sl,imm[i],tl);
          }
        } else {
          emit_movimm(imm[i],tl);
          // Sign-extend the immediate into the upper half if live.
          if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
        }
      }
    }
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if(rt1[i]) {
      //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
      signed char sh,sl,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(rs1[i]>0) {
          // No upper half allocated implies the value was 32-bit.
          if(sh<0) assert((i_regs->was32>>rs1[i])&1);
          if(sh<0||((i_regs->was32>>rs1[i])&1)) {
            if(opcode[i]==0x0a) { // SLTI
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_slti32(t,imm[i],t);
              }else{
                emit_slti32(sl,imm[i],t);
              }
            }
            else { // SLTIU
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_sltiu32(t,imm[i],t);
              }else{
                emit_sltiu32(sl,imm[i],t);
              }
            }
          }else{ // 64-bit
            assert(sl>=0);
            if(opcode[i]==0x0a) // SLTI
              emit_slti64_32(sh,sl,imm[i],t);
            else // SLTIU
              emit_sltiu64_32(sh,sl,imm[i],t);
          }
        }else{
          // SLTI(U) with r0 is just stupid,
          // nonetheless examples can be found
          if(opcode[i]==0x0a) // SLTI
            if(0<imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          else // SLTIU
          {
            if(imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          }
        }
      }
    }
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
        if(opcode[i]==0x0c) //ANDI
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
              emit_andimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_andimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]&imm[i],tl);
            }
          }
          else
            emit_zeroreg(tl);
          // ANDI zero-extends: the upper half is always cleared.
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
            }
            // ORI/XORI leave the upper half of rs unchanged.
            if(th>=0) {
              if(sh<0) {
                emit_loadreg(rs1[i]|64,th);
              }else{
                emit_mov(sh,th);
              }
            }
            if(opcode[i]==0x0d) //ORI
            if(sl<0) {
              emit_orimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_orimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]|imm[i],tl);
            }
            if(opcode[i]==0x0e) //XORI
            if(sl<0) {
              emit_xorimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_xorimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]^imm[i],tl);
            }
          }
          else {
            emit_movimm(imm[i],tl);
            if(th>=0) emit_zeroreg(th);
          }
        }
      }
    }
  }
}
2648
// Emit host code for a shift-by-immediate instruction: SLL/SRL/SRA and
// the 64-bit DSLL/DSRL/DSRA plus the "+32" forms DSLL32/DSRL32/DSRA32.
// A shift amount of zero degenerates to a register move; a target with
// no host register means the result is dead and nothing is emitted.
void shiftimm_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0&&!((i_regs->isconst>>t)&1)){
        if(rs1[i]==0)
        {
          emit_zeroreg(t);
        }
        else
        {
          // Source not in a host reg: shift in place in the target.
          if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
          if(imm[i]) {
            if(opcode2[i]==0) // SLL
            {
              emit_shlimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==2) // SRL
            {
              emit_shrimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==3) // SRA
            {
              emit_sarimm(s<0?t:s,imm[i],t);
            }
          }else{
            // Shift by zero
            if(s>=0 && s!=t) emit_mov(s,t);
          }
        }
      }
      //emit_storereg(rt1[i],t); //DEBUG
    }
  }
  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          assert(sl>=0);
          assert(sh>=0);
          if(imm[i]) {
            // 64-bit shift built from double-width shift primitives.
            if(opcode2[i]==0x38) // DSLL
            {
              if(th>=0) emit_shldimm(sh,sl,imm[i],th);
              emit_shlimm(sl,imm[i],tl);
            }
            if(opcode2[i]==0x3a) // DSRL
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_shrimm(sh,imm[i],th);
            }
            if(opcode2[i]==0x3b) // DSRA
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_sarimm(sh,imm[i],th);
            }
          }else{
            // Shift by zero
            if(sl!=tl) emit_mov(sl,tl);
            if(th>=0&&sh!=th) emit_mov(sh,th);
          }
        }
      }
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      signed char sl,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(th>=0||tl>=0){
        assert(tl>=0);
        assert(th>=0);
        assert(sl>=0);
        // Shift >= 32: the low source word becomes the high result word.
        emit_mov(sl,th);
        emit_zeroreg(tl);
        if(imm[i]>32)
        {
          emit_shlimm(th,imm[i]&31,th);
        }
      }
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      signed char sh,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        // Shift >= 32: the high source word becomes the low result word.
        emit_mov(sh,tl);
        if(th>=0) emit_zeroreg(th);
        if(imm[i]>32)
        {
          emit_shrimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      signed char sh,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        emit_mov(sh,tl);
        if(imm[i]>32)
        {
          emit_sarimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
}
2786
#ifndef shift_assemble
// Generic fallback: each target architecture is expected to provide its
// own shift_assemble; reaching this stub is a fatal build/port error.
void shift_assemble(int i,struct regstat *i_regs)
{
  fputs("Need shift_assemble for this architecture.\n",stdout);
  exit(1);
}
#endif
2794
2795 void load_assemble(int i,struct regstat *i_regs)
2796 {
2797   int s,th,tl,addr,map=-1;
2798   int offset;
2799   int jaddr=0;
2800   int memtarget=0,c=0;
2801   int fastload_reg_override=0;
2802   u_int hr,reglist=0;
2803   th=get_reg(i_regs->regmap,rt1[i]|64);
2804   tl=get_reg(i_regs->regmap,rt1[i]);
2805   s=get_reg(i_regs->regmap,rs1[i]);
2806   offset=imm[i];
2807   for(hr=0;hr<HOST_REGS;hr++) {
2808     if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2809   }
2810   if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2811   if(s>=0) {
2812     c=(i_regs->wasconst>>s)&1;
2813     if (c) {
2814       memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2815       if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
2816     }
2817   }
2818   //printf("load_assemble: c=%d\n",c);
2819   //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
2820   // FIXME: Even if the load is a NOP, we should check for pagefaults...
2821 #ifdef PCSX
2822   if(tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80)
2823     ||rt1[i]==0) {
2824       // could be FIFO, must perform the read
2825       // ||dummy read
2826       assem_debug("(forced read)\n");
2827       tl=get_reg(i_regs->regmap,-1);
2828       assert(tl>=0);
2829   }
2830 #endif
2831   if(offset||s<0||c) addr=tl;
2832   else addr=s;
2833   //if(tl<0) tl=get_reg(i_regs->regmap,-1);
2834  if(tl>=0) {
2835   //printf("load_assemble: c=%d\n",c);
2836   //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
2837   assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
2838   reglist&=~(1<<tl);
2839   if(th>=0) reglist&=~(1<<th);
2840   if(!using_tlb) {
2841     if(!c) {
2842       #ifdef RAM_OFFSET
2843       map=get_reg(i_regs->regmap,ROREG);
2844       if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
2845       #endif
2846 //#define R29_HACK 1
2847       #ifdef R29_HACK
2848       // Strmnnrmn's speed hack
2849       if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
2850       #endif
2851       {
2852         jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
2853       }
2854     }
2855     else if(ram_offset&&memtarget) {
2856       emit_addimm(addr,ram_offset,HOST_TEMPREG);
2857       fastload_reg_override=HOST_TEMPREG;
2858     }
2859   }else{ // using tlb
2860     int x=0;
2861     if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
2862     if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
2863     map=get_reg(i_regs->regmap,TLREG);
2864     assert(map>=0);
2865     reglist&=~(1<<map);
2866     map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
2867     do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
2868   }
2869   int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
2870   if (opcode[i]==0x20) { // LB
2871     if(!c||memtarget) {
2872       if(!dummy) {
2873         #ifdef HOST_IMM_ADDR32
2874         if(c)
2875           emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
2876         else
2877         #endif
2878         {
2879           //emit_xorimm(addr,3,tl);
2880           //gen_tlb_addr_r(tl,map);
2881           //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
2882           int x=0,a=tl;
2883 #ifdef BIG_ENDIAN_MIPS
2884           if(!c) emit_xorimm(addr,3,tl);
2885           else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2886 #else
2887           if(!c) a=addr;
2888 #endif
2889           if(fastload_reg_override) a=fastload_reg_override;
2890
2891           emit_movsbl_indexed_tlb(x,a,map,tl);
2892         }
2893       }
2894       if(jaddr)
2895         add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2896     }
2897     else
2898       inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2899   }
2900   if (opcode[i]==0x21) { // LH
2901     if(!c||memtarget) {
2902       if(!dummy) {
2903         #ifdef HOST_IMM_ADDR32
2904         if(c)
2905           emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
2906         else
2907         #endif
2908         {
2909           int x=0,a=tl;
2910 #ifdef BIG_ENDIAN_MIPS
2911           if(!c) emit_xorimm(addr,2,tl);
2912           else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2913 #else
2914           if(!c) a=addr;
2915 #endif
2916           if(fastload_reg_override) a=fastload_reg_override;
2917           //#ifdef
2918           //emit_movswl_indexed_tlb(x,tl,map,tl);
2919           //else
2920           if(map>=0) {
2921             gen_tlb_addr_r(a,map);
2922             emit_movswl_indexed(x,a,tl);
2923           }else{
2924             #if 1 //def RAM_OFFSET
2925             emit_movswl_indexed(x,a,tl);
2926             #else
2927             emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
2928             #endif
2929           }
2930         }
2931       }
2932       if(jaddr)
2933         add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2934     }
2935     else
2936       inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2937   }
2938   if (opcode[i]==0x23) { // LW
2939     if(!c||memtarget) {
2940       if(!dummy) {
2941         int a=addr;
2942         if(fastload_reg_override) a=fastload_reg_override;
2943         //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
2944         #ifdef HOST_IMM_ADDR32
2945         if(c)
2946           emit_readword_tlb(constmap[i][s]+offset,map,tl);
2947         else
2948         #endif
2949         emit_readword_indexed_tlb(0,a,map,tl);
2950       }
2951       if(jaddr)
2952         add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2953     }
2954     else
2955       inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2956   }
2957   if (opcode[i]==0x24) { // LBU
2958     if(!c||memtarget) {
2959       if(!dummy) {
2960         #ifdef HOST_IMM_ADDR32
2961         if(c)
2962           emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
2963         else
2964         #endif
2965         {
2966           //emit_xorimm(addr,3,tl);
2967           //gen_tlb_addr_r(tl,map);
2968           //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
2969           int x=0,a=tl;
2970 #ifdef BIG_ENDIAN_MIPS
2971           if(!c) emit_xorimm(addr,3,tl);
2972           else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2973 #else
2974           if(!c) a=addr;
2975 #endif
2976           if(fastload_reg_override) a=fastload_reg_override;
2977
2978           emit_movzbl_indexed_tlb(x,a,map,tl);
2979         }
2980       }
2981       if(jaddr)
2982         add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2983     }
2984     else
2985       inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2986   }
2987   if (opcode[i]==0x25) { // LHU
2988     if(!c||memtarget) {
2989       if(!dummy) {
2990         #ifdef HOST_IMM_ADDR32
2991         if(c)
2992           emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
2993         else
2994         #endif
2995         {
2996           int x=0,a=tl;
2997 #ifdef BIG_ENDIAN_MIPS
2998           if(!c) emit_xorimm(addr,2,tl);
2999           else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
3000 #else
3001           if(!c) a=addr;
3002 #endif
3003           if(fastload_reg_override) a=fastload_reg_override;
3004           //#ifdef
3005           //emit_movzwl_indexed_tlb(x,tl,map,tl);
3006           //#else
3007           if(map>=0) {
3008             gen_tlb_addr_r(a,map);
3009             emit_movzwl_indexed(x,a,tl);
3010           }else{
3011             #if 1 //def RAM_OFFSET
3012             emit_movzwl_indexed(x,a,tl);
3013             #else
3014             emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
3015             #endif
3016           }
3017         }
3018       }
3019       if(jaddr)
3020         add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3021     }
3022     else
3023       inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3024   }
3025   if (opcode[i]==0x27) { // LWU
3026     assert(th>=0);
3027     if(!c||memtarget) {
3028       if(!dummy) {
3029         int a=addr;
3030         if(fastload_reg_override) a=fastload_reg_override;
3031         //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
3032         #ifdef HOST_IMM_ADDR32
3033         if(c)
3034           emit_readword_tlb(constmap[i][s]+offset,map,tl);
3035         else
3036         #endif
3037         emit_readword_indexed_tlb(0,a,map,tl);
3038       }
3039       if(jaddr)
3040         add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3041     }
3042     else {
3043       inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3044     }
3045     emit_zeroreg(th);
3046   }
3047   if (opcode[i]==0x37) { // LD
3048     if(!c||memtarget) {
3049       if(!dummy) {
3050         int a=addr;
3051         if(fastload_reg_override) a=fastload_reg_override;
3052         //gen_tlb_addr_r(tl,map);
3053         //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
3054         //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
3055         #ifdef HOST_IMM_ADDR32
3056         if(c)
3057           emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
3058         else
3059         #endif
3060         emit_readdword_indexed_tlb(0,a,map,th,tl);
3061       }
3062       if(jaddr)
3063         add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3064     }
3065     else
3066       inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3067   }
3068  }
3069   //emit_storereg(rt1[i],tl); // DEBUG
3070   //if(opcode[i]==0x23)
3071   //if(opcode[i]==0x24)
3072   //if(opcode[i]==0x23||opcode[i]==0x24)
3073   /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
3074   {
3075     //emit_pusha();
3076     save_regs(0x100f);
3077         emit_readword((int)&last_count,ECX);
3078         #ifdef __i386__
3079         if(get_reg(i_regs->regmap,CCREG)<0)
3080           emit_loadreg(CCREG,HOST_CCREG);
3081         emit_add(HOST_CCREG,ECX,HOST_CCREG);
3082         emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3083         emit_writeword(HOST_CCREG,(int)&Count);
3084         #endif
3085         #ifdef __arm__
3086         if(get_reg(i_regs->regmap,CCREG)<0)
3087           emit_loadreg(CCREG,0);
3088         else
3089           emit_mov(HOST_CCREG,0);
3090         emit_add(0,ECX,0);
3091         emit_addimm(0,2*ccadj[i],0);
3092         emit_writeword(0,(int)&Count);
3093         #endif
3094     emit_call((int)memdebug);
3095     //emit_popa();
3096     restore_regs(0x100f);
3097   }/**/
3098 }
3099
#ifndef loadlr_assemble
// Fallback stub: each target backend (assem_x86.h / assem_arm.h / ...) is
// expected to provide loadlr_assemble (the LWL/LWR/LDL/LDR emitter).  If a
// port forgot to implement it, fail loudly at runtime rather than emitting
// broken code.
void loadlr_assemble(int i,struct regstat *i_regs)
{
  (void)i;      // unused in the stub; silences -Wunused-parameter
  (void)i_regs; // unused in the stub
  printf("Need loadlr_assemble for this architecture.\n");
  exit(1);
}
#endif
3107
3108 void store_assemble(int i,struct regstat *i_regs)
3109 {
3110   int s,th,tl,map=-1;
3111   int addr,temp;
3112   int offset;
3113   int jaddr=0,jaddr2,type;
3114   int memtarget=0,c=0;
3115   int agr=AGEN1+(i&1);
3116   int faststore_reg_override=0;
3117   u_int hr,reglist=0;
3118   th=get_reg(i_regs->regmap,rs2[i]|64);
3119   tl=get_reg(i_regs->regmap,rs2[i]);
3120   s=get_reg(i_regs->regmap,rs1[i]);
3121   temp=get_reg(i_regs->regmap,agr);
3122   if(temp<0) temp=get_reg(i_regs->regmap,-1);
3123   offset=imm[i];
3124   if(s>=0) {
3125     c=(i_regs->wasconst>>s)&1;
3126     if(c) {
3127       memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3128       if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3129     }
3130   }
3131   assert(tl>=0);
3132   assert(temp>=0);
3133   for(hr=0;hr<HOST_REGS;hr++) {
3134     if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3135   }
3136   if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
3137   if(offset||s<0||c) addr=temp;
3138   else addr=s;
3139   if(!using_tlb) {
3140     if(!c) {
3141       #ifndef PCSX
3142       #ifdef R29_HACK
3143       // Strmnnrmn's speed hack
3144       if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
3145       #endif
3146       emit_cmpimm(addr,RAM_SIZE);
3147       #ifdef DESTRUCTIVE_SHIFT
3148       if(s==addr) emit_mov(s,temp);
3149       #endif
3150       #ifdef R29_HACK
3151       memtarget=1;
3152       if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
3153       #endif
3154       {
3155         jaddr=(int)out;
3156         #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3157         // Hint to branch predictor that the branch is unlikely to be taken
3158         if(rs1[i]>=28)
3159           emit_jno_unlikely(0);
3160         else
3161         #endif
3162         emit_jno(0);
3163       }
3164       #else
3165         jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
3166       #endif
3167     }
3168     else if(ram_offset&&memtarget) {
3169       emit_addimm(addr,ram_offset,HOST_TEMPREG);
3170       faststore_reg_override=HOST_TEMPREG;
3171     }
3172   }else{ // using tlb
3173     int x=0;
3174     if (opcode[i]==0x28) x=3; // SB
3175     if (opcode[i]==0x29) x=2; // SH
3176     map=get_reg(i_regs->regmap,TLREG);
3177     assert(map>=0);
3178     reglist&=~(1<<map);
3179     map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
3180     do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
3181   }
3182
3183   if (opcode[i]==0x28) { // SB
3184     if(!c||memtarget) {
3185       int x=0,a=temp;
3186 #ifdef BIG_ENDIAN_MIPS
3187       if(!c) emit_xorimm(addr,3,temp);
3188       else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
3189 #else
3190       if(!c) a=addr;
3191 #endif
3192       if(faststore_reg_override) a=faststore_reg_override;
3193       //gen_tlb_addr_w(temp,map);
3194       //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
3195       emit_writebyte_indexed_tlb(tl,x,a,map,a);
3196     }
3197     type=STOREB_STUB;
3198   }
3199   if (opcode[i]==0x29) { // SH
3200     if(!c||memtarget) {
3201       int x=0,a=temp;
3202 #ifdef BIG_ENDIAN_MIPS
3203       if(!c) emit_xorimm(addr,2,temp);
3204       else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
3205 #else
3206       if(!c) a=addr;
3207 #endif
3208       if(faststore_reg_override) a=faststore_reg_override;
3209       //#ifdef
3210       //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
3211       //#else
3212       if(map>=0) {
3213         gen_tlb_addr_w(a,map);
3214         emit_writehword_indexed(tl,x,a);
3215       }else
3216         //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
3217         emit_writehword_indexed(tl,x,a);
3218     }
3219     type=STOREH_STUB;
3220   }
3221   if (opcode[i]==0x2B) { // SW
3222     if(!c||memtarget) {
3223       int a=addr;
3224       if(faststore_reg_override) a=faststore_reg_override;
3225       //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
3226       emit_writeword_indexed_tlb(tl,0,a,map,temp);
3227     }
3228     type=STOREW_STUB;
3229   }
3230   if (opcode[i]==0x3F) { // SD
3231     if(!c||memtarget) {
3232       int a=addr;
3233       if(faststore_reg_override) a=faststore_reg_override;
3234       if(rs2[i]) {
3235         assert(th>=0);
3236         //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
3237         //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
3238         emit_writedword_indexed_tlb(th,tl,0,a,map,temp);
3239       }else{
3240         // Store zero
3241         //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
3242         //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
3243         emit_writedword_indexed_tlb(tl,tl,0,a,map,temp);
3244       }
3245     }
3246     type=STORED_STUB;
3247   }
3248 #ifdef PCSX
3249   if(jaddr) {
3250     // PCSX store handlers don't check invcode again
3251     reglist|=1<<addr;
3252     add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3253     jaddr=0;
3254   }
3255 #endif
3256   if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
3257     if(!c||memtarget) {
3258       #ifdef DESTRUCTIVE_SHIFT
3259       // The x86 shift operation is 'destructive'; it overwrites the
3260       // source register, so we need to make a copy first and use that.
3261       addr=temp;
3262       #endif
3263       #if defined(HOST_IMM8)
3264       int ir=get_reg(i_regs->regmap,INVCP);
3265       assert(ir>=0);
3266       emit_cmpmem_indexedsr12_reg(ir,addr,1);
3267       #else
3268       emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1);
3269       #endif
3270       #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
3271       emit_callne(invalidate_addr_reg[addr]);
3272       #else
3273       jaddr2=(int)out;
3274       emit_jne(0);
3275       add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),addr,0,0,0);
3276       #endif
3277     }
3278   }
3279   u_int addr_val=constmap[i][s]+offset;
3280   if(jaddr) {
3281     add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3282   } else if(c&&!memtarget) {
3283     inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist);
3284   }
3285   // basic current block modification detection..
3286   // not looking back as that should be in mips cache already
3287   if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
3288     SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
3289     assert(i_regs->regmap==regs[i].regmap); // not delay slot
3290     if(i_regs->regmap==regs[i].regmap) {
3291       load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3292       wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3293       emit_movimm(start+i*4+4,0);
3294       emit_writeword(0,(int)&pcaddr);
3295       emit_jmp((int)do_interrupt);
3296     }
3297   }
3298   //if(opcode[i]==0x2B || opcode[i]==0x3F)
3299   //if(opcode[i]==0x2B || opcode[i]==0x28)
3300   //if(opcode[i]==0x2B || opcode[i]==0x29)
3301   //if(opcode[i]==0x2B)
3302   /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F)
3303   {
3304     #ifdef __i386__
3305     emit_pusha();
3306     #endif
3307     #ifdef __arm__
3308     save_regs(0x100f);
3309     #endif
3310         emit_readword((int)&last_count,ECX);
3311         #ifdef __i386__
3312         if(get_reg(i_regs->regmap,CCREG)<0)
3313           emit_loadreg(CCREG,HOST_CCREG);
3314         emit_add(HOST_CCREG,ECX,HOST_CCREG);
3315         emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3316         emit_writeword(HOST_CCREG,(int)&Count);
3317         #endif
3318         #ifdef __arm__
3319         if(get_reg(i_regs->regmap,CCREG)<0)
3320           emit_loadreg(CCREG,0);
3321         else
3322           emit_mov(HOST_CCREG,0);
3323         emit_add(0,ECX,0);
3324         emit_addimm(0,2*ccadj[i],0);
3325         emit_writeword(0,(int)&Count);
3326         #endif
3327     emit_call((int)memdebug);
3328     #ifdef __i386__
3329     emit_popa();
3330     #endif
3331     #ifdef __arm__
3332     restore_regs(0x100f);
3333     #endif
3334   }/**/
3335 }
3336
3337 void storelr_assemble(int i,struct regstat *i_regs)
3338 {
3339   int s,th,tl;
3340   int temp;
3341   int temp2;
3342   int offset;
3343   int jaddr=0,jaddr2;
3344   int case1,case2,case3;
3345   int done0,done1,done2;
3346   int memtarget=0,c=0;
3347   int agr=AGEN1+(i&1);
3348   u_int hr,reglist=0;
3349   th=get_reg(i_regs->regmap,rs2[i]|64);
3350   tl=get_reg(i_regs->regmap,rs2[i]);
3351   s=get_reg(i_regs->regmap,rs1[i]);
3352   temp=get_reg(i_regs->regmap,agr);
3353   if(temp<0) temp=get_reg(i_regs->regmap,-1);
3354   offset=imm[i];
3355   if(s>=0) {
3356     c=(i_regs->isconst>>s)&1;
3357     if(c) {
3358       memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3359       if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3360     }
3361   }
3362   assert(tl>=0);
3363   for(hr=0;hr<HOST_REGS;hr++) {
3364     if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3365   }
3366   assert(temp>=0);
3367   if(!using_tlb) {
3368     if(!c) {
3369       emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
3370       if(!offset&&s!=temp) emit_mov(s,temp);
3371       jaddr=(int)out;
3372       emit_jno(0);
3373     }
3374     else
3375     {
3376       if(!memtarget||!rs1[i]) {
3377         jaddr=(int)out;
3378         emit_jmp(0);
3379       }
3380     }
3381     #ifd