drc: further hacks, hle handling
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  *   Mupen64plus - new_dynarec.c                                           *
3  *   Copyright (C) 2009-2010 Ari64                                         *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
19  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
#include <stdlib.h>
#include <stdint.h> //include for uint64_t
#include <stdio.h>
#include <assert.h>

#include "emu_if.h" //emulator interface

#include <sys/mman.h>
28
29 #ifdef __i386__
30 #include "assem_x86.h"
31 #endif
32 #ifdef __x86_64__
33 #include "assem_x64.h"
34 #endif
35 #ifdef __arm__
36 #include "assem_arm.h"
37 #endif
38
39 #define MAXBLOCK 4096
40 #define MAX_OUTPUT_BLOCK_SIZE 262144
41 #define CLOCK_DIVIDER 2
42
// Register-allocation state at one point in a block.
// Bit positions in the u_int/uint64_t masks below are per HOST register
// unless noted otherwise.
struct regstat
{
  signed char regmap_entry[HOST_REGS]; // guest reg expected in each host reg at entry (-1 = none)
  signed char regmap[HOST_REGS];       // guest reg currently held by each host reg (-1 = free)
  uint64_t was32;    // per guest reg: value was 32-bit (sign-extended) on entry
  uint64_t is32;     // per guest reg: value is 32-bit (sign-extended) now
  uint64_t wasdirty; // per host reg: was dirty (needed writeback) on entry
  uint64_t dirty;    // per host reg: modified since load, needs writeback
  uint64_t u;        // per guest reg: lower half is unneeded past this point
  uint64_t uu;       // per guest reg: upper half is unneeded past this point
  u_int wasconst;    // per host reg: held a known constant on entry
  u_int isconst;     // per host reg: holds a known constant now
  uint64_t constmap[HOST_REGS]; // constant value held by each host reg (valid if isconst bit set)
};
57
// Node of the linked lists that map guest virtual addresses to compiled
// code (used by jump_in/jump_out/jump_dirty).
struct ll_entry
{
  u_int vaddr;  // guest virtual address of the compiled block
  u_int reg32;  // mask matched against caller flags in get_addr_32; 0 = fully 32-bit block
  void *addr;   // native address of the compiled code
  struct ll_entry *next; // next entry in this bucket's chain
};
65
  /* Per-block recompiler state.  Arrays below are indexed by the
     instruction's position within the block currently being compiled. */
  u_int start;                  // guest address of the block's first instruction
  u_int *source;                // pointer to the guest code being analyzed
  u_int pagelimit;
  char insn[MAXBLOCK][10];      // disassembled mnemonic (debugging)
  u_char itype[MAXBLOCK];       // instruction type (LOAD/STORE/ALU/... below)
  u_char opcode[MAXBLOCK];      // primary opcode field
  u_char opcode2[MAXBLOCK];     // secondary opcode (function) field
  u_char bt[MAXBLOCK];          // branch-target flag
  u_char rs1[MAXBLOCK];         // guest source register 1
  u_char rs2[MAXBLOCK];         // guest source register 2
  u_char rt1[MAXBLOCK];         // guest target register 1
  u_char rt2[MAXBLOCK];         // guest target register 2
  u_char us1[MAXBLOCK];
  u_char us2[MAXBLOCK];
  u_char dep1[MAXBLOCK];
  u_char dep2[MAXBLOCK];
  u_char lt1[MAXBLOCK];
  int imm[MAXBLOCK];            // immediate operand
  u_int ba[MAXBLOCK];           // branch target address (for jump/branch types)
  char likely[MAXBLOCK];        // branch-likely flag
  char is_ds[MAXBLOCK];         // instruction sits in a delay slot
  uint64_t unneeded_reg[MAXBLOCK];        // per guest reg: lower half dead after here
  uint64_t unneeded_reg_upper[MAXBLOCK];  // per guest reg: upper half dead after here
  uint64_t branch_unneeded_reg[MAXBLOCK];
  uint64_t branch_unneeded_reg_upper[MAXBLOCK];
  uint64_t p32[MAXBLOCK];
  uint64_t pr32[MAXBLOCK];
  signed char regmap_pre[MAXBLOCK][HOST_REGS];
  signed char regmap[MAXBLOCK][HOST_REGS];
  signed char regmap_entry[MAXBLOCK][HOST_REGS];
  uint64_t constmap[MAXBLOCK][HOST_REGS];
  uint64_t known_value[HOST_REGS];
  u_int known_reg;
  struct regstat regs[MAXBLOCK];        // register state before each instruction
  struct regstat branch_regs[MAXBLOCK]; // register state along the branch-taken path
  u_int needed_reg[MAXBLOCK];
  uint64_t requires_32bit[MAXBLOCK];
  u_int wont_dirty[MAXBLOCK];
  u_int will_dirty[MAXBLOCK];
  int ccadj[MAXBLOCK];          // cycle-count adjustment per instruction
  int slen;                     // number of instructions in the current block
  u_int instr_addr[MAXBLOCK];   // native address emitted for each instruction
  u_int link_addr[MAXBLOCK][3];
  int linkcount;
  u_int stubs[MAXBLOCK*3][8];
  int stubcount;
  u_int literals[1024][2];
  int literalcount;
  int is_delayslot;
  int cop1_usable;
  u_char *out;                  // current write position in the translation cache
  struct ll_entry *jump_in[4096];    // clean compiled blocks, by page
  struct ll_entry *jump_out[4096];   // outgoing jump patch points, by page
  struct ll_entry *jump_dirty[4096]; // possibly-stale blocks, by virtual page
  u_int hash_table[65536][4]  __attribute__((aligned(16))); // 2-way: {vaddr,addr,vaddr,addr}
  char shadow[1048576]  __attribute__((aligned(16)));
  void *copy;
  int expirep;
  u_int using_tlb;
  u_int stop_after_jal;
  extern u_char restore_candidate[512]; // bitmap of pages with restorable dirty blocks
  extern int cycle_count;
128
129   /* registers that may be allocated */
130   /* 1-31 gpr */
131 #define HIREG 32 // hi
132 #define LOREG 33 // lo
133 #define FSREG 34 // FPU status (FCSR)
134 #define CSREG 35 // Coprocessor status
135 #define CCREG 36 // Cycle count
136 #define INVCP 37 // Pointer to invalid_code
137 #define TEMPREG 38
138 #define FTEMP 38 // FPU temporary register
139 #define PTEMP 39 // Prefetch temporary register
140 #define TLREG 40 // TLB mapping offset
141 #define RHASH 41 // Return address hash
142 #define RHTBL 42 // Return address hash table address
143 #define RTEMP 43 // JR/JALR address register
144 #define MAXREG 43
145 #define AGEN1 44 // Address generation temporary register
146 #define AGEN2 45 // Address generation temporary register
147 #define MGEN1 46 // Maptable address generation temporary register
148 #define MGEN2 47 // Maptable address generation temporary register
149 #define BTREG 48 // Branch target temporary register
150
151   /* instruction types */
152 #define NOP 0     // No operation
153 #define LOAD 1    // Load
154 #define STORE 2   // Store
155 #define LOADLR 3  // Unaligned load
156 #define STORELR 4 // Unaligned store
157 #define MOV 5     // Move 
158 #define ALU 6     // Arithmetic/logic
159 #define MULTDIV 7 // Multiply/divide
160 #define SHIFT 8   // Shift by register
161 #define SHIFTIMM 9// Shift by immediate
162 #define IMM16 10  // 16-bit immediate
163 #define RJUMP 11  // Unconditional jump to register
164 #define UJUMP 12  // Unconditional jump
165 #define CJUMP 13  // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
166 #define SJUMP 14  // Conditional branch (regimm format)
167 #define COP0 15   // Coprocessor 0
168 #define COP1 16   // Coprocessor 1
169 #define C1LS 17   // Coprocessor 1 load/store
170 #define FJUMP 18  // Conditional branch (floating point)
171 #define FLOAT 19  // Floating point unit
172 #define FCONV 20  // Convert integer to float
173 #define FCOMP 21  // Floating point compare (sets FSREG)
174 #define SYSCALL 22// SYSCALL
175 #define OTHER 23  // Other
176 #define SPAN 24   // Branch/delay slot spans 2 pages
177 #define NI 25     // Not implemented
178 #define HLECALL 26// PCSX fake opcodes for HLE
179
180   /* stubs */
181 #define CC_STUB 1
182 #define FP_STUB 2
183 #define LOADB_STUB 3
184 #define LOADH_STUB 4
185 #define LOADW_STUB 5
186 #define LOADD_STUB 6
187 #define LOADBU_STUB 7
188 #define LOADHU_STUB 8
189 #define STOREB_STUB 9
190 #define STOREH_STUB 10
191 #define STOREW_STUB 11
192 #define STORED_STUB 12
193 #define STORELR_STUB 13
194 #define INVCODE_STUB 14
195
196   /* branch codes */
197 #define TAKEN 1
198 #define NOTTAKEN 2
199 #define NULLDS 3
200
201 // asm linkage
202 int new_recompile_block(int addr);
203 void *get_addr_ht(u_int vaddr);
204 void invalidate_block(u_int block);
205 void invalidate_addr(u_int addr);
206 void remove_hash(int vaddr);
207 void jump_vaddr();
208 void dyna_linker();
209 void dyna_linker_ds();
210 void verify_code();
211 void verify_code_vm();
212 void verify_code_ds();
213 void cc_interrupt();
214 void fp_exception();
215 void fp_exception_ds();
216 void jump_syscall();
217 void jump_syscall_hle();
218 void jump_eret();
219 void jump_hlecall();
220 void new_dyna_leave();
221
222 // TLB
223 void TLBWI_new();
224 void TLBWR_new();
225 void read_nomem_new();
226 void read_nomemb_new();
227 void read_nomemh_new();
228 void read_nomemd_new();
229 void write_nomem_new();
230 void write_nomemb_new();
231 void write_nomemh_new();
232 void write_nomemd_new();
233 void write_rdram_new();
234 void write_rdramb_new();
235 void write_rdramh_new();
236 void write_rdramd_new();
237 extern u_int memory_map[1048576];
238
239 // Needed by assembler
240 void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
241 void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
242 void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
243 void load_all_regs(signed char i_regmap[]);
244 void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
245 void load_regs_entry(int t);
246 void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
247
int tracedebug=0; // set nonzero to enable tracing hooks

//#define DEBUG_CYCLE_COUNT 1

// No-op sink: lets the debug printf macros below compile to nothing.
void nullf() {}
//#define assem_debug printf
//#define inv_debug printf
#define assem_debug nullf
#define inv_debug nullf
257
258 static void tlb_hacks()
259 {
260 #ifndef DISABLE_TLB
261   // Goldeneye hack
262   if (strncmp((char *) ROM_HEADER->nom, "GOLDENEYE",9) == 0)
263   {
264     u_int addr;
265     int n;
266     switch (ROM_HEADER->Country_code&0xFF) 
267     {
268       case 0x45: // U
269         addr=0x34b30;
270         break;                   
271       case 0x4A: // J 
272         addr=0x34b70;    
273         break;    
274       case 0x50: // E 
275         addr=0x329f0;
276         break;                        
277       default: 
278         // Unknown country code
279         addr=0;
280         break;
281     }
282     u_int rom_addr=(u_int)rom;
283     #ifdef ROM_COPY
284     // Since memory_map is 32-bit, on 64-bit systems the rom needs to be
285     // in the lower 4G of memory to use this hack.  Copy it if necessary.
286     if((void *)rom>(void *)0xffffffff) {
287       munmap(ROM_COPY, 67108864);
288       if(mmap(ROM_COPY, 12582912,
289               PROT_READ | PROT_WRITE,
290               MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
291               -1, 0) <= 0) {printf("mmap() failed\n");}
292       memcpy(ROM_COPY,rom,12582912);
293       rom_addr=(u_int)ROM_COPY;
294     }
295     #endif
296     if(addr) {
297       for(n=0x7F000;n<0x80000;n++) {
298         memory_map[n]=(((u_int)(rom_addr+addr-0x7F000000))>>2)|0x40000000;
299       }
300     }
301   }
302 #endif
303 }
304
305 static u_int get_page(u_int vaddr)
306 {
307   u_int page=(vaddr^0x80000000)>>12;
308 #ifndef DISABLE_TLB
309   if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12;
310 #endif
311   if(page>2048) page=2048+(page&2047);
312   return page;
313 }
314
315 static u_int get_vpage(u_int vaddr)
316 {
317   u_int vpage=(vaddr^0x80000000)>>12;
318 #ifndef DISABLE_TLB
319   if(vpage>262143&&tlb_LUT_r[vaddr>>12]) vpage&=2047; // jump_dirty uses a hash of the virtual address instead
320 #endif
321   if(vpage>2048) vpage=2048+(vpage&2047);
322   return vpage;
323 }
324
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
void *get_addr(u_int vaddr)
{
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  // First look for a clean compiled block for this address.
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&head->reg32==0) {
  //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Promote into the 2-way hash bucket: way 0 is newest, way 1 is evicted.
      int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
      ht_bin[3]=ht_bin[1];
      ht_bin[2]=ht_bin[0];
      ht_bin[1]=(int)head->addr;
      ht_bin[0]=vaddr;
      return head->addr;
    }
    head=head->next;
  }
  // No clean block: try to restore a dirty one whose source is unmodified.
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&head->reg32==0) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(verify_dirty(head->addr)) {
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        // Source matches: mark the page valid/executable again.
        invalid_code[vaddr>>12]=0;
        memory_map[vaddr>>12]|=0x40000000;
        if(vpage<2048) {
#ifndef DISABLE_TLB
          // Also revalidate the physical page behind a TLB mapping.
          if(tlb_LUT_r[vaddr>>12]) {
            invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
            memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
          }
#endif
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr; // Replace existing entry
        }
        else
        {
          ht_bin[3]=ht_bin[1];
          ht_bin[2]=ht_bin[0];
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
  // Not compiled yet: compile now, then retry the lookup.
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault exception
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
394 // Look up address in hash table first
395 void *get_addr_ht(u_int vaddr)
396 {
397   //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
398   int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
399   if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
400   if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
401   return get_addr(vaddr);
402 }
403
// Look up (or compile) a block for vaddr, accepting blocks whose reg32
// assumptions do not conflict with `flags`.
// Under FORCE32 everything is 32-bit, so this degenerates to get_addr().
void *get_addr_32(u_int vaddr,u_int flags)
{
#ifdef FORCE32
  return get_addr(vaddr);
#endif
  // NOTE: everything below is unreachable when FORCE32 is defined.
  //printf("TRACE: count=%d next=%d (get_addr_32 %x,flags %x)\n",Count,next_interupt,vaddr,flags);
  int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
  if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while(head!=NULL) {
    // Accept any block whose 64-bit-register assumptions don't clash with flags.
    if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
      //printf("TRACE: count=%d next=%d (get_addr_32 match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      if(head->reg32==0) {
        // Only fully 32-bit blocks go in the hash table; insert into an
        // empty way without evicting existing entries.
        int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        //ht_bin[3]=ht_bin[1];
        //ht_bin[2]=ht_bin[0];
        //ht_bin[1]=(int)head->addr;
        //ht_bin[0]=vaddr;
      }
      return head->addr;
    }
    head=head->next;
  }
  // No clean block: try to restore a dirty one (same scheme as get_addr).
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
      //printf("TRACE: count=%d next=%d (get_addr_32 match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(verify_dirty(head->addr)) {
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        invalid_code[vaddr>>12]=0;
        memory_map[vaddr>>12]|=0x40000000;
        if(vpage<2048) {
#ifndef DISABLE_TLB
          if(tlb_LUT_r[vaddr>>12]) {
            invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
            memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
          }
#endif
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        if(head->reg32==0) {
          int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
          if(ht_bin[0]==-1) {
            ht_bin[1]=(int)head->addr;
            ht_bin[0]=vaddr;
          }else if(ht_bin[2]==-1) {
            ht_bin[3]=(int)head->addr;
            ht_bin[2]=vaddr;
          }
          //ht_bin[3]=ht_bin[1];
          //ht_bin[2]=ht_bin[0];
          //ht_bin[1]=(int)head->addr;
          //ht_bin[0]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  //printf("TRACE: count=%d next=%d (get_addr_32 no-match %x,flags %x)\n",Count,next_interupt,vaddr,flags);
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault exception
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
489
490 void clear_all_regs(signed char regmap[])
491 {
492   int hr;
493   for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
494 }
495
496 signed char get_reg(signed char regmap[],int r)
497 {
498   int hr;
499   for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
500   return -1;
501 }
502
503 // Find a register that is available for two consecutive cycles
504 signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
505 {
506   int hr;
507   for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
508   return -1;
509 }
510
511 int count_free_regs(signed char regmap[])
512 {
513   int count=0;
514   int hr;
515   for(hr=0;hr<HOST_REGS;hr++)
516   {
517     if(hr!=EXCLUDE_REG) {
518       if(regmap[hr]<0) count++;
519     }
520   }
521   return count;
522 }
523
524 void dirty_reg(struct regstat *cur,signed char reg)
525 {
526   int hr;
527   if(!reg) return;
528   for (hr=0;hr<HOST_REGS;hr++) {
529     if((cur->regmap[hr]&63)==reg) {
530       cur->dirty|=1<<hr;
531     }
532   }
533 }
534
// If we dirty the lower half of a 64 bit register which is now being
// sign-extended, we need to dump the upper half.
// Note: Do this only after completion of the instruction, because
// some instructions may need to read the full 64-bit value even if
// overwriting it (eg SLTI, DSRA32).
static void flush_dirty_uppers(struct regstat *cur)
{
  int hr,reg;
  for (hr=0;hr<HOST_REGS;hr++) {
    if((cur->dirty>>hr)&1) {
      reg=cur->regmap[hr];
      // reg>=64 means this host reg holds the UPPER half of guest reg (reg&63);
      // if that guest reg is now 32-bit (sign-extended), the upper half is stale.
      if(reg>=64) 
        if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
    }
  }
}
551
552 void set_const(struct regstat *cur,signed char reg,uint64_t value)
553 {
554   int hr;
555   if(!reg) return;
556   for (hr=0;hr<HOST_REGS;hr++) {
557     if(cur->regmap[hr]==reg) {
558       cur->isconst|=1<<hr;
559       cur->constmap[hr]=value;
560     }
561     else if((cur->regmap[hr]^64)==reg) {
562       cur->isconst|=1<<hr;
563       cur->constmap[hr]=value>>32;
564     }
565   }
566 }
567
568 void clear_const(struct regstat *cur,signed char reg)
569 {
570   int hr;
571   if(!reg) return;
572   for (hr=0;hr<HOST_REGS;hr++) {
573     if((cur->regmap[hr]&63)==reg) {
574       cur->isconst&=~(1<<hr);
575     }
576   }
577 }
578
579 int is_const(struct regstat *cur,signed char reg)
580 {
581   int hr;
582   if(!reg) return 1;
583   for (hr=0;hr<HOST_REGS;hr++) {
584     if((cur->regmap[hr]&63)==reg) {
585       return (cur->isconst>>hr)&1;
586     }
587   }
588   return 0;
589 }
590 uint64_t get_const(struct regstat *cur,signed char reg)
591 {
592   int hr;
593   if(!reg) return 0;
594   for (hr=0;hr<HOST_REGS;hr++) {
595     if(cur->regmap[hr]==reg) {
596       return cur->constmap[hr];
597     }
598   }
599   printf("Unknown constant in r%d\n",reg);
600   exit(1);
601 }
602
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used.  Try not to reallocate these.
// hsn[r] is set to the distance (in instructions) until guest register r
// is next used; smaller means "needed sooner".
void lsn(u_char hsn[], int i, int *preferred_reg)
{
  int j;
  int b=-1;
  // Find the scan horizon: up to 9 instructions ahead, stopping at the
  // end of the block or just past an unconditional jump.
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    // (source>>16)==0x1000 detects "b +0"-style jumps — presumably an
    // unconditional branch encoding; confirm against the disassembler.
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  // Walk backwards so the NEAREST use wins (overwrites farther uses).
  for(;j>=0;j--)
  {
    if(rs1[i+j]) hsn[rs1[i+j]]=j;
    if(rs2[i+j]) hsn[rs2[i+j]]=j;
    if(rt1[i+j]) hsn[rt1[i+j]]=j;
    if(rt2[i+j]) hsn[rt2[i+j]]=j;
    if(itype[i+j]==STORE || itype[i+j]==STORELR) {
      // Stores can allocate zero
      hsn[rs1[i+j]]=j;
      hsn[rs2[i+j]]=j;
    }
    // On some architectures stores need invc_ptr
    #if defined(HOST_IMM8)
    if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39) {
      hsn[INVCP]=j;
    }
    #endif
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      // Branches need the cycle count register.
      hsn[CCREG]=j;
      b=j;
    }
  }
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        // Uses along the branch-taken path count as farther away (+b+2).
        if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
        if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
        //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
        //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
      }
    }
    // TODO: preferred register based on backward branch
  }
  // Delay slot should preferably not overwrite branch conditions or cycle count
  if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
    if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
    if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
    hsn[CCREG]=1;
    // ...or hash tables
    hsn[RHASH]=1;
    hsn[RHTBL]=1;
  }
  // Coprocessor load/store needs FTEMP, even if not declared
  if(itype[i]==C1LS) {
    hsn[FTEMP]=0;
  }
  // Load L/R also uses FTEMP as a temporary register
  if(itype[i]==LOADLR) {
    hsn[FTEMP]=0;
  }
  // Also 64-bit SDL/SDR
  if(opcode[i]==0x2c||opcode[i]==0x2d) {
    hsn[FTEMP]=0;
  }
  // Don't remove the TLB registers either
  if(itype[i]==LOAD || itype[i]==LOADLR || itype[i]==STORE || itype[i]==STORELR || itype[i]==C1LS ) {
    hsn[TLREG]=0;
  }
  // Don't remove the miniht registers
  if(itype[i]==UJUMP||itype[i]==RJUMP)
  {
    hsn[RHASH]=0;
    hsn[RHTBL]=0;
  }
}
695
// We only want to allocate registers if we're going to use them again soon
// Returns nonzero if guest register r is read again within the scan
// horizon sooner than some currently-allocated register would be needed.
int needed_again(int r, int i)
{
  int j;
  int b=-1;
  int rn=10; // distance to next use of r; 10 == "not needed in horizon"
  int hr;
  u_char hsn[MAXREG+1];
  int preferred_reg;
  
  memset(hsn,10,sizeof(hsn));
  lsn(hsn,i,&preferred_reg);
  
  if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
  {
    if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
      return 0; // Don't need any registers if exiting the block
  }
  // Establish the scan horizon (same rules as lsn), additionally
  // stopping at SYSCALL/HLECALL/BREAK-style instructions.
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
    // (source&0xfc00003f)==0x0d — presumably the BREAK opcode; confirm.
    if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||((source[i+j]&0xfc00003f)==0x0d))
    {
      break;
    }
  }
  // Walk backwards so the nearest use of r wins.
  for(;j>=1;j--)
  {
    if(rs1[i+j]==r) rn=j;
    if(rs2[i+j]==r) rn=j;
    if((unneeded_reg[i+j]>>r)&1) rn=10;
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      b=j;
    }
  }
  /*
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int o=rn;
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(!((unneeded_reg[t+j]>>r)&1)) {
          if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
          if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
        }
        else rn=o;
      }
    }
  }*/
  // Needed again if r's next use comes sooner than that of anything
  // currently competing for a host register.
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if(rn<hsn[hr]) return 1;
    }
  }
  return 0;
}
767
768 // Try to match register allocations at the end of a loop with those
769 // at the beginning
770 int loop_reg(int i, int r, int hr)
771 {
772   int j,k;
773   for(j=0;j<9;j++)
774   {
775     if(i+j>=slen) {
776       j=slen-i-1;
777       break;
778     }
779     if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
780     {
781       // Don't go past an unconditonal jump
782       j++;
783       break;
784     }
785   }
786   k=0;
787   if(i>0){
788     if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
789       k--;
790   }
791   for(;k<j;k++)
792   {
793     if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
794     if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
795     if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
796     {
797       if(ba[i+k]>=start && ba[i+k]<(start+i*4))
798       {
799         int t=(ba[i+k]-start)>>2;
800         int reg=get_reg(regs[t].regmap_entry,r);
801         if(reg>=0) return reg;
802         //reg=get_reg(regs[t+1].regmap_entry,r);
803         //if(reg>=0) return reg;
804       }
805     }
806   }
807   return hr;
808 }
809
810
811 // Allocate every register, preserving source/target regs
812 void alloc_all(struct regstat *cur,int i)
813 {
814   int hr;
815   
816   for(hr=0;hr<HOST_REGS;hr++) {
817     if(hr!=EXCLUDE_REG) {
818       if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
819          ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
820       {
821         cur->regmap[hr]=-1;
822         cur->dirty&=~(1<<hr);
823       }
824       // Don't need zeros
825       if((cur->regmap[hr]&63)==0)
826       {
827         cur->regmap[hr]=-1;
828         cur->dirty&=~(1<<hr);
829       }
830     }
831   }
832 }
833
834
// DDIV helper: 64-bit signed divide; quotient goes to lo, remainder to hi.
// NOTE(review): divisor==0 and INT64_MIN/-1 are undefined behavior in C;
// presumably callers never pass them (MIPS leaves results undefined
// there) — confirm against the dispatch code.
void div64(int64_t dividend,int64_t divisor)
{
  lo=dividend/divisor;
  hi=dividend%divisor;
  //printf("TRACE: ddiv %8x%8x %8x%8x\n" ,(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
// DDIVU helper: 64-bit unsigned divide; quotient to lo, remainder to hi.
// NOTE(review): divisor==0 is undefined behavior in C — confirm callers
// guard against it.
void divu64(uint64_t dividend,uint64_t divisor)
{
  lo=dividend/divisor;
  hi=dividend%divisor;
  //printf("TRACE: ddivu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
849
850 void mult64(uint64_t m1,uint64_t m2)
851 {
852    unsigned long long int op1, op2, op3, op4;
853    unsigned long long int result1, result2, result3, result4;
854    unsigned long long int temp1, temp2, temp3, temp4;
855    int sign = 0;
856    
857    if (m1 < 0)
858      {
859     op2 = -m1;
860     sign = 1 - sign;
861      }
862    else op2 = m1;
863    if (m2 < 0)
864      {
865     op4 = -m2;
866     sign = 1 - sign;
867      }
868    else op4 = m2;
869    
870    op1 = op2 & 0xFFFFFFFF;
871    op2 = (op2 >> 32) & 0xFFFFFFFF;
872    op3 = op4 & 0xFFFFFFFF;
873    op4 = (op4 >> 32) & 0xFFFFFFFF;
874    
875    temp1 = op1 * op3;
876    temp2 = (temp1 >> 32) + op1 * op4;
877    temp3 = op2 * op3;
878    temp4 = (temp3 >> 32) + op2 * op4;
879    
880    result1 = temp1 & 0xFFFFFFFF;
881    result2 = temp2 + (temp3 & 0xFFFFFFFF);
882    result3 = (result2 >> 32) + temp4;
883    result4 = (result3 >> 32);
884    
885    lo = result1 | (result2 << 32);
886    hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
887    if (sign)
888      {
889     hi = ~hi;
890     if (!lo) hi++;
891     else lo = ~lo + 1;
892      }
893 }
894
895 void multu64(uint64_t m1,uint64_t m2)
896 {
897    unsigned long long int op1, op2, op3, op4;
898    unsigned long long int result1, result2, result3, result4;
899    unsigned long long int temp1, temp2, temp3, temp4;
900    
901    op1 = m1 & 0xFFFFFFFF;
902    op2 = (m1 >> 32) & 0xFFFFFFFF;
903    op3 = m2 & 0xFFFFFFFF;
904    op4 = (m2 >> 32) & 0xFFFFFFFF;
905    
906    temp1 = op1 * op3;
907    temp2 = (temp1 >> 32) + op1 * op4;
908    temp3 = op2 * op3;
909    temp4 = (temp3 >> 32) + op2 * op4;
910    
911    result1 = temp1 & 0xFFFFFFFF;
912    result2 = temp2 + (temp3 & 0xFFFFFFFF);
913    result3 = (result2 >> 32) + temp4;
914    result4 = (result3 >> 32);
915    
916    lo = result1 | (result2 << 32);
917    hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
918    
919   //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
920   //                                      ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
921 }
922
// LDL merge: keep the low `bits` bits of the original register value and
// fill the rest with the loaded data shifted up by `bits`.
// bits==0 means the load replaces the register entirely.
uint64_t ldl_merge(uint64_t original,uint64_t loaded,u_int bits)
{
  uint64_t low_mask;
  if(bits==0) return loaded;
  low_mask=(~(uint64_t)0)>>(64-bits);
  return (original&low_mask)|(loaded<<bits);
}
// LDR merge: keep the top (bits^56) bits of the original register value
// and fill the rest with the loaded data shifted down.
// bits==56 (keep==0) means the load replaces the register entirely.
uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits)
{
  u_int keep=bits^56;
  uint64_t high_mask;
  if(keep==0) return loaded;
  high_mask=~((~(uint64_t)0)>>keep); // top `keep` bits set
  return (original&high_mask)|(loaded>>keep);
}
945
946 #ifdef __i386__
947 #include "assem_x86.c"
948 #endif
949 #ifdef __x86_64__
950 #include "assem_x64.c"
951 #endif
952 #ifdef __arm__
953 #include "assem_arm.c"
954 #endif
955
956 // Add virtual address mapping to linked list
957 void ll_add(struct ll_entry **head,int vaddr,void *addr)
958 {
959   struct ll_entry *new_entry;
960   new_entry=malloc(sizeof(struct ll_entry));
961   assert(new_entry!=NULL);
962   new_entry->vaddr=vaddr;
963   new_entry->reg32=0;
964   new_entry->addr=addr;
965   new_entry->next=*head;
966   *head=new_entry;
967 }
968
// Add virtual address mapping for 32-bit compiled block
// reg32 records which registers the block assumes are 64-bit (matched
// against flags in get_addr_32).  Under FORCE32 everything is 32-bit,
// so reg32 stays 0.
void ll_add_32(struct ll_entry **head,int vaddr,u_int reg32,void *addr)
{
  ll_add(head,vaddr,addr);
#ifndef FORCE32
  (*head)->reg32=reg32;
#endif
}
977
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
// NOTE(review): ht_bin is u_int* here but int* in the other hash-table
// users — same layout, but the signedness differs; confirm intentional.
void *check_addr(u_int vaddr)
{
  u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) {
    // Only return the cached address if it is far from the expiry point.
    if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[1])) return (void *)ht_bin[1];
  }
  if(ht_bin[2]==vaddr) {
    if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[3])) return (void *)ht_bin[3];
  }
  // Hash miss: search the page's jump_in chain.
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&head->reg32==0) {
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        // Update existing entry with current address
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr;
          return head->addr;
        }
        if(ht_bin[2]==vaddr) {
          ht_bin[3]=(int)head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
1023
// Remove any hash-table entries mapping 'vaddr'.
// Bucket layout: slots [0]/[1] and [2]/[3] are (vaddr, code addr) pairs,
// -1 marks empty.  The order below matters: clearing the second pair first
// means that if both pairs held vaddr, the first slot is refilled with the
// already-cleared values, leaving the whole bucket empty.
void remove_hash(int vaddr)
{
  //printf("remove hash: %x\n",vaddr);
  int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
  if(ht_bin[2]==vaddr) {
    ht_bin[2]=ht_bin[3]=-1;
  }
  if(ht_bin[0]==vaddr) {
    // Promote the second pair into the first slot so the bucket stays packed.
    ht_bin[0]=ht_bin[2];
    ht_bin[1]=ht_bin[3];
    ht_bin[2]=ht_bin[3]=-1;
  }
}
1037
// Remove every entry whose generated-code address falls in the
// (addr>>shift) bucket, also testing addr-MAX_OUTPUT_BLOCK_SIZE so a block
// that starts in the previous bucket but may extend into this one is caught.
// Used when a region of the translation cache is about to be reclaimed.
void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
{
  struct ll_entry *next;
  while(*head) {
    if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || 
       ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
    {
      inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
      // Also drop the hash-table entries so the stale code can't be looked up.
      remove_hash((*head)->vaddr);
      next=(*head)->next;
      free(*head);
      *head=next; // unlink in place through the pointer-to-pointer
    }
    else
    {
      head=&((*head)->next);
    }
  }
}
1057
1058 // Remove all entries from linked list
1059 void ll_clear(struct ll_entry **head)
1060 {
1061   struct ll_entry *cur;
1062   struct ll_entry *next;
1063   if(cur=*head) {
1064     *head=0;
1065     while(cur) {
1066       next=cur->next;
1067       free(cur);
1068       cur=next;
1069     }
1070   }
1071 }
1072
1073 // Dereference the pointers and remove if it matches
// Dereference the pointers and remove if it matches
// For each jump_out entry, look up where its branch instruction currently
// points (get_pointer); if that target lies in the region being expired
// (same addr>>shift bucket, including the MAX_OUTPUT_BLOCK_SIZE overhang),
// patch the branch (kill_pointer) so it no longer jumps into reclaimed code.
void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
{
  while(head) {
    int ptr=get_pointer(head->addr);
    inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
    if(((ptr>>shift)==(addr>>shift)) ||
       (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
    {
      inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
      kill_pointer(head->addr);
    }
    head=head->next;
  }
}
1088
// This is called when we write to a compiled block (see do_invstub)
// Frees all jump_in entries for the page (removing their hash-table
// mappings) and patches out every incoming branch recorded in jump_out.
// Returns nonzero if any generated code was modified, so the caller knows
// an instruction-cache flush may be needed.
int invalidate_page(u_int page)
{
  int modified=0;
  struct ll_entry *head;
  struct ll_entry *next;
  head=jump_in[page];
  jump_in[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: %x\n",head->vaddr);
    remove_hash(head->vaddr);
    next=head->next;
    free(head);
    head=next;
  }
  head=jump_out[page];
  jump_out[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
    // Patching a branch rewrites generated code, so note the modification.
    kill_pointer(head->addr);
    modified=1;
    next=head->next;
    free(head);
    head=next;
  }
  return modified;
}
// Invalidate all compiled code overlapping the given 4K page ('block' is a
// virtual page number).  Scans jump_dirty to find blocks that spill across
// page boundaries, widening the [first,last] page range accordingly, then
// invalidates every page in that range and marks the page writable again.
void invalidate_block(u_int block)
{
  int modified;
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    u_int start,end;
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      // get_bounds reports the source-address range the block was compiled from.
      get_bounds((int)head->addr,&start,&end);
      //printf("start: %x end: %x\n",start,end);
      if(page<2048&&start>=0x80000000&&end<0x80800000) {
        // Block lies in the directly-mapped RDRAM window: widen the range
        // of pages to invalidate if it crosses a 4K boundary.
        if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
          if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
          if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
        }
      }
#ifndef DISABLE_TLB
      // TLB-mapped addresses: translate through memory_map before the same
      // range-widening test.
      if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) {
        if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) {
          if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)<first) first=((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047;
          if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047;
        }
      }
#endif
    }
    head=head->next;
  }
  //printf("first=%d last=%d\n",first,last);
  modified=invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  
  // Don't trap writes
  invalid_code[block]=1;
#ifndef DISABLE_TLB
  // If there is a valid TLB entry for this page, remove write protect
  if(tlb_LUT_w[block]) {
    assert(tlb_LUT_r[block]==tlb_LUT_w[block]);
    // CHECK: Is this right?
    memory_map[block]=((tlb_LUT_w[block]&0xFFFFF000)-(block<<12)+(unsigned int)rdram-0x80000000)>>2;
    u_int real_block=tlb_LUT_w[block]>>12;
    invalid_code[real_block]=1;
    if(real_block>=0x80000&&real_block<0x80800) memory_map[real_block]=((u_int)rdram-0x80000000)>>2;
  }
  else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2;
#endif
  #ifdef __arm__
  // ARM has a separate I-cache; flush it if any generated code was patched.
  if(modified)
    __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
  #endif
  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
1185 void invalidate_addr(u_int addr)
1186 {
1187   invalidate_block(addr>>12);
1188 }
// Throw away every compiled block.  Pages whose code was not actually
// modified are flagged in restore_candidate so clean_blocks() may later
// restore them without recompiling.
void invalidate_all_pages()
{
  u_int page,n;
  for(page=0;page<4096;page++)
    invalidate_page(page);
  for(page=0;page<1048576;page++)
    if(!invalid_code[page]) {
      restore_candidate[(page&2047)>>3]|=1<<(page&7);
      restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
    }
  #ifdef __arm__
  // Flush the ARM instruction cache over the whole translation region.
  __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
  #endif
  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
  #ifndef DISABLE_TLB
  // TLB
  // Rebuild memory_map from the TLB lookup tables; write-protect pages that
  // are mapped read-only or still contain (now invalidated) code.
  for(page=0;page<0x100000;page++) {
    if(tlb_LUT_r[page]) {
      memory_map[page]=((tlb_LUT_r[page]&0xFFFFF000)-(page<<12)+(unsigned int)rdram-0x80000000)>>2;
      if(!tlb_LUT_w[page]||!invalid_code[page])
        memory_map[page]|=0x40000000; // Write protect
    }
    else memory_map[page]=-1;
    // Skip the directly-mapped kseg0/kseg1 range (0x80000..0xBFFFF).
    if(page==0x80000) page=0xC0000;
  }
  tlb_hacks();
  #endif
}
1219
1220 // Add an entry to jump_out after making a link
1221 void add_link(u_int vaddr,void *src)
1222 {
1223   u_int page=get_page(vaddr);
1224   inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
1225   ll_add(jump_out+page,vaddr,src);
1226   //int ptr=get_pointer(src);
1227   //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
1228 }
1229
// If a code block was found to be unmodified (bit was set in
// restore_candidate) and it remains unmodified (bit is clear
// in invalid_code) then move the entries for that 4K page from
// the dirty list to the clean list.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        u_int start,end;
        // verify_dirty compares the block's source against what it was
        // compiled from; only byte-identical blocks may be restored.
        if(verify_dirty((int)head->addr)) {
          //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
          u_int i;
          u_int inv=0;
          get_bounds((int)head->addr,&start,&end);
          if(start-(u_int)rdram<0x800000) {
            // Reject the block if any page it was compiled from is invalid.
            for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
              inv|=invalid_code[i];
            }
          }
          if((signed int)head->vaddr>=(signed int)0xC0000000) {
            // TLB-mapped block: its current translation must still point at
            // the same memory it was compiled from.
            u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2));
            //printf("addr=%x start=%x end=%x\n",addr,start,end);
            if(addr<start||addr>=end) inv=1;
          }
          else if((signed int)head->vaddr>=(signed int)0x80800000) {
            inv=1;
          }
          if(!inv) {
            void * clean_addr=(void *)get_clean_addr((int)head->addr);
            if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
              u_int ppage=page;
#ifndef DISABLE_TLB
              if(page<2048&&tlb_LUT_r[head->vaddr>>12]) ppage=(tlb_LUT_r[head->vaddr>>12]^0x80000000)>>12;
#endif
              inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              // Re-register the clean entry point and refresh any existing
              // hash-table slots (only when reg32==0; see check_addr).
              ll_add_32(jump_in+ppage,head->vaddr,head->reg32,clean_addr);
              int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
              if(!head->reg32) {
                if(ht_bin[0]==head->vaddr) {
                  ht_bin[1]=(int)clean_addr; // Replace existing entry
                }
                if(ht_bin[2]==head->vaddr) {
                  ht_bin[3]=(int)clean_addr; // Replace existing entry
                }
              }
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1290
1291
1292 void mov_alloc(struct regstat *current,int i)
1293 {
1294   // Note: Don't need to actually alloc the source registers
1295   if((~current->is32>>rs1[i])&1) {
1296     //alloc_reg64(current,i,rs1[i]);
1297     alloc_reg64(current,i,rt1[i]);
1298     current->is32&=~(1LL<<rt1[i]);
1299   } else {
1300     //alloc_reg(current,i,rs1[i]);
1301     alloc_reg(current,i,rt1[i]);
1302     current->is32|=(1LL<<rt1[i]);
1303   }
1304   clear_const(current,rs1[i]);
1305   clear_const(current,rt1[i]);
1306   dirty_reg(current,rt1[i]);
1307 }
1308
// Register allocation for shift-by-immediate instructions.  The opcode2
// ranges below are mutually exclusive, so at most one branch runs.
// A zero rt1[i] means the destination is r0 and the op is a no-op.
void shiftimm_alloc(struct regstat *current,int i)
{
  clear_const(current,rs1[i]);
  clear_const(current,rt1[i]);
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      // Source only needs a register if it is live after this instruction;
      // otherwise remember it in lt1 for direct use.
      if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
      else lt1[i]=rs1[i];
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i]; // 32-bit result
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]); // 64-bit result
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      // Only the low 32 bits of the source matter (they become the high half).
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      if(imm[i]==32) {
        // Shift by exactly 32 keeps the (possibly nonzero) high word in play.
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      } else {
        // Larger shifts zero the upper bits: result is 32-bit.
        alloc_reg(current,i,rt1[i]);
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      // Arithmetic shift >=32 yields a sign-extended 32-bit value.
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
}
1365
// Register allocation for shift-by-register instructions.
// Nothing is allocated when the destination is r0 (no-op).
void shift_alloc(struct regstat *current,int i)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg(current,i,rt1[i]);
      // If the destination aliases the shift-amount register, a scratch
      // register is needed so the amount isn't clobbered mid-operation.
      if(rt1[i]==rs2[i]) alloc_reg_temp(current,i,-1);
      current->is32|=1LL<<rt1[i];
    } else { // DSLLV/DSRLV/DSRAV
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
        alloc_reg_temp(current,i,-1);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rs2[i]);
    clear_const(current,rt1[i]);
    dirty_reg(current,rt1[i]);
  }
}
1390
// Register allocation for three-operand ALU instructions.  Besides
// allocating host registers, this maintains current->is32: the bitmask of
// guest registers known to hold 32-bit (sign-extended) values, which later
// code uses to skip upper-half work.
void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        // With one (or zero) live sources, only allocate what is reused later.
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
    current->is32|=1LL<<rt1[i]; // 32-bit result
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      // The comparison needs 64-bit sources unless both are known 32-bit.
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      } else {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      }
    }
    current->is32|=1LL<<rt1[i]; // result is 0 or 1
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
      // If either source may be 64-bit, the bitwise result may be too.
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        // Skip the upper half if it is provably never read (uu bitmask).
        if(!((current->uu>>rt1[i])&1)) {
          alloc_reg64(current,i,rt1[i]);
        }
        if(get_reg(current->regmap,rt1[i]|64)>=0) {
          if(rs1[i]&&rs2[i]) {
            alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rs2[i]);
          }
          else
          {
            // Is is really worth it to keep 64-bit values in registers?
            #ifdef NATIVE_64BIT
            if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
            if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
            #endif
          }
        }
        current->is32&=~(1LL<<rt1[i]);
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        // Full 64-bit op only if the upper half of the result is ever used.
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          alloc_reg64(current,i,rs1[i]);
          alloc_reg64(current,i,rs2[i]);
          alloc_reg64(current,i,rt1[i]);
        } else {
          alloc_reg(current,i,rs1[i]);
          alloc_reg(current,i,rs2[i]);
          alloc_reg(current,i,rt1[i]);
        }
      }
      else {
        alloc_reg(current,i,rt1[i]);
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          // DADD used as move, or zeroing
          // If we have a 64-bit source, then make the target 64 bits too
          if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
            if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rt1[i]);
          } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
          if(opcode2[i]>=0x2e&&rs2[i]) {
            // DSUB used as negation - 64-bit result
            // If we have a 32-bit register, extend it to 64 bits
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
        }
      }
      // Track the 32-bitness of the result: with both sources the result is
      // assumed 64-bit; with a single source the result inherits its width;
      // with no sources the result is zero (32-bit).
      if(rs1[i]&&rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
      } else if(rs1[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs1[i])&1)
          current->is32|=1LL<<rt1[i];
      } else if(rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs2[i])&1)
          current->is32|=1LL<<rt1[i];
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1512
// Register allocation for immediate-operand instructions (ADDI, SLTI,
// ANDI/ORI/XORI, LUI, DADDI...).  Also performs constant propagation:
// if the source is a known constant, the result constant is recorded.
void imm16_alloc(struct regstat *current,int i)
{
  if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  else lt1[i]=rs1[i];
  if(rt1[i]) alloc_reg(current,i,rt1[i]);
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    current->is32&=~(1LL<<rt1[i]); // 64-bit result
    if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
      // TODO: Could preserve the 32-bit flag if the immediate is zero
      alloc_reg64(current,i,rt1[i]);
      alloc_reg64(current,i,rs1[i]);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    // Comparison must see the full 64-bit source if it may not be 32-bit.
    if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
    current->is32|=1LL<<rt1[i]; // result is 0 or 1
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    // ORI/XORI (>0x0c) on a 64-bit source keep the upper half; ANDI with a
    // 16-bit immediate always clears it.
    if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
      if(rs1[i]!=rt1[i]) {
        if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      }
    }
    else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
    // Constant folding for bitwise immediates.
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
      if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
      if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
    }
    else clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      set_const(current,rt1[i],v+imm[i]);
    }
    else clear_const(current,rt1[i]);
    current->is32|=1LL<<rt1[i];
  }
  else {
    // LUI always produces a known constant (sign-extended imm<<16).
    set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
    current->is32|=1LL<<rt1[i];
  }
  dirty_reg(current,rt1[i]);
}
1565
// Register allocation for load instructions.  Handles 32-bit loads,
// 64-bit loads (LWU/LD), the unaligned LDL/LDR and LWL/LWR pairs, and the
// degenerate "load to r0" case which still needs address computation.
void load_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
  if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  if(rt1[i]) {
    alloc_reg(current,i,rt1[i]);
    if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
    }
    else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
      // Unaligned 64-bit loads merge old and new bytes: grab everything
      // plus a 64-bit scratch for the loaded data.
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
    }
    else current->is32|=1LL<<rt1[i];
    dirty_reg(current,rt1[i]);
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    // LWL/LWR need a temporary register for the old value
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
    }
  }
  else
  {
    // Load to r0 (dummy load)
    // but we still need a register to calculate the address
    alloc_reg_temp(current,i,-1);
  }
}
1604
// Register allocation for store instructions (SB/SH/SW/SD and the
// unaligned SDL/SDR).  Beware: the `else` inside the HOST_IMM8 block below
// pairs with the preceding `if(using_tlb)` across the #if boundary.
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    alloc_reg64(current,i,rs2[i]);
    if(rs2[i]) alloc_reg(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2c||opcode[i]==0x2d) { // 64-bit SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
}
1627
// Register allocation for coprocessor 1 loads/stores (LWC1/SWC1/LDC1/SDC1).
// CSREG holds the COP0 status word, checked for a usable FPU at runtime.
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,rs1[i]); // FIXME
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP);
  if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
    alloc_reg64(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
}
1648
// Generic register allocation for multiply/divide; a target-specific
// assembler header may provide its own multdiv_alloc macro, in which case
// this definition is skipped.
#ifndef multdiv_alloc
void multdiv_alloc(struct regstat *current,int i)
{
  //  case 0x18: MULT
  //  case 0x19: MULTU
  //  case 0x1A: DIV
  //  case 0x1B: DIVU
  //  case 0x1C: DMULT
  //  case 0x1D: DMULTU
  //  case 0x1E: DDIV
  //  case 0x1F: DDIVU
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      // Mark HI/LO as used (clear the unneeded bits) before allocating them.
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      current->is32|=1LL<<HIREG;
      current->is32|=1LL<<LOREG;
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      current->uu&=~(1LL<<HIREG);
      current->uu&=~(1LL<<LOREG);
      alloc_reg64(current,i,HIREG);
      //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
      alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rs2[i]);
      // 64-bit mult/div is done out of line and clobbers everything.
      alloc_all(current,i);
      current->is32&=~(1LL<<HIREG);
      current->is32&=~(1LL<<LOREG);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    current->is32|=1LL<<HIREG;
    current->is32|=1LL<<LOREG;
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
#endif
1708
// Register allocation for coprocessor 0 (system control) instructions.
// MFC0/MTC0 are handled out of line and can touch arbitrary state, hence
// the alloc_all() calls.
void cop0_alloc(struct regstat *current,int i)
{
  if(opcode2[i]==0) // MFC0
  {
    if(rt1[i]) {
      clear_const(current,rt1[i]);
      alloc_all(current,i);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i]; // COP0 registers are 32-bit
      dirty_reg(current,rt1[i]);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
      alloc_all(current,i);
    }
    else {
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(opcode2[i]==0x10);
    alloc_all(current,i);
  }
}
1741
// Register allocation for coprocessor 1 (FPU) move instructions.
// CSREG caches the COP0 status word so the FPU-usable check is cheap.
void cop1_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  if(opcode2[i]<3) // MFC1/DMFC1/CFC1
  {
    assert(rt1[i]);
    clear_const(current,rt1[i]);
    if(opcode2[i]==1) {
      alloc_reg64(current,i,rt1[i]); // DMFC1
      current->is32&=~(1LL<<rt1[i]);
    }else{
      alloc_reg(current,i,rt1[i]); // MFC1/CFC1
      current->is32|=1LL<<rt1[i];
    }
    dirty_reg(current,rt1[i]);
    // Scratch register for the FPR address computation.
    alloc_reg_temp(current,i,-1);
  }
  else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      if(opcode2[i]==5)
        alloc_reg64(current,i,rs1[i]); // DMTC1
      else
        alloc_reg(current,i,rs1[i]); // MTC1/CTC1
      alloc_reg_temp(current,i,-1);
    }
    else {
      // Moving r0: make r0 allocatable for the store.
      current->u&=~1LL;
      alloc_reg(current,i,0);
      alloc_reg_temp(current,i,-1);
    }
  }
}
// Register allocation for FPU convert instructions: only the status word
// and one scratch register are needed (the op itself runs out of line).
void fconv_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
}
// Register allocation for FPU arithmetic: status word plus one scratch
// register (same requirements as fconv_alloc).
void float_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
}
// Register allocation for FPU compare instructions, which additionally
// update the condition flag held in FSREG.
void fcomp_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg(current,i,FSREG); // Load flags
  dirty_reg(current,FSREG); // Flag will be modified
  alloc_reg_temp(current,i,-1);
}
1793
// Register allocation for SYSCALL: the exception handler can read or write
// any guest register, so the cycle counter is materialized and everything
// is allocated; constants become unreliable afterwards.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  current->isconst=0;
}
1801
// Dispatch register allocation for the instruction in a branch delay slot,
// based on its decoded itype.  A branch/syscall in a delay slot is not
// supported: speculative precompilation is disabled instead of aborting.
void delayslot_alloc(struct regstat *current,int i)
{
  switch(itype[i]) {
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case FJUMP:
    case SYSCALL:
    case HLECALL:
    case SPAN:
      assem_debug("jump in the delay slot.  this shouldn't happen.\n");//exit(1);
      printf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    case COP1:
      cop1_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case FCONV:
      fconv_alloc(current,i);
      break;
    case FLOAT:
      float_alloc(current,i);
      break;
    case FCOMP:
      fcomp_alloc(current,i);
      break;
    // Other itypes need no allocation; fall out of the switch doing nothing.
  }
}
1863
// Special case where a branch and delay slot span two pages in virtual memory
// Everything is conservatively allocated (alloc_all) and constants are
// discarded; the branch operands are then allocated per branch type.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    // JAL writes the return address to r31.
    alloc_reg(current,i,31);
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]);
    if (rt1[i]==31) {
      alloc_reg(current,i,31);
      dirty_reg(current,31);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
    // Need 64-bit compares unless both operands are known 32-bit.
    if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg64(current,i,rs2[i]);
    }
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(!((current->is32>>rs1[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
    }
  }
  else
  if(opcode[i]==0x11) // BC1
  {
    alloc_reg(current,i,FSREG);
    alloc_reg(current,i,CSREG);
  }
  //else ...
}
1913
1914 add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
1915 {
1916   stubs[stubcount][0]=type;
1917   stubs[stubcount][1]=addr;
1918   stubs[stubcount][2]=retaddr;
1919   stubs[stubcount][3]=a;
1920   stubs[stubcount][4]=b;
1921   stubs[stubcount][5]=c;
1922   stubs[stubcount][6]=d;
1923   stubs[stubcount][7]=e;
1924   stubcount++;
1925 }
1926
// Write out a single register
// Spills every host register currently holding guest register r back to
// memory if marked dirty.  regmap entries >=64 hold the upper 32 bits of a
// guest register; a 32-bit value also gets its sign extension stored as the
// upper half (unless FORCE32 strips 64-bit support entirely).
void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if((regmap[hr]&63)==r) {
        if((dirty>>hr)&1) {
          if(regmap[hr]<64) {
            emit_storereg(r,hr);
#ifndef FORCE32
            // 32-bit value: materialize and store the sign-extended upper half.
            if((is32>>regmap[hr])&1) {
              emit_sarimm(hr,31,hr);
              emit_storereg(r|64,hr);
            }
#endif
          }else{
            emit_storereg(r|64,hr);
          }
        }
      }
    }
  }
}
1951
1952 int mchecksum()
1953 {
1954   //if(!tracedebug) return 0;
1955   int i;
1956   int sum=0;
1957   for(i=0;i<2097152;i++) {
1958     unsigned int temp=sum;
1959     sum<<=1;
1960     sum|=(~temp)>>31;
1961     sum^=((u_int *)rdram)[i];
1962   }
1963   return sum;
1964 }
1965 int rchecksum()
1966 {
1967   int i;
1968   int sum=0;
1969   for(i=0;i<64;i++)
1970     sum^=((u_int *)reg)[i];
1971   return sum;
1972 }
1973 void rlist()
1974 {
1975   int i;
1976   printf("TRACE: ");
1977   for(i=0;i<32;i++)
1978     printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
1979   printf("\n");
1980 #ifndef DISABLE_COP1
1981   printf("TRACE: ");
1982   for(i=0;i<32;i++)
1983     printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i]));
1984   printf("\n");
1985 #endif
1986 }
1987
1988 void enabletrace()
1989 {
1990   tracedebug=1;
1991 }
1992
// Trace-debug hook: within a hard-coded cycle-count window, print a RAM
// checksum, dump the register file, and peek at raw host stack slots.
// The (&i)[-1] / (&j)[N] accesses deliberately read outside the local
// variable (caller's frame / saved registers); this is non-portable,
// debug-only code and technically undefined behavior — left as-is.
void memdebug(int i)
{
  //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]);
  //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum());
  //rlist();
  //if(tracedebug) {
  //if(Count>=-2084597794) {
  // Magic constants select the cycle range of interest for the bug
  // being chased; edit them when re-enabling the trace.
  if((signed int)Count>=-2084597794&&(signed int)Count<0) {
  //if(0) {
    printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
    //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status);
    //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]);
    rlist();
    #ifdef __i386__
    // Peek one slot below the argument — presumably the return address;
    // NOTE(review): stack-layout dependent, confirm per ABI.
    printf("TRACE: %x\n",(&i)[-1]);
    #endif
    #ifdef __arm__
    int j;
    // Raw dump of host stack words above the local.
    printf("TRACE: %x \n",(&j)[10]);
    printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
    #endif
    //fflush(stdout);
  }
  //printf("TRACE: %x\n",(&i)[-1]);
}
2018
2019 void tlb_debug(u_int cause, u_int addr, u_int iaddr)
2020 {
2021   printf("TLB Exception: instruction=%x addr=%x cause=%x\n",iaddr, addr, cause);
2022 }
2023
2024 void alu_assemble(int i,struct regstat *i_regs)
2025 {
2026   if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
2027     if(rt1[i]) {
2028       signed char s1,s2,t;
2029       t=get_reg(i_regs->regmap,rt1[i]);
2030       if(t>=0) {
2031         s1=get_reg(i_regs->regmap,rs1[i]);
2032         s2=get_reg(i_regs->regmap,rs2[i]);
2033         if(rs1[i]&&rs2[i]) {
2034           assert(s1>=0);
2035           assert(s2>=0);
2036           if(opcode2[i]&2) emit_sub(s1,s2,t);
2037           else emit_add(s1,s2,t);
2038         }
2039         else if(rs1[i]) {
2040           if(s1>=0) emit_mov(s1,t);
2041           else emit_loadreg(rs1[i],t);
2042         }
2043         else if(rs2[i]) {
2044           if(s2>=0) {
2045             if(opcode2[i]&2) emit_neg(s2,t);
2046             else emit_mov(s2,t);
2047           }
2048           else {
2049             emit_loadreg(rs2[i],t);
2050             if(opcode2[i]&2) emit_neg(t,t);
2051           }
2052         }
2053         else emit_zeroreg(t);
2054       }
2055     }
2056   }
2057   if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
2058     if(rt1[i]) {
2059       signed char s1l,s2l,s1h,s2h,tl,th;
2060       tl=get_reg(i_regs->regmap,rt1[i]);
2061       th=get_reg(i_regs->regmap,rt1[i]|64);
2062       if(tl>=0) {
2063         s1l=get_reg(i_regs->regmap,rs1[i]);
2064         s2l=get_reg(i_regs->regmap,rs2[i]);
2065         s1h=get_reg(i_regs->regmap,rs1[i]|64);
2066         s2h=get_reg(i_regs->regmap,rs2[i]|64);
2067         if(rs1[i]&&rs2[i]) {
2068           assert(s1l>=0);
2069           assert(s2l>=0);
2070           if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
2071           else emit_adds(s1l,s2l,tl);
2072           if(th>=0) {
2073             #ifdef INVERTED_CARRY
2074             if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
2075             #else
2076             if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
2077             #endif
2078             else emit_add(s1h,s2h,th);
2079           }
2080         }
2081         else if(rs1[i]) {
2082           if(s1l>=0) emit_mov(s1l,tl);
2083           else emit_loadreg(rs1[i],tl);
2084           if(th>=0) {
2085             if(s1h>=0) emit_mov(s1h,th);
2086             else emit_loadreg(rs1[i]|64,th);
2087           }
2088         }
2089         else if(rs2[i]) {
2090           if(s2l>=0) {
2091             if(opcode2[i]&2) emit_negs(s2l,tl);
2092             else emit_mov(s2l,tl);
2093           }
2094           else {
2095             emit_loadreg(rs2[i],tl);
2096             if(opcode2[i]&2) emit_negs(tl,tl);
2097           }
2098           if(th>=0) {
2099             #ifdef INVERTED_CARRY
2100             if(s2h>=0) emit_mov(s2h,th);
2101             else emit_loadreg(rs2[i]|64,th);
2102             if(opcode2[i]&2) {
2103               emit_adcimm(-1,th); // x86 has inverted carry flag
2104               emit_not(th,th);
2105             }
2106             #else
2107             if(opcode2[i]&2) {
2108               if(s2h>=0) emit_rscimm(s2h,0,th);
2109               else {
2110                 emit_loadreg(rs2[i]|64,th);
2111                 emit_rscimm(th,0,th);
2112               }
2113             }else{
2114               if(s2h>=0) emit_mov(s2h,th);
2115               else emit_loadreg(rs2[i]|64,th);
2116             }
2117             #endif
2118           }
2119         }
2120         else {
2121           emit_zeroreg(tl);
2122           if(th>=0) emit_zeroreg(th);
2123         }
2124       }
2125     }
2126   }
2127   if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
2128     if(rt1[i]) {
2129       signed char s1l,s1h,s2l,s2h,t;
2130       if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
2131       {
2132         t=get_reg(i_regs->regmap,rt1[i]);
2133         //assert(t>=0);
2134         if(t>=0) {
2135           s1l=get_reg(i_regs->regmap,rs1[i]);
2136           s1h=get_reg(i_regs->regmap,rs1[i]|64);
2137           s2l=get_reg(i_regs->regmap,rs2[i]);
2138           s2h=get_reg(i_regs->regmap,rs2[i]|64);
2139           if(rs2[i]==0) // rx<r0
2140           {
2141             assert(s1h>=0);
2142             if(opcode2[i]==0x2a) // SLT
2143               emit_shrimm(s1h,31,t);
2144             else // SLTU (unsigned can not be less than zero)
2145               emit_zeroreg(t);
2146           }
2147           else if(rs1[i]==0) // r0<rx
2148           {
2149             assert(s2h>=0);
2150             if(opcode2[i]==0x2a) // SLT
2151               emit_set_gz64_32(s2h,s2l,t);
2152             else // SLTU (set if not zero)
2153               emit_set_nz64_32(s2h,s2l,t);
2154           }
2155           else {
2156             assert(s1l>=0);assert(s1h>=0);
2157             assert(s2l>=0);assert(s2h>=0);
2158             if(opcode2[i]==0x2a) // SLT
2159               emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
2160             else // SLTU
2161               emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
2162           }
2163         }
2164       } else {
2165         t=get_reg(i_regs->regmap,rt1[i]);
2166         //assert(t>=0);
2167         if(t>=0) {
2168           s1l=get_reg(i_regs->regmap,rs1[i]);
2169           s2l=get_reg(i_regs->regmap,rs2[i]);
2170           if(rs2[i]==0) // rx<r0
2171           {
2172             assert(s1l>=0);
2173             if(opcode2[i]==0x2a) // SLT
2174               emit_shrimm(s1l,31,t);
2175             else // SLTU (unsigned can not be less than zero)
2176               emit_zeroreg(t);
2177           }
2178           else if(rs1[i]==0) // r0<rx
2179           {
2180             assert(s2l>=0);
2181             if(opcode2[i]==0x2a) // SLT
2182               emit_set_gz32(s2l,t);
2183             else // SLTU (set if not zero)
2184               emit_set_nz32(s2l,t);
2185           }
2186           else{
2187             assert(s1l>=0);assert(s2l>=0);
2188             if(opcode2[i]==0x2a) // SLT
2189               emit_set_if_less32(s1l,s2l,t);
2190             else // SLTU
2191               emit_set_if_carry32(s1l,s2l,t);
2192           }
2193         }
2194       }
2195     }
2196   }
2197   if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
2198     if(rt1[i]) {
2199       signed char s1l,s1h,s2l,s2h,th,tl;
2200       tl=get_reg(i_regs->regmap,rt1[i]);
2201       th=get_reg(i_regs->regmap,rt1[i]|64);
2202       if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
2203       {
2204         assert(tl>=0);
2205         if(tl>=0) {
2206           s1l=get_reg(i_regs->regmap,rs1[i]);
2207           s1h=get_reg(i_regs->regmap,rs1[i]|64);
2208           s2l=get_reg(i_regs->regmap,rs2[i]);
2209           s2h=get_reg(i_regs->regmap,rs2[i]|64);
2210           if(rs1[i]&&rs2[i]) {
2211             assert(s1l>=0);assert(s1h>=0);
2212             assert(s2l>=0);assert(s2h>=0);
2213             if(opcode2[i]==0x24) { // AND
2214               emit_and(s1l,s2l,tl);
2215               emit_and(s1h,s2h,th);
2216             } else
2217             if(opcode2[i]==0x25) { // OR
2218               emit_or(s1l,s2l,tl);
2219               emit_or(s1h,s2h,th);
2220             } else
2221             if(opcode2[i]==0x26) { // XOR
2222               emit_xor(s1l,s2l,tl);
2223               emit_xor(s1h,s2h,th);
2224             } else
2225             if(opcode2[i]==0x27) { // NOR
2226               emit_or(s1l,s2l,tl);
2227               emit_or(s1h,s2h,th);
2228               emit_not(tl,tl);
2229               emit_not(th,th);
2230             }
2231           }
2232           else
2233           {
2234             if(opcode2[i]==0x24) { // AND
2235               emit_zeroreg(tl);
2236               emit_zeroreg(th);
2237             } else
2238             if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2239               if(rs1[i]){
2240                 if(s1l>=0) emit_mov(s1l,tl);
2241                 else emit_loadreg(rs1[i],tl);
2242                 if(s1h>=0) emit_mov(s1h,th);
2243                 else emit_loadreg(rs1[i]|64,th);
2244               }
2245               else
2246               if(rs2[i]){
2247                 if(s2l>=0) emit_mov(s2l,tl);
2248                 else emit_loadreg(rs2[i],tl);
2249                 if(s2h>=0) emit_mov(s2h,th);
2250                 else emit_loadreg(rs2[i]|64,th);
2251               }
2252               else{
2253                 emit_zeroreg(tl);
2254                 emit_zeroreg(th);
2255               }
2256             } else
2257             if(opcode2[i]==0x27) { // NOR
2258               if(rs1[i]){
2259                 if(s1l>=0) emit_not(s1l,tl);
2260                 else{
2261                   emit_loadreg(rs1[i],tl);
2262                   emit_not(tl,tl);
2263                 }
2264                 if(s1h>=0) emit_not(s1h,th);
2265                 else{
2266                   emit_loadreg(rs1[i]|64,th);
2267                   emit_not(th,th);
2268                 }
2269               }
2270               else
2271               if(rs2[i]){
2272                 if(s2l>=0) emit_not(s2l,tl);
2273                 else{
2274                   emit_loadreg(rs2[i],tl);
2275                   emit_not(tl,tl);
2276                 }
2277                 if(s2h>=0) emit_not(s2h,th);
2278                 else{
2279                   emit_loadreg(rs2[i]|64,th);
2280                   emit_not(th,th);
2281                 }
2282               }
2283               else {
2284                 emit_movimm(-1,tl);
2285                 emit_movimm(-1,th);
2286               }
2287             }
2288           }
2289         }
2290       }
2291       else
2292       {
2293         // 32 bit
2294         if(tl>=0) {
2295           s1l=get_reg(i_regs->regmap,rs1[i]);
2296           s2l=get_reg(i_regs->regmap,rs2[i]);
2297           if(rs1[i]&&rs2[i]) {
2298             assert(s1l>=0);
2299             assert(s2l>=0);
2300             if(opcode2[i]==0x24) { // AND
2301               emit_and(s1l,s2l,tl);
2302             } else
2303             if(opcode2[i]==0x25) { // OR
2304               emit_or(s1l,s2l,tl);
2305             } else
2306             if(opcode2[i]==0x26) { // XOR
2307               emit_xor(s1l,s2l,tl);
2308             } else
2309             if(opcode2[i]==0x27) { // NOR
2310               emit_or(s1l,s2l,tl);
2311               emit_not(tl,tl);
2312             }
2313           }
2314           else
2315           {
2316             if(opcode2[i]==0x24) { // AND
2317               emit_zeroreg(tl);
2318             } else
2319             if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2320               if(rs1[i]){
2321                 if(s1l>=0) emit_mov(s1l,tl);
2322                 else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
2323               }
2324               else
2325               if(rs2[i]){
2326                 if(s2l>=0) emit_mov(s2l,tl);
2327                 else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
2328               }
2329               else emit_zeroreg(tl);
2330             } else
2331             if(opcode2[i]==0x27) { // NOR
2332               if(rs1[i]){
2333                 if(s1l>=0) emit_not(s1l,tl);
2334                 else {
2335                   emit_loadreg(rs1[i],tl);
2336                   emit_not(tl,tl);
2337                 }
2338               }
2339               else
2340               if(rs2[i]){
2341                 if(s2l>=0) emit_not(s2l,tl);
2342                 else {
2343                   emit_loadreg(rs2[i],tl);
2344                   emit_not(tl,tl);
2345                 }
2346               }
2347               else emit_movimm(-1,tl);
2348             }
2349           }
2350         }
2351       }
2352     }
2353   }
2354 }
2355
// Assemble immediate-operand instructions:
//   LUI, ADDI/ADDIU, DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI.
// i      - instruction index in the current block
// i_regs - register mapping/state for this instruction
// Results already propagated as constants (isconst) are not re-emitted;
// source operands known constant (wasconst) fold into emit_movimm.
// Writes to r0 (rt1[i]==0) are skipped as no-ops.
void imm16_assemble(int i,struct regstat *i_regs)
{
  if (opcode[i]==0x0f) { // LUI
    if(rt1[i]) {
      signed char t;
      t=get_reg(i_regs->regmap,rt1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(!((i_regs->isconst>>t)&1))
          emit_movimm(imm[i]<<16,t);
      }
    }
  }
  if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      if(rs1[i]) {
        //assert(t>=0);
        //assert(s>=0);
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1)) {
            if(s<0) {
              // Source not in a host reg: load into t first (unless t
              // already held it on entry), then add in place
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_addimm(t,imm[i],t);
            }else{
              if(!((i_regs->wasconst>>s)&1))
                emit_addimm(s,imm[i],t);
              else
                emit_movimm(constmap[i][s]+imm[i],t); // constant fold
            }
          }
        }
      } else {
        // rs1 is r0: result is just the sign-extended immediate
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1))
            emit_movimm(imm[i],t);
        }
      }
    }
  }
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]) {
          assert(sh>=0);
          assert(sl>=0);
          if(th>=0) {
            emit_addimm64_32(sh,sl,imm[i],th,tl);
          }
          else {
            emit_addimm(sl,imm[i],tl);
          }
        } else {
          // rs1 is r0: immediate, sign-extended into the high word
          emit_movimm(imm[i],tl);
          if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
        }
      }
    }
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if(rt1[i]) {
      //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
      signed char sh,sl,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(rs1[i]>0) {
          if(sh<0) assert((i_regs->was32>>rs1[i])&1);
          if(sh<0||((i_regs->was32>>rs1[i])&1)) {
            // 32-bit source (no high word allocated, or known 32-bit)
            if(opcode[i]==0x0a) { // SLTI
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_slti32(t,imm[i],t);
              }else{
                emit_slti32(sl,imm[i],t);
              }
            }
            else { // SLTIU
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_sltiu32(t,imm[i],t);
              }else{
                emit_sltiu32(sl,imm[i],t);
              }
            }
          }else{ // 64-bit
            assert(sl>=0);
            if(opcode[i]==0x0a) // SLTI
              emit_slti64_32(sh,sl,imm[i],t);
            else // SLTIU
              emit_sltiu64_32(sh,sl,imm[i],t);
          }
        }else{
          // SLTI(U) with r0 is just stupid,
          // nonetheless examples can be found
          if(opcode[i]==0x0a) // SLTI
            if(0<imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          else // SLTIU
          {
            if(imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          }
        }
      }
    }
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
        if(opcode[i]==0x0c) //ANDI
        {
          // ANDI zero-extends: high word of the result is always 0
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
              emit_andimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_andimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]&imm[i],tl); // constant fold
            }
          }
          else
            emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          // ORI/XORI: high word passes through unchanged
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
            }
            if(th>=0) {
              if(sh<0) {
                emit_loadreg(rs1[i]|64,th);
              }else{
                emit_mov(sh,th);
              }
            }
            if(opcode[i]==0x0d) //ORI
            if(sl<0) {
              emit_orimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_orimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]|imm[i],tl); // constant fold
            }
            if(opcode[i]==0x0e) //XORI
            if(sl<0) {
              emit_xorimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_xorimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]^imm[i],tl); // constant fold
            }
          }
          else {
            // rs1 is r0: result is the zero-extended immediate
            emit_movimm(imm[i],tl);
            if(th>=0) emit_zeroreg(th);
          }
        }
      }
    }
  }
}
2538
// Assemble shift-by-immediate instructions:
//   SLL/SRL/SRA (32-bit), DSLL/DSRL/DSRA (64-bit),
//   DSLL32/DSRL32/DSRA32 (64-bit, shift amount >= 32).
// i      - instruction index in the current block
// i_regs - register mapping/state for this instruction
// Writes to r0 (rt1[i]==0) are skipped as no-ops.
void shiftimm_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0){
        if(rs1[i]==0)
        {
          emit_zeroreg(t); // shifting r0 always yields 0
        }
        else
        {
          // If the source isn't in a host reg, load it into t and
          // shift in place (unless t already held it on entry)
          if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
          if(imm[i]) {
            if(opcode2[i]==0) // SLL
            {
              emit_shlimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==2) // SRL
            {
              emit_shrimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==3) // SRA
            {
              emit_sarimm(s<0?t:s,imm[i],t);
            }
          }else{
            // Shift by zero
            if(s>=0 && s!=t) emit_mov(s,t);
          }
        }
      }
      //emit_storereg(rt1[i],t); //DEBUG
    }
  }
  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          assert(sl>=0);
          assert(sh>=0);
          if(imm[i]) {
            // Double-width shifts: bits move between the two halves
            if(opcode2[i]==0x38) // DSLL
            {
              if(th>=0) emit_shldimm(sh,sl,imm[i],th);
              emit_shlimm(sl,imm[i],tl);
            }
            if(opcode2[i]==0x3a) // DSRL
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_shrimm(sh,imm[i],th);
            }
            if(opcode2[i]==0x3b) // DSRA
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_sarimm(sh,imm[i],th);
            }
          }else{
            // Shift by zero
            if(sl!=tl) emit_mov(sl,tl);
            if(th>=0&&sh!=th) emit_mov(sh,th);
          }
        }
      }
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    // NOTE(review): imm[i] appears to hold the effective shift amount
    // (sa+32, range 32..63), hence the >32 checks below — confirm
    // against the decoder.
    if(rt1[i]) {
      signed char sl,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(th>=0||tl>=0){
        assert(tl>=0);
        assert(th>=0);
        assert(sl>=0);
        // Low word shifts entirely into the high word; low becomes 0
        emit_mov(sl,th);
        emit_zeroreg(tl);
        if(imm[i]>32)
        {
          emit_shlimm(th,imm[i]&31,th);
        }
      }
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      signed char sh,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        // High word shifts entirely into the low word; high becomes 0
        emit_mov(sh,tl);
        if(th>=0) emit_zeroreg(th);
        if(imm[i]>32)
        {
          emit_shrimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      signed char sh,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        // Result (low 32 bits) comes from the high word
        emit_mov(sh,tl);
        if(imm[i]>32)
        {
          emit_sarimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
}
2676
2677 #ifndef shift_assemble
// Fallback for architectures whose assembler does not provide variable
// shifts (SLLV/SRLV/SRAV/...): this is a build-configuration error, so
// report it and abort.
// Fix: the fatal diagnostic now goes to stderr instead of stdout.
void shift_assemble(int i,struct regstat *i_regs)
{
  fprintf(stderr, "Need shift_assemble for this architecture.\n");
  exit(1);
}
2683 #endif
2684
2685 void load_assemble(int i,struct regstat *i_regs)
2686 {
2687   int s,th,tl,addr,map=-1;
2688   int offset;
2689   int jaddr=0;
2690   int memtarget,c=0;
2691   u_int hr,reglist=0;
2692   th=get_reg(i_regs->regmap,rt1[i]|64);
2693   tl=get_reg(i_regs->regmap,rt1[i]);
2694   s=get_reg(i_regs->regmap,rs1[i]);
2695   offset=imm[i];
2696   for(hr=0;hr<HOST_REGS;hr++) {
2697     if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2698   }
2699   if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2700   if(s>=0) {
2701     c=(i_regs->wasconst>>s)&1;
2702     memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
2703     if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
2704   }
2705   if(offset||s<0||c) addr=tl;
2706   else addr=s;
2707   //printf("load_assemble: c=%d\n",c);
2708   //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
2709   // FIXME: Even if the load is a NOP, we should check for pagefaults...
2710   if(tl>=0) {
2711     //assert(tl>=0);
2712     //assert(rt1[i]);
2713     reglist&=~(1<<tl);
2714     if(th>=0) reglist&=~(1<<th);
2715     if(!using_tlb) {
2716       if(!c) {
2717 //#define R29_HACK 1
2718         #ifdef R29_HACK
2719         // Strmnnrmn's speed hack
2720         if(rs1[i]!=29||start<0x80001000||start>=0x80800000)
2721         #endif
2722         {
2723           emit_cmpimm(addr,0x800000);
2724           jaddr=(int)out;
2725           #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2726           // Hint to branch predictor that the branch is unlikely to be taken
2727           if(rs1[i]>=28)
2728             emit_jno_unlikely(0);
2729           else
2730           #endif
2731           emit_jno(0);
2732         }
2733       }
2734     }else{ // using tlb
2735       int x=0;
2736       if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
2737       if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
2738       map=get_reg(i_regs->regmap,TLREG);
2739       assert(map>=0);
2740       map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
2741       do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
2742     }
2743     if (opcode[i]==0x20) { // LB
2744       if(!c||memtarget) {
2745         #ifdef HOST_IMM_ADDR32
2746         if(c)
2747           emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
2748         else
2749         #endif
2750         {
2751           //emit_xorimm(addr,3,tl);
2752           //gen_tlb_addr_r(tl,map);
2753           //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
2754           int x=0;
2755 #ifdef BIG_ENDIAN_MIPS
2756           if(!c) emit_xorimm(addr,3,tl);
2757           else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2758 #else
2759           if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
2760           else if (tl!=addr) emit_mov(addr,tl);
2761 #endif
2762           emit_movsbl_indexed_tlb(x,tl,map,tl);
2763         }
2764         if(jaddr)
2765           add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2766       }
2767       else
2768         inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2769     }
2770     if (opcode[i]==0x21) { // LH
2771       if(!c||memtarget) {
2772         #ifdef HOST_IMM_ADDR32
2773         if(c)
2774           emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
2775         else
2776         #endif
2777         {
2778           int x=0;
2779 #ifdef BIG_ENDIAN_MIPS
2780           if(!c) emit_xorimm(addr,2,tl);
2781           else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2782 #else
2783           if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
2784           else if (tl!=addr) emit_mov(addr,tl);
2785 #endif
2786           //#ifdef
2787           //emit_movswl_indexed_tlb(x,tl,map,tl);
2788           //else
2789           if(map>=0) {
2790             gen_tlb_addr_r(tl,map);
2791             emit_movswl_indexed(x,tl,tl);
2792           }else
2793             emit_movswl_indexed((int)rdram-0x80000000+x,tl,tl);
2794         }
2795         if(jaddr)
2796           add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2797       }
2798       else
2799         inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2800     }
2801     if (opcode[i]==0x23) { // LW
2802       if(!c||memtarget) {
2803         //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
2804         #ifdef HOST_IMM_ADDR32
2805         if(c)
2806           emit_readword_tlb(constmap[i][s]+offset,map,tl);
2807         else
2808         #endif
2809         emit_readword_indexed_tlb(0,addr,map,tl);
2810         if(jaddr)
2811           add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2812       }
2813       else
2814         inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2815     }
2816     if (opcode[i]==0x24) { // LBU
2817       if(!c||memtarget) {
2818         #ifdef HOST_IMM_ADDR32
2819         if(c)
2820           emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
2821         else
2822         #endif
2823         {
2824           //emit_xorimm(addr,3,tl);
2825           //gen_tlb_addr_r(tl,map);
2826           //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
2827           int x=0;
2828 #ifdef BIG_ENDIAN_MIPS
2829           if(!c) emit_xorimm(addr,3,tl);
2830           else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2831 #else
2832           if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
2833           else if (tl!=addr) emit_mov(addr,tl);
2834 #endif
2835           emit_movzbl_indexed_tlb(x,tl,map,tl);
2836         }
2837         if(jaddr)
2838           add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2839       }
2840       else
2841         inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2842     }
2843     if (opcode[i]==0x25) { // LHU
2844       if(!c||memtarget) {
2845         #ifdef HOST_IMM_ADDR32
2846         if(c)
2847           emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
2848         else
2849         #endif
2850         {
2851           int x=0;
2852 #ifdef BIG_ENDIAN_MIPS
2853           if(!c) emit_xorimm(addr,2,tl);
2854           else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2855 #else
2856           if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
2857           else if (tl!=addr) emit_mov(addr,tl);
2858 #endif
2859           //#ifdef
2860           //emit_movzwl_indexed_tlb(x,tl,map,tl);
2861           //#else
2862           if(map>=0) {
2863             gen_tlb_addr_r(tl,map);
2864             emit_movzwl_indexed(x,tl,tl);
2865           }else
2866             emit_movzwl_indexed((int)rdram-0x80000000+x,tl,tl);
2867           if(jaddr)
2868             add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2869         }
2870       }
2871       else
2872         inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2873     }
2874     if (opcode[i]==0x27) { // LWU
2875       assert(th>=0);
2876       if(!c||memtarget) {
2877         //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
2878         #ifdef HOST_IMM_ADDR32
2879         if(c)
2880           emit_readword_tlb(constmap[i][s]+offset,map,tl);
2881         else
2882         #endif
2883         emit_readword_indexed_tlb(0,addr,map,tl);
2884         if(jaddr)
2885           add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2886       }
2887       else {
2888         inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2889       }
2890       emit_zeroreg(th);
2891     }
2892     if (opcode[i]==0x37) { // LD
2893       if(!c||memtarget) {
2894         //gen_tlb_addr_r(tl,map);
2895         //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
2896         //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
2897         #ifdef HOST_IMM_ADDR32
2898         if(c)
2899           emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
2900         else
2901         #endif
2902         emit_readdword_indexed_tlb(0,addr,map,th,tl);
2903         if(jaddr)
2904           add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2905       }
2906       else
2907         inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2908     }
2909     //emit_storereg(rt1[i],tl); // DEBUG
2910   }
2911   //if(opcode[i]==0x23)
2912   //if(opcode[i]==0x24)
2913   //if(opcode[i]==0x23||opcode[i]==0x24)
2914   /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
2915   {
2916     //emit_pusha();
2917     save_regs(0x100f);
2918         emit_readword((int)&last_count,ECX);
2919         #ifdef __i386__
2920         if(get_reg(i_regs->regmap,CCREG)<0)
2921           emit_loadreg(CCREG,HOST_CCREG);
2922         emit_add(HOST_CCREG,ECX,HOST_CCREG);
2923         emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
2924         emit_writeword(HOST_CCREG,(int)&Count);
2925         #endif
2926         #ifdef __arm__
2927         if(get_reg(i_regs->regmap,CCREG)<0)
2928           emit_loadreg(CCREG,0);
2929         else
2930           emit_mov(HOST_CCREG,0);
2931         emit_add(0,ECX,0);
2932         emit_addimm(0,2*ccadj[i],0);
2933         emit_writeword(0,(int)&Count);
2934         #endif
2935     emit_call((int)memdebug);
2936     //emit_popa();
2937     restore_regs(0x100f);
2938   }/**/
2939 }
2940
2941 #ifndef loadlr_assemble
void loadlr_assemble(int i,struct regstat *i_regs)
{
  /* Generic fallback: each architecture port is expected to provide its
   * own loadlr_assemble (LWL/LWR/LDL/LDR emitter) and define the macro
   * that suppresses this stub.  Reaching it is a build/port error. */
  (void)i;
  (void)i_regs;
  fputs("Need loadlr_assemble for this architecture.\n",stdout);
  exit(1);
}
2947 #endif
2948
// Emit host code for the aligned MIPS store instructions SB (0x28),
// SH (0x29), SW (0x2B) and SD (0x3F).
//   i       - index of the instruction within the current block
//   i_regs  - guest->host register mapping state at this instruction
// Generates: the address range check (or TLB lookup when using_tlb),
// the store itself, a slow-path stub via add_stub/inline_writestub,
// and the invalid_code check that detects self-modifying code.
void store_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl,map=-1;
  int addr,temp;
  int offset;
  int jaddr=0,jaddr2,type;
  int memtarget,c=0;
  int agr=AGEN1+(i&1);
  u_int hr,reglist=0;
  // th/tl: host regs holding the high/low 32 bits of the value to store
  // (rs2); s: host reg holding the base address register rs1.
  th=get_reg(i_regs->regmap,rs2[i]|64);
  tl=get_reg(i_regs->regmap,rs2[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  // temp: address-generation scratch register.
  temp=get_reg(i_regs->regmap,agr);
  if(temp<0) temp=get_reg(i_regs->regmap,-1);
  offset=imm[i];
  if(s>=0) {
    // Base register value known at compile time: resolve the target
    // address now and decide whether it hits RAM directly.
    c=(i_regs->wasconst>>s)&1;
    memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
    if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
  }
  // NOTE(review): memtarget is only initialized when s>=0; the reads
  // below appear reachable only on paths where that holds - confirm.
  assert(tl>=0);
  assert(temp>=0);
  // Build the mask of live host registers for the stubs to save.
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  // addr: register containing the effective address (s directly when no
  // offset/constant is involved, otherwise the precomputed temp).
  if(offset||s<0||c) addr=temp;
  else addr=s;
  if(!using_tlb) {
    if(!c) {
      // Runtime range check: branch to the slow-path stub when the
      // address is not below 0x800000 (i.e. not plain RAM).
      #ifdef R29_HACK
      // Strmnnrmn's speed hack
      memtarget=1;
      if(rs1[i]!=29||start<0x80001000||start>=0x80800000)
      #endif
      emit_cmpimm(addr,0x800000);
      #ifdef DESTRUCTIVE_SHIFT
      if(s==addr) emit_mov(s,temp);
      #endif
      #ifdef R29_HACK
      if(rs1[i]!=29||start<0x80001000||start>=0x80800000)
      #endif
      {
        // Record the branch location so the stub can be patched in later.
        jaddr=(int)out;
        #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
        // Hint to branch predictor that the branch is unlikely to be taken
        if(rs1[i]>=28)
          emit_jno_unlikely(0);
        else
        #endif
        emit_jno(0);
      }
    }
  }else{ // using tlb
    // x: byte-lane offset passed to the TLB write helper for sub-word
    // stores (presumably big-endian byte swizzling - see the ^3 / ^2
    // adjustments below).
    int x=0;
    if (opcode[i]==0x28) x=3; // SB
    if (opcode[i]==0x29) x=2; // SH
    map=get_reg(i_regs->regmap,TLREG);
    assert(map>=0);
    map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
    do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
  }

  if (opcode[i]==0x28) { // SB
    if(!c||memtarget) {
      // Flip the low address bits on little-endian hosts (or fold the
      // equivalent constant adjustment into x when the address is known).
      int x=0;
#ifdef BIG_ENDIAN_MIPS
      if(!c) emit_xorimm(addr,3,temp);
      else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
#else
      if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
      else if (addr!=temp) emit_mov(addr,temp);
#endif
      //gen_tlb_addr_w(temp,map);
      //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
      emit_writebyte_indexed_tlb(tl,x,temp,map,temp);
    }
    type=STOREB_STUB;
  }
  if (opcode[i]==0x29) { // SH
    if(!c||memtarget) {
      int x=0;
#ifdef BIG_ENDIAN_MIPS
      if(!c) emit_xorimm(addr,2,temp);
      else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
#else
      if(c) x=(constmap[i][s]+offset)-(constmap[i][s]+offset);
      else if (addr!=temp) emit_mov(addr,temp);
#endif
      //#ifdef
      //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
      //#else
      if(map>=0) {
        gen_tlb_addr_w(temp,map);
        emit_writehword_indexed(tl,x,temp);
      }else
        emit_writehword_indexed(tl,(int)rdram-0x80000000+x,temp);
    }
    type=STOREH_STUB;
  }
  if (opcode[i]==0x2B) { // SW
    if(!c||memtarget)
      //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
      emit_writeword_indexed_tlb(tl,0,addr,map,temp);
    type=STOREW_STUB;
  }
  if (opcode[i]==0x3F) { // SD
    if(!c||memtarget) {
      if(rs2[i]) {
        assert(th>=0);
        //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
        //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
        emit_writedword_indexed_tlb(th,tl,0,addr,map,temp);
      }else{
        // Store zero
        //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
        //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
        emit_writedword_indexed_tlb(tl,tl,0,addr,map,temp);
      }
    }
    type=STORED_STUB;
  }
  // Slow path: patchable stub for out-of-range addresses, or a direct
  // inline call when the address is a known non-RAM constant.
  if(jaddr) {
    add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
  } else if(!memtarget) {
    inline_writestub(type,i,constmap[i][s]+offset,i_regs->regmap,rs2[i],ccadj[i],reglist);
  }
  if(!using_tlb) {
    if(!c||memtarget) {
      // Self-modifying-code check: if the written page is marked as
      // containing compiled code (invalid_code), jump to the
      // invalidation stub.
      #ifdef DESTRUCTIVE_SHIFT
      // The x86 shift operation is 'destructive'; it overwrites the
      // source register, so we need to make a copy first and use that.
      addr=temp;
      #endif
      #if defined(HOST_IMM8)
      int ir=get_reg(i_regs->regmap,INVCP);
      assert(ir>=0);
      emit_cmpmem_indexedsr12_reg(ir,addr,1);
      #else
      emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1);
      #endif
      jaddr2=(int)out;
      emit_jne(0);
      add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),addr,0,0,0);
    }
  }
  //if(opcode[i]==0x2B || opcode[i]==0x3F)
  //if(opcode[i]==0x2B || opcode[i]==0x28)
  //if(opcode[i]==0x2B || opcode[i]==0x29)
  //if(opcode[i]==0x2B)
  /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F)
  {
    //emit_pusha();
    save_regs(0x100f);
        emit_readword((int)&last_count,ECX);
        #ifdef __i386__
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,HOST_CCREG);
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
        #endif
        #ifdef __arm__
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,0);
        else
          emit_mov(HOST_CCREG,0);
        emit_add(0,ECX,0);
        emit_addimm(0,2*ccadj[i],0);
        emit_writeword(0,(int)&Count);
        #endif
    emit_call((int)memdebug);
    //emit_popa();
    restore_regs(0x100f);
  }/**/
}
3125
// Emit host code for the unaligned MIPS stores SWL (0x2A), SWR (0x2E),
// SDL (0x2C) and SDR (0x2D).  These instructions write a variable number
// of bytes depending on the low bits of the address, so the generated
// code tests (addr&3) and branches to one of four store sequences.
void storelr_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl;
  int temp;
  int temp2;
  int offset;
  int jaddr=0,jaddr2;
  // Patch points for the four alignment cases and their exits.
  int case1,case2,case3;
  int done0,done1,done2;
  int memtarget,c=0;
  u_int hr,reglist=0;
  // th/tl: host regs with the high/low halves of the store value (rs2);
  // s: host reg with the base address; temp: address scratch register.
  th=get_reg(i_regs->regmap,rs2[i]|64);
  tl=get_reg(i_regs->regmap,rs2[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  offset=imm[i];
  if(s>=0) {
    // NOTE(review): this uses isconst where store_assemble uses
    // wasconst - confirm the difference is intentional.
    c=(i_regs->isconst>>s)&1;
    memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
    if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
  }
  assert(tl>=0);
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(tl>=0) {
    assert(temp>=0);
    if(!using_tlb) {
      if(!c) {
        // Runtime RAM range check; out-of-range goes to the stub.
        emit_cmpimm(s<0||offset?temp:s,0x800000);
        if(!offset&&s!=temp) emit_mov(s,temp);
        jaddr=(int)out;
        emit_jno(0);
      }
      else
      {
        // Known non-RAM target (or r0 base): always take the stub path.
        if(!memtarget||!rs1[i]) {
          jaddr=(int)out;
          emit_jmp(0);
        }
      }
      // Rebase temp so that plain indexed stores hit host rdram.
      if((u_int)rdram!=0x80000000) 
        emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
    }else{ // using tlb
      int map=get_reg(i_regs->regmap,TLREG);
      assert(map>=0);
      map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
      if(!c&&!offset&&s>=0) emit_mov(s,temp);
      do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
      if(!jaddr&&!memtarget) {
        jaddr=(int)out;
        emit_jmp(0);
      }
      gen_tlb_addr_w(temp,map);
    }

    if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
      // temp2 will accumulate the second word written by SDL/SDR; when
      // storing r0 the value registers collapse onto a single zero reg.
      temp2=get_reg(i_regs->regmap,FTEMP);
      if(!rs2[i]) temp2=th=tl;
    }

#ifndef BIG_ENDIAN_MIPS
    // Little-endian host: flip the low address bits so the byte-lane
    // logic below can be shared with the big-endian layout.
    emit_xorimm(temp,3,temp);
#endif
    // Dispatch on (addr&3): test bit 1 then bit 0.
    emit_testimm(temp,2);
    case2=(int)out;
    emit_jne(0);
    emit_testimm(temp,1);
    case1=(int)out;
    emit_jne(0);
    // 0 - (addr&3)==0
    if (opcode[i]==0x2A) { // SWL
      emit_writeword_indexed(tl,0,temp);
    }
    if (opcode[i]==0x2E) { // SWR
      emit_writebyte_indexed(tl,3,temp);
    }
    if (opcode[i]==0x2C) { // SDL
      emit_writeword_indexed(th,0,temp);
      if(rs2[i]) emit_mov(tl,temp2);
    }
    if (opcode[i]==0x2D) { // SDR
      emit_writebyte_indexed(tl,3,temp);
      if(rs2[i]) emit_shldimm(th,tl,24,temp2);
    }
    done0=(int)out;
    emit_jmp(0);
    // 1 - (addr&3)==1
    set_jump_target(case1,(int)out);
    if (opcode[i]==0x2A) { // SWL
      // Write 3 msb into three least significant bytes
      if(rs2[i]) emit_rorimm(tl,8,tl);
      emit_writehword_indexed(tl,-1,temp);
      if(rs2[i]) emit_rorimm(tl,16,tl);
      emit_writebyte_indexed(tl,1,temp);
      if(rs2[i]) emit_rorimm(tl,8,tl);
    }
    if (opcode[i]==0x2E) { // SWR
      // Write two lsb into two most significant bytes
      emit_writehword_indexed(tl,1,temp);
    }
    if (opcode[i]==0x2C) { // SDL
      if(rs2[i]) emit_shrdimm(tl,th,8,temp2);
      // Write 3 msb into three least significant bytes
      if(rs2[i]) emit_rorimm(th,8,th);
      emit_writehword_indexed(th,-1,temp);
      if(rs2[i]) emit_rorimm(th,16,th);
      emit_writebyte_indexed(th,1,temp);
      if(rs2[i]) emit_rorimm(th,8,th);
    }
    if (opcode[i]==0x2D) { // SDR
      if(rs2[i]) emit_shldimm(th,tl,16,temp2);
      // Write two lsb into two most significant bytes
      emit_writehword_indexed(tl,1,temp);
    }
    done1=(int)out;
    emit_jmp(0);
    // 2 - (addr&3)==2
    set_jump_target(case2,(int)out);
    emit_testimm(temp,1);
    case3=(int)out;
    emit_jne(0);
    if (opcode[i]==0x2A) { // SWL
      // Write two msb into two least significant bytes
      if(rs2[i]) emit_rorimm(tl,16,tl);
      emit_writehword_indexed(tl,-2,temp);
      if(rs2[i]) emit_rorimm(tl,16,tl);
    }
    if (opcode[i]==0x2E) { // SWR
      // Write 3 lsb into three most significant bytes
      emit_writebyte_indexed(tl,-1,temp);
      if(rs2[i]) emit_rorimm(tl,8,tl);
      emit_writehword_indexed(tl,0,temp);
      if(rs2[i]) emit_rorimm(tl,24,tl);
    }
    if (opcode[i]==0x2C) { // SDL
      if(rs2[i]) emit_shrdimm(tl,th,16,temp2);
      // Write two msb into two least significant bytes
      if(rs2[i]) emit_rorimm(th,16,th);
      emit_writehword_indexed(th,-2,temp);
      if(rs2[i]) emit_rorimm(th,16,th);
    }
    if (opcode[i]==0x2D) { // SDR
      if(rs2[i]) emit_shldimm(th,tl,8,temp2);
      // Write 3 lsb into three most significant bytes
      emit_writebyte_indexed(tl,-1,temp);
      if(rs2[i]) emit_rorimm(tl,8,tl);
      emit_writehword_indexed(tl,0,temp);
      if(rs2[i]) emit_rorimm(tl,24,tl);
    }
    done2=(int)out;
    emit_jmp(0);
    // 3 - (addr&3)==3
    set_jump_target(case3,(int)out);
    if (opcode[i]==0x2A) { // SWL
      // Write msb into least significant byte
      if(rs2[i]) emit_rorimm(tl,24,tl);
      emit_writebyte_indexed(tl,-3,temp);
      if(rs2[i]) emit_rorimm(tl,8,tl);
    }
    if (opcode[i]==0x2E) { // SWR
      // Write entire word
      emit_writeword_indexed(tl,-3,temp);
    }
    if (opcode[i]==0x2C) { // SDL
      if(rs2[i]) emit_shrdimm(tl,th,24,temp2);
      // Write msb into least significant byte
      if(rs2[i]) emit_rorimm(th,24,th);
      emit_writebyte_indexed(th,-3,temp);
      if(rs2[i]) emit_rorimm(th,8,th);
    }
    if (opcode[i]==0x2D) { // SDR
      if(rs2[i]) emit_mov(th,temp2);
      // Write entire word
      emit_writeword_indexed(tl,-3,temp);
    }
    // All four cases join here.
    set_jump_target(done0,(int)out);
    set_jump_target(done1,(int)out);
    set_jump_target(done2,(int)out);
    // SDL/SDR touch a second word; emit the conditional write of temp2
    // to the neighboring word, aligning the address first.
    if (opcode[i]==0x2C) { // SDL
      emit_testimm(temp,4);
      done0=(int)out;
      emit_jne(0);
      emit_andimm(temp,~3,temp);
      emit_writeword_indexed(temp2,4,temp);
      set_jump_target(done0,(int)out);
    }
    if (opcode[i]==0x2D) { // SDR
      emit_testimm(temp,4);
      done0=(int)out;
      emit_jeq(0);
      emit_andimm(temp,~3,temp);
      emit_writeword_indexed(temp2,-4,temp);
      set_jump_target(done0,(int)out);
    }
    // Register the slow-path stub unless the target is a known RAM hit.
    if(!c||!memtarget)
      add_stub(STORELR_STUB,jaddr,(int)out,0,(int)i_regs,rs2[i],ccadj[i],reglist);
  }
  if(!using_tlb) {
    // Undo the rdram rebase, then run the self-modifying-code
    // (invalid_code) check on the guest address.
    emit_addimm_no_flags((u_int)0x80000000-(u_int)rdram,temp);
    #if defined(HOST_IMM8)
    int ir=get_reg(i_regs->regmap,INVCP);
    assert(ir>=0);
    emit_cmpmem_indexedsr12_reg(ir,temp,1);
    #else
    emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1);
    #endif
    jaddr2=(int)out;
    emit_jne(0);
    add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),temp,0,0,0);
  }
  /*
    emit_pusha();
    //save_regs(0x100f);
        emit_readword((int)&last_count,ECX);
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,HOST_CCREG);
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
    emit_call((int)memdebug);
    emit_popa();
    //restore_regs(0x100f);
  /**/
}
3351
// Emit host code for the COP1 (FPU) loads/stores: LWC1 (0x31),
// LDC1 (0x35), SWC1 (0x39), SDC1 (0x3D).  The FPU register is reached
// indirectly through the reg_cop1_simple/reg_cop1_double pointer tables,
// so both a memory address and a register-file pointer are generated.
// When COP1 is compiled out (DISABLE_COP1), falls back to the
// coprocessor-unusable exception path.
void c1ls_assemble(int i,struct regstat *i_regs)
{
#ifndef DISABLE_COP1
  int s,th,tl;
  int temp,ar;
  int map=-1;
  int offset;
  int c=0;
  int jaddr,jaddr2=0,jaddr3,type;
  int agr=AGEN1+(i&1);
  u_int hr,reglist=0;
  // th/tl: FTEMP host regs used to shuttle the value; s: base address
  // register; temp: address-generation scratch.
  th=get_reg(i_regs->regmap,FTEMP|64);
  tl=get_reg(i_regs->regmap,FTEMP);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,agr);
  if(temp<0) temp=get_reg(i_regs->regmap,-1);
  offset=imm[i];
  assert(tl>=0);
  assert(rs1[i]>0);
  assert(temp>=0);
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  if (opcode[i]==0x31||opcode[i]==0x35) // LWC1/LDC1
  {
    // Loads use a temporary register which we need to save
    reglist|=1<<temp;
  }
  // ar: the register that will carry the effective memory address
  // (temp for stores, tl for loads).
  if (opcode[i]==0x39||opcode[i]==0x3D) // SWC1/SDC1
    ar=temp;
  else // LWC1/LDC1
    ar=tl;
  //if(s<0) emit_loadreg(rs1[i],ar); //address_generation does this now
  //else c=(i_regs->wasconst>>s)&1;
  if(s>=0) c=(i_regs->wasconst>>s)&1;
  // Check cop1 unusable
  if(!cop1_usable) {
    // Test the coprocessor-enable bit in the status register and branch
    // to the FP exception stub when COP1 is disabled.
    signed char rs=get_reg(i_regs->regmap,CSREG);
    assert(rs>=0);
    emit_testimm(rs,0x20000000);
    jaddr=(int)out;
    emit_jeq(0);
    add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
    cop1_usable=1;
  }
  if (opcode[i]==0x39) { // SWC1 (get float address)
    emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],tl);
  }
  if (opcode[i]==0x3D) { // SDC1 (get double address)
    emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],tl);
  }
  // Generate address + offset
  if(!using_tlb) {
    if(!c)
      emit_cmpimm(offset||c||s<0?ar:s,0x800000);
  }
  else
  {
    map=get_reg(i_regs->regmap,TLREG);
    assert(map>=0);
    if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1
      map=do_tlb_r(offset||c||s<0?ar:s,ar,map,0,-1,-1,c,constmap[i][s]+offset);
    }
    if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
      map=do_tlb_w(offset||c||s<0?ar:s,ar,map,0,c,constmap[i][s]+offset);
    }
  }
  // For stores, fetch the FPU value through the pointer loaded above.
  if (opcode[i]==0x39) { // SWC1 (read float)
    emit_readword_indexed(0,tl,tl);
  }
  if (opcode[i]==0x3D) { // SDC1 (read double)
    emit_readword_indexed(4,tl,th);
    emit_readword_indexed(0,tl,tl);
  }
  // For loads, fetch the pointer to the destination FPU register.
  if (opcode[i]==0x31) { // LWC1 (get target address)
    emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
  }
  if (opcode[i]==0x35) { // LDC1 (get target address)
    emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
  }
  if(!using_tlb) {
    if(!c) {
      // Branch to the slow-path stub when the address is out of RAM.
      jaddr2=(int)out;
      emit_jno(0);
    }
    else if(((signed int)(constmap[i][s]+offset))>=(signed int)0x80800000) {
      jaddr2=(int)out;
      emit_jmp(0); // inline_readstub/inline_writestub?  Very rare case
    }
    #ifdef DESTRUCTIVE_SHIFT
    if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
      if(!offset&&!c&&s>=0) emit_mov(s,ar);
    }
    #endif
  }else{
    if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1
      do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr2);
    }
    if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
      do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr2);
    }
  }
  // The memory access itself.
  if (opcode[i]==0x31) { // LWC1
    //if(s>=0&&!c&&!offset) emit_mov(s,tl);
    //gen_tlb_addr_r(ar,map);
    //emit_readword_indexed((int)rdram-0x80000000,tl,tl);
    #ifdef HOST_IMM_ADDR32
    if(c) emit_readword_tlb(constmap[i][s]+offset,map,tl);
    else
    #endif
    emit_readword_indexed_tlb(0,offset||c||s<0?tl:s,map,tl);
    type=LOADW_STUB;
  }
  if (opcode[i]==0x35) { // LDC1
    assert(th>=0);
    //if(s>=0&&!c&&!offset) emit_mov(s,tl);
    //gen_tlb_addr_r(ar,map);
    //emit_readword_indexed((int)rdram-0x80000000,tl,th);
    //emit_readword_indexed((int)rdram-0x7FFFFFFC,tl,tl);
    #ifdef HOST_IMM_ADDR32
    if(c) emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
    else
    #endif
    emit_readdword_indexed_tlb(0,offset||c||s<0?tl:s,map,th,tl);
    type=LOADD_STUB;
  }
  if (opcode[i]==0x39) { // SWC1
    //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
    emit_writeword_indexed_tlb(tl,0,offset||c||s<0?temp:s,map,temp);
    type=STOREW_STUB;
  }
  if (opcode[i]==0x3D) { // SDC1
    assert(th>=0);
    //emit_writeword_indexed(th,(int)rdram-0x80000000,temp);
    //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
    emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp);
    type=STORED_STUB;
  }
  if(!using_tlb) {
    if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
      // Self-modifying-code (invalid_code) check for FPU stores.
      #ifndef DESTRUCTIVE_SHIFT
      temp=offset||c||s<0?ar:s;
      #endif
      #if defined(HOST_IMM8)
      int ir=get_reg(i_regs->regmap,INVCP);
      assert(ir>=0);
      emit_cmpmem_indexedsr12_reg(ir,temp,1);
      #else
      emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1);
      #endif
      jaddr3=(int)out;
      emit_jne(0);
      add_stub(INVCODE_STUB,jaddr3,(int)out,reglist|(1<<HOST_CCREG),temp,0,0,0);
    }
  }
  if(jaddr2) add_stub(type,jaddr2,(int)out,i,offset||c||s<0?ar:s,(int)i_regs,ccadj[i],reglist);
  // Loads: write the fetched value through the FPU register pointer.
  if (opcode[i]==0x31) { // LWC1 (write float)
    emit_writeword_indexed(tl,0,temp);
  }
  if (opcode[i]==0x35) { // LDC1 (write double)
    emit_writeword_indexed(th,4,temp);
    emit_writeword_indexed(tl,0,temp);
  }
  //if(opcode[i]==0x39)
  /*if(opcode[i]==0x39||opcode[i]==0x31)
  {
    emit_pusha();
        emit_readword((int)&last_count,ECX);
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,HOST_CCREG);
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
    emit_call((int)memdebug);
    emit_popa();
  }/**/
#else
  cop1_unusable(i, i_regs);
#endif
}
3533
3534 #ifndef multdiv_assemble
void multdiv_assemble(int i,struct regstat *i_regs)
{
  /* Generic fallback: a port must define multdiv_assemble (and the
   * matching macro) in its assem_* header; hitting this is fatal. */
  (void)i;
  (void)i_regs;
  fputs("Need multdiv_assemble for this architecture.\n",stdout);
  exit(1);
}
3540 #endif
3541
3542 void mov_assemble(int i,struct regstat *i_regs)
3543 {
3544   //if(opcode2[i]==0x10||opcode2[i]==0x12) { // MFHI/MFLO
3545   //if(opcode2[i]==0x11||opcode2[i]==0x13) { // MTHI/MTLO
3546   assert(rt1[i]>0);
3547   if(rt1[i]) {
3548     signed char sh,sl,th,tl;
3549     th=get_reg(i_regs->regmap,rt1[i]|64);
3550     tl=get_reg(i_regs->regmap,rt1[i]);
3551     //assert(tl>=0);
3552     if(tl>=0) {
3553       sh=get_reg(i_regs->regmap,rs1[i]|64);
3554       sl=get_reg(i_regs->regmap,rs1[i]);
3555       if(sl>=0) emit_mov(sl,tl);
3556       else emit_loadreg(rs1[i],tl);
3557       if(th>=0) {
3558         if(sh>=0) emit_mov(sh,th);
3559         else emit_loadreg(rs1[i]|64,th);
3560       }
3561     }
3562   }
3563 }
3564
3565 #ifndef fconv_assemble
void fconv_assemble(int i,struct regstat *i_regs)
{
  /* Generic fallback: an architecture-specific fconv_assemble must be
   * supplied by the port; reaching this stub is a fatal port error. */
  (void)i;
  (void)i_regs;
  fputs("Need fconv_assemble for this architecture.\n",stdout);
  exit(1);
}
3571 #endif
3572
3573 #if 0
void float_assemble(int i,struct regstat *i_regs)
{
  // Fallback stub (currently compiled out by the surrounding #if 0);
  // a port is expected to supply its own float_assemble.
  printf("Need float_assemble for this architecture.\n");
  exit(1);
}
3579 #endif
3580
// Emit code for the SYSCALL instruction: load the exception PC into EAX,
// commit the accumulated cycle count, and jump to the HLE syscall handler.
void syscall_assemble(int i,struct regstat *i_regs)
{
  signed char ccreg=get_reg(i_regs->regmap,CCREG);
  assert(ccreg==HOST_CCREG);  // cycle count must already be in its host reg
  assert(!is_delayslot);      // SYSCALL in a delay slot is not supported
  emit_movimm(start+i*4,EAX); // Get PC
  emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right?  There should probably be an extra cycle...
  emit_jmp((int)jump_syscall_hle); // XXX
}
3590
// Emit code for an HLE (high-level emulation) call: pass the return PC
// (instruction after this one) and the raw opcode word to the handler,
// commit the cycle count, and jump to jump_hlecall.
void hlecall_assemble(int i,struct regstat *i_regs)
{
  signed char ccreg=get_reg(i_regs->regmap,CCREG);
  assert(ccreg==HOST_CCREG);  // cycle count must already be in its host reg
  assert(!is_delayslot);      // HLE calls are not supported in delay slots
  emit_movimm(start+i*4+4,0); // Get PC
  emit_movimm(source[i],1); // opcode
  emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // XXX
  emit_jmp((int)jump_hlecall); // XXX
}
3601
// Assemble the instruction sitting in a branch delay slot.  The global
// is_delayslot flag is set around the dispatch so the per-type
// assemblers can adjust their behavior while emitting.
void ds_assemble(int i,struct regstat *i_regs)
{
  is_delayslot=1;
  switch(itype[i]) {
    case ALU:
      alu_assemble(i,i_regs);break;
    case IMM16:
      imm16_assemble(i,i_regs);break;
    case SHIFT:
      shift_assemble(i,i_regs);break;
    case SHIFTIMM:
      shiftimm_assemble(i,i_regs);break;
    case LOAD:
      load_assemble(i,i_regs);break;
    case LOADLR:
      loadlr_assemble(i,i_regs);break;
    case STORE:
      store_assemble(i,i_regs);break;
    case STORELR:
      storelr_assemble(i,i_regs);break;
    case COP0:
      cop0_assemble(i,i_regs);break;
    case COP1:
      cop1_assemble(i,i_regs);break;
    case C1LS:
      c1ls_assemble(i,i_regs);break;
    case FCONV:
      fconv_assemble(i,i_regs);break;
    case FLOAT:
      float_assemble(i,i_regs);break;
    case FCOMP:
      fcomp_assemble(i,i_regs);break;
    case MULTDIV:
      multdiv_assemble(i,i_regs);break;
    case MOV:
      mov_assemble(i,i_regs);break;
    // SYSCALL/HLECALL/SPAN and all branch types deliberately fall
    // through to the warning: none of them may occupy a delay slot.
    case SYSCALL:
    case HLECALL:
    case SPAN:
    case UJUMP:
    case RJUMP:
    case CJUMP:
    case SJUMP:
    case FJUMP:
      printf("Jump in the delay slot.  This is probably a bug.\n");
    // NOTE(review): no default case - any other itype assembles nothing.
  }
  is_delayslot=0;
}
3650
3651 // Is the branch target a valid internal jump?
3652 int internal_branch(uint64_t i_is32,int addr)
3653 {
3654   if(addr&1) return 0; // Indirect (register) jump
3655   if(addr>=start && addr<start+slen*4-4)
3656   {
3657     int t=(addr-start)>>2;
3658     // Delay slots are not valid branch targets
3659     //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0;
3660     // 64 -> 32 bit transition requires a recompile
3661     /*if(is32[t]&~unneeded_reg_upper[t]&~i_is32)
3662     {
3663       if(requires_32bit[t]&~i_is32) printf("optimizable: no\n");
3664       else printf("optimizable: yes\n");
3665     }*/
3666     //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0;
3667     if(requires_32bit[t]&~i_is32) return 0;
3668     else return 1;
3669   }
3670   return 0;
3671 }
3672
3673 #ifndef wb_invalidate
// Write back dirty guest registers that are about to lose their host
// register (mapped in 'pre' but absent from 'entry'), then move values
// whose mapping merely changed to a different host register.
//   pre/entry - host->guest register maps before/after the transition
//   dirty     - bitmap of host regs holding modified values
//   is32      - bitmap of guest regs whose value is known 32-bit
//   u/uu      - bitmaps of guest regs whose lower/upper value is unneeded
//               (presumably from the unneeded_reg analysis - confirm)
void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,
  uint64_t u,uint64_t uu)
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if(pre[hr]!=entry[hr]) {
        if(pre[hr]>=0) {
          if((dirty>>hr)&1) {
            // Only store if the guest reg has no host reg in the new map.
            if(get_reg(entry,pre[hr])<0) {
              if(pre[hr]<64) {
                // Lower half: skip when the value is unneeded.
                if(!((u>>pre[hr])&1)) {
                  emit_storereg(pre[hr],hr);
                  // Known-32-bit value with a needed upper half:
                  // materialize the sign extension and store it too.
                  if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
                    emit_sarimm(hr,31,hr);
                    emit_storereg(pre[hr]|64,hr);
                  }
                }
              }else{
                // Upper half (reg number has bit 6 set): store only when
                // needed and not derivable by sign extension.
                if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
                  emit_storereg(pre[hr],hr);
                }
              }
            }
          }
        }
      }
    }
  }
  // Move from one register to another (no writeback)
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if(pre[hr]!=entry[hr]) {
        if(pre[hr]>=0&&(pre[hr]&63)<TEMPREG) {
          int nr;
          if((nr=get_reg(entry,pre[hr]))>=0) {
            emit_mov(hr,nr);
          }
        }
      }
    }
  }
}
3717 #endif
3718
3719 // Load the specified registers
3720 // This only loads the registers given as arguments because
3721 // we don't want to load things that will be overwritten
// Load guest registers rs1/rs2 into the host registers assigned by
// 'regmap', skipping any host reg that already holds the right value
// (entry[hr]==regmap[hr]).  'is32' is the bitmap of guest regs whose
// value is known 32-bit, letting the upper half be produced by sign
// extension instead of a memory load.
void load_regs(signed char entry[],signed char regmap[],int is32,int rs1,int rs2)
{
  int hr;
  // Load 32-bit regs
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
      if(entry[hr]!=regmap[hr]) {
        if(regmap[hr]==rs1||regmap[hr]==rs2)
        {
          if(regmap[hr]==0) {
            // Guest r0 is always zero - no memory access needed.
            emit_zeroreg(hr);
          }
          else
          {
            emit_loadreg(regmap[hr],hr);
          }
        }
      }
    }
  }
  //Load 64-bit regs
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
      if(entry[hr]!=regmap[hr]) {
        // Upper halves carry the guest reg number with bit 6 set.
        if(regmap[hr]-64==rs1||regmap[hr]-64==rs2)
        {
          assert(regmap[hr]!=64);
          if((is32>>(regmap[hr]&63))&1) {
            // Known 32-bit: sign-extend from the mapped low half when
            // available, otherwise fall back to a memory load.
            int lr=get_reg(regmap,regmap[hr]-64);
            if(lr>=0)
              emit_sarimm(lr,31,hr);
            else
              emit_loadreg(regmap[hr],hr);
          }
          else
          {
            emit_loadreg(regmap[hr],hr);
          }
        }
      }
    }
  }
}
3765
3766 // Load registers prior to the start of a loop
3767 // so that they are not loaded within the loop
// Hoist register loads out of a loop: for every host register the loop
// entry map ('entry') wants populated but the current map ('pre') does
// not provide anywhere, emit the load here, before the loop is entered.
static void loop_preload(signed char pre[],signed char entry[])
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if(pre[hr]!=entry[hr]) {
        if(entry[hr]>=0) {
          // Only preload if no current host reg already holds the value.
          if(get_reg(pre,entry[hr])<0) {
            assem_debug("loop preload:\n");
            //printf("loop preload: %d\n",hr);
            if(entry[hr]==0) {
              // Guest r0: materialize zero directly.
              emit_zeroreg(hr);
            }
            else if(entry[hr]<TEMPREG)
            {
              emit_loadreg(entry[hr],hr);
            }
            else if(entry[hr]-64<TEMPREG)
            {
              // Upper half (reg number has bit 6 set); emit_loadreg is
              // presumably keyed on the |64 number - confirm.
              emit_loadreg(entry[hr],hr);
            }
          }
        }
      }
    }
  }
}
3795
3796 // Generate address for load/store instruction
3797 void address_generation(int i,struct regstat *i_regs,signed char entry[])
3798 {
3799   if(itype[i]==LOAD||itype[i]==LOADLR||itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS) {
3800     int ra;
3801     int agr=AGEN1+(i&1);
3802     int mgr=MGEN1+(i&1);
3803     if(itype[i]==LOAD) {
3804       ra=get_reg(i_regs->regmap,rt1[i]);
3805       //if(rt1[i]) assert(ra>=0);
3806     }
3807     if(itype[i]==LOADLR) {
3808       ra=get_reg(i_regs->regmap,FTEMP);
3809     }
3810     if(itype[i]==STORE||itype[i]==STORELR) {
3811       ra=get_reg(i_regs->regmap,agr);
3812       if(ra<0) ra=get_reg(i_regs->regmap,-1);
3813     }
3814     if(itype[i]==C1LS) {
3815       if (opcode[i]==0x31||opcode[i]==0x35) // LWC1/LDC1
3816         ra=get_reg(i_regs->regmap,FTEMP);
3817       else { // SWC1/SDC1
3818         ra=get_reg(i_regs->regmap,agr);
3819         if(ra<0) ra=get_reg(i_regs->regmap,-1);
3820       }
3821     }
3822     int rs=get_reg(i_regs->regmap,rs1[i]);
3823     int rm=get_reg(i_regs->regmap,TLREG);
3824     if(ra>=0) {
3825       int offset=imm[i];
3826       int c=(i_regs->wasconst>>rs)&1;
3827       if(rs1[i]==0) {
3828         // Using r0 as a base address
3829         /*if(rm>=0) {
3830           if(!entry||entry[rm]!=mgr) {
3831             generate_map_const(offset,rm);
3832           } // else did it in the previous cycle
3833         }*/
3834         if(!entry||entry[ra]!=agr) {
3835           if (opcode[i]==0x22||opcode[i]==0x26) {
3836             emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
3837           }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
3838             emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR
3839           }else{
3840             emit_movimm(offset,ra);
3841           }
3842         } // else did it in the previous cycle
3843       }
3844       else if(rs<0) {
3845         if(!entry||entry[ra]!=rs1[i])
3846           emit_loadreg(rs1[i],ra);
3847         //if(!entry||entry[ra]!=rs1[i])
3848         //  printf("poor load scheduling!\n");
3849       }
3850       else if(c) {
3851         if(rm>=0) {
3852           if(!entry||entry[rm]!=mgr) {
3853             if(itype[i]==STORE||itype[i]==STORELR||opcode[i]==0x39||opcode[i]==0x3D) {
3854               // Stores to memory go thru the mapper to detect self-modifying
3855               // code, loads don't.
3856               if((unsigned int)(constmap[i][rs]+offset)>=0xC0000000 ||
3857                  (unsigned int)(constmap[i][rs]+offset)<0x80800000 )
3858                 generate_map_const(constmap[i][rs]+offset,rm);
3859             }else{
3860               if((signed int)(constmap[i][rs]+offset)>=(signed int)0xC0000000)
3861                 generate_map_const(constmap[i][rs]+offset,rm);
3862             }
3863           }
3864         }
3865         if(rs1[i]!=rt1[i]||itype[i]!=LOAD) {
3866           if(!entry||entry[ra]!=agr) {
3867             if (opcode[i]==0x22||opcode[i]==0x26) {
3868               emit_movimm((constmap[i][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR
3869             }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
3870               emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
3871             }else{
3872               #ifdef HOST_IMM_ADDR32
3873               if((itype[i]!=LOAD&&opcode[i]!=0x31&&opcode[i]!=0x35) ||
3874                  (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000))
3875               #endif
3876               emit_movimm(constmap[i][rs]+offset,ra);
3877             }
3878           } // else did it in the previous cycle
3879         } // else load_consts already did it
3880       }
3881       if(offset&&!c&&rs1[i]) {
3882         if(rs>=0) {
3883           emit_addimm(rs,offset,ra);
3884         }else{
3885           emit_addimm(ra,offset,ra);
3886         }
3887       }
3888     }
3889   }
3890   // Preload constants for next instruction
3891   if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS) {
3892     int agr,ra;
3893     #ifndef HOST_IMM_ADDR32
3894     // Mapper entry
3895     agr=MGEN1+((i+1)&1);
3896     ra=get_reg(i_regs->regmap,agr);
3897     if(ra>=0) {
3898       int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
3899       int offset=imm[i+1];
3900       int c=(regs[i+1].wasconst>>rs)&1;
3901       if(c) {
3902         if(itype[i+1]==STORE||itype[i+1]==STORELR||opcode[i+1]==0x39||opcode[i+1]==0x3D) {
3903           // Stores to memory go thru the mapper to detect self-modifying
3904           // code, loads don't.
3905           if((unsigned int)(constmap[i+1][rs]+offset)>=0xC0000000 ||
3906              (unsigned int)(constmap[i+1][rs]+offset)<0x80800000 )
3907             generate_map_const(constmap[i+1][rs]+offset,ra);
3908         }else{
3909           if((signed int)(constmap[i+1][rs]+offset)>=(signed int)0xC0000000)
3910             generate_map_const(constmap[i+1][rs]+offset,ra);
3911         }
3912       }
3913       /*else if(rs1[i]==0) {
3914         generate_map_const(offset,ra);
3915       }*/
3916<