/* "allow to disable TLB" -- libpcsxcore/new_dynarec/new_dynarec.c (pcsx_rearmed) */
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2  *   Mupen64plus - new_dynarec.c                                           *
3  *   Copyright (C) 2009-2010 Ari64                                         *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
19  * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
#include <stdlib.h>
#include <stdint.h> //include for uint64_t
#include <stdio.h>  //include for printf/fprintf diagnostics
#include <assert.h>

#include "../recomp.h"
#include "../recomph.h" //include for function prototypes
#include "../macros.h"
#include "../r4300.h"
#include "../ops.h"
#include "../interupt.h"

#include "../../memory/memory.h"

#include <sys/mman.h>
35
36 #ifdef __i386__
37 #include "assem_x86.h"
38 #endif
39 #ifdef __x86_64__
40 #include "assem_x64.h"
41 #endif
42 #ifdef __arm__
43 #include "assem_arm.h"
44 #endif
45
46 #define MAXBLOCK 4096
47 #define MAX_OUTPUT_BLOCK_SIZE 262144
48 #define CLOCK_DIVIDER 2
49
50 struct regstat
51 {
52   signed char regmap_entry[HOST_REGS];
53   signed char regmap[HOST_REGS];
54   uint64_t was32;
55   uint64_t is32;
56   uint64_t wasdirty;
57   uint64_t dirty;
58   uint64_t u;
59   uint64_t uu;
60   u_int wasconst;
61   u_int isconst;
62   uint64_t constmap[HOST_REGS];
63 };
64
65 struct ll_entry
66 {
67   u_int vaddr;
68   u_int reg32;
69   void *addr;
70   struct ll_entry *next;
71 };
72
73   u_int start;
74   u_int *source;
75   u_int pagelimit;
76   char insn[MAXBLOCK][10];
77   u_char itype[MAXBLOCK];
78   u_char opcode[MAXBLOCK];
79   u_char opcode2[MAXBLOCK];
80   u_char bt[MAXBLOCK];
81   u_char rs1[MAXBLOCK];
82   u_char rs2[MAXBLOCK];
83   u_char rt1[MAXBLOCK];
84   u_char rt2[MAXBLOCK];
85   u_char us1[MAXBLOCK];
86   u_char us2[MAXBLOCK];
87   u_char dep1[MAXBLOCK];
88   u_char dep2[MAXBLOCK];
89   u_char lt1[MAXBLOCK];
90   int imm[MAXBLOCK];
91   u_int ba[MAXBLOCK];
92   char likely[MAXBLOCK];
93   char is_ds[MAXBLOCK];
94   uint64_t unneeded_reg[MAXBLOCK];
95   uint64_t unneeded_reg_upper[MAXBLOCK];
96   uint64_t branch_unneeded_reg[MAXBLOCK];
97   uint64_t branch_unneeded_reg_upper[MAXBLOCK];
98   uint64_t p32[MAXBLOCK];
99   uint64_t pr32[MAXBLOCK];
100   signed char regmap_pre[MAXBLOCK][HOST_REGS];
101   signed char regmap[MAXBLOCK][HOST_REGS];
102   signed char regmap_entry[MAXBLOCK][HOST_REGS];
103   uint64_t constmap[MAXBLOCK][HOST_REGS];
104   uint64_t known_value[HOST_REGS];
105   u_int known_reg;
106   struct regstat regs[MAXBLOCK];
107   struct regstat branch_regs[MAXBLOCK];
108   u_int needed_reg[MAXBLOCK];
109   uint64_t requires_32bit[MAXBLOCK];
110   u_int wont_dirty[MAXBLOCK];
111   u_int will_dirty[MAXBLOCK];
112   int ccadj[MAXBLOCK];
113   int slen;
114   u_int instr_addr[MAXBLOCK];
115   u_int link_addr[MAXBLOCK][3];
116   int linkcount;
117   u_int stubs[MAXBLOCK*3][8];
118   int stubcount;
119   u_int literals[1024][2];
120   int literalcount;
121   int is_delayslot;
122   int cop1_usable;
123   u_char *out;
124   struct ll_entry *jump_in[4096];
125   struct ll_entry *jump_out[4096];
126   struct ll_entry *jump_dirty[4096];
127   u_int hash_table[65536][4]  __attribute__((aligned(16)));
128   char shadow[1048576]  __attribute__((aligned(16)));
129   void *copy;
130   int expirep;
131   u_int using_tlb;
132   u_int stop_after_jal;
133   extern u_char restore_candidate[512];
134   extern int cycle_count;
135
136   /* registers that may be allocated */
137   /* 1-31 gpr */
138 #define HIREG 32 // hi
139 #define LOREG 33 // lo
140 #define FSREG 34 // FPU status (FCSR)
141 #define CSREG 35 // Coprocessor status
142 #define CCREG 36 // Cycle count
143 #define INVCP 37 // Pointer to invalid_code
144 #define TEMPREG 38
145 #define FTEMP 38 // FPU temporary register
146 #define PTEMP 39 // Prefetch temporary register
147 #define TLREG 40 // TLB mapping offset
148 #define RHASH 41 // Return address hash
149 #define RHTBL 42 // Return address hash table address
150 #define RTEMP 43 // JR/JALR address register
151 #define MAXREG 43
152 #define AGEN1 44 // Address generation temporary register
153 #define AGEN2 45 // Address generation temporary register
154 #define MGEN1 46 // Maptable address generation temporary register
155 #define MGEN2 47 // Maptable address generation temporary register
156 #define BTREG 48 // Branch target temporary register
157
158   /* instruction types */
159 #define NOP 0     // No operation
160 #define LOAD 1    // Load
161 #define STORE 2   // Store
162 #define LOADLR 3  // Unaligned load
163 #define STORELR 4 // Unaligned store
164 #define MOV 5     // Move 
165 #define ALU 6     // Arithmetic/logic
166 #define MULTDIV 7 // Multiply/divide
167 #define SHIFT 8   // Shift by register
168 #define SHIFTIMM 9// Shift by immediate
169 #define IMM16 10  // 16-bit immediate
170 #define RJUMP 11  // Unconditional jump to register
171 #define UJUMP 12  // Unconditional jump
172 #define CJUMP 13  // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
173 #define SJUMP 14  // Conditional branch (regimm format)
174 #define COP0 15   // Coprocessor 0
175 #define COP1 16   // Coprocessor 1
176 #define C1LS 17   // Coprocessor 1 load/store
177 #define FJUMP 18  // Conditional branch (floating point)
178 #define FLOAT 19  // Floating point unit
179 #define FCONV 20  // Convert integer to float
180 #define FCOMP 21  // Floating point compare (sets FSREG)
181 #define SYSCALL 22// SYSCALL
182 #define OTHER 23  // Other
183 #define SPAN 24   // Branch/delay slot spans 2 pages
184 #define NI 25     // Not implemented
185
186   /* stubs */
187 #define CC_STUB 1
188 #define FP_STUB 2
189 #define LOADB_STUB 3
190 #define LOADH_STUB 4
191 #define LOADW_STUB 5
192 #define LOADD_STUB 6
193 #define LOADBU_STUB 7
194 #define LOADHU_STUB 8
195 #define STOREB_STUB 9
196 #define STOREH_STUB 10
197 #define STOREW_STUB 11
198 #define STORED_STUB 12
199 #define STORELR_STUB 13
200 #define INVCODE_STUB 14
201
202   /* branch codes */
203 #define TAKEN 1
204 #define NOTTAKEN 2
205 #define NULLDS 3
206
207 // asm linkage
208 int new_recompile_block(int addr);
209 void *get_addr_ht(u_int vaddr);
210 void invalidate_block(u_int block);
211 void invalidate_addr(u_int addr);
212 void remove_hash(int vaddr);
213 void jump_vaddr();
214 void dyna_linker();
215 void dyna_linker_ds();
216 void verify_code();
217 void verify_code_vm();
218 void verify_code_ds();
219 void cc_interrupt();
220 void fp_exception();
221 void fp_exception_ds();
222 void jump_syscall();
223 void jump_eret();
224
225 // TLB
226 void TLBWI_new();
227 void TLBWR_new();
228 void read_nomem_new();
229 void read_nomemb_new();
230 void read_nomemh_new();
231 void read_nomemd_new();
232 void write_nomem_new();
233 void write_nomemb_new();
234 void write_nomemh_new();
235 void write_nomemd_new();
236 void write_rdram_new();
237 void write_rdramb_new();
238 void write_rdramh_new();
239 void write_rdramd_new();
240 extern u_int memory_map[1048576];
241
242 // Needed by assembler
243 void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
244 void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
245 void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
246 void load_all_regs(signed char i_regmap[]);
247 void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
248 void load_regs_entry(int t);
249 void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
250
251 int tracedebug=0;
252
253 //#define DEBUG_CYCLE_COUNT 1
254
255 void nullf() {}
256 //#define assem_debug printf
257 //#define inv_debug printf
258 #define assem_debug nullf
259 #define inv_debug nullf
260
261 static void tlb_hacks()
262 {
263 #ifndef DISABLE_TLB
264   // Goldeneye hack
265   if (strncmp((char *) ROM_HEADER->nom, "GOLDENEYE",9) == 0)
266   {
267     u_int addr;
268     int n;
269     switch (ROM_HEADER->Country_code&0xFF) 
270     {
271       case 0x45: // U
272         addr=0x34b30;
273         break;                   
274       case 0x4A: // J 
275         addr=0x34b70;    
276         break;    
277       case 0x50: // E 
278         addr=0x329f0;
279         break;                        
280       default: 
281         // Unknown country code
282         addr=0;
283         break;
284     }
285     u_int rom_addr=(u_int)rom;
286     #ifdef ROM_COPY
287     // Since memory_map is 32-bit, on 64-bit systems the rom needs to be
288     // in the lower 4G of memory to use this hack.  Copy it if necessary.
289     if((void *)rom>(void *)0xffffffff) {
290       munmap(ROM_COPY, 67108864);
291       if(mmap(ROM_COPY, 12582912,
292               PROT_READ | PROT_WRITE,
293               MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
294               -1, 0) <= 0) {printf("mmap() failed\n");}
295       memcpy(ROM_COPY,rom,12582912);
296       rom_addr=(u_int)ROM_COPY;
297     }
298     #endif
299     if(addr) {
300       for(n=0x7F000;n<0x80000;n++) {
301         memory_map[n]=(((u_int)(rom_addr+addr-0x7F000000))>>2)|0x40000000;
302       }
303     }
304   }
305 #endif
306 }
307
308 static u_int get_page(u_int vaddr)
309 {
310   u_int page=(vaddr^0x80000000)>>12;
311 #ifndef DISABLE_TLB
312   if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12;
313 #endif
314   if(page>2048) page=2048+(page&2047);
315   return page;
316 }
317
318 static u_int get_vpage(u_int vaddr)
319 {
320   u_int vpage=(vaddr^0x80000000)>>12;
321 #ifndef DISABLE_TLB
322   if(vpage>262143&&tlb_LUT_r[vaddr>>12]) vpage&=2047; // jump_dirty uses a hash of the virtual address instead
323 #endif
324   if(vpage>2048) vpage=2048+(vpage&2047);
325   return vpage;
326 }
327
328 // Get address from virtual address
329 // This is called from the recompiled JR/JALR instructions
330 void *get_addr(u_int vaddr)
331 {
332   u_int page=get_page(vaddr);
333   u_int vpage=get_vpage(vaddr);
334   struct ll_entry *head;
335   //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
336   head=jump_in[page];
337   while(head!=NULL) {
338     if(head->vaddr==vaddr&&head->reg32==0) {
339   //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
340       int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
341       ht_bin[3]=ht_bin[1];
342       ht_bin[2]=ht_bin[0];
343       ht_bin[1]=(int)head->addr;
344       ht_bin[0]=vaddr;
345       return head->addr;
346     }
347     head=head->next;
348   }
349   head=jump_dirty[vpage];
350   while(head!=NULL) {
351     if(head->vaddr==vaddr&&head->reg32==0) {
352       //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
353       // Don't restore blocks which are about to expire from the cache
354       if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
355       if(verify_dirty(head->addr)) {
356         //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
357         invalid_code[vaddr>>12]=0;
358         memory_map[vaddr>>12]|=0x40000000;
359         if(vpage<2048) {
360 #ifndef DISABLE_TLB
361           if(tlb_LUT_r[vaddr>>12]) {
362             invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
363             memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
364           }
365 #endif
366           restore_candidate[vpage>>3]|=1<<(vpage&7);
367         }
368         else restore_candidate[page>>3]|=1<<(page&7);
369         int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
370         if(ht_bin[0]==vaddr) {
371           ht_bin[1]=(int)head->addr; // Replace existing entry
372         }
373         else
374         {
375           ht_bin[3]=ht_bin[1];
376           ht_bin[2]=ht_bin[0];
377           ht_bin[1]=(int)head->addr;
378           ht_bin[0]=vaddr;
379         }
380         return head->addr;
381       }
382     }
383     head=head->next;
384   }
385   //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
386   int r=new_recompile_block(vaddr);
387   if(r==0) return get_addr(vaddr);
388   // Execute in unmapped page, generate pagefault execption
389   Status|=2;
390   Cause=(vaddr<<31)|0x8;
391   EPC=(vaddr&1)?vaddr-5:vaddr;
392   BadVAddr=(vaddr&~1);
393   Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
394   EntryHi=BadVAddr&0xFFFFE000;
395   return get_addr_ht(0x80000000);
396 }
397 // Look up address in hash table first
398 void *get_addr_ht(u_int vaddr)
399 {
400   //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
401   int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
402   if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
403   if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
404   return get_addr(vaddr);
405 }
406
407 void *get_addr_32(u_int vaddr,u_int flags)
408 {
409   //printf("TRACE: count=%d next=%d (get_addr_32 %x,flags %x)\n",Count,next_interupt,vaddr,flags);
410   int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
411   if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
412   if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
413   u_int page=get_page(vaddr);
414   u_int vpage=get_vpage(vaddr);
415   struct ll_entry *head;
416   head=jump_in[page];
417   while(head!=NULL) {
418     if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
419       //printf("TRACE: count=%d next=%d (get_addr_32 match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
420       if(head->reg32==0) {
421         int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
422         if(ht_bin[0]==-1) {
423           ht_bin[1]=(int)head->addr;
424           ht_bin[0]=vaddr;
425         }else if(ht_bin[2]==-1) {
426           ht_bin[3]=(int)head->addr;
427           ht_bin[2]=vaddr;
428         }
429         //ht_bin[3]=ht_bin[1];
430         //ht_bin[2]=ht_bin[0];
431         //ht_bin[1]=(int)head->addr;
432         //ht_bin[0]=vaddr;
433       }
434       return head->addr;
435     }
436     head=head->next;
437   }
438   head=jump_dirty[vpage];
439   while(head!=NULL) {
440     if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
441       //printf("TRACE: count=%d next=%d (get_addr_32 match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
442       // Don't restore blocks which are about to expire from the cache
443       if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
444       if(verify_dirty(head->addr)) {
445         //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
446         invalid_code[vaddr>>12]=0;
447         memory_map[vaddr>>12]|=0x40000000;
448         if(vpage<2048) {
449 #ifndef DISABLE_TLB
450           if(tlb_LUT_r[vaddr>>12]) {
451             invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
452             memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
453           }
454 #endif
455           restore_candidate[vpage>>3]|=1<<(vpage&7);
456         }
457         else restore_candidate[page>>3]|=1<<(page&7);
458         if(head->reg32==0) {
459           int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
460           if(ht_bin[0]==-1) {
461             ht_bin[1]=(int)head->addr;
462             ht_bin[0]=vaddr;
463           }else if(ht_bin[2]==-1) {
464             ht_bin[3]=(int)head->addr;
465             ht_bin[2]=vaddr;
466           }
467           //ht_bin[3]=ht_bin[1];
468           //ht_bin[2]=ht_bin[0];
469           //ht_bin[1]=(int)head->addr;
470           //ht_bin[0]=vaddr;
471         }
472         return head->addr;
473       }
474     }
475     head=head->next;
476   }
477   //printf("TRACE: count=%d next=%d (get_addr_32 no-match %x,flags %x)\n",Count,next_interupt,vaddr,flags);
478   int r=new_recompile_block(vaddr);
479   if(r==0) return get_addr(vaddr);
480   // Execute in unmapped page, generate pagefault execption
481   Status|=2;
482   Cause=(vaddr<<31)|0x8;
483   EPC=(vaddr&1)?vaddr-5:vaddr;
484   BadVAddr=(vaddr&~1);
485   Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
486   EntryHi=BadVAddr&0xFFFFE000;
487   return get_addr_ht(0x80000000);
488 }
489
490 void clear_all_regs(signed char regmap[])
491 {
492   int hr;
493   for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
494 }
495
496 signed char get_reg(signed char regmap[],int r)
497 {
498   int hr;
499   for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
500   return -1;
501 }
502
503 // Find a register that is available for two consecutive cycles
504 signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
505 {
506   int hr;
507   for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
508   return -1;
509 }
510
511 int count_free_regs(signed char regmap[])
512 {
513   int count=0;
514   int hr;
515   for(hr=0;hr<HOST_REGS;hr++)
516   {
517     if(hr!=EXCLUDE_REG) {
518       if(regmap[hr]<0) count++;
519     }
520   }
521   return count;
522 }
523
524 void dirty_reg(struct regstat *cur,signed char reg)
525 {
526   int hr;
527   if(!reg) return;
528   for (hr=0;hr<HOST_REGS;hr++) {
529     if((cur->regmap[hr]&63)==reg) {
530       cur->dirty|=1<<hr;
531     }
532   }
533 }
534
535 // If we dirty the lower half of a 64 bit register which is now being
536 // sign-extended, we need to dump the upper half.
537 // Note: Do this only after completion of the instruction, because
538 // some instructions may need to read the full 64-bit value even if
539 // overwriting it (eg SLTI, DSRA32).
540 static void flush_dirty_uppers(struct regstat *cur)
541 {
542   int hr,reg;
543   for (hr=0;hr<HOST_REGS;hr++) {
544     if((cur->dirty>>hr)&1) {
545       reg=cur->regmap[hr];
546       if(reg>=64) 
547         if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
548     }
549   }
550 }
551
552 void set_const(struct regstat *cur,signed char reg,uint64_t value)
553 {
554   int hr;
555   if(!reg) return;
556   for (hr=0;hr<HOST_REGS;hr++) {
557     if(cur->regmap[hr]==reg) {
558       cur->isconst|=1<<hr;
559       cur->constmap[hr]=value;
560     }
561     else if((cur->regmap[hr]^64)==reg) {
562       cur->isconst|=1<<hr;
563       cur->constmap[hr]=value>>32;
564     }
565   }
566 }
567
568 void clear_const(struct regstat *cur,signed char reg)
569 {
570   int hr;
571   if(!reg) return;
572   for (hr=0;hr<HOST_REGS;hr++) {
573     if((cur->regmap[hr]&63)==reg) {
574       cur->isconst&=~(1<<hr);
575     }
576   }
577 }
578
579 int is_const(struct regstat *cur,signed char reg)
580 {
581   int hr;
582   if(!reg) return 1;
583   for (hr=0;hr<HOST_REGS;hr++) {
584     if((cur->regmap[hr]&63)==reg) {
585       return (cur->isconst>>hr)&1;
586     }
587   }
588   return 0;
589 }
590 uint64_t get_const(struct regstat *cur,signed char reg)
591 {
592   int hr;
593   if(!reg) return 0;
594   for (hr=0;hr<HOST_REGS;hr++) {
595     if(cur->regmap[hr]==reg) {
596       return cur->constmap[hr];
597     }
598   }
599   printf("Unknown constant in r%d\n",reg);
600   exit(1);
601 }
602
603 // Least soon needed registers
604 // Look at the next ten instructions and see which registers
605 // will be used.  Try not to reallocate these.
606 void lsn(u_char hsn[], int i, int *preferred_reg)
607 {
608   int j;
609   int b=-1;
610   for(j=0;j<9;j++)
611   {
612     if(i+j>=slen) {
613       j=slen-i-1;
614       break;
615     }
616     if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
617     {
618       // Don't go past an unconditonal jump
619       j++;
620       break;
621     }
622   }
623   for(;j>=0;j--)
624   {
625     if(rs1[i+j]) hsn[rs1[i+j]]=j;
626     if(rs2[i+j]) hsn[rs2[i+j]]=j;
627     if(rt1[i+j]) hsn[rt1[i+j]]=j;
628     if(rt2[i+j]) hsn[rt2[i+j]]=j;
629     if(itype[i+j]==STORE || itype[i+j]==STORELR) {
630       // Stores can allocate zero
631       hsn[rs1[i+j]]=j;
632       hsn[rs2[i+j]]=j;
633     }
634     // On some architectures stores need invc_ptr
635     #if defined(HOST_IMM8)
636     if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39) {
637       hsn[INVCP]=j;
638     }
639     #endif
640     if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
641     {
642       hsn[CCREG]=j;
643       b=j;
644     }
645   }
646   if(b>=0)
647   {
648     if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
649     {
650       // Follow first branch
651       int t=(ba[i+b]-start)>>2;
652       j=7-b;if(t+j>=slen) j=slen-t-1;
653       for(;j>=0;j--)
654       {
655         if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
656         if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
657         //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
658         //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
659       }
660     }
661     // TODO: preferred register based on backward branch
662   }
663   // Delay slot should preferably not overwrite branch conditions or cycle count
664   if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
665     if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
666     if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
667     hsn[CCREG]=1;
668     // ...or hash tables
669     hsn[RHASH]=1;
670     hsn[RHTBL]=1;
671   }
672   // Coprocessor load/store needs FTEMP, even if not declared
673   if(itype[i]==C1LS) {
674     hsn[FTEMP]=0;
675   }
676   // Load L/R also uses FTEMP as a temporary register
677   if(itype[i]==LOADLR) {
678     hsn[FTEMP]=0;
679   }
680   // Also 64-bit SDL/SDR
681   if(opcode[i]==0x2c||opcode[i]==0x2d) {
682     hsn[FTEMP]=0;
683   }
684   // Don't remove the TLB registers either
685   if(itype[i]==LOAD || itype[i]==LOADLR || itype[i]==STORE || itype[i]==STORELR || itype[i]==C1LS ) {
686     hsn[TLREG]=0;
687   }
688   // Don't remove the miniht registers
689   if(itype[i]==UJUMP||itype[i]==RJUMP)
690   {
691     hsn[RHASH]=0;
692     hsn[RHTBL]=0;
693   }
694 }
695
696 // We only want to allocate registers if we're going to use them again soon
697 int needed_again(int r, int i)
698 {
699   int j;
700   int b=-1;
701   int rn=10;
702   int hr;
703   u_char hsn[MAXREG+1];
704   int preferred_reg;
705   
706   memset(hsn,10,sizeof(hsn));
707   lsn(hsn,i,&preferred_reg);
708   
709   if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
710   {
711     if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
712       return 0; // Don't need any registers if exiting the block
713   }
714   for(j=0;j<9;j++)
715   {
716     if(i+j>=slen) {
717       j=slen-i-1;
718       break;
719     }
720     if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
721     {
722       // Don't go past an unconditonal jump
723       j++;
724       break;
725     }
726     if(itype[i+j]==SYSCALL||((source[i+j]&0xfc00003f)==0x0d))
727     {
728       break;
729     }
730   }
731   for(;j>=1;j--)
732   {
733     if(rs1[i+j]==r) rn=j;
734     if(rs2[i+j]==r) rn=j;
735     if((unneeded_reg[i+j]>>r)&1) rn=10;
736     if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
737     {
738       b=j;
739     }
740   }
741   /*
742   if(b>=0)
743   {
744     if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
745     {
746       // Follow first branch
747       int o=rn;
748       int t=(ba[i+b]-start)>>2;
749       j=7-b;if(t+j>=slen) j=slen-t-1;
750       for(;j>=0;j--)
751       {
752         if(!((unneeded_reg[t+j]>>r)&1)) {
753           if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
754           if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
755         }
756         else rn=o;
757       }
758     }
759   }*/
760   for(hr=0;hr<HOST_REGS;hr++) {
761     if(hr!=EXCLUDE_REG) {
762       if(rn<hsn[hr]) return 1;
763     }
764   }
765   return 0;
766 }
767
768 // Try to match register allocations at the end of a loop with those
769 // at the beginning
770 int loop_reg(int i, int r, int hr)
771 {
772   int j,k;
773   for(j=0;j<9;j++)
774   {
775     if(i+j>=slen) {
776       j=slen-i-1;
777       break;
778     }
779     if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
780     {
781       // Don't go past an unconditonal jump
782       j++;
783       break;
784     }
785   }
786   k=0;
787   if(i>0){
788     if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
789       k--;
790   }
791   for(;k<j;k++)
792   {
793     if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
794     if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
795     if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
796     {
797       if(ba[i+k]>=start && ba[i+k]<(start+i*4))
798       {
799         int t=(ba[i+k]-start)>>2;
800         int reg=get_reg(regs[t].regmap_entry,r);
801         if(reg>=0) return reg;
802         //reg=get_reg(regs[t+1].regmap_entry,r);
803         //if(reg>=0) return reg;
804       }
805     }
806   }
807   return hr;
808 }
809
810
811 // Allocate every register, preserving source/target regs
812 void alloc_all(struct regstat *cur,int i)
813 {
814   int hr;
815   
816   for(hr=0;hr<HOST_REGS;hr++) {
817     if(hr!=EXCLUDE_REG) {
818       if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
819          ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
820       {
821         cur->regmap[hr]=-1;
822         cur->dirty&=~(1<<hr);
823       }
824       // Don't need zeros
825       if((cur->regmap[hr]&63)==0)
826       {
827         cur->regmap[hr]=-1;
828         cur->dirty&=~(1<<hr);
829       }
830     }
831   }
832 }
833
834
835 void div64(int64_t dividend,int64_t divisor)
836 {
837   lo=dividend/divisor;
838   hi=dividend%divisor;
839   //printf("TRACE: ddiv %8x%8x %8x%8x\n" ,(int)reg[HIREG],(int)(reg[HIREG]>>32)
840   //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
841 }
842 void divu64(uint64_t dividend,uint64_t divisor)
843 {
844   lo=dividend/divisor;
845   hi=dividend%divisor;
846   //printf("TRACE: ddivu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
847   //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
848 }
849
850 void mult64(uint64_t m1,uint64_t m2)
851 {
852    unsigned long long int op1, op2, op3, op4;
853    unsigned long long int result1, result2, result3, result4;
854    unsigned long long int temp1, temp2, temp3, temp4;
855    int sign = 0;
856    
857    if (m1 < 0)
858      {
859     op2 = -m1;
860     sign = 1 - sign;
861      }
862    else op2 = m1;
863    if (m2 < 0)
864      {
865     op4 = -m2;
866     sign = 1 - sign;
867      }
868    else op4 = m2;
869    
870    op1 = op2 & 0xFFFFFFFF;
871    op2 = (op2 >> 32) & 0xFFFFFFFF;
872    op3 = op4 & 0xFFFFFFFF;
873    op4 = (op4 >> 32) & 0xFFFFFFFF;
874    
875    temp1 = op1 * op3;
876    temp2 = (temp1 >> 32) + op1 * op4;
877    temp3 = op2 * op3;
878    temp4 = (temp3 >> 32) + op2 * op4;
879    
880    result1 = temp1 & 0xFFFFFFFF;
881    result2 = temp2 + (temp3 & 0xFFFFFFFF);
882    result3 = (result2 >> 32) + temp4;
883    result4 = (result3 >> 32);
884    
885    lo = result1 | (result2 << 32);
886    hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
887    if (sign)
888      {
889     hi = ~hi;
890     if (!lo) hi++;
891     else lo = ~lo + 1;
892      }
893 }
894
895 void multu64(uint64_t m1,uint64_t m2)
896 {
897    unsigned long long int op1, op2, op3, op4;
898    unsigned long long int result1, result2, result3, result4;
899    unsigned long long int temp1, temp2, temp3, temp4;
900    
901    op1 = m1 & 0xFFFFFFFF;
902    op2 = (m1 >> 32) & 0xFFFFFFFF;
903    op3 = m2 & 0xFFFFFFFF;
904    op4 = (m2 >> 32) & 0xFFFFFFFF;
905    
906    temp1 = op1 * op3;
907    temp2 = (temp1 >> 32) + op1 * op4;
908    temp3 = op2 * op3;
909    temp4 = (temp3 >> 32) + op2 * op4;
910    
911    result1 = temp1 & 0xFFFFFFFF;
912    result2 = temp2 + (temp3 & 0xFFFFFFFF);
913    result3 = (result2 >> 32) + temp4;
914    result4 = (result3 >> 32);
915    
916    lo = result1 | (result2 << 32);
917    hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
918    
919   //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
920   //                                      ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
921 }
922
923 uint64_t ldl_merge(uint64_t original,uint64_t loaded,u_int bits)
924 {
925   if(bits) {
926     original<<=64-bits;
927     original>>=64-bits;
928     loaded<<=bits;
929     original|=loaded;
930   }
931   else original=loaded;
932   return original;
933 }
934 uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits)
935 {
936   if(bits^56) {
937     original>>=64-(bits^56);
938     original<<=64-(bits^56);
939     loaded>>=bits^56;
940     original|=loaded;
941   }
942   else original=loaded;
943   return original;
944 }
945
946 #ifdef __i386__
947 #include "assem_x86.c"
948 #endif
949 #ifdef __x86_64__
950 #include "assem_x64.c"
951 #endif
952 #ifdef __arm__
953 #include "assem_arm.c"
954 #endif
955
956 // Add virtual address mapping to linked list
957 void ll_add(struct ll_entry **head,int vaddr,void *addr)
958 {
959   struct ll_entry *new_entry;
960   new_entry=malloc(sizeof(struct ll_entry));
961   assert(new_entry!=NULL);
962   new_entry->vaddr=vaddr;
963   new_entry->reg32=0;
964   new_entry->addr=addr;
965   new_entry->next=*head;
966   *head=new_entry;
967 }
968
969 // Add virtual address mapping for 32-bit compiled block
970 void ll_add_32(struct ll_entry **head,int vaddr,u_int reg32,void *addr)
971 {
972   struct ll_entry *new_entry;
973   new_entry=malloc(sizeof(struct ll_entry));
974   assert(new_entry!=NULL);
975   new_entry->vaddr=vaddr;
976   new_entry->reg32=reg32;
977   new_entry->addr=addr;
978   new_entry->next=*head;
979   *head=new_entry;
980 }
981
982 // Check if an address is already compiled
983 // but don't return addresses which are about to expire from the cache
void *check_addr(u_int vaddr)
{
  // Look up compiled code for guest address vaddr.
  // Fast path: the 2-way hash table.  Slow path: scan the jump_in list
  // for the page.  In both paths, entries whose native address is close
  // enough to the output pointer that they are about to expire from the
  // translation cache are not returned (see header comment above).
  u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) {
    // Distance-from-out test: only return the block if it is not within
    // MAX_OUTPUT_BLOCK_SIZE of being overwritten by new output.
    if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[1])) return (void *)ht_bin[1];
  }
  if(ht_bin[2]==vaddr) {
    if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[3])) return (void *)ht_bin[3];
  }
  // Hash miss (or expiring entry): walk the per-page jump_in list.
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while(head!=NULL) {
    // reg32==0: only match blocks with no 32-bit register assumptions.
    if(head->vaddr==vaddr&&head->reg32==0) {
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        // Update existing entry with current address
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr;
          return head->addr;
        }
        if(ht_bin[2]==vaddr) {
          ht_bin[3]=(int)head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  // Not compiled (or everything found was about to expire).
  return 0;
}
1027
// Remove vaddr from its 2-entry hash table bucket.
// Order matters: slot 1 (indices 2/3) is cleared first, then if slot 0
// matches it is overwritten by slot 1's contents, which promotes any
// surviving second entry and leaves the freed slot at the end.
void remove_hash(int vaddr)
{
  //printf("remove hash: %x\n",vaddr);
  int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
  if(ht_bin[2]==vaddr) {
    ht_bin[2]=ht_bin[3]=-1;
  }
  if(ht_bin[0]==vaddr) {
    ht_bin[0]=ht_bin[2];
    ht_bin[1]=ht_bin[3];
    ht_bin[2]=ht_bin[3]=-1;
  }
}
1041
1042 void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
1043 {
1044   struct ll_entry *next;
1045   while(*head) {
1046     if(((u_int)((*head)->addr)>>shift)==(addr>>shift) || 
1047        ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
1048     {
1049       inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
1050       remove_hash((*head)->vaddr);
1051       next=(*head)->next;
1052       free(*head);
1053       *head=next;
1054     }
1055     else
1056     {
1057       head=&((*head)->next);
1058     }
1059   }
1060 }
1061
1062 // Remove all entries from linked list
1063 void ll_clear(struct ll_entry **head)
1064 {
1065   struct ll_entry *cur;
1066   struct ll_entry *next;
1067   if(cur=*head) {
1068     *head=0;
1069     while(cur) {
1070       next=cur->next;
1071       free(cur);
1072       cur=next;
1073     }
1074   }
1075 }
1076
1077 // Dereference the pointers and remove if it matches
1078 void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
1079 {
1080   while(head) {
1081     int ptr=get_pointer(head->addr);
1082     inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
1083     if(((ptr>>shift)==(addr>>shift)) ||
1084        (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
1085     {
1086       inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
1087       kill_pointer(head->addr);
1088     }
1089     head=head->next;
1090   }
1091 }
1092
1093 // This is called when we write to a compiled block (see do_invstub)
1094 int invalidate_page(u_int page)
1095 {
1096   int modified=0;
1097   struct ll_entry *head;
1098   struct ll_entry *next;
1099   head=jump_in[page];
1100   jump_in[page]=0;
1101   while(head!=NULL) {
1102     inv_debug("INVALIDATE: %x\n",head->vaddr);
1103     remove_hash(head->vaddr);
1104     next=head->next;
1105     free(head);
1106     head=next;
1107   }
1108   head=jump_out[page];
1109   jump_out[page]=0;
1110   while(head!=NULL) {
1111     inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
1112     kill_pointer(head->addr);
1113     modified=1;
1114     next=head->next;
1115     free(head);
1116     head=next;
1117   }
1118   return modified;
1119 }
// Invalidate all compiled code for the 4K guest page `block`.
// Scans the jump_dirty list to find blocks overlapping this page and
// widens the invalidation range if a block crosses a 4K boundary, then
// invalidates the affected pages and clears write protection.
void invalidate_block(u_int block)
{
  int modified;
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    u_int start,end;
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      // Find the guest address range covered by this dirty block and
      // widen [first,last] if it spills outside the target page.
      get_bounds((int)head->addr,&start,&end);
      //printf("start: %x end: %x\n",start,end);
      // KSEG0 (directly mapped RDRAM) blocks:
      if(page<2048&&start>=0x80000000&&end<0x80800000) {
        if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
          if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
          if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
        }
      }
      // TLB-mapped blocks: translate through memory_map first.
      if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) {
        if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) {
          if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)<first) first=((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047;
          if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047;
        }
      }
    }
    head=head->next;
  }
  //printf("first=%d last=%d\n",first,last);
  modified=invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  // NOTE(review): this loop stops at last-1, so page `last` itself is
  // never invalidated when last>page — looks like a possible off-by-one;
  // verify against upstream before changing.
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  
  // Don't trap writes
  invalid_code[block]=1;
#ifndef DISABLE_TLB
  // If there is a valid TLB entry for this page, remove write protect
  if(tlb_LUT_w[block]) {
    assert(tlb_LUT_r[block]==tlb_LUT_w[block]);
    // CHECK: Is this right?
    memory_map[block]=((tlb_LUT_w[block]&0xFFFFF000)-(block<<12)+(unsigned int)rdram-0x80000000)>>2;
    u_int real_block=tlb_LUT_w[block]>>12;
    invalid_code[real_block]=1;
    if(real_block>=0x80000&&real_block<0x80800) memory_map[real_block]=((u_int)rdram-0x80000000)>>2;
  }
  else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2;
#endif
  #ifdef __arm__
  // Generated code was patched; flush the instruction cache.
  if(modified)
    __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
  #endif
  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
// Invalidate compiled code for the 4K page containing guest address addr.
void invalidate_addr(u_int addr)
{
  invalidate_block(addr>>12);
}
// Throw away every compiled block, mark still-valid pages as candidates
// for later restoration, flush caches, and rebuild the TLB memory map.
void invalidate_all_pages()
{
  u_int page,n;
  for(page=0;page<4096;page++)
    invalidate_page(page);
  for(page=0;page<1048576;page++)
    if(!invalid_code[page]) {
      // Remember unmodified pages so clean_blocks() can restore them.
      restore_candidate[(page&2047)>>3]|=1<<(page&7);
      restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
    }
  #ifdef __arm__
  __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
  #endif
  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
  #ifndef DISABLE_TLB
  // TLB
  // Rebuild memory_map from the TLB lookup tables; read-only (or still
  // valid) pages get the write-protect bit so writes trap.
  for(page=0;page<0x100000;page++) {
    if(tlb_LUT_r[page]) {
      memory_map[page]=((tlb_LUT_r[page]&0xFFFFF000)-(page<<12)+(unsigned int)rdram-0x80000000)>>2;
      if(!tlb_LUT_w[page]||!invalid_code[page])
        memory_map[page]|=0x40000000; // Write protect
    }
    else memory_map[page]=-1;
    // Skip the directly-mapped KSEG0/KSEG1 range (0x80000-0xBFFFF).
    if(page==0x80000) page=0xC0000;
  }
  tlb_hacks();
  #endif
}
1221
1222 // Add an entry to jump_out after making a link
// Record in jump_out that the code at `src` contains a direct jump to
// guest address vaddr, so the link can be patched out if that page is
// invalidated.
void add_link(u_int vaddr,void *src)
{
  u_int page=get_page(vaddr);
  inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
  ll_add(jump_out+page,vaddr,src);
  //int ptr=get_pointer(src);
  //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
}
1231
1232 // If a code block was found to be unmodified (bit was set in
1233 // restore_candidate) and it remains unmodified (bit is clear
1234 // in invalid_code) then move the entries for that 4K page from
1235 // the dirty list to the clean list.
void clean_blocks(u_int page)
{
  // Walk the dirty list for this page; any block whose source memory
  // verifies as unmodified (and whose whole guest range is still valid)
  // is moved back onto the clean jump_in list and re-entered into the
  // hash table.
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        u_int start,end;
        if(verify_dirty((int)head->addr)) {
          //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
          u_int i;
          u_int inv=0;
          get_bounds((int)head->addr,&start,&end);
          // Every 4K page the block spans must still be valid.
          if(start-(u_int)rdram<0x800000) {
            for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
              inv|=invalid_code[i];
            }
          }
          // TLB-mapped block: its current mapping must still point at
          // the same physical range it was compiled from.
          if((signed int)head->vaddr>=(signed int)0xC0000000) {
            u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2));
            //printf("addr=%x start=%x end=%x\n",addr,start,end);
            if(addr<start||addr>=end) inv=1;
          }
          else if((signed int)head->vaddr>=(signed int)0x80800000) {
            inv=1;
          }
          if(!inv) {
            void * clean_addr=(void *)get_clean_addr((int)head->addr);
            // The clean entry point must also not be about to expire.
            if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
              u_int ppage=page;
#ifndef DISABLE_TLB
              if(page<2048&&tlb_LUT_r[head->vaddr>>12]) ppage=(tlb_LUT_r[head->vaddr>>12]^0x80000000)>>12;
#endif
              inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              ll_add_32(jump_in+ppage,head->vaddr,head->reg32,clean_addr);
              int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
              // Only hash blocks with no 32-bit register assumptions
              // (check_addr only matches reg32==0 entries).
              if(!head->reg32) {
                if(ht_bin[0]==head->vaddr) {
                  ht_bin[1]=(int)clean_addr; // Replace existing entry
                }
                if(ht_bin[2]==head->vaddr) {
                  ht_bin[3]=(int)clean_addr; // Replace existing entry
                }
              }
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1292
1293
1294 void mov_alloc(struct regstat *current,int i)
1295 {
1296   // Note: Don't need to actually alloc the source registers
1297   if((~current->is32>>rs1[i])&1) {
1298     //alloc_reg64(current,i,rs1[i]);
1299     alloc_reg64(current,i,rt1[i]);
1300     current->is32&=~(1LL<<rt1[i]);
1301   } else {
1302     //alloc_reg(current,i,rs1[i]);
1303     alloc_reg(current,i,rt1[i]);
1304     current->is32|=(1LL<<rt1[i]);
1305   }
1306   clear_const(current,rs1[i]);
1307   clear_const(current,rt1[i]);
1308   dirty_reg(current,rt1[i]);
1309 }
1310
// Register allocation for shift-by-immediate instructions.
// The opcode2 ranges are mutually exclusive; each branch allocates the
// source/destination at the right width and updates the is32 bitmap.
void shiftimm_alloc(struct regstat *current,int i)
{
  clear_const(current,rs1[i]);
  clear_const(current,rt1[i]);
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      // Source only needed if it is live later; otherwise remember it
      // in lt1 so the assembler can load it directly.
      if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
      else lt1[i]=rs1[i];
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      // Only the low word of the source matters (it all shifts into
      // the high word), so a 32-bit source allocation is enough.
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      // Shift of exactly 32 can leave nonzero upper bits; otherwise
      // the result fits in 32 bits.
      if(imm[i]==32) {
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      } else {
        alloc_reg(current,i,rt1[i]);
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      // Arithmetic shift >=32 always yields a sign-extended 32-bit value.
      alloc_reg64(current,i,rs1[i]);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
}
1367
// Register allocation for shift-by-register instructions (SLLV family
// 32-bit, DSLLV family 64-bit).
void shift_alloc(struct regstat *current,int i)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg(current,i,rt1[i]);
      // Destination aliasing the shift-amount register needs a scratch.
      if(rt1[i]==rs2[i]) alloc_reg_temp(current,i,-1);
      current->is32|=1LL<<rt1[i];
    } else { // DSLLV/DSRLV/DSRAV
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
        alloc_reg_temp(current,i,-1);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rs2[i]);
    clear_const(current,rt1[i]);
    dirty_reg(current,rt1[i]);
  }
}
1392
// Register allocation for three-operand ALU instructions.  Tracks
// whether each result fits in 32 bits via current->is32 and allocates
// 64-bit register pairs only where the upper half can matter.
void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        // One operand is r0: sources only allocated if live later.
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
    // 32-bit add/sub always produces a sign-extended 32-bit result.
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      // Comparison must be done at 64 bits unless both sources are
      // known 32-bit.
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      } else {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      }
    }
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
      // If either source may be 64-bit, the logical result is 64-bit.
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        // Only materialize the upper half if it is actually needed
        // (not marked unneeded in current->uu).
        if(!((current->uu>>rt1[i])&1)) {
          alloc_reg64(current,i,rt1[i]);
        }
        if(get_reg(current->regmap,rt1[i]|64)>=0) {
          if(rs1[i]&&rs2[i]) {
            alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rs2[i]);
          }
          else
          {
            // Is is really worth it to keep 64-bit values in registers?
            #ifdef NATIVE_64BIT
            if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
            if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
            #endif
          }
        }
        current->is32&=~(1LL<<rt1[i]);
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        // Full 64-bit operation unless the upper half of the result is
        // known to be unneeded and not already in a register.
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          alloc_reg64(current,i,rs1[i]);
          alloc_reg64(current,i,rs2[i]);
          alloc_reg64(current,i,rt1[i]);
        } else {
          alloc_reg(current,i,rs1[i]);
          alloc_reg(current,i,rs2[i]);
          alloc_reg(current,i,rt1[i]);
        }
      }
      else {
        alloc_reg(current,i,rt1[i]);
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          // DADD used as move, or zeroing
          // If we have a 64-bit source, then make the target 64 bits too
          if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
            if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rt1[i]);
          } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
          if(opcode2[i]>=0x2e&&rs2[i]) {
            // DSUB used as negation - 64-bit result
            // If we have a 32-bit register, extend it to 64 bits
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
        }
      }
      // Result width bookkeeping: a true 64-bit op yields a 64-bit
      // value; a move from a 32-bit source stays 32-bit; moving r0
      // (both sources zero) yields a 32-bit zero.
      if(rs1[i]&&rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
      } else if(rs1[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs1[i])&1)
          current->is32|=1LL<<rt1[i];
      } else if(rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs2[i])&1)
          current->is32|=1LL<<rt1[i];
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1514
// Register allocation for immediate-operand instructions (DADDI, SLTI,
// ANDI/ORI/XORI, ADDI, LUI).  Also performs constant propagation for
// the logical/add immediates when the source value is known.
void imm16_alloc(struct regstat *current,int i)
{
  if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  else lt1[i]=rs1[i];
  if(rt1[i]) alloc_reg(current,i,rt1[i]);
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    current->is32&=~(1LL<<rt1[i]);
    if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
      // TODO: Could preserve the 32-bit flag if the immediate is zero
      alloc_reg64(current,i,rt1[i]);
      alloc_reg64(current,i,rs1[i]);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    // A possibly-64-bit source forces a 64-bit comparison.
    if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
    current->is32|=1LL<<rt1[i];
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    // ORI/XORI (>0x0c) on a 64-bit source keep the upper half; ANDI
    // with a 16-bit immediate always clears it.
    if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
      if(rs1[i]!=rt1[i]) {
        if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      }
    }
    else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
    // Constant propagation through the logical op when rs1 is known.
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
      if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
      if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
    }
    else clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      set_const(current,rt1[i],v+imm[i]);
    }
    else clear_const(current,rt1[i]);
    current->is32|=1LL<<rt1[i];
  }
  else {
    // LUI result is always a known constant.
    set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
    current->is32|=1LL<<rt1[i];
  }
  dirty_reg(current,rt1[i]);
}
1567
// Register allocation for load instructions (LB..LD, LWL/LWR, LDL/LDR).
void load_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
  if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  if(rt1[i]) {
    alloc_reg(current,i,rt1[i]);
    if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
    {
      // 64-bit destination
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
    }
    else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      // Unaligned 64-bit loads merge with the old value, so they need
      // everything: all registers spilled plus a 64-bit FTEMP.
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
    }
    else current->is32|=1LL<<rt1[i];
    dirty_reg(current,rt1[i]);
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    // LWL/LWR need a temporary register for the old value
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
    }
  }
  else
  {
    // Load to r0 (dummy load)
    // but we still need a register to calculate the address
    alloc_reg_temp(current,i,-1);
  }
}
1606
// Register allocation for store instructions (SB..SD, SDL/SDR).
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    alloc_reg64(current,i,rs2[i]);
    if(rs2[i]) alloc_reg(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  // (note: this `else` pairs with the if(using_tlb) above — INVCP is
  // only allocated on the non-TLB path)
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2c||opcode[i]==0x2d) { // 64-bit SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
}
1629
// Register allocation for FPU loads/stores (LWC1/LDC1/SWC1/SDC1).
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,rs1[i]); // FIXME
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP);
  if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
    alloc_reg64(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
}
1650
1651 #ifndef multdiv_alloc
// Generic register allocation for multiply/divide instructions; a
// target backend may provide its own version (hence the #ifndef guard
// around this definition).  Results land in HI/LO.
void multdiv_alloc(struct regstat *current,int i)
{
  //  case 0x18: MULT
  //  case 0x19: MULTU
  //  case 0x1A: DIV
  //  case 0x1B: DIVU
  //  case 0x1C: DMULT
  //  case 0x1D: DMULTU
  //  case 0x1E: DDIV
  //  case 0x1F: DDIVU
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  if(rs1[i]&&rs2[i])
  {
    // opcode2 bit 2 distinguishes the 64-bit (D-prefixed) forms.
    if((opcode2[i]&4)==0) // 32-bit
    {
      // Mark HI/LO as needed so they get (and keep) registers.
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      current->is32|=1LL<<HIREG;
      current->is32|=1LL<<LOREG;
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      current->uu&=~(1LL<<HIREG);
      current->uu&=~(1LL<<LOREG);
      alloc_reg64(current,i,HIREG);
      //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
      alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rs2[i]);
      // 64-bit mul/div is done out of line and may clobber everything.
      alloc_all(current,i);
      current->is32&=~(1LL<<HIREG);
      current->is32&=~(1LL<<LOREG);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    current->is32|=1LL<<HIREG;
    current->is32|=1LL<<LOREG;
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
1709 #endif
1710
// Register allocation for coprocessor 0 instructions.
void cop0_alloc(struct regstat *current,int i)
{
  if(opcode2[i]==0) // MFC0
  {
    if(rt1[i]) {
      clear_const(current,rt1[i]);
      // MFC0 is handled out of line, so spill everything first.
      alloc_all(current,i);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
      alloc_all(current,i);
    }
    else {
      // MTC0 from r0: still needs a zero in a register.
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(opcode2[i]==0x10);
    alloc_all(current,i);
  }
}
1743
// Register allocation for coprocessor 1 (FPU) move instructions.
void cop1_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  if(opcode2[i]<3) // MFC1/DMFC1/CFC1
  {
    assert(rt1[i]);
    clear_const(current,rt1[i]);
    if(opcode2[i]==1) {
      alloc_reg64(current,i,rt1[i]); // DMFC1
      current->is32&=~(1LL<<rt1[i]);
    }else{
      alloc_reg(current,i,rt1[i]); // MFC1/CFC1
      current->is32|=1LL<<rt1[i];
    }
    dirty_reg(current,rt1[i]);
    // Scratch register for the FPR address.
    alloc_reg_temp(current,i,-1);
  }
  else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      if(opcode2[i]==5)
        alloc_reg64(current,i,rs1[i]); // DMTC1
      else
        alloc_reg(current,i,rs1[i]); // MTC1/CTC1
      alloc_reg_temp(current,i,-1);
    }
    else {
      // Moving r0: make sure a zero is available in a register.
      current->u&=~1LL;
      alloc_reg(current,i,0);
      alloc_reg_temp(current,i,-1);
    }
  }
}
// Register allocation for FPU convert instructions: just the COP1
// status register and a scratch register.
void fconv_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
}
// Register allocation for FPU arithmetic: status register plus scratch.
void float_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
}
// Register allocation for FPU compares: status, the condition-flag
// register (which the compare writes), and a scratch register.
void fcomp_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg(current,i,FSREG); // Load flags
  dirty_reg(current,FSREG); // Flag will be modified
  alloc_reg_temp(current,i,-1);
}
1795
// Register allocation for SYSCALL: everything is written back (the
// exception handler sees full guest state) and the cycle counter is
// allocated and marked dirty.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  current->isconst=0;
}
1803
// Dispatch register allocation for the instruction in a branch delay
// slot based on its decoded itype.  A branch in a delay slot is not
// supported: precompilation past the jump is disabled instead.
void delayslot_alloc(struct regstat *current,int i)
{
  switch(itype[i]) {
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case FJUMP:
    case SYSCALL:
    case SPAN:
      assem_debug("jump in the delay slot.  this shouldn't happen.\n");//exit(1);
      printf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    case COP1:
      cop1_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case FCONV:
      fconv_alloc(current,i);
      break;
    case FLOAT:
      float_alloc(current,i);
      break;
    case FCOMP:
      fcomp_alloc(current,i);
      break;
  }
}
1864
1865 // Special case where a branch and delay slot span two pages in virtual memory
static void pagespan_alloc(struct regstat *current,int i)
{
  // Branches spanning a page boundary are compiled conservatively:
  // spill everything, then allocate only what the specific branch type
  // needs (link register, comparison operands, FPU flags).
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    alloc_reg(current,i,31);
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]);
    if (rt1[i]==31) {
      alloc_reg(current,i,31);
      dirty_reg(current,31);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
    // 64-bit compare needed unless both operands are known 32-bit.
    if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg64(current,i,rs2[i]);
    }
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(!((current->is32>>rs1[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
    }
  }
  else
  if(opcode[i]==0x11) // BC1
  {
    alloc_reg(current,i,FSREG);
    alloc_reg(current,i,CSREG);
  }
  //else ...
}
1914
1915 add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
1916 {
1917   stubs[stubcount][0]=type;
1918   stubs[stubcount][1]=addr;
1919   stubs[stubcount][2]=retaddr;
1920   stubs[stubcount][3]=a;
1921   stubs[stubcount][4]=b;
1922   stubs[stubcount][5]=c;
1923   stubs[stubcount][6]=d;
1924   stubs[stubcount][7]=e;
1925   stubcount++;
1926 }
1927
// Write out a single register
// Flush the host-register copy of MIPS register r back to the in-memory
// register file.  regmap maps host regs to MIPS regs (bit 6 set marks the
// upper half of a 64-bit value), dirty flags modified host regs, and is32
// flags MIPS regs known to be sign-extended 32-bit values, whose upper
// word is materialized with an arithmetic shift instead of being stored
// from a second host register.
void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if((regmap[hr]&63)==r) {
        if((dirty>>hr)&1) {
          if(regmap[hr]<64) {
            emit_storereg(r,hr);
            if((is32>>regmap[hr])&1) {
              // 32-bit value: synthesize and store the sign-extended
              // upper word (clobbers hr, which is fine -- it was just
              // written back)
              emit_sarimm(hr,31,hr);
              emit_storereg(r|64,hr);
            }
          }else{
            // This host reg holds the upper half of a 64-bit value
            emit_storereg(r|64,hr);
          }
        }
      }
    }
  }
}
1950
1951 int mchecksum()
1952 {
1953   //if(!tracedebug) return 0;
1954   int i;
1955   int sum=0;
1956   for(i=0;i<2097152;i++) {
1957     unsigned int temp=sum;
1958     sum<<=1;
1959     sum|=(~temp)>>31;
1960     sum^=((u_int *)rdram)[i];
1961   }
1962   return sum;
1963 }
1964 int rchecksum()
1965 {
1966   int i;
1967   int sum=0;
1968   for(i=0;i<64;i++)
1969     sum^=((u_int *)reg)[i];
1970   return sum;
1971 }
1972 int fchecksum()
1973 {
1974   int i;
1975   int sum=0;
1976   for(i=0;i<64;i++)
1977     sum^=((u_int *)reg_cop1_fgr_64)[i];
1978   return sum;
1979 }
1980 void rlist()
1981 {
1982   int i;
1983   printf("TRACE: ");
1984   for(i=0;i<32;i++)
1985     printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
1986   printf("\n");
1987   printf("TRACE: ");
1988   for(i=0;i<32;i++)
1989     printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i]));
1990   printf("\n");
1991 }
1992
1993 void enabletrace()
1994 {
1995   tracedebug=1;
1996 }
1997
// Trace hook called from generated code: when Count is inside the
// hard-coded window below, print an RDRAM checksum and a full register
// dump (used to diff execution against the interpreter).  Adjust the
// Count constants when hunting a specific divergence.
// NOTE(review): the (&i)[-1] and (&j)[N] reads peek at the caller's stack
// frame to recover return addresses; this depends on the compiler's frame
// layout and is debug-only by design.
void memdebug(int i)
{
  //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]);
  //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum());
  //rlist();
  //if(tracedebug) {
  //if(Count>=-2084597794) {
  if((signed int)Count>=-2084597794&&(signed int)Count<0) {
  //if(0) {
    printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
    //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status);
    //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]);
    rlist();
    #ifdef __i386__
    printf("TRACE: %x\n",(&i)[-1]);
    #endif
    #ifdef __arm__
    int j;
    printf("TRACE: %x \n",(&j)[10]);
    printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
    #endif
    //fflush(stdout);
  }
  //printf("TRACE: %x\n",(&i)[-1]);
}
2023
2024 void tlb_debug(u_int cause, u_int addr, u_int iaddr)
2025 {
2026   printf("TLB Exception: instruction=%x addr=%x cause=%x\n",iaddr, addr, cause);
2027 }
2028
// Assemble the three-operand ALU instructions (SPECIAL opcodes):
// ADD/ADDU/SUB/SUBU, DADD/DADDU/DSUB/DSUBU, SLT/SLTU, AND/OR/XOR/NOR.
// i is the instruction index, i_regs the register mapping in effect.
// Writes to r0 (rt1[i]==0) are skipped entirely; when both sources are
// nonzero the allocator guarantees they are mapped (asserted), otherwise
// the r0 operand is folded away (move/negate/zero).
void alu_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      signed char s1,s2,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      if(t>=0) {
        s1=get_reg(i_regs->regmap,rs1[i]);
        s2=get_reg(i_regs->regmap,rs2[i]);
        if(rs1[i]&&rs2[i]) {
          assert(s1>=0);
          assert(s2>=0);
          // bit 1 of the opcode distinguishes SUB from ADD
          if(opcode2[i]&2) emit_sub(s1,s2,t);
          else emit_add(s1,s2,t);
        }
        else if(rs1[i]) {
          if(s1>=0) emit_mov(s1,t);
          else emit_loadreg(rs1[i],t);
        }
        else if(rs2[i]) {
          if(s2>=0) {
            if(opcode2[i]&2) emit_neg(s2,t);
            else emit_mov(s2,t);
          }
          else {
            emit_loadreg(rs2[i],t);
            if(opcode2[i]&2) emit_neg(t,t);
          }
        }
        else emit_zeroreg(t);
      }
    }
  }
  // 64-bit add/subtract: low halves first, carry/borrow into the highs
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      signed char s1l,s2l,s1h,s2h,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      if(tl>=0) {
        s1l=get_reg(i_regs->regmap,rs1[i]);
        s2l=get_reg(i_regs->regmap,rs2[i]);
        s1h=get_reg(i_regs->regmap,rs1[i]|64);
        s2h=get_reg(i_regs->regmap,rs2[i]|64);
        if(rs1[i]&&rs2[i]) {
          assert(s1l>=0);
          assert(s2l>=0);
          if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
          else emit_adds(s1l,s2l,tl);
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
            #else
            if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
            #endif
            else emit_add(s1h,s2h,th);
          }
        }
        else if(rs1[i]) {
          if(s1l>=0) emit_mov(s1l,tl);
          else emit_loadreg(rs1[i],tl);
          if(th>=0) {
            if(s1h>=0) emit_mov(s1h,th);
            else emit_loadreg(rs1[i]|64,th);
          }
        }
        else if(rs2[i]) {
          if(s2l>=0) {
            if(opcode2[i]&2) emit_negs(s2l,tl);
            else emit_mov(s2l,tl);
          }
          else {
            emit_loadreg(rs2[i],tl);
            if(opcode2[i]&2) emit_negs(tl,tl);
          }
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(s2h>=0) emit_mov(s2h,th);
            else emit_loadreg(rs2[i]|64,th);
            if(opcode2[i]&2) {
              emit_adcimm(-1,th); // x86 has inverted carry flag
              emit_not(th,th);
            }
            #else
            if(opcode2[i]&2) {
              if(s2h>=0) emit_rscimm(s2h,0,th);
              else {
                emit_loadreg(rs2[i]|64,th);
                emit_rscimm(th,0,th);
              }
            }else{
              if(s2h>=0) emit_mov(s2h,th);
              else emit_loadreg(rs2[i]|64,th);
            }
            #endif
          }
        }
        else {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
      }
    }
  }
  // Set-on-less-than: separate 64-bit and 32-bit operand paths
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,t;
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
      {
        // 64-bit comparison (at least one operand is not known 32-bit)
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1h,31,t);  // result is just the sign bit
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz64_32(s2h,s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz64_32(s2h,s2l,t);
          }
          else {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
            else // SLTU
              emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
          }
        }
      } else {
        // 32-bit comparison
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1l,31,t);
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz32(s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz32(s2l,t);
          }
          else{
            assert(s1l>=0);assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less32(s1l,s2l,t);
            else // SLTU
              emit_set_if_carry32(s1l,s2l,t);
          }
        }
      }
    }
  }
  // Bitwise logic: 64-bit path (when the high half th is allocated) and
  // 32-bit path; NOR is emitted as OR followed by NOT
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,th,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
      {
        assert(tl>=0);
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
              emit_and(s1h,s2h,th);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
              emit_xor(s1h,s2h,th);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
              emit_not(tl,tl);
              emit_not(th,th);
            }
          }
          else
          {
            // One or both sources are r0: fold the identity/absorbing cases
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
              emit_zeroreg(th);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl);
                if(s1h>=0) emit_mov(s1h,th);
                else emit_loadreg(rs1[i]|64,th);
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl);
                if(s2h>=0) emit_mov(s2h,th);
                else emit_loadreg(rs2[i]|64,th);
              }
              else{
                emit_zeroreg(tl);
                emit_zeroreg(th);
              }
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else{
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
                if(s1h>=0) emit_not(s1h,th);
                else{
                  emit_loadreg(rs1[i]|64,th);
                  emit_not(th,th);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else{
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
                if(s2h>=0) emit_not(s2h,th);
                else{
                  emit_loadreg(rs2[i]|64,th);
                  emit_not(th,th);
                }
              }
              else {
                emit_movimm(-1,tl);
                emit_movimm(-1,th);
              }
            }
          }
        }
      }
      else
      {
        // 32 bit
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);
            assert(s2l>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_not(tl,tl);
            }
          }
          else
          {
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
              }
              else emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else {
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else {
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
              }
              else emit_movimm(-1,tl);
            }
          }
        }
      }
    }
  }
}
2360
// Assemble immediate-operand instructions: LUI, ADDI/ADDIU, DADDI/DADDIU,
// SLTI/SLTIU, ANDI/ORI/XORI.  When the destination is flagged isconst the
// constant has already been propagated and nothing is emitted; when a
// source is wasconst the known value from constmap is folded at compile
// time instead of emitting the operation.
void imm16_assemble(int i,struct regstat *i_regs)
{
  if (opcode[i]==0x0f) { // LUI
    if(rt1[i]) {
      signed char t;
      t=get_reg(i_regs->regmap,rt1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(!((i_regs->isconst>>t)&1))
          emit_movimm(imm[i]<<16,t);
      }
    }
  }
  if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      if(rs1[i]) {
        //assert(t>=0);
        //assert(s>=0);
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1)) {
            if(s<0) {
              // Source not in a register: load it into the destination
              // first (unless it is already there from the previous block)
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_addimm(t,imm[i],t);
            }else{
              if(!((i_regs->wasconst>>s)&1))
                emit_addimm(s,imm[i],t);
              else
                emit_movimm(constmap[i][s]+imm[i],t);  // constant folding
            }
          }
        }
      } else {
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1))
            emit_movimm(imm[i],t);
        }
      }
    }
  }
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]) {
          assert(sh>=0);
          assert(sl>=0);
          if(th>=0) {
            emit_addimm64_32(sh,sl,imm[i],th,tl);
          }
          else {
            emit_addimm(sl,imm[i],tl);
          }
        } else {
          emit_movimm(imm[i],tl);
          // sign-extend the immediate into the upper half
          if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
        }
      }
    }
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if(rt1[i]) {
      //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
      signed char sh,sl,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(rs1[i]>0) {
          // sh<0 means the source must be a known 32-bit value
          if(sh<0) assert((i_regs->was32>>rs1[i])&1);
          if(sh<0||((i_regs->was32>>rs1[i])&1)) {
            if(opcode[i]==0x0a) { // SLTI
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_slti32(t,imm[i],t);
              }else{
                emit_slti32(sl,imm[i],t);
              }
            }
            else { // SLTIU
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_sltiu32(t,imm[i],t);
              }else{
                emit_sltiu32(sl,imm[i],t);
              }
            }
          }else{ // 64-bit
            assert(sl>=0);
            if(opcode[i]==0x0a) // SLTI
              emit_slti64_32(sh,sl,imm[i],t);
            else // SLTIU
              emit_sltiu64_32(sh,sl,imm[i],t);
          }
        }else{
          // SLTI(U) with r0 is just stupid,
          // nonetheless examples can be found
          if(opcode[i]==0x0a) // SLTI
            if(0<imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          else // SLTIU
          {
            if(imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          }
        }
      }
    }
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
        if(opcode[i]==0x0c) //ANDI
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
              emit_andimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_andimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]&imm[i],tl);
            }
          }
          else
            emit_zeroreg(tl);
          // ANDI with a zero-extended 16-bit immediate always clears the
          // upper half
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
            }
            if(th>=0) {
              // ORI/XORI leave the upper half unchanged: just copy it
              if(sh<0) {
                emit_loadreg(rs1[i]|64,th);
              }else{
                emit_mov(sh,th);
              }
            }
            if(opcode[i]==0x0d) //ORI
            if(sl<0) {
              emit_orimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_orimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]|imm[i],tl);
            }
            if(opcode[i]==0x0e) //XORI
            if(sl<0) {
              emit_xorimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_xorimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]^imm[i],tl);
            }
          }
          else {
            emit_movimm(imm[i],tl);
            if(th>=0) emit_zeroreg(th);
          }
        }
      }
    }
  }
}
2543
// Assemble shift-by-immediate instructions: SLL/SRL/SRA (32-bit),
// DSLL/DSRL/DSRA (64-bit, shift < 32), and DSLL32/DSRL32/DSRA32 (shift
// >= 32: move a whole word between the halves, then shift the remainder
// by imm&31).
void shiftimm_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0){
        if(rs1[i]==0)
        {
          emit_zeroreg(t);
        }
        else
        {
          // Source not mapped: load it into the destination and shift
          // in place
          if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
          if(imm[i]) {
            if(opcode2[i]==0) // SLL
            {
              emit_shlimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==2) // SRL
            {
              emit_shrimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==3) // SRA
            {
              emit_sarimm(s<0?t:s,imm[i],t);
            }
          }else{
            // Shift by zero
            if(s>=0 && s!=t) emit_mov(s,t);
          }
        }
      }
      //emit_storereg(rt1[i],t); //DEBUG
    }
  }
  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          assert(sl>=0);
          assert(sh>=0);
          if(imm[i]) {
            // Double-width shifts: the *dimm emitters combine both source
            // halves to produce one result half
            if(opcode2[i]==0x38) // DSLL
            {
              if(th>=0) emit_shldimm(sh,sl,imm[i],th);
              emit_shlimm(sl,imm[i],tl);
            }
            if(opcode2[i]==0x3a) // DSRL
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_shrimm(sh,imm[i],th);
            }
            if(opcode2[i]==0x3b) // DSRA
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_sarimm(sh,imm[i],th);
            }
          }else{
            // Shift by zero
            if(sl!=tl) emit_mov(sl,tl);
            if(th>=0&&sh!=th) emit_mov(sh,th);
          }
        }
      }
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      signed char sl,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(th>=0||tl>=0){
        assert(tl>=0);
        assert(th>=0);
        assert(sl>=0);
        // low word moves to the high half, low half becomes zero
        emit_mov(sl,th);
        emit_zeroreg(tl);
        if(imm[i]>32)
        {
          emit_shlimm(th,imm[i]&31,th);
        }
      }
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      signed char sh,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        // high word moves to the low half, high half becomes zero
        emit_mov(sh,tl);
        if(th>=0) emit_zeroreg(th);
        if(imm[i]>32)
        {
          emit_shrimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      signed char sh,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        // NOTE(review): the upper (sign) half of the destination is not
        // written here -- presumably the result is tracked as 32-bit by
        // the allocator; verify against the is32 bookkeeping
        emit_mov(sh,tl);
        if(imm[i]>32)
        {
          emit_sarimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
}
2681
2682 #ifndef shift_assemble
// Fallback when no architecture-specific implementation is provided:
// variable shifts (SLLV/SRLV/SRAV and the 64-bit forms) must be supplied
// by the target's assem_* file; abort at runtime otherwise.
void shift_assemble(int i,struct regstat *i_regs)
{
  printf("Need shift_assemble for this architecture.\n");
  exit(1);
}
2688 #endif
2689
// Assemble memory loads (LB/LH/LW/LBU/LHU/LWU/LD).  Emits an inline fast
// path for RDRAM (or a TLB-translated access when using_tlb), registering
// a slow-path stub at jaddr for addresses that miss; when the address is a
// known constant (c) that falls outside the fast-path range, the load is
// compiled as a direct inline_readstub call instead.  The byte/halfword
// XOR against the address adjusts for big-endian subword placement.
void load_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl,addr,map=-1;
  int offset;
  int jaddr=0;
  int memtarget,c=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64);
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  offset=imm[i];
  // Build the list of live host registers the stub must preserve
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    // memtarget: constant address resolves inside the fast path
    // (only meaningful when c is set; the short-circuit below guarantees
    // it is never read otherwise)
    memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
    if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
  }
  // addr: the host register holding the effective address
  if(offset||s<0||c) addr=tl;
  else addr=s;
  //printf("load_assemble: c=%d\n",c);
  //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
  // FIXME: Even if the load is a NOP, we should check for pagefaults...
  if(tl>=0) {
    //assert(tl>=0);
    //assert(rt1[i]);
    reglist&=~(1<<tl);
    if(th>=0) reglist&=~(1<<th);
    if(!using_tlb) {
      if(!c) {
        // Range check: branch to the slow-path stub if outside RDRAM
//#define R29_HACK 1
        #ifdef R29_HACK
        // Strmnnrmn's speed hack
        if(rs1[i]!=29||start<0x80001000||start>=0x80800000)
        #endif
        {
          emit_cmpimm(addr,0x800000);
          jaddr=(int)out;
          #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
          // Hint to branch predictor that the branch is unlikely to be taken
          if(rs1[i]>=28)
            emit_jno_unlikely(0);
          else
          #endif
          emit_jno(0);
        }
      }
    }else{ // using tlb
      // x masks the low address bits so the TLB lookup ignores the
      // subword offset for byte/halfword accesses
      int x=0;
      if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
      if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
      map=get_reg(i_regs->regmap,TLREG);
      assert(map>=0);
      map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
      do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
    }
    if (opcode[i]==0x20) { // LB
      if(!c||memtarget) {
        #ifdef HOST_IMM_ADDR32
        if(c)
          emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
        else
        #endif
        {
          //emit_xorimm(addr,3,tl);
          //gen_tlb_addr_r(tl,map);
          //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
          int x=0;
          if(!c) emit_xorimm(addr,3,tl);  // big-endian byte swizzle
          else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
          emit_movsbl_indexed_tlb(x,tl,map,tl);
        }
        if(jaddr)
          add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x21) { // LH
      if(!c||memtarget) {
        #ifdef HOST_IMM_ADDR32
        if(c)
          emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
        else
        #endif
        {
          int x=0;
          if(!c) emit_xorimm(addr,2,tl);  // big-endian halfword swizzle
          else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
          //#ifdef
          //emit_movswl_indexed_tlb(x,tl,map,tl);
          //else
          if(map>=0) {
            gen_tlb_addr_r(tl,map);
            emit_movswl_indexed(x,tl,tl);
          }else
            emit_movswl_indexed((int)rdram-0x80000000+x,tl,tl);
        }
        if(jaddr)
          add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x23) { // LW
      if(!c||memtarget) {
        //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
        #ifdef HOST_IMM_ADDR32
        if(c)
          emit_readword_tlb(constmap[i][s]+offset,map,tl);
        else
        #endif
        emit_readword_indexed_tlb(0,addr,map,tl);
        if(jaddr)
          add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x24) { // LBU
      if(!c||memtarget) {
        #ifdef HOST_IMM_ADDR32
        if(c)
          emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
        else
        #endif
        {
          //emit_xorimm(addr,3,tl);
          //gen_tlb_addr_r(tl,map);
          //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
          int x=0;
          if(!c) emit_xorimm(addr,3,tl);
          else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
          emit_movzbl_indexed_tlb(x,tl,map,tl);
        }
        if(jaddr)
          add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x25) { // LHU
      if(!c||memtarget) {
        #ifdef HOST_IMM_ADDR32
        if(c)
          emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
        else
        #endif
        {
          int x=0;
          if(!c) emit_xorimm(addr,2,tl);
          else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
          //#ifdef
          //emit_movzwl_indexed_tlb(x,tl,map,tl);
          //#else
          if(map>=0) {
            gen_tlb_addr_r(tl,map);
            emit_movzwl_indexed(x,tl,tl);
          }else
            emit_movzwl_indexed((int)rdram-0x80000000+x,tl,tl);
          // NOTE(review): unlike the other load cases, this add_stub sits
          // inside the non-HOST_IMM_ADDR32 braces, so no stub is added on
          // the HOST_IMM_ADDR32 constant path -- verify against upstream
          if(jaddr)
            add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
        }
      }
      else
        inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x27) { // LWU
      assert(th>=0);
      if(!c||memtarget) {
        //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
        #ifdef HOST_IMM_ADDR32
        if(c)
          emit_readword_tlb(constmap[i][s]+offset,map,tl);
        else
        #endif
        emit_readword_indexed_tlb(0,addr,map,tl);
        if(jaddr)
          add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else {
        inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
      }
      // LWU zero-extends: clear the upper half
      emit_zeroreg(th);
    }
    if (opcode[i]==0x37) { // LD
      if(!c||memtarget) {
        //gen_tlb_addr_r(tl,map);
        //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
        //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
        #ifdef HOST_IMM_ADDR32
        if(c)
          emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
        else
        #endif
        emit_readdword_indexed_tlb(0,addr,map,th,tl);
        if(jaddr)
          add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    //emit_storereg(rt1[i],tl); // DEBUG
  }
  //if(opcode[i]==0x23)
  //if(opcode[i]==0x24)
  //if(opcode[i]==0x23||opcode[i]==0x24)
  /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
  {
    //emit_pusha();
    save_regs(0x100f);
        emit_readword((int)&last_count,ECX);
        #ifdef __i386__
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,HOST_CCREG);
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
        #endif
        #ifdef __arm__
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,0);
        else
          emit_mov(HOST_CCREG,0);
        emit_add(0,ECX,0);
        emit_addimm(0,2*ccadj[i],0);
        emit_writeword(0,(int)&Count);
        #endif
    emit_call((int)memdebug);
    //emit_popa();
    restore_regs(0x100f);
  }/**/
}
2925
2926 #ifndef loadlr_assemble
// Fallback stub used when no architecture-specific loadlr_assemble
// (LWL/LWR/LDL/LDR emitter) is provided.  The recompiler cannot
// proceed without one, so report and abort.
void loadlr_assemble(int i,struct regstat *i_regs)
{
  // Diagnostics belong on stderr so they are not lost when stdout
  // is redirected or buffered at exit(1).
  fprintf(stderr, "Need loadlr_assemble for this architecture.\n");
  exit(1);
}
2932 #endif
2933
// Assemble a store instruction (SB=0x28, SH=0x29, SW=0x2B, SD=0x3F).
// Emits the address range check (direct RDRAM compare, or TLB lookup
// when using_tlb), the store itself, an out-of-range stub when the
// target may be outside RAM, and finally the invalid_code check that
// catches stores into already-compiled code (self-modifying code).
void store_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl,map=-1;
  int addr,temp;
  int offset;
  int jaddr=0,jaddr2,type;
  int memtarget,c=0;
  int agr=AGEN1+(i&1);
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rs2[i]|64);   // upper half of value to store (SD)
  tl=get_reg(i_regs->regmap,rs2[i]);      // (lower half of) value to store
  s=get_reg(i_regs->regmap,rs1[i]);       // base address register
  temp=get_reg(i_regs->regmap,agr);       // address-generation temporary
  if(temp<0) temp=get_reg(i_regs->regmap,-1);
  offset=imm[i];
  if(s>=0) {
    // Base register holds a known constant: decide at compile time
    // whether the target is in RDRAM (or, with the TLB, mapped space).
    c=(i_regs->wasconst>>s)&1;
    memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
    if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
  }
  // NOTE(review): if s<0, memtarget stays uninitialized; the later
  // "else if(!memtarget)" read appears to be reached only when jaddr==0,
  // which seems to imply c (and thus s>=0) -- confirm.
  assert(tl>=0);
  assert(temp>=0);
  // Build the set of live host registers for the stub's save/restore.
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  // Effective address is in the base register itself only when there is
  // no offset and it is not constant; otherwise it was generated in temp.
  if(offset||s<0||c) addr=temp;
  else addr=s;
  if(!using_tlb) {
    if(!c) {
      #ifdef R29_HACK
      // Strmnnrmn's speed hack
      memtarget=1;
      if(rs1[i]!=29||start<0x80001000||start>=0x80800000)
      #endif
      emit_cmpimm(addr,0x800000);
      #ifdef DESTRUCTIVE_SHIFT
      if(s==addr) emit_mov(s,temp);
      #endif
      #ifdef R29_HACK
      if(rs1[i]!=29||start<0x80001000||start>=0x80800000)
      #endif
      {
        // Record the branch location; the stub target is patched later.
        jaddr=(int)out;
        #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
        // Hint to branch predictor that the branch is unlikely to be taken
        if(rs1[i]>=28)
          emit_jno_unlikely(0);
        else
        #endif
        emit_jno(0);
      }
    }
  }else{ // using tlb
    // Sub-word stores fold the byte-lane XOR into the TLB lookup.
    int x=0;
    if (opcode[i]==0x28) x=3; // SB
    if (opcode[i]==0x29) x=2; // SH
    map=get_reg(i_regs->regmap,TLREG);
    assert(map>=0);
    map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
    do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
  }

  if (opcode[i]==0x28) { // SB
    if(!c||memtarget) {
      int x=0;
      // addr^3 selects the byte lane -- presumably big-endian MIPS
      // layout on a little-endian host; confirm against load path.
      if(!c) emit_xorimm(addr,3,temp);
      else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
      //gen_tlb_addr_w(temp,map);
      //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
      emit_writebyte_indexed_tlb(tl,x,temp,map,temp);
    }
    type=STOREB_STUB;
  }
  if (opcode[i]==0x29) { // SH
    if(!c||memtarget) {
      int x=0;
      if(!c) emit_xorimm(addr,2,temp);
      else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
      //#ifdef
      //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
      //#else
      if(map>=0) {
        gen_tlb_addr_w(temp,map);
        emit_writehword_indexed(tl,x,temp);
      }else
        emit_writehword_indexed(tl,(int)rdram-0x80000000+x,temp);
    }
    type=STOREH_STUB;
  }
  if (opcode[i]==0x2B) { // SW
    if(!c||memtarget)
      //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
      emit_writeword_indexed_tlb(tl,0,addr,map,temp);
    type=STOREW_STUB;
  }
  if (opcode[i]==0x3F) { // SD
    if(!c||memtarget) {
      if(rs2[i]) {
        assert(th>=0);
        //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
        //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
        emit_writedword_indexed_tlb(th,tl,0,addr,map,temp);
      }else{
        // Store zero
        //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
        //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
        emit_writedword_indexed_tlb(tl,tl,0,addr,map,temp);
      }
    }
    type=STORED_STUB;
  }
  // Out-of-range path: patch the earlier branch to a stub, or, for a
  // known non-RAM constant address, call the write stub inline.
  if(jaddr) {
    add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
  } else if(!memtarget) {
    inline_writestub(type,i,constmap[i][s]+offset,i_regs->regmap,rs2[i],ccadj[i],reglist);
  }
  if(!using_tlb) {
    if(!c||memtarget) {
      #ifdef DESTRUCTIVE_SHIFT
      // The x86 shift operation is 'destructive'; it overwrites the
      // source register, so we need to make a copy first and use that.
      addr=temp;
      #endif
      // Check invalid_code for the stored-to page; if the page holds
      // compiled code, jump to the invalidation stub.
      #if defined(HOST_IMM8)
      int ir=get_reg(i_regs->regmap,INVCP);
      assert(ir>=0);
      emit_cmpmem_indexedsr12_reg(ir,addr,1);
      #else
      emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1);
      #endif
      jaddr2=(int)out;
      emit_jne(0);
      add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),addr,0,0,0);
    }
  }
  //if(opcode[i]==0x2B || opcode[i]==0x3F)
  //if(opcode[i]==0x2B || opcode[i]==0x28)
  //if(opcode[i]==0x2B || opcode[i]==0x29)
  //if(opcode[i]==0x2B)
  /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F)
  {
    //emit_pusha();
    save_regs(0x100f);
        emit_readword((int)&last_count,ECX);
        #ifdef __i386__
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,HOST_CCREG);
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
        #endif
        #ifdef __arm__
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,0);
        else
          emit_mov(HOST_CCREG,0);
        emit_add(0,ECX,0);
        emit_addimm(0,2*ccadj[i],0);
        emit_writeword(0,(int)&Count);
        #endif
    emit_call((int)memdebug);
    //emit_popa();
    restore_regs(0x100f);
  }/**/
}
3100
// Assemble an unaligned store: SWL=0x2A, SWR=0x2E, SDL=0x2C, SDR=0x2D.
// Tests the low two bits of the effective address and branches to one
// of four alignment cases, each emitting the partial byte/halfword/word
// writes for that alignment.  SDL/SDR additionally emit a trailing
// aligned word store (guarded by address bit 2) for the half of the
// doubleword that falls in the other word.
void storelr_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl;
  int temp;
  int temp2;
  int offset;
  int jaddr=0,jaddr2;
  int case1,case2,case3;   // forward-branch patch points per alignment
  int done0,done1,done2;   // jumps past the remaining cases
  int memtarget,c=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rs2[i]|64);  // upper half of value (SDL/SDR)
  tl=get_reg(i_regs->regmap,rs2[i]);     // (lower half of) value to store
  s=get_reg(i_regs->regmap,rs1[i]);      // base address register
  temp=get_reg(i_regs->regmap,-1);       // scratch: holds the host address
  offset=imm[i];
  if(s>=0) {
    c=(i_regs->isconst>>s)&1;
    memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80800000;
    if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
  }
  assert(tl>=0);
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(tl>=0) {
    assert(temp>=0);
    if(!using_tlb) {
      if(!c) {
        // Range-check the guest address; out of range goes to the stub.
        emit_cmpimm(s<0||offset?temp:s,0x800000);
        if(!offset&&s!=temp) emit_mov(s,temp);
        jaddr=(int)out;
        emit_jno(0);
      }
      else
      {
        // Known constant address outside RAM (or r0 base): always stub.
        if(!memtarget||!rs1[i]) {
          jaddr=(int)out;
          emit_jmp(0);
        }
      }
      // Rebase the guest address to the host rdram mapping.
      if((u_int)rdram!=0x80000000) 
        emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
    }else{ // using tlb
      int map=get_reg(i_regs->regmap,TLREG);
      assert(map>=0);
      map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
      if(!c&&!offset&&s>=0) emit_mov(s,temp);
      do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
      if(!jaddr&&!memtarget) {
        jaddr=(int)out;
        emit_jmp(0);
      }
      gen_tlb_addr_w(temp,map);
    }

    if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
      // temp2 accumulates the word for the trailing aligned store;
      // storing $zero collapses all three registers onto tl.
      temp2=get_reg(i_regs->regmap,FTEMP);
      if(!rs2[i]) temp2=th=tl;
    }

    // Dispatch on (address & 3): bit 1 first, then bit 0.
    emit_testimm(temp,2);
    case2=(int)out;
    emit_jne(0);
    emit_testimm(temp,1);
    case1=(int)out;
    emit_jne(0);
    // 0
    if (opcode[i]==0x2A) { // SWL
      emit_writeword_indexed(tl,0,temp);
    }
    if (opcode[i]==0x2E) { // SWR
      emit_writebyte_indexed(tl,3,temp);
    }
    if (opcode[i]==0x2C) { // SDL
      emit_writeword_indexed(th,0,temp);
      if(rs2[i]) emit_mov(tl,temp2);
    }
    if (opcode[i]==0x2D) { // SDR
      emit_writebyte_indexed(tl,3,temp);
      if(rs2[i]) emit_shldimm(th,tl,24,temp2);
    }
    done0=(int)out;
    emit_jmp(0);
    // 1
    set_jump_target(case1,(int)out);
    if (opcode[i]==0x2A) { // SWL
      // Write 3 msb into three least significant bytes
      if(rs2[i]) emit_rorimm(tl,8,tl);
      emit_writehword_indexed(tl,-1,temp);
      if(rs2[i]) emit_rorimm(tl,16,tl);
      emit_writebyte_indexed(tl,1,temp);
      if(rs2[i]) emit_rorimm(tl,8,tl);
    }
    if (opcode[i]==0x2E) { // SWR
      // Write two lsb into two most significant bytes
      emit_writehword_indexed(tl,1,temp);
    }
    if (opcode[i]==0x2C) { // SDL
      if(rs2[i]) emit_shrdimm(tl,th,8,temp2);
      // Write 3 msb into three least significant bytes
      if(rs2[i]) emit_rorimm(th,8,th);
      emit_writehword_indexed(th,-1,temp);
      if(rs2[i]) emit_rorimm(th,16,th);
      emit_writebyte_indexed(th,1,temp);
      if(rs2[i]) emit_rorimm(th,8,th);
    }
    if (opcode[i]==0x2D) { // SDR
      if(rs2[i]) emit_shldimm(th,tl,16,temp2);
      // Write two lsb into two most significant bytes
      emit_writehword_indexed(tl,1,temp);
    }
    done1=(int)out;
    emit_jmp(0);
    // 2
    set_jump_target(case2,(int)out);
    emit_testimm(temp,1);
    case3=(int)out;
    emit_jne(0);
    if (opcode[i]==0x2A) { // SWL
      // Write two msb into two least significant bytes
      if(rs2[i]) emit_rorimm(tl,16,tl);
      emit_writehword_indexed(tl,-2,temp);
      if(rs2[i]) emit_rorimm(tl,16,tl);
    }
    if (opcode[i]==0x2E) { // SWR
      // Write 3 lsb into three most significant bytes
      emit_writebyte_indexed(tl,-1,temp);
      if(rs2[i]) emit_rorimm(tl,8,tl);
      emit_writehword_indexed(tl,0,temp);
      if(rs2[i]) emit_rorimm(tl,24,tl);
    }
    if (opcode[i]==0x2C) { // SDL
      if(rs2[i]) emit_shrdimm(tl,th,16,temp2);
      // Write two msb into two least significant bytes
      if(rs2[i]) emit_rorimm(th,16,th);
      emit_writehword_indexed(th,-2,temp);
      if(rs2[i]) emit_rorimm(th,16,th);
    }
    if (opcode[i]==0x2D) { // SDR
      if(rs2[i]) emit_shldimm(th,tl,8,temp2);
      // Write 3 lsb into three most significant bytes
      emit_writebyte_indexed(tl,-1,temp);
      if(rs2[i]) emit_rorimm(tl,8,tl);
      emit_writehword_indexed(tl,0,temp);
      if(rs2[i]) emit_rorimm(tl,24,tl);
    }
    done2=(int)out;
    emit_jmp(0);
    // 3
    set_jump_target(case3,(int)out);
    if (opcode[i]==0x2A) { // SWL
      // Write msb into least significant byte
      if(rs2[i]) emit_rorimm(tl,24,tl);
      emit_writebyte_indexed(tl,-3,temp);
      if(rs2[i]) emit_rorimm(tl,8,tl);
    }
    if (opcode[i]==0x2E) { // SWR
      // Write entire word
      emit_writeword_indexed(tl,-3,temp);
    }
    if (opcode[i]==0x2C) { // SDL
      if(rs2[i]) emit_shrdimm(tl,th,24,temp2);
      // Write msb into least significant byte
      if(rs2[i]) emit_rorimm(th,24,th);
      emit_writebyte_indexed(th,-3,temp);
      if(rs2[i]) emit_rorimm(th,8,th);
    }
    if (opcode[i]==0x2D) { // SDR
      if(rs2[i]) emit_mov(th,temp2);
      // Write entire word
      emit_writeword_indexed(tl,-3,temp);
    }
    set_jump_target(done0,(int)out);
    set_jump_target(done1,(int)out);
    set_jump_target(done2,(int)out);
    // SDL/SDR: store the remaining word of the doubleword, aligned,
    // in the adjacent word (skipped when bit 2 says it isn't needed).
    if (opcode[i]==0x2C) { // SDL
      emit_testimm(temp,4);
      done0=(int)out;
      emit_jne(0);
      emit_andimm(temp,~3,temp);
      emit_writeword_indexed(temp2,4,temp);
      set_jump_target(done0,(int)out);
    }
    if (opcode[i]==0x2D) { // SDR
      emit_testimm(temp,4);
      done0=(int)out;
      emit_jeq(0);
      emit_andimm(temp,~3,temp);
      emit_writeword_indexed(temp2,-4,temp);
      set_jump_target(done0,(int)out);
    }
    if(!c||!memtarget)
      add_stub(STORELR_STUB,jaddr,(int)out,0,(int)i_regs,rs2[i],ccadj[i],reglist);
  }
  if(!using_tlb) {
    // Undo the rdram rebase, then check invalid_code for self-modifying
    // code, as in store_assemble.
    emit_addimm_no_flags((u_int)0x80000000-(u_int)rdram,temp);
    #if defined(HOST_IMM8)
    int ir=get_reg(i_regs->regmap,INVCP);
    assert(ir>=0);
    emit_cmpmem_indexedsr12_reg(ir,temp,1);
    #else
    emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1);
    #endif
    jaddr2=(int)out;
    emit_jne(0);
    add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),temp,0,0,0);
  }
  /*
    emit_pusha();
    //save_regs(0x100f);
        emit_readword((int)&last_count,ECX);
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,HOST_CCREG);
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
    emit_call((int)memdebug);
    emit_popa();
    //restore_regs(0x100f);
  /**/
}
3323
// Assemble a COP1 load/store: LWC1=0x31, LDC1=0x35, SWC1=0x39, SDC1=0x3D.
// FPR contents are kept in memory (reg_cop1_simple/reg_cop1_double hold
// pointers), so stores first read the value out of the FPR, and loads
// finish by writing the fetched value into the FPR.  Also emits the
// cop1-usable check the first time COP1 is touched in this block.
void c1ls_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl;
  int temp,ar;
  int map=-1;
  int offset;
  int c=0;
  int jaddr,jaddr2=0,jaddr3,type;
  int agr=AGEN1+(i&1);
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,FTEMP|64);  // upper half (LDC1/SDC1)
  tl=get_reg(i_regs->regmap,FTEMP);     // data temporary
  s=get_reg(i_regs->regmap,rs1[i]);     // base address register
  temp=get_reg(i_regs->regmap,agr);     // address-generation temporary
  if(temp<0) temp=get_reg(i_regs->regmap,-1);
  offset=imm[i];
  assert(tl>=0);
  assert(rs1[i]>0);
  assert(temp>=0);
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  if (opcode[i]==0x31||opcode[i]==0x35) // LWC1/LDC1
  {
    // Loads use a temporary register which we need to save
    reglist|=1<<temp;
  }
  if (opcode[i]==0x39||opcode[i]==0x3D) // SWC1/SDC1
    ar=temp;
  else // LWC1/LDC1
    ar=tl;
  //if(s<0) emit_loadreg(rs1[i],ar); //address_generation does this now
  //else c=(i_regs->wasconst>>s)&1;
  if(s>=0) c=(i_regs->wasconst>>s)&1;
  // Check cop1 unusable: test Status.CU1 and trap to FP_STUB if clear.
  // Only emitted once per block (cop1_usable latches).
  if(!cop1_usable) {
    signed char rs=get_reg(i_regs->regmap,CSREG);
    assert(rs>=0);
    emit_testimm(rs,0x20000000);
    jaddr=(int)out;
    emit_jeq(0);
    add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
    cop1_usable=1;
  }
  if (opcode[i]==0x39) { // SWC1 (get float address)
    emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],tl);
  }
  if (opcode[i]==0x3D) { // SDC1 (get double address)
    emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],tl);
  }
  // Generate address + offset
  if(!using_tlb) {
    if(!c)
      emit_cmpimm(offset||c||s<0?ar:s,0x800000);
  }
  else
  {
    map=get_reg(i_regs->regmap,TLREG);
    assert(map>=0);
    if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1
      map=do_tlb_r(offset||c||s<0?ar:s,ar,map,0,-1,-1,c,constmap[i][s]+offset);
    }
    if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
      map=do_tlb_w(offset||c||s<0?ar:s,ar,map,0,c,constmap[i][s]+offset);
    }
  }
  // For stores, fetch the FPR value now, while the compare result from
  // the range check above is still pending.
  if (opcode[i]==0x39) { // SWC1 (read float)
    emit_readword_indexed(0,tl,tl);
  }
  if (opcode[i]==0x3D) { // SDC1 (read double)
    emit_readword_indexed(4,tl,th);
    emit_readword_indexed(0,tl,tl);
  }
  // For loads, fetch the destination FPR pointer ahead of time.
  if (opcode[i]==0x31) { // LWC1 (get target address)
    emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
  }
  if (opcode[i]==0x35) { // LDC1 (get target address)
    emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
  }
  if(!using_tlb) {
    if(!c) {
      jaddr2=(int)out;
      emit_jno(0);
    }
    else if(((signed int)(constmap[i][s]+offset))>=(signed int)0x80800000) {
      jaddr2=(int)out;
      emit_jmp(0); // inline_readstub/inline_writestub?  Very rare case
    }
    #ifdef DESTRUCTIVE_SHIFT
    if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
      if(!offset&&!c&&s>=0) emit_mov(s,ar);
    }
    #endif
  }else{
    if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1
      do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr2);
    }
    if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
      do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr2);
    }
  }
  if (opcode[i]==0x31) { // LWC1
    //if(s>=0&&!c&&!offset) emit_mov(s,tl);
    //gen_tlb_addr_r(ar,map);
    //emit_readword_indexed((int)rdram-0x80000000,tl,tl);
    #ifdef HOST_IMM_ADDR32
    if(c) emit_readword_tlb(constmap[i][s]+offset,map,tl);
    else
    #endif
    emit_readword_indexed_tlb(0,offset||c||s<0?tl:s,map,tl);
    type=LOADW_STUB;
  }
  if (opcode[i]==0x35) { // LDC1
    assert(th>=0);
    //if(s>=0&&!c&&!offset) emit_mov(s,tl);
    //gen_tlb_addr_r(ar,map);
    //emit_readword_indexed((int)rdram-0x80000000,tl,th);
    //emit_readword_indexed((int)rdram-0x7FFFFFFC,tl,tl);
    #ifdef HOST_IMM_ADDR32
    if(c) emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
    else
    #endif
    emit_readdword_indexed_tlb(0,offset||c||s<0?tl:s,map,th,tl);
    type=LOADD_STUB;
  }
  if (opcode[i]==0x39) { // SWC1
    //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
    emit_writeword_indexed_tlb(tl,0,offset||c||s<0?temp:s,map,temp);
    type=STOREW_STUB;
  }
  if (opcode[i]==0x3D) { // SDC1
    assert(th>=0);
    //emit_writeword_indexed(th,(int)rdram-0x80000000,temp);
    //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
    emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp);
    type=STORED_STUB;
  }
  if(!using_tlb) {
    // Stores: invalid_code check for self-modifying code, as in
    // store_assemble.
    if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
      #ifndef DESTRUCTIVE_SHIFT
      temp=offset||c||s<0?ar:s;
      #endif
      #if defined(HOST_IMM8)
      int ir=get_reg(i_regs->regmap,INVCP);
      assert(ir>=0);
      emit_cmpmem_indexedsr12_reg(ir,temp,1);
      #else
      emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1);
      #endif
      jaddr3=(int)out;
      emit_jne(0);
      add_stub(INVCODE_STUB,jaddr3,(int)out,reglist|(1<<HOST_CCREG),temp,0,0,0);
    }
  }
  if(jaddr2) add_stub(type,jaddr2,(int)out,i,offset||c||s<0?ar:s,(int)i_regs,ccadj[i],reglist);
  // Loads: write the fetched value into the FPR through the pointer
  // obtained earlier.
  if (opcode[i]==0x31) { // LWC1 (write float)
    emit_writeword_indexed(tl,0,temp);
  }
  if (opcode[i]==0x35) { // LDC1 (write double)
    emit_writeword_indexed(th,4,temp);
    emit_writeword_indexed(tl,0,temp);
  }
  //if(opcode[i]==0x39)
  /*if(opcode[i]==0x39||opcode[i]==0x31)
  {
    emit_pusha();
        emit_readword((int)&last_count,ECX);
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,HOST_CCREG);
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
    emit_call((int)memdebug);
    emit_popa();
  }/**/
}
3501
3502 #ifndef multdiv_assemble
// Fallback stub used when no architecture-specific multdiv_assemble
// (MULT/MULTU/DIV/DIVU emitter) is provided.
void multdiv_assemble(int i,struct regstat *i_regs)
{
  // Fatal diagnostic goes to stderr so it is not lost to stdout
  // buffering/redirection before exit(1).
  fprintf(stderr, "Need multdiv_assemble for this architecture.\n");
  exit(1);
}
3508 #endif
3509
3510 void mov_assemble(int i,struct regstat *i_regs)
3511 {
3512   //if(opcode2[i]==0x10||opcode2[i]==0x12) { // MFHI/MFLO
3513   //if(opcode2[i]==0x11||opcode2[i]==0x13) { // MTHI/MTLO
3514   assert(rt1[i]>0);
3515   if(rt1[i]) {
3516     signed char sh,sl,th,tl;
3517     th=get_reg(i_regs->regmap,rt1[i]|64);
3518     tl=get_reg(i_regs->regmap,rt1[i]);
3519     //assert(tl>=0);
3520     if(tl>=0) {
3521       sh=get_reg(i_regs->regmap,rs1[i]|64);
3522       sl=get_reg(i_regs->regmap,rs1[i]);
3523       if(sl>=0) emit_mov(sl,tl);
3524       else emit_loadreg(rs1[i],tl);
3525       if(th>=0) {
3526         if(sh>=0) emit_mov(sh,th);
3527         else emit_loadreg(rs1[i]|64,th);
3528       }
3529     }
3530   }
3531 }
3532
3533 #ifndef fconv_assemble
// Fallback stub used when no architecture-specific fconv_assemble
// (FP conversion emitter) is provided.
void fconv_assemble(int i,struct regstat *i_regs)
{
  // Fatal diagnostic goes to stderr so it is not lost to stdout
  // buffering/redirection before exit(1).
  fprintf(stderr, "Need fconv_assemble for this architecture.\n");
  exit(1);
}
3539 #endif
3540
3541 #if 0
// Disabled (#if 0) fallback stub for float_assemble -- presumably each
// supported backend provides its own implementation; confirm before
// re-enabling.
void float_assemble(int i,struct regstat *i_regs)
{
  printf("Need float_assemble for this architecture.\n");
  exit(1);
}
3547 #endif
3548
// Assemble a SYSCALL instruction: put the instruction's PC in EAX,
// add the accumulated cycle adjustment to the cycle-count register,
// and jump to the out-of-line syscall handler.
void syscall_assemble(int i,struct regstat *i_regs)
{
  signed char ccreg=get_reg(i_regs->regmap,CCREG);
  assert(ccreg==HOST_CCREG);  // cycle count must already be in its host register
  assert(!is_delayslot);      // SYSCALL is not expected in a delay slot
  emit_movimm(start+i*4,EAX); // Get PC
  emit_addimm(HOST_CCREG,CLOCK_DIVIDER*ccadj[i],HOST_CCREG); // CHECK: is this right?  There should probably be an extra cycle...
  emit_jmp((int)jump_syscall);
}
3558
// Assemble the instruction occupying a branch delay slot.  Sets the
// global is_delayslot flag for the duration so sub-assemblers can tell
// (e.g. c1ls_assemble passes it into its FP_STUB), then dispatches on
// the decoded instruction type exactly like the main assembly loop.
void ds_assemble(int i,struct regstat *i_regs)
{
  is_delayslot=1;
  switch(itype[i]) {
    case ALU:
      alu_assemble(i,i_regs);break;
    case IMM16:
      imm16_assemble(i,i_regs);break;
    case SHIFT:
      shift_assemble(i,i_regs);break;
    case SHIFTIMM:
      shiftimm_assemble(i,i_regs);break;
    case LOAD:
      load_assemble(i,i_regs);break;
    case LOADLR:
      loadlr_assemble(i,i_regs);break;
    case STORE:
      store_assemble(i,i_regs);break;
    case STORELR:
      storelr_assemble(i,i_regs);break;
    case COP0:
      cop0_assemble(i,i_regs);break;
    case COP1:
      cop1_assemble(i,i_regs);break;
    case C1LS:
      c1ls_assemble(i,i_regs);break;
    case FCONV:
      fconv_assemble(i,i_regs);break;
    case FLOAT:
      float_assemble(i,i_regs);break;
    case FCOMP:
      fcomp_assemble(i,i_regs);break;
    case MULTDIV:
      multdiv_assemble(i,i_regs);break;
    case MOV:
      mov_assemble(i,i_regs);break;
    case SYSCALL:
    case SPAN:
    case UJUMP:
    case RJUMP:
    case CJUMP:
    case SJUMP:
    case FJUMP:
      // Control-transfer instructions may not occupy a delay slot;
      // nothing is emitted for them here.
      printf("Jump in the delay slot.  This is probably a bug.\n");
  }
  is_delayslot=0;
}
3606
3607 // Is the branch target a valid internal jump?
3608 int internal_branch(uint64_t i_is32,int addr)
3609 {
3610   if(addr&1) return 0; // Indirect (register) jump
3611   if(addr>=start && addr<start+slen*4-4)
3612   {
3613     int t=(addr-start)>>2;
3614     // Delay slots are not valid branch targets
3615     //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0;
3616     // 64 -> 32 bit transition requires a recompile
3617     /*if(is32[t]&~unneeded_reg_upper[t]&~i_is32)
3618     {
3619       if(requires_32bit[t]&~i_is32) printf("optimizable: no\n");
3620       else printf("optimizable: yes\n");
3621     }*/
3622     //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0;
3623     if(requires_32bit[t]&~i_is32) return 0;
3624     else return 1;
3625   }
3626   return 0;
3627 }
3628
3629 #ifndef wb_invalidate
// Write back and shuffle host registers when the register mapping
// changes from 'pre' to 'entry'.  Dirty guest registers that lose
// their host register (and are still needed per u/uu) are stored to
// the register file; mappings that merely move between host registers
// are handled with a register-to-register move, no writeback.
//   pre   - host->guest mapping before the transition
//   entry - host->guest mapping required at the target
//   dirty - per-host-register dirty bits
//   is32  - guest registers known to be 32-bit (sign-extendable)
//   u,uu  - unneeded-register bitmaps (lower/upper halves)
void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,
  uint64_t u,uint64_t uu)
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if(pre[hr]!=entry[hr]) {
        if(pre[hr]>=0) {
          if((dirty>>hr)&1) {
            // Only write back if the guest reg isn't kept elsewhere.
            if(get_reg(entry,pre[hr])<0) {
              if(pre[hr]<64) {
                if(!((u>>pre[hr])&1)) {
                  emit_storereg(pre[hr],hr);
                  // 32-bit value whose upper half is still needed:
                  // materialize it by sign extension.
                  if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
                    emit_sarimm(hr,31,hr);
                    emit_storereg(pre[hr]|64,hr);
                  }
                }
              }else{
                // Upper half (reg|64): skip if unneeded or derivable
                // from the 32-bit (sign-extended) lower half.
                if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
                  emit_storereg(pre[hr],hr);
                }
              }
            }
          }
        }
      }
    }
  }
  // Move from one register to another (no writeback)
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if(pre[hr]!=entry[hr]) {
        if(pre[hr]>=0&&(pre[hr]&63)<TEMPREG) {
          int nr;
          if((nr=get_reg(entry,pre[hr]))>=0) {
            emit_mov(hr,nr);
          }
        }
      }
    }
  }
}
3673 #endif
3674
3675 // Load the specified registers
3676 // This only loads the registers given as arguments because
3677 // we don't want to load things that will be overwritten
// Load the specified registers
// This only loads the registers given as arguments because
// we don't want to load things that will be overwritten
//   entry  - host->guest mapping at block entry (what's already loaded)
//   regmap - host->guest mapping this instruction needs
//   is32   - guest registers known to be 32-bit
//   rs1,rs2- the (guest) source registers to load; others are skipped
void load_regs(signed char entry[],signed char regmap[],int is32,int rs1,int rs2)
{
  int hr;
  // Load 32-bit regs
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
      // Skip host regs that already hold the right guest reg.
      if(entry[hr]!=regmap[hr]) {
        if(regmap[hr]==rs1||regmap[hr]==rs2)
        {
          if(regmap[hr]==0) {
            // Guest r0 is hardwired zero; no memory load needed.
            emit_zeroreg(hr);
          }
          else
          {
            emit_loadreg(regmap[hr],hr);
          }
        }
      }
    }
  }
  //Load 64-bit regs
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
      if(entry[hr]!=regmap[hr]) {
        if(regmap[hr]-64==rs1||regmap[hr]-64==rs2)
        {
          assert(regmap[hr]!=64);
          if((is32>>(regmap[hr]&63))&1) {
            // 32-bit value: derive the upper half by sign-extending
            // the lower half if it is in a host register.
            int lr=get_reg(regmap,regmap[hr]-64);
            if(lr>=0)
              emit_sarimm(lr,31,hr);
            else
              emit_loadreg(regmap[hr],hr);
          }
          else
          {
            emit_loadreg(regmap[hr],hr);
          }
        }
      }
    }
  }
}
3721
3722 // Load registers prior to the start of a loop
3723 // so that they are not loaded within the loop
// Load registers prior to the start of a loop
// so that they are not loaded within the loop
//   pre   - host->guest mapping before the loop
//   entry - host->guest mapping required at the loop entry point
static void loop_preload(signed char pre[],signed char entry[])
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if(pre[hr]!=entry[hr]) {
        if(entry[hr]>=0) {
          // Only preload if the guest reg isn't already in some host reg.
          if(get_reg(pre,entry[hr])<0) {
            assem_debug("loop preload:\n");
            //printf("loop preload: %d\n",hr);
            if(entry[hr]==0) {
              // Guest r0: materialize zero directly.
              emit_zeroreg(hr);
            }
            else if(entry[hr]<TEMPREG)
            {
              emit_loadreg(entry[hr],hr);
            }
            else if(entry[hr]-64<TEMPREG)
            {
              // Upper half (reg|64) -- same load; emit_loadreg handles
              // the |64 encoding.  NOTE(review): both branches are
              // identical; presumably kept separate for clarity.
              emit_loadreg(entry[hr],hr);
            }
          }
        }
      }
    }
  }
}
3751
3752 // Generate address for load/store instruction
3753 void address_generation(int i,struct regstat *i_regs,signed char entry[])
3754 {
3755   if(itype[i]==LOAD||itype[i]==LOADLR||itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS) {
3756     int ra;
3757     int agr=AGEN1+(i&1);
3758     int mgr=MGEN1+(i&1);
3759     if(itype[i]==LOAD) {
3760       ra=get_reg(i_regs->regmap,rt1[i]);
3761       //if(rt1[i]) assert(ra>=0);
3762     }
3763     if(itype[i]==LOADLR) {
3764       ra=get_reg(i_regs->regmap,FTEMP);
3765     }
3766     if(itype[i]==STORE||itype[i]==STORELR) {
3767       ra=get_reg(i_regs->regmap,agr);
3768       if(ra<0) ra=get_reg(i_regs->regmap,-1);
3769     }
3770     if(itype[i]==C1LS) {
3771       if (opcode[i]==0x31||opcode[i]==0x35) // LWC1/LDC1
3772         ra=get_reg(i_regs->regmap,FTEMP);
3773       else { // SWC1/SDC1
3774         ra=get_reg(i_regs->regmap,agr);
3775         if(ra<0) ra=get_reg(i_regs->regmap,-1);
3776       }
3777     }
3778     int rs=get_reg(i_regs->regmap,rs1[i]);
3779     int rm=get_reg(i_regs->regmap,TLREG);
3780     if(ra>=0) {
3781       int offset=imm[i];
3782       int c=(i_regs->wasconst>>rs)&1;
3783       if(rs1[i]==0) {
3784         // Using r0 as a base address
3785         /*if(rm>=0) {
3786           if(!entry||entry[rm]!=mgr) {
3787             generate_map_const(offset,rm);
3788           } // else did it in the previous cycle
3789         }*/
3790         if(!entry||entry[ra]!=agr) {
3791           if (opcode[i]==0x22||opcode[i]==0x26) {
3792             emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
3793           }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
3794             emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR
3795           }else{
3796             emit_movimm(offset,ra);
3797           }
3798         } // else did it in the previous cycle
3799       }
3800       else if(rs<0) {
3801         if(!entry||entry[ra]!=rs1[i])
3802           emit_loadreg(rs1[i],ra);
3803         //if(!entry||entry[ra]!=rs1[i])
3804         //  printf("poor load scheduling!\n");
3805       }
3806       else if(c) {
3807         if(rm>=0) {
3808           if(!entry||entry[rm]!=mgr) {
3809             if(itype[i]==STORE||itype[i]==STORELR||opcode[i]==0x39||opcode[i]==0x3D) {
3810               // Stores to memory go thru the mapper to detect self-modifying
3811               // code, loads don't.
3812               if((unsigned int)(constmap[i][rs]+offset)>=0xC0000000 ||
3813                  (unsigned int)(constmap[i][rs]+offset)<0x80800000 )
3814                 generate_map_const(constmap[i][rs]+offset,rm);
3815             }else{
3816               if((signed int)(constmap[i][rs]+offset)>=(signed int)0xC0000000)
3817                 generate_map_const(constmap[i][rs]+offset,rm);
3818             }
3819           }
3820         }
3821         if(rs1[i]!=rt1[i]||itype[i]!=LOAD) {
3822           if(!entry||entry[ra]!=agr) {
3823             if (opcode[i]==0x22||opcode[i]==0x26) {
3824               emit_movimm((constmap[i][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR
3825             }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
3826               emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
3827             }else{
3828               #ifdef HOST_IMM_ADDR32
3829               if((itype[i]!=LOAD&&opcode[i]!=0x31&&opcode[i]!=0x35) ||
3830                  (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000))
3831               #endif
3832               emit_movimm(constmap[i][rs]+offset,ra);
3833             }
3834           } // else did it in the previous cycle
3835         } // else load_consts already did it
3836       }
3837       if(offset&&!c&&rs1[i]) {
3838         if(rs>=0) {
3839           emit_addimm(rs,offset,ra);
3840         }else{
3841           emit_addimm(ra,offset,ra);
3842         }
3843       }
3844     }
3845   }
3846   // Preload constants for next instruction
3847   if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS) {
3848     int agr,ra;
3849     #ifndef HOST_IMM_ADDR32
3850     // Mapper entry
3851     agr=MGEN1+((i+1)&1);
3852     ra=get_reg(i_regs->regmap,agr);
3853     if(ra>=0) {
3854       int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
3855       int offset=imm[i+1];
3856       int c=(regs[i+1].wasconst>>rs)&1;
3857       if(c) {
3858         if(itype[i+1]==STORE||itype[i+1]==STORELR||opcode[i+1]==0x39||opcode[i+1]==0x3D) {
3859           // Stores to memory go thru the mapper to detect self-modifying
3860           // code, loads don't.
3861           if((unsigned int)(constmap[i+1][rs]+offset)>=0xC0000000 ||
3862              (unsigned int)(constmap[i+1][rs]+offset)<0x80800000 )
3863             generate_map_const(constmap[i+1][rs]+offset,ra);
3864         }else{
3865           if((signed int)(constmap[i+1][rs]+offset)>=(signed int)0xC0000000)
3866             generate_map_const(constmap[i+1][rs]+offset,ra);
3867         }
3868       }
3869       /*else if(rs1[i]==0) {
3870         generate_map_const(offset,ra);
3871       }*/
3872     }
3873     #endif
3874     // Actual address
3875     agr=AGEN1+((i+1)&1);
3876     ra=get_reg(i_regs->regmap,agr);
3877     if(ra>=0) {
3878       int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
3879       int offset=imm[i+1];
3880       int c=(regs[i+1].wasconst>>rs)&1;
3881       if(c&&(rs1[i+1]!=rt1[i+1]||itype[i+1]!=LOAD)) {
3882         if (opcode[i+1]==0x22||opcode[i+1]==0x26) {
3883           emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR
3884         }else if (opcode[i+1]==0x1a||opcode[i+1]==0x1b) {
3885           emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
3886         }else{
3887           #ifdef HOST_IMM_ADDR32
3888           if((itype[i+1]!=LOAD&&opcode[i+1]!=0x31&&opcode[i+1]!=0x35) ||
3889              (using_tlb&&((signed int)constmap[i+1][rs]+offset)>=(signed int)0xC0000000))
3890           #endif
3891           emit_movimm(constmap[i+1][rs]+offset,ra);
3892         }
3893       }
3894       else if(rs1[i+1]==0) {
3895         // Using r0 as a base address
3896         if (opcode[i+1]==0x22||opcode[i+1]==0x26) {
3897           emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
3898         }else if (opcode[i+1]==0x1a||opcode[i+1]==0x1b) {
3899           emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR
3900         }else{
3901           emit_movimm(offset,ra);
3902         }
3903       }
3904     }
3905   }
3906 }
3907
3908 int get_final_value(int hr, int i, int *value)
3909 {
3910   int reg=regs[i].regmap[hr];
3911   while(i<slen-1) {