drc: replace unused reg32 with new reg_sv_flags
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
d848b60a 24#include <errno.h>
4600ba03 25#include <sys/mman.h>
57871462 26
3d624f89 27#include "emu_if.h" //emulator interface
57871462 28
4600ba03 29//#define DISASM
30//#define assem_debug printf
31//#define inv_debug printf
32#define assem_debug(...)
33#define inv_debug(...)
57871462 34
35#ifdef __i386__
36#include "assem_x86.h"
37#endif
38#ifdef __x86_64__
39#include "assem_x64.h"
40#endif
41#ifdef __arm__
42#include "assem_arm.h"
43#endif
44
f23d3386 45#ifdef __BLACKBERRY_QNX__
a4874585
C
46#undef __clear_cache
47#define __clear_cache(start,end) msync(start, (size_t)((void*)end - (void*)start), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
c7b746f0 48#elif defined(__MACH__)
49#include <libkern/OSCacheControl.h>
50#define __clear_cache mach_clear_cache
51static void __clear_cache(void *start, void *end) {
52 size_t len = (char *)end - (char *)start;
53 sys_dcache_flush(start, len);
54 sys_icache_invalidate(start, len);
55}
f23d3386 56#endif
a4874585 57
57871462 58#define MAXBLOCK 4096
59#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 60
// Per-instruction register-allocation state tracked by the recompiler.
struct regstat
{
  signed char regmap_entry[HOST_REGS]; // host-reg -> MIPS-reg mapping required on entry
  signed char regmap[HOST_REGS];       // current host-reg -> MIPS-reg mapping (-1 = free)
  uint64_t was32;     // bitmask of MIPS regs known 32-bit (sign-extended) before this insn
  uint64_t is32;      // same, after this insn
  uint64_t wasdirty;  // host regs holding modified, not-written-back values before
  uint64_t dirty;     // same, after
  uint64_t u;         // MIPS regs whose (lower 32-bit) value is unneeded
  uint64_t uu;        // MIPS regs whose upper 32-bit value is unneeded
  u_int wasconst;     // host regs holding known constants before
  u_int isconst;      // same, after
  u_int loadedconst; // host regs that have constants loaded
  u_int waswritten; // MIPS regs that were used as store base before
};
76
// note: asm depends on this layout
struct ll_entry
{
  u_int vaddr;            // MIPS virtual address of the compiled block
  u_int reg_sv_flags;     // NOTE(review): presumably a bitmask of regs with
                          // known sign-extended values on entry -- confirm
  void *addr;             // pointer into the translation cache
  struct ll_entry *next;  // next entry in this page's chain
};
85
86 u_int start;
87 u_int *source;
88 u_int pagelimit;
89 char insn[MAXBLOCK][10];
90 u_char itype[MAXBLOCK];
91 u_char opcode[MAXBLOCK];
92 u_char opcode2[MAXBLOCK];
93 u_char bt[MAXBLOCK];
94 u_char rs1[MAXBLOCK];
95 u_char rs2[MAXBLOCK];
96 u_char rt1[MAXBLOCK];
97 u_char rt2[MAXBLOCK];
98 u_char us1[MAXBLOCK];
99 u_char us2[MAXBLOCK];
100 u_char dep1[MAXBLOCK];
101 u_char dep2[MAXBLOCK];
102 u_char lt1[MAXBLOCK];
bedfea38 103 static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
104 static uint64_t gte_rt[MAXBLOCK];
105 static uint64_t gte_unneeded[MAXBLOCK];
ffb0b9e0 106 static u_int smrv[32]; // speculated MIPS register values
107 static u_int smrv_strong; // mask or regs that are likely to have correct values
108 static u_int smrv_weak; // same, but somewhat less likely
109 static u_int smrv_strong_next; // same, but after current insn executes
110 static u_int smrv_weak_next;
57871462 111 int imm[MAXBLOCK];
112 u_int ba[MAXBLOCK];
113 char likely[MAXBLOCK];
114 char is_ds[MAXBLOCK];
e1190b87 115 char ooo[MAXBLOCK];
57871462 116 uint64_t unneeded_reg[MAXBLOCK];
117 uint64_t unneeded_reg_upper[MAXBLOCK];
118 uint64_t branch_unneeded_reg[MAXBLOCK];
119 uint64_t branch_unneeded_reg_upper[MAXBLOCK];
120 uint64_t p32[MAXBLOCK];
121 uint64_t pr32[MAXBLOCK];
122 signed char regmap_pre[MAXBLOCK][HOST_REGS];
956f3129 123 static uint64_t current_constmap[HOST_REGS];
124 static uint64_t constmap[MAXBLOCK][HOST_REGS];
125 static struct regstat regs[MAXBLOCK];
126 static struct regstat branch_regs[MAXBLOCK];
e1190b87 127 signed char minimum_free_regs[MAXBLOCK];
57871462 128 u_int needed_reg[MAXBLOCK];
129 uint64_t requires_32bit[MAXBLOCK];
130 u_int wont_dirty[MAXBLOCK];
131 u_int will_dirty[MAXBLOCK];
132 int ccadj[MAXBLOCK];
133 int slen;
134 u_int instr_addr[MAXBLOCK];
135 u_int link_addr[MAXBLOCK][3];
136 int linkcount;
137 u_int stubs[MAXBLOCK*3][8];
138 int stubcount;
139 u_int literals[1024][2];
140 int literalcount;
141 int is_delayslot;
142 int cop1_usable;
143 u_char *out;
de5a60c3 144 struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
57871462 145 struct ll_entry *jump_out[4096];
146 struct ll_entry *jump_dirty[4096];
147 u_int hash_table[65536][4] __attribute__((aligned(16)));
148 char shadow[1048576] __attribute__((aligned(16)));
149 void *copy;
150 int expirep;
af4ee1fe 151#ifndef PCSX
57871462 152 u_int using_tlb;
af4ee1fe 153#else
154 static const u_int using_tlb=0;
155#endif
2f546f9a 156 int new_dynarec_did_compile;
0ff8c62c 157 int new_dynarec_hacks;
57871462 158 u_int stop_after_jal;
a327ad27 159#ifndef RAM_FIXED
160 static u_int ram_offset;
161#else
162 static const u_int ram_offset=0;
163#endif
57871462 164 extern u_char restore_candidate[512];
165 extern int cycle_count;
166
167 /* registers that may be allocated */
168 /* 1-31 gpr */
169#define HIREG 32 // hi
170#define LOREG 33 // lo
171#define FSREG 34 // FPU status (FCSR)
172#define CSREG 35 // Coprocessor status
173#define CCREG 36 // Cycle count
174#define INVCP 37 // Pointer to invalid_code
619e5ded 175#define MMREG 38 // Pointer to memory_map
176#define ROREG 39 // ram offset (if rdram!=0x80000000)
177#define TEMPREG 40
178#define FTEMP 40 // FPU temporary register
179#define PTEMP 41 // Prefetch temporary register
180#define TLREG 42 // TLB mapping offset
181#define RHASH 43 // Return address hash
182#define RHTBL 44 // Return address hash table address
183#define RTEMP 45 // JR/JALR address register
184#define MAXREG 45
185#define AGEN1 46 // Address generation temporary register
186#define AGEN2 47 // Address generation temporary register
187#define MGEN1 48 // Maptable address generation temporary register
188#define MGEN2 49 // Maptable address generation temporary register
189#define BTREG 50 // Branch target temporary register
57871462 190
191 /* instruction types */
192#define NOP 0 // No operation
193#define LOAD 1 // Load
194#define STORE 2 // Store
195#define LOADLR 3 // Unaligned load
196#define STORELR 4 // Unaligned store
197#define MOV 5 // Move
198#define ALU 6 // Arithmetic/logic
199#define MULTDIV 7 // Multiply/divide
200#define SHIFT 8 // Shift by register
201#define SHIFTIMM 9// Shift by immediate
202#define IMM16 10 // 16-bit immediate
203#define RJUMP 11 // Unconditional jump to register
204#define UJUMP 12 // Unconditional jump
205#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
206#define SJUMP 14 // Conditional branch (regimm format)
207#define COP0 15 // Coprocessor 0
208#define COP1 16 // Coprocessor 1
209#define C1LS 17 // Coprocessor 1 load/store
210#define FJUMP 18 // Conditional branch (floating point)
211#define FLOAT 19 // Floating point unit
212#define FCONV 20 // Convert integer to float
213#define FCOMP 21 // Floating point compare (sets FSREG)
214#define SYSCALL 22// SYSCALL
215#define OTHER 23 // Other
216#define SPAN 24 // Branch/delay slot spans 2 pages
217#define NI 25 // Not implemented
7139f3c8 218#define HLECALL 26// PCSX fake opcodes for HLE
b9b61529 219#define COP2 27 // Coprocessor 2 move
220#define C2LS 28 // Coprocessor 2 load/store
221#define C2OP 29 // Coprocessor 2 operation
1e973cb0 222#define INTCALL 30// Call interpreter to handle rare corner cases
57871462 223
224 /* stubs */
225#define CC_STUB 1
226#define FP_STUB 2
227#define LOADB_STUB 3
228#define LOADH_STUB 4
229#define LOADW_STUB 5
230#define LOADD_STUB 6
231#define LOADBU_STUB 7
232#define LOADHU_STUB 8
233#define STOREB_STUB 9
234#define STOREH_STUB 10
235#define STOREW_STUB 11
236#define STORED_STUB 12
237#define STORELR_STUB 13
238#define INVCODE_STUB 14
239
240 /* branch codes */
241#define TAKEN 1
242#define NOTTAKEN 2
243#define NULLDS 3
244
245// asm linkage
246int new_recompile_block(int addr);
247void *get_addr_ht(u_int vaddr);
248void invalidate_block(u_int block);
249void invalidate_addr(u_int addr);
250void remove_hash(int vaddr);
251void jump_vaddr();
252void dyna_linker();
253void dyna_linker_ds();
254void verify_code();
255void verify_code_vm();
256void verify_code_ds();
257void cc_interrupt();
258void fp_exception();
259void fp_exception_ds();
260void jump_syscall();
7139f3c8 261void jump_syscall_hle();
57871462 262void jump_eret();
7139f3c8 263void jump_hlecall();
1e973cb0 264void jump_intcall();
7139f3c8 265void new_dyna_leave();
57871462 266
267// TLB
268void TLBWI_new();
269void TLBWR_new();
270void read_nomem_new();
271void read_nomemb_new();
272void read_nomemh_new();
273void read_nomemd_new();
274void write_nomem_new();
275void write_nomemb_new();
276void write_nomemh_new();
277void write_nomemd_new();
278void write_rdram_new();
279void write_rdramb_new();
280void write_rdramh_new();
281void write_rdramd_new();
282extern u_int memory_map[1048576];
283
284// Needed by assembler
285void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
286void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
287void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
288void load_all_regs(signed char i_regmap[]);
289void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
290void load_regs_entry(int t);
291void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
292
293int tracedebug=0;
294
295//#define DEBUG_CYCLE_COUNT 1
296
b6e87b2b 297#define NO_CYCLE_PENALTY_THR 12
298
int cycle_multiplier; // 100 for 1.0

// Scale a cycle count by cycle_multiplier/100, rounding half away from zero.
static int CLOCK_ADJUST(int x)
{
  int rounder = (x < 0) ? -50 : 50;
  return (x * cycle_multiplier + rounder) / 100;
}
306
// Game-specific TLB workarounds (N64 build only; empty when DISABLE_TLB).
static void tlb_hacks()
{
#ifndef DISABLE_TLB
  // Goldeneye hack
  if (strncmp((char *) ROM_HEADER->nom, "GOLDENEYE",9) == 0)
  {
    u_int addr;
    int n;
    // Patch address depends on the ROM region.
    switch (ROM_HEADER->Country_code&0xFF)
    {
      case 0x45: // U
        addr=0x34b30;
        break;
      case 0x4A: // J
        addr=0x34b70;
        break;
      case 0x50: // E
        addr=0x329f0;
        break;
      default:
        // Unknown country code
        addr=0;
        break;
    }
    u_int rom_addr=(u_int)rom;
    #ifdef ROM_COPY
    // Since memory_map is 32-bit, on 64-bit systems the rom needs to be
    // in the lower 4G of memory to use this hack.  Copy it if necessary.
    if((void *)rom>(void *)0xffffffff) {
      munmap(ROM_COPY, 67108864);
      if(mmap(ROM_COPY, 12582912,
              PROT_READ | PROT_WRITE,
              MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
              -1, 0) <= 0) {printf("mmap() failed\n");}
      memcpy(ROM_COPY,rom,12582912);
      rom_addr=(u_int)ROM_COPY;
    }
    #endif
    if(addr) {
      // Map virtual pages 0x7F000xxx straight onto the ROM image.
      for(n=0x7F000;n<0x80000;n++) {
        memory_map[n]=(((u_int)(rom_addr+addr-0x7F000000))>>2)|0x40000000;
      }
    }
  }
#endif
}
353
// Map a virtual address to its jump_in/jump_out page index (0..4095).
static u_int get_page(u_int vaddr)
{
#ifndef PCSX
  u_int page=(vaddr^0x80000000)>>12;
#else
  u_int page=vaddr&~0xe0000000; // strip KSEG bits
  if (page < 0x1000000)
    page &= ~0x0e00000; // RAM mirrors
  page>>=12;
#endif
#ifndef DISABLE_TLB
  if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12;
#endif
  if(page>2048) page=2048+(page&2047); // out-of-range pages fold into the upper half
  return page;
}
370
#ifndef PCSX
// Page index used for the jump_dirty lists (keyed on the virtual address).
static u_int get_vpage(u_int vaddr)
{
  u_int vpage=(vaddr^0x80000000)>>12;
#ifndef DISABLE_TLB
  if(vpage>262143&&tlb_LUT_r[vaddr>>12]) vpage&=2047; // jump_dirty uses a hash of the virtual address instead
#endif
  if(vpage>2048) vpage=2048+(vpage&2047);
  return vpage;
}
#else
// no virtual mem in PCSX
static u_int get_vpage(u_int vaddr)
{
  return get_page(vaddr);
}
#endif
94d23bb9 388
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
void *get_addr(u_int vaddr)
{
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  // 1. Look for a clean, already-compiled block for this address.
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Promote into the hash table as most-recent pair (slots 0/1),
      // demoting the previous occupant to slots 2/3.
      int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
      ht_bin[3]=ht_bin[1];
      ht_bin[2]=ht_bin[0];
      ht_bin[1]=(int)head->addr;
      ht_bin[0]=vaddr;
      return head->addr;
    }
    head=head->next;
  }
  // 2. Look for a dirty block that can be revalidated against its source.
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(verify_dirty(head->addr)) {
        // Source still matches the compiled code: mark the page valid again.
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        invalid_code[vaddr>>12]=0;
        inv_code_start=inv_code_end=~0;
#ifndef DISABLE_TLB
        memory_map[vaddr>>12]|=0x40000000;
#endif
        if(vpage<2048) {
#ifndef DISABLE_TLB
          if(tlb_LUT_r[vaddr>>12]) {
            invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
            memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
          }
#endif
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr; // Replace existing entry
        }
        else
        {
          ht_bin[3]=ht_bin[1];
          ht_bin[2]=ht_bin[0];
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
  // 3. Nothing usable: compile the block now.
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault exception
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
461// Look up address in hash table first
462void *get_addr_ht(u_int vaddr)
463{
464 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
465 int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
466 if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
467 if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
468 return get_addr(vaddr);
469}
470
57871462 471void clear_all_regs(signed char regmap[])
472{
473 int hr;
474 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
475}
476
477signed char get_reg(signed char regmap[],int r)
478{
479 int hr;
480 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
481 return -1;
482}
483
484// Find a register that is available for two consecutive cycles
485signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
486{
487 int hr;
488 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
489 return -1;
490}
491
492int count_free_regs(signed char regmap[])
493{
494 int count=0;
495 int hr;
496 for(hr=0;hr<HOST_REGS;hr++)
497 {
498 if(hr!=EXCLUDE_REG) {
499 if(regmap[hr]<0) count++;
500 }
501 }
502 return count;
503}
504
505void dirty_reg(struct regstat *cur,signed char reg)
506{
507 int hr;
508 if(!reg) return;
509 for (hr=0;hr<HOST_REGS;hr++) {
510 if((cur->regmap[hr]&63)==reg) {
511 cur->dirty|=1<<hr;
512 }
513 }
514}
515
// If we dirty the lower half of a 64 bit register which is now being
// sign-extended, we need to dump the upper half.
// Note: Do this only after completion of the instruction, because
// some instructions may need to read the full 64-bit value even if
// overwriting it (eg SLTI, DSRA32).
static void flush_dirty_uppers(struct regstat *cur)
{
  int hr,reg;
  for (hr=0;hr<HOST_REGS;hr++) {
    if((cur->dirty>>hr)&1) {
      reg=cur->regmap[hr];
      // regmap values >=64 denote the upper half of MIPS reg (reg&63);
      // if that register is now known 32-bit, the upper half is dead.
      if(reg>=64)
        if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
    }
  }
}
532
// Record that MIPS register `reg` holds the compile-time constant `value`.
// Host regs mapped to `reg` receive the low 32 bits; host regs mapped to
// the upper-half alias (reg^64, cf. flush_dirty_uppers) receive the high 32.
void set_const(struct regstat *cur,signed char reg,uint64_t value)
{
  int hr;
  if(!reg) return; // $zero is implicitly constant
  for (hr=0;hr<HOST_REGS;hr++) {
    if(cur->regmap[hr]==reg) {
      cur->isconst|=1<<hr;
      current_constmap[hr]=value;
    }
    else if((cur->regmap[hr]^64)==reg) {
      // This host reg holds the upper half of the 64-bit register.
      cur->isconst|=1<<hr;
      current_constmap[hr]=value>>32;
    }
  }
}
548
549void clear_const(struct regstat *cur,signed char reg)
550{
551 int hr;
552 if(!reg) return;
553 for (hr=0;hr<HOST_REGS;hr++) {
554 if((cur->regmap[hr]&63)==reg) {
555 cur->isconst&=~(1<<hr);
556 }
557 }
558}
559
560int is_const(struct regstat *cur,signed char reg)
561{
562 int hr;
79c75f1b 563 if(reg<0) return 0;
57871462 564 if(!reg) return 1;
565 for (hr=0;hr<HOST_REGS;hr++) {
566 if((cur->regmap[hr]&63)==reg) {
567 return (cur->isconst>>hr)&1;
568 }
569 }
570 return 0;
571}
// Return the constant value tracked for MIPS register `reg`.
// Callers must ensure is_const() first; an unmapped reg is fatal.
uint64_t get_const(struct regstat *cur,signed char reg)
{
  int hr;
  if(!reg) return 0; // $zero
  for (hr=0;hr<HOST_REGS;hr++) {
    if(cur->regmap[hr]==reg) {
      return current_constmap[hr];
    }
  }
  SysPrintf("Unknown constant in r%d\n",reg);
  exit(1);
}
584
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used.  Try not to reallocate these.
void lsn(u_char hsn[], int i, int *preferred_reg)
{
  int j;
  int b=-1;
  // Determine the scan window: stop at block end or after an
  // unconditional jump (0x1000 upper half == branch-to-self idle loop).
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditional jump
      j++;
      break;
    }
  }
  // Walk backwards so hsn[reg] ends up holding the distance to the
  // *soonest* upcoming use.
  for(;j>=0;j--)
  {
    if(rs1[i+j]) hsn[rs1[i+j]]=j;
    if(rs2[i+j]) hsn[rs2[i+j]]=j;
    if(rt1[i+j]) hsn[rt1[i+j]]=j;
    if(rt2[i+j]) hsn[rt2[i+j]]=j;
    if(itype[i+j]==STORE || itype[i+j]==STORELR) {
      // Stores can allocate zero
      hsn[rs1[i+j]]=j;
      hsn[rs2[i+j]]=j;
    }
    // On some architectures stores need invc_ptr
    #if defined(HOST_IMM8)
    if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
      hsn[INVCP]=j;
    }
    #endif
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      hsn[CCREG]=j;
      b=j; // remember the nearest branch in the window
    }
  }
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        // Uses at the branch target count as later uses (+b+2).
        if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
        if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
        //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
        //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
      }
    }
    // TODO: preferred register based on backward branch
  }
  // Delay slot should preferably not overwrite branch conditions or cycle count
  if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
    if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
    if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
    hsn[CCREG]=1;
    // ...or hash tables
    hsn[RHASH]=1;
    hsn[RHTBL]=1;
  }
  // Coprocessor load/store needs FTEMP, even if not declared
  if(itype[i]==C1LS||itype[i]==C2LS) {
    hsn[FTEMP]=0;
  }
  // Load L/R also uses FTEMP as a temporary register
  if(itype[i]==LOADLR) {
    hsn[FTEMP]=0;
  }
  // Also SWL/SWR/SDL/SDR
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
    hsn[FTEMP]=0;
  }
  // Don't remove the TLB registers either
  if(itype[i]==LOAD || itype[i]==LOADLR || itype[i]==STORE || itype[i]==STORELR || itype[i]==C1LS || itype[i]==C2LS) {
    hsn[TLREG]=0;
  }
  // Don't remove the miniht registers
  if(itype[i]==UJUMP||itype[i]==RJUMP)
  {
    hsn[RHASH]=0;
    hsn[RHTBL]=0;
  }
}
677
// We only want to allocate registers if we're going to use them again soon
// Returns 1 if MIPS register r is read within the next few instructions.
int needed_again(int r, int i)
{
  int j;
  int b=-1;
  int rn=10; // distance to next use; 10 == "not needed in window"

  if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
  {
    if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
      return 0; // Don't need any registers if exiting the block
  }
  // Determine the scan window (same rules as lsn()).
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditional jump
      j++;
      break;
    }
    // Stop at interpreter calls / syscalls (0x0d encodes BREAK).
    if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
    {
      break;
    }
  }
  for(;j>=1;j--)
  {
    if(rs1[i+j]==r) rn=j;
    if(rs2[i+j]==r) rn=j;
    if((unneeded_reg[i+j]>>r)&1) rn=10; // value becomes dead before that use
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      b=j;
    }
  }
  /*
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int o=rn;
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(!((unneeded_reg[t+j]>>r)&1)) {
          if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
          if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
        }
        else rn=o;
      }
    }
  }*/
  if(rn<10) return 1;
  return 0;
}
739
// Try to match register allocations at the end of a loop with those
// at the beginning
int loop_reg(int i, int r, int hr)
{
  int j,k;
  // Scan up to 9 instructions ahead, stopping after an unconditional jump.
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditional jump
      j++;
      break;
    }
  }
  k=0;
  // Include the branch itself if the previous insn is one (we are its delay slot).
  if(i>0){
    if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
      k--;
  }
  for(;k<j;k++)
  {
    // If the register becomes unneeded within the window, keep the current hr.
    // NOTE(review): the upper-half test uses r>64 (not >=64) and shifts a
    // 64-bit mask by r itself (r in 64..127 would over-shift) -- looks
    // suspicious; confirm what values of r callers actually pass.
    if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
    if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
    if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
    {
      // Backward branch: prefer the host reg used at the loop head.
      if(ba[i+k]>=start && ba[i+k]<(start+i*4))
      {
        int t=(ba[i+k]-start)>>2;
        int reg=get_reg(regs[t].regmap_entry,r);
        if(reg>=0) return reg;
        //reg=get_reg(regs[t+1].regmap_entry,r);
        //if(reg>=0) return reg;
      }
    }
  }
  return hr;
}
781
782
// Allocate every register, preserving source/target regs
// (frees everything not used by instruction i so it can be reassigned).
void alloc_all(struct regstat *cur,int i)
{
  int hr;

  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      // Free anything that is not a source or destination of insn i.
      if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
         ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
      {
        cur->regmap[hr]=-1;
        cur->dirty&=~(1<<hr);
      }
      // Don't need zeros
      if((cur->regmap[hr]&63)==0)
      {
        cur->regmap[hr]=-1;
        cur->dirty&=~(1<<hr);
      }
    }
  }
}
805
4600ba03 806#ifndef FORCE32
// DDIV helper called from generated code: signed 64-bit divide,
// quotient to lo, remainder to hi.
// NOTE(review): divisor==0 and INT64_MIN/-1 are UB in C here -- presumably
// the emitted code guards those cases; confirm at the call sites.
void div64(int64_t dividend,int64_t divisor)
{
  lo=dividend/divisor;
  hi=dividend%divisor;
  //printf("TRACE: ddiv %8x%8x %8x%8x\n" ,(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
// DDIVU helper: unsigned 64-bit divide, quotient to lo, remainder to hi.
// NOTE(review): divisor==0 is UB in C -- presumably guarded by the caller.
void divu64(uint64_t dividend,uint64_t divisor)
{
  lo=dividend/divisor;
  hi=dividend%divisor;
  //printf("TRACE: ddivu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
821
822void mult64(uint64_t m1,uint64_t m2)
823{
824 unsigned long long int op1, op2, op3, op4;
825 unsigned long long int result1, result2, result3, result4;
826 unsigned long long int temp1, temp2, temp3, temp4;
827 int sign = 0;
828
829 if (m1 < 0)
830 {
831 op2 = -m1;
832 sign = 1 - sign;
833 }
834 else op2 = m1;
835 if (m2 < 0)
836 {
837 op4 = -m2;
838 sign = 1 - sign;
839 }
840 else op4 = m2;
841
842 op1 = op2 & 0xFFFFFFFF;
843 op2 = (op2 >> 32) & 0xFFFFFFFF;
844 op3 = op4 & 0xFFFFFFFF;
845 op4 = (op4 >> 32) & 0xFFFFFFFF;
846
847 temp1 = op1 * op3;
848 temp2 = (temp1 >> 32) + op1 * op4;
849 temp3 = op2 * op3;
850 temp4 = (temp3 >> 32) + op2 * op4;
851
852 result1 = temp1 & 0xFFFFFFFF;
853 result2 = temp2 + (temp3 & 0xFFFFFFFF);
854 result3 = (result2 >> 32) + temp4;
855 result4 = (result3 >> 32);
856
857 lo = result1 | (result2 << 32);
858 hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
859 if (sign)
860 {
861 hi = ~hi;
862 if (!lo) hi++;
863 else lo = ~lo + 1;
864 }
865}
866
// DMULTU helper: unsigned 64x64 -> 128-bit multiply built from four
// 32x32 partial products; low 64 bits to lo, high 64 bits to hi.
void multu64(uint64_t m1,uint64_t m2)
{
  unsigned long long int op1, op2, op3, op4;
  unsigned long long int result1, result2, result3, result4;
  unsigned long long int temp1, temp2, temp3, temp4;

  // Split both operands into 32-bit halves.
  op1 = m1 & 0xFFFFFFFF;
  op2 = (m1 >> 32) & 0xFFFFFFFF;
  op3 = m2 & 0xFFFFFFFF;
  op4 = (m2 >> 32) & 0xFFFFFFFF;

  // Four partial products with carry propagation.
  temp1 = op1 * op3;
  temp2 = (temp1 >> 32) + op1 * op4;
  temp3 = op2 * op3;
  temp4 = (temp3 >> 32) + op2 * op4;

  result1 = temp1 & 0xFFFFFFFF;
  result2 = temp2 + (temp3 & 0xFFFFFFFF);
  result3 = (result2 >> 32) + temp4;
  result4 = (result3 >> 32);

  lo = result1 | (result2 << 32);
  hi = (result3 & 0xFFFFFFFF) | (result4 << 32);

  //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                      ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
894
895uint64_t ldl_merge(uint64_t original,uint64_t loaded,u_int bits)
896{
897 if(bits) {
898 original<<=64-bits;
899 original>>=64-bits;
900 loaded<<=bits;
901 original|=loaded;
902 }
903 else original=loaded;
904 return original;
905}
906uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits)
907{
908 if(bits^56) {
909 original>>=64-(bits^56);
910 original<<=64-(bits^56);
911 loaded>>=bits^56;
912 original|=loaded;
913 }
914 else original=loaded;
915 return original;
916}
4600ba03 917#endif
57871462 918
919#ifdef __i386__
920#include "assem_x86.c"
921#endif
922#ifdef __x86_64__
923#include "assem_x64.c"
924#endif
925#ifdef __arm__
926#include "assem_arm.c"
927#endif
928
// Add virtual address mapping to linked list
// (pushes a new entry at the head of the page's chain).
void ll_add(struct ll_entry **head,int vaddr,void *addr)
{
  struct ll_entry *new_entry;
  new_entry=malloc(sizeof(struct ll_entry));
  assert(new_entry!=NULL); // out-of-memory is treated as fatal
  new_entry->vaddr=vaddr;
  new_entry->reg_sv_flags=0;
  new_entry->addr=addr;
  new_entry->next=*head;
  *head=new_entry;
}
941
// Like ll_add, but also records reg_sv_flags on the new (head) entry.
void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr)
{
  ll_add(head,vaddr,addr);
  (*head)->reg_sv_flags=reg_sv_flags;
}
947
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
void *check_addr(u_int vaddr)
{
  // Fast path: the 2-way hash table.
  u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) {
    if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
    if(isclean(ht_bin[1])) return (void *)ht_bin[1];
  }
  if(ht_bin[2]==vaddr) {
    if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
    if(isclean(ht_bin[3])) return (void *)ht_bin[3];
  }
  // Slow path: walk the page's jump_in chain.
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr) {
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        // Update existing entry with current address
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr;
          return head->addr;
        }
        if(ht_bin[2]==vaddr) {
          ht_bin[3]=(int)head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
993
994void remove_hash(int vaddr)
995{
996 //printf("remove hash: %x\n",vaddr);
997 int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
998 if(ht_bin[2]==vaddr) {
999 ht_bin[2]=ht_bin[3]=-1;
1000 }
1001 if(ht_bin[0]==vaddr) {
1002 ht_bin[0]=ht_bin[2];
1003 ht_bin[1]=ht_bin[3];
1004 ht_bin[2]=ht_bin[3]=-1;
1005 }
1006}
1007
// Remove list entries whose compiled code lies in the (1<<shift)-sized
// region containing addr; used when translation-cache space is reclaimed.
void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
{
  struct ll_entry *next;
  while(*head) {
    // A block matches if its start, or start minus the maximum block size
    // (i.e. any part of it), falls in the target region.
    if(((u_int)((*head)->addr)>>shift)==(addr>>shift) ||
       ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
    {
      inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
      remove_hash((*head)->vaddr);
      next=(*head)->next;
      free(*head);
      *head=next;
    }
    else
    {
      head=&((*head)->next);
    }
  }
}
1027
1028// Remove all entries from linked list
1029void ll_clear(struct ll_entry **head)
1030{
1031 struct ll_entry *cur;
1032 struct ll_entry *next;
1033 if(cur=*head) {
1034 *head=0;
1035 while(cur) {
1036 next=cur->next;
1037 free(cur);
1038 cur=next;
1039 }
1040 }
1041}
1042
// Dereference the pointers and remove if it matches
// (unlinks direct jumps whose target code lies in the given region).
void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
{
  while(head) {
    int ptr=get_pointer(head->addr);
    inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
    if(((ptr>>shift)==(addr>>shift)) ||
       (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
    {
      inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
      // kill_pointer presumably rewrites the jump back to the dynamic
      // linker stub and returns the patched location -- confirm in assem_*.
      u_int host_addr=(u_int)kill_pointer(head->addr);
      #ifdef __arm__
      // Remember which 128KB cache regions need an icache flush.
      needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
      #endif
    }
    head=head->next;
  }
}
1061
// This is called when we write to a compiled block (see do_invstub)
void invalidate_page(u_int page)
{
  struct ll_entry *head;
  struct ll_entry *next;
  // Drop every compiled block starting in this page.
  head=jump_in[page];
  jump_in[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: %x\n",head->vaddr);
    remove_hash(head->vaddr);
    next=head->next;
    free(head);
    head=next;
  }
  // Unlink every direct jump into this page and free the list.
  head=jump_out[page];
  jump_out[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
    u_int host_addr=(u_int)kill_pointer(head->addr);
    #ifdef __arm__
    // Record the cache region for a later icache flush.
    needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
    #endif
    next=head->next;
    free(head);
    head=next;
  }
}
9be4ba64 1089
// Invalidate the page of 'block' plus the neighbouring pages [first,last]
// that dirty compiled code was found to span (computed by the callers).
static void invalidate_block_range(u_int block, u_int first, u_int last)
{
  u_int page=get_page(block<<12);
  //printf("first=%d last=%d\n",first,last);
  invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  // NOTE(review): this loop stops at last-1, so page 'last' itself is not
  // invalidated when last>page — looks like an off-by-one; confirm whether
  // 'last' is meant to be exclusive here
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  #ifdef __arm__
  // Flush the icache regions recorded via needs_clear_cache
  do_clear_cache();
  #endif

  // Don't trap writes
  invalid_code[block]=1;
#ifndef DISABLE_TLB
  // If there is a valid TLB entry for this page, remove write protect
  if(tlb_LUT_w[block]) {
    assert(tlb_LUT_r[block]==tlb_LUT_w[block]);
    // CHECK: Is this right?
    memory_map[block]=((tlb_LUT_w[block]&0xFFFFF000)-(block<<12)+(unsigned int)rdram-0x80000000)>>2;
    u_int real_block=tlb_LUT_w[block]>>12;
    invalid_code[real_block]=1;
    if(real_block>=0x80000&&real_block<0x80800) memory_map[real_block]=((u_int)rdram-0x80000000)>>2;
  }
  else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2;
#endif

  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
9be4ba64 1128
// Invalidate all compiled code overlapping the 4KB block at (block<<12).
// Scans jump_dirty for blocks whose source range touches this block and
// widens [first,last] accordingly before delegating to invalidate_block_range.
void invalidate_block(u_int block)
{
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    u_int start,end;
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      get_bounds((int)head->addr,&start,&end);
      //printf("start: %x end: %x\n",start,end);
      // Block compiled from RAM: widen the page span by the block's bounds
      if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) {
        if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
          if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
          if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
        }
      }
#ifndef DISABLE_TLB
      // Same, for blocks reached through the TLB-mapped region
      if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) {
        if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) {
          if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)<first) first=((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047;
          if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047;
        }
      }
#endif
    }
    head=head->next;
  }
  invalidate_block_range(block,first,last);
}
1164
// Invalidate compiled code covering a single written address.
// On PCSX this keeps a cached "known clean" window [inv_code_start,
// inv_code_end] so repeated writes to code-free RAM skip the full scan.
void invalidate_addr(u_int addr)
{
#ifdef PCSX
  //static int rhits;
  // this check is done by the caller
  //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
  u_int page=get_vpage(addr);
  if(page<2048) { // RAM
    struct ll_entry *head;
    u_int addr_min=~0, addr_max=0;
    u_int mask=RAM_SIZE-1;
    // Normalize to the KSEG0-style mirror used by compiled blocks
    u_int addr_main=0x80000000|(addr&mask);
    int pg1;
    inv_code_start=addr_main&~0xfff;
    inv_code_end=addr_main|0xfff;
    pg1=page;
    if (pg1>0) {
      // must check previous page too because of spans..
      pg1--;
      inv_code_start-=0x1000;
    }
    for(;pg1<=page;pg1++) {
      for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
        u_int start,end;
        get_bounds((int)head->addr,&start,&end);
        if(ram_offset) {
          start-=ram_offset;
          end-=ram_offset;
        }
        if(start<=addr_main&&addr_main<end) {
          // Hit: grow the range of code that must be invalidated
          if(start<addr_min) addr_min=start;
          if(end>addr_max) addr_max=end;
        }
        else if(addr_main<start) {
          // Miss below a block: shrink the clean window from above
          if(start<inv_code_end)
            inv_code_end=start-1;
        }
        else {
          // Miss above a block: shrink the clean window from below
          if(end>inv_code_start)
            inv_code_start=end;
        }
      }
    }
    if (addr_min!=~0) {
      inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
      inv_code_start=inv_code_end=~0;
      invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
      return;
    }
    else {
      // No code here: remember the clean window (in the caller's mirror)
      inv_code_start=(addr&~mask)|(inv_code_start&mask);
      inv_code_end=(addr&~mask)|(inv_code_end&mask);
      inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
      return;
    }
  }
#endif
  invalidate_block(addr>>12);
}
9be4ba64 1224
dd3a91a1 1225// This is called when loading a save state.
1226// Anything could have changed, so invalidate everything.
57871462 1227void invalidate_all_pages()
1228{
1229 u_int page,n;
1230 for(page=0;page<4096;page++)
1231 invalidate_page(page);
1232 for(page=0;page<1048576;page++)
1233 if(!invalid_code[page]) {
1234 restore_candidate[(page&2047)>>3]|=1<<(page&7);
1235 restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1236 }
1237 #ifdef __arm__
1238 __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
1239 #endif
1240 #ifdef USE_MINI_HT
1241 memset(mini_ht,-1,sizeof(mini_ht));
1242 #endif
94d23bb9 1243 #ifndef DISABLE_TLB
57871462 1244 // TLB
1245 for(page=0;page<0x100000;page++) {
1246 if(tlb_LUT_r[page]) {
1247 memory_map[page]=((tlb_LUT_r[page]&0xFFFFF000)-(page<<12)+(unsigned int)rdram-0x80000000)>>2;
1248 if(!tlb_LUT_w[page]||!invalid_code[page])
1249 memory_map[page]|=0x40000000; // Write protect
1250 }
1251 else memory_map[page]=-1;
1252 if(page==0x80000) page=0xC0000;
1253 }
1254 tlb_hacks();
94d23bb9 1255 #endif
57871462 1256}
1257
1258// Add an entry to jump_out after making a link
1259void add_link(u_int vaddr,void *src)
1260{
94d23bb9 1261 u_int page=get_page(vaddr);
57871462 1262 inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
76f71c27 1263 int *ptr=(int *)(src+4);
1264 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 1265 ll_add(jump_out+page,vaddr,src);
1266 //int ptr=get_pointer(src);
1267 //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
1268}
1269
// If a code block was found to be unmodified (bit was set in
// restore_candidate) and it remains unmodified (bit is clear
// in invalid_code) then move the entries for that 4K page from
// the dirty list to the clean list.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        u_int start,end;
        if(verify_dirty((int)head->addr)) {
          //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
          u_int i;
          u_int inv=0;
          // Re-check every source page the block spans; any invalid page
          // means the block cannot be restored
          get_bounds((int)head->addr,&start,&end);
          if(start-(u_int)rdram<RAM_SIZE) {
            for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
              inv|=invalid_code[i];
            }
          }
#ifndef DISABLE_TLB
          if((signed int)head->vaddr>=(signed int)0xC0000000) {
            u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2));
            //printf("addr=%x start=%x end=%x\n",addr,start,end);
            if(addr<start||addr>=end) inv=1;
          }
#endif
          else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
            inv=1;
          }
          if(!inv) {
            void * clean_addr=(void *)get_clean_addr((int)head->addr);
            // Only restore if the clean entry point is also far from expiry
            if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
              u_int ppage=page;
#ifndef DISABLE_TLB
              if(page<2048&&tlb_LUT_r[head->vaddr>>12]) ppage=(tlb_LUT_r[head->vaddr>>12]^0x80000000)>>12;
#endif
              inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              // Re-list as a clean entry and fix up any stale hash slots
              ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
              int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
              if(ht_bin[0]==head->vaddr) {
                ht_bin[1]=(int)clean_addr; // Replace existing entry
              }
              if(ht_bin[2]==head->vaddr) {
                ht_bin[3]=(int)clean_addr; // Replace existing entry
              }
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1330
1331
1332void mov_alloc(struct regstat *current,int i)
1333{
1334 // Note: Don't need to actually alloc the source registers
1335 if((~current->is32>>rs1[i])&1) {
1336 //alloc_reg64(current,i,rs1[i]);
1337 alloc_reg64(current,i,rt1[i]);
1338 current->is32&=~(1LL<<rt1[i]);
1339 } else {
1340 //alloc_reg(current,i,rs1[i]);
1341 alloc_reg(current,i,rt1[i]);
1342 current->is32|=(1LL<<rt1[i]);
1343 }
1344 clear_const(current,rs1[i]);
1345 clear_const(current,rt1[i]);
1346 dirty_reg(current,rt1[i]);
1347}
1348
// Register allocation for shift-by-immediate instructions:
// SLL/SRL/SRA (32-bit) and DSLL/DSRL/DSRA/DSLL32/DSRL32/DSRA32 (64-bit).
void shiftimm_alloc(struct regstat *current,int i)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
      else lt1[i]=rs1[i];
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
      if(is_const(current,rs1[i])) {
        // Constant-fold the shift at compile time
        int v=get_const(current,rs1[i]);
        if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
        if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
        if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
      }
      else clear_const(current,rt1[i]);
    }
  }
  else
  {
    // 64-bit shifts: no constant propagation
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }

  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      // Source low word only; result is always 64-bit
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      if(imm[i]==32) {
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      } else {
        // Shifting by >32 leaves only low bits: result fits in 32 bits
        alloc_reg(current,i,rt1[i]);
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      // Arithmetic shift of the high word: 32-bit (sign-extended) result
      alloc_reg64(current,i,rs1[i]);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
}
1416
// Register allocation for shift-by-register instructions
// (SLLV/SRLV/SRAV and DSLLV/DSRLV/DSRAV).
void shift_alloc(struct regstat *current,int i)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg(current,i,rt1[i]);
      if(rt1[i]==rs2[i]) {
        // Destination aliases the shift amount: need a scratch register
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
      current->is32|=1LL<<rt1[i];
    } else { // DSLLV/DSRLV/DSRAV
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
      {
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
    }
    clear_const(current,rs1[i]);
    clear_const(current,rs2[i]);
    clear_const(current,rt1[i]);
    dirty_reg(current,rt1[i]);
  }
}
1447
// Register allocation for three-operand ALU instructions.
// Tracks 32/64-bit width of the result in current->is32 and propagates
// whether 64-bit halves of sources/destination are actually needed.
void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        // One operand is r0: only allocate sources still needed later
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      // Comparison needs full 64-bit sources unless both are 32-bit
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      } else {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      }
    }
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        // Result is 64-bit; allocate the upper half if it is needed
        if(!((current->uu>>rt1[i])&1)) {
          alloc_reg64(current,i,rt1[i]);
        }
        if(get_reg(current->regmap,rt1[i]|64)>=0) {
          if(rs1[i]&&rs2[i]) {
            alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rs2[i]);
          }
          else
          {
            // Is is really worth it to keep 64-bit values in registers?
            #ifdef NATIVE_64BIT
            if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
            if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
            #endif
          }
        }
        current->is32&=~(1LL<<rt1[i]);
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          alloc_reg64(current,i,rs1[i]);
          alloc_reg64(current,i,rs2[i]);
          alloc_reg64(current,i,rt1[i]);
        } else {
          // Upper half never needed: stay 32-bit
          alloc_reg(current,i,rs1[i]);
          alloc_reg(current,i,rs2[i]);
          alloc_reg(current,i,rt1[i]);
        }
      }
      else {
        alloc_reg(current,i,rt1[i]);
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          // DADD used as move, or zeroing
          // If we have a 64-bit source, then make the target 64 bits too
          if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
            if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rt1[i]);
          } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
          if(opcode2[i]>=0x2e&&rs2[i]) {
            // DSUB used as negation - 64-bit result
            // If we have a 32-bit register, extend it to 64 bits
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
        }
      }
      // Width of the result follows the widths of the live sources
      if(rs1[i]&&rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
      } else if(rs1[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs1[i])&1)
          current->is32|=1LL<<rt1[i];
      } else if(rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs2[i])&1)
          current->is32|=1LL<<rt1[i];
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1569
// Register allocation for 16-bit-immediate instructions
// (DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI, ADDI/ADDIU, LUI).
// Performs constant propagation where the source value is known.
void imm16_alloc(struct regstat *current,int i)
{
  if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  else lt1[i]=rs1[i];
  if(rt1[i]) alloc_reg(current,i,rt1[i]);
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    current->is32&=~(1LL<<rt1[i]);
    if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
      // TODO: Could preserve the 32-bit flag if the immediate is zero
      alloc_reg64(current,i,rt1[i]);
      alloc_reg64(current,i,rs1[i]);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    // Need a 64-bit source if it isn't known to be 32-bit
    if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
    current->is32|=1LL<<rt1[i];
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
      if(rs1[i]!=rt1[i]) {
        if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      }
    }
    else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
    if(is_const(current,rs1[i])) {
      // Constant-fold the bitwise op
      int v=get_const(current,rs1[i]);
      if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
      if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
      if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
    }
    else clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      set_const(current,rt1[i],v+imm[i]);
    }
    else clear_const(current,rt1[i]);
    current->is32|=1LL<<rt1[i];
  }
  else {
    // LUI: result is always a known constant
    set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
    current->is32|=1LL<<rt1[i];
  }
  dirty_reg(current,rt1[i]);
}
1622
// Register allocation for load instructions (LB..LD, LWL/LWR, LDL/LDR).
// Even a load whose result is unused still allocates an address register.
void load_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
  if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  if(rt1[i]&&!((current->u>>rt1[i])&1)) {
    alloc_reg(current,i,rt1[i]);
    assert(get_reg(current->regmap,rt1[i])>=0);
    if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
    }
    else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      // Unaligned 64-bit loads are expensive: grab everything
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
    else current->is32|=1LL<<rt1[i];
    dirty_reg(current,rt1[i]);
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    // LWL/LWR need a temporary register for the old value
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
      minimum_free_regs[i]=1;
    }
  }
  else
  {
    // Load to r0 or unneeded register (dummy load)
    // but we still need a register to calculate the address
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
    }
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    alloc_reg_temp(current,i,-1);
    minimum_free_regs[i]=1;
    if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
}
1677
// Register allocation for store instructions (SB..SD, SWL/SWR, SDL/SDR).
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    alloc_reg64(current,i,rs2[i]);
    if(rs2[i]) alloc_reg(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWR/SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1701
1702void c1ls_alloc(struct regstat *current,int i)
1703{
1704 //clear_const(current,rs1[i]); // FIXME
1705 clear_const(current,rt1[i]);
1706 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1707 alloc_reg(current,i,CSREG); // Status
1708 alloc_reg(current,i,FTEMP);
1709 if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
1710 alloc_reg64(current,i,FTEMP);
1711 }
1712 // If using TLB, need a register for pointer to the mapping table
1713 if(using_tlb) alloc_reg(current,i,TLREG);
1714 #if defined(HOST_IMM8)
1715 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1716 else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
1717 alloc_reg(current,i,INVCP);
1718 #endif
1719 // We need a temporary register for address generation
1720 alloc_reg_temp(current,i,-1);
1721}
1722
b9b61529 1723void c2ls_alloc(struct regstat *current,int i)
1724{
1725 clear_const(current,rt1[i]);
1726 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1727 alloc_reg(current,i,FTEMP);
1728 // If using TLB, need a register for pointer to the mapping table
1729 if(using_tlb) alloc_reg(current,i,TLREG);
1730 #if defined(HOST_IMM8)
1731 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1732 else if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
1733 alloc_reg(current,i,INVCP);
1734 #endif
1735 // We need a temporary register for address generation
1736 alloc_reg_temp(current,i,-1);
e1190b87 1737 minimum_free_regs[i]=1;
b9b61529 1738}
1739
#ifndef multdiv_alloc
// Register allocation for multiply/divide: results go to HI/LO.
// Overridable per-architecture (hence the #ifndef guard).
void multdiv_alloc(struct regstat *current,int i)
{
  // case 0x18: MULT
  // case 0x19: MULTU
  // case 0x1A: DIV
  // case 0x1B: DIVU
  // case 0x1C: DMULT
  // case 0x1D: DMULTU
  // case 0x1E: DDIV
  // case 0x1F: DDIVU
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      // Mark HI/LO as needed so they get (and keep) registers
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      current->is32|=1LL<<HIREG;
      current->is32|=1LL<<LOREG;
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      current->uu&=~(1LL<<HIREG);
      current->uu&=~(1LL<<LOREG);
      alloc_reg64(current,i,HIREG);
      //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
      alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rs2[i]);
      alloc_all(current,i);
      current->is32&=~(1LL<<HIREG);
      current->is32&=~(1LL<<LOREG);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    current->is32|=1LL<<HIREG;
    current->is32|=1LL<<LOREG;
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
#endif
1800
// Register allocation for COP0 ops (MFC0/MTC0 and TLB/ERET group).
// These call out to C, so all host registers must be free.
void cop0_alloc(struct regstat *current,int i)
{
  if(opcode2[i]==0) // MFC0
  {
    if(rt1[i]) {
      clear_const(current,rt1[i]);
      alloc_all(current,i);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
      alloc_all(current,i);
    }
    else {
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(opcode2[i]==0x10);
    alloc_all(current,i);
  }
  minimum_free_regs[i]=HOST_REGS;
}
1834
// Register allocation for COP1 move instructions
// (MFC1/DMFC1/CFC1 and MTC1/DMTC1/CTC1).
void cop1_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  if(opcode2[i]<3) // MFC1/DMFC1/CFC1
  {
    if(rt1[i]){
      clear_const(current,rt1[i]);
      if(opcode2[i]==1) {
        alloc_reg64(current,i,rt1[i]); // DMFC1
        current->is32&=~(1LL<<rt1[i]);
      }else{
        alloc_reg(current,i,rt1[i]); // MFC1/CFC1
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
    alloc_reg_temp(current,i,-1);
  }
  else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      if(opcode2[i]==5)
        alloc_reg64(current,i,rs1[i]); // DMTC1
      else
        alloc_reg(current,i,rs1[i]); // MTC1/CTC1
      alloc_reg_temp(current,i,-1);
    }
    else {
      // Moving r0: make sure a zero register is available
      current->u&=~1LL;
      alloc_reg(current,i,0);
      alloc_reg_temp(current,i,-1);
    }
  }
  minimum_free_regs[i]=1;
}
// Register allocation for FPU conversion ops: status register + a scratch.
void fconv_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
// Register allocation for FPU arithmetic ops: status register + a scratch.
void float_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
b9b61529 1883void c2op_alloc(struct regstat *current,int i)
1884{
1885 alloc_reg_temp(current,i,-1);
1886}
// Register allocation for FPU compares: status + condition-flag registers.
void fcomp_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg(current,i,FSREG); // Load flags
  dirty_reg(current,FSREG); // Flag will be modified
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1895
// Register allocation for SYSCALL: exits to C, so the cycle count must
// be up to date and every host register free.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  minimum_free_regs[i]=HOST_REGS;
  current->isconst=0;
}
1904
// Dispatch register allocation for the instruction in a branch delay slot,
// based on its decoded itype. A branch in the delay slot is not supported
// and disables speculative precompilation.
void delayslot_alloc(struct regstat *current,int i)
{
  switch(itype[i]) {
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case FJUMP:
    case SYSCALL:
    case HLECALL:
    case SPAN:
      assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1);
      SysPrintf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    case COP1:
    case COP2:
      cop1_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case C2LS:
      c2ls_alloc(current,i);
      break;
    case FCONV:
      fconv_alloc(current,i);
      break;
    case FLOAT:
      float_alloc(current,i);
      break;
    case FCOMP:
      fcomp_alloc(current,i);
      break;
    case C2OP:
      c2op_alloc(current,i);
      break;
  }
}
1973
// Special case where a branch and delay slot span two pages in virtual memory
// Everything must be spillable, so allocate all registers and the cycle
// count, plus whatever the branch itself reads/writes.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  minimum_free_regs[i]=HOST_REGS;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    // Link register
    alloc_reg(current,i,31);
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]);
    if (rt1[i]!=0) {
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
    // Compare needs 64-bit halves unless both operands are 32-bit
    if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg64(current,i,rs2[i]);
    }
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(!((current->is32>>rs1[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
    }
  }
  else
  if(opcode[i]==0x11) // BC1
  {
    alloc_reg(current,i,FSREG);
    alloc_reg(current,i,CSREG);
  }
  //else ...
}
2024
2025add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
2026{
2027 stubs[stubcount][0]=type;
2028 stubs[stubcount][1]=addr;
2029 stubs[stubcount][2]=retaddr;
2030 stubs[stubcount][3]=a;
2031 stubs[stubcount][4]=b;
2032 stubs[stubcount][5]=c;
2033 stubs[stubcount][6]=d;
2034 stubs[stubcount][7]=e;
2035 stubcount++;
2036}
2037
// Write out a single register
// Stores any dirty host register holding guest register 'r' back to the
// register file, sign-extending the upper half for 32-bit values (64-bit
// path only; FORCE32 builds skip that).
void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
{
  int hr;
  for(hr=0;hr<HOST_REGS;hr++) {
    if(hr!=EXCLUDE_REG) {
      if((regmap[hr]&63)==r) {
        if((dirty>>hr)&1) {
          if(regmap[hr]<64) {
            // Lower half (or whole 32-bit value)
            emit_storereg(r,hr);
#ifndef FORCE32
            if((is32>>regmap[hr])&1) {
              // 32-bit value: materialize the sign-extended upper half
              emit_sarimm(hr,31,hr);
              emit_storereg(r|64,hr);
            }
#endif
          }else{
            // This host register holds the upper 64-bit half
            emit_storereg(r|64,hr);
          }
        }
      }
    }
  }
}
2062
2063int mchecksum()
2064{
2065 //if(!tracedebug) return 0;
2066 int i;
2067 int sum=0;
2068 for(i=0;i<2097152;i++) {
2069 unsigned int temp=sum;
2070 sum<<=1;
2071 sum|=(~temp)>>31;
2072 sum^=((u_int *)rdram)[i];
2073 }
2074 return sum;
2075}
2076int rchecksum()
2077{
2078 int i;
2079 int sum=0;
2080 for(i=0;i<64;i++)
2081 sum^=((u_int *)reg)[i];
2082 return sum;
2083}
// Debug/trace helper: dump all 32 GPRs (high:low words) and, when COP1
// is enabled, the 32 FPU registers.
void rlist()
{
  int i;
  printf("TRACE: ");
  for(i=0;i<32;i++)
    printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
  printf("\n");
#ifndef DISABLE_COP1
  printf("TRACE: ");
  for(i=0;i<32;i++)
    printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i]));
  printf("\n");
#endif
}
2098
// Turn on the trace output used by memdebug()
void enabletrace()
{
  tracedebug=1;
}
2103
// Debug/trace helper: within a hard-coded Count window, print the cycle
// counter, memory checksum, register dump and (platform-dependent) some
// raw stack words for comparison against a reference run.
void memdebug(int i)
{
  //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]);
  //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum());
  //rlist();
  //if(tracedebug) {
  //if(Count>=-2084597794) {
  if((signed int)Count>=-2084597794&&(signed int)Count<0) {
  //if(0) {
    printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
    //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status);
    //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]);
    rlist();
    #ifdef __i386__
    printf("TRACE: %x\n",(&i)[-1]);
    #endif
    #ifdef __arm__
    int j;
    printf("TRACE: %x \n",(&j)[10]);
    printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
    #endif
    //fflush(stdout);
  }
  //printf("TRACE: %x\n",(&i)[-1]);
}
2129
// Debug helper: report a TLB exception (faulting instruction address,
// data address and cause code).
void tlb_debug(u_int cause, u_int addr, u_int iaddr)
{
  printf("TLB Exception: instruction=%x addr=%x cause=%x\n",iaddr, addr, cause);
}
2134
// Emit native code for an R-type ALU instruction at decoded-instruction
// index i: ADD/ADDU/SUB/SUBU, their 64-bit DADD* variants, SLT/SLTU and
// AND/OR/XOR/NOR.  i_regs describes the guest->host register mapping in
// effect at this instruction; get_reg(...,reg|64) looks up the host reg
// holding the upper 32-bit half of a 64-bit guest value.  Instructions
// with rt1[i]==0 (writes to $zero) emit nothing.
void alu_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      signed char s1,s2,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      if(t>=0) {
        s1=get_reg(i_regs->regmap,rs1[i]);
        s2=get_reg(i_regs->regmap,rs2[i]);
        if(rs1[i]&&rs2[i]) {
          assert(s1>=0);
          assert(s2>=0);
          // bit 1 of the function code distinguishes SUB from ADD
          if(opcode2[i]&2) emit_sub(s1,s2,t);
          else emit_add(s1,s2,t);
        }
        else if(rs1[i]) {
          // rs2 is $zero: result is just rs1
          if(s1>=0) emit_mov(s1,t);
          else emit_loadreg(rs1[i],t);
        }
        else if(rs2[i]) {
          // rs1 is $zero: result is rs2 (ADD) or -rs2 (SUB)
          if(s2>=0) {
            if(opcode2[i]&2) emit_neg(s2,t);
            else emit_mov(s2,t);
          }
          else {
            emit_loadreg(rs2[i],t);
            if(opcode2[i]&2) emit_neg(t,t);
          }
        }
        else emit_zeroreg(t); // both sources are $zero
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      signed char s1l,s2l,s1h,s2h,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64); // upper half (may be unallocated)
      if(tl>=0) {
        s1l=get_reg(i_regs->regmap,rs1[i]);
        s2l=get_reg(i_regs->regmap,rs2[i]);
        s1h=get_reg(i_regs->regmap,rs1[i]|64);
        s2h=get_reg(i_regs->regmap,rs2[i]|64);
        if(rs1[i]&&rs2[i]) {
          assert(s1l>=0);
          assert(s2l>=0);
          // low halves with flag-setting add/sub so the carry/borrow
          // can be consumed by the high-half operation
          if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
          else emit_adds(s1l,s2l,tl);
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
            #else
            if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
            #endif
            // NOTE(review): high half of the add uses emit_add, not an
            // add-with-carry -- verify the carry out of emit_adds is
            // intentionally ignored here (dead code under FORCE32?)
            else emit_add(s1h,s2h,th);
          }
        }
        else if(rs1[i]) {
          // rs2 is $zero: copy rs1 (both halves)
          if(s1l>=0) emit_mov(s1l,tl);
          else emit_loadreg(rs1[i],tl);
          if(th>=0) {
            if(s1h>=0) emit_mov(s1h,th);
            else emit_loadreg(rs1[i]|64,th);
          }
        }
        else if(rs2[i]) {
          // rs1 is $zero: result is rs2 or 64-bit negation of rs2
          if(s2l>=0) {
            if(opcode2[i]&2) emit_negs(s2l,tl);
            else emit_mov(s2l,tl);
          }
          else {
            emit_loadreg(rs2[i],tl);
            if(opcode2[i]&2) emit_negs(tl,tl);
          }
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(s2h>=0) emit_mov(s2h,th);
            else emit_loadreg(rs2[i]|64,th);
            if(opcode2[i]&2) {
              emit_adcimm(-1,th); // x86 has inverted carry flag
              emit_not(th,th);
            }
            #else
            if(opcode2[i]&2) {
              // high half of 0 - rs2: reverse subtract with carry from 0
              if(s2h>=0) emit_rscimm(s2h,0,th);
              else {
                emit_loadreg(rs2[i]|64,th);
                emit_rscimm(th,0,th);
              }
            }else{
              if(s2h>=0) emit_mov(s2h,th);
              else emit_loadreg(rs2[i]|64,th);
            }
            #endif
          }
        }
        else {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
      }
    }
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,t;
      // 64-bit compare path unless both operands were known 32-bit
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
      {
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1h,31,t); // sign bit of the high half
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz64_32(s2h,s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz64_32(s2h,s2l,t);
          }
          else {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
            else // SLTU
              emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
          }
        }
      } else {
        // 32-bit compare path
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1l,31,t);
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz32(s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz32(s2l,t);
          }
          else{
            assert(s1l>=0);assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less32(s1l,s2l,t);
            else // SLTU
              emit_set_if_carry32(s1l,s2l,t);
          }
        }
      }
    }
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,th,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      // 64-bit path: some operand was 64-bit and a high half is allocated
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
      {
        assert(tl>=0);
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
              emit_and(s1h,s2h,th);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
              emit_xor(s1h,s2h,th);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
              emit_not(tl,tl);
              emit_not(th,th);
            }
          }
          else
          {
            // one or both operands are $zero
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
              emit_zeroreg(th);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl);
                if(s1h>=0) emit_mov(s1h,th);
                else emit_loadreg(rs1[i]|64,th);
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl);
                if(s2h>=0) emit_mov(s2h,th);
                else emit_loadreg(rs2[i]|64,th);
              }
              else{
                emit_zeroreg(tl);
                emit_zeroreg(th);
              }
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else{
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
                if(s1h>=0) emit_not(s1h,th);
                else{
                  emit_loadreg(rs1[i]|64,th);
                  emit_not(th,th);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else{
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
                if(s2h>=0) emit_not(s2h,th);
                else{
                  emit_loadreg(rs2[i]|64,th);
                  emit_not(th,th);
                }
              }
              else {
                // NOR r0,r0 -> all ones
                emit_movimm(-1,tl);
                emit_movimm(-1,th);
              }
            }
          }
        }
      }
      else
      {
        // 32 bit
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);
            assert(s2l>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              emit_or(s1l,s2l,tl);
              emit_not(tl,tl);
            }
          }
          else
          {
            // one or both operands are $zero
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
              }
              else emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else {
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else {
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
              }
              else emit_movimm(-1,tl);
            }
          }
        }
      }
    }
  }
}
2466
// Emit native code for an I-type (16-bit immediate) ALU instruction:
// LUI, ADDI/ADDIU, DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI.  Uses the
// constant-propagation state (isconst/wasconst/constmap) to fold known
// source constants into emit_movimm instead of emitting arithmetic, and
// skips the write entirely when the target is already a known constant.
void imm16_assemble(int i,struct regstat *i_regs)
{
  if (opcode[i]==0x0f) { // LUI
    if(rt1[i]) {
      signed char t;
      t=get_reg(i_regs->regmap,rt1[i]);
      //assert(t>=0);
      if(t>=0) {
        // skip if the target already holds this constant
        if(!((i_regs->isconst>>t)&1))
          emit_movimm(imm[i]<<16,t);
      }
    }
  }
  if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      if(rs1[i]) {
        //assert(t>=0);
        //assert(s>=0);
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1)) {
            if(s<0) {
              // source not in a host reg: reload into t, add in place
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_addimm(t,imm[i],t);
            }else{
              if(!((i_regs->wasconst>>s)&1))
                emit_addimm(s,imm[i],t);
              else
                // source is a known constant: fold the add
                emit_movimm(constmap[i][s]+imm[i],t);
            }
          }
        }
      } else {
        // rs1 is $zero: result is just the immediate
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1))
            emit_movimm(imm[i],t);
        }
      }
    }
  }
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64); // upper halves via |64
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]) {
          assert(sh>=0);
          assert(sl>=0);
          if(th>=0) {
            emit_addimm64_32(sh,sl,imm[i],th,tl);
          }
          else {
            emit_addimm(sl,imm[i],tl);
          }
        } else {
          // rs1 is $zero: sign-extended immediate
          emit_movimm(imm[i],tl);
          if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
        }
      }
    }
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if(rt1[i]) {
      //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
      signed char sh,sl,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(rs1[i]>0) {
          // no high half allocated implies the value was 32-bit
          if(sh<0) assert((i_regs->was32>>rs1[i])&1);
          if(sh<0||((i_regs->was32>>rs1[i])&1)) {
            if(opcode[i]==0x0a) { // SLTI
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_slti32(t,imm[i],t);
              }else{
                emit_slti32(sl,imm[i],t);
              }
            }
            else { // SLTIU
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_sltiu32(t,imm[i],t);
              }else{
                emit_sltiu32(sl,imm[i],t);
              }
            }
          }else{ // 64-bit
            assert(sl>=0);
            if(opcode[i]==0x0a) // SLTI
              emit_slti64_32(sh,sl,imm[i],t);
            else // SLTIU
              emit_sltiu64_32(sh,sl,imm[i],t);
          }
        }else{
          // SLTI(U) with r0 is just stupid,
          // nonetheless examples can be found
          if(opcode[i]==0x0a) // SLTI
            if(0<imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          else // SLTIU
          {
            if(imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          }
        }
      }
    }
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
        if(opcode[i]==0x0c) //ANDI
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
              emit_andimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_andimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]&imm[i],tl);
            }
          }
          else
            emit_zeroreg(tl);
          // zero-extended immediate clears the high half
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
            }
            if(th>=0) {
              // OR/XOR with a zero-extended immediate: high half is
              // just copied through
              if(sh<0) {
                emit_loadreg(rs1[i]|64,th);
              }else{
                emit_mov(sh,th);
              }
            }
            if(opcode[i]==0x0d) //ORI
            if(sl<0) {
              emit_orimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_orimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]|imm[i],tl);
            }
            if(opcode[i]==0x0e) //XORI
            if(sl<0) {
              emit_xorimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_xorimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]^imm[i],tl);
            }
          }
          else {
            // rs1 is $zero: result is the immediate itself
            emit_movimm(imm[i],tl);
            if(th>=0) emit_zeroreg(th);
          }
        }
      }
    }
  }
}
2649
// Emit native code for a shift-by-immediate instruction:
// SLL/SRL/SRA and the doubleword forms DSLL/DSRL/DSRA plus
// DSLL32/DSRL32/DSRA32 (which shift by imm+32).
void shiftimm_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0&&!((i_regs->isconst>>t)&1)){
        if(rs1[i]==0)
        {
          emit_zeroreg(t);
        }
        else
        {
          // reload source into t if it isn't live in a host reg
          if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
          if(imm[i]) {
            if(opcode2[i]==0) // SLL
            {
              emit_shlimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==2) // SRL
            {
              emit_shrimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==3) // SRA
            {
              emit_sarimm(s<0?t:s,imm[i],t);
            }
          }else{
            // Shift by zero
            if(s>=0 && s!=t) emit_mov(s,t);
          }
        }
      }
      //emit_storereg(rt1[i],t); //DEBUG
    }
  }
  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64); // |64 = upper 32-bit half
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          assert(sl>=0);
          assert(sh>=0);
          if(imm[i]) {
            if(opcode2[i]==0x38) // DSLL
            {
              // double-width shift: high half takes bits from the low half
              if(th>=0) emit_shldimm(sh,sl,imm[i],th);
              emit_shlimm(sl,imm[i],tl);
            }
            if(opcode2[i]==0x3a) // DSRL
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_shrimm(sh,imm[i],th);
            }
            if(opcode2[i]==0x3b) // DSRA
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_sarimm(sh,imm[i],th);
            }
          }else{
            // Shift by zero
            if(sl!=tl) emit_mov(sl,tl);
            if(th>=0&&sh!=th) emit_mov(sh,th);
          }
        }
      }
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      signed char sl,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(th>=0||tl>=0){
        assert(tl>=0);
        assert(th>=0);
        assert(sl>=0);
        // shift >= 32: low half becomes zero, low source moves to high
        emit_mov(sl,th);
        emit_zeroreg(tl);
        if(imm[i]>32)
        {
          emit_shlimm(th,imm[i]&31,th);
        }
      }
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      signed char sh,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        // shift >= 32: high source moves to low, high half becomes zero
        emit_mov(sh,tl);
        if(th>=0) emit_zeroreg(th);
        if(imm[i]>32)
        {
          emit_shrimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      signed char sh,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        emit_mov(sh,tl);
        if(imm[i]>32)
        {
          emit_sarimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
}
2787
#ifndef shift_assemble
// Fallback for ports that don't supply an architecture-specific
// variable-shift assembler (the real one is provided by the assem_*
// backend); aborts at runtime if ever reached.
void shift_assemble(int i,struct regstat *i_regs)
{
  printf("Need shift_assemble for this architecture.\n");
  exit(1);
}
#endif
2795
// Emit native code for a load instruction (LB/LH/LW/LBU/LHU/LWU/LD).
// Computes the effective address, emits an inline fast-path RAM access
// guarded by a compare-and-branch to a slow-path stub (add_stub) for
// non-RAM addresses, or -- when the address is a known constant outside
// RAM -- emits a direct call to the read stub (inline_readstub).
// Supports both the direct-mapped and TLB-based memory models.
void load_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl,addr,map=-1;
  int offset;
  int jaddr=0;                 // patch point for the slow-path branch
  int memtarget=0,c=0;         // c: base address is a known constant
  int fastload_reg_override=0; // alternate addr reg for the fast path
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64); // upper half of the target
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  offset=imm[i];
  // build the set of live host regs the stub must preserve
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    if (c) {
      // does the constant address land in RAM?
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
      if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
    }
  }
  //printf("load_assemble: c=%d\n",c);
  //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
  // FIXME: Even if the load is a NOP, we should check for pagefaults...
#ifdef PCSX
  // Loads to $zero or with no allocated target still need to execute if
  // they may hit hardware registers (0x1f80xxxx): the read can have side
  // effects.  Borrow a temporary so the access is performed anyway.
  if(tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80)
    ||rt1[i]==0) {
    // could be FIFO, must perform the read
    // ||dummy read
    assem_debug("(forced read)\n");
    tl=get_reg(i_regs->regmap,-1);
    assert(tl>=0);
  }
#endif
  // effective address: reuse the source reg when it needs no adjustment
  if(offset||s<0||c) addr=tl;
  else addr=s;
  //if(tl<0) tl=get_reg(i_regs->regmap,-1);
  if(tl>=0) {
    //printf("load_assemble: c=%d\n",c);
    //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
    assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
    reglist&=~(1<<tl);
    if(th>=0) reglist&=~(1<<th);
    if(!using_tlb) {
      if(!c) {
        #ifdef RAM_OFFSET
        map=get_reg(i_regs->regmap,ROREG);
        if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
        #endif
//#define R29_HACK 1
        #ifdef R29_HACK
        // Strmnnrmn's speed hack
        if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
        #endif
        {
          // range check + branch to slow path; may pick an override reg
          jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
        }
      }
      else if(ram_offset&&memtarget) {
        emit_addimm(addr,ram_offset,HOST_TEMPREG);
        fastload_reg_override=HOST_TEMPREG;
      }
    }else{ // using tlb
      int x=0;
      if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
      if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
      map=get_reg(i_regs->regmap,TLREG);
      assert(map>=0);
      reglist&=~(1<<map);
      map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
      do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
    }
    int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
    if (opcode[i]==0x20) { // LB
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
          else
          #endif
          {
            //emit_xorimm(addr,3,tl);
            //gen_tlb_addr_r(tl,map);
            //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            // byte lane swap for big-endian layout
            if(!c) emit_xorimm(addr,3,tl);
            else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;

            emit_movsbl_indexed_tlb(x,a,map,tl);
          }
        }
        if(jaddr)
          add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x21) { // LH
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
          else
          #endif
          {
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            if(!c) emit_xorimm(addr,2,tl);
            else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;
            //#ifdef
            //emit_movswl_indexed_tlb(x,tl,map,tl);
            //else
            if(map>=0) {
              gen_tlb_addr_r(a,map);
              emit_movswl_indexed(x,a,tl);
            }else{
              #if 1 //def RAM_OFFSET
              emit_movswl_indexed(x,a,tl);
              #else
              emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
              #endif
            }
          }
        }
        if(jaddr)
          add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x23) { // LW
      if(!c||memtarget) {
        if(!dummy) {
          int a=addr;
          if(fastload_reg_override) a=fastload_reg_override;
          //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_readword_tlb(constmap[i][s]+offset,map,tl);
          else
          #endif
          emit_readword_indexed_tlb(0,a,map,tl);
        }
        if(jaddr)
          add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x24) { // LBU
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
          else
          #endif
          {
            //emit_xorimm(addr,3,tl);
            //gen_tlb_addr_r(tl,map);
            //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            if(!c) emit_xorimm(addr,3,tl);
            else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;

            emit_movzbl_indexed_tlb(x,a,map,tl);
          }
        }
        if(jaddr)
          add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x25) { // LHU
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
          else
          #endif
          {
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            if(!c) emit_xorimm(addr,2,tl);
            else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;
            //#ifdef
            //emit_movzwl_indexed_tlb(x,tl,map,tl);
            //#else
            if(map>=0) {
              gen_tlb_addr_r(a,map);
              emit_movzwl_indexed(x,a,tl);
            }else{
              #if 1 //def RAM_OFFSET
              emit_movzwl_indexed(x,a,tl);
              #else
              emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
              #endif
            }
          }
        }
        if(jaddr)
          add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x27) { // LWU
      assert(th>=0);
      if(!c||memtarget) {
        if(!dummy) {
          int a=addr;
          if(fastload_reg_override) a=fastload_reg_override;
          //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_readword_tlb(constmap[i][s]+offset,map,tl);
          else
          #endif
          emit_readword_indexed_tlb(0,a,map,tl);
        }
        if(jaddr)
          add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else {
        inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
      }
      emit_zeroreg(th); // LWU zero-extends: clear the upper half
    }
    if (opcode[i]==0x37) { // LD
      if(!c||memtarget) {
        if(!dummy) {
          int a=addr;
          if(fastload_reg_override) a=fastload_reg_override;
          //gen_tlb_addr_r(tl,map);
          //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
          //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
          else
          #endif
          emit_readdword_indexed_tlb(0,a,map,th,tl);
        }
        if(jaddr)
          add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
  }
  //emit_storereg(rt1[i],tl); // DEBUG
  //if(opcode[i]==0x23)
  //if(opcode[i]==0x24)
  //if(opcode[i]==0x23||opcode[i]==0x24)
  /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
  {
    //emit_pusha();
    save_regs(0x100f);
        emit_readword((int)&last_count,ECX);
        #ifdef __i386__
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,HOST_CCREG);
        emit_add(HOST_CCREG,ECX,HOST_CCREG);
        emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
        emit_writeword(HOST_CCREG,(int)&Count);
        #endif
        #ifdef __arm__
        if(get_reg(i_regs->regmap,CCREG)<0)
          emit_loadreg(CCREG,0);
        else
          emit_mov(HOST_CCREG,0);
        emit_add(0,ECX,0);
        emit_addimm(0,2*ccadj[i],0);
        emit_writeword(0,(int)&Count);
        #endif
    emit_call((int)memdebug);
    //emit_popa();
    restore_regs(0x100f);
  }/**/
}
3100
#ifndef loadlr_assemble
// Fallback for ports that don't supply an architecture-specific
// LWL/LWR/LDL/LDR assembler (the real one is provided by the assem_*
// backend); aborts at runtime if ever reached.
void loadlr_assemble(int i,struct regstat *i_regs)
{
  printf("Need loadlr_assemble for this architecture.\n");
  exit(1);
}
#endif
3108
3109void store_assemble(int i,struct regstat *i_regs)
3110{
3111 int s,th,tl,map=-1;
3112 int addr,temp;
3113 int offset;
3114 int jaddr=0,jaddr2,type;
666a299d 3115 int memtarget=0,c=0;
57871462 3116 int agr=AGEN1+(i&1);
b1570849 3117 int faststore_reg_override=0;
57871462 3118 u_int hr,reglist=0;
3119 th=get_reg(i_regs->regmap,rs2[i]|64);
3120 tl=get_reg(i_regs->regmap,rs2[i]);
3121 s=get_reg(i_regs->regmap,rs1[i]);
3122 temp=get_reg(i_regs->regmap,agr);
3123 if(temp<0) temp=get_reg(i_regs->regmap,-1);
3124 offset=imm[i];
3125 if(s>=0) {
3126 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3127 if(c) {
3128 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3129 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3130 }
57871462 3131 }
3132 assert(tl>=0);
3133 assert(temp>=0);
3134 for(hr=0;hr<HOST_REGS;hr++) {
3135 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3136 }
3137 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
3138 if(offset||s<0||c) addr=temp;
3139 else addr=s;
3140 if(!using_tlb) {
3141 if(!c) {
ffb0b9e0 3142 #ifndef PCSX
57871462 3143 #ifdef R29_HACK
3144 // Strmnnrmn's speed hack
4cb76aa4 3145 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
57871462 3146 #endif
4cb76aa4 3147 emit_cmpimm(addr,RAM_SIZE);
57871462 3148 #ifdef DESTRUCTIVE_SHIFT
3149 if(s==addr) emit_mov(s,temp);
3150 #endif
3151 #ifdef R29_HACK
dadf55f2 3152 memtarget=1;
4cb76aa4 3153 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
57871462 3154 #endif
3155 {
3156 jaddr=(int)out;
3157 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3158 // Hint to branch predictor that the branch is unlikely to be taken
3159 if(rs1[i]>=28)
3160 emit_jno_unlikely(0);
3161 else
3162 #endif
3163 emit_jno(0);
3164 }
ffb0b9e0 3165 #else
3166 jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
3167 #endif
57871462 3168 }
a327ad27 3169 else if(ram_offset&&memtarget) {
3170 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3171 faststore_reg_override=HOST_TEMPREG;
3172 }
57871462 3173 }else{ // using tlb
3174 int x=0;
3175 if (opcode[i]==0x28) x=3; // SB
3176 if (opcode[i]==0x29) x=2; // SH
3177 map=get_reg(i_regs->regmap,TLREG);
3178 assert(map>=0);
ea3d2e6e 3179 reglist&=~(1<<map);
57871462 3180 map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
3181 do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
3182 }
3183
3184 if (opcode[i]==0x28) { // SB
3185 if(!c||memtarget) {
97a238a6 3186 int x=0,a=temp;
2002a1db 3187#ifdef BIG_ENDIAN_MIPS
57871462 3188 if(!c) emit_xorimm(addr,3,temp);
3189 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 3190#else
97a238a6 3191 if(!c) a=addr;
dadf55f2 3192#endif
b1570849 3193 if(faststore_reg_override) a=faststore_reg_override;
57871462 3194 //gen_tlb_addr_w(temp,map);
3195 //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
97a238a6 3196 emit_writebyte_indexed_tlb(tl,x,a,map,a);
57871462 3197 }
3198 type=STOREB_STUB;
3199 }
3200 if (opcode[i]==0x29) { // SH
3201 if(!c||memtarget) {
97a238a6 3202 int x=0,a=temp;
2002a1db 3203#ifdef BIG_ENDIAN_MIPS
57871462 3204 if(!c) emit_xorimm(addr,2,temp);
3205 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 3206#else
97a238a6 3207 if(!c) a=addr;
dadf55f2 3208#endif
b1570849 3209 if(faststore_reg_override) a=faststore_reg_override;
57871462 3210 //#ifdef
3211 //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
3212 //#else
3213 if(map>=0) {
97a238a6 3214 gen_tlb_addr_w(a,map);
3215 emit_writehword_indexed(tl,x,a);
57871462 3216 }else
a327ad27 3217 //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
3218 emit_writehword_indexed(tl,x,a);
57871462 3219 }
3220 type=STOREH_STUB;
3221 }
3222 if (opcode[i]==0x2B) { // SW
dadf55f2 3223 if(!c||memtarget) {
3224 int a=addr;
b1570849 3225 if(faststore_reg_override) a=faststore_reg_override;
57871462 3226 //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
dadf55f2 3227 emit_writeword_indexed_tlb(tl,0,a,map,temp);
3228 }
57871462 3229 type=STOREW_STUB;
3230 }
3231 if (opcode[i]==0x3F) { // SD
3232 if(!c||memtarget) {
dadf55f2 3233 int a=addr;
b1570849 3234 if(faststore_reg_override) a=faststore_reg_override;
57871462 3235 if(rs2[i]) {
3236 assert(th>=0);
3237 //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
3238 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
dadf55f2 3239 emit_writedword_indexed_tlb(th,tl,0,a,map,temp);
57871462 3240 }else{
3241 // Store zero
3242 //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
3243 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
dadf55f2 3244 emit_writedword_indexed_tlb(tl,tl,0,a,map,temp);
57871462 3245 }
3246 }
3247 type=STORED_STUB;
3248 }
b96d3df7 3249#ifdef PCSX
3250 if(jaddr) {
3251 // PCSX store handlers don't check invcode again
3252 reglist|=1<<addr;
3253 add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,c