libretro/ios: enable dynarec, update target names
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
4600ba03 24#include <sys/mman.h>
57871462 25
3d624f89 26#include "emu_if.h" //emulator interface
57871462 27
4600ba03 28//#define DISASM
29//#define assem_debug printf
30//#define inv_debug printf
31#define assem_debug(...)
32#define inv_debug(...)
57871462 33
34#ifdef __i386__
35#include "assem_x86.h"
36#endif
37#ifdef __x86_64__
38#include "assem_x64.h"
39#endif
40#ifdef __arm__
41#include "assem_arm.h"
42#endif
43
#ifdef __BLACKBERRY_QNX__
/* QNX has no usable builtin __clear_cache(); emulate it with msync(),
 * which on QNX can flush/invalidate the instruction cache. */
#undef __clear_cache
#define __clear_cache(start,end) msync(start, (size_t)((void*)end - (void*)start), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
#elif defined(__MACH__)
#include <libkern/OSCacheControl.h>
#define __clear_cache mach_clear_cache
/* Darwin replacement for __clear_cache: flush the data cache and
 * invalidate the instruction cache over [start,end). */
static void __clear_cache(void *start, void *end) {
  size_t len = (char *)end - (char *)start;
  sys_dcache_flush(start, len);
  sys_icache_invalidate(start, len);
}
#endif
a4874585 56
57871462 57#define MAXBLOCK 4096
58#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 59
/* Register-allocation state at one point in a translated block. */
struct regstat
{
  signed char regmap_entry[HOST_REGS]; // host->guest mapping expected on entry
  signed char regmap[HOST_REGS];       // host reg -> guest reg (-1 = free)
  uint64_t was32;     // guest regs that were 32-bit (sign-extended) on entry
  uint64_t is32;      // guest regs that are 32-bit after this insn
  uint64_t wasdirty;  // host regs dirty (needing writeback) on entry
  uint64_t dirty;     // host regs dirty after this insn
  uint64_t u;         // unneeded guest regs (lower halves)
  uint64_t uu;        // unneeded guest regs (upper halves)
  u_int wasconst;     // host regs that held known constants on entry
  u_int isconst;      // host regs holding known constants now
  u_int loadedconst; // host regs that have constants loaded
  u_int waswritten; // MIPS regs that were used as store base before
};
75
/* Node of the jump_in/jump_out/jump_dirty lists: maps a guest virtual
 * address to a piece of compiled native code. */
struct ll_entry
{
  u_int vaddr;           // guest virtual address of the block entry
  u_int reg32;           // register-width assumptions of the block
                         // (0 on PCSX/FORCE32 builds - see ll_add_32)
  void *addr;            // pointer to the compiled native code
  struct ll_entry *next; // singly-linked list
};
83
84 u_int start;
85 u_int *source;
86 u_int pagelimit;
87 char insn[MAXBLOCK][10];
88 u_char itype[MAXBLOCK];
89 u_char opcode[MAXBLOCK];
90 u_char opcode2[MAXBLOCK];
91 u_char bt[MAXBLOCK];
92 u_char rs1[MAXBLOCK];
93 u_char rs2[MAXBLOCK];
94 u_char rt1[MAXBLOCK];
95 u_char rt2[MAXBLOCK];
96 u_char us1[MAXBLOCK];
97 u_char us2[MAXBLOCK];
98 u_char dep1[MAXBLOCK];
99 u_char dep2[MAXBLOCK];
100 u_char lt1[MAXBLOCK];
bedfea38 101 static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
102 static uint64_t gte_rt[MAXBLOCK];
103 static uint64_t gte_unneeded[MAXBLOCK];
ffb0b9e0 104 static u_int smrv[32]; // speculated MIPS register values
105 static u_int smrv_strong; // mask or regs that are likely to have correct values
106 static u_int smrv_weak; // same, but somewhat less likely
107 static u_int smrv_strong_next; // same, but after current insn executes
108 static u_int smrv_weak_next;
57871462 109 int imm[MAXBLOCK];
110 u_int ba[MAXBLOCK];
111 char likely[MAXBLOCK];
112 char is_ds[MAXBLOCK];
e1190b87 113 char ooo[MAXBLOCK];
57871462 114 uint64_t unneeded_reg[MAXBLOCK];
115 uint64_t unneeded_reg_upper[MAXBLOCK];
116 uint64_t branch_unneeded_reg[MAXBLOCK];
117 uint64_t branch_unneeded_reg_upper[MAXBLOCK];
118 uint64_t p32[MAXBLOCK];
119 uint64_t pr32[MAXBLOCK];
120 signed char regmap_pre[MAXBLOCK][HOST_REGS];
956f3129 121 static uint64_t current_constmap[HOST_REGS];
122 static uint64_t constmap[MAXBLOCK][HOST_REGS];
123 static struct regstat regs[MAXBLOCK];
124 static struct regstat branch_regs[MAXBLOCK];
e1190b87 125 signed char minimum_free_regs[MAXBLOCK];
57871462 126 u_int needed_reg[MAXBLOCK];
127 uint64_t requires_32bit[MAXBLOCK];
128 u_int wont_dirty[MAXBLOCK];
129 u_int will_dirty[MAXBLOCK];
130 int ccadj[MAXBLOCK];
131 int slen;
132 u_int instr_addr[MAXBLOCK];
133 u_int link_addr[MAXBLOCK][3];
134 int linkcount;
135 u_int stubs[MAXBLOCK*3][8];
136 int stubcount;
137 u_int literals[1024][2];
138 int literalcount;
139 int is_delayslot;
140 int cop1_usable;
141 u_char *out;
142 struct ll_entry *jump_in[4096];
143 struct ll_entry *jump_out[4096];
144 struct ll_entry *jump_dirty[4096];
145 u_int hash_table[65536][4] __attribute__((aligned(16)));
146 char shadow[1048576] __attribute__((aligned(16)));
147 void *copy;
148 int expirep;
af4ee1fe 149#ifndef PCSX
57871462 150 u_int using_tlb;
af4ee1fe 151#else
152 static const u_int using_tlb=0;
153#endif
2f546f9a 154 int new_dynarec_did_compile;
0ff8c62c 155 int new_dynarec_hacks;
57871462 156 u_int stop_after_jal;
a327ad27 157#ifndef RAM_FIXED
158 static u_int ram_offset;
159#else
160 static const u_int ram_offset=0;
161#endif
57871462 162 extern u_char restore_candidate[512];
163 extern int cycle_count;
164
165 /* registers that may be allocated */
166 /* 1-31 gpr */
167#define HIREG 32 // hi
168#define LOREG 33 // lo
169#define FSREG 34 // FPU status (FCSR)
170#define CSREG 35 // Coprocessor status
171#define CCREG 36 // Cycle count
172#define INVCP 37 // Pointer to invalid_code
619e5ded 173#define MMREG 38 // Pointer to memory_map
174#define ROREG 39 // ram offset (if rdram!=0x80000000)
175#define TEMPREG 40
176#define FTEMP 40 // FPU temporary register
177#define PTEMP 41 // Prefetch temporary register
178#define TLREG 42 // TLB mapping offset
179#define RHASH 43 // Return address hash
180#define RHTBL 44 // Return address hash table address
181#define RTEMP 45 // JR/JALR address register
182#define MAXREG 45
183#define AGEN1 46 // Address generation temporary register
184#define AGEN2 47 // Address generation temporary register
185#define MGEN1 48 // Maptable address generation temporary register
186#define MGEN2 49 // Maptable address generation temporary register
187#define BTREG 50 // Branch target temporary register
57871462 188
189 /* instruction types */
190#define NOP 0 // No operation
191#define LOAD 1 // Load
192#define STORE 2 // Store
193#define LOADLR 3 // Unaligned load
194#define STORELR 4 // Unaligned store
195#define MOV 5 // Move
196#define ALU 6 // Arithmetic/logic
197#define MULTDIV 7 // Multiply/divide
198#define SHIFT 8 // Shift by register
199#define SHIFTIMM 9// Shift by immediate
200#define IMM16 10 // 16-bit immediate
201#define RJUMP 11 // Unconditional jump to register
202#define UJUMP 12 // Unconditional jump
203#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
204#define SJUMP 14 // Conditional branch (regimm format)
205#define COP0 15 // Coprocessor 0
206#define COP1 16 // Coprocessor 1
207#define C1LS 17 // Coprocessor 1 load/store
208#define FJUMP 18 // Conditional branch (floating point)
209#define FLOAT 19 // Floating point unit
210#define FCONV 20 // Convert integer to float
211#define FCOMP 21 // Floating point compare (sets FSREG)
212#define SYSCALL 22// SYSCALL
213#define OTHER 23 // Other
214#define SPAN 24 // Branch/delay slot spans 2 pages
215#define NI 25 // Not implemented
7139f3c8 216#define HLECALL 26// PCSX fake opcodes for HLE
b9b61529 217#define COP2 27 // Coprocessor 2 move
218#define C2LS 28 // Coprocessor 2 load/store
219#define C2OP 29 // Coprocessor 2 operation
1e973cb0 220#define INTCALL 30// Call interpreter to handle rare corner cases
57871462 221
222 /* stubs */
223#define CC_STUB 1
224#define FP_STUB 2
225#define LOADB_STUB 3
226#define LOADH_STUB 4
227#define LOADW_STUB 5
228#define LOADD_STUB 6
229#define LOADBU_STUB 7
230#define LOADHU_STUB 8
231#define STOREB_STUB 9
232#define STOREH_STUB 10
233#define STOREW_STUB 11
234#define STORED_STUB 12
235#define STORELR_STUB 13
236#define INVCODE_STUB 14
237
238 /* branch codes */
239#define TAKEN 1
240#define NOTTAKEN 2
241#define NULLDS 3
242
243// asm linkage
244int new_recompile_block(int addr);
245void *get_addr_ht(u_int vaddr);
246void invalidate_block(u_int block);
247void invalidate_addr(u_int addr);
248void remove_hash(int vaddr);
249void jump_vaddr();
250void dyna_linker();
251void dyna_linker_ds();
252void verify_code();
253void verify_code_vm();
254void verify_code_ds();
255void cc_interrupt();
256void fp_exception();
257void fp_exception_ds();
258void jump_syscall();
7139f3c8 259void jump_syscall_hle();
57871462 260void jump_eret();
7139f3c8 261void jump_hlecall();
1e973cb0 262void jump_intcall();
7139f3c8 263void new_dyna_leave();
57871462 264
265// TLB
266void TLBWI_new();
267void TLBWR_new();
268void read_nomem_new();
269void read_nomemb_new();
270void read_nomemh_new();
271void read_nomemd_new();
272void write_nomem_new();
273void write_nomemb_new();
274void write_nomemh_new();
275void write_nomemd_new();
276void write_rdram_new();
277void write_rdramb_new();
278void write_rdramh_new();
279void write_rdramd_new();
280extern u_int memory_map[1048576];
281
282// Needed by assembler
283void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
284void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
285void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
286void load_all_regs(signed char i_regmap[]);
287void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
288void load_regs_entry(int t);
289void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
290
291int tracedebug=0;
292
293//#define DEBUG_CYCLE_COUNT 1
294
b6e87b2b 295#define NO_CYCLE_PENALTY_THR 12
296
int cycle_multiplier; // 100 for 1.0

/* Scale a cycle count by cycle_multiplier (a percentage), rounding
 * half away from zero: result ~= x * cycle_multiplier / 100. */
static int CLOCK_ADJUST(int x)
{
  int sign = (x >> 31) | 1; /* +1 when x >= 0, -1 when x < 0 */
  return (x * cycle_multiplier + sign * 50) / 100;
}
304
94d23bb9 305static void tlb_hacks()
57871462 306{
94d23bb9 307#ifndef DISABLE_TLB
57871462 308 // Goldeneye hack
309 if (strncmp((char *) ROM_HEADER->nom, "GOLDENEYE",9) == 0)
310 {
311 u_int addr;
312 int n;
313 switch (ROM_HEADER->Country_code&0xFF)
314 {
315 case 0x45: // U
316 addr=0x34b30;
317 break;
318 case 0x4A: // J
319 addr=0x34b70;
320 break;
321 case 0x50: // E
322 addr=0x329f0;
323 break;
324 default:
325 // Unknown country code
326 addr=0;
327 break;
328 }
329 u_int rom_addr=(u_int)rom;
330 #ifdef ROM_COPY
331 // Since memory_map is 32-bit, on 64-bit systems the rom needs to be
332 // in the lower 4G of memory to use this hack. Copy it if necessary.
333 if((void *)rom>(void *)0xffffffff) {
334 munmap(ROM_COPY, 67108864);
335 if(mmap(ROM_COPY, 12582912,
336 PROT_READ | PROT_WRITE,
337 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
338 -1, 0) <= 0) {printf("mmap() failed\n");}
339 memcpy(ROM_COPY,rom,12582912);
340 rom_addr=(u_int)ROM_COPY;
341 }
342 #endif
343 if(addr) {
344 for(n=0x7F000;n<0x80000;n++) {
345 memory_map[n]=(((u_int)(rom_addr+addr-0x7F000000))>>2)|0x40000000;
346 }
347 }
348 }
94d23bb9 349#endif
57871462 350}
351
94d23bb9 352static u_int get_page(u_int vaddr)
57871462 353{
0ce47d46 354#ifndef PCSX
57871462 355 u_int page=(vaddr^0x80000000)>>12;
0ce47d46 356#else
357 u_int page=vaddr&~0xe0000000;
358 if (page < 0x1000000)
359 page &= ~0x0e00000; // RAM mirrors
360 page>>=12;
361#endif
94d23bb9 362#ifndef DISABLE_TLB
57871462 363 if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12;
94d23bb9 364#endif
57871462 365 if(page>2048) page=2048+(page&2047);
94d23bb9 366 return page;
367}
368
d25604ca 369#ifndef PCSX
94d23bb9 370static u_int get_vpage(u_int vaddr)
371{
372 u_int vpage=(vaddr^0x80000000)>>12;
373#ifndef DISABLE_TLB
57871462 374 if(vpage>262143&&tlb_LUT_r[vaddr>>12]) vpage&=2047; // jump_dirty uses a hash of the virtual address instead
94d23bb9 375#endif
57871462 376 if(vpage>2048) vpage=2048+(vpage&2047);
94d23bb9 377 return vpage;
378}
d25604ca 379#else
380// no virtual mem in PCSX
381static u_int get_vpage(u_int vaddr)
382{
383 return get_page(vaddr);
384}
385#endif
94d23bb9 386
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
// Returns native code for vaddr, recompiling the block if necessary.
void *get_addr(u_int vaddr)
{
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
  // 1) Look for an already-compiled, clean block at this address.
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&head->reg32==0) {
      //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Promote into the 2-way hash table, most-recent first.
      int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
      ht_bin[3]=ht_bin[1];
      ht_bin[2]=ht_bin[0];
      ht_bin[1]=(int)head->addr;
      ht_bin[0]=vaddr;
      return head->addr;
    }
    head=head->next;
  }
  // 2) Try to revalidate a "dirty" block whose source may have been written.
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&head->reg32==0) {
      //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(verify_dirty(head->addr)) {
        // Source unchanged: mark the page valid again and reuse the block.
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        invalid_code[vaddr>>12]=0;
        inv_code_start=inv_code_end=~0;
#ifndef DISABLE_TLB
        memory_map[vaddr>>12]|=0x40000000;
#endif
        if(vpage<2048) {
#ifndef DISABLE_TLB
          if(tlb_LUT_r[vaddr>>12]) {
            invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
            memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
          }
#endif
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        // Insert (or refresh) the hash table entry.
        int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr; // Replace existing entry
        }
        else
        {
          ht_bin[3]=ht_bin[1];
          ht_bin[2]=ht_bin[0];
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  // 3) Not compiled yet: recompile now, then retry the lookup.
  //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault execption
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
}
459// Look up address in hash table first
460void *get_addr_ht(u_int vaddr)
461{
462 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
463 int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
464 if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
465 if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
466 return get_addr(vaddr);
467}
468
// Like get_addr(), but only accept blocks whose 32-bit-register
// assumptions don't conflict with 'flags' ((reg32 & flags) == 0).
void *get_addr_32(u_int vaddr,u_int flags)
{
#ifdef FORCE32
  // PSX build: everything is 32-bit, the plain lookup suffices.
  return get_addr(vaddr);
#else
  //printf("TRACE: count=%d next=%d (get_addr_32 %x,flags %x)\n",Count,next_interupt,vaddr,flags);
  int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
  if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
  u_int page=get_page(vaddr);
  u_int vpage=get_vpage(vaddr);
  struct ll_entry *head;
  // Look for a compatible clean block.
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
      //printf("TRACE: count=%d next=%d (get_addr_32 match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      if(head->reg32==0) {
        // Only cache fully-32-bit blocks in the hash table, and only in
        // empty slots (don't evict frequently-used entries).
        int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        //ht_bin[3]=ht_bin[1];
        //ht_bin[2]=ht_bin[0];
        //ht_bin[1]=(int)head->addr;
        //ht_bin[0]=vaddr;
      }
      return head->addr;
    }
    head=head->next;
  }
  // Try to revalidate a dirty block (same scheme as get_addr).
  head=jump_dirty[vpage];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
      //printf("TRACE: count=%d next=%d (get_addr_32 match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(verify_dirty(head->addr)) {
        //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
        invalid_code[vaddr>>12]=0;
        inv_code_start=inv_code_end=~0;
        memory_map[vaddr>>12]|=0x40000000;
        if(vpage<2048) {
#ifndef DISABLE_TLB
          if(tlb_LUT_r[vaddr>>12]) {
            invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
            memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
          }
#endif
          restore_candidate[vpage>>3]|=1<<(vpage&7);
        }
        else restore_candidate[page>>3]|=1<<(page&7);
        if(head->reg32==0) {
          int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
          if(ht_bin[0]==-1) {
            ht_bin[1]=(int)head->addr;
            ht_bin[0]=vaddr;
          }else if(ht_bin[2]==-1) {
            ht_bin[3]=(int)head->addr;
            ht_bin[2]=vaddr;
          }
          //ht_bin[3]=ht_bin[1];
          //ht_bin[2]=ht_bin[0];
          //ht_bin[1]=(int)head->addr;
          //ht_bin[0]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  // Not found: recompile, then retry.
  //printf("TRACE: count=%d next=%d (get_addr_32 no-match %x,flags %x)\n",Count,next_interupt,vaddr,flags);
  int r=new_recompile_block(vaddr);
  if(r==0) return get_addr(vaddr);
  // Execute in unmapped page, generate pagefault execption
  Status|=2;
  Cause=(vaddr<<31)|0x8;
  EPC=(vaddr&1)?vaddr-5:vaddr;
  BadVAddr=(vaddr&~1);
  Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
  EntryHi=BadVAddr&0xFFFFE000;
  return get_addr_ht(0x80000000);
#endif
}
556
557void clear_all_regs(signed char regmap[])
558{
559 int hr;
560 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
561}
562
563signed char get_reg(signed char regmap[],int r)
564{
565 int hr;
566 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
567 return -1;
568}
569
570// Find a register that is available for two consecutive cycles
571signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
572{
573 int hr;
574 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
575 return -1;
576}
577
578int count_free_regs(signed char regmap[])
579{
580 int count=0;
581 int hr;
582 for(hr=0;hr<HOST_REGS;hr++)
583 {
584 if(hr!=EXCLUDE_REG) {
585 if(regmap[hr]<0) count++;
586 }
587 }
588 return count;
589}
590
591void dirty_reg(struct regstat *cur,signed char reg)
592{
593 int hr;
594 if(!reg) return;
595 for (hr=0;hr<HOST_REGS;hr++) {
596 if((cur->regmap[hr]&63)==reg) {
597 cur->dirty|=1<<hr;
598 }
599 }
600}
601
602// If we dirty the lower half of a 64 bit register which is now being
603// sign-extended, we need to dump the upper half.
604// Note: Do this only after completion of the instruction, because
605// some instructions may need to read the full 64-bit value even if
606// overwriting it (eg SLTI, DSRA32).
607static void flush_dirty_uppers(struct regstat *cur)
608{
609 int hr,reg;
610 for (hr=0;hr<HOST_REGS;hr++) {
611 if((cur->dirty>>hr)&1) {
612 reg=cur->regmap[hr];
613 if(reg>=64)
614 if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
615 }
616 }
617}
618
619void set_const(struct regstat *cur,signed char reg,uint64_t value)
620{
621 int hr;
622 if(!reg) return;
623 for (hr=0;hr<HOST_REGS;hr++) {
624 if(cur->regmap[hr]==reg) {
625 cur->isconst|=1<<hr;
956f3129 626 current_constmap[hr]=value;
57871462 627 }
628 else if((cur->regmap[hr]^64)==reg) {
629 cur->isconst|=1<<hr;
956f3129 630 current_constmap[hr]=value>>32;
57871462 631 }
632 }
633}
634
635void clear_const(struct regstat *cur,signed char reg)
636{
637 int hr;
638 if(!reg) return;
639 for (hr=0;hr<HOST_REGS;hr++) {
640 if((cur->regmap[hr]&63)==reg) {
641 cur->isconst&=~(1<<hr);
642 }
643 }
644}
645
646int is_const(struct regstat *cur,signed char reg)
647{
648 int hr;
79c75f1b 649 if(reg<0) return 0;
57871462 650 if(!reg) return 1;
651 for (hr=0;hr<HOST_REGS;hr++) {
652 if((cur->regmap[hr]&63)==reg) {
653 return (cur->isconst>>hr)&1;
654 }
655 }
656 return 0;
657}
658uint64_t get_const(struct regstat *cur,signed char reg)
659{
660 int hr;
661 if(!reg) return 0;
662 for (hr=0;hr<HOST_REGS;hr++) {
663 if(cur->regmap[hr]==reg) {
956f3129 664 return current_constmap[hr];
57871462 665 }
666 }
c43b5311 667 SysPrintf("Unknown constant in r%d\n",reg);
57871462 668 exit(1);
669}
670
// Least soon needed registers
// Look at the next ten instructions and see which registers
// will be used. Try not to reallocate these.
// On return, hsn[r] holds the distance (in insns) to the next use of r.
void lsn(u_char hsn[], int i, int *preferred_reg)
{
  int j;
  int b=-1;
  // Determine how far ahead we may scan (stop at unconditional jumps).
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  // Walk backwards so nearer uses overwrite farther ones.
  for(;j>=0;j--)
  {
    if(rs1[i+j]) hsn[rs1[i+j]]=j;
    if(rs2[i+j]) hsn[rs2[i+j]]=j;
    if(rt1[i+j]) hsn[rt1[i+j]]=j;
    if(rt2[i+j]) hsn[rt2[i+j]]=j;
    if(itype[i+j]==STORE || itype[i+j]==STORELR) {
      // Stores can allocate zero
      hsn[rs1[i+j]]=j;
      hsn[rs2[i+j]]=j;
    }
    // On some architectures stores need invc_ptr
    #if defined(HOST_IMM8)
    if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
      hsn[INVCP]=j;
    }
    #endif
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      // Branches need the cycle count; remember the last branch seen.
      hsn[CCREG]=j;
      b=j;
    }
  }
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
        if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
        //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
        //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
      }
    }
    // TODO: preferred register based on backward branch
  }
  // Delay slot should preferably not overwrite branch conditions or cycle count
  if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
    if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
    if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
    hsn[CCREG]=1;
    // ...or hash tables
    hsn[RHASH]=1;
    hsn[RHTBL]=1;
  }
  // Coprocessor load/store needs FTEMP, even if not declared
  if(itype[i]==C1LS||itype[i]==C2LS) {
    hsn[FTEMP]=0;
  }
  // Load L/R also uses FTEMP as a temporary register
  if(itype[i]==LOADLR) {
    hsn[FTEMP]=0;
  }
  // Also SWL/SWR/SDL/SDR
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
    hsn[FTEMP]=0;
  }
  // Don't remove the TLB registers either
  if(itype[i]==LOAD || itype[i]==LOADLR || itype[i]==STORE || itype[i]==STORELR || itype[i]==C1LS || itype[i]==C2LS) {
    hsn[TLREG]=0;
  }
  // Don't remove the miniht registers
  if(itype[i]==UJUMP||itype[i]==RJUMP)
  {
    hsn[RHASH]=0;
    hsn[RHTBL]=0;
  }
}
763
// We only want to allocate registers if we're going to use them again soon
// Returns 1 if guest reg r is read within the next few instructions.
int needed_again(int r, int i)
{
  int j;
  int b=-1;
  int rn=10; // 10 == "not needed within the window"

  // If the previous insn unconditionally leaves the block, nothing is needed.
  if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
  {
    if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
      return 0; // Don't need any registers if exiting the block
  }
  // Determine scan limit (stop at unconditional jumps / syscall-like insns).
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
    if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
    {
      break;
    }
  }
  // Walk backwards recording the nearest read; an "unneeded" mark resets it.
  for(;j>=1;j--)
  {
    if(rs1[i+j]==r) rn=j;
    if(rs2[i+j]==r) rn=j;
    if((unneeded_reg[i+j]>>r)&1) rn=10;
    if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
    {
      b=j;
    }
  }
  /*
  if(b>=0)
  {
    if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
    {
      // Follow first branch
      int o=rn;
      int t=(ba[i+b]-start)>>2;
      j=7-b;if(t+j>=slen) j=slen-t-1;
      for(;j>=0;j--)
      {
        if(!((unneeded_reg[t+j]>>r)&1)) {
          if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
          if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
        }
        else rn=o;
      }
    }
  }*/
  if(rn<10) return 1;
  return 0;
}
825
// Try to match register allocations at the end of a loop with those
// at the beginning
// Returns the host reg to use for guest reg r at insn i (default: hr).
int loop_reg(int i, int r, int hr)
{
  int j,k;
  // Scan forward to the end of the extended basic block.
  for(j=0;j<9;j++)
  {
    if(i+j>=slen) {
      j=slen-i-1;
      break;
    }
    if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
    {
      // Don't go past an unconditonal jump
      j++;
      break;
    }
  }
  k=0;
  // Include the branch just before i (insn i is its delay slot).
  if(i>0){
    if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
      k--;
  }
  for(;k<j;k++)
  {
    // Give up if the register becomes unneeded anywhere in the range.
    if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
    if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
    if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
    {
      // Backward branch (loop): prefer the allocation at the target.
      if(ba[i+k]>=start && ba[i+k]<(start+i*4))
      {
        int t=(ba[i+k]-start)>>2;
        int reg=get_reg(regs[t].regmap_entry,r);
        if(reg>=0) return reg;
        //reg=get_reg(regs[t+1].regmap_entry,r);
        //if(reg>=0) return reg;
      }
    }
  }
  return hr;
}
867
868
869// Allocate every register, preserving source/target regs
870void alloc_all(struct regstat *cur,int i)
871{
872 int hr;
873
874 for(hr=0;hr<HOST_REGS;hr++) {
875 if(hr!=EXCLUDE_REG) {
876 if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
877 ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
878 {
879 cur->regmap[hr]=-1;
880 cur->dirty&=~(1<<hr);
881 }
882 // Don't need zeros
883 if((cur->regmap[hr]&63)==0)
884 {
885 cur->regmap[hr]=-1;
886 cur->dirty&=~(1<<hr);
887 }
888 }
889 }
890}
891
4600ba03 892#ifndef FORCE32
// DDIV: 64-bit signed divide; quotient goes to lo, remainder to hi.
// NOTE(review): divisor==0 (and INT64_MIN/-1) is undefined behavior on
// the host; real hardware merely gives an unpredictable result here -
// verify callers never reach this with a zero divisor.
void div64(int64_t dividend,int64_t divisor)
{
  lo=dividend/divisor;
  hi=dividend%divisor;
  //printf("TRACE: ddiv %8x%8x %8x%8x\n" ,(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
// DDIVU: 64-bit unsigned divide; quotient goes to lo, remainder to hi.
// NOTE(review): divisor==0 is undefined behavior on the host - verify
// callers never reach this with a zero divisor.
void divu64(uint64_t dividend,uint64_t divisor)
{
  lo=dividend/divisor;
  hi=dividend%divisor;
  //printf("TRACE: ddivu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                     ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
907
908void mult64(uint64_t m1,uint64_t m2)
909{
910 unsigned long long int op1, op2, op3, op4;
911 unsigned long long int result1, result2, result3, result4;
912 unsigned long long int temp1, temp2, temp3, temp4;
913 int sign = 0;
914
915 if (m1 < 0)
916 {
917 op2 = -m1;
918 sign = 1 - sign;
919 }
920 else op2 = m1;
921 if (m2 < 0)
922 {
923 op4 = -m2;
924 sign = 1 - sign;
925 }
926 else op4 = m2;
927
928 op1 = op2 & 0xFFFFFFFF;
929 op2 = (op2 >> 32) & 0xFFFFFFFF;
930 op3 = op4 & 0xFFFFFFFF;
931 op4 = (op4 >> 32) & 0xFFFFFFFF;
932
933 temp1 = op1 * op3;
934 temp2 = (temp1 >> 32) + op1 * op4;
935 temp3 = op2 * op3;
936 temp4 = (temp3 >> 32) + op2 * op4;
937
938 result1 = temp1 & 0xFFFFFFFF;
939 result2 = temp2 + (temp3 & 0xFFFFFFFF);
940 result3 = (result2 >> 32) + temp4;
941 result4 = (result3 >> 32);
942
943 lo = result1 | (result2 << 32);
944 hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
945 if (sign)
946 {
947 hi = ~hi;
948 if (!lo) hi++;
949 else lo = ~lo + 1;
950 }
951}
952
// DMULTU: 64x64 -> 128 bit unsigned multiply; result goes to hi:lo.
void multu64(uint64_t m1,uint64_t m2)
{
  unsigned long long int op1, op2, op3, op4;
  unsigned long long int result1, result2, result3, result4;
  unsigned long long int temp1, temp2, temp3, temp4;

  // Split the operands into 32-bit halves.
  op1 = m1 & 0xFFFFFFFF;
  op2 = (m1 >> 32) & 0xFFFFFFFF;
  op3 = m2 & 0xFFFFFFFF;
  op4 = (m2 >> 32) & 0xFFFFFFFF;

  // 32x32 partial products with carry propagation.
  temp1 = op1 * op3;
  temp2 = (temp1 >> 32) + op1 * op4;
  temp3 = op2 * op3;
  temp4 = (temp3 >> 32) + op2 * op4;

  result1 = temp1 & 0xFFFFFFFF;
  result2 = temp2 + (temp3 & 0xFFFFFFFF);
  result3 = (result2 >> 32) + temp4;
  result4 = (result3 >> 32);

  lo = result1 | (result2 << 32);
  hi = (result3 & 0xFFFFFFFF) | (result4 << 32);

  //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
  //                                      ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
}
980
981uint64_t ldl_merge(uint64_t original,uint64_t loaded,u_int bits)
982{
983 if(bits) {
984 original<<=64-bits;
985 original>>=64-bits;
986 loaded<<=bits;
987 original|=loaded;
988 }
989 else original=loaded;
990 return original;
991}
992uint64_t ldr_merge(uint64_t original,uint64_t loaded,u_int bits)
993{
994 if(bits^56) {
995 original>>=64-(bits^56);
996 original<<=64-(bits^56);
997 loaded>>=bits^56;
998 original|=loaded;
999 }
1000 else original=loaded;
1001 return original;
1002}
4600ba03 1003#endif
57871462 1004
1005#ifdef __i386__
1006#include "assem_x86.c"
1007#endif
1008#ifdef __x86_64__
1009#include "assem_x64.c"
1010#endif
1011#ifdef __arm__
1012#include "assem_arm.c"
1013#endif
1014
1015// Add virtual address mapping to linked list
1016void ll_add(struct ll_entry **head,int vaddr,void *addr)
1017{
1018 struct ll_entry *new_entry;
1019 new_entry=malloc(sizeof(struct ll_entry));
1020 assert(new_entry!=NULL);
1021 new_entry->vaddr=vaddr;
1022 new_entry->reg32=0;
1023 new_entry->addr=addr;
1024 new_entry->next=*head;
1025 *head=new_entry;
1026}
1027
1028// Add virtual address mapping for 32-bit compiled block
1029void ll_add_32(struct ll_entry **head,int vaddr,u_int reg32,void *addr)
1030{
7139f3c8 1031 ll_add(head,vaddr,addr);
1032#ifndef FORCE32
1033 (*head)->reg32=reg32;
1034#endif
57871462 1035}
1036
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache
void *check_addr(u_int vaddr)
{
  u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  // Fast path: hash table hit, and the code is far from the expiry point.
  if(ht_bin[0]==vaddr) {
    if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[1])) return (void *)ht_bin[1];
  }
  if(ht_bin[2]==vaddr) {
    if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[3])) return (void *)ht_bin[3];
  }
  // Slow path: search the jump_in list for this page.
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&head->reg32==0) {
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        // Update existing entry with current address
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr;
          return head->addr;
        }
        if(ht_bin[2]==vaddr) {
          ht_bin[3]=(int)head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
1082
// Remove any hash-table entry for vaddr (checks both ways of the
// 2-way set).  Order matters: way 1 is cleared first, then if way 0
// matched, the (possibly just-cleared) way 1 is promoted into way 0
// so the surviving entry keeps the higher-priority slot.
void remove_hash(int vaddr)
{
  //printf("remove hash: %x\n",vaddr);
  int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
  if(ht_bin[2]==vaddr) {
    ht_bin[2]=ht_bin[3]=-1; // clear way 1
  }
  if(ht_bin[0]==vaddr) {
    // promote way 1 into way 0, then clear way 1
    ht_bin[0]=ht_bin[2];
    ht_bin[1]=ht_bin[3];
    ht_bin[2]=ht_bin[3]=-1;
  }
}
1096
// Remove list entries whose compiled code lives in the (addr>>shift)
// region of the translation cache that is about to be recycled.
// The second comparison also catches entries whose code starts up to
// MAX_OUTPUT_BLOCK_SIZE before the region boundary and spans into it.
void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
{
  struct ll_entry *next;
  while(*head) {
    if(((u_int)((*head)->addr)>>shift)==(addr>>shift) ||
       ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
    {
      inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
      remove_hash((*head)->vaddr);
      next=(*head)->next;
      free(*head);
      *head=next; // unlink in place; re-examine this slot
    }
    else
    {
      head=&((*head)->next); // keep entry, advance
    }
  }
}
1116
1117// Remove all entries from linked list
1118void ll_clear(struct ll_entry **head)
1119{
1120 struct ll_entry *cur;
1121 struct ll_entry *next;
1122 if(cur=*head) {
1123 *head=0;
1124 while(cur) {
1125 next=cur->next;
1126 free(cur);
1127 cur=next;
1128 }
1129 }
1130}
1131
// Dereference the pointers and remove if it matches.
// For each jump_out entry, read the branch-target pointer embedded in
// the compiled code (get_pointer); if it targets the cache region being
// recycled, patch the jump so it goes back through the lookup/compile
// path instead (kill_pointer).
void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
{
  while(head) {
    int ptr=get_pointer(head->addr);
    inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
    if(((ptr>>shift)==(addr>>shift)) ||
       (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
    {
      inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
      u_int host_addr=(u_int)kill_pointer(head->addr);
      #ifdef __arm__
        // Record the patched 4K code page in a bitmap for a deferred
        // instruction-cache flush (see do_clear_cache).
        needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
      #endif
    }
    head=head->next;
  }
}
1150
// This is called when we write to a compiled block (see do_invstub).
// Discards all compiled entry points for the page and unlinks every
// direct jump into it, so execution re-enters via the compiler.
void invalidate_page(u_int page)
{
  struct ll_entry *head;
  struct ll_entry *next;
  // Free the entry-point list and the corresponding hash entries.
  head=jump_in[page];
  jump_in[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: %x\n",head->vaddr);
    remove_hash(head->vaddr);
    next=head->next;
    free(head);
    head=next;
  }
  // Unpatch direct links from other compiled blocks into this page.
  head=jump_out[page];
  jump_out[page]=0;
  while(head!=NULL) {
    inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
    u_int host_addr=(u_int)kill_pointer(head->addr);
    #ifdef __arm__
      // Queue the patched code page for a deferred icache flush.
      needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
    #endif
    next=head->next;
    free(head);
    head=next;
  }
}
9be4ba64 1178
// Invalidate virtual page 'block' (a 4K page number) together with the
// neighboring page range [first..last] that compiled code spanning the
// block was found to touch (computed by the caller).
static void invalidate_block_range(u_int block, u_int first, u_int last)
{
  u_int page=get_page(block<<12);
  //printf("first=%d last=%d\n",first,last);
  invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  #ifdef __arm__
    do_clear_cache(); // flush icache pages queued by kill_pointer
  #endif
  
  // Don't trap writes
  invalid_code[block]=1;
#ifndef DISABLE_TLB
  // If there is a valid TLB entry for this page, remove write protect
  if(tlb_LUT_w[block]) {
    assert(tlb_LUT_r[block]==tlb_LUT_w[block]);
    // CHECK: Is this right?
    memory_map[block]=((tlb_LUT_w[block]&0xFFFFF000)-(block<<12)+(unsigned int)rdram-0x80000000)>>2;
    u_int real_block=tlb_LUT_w[block]>>12;
    invalid_code[real_block]=1;
    if(real_block>=0x80000&&real_block<0x80800) memory_map[real_block]=((u_int)rdram-0x80000000)>>2;
  }
  else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2;
#endif
  
  #ifdef USE_MINI_HT
  // The mini hash table may cache entry points we just killed.
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
9be4ba64 1217
// Invalidate one 4K guest page.  Scans the dirty-block list to find
// the full page range [first..last] covered by any compiled block that
// reads this page, then invalidates that whole range.
void invalidate_block(u_int block)
{
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    u_int start,end;
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      get_bounds((int)head->addr,&start,&end); // source-RAM range the block was compiled from
      //printf("start: %x end: %x\n",start,end);
      if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) {
        if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
          // widen [first..last] to cover every RAM page the block spans
          if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
          if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
        }
      }
#ifndef DISABLE_TLB
      // Same widening for blocks reached through TLB-mapped addresses
      if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) {
        if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) {
          if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)<first) first=((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047;
          if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047;
        }
      }
#endif
    }
    head=head->next;
  }
  invalidate_block_range(block,first,last);
}
1253
// Invalidate compiled code covering a single written address.
// On PCSX, additionally maintains [inv_code_start, inv_code_end]: a
// window around 'addr' known to contain no compiled code, which the
// caller uses to skip future invalidation calls cheaply.
void invalidate_addr(u_int addr)
{
#ifdef PCSX
  //static int rhits;
  // this check is done by the caller
  //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
  u_int page=get_vpage(addr);
  if(page<2048) { // RAM
    struct ll_entry *head;
    u_int addr_min=~0, addr_max=0; // bounds of dirty blocks actually hit
    u_int mask=RAM_SIZE-1;
    u_int addr_main=0x80000000|(addr&mask); // canonical (KSEG0) alias of addr
    int pg1;
    // Start with a one-page no-code window; shrink it below as blocks are found.
    inv_code_start=addr_main&~0xfff;
    inv_code_end=addr_main|0xfff;
    pg1=page;
    if (pg1>0) {
      // must check previous page too because of spans..
      pg1--;
      inv_code_start-=0x1000;
    }
    for(;pg1<=page;pg1++) {
      for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
        u_int start,end;
        get_bounds((int)head->addr,&start,&end);
        if(ram_offset) {
          start-=ram_offset;
          end-=ram_offset;
        }
        if(start<=addr_main&&addr_main<end) {
          // block covers the written address: must invalidate it
          if(start<addr_min) addr_min=start;
          if(end>addr_max) addr_max=end;
        }
        else if(addr_main<start) {
          // block lies above addr: clip the no-code window from above
          if(start<inv_code_end)
            inv_code_end=start-1;
        }
        else {
          // block lies below addr: clip the no-code window from below
          if(end>inv_code_start)
            inv_code_start=end;
        }
      }
    }
    if (addr_min!=~0) {
      inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
      inv_code_start=inv_code_end=~0; // window invalid after real invalidation
      invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
      return;
    }
    else {
      // No compiled code here: publish the window in addr's own mirror
      inv_code_start=(addr&~mask)|(inv_code_start&mask);
      inv_code_end=(addr&~mask)|(inv_code_end&mask);
      inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
      return;
    }
  }
#endif
  invalidate_block(addr>>12);
}
9be4ba64 1313
// This is called when loading a save state.
// Anything could have changed, so invalidate everything.
void invalidate_all_pages()
{
  u_int page,n;
  for(page=0;page<4096;page++)
    invalidate_page(page);
  // Flag every previously-valid page so clean_blocks can later restore
  // unmodified blocks instead of recompiling them.
  for(page=0;page<1048576;page++)
    if(!invalid_code[page]) {
      restore_candidate[(page&2047)>>3]|=1<<(page&7);
      restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
    }
  #ifdef __arm__
  // Flush the whole translation-cache region from the icache.
  __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
  #endif
  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
  #ifndef DISABLE_TLB
  // TLB: rebuild memory_map from the TLB lookup tables, re-applying
  // write protection to valid pages.
  for(page=0;page<0x100000;page++) {
    if(tlb_LUT_r[page]) {
      memory_map[page]=((tlb_LUT_r[page]&0xFFFFF000)-(page<<12)+(unsigned int)rdram-0x80000000)>>2;
      if(!tlb_LUT_w[page]||!invalid_code[page])
        memory_map[page]|=0x40000000; // Write protect
    }
    else memory_map[page]=-1;
    if(page==0x80000) page=0xC0000; // skip KSEG0/KSEG1 (not TLB-mapped)
  }
  tlb_hacks();
  #endif
}
1346
// Add an entry to jump_out after making a link.
// 'src' is the location of the emitted direct jump to 'vaddr'; it is
// recorded so the link can be unpatched when the target page is
// invalidated.  The assert checks the word at src+4 against 0x059f0000
// (appears to be an ARM PC-relative LDR used by the unlink path —
// NOTE(review): confirm against the assem_arm linker code).
void add_link(u_int vaddr,void *src)
{
  u_int page=get_page(vaddr);
  inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
  int *ptr=(int *)(src+4);
  assert((*ptr&0x0fff0000)==0x059f0000);
  ll_add(jump_out+page,vaddr,src);
  //int ptr=get_pointer(src);
  //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
}
1358
// If a code block was found to be unmodified (bit was set in
// restore_candidate) and it remains unmodified (bit is clear
// in invalid_code) then move the entries for that 4K page from
// the dirty list to the clean list.
// NOTE(review): the `else if` after the #ifndef DISABLE_TLB section
// binds to a different `if` depending on whether DISABLE_TLB is
// defined — preserved as-is, but worth confirming upstream intent.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        u_int start,end;
        if(verify_dirty((int)head->addr)) {
          //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
          u_int i;
          u_int inv=0;
          get_bounds((int)head->addr,&start,&end);
          if(start-(u_int)rdram<RAM_SIZE) {
            // Reject if any source page of the block has been invalidated
            for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
              inv|=invalid_code[i];
            }
          }
#ifndef DISABLE_TLB
          if((signed int)head->vaddr>=(signed int)0xC0000000) {
            u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2));
            //printf("addr=%x start=%x end=%x\n",addr,start,end);
            if(addr<start||addr>=end) inv=1; // TLB mapping changed since compile
          }
#endif
          else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
            inv=1;
          }
          if(!inv) {
            void * clean_addr=(void *)get_clean_addr((int)head->addr);
            if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
              u_int ppage=page;
#ifndef DISABLE_TLB
              if(page<2048&&tlb_LUT_r[head->vaddr>>12]) ppage=(tlb_LUT_r[head->vaddr>>12]^0x80000000)>>12;
#endif
              inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              ll_add_32(jump_in+ppage,head->vaddr,head->reg32,clean_addr);
              int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
              if(!head->reg32) {
                if(ht_bin[0]==head->vaddr) {
                  ht_bin[1]=(int)clean_addr; // Replace existing entry
                }
                if(ht_bin[2]==head->vaddr) {
                  ht_bin[3]=(int)clean_addr; // Replace existing entry
                }
              }
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1421
1422
1423void mov_alloc(struct regstat *current,int i)
1424{
1425 // Note: Don't need to actually alloc the source registers
1426 if((~current->is32>>rs1[i])&1) {
1427 //alloc_reg64(current,i,rs1[i]);
1428 alloc_reg64(current,i,rt1[i]);
1429 current->is32&=~(1LL<<rt1[i]);
1430 } else {
1431 //alloc_reg(current,i,rs1[i]);
1432 alloc_reg(current,i,rt1[i]);
1433 current->is32|=(1LL<<rt1[i]);
1434 }
1435 clear_const(current,rs1[i]);
1436 clear_const(current,rt1[i]);
1437 dirty_reg(current,rt1[i]);
1438}
1439
// Register allocation for shift-by-immediate instructions.
// 32-bit shifts (SLL/SRL/SRA) also do constant propagation when the
// source value is known; the 64-bit variants only track register width.
void shiftimm_alloc(struct regstat *current,int i)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
      else lt1[i]=rs1[i]; // source only live here; note it for the assembler
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
      if(is_const(current,rs1[i])) {
        // Fold the shift at compile time
        int v=get_const(current,rs1[i]);
        if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
        if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]); // logical
        if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]); // arithmetic
      }
      else clear_const(current,rt1[i]);
    }
  }
  else
  {
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }

  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      if(rs1[i]) alloc_reg(current,i,rs1[i]); // only low 32 bits of source matter
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      if(imm[i]==32) {
        // shift by exactly 32: result may still need 64-bit tracking
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      } else {
        // larger shifts zero the upper bits: 32-bit result
        alloc_reg(current,i,rt1[i]);
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      alloc_reg64(current,i,rs1[i]);
      alloc_reg(current,i,rt1[i]); // result fits in 32 bits (sign-extended)
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
}
1507
// Register allocation for shift-by-register instructions
// (SLLV/SRLV/SRAV and the 64-bit DSLLV/DSRLV/DSRAV).
void shift_alloc(struct regstat *current,int i)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      if(rs1[i]) alloc_reg(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]);
      alloc_reg(current,i,rt1[i]);
      if(rt1[i]==rs2[i]) {
        // destination aliases the shift amount: need a scratch register
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
      current->is32|=1LL<<rt1[i];
    } else { // DSLLV/DSRLV/DSRAV
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg(current,i,rs2[i]); // shift amount is 32-bit
      alloc_reg64(current,i,rt1[i]);
      current->is32&=~(1LL<<rt1[i]);
      if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
      {
        alloc_reg_temp(current,i,-1);
        minimum_free_regs[i]=1;
      }
    }
    clear_const(current,rs1[i]);
    clear_const(current,rs2[i]);
    clear_const(current,rt1[i]);
    dirty_reg(current,rt1[i]);
  }
}
1538
// Register allocation for three-register ALU instructions.
// Tracks which guest registers can be kept as 32-bit values (is32):
// 32-bit ops produce 32-bit results, logical ops are 64-bit if either
// input is, and the DADD/DSUB family propagates width from its inputs.
void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        // one source is r0: only load the other if it's needed later
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
    current->is32|=1LL<<rt1[i]; // 32-bit result (sign-extended)
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        // at least one 64-bit operand: compare full width
        alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      } else {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      }
    }
    current->is32|=1LL<<rt1[i]; // result is 0 or 1
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        // 64-bit result: allocate the upper half unless it's unneeded
        if(!((current->uu>>rt1[i])&1)) {
          alloc_reg64(current,i,rt1[i]);
        }
        if(get_reg(current->regmap,rt1[i]|64)>=0) {
          if(rs1[i]&&rs2[i]) {
            alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rs2[i]);
          }
          else
          {
            // Is is really worth it to keep 64-bit values in registers?
            #ifdef NATIVE_64BIT
            if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
            if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
            #endif
          }
        }
        current->is32&=~(1LL<<rt1[i]);
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          alloc_reg64(current,i,rs1[i]);
          alloc_reg64(current,i,rs2[i]);
          alloc_reg64(current,i,rt1[i]);
        } else {
          // upper half of result is dead: 32-bit registers suffice
          alloc_reg(current,i,rs1[i]);
          alloc_reg(current,i,rs2[i]);
          alloc_reg(current,i,rt1[i]);
        }
      }
      else {
        alloc_reg(current,i,rt1[i]);
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          // DADD used as move, or zeroing
          // If we have a 64-bit source, then make the target 64 bits too
          if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
            if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rt1[i]);
          } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
          if(opcode2[i]>=0x2e&&rs2[i]) {
            // DSUB used as negation - 64-bit result
            // If we have a 32-bit register, extend it to 64 bits
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
        }
      }
      // Width propagation: result is 32-bit only when all live sources are
      if(rs1[i]&&rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
      } else if(rs1[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs1[i])&1)
          current->is32|=1LL<<rt1[i];
      } else if(rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs2[i])&1)
          current->is32|=1LL<<rt1[i];
      } else {
        current->is32|=1LL<<rt1[i]; // both sources r0: result is zero
      }
    }
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1660
// Register allocation for 16-bit-immediate instructions
// (DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI, ADDI/ADDIU, LUI).
// Performs constant propagation for the logical/add/LUI cases.
void imm16_alloc(struct regstat *current,int i)
{
  if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  else lt1[i]=rs1[i]; // source only live here; note it for the assembler
  if(rt1[i]) alloc_reg(current,i,rt1[i]);
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    current->is32&=~(1LL<<rt1[i]);
    if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
      // TODO: Could preserve the 32-bit flag if the immediate is zero
      alloc_reg64(current,i,rt1[i]);
      alloc_reg64(current,i,rs1[i]);
    }
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]); // 64-bit compare
    current->is32|=1LL<<rt1[i]; // result is 0 or 1
    clear_const(current,rs1[i]);
    clear_const(current,rt1[i]);
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
      // ORI/XORI with a 64-bit source keep the upper bits
      if(rs1[i]!=rt1[i]) {
        if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rt1[i]);
        current->is32&=~(1LL<<rt1[i]);
      }
    }
    else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
    if(is_const(current,rs1[i])) {
      // Fold the logical op at compile time
      int v=get_const(current,rs1[i]);
      if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
      if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
      if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
    }
    else clear_const(current,rt1[i]);
  }
  else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(is_const(current,rs1[i])) {
      int v=get_const(current,rs1[i]);
      set_const(current,rt1[i],v+imm[i]);
    }
    else clear_const(current,rt1[i]);
    current->is32|=1LL<<rt1[i];
  }
  else {
    set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
    current->is32|=1LL<<rt1[i];
  }
  dirty_reg(current,rt1[i]);
}
1713
// Register allocation for load instructions.  Handles normal loads,
// 64-bit loads (LWU/LD), and the unaligned variants (LWL/LWR/LDL/LDR)
// which need extra temporaries; loads whose result is dead still
// allocate an address register so a TLB/alignment fault can occur.
void load_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
  if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  if(rt1[i]&&!((current->u>>rt1[i])&1)) {
    // Result is live: allocate the destination
    alloc_reg(current,i,rt1[i]);
    assert(get_reg(current->regmap,rt1[i])>=0);
    if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
    }
    else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
      alloc_all(current,i); // unaligned 64-bit load clobbers everything
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
    else current->is32|=1LL<<rt1[i];
    dirty_reg(current,rt1[i]);
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    // LWL/LWR need a temporary register for the old value
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
      minimum_free_regs[i]=1;
    }
  }
  else
  {
    // Load to r0 or unneeded register (dummy load)
    // but we still need a register to calculate the address
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
    }
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    alloc_reg_temp(current,i,-1);
    minimum_free_regs[i]=1;
    if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
}
1768
// Register allocation for store instructions, including the unaligned
// variants (SWL/SWR/SDL/SDR) and 64-bit stores (SD) which need FTEMP.
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    alloc_reg64(current,i,rs2[i]);
    if(rs2[i]) alloc_reg(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWR/SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1792
// Register allocation for COP1 (FPU) loads/stores (LWC1/SWC1/LDC1/SDC1).
// NOTE(review): unlike c2ls_alloc and the other *_alloc helpers that
// allocate a temporary, this does not set minimum_free_regs[i] —
// confirm whether that is intentional (this path is unused on PCSX).
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,rs1[i]); // FIXME
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP); // staging register for the FP value
  if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
    alloc_reg64(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
}
1813
// Register allocation for COP2 (GTE) loads/stores (LWC2/SWC2).
void c2ls_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,FTEMP); // staging register for the GTE value
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  else if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1830
#ifndef multdiv_alloc
// Register allocation for multiply/divide instructions.  Results go to
// the HI/LO registers; a 64-bit operation spills everything (alloc_all)
// because it is implemented with a helper call.
void multdiv_alloc(struct regstat *current,int i)
{
  //  case 0x18: MULT
  //  case 0x19: MULTU
  //  case 0x1A: DIV
  //  case 0x1B: DIVU
  //  case 0x1C: DMULT
  //  case 0x1D: DMULTU
  //  case 0x1E: DDIV
  //  case 0x1F: DDIVU
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  if(rs1[i]&&rs2[i])
  {
    if((opcode2[i]&4)==0) // 32-bit
    {
      current->u&=~(1LL<<HIREG); // HI/LO become live
      current->u&=~(1LL<<LOREG);
      alloc_reg(current,i,HIREG);
      alloc_reg(current,i,LOREG);
      alloc_reg(current,i,rs1[i]);
      alloc_reg(current,i,rs2[i]);
      current->is32|=1LL<<HIREG;
      current->is32|=1LL<<LOREG;
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
    }
    else // 64-bit
    {
      current->u&=~(1LL<<HIREG);
      current->u&=~(1LL<<LOREG);
      current->uu&=~(1LL<<HIREG);
      current->uu&=~(1LL<<LOREG);
      alloc_reg64(current,i,HIREG);
      //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
      alloc_reg64(current,i,rs1[i]);
      alloc_reg64(current,i,rs2[i]);
      alloc_all(current,i);
      current->is32&=~(1LL<<HIREG);
      current->is32&=~(1LL<<LOREG);
      dirty_reg(current,HIREG);
      dirty_reg(current,LOREG);
      minimum_free_regs[i]=HOST_REGS;
    }
  }
  else
  {
    // Multiply by zero is zero.
    // MIPS does not have a divide by zero exception.
    // The result is undefined, we return zero.
    alloc_reg(current,i,HIREG);
    alloc_reg(current,i,LOREG);
    current->is32|=1LL<<HIREG;
    current->is32|=1LL<<LOREG;
    dirty_reg(current,HIREG);
    dirty_reg(current,LOREG);
  }
}
#endif
1891
// Register allocation for COP0 (system control) instructions.
// All variants spill every register (alloc_all) since they are
// implemented via helper calls that may clobber anything.
void cop0_alloc(struct regstat *current,int i)
{
  if(opcode2[i]==0) // MFC0
  {
    if(rt1[i]) {
      clear_const(current,rt1[i]);
      alloc_all(current,i);
      alloc_reg(current,i,rt1[i]);
      current->is32|=1LL<<rt1[i];
      dirty_reg(current,rt1[i]);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      alloc_reg(current,i,rs1[i]);
      alloc_all(current,i);
    }
    else {
      alloc_all(current,i); // FIXME: Keep r0
      current->u&=~1LL;
      alloc_reg(current,i,0);
    }
  }
  else
  {
    // TLBR/TLBWI/TLBWR/TLBP/ERET
    assert(opcode2[i]==0x10);
    alloc_all(current,i);
  }
  minimum_free_regs[i]=HOST_REGS; // every host register may be taken
}
1925
// Register allocation for COP1 register moves (MFC1/DMFC1/CFC1 and
// MTC1/DMTC1/CTC1).  Always needs the COP1 status register and a
// scratch register for the FP register-file access.
void cop1_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  if(opcode2[i]<3) // MFC1/DMFC1/CFC1
  {
    if(rt1[i]){
      clear_const(current,rt1[i]);
      if(opcode2[i]==1) {
        alloc_reg64(current,i,rt1[i]); // DMFC1
        current->is32&=~(1LL<<rt1[i]);
      }else{
        alloc_reg(current,i,rt1[i]); // MFC1/CFC1
        current->is32|=1LL<<rt1[i];
      }
      dirty_reg(current,rt1[i]);
    }
    alloc_reg_temp(current,i,-1);
  }
  else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
  {
    if(rs1[i]){
      clear_const(current,rs1[i]);
      if(opcode2[i]==5)
        alloc_reg64(current,i,rs1[i]); // DMTC1
      else
        alloc_reg(current,i,rs1[i]); // MTC1/CTC1
      alloc_reg_temp(current,i,-1);
    }
    else {
      // source is r0: make it allocatable and use it directly
      current->u&=~1LL;
      alloc_reg(current,i,0);
      alloc_reg_temp(current,i,-1);
    }
  }
  minimum_free_regs[i]=1;
}
// Register allocation for FP conversion ops: COP1 status register
// (to check the FPU is usable) plus one scratch register.
void fconv_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1; // one host register must stay free for the temp
}
// Register allocation for FP arithmetic ops: COP1 status register
// plus one scratch register (the FP work happens via helper calls).
void float_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
// Register allocation for GTE (COP2) arithmetic ops: one scratch only.
// NOTE(review): minimum_free_regs[i] is not set here, unlike the other
// *_alloc helpers that allocate a temp — confirm intentional.
void c2op_alloc(struct regstat *current,int i)
{
  alloc_reg_temp(current,i,-1);
}
// Register allocation for FP compare ops: needs the COP1 status
// register, the condition-flag register (which the compare writes),
// and one scratch register.
void fcomp_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg(current,i,FSREG); // Load flags
  dirty_reg(current,FSREG); // Flag will be modified
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1986
// Register allocation for SYSCALL: needs the cycle counter (the
// exception path consumes it) and spills every register since the
// handler may touch anything.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  minimum_free_regs[i]=HOST_REGS;
  current->isconst=0; // no constants survive the exception
}
1995
// Dispatch register allocation for the instruction in a branch delay
// slot, by instruction type.  A jump/syscall in a delay slot is not
// supported: speculative precompilation is disabled instead of
// aborting.  Types with no allocation needs (e.g. NOP) fall through
// the switch without a default case.
void delayslot_alloc(struct regstat *current,int i)
{
  switch(itype[i]) {
    case UJUMP:
    case CJUMP:
    case SJUMP:
    case RJUMP:
    case FJUMP:
    case SYSCALL:
    case HLECALL:
    case SPAN:
      assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1);
      SysPrintf("Disabled speculative precompilation\n");
      stop_after_jal=1;
      break;
    case IMM16:
      imm16_alloc(current,i);
      break;
    case LOAD:
    case LOADLR:
      load_alloc(current,i);
      break;
    case STORE:
    case STORELR:
      store_alloc(current,i);
      break;
    case ALU:
      alu_alloc(current,i);
      break;
    case SHIFT:
      shift_alloc(current,i);
      break;
    case MULTDIV:
      multdiv_alloc(current,i);
      break;
    case SHIFTIMM:
      shiftimm_alloc(current,i);
      break;
    case MOV:
      mov_alloc(current,i);
      break;
    case COP0:
      cop0_alloc(current,i);
      break;
    case COP1:
    case COP2:
      cop1_alloc(current,i);
      break;
    case C1LS:
      c1ls_alloc(current,i);
      break;
    case C2LS:
      c2ls_alloc(current,i);
      break;
    case FCONV:
      fconv_alloc(current,i);
      break;
    case FLOAT:
      float_alloc(current,i);
      break;
    case FCOMP:
      fcomp_alloc(current,i);
      break;
    case C2OP:
      c2op_alloc(current,i);
      break;
  }
}
2064
// Special case where a branch and delay slot span two pages in virtual memory.
// Everything is spilled (alloc_all), then the registers the branch
// itself needs (link register, comparison operands, FP condition) are
// allocated on top.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  minimum_free_regs[i]=HOST_REGS;
  alloc_all(current,i);
  alloc_cc(current,i); // cycle count is consumed at the branch
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    alloc_reg(current,i,31); // link register
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]); // jump target
    if (rt1[i]!=0) {
      alloc_reg(current,i,rt1[i]); // JALR link register
      dirty_reg(current,rt1[i]);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
    if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
    {
      // 64-bit comparison needed
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg64(current,i,rs2[i]);
    }
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(!((current->is32>>rs1[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
    }
  }
  else
  if(opcode[i]==0x11) // BC1
  {
    alloc_reg(current,i,FSREG); // FP condition flag
    alloc_reg(current,i,CSREG); // COP1 status
  }
  //else ...
}
2115
2116add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
2117{
2118 stubs[stubcount][0]=type;
2119 stubs[stubcount][1]=addr;
2120 stubs[stubcount][2]=retaddr;
2121 stubs[stubcount][3]=a;
2122 stubs[stubcount][4]=b;
2123 stubs[stubcount][5]=c;
2124 stubs[stubcount][6]=d;
2125 stubs[stubcount][7]=e;
2126 stubcount++;
2127}
2128
2129// Write out a single register
2130void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
2131{
2132 int hr;
2133 for(hr=0;hr<HOST_REGS;hr++) {
2134 if(hr!=EXCLUDE_REG) {
2135 if((regmap[hr]&63)==r) {
2136 if((dirty>>hr)&1) {
2137 if(regmap[hr]<64) {
2138 emit_storereg(r,hr);
24385cae 2139#ifndef FORCE32
57871462 2140 if((is32>>regmap[hr])&1) {
2141 emit_sarimm(hr,31,hr);
2142 emit_storereg(r|64,hr);
2143 }
24385cae 2144#endif
57871462 2145 }else{
2146 emit_storereg(r|64,hr);
2147 }
2148 }
2149 }
2150 }
2151 }
2152}
2153
2154int mchecksum()
2155{
2156 //if(!tracedebug) return 0;
2157 int i;
2158 int sum=0;
2159 for(i=0;i<2097152;i++) {
2160 unsigned int temp=sum;
2161 sum<<=1;
2162 sum|=(~temp)>>31;
2163 sum^=((u_int *)rdram)[i];
2164 }
2165 return sum;
2166}
2167int rchecksum()
2168{
2169 int i;
2170 int sum=0;
2171 for(i=0;i<64;i++)
2172 sum^=((u_int *)reg)[i];
2173 return sum;
2174}
// Trace-debugging aid: print all 32 guest GPRs as high:low 32-bit halves,
// and (unless COP1 is compiled out) the 32 FP registers the same way.
void rlist()
{
  int i;
  printf("TRACE: ");
  for(i=0;i<32;i++)
    printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
  printf("\n");
#ifndef DISABLE_COP1
  printf("TRACE: ");
  for(i=0;i<32;i++)
    printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i]));
  printf("\n");
#endif
}
2189
// Set the global trace-debugging flag (read by the various trace helpers).
void enabletrace()
{
  tracedebug=1;
}
2194
// Trace-debugging hook, intended to be called from generated code: within a
// hard-coded Count window it prints the cycle counter, a RAM checksum and the
// register list.  Most alternative probes are left commented out for ad-hoc
// use.  NOTE(review): the (&i)[-1] / (&j)[N] reads peek at the caller's stack
// frame — a deliberate, non-portable debugging hack, not defined C.
void memdebug(int i)
{
  //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]);
  //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum());
  //rlist();
  //if(tracedebug) {
  //if(Count>=-2084597794) {
  if((signed int)Count>=-2084597794&&(signed int)Count<0) {
  //if(0) {
    printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
    //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status);
    //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]);
    rlist();
    #ifdef __i386__
    printf("TRACE: %x\n",(&i)[-1]);
    #endif
    #ifdef __arm__
    int j;
    printf("TRACE: %x \n",(&j)[10]);
    printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
    #endif
    //fflush(stdout);
  }
  //printf("TRACE: %x\n",(&i)[-1]);
}
2220
// Print details of a TLB exception: faulting instruction address, the
// data address that faulted, and the cause code.
void tlb_debug(u_int cause, u_int addr, u_int iaddr)
{
  printf("TLB Exception: instruction=%x addr=%x cause=%x\n",iaddr, addr, cause);
}
2225
// Emit native code for a register-register ALU instruction.
// Handles ADD/ADDU/SUB/SUBU, the 64-bit DADD/DADDU/DSUB/DSUBU variants,
// SLT/SLTU comparisons, and AND/OR/XOR/NOR bitwise logic.
// i:      instruction index into the decode arrays (opcode2, rs1, rs2, rt1)
// i_regs: host register mapping state for this instruction.
// Negative values from get_reg() mean the guest register is not mapped to a
// host register; r0 operands (rs==0) degenerate into moves/negates/zeroes.
void alu_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      signed char s1,s2,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      if(t>=0) {
        s1=get_reg(i_regs->regmap,rs1[i]);
        s2=get_reg(i_regs->regmap,rs2[i]);
        if(rs1[i]&&rs2[i]) {
          assert(s1>=0);
          assert(s2>=0);
          if(opcode2[i]&2) emit_sub(s1,s2,t); // bit 1 of funct selects SUB
          else emit_add(s1,s2,t);
        }
        else if(rs1[i]) {
          // rs2 is r0: the operation is just a move of rs1
          if(s1>=0) emit_mov(s1,t);
          else emit_loadreg(rs1[i],t);
        }
        else if(rs2[i]) {
          // rs1 is r0: result is rs2 (negated for SUB)
          if(s2>=0) {
            if(opcode2[i]&2) emit_neg(s2,t);
            else emit_mov(s2,t);
          }
          else {
            emit_loadreg(rs2[i],t);
            if(opcode2[i]&2) emit_neg(t,t);
          }
        }
        else emit_zeroreg(t); // both operands are r0
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      signed char s1l,s2l,s1h,s2h,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64); // |64 selects the upper 32-bit half
      if(tl>=0) {
        s1l=get_reg(i_regs->regmap,rs1[i]);
        s2l=get_reg(i_regs->regmap,rs2[i]);
        s1h=get_reg(i_regs->regmap,rs1[i]|64);
        s2h=get_reg(i_regs->regmap,rs2[i]|64);
        if(rs1[i]&&rs2[i]) {
          assert(s1l>=0);
          assert(s2l>=0);
          // low halves first with flag-setting variants so the high halves
          // can consume the carry/borrow
          if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
          else emit_adds(s1l,s2l,tl);
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
            #else
            if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
            #endif
            else emit_add(s1h,s2h,th);
          }
        }
        else if(rs1[i]) {
          // rs2 is r0: 64-bit move
          if(s1l>=0) emit_mov(s1l,tl);
          else emit_loadreg(rs1[i],tl);
          if(th>=0) {
            if(s1h>=0) emit_mov(s1h,th);
            else emit_loadreg(rs1[i]|64,th);
          }
        }
        else if(rs2[i]) {
          // rs1 is r0: 64-bit move or 64-bit negate
          if(s2l>=0) {
            if(opcode2[i]&2) emit_negs(s2l,tl);
            else emit_mov(s2l,tl);
          }
          else {
            emit_loadreg(rs2[i],tl);
            if(opcode2[i]&2) emit_negs(tl,tl);
          }
          if(th>=0) {
            #ifdef INVERTED_CARRY
            if(s2h>=0) emit_mov(s2h,th);
            else emit_loadreg(rs2[i]|64,th);
            if(opcode2[i]&2) {
              emit_adcimm(-1,th); // x86 has inverted carry flag
              emit_not(th,th);
            }
            #else
            if(opcode2[i]&2) {
              // high half of the negate: th = 0 - s2h - borrow
              if(s2h>=0) emit_rscimm(s2h,0,th);
              else {
                emit_loadreg(rs2[i]|64,th);
                emit_rscimm(th,0,th);
              }
            }else{
              if(s2h>=0) emit_mov(s2h,th);
              else emit_loadreg(rs2[i]|64,th);
            }
            #endif
          }
        }
        else {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
      }
    }
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,t;
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
      {
        // 64-bit comparison path: at least one operand was 64-bit
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1h,31,t); // result is the sign bit of the high half
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz64_32(s2h,s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz64_32(s2h,s2l,t);
          }
          else {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
            else // SLTU
              emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
          }
        }
      } else {
        // 32-bit comparison path: both operands known 32-bit
        t=get_reg(i_regs->regmap,rt1[i]);
        //assert(t>=0);
        if(t>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs2[i]==0) // rx<r0
          {
            assert(s1l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_shrimm(s1l,31,t);
            else // SLTU (unsigned can not be less than zero)
              emit_zeroreg(t);
          }
          else if(rs1[i]==0) // r0<rx
          {
            assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_gz32(s2l,t);
            else // SLTU (set if not zero)
              emit_set_nz32(s2l,t);
          }
          else{
            assert(s1l>=0);assert(s2l>=0);
            if(opcode2[i]==0x2a) // SLT
              emit_set_if_less32(s1l,s2l,t);
            else // SLTU
              emit_set_if_carry32(s1l,s2l,t);
          }
        }
      }
    }
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      signed char s1l,s1h,s2l,s2h,th,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
      {
        // 64-bit path: operate on both halves
        assert(tl>=0);
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s1h=get_reg(i_regs->regmap,rs1[i]|64);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          s2h=get_reg(i_regs->regmap,rs2[i]|64);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);assert(s1h>=0);
            assert(s2l>=0);assert(s2h>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
              emit_and(s1h,s2h,th);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
              emit_xor(s1h,s2h,th);
            } else
            if(opcode2[i]==0x27) { // NOR = NOT(OR)
              emit_or(s1l,s2l,tl);
              emit_or(s1h,s2h,th);
              emit_not(tl,tl);
              emit_not(th,th);
            }
          }
          else
          {
            // one or both source operands are r0
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
              emit_zeroreg(th);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl);
                if(s1h>=0) emit_mov(s1h,th);
                else emit_loadreg(rs1[i]|64,th);
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl);
                if(s2h>=0) emit_mov(s2h,th);
                else emit_loadreg(rs2[i]|64,th);
              }
              else{
                emit_zeroreg(tl);
                emit_zeroreg(th);
              }
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else{
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
                if(s1h>=0) emit_not(s1h,th);
                else{
                  emit_loadreg(rs1[i]|64,th);
                  emit_not(th,th);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else{
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
                if(s2h>=0) emit_not(s2h,th);
                else{
                  emit_loadreg(rs2[i]|64,th);
                  emit_not(th,th);
                }
              }
              else {
                // NOR r0,r0 is all ones
                emit_movimm(-1,tl);
                emit_movimm(-1,th);
              }
            }
          }
        }
      }
      else
      {
        // 32 bit
        if(tl>=0) {
          s1l=get_reg(i_regs->regmap,rs1[i]);
          s2l=get_reg(i_regs->regmap,rs2[i]);
          if(rs1[i]&&rs2[i]) {
            assert(s1l>=0);
            assert(s2l>=0);
            if(opcode2[i]==0x24) { // AND
              emit_and(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x25) { // OR
              emit_or(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x26) { // XOR
              emit_xor(s1l,s2l,tl);
            } else
            if(opcode2[i]==0x27) { // NOR = NOT(OR)
              emit_or(s1l,s2l,tl);
              emit_not(tl,tl);
            }
          }
          else
          {
            // one or both source operands are r0
            if(opcode2[i]==0x24) { // AND
              emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
              if(rs1[i]){
                if(s1l>=0) emit_mov(s1l,tl);
                else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_mov(s2l,tl);
                else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
              }
              else emit_zeroreg(tl);
            } else
            if(opcode2[i]==0x27) { // NOR
              if(rs1[i]){
                if(s1l>=0) emit_not(s1l,tl);
                else {
                  emit_loadreg(rs1[i],tl);
                  emit_not(tl,tl);
                }
              }
              else
              if(rs2[i]){
                if(s2l>=0) emit_not(s2l,tl);
                else {
                  emit_loadreg(rs2[i],tl);
                  emit_not(tl,tl);
                }
              }
              else emit_movimm(-1,tl);
            }
          }
        }
      }
    }
  }
}
2557
// Emit native code for an immediate-operand instruction:
// LUI, ADDI/ADDIU, DADDI/DADDIU, SLTI/SLTIU, ANDI/ORI/XORI.
// i:      instruction index into the decode arrays
// i_regs: host register mapping state for this instruction.
// Targets already marked constant (isconst) are skipped — their value is
// materialized elsewhere by constant propagation; known-constant sources
// (wasconst) are folded into a direct emit_movimm.
void imm16_assemble(int i,struct regstat *i_regs)
{
  if (opcode[i]==0x0f) { // LUI
    if(rt1[i]) {
      signed char t;
      t=get_reg(i_regs->regmap,rt1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(!((i_regs->isconst>>t)&1))
          emit_movimm(imm[i]<<16,t);
      }
    }
  }
  if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      if(rs1[i]) {
        //assert(t>=0);
        //assert(s>=0);
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1)) {
            if(s<0) {
              // source not in a host register: reload it into the target first
              if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
              emit_addimm(t,imm[i],t);
            }else{
              if(!((i_regs->wasconst>>s)&1))
                emit_addimm(s,imm[i],t);
              else
                emit_movimm(constmap[i][s]+imm[i],t); // constant-fold
            }
          }
        }
      } else {
        // rs1 is r0: ADDI becomes a move of the immediate
        if(t>=0) {
          if(!((i_regs->isconst>>t)&1))
            emit_movimm(imm[i],t);
        }
      }
    }
  }
  if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64); // upper half, if allocated
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]) {
          assert(sh>=0);
          assert(sl>=0);
          if(th>=0) {
            emit_addimm64_32(sh,sl,imm[i],th,tl);
          }
          else {
            emit_addimm(sl,imm[i],tl);
          }
        } else {
          // rs1 is r0: result is the sign-extended immediate
          emit_movimm(imm[i],tl);
          if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
        }
      }
    }
  }
  else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
    if(rt1[i]) {
      //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
      signed char sh,sl,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0) {
        if(rs1[i]>0) {
          if(sh<0) assert((i_regs->was32>>rs1[i])&1);
          if(sh<0||((i_regs->was32>>rs1[i])&1)) {
            // 32-bit source
            if(opcode[i]==0x0a) { // SLTI
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_slti32(t,imm[i],t);
              }else{
                emit_slti32(sl,imm[i],t);
              }
            }
            else { // SLTIU
              if(sl<0) {
                if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
                emit_sltiu32(t,imm[i],t);
              }else{
                emit_sltiu32(sl,imm[i],t);
              }
            }
          }else{ // 64-bit
            assert(sl>=0);
            if(opcode[i]==0x0a) // SLTI
              emit_slti64_32(sh,sl,imm[i],t);
            else // SLTIU
              emit_sltiu64_32(sh,sl,imm[i],t);
          }
        }else{
          // SLTI(U) with r0 is just stupid,
          // nonetheless examples can be found
          // NOTE(review): the inner if/else is fully bracketed by its own
          // else, so the trailing "else // SLTIU" correctly binds to the
          // outer if — fragile but correct dangling-else usage.
          if(opcode[i]==0x0a) // SLTI
            if(0<imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          else // SLTIU
          {
            if(imm[i]) emit_movimm(1,t);
            else emit_zeroreg(t);
          }
        }
      }
    }
  }
  else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
        if(opcode[i]==0x0c) //ANDI
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
              emit_andimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_andimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]&imm[i],tl); // constant-fold
            }
          }
          else
            emit_zeroreg(tl);
          // ANDI with a zero-extended 16-bit immediate always clears the top
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          if(rs1[i]) {
            if(sl<0) {
              if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
            }
            if(th>=0) {
              // ORI/XORI leave the upper half unchanged: copy it through
              if(sh<0) {
                emit_loadreg(rs1[i]|64,th);
              }else{
                emit_mov(sh,th);
              }
            }
            if(opcode[i]==0x0d) //ORI
            if(sl<0) {
              emit_orimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_orimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]|imm[i],tl); // constant-fold
            }
            if(opcode[i]==0x0e) //XORI
            if(sl<0) {
              emit_xorimm(tl,imm[i],tl);
            }else{
              if(!((i_regs->wasconst>>sl)&1))
                emit_xorimm(sl,imm[i],tl);
              else
                emit_movimm(constmap[i][sl]^imm[i],tl); // constant-fold
            }
          }
          else {
            // rs1 is r0: OR/XOR with the immediate is just the immediate
            emit_movimm(imm[i],tl);
            if(th>=0) emit_zeroreg(th);
          }
        }
      }
    }
  }
}
2740
// Emit native code for a shift-by-immediate instruction:
// SLL/SRL/SRA, 64-bit DSLL/DSRL/DSRA, and DSLL32/DSRL32/DSRA32
// (which shift by imm+32, so only the opposite half survives).
// i:      instruction index into the decode arrays
// i_regs: host register mapping state for this instruction.
void shiftimm_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]<=0x3) // SLL/SRL/SRA
  {
    if(rt1[i]) {
      signed char s,t;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      //assert(t>=0);
      if(t>=0&&!((i_regs->isconst>>t)&1)){
        if(rs1[i]==0)
        {
          emit_zeroreg(t); // shift of r0 is zero
        }
        else
        {
          // source not mapped: reload into the target and shift in place
          if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
          if(imm[i]) {
            if(opcode2[i]==0) // SLL
            {
              emit_shlimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==2) // SRL
            {
              emit_shrimm(s<0?t:s,imm[i],t);
            }
            if(opcode2[i]==3) // SRA
            {
              emit_sarimm(s<0?t:s,imm[i],t);
            }
          }else{
            // Shift by zero
            if(s>=0 && s!=t) emit_mov(s,t);
          }
        }
      }
      //emit_storereg(rt1[i],t); //DEBUG
    }
  }
  if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
  {
    if(rt1[i]) {
      signed char sh,sl,th,tl;
      th=get_reg(i_regs->regmap,rt1[i]|64); // upper halves via |64
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(tl>=0) {
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else
        {
          assert(sl>=0);
          assert(sh>=0);
          if(imm[i]) {
            if(opcode2[i]==0x38) // DSLL
            {
              // double-width shift: high half first so sl is not clobbered
              if(th>=0) emit_shldimm(sh,sl,imm[i],th);
              emit_shlimm(sl,imm[i],tl);
            }
            if(opcode2[i]==0x3a) // DSRL
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_shrimm(sh,imm[i],th);
            }
            if(opcode2[i]==0x3b) // DSRA
            {
              emit_shrdimm(sl,sh,imm[i],tl);
              if(th>=0) emit_sarimm(sh,imm[i],th);
            }
          }else{
            // Shift by zero
            if(sl!=tl) emit_mov(sl,tl);
            if(th>=0&&sh!=th) emit_mov(sh,th);
          }
        }
      }
    }
  }
  if(opcode2[i]==0x3c) // DSLL32
  {
    if(rt1[i]) {
      signed char sl,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      if(th>=0||tl>=0){
        assert(tl>=0);
        assert(th>=0);
        assert(sl>=0);
        // shifting left by >=32: low source moves to the high half,
        // low result is zero
        emit_mov(sl,th);
        emit_zeroreg(tl);
        if(imm[i]>32)
        {
          emit_shlimm(th,imm[i]&31,th); // remaining shift amount
        }
      }
    }
  }
  if(opcode2[i]==0x3e) // DSRL32
  {
    if(rt1[i]) {
      signed char sh,tl,th;
      tl=get_reg(i_regs->regmap,rt1[i]);
      th=get_reg(i_regs->regmap,rt1[i]|64);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        // shifting right by >=32: high source moves to the low half,
        // high result is zero
        emit_mov(sh,tl);
        if(th>=0) emit_zeroreg(th);
        if(imm[i]>32)
        {
          emit_shrimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
  if(opcode2[i]==0x3f) // DSRA32
  {
    if(rt1[i]) {
      signed char sh,tl;
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      if(tl>=0){
        assert(sh>=0);
        emit_mov(sh,tl);
        if(imm[i]>32)
        {
          emit_sarimm(tl,imm[i]&31,tl);
        }
      }
    }
  }
}
2878
#ifndef shift_assemble
// Fallback when the architecture-specific assembler does not provide
// shift_assemble: report the missing port and abort.
// Fix: the fatal diagnostic now goes to stderr instead of stdout.
void shift_assemble(int i,struct regstat *i_regs)
{
  fprintf(stderr,"Need shift_assemble for this architecture.\n");
  exit(1);
}
#endif
2886
// Emit native code for a load instruction (LB/LH/LW/LBU/LHU/LWU/LD).
// Generates the fast in-RAM path inline and queues a LOADx_STUB for the
// slow path (I/O, out-of-range); known-constant addresses outside RAM are
// emitted as inline_readstub calls instead.
// i:      instruction index into the decode arrays
// i_regs: host register mapping state for this instruction.
void load_assemble(int i,struct regstat *i_regs)
{
  int s,th,tl,addr,map=-1;
  int offset;
  int jaddr=0;   // patch location of the slow-path branch, 0 if none
  int memtarget=0,c=0; // c: address is a known constant; memtarget: it hits RAM
  int fastload_reg_override=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64);
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  offset=imm[i];
  // build the live host register mask for the stub
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
  if(s>=0) {
    c=(i_regs->wasconst>>s)&1;
    if (c) {
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
      if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
    }
  }
  //printf("load_assemble: c=%d\n",c);
  //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
  // FIXME: Even if the load is a NOP, we should check for pagefaults...
#ifdef PCSX
  // NOTE(review): && binds tighter than ||, so this reads
  // (tl<0 && (non-const or 0x1f80 hw area)) || rt1[i]==0 — apparently the
  // intended grouping.
  if(tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80)
    ||rt1[i]==0) {
    // could be FIFO, must perform the read
    // ||dummy read
    assem_debug("(forced read)\n");
    tl=get_reg(i_regs->regmap,-1); // borrow a scratch register for the result
    assert(tl>=0);
  }
#endif
  if(offset||s<0||c) addr=tl;
  else addr=s;
  //if(tl<0) tl=get_reg(i_regs->regmap,-1);
  if(tl>=0) {
    //printf("load_assemble: c=%d\n",c);
    //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
    assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
    reglist&=~(1<<tl);
    if(th>=0) reglist&=~(1<<th);
    if(!using_tlb) {
      if(!c) {
        #ifdef RAM_OFFSET
        map=get_reg(i_regs->regmap,ROREG);
        if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
        #endif
//#define R29_HACK 1
        #ifdef R29_HACK
        // Strmnnrmn's speed hack
        if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
        #endif
        {
          // range check + branch to the slow path, to be patched via jaddr
          jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
        }
      }
      else if(ram_offset&&memtarget) {
        // constant RAM address: pre-bias it by the RAM mapping offset
        emit_addimm(addr,ram_offset,HOST_TEMPREG);
        fastload_reg_override=HOST_TEMPREG;
      }
    }else{ // using tlb
      int x=0;
      if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
      if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
      map=get_reg(i_regs->regmap,TLREG);
      assert(map>=0);
      reglist&=~(1<<map);
      map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
      do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
    }
    int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
    if (opcode[i]==0x20) { // LB
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
          else
          #endif
          {
            //emit_xorimm(addr,3,tl);
            //gen_tlb_addr_r(tl,map);
            //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            // byte lanes are swapped on a little-endian host: adjust via ^3
            if(!c) emit_xorimm(addr,3,tl);
            else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;

            emit_movsbl_indexed_tlb(x,a,map,tl);
          }
        }
        if(jaddr)
          add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x21) { // LH
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
          else
          #endif
          {
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            // halfword lanes are swapped on a little-endian host: adjust via ^2
            if(!c) emit_xorimm(addr,2,tl);
            else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;
            //#ifdef
            //emit_movswl_indexed_tlb(x,tl,map,tl);
            //else
            if(map>=0) {
              gen_tlb_addr_r(a,map);
              emit_movswl_indexed(x,a,tl);
            }else{
              #if 1 //def RAM_OFFSET
              emit_movswl_indexed(x,a,tl);
              #else
              emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
              #endif
            }
          }
        }
        if(jaddr)
          add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x23) { // LW
      if(!c||memtarget) {
        if(!dummy) {
          int a=addr;
          if(fastload_reg_override) a=fastload_reg_override;
          //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_readword_tlb(constmap[i][s]+offset,map,tl);
          else
          #endif
          emit_readword_indexed_tlb(0,a,map,tl);
        }
        if(jaddr)
          add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x24) { // LBU
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
          else
          #endif
          {
            //emit_xorimm(addr,3,tl);
            //gen_tlb_addr_r(tl,map);
            //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            if(!c) emit_xorimm(addr,3,tl);
            else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;

            emit_movzbl_indexed_tlb(x,a,map,tl);
          }
        }
        if(jaddr)
          add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x25) { // LHU
      if(!c||memtarget) {
        if(!dummy) {
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
          else
          #endif
          {
            int x=0,a=tl;
#ifdef BIG_ENDIAN_MIPS
            if(!c) emit_xorimm(addr,2,tl);
            else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
#else
            if(!c) a=addr;
#endif
            if(fastload_reg_override) a=fastload_reg_override;
            //#ifdef
            //emit_movzwl_indexed_tlb(x,tl,map,tl);
            //#else
            if(map>=0) {
              gen_tlb_addr_r(a,map);
              emit_movzwl_indexed(x,a,tl);
            }else{
              #if 1 //def RAM_OFFSET
              emit_movzwl_indexed(x,a,tl);
              #else
              emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
              #endif
            }
          }
        }
        if(jaddr)
          add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
    if (opcode[i]==0x27) { // LWU
      assert(th>=0);
      if(!c||memtarget) {
        if(!dummy) {
          int a=addr;
          if(fastload_reg_override) a=fastload_reg_override;
          //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_readword_tlb(constmap[i][s]+offset,map,tl);
          else
          #endif
          emit_readword_indexed_tlb(0,a,map,tl);
        }
        if(jaddr)
          add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else {
        inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
      }
      emit_zeroreg(th); // LWU zero-extends: clear the upper half
    }
    if (opcode[i]==0x37) { // LD
      if(!c||memtarget) {
        if(!dummy) {
          int a=addr;
          if(fastload_reg_override) a=fastload_reg_override;
          //gen_tlb_addr_r(tl,map);
          //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
          //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
          #ifdef HOST_IMM_ADDR32
          if(c)
            emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
          else
          #endif
          emit_readdword_indexed_tlb(0,a,map,th,tl);
        }
        if(jaddr)
          add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
      }
      else
        inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
    }
  }
  //emit_storereg(rt1[i],tl); // DEBUG
  //if(opcode[i]==0x23)
  //if(opcode[i]==0x24)
  //if(opcode[i]==0x23||opcode[i]==0x24)
  /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
  {
    //emit_pusha();
    save_regs(0x100f);
    emit_readword((int)&last_count,ECX);
    #ifdef __i386__
    if(get_reg(i_regs->regmap,CCREG)<0)
      emit_loadreg(CCREG,HOST_CCREG);
    emit_add(HOST_CCREG,ECX,HOST_CCREG);
    emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
    emit_writeword(HOST_CCREG,(int)&Count);
    #endif
    #ifdef __arm__
    if(get_reg(i_regs->regmap,CCREG)<0)
      emit_loadreg(CCREG,0);
    else
      emit_mov(HOST_CCREG,0);
    emit_add(0,ECX,0);
    emit_addimm(0,2*ccadj[i],0);
    emit_writeword(0,(int)&Count);
    #endif
    emit_call((int)memdebug);
    //emit_popa();
    restore_regs(0x100f);
  }/**/
}
3191
#ifndef loadlr_assemble
// Fallback when the architecture-specific assembler does not provide
// loadlr_assemble (LWL/LWR/LDL/LDR): report the missing port and abort.
// Fix: the fatal diagnostic now goes to stderr instead of stdout.
void loadlr_assemble(int i,struct regstat *i_regs)
{
  fprintf(stderr,"Need loadlr_assemble for this architecture.\n");
  exit(1);
}
#endif
3199
3200void store_assemble(int i,struct regstat *i_regs)
3201{
3202 int s,th,tl,map=-1;
3203 int addr,temp;
3204 int offset;
3205 int jaddr=0,jaddr2,type;
666a299d 3206 int memtarget=0,c=0;
57871462 3207 int agr=AGEN1+(i&1);
b1570849 3208 int faststore_reg_override=0;
57871462 3209 u_int hr,reglist=0;
3210 th=get_reg(i_regs->regmap,rs2[i]|64);
3211 tl=get_reg(i_regs->regmap,rs2[i]);
3212 s=get_reg(i_regs->regmap,rs1[i]);
3213 temp=get_reg(i_regs->regmap,agr);
3214 if(temp<0) temp=get_reg(i_regs->regmap,-1);
3215 offset=imm[i];
3216 if(s>=0) {
3217 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3218 if(c) {
3219 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3220 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3221 }
57871462 3222 }
3223 assert(tl>=0);
3224 assert(temp>=0);
3225 for(hr=0;hr<HOST_REGS;hr++) {
3226 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3227 }
3228 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
3229 if(offset||s<0||c) addr=temp;
3230 else addr=s;
3231 if(!using_tlb) {
3232 if(!c) {
ffb0b9e0 3233 #ifndef PCSX
57871462 3234 #ifdef R29_HACK
3235 // Strmnnrmn's speed hack
4cb76aa4 3236 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
57871462 3237 #endif
4cb76aa4 3238 emit_cmpimm(addr,RAM_SIZE);
57871462 3239 #ifdef DESTRUCTIVE_SHIFT
3240 if(s==addr) emit_mov(s,temp);
3241 #endif
3242 #ifdef R29_HACK
dadf55f2 3243 memtarget=1;
4cb76aa4 3244 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
57871462 3245 #endif
3246 {
3247 jaddr=(int)out;
3248 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3249 // Hint to branch predictor that the branch is unlikely to be taken
3250 if(rs1[i]>=28)
3251 emit_jno_unlikely(0);
3252 else
3253 #endif
3254 emit_jno(0);
3255 }
ffb0b9e0 3256 #else
3257 jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
3258 #endif
57871462 3259 }
a327ad27 3260 else if(ram_offset&&memtarget) {
3261 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3262 faststore_reg_override=HOST_TEMPREG;
3263 }
57871462 3264 }else{ // using tlb
3265 int x=0;
3266 if (opcode[i]==0x28) x=3; // SB
3267 if (opcode[i]==0x29) x=2; // SH
3268 map=get_reg(i_regs->regmap,TLREG);
3269 assert(map>=0);
ea3d2e6e 3270 reglist&=~(1<<map);
57871462 3271 map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
3272 do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
3273 }
3274
3275 if (opcode[i]==0x28) { // SB
3276 if(!c||memtarget) {
97a238a6 3277 int x=0,a=temp;
2002a1db 3278#ifdef BIG_ENDIAN_MIPS
57871462 3279 if(!c) emit_xorimm(addr,3,temp);
3280 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 3281#else