libretro/ios: enable dynarec, update target names
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
4600ba03 24#include <sys/mman.h>
57871462 25
3d624f89 26#include "emu_if.h" //emulator interface
57871462 27
// Debug output switches.
// Define DISASM, or map assem_debug / inv_debug to printf (and remove the
// matching no-op definition below), to trace the assembler and the
// invalidation code.
//#define DISASM
//#define assem_debug printf
//#define inv_debug printf
#define assem_debug(...)
#define inv_debug(...)
57871462 33
34#ifdef __i386__
35#include "assem_x86.h"
36#endif
37#ifdef __x86_64__
38#include "assem_x64.h"
39#endif
40#ifdef __arm__
41#include "assem_arm.h"
42#endif
43
// Platform replacements for the compiler's __clear_cache().
#ifdef __BLACKBERRY_QNX__
#undef __clear_cache
// Arguments are parenthesized and the length is computed on char pointers
// (arithmetic on void* is a GNU extension). The expansion is an expression,
// so callers supply the terminating semicolon themselves.
#define __clear_cache(start,end) msync((start), (size_t)((char *)(end) - (char *)(start)), MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE)
#elif defined(__MACH__)
#include <libkern/OSCacheControl.h>
// The definition below is emitted as mach_clear_cache() because of this macro.
#define __clear_cache mach_clear_cache
static void __clear_cache(void *start, void *end) {
  size_t len = (char *)end - (char *)start;
  sys_dcache_flush(start, len);
  sys_icache_invalidate(start, len);
}
#endif
a4874585 56
// MAXBLOCK sizes the per-instruction analysis arrays below.
#define MAXBLOCK 4096
// Used in the code-cache expiry distance checks (see check_addr/get_addr).
#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 59
57871462 60struct regstat
61{
62 signed char regmap_entry[HOST_REGS];
63 signed char regmap[HOST_REGS];
64 uint64_t was32;
65 uint64_t is32;
66 uint64_t wasdirty;
67 uint64_t dirty;
68 uint64_t u;
69 uint64_t uu;
70 u_int wasconst;
71 u_int isconst;
8575a877 72 u_int loadedconst; // host regs that have constants loaded
73 u_int waswritten; // MIPS regs that were used as store base before
57871462 74};
75
76struct ll_entry
77{
78 u_int vaddr;
79 u_int reg32;
80 void *addr;
81 struct ll_entry *next;
82};
83
84 u_int start;
85 u_int *source;
86 u_int pagelimit;
87 char insn[MAXBLOCK][10];
88 u_char itype[MAXBLOCK];
89 u_char opcode[MAXBLOCK];
90 u_char opcode2[MAXBLOCK];
91 u_char bt[MAXBLOCK];
92 u_char rs1[MAXBLOCK];
93 u_char rs2[MAXBLOCK];
94 u_char rt1[MAXBLOCK];
95 u_char rt2[MAXBLOCK];
96 u_char us1[MAXBLOCK];
97 u_char us2[MAXBLOCK];
98 u_char dep1[MAXBLOCK];
99 u_char dep2[MAXBLOCK];
100 u_char lt1[MAXBLOCK];
bedfea38 101 static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
102 static uint64_t gte_rt[MAXBLOCK];
103 static uint64_t gte_unneeded[MAXBLOCK];
ffb0b9e0 104 static u_int smrv[32]; // speculated MIPS register values
105 static u_int smrv_strong; // mask or regs that are likely to have correct values
106 static u_int smrv_weak; // same, but somewhat less likely
107 static u_int smrv_strong_next; // same, but after current insn executes
108 static u_int smrv_weak_next;
57871462 109 int imm[MAXBLOCK];
110 u_int ba[MAXBLOCK];
111 char likely[MAXBLOCK];
112 char is_ds[MAXBLOCK];
e1190b87 113 char ooo[MAXBLOCK];
57871462 114 uint64_t unneeded_reg[MAXBLOCK];
115 uint64_t unneeded_reg_upper[MAXBLOCK];
116 uint64_t branch_unneeded_reg[MAXBLOCK];
117 uint64_t branch_unneeded_reg_upper[MAXBLOCK];
118 uint64_t p32[MAXBLOCK];
119 uint64_t pr32[MAXBLOCK];
120 signed char regmap_pre[MAXBLOCK][HOST_REGS];
956f3129 121 static uint64_t current_constmap[HOST_REGS];
122 static uint64_t constmap[MAXBLOCK][HOST_REGS];
123 static struct regstat regs[MAXBLOCK];
124 static struct regstat branch_regs[MAXBLOCK];
e1190b87 125 signed char minimum_free_regs[MAXBLOCK];
57871462 126 u_int needed_reg[MAXBLOCK];
127 uint64_t requires_32bit[MAXBLOCK];
128 u_int wont_dirty[MAXBLOCK];
129 u_int will_dirty[MAXBLOCK];
130 int ccadj[MAXBLOCK];
131 int slen;
132 u_int instr_addr[MAXBLOCK];
133 u_int link_addr[MAXBLOCK][3];
134 int linkcount;
135 u_int stubs[MAXBLOCK*3][8];
136 int stubcount;
137 u_int literals[1024][2];
138 int literalcount;
139 int is_delayslot;
140 int cop1_usable;
141 u_char *out;
142 struct ll_entry *jump_in[4096];
143 struct ll_entry *jump_out[4096];
144 struct ll_entry *jump_dirty[4096];
145 u_int hash_table[65536][4] __attribute__((aligned(16)));
146 char shadow[1048576] __attribute__((aligned(16)));
147 void *copy;
148 int expirep;
af4ee1fe 149#ifndef PCSX
57871462 150 u_int using_tlb;
af4ee1fe 151#else
152 static const u_int using_tlb=0;
153#endif
2f546f9a 154 int new_dynarec_did_compile;
0ff8c62c 155 int new_dynarec_hacks;
57871462 156 u_int stop_after_jal;
a327ad27 157#ifndef RAM_FIXED
158 static u_int ram_offset;
159#else
160 static const u_int ram_offset=0;
161#endif
57871462 162 extern u_char restore_candidate[512];
163 extern int cycle_count;
164
/* registers that may be allocated */
/* 1-31 gpr */
#define HIREG 32 // hi
#define LOREG 33 // lo
#define FSREG 34 // FPU status (FCSR)
#define CSREG 35 // Coprocessor status
#define CCREG 36 // Cycle count
#define INVCP 37 // Pointer to invalid_code
#define MMREG 38 // Pointer to memory_map
#define ROREG 39 // ram offset (if rdram!=0x80000000)
#define TEMPREG 40
#define FTEMP 40 // FPU temporary register (same number as TEMPREG)
#define PTEMP 41 // Prefetch temporary register
#define TLREG 42 // TLB mapping offset
#define RHASH 43 // Return address hash
#define RHTBL 44 // Return address hash table address
#define RTEMP 45 // JR/JALR address register
#define MAXREG 45
#define AGEN1 46 // Address generation temporary register
#define AGEN2 47 // Address generation temporary register
#define MGEN1 48 // Maptable address generation temporary register
#define MGEN2 49 // Maptable address generation temporary register
#define BTREG 50 // Branch target temporary register

/* instruction types */
#define NOP 0     // No operation
#define LOAD 1    // Load
#define STORE 2   // Store
#define LOADLR 3  // Unaligned load
#define STORELR 4 // Unaligned store
#define MOV 5     // Move
#define ALU 6     // Arithmetic/logic
#define MULTDIV 7 // Multiply/divide
#define SHIFT 8   // Shift by register
#define SHIFTIMM 9// Shift by immediate
#define IMM16 10  // 16-bit immediate
#define RJUMP 11  // Unconditional jump to register
#define UJUMP 12  // Unconditional jump
#define CJUMP 13  // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
#define SJUMP 14  // Conditional branch (regimm format)
#define COP0 15   // Coprocessor 0
#define COP1 16   // Coprocessor 1
#define C1LS 17   // Coprocessor 1 load/store
#define FJUMP 18  // Conditional branch (floating point)
#define FLOAT 19  // Floating point unit
#define FCONV 20  // Convert integer to float
#define FCOMP 21  // Floating point compare (sets FSREG)
#define SYSCALL 22// SYSCALL
#define OTHER 23  // Other
#define SPAN 24   // Branch/delay slot spans 2 pages
#define NI 25     // Not implemented
#define HLECALL 26// PCSX fake opcodes for HLE
#define COP2 27   // Coprocessor 2 move
#define C2LS 28   // Coprocessor 2 load/store
#define C2OP 29   // Coprocessor 2 operation
#define INTCALL 30// Call interpreter to handle rare corner cases

/* stubs */
#define CC_STUB 1
#define FP_STUB 2
#define LOADB_STUB 3
#define LOADH_STUB 4
#define LOADW_STUB 5
#define LOADD_STUB 6
#define LOADBU_STUB 7
#define LOADHU_STUB 8
#define STOREB_STUB 9
#define STOREH_STUB 10
#define STOREW_STUB 11
#define STORED_STUB 12
#define STORELR_STUB 13
#define INVCODE_STUB 14

/* branch codes */
#define TAKEN 1
#define NOTTAKEN 2
#define NULLDS 3
242
243// asm linkage
244int new_recompile_block(int addr);
245void *get_addr_ht(u_int vaddr);
246void invalidate_block(u_int block);
247void invalidate_addr(u_int addr);
248void remove_hash(int vaddr);
249void jump_vaddr();
250void dyna_linker();
251void dyna_linker_ds();
252void verify_code();
253void verify_code_vm();
254void verify_code_ds();
255void cc_interrupt();
256void fp_exception();
257void fp_exception_ds();
258void jump_syscall();
7139f3c8 259void jump_syscall_hle();
57871462 260void jump_eret();
7139f3c8 261void jump_hlecall();
1e973cb0 262void jump_intcall();
7139f3c8 263void new_dyna_leave();
57871462 264
265// TLB
266void TLBWI_new();
267void TLBWR_new();
268void read_nomem_new();
269void read_nomemb_new();
270void read_nomemh_new();
271void read_nomemd_new();
272void write_nomem_new();
273void write_nomemb_new();
274void write_nomemh_new();
275void write_nomemd_new();
276void write_rdram_new();
277void write_rdramb_new();
278void write_rdramh_new();
279void write_rdramd_new();
280extern u_int memory_map[1048576];
281
282// Needed by assembler
283void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
284void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
285void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
286void load_all_regs(signed char i_regmap[]);
287void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
288void load_regs_entry(int t);
289void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
290
291int tracedebug=0;
292
//#define DEBUG_CYCLE_COUNT 1

#define NO_CYCLE_PENALTY_THR 12

int cycle_multiplier; // 100 for 1.0

// Scale a cycle count by cycle_multiplier, which is a percentage
// (100 == 1.0). Half of the divisor, carrying the sign of x, is added
// before the truncating division by 100.
static int CLOCK_ADJUST(int x)
{
  // Take the sign with a comparison: right-shifting a negative int is
  // implementation-defined and the old (x>>31) also assumed 32-bit int.
  int s=(x<0)?-1:1;
  return (x * cycle_multiplier + s * 50) / 100;
}
304
94d23bb9 305static void tlb_hacks()
57871462 306{
94d23bb9 307#ifndef DISABLE_TLB
57871462 308 // Goldeneye hack
309 if (strncmp((char *) ROM_HEADER->nom, "GOLDENEYE",9) == 0)
310 {
311 u_int addr;
312 int n;
313 switch (ROM_HEADER->Country_code&0xFF)
314 {
315 case 0x45: // U
316 addr=0x34b30;
317 break;
318 case 0x4A: // J
319 addr=0x34b70;
320 break;
321 case 0x50: // E
322 addr=0x329f0;
323 break;
324 default:
325 // Unknown country code
326 addr=0;
327 break;
328 }
329 u_int rom_addr=(u_int)rom;
330 #ifdef ROM_COPY
331 // Since memory_map is 32-bit, on 64-bit systems the rom needs to be
332 // in the lower 4G of memory to use this hack. Copy it if necessary.
333 if((void *)rom>(void *)0xffffffff) {
334 munmap(ROM_COPY, 67108864);
335 if(mmap(ROM_COPY, 12582912,
336 PROT_READ | PROT_WRITE,
337 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
338 -1, 0) <= 0) {printf("mmap() failed\n");}
339 memcpy(ROM_COPY,rom,12582912);
340 rom_addr=(u_int)ROM_COPY;
341 }
342 #endif
343 if(addr) {
344 for(n=0x7F000;n<0x80000;n++) {
345 memory_map[n]=(((u_int)(rom_addr+addr-0x7F000000))>>2)|0x40000000;
346 }
347 }
348 }
94d23bb9 349#endif
57871462 350}
351
94d23bb9 352static u_int get_page(u_int vaddr)
57871462 353{
0ce47d46 354#ifndef PCSX
57871462 355 u_int page=(vaddr^0x80000000)>>12;
0ce47d46 356#else
357 u_int page=vaddr&~0xe0000000;
358 if (page < 0x1000000)
359 page &= ~0x0e00000; // RAM mirrors
360 page>>=12;
361#endif
94d23bb9 362#ifndef DISABLE_TLB
57871462 363 if(page>262143&&tlb_LUT_r[vaddr>>12]) page=(tlb_LUT_r[vaddr>>12]^0x80000000)>>12;
94d23bb9 364#endif
57871462 365 if(page>2048) page=2048+(page&2047);
94d23bb9 366 return page;
367}
368
d25604ca 369#ifndef PCSX
94d23bb9 370static u_int get_vpage(u_int vaddr)
371{
372 u_int vpage=(vaddr^0x80000000)>>12;
373#ifndef DISABLE_TLB
57871462 374 if(vpage>262143&&tlb_LUT_r[vaddr>>12]) vpage&=2047; // jump_dirty uses a hash of the virtual address instead
94d23bb9 375#endif
57871462 376 if(vpage>2048) vpage=2048+(vpage&2047);
94d23bb9 377 return vpage;
378}
d25604ca 379#else
380// no virtual mem in PCSX
381static u_int get_vpage(u_int vaddr)
382{
383 return get_page(vaddr);
384}
385#endif
94d23bb9 386
387// Get address from virtual address
388// This is called from the recompiled JR/JALR instructions
389void *get_addr(u_int vaddr)
390{
391 u_int page=get_page(vaddr);
392 u_int vpage=get_vpage(vaddr);
57871462 393 struct ll_entry *head;
394 //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
395 head=jump_in[page];
396 while(head!=NULL) {
397 if(head->vaddr==vaddr&&head->reg32==0) {
398 //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
399 int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
400 ht_bin[3]=ht_bin[1];
401 ht_bin[2]=ht_bin[0];
402 ht_bin[1]=(int)head->addr;
403 ht_bin[0]=vaddr;
404 return head->addr;
405 }
406 head=head->next;
407 }
408 head=jump_dirty[vpage];
409 while(head!=NULL) {
410 if(head->vaddr==vaddr&&head->reg32==0) {
411 //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
412 // Don't restore blocks which are about to expire from the cache
413 if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
414 if(verify_dirty(head->addr)) {
415 //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
416 invalid_code[vaddr>>12]=0;
9be4ba64 417 inv_code_start=inv_code_end=~0;
63cb0298 418#ifndef DISABLE_TLB
57871462 419 memory_map[vaddr>>12]|=0x40000000;
63cb0298 420#endif
57871462 421 if(vpage<2048) {
94d23bb9 422#ifndef DISABLE_TLB
57871462 423 if(tlb_LUT_r[vaddr>>12]) {
424 invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
425 memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
426 }
94d23bb9 427#endif
57871462 428 restore_candidate[vpage>>3]|=1<<(vpage&7);
429 }
430 else restore_candidate[page>>3]|=1<<(page&7);
431 int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
432 if(ht_bin[0]==vaddr) {
433 ht_bin[1]=(int)head->addr; // Replace existing entry
434 }
435 else
436 {
437 ht_bin[3]=ht_bin[1];
438 ht_bin[2]=ht_bin[0];
439 ht_bin[1]=(int)head->addr;
440 ht_bin[0]=vaddr;
441 }
442 return head->addr;
443 }
444 }
445 head=head->next;
446 }
447 //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
448 int r=new_recompile_block(vaddr);
449 if(r==0) return get_addr(vaddr);
450 // Execute in unmapped page, generate pagefault execption
451 Status|=2;
452 Cause=(vaddr<<31)|0x8;
453 EPC=(vaddr&1)?vaddr-5:vaddr;
454 BadVAddr=(vaddr&~1);
455 Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
456 EntryHi=BadVAddr&0xFFFFE000;
457 return get_addr_ht(0x80000000);
458}
459// Look up address in hash table first
460void *get_addr_ht(u_int vaddr)
461{
462 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
463 int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
464 if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
465 if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
466 return get_addr(vaddr);
467}
468
469void *get_addr_32(u_int vaddr,u_int flags)
470{
7139f3c8 471#ifdef FORCE32
472 return get_addr(vaddr);
560e4a12 473#else
57871462 474 //printf("TRACE: count=%d next=%d (get_addr_32 %x,flags %x)\n",Count,next_interupt,vaddr,flags);
475 int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
476 if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
477 if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
94d23bb9 478 u_int page=get_page(vaddr);
479 u_int vpage=get_vpage(vaddr);
57871462 480 struct ll_entry *head;
481 head=jump_in[page];
482 while(head!=NULL) {
483 if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
484 //printf("TRACE: count=%d next=%d (get_addr_32 match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
485 if(head->reg32==0) {
486 int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
487 if(ht_bin[0]==-1) {
488 ht_bin[1]=(int)head->addr;
489 ht_bin[0]=vaddr;
490 }else if(ht_bin[2]==-1) {
491 ht_bin[3]=(int)head->addr;
492 ht_bin[2]=vaddr;
493 }
494 //ht_bin[3]=ht_bin[1];
495 //ht_bin[2]=ht_bin[0];
496 //ht_bin[1]=(int)head->addr;
497 //ht_bin[0]=vaddr;
498 }
499 return head->addr;
500 }
501 head=head->next;
502 }
503 head=jump_dirty[vpage];
504 while(head!=NULL) {
505 if(head->vaddr==vaddr&&(head->reg32&flags)==0) {
506 //printf("TRACE: count=%d next=%d (get_addr_32 match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
507 // Don't restore blocks which are about to expire from the cache
508 if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
509 if(verify_dirty(head->addr)) {
510 //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
511 invalid_code[vaddr>>12]=0;
9be4ba64 512 inv_code_start=inv_code_end=~0;
57871462 513 memory_map[vaddr>>12]|=0x40000000;
514 if(vpage<2048) {
94d23bb9 515#ifndef DISABLE_TLB
57871462 516 if(tlb_LUT_r[vaddr>>12]) {
517 invalid_code[tlb_LUT_r[vaddr>>12]>>12]=0;
518 memory_map[tlb_LUT_r[vaddr>>12]>>12]|=0x40000000;
519 }
94d23bb9 520#endif
57871462 521 restore_candidate[vpage>>3]|=1<<(vpage&7);
522 }
523 else restore_candidate[page>>3]|=1<<(page&7);
524 if(head->reg32==0) {
525 int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
526 if(ht_bin[0]==-1) {
527 ht_bin[1]=(int)head->addr;
528 ht_bin[0]=vaddr;
529 }else if(ht_bin[2]==-1) {
530 ht_bin[3]=(int)head->addr;
531 ht_bin[2]=vaddr;
532 }
533 //ht_bin[3]=ht_bin[1];
534 //ht_bin[2]=ht_bin[0];
535 //ht_bin[1]=(int)head->addr;
536 //ht_bin[0]=vaddr;
537 }
538 return head->addr;
539 }
540 }
541 head=head->next;
542 }
543 //printf("TRACE: count=%d next=%d (get_addr_32 no-match %x,flags %x)\n",Count,next_interupt,vaddr,flags);
544 int r=new_recompile_block(vaddr);
545 if(r==0) return get_addr(vaddr);
546 // Execute in unmapped page, generate pagefault execption
547 Status|=2;
548 Cause=(vaddr<<31)|0x8;
549 EPC=(vaddr&1)?vaddr-5:vaddr;
550 BadVAddr=(vaddr&~1);
551 Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
552 EntryHi=BadVAddr&0xFFFFE000;
553 return get_addr_ht(0x80000000);
560e4a12 554#endif
57871462 555}
556
557void clear_all_regs(signed char regmap[])
558{
559 int hr;
560 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
561}
562
563signed char get_reg(signed char regmap[],int r)
564{
565 int hr;
566 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
567 return -1;
568}
569
570// Find a register that is available for two consecutive cycles
571signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
572{
573 int hr;
574 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
575 return -1;
576}
577
578int count_free_regs(signed char regmap[])
579{
580 int count=0;
581 int hr;
582 for(hr=0;hr<HOST_REGS;hr++)
583 {
584 if(hr!=EXCLUDE_REG) {
585 if(regmap[hr]<0) count++;
586 }
587 }
588 return count;
589}
590
591void dirty_reg(struct regstat *cur,signed char reg)
592{
593 int hr;
594 if(!reg) return;
595 for (hr=0;hr<HOST_REGS;hr++) {
596 if((cur->regmap[hr]&63)==reg) {
597 cur->dirty|=1<<hr;
598 }
599 }
600}
601
602// If we dirty the lower half of a 64 bit register which is now being
603// sign-extended, we need to dump the upper half.
604// Note: Do this only after completion of the instruction, because
605// some instructions may need to read the full 64-bit value even if
606// overwriting it (eg SLTI, DSRA32).
607static void flush_dirty_uppers(struct regstat *cur)
608{
609 int hr,reg;
610 for (hr=0;hr<HOST_REGS;hr++) {
611 if((cur->dirty>>hr)&1) {
612 reg=cur->regmap[hr];
613 if(reg>=64)
614 if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
615 }
616 }
617}
618
619void set_const(struct regstat *cur,signed char reg,uint64_t value)
620{
621 int hr;
622 if(!reg) return;
623 for (hr=0;hr<HOST_REGS;hr++) {
624 if(cur->regmap[hr]==reg) {
625 cur->isconst|=1<<hr;
956f3129 626 current_constmap[hr]=value;
57871462 627 }
628 else if((cur->regmap[hr]^64)==reg) {
629 cur->isconst|=1<<hr;
956f3129 630 current_constmap[hr]=value>>32;
57871462 631 }
632 }
633}
634
635void clear_const(struct regstat *cur,signed char reg)
636{
637 int hr;
638 if(!reg) return;
639 for (hr=0;hr<HOST_REGS;hr++) {
640 if((cur->regmap[hr]&63)==reg) {
641 cur->isconst&=~(1<<hr);
642 }
643 }
644}
645
646int is_const(struct regstat *cur,signed char reg)
647{
648 int hr;
79c75f1b 649 if(reg<0) return 0;
57871462 650 if(!reg) return 1;
651 for (hr=0;hr<HOST_REGS;hr++) {
652 if((cur->regmap[hr]&63)==reg) {
653 return (cur->isconst>>hr)&1;
654 }
655 }
656 return 0;
657}
658uint64_t get_const(struct regstat *cur,signed char reg)
659{
660 int hr;
661 if(!reg) return 0;
662 for (hr=0;hr<HOST_REGS;hr++) {
663 if(cur->regmap[hr]==reg) {
956f3129 664 return current_constmap[hr];
57871462 665 }
666 }
c43b5311 667 SysPrintf("Unknown constant in r%d\n",reg);
57871462 668 exit(1);
669}
670
671// Least soon needed registers
672// Look at the next ten instructions and see which registers
673// will be used. Try not to reallocate these.
674void lsn(u_char hsn[], int i, int *preferred_reg)
675{
676 int j;
677 int b=-1;
678 for(j=0;j<9;j++)
679 {
680 if(i+j>=slen) {
681 j=slen-i-1;
682 break;
683 }
684 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
685 {
686 // Don't go past an unconditonal jump
687 j++;
688 break;
689 }
690 }
691 for(;j>=0;j--)
692 {
693 if(rs1[i+j]) hsn[rs1[i+j]]=j;
694 if(rs2[i+j]) hsn[rs2[i+j]]=j;
695 if(rt1[i+j]) hsn[rt1[i+j]]=j;
696 if(rt2[i+j]) hsn[rt2[i+j]]=j;
697 if(itype[i+j]==STORE || itype[i+j]==STORELR) {
698 // Stores can allocate zero
699 hsn[rs1[i+j]]=j;
700 hsn[rs2[i+j]]=j;
701 }
702 // On some architectures stores need invc_ptr
703 #if defined(HOST_IMM8)
b9b61529 704 if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
57871462 705 hsn[INVCP]=j;
706 }
707 #endif
708 if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
709 {
710 hsn[CCREG]=j;
711 b=j;
712 }
713 }
714 if(b>=0)
715 {
716 if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
717 {
718 // Follow first branch
719 int t=(ba[i+b]-start)>>2;
720 j=7-b;if(t+j>=slen) j=slen-t-1;
721 for(;j>=0;j--)
722 {
723 if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
724 if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
725 //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
726 //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
727 }
728 }
729 // TODO: preferred register based on backward branch
730 }
731 // Delay slot should preferably not overwrite branch conditions or cycle count
732 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
733 if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
734 if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
735 hsn[CCREG]=1;
736 // ...or hash tables
737 hsn[RHASH]=1;
738 hsn[RHTBL]=1;
739 }
740 // Coprocessor load/store needs FTEMP, even if not declared
b9b61529 741 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 742 hsn[FTEMP]=0;
743 }
744 // Load L/R also uses FTEMP as a temporary register
745 if(itype[i]==LOADLR) {
746 hsn[FTEMP]=0;
747 }
b7918751 748 // Also SWL/SWR/SDL/SDR
749 if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
57871462 750 hsn[FTEMP]=0;
751 }
752 // Don't remove the TLB registers either
b9b61529 753 if(itype[i]==LOAD || itype[i]==LOADLR || itype[i]==STORE || itype[i]==STORELR || itype[i]==C1LS || itype[i]==C2LS) {
57871462 754 hsn[TLREG]=0;
755 }
756 // Don't remove the miniht registers
757 if(itype[i]==UJUMP||itype[i]==RJUMP)
758 {
759 hsn[RHASH]=0;
760 hsn[RHTBL]=0;
761 }
762}
763
764// We only want to allocate registers if we're going to use them again soon
765int needed_again(int r, int i)
766{
767 int j;
768 int b=-1;
769 int rn=10;
57871462 770
771 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
772 {
773 if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
774 return 0; // Don't need any registers if exiting the block
775 }
776 for(j=0;j<9;j++)
777 {
778 if(i+j>=slen) {
779 j=slen-i-1;
780 break;
781 }
782 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
783 {
784 // Don't go past an unconditonal jump
785 j++;
786 break;
787 }
1e973cb0 788 if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
57871462 789 {
790 break;
791 }
792 }
793 for(;j>=1;j--)
794 {
795 if(rs1[i+j]==r) rn=j;
796 if(rs2[i+j]==r) rn=j;
797 if((unneeded_reg[i+j]>>r)&1) rn=10;
798 if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
799 {
800 b=j;
801 }
802 }
803 /*
804 if(b>=0)
805 {
806 if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
807 {
808 // Follow first branch
809 int o=rn;
810 int t=(ba[i+b]-start)>>2;
811 j=7-b;if(t+j>=slen) j=slen-t-1;
812 for(;j>=0;j--)
813 {
814 if(!((unneeded_reg[t+j]>>r)&1)) {
815 if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
816 if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
817 }
818 else rn=o;
819 }
820 }
821 }*/
b7217e13 822 if(rn<10) return 1;
57871462 823 return 0;
824}
825
826// Try to match register allocations at the end of a loop with those
827// at the beginning
828int loop_reg(int i, int r, int hr)
829{
830 int j,k;
831 for(j=0;j<9;j++)
832 {
833 if(i+j>=slen) {
834 j=slen-i-1;
835 break;
836 }
837 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
838 {
839 // Don't go past an unconditonal jump
840 j++;
841 break;
842 }
843 }
844 k=0;
845 if(i>0){
846 if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
847 k--;
848 }
849 for(;k<j;k++)
850 {
851 if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
852 if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
853 if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
854 {
855 if(ba[i+k]>=start && ba[i+k]<(start+i*4))
856 {
857 int t=(ba[i+k]-start)>>2;
858 int reg=get_reg(regs[t].regmap_entry,r);
859 if(reg>=0) return reg;
860 //reg=get_reg(regs[t+1].regmap_entry,r);
861 //if(reg>=0) return reg;
862 }
863 }
864 }
865 return hr;
866}
867
868
869// Allocate every register, preserving source/target regs
870void alloc_all(struct regstat *cur,int i)
871{
872 int hr;
873
874 for(hr=0;hr<HOST_REGS;hr++) {
875 if(hr!=EXCLUDE_REG) {
876 if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
877 ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
878 {
879 cur->regmap[hr]=-1;
880 cur->dirty&=~(1<<hr);
881 }
882 // Don't need zeros
883 if((cur->regmap[hr]&63)==0)
884 {
885 cur->regmap[hr]=-1;
886 cur->dirty&=~(1<<hr);
887 }
888 }
889 }
890}
891
4600ba03 892#ifndef FORCE32
57871462 893void div64(int64_t dividend,int64_t divisor)
894{
895 lo=dividend/divisor;
896 hi=dividend%divisor;
897 //printf("TRACE: ddiv %8x%8x %8x%8x\n" ,(int)reg[HIREG],(int)(reg[HIREG]>>32)
898 // ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
899}
900void divu64(uint64_t dividend,uint64_t divisor)
901{
902 lo=dividend/divisor;
903 hi=dividend%divisor;
904 //printf("TRACE: ddivu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
905 // ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
906}
907
908void mult64(uint64_t m1,uint64_t m2)
909{
910 unsigned long long int op1, op2, op3, op4;
911 unsigned long long int result1, result2, result3, result4;
912 unsigned long long int temp1, temp2, temp3, temp4;
913 int sign = 0;
914
915 if (m1 < 0)
916 {
917 op2 = -m1;
918 sign = 1 - sign;
919 }
920 else op2 = m1;
921 if (m2 < 0)
922 {
923 op4 = -m2;
924 sign = 1 - sign;
925 }
926 else op4 = m2;
927
928 op1 = op2 & 0xFFFFFFFF;
929 op2 = (op2 >> 32) & 0xFFFFFFFF;
930 op3 = op4 & 0xFFFFFFFF;
931 op4 = (op4 >> 32) & 0xFFFFFFFF;
932
933 temp1 = op1 * op3;
934 temp2 = (temp1 >> 32) + op1 * op4;
935 temp3 = op2 * op3;
936 temp4 = (temp3 >> 32) + op2 * op4;
937
938 result1 = temp1 & 0xFFFFFFFF;
939 result2 = temp2 + (temp3 & 0xFFFFFFFF);
940 result3 = (result2 >> 32) + temp4;
941 result4 = (result3 >> 32);
942
943 lo = result1 | (result2 << 32);
944 hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
945 if (sign)
946 {
947 hi = ~hi;
948 if (!lo) hi++;
949 else lo = ~lo + 1;
950 }
951}
952
953void multu64(uint64_t m1,uint64_t m2)
954{
955 unsigned long long int op1, op2, op3, op4;
956 unsigned long long int result1, result2, result3, result4;
957 unsigned long long int temp1, temp2, temp3, temp4;
958
959 op1 = m1 & 0xFFFFFFFF;
960 op2 = (m1 >> 32) & 0xFFFFFFFF;
961 op3 = m2 & 0xFFFFFFFF;
962 op4 = (m2 >> 32) & 0xFFFFFFFF;
963
964 temp1 = op1 * op3;
965 temp2 = (temp1 >> 32) + op1 * op4;
966 temp3 = op2 * op3;
967 temp4 = (temp3 >> 32) + op2 * op4;
968
969 result1 = temp1 & 0xFFFFFFFF;
970 result2 = temp2 + (temp3 & 0xFFFFFFFF);
971 result3 = (result2 >> 32) + temp4;
972 result4 = (result3 >> 32);
973
974 lo = result1 | (result2 << 32);
975 hi = (result3 & 0xFFFFFFFF) | (result4 << 32);
976
977 //printf("TRACE: dmultu %8x%8x %8x%8x\n",(int)reg[HIREG],(int)(reg[HIREG]>>32)
978 // ,(int)reg[LOREG],(int)(reg[LOREG]>>32));
979}
980
// Merge for an unaligned "load left": keep the low `bits` bits of original
// and fill the rest with loaded shifted left by `bits`.
// bits == 0 returns loaded unchanged. bits must be below 64.
// The parameter is spelled unsigned int, the same type as u_int.
uint64_t ldl_merge(uint64_t original,uint64_t loaded,unsigned int bits)
{
  if(bits) {
    // Clear everything above the low `bits` bits
    original<<=64-bits;
    original>>=64-bits;
    loaded<<=bits;
    original|=loaded;
  }
  else original=loaded;
  return original;
}
// Merge for an unaligned "load right": with k = bits^56, keep the high k
// bits of original and fill the rest with loaded shifted right by k.
// bits == 56 (k == 0) returns loaded unchanged. k must be below 64.
// The parameter is spelled unsigned int, the same type as u_int.
uint64_t ldr_merge(uint64_t original,uint64_t loaded,unsigned int bits)
{
  unsigned int k=bits^56;
  if(k) {
    // Clear everything below the high k bits
    original>>=64-k;
    original<<=64-k;
    loaded>>=k;
    original|=loaded;
  }
  else original=loaded;
  return original;
}
4600ba03 1003#endif
57871462 1004
1005#ifdef __i386__
1006#include "assem_x86.c"
1007#endif
1008#ifdef __x86_64__
1009#include "assem_x64.c"
1010#endif
1011#ifdef __arm__
1012#include "assem_arm.c"
1013#endif
1014
1015// Add virtual address mapping to linked list
1016void ll_add(struct ll_entry **head,int vaddr,void *addr)
1017{
1018 struct ll_entry *new_entry;
1019 new_entry=malloc(sizeof(struct ll_entry));
1020 assert(new_entry!=NULL);
1021 new_entry->vaddr=vaddr;
1022 new_entry->reg32=0;
1023 new_entry->addr=addr;
1024 new_entry->next=*head;
1025 *head=new_entry;
1026}
1027
1028// Add virtual address mapping for 32-bit compiled block
1029void ll_add_32(struct ll_entry **head,int vaddr,u_int reg32,void *addr)
1030{
7139f3c8 1031 ll_add(head,vaddr,addr);
1032#ifndef FORCE32
1033 (*head)->reg32=reg32;
1034#endif
57871462 1035}
1036
// Check if an address is already compiled
// but don't return addresses which are about to expire from the cache.
//
// Probes the two slots of vaddr's hash_table bin first, then walks the
// jump_in list of vaddr's page (only entries with reg32==0 are considered).
// Returns the code address that was found, or 0 if there is no usable entry.
void *check_addr(u_int vaddr)
{
  // A bin is four words: {vaddr, code address} in [0],[1] and again in [2],[3]
  u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
  if(ht_bin[0]==vaddr) {
    // Expiry test relative to the current output pointer 'out';
    // a hash hit must additionally pass isclean()
    if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[1])) return (void *)ht_bin[1];
  }
  if(ht_bin[2]==vaddr) {
    if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
      if(isclean(ht_bin[3])) return (void *)ht_bin[3];
  }
  // Hash miss (or rejected hit): fall back to the per-page list
  u_int page=get_page(vaddr);
  struct ll_entry *head;
  head=jump_in[page];
  while(head!=NULL) {
    if(head->vaddr==vaddr&&head->reg32==0) {
      // Same expiry test, here without the MAX_OUTPUT_BLOCK_SIZE offset on the left
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        // Update existing entry with current address
        if(ht_bin[0]==vaddr) {
          ht_bin[1]=(int)head->addr;
          return head->addr;
        }
        if(ht_bin[2]==vaddr) {
          ht_bin[3]=(int)head->addr;
          return head->addr;
        }
        // Insert into hash table with low priority.
        // Don't evict existing entries, as they are probably
        // addresses that are being accessed frequently.
        if(ht_bin[0]==-1) {
          ht_bin[1]=(int)head->addr;
          ht_bin[0]=vaddr;
        }else if(ht_bin[2]==-1) {
          ht_bin[3]=(int)head->addr;
          ht_bin[2]=vaddr;
        }
        return head->addr;
      }
    }
    head=head->next;
  }
  return 0;
}
1082
1083void remove_hash(int vaddr)
1084{
1085 //printf("remove hash: %x\n",vaddr);
1086 int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
1087 if(ht_bin[2]==vaddr) {
1088 ht_bin[2]=ht_bin[3]=-1;
1089 }
1090 if(ht_bin[0]==vaddr) {
1091 ht_bin[0]=ht_bin[2];
1092 ht_bin[1]=ht_bin[3];
1093 ht_bin[2]=ht_bin[3]=-1;
1094 }
1095}
1096
1097void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
1098{
1099 struct ll_entry *next;
1100 while(*head) {
1101 if(((u_int)((*head)->addr)>>shift)==(addr>>shift) ||
1102 ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
1103 {
1104 inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
1105 remove_hash((*head)->vaddr);
1106 next=(*head)->next;
1107 free(*head);
1108 *head=next;
1109 }
1110 else
1111 {
1112 head=&((*head)->next);
1113 }
1114 }
1115}
1116
1117// Remove all entries from linked list
1118void ll_clear(struct ll_entry **head)
1119{
1120 struct ll_entry *cur;
1121 struct ll_entry *next;
1122 if(cur=*head) {
1123 *head=0;
1124 while(cur) {
1125 next=cur->next;
1126 free(cur);
1127 cur=next;
1128 }
1129 }
1130}
1131
1132// Dereference the pointers and remove if it matches
1133void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
1134{
1135 while(head) {
1136 int ptr=get_pointer(head->addr);
1137 inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
1138 if(((ptr>>shift)==(addr>>shift)) ||
1139 (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
1140 {
5088bb70 1141 inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
f76eeef9 1142 u_int host_addr=(u_int)kill_pointer(head->addr);
dd3a91a1 1143 #ifdef __arm__
1144 needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
1145 #endif
57871462 1146 }
1147 head=head->next;
1148 }
1149}
1150
1151// This is called when we write to a compiled block (see do_invstub)
f76eeef9 1152void invalidate_page(u_int page)
57871462 1153{
57871462 1154 struct ll_entry *head;
1155 struct ll_entry *next;
1156 head=jump_in[page];
1157 jump_in[page]=0;
1158 while(head!=NULL) {
1159 inv_debug("INVALIDATE: %x\n",head->vaddr);
1160 remove_hash(head->vaddr);
1161 next=head->next;
1162 free(head);
1163 head=next;
1164 }
1165 head=jump_out[page];
1166 jump_out[page]=0;
1167 while(head!=NULL) {
1168 inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
f76eeef9 1169 u_int host_addr=(u_int)kill_pointer(head->addr);
dd3a91a1 1170 #ifdef __arm__
1171 needs_clear_cache[(host_addr-(u_int)BASE_ADDR)>>17]|=1<<(((host_addr-(u_int)BASE_ADDR)>>12)&31);
1172 #endif
57871462 1173 next=head->next;
1174 free(head);
1175 head=next;
1176 }
57871462 1177}
9be4ba64 1178
// Invalidate the compiled code belonging to 'block' (a 4K page number,
// i.e. address>>12) together with neighbouring pages in the range
// first..last, then mark the block as invalid so writes to it are no
// longer trapped.
//   block - page number whose own page (get_page(block<<12)) is always invalidated
//   first - lowest page to invalidate; must be within 4 pages below 'page'
//   last  - upper bound of the pages above 'page' (see note in the loop below)
static void invalidate_block_range(u_int block, u_int first, u_int last)
{
  u_int page=get_page(block<<12);
  //printf("first=%d last=%d\n",first,last);
  invalidate_page(page);
  assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
  assert(last<page+5);
  // Invalidate the adjacent pages if a block crosses a 4K boundary
  while(first<page) {
    invalidate_page(first);
    first++;
  }
  // NOTE(review): this loop stops before 'last' itself, so page 'last' is
  // only invalidated when it equals 'page' - confirm that is intended.
  for(first=page+1;first<last;first++) {
    invalidate_page(first);
  }
  #ifdef __arm__
    do_clear_cache();
  #endif

  // Don't trap writes
  invalid_code[block]=1;
#ifndef DISABLE_TLB
  // If there is a valid TLB entry for this page, remove write protect
  if(tlb_LUT_w[block]) {
    assert(tlb_LUT_r[block]==tlb_LUT_w[block]);
    // CHECK: Is this right?
    memory_map[block]=((tlb_LUT_w[block]&0xFFFFF000)-(block<<12)+(unsigned int)rdram-0x80000000)>>2;
    u_int real_block=tlb_LUT_w[block]>>12;
    invalid_code[real_block]=1;
    if(real_block>=0x80000&&real_block<0x80800) memory_map[real_block]=((u_int)rdram-0x80000000)>>2;
  }
  else if(block>=0x80000&&block<0x80800) memory_map[block]=((u_int)rdram-0x80000000)>>2;
#endif

  // The mini hash table may still reference the code just invalidated
  #ifdef USE_MINI_HT
  memset(mini_ht,-1,sizeof(mini_ht));
  #endif
}
9be4ba64 1217
// Invalidate all compiled code for the 4K page 'block' (address>>12).
// Walks the jump_dirty list of the block's vpage, asks get_bounds() for the
// source range of each entry, and widens [first,last] to cover entries that
// overlap this page; the resulting range is handed to
// invalidate_block_range().
void invalidate_block(u_int block)
{
  u_int page=get_page(block<<12);
  u_int vpage=get_vpage(block<<12);
  inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
  //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
  u_int first,last;
  first=last=page;
  struct ll_entry *head;
  head=jump_dirty[vpage];
  //printf("page=%d vpage=%d\n",page,vpage);
  while(head!=NULL) {
    u_int start,end;
    if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
      get_bounds((int)head->addr,&start,&end);
      //printf("start: %x end: %x\n",start,end);
      // Source range lies inside rdram: compare in units of 4K pages from rdram
      if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) {
        if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
          if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
          if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
        }
      }
#ifndef DISABLE_TLB
      // Range given as addresses >= 0xC0000000: adjust through memory_map first
      if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) {
        if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) {
          if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)<first) first=((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047;
          if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047;
        }
      }
#endif
    }
    head=head->next;
  }
  invalidate_block_range(block,first,last);
}
1253
// Invalidate compiled code affected by a write to 'addr'.
// In PCSX builds, RAM addresses (vpage<2048) are checked against the dirty
// blocks of the containing page and the page before it:
//  - if any block's source range contains the address, the union of those
//    ranges is invalidated and inv_code_start/inv_code_end are reset to ~0;
//  - otherwise nothing is invalidated and inv_code_start..inv_code_end is
//    set to the code-free span around addr that was just established.
// Everything else falls through to invalidate_block().
void invalidate_addr(u_int addr)
{
#ifdef PCSX
  //static int rhits;
  // this check is done by the caller
  //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
  u_int page=get_vpage(addr);
  if(page<2048) { // RAM
    struct ll_entry *head;
    u_int addr_min=~0, addr_max=0;
    u_int mask=RAM_SIZE-1;
    // Same offset within RAM, expressed in the 0x80000000 region
    u_int addr_main=0x80000000|(addr&mask);
    int pg1;
    // Start with the whole 4K page as the candidate code-free span
    inv_code_start=addr_main&~0xfff;
    inv_code_end=addr_main|0xfff;
    pg1=page;
    if (pg1>0) {
      // must check previous page too because of spans..
      pg1--;
      inv_code_start-=0x1000;
    }
    for(;pg1<=page;pg1++) {
      for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
        u_int start,end;
        get_bounds((int)head->addr,&start,&end);
        if(ram_offset) {
          start-=ram_offset;
          end-=ram_offset;
        }
        if(start<=addr_main&&addr_main<end) {
          // Block contains the address: grow the range to invalidate
          if(start<addr_min) addr_min=start;
          if(end>addr_max) addr_max=end;
        }
        else if(addr_main<start) {
          // Block lies above the address: it limits the span from above
          if(start<inv_code_end)
            inv_code_end=start-1;
        }
        else {
          // Block lies below the address: it limits the span from below
          if(end>inv_code_start)
            inv_code_start=end;
        }
      }
    }
    if (addr_min!=~0) {
      inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
      inv_code_start=inv_code_end=~0;
      invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
      return;
    }
    else {
      // Re-apply the upper address bits of the original addr to the span
      inv_code_start=(addr&~mask)|(inv_code_start&mask);
      inv_code_end=(addr&~mask)|(inv_code_end&mask);
      inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
      return;
    }
  }
#endif
  invalidate_block(addr>>12);
}
9be4ba64 1313
dd3a91a1 1314// This is called when loading a save state.
1315// Anything could have changed, so invalidate everything.
57871462 1316void invalidate_all_pages()
1317{
1318 u_int page,n;
1319 for(page=0;page<4096;page++)
1320 invalidate_page(page);
1321 for(page=0;page<1048576;page++)
1322 if(!invalid_code[page]) {
1323 restore_candidate[(page&2047)>>3]|=1<<(page&7);
1324 restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1325 }
1326 #ifdef __arm__
1327 __clear_cache((void *)BASE_ADDR,(void *)BASE_ADDR+(1<<TARGET_SIZE_2));
1328 #endif
1329 #ifdef USE_MINI_HT
1330 memset(mini_ht,-1,sizeof(mini_ht));
1331 #endif
94d23bb9 1332 #ifndef DISABLE_TLB
57871462 1333 // TLB
1334 for(page=0;page<0x100000;page++) {
1335 if(tlb_LUT_r[page]) {
1336 memory_map[page]=((tlb_LUT_r[page]&0xFFFFF000)-(page<<12)+(unsigned int)rdram-0x80000000)>>2;
1337 if(!tlb_LUT_w[page]||!invalid_code[page])
1338 memory_map[page]|=0x40000000; // Write protect
1339 }
1340 else memory_map[page]=-1;
1341 if(page==0x80000) page=0xC0000;
1342 }
1343 tlb_hacks();
94d23bb9 1344 #endif
57871462 1345}
1346
1347// Add an entry to jump_out after making a link
1348void add_link(u_int vaddr,void *src)
1349{
94d23bb9 1350 u_int page=get_page(vaddr);
57871462 1351 inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
76f71c27 1352 int *ptr=(int *)(src+4);
1353 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 1354 ll_add(jump_out+page,vaddr,src);
1355 //int ptr=get_pointer(src);
1356 //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
1357}
1358
// If a code block was found to be unmodified (bit was set in
// restore_candidate) and it remains unmodified (bit is clear
// in invalid_code) then move the entries for that 4K page from
// the dirty list to the clean list.
//
// For every jump_dirty entry of 'page' that passes verify_dirty() and whose
// source pages are all still valid, a jump_in entry pointing at the clean
// entry point is added and matching hash table slots are redirected to it.
// Entries are not removed from jump_dirty here.
void clean_blocks(u_int page)
{
  struct ll_entry *head;
  inv_debug("INV: clean_blocks page=%d\n",page);
  head=jump_dirty[page];
  while(head!=NULL) {
    if(!invalid_code[head->vaddr>>12]) {
      // Don't restore blocks which are about to expire from the cache
      if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
        u_int start,end;
        if(verify_dirty((int)head->addr)) {
          //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
          u_int i;
          u_int inv=0;
          get_bounds((int)head->addr,&start,&end);
          // Source inside rdram: any invalid page in the range blocks the restore
          if(start-(u_int)rdram<RAM_SIZE) {
            for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
              inv|=invalid_code[i];
            }
          }
#ifndef DISABLE_TLB
          if((signed int)head->vaddr>=(signed int)0xC0000000) {
            u_int addr = (head->vaddr+(memory_map[head->vaddr>>12]<<2));
            //printf("addr=%x start=%x end=%x\n",addr,start,end);
            if(addr<start||addr>=end) inv=1;
          }
#endif
          // Note: with DISABLE_TLB defined this 'else' pairs with the rdram
          // range check above; otherwise it pairs with the 0xC0000000 check.
          else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
            inv=1;
          }
          if(!inv) {
            void * clean_addr=(void *)get_clean_addr((int)head->addr);
            // The clean entry point must not be about to expire either
            if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
              u_int ppage=page;
#ifndef DISABLE_TLB
              if(page<2048&&tlb_LUT_r[head->vaddr>>12]) ppage=(tlb_LUT_r[head->vaddr>>12]^0x80000000)>>12;
#endif
              inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
              //printf("page=%x, addr=%x\n",page,head->vaddr);
              //assert(head->vaddr>>12==(page|0x80000));
              ll_add_32(jump_in+ppage,head->vaddr,head->reg32,clean_addr);
              int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
              // Hash slots are only redirected for entries with reg32==0
              if(!head->reg32) {
                if(ht_bin[0]==head->vaddr) {
                  ht_bin[1]=(int)clean_addr; // Replace existing entry
                }
                if(ht_bin[2]==head->vaddr) {
                  ht_bin[3]=(int)clean_addr; // Replace existing entry
                }
              }
            }
          }
        }
      }
    }
    head=head->next;
  }
}
1421
1422
1423void mov_alloc(struct regstat *current,int i)
1424{
1425 // Note: Don't need to actually alloc the source registers
1426 if((~current->is32>>rs1[i])&1) {
1427 //alloc_reg64(current,i,rs1[i]);
1428 alloc_reg64(current,i,rt1[i]);
1429 current->is32&=~(1LL<<rt1[i]);
1430 } else {
1431 //alloc_reg(current,i,rs1[i]);
1432 alloc_reg(current,i,rt1[i]);
1433 current->is32|=(1LL<<rt1[i]);
1434 }
1435 clear_const(current,rs1[i]);
1436 clear_const(current,rt1[i]);
1437 dirty_reg(current,rt1[i]);
1438}
1439
1440void shiftimm_alloc(struct regstat *current,int i)
1441{
57871462 1442 if(opcode2[i]<=0x3) // SLL/SRL/SRA
1443 {
1444 if(rt1[i]) {
1445 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1446 else lt1[i]=rs1[i];
1447 alloc_reg(current,i,rt1[i]);
1448 current->is32|=1LL<<rt1[i];
1449 dirty_reg(current,rt1[i]);
dc49e339 1450 if(is_const(current,rs1[i])) {
1451 int v=get_const(current,rs1[i]);
1452 if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
1453 if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
1454 if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
1455 }
1456 else clear_const(current,rt1[i]);
57871462 1457 }
1458 }
dc49e339 1459 else
1460 {
1461 clear_const(current,rs1[i]);
1462 clear_const(current,rt1[i]);
1463 }
1464
57871462 1465 if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
1466 {
1467 if(rt1[i]) {
1468 if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1469 alloc_reg64(current,i,rt1[i]);
1470 current->is32&=~(1LL<<rt1[i]);
1471 dirty_reg(current,rt1[i]);
1472 }
1473 }
1474 if(opcode2[i]==0x3c) // DSLL32
1475 {
1476 if(rt1[i]) {
1477 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1478 alloc_reg64(current,i,rt1[i]);
1479 current->is32&=~(1LL<<rt1[i]);
1480 dirty_reg(current,rt1[i]);
1481 }
1482 }
1483 if(opcode2[i]==0x3e) // DSRL32
1484 {
1485 if(rt1[i]) {
1486 alloc_reg64(current,i,rs1[i]);
1487 if(imm[i]==32) {
1488 alloc_reg64(current,i,rt1[i]);
1489 current->is32&=~(1LL<<rt1[i]);
1490 } else {
1491 alloc_reg(current,i,rt1[i]);
1492 current->is32|=1LL<<rt1[i];
1493 }
1494 dirty_reg(current,rt1[i]);
1495 }
1496 }
1497 if(opcode2[i]==0x3f) // DSRA32
1498 {
1499 if(rt1[i]) {
1500 alloc_reg64(current,i,rs1[i]);
1501 alloc_reg(current,i,rt1[i]);
1502 current->is32|=1LL<<rt1[i];
1503 dirty_reg(current,rt1[i]);
1504 }
1505 }
1506}
1507
1508void shift_alloc(struct regstat *current,int i)
1509{
1510 if(rt1[i]) {
1511 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
1512 {
1513 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1514 if(rs2[i]) alloc_reg(current,i,rs2[i]);
1515 alloc_reg(current,i,rt1[i]);
e1190b87 1516 if(rt1[i]==rs2[i]) {
1517 alloc_reg_temp(current,i,-1);
1518 minimum_free_regs[i]=1;
1519 }
57871462 1520 current->is32|=1LL<<rt1[i];
1521 } else { // DSLLV/DSRLV/DSRAV
1522 if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1523 if(rs2[i]) alloc_reg(current,i,rs2[i]);
1524 alloc_reg64(current,i,rt1[i]);
1525 current->is32&=~(1LL<<rt1[i]);
1526 if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
e1190b87 1527 {
57871462 1528 alloc_reg_temp(current,i,-1);
e1190b87 1529 minimum_free_regs[i]=1;
1530 }
57871462 1531 }
1532 clear_const(current,rs1[i]);
1533 clear_const(current,rs2[i]);
1534 clear_const(current,rt1[i]);
1535 dirty_reg(current,rt1[i]);
1536 }
1537}
1538
// Register allocation for register-register ALU instructions, selected by
// opcode2[i]: ADD/SUB, SLT/SLTU, the logical ops and the 64-bit DADD/DSUB
// family.  Besides allocating, each branch updates the destination's bit in
// current->is32.  On exit constant tracking is dropped for rs1, rs2 and rt1
// and rt1 is marked dirty, whichever branch ran.
void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        // One operand is r0: only allocate the other if it is used again
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      // Compare in 64 bits unless both sources are flagged 32-bit
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      } else {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      }
    }
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
      // At least one source is not flagged 32-bit: the result is 64-bit
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        if(!((current->uu>>rt1[i])&1)) {
          alloc_reg64(current,i,rt1[i]);
        }
        // Upper half of the destination is mapped: sources need theirs too
        if(get_reg(current->regmap,rt1[i]|64)>=0) {
          if(rs1[i]&&rs2[i]) {
            alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rs2[i]);
          }
          else
          {
            // Is it really worth it to keep 64-bit values in registers?
            #ifdef NATIVE_64BIT
            if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
            if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
            #endif
          }
        }
        current->is32&=~(1LL<<rt1[i]);
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          alloc_reg64(current,i,rs1[i]);
          alloc_reg64(current,i,rs2[i]);
          alloc_reg64(current,i,rt1[i]);
        } else {
          alloc_reg(current,i,rs1[i]);
          alloc_reg(current,i,rs2[i]);
          alloc_reg(current,i,rt1[i]);
        }
      }
      else {
        alloc_reg(current,i,rt1[i]);
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          // DADD used as move, or zeroing
          // If we have a 64-bit source, then make the target 64 bits too
          if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
            if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rt1[i]);
          } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
          if(opcode2[i]>=0x2e&&rs2[i]) {
            // DSUB used as negation - 64-bit result
            // If we have a 32-bit register, extend it to 64 bits
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
        }
      }
      // Destination width flag: 64-bit with two real sources, otherwise
      // inherited from the single source (or 32-bit when both are r0)
      if(rs1[i]&&rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
      } else if(rs1[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs1[i])&1)
          current->is32|=1LL<<rt1[i];
      } else if(rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs2[i])&1)
          current->is32|=1LL<<rt1[i];
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1660
1661void imm16_alloc(struct regstat *current,int i)
1662{
1663 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1664 else lt1[i]=rs1[i];
1665 if(rt1[i]) alloc_reg(current,i,rt1[i]);
1666 if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
1667 current->is32&=~(1LL<<rt1[i]);
1668 if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
1669 // TODO: Could preserve the 32-bit flag if the immediate is zero
1670 alloc_reg64(current,i,rt1[i]);
1671 alloc_reg64(current,i,rs1[i]);
1672 }
1673 clear_const(current,rs1[i]);
1674 clear_const(current,rt1[i]);
1675 }
1676 else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
1677 if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
1678 current->is32|=1LL<<rt1[i];
1679 clear_const(current,rs1[i]);
1680 clear_const(current,rt1[i]);
1681 }
1682 else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
1683 if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
1684 if(rs1[i]!=rt1[i]) {
1685 if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
1686 alloc_reg64(current,i,rt1[i]);
1687 current->is32&=~(1LL<<rt1[i]);
1688 }
1689 }
1690 else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
1691 if(is_const(current,rs1[i])) {
1692 int v=get_const(current,rs1[i]);
1693 if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
1694 if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
1695 if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
1696 }
1697 else clear_const(current,rt1[i]);
1698 }
1699 else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
1700 if(is_const(current,rs1[i])) {
1701 int v=get_const(current,rs1[i]);
1702 set_const(current,rt1[i],v+imm[i]);
1703 }
1704 else clear_const(current,rt1[i]);
1705 current->is32|=1LL<<rt1[i];
1706 }
1707 else {
1708 set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
1709 current->is32|=1LL<<rt1[i];
1710 }
1711 dirty_reg(current,rt1[i]);
1712}
1713
// Register allocation for load instructions (base rs1, destination rt1).
// A load whose destination is r0 or flagged unneeded in current->u is
// treated as a dummy load: only the registers needed to form the address
// are requested.  Constant tracking for rt1 is always dropped.
void load_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
  if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  if(rt1[i]&&!((current->u>>rt1[i])&1)) {
    alloc_reg(current,i,rt1[i]);
    assert(get_reg(current->regmap,rt1[i])>=0);
    if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
    }
    else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      current->is32&=~(1LL<<rt1[i]);
      alloc_reg64(current,i,rt1[i]);
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      minimum_free_regs[i]=HOST_REGS;
    }
    else current->is32|=1LL<<rt1[i];
    dirty_reg(current,rt1[i]);
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    // LWL/LWR need a temporary register for the old value
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP);
      alloc_reg_temp(current,i,-1);
      minimum_free_regs[i]=1;
    }
  }
  else
  {
    // Load to r0 or unneeded register (dummy load)
    // but we still need a register to calculate the address
    if(opcode[i]==0x22||opcode[i]==0x26)
    {
      alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
    }
    // If using TLB, need a register for pointer to the mapping table
    if(using_tlb) alloc_reg(current,i,TLREG);
    alloc_reg_temp(current,i,-1);
    minimum_free_regs[i]=1;
    if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
    {
      alloc_all(current,i);
      alloc_reg64(current,i,FTEMP);
      // Overrides the value of 1 set just above
      minimum_free_regs[i]=HOST_REGS;
    }
  }
}
1768
// Register allocation for store instructions (base rs1, value rs2).
// Always ends by requesting one temporary and setting
// minimum_free_regs[i] to 1.
void store_alloc(struct regstat *current,int i)
{
  clear_const(current,rs2[i]);
  if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,rs2[i]);
  if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
    alloc_reg64(current,i,rs2[i]);
    if(rs2[i]) alloc_reg(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  // (this 'else' belongs to the using_tlb test above and only exists
  // when HOST_IMM8 is defined)
  else alloc_reg(current,i,INVCP);
  #endif
  if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWR/SDL/SDR
    alloc_reg(current,i,FTEMP);
  }
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1792
// Register allocation for coprocessor 1 loads/stores.
// Requests CSREG, FTEMP (64-bit for LDC1/SDC1) and one temporary.
// NOTE(review): unlike c2ls_alloc and store_alloc, minimum_free_regs[i] is
// not set here after alloc_reg_temp - confirm whether that is intentional.
void c1ls_alloc(struct regstat *current,int i)
{
  //clear_const(current,rs1[i]); // FIXME
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,CSREG); // Status
  alloc_reg(current,i,FTEMP);
  if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
    alloc_reg64(current,i,FTEMP);
  }
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  // (this 'else' belongs to the using_tlb test above and only exists
  // when HOST_IMM8 is defined)
  else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
}
1813
// Register allocation for coprocessor 2 loads/stores.
// Requests FTEMP and one temporary, and sets minimum_free_regs[i] to 1.
void c2ls_alloc(struct regstat *current,int i)
{
  clear_const(current,rt1[i]);
  if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
  alloc_reg(current,i,FTEMP);
  // If using TLB, need a register for pointer to the mapping table
  if(using_tlb) alloc_reg(current,i,TLREG);
  #if defined(HOST_IMM8)
  // On CPUs without 32-bit immediates we need a pointer to invalid_code
  // (this 'else' belongs to the using_tlb test above and only exists
  // when HOST_IMM8 is defined)
  else if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
    alloc_reg(current,i,INVCP);
  #endif
  // We need a temporary register for address generation
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1830
57871462 1831#ifndef multdiv_alloc
1832void multdiv_alloc(struct regstat *current,int i)
1833{
1834 // case 0x18: MULT
1835 // case 0x19: MULTU
1836 // case 0x1A: DIV
1837 // case 0x1B: DIVU
1838 // case 0x1C: DMULT
1839 // case 0x1D: DMULTU
1840 // case 0x1E: DDIV
1841 // case 0x1F: DDIVU
1842 clear_const(current,rs1[i]);
1843 clear_const(current,rs2[i]);
1844 if(rs1[i]&&rs2[i])
1845 {
1846 if((opcode2[i]&4)==0) // 32-bit
1847 {
1848 current->u&=~(1LL<<HIREG);
1849 current->u&=~(1LL<<LOREG);
1850 alloc_reg(current,i,HIREG);
1851 alloc_reg(current,i,LOREG);
1852 alloc_reg(current,i,rs1[i]);
1853 alloc_reg(current,i,rs2[i]);
1854 current->is32|=1LL<<HIREG;
1855 current->is32|=1LL<<LOREG;
1856 dirty_reg(current,HIREG);
1857 dirty_reg(current,LOREG);
1858 }
1859 else // 64-bit
1860 {
1861 current->u&=~(1LL<<HIREG);
1862 current->u&=~(1LL<<LOREG);
1863 current->uu&=~(1LL<<HIREG);
1864 current->uu&=~(1LL<<LOREG);
1865 alloc_reg64(current,i,HIREG);
1866 //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
1867 alloc_reg64(current,i,rs1[i]);
1868 alloc_reg64(current,i,rs2[i]);
1869 alloc_all(current,i);
1870 current->is32&=~(1LL<<HIREG);
1871 current->is32&=~(1LL<<LOREG);
1872 dirty_reg(current,HIREG);
1873 dirty_reg(current,LOREG);
e1190b87 1874 minimum_free_regs[i]=HOST_REGS;
57871462 1875 }
1876 }
1877 else
1878 {
1879 // Multiply by zero is zero.
1880 // MIPS does not have a divide by zero exception.
1881 // The result is undefined, we return zero.
1882 alloc_reg(current,i,HIREG);
1883 alloc_reg(current,i,LOREG);
1884 current->is32|=1LL<<HIREG;
1885 current->is32|=1LL<<LOREG;
1886 dirty_reg(current,HIREG);
1887 dirty_reg(current,LOREG);
1888 }
1889}
1890#endif
1891
1892void cop0_alloc(struct regstat *current,int i)
1893{
1894 if(opcode2[i]==0) // MFC0
1895 {
1896 if(rt1[i]) {
1897 clear_const(current,rt1[i]);
1898 alloc_all(current,i);
1899 alloc_reg(current,i,rt1[i]);
1900 current->is32|=1LL<<rt1[i];
1901 dirty_reg(current,rt1[i]);
1902 }
1903 }
1904 else if(opcode2[i]==4) // MTC0
1905 {
1906 if(rs1[i]){
1907 clear_const(current,rs1[i]);
1908 alloc_reg(current,i,rs1[i]);
1909 alloc_all(current,i);
1910 }
1911 else {
1912 alloc_all(current,i); // FIXME: Keep r0
1913 current->u&=~1LL;
1914 alloc_reg(current,i,0);
1915 }
1916 }
1917 else
1918 {
1919 // TLBR/TLBWI/TLBWR/TLBP/ERET
1920 assert(opcode2[i]==0x10);
1921 alloc_all(current,i);
1922 }
e1190b87 1923 minimum_free_regs[i]=HOST_REGS;
57871462 1924}
1925
1926void cop1_alloc(struct regstat *current,int i)
1927{
1928 alloc_reg(current,i,CSREG); // Load status
1929 if(opcode2[i]<3) // MFC1/DMFC1/CFC1
1930 {
7de557a6 1931 if(rt1[i]){
1932 clear_const(current,rt1[i]);
1933 if(opcode2[i]==1) {
1934 alloc_reg64(current,i,rt1[i]); // DMFC1
1935 current->is32&=~(1LL<<rt1[i]);
1936 }else{
1937 alloc_reg(current,i,rt1[i]); // MFC1/CFC1
1938 current->is32|=1LL<<rt1[i];
1939 }
1940 dirty_reg(current,rt1[i]);
57871462 1941 }
57871462 1942 alloc_reg_temp(current,i,-1);
1943 }
1944 else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
1945 {
1946 if(rs1[i]){
1947 clear_const(current,rs1[i]);
1948 if(opcode2[i]==5)
1949 alloc_reg64(current,i,rs1[i]); // DMTC1
1950 else
1951 alloc_reg(current,i,rs1[i]); // MTC1/CTC1
1952 alloc_reg_temp(current,i,-1);
1953 }
1954 else {
1955 current->u&=~1LL;
1956 alloc_reg(current,i,0);
1957 alloc_reg_temp(current,i,-1);
1958 }
1959 }
e1190b87 1960 minimum_free_regs[i]=1;
57871462 1961}
// Register allocation for floating point conversion instructions:
// requests CSREG and one temporary, and sets minimum_free_regs[i] to 1.
void fconv_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
// Register allocation for floating point arithmetic instructions:
// requests CSREG and one temporary, and sets minimum_free_regs[i] to 1.
void float_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
// Register allocation for coprocessor 2 operations: only one temporary
// register is requested.
// NOTE(review): minimum_free_regs[i] is not set here, unlike the other
// allocators that request a temporary - confirm whether that is intentional.
void c2op_alloc(struct regstat *current,int i)
{
  alloc_reg_temp(current,i,-1);
}
// Register allocation for floating point compare instructions:
// requests CSREG and FSREG (marked dirty, as the compare writes the flag),
// plus one temporary, and sets minimum_free_regs[i] to 1.
void fcomp_alloc(struct regstat *current,int i)
{
  alloc_reg(current,i,CSREG); // Load status
  alloc_reg(current,i,FSREG); // Load flags
  dirty_reg(current,FSREG); // Flag will be modified
  alloc_reg_temp(current,i,-1);
  minimum_free_regs[i]=1;
}
1986
// Register allocation for SYSCALL-type instructions: the cycle count
// register is allocated and dirtied, alloc_all() is applied, and all
// constant tracking in 'current' is discarded.
void syscall_alloc(struct regstat *current,int i)
{
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  alloc_all(current,i);
  minimum_free_regs[i]=HOST_REGS;
  current->isconst=0;
}
1995
1996void delayslot_alloc(struct regstat *current,int i)
1997{
1998 switch(itype[i]) {
1999 case UJUMP:
2000 case CJUMP:
2001 case SJUMP:
2002 case RJUMP:
2003 case FJUMP:
2004 case SYSCALL:
7139f3c8 2005 case HLECALL:
57871462 2006 case SPAN:
2007 assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1);
c43b5311 2008 SysPrintf("Disabled speculative precompilation\n");
57871462 2009 stop_after_jal=1;
2010 break;
2011 case IMM16:
2012 imm16_alloc(current,i);
2013 break;
2014 case LOAD:
2015 case LOADLR:
2016 load_alloc(current,i);
2017 break;
2018 case STORE:
2019 case STORELR:
2020 store_alloc(current,i);
2021 break;
2022 case ALU:
2023 alu_alloc(current,i);
2024 break;
2025 case SHIFT:
2026 shift_alloc(current,i);
2027 break;
2028 case MULTDIV:
2029 multdiv_alloc(current,i);
2030 break;
2031 case SHIFTIMM:
2032 shiftimm_alloc(current,i);
2033 break;
2034 case MOV:
2035 mov_alloc(current,i);
2036 break;
2037 case COP0:
2038 cop0_alloc(current,i);
2039 break;
2040 case COP1:
b9b61529 2041 case COP2:
57871462 2042 cop1_alloc(current,i);
2043 break;
2044 case C1LS:
2045 c1ls_alloc(current,i);
2046 break;
b9b61529 2047 case C2LS:
2048 c2ls_alloc(current,i);
2049 break;
57871462 2050 case FCONV:
2051 fconv_alloc(current,i);
2052 break;
2053 case FLOAT:
2054 float_alloc(current,i);
2055 break;
2056 case FCOMP:
2057 fcomp_alloc(current,i);
2058 break;
b9b61529 2059 case C2OP:
2060 c2op_alloc(current,i);
2061 break;
57871462 2062 }
2063}
2064
// Special case where a branch and delay slot span two pages in virtual memory.
// All constant tracking is discarded, alloc_all() is applied and the cycle
// count register is allocated and dirtied; after that the registers read or
// written by the branch itself are allocated according to its opcode.
static void pagespan_alloc(struct regstat *current,int i)
{
  current->isconst=0;
  current->wasconst=0;
  regs[i].wasconst=0;
  minimum_free_regs[i]=HOST_REGS;
  alloc_all(current,i);
  alloc_cc(current,i);
  dirty_reg(current,CCREG);
  if(opcode[i]==3) // JAL
  {
    // Link register
    alloc_reg(current,i,31);
    dirty_reg(current,31);
  }
  if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
  {
    alloc_reg(current,i,rs1[i]);
    if (rt1[i]!=0) {
      alloc_reg(current,i,rt1[i]);
      dirty_reg(current,rt1[i]);
    }
  }
  if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(rs2[i]) alloc_reg(current,i,rs2[i]);
    // Compare in 64 bits unless both operands are flagged 32-bit
    if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
      if(rs2[i]) alloc_reg64(current,i,rs2[i]);
    }
  }
  else
  if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
  {
    if(rs1[i]) alloc_reg(current,i,rs1[i]);
    if(!((current->is32>>rs1[i])&1))
    {
      if(rs1[i]) alloc_reg64(current,i,rs1[i]);
    }
  }
  else
  if(opcode[i]==0x11) // BC1
  {
    alloc_reg(current,i,FSREG);
    alloc_reg(current,i,CSREG);
  }
  //else ...
}
2115
2116add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
2117{
2118 stubs[stubcount][0]=type;
2119 stubs[stubcount][1]=addr;
2120 stubs[stubcount][2]=retaddr;
2121 stubs[stubcount][3]=a;
2122 stubs[stubcount][4]=b;
2123 stubs[stubcount][5]=c;
2124 stubs[stubcount][6]=d;
2125 stubs[stubcount][7]=e;
2126 stubcount++;
2127}
2128
2129// Write out a single register
2130void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
2131{
2132 int hr;
2133 for(hr=0;hr<HOST_REGS;hr++) {
2134 if(hr!=EXCLUDE_REG) {
2135 if((regmap[hr]&63)==r) {
2136 if((dirty>>hr)&1) {
2137 if(regmap[hr]<64) {
2138 emit_storereg(r,hr);
24385cae 2139#ifndef FORCE32
57871462 2140 if((is32>>regmap[hr])&1) {
2141 emit_sarimm(hr,31,hr);
2142 emit_storereg(r|64,hr);
2143 }
24385cae 2144#endif
57871462 2145 }else{
2146 emit_storereg(r|64,hr);
2147 }
2148 }
2149 }
2150 }
2151 }
2152}
2153
2154int mchecksum()
2155{
2156 //if(!tracedebug) return 0;
2157 int i;
2158 int sum=0;
2159 for(i=0;i<2097152;i++) {
2160 unsigned int temp=sum;
2161 sum<<=1;
2162 sum|=(~temp)>>31;
2163 sum^=((u_int *)rdram)[i];
2164 }
2165 return sum;
2166}
2167int rchecksum()
2168{
2169 int i;
2170 int sum=0;
2171 for(i=0;i<64;i++)
2172 sum^=((u_int *)reg)[i];
2173 return sum;
2174}
57871462 2175void rlist()
2176{
2177 int i;
2178 printf("TRACE: ");
2179 for(i=0;i<32;i++)
2180 printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
2181 printf("\n");
3d624f89 2182#ifndef DISABLE_COP1
57871462 2183 printf("TRACE: ");
2184 for(i=0;i<32;i++)
2185 printf("f%d:%8x%8x ",i,((int*)reg_cop1_simple[i])[1],*((int*)reg_cop1_simple[i]));
2186 printf("\n");
3d624f89 2187#endif
57871462 2188}
2189
2190void enabletrace()
2191{
2192 tracedebug=1;
2193}
2194
2195void memdebug(int i)
2196{
2197 //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]);
2198 //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum());
2199 //rlist();
2200 //if(tracedebug) {
2201 //if(Count>=-2084597794) {
2202 if((signed int)Count>=-2084597794&&(signed int)Count<0) {
2203 //if(0) {
2204 printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
2205 //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status);
2206 //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]);
2207 rlist();
2208 #ifdef __i386__
2209 printf("TRACE: %x\n",(&i)[-1]);
2210 #endif
2211 #ifdef __arm__
2212 int j;
2213 printf("TRACE: %x \n",(&j)[10]);
2214 printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
2215 #endif
2216 //fflush(stdout);
2217 }
2218 //printf("TRACE: %x\n",(&i)[-1]);
2219}
2220
2221void tlb_debug(u_int cause, u_int addr, u_int iaddr)
2222{
2223 printf("TLB Exception: instruction=%x addr=%x cause=%x\n",iaddr, addr, cause);
2224}
2225
2226void alu_assemble(int i,struct regstat *i_regs)
2227{
2228 if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
2229 if(rt1[i]) {
2230 signed char s1,s2,t;
2231 t=get_reg(i_regs->regmap,rt1[i]);
2232 if(t>=0) {
2233 s1=get_reg(i_regs->regmap,rs1[i]);
2234 s2=get_reg(i_regs->regmap,rs2[i]);
2235 if(rs1[i]&&rs2[i]) {
2236 assert(s1>=0);
2237 assert(s2>=0);
2238 if(opcode2[i]&2) emit_sub(s1,s2,t);
2239 else emit_add(s1,s2,t);
2240 }
2241 else if(rs1[i]) {
2242 if(s1>=0) emit_mov(s1,t);
2243 else emit_loadreg(rs1[i],t);
2244 }
2245 else if(rs2[i]) {
2246 if(s2>=0) {
2247 if(opcode2[i]&2) emit_neg(s2,t);
2248 else emit_mov(s2,t);
2249 }
2250 else {
2251 emit_loadreg(rs2[i],t);
2252 if(opcode2[i]&2) emit_neg(t,t);
2253 }
2254 }
2255 else emit_zeroreg(t);
2256 }
2257 }
2258 }
2259 if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
2260 if(rt1[i]) {
2261 signed char s1l,s2l,s1h,s2h,tl,th;
2262 tl=get_reg(i_regs->regmap,rt1[i]);
2263 th=get_reg(i_regs->regmap,rt1[i]|64);
2264 if(tl>=0) {
2265 s1l=get_reg(i_regs->regmap,rs1[i]);
2266 s2l=get_reg(i_regs->regmap,rs2[i]);
2267 s1h=get_reg(i_regs->regmap,rs1[i]|64);
2268 s2h=get_reg(i_regs->regmap,rs2[i]|64);
2269 if(rs1[i]&&rs2[i]) {
2270 assert(s1l>=0);
2271 assert(s2l>=0);
2272 if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
2273 else emit_adds(s1l,s2l,tl);
2274 if(th>=0) {
2275 #ifdef INVERTED_CARRY
2276 if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
2277 #else
2278 if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
2279 #endif
2280 else emit_add(s1h,s2h,th);
2281 }
2282 }
2283 else if(rs1[i]) {
2284 if(s1l>=0) emit_mov(s1l,tl);
2285 else emit_loadreg(rs1[i],tl);
2286 if(th>=0) {
2287 if(s1h>=0) emit_mov(s1h,th);
2288 else emit_loadreg(rs1[i]|64,th);
2289 }
2290 }
2291 else if(rs2[i]) {
2292 if(s2l>=0) {
2293 if(opcode2[i]&2) emit_negs(s2l,tl);
2294 else emit_mov(s2l,tl);
2295 }
2296 else {
2297 emit_loadreg(rs2[i],tl);
2298 if(opcode2[i]&2) emit_negs(tl,tl);
2299 }
2300 if(th>=0) {
2301 #ifdef INVERTED_CARRY
2302 if(s2h>=0) emit_mov(s2h,th);
2303 else emit_loadreg(rs2[i]|64,th);
2304 if(opcode2[i]&2) {
2305 emit_adcimm(-1,th); // x86 has inverted carry flag
2306 emit_not(th,th);
2307 }
2308 #else
2309 if(opcode2[i]&2) {
2310 if(s2h>=0) emit_rscimm(s2h,0,th);
2311 else {
2312 emit_loadreg(rs2[i]|64,th);
2313 emit_rscimm(th,0,th);
2314 }
2315 }else{
2316 if(s2h>=0) emit_mov(s2h,th);
2317 else emit_loadreg(rs2[i]|64,th);
2318 }
2319 #endif
2320 }
2321 }
2322 else {
2323 emit_zeroreg(tl);
2324 if(th>=0) emit_zeroreg(th);
2325 }
2326 }
2327 }
2328 }
2329 if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
2330 if(rt1[i]) {
2331 signed char s1l,s1h,s2l,s2h,t;
2332 if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
2333 {
2334 t=get_reg(i_regs->regmap,rt1[i]);
2335 //assert(t>=0);
2336 if(t>=0) {
2337 s1l=get_reg(i_regs->regmap,rs1[i]);
2338 s1h=get_reg(i_regs->regmap,rs1[i]|64);
2339 s2l=get_reg(i_regs->regmap,rs2[i]);
2340 s2h=get_reg(i_regs->regmap,rs2[i]|64);
2341 if(rs2[i]==0) // rx<r0
2342 {
2343 assert(s1h>=0);
2344 if(opcode2[i]==0x2a) // SLT
2345 emit_shrimm(s1h,31,t);
2346 else // SLTU (unsigned can not be less than zero)
2347 emit_zeroreg(t);
2348 }
2349 else if(rs1[i]==0) // r0<rx
2350 {
2351 assert(s2h>=0);
2352 if(opcode2[i]==0x2a) // SLT
2353 emit_set_gz64_32(s2h,s2l,t);
2354 else // SLTU (set if not zero)
2355 emit_set_nz64_32(s2h,s2l,t);
2356 }
2357 else {
2358 assert(s1l>=0);assert(s1h>=0);
2359 assert(s2l>=0);assert(s2h>=0);
2360 if(opcode2[i]==0x2a) // SLT
2361 emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
2362 else // SLTU
2363 emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
2364 }
2365 }
2366 } else {
2367 t=get_reg(i_regs->regmap,rt1[i]);
2368 //assert(t>=0);
2369 if(t>=0) {
2370 s1l=get_reg(i_regs->regmap,rs1[i]);
2371 s2l=get_reg(i_regs->regmap,rs2[i]);
2372 if(rs2[i]==0) // rx<r0
2373 {
2374 assert(s1l>=0);
2375 if(opcode2[i]==0x2a) // SLT
2376 emit_shrimm(s1l,31,t);
2377 else // SLTU (unsigned can not be less than zero)
2378 emit_zeroreg(t);
2379 }
2380 else if(rs1[i]==0) // r0<rx
2381 {
2382 assert(s2l>=0);
2383 if(opcode2[i]==0x2a) // SLT
2384 emit_set_gz32(s2l,t);
2385 else // SLTU (set if not zero)
2386 emit_set_nz32(s2l,t);
2387 }
2388 else{
2389 assert(s1l>=0);assert(s2l>=0);
2390 if(opcode2[i]==0x2a) // SLT
2391 emit_set_if_less32(s1l,s2l,t);
2392 else // SLTU
2393 emit_set_if_carry32(s1l,s2l,t);
2394 }
2395 }
2396 }
2397 }
2398 }
2399 if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
2400 if(rt1[i]) {
2401 signed char s1l,s1h,s2l,s2h,th,tl;
2402 tl=get_reg(i_regs->regmap,rt1[i]);
2403 th=get_reg(i_regs->regmap,rt1[i]|64);
2404 if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
2405 {
2406 assert(tl>=0);
2407 if(tl>=0) {
2408 s1l=get_reg(i_regs->regmap,rs1[i]);
2409 s1h=get_reg(i_regs->regmap,rs1[i]|64);
2410 s2l=get_reg(i_regs->regmap,rs2[i]);
2411 s2h=get_reg(i_regs->regmap,rs2[i]|64);
2412 if(rs1[i]&&rs2[i]) {
2413 assert(s1l>=0);assert(s1h>=0);
2414 assert(s2l>=0);assert(s2h>=0);
2415 if(opcode2[i]==0x24) { // AND
2416 emit_and(s1l,s2l,tl);
2417 emit_and(s1h,s2h,th);
2418 } else
2419 if(opcode2[i]==0x25) { // OR
2420 emit_or(s1l,s2l,tl);
2421 emit_or(s1h,s2h,th);
2422 } else
2423 if(opcode2[i]==0x26) { // XOR
2424 emit_xor(s1l,s2l,tl);
2425 emit_xor(s1h,s2h,th);
2426 } else
2427 if(opcode2[i]==0x27) { // NOR
2428 emit_or(s1l,s2l,tl);
2429 emit_or(s1h,s2h,th);
2430 emit_not(tl,tl);
2431 emit_not(th,th);
2432 }
2433 }
2434 else
2435 {
2436 if(opcode2[i]==0x24) { // AND
2437 emit_zeroreg(tl);
2438 emit_zeroreg(th);
2439 } else
2440 if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2441 if(rs1[i]){
2442 if(s1l>=0) emit_mov(s1l,tl);
2443 else emit_loadreg(rs1[i],tl);
2444 if(s1h>=0) emit_mov(s1h,th);
2445 else emit_loadreg(rs1[i]|64,th);
2446 }
2447 else
2448 if(rs2[i]){
2449 if(s2l>=0) emit_mov(s2l,tl);
2450 else emit_loadreg(rs2[i],tl);
2451 if(s2h>=0) emit_mov(s2h,th);
2452 else emit_loadreg(rs2[i]|64,th);
2453 }
2454 else{
2455 emit_zeroreg(tl);
2456 emit_zeroreg(th);
2457 }
2458 } else
2459 if(opcode2[i]==0x27) { // NOR
2460 if(rs1[i]){
2461 if(s1l>=0) emit_not(s1l,tl);
2462 else{
2463 emit_loadreg(rs1[i],tl);
2464 emit_not(tl,tl);
2465 }
2466 if(s1h>=0) emit_not(s1h,th);
2467 else{
2468 emit_loadreg(rs1[i]|64,th);
2469 emit_not(th,th);
2470 }
2471 }
2472 else
2473 if(rs2[i]){
2474 if(s2l>=0) emit_not(s2l,tl);
2475 else{
2476 emit_loadreg(rs2[i],tl);
2477 emit_not(tl,tl);
2478 }
2479 if(s2h>=0) emit_not(s2h,th);
2480 else{
2481 emit_loadreg(rs2[i]|64,th);
2482 emit_not(th,th);
2483 }
2484 }
2485 else {
2486 emit_movimm(-1,tl);
2487 emit_movimm(-1,th);
2488 }
2489 }
2490 }
2491 }
2492 }
2493 else
2494 {
2495 // 32 bit
2496 if(tl>=0) {
2497 s1l=get_reg(i_regs->regmap,rs1[i]);
2498 s2l=get_reg(i_regs->regmap,rs2[i]);
2499 if(rs1[i]&&rs2[i]) {
2500 assert(s1l>=0);
2501 assert(s2l>=0);
2502 if(opcode2[i]==0x24) { // AND
2503 emit_and(s1l,s2l,tl);
2504 } else
2505 if(opcode2[i]==0x25) { // OR
2506 emit_or(s1l,s2l,tl);
2507 } else
2508 if(opcode2[i]==0x26) { // XOR
2509 emit_xor(s1l,s2l,tl);
2510 } else
2511 if(opcode2[i]==0x27) { // NOR
2512 emit_or(s1l,s2l,tl);
2513 emit_not(tl,tl);
2514 }
2515 }
2516 else
2517 {
2518 if(opcode2[i]==0x24) { // AND
2519 emit_zeroreg(tl);
2520 } else
2521 if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2522 if(rs1[i]){
2523 if(s1l>=0) emit_mov(s1l,tl);
2524 else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
2525 }
2526 else
2527 if(rs2[i]){
2528 if(s2l>=0) emit_mov(s2l,tl);
2529 else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
2530 }
2531 else emit_zeroreg(tl);
2532 } else
2533 if(opcode2[i]==0x27) { // NOR
2534 if(rs1[i]){
2535 if(s1l>=0) emit_not(s1l,tl);
2536 else {
2537 emit_loadreg(rs1[i],tl);
2538 emit_not(tl,tl);
2539 }
2540 }
2541 else
2542 if(rs2[i]){
2543 if(s2l>=0) emit_not(s2l,tl);
2544 else {
2545 emit_loadreg(rs2[i],tl);
2546 emit_not(tl,tl);
2547 }
2548 }
2549 else emit_movimm(-1,tl);
2550 }
2551 }
2552 }
2553 }
2554 }
2555 }
2556}
2557
2558void imm16_assemble(int i,struct regstat *i_regs)
2559{
2560 if (opcode[i]==0x0f) { // LUI
2561 if(rt1[i]) {
2562 signed char t;
2563 t=get_reg(i_regs->regmap,rt1[i]);
2564 //assert(t>=0);
2565 if(t>=0) {
2566 if(!((i_regs->isconst>>t)&1))
2567 emit_movimm(imm[i]<<16,t);
2568 }
2569 }
2570 }
2571 if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
2572 if(rt1[i]) {
2573 signed char s,t;
2574 t=get_reg(i_regs->regmap,rt1[i]);
2575 s=get_reg(i_regs->regmap,rs1[i]);
2576 if(rs1[i]) {
2577 //assert(t>=0);
2578 //assert(s>=0);
2579 if(t>=0) {
2580 if(!((i_regs->isconst>>t)&1)) {
2581 if(s<0) {
2582 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2583 emit_addimm(t,imm[i],t);
2584 }else{
2585 if(!((i_regs->wasconst>>s)&1))
2586 emit_addimm(s,imm[i],t);
2587 else
2588 emit_movimm(constmap[i][s]+imm[i],t);
2589 }
2590 }
2591 }
2592 } else {
2593 if(t>=0) {
2594 if(!((i_regs->isconst>>t)&1))
2595 emit_movimm(imm[i],t);
2596 }
2597 }
2598 }
2599 }
2600 if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
2601 if(rt1[i]) {
2602 signed char sh,sl,th,tl;
2603 th=get_reg(i_regs->regmap,rt1[i]|64);
2604 tl=get_reg(i_regs->regmap,rt1[i]);
2605 sh=get_reg(i_regs->regmap,rs1[i]|64);
2606 sl=get_reg(i_regs->regmap,rs1[i]);
2607 if(tl>=0) {
2608 if(rs1[i]) {
2609 assert(sh>=0);
2610 assert(sl>=0);
2611 if(th>=0) {
2612 emit_addimm64_32(sh,sl,imm[i],th,tl);
2613 }
2614 else {
2615 emit_addimm(sl,imm[i],tl);
2616 }
2617 } else {
2618 emit_movimm(imm[i],tl);
2619 if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
2620 }
2621 }
2622 }
2623 }
2624 else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
2625 if(rt1[i]) {
2626 //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
2627 signed char sh,sl,t;
2628 t=get_reg(i_regs->regmap,rt1[i]);
2629 sh=get_reg(i_regs->regmap,rs1[i]|64);
2630 sl=get_reg(i_regs->regmap,rs1[i]);
2631 //assert(t>=0);
2632 if(t>=0) {
2633 if(rs1[i]>0) {
2634 if(sh<0) assert((i_regs->was32>>rs1[i])&1);
2635 if(sh<0||((i_regs->was32>>rs1[i])&1)) {
2636 if(opcode[i]==0x0a) { // SLTI
2637 if(sl<0) {
2638 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2639 emit_slti32(t,imm[i],t);
2640 }else{
2641 emit_slti32(sl,imm[i],t);
2642 }
2643 }
2644 else { // SLTIU
2645 if(sl<0) {
2646 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2647 emit_sltiu32(t,imm[i],t);
2648 }else{
2649 emit_sltiu32(sl,imm[i],t);
2650 }
2651 }
2652 }else{ // 64-bit
2653 assert(sl>=0);
2654 if(opcode[i]==0x0a) // SLTI
2655 emit_slti64_32(sh,sl,imm[i],t);
2656 else // SLTIU
2657 emit_sltiu64_32(sh,sl,imm[i],t);
2658 }
2659 }else{
2660 // SLTI(U) with r0 is just stupid,
2661 // nonetheless examples can be found
2662 if(opcode[i]==0x0a) // SLTI
2663 if(0<imm[i]) emit_movimm(1,t);
2664 else emit_zeroreg(t);
2665 else // SLTIU
2666 {
2667 if(imm[i]) emit_movimm(1,t);
2668 else emit_zeroreg(t);
2669 }
2670 }
2671 }
2672 }
2673 }
2674 else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
2675 if(rt1[i]) {
2676 signed char sh,sl,th,tl;
2677 th=get_reg(i_regs->regmap,rt1[i]|64);
2678 tl=get_reg(i_regs->regmap,rt1[i]);
2679 sh=get_reg(i_regs->regmap,rs1[i]|64);
2680 sl=get_reg(i_regs->regmap,rs1[i]);
2681 if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
2682 if(opcode[i]==0x0c) //ANDI
2683 {
2684 if(rs1[i]) {
2685 if(sl<0) {
2686 if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
2687 emit_andimm(tl,imm[i],tl);
2688 }else{
2689 if(!((i_regs->wasconst>>sl)&1))
2690 emit_andimm(sl,imm[i],tl);
2691 else
2692 emit_movimm(constmap[i][sl]&imm[i],tl);
2693 }
2694 }
2695 else
2696 emit_zeroreg(tl);
2697 if(th>=0) emit_zeroreg(th);
2698 }
2699 else
2700 {
2701 if(rs1[i]) {
2702 if(sl<0) {
2703 if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
2704 }
2705 if(th>=0) {
2706 if(sh<0) {
2707 emit_loadreg(rs1[i]|64,th);
2708 }else{
2709 emit_mov(sh,th);
2710 }
2711 }
2712 if(opcode[i]==0x0d) //ORI
2713 if(sl<0) {
2714 emit_orimm(tl,imm[i],tl);
2715 }else{
2716 if(!((i_regs->wasconst>>sl)&1))
2717 emit_orimm(sl,imm[i],tl);
2718 else
2719 emit_movimm(constmap[i][sl]|imm[i],tl);
2720 }
2721 if(opcode[i]==0x0e) //XORI
2722 if(sl<0) {
2723 emit_xorimm(tl,imm[i],tl);
2724 }else{
2725 if(!((i_regs->wasconst>>sl)&1))
2726 emit_xorimm(sl,imm[i],tl);
2727 else
2728 emit_movimm(constmap[i][sl]^imm[i],tl);
2729 }
2730 }
2731 else {
2732 emit_movimm(imm[i],tl);
2733 if(th>=0) emit_zeroreg(th);
2734 }
2735 }
2736 }
2737 }
2738 }
2739}
2740
2741void shiftimm_assemble(int i,struct regstat *i_regs)
2742{
2743 if(opcode2[i]<=0x3) // SLL/SRL/SRA
2744 {
2745 if(rt1[i]) {
2746 signed char s,t;
2747 t=get_reg(i_regs->regmap,rt1[i]);
2748 s=get_reg(i_regs->regmap,rs1[i]);
2749 //assert(t>=0);
dc49e339 2750 if(t>=0&&!((i_regs->isconst>>t)&1)){
57871462 2751 if(rs1[i]==0)
2752 {
2753 emit_zeroreg(t);
2754 }
2755 else
2756 {
2757 if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2758 if(imm[i]) {
2759 if(opcode2[i]==0) // SLL
2760 {
2761 emit_shlimm(s<0?t:s,imm[i],t);
2762 }
2763 if(opcode2[i]==2) // SRL
2764 {
2765 emit_shrimm(s<0?t:s,imm[i],t);
2766 }
2767 if(opcode2[i]==3) // SRA
2768 {
2769 emit_sarimm(s<0?t:s,imm[i],t);
2770 }
2771 }else{
2772 // Shift by zero
2773 if(s>=0 && s!=t) emit_mov(s,t);
2774 }
2775 }
2776 }
2777 //emit_storereg(rt1[i],t); //DEBUG
2778 }
2779 }
2780 if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
2781 {
2782 if(rt1[i]) {
2783 signed char sh,sl,th,tl;
2784 th=get_reg(i_regs->regmap,rt1[i]|64);
2785 tl=get_reg(i_regs->regmap,rt1[i]);
2786 sh=get_reg(i_regs->regmap,rs1[i]|64);
2787 sl=get_reg(i_regs->regmap,rs1[i]);
2788 if(tl>=0) {
2789 if(rs1[i]==0)
2790 {
2791 emit_zeroreg(tl);
2792 if(th>=0) emit_zeroreg(th);
2793 }
2794 else
2795 {
2796 assert(sl>=0);
2797 assert(sh>=0);
2798 if(imm[i]) {
2799 if(opcode2[i]==0x38) // DSLL
2800 {
2801 if(th>=0) emit_shldimm(sh,sl,imm[i],th);
2802 emit_shlimm(sl,imm[i],tl);
2803 }
2804 if(opcode2[i]==0x3a) // DSRL
2805 {
2806 emit_shrdimm(sl,sh,imm[i],tl);
2807 if(th>=0) emit_shrimm(sh,imm[i],th);
2808 }
2809 if(opcode2[i]==0x3b) // DSRA
2810 {
2811 emit_shrdimm(sl,sh,imm[i],tl);
2812 if(th>=0) emit_sarimm(sh,imm[i],th);
2813 }
2814 }else{
2815 // Shift by zero
2816 if(sl!=tl) emit_mov(sl,tl);
2817 if(th>=0&&sh!=th) emit_mov(sh,th);
2818 }
2819 }
2820 }
2821 }
2822 }
2823 if(opcode2[i]==0x3c) // DSLL32
2824 {
2825 if(rt1[i]) {
2826 signed char sl,tl,th;
2827 tl=get_reg(i_regs->regmap,rt1[i]);
2828 th=get_reg(i_regs->regmap,rt1[i]|64);
2829 sl=get_reg(i_regs->regmap,rs1[i]);
2830 if(th>=0||tl>=0){
2831 assert(tl>=0);
2832 assert(th>=0);
2833 assert(sl>=0);
2834 emit_mov(sl,th);
2835 emit_zeroreg(tl);
2836 if(imm[i]>32)
2837 {
2838 emit_shlimm(th,imm[i]&31,th);
2839 }
2840 }
2841 }
2842 }
2843 if(opcode2[i]==0x3e) // DSRL32
2844 {
2845 if(rt1[i]) {
2846 signed char sh,tl,th;
2847 tl=get_reg(i_regs->regmap,rt1[i]);
2848 th=get_reg(i_regs->regmap,rt1[i]|64);
2849 sh=get_reg(i_regs->regmap,rs1[i]|64);
2850 if(tl>=0){
2851 assert(sh>=0);
2852 emit_mov(sh,tl);
2853 if(th>=0) emit_zeroreg(th);
2854 if(imm[i]>32)
2855 {
2856 emit_shrimm(tl,imm[i]&31,tl);
2857 }
2858 }
2859 }
2860 }
2861 if(opcode2[i]==0x3f) // DSRA32
2862 {
2863 if(rt1[i]) {
2864 signed char sh,tl;
2865 tl=get_reg(i_regs->regmap,rt1[i]);
2866 sh=get_reg(i_regs->regmap,rs1[i]|64);
2867 if(tl>=0){
2868 assert(sh>=0);
2869 emit_mov(sh,tl);
2870 if(imm[i]>32)
2871 {
2872 emit_sarimm(tl,imm[i]&31,tl);
2873 }
2874 }
2875 }
2876 }
2877}
2878
#ifndef shift_assemble
// Fallback used when no shift_assemble macro has been defined before this
// point: reports the missing implementation and terminates the process.
void shift_assemble(int i,struct regstat *i_regs)
{
  printf("Need shift_assemble for this architecture.\n");
  exit(1);
}
#endif
2886
2887void load_assemble(int i,struct regstat *i_regs)
2888{
2889 int s,th,tl,addr,map=-1;
2890 int offset;
2891 int jaddr=0;
5bf843dc 2892 int memtarget=0,c=0;
b1570849 2893 int fastload_reg_override=0;
57871462 2894 u_int hr,reglist=0;
2895 th=get_reg(i_regs->regmap,rt1[i]|64);
2896 tl=get_reg(i_regs->regmap,rt1[i]);
2897 s=get_reg(i_regs->regmap,rs1[i]);
2898 offset=imm[i];
2899 for(hr=0;hr<HOST_REGS;hr++) {
2900 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2901 }
2902 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2903 if(s>=0) {
2904 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2905 if (c) {
2906 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
2907 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
2908 }
57871462 2909 }
57871462 2910 //printf("load_assemble: c=%d\n",c);
2911 //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
2912 // FIXME: Even if the load is a NOP, we should check for pagefaults...
5bf843dc 2913#ifdef PCSX
f18c0f46 2914 if(tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80)
2915 ||rt1[i]==0) {
5bf843dc 2916 // could be FIFO, must perform the read
f18c0f46 2917 // ||dummy read
5bf843dc 2918 assem_debug("(forced read)\n");
2919 tl=get_reg(i_regs->regmap,-1);
2920 assert(tl>=0);
5bf843dc 2921 }
f18c0f46 2922#endif
5bf843dc 2923 if(offset||s<0||c) addr=tl;
2924 else addr=s;
535d208a 2925 //if(tl<0) tl=get_reg(i_regs->regmap,-1);
2926 if(tl>=0) {
2927 //printf("load_assemble: c=%d\n",c);
2928 //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
2929 assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
2930 reglist&=~(1<<tl);
2931 if(th>=0) reglist&=~(1<<th);
2932 if(!using_tlb) {
2933 if(!c) {
2934 #ifdef RAM_OFFSET
2935 map=get_reg(i_regs->regmap,ROREG);
2936 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
2937 #endif
57871462 2938//#define R29_HACK 1
535d208a 2939 #ifdef R29_HACK
2940 // Strmnnrmn's speed hack
2941 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
2942 #endif
2943 {
ffb0b9e0 2944 jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
57871462 2945 }
535d208a 2946 }
a327ad27 2947 else if(ram_offset&&memtarget) {
2948 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2949 fastload_reg_override=HOST_TEMPREG;
2950 }
535d208a 2951 }else{ // using tlb
2952 int x=0;
2953 if (opcode[i]==0x20||opcode[i]==0x24) x=3; // LB/LBU
2954 if (opcode[i]==0x21||opcode[i]==0x25) x=2; // LH/LHU
2955 map=get_reg(i_regs->regmap,TLREG);
2956 assert(map>=0);
ea3d2e6e 2957 reglist&=~(1<<map);
535d208a 2958 map=do_tlb_r(addr,tl,map,x,-1,-1,c,constmap[i][s]+offset);
2959 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
2960 }
2961 int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
2962 if (opcode[i]==0x20) { // LB
2963 if(!c||memtarget) {
2964 if(!dummy) {
57871462 2965 #ifdef HOST_IMM_ADDR32
2966 if(c)
2967 emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
2968 else
2969 #endif
2970 {
2971 //emit_xorimm(addr,3,tl);
2972 //gen_tlb_addr_r(tl,map);
2973 //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
535d208a 2974 int x=0,a=tl;
2002a1db 2975#ifdef BIG_ENDIAN_MIPS
57871462 2976 if(!c) emit_xorimm(addr,3,tl);
2977 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 2978#else
535d208a 2979 if(!c) a=addr;
dadf55f2 2980#endif
b1570849 2981 if(fastload_reg_override) a=fastload_reg_override;
2982
535d208a 2983 emit_movsbl_indexed_tlb(x,a,map,tl);
57871462 2984 }
57871462 2985 }
535d208a 2986 if(jaddr)
2987 add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2988 }
535d208a 2989 else
2990 inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2991 }
2992 if (opcode[i]==0x21) { // LH
2993 if(!c||memtarget) {
2994 if(!dummy) {
57871462 2995 #ifdef HOST_IMM_ADDR32
2996 if(c)
2997 emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
2998 else
2999 #endif
3000 {
535d208a 3001 int x=0,a=tl;
2002a1db 3002#ifdef BIG_ENDIAN_MIPS
57871462 3003 if(!c) emit_xorimm(addr,2,tl);
3004 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 3005#else
535d208a 3006 if(!c) a=addr;
dadf55f2 3007#endif
b1570849 3008 if(fastload_reg_override) a=fastload_reg_override;
57871462 3009 //#ifdef
3010 //emit_movswl_indexed_tlb(x,tl,map,tl);
3011 //else
3012 if(map>=0) {
535d208a 3013 gen_tlb_addr_r(a,map);
3014 emit_movswl_indexed(x,a,tl);
3015 }else{
a327ad27 3016 #if 1 //def RAM_OFFSET
535d208a 3017 emit_movswl_indexed(x,a,tl);
3018 #else
3019 emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
3020 #endif
3021 }
57871462 3022 }
57871462 3023 }
535d208a 3024 if(jaddr)
3025 add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 3026 }
535d208a 3027 else
3028 inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3029 }
3030 if (opcode[i]==0x23) { // LW
3031 if(!c||memtarget) {
3032 if(!dummy) {
dadf55f2 3033 int a=addr;
b1570849 3034 if(fastload_reg_override) a=fastload_reg_override;
57871462 3035 //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
3036 #ifdef HOST_IMM_ADDR32
3037 if(c)
3038 emit_readword_tlb(constmap[i][s]+offset,map,tl);
3039 else
3040 #endif
dadf55f2 3041 emit_readword_indexed_tlb(0,a,map,tl);
57871462 3042 }
535d208a 3043 if(jaddr)
3044 add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 3045 }
535d208a 3046 else
3047 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3048 }
3049 if (opcode[i]==0x24) { // LBU
3050 if(!c||memtarget) {
3051 if(!dummy) {
57871462 3052 #ifdef HOST_IMM_ADDR32
3053 if(c)
3054 emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
3055 else
3056 #endif
3057 {
3058 //emit_xorimm(addr,3,tl);
3059 //gen_tlb_addr_r(tl,map);
3060 //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
535d208a 3061 int x=0,a=tl;
2002a1db 3062#ifdef BIG_ENDIAN_MIPS
57871462 3063 if(!c) emit_xorimm(addr,3,tl);
3064 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 3065#else
535d208a 3066 if(!c) a=addr;
dadf55f2 3067#endif
b1570849 3068 if(fastload_reg_override) a=fastload_reg_override;
3069
535d208a 3070 emit_movzbl_indexed_tlb(x,a,map,tl);
57871462 3071 }
57871462 3072 }
535d208a 3073 if(jaddr)
3074 add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 3075 }
535d208a 3076 else
3077 inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3078 }
3079 if (opcode[i]==0x25) { // LHU
3080 if(!c||memtarget) {
3081 if(!dummy) {
57871462 3082 #ifdef HOST_IMM_ADDR32
3083 if(c)
3084 emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
3085 else
3086 #endif
3087 {
535d208a 3088 int x=0,a=tl;
2002a1db 3089#ifdef BIG_ENDIAN_MIPS
57871462 3090 if(!c) emit_xorimm(addr,2,tl);
3091 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 3092#else
535d208a 3093 if(!c) a=addr;
dadf55f2 3094#endif
b1570849 3095 if(fastload_reg_override) a=fastload_reg_override;
57871462 3096 //#ifdef
3097 //emit_movzwl_indexed_tlb(x,tl,map,tl);
3098 //#else
3099 if(map>=0) {
535d208a 3100 gen_tlb_addr_r(a,map);
3101 emit_movzwl_indexed(x,a,tl);
3102 }else{
a327ad27 3103 #if 1 //def RAM_OFFSET
535d208a 3104 emit_movzwl_indexed(x,a,tl);
3105 #else
3106 emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
3107 #endif
3108 }
57871462 3109 }
3110 }
535d208a 3111 if(jaddr)
3112 add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 3113 }
535d208a 3114 else
3115 inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
3116 }
3117 if (opcode[i]==0x27) { // LWU
3118 assert(th>=0);
3119 if(!c||memtarget) {
3120 if(!dummy) {
dadf55f2 3121 int a=addr;
b1570849 3122 if(fastload_reg_override) a=fastload_reg_override;
57871462 3123 //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
3124 #ifdef HOST_IMM_ADDR32
3125 if(c)
3126 emit_readword_tlb(constmap[i][s]+offset,map,tl);
3127 else
3128 #endif
dadf55f2 3129 emit_readword_indexed_tlb(0,a,map,tl);
57871462 3130 }
535d208a 3131 if(jaddr)
3132 add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3133 }
3134 else {
3135 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
57871462 3136 }
535d208a 3137 emit_zeroreg(th);
3138 }
3139 if (opcode[i]==0x37) { // LD
3140 if(!c||memtarget) {
3141 if(!dummy) {
dadf55f2 3142 int a=addr;
b1570849 3143 if(fastload_reg_override) a=fastload_reg_override;
57871462 3144 //gen_tlb_addr_r(tl,map);
3145 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
3146 //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
3147 #ifdef HOST_IMM_ADDR32
3148 if(c)
3149 emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
3150 else
3151 #endif
dadf55f2 3152 emit_readdword_indexed_tlb(0,a,map,th,tl);
57871462 3153 }
535d208a 3154 if(jaddr)
3155 add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 3156 }
535d208a 3157 else
3158 inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
57871462 3159 }
535d208a 3160 }
3161 //emit_storereg(rt1[i],tl); // DEBUG
57871462 3162 //if(opcode[i]==0x23)
3163 //if(opcode[i]==0x24)
3164 //if(opcode[i]==0x23||opcode[i]==0x24)
3165 /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
3166 {
3167 //emit_pusha();
3168 save_regs(0x100f);
3169 emit_readword((int)&last_count,ECX);
3170 #ifdef __i386__
3171 if(get_reg(i_regs->regmap,CCREG)<0)
3172 emit_loadreg(CCREG,HOST_CCREG);
3173 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3174 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3175 emit_writeword(HOST_CCREG,(int)&Count);
3176 #endif
3177 #ifdef __arm__
3178 if(get_reg(i_regs->regmap,CCREG)<0)
3179 emit_loadreg(CCREG,0);
3180 else
3181 emit_mov(HOST_CCREG,0);
3182 emit_add(0,ECX,0);
3183 emit_addimm(0,2*ccadj[i],0);
3184 emit_writeword(0,(int)&Count);
3185 #endif
3186 emit_call((int)memdebug);
3187 //emit_popa();
3188 restore_regs(0x100f);
3189 }/**/
3190}
3191
#ifndef loadlr_assemble
// Fallback used when no loadlr_assemble macro has been defined before this
// point: reports the missing implementation and terminates the process.
void loadlr_assemble(int i,struct regstat *i_regs)
{
  printf("Need loadlr_assemble for this architecture.\n");
  exit(1);
}
#endif
3199
3200void store_assemble(int i,struct regstat *i_regs)
3201{
3202 int s,th,tl,map=-1;
3203 int addr,temp;
3204 int offset;
3205 int jaddr=0,jaddr2,type;
666a299d 3206 int memtarget=0,c=0;
57871462 3207 int agr=AGEN1+(i&1);
b1570849 3208 int faststore_reg_override=0;
57871462 3209 u_int hr,reglist=0;
3210 th=get_reg(i_regs->regmap,rs2[i]|64);
3211 tl=get_reg(i_regs->regmap,rs2[i]);
3212 s=get_reg(i_regs->regmap,rs1[i]);
3213 temp=get_reg(i_regs->regmap,agr);
3214 if(temp<0) temp=get_reg(i_regs->regmap,-1);
3215 offset=imm[i];
3216 if(s>=0) {
3217 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3218 if(c) {
3219 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3220 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3221 }
57871462 3222 }
3223 assert(tl>=0);
3224 assert(temp>=0);
3225 for(hr=0;hr<HOST_REGS;hr++) {
3226 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3227 }
3228 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
3229 if(offset||s<0||c) addr=temp;
3230 else addr=s;
3231 if(!using_tlb) {
3232 if(!c) {
ffb0b9e0 3233 #ifndef PCSX
57871462 3234 #ifdef R29_HACK
3235 // Strmnnrmn's speed hack
4cb76aa4 3236 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
57871462 3237 #endif
4cb76aa4 3238 emit_cmpimm(addr,RAM_SIZE);
57871462 3239 #ifdef DESTRUCTIVE_SHIFT
3240 if(s==addr) emit_mov(s,temp);
3241 #endif
3242 #ifdef R29_HACK
dadf55f2 3243 memtarget=1;
4cb76aa4 3244 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
57871462 3245 #endif
3246 {
3247 jaddr=(int)out;
3248 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3249 // Hint to branch predictor that the branch is unlikely to be taken
3250 if(rs1[i]>=28)
3251 emit_jno_unlikely(0);
3252 else
3253 #endif
3254 emit_jno(0);
3255 }
ffb0b9e0 3256 #else
3257 jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
3258 #endif
57871462 3259 }
a327ad27 3260 else if(ram_offset&&memtarget) {
3261 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3262 faststore_reg_override=HOST_TEMPREG;
3263 }
57871462 3264 }else{ // using tlb
3265 int x=0;
3266 if (opcode[i]==0x28) x=3; // SB
3267 if (opcode[i]==0x29) x=2; // SH
3268 map=get_reg(i_regs->regmap,TLREG);
3269 assert(map>=0);
ea3d2e6e 3270 reglist&=~(1<<map);
57871462 3271 map=do_tlb_w(addr,temp,map,x,c,constmap[i][s]+offset);
3272 do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
3273 }
3274
3275 if (opcode[i]==0x28) { // SB
3276 if(!c||memtarget) {
97a238a6 3277 int x=0,a=temp;
2002a1db 3278#ifdef BIG_ENDIAN_MIPS
57871462 3279 if(!c) emit_xorimm(addr,3,temp);
3280 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 3281#else
97a238a6 3282 if(!c) a=addr;
dadf55f2 3283#endif
b1570849 3284 if(faststore_reg_override) a=faststore_reg_override;
57871462 3285 //gen_tlb_addr_w(temp,map);
3286 //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
97a238a6 3287 emit_writebyte_indexed_tlb(tl,x,a,map,a);
57871462 3288 }
3289 type=STOREB_STUB;
3290 }
3291 if (opcode[i]==0x29) { // SH
3292 if(!c||memtarget) {
97a238a6 3293 int x=0,a=temp;
2002a1db 3294#ifdef BIG_ENDIAN_MIPS
57871462 3295 if(!c) emit_xorimm(addr,2,temp);
3296 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 3297#else
97a238a6 3298 if(!c) a=addr;
dadf55f2 3299#endif
b1570849 3300 if(faststore_reg_override) a=faststore_reg_override;
57871462 3301 //#ifdef
3302 //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
3303 //#else
3304 if(map>=0) {
97a238a6 3305 gen_tlb_addr_w(a,map);
3306 emit_writehword_indexed(tl,x,a);
57871462 3307 }else
a327ad27 3308 //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
3309 emit_writehword_indexed(tl,x,a);
57871462 3310 }
3311 type=STOREH_STUB;
3312 }
3313 if (opcode[i]==0x2B) { // SW
dadf55f2 3314 if(!c||memtarget) {
3315 int a=addr;
b1570849 3316 if(faststore_reg_override) a=faststore_reg_override;
57871462 3317 //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
dadf55f2 3318 emit_writeword_indexed_tlb(tl,0,a,map,temp);
3319 }
57871462 3320 type=STOREW_STUB;
3321 }
3322 if (opcode[i]==0x3F) { // SD
3323 if(!c||memtarget) {
dadf55f2 3324 int a=addr;
b1570849 3325 if(faststore_reg_override) a=faststore_reg_override;
57871462 3326 if(rs2[i]) {
3327 assert(th>=0);
3328 //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
3329 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
dadf55f2 3330 emit_writedword_indexed_tlb(th,tl,0,a,map,temp);
57871462 3331 }else{
3332 // Store zero
3333 //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
3334 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
dadf55f2 3335 emit_writedword_indexed_tlb(tl,tl,0,a,map,temp);
57871462 3336 }
3337 }
3338 type=STORED_STUB;
3339 }
b96d3df7 3340#ifdef PCSX
3341 if(jaddr) {
3342 // PCSX store handlers don't check invcode again
3343 reglist|=1<<addr;
3344 add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3345 jaddr=0;
3346 }
3347#endif
0ff8c62c 3348 if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
57871462 3349 if(!c||memtarget) {
3350 #ifdef DESTRUCTIVE_SHIFT
3351 // The x86 shift operation is 'destructive'; it overwrites the
3352 // source register, so we need to make a copy first and use that.
3353 addr=temp;
3354 #endif
3355 #if defined(HOST_IMM8)
3356 int ir=get_reg(i_regs->regmap,INVCP);
3357 assert(ir>=0);
3358 emit_cmpmem_indexedsr12_reg(ir,addr,1);
3359 #else
3360 emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1);
3361 #endif
0bbd1454 3362 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
3363 emit_callne(invalidate_addr_reg[addr]);
3364 #else
57871462 3365 jaddr2=(int)out;
3366 emit_jne(0);
3367 add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),addr,0,0,0);
0bbd1454 3368 #endif
57871462 3369 }
3370 }
7a518516 3371 u_int addr_val=constmap[i][s]+offset;
3eaa7048 3372 if(jaddr) {
3373 add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3374 } else if(c&&!memtarget) {
7a518516 3375 inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist);
3376 }
3377 // basic current block modification detection..
3378 // not looking back as that should be in mips cache already
3379 if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
c43b5311 3380 SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
7a518516 3381 assert(i_regs->regmap==regs[i].regmap); // not delay slot
3382 if(i_regs->regmap==regs[i].regmap) {
3383 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3384 wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3385 emit_movimm(start+i*4+4,0);
3386 emit_writeword(0,(int)&pcaddr);
3387 emit_jmp((int)do_interrupt);
3388 }
3eaa7048 3389 }
57871462 3390 //if(opcode[i]==0x2B || opcode[i]==0x3F)
3391 //if(opcode[i]==0x2B || opcode[i]==0x28)
3392 //if(opcode[i]==0x2B || opcode[i]==0x29)
3393 //if(opcode[i]==0x2B)
3394 /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F)
3395 {
28d74ee8 3396 #ifdef __i386__
3397 emit_pusha();
3398 #endif
3399 #ifdef __arm__
57871462 3400 save_regs(0x100f);
28d74ee8 3401 #endif
57871462 3402 emit_readword((int)&last_count,ECX);
3403 #ifdef __i386__
3404 if(get_reg(i_regs->regmap,CCREG)<0)
3405 emit_loadreg(CCREG,HOST_CCREG);
3406 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3407 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3408 emit_writeword(HOST_CCREG,(int)&Count);
3409 #endif
3410 #ifdef __arm__
3411 if(get_reg(i_regs->regmap,CCREG)<0)
3412 emit_loadreg(CCREG,0);
3413 else
3414 emit_mov(HOST_CCREG,0);
3415 emit_add(0,ECX,0);
3416 emit_addimm(0,2*ccadj[i],0);
3417 emit_writeword(0,(int)&Count);
3418 #endif
3419 emit_call((int)memdebug);
28d74ee8 3420 #ifdef __i386__
3421 emit_popa();
3422 #endif
3423 #ifdef __arm__
57871462 3424 restore_regs(0x100f);
28d74ee8 3425 #endif
57871462 3426 }/**/
3427}
3428
/*
 * storelr_assemble - emit host code for the "store left/right" instruction
 * at index i.  The opcode tests in the body label the handled opcodes as
 * SWL (0x2A), SWR (0x2E), SDL (0x2C) and SDR (0x2D).
 *
 *   i      - index of the instruction inside the block being compiled
 *   i_regs - register state for this instruction; its regmap is searched
 *            with get_reg() to find the host registers involved
 *
 * Outline of the emitted code, as far as this function shows:
 *   1. range/TLB check of the effective address, branching to a
 *      STORELR_STUB (registered with add_stub at the end) on failure;
 *   2. a four-way dispatch on the two low address bits (cases "0".."3"),
 *      each case writing a different part of the source register;
 *   3. for SDL/SDR an additional word store guarded by a test of bit 2;
 *   4. unless disabled, a compare against invalid_code followed by an
 *      invalidation call/stub.
 *
 * NOTE(review): temp2 is only assigned for SDL/SDR (0x2C/0x2D) and is only
 * read under those same opcode checks.
 */
3429void storelr_assemble(int i,struct regstat *i_regs)
3430{
3431 int s,th,tl;
3432 int temp;
3433 int temp2;
3434 int offset;
3435 int jaddr=0,jaddr2;
3436 int case1,case2,case3;
3437 int done0,done1,done2;
af4ee1fe 3438 int memtarget=0,c=0;
fab5d06d 3439 int agr=AGEN1+(i&1);
57871462 3440 u_int hr,reglist=0;
// Host registers: th/tl for rs2 (|64 and plain; presumably upper/lower half),
// s for the base register rs1, temp for the address-generation register agr
// (falling back to the regmap entry tagged -1 when agr is not mapped).
3441 th=get_reg(i_regs->regmap,rs2[i]|64);
3442 tl=get_reg(i_regs->regmap,rs2[i]);
3443 s=get_reg(i_regs->regmap,rs1[i]);
fab5d06d 3444 temp=get_reg(i_regs->regmap,agr);
3445 if(temp<0) temp=get_reg(i_regs->regmap,-1);
57871462 3446 offset=imm[i];
// c: base register is flagged in isconst.  memtarget: the constant address
// compares (signed) below 0x80000000+RAM_SIZE, or is >=0xC0000000 with TLB.
3447 if(s>=0) {
3448 c=(i_regs->isconst>>s)&1;
af4ee1fe 3449 if(c) {
3450 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3451 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
3452 }
57871462 3453 }
3454 assert(tl>=0);
// reglist: bitmask of host registers that currently hold a mapping
// (regmap[hr]>=0); it is handed to the stubs registered below.
3455 for(hr=0;hr<HOST_REGS;hr++) {
3456 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3457 }
535d208a 3458 assert(temp>=0);
3459 if(!using_tlb) {
// Non-constant address: compare against RAM_SIZE and record the position of
// the conditional branch in jaddr so add_stub() can be given it later.
3460 if(!c) {
3461 emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
3462 if(!offset&&s!=temp) emit_mov(s,temp);
3463 jaddr=(int)out;
3464 emit_jno(0);
3465 }
3466 else
3467 {
// Constant address that is not a memory target (or base is r0):
// emit an unconditional jump whose position is kept in jaddr.
3468 if(!memtarget||!rs1[i]) {
57871462 3469 jaddr=(int)out;
3470 emit_jmp(0);
3471 }
57871462 3472 }
// Convert the guest address in temp to a host address: via the ROREG map
// when RAM_OFFSET is defined, otherwise by adding rdram-0x80000000.
535d208a 3473 #ifdef RAM_OFFSET
3474 int map=get_reg(i_regs->regmap,ROREG);
3475 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3476 gen_tlb_addr_w(temp,map);
3477 #else
3478 if((u_int)rdram!=0x80000000)
3479 emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
3480 #endif
3481 }else{ // using tlb
3482 int map=get_reg(i_regs->regmap,TLREG);
3483 assert(map>=0);
// The TLB map register is removed from reglist before the write lookup.
ea3d2e6e 3484 reglist&=~(1<<map);
535d208a 3485 map=do_tlb_w(c||s<0||offset?temp:s,temp,map,0,c,constmap[i][s]+offset);
3486 if(!c&&!offset&&s>=0) emit_mov(s,temp);
3487 do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr);
3488 if(!jaddr&&!memtarget) {
3489 jaddr=(int)out;
3490 emit_jmp(0);
57871462 3491 }
535d208a 3492 gen_tlb_addr_w(temp,map);
3493 }
3494
// SDL/SDR use FTEMP as a second temporary; when rs2 is r0 all three of
// temp2, th and tl alias the same host register.
3495 if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
3496 temp2=get_reg(i_regs->regmap,FTEMP);
3497 if(!rs2[i]) temp2=th=tl;
3498 }
57871462 3499
2002a1db 3500#ifndef BIG_ENDIAN_MIPS
3501 emit_xorimm(temp,3,temp);
3502#endif
// Four-way dispatch on the low two bits of temp: bit 1 selects case2/case3,
// bit 0 selects case1/case3 (presumably jne is taken when the tested bit is
// set).  case1/case2/case3 hold branch positions patched by set_jump_target.
535d208a 3503 emit_testimm(temp,2);
3504 case2=(int)out;
3505 emit_jne(0);
3506 emit_testimm(temp,1);
3507 case1=(int)out;
3508 emit_jne(0);
3509 // 0
3510 if (opcode[i]==0x2A) { // SWL
3511 emit_writeword_indexed(tl,0,temp);
3512 }
3513 if (opcode[i]==0x2E) { // SWR
3514 emit_writebyte_indexed(tl,3,temp);
3515 }
3516 if (opcode[i]==0x2C) { // SDL
3517 emit_writeword_indexed(th,0,temp);
3518 if(rs2[i]) emit_mov(tl,temp2);
3519 }
3520 if (opcode[i]==0x2D) { // SDR
3521 emit_writebyte_indexed(tl,3,temp);
3522 if(rs2[i]) emit_shldimm(th,tl,24,temp2);
3523 }
3524 done0=(int)out;
3525 emit_jmp(0);
3526 // 1
3527 set_jump_target(case1,(int)out);
3528 if (opcode[i]==0x2A) { // SWL
3529 // Write 3 msb into three least significant bytes
// The rotates below total 32 bits (8+16+8), so tl ends up unchanged.
3530 if(rs2[i]) emit_rorimm(tl,8,tl);
3531 emit_writehword_indexed(tl,-1,temp);
3532 if(rs2[i]) emit_rorimm(tl,16,tl);
3533 emit_writebyte_indexed(tl,1,temp);
3534 if(rs2[i]) emit_rorimm(tl,8,tl);
3535 }
3536 if (opcode[i]==0x2E) { // SWR
3537 // Write two lsb into two most significant bytes
3538 emit_writehword_indexed(tl,1,temp);
3539 }
3540 if (opcode[i]==0x2C) { // SDL
3541 if(rs2[i]) emit_shrdimm(tl,th,8,temp2);
3542 // Write 3 msb into three least significant bytes
3543 if(rs2[i]) emit_rorimm(th,8,th);
3544 emit_writehword_indexed(th,-1,temp);
3545 if(rs2[i]) emit_rorimm(th,16,th);
3546 emit_writebyte_indexed(th,1,temp);
3547 if(rs2[i]) emit_rorimm(th,8,th);
3548 }
3549 if (opcode[i]==0x2D) { // SDR
3550 if(rs2[i]) emit_shldimm(th,tl,16,temp2);
3551 // Write two lsb into two most significant bytes
3552 emit_writehword_indexed(tl,1,temp);
3553 }
3554 done1=(int)out;
3555 emit_jmp(0);
3556 // 2
3557 set_jump_target(case2,(int)out);
3558 emit_testimm(temp,1);
3559 case3=(int)out;
3560 emit_jne(0);
3561 if (opcode[i]==0x2A) { // SWL
3562 // Write two msb into two least significant bytes
3563 if(rs2[i]) emit_rorimm(tl,16,tl);
3564 emit_writehword_indexed(tl,-2,temp);
3565 if(rs2[i]) emit_rorimm(tl,16,tl);
3566 }
3567 if (opcode[i]==0x2E) { // SWR
3568 // Write 3 lsb into three most significant bytes
3569 emit_writebyte_indexed(tl,-1,temp);
3570 if(rs2[i]) emit_rorimm(tl,8,tl);
3571 emit_writehword_indexed(tl,0,temp);
3572 if(rs2[i]) emit_rorimm(tl,24,tl);
3573 }
3574 if (opcode[i]==0x2C) { // SDL
3575 if(rs2[i]) emit_shrdimm(tl,th,16,temp2);
3576 // Write two msb into two least significant bytes
3577 if(rs2[i]) emit_rorimm(th,16,th);
3578 emit_writehword_indexed(th,-2,temp);
3579 if(rs2[i]) emit_rorimm(th,16,th);
3580 }
3581 if (opcode[i]==0x2D) { // SDR
3582 if(rs2[i]) emit_shldimm(th,tl,8,temp2);
3583 // Write 3 lsb into three most significant bytes
3584 emit_writebyte_indexed(tl,-1,temp);
3585 if(rs2[i]) emit_rorimm(tl,8,tl);
3586 emit_writehword_indexed(tl,0,temp);
3587 if(rs2[i]) emit_rorimm(tl,24,tl);
3588 }
3589 done2=(int)out;
3590 emit_jmp(0);
3591 // 3
3592 set_jump_target(case3,(int)out);
3593 if (opcode[i]==0x2A) { // SWL
3594 // Write msb into least significant byte
3595 if(rs2[i]) emit_rorimm(tl,24,tl);
3596 emit_writebyte_indexed(tl,-3,temp);
3597 if(rs2[i]) emit_rorimm(tl,8,tl);
3598 }
3599 if (opcode[i]==0x2E) { // SWR
3600 // Write entire word
3601 emit_writeword_indexed(tl,-3,temp);
3602 }
3603 if (opcode[i]==0x2C) { // SDL
3604 if(rs2[i]) emit_shrdimm(tl,th,24,temp2);
3605 // Write msb into least significant byte
3606 if(rs2[i]) emit_rorimm(th,24,th);
3607 emit_writebyte_indexed(th,-3,temp);
3608 if(rs2[i]) emit_rorimm(th,8,th);
3609 }
3610 if (opcode[i]==0x2D) { // SDR
3611 if(rs2[i]) emit_mov(th,temp2);
3612 // Write entire word
3613 emit_writeword_indexed(tl,-3,temp);
3614 }
// Cases 0..2 jump here; case 3 falls through to the same point.
3615 set_jump_target(done0,(int)out);
3616 set_jump_target(done1,(int)out);
3617 set_jump_target(done2,(int)out);
// SDL/SDR: a second word (temp2) is stored at +4 / -4 from the word-aligned
// address, skipped depending on bit 2 of temp.  done0 is reused as the
// position of the skip branch.
3618 if (opcode[i]==0x2C) { // SDL
3619 emit_testimm(temp,4);
57871462 3620 done0=(int)out;
57871462 3621 emit_jne(0);
535d208a 3622 emit_andimm(temp,~3,temp);
3623 emit_writeword_indexed(temp2,4,temp);
3624 set_jump_target(done0,(int)out);
3625 }
3626 if (opcode[i]==0x2D) { // SDR
3627 emit_testimm(temp,4);
3628 done0=(int)out;
3629 emit_jeq(0);
3630 emit_andimm(temp,~3,temp);
3631 emit_writeword_indexed(temp2,-4,temp);
57871462 3632 set_jump_target(done0,(int)out);
57871462 3633 }
// Register the slow-path stub for the branch recorded in jaddr, unless the
// address is a constant known to be a memory target.
535d208a 3634 if(!c||!memtarget)
3635 add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist);
// Invalidation check on invalid_code: skipped with TLB, when rs1 is flagged
// in waswritten, or when the NDHACK_NO_SMC_CHECK hack bit is set.
// temp is first converted back from a host address to a guest address.
0ff8c62c 3636 if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
535d208a 3637 #ifdef RAM_OFFSET
3638 int map=get_reg(i_regs->regmap,ROREG);
3639 if(map<0) map=HOST_TEMPREG;
3640 gen_orig_addr_w(temp,map);
3641 #else
57871462 3642 emit_addimm_no_flags((u_int)0x80000000-(u_int)rdram,temp);
535d208a 3643 #endif
57871462 3644 #if defined(HOST_IMM8)
3645 int ir=get_reg(i_regs->regmap,INVCP);
3646 assert(ir>=0);
3647 emit_cmpmem_indexedsr12_reg(ir,temp,1);
3648 #else
3649 emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1);
3650 #endif
535d208a 3651 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
3652 emit_callne(invalidate_addr_reg[temp]);
3653 #else
57871462 3654 jaddr2=(int)out;
3655 emit_jne(0);
3656 add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),temp,0,0,0);
535d208a 3657 #endif
57871462 3658 }
// The block below is disabled debugging code (calls memdebug).
3659 /*
3660 emit_pusha();
3661 //save_regs(0x100f);
3662 emit_readword((int)&last_count,ECX);
3663 if(get_reg(i_regs->regmap,CCREG)<0)
3664 emit_loadreg(CCREG,HOST_CCREG);
3665 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3666 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3667 emit_writeword(HOST_CCREG,(int)&Count);
3668 emit_call((int)memdebug);
3669 emit_popa();
3670 //restore_regs(0x100f);
3671 /**/
3672}
3673
/*
 * c1ls_assemble - emit host code for a coprocessor-1 load/store at index i.
 * The opcode tests in the body label the handled opcodes as LWC1 (0x31),
 * LDC1 (0x35), SWC1 (0x39) and SDC1 (0x3D).
 *
 *   i      - index of the instruction inside the block being compiled
 *   i_regs - register state for this instruction (regmap, wasconst,
 *            waswritten are read here)
 *
 * When DISABLE_COP1 is defined the whole body is compiled out and the
 * function only calls cop1_unusable(i, i_regs).
 *
 * The FPU register is accessed indirectly: a pointer is read from
 * reg_cop1_simple[] / reg_cop1_double[] (indexed by bits 16..20 of the
 * instruction word) and the value is then read or written through it.
 */
3674void c1ls_assemble(int i,struct regstat *i_regs)
3675{
3d624f89 3676#ifndef DISABLE_COP1
57871462 3677 int s,th,tl;
3678 int temp,ar;
3679 int map=-1;
3680 int offset;
3681 int c=0;
3682 int jaddr,jaddr2=0,jaddr3,type;
3683 int agr=AGEN1+(i&1);
3684 u_int hr,reglist=0;
// th/tl: host registers mapped to FTEMP|64 and FTEMP; s: base register;
// temp: address-generation register, or the regmap entry tagged -1.
3685 th=get_reg(i_regs->regmap,FTEMP|64);
3686 tl=get_reg(i_regs->regmap,FTEMP);
3687 s=get_reg(i_regs->regmap,rs1[i]);
3688 temp=get_reg(i_regs->regmap,agr);
3689 if(temp<0) temp=get_reg(i_regs->regmap,-1);
3690 offset=imm[i];
3691 assert(tl>=0);
3692 assert(rs1[i]>0);
3693 assert(temp>=0);
// reglist: bitmask of mapped host registers, with HOST_CCREG removed when it
// holds CCREG; passed to the stubs registered below.
3694 for(hr=0;hr<HOST_REGS;hr++) {
3695 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3696 }
3697 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
3698 if (opcode[i]==0x31||opcode[i]==0x35) // LWC1/LDC1
3699 {
3700 // Loads use a temporary register which we need to save
3701 reglist|=1<<temp;
3702 }
// ar is the register holding the effective address: temp for stores,
// tl for loads.
3703 if (opcode[i]==0x39||opcode[i]==0x3D) // SWC1/SDC1
3704 ar=temp;
3705 else // LWC1/LDC1
3706 ar=tl;
3707 //if(s<0) emit_loadreg(rs1[i],ar); //address_generation does this now
3708 //else c=(i_regs->wasconst>>s)&1;
3709 if(s>=0) c=(i_regs->wasconst>>s)&1;
3710 // Check cop1 unusable
// Emitted only while cop1_usable is 0; the flag is set afterwards so later
// calls skip the test (bit 0x20000000 of the register mapped to CSREG).
3711 if(!cop1_usable) {
3712 signed char rs=get_reg(i_regs->regmap,CSREG);
3713 assert(rs>=0);
3714 emit_testimm(rs,0x20000000);
3715 jaddr=(int)out;
3716 emit_jeq(0);
3717 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
3718 cop1_usable=1;
3719 }
3720 if (opcode[i]==0x39) { // SWC1 (get float address)
3721 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],tl);
3722 }
3723 if (opcode[i]==0x3D) { // SDC1 (get double address)
3724 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],tl);
3725 }
3726 // Generate address + offset
3727 if(!using_tlb) {
// The compare is emitted here; the matching branch (emit_jno) is emitted
// further down, after the FPU pointer/value reads.
3728 if(!c)
4cb76aa4 3729 emit_cmpimm(offset||c||s<0?ar:s,RAM_SIZE);
57871462 3730 }
3731 else
3732 {
3733 map=get_reg(i_regs->regmap,TLREG);
3734 assert(map>=0);
ea3d2e6e 3735 reglist&=~(1<<map);
57871462 3736 if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1
3737 map=do_tlb_r(offset||c||s<0?ar:s,ar,map,0,-1,-1,c,constmap[i][s]+offset);
3738 }
3739 if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
3740 map=do_tlb_w(offset||c||s<0?ar:s,ar,map,0,c,constmap[i][s]+offset);
3741 }
3742 }
3743 if (opcode[i]==0x39) { // SWC1 (read float)
3744 emit_readword_indexed(0,tl,tl);
3745 }
3746 if (opcode[i]==0x3D) { // SDC1 (read double)
3747 emit_readword_indexed(4,tl,th);
3748 emit_readword_indexed(0,tl,tl);
3749 }
3750 if (opcode[i]==0x31) { // LWC1 (get target address)
3751 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],temp);
3752 }
3753 if (opcode[i]==0x35) { // LDC1 (get target address)
3754 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],temp);
3755 }
// Branch to the slow path; jaddr2 records the branch position for add_stub.
3756 if(!using_tlb) {
3757 if(!c) {
3758 jaddr2=(int)out;
3759 emit_jno(0);
3760 }
4cb76aa4 3761 else if(((signed int)(constmap[i][s]+offset))>=(signed int)0x80000000+RAM_SIZE) {
57871462 3762 jaddr2=(int)out;
3763 emit_jmp(0); // inline_readstub/inline_writestub? Very rare case
3764 }
3765 #ifdef DESTRUCTIVE_SHIFT
3766 if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
3767 if(!offset&&!c&&s>=0) emit_mov(s,ar);
3768 }
3769 #endif
3770 }else{
3771 if (opcode[i]==0x31||opcode[i]==0x35) { // LWC1/LDC1
3772 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr2);
3773 }
3774 if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
3775 do_tlb_w_branch(map,c,constmap[i][s]+offset,&jaddr2);
3776 }
3777 }
// Fast-path memory access; type selects the stub kind used if jaddr2 is set.
3778 if (opcode[i]==0x31) { // LWC1
3779 //if(s>=0&&!c&&!offset) emit_mov(s,tl);
3780 //gen_tlb_addr_r(ar,map);
3781 //emit_readword_indexed((int)rdram-0x80000000,tl,tl);
3782 #ifdef HOST_IMM_ADDR32
3783 if(c) emit_readword_tlb(constmap[i][s]+offset,map,tl);
3784 else
3785 #endif
3786 emit_readword_indexed_tlb(0,offset||c||s<0?tl:s,map,tl);
3787 type=LOADW_STUB;
3788 }
3789 if (opcode[i]==0x35) { // LDC1
3790 assert(th>=0);
3791 //if(s>=0&&!c&&!offset) emit_mov(s,tl);
3792 //gen_tlb_addr_r(ar,map);
3793 //emit_readword_indexed((int)rdram-0x80000000,tl,th);
3794 //emit_readword_indexed((int)rdram-0x7FFFFFFC,tl,tl);
3795 #ifdef HOST_IMM_ADDR32
3796 if(c) emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
3797 else
3798 #endif
3799 emit_readdword_indexed_tlb(0,offset||c||s<0?tl:s,map,th,tl);
3800 type=LOADD_STUB;
3801 }
3802 if (opcode[i]==0x39) { // SWC1
3803 //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
3804 emit_writeword_indexed_tlb(tl,0,offset||c||s<0?temp:s,map,temp);
3805 type=STOREW_STUB;
3806 }
3807 if (opcode[i]==0x3D) { // SDC1
3808 assert(th>=0);
3809 //emit_writeword_indexed(th,(int)rdram-0x80000000,temp);
3810 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
3811 emit_writedword_indexed_tlb(th,tl,0,offset||c||s<0?temp:s,map,temp);
3812 type=STORED_STUB;
3813 }
// Stores only: compare against invalid_code and invalidate on mismatch.
// Skipped with TLB, when rs1 is flagged in waswritten, or when the
// NDHACK_NO_SMC_CHECK hack bit is set.
0ff8c62c 3814 if(!using_tlb&&!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
57871462 3815 if (opcode[i]==0x39||opcode[i]==0x3D) { // SWC1/SDC1
3816 #ifndef DESTRUCTIVE_SHIFT
3817 temp=offset||c||s<0?ar:s;
3818 #endif
3819 #if defined(HOST_IMM8)
3820 int ir=get_reg(i_regs->regmap,INVCP);
3821 assert(ir>=0);
3822 emit_cmpmem_indexedsr12_reg(ir,temp,1);
3823 #else
3824 emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1);
3825 #endif
0bbd1454 3826 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
3827 emit_callne(invalidate_addr_reg[temp]);
3828 #else
57871462 3829 jaddr3=(int)out;
3830 emit_jne(0);
3831 add_stub(INVCODE_STUB,jaddr3,(int)out,reglist|(1<<HOST_CCREG),temp,0,0,0);
0bbd1454 3832 #endif
57871462 3833 }
3834 }
3835 if(jaddr2) add_stub(type,jaddr2,(int)out,i,offset||c||s<0?ar:s,(int)i_regs,ccadj[i],reglist);
// Loads: store the value just read (tl, and th for doubles) through the
// FPU register pointer that was loaded into temp above.
3836 if (opcode[i]==0x31) { // LWC1 (write float)
3837 emit_writeword_indexed(tl,0,temp);
3838 }
3839 if (opcode[i]==0x35) { // LDC1 (write double)
3840 emit_writeword_indexed(th,4,temp);
3841 emit_writeword_indexed(tl,0,temp);
3842 }
3843 //if(opcode[i]==0x39)
3844 /*if(opcode[i]==0x39||opcode[i]==0x31)
3845 {
3846 emit_pusha();
3847 emit_readword((int)&last_count,ECX);
3848 if(get_reg(i_regs->regmap,CCREG)<0)
3849 emit_loadreg(CCREG,HOST_CCREG);
3850 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3851 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3852 emit_writeword(HOST_CCREG,(int)&Count);
3853 emit_call((int)memdebug);
3854 emit_popa();
3855 }/**/
3d624f89 3856#else
3857 cop1_unusable(i, i_regs);
3858#endif
57871462 3859}
3860
b9b61529 3861void c2ls_assemble(int i,struct regstat *i_regs)
3862{
3863 int s,tl;
3864 int ar;
3865 int offset;
1fd1aceb 3866 int memtarget=0,c=0;
c2e3bd42 3867 int jaddr2=0,jaddr3,type;
b9b61529 3868 int agr=AGEN1+(i&1);
ffb0b9e0 3869 int fastio_reg_override=0;
b9b61529 3870 u_int hr,reglist=0;
3871 u_int copr=(source[i]>>16)&0x1f;
3872 s=get_reg(i_regs->regmap,rs1[i]);
3873 tl=get_reg(i_regs->regmap,FTEMP);
3874 offset=imm[i];
3875 assert(rs1[i]>0);
3876 assert(tl>=0);
3877 assert(!using_tlb);
3878
3879 for(hr=0;hr<HOST_REGS;hr++) {
3880 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3881 }
3882 if(i_regs->regmap[HOST_CCREG]==CCREG)
3883 reglist&=~(1<<HOST_CCREG);
3884
3885 // get the address
3886 if (opcode[i]==0x3a) { // SWC2
3887 ar=get_reg(i_regs->regmap,agr);
3888 if(ar<0) ar=get_reg(i_regs->regmap,-1);
3889 reglist|=1<<ar;
3890 } else { // LWC2
3891 ar=tl;
3892 }
1fd1aceb 3893 if(s>=0) c=(i_regs->wasconst>>s)&1;
3894 memtarget=c&&(((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE);
b9b61529 3895 if (!offset&&!c&&s>=0) ar=s;
3896 assert(ar>=0);
3897
3898 if (opcode[i]==0x3a) { // SWC2
3899 cop2_get_dreg(copr,tl,HOST_TEMPREG);
1fd1aceb 3900 type=STOREW_STUB;
b9b61529 3901 }
1fd1aceb 3902 else
b9b61529 3903 type=LOADW_STUB;
1fd1aceb 3904
3905 if(c&&!memtarget) {
3906 jaddr2=(int)out;
3907 emit_jmp(0); // inline_readstub/inline_writestub?
b9b61529 3908 }
1fd1aceb 3909 else {
3910 if(!c) {
ffb0b9e0 3911 jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override);
1fd1aceb 3912 }
a327ad27 3913 else if(ram_offset&&memtarget) {
3914 emit_addimm(ar,ram_offset,HOST_TEMPREG);
3915 fastio_reg_override=HOST_TEMPREG;
3916 }
1fd1aceb 3917 if (opcode[i]==0x32) { // LWC2
3918 #ifdef HOST_IMM_ADDR32
3919 if(c) emit_readword_tlb(constmap[i][s]+offset,-1,tl);
3920 else
3921 #endif
ffb0b9e0 3922 int a=ar;
3923 if(fastio_reg_override) a=fastio_reg_override;
3924 emit_readword_indexed(0,a,tl);
1fd1aceb 3925 }
3926 if (opcode[i]==0x3a) { // SWC2
3927 #ifdef DESTRUCTIVE_SHIFT
3928 if(!offset&&!c&&s>=0) emit_mov(s,ar);
3929 #endif
ffb0b9e0 3930 int a=ar;
3931 if(fastio_reg_override) a=fastio_reg_override;
3932 emit_writeword_indexed(tl,0,a);
1fd1aceb 3933 }
b9b61529 3934 }
3935 if(jaddr2)
3936 add_stub(type,jaddr2,(int)out,i,ar,(int)i_regs,ccadj[i],reglist);
0ff8c62c 3937 if(opcode[i]==0x3a) // SWC2
3938 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
b9b61529 3939#if defined(HOST_IMM8)
3940 int ir=get_reg(i_regs->regmap,INVCP);
3941 assert(ir>=0);
3942 emit_cmpmem_indexedsr12_reg(ir,ar,1);
3943#else
3944 emit_cmpmem_indexedsr12_imm((int)invalid_code,ar,1);
3945#endif
0bbd1454 3946 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
3947 emit_callne(invalidate_addr_reg[ar]);
3948 #else
b9b61529 3949 jaddr3=(int)out;
3950 emit_jne(0);
3951 add_stub(INVCODE_STUB,jaddr3,(int)out,reglist|(1<<HOST_CCREG),ar,0,0,0);
0bbd1454 3952 #endif
b9b61529 3953 }
3954 if (opcode[i]==0x32) { // LWC2
3955 cop2_put_dreg(copr,tl,HOST_TEMPREG);
3956 }
3957}
3958
57871462 3959#ifndef multdiv_assemble
/*
 * Placeholder used when the architecture backend does not supply its own
 * multdiv_assemble: prints a diagnostic and terminates with status 1.
 * Both parameters are ignored.
 */
void multdiv_assemble(int i,struct regstat *i_regs)
{
  (void)i;
  (void)i_regs;
  printf("%s","Need multdiv_assemble for this architecture.\n");
  exit(1);
}
3965#endif
3966
3967void mov_assemble(int i,struct regstat *i_regs)
3968{
3969 //if(opcode2[i]==0x10||opcode2[i]==0x12) { // MFHI/MFLO
3970 //if(opcode2[i]==0x11||opcode2[i]==0x13) { // MTHI/MTLO
57871462 3971 if(rt1[i]) {
3972 signed char sh,sl,th,tl;
3973 th=get_reg(i_regs->regmap,rt1[i]|64);
3974 tl=get_reg(i_regs->regmap,rt1[i]);
3975 //assert(tl>=0);
3976 if(tl>=0) {
3977 sh=get_reg(i_regs->regmap,rs1[i]|64);
3978 sl=get_reg(i_regs->regmap,rs1[i]);
3979 if(sl>=0) emit_mov(sl,tl);
3980 else emit_loadreg(rs1[i],tl);
3981 if(th>=0) {
3982 if(sh>=0) emit_mov(sh,th);
3983 else emit_loadreg(rs1[i]|64,th);
3984 }
3985 }
3986 }
3987}
3988
3989#ifndef fconv_assemble
/*
 * Placeholder used when the architecture backend does not supply its own
 * fconv_assemble: prints a diagnostic and terminates with status 1.
 * Both parameters are ignored.
 */
void fconv_assemble(int i,struct regstat *i_regs)
{
  (void)i;
  (void)i_regs;
  printf("%s","Need fconv_assemble for this architecture.\n");
  exit(1);
}
3995#endif
3996
3997#if 0
/*
 * Placeholder float_assemble (currently inside an "#if 0" region, so it is
 * not compiled): prints a diagnostic and terminates with status 1.
 * Both parameters are ignored.
 */
void float_assemble(int i,struct regstat *i_regs)
{
  (void)i;
  (void)i_regs;
  printf("%s","Need float_assemble for this architecture.\n");
  exit(1);
}
4003#endif
4004
4005void syscall_assemble(int i,struct regstat *i_regs)
4006{
4007 signed char ccreg=get_reg(i_regs->regmap,CCREG);
4008 assert(ccreg==HOST_CCREG);
4009 assert(!is_delayslot);
4010 emit_movimm(start+i*4,EAX); // Get PC
2573466a 4011 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
7139f3c8 4012 emit_jmp((int)jump_syscall_hle); // XXX
4013}
4014
4015void hlecall_assemble(int i,struct regstat *i_regs)
4016{
4017 signed char ccreg=get_reg(i_regs->regmap,CCREG);
4018 assert(ccreg==HOST_CCREG);
4019 assert(!is_delayslot);
4020 emit_movimm(start+i*4+4,0); // Get PC
67ba0fb4 4021 emit_movimm((int)psxHLEt[source[i]&7],1);
2573466a 4022 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX
67ba0fb4 4023 emit_jmp((int)jump_hlecall);
57871462 4024}
4025
1e973cb0 4026void intcall_assemble(int i,struct regstat *i_regs)
4027{
4028 signed char ccreg=get_reg(i_regs->regmap,CCREG);
4029 assert(ccreg==HOST_CCREG);
4030 assert(!is_delayslot);
4031 emit_movimm(start+i*4,0); // Get PC
2573466a 4032 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
1e973cb0 4033 emit_jmp((int)jump_intcall);
4034}
4035
57871462 4036void ds_assemble(int i,struct regstat *i_regs)
4037{
ffb0b9e0 4038 speculate_register_values(i);
57871462 4039 is_delayslot=1;
4040 switch(itype[i]) {
4041 case ALU:
4042 alu_assemble(i,i_regs);break;
4043 case IMM16:
4044 imm16_assemble(i,i_regs);break;
4045 case SHIFT:
4046 shift_assemble(i,i_regs);break;
4047 case SHIFTIMM:
4048 shiftimm_assemble(i,i_regs);break;
4049 case LOAD:
4050 load_assemble(i,i_regs);break;
4051 case LOADLR:
4052 loadlr_assemble(i,i_regs);break;
4053 case STORE:
4054 store_assemble(i,i_regs);break;
4055 case STORELR:
4056 storelr_assemble(i,i_regs);break;
4057 case COP0:
4058 cop0_assemble(i,i_regs);break;
4059 case COP1:
4060 cop1_assemble(i,i_regs);break;
4061 case C1LS:
4062 c1ls_assemble(i,i_regs);break;
b9b61529 4063 case COP2:
4064 cop2_assemble(i,i_regs);break;
4065 case C2LS:
4066 c2ls_assemble(i,i_regs);break;
4067 case C2OP:
4068 c2op_assemble(i,i_regs);break;
57871462 4069 case FCONV:
4070 fconv_assemble(i,i_regs);break;
4071 case FLOAT:
4072 float_assemble(i,i_regs);break;
4073 case FCOMP:
4074 fcomp_assemble(i,i_regs);break;
4075 case MULTDIV:
4076 multdiv_assemble(i,i_regs);break;
4077 case MOV:
4078 mov_assemble(i,i_regs);break;
4079 case SYSCALL:
7139f3c8 4080 case HLECALL:
1e973cb0 4081 case INTCALL:
57871462 4082 case SPAN:
4083 case UJUMP:
4084 case RJUMP:
4085 case CJUMP:
4086 case SJUMP:
4087 case FJUMP:
c43b5311 4088 SysPrintf("Jump in the delay slot. This is probably a bug.\n");
57871462 4089 }
4090 is_delayslot=0;
4091}
4092
4093// Is the branch target a valid internal jump?
4094int internal_branch(uint64_t i_is32,int addr)
4095{
4096 if(addr&1) return 0; // Indirect (register) jump
4097 if(addr>=start && addr<start+slen*4-4)
4098 {
4099 int t=(addr-start)>>2;
4100 // Delay slots are not valid branch targets
4101 //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0;
4102 // 64 -> 32 bit transition requires a recompile
4103 /*if(is32[t]&~unneeded_reg_upper[t]&~i_is32)
4104 {
4105 if(requires_32bit[t]&~i_is32) printf("optimizable: no\n");
4106 else printf("optimizable: yes\n");
4107 }*/
4108 //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0;
a28c6ce8 4109#ifndef FORCE32
57871462 4110 if(requires_32bit[t]&~i_is32) return 0;
a28c6ce8 4111 else
4112#endif
4113 return 1;
57871462 4114 }
4115 return 0;
4116}
4117
4118#ifndef wb_invalidate
4119void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,
4120 uint64_t u,uint64_t uu)
4121{
4122 int hr;
4123 for(hr=0;hr<HOST_REGS;hr++) {
4124 if(hr!=EXCLUDE_REG) {
4125 if(pre[hr]!=entry[hr]) {
4126 if(pre[hr]>=0) {
4127 if((dirty>>hr)&1) {
4128 if(get_reg(entry,pre[hr])<0) {
4129 if(pre[hr]<64) {
4130 if(!((u>>pre[hr])&1)) {
4131 emit_storereg(pre[hr],hr);
4132 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4133 emit_sarimm(hr,31,hr);
4134 emit_storereg(pre[hr]|64,hr);
4135 }
4136 }
4137 }else{
4138 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4139 emit_storereg(pre[hr],hr);
4140 }
4141 }
4142 }
4143 }
4144 }
4145 }
4146 }
4147 }
4148 // Move from one register to another (no writeback)
4149 for(hr=0;hr<HOST_REGS;hr++) {
4150 if(hr!=EXCLUDE_REG) {
4151 if(pre[hr]!=entry[hr]) {
4152 if(pre[hr]>=0&&(pre[hr]&63)<TEMPREG) {
4153 int nr;
4154 if((nr=get_reg(entry,pre[hr]))>=0) {
4155 emit_mov(hr,nr);
4156 }
4157 }
4158 }
4159 }
4160 }
4161}
4162#endif
4163
4164// Load the specified registers
4165// This only loads the registers given as arguments because
4166// we don't want to load things that will be overwritten
4167void load_regs(signed char entry[],signed char regmap[],int is32,int rs1,int rs2)
4168{
4169 int hr;
4170 // Load 32-bit regs
4171 for(hr=0;hr<HOST_REGS;hr++) {
4172 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
4173 if(entry[hr]!=regmap[hr]) {
4174 if(regmap[hr]==rs1||regmap[hr]==rs2)
4175 {
4176 if(regmap[hr]==0) {
4177 emit_zeroreg(hr);
4178 }
4179 else
4180 {
4181 emit_loadreg(regmap[hr],hr);
4182 }
4183 }
4184 }
4185 }
4186 }
4187 //Load 64-bit regs
4188 for(hr=0;hr<HOST_REGS;hr++) {
4189 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
4190 if(entry[hr]!=regmap[hr]) {
4191 if(regmap[hr]-64==rs1||regmap[hr]-64==rs2)
4192 {
4193 assert(regmap[hr]!=64);
4194 if((is32>>(regmap[hr]&63))&1) {
4195 int lr=get_reg(regmap,regmap[hr]-64);
4196 if(lr>=0)
4197 emit_sarimm(lr,31,hr);
4198 else
4199 emit_loadreg(regmap[hr],hr);
4200 }
4201 else
4202 {
4203 emit_loadreg(regmap[hr],hr);
4204 }
4205 }
4206 }
4207 }
4208 }
4209}
4210
4211// Load registers prior to the start of a loop
4212// so that they are not loaded within the loop
4213static void loop_preload(signed char pre[],signed char entry[])
4214{
4215 int hr;
4216 for(hr=0;hr<HOST_REGS;hr++) {
4217 if(hr!=EXCLUDE_REG) {
4218 if(pre[hr]!=entry[hr]) {
4219 if(entry[hr]>=0) {
4220 if(get_reg(pre,entry[hr])<0) {
4221 assem_debug("loop preload:\n");
4222 //printf("loop preload: %d\n",hr);
4223 if(entry[hr]==0) {
4224 emit_zeroreg(hr);
4225 }
4226 else if(entry[hr]<TEMPREG)
4227 {
4228 emit_loadreg(entry[hr],hr);
4229 }
4230 else if(entry[hr]-64<TEMPREG)
4231 {
4232 emit_loadreg(entry[hr],hr);
4233 }
4234 }
4235 }
4236 }
4237 }
4238 }
4239}
4240
4241// Generate address for load/store instruction
b9b61529 4242// goes to AGEN for writes, FTEMP for LOADLR and cop1/2 loads
57871462 4243void address_generation(int i,struct regstat *i_regs,signed char entry[])
4244{
b9b61529 4245 if(itype[i]==LOAD||itype[i]==LOADLR||itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS||itype[i]==C2LS) {
5194fb95 4246 int ra=-1;
57871462 4247 int agr=AGEN1+(i&1);
4248 int mgr=MGEN1+(i&1);
4249 if(itype[i]==LOAD) {
4250 ra=get_reg(i_regs->regmap,rt1[i]);
535d208a 4251 if(ra<0) ra=get_reg(i_regs->regmap,-1);
4252 assert(ra>=0);
57871462 4253 }
4254 if(itype[i]==LOADLR) {
4255 ra=get_reg(i_regs->regmap,FTEMP);
4256 }
4257 if(itype[i]==STORE||itype[i]==STORELR) {
4258 ra=get_reg(i_regs->regmap,agr);
4259 if(ra<0) ra=get_reg(i_regs->regmap,-1);
4260 }
b9b61529 4261 if(itype[i]==C1LS||itype[i]==C2LS) {
4262 if ((opcode[i]&0x3b)==0x31||(opcode[i]&0x3b)==0x32) // LWC1/LDC1/LWC2/LDC2
57871462 4263 ra=get_reg(i_regs->regmap,FTEMP);
1fd1aceb 4264 else { // SWC1/SDC1/SWC2/SDC2
57871462 4265 ra=get_reg(i_regs->regmap,agr);
4266 if(ra<0) ra=get_reg(i_regs->regmap,-1);
4267 }
4268 }
4269 int rs=get_reg(i_regs->regmap,rs1[i]);
4270 int rm=get_reg(i_regs->regmap,TLREG);
4271 if(ra>=0) {
4272 int offset=imm[i];
4273 int c=(i_regs->wasconst>>rs)&1;
4274 if(rs1[i]==0) {
4275 // Using r0 as a base address
4276 /*if(rm>=0) {
4277 if(!entry||entry[rm]!=mgr) {
4278 generate_map_const(offset,rm);
4279 } // else did it in the previous cycle
4280 }*/
4281 if(!entry||entry[ra]!=agr) {
4282 if (opcode[i]==0x22||opcode[i]==0x26) {
4283 emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
4284 }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
4285 emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR
4286 }else{
4287 emit_movimm(offset,ra);
4288 }
4289 } // else did it in the previous cycle
4290 }
4291 else if(rs<0) {
4292 if(!entry||entry[ra]!=rs1[i])
4293 emit_loadreg(rs1[i],ra);
4294 //if(!entry||entry[ra]!=rs1[i])
4295 // printf("poor load scheduling!\n");
4296 }
4297 else if(c) {
63cb0298 4298#ifndef DISABLE_TLB
57871462 4299 if(rm>=0) {
4300 if(!entry||entry[rm]!=mgr) {
b9b61529 4301 if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a) {
57871462 4302 // Stores to memory go thru the mapper to detect self-modifying
4303 // code, loads don't.
4304 if((unsigned int)(constmap[i][rs]+offset)>=0xC0000000 ||
4cb76aa4 4305 (unsigned int)(constmap[i][rs]+offset)<0x80000000+RAM_SIZE )
57871462 4306 generate_map_const(constmap[i][rs]+offset,rm);
4307 }else{
4308 if((signed int)(constmap[i][rs]+offset)>=(signed int)0xC0000000)
4309 generate_map_const(constmap[i][rs]+offset,rm);
4310 }
4311 }
4312 }
63cb0298 4313#endif
57871462 4314 if(rs1[i]!=rt1[i]||itype[i]!=LOAD) {
4315 if(!entry||entry[ra]!=agr) {
4316 if (opcode[i]==0x22||opcode[i]==0x26) {
4317 emit_movimm((constmap[i][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR
4318 }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
4319 emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
4320 }else{
4321 #ifdef HOST_IMM_ADDR32
b9b61529 4322 if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32) || // LWC1/LDC1/LWC2/LDC2
57871462 4323 (using_tlb&&((signed int)constmap[i][rs]+offset)>=(signed int)0xC0000000))
4324 #endif
4325 emit_movimm(constmap[i][rs]+offset,ra);
8575a877 4326 regs[i].loadedconst|=1<<ra;
57871462 4327 }
4328 } // else did it in the previous cycle
4329 } // else load_consts already did it
4330 }
4331 if(offset&&!c&&rs1[i]) {
4332 if(rs>=0) {
4333 emit_addimm(rs,offset,ra);
4334 }else{
4335 emit_addimm(ra,offset,ra);
4336 }
4337 }
4338 }
4339 }
4340 // Preload constants for next instruction
b9b61529 4341 if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) {
57871462 4342 int agr,ra;
63cb0298 4343 #if !defined(HOST_IMM_ADDR32) && !defined(DISABLE_TLB)
57871462 4344 // Mapper entry
4345 agr=MGEN1+((i+1)&1);
4346 ra=get_reg(i_regs->regmap,agr);
4347 if(ra>=0) {
4348 int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
4349 int offset=imm[i+1];
4350 int c=(regs[i+1].wasconst>>rs)&1;
4351 if(c) {
b9b61529 4352 if(itype[i+1]==STORE||itype[i+1]==STORELR
4353 ||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1, SWC2/SDC2
57871462 4354 // Stores to memory go thru the mapper to detect self-modifying
4355 // code, loads don't.
4356 if((unsigned int)(constmap[i+1][rs]+offset)>=0xC0000000 ||
4cb76aa4 4357 (unsigned int)(constmap[i+1][rs]+offset)<0x80000000+RAM_SIZE )
57871462 4358 generate_map_const(constmap[i+1][rs]+offset,ra);
4359 }else{
4360 if((signed int)(constmap[i+1][rs]+offset)>=(signed int)0xC0000000)
4361 generate_map_const(constmap[i+1][rs]+offset,ra);
4362 }
4363 }
4364 /*else if(rs1[i]==0) {
4365 generate_map_const(offset,ra);
4366 }*/
4367 }
4368 #endif
4369 // Actual address
4370 agr=AGEN1+((i+1)&1);
4371 ra=get_reg(i_regs->regmap,agr);
4372 if(ra>=0) {
4373 int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
4374 int offset=imm[i+1];
4375 int c=(regs[i+1].wasconst>>rs)&1;
4376 if(c&&(rs1[i+1]!=rt1[i+1]||itype[i+1]!=LOAD)) {
4377 if (opcode[i+1]==0x22||opcode[i+1]==0x26) {
4378 emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR
4379 }else if (opcode[i+1]==0x1a||opcode[i+1]==0x1b) {
4380 emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
4381 }else{
4382 #ifdef HOST_IMM_ADDR32
b9b61529 4383 if((itype[i+1]!=LOAD&&(opcode[i+1]&0x3b)!=0x31&&(opcode[i+1]&0x3b)!=0x32) || // LWC1/LDC1/LWC2/LDC2
57871462 4384 (using_tlb&&((signed int)constmap[i+1][rs]+offset)>=(signed int)0xC0000000))
4385 #endif
4386 emit_movimm(constmap[i+1][rs]+offset,ra);
8575a877 4387 regs[i+1].loadedconst|=1<<ra;
57871462 4388 }
4389 }
4390 else if(rs1[i+1]==0) {
4391 // Using r0 as a base address
4392 if (opcode[i+1]==0x22||opcode[i+1]==0x26) {
4393 emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
4394 }else if (opcode[i+1]==0x1a||opcode[i+1]==0x1b) {
4395 emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR
4396 }else{
4397 emit_movimm(offset,ra);
4398 }
4399 }
4400 }
4401 }
4402}
4403
4404int get_final_value(int hr, int i, int *value)
4405{
4406 int reg=regs[i].regmap[hr];
4407 while(i<slen-1) {
4408 if(regs[i+1].regmap[hr]!=reg) break;
4409 if(!((regs[i+1].isconst>>hr)&1)) break;
4410 if(bt[i+1]) break;
4411 i++;
4412 }
4413 if(i<slen-1) {
4414 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP) {
4415 *value=constmap[i][hr];
4416 return 1;
4417 }
4418 if(!bt[i+1]) {
4419 if(itype[i+1]==UJUMP||itype[i+1]==RJUMP||itype[i+1]==CJUMP||itype[i+1]==SJUMP) {
4420 // Load in delay slot, out-of-order execution
4421 if(itype[i+2]==LOAD&&rs1[i+2]==reg&&rt1[i+2]==reg&&((regs[i+1].wasconst>>hr)&1))
4422 {
4423 #ifdef HOST_IMM_ADDR32
4424 if(!using_tlb||((signed int)constmap[i][hr]+imm[i+2])<(signed int)0xC0000000) return 0;
4425 #endif
4426 // Precompute load address
4427 *value=constmap[i][hr]+imm[i+2];
4428 return 1;
4429 }
4430 }
4431 if(itype[i+1]==LOAD&&rs1[i+1]==reg&&rt1[i+1]==reg)
4432 {
4433 #ifdef HOST_IMM_ADDR32
4434 if(!using_tlb||((signed int)constmap[i][hr]+imm[i+1])<(signed int)0xC0000000) return 0;
4435 #endif
4436 // Precompute load address
4437 *value=constmap[i][hr]+imm[i+1];
4438 //printf("c=%x imm=%x\n",(int)constmap[i][hr],imm[i+1]);
4439 return 1;
4440 }
4441 }
4442 }
4443 *value=constmap[i][hr];
4444 //printf("c=%x\n",(int)constmap[i][hr]);
4445 if(i==slen-1) return 1;
4446 if(reg<64) {
4447 return !((unneeded_reg[i+1]>>reg)&1);
4448 }else{
4449 return !((unneeded_reg_upper[i+1]>>reg)&1);
4450 }
4451}
4452
4453// Load registers with known constants
4454void load_consts(signed char pre[],signed char regmap[],int is32,int i)
4455{
8575a877 4456 int hr,hr2;
4457 // propagate loaded constant flags
4458 if(i==0||bt[i])
4459 regs[i].loadedconst=0;
4460 else {
4461 for(hr=0;hr<HOST_REGS;hr++) {
4462 if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr]
4463 &&regmap[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1))
4464 {
4465 regs[i].loadedconst|=1<<hr;
4466 }
4467 }
4468 }
57871462 4469 // Load 32-bit regs
4470 for(hr=0;hr<HOST_REGS;hr++) {
4471 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
4472 //if(entry[hr]!=regmap[hr]) {
8575a877 4473 if(!((regs[i].loadedconst>>hr)&1)) {
57871462 4474 if(((regs[i].isconst>>hr)&1)&&regmap[hr]<64&&regmap[hr]>0) {
8575a877 4475 int value,similar=0;
57871462 4476 if(get_final_value(hr,i,&value)) {
8575a877 4477 // see if some other register has similar value
4478 for(hr2=0;hr2<HOST_REGS;hr2++) {
4479 if(hr2!=EXCLUDE_REG&&((regs[i].loadedconst>>hr2)&1)) {
4480 if(is_similar_value(value,constmap[i][hr2])) {
4481 similar=1;
4482 break;
4483 }
4484 }
4485 }
4486 if(similar) {
4487 int value2;
4488 if(get_final_value(hr2,i,&value2)) // is this needed?
4489 emit_movimm_from(value2,hr2,value,hr);
4490 else
4491 emit_movimm(value,hr);
4492 }
4493 else if(value==0) {
57871462 4494 emit_zeroreg(hr);
4495 }
4496 else {
4497 emit_movimm(value,hr);
4498 }
4499 }
8575a877 4500 regs[i].loadedconst|=1<<hr;
57871462 4501 }
4502 }
4503 }
4504 }
4505 // Load 64-bit regs
4506 for(hr=0;hr<HOST_REGS;hr++) {
4507 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
4508 //if(entry[hr]!=regmap[hr]) {
4509 if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) {
4510 if(((regs[i].isconst>>hr)&1)&&regmap[hr]>64) {
4511 if((is32>>(regmap[hr]&63))&1) {
4512 int lr=get_reg(regmap,regmap[hr]-64);
4513 assert(lr>=0);
4514 emit_sarimm(lr,31,hr);
4515 }
4516 else
4517 {
4518 int value;
4519 if(get_final_value(hr,i,&value)) {
4520 if(value==0) {
4521 emit_zeroreg(hr);
4522 }
4523 else {
4524 emit_movimm(value,hr);
4525 }
4526 }
4527 }
4528 }
4529 }
4530 }
4531 }
4532}
4533void load_all_consts(signed char regmap[],int is32,u_int dirty,int i)
4534{
4535 int hr;
4536 // Load 32-bit regs
4537 for(hr=0;hr<HOST_REGS;hr++) {
4538 if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((dirty>>hr)&1)) {
4539 if(((regs[i].isconst>>hr)&1)&&regmap[hr]<64&&regmap[hr]>0) {
4540 int value=constmap[i][hr];
4541 if(value==0) {
4542 emit_zeroreg(hr);
4543 }
4544 else {
4545 emit_movimm(value,hr);
4546 }
4547 }
4548 }
4549 }
4550 // Load 64-bit regs
4551 for(hr=0;hr<HOST_REGS;hr++) {
4552 if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((dirty>>hr)&1)) {
4553 if(((regs[i].isconst>>hr)&1)&&regmap[hr]>64) {
4554 if((is32>>(regmap[hr]&63))&1) {
4555 int lr=get_reg(regmap,regmap[hr]-64);
4556 assert(lr>=0);
4557 emit_sarimm(lr,31,hr);
4558 }
4559 else
4560 {
4561 int value=constmap[i][hr];
4562 if(value==0) {
4563 emit_zeroreg(hr);
4564 }
4565 else {
4566 emit_movimm(value,hr);
4567 }
4568 }
4569 }
4570 }
4571 }
4572}
4573
4574// Write out all dirty registers (except cycle count)
4575void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty)
4576{
4577 int hr;
4578 for(hr=0;hr<HOST_REGS;hr++) {
4579 if(hr!=EXCLUDE_REG) {
4580 if(i_regmap[hr]>0) {
4581 if(i_regmap[hr]!=CCREG) {
4582 if((i_dirty>>hr)&1) {
4583 if(i_regmap[hr]<64) {
4584 emit_storereg(i_regmap[hr],hr);
24385cae 4585#ifndef FORCE32
57871462 4586 if( ((i_is32>>i_regmap[hr])&1) ) {
4587 #ifdef DESTRUCTIVE_WRITEBACK
4588 emit_sarimm(hr,31,hr);
4589 emit_storereg(i_regmap[hr]|64,hr);
4590 #else
4591 emit_sarimm(hr,31,HOST_TEMPREG);
4592 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
4593 #endif
4594 }
24385cae 4595#endif
57871462 4596 }else{
4597 if( !((i_is32>>(i_regmap[hr]&63))&1) ) {
4598 emit_storereg(i_regmap[hr],hr);
4599 }
4600 }
4601 }
4602 }
4603 }
4604 }
4605 }
4606}
4607// Write out dirty registers that we need to reload (pair with load_needed_regs)
4608// This writes the registers not written by store_regs_bt
4609void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
4610{
4611 int hr;
4612 int t=(addr-start)>>2;
4613 for(hr=0;hr<HOST_REGS;hr++) {
4614 if(hr!=EXCLUDE_REG) {
4615 if(i_regmap[hr]>0) {
4616 if(i_regmap[hr]!=CCREG) {
4617 if(i_regmap[hr]==regs[t].regmap_entry[hr] && ((regs[t].dirty>>hr)&1) && !(((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) {
4618 if((i_dirty>>hr)&1) {
4619 if(i_regmap[hr]<64) {
4620 emit_storereg(i_regmap[hr],hr);
24385cae 4621#ifndef FORCE32
57871462 4622 if( ((i_is32>>i_regmap[hr])&1) ) {
4623 #ifdef DESTRUCTIVE_WRITEBACK
4624 emit_sarimm(hr,31,hr);
4625 emit_storereg(i_regmap[hr]|64,hr);
4626 #else
4627 emit_sarimm(hr,31,HOST_TEMPREG);
4628 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
4629 #endif
4630 }
24385cae 4631#endif
57871462 4632 }else{
4633 if( !((i_is32>>(i_regmap[hr]&63))&1) ) {
4634 emit_storereg(i_regmap[hr],hr);
4635 }
4636 }
4637 }
4638 }
4639 }
4640 }
4641 }
4642 }
4643}
4644
4645// Load all registers (except cycle count)
4646void load_all_regs(signed char i_regmap[])
4647{
4648 int hr;
4649 for(hr=0;hr<HOST_REGS;hr++) {
4650 if(hr!=EXCLUDE_REG) {
4651 if(i_regmap[hr]==0) {
4652 emit_zeroreg(hr);
4653 }
4654 else
ea3d2e6e 4655 if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
57871462 4656 {
4657 emit_loadreg(i_regmap[hr],hr);
4658 }
4659 }
4660 }
4661}
4662
4663// Load all current registers also needed by next instruction
4664void load_needed_regs(signed char i_regmap[],signed char next_regmap[])
4665{
4666 int hr;
4667 for(hr=0;hr<HOST_REGS;hr++) {
4668 if(hr!=EXCLUDE_REG) {
4669 if(get_reg(next_regmap,i_regmap[hr])>=0) {
4670 if(i_regmap[hr]==0) {
4671 emit_zeroreg(hr);
4672 }
4673 else
ea3d2e6e 4674 if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
57871462 4675 {
4676 emit_loadreg(i_regmap[hr],hr);
4677 }
4678 }
4679 }
4680 }
4681}
4682
4683// Load all regs, storing cycle count if necessary
4684void load_regs_entry(int t)
4685{
4686 int hr;
2573466a 4687 if(is_ds[t]) emit_addimm(HOST_CCREG,CLOCK_ADJUST(1),HOST_CCREG);
4688 else if(ccadj[t]) emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[t]),HOST_CCREG);
57871462 4689 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) {
4690 emit_storereg(CCREG,HOST_CCREG);
4691 }
4692 // Load 32-bit regs
4693 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 4694 if(regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<TEMPREG) {
57871462 4695 if(regs[t].regmap_entry[hr]==0) {
4696 emit_zeroreg(hr);
4697 }
4698 else if(regs[t].regmap_entry[hr]!=CCREG)
4699 {
4700 emit_loadreg(regs[t].regmap_entry[hr],hr);
4701 }
4702 }
4703 }
4704 // Load 64-bit regs
4705 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 4706 if(regs[t].regmap_entry[hr]>=64&&regs[t].regmap_entry[hr]<TEMPREG+64) {
57871462 4707 assert(regs[t].regmap_entry[hr]!=64);
4708 if((regs[t].was32>>(regs[t].regmap_entry[hr]&63))&1) {
4709 int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
4710 if(lr<0) {
4711 emit_loadreg(regs[t].regmap_entry[hr],hr);
4712 }
4713 else
4714 {
4715 emit_sarimm(lr,31,hr);
4716 }
4717 }
4718 else
4719 {
4720 emit_loadreg(regs[t].regmap_entry[hr],hr);
4721 }
4722 }
4723 }
4724}
4725
4726// Store dirty registers prior to branch
4727void store_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
4728{
4729 if(internal_branch(i_is32,addr))
4730 {
4731 int t=(addr-start)>>2;
4732 int hr;
4733 for(hr=0;hr<HOST_REGS;hr++) {
4734 if(hr!=EXCLUDE_REG) {
4735 if(i_regmap[hr]>0 && i_regmap[hr]!=CCREG) {
4736 if(i_regmap[hr]!=regs[t].regmap_entry[hr] || !((regs[t].dirty>>hr)&1) || (((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) {
4737 if((i_dirty>>hr)&1) {
4738 if(i_regmap[hr]<64) {
4739 if(!((unneeded_reg[t]>>i_regmap[hr])&1)) {
4740 emit_storereg(i_regmap[hr],hr);
4741 if( ((i_is32>>i_regmap[hr])&1) && !((unneeded_reg_upper[t]>>i_regmap[hr])&1) ) {
4742 #ifdef DESTRUCTIVE_WRITEBACK
4743 emit_sarimm(hr,31,hr);
4744 emit_storereg(i_regmap[hr]|64,hr);
4745 #else
4746 emit_sarimm(hr,31,HOST_TEMPREG);
4747 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
4748 #endif
4749 }
4750 }
4751 }else{
4752 if( !((i_is32>>(i_regmap[hr]&63))&1) && !((unneeded_reg_upper[t]>>(i_regmap[hr]&63))&1) ) {
4753 emit_storereg(i_regmap[hr],hr);
4754 }
4755 }
4756 }
4757 }
4758 }
4759 }
4760 }
4761 }
4762 else
4763 {
4764 // Branch out of this block, write out all dirty regs
4765 wb_dirtys(i_regmap,i_is32,i_dirty);
4766 }
4767}
4768
4769// Load all needed registers for branch target
4770void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
4771{
4772 //if(addr>=start && addr<(start+slen*4))
4773 if(internal_branch(i_is32,addr))
4774 {
4775 int t=(addr-start)>>2;
4776 int hr;
4777 // Store the cycle count before loading something else
4778 if(i_regmap[HOST_CCREG]!=CCREG) {
4779 assert(i_regmap[HOST_CCREG]==-1);
4780 }
4781 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) {
4782 emit_storereg(CCREG,HOST_CCREG);
4783 }
4784 // Load 32-bit regs
4785 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 4786 if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<TEMPREG) {
57871462 4787 #ifdef DESTRUCTIVE_WRITEBACK
4788 if(i_regmap[hr]!=regs[t].regmap_entry[hr] || ( !((regs[t].dirty>>hr)&1) && ((i_dirty>>hr)&1) && (((i_is32&~unneeded_reg_upper[t])>>i_regmap[hr])&1) ) || (((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) {
4789 #else
4790 if(i_regmap[hr]!=regs[t].regmap_entry[hr] ) {
4791 #endif
4792 if(regs[t].regmap_entry[hr]==0) {
4793 emit_zeroreg(hr);
4794 }
4795 else if(regs[t].regmap_entry[hr]!=CCREG)
4796 {
4797 emit_loadreg(regs[t].regmap_entry[hr],hr);
4798 }
4799 }
4800 }
4801 }
4802 //Load 64-bit regs
4803 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 4804 if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=64&&regs[t].regmap_entry[hr]<TEMPREG+64) {
57871462 4805 if(i_regmap[hr]!=regs[t].regmap_entry[hr]) {
4806 assert(regs[t].regmap_entry[hr]!=64);
4807 if((i_is32>>(regs[t].regmap_entry[hr]&63))&1) {
4808 int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
4809 if(lr<0) {
4810 emit_loadreg(regs[t].regmap_entry[hr],hr);
4811 }
4812 else
4813 {
4814 emit_sarimm(lr,31,hr);
4815 }
4816 }
4817 else
4818 {
4819 emit_loadreg(regs[t].regmap_entry[hr],hr);
4820 }
4821 }
4822 else if((i_is32>>(regs[t].regmap_entry[hr]&63))&1) {
4823 int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
4824 assert(lr>=0);
4825 emit_sarimm(lr,31,hr);
4826 }
4827 }
4828 }
4829 }
4830}
4831
// Check whether the register state at a branch (i_regmap, i_is32, i_dirty)
// is compatible with the state at the branch target addr.
// Returns 1 when the states match and 0 otherwise.
int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
{
  // Target lies inside this block (the last instruction slot is excluded)
  if(addr>=start && addr<start+slen*4-4)
  {
    int t=(addr-start)>>2;
    int hr;
    // The target must expect the cycle count in its usual host register
    if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) return 0;
    for(hr=0;hr<HOST_REGS;hr++)
    {
      if(hr!=EXCLUDE_REG)
      {
        if(i_regmap[hr]!=regs[t].regmap_entry[hr])
        {
          // Target expects a (non-temporary) guest register here that we
          // do not have: it would need to be loaded
          if(regs[t].regmap_entry[hr]>=0&&(regs[t].regmap_entry[hr]|64)<TEMPREG+64)
          {
            return 0;
          }
          else
          if((i_dirty>>hr)&1)
          {
            // We hold a dirty value the target does not keep: it would need
            // to be written back unless the target does not need it
            if(i_regmap[hr]<TEMPREG)
            {
              if(!((unneeded_reg[t]>>i_regmap[hr])&1))
                return 0;
            }
            else if(i_regmap[hr]>=64&&i_regmap[hr]<TEMPREG+64)
            {
              if(!((unneeded_reg_upper[t]>>(i_regmap[hr]&63))&1))
                return 0;
            }
          }
        }
        else // Same register but is it 32-bit or dirty?
        if(i_regmap[hr]>=0)
        {
          if(!((regs[t].dirty>>hr)&1))
          {
            if((i_dirty>>hr)&1)
            {
              if(!((unneeded_reg[t]>>i_regmap[hr])&1))
              {
                //printf("%x: dirty no match\n",addr);
                return 0;
              }
            }
          }
          // 32-bit status differs and the upper half is needed at the target
          if((((regs[t].was32^i_is32)&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)
          {
            //printf("%x: is32 no match\n",addr);
            return 0;
          }
        }
      }
    }
    //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0;
#ifndef FORCE32
    if(requires_32bit[t]&~i_is32) return 0;
#endif
    // Delay slots are not valid branch targets
    //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0;
    // Delay slots require additional processing, so do not match
    if(is_ds[t]) return 0;
  }
  else
  {
    // Target is not covered by the range check above: only matches when no
    // register other than the cycle count (in HOST_CCREG) is dirty
    int hr;
    for(hr=0;hr<HOST_REGS;hr++)
    {
      if(hr!=EXCLUDE_REG)
      {
        if(i_regmap[hr]>=0)
        {
          if(hr!=HOST_CCREG||i_regmap[hr]!=CCREG)
          {
            if((i_dirty>>hr)&1)
            {
              return 0;
            }
          }
        }
      }
    }
  }
  return 1;
}
4917
// Used when a branch jumps into the delay slot of another branch
//
// i is the index of the branching instruction; the instruction at its
// target ba[i] (index t) is assembled here as a standalone entry, followed
// by a jump to the instruction after it (ba[i]+4).
void ds_assemble_entry(int i)
{
  int t=(ba[i]-start)>>2;
  // Record where this instruction's code starts, unless already known
  if(!instr_addr[t]) instr_addr[t]=(u_int)out;
  assem_debug("Assemble delay slot at %x\n",ba[i]);
  assem_debug("<->\n");
  // Write back the cycle count if this instruction takes over its host register
  if(regs[t].regmap_entry[HOST_CCREG]==CCREG&&regs[t].regmap[HOST_CCREG]!=CCREG)
    wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty,regs[t].was32);
  load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,rs1[t],rs2[t]);
  address_generation(t,&regs[t],regs[t].regmap_entry);
  // Stores (including SWC1/SDC1/SWC2/SDC2) additionally need INVCP loaded
  if(itype[t]==STORE||itype[t]==STORELR||(opcode[t]&0x3b)==0x39||(opcode[t]&0x3b)==0x3a)
    load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,INVCP,INVCP);
  cop1_usable=0;
  is_delayslot=0;
  // Dispatch to the assembler for this instruction type
  switch(itype[t]) {
    case ALU:
      alu_assemble(t,&regs[t]);break;
    case IMM16:
      imm16_assemble(t,&regs[t]);break;
    case SHIFT:
      shift_assemble(t,&regs[t]);break;
    case SHIFTIMM:
      shiftimm_assemble(t,&regs[t]);break;
    case LOAD:
      load_assemble(t,&regs[t]);break;
    case LOADLR:
      loadlr_assemble(t,&regs[t]);break;
    case STORE:
      store_assemble(t,&regs[t]);break;
    case STORELR:
      storelr_assemble(t,&regs[t]);break;
    case COP0:
      cop0_assemble(t,&regs[t]);break;
    case COP1:
      cop1_assemble(t,&regs[t]);break;
    case C1LS:
      c1ls_assemble(t,&regs[t]);break;
    case COP2:
      cop2_assemble(t,&regs[t]);break;
    case C2LS:
      c2ls_assemble(t,&regs[t]);break;
    case C2OP:
      c2op_assemble(t,&regs[t]);break;
    case FCONV:
      fconv_assemble(t,&regs[t]);break;
    case FLOAT:
      float_assemble(t,&regs[t]);break;
    case FCOMP:
      fcomp_assemble(t,&regs[t]);break;
    case MULTDIV:
      multdiv_assemble(t,&regs[t]);break;
    case MOV:
      mov_assemble(t,&regs[t]);break;
    // The remaining types all share the diagnostic below; nothing is
    // assembled for them. Types not listed in this switch are ignored.
    case SYSCALL:
    case HLECALL:
    case INTCALL:
    case SPAN:
    case UJUMP:
    case RJUMP:
    case CJUMP:
    case SJUMP:
    case FJUMP:
      SysPrintf("Jump in the delay slot.  This is probably a bug.\n");
  }
  // Continue at the instruction following the delay slot
  store_regs_bt(regs[t].regmap,regs[t].is32,regs[t].dirty,ba[i]+4);
  load_regs_bt(regs[t].regmap,regs[t].is32,regs[t].dirty,ba[i]+4);
  if(internal_branch(regs[t].is32,ba[i]+4))
    assem_debug("branch: internal\n");
  else
    assem_debug("branch: external\n");
  // The continuation is required to be inside this block
  assert(internal_branch(regs[t].is32,ba[i]+4));
  add_to_linker((int)out,ba[i]+4,internal_branch(regs[t].is32,ba[i]+4));
  emit_jmp(0);
}
4993
// Emit the cycle count check for the branch at instruction i and register
// a CC_STUB for it.
//
//   i        - index of the branch instruction
//   i_regmap - not referenced in this function
//   adj      - out: cycle adjustment for the branch target (ccadj of the
//              target, -1 when the target is a delay slot, 0 when the
//              branch is not internal)
//   addr, taken - passed through to add_stub
//   invert   - nonzero selects the add-and-test form even when *adj is nonzero
void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert)
{
  int count;
  int jaddr;
  int idle=0;
  int t=0;
  // NOTE(review): this store is always overwritten by the if/else below
  if(itype[i]==RJUMP)
  {
    *adj=0;
  }
  //if(ba[i]>=start && ba[i]<(start+slen*4))
  if(internal_branch(branch_regs[i].is32,ba[i]))
  {
    t=(ba[i]-start)>>2;
    if(is_ds[t]) *adj=-1; // Branch into delay slot adds an extra cycle
    else *adj=ccadj[t];
  }
  else
  {
    *adj=0;
  }
  count=ccadj[i];
  // A taken branch to itself whose following source word is zero
  if(taken==TAKEN && i==(ba[i]-start)>>2 && source[i+1]==0) {
    // Idle loop
    if(count&1) emit_addimm_and_set_flags(2*(count+2),HOST_CCREG);
    idle=(int)out;
    //emit_subfrommem(&idlecount,HOST_CCREG); // Count idle cycles
    emit_andimm(HOST_CCREG,3,HOST_CCREG);
    jaddr=(int)out;
    emit_jmp(0);
  }
  else if(*adj==0||invert) {
    // Add the cycles now and branch on the sign of the result
    int cycles=CLOCK_ADJUST(count+2);
    // faster loop HACK
    // (applies to short backward internal branches, within
    // NO_CYCLE_PENALTY_THR instructions)
    if (t&&*adj) {
      int rel=t-i;
      if(-NO_CYCLE_PENALTY_THR<rel&&rel<0)
        cycles=CLOCK_ADJUST(*adj)+count+2-*adj;
    }
    emit_addimm_and_set_flags(cycles,HOST_CCREG);
    jaddr=(int)out;
    emit_jns(0);
  }
  else
  {
    // Compare only; the stub receives count+2 as its cycle argument
    emit_cmpimm(HOST_CCREG,-CLOCK_ADJUST(count+2));
    jaddr=(int)out;
    emit_jns(0);
  }
  // jaddr is the jump to patch; the stub resumes at the idle loop start
  // when there is one, otherwise at the current output position
  add_stub(CC_STUB,jaddr,idle?idle:(int)out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0);
}
5045
// Emit the out-of-line code for cycle count stub n.
//
// As used below, the stub record holds:
//   stubs[n][1] - address of the jump to patch so that it lands here
//   stubs[n][2] - address to jump back to when done
//   stubs[n][3] - cycle adjustment applied around the cc_interrupt call
//   stubs[n][4] - index of the branch instruction
//   stubs[n][5] - PC to store in pcaddr, or -1 if it depends on the branch
//   stubs[n][6] - TAKEN / NOTTAKEN / NULLDS (or another value)
//
// The stub writes back dirty registers, stores the PC to pcaddr, calls
// cc_interrupt, reloads the registers and jumps back.
void do_ccstub(int n)
{
  literal_pool(256);
  assem_debug("do_ccstub %x\n",start+stubs[n][4]*4);
  set_jump_target(stubs[n][1],(int)out);
  int i=stubs[n][4];
  if(stubs[n][6]==NULLDS) {
    // Delay slot instruction is nullified ("likely" branch)
    wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
  }
  else if(stubs[n][6]!=TAKEN) {
    wb_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty);
  }
  else {
    // Taken: for an internal branch only the registers that will be
    // reloaded afterwards are written here (see wb_needed_dirtys)
    if(internal_branch(branch_regs[i].is32,ba[i]))
      wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
  }
  if(stubs[n][5]!=-1)
  {
    // Save PC as return address
    emit_movimm(stubs[n][5],EAX);
    emit_writeword(EAX,(int)&pcaddr);
  }
  else
  {
    // Return address depends on which way the branch goes
    if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
    {
      // Host registers holding the low/high halves of the two operands
      int s1l=get_reg(branch_regs[i].regmap,rs1[i]);
      int s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
      int s2l=get_reg(branch_regs[i].regmap,rs2[i]);
      int s2h=get_reg(branch_regs[i].regmap,rs2[i]|64);
      if(rs1[i]==0)
      {
        // First operand is r0: compare the second operand against zero
        s1l=s2l;s1h=s2h;
        s2l=s2h=-1;
      }
      else if(rs2[i]==0)
      {
        s2l=s2h=-1;
      }
      // Both operands 32-bit: the upper halves are not compared
      if((branch_regs[i].is32>>rs1[i])&(branch_regs[i].is32>>rs2[i])&1) {
        s1h=s2h=-1;
      }
      assert(s1l>=0);
      #ifdef DESTRUCTIVE_WRITEBACK
      if(rs1[i]) {
        if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs1[i])&1)
          emit_loadreg(rs1[i],s1l);
      }
      else {
        if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1)
          emit_loadreg(rs2[i],s1l);
      }
      if(s2l>=0)
        if((branch_regs[i].dirty>>s2l)&(branch_regs[i].is32>>rs2[i])&1)
          emit_loadreg(rs2[i],s2l);
      #endif
      // Pick scratch host registers that hold neither branch operand
      int hr=0;
      int addr=-1,alt=-1,ntaddr=-1;
      while(hr<HOST_REGS)
      {
        if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
           (branch_regs[i].regmap[hr]&63)!=rs1[i] &&
           (branch_regs[i].regmap[hr]&63)!=rs2[i] )
        {
          addr=hr++;break;
        }
        hr++;
      }
      while(hr<HOST_REGS)
      {
        if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
           (branch_regs[i].regmap[hr]&63)!=rs1[i] &&
           (branch_regs[i].regmap[hr]&63)!=rs2[i] )
        {
          alt=hr++;break;
        }
        hr++;
      }
      if((opcode[i]&0x2E)==6) // BLEZ/BGTZ needs another register
      {
        while(hr<HOST_REGS)
        {
          if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
             (branch_regs[i].regmap[hr]&63)!=rs1[i] &&
             (branch_regs[i].regmap[hr]&63)!=rs2[i] )
          {
            ntaddr=hr;break;
          }
          hr++;
        }
        assert(hr<HOST_REGS);
      }
      // For each branch type: put the taken target (ba[i]) and the
      // fall-through address (start+i*4+8) in registers and select one
      // into addr with conditional moves
      if((opcode[i]&0x2f)==4) // BEQ
      {
        #ifdef HAVE_CMOV_IMM
        if(s1h<0) {
          if(s2l>=0) emit_cmp(s1l,s2l);
          else emit_test(s1l,s1l);
          emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr);
        }
        else
        #endif
        {
          emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
          if(s1h>=0) {
            if(s2h>=0) emit_cmp(s1h,s2h);
            else emit_test(s1h,s1h);
            emit_cmovne_reg(alt,addr);
          }
          if(s2l>=0) emit_cmp(s1l,s2l);
          else emit_test(s1l,s1l);
          emit_cmovne_reg(alt,addr);
        }
      }
      if((opcode[i]&0x2f)==5) // BNE
      {
        #ifdef HAVE_CMOV_IMM
        if(s1h<0) {
          if(s2l>=0) emit_cmp(s1l,s2l);
          else emit_test(s1l,s1l);
          emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr);
        }
        else
        #endif
        {
          emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt);
          if(s1h>=0) {
            if(s2h>=0) emit_cmp(s1h,s2h);
            else emit_test(s1h,s1h);
            emit_cmovne_reg(alt,addr);
          }
          if(s2l>=0) emit_cmp(s1l,s2l);
          else emit_test(s1l,s1l);
          emit_cmovne_reg(alt,addr);
        }
      }
      if((opcode[i]&0x2f)==6) // BLEZ
      {
        //emit_movimm(ba[i],alt);
        //emit_movimm(start+i*4+8,addr);
        emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
        emit_cmpimm(s1l,1);
        if(s1h>=0) emit_mov(addr,ntaddr);
        emit_cmovl_reg(alt,addr);
        if(s1h>=0) {
          emit_test(s1h,s1h);
          emit_cmovne_reg(ntaddr,addr);
          emit_cmovs_reg(alt,addr);
        }
      }
      if((opcode[i]&0x2f)==7) // BGTZ
      {
        //emit_movimm(ba[i],addr);
        //emit_movimm(start+i*4+8,ntaddr);
        emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr);
        emit_cmpimm(s1l,1);
        if(s1h>=0) emit_mov(addr,alt);
        emit_cmovl_reg(ntaddr,addr);
        if(s1h>=0) {
          emit_test(s1h,s1h);
          emit_cmovne_reg(alt,addr);
          emit_cmovs_reg(ntaddr,addr);
        }
      }
      if((opcode[i]==1)&&(opcode2[i]&0x2D)==0) // BLTZ
      {
        //emit_movimm(ba[i],alt);
        //emit_movimm(start+i*4+8,addr);
        emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
        if(s1h>=0) emit_test(s1h,s1h);
        else emit_test(s1l,s1l);
        emit_cmovs_reg(alt,addr);
      }
      if((opcode[i]==1)&&(opcode2[i]&0x2D)==1) // BGEZ
      {
        //emit_movimm(ba[i],addr);
        //emit_movimm(start+i*4+8,alt);
        emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
        if(s1h>=0) emit_test(s1h,s1h);
        else emit_test(s1l,s1l);
        emit_cmovs_reg(alt,addr);
      }
      if(opcode[i]==0x11 && opcode2[i]==0x08 ) {
        if(source[i]&0x10000) // BC1T
        {
          //emit_movimm(ba[i],alt);
          //emit_movimm(start+i*4+8,addr);
          emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
          emit_testimm(s1l,0x800000);
          emit_cmovne_reg(alt,addr);
        }
        else // BC1F
        {
          //emit_movimm(ba[i],addr);
          //emit_movimm(start+i*4+8,alt);
          emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
          emit_testimm(s1l,0x800000);
          emit_cmovne_reg(alt,addr);
        }
      }
      emit_writeword(addr,(int)&pcaddr);
    }
    else
    if(itype[i]==RJUMP)
    {
      // Register jump: the target is in the source register, or in RTEMP
      // when the delay slot overwrites that register
      int r=get_reg(branch_regs[i].regmap,rs1[i]);
      if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) {
        r=get_reg(branch_regs[i].regmap,RTEMP);
      }
      emit_writeword(r,(int)&pcaddr);
    }
    else {SysPrintf("Unknown branch type in do_ccstub\n");exit(1);}
  }
  // Update cycle count
  assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1);
  if(stubs[n][3]) emit_addimm(HOST_CCREG,CLOCK_ADJUST((int)stubs[n][3]),HOST_CCREG);
  emit_call((int)cc_interrupt);
  if(stubs[n][3]) emit_addimm(HOST_CCREG,-CLOCK_ADJUST((int)stubs[n][3]),HOST_CCREG);
  // Reload the registers needed at the point execution resumes
  if(stubs[n][6]==TAKEN) {
    if(internal_branch(branch_regs[i].is32,ba[i]))
      load_needed_regs(branch_regs[i].regmap,regs[(ba[i]-start)>>2].regmap_entry);
    else if(itype[i]==RJUMP) {
      if(get_reg(branch_regs[i].regmap,RTEMP)>=0)
        emit_readword((int)&pcaddr,get_reg(branch_regs[i].regmap,RTEMP));
      else
        emit_loadreg(rs1[i],get_reg(branch_regs[i].regmap,rs1[i]));
    }
  }else if(stubs[n][6]==NOTTAKEN) {
    if(i<slen-2) load_needed_regs(branch_regs[i].regmap,regmap_pre[i+2]);
    else load_all_regs(branch_regs[i].regmap);
  }else if(stubs[n][6]==NULLDS) {
    // Delay slot instruction is nullified ("likely" branch)
    if(i<slen-2) load_needed_regs(regs[i].regmap,regmap_pre[i+2]);
    else load_all_regs(regs[i].regmap);
  }else{
    load_all_regs(branch_regs[i].regmap);
  }
  emit_jmp(stubs[n][2]); // return address

  /* This works but uses a lot of memory...
  emit_readword((int)&last_count,ECX);
  emit_add(HOST_CCREG,ECX,EAX);
  emit_writeword(EAX,(int)&Count);
  emit_call((int)gen_interupt);
  emit_readword((int)&Count,HOST_CCREG);
  emit_readword((int)&next_interupt,EAX);
  emit_readword((int)&pending_exception,EBX);
  emit_writeword(EAX,(int)&last_count);
  emit_sub(HOST_CCREG,EAX,HOST_CCREG);
  emit_test(EBX,EBX);
  int jne_instr=(int)out;
  emit_jne(0);
  if(stubs[n][3]) emit_addimm(HOST_CCREG,-2*stubs[n][3],HOST_CCREG);
  load_all_regs(branch_regs[i].regmap);
  emit_jmp(stubs[n][2]); // return address
  set_jump_target(jne_instr,(int)out);
  emit_readword((int)&pcaddr,EAX);
  // Call get_addr_ht instead of doing the hash table here.
  // This code is executed infrequently and takes up a lot of space
  // so smaller is better.
  emit_storereg(CCREG,HOST_CCREG);
  emit_pushreg(EAX);
  emit_call((int)get_addr_ht);
  emit_loadreg(CCREG,HOST_CCREG);
  emit_addimm(ESP,4,ESP);
  emit_jmpreg(EAX);*/
}
5315
5316add_to_linker(int addr,int target,int ext)
5317{
5318 link_addr[linkcount][0]=addr;
5319 link_addr[linkcount][1]=target;
5320 link_addr[linkcount][2]=ext;
5321 linkcount++;
5322}
5323
eba830cd 5324static void ujump_assemble_write_ra(int i)
5325{
5326 int rt;
5327 unsigned int return_address;
5328 rt=get_reg(branch_regs[i].regmap,31);
5329 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
5330 //assert(rt>=0);
5331 return_address=start+i*4+8;
5332 if(rt>=0) {
5333 #ifdef USE_MINI_HT
5334 if(internal_branch(branch_regs[i].is32,return_address)&&rt1[i+1]!=31) {
5335 int temp=-1; // note: must be ds-safe
5336 #ifdef HOST_TEMPREG
5337 temp=HOST_TEMPREG;
5338 #endif
5339 if(temp>=0) do_miniht_insert(return_address,rt,temp);
5340 else emit_movimm(return_address,rt);
5341 }
5342 else
5343 #endif
5344 {
5345 #ifdef REG_PREFETCH
5346 if(temp>=0)
5347 {
5348 if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
5349 }
5350 #endif
5351 emit_movimm(return_address,rt); // PC into link register
5352 #ifdef IMM_PREFETCH
5353 emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
5354 #endif
5355 }
5356 }
5357}
5358
/*
 * Assemble the unconditional jump at instruction i, whose fixed target is
 * ba[i], together with its delay slot (instruction i+1).
 *
 * i      - index of the jump instruction
 * i_regs - register state handed to address_generation() and ds_assemble()
 *          for the delay slot
 *
 * When rt1[i]==31 the return address is written through
 * ujump_assemble_write_ra(): before the delay slot if the delay slot reads
 * register 31, otherwise after the branch registers have been loaded.
 * The jump itself is either assembled via ds_assemble_entry() (internal
 * target that is a delay slot) or emitted as emit_jmp(0) and recorded with
 * add_to_linker().
 */
57871462 5359void ujump_assemble(int i,struct regstat *i_regs)
5360{
5361 signed char *i_regmap=i_regs->regmap;
eba830cd 5362 int ra_done=0;
// The target index equals i: this jump branches to itself.
57871462 5363 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
5364 address_generation(i+1,i_regs,regs[i].regmap_entry);
5365 #ifdef REG_PREFETCH
5366 int temp=get_reg(branch_regs[i].regmap,PTEMP);
5367 if(rt1[i]==31&&temp>=0)
5368 {
5369 int return_address=start+i*4+8;
// NOTE(review): this tests >0 whereas other get_reg() results in this file are tested with >=0 - confirm intent.
5370 if(get_reg(branch_regs[i].regmap,31)>0)
5371 if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
5372 }
5373 #endif
// The delay slot reads register 31, so the link value has to be written before the delay slot is assembled.
eba830cd 5374 if(rt1[i]==31&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) {
5375 ujump_assemble_write_ra(i); // writeback ra for DS
5376 ra_done=1;
57871462 5377 }
4ef8f67d 5378 ds_assemble(i+1,i_regs);
5379 uint64_t bc_unneeded=branch_regs[i].u;
5380 uint64_t bc_unneeded_upper=branch_regs[i].uu;
// Add bit 0 and the rt1[i] bit to the "unneeded" masks given to wb_invalidate().
5381 bc_unneeded|=1|(1LL<<rt1[i]);
5382 bc_unneeded_upper|=1|(1LL<<rt1[i]);
5383 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5384 bc_unneeded,bc_unneeded_upper);
5385 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
// Link value not written yet: write it now, after the delay slot.
eba830cd 5386 if(!ra_done&&rt1[i]==31)
5387 ujump_assemble_write_ra(i);
57871462 5388 int cc,adj;
5389 cc=get_reg(branch_regs[i].regmap,CCREG);
5390 assert(cc==HOST_CCREG);
5391 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5392 #ifdef REG_PREFETCH
5393 if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp);
5394 #endif
// do_cc() reports through adj how much it already accounted for; the remainder is added to the cycle counter register here.
5395 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
2573466a 5396 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5397 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5398 if(internal_branch(branch_regs[i].is32,ba[i]))
5399 assem_debug("branch: internal\n");
5400 else
5401 assem_debug("branch: external\n");
5402 if(internal_branch(branch_regs[i].is32,ba[i])&&is_ds[(ba[i]-start)>>2]) {
5403 ds_assemble_entry(i);
5404 }
5405 else {
// Record the jump in the link table, then emit it with a placeholder target of 0.
5406 add_to_linker((int)out,ba[i],internal_branch(branch_regs[i].is32,ba[i]));
5407 emit_jmp(0);
5408 }
5409}
5410
eba830cd 5411static void rjump_assemble_write_ra(int i)
5412{
5413 int rt,return_address;
5414 assert(rt1[i+1]!=rt1[i]);
5415 assert(rt2[i+1]!=rt1[i]);
5416 rt=get_reg(branch_regs[i].regmap,rt1[i]);
5417 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
5418 assert(rt>=0);
5419 return_address=start+i*4+8;
5420 #ifdef REG_PREFETCH
5421 if(temp>=0)
5422 {
5423 if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
5424 }
5425 #endif
5426 emit_movimm(return_address,rt); // PC into link register
5427 #ifdef IMM_PREFETCH
5428 emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
5429 #endif
5430}
5431
/*
 * Assemble the register-indirect jump at instruction i together with its
 * delay slot (instruction i+1).  The jump address is taken from the host
 * register mapped to guest register rs1[i].
 *
 * i      - index of the jump instruction
 * i_regs - register state handed to address_generation() and ds_assemble()
 *          for the delay slot
 *
 * When rt1[i]!=0 the return address is written through
 * rjump_assemble_write_ra(): before the delay slot if the delay slot reads
 * rt1[i], otherwise after the branch registers have been loaded.
 * The function ends by emitting a jump to jump_vaddr_reg[rs] (or, with
 * USE_MINI_HT and rs1[i]==31, the do_miniht_jump() sequence).
 */
57871462 5432void rjump_assemble(int i,struct regstat *i_regs)
5433{
5434 signed char *i_regmap=i_regs->regmap;
5435 int temp;
5436 int rs,cc,adj;
eba830cd 5437 int ra_done=0;
57871462 5438 rs=get_reg(branch_regs[i].regmap,rs1[i]);
5439 assert(rs>=0);
5440 if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) {
5441 // Delay slot abuse, make a copy of the branch address register
5442 temp=get_reg(branch_regs[i].regmap,RTEMP);
5443 assert(temp>=0);
5444 assert(regs[i].regmap[temp]==RTEMP);
5445 emit_mov(rs,temp);
5446 rs=temp;
5447 }
5448 address_generation(i+1,i_regs,regs[i].regmap_entry);
5449 #ifdef REG_PREFETCH
5450 if(rt1[i]==31)
5451 {
5452 if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) {
5453 int return_address=start+i*4+8;
5454 if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
5455 }
5456 }
5457 #endif
5458 #ifdef USE_MINI_HT
5459 if(rs1[i]==31) {
5460 int rh=get_reg(regs[i].regmap,RHASH);
5461 if(rh>=0) do_preload_rhash(rh);
5462 }
5463 #endif
// The delay slot reads rt1[i], so the link value has to be written before the delay slot is assembled.
eba830cd 5464 if(rt1[i]!=0&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) {
5465 rjump_assemble_write_ra(i);
5466 ra_done=1;
57871462 5467 }
d5910d5d 5468 ds_assemble(i+1,i_regs);
5469 uint64_t bc_unneeded=branch_regs[i].u;
5470 uint64_t bc_unneeded_upper=branch_regs[i].uu;
// Add bit 0 and the rt1[i] bit to the "unneeded" masks, but clear the rs1[i] bit in the first mask: rs1[i] supplies the jump address.
5471 bc_unneeded|=1|(1LL<<rt1[i]);
5472 bc_unneeded_upper|=1|(1LL<<rt1[i]);
5473 bc_unneeded&=~(1LL<<rs1[i]);
5474 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5475 bc_unneeded,bc_unneeded_upper);
5476 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],CCREG);
// Link value not written yet: write it now, after the delay slot.
eba830cd 5477 if(!ra_done&&rt1[i]!=0)
5478 rjump_assemble_write_ra(i);
57871462 5479 cc=get_reg(branch_regs[i].regmap,CCREG);
5480 assert(cc==HOST_CCREG);
5481 #ifdef USE_MINI_HT
5482 int rh=get_reg(branch_regs[i].regmap,RHASH);
5483 int ht=get_reg(branch_regs[i].regmap,RHTBL);
5484 if(rs1[i]==31) {
// NOTE(review): rh and ht are used without a >=0 check here; presumably register allocation guarantees both when rs1[i]==31 - confirm.
5485 if(regs[i].regmap[rh]!=RHASH) do_preload_rhash(rh);
5486 do_preload_rhtbl(ht);
5487 do_rhash(rs,rh);
5488 }
5489 #endif
5490 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1);
5491 #ifdef DESTRUCTIVE_WRITEBACK
5492 if((branch_regs[i].dirty>>rs)&(branch_regs[i].is32>>rs1[i])&1) {
5493 if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) {
5494 emit_loadreg(rs1[i],rs);
5495 }
5496 }
5497 #endif
5498 #ifdef REG_PREFETCH
5499 if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp);
5500 #endif
5501 #ifdef USE_MINI_HT
5502 if(rs1[i]==31) {
5503 do_miniht_load(ht,rh);
5504 }
5505 #endif
5506 //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN);
5507 //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen
5508 //assert(adj==0);
// Add the cycle cost to the cycle counter register and set flags; the conditional jump emitted below is registered as a CC_STUB at the current output address.
2573466a 5509 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 5510 add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0);
911f2d55 5511#ifdef PCSX
5512 if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10)
5513 // special case for RFE
5514 emit_jmp(0);
5515 else
5516#endif
57871462 5517 emit_jns(0);
5518 //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1);
5519 #ifdef USE_MINI_HT
5520 if(rs1[i]==31) {
5521 do_miniht_jump(rs,rh,ht);
5522 }
5523 else
5524 #endif
5525 {
5526 //if(rs!=EAX) emit_mov(rs,EAX);
5527 //emit_jmp((int)jump_vaddr_eax);
5528 emit_jmp(jump_vaddr_reg[rs]);
5529 }
5530 /* Check hash table
5531 temp=!rs;
5532 emit_mov(rs,temp);
5533 emit_shrimm(rs,16,rs);
5534 emit_xor(temp,rs,rs);
5535 emit_movzwl_reg(rs,rs);
5536 emit_shlimm(rs,4,rs);
5537 emit_cmpmem_indexed((int)hash_table,rs,temp);
5538 emit_jne((int)out+14);
5539 emit_readword_indexed((int)hash_table+4,rs,rs);
5540 emit_jmpreg(rs);
5541 emit_cmpmem_indexed((int)hash_table+8,rs,temp);
5542 emit_addimm_no_flags(8,rs);
5543 emit_jeq((int)out-17);
5544 // No hit on hash table, call compiler
5545 emit_pushreg(temp);
5546//DEBUG >
5547#ifdef DEBUG_CYCLE_COUNT
5548 emit_readword((int)&last_count,ECX);
5549 emit_add(HOST_CCREG,ECX,HOST_CCREG);
5550 emit_readword((int)&next_interupt,ECX);
5551 emit_writeword(HOST_CCREG,(int)&Count);
5552 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
5553 emit_writeword(ECX,(int)&last_count);
5554#endif
5555//DEBUG <
5556 emit_storereg(CCREG,HOST_CCREG);
5557 emit_call((int)get_addr);
5558 emit_loadreg(CCREG,HOST_CCREG);
5559 emit_addimm(ESP,4,ESP);
5560 emit_jmpreg(EAX);*/
5561 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
// Emits a register-to-itself move as padding when the output pointer is not 8-byte aligned (skipped for rt1[i]==31 and near the end of the block).
5562 if(rt1[i]!=31&&i<slen-2&&(((u_int)out)&7)) emit_mov(13,13);
5563 #endif
5564}
5565
/*
 * Assemble the conditional branch at instruction i together with its delay
 * slot (instruction i+1).  The opcode values handled are 4 (BEQ), 5 (BNE),
 * 6 (BLEZ) and 7 (BGTZ); the in-order path masks opcode[i] with 0x2f before
 * comparing.  The branch target is ba[i].
 *
 * i      - index of the branch instruction
 * i_regs - register state of the branch; its regmap locates the compare
 *          operands in the in-order path
 *
 * Special cases: when both source registers are r0 the branch is either
 * never taken (odd opcode, `nop`) or always taken (even opcode,
 * `unconditional`).
 *
 * Two code shapes are produced, selected by ooo[i]:
 *   - ooo[i] set:  the delay slot is assembled first, then the compare and
 *                  the branch.
 *   - otherwise:   the compare is emitted first and the delay slot is
 *                  assembled separately on the taken path and (unless
 *                  likely[i]) on the not-taken path.
 *
 * `invert` is set when match_bt() reports that the register state does not
 * match the target, or (with CORTEX_A8_BRANCH_PREDICTION_HACK) for backward
 * branches.  In the out-of-order path it makes the emitted conditional jump
 * use the opposite condition so that it skips over the taken-path code.
 */
5566void cjump_assemble(int i,struct regstat *i_regs)
5567{
5568 signed char *i_regmap=i_regs->regmap;
5569 int cc;
5570 int match;
5571 match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5572 assem_debug("match=%d\n",match);
// s1l/s2l: host registers of rs1/rs2; s1h/s2h: host registers of the |64 mappings (presumably the upper 32 bits).
5573 int s1h,s1l,s2h,s2l;
5574 int prev_cop1_usable=cop1_usable;
5575 int unconditional=0,nop=0;
5576 int only32=0;
57871462 5577 int invert=0;
5578 int internal=internal_branch(branch_regs[i].is32,ba[i]);
5579 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
57871462 5580 if(!match) invert=1;
5581 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5582 if(i>(ba[i]-start)>>2) invert=1;
5583 #endif
e1190b87 5584
// Out-of-order: operands are looked up in the post-delay-slot mapping (branch_regs); in-order: in the caller-supplied mapping.
5585 if(ooo[i]) {
57871462 5586 s1l=get_reg(branch_regs[i].regmap,rs1[i]);
5587 s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
5588 s2l=get_reg(branch_regs[i].regmap,rs2[i]);
5589 s2h=get_reg(branch_regs[i].regmap,rs2[i]|64);
5590 }
5591 else {
5592 s1l=get_reg(i_regmap,rs1[i]);
5593 s1h=get_reg(i_regmap,rs1[i]|64);
5594 s2l=get_reg(i_regmap,rs2[i]);
5595 s2h=get_reg(i_regmap,rs2[i]|64);
5596 }
5597 if(rs1[i]==0&&rs2[i]==0)
5598 {
// Both operands are r0: the outcome is fixed by the opcode's low bit.
5599 if(opcode[i]&1) nop=1;
5600 else unconditional=1;
5601 //assert(opcode[i]!=5);
5602 //assert(opcode[i]!=7);
5603 //assert(opcode[i]!=0x15);
5604 //assert(opcode[i]!=0x17);
5605 }
5606 else if(rs1[i]==0)
5607 {
// Only rs2 is non-zero: move it into the s1 slot so the code below tests a single register.
5608 s1l=s2l;s1h=s2h;
5609 s2l=s2h=-1;
5610 only32=(regs[i].was32>>rs2[i])&1;
5611 }
5612 else if(rs2[i]==0)
5613 {
5614 s2l=s2h=-1;
5615 only32=(regs[i].was32>>rs1[i])&1;
5616 }
5617 else {
5618 only32=(regs[i].was32>>rs1[i])&(regs[i].was32>>rs2[i])&1;
5619 }
5620
e1190b87 5621 if(ooo[i]) {
57871462 5622 // Out of order execution (delay slot first)
5623 //printf("OOOE\n");
5624 address_generation(i+1,i_regs,regs[i].regmap_entry);
5625 ds_assemble(i+1,i_regs);
5626 int adj;
5627 uint64_t bc_unneeded=branch_regs[i].u;
5628 uint64_t bc_unneeded_upper=branch_regs[i].uu;
// Clear the branch operands' bits from the "unneeded" masks (they are still read by the compare); bit 0 is always set.
5629 bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
5630 bc_unneeded_upper&=~((1LL<<us1[i])|(1LL<<us2[i]));
5631 bc_unneeded|=1;
5632 bc_unneeded_upper|=1;
5633 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5634 bc_unneeded,bc_unneeded_upper);
5635 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],rs2[i]);
5636 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
5637 cc=get_reg(branch_regs[i].regmap,CCREG);
5638 assert(cc==HOST_CCREG);
5639 if(unconditional)
5640 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5641 //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional);
5642 //assem_debug("cycle count (adj)\n");
5643 if(unconditional) {
5644 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
// Nothing further is emitted here for a branch to itself whose delay-slot word is 0 (presumably do_cc() covers that idle-loop case - confirm).
5645 if(i!=(ba[i]-start)>>2 || source[i+1]!=0) {
2573466a 5646 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5647 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5648 if(internal)
5649 assem_debug("branch: internal\n");
5650 else
5651 assem_debug("branch: external\n");
5652 if(internal&&is_ds[(ba[i]-start)>>2]) {
5653 ds_assemble_entry(i);
5654 }
5655 else {
5656 add_to_linker((int)out,ba[i],internal);
5657 emit_jmp(0);
5658 }
5659 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5660 if(((u_int)out)&7) emit_addnop(0);
5661 #endif
5662 }
5663 }
5664 else if(nop) {
// Never-taken branch: only the cycle accounting is emitted, with the jns registered as a CC_STUB.
2573466a 5665 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 5666 int jaddr=(int)out;
5667 emit_jns(0);
5668 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
5669 }
5670 else {
// taken / nottaken / nottaken1 hold the addresses of emitted jumps that are resolved later with set_jump_target().
5671 int taken=0,nottaken=0,nottaken1=0;
5672 do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
2573466a 5673 if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5674 if(!only32)
5675 {
// Compare the s1h/s2h registers first; the s1l/s2l compare below decides the remaining cases.
5676 assert(s1h>=0);
5677 if(opcode[i]==4) // BEQ
5678 {
5679 if(s2h>=0) emit_cmp(s1h,s2h);
5680 else emit_test(s1h,s1h);
5681 nottaken1=(int)out;
5682 emit_jne(1);
5683 }
5684 if(opcode[i]==5) // BNE
5685 {
5686 if(s2h>=0) emit_cmp(s1h,s2h);
5687 else emit_test(s1h,s1h);
5688 if(invert) taken=(int)out;
5689 else add_to_linker((int)out,ba[i],internal);
5690 emit_jne(0);
5691 }
5692 if(opcode[i]==6) // BLEZ
5693 {
5694 emit_test(s1h,s1h);
5695 if(invert) taken=(int)out;
5696 else add_to_linker((int)out,ba[i],internal);
5697 emit_js(0);
5698 nottaken1=(int)out;
5699 emit_jne(1);
5700 }
5701 if(opcode[i]==7) // BGTZ
5702 {
5703 emit_test(s1h,s1h);
5704 nottaken1=(int)out;
5705 emit_js(1);
5706 if(invert) taken=(int)out;
5707 else add_to_linker((int)out,ba[i],internal);
5708 emit_jne(0);
5709 }
5710 } // if(!only32)
5711
5712 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
5713 assert(s1l>=0);
5714 if(opcode[i]==4) // BEQ
5715 {
5716 if(s2l>=0) emit_cmp(s1l,s2l);
5717 else emit_test(s1l,s1l);
5718 if(invert){
5719 nottaken=(int)out;
5720 emit_jne(1);
5721 }else{
5722 add_to_linker((int)out,ba[i],internal);
5723 emit_jeq(0);
5724 }
5725 }
5726 if(opcode[i]==5) // BNE
5727 {
5728 if(s2l>=0) emit_cmp(s1l,s2l);
5729 else emit_test(s1l,s1l);
5730 if(invert){
5731 nottaken=(int)out;
5732 emit_jeq(1);
5733 }else{
5734 add_to_linker((int)out,ba[i],internal);
5735 emit_jne(0);
5736 }
5737 }
5738 if(opcode[i]==6) // BLEZ
5739 {
// "<= 0" is tested as "< 1".
5740 emit_cmpimm(s1l,1);
5741 if(invert){
5742 nottaken=(int)out;
5743 emit_jge(1);
5744 }else{
5745 add_to_linker((int)out,ba[i],internal);
5746 emit_jl(0);
5747 }
5748 }
5749 if(opcode[i]==7) // BGTZ
5750 {
// "> 0" is tested as ">= 1".
5751 emit_cmpimm(s1l,1);
5752 if(invert){
5753 nottaken=(int)out;
5754 emit_jl(1);
5755 }else{
5756 add_to_linker((int)out,ba[i],internal);
5757 emit_jge(0);
5758 }
5759 }
5760 if(invert) {
// Inverted form: the taken-path code follows inline and the not-taken jump recorded above lands just after it.
5761 if(taken) set_jump_target(taken,(int)out);
5762 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5763 if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) {
5764 if(adj) {
2573466a 5765 emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 5766 add_to_linker((int)out,ba[i],internal);
5767 }else{
5768 emit_addnop(13);
5769 add_to_linker((int)out,ba[i],internal*2);
5770 }
5771 emit_jmp(0);
5772 }else
5773 #endif
5774 {
2573466a 5775 if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 5776 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5777 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5778 if(internal)
5779 assem_debug("branch: internal\n");
5780 else
5781 assem_debug("branch: external\n");
5782 if(internal&&is_ds[(ba[i]-start)>>2]) {
5783 ds_assemble_entry(i);
5784 }
5785 else {
5786 add_to_linker((int)out,ba[i],internal);
5787 emit_jmp(0);
5788 }
5789 }
5790 set_jump_target(nottaken,(int)out);
5791 }
5792
5793 if(nottaken1) set_jump_target(nottaken1,(int)out);
5794 if(adj) {
2573466a 5795 if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc);
57871462 5796 }
5797 } // (!unconditional)
5798 } // if(ooo)
5799 else
5800 {
5801 // In-order execution (branch first)
5802 //if(likely[i]) printf("IOL\n");
5803 //else
5804 //printf("IOE\n");
5805 int taken=0,nottaken=0,nottaken1=0;
5806 if(!unconditional&&!nop) {
5807 if(!only32)
5808 {
5809 assert(s1h>=0);
5810 if((opcode[i]&0x2f)==4) // BEQ
5811 {
5812 if(s2h>=0) emit_cmp(s1h,s2h);
5813 else emit_test(s1h,s1h);
5814 nottaken1=(int)out;
5815 emit_jne(2);
5816 }
5817 if((opcode[i]&0x2f)==5) // BNE
5818 {
5819 if(s2h>=0) emit_cmp(s1h,s2h);
5820 else emit_test(s1h,s1h);
5821 taken=(int)out;
5822 emit_jne(1);
5823 }
5824 if((opcode[i]&0x2f)==6) // BLEZ
5825 {
5826 emit_test(s1h,s1h);
5827 taken=(int)out;
5828 emit_js(1);
5829 nottaken1=(int)out;
5830 emit_jne(2);
5831 }
5832 if((opcode[i]&0x2f)==7) // BGTZ
5833 {
5834 emit_test(s1h,s1h);
5835 nottaken1=(int)out;
5836 emit_js(2);
5837 taken=(int)out;
5838 emit_jne(1);
5839 }
5840 } // if(!only32)
5841
5842 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
5843 assert(s1l>=0);
// In this path every compare emits the jump for the NOT-taken case; the taken path falls through.
5844 if((opcode[i]&0x2f)==4) // BEQ
5845 {
5846 if(s2l>=0) emit_cmp(s1l,s2l);
5847 else emit_test(s1l,s1l);
5848 nottaken=(int)out;
5849 emit_jne(2);
5850 }
5851 if((opcode[i]&0x2f)==5) // BNE
5852 {
5853 if(s2l>=0) emit_cmp(s1l,s2l);
5854 else emit_test(s1l,s1l);
5855 nottaken=(int)out;
5856 emit_jeq(2);
5857 }
5858 if((opcode[i]&0x2f)==6) // BLEZ
5859 {
5860 emit_cmpimm(s1l,1);
5861 nottaken=(int)out;
5862 emit_jge(2);
5863 }
5864 if((opcode[i]&0x2f)==7) // BGTZ
5865 {
5866 emit_cmpimm(s1l,1);
5867 nottaken=(int)out;
5868 emit_jl(2);
5869 }
5870 } // if(!unconditional&&!nop)
5871 int adj;
5872 uint64_t ds_unneeded=branch_regs[i].u;
5873 uint64_t ds_unneeded_upper=branch_regs[i].uu;
// Clear the delay slot's source bits from the "unneeded" masks; bit 0 is always set.
5874 ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
5875 ds_unneeded_upper&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
5876 if((~ds_unneeded_upper>>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
5877 ds_unneeded|=1;
5878 ds_unneeded_upper|=1;
5879 // branch taken
5880 if(!nop) {
5881 if(taken) set_jump_target(taken,(int)out);
5882 assem_debug("1:\n");
5883 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5884 ds_unneeded,ds_unneeded_upper);
5885 // load regs
5886 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
5887 address_generation(i+1,&branch_regs[i],0);
5888 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP);
5889 ds_assemble(i+1,&branch_regs[i]);
5890 cc=get_reg(branch_regs[i].regmap,CCREG);
5891 if(cc==-1) {
5892 emit_loadreg(CCREG,cc=HOST_CCREG);
5893 // CHECK: Is the following instruction (fall thru) allocated ok?
5894 }
5895 assert(cc==HOST_CCREG);
5896 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5897 do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
5898 assem_debug("cycle count (adj)\n");
2573466a 5899 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5900 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5901 if(internal)
5902 assem_debug("branch: internal\n");
5903 else
5904 assem_debug("branch: external\n");
5905 if(internal&&is_ds[(ba[i]-start)>>2]) {
5906 ds_assemble_entry(i);
5907 }
5908 else {
5909 add_to_linker((int)out,ba[i],internal);
5910 emit_jmp(0);
5911 }
5912 }
5913 // branch not taken
// Restore the cop1_usable value saved on entry before assembling the not-taken copy of the delay slot.
5914 cop1_usable=prev_cop1_usable;
5915 if(!unconditional) {
5916 if(nottaken1) set_jump_target(nottaken1,(int)out);
5917 set_jump_target(nottaken,(int)out);
5918 assem_debug("2:\n");
// With likely[i] set the delay slot is not assembled on the not-taken path.
5919 if(!likely[i]) {
5920 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5921 ds_unneeded,ds_unneeded_upper);
5922 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
5923 address_generation(i+1,&branch_regs[i],0);
5924 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
5925 ds_assemble(i+1,&branch_regs[i]);
5926 }
5927 cc=get_reg(branch_regs[i].regmap,CCREG);
5928 if(cc==-1&&!likely[i]) {
5929 // Cycle count isn't in a register, temporarily load it then write it out
5930 emit_loadreg(CCREG,HOST_CCREG);
2573466a 5931 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 5932 int jaddr=(int)out;
5933 emit_jns(0);
5934 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
5935 emit_storereg(CCREG,HOST_CCREG);
5936 }
5937 else{
5938 cc=get_reg(i_regmap,CCREG);
5939 assert(cc==HOST_CCREG);
2573466a 5940 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 5941 int jaddr=(int)out;
5942 emit_jns(0);
5943 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
5944 }
5945 }
5946 }
5947}
5948
/*
 * Assemble the sign-test branch at instruction i (selected by opcode2[i]:
 * BLTZ/BGEZ and their linking and "likely" variants) together with its
 * delay slot (instruction i+1).  The branch target is ba[i].
 *
 * i      - index of the branch instruction
 * i_regs - register state of the branch; its regmap locates the operand
 *          in the in-order path
 *
 * When rt1[i]==31 the return address (start+i*4+8) is written to the host
 * register mapped to guest register 31, whether or not the branch is taken.
 *
 * Special case: when rs1[i]==0 the branch is always taken if opcode2[i] is
 * odd (`unconditional`) and never taken otherwise (`nevertaken`).
 *
 * Two code shapes are produced, selected by ooo[i]:
 *   - ooo[i] set:  the delay slot is assembled first, then the test and
 *                  the branch.
 *   - otherwise:   the test is emitted first and the delay slot is
 *                  assembled separately on the taken path and (unless
 *                  likely[i]) on the not-taken path.
 *
 * `invert` is set when match_bt() reports that the register state does not
 * match the target, or (with CORTEX_A8_BRANCH_PREDICTION_HACK) for backward
 * branches.  In the out-of-order path it makes the emitted conditional jump
 * use the opposite condition so that it skips over the taken-path code.
 */
5949void sjump_assemble(int i,struct regstat *i_regs)
5950{
5951 signed char *i_regmap=i_regs->regmap;
5952 int cc;
5953 int match;
5954 match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5955 assem_debug("smatch=%d\n",match);
// s1l: host register of rs1; s1h: host register of the |64 mapping (presumably the upper 32 bits).
5956 int s1h,s1l;
5957 int prev_cop1_usable=cop1_usable;
5958 int unconditional=0,nevertaken=0;
5959 int only32=0;
57871462 5960 int invert=0;
5961 int internal=internal_branch(branch_regs[i].is32,ba[i]);
5962 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
57871462 5963 if(!match) invert=1;
5964 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5965 if(i>(ba[i]-start)>>2) invert=1;
5966 #endif
5967
5968 //if(opcode2[i]>=0x10) return; // FIXME (BxxZAL)
df894a3a 5969 //assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL)
57871462 5970
e1190b87 5971 if(ooo[i]) {
57871462 5972 s1l=get_reg(branch_regs[i].regmap,rs1[i]);
5973 s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
5974 }
5975 else {
5976 s1l=get_reg(i_regmap,rs1[i]);
5977 s1h=get_reg(i_regmap,rs1[i]|64);
5978 }
5979 if(rs1[i]==0)
5980 {
// Operand is r0: the outcome is fixed by the low bit of opcode2.
5981 if(opcode2[i]&1) unconditional=1;
5982 else nevertaken=1;
5983 // These are never taken (r0 is never less than zero)
5984 //assert(opcode2[i]!=0);
5985 //assert(opcode2[i]!=2);
5986 //assert(opcode2[i]!=0x10);
5987 //assert(opcode2[i]!=0x12);
5988 }
5989 else {
5990 only32=(regs[i].was32>>rs1[i])&1;
5991 }
5992
e1190b87 5993 if(ooo[i]) {
57871462 5994 // Out of order execution (delay slot first)
5995 //printf("OOOE\n");
5996 address_generation(i+1,i_regs,regs[i].regmap_entry);
5997 ds_assemble(i+1,i_regs);
5998 int adj;
5999 uint64_t bc_unneeded=branch_regs[i].u;
6000 uint64_t bc_unneeded_upper=branch_regs[i].uu;
// Clear the branch operands' bits from the "unneeded" masks; bit 0 is always set.
6001 bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
6002 bc_unneeded_upper&=~((1LL<<us1[i])|(1LL<<us2[i]));
6003 bc_unneeded|=1;
6004 bc_unneeded_upper|=1;
6005 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
6006 bc_unneeded,bc_unneeded_upper);
6007 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],rs1[i]);
6008 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
6009 if(rt1[i]==31) {
6010 int rt,return_address;
57871462 6011 rt=get_reg(branch_regs[i].regmap,31);
6012 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
6013 if(rt>=0) {
6014 // Save the PC even if the branch is not taken
6015 return_address=start+i*4+8;
6016 emit_movimm(return_address,rt); // PC into link register
6017 #ifdef IMM_PREFETCH
6018 if(!nevertaken) emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
6019 #endif
6020 }
6021 }
6022 cc=get_reg(branch_regs[i].regmap,CCREG);
6023 assert(cc==HOST_CCREG);
6024 if(unconditional)
6025 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6026 //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional);
6027 assem_debug("cycle count (adj)\n");
6028 if(unconditional) {
6029 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
// Nothing further is emitted here for a branch to itself whose delay-slot word is 0 (presumably do_cc() covers that idle-loop case - confirm).
6030 if(i!=(ba[i]-start)>>2 || source[i+1]!=0) {
2573466a 6031 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 6032 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6033 if(internal)
6034 assem_debug("branch: internal\n");
6035 else
6036 assem_debug("branch: external\n");
6037 if(internal&&is_ds[(ba[i]-start)>>2]) {
6038 ds_assemble_entry(i);
6039 }
6040 else {
6041 add_to_linker((int)out,ba[i],internal);
6042 emit_jmp(0);
6043 }
6044 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
6045 if(((u_int)out)&7) emit_addnop(0);
6046 #endif
6047 }
6048 }
6049 else if(nevertaken) {
// Never-taken branch: only the cycle accounting is emitted, with the jns registered as a CC_STUB.
2573466a 6050 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 6051 int jaddr=(int)out;
6052 emit_jns(0);
6053 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
6054 }
6055 else {
// nottaken holds the address of the inverted jump, resolved later with set_jump_target().
6056 int nottaken=0;
6057 do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
2573466a 6058 if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 6059 if(!only32)
6060 {
// Not known to be 32-bit: the sign is tested on the s1h register.
6061 assert(s1h>=0);
df894a3a 6062 if((opcode2[i]&0xf)==0) // BLTZ/BLTZAL
57871462 6063 {
6064 emit_test(s1h,s1h);
6065 if(invert){
6066 nottaken=(int)out;
6067 emit_jns(1);
6068 }else{
6069 add_to_linker((int)out,ba[i],internal);
6070 emit_js(0);
6071 }
6072 }
df894a3a 6073 if((opcode2[i]&0xf)==1) // BGEZ/BGEZAL
57871462 6074 {
6075 emit_test(s1h,s1h);
6076 if(invert){
6077 nottaken=(int)out;
6078 emit_js(1);
6079 }else{
6080 add_to_linker((int)out,ba[i],internal);
6081 emit_jns(0);
6082 }
6083 }
6084 } // if(!only32)
6085 else
6086 {
// 32-bit value: the sign is tested on the s1l register.
6087 assert(s1l>=0);
df894a3a 6088 if((opcode2[i]&0xf)==0) // BLTZ/BLTZAL
57871462 6089 {
6090 emit_test(s1l,s1l);
6091 if(invert){
6092 nottaken=(int)out;
6093 emit_jns(1);
6094 }else{
6095 add_to_linker((int)out,ba[i],internal);
6096 emit_js(0);
6097 }
6098 }
df894a3a 6099 if((opcode2[i]&0xf)==1) // BGEZ/BGEZAL
57871462 6100 {
6101 emit_test(s1l,s1l);
6102 if(invert){
6103 nottaken=(int)out;
6104 emit_js(1);
6105 }else{
6106 add_to_linker((int)out,ba[i],internal);
6107 emit_jns(0);
6108 }
6109 }
6110 } // else (only32)
6111
6112 if(invert) {
// Inverted form: the taken-path code follows inline and the not-taken jump recorded above lands just after it.
6113 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
6114 if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) {
6115 if(adj) {
2573466a 6116 emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 6117 add_to_linker((int)out,ba[i],internal);
6118 }else{
6119 emit_addnop(13);
6120 add_to_linker((int)out,ba[i],internal*2);
6121 }
6122 emit_jmp(0);
6123 }else
6124 #endif
6125 {
2573466a 6126 if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 6127 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6128 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6129 if(internal)
6130 assem_debug("branch: internal\n");
6131 else
6132 assem_debug("branch: external\n");
6133 if(internal&&is_ds[(ba[i]-start)>>2]) {
6134 ds_assemble_entry(i);
6135 }
6136 else {
6137 add_to_linker((int)out,ba[i],internal);
6138 emit_jmp(0);
6139 }
6140 }
6141 set_jump_target(nottaken,(int)out);
6142 }
6143
6144 if(adj) {
2573466a 6145 if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc);
57871462 6146 }
6147 } // (!unconditional)
6148 } // if(ooo)
6149 else
6150 {
6151 // In-order execution (branch first)
6152 //printf("IOE\n");
6153 int nottaken=0;
a6491170 6154 if(rt1[i]==31) {
6155 int rt,return_address;
a6491170 6156 rt=get_reg(branch_regs[i].regmap,31);
6157 if(rt>=0) {
6158 // Save the PC even if the branch is not taken
6159 return_address=start+i*4+8;
6160 emit_movimm(return_address,rt); // PC into link register
6161 #ifdef IMM_PREFETCH
6162 emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
6163 #endif
6164 }
6165 }
6166 if(!unconditional) {
6167 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
// In this path every test emits the jump for the NOT-taken case; the taken path falls through.
6168 if(!only32)
6169 {
6170 assert(s1h>=0);
a6491170 6171 if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
57871462 6172 {
6173 emit_test(s1h,s1h);
6174 nottaken=(int)out;
6175 emit_jns(1);
6176 }
a6491170 6177 if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
57871462 6178 {
6179 emit_test(s1h,s1h);
6180 nottaken=(int)out;
6181 emit_js(1);
6182 }
6183 } // if(!only32)
6184 else
6185 {
6186 assert(s1l>=0);
a6491170 6187 if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
57871462 6188 {
6189 emit_test(s1l,s1l);
6190 nottaken=(int)out;
6191 emit_jns(1);
6192 }
a6491170 6193 if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
57871462 6194 {
6195 emit_test(s1l,s1l);
6196 nottaken=(int)out;
6197 emit_js(1);
6198 }
6199 }
6200 } // if(!unconditional)
6201 int adj;
6202 uint64_t ds_unneeded=branch_regs[i].u;
6203 uint64_t ds_unneeded_upper=branch_regs[i].uu;
// Clear the delay slot's source bits from the "unneeded" masks; bit 0 is always set.
6204 ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
6205 ds_unneeded_upper&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
6206 if((~ds_unneeded_upper>>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
6207 ds_unneeded|=1;
6208 ds_unneeded_upper|=1;
6209 // branch taken
6210 if(!nevertaken) {
6211 //assem_debug("1:\n");
6212 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
6213 ds_unneeded,ds_unneeded_upper);
6214 // load regs
6215 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
6216 address_generation(i+1,&branch_regs[i],0);
6217 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP);
6218 ds_assemble(i+1,&branch_regs[i]);
6219 cc=get_reg(branch_regs[i].regmap,CCREG);
6220 if(cc==-1) {
6221 emit_loadreg(CCREG,cc=HOST_CCREG);
6222 // CHECK: Is the following instruction (fall thru) allocated ok?
6223 }
6224 assert(cc==HOST_CCREG);
6225 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6226 do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
6227 assem_debug("cycle count (adj)\n");
2573466a 6228 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 6229 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6230 if(internal)
6231 assem_debug("branch: internal\n");
6232 else
6233 assem_debug("branch: external\n");
6234 if(internal&&is_ds[(ba[i]-start)>>2]) {
6235 ds_assemble_entry(i);
6236 }
6237 else {
6238 add_to_linker((int)out,ba[i],internal);
6239 emit_jmp(0);
6240 }
6241 }
6242 // branch not taken
// Restore the cop1_usable value saved on entry before assembling the not-taken copy of the delay slot.
6243 cop1_usable=prev_cop1_usable;
6244 if(!unconditional) {
6245 set_jump_target(nottaken,(int)out);
6246 assem_debug("1:\n");
// With likely[i] set the delay slot is not assembled on the not-taken path.
6247 if(!likely[i]) {
6248 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
6249 ds_unneeded,ds_unneeded_upper);
6250 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
6251 address_generation(i+1,&branch_regs[i],0);
6252 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
6253 ds_assemble(i+1,&branch_regs[i]);
6254 }
6255 cc=get_reg(branch_regs[i].regmap,CCREG);
6256 if(cc==-1&&!likely[i]) {
6257 // Cycle count isn't in a register, temporarily load it then write it out
6258 emit_loadreg(CCREG,HOST_CCREG);
2573466a 6259 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 6260 int jaddr=(int)out;
6261 emit_jns(0);
6262 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
6263 emit_storereg(CCREG,HOST_CCREG);
6264 }
6265 else{
6266 cc=get_reg(i_regmap,CCREG);
6267 assert(cc==HOST_CCREG);
2573466a 6268 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 6269 int jaddr=(int)out;
6270 emit_jns(0);
6271 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
6272 }
6273 }
6274 }
6275}
6276
6277void fjump_assemble(int i,struct regstat *i_regs)
6278{
6279 signed char *i_regmap=i_regs->regmap;
6280 int cc;
6281 int match;
6282 match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6283 assem_debug("fmatch=%d\n",match);
6284 int fs,cs;
6285 int eaddr;
57871462 6286 int invert=0;
6287 int internal=internal_branch(branch_regs[i].is32,ba[i]);
6288 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
57871462 6289 if(!match) invert=1;
6290 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
6291 if(i>(ba[i]-start)>>2) invert=1;
6292 #endif
6293
e1190b87 6294 if(ooo[i]) {
57871462 6295 fs=get_reg(branch_regs[i].regmap,FSREG);
6296 address_generation(i+1,i_regs,regs[i].regmap_entry); // Is this okay?
6297 }
6298 else {
6299 fs=get_reg(i_regmap,FSREG);
6300 }
6301
6302 // Check cop1 unusable
6303 if(!cop1_usable) {
6304 cs=get_reg(i_regmap,CSREG);
6305 assert(cs>=0);
6306 emit_testimm(cs,0x20000000);
6307 eaddr=(int)out;
6308 emit_jeq(0);
6309 add_stub(FP_STUB,eaddr,(int)out,i,cs,(int)i_regs,0,0);
6310 cop1_usable=1;
6311 }
6312
e1190b87 6313 if(ooo[i]) {
57871462 6314 // Out of order execution (delay slot first)
6315 //printf("OOOE\n");
6316 ds_assemble(i+1,i_regs);
6317 int adj;
6318 uint64_t bc_unneeded=branch_regs[i].u;
6319 uint64_t bc_unneeded_upper=branch_regs[i].uu;
6320 bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
6321 bc_unneeded_upper&=~((1LL<<us1[i])|(1LL<<us2[i]));
6322 bc_unneeded|=1;
6323 bc_unneeded_upper|=1;
6324 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
6325 bc_unneeded,bc_unneeded_upper);
6326 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],rs1[i]);
6327 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
6328 cc=get_reg(branch_regs[i].regmap,CCREG);
6329 assert(cc==HOST_CCREG);
6330 do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
6331 assem_debug("cycle count (adj)\n");
6332 if(1) {
6333 int nottaken=0;
2573466a 6334 if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 6335 if(1) {
6336 assert(fs>=0);
6337 emit_testimm(fs,0x800000);
6338 if(source[i]&0x10000) // BC1T
6339 {
6340 if(invert){
6341 nottaken=(int)out;
6342 emit_jeq(1);
6343 }else{
6344 add_to_linker((int)out,ba[i],internal);
6345 emit_jne(0);
6346 }
6347 }
6348 else // BC1F
6349 if(invert){
6350 nottaken=(int)out;
6351 emit_jne(1);
6352 }else{
6353 add_to_linker((int)out,ba[i],internal);
6354 emit_jeq(0);
6355 }
6356 {
6357 }
6358 } // if(!only32)
6359
6360 if(invert) {
2573466a 6361 if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 6362 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
6363 else if(match) emit_addnop(13);
6364 #endif
6365 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6366 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6367 if(internal)
6368 assem_debug("branch: internal\n");
6369 else
6370 assem_debug("branch: external\n");
6371 if(internal&&is_ds[(ba[i]-start)>>2]) {
6372 ds_assemble_entry(i);
6373 }
6374 else {
6375 add_to_linker((int)out,ba[i],internal);
6376 emit_jmp(0);
6377 }
6378 set_jump_target(nottaken,(int)out);
6379 }
6380
6381 if(adj) {
2573466a 6382 if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc);
57871462 6383 }
6384 } // (!unconditional)
6385 } // if(ooo)
6386 else
6387 {
6388 // In-order execution (branch first)
6389 //printf("IOE\n");
6390 int nottaken=0;
6391 if(1) {
6392 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
6393 if(1) {
6394 assert(fs>=0);
6395 emit_testimm(fs,0x800000);
6396 if(source[i]&0x10000) // BC1T
6397 {
6398 nottaken=(int)out;
6399 emit_jeq(1);
6400 }
6401 else // BC1F
6402 {
6403 nottaken=(int)out;
6404 emit_jne(1);
6405 }
6406 }
6407 } // if(!unconditional)
6408 int adj;
6409 uint64_t ds_unneeded=branch_regs[i].u;
6410 uint64_t ds_unneeded_upper=branch_regs[i].uu;
6411 ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
6412 ds_unneeded_upper&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
6413 if((~ds_unneeded_upper>>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
6414 ds_unneeded|=1;
6415 ds_unneeded_upper|=1;
6416 // branch taken
6417 //assem_debug("1:\n");
6418 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
6419 ds_unneeded,ds_unneeded_upper);
6420 // load regs
6421 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
6422 address_generation(i+1,&branch_regs[i],0);
6423 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP);
6424 ds_assemble(i+1,&branch_regs[i]);
6425 cc=get_reg(branch_regs[i].regmap,CCREG);
6426 if(cc==-1) {
6427 emit_loadreg(CCREG,cc=HOST_CCREG);
6428 // CHECK: Is the following instruction (fall thru) allocated ok?
6429 }
6430 assert(cc==HOST_CCREG);
6431 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6432 do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
6433 assem_debug("cycle count (adj)\n");
2573466a 6434 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 6435 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
6436 if(internal)
6437 assem_debug("branch: internal\n");
6438 else
6439 assem_debug("branch: external\n");
6440 if(internal&&is_ds[(ba[i]-start)>>2]) {
6441 ds_assemble_entry(i);
6442 }
6443 else {
6444 add_to_linker((int)out,ba[i],internal);
6445 emit_jmp(0);
6446 }
6447
6448 // branch not taken
6449 if(1) { // <- FIXME (don't need this)
6450 set_jump_target(nottaken,(int)out);
6451 assem_debug("1:\n");
6452 if(!likely[i]) {
6453 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
6454 ds_unneeded,ds_unneeded_upper);
6455 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
6456 address_generation(i+1,&branch_regs[i],0);
6457 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
6458 ds_assemble(i+1,&branch_regs[i]);
6459 }
6460 cc=get_reg(branch_regs[i].regmap,CCREG);
6461 if(cc==-1&&!likely[i]) {
6462 // Cycle count isn't in a register, temporarily load it then write it out
6463 emit_loadreg(CCREG,HOST_CCREG);
2573466a 6464 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 6465 int jaddr=(int)out;
6466 emit_jns(0);
6467 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
6468 emit_storereg(CCREG,HOST_CCREG);
6469 }
6470 else{
6471 cc=get_reg(i_regmap,CCREG);
6472 assert(cc==HOST_CCREG);
2573466a 6473 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 6474 int jaddr=(int)out;
6475 emit_jns(0);
6476 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
6477 }
6478 }
6479 }
6480}
6481
6482static void pagespan_assemble(int i,struct regstat *i_regs)
6483{
6484 int s1l=get_reg(i_regs->regmap,rs1[i]);
6485 int s1h=get_reg(i_regs->regmap,rs1[i]|64);
6486 int s2l=get_reg(i_regs->regmap,rs2[i]);
6487 int s2h=get_reg(i_regs->regmap,rs2[i]|64);
6488 void *nt_branch=NULL;
6489 int taken=0;
6490 int nottaken=0;
6491 int unconditional=0;
6492 if(rs1[i]==0)
6493 {
6494 s1l=s2l;s1h=s2h;
6495 s2l=s2h=-1;
6496 }
6497 else if(rs2[i]==0)
6498 {
6499 s2l=s2h=-1;
6500 }
6501 if((i_regs->is32>>rs1[i])&(i_regs->is32>>rs2[i])&1) {
6502 s1h=s2h=-1;
6503 }
6504 int hr=0;
6505 int addr,alt,ntaddr;
6506 if(i_regs->regmap[HOST_BTREG]<0) {addr=HOST_BTREG;}
6507 else {
6508 while(hr<HOST_REGS)
6509 {
6510 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
6511 (i_regs->regmap[hr]&63)!=rs1[i] &&
6512 (i_regs->regmap[hr]&63)!=rs2[i] )
6513 {
6514 addr=hr++;break;
6515 }
6516 hr++;
6517 }
6518 }
6519 while(hr<HOST_REGS)
6520 {
6521 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
6522 (i_regs->regmap[hr]&63)!=rs1[i] &&
6523 (i_regs->regmap[hr]&63)!=rs2[i] )
6524 {
6525 alt=hr++;break;
6526 }
6527 hr++;
6528 }
6529 if((opcode[i]&0x2E)==6) // BLEZ/BGTZ needs another register
6530 {
6531 while(hr<HOST_REGS)
6532 {
6533 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
6534 (i_regs->regmap[hr]&63)!=rs1[i] &&
6535 (i_regs->regmap[hr]&63)!=rs2[i] )
6536 {
6537 ntaddr=hr;break;
6538 }
6539 hr++;
6540 }
6541 }
6542 assert(hr<HOST_REGS);
6543 if((opcode[i]&0x2e)==4||opcode[i]==0x11) { // BEQ/BNE/BEQL/BNEL/BC1
6544 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG);
6545 }
2573466a 6546 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 6547 if(opcode[i]==2) // J
6548 {
6549 unconditional=1;
6550 }
6551 if(opcode[i]==3) // JAL
6552 {
6553 // TODO: mini_ht
6554 int rt=get_reg(i_regs->regmap,31);
6555 emit_movimm(start+i*4+8,rt);
6556 unconditional=1;
6557 }
6558 if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
6559 {
6560 emit_mov(s1l,addr);
6561 if(opcode2[i]==9) // JALR
6562 {
5067f341 6563 int rt=get_reg(i_regs->regmap,rt1[i]);
57871462 6564 emit_movimm(start+i*4+8,rt);
6565 }
6566 }
6567 if((opcode[i]&0x3f)==4) // BEQ
6568 {
6569 if(rs1[i]==rs2[i])
6570 {
6571 unconditional=1;
6572 }
6573 else
6574 #ifdef HAVE_CMOV_IMM
6575 if(s1h<0) {
6576 if(s2l>=0) emit_cmp(s1l,s2l);
6577 else emit_test(s1l,s1l);
6578 emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr);
6579 }
6580 else
6581 #endif
6582 {
6583 assert(s1l>=0);
6584 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
6585 if(s1h>=0) {
6586 if(s2h>=0) emit_cmp(s1h,s2h);
6587 else emit_test(s1h,s1h);
6588 emit_cmovne_reg(alt,addr);
6589 }
6590 if(s2l>=0) emit_cmp(s1l,s2l);
6591 else emit_test(s1l,s1l);
6592 emit_cmovne_reg(alt,addr);
6593 }
6594 }
6595 if((opcode[i]&0x3f)==5) // BNE
6596 {
6597 #ifdef HAVE_CMOV_IMM
6598 if(s1h<0) {
6599 if(s2l>=0) emit_cmp(s1l,s2l);
6600 else emit_test(s1l,s1l);
6601 emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr);
6602 }
6603 else
6604 #endif
6605 {
6606 assert(s1l>=0);
6607 emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt);
6608 if(s1h>=0) {
6609 if(s2h>=0) emit_cmp(s1h,s2h);
6610 else emit_test(s1h,s1h);
6611 emit_cmovne_reg(alt,addr);
6612 }
6613 if(s2l>=0) emit_cmp(s1l,s2l);
6614 else emit_test(s1l,s1l);
6615 emit_cmovne_reg(alt,addr);
6616 }
6617 }
6618 if((opcode[i]&0x3f)==0x14) // BEQL
6619 {
6620 if(s1h>=0) {
6621 if(s2h>=0) emit_cmp(s1h,s2h);
6622 else emit_test(s1h,s1h);
6623 nottaken=(int)out;
6624 emit_jne(0);
6625 }
6626 if(s2l>=0) emit_cmp(s1l,s2l);
6627 else emit_test(s1l,s1l);
6628 if(nottaken) set_jump_target(nottaken,(int)out);
6629 nottaken=(int)out;
6630 emit_jne(0);
6631 }
6632 if((opcode[i]&0x3f)==0x15) // BNEL
6633 {
6634 if(s1h>=0) {
6635 if(s2h>=0) emit_cmp(s1h,s2h);
6636 else emit_test(s1h,s1h);
6637 taken=(int)out;
6638 emit_jne(0);
6639 }
6640 if(s2l>=0) emit_cmp(s1l,s2l);
6641 else emit_test(s1l,s1l);
6642 nottaken=(int)out;
6643 emit_jeq(0);
6644 if(taken) set_jump_target(taken,(int)out);
6645 }
6646 if((opcode[i]&0x3f)==6) // BLEZ
6647 {
6648 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
6649 emit_cmpimm(s1l,1);
6650 if(s1h>=0) emit_mov(addr,ntaddr);
6651 emit_cmovl_reg(alt,addr);
6652 if(s1h>=0) {
6653 emit_test(s1h,s1h);
6654 emit_cmovne_reg(ntaddr,addr);
6655 emit_cmovs_reg(alt,addr);
6656 }
6657 }
6658 if((opcode[i]&0x3f)==7) // BGTZ
6659 {
6660 emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr);
6661 emit_cmpimm(s1l,1);
6662 if(s1h>=0) emit_mov(addr,alt);
6663 emit_cmovl_reg(ntaddr,addr);
6664 if(s1h>=0) {
6665 emit_test(s1h,s1h);
6666 emit_cmovne_reg(alt,addr);
6667 emit_cmovs_reg(ntaddr,addr);
6668 }
6669 }
6670 if((opcode[i]&0x3f)==0x16) // BLEZL
6671 {
6672 assert((opcode[i]&0x3f)!=0x16);
6673 }
6674 if((opcode[i]&0x3f)==0x17) // BGTZL
6675 {
6676 assert((opcode[i]&0x3f)!=0x17);
6677 }
6678 assert(opcode[i]!=1); // BLTZ/BGEZ
6679
6680 //FIXME: Check CSREG
6681 if(opcode[i]==0x11 && opcode2[i]==0x08 ) {
6682 if((source[i]&0x30000)==0) // BC1F
6683 {
6684 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
6685 emit_testimm(s1l,0x800000);
6686 emit_cmovne_reg(alt,addr);
6687 }
6688 if((source[i]&0x30000)==0x10000) // BC1T
6689 {
6690 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
6691 emit_testimm(s1l,0x800000);
6692 emit_cmovne_reg(alt,addr);
6693 }
6694 if((source[i]&0x30000)==0x20000) // BC1FL
6695 {
6696 emit_testimm(s1l,0x800000);
6697 nottaken=(int)out;
6698 emit_jne(0);
6699 }
6700 if((source[i]&0x30000)==0x30000) // BC1TL
6701 {
6702 emit_testimm(s1l,0x800000);
6703 nottaken=(int)out;
6704 emit_jeq(0);
6705 }
6706 }
6707
6708 assert(i_regs->regmap[HOST_CCREG]==CCREG);
6709 wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
6710 if(likely[i]||unconditional)
6711 {
6712 emit_movimm(ba[i],HOST_BTREG);
6713 }
6714 else if(addr!=HOST_BTREG)
6715 {
6716 emit_mov(addr,HOST_BTREG);
6717 }
6718 void *branch_addr=out;
6719 emit_jmp(0);
6720 int target_addr=start+i*4+5;
6721 void *stub=out;
6722 void *compiled_target_addr=check_addr(target_addr);
6723 emit_extjump_ds((int)branch_addr,target_addr);
6724 if(compiled_target_addr) {
6725 set_jump_target((int)branch_addr,(int)compiled_target_addr);
6726 add_link(target_addr,stub);
6727 }
6728 else set_jump_target((int)branch_addr,(int)stub);
6729 if(likely[i]) {
6730 // Not-taken path
6731 set_jump_target((int)nottaken,(int)out);
6732 wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
6733 void *branch_addr=out;
6734 emit_jmp(0);
6735 int target_addr=start+i*4+8;
6736 void *stub=out;
6737 void *compiled_target_addr=check_addr(target_addr);
6738 emit_extjump_ds((int)branch_addr,target_addr);
6739 if(compiled_target_addr) {
6740 set_jump_target((int)branch_addr,(int)compiled_target_addr);
6741 add_link(target_addr,stub);
6742 }
6743 else set_jump_target((int)branch_addr,(int)stub);
6744 }
6745}
6746
6747// Assemble the delay slot for the above
6748static void pagespan_ds()
6749{
6750 assem_debug("initial delay slot:\n");
6751 u_int vaddr=start+1;
94d23bb9 6752 u_int page=get_page(vaddr);
6753 u_int vpage=get_vpage(vaddr);
57871462 6754 ll_add(jump_dirty+vpage,vaddr,(void *)out);
6755 do_dirty_stub_ds();
6756 ll_add(jump_in+page,vaddr,(void *)out);
6757 assert(regs[0].regmap_entry[HOST_CCREG]==CCREG);
6758 if(regs[0].regmap[HOST_CCREG]!=CCREG)
6759 wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty,regs[0].was32);
6760 if(regs[0].regmap[HOST_BTREG]!=BTREG)
6761 emit_writeword(HOST_BTREG,(int)&branch_target);
6762 load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,rs1[0],rs2[0]);
6763 address_generation(0,&regs[0],regs[0].regmap_entry);
b9b61529 6764 if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a)
57871462 6765 load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,INVCP,INVCP);
6766 cop1_usable=0;
6767 is_delayslot=0;
6768 switch(itype[0]) {
6769 case ALU:
6770 alu_assemble(0,&regs[0]);break;
6771 case IMM16:
6772 imm16_assemble(0,&regs[0]);break;
6773 case SHIFT:
6774 shift_assemble(0,&regs[0]);break;
6775 case SHIFTIMM:
6776 shiftimm_assemble(0,&regs[0]);break;
6777 case LOAD:
6778 load_assemble(0,&regs[0]);break;
6779 case LOADLR:
6780 loadlr_assemble(0,&regs[0]);break;
6781 case STORE:
6782 store_assemble(0,&regs[0]);break;
6783 case STORELR:
6784 storelr_assemble(0,&regs[0]);break;
6785 case COP0:
6786 cop0_assemble(0,&regs[0]);break;
6787 case COP1:
6788 cop1_assemble(0,&regs[0]);break;
6789 case C1LS:
6790 c1ls_assemble(0,&regs[0]);break;
b9b61529 6791 case COP2:
6792 cop2_assemble(0,&regs[0]);break;
6793 case C2LS:
6794 c2ls_assemble(0,&regs[0]);break;
6795 case C2OP:
6796 c2op_assemble(0,&regs[0]);break;
57871462 6797 case FCONV:
6798 fconv_assemble(0,&regs[0]);break;
6799 case FLOAT:
6800 float_assemble(0,&regs[0]);break;
6801 case FCOMP:
6802 fcomp_assemble(0,&regs[0]);break;
6803 case MULTDIV:
6804 multdiv_assemble(0,&regs[0]);break;
6805 case MOV:
6806 mov_assemble(0,&regs[0]);break;
6807 case SYSCALL:
7139f3c8 6808 case HLECALL:
1e973cb0 6809 case INTCALL:
57871462 6810 case SPAN:
6811 case UJUMP:
6812 case RJUMP:
6813 case CJUMP:
6814 case SJUMP:
6815 case FJUMP:
c43b5311 6816 SysPrintf("Jump in the delay slot. This is probably a bug.\n");
57871462 6817 }
6818 int btaddr=get_reg(regs[0].regmap,BTREG);
6819 if(btaddr<0) {
6820 btaddr=get_reg(regs[0].regmap,-1);
6821 emit_readword((int)&branch_target,btaddr);
6822 }
6823 assert(btaddr!=HOST_CCREG);
6824 if(regs[0].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
6825#ifdef HOST_IMM8
6826 emit_movimm(start+4,HOST_TEMPREG);
6827 emit_cmp(btaddr,HOST_TEMPREG);
6828#else
6829 emit_cmpimm(btaddr,start+4);
6830#endif
6831 int branch=(int)out;
6832 emit_jeq(0);
6833 store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,-1);
6834 emit_jmp(jump_vaddr_reg[btaddr]);
6835 set_jump_target(branch,(int)out);
6836 store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
6837 load_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
6838}
6839
6840// Basic liveness analysis for MIPS registers
6841void unneeded_registers(int istart,int iend,int r)
6842{
6843 int i;
bedfea38 6844 uint64_t u,uu,gte_u,b,bu,gte_bu;
0ff8c62c 6845 uint64_t temp_u,temp_uu,temp_gte_u=0;
57871462 6846 uint64_t tdep;
0ff8c62c 6847 uint64_t gte_u_unknown=0;
6848 if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED)
6849 gte_u_unknown=~0ll;
57871462 6850 if(iend==slen-1) {
6851 u=1;uu=1;
0ff8c62c 6852 gte_u=gte_u_unknown;
57871462 6853 }else{
6854 u=unneeded_reg[iend+1];
6855 uu=unneeded_reg_upper[iend+1];
6856 u=1;uu=1;
0ff8c62c 6857 gte_u=gte_unneeded[iend+1];
57871462 6858 }
bedfea38 6859
57871462 6860 for (i=iend;i>=istart;i--)
6861 {
6862 //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
6863 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
6864 {
6865 // If subroutine call, flag return address as a possible branch target
6866 if(rt1[i]==31 && i<slen-2) bt[i+2]=1;
6867
6868 if(ba[i]<start || ba[i]>=(start+slen*4))
6869 {
6870 // Branch out of this block, flush all regs
6871 u=1;
6872 uu=1;
0ff8c62c 6873 gte_u=gte_u_unknown;
57871462 6874 /* Hexagon hack
6875 if(itype[i]==UJUMP&&rt1[i]==31)
6876 {
6877 uu=u=0x300C00F; // Discard at, v0-v1, t6-t9
6878 }
6879 if(itype[i]==RJUMP&&rs1[i]==31)
6880 {
6881 uu=u=0x300C0F3; // Discard at, a0-a3, t6-t9
6882 }
4cb76aa4 6883 if(start>0x80000400&&start<0x80000000+RAM_SIZE) {
57871462 6884 if(itype[i]==UJUMP&&rt1[i]==31)
6885 {
6886 //uu=u=0x30300FF0FLL; // Discard at, v0-v1, t0-t9, lo, hi
6887 uu=u=0x300FF0F; // Discard at, v0-v1, t0-t9
6888 }
6889 if(itype[i]==RJUMP&&rs1[i]==31)
6890 {
6891 //uu=u=0x30300FFF3LL; // Discard at, a0-a3, t0-t9, lo, hi
6892 uu=u=0x300FFF3; // Discard at, a0-a3, t0-t9
6893 }
6894 }*/
6895 branch_unneeded_reg[i]=u;
6896 branch_unneeded_reg_upper[i]=uu;
6897 // Merge in delay slot
6898 tdep=(~uu>>rt1[i+1])&1;
6899 u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6900 uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6901 u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
6902 uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
6903 uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
6904 u|=1;uu|=1;
bedfea38 6905 gte_u|=gte_rt[i+1];
6906 gte_u&=~gte_rs[i+1];
57871462 6907 // If branch is "likely" (and conditional)
6908 // then we skip the delay slot on the fall-thru path
6909 if(likely[i]) {
6910 if(i<slen-1) {
6911 u&=unneeded_reg[i+2];
6912 uu&=unneeded_reg_upper[i+2];
bedfea38 6913 gte_u&=gte_unneeded[i+2];
57871462 6914 }
6915 else
6916 {
6917 u=1;
6918 uu=1;
0ff8c62c 6919 gte_u=gte_u_unknown;
57871462 6920 }
6921 }
6922 }
6923 else
6924 {
6925 // Internal branch, flag target
6926 bt[(ba[i]-start)>>2]=1;
6927 if(ba[i]<=start+i*4) {
6928 // Backward branch
6929 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
6930 {
6931 // Unconditional branch
6932 temp_u=1;temp_uu=1;
bedfea38 6933 temp_gte_u=0;
57871462 6934 } else {
6935 // Conditional branch (not taken case)
6936 temp_u=unneeded_reg[i+2];
6937 temp_uu=unneeded_reg_upper[i+2];
bedfea38 6938 temp_gte_u&=gte_unneeded[i+2];
57871462 6939 }
6940 // Merge in delay slot
6941 tdep=(~temp_uu>>rt1[i+1])&1;
6942 temp_u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6943 temp_uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6944 temp_u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
6945 temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
6946 temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
6947 temp_u|=1;temp_uu|=1;
bedfea38 6948 temp_gte_u|=gte_rt[i+1];
6949 temp_gte_u&=~gte_rs[i+1];
57871462 6950 // If branch is "likely" (and conditional)
6951 // then we skip the delay slot on the fall-thru path
6952 if(likely[i]) {
6953 if(i<slen-1) {
6954 temp_u&=unneeded_reg[i+2];
6955 temp_uu&=unneeded_reg_upper[i+2];
bedfea38 6956 temp_gte_u&=gte_unneeded[i+2];
57871462 6957 }
6958 else
6959 {
6960 temp_u=1;
6961 temp_uu=1;
0ff8c62c 6962 temp_gte_u=gte_u_unknown;
57871462 6963 }
6964 }
6965 tdep=(~temp_uu>>rt1[i])&1;
6966 temp_u|=(1LL<<rt1[i])|(1LL<<rt2[i]);
6967 temp_uu|=(1LL<<rt1[i])|(1LL<<rt2[i]);
6968 temp_u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
6969 temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
6970 temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
6971 temp_u|=1;temp_uu|=1;
bedfea38 6972 temp_gte_u|=gte_rt[i];
6973 temp_gte_u&=~gte_rs[i];
57871462 6974 unneeded_reg[i]=temp_u;
6975 unneeded_reg_upper[i]=temp_uu;
bedfea38 6976 gte_unneeded[i]=temp_gte_u;
57871462 6977 // Only go three levels deep. This recursion can take an
6978 // excessive amount of time if there are a lot of nested loops.
6979 if(r<2) {
6980 unneeded_registers((ba[i]-start)>>2,i-1,r+1);
6981 }else{
6982 unneeded_reg[(ba[i]-start)>>2]=1;
6983 unneeded_reg_upper[(ba[i]-start)>>2]=1;
0ff8c62c 6984 gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown;
57871462 6985 }
6986 } /*else*/ if(1) {
6987 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
6988 {
6989 // Unconditional branch
6990 u=unneeded_reg[(ba[i]-start)>>2];
6991 uu=unneeded_reg_upper[(ba[i]-start)>>2];
bedfea38 6992 gte_u=gte_unneeded[(ba[i]-start)>>2];
57871462 6993 branch_unneeded_reg[i]=u;
6994 branch_unneeded_reg_upper[i]=uu;
6995 //u=1;
6996 //uu=1;
6997 //branch_unneeded_reg[i]=u;
6998 //branch_unneeded_reg_upper[i]=uu;
6999 // Merge in delay slot
7000 tdep=(~uu>>rt1[i+1])&1;
7001 u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
7002 uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
7003 u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
7004 uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
7005 uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
7006 u|=1;uu|=1;
bedfea38 7007 gte_u|=gte_rt[i+1];
7008 gte_u&=~gte_rs[i+1];
57871462 7009 } else {
7010 // Conditional branch
7011 b=unneeded_reg[(ba[i]-start)>>2];
7012 bu=unneeded_reg_upper[(ba[i]-start)>>2];
bedfea38 7013 gte_bu=gte_unneeded[(ba[i]-start)>>2];
57871462 7014 branch_unneeded_reg[i]=b;
7015 branch_unneeded_reg_upper[i]=bu;
7016 //b=1;
7017 //bu=1;
7018 //branch_unneeded_reg[i]=b;
7019 //branch_unneeded_reg_upper[i]=bu;
7020 // Branch delay slot
7021 tdep=(~uu>>rt1[i+1])&1;
7022 b|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
7023 bu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
7024 b&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
7025 bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
7026 bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
7027 b|=1;bu|=1;
bedfea38 7028 gte_bu|=gte_rt[i+1];
7029 gte_bu&=~gte_rs[i+1];
57871462 7030 // If branch is "likely" then we skip the
7031 // delay slot on the fall-thru path
7032 if(likely[i]) {
7033 u=b;
7034 uu=bu;
bedfea38 7035 gte_u=gte_bu;
57871462 7036 if(i<slen-1) {
7037 u&=unneeded_reg[i+2];
7038 uu&=unneeded_reg_upper[i+2];
bedfea38 7039 gte_u&=gte_unneeded[i+2];
57871462 7040 //u=1;
7041 //uu=1;
7042 }
7043 } else {
7044 u&=b;
7045 uu&=bu;
bedfea38 7046 gte_u&=gte_bu;
57871462 7047 //u=1;
7048 //uu=1;
7049 }
7050 if(i<slen-1) {
7051 branch_unneeded_reg[i]&=unneeded_reg[i+2];
7052 branch_unneeded_reg_upper[i]&=unneeded_reg_upper[i+2];
7053 //branch_unneeded_reg[i]=1;
7054 //branch_unneeded_reg_upper[i]=1;
7055 } else {
7056 branch_unneeded_reg[i]=1;
7057 branch_unneeded_reg_upper[i]=1;
7058 }
7059 }
7060 }
7061 }
7062 }
1e973cb0 7063 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 7064 {
7065 // SYSCALL instruction (software interrupt)
7066 u=1;
7067 uu=1;
7068 }
7069 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
7070 {
7071 // ERET instruction (return from interrupt)
7072 u=1;
7073 uu=1;
7074 }
7075 //u=uu=1; // DEBUG
7076 tdep=(~uu>>rt1[i])&1;
7077 // Written registers are unneeded
7078 u|=1LL<<rt1[i];
7079 u|=1LL<<rt2[i];
7080 uu|=1LL<<rt1[i];
7081 uu|=1LL<<rt2[i];
bedfea38 7082 gte_u|=gte_rt[i];
57871462 7083 // Accessed registers are needed
7084 u&=~(1LL<<rs1[i]);
7085 u&=~(1LL<<rs2[i]);
7086 uu&=~(1LL<<us1[i]);
7087 uu&=~(1LL<<us2[i]);
bedfea38 7088 gte_u&=~gte_rs[i];
eaa11918 7089 if(gte_rs[i]&&rt1[i]&&(unneeded_reg[i+1]&(1ll<<rt1[i])))
cbbd8dd7 7090 gte_u|=gte_rs[i]&gte_unneeded[i+1]; // MFC2/CFC2 to dead register, unneeded
57871462 7091 // Source-target dependencies
7092 uu&=~(tdep<<dep1[i]);
7093 uu&=~(tdep<<dep2[i]);
7094 // R0 is always unneeded
7095 u|=1;uu|=1;
7096 // Save it
7097 unneeded_reg[i]=u;
7098 unneeded_reg_upper[i]=uu;
bedfea38 7099 gte_unneeded[i]=gte_u;
57871462 7100 /*
7101 printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
7102 printf("U:");
7103 int r;
7104 for(r=1;r<=CCREG;r++) {
7105 if((unneeded_reg[i]>>r)&1) {
7106 if(r==HIREG) printf(" HI");
7107 else if(r==LOREG) printf(" LO");
7108 else printf(" r%d",r);
7109 }
7110 }
7111 printf(" UU:");
7112 for(r=1;r<=CCREG;r++) {
7113 if(((unneeded_reg_upper[i]&~unneeded_reg[i])>>r)&1) {
7114 if(r==HIREG) printf(" HI");
7115 else if(r==LOREG) printf(" LO");
7116 else printf(" r%d",r);
7117 }
7118 }
7119 printf("\n");*/
7120 }
252c20fc 7121#ifdef FORCE32
7122 for (i=iend;i>=istart;i--)
7123 {
7124 unneeded_reg_upper[i]=branch_unneeded_reg_upper[i]=-1LL;
7125 }
7126#endif
57871462 7127}
7128
7129// Identify registers which are likely to contain 32-bit values
7130// This is used to predict whether any branches will jump to a
7131// location with 64-bit values in registers.
7132static void provisional_32bit()
7133{
7134 int i,j;
7135 uint64_t is32=1;
7136 uint64_t lastbranch=1;
7137
7138 for(i=0;i<slen;i++)
7139 {
7140 if(i>0) {
7141 if(itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP) {
7142 if(i>1) is32=lastbranch;
7143 else is32=1;
7144 }
7145 }
7146 if(i>1)
7147 {
7148 if(itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP) {
7149 if(likely[i-2]) {
7150 if(i>2) is32=lastbranch;
7151 else is32=1;
7152 }
7153 }
7154 if((opcode[i-2]&0x2f)==0x05) // BNE/BNEL
7155 {
7156 if(rs1[i-2]==0||rs2[i-2]==0)
7157 {
7158 if(rs1[i-2]) {
7159 is32|=1LL<<rs1[i-2];
7160 }
7161 if(rs2[i-2]) {
7162 is32|=1LL<<rs2[i-2];
7163 }
7164 }
7165 }
7166 }
7167 // If something jumps here with 64-bit values
7168 // then promote those registers to 64 bits
7169 if(bt[i])
7170 {
7171 uint64_t temp_is32=is32;
7172 for(j=i-1;j>=0;j--)
7173 {
7174 if(ba[j]==start+i*4)
7175 //temp_is32&=branch_regs[j].is32;
7176 temp_is32&=p32[j];
7177 }
7178 for(j=i;j<slen;j++)
7179 {
7180 if(ba[j]==start+i*4)
7181 temp_is32=1;
7182 }
7183 is32=temp_is32;
7184 }
7185 int type=itype[i];
7186 int op=opcode[i];
7187 int op2=opcode2[i];
7188 int rt=rt1[i];
7189 int s1=rs1[i];
7190 int s2=rs2[i];
7191 if(type==UJUMP||type==RJUMP||type==CJUMP||type==SJUMP||type==FJUMP) {
7192 // Branches don't write registers, consider the delay slot instead.
7193 type=itype[i+1];
7194 op=opcode[i+1];
7195 op2=opcode2[i+1];
7196 rt=rt1[i+1];
7197 s1=rs1[i+1];
7198 s2=rs2[i+1];
7199 lastbranch=is32;
7200 }
7201 switch(type) {
7202 case LOAD:
7203 if(opcode[i]==0x27||opcode[i]==0x37|| // LWU/LD
7204 opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
7205 is32&=~(1LL<<rt);
7206 else
7207 is32|=1LL<<rt;
7208 break;
7209 case STORE:
7210 case STORELR:
7211 break;
7212 case LOADLR:
7213 if(op==0x1a||op==0x1b) is32&=~(1LL<<rt); // LDR/LDL
7214 if(op==0x22) is32|=1LL<<rt; // LWL
7215 break;
7216 case IMM16:
7217 if (op==0x08||op==0x09|| // ADDI/ADDIU
7218 op==0x0a||op==0x0b|| // SLTI/SLTIU
7219 op==0x0c|| // ANDI
7220 op==0x0f) // LUI
7221 {
7222 is32|=1LL<<rt;
7223 }
7224 if(op==0x18||op==0x19) { // DADDI/DADDIU
7225 is32&=~(1LL<<rt);
7226 //if(imm[i]==0)
7227 // is32|=((is32>>s1)&1LL)<<rt;
7228 }
7229 if(op==0x0d||op==0x0e) { // ORI/XORI
7230 uint64_t sr=((is32>>s1)&1LL);
7231 is32&=~(1LL<<rt);
7232 is32|=sr<<rt;
7233 }
7234 break;
7235 case UJUMP:
7236 break;
7237 case RJUMP:
7238 break;
7239 case CJUMP:
7240 break;
7241 case SJUMP:
7242 break;
7243 case FJUMP:
7244 break;
7245 case ALU:
7246 if(op2>=0x20&&op2<=0x23) { // ADD/ADDU/SUB/SUBU
7247 is32|=1LL<<rt;
7248 }
7249 if(op2==0x2a||op2==0x2b) { // SLT/SLTU
7250 is32|=1LL<<rt;
7251 }
7252 else if(op2>=0x24&&op2<=0x27) { // AND/OR/XOR/NOR
7253 uint64_t sr=((is32>>s1)&(is32>>s2)&1LL);
7254 is32&=~(1LL<<rt);
7255 is32|=sr<<rt;
7256 }
7257 else if(op2>=0x2c&&op2<=0x2d) { // DADD/DADDU
7258 if(s1==0&&s2==0) {
7259 is32|=1LL<<rt;
7260 }
7261 else if(s2==0) {
7262 uint64_t sr=((is32>>s1)&1LL);
7263 is32&=~(1LL<<rt);
7264 is32|=sr<<rt;
7265 }
7266 else if(s1==0) {
7267 uint64_t sr=((is32>>s2)&1LL);
7268 is32&=~(1LL<<rt);
7269 is32|=sr<<rt;
7270 }
7271 else {
7272 is32&=~(1LL<<rt);
7273 }
7274 }
7275 else if(op2>=0x2e&&op2<=0x2f) { // DSUB/DSUBU
7276 if(s1==0&&s2==0) {
7277 is32|=1LL<<rt;
7278 }
7279 else if(s2==0) {
7280 uint64_t sr=((is32>>s1)&1LL);
7281 is32&=~(1LL<<rt);
7282 is32|=sr<<rt;
7283 }
7284 else {
7285 is32&=~(1LL<<rt);
7286 }
7287 }
7288 break;
7289 case MULTDIV:
7290 if (op2>=0x1c&&op2<=0x1f) { // DMULT/DMULTU/DDIV/DDIVU
7291 is32&=~((1LL<<HIREG)|(1LL<<LOREG));
7292 }
7293 else {
7294 is32|=(1LL<<HIREG)|(1LL<<LOREG);
7295 }
7296 break;
7297 case MOV:
7298 {
7299 uint64_t sr=((is32>>s1)&1LL);
7300 is32&=~(1LL<<rt);
7301 is32|=sr<<rt;
7302 }
7303 break;
7304 case SHIFT:
7305 if(op2>=0x14&&op2<=0x17) is32&=~(1LL<<rt); // DSLLV/DSRLV/DSRAV
7306 else is32|=1LL<<rt; // SLLV/SRLV/SRAV
7307 break;
7308 case SHIFTIMM:
7309 is32|=1LL<<rt;
7310 // DSLL/DSRL/DSRA/DSLL32/DSRL32 but not DSRA32 have 64-bit result
7311 if(op2>=0x38&&op2<0x3f) is32&=~(1LL<<rt);
7312 break;
7313 case COP0:
7314 if(op2==0) is32|=1LL<<rt; // MFC0
7315 break;
7316 case COP1:
b9b61529 7317 case COP2:
57871462 7318 if(op2==0) is32|=1LL<<rt; // MFC1
7319 if(op2==1) is32&=~(1LL<<rt); // DMFC1
7320 if(op2==2) is32|=1LL<<rt; // CFC1
7321 break;
7322 case C1LS:
b9b61529 7323 case C2LS:
57871462 7324 break;
7325 case FLOAT:
7326 case FCONV:
7327 break;
7328 case FCOMP:
7329 break;
b9b61529 7330 case C2OP:
57871462 7331 case SYSCALL:
7139f3c8 7332 case HLECALL:
57871462 7333 break;
7334 default:
7335 break;
7336 }
7337 is32|=1;
7338 p32[i]=is32;
7339
7340 if(i>0)
7341 {
7342 if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)
7343 {
7344 if(rt1[i-1]==31) // JAL/JALR
7345 {
7346 // Subroutine call will return here, don't alloc any registers
7347 is32=1;
7348 }
7349 else if(i+1<slen)
7350 {
7351 // Internal branch will jump here, match registers to caller
7352 is32=0x3FFFFFFFFLL;
7353 }
7354 }
7355 }
7356 }
7357}
7358
7359// Identify registers which may be assumed to contain 32-bit values
7360// and where optimizations will rely on this.
7361// This is used to determine whether backward branches can safely
7362// jump to a location with 64-bit values in registers.
7363static void provisional_r32()
7364{
7365 u_int r32=0;
7366 int i;
7367
7368 for (i=slen-1;i>=0;i--)
7369 {
7370 int hr;
7371 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
7372 {
7373 if(ba[i]<start || ba[i]>=(start+slen*4))
7374 {
7375 // Branch out of this block, don't need anything
7376 r32=0;
7377 }
7378 else
7379 {
7380 // Internal branch
7381 // Need whatever matches the target
7382 // (and doesn't get overwritten by the delay slot instruction)
7383 r32=0;
7384 int t=(ba[i]-start)>>2;
7385 if(ba[i]>start+i*4) {
7386 // Forward branch
7387 //if(!(requires_32bit[t]&~regs[i].was32))
7388 // r32|=requires_32bit[t]&(~(1LL<<rt1[i+1]))&(~(1LL<<rt2[i+1]));
7389 if(!(pr32[t]&~regs[i].was32))
7390 r32|=pr32[t]&(~(1LL<<rt1[i+1]))&(~(1LL<<rt2[i+1]));
7391 }else{
7392 // Backward branch
7393 if(!(regs[t].was32&~unneeded_reg_upper[t]&~regs[i].was32))
7394 r32|=regs[t].was32&~unneeded_reg_upper[t]&(~(1LL<<rt1[i+1]))&(~(1LL<<rt2[i+1]));
7395 }
7396 }
7397 // Conditional branch may need registers for following instructions
7398 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
7399 {
7400 if(i<slen-2) {
7401 //r32|=requires_32bit[i+2];
7402 r32|=pr32[i+2];
7403 r32&=regs[i].was32;
7404 // Mark this address as a branch target since it may be called
7405 // upon return from interrupt
7406 //bt[i+2]=1;
7407 }
7408 }
7409 // Merge in delay slot
7410 if(!likely[i]) {
7411 // These are overwritten unless the branch is "likely"
7412 // and the delay slot is nullified if not taken
7413 r32&=~(1LL<<rt1[i+1]);
7414 r32&=~(1LL<<rt2[i+1]);
7415 }
7416 // Assume these are needed (delay slot)
7417 if(us1[i+1]>0)
7418 {
7419 if((regs[i].was32>>us1[i+1])&1) r32|=1LL<<us1[i+1];
7420 }
7421 if(us2[i+1]>0)
7422 {
7423 if((regs[i].was32>>us2[i+1])&1) r32|=1LL<<us2[i+1];
7424 }
7425 if(dep1[i+1]&&!((unneeded_reg_upper[i]>>dep1[i+1])&1))
7426 {
7427 if((regs[i].was32>>dep1[i+1])&1) r32|=1LL<<dep1[i+1];
7428 }
7429 if(dep2[i+1]&&!((unneeded_reg_upper[i]>>dep2[i+1])&1))
7430 {
7431 if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<<dep2[i+1];
7432 }
7433 }
1e973cb0 7434 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 7435 {
7436 // SYSCALL instruction (software interrupt)
7437 r32=0;
7438 }
7439 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
7440 {
7441 // ERET instruction (return from interrupt)
7442 r32=0;
7443 }
7444 // Check 32 bits
7445 r32&=~(1LL<<rt1[i]);
7446 r32&=~(1LL<<rt2[i]);
7447 if(us1[i]>0)
7448 {
7449 if((regs[i].was32>>us1[i])&1) r32|=1LL<<us1[i];
7450 }
7451 if(us2[i]>0)
7452 {
7453 if((regs[i].was32>>us2[i])&1) r32|=1LL<<us2[i];
7454 }
7455 if(dep1[i]&&!((unneeded_reg_upper[i]>>dep1[i])&1))
7456 {
7457 if((regs[i].was32>>dep1[i])&1) r32|=1LL<<dep1[i];
7458 }
7459 if(dep2[i]&&!((unneeded_reg_upper[i]>>dep2[i])&1))
7460 {
7461 if((regs[i].was32>>dep2[i])&1) r32|=1LL<<dep2[i];
7462 }
7463 //requires_32bit[i]=r32;
7464 pr32[i]=r32;
7465
7466 // Dirty registers which are 32-bit, require 32-bit input
7467 // as they will be written as 32-bit values
7468 for(hr=0;hr<HOST_REGS;hr++)
7469 {
7470 if(regs[i].regmap_entry[hr]>0&&regs[i].regmap_entry[hr]<64) {
7471 if((regs[i].was32>>regs[i].regmap_entry[hr])&(regs[i].wasdirty>>hr)&1) {
7472 if(!((unneeded_reg_upper[i]>>regs[i].regmap_entry[hr])&1))
7473 pr32[i]|=1LL<<regs[i].regmap_entry[hr];
7474 //requires_32bit[i]|=1LL<<regs[i].regmap_entry[hr];
7475 }
7476 }
7477 }
7478 }
7479}
7480
7481// Write back dirty registers as soon as we will no longer modify them,
7482// so that we don't end up with lots of writes at the branches.
//
// Walks the instructions iend, iend-1, ... istart (both ends inclusive) and
// maintains two per-host-register bitmasks (bit r <=> host register r):
//   will_dirty - bits that get ORed into the dirty/wasdirty state
//   wont_dirty - bits the dirty/wasdirty state is ANDed with (this AND is
//                compiled out when DESTRUCTIVE_WRITEBACK is defined)
// The masks for every visited instruction are stored in will_dirty[i] and
// wont_dirty[i].
//
// istart, iend: instruction index range inside the current block.
// wr: if nonzero, the masks are also applied to regs[] and branch_regs[]
//     (dirty / wasdirty).  If zero, only will_dirty[] / wont_dirty[] are
//     updated; the function calls itself this way for the body of a
//     backward branch.
7483void clean_registers(int istart,int iend,int wr)
7484{
7485 int i;
7486 int r;
7487 u_int will_dirty_i,will_dirty_next,temp_will_dirty;
7488 u_int wont_dirty_i,wont_dirty_next,temp_wont_dirty;
 // Seed the running masks: empty when iend is the last instruction of the
 // block (slen-1), otherwise continue from what is stored for iend+1.
7489 if(iend==slen-1) {
7490 will_dirty_i=will_dirty_next=0;
7491 wont_dirty_i=wont_dirty_next=0;
7492 }else{
7493 will_dirty_i=will_dirty_next=will_dirty[iend+1];
7494 wont_dirty_i=wont_dirty_next=wont_dirty[iend+1];
7495 }
7496 for (i=iend;i>=istart;i--)
7497 {
7498 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
7499 {
 // Branch target ba[i] is tested against the range [start, start+slen*4)
7500 if(ba[i]<start || ba[i]>=(start+slen*4))
7501 {
7502 // Branch out of this block, flush all regs
 // (source[i]>>16)==0x1000 is treated the same as RJUMP/UJUMP here
7503 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
7504 {
7505 // Unconditional branch
7506 will_dirty_i=0;
7507 wont_dirty_i=0;
7508 // Merge in delay slot (will dirty)
 // Host registers holding a destination of the branch (rt1/rt2[i]) or of
 // the delay slot (rt1/rt2[i+1]) are set; mappings with (regmap&63)>33 or
 // regmap<=0 are cleared; CCREG is always set.
7509 for(r=0;r<HOST_REGS;r++) {
7510 if(r!=EXCLUDE_REG) {
7511 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
7512 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
7513 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
7514 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
7515 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
7516 if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
7517 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
7518 if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
7519 if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
7520 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
7521 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
7522 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
7523 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
7524 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
7525 }
7526 }
7527 }
7528 else
7529 {
7530 // Conditional branch
 // wont_dirty continues from the following instruction (not-taken path)
7531 will_dirty_i=0;
7532 wont_dirty_i=wont_dirty_next;
7533 // Merge in delay slot (will dirty)
7534 for(r=0;r<HOST_REGS;r++) {
7535 if(r!=EXCLUDE_REG) {
7536 if(!likely[i]) {
7537 // Might not dirty if likely branch is not taken
7538 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
7539 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
7540 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
7541 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
7542 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
 // NOTE(review): ==0 here, while the sibling loops test <=0
7543 if(branch_regs[i].regmap[r]==0) will_dirty_i&=~(1<<r);
7544 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
7545 //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
7546 //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
7547 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
7548 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
7549 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
7550 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
7551 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
7552 }
7553 }
7554 }
7555 }
7556 // Merge in delay slot (wont dirty)
7557 for(r=0;r<HOST_REGS;r++) {
7558 if(r!=EXCLUDE_REG) {
7559 if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
7560 if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
7561 if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
7562 if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
7563 if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
7564 if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
7565 if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
7566 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
7567 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
7568 if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
7569 }
7570 }
 // Apply the masks to the register state recorded for the branch itself
7571 if(wr) {
7572 #ifndef DESTRUCTIVE_WRITEBACK
7573 branch_regs[i].dirty&=wont_dirty_i;
7574 #endif
7575 branch_regs[i].dirty|=will_dirty_i;
7576 }
7577 }
7578 else
7579 {
7580 // Internal branch
7581 if(ba[i]<=start+i*4) {
7582 // Backward branch
 // First build temporary masks for i (temp_will_dirty/temp_wont_dirty),
 // then either publish them and scan the loop body (wr!=0) or put fixed
 // values at the branch target (wr==0).
7583 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
7584 {
7585 // Unconditional branch
7586 temp_will_dirty=0;
7587 temp_wont_dirty=0;
7588 // Merge in delay slot (will dirty)
7589 for(r=0;r<HOST_REGS;r++) {
7590 if(r!=EXCLUDE_REG) {
7591 if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
7592 if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
7593 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
7594 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
7595 if((branch_regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
7596 if(branch_regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
7597 if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
7598 if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
7599 if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
7600 if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
7601 if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
7602 if((regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
7603 if(regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
7604 if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
7605 }
7606 }
7607 } else {
7608 // Conditional branch (not taken case)
7609 temp_will_dirty=will_dirty_next;
7610 temp_wont_dirty=wont_dirty_next;
7611 // Merge in delay slot (will dirty)
7612 for(r=0;r<HOST_REGS;r++) {
7613 if(r!=EXCLUDE_REG) {
7614 if(!likely[i]) {
7615 // Will not dirty if likely branch is not taken
7616 if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
7617 if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
7618 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
7619 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
7620 if((branch_regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
 // NOTE(review): ==0 here, while the sibling loops test <=0
7621 if(branch_regs[i].regmap[r]==0) temp_will_dirty&=~(1<<r);
7622 if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
7623 //if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
7624 //if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
7625 if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
7626 if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
7627 if((regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
7628 if(regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
7629 if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
7630 }
7631 }
7632 }
7633 }
7634 // Merge in delay slot (wont dirty)
7635 for(r=0;r<HOST_REGS;r++) {
7636 if(r!=EXCLUDE_REG) {
7637 if((regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
7638 if((regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
7639 if((regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
7640 if((regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
7641 if(regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
7642 if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
7643 if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
7644 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
7645 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
7646 if(branch_regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
7647 }
7648 }
7649 // Deal with changed mappings
 // Where regs[i].regmap[r] differs from regmap_pre[i][r], both temporary
 // bits are replaced: by the unneeded_reg[i] bit of the old guest register
 // when that register is in 1..33, otherwise they are simply set.
7650 if(i<iend) {
7651 for(r=0;r<HOST_REGS;r++) {
7652 if(r!=EXCLUDE_REG) {
7653 if(regs[i].regmap[r]!=regmap_pre[i][r]) {
7654 temp_will_dirty&=~(1<<r);
7655 temp_wont_dirty&=~(1<<r);
7656 if((regmap_pre[i][r]&63)>0 && (regmap_pre[i][r]&63)<34) {
7657 temp_will_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
7658 temp_wont_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
7659 } else {
7660 temp_will_dirty|=1<<r;
7661 temp_wont_dirty|=1<<r;
7662 }
7663 }
7664 }
7665 }
7666 }
 // wr pass: publish the temporary masks for i, then scan the loop body
 // (branch target .. i-1) with wr=0 so that will_dirty[]/wont_dirty[] at
 // the branch target are filled in before they are read further down.
7667 if(wr) {
7668 will_dirty[i]=temp_will_dirty;
7669 wont_dirty[i]=temp_wont_dirty;
7670 clean_registers((ba[i]-start)>>2,i-1,0);
7671 }else{
7672 // Limit recursion. It can take an excessive amount
7673 // of time if there are a lot of nested loops.
 // Instead of recursing again, fixed masks are stored at the target:
 // will_dirty = no bits, wont_dirty = all bits.
7674 will_dirty[(ba[i]-start)>>2]=0;
7675 wont_dirty[(ba[i]-start)>>2]=-1;
7676 }
7677 }
 // The "else" is commented out, so the following also runs for backward
 // branches, after the block above.
7678 /*else*/ if(1)
7679 {
7680 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
7681 {
7682 // Unconditional branch
7683 will_dirty_i=0;
7684 wont_dirty_i=0;
7685 //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
 // Import the masks stored at the branch target for host registers whose
 // mapping at the branch equals the target's regmap_entry; additionally
 // OR in the target's unneeded_reg bit for every mapped register.
7686 for(r=0;r<HOST_REGS;r++) {
7687 if(r!=EXCLUDE_REG) {
7688 if(branch_regs[i].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
7689 will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<<r);
7690 wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
7691 }
e3234ecf 7692 if(branch_regs[i].regmap[r]>=0) {
7693 will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<<r;
7694 wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<<r;
7695 }
57871462 7696 }
7697 }
7698 //}
7699 // Merge in delay slot
7700 for(r=0;r<HOST_REGS;r++) {
7701 if(r!=EXCLUDE_REG) {
7702 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
7703 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
7704 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
7705 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
7706 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
7707 if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
7708 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
7709 if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
7710 if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
7711 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
7712 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
7713 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
7714 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
7715 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
7716 }
7717 }
7718 } else {
7719 // Conditional branch
 // Start from the not-taken path (following instruction), then combine
 // with the branch target: will_dirty is ANDed, wont_dirty is ORed.
7720 will_dirty_i=will_dirty_next;
7721 wont_dirty_i=wont_dirty_next;
7722 //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
7723 for(r=0;r<HOST_REGS;r++) {
7724 if(r!=EXCLUDE_REG) {
e3234ecf 7725 signed char target_reg=branch_regs[i].regmap[r];
7726 if(target_reg==regs[(ba[i]-start)>>2].regmap_entry[r]) {
 // NOTE(review): the right-hand side has only bit r set, so this AND
 // also clears every other bit of will_dirty_i - confirm intended
57871462 7727 will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<<r);
7728 wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
7729 }
e3234ecf 7730 else if(target_reg>=0) {
7731 will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<<r;
7732 wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<<r;
57871462 7733 }
7734 // Treat delay slot as part of branch too
7735 /*if(regs[i+1].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
7736 will_dirty[i+1]&=will_dirty[(ba[i]-start)>>2]&(1<<r);
7737 wont_dirty[i+1]|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
7738 }
7739 else
7740 {
7741 will_dirty[i+1]&=~(1<<r);
7742 }*/
7743 }
7744 }
7745 //}
7746 // Merge in delay slot
7747 for(r=0;r<HOST_REGS;r++) {
7748 if(r!=EXCLUDE_REG) {
7749 if(!likely[i]) {
7750 // Might not dirty if likely branch is not taken
7751 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
7752 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
7753 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
7754 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
7755 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
7756 if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
7757 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
7758 //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
7759 //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
7760 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
7761 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
7762 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
7763 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
7764 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
7765 }
7766 }
7767 }
7768 }
e3234ecf 7769 // Merge in delay slot (won't dirty)
57871462 7770 for(r=0;r<HOST_REGS;r++) {
7771 if(r!=EXCLUDE_REG) {
7772 if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
7773 if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
7774 if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
7775 if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
7776 if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
7777 if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
7778 if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
7779 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
7780 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
7781 if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
7782 }
7783 }
 // Apply the masks to the register state recorded for the branch itself
7784 if(wr) {
7785 #ifndef DESTRUCTIVE_WRITEBACK
7786 branch_regs[i].dirty&=wont_dirty_i;
7787 #endif
7788 branch_regs[i].dirty|=will_dirty_i;
7789 }
7790 }
7791 }
7792 }
1e973cb0 7793 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 7794 {
7795 // SYSCALL instruction (software interrupt)
 // HLECALL and INTCALL are handled the same way: both masks restart empty
7796 will_dirty_i=0;
7797 wont_dirty_i=0;
7798 }
7799 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
7800 {
7801 // ERET instruction (return from interrupt)
7802 will_dirty_i=0;
7803 wont_dirty_i=0;
7804 }
 // Snapshot used by the next loop iteration (instruction i-1); taken
 // before instruction i's own destination registers are merged in below.
7805 will_dirty_next=will_dirty_i;
7806 wont_dirty_next=wont_dirty_i;
 // Merge in the destinations of instruction i itself (rt1[i], rt2[i])
7807 for(r=0;r<HOST_REGS;r++) {
7808 if(r!=EXCLUDE_REG) {
7809 if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
7810 if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
7811 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
7812 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
7813 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
7814 if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
7815 if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
7816 if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
7817 if(i>istart) {
7818 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP)
7819 {
7820 // Don't store a register immediately after writing it,
7821 // may prevent dual-issue.
7822 if((regs[i].regmap[r]&63)==rt1[i-1]) wont_dirty_i|=1<<r;
7823 if((regs[i].regmap[r]&63)==rt2[i-1]) wont_dirty_i|=1<<r;
7824 }
7825 }
7826 }
7827 }
7828 // Save it
 // (stored regardless of wr)
7829 will_dirty[i]=will_dirty_i;
7830 wont_dirty[i]=wont_dirty_i;
7831 // Mark registers that won't be dirtied as not dirty
7832 if(wr) {
7833 /*printf("wr (%d,%d) %x will:",istart,iend,start+i*4);
7834 for(r=0;r<HOST_REGS;r++) {
7835 if((will_dirty_i>>r)&1) {
7836 printf(" r%d",r);
7837 }
7838 }
7839 printf("\n");*/
7840
7841 //if(i==istart||(itype[i-1]!=RJUMP&&itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=FJUMP)) {
7842 regs[i].dirty|=will_dirty_i;
7843 #ifndef DESTRUCTIVE_WRITEBACK
7844 regs[i].dirty&=wont_dirty_i;
 // Also clear wasdirty of the instruction that follows, for every host
 // register whose mapping is unchanged: i+2 for a conditional branch
 // (skipping the delay slot), i+1 for a non-branch instruction.
7845 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
7846 {
7847 if(i<iend-1&&itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000) {
7848 for(r=0;r<HOST_REGS;r++) {
7849 if(r!=EXCLUDE_REG) {
7850 if(regs[i].regmap[r]==regmap_pre[i+2][r]) {
7851 regs[i+2].wasdirty&=wont_dirty_i|~(1<<r);
7852 }else {/*printf("i: %x (%d) mismatch(+2): %d\n",start+i*4,i,r);/*assert(!((wont_dirty_i>>r)&1));*/}
7853 }
7854 }
7855 }
7856 }
7857 else
7858 {
7859 if(i<iend) {
7860 for(r=0;r<HOST_REGS;r++) {
7861 if(r!=EXCLUDE_REG) {
7862 if(regs[i].regmap[r]==regmap_pre[i+1][r]) {
7863 regs[i+1].wasdirty&=wont_dirty_i|~(1<<r);
7864 }else {/*printf("i: %x (%d) mismatch(+1): %d\n",start+i*4,i,r);/*assert(!((wont_dirty_i>>r)&1));*/}
7865 }
7866 }
7867 }
7868 }
7869 #endif
7870 //}
7871 }
7872 // Deal with changed mappings
 // Re-express the masks in terms of regmap_pre[i] before the loop moves on
 // to instruction i-1.  temp_* keep the values indexed by regs[i].regmap.
7873 temp_will_dirty=will_dirty_i;
7874 temp_wont_dirty=wont_dirty_i;
7875 for(r=0;r<HOST_REGS;r++) {
7876 if(r!=EXCLUDE_REG) {
7877 int nr;
7878 if(regs[i].regmap[r]==regmap_pre[i][r]) {
 // Mapping unchanged: bit r stays; with wr it is applied to wasdirty
7879 if(wr) {
7880 #ifndef DESTRUCTIVE_WRITEBACK
7881 regs[i].wasdirty&=wont_dirty_i|~(1<<r);
7882 #endif
7883 regs[i].wasdirty|=will_dirty_i&(1<<r);
7884 }
7885 }
f776eb14 7886 else if(regmap_pre[i][r]>=0&&(nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) {
57871462 7887 // Register moved to a different register
 // Bit r takes the value that host register nr has in the saved masks
7888 will_dirty_i&=~(1<<r);
7889 wont_dirty_i&=~(1<<r);
7890 will_dirty_i|=((temp_will_dirty>>nr)&1)<<r;
7891 wont_dirty_i|=((temp_wont_dirty>>nr)&1)<<r;
7892 if(wr) {
7893 #ifndef DESTRUCTIVE_WRITEBACK
7894 regs[i].wasdirty&=wont_dirty_i|~(1<<r);
7895 #endif
7896 regs[i].wasdirty|=will_dirty_i&(1<<r);
7897 }
7898 }
7899 else {
 // Old guest register is not held in any host register after i: use its
 // unneeded_reg[i] bit when it is in 1..33, otherwise set only wont_dirty
7900 will_dirty_i&=~(1<<r);
7901 wont_dirty_i&=~(1<<r);
7902 if((regmap_pre[i][r]&63)>0 && (regmap_pre[i][r]&63)<34) {
7903 will_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
7904 wont_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
7905 } else {
7906 wont_dirty_i|=1<<r;
7907 /*printf("i: %x (%d) mismatch: %d\n",start+i*4,i,r);/*assert(!((will_dirty>>r)&1));*/
7908 }
7909 }
7910 }
7911 }
7912 }
7913}
7914
#ifdef DISASM
/* disassembly */
// Print a one-line listing of decoded instruction i to stdout.
//
// The line starts with '*' when bt[i] is set and with a space otherwise,
// followed by the instruction address (start+i*4), the mnemonic insn[i]
// and operands chosen according to itype[i].
//
// i: index of the instruction inside the block currently being compiled.
//
// Only built when DISASM is defined; otherwise an empty stub is used.
void disassemble_inst(int i)
{
  if (bt[i]) printf("*"); else printf(" ");
  switch(itype[i]) {
    case UJUMP:
      printf (" %x: %s %8x\n",start+i*4,insn[i],ba[i]);break;
    case CJUMP:
      // Target is recomputed from the sign-extended 16-bit offset (<<2).
      // NOTE(review): for i==0 this prints *ba (i.e. ba[0]) instead;
      // the reason is not evident from this function.
      printf (" %x: %s r%d,r%d,%8x\n",start+i*4,insn[i],rs1[i],rs2[i],i?start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14):*ba);break;
    case SJUMP:
      printf (" %x: %s r%d,%8x\n",start+i*4,insn[i],rs1[i],start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14));break;
    case FJUMP:
      printf (" %x: %s %8x\n",start+i*4,insn[i],ba[i]);break;
    case RJUMP:
      // opcode 0x9 with a link register other than r31: show both registers
      if (opcode[i]==0x9&&rt1[i]!=31)
        printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],rt1[i],rs1[i]);
      else
        printf (" %x: %s r%d\n",start+i*4,insn[i],rs1[i]);
      break;
    case SPAN:
      printf (" %x: %s (pagespan) r%d,r%d,%8x\n",start+i*4,insn[i],rs1[i],rs2[i],ba[i]);break;
    case IMM16:
      if(opcode[i]==0xf) //LUI
        // Zero-pad the upper halfword so that e.g. 0x1f is shown as
        // 001f0000 rather than "  1f0000".
        printf (" %x: %s r%d,%04x0000\n",start+i*4,insn[i],rt1[i],imm[i]&0xffff);
      else
        printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],rt1[i],rs1[i],imm[i]);
      break;
    case LOAD:
    case LOADLR:
      printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],rt1[i],rs1[i],imm[i]);
      break;
    case STORE:
    case STORELR:
      printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],rs2[i],rs1[i],imm[i]);
      break;
    case ALU:
    case SHIFT:
      printf (" %x: %s r%d,r%d,r%d\n",start+i*4,insn[i],rt1[i],rs1[i],rs2[i]);
      break;
    case MULTDIV:
      printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],rs1[i],rs2[i]);
      break;
    case SHIFTIMM:
      printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],rt1[i],rs1[i],imm[i]);
      break;
    case MOV:
      // (opcode2&0x1d)==0x10 prints the destination, ==0x11 the source
      if((opcode2[i]&0x1d)==0x10)
        printf (" %x: %s r%d\n",start+i*4,insn[i],rt1[i]);
      else if((opcode2[i]&0x1d)==0x11)
        printf (" %x: %s r%d\n",start+i*4,insn[i],rs1[i]);
      else
        printf (" %x: %s\n",start+i*4,insn[i]);
      break;
    case COP0:
      // Coprocessor register number is bits 11..15 of the instruction word
      if(opcode2[i]==0)
        printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],rt1[i],(source[i]>>11)&0x1f); // MFC0
      else if(opcode2[i]==4)
        printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],rs1[i],(source[i]>>11)&0x1f); // MTC0
      else printf (" %x: %s\n",start+i*4,insn[i]);
      break;
    case COP1:
      if(opcode2[i]<3)
        printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],rt1[i],(source[i]>>11)&0x1f); // MFC1
      else if(opcode2[i]>3)
        printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],rs1[i],(source[i]>>11)&0x1f); // MTC1
      else printf (" %x: %s\n",start+i*4,insn[i]);
      break;
    case COP2:
      if(opcode2[i]<3)
        printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],rt1[i],(source[i]>>11)&0x1f); // MFC2
      else if(opcode2[i]>3)
        printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],rs1[i],(source[i]>>11)&0x1f); // MTC2
      else printf (" %x: %s\n",start+i*4,insn[i]);
      break;
    case C1LS:
      printf (" %x: %s cpr1[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,rs1[i],imm[i]);
      break;
    case C2LS:
      printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,rs1[i],imm[i]);
      break;
    case INTCALL:
      printf (" %x: %s (INTCALL)\n",start+i*4,insn[i]);
      break;
    default:
      //printf (" %s %8x\n",insn[i],source[i]);
      printf (" %x: %s\n",start+i*4,insn[i]);
  }
}
#else
// Disassembly output is compiled out: calls become no-ops.
static void disassemble_inst(int i) {}
#endif // DISASM
57871462 8007
dc990066 8008// clear the state completely, instead of just marking
8009// things invalid like invalidate_all_pages() does
8010void new_dynarec_clear_full()
57871462 8011{
57871462 8012 int n;
35775df7 8013 out=(u_char *)BASE_ADDR;
8014 memset(invalid_code,1,sizeof(invalid_code));
8015 memset(hash_table,0xff,sizeof(hash_table));
57871462 8016 memset(mini_ht,-1,sizeof(mini_ht));
8017 memset(restore_candidate,0,sizeof(restore_candidate));
dc990066 8018 memset(shadow,0,sizeof(shadow));
57871462 8019 copy=shadow;
8020 expirep=16384; // Expiry pointer, +2 blocks
8021 pending_exception=0;
8022 literalcount=0;
57871462 8023 stop_after_jal=0;
9be4ba64 8024 inv_code_start=inv_code_end=~0;
57871462 8025 // TLB
af4ee1fe 8026#ifndef DISABLE_TLB
57871462 8027 using_tlb=0;
8028 for(n=0;n<524288;n++) // 0 .. 0x7FFFFFFF
8029 memory_map[n]=-1;
8030 for(n=524288;n<526336;n++) // 0x80000000 .. 0x807FFFFF
8031 memory_map[n]=((u_int)rdram-0x80000000)>>2;
8032 for(n=526336;n<1048576;n++) // 0x80800000 .. 0xFFFFFFFF
8033 memory_map[n]=-1;
63cb0298 8034#endif
dc990066 8035 for(n=0;n<4096;n++) ll_clear(jump_in+n);
8036 for(n=0;n<4096;n++) ll_clear(jump_out+n);
8037 for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
8038}
8039
8040void new_dynarec_init()
8041{
8042 printf("Init new dynarec\n");
8043 out=(u_char *)BASE_ADDR;
a327ad27 8044#if BASE_ADDR_FIXED
dc990066 8045 if (mmap (out, 1<<TARGET_SIZE_2,
8046 PROT_READ | PROT_WRITE | PROT_EXEC,
8047 MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
c43b5311 8048 -1, 0) <= 0) {SysPrintf("mmap() failed\n");}
bdeade46 8049#else
8050 // not all systems allow execute in data segment by default
8051 if (mprotect(out, 1<<TARGET_SIZE_2, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
c43b5311 8052 SysPrintf("mprotect() failed\n");
bdeade46 8053#endif
dc990066 8054#ifdef MUPEN64
8055 rdword=&readmem_dword;
8056 fake_pc.f.r.rs=&readmem_dword;
8057 fake_pc.f.r.rt=&readmem_dword;
8058 fake_pc.f.r.rd=&readmem_dword;
8059#endif
8060 int n;
2573466a 8061 cycle_multiplier=200;
dc990066 8062 new_dynarec_clear_full();
8063#ifdef HOST_IMM8
8064 // Copy this into local area so we don't have to put it in every literal pool
8065 invc_ptr=invalid_code;
8066#endif
24385cae 8067#ifdef MUPEN64
57871462 8068 for(n=0;n<0x8000;n++) { // 0 .. 0x7FFFFFFF
8069 writemem[n] = write_nomem_new;
8070 writememb[n] = write_nomemb_new;
8071 writememh[n] = write_nomemh_new;
24385cae 8072#ifndef FORCE32
57871462 8073 writememd[n] = write_nomemd_new;
24385cae 8074#endif
57871462 8075 readmem[n] = read_nomem_new;
8076 readmemb[n] = read_nomemb_new;
8077 readmemh[n] = read_nomemh_new;
24385cae 8078#ifndef FORCE32
57871462 8079 readmemd[n] = read_nomemd_new;
24385cae 8080#endif
57871462 8081 }
8082 for(n=0x8000;n<0x8080;n++) { // 0x80000000 .. 0x807FFFFF
8083 writemem[n] = write_rdram_new;
8084 writememb[n] = write_rdramb_new;
8085 writememh[n] = write_rdramh_new;
24385cae 8086#ifndef FORCE32
57871462 8087 writememd[n] = write_rdramd_new;
24385cae 8088#endif
57871462 8089 }
8090 for(n=0xC000;n<0x10000;n++) { // 0xC0000000 .. 0xFFFFFFFF
8091 writemem[n] = write_nomem_new;
8092 writememb[n] = write_nomemb_new;
8093 writememh[n] = write_nomemh_new;
24385cae 8094#ifndef FORCE32
57871462 8095 writememd[n] = write_nomemd_new;
24385cae 8096#endif
57871462 8097 readmem[n] = read_nomem_new;
8098 readmemb[n] = read_nomemb_new;
8099 readmemh[n] = read_nomemh_new;
24385cae 8100#ifndef FORCE32
57871462 8101 readmemd[n] = read_nomemd_new;
24385cae 8102#endif
57871462 8103 }
24385cae 8104#endif
57871462 8105 tlb_hacks();
8106 arch_init();
a327ad27 8107#ifndef RAM_FIXED
8108 ram_offset=(u_int)rdram-0x80000000;
8109#endif
b105cf4f 8110 if (ram_offset!=0)
c43b5311 8111 SysPrintf("warning: RAM is not directly mapped, performance will suffer\n");
57871462 8112}
8113
8114void new_dynarec_cleanup()
8115{
8116 int n;
a327ad27 8117 #if BASE_ADDR_FIXED
c43b5311 8118 if (munmap ((void *)BASE_ADDR, 1<<TARGET_SIZE_2) < 0) {SysPrintf("munmap() failed\n");}
bdeade46 8119 #endif
57871462 8120 for(n=0;n<4096;n++) ll_clear(jump_in+n);
8121 for(n=0;n<4096;n++) ll_clear(jump_out+n);
8122 for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
8123 #ifdef ROM_COPY
c43b5311 8124 if (munmap (ROM_COPY, 67108864) < 0) {SysPrintf("munmap() failed\n");}
57871462 8125 #endif
8126}
8127
8128int new_recompile_block(int addr)
8129{
8130/*
8131 if(addr==0x800cd050) {
8132 int block;
8133 for(block=0x80000;block<0x80800;block++) invalidate_block(block);
8134 int n;
8135 for(n=0;n<=2048;n++) ll_clear(jump_dirty+n);
8136 }
8137*/
8138 //if(Count==365117028) tracedebug=1;
8139 assem_debug("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out);
8140 //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out);
8141 //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr);
8142 //if(debug)
8143 //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
8144 //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29);
8145 /*if(Count>=312978186) {
8146 rlist();
8147 }*/
8148 //rlist();
8149 start = (u_int)addr&~3;
8150 //assert(((u_int)addr&1)==0);
2f546f9a 8151 new_dynarec_did_compile=1;
7139f3c8 8152#ifdef PCSX
9ad4d757 8153 if (Config.HLE && start == 0x80001000) // hlecall
560e4a12 8154 {
7139f3c8 8155 // XXX: is this enough? Maybe check hleSoftCall?
bb5285ef 8156 u_int beginning=(u_int)out;
7139f3c8 8157 u_int page=get_page(start);
7139f3c8 8158 invalid_code[start>>12]=0;
8159 emit_movimm(start,0);
8160 emit_writeword(0,(int)&pcaddr);
bb5285ef 8161 emit_jmp((int)new_dyna_leave);
15776b68 8162 literal_pool(0);
bb5285ef 8163#ifdef __arm__
8164 __clear_cache((void *)beginning,out);
8165#endif
9ad4d757 8166 ll_add(jump_in+page,start,(void *)beginning);
7139f3c8 8167 return 0;
8168 }
560e4a12 8169 else if ((u_int)addr < 0x00200000 ||
8170 (0xa0000000 <= addr && addr < 0xa0200000)) {
7139f3c8 8171 // used for BIOS calls mostly?
560e4a12 8172 source = (u_int *)((u_int)rdram+(start&0x1fffff));
8173 pagelimit = (addr&0xa0000000)|0x00200000;
8174 }
8175 else if (!Config.HLE && (
8176/* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/
8177 (0xbfc00000 <= addr && addr < 0xbfc80000))) {
8178 // BIOS
8179 source = (u_int *)((u_int)psxR+(start&0x7ffff));
8180 pagelimit = (addr&0xfff00000)|0x80000;
7139f3c8 8181 }
8182 else
8183#endif
3d624f89 8184#ifdef MUPEN64
57871462 8185 if ((int)addr >= 0xa4000000 && (int)addr < 0xa4001000) {
8186 source = (u_int *)((u_int)SP_DMEM+start-0xa4000000);
8187 pagelimit = 0xa4001000;
8188 }
3d624f89 8189 else
8190#endif
4cb76aa4 8191 if ((int)addr >= 0x80000000 && (int)addr < 0x80000000+RAM_SIZE) {
57871462 8192 source = (u_int *)((u_int)rdram+start-0x80000000);
4cb76aa4 8193 pagelimit = 0x80000000+RAM_SIZE;
57871462 8194 }
90ae6d4e 8195#ifndef DISABLE_TLB
57871462 8196 else if ((signed int)addr >= (signed int)0xC0000000) {
8197 //printf("addr=%x mm=%x\n",(u_int)addr,(memory_map[start>>12]<<2));
8198 //if(tlb_LUT_r[start>>12])
8199 //source = (u_int *)(((int)rdram)+(tlb_LUT_r[start>>12]&0xFFFFF000)+(((int)addr)&0xFFF)-0x80000000);
8200 if((signed int)memory_map[start>>12]>=0) {
8201 source = (u_int *)((u_int)(start+(memory_map[start>>12]<<2)));
8202 pagelimit=(start+4096)&0xFFFFF000;
8203 int map=memory_map[start>>12];
8204 int i;
8205 for(i=0;i<5;i++) {
8206 //printf("start: %x next: %x\n",map,memory_map[pagelimit>>12]);
8207 if((map&0xBFFFFFFF)==(memory_map[pagelimit>>12]&0xBFFFFFFF)) pagelimit+=4096;
8208 }
8209 assem_debug("pagelimit=%x\n",pagelimit);
8210 assem_debug("mapping=%x (%x)\n",memory_map[start>>12],(memory_map[start>>12]<<2)+start);
8211 }
8212 else {
8213 assem_debug("Compile at unmapped memory address: %x \n", (int)addr);
8214 //assem_debug("start: %x next: %x\n",memory_map[start>>12],memory_map[(start+4096)>>12]);
560e4a12 8215 return -1; // Caller will invoke exception handler
57871462 8216 }
8217 //printf("source= %x\n",(int)source);
8218 }
90ae6d4e 8219#endif
57871462 8220 else {
c43b5311 8221 SysPrintf("Compile at bogus memory address: %x \n", (int)addr);
57871462 8222 exit(1);
8223 }
8224
8225 /* Pass 1: disassemble */
8226 /* Pass 2: register dependencies, branch targets */
8227 /* Pass 3: register allocation */
8228 /* Pass 4: branch dependencies */
8229 /* Pass 5: pre-alloc */
8230 /* Pass 6: optimize clean/dirty state */
8231 /* Pass 7: flag 32-bit registers */
8232 /* Pass 8: assembly */
8233 /* Pass 9: linker */
8234 /* Pass 10: garbage collection / free memory */
8235
8236 int i,j;
8237 int done=0;
8238 unsigned int type,op,op2;
8239
8240 //printf("addr = %x source = %x %x\n", addr,source,source[0]);
8241
8242 /* Pass 1 disassembly */
8243
8244 for(i=0;!done;i++) {
e1190b87 8245 bt[i]=0;likely[i]=0;ooo[i]=0;op2=0;
8246 minimum_free_regs[i]=0;
57871462 8247 opcode[i]=op=source[i]>>26;
8248 switch(op)
8249 {
8250 case 0x00: strcpy(insn[i],"special"); type=NI;
8251 op2=source[i]&0x3f;
8252 switch(op2)
8253 {
8254 case 0x00: strcpy(insn[i],"SLL"); type=SHIFTIMM; break;
8255 case 0x02: strcpy(insn[i],"SRL"); type=SHIFTIMM; break;
8256 case 0x03: strcpy(insn[i],"SRA"); type=SHIFTIMM; break;
8257 case 0x04: strcpy(insn[i],"SLLV"); type=SHIFT; break;
8258 case 0x06: strcpy(insn[i],"SRLV"); type=SHIFT; break;
8259 case 0x07: strcpy(insn[i],"SRAV"); type=SHIFT; break;
8260 case 0x08: strcpy(insn[i],"JR"); type=RJUMP; break;
8261 case 0x09: strcpy(insn[i],"JALR"); type=RJUMP; break;
8262 case 0x0C: strcpy(insn[i],"SYSCALL"); type=SYSCALL; break;
8263 case 0x0D: strcpy(insn[i],"BREAK"); type=OTHER; break;
8264 case 0x0F: strcpy(insn[i],"SYNC"); type=OTHER; break;
8265 case 0x10: strcpy(insn[i],"MFHI"); type=MOV; break;
8266 case 0x11: strcpy(insn[i],"MTHI"); type=MOV; break;
8267 case 0x12: strcpy(insn[i],"MFLO"); type=MOV; break;
8268 case 0x13: strcpy(insn[i],"MTLO"); type=MOV; break;
57871462 8269 case 0x18: strcpy(insn[i],"MULT"); type=MULTDIV; break;
8270 case 0x19: strcpy(insn[i],"MULTU"); type=MULTDIV; break;
8271 case 0x1A: strcpy(insn[i],"DIV"); type=MULTDIV; break;
8272 case 0x1B: strcpy(insn[i],"DIVU"); type=MULTDIV; break;
57871462 8273 case 0x20: strcpy(insn[i],"ADD"); type=ALU; break;
8274 case 0x21: strcpy(insn[i],"ADDU"); type=ALU; break;
8275 case 0x22: strcpy(insn[i],"SUB"); type=ALU; break;
8276 case 0x23: strcpy(insn[i],"SUBU"); type=ALU; break;
8277 case 0x24: strcpy(insn[i],"AND"); type=ALU; break;
8278 case 0x25: strcpy(insn[i],"OR"); type=ALU; break;
8279 case 0x26: strcpy(insn[i],"XOR"); type=ALU; break;
8280 case 0x27: strcpy(insn[i],"NOR"); type=ALU; break;
8281 case 0x2A: strcpy(insn[i],"SLT"); type=ALU; break;
8282 case 0x2B: strcpy(insn[i],"SLTU"); type=ALU; break;
57871462 8283 case 0x30: strcpy(insn[i],"TGE"); type=NI; break;
8284 case 0x31: strcpy(insn[i],"TGEU"); type=NI; break;
8285 case 0x32: strcpy(insn[i],"TLT"); type=NI; break;
8286 case 0x33: strcpy(insn[i],"TLTU"); type=NI; break;
8287 case 0x34: strcpy(insn[i],"TEQ"); type=NI; break;
8288 case 0x36: strcpy(insn[i],"TNE"); type=NI; break;
7f2607ea 8289#ifndef FORCE32
8290 case 0x14: strcpy(insn[i],"DSLLV"); type=SHIFT; break;
8291 case 0x16: strcpy(insn[i],"DSRLV"); type=SHIFT; break;
8292 case 0x17: strcpy(insn[i],"DSRAV"); type=SHIFT; break;
8293 case 0x1C: strcpy(insn[i],"DMULT"); type=MULTDIV; break;
8294 case 0x1D: strcpy(insn[i],"DMULTU"); type=MULTDIV; break;
8295 case 0x1E: strcpy(insn[i],"DDIV"); type=MULTDIV; break;
8296 case 0x1F: strcpy(insn[i],"DDIVU"); type=MULTDIV; break;
8297 case 0x2C: strcpy(insn[i],"DADD"); type=ALU; break;
8298 case 0x2D: strcpy(insn[i],"DADDU"); type=ALU; break;
8299 case 0x2E: strcpy(insn[i],"DSUB"); type=ALU; break;
8300 case 0x2F: strcpy(insn[i],"DSUBU"); type=ALU; break;
57871462 8301 case 0x38: strcpy(insn[i],"DSLL"); type=SHIFTIMM; break;
8302 case 0x3A: strcpy(insn[i],"DSRL"); type=SHIFTIMM; break;
8303 case 0x3B: strcpy(insn[i],"DSRA"); type=SHIFTIMM; break;
8304 case 0x3C: strcpy(insn[i],"DSLL32"); type=SHIFTIMM; break;
8305 case 0x3E: strcpy(insn[i],"DSRL32"); type=SHIFTIMM; break;
8306 case 0x3F: strcpy(insn[i],"DSRA32"); type=SHIFTIMM; break;
7f2607ea 8307#endif
57871462 8308 }
8309 break;
8310 case 0x01: strcpy(insn[i],"regimm"); type=NI;
8311 op2=(source[i]>>16)&0x1f;
8312 switch(op2)
8313 {
8314 case 0x00: strcpy(insn[i],"BLTZ"); type=SJUMP; break;
8315 case 0x01: strcpy(insn[i],"BGEZ"); type=SJUMP; break;
8316 case 0x02: strcpy(insn[i],"BLTZL"); type=SJUMP; break;
8317 case 0x03: strcpy(insn[i],"BGEZL"); type=SJUMP; break;
8318 case 0x08: strcpy(insn[i],"TGEI"); type=NI; break;
8319 case 0x09: strcpy(insn[i],"TGEIU"); type=NI; break;
8320 case 0x0A: strcpy(insn[i],"TLTI"); type=NI; break;
8321 case 0x0B: strcpy(insn[i],"TLTIU"); type=NI; break;
8322 case 0x0C: strcpy(insn[i],"TEQI"); type=NI; break;
8323 case 0x0E: strcpy(insn[i],"TNEI"); type=NI; break;
8324 case 0x10: strcpy(insn[i],"BLTZAL"); type=SJUMP; break;
8325 case 0x11: strcpy(insn[i],"BGEZAL"); type=SJUMP; break;
8326 case 0x12: strcpy(insn[i],"BLTZALL"); type=SJUMP; break;
8327 case 0x13: strcpy(insn[i],"BGEZALL"); type=SJUMP; break;
8328 }
8329 break;
8330 case 0x02: strcpy(insn[i],"J"); type=UJUMP; break;
8331 case 0x03: strcpy(insn[i],"JAL"); type=UJUMP; break;
8332 case 0x04: strcpy(insn[i],"BEQ"); type=CJUMP; break;
8333 case 0x05: strcpy(insn[i],"BNE"); type=CJUMP; break;
8334 case 0x06: strcpy(insn[i],"BLEZ"); type=CJUMP; break;
8335 case 0x07: strcpy(insn[i],"BGTZ"); type=CJUMP; break;
8336 case 0x08: strcpy(insn[i],"ADDI"); type=IMM16; break;
8337 case 0x09: strcpy(insn[i],"ADDIU"); type=IMM16; break;
8338 case 0x0A: strcpy(insn[i],"SLTI"); type=IMM16; break;
8339 case 0x0B: strcpy(insn[i],"SLTIU"); type=IMM16; break;
8340 case 0x0C: strcpy(insn[i],"ANDI"); type=IMM16; break;
8341 case 0x0D: strcpy(insn[i],"ORI"); type=IMM16; break;
8342 case 0x0E: strcpy(insn[i],"XORI"); type=IMM16; break;
8343 case 0x0F: strcpy(insn[i],"LUI"); type=IMM16; break;
8344 case 0x10: strcpy(insn[i],"cop0"); type=NI;
8345 op2=(source[i]>>21)&0x1f;
8346 switch(op2)
8347 {
8348 case 0x00: strcpy(insn[i],"MFC0"); type=COP0; break;
8349 case 0x04: strcpy(insn[i],"MTC0"); type=COP0; break;
8350 case 0x10: strcpy(insn[i],"tlb"); type=NI;
8351 switch(source[i]&0x3f)
8352 {
8353 case 0x01: strcpy(insn[i],"TLBR"); type=COP0; break;
8354 case 0x02: strcpy(insn[i],"TLBWI"); type=COP0; break;
8355 case 0x06: strcpy(insn[i],"TLBWR"); type=COP0; break;
8356 case 0x08: strcpy(insn[i],"TLBP"); type=COP0; break;
576bbd8f 8357#ifdef PCSX
8358 case 0x10: strcpy(insn[i],"RFE"); type=COP0; break;
8359#else
57871462 8360 case 0x18: strcpy(insn[i],"ERET"); type=COP0; break;
576bbd8f 8361#endif
57871462 8362 }
8363 }
8364 break;
8365 case 0x11: strcpy(insn[i],"cop1"); type=NI;
8366 op2=(source[i]>>21)&0x1f;
8367 switch(op2)
8368 {
8369 case 0x00: strcpy(insn[i],"MFC1"); type=COP1; break;
8370 case 0x01: strcpy(insn[i],"DMFC1"); type=COP1; break;
8371 case 0x02: strcpy(insn[i],"CFC1"); type=COP1; break;
8372 case 0x04: strcpy(insn[i],"MTC1"); type=COP1; break;
8373 case 0x05: strcpy(insn[i],"DMTC1"); type=COP1; break;
8374 case 0x06: strcpy(insn[i],"CTC1"); type=COP1; break;
8375 case 0x08: strcpy(insn[i],"BC1"); type=FJUMP;
8376 switch((source[i]>>16)&0x3)
8377 {
8378 case 0x00: strcpy(insn[i],"BC1F"); break;
8379 case 0x01: strcpy(insn[i],"BC1T"); break;
8380 case 0x02: strcpy(insn[i],"BC1FL"); break;
8381 case 0x03: strcpy(insn[i],"BC1TL"); break;
8382 }
8383 break;
8384 case 0x10: strcpy(insn[i],"C1.S"); type=NI;
8385 switch(source[i]&0x3f)
8386 {
8387 case 0x00: strcpy(insn[i],"ADD.S"); type=FLOAT; break;
8388 case 0x01: strcpy(insn[i],"SUB.S"); type=FLOAT; break;
8389 case 0x02: strcpy(insn[i],"MUL.S"); type=FLOAT; break;
8390 case 0x03: strcpy(insn[i],"DIV.S"); type=FLOAT; break;
8391 case 0x04: strcpy(insn[i],"SQRT.S"); type=FLOAT; break;
8392 case 0x05: strcpy(insn[i],"ABS.S"); type=FLOAT; break;
8393 case 0x06: strcpy(insn[i],"MOV.S"); type=FLOAT; break;
8394 case 0x07: strcpy(insn[i],"NEG.S"); type=FLOAT; break;
8395 case 0x08: strcpy(insn[i],"ROUND.L.S"); type=FCONV; break;
8396 case 0x09: strcpy(insn[i],"TRUNC.L.S"); type=FCONV; break;
8397 case 0x0A: strcpy(insn[i],"CEIL.L.S"); type=FCONV; break;
8398 case 0x0B: strcpy(insn[i],"FLOOR.L.S"); type=FCONV; break;
8399 case 0x0C: strcpy(insn[i],"ROUND.W.S"); type=FCONV; break;
8400 case 0x0D: strcpy(insn[i],"TRUNC.W.S"); type=FCONV; break;
8401 case 0x0E: strcpy(insn[i],"CEIL.W.S"); type=FCONV; break;
8402 case 0x0F: strcpy(insn[i],"FLOOR.W.S"); type=FCONV; break;
8403 case 0x21: strcpy(insn[i],"CVT.D.S"); type=FCONV; break;
8404 case 0x24: strcpy(insn[i],"CVT.W.S"); type=FCONV; break;
8405 case 0x25: strcpy(insn[i],"CVT.L.S"); type=FCONV; break;
8406 case 0x30: strcpy(insn[i],"C.F.S"); type=FCOMP; break;
8407 case 0x31: strcpy(insn[i],"C.UN.S"); type=FCOMP; break;
8408 case 0x32: strcpy(insn[i],"C.EQ.S"); type=FCOMP; break;
8409 case 0x33: strcpy(insn[i],"C.UEQ.S"); type=FCOMP; break;
8410 case 0x34: strcpy(insn[i],"C.OLT.S"); type=FCOMP; break;
8411 case 0x35: strcpy(insn[i],"C.ULT.S"); type=FCOMP; break;
8412 case 0x36: strcpy(insn[i],"C.OLE.S"); type=FCOMP; break;
8413 case 0x37: strcpy(insn[i],"C.ULE.S"); type=FCOMP; break;
8414 case 0x38: strcpy(insn[i],"C.SF.S"); type=FCOMP; break;
8415 case 0x39: strcpy(insn[i],"C.NGLE.S"); type=FCOMP; break;
8416 case 0x3A: strcpy(insn[i],"C.SEQ.S"); type=FCOMP; break;
8417 case 0x3B: strcpy(insn[i],"C.NGL.S"); type=FCOMP; break;
8418 case 0x3C: strcpy(insn[i],"C.LT.S"); type=FCOMP; break;
8419 case 0x3D: strcpy(insn[i],"C.NGE.S"); type=FCOMP; break;
8420 case 0x3E: strcpy(insn[i],"C.LE.S"); type=FCOMP; break;
8421 case 0x3F: strcpy(insn[i],"C.NGT.S"); type=FCOMP; break;
8422 }
8423 break;
8424 case 0x11: strcpy(insn[i],"C1.D"); type=NI;
8425 switch(source[i]&0x3f)
8426 {
8427 case 0x00: strcpy(insn[i],"ADD.D"); type=FLOAT; break;
8428 case 0x01: strcpy(insn[i],"SUB.D"); type=FLOAT; break;
8429 case 0x02: strcpy(insn[i],"MUL.D"); type=FLOAT; break;
8430 case 0x03: strcpy(insn[i],"DIV.D"); type=FLOAT; break;
8431 case 0x04: strcpy(insn[i],"SQRT.D"); type=FLOAT; break;
8432 case 0x05: strcpy(insn[i],"ABS.D"); type=FLOAT; break;
8433 case 0x06: strcpy(insn[i],"MOV.D"); type=FLOAT; break;
8434 case 0x07: strcpy(insn[i],"NEG.D"); type=FLOAT; break;
8435 case 0x08: strcpy(insn[i],"ROUND.L.D"); type=FCONV; break;
8436 case 0x09: strcpy(insn[i],"TRUNC.L.D"); type=FCONV; break;
8437 case 0x0A: strcpy(insn[i],"CEIL.L.D"); type=FCONV; break;
8438 case 0x0B: strcpy(insn[i],"FLOOR.L.D"); type=FCONV; break;
8439 case 0x0C: strcpy(insn[i],"ROUND.W.D"); type=FCONV; break;
8440 case 0x0D: strcpy(insn[i],"TRUNC.W.D"); type=FCONV; break;
8441 case 0x0E: strcpy(insn[i],"CEIL.W.D"); type=FCONV; break;
8442 case 0x0F: strcpy(insn[i],"FLOOR.W.D"); type=FCONV; break;
8443 case 0x20: strcpy(insn[i],"CVT.S.D"); type=FCONV; break;
8444 case 0x24: strcpy(insn[i],"CVT.W.D"); type=FCONV; break;
8445 case 0x25: strcpy(insn[i],"CVT.L.D"); type=FCONV; break;
8446 case 0x30: strcpy(insn[i],"C.F.D"); type=FCOMP; break;
8447 case 0x31: strcpy(insn[i],"C.UN.D"); type=FCOMP; break;
8448 case 0x32: strcpy(insn[i],"C.EQ.D"); type=FCOMP; break;
8449 case 0x33: strcpy(insn[i],"C.UEQ.D"); type=FCOMP; break;
8450 case 0x34: strcpy(insn[i],"C.OLT.D"); type=FCOMP; break;
8451 case 0x35: strcpy(insn[i],"C.ULT.D"); type=FCOMP; break;
8452 case 0x36: strcpy(insn[i],"C.OLE.D"); type=FCOMP; break;
8453 case 0x37: strcpy(insn[i],"C.ULE.D"); type=FCOMP; break;
8454 case 0x38: strcpy(insn[i],"C.SF.D"); type=FCOMP; break;
8455 case 0x39: strcpy(insn[i],"C.NGLE.D"); type=FCOMP; break;
8456 case 0x3A: strcpy(insn[i],"C.SEQ.D"); type=FCOMP; break;
8457 case 0x3B: strcpy(insn[i],"C.NGL.D"); type=FCOMP; break;
8458 case 0x3C: strcpy(insn[i],"C.LT.D"); type=FCOMP; break;
8459 case 0x3D: strcpy(insn[i],"C.NGE.D"); type=FCOMP; break;
8460 case 0x3E: strcpy(insn[i],"C.LE.D"); type=FCOMP; break;
8461 case 0x3F: strcpy(insn[i],"C.NGT.D"); type=FCOMP; break;
8462 }
8463 break;
8464 case 0x14: strcpy(insn[i],"C1.W"); type=NI;
8465 switch(source[i]&0x3f)
8466 {
8467 case 0x20: strcpy(insn[i],"CVT.S.W"); type=FCONV; break;
8468 case 0x21: strcpy(insn[i],"CVT.D.W"); type=FCONV; break;
8469 }
8470 break;
8471 case 0x15: strcpy(insn[i],"C1.L"); type=NI;
8472 switch(source[i]&0x3f)
8473 {
8474 case 0x20: strcpy(insn[i],"CVT.S.L"); type=FCONV; break;
8475 case 0x21: strcpy(insn[i],"CVT.D.L"); type=FCONV; break;
8476 }
8477 break;
8478 }
8479 break;
909168d6 8480#ifndef FORCE32
57871462 8481 case 0x14: strcpy(insn[i],"BEQL"); type=CJUMP; break;
8482 case 0x15: strcpy(insn[i],"BNEL"); type=CJUMP; break;
8483 case 0x16: strcpy(insn[i],"BLEZL"); type=CJUMP; break;
8484 case 0x17: strcpy(insn[i],"BGTZL"); type=CJUMP; break;
8485 case 0x18: strcpy(insn[i],"DADDI"); type=IMM16; break;
8486 case 0x19: strcpy(insn[i],"DADDIU"); type=IMM16; break;
8487 case 0x1A: strcpy(insn[i],"LDL"); type=LOADLR; break;
8488 case 0x1B: strcpy(insn[i],"LDR"); type=LOADLR; break;
996cc15d 8489#endif
57871462 8490 case 0x20: strcpy(insn[i],"LB"); type=LOAD; break;
8491 case 0x21: strcpy(insn[i],"LH"); type=LOAD; break;
8492 case 0x22: strcpy(insn[i],"LWL"); type=LOADLR; break;
8493 case 0x23: strcpy(insn[i],"LW"); type=LOAD; break;
8494 case 0x24: strcpy(insn[i],"LBU"); type=LOAD; break;
8495 case 0x25: strcpy(insn[i],"LHU"); type=LOAD; break;
8496 case 0x26: strcpy(insn[i],"LWR"); type=LOADLR; break;
64bd6f82 8497#ifndef FORCE32
57871462 8498 case 0x27: strcpy(insn[i],"LWU"); type=LOAD; break;
64bd6f82 8499#endif
57871462 8500 case 0x28: strcpy(insn[i],"SB"); type=STORE; break;
8501 case 0x29: strcpy(insn[i],"SH"); type=STORE; break;
8502 case 0x2A: strcpy(insn[i],"SWL"); type=STORELR; break;
8503 case 0x2B: strcpy(insn[i],"SW"); type=STORE; break;
996cc15d 8504#ifndef FORCE32
57871462 8505 case 0x2C: strcpy(insn[i],"SDL"); type=STORELR; break;
8506 case 0x2D: strcpy(insn[i],"SDR"); type=STORELR; break;
996cc15d 8507#endif
57871462 8508 case 0x2E: strcpy(insn[i],"SWR"); type=STORELR; break;
8509 case 0x2F: strcpy(insn[i],"CACHE"); type=NOP; break;
8510 case 0x30: strcpy(insn[i],"LL"); type=NI; break;
8511 case 0x31: strcpy(insn[i],"LWC1"); type=C1LS; break;
996cc15d 8512#ifndef FORCE32
57871462 8513 case 0x34: strcpy(insn[i],"LLD"); type=NI; break;
8514 case 0x35: strcpy(insn[i],"LDC1"); type=C1LS; break;
8515 case 0x37: strcpy(insn[i],"LD"); type=LOAD; break;
996cc15d 8516#endif
57871462 8517 case 0x38: strcpy(insn[i],"SC"); type=NI; break;
8518 case 0x39: strcpy(insn[i],"SWC1"); type=C1LS; break;
996cc15d 8519#ifndef FORCE32
57871462 8520 case 0x3C: strcpy(insn[i],"SCD"); type=NI; break;
8521 case 0x3D: strcpy(insn[i],"SDC1"); type=C1LS; break;
8522 case 0x3F: strcpy(insn[i],"SD"); type=STORE; break;
996cc15d 8523#endif
b9b61529 8524#ifdef PCSX
8525 case 0x12: strcpy(insn[i],"COP2"); type=NI;
8526 op2=(source[i]>>21)&0x1f;
bedfea38 8527 //if (op2 & 0x10) {
8528 if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns
c7abc864 8529 if (gte_handlers[source[i]&0x3f]!=NULL) {
bedfea38 8530 if (gte_regnames[source[i]&0x3f]!=NULL)
8531 strcpy(insn[i],gte_regnames[source[i]&0x3f]);
8532 else
8533 snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
c7abc864 8534 type=C2OP;
8535 }
8536 }
8537 else switch(op2)
b9b61529 8538 {
8539 case 0x00: strcpy(insn[i],"MFC2"); type=COP2; break;
8540 case 0x02: strcpy(insn[i],"CFC2"); type=COP2; break;
8541 case 0x04: strcpy(insn[i],"MTC2"); type=COP2; break;
8542 case 0x06: strcpy(insn[i],"CTC2"); type=COP2; break;
b9b61529 8543 }
8544 break;
8545 case 0x32: strcpy(insn[i],"LWC2"); type=C2LS; break;
8546 case 0x3A: strcpy(insn[i],"SWC2"); type=C2LS; break;
8547 case 0x3B: strcpy(insn[i],"HLECALL"); type=HLECALL; break;
8548#endif
90ae6d4e 8549 default: strcpy(insn[i],"???"); type=NI;
c43b5311 8550 SysPrintf("NI %08x @%08x (%08x)\n", source[i], addr + i*4, addr);
90ae6d4e 8551 break;
57871462 8552 }
8553 itype[i]=type;
8554 opcode2[i]=op2;
8555 /* Get registers/immediates */
8556 lt1[i]=0;
8557 us1[i]=0;
8558 us2[i]=0;
8559 dep1[i]=0;
8560 dep2[i]=0;
bedfea38 8561 gte_rs[i]=gte_rt[i]=0;
57871462 8562 switch(type) {
8563 case LOAD:
8564 rs1[i]=(source[i]>>21)&0x1f;
8565 rs2[i]=0;
8566 rt1[i]=(source[i]>>16)&0x1f;
8567 rt2[i]=0;
8568 imm[i]=(short)source[i];
8569 break;
8570 case STORE:
8571 case STORELR:
8572 rs1[i]=(source[i]>>21)&0x1f;
8573 rs2[i]=(source[i]>>16)&0x1f;
8574 rt1[i]=0;
8575 rt2[i]=0;
8576 imm[i]=(short)source[i];
8577 if(op==0x2c||op==0x2d||op==0x3f) us1[i]=rs2[i]; // 64-bit SDL/SDR/SD
8578 break;
8579 case LOADLR:
8580 // LWL/LWR only load part of the register,
8581 // therefore the target register must be treated as a source too
8582 rs1[i]=(source[i]>>21)&0x1f;
8583 rs2[i]=(source[i]>>16)&0x1f;
8584 rt1[i]=(source[i]>>16)&0x1f;
8585 rt2[i]=0;
8586 imm[i]=(short)source[i];
8587 if(op==0x1a||op==0x1b) us1[i]=rs2[i]; // LDR/LDL
8588 if(op==0x26) dep1[i]=rt1[i]; // LWR
8589 break;
8590 case IMM16:
8591 if (op==0x0f) rs1[i]=0; // LUI instruction has no source register
8592 else rs1[i]=(source[i]>>21)&0x1f;
8593 rs2[i]=0;
8594 rt1[i]=(source[i]>>16)&0x1f;
8595 rt2[i]=0;
8596 if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI
8597 imm[i]=(unsigned short)source[i];
8598 }else{
8599 imm[i]=(short)source[i];
8600 }
8601 if(op==0x18||op==0x19) us1[i]=rs1[i]; // DADDI/DADDIU
8602 if(op==0x0a||op==0x0b) us1[i]=rs1[i]; // SLTI/SLTIU
8603 if(op==0x0d||op==0x0e) dep1[i]=rs1[i]; // ORI/XORI
8604 break;
8605 case UJUMP:
8606 rs1[i]=0;
8607 rs2[i]=0;
8608 rt1[i]=0;
8609 rt2[i]=0;
8610 // The JAL instruction writes to r31.
8611 if (op&1) {
8612 rt1[i]=31;
8613 }
8614 rs2[i]=CCREG;
8615 break;
8616 case RJUMP:
8617 rs1[i]=(source[i]>>21)&0x1f;
8618 rs2[i]=0;
8619 rt1[i]=0;
8620 rt2[i]=0;
5067f341 8621 // The JALR instruction writes to rd.
57871462 8622 if (op2&1) {
5067f341 8623 rt1[i]=(source[i]>>11)&0x1f;
57871462 8624 }
8625 rs2[i]=CCREG;
8626 break;
8627 case CJUMP:
8628 rs1[i]=(source[i]>>21)&0x1f;
8629 rs2[i]=(source[i]>>16)&0x1f;
8630 rt1[i]=0;
8631 rt2[i]=0;
8632 if(op&2) { // BGTZ/BLEZ
8633 rs2[i]=0;
8634 }
8635 us1[i]=rs1[i];
8636 us2[i]=rs2[i];
8637 likely[i]=op>>4;
8638 break;
8639 case SJUMP:
8640 rs1[i]=(source[i]>>21)&0x1f;
8641 rs2[i]=CCREG;
8642 rt1[i]=0;
8643 rt2[i]=0;
8644 us1[i]=rs1[i];
8645 if(op2&0x10) { // BxxAL
8646 rt1[i]=31;
8647 // NOTE: If the branch is not taken, r31 is still overwritten
8648 }
8649 likely[i]=(op2&2)>>1;
8650 break;
8651 case FJUMP:
8652 rs1[i]=FSREG;
8653 rs2[i]=CSREG;
8654 rt1[i]=0;
8655 rt2[i]=0;
8656 likely[i]=((source[i])>>17)&1;
8657 break;
8658 case ALU:
8659 rs1[i]=(source[i]>>21)&0x1f; // source
8660 rs2[i]=(source[i]>>16)&0x1f; // subtract amount
8661 rt1[i]=(source[i]>>11)&0x1f; // destination
8662 rt2[i]=0;
8663 if(op2==0x2a||op2==0x2b) { // SLT/SLTU
8664 us1[i]=rs1[i];us2[i]=rs2[i];
8665 }
8666 else if(op2>=0x24&&op2<=0x27) { // AND/OR/XOR/NOR
8667 dep1[i]=rs1[i];dep2[i]=rs2[i];
8668 }
8669 else if(op2>=0x2c&&op2<=0x2f) { // DADD/DSUB
8670 dep1[i]=rs1[i];dep2[i]=rs2[i];
8671 }
8672 break;
8673 case MULTDIV:
8674 rs1[i]=(source[i]>>21)&0x1f; // source
8675 rs2[i]=(source[i]>>16)&0x1f; // divisor
8676 rt1[i]=HIREG;
8677 rt2[i]=LOREG;
8678 if (op2>=0x1c&&op2<=0x1f) { // DMULT/DMULTU/DDIV/DDIVU
8679 us1[i]=rs1[i];us2[i]=rs2[i];
8680 }
8681 break;
8682 case MOV:
8683 rs1[i]=0;
8684 rs2[i]=0;
8685 rt1[i]=0;
8686 rt2[i]=0;
8687 if(op2==0x10) rs1[i]=HIREG; // MFHI
8688 if(op2==0x11) rt1[i]=HIREG; // MTHI
8689 if(op2==0x12) rs1[i]=LOREG; // MFLO
8690 if(op2==0x13) rt1[i]=LOREG; // MTLO
8691 if((op2&0x1d)==0x10) rt1[i]=(source[i]>>11)&0x1f; // MFxx
8692 if((op2&0x1d)==0x11) rs1[i]=(source[i]>>21)&0x1f; // MTxx
8693 dep1[i]=rs1[i];
8694 break;
8695 case SHIFT:
8696 rs1[i]=(source[i]>>16)&0x1f; // target of shift
8697 rs2[i]=(source[i]>>21)&0x1f; // shift amount
8698 rt1[i]=(source[i]>>11)&0x1f; // destination
8699 rt2[i]=0;
8700 // DSLLV/DSRLV/DSRAV are 64-bit
8701 if(op2>=0x14&&op2<=0x17) us1[i]=rs1[i];
8702 break;
8703 case SHIFTIMM:
8704 rs1[i]=(source[i]>>16)&0x1f;
8705 rs2[i]=0;
8706 rt1[i]=(source[i]>>11)&0x1f;
8707 rt2[i]=0;
8708 imm[i]=(source[i]>>6)&0x1f;
8709 // DSxx32 instructions
8710 if(op2>=0x3c) imm[i]|=0x20;
8711 // DSLL/DSRL/DSRA/DSRA32/DSRL32 but not DSLL32 require 64-bit source
8712 if(op2>=0x38&&op2!=0x3c) us1[i]=rs1[i];
8713 break;
8714 case COP0:
8715 rs1[i]=0;
8716 rs2[i]=0;
8717 rt1[i]=0;
8718 rt2[i]=0;
8719 if(op2==0) rt1[i]=(source[i]>>16)&0x1F; // MFC0
8720 if(op2==4) rs1[i]=(source[i]>>16)&0x1F; // MTC0
8721 if(op2==4&&((source[i]>>11)&0x1f)==12) rt2[i]=CSREG; // Status
8722 if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET
8723 break;
8724 case COP1:
8725 rs1[i]=0;
8726 rs2[i]=0;
8727 rt1[i]=0;
8728 rt2[i]=0;
8729 if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1
8730 if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC1/DMTC1/CTC1
8731 if(op2==5) us1[i]=rs1[i]; // DMTC1
8732 rs2[i]=CSREG;
8733 break;
bedfea38 8734 case COP2:
8735 rs1[i]=0;
8736 rs2[i]=0;
8737 rt1[i]=0;
8738 rt2[i]=0;
8739 if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2
8740 if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2
8741 rs2[i]=CSREG;
8742 int gr=(source[i]>>11)&0x1F;
8743 switch(op2)
8744 {
8745 case 0x00: gte_rs[i]=1ll<<gr; break; // MFC2
8746 case 0x04: gte_rt[i]=1ll<<gr; break; // MTC2
0ff8c62c 8747 case 0x02: gte_rs[i]=1ll<<(gr+32); break; // CFC2
bedfea38 8748 case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2
8749 }
8750 break;
57871462 8751 case C1LS:
8752 rs1[i]=(source[i]>>21)&0x1F;
8753 rs2[i]=CSREG;
8754 rt1[i]=0;
8755 rt2[i]=0;
8756 imm[i]=(short)source[i];
8757 break;
b9b61529 8758 case C2LS:
8759 rs1[i]=(source[i]>>21)&0x1F;
8760 rs2[i]=0;
8761 rt1[i]=0;
8762 rt2[i]=0;
8763 imm[i]=(short)source[i];
bedfea38 8764 if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2
8765 else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2
8766 break;
8767 case C2OP:
8768 rs1[i]=0;
8769 rs2[i]=0;
8770 rt1[i]=0;
8771 rt2[i]=0;
2167bef6 8772 gte_rs[i]=gte_reg_reads[source[i]&0x3f];
8773 gte_rt[i]=gte_reg_writes[source[i]&0x3f];
8774 gte_rt[i]|=1ll<<63; // every op changes flags
587a5b1c 8775 if((source[i]&0x3f)==GTE_MVMVA) {
8776 int v = (source[i] >> 15) & 3;
8777 gte_rs[i]&=~0xe3fll;
8778 if(v==3) gte_rs[i]|=0xe00ll;
8779 else gte_rs[i]|=3ll<<(v*2);
8780 }
b9b61529 8781 break;
57871462 8782 case FLOAT:
8783 case FCONV:
8784 rs1[i]=0;
8785 rs2[i]=CSREG;
8786 rt1[i]=0;
8787 rt2[i]=0;
8788 break;
8789 case FCOMP:
8790 rs1[i]=FSREG;
8791 rs2[i]=CSREG;
8792 rt1[i]=FSREG;
8793 rt2[i]=0;
8794 break;
8795 case SYSCALL:
7139f3c8 8796 case HLECALL:
1e973cb0 8797 case INTCALL:
57871462 8798 rs1[i]=CCREG;
8799 rs2[i]=0;
8800 rt1[i]=0;
8801 rt2[i]=0;
8802 break;
8803 default:
8804 rs1[i]=0;
8805 rs2[i]=0;
8806 rt1[i]=0;
8807 rt2[i]=0;
8808 }
8809 /* Calculate branch target addresses */
8810 if(type==UJUMP)
8811 ba[i]=((start+i*4+4)&0xF0000000)|(((unsigned int)source[i]<<6)>>4);
8812 else if(type==CJUMP&&rs1[i]==rs2[i]&&(op&1))
8813 ba[i]=start+i*4+8; // Ignore never taken branch
8814 else if(type==SJUMP&&rs1[i]==0&&!(op2&1))
8815 ba[i]=start+i*4+8; // Ignore never taken branch
8816 else if(type==CJUMP||type==SJUMP||type==FJUMP)
8817 ba[i]=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14);
8818 else ba[i]=-1;
26869094 8819#ifdef PCSX
3e535354 8820 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
8821 int do_in_intrp=0;
8822 // branch in delay slot?
8823 if(type==RJUMP||type==UJUMP||type==CJUMP||type==SJUMP||type==FJUMP) {
8824 // don't handle first branch and call interpreter if it's hit
c43b5311 8825 SysPrintf("branch in delay slot @%08x (%08x)\n", addr + i*4, addr);
3e535354 8826 do_in_intrp=1;
8827 }
8828 // basic load delay detection
8829 else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&rt1[i]!=0) {
8830 int t=(ba[i-1]-start)/4;
8831 if(0 <= t && t < i &&(rt1[i]==rs1[t]||rt1[i]==rs2[t])&&itype[t]!=CJUMP&&itype[t]!=SJUMP) {
8832 // jump target wants DS result - potential load delay effect
c43b5311 8833 SysPrintf("load delay @%08x (%08x)\n", addr + i*4, addr);
3e535354 8834 do_in_intrp=1;
8835 bt[t+1]=1; // expected return from interpreter
8836 }
8837 else if(i>=2&&rt1[i-2]==2&&rt1[i]==2&&rs1[i]!=2&&rs2[i]!=2&&rs1[i-1]!=2&&rs2[i-1]!=2&&
8838 !(i>=3&&(itype[i-3]==RJUMP||itype[i-3]==UJUMP||itype[i-3]==CJUMP||itype[i-3]==SJUMP))) {
8839 // v0 overwrite like this is a sign of trouble, bail out
c43b5311 8840 SysPrintf("v0 overwrite @%08x (%08x)\n", addr + i*4, addr);
3e535354 8841 do_in_intrp=1;
8842 }
8843 }
3e535354 8844 if(do_in_intrp) {
8845 rs1[i-1]=CCREG;
8846 rs2[i-1]=rt1[i-1]=rt2[i-1]=0;
26869094 8847 ba[i-1]=-1;
8848 itype[i-1]=INTCALL;
8849 done=2;
3e535354 8850 i--; // don't compile the DS
26869094 8851 }
3e535354 8852 }
26869094 8853#endif
3e535354 8854 /* Is this the end of the block? */
8855 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) {
5067f341 8856 if(rt1[i-1]==0) { // Continue past subroutine call (JAL)
1e973cb0 8857 done=2;
57871462 8858 }
8859 else {
8860 if(stop_after_jal) done=1;
8861 // Stop on BREAK
8862 if((source[i+1]&0xfc00003f)==0x0d) done=1;
8863 }
8864 // Don't recompile stuff that's already compiled
8865 if(check_addr(start+i*4+4)) done=1;
8866 // Don't get too close to the limit
8867 if(i>MAXBLOCK/2) done=1;
8868 }
75dec299 8869 if(itype[i]==SYSCALL&&stop_after_jal) done=1;
1e973cb0 8870 if(itype[i]==HLECALL||itype[i]==INTCALL) done=2;
8871 if(done==2) {
8872 // Does the block continue due to a branch?
8873 for(j=i-1;j>=0;j--)
8874 {
2a706964 8875 if(ba[j]==start+i*4) done=j=0; // Branch into delay slot
1e973cb0 8876 if(ba[j]==start+i*4+4) done=j=0;
8877 if(ba[j]==start+i*4+8) done=j=0;
8878 }
8879 }
75dec299 8880 //assert(i<MAXBLOCK-1);
57871462 8881 if(start+i*4==pagelimit-4) done=1;
8882 assert(start+i*4<pagelimit);
8883 if (i==MAXBLOCK-1) done=1;
8884 // Stop if we're compiling junk
8885 if(itype[i]==NI&&opcode[i]==0x11) {
8886 done=stop_after_jal=1;
c43b5311 8887 SysPrintf("Disabled speculative precompilation\n");
57871462 8888 }
8889 }
8890 slen=i;
8891 if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==RJUMP||itype[i-1]==FJUMP) {
8892 if(start+i*4==pagelimit) {
8893 itype[i-1]=SPAN;
8894 }
8895 }
8896 assert(slen>0);
8897
8898 /* Pass 2 - Register dependencies and branch targets */
8899
8900 unneeded_registers(0,slen-1,0);
8901
8902 /* Pass 3 - Register allocation */
8903
8904 struct regstat current; // Current register allocations/status
8905 current.is32=1;
8906 current.dirty=0;
8907 current.u=unneeded_reg[0];
8908 current.uu=unneeded_reg_upper[0];
8909 clear_all_regs(current.regmap);
8910 alloc_reg(&current,0,CCREG);
8911 dirty_reg(&current,CCREG);
8912 current.isconst=0;
8913 current.wasconst=0;
27727b63 8914 current.waswritten=0;
57871462 8915 int ds=0;
8916 int cc=0;
5194fb95 8917 int hr=-1;
6ebf4adf 8918
8919#ifndef FORCE32
57871462 8920 provisional_32bit();
6ebf4adf 8921#endif
57871462 8922 if((u_int)addr&1) {
8923 // First instruction is delay slot
8924 cc=-1;
8925 bt[1]=1;
8926 ds=1;
8927 unneeded_reg[0]=1;
8928 unneeded_reg_upper[0]=1;
8929 current.regmap[HOST_BTREG]=BTREG;
8930 }
8931
8932 for(i=0;i<slen;i++)
8933 {
8934 if(bt[i])
8935 {
8936 int hr;
8937 for(hr=0;hr<HOST_REGS;hr++)
8938 {
8939 // Is this really necessary?
8940 if(current.regmap[hr]==0) current.regmap[hr]=-1;
8941 }
8942 current.isconst=0;
27727b63 8943 current.waswritten=0;
57871462 8944 }
8945 if(i>1)
8946 {
8947 if((opcode[i-2]&0x2f)==0x05) // BNE/BNEL
8948 {
8949 if(rs1[i-2]==0||rs2[i-2]==0)
8950 {
8951 if(rs1[i-2]) {
8952 current.is32|=1LL<<rs1[i-2];
8953 int hr=get_reg(current.regmap,rs1[i-2]|64);
8954 if(hr>=0) current.regmap[hr]=-1;
8955 }
8956 if(rs2[i-2]) {
8957 current.is32|=1LL<<rs2[i-2];
8958 int hr=get_reg(current.regmap,rs2[i-2]|64);
8959 if(hr>=0) current.regmap[hr]=-1;
8960 }
8961 }
8962 }
8963 }
6ebf4adf 8964#ifndef FORCE32
57871462 8965 // If something jumps here with 64-bit values
8966 // then promote those registers to 64 bits
8967 if(bt[i])
8968 {
8969 uint64_t temp_is32=current.is32;
8970 for(j=i-1;j>=0;j--)
8971 {
8972 if(ba[j]==start+i*4)
8973 temp_is32&=branch_regs[j].is32;
8974 }
8975 for(j=i;j<slen;j++)
8976 {
8977 if(ba[j]==start+i*4)
8978 //temp_is32=1;
8979 temp_is32&=p32[j];
8980 }
8981 if(temp_is32!=current.is32) {
8982 //printf("dumping 32-bit regs (%x)\n",start+i*4);
311301dc 8983 #ifndef DESTRUCTIVE_WRITEBACK
8984 if(ds)
8985 #endif
57871462 8986 for(hr=0;hr<HOST_REGS;hr++)
8987 {
8988 int r=current.regmap[hr];
8989 if(r>0&&r<64)
8990 {
8991 if((current.dirty>>hr)&((current.is32&~temp_is32)>>r)&1) {
8992 temp_is32|=1LL<<r;
8993 //printf("restore %d\n",r);
8994 }
8995 }
8996 }
57871462 8997 current.is32=temp_is32;
8998 }
8999 }
6ebf4adf 9000#else
24385cae 9001 current.is32=-1LL;
9002#endif
9003
57871462 9004 memcpy(regmap_pre[i],current.regmap,sizeof(current.regmap));
9005 regs[i].wasconst=current.isconst;
9006 regs[i].was32=current.is32;
9007 regs[i].wasdirty=current.dirty;
8575a877 9008 regs[i].loadedconst=0;
6ebf4adf 9009 #if defined(DESTRUCTIVE_WRITEBACK) && !defined(FORCE32)
57871462 9010 // To change a dirty register from 32 to 64 bits, we must write
9011 // it out during the previous cycle (for branches, 2 cycles)
9012 if(i<slen-1&&bt[i+1]&&itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP)
9013 {
9014 uint64_t temp_is32=current.is32;
9015 for(j=i-1;j>=0;j--)
9016 {
9017 if(ba[j]==start+i*4+4)
9018 temp_is32&=branch_regs[j].is32;
9019 }
9020 for(j=i;j<slen;j++)
9021 {
9022 if(ba[j]==start+i*4+4)
9023 //temp_is32=1;
9024 temp_is32&=p32[j];
9025 }
9026 if(temp_is32!=current.is32) {
9027 //printf("pre-dumping 32-bit regs (%x)\n",start+i*4);
9028 for(hr=0;hr<HOST_REGS;hr++)
9029 {
9030 int r=current.regmap[hr];
9031 if(r>0)
9032 {
9033 if((current.dirty>>hr)&((current.is32&~temp_is32)>>(r&63))&1) {
9034 if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP)
9035 {
9036 if(rs1[i]!=(r&63)&&rs2[i]!=(r&63))
9037 {
9038 //printf("dump %d/r%d\n",hr,r);
9039 current.regmap[hr]=-1;
9040 if(get_reg(current.regmap,r|64)>=0)
9041 current.regmap[get_reg(current.regmap,r|64)]=-1;
9042 }
9043 }
9044 }
9045 }
9046 }
9047 }
9048 }
9049 else if(i<slen-2&&bt[i+2]&&(source[i-1]>>16)!=0x1000&&(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP))
9050 {
9051 uint64_t temp_is32=current.is32;
9052 for(j=i-1;j>=0;j--)
9053 {
9054 if(ba[j]==start+i*4+8)
9055 temp_is32&=branch_regs[j].is32;
9056 }
9057 for(j=i;j<slen;j++)
9058 {
9059 if(ba[j]==start+i*4+8)
9060 //temp_is32=1;
9061 temp_is32&=p32[j];
9062 }
9063 if(temp_is32!=current.is32) {
9064 //printf("pre-dumping 32-bit regs (%x)\n",start+i*4);
9065 for(hr=0;hr<HOST_REGS;hr++)
9066 {
9067 int r=current.regmap[hr];
9068 if(r>0)
9069 {
9070 if((current.dirty>>hr)&((current.is32&~temp_is32)>>(r&63))&1) {
9071 if(rs1[i]!=(r&63)&&rs2[i]!=(r&63)&&rs1[i+1]!=(r&63)&&rs2[i+1]!=(r&63))
9072 {
9073 //printf("dump %d/r%d\n",hr,r);
9074 current.regmap[hr]=-1;
9075 if(get_reg(current.regmap,r|64)>=0)
9076 current.regmap[get_reg(current.regmap,r|64)]=-1;
9077 }
9078 }
9079 }
9080 }
9081 }
9082 }
9083 #endif
9084 if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) {
9085 if(i+1<slen) {
9086 current.u=unneeded_reg[i+1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
9087 current.uu=unneeded_reg_upper[i+1]&~((1LL<<us1[i])|(1LL<<us2[i]));
9088 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
9089 current.u|=1;
9090 current.uu|=1;
9091 } else {
9092 current.u=1;
9093 current.uu=1;
9094 }
9095 } else {
9096 if(i+1<slen) {
9097 current.u=branch_unneeded_reg[i]&~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
9098 current.uu=branch_unneeded_reg_upper[i]&~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
9099 if((~current.uu>>rt1[i+1])&1) current.uu&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
9100 current.u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
9101 current.uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
9102 current.u|=1;
9103 current.uu|=1;
c43b5311 9104 } else { SysPrintf("oops, branch at end of block with no delay slot\n");exit(1); }
57871462 9105 }
9106 is_ds[i]=ds;
9107 if(ds) {
9108 ds=0; // Skip delay slot, already allocated as part of branch
9109 // ...but we need to alloc it in case something jumps here
9110 if(i+1<slen) {
9111 current.u=branch_unneeded_reg[i-1]&unneeded_reg[i+1];
9112 current.uu=branch_unneeded_reg_upper[i-1]&unneeded_reg_upper[i+1];
9113 }else{
9114 current.u=branch_unneeded_reg[i-1];
9115 current.uu=branch_unneeded_reg_upper[i-1];
9116 }
9117 current.u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
9118 current.uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
9119 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
9120 current.u|=1;
9121 current.uu|=1;
9122 struct regstat temp;
9123 memcpy(&temp,&current,sizeof(current));
9124 temp.wasdirty=temp.dirty;
9125 temp.was32=temp.is32;
9126 // TODO: Take into account unconditional branches, as below
9127 delayslot_alloc(&temp,i);
9128 memcpy(regs[i].regmap,temp.regmap,sizeof(temp.regmap));
9129 regs[i].wasdirty=temp.wasdirty;
9130 regs[i].was32=temp.was32;
9131 regs[i].dirty=temp.dirty;
9132 regs[i].is32=temp.is32;
9133 regs[i].isconst=0;
9134 regs[i].wasconst=0;
9135 current.isconst=0;
9136 // Create entry (branch target) regmap
9137 for(hr=0;hr<HOST_REGS;hr++)
9138 {
9139 int r=temp.regmap[hr];
9140 if(r>=0) {
9141 if(r!=regmap_pre[i][hr]) {
9142 regs[i].regmap_entry[hr]=-1;
9143 }
9144 else
9145 {
9146 if(r<64){
9147 if((current.u>>r)&1) {
9148 regs[i].regmap_entry[hr]=-1;
9149 regs[i].regmap[hr]=-1;
9150 //Don't clear regs in the delay slot as the branch might need them
9151 //current.regmap[hr]=-1;
9152 }else
9153 regs[i].regmap_entry[hr]=r;
9154 }
9155 else {
9156 if((current.uu>>(r&63))&1) {
9157 regs[i].regmap_entry[hr]=-1;
9158 regs[i].regmap[hr]=-1;
9159 //Don't clear regs in the delay slot as the branch might need them
9160 //current.regmap[hr]=-1;
9161 }else
9162 regs[i].regmap_entry[hr]=r;
9163 }
9164 }
9165 } else {
9166 // First instruction expects CCREG to be allocated
9167 if(i==0&&hr==HOST_CCREG)
9168 regs[i].regmap_entry[hr]=CCREG;
9169 else
9170 regs[i].regmap_entry[hr]=-1;
9171 }
9172 }
9173 }
9174 else { // Not delay slot
9175 switch(itype[i]) {
9176 case UJUMP:
9177 //current.isconst=0; // DEBUG
9178 //current.wasconst=0; // DEBUG
9179 //regs[i].wasconst=0; // DEBUG
9180 clear_const(&current,rt1[i]);
9181 alloc_cc(&current,i);
9182 dirty_reg(&current,CCREG);
9183 if (rt1[i]==31) {
9184 alloc_reg(&current,i,31);
9185 dirty_reg(&current,31);
4ef8f67d 9186 //assert(rs1[i+1]!=31&&rs2[i+1]!=31);
9187 //assert(rt1[i+1]!=rt1[i]);
57871462 9188 #ifdef REG_PREFETCH
9189 alloc_reg(&current,i,PTEMP);
9190 #endif
9191 //current.is32|=1LL<<rt1[i];
9192 }
269bb29a 9193 ooo[i]=1;
9194 delayslot_alloc(&current,i+1);
57871462 9195 //current.isconst=0; // DEBUG
9196 ds=1;
9197 //printf("i=%d, isconst=%x\n",i,current.isconst);
9198 break;
9199 case RJUMP:
9200 //current.isconst=0;
9201 //current.wasconst=0;
9202 //regs[i].wasconst=0;
9203 clear_const(&current,rs1[i]);
9204 clear_const(&current,rt1[i]);
9205 alloc_cc(&current,i);
9206 dirty_reg(&current,CCREG);
9207 if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) {
9208 alloc_reg(&current,i,rs1[i]);
5067f341 9209 if (rt1[i]!=0) {
9210 alloc_reg(&current,i,rt1[i]);
9211 dirty_reg(&current,rt1[i]);
68b3faee 9212 assert(rs1[i+1]!=rt1[i]&&rs2[i+1]!=rt1[i]);
076655d1 9213 assert(rt1[i+1]!=rt1[i]);
57871462 9214 #ifdef REG_PREFETCH
9215 alloc_reg(&current,i,PTEMP);
9216 #endif
9217 }
9218 #ifdef USE_MINI_HT
9219 if(rs1[i]==31) { // JALR
9220 alloc_reg(&current,i,RHASH);
9221 #ifndef HOST_IMM_ADDR32
9222 alloc_reg(&current,i,RHTBL);
9223 #endif
9224 }
9225 #endif
9226 delayslot_alloc(&current,i+1);
9227 } else {
9228 // The delay slot overwrites our source register,
9229 // allocate a temporary register to hold the old value.
9230 current.isconst=0;
9231 current.wasconst=0;
9232 regs[i].wasconst=0;
9233 delayslot_alloc(&current,i+1);
9234 current.isconst=0;
9235 alloc_reg(&current,i,RTEMP);
9236 }
9237 //current.isconst=0; // DEBUG
e1190b87 9238 ooo[i]=1;
57871462 9239 ds=1;
9240 break;
9241 case CJUMP:
9242 //current.isconst=0;
9243 //current.wasconst=0;
9244 //regs[i].wasconst=0;
9245 clear_const(&current,rs1[i]);
9246 clear_const(&current,rs2[i]);
9247 if((opcode[i]&0x3E)==4) // BEQ/BNE
9248 {
9249 alloc_cc(&current,i);
9250 dirty_reg(&current,CCREG);
9251 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
9252 if(rs2[i]) alloc_reg(&current,i,rs2[i]);
9253 if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1))
9254 {
9255 if(rs1[i]) alloc_reg64(&current,i,rs1[i]);
9256 if(rs2[i]) alloc_reg64(&current,i,rs2[i]);
9257 }
9258 if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))||
9259 (rs2[i]&&(rs2[i]==rt1[i+1]||rs2[i]==rt2[i+1]))) {
9260 // The delay slot overwrites one of our conditions.
9261 // Allocate the branch condition registers instead.
57871462 9262 current.isconst=0;
9263 current.wasconst=0;
9264 regs[i].wasconst=0;
9265 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
9266 if(rs2[i]) alloc_reg(&current,i,rs2[i]);
9267 if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1))
9268 {
9269 if(rs1[i]) alloc_reg64(&current,i,rs1[i]);
9270 if(rs2[i]) alloc_reg64(&current,i,rs2[i]);
9271 }
9272 }
e1190b87 9273 else
9274 {
9275 ooo[i]=1;
9276 delayslot_alloc(&current,i+1);
9277 }
57871462 9278 }
9279 else
9280 if((opcode[i]&0x3E)==6) // BLEZ/BGTZ
9281 {
9282 alloc_cc(&current,i);
9283 dirty_reg(&current,CCREG);
9284 alloc_reg(&current,i,rs1[i]);
9285 if(!(current.is32>>rs1[i]&1))
9286 {
9287 alloc_reg64(&current,i,rs1[i]);
9288 }
9289 if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) {
9290 // The delay slot overwrites one of our conditions.
9291 // Allocate the branch condition registers instead.
57871462 9292 current.isconst=0;
9293 current.wasconst=0;
9294 regs[i].wasconst=0;
9295 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
9296 if(!((current.is32>>rs1[i])&1))
9297 {
9298 if(rs1[i]) alloc_reg64(&current,i,rs1[i]);
9299 }
9300 }
e1190b87 9301 else
9302 {
9303 ooo[i]=1;
9304 delayslot_alloc(&current,i+1);
9305 }
57871462 9306 }
9307 else
9308 // Don't alloc the delay slot yet because we might not execute it
9309 if((opcode[i]&0x3E)==0x14) // BEQL/BNEL
9310 {
9311 current.isconst=0;
9312 current.wasconst=0;
9313 regs[i].wasconst=0;
9314 alloc_cc(&current,i);
9315 dirty_reg(&current,CCREG);
9316 alloc_reg(&current,i,rs1[i]);
9317 alloc_reg(&current,i,rs2[i]);
9318 if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1))
9319 {
9320 alloc_reg64(&current,i,rs1[i]);
9321 alloc_reg64(&current,i,rs2[i]);
9322 }
9323 }
9324 else
9325 if((opcode[i]&0x3E)==0x16) // BLEZL/BGTZL
9326 {
9327 current.isconst=0;
9328 current.wasconst=0;
9329 regs[i].wasconst=0;
9330 alloc_cc(&current,i);
9331 dirty_reg(&current,CCREG);
9332 alloc_reg(&current,i,rs1[i]);
9333 if(!(current.is32>>rs1[i]&1))
9334 {
9335 alloc_reg64(&current,i,rs1[i]);
9336 }
9337 }
9338 ds=1;
9339 //current.isconst=0;
9340 break;
9341 case SJUMP:
9342 //current.isconst=0;
9343 //current.wasconst=0;
9344 //regs[i].wasconst=0;
9345 clear_const(&current,rs1[i]);
9346 clear_const(&current,rt1[i]);
9347 //if((opcode2[i]&0x1E)==0x0) // BLTZ/BGEZ
9348 if((opcode2[i]&0x0E)==0x0) // BLTZ/BGEZ
9349 {
9350 alloc_cc(&current,i);
9351 dirty_reg(&current,CCREG);
9352 alloc_reg(&current,i,rs1[i]);
9353 if(!(current.is32>>rs1[i]&1))
9354 {
9355 alloc_reg64(&current,i,rs1[i]);
9356 }
9357 if (rt1[i]==31) { // BLTZAL/BGEZAL
9358 alloc_reg(&current,i,31);
9359 dirty_reg(&current,31);
57871462 9360 //#ifdef REG_PREFETCH
9361 //alloc_reg(&current,i,PTEMP);
9362 //#endif
9363 //current.is32|=1LL<<rt1[i];
9364 }
e1190b87 9365 if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) // The delay slot overwrites the branch condition.
9366 ||(rt1[i]==31&&(rs1[i+1]==31||rs2[i+1]==31||rt1[i+1]==31||rt2[i+1]==31))) { // DS touches $ra
57871462 9367 // Allocate the branch condition registers instead.
57871462 9368 current.isconst=0;
9369 current.wasconst=0;
9370 regs[i].wasconst=0;
9371 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
9372 if(!((current.is32>>rs1[i])&1))
9373 {
9374 if(rs1[i]) alloc_reg64(&current,i,rs1[i]);
9375 }
9376 }
e1190b87 9377 else
9378 {
9379 ooo[i]=1;
9380 delayslot_alloc(&current,i+1);
9381 }
57871462 9382 }
9383 else
9384 // Don't alloc the delay slot yet because we might not execute it
9385 if((opcode2[i]&0x1E)==0x2) // BLTZL/BGEZL
9386 {
9387 current.isconst=0;
9388 current.wasconst=0;
9389 regs[i].wasconst=0;
9390 alloc_cc(&current,i);
9391 dirty_reg(&current,CCREG);
9392 alloc_reg(&current,i,rs1[i]);
9393 if(!(current.is32>>rs1[i]&1))
9394 {
9395 alloc_reg64(&current,i,rs1[i]);
9396 }
9397 }
9398 ds=1;
9399 //current.isconst=0;
9400 break;
9401 case FJUMP:
9402 current.isconst=0;
9403 current.wasconst=0;
9404 regs[i].wasconst=0;
9405 if(likely[i]==0) // BC1F/BC1T
9406 {
9407 // TODO: Theoretically we can run out of registers here on x86.
9408 // The delay slot can allocate up to six, and we need to check
9409 // CSREG before executing the delay slot. Possibly we can drop
9410 // the cycle count and then reload it after checking that the
9411 // FPU is in a usable state, or don't do out-of-order execution.
9412 alloc_cc(&current,i);
9413 dirty_reg(&current,CCREG);
9414 alloc_reg(&current,i,FSREG);
9415 alloc_reg(&current,i,CSREG);
9416 if(itype[i+1]==FCOMP) {
9417 // The delay slot overwrites the branch condition.
9418 // Allocate the branch condition registers instead.
57871462 9419 alloc_cc(&current,i);
9420 dirty_reg(&current,CCREG);
9421 alloc_reg(&current,i,CSREG);
9422 alloc_reg(&current,i,FSREG);
9423 }
9424 else {
e1190b87 9425 ooo[i]=1;
57871462 9426 delayslot_alloc(&current,i+1);
9427 alloc_reg(&current,i+1,CSREG);
9428 }
9429 }
9430 else
9431 // Don't alloc the delay slot yet because we might not execute it
9432 if(likely[i]) // BC1FL/BC1TL
9433 {
9434 alloc_cc(&current,i);
9435 dirty_reg(&current,CCREG);
9436 alloc_reg(&current,i,CSREG);
9437 alloc_reg(&current,i,FSREG);
9438 }
9439 ds=1;
9440 current.isconst=0;
9441 break;
9442 case IMM16:
9443 imm16_alloc(&current,i);
9444 break;
9445 case LOAD:
9446 case LOADLR:
9447 load_alloc(&current,i);
9448 break;
9449 case STORE:
9450 case STORELR:
9451 store_alloc(&current,i);
9452 break;
9453 case ALU:
9454 alu_alloc(&current,i);
9455 break;
9456 case SHIFT:
9457 shift_alloc(&current,i);
9458 break;
9459 case MULTDIV:
9460 multdiv_alloc(&current,i);
9461 break;
9462 case SHIFTIMM:
9463 shiftimm_alloc(&current,i);
9464 break;
9465 case MOV:
9466 mov_alloc(&current,i);
9467 break;
9468 case COP0:
9469 cop0_alloc(&current,i);
9470 break;
9471 case COP1:
b9b61529 9472 case COP2:
57871462 9473 cop1_alloc(&current,i);
9474 break;
9475 case C1LS:
9476 c1ls_alloc(&current,i);
9477 break;
b9b61529 9478 case C2LS:
9479 c2ls_alloc(&current,i);
9480 break;
9481 case C2OP:
9482 c2op_alloc(&current,i);
9483 break;
57871462 9484 case FCONV:
9485 fconv_alloc(&current,i);
9486 break;
9487 case FLOAT:
9488 float_alloc(&current,i);
9489 break;
9490 case FCOMP:
9491 fcomp_alloc(&current,i);
9492 break;
9493 case SYSCALL:
7139f3c8 9494 case HLECALL:
1e973cb0 9495 case INTCALL:
57871462 9496 syscall_alloc(&current,i);
9497 break;
9498 case SPAN:
9499 pagespan_alloc(&current,i);
9500 break;
9501 }
9502
9503 // Drop the upper half of registers that have become 32-bit
9504 current.uu|=current.is32&((1LL<<rt1[i])|(1LL<<rt2[i]));
9505 if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) {
9506 current.uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
9507 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
9508 current.uu|=1;
9509 } else {
9510 current.uu|=current.is32&((1LL<<rt1[i+1])|(1LL<<rt2[i+1]));
9511 current.uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
9512 if((~current.uu>>rt1[i+1])&1) current.uu&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
9513 current.uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
9514 current.uu|=1;
9515 }
9516
9517 // Create entry (branch target) regmap
9518 for(hr=0;hr<HOST_REGS;hr++)
9519 {
9520 int r,or,er;
9521 r=current.regmap[hr];
9522 if(r>=0) {
9523 if(r!=regmap_pre[i][hr]) {
9524 // TODO: delay slot (?)
9525 or=get_reg(regmap_pre[i],r); // Get old mapping for this register
9526 if(or<0||(r&63)>=TEMPREG){
9527 regs[i].regmap_entry[hr]=-1;
9528 }
9529 else
9530 {
9531 // Just move it to a different register
9532 regs[i].regmap_entry[hr]=r;
9533 // If it was dirty before, it's still dirty
9534 if((regs[i].wasdirty>>or)&1) dirty_reg(&current,r&63);
9535 }
9536 }
9537 else
9538 {
9539 // Unneeded
9540 if(r==0){
9541 regs[i].regmap_entry[hr]=0;
9542 }
9543 else
9544 if(r<64){
9545 if((current.u>>r)&1) {
9546 regs[i].regmap_entry[hr]=-1;
9547 //regs[i].regmap[hr]=-1;
9548 current.regmap[hr]=-1;
9549 }else
9550 regs[i].regmap_entry[hr]=r;
9551 }
9552 else {
9553 if((current.uu>>(r&63))&1) {
9554 regs[i].regmap_entry[hr]=-1;
9555 //regs[i].regmap[hr]=-1;
9556 current.regmap[hr]=-1;
9557 }else
9558 regs[i].regmap_entry[hr]=r;
9559 }
9560 }
9561 } else {
9562 // Branches expect CCREG to be allocated at the target
9563 if(regmap_pre[i][hr]==CCREG)
9564 regs[i].regmap_entry[hr]=CCREG;
9565 else
9566 regs[i].regmap_entry[hr]=-1;
9567 }
9568 }
9569 memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap));
9570 }
27727b63 9571
9572 if(i>0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800)
9573 current.waswritten|=1<<rs1[i-1];
9574 current.waswritten&=~(1<<rt1[i]);
9575 current.waswritten&=~(1<<rt2[i]);
9576 if((itype[i]==STORE||itype[i]==STORELR||(itype[i]==C2LS&&opcode[i]==0x3a))&&(u_int)imm[i]>=0x800)
9577 current.waswritten&=~(1<<rs1[i]);
9578
57871462 9579 /* Branch post-alloc */
9580 if(i>0)
9581 {
9582 current.was32=current.is32;
9583 current.wasdirty=current.dirty;
9584 switch(itype[i-1]) {
9585 case UJUMP:
9586 memcpy(&branch_regs[i-1],&current,sizeof(current));
9587 branch_regs[i-1].isconst=0;
9588 branch_regs[i-1].wasconst=0;
9589 branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
9590 branch_regs[i-1].uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i-1])|(1LL<<us2[i-1]));
9591 alloc_cc(&branch_regs[i-1],i-1);
9592 dirty_reg(&branch_regs[i-1],CCREG);
9593 if(rt1[i-1]==31) { // JAL
9594 alloc_reg(&branch_regs[i-1],i-1,31);
9595 dirty_reg(&branch_regs[i-1],31);
9596 branch_regs[i-1].is32|=1LL<<31;
9597 }
9598 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
956f3129 9599 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 9600 break;
9601 case RJUMP:
9602 memcpy(&branch_regs[i-1],&current,sizeof(current));
9603 branch_regs[i-1].isconst=0;
9604 branch_regs[i-1].wasconst=0;
9605 branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
9606 branch_regs[i-1].uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i-1])|(1LL<<us2[i-1]));
9607 alloc_cc(&branch_regs[i-1],i-1);
9608 dirty_reg(&branch_regs[i-1],CCREG);
9609 alloc_reg(&branch_regs[i-1],i-1,rs1[i-1]);
5067f341 9610 if(rt1[i-1]!=0) { // JALR
9611 alloc_reg(&branch_regs[i-1],i-1,rt1[i-1]);
9612 dirty_reg(&branch_regs[i-1],rt1[i-1]);
9613 branch_regs[i-1].is32|=1LL<<rt1[i-1];
57871462 9614 }
9615 #ifdef USE_MINI_HT
9616 if(rs1[i-1]==31) { // JALR
9617 alloc_reg(&branch_regs[i-1],i-1,RHASH);
9618 #ifndef HOST_IMM_ADDR32
9619 alloc_reg(&branch_regs[i-1],i-1,RHTBL);
9620 #endif
9621 }
9622 #endif
9623 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
956f3129 9624 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 9625 break;
9626 case CJUMP:
9627 if((opcode[i-1]&0x3E)==4) // BEQ/BNE
9628 {
9629 alloc_cc(&current,i-1);
9630 dirty_reg(&current,CCREG);
9631 if((rs1[i-1]&&(rs1[i-1]==rt1[i]||rs1[i-1]==rt2[i]))||
9632 (rs2[i-1]&&(rs2[i-1]==rt1[i]||rs2[i-1]==rt2[i]))) {
9633 // The delay slot overwrote one of our conditions
9634 // Delay slot goes after the test (in order)
9635 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
9636 current.uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i]));
9637 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
9638 current.u|=1;
9639 current.uu|=1;
9640 delayslot_alloc(&current,i);
9641 current.isconst=0;
9642 }
9643 else
9644 {
9645 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
9646 current.uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i-1])|(1LL<<us2[i-1]));
9647 // Alloc the branch condition registers
9648 if(rs1[i-1]) alloc_reg(&current,i-1,rs1[i-1]);
9649 if(rs2[i-1]) alloc_reg(&current,i-1,rs2[i-1]);
9650 if(!((current.is32>>rs1[i-1])&(current.is32>>rs2[i-1])&1))
9651 {
9652 if(rs1[i-1]) alloc_reg64(&current,i-1,rs1[i-1]);
9653 if(rs2[i-1]) alloc_reg64(&current,i-1,rs2[i-1]);
9654 }
9655 }
9656 memcpy(&branch_regs[i-1],&current,sizeof(current));
9657 branch_regs[i-1].isconst=0;
9658 branch_regs[i-1].wasconst=0;
9659 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
956f3129 9660 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 9661 }
9662 else
9663 if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ
9664 {
9665 alloc_cc(&current,i-1);
9666 dirty_reg(&current,CCREG);
9667 if(rs1[i-1]==rt1[i]||rs1[i-1]==rt2[i]) {
9668 // The delay slot overwrote the branch condition
9669 // Delay slot goes after the test (in order)
9670 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
9671 current.uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i]));
9672 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
9673 current.u|=1;
9674 current.uu|=1;
9675 delayslot_alloc(&current,i);
9676 current.isconst=0;
9677 }
9678 else
9679 {
9680 current.u=branch_unneeded_reg[i-1]&~(1LL<<rs1[i-1]);
9681 current.uu=branch_unneeded_reg_upper[i-1]&~(1LL<<us1[i-1]);
9682 // Alloc the branch condition register
9683 alloc_reg(&current,i-1,rs1[i-1]);
9684 if(!(current.is32>>rs1[i-1]&1))
9685 {
9686 alloc_reg64(&current,i-1,rs1[i-1]);
9687 }
9688 }
9689 memcpy(&branch_regs[i-1],&current,sizeof(current));
9690 branch_regs[i-1].isconst=0;
9691 branch_regs[i-1].wasconst=0;
9692 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
956f3129 9693 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 9694 }
9695 else
9696 // Alloc the delay slot in case the branch is taken
9697 if((opcode[i-1]&0x3E)==0x14) // BEQL/BNEL
9698 {
9699 memcpy(&branch_regs[i-1],&current,sizeof(current));
9700 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
9701 branch_regs[i-1].uu=(branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
9702 if((~branch_regs[i-1].uu>>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]))|1;
9703 alloc_cc(&branch_regs[i-1],i);
9704 dirty_reg(&branch_regs[i-1],CCREG);
9705 delayslot_alloc(&branch_regs[i-1],i);
9706 branch_regs[i-1].isconst=0;
9707 alloc_reg(&current,i,CCREG); // Not taken path
9708 dirty_reg(&current,CCREG);
9709 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
9710 }
9711 else
9712 if((opcode[i-1]&0x3E)==0x16) // BLEZL/BGTZL
9713 {
9714 memcpy(&branch_regs[i-1],&current,sizeof(current));
9715 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
9716 branch_regs[i-1].uu=(branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
9717 if((~branch_regs[i-1].uu>>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]))|1;
9718 alloc_cc(&branch_regs[i-1],i);
9719 dirty_reg(&branch_regs[i-1],CCREG);
9720 delayslot_alloc(&branch_regs[i-1],i);
9721 branch_regs[i-1].isconst=0;
9722 alloc_reg(&current,i,CCREG); // Not taken path
9723 dirty_reg(&current,CCREG);
9724 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
9725 }
9726 break;
9727 case SJUMP:
9728 //if((opcode2[i-1]&0x1E)==0) // BLTZ/BGEZ
9729 if((opcode2[i-1]&0x0E)==0) // BLTZ/BGEZ
9730 {
9731 alloc_cc(&current,i-1);
9732 dirty_reg(&current,CCREG);
9733 if(rs1[i-1]==rt1[i]||rs1[i-1]==rt2[i]) {
9734 // The delay slot overwrote the branch condition
9735 // Delay slot goes after the test (in order)
9736 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
9737 current.uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i]));
9738 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
9739 current.u|=1;
9740 current.uu|=1;
9741 delayslot_alloc(&current,i);
9742 current.isconst=0;
9743 }
9744 else
9745 {
9746 current.u=branch_unneeded_reg[i-1]&~(1LL<<rs1[i-1]);
9747 current.uu=branch_unneeded_reg_upper[i-1]&~(1LL<<us1[i-1]);
9748 // Alloc the branch condition register
9749 alloc_reg(&current,i-1,rs1[i-1]);
9750 if(!(current.is32>>rs1[i-1]&1))
9751 {
9752 alloc_reg64(&current,i-1,rs1[i-1]);
9753 }
9754 }
9755 memcpy(&branch_regs[i-1],&current,sizeof(current));
9756 branch_regs[i-1].isconst=0;
9757 branch_regs[i-1].wasconst=0;
9758 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
956f3129 9759 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 9760 }
9761 else
9762 // Alloc the delay slot in case the branch is taken
9763 if((opcode2[i-1]&0x1E)==2) // BLTZL/BGEZL
9764 {
9765 memcpy(&branch_regs[i-1],&current,sizeof(current));
9766 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
9767 branch_regs[i-1].uu=(branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
9768 if((~branch_regs[i-1].uu>>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]))|1;
9769 alloc_cc(&branch_regs[i-1],i);
9770 dirty_reg(&branch_regs[i-1],CCREG);
9771 delayslot_alloc(&branch_regs[i-1],i);
9772 branch_regs[i-1].isconst=0;
9773 alloc_reg(&current,i,CCREG); // Not taken path
9774 dirty_reg(&current,CCREG);
9775 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
9776 }
9777 // FIXME: BLTZAL/BGEZAL
9778 if(opcode2[i-1]&0x10) { // BxxZAL
9779 alloc_reg(&branch_regs[i-1],i-1,31);
9780 dirty_reg(&branch_regs[i-1],31);
9781 branch_regs[i-1].is32|=1LL<<31;
9782 }
9783 break;
9784 case FJUMP:
9785 if(likely[i-1]==0) // BC1F/BC1T
9786 {
9787 alloc_cc(&current,i-1);
9788 dirty_reg(&current,CCREG);
9789 if(itype[i]==FCOMP) {
9790 // The delay slot overwrote the branch condition
9791 // Delay slot goes after the test (in order)
9792 delayslot_alloc(&current,i);
9793 current.isconst=0;
9794 }
9795 else
9796 {
9797 current.u=branch_unneeded_reg[i-1]&~(1LL<<rs1[i-1]);
9798 current.uu=branch_unneeded_reg_upper[i-1]&~(1LL<<us1[i-1]);
9799 // Alloc the branch condition register
9800 alloc_reg(&current,i-1,FSREG);
9801 }
9802 memcpy(&branch_regs[i-1],&current,sizeof(current));
9803 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
9804 }
9805 else // BC1FL/BC1TL
9806 {
9807 // Alloc the delay slot in case the branch is taken
9808 memcpy(&branch_regs[i-1],&current,sizeof(current));
9809 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
9810 branch_regs[i-1].uu=(branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
9811 if((~branch_regs[i-1].uu>>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]))|1;
9812 alloc_cc(&branch_regs[i-1],i);
9813 dirty_reg(&branch_regs[i-1],CCREG);
9814 delayslot_alloc(&branch_regs[i-1],i);
9815 branch_regs[i-1].isconst=0;
9816 alloc_reg(&current,i,CCREG); // Not taken path
9817 dirty_reg(&current,CCREG);
9818 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
9819 }
9820 break;
9821 }
9822
9823 if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)
9824 {
9825 if(rt1[i-1]==31) // JAL/JALR
9826 {
9827 // Subroutine call will return here, don't alloc any registers
9828 current.is32=1;
9829 current.dirty=0;
9830 clear_all_regs(current.regmap);
9831 alloc_reg(&current,i,CCREG);
9832 dirty_reg(&current,CCREG);
9833 }
9834 else if(i+1<slen)
9835 {
9836 // Internal branch will jump here, match registers to caller
9837 current.is32=0x3FFFFFFFFLL;
9838 current.dirty=0;
9839 clear_all_regs(current.regmap);
9840 alloc_reg(&current,i,CCREG);
9841 dirty_reg(&current,CCREG);
9842 for(j=i-1;j>=0;j--)
9843 {
9844 if(ba[j]==start+i*4+4) {
9845 memcpy(current.regmap,branch_regs[j].regmap,sizeof(current.regmap));
9846 current.is32=branch_regs[j].is32;
9847 current.dirty=branch_regs[j].dirty;
9848 break;
9849 }
9850 }
9851 while(j>=0) {
9852 if(ba[j]==start+i*4+4) {
9853 for(hr=0;hr<HOST_REGS;hr++) {
9854 if(current.regmap[hr]!=branch_regs[j].regmap[hr]) {
9855 current.regmap[hr]=-1;
9856 }
9857 current.is32&=branch_regs[j].is32;
9858 current.dirty&=branch_regs[j].dirty;
9859 }
9860 }
9861 j--;
9862 }
9863 }
9864 }
9865 }
9866
9867 // Count cycles in between branches
9868 ccadj[i]=cc;
7139f3c8 9869 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP||itype[i]==SYSCALL||itype[i]==HLECALL))
57871462 9870 {
9871 cc=0;
9872 }
19776aef 9873#if defined(PCSX) && !defined(DRC_DBG)
054175e9 9874 else if(itype[i]==C2OP&&gte_cycletab[source[i]&0x3f]>2)
9875 {
9876 // GTE runs in parallel until accessed, divide by 2 for a rough guess
9877 cc+=gte_cycletab[source[i]&0x3f]/2;
9878 }
b6e87b2b 9879 else if(/*itype[i]==LOAD||itype[i]==STORE||*/itype[i]==C1LS) // load,store causes weird timing issues
fb407447 9880 {
9881 cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER)
9882 }
9883 else if(itype[i]==C2LS)
9884 {
9885 cc+=4;
9886 }
9887#endif
57871462 9888 else
9889 {
9890 cc++;
9891 }
9892
9893 flush_dirty_uppers(&current);
9894 if(!is_ds[i]) {
9895 regs[i].is32=current.is32;
9896 regs[i].dirty=current.dirty;
9897 regs[i].isconst=current.isconst;
956f3129 9898 memcpy(constmap[i],current_constmap,sizeof(current_constmap));
57871462 9899 }
9900 for(hr=0;hr<HOST_REGS;hr++) {
9901 if(hr!=EXCLUDE_REG&&regs[i].regmap[hr]>=0) {
9902 if(regmap_pre[i][hr]!=regs[i].regmap[hr]) {
9903 regs[i].wasconst&=~(1<<hr);
9904 }
9905 }
9906 }
9907 if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1;
27727b63 9908 regs[i].waswritten=current.waswritten;
57871462 9909 }
9910
9911 /* Pass 4 - Cull unused host registers */
9912
9913 uint64_t nr=0;
9914
9915 for (i=slen-1;i>=0;i--)
9916 {
9917 int hr;
9918 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
9919 {
9920 if(ba[i]<start || ba[i]>=(start+slen*4))
9921 {
9922 // Branch out of this block, don't need anything
9923 nr=0;
9924 }
9925 else
9926 {
9927 // Internal branch
9928 // Need whatever matches the target
9929 nr=0;
9930 int t=(ba[i]-start)>>2;
9931 for(hr=0;hr<HOST_REGS;hr++)
9932 {
9933 if(regs[i].regmap_entry[hr]>=0) {
9934 if(regs[i].regmap_entry[hr]==regs[t].regmap_entry[hr]) nr|=1<<hr;
9935 }
9936 }
9937 }
9938 // Conditional branch may need registers for following instructions
9939 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
9940 {
9941 if(i<slen-2) {
9942 nr|=needed_reg[i+2];
9943 for(hr=0;hr<HOST_REGS;hr++)
9944 {
9945 if(regmap_pre[i+2][hr]>=0&&get_reg(regs[i+2].regmap_entry,regmap_pre[i+2][hr])<0) nr&=~(1<<hr);
9946 //if((regmap_entry[i+2][hr])>=0) if(!((nr>>hr)&1)) printf("%x-bogus(%d=%d)\n",start+i*4,hr,regmap_entry[i+2][hr]);
9947 }
9948 }
9949 }
9950 // Don't need stuff which is overwritten
f5955059 9951 //if(regs[i].regmap[hr]!=regmap_pre[i][hr]) nr&=~(1<<hr);
9952 //if(regs[i].regmap[hr]<0) nr&=~(1<<hr);
57871462 9953 // Merge in delay slot
9954 for(hr=0;hr<HOST_REGS;hr++)
9955 {
9956 if(!likely[i]) {
9957 // These are overwritten unless the branch is "likely"
9958 // and the delay slot is nullified if not taken
9959 if(rt1[i+1]&&rt1[i+1]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
9960 if(rt2[i+1]&&rt2[i+1]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
9961 }
9962 if(us1[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
9963 if(us2[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
9964 if(rs1[i+1]==regmap_pre[i][hr]) nr|=1<<hr;
9965 if(rs2[i+1]==regmap_pre[i][hr]) nr|=1<<hr;
9966 if(us1[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
9967 if(us2[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
9968 if(rs1[i+1]==regs[i].regmap_entry[hr]) nr|=1<<hr;
9969 if(rs2[i+1]==regs[i].regmap_entry[hr]) nr|=1<<hr;
9970 if(dep1[i+1]&&!((unneeded_reg_upper[i]>>dep1[i+1])&1)) {
9971 if(dep1[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
9972 if(dep2[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
9973 }
9974 if(dep2[i+1]&&!((unneeded_reg_upper[i]>>dep2[i+1])&1)) {
9975 if(dep1[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
9976 if(dep2[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
9977 }
b9b61529 9978 if(itype[i+1]==STORE || itype[i+1]==STORELR || (opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) {
57871462 9979 if(regmap_pre[i][hr]==INVCP) nr|=1<<hr;
9980 if(regs[i].regmap_entry[hr]==INVCP) nr|=1<<hr;
9981 }
9982 }
9983 }
1e973cb0 9984 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 9985 {
9986 // SYSCALL instruction (software interrupt)
9987 nr=0;
9988 }
9989 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
9990 {
9991 // ERET instruction (return from interrupt)
9992 nr=0;
9993 }
9994 else // Non-branch
9995 {
9996 if(i<slen-1) {
9997 for(hr=0;hr<HOST_REGS;hr++) {
9998 if(regmap_pre[i+1][hr]>=0&&get_reg(regs[i+1].regmap_entry,regmap_pre[i+1][hr])<0) nr&=~(1<<hr);
9999 if(regs[i].regmap[hr]!=regmap_pre[i+1][hr]) nr&=~(1<<hr);
10000 if(regs[i].regmap[hr]!=regmap_pre[i][hr]) nr&=~(1<<hr);
10001 if(regs[i].regmap[hr]<0) nr&=~(1<<hr);
10002 }
10003 }
10004 }
10005 for(hr=0;hr<HOST_REGS;hr++)
10006 {
10007 // Overwritten registers are not needed
10008 if(rt1[i]&&rt1[i]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
10009 if(rt2[i]&&rt2[i]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
10010 if(FTEMP==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
10011 // Source registers are needed
10012 if(us1[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
10013 if(us2[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
10014 if(rs1[i]==regmap_pre[i][hr]) nr|=1<<hr;
10015 if(rs2[i]==regmap_pre[i][hr]) nr|=1<<hr;
10016 if(us1[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
10017 if(us2[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
10018 if(rs1[i]==regs[i].regmap_entry[hr]) nr|=1<<hr;
10019 if(rs2[i]==regs[i].regmap_entry[hr]) nr|=1<<hr;
10020 if(dep1[i]&&!((unneeded_reg_upper[i]>>dep1[i])&1)) {
10021 if(dep1[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
10022 if(dep1[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
10023 }
10024 if(dep2[i]&&!((unneeded_reg_upper[i]>>dep2[i])&1)) {
10025 if(dep2[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
10026 if(dep2[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
10027 }
b9b61529 10028 if(itype[i]==STORE || itype[i]==STORELR || (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) {
57871462 10029 if(regmap_pre[i][hr]==INVCP) nr|=1<<hr;
10030 if(regs[i].regmap_entry[hr]==INVCP) nr|=1<<hr;
10031 }
10032 // Don't store a register immediately after writing it,
10033 // may prevent dual-issue.
10034 // But do so if this is a branch target, otherwise we
10035 // might have to load the register before the branch.
10036 if(i>0&&!bt[i]&&((regs[i].wasdirty>>hr)&1)) {
10037 if((regmap_pre[i][hr]>0&&regmap_pre[i][hr]<64&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1)) ||
10038 (regmap_pre[i][hr]>64&&!((unneeded_reg_upper[i]>>(regmap_pre[i][hr]&63))&1)) ) {
10039 if(rt1[i-1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
10040 if(rt2[i-1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
10041 }
10042 if((regs[i].regmap_entry[hr]>0&&regs[i].regmap_entry[hr]<64&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1)) ||
10043 (regs[i].regmap_entry[hr]>64&&!((unneeded_reg_upper[i]>>(regs[i].regmap_entry[hr]&63))&1)) ) {
10044 if(rt1[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
10045 if(rt2[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
10046 }
10047 }
10048 }
10049 // Cycle count is needed at branches. Assume it is needed at the target too.
10050 if(i==0||bt[i]||itype[i]==CJUMP||itype[i]==FJUMP||itype[i]==SPAN) {
10051 if(regmap_pre[i][HOST_CCREG]==CCREG) nr|=1<<HOST_CCREG;
10052 if(regs[i].regmap_entry[HOST_CCREG]==CCREG) nr|=1<<HOST_CCREG;
10053 }
10054 // Save it
10055 needed_reg[i]=nr;
10056
10057 // Deallocate unneeded registers
10058 for(hr=0;hr<HOST_REGS;hr++)
10059 {
10060 if(!((nr>>hr)&1)) {
10061 if(regs[i].regmap_entry[hr]!=CCREG) regs[i].regmap_entry[hr]=-1;
10062 if((regs[i].regmap[hr]&63)!=rs1[i] && (regs[i].regmap[hr]&63)!=rs2[i] &&
10063 (regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
10064 (regs[i].regmap[hr]&63)!=PTEMP && (regs[i].regmap[hr]&63)!=CCREG)
10065 {
10066 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
10067 {
10068 if(likely[i]) {
10069 regs[i].regmap[hr]=-1;
10070 regs[i].isconst&=~(1<<hr);
79c75f1b 10071 if(i<slen-2) {
10072 regmap_pre[i+2][hr]=-1;
10073 regs[i+2].wasconst&=~(1<<hr);
10074 }
57871462 10075 }
10076 }
10077 }
10078 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
10079 {
10080 int d1=0,d2=0,map=0,temp=0;
10081 if(get_reg(regs[i].regmap,rt1[i+1]|64)>=0||get_reg(branch_regs[i].regmap,rt1[i+1]|64)>=0)
10082 {
10083 d1=dep1[i+1];
10084 d2=dep2[i+1];
10085 }
10086 if(using_tlb) {
10087 if(itype[i+1]==LOAD || itype[i+1]==LOADLR ||
10088 itype[i+1]==STORE || itype[i+1]==STORELR ||
b9b61529 10089 itype[i+1]==C1LS || itype[i+1]==C2LS)
57871462 10090 map=TLREG;
10091 } else
b9b61529 10092 if(itype[i+1]==STORE || itype[i+1]==STORELR ||
10093 (opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
57871462 10094 map=INVCP;
10095 }
10096 if(itype[i+1]==LOADLR || itype[i+1]==STORELR ||
b9b61529 10097 itype[i+1]==C1LS || itype[i+1]==C2LS)
57871462 10098 temp=FTEMP;
10099 if((regs[i].regmap[hr]&63)!=rs1[i] && (regs[i].regmap[hr]&63)!=rs2[i] &&
10100 (regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
10101 (regs[i].regmap[hr]&63)!=rt1[i+1] && (regs[i].regmap[hr]&63)!=rt2[i+1] &&
10102 (regs[i].regmap[hr]^64)!=us1[i+1] && (regs[i].regmap[hr]^64)!=us2[i+1] &&
10103 (regs[i].regmap[hr]^64)!=d1 && (regs[i].regmap[hr]^64)!=d2 &&
10104 regs[i].regmap[hr]!=rs1[i+1] && regs[i].regmap[hr]!=rs2[i+1] &&
10105 (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=PTEMP &&
10106 regs[i].regmap[hr]!=RHASH && regs[i].regmap[hr]!=RHTBL &&
10107 regs[i].regmap[hr]!=RTEMP && regs[i].regmap[hr]!=CCREG &&
10108 regs[i].regmap[hr]!=map )
10109 {
10110 regs[i].regmap[hr]=-1;
10111 regs[i].isconst&=~(1<<hr);
10112 if((branch_regs[i].regmap[hr]&63)!=rs1[i] && (branch_regs[i].regmap[hr]&63)!=rs2[i] &&
10113 (branch_regs[i].regmap[hr]&63)!=rt1[i] && (branch_regs[i].regmap[hr]&63)!=rt2[i] &&
10114 (branch_regs[i].regmap[hr]&63)!=rt1[i+1] && (branch_regs[i].regmap[hr]&63)!=rt2[i+1] &&
10115 (branch_regs[i].regmap[hr]^64)!=us1[i+1] && (branch_regs[i].regmap[hr]^64)!=us2[i+1] &&
10116 (branch_regs[i].regmap[hr]^64)!=d1 && (branch_regs[i].regmap[hr]^64)!=d2 &&
10117 branch_regs[i].regmap[hr]!=rs1[i+1] && branch_regs[i].regmap[hr]!=rs2[i+1] &&
10118 (branch_regs[i].regmap[hr]&63)!=temp && branch_regs[i].regmap[hr]!=PTEMP &&
10119 branch_regs[i].regmap[hr]!=RHASH && branch_regs[i].regmap[hr]!=RHTBL &&
10120 branch_regs[i].regmap[hr]!=RTEMP && branch_regs[i].regmap[hr]!=CCREG &&
10121 branch_regs[i].regmap[hr]!=map)
10122 {
10123 branch_regs[i].regmap[hr]=-1;
10124 branch_regs[i].regmap_entry[hr]=-1;
10125 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
10126 {
10127 if(!likely[i]&&i<slen-2) {
10128 regmap_pre[i+2][hr]=-1;
79c75f1b 10129 regs[i+2].wasconst&=~(1<<hr);
57871462 10130 }
10131 }
10132 }
10133 }
10134 }
10135 else
10136 {
10137 // Non-branch
10138 if(i>0)
10139 {
10140 int d1=0,d2=0,map=-1,temp=-1;
10141 if(get_reg(regs[i].regmap,rt1[i]|64)>=0)
10142 {
10143 d1=dep1[i];
10144 d2=dep2[i];
10145 }
10146 if(using_tlb) {
10147 if(itype[i]==LOAD || itype[i]==LOADLR ||
10148 itype[i]==STORE || itype[i]==STORELR ||
b9b61529 10149 itype[i]==C1LS || itype[i]==C2LS)
57871462 10150 map=TLREG;
b9b61529 10151 } else if(itype[i]==STORE || itype[i]==STORELR ||
10152 (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
57871462 10153 map=INVCP;
10154 }
10155 if(itype[i]==LOADLR || itype[i]==STORELR ||
b9b61529 10156 itype[i]==C1LS || itype[i]==C2LS)
57871462 10157 temp=FTEMP;
10158 if((regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
10159 (regs[i].regmap[hr]^64)!=us1[i] && (regs[i].regmap[hr]^64)!=us2[i] &&
10160 (regs[i].regmap[hr]^64)!=d1 && (regs[i].regmap[hr]^64)!=d2 &&
10161 regs[i].regmap[hr]!=rs1[i] && regs[i].regmap[hr]!=rs2[i] &&
10162 (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=map &&
10163 (itype[i]!=SPAN||regs[i].regmap[hr]!=CCREG))
10164 {
10165 if(i<slen-1&&!is_ds[i]) {
10166 if(regmap_pre[i+1][hr]!=-1 || regs[i].regmap[hr]!=-1)
10167 if(regmap_pre[i+1][hr]!=regs[i].regmap[hr])
10168 if(regs[i].regmap[hr]<64||!((regs[i].was32>>(regs[i].regmap[hr]&63))&1))
10169 {
c43b5311 10170 SysPrintf("fail: %x (%d %d!=%d)\n",start+i*4,hr,regmap_pre[i+1][hr],regs[i].regmap[hr]);
57871462 10171 assert(regmap_pre[i+1][hr]==regs[i].regmap[hr]);
10172 }
10173 regmap_pre[i+1][hr]=-1;
10174 if(regs[i+1].regmap_entry[hr]==CCREG) regs[i+1].regmap_entry[hr]=-1;
79c75f1b 10175 regs[i+1].wasconst&=~(1<<hr);
57871462 10176 }
10177 regs[i].regmap[hr]=-1;
10178 regs[i].isconst&=~(1<<hr);
10179 }
10180 }
10181 }
10182 }
10183 }
10184 }
10185
10186 /* Pass 5 - Pre-allocate registers */
10187
10188 // If a register is allocated during a loop, try to allocate it for the
10189 // entire loop, if possible. This avoids loading/storing registers
10190 // inside of the loop.
198df76f 10191
57871462 10192 signed char f_regmap[HOST_REGS];
10193 clear_all_regs(f_regmap);
10194 for(i=0;i<slen-1;i++)
10195 {
10196 if(itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
10197 {
10198 if(ba[i]>=start && ba[i]<(start+i*4))
10199 if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU
10200 ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD
10201 ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS
10202 ||itype[i+1]==SHIFT||itype[i+1]==COP1||itype[i+1]==FLOAT
b9b61529 10203 ||itype[i+1]==FCOMP||itype[i+1]==FCONV
10204 ||itype[i+1]==COP2||itype[i+1]==C2LS||itype[i+1]==C2OP)
57871462 10205 {
10206 int t=(ba[i]-start)>>2;
10207 if(t>0&&(itype[t-1]!=UJUMP&&itype[t-1]!=RJUMP&&itype[t-1]!=CJUMP&&itype[t-1]!=SJUMP&&itype[t-1]!=FJUMP)) // loop_preload can't handle jumps into delay slots
198df76f 10208 if(t<2||(itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||rt1[t-2]!=31) // call/ret assumes no registers allocated
57871462 10209 for(hr=0;hr<HOST_REGS;hr++)
10210 {
10211 if(regs[i].regmap[hr]>64) {
10212 if(!((regs[i].dirty>>hr)&1))
10213 f_regmap[hr]=regs[i].regmap[hr];
10214 else f_regmap[hr]=-1;
10215 }
b372a952 10216 else if(regs[i].regmap[hr]>=0) {
10217 if(f_regmap[hr]!=regs[i].regmap[hr]) {
10218 // dealloc old register
10219 int n;
10220 for(n=0;n<HOST_REGS;n++)
10221 {
10222 if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
10223 }
10224 // and alloc new one
10225 f_regmap[hr]=regs[i].regmap[hr];
10226 }
10227 }
57871462 10228 if(branch_regs[i].regmap[hr]>64) {
10229 if(!((branch_regs[i].dirty>>hr)&1))
10230 f_regmap[hr]=branch_regs[i].regmap[hr];
10231 else f_regmap[hr]=-1;
10232 }
b372a952 10233 else if(branch_regs[i].regmap[hr]>=0) {
10234 if(f_regmap[hr]!=branch_regs[i].regmap[hr]) {
10235 // dealloc old register
10236 int n;
10237 for(n=0;n<HOST_REGS;n++)
10238 {
10239 if(f_regmap[n]==branch_regs[i].regmap[hr]) {f_regmap[n]=-1;}
10240 }
10241 // and alloc new one
10242 f_regmap[hr]=branch_regs[i].regmap[hr];
10243 }
10244 }
e1190b87 10245 if(ooo[i]) {
10246 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1])
10247 f_regmap[hr]=branch_regs[i].regmap[hr];
10248 }else{
10249 if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1])
57871462 10250 f_regmap[hr]=branch_regs[i].regmap[hr];
10251 }
10252 // Avoid dirty->clean transition
e1190b87 10253 #ifdef DESTRUCTIVE_WRITEBACK
57871462 10254 if(t>0) if(get_reg(regmap_pre[t],f_regmap[hr])>=0) if((regs[t].wasdirty>>get_reg(regmap_pre[t],f_regmap[hr]))&1) f_regmap[hr]=-1;
e1190b87 10255 #endif
10256 // This check is only strictly required in the DESTRUCTIVE_WRITEBACK
10257 // case above, however it's always a good idea. We can't hoist the
10258 // load if the register was already allocated, so there's no point
10259 // wasting time analyzing most of these cases. It only "succeeds"
10260 // when the mapping was different and the load can be replaced with
10261 // a mov, which is of negligible benefit. So such cases are
10262 // skipped below.
57871462 10263 if(f_regmap[hr]>0) {
198df76f 10264 if(regs[t].regmap[hr]==f_regmap[hr]||(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0)) {
57871462 10265 int r=f_regmap[hr];
10266 for(j=t;j<=i;j++)
10267 {
10268 //printf("Test %x -> %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r);
10269 if(r<34&&((unneeded_reg[j]>>r)&1)) break;
10270 if(r>63&&((unneeded_reg_upper[j]>>(r&63))&1)) break;
10271 if(r>63) {
10272 // NB This can exclude the case where the upper-half
10273 // register is lower numbered than the lower-half
10274 // register. Not sure if it's worth fixing...
10275 if(get_reg(regs[j].regmap,r&63)<0) break;
e1190b87 10276 if(get_reg(regs[j].regmap_entry,r&63)<0) break;
57871462 10277 if(regs[j].is32&(1LL<<(r&63))) break;
10278 }
10279 if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63)<TEMPREG) {
10280 //printf("Hit %x -> %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r);
10281 int k;
10282 if(regs[i].regmap[hr]==-1&&branch_regs[i].regmap[hr]==-1) {
10283 if(get_reg(regs[i+2].regmap,f_regmap[hr])>=0) break;
10284 if(r>63) {
10285 if(get_reg(regs[i].regmap,r&63)<0) break;
10286 if(get_reg(branch_regs[i].regmap,r&63)<0) break;
10287 }
10288 k=i;
10289 while(k>1&&regs[k-1].regmap[hr]==-1) {
e1190b87 10290 if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) {
10291 //printf("no free regs for store %x\n",start+(k-1)*4);
10292 break;
57871462 10293 }
57871462 10294 if(get_reg(regs[k-1].regmap,f_regmap[hr])>=0) {
10295 //printf("no-match due to different register\n");
10296 break;
10297 }
10298 if(itype[k-2]==UJUMP||itype[k-2]==RJUMP||itype[k-2]==CJUMP||itype[k-2]==SJUMP||itype[k-2]==FJUMP) {
10299 //printf("no-match due to branch\n");
10300 break;
10301 }
10302 // call/ret fast path assumes no registers allocated
198df76f 10303 if(k>2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)&&rt1[k-3]==31) {
57871462 10304 break;
10305 }
10306 if(r>63) {
10307 // NB This can exclude the case where the upper-half
10308 // register is lower numbered than the lower-half
10309 // register. Not sure if it's worth fixing...
10310 if(get_reg(regs[k-1].regmap,r&63)<0) break;
10311 if(regs[k-1].is32&(1LL<<(r&63))) break;
10312 }
10313 k--;
10314 }
10315 if(i<slen-1) {
10316 if((regs[k].is32&(1LL<<f_regmap[hr]))!=
10317 (regs[i+2].was32&(1LL<<f_regmap[hr]))) {
10318 //printf("bad match after branch\n");
10319 break;
10320 }
10321 }
10322 if(regs[k-1].regmap[hr]==f_regmap[hr]&&regmap_pre[k][hr]==f_regmap[hr]) {
10323 //printf("Extend r%d, %x ->\n",hr,start+k*4);
10324 while(k<i) {
10325 regs[k].regmap_entry[hr]=f_regmap[hr];
10326 regs[k].regmap[hr]=f_regmap[hr];
10327 regmap_pre[k+1][hr]=f_regmap[hr];
10328 regs[k].wasdirty&=~(1<<hr);
10329 regs[k].dirty&=~(1<<hr);
10330 regs[k].wasdirty|=(1<<hr)&regs[k-1].dirty;
10331 regs[k].dirty|=(1<<hr)&regs[k].wasdirty;
10332 regs[k].wasconst&=~(1<<hr);
10333 regs[k].isconst&=~(1<<hr);
10334 k++;
10335 }
10336 }
10337 else {
10338 //printf("Fail Extend r%d, %x ->\n",hr,start+k*4);
10339 break;
10340 }
10341 assert(regs[i-1].regmap[hr]==f_regmap[hr]);
10342 if(regs[i-1].regmap[hr]==f_regmap[hr]&&regmap_pre[i][hr]==f_regmap[hr]) {
10343 //printf("OK fill %x (r%d)\n",start+i*4,hr);
10344 regs[i].regmap_entry[hr]=f_regmap[hr];
10345 regs[i].regmap[hr]=f_regmap[hr];
10346 regs[i].wasdirty&=~(1<<hr);
10347 regs[i].dirty&=~(1<<hr);
10348 regs[i].wasdirty|=(1<<hr)&regs[i-1].dirty;
10349 regs[i].dirty|=(1<<hr)&regs[i-1].dirty;
10350 regs[i].wasconst&=~(1<<hr);
10351 regs[i].isconst&=~(1<<hr);
10352 branch_regs[i].regmap_entry[hr]=f_regmap[hr];
10353 branch_regs[i].wasdirty&=~(1<<hr);
10354 branch_regs[i].wasdirty|=(1<<hr)&regs[i].dirty;
10355 branch_regs[i].regmap[hr]=f_regmap[hr];
10356 branch_regs[i].dirty&=~(1<<hr);
10357 branch_regs[i].dirty|=(1<<hr)&regs[i].dirty;
10358 branch_regs[i].wasconst&=~(1<<hr);
10359 branch_regs[i].isconst&=~(1<<hr);
10360 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000) {
10361 regmap_pre[i+2][hr]=f_regmap[hr];
10362 regs[i+2].wasdirty&=~(1<<hr);
10363 regs[i+2].wasdirty|=(1<<hr)&regs[i].dirty;
10364 assert((branch_regs[i].is32&(1LL<<f_regmap[hr]))==
10365 (regs[i+2].was32&(1LL<<f_regmap[hr])));
10366 }
10367 }
10368 }
10369 for(k=t;k<j;k++) {
e1190b87 10370 // Alloc register clean at beginning of loop,
10371 // but may dirty it in pass 6
57871462 10372 regs[k].regmap_entry[hr]=f_regmap[hr];
10373 regs[k].regmap[hr]=f_regmap[hr];
57871462 10374 regs[k].dirty&=~(1<<hr);
10375 regs[k].wasconst&=~(1<<hr);
10376 regs[k].isconst&=~(1<<hr);
e1190b87 10377 if(itype[k]==UJUMP||itype[k]==RJUMP||itype[k]==CJUMP||itype[k]==SJUMP||itype[k]==FJUMP) {
10378 branch_regs[k].regmap_entry[hr]=f_regmap[hr];
10379 branch_regs[k].regmap[hr]=f_regmap[hr];
10380 branch_regs[k].dirty&=~(1<<hr);
10381 branch_regs[k].wasconst&=~(1<<hr);
10382 branch_regs[k].isconst&=~(1<<hr);
10383 if(itype[k]!=RJUMP&&itype[k]!=UJUMP&&(source[k]>>16)!=0x1000) {
10384 regmap_pre[k+2][hr]=f_regmap[hr];
10385 regs[k+2].wasdirty&=~(1<<hr);
10386 assert((branch_regs[k].is32&(1LL<<f_regmap[hr]))==
10387 (regs[k+2].was32&(1LL<<f_regmap[hr])));
10388 }
10389 }
10390 else
10391 {
10392 regmap_pre[k+1][hr]=f_regmap[hr];
10393 regs[k+1].wasdirty&=~(1<<hr);
10394 }
57871462 10395 }
10396 if(regs[j].regmap[hr]==f_regmap[hr])
10397 regs[j].regmap_entry[hr]=f_regmap[hr];
10398 break;
10399 }
10400 if(j==i) break;
10401 if(regs[j].regmap[hr]>=0)
10402 break;
10403 if(get_reg(regs[j].regmap,f_regmap[hr])>=0) {
10404 //printf("no-match due to different register\n");
10405 break;
10406 }
10407 if((regs[j+1].is32&(1LL<<f_regmap[hr]))!=(regs[j].is32&(1LL<<f_regmap[hr]))) {
10408 //printf("32/64 mismatch %x %d\n",start+j*4,hr);
10409 break;
10410 }
e1190b87 10411 if(itype[j]==UJUMP||itype[j]==RJUMP||(source[j]>>16)==0x1000)
10412 {
10413 // Stop on unconditional branch
10414 break;
10415 }
10416 if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP)
10417 {
10418 if(ooo[j]) {
10419 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1])
10420 break;
10421 }else{
10422 if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1])
10423 break;
10424 }
10425 if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) {
10426 //printf("no-match due to different register (branch)\n");
57871462 10427 break;
10428 }
10429 }
e1190b87 10430 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) {
10431 //printf("No free regs for store %x\n",start+j*4);
10432 break;
10433 }
57871462 10434 if(f_regmap[hr]>=64) {
10435 if(regs[j].is32&(1LL<<(f_regmap[hr]&63))) {
10436 break;
10437 }
10438 else
10439 {
10440 if(get_reg(regs[j].regmap,f_regmap[hr]&63)<0) {
10441 break;
10442 }
10443 }
10444 }
10445 }
10446 }
10447 }
10448 }
10449 }
10450 }else{
198df76f 10451 // Non branch or undetermined branch target
57871462 10452 for(hr=0;hr<HOST_REGS;hr++)
10453 {
10454 if(hr!=EXCLUDE_REG) {
10455 if(regs[i].regmap[hr]>64) {
10456 if(!((regs[i].dirty>>hr)&1))
10457 f_regmap[hr]=regs[i].regmap[hr];
10458 }
b372a952 10459 else if(regs[i].regmap[hr]>=0) {
10460 if(f_regmap[hr]!=regs[i].regmap[hr]) {
10461 // dealloc old register
10462 int n;
10463 for(n=0;n<HOST_REGS;n++)
10464 {
10465 if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
10466 }
10467 // and alloc new one
10468 f_regmap[hr]=regs[i].regmap[hr];
10469 }
10470 }
57871462 10471 }
10472 }
10473 // Try to restore cycle count at branch targets
10474 if(bt[i]) {
10475 for(j=i;j<slen-1;j++) {
10476 if(regs[j].regmap[HOST_CCREG]!=-1) break;
e1190b87 10477 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) {
10478 //printf("no free regs for store %x\n",start+j*4);
10479 break;
57871462 10480 }
57871462 10481 }
10482 if(regs[j].regmap[HOST_CCREG]==CCREG) {
10483 int k=i;
10484 //printf("Extend CC, %x -> %x\n",start+k*4,start+j*4);
10485 while(k<j) {
10486 regs[k].regmap_entry[HOST_CCREG]=CCREG;
10487 regs[k].regmap[HOST_CCREG]=CCREG;
10488 regmap_pre[k+1][HOST_CCREG]=CCREG;
10489 regs[k+1].wasdirty|=1<<HOST_CCREG;
10490 regs[k].dirty|=1<<HOST_CCREG;
10491 regs[k].wasconst&=~(1<<HOST_CCREG);
10492 regs[k].isconst&=~(1<<HOST_CCREG);
10493 k++;
10494 }
10495 regs[j].regmap_entry[HOST_CCREG]=CCREG;
10496 }
10497 // Work backwards from the branch target
10498 if(j>i&&f_regmap[HOST_CCREG]==CCREG)
10499 {
10500 //printf("Extend backwards\n");
10501 int k;
10502 k=i;
10503 while(regs[k-1].regmap[HOST_CCREG]==-1) {
e1190b87 10504 if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) {
10505 //printf("no free regs for store %x\n",start+(k-1)*4);
10506 break;
57871462 10507 }
57871462 10508 k--;
10509 }
10510 if(regs[k-1].regmap[HOST_CCREG]==CCREG) {
10511 //printf("Extend CC, %x ->\n",start+k*4);
10512 while(k<=i) {
10513 regs[k].regmap_entry[HOST_CCREG]=CCREG;
10514 regs[k].regmap[HOST_CCREG]=CCREG;
10515 regmap_pre[k+1][HOST_CCREG]=CCREG;
10516 regs[k+1].wasdirty|=1<<HOST_CCREG;
10517 regs[k].dirty|=1<<HOST_CCREG;
10518 regs[k].wasconst&=~(1<<HOST_CCREG);
10519 regs[k].isconst&=~(1<<HOST_CCREG);
10520 k++;
10521 }
10522 }
10523 else {
10524 //printf("Fail Extend CC, %x ->\n",start+k*4);
10525 }
10526 }
10527 }
10528 if(itype[i]!=STORE&&itype[i]!=STORELR&&itype[i]!=C1LS&&itype[i]!=SHIFT&&
10529 itype[i]!=NOP&&itype[i]!=MOV&&itype[i]!=ALU&&itype[i]!=SHIFTIMM&&
10530 itype[i]!=IMM16&&itype[i]!=LOAD&&itype[i]!=COP1&&itype[i]!=FLOAT&&
e1190b87 10531 itype[i]!=FCONV&&itype[i]!=FCOMP)
57871462 10532 {
10533 memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap));
10534 }
10535 }
10536 }
10537
d61de97e 10538 // Cache memory offset or tlb map pointer if a register is available
10539 #ifndef HOST_IMM_ADDR32
10540 #ifndef RAM_OFFSET
10541 if(using_tlb)
10542 #endif
10543 {
10544 int earliest_available[HOST_REGS];
10545 int loop_start[HOST_REGS];
10546 int score[HOST_REGS];
10547 int end[HOST_REGS];
10548 int reg=using_tlb?MMREG:ROREG;
10549
10550 // Init
10551 for(hr=0;hr<HOST_REGS;hr++) {
10552 score[hr]=0;earliest_available[hr]=0;
10553 loop_start[hr]=MAXBLOCK;
10554 }
10555 for(i=0;i<slen-1;i++)
10556 {
10557 // Can't do anything if no registers are available
10558 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i]) {
10559 for(hr=0;hr<HOST_REGS;hr++) {
10560 score[hr]=0;earliest_available[hr]=i+1;
10561 loop_start[hr]=MAXBLOCK;
10562 }
10563 }
10564 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
10565 if(!ooo[i]) {
10566 if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) {
10567 for(hr=0;hr<HOST_REGS;hr++) {
10568 score[hr]=0;earliest_available[hr]=i+1;
10569 loop_start[hr]=MAXBLOCK;
10570 }
10571 }
198df76f 10572 }else{
10573 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) {
10574 for(hr=0;hr<HOST_REGS;hr++) {
10575 score[hr]=0;earliest_available[hr]=i+1;
10576 loop_start[hr]=MAXBLOCK;
10577 }
10578 }
d61de97e 10579 }
10580 }
10581 // Mark unavailable registers
10582 for(hr=0;hr<HOST_REGS;hr++) {
10583 if(regs[i].regmap[hr]>=0) {
10584 score[hr]=0;earliest_available[hr]=i+1;
10585 loop_start[hr]=MAXBLOCK;
10586 }
10587 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
10588 if(branch_regs[i].regmap[hr]>=0) {
10589 score[hr]=0;earliest_available[hr]=i+2;
10590 loop_start[hr]=MAXBLOCK;
10591 }
10592 }
10593 }
10594 // No register allocations after unconditional jumps
10595 if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000)
10596 {
10597 for(hr=0;hr<HOST_REGS;hr++) {
10598 score[hr]=0;earliest_available[hr]=i+2;
10599 loop_start[hr]=MAXBLOCK;
10600 }
10601 i++; // Skip delay slot too
10602 //printf("skip delay slot: %x\n",start+i*4);
10603 }
10604 else
10605 // Possible match
10606 if(itype[i]==LOAD||itype[i]==LOADLR||
10607 itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS) {
10608 for(hr=0;hr<HOST_REGS;hr++) {
10609 if(hr!=EXCLUDE_REG) {
10610 end[hr]=i-1;
10611 for(j=i;j<slen-1;j++) {
10612 if(regs[j].regmap[hr]>=0) break;
10613 if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) {
10614 if(branch_regs[j].regmap[hr]>=0) break;
10615 if(ooo[j]) {
10616 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break;
10617 }else{
10618 if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break;
10619 }
10620 }
10621 else if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) break;
10622 if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) {
10623 int t=(ba[j]-start)>>2;
10624 if(t<j&&t>=earliest_available[hr]) {
198df76f 10625 if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) { // call/ret assumes no registers allocated
10626 // Score a point for hoisting loop invariant
10627 if(t<loop_start[hr]) loop_start[hr]=t;
10628 //printf("set loop_start: i=%x j=%x (%x)\n",start+i*4,start+j*4,start+t*4);
10629 score[hr]++;
10630 end[hr]=j;
10631 }
d61de97e 10632 }
10633 else if(t<j) {
10634 if(regs[t].regmap[hr]==reg) {
10635 // Score a point if the branch target matches this register
10636 score[hr]++;
10637 end[hr]=j;
10638 }
10639 }
10640 if(itype[j+1]==LOAD||itype[j+1]==LOADLR||
10641 itype[j+1]==STORE||itype[j+1]==STORELR||itype[j+1]==C1LS) {
10642 score[hr]++;
10643 end[hr]=j;
10644 }
10645 }
10646 if(itype[j]==UJUMP||itype[j]==RJUMP||(source[j]>>16)==0x1000)
10647 {
10648 // Stop on unconditional branch
10649 break;
10650 }
10651 else
10652 if(itype[j]==LOAD||itype[j]==LOADLR||
10653 itype[j]==STORE||itype[j]==STORELR||itype[j]==C1LS) {
10654 score[hr]++;
10655 end[hr]=j;
10656 }
10657 }
10658 }
10659 }
10660 // Find highest score and allocate that register
10661 int maxscore=0;
10662 for(hr=0;hr<HOST_REGS;hr++) {
10663 if(hr!=EXCLUDE_REG) {
10664 if(score[hr]>score[maxscore]) {
10665 maxscore=hr;
10666 //printf("highest score: %d %d (%x->%x)\n",score[hr],hr,start+i*4,start+end[hr]*4);
10667 }
10668 }
10669 }
10670 if(score[maxscore]>1)
10671 {
10672 if(i<loop_start[maxscore]) loop_start[maxscore]=i;
10673 for(j=loop_start[maxscore];j<slen&&j<=end[maxscore];j++) {
10674 //if(regs[j].regmap[maxscore]>=0) {printf("oops: %x %x was %d=%d\n",loop_start[maxscore]*4+start,j*4+start,maxscore,regs[j].regmap[maxscore]);}
10675 assert(regs[j].regmap[maxscore]<0);
10676 if(j>loop_start[maxscore]) regs[j].regmap_entry[maxscore]=reg;
10677 regs[j].regmap[maxscore]=reg;
10678 regs[j].dirty&=~(1<<maxscore);
10679 regs[j].wasconst&=~(1<<maxscore);
10680 regs[j].isconst&=~(1<<maxscore);
10681 if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) {
10682 branch_regs[j].regmap[maxscore]=reg;
10683 branch_regs[j].wasdirty&=~(1<<maxscore);
10684 branch_regs[j].dirty&=~(1<<maxscore);
10685 branch_regs[j].wasconst&=~(1<<maxscore);
10686 branch_regs[j].isconst&=~(1<<maxscore);
10687 if(itype[j]!=RJUMP&&itype[j]!=UJUMP&&(source[j]>>16)!=0x1000) {
10688 regmap_pre[j+2][maxscore]=reg;
10689 regs[j+2].wasdirty&=~(1<<maxscore);
10690 }
10691 // loop optimization (loop_preload)
10692 int t=(ba[j]-start)>>2;
198df76f 10693 if(t==loop_start[maxscore]) {
10694 if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) // call/ret assumes no registers allocated
10695 regs[t].regmap_entry[maxscore]=reg;
10696 }
d61de97e 10697 }
10698 else
10699 {
10700 if(j<1||(itype[j-1]!=RJUMP&&itype[j-1]!=UJUMP&&itype[j-1]!=CJUMP&&itype[j-1]!=SJUMP&&itype[j-1]!=FJUMP)) {
10701 regmap_pre[j+1][maxscore]=reg;
10702 regs[j+1].wasdirty&=~(1<<maxscore);
10703 }
10704 }
10705 }
10706 i=j-1;
10707 if(itype[j-1]==RJUMP||itype[j-1]==UJUMP||itype[j-1]==CJUMP||itype[j-1]==SJUMP||itype[j-1]==FJUMP) i++; // skip delay slot
10708 for(hr=0;hr<HOST_REGS;hr++) {
10709 score[hr]=0;earliest_available[hr]=i+i;
10710 loop_start[hr]=MAXBLOCK;
10711 }
10712 }
10713 }
10714 }
10715 }
10716 #endif
10717
57871462 10718 // This allocates registers (if possible) one instruction prior
10719 // to use, which can avoid a load-use penalty on certain CPUs.
10720 for(i=0;i<slen-1;i++)
10721 {
10722 if(!i||(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP))
10723 {
10724 if(!bt[i+1])
10725 {
b9b61529 10726 if(itype[i]==ALU||itype[i]==MOV||itype[i]==LOAD||itype[i]==SHIFTIMM||itype[i]==IMM16
10727 ||((itype[i]==COP1||itype[i]==COP2)&&opcode2[i]<3))
57871462 10728 {
10729 if(rs1[i+1]) {
10730 if((hr=get_reg(regs[i+1].regmap,rs1[i+1]))>=0)
10731 {
10732 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
10733 {
10734 regs[i].regmap[hr]=regs[i+1].regmap[hr];
10735 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
10736 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
10737 regs[i].isconst&=~(1<<hr);
10738 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
10739 constmap[i][hr]=constmap[i+1][hr];
10740 regs[i+1].wasdirty&=~(1<<hr);
10741 regs[i].dirty&=~(1<<hr);
10742 }
10743 }
10744 }
10745 if(rs2[i+1]) {
10746 if((hr=get_reg(regs[i+1].regmap,rs2[i+1]))>=0)
10747 {
10748 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
10749 {
10750 regs[i].regmap[hr]=regs[i+1].regmap[hr];
10751 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
10752 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
10753 regs[i].isconst&=~(1<<hr);
10754 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
10755 constmap[i][hr]=constmap[i+1][hr];
10756 regs[i+1].wasdirty&=~(1<<hr);
10757 regs[i].dirty&=~(1<<hr);
10758 }
10759 }
10760 }
198df76f 10761 // Preload target address for load instruction (non-constant)
57871462 10762 if(itype[i+1]==LOAD&&rs1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) {
10763 if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0)
10764 {
10765 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
10766 {
10767 regs[i].regmap[hr]=rs1[i+1];
10768 regmap_pre[i+1][hr]=rs1[i+1];
10769 regs[i+1].regmap_entry[hr]=rs1[i+1];
10770 regs[i].isconst&=~(1<<hr);
10771 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
10772 constmap[i][hr]=constmap[i+1][hr];
10773 regs[i+1].wasdirty&=~(1<<hr);
10774 regs[i].dirty&=~(1<<hr);
10775 }
10776 }
10777 }
198df76f 10778 // Load source into target register
57871462 10779 if(lt1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) {
10780 if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0)
10781 {
10782 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
10783 {
10784 regs[i].regmap[hr]=rs1[i+1];
10785 regmap_pre[i+1][hr]=rs1[i+1];
10786 regs[i+1].regmap_entry[hr]=rs1[i+1];
10787 regs[i].isconst&=~(1<<hr);
10788 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
10789 constmap[i][hr]=constmap[i+1][hr];
10790 regs[i+1].wasdirty&=~(1<<hr);
10791 regs[i].dirty&=~(1<<hr);
10792 }
10793 }
10794 }
198df76f 10795 // Preload map address
57871462 10796 #ifndef HOST_IMM_ADDR32
b9b61529 10797 if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) {
57871462 10798 hr=get_reg(regs[i+1].regmap,TLREG);
10799 if(hr>=0) {
10800 int sr=get_reg(regs[i+1].regmap,rs1[i+1]);
10801 if(sr>=0&&((regs[i+1].wasconst>>sr)&1)) {
10802 int nr;
10803 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
10804 {
10805 regs[i].regmap[hr]=MGEN1+((i+1)&1);
10806 regmap_pre[i+1][hr]=MGEN1+((i+1)&1);
10807 regs[i+1].regmap_entry[hr]=MGEN1+((i+1)&1);
10808 regs[i].isconst&=~(1<<hr);
10809 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
10810 constmap[i][hr]=constmap[i+1][hr];
10811 regs[i+1].wasdirty&=~(1<<hr);
10812 regs[i].dirty&=~(1<<hr);
10813 }
10814 else if((nr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1))>=0)
10815 {
10816 // move it to another register
10817 regs[i+1].regmap[hr]=-1;
10818 regmap_pre[i+2][hr]=-1;
10819 regs[i+1].regmap[nr]=TLREG;
10820 regmap_pre[i+2][nr]=TLREG;
10821 regs[i].regmap[nr]=MGEN1+((i+1)&1);
10822 regmap_pre[i+1][nr]=MGEN1+((i+1)&1);
10823 regs[i+1].regmap_entry[nr]=MGEN1+((i+1)&1);
10824 regs[i].isconst&=~(1<<nr);
10825 regs[i+1].isconst&=~(1<<nr);
10826 regs[i].dirty&=~(1<<nr);
10827 regs[i+1].wasdirty&=~(1<<nr);
10828 regs[i+1].dirty&=~(1<<nr);
10829 regs[i+2].wasdirty&=~(1<<nr);
10830 }
10831 }
10832 }
10833 }
10834 #endif
198df76f 10835 // Address for store instruction (non-constant)
b9b61529 10836 if(itype[i+1]==STORE||itype[i+1]==STORELR
10837 ||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2
57871462 10838 if(get_reg(regs[i+1].regmap,rs1[i+1])<0) {
10839 hr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1);
10840 if(hr<0) hr=get_reg(regs[i+1].regmap,-1);
10841 else {regs[i+1].regmap[hr]=AGEN1+((i+1)&1);regs[i+1].isconst&=~(1<<hr);}
10842 assert(hr>=0);
10843 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
10844 {
10845 regs[i].regmap[hr]=rs1[i+1];
10846 regmap_pre[i+1][hr]=rs1[i+1];
10847 regs[i+1].regmap_entry[hr]=rs1[i+1];
10848 regs[i].isconst&=~(1<<hr);
10849 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
10850 constmap[i][hr]=constmap[i+1][hr];
10851 regs[i+1].wasdirty&=~(1<<hr);
10852 regs[i].dirty&=~(1<<hr);
10853 }
10854 }
10855 }
b9b61529 10856 if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) { // LWC1/LDC1, LWC2/LDC2
57871462 10857 if(get_reg(regs[i+1].regmap,rs1[i+1])<0) {
10858 int nr;
10859 hr=get_reg(regs[i+1].regmap,FTEMP);
10860 assert(hr>=0);
10861 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
10862 {
10863 regs[i].regmap[hr]=rs1[i+1];
10864 regmap_pre[i+1][hr]=rs1[i+1];
10865 regs[i+1].regmap_entry[hr]=rs1[i+1];
10866 regs[i].isconst&=~(1<<hr);
10867 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
10868 constmap[i][hr]=constmap[i+1][hr];
10869 regs[i+1].wasdirty&=~(1<<hr);
10870 regs[i].dirty&=~(1<<hr);
10871 }
10872 else if((nr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1))>=0)
10873 {
10874 // move it to another register
10875 regs[i+1].regmap[hr]=-1;
10876 regmap_pre[i+2][hr]=-1;
10877 regs[i+1].regmap[nr]=FTEMP;
10878 regmap_pre[i+2][nr]=FTEMP;
10879 regs[i].regmap[nr]=rs1[i+1];
10880 regmap_pre[i+1][nr]=rs1[i+1];
10881 regs[i+1].regmap_entry[nr]=rs1[i+1];
10882 regs[i].isconst&=~(1<<nr);
10883 regs[i+1].isconst&=~(1<<nr);
10884 regs[i].dirty&=~(1<<nr);
10885 regs[i+1].wasdirty&=~(1<<nr);
10886 regs[i+1].dirty&=~(1<<nr);
10887 regs[i+2].wasdirty&=~(1<<nr);
10888 }
10889 }
10890 }
b9b61529 10891 if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) {
57871462 10892 if(itype[i+1]==LOAD)
10893 hr=get_reg(regs[i+1].regmap,rt1[i+1]);
b9b61529 10894 if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2
57871462 10895 hr=get_reg(regs[i+1].regmap,FTEMP);
b9b61529 10896 if(itype[i+1]==STORE||itype[i+1]==STORELR||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1/SWC2/SDC2
57871462 10897 hr=get_reg(regs[i+1].regmap,AGEN1+((i+1)&1));
10898 if(hr<0) hr=get_reg(regs[i+1].regmap,-1);
10899 }
10900 if(hr>=0&&regs[i].regmap[hr]<0) {
10901 int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
10902 if(rs>=0&&((regs[i+1].wasconst>>rs)&1)) {
10903 regs[i].regmap[hr]=AGEN1+((i+1)&1);
10904 regmap_pre[i+1][hr]=AGEN1+((i+1)&1);
10905 regs[i+1].regmap_entry[hr]=AGEN1+((i+1)&1);
10906 regs[i].isconst&=~(1<<hr);
10907 regs[i+1].wasdirty&=~(1<<hr);
10908 regs[i].dirty&=~(1<<hr);
10909 }
10910 }
10911 }
10912 }
10913 }
10914 }
10915 }
10916
10917 /* Pass 6 - Optimize clean/dirty state */
10918 clean_registers(0,slen-1,1);
10919
10920 /* Pass 7 - Identify 32-bit registers */
a28c6ce8 10921#ifndef FORCE32
57871462 10922 provisional_r32();
10923
10924 u_int r32=0;
10925
10926 for (i=slen-1;i>=0;i--)
10927 {
10928 int hr;
10929 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
10930 {
10931 if(ba[i]<start || ba[i]>=(start+slen*4))
10932 {
10933 // Branch out of this block, don't need anything
10934 r32=0;
10935 }
10936 else
10937 {
10938 // Internal branch
10939 // Need whatever matches the target
10940 // (and doesn't get overwritten by the delay slot instruction)
10941 r32=0;
10942 int t=(ba[i]-start)>>2;
10943 if(ba[i]>start+i*4) {
10944 // Forward branch
10945 if(!(requires_32bit[t]&~regs[i].was32))
10946 r32|=requires_32bit[t]&(~(1LL<<rt1[i+1]))&(~(1LL<<rt2[i+1]));
10947 }else{
10948 // Backward branch
10949 //if(!(regs[t].was32&~unneeded_reg_upper[t]&~regs[i].was32))
10950 // r32|=regs[t].was32&~unneeded_reg_upper[t]&(~(1LL<<rt1[i+1]))&(~(1LL<<rt2[i+1]));
10951 if(!(pr32[t]&~regs[i].was32))
10952 r32|=pr32[t]&(~(1LL<<rt1[i+1]))&(~(1LL<<rt2[i+1]));
10953 }
10954 }
10955 // Conditional branch may need registers for following instructions
10956 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
10957 {
10958 if(i<slen-2) {
10959 r32|=requires_32bit[i+2];
10960 r32&=regs[i].was32;
10961 // Mark this address as a branch target since it may be called
10962 // upon return from interrupt
10963 bt[i+2]=1;
10964 }
10965 }
10966 // Merge in delay slot
10967 if(!likely[i]) {
10968 // These are overwritten unless the branch is "likely"
10969 // and the delay slot is nullified if not taken
10970 r32&=~(1LL<<rt1[i+1]);
10971 r32&=~(1LL<<rt2[i+1]);
10972 }
10973 // Assume these are needed (delay slot)
10974 if(us1[i+1]>0)
10975 {
10976 if((regs[i].was32>>us1[i+1])&1) r32|=1LL<<us1[i+1];
10977 }
10978 if(us2[i+1]>0)
10979 {
10980 if((regs[i].was32>>us2[i+1])&1) r32|=1LL<<us2[i+1];
10981 }
10982 if(dep1[i+1]&&!((unneeded_reg_upper[i]>>dep1[i+1])&1))
10983 {
10984 if((regs[i].was32>>dep1[i+1])&1) r32|=1LL<<dep1[i+1];
10985 }
10986 if(dep2[i+1]&&!((unneeded_reg_upper[i]>>dep2[i+1])&1))
10987 {
10988 if((regs[i].was32>>dep2[i+1])&1) r32|=1LL<<dep2[i+1];
10989 }
10990 }
1e973cb0 10991 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 10992 {
10993 // SYSCALL instruction (software interrupt)
10994 r32=0;
10995 }
10996 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
10997 {
10998 // ERET instruction (return from interrupt)
10999 r32=0;
11000 }
11001 // Check 32 bits
11002 r32&=~(1LL<<rt1[i]);
11003 r32&=~(1LL<<rt2[i]);
11004 if(us1[i]>0)
11005 {
11006 if((regs[i].was32>>us1[i])&1) r32|=1LL<<us1[i];
11007 }
11008 if(us2[i]>0)
11009 {
11010 if((regs[i].was32>>us2[i])&1) r32|=1LL<<us2[i];
11011 }
11012 if(dep1[i]&&!((unneeded_reg_upper[i]>>dep1[i])&1))
11013 {
11014 if((regs[i].was32>>dep1[i])&1) r32|=1LL<<dep1[i];
11015 }
11016 if(dep2[i]&&!((unneeded_reg_upper[i]>>dep2[i])&1))
11017 {
11018 if((regs[i].was32>>dep2[i])&1) r32|=1LL<<dep2[i];
11019 }
11020 requires_32bit[i]=r32;
11021
11022 // Dirty registers which are 32-bit, require 32-bit input
11023 // as they will be written as 32-bit values
11024 for(hr=0;hr<HOST_REGS;hr++)
11025 {
11026 if(regs[i].regmap_entry[hr]>0&&regs[i].regmap_entry[hr]<64) {
11027 if((regs[i].was32>>regs[i].regmap_entry[hr])&(regs[i].wasdirty>>hr)&1) {
11028 if(!((unneeded_reg_upper[i]>>regs[i].regmap_entry[hr])&1))
11029 requires_32bit[i]|=1LL<<regs[i].regmap_entry[hr];
11030 }
11031 }
11032 }
11033 //requires_32bit[i]=is32[i]&~unneeded_reg_upper[i]; // DEBUG
11034 }
04fd948a 11035#else
11036 for (i=slen-1;i>=0;i--)
11037 {
11038 if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
11039 {
11040 // Conditional branch
11041 if((source[i]>>16)!=0x1000&&i<slen-2) {
11042 // Mark this address as a branch target since it may be called
11043 // upon return from interrupt
11044 bt[i+2]=1;
11045 }
11046 }
11047 }
a28c6ce8 11048#endif
57871462 11049
11050 if(itype[slen-1]==SPAN) {
11051 bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception
11052 }
4600ba03 11053
11054#ifdef DISASM
57871462 11055 /* Debug/disassembly */
57871462 11056 for(i=0;i<slen;i++)
11057 {
11058 printf("U:");
11059 int r;
11060 for(r=1;r<=CCREG;r++) {
11061 if((unneeded_reg[i]>>r)&1) {
11062 if(r==HIREG) printf(" HI");
11063 else if(r==LOREG) printf(" LO");
11064 else printf(" r%d",r);
11065 }
11066 }
90ae6d4e 11067#ifndef FORCE32
57871462 11068 printf(" UU:");
11069 for(r=1;r<=CCREG;r++) {
11070 if(((unneeded_reg_upper[i]&~unneeded_reg[i])>>r)&1) {
11071 if(r==HIREG) printf(" HI");
11072 else if(r==LOREG) printf(" LO");
11073 else printf(" r%d",r);
11074 }
11075 }
11076 printf(" 32:");
11077 for(r=0;r<=CCREG;r++) {
11078 //if(((is32[i]>>r)&(~unneeded_reg[i]>>r))&1) {
11079 if((regs[i].was32>>r)&1) {
11080 if(r==CCREG) printf(" CC");
11081 else if(r==HIREG) printf(" HI");
11082 else if(r==LOREG) printf(" LO");
11083 else printf(" r%d",r);
11084 }
11085 }
90ae6d4e 11086#endif
57871462 11087 printf("\n");
11088 #if defined(__i386__) || defined(__x86_64__)
11089 printf("pre: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7]);
11090 #endif
11091 #ifdef __arm__
11092 printf("pre: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][4],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7],regmap_pre[i][8],regmap_pre[i][9],regmap_pre[i][10],regmap_pre[i][12]);
11093 #endif
11094 printf("needs: ");
11095 if(needed_reg[i]&1) printf("eax ");
11096 if((needed_reg[i]>>1)&1) printf("ecx ");
11097 if((needed_reg[i]>>2)&1) printf("edx ");
11098 if((needed_reg[i]>>3)&1) printf("ebx ");
11099 if((needed_reg[i]>>5)&1) printf("ebp ");
11100 if((needed_reg[i]>>6)&1) printf("esi ");
11101 if((needed_reg[i]>>7)&1) printf("edi ");
11102 printf("r:");
11103 for(r=0;r<=CCREG;r++) {
11104 //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) {
11105 if((requires_32bit[i]>>r)&1) {
11106 if(r==CCREG) printf(" CC");
11107 else if(r==HIREG) printf(" HI");
11108 else if(r==LOREG) printf(" LO");
11109 else printf(" r%d",r);
11110 }
11111 }
11112 printf("\n");
11113 /*printf("pr:");
11114 for(r=0;r<=CCREG;r++) {
11115 //if(((requires_32bit[i]>>r)&(~unneeded_reg[i]>>r))&1) {
11116 if((pr32[i]>>r)&1) {
11117 if(r==CCREG) printf(" CC");
11118 else if(r==HIREG) printf(" HI");
11119 else if(r==LOREG) printf(" LO");
11120 else printf(" r%d",r);
11121 }
11122 }
11123 if(pr32[i]!=requires_32bit[i]) printf(" OOPS");
11124 printf("\n");*/
11125 #if defined(__i386__) || defined(__x86_64__)
11126 printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]);
11127 printf("dirty: ");
11128 if(regs[i].wasdirty&1) printf("eax ");
11129 if((regs[i].wasdirty>>1)&1) printf("ecx ");
11130 if((regs[i].wasdirty>>2)&1) printf("edx ");
11131 if((regs[i].wasdirty>>3)&1) printf("ebx ");
11132 if((regs[i].wasdirty>>5)&1) printf("ebp ");
11133 if((regs[i].wasdirty>>6)&1) printf("esi ");
11134 if((regs[i].wasdirty>>7)&1) printf("edi ");
11135 #endif
11136 #ifdef __arm__
11137 printf("entry: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[4],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7],regs[i].regmap_entry[8],regs[i].regmap_entry[9],regs[i].regmap_entry[10],regs[i].regmap_entry[12]);
11138 printf("dirty: ");
11139 if(regs[i].wasdirty&1) printf("r0 ");
11140 if((regs[i].wasdirty>>1)&1) printf("r1 ");
11141 if((regs[i].wasdirty>>2)&1) printf("r2 ");
11142 if((regs[i].wasdirty>>3)&1) printf("r3 ");
11143 if((regs[i].wasdirty>>4)&1) printf("r4 ");
11144 if((regs[i].wasdirty>>5)&1) printf("r5 ");
11145 if((regs[i].wasdirty>>6)&1) printf("r6 ");
11146 if((regs[i].wasdirty>>7)&1) printf("r7 ");
11147 if((regs[i].wasdirty>>8)&1) printf("r8 ");
11148 if((regs[i].wasdirty>>9)&1) printf("r9 ");
11149 if((regs[i].wasdirty>>10)&1) printf("r10 ");
11150 if((regs[i].wasdirty>>12)&1) printf("r12 ");
11151 #endif
11152 printf("\n");
11153 disassemble_inst(i);
11154 //printf ("ccadj[%d] = %d\n",i,ccadj[i]);
11155 #if defined(__i386__) || defined(__x86_64__)
11156 printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7]);
11157 if(regs[i].dirty&1) printf("eax ");
11158 if((regs[i].dirty>>1)&1) printf("ecx ");
11159 if((regs[i].dirty>>2)&1) printf("edx ");
11160 if((regs[i].dirty>>3)&1) printf("ebx ");
11161 if((regs[i].dirty>>5)&1) printf("ebp ");
11162 if((regs[i].dirty>>6)&1) printf("esi ");
11163 if((regs[i].dirty>>7)&1) printf("edi ");
11164 #endif
11165 #ifdef __arm__
11166 printf("r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[4],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7],regs[i].regmap[8],regs[i].regmap[9],regs[i].regmap[10],regs[i].regmap[12]);
11167 if(regs[i].dirty&1) printf("r0 ");
11168 if((regs[i].dirty>>1)&1) printf("r1 ");
11169 if((regs[i].dirty>>2)&1) printf("r2 ");
11170 if((regs[i].dirty>>3)&1) printf("r3 ");
11171 if((regs[i].dirty>>4)&1) printf("r4 ");
11172 if((regs[i].dirty>>5)&1) printf("r5 ");
11173 if((regs[i].dirty>>6)&1) printf("r6 ");
11174 if((regs[i].dirty>>7)&1) printf("r7 ");
11175 if((regs[i].dirty>>8)&1) printf("r8 ");
11176 if((regs[i].dirty>>9)&1) printf("r9 ");
11177 if((regs[i].dirty>>10)&1) printf("r10 ");
11178 if((regs[i].dirty>>12)&1) printf("r12 ");
11179 #endif
11180 printf("\n");
11181 if(regs[i].isconst) {
11182 printf("constants: ");
11183 #if defined(__i386__) || defined(__x86_64__)
11184 if(regs[i].isconst&1) printf("eax=%x ",(int)constmap[i][0]);
11185 if((regs[i].isconst>>1)&1) printf("ecx=%x ",(int)constmap[i][1]);
11186 if((regs[i].isconst>>2)&1) printf("edx=%x ",(int)constmap[i][2]);
11187 if((regs[i].isconst>>3)&1) printf("ebx=%x ",(int)constmap[i][3]);
11188 if((regs[i].isconst>>5)&1) printf("ebp=%x ",(int)constmap[i][5]);
11189 if((regs[i].isconst>>6)&1) printf("esi=%x ",(int)constmap[i][6]);
11190 if((regs[i].isconst>>7)&1) printf("edi=%x ",(int)constmap[i][7]);
11191 #endif
11192 #ifdef __arm__
11193 if(regs[i].isconst&1) printf("r0=%x ",(int)constmap[i][0]);
11194 if((regs[i].isconst>>1)&1) printf("r1=%x ",(int)constmap[i][1]);
11195 if((regs[i].isconst>>2)&1) printf("r2=%x ",(int)constmap[i][2]);
11196 if((regs[i].isconst>>3)&1) printf("r3=%x ",(int)constmap[i][3]);
11197 if((regs[i].isconst>>4)&1) printf("r4=%x ",(int)constmap[i][4]);
11198 if((regs[i].isconst>>5)&1) printf("r5=%x ",(int)constmap[i][5]);
11199 if((regs[i].isconst>>6)&1) printf("r6=%x ",(int)constmap[i][6]);
11200 if((regs[i].isconst>>7)&1) printf("r7=%x ",(int)constmap[i][7]);
11201 if((regs[i].isconst>>8)&1) printf("r8=%x ",(int)constmap[i][8]);
11202 if((regs[i].isconst>>9)&1) printf("r9=%x ",(int)constmap[i][9]);
11203 if((regs[i].isconst>>10)&1) printf("r10=%x ",(int)constmap[i][10]);
11204 if((regs[i].isconst>>12)&1) printf("r12=%x ",(int)constmap[i][12]);
11205 #endif
11206 printf("\n");
11207 }
90ae6d4e 11208#ifndef FORCE32
57871462 11209 printf(" 32:");
11210 for(r=0;r<=CCREG;r++) {
11211 if((regs[i].is32>>r)&1) {
11212 if(r==CCREG) printf(" CC");
11213 else if(r==HIREG) printf(" HI");
11214 else if(r==LOREG) printf(" LO");
11215 else printf(" r%d",r);
11216 }
11217 }
11218 printf("\n");
90ae6d4e 11219#endif
57871462 11220 /*printf(" p32:");
11221 for(r=0;r<=CCREG;r++) {
11222 if((p32[i]>>r)&1) {
11223 if(r==CCREG) printf(" CC");
11224 else if(r==HIREG) printf(" HI");
11225 else if(r==LOREG) printf(" LO");
11226 else printf(" r%d",r);
11227 }
11228 }
11229 if(p32[i]!=regs[i].is32) printf(" NO MATCH\n");
11230 else printf("\n");*/
11231 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
11232 #if defined(__i386__) || defined(__x86_64__)
11233 printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
11234 if(branch_regs[i].dirty&1) printf("eax ");
11235 if((branch_regs[i].dirty>>1)&1) printf("ecx ");
11236 if((branch_regs[i].dirty>>2)&1) printf("edx ");
11237 if((branch_regs[i].dirty>>3)&1) printf("ebx ");
11238 if((branch_regs[i].dirty>>5)&1) printf("ebp ");
11239 if((branch_regs[i].dirty>>6)&1) printf("esi ");
11240 if((branch_regs[i].dirty>>7)&1) printf("edi ");
11241 #endif
11242 #ifdef __arm__
11243 printf("branch(%d): r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[4],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7],branch_regs[i].regmap[8],branch_regs[i].regmap[9],branch_regs[i].regmap[10],branch_regs[i].regmap[12]);
11244 if(branch_regs[i].dirty&1) printf("r0 ");
11245 if((branch_regs[i].dirty>>1)&1) printf("r1 ");
11246 if((branch_regs[i].dirty>>2)&1) printf("r2 ");
11247 if((branch_regs[i].dirty>>3)&1) printf("r3 ");
11248 if((branch_regs[i].dirty>>4)&1) printf("r4 ");
11249 if((branch_regs[i].dirty>>5)&1) printf("r5 ");
11250 if((branch_regs[i].dirty>>6)&1) printf("r6 ");
11251 if((branch_regs[i].dirty>>7)&1) printf("r7 ");
11252 if((branch_regs[i].dirty>>8)&1) printf("r8 ");
11253 if((branch_regs[i].dirty>>9)&1) printf("r9 ");
11254 if((branch_regs[i].dirty>>10)&1) printf("r10 ");
11255 if((branch_regs[i].dirty>>12)&1) printf("r12 ");
11256 #endif
90ae6d4e 11257#ifndef FORCE32
57871462 11258 printf(" 32:");
11259 for(r=0;r<=CCREG;r++) {
11260 if((branch_regs[i].is32>>r)&1) {
11261 if(r==CCREG) printf(" CC");
11262 else if(r==HIREG) printf(" HI");
11263 else if(r==LOREG) printf(" LO");
11264 else printf(" r%d",r);
11265 }
11266 }
11267 printf("\n");
90ae6d4e 11268#endif
57871462 11269 }
11270 }
4600ba03 11271#endif // DISASM
57871462 11272
11273 /* Pass 8 - Assembly */
11274 linkcount=0;stubcount=0;
11275 ds=0;is_delayslot=0;
11276 cop1_usable=0;
11277 uint64_t is32_pre=0;
11278 u_int dirty_pre=0;
11279 u_int beginning=(u_int)out;
11280 if((u_int)addr&1) {
11281 ds=1;
11282 pagespan_ds();
11283 }
9ad4d757 11284 u_int instr_addr0_override=0;
11285
11286#ifdef PCSX
11287 if (start == 0x80030000) {
11288 // nasty hack for fastbios thing
96186eba 11289 // override block entry to this code
9ad4d757 11290 instr_addr0_override=(u_int)out;
11291 emit_movimm(start,0);
96186eba 11292 // abuse io address var as a flag that we
11293 // have already returned here once
11294 emit_readword((int)&address,1);
9ad4d757 11295 emit_writeword(0,(int)&pcaddr);
96186eba 11296 emit_writeword(0,(int)&address);
9ad4d757 11297 emit_cmp(0,1);
11298 emit_jne((int)new_dyna_leave);
11299 }
11300#endif
57871462 11301 for(i=0;i<slen;i++)
11302 {
11303 //if(ds) printf("ds: ");
4600ba03 11304 disassemble_inst(i);
57871462 11305 if(ds) {
11306 ds=0; // Skip delay slot
11307 if(bt[i]) assem_debug("OOPS - branch into delay slot\n");
11308 instr_addr[i]=0;
11309 } else {
ffb0b9e0 11310 speculate_register_values(i);
57871462 11311 #ifndef DESTRUCTIVE_WRITEBACK
11312 if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000))
11313 {
11314 wb_sx(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,is32_pre,regs[i].was32,
11315 unneeded_reg[i],unneeded_reg_upper[i]);
11316 wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,is32_pre,
11317 unneeded_reg[i],unneeded_reg_upper[i]);
11318 }
f776eb14 11319 if((itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)&&!likely[i]) {
11320 is32_pre=branch_regs[i].is32;
11321 dirty_pre=branch_regs[i].dirty;
11322 }else{
11323 is32_pre=regs[i].is32;
11324 dirty_pre=regs[i].dirty;
11325 }
57871462 11326 #endif
11327 // write back
11328 if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000))
11329 {
11330 wb_invalidate(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32,
11331 unneeded_reg[i],unneeded_reg_upper[i]);
11332 loop_preload(regmap_pre[i],regs[i].regmap_entry);
11333 }
11334 // branch target entry point
11335 instr_addr[i]=(u_int)out;
11336 assem_debug("<->\n");
11337 // load regs
11338 if(regs[i].regmap_entry[HOST_CCREG]==CCREG&&regs[i].regmap[HOST_CCREG]!=CCREG)
11339 wb_register(CCREG,regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32);
11340 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i],rs2[i]);
11341 address_generation(i,&regs[i],regs[i].regmap_entry);
11342 load_consts(regmap_pre[i],regs[i].regmap,regs[i].was32,i);
11343 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
11344 {
11345 // Load the delay slot registers if necessary
4ef8f67d 11346 if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i]&&(rs1[i+1]!=rt1[i]||rt1[i]==0))
57871462 11347 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i+1],rs1[i+1]);
4ef8f67d 11348 if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i]&&(rs2[i+1]!=rt1[i]||rt1[i]==0))
57871462 11349 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs2[i+1],rs2[i+1]);
b9b61529 11350 if(itype[i+1]==STORE||itype[i+1]==STORELR||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a)
57871462 11351 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,INVCP,INVCP);
11352 }
11353 else if(i+1<slen)
11354 {
11355 // Preload registers for following instruction
11356 if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i])
11357 if(rs1[i+1]!=rt1[i]&&rs1[i+1]!=rt2[i])
11358 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i+1],rs1[i+1]);
11359 if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i])
11360 if(rs2[i+1]!=rt1[i]&&rs2[i+1]!=rt2[i])
11361 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs2[i+1],rs2[i+1]);
11362 }
11363 // TODO: if(is_ooo(i)) address_generation(i+1);
11364 if(itype[i]==CJUMP||itype[i]==FJUMP)
11365 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG);
b9b61529 11366 if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a)
57871462 11367 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,INVCP,INVCP);
11368 if(bt[i]) cop1_usable=0;
11369 // assemble
11370 switch(itype[i]) {
11371 case ALU:
11372 alu_assemble(i,&regs[i]);break;
11373 case IMM16:
11374 imm16_assemble(i,&regs[i]);break;
11375 case SHIFT:
11376 shift_assemble(i,&regs[i]);break;
11377 case SHIFTIMM:
11378 shiftimm_assemble(i,&regs[i]);break;
11379 case LOAD:
11380 load_assemble(i,&regs[i]);break;
11381 case LOADLR:
11382 loadlr_assemble(i,&regs[i]);break;
11383 case STORE:
11384 store_assemble(i,&regs[i]);break;
11385 case STORELR:
11386 storelr_assemble(i,&regs[i]);break;
11387 case COP0:
11388 cop0_assemble(i,&regs[i]);break;
11389 case COP1:
11390 cop1_assemble(i,&regs[i]);break;
11391 case C1LS:
11392 c1ls_assemble(i,&regs[i]);break;
b9b61529 11393 case COP2:
11394 cop2_assemble(i,&regs[i]);break;
11395 case C2LS:
11396 c2ls_assemble(i,&regs[i]);break;
11397 case C2OP:
11398 c2op_assemble(i,&regs[i]);break;
57871462 11399 case FCONV:
11400 fconv_assemble(i,&regs[i]);break;
11401 case FLOAT:
11402 float_assemble(i,&regs[i]);break;
11403 case FCOMP:
11404 fcomp_assemble(i,&regs[i]);break;
11405 case MULTDIV:
11406 multdiv_assemble(i,&regs[i]);break;
11407 case MOV:
11408 mov_assemble(i,&regs[i]);break;
11409 case SYSCALL:
11410 syscall_assemble(i,&regs[i]);break;
7139f3c8 11411 case HLECALL:
11412 hlecall_assemble(i,&regs[i]);break;
1e973cb0 11413 case INTCALL:
11414 intcall_assemble(i,&regs[i]);break;
57871462 11415 case UJUMP:
11416 ujump_assemble(i,&regs[i]);ds=1;break;
11417 case RJUMP:
11418 rjump_assemble(i,&regs[i]);ds=1;break;
11419 case CJUMP:
11420 cjump_assemble(i,&regs[i]);ds=1;break;
11421 case SJUMP:
11422 sjump_assemble(i,&regs[i]);ds=1;break;
11423 case FJUMP:
11424 fjump_assemble(i,&regs[i]);ds=1;break;
11425 case SPAN:
11426 pagespan_assemble(i,&regs[i]);break;
11427 }
11428 if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000)
11429 literal_pool(1024);
11430 else
11431 literal_pool_jumpover(256);
11432 }
11433 }
11434 //assert(itype[i-2]==UJUMP||itype[i-2]==RJUMP||(source[i-2]>>16)==0x1000);
11435 // If the block did not end with an unconditional branch,
11436 // add a jump to the next instruction.
11437 if(i>1) {
11438 if(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000&&itype[i-1]!=SPAN) {
11439 assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP);
11440 assert(i==slen);
11441 if(itype[i-2]!=CJUMP&&itype[i-2]!=SJUMP&&itype[i-2]!=FJUMP) {
11442 store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4);
11443 if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
11444 emit_loadreg(CCREG,HOST_CCREG);
2573466a 11445 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG);
57871462 11446 }
11447 else if(!likely[i-2])
11448 {
11449 store_regs_bt(branch_regs[i-2].regmap,branch_regs[i-2].is32,branch_regs[i-2].dirty,start+i*4);
11450 assert(branch_regs[i-2].regmap[HOST_CCREG]==CCREG);
11451 }
11452 else
11453 {
11454 store_regs_bt(regs[i-2].regmap,regs[i-2].is32,regs[i-2].dirty,start+i*4);
11455 assert(regs[i-2].regmap[HOST_CCREG]==CCREG);
11456 }
11457 add_to_linker((int)out,start+i*4,0);
11458 emit_jmp(0);
11459 }
11460 }
11461 else
11462 {
11463 assert(i>0);
11464 assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP);
11465 store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4);
11466 if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
11467 emit_loadreg(CCREG,HOST_CCREG);
2573466a 11468 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG);
57871462 11469 add_to_linker((int)out,start+i*4,0);
11470 emit_jmp(0);
11471 }
11472
11473 // TODO: delay slot stubs?
11474 // Stubs
11475 for(i=0;i<stubcount;i++)
11476 {
11477 switch(stubs[i][0])
11478 {
11479 case LOADB_STUB:
11480 case LOADH_STUB:
11481 case LOADW_STUB:
11482 case LOADD_STUB:
11483 case LOADBU_STUB:
11484 case LOADHU_STUB:
11485 do_readstub(i);break;
11486 case STOREB_STUB:
11487 case STOREH_STUB:
11488 case STOREW_STUB:
11489 case STORED_STUB:
11490 do_writestub(i);break;
11491 case CC_STUB:
11492 do_ccstub(i);break;
11493 case INVCODE_STUB:
11494 do_invstub(i);break;
11495 case FP_STUB:
11496 do_cop1stub(i);break;
11497 case STORELR_STUB:
11498 do_unalignedwritestub(i);break;
11499 }
11500 }
11501
9ad4d757 11502 if (instr_addr0_override)
11503 instr_addr[0] = instr_addr0_override;
11504
57871462 11505 /* Pass 9 - Linker */
11506 for(i=0;i<linkcount;i++)
11507 {
11508 assem_debug("%8x -> %8x\n",link_addr[i][0],link_addr[i][1]);
11509 literal_pool(64);
11510 if(!link_addr[i][2])
11511 {
11512 void *stub=out;
11513 void *addr=check_addr(link_addr[i][1]);
11514 emit_extjump(link_addr[i][0],link_addr[i][1]);
11515 if(addr) {
11516 set_jump_target(link_addr[i][0],(int)addr);
11517 add_link(link_addr[i][1],stub);
11518 }
11519 else set_jump_target(link_addr[i][0],(int)stub);
11520 }
11521 else
11522 {
11523 // Internal branch
11524 int target=(link_addr[i][1]-start)>>2;
11525 assert(target>=0&&target<slen);
11526 assert(instr_addr[target]);
11527 //#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
11528 //set_jump_target_fillslot(link_addr[i][0],instr_addr[target],link_addr[i][2]>>1);
11529 //#else
11530 set_jump_target(link_addr[i][0],instr_addr[target]);
11531 //#endif
11532 }
11533 }
11534 // External Branch Targets (jump_in)
11535 if(copy+slen*4>(void *)shadow+sizeof(shadow)) copy=shadow;
11536 for(i=0;i<slen;i++)
11537 {
11538 if(bt[i]||i==0)
11539 {
11540 if(instr_addr[i]) // TODO - delay slots (=null)
11541 {
11542 u_int vaddr=start+i*4;
94d23bb9 11543 u_int page=get_page(vaddr);
11544 u_int vpage=get_vpage(vaddr);
57871462 11545 literal_pool(256);
11546 //if(!(is32[i]&(~unneeded_reg_upper[i])&~(1LL<<CCREG)))
a28c6ce8 11547#ifndef FORCE32
57871462 11548 if(!requires_32bit[i])
a28c6ce8 11549#else
11550 if(1)
11551#endif
57871462 11552 {
11553 assem_debug("%8x (%d) <- %8x\n",instr_addr[i],i,start+i*4);
11554 assem_debug("jump_in: %x\n",start+i*4);
11555 ll_add(jump_dirty+vpage,vaddr,(void *)out);
11556 int entry_point=do_dirty_stub(i);
11557 ll_add(jump_in+page,vaddr,(void *)entry_point);
11558 // If there was an existing entry in the hash table,
11559 // replace it with the new address.
11560 // Don't add new entries. We'll insert the
11561 // ones that actually get used in check_addr().
11562 int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
11563 if(ht_bin[0]==vaddr) {
11564 ht_bin[1]=entry_point;
11565 }
11566 if(ht_bin[2]==vaddr) {
11567 ht_bin[3]=entry_point;
11568 }
11569 }
11570 else
11571 {
11572 u_int r=requires_32bit[i]|!!(requires_32bit[i]>>32);
11573 assem_debug("%8x (%d) <- %8x\n",instr_addr[i],i,start+i*4);
11574 assem_debug("jump_in: %x (restricted - %x)\n",start+i*4,r);
11575 //int entry_point=(int)out;
11576 ////assem_debug("entry_point: %x\n",entry_point);
11577 //load_regs_entry(i);
11578 //if(entry_point==(int)out)
11579 // entry_point=instr_addr[i];
11580 //else
11581 // emit_jmp(instr_addr[i]);
11582 //ll_add_32(jump_in+page,vaddr,r,(void *)entry_point);
11583 ll_add_32(jump_dirty+vpage,vaddr,r,(void *)out);
11584 int entry_point=do_dirty_stub(i);
11585 ll_add_32(jump_in+page,vaddr,r,(void *)entry_point);
11586 }
11587 }
11588 }
11589 }
11590 // Write out the literal pool if necessary
11591 literal_pool(0);
11592 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
11593 // Align code
11594 if(((u_int)out)&7) emit_addnop(13);
11595 #endif
11596 assert((u_int)out-beginning<MAX_OUTPUT_BLOCK_SIZE);
11597 //printf("shadow buffer: %x-%x\n",(int)copy,(int)copy+slen*4);
11598 memcpy(copy,source,slen*4);
11599 copy+=slen*4;
11600
11601 #ifdef __arm__
11602 __clear_cache((void *)beginning,out);
11603 #endif
11604
11605 // If we're within 256K of the end of the buffer,
11606 // start over from the beginning. (Is 256K enough?)
bdeade46 11607 if((u_int)out>(u_int)BASE_ADDR+(1<<TARGET_SIZE_2)-MAX_OUTPUT_BLOCK_SIZE) out=(u_char *)BASE_ADDR;
57871462 11608
11609 // Trap writes to any of the pages we compiled
11610 for(i=start>>12;i<=(start+slen*4)>>12;i++) {
11611 invalid_code[i]=0;
90ae6d4e 11612#ifndef DISABLE_TLB
57871462 11613 memory_map[i]|=0x40000000;
11614 if((signed int)start>=(signed int)0xC0000000) {
11615 assert(using_tlb);
11616 j=(((u_int)i<<12)+(memory_map[i]<<2)-(u_int)rdram+(u_int)0x80000000)>>12;
11617 invalid_code[j]=0;
11618 memory_map[j]|=0x40000000;
11619 //printf("write protect physical page: %x (virtual %x)\n",j<<12,start);
11620 }
90ae6d4e 11621#endif
57871462 11622 }
9be4ba64 11623 inv_code_start=inv_code_end=~0;
b12c9fb8 11624#ifdef PCSX
b96d3df7 11625 // for PCSX we need to mark all mirrors too
b12c9fb8 11626 if(get_page(start)<(RAM_SIZE>>12))
11627 for(i=start>>12;i<=(start+slen*4)>>12;i++)
b96d3df7 11628 invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]=
11629 invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]=
11630 invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0;
b12c9fb8 11631#endif
57871462 11632
11633 /* Pass 10 - Free memory by expiring oldest blocks */
11634
bdeade46 11635 int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535;
57871462 11636 while(expirep!=end)
11637 {
11638 int shift=TARGET_SIZE_2-3; // Divide into 8 blocks
bdeade46 11639 int base=(int)BASE_ADDR+((expirep>>13)<<shift); // Base address of this block
57871462 11640 inv_debug("EXP: Phase %d\n",expirep);
11641 switch((expirep>>11)&3)
11642 {
11643 case 0:
11644 // Clear jump_in and jump_dirty
11645 ll_remove_matching_addrs(jump_in+(expirep&2047),base,shift);
11646 ll_remove_matching_addrs(jump_dirty+(expirep&2047),base,shift);
11647 ll_remove_matching_addrs(jump_in+2048+(expirep&2047),base,shift);
11648 ll_remove_matching_addrs(jump_dirty+2048+(expirep&2047),base,shift);
11649 break;
11650 case 1:
11651 // Clear pointers
11652 ll_kill_pointers(jump_out[expirep&2047],base,shift);
11653 ll_kill_pointers(jump_out[(expirep&2047)+2048],base,shift);
11654 break;
11655 case 2:
11656 // Clear hash table
11657 for(i=0;i<32;i++) {
11658 int *ht_bin=hash_table[((expirep&2047)<<5)+i];
11659 if((ht_bin[3]>>shift)==(base>>shift) ||
11660 ((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) {
11661 inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[2],ht_bin[3]);
11662 ht_bin[2]=ht_bin[3]=-1;
11663 }
11664 if((ht_bin[1]>>shift)==(base>>shift) ||
11665 ((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) {
11666 inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[0],ht_bin[1]);
11667 ht_bin[0]=ht_bin[2];
11668 ht_bin[1]=ht_bin[3];
11669 ht_bin[2]=ht_bin[3]=-1;
11670 }
11671 }
11672 break;
11673 case 3:
11674 // Clear jump_out
dd3a91a1 11675 #ifdef __arm__
11676 if((expirep&2047)==0)
11677 do_clear_cache();
11678 #endif
57871462 11679 ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift);
11680 ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift);
11681 break;
11682 }
11683 expirep=(expirep+1)&65535;
11684 }
11685 return 0;
11686}
b9b61529 11687
11688// vim:shiftwidth=2:expandtab