Merge pull request #72 from frangarcj/master
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
d848b60a 24#include <errno.h>
4600ba03 25#include <sys/mman.h>
d148d265 26#ifdef __MACH__
27#include <libkern/OSCacheControl.h>
28#endif
1e212a25 29#ifdef _3DS
30#include <3ds_utils.h>
31#endif
32#ifdef VITA
33#include <psp2/kernel/sysmem.h>
34static int sceBlock;
73081f23 35int getVMBlock();
1e212a25 36#endif
57871462 37
d148d265 38#include "new_dynarec_config.h"
6f173b35 39#include "backends/psx/emu_if.h" //emulator interface
57871462 40
4600ba03 41//#define DISASM
42//#define assem_debug printf
43//#define inv_debug printf
44#define assem_debug(...)
45#define inv_debug(...)
57871462 46
47#ifdef __i386__
d404093f 48#include "x86/assem_x86.h"
57871462 49#endif
50#ifdef __x86_64__
d404093f 51#include "x64/assem_x64.h"
57871462 52#endif
53#ifdef __arm__
6f173b35 54#include "arm/assem_arm.h"
57871462 55#endif
56
73081f23
FJGG
57#ifdef VITA
58int _newlib_vm_size_user = 1 << TARGET_SIZE_2;
59#endif
60
57871462 61#define MAXBLOCK 4096
62#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 63
// Register-allocation state recorded per instruction (see regs[] and
// branch_regs[]).  Host registers are indexed 0..HOST_REGS-1.
57871462 64struct regstat
65{
66 signed char regmap_entry[HOST_REGS]; // mapping looked up for branch targets (see loop_reg)
67 signed char regmap[HOST_REGS]; // MIPS reg held by each host reg; negative = free, values >=64 are treated as the upper half of reg (value&63)
68 uint64_t was32; // presumably the is32 state before the instruction -- TODO confirm
69 uint64_t is32; // bit per MIPS reg (indexed by reg&63 in flush_dirty_uppers)
70 uint64_t wasdirty; // presumably the dirty state before the instruction -- TODO confirm
71 uint64_t dirty; // bit per host reg, set by dirty_reg()
72 uint64_t u; // NOTE(review): looks like an unneeded-register mask -- confirm
73 uint64_t uu; // NOTE(review): looks like the upper-half counterpart of u -- confirm
74 u_int wasconst; // presumably the isconst state before the instruction -- TODO confirm
75 u_int isconst; // bit per host reg, set by set_const() and cleared by clear_const()
8575a877 76 u_int loadedconst; // host regs that have constants loaded
77 u_int waswritten; // MIPS regs that were used as store base before
57871462 78};
79
de5a60c3 80// note: asm depends on this layout
// Singly linked list node mapping a guest virtual address to a host address.
// Nodes are heap-allocated by ll_add() and released with free().
57871462 81struct ll_entry
82{
83 u_int vaddr; // guest virtual address used as the lookup key
de5a60c3 84 u_int reg_sv_flags; // zeroed by ll_add(), set by ll_add_flags()
57871462 85 void *addr; // host address associated with vaddr
86 struct ll_entry *next; // next node, or NULL at the end of the list
87};
88
// File-scope state of the recompiler.  Most per-instruction arrays are
// indexed by the instruction's position within the block being compiled
// (0..slen-1, at most MAXBLOCK entries).
e2b5e7aa 89 // used by asm:
90 u_char *out; // start_block() opens the next output block for writing at this address
// Each bin holds two entries laid out as {vaddr0, addr0, vaddr1, addr1};
// the bin index is ((vaddr>>16)^vaddr)&0xFFFF.
91 u_int hash_table[65536][4] __attribute__((aligned(16)));
// Per-page lists (page index from get_page/get_vpage): jump_in is searched
// first by get_addr(), jump_dirty entries must pass verify_dirty() first.
92 struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
93 struct ll_entry *jump_dirty[4096];
94
// Per-page lists of links recorded by add_link().
95 static struct ll_entry *jump_out[4096];
96 static u_int start;
97 static u_int *source;
98 static char insn[MAXBLOCK][10];
99 static u_char itype[MAXBLOCK];
100 static u_char opcode[MAXBLOCK];
101 static u_char opcode2[MAXBLOCK];
102 static u_char bt[MAXBLOCK];
103 static u_char rs1[MAXBLOCK];
104 static u_char rs2[MAXBLOCK];
105 static u_char rt1[MAXBLOCK];
106 static u_char rt2[MAXBLOCK];
107 static u_char us1[MAXBLOCK];
108 static u_char us2[MAXBLOCK];
109 static u_char dep1[MAXBLOCK];
110 static u_char dep2[MAXBLOCK];
111 static u_char lt1[MAXBLOCK];
bedfea38 112 static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
113 static uint64_t gte_rt[MAXBLOCK];
114 static uint64_t gte_unneeded[MAXBLOCK];
ffb0b9e0 115 static u_int smrv[32]; // speculated MIPS register values
116 static u_int smrv_strong; // mask of regs that are likely to have correct values
117 static u_int smrv_weak; // same, but somewhat less likely
118 static u_int smrv_strong_next; // same, but after current insn executes
119 static u_int smrv_weak_next;
e2b5e7aa 120 static int imm[MAXBLOCK];
121 static u_int ba[MAXBLOCK]; // compared against start..start+slen*4 to detect in-block targets
122 static char likely[MAXBLOCK];
123 static char is_ds[MAXBLOCK];
124 static char ooo[MAXBLOCK];
125 static uint64_t unneeded_reg[MAXBLOCK];
126 static uint64_t unneeded_reg_upper[MAXBLOCK];
127 static uint64_t branch_unneeded_reg[MAXBLOCK];
128 static uint64_t branch_unneeded_reg_upper[MAXBLOCK];
129 static signed char regmap_pre[MAXBLOCK][HOST_REGS];
// Constant value currently associated with each host register
// (written by set_const(), read by get_const()).
956f3129 130 static uint64_t current_constmap[HOST_REGS];
131 static uint64_t constmap[MAXBLOCK][HOST_REGS];
132 static struct regstat regs[MAXBLOCK];
133 static struct regstat branch_regs[MAXBLOCK];
e2b5e7aa 134 static signed char minimum_free_regs[MAXBLOCK];
135 static u_int needed_reg[MAXBLOCK];
136 static u_int wont_dirty[MAXBLOCK];
137 static u_int will_dirty[MAXBLOCK];
138 static int ccadj[MAXBLOCK];
139 static int slen; // number of instructions in the current block (used as the loop bound)
140 static u_int instr_addr[MAXBLOCK];
141 static u_int link_addr[MAXBLOCK][3];
142 static int linkcount;
143 static u_int stubs[MAXBLOCK*3][8];
144 static int stubcount;
145 static u_int literals[1024][2];
146 static int literalcount;
147 static int is_delayslot;
148 static int cop1_usable;
149 static char shadow[1048576] __attribute__((aligned(16)));
150 static void *copy;
151 static int expirep;
152 static u_int stop_after_jal;
// ram_offset is a compile-time zero when RAM_FIXED is defined.
a327ad27 153#ifndef RAM_FIXED
154 static u_int ram_offset;
155#else
156 static const u_int ram_offset=0;
157#endif
e2b5e7aa 158
159 int new_dynarec_hacks;
160 int new_dynarec_did_compile;
// One bit per page (byte = page>>3, bit = page&7); see get_addr().
57871462 161 extern u_char restore_candidate[512];
162 extern int cycle_count;
163
164 /* registers that may be allocated */
165 /* 1-31 gpr */
// Pseudo-register numbers used alongside the MIPS GPR numbers in regmap[].
166#define HIREG 32 // hi
167#define LOREG 33 // lo
168#define FSREG 34 // FPU status (FCSR)
169#define CSREG 35 // Coprocessor status
170#define CCREG 36 // Cycle count
171#define INVCP 37 // Pointer to invalid_code
1edfcc68 172//#define MMREG 38 // Pointer to memory_map
619e5ded 173#define ROREG 39 // ram offset (if rdram!=0x80000000)
// Note: TEMPREG and FTEMP deliberately share number 40.
174#define TEMPREG 40
175#define FTEMP 40 // FPU temporary register
176#define PTEMP 41 // Prefetch temporary register
1edfcc68 177//#define TLREG 42 // TLB mapping offset
619e5ded 178#define RHASH 43 // Return address hash
179#define RHTBL 44 // Return address hash table address
180#define RTEMP 45 // JR/JALR address register
181#define MAXREG 45
// AGEN1 and BTREG are numbered above MAXREG.
182#define AGEN1 46 // Address generation temporary register
1edfcc68 183//#define AGEN2 47 // Address generation temporary register
184//#define MGEN1 48 // Maptable address generation temporary register
185//#define MGEN2 49 // Maptable address generation temporary register
619e5ded 186#define BTREG 50 // Branch target temporary register
57871462 187
188 /* instruction types */
// Values stored in itype[] for each decoded instruction.
189#define NOP 0 // No operation
190#define LOAD 1 // Load
191#define STORE 2 // Store
192#define LOADLR 3 // Unaligned load
193#define STORELR 4 // Unaligned store
9f51b4b9 194#define MOV 5 // Move
57871462 195#define ALU 6 // Arithmetic/logic
196#define MULTDIV 7 // Multiply/divide
197#define SHIFT 8 // Shift by register
198#define SHIFTIMM 9// Shift by immediate
199#define IMM16 10 // 16-bit immediate
200#define RJUMP 11 // Unconditional jump to register
201#define UJUMP 12 // Unconditional jump
202#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
203#define SJUMP 14 // Conditional branch (regimm format)
204#define COP0 15 // Coprocessor 0
205#define COP1 16 // Coprocessor 1
206#define C1LS 17 // Coprocessor 1 load/store
207#define FJUMP 18 // Conditional branch (floating point)
208#define FLOAT 19 // Floating point unit
209#define FCONV 20 // Convert integer to float
210#define FCOMP 21 // Floating point compare (sets FSREG)
211#define SYSCALL 22// SYSCALL
212#define OTHER 23 // Other
213#define SPAN 24 // Branch/delay slot spans 2 pages
214#define NI 25 // Not implemented
7139f3c8 215#define HLECALL 26// PCSX fake opcodes for HLE
b9b61529 216#define COP2 27 // Coprocessor 2 move
217#define C2LS 28 // Coprocessor 2 load/store
218#define C2OP 29 // Coprocessor 2 operation
1e973cb0 219#define INTCALL 30// Call interpreter to handle rare corner cases
57871462 220
221 /* stubs */
// Stub type identifiers (the "type" argument of add_stub()).
222#define CC_STUB 1
223#define FP_STUB 2
224#define LOADB_STUB 3
225#define LOADH_STUB 4
226#define LOADW_STUB 5
227#define LOADD_STUB 6
228#define LOADBU_STUB 7
229#define LOADHU_STUB 8
230#define STOREB_STUB 9
231#define STOREH_STUB 10
232#define STOREW_STUB 11
233#define STORED_STUB 12
234#define STORELR_STUB 13
235#define INVCODE_STUB 14
236
237 /* branch codes */
238#define TAKEN 1
239#define NOTTAKEN 2
240#define NULLDS 3
241
242// asm linkage
243int new_recompile_block(int addr);
244void *get_addr_ht(u_int vaddr);
245void invalidate_block(u_int block);
246void invalidate_addr(u_int addr);
247void remove_hash(int vaddr);
57871462 248void dyna_linker();
249void dyna_linker_ds();
250void verify_code();
251void verify_code_vm();
252void verify_code_ds();
253void cc_interrupt();
254void fp_exception();
255void fp_exception_ds();
7139f3c8 256void jump_syscall_hle();
7139f3c8 257void jump_hlecall();
1e973cb0 258void jump_intcall();
7139f3c8 259void new_dyna_leave();
57871462 260
57871462 261// Needed by assembler
e2b5e7aa 262static void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
263static void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
264static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
265static void load_all_regs(signed char i_regmap[]);
266static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
267static void load_regs_entry(int t);
268static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
269
270static int verify_dirty(u_int *ptr);
271static int get_final_value(int hr, int i, int *value);
272static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e);
273static void add_to_linker(int addr,int target,int ext);
57871462 274
e2b5e7aa 275static int tracedebug=0;
57871462 276
/*
 * Switch a translation-cache range between writable and executable.
 *
 * Does nothing unless NO_WRITE_EXEC is defined.  On VITA the VM domain is
 * opened (is_x == 0) or closed (is_x != 0) and the range is ignored.
 * Elsewhere the range, with its start rounded down to a 4096-byte boundary,
 * is mprotect()ed to PROT_READ plus PROT_EXEC or PROT_WRITE; a failure is
 * reported through SysPrintf and otherwise ignored.
 */
static void mprotect_w_x(void *start, void *end, int is_x)
{
#ifdef NO_WRITE_EXEC
  #if defined(VITA)
  // *Open* enables write on all memory that was
  // allocated by sceKernelAllocMemBlockForVM()?
  if (!is_x)
    sceKernelOpenVMDomain();
  else
    sceKernelCloseVMDomain();
  #else
  const u_long page_start = (u_long)start & ~4095ul;
  const u_long length = (u_long)end - page_start;
  const int prot = PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE);

  if (mprotect((void *)page_start, length, prot) != 0)
    SysPrintf("mprotect(%c) failed: %s\n", is_x ? 'x' : 'w', strerror(errno));
  #endif
#endif
}
296
/* Make the translation-cache range [start, end) writable. */
static void start_tcache_write(void *start, void *end)
{
  const int make_executable = 0;

  mprotect_w_x(start, end, make_executable);
}
301
/*
 * Finish writing to the translation-cache range [start, end).
 *
 * On ARM hosts the freshly written range is first synchronised with the
 * instruction cache using the platform-specific call; afterwards the range
 * is switched back to executable via mprotect_w_x().
 */
static void end_tcache_write(void *start, void *end)
{
#ifdef __arm__
  char *const first = (char *)start;
  char *const past_last = (char *)end;
  size_t len = past_last - first;
  #if defined(__BLACKBERRY_QNX__)
  msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
  #elif defined(__MACH__)
  sys_cache_control(kCacheFunctionPrepareForExecution, start, len);
  #elif defined(VITA)
  sceKernelSyncVMDomain(sceBlock, start, len);
  #elif defined(_3DS)
  ctr_flush_invalidate_cache();
  #else
  __clear_cache(start, end);
  #endif
  (void)len; // not every branch above consumes the length
#endif

  mprotect_w_x(start, end, 1);
}
322
323static void *start_block(void)
324{
325 u_char *end = out + MAX_OUTPUT_BLOCK_SIZE;
326 if (end > (u_char *)BASE_ADDR + (1<<TARGET_SIZE_2))
327 end = (u_char *)BASE_ADDR + (1<<TARGET_SIZE_2);
328 start_tcache_write(out, end);
329 return out;
330}
331
332static void end_block(void *start)
333{
334 end_tcache_write(start, out);
335}
336
57871462 337//#define DEBUG_CYCLE_COUNT 1
338
b6e87b2b 339#define NO_CYCLE_PENALTY_THR 12
340
int cycle_multiplier; // 100 for 1.0

/*
 * Scale a cycle count by cycle_multiplier/100, rounding to nearest
 * (half away from zero) by adding +50 or -50 before the division.
 */
static int CLOCK_ADJUST(int x)
{
  const int half = (x < 0) ? -50 : 50;

  return (x * cycle_multiplier + half) / 100;
}
348
/*
 * Map a guest virtual address to a page index for the jump tables.
 *
 * The top three address bits are dropped; addresses below 0x1000000 also
 * have bits 21-23 cleared (RAM mirrors).  The resulting 4K page number is
 * returned as-is up to 2048; larger numbers are folded into 2048..4095.
 */
static u_int get_page(u_int vaddr)
{
  u_int addr=vaddr&~0xe0000000;
  if(addr<0x1000000)
    addr&=~0x0e00000; // RAM mirrors
  u_int index=addr>>12;
  return (index>2048) ? 2048+(index&2047) : index;
}
358
d25604ca 359// no virtual mem in PCSX
360static u_int get_vpage(u_int vaddr)
361{
362 return get_page(vaddr);
363}
94d23bb9 364
365// Get address from virtual address
366// This is called from the recompiled JR/JALR instructions
// Resolve vaddr to a host code address.
// Order of attempts:
//  1. jump_in[page]: on a hit the entry is pushed into the hash bin (the
//     previous first entry moves to the second slot) and returned.
//  2. jump_dirty[vpage]: a hit is used only if it is not close to expiring
//     from the cache and verify_dirty() accepts it; the page is then marked
//     valid again and flagged in restore_candidate.
//  3. Otherwise the block is recompiled and the lookup is retried.
// If recompilation fails, the exception registers are filled in and the
// handler at 0x80000000 is looked up instead.
367void *get_addr(u_int vaddr)
368{
369 u_int page=get_page(vaddr);
370 u_int vpage=get_vpage(vaddr);
57871462 371 struct ll_entry *head;
372 //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
373 head=jump_in[page];
374 while(head!=NULL) {
de5a60c3 375 if(head->vaddr==vaddr) {
57871462 376 //printf("TRACE: count=%d next=%d (get_addr match %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
581335b0 377 u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
// Demote the current first entry to the second slot, then insert.
57871462 378 ht_bin[3]=ht_bin[1];
379 ht_bin[2]=ht_bin[0];
581335b0 380 ht_bin[1]=(u_int)head->addr;
57871462 381 ht_bin[0]=vaddr;
382 return head->addr;
383 }
384 head=head->next;
385 }
386 head=jump_dirty[vpage];
387 while(head!=NULL) {
de5a60c3 388 if(head->vaddr==vaddr) {
57871462 389 //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %x)\n",Count,next_interupt,vaddr,(int)head->addr);
390 // Don't restore blocks which are about to expire from the cache
391 if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
392 if(verify_dirty(head->addr)) {
393 //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
394 invalid_code[vaddr>>12]=0;
// Reset the cached invalidation range.
9be4ba64 395 inv_code_start=inv_code_end=~0;
57871462 396 if(vpage<2048) {
57871462 397 restore_candidate[vpage>>3]|=1<<(vpage&7);
398 }
399 else restore_candidate[page>>3]|=1<<(page&7);
581335b0 400 u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
57871462 401 if(ht_bin[0]==vaddr) {
581335b0 402 ht_bin[1]=(u_int)head->addr; // Replace existing entry
57871462 403 }
404 else
405 {
406 ht_bin[3]=ht_bin[1];
407 ht_bin[2]=ht_bin[0];
408 ht_bin[1]=(int)head->addr;
409 ht_bin[0]=vaddr;
410 }
411 return head->addr;
412 }
413 }
414 head=head->next;
415 }
416 //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
417 int r=new_recompile_block(vaddr);
418 if(r==0) return get_addr(vaddr);
419 // Execute in unmapped page, generate pagefault exception
420 Status|=2;
421 Cause=(vaddr<<31)|0x8;
422 EPC=(vaddr&1)?vaddr-5:vaddr;
423 BadVAddr=(vaddr&~1);
424 Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
425 EntryHi=BadVAddr&0xFFFFE000;
426 return get_addr_ht(0x80000000);
427}
428// Look up address in hash table first
429void *get_addr_ht(u_int vaddr)
430{
431 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
581335b0 432 u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
57871462 433 if(ht_bin[0]==vaddr) return (void *)ht_bin[1];
434 if(ht_bin[2]==vaddr) return (void *)ht_bin[3];
435 return get_addr(vaddr);
436}
437
57871462 438void clear_all_regs(signed char regmap[])
439{
440 int hr;
441 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
442}
443
444signed char get_reg(signed char regmap[],int r)
445{
446 int hr;
447 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
448 return -1;
449}
450
451// Find a register that is available for two consecutive cycles
452signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
453{
454 int hr;
455 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
456 return -1;
457}
458
459int count_free_regs(signed char regmap[])
460{
461 int count=0;
462 int hr;
463 for(hr=0;hr<HOST_REGS;hr++)
464 {
465 if(hr!=EXCLUDE_REG) {
466 if(regmap[hr]<0) count++;
467 }
468 }
469 return count;
470}
471
472void dirty_reg(struct regstat *cur,signed char reg)
473{
474 int hr;
475 if(!reg) return;
476 for (hr=0;hr<HOST_REGS;hr++) {
477 if((cur->regmap[hr]&63)==reg) {
478 cur->dirty|=1<<hr;
479 }
480 }
481}
482
483// If we dirty the lower half of a 64 bit register which is now being
484// sign-extended, we need to dump the upper half.
485// Note: Do this only after completion of the instruction, because
486// some instructions may need to read the full 64-bit value even if
487// overwriting it (eg SLTI, DSRA32).
488static void flush_dirty_uppers(struct regstat *cur)
489{
490 int hr,reg;
491 for (hr=0;hr<HOST_REGS;hr++) {
492 if((cur->dirty>>hr)&1) {
493 reg=cur->regmap[hr];
9f51b4b9 494 if(reg>=64)
57871462 495 if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
496 }
497 }
498}
499
500void set_const(struct regstat *cur,signed char reg,uint64_t value)
501{
502 int hr;
503 if(!reg) return;
504 for (hr=0;hr<HOST_REGS;hr++) {
505 if(cur->regmap[hr]==reg) {
506 cur->isconst|=1<<hr;
956f3129 507 current_constmap[hr]=value;
57871462 508 }
509 else if((cur->regmap[hr]^64)==reg) {
510 cur->isconst|=1<<hr;
956f3129 511 current_constmap[hr]=value>>32;
57871462 512 }
513 }
514}
515
516void clear_const(struct regstat *cur,signed char reg)
517{
518 int hr;
519 if(!reg) return;
520 for (hr=0;hr<HOST_REGS;hr++) {
521 if((cur->regmap[hr]&63)==reg) {
522 cur->isconst&=~(1<<hr);
523 }
524 }
525}
526
527int is_const(struct regstat *cur,signed char reg)
528{
529 int hr;
79c75f1b 530 if(reg<0) return 0;
57871462 531 if(!reg) return 1;
532 for (hr=0;hr<HOST_REGS;hr++) {
533 if((cur->regmap[hr]&63)==reg) {
534 return (cur->isconst>>hr)&1;
535 }
536 }
537 return 0;
538}
539uint64_t get_const(struct regstat *cur,signed char reg)
540{
541 int hr;
542 if(!reg) return 0;
543 for (hr=0;hr<HOST_REGS;hr++) {
544 if(cur->regmap[hr]==reg) {
956f3129 545 return current_constmap[hr];
57871462 546 }
547 }
c43b5311 548 SysPrintf("Unknown constant in r%d\n",reg);
57871462 549 exit(1);
550}
551
552// Least soon needed registers
553// Look at the next ten instructions and see which registers
554// will be used. Try not to reallocate these.
// hsn[] is indexed by register number; this routine stores into it the
// distance (in instructions from i) at which each register is next
// referenced -- smaller means needed sooner.  Entries for registers not seen
// are left untouched, so the caller is expected to pre-fill hsn[].
// preferred_reg is not used by the code below.
555void lsn(u_char hsn[], int i, int *preferred_reg)
556{
557 int j;
558 int b=-1;
// Find how far ahead to look: stop at the end of the block or just after
// an unconditional jump.
559 for(j=0;j<9;j++)
560 {
561 if(i+j>=slen) {
562 j=slen-i-1;
563 break;
564 }
565 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
566 {
567 // Don't go past an unconditional jump
568 j++;
569 break;
570 }
571 }
// Walk backwards so that the nearest use of each register wins.
572 for(;j>=0;j--)
573 {
574 if(rs1[i+j]) hsn[rs1[i+j]]=j;
575 if(rs2[i+j]) hsn[rs2[i+j]]=j;
576 if(rt1[i+j]) hsn[rt1[i+j]]=j;
577 if(rt2[i+j]) hsn[rt2[i+j]]=j;
578 if(itype[i+j]==STORE || itype[i+j]==STORELR) {
579 // Stores can allocate zero
580 hsn[rs1[i+j]]=j;
581 hsn[rs2[i+j]]=j;
582 }
583 // On some architectures stores need invc_ptr
584 #if defined(HOST_IMM8)
b9b61529 585 if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
57871462 586 hsn[INVCP]=j;
587 }
588 #endif
589 if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
590 {
591 hsn[CCREG]=j;
// b ends up as the offset of the first branch in the window.
592 b=j;
593 }
594 }
595 if(b>=0)
596 {
597 if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
598 {
599 // Follow first branch
600 int t=(ba[i+b]-start)>>2;
601 j=7-b;if(t+j>=slen) j=slen-t-1;
602 for(;j>=0;j--)
603 {
// Only source registers at the branch target are considered, and they
// can only lower an existing distance.
604 if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
605 if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
606 //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
607 //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
608 }
609 }
610 // TODO: preferred register based on backward branch
611 }
612 // Delay slot should preferably not overwrite branch conditions or cycle count
613 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
614 if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
615 if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
616 hsn[CCREG]=1;
617 // ...or hash tables
618 hsn[RHASH]=1;
619 hsn[RHTBL]=1;
620 }
621 // Coprocessor load/store needs FTEMP, even if not declared
b9b61529 622 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 623 hsn[FTEMP]=0;
624 }
625 // Load L/R also uses FTEMP as a temporary register
626 if(itype[i]==LOADLR) {
627 hsn[FTEMP]=0;
628 }
b7918751 629 // Also SWL/SWR/SDL/SDR
630 if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
57871462 631 hsn[FTEMP]=0;
632 }
57871462 633 // Don't remove the miniht registers
634 if(itype[i]==UJUMP||itype[i]==RJUMP)
635 {
636 hsn[RHASH]=0;
637 hsn[RHTBL]=0;
638 }
639}
640
641// We only want to allocate registers if we're going to use them again soon
// Returns 1 when register r is read (as rs1 or rs2) by one of the following
// instructions in the look-ahead window and that use is not cancelled by an
// earlier unneeded_reg marking; returns 0 otherwise.
// Returns 0 immediately when the previous instruction is an unconditional
// jump whose target lies outside the current block.
642int needed_again(int r, int i)
643{
644 int j;
645 int b=-1;
// rn is the distance to the next use; 10 means "not needed".
646 int rn=10;
9f51b4b9 647
57871462 648 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
649 {
650 if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
651 return 0; // Don't need any registers if exiting the block
652 }
653 for(j=0;j<9;j++)
654 {
655 if(i+j>=slen) {
656 j=slen-i-1;
657 break;
658 }
659 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
660 {
661 // Don't go past an unconditional jump
662 j++;
663 break;
664 }
1e973cb0 665 if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
57871462 666 {
667 break;
668 }
669 }
// Scan backwards down to i+1 (the instruction at i itself is excluded).
670 for(;j>=1;j--)
671 {
672 if(rs1[i+j]==r) rn=j;
673 if(rs2[i+j]==r) rn=j;
674 if((unneeded_reg[i+j]>>r)&1) rn=10;
675 if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
676 {
// b is only consumed by the disabled branch-following code below.
677 b=j;
678 }
679 }
680 /*
681 if(b>=0)
682 {
683 if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
684 {
685 // Follow first branch
686 int o=rn;
687 int t=(ba[i+b]-start)>>2;
688 j=7-b;if(t+j>=slen) j=slen-t-1;
689 for(;j>=0;j--)
690 {
691 if(!((unneeded_reg[t+j]>>r)&1)) {
692 if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
693 if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
694 }
695 else rn=o;
696 }
697 }
698 }*/
b7217e13 699 if(rn<10) return 1;
// Silences the unused-variable warning for b.
581335b0 700 (void)b;
57871462 701 return 0;
702}
703
704// Try to match register allocations at the end of a loop with those
705// at the beginning
706int loop_reg(int i, int r, int hr)
707{
708 int j,k;
709 for(j=0;j<9;j++)
710 {
711 if(i+j>=slen) {
712 j=slen-i-1;
713 break;
714 }
715 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
716 {
717 // Don't go past an unconditonal jump
718 j++;
719 break;
720 }
721 }
722 k=0;
723 if(i>0){
724 if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
725 k--;
726 }
727 for(;k<j;k++)
728 {
729 if(r<64&&((unneeded_reg[i+k]>>r)&1)) return hr;
730 if(r>64&&((unneeded_reg_upper[i+k]>>r)&1)) return hr;
731 if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
732 {
733 if(ba[i+k]>=start && ba[i+k]<(start+i*4))
734 {
735 int t=(ba[i+k]-start)>>2;
736 int reg=get_reg(regs[t].regmap_entry,r);
737 if(reg>=0) return reg;
738 //reg=get_reg(regs[t+1].regmap_entry,r);
739 //if(reg>=0) return reg;
740 }
741 }
742 }
743 return hr;
744}
745
746
747// Allocate every register, preserving source/target regs
748void alloc_all(struct regstat *cur,int i)
749{
750 int hr;
9f51b4b9 751
57871462 752 for(hr=0;hr<HOST_REGS;hr++) {
753 if(hr!=EXCLUDE_REG) {
754 if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
755 ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
756 {
757 cur->regmap[hr]=-1;
758 cur->dirty&=~(1<<hr);
759 }
760 // Don't need zeros
761 if((cur->regmap[hr]&63)==0)
762 {
763 cur->regmap[hr]=-1;
764 cur->dirty&=~(1<<hr);
765 }
766 }
767 }
768}
769
57871462 770#ifdef __i386__
d404093f 771#include "x86/assem_x86.c"
57871462 772#endif
773#ifdef __x86_64__
d404093f 774#include "x64/assem_x64.c"
57871462 775#endif
776#ifdef __arm__
6f173b35 777#include "arm/assem_arm.c"
57871462 778#endif
779
780// Add virtual address mapping to linked list
781void ll_add(struct ll_entry **head,int vaddr,void *addr)
782{
783 struct ll_entry *new_entry;
784 new_entry=malloc(sizeof(struct ll_entry));
785 assert(new_entry!=NULL);
786 new_entry->vaddr=vaddr;
de5a60c3 787 new_entry->reg_sv_flags=0;
57871462 788 new_entry->addr=addr;
789 new_entry->next=*head;
790 *head=new_entry;
791}
792
de5a60c3 793void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr)
57871462 794{
7139f3c8 795 ll_add(head,vaddr,addr);
de5a60c3 796 (*head)->reg_sv_flags=reg_sv_flags;
57871462 797}
798
799// Check if an address is already compiled
800// but don't return addresses which are about to expire from the cache
// Returns the host address already compiled for vaddr, or 0 if none is
// usable.  Both hash-bin entries are tried first (they must pass the
// expiry test and isclean()); then jump_in[page] is searched, and a hit
// that passes the expiry test is written back into the hash bin.
801void *check_addr(u_int vaddr)
802{
803 u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
804 if(ht_bin[0]==vaddr) {
805 if(((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
806 if(isclean(ht_bin[1])) return (void *)ht_bin[1];
807 }
808 if(ht_bin[2]==vaddr) {
809 if(((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2)))
810 if(isclean(ht_bin[3])) return (void *)ht_bin[3];
811 }
94d23bb9 812 u_int page=get_page(vaddr);
57871462 813 struct ll_entry *head;
814 head=jump_in[page];
815 while(head!=NULL) {
de5a60c3 816 if(head->vaddr==vaddr) {
57871462 817 if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
818 // Update existing entry with current address
819 if(ht_bin[0]==vaddr) {
820 ht_bin[1]=(int)head->addr;
821 return head->addr;
822 }
823 if(ht_bin[2]==vaddr) {
824 ht_bin[3]=(int)head->addr;
825 return head->addr;
826 }
827 // Insert into hash table with low priority.
828 // Don't evict existing entries, as they are probably
829 // addresses that are being accessed frequently.
// A key of -1 marks an empty hash slot (see remove_hash).
830 if(ht_bin[0]==-1) {
831 ht_bin[1]=(int)head->addr;
832 ht_bin[0]=vaddr;
833 }else if(ht_bin[2]==-1) {
834 ht_bin[3]=(int)head->addr;
835 ht_bin[2]=vaddr;
836 }
837 return head->addr;
838 }
839 }
840 head=head->next;
841 }
842 return 0;
843}
844
845void remove_hash(int vaddr)
846{
847 //printf("remove hash: %x\n",vaddr);
581335b0 848 u_int *ht_bin=hash_table[(((vaddr)>>16)^vaddr)&0xFFFF];
57871462 849 if(ht_bin[2]==vaddr) {
850 ht_bin[2]=ht_bin[3]=-1;
851 }
852 if(ht_bin[0]==vaddr) {
853 ht_bin[0]=ht_bin[2];
854 ht_bin[1]=ht_bin[3];
855 ht_bin[2]=ht_bin[3]=-1;
856 }
857}
858
859void ll_remove_matching_addrs(struct ll_entry **head,int addr,int shift)
860{
861 struct ll_entry *next;
862 while(*head) {
9f51b4b9 863 if(((u_int)((*head)->addr)>>shift)==(addr>>shift) ||
57871462 864 ((u_int)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
865 {
866 inv_debug("EXP: Remove pointer to %x (%x)\n",(int)(*head)->addr,(*head)->vaddr);
867 remove_hash((*head)->vaddr);
868 next=(*head)->next;
869 free(*head);
870 *head=next;
871 }
872 else
873 {
874 head=&((*head)->next);
875 }
876 }
877}
878
879// Remove all entries from linked list
880void ll_clear(struct ll_entry **head)
881{
882 struct ll_entry *cur;
883 struct ll_entry *next;
581335b0 884 if((cur=*head)) {
57871462 885 *head=0;
886 while(cur) {
887 next=cur->next;
888 free(cur);
889 cur=next;
890 }
891 }
892}
893
894// Dereference the pointers and remove if it matches
d148d265 895static void ll_kill_pointers(struct ll_entry *head,int addr,int shift)
57871462 896{
897 while(head) {
898 int ptr=get_pointer(head->addr);
899 inv_debug("EXP: Lookup pointer to %x at %x (%x)\n",(int)ptr,(int)head->addr,head->vaddr);
900 if(((ptr>>shift)==(addr>>shift)) ||
901 (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
902 {
5088bb70 903 inv_debug("EXP: Kill pointer at %x (%x)\n",(int)head->addr,head->vaddr);
d148d265 904 void *host_addr=find_extjump_insn(head->addr);
dd3a91a1 905 #ifdef __arm__
d148d265 906 mark_clear_cache(host_addr);
dd3a91a1 907 #endif
d148d265 908 set_jump_target((int)host_addr,(int)head->addr);
57871462 909 }
910 head=head->next;
911 }
912}
913
914// This is called when we write to a compiled block (see do_invstub)
f76eeef9 915void invalidate_page(u_int page)
57871462 916{
57871462 917 struct ll_entry *head;
918 struct ll_entry *next;
919 head=jump_in[page];
920 jump_in[page]=0;
921 while(head!=NULL) {
922 inv_debug("INVALIDATE: %x\n",head->vaddr);
923 remove_hash(head->vaddr);
924 next=head->next;
925 free(head);
926 head=next;
927 }
928 head=jump_out[page];
929 jump_out[page]=0;
930 while(head!=NULL) {
931 inv_debug("INVALIDATE: kill pointer to %x (%x)\n",head->vaddr,(int)head->addr);
d148d265 932 void *host_addr=find_extjump_insn(head->addr);
dd3a91a1 933 #ifdef __arm__
d148d265 934 mark_clear_cache(host_addr);
dd3a91a1 935 #endif
d148d265 936 set_jump_target((int)host_addr,(int)head->addr);
57871462 937 next=head->next;
938 free(head);
939 head=next;
940 }
57871462 941}
9be4ba64 942
// Invalidate the page containing block (a 4K block number) together with
// neighbouring pages in the range given by first/last (page indices), then
// mark the block in invalid_code so that further writes are not trapped.
// first and last must lie within 4 pages of the block's page (asserted).
943static void invalidate_block_range(u_int block, u_int first, u_int last)
57871462 944{
94d23bb9 945 u_int page=get_page(block<<12);
57871462 946 //printf("first=%d last=%d\n",first,last);
f76eeef9 947 invalidate_page(page);
57871462 948 assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
949 assert(last<page+5);
950 // Invalidate the adjacent pages if a block crosses a 4K boundary
951 while(first<page) {
952 invalidate_page(first);
953 first++;
954 }
// NOTE(review): the upper bound is exclusive, so page `last` itself is not
// invalidated here although callers compute it from (end-1)>>12 -- confirm
// whether that is intended.
955 for(first=page+1;first<last;first++) {
956 invalidate_page(first);
957 }
dd3a91a1 958 #ifdef __arm__
959 do_clear_cache();
960 #endif
9f51b4b9 961
57871462 962 // Don't trap writes
963 invalid_code[block]=1;
f76eeef9 964
57871462 965 #ifdef USE_MINI_HT
966 memset(mini_ht,-1,sizeof(mini_ht));
967 #endif
968}
9be4ba64 969
// Invalidate all compiled code for a 4K block (block = address>>12).
// The dirty list of the block's page is scanned to find source ranges that
// overlap the page; the page range [first,last] is widened accordingly and
// handed to invalidate_block_range().
970void invalidate_block(u_int block)
971{
972 u_int page=get_page(block<<12);
973 u_int vpage=get_vpage(block<<12);
974 inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
975 //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
976 u_int first,last;
977 first=last=page;
978 struct ll_entry *head;
979 head=jump_dirty[vpage];
980 //printf("page=%d vpage=%d\n",page,vpage);
981 while(head!=NULL) {
982 u_int start,end;
983 if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
// start/end are host addresses of the source range covered by this entry.
984 get_bounds((int)head->addr,&start,&end);
985 //printf("start: %x end: %x\n",start,end);
// Only ranges that lie entirely inside rdram are considered.
4a35de07 986 if(page<2048&&start>=(u_int)rdram&&end<(u_int)rdram+RAM_SIZE) {
9be4ba64 987 if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) {
988 if((((start-(u_int)rdram)>>12)&2047)<first) first=((start-(u_int)rdram)>>12)&2047;
989 if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047;
990 }
991 }
9be4ba64 992 }
993 head=head->next;
994 }
995 invalidate_block_range(block,first,last);
996}
997
// Handle a write to addr that may hit compiled code.
// For RAM pages (<2048) the dirty lists of the page and of the page before
// it are scanned: if a source range contains the address, the affected
// pages are invalidated; otherwise inv_code_start/inv_code_end are set to
// the surrounding range that is known to contain no code, so the caller can
// skip further checks for writes inside it.
// Non-RAM pages fall through to invalidate_block().
57871462 998void invalidate_addr(u_int addr)
999{
9be4ba64 1000 //static int rhits;
1001 // this check is done by the caller
1002 //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
d25604ca 1003 u_int page=get_vpage(addr);
9be4ba64 1004 if(page<2048) { // RAM
1005 struct ll_entry *head;
1006 u_int addr_min=~0, addr_max=0;
4a35de07 1007 u_int mask=RAM_SIZE-1;
// Normalise the address into the 0x80000000 segment.
1008 u_int addr_main=0x80000000|(addr&mask);
9be4ba64 1009 int pg1;
// Start with the 4K page around the address as the code-free range.
4a35de07 1010 inv_code_start=addr_main&~0xfff;
1011 inv_code_end=addr_main|0xfff;
9be4ba64 1012 pg1=page;
1013 if (pg1>0) {
1014 // must check previous page too because of spans..
1015 pg1--;
1016 inv_code_start-=0x1000;
1017 }
1018 for(;pg1<=page;pg1++) {
1019 for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
1020 u_int start,end;
1021 get_bounds((int)head->addr,&start,&end);
4a35de07 1022 if(ram_offset) {
1023 start-=ram_offset;
1024 end-=ram_offset;
1025 }
1026 if(start<=addr_main&&addr_main<end) {
// Hit: accumulate the union of all ranges containing the address.
9be4ba64 1027 if(start<addr_min) addr_min=start;
1028 if(end>addr_max) addr_max=end;
1029 }
4a35de07 1030 else if(addr_main<start) {
// Code above the address: shrink the code-free range from the top.
9be4ba64 1031 if(start<inv_code_end)
1032 inv_code_end=start-1;
1033 }
1034 else {
// Code below the address: shrink the code-free range from the bottom.
1035 if(end>inv_code_start)
1036 inv_code_start=end;
1037 }
1038 }
1039 }
1040 if (addr_min!=~0) {
1041 inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
1042 inv_code_start=inv_code_end=~0;
1043 invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
1044 return;
1045 }
1046 else {
// Miss: translate the code-free range back into the caller's segment.
4a35de07 1047 inv_code_start=(addr&~mask)|(inv_code_start&mask);
1048 inv_code_end=(addr&~mask)|(inv_code_end&mask);
d25604ca 1049 inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
9be4ba64 1050 return;
d25604ca 1051 }
9be4ba64 1052 }
57871462 1053 invalidate_block(addr>>12);
1054}
9be4ba64 1055
dd3a91a1 1056// This is called when loading a save state.
1057// Anything could have changed, so invalidate everything.
57871462 1058void invalidate_all_pages()
1059{
581335b0 1060 u_int page;
57871462 1061 for(page=0;page<4096;page++)
1062 invalidate_page(page);
1063 for(page=0;page<1048576;page++)
1064 if(!invalid_code[page]) {
1065 restore_candidate[(page&2047)>>3]|=1<<(page&7);
1066 restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1067 }
57871462 1068 #ifdef USE_MINI_HT
1069 memset(mini_ht,-1,sizeof(mini_ht));
1070 #endif
57871462 1071}
1072
1073// Add an entry to jump_out after making a link
1074void add_link(u_int vaddr,void *src)
1075{
94d23bb9 1076 u_int page=get_page(vaddr);
57871462 1077 inv_debug("add_link: %x -> %x (%d)\n",(int)src,vaddr,page);
76f71c27 1078 int *ptr=(int *)(src+4);
1079 assert((*ptr&0x0fff0000)==0x059f0000);
581335b0 1080 (void)ptr;
57871462 1081 ll_add(jump_out+page,vaddr,src);
1082 //int ptr=get_pointer(src);
1083 //inv_debug("add_link: Pointer is to %x\n",(int)ptr);
1084}
1085
1086// If a code block was found to be unmodified (bit was set in
1087// restore_candidate) and it remains unmodified (bit is clear
1088// in invalid_code) then move the entries for that 4K page from
1089// the dirty list to the clean list.
1090void clean_blocks(u_int page)
1091{
1092 struct ll_entry *head;
1093 inv_debug("INV: clean_blocks page=%d\n",page);
1094 head=jump_dirty[page];
1095 while(head!=NULL) {
1096 if(!invalid_code[head->vaddr>>12]) {
1097 // Don't restore blocks which are about to expire from the cache
1098 if((((u_int)head->addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
1099 u_int start,end;
581335b0 1100 if(verify_dirty(head->addr)) {
57871462 1101 //printf("Possibly Restore %x (%x)\n",head->vaddr, (int)head->addr);
1102 u_int i;
1103 u_int inv=0;
1104 get_bounds((int)head->addr,&start,&end);
4cb76aa4 1105 if(start-(u_int)rdram<RAM_SIZE) {
57871462 1106 for(i=(start-(u_int)rdram+0x80000000)>>12;i<=(end-1-(u_int)rdram+0x80000000)>>12;i++) {
1107 inv|=invalid_code[i];
1108 }
1109 }
4cb76aa4 1110 else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
57871462 1111 inv=1;
1112 }
1113 if(!inv) {
1114 void * clean_addr=(void *)get_clean_addr((int)head->addr);
1115 if((((u_int)clean_addr-(u_int)out)<<(32-TARGET_SIZE_2))>0x60000000+(MAX_OUTPUT_BLOCK_SIZE<<(32-TARGET_SIZE_2))) {
1116 u_int ppage=page;
57871462 1117 inv_debug("INV: Restored %x (%x/%x)\n",head->vaddr, (int)head->addr, (int)clean_addr);
1118 //printf("page=%x, addr=%x\n",page,head->vaddr);
1119 //assert(head->vaddr>>12==(page|0x80000));
de5a60c3 1120 ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
581335b0 1121 u_int *ht_bin=hash_table[((head->vaddr>>16)^head->vaddr)&0xFFFF];
de5a60c3 1122 if(ht_bin[0]==head->vaddr) {
581335b0 1123 ht_bin[1]=(u_int)clean_addr; // Replace existing entry
de5a60c3 1124 }
1125 if(ht_bin[2]==head->vaddr) {
581335b0 1126 ht_bin[3]=(u_int)clean_addr; // Replace existing entry
57871462 1127 }
1128 }
1129 }
1130 }
1131 }
1132 }
1133 head=head->next;
1134 }
1135}
1136
1137
1138void mov_alloc(struct regstat *current,int i)
1139{
1140 // Note: Don't need to actually alloc the source registers
1141 if((~current->is32>>rs1[i])&1) {
1142 //alloc_reg64(current,i,rs1[i]);
1143 alloc_reg64(current,i,rt1[i]);
1144 current->is32&=~(1LL<<rt1[i]);
1145 } else {
1146 //alloc_reg(current,i,rs1[i]);
1147 alloc_reg(current,i,rt1[i]);
1148 current->is32|=(1LL<<rt1[i]);
1149 }
1150 clear_const(current,rs1[i]);
1151 clear_const(current,rt1[i]);
1152 dirty_reg(current,rt1[i]);
1153}
1154
1155void shiftimm_alloc(struct regstat *current,int i)
1156{
57871462 1157 if(opcode2[i]<=0x3) // SLL/SRL/SRA
1158 {
1159 if(rt1[i]) {
1160 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1161 else lt1[i]=rs1[i];
1162 alloc_reg(current,i,rt1[i]);
1163 current->is32|=1LL<<rt1[i];
1164 dirty_reg(current,rt1[i]);
dc49e339 1165 if(is_const(current,rs1[i])) {
1166 int v=get_const(current,rs1[i]);
1167 if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
1168 if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
1169 if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
1170 }
1171 else clear_const(current,rt1[i]);
57871462 1172 }
1173 }
dc49e339 1174 else
1175 {
1176 clear_const(current,rs1[i]);
1177 clear_const(current,rt1[i]);
1178 }
1179
57871462 1180 if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
1181 {
1182 if(rt1[i]) {
1183 if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1184 alloc_reg64(current,i,rt1[i]);
1185 current->is32&=~(1LL<<rt1[i]);
1186 dirty_reg(current,rt1[i]);
1187 }
1188 }
1189 if(opcode2[i]==0x3c) // DSLL32
1190 {
1191 if(rt1[i]) {
1192 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1193 alloc_reg64(current,i,rt1[i]);
1194 current->is32&=~(1LL<<rt1[i]);
1195 dirty_reg(current,rt1[i]);
1196 }
1197 }
1198 if(opcode2[i]==0x3e) // DSRL32
1199 {
1200 if(rt1[i]) {
1201 alloc_reg64(current,i,rs1[i]);
1202 if(imm[i]==32) {
1203 alloc_reg64(current,i,rt1[i]);
1204 current->is32&=~(1LL<<rt1[i]);
1205 } else {
1206 alloc_reg(current,i,rt1[i]);
1207 current->is32|=1LL<<rt1[i];
1208 }
1209 dirty_reg(current,rt1[i]);
1210 }
1211 }
1212 if(opcode2[i]==0x3f) // DSRA32
1213 {
1214 if(rt1[i]) {
1215 alloc_reg64(current,i,rs1[i]);
1216 alloc_reg(current,i,rt1[i]);
1217 current->is32|=1LL<<rt1[i];
1218 dirty_reg(current,rt1[i]);
1219 }
1220 }
1221}
1222
1223void shift_alloc(struct regstat *current,int i)
1224{
1225 if(rt1[i]) {
1226 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
1227 {
1228 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1229 if(rs2[i]) alloc_reg(current,i,rs2[i]);
1230 alloc_reg(current,i,rt1[i]);
e1190b87 1231 if(rt1[i]==rs2[i]) {
1232 alloc_reg_temp(current,i,-1);
1233 minimum_free_regs[i]=1;
1234 }
57871462 1235 current->is32|=1LL<<rt1[i];
1236 } else { // DSLLV/DSRLV/DSRAV
1237 if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1238 if(rs2[i]) alloc_reg(current,i,rs2[i]);
1239 alloc_reg64(current,i,rt1[i]);
1240 current->is32&=~(1LL<<rt1[i]);
1241 if(opcode2[i]==0x16||opcode2[i]==0x17) // DSRLV and DSRAV need a temporary register
e1190b87 1242 {
57871462 1243 alloc_reg_temp(current,i,-1);
e1190b87 1244 minimum_free_regs[i]=1;
1245 }
57871462 1246 }
1247 clear_const(current,rs1[i]);
1248 clear_const(current,rs2[i]);
1249 clear_const(current,rt1[i]);
1250 dirty_reg(current,rt1[i]);
1251 }
1252}
1253
// Register allocation for three-operand ALU instructions, selected by
// opcode2[i]:
//   0x20..0x23  ADD/ADDU/SUB/SUBU
//   0x2a, 0x2b  SLT/SLTU
//   0x24..0x27  AND/OR/XOR/NOR
//   0x2c..0x2f  DADD/DADDU/DSUB/DSUBU
// Besides allocating host registers this updates current->is32 for the
// target rt1[i].  On exit the constant state of rs1/rs2/rt1 is cleared and
// rt1 is marked dirty, unconditionally (including when rt1[i] is 0).
// NOTE(review): is32 appears to be a per-guest-register "value is 32-bit"
// bitmask and uu an "upper half unneeded" bitmask - confirm against the
// struct regstat definition.
void alu_alloc(struct regstat *current,int i)
{
  if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else {
        // One operand is r0: only allocate the other source if it is
        // needed again later.
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
    }
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
    if(rt1[i]) {
      // If either source is not flagged 32-bit, compare full 64-bit values
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        alloc_reg64(current,i,rs1[i]);
        alloc_reg64(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      } else {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
        alloc_reg(current,i,rt1[i]);
      }
    }
    // The result is flagged 32-bit in either case
    current->is32|=1LL<<rt1[i];
  }
  if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        alloc_reg(current,i,rs1[i]);
        alloc_reg(current,i,rs2[i]);
      }
      else
      {
        if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
        if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
      }
      alloc_reg(current,i,rt1[i]);
      // At least one source is not flagged 32-bit: the result is 64-bit
      if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
      {
        // Allocate the target's upper half unless its uu bit is set
        if(!((current->uu>>rt1[i])&1)) {
          alloc_reg64(current,i,rt1[i]);
        }
        // Upper halves of the sources are only needed if the target's
        // upper half actually got a host register
        if(get_reg(current->regmap,rt1[i]|64)>=0) {
          if(rs1[i]&&rs2[i]) {
            alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rs2[i]);
          }
          else
          {
            // Is it really worth it to keep 64-bit values in registers?
            #ifdef NATIVE_64BIT
            if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
            if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg64(current,i,rs2[i]);
            #endif
          }
        }
        current->is32&=~(1LL<<rt1[i]);
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
    if(rt1[i]) {
      if(rs1[i]&&rs2[i]) {
        // Full 64-bit operation unless the target's uu bit is set and its
        // upper half has no host register
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          alloc_reg64(current,i,rs1[i]);
          alloc_reg64(current,i,rs2[i]);
          alloc_reg64(current,i,rt1[i]);
        } else {
          alloc_reg(current,i,rs1[i]);
          alloc_reg(current,i,rs2[i]);
          alloc_reg(current,i,rt1[i]);
        }
      }
      else {
        alloc_reg(current,i,rt1[i]);
        if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
          // DADD used as move, or zeroing
          // If we have a 64-bit source, then make the target 64 bits too
          if(rs1[i]&&!((current->is32>>rs1[i])&1)) {
            if(get_reg(current->regmap,rs1[i])>=0) alloc_reg64(current,i,rs1[i]);
            alloc_reg64(current,i,rt1[i]);
          } else if(rs2[i]&&!((current->is32>>rs2[i])&1)) {
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
          if(opcode2[i]>=0x2e&&rs2[i]) {
            // DSUB used as negation - 64-bit result
            // If we have a 32-bit register, extend it to 64 bits
            if(get_reg(current->regmap,rs2[i])>=0) alloc_reg64(current,i,rs2[i]);
            alloc_reg64(current,i,rt1[i]);
          }
        }
      }
      // Update the target's is32 flag: two real sources -> 64-bit; a single
      // source -> inherit that source's flag; no source -> 32-bit.
      if(rs1[i]&&rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
      } else if(rs1[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs1[i])&1)
          current->is32|=1LL<<rt1[i];
      } else if(rs2[i]) {
        current->is32&=~(1LL<<rt1[i]);
        if((current->is32>>rs2[i])&1)
          current->is32|=1LL<<rt1[i];
      } else {
        current->is32|=1LL<<rt1[i];
      }
    }
  }
  clear_const(current,rs1[i]);
  clear_const(current,rs2[i]);
  clear_const(current,rt1[i]);
  dirty_reg(current,rt1[i]);
}
1375
1376void imm16_alloc(struct regstat *current,int i)
1377{
1378 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1379 else lt1[i]=rs1[i];
1380 if(rt1[i]) alloc_reg(current,i,rt1[i]);
1381 if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
1382 current->is32&=~(1LL<<rt1[i]);
1383 if(!((current->uu>>rt1[i])&1)||get_reg(current->regmap,rt1[i]|64)>=0) {
1384 // TODO: Could preserve the 32-bit flag if the immediate is zero
1385 alloc_reg64(current,i,rt1[i]);
1386 alloc_reg64(current,i,rs1[i]);
1387 }
1388 clear_const(current,rs1[i]);
1389 clear_const(current,rt1[i]);
1390 }
1391 else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
1392 if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
1393 current->is32|=1LL<<rt1[i];
1394 clear_const(current,rs1[i]);
1395 clear_const(current,rt1[i]);
1396 }
1397 else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
1398 if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
1399 if(rs1[i]!=rt1[i]) {
1400 if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
1401 alloc_reg64(current,i,rt1[i]);
1402 current->is32&=~(1LL<<rt1[i]);
1403 }
1404 }
1405 else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
1406 if(is_const(current,rs1[i])) {
1407 int v=get_const(current,rs1[i]);
1408 if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
1409 if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
1410 if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
1411 }
1412 else clear_const(current,rt1[i]);
1413 }
1414 else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
1415 if(is_const(current,rs1[i])) {
1416 int v=get_const(current,rs1[i]);
1417 set_const(current,rt1[i],v+imm[i]);
1418 }
1419 else clear_const(current,rt1[i]);
1420 current->is32|=1LL<<rt1[i];
1421 }
1422 else {
1423 set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
1424 current->is32|=1LL<<rt1[i];
1425 }
1426 dirty_reg(current,rt1[i]);
1427}
1428
1429void load_alloc(struct regstat *current,int i)
1430{
1431 clear_const(current,rt1[i]);
1432 //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
1433 if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
1434 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
373d1d07 1435 if(rt1[i]&&!((current->u>>rt1[i])&1)) {
57871462 1436 alloc_reg(current,i,rt1[i]);
373d1d07 1437 assert(get_reg(current->regmap,rt1[i])>=0);
57871462 1438 if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
1439 {
1440 current->is32&=~(1LL<<rt1[i]);
1441 alloc_reg64(current,i,rt1[i]);
1442 }
1443 else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
1444 {
1445 current->is32&=~(1LL<<rt1[i]);
1446 alloc_reg64(current,i,rt1[i]);
1447 alloc_all(current,i);
1448 alloc_reg64(current,i,FTEMP);
e1190b87 1449 minimum_free_regs[i]=HOST_REGS;
57871462 1450 }
1451 else current->is32|=1LL<<rt1[i];
1452 dirty_reg(current,rt1[i]);
57871462 1453 // LWL/LWR need a temporary register for the old value
1454 if(opcode[i]==0x22||opcode[i]==0x26)
1455 {
1456 alloc_reg(current,i,FTEMP);
1457 alloc_reg_temp(current,i,-1);
e1190b87 1458 minimum_free_regs[i]=1;
57871462 1459 }
1460 }
1461 else
1462 {
373d1d07 1463 // Load to r0 or unneeded register (dummy load)
57871462 1464 // but we still need a register to calculate the address
535d208a 1465 if(opcode[i]==0x22||opcode[i]==0x26)
1466 {
1467 alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
1468 }
57871462 1469 alloc_reg_temp(current,i,-1);
e1190b87 1470 minimum_free_regs[i]=1;
535d208a 1471 if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
1472 {
1473 alloc_all(current,i);
1474 alloc_reg64(current,i,FTEMP);
e1190b87 1475 minimum_free_regs[i]=HOST_REGS;
535d208a 1476 }
57871462 1477 }
1478}
1479
1480void store_alloc(struct regstat *current,int i)
1481{
1482 clear_const(current,rs2[i]);
1483 if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
1484 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1485 alloc_reg(current,i,rs2[i]);
1486 if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
1487 alloc_reg64(current,i,rs2[i]);
1488 if(rs2[i]) alloc_reg(current,i,FTEMP);
1489 }
57871462 1490 #if defined(HOST_IMM8)
1491 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1492 else alloc_reg(current,i,INVCP);
1493 #endif
b7918751 1494 if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWL/SDL/SDR
57871462 1495 alloc_reg(current,i,FTEMP);
1496 }
1497 // We need a temporary register for address generation
1498 alloc_reg_temp(current,i,-1);
e1190b87 1499 minimum_free_regs[i]=1;
57871462 1500}
1501
1502void c1ls_alloc(struct regstat *current,int i)
1503{
1504 //clear_const(current,rs1[i]); // FIXME
1505 clear_const(current,rt1[i]);
1506 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1507 alloc_reg(current,i,CSREG); // Status
1508 alloc_reg(current,i,FTEMP);
1509 if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
1510 alloc_reg64(current,i,FTEMP);
1511 }
57871462 1512 #if defined(HOST_IMM8)
1513 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1514 else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
1515 alloc_reg(current,i,INVCP);
1516 #endif
1517 // We need a temporary register for address generation
1518 alloc_reg_temp(current,i,-1);
1519}
1520
b9b61529 1521void c2ls_alloc(struct regstat *current,int i)
1522{
1523 clear_const(current,rt1[i]);
1524 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1525 alloc_reg(current,i,FTEMP);
b9b61529 1526 #if defined(HOST_IMM8)
1527 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1edfcc68 1528 if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
b9b61529 1529 alloc_reg(current,i,INVCP);
1530 #endif
1531 // We need a temporary register for address generation
1532 alloc_reg_temp(current,i,-1);
e1190b87 1533 minimum_free_regs[i]=1;
b9b61529 1534}
1535
57871462 1536#ifndef multdiv_alloc
1537void multdiv_alloc(struct regstat *current,int i)
1538{
1539 // case 0x18: MULT
1540 // case 0x19: MULTU
1541 // case 0x1A: DIV
1542 // case 0x1B: DIVU
1543 // case 0x1C: DMULT
1544 // case 0x1D: DMULTU
1545 // case 0x1E: DDIV
1546 // case 0x1F: DDIVU
1547 clear_const(current,rs1[i]);
1548 clear_const(current,rs2[i]);
1549 if(rs1[i]&&rs2[i])
1550 {
1551 if((opcode2[i]&4)==0) // 32-bit
1552 {
1553 current->u&=~(1LL<<HIREG);
1554 current->u&=~(1LL<<LOREG);
1555 alloc_reg(current,i,HIREG);
1556 alloc_reg(current,i,LOREG);
1557 alloc_reg(current,i,rs1[i]);
1558 alloc_reg(current,i,rs2[i]);
1559 current->is32|=1LL<<HIREG;
1560 current->is32|=1LL<<LOREG;
1561 dirty_reg(current,HIREG);
1562 dirty_reg(current,LOREG);
1563 }
1564 else // 64-bit
1565 {
1566 current->u&=~(1LL<<HIREG);
1567 current->u&=~(1LL<<LOREG);
1568 current->uu&=~(1LL<<HIREG);
1569 current->uu&=~(1LL<<LOREG);
1570 alloc_reg64(current,i,HIREG);
1571 //if(HOST_REGS>10) alloc_reg64(current,i,LOREG);
1572 alloc_reg64(current,i,rs1[i]);
1573 alloc_reg64(current,i,rs2[i]);
1574 alloc_all(current,i);
1575 current->is32&=~(1LL<<HIREG);
1576 current->is32&=~(1LL<<LOREG);
1577 dirty_reg(current,HIREG);
1578 dirty_reg(current,LOREG);
e1190b87 1579 minimum_free_regs[i]=HOST_REGS;
57871462 1580 }
1581 }
1582 else
1583 {
1584 // Multiply by zero is zero.
1585 // MIPS does not have a divide by zero exception.
1586 // The result is undefined, we return zero.
1587 alloc_reg(current,i,HIREG);
1588 alloc_reg(current,i,LOREG);
1589 current->is32|=1LL<<HIREG;
1590 current->is32|=1LL<<LOREG;
1591 dirty_reg(current,HIREG);
1592 dirty_reg(current,LOREG);
1593 }
1594}
1595#endif
1596
1597void cop0_alloc(struct regstat *current,int i)
1598{
1599 if(opcode2[i]==0) // MFC0
1600 {
1601 if(rt1[i]) {
1602 clear_const(current,rt1[i]);
1603 alloc_all(current,i);
1604 alloc_reg(current,i,rt1[i]);
1605 current->is32|=1LL<<rt1[i];
1606 dirty_reg(current,rt1[i]);
1607 }
1608 }
1609 else if(opcode2[i]==4) // MTC0
1610 {
1611 if(rs1[i]){
1612 clear_const(current,rs1[i]);
1613 alloc_reg(current,i,rs1[i]);
1614 alloc_all(current,i);
1615 }
1616 else {
1617 alloc_all(current,i); // FIXME: Keep r0
1618 current->u&=~1LL;
1619 alloc_reg(current,i,0);
1620 }
1621 }
1622 else
1623 {
1624 // TLBR/TLBWI/TLBWR/TLBP/ERET
1625 assert(opcode2[i]==0x10);
1626 alloc_all(current,i);
1627 }
e1190b87 1628 minimum_free_regs[i]=HOST_REGS;
57871462 1629}
1630
1631void cop1_alloc(struct regstat *current,int i)
1632{
1633 alloc_reg(current,i,CSREG); // Load status
1634 if(opcode2[i]<3) // MFC1/DMFC1/CFC1
1635 {
7de557a6 1636 if(rt1[i]){
1637 clear_const(current,rt1[i]);
1638 if(opcode2[i]==1) {
1639 alloc_reg64(current,i,rt1[i]); // DMFC1
1640 current->is32&=~(1LL<<rt1[i]);
1641 }else{
1642 alloc_reg(current,i,rt1[i]); // MFC1/CFC1
1643 current->is32|=1LL<<rt1[i];
1644 }
1645 dirty_reg(current,rt1[i]);
57871462 1646 }
57871462 1647 alloc_reg_temp(current,i,-1);
1648 }
1649 else if(opcode2[i]>3) // MTC1/DMTC1/CTC1
1650 {
1651 if(rs1[i]){
1652 clear_const(current,rs1[i]);
1653 if(opcode2[i]==5)
1654 alloc_reg64(current,i,rs1[i]); // DMTC1
1655 else
1656 alloc_reg(current,i,rs1[i]); // MTC1/CTC1
1657 alloc_reg_temp(current,i,-1);
1658 }
1659 else {
1660 current->u&=~1LL;
1661 alloc_reg(current,i,0);
1662 alloc_reg_temp(current,i,-1);
1663 }
1664 }
e1190b87 1665 minimum_free_regs[i]=1;
57871462 1666}
1667void fconv_alloc(struct regstat *current,int i)
1668{
1669 alloc_reg(current,i,CSREG); // Load status
1670 alloc_reg_temp(current,i,-1);
e1190b87 1671 minimum_free_regs[i]=1;
57871462 1672}
1673void float_alloc(struct regstat *current,int i)
1674{
1675 alloc_reg(current,i,CSREG); // Load status
1676 alloc_reg_temp(current,i,-1);
e1190b87 1677 minimum_free_regs[i]=1;
57871462 1678}
// Register allocation for C2OP-type instructions: a single temporary.
void c2op_alloc(struct regstat *current,int i)
{
  alloc_reg_temp(current, i, -1);
}
57871462 1683void fcomp_alloc(struct regstat *current,int i)
1684{
1685 alloc_reg(current,i,CSREG); // Load status
1686 alloc_reg(current,i,FSREG); // Load flags
1687 dirty_reg(current,FSREG); // Flag will be modified
1688 alloc_reg_temp(current,i,-1);
e1190b87 1689 minimum_free_regs[i]=1;
57871462 1690}
1691
1692void syscall_alloc(struct regstat *current,int i)
1693{
1694 alloc_cc(current,i);
1695 dirty_reg(current,CCREG);
1696 alloc_all(current,i);
e1190b87 1697 minimum_free_regs[i]=HOST_REGS;
57871462 1698 current->isconst=0;
1699}
1700
1701void delayslot_alloc(struct regstat *current,int i)
1702{
d404093f 1703 switch(itype[i])
1704 {
57871462 1705 case UJUMP:
1706 case CJUMP:
1707 case SJUMP:
1708 case RJUMP:
1709 case FJUMP:
1710 case SYSCALL:
7139f3c8 1711 case HLECALL:
57871462 1712 case SPAN:
1713 assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1);
c43b5311 1714 SysPrintf("Disabled speculative precompilation\n");
57871462 1715 stop_after_jal=1;
1716 break;
1717 case IMM16:
1718 imm16_alloc(current,i);
1719 break;
1720 case LOAD:
1721 case LOADLR:
1722 load_alloc(current,i);
1723 break;
1724 case STORE:
1725 case STORELR:
1726 store_alloc(current,i);
1727 break;
1728 case ALU:
1729 alu_alloc(current,i);
1730 break;
1731 case SHIFT:
1732 shift_alloc(current,i);
1733 break;
1734 case MULTDIV:
1735 multdiv_alloc(current,i);
1736 break;
1737 case SHIFTIMM:
1738 shiftimm_alloc(current,i);
1739 break;
1740 case MOV:
1741 mov_alloc(current,i);
1742 break;
1743 case COP0:
1744 cop0_alloc(current,i);
1745 break;
1746 case COP1:
b9b61529 1747 case COP2:
57871462 1748 cop1_alloc(current,i);
1749 break;
1750 case C1LS:
1751 c1ls_alloc(current,i);
1752 break;
b9b61529 1753 case C2LS:
1754 c2ls_alloc(current,i);
1755 break;
57871462 1756 case FCONV:
1757 fconv_alloc(current,i);
1758 break;
1759 case FLOAT:
1760 float_alloc(current,i);
1761 break;
1762 case FCOMP:
1763 fcomp_alloc(current,i);
1764 break;
b9b61529 1765 case C2OP:
1766 c2op_alloc(current,i);
1767 break;
57871462 1768 }
1769}
1770
1771// Special case where a branch and delay slot span two pages in virtual memory
1772static void pagespan_alloc(struct regstat *current,int i)
1773{
1774 current->isconst=0;
1775 current->wasconst=0;
1776 regs[i].wasconst=0;
e1190b87 1777 minimum_free_regs[i]=HOST_REGS;
57871462 1778 alloc_all(current,i);
1779 alloc_cc(current,i);
1780 dirty_reg(current,CCREG);
1781 if(opcode[i]==3) // JAL
1782 {
1783 alloc_reg(current,i,31);
1784 dirty_reg(current,31);
1785 }
1786 if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
1787 {
1788 alloc_reg(current,i,rs1[i]);
5067f341 1789 if (rt1[i]!=0) {
1790 alloc_reg(current,i,rt1[i]);
1791 dirty_reg(current,rt1[i]);
57871462 1792 }
1793 }
1794 if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
1795 {
1796 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1797 if(rs2[i]) alloc_reg(current,i,rs2[i]);
1798 if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
1799 {
1800 if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1801 if(rs2[i]) alloc_reg64(current,i,rs2[i]);
1802 }
1803 }
1804 else
1805 if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
1806 {
1807 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1808 if(!((current->is32>>rs1[i])&1))
1809 {
1810 if(rs1[i]) alloc_reg64(current,i,rs1[i]);
1811 }
1812 }
1813 else
1814 if(opcode[i]==0x11) // BC1
1815 {
1816 alloc_reg(current,i,FSREG);
1817 alloc_reg(current,i,CSREG);
1818 }
1819 //else ...
1820}
1821
e2b5e7aa 1822static void add_stub(int type,int addr,int retaddr,int a,int b,int c,int d,int e)
57871462 1823{
1824 stubs[stubcount][0]=type;
1825 stubs[stubcount][1]=addr;
1826 stubs[stubcount][2]=retaddr;
1827 stubs[stubcount][3]=a;
1828 stubs[stubcount][4]=b;
1829 stubs[stubcount][5]=c;
1830 stubs[stubcount][6]=d;
1831 stubs[stubcount][7]=e;
1832 stubcount++;
1833}
1834
1835// Write out a single register
1836void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
1837{
1838 int hr;
1839 for(hr=0;hr<HOST_REGS;hr++) {
1840 if(hr!=EXCLUDE_REG) {
1841 if((regmap[hr]&63)==r) {
1842 if((dirty>>hr)&1) {
1843 if(regmap[hr]<64) {
1844 emit_storereg(r,hr);
57871462 1845 }else{
1846 emit_storereg(r|64,hr);
1847 }
1848 }
1849 }
1850 }
1851 }
1852}
1853
// Trace/debug helpers.  The whole section is compiled out with "#if 0".
#if 0
// Rolling checksum over 2097152 32-bit words read from rdram.
static int mchecksum(void)
{
  //if(!tracedebug) return 0;
  int i;
  int sum=0;
  for(i=0;i<2097152;i++) {
    unsigned int temp=sum;
    // Shift left by one, feeding the inverted old top bit back into bit 0,
    // then mix in the next word
    sum<<=1;
    sum|=(~temp)>>31;
    sum^=((u_int *)rdram)[i];
  }
  return sum;
}

// XOR of the first 64 32-bit words of the reg array.
static int rchecksum(void)
{
  int i;
  int sum=0;
  for(i=0;i<64;i++)
    sum^=((u_int *)reg)[i];
  return sum;
}

// Print reg[0..31], each as two 32-bit halves (word [1] first, then [0]).
static void rlist(void)
{
  int i;
  printf("TRACE: ");
  for(i=0;i<32;i++)
    printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
  printf("\n");
}

// Turn on the tracedebug flag.
static void enabletrace(void)
{
  tracedebug=1;
}

// Print a trace line (cycle count, next interrupt, memory checksum and the
// register list) while Count lies in a hard-coded negative window.
// The architecture-specific lines read words at fixed offsets from the
// address of a parameter/local, i.e. they depend on the exact stack layout
// produced by the compiler.
static void memdebug(int i)
{
  //printf("TRACE: count=%d next=%d (checksum %x) lo=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[LOREG]>>32),(int)reg[LOREG]);
  //printf("TRACE: count=%d next=%d (rchecksum %x)\n",Count,next_interupt,rchecksum());
  //rlist();
  //if(tracedebug) {
  //if(Count>=-2084597794) {
  if((signed int)Count>=-2084597794&&(signed int)Count<0) {
  //if(0) {
    printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
    //printf("TRACE: count=%d next=%d (checksum %x) Status=%x\n",Count,next_interupt,mchecksum(),Status);
    //printf("TRACE: count=%d next=%d (checksum %x) hi=%8x%8x\n",Count,next_interupt,mchecksum(),(int)(reg[HIREG]>>32),(int)reg[HIREG]);
    rlist();
    #ifdef __i386__
    printf("TRACE: %x\n",(&i)[-1]);
    #endif
    #ifdef __arm__
    int j;
    printf("TRACE: %x \n",(&j)[10]);
    printf("TRACE: %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",(&j)[1],(&j)[2],(&j)[3],(&j)[4],(&j)[5],(&j)[6],(&j)[7],(&j)[8],(&j)[9],(&j)[10],(&j)[11],(&j)[12],(&j)[13],(&j)[14],(&j)[15],(&j)[16],(&j)[17],(&j)[18],(&j)[19],(&j)[20]);
    #endif
    //fflush(stdout);
  }
  //printf("TRACE: %x\n",(&i)[-1]);
}
#endif
57871462 1918
57871462 1919void alu_assemble(int i,struct regstat *i_regs)
1920{
1921 if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
1922 if(rt1[i]) {
1923 signed char s1,s2,t;
1924 t=get_reg(i_regs->regmap,rt1[i]);
1925 if(t>=0) {
1926 s1=get_reg(i_regs->regmap,rs1[i]);
1927 s2=get_reg(i_regs->regmap,rs2[i]);
1928 if(rs1[i]&&rs2[i]) {
1929 assert(s1>=0);
1930 assert(s2>=0);
1931 if(opcode2[i]&2) emit_sub(s1,s2,t);
1932 else emit_add(s1,s2,t);
1933 }
1934 else if(rs1[i]) {
1935 if(s1>=0) emit_mov(s1,t);
1936 else emit_loadreg(rs1[i],t);
1937 }
1938 else if(rs2[i]) {
1939 if(s2>=0) {
1940 if(opcode2[i]&2) emit_neg(s2,t);
1941 else emit_mov(s2,t);
1942 }
1943 else {
1944 emit_loadreg(rs2[i],t);
1945 if(opcode2[i]&2) emit_neg(t,t);
1946 }
1947 }
1948 else emit_zeroreg(t);
1949 }
1950 }
1951 }
1952 if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
1953 if(rt1[i]) {
1954 signed char s1l,s2l,s1h,s2h,tl,th;
1955 tl=get_reg(i_regs->regmap,rt1[i]);
1956 th=get_reg(i_regs->regmap,rt1[i]|64);
1957 if(tl>=0) {
1958 s1l=get_reg(i_regs->regmap,rs1[i]);
1959 s2l=get_reg(i_regs->regmap,rs2[i]);
1960 s1h=get_reg(i_regs->regmap,rs1[i]|64);
1961 s2h=get_reg(i_regs->regmap,rs2[i]|64);
1962 if(rs1[i]&&rs2[i]) {
1963 assert(s1l>=0);
1964 assert(s2l>=0);
1965 if(opcode2[i]&2) emit_subs(s1l,s2l,tl);
1966 else emit_adds(s1l,s2l,tl);
1967 if(th>=0) {
1968 #ifdef INVERTED_CARRY
1969 if(opcode2[i]&2) {if(s1h!=th) emit_mov(s1h,th);emit_sbb(th,s2h);}
1970 #else
1971 if(opcode2[i]&2) emit_sbc(s1h,s2h,th);
1972 #endif
1973 else emit_add(s1h,s2h,th);
1974 }
1975 }
1976 else if(rs1[i]) {
1977 if(s1l>=0) emit_mov(s1l,tl);
1978 else emit_loadreg(rs1[i],tl);
1979 if(th>=0) {
1980 if(s1h>=0) emit_mov(s1h,th);
1981 else emit_loadreg(rs1[i]|64,th);
1982 }
1983 }
1984 else if(rs2[i]) {
1985 if(s2l>=0) {
1986 if(opcode2[i]&2) emit_negs(s2l,tl);
1987 else emit_mov(s2l,tl);
1988 }
1989 else {
1990 emit_loadreg(rs2[i],tl);
1991 if(opcode2[i]&2) emit_negs(tl,tl);
1992 }
1993 if(th>=0) {
1994 #ifdef INVERTED_CARRY
1995 if(s2h>=0) emit_mov(s2h,th);
1996 else emit_loadreg(rs2[i]|64,th);
1997 if(opcode2[i]&2) {
1998 emit_adcimm(-1,th); // x86 has inverted carry flag
1999 emit_not(th,th);
2000 }
2001 #else
2002 if(opcode2[i]&2) {
2003 if(s2h>=0) emit_rscimm(s2h,0,th);
2004 else {
2005 emit_loadreg(rs2[i]|64,th);
2006 emit_rscimm(th,0,th);
2007 }
2008 }else{
2009 if(s2h>=0) emit_mov(s2h,th);
2010 else emit_loadreg(rs2[i]|64,th);
2011 }
2012 #endif
2013 }
2014 }
2015 else {
2016 emit_zeroreg(tl);
2017 if(th>=0) emit_zeroreg(th);
2018 }
2019 }
2020 }
2021 }
2022 if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
2023 if(rt1[i]) {
2024 signed char s1l,s1h,s2l,s2h,t;
2025 if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
2026 {
2027 t=get_reg(i_regs->regmap,rt1[i]);
2028 //assert(t>=0);
2029 if(t>=0) {
2030 s1l=get_reg(i_regs->regmap,rs1[i]);
2031 s1h=get_reg(i_regs->regmap,rs1[i]|64);
2032 s2l=get_reg(i_regs->regmap,rs2[i]);
2033 s2h=get_reg(i_regs->regmap,rs2[i]|64);
2034 if(rs2[i]==0) // rx<r0
2035 {
2036 assert(s1h>=0);
2037 if(opcode2[i]==0x2a) // SLT
2038 emit_shrimm(s1h,31,t);
2039 else // SLTU (unsigned can not be less than zero)
2040 emit_zeroreg(t);
2041 }
2042 else if(rs1[i]==0) // r0<rx
2043 {
2044 assert(s2h>=0);
2045 if(opcode2[i]==0x2a) // SLT
2046 emit_set_gz64_32(s2h,s2l,t);
2047 else // SLTU (set if not zero)
2048 emit_set_nz64_32(s2h,s2l,t);
2049 }
2050 else {
2051 assert(s1l>=0);assert(s1h>=0);
2052 assert(s2l>=0);assert(s2h>=0);
2053 if(opcode2[i]==0x2a) // SLT
2054 emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
2055 else // SLTU
2056 emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
2057 }
2058 }
2059 } else {
2060 t=get_reg(i_regs->regmap,rt1[i]);
2061 //assert(t>=0);
2062 if(t>=0) {
2063 s1l=get_reg(i_regs->regmap,rs1[i]);
2064 s2l=get_reg(i_regs->regmap,rs2[i]);
2065 if(rs2[i]==0) // rx<r0
2066 {
2067 assert(s1l>=0);
2068 if(opcode2[i]==0x2a) // SLT
2069 emit_shrimm(s1l,31,t);
2070 else // SLTU (unsigned can not be less than zero)
2071 emit_zeroreg(t);
2072 }
2073 else if(rs1[i]==0) // r0<rx
2074 {
2075 assert(s2l>=0);
2076 if(opcode2[i]==0x2a) // SLT
2077 emit_set_gz32(s2l,t);
2078 else // SLTU (set if not zero)
2079 emit_set_nz32(s2l,t);
2080 }
2081 else{
2082 assert(s1l>=0);assert(s2l>=0);
2083 if(opcode2[i]==0x2a) // SLT
2084 emit_set_if_less32(s1l,s2l,t);
2085 else // SLTU
2086 emit_set_if_carry32(s1l,s2l,t);
2087 }
2088 }
2089 }
2090 }
2091 }
2092 if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
2093 if(rt1[i]) {
2094 signed char s1l,s1h,s2l,s2h,th,tl;
2095 tl=get_reg(i_regs->regmap,rt1[i]);
2096 th=get_reg(i_regs->regmap,rt1[i]|64);
2097 if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
2098 {
2099 assert(tl>=0);
2100 if(tl>=0) {
2101 s1l=get_reg(i_regs->regmap,rs1[i]);
2102 s1h=get_reg(i_regs->regmap,rs1[i]|64);
2103 s2l=get_reg(i_regs->regmap,rs2[i]);
2104 s2h=get_reg(i_regs->regmap,rs2[i]|64);
2105 if(rs1[i]&&rs2[i]) {
2106 assert(s1l>=0);assert(s1h>=0);
2107 assert(s2l>=0);assert(s2h>=0);
2108 if(opcode2[i]==0x24) { // AND
2109 emit_and(s1l,s2l,tl);
2110 emit_and(s1h,s2h,th);
2111 } else
2112 if(opcode2[i]==0x25) { // OR
2113 emit_or(s1l,s2l,tl);
2114 emit_or(s1h,s2h,th);
2115 } else
2116 if(opcode2[i]==0x26) { // XOR
2117 emit_xor(s1l,s2l,tl);
2118 emit_xor(s1h,s2h,th);
2119 } else
2120 if(opcode2[i]==0x27) { // NOR
2121 emit_or(s1l,s2l,tl);
2122 emit_or(s1h,s2h,th);
2123 emit_not(tl,tl);
2124 emit_not(th,th);
2125 }
2126 }
2127 else
2128 {
2129 if(opcode2[i]==0x24) { // AND
2130 emit_zeroreg(tl);
2131 emit_zeroreg(th);
2132 } else
2133 if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2134 if(rs1[i]){
2135 if(s1l>=0) emit_mov(s1l,tl);
2136 else emit_loadreg(rs1[i],tl);
2137 if(s1h>=0) emit_mov(s1h,th);
2138 else emit_loadreg(rs1[i]|64,th);
2139 }
2140 else
2141 if(rs2[i]){
2142 if(s2l>=0) emit_mov(s2l,tl);
2143 else emit_loadreg(rs2[i],tl);
2144 if(s2h>=0) emit_mov(s2h,th);
2145 else emit_loadreg(rs2[i]|64,th);
2146 }
2147 else{
2148 emit_zeroreg(tl);
2149 emit_zeroreg(th);
2150 }
2151 } else
2152 if(opcode2[i]==0x27) { // NOR
2153 if(rs1[i]){
2154 if(s1l>=0) emit_not(s1l,tl);
2155 else{
2156 emit_loadreg(rs1[i],tl);
2157 emit_not(tl,tl);
2158 }
2159 if(s1h>=0) emit_not(s1h,th);
2160 else{
2161 emit_loadreg(rs1[i]|64,th);
2162 emit_not(th,th);
2163 }
2164 }
2165 else
2166 if(rs2[i]){
2167 if(s2l>=0) emit_not(s2l,tl);
2168 else{
2169 emit_loadreg(rs2[i],tl);
2170 emit_not(tl,tl);
2171 }
2172 if(s2h>=0) emit_not(s2h,th);
2173 else{
2174 emit_loadreg(rs2[i]|64,th);
2175 emit_not(th,th);
2176 }
2177 }
2178 else {
2179 emit_movimm(-1,tl);
2180 emit_movimm(-1,th);
2181 }
2182 }
2183 }
2184 }
2185 }
2186 else
2187 {
2188 // 32 bit
2189 if(tl>=0) {
2190 s1l=get_reg(i_regs->regmap,rs1[i]);
2191 s2l=get_reg(i_regs->regmap,rs2[i]);
2192 if(rs1[i]&&rs2[i]) {
2193 assert(s1l>=0);
2194 assert(s2l>=0);
2195 if(opcode2[i]==0x24) { // AND
2196 emit_and(s1l,s2l,tl);
2197 } else
2198 if(opcode2[i]==0x25) { // OR
2199 emit_or(s1l,s2l,tl);
2200 } else
2201 if(opcode2[i]==0x26) { // XOR
2202 emit_xor(s1l,s2l,tl);
2203 } else
2204 if(opcode2[i]==0x27) { // NOR
2205 emit_or(s1l,s2l,tl);
2206 emit_not(tl,tl);
2207 }
2208 }
2209 else
2210 {
2211 if(opcode2[i]==0x24) { // AND
2212 emit_zeroreg(tl);
2213 } else
2214 if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
2215 if(rs1[i]){
2216 if(s1l>=0) emit_mov(s1l,tl);
2217 else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
2218 }
2219 else
2220 if(rs2[i]){
2221 if(s2l>=0) emit_mov(s2l,tl);
2222 else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
2223 }
2224 else emit_zeroreg(tl);
2225 } else
2226 if(opcode2[i]==0x27) { // NOR
2227 if(rs1[i]){
2228 if(s1l>=0) emit_not(s1l,tl);
2229 else {
2230 emit_loadreg(rs1[i],tl);
2231 emit_not(tl,tl);
2232 }
2233 }
2234 else
2235 if(rs2[i]){
2236 if(s2l>=0) emit_not(s2l,tl);
2237 else {
2238 emit_loadreg(rs2[i],tl);
2239 emit_not(tl,tl);
2240 }
2241 }
2242 else emit_movimm(-1,tl);
2243 }
2244 }
2245 }
2246 }
2247 }
2248 }
2249}
2250
2251void imm16_assemble(int i,struct regstat *i_regs)
2252{
2253 if (opcode[i]==0x0f) { // LUI
2254 if(rt1[i]) {
2255 signed char t;
2256 t=get_reg(i_regs->regmap,rt1[i]);
2257 //assert(t>=0);
2258 if(t>=0) {
2259 if(!((i_regs->isconst>>t)&1))
2260 emit_movimm(imm[i]<<16,t);
2261 }
2262 }
2263 }
2264 if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
2265 if(rt1[i]) {
2266 signed char s,t;
2267 t=get_reg(i_regs->regmap,rt1[i]);
2268 s=get_reg(i_regs->regmap,rs1[i]);
2269 if(rs1[i]) {
2270 //assert(t>=0);
2271 //assert(s>=0);
2272 if(t>=0) {
2273 if(!((i_regs->isconst>>t)&1)) {
2274 if(s<0) {
2275 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2276 emit_addimm(t,imm[i],t);
2277 }else{
2278 if(!((i_regs->wasconst>>s)&1))
2279 emit_addimm(s,imm[i],t);
2280 else
2281 emit_movimm(constmap[i][s]+imm[i],t);
2282 }
2283 }
2284 }
2285 } else {
2286 if(t>=0) {
2287 if(!((i_regs->isconst>>t)&1))
2288 emit_movimm(imm[i],t);
2289 }
2290 }
2291 }
2292 }
2293 if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
2294 if(rt1[i]) {
2295 signed char sh,sl,th,tl;
2296 th=get_reg(i_regs->regmap,rt1[i]|64);
2297 tl=get_reg(i_regs->regmap,rt1[i]);
2298 sh=get_reg(i_regs->regmap,rs1[i]|64);
2299 sl=get_reg(i_regs->regmap,rs1[i]);
2300 if(tl>=0) {
2301 if(rs1[i]) {
2302 assert(sh>=0);
2303 assert(sl>=0);
2304 if(th>=0) {
2305 emit_addimm64_32(sh,sl,imm[i],th,tl);
2306 }
2307 else {
2308 emit_addimm(sl,imm[i],tl);
2309 }
2310 } else {
2311 emit_movimm(imm[i],tl);
2312 if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
2313 }
2314 }
2315 }
2316 }
2317 else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
2318 if(rt1[i]) {
2319 //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
2320 signed char sh,sl,t;
2321 t=get_reg(i_regs->regmap,rt1[i]);
2322 sh=get_reg(i_regs->regmap,rs1[i]|64);
2323 sl=get_reg(i_regs->regmap,rs1[i]);
2324 //assert(t>=0);
2325 if(t>=0) {
2326 if(rs1[i]>0) {
2327 if(sh<0) assert((i_regs->was32>>rs1[i])&1);
2328 if(sh<0||((i_regs->was32>>rs1[i])&1)) {
2329 if(opcode[i]==0x0a) { // SLTI
2330 if(sl<0) {
2331 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2332 emit_slti32(t,imm[i],t);
2333 }else{
2334 emit_slti32(sl,imm[i],t);
2335 }
2336 }
2337 else { // SLTIU
2338 if(sl<0) {
2339 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2340 emit_sltiu32(t,imm[i],t);
2341 }else{
2342 emit_sltiu32(sl,imm[i],t);
2343 }
2344 }
2345 }else{ // 64-bit
2346 assert(sl>=0);
2347 if(opcode[i]==0x0a) // SLTI
2348 emit_slti64_32(sh,sl,imm[i],t);
2349 else // SLTIU
2350 emit_sltiu64_32(sh,sl,imm[i],t);
2351 }
2352 }else{
2353 // SLTI(U) with r0 is just stupid,
2354 // nonetheless examples can be found
2355 if(opcode[i]==0x0a) // SLTI
2356 if(0<imm[i]) emit_movimm(1,t);
2357 else emit_zeroreg(t);
2358 else // SLTIU
2359 {
2360 if(imm[i]) emit_movimm(1,t);
2361 else emit_zeroreg(t);
2362 }
2363 }
2364 }
2365 }
2366 }
2367 else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
2368 if(rt1[i]) {
2369 signed char sh,sl,th,tl;
2370 th=get_reg(i_regs->regmap,rt1[i]|64);
2371 tl=get_reg(i_regs->regmap,rt1[i]);
2372 sh=get_reg(i_regs->regmap,rs1[i]|64);
2373 sl=get_reg(i_regs->regmap,rs1[i]);
2374 if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
2375 if(opcode[i]==0x0c) //ANDI
2376 {
2377 if(rs1[i]) {
2378 if(sl<0) {
2379 if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
2380 emit_andimm(tl,imm[i],tl);
2381 }else{
2382 if(!((i_regs->wasconst>>sl)&1))
2383 emit_andimm(sl,imm[i],tl);
2384 else
2385 emit_movimm(constmap[i][sl]&imm[i],tl);
2386 }
2387 }
2388 else
2389 emit_zeroreg(tl);
2390 if(th>=0) emit_zeroreg(th);
2391 }
2392 else
2393 {
2394 if(rs1[i]) {
2395 if(sl<0) {
2396 if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
2397 }
2398 if(th>=0) {
2399 if(sh<0) {
2400 emit_loadreg(rs1[i]|64,th);
2401 }else{
2402 emit_mov(sh,th);
2403 }
2404 }
581335b0 2405 if(opcode[i]==0x0d) { // ORI
2406 if(sl<0) {
2407 emit_orimm(tl,imm[i],tl);
2408 }else{
2409 if(!((i_regs->wasconst>>sl)&1))
2410 emit_orimm(sl,imm[i],tl);
2411 else
2412 emit_movimm(constmap[i][sl]|imm[i],tl);
2413 }
57871462 2414 }
581335b0 2415 if(opcode[i]==0x0e) { // XORI
2416 if(sl<0) {
2417 emit_xorimm(tl,imm[i],tl);
2418 }else{
2419 if(!((i_regs->wasconst>>sl)&1))
2420 emit_xorimm(sl,imm[i],tl);
2421 else
2422 emit_movimm(constmap[i][sl]^imm[i],tl);
2423 }
57871462 2424 }
2425 }
2426 else {
2427 emit_movimm(imm[i],tl);
2428 if(th>=0) emit_zeroreg(th);
2429 }
2430 }
2431 }
2432 }
2433 }
2434}
2435
2436void shiftimm_assemble(int i,struct regstat *i_regs)
2437{
2438 if(opcode2[i]<=0x3) // SLL/SRL/SRA
2439 {
2440 if(rt1[i]) {
2441 signed char s,t;
2442 t=get_reg(i_regs->regmap,rt1[i]);
2443 s=get_reg(i_regs->regmap,rs1[i]);
2444 //assert(t>=0);
dc49e339 2445 if(t>=0&&!((i_regs->isconst>>t)&1)){
57871462 2446 if(rs1[i]==0)
2447 {
2448 emit_zeroreg(t);
2449 }
2450 else
2451 {
2452 if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2453 if(imm[i]) {
2454 if(opcode2[i]==0) // SLL
2455 {
2456 emit_shlimm(s<0?t:s,imm[i],t);
2457 }
2458 if(opcode2[i]==2) // SRL
2459 {
2460 emit_shrimm(s<0?t:s,imm[i],t);
2461 }
2462 if(opcode2[i]==3) // SRA
2463 {
2464 emit_sarimm(s<0?t:s,imm[i],t);
2465 }
2466 }else{
2467 // Shift by zero
2468 if(s>=0 && s!=t) emit_mov(s,t);
2469 }
2470 }
2471 }
2472 //emit_storereg(rt1[i],t); //DEBUG
2473 }
2474 }
2475 if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
2476 {
2477 if(rt1[i]) {
2478 signed char sh,sl,th,tl;
2479 th=get_reg(i_regs->regmap,rt1[i]|64);
2480 tl=get_reg(i_regs->regmap,rt1[i]);
2481 sh=get_reg(i_regs->regmap,rs1[i]|64);
2482 sl=get_reg(i_regs->regmap,rs1[i]);
2483 if(tl>=0) {
2484 if(rs1[i]==0)
2485 {
2486 emit_zeroreg(tl);
2487 if(th>=0) emit_zeroreg(th);
2488 }
2489 else
2490 {
2491 assert(sl>=0);
2492 assert(sh>=0);
2493 if(imm[i]) {
2494 if(opcode2[i]==0x38) // DSLL
2495 {
2496 if(th>=0) emit_shldimm(sh,sl,imm[i],th);
2497 emit_shlimm(sl,imm[i],tl);
2498 }
2499 if(opcode2[i]==0x3a) // DSRL
2500 {
2501 emit_shrdimm(sl,sh,imm[i],tl);
2502 if(th>=0) emit_shrimm(sh,imm[i],th);
2503 }
2504 if(opcode2[i]==0x3b) // DSRA
2505 {
2506 emit_shrdimm(sl,sh,imm[i],tl);
2507 if(th>=0) emit_sarimm(sh,imm[i],th);
2508 }
2509 }else{
2510 // Shift by zero
2511 if(sl!=tl) emit_mov(sl,tl);
2512 if(th>=0&&sh!=th) emit_mov(sh,th);
2513 }
2514 }
2515 }
2516 }
2517 }
2518 if(opcode2[i]==0x3c) // DSLL32
2519 {
2520 if(rt1[i]) {
2521 signed char sl,tl,th;
2522 tl=get_reg(i_regs->regmap,rt1[i]);
2523 th=get_reg(i_regs->regmap,rt1[i]|64);
2524 sl=get_reg(i_regs->regmap,rs1[i]);
2525 if(th>=0||tl>=0){
2526 assert(tl>=0);
2527 assert(th>=0);
2528 assert(sl>=0);
2529 emit_mov(sl,th);
2530 emit_zeroreg(tl);
2531 if(imm[i]>32)
2532 {
2533 emit_shlimm(th,imm[i]&31,th);
2534 }
2535 }
2536 }
2537 }
2538 if(opcode2[i]==0x3e) // DSRL32
2539 {
2540 if(rt1[i]) {
2541 signed char sh,tl,th;
2542 tl=get_reg(i_regs->regmap,rt1[i]);
2543 th=get_reg(i_regs->regmap,rt1[i]|64);
2544 sh=get_reg(i_regs->regmap,rs1[i]|64);
2545 if(tl>=0){
2546 assert(sh>=0);
2547 emit_mov(sh,tl);
2548 if(th>=0) emit_zeroreg(th);
2549 if(imm[i]>32)
2550 {
2551 emit_shrimm(tl,imm[i]&31,tl);
2552 }
2553 }
2554 }
2555 }
2556 if(opcode2[i]==0x3f) // DSRA32
2557 {
2558 if(rt1[i]) {
2559 signed char sh,tl;
2560 tl=get_reg(i_regs->regmap,rt1[i]);
2561 sh=get_reg(i_regs->regmap,rs1[i]|64);
2562 if(tl>=0){
2563 assert(sh>=0);
2564 emit_mov(sh,tl);
2565 if(imm[i]>32)
2566 {
2567 emit_sarimm(tl,imm[i]&31,tl);
2568 }
2569 }
2570 }
2571 }
2572}
2573
#ifndef shift_assemble
/*
 * Fallback used only when no shift_assemble macro is defined: reports that
 * an implementation is missing and terminates the process with status 1.
 */
void shift_assemble(int i,struct regstat *i_regs)
{
  (void)i;
  (void)i_regs;
  printf("Need shift_assemble for this architecture.\n");
  exit(1);
}
#endif
2581
2582void load_assemble(int i,struct regstat *i_regs)
2583{
2584 int s,th,tl,addr,map=-1;
2585 int offset;
2586 int jaddr=0;
5bf843dc 2587 int memtarget=0,c=0;
b1570849 2588 int fastload_reg_override=0;
57871462 2589 u_int hr,reglist=0;
2590 th=get_reg(i_regs->regmap,rt1[i]|64);
2591 tl=get_reg(i_regs->regmap,rt1[i]);
2592 s=get_reg(i_regs->regmap,rs1[i]);
2593 offset=imm[i];
2594 for(hr=0;hr<HOST_REGS;hr++) {
2595 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2596 }
2597 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2598 if(s>=0) {
2599 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2600 if (c) {
2601 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2602 }
57871462 2603 }
57871462 2604 //printf("load_assemble: c=%d\n",c);
2605 //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
2606 // FIXME: Even if the load is a NOP, we should check for pagefaults...
581335b0 2607 if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80))
f18c0f46 2608 ||rt1[i]==0) {
5bf843dc 2609 // could be FIFO, must perform the read
f18c0f46 2610 // ||dummy read
5bf843dc 2611 assem_debug("(forced read)\n");
2612 tl=get_reg(i_regs->regmap,-1);
2613 assert(tl>=0);
5bf843dc 2614 }
2615 if(offset||s<0||c) addr=tl;
2616 else addr=s;
535d208a 2617 //if(tl<0) tl=get_reg(i_regs->regmap,-1);
2618 if(tl>=0) {
2619 //printf("load_assemble: c=%d\n",c);
2620 //if(c) printf("load_assemble: const=%x\n",(int)constmap[i][s]+offset);
2621 assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
2622 reglist&=~(1<<tl);
2623 if(th>=0) reglist&=~(1<<th);
1edfcc68 2624 if(!c) {
2625 #ifdef RAM_OFFSET
2626 map=get_reg(i_regs->regmap,ROREG);
2627 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
2628 #endif
2629 #ifdef R29_HACK
2630 // Strmnnrmn's speed hack
2631 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
2632 #endif
2633 {
2634 jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
535d208a 2635 }
1edfcc68 2636 }
2637 else if(ram_offset&&memtarget) {
2638 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2639 fastload_reg_override=HOST_TEMPREG;
535d208a 2640 }
2641 int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
2642 if (opcode[i]==0x20) { // LB
2643 if(!c||memtarget) {
2644 if(!dummy) {
57871462 2645 #ifdef HOST_IMM_ADDR32
2646 if(c)
2647 emit_movsbl_tlb((constmap[i][s]+offset)^3,map,tl);
2648 else
2649 #endif
2650 {
2651 //emit_xorimm(addr,3,tl);
57871462 2652 //emit_movsbl_indexed((int)rdram-0x80000000,tl,tl);
535d208a 2653 int x=0,a=tl;
2002a1db 2654#ifdef BIG_ENDIAN_MIPS
57871462 2655 if(!c) emit_xorimm(addr,3,tl);
2656 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 2657#else
535d208a 2658 if(!c) a=addr;
dadf55f2 2659#endif
b1570849 2660 if(fastload_reg_override) a=fastload_reg_override;
2661
535d208a 2662 emit_movsbl_indexed_tlb(x,a,map,tl);
57871462 2663 }
57871462 2664 }
535d208a 2665 if(jaddr)
2666 add_stub(LOADB_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2667 }
535d208a 2668 else
2669 inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2670 }
2671 if (opcode[i]==0x21) { // LH
2672 if(!c||memtarget) {
2673 if(!dummy) {
57871462 2674 #ifdef HOST_IMM_ADDR32
2675 if(c)
2676 emit_movswl_tlb((constmap[i][s]+offset)^2,map,tl);
2677 else
2678 #endif
2679 {
535d208a 2680 int x=0,a=tl;
2002a1db 2681#ifdef BIG_ENDIAN_MIPS
57871462 2682 if(!c) emit_xorimm(addr,2,tl);
2683 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 2684#else
535d208a 2685 if(!c) a=addr;
dadf55f2 2686#endif
b1570849 2687 if(fastload_reg_override) a=fastload_reg_override;
57871462 2688 //#ifdef
2689 //emit_movswl_indexed_tlb(x,tl,map,tl);
2690 //else
2691 if(map>=0) {
535d208a 2692 emit_movswl_indexed(x,a,tl);
2693 }else{
a327ad27 2694 #if 1 //def RAM_OFFSET
535d208a 2695 emit_movswl_indexed(x,a,tl);
2696 #else
2697 emit_movswl_indexed((int)rdram-0x80000000+x,a,tl);
2698 #endif
2699 }
57871462 2700 }
57871462 2701 }
535d208a 2702 if(jaddr)
2703 add_stub(LOADH_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2704 }
535d208a 2705 else
2706 inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2707 }
2708 if (opcode[i]==0x23) { // LW
2709 if(!c||memtarget) {
2710 if(!dummy) {
dadf55f2 2711 int a=addr;
b1570849 2712 if(fastload_reg_override) a=fastload_reg_override;
57871462 2713 //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
2714 #ifdef HOST_IMM_ADDR32
2715 if(c)
2716 emit_readword_tlb(constmap[i][s]+offset,map,tl);
2717 else
2718 #endif
dadf55f2 2719 emit_readword_indexed_tlb(0,a,map,tl);
57871462 2720 }
535d208a 2721 if(jaddr)
2722 add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2723 }
535d208a 2724 else
2725 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2726 }
2727 if (opcode[i]==0x24) { // LBU
2728 if(!c||memtarget) {
2729 if(!dummy) {
57871462 2730 #ifdef HOST_IMM_ADDR32
2731 if(c)
2732 emit_movzbl_tlb((constmap[i][s]+offset)^3,map,tl);
2733 else
2734 #endif
2735 {
2736 //emit_xorimm(addr,3,tl);
57871462 2737 //emit_movzbl_indexed((int)rdram-0x80000000,tl,tl);
535d208a 2738 int x=0,a=tl;
2002a1db 2739#ifdef BIG_ENDIAN_MIPS
57871462 2740 if(!c) emit_xorimm(addr,3,tl);
2741 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 2742#else
535d208a 2743 if(!c) a=addr;
dadf55f2 2744#endif
b1570849 2745 if(fastload_reg_override) a=fastload_reg_override;
2746
535d208a 2747 emit_movzbl_indexed_tlb(x,a,map,tl);
57871462 2748 }
57871462 2749 }
535d208a 2750 if(jaddr)
2751 add_stub(LOADBU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2752 }
535d208a 2753 else
2754 inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2755 }
2756 if (opcode[i]==0x25) { // LHU
2757 if(!c||memtarget) {
2758 if(!dummy) {
57871462 2759 #ifdef HOST_IMM_ADDR32
2760 if(c)
2761 emit_movzwl_tlb((constmap[i][s]+offset)^2,map,tl);
2762 else
2763 #endif
2764 {
535d208a 2765 int x=0,a=tl;
2002a1db 2766#ifdef BIG_ENDIAN_MIPS
57871462 2767 if(!c) emit_xorimm(addr,2,tl);
2768 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 2769#else
535d208a 2770 if(!c) a=addr;
dadf55f2 2771#endif
b1570849 2772 if(fastload_reg_override) a=fastload_reg_override;
57871462 2773 //#ifdef
2774 //emit_movzwl_indexed_tlb(x,tl,map,tl);
2775 //#else
2776 if(map>=0) {
535d208a 2777 emit_movzwl_indexed(x,a,tl);
2778 }else{
a327ad27 2779 #if 1 //def RAM_OFFSET
535d208a 2780 emit_movzwl_indexed(x,a,tl);
2781 #else
2782 emit_movzwl_indexed((int)rdram-0x80000000+x,a,tl);
2783 #endif
2784 }
57871462 2785 }
2786 }
535d208a 2787 if(jaddr)
2788 add_stub(LOADHU_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2789 }
535d208a 2790 else
2791 inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2792 }
2793 if (opcode[i]==0x27) { // LWU
2794 assert(th>=0);
2795 if(!c||memtarget) {
2796 if(!dummy) {
dadf55f2 2797 int a=addr;
b1570849 2798 if(fastload_reg_override) a=fastload_reg_override;
57871462 2799 //emit_readword_indexed((int)rdram-0x80000000,addr,tl);
2800 #ifdef HOST_IMM_ADDR32
2801 if(c)
2802 emit_readword_tlb(constmap[i][s]+offset,map,tl);
2803 else
2804 #endif
dadf55f2 2805 emit_readword_indexed_tlb(0,a,map,tl);
57871462 2806 }
535d208a 2807 if(jaddr)
2808 add_stub(LOADW_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2809 }
2810 else {
2811 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
57871462 2812 }
535d208a 2813 emit_zeroreg(th);
2814 }
2815 if (opcode[i]==0x37) { // LD
2816 if(!c||memtarget) {
2817 if(!dummy) {
dadf55f2 2818 int a=addr;
b1570849 2819 if(fastload_reg_override) a=fastload_reg_override;
57871462 2820 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,addr,th);
2821 //emit_readword_indexed((int)rdram-0x7FFFFFFC,addr,tl);
2822 #ifdef HOST_IMM_ADDR32
2823 if(c)
2824 emit_readdword_tlb(constmap[i][s]+offset,map,th,tl);
2825 else
2826 #endif
dadf55f2 2827 emit_readdword_indexed_tlb(0,a,map,th,tl);
57871462 2828 }
535d208a 2829 if(jaddr)
2830 add_stub(LOADD_STUB,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
57871462 2831 }
535d208a 2832 else
2833 inline_readstub(LOADD_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
57871462 2834 }
535d208a 2835 }
2836 //emit_storereg(rt1[i],tl); // DEBUG
57871462 2837 //if(opcode[i]==0x23)
2838 //if(opcode[i]==0x24)
2839 //if(opcode[i]==0x23||opcode[i]==0x24)
2840 /*if(opcode[i]==0x21||opcode[i]==0x23||opcode[i]==0x24)
2841 {
2842 //emit_pusha();
2843 save_regs(0x100f);
2844 emit_readword((int)&last_count,ECX);
2845 #ifdef __i386__
2846 if(get_reg(i_regs->regmap,CCREG)<0)
2847 emit_loadreg(CCREG,HOST_CCREG);
2848 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2849 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
2850 emit_writeword(HOST_CCREG,(int)&Count);
2851 #endif
2852 #ifdef __arm__
2853 if(get_reg(i_regs->regmap,CCREG)<0)
2854 emit_loadreg(CCREG,0);
2855 else
2856 emit_mov(HOST_CCREG,0);
2857 emit_add(0,ECX,0);
2858 emit_addimm(0,2*ccadj[i],0);
2859 emit_writeword(0,(int)&Count);
2860 #endif
2861 emit_call((int)memdebug);
2862 //emit_popa();
2863 restore_regs(0x100f);
581335b0 2864 }*/
57871462 2865}
2866
#ifndef loadlr_assemble
/*
 * Fallback used only when no loadlr_assemble macro is defined: reports that
 * an implementation is missing and terminates the process with status 1.
 */
void loadlr_assemble(int i,struct regstat *i_regs)
{
  (void)i;
  (void)i_regs;
  printf("Need loadlr_assemble for this architecture.\n");
  exit(1);
}
#endif
2874
2875void store_assemble(int i,struct regstat *i_regs)
2876{
2877 int s,th,tl,map=-1;
2878 int addr,temp;
2879 int offset;
581335b0 2880 int jaddr=0,type;
666a299d 2881 int memtarget=0,c=0;
57871462 2882 int agr=AGEN1+(i&1);
b1570849 2883 int faststore_reg_override=0;
57871462 2884 u_int hr,reglist=0;
2885 th=get_reg(i_regs->regmap,rs2[i]|64);
2886 tl=get_reg(i_regs->regmap,rs2[i]);
2887 s=get_reg(i_regs->regmap,rs1[i]);
2888 temp=get_reg(i_regs->regmap,agr);
2889 if(temp<0) temp=get_reg(i_regs->regmap,-1);
2890 offset=imm[i];
2891 if(s>=0) {
2892 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2893 if(c) {
2894 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2895 }
57871462 2896 }
2897 assert(tl>=0);
2898 assert(temp>=0);
2899 for(hr=0;hr<HOST_REGS;hr++) {
2900 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2901 }
2902 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2903 if(offset||s<0||c) addr=temp;
2904 else addr=s;
1edfcc68 2905 if(!c) {
2906 jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
2907 }
2908 else if(ram_offset&&memtarget) {
2909 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2910 faststore_reg_override=HOST_TEMPREG;
57871462 2911 }
2912
2913 if (opcode[i]==0x28) { // SB
2914 if(!c||memtarget) {
97a238a6 2915 int x=0,a=temp;
2002a1db 2916#ifdef BIG_ENDIAN_MIPS
57871462 2917 if(!c) emit_xorimm(addr,3,temp);
2918 else x=((constmap[i][s]+offset)^3)-(constmap[i][s]+offset);
2002a1db 2919#else
97a238a6 2920 if(!c) a=addr;
dadf55f2 2921#endif
b1570849 2922 if(faststore_reg_override) a=faststore_reg_override;
57871462 2923 //emit_writebyte_indexed(tl,(int)rdram-0x80000000,temp);
97a238a6 2924 emit_writebyte_indexed_tlb(tl,x,a,map,a);
57871462 2925 }
2926 type=STOREB_STUB;
2927 }
2928 if (opcode[i]==0x29) { // SH
2929 if(!c||memtarget) {
97a238a6 2930 int x=0,a=temp;
2002a1db 2931#ifdef BIG_ENDIAN_MIPS
57871462 2932 if(!c) emit_xorimm(addr,2,temp);
2933 else x=((constmap[i][s]+offset)^2)-(constmap[i][s]+offset);
2002a1db 2934#else
97a238a6 2935 if(!c) a=addr;
dadf55f2 2936#endif
b1570849 2937 if(faststore_reg_override) a=faststore_reg_override;
57871462 2938 //#ifdef
2939 //emit_writehword_indexed_tlb(tl,x,temp,map,temp);
2940 //#else
2941 if(map>=0) {
97a238a6 2942 emit_writehword_indexed(tl,x,a);
57871462 2943 }else
a327ad27 2944 //emit_writehword_indexed(tl,(int)rdram-0x80000000+x,a);
2945 emit_writehword_indexed(tl,x,a);
57871462 2946 }
2947 type=STOREH_STUB;
2948 }
2949 if (opcode[i]==0x2B) { // SW
dadf55f2 2950 if(!c||memtarget) {
2951 int a=addr;
b1570849 2952 if(faststore_reg_override) a=faststore_reg_override;
57871462 2953 //emit_writeword_indexed(tl,(int)rdram-0x80000000,addr);
dadf55f2 2954 emit_writeword_indexed_tlb(tl,0,a,map,temp);
2955 }
57871462 2956 type=STOREW_STUB;
2957 }
2958 if (opcode[i]==0x3F) { // SD
2959 if(!c||memtarget) {
dadf55f2 2960 int a=addr;
b1570849 2961 if(faststore_reg_override) a=faststore_reg_override;
57871462 2962 if(rs2[i]) {
2963 assert(th>=0);
2964 //emit_writeword_indexed(th,(int)rdram-0x80000000,addr);
2965 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,addr);
dadf55f2 2966 emit_writedword_indexed_tlb(th,tl,0,a,map,temp);
57871462 2967 }else{
2968 // Store zero
2969 //emit_writeword_indexed(tl,(int)rdram-0x80000000,temp);
2970 //emit_writeword_indexed(tl,(int)rdram-0x7FFFFFFC,temp);
dadf55f2 2971 emit_writedword_indexed_tlb(tl,tl,0,a,map,temp);
57871462 2972 }
2973 }
2974 type=STORED_STUB;
2975 }
b96d3df7 2976 if(jaddr) {
2977 // PCSX store handlers don't check invcode again
2978 reglist|=1<<addr;
2979 add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
2980 jaddr=0;
2981 }
1edfcc68 2982 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
57871462 2983 if(!c||memtarget) {
2984 #ifdef DESTRUCTIVE_SHIFT
2985 // The x86 shift operation is 'destructive'; it overwrites the
2986 // source register, so we need to make a copy first and use that.
2987 addr=temp;
2988 #endif
2989 #if defined(HOST_IMM8)
2990 int ir=get_reg(i_regs->regmap,INVCP);
2991 assert(ir>=0);
2992 emit_cmpmem_indexedsr12_reg(ir,addr,1);
2993 #else
2994 emit_cmpmem_indexedsr12_imm((int)invalid_code,addr,1);
2995 #endif
0bbd1454 2996 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
2997 emit_callne(invalidate_addr_reg[addr]);
2998 #else
581335b0 2999 int jaddr2=(int)out;
57871462 3000 emit_jne(0);
3001 add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),addr,0,0,0);
0bbd1454 3002 #endif
57871462 3003 }
3004 }
7a518516 3005 u_int addr_val=constmap[i][s]+offset;
3eaa7048 3006 if(jaddr) {
3007 add_stub(type,jaddr,(int)out,i,addr,(int)i_regs,ccadj[i],reglist);
3008 } else if(c&&!memtarget) {
7a518516 3009 inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist);
3010 }
3011 // basic current block modification detection..
3012 // not looking back as that should be in mips cache already
3013 if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
c43b5311 3014 SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
7a518516 3015 assert(i_regs->regmap==regs[i].regmap); // not delay slot
3016 if(i_regs->regmap==regs[i].regmap) {
3017 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3018 wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3019 emit_movimm(start+i*4+4,0);
3020 emit_writeword(0,(int)&pcaddr);
3021 emit_jmp((int)do_interrupt);
3022 }
3eaa7048 3023 }
57871462 3024 //if(opcode[i]==0x2B || opcode[i]==0x3F)
3025 //if(opcode[i]==0x2B || opcode[i]==0x28)
3026 //if(opcode[i]==0x2B || opcode[i]==0x29)
3027 //if(opcode[i]==0x2B)
3028 /*if(opcode[i]==0x2B || opcode[i]==0x28 || opcode[i]==0x29 || opcode[i]==0x3F)
3029 {
28d74ee8 3030 #ifdef __i386__
3031 emit_pusha();
3032 #endif
3033 #ifdef __arm__
57871462 3034 save_regs(0x100f);
28d74ee8 3035 #endif
57871462 3036 emit_readword((int)&last_count,ECX);
3037 #ifdef __i386__
3038 if(get_reg(i_regs->regmap,CCREG)<0)
3039 emit_loadreg(CCREG,HOST_CCREG);
3040 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3041 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3042 emit_writeword(HOST_CCREG,(int)&Count);
3043 #endif
3044 #ifdef __arm__
3045 if(get_reg(i_regs->regmap,CCREG)<0)
3046 emit_loadreg(CCREG,0);
3047 else
3048 emit_mov(HOST_CCREG,0);
3049 emit_add(0,ECX,0);
3050 emit_addimm(0,2*ccadj[i],0);
3051 emit_writeword(0,(int)&Count);
3052 #endif
3053 emit_call((int)memdebug);
28d74ee8 3054 #ifdef __i386__
3055 emit_popa();
3056 #endif
3057 #ifdef __arm__
57871462 3058 restore_regs(0x100f);
28d74ee8 3059 #endif
581335b0 3060 }*/
57871462 3061}
3062
3063void storelr_assemble(int i,struct regstat *i_regs)
3064{
3065 int s,th,tl;
3066 int temp;
581335b0 3067 int temp2=-1;
57871462 3068 int offset;
581335b0 3069 int jaddr=0;
57871462 3070 int case1,case2,case3;
3071 int done0,done1,done2;
af4ee1fe 3072 int memtarget=0,c=0;
fab5d06d 3073 int agr=AGEN1+(i&1);
57871462 3074 u_int hr,reglist=0;
3075 th=get_reg(i_regs->regmap,rs2[i]|64);
3076 tl=get_reg(i_regs->regmap,rs2[i]);
3077 s=get_reg(i_regs->regmap,rs1[i]);
fab5d06d 3078 temp=get_reg(i_regs->regmap,agr);
3079 if(temp<0) temp=get_reg(i_regs->regmap,-1);
57871462 3080 offset=imm[i];
3081 if(s>=0) {
3082 c=(i_regs->isconst>>s)&1;
af4ee1fe 3083 if(c) {
3084 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3085 }
57871462 3086 }
3087 assert(tl>=0);
3088 for(hr=0;hr<HOST_REGS;hr++) {
3089 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3090 }
535d208a 3091 assert(temp>=0);
1edfcc68 3092 if(!c) {
3093 emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
3094 if(!offset&&s!=temp) emit_mov(s,temp);
3095 jaddr=(int)out;
3096 emit_jno(0);
3097 }
3098 else
3099 {
3100 if(!memtarget||!rs1[i]) {
535d208a 3101 jaddr=(int)out;
3102 emit_jmp(0);
57871462 3103 }
535d208a 3104 }
1edfcc68 3105 #ifdef RAM_OFFSET
3106 int map=get_reg(i_regs->regmap,ROREG);
3107 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3108 #else
9f51b4b9 3109 if((u_int)rdram!=0x80000000)
1edfcc68 3110 emit_addimm_no_flags((u_int)rdram-(u_int)0x80000000,temp);
3111 #endif
535d208a 3112
3113 if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
3114 temp2=get_reg(i_regs->regmap,FTEMP);
3115 if(!rs2[i]) temp2=th=tl;
3116 }
57871462 3117
2002a1db 3118#ifndef BIG_ENDIAN_MIPS
3119 emit_xorimm(temp,3,temp);
3120#endif
535d208a 3121 emit_testimm(temp,2);
3122 case2=(int)out;
3123 emit_jne(0);
3124 emit_testimm(temp,1);
3125 case1=(int)out;
3126 emit_jne(0);
3127 // 0
3128 if (opcode[i]==0x2A) { // SWL
3129 emit_writeword_indexed(tl,0,temp);
3130 }
3131 if (opcode[i]==0x2E) { // SWR
3132 emit_writebyte_indexed(tl,3,temp);
3133 }
3134 if (opcode[i]==0x2C) { // SDL
3135 emit_writeword_indexed(th,0,temp);
3136 if(rs2[i]) emit_mov(tl,temp2);
3137 }
3138 if (opcode[i]==0x2D) { // SDR
3139 emit_writebyte_indexed(tl,3,temp);
3140 if(rs2[i]) emit_shldimm(th,tl,24,temp2);
3141 }
3142 done0=(int)out;
3143 emit_jmp(0);
3144 // 1
3145 set_jump_target(case1,(int)out);
3146 if (opcode[i]==0x2A) { // SWL
3147 // Write 3 msb into three least significant bytes
3148 if(rs2[i]) emit_rorimm(tl,8,tl);
3149 emit_writehword_indexed(tl,-1,temp);
3150 if(rs2[i]) emit_rorimm(tl,16,tl);
3151 emit_writebyte_indexed(tl,1,temp);
3152 if(rs2[i]) emit_rorimm(tl,8,tl);
3153 }
3154 if (opcode[i]==0x2E) { // SWR
3155 // Write two lsb into two most significant bytes
3156 emit_writehword_indexed(tl,1,temp);
3157 }
3158 if (opcode[i]==0x2C) { // SDL
3159 if(rs2[i]) emit_shrdimm(tl,th,8,temp2);
3160 // Write 3 msb into three least significant bytes
3161 if(rs2[i]) emit_rorimm(th,8,th);
3162 emit_writehword_indexed(th,-1,temp);
3163 if(rs2[i]) emit_rorimm(th,16,th);
3164 emit_writebyte_indexed(th,1,temp);
3165 if(rs2[i]) emit_rorimm(th,8,th);
3166 }
3167 if (opcode[i]==0x2D) { // SDR
3168 if(rs2[i]) emit_shldimm(th,tl,16,temp2);
3169 // Write two lsb into two most significant bytes
3170 emit_writehword_indexed(tl,1,temp);
3171 }
3172 done1=(int)out;
3173 emit_jmp(0);
3174 // 2
3175 set_jump_target(case2,(int)out);
3176 emit_testimm(temp,1);
3177 case3=(int)out;
3178 emit_jne(0);
3179 if (opcode[i]==0x2A) { // SWL
3180 // Write two msb into two least significant bytes
3181 if(rs2[i]) emit_rorimm(tl,16,tl);
3182 emit_writehword_indexed(tl,-2,temp);
3183 if(rs2[i]) emit_rorimm(tl,16,tl);
3184 }
3185 if (opcode[i]==0x2E) { // SWR
3186 // Write 3 lsb into three most significant bytes
3187 emit_writebyte_indexed(tl,-1,temp);
3188 if(rs2[i]) emit_rorimm(tl,8,tl);
3189 emit_writehword_indexed(tl,0,temp);
3190 if(rs2[i]) emit_rorimm(tl,24,tl);
3191 }
3192 if (opcode[i]==0x2C) { // SDL
3193 if(rs2[i]) emit_shrdimm(tl,th,16,temp2);
3194 // Write two msb into two least significant bytes
3195 if(rs2[i]) emit_rorimm(th,16,th);
3196 emit_writehword_indexed(th,-2,temp);
3197 if(rs2[i]) emit_rorimm(th,16,th);
3198 }
3199 if (opcode[i]==0x2D) { // SDR
3200 if(rs2[i]) emit_shldimm(th,tl,8,temp2);
3201 // Write 3 lsb into three most significant bytes
3202 emit_writebyte_indexed(tl,-1,temp);
3203 if(rs2[i]) emit_rorimm(tl,8,tl);
3204 emit_writehword_indexed(tl,0,temp);
3205 if(rs2[i]) emit_rorimm(tl,24,tl);
3206 }
3207 done2=(int)out;
3208 emit_jmp(0);
3209 // 3
3210 set_jump_target(case3,(int)out);
3211 if (opcode[i]==0x2A) { // SWL
3212 // Write msb into least significant byte
3213 if(rs2[i]) emit_rorimm(tl,24,tl);
3214 emit_writebyte_indexed(tl,-3,temp);
3215 if(rs2[i]) emit_rorimm(tl,8,tl);
3216 }
3217 if (opcode[i]==0x2E) { // SWR
3218 // Write entire word
3219 emit_writeword_indexed(tl,-3,temp);
3220 }
3221 if (opcode[i]==0x2C) { // SDL
3222 if(rs2[i]) emit_shrdimm(tl,th,24,temp2);
3223 // Write msb into least significant byte
3224 if(rs2[i]) emit_rorimm(th,24,th);
3225 emit_writebyte_indexed(th,-3,temp);
3226 if(rs2[i]) emit_rorimm(th,8,th);
3227 }
3228 if (opcode[i]==0x2D) { // SDR
3229 if(rs2[i]) emit_mov(th,temp2);
3230 // Write entire word
3231 emit_writeword_indexed(tl,-3,temp);
3232 }
3233 set_jump_target(done0,(int)out);
3234 set_jump_target(done1,(int)out);
3235 set_jump_target(done2,(int)out);
3236 if (opcode[i]==0x2C) { // SDL
3237 emit_testimm(temp,4);
57871462 3238 done0=(int)out;
57871462 3239 emit_jne(0);
535d208a 3240 emit_andimm(temp,~3,temp);
3241 emit_writeword_indexed(temp2,4,temp);
3242 set_jump_target(done0,(int)out);
3243 }
3244 if (opcode[i]==0x2D) { // SDR
3245 emit_testimm(temp,4);
3246 done0=(int)out;
3247 emit_jeq(0);
3248 emit_andimm(temp,~3,temp);
3249 emit_writeword_indexed(temp2,-4,temp);
57871462 3250 set_jump_target(done0,(int)out);
57871462 3251 }
535d208a 3252 if(!c||!memtarget)
3253 add_stub(STORELR_STUB,jaddr,(int)out,i,(int)i_regs,temp,ccadj[i],reglist);
1edfcc68 3254 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
535d208a 3255 #ifdef RAM_OFFSET
3256 int map=get_reg(i_regs->regmap,ROREG);
3257 if(map<0) map=HOST_TEMPREG;
3258 gen_orig_addr_w(temp,map);
3259 #else
57871462 3260 emit_addimm_no_flags((u_int)0x80000000-(u_int)rdram,temp);
535d208a 3261 #endif
57871462 3262 #if defined(HOST_IMM8)
3263 int ir=get_reg(i_regs->regmap,INVCP);
3264 assert(ir>=0);
3265 emit_cmpmem_indexedsr12_reg(ir,temp,1);
3266 #else
3267 emit_cmpmem_indexedsr12_imm((int)invalid_code,temp,1);
3268 #endif
535d208a 3269 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
3270 emit_callne(invalidate_addr_reg[temp]);
3271 #else
581335b0 3272 int jaddr2=(int)out;
57871462 3273 emit_jne(0);
3274 add_stub(INVCODE_STUB,jaddr2,(int)out,reglist|(1<<HOST_CCREG),temp,0,0,0);
535d208a 3275 #endif
57871462 3276 }
3277 /*
3278 emit_pusha();
3279 //save_regs(0x100f);
3280 emit_readword((int)&last_count,ECX);
3281 if(get_reg(i_regs->regmap,CCREG)<0)
3282 emit_loadreg(CCREG,HOST_CCREG);
3283 emit_add(HOST_CCREG,ECX,HOST_CCREG);
3284 emit_addimm(HOST_CCREG,2*ccadj[i],HOST_CCREG);
3285 emit_writeword(HOST_CCREG,(int)&Count);
3286 emit_call((int)memdebug);
3287 emit_popa();
3288 //restore_regs(0x100f);
581335b0 3289 */
57871462 3290}
3291
// Assemble a coprocessor 1 load/store (C1LS) instruction.
// No code is generated here directly: the instruction index and the
// register state are simply handed to cop1_unusable().
void c1ls_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3296
b9b61529 3297void c2ls_assemble(int i,struct regstat *i_regs)
3298{
3299 int s,tl;
3300 int ar;
3301 int offset;
1fd1aceb 3302 int memtarget=0,c=0;
581335b0 3303 int jaddr2=0,type;
b9b61529 3304 int agr=AGEN1+(i&1);
ffb0b9e0 3305 int fastio_reg_override=0;
b9b61529 3306 u_int hr,reglist=0;
3307 u_int copr=(source[i]>>16)&0x1f;
3308 s=get_reg(i_regs->regmap,rs1[i]);
3309 tl=get_reg(i_regs->regmap,FTEMP);
3310 offset=imm[i];
3311 assert(rs1[i]>0);
3312 assert(tl>=0);
b9b61529 3313
3314 for(hr=0;hr<HOST_REGS;hr++) {
3315 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3316 }
3317 if(i_regs->regmap[HOST_CCREG]==CCREG)
3318 reglist&=~(1<<HOST_CCREG);
3319
3320 // get the address
3321 if (opcode[i]==0x3a) { // SWC2
3322 ar=get_reg(i_regs->regmap,agr);
3323 if(ar<0) ar=get_reg(i_regs->regmap,-1);
3324 reglist|=1<<ar;
3325 } else { // LWC2
3326 ar=tl;
3327 }
1fd1aceb 3328 if(s>=0) c=(i_regs->wasconst>>s)&1;
3329 memtarget=c&&(((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE);
b9b61529 3330 if (!offset&&!c&&s>=0) ar=s;
3331 assert(ar>=0);
3332
3333 if (opcode[i]==0x3a) { // SWC2
3334 cop2_get_dreg(copr,tl,HOST_TEMPREG);
1fd1aceb 3335 type=STOREW_STUB;
b9b61529 3336 }
1fd1aceb 3337 else
b9b61529 3338 type=LOADW_STUB;
1fd1aceb 3339
3340 if(c&&!memtarget) {
3341 jaddr2=(int)out;
3342 emit_jmp(0); // inline_readstub/inline_writestub?
b9b61529 3343 }
1fd1aceb 3344 else {
3345 if(!c) {
ffb0b9e0 3346 jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override);
1fd1aceb 3347 }
a327ad27 3348 else if(ram_offset&&memtarget) {
3349 emit_addimm(ar,ram_offset,HOST_TEMPREG);
3350 fastio_reg_override=HOST_TEMPREG;
3351 }
1fd1aceb 3352 if (opcode[i]==0x32) { // LWC2
3353 #ifdef HOST_IMM_ADDR32
3354 if(c) emit_readword_tlb(constmap[i][s]+offset,-1,tl);
3355 else
3356 #endif
ffb0b9e0 3357 int a=ar;
3358 if(fastio_reg_override) a=fastio_reg_override;
3359 emit_readword_indexed(0,a,tl);
1fd1aceb 3360 }
3361 if (opcode[i]==0x3a) { // SWC2
3362 #ifdef DESTRUCTIVE_SHIFT
3363 if(!offset&&!c&&s>=0) emit_mov(s,ar);
3364 #endif
ffb0b9e0 3365 int a=ar;
3366 if(fastio_reg_override) a=fastio_reg_override;
3367 emit_writeword_indexed(tl,0,a);
1fd1aceb 3368 }
b9b61529 3369 }
3370 if(jaddr2)
3371 add_stub(type,jaddr2,(int)out,i,ar,(int)i_regs,ccadj[i],reglist);
0ff8c62c 3372 if(opcode[i]==0x3a) // SWC2
3373 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
b9b61529 3374#if defined(HOST_IMM8)
3375 int ir=get_reg(i_regs->regmap,INVCP);
3376 assert(ir>=0);
3377 emit_cmpmem_indexedsr12_reg(ir,ar,1);
3378#else
3379 emit_cmpmem_indexedsr12_imm((int)invalid_code,ar,1);
3380#endif
0bbd1454 3381 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
3382 emit_callne(invalidate_addr_reg[ar]);
3383 #else
581335b0 3384 int jaddr3=(int)out;
b9b61529 3385 emit_jne(0);
3386 add_stub(INVCODE_STUB,jaddr3,(int)out,reglist|(1<<HOST_CCREG),ar,0,0,0);
0bbd1454 3387 #endif
b9b61529 3388 }
3389 if (opcode[i]==0x32) { // LWC2
3390 cop2_put_dreg(copr,tl,HOST_TEMPREG);
3391 }
3392}
3393
#ifndef multdiv_assemble
// Placeholder compiled only when no macro named multdiv_assemble exists.
// It reports that the implementation is missing and ends the process
// with exit status 1; both arguments are ignored.
void multdiv_assemble(int i, struct regstat *i_regs)
{
  (void)i;
  (void)i_regs;
  printf("Need multdiv_assemble for this architecture.\n");
  exit(1);
}
#endif
3401
3402void mov_assemble(int i,struct regstat *i_regs)
3403{
3404 //if(opcode2[i]==0x10||opcode2[i]==0x12) { // MFHI/MFLO
3405 //if(opcode2[i]==0x11||opcode2[i]==0x13) { // MTHI/MTLO
57871462 3406 if(rt1[i]) {
3407 signed char sh,sl,th,tl;
3408 th=get_reg(i_regs->regmap,rt1[i]|64);
3409 tl=get_reg(i_regs->regmap,rt1[i]);
3410 //assert(tl>=0);
3411 if(tl>=0) {
3412 sh=get_reg(i_regs->regmap,rs1[i]|64);
3413 sl=get_reg(i_regs->regmap,rs1[i]);
3414 if(sl>=0) emit_mov(sl,tl);
3415 else emit_loadreg(rs1[i],tl);
3416 if(th>=0) {
3417 if(sh>=0) emit_mov(sh,th);
3418 else emit_loadreg(rs1[i]|64,th);
3419 }
3420 }
3421 }
3422}
3423
#ifndef fconv_assemble
// Placeholder compiled only when no macro named fconv_assemble exists.
// It reports that the implementation is missing and ends the process
// with exit status 1; both arguments are ignored.
void fconv_assemble(int i, struct regstat *i_regs)
{
  (void)i;
  (void)i_regs;
  printf("Need fconv_assemble for this architecture.\n");
  exit(1);
}
#endif
3431
#if 0
// Disabled placeholder (excluded from the build by "#if 0"): would report
// that float_assemble is missing and end the process with exit status 1.
void float_assemble(int i, struct regstat *i_regs)
{
  (void)i;
  (void)i_regs;
  printf("Need float_assemble for this architecture.\n");
  exit(1);
}
#endif
3439
3440void syscall_assemble(int i,struct regstat *i_regs)
3441{
3442 signed char ccreg=get_reg(i_regs->regmap,CCREG);
3443 assert(ccreg==HOST_CCREG);
3444 assert(!is_delayslot);
581335b0 3445 (void)ccreg;
57871462 3446 emit_movimm(start+i*4,EAX); // Get PC
2573466a 3447 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
7139f3c8 3448 emit_jmp((int)jump_syscall_hle); // XXX
3449}
3450
3451void hlecall_assemble(int i,struct regstat *i_regs)
3452{
3453 signed char ccreg=get_reg(i_regs->regmap,CCREG);
3454 assert(ccreg==HOST_CCREG);
3455 assert(!is_delayslot);
581335b0 3456 (void)ccreg;
7139f3c8 3457 emit_movimm(start+i*4+4,0); // Get PC
67ba0fb4 3458 emit_movimm((int)psxHLEt[source[i]&7],1);
2573466a 3459 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX
67ba0fb4 3460 emit_jmp((int)jump_hlecall);
57871462 3461}
3462
1e973cb0 3463void intcall_assemble(int i,struct regstat *i_regs)
3464{
3465 signed char ccreg=get_reg(i_regs->regmap,CCREG);
3466 assert(ccreg==HOST_CCREG);
3467 assert(!is_delayslot);
581335b0 3468 (void)ccreg;
1e973cb0 3469 emit_movimm(start+i*4,0); // Get PC
2573466a 3470 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
1e973cb0 3471 emit_jmp((int)jump_intcall);
3472}
3473
57871462 3474void ds_assemble(int i,struct regstat *i_regs)
3475{
ffb0b9e0 3476 speculate_register_values(i);
57871462 3477 is_delayslot=1;
3478 switch(itype[i]) {
3479 case ALU:
3480 alu_assemble(i,i_regs);break;
3481 case IMM16:
3482 imm16_assemble(i,i_regs);break;
3483 case SHIFT:
3484 shift_assemble(i,i_regs);break;
3485 case SHIFTIMM:
3486 shiftimm_assemble(i,i_regs);break;
3487 case LOAD:
3488 load_assemble(i,i_regs);break;
3489 case LOADLR:
3490 loadlr_assemble(i,i_regs);break;
3491 case STORE:
3492 store_assemble(i,i_regs);break;
3493 case STORELR:
3494 storelr_assemble(i,i_regs);break;
3495 case COP0:
3496 cop0_assemble(i,i_regs);break;
3497 case COP1:
3498 cop1_assemble(i,i_regs);break;
3499 case C1LS:
3500 c1ls_assemble(i,i_regs);break;
b9b61529 3501 case COP2:
3502 cop2_assemble(i,i_regs);break;
3503 case C2LS:
3504 c2ls_assemble(i,i_regs);break;
3505 case C2OP:
3506 c2op_assemble(i,i_regs);break;
57871462 3507 case FCONV:
3508 fconv_assemble(i,i_regs);break;
3509 case FLOAT:
3510 float_assemble(i,i_regs);break;
3511 case FCOMP:
3512 fcomp_assemble(i,i_regs);break;
3513 case MULTDIV:
3514 multdiv_assemble(i,i_regs);break;
3515 case MOV:
3516 mov_assemble(i,i_regs);break;
3517 case SYSCALL:
7139f3c8 3518 case HLECALL:
1e973cb0 3519 case INTCALL:
57871462 3520 case SPAN:
3521 case UJUMP:
3522 case RJUMP:
3523 case CJUMP:
3524 case SJUMP:
3525 case FJUMP:
c43b5311 3526 SysPrintf("Jump in the delay slot. This is probably a bug.\n");
57871462 3527 }
3528 is_delayslot=0;
3529}
3530
3531// Is the branch target a valid internal jump?
3532int internal_branch(uint64_t i_is32,int addr)
3533{
3534 if(addr&1) return 0; // Indirect (register) jump
3535 if(addr>=start && addr<start+slen*4-4)
3536 {
71e490c5 3537 //int t=(addr-start)>>2;
57871462 3538 // Delay slots are not valid branch targets
3539 //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0;
3540 // 64 -> 32 bit transition requires a recompile
3541 /*if(is32[t]&~unneeded_reg_upper[t]&~i_is32)
3542 {
3543 if(requires_32bit[t]&~i_is32) printf("optimizable: no\n");
3544 else printf("optimizable: yes\n");
3545 }*/
3546 //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0;
71e490c5 3547 return 1;
57871462 3548 }
3549 return 0;
3550}
3551
3552#ifndef wb_invalidate
3553void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,
3554 uint64_t u,uint64_t uu)
3555{
3556 int hr;
3557 for(hr=0;hr<HOST_REGS;hr++) {
3558 if(hr!=EXCLUDE_REG) {
3559 if(pre[hr]!=entry[hr]) {
3560 if(pre[hr]>=0) {
3561 if((dirty>>hr)&1) {
3562 if(get_reg(entry,pre[hr])<0) {
3563 if(pre[hr]<64) {
3564 if(!((u>>pre[hr])&1)) {
3565 emit_storereg(pre[hr],hr);
3566 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
3567 emit_sarimm(hr,31,hr);
3568 emit_storereg(pre[hr]|64,hr);
3569 }
3570 }
3571 }else{
3572 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
3573 emit_storereg(pre[hr],hr);
3574 }
3575 }
3576 }
3577 }
3578 }
3579 }
3580 }
3581 }
3582 // Move from one register to another (no writeback)
3583 for(hr=0;hr<HOST_REGS;hr++) {
3584 if(hr!=EXCLUDE_REG) {
3585 if(pre[hr]!=entry[hr]) {
3586 if(pre[hr]>=0&&(pre[hr]&63)<TEMPREG) {
3587 int nr;
3588 if((nr=get_reg(entry,pre[hr]))>=0) {
3589 emit_mov(hr,nr);
3590 }
3591 }
3592 }
3593 }
3594 }
3595}
3596#endif
3597
3598// Load the specified registers
3599// This only loads the registers given as arguments because
3600// we don't want to load things that will be overwritten
3601void load_regs(signed char entry[],signed char regmap[],int is32,int rs1,int rs2)
3602{
3603 int hr;
3604 // Load 32-bit regs
3605 for(hr=0;hr<HOST_REGS;hr++) {
3606 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
3607 if(entry[hr]!=regmap[hr]) {
3608 if(regmap[hr]==rs1||regmap[hr]==rs2)
3609 {
3610 if(regmap[hr]==0) {
3611 emit_zeroreg(hr);
3612 }
3613 else
3614 {
3615 emit_loadreg(regmap[hr],hr);
3616 }
3617 }
3618 }
3619 }
3620 }
3621 //Load 64-bit regs
3622 for(hr=0;hr<HOST_REGS;hr++) {
3623 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
3624 if(entry[hr]!=regmap[hr]) {
3625 if(regmap[hr]-64==rs1||regmap[hr]-64==rs2)
3626 {
3627 assert(regmap[hr]!=64);
3628 if((is32>>(regmap[hr]&63))&1) {
3629 int lr=get_reg(regmap,regmap[hr]-64);
3630 if(lr>=0)
3631 emit_sarimm(lr,31,hr);
3632 else
3633 emit_loadreg(regmap[hr],hr);
3634 }
3635 else
3636 {
3637 emit_loadreg(regmap[hr],hr);
3638 }
3639 }
3640 }
3641 }
3642 }
3643}
3644
3645// Load registers prior to the start of a loop
3646// so that they are not loaded within the loop
3647static void loop_preload(signed char pre[],signed char entry[])
3648{
3649 int hr;
3650 for(hr=0;hr<HOST_REGS;hr++) {
3651 if(hr!=EXCLUDE_REG) {
3652 if(pre[hr]!=entry[hr]) {
3653 if(entry[hr]>=0) {
3654 if(get_reg(pre,entry[hr])<0) {
3655 assem_debug("loop preload:\n");
3656 //printf("loop preload: %d\n",hr);
3657 if(entry[hr]==0) {
3658 emit_zeroreg(hr);
3659 }
3660 else if(entry[hr]<TEMPREG)
3661 {
3662 emit_loadreg(entry[hr],hr);
3663 }
3664 else if(entry[hr]-64<TEMPREG)
3665 {
3666 emit_loadreg(entry[hr],hr);
3667 }
3668 }
3669 }
3670 }
3671 }
3672 }
3673}
3674
3675// Generate address for load/store instruction
b9b61529 3676// goes to AGEN for writes, FTEMP for LOADLR and cop1/2 loads
57871462 3677void address_generation(int i,struct regstat *i_regs,signed char entry[])
3678{
b9b61529 3679 if(itype[i]==LOAD||itype[i]==LOADLR||itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS||itype[i]==C2LS) {
5194fb95 3680 int ra=-1;
57871462 3681 int agr=AGEN1+(i&1);
57871462 3682 if(itype[i]==LOAD) {
3683 ra=get_reg(i_regs->regmap,rt1[i]);
9f51b4b9 3684 if(ra<0) ra=get_reg(i_regs->regmap,-1);
535d208a 3685 assert(ra>=0);
57871462 3686 }
3687 if(itype[i]==LOADLR) {
3688 ra=get_reg(i_regs->regmap,FTEMP);
3689 }
3690 if(itype[i]==STORE||itype[i]==STORELR) {
3691 ra=get_reg(i_regs->regmap,agr);
3692 if(ra<0) ra=get_reg(i_regs->regmap,-1);
3693 }
b9b61529 3694 if(itype[i]==C1LS||itype[i]==C2LS) {
3695 if ((opcode[i]&0x3b)==0x31||(opcode[i]&0x3b)==0x32) // LWC1/LDC1/LWC2/LDC2
57871462 3696 ra=get_reg(i_regs->regmap,FTEMP);
1fd1aceb 3697 else { // SWC1/SDC1/SWC2/SDC2
57871462 3698 ra=get_reg(i_regs->regmap,agr);
3699 if(ra<0) ra=get_reg(i_regs->regmap,-1);
3700 }
3701 }
3702 int rs=get_reg(i_regs->regmap,rs1[i]);
57871462 3703 if(ra>=0) {
3704 int offset=imm[i];
3705 int c=(i_regs->wasconst>>rs)&1;
3706 if(rs1[i]==0) {
3707 // Using r0 as a base address
57871462 3708 if(!entry||entry[ra]!=agr) {
3709 if (opcode[i]==0x22||opcode[i]==0x26) {
3710 emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
3711 }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
3712 emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR
3713 }else{
3714 emit_movimm(offset,ra);
3715 }
3716 } // else did it in the previous cycle
3717 }
3718 else if(rs<0) {
3719 if(!entry||entry[ra]!=rs1[i])
3720 emit_loadreg(rs1[i],ra);
3721 //if(!entry||entry[ra]!=rs1[i])
3722 // printf("poor load scheduling!\n");
3723 }
3724 else if(c) {
57871462 3725 if(rs1[i]!=rt1[i]||itype[i]!=LOAD) {
3726 if(!entry||entry[ra]!=agr) {
3727 if (opcode[i]==0x22||opcode[i]==0x26) {
3728 emit_movimm((constmap[i][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR
3729 }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
3730 emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
3731 }else{
3732 #ifdef HOST_IMM_ADDR32
1edfcc68 3733 if((itype[i]!=LOAD&&(opcode[i]&0x3b)!=0x31&&(opcode[i]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2
57871462 3734 #endif
3735 emit_movimm(constmap[i][rs]+offset,ra);
8575a877 3736 regs[i].loadedconst|=1<<ra;
57871462 3737 }
3738 } // else did it in the previous cycle
3739 } // else load_consts already did it
3740 }
3741 if(offset&&!c&&rs1[i]) {
3742 if(rs>=0) {
3743 emit_addimm(rs,offset,ra);
3744 }else{
3745 emit_addimm(ra,offset,ra);
3746 }
3747 }
3748 }
3749 }
3750 // Preload constants for next instruction
b9b61529 3751 if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) {
57871462 3752 int agr,ra;
57871462 3753 // Actual address
3754 agr=AGEN1+((i+1)&1);
3755 ra=get_reg(i_regs->regmap,agr);
3756 if(ra>=0) {
3757 int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
3758 int offset=imm[i+1];
3759 int c=(regs[i+1].wasconst>>rs)&1;
3760 if(c&&(rs1[i+1]!=rt1[i+1]||itype[i+1]!=LOAD)) {
3761 if (opcode[i+1]==0x22||opcode[i+1]==0x26) {
3762 emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR
3763 }else if (opcode[i+1]==0x1a||opcode[i+1]==0x1b) {
3764 emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
3765 }else{
3766 #ifdef HOST_IMM_ADDR32
1edfcc68 3767 if((itype[i+1]!=LOAD&&(opcode[i+1]&0x3b)!=0x31&&(opcode[i+1]&0x3b)!=0x32)) // LWC1/LDC1/LWC2/LDC2
57871462 3768 #endif
3769 emit_movimm(constmap[i+1][rs]+offset,ra);
8575a877 3770 regs[i+1].loadedconst|=1<<ra;
57871462 3771 }
3772 }
3773 else if(rs1[i+1]==0) {
3774 // Using r0 as a base address
3775 if (opcode[i+1]==0x22||opcode[i+1]==0x26) {
3776 emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
3777 }else if (opcode[i+1]==0x1a||opcode[i+1]==0x1b) {
3778 emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR
3779 }else{
3780 emit_movimm(offset,ra);
3781 }
3782 }
3783 }
3784 }
3785}
3786
e2b5e7aa 3787static int get_final_value(int hr, int i, int *value)
57871462 3788{
3789 int reg=regs[i].regmap[hr];
3790 while(i<slen-1) {
3791 if(regs[i+1].regmap[hr]!=reg) break;
3792 if(!((regs[i+1].isconst>>hr)&1)) break;
3793 if(bt[i+1]) break;
3794 i++;
3795 }
3796 if(i<slen-1) {
3797 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP) {
3798 *value=constmap[i][hr];
3799 return 1;
3800 }
3801 if(!bt[i+1]) {
3802 if(itype[i+1]==UJUMP||itype[i+1]==RJUMP||itype[i+1]==CJUMP||itype[i+1]==SJUMP) {
3803 // Load in delay slot, out-of-order execution
3804 if(itype[i+2]==LOAD&&rs1[i+2]==reg&&rt1[i+2]==reg&&((regs[i+1].wasconst>>hr)&1))
3805 {
57871462 3806 // Precompute load address
3807 *value=constmap[i][hr]+imm[i+2];
3808 return 1;
3809 }
3810 }
3811 if(itype[i+1]==LOAD&&rs1[i+1]==reg&&rt1[i+1]==reg)
3812 {
57871462 3813 // Precompute load address
3814 *value=constmap[i][hr]+imm[i+1];
3815 //printf("c=%x imm=%x\n",(int)constmap[i][hr],imm[i+1]);
3816 return 1;
3817 }
3818 }
3819 }
3820 *value=constmap[i][hr];
3821 //printf("c=%x\n",(int)constmap[i][hr]);
3822 if(i==slen-1) return 1;
3823 if(reg<64) {
3824 return !((unneeded_reg[i+1]>>reg)&1);
3825 }else{
3826 return !((unneeded_reg_upper[i+1]>>reg)&1);
3827 }
3828}
3829
3830// Load registers with known constants
3831void load_consts(signed char pre[],signed char regmap[],int is32,int i)
3832{
8575a877 3833 int hr,hr2;
3834 // propagate loaded constant flags
3835 if(i==0||bt[i])
3836 regs[i].loadedconst=0;
3837 else {
3838 for(hr=0;hr<HOST_REGS;hr++) {
3839 if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr]
3840 &&regmap[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1))
3841 {
3842 regs[i].loadedconst|=1<<hr;
3843 }
3844 }
3845 }
57871462 3846 // Load 32-bit regs
3847 for(hr=0;hr<HOST_REGS;hr++) {
3848 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
3849 //if(entry[hr]!=regmap[hr]) {
8575a877 3850 if(!((regs[i].loadedconst>>hr)&1)) {
57871462 3851 if(((regs[i].isconst>>hr)&1)&&regmap[hr]<64&&regmap[hr]>0) {
8575a877 3852 int value,similar=0;
57871462 3853 if(get_final_value(hr,i,&value)) {
8575a877 3854 // see if some other register has similar value
3855 for(hr2=0;hr2<HOST_REGS;hr2++) {
3856 if(hr2!=EXCLUDE_REG&&((regs[i].loadedconst>>hr2)&1)) {
3857 if(is_similar_value(value,constmap[i][hr2])) {
3858 similar=1;
3859 break;
3860 }
3861 }
3862 }
3863 if(similar) {
3864 int value2;
3865 if(get_final_value(hr2,i,&value2)) // is this needed?
3866 emit_movimm_from(value2,hr2,value,hr);
3867 else
3868 emit_movimm(value,hr);
3869 }
3870 else if(value==0) {
57871462 3871 emit_zeroreg(hr);
3872 }
3873 else {
3874 emit_movimm(value,hr);
3875 }
3876 }
8575a877 3877 regs[i].loadedconst|=1<<hr;
57871462 3878 }
3879 }
3880 }
3881 }
3882 // Load 64-bit regs
3883 for(hr=0;hr<HOST_REGS;hr++) {
3884 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
3885 //if(entry[hr]!=regmap[hr]) {
3886 if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) {
3887 if(((regs[i].isconst>>hr)&1)&&regmap[hr]>64) {
3888 if((is32>>(regmap[hr]&63))&1) {
3889 int lr=get_reg(regmap,regmap[hr]-64);
3890 assert(lr>=0);
3891 emit_sarimm(lr,31,hr);
3892 }
3893 else
3894 {
3895 int value;
3896 if(get_final_value(hr,i,&value)) {
3897 if(value==0) {
3898 emit_zeroreg(hr);
3899 }
3900 else {
3901 emit_movimm(value,hr);
3902 }
3903 }
3904 }
3905 }
3906 }
3907 }
3908 }
3909}
3910void load_all_consts(signed char regmap[],int is32,u_int dirty,int i)
3911{
3912 int hr;
3913 // Load 32-bit regs
3914 for(hr=0;hr<HOST_REGS;hr++) {
3915 if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((dirty>>hr)&1)) {
3916 if(((regs[i].isconst>>hr)&1)&&regmap[hr]<64&&regmap[hr]>0) {
3917 int value=constmap[i][hr];
3918 if(value==0) {
3919 emit_zeroreg(hr);
3920 }
3921 else {
3922 emit_movimm(value,hr);
3923 }
3924 }
3925 }
3926 }
3927 // Load 64-bit regs
3928 for(hr=0;hr<HOST_REGS;hr++) {
3929 if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((dirty>>hr)&1)) {
3930 if(((regs[i].isconst>>hr)&1)&&regmap[hr]>64) {
3931 if((is32>>(regmap[hr]&63))&1) {
3932 int lr=get_reg(regmap,regmap[hr]-64);
3933 assert(lr>=0);
3934 emit_sarimm(lr,31,hr);
3935 }
3936 else
3937 {
3938 int value=constmap[i][hr];
3939 if(value==0) {
3940 emit_zeroreg(hr);
3941 }
3942 else {
3943 emit_movimm(value,hr);
3944 }
3945 }
3946 }
3947 }
3948 }
3949}
3950
3951// Write out all dirty registers (except cycle count)
3952void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty)
3953{
3954 int hr;
3955 for(hr=0;hr<HOST_REGS;hr++) {
3956 if(hr!=EXCLUDE_REG) {
3957 if(i_regmap[hr]>0) {
3958 if(i_regmap[hr]!=CCREG) {
3959 if((i_dirty>>hr)&1) {
3960 if(i_regmap[hr]<64) {
3961 emit_storereg(i_regmap[hr],hr);
57871462 3962 }else{
3963 if( !((i_is32>>(i_regmap[hr]&63))&1) ) {
3964 emit_storereg(i_regmap[hr],hr);
3965 }
3966 }
3967 }
3968 }
3969 }
3970 }
3971 }
3972}
3973// Write out dirty registers that we need to reload (pair with load_needed_regs)
3974// This writes the registers not written by store_regs_bt
3975void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
3976{
3977 int hr;
3978 int t=(addr-start)>>2;
3979 for(hr=0;hr<HOST_REGS;hr++) {
3980 if(hr!=EXCLUDE_REG) {
3981 if(i_regmap[hr]>0) {
3982 if(i_regmap[hr]!=CCREG) {
3983 if(i_regmap[hr]==regs[t].regmap_entry[hr] && ((regs[t].dirty>>hr)&1) && !(((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) {
3984 if((i_dirty>>hr)&1) {
3985 if(i_regmap[hr]<64) {
3986 emit_storereg(i_regmap[hr],hr);
57871462 3987 }else{
3988 if( !((i_is32>>(i_regmap[hr]&63))&1) ) {
3989 emit_storereg(i_regmap[hr],hr);
3990 }
3991 }
3992 }
3993 }
3994 }
3995 }
3996 }
3997 }
3998}
3999
4000// Load all registers (except cycle count)
4001void load_all_regs(signed char i_regmap[])
4002{
4003 int hr;
4004 for(hr=0;hr<HOST_REGS;hr++) {
4005 if(hr!=EXCLUDE_REG) {
4006 if(i_regmap[hr]==0) {
4007 emit_zeroreg(hr);
4008 }
4009 else
ea3d2e6e 4010 if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
57871462 4011 {
4012 emit_loadreg(i_regmap[hr],hr);
4013 }
4014 }
4015 }
4016}
4017
4018// Load all current registers also needed by next instruction
4019void load_needed_regs(signed char i_regmap[],signed char next_regmap[])
4020{
4021 int hr;
4022 for(hr=0;hr<HOST_REGS;hr++) {
4023 if(hr!=EXCLUDE_REG) {
4024 if(get_reg(next_regmap,i_regmap[hr])>=0) {
4025 if(i_regmap[hr]==0) {
4026 emit_zeroreg(hr);
4027 }
4028 else
ea3d2e6e 4029 if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
57871462 4030 {
4031 emit_loadreg(i_regmap[hr],hr);
4032 }
4033 }
4034 }
4035 }
4036}
4037
4038// Load all regs, storing cycle count if necessary
4039void load_regs_entry(int t)
4040{
4041 int hr;
2573466a 4042 if(is_ds[t]) emit_addimm(HOST_CCREG,CLOCK_ADJUST(1),HOST_CCREG);
4043 else if(ccadj[t]) emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[t]),HOST_CCREG);
57871462 4044 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) {
4045 emit_storereg(CCREG,HOST_CCREG);
4046 }
4047 // Load 32-bit regs
4048 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 4049 if(regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<TEMPREG) {
57871462 4050 if(regs[t].regmap_entry[hr]==0) {
4051 emit_zeroreg(hr);
4052 }
4053 else if(regs[t].regmap_entry[hr]!=CCREG)
4054 {
4055 emit_loadreg(regs[t].regmap_entry[hr],hr);
4056 }
4057 }
4058 }
4059 // Load 64-bit regs
4060 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 4061 if(regs[t].regmap_entry[hr]>=64&&regs[t].regmap_entry[hr]<TEMPREG+64) {
57871462 4062 assert(regs[t].regmap_entry[hr]!=64);
4063 if((regs[t].was32>>(regs[t].regmap_entry[hr]&63))&1) {
4064 int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
4065 if(lr<0) {
4066 emit_loadreg(regs[t].regmap_entry[hr],hr);
4067 }
4068 else
4069 {
4070 emit_sarimm(lr,31,hr);
4071 }
4072 }
4073 else
4074 {
4075 emit_loadreg(regs[t].regmap_entry[hr],hr);
4076 }
4077 }
4078 }
4079}
4080
4081// Store dirty registers prior to branch
4082void store_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
4083{
4084 if(internal_branch(i_is32,addr))
4085 {
4086 int t=(addr-start)>>2;
4087 int hr;
4088 for(hr=0;hr<HOST_REGS;hr++) {
4089 if(hr!=EXCLUDE_REG) {
4090 if(i_regmap[hr]>0 && i_regmap[hr]!=CCREG) {
4091 if(i_regmap[hr]!=regs[t].regmap_entry[hr] || !((regs[t].dirty>>hr)&1) || (((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) {
4092 if((i_dirty>>hr)&1) {
4093 if(i_regmap[hr]<64) {
4094 if(!((unneeded_reg[t]>>i_regmap[hr])&1)) {
4095 emit_storereg(i_regmap[hr],hr);
4096 if( ((i_is32>>i_regmap[hr])&1) && !((unneeded_reg_upper[t]>>i_regmap[hr])&1) ) {
4097 #ifdef DESTRUCTIVE_WRITEBACK
4098 emit_sarimm(hr,31,hr);
4099 emit_storereg(i_regmap[hr]|64,hr);
4100 #else
4101 emit_sarimm(hr,31,HOST_TEMPREG);
4102 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
4103 #endif
4104 }
4105 }
4106 }else{
4107 if( !((i_is32>>(i_regmap[hr]&63))&1) && !((unneeded_reg_upper[t]>>(i_regmap[hr]&63))&1) ) {
4108 emit_storereg(i_regmap[hr],hr);
4109 }
4110 }
4111 }
4112 }
4113 }
4114 }
4115 }
4116 }
4117 else
4118 {
4119 // Branch out of this block, write out all dirty regs
4120 wb_dirtys(i_regmap,i_is32,i_dirty);
4121 }
4122}
4123
4124// Load all needed registers for branch target
4125void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
4126{
4127 //if(addr>=start && addr<(start+slen*4))
4128 if(internal_branch(i_is32,addr))
4129 {
4130 int t=(addr-start)>>2;
4131 int hr;
4132 // Store the cycle count before loading something else
4133 if(i_regmap[HOST_CCREG]!=CCREG) {
4134 assert(i_regmap[HOST_CCREG]==-1);
4135 }
4136 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) {
4137 emit_storereg(CCREG,HOST_CCREG);
4138 }
4139 // Load 32-bit regs
4140 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 4141 if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<TEMPREG) {
57871462 4142 #ifdef DESTRUCTIVE_WRITEBACK
4143 if(i_regmap[hr]!=regs[t].regmap_entry[hr] || ( !((regs[t].dirty>>hr)&1) && ((i_dirty>>hr)&1) && (((i_is32&~unneeded_reg_upper[t])>>i_regmap[hr])&1) ) || (((i_is32&~regs[t].was32&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)) {
4144 #else
4145 if(i_regmap[hr]!=regs[t].regmap_entry[hr] ) {
4146 #endif
4147 if(regs[t].regmap_entry[hr]==0) {
4148 emit_zeroreg(hr);
4149 }
4150 else if(regs[t].regmap_entry[hr]!=CCREG)
4151 {
4152 emit_loadreg(regs[t].regmap_entry[hr],hr);
4153 }
4154 }
4155 }
4156 }
4157 //Load 64-bit regs
4158 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 4159 if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=64&&regs[t].regmap_entry[hr]<TEMPREG+64) {
57871462 4160 if(i_regmap[hr]!=regs[t].regmap_entry[hr]) {
4161 assert(regs[t].regmap_entry[hr]!=64);
4162 if((i_is32>>(regs[t].regmap_entry[hr]&63))&1) {
4163 int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
4164 if(lr<0) {
4165 emit_loadreg(regs[t].regmap_entry[hr],hr);
4166 }
4167 else
4168 {
4169 emit_sarimm(lr,31,hr);
4170 }
4171 }
4172 else
4173 {
4174 emit_loadreg(regs[t].regmap_entry[hr],hr);
4175 }
4176 }
4177 else if((i_is32>>(regs[t].regmap_entry[hr]&63))&1) {
4178 int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
4179 assert(lr>=0);
4180 emit_sarimm(lr,31,hr);
4181 }
4182 }
4183 }
4184 }
4185}
4186
4187int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
4188{
4189 if(addr>=start && addr<start+slen*4-4)
4190 {
4191 int t=(addr-start)>>2;
4192 int hr;
4193 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) return 0;
4194 for(hr=0;hr<HOST_REGS;hr++)
4195 {
4196 if(hr!=EXCLUDE_REG)
4197 {
4198 if(i_regmap[hr]!=regs[t].regmap_entry[hr])
4199 {
ea3d2e6e 4200 if(regs[t].regmap_entry[hr]>=0&&(regs[t].regmap_entry[hr]|64)<TEMPREG+64)
57871462 4201 {
4202 return 0;
4203 }
9f51b4b9 4204 else
57871462 4205 if((i_dirty>>hr)&1)
4206 {
ea3d2e6e 4207 if(i_regmap[hr]<TEMPREG)
57871462 4208 {
4209 if(!((unneeded_reg[t]>>i_regmap[hr])&1))
4210 return 0;
4211 }
ea3d2e6e 4212 else if(i_regmap[hr]>=64&&i_regmap[hr]<TEMPREG+64)
57871462 4213 {
4214 if(!((unneeded_reg_upper[t]>>(i_regmap[hr]&63))&1))
4215 return 0;
4216 }
4217 }
4218 }
4219 else // Same register but is it 32-bit or dirty?
4220 if(i_regmap[hr]>=0)
4221 {
4222 if(!((regs[t].dirty>>hr)&1))
4223 {
4224 if((i_dirty>>hr)&1)
4225 {
4226 if(!((unneeded_reg[t]>>i_regmap[hr])&1))
4227 {
4228 //printf("%x: dirty no match\n",addr);
4229 return 0;
4230 }
4231 }
4232 }
4233 if((((regs[t].was32^i_is32)&~unneeded_reg_upper[t])>>(i_regmap[hr]&63))&1)
4234 {
4235 //printf("%x: is32 no match\n",addr);
4236 return 0;
4237 }
4238 }
4239 }
4240 }
4241 //if(is32[t]&~unneeded_reg_upper[t]&~i_is32) return 0;
57871462 4242 // Delay slots are not valid branch targets
4243 //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0;
4244 // Delay slots require additional processing, so do not match
4245 if(is_ds[t]) return 0;
4246 }
4247 else
4248 {
4249 int hr;
4250 for(hr=0;hr<HOST_REGS;hr++)
4251 {
4252 if(hr!=EXCLUDE_REG)
4253 {
4254 if(i_regmap[hr]>=0)
4255 {
4256 if(hr!=HOST_CCREG||i_regmap[hr]!=CCREG)
4257 {
4258 if((i_dirty>>hr)&1)
4259 {
4260 return 0;
4261 }
4262 }
4263 }
4264 }
4265 }
4266 }
4267 return 1;
4268}
4269
4270// Used when a branch jumps into the delay slot of another branch
4271void ds_assemble_entry(int i)
4272{
4273 int t=(ba[i]-start)>>2;
4274 if(!instr_addr[t]) instr_addr[t]=(u_int)out;
4275 assem_debug("Assemble delay slot at %x\n",ba[i]);
4276 assem_debug("<->\n");
4277 if(regs[t].regmap_entry[HOST_CCREG]==CCREG&&regs[t].regmap[HOST_CCREG]!=CCREG)
4278 wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty,regs[t].was32);
4279 load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,rs1[t],rs2[t]);
4280 address_generation(t,&regs[t],regs[t].regmap_entry);
b9b61529 4281 if(itype[t]==STORE||itype[t]==STORELR||(opcode[t]&0x3b)==0x39||(opcode[t]&0x3b)==0x3a)
57871462 4282 load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,INVCP,INVCP);
4283 cop1_usable=0;
4284 is_delayslot=0;
4285 switch(itype[t]) {
4286 case ALU:
4287 alu_assemble(t,&regs[t]);break;
4288 case IMM16:
4289 imm16_assemble(t,&regs[t]);break;
4290 case SHIFT:
4291 shift_assemble(t,&regs[t]);break;
4292 case SHIFTIMM:
4293 shiftimm_assemble(t,&regs[t]);break;
4294 case LOAD:
4295 load_assemble(t,&regs[t]);break;
4296 case LOADLR:
4297 loadlr_assemble(t,&regs[t]);break;
4298 case STORE:
4299 store_assemble(t,&regs[t]);break;
4300 case STORELR:
4301 storelr_assemble(t,&regs[t]);break;
4302 case COP0:
4303 cop0_assemble(t,&regs[t]);break;
4304 case COP1:
4305 cop1_assemble(t,&regs[t]);break;
4306 case C1LS:
4307 c1ls_assemble(t,&regs[t]);break;
b9b61529 4308 case COP2:
4309 cop2_assemble(t,&regs[t]);break;
4310 case C2LS:
4311 c2ls_assemble(t,&regs[t]);break;
4312 case C2OP:
4313 c2op_assemble(t,&regs[t]);break;
57871462 4314 case FCONV:
4315 fconv_assemble(t,&regs[t]);break;
4316 case FLOAT:
4317 float_assemble(t,&regs[t]);break;
4318 case FCOMP:
4319 fcomp_assemble(t,&regs[t]);break;
4320 case MULTDIV:
4321 multdiv_assemble(t,&regs[t]);break;
4322 case MOV:
4323 mov_assemble(t,&regs[t]);break;
4324 case SYSCALL:
7139f3c8 4325 case HLECALL:
1e973cb0 4326 case INTCALL:
57871462 4327 case SPAN:
4328 case UJUMP:
4329 case RJUMP:
4330 case CJUMP:
4331 case SJUMP:
4332 case FJUMP:
c43b5311 4333 SysPrintf("Jump in the delay slot. This is probably a bug.\n");
57871462 4334 }
4335 store_regs_bt(regs[t].regmap,regs[t].is32,regs[t].dirty,ba[i]+4);
4336 load_regs_bt(regs[t].regmap,regs[t].is32,regs[t].dirty,ba[i]+4);
4337 if(internal_branch(regs[t].is32,ba[i]+4))
4338 assem_debug("branch: internal\n");
4339 else
4340 assem_debug("branch: external\n");
4341 assert(internal_branch(regs[t].is32,ba[i]+4));
4342 add_to_linker((int)out,ba[i]+4,internal_branch(regs[t].is32,ba[i]+4));
4343 emit_jmp(0);
4344}
4345
4346void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert)
4347{
4348 int count;
4349 int jaddr;
4350 int idle=0;
b6e87b2b 4351 int t=0;
57871462 4352 if(itype[i]==RJUMP)
4353 {
4354 *adj=0;
4355 }
4356 //if(ba[i]>=start && ba[i]<(start+slen*4))
4357 if(internal_branch(branch_regs[i].is32,ba[i]))
4358 {
b6e87b2b 4359 t=(ba[i]-start)>>2;
57871462 4360 if(is_ds[t]) *adj=-1; // Branch into delay slot adds an extra cycle
4361 else *adj=ccadj[t];
4362 }
4363 else
4364 {
4365 *adj=0;
4366 }
4367 count=ccadj[i];
4368 if(taken==TAKEN && i==(ba[i]-start)>>2 && source[i+1]==0) {
4369 // Idle loop
4370 if(count&1) emit_addimm_and_set_flags(2*(count+2),HOST_CCREG);
4371 idle=(int)out;
4372 //emit_subfrommem(&idlecount,HOST_CCREG); // Count idle cycles
4373 emit_andimm(HOST_CCREG,3,HOST_CCREG);
4374 jaddr=(int)out;
4375 emit_jmp(0);
4376 }
4377 else if(*adj==0||invert) {
b6e87b2b 4378 int cycles=CLOCK_ADJUST(count+2);
4379 // faster loop HACK
4380 if (t&&*adj) {
4381 int rel=t-i;
4382 if(-NO_CYCLE_PENALTY_THR<rel&&rel<0)
4383 cycles=CLOCK_ADJUST(*adj)+count+2-*adj;
4384 }
4385 emit_addimm_and_set_flags(cycles,HOST_CCREG);
57871462 4386 jaddr=(int)out;
4387 emit_jns(0);
4388 }
4389 else
4390 {
2573466a 4391 emit_cmpimm(HOST_CCREG,-CLOCK_ADJUST(count+2));
57871462 4392 jaddr=(int)out;
4393 emit_jns(0);
4394 }
4395 add_stub(CC_STUB,jaddr,idle?idle:(int)out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0);
4396}
4397
4398void do_ccstub(int n)
4399{
4400 literal_pool(256);
4401 assem_debug("do_ccstub %x\n",start+stubs[n][4]*4);
4402 set_jump_target(stubs[n][1],(int)out);
4403 int i=stubs[n][4];
4404 if(stubs[n][6]==NULLDS) {
4405 // Delay slot instruction is nullified ("likely" branch)
4406 wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
4407 }
4408 else if(stubs[n][6]!=TAKEN) {
4409 wb_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty);
4410 }
4411 else {
4412 if(internal_branch(branch_regs[i].is32,ba[i]))
4413 wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4414 }
4415 if(stubs[n][5]!=-1)
4416 {
4417 // Save PC as return address
4418 emit_movimm(stubs[n][5],EAX);
4419 emit_writeword(EAX,(int)&pcaddr);
4420 }
4421 else
4422 {
4423 // Return address depends on which way the branch goes
4424 if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
4425 {
4426 int s1l=get_reg(branch_regs[i].regmap,rs1[i]);
4427 int s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
4428 int s2l=get_reg(branch_regs[i].regmap,rs2[i]);
4429 int s2h=get_reg(branch_regs[i].regmap,rs2[i]|64);
4430 if(rs1[i]==0)
4431 {
4432 s1l=s2l;s1h=s2h;
4433 s2l=s2h=-1;
4434 }
4435 else if(rs2[i]==0)
4436 {
4437 s2l=s2h=-1;
4438 }
4439 if((branch_regs[i].is32>>rs1[i])&(branch_regs[i].is32>>rs2[i])&1) {
4440 s1h=s2h=-1;
4441 }
4442 assert(s1l>=0);
4443 #ifdef DESTRUCTIVE_WRITEBACK
4444 if(rs1[i]) {
4445 if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs1[i])&1)
4446 emit_loadreg(rs1[i],s1l);
9f51b4b9 4447 }
57871462 4448 else {
4449 if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1)
4450 emit_loadreg(rs2[i],s1l);
4451 }
4452 if(s2l>=0)
4453 if((branch_regs[i].dirty>>s2l)&(branch_regs[i].is32>>rs2[i])&1)
4454 emit_loadreg(rs2[i],s2l);
4455 #endif
4456 int hr=0;
5194fb95 4457 int addr=-1,alt=-1,ntaddr=-1;
57871462 4458 while(hr<HOST_REGS)
4459 {
4460 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
4461 (branch_regs[i].regmap[hr]&63)!=rs1[i] &&
4462 (branch_regs[i].regmap[hr]&63)!=rs2[i] )
4463 {
4464 addr=hr++;break;
4465 }
4466 hr++;
4467 }
4468 while(hr<HOST_REGS)
4469 {
4470 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
4471 (branch_regs[i].regmap[hr]&63)!=rs1[i] &&
4472 (branch_regs[i].regmap[hr]&63)!=rs2[i] )
4473 {
4474 alt=hr++;break;
4475 }
4476 hr++;
4477 }
4478 if((opcode[i]&0x2E)==6) // BLEZ/BGTZ needs another register
4479 {
4480 while(hr<HOST_REGS)
4481 {
4482 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
4483 (branch_regs[i].regmap[hr]&63)!=rs1[i] &&
4484 (branch_regs[i].regmap[hr]&63)!=rs2[i] )
4485 {
4486 ntaddr=hr;break;
4487 }
4488 hr++;
4489 }
4490 assert(hr<HOST_REGS);
4491 }
4492 if((opcode[i]&0x2f)==4) // BEQ
4493 {
4494 #ifdef HAVE_CMOV_IMM
4495 if(s1h<0) {
4496 if(s2l>=0) emit_cmp(s1l,s2l);
4497 else emit_test(s1l,s1l);
4498 emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr);
4499 }
4500 else
4501 #endif
4502 {
4503 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
4504 if(s1h>=0) {
4505 if(s2h>=0) emit_cmp(s1h,s2h);
4506 else emit_test(s1h,s1h);
4507 emit_cmovne_reg(alt,addr);
4508 }
4509 if(s2l>=0) emit_cmp(s1l,s2l);
4510 else emit_test(s1l,s1l);
4511 emit_cmovne_reg(alt,addr);
4512 }
4513 }
4514 if((opcode[i]&0x2f)==5) // BNE
4515 {
4516 #ifdef HAVE_CMOV_IMM
4517 if(s1h<0) {
4518 if(s2l>=0) emit_cmp(s1l,s2l);
4519 else emit_test(s1l,s1l);
4520 emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr);
4521 }
4522 else
4523 #endif
4524 {
4525 emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt);
4526 if(s1h>=0) {
4527 if(s2h>=0) emit_cmp(s1h,s2h);
4528 else emit_test(s1h,s1h);
4529 emit_cmovne_reg(alt,addr);
4530 }
4531 if(s2l>=0) emit_cmp(s1l,s2l);
4532 else emit_test(s1l,s1l);
4533 emit_cmovne_reg(alt,addr);
4534 }
4535 }
4536 if((opcode[i]&0x2f)==6) // BLEZ
4537 {
4538 //emit_movimm(ba[i],alt);
4539 //emit_movimm(start+i*4+8,addr);
4540 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
4541 emit_cmpimm(s1l,1);
4542 if(s1h>=0) emit_mov(addr,ntaddr);
4543 emit_cmovl_reg(alt,addr);
4544 if(s1h>=0) {
4545 emit_test(s1h,s1h);
4546 emit_cmovne_reg(ntaddr,addr);
4547 emit_cmovs_reg(alt,addr);
4548 }
4549 }
4550 if((opcode[i]&0x2f)==7) // BGTZ
4551 {
4552 //emit_movimm(ba[i],addr);
4553 //emit_movimm(start+i*4+8,ntaddr);
4554 emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr);
4555 emit_cmpimm(s1l,1);
4556 if(s1h>=0) emit_mov(addr,alt);
4557 emit_cmovl_reg(ntaddr,addr);
4558 if(s1h>=0) {
4559 emit_test(s1h,s1h);
4560 emit_cmovne_reg(alt,addr);
4561 emit_cmovs_reg(ntaddr,addr);
4562 }
4563 }
4564 if((opcode[i]==1)&&(opcode2[i]&0x2D)==0) // BLTZ
4565 {
4566 //emit_movimm(ba[i],alt);
4567 //emit_movimm(start+i*4+8,addr);
4568 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
4569 if(s1h>=0) emit_test(s1h,s1h);
4570 else emit_test(s1l,s1l);
4571 emit_cmovs_reg(alt,addr);
4572 }
4573 if((opcode[i]==1)&&(opcode2[i]&0x2D)==1) // BGEZ
4574 {
4575 //emit_movimm(ba[i],addr);
4576 //emit_movimm(start+i*4+8,alt);
4577 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
4578 if(s1h>=0) emit_test(s1h,s1h);
4579 else emit_test(s1l,s1l);
4580 emit_cmovs_reg(alt,addr);
4581 }
4582 if(opcode[i]==0x11 && opcode2[i]==0x08 ) {
4583 if(source[i]&0x10000) // BC1T
4584 {
4585 //emit_movimm(ba[i],alt);
4586 //emit_movimm(start+i*4+8,addr);
4587 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
4588 emit_testimm(s1l,0x800000);
4589 emit_cmovne_reg(alt,addr);
4590 }
4591 else // BC1F
4592 {
4593 //emit_movimm(ba[i],addr);
4594 //emit_movimm(start+i*4+8,alt);
4595 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
4596 emit_testimm(s1l,0x800000);
4597 emit_cmovne_reg(alt,addr);
4598 }
4599 }
4600 emit_writeword(addr,(int)&pcaddr);
4601 }
4602 else
4603 if(itype[i]==RJUMP)
4604 {
4605 int r=get_reg(branch_regs[i].regmap,rs1[i]);
4606 if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) {
4607 r=get_reg(branch_regs[i].regmap,RTEMP);
4608 }
4609 emit_writeword(r,(int)&pcaddr);
4610 }
c43b5311 4611 else {SysPrintf("Unknown branch type in do_ccstub\n");exit(1);}
57871462 4612 }
4613 // Update cycle count
4614 assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1);
2573466a 4615 if(stubs[n][3]) emit_addimm(HOST_CCREG,CLOCK_ADJUST((int)stubs[n][3]),HOST_CCREG);
57871462 4616 emit_call((int)cc_interrupt);
2573466a 4617 if(stubs[n][3]) emit_addimm(HOST_CCREG,-CLOCK_ADJUST((int)stubs[n][3]),HOST_CCREG);
57871462 4618 if(stubs[n][6]==TAKEN) {
4619 if(internal_branch(branch_regs[i].is32,ba[i]))
4620 load_needed_regs(branch_regs[i].regmap,regs[(ba[i]-start)>>2].regmap_entry);
4621 else if(itype[i]==RJUMP) {
4622 if(get_reg(branch_regs[i].regmap,RTEMP)>=0)
4623 emit_readword((int)&pcaddr,get_reg(branch_regs[i].regmap,RTEMP));
4624 else
4625 emit_loadreg(rs1[i],get_reg(branch_regs[i].regmap,rs1[i]));
4626 }
4627 }else if(stubs[n][6]==NOTTAKEN) {
4628 if(i<slen-2) load_needed_regs(branch_regs[i].regmap,regmap_pre[i+2]);
4629 else load_all_regs(branch_regs[i].regmap);
4630 }else if(stubs[n][6]==NULLDS) {
4631 // Delay slot instruction is nullified ("likely" branch)
4632 if(i<slen-2) load_needed_regs(regs[i].regmap,regmap_pre[i+2]);
4633 else load_all_regs(regs[i].regmap);
4634 }else{
4635 load_all_regs(branch_regs[i].regmap);
4636 }
4637 emit_jmp(stubs[n][2]); // return address
9f51b4b9 4638
57871462 4639 /* This works but uses a lot of memory...
4640 emit_readword((int)&last_count,ECX);
4641 emit_add(HOST_CCREG,ECX,EAX);
4642 emit_writeword(EAX,(int)&Count);
4643 emit_call((int)gen_interupt);
4644 emit_readword((int)&Count,HOST_CCREG);
4645 emit_readword((int)&next_interupt,EAX);
4646 emit_readword((int)&pending_exception,EBX);
4647 emit_writeword(EAX,(int)&last_count);
4648 emit_sub(HOST_CCREG,EAX,HOST_CCREG);
4649 emit_test(EBX,EBX);
4650 int jne_instr=(int)out;
4651 emit_jne(0);
4652 if(stubs[n][3]) emit_addimm(HOST_CCREG,-2*stubs[n][3],HOST_CCREG);
4653 load_all_regs(branch_regs[i].regmap);
4654 emit_jmp(stubs[n][2]); // return address
4655 set_jump_target(jne_instr,(int)out);
4656 emit_readword((int)&pcaddr,EAX);
4657 // Call get_addr_ht instead of doing the hash table here.
4658 // This code is executed infrequently and takes up a lot of space
4659 // so smaller is better.
4660 emit_storereg(CCREG,HOST_CCREG);
4661 emit_pushreg(EAX);
4662 emit_call((int)get_addr_ht);
4663 emit_loadreg(CCREG,HOST_CCREG);
4664 emit_addimm(ESP,4,ESP);
4665 emit_jmpreg(EAX);*/
4666}
4667
e2b5e7aa 4668static void add_to_linker(int addr,int target,int ext)
57871462 4669{
4670 link_addr[linkcount][0]=addr;
4671 link_addr[linkcount][1]=target;
9f51b4b9 4672 link_addr[linkcount][2]=ext;
57871462 4673 linkcount++;
4674}
4675
eba830cd 4676static void ujump_assemble_write_ra(int i)
4677{
4678 int rt;
4679 unsigned int return_address;
4680 rt=get_reg(branch_regs[i].regmap,31);
4681 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4682 //assert(rt>=0);
4683 return_address=start+i*4+8;
4684 if(rt>=0) {
4685 #ifdef USE_MINI_HT
4686 if(internal_branch(branch_regs[i].is32,return_address)&&rt1[i+1]!=31) {
4687 int temp=-1; // note: must be ds-safe
4688 #ifdef HOST_TEMPREG
4689 temp=HOST_TEMPREG;
4690 #endif
4691 if(temp>=0) do_miniht_insert(return_address,rt,temp);
4692 else emit_movimm(return_address,rt);
4693 }
4694 else
4695 #endif
4696 {
4697 #ifdef REG_PREFETCH
9f51b4b9 4698 if(temp>=0)
eba830cd 4699 {
4700 if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
4701 }
4702 #endif
4703 emit_movimm(return_address,rt); // PC into link register
4704 #ifdef IMM_PREFETCH
4705 emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
4706 #endif
4707 }
4708 }
4709}
4710
57871462 4711void ujump_assemble(int i,struct regstat *i_regs)
4712{
eba830cd 4713 int ra_done=0;
57871462 4714 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
4715 address_generation(i+1,i_regs,regs[i].regmap_entry);
4716 #ifdef REG_PREFETCH
4717 int temp=get_reg(branch_regs[i].regmap,PTEMP);
9f51b4b9 4718 if(rt1[i]==31&&temp>=0)
57871462 4719 {
581335b0 4720 signed char *i_regmap=i_regs->regmap;
57871462 4721 int return_address=start+i*4+8;
9f51b4b9 4722 if(get_reg(branch_regs[i].regmap,31)>0)
57871462 4723 if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
4724 }
4725 #endif
eba830cd 4726 if(rt1[i]==31&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) {
4727 ujump_assemble_write_ra(i); // writeback ra for DS
4728 ra_done=1;
57871462 4729 }
4ef8f67d 4730 ds_assemble(i+1,i_regs);
4731 uint64_t bc_unneeded=branch_regs[i].u;
4732 uint64_t bc_unneeded_upper=branch_regs[i].uu;
4733 bc_unneeded|=1|(1LL<<rt1[i]);
4734 bc_unneeded_upper|=1|(1LL<<rt1[i]);
4735 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
4736 bc_unneeded,bc_unneeded_upper);
4737 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
eba830cd 4738 if(!ra_done&&rt1[i]==31)
4739 ujump_assemble_write_ra(i);
57871462 4740 int cc,adj;
4741 cc=get_reg(branch_regs[i].regmap,CCREG);
4742 assert(cc==HOST_CCREG);
4743 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4744 #ifdef REG_PREFETCH
4745 if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp);
4746 #endif
4747 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
2573466a 4748 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 4749 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4750 if(internal_branch(branch_regs[i].is32,ba[i]))
4751 assem_debug("branch: internal\n");
4752 else
4753 assem_debug("branch: external\n");
4754 if(internal_branch(branch_regs[i].is32,ba[i])&&is_ds[(ba[i]-start)>>2]) {
4755 ds_assemble_entry(i);
4756 }
4757 else {
4758 add_to_linker((int)out,ba[i],internal_branch(branch_regs[i].is32,ba[i]));
4759 emit_jmp(0);
4760 }
4761}
4762
eba830cd 4763static void rjump_assemble_write_ra(int i)
4764{
4765 int rt,return_address;
4766 assert(rt1[i+1]!=rt1[i]);
4767 assert(rt2[i+1]!=rt1[i]);
4768 rt=get_reg(branch_regs[i].regmap,rt1[i]);
4769 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4770 assert(rt>=0);
4771 return_address=start+i*4+8;
4772 #ifdef REG_PREFETCH
9f51b4b9 4773 if(temp>=0)
eba830cd 4774 {
4775 if(i_regmap[temp]!=PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
4776 }
4777 #endif
4778 emit_movimm(return_address,rt); // PC into link register
4779 #ifdef IMM_PREFETCH
4780 emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
4781 #endif
4782}
4783
57871462 4784void rjump_assemble(int i,struct regstat *i_regs)
4785{
57871462 4786 int temp;
581335b0 4787 int rs,cc;
eba830cd 4788 int ra_done=0;
57871462 4789 rs=get_reg(branch_regs[i].regmap,rs1[i]);
4790 assert(rs>=0);
4791 if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) {
4792 // Delay slot abuse, make a copy of the branch address register
4793 temp=get_reg(branch_regs[i].regmap,RTEMP);
4794 assert(temp>=0);
4795 assert(regs[i].regmap[temp]==RTEMP);
4796 emit_mov(rs,temp);
4797 rs=temp;
4798 }
4799 address_generation(i+1,i_regs,regs[i].regmap_entry);
4800 #ifdef REG_PREFETCH
9f51b4b9 4801 if(rt1[i]==31)
57871462 4802 {
4803 if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) {
581335b0 4804 signed char *i_regmap=i_regs->regmap;
57871462 4805 int return_address=start+i*4+8;
4806 if(i_regmap[temp]==PTEMP) emit_movimm((int)hash_table[((return_address>>16)^return_address)&0xFFFF],temp);
4807 }
4808 }
4809 #endif
4810 #ifdef USE_MINI_HT
4811 if(rs1[i]==31) {
4812 int rh=get_reg(regs[i].regmap,RHASH);
4813 if(rh>=0) do_preload_rhash(rh);
4814 }
4815 #endif
eba830cd 4816 if(rt1[i]!=0&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) {
4817 rjump_assemble_write_ra(i);
4818 ra_done=1;
57871462 4819 }
d5910d5d 4820 ds_assemble(i+1,i_regs);
4821 uint64_t bc_unneeded=branch_regs[i].u;
4822 uint64_t bc_unneeded_upper=branch_regs[i].uu;
4823 bc_unneeded|=1|(1LL<<rt1[i]);
4824 bc_unneeded_upper|=1|(1LL<<rt1[i]);
4825 bc_unneeded&=~(1LL<<rs1[i]);
4826 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
4827 bc_unneeded,bc_unneeded_upper);
4828 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],CCREG);
eba830cd 4829 if(!ra_done&&rt1[i]!=0)
4830 rjump_assemble_write_ra(i);
57871462 4831 cc=get_reg(branch_regs[i].regmap,CCREG);
4832 assert(cc==HOST_CCREG);
581335b0 4833 (void)cc;
57871462 4834 #ifdef USE_MINI_HT
4835 int rh=get_reg(branch_regs[i].regmap,RHASH);
4836 int ht=get_reg(branch_regs[i].regmap,RHTBL);
4837 if(rs1[i]==31) {
4838 if(regs[i].regmap[rh]!=RHASH) do_preload_rhash(rh);
4839 do_preload_rhtbl(ht);
4840 do_rhash(rs,rh);
4841 }
4842 #endif
4843 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1);
4844 #ifdef DESTRUCTIVE_WRITEBACK
4845 if((branch_regs[i].dirty>>rs)&(branch_regs[i].is32>>rs1[i])&1) {
4846 if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) {
4847 emit_loadreg(rs1[i],rs);
4848 }
4849 }
4850 #endif
4851 #ifdef REG_PREFETCH
4852 if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp);
4853 #endif
4854 #ifdef USE_MINI_HT
4855 if(rs1[i]==31) {
4856 do_miniht_load(ht,rh);
4857 }
4858 #endif
4859 //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN);
4860 //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen
4861 //assert(adj==0);
2573466a 4862 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 4863 add_stub(CC_STUB,(int)out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0);
911f2d55 4864 if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10)
4865 // special case for RFE
4866 emit_jmp(0);
4867 else
71e490c5 4868 emit_jns(0);
57871462 4869 //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1);
4870 #ifdef USE_MINI_HT
4871 if(rs1[i]==31) {
4872 do_miniht_jump(rs,rh,ht);
4873 }
4874 else
4875 #endif
4876 {
4877 //if(rs!=EAX) emit_mov(rs,EAX);
4878 //emit_jmp((int)jump_vaddr_eax);
4879 emit_jmp(jump_vaddr_reg[rs]);
4880 }
4881 /* Check hash table
4882 temp=!rs;
4883 emit_mov(rs,temp);
4884 emit_shrimm(rs,16,rs);
4885 emit_xor(temp,rs,rs);
4886 emit_movzwl_reg(rs,rs);
4887 emit_shlimm(rs,4,rs);
4888 emit_cmpmem_indexed((int)hash_table,rs,temp);
4889 emit_jne((int)out+14);
4890 emit_readword_indexed((int)hash_table+4,rs,rs);
4891 emit_jmpreg(rs);
4892 emit_cmpmem_indexed((int)hash_table+8,rs,temp);
4893 emit_addimm_no_flags(8,rs);
4894 emit_jeq((int)out-17);
4895 // No hit on hash table, call compiler
4896 emit_pushreg(temp);
4897//DEBUG >
4898#ifdef DEBUG_CYCLE_COUNT
4899 emit_readword((int)&last_count,ECX);
4900 emit_add(HOST_CCREG,ECX,HOST_CCREG);
4901 emit_readword((int)&next_interupt,ECX);
4902 emit_writeword(HOST_CCREG,(int)&Count);
4903 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4904 emit_writeword(ECX,(int)&last_count);
4905#endif
4906//DEBUG <
4907 emit_storereg(CCREG,HOST_CCREG);
4908 emit_call((int)get_addr);
4909 emit_loadreg(CCREG,HOST_CCREG);
4910 emit_addimm(ESP,4,ESP);
4911 emit_jmpreg(EAX);*/
4912 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4913 if(rt1[i]!=31&&i<slen-2&&(((u_int)out)&7)) emit_mov(13,13);
4914 #endif
4915}
4916
4917void cjump_assemble(int i,struct regstat *i_regs)
4918{
4919 signed char *i_regmap=i_regs->regmap;
4920 int cc;
4921 int match;
4922 match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4923 assem_debug("match=%d\n",match);
4924 int s1h,s1l,s2h,s2l;
4925 int prev_cop1_usable=cop1_usable;
4926 int unconditional=0,nop=0;
4927 int only32=0;
57871462 4928 int invert=0;
4929 int internal=internal_branch(branch_regs[i].is32,ba[i]);
4930 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
57871462 4931 if(!match) invert=1;
4932 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4933 if(i>(ba[i]-start)>>2) invert=1;
4934 #endif
9f51b4b9 4935
e1190b87 4936 if(ooo[i]) {
57871462 4937 s1l=get_reg(branch_regs[i].regmap,rs1[i]);
4938 s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
4939 s2l=get_reg(branch_regs[i].regmap,rs2[i]);
4940 s2h=get_reg(branch_regs[i].regmap,rs2[i]|64);
4941 }
4942 else {
4943 s1l=get_reg(i_regmap,rs1[i]);
4944 s1h=get_reg(i_regmap,rs1[i]|64);
4945 s2l=get_reg(i_regmap,rs2[i]);
4946 s2h=get_reg(i_regmap,rs2[i]|64);
4947 }
4948 if(rs1[i]==0&&rs2[i]==0)
4949 {
4950 if(opcode[i]&1) nop=1;
4951 else unconditional=1;
4952 //assert(opcode[i]!=5);
4953 //assert(opcode[i]!=7);
4954 //assert(opcode[i]!=0x15);
4955 //assert(opcode[i]!=0x17);
4956 }
4957 else if(rs1[i]==0)
4958 {
4959 s1l=s2l;s1h=s2h;
4960 s2l=s2h=-1;
4961 only32=(regs[i].was32>>rs2[i])&1;
4962 }
4963 else if(rs2[i]==0)
4964 {
4965 s2l=s2h=-1;
4966 only32=(regs[i].was32>>rs1[i])&1;
4967 }
4968 else {
4969 only32=(regs[i].was32>>rs1[i])&(regs[i].was32>>rs2[i])&1;
4970 }
4971
e1190b87 4972 if(ooo[i]) {
57871462 4973 // Out of order execution (delay slot first)
4974 //printf("OOOE\n");
4975 address_generation(i+1,i_regs,regs[i].regmap_entry);
4976 ds_assemble(i+1,i_regs);
4977 int adj;
4978 uint64_t bc_unneeded=branch_regs[i].u;
4979 uint64_t bc_unneeded_upper=branch_regs[i].uu;
4980 bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
4981 bc_unneeded_upper&=~((1LL<<us1[i])|(1LL<<us2[i]));
4982 bc_unneeded|=1;
4983 bc_unneeded_upper|=1;
4984 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
4985 bc_unneeded,bc_unneeded_upper);
4986 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],rs2[i]);
4987 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
4988 cc=get_reg(branch_regs[i].regmap,CCREG);
4989 assert(cc==HOST_CCREG);
9f51b4b9 4990 if(unconditional)
57871462 4991 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4992 //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional);
4993 //assem_debug("cycle count (adj)\n");
4994 if(unconditional) {
4995 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
4996 if(i!=(ba[i]-start)>>2 || source[i+1]!=0) {
2573466a 4997 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 4998 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4999 if(internal)
5000 assem_debug("branch: internal\n");
5001 else
5002 assem_debug("branch: external\n");
5003 if(internal&&is_ds[(ba[i]-start)>>2]) {
5004 ds_assemble_entry(i);
5005 }
5006 else {
5007 add_to_linker((int)out,ba[i],internal);
5008 emit_jmp(0);
5009 }
5010 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5011 if(((u_int)out)&7) emit_addnop(0);
5012 #endif
5013 }
5014 }
5015 else if(nop) {
2573466a 5016 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 5017 int jaddr=(int)out;
5018 emit_jns(0);
5019 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
5020 }
5021 else {
5022 int taken=0,nottaken=0,nottaken1=0;
5023 do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
2573466a 5024 if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5025 if(!only32)
5026 {
5027 assert(s1h>=0);
5028 if(opcode[i]==4) // BEQ
5029 {
5030 if(s2h>=0) emit_cmp(s1h,s2h);
5031 else emit_test(s1h,s1h);
5032 nottaken1=(int)out;
5033 emit_jne(1);
5034 }
5035 if(opcode[i]==5) // BNE
5036 {
5037 if(s2h>=0) emit_cmp(s1h,s2h);
5038 else emit_test(s1h,s1h);
5039 if(invert) taken=(int)out;
5040 else add_to_linker((int)out,ba[i],internal);
5041 emit_jne(0);
5042 }
5043 if(opcode[i]==6) // BLEZ
5044 {
5045 emit_test(s1h,s1h);
5046 if(invert) taken=(int)out;
5047 else add_to_linker((int)out,ba[i],internal);
5048 emit_js(0);
5049 nottaken1=(int)out;
5050 emit_jne(1);
5051 }
5052 if(opcode[i]==7) // BGTZ
5053 {
5054 emit_test(s1h,s1h);
5055 nottaken1=(int)out;
5056 emit_js(1);
5057 if(invert) taken=(int)out;
5058 else add_to_linker((int)out,ba[i],internal);
5059 emit_jne(0);
5060 }
5061 } // if(!only32)
9f51b4b9 5062
57871462 5063 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
5064 assert(s1l>=0);
5065 if(opcode[i]==4) // BEQ
5066 {
5067 if(s2l>=0) emit_cmp(s1l,s2l);
5068 else emit_test(s1l,s1l);
5069 if(invert){
5070 nottaken=(int)out;
5071 emit_jne(1);
5072 }else{
5073 add_to_linker((int)out,ba[i],internal);
5074 emit_jeq(0);
5075 }
5076 }
5077 if(opcode[i]==5) // BNE
5078 {
5079 if(s2l>=0) emit_cmp(s1l,s2l);
5080 else emit_test(s1l,s1l);
5081 if(invert){
5082 nottaken=(int)out;
5083 emit_jeq(1);
5084 }else{
5085 add_to_linker((int)out,ba[i],internal);
5086 emit_jne(0);
5087 }
5088 }
5089 if(opcode[i]==6) // BLEZ
5090 {
5091 emit_cmpimm(s1l,1);
5092 if(invert){
5093 nottaken=(int)out;
5094 emit_jge(1);
5095 }else{
5096 add_to_linker((int)out,ba[i],internal);
5097 emit_jl(0);
5098 }
5099 }
5100 if(opcode[i]==7) // BGTZ
5101 {
5102 emit_cmpimm(s1l,1);
5103 if(invert){
5104 nottaken=(int)out;
5105 emit_jl(1);
5106 }else{
5107 add_to_linker((int)out,ba[i],internal);
5108 emit_jge(0);
5109 }
5110 }
5111 if(invert) {
5112 if(taken) set_jump_target(taken,(int)out);
5113 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5114 if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) {
5115 if(adj) {
2573466a 5116 emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 5117 add_to_linker((int)out,ba[i],internal);
5118 }else{
5119 emit_addnop(13);
5120 add_to_linker((int)out,ba[i],internal*2);
5121 }
5122 emit_jmp(0);
5123 }else
5124 #endif
5125 {
2573466a 5126 if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 5127 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5128 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5129 if(internal)
5130 assem_debug("branch: internal\n");
5131 else
5132 assem_debug("branch: external\n");
5133 if(internal&&is_ds[(ba[i]-start)>>2]) {
5134 ds_assemble_entry(i);
5135 }
5136 else {
5137 add_to_linker((int)out,ba[i],internal);
5138 emit_jmp(0);
5139 }
5140 }
5141 set_jump_target(nottaken,(int)out);
5142 }
5143
5144 if(nottaken1) set_jump_target(nottaken1,(int)out);
5145 if(adj) {
2573466a 5146 if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc);
57871462 5147 }
5148 } // (!unconditional)
5149 } // if(ooo)
5150 else
5151 {
5152 // In-order execution (branch first)
5153 //if(likely[i]) printf("IOL\n");
5154 //else
5155 //printf("IOE\n");
5156 int taken=0,nottaken=0,nottaken1=0;
5157 if(!unconditional&&!nop) {
5158 if(!only32)
5159 {
5160 assert(s1h>=0);
5161 if((opcode[i]&0x2f)==4) // BEQ
5162 {
5163 if(s2h>=0) emit_cmp(s1h,s2h);
5164 else emit_test(s1h,s1h);
5165 nottaken1=(int)out;
5166 emit_jne(2);
5167 }
5168 if((opcode[i]&0x2f)==5) // BNE
5169 {
5170 if(s2h>=0) emit_cmp(s1h,s2h);
5171 else emit_test(s1h,s1h);
5172 taken=(int)out;
5173 emit_jne(1);
5174 }
5175 if((opcode[i]&0x2f)==6) // BLEZ
5176 {
5177 emit_test(s1h,s1h);
5178 taken=(int)out;
5179 emit_js(1);
5180 nottaken1=(int)out;
5181 emit_jne(2);
5182 }
5183 if((opcode[i]&0x2f)==7) // BGTZ
5184 {
5185 emit_test(s1h,s1h);
5186 nottaken1=(int)out;
5187 emit_js(2);
5188 taken=(int)out;
5189 emit_jne(1);
5190 }
5191 } // if(!only32)
9f51b4b9 5192
57871462 5193 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
5194 assert(s1l>=0);
5195 if((opcode[i]&0x2f)==4) // BEQ
5196 {
5197 if(s2l>=0) emit_cmp(s1l,s2l);
5198 else emit_test(s1l,s1l);
5199 nottaken=(int)out;
5200 emit_jne(2);
5201 }
5202 if((opcode[i]&0x2f)==5) // BNE
5203 {
5204 if(s2l>=0) emit_cmp(s1l,s2l);
5205 else emit_test(s1l,s1l);
5206 nottaken=(int)out;
5207 emit_jeq(2);
5208 }
5209 if((opcode[i]&0x2f)==6) // BLEZ
5210 {
5211 emit_cmpimm(s1l,1);
5212 nottaken=(int)out;
5213 emit_jge(2);
5214 }
5215 if((opcode[i]&0x2f)==7) // BGTZ
5216 {
5217 emit_cmpimm(s1l,1);
5218 nottaken=(int)out;
5219 emit_jl(2);
5220 }
5221 } // if(!unconditional)
5222 int adj;
5223 uint64_t ds_unneeded=branch_regs[i].u;
5224 uint64_t ds_unneeded_upper=branch_regs[i].uu;
5225 ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
5226 ds_unneeded_upper&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
5227 if((~ds_unneeded_upper>>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
5228 ds_unneeded|=1;
5229 ds_unneeded_upper|=1;
5230 // branch taken
5231 if(!nop) {
5232 if(taken) set_jump_target(taken,(int)out);
5233 assem_debug("1:\n");
5234 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5235 ds_unneeded,ds_unneeded_upper);
5236 // load regs
5237 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
5238 address_generation(i+1,&branch_regs[i],0);
5239 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP);
5240 ds_assemble(i+1,&branch_regs[i]);
5241 cc=get_reg(branch_regs[i].regmap,CCREG);
5242 if(cc==-1) {
5243 emit_loadreg(CCREG,cc=HOST_CCREG);
5244 // CHECK: Is the following instruction (fall thru) allocated ok?
5245 }
5246 assert(cc==HOST_CCREG);
5247 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5248 do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
5249 assem_debug("cycle count (adj)\n");
2573466a 5250 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5251 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5252 if(internal)
5253 assem_debug("branch: internal\n");
5254 else
5255 assem_debug("branch: external\n");
5256 if(internal&&is_ds[(ba[i]-start)>>2]) {
5257 ds_assemble_entry(i);
5258 }
5259 else {
5260 add_to_linker((int)out,ba[i],internal);
5261 emit_jmp(0);
5262 }
5263 }
5264 // branch not taken
5265 cop1_usable=prev_cop1_usable;
5266 if(!unconditional) {
5267 if(nottaken1) set_jump_target(nottaken1,(int)out);
5268 set_jump_target(nottaken,(int)out);
5269 assem_debug("2:\n");
5270 if(!likely[i]) {
5271 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5272 ds_unneeded,ds_unneeded_upper);
5273 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
5274 address_generation(i+1,&branch_regs[i],0);
5275 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
5276 ds_assemble(i+1,&branch_regs[i]);
5277 }
5278 cc=get_reg(branch_regs[i].regmap,CCREG);
5279 if(cc==-1&&!likely[i]) {
5280 // Cycle count isn't in a register, temporarily load it then write it out
5281 emit_loadreg(CCREG,HOST_CCREG);
2573466a 5282 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 5283 int jaddr=(int)out;
5284 emit_jns(0);
5285 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
5286 emit_storereg(CCREG,HOST_CCREG);
5287 }
5288 else{
5289 cc=get_reg(i_regmap,CCREG);
5290 assert(cc==HOST_CCREG);
2573466a 5291 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 5292 int jaddr=(int)out;
5293 emit_jns(0);
5294 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
5295 }
5296 }
5297 }
5298}
5299
// sjump_assemble: emit host code for instruction i, a branch that tests the
// sign of a single register rs1[i]. The in-code comments name the variants
// (BLTZ/BGEZ and their linking / "likely" forms), selected by opcode2[i].
//   i      - index of the branch in the block being compiled; i+1 is its
//            delay slot.
//   i_regs - register state; its regmap is used for lookups made before the
//            delay slot has been assembled.
// Two layouts are produced depending on ooo[i]:
//   - delay slot first, then the branch ("out of order"), or
//   - branch first, with the delay slot assembled separately on the taken
//     and on the not-taken path ("in order").
// NOTE(review): do_cc(), add_stub(), match_bt() etc. are defined elsewhere in
// this file; their exact contracts are not restated here.
5300void sjump_assemble(int i,struct regstat *i_regs)
5301{
5302 signed char *i_regmap=i_regs->regmap;
5303 int cc;
5304 int match;
5305 match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5306 assem_debug("smatch=%d\n",match);
5307 int s1h,s1l;
// cop1_usable is restored from this copy before the not-taken path of the
// in-order layout is emitted.
5308 int prev_cop1_usable=cop1_usable;
5309 int unconditional=0,nevertaken=0;
5310 int only32=0;
57871462 5311 int invert=0;
5312 int internal=internal_branch(branch_regs[i].is32,ba[i]);
5313 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
// When match_bt() reports no match, the condition is inverted in the
// out-of-order layout so that the taken path can run store_regs_bt/load_regs_bt
// before jumping, with the inverted conditional jump skipping over it.
57871462 5314 if(!match) invert=1;
5315 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5316 if(i>(ba[i]-start)>>2) invert=1;
5317 #endif
5318
5319 //if(opcode2[i]>=0x10) return; // FIXME (BxxZAL)
df894a3a 5320 //assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL)
57871462 5321
// Host registers holding the low (s1l) and upper (rs1|64 -> s1h) parts of
// rs1. In the out-of-order layout they are looked up in the branch register
// map, since the delay slot runs before the test.
e1190b87 5322 if(ooo[i]) {
57871462 5323 s1l=get_reg(branch_regs[i].regmap,rs1[i]);
5324 s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
5325 }
5326 else {
5327 s1l=get_reg(i_regmap,rs1[i]);
5328 s1h=get_reg(i_regmap,rs1[i]|64);
5329 }
// Testing r0: odd opcode2 values (the BGEZ forms) are always taken, even
// values (the BLTZ forms) are never taken.
5330 if(rs1[i]==0)
5331 {
5332 if(opcode2[i]&1) unconditional=1;
5333 else nevertaken=1;
5334 // These are never taken (r0 is never less than zero)
5335 //assert(opcode2[i]!=0);
5336 //assert(opcode2[i]!=2);
5337 //assert(opcode2[i]!=0x10);
5338 //assert(opcode2[i]!=0x12);
5339 }
5340 else {
// only32 is the was32 bit of rs1: when set the sign test below uses s1l,
// otherwise it uses s1h.
5341 only32=(regs[i].was32>>rs1[i])&1;
5342 }
5343
e1190b87 5344 if(ooo[i]) {
57871462 5345 // Out of order execution (delay slot first)
5346 //printf("OOOE\n");
5347 address_generation(i+1,i_regs,regs[i].regmap_entry);
5348 ds_assemble(i+1,i_regs);
5349 int adj;
// Start from the branch's unneeded sets and mark the branch's own source
// registers as needed; bit 0 is always set in both masks.
5350 uint64_t bc_unneeded=branch_regs[i].u;
5351 uint64_t bc_unneeded_upper=branch_regs[i].uu;
5352 bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
5353 bc_unneeded_upper&=~((1LL<<us1[i])|(1LL<<us2[i]));
5354 bc_unneeded|=1;
5355 bc_unneeded_upper|=1;
5356 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5357 bc_unneeded,bc_unneeded_upper);
5358 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],rs1[i]);
5359 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
// Linking variants (rt1 is register 31): write the return address into the
// host register mapped to r31, if there is one.
5360 if(rt1[i]==31) {
5361 int rt,return_address;
57871462 5362 rt=get_reg(branch_regs[i].regmap,31);
5363 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
5364 if(rt>=0) {
5365 // Save the PC even if the branch is not taken
5366 return_address=start+i*4+8;
5367 emit_movimm(return_address,rt); // PC into link register
5368 #ifdef IMM_PREFETCH
5369 if(!nevertaken) emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
5370 #endif
5371 }
5372 }
5373 cc=get_reg(branch_regs[i].regmap,CCREG);
5374 assert(cc==HOST_CCREG);
9f51b4b9 5375 if(unconditional)
57871462 5376 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5377 //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional);
5378 assem_debug("cycle count (adj)\n");
5379 if(unconditional) {
5380 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
// The jump itself is skipped when the branch targets itself (i is its own
// target) and the delay slot word source[i+1] is zero.
5381 if(i!=(ba[i]-start)>>2 || source[i+1]!=0) {
2573466a 5382 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5383 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5384 if(internal)
5385 assem_debug("branch: internal\n");
5386 else
5387 assem_debug("branch: external\n");
5388 if(internal&&is_ds[(ba[i]-start)>>2]) {
5389 ds_assemble_entry(i);
5390 }
5391 else {
5392 add_to_linker((int)out,ba[i],internal);
5393 emit_jmp(0);
5394 }
5395 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5396 if(((u_int)out)&7) emit_addnop(0);
5397 #endif
5398 }
5399 }
5400 else if(nevertaken) {
// Never taken: only the cycle count update and its CC_STUB check are emitted.
2573466a 5401 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 5402 int jaddr=(int)out;
5403 emit_jns(0);
5404 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
5405 }
5406 else {
// Conditional case. "nottaken" records the address of the inverted
// conditional jump so its target can be patched once the taken path is emitted.
5407 int nottaken=0;
5408 do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
2573466a 5409 if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5410 if(!only32)
5411 {
5412 assert(s1h>=0);
// The 0xf mask drops bit 4 of opcode2, so the linking forms share these tests.
df894a3a 5413 if((opcode2[i]&0xf)==0) // BLTZ/BLTZAL
57871462 5414 {
5415 emit_test(s1h,s1h);
5416 if(invert){
5417 nottaken=(int)out;
5418 emit_jns(1);
5419 }else{
5420 add_to_linker((int)out,ba[i],internal);
5421 emit_js(0);
5422 }
5423 }
df894a3a 5424 if((opcode2[i]&0xf)==1) // BGEZ/BGEZAL
57871462 5425 {
5426 emit_test(s1h,s1h);
5427 if(invert){
5428 nottaken=(int)out;
5429 emit_js(1);
5430 }else{
5431 add_to_linker((int)out,ba[i],internal);
5432 emit_jns(0);
5433 }
5434 }
5435 } // if(!only32)
5436 else
5437 {
5438 assert(s1l>=0);
df894a3a 5439 if((opcode2[i]&0xf)==0) // BLTZ/BLTZAL
57871462 5440 {
5441 emit_test(s1l,s1l);
5442 if(invert){
5443 nottaken=(int)out;
5444 emit_jns(1);
5445 }else{
5446 add_to_linker((int)out,ba[i],internal);
5447 emit_js(0);
5448 }
5449 }
df894a3a 5450 if((opcode2[i]&0xf)==1) // BGEZ/BGEZAL
57871462 5451 {
5452 emit_test(s1l,s1l);
5453 if(invert){
5454 nottaken=(int)out;
5455 emit_js(1);
5456 }else{
5457 add_to_linker((int)out,ba[i],internal);
5458 emit_jns(0);
5459 }
5460 }
5461 } // else (only32)
9f51b4b9 5462
57871462 5463 if(invert) {
// Taken path for the inverted form: fix up the cycle count, bring registers
// into the state the target expects, then jump (or assemble the target's
// delay-slot entry).
5464 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5465 if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) {
5466 if(adj) {
2573466a 5467 emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 5468 add_to_linker((int)out,ba[i],internal);
5469 }else{
5470 emit_addnop(13);
5471 add_to_linker((int)out,ba[i],internal*2);
5472 }
5473 emit_jmp(0);
5474 }else
5475 #endif
5476 {
2573466a 5477 if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 5478 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5479 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5480 if(internal)
5481 assem_debug("branch: internal\n");
5482 else
5483 assem_debug("branch: external\n");
5484 if(internal&&is_ds[(ba[i]-start)>>2]) {
5485 ds_assemble_entry(i);
5486 }
5487 else {
5488 add_to_linker((int)out,ba[i],internal);
5489 emit_jmp(0);
5490 }
5491 }
// The inverted conditional jump recorded in "nottaken" lands here.
5492 set_jump_target(nottaken,(int)out);
5493 }
5494
5495 if(adj) {
2573466a 5496 if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc);
57871462 5497 }
5498 } // (!unconditional)
5499 } // if(ooo)
5500 else
5501 {
5502 // In-order execution (branch first)
5503 //printf("IOE\n");
5504 int nottaken=0;
a6491170 5505 if(rt1[i]==31) {
5506 int rt,return_address;
a6491170 5507 rt=get_reg(branch_regs[i].regmap,31);
5508 if(rt>=0) {
5509 // Save the PC even if the branch is not taken
5510 return_address=start+i*4+8;
5511 emit_movimm(return_address,rt); // PC into link register
5512 #ifdef IMM_PREFETCH
5513 emit_prefetch(hash_table[((return_address>>16)^return_address)&0xFFFF]);
5514 #endif
5515 }
5516 }
// Emit the inverted test first: the conditional jump recorded in "nottaken"
// skips the taken path. The 0x0d mask ignores bit 1 and bit 4 of opcode2, so
// the "likely" and linking forms listed in the comments share each test.
57871462 5517 if(!unconditional) {
5518 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
5519 if(!only32)
5520 {
5521 assert(s1h>=0);
a6491170 5522 if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
57871462 5523 {
5524 emit_test(s1h,s1h);
5525 nottaken=(int)out;
5526 emit_jns(1);
5527 }
a6491170 5528 if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
57871462 5529 {
5530 emit_test(s1h,s1h);
5531 nottaken=(int)out;
5532 emit_js(1);
5533 }
5534 } // if(!only32)
5535 else
5536 {
5537 assert(s1l>=0);
a6491170 5538 if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
57871462 5539 {
5540 emit_test(s1l,s1l);
5541 nottaken=(int)out;
5542 emit_jns(1);
5543 }
a6491170 5544 if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
57871462 5545 {
5546 emit_test(s1l,s1l);
5547 nottaken=(int)out;
5548 emit_js(1);
5549 }
5550 }
5551 } // if(!unconditional)
5552 int adj;
// Unneeded sets used around the delay slot: start from the branch's sets and
// mark the delay slot's source registers (and, when the upper half of its
// destination is needed, its dep1/dep2 registers) as needed.
5553 uint64_t ds_unneeded=branch_regs[i].u;
5554 uint64_t ds_unneeded_upper=branch_regs[i].uu;
5555 ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
5556 ds_unneeded_upper&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
5557 if((~ds_unneeded_upper>>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
5558 ds_unneeded|=1;
5559 ds_unneeded_upper|=1;
5560 // branch taken
5561 if(!nevertaken) {
5562 //assem_debug("1:\n");
5563 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5564 ds_unneeded,ds_unneeded_upper);
5565 // load regs
5566 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
5567 address_generation(i+1,&branch_regs[i],0);
5568 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP);
5569 ds_assemble(i+1,&branch_regs[i]);
5570 cc=get_reg(branch_regs[i].regmap,CCREG);
5571 if(cc==-1) {
5572 emit_loadreg(CCREG,cc=HOST_CCREG);
5573 // CHECK: Is the following instruction (fall thru) allocated ok?
5574 }
5575 assert(cc==HOST_CCREG);
5576 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5577 do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
5578 assem_debug("cycle count (adj)\n");
2573466a 5579 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5580 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5581 if(internal)
5582 assem_debug("branch: internal\n");
5583 else
5584 assem_debug("branch: external\n");
5585 if(internal&&is_ds[(ba[i]-start)>>2]) {
5586 ds_assemble_entry(i);
5587 }
5588 else {
5589 add_to_linker((int)out,ba[i],internal);
5590 emit_jmp(0);
5591 }
5592 }
5593 // branch not taken
// Restore the value saved on entry; presumably the delay slot assembled on
// the taken path may have changed cop1_usable - TODO confirm.
5594 cop1_usable=prev_cop1_usable;
5595 if(!unconditional) {
5596 set_jump_target(nottaken,(int)out);
5597 assem_debug("1:\n");
// "Likely" branches do not get a delay slot assembled on the not-taken path.
5598 if(!likely[i]) {
5599 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5600 ds_unneeded,ds_unneeded_upper);
5601 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
5602 address_generation(i+1,&branch_regs[i],0);
5603 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
5604 ds_assemble(i+1,&branch_regs[i]);
5605 }
5606 cc=get_reg(branch_regs[i].regmap,CCREG);
5607 if(cc==-1&&!likely[i]) {
5608 // Cycle count isn't in a register, temporarily load it then write it out
5609 emit_loadreg(CCREG,HOST_CCREG);
2573466a 5610 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 5611 int jaddr=(int)out;
5612 emit_jns(0);
5613 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
5614 emit_storereg(CCREG,HOST_CCREG);
5615 }
5616 else{
// Here the cycle count register is looked up in the pre-branch map (i_regmap)
// and must be HOST_CCREG. The stub is tagged NULLDS for "likely" branches.
5617 cc=get_reg(i_regmap,CCREG);
5618 assert(cc==HOST_CCREG);
2573466a 5619 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 5620 int jaddr=(int)out;
5621 emit_jns(0);
5622 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
5623 }
5624 }
5625 }
5626}
5627
// fjump_assemble: emit host code for instruction i, a coprocessor-1
// conditional branch (BC1T / BC1F per the in-code comments). The condition is
// bit 0x800000 of the host register mapped to FSREG; bit 0x10000 of source[i]
// selects BC1T over BC1F.
//   i      - index of the branch in the block being compiled; i+1 is its
//            delay slot.
//   i_regs - register state used for lookups made before the delay slot has
//            been assembled (and passed to the FP_STUB).
// As in the integer branch emitters, ooo[i] selects between "delay slot
// first" and "branch first" layouts.
// NOTE(review): the `if(1)` wrappers and several closing-brace comments look
// like leftovers from the integer branch emitters; they have no effect.
5628void fjump_assemble(int i,struct regstat *i_regs)
5629{
5630 signed char *i_regmap=i_regs->regmap;
5631 int cc;
5632 int match;
5633 match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5634 assem_debug("fmatch=%d\n",match);
5635 int fs,cs;
5636 int eaddr;
57871462 5637 int invert=0;
5638 int internal=internal_branch(branch_regs[i].is32,ba[i]);
5639 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
// No match from match_bt(): invert the condition so the taken path can run
// store_regs_bt/load_regs_bt before jumping (out-of-order layout only).
57871462 5640 if(!match) invert=1;
5641 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5642 if(i>(ba[i]-start)>>2) invert=1;
5643 #endif
5644
// fs is the host register holding FSREG. In the out-of-order layout it is
// taken from the branch register map because the delay slot runs first.
e1190b87 5645 if(ooo[i]) {
57871462 5646 fs=get_reg(branch_regs[i].regmap,FSREG);
5647 address_generation(i+1,i_regs,regs[i].regmap_entry); // Is this okay?
5648 }
5649 else {
5650 fs=get_reg(i_regmap,FSREG);
5651 }
5652
5653 // Check cop1 unusable
// Emitted only once per run of code where cop1_usable is still 0: test bit
// 0x20000000 of the register holding CSREG and branch to an FP_STUB when it
// is clear; afterwards cop1_usable is set so the check is not repeated.
5654 if(!cop1_usable) {
5655 cs=get_reg(i_regmap,CSREG);
5656 assert(cs>=0);
5657 emit_testimm(cs,0x20000000);
5658 eaddr=(int)out;
5659 emit_jeq(0);
5660 add_stub(FP_STUB,eaddr,(int)out,i,cs,(int)i_regs,0,0);
5661 cop1_usable=1;
5662 }
5663
e1190b87 5664 if(ooo[i]) {
57871462 5665 // Out of order execution (delay slot first)
5666 //printf("OOOE\n");
5667 ds_assemble(i+1,i_regs);
5668 int adj;
// Start from the branch's unneeded sets and mark the branch's own source
// registers as needed; bit 0 is always set in both masks.
5669 uint64_t bc_unneeded=branch_regs[i].u;
5670 uint64_t bc_unneeded_upper=branch_regs[i].uu;
5671 bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
5672 bc_unneeded_upper&=~((1LL<<us1[i])|(1LL<<us2[i]));
5673 bc_unneeded|=1;
5674 bc_unneeded_upper|=1;
5675 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5676 bc_unneeded,bc_unneeded_upper);
5677 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],rs1[i]);
5678 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
5679 cc=get_reg(branch_regs[i].regmap,CCREG);
5680 assert(cc==HOST_CCREG);
5681 do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
5682 assem_debug("cycle count (adj)\n");
5683 if(1) {
// "nottaken" records the address of the inverted conditional jump so that it
// can be patched to land after the taken path.
5684 int nottaken=0;
2573466a 5685 if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5686 if(1) {
5687 assert(fs>=0);
5688 emit_testimm(fs,0x800000);
5689 if(source[i]&0x10000) // BC1T
5690 {
5691 if(invert){
5692 nottaken=(int)out;
5693 emit_jeq(1);
5694 }else{
5695 add_to_linker((int)out,ba[i],internal);
5696 emit_jne(0);
5697 }
5698 }
5699 else // BC1F
// The if/else that follows is the body of the `else` above.
5700 if(invert){
5701 nottaken=(int)out;
5702 emit_jne(1);
5703 }else{
5704 add_to_linker((int)out,ba[i],internal);
5705 emit_jeq(0);
5706 }
// Empty block: executes unconditionally and does nothing.
5707 {
5708 }
5709 } // if(1)
9f51b4b9 5710
57871462 5711 if(invert) {
// Taken path for the inverted form: fix up the cycle count, bring registers
// into the state the target expects, then jump (or assemble the target's
// delay-slot entry).
2573466a 5712 if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 5713 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5714 else if(match) emit_addnop(13);
5715 #endif
5716 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5717 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5718 if(internal)
5719 assem_debug("branch: internal\n");
5720 else
5721 assem_debug("branch: external\n");
5722 if(internal&&is_ds[(ba[i]-start)>>2]) {
5723 ds_assemble_entry(i);
5724 }
5725 else {
5726 add_to_linker((int)out,ba[i],internal);
5727 emit_jmp(0);
5728 }
// The inverted conditional jump recorded in "nottaken" lands here.
5729 set_jump_target(nottaken,(int)out);
5730 }
5731
5732 if(adj) {
2573466a 5733 if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc);
57871462 5734 }
5735 } // if(1)
5736 } // if(ooo)
5737 else
5738 {
5739 // In-order execution (branch first)
5740 //printf("IOE\n");
5741 int nottaken=0;
// Emit the inverted test first; the conditional jump recorded in "nottaken"
// skips the taken path below.
5742 if(1) {
5743 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
5744 if(1) {
5745 assert(fs>=0);
5746 emit_testimm(fs,0x800000);
5747 if(source[i]&0x10000) // BC1T
5748 {
5749 nottaken=(int)out;
5750 emit_jeq(1);
5751 }
5752 else // BC1F
5753 {
5754 nottaken=(int)out;
5755 emit_jne(1);
5756 }
5757 }
5758 } // if(1)
5759 int adj;
// Unneeded sets used around the delay slot: start from the branch's sets and
// mark the delay slot's source registers (and, when the upper half of its
// destination is needed, its dep1/dep2 registers) as needed.
5760 uint64_t ds_unneeded=branch_regs[i].u;
5761 uint64_t ds_unneeded_upper=branch_regs[i].uu;
5762 ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
5763 ds_unneeded_upper&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
5764 if((~ds_unneeded_upper>>rt1[i+1])&1) ds_unneeded_upper&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
5765 ds_unneeded|=1;
5766 ds_unneeded_upper|=1;
5767 // branch taken
5768 //assem_debug("1:\n");
5769 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5770 ds_unneeded,ds_unneeded_upper);
5771 // load regs
5772 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
5773 address_generation(i+1,&branch_regs[i],0);
5774 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP);
5775 ds_assemble(i+1,&branch_regs[i]);
5776 cc=get_reg(branch_regs[i].regmap,CCREG);
5777 if(cc==-1) {
5778 emit_loadreg(CCREG,cc=HOST_CCREG);
5779 // CHECK: Is the following instruction (fall thru) allocated ok?
5780 }
5781 assert(cc==HOST_CCREG);
5782 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5783 do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
5784 assem_debug("cycle count (adj)\n");
2573466a 5785 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 5786 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
5787 if(internal)
5788 assem_debug("branch: internal\n");
5789 else
5790 assem_debug("branch: external\n");
5791 if(internal&&is_ds[(ba[i]-start)>>2]) {
5792 ds_assemble_entry(i);
5793 }
5794 else {
5795 add_to_linker((int)out,ba[i],internal);
5796 emit_jmp(0);
5797 }
5798
5799 // branch not taken
5800 if(1) { // <- FIXME (don't need this)
5801 set_jump_target(nottaken,(int)out);
5802 assem_debug("1:\n");
// "Likely" branches do not get a delay slot assembled on the not-taken path.
5803 if(!likely[i]) {
5804 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,
5805 ds_unneeded,ds_unneeded_upper);
5806 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
5807 address_generation(i+1,&branch_regs[i],0);
5808 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
5809 ds_assemble(i+1,&branch_regs[i]);
5810 }
5811 cc=get_reg(branch_regs[i].regmap,CCREG);
5812 if(cc==-1&&!likely[i]) {
5813 // Cycle count isn't in a register, temporarily load it then write it out
5814 emit_loadreg(CCREG,HOST_CCREG);
2573466a 5815 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 5816 int jaddr=(int)out;
5817 emit_jns(0);
5818 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,NOTTAKEN,0);
5819 emit_storereg(CCREG,HOST_CCREG);
5820 }
5821 else{
// Here the cycle count register is looked up in the pre-branch map (i_regmap)
// and must be HOST_CCREG. The stub is tagged NULLDS for "likely" branches.
5822 cc=get_reg(i_regmap,CCREG);
5823 assert(cc==HOST_CCREG);
2573466a 5824 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
57871462 5825 int jaddr=(int)out;
5826 emit_jns(0);
5827 add_stub(CC_STUB,jaddr,(int)out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
5828 }
5829 }
5830 }
5831}
5832
// pagespan_assemble: emit host code for branch instruction i without
// assembling its delay slot inline. Instead of jumping to the target, the
// emitted code computes the branch target address into a host register, moves
// it to HOST_BTREG, writes back dirty registers and leaves through an
// external jump to address start+i*4+5.
// NOTE(review): judging by the name and by pagespan_ds() below, this is used
// when the delay slot falls outside the block being compiled - confirm
// against the caller.
//   i      - index of the branch in the block being compiled.
//   i_regs - register state at the branch; its regmap is used to find source
//            registers and free scratch registers.
// BLEZL/BGTZL and opcode 1 (BLTZ/BGEZ) are rejected with assertions.
5833static void pagespan_assemble(int i,struct regstat *i_regs)
5834{
5835 int s1l=get_reg(i_regs->regmap,rs1[i]);
5836 int s1h=get_reg(i_regs->regmap,rs1[i]|64);
5837 int s2l=get_reg(i_regs->regmap,rs2[i]);
5838 int s2h=get_reg(i_regs->regmap,rs2[i]|64);
57871462 5839 int taken=0;
5840 int nottaken=0;
5841 int unconditional=0;
// Normalise comparisons against r0: the non-zero operand ends up in s1l/s1h
// and s2l/s2h are set to -1, which makes the code below use a "test" of s1
// rather than a "cmp" of s1 and s2.
5842 if(rs1[i]==0)
5843 {
5844 s1l=s2l;s1h=s2h;
5845 s2l=s2h=-1;
5846 }
5847 else if(rs2[i]==0)
5848 {
5849 s2l=s2h=-1;
5850 }
// Both operands flagged in is32: upper halves are not compared.
5851 if((i_regs->is32>>rs1[i])&(i_regs->is32>>rs2[i])&1) {
5852 s1h=s2h=-1;
5853 }
// Pick scratch host registers. "addr" receives the computed target
// (HOST_BTREG itself when its regmap entry is negative), "alt" holds the
// alternative address, and BLEZ/BGTZ need a third one, "ntaddr". Candidates
// must not be EXCLUDE_REG or HOST_CCREG and must not hold rs1 or rs2;
// hr keeps advancing so the three registers are distinct.
5854 int hr=0;
581335b0 5855 int addr=-1,alt=-1,ntaddr=-1;
57871462 5856 if(i_regs->regmap[HOST_BTREG]<0) {addr=HOST_BTREG;}
5857 else {
5858 while(hr<HOST_REGS)
5859 {
5860 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
5861 (i_regs->regmap[hr]&63)!=rs1[i] &&
5862 (i_regs->regmap[hr]&63)!=rs2[i] )
5863 {
5864 addr=hr++;break;
5865 }
5866 hr++;
5867 }
5868 }
5869 while(hr<HOST_REGS)
5870 {
5871 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
5872 (i_regs->regmap[hr]&63)!=rs1[i] &&
5873 (i_regs->regmap[hr]&63)!=rs2[i] )
5874 {
5875 alt=hr++;break;
5876 }
5877 hr++;
5878 }
5879 if((opcode[i]&0x2E)==6) // BLEZ/BGTZ needs another register
5880 {
5881 while(hr<HOST_REGS)
5882 {
5883 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
5884 (i_regs->regmap[hr]&63)!=rs1[i] &&
5885 (i_regs->regmap[hr]&63)!=rs2[i] )
5886 {
5887 ntaddr=hr;break;
5888 }
5889 hr++;
5890 }
5891 }
// Fails if the last scan above ran off the end without finding a register.
5892 assert(hr<HOST_REGS);
5893 if((opcode[i]&0x2e)==4||opcode[i]==0x11) { // BEQ/BNE/BEQL/BNEL/BC1
5894 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG);
5895 }
2573466a 5896 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 5897 if(opcode[i]==2) // J
5898 {
5899 unconditional=1;
5900 }
5901 if(opcode[i]==3) // JAL
5902 {
5903 // TODO: mini_ht
// NOTE(review): rt is used without a check for get_reg() returning a
// negative value, unlike the linking code in the other branch emitters.
5904 int rt=get_reg(i_regs->regmap,31);
5905 emit_movimm(start+i*4+8,rt);
5906 unconditional=1;
5907 }
5908 if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
5909 {
5910 emit_mov(s1l,addr);
5911 if(opcode2[i]==9) // JALR
5912 {
5067f341 5913 int rt=get_reg(i_regs->regmap,rt1[i]);
57871462 5914 emit_movimm(start+i*4+8,rt);
5915 }
5916 }
// For the non-"likely" conditional branches below, no host branch is emitted:
// both candidate addresses (ba[i] and the fall-through start+i*4+8) are
// materialised and a conditional move selects one into "addr".
5917 if((opcode[i]&0x3f)==4) // BEQ
5918 {
5919 if(rs1[i]==rs2[i])
5920 {
5921 unconditional=1;
5922 }
5923 else
5924 #ifdef HAVE_CMOV_IMM
5925 if(s1h<0) {
5926 if(s2l>=0) emit_cmp(s1l,s2l);
5927 else emit_test(s1l,s1l);
5928 emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr);
5929 }
5930 else
5931 #endif
5932 {
5933 assert(s1l>=0);
5934 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
5935 if(s1h>=0) {
5936 if(s2h>=0) emit_cmp(s1h,s2h);
5937 else emit_test(s1h,s1h);
5938 emit_cmovne_reg(alt,addr);
5939 }
5940 if(s2l>=0) emit_cmp(s1l,s2l);
5941 else emit_test(s1l,s1l);
5942 emit_cmovne_reg(alt,addr);
5943 }
5944 }
5945 if((opcode[i]&0x3f)==5) // BNE
5946 {
5947 #ifdef HAVE_CMOV_IMM
5948 if(s1h<0) {
5949 if(s2l>=0) emit_cmp(s1l,s2l);
5950 else emit_test(s1l,s1l);
5951 emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr);
5952 }
5953 else
5954 #endif
5955 {
5956 assert(s1l>=0);
5957 emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt);
5958 if(s1h>=0) {
5959 if(s2h>=0) emit_cmp(s1h,s2h);
5960 else emit_test(s1h,s1h);
5961 emit_cmovne_reg(alt,addr);
5962 }
5963 if(s2l>=0) emit_cmp(s1l,s2l);
5964 else emit_test(s1l,s1l);
5965 emit_cmovne_reg(alt,addr);
5966 }
5967 }
// The "likely" forms emit real conditional jumps; "nottaken" records the jump
// that is patched to the not-taken exit emitted at the end of this function.
5968 if((opcode[i]&0x3f)==0x14) // BEQL
5969 {
5970 if(s1h>=0) {
5971 if(s2h>=0) emit_cmp(s1h,s2h);
5972 else emit_test(s1h,s1h);
5973 nottaken=(int)out;
5974 emit_jne(0);
5975 }
5976 if(s2l>=0) emit_cmp(s1l,s2l);
5977 else emit_test(s1l,s1l);
// If an upper-half jump was emitted, it is pointed at the lower-half jne that
// follows, so only that last jump needs patching later.
5978 if(nottaken) set_jump_target(nottaken,(int)out);
5979 nottaken=(int)out;
5980 emit_jne(0);
5981 }
5982 if((opcode[i]&0x3f)==0x15) // BNEL
5983 {
5984 if(s1h>=0) {
5985 if(s2h>=0) emit_cmp(s1h,s2h);
5986 else emit_test(s1h,s1h);
5987 taken=(int)out;
5988 emit_jne(0);
5989 }
5990 if(s2l>=0) emit_cmp(s1l,s2l);
5991 else emit_test(s1l,s1l);
5992 nottaken=(int)out;
5993 emit_jeq(0);
5994 if(taken) set_jump_target(taken,(int)out);
5995 }
5996 if((opcode[i]&0x3f)==6) // BLEZ
5997 {
5998 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
5999 emit_cmpimm(s1l,1);
6000 if(s1h>=0) emit_mov(addr,ntaddr);
6001 emit_cmovl_reg(alt,addr);
6002 if(s1h>=0) {
6003 emit_test(s1h,s1h);
6004 emit_cmovne_reg(ntaddr,addr);
6005 emit_cmovs_reg(alt,addr);
6006 }
6007 }
6008 if((opcode[i]&0x3f)==7) // BGTZ
6009 {
6010 emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr);
6011 emit_cmpimm(s1l,1);
6012 if(s1h>=0) emit_mov(addr,alt);
6013 emit_cmovl_reg(ntaddr,addr);
6014 if(s1h>=0) {
6015 emit_test(s1h,s1h);
6016 emit_cmovne_reg(alt,addr);
6017 emit_cmovs_reg(ntaddr,addr);
6018 }
6019 }
// Unsupported here: the assertion inside each block always fails when reached.
6020 if((opcode[i]&0x3f)==0x16) // BLEZL
6021 {
6022 assert((opcode[i]&0x3f)!=0x16);
6023 }
6024 if((opcode[i]&0x3f)==0x17) // BGTZL
6025 {
6026 assert((opcode[i]&0x3f)!=0x17);
6027 }
6028 assert(opcode[i]!=1); // BLTZ/BGEZ
6029
6030 //FIXME: Check CSREG
// Coprocessor-1 branches: bits 16-17 of source[i] select the variant and the
// condition is bit 0x800000 of s1l.
6031 if(opcode[i]==0x11 && opcode2[i]==0x08 ) {
6032 if((source[i]&0x30000)==0) // BC1F
6033 {
6034 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
6035 emit_testimm(s1l,0x800000);
6036 emit_cmovne_reg(alt,addr);
6037 }
6038 if((source[i]&0x30000)==0x10000) // BC1T
6039 {
6040 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
6041 emit_testimm(s1l,0x800000);
6042 emit_cmovne_reg(alt,addr);
6043 }
6044 if((source[i]&0x30000)==0x20000) // BC1FL
6045 {
6046 emit_testimm(s1l,0x800000);
6047 nottaken=(int)out;
6048 emit_jne(0);
6049 }
6050 if((source[i]&0x30000)==0x30000) // BC1TL
6051 {
6052 emit_testimm(s1l,0x800000);
6053 nottaken=(int)out;
6054 emit_jeq(0);
6055 }
6056 }
6057
6058 assert(i_regs->regmap[HOST_CCREG]==CCREG);
6059 wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
// Put the branch target into HOST_BTREG: a constant for "likely" and
// unconditional branches, otherwise the value selected into "addr".
6060 if(likely[i]||unconditional)
6061 {
6062 emit_movimm(ba[i],HOST_BTREG);
6063 }
6064 else if(addr!=HOST_BTREG)
6065 {
6066 emit_mov(addr,HOST_BTREG);
6067 }
// Exit jump: linked directly to already-compiled code for target_addr when
// check_addr() finds some, otherwise pointed at the external-jump stub.
6068 void *branch_addr=out;
6069 emit_jmp(0);
// NOTE(review): start+i*4+5 is the delay slot address (start+i*4+4) plus 1;
// the +1 appears to tag a delay-slot entry point, matching vaddr=start+1 in
// pagespan_ds() - confirm.
6070 int target_addr=start+i*4+5;
6071 void *stub=out;
6072 void *compiled_target_addr=check_addr(target_addr);
6073 emit_extjump_ds((int)branch_addr,target_addr);
6074 if(compiled_target_addr) {
6075 set_jump_target((int)branch_addr,(int)compiled_target_addr);
6076 add_link(target_addr,stub);
6077 }
6078 else set_jump_target((int)branch_addr,(int)stub);
6079 if(likely[i]) {
6080 // Not-taken path
// Same exit sequence as above, but targeting start+i*4+8 (the instruction
// after the delay slot). These locals shadow the ones declared above.
6081 set_jump_target((int)nottaken,(int)out);
6082 wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
6083 void *branch_addr=out;
6084 emit_jmp(0);
6085 int target_addr=start+i*4+8;
6086 void *stub=out;
6087 void *compiled_target_addr=check_addr(target_addr);
6088 emit_extjump_ds((int)branch_addr,target_addr);
6089 if(compiled_target_addr) {
6090 set_jump_target((int)branch_addr,(int)compiled_target_addr);
6091 add_link(target_addr,stub);
6092 }
6093 else set_jump_target((int)branch_addr,(int)stub);
6094 }
6095}
6096
6097// Assemble the delay slot for the above
// pagespan_ds: emit the entry sequence for a block whose first instruction
// (index 0) is executed as a delay slot. The entry point is registered under
// vaddr=start+1 in both jump_dirty and jump_in. After instruction 0 has been
// assembled, the emitted code compares the pending branch target with start+4:
// on a match it continues into the rest of this block, otherwise it writes
// registers back and jumps through jump_vaddr_reg[] to the target address.
// The branch target arrives in HOST_BTREG and is spilled to / reloaded from
// the branch_target variable when the register map does not keep BTREG.
6098static void pagespan_ds()
6099{
6100 assem_debug("initial delay slot:\n");
6101 u_int vaddr=start+1;
94d23bb9 6102 u_int page=get_page(vaddr);
6103 u_int vpage=get_vpage(vaddr);
// The jump_dirty entry points at the code emitted by do_dirty_stub_ds(); the
// jump_in entry points just past it.
57871462 6104 ll_add(jump_dirty+vpage,vaddr,(void *)out);
6105 do_dirty_stub_ds();
6106 ll_add(jump_in+page,vaddr,(void *)out);
6107 assert(regs[0].regmap_entry[HOST_CCREG]==CCREG);
6108 if(regs[0].regmap[HOST_CCREG]!=CCREG)
6109 wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty,regs[0].was32);
// Save the incoming branch target if instruction 0 does not keep it in
// HOST_BTREG.
6110 if(regs[0].regmap[HOST_BTREG]!=BTREG)
6111 emit_writeword(HOST_BTREG,(int)&branch_target);
6112 load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,rs1[0],rs2[0]);
6113 address_generation(0,&regs[0],regs[0].regmap_entry);
b9b61529 6114 if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a)
57871462 6115 load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,INVCP,INVCP);
6116 cop1_usable=0;
6117 is_delayslot=0;
// Dispatch instruction 0 to the assembler for its type. There is no default
// case: types not listed emit nothing.
6118 switch(itype[0]) {
6119 case ALU:
6120 alu_assemble(0,&regs[0]);break;
6121 case IMM16:
6122 imm16_assemble(0,&regs[0]);break;
6123 case SHIFT:
6124 shift_assemble(0,&regs[0]);break;
6125 case SHIFTIMM:
6126 shiftimm_assemble(0,&regs[0]);break;
6127 case LOAD:
6128 load_assemble(0,&regs[0]);break;
6129 case LOADLR:
6130 loadlr_assemble(0,&regs[0]);break;
6131 case STORE:
6132 store_assemble(0,&regs[0]);break;
6133 case STORELR:
6134 storelr_assemble(0,&regs[0]);break;
6135 case COP0:
6136 cop0_assemble(0,&regs[0]);break;
6137 case COP1:
6138 cop1_assemble(0,&regs[0]);break;
6139 case C1LS:
6140 c1ls_assemble(0,&regs[0]);break;
b9b61529 6141 case COP2:
6142 cop2_assemble(0,&regs[0]);break;
6143 case C2LS:
6144 c2ls_assemble(0,&regs[0]);break;
6145 case C2OP:
6146 c2op_assemble(0,&regs[0]);break;
57871462 6147 case FCONV:
6148 fconv_assemble(0,&regs[0]);break;
6149 case FLOAT:
6150 float_assemble(0,&regs[0]);break;
6151 case FCOMP:
6152 fcomp_assemble(0,&regs[0]);break;
6153 case MULTDIV:
6154 multdiv_assemble(0,&regs[0]);break;
6155 case MOV:
6156 mov_assemble(0,&regs[0]);break;
// The remaining cases intentionally share one body: nothing is assembled for
// them, only the message below is printed at compile time.
6157 case SYSCALL:
7139f3c8 6158 case HLECALL:
1e973cb0 6159 case INTCALL:
57871462 6160 case SPAN:
6161 case UJUMP:
6162 case RJUMP:
6163 case CJUMP:
6164 case SJUMP:
6165 case FJUMP:
c43b5311 6166 SysPrintf("Jump in the delay slot. This is probably a bug.\n");
57871462 6167 }
// Find the host register holding the branch target; if BTREG is not mapped,
// reload it from branch_target into the register get_reg() returns for -1.
6168 int btaddr=get_reg(regs[0].regmap,BTREG);
6169 if(btaddr<0) {
6170 btaddr=get_reg(regs[0].regmap,-1);
6171 emit_readword((int)&branch_target,btaddr);
6172 }
6173 assert(btaddr!=HOST_CCREG);
6174 if(regs[0].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
// Compare the branch target with start+4 (the second instruction of this
// block). HOST_IMM8 hosts load the constant into HOST_TEMPREG first.
6175#ifdef HOST_IMM8
6176 emit_movimm(start+4,HOST_TEMPREG);
6177 emit_cmp(btaddr,HOST_TEMPREG);
6178#else
6179 emit_cmpimm(btaddr,start+4);
6180#endif
6181 int branch=(int)out;
6182 emit_jeq(0);
// Target is elsewhere: write registers back and leave through the
// per-register jump_vaddr_reg[] entry for btaddr.
6183 store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,-1);
6184 emit_jmp(jump_vaddr_reg[btaddr]);
// Target is start+4: continue in this block with registers set up for it.
6185 set_jump_target(branch,(int)out);
6186 store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
6187 load_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
6188}
6189
6190// Basic liveness analysis for MIPS registers
6191void unneeded_registers(int istart,int iend,int r)
6192{
6193 int i;
bedfea38 6194 uint64_t u,uu,gte_u,b,bu,gte_bu;
0ff8c62c 6195 uint64_t temp_u,temp_uu,temp_gte_u=0;
57871462 6196 uint64_t tdep;
0ff8c62c 6197 uint64_t gte_u_unknown=0;
6198 if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED)
6199 gte_u_unknown=~0ll;
57871462 6200 if(iend==slen-1) {
6201 u=1;uu=1;
0ff8c62c 6202 gte_u=gte_u_unknown;
57871462 6203 }else{
6204 u=unneeded_reg[iend+1];
6205 uu=unneeded_reg_upper[iend+1];
6206 u=1;uu=1;
0ff8c62c 6207 gte_u=gte_unneeded[iend+1];
57871462 6208 }
bedfea38 6209
57871462 6210 for (i=iend;i>=istart;i--)
6211 {
6212 //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
6213 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
6214 {
6215 // If subroutine call, flag return address as a possible branch target
6216 if(rt1[i]==31 && i<slen-2) bt[i+2]=1;
9f51b4b9 6217
57871462 6218 if(ba[i]<start || ba[i]>=(start+slen*4))
6219 {
6220 // Branch out of this block, flush all regs
6221 u=1;
6222 uu=1;
0ff8c62c 6223 gte_u=gte_u_unknown;
9f51b4b9 6224 /* Hexagon hack
57871462 6225 if(itype[i]==UJUMP&&rt1[i]==31)
6226 {
6227 uu=u=0x300C00F; // Discard at, v0-v1, t6-t9
6228 }
6229 if(itype[i]==RJUMP&&rs1[i]==31)
6230 {
6231 uu=u=0x300C0F3; // Discard at, a0-a3, t6-t9
6232 }
4cb76aa4 6233 if(start>0x80000400&&start<0x80000000+RAM_SIZE) {
57871462 6234 if(itype[i]==UJUMP&&rt1[i]==31)
6235 {
6236 //uu=u=0x30300FF0FLL; // Discard at, v0-v1, t0-t9, lo, hi
6237 uu=u=0x300FF0F; // Discard at, v0-v1, t0-t9
6238 }
6239 if(itype[i]==RJUMP&&rs1[i]==31)
6240 {
6241 //uu=u=0x30300FFF3LL; // Discard at, a0-a3, t0-t9, lo, hi
6242 uu=u=0x300FFF3; // Discard at, a0-a3, t0-t9
6243 }
6244 }*/
6245 branch_unneeded_reg[i]=u;
6246 branch_unneeded_reg_upper[i]=uu;
6247 // Merge in delay slot
6248 tdep=(~uu>>rt1[i+1])&1;
6249 u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6250 uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6251 u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
6252 uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
6253 uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
6254 u|=1;uu|=1;
bedfea38 6255 gte_u|=gte_rt[i+1];
6256 gte_u&=~gte_rs[i+1];
57871462 6257 // If branch is "likely" (and conditional)
6258 // then we skip the delay slot on the fall-thru path
6259 if(likely[i]) {
6260 if(i<slen-1) {
6261 u&=unneeded_reg[i+2];
6262 uu&=unneeded_reg_upper[i+2];
bedfea38 6263 gte_u&=gte_unneeded[i+2];
57871462 6264 }
6265 else
6266 {
6267 u=1;
6268 uu=1;
0ff8c62c 6269 gte_u=gte_u_unknown;
57871462 6270 }
6271 }
6272 }
6273 else
6274 {
6275 // Internal branch, flag target
6276 bt[(ba[i]-start)>>2]=1;
6277 if(ba[i]<=start+i*4) {
6278 // Backward branch
6279 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
6280 {
6281 // Unconditional branch
6282 temp_u=1;temp_uu=1;
bedfea38 6283 temp_gte_u=0;
57871462 6284 } else {
6285 // Conditional branch (not taken case)
6286 temp_u=unneeded_reg[i+2];
6287 temp_uu=unneeded_reg_upper[i+2];
bedfea38 6288 temp_gte_u&=gte_unneeded[i+2];
57871462 6289 }
6290 // Merge in delay slot
6291 tdep=(~temp_uu>>rt1[i+1])&1;
6292 temp_u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6293 temp_uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6294 temp_u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
6295 temp_uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
6296 temp_uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
6297 temp_u|=1;temp_uu|=1;
bedfea38 6298 temp_gte_u|=gte_rt[i+1];
6299 temp_gte_u&=~gte_rs[i+1];
57871462 6300 // If branch is "likely" (and conditional)
6301 // then we skip the delay slot on the fall-thru path
6302 if(likely[i]) {
6303 if(i<slen-1) {
6304 temp_u&=unneeded_reg[i+2];
6305 temp_uu&=unneeded_reg_upper[i+2];
bedfea38 6306 temp_gte_u&=gte_unneeded[i+2];
57871462 6307 }
6308 else
6309 {
6310 temp_u=1;
6311 temp_uu=1;
0ff8c62c 6312 temp_gte_u=gte_u_unknown;
57871462 6313 }
6314 }
6315 tdep=(~temp_uu>>rt1[i])&1;
6316 temp_u|=(1LL<<rt1[i])|(1LL<<rt2[i]);
6317 temp_uu|=(1LL<<rt1[i])|(1LL<<rt2[i]);
6318 temp_u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
6319 temp_uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
6320 temp_uu&=~((tdep<<dep1[i])|(tdep<<dep2[i]));
6321 temp_u|=1;temp_uu|=1;
bedfea38 6322 temp_gte_u|=gte_rt[i];
6323 temp_gte_u&=~gte_rs[i];
57871462 6324 unneeded_reg[i]=temp_u;
6325 unneeded_reg_upper[i]=temp_uu;
bedfea38 6326 gte_unneeded[i]=temp_gte_u;
57871462 6327 // Only go three levels deep. This recursion can take an
6328 // excessive amount of time if there are a lot of nested loops.
6329 if(r<2) {
6330 unneeded_registers((ba[i]-start)>>2,i-1,r+1);
6331 }else{
6332 unneeded_reg[(ba[i]-start)>>2]=1;
6333 unneeded_reg_upper[(ba[i]-start)>>2]=1;
0ff8c62c 6334 gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown;
57871462 6335 }
6336 } /*else*/ if(1) {
6337 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
6338 {
6339 // Unconditional branch
6340 u=unneeded_reg[(ba[i]-start)>>2];
6341 uu=unneeded_reg_upper[(ba[i]-start)>>2];
bedfea38 6342 gte_u=gte_unneeded[(ba[i]-start)>>2];
57871462 6343 branch_unneeded_reg[i]=u;
6344 branch_unneeded_reg_upper[i]=uu;
6345 //u=1;
6346 //uu=1;
6347 //branch_unneeded_reg[i]=u;
6348 //branch_unneeded_reg_upper[i]=uu;
6349 // Merge in delay slot
6350 tdep=(~uu>>rt1[i+1])&1;
6351 u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6352 uu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6353 u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
6354 uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
6355 uu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
6356 u|=1;uu|=1;
bedfea38 6357 gte_u|=gte_rt[i+1];
6358 gte_u&=~gte_rs[i+1];
57871462 6359 } else {
6360 // Conditional branch
6361 b=unneeded_reg[(ba[i]-start)>>2];
6362 bu=unneeded_reg_upper[(ba[i]-start)>>2];
bedfea38 6363 gte_bu=gte_unneeded[(ba[i]-start)>>2];
57871462 6364 branch_unneeded_reg[i]=b;
6365 branch_unneeded_reg_upper[i]=bu;
6366 //b=1;
6367 //bu=1;
6368 //branch_unneeded_reg[i]=b;
6369 //branch_unneeded_reg_upper[i]=bu;
6370 // Branch delay slot
6371 tdep=(~uu>>rt1[i+1])&1;
6372 b|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6373 bu|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
6374 b&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
6375 bu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
6376 bu&=~((tdep<<dep1[i+1])|(tdep<<dep2[i+1]));
6377 b|=1;bu|=1;
bedfea38 6378 gte_bu|=gte_rt[i+1];
6379 gte_bu&=~gte_rs[i+1];
57871462 6380 // If branch is "likely" then we skip the
6381 // delay slot on the fall-thru path
6382 if(likely[i]) {
6383 u=b;
6384 uu=bu;
bedfea38 6385 gte_u=gte_bu;
57871462 6386 if(i<slen-1) {
6387 u&=unneeded_reg[i+2];
6388 uu&=unneeded_reg_upper[i+2];
bedfea38 6389 gte_u&=gte_unneeded[i+2];
57871462 6390 //u=1;
6391 //uu=1;
6392 }
6393 } else {
6394 u&=b;
6395 uu&=bu;
bedfea38 6396 gte_u&=gte_bu;
57871462 6397 //u=1;
6398 //uu=1;
6399 }
6400 if(i<slen-1) {
6401 branch_unneeded_reg[i]&=unneeded_reg[i+2];
6402 branch_unneeded_reg_upper[i]&=unneeded_reg_upper[i+2];
6403 //branch_unneeded_reg[i]=1;
6404 //branch_unneeded_reg_upper[i]=1;
6405 } else {
6406 branch_unneeded_reg[i]=1;
6407 branch_unneeded_reg_upper[i]=1;
6408 }
6409 }
6410 }
6411 }
6412 }
1e973cb0 6413 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 6414 {
6415 // SYSCALL instruction (software interrupt)
6416 u=1;
6417 uu=1;
6418 }
6419 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
6420 {
6421 // ERET instruction (return from interrupt)
6422 u=1;
6423 uu=1;
6424 }
6425 //u=uu=1; // DEBUG
6426 tdep=(~uu>>rt1[i])&1;
6427 // Written registers are unneeded
6428 u|=1LL<<rt1[i];
6429 u|=1LL<<rt2[i];
6430 uu|=1LL<<rt1[i];
6431 uu|=1LL<<rt2[i];
bedfea38 6432 gte_u|=gte_rt[i];
57871462 6433 // Accessed registers are needed
6434 u&=~(1LL<<rs1[i]);
6435 u&=~(1LL<<rs2[i]);
6436 uu&=~(1LL<<us1[i]);
6437 uu&=~(1LL<<us2[i]);
bedfea38 6438 gte_u&=~gte_rs[i];
eaa11918 6439 if(gte_rs[i]&&rt1[i]&&(unneeded_reg[i+1]&(1ll<<rt1[i])))
cbbd8dd7 6440 gte_u|=gte_rs[i]&gte_unneeded[i+1]; // MFC2/CFC2 to dead register, unneeded
57871462 6441 // Source-target dependencies
6442 uu&=~(tdep<<dep1[i]);
6443 uu&=~(tdep<<dep2[i]);
6444 // R0 is always unneeded
6445 u|=1;uu|=1;
6446 // Save it
6447 unneeded_reg[i]=u;
6448 unneeded_reg_upper[i]=uu;
bedfea38 6449 gte_unneeded[i]=gte_u;
57871462 6450 /*
6451 printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
6452 printf("U:");
6453 int r;
6454 for(r=1;r<=CCREG;r++) {
6455 if((unneeded_reg[i]>>r)&1) {
6456 if(r==HIREG) printf(" HI");
6457 else if(r==LOREG) printf(" LO");
6458 else printf(" r%d",r);
6459 }
6460 }
6461 printf(" UU:");
6462 for(r=1;r<=CCREG;r++) {
6463 if(((unneeded_reg_upper[i]&~unneeded_reg[i])>>r)&1) {
6464 if(r==HIREG) printf(" HI");
6465 else if(r==LOREG) printf(" LO");
6466 else printf(" r%d",r);
6467 }
6468 }
6469 printf("\n");*/
6470 }
252c20fc 6471 for (i=iend;i>=istart;i--)
6472 {
6473 unneeded_reg_upper[i]=branch_unneeded_reg_upper[i]=-1LL;
6474 }
57871462 6475}
6476
71e490c5 6477// Write back dirty registers as soon as we will no longer modify them,
6478// so that we don't end up with lots of writes at the branches.
//
// Walks instructions iend..istart backwards and builds, per instruction,
// two host-register bitmasks (bit r corresponds to host register r):
// will_dirty[] and wont_dirty[].
//   istart, iend - inclusive range of instruction indices to process
//   wr           - when nonzero the masks are also applied to regs[].dirty,
//                  regs[].wasdirty and branch_regs[].dirty; when zero only
//                  will_dirty[] / wont_dirty[] are written
// For a backward branch inside the block the function calls itself once with
// wr=0 over the loop body; calls made with wr=0 never recurse further.
6479void clean_registers(int istart,int iend,int wr)
57871462 6480{
71e490c5 6481 int i;
6482 int r;
6483 u_int will_dirty_i,will_dirty_next,temp_will_dirty;
6484 u_int wont_dirty_i,wont_dirty_next,temp_wont_dirty;
// Seed the running masks: empty when iend is the last instruction of the
// block, otherwise continue from the masks stored for instruction iend+1.
6485 if(iend==slen-1) {
6486 will_dirty_i=will_dirty_next=0;
6487 wont_dirty_i=wont_dirty_next=0;
6488 }else{
6489 will_dirty_i=will_dirty_next=will_dirty[iend+1];
6490 wont_dirty_i=wont_dirty_next=wont_dirty[iend+1];
6491 }
6492 for (i=iend;i>=istart;i--)
57871462 6493 {
71e490c5 6494 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
57871462 6495 {
71e490c5 6496 if(ba[i]<start || ba[i]>=(start+slen*4))
57871462 6497 {
71e490c5 6498 // Branch out of this block, flush all regs
6499 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
57871462 6500 {
6501 // Unconditional branch
6502 will_dirty_i=0;
6503 wont_dirty_i=0;
6504 // Merge in delay slot (will dirty)
6505 for(r=0;r<HOST_REGS;r++) {
6506 if(r!=EXCLUDE_REG) {
6507 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
6508 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
6509 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
6510 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
6511 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
6512 if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
6513 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
6514 if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
6515 if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
6516 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
6517 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
6518 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
6519 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
6520 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
6521 }
6522 }
6523 }
6524 else
6525 {
6526 // Conditional branch
6527 will_dirty_i=0;
6528 wont_dirty_i=wont_dirty_next;
6529 // Merge in delay slot (will dirty)
6530 for(r=0;r<HOST_REGS;r++) {
6531 if(r!=EXCLUDE_REG) {
6532 if(!likely[i]) {
6533 // Might not dirty if likely branch is not taken
6534 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
6535 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
6536 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
6537 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
6538 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
6539 if(branch_regs[i].regmap[r]==0) will_dirty_i&=~(1<<r);
6540 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
6541 //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
6542 //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
6543 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
6544 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
6545 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
6546 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
6547 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
6548 }
6549 }
6550 }
6551 }
6552 // Merge in delay slot (wont dirty)
6553 for(r=0;r<HOST_REGS;r++) {
6554 if(r!=EXCLUDE_REG) {
6555 if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
6556 if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
6557 if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
6558 if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
6559 if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
6560 if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
6561 if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
6562 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
6563 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
6564 if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
6565 }
6566 }
6567 if(wr) {
6568 #ifndef DESTRUCTIVE_WRITEBACK
6569 branch_regs[i].dirty&=wont_dirty_i;
6570 #endif
6571 branch_regs[i].dirty|=will_dirty_i;
6572 }
6573 }
6574 else
6575 {
6576 // Internal branch
6577 if(ba[i]<=start+i*4) {
6578 // Backward branch
6579 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
6580 {
6581 // Unconditional branch
6582 temp_will_dirty=0;
6583 temp_wont_dirty=0;
6584 // Merge in delay slot (will dirty)
6585 for(r=0;r<HOST_REGS;r++) {
6586 if(r!=EXCLUDE_REG) {
6587 if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
6588 if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
6589 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
6590 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
6591 if((branch_regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
6592 if(branch_regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
6593 if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
6594 if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
6595 if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
6596 if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
6597 if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
6598 if((regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
6599 if(regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
6600 if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
6601 }
6602 }
6603 } else {
6604 // Conditional branch (not taken case)
6605 temp_will_dirty=will_dirty_next;
6606 temp_wont_dirty=wont_dirty_next;
6607 // Merge in delay slot (will dirty)
6608 for(r=0;r<HOST_REGS;r++) {
6609 if(r!=EXCLUDE_REG) {
6610 if(!likely[i]) {
6611 // Will not dirty if likely branch is not taken
6612 if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
6613 if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
6614 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
6615 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
6616 if((branch_regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
6617 if(branch_regs[i].regmap[r]==0) temp_will_dirty&=~(1<<r);
6618 if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
6619 //if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
6620 //if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
6621 if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
6622 if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
6623 if((regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
6624 if(regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
6625 if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
6626 }
6627 }
6628 }
6629 }
6630 // Merge in delay slot (wont dirty)
6631 for(r=0;r<HOST_REGS;r++) {
6632 if(r!=EXCLUDE_REG) {
6633 if((regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
6634 if((regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
6635 if((regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
6636 if((regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
6637 if(regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
6638 if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
6639 if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
6640 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
6641 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
6642 if(branch_regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
6643 }
6644 }
6645 // Deal with changed mappings
6646 if(i<iend) {
6647 for(r=0;r<HOST_REGS;r++) {
6648 if(r!=EXCLUDE_REG) {
6649 if(regs[i].regmap[r]!=regmap_pre[i][r]) {
6650 temp_will_dirty&=~(1<<r);
6651 temp_wont_dirty&=~(1<<r);
6652 if((regmap_pre[i][r]&63)>0 && (regmap_pre[i][r]&63)<34) {
6653 temp_will_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
6654 temp_wont_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
6655 } else {
6656 temp_will_dirty|=1<<r;
6657 temp_wont_dirty|=1<<r;
6658 }
6659 }
6660 }
6661 }
6662 }
// With wr set: store the provisional masks for the branch itself, then
// re-scan the loop body (branch target .. i-1) with wr=0 so that only the
// will_dirty[]/wont_dirty[] tables are refreshed.
6663 if(wr) {
6664 will_dirty[i]=temp_will_dirty;
6665 wont_dirty[i]=temp_wont_dirty;
6666 clean_registers((ba[i]-start)>>2,i-1,0);
6667 }else{
6668 // Limit recursion. It can take an excessive amount
6669 // of time if there are a lot of nested loops.
6670 will_dirty[(ba[i]-start)>>2]=0;
6671 wont_dirty[(ba[i]-start)>>2]=-1;
6672 }
6673 }
6674 /*else*/ if(1)
6675 {
6676 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
6677 {
6678 // Unconditional branch
6679 will_dirty_i=0;
6680 wont_dirty_i=0;
6681 //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
6682 for(r=0;r<HOST_REGS;r++) {
6683 if(r!=EXCLUDE_REG) {
6684 if(branch_regs[i].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
6685 will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<<r);
6686 wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
6687 }
e3234ecf 6688 if(branch_regs[i].regmap[r]>=0) {
6689 will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<<r;
6690 wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<<r;
6691 }
57871462 6692 }
6693 }
6694 //}
6695 // Merge in delay slot
6696 for(r=0;r<HOST_REGS;r++) {
6697 if(r!=EXCLUDE_REG) {
6698 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
6699 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
6700 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
6701 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
6702 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
6703 if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
6704 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
6705 if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
6706 if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
6707 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
6708 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
6709 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
6710 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
6711 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
6712 }
6713 }
6714 } else {
6715 // Conditional branch
6716 will_dirty_i=will_dirty_next;
6717 wont_dirty_i=wont_dirty_next;
6718 //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
6719 for(r=0;r<HOST_REGS;r++) {
6720 if(r!=EXCLUDE_REG) {
e3234ecf 6721 signed char target_reg=branch_regs[i].regmap[r];
6722 if(target_reg==regs[(ba[i]-start)>>2].regmap_entry[r]) {
57871462 6723 will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<<r);
6724 wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
6725 }
e3234ecf 6726 else if(target_reg>=0) {
6727 will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<<r;
6728 wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<<r;
57871462 6729 }
6730 // Treat delay slot as part of branch too
6731 /*if(regs[i+1].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
6732 will_dirty[i+1]&=will_dirty[(ba[i]-start)>>2]&(1<<r);
6733 wont_dirty[i+1]|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
6734 }
6735 else
6736 {
6737 will_dirty[i+1]&=~(1<<r);
6738 }*/
6739 }
6740 }
6741 //}
6742 // Merge in delay slot
6743 for(r=0;r<HOST_REGS;r++) {
6744 if(r!=EXCLUDE_REG) {
6745 if(!likely[i]) {
6746 // Might not dirty if likely branch is not taken
6747 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
6748 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
6749 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
6750 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
6751 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
6752 if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
6753 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
6754 //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
6755 //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
6756 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
6757 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
6758 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
6759 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
6760 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
6761 }
6762 }
6763 }
6764 }
e3234ecf 6765 // Merge in delay slot (won't dirty)
57871462 6766 for(r=0;r<HOST_REGS;r++) {
6767 if(r!=EXCLUDE_REG) {
6768 if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
6769 if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
6770 if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
6771 if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
6772 if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
6773 if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
6774 if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
6775 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
6776 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
6777 if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
6778 }
6779 }
6780 if(wr) {
6781 #ifndef DESTRUCTIVE_WRITEBACK
6782 branch_regs[i].dirty&=wont_dirty_i;
6783 #endif
6784 branch_regs[i].dirty|=will_dirty_i;
6785 }
6786 }
6787 }
6788 }
1e973cb0 6789 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 6790 {
6791 // SYSCALL instruction (software interrupt)
6792 will_dirty_i=0;
6793 wont_dirty_i=0;
6794 }
6795 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
6796 {
6797 // ERET instruction (return from interrupt)
6798 will_dirty_i=0;
6799 wont_dirty_i=0;
6800 }
// Snapshot the masks before this instruction's own target registers are
// merged in; the conditional-branch cases above start from these values on
// a later (lower i) iteration.
6801 will_dirty_next=will_dirty_i;
6802 wont_dirty_next=wont_dirty_i;
6803 for(r=0;r<HOST_REGS;r++) {
6804 if(r!=EXCLUDE_REG) {
6805 if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
6806 if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
6807 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
6808 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
6809 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
6810 if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
6811 if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
6812 if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
6813 if(i>istart) {
9f51b4b9 6814 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP)
57871462 6815 {
6816 // Don't store a register immediately after writing it,
6817 // may prevent dual-issue.
6818 if((regs[i].regmap[r]&63)==rt1[i-1]) wont_dirty_i|=1<<r;
6819 if((regs[i].regmap[r]&63)==rt2[i-1]) wont_dirty_i|=1<<r;
6820 }
6821 }
6822 }
6823 }
6824 // Save it
6825 will_dirty[i]=will_dirty_i;
6826 wont_dirty[i]=wont_dirty_i;
6827 // Mark registers that won't be dirtied as not dirty
6828 if(wr) {
6829 /*printf("wr (%d,%d) %x will:",istart,iend,start+i*4);
6830 for(r=0;r<HOST_REGS;r++) {
6831 if((will_dirty_i>>r)&1) {
6832 printf(" r%d",r);
6833 }
6834 }
6835 printf("\n");*/
6836
6837 //if(i==istart||(itype[i-1]!=RJUMP&&itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=FJUMP)) {
6838 regs[i].dirty|=will_dirty_i;
6839 #ifndef DESTRUCTIVE_WRITEBACK
6840 regs[i].dirty&=wont_dirty_i;
6841 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
6842 {
6843 if(i<iend-1&&itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000) {
6844 for(r=0;r<HOST_REGS;r++) {
6845 if(r!=EXCLUDE_REG) {
6846 if(regs[i].regmap[r]==regmap_pre[i+2][r]) {
6847 regs[i+2].wasdirty&=wont_dirty_i|~(1<<r);
581335b0 6848 }else {/*printf("i: %x (%d) mismatch(+2): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
57871462 6849 }
6850 }
6851 }
6852 }
6853 else
6854 {
6855 if(i<iend) {
6856 for(r=0;r<HOST_REGS;r++) {
6857 if(r!=EXCLUDE_REG) {
6858 if(regs[i].regmap[r]==regmap_pre[i+1][r]) {
6859 regs[i+1].wasdirty&=wont_dirty_i|~(1<<r);
581335b0 6860 }else {/*printf("i: %x (%d) mismatch(+1): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
57871462 6861 }
6862 }
6863 }
6864 }
6865 #endif
6866 //}
6867 }
6868 // Deal with changed mappings
// (translate the masks from the mapping in regs[i].regmap to the mapping in
// regmap_pre[i], which the next lower instruction's iteration will see)
6869 temp_will_dirty=will_dirty_i;
6870 temp_wont_dirty=wont_dirty_i;
6871 for(r=0;r<HOST_REGS;r++) {
6872 if(r!=EXCLUDE_REG) {
6873 int nr;
6874 if(regs[i].regmap[r]==regmap_pre[i][r]) {
6875 if(wr) {
6876 #ifndef DESTRUCTIVE_WRITEBACK
6877 regs[i].wasdirty&=wont_dirty_i|~(1<<r);
6878 #endif
6879 regs[i].wasdirty|=will_dirty_i&(1<<r);
6880 }
6881 }
f776eb14 6882 else if(regmap_pre[i][r]>=0&&(nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) {
57871462 6883 // Register moved to a different register
6884 will_dirty_i&=~(1<<r);
6885 wont_dirty_i&=~(1<<r);
6886 will_dirty_i|=((temp_will_dirty>>nr)&1)<<r;
6887 wont_dirty_i|=((temp_wont_dirty>>nr)&1)<<r;
6888 if(wr) {
6889 #ifndef DESTRUCTIVE_WRITEBACK
6890 regs[i].wasdirty&=wont_dirty_i|~(1<<r);
6891 #endif
6892 regs[i].wasdirty|=will_dirty_i&(1<<r);
6893 }
6894 }
6895 else {
6896 will_dirty_i&=~(1<<r);
6897 wont_dirty_i&=~(1<<r);
6898 if((regmap_pre[i][r]&63)>0 && (regmap_pre[i][r]&63)<34) {
6899 will_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
6900 wont_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
6901 } else {
6902 wont_dirty_i|=1<<r;
581335b0 6903 /*printf("i: %x (%d) mismatch: %d\n",start+i*4,i,r);assert(!((will_dirty>>r)&1));*/
57871462 6904 }
6905 }
6906 }
6907 }
6908 }
6909}
6910
#ifdef DISASM
/*
 * Debug aid: print a one-line listing of decoded instruction i to stdout.
 *
 * The line starts with '*' when bt[i] is set (a blank otherwise), followed
 * by the instruction's address (start + i*4), its mnemonic from insn[i] and
 * the operands that are relevant for its itype[i].
 */
void disassemble_inst(int i)
{
  const u_int pc = start + i * 4;
  const char *name = insn[i];

  putchar(bt[i] ? '*' : ' ');

  switch (itype[i]) {
    case UJUMP:
    case FJUMP:
      printf(" %x: %s %8x\n", pc, name, ba[i]);
      break;

    case CJUMP:
      // For i==0 the target is taken from ba[0] instead of being recomputed
      // from the 16-bit immediate.
      printf(" %x: %s r%d,r%d,%8x\n", pc, name, rs1[i], rs2[i],
             i ? pc + 4 + ((signed int)((unsigned int)source[i] << 16) >> 14) : *ba);
      break;

    case SJUMP:
      printf(" %x: %s r%d,%8x\n", pc, name, rs1[i],
             pc + 4 + ((signed int)((unsigned int)source[i] << 16) >> 14));
      break;

    case RJUMP:
      // opcode 0x9 with a link register other than r31 prints both registers
      if (opcode[i] == 0x9 && rt1[i] != 31) {
        printf(" %x: %s r%d,r%d\n", pc, name, rt1[i], rs1[i]);
      } else {
        printf(" %x: %s r%d\n", pc, name, rs1[i]);
      }
      break;

    case SPAN:
      printf(" %x: %s (pagespan) r%d,r%d,%8x\n", pc, name, rs1[i], rs2[i], ba[i]);
      break;

    case IMM16:
      if (opcode[i] == 0xf) { // LUI
        printf(" %x: %s r%d,%4x0000\n", pc, name, rt1[i], imm[i] & 0xffff);
      } else {
        printf(" %x: %s r%d,r%d,%d\n", pc, name, rt1[i], rs1[i], imm[i]);
      }
      break;

    case LOAD:
    case LOADLR:
      printf(" %x: %s r%d,r%d+%x\n", pc, name, rt1[i], rs1[i], imm[i]);
      break;

    case STORE:
    case STORELR:
      printf(" %x: %s r%d,r%d+%x\n", pc, name, rs2[i], rs1[i], imm[i]);
      break;

    case ALU:
    case SHIFT:
      printf(" %x: %s r%d,r%d,r%d\n", pc, name, rt1[i], rs1[i], rs2[i]);
      break;

    case MULTDIV:
      printf(" %x: %s r%d,r%d\n", pc, name, rs1[i], rs2[i]);
      break;

    case SHIFTIMM:
      printf(" %x: %s r%d,r%d,%d\n", pc, name, rt1[i], rs1[i], imm[i]);
      break;

    case MOV:
      switch (opcode2[i] & 0x1d) {
        case 0x10:
          printf(" %x: %s r%d\n", pc, name, rt1[i]);
          break;
        case 0x11:
          printf(" %x: %s r%d\n", pc, name, rs1[i]);
          break;
        default:
          printf(" %x: %s\n", pc, name);
          break;
      }
      break;

    case COP0:
      if (opcode2[i] == 0) { // MFC0
        printf(" %x: %s r%d,cpr0[%d]\n", pc, name, rt1[i], (source[i] >> 11) & 0x1f);
      } else if (opcode2[i] == 4) { // MTC0
        printf(" %x: %s r%d,cpr0[%d]\n", pc, name, rs1[i], (source[i] >> 11) & 0x1f);
      } else {
        printf(" %x: %s\n", pc, name);
      }
      break;

    case COP1:
      if (opcode2[i] < 3) { // MFC1
        printf(" %x: %s r%d,cpr1[%d]\n", pc, name, rt1[i], (source[i] >> 11) & 0x1f);
      } else if (opcode2[i] > 3) { // MTC1
        printf(" %x: %s r%d,cpr1[%d]\n", pc, name, rs1[i], (source[i] >> 11) & 0x1f);
      } else {
        printf(" %x: %s\n", pc, name);
      }
      break;

    case COP2:
      if (opcode2[i] < 3) { // MFC2
        printf(" %x: %s r%d,cpr2[%d]\n", pc, name, rt1[i], (source[i] >> 11) & 0x1f);
      } else if (opcode2[i] > 3) { // MTC2
        printf(" %x: %s r%d,cpr2[%d]\n", pc, name, rs1[i], (source[i] >> 11) & 0x1f);
      } else {
        printf(" %x: %s\n", pc, name);
      }
      break;

    case C1LS:
      printf(" %x: %s cpr1[%d],r%d+%x\n", pc, name, (source[i] >> 16) & 0x1f, rs1[i], imm[i]);
      break;

    case C2LS:
      printf(" %x: %s cpr2[%d],r%d+%x\n", pc, name, (source[i] >> 16) & 0x1f, rs1[i], imm[i]);
      break;

    case INTCALL:
      printf(" %x: %s (INTCALL)\n", pc, name);
      break;

    default:
      // anything else: address and mnemonic only
      printf(" %x: %s\n", pc, name);
      break;
  }
}
#else
/* Disassembly support compiled out: keep a no-op so callers still link. */
static void disassemble_inst(int i) {}
#endif // DISASM
57871462 7003
d848b60a 7004#define DRC_TEST_VAL 0x74657374
7005
7006static int new_dynarec_test(void)
7007{
7008 int (*testfunc)(void) = (void *)out;
d148d265 7009 void *beginning;
d848b60a 7010 int ret;
d148d265 7011
7012 beginning = start_block();
d848b60a 7013 emit_movimm(DRC_TEST_VAL,0); // test
7014 emit_jmpreg(14);
7015 literal_pool(0);
d148d265 7016 end_block(beginning);
d848b60a 7017 SysPrintf("testing if we can run recompiled code..\n");
7018 ret = testfunc();
7019 if (ret == DRC_TEST_VAL)
7020 SysPrintf("test passed.\n");
7021 else
7022 SysPrintf("test failed: %08x\n", ret);
7023 out=(u_char *)BASE_ADDR;
7024 return ret == DRC_TEST_VAL;
7025}
7026
dc990066 7027// clear the state completely, instead of just marking
7028// things invalid like invalidate_all_pages() does
7029void new_dynarec_clear_full()
57871462 7030{
57871462 7031 int n;
35775df7 7032 out=(u_char *)BASE_ADDR;
7033 memset(invalid_code,1,sizeof(invalid_code));
7034 memset(hash_table,0xff,sizeof(hash_table));
57871462 7035 memset(mini_ht,-1,sizeof(mini_ht));
7036 memset(restore_candidate,0,sizeof(restore_candidate));
dc990066 7037 memset(shadow,0,sizeof(shadow));
57871462 7038 copy=shadow;
7039 expirep=16384; // Expiry pointer, +2 blocks
7040 pending_exception=0;
7041 literalcount=0;
57871462 7042 stop_after_jal=0;
9be4ba64 7043 inv_code_start=inv_code_end=~0;
57871462 7044 // TLB
dc990066 7045 for(n=0;n<4096;n++) ll_clear(jump_in+n);
7046 for(n=0;n<4096;n++) ll_clear(jump_out+n);
7047 for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
7048}
7049
7050void new_dynarec_init()
7051{
d848b60a 7052 SysPrintf("Init new dynarec\n");
1e212a25 7053
7054 // allocate/prepare a buffer for translation cache
7055 // see assem_arm.h for some explanation
7056#if defined(BASE_ADDR_FIXED)
7057 if (mmap (translation_cache, 1 << TARGET_SIZE_2,
dc990066 7058 PROT_READ | PROT_WRITE | PROT_EXEC,
186935dc 7059 MAP_PRIVATE | MAP_ANONYMOUS,
1e212a25 7060 -1, 0) != translation_cache) {
7061 SysPrintf("mmap() failed: %s\n", strerror(errno));
7062 SysPrintf("disable BASE_ADDR_FIXED and recompile\n");
7063 abort();
7064 }
7065#elif defined(BASE_ADDR_DYNAMIC)
7066 #ifdef VITA
73081f23 7067 sceBlock = getVMBlock();//sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2);
1e212a25 7068 if (sceBlock < 0)
7069 SysPrintf("sceKernelAllocMemBlockForVM failed\n");
7070 int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&translation_cache);
7071 if (ret < 0)
7072 SysPrintf("sceKernelGetMemBlockBase failed\n");
73081f23 7073 sceClibPrintf("translation_cache = 0x%08X \n ", translation_cache);
1e212a25 7074 #else
7075 translation_cache = mmap (NULL, 1 << TARGET_SIZE_2,
7076 PROT_READ | PROT_WRITE | PROT_EXEC,
7077 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
7078 if (translation_cache == MAP_FAILED) {
d848b60a 7079 SysPrintf("mmap() failed: %s\n", strerror(errno));
1e212a25 7080 abort();
d848b60a 7081 }
1e212a25 7082 #endif
7083#else
7084 #ifndef NO_WRITE_EXEC
bdeade46 7085 // not all systems allow execute in data segment by default
7086 if (mprotect(out, 1<<TARGET_SIZE_2, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
d848b60a 7087 SysPrintf("mprotect() failed: %s\n", strerror(errno));
1e212a25 7088 #endif
dc990066 7089#endif
1e212a25 7090 out=(u_char *)BASE_ADDR;
2573466a 7091 cycle_multiplier=200;
dc990066 7092 new_dynarec_clear_full();
7093#ifdef HOST_IMM8
7094 // Copy this into local area so we don't have to put it in every literal pool
7095 invc_ptr=invalid_code;
7096#endif
57871462 7097 arch_init();
d848b60a 7098 new_dynarec_test();
a327ad27 7099#ifndef RAM_FIXED
7100 ram_offset=(u_int)rdram-0x80000000;
7101#endif
b105cf4f 7102 if (ram_offset!=0)
c43b5311 7103 SysPrintf("warning: RAM is not directly mapped, performance will suffer\n");
57871462 7104}
7105
7106void new_dynarec_cleanup()
7107{
7108 int n;
1e212a25 7109#if defined(BASE_ADDR_FIXED) || defined(BASE_ADDR_DYNAMIC)
7110 #ifdef VITA
73081f23
FJGG
7111 //sceKernelFreeMemBlock(sceBlock);
7112 //sceBlock = -1;
1e212a25 7113 #else
7114 if (munmap ((void *)BASE_ADDR, 1<<TARGET_SIZE_2) < 0)
7115 SysPrintf("munmap() failed\n");
bdeade46 7116 #endif
1e212a25 7117#endif
57871462 7118 for(n=0;n<4096;n++) ll_clear(jump_in+n);
7119 for(n=0;n<4096;n++) ll_clear(jump_out+n);
7120 for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
7121 #ifdef ROM_COPY
c43b5311 7122 if (munmap (ROM_COPY, 67108864) < 0) {SysPrintf("munmap() failed\n");}
57871462 7123 #endif
7124}
7125
03f55e6b 7126static u_int *get_source_start(u_int addr, u_int *limit)
57871462 7127{
03f55e6b 7128 if (addr < 0x00200000 ||
7129 (0xa0000000 <= addr && addr < 0xa0200000)) {
7130 // used for BIOS calls mostly?
7131 *limit = (addr&0xa0000000)|0x00200000;
7132 return (u_int *)((u_int)rdram + (addr&0x1fffff));
7133 }
7134 else if (!Config.HLE && (
7135 /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/
7136 (0xbfc00000 <= addr && addr < 0xbfc80000))) {
7137 // BIOS
7138 *limit = (addr & 0xfff00000) | 0x80000;
7139 return (u_int *)((u_int)psxR + (addr&0x7ffff));
7140 }
7141 else if (addr >= 0x80000000 && addr < 0x80000000+RAM_SIZE) {
7142 *limit = (addr & 0x80600000) + 0x00200000;
7143 return (u_int *)((u_int)rdram + (addr&0x1fffff));
7144 }
581335b0 7145 return NULL;
03f55e6b 7146}
7147
7148static u_int scan_for_ret(u_int addr)
7149{
7150 u_int limit = 0;
7151 u_int *mem;
7152
7153 mem = get_source_start(addr, &limit);
7154 if (mem == NULL)
7155 return addr;
7156
7157 if (limit > addr + 0x1000)
7158 limit = addr + 0x1000;
7159 for (; addr < limit; addr += 4, mem++) {
7160 if (*mem == 0x03e00008) // jr $ra
7161 return addr + 8;
57871462 7162 }
581335b0 7163 return addr;
03f55e6b 7164}
7165
/* One record of the compiled-block list that is stored in a savestate. */
struct savestate_block {
  uint32_t addr;     // guest address of the block
  uint32_t regflags; // register flags recorded together with the block
};

/*
 * qsort() comparator: orders savestate_block records by ascending addr.
 *
 * Returns <0, 0 or >0. The result is derived from explicit comparisons
 * rather than from "p1->addr - p2->addr": converting that unsigned
 * difference to int yields the wrong sign whenever the two addresses are
 * 0x80000000 or more apart (e.g. 0x00001000 vs 0x80001000), which gives
 * qsort() an inconsistent ordering.
 */
static int addr_cmp(const void *p1_, const void *p2_)
{
  const struct savestate_block *p1 = p1_, *p2 = p2_;
  return (p1->addr > p2->addr) - (p1->addr < p2->addr);
}
7176
7177int new_dynarec_save_blocks(void *save, int size)
7178{
7179 struct savestate_block *blocks = save;
7180 int maxcount = size / sizeof(blocks[0]);
7181 struct savestate_block tmp_blocks[1024];
7182 struct ll_entry *head;
7183 int p, s, d, o, bcnt;
7184 u_int addr;
7185
7186 o = 0;
7187 for (p = 0; p < sizeof(jump_in) / sizeof(jump_in[0]); p++) {
7188 bcnt = 0;
7189 for (head = jump_in[p]; head != NULL; head = head->next) {
7190 tmp_blocks[bcnt].addr = head->vaddr;
7191 tmp_blocks[bcnt].regflags = head->reg_sv_flags;
7192 bcnt++;
7193 }
7194 if (bcnt < 1)
7195 continue;
7196 qsort(tmp_blocks, bcnt, sizeof(tmp_blocks[0]), addr_cmp);
7197
7198 addr = tmp_blocks[0].addr;
7199 for (s = d = 0; s < bcnt; s++) {
7200 if (tmp_blocks[s].addr < addr)
7201 continue;
7202 if (d == 0 || tmp_blocks[d-1].addr != tmp_blocks[s].addr)
7203 tmp_blocks[d++] = tmp_blocks[s];
7204 addr = scan_for_ret(tmp_blocks[s].addr);
7205 }
7206
7207 if (o + d > maxcount)
7208 d = maxcount - o;
7209 memcpy(&blocks[o], tmp_blocks, d * sizeof(blocks[0]));
7210 o += d;
7211 }
7212
7213 return o * sizeof(blocks[0]);
7214}
7215
7216void new_dynarec_load_blocks(const void *save, int size)
7217{
7218 const struct savestate_block *blocks = save;
7219 int count = size / sizeof(blocks[0]);
7220 u_int regs_save[32];
7221 uint32_t f;
7222 int i, b;
7223
7224 get_addr(psxRegs.pc);
7225
7226 // change GPRs for speculation to at least partially work..
7227 memcpy(regs_save, &psxRegs.GPR, sizeof(regs_save));
7228 for (i = 1; i < 32; i++)
7229 psxRegs.GPR.r[i] = 0x80000000;
7230
7231 for (b = 0; b < count; b++) {
7232 for (f = blocks[b].regflags, i = 0; f; f >>= 1, i++) {
7233 if (f & 1)
7234 psxRegs.GPR.r[i] = 0x1f800000;
7235 }
7236
7237 get_addr(blocks[b].addr);
7238
7239 for (f = blocks[b].regflags, i = 0; f; f >>= 1, i++) {
7240 if (f & 1)
7241 psxRegs.GPR.r[i] = 0x80000000;
7242 }
7243 }
7244
7245 memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save));
7246}
7247
7248int new_recompile_block(int addr)
7249{
7250 u_int pagelimit = 0;
7251 u_int state_rflags = 0;
7252 int i;
7253
57871462 7254 assem_debug("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out);
7255 //printf("NOTCOMPILED: addr = %x -> %x\n", (int)addr, (int)out);
7256 //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr);
9f51b4b9 7257 //if(debug)
57871462 7258 //printf("TRACE: count=%d next=%d (checksum %x)\n",Count,next_interupt,mchecksum());
7259 //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29);
7260 /*if(Count>=312978186) {
7261 rlist();
7262 }*/
7263 //rlist();
03f55e6b 7264
7265 // this is just for speculation
7266 for (i = 1; i < 32; i++) {
7267 if ((psxRegs.GPR.r[i] & 0xffff0000) == 0x1f800000)
7268 state_rflags |= 1 << i;
7269 }
7270
57871462 7271 start = (u_int)addr&~3;
7272 //assert(((u_int)addr&1)==0);
2f546f9a 7273 new_dynarec_did_compile=1;
9ad4d757 7274 if (Config.HLE && start == 0x80001000) // hlecall
560e4a12 7275 {
7139f3c8 7276 // XXX: is this enough? Maybe check hleSoftCall?
d148d265 7277 void *beginning=start_block();
7139f3c8 7278 u_int page=get_page(start);
d148d265 7279
7139f3c8 7280 invalid_code[start>>12]=0;
7281 emit_movimm(start,0);
7282 emit_writeword(0,(int)&pcaddr);
bb5285ef 7283 emit_jmp((int)new_dyna_leave);
15776b68 7284 literal_pool(0);
d148d265 7285 end_block(beginning);
03f55e6b 7286 ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning);
7139f3c8 7287 return 0;
7288 }
03f55e6b 7289
7290 source = get_source_start(start, &pagelimit);
7291 if (source == NULL) {
7292 SysPrintf("Compile at bogus memory address: %08x\n", addr);
57871462 7293 exit(1);
7294 }
7295
7296 /* Pass 1: disassemble */
7297 /* Pass 2: register dependencies, branch targets */
7298 /* Pass 3: register allocation */
7299 /* Pass 4: branch dependencies */
7300 /* Pass 5: pre-alloc */
7301 /* Pass 6: optimize clean/dirty state */
7302 /* Pass 7: flag 32-bit registers */
7303 /* Pass 8: assembly */
7304 /* Pass 9: linker */
7305 /* Pass 10: garbage collection / free memory */
7306
03f55e6b 7307 int j;
57871462 7308 int done=0;
7309 unsigned int type,op,op2;
7310
7311 //printf("addr = %x source = %x %x\n", addr,source,source[0]);
9f51b4b9 7312
57871462 7313 /* Pass 1 disassembly */
7314
7315 for(i=0;!done;i++) {
e1190b87 7316 bt[i]=0;likely[i]=0;ooo[i]=0;op2=0;
7317 minimum_free_regs[i]=0;
57871462 7318 opcode[i]=op=source[i]>>26;
7319 switch(op)
7320 {
7321 case 0x00: strcpy(insn[i],"special"); type=NI;
7322 op2=source[i]&0x3f;
7323 switch(op2)
7324 {
7325 case 0x00: strcpy(insn[i],"SLL"); type=SHIFTIMM; break;
7326 case 0x02: strcpy(insn[i],"SRL"); type=SHIFTIMM; break;
7327 case 0x03: strcpy(insn[i],"SRA"); type=SHIFTIMM; break;
7328 case 0x04: strcpy(insn[i],"SLLV"); type=SHIFT; break;
7329 case 0x06: strcpy(insn[i],"SRLV"); type=SHIFT; break;
7330 case 0x07: strcpy(insn[i],"SRAV"); type=SHIFT; break;
7331 case 0x08: strcpy(insn[i],"JR"); type=RJUMP; break;
7332 case 0x09: strcpy(insn[i],"JALR"); type=RJUMP; break;
7333 case 0x0C: strcpy(insn[i],"SYSCALL"); type=SYSCALL; break;
7334 case 0x0D: strcpy(insn[i],"BREAK"); type=OTHER; break;
7335 case 0x0F: strcpy(insn[i],"SYNC"); type=OTHER; break;
7336 case 0x10: strcpy(insn[i],"MFHI"); type=MOV; break;
7337 case 0x11: strcpy(insn[i],"MTHI"); type=MOV; break;
7338 case 0x12: strcpy(insn[i],"MFLO"); type=MOV; break;
7339 case 0x13: strcpy(insn[i],"MTLO"); type=MOV; break;
57871462 7340 case 0x18: strcpy(insn[i],"MULT"); type=MULTDIV; break;
7341 case 0x19: strcpy(insn[i],"MULTU"); type=MULTDIV; break;
7342 case 0x1A: strcpy(insn[i],"DIV"); type=MULTDIV; break;
7343 case 0x1B: strcpy(insn[i],"DIVU"); type=MULTDIV; break;
57871462 7344 case 0x20: strcpy(insn[i],"ADD"); type=ALU; break;
7345 case 0x21: strcpy(insn[i],"ADDU"); type=ALU; break;
7346 case 0x22: strcpy(insn[i],"SUB"); type=ALU; break;
7347 case 0x23: strcpy(insn[i],"SUBU"); type=ALU; break;
7348 case 0x24: strcpy(insn[i],"AND"); type=ALU; break;
7349 case 0x25: strcpy(insn[i],"OR"); type=ALU; break;
7350 case 0x26: strcpy(insn[i],"XOR"); type=ALU; break;
7351 case 0x27: strcpy(insn[i],"NOR"); type=ALU; break;
7352 case 0x2A: strcpy(insn[i],"SLT"); type=ALU; break;
7353 case 0x2B: strcpy(insn[i],"SLTU"); type=ALU; break;
57871462 7354 case 0x30: strcpy(insn[i],"TGE"); type=NI; break;
7355 case 0x31: strcpy(insn[i],"TGEU"); type=NI; break;
7356 case 0x32: strcpy(insn[i],"TLT"); type=NI; break;
7357 case 0x33: strcpy(insn[i],"TLTU"); type=NI; break;
7358 case 0x34: strcpy(insn[i],"TEQ"); type=NI; break;
7359 case 0x36: strcpy(insn[i],"TNE"); type=NI; break;
71e490c5 7360#if 0
7f2607ea 7361 case 0x14: strcpy(insn[i],"DSLLV"); type=SHIFT; break;
7362 case 0x16: strcpy(insn[i],"DSRLV"); type=SHIFT; break;
7363 case 0x17: strcpy(insn[i],"DSRAV"); type=SHIFT; break;
7364 case 0x1C: strcpy(insn[i],"DMULT"); type=MULTDIV; break;
7365 case 0x1D: strcpy(insn[i],"DMULTU"); type=MULTDIV; break;
7366 case 0x1E: strcpy(insn[i],"DDIV"); type=MULTDIV; break;
7367 case 0x1F: strcpy(insn[i],"DDIVU"); type=MULTDIV; break;
7368 case 0x2C: strcpy(insn[i],"DADD"); type=ALU; break;
7369 case 0x2D: strcpy(insn[i],"DADDU"); type=ALU; break;
7370 case 0x2E: strcpy(insn[i],"DSUB"); type=ALU; break;
7371 case 0x2F: strcpy(insn[i],"DSUBU"); type=ALU; break;
57871462 7372 case 0x38: strcpy(insn[i],"DSLL"); type=SHIFTIMM; break;
7373 case 0x3A: strcpy(insn[i],"DSRL"); type=SHIFTIMM; break;
7374 case 0x3B: strcpy(insn[i],"DSRA"); type=SHIFTIMM; break;
7375 case 0x3C: strcpy(insn[i],"DSLL32"); type=SHIFTIMM; break;
7376 case 0x3E: strcpy(insn[i],"DSRL32"); type=SHIFTIMM; break;
7377 case 0x3F: strcpy(insn[i],"DSRA32"); type=SHIFTIMM; break;
7f2607ea 7378#endif
57871462 7379 }
7380 break;
7381 case 0x01: strcpy(insn[i],"regimm"); type=NI;
7382 op2=(source[i]>>16)&0x1f;
7383 switch(op2)
7384 {
7385 case 0x00: strcpy(insn[i],"BLTZ"); type=SJUMP; break;
7386 case 0x01: strcpy(insn[i],"BGEZ"); type=SJUMP; break;
7387 case 0x02: strcpy(insn[i],"BLTZL"); type=SJUMP; break;
7388 case 0x03: strcpy(insn[i],"BGEZL"); type=SJUMP; break;
7389 case 0x08: strcpy(insn[i],"TGEI"); type=NI; break;
7390 case 0x09: strcpy(insn[i],"TGEIU"); type=NI; break;
7391 case 0x0A: strcpy(insn[i],"TLTI"); type=NI; break;
7392 case 0x0B: strcpy(insn[i],"TLTIU"); type=NI; break;
7393 case 0x0C: strcpy(insn[i],"TEQI"); type=NI; break;
7394 case 0x0E: strcpy(insn[i],"TNEI"); type=NI; break;
7395 case 0x10: strcpy(insn[i],"BLTZAL"); type=SJUMP; break;
7396 case 0x11: strcpy(insn[i],"BGEZAL"); type=SJUMP; break;
7397 case 0x12: strcpy(insn[i],"BLTZALL"); type=SJUMP; break;
7398 case 0x13: strcpy(insn[i],"BGEZALL"); type=SJUMP; break;
7399 }
7400 break;
7401 case 0x02: strcpy(insn[i],"J"); type=UJUMP; break;
7402 case 0x03: strcpy(insn[i],"JAL"); type=UJUMP; break;
7403 case 0x04: strcpy(insn[i],"BEQ"); type=CJUMP; break;
7404 case 0x05: strcpy(insn[i],"BNE"); type=CJUMP; break;
7405 case 0x06: strcpy(insn[i],"BLEZ"); type=CJUMP; break;
7406 case 0x07: strcpy(insn[i],"BGTZ"); type=CJUMP; break;
7407 case 0x08: strcpy(insn[i],"ADDI"); type=IMM16; break;
7408 case 0x09: strcpy(insn[i],"ADDIU"); type=IMM16; break;
7409 case 0x0A: strcpy(insn[i],"SLTI"); type=IMM16; break;
7410 case 0x0B: strcpy(insn[i],"SLTIU"); type=IMM16; break;
7411 case 0x0C: strcpy(insn[i],"ANDI"); type=IMM16; break;
7412 case 0x0D: strcpy(insn[i],"ORI"); type=IMM16; break;
7413 case 0x0E: strcpy(insn[i],"XORI"); type=IMM16; break;
7414 case 0x0F: strcpy(insn[i],"LUI"); type=IMM16; break;
7415 case 0x10: strcpy(insn[i],"cop0"); type=NI;
7416 op2=(source[i]>>21)&0x1f;
7417 switch(op2)
7418 {
7419 case 0x00: strcpy(insn[i],"MFC0"); type=COP0; break;
7420 case 0x04: strcpy(insn[i],"MTC0"); type=COP0; break;
7421 case 0x10: strcpy(insn[i],"tlb"); type=NI;
7422 switch(source[i]&0x3f)
7423 {
7424 case 0x01: strcpy(insn[i],"TLBR"); type=COP0; break;
7425 case 0x02: strcpy(insn[i],"TLBWI"); type=COP0; break;
7426 case 0x06: strcpy(insn[i],"TLBWR"); type=COP0; break;
7427 case 0x08: strcpy(insn[i],"TLBP"); type=COP0; break;
576bbd8f 7428 case 0x10: strcpy(insn[i],"RFE"); type=COP0; break;
71e490c5 7429 //case 0x18: strcpy(insn[i],"ERET"); type=COP0; break;
57871462 7430 }
7431 }
7432 break;
7433 case 0x11: strcpy(insn[i],"cop1"); type=NI;
7434 op2=(source[i]>>21)&0x1f;
7435 switch(op2)
7436 {
7437 case 0x00: strcpy(insn[i],"MFC1"); type=COP1; break;
7438 case 0x01: strcpy(insn[i],"DMFC1"); type=COP1; break;
7439 case 0x02: strcpy(insn[i],"CFC1"); type=COP1; break;
7440 case 0x04: strcpy(insn[i],"MTC1"); type=COP1; break;
7441 case 0x05: strcpy(insn[i],"DMTC1"); type=COP1; break;
7442 case 0x06: strcpy(insn[i],"CTC1"); type=COP1; break;
7443 case 0x08: strcpy(insn[i],"BC1"); type=FJUMP;
7444 switch((source[i]>>16)&0x3)
7445 {
7446 case 0x00: strcpy(insn[i],"BC1F"); break;
7447 case 0x01: strcpy(insn[i],"BC1T"); break;
7448 case 0x02: strcpy(insn[i],"BC1FL"); break;
7449 case 0x03: strcpy(insn[i],"BC1TL"); break;
7450 }
7451 break;
7452 case 0x10: strcpy(insn[i],"C1.S"); type=NI;
7453 switch(source[i]&0x3f)
7454 {
7455 case 0x00: strcpy(insn[i],"ADD.S"); type=FLOAT; break;
7456 case 0x01: strcpy(insn[i],"SUB.S"); type=FLOAT; break;
7457 case 0x02: strcpy(insn[i],"MUL.S"); type=FLOAT; break;
7458 case 0x03: strcpy(insn[i],"DIV.S"); type=FLOAT; break;
7459 case 0x04: strcpy(insn[i],"SQRT.S"); type=FLOAT; break;
7460 case 0x05: strcpy(insn[i],"ABS.S"); type=FLOAT; break;
7461 case 0x06: strcpy(insn[i],"MOV.S"); type=FLOAT; break;
7462 case 0x07: strcpy(insn[i],"NEG.S"); type=FLOAT; break;
7463 case 0x08: strcpy(insn[i],"ROUND.L.S"); type=FCONV; break;
7464 case 0x09: strcpy(insn[i],"TRUNC.L.S"); type=FCONV; break;
7465 case 0x0A: strcpy(insn[i],"CEIL.L.S"); type=FCONV; break;
7466 case 0x0B: strcpy(insn[i],"FLOOR.L.S"); type=FCONV; break;
7467 case 0x0C: strcpy(insn[i],"ROUND.W.S"); type=FCONV; break;
7468 case 0x0D: strcpy(insn[i],"TRUNC.W.S"); type=FCONV; break;
7469 case 0x0E: strcpy(insn[i],"CEIL.W.S"); type=FCONV; break;
7470 case 0x0F: strcpy(insn[i],"FLOOR.W.S"); type=FCONV; break;
7471 case 0x21: strcpy(insn[i],"CVT.D.S"); type=FCONV; break;
7472 case 0x24: strcpy(insn[i],"CVT.W.S"); type=FCONV; break;
7473 case 0x25: strcpy(insn[i],"CVT.L.S"); type=FCONV; break;
7474 case 0x30: strcpy(insn[i],"C.F.S"); type=FCOMP; break;
7475 case 0x31: strcpy(insn[i],"C.UN.S"); type=FCOMP; break;
7476 case 0x32: strcpy(insn[i],"C.EQ.S"); type=FCOMP; break;
7477 case 0x33: strcpy(insn[i],"C.UEQ.S"); type=FCOMP; break;
7478 case 0x34: strcpy(insn[i],"C.OLT.S"); type=FCOMP; break;
7479 case 0x35: strcpy(insn[i],"C.ULT.S"); type=FCOMP; break;
7480 case 0x36: strcpy(insn[i],"C.OLE.S"); type=FCOMP; break;
7481 case 0x37: strcpy(insn[i],"C.ULE.S"); type=FCOMP; break;
7482 case 0x38: strcpy(insn[i],"C.SF.S"); type=FCOMP; break;
7483 case 0x39: strcpy(insn[i],"C.NGLE.S"); type=FCOMP; break;
7484 case 0x3A: strcpy(insn[i],"C.SEQ.S"); type=FCOMP; break;
7485 case 0x3B: strcpy(insn[i],"C.NGL.S"); type=FCOMP; break;
7486 case 0x3C: strcpy(insn[i],"C.LT.S"); type=FCOMP; break;
7487 case 0x3D: strcpy(insn[i],"C.NGE.S"); type=FCOMP; break;
7488 case 0x3E: strcpy(insn[i],"C.LE.S"); type=FCOMP; break;
7489 case 0x3F: strcpy(insn[i],"C.NGT.S"); type=FCOMP; break;
7490 }
7491 break;
7492 case 0x11: strcpy(insn[i],"C1.D"); type=NI;
7493 switch(source[i]&0x3f)
7494 {
7495 case 0x00: strcpy(insn[i],"ADD.D"); type=FLOAT; break;
7496 case 0x01: strcpy(insn[i],"SUB.D"); type=FLOAT; break;
7497 case 0x02: strcpy(insn[i],"MUL.D"); type=FLOAT; break;
7498 case 0x03: strcpy(insn[i],"DIV.D"); type=FLOAT; break;
7499 case 0x04: strcpy(insn[i],"SQRT.D"); type=FLOAT; break;
7500 case 0x05: strcpy(insn[i],"ABS.D"); type=FLOAT; break;
7501 case 0x06: strcpy(insn[i],"MOV.D"); type=FLOAT; break;
7502 case 0x07: strcpy(insn[i],"NEG.D"); type=FLOAT; break;
7503 case 0x08: strcpy(insn[i],"ROUND.L.D"); type=FCONV; break;
7504 case 0x09: strcpy(insn[i],"TRUNC.L.D"); type=FCONV; break;
7505 case 0x0A: strcpy(insn[i],"CEIL.L.D"); type=FCONV; break;
7506 case 0x0B: strcpy(insn[i],"FLOOR.L.D"); type=FCONV; break;
7507 case 0x0C: strcpy(insn[i],"ROUND.W.D"); type=FCONV; break;
7508 case 0x0D: strcpy(insn[i],"TRUNC.W.D"); type=FCONV; break;
7509 case 0x0E: strcpy(insn[i],"CEIL.W.D"); type=FCONV; break;
7510 case 0x0F: strcpy(insn[i],"FLOOR.W.D"); type=FCONV; break;
7511 case 0x20: strcpy(insn[i],"CVT.S.D"); type=FCONV; break;
7512 case 0x24: strcpy(insn[i],"CVT.W.D"); type=FCONV; break;
7513 case 0x25: strcpy(insn[i],"CVT.L.D"); type=FCONV; break;
7514 case 0x30: strcpy(insn[i],"C.F.D"); type=FCOMP; break;
7515 case 0x31: strcpy(insn[i],"C.UN.D"); type=FCOMP; break;
7516 case 0x32: strcpy(insn[i],"C.EQ.D"); type=FCOMP; break;
7517 case 0x33: strcpy(insn[i],"C.UEQ.D"); type=FCOMP; break;
7518 case 0x34: strcpy(insn[i],"C.OLT.D"); type=FCOMP; break;
7519 case 0x35: strcpy(insn[i],"C.ULT.D"); type=FCOMP; break;
7520 case 0x36: strcpy(insn[i],"C.OLE.D"); type=FCOMP; break;
7521 case 0x37: strcpy(insn[i],"C.ULE.D"); type=FCOMP; break;
7522 case 0x38: strcpy(insn[i],"C.SF.D"); type=FCOMP; break;
7523 case 0x39: strcpy(insn[i],"C.NGLE.D"); type=FCOMP; break;
7524 case 0x3A: strcpy(insn[i],"C.SEQ.D"); type=FCOMP; break;
7525 case 0x3B: strcpy(insn[i],"C.NGL.D"); type=FCOMP; break;
7526 case 0x3C: strcpy(insn[i],"C.LT.D"); type=FCOMP; break;
7527 case 0x3D: strcpy(insn[i],"C.NGE.D"); type=FCOMP; break;
7528 case 0x3E: strcpy(insn[i],"C.LE.D"); type=FCOMP; break;
7529 case 0x3F: strcpy(insn[i],"C.NGT.D"); type=FCOMP; break;
7530 }
7531 break;
7532 case 0x14: strcpy(insn[i],"C1.W"); type=NI;
7533 switch(source[i]&0x3f)
7534 {
7535 case 0x20: strcpy(insn[i],"CVT.S.W"); type=FCONV; break;
7536 case 0x21: strcpy(insn[i],"CVT.D.W"); type=FCONV; break;
7537 }
7538 break;
7539 case 0x15: strcpy(insn[i],"C1.L"); type=NI;
7540 switch(source[i]&0x3f)
7541 {
7542 case 0x20: strcpy(insn[i],"CVT.S.L"); type=FCONV; break;
7543 case 0x21: strcpy(insn[i],"CVT.D.L"); type=FCONV; break;
7544 }
7545 break;
7546 }
7547 break;
71e490c5 7548#if 0
57871462 7549 case 0x14: strcpy(insn[i],"BEQL"); type=CJUMP; break;
7550 case 0x15: strcpy(insn[i],"BNEL"); type=CJUMP; break;
7551 case 0x16: strcpy(insn[i],"BLEZL"); type=CJUMP; break;
7552 case 0x17: strcpy(insn[i],"BGTZL"); type=CJUMP; break;
7553 case 0x18: strcpy(insn[i],"DADDI"); type=IMM16; break;
7554 case 0x19: strcpy(insn[i],"DADDIU"); type=IMM16; break;
7555 case 0x1A: strcpy(insn[i],"LDL"); type=LOADLR; break;
7556 case 0x1B: strcpy(insn[i],"LDR"); type=LOADLR; break;
996cc15d 7557#endif
57871462 7558 case 0x20: strcpy(insn[i],"LB"); type=LOAD; break;
7559 case 0x21: strcpy(insn[i],"LH"); type=LOAD; break;
7560 case 0x22: strcpy(insn[i],"LWL"); type=LOADLR; break;
7561 case 0x23: strcpy(insn[i],"LW"); type=LOAD; break;
7562 case 0x24: strcpy(insn[i],"LBU"); type=LOAD; break;
7563 case 0x25: strcpy(insn[i],"LHU"); type=LOAD; break;
7564 case 0x26: strcpy(insn[i],"LWR"); type=LOADLR; break;
71e490c5 7565#if 0
57871462 7566 case 0x27: strcpy(insn[i],"LWU"); type=LOAD; break;
64bd6f82 7567#endif
57871462 7568 case 0x28: strcpy(insn[i],"SB"); type=STORE; break;
7569 case 0x29: strcpy(insn[i],"SH"); type=STORE; break;
7570 case 0x2A: strcpy(insn[i],"SWL"); type=STORELR; break;
7571 case 0x2B: strcpy(insn[i],"SW"); type=STORE; break;
71e490c5 7572#if 0
57871462 7573 case 0x2C: strcpy(insn[i],"SDL"); type=STORELR; break;
7574 case 0x2D: strcpy(insn[i],"SDR"); type=STORELR; break;
996cc15d 7575#endif
57871462 7576 case 0x2E: strcpy(insn[i],"SWR"); type=STORELR; break;
7577 case 0x2F: strcpy(insn[i],"CACHE"); type=NOP; break;
7578 case 0x30: strcpy(insn[i],"LL"); type=NI; break;
7579 case 0x31: strcpy(insn[i],"LWC1"); type=C1LS; break;
71e490c5 7580#if 0
57871462 7581 case 0x34: strcpy(insn[i],"LLD"); type=NI; break;
7582 case 0x35: strcpy(insn[i],"LDC1"); type=C1LS; break;
7583 case 0x37: strcpy(insn[i],"LD"); type=LOAD; break;
996cc15d 7584#endif
57871462 7585 case 0x38: strcpy(insn[i],"SC"); type=NI; break;
7586 case 0x39: strcpy(insn[i],"SWC1"); type=C1LS; break;
71e490c5 7587#if 0
57871462 7588 case 0x3C: strcpy(insn[i],"SCD"); type=NI; break;
7589 case 0x3D: strcpy(insn[i],"SDC1"); type=C1LS; break;
7590 case 0x3F: strcpy(insn[i],"SD"); type=STORE; break;
996cc15d 7591#endif
b9b61529 7592 case 0x12: strcpy(insn[i],"COP2"); type=NI;
7593 op2=(source[i]>>21)&0x1f;
bedfea38 7594 //if (op2 & 0x10) {
7595 if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns
c7abc864 7596 if (gte_handlers[source[i]&0x3f]!=NULL) {
bedfea38 7597 if (gte_regnames[source[i]&0x3f]!=NULL)
7598 strcpy(insn[i],gte_regnames[source[i]&0x3f]);
7599 else
7600 snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
c7abc864 7601 type=C2OP;
7602 }
7603 }
7604 else switch(op2)
b9b61529 7605 {
7606 case 0x00: strcpy(insn[i],"MFC2"); type=COP2; break;
7607 case 0x02: strcpy(insn[i],"CFC2"); type=COP2; break;
7608 case 0x04: strcpy(insn[i],"MTC2"); type=COP2; break;
7609 case 0x06: strcpy(insn[i],"CTC2"); type=COP2; break;
b9b61529 7610 }
7611 break;
7612 case 0x32: strcpy(insn[i],"LWC2"); type=C2LS; break;
7613 case 0x3A: strcpy(insn[i],"SWC2"); type=C2LS; break;
7614 case 0x3B: strcpy(insn[i],"HLECALL"); type=HLECALL; break;
90ae6d4e 7615 default: strcpy(insn[i],"???"); type=NI;
c43b5311 7616 SysPrintf("NI %08x @%08x (%08x)\n", source[i], addr + i*4, addr);
90ae6d4e 7617 break;
57871462 7618 }
7619 itype[i]=type;
7620 opcode2[i]=op2;
7621 /* Get registers/immediates */
7622 lt1[i]=0;
7623 us1[i]=0;
7624 us2[i]=0;
7625 dep1[i]=0;
7626 dep2[i]=0;
bedfea38 7627 gte_rs[i]=gte_rt[i]=0;
57871462 7628 switch(type) {
7629 case LOAD:
7630 rs1[i]=(source[i]>>21)&0x1f;
7631 rs2[i]=0;
7632 rt1[i]=(source[i]>>16)&0x1f;
7633 rt2[i]=0;
7634 imm[i]=(short)source[i];
7635 break;
7636 case STORE:
7637 case STORELR:
7638 rs1[i]=(source[i]>>21)&0x1f;
7639 rs2[i]=(source[i]>>16)&0x1f;
7640 rt1[i]=0;
7641 rt2[i]=0;
7642 imm[i]=(short)source[i];
7643 if(op==0x2c||op==0x2d||op==0x3f) us1[i]=rs2[i]; // 64-bit SDL/SDR/SD
7644 break;
7645 case LOADLR:
7646 // LWL/LWR only load part of the register,
7647 // therefore the target register must be treated as a source too
7648 rs1[i]=(source[i]>>21)&0x1f;
7649 rs2[i]=(source[i]>>16)&0x1f;
7650 rt1[i]=(source[i]>>16)&0x1f;
7651 rt2[i]=0;
7652 imm[i]=(short)source[i];
7653 if(op==0x1a||op==0x1b) us1[i]=rs2[i]; // LDR/LDL
7654 if(op==0x26) dep1[i]=rt1[i]; // LWR
7655 break;
7656 case IMM16:
7657 if (op==0x0f) rs1[i]=0; // LUI instruction has no source register
7658 else rs1[i]=(source[i]>>21)&0x1f;
7659 rs2[i]=0;
7660 rt1[i]=(source[i]>>16)&0x1f;
7661 rt2[i]=0;
7662 if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI
7663 imm[i]=(unsigned short)source[i];
7664 }else{
7665 imm[i]=(short)source[i];
7666 }
7667 if(op==0x18||op==0x19) us1[i]=rs1[i]; // DADDI/DADDIU
7668 if(op==0x0a||op==0x0b) us1[i]=rs1[i]; // SLTI/SLTIU
7669 if(op==0x0d||op==0x0e) dep1[i]=rs1[i]; // ORI/XORI
7670 break;
7671 case UJUMP:
7672 rs1[i]=0;
7673 rs2[i]=0;
7674 rt1[i]=0;
7675 rt2[i]=0;
7676 // The JAL instruction writes to r31.
7677 if (op&1) {
7678 rt1[i]=31;
7679 }
7680 rs2[i]=CCREG;
7681 break;
7682 case RJUMP:
7683 rs1[i]=(source[i]>>21)&0x1f;
7684 rs2[i]=0;
7685 rt1[i]=0;
7686 rt2[i]=0;
5067f341 7687 // The JALR instruction writes to rd.
57871462 7688 if (op2&1) {
5067f341 7689 rt1[i]=(source[i]>>11)&0x1f;
57871462 7690 }
7691 rs2[i]=CCREG;
7692 break;
7693 case CJUMP:
7694 rs1[i]=(source[i]>>21)&0x1f;
7695 rs2[i]=(source[i]>>16)&0x1f;
7696 rt1[i]=0;
7697 rt2[i]=0;
7698 if(op&2) { // BGTZ/BLEZ
7699 rs2[i]=0;
7700 }
7701 us1[i]=rs1[i];
7702 us2[i]=rs2[i];
7703 likely[i]=op>>4;
7704 break;
7705 case SJUMP:
7706 rs1[i]=(source[i]>>21)&0x1f;
7707 rs2[i]=CCREG;
7708 rt1[i]=0;
7709 rt2[i]=0;
7710 us1[i]=rs1[i];
7711 if(op2&0x10) { // BxxAL
7712 rt1[i]=31;
7713 // NOTE: If the branch is not taken, r31 is still overwritten
7714 }
7715 likely[i]=(op2&2)>>1;
7716 break;
7717 case FJUMP:
7718 rs1[i]=FSREG;
7719 rs2[i]=CSREG;
7720 rt1[i]=0;
7721 rt2[i]=0;
7722 likely[i]=((source[i])>>17)&1;
7723 break;
7724 case ALU:
7725 rs1[i]=(source[i]>>21)&0x1f; // source
7726 rs2[i]=(source[i]>>16)&0x1f; // subtract amount
7727 rt1[i]=(source[i]>>11)&0x1f; // destination
7728 rt2[i]=0;
7729 if(op2==0x2a||op2==0x2b) { // SLT/SLTU
7730 us1[i]=rs1[i];us2[i]=rs2[i];
7731 }
7732 else if(op2>=0x24&&op2<=0x27) { // AND/OR/XOR/NOR
7733 dep1[i]=rs1[i];dep2[i]=rs2[i];
7734 }
7735 else if(op2>=0x2c&&op2<=0x2f) { // DADD/DSUB
7736 dep1[i]=rs1[i];dep2[i]=rs2[i];
7737 }
7738 break;
7739 case MULTDIV:
7740 rs1[i]=(source[i]>>21)&0x1f; // source
7741 rs2[i]=(source[i]>>16)&0x1f; // divisor
7742 rt1[i]=HIREG;
7743 rt2[i]=LOREG;
7744 if (op2>=0x1c&&op2<=0x1f) { // DMULT/DMULTU/DDIV/DDIVU
7745 us1[i]=rs1[i];us2[i]=rs2[i];
7746 }
7747 break;
7748 case MOV:
7749 rs1[i]=0;
7750 rs2[i]=0;
7751 rt1[i]=0;
7752 rt2[i]=0;
7753 if(op2==0x10) rs1[i]=HIREG; // MFHI
7754 if(op2==0x11) rt1[i]=HIREG; // MTHI
7755 if(op2==0x12) rs1[i]=LOREG; // MFLO
7756 if(op2==0x13) rt1[i]=LOREG; // MTLO
7757 if((op2&0x1d)==0x10) rt1[i]=(source[i]>>11)&0x1f; // MFxx
7758 if((op2&0x1d)==0x11) rs1[i]=(source[i]>>21)&0x1f; // MTxx
7759 dep1[i]=rs1[i];
7760 break;
7761 case SHIFT:
7762 rs1[i]=(source[i]>>16)&0x1f; // target of shift
7763 rs2[i]=(source[i]>>21)&0x1f; // shift amount
7764 rt1[i]=(source[i]>>11)&0x1f; // destination
7765 rt2[i]=0;
7766 // DSLLV/DSRLV/DSRAV are 64-bit
7767 if(op2>=0x14&&op2<=0x17) us1[i]=rs1[i];
7768 break;
7769 case SHIFTIMM:
7770 rs1[i]=(source[i]>>16)&0x1f;
7771 rs2[i]=0;
7772 rt1[i]=(source[i]>>11)&0x1f;
7773 rt2[i]=0;
7774 imm[i]=(source[i]>>6)&0x1f;
7775 // DSxx32 instructions
7776 if(op2>=0x3c) imm[i]|=0x20;
7777 // DSLL/DSRL/DSRA/DSRA32/DSRL32 but not DSLL32 require 64-bit source
7778 if(op2>=0x38&&op2!=0x3c) us1[i]=rs1[i];
7779 break;
7780 case COP0:
7781 rs1[i]=0;
7782 rs2[i]=0;
7783 rt1[i]=0;
7784 rt2[i]=0;
7785 if(op2==0) rt1[i]=(source[i]>>16)&0x1F; // MFC0
7786 if(op2==4) rs1[i]=(source[i]>>16)&0x1F; // MTC0
7787 if(op2==4&&((source[i]>>11)&0x1f)==12) rt2[i]=CSREG; // Status
7788 if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET
7789 break;
7790 case COP1:
7791 rs1[i]=0;
7792 rs2[i]=0;
7793 rt1[i]=0;
7794 rt2[i]=0;
7795 if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1
7796 if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC1/DMTC1/CTC1
7797 if(op2==5) us1[i]=rs1[i]; // DMTC1
7798 rs2[i]=CSREG;
7799 break;
bedfea38 7800 case COP2:
7801 rs1[i]=0;
7802 rs2[i]=0;
7803 rt1[i]=0;
7804 rt2[i]=0;
7805 if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2
7806 if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2
7807 rs2[i]=CSREG;
7808 int gr=(source[i]>>11)&0x1F;
7809 switch(op2)
7810 {
7811 case 0x00: gte_rs[i]=1ll<<gr; break; // MFC2
7812 case 0x04: gte_rt[i]=1ll<<gr; break; // MTC2
0ff8c62c 7813 case 0x02: gte_rs[i]=1ll<<(gr+32); break; // CFC2
bedfea38 7814 case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2
7815 }
7816 break;
57871462 7817 case C1LS:
7818 rs1[i]=(source[i]>>21)&0x1F;
7819 rs2[i]=CSREG;
7820 rt1[i]=0;
7821 rt2[i]=0;
7822 imm[i]=(short)source[i];
7823 break;
b9b61529 7824 case C2LS:
7825 rs1[i]=(source[i]>>21)&0x1F;
7826 rs2[i]=0;
7827 rt1[i]=0;
7828 rt2[i]=0;
7829 imm[i]=(short)source[i];
bedfea38 7830 if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2
7831 else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2
7832 break;
7833 case C2OP:
7834 rs1[i]=0;
7835 rs2[i]=0;
7836 rt1[i]=0;
7837 rt2[i]=0;
2167bef6 7838 gte_rs[i]=gte_reg_reads[source[i]&0x3f];
7839 gte_rt[i]=gte_reg_writes[source[i]&0x3f];
7840 gte_rt[i]|=1ll<<63; // every op changes flags
587a5b1c 7841 if((source[i]&0x3f)==GTE_MVMVA) {
7842 int v = (source[i] >> 15) & 3;
7843 gte_rs[i]&=~0xe3fll;
7844 if(v==3) gte_rs[i]|=0xe00ll;
7845 else gte_rs[i]|=3ll<<(v*2);
7846 }
b9b61529 7847 break;
57871462 7848 case FLOAT:
7849 case FCONV:
7850 rs1[i]=0;
7851 rs2[i]=CSREG;
7852 rt1[i]=0;
7853 rt2[i]=0;
7854 break;
7855 case FCOMP:
7856 rs1[i]=FSREG;
7857 rs2[i]=CSREG;
7858 rt1[i]=FSREG;
7859 rt2[i]=0;
7860 break;
7861 case SYSCALL:
7139f3c8 7862 case HLECALL:
1e973cb0 7863 case INTCALL:
57871462 7864 rs1[i]=CCREG;
7865 rs2[i]=0;
7866 rt1[i]=0;
7867 rt2[i]=0;
7868 break;
7869 default:
7870 rs1[i]=0;
7871 rs2[i]=0;
7872 rt1[i]=0;
7873 rt2[i]=0;
7874 }
7875 /* Calculate branch target addresses */
7876 if(type==UJUMP)
7877 ba[i]=((start+i*4+4)&0xF0000000)|(((unsigned int)source[i]<<6)>>4);
7878 else if(type==CJUMP&&rs1[i]==rs2[i]&&(op&1))
7879 ba[i]=start+i*4+8; // Ignore never taken branch
7880 else if(type==SJUMP&&rs1[i]==0&&!(op2&1))
7881 ba[i]=start+i*4+8; // Ignore never taken branch
7882 else if(type==CJUMP||type==SJUMP||type==FJUMP)
7883 ba[i]=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14);
7884 else ba[i]=-1;
3e535354 7885 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
7886 int do_in_intrp=0;
7887 // branch in delay slot?
7888 if(type==RJUMP||type==UJUMP||type==CJUMP||type==SJUMP||type==FJUMP) {
7889 // don't handle first branch and call interpreter if it's hit
c43b5311 7890 SysPrintf("branch in delay slot @%08x (%08x)\n", addr + i*4, addr);
3e535354 7891 do_in_intrp=1;
7892 }
7893 // basic load delay detection
7894 else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&rt1[i]!=0) {
7895 int t=(ba[i-1]-start)/4;
7896 if(0 <= t && t < i &&(rt1[i]==rs1[t]||rt1[i]==rs2[t])&&itype[t]!=CJUMP&&itype[t]!=SJUMP) {
7897 // jump target wants DS result - potential load delay effect
c43b5311 7898 SysPrintf("load delay @%08x (%08x)\n", addr + i*4, addr);
3e535354 7899 do_in_intrp=1;
7900 bt[t+1]=1; // expected return from interpreter
7901 }
7902 else if(i>=2&&rt1[i-2]==2&&rt1[i]==2&&rs1[i]!=2&&rs2[i]!=2&&rs1[i-1]!=2&&rs2[i-1]!=2&&
7903 !(i>=3&&(itype[i-3]==RJUMP||itype[i-3]==UJUMP||itype[i-3]==CJUMP||itype[i-3]==SJUMP))) {
7904 // v0 overwrite like this is a sign of trouble, bail out
c43b5311 7905 SysPrintf("v0 overwrite @%08x (%08x)\n", addr + i*4, addr);
3e535354 7906 do_in_intrp=1;
7907 }
7908 }
3e535354 7909 if(do_in_intrp) {
7910 rs1[i-1]=CCREG;
7911 rs2[i-1]=rt1[i-1]=rt2[i-1]=0;
26869094 7912 ba[i-1]=-1;
7913 itype[i-1]=INTCALL;
7914 done=2;
3e535354 7915 i--; // don't compile the DS
26869094 7916 }
3e535354 7917 }
3e535354 7918 /* Is this the end of the block? */
7919 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) {
5067f341 7920 if(rt1[i-1]==0) { // Continue past subroutine call (JAL)
1e973cb0 7921 done=2;
57871462 7922 }
7923 else {
7924 if(stop_after_jal) done=1;
7925 // Stop on BREAK
7926 if((source[i+1]&0xfc00003f)==0x0d) done=1;
7927 }
7928 // Don't recompile stuff that's already compiled
7929 if(check_addr(start+i*4+4)) done=1;
7930 // Don't get too close to the limit
7931 if(i>MAXBLOCK/2) done=1;
7932 }
75dec299 7933 if(itype[i]==SYSCALL&&stop_after_jal) done=1;
1e973cb0 7934 if(itype[i]==HLECALL||itype[i]==INTCALL) done=2;
7935 if(done==2) {
7936 // Does the block continue due to a branch?
7937 for(j=i-1;j>=0;j--)
7938 {
2a706964 7939 if(ba[j]==start+i*4) done=j=0; // Branch into delay slot
1e973cb0 7940 if(ba[j]==start+i*4+4) done=j=0;
7941 if(ba[j]==start+i*4+8) done=j=0;
7942 }
7943 }
75dec299 7944 //assert(i<MAXBLOCK-1);
57871462 7945 if(start+i*4==pagelimit-4) done=1;
7946 assert(start+i*4<pagelimit);
7947 if (i==MAXBLOCK-1) done=1;
7948 // Stop if we're compiling junk
7949 if(itype[i]==NI&&opcode[i]==0x11) {
7950 done=stop_after_jal=1;
c43b5311 7951 SysPrintf("Disabled speculative precompilation\n");
57871462 7952 }
7953 }
7954 slen=i;
7955 if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==RJUMP||itype[i-1]==FJUMP) {
7956 if(start+i*4==pagelimit) {
7957 itype[i-1]=SPAN;
7958 }
7959 }
7960 assert(slen>0);
7961
7962 /* Pass 2 - Register dependencies and branch targets */
7963
7964 unneeded_registers(0,slen-1,0);
9f51b4b9 7965
57871462 7966 /* Pass 3 - Register allocation */
7967
7968 struct regstat current; // Current register allocations/status
7969 current.is32=1;
7970 current.dirty=0;
7971 current.u=unneeded_reg[0];
7972 current.uu=unneeded_reg_upper[0];
7973 clear_all_regs(current.regmap);
7974 alloc_reg(&current,0,CCREG);
7975 dirty_reg(&current,CCREG);
7976 current.isconst=0;
7977 current.wasconst=0;
27727b63 7978 current.waswritten=0;
57871462 7979 int ds=0;
7980 int cc=0;
5194fb95 7981 int hr=-1;
6ebf4adf 7982
57871462 7983 if((u_int)addr&1) {
7984 // First instruction is delay slot
7985 cc=-1;
7986 bt[1]=1;
7987 ds=1;
7988 unneeded_reg[0]=1;
7989 unneeded_reg_upper[0]=1;
7990 current.regmap[HOST_BTREG]=BTREG;
7991 }
9f51b4b9 7992
57871462 7993 for(i=0;i<slen;i++)
7994 {
7995 if(bt[i])
7996 {
7997 int hr;
7998 for(hr=0;hr<HOST_REGS;hr++)
7999 {
8000 // Is this really necessary?
8001 if(current.regmap[hr]==0) current.regmap[hr]=-1;
8002 }
8003 current.isconst=0;
27727b63 8004 current.waswritten=0;
57871462 8005 }
8006 if(i>1)
8007 {
8008 if((opcode[i-2]&0x2f)==0x05) // BNE/BNEL
8009 {
8010 if(rs1[i-2]==0||rs2[i-2]==0)
8011 {
8012 if(rs1[i-2]) {
8013 current.is32|=1LL<<rs1[i-2];
8014 int hr=get_reg(current.regmap,rs1[i-2]|64);
8015 if(hr>=0) current.regmap[hr]=-1;
8016 }
8017 if(rs2[i-2]) {
8018 current.is32|=1LL<<rs2[i-2];
8019 int hr=get_reg(current.regmap,rs2[i-2]|64);
8020 if(hr>=0) current.regmap[hr]=-1;
8021 }
8022 }
8023 }
8024 }
24385cae 8025 current.is32=-1LL;
24385cae 8026
57871462 8027 memcpy(regmap_pre[i],current.regmap,sizeof(current.regmap));
8028 regs[i].wasconst=current.isconst;
8029 regs[i].was32=current.is32;
8030 regs[i].wasdirty=current.dirty;
8575a877 8031 regs[i].loadedconst=0;
57871462 8032 if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) {
8033 if(i+1<slen) {
8034 current.u=unneeded_reg[i+1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
8035 current.uu=unneeded_reg_upper[i+1]&~((1LL<<us1[i])|(1LL<<us2[i]));
8036 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
8037 current.u|=1;
8038 current.uu|=1;
8039 } else {
8040 current.u=1;
8041 current.uu=1;
8042 }
8043 } else {
8044 if(i+1<slen) {
8045 current.u=branch_unneeded_reg[i]&~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
8046 current.uu=branch_unneeded_reg_upper[i]&~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
8047 if((~current.uu>>rt1[i+1])&1) current.uu&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
8048 current.u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
8049 current.uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
8050 current.u|=1;
8051 current.uu|=1;
c43b5311 8052 } else { SysPrintf("oops, branch at end of block with no delay slot\n");exit(1); }
57871462 8053 }
8054 is_ds[i]=ds;
8055 if(ds) {
8056 ds=0; // Skip delay slot, already allocated as part of branch
8057 // ...but we need to alloc it in case something jumps here
8058 if(i+1<slen) {
8059 current.u=branch_unneeded_reg[i-1]&unneeded_reg[i+1];
8060 current.uu=branch_unneeded_reg_upper[i-1]&unneeded_reg_upper[i+1];
8061 }else{
8062 current.u=branch_unneeded_reg[i-1];
8063 current.uu=branch_unneeded_reg_upper[i-1];
8064 }
8065 current.u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
8066 current.uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
8067 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
8068 current.u|=1;
8069 current.uu|=1;
8070 struct regstat temp;
8071 memcpy(&temp,&current,sizeof(current));
8072 temp.wasdirty=temp.dirty;
8073 temp.was32=temp.is32;
8074 // TODO: Take into account unconditional branches, as below
8075 delayslot_alloc(&temp,i);
8076 memcpy(regs[i].regmap,temp.regmap,sizeof(temp.regmap));
8077 regs[i].wasdirty=temp.wasdirty;
8078 regs[i].was32=temp.was32;
8079 regs[i].dirty=temp.dirty;
8080 regs[i].is32=temp.is32;
8081 regs[i].isconst=0;
8082 regs[i].wasconst=0;
8083 current.isconst=0;
8084 // Create entry (branch target) regmap
8085 for(hr=0;hr<HOST_REGS;hr++)
8086 {
8087 int r=temp.regmap[hr];
8088 if(r>=0) {
8089 if(r!=regmap_pre[i][hr]) {
8090 regs[i].regmap_entry[hr]=-1;
8091 }
8092 else
8093 {
8094 if(r<64){
8095 if((current.u>>r)&1) {
8096 regs[i].regmap_entry[hr]=-1;
8097 regs[i].regmap[hr]=-1;
8098 //Don't clear regs in the delay slot as the branch might need them
8099 //current.regmap[hr]=-1;
8100 }else
8101 regs[i].regmap_entry[hr]=r;
8102 }
8103 else {
8104 if((current.uu>>(r&63))&1) {
8105 regs[i].regmap_entry[hr]=-1;
8106 regs[i].regmap[hr]=-1;
8107 //Don't clear regs in the delay slot as the branch might need them
8108 //current.regmap[hr]=-1;
8109 }else
8110 regs[i].regmap_entry[hr]=r;
8111 }
8112 }
8113 } else {
8114 // First instruction expects CCREG to be allocated
9f51b4b9 8115 if(i==0&&hr==HOST_CCREG)
57871462 8116 regs[i].regmap_entry[hr]=CCREG;
8117 else
8118 regs[i].regmap_entry[hr]=-1;
8119 }
8120 }
8121 }
8122 else { // Not delay slot
8123 switch(itype[i]) {
8124 case UJUMP:
8125 //current.isconst=0; // DEBUG
8126 //current.wasconst=0; // DEBUG
8127 //regs[i].wasconst=0; // DEBUG
8128 clear_const(&current,rt1[i]);
8129 alloc_cc(&current,i);
8130 dirty_reg(&current,CCREG);
8131 if (rt1[i]==31) {
8132 alloc_reg(&current,i,31);
8133 dirty_reg(&current,31);
4ef8f67d 8134 //assert(rs1[i+1]!=31&&rs2[i+1]!=31);
8135 //assert(rt1[i+1]!=rt1[i]);
57871462 8136 #ifdef REG_PREFETCH
8137 alloc_reg(&current,i,PTEMP);
8138 #endif
8139 //current.is32|=1LL<<rt1[i];
8140 }
269bb29a 8141 ooo[i]=1;
8142 delayslot_alloc(&current,i+1);
57871462 8143 //current.isconst=0; // DEBUG
8144 ds=1;
8145 //printf("i=%d, isconst=%x\n",i,current.isconst);
8146 break;
8147 case RJUMP:
8148 //current.isconst=0;
8149 //current.wasconst=0;
8150 //regs[i].wasconst=0;
8151 clear_const(&current,rs1[i]);
8152 clear_const(&current,rt1[i]);
8153 alloc_cc(&current,i);
8154 dirty_reg(&current,CCREG);
8155 if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) {
8156 alloc_reg(&current,i,rs1[i]);
5067f341 8157 if (rt1[i]!=0) {
8158 alloc_reg(&current,i,rt1[i]);
8159 dirty_reg(&current,rt1[i]);
68b3faee 8160 assert(rs1[i+1]!=rt1[i]&&rs2[i+1]!=rt1[i]);
076655d1 8161 assert(rt1[i+1]!=rt1[i]);
57871462 8162 #ifdef REG_PREFETCH
8163 alloc_reg(&current,i,PTEMP);
8164 #endif
8165 }
8166 #ifdef USE_MINI_HT
8167 if(rs1[i]==31) { // JALR
8168 alloc_reg(&current,i,RHASH);
8169 #ifndef HOST_IMM_ADDR32
8170 alloc_reg(&current,i,RHTBL);
8171 #endif
8172 }
8173 #endif
8174 delayslot_alloc(&current,i+1);
8175 } else {
8176 // The delay slot overwrites our source register,
8177 // allocate a temporary register to hold the old value.
8178 current.isconst=0;
8179 current.wasconst=0;
8180 regs[i].wasconst=0;
8181 delayslot_alloc(&current,i+1);
8182 current.isconst=0;
8183 alloc_reg(&current,i,RTEMP);
8184 }
8185 //current.isconst=0; // DEBUG
e1190b87 8186 ooo[i]=1;
57871462 8187 ds=1;
8188 break;
8189 case CJUMP:
8190 //current.isconst=0;
8191 //current.wasconst=0;
8192 //regs[i].wasconst=0;
8193 clear_const(&current,rs1[i]);
8194 clear_const(&current,rs2[i]);
8195 if((opcode[i]&0x3E)==4) // BEQ/BNE
8196 {
8197 alloc_cc(&current,i);
8198 dirty_reg(&current,CCREG);
8199 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
8200 if(rs2[i]) alloc_reg(&current,i,rs2[i]);
8201 if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1))
8202 {
8203 if(rs1[i]) alloc_reg64(&current,i,rs1[i]);
8204 if(rs2[i]) alloc_reg64(&current,i,rs2[i]);
8205 }
8206 if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))||
8207 (rs2[i]&&(rs2[i]==rt1[i+1]||rs2[i]==rt2[i+1]))) {
8208 // The delay slot overwrites one of our conditions.
8209 // Allocate the branch condition registers instead.
57871462 8210 current.isconst=0;
8211 current.wasconst=0;
8212 regs[i].wasconst=0;
8213 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
8214 if(rs2[i]) alloc_reg(&current,i,rs2[i]);
8215 if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1))
8216 {
8217 if(rs1[i]) alloc_reg64(&current,i,rs1[i]);
8218 if(rs2[i]) alloc_reg64(&current,i,rs2[i]);
8219 }
8220 }
e1190b87 8221 else
8222 {
8223 ooo[i]=1;
8224 delayslot_alloc(&current,i+1);
8225 }
57871462 8226 }
8227 else
8228 if((opcode[i]&0x3E)==6) // BLEZ/BGTZ
8229 {
8230 alloc_cc(&current,i);
8231 dirty_reg(&current,CCREG);
8232 alloc_reg(&current,i,rs1[i]);
8233 if(!(current.is32>>rs1[i]&1))
8234 {
8235 alloc_reg64(&current,i,rs1[i]);
8236 }
8237 if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) {
8238 // The delay slot overwrites one of our conditions.
8239 // Allocate the branch condition registers instead.
57871462 8240 current.isconst=0;
8241 current.wasconst=0;
8242 regs[i].wasconst=0;
8243 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
8244 if(!((current.is32>>rs1[i])&1))
8245 {
8246 if(rs1[i]) alloc_reg64(&current,i,rs1[i]);
8247 }
8248 }
e1190b87 8249 else
8250 {
8251 ooo[i]=1;
8252 delayslot_alloc(&current,i+1);
8253 }
57871462 8254 }
8255 else
8256 // Don't alloc the delay slot yet because we might not execute it
8257 if((opcode[i]&0x3E)==0x14) // BEQL/BNEL
8258 {
8259 current.isconst=0;
8260 current.wasconst=0;
8261 regs[i].wasconst=0;
8262 alloc_cc(&current,i);
8263 dirty_reg(&current,CCREG);
8264 alloc_reg(&current,i,rs1[i]);
8265 alloc_reg(&current,i,rs2[i]);
8266 if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1))
8267 {
8268 alloc_reg64(&current,i,rs1[i]);
8269 alloc_reg64(&current,i,rs2[i]);
8270 }
8271 }
8272 else
8273 if((opcode[i]&0x3E)==0x16) // BLEZL/BGTZL
8274 {
8275 current.isconst=0;
8276 current.wasconst=0;
8277 regs[i].wasconst=0;
8278 alloc_cc(&current,i);
8279 dirty_reg(&current,CCREG);
8280 alloc_reg(&current,i,rs1[i]);
8281 if(!(current.is32>>rs1[i]&1))
8282 {
8283 alloc_reg64(&current,i,rs1[i]);
8284 }
8285 }
8286 ds=1;
8287 //current.isconst=0;
8288 break;
8289 case SJUMP:
8290 //current.isconst=0;
8291 //current.wasconst=0;
8292 //regs[i].wasconst=0;
8293 clear_const(&current,rs1[i]);
8294 clear_const(&current,rt1[i]);
8295 //if((opcode2[i]&0x1E)==0x0) // BLTZ/BGEZ
8296 if((opcode2[i]&0x0E)==0x0) // BLTZ/BGEZ
8297 {
8298 alloc_cc(&current,i);
8299 dirty_reg(&current,CCREG);
8300 alloc_reg(&current,i,rs1[i]);
8301 if(!(current.is32>>rs1[i]&1))
8302 {
8303 alloc_reg64(&current,i,rs1[i]);
8304 }
8305 if (rt1[i]==31) { // BLTZAL/BGEZAL
8306 alloc_reg(&current,i,31);
8307 dirty_reg(&current,31);
57871462 8308 //#ifdef REG_PREFETCH
8309 //alloc_reg(&current,i,PTEMP);
8310 //#endif
8311 //current.is32|=1LL<<rt1[i];
8312 }
e1190b87 8313 if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) // The delay slot overwrites the branch condition.
8314 ||(rt1[i]==31&&(rs1[i+1]==31||rs2[i+1]==31||rt1[i+1]==31||rt2[i+1]==31))) { // DS touches $ra
57871462 8315 // Allocate the branch condition registers instead.
57871462 8316 current.isconst=0;
8317 current.wasconst=0;
8318 regs[i].wasconst=0;
8319 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
8320 if(!((current.is32>>rs1[i])&1))
8321 {
8322 if(rs1[i]) alloc_reg64(&current,i,rs1[i]);
8323 }
8324 }
e1190b87 8325 else
8326 {
8327 ooo[i]=1;
8328 delayslot_alloc(&current,i+1);
8329 }
57871462 8330 }
8331 else
8332 // Don't alloc the delay slot yet because we might not execute it
8333 if((opcode2[i]&0x1E)==0x2) // BLTZL/BGEZL
8334 {
8335 current.isconst=0;
8336 current.wasconst=0;
8337 regs[i].wasconst=0;
8338 alloc_cc(&current,i);
8339 dirty_reg(&current,CCREG);
8340 alloc_reg(&current,i,rs1[i]);
8341 if(!(current.is32>>rs1[i]&1))
8342 {
8343 alloc_reg64(&current,i,rs1[i]);
8344 }
8345 }
8346 ds=1;
8347 //current.isconst=0;
8348 break;
8349 case FJUMP:
8350 current.isconst=0;
8351 current.wasconst=0;
8352 regs[i].wasconst=0;
8353 if(likely[i]==0) // BC1F/BC1T
8354 {
8355 // TODO: Theoretically we can run out of registers here on x86.
8356 // The delay slot can allocate up to six, and we need to check
8357 // CSREG before executing the delay slot. Possibly we can drop
8358 // the cycle count and then reload it after checking that the
8359 // FPU is in a usable state, or don't do out-of-order execution.
8360 alloc_cc(&current,i);
8361 dirty_reg(&current,CCREG);
8362 alloc_reg(&current,i,FSREG);
8363 alloc_reg(&current,i,CSREG);
8364 if(itype[i+1]==FCOMP) {
8365 // The delay slot overwrites the branch condition.
8366 // Allocate the branch condition registers instead.
57871462 8367 alloc_cc(&current,i);
8368 dirty_reg(&current,CCREG);
8369 alloc_reg(&current,i,CSREG);
8370 alloc_reg(&current,i,FSREG);
8371 }
8372 else {
e1190b87 8373 ooo[i]=1;
57871462 8374 delayslot_alloc(&current,i+1);
8375 alloc_reg(&current,i+1,CSREG);
8376 }
8377 }
8378 else
8379 // Don't alloc the delay slot yet because we might not execute it
8380 if(likely[i]) // BC1FL/BC1TL
8381 {
8382 alloc_cc(&current,i);
8383 dirty_reg(&current,CCREG);
8384 alloc_reg(&current,i,CSREG);
8385 alloc_reg(&current,i,FSREG);
8386 }
8387 ds=1;
8388 current.isconst=0;
8389 break;
8390 case IMM16:
8391 imm16_alloc(&current,i);
8392 break;
8393 case LOAD:
8394 case LOADLR:
8395 load_alloc(&current,i);
8396 break;
8397 case STORE:
8398 case STORELR:
8399 store_alloc(&current,i);
8400 break;
8401 case ALU:
8402 alu_alloc(&current,i);
8403 break;
8404 case SHIFT:
8405 shift_alloc(&current,i);
8406 break;
8407 case MULTDIV:
8408 multdiv_alloc(&current,i);
8409 break;
8410 case SHIFTIMM:
8411 shiftimm_alloc(&current,i);
8412 break;
8413 case MOV:
8414 mov_alloc(&current,i);
8415 break;
8416 case COP0:
8417 cop0_alloc(&current,i);
8418 break;
8419 case COP1:
b9b61529 8420 case COP2:
57871462 8421 cop1_alloc(&current,i);
8422 break;
8423 case C1LS:
8424 c1ls_alloc(&current,i);
8425 break;
b9b61529 8426 case C2LS:
8427 c2ls_alloc(&current,i);
8428 break;
8429 case C2OP:
8430 c2op_alloc(&current,i);
8431 break;
57871462 8432 case FCONV:
8433 fconv_alloc(&current,i);
8434 break;
8435 case FLOAT:
8436 float_alloc(&current,i);
8437 break;
8438 case FCOMP:
8439 fcomp_alloc(&current,i);
8440 break;
8441 case SYSCALL:
7139f3c8 8442 case HLECALL:
1e973cb0 8443 case INTCALL:
57871462 8444 syscall_alloc(&current,i);
8445 break;
8446 case SPAN:
8447 pagespan_alloc(&current,i);
8448 break;
8449 }
9f51b4b9 8450
57871462 8451 // Drop the upper half of registers that have become 32-bit
8452 current.uu|=current.is32&((1LL<<rt1[i])|(1LL<<rt2[i]));
8453 if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) {
8454 current.uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
8455 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
8456 current.uu|=1;
8457 } else {
8458 current.uu|=current.is32&((1LL<<rt1[i+1])|(1LL<<rt2[i+1]));
8459 current.uu&=~((1LL<<us1[i+1])|(1LL<<us2[i+1]));
8460 if((~current.uu>>rt1[i+1])&1) current.uu&=~((1LL<<dep1[i+1])|(1LL<<dep2[i+1]));
8461 current.uu&=~((1LL<<us1[i])|(1LL<<us2[i]));
8462 current.uu|=1;
8463 }
8464
8465 // Create entry (branch target) regmap
8466 for(hr=0;hr<HOST_REGS;hr++)
8467 {
581335b0 8468 int r,or;
57871462 8469 r=current.regmap[hr];
8470 if(r>=0) {
8471 if(r!=regmap_pre[i][hr]) {
8472 // TODO: delay slot (?)
8473 or=get_reg(regmap_pre[i],r); // Get old mapping for this register
8474 if(or<0||(r&63)>=TEMPREG){
8475 regs[i].regmap_entry[hr]=-1;
8476 }
8477 else
8478 {
8479 // Just move it to a different register
8480 regs[i].regmap_entry[hr]=r;
8481 // If it was dirty before, it's still dirty
8482 if((regs[i].wasdirty>>or)&1) dirty_reg(&current,r&63);
8483 }
8484 }
8485 else
8486 {
8487 // Unneeded
8488 if(r==0){
8489 regs[i].regmap_entry[hr]=0;
8490 }
8491 else
8492 if(r<64){
8493 if((current.u>>r)&1) {
8494 regs[i].regmap_entry[hr]=-1;
8495 //regs[i].regmap[hr]=-1;
8496 current.regmap[hr]=-1;
8497 }else
8498 regs[i].regmap_entry[hr]=r;
8499 }
8500 else {
8501 if((current.uu>>(r&63))&1) {
8502 regs[i].regmap_entry[hr]=-1;
8503 //regs[i].regmap[hr]=-1;
8504 current.regmap[hr]=-1;
8505 }else
8506 regs[i].regmap_entry[hr]=r;
8507 }
8508 }
8509 } else {
8510 // Branches expect CCREG to be allocated at the target
9f51b4b9 8511 if(regmap_pre[i][hr]==CCREG)
57871462 8512 regs[i].regmap_entry[hr]=CCREG;
8513 else
8514 regs[i].regmap_entry[hr]=-1;
8515 }
8516 }
8517 memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap));
8518 }
27727b63 8519
8520 if(i>0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800)
8521 current.waswritten|=1<<rs1[i-1];
8522 current.waswritten&=~(1<<rt1[i]);
8523 current.waswritten&=~(1<<rt2[i]);
8524 if((itype[i]==STORE||itype[i]==STORELR||(itype[i]==C2LS&&opcode[i]==0x3a))&&(u_int)imm[i]>=0x800)
8525 current.waswritten&=~(1<<rs1[i]);
8526
57871462 8527 /* Branch post-alloc */
8528 if(i>0)
8529 {
8530 current.was32=current.is32;
8531 current.wasdirty=current.dirty;
8532 switch(itype[i-1]) {
8533 case UJUMP:
8534 memcpy(&branch_regs[i-1],&current,sizeof(current));
8535 branch_regs[i-1].isconst=0;
8536 branch_regs[i-1].wasconst=0;
8537 branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
8538 branch_regs[i-1].uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i-1])|(1LL<<us2[i-1]));
8539 alloc_cc(&branch_regs[i-1],i-1);
8540 dirty_reg(&branch_regs[i-1],CCREG);
8541 if(rt1[i-1]==31) { // JAL
8542 alloc_reg(&branch_regs[i-1],i-1,31);
8543 dirty_reg(&branch_regs[i-1],31);
8544 branch_regs[i-1].is32|=1LL<<31;
8545 }
8546 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
956f3129 8547 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 8548 break;
8549 case RJUMP:
8550 memcpy(&branch_regs[i-1],&current,sizeof(current));
8551 branch_regs[i-1].isconst=0;
8552 branch_regs[i-1].wasconst=0;
8553 branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
8554 branch_regs[i-1].uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i-1])|(1LL<<us2[i-1]));
8555 alloc_cc(&branch_regs[i-1],i-1);
8556 dirty_reg(&branch_regs[i-1],CCREG);
8557 alloc_reg(&branch_regs[i-1],i-1,rs1[i-1]);
5067f341 8558 if(rt1[i-1]!=0) { // JALR
8559 alloc_reg(&branch_regs[i-1],i-1,rt1[i-1]);
8560 dirty_reg(&branch_regs[i-1],rt1[i-1]);
8561 branch_regs[i-1].is32|=1LL<<rt1[i-1];
57871462 8562 }
8563 #ifdef USE_MINI_HT
8564 if(rs1[i-1]==31) { // JALR
8565 alloc_reg(&branch_regs[i-1],i-1,RHASH);
8566 #ifndef HOST_IMM_ADDR32
8567 alloc_reg(&branch_regs[i-1],i-1,RHTBL);
8568 #endif
8569 }
8570 #endif
8571 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
956f3129 8572 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 8573 break;
8574 case CJUMP:
8575 if((opcode[i-1]&0x3E)==4) // BEQ/BNE
8576 {
8577 alloc_cc(&current,i-1);
8578 dirty_reg(&current,CCREG);
8579 if((rs1[i-1]&&(rs1[i-1]==rt1[i]||rs1[i-1]==rt2[i]))||
8580 (rs2[i-1]&&(rs2[i-1]==rt1[i]||rs2[i-1]==rt2[i]))) {
8581 // The delay slot overwrote one of our conditions
8582 // Delay slot goes after the test (in order)
8583 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
8584 current.uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i]));
8585 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
8586 current.u|=1;
8587 current.uu|=1;
8588 delayslot_alloc(&current,i);
8589 current.isconst=0;
8590 }
8591 else
8592 {
8593 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
8594 current.uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i-1])|(1LL<<us2[i-1]));
8595 // Alloc the branch condition registers
8596 if(rs1[i-1]) alloc_reg(&current,i-1,rs1[i-1]);
8597 if(rs2[i-1]) alloc_reg(&current,i-1,rs2[i-1]);
8598 if(!((current.is32>>rs1[i-1])&(current.is32>>rs2[i-1])&1))
8599 {
8600 if(rs1[i-1]) alloc_reg64(&current,i-1,rs1[i-1]);
8601 if(rs2[i-1]) alloc_reg64(&current,i-1,rs2[i-1]);
8602 }
8603 }
8604 memcpy(&branch_regs[i-1],&current,sizeof(current));
8605 branch_regs[i-1].isconst=0;
8606 branch_regs[i-1].wasconst=0;
8607 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
956f3129 8608 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 8609 }
8610 else
8611 if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ
8612 {
8613 alloc_cc(&current,i-1);
8614 dirty_reg(&current,CCREG);
8615 if(rs1[i-1]==rt1[i]||rs1[i-1]==rt2[i]) {
8616 // The delay slot overwrote the branch condition
8617 // Delay slot goes after the test (in order)
8618 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
8619 current.uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i]));
8620 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
8621 current.u|=1;
8622 current.uu|=1;
8623 delayslot_alloc(&current,i);
8624 current.isconst=0;
8625 }
8626 else
8627 {
8628 current.u=branch_unneeded_reg[i-1]&~(1LL<<rs1[i-1]);
8629 current.uu=branch_unneeded_reg_upper[i-1]&~(1LL<<us1[i-1]);
8630 // Alloc the branch condition register
8631 alloc_reg(&current,i-1,rs1[i-1]);
8632 if(!(current.is32>>rs1[i-1]&1))
8633 {
8634 alloc_reg64(&current,i-1,rs1[i-1]);
8635 }
8636 }
8637 memcpy(&branch_regs[i-1],&current,sizeof(current));
8638 branch_regs[i-1].isconst=0;
8639 branch_regs[i-1].wasconst=0;
8640 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
956f3129 8641 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 8642 }
8643 else
8644 // Alloc the delay slot in case the branch is taken
8645 if((opcode[i-1]&0x3E)==0x14) // BEQL/BNEL
8646 {
8647 memcpy(&branch_regs[i-1],&current,sizeof(current));
8648 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
8649 branch_regs[i-1].uu=(branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
8650 if((~branch_regs[i-1].uu>>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]))|1;
8651 alloc_cc(&branch_regs[i-1],i);
8652 dirty_reg(&branch_regs[i-1],CCREG);
8653 delayslot_alloc(&branch_regs[i-1],i);
8654 branch_regs[i-1].isconst=0;
8655 alloc_reg(&current,i,CCREG); // Not taken path
8656 dirty_reg(&current,CCREG);
8657 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
8658 }
8659 else
8660 if((opcode[i-1]&0x3E)==0x16) // BLEZL/BGTZL
8661 {
8662 memcpy(&branch_regs[i-1],&current,sizeof(current));
8663 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
8664 branch_regs[i-1].uu=(branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
8665 if((~branch_regs[i-1].uu>>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]))|1;
8666 alloc_cc(&branch_regs[i-1],i);
8667 dirty_reg(&branch_regs[i-1],CCREG);
8668 delayslot_alloc(&branch_regs[i-1],i);
8669 branch_regs[i-1].isconst=0;
8670 alloc_reg(&current,i,CCREG); // Not taken path
8671 dirty_reg(&current,CCREG);
8672 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
8673 }
8674 break;
8675 case SJUMP:
8676 //if((opcode2[i-1]&0x1E)==0) // BLTZ/BGEZ
8677 if((opcode2[i-1]&0x0E)==0) // BLTZ/BGEZ
8678 {
8679 alloc_cc(&current,i-1);
8680 dirty_reg(&current,CCREG);
8681 if(rs1[i-1]==rt1[i]||rs1[i-1]==rt2[i]) {
8682 // The delay slot overwrote the branch condition
8683 // Delay slot goes after the test (in order)
8684 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
8685 current.uu=branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i]));
8686 if((~current.uu>>rt1[i])&1) current.uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]));
8687 current.u|=1;
8688 current.uu|=1;
8689 delayslot_alloc(&current,i);
8690 current.isconst=0;
8691 }
8692 else
8693 {
8694 current.u=branch_unneeded_reg[i-1]&~(1LL<<rs1[i-1]);
8695 current.uu=branch_unneeded_reg_upper[i-1]&~(1LL<<us1[i-1]);
8696 // Alloc the branch condition register
8697 alloc_reg(&current,i-1,rs1[i-1]);
8698 if(!(current.is32>>rs1[i-1]&1))
8699 {
8700 alloc_reg64(&current,i-1,rs1[i-1]);
8701 }
8702 }
8703 memcpy(&branch_regs[i-1],&current,sizeof(current));
8704 branch_regs[i-1].isconst=0;
8705 branch_regs[i-1].wasconst=0;
8706 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
956f3129 8707 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 8708 }
8709 else
8710 // Alloc the delay slot in case the branch is taken
8711 if((opcode2[i-1]&0x1E)==2) // BLTZL/BGEZL
8712 {
8713 memcpy(&branch_regs[i-1],&current,sizeof(current));
8714 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
8715 branch_regs[i-1].uu=(branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
8716 if((~branch_regs[i-1].uu>>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]))|1;
8717 alloc_cc(&branch_regs[i-1],i);
8718 dirty_reg(&branch_regs[i-1],CCREG);
8719 delayslot_alloc(&branch_regs[i-1],i);
8720 branch_regs[i-1].isconst=0;
8721 alloc_reg(&current,i,CCREG); // Not taken path
8722 dirty_reg(&current,CCREG);
8723 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
8724 }
8725 // FIXME: BLTZAL/BGEZAL
8726 if(opcode2[i-1]&0x10) { // BxxZAL
8727 alloc_reg(&branch_regs[i-1],i-1,31);
8728 dirty_reg(&branch_regs[i-1],31);
8729 branch_regs[i-1].is32|=1LL<<31;
8730 }
8731 break;
8732 case FJUMP:
8733 if(likely[i-1]==0) // BC1F/BC1T
8734 {
8735 alloc_cc(&current,i-1);
8736 dirty_reg(&current,CCREG);
8737 if(itype[i]==FCOMP) {
8738 // The delay slot overwrote the branch condition
8739 // Delay slot goes after the test (in order)
8740 delayslot_alloc(&current,i);
8741 current.isconst=0;
8742 }
8743 else
8744 {
8745 current.u=branch_unneeded_reg[i-1]&~(1LL<<rs1[i-1]);
8746 current.uu=branch_unneeded_reg_upper[i-1]&~(1LL<<us1[i-1]);
8747 // Alloc the branch condition register
8748 alloc_reg(&current,i-1,FSREG);
8749 }
8750 memcpy(&branch_regs[i-1],&current,sizeof(current));
8751 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
8752 }
8753 else // BC1FL/BC1TL
8754 {
8755 // Alloc the delay slot in case the branch is taken
8756 memcpy(&branch_regs[i-1],&current,sizeof(current));
8757 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
8758 branch_regs[i-1].uu=(branch_unneeded_reg_upper[i-1]&~((1LL<<us1[i])|(1LL<<us2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
8759 if((~branch_regs[i-1].uu>>rt1[i])&1) branch_regs[i-1].uu&=~((1LL<<dep1[i])|(1LL<<dep2[i]))|1;
8760 alloc_cc(&branch_regs[i-1],i);
8761 dirty_reg(&branch_regs[i-1],CCREG);
8762 delayslot_alloc(&branch_regs[i-1],i);
8763 branch_regs[i-1].isconst=0;
8764 alloc_reg(&current,i,CCREG); // Not taken path
8765 dirty_reg(&current,CCREG);
8766 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
8767 }
8768 break;
8769 }
8770
8771 if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)
8772 {
8773 if(rt1[i-1]==31) // JAL/JALR
8774 {
8775 // Subroutine call will return here, don't alloc any registers
8776 current.is32=1;
8777 current.dirty=0;
8778 clear_all_regs(current.regmap);
8779 alloc_reg(&current,i,CCREG);
8780 dirty_reg(&current,CCREG);
8781 }
8782 else if(i+1<slen)
8783 {
8784 // Internal branch will jump here, match registers to caller
8785 current.is32=0x3FFFFFFFFLL;
8786 current.dirty=0;
8787 clear_all_regs(current.regmap);
8788 alloc_reg(&current,i,CCREG);
8789 dirty_reg(&current,CCREG);
8790 for(j=i-1;j>=0;j--)
8791 {
8792 if(ba[j]==start+i*4+4) {
8793 memcpy(current.regmap,branch_regs[j].regmap,sizeof(current.regmap));
8794 current.is32=branch_regs[j].is32;
8795 current.dirty=branch_regs[j].dirty;
8796 break;
8797 }
8798 }
8799 while(j>=0) {
8800 if(ba[j]==start+i*4+4) {
8801 for(hr=0;hr<HOST_REGS;hr++) {
8802 if(current.regmap[hr]!=branch_regs[j].regmap[hr]) {
8803 current.regmap[hr]=-1;
8804 }
8805 current.is32&=branch_regs[j].is32;
8806 current.dirty&=branch_regs[j].dirty;
8807 }
8808 }
8809 j--;
8810 }
8811 }
8812 }
8813 }
8814
8815 // Count cycles in between branches
8816 ccadj[i]=cc;
7139f3c8 8817 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP||itype[i]==SYSCALL||itype[i]==HLECALL))
57871462 8818 {
8819 cc=0;
8820 }
71e490c5 8821#if !defined(DRC_DBG)
054175e9 8822 else if(itype[i]==C2OP&&gte_cycletab[source[i]&0x3f]>2)
8823 {
8824 // GTE runs in parallel until accessed, divide by 2 for a rough guess
8825 cc+=gte_cycletab[source[i]&0x3f]/2;
8826 }
b6e87b2b 8827 else if(/*itype[i]==LOAD||itype[i]==STORE||*/itype[i]==C1LS) // load,store causes weird timing issues
fb407447 8828 {
8829 cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER)
8830 }
5fdcbb5a 8831 else if(i>1&&itype[i]==STORE&&itype[i-1]==STORE&&itype[i-2]==STORE&&!bt[i])
8832 {
8833 cc+=4;
8834 }
fb407447 8835 else if(itype[i]==C2LS)
8836 {
8837 cc+=4;
8838 }
8839#endif
57871462 8840 else
8841 {
8842 cc++;
8843 }
8844
8845 flush_dirty_uppers(&current);
8846 if(!is_ds[i]) {
8847 regs[i].is32=current.is32;
8848 regs[i].dirty=current.dirty;
8849 regs[i].isconst=current.isconst;
956f3129 8850 memcpy(constmap[i],current_constmap,sizeof(current_constmap));
57871462 8851 }
8852 for(hr=0;hr<HOST_REGS;hr++) {
8853 if(hr!=EXCLUDE_REG&&regs[i].regmap[hr]>=0) {
8854 if(regmap_pre[i][hr]!=regs[i].regmap[hr]) {
8855 regs[i].wasconst&=~(1<<hr);
8856 }
8857 }
8858 }
8859 if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1;
27727b63 8860 regs[i].waswritten=current.waswritten;
57871462 8861 }
9f51b4b9 8862
57871462 8863 /* Pass 4 - Cull unused host registers */
9f51b4b9 8864
57871462 8865 uint64_t nr=0;
9f51b4b9 8866
57871462 8867 for (i=slen-1;i>=0;i--)
8868 {
8869 int hr;
8870 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
8871 {
8872 if(ba[i]<start || ba[i]>=(start+slen*4))
8873 {
8874 // Branch out of this block, don't need anything
8875 nr=0;
8876 }
8877 else
8878 {
8879 // Internal branch
8880 // Need whatever matches the target
8881 nr=0;
8882 int t=(ba[i]-start)>>2;
8883 for(hr=0;hr<HOST_REGS;hr++)
8884 {
8885 if(regs[i].regmap_entry[hr]>=0) {
8886 if(regs[i].regmap_entry[hr]==regs[t].regmap_entry[hr]) nr|=1<<hr;
8887 }
8888 }
8889 }
8890 // Conditional branch may need registers for following instructions
8891 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
8892 {
8893 if(i<slen-2) {
8894 nr|=needed_reg[i+2];
8895 for(hr=0;hr<HOST_REGS;hr++)
8896 {
8897 if(regmap_pre[i+2][hr]>=0&&get_reg(regs[i+2].regmap_entry,regmap_pre[i+2][hr])<0) nr&=~(1<<hr);
8898 //if((regmap_entry[i+2][hr])>=0) if(!((nr>>hr)&1)) printf("%x-bogus(%d=%d)\n",start+i*4,hr,regmap_entry[i+2][hr]);
8899 }
8900 }
8901 }
8902 // Don't need stuff which is overwritten
f5955059 8903 //if(regs[i].regmap[hr]!=regmap_pre[i][hr]) nr&=~(1<<hr);
8904 //if(regs[i].regmap[hr]<0) nr&=~(1<<hr);
57871462 8905 // Merge in delay slot
8906 for(hr=0;hr<HOST_REGS;hr++)
8907 {
8908 if(!likely[i]) {
8909 // These are overwritten unless the branch is "likely"
8910 // and the delay slot is nullified if not taken
8911 if(rt1[i+1]&&rt1[i+1]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
8912 if(rt2[i+1]&&rt2[i+1]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
8913 }
8914 if(us1[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8915 if(us2[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8916 if(rs1[i+1]==regmap_pre[i][hr]) nr|=1<<hr;
8917 if(rs2[i+1]==regmap_pre[i][hr]) nr|=1<<hr;
8918 if(us1[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8919 if(us2[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8920 if(rs1[i+1]==regs[i].regmap_entry[hr]) nr|=1<<hr;
8921 if(rs2[i+1]==regs[i].regmap_entry[hr]) nr|=1<<hr;
8922 if(dep1[i+1]&&!((unneeded_reg_upper[i]>>dep1[i+1])&1)) {
8923 if(dep1[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8924 if(dep2[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8925 }
8926 if(dep2[i+1]&&!((unneeded_reg_upper[i]>>dep2[i+1])&1)) {
8927 if(dep1[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8928 if(dep2[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8929 }
b9b61529 8930 if(itype[i+1]==STORE || itype[i+1]==STORELR || (opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) {
57871462 8931 if(regmap_pre[i][hr]==INVCP) nr|=1<<hr;
8932 if(regs[i].regmap_entry[hr]==INVCP) nr|=1<<hr;
8933 }
8934 }
8935 }
1e973cb0 8936 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 8937 {
8938 // SYSCALL instruction (software interrupt)
8939 nr=0;
8940 }
8941 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
8942 {
8943 // ERET instruction (return from interrupt)
8944 nr=0;
8945 }
8946 else // Non-branch
8947 {
8948 if(i<slen-1) {
8949 for(hr=0;hr<HOST_REGS;hr++) {
8950 if(regmap_pre[i+1][hr]>=0&&get_reg(regs[i+1].regmap_entry,regmap_pre[i+1][hr])<0) nr&=~(1<<hr);
8951 if(regs[i].regmap[hr]!=regmap_pre[i+1][hr]) nr&=~(1<<hr);
8952 if(regs[i].regmap[hr]!=regmap_pre[i][hr]) nr&=~(1<<hr);
8953 if(regs[i].regmap[hr]<0) nr&=~(1<<hr);
8954 }
8955 }
8956 }
8957 for(hr=0;hr<HOST_REGS;hr++)
8958 {
8959 // Overwritten registers are not needed
8960 if(rt1[i]&&rt1[i]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
8961 if(rt2[i]&&rt2[i]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
8962 if(FTEMP==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
8963 // Source registers are needed
8964 if(us1[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8965 if(us2[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8966 if(rs1[i]==regmap_pre[i][hr]) nr|=1<<hr;
8967 if(rs2[i]==regmap_pre[i][hr]) nr|=1<<hr;
8968 if(us1[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8969 if(us2[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8970 if(rs1[i]==regs[i].regmap_entry[hr]) nr|=1<<hr;
8971 if(rs2[i]==regs[i].regmap_entry[hr]) nr|=1<<hr;
8972 if(dep1[i]&&!((unneeded_reg_upper[i]>>dep1[i])&1)) {
8973 if(dep1[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8974 if(dep1[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8975 }
8976 if(dep2[i]&&!((unneeded_reg_upper[i]>>dep2[i])&1)) {
8977 if(dep2[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8978 if(dep2[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8979 }
b9b61529 8980 if(itype[i]==STORE || itype[i]==STORELR || (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) {
57871462 8981 if(regmap_pre[i][hr]==INVCP) nr|=1<<hr;
8982 if(regs[i].regmap_entry[hr]==INVCP) nr|=1<<hr;
8983 }
8984 // Don't store a register immediately after writing it,
8985 // may prevent dual-issue.
8986 // But do so if this is a branch target, otherwise we
8987 // might have to load the register before the branch.
8988 if(i>0&&!bt[i]&&((regs[i].wasdirty>>hr)&1)) {
8989 if((regmap_pre[i][hr]>0&&regmap_pre[i][hr]<64&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1)) ||
8990 (regmap_pre[i][hr]>64&&!((unneeded_reg_upper[i]>>(regmap_pre[i][hr]&63))&1)) ) {
8991 if(rt1[i-1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8992 if(rt2[i-1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
8993 }
8994 if((regs[i].regmap_entry[hr]>0&&regs[i].regmap_entry[hr]<64&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1)) ||
8995 (regs[i].regmap_entry[hr]>64&&!((unneeded_reg_upper[i]>>(regs[i].regmap_entry[hr]&63))&1)) ) {
8996 if(rt1[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8997 if(rt2[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
8998 }
8999 }
9000 }
9001 // Cycle count is needed at branches. Assume it is needed at the target too.
9002 if(i==0||bt[i]||itype[i]==CJUMP||itype[i]==FJUMP||itype[i]==SPAN) {
9003 if(regmap_pre[i][HOST_CCREG]==CCREG) nr|=1<<HOST_CCREG;
9004 if(regs[i].regmap_entry[HOST_CCREG]==CCREG) nr|=1<<HOST_CCREG;
9005 }
9006 // Save it
9007 needed_reg[i]=nr;
9f51b4b9 9008
57871462 9009 // Deallocate unneeded registers
9010 for(hr=0;hr<HOST_REGS;hr++)
9011 {
9012 if(!((nr>>hr)&1)) {
9013 if(regs[i].regmap_entry[hr]!=CCREG) regs[i].regmap_entry[hr]=-1;
9014 if((regs[i].regmap[hr]&63)!=rs1[i] && (regs[i].regmap[hr]&63)!=rs2[i] &&
9015 (regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
9016 (regs[i].regmap[hr]&63)!=PTEMP && (regs[i].regmap[hr]&63)!=CCREG)
9017 {
9018 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
9019 {
9020 if(likely[i]) {
9021 regs[i].regmap[hr]=-1;
9022 regs[i].isconst&=~(1<<hr);
79c75f1b 9023 if(i<slen-2) {
9024 regmap_pre[i+2][hr]=-1;
9025 regs[i+2].wasconst&=~(1<<hr);
9026 }
57871462 9027 }
9028 }
9029 }
9030 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
9031 {
9032 int d1=0,d2=0,map=0,temp=0;
9033 if(get_reg(regs[i].regmap,rt1[i+1]|64)>=0||get_reg(branch_regs[i].regmap,rt1[i+1]|64)>=0)
9034 {
9035 d1=dep1[i+1];
9036 d2=dep2[i+1];
9037 }
b9b61529 9038 if(itype[i+1]==STORE || itype[i+1]==STORELR ||
9039 (opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
57871462 9040 map=INVCP;
9041 }
9042 if(itype[i+1]==LOADLR || itype[i+1]==STORELR ||
b9b61529 9043 itype[i+1]==C1LS || itype[i+1]==C2LS)
57871462 9044 temp=FTEMP;
9045 if((regs[i].regmap[hr]&63)!=rs1[i] && (regs[i].regmap[hr]&63)!=rs2[i] &&
9046 (regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
9047 (regs[i].regmap[hr]&63)!=rt1[i+1] && (regs[i].regmap[hr]&63)!=rt2[i+1] &&
9048 (regs[i].regmap[hr]^64)!=us1[i+1] && (regs[i].regmap[hr]^64)!=us2[i+1] &&
9049 (regs[i].regmap[hr]^64)!=d1 && (regs[i].regmap[hr]^64)!=d2 &&
9050 regs[i].regmap[hr]!=rs1[i+1] && regs[i].regmap[hr]!=rs2[i+1] &&
9051 (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=PTEMP &&
9052 regs[i].regmap[hr]!=RHASH && regs[i].regmap[hr]!=RHTBL &&
9053 regs[i].regmap[hr]!=RTEMP && regs[i].regmap[hr]!=CCREG &&
9054 regs[i].regmap[hr]!=map )
9055 {
9056 regs[i].regmap[hr]=-1;
9057 regs[i].isconst&=~(1<<hr);
9058 if((branch_regs[i].regmap[hr]&63)!=rs1[i] && (branch_regs[i].regmap[hr]&63)!=rs2[i] &&
9059 (branch_regs[i].regmap[hr]&63)!=rt1[i] && (branch_regs[i].regmap[hr]&63)!=rt2[i] &&
9060 (branch_regs[i].regmap[hr]&63)!=rt1[i+1] && (branch_regs[i].regmap[hr]&63)!=rt2[i+1] &&
9061 (branch_regs[i].regmap[hr]^64)!=us1[i+1] && (branch_regs[i].regmap[hr]^64)!=us2[i+1] &&
9062 (branch_regs[i].regmap[hr]^64)!=d1 && (branch_regs[i].regmap[hr]^64)!=d2 &&
9063 branch_regs[i].regmap[hr]!=rs1[i+1] && branch_regs[i].regmap[hr]!=rs2[i+1] &&
9064 (branch_regs[i].regmap[hr]&63)!=temp && branch_regs[i].regmap[hr]!=PTEMP &&
9065 branch_regs[i].regmap[hr]!=RHASH && branch_regs[i].regmap[hr]!=RHTBL &&
9066 branch_regs[i].regmap[hr]!=RTEMP && branch_regs[i].regmap[hr]!=CCREG &&
9067 branch_regs[i].regmap[hr]!=map)
9068 {
9069 branch_regs[i].regmap[hr]=-1;
9070 branch_regs[i].regmap_entry[hr]=-1;
9071 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
9072 {
9073 if(!likely[i]&&i<slen-2) {
9074 regmap_pre[i+2][hr]=-1;
79c75f1b 9075 regs[i+2].wasconst&=~(1<<hr);
57871462 9076 }
9077 }
9078 }
9079 }
9080 }
9081 else
9082 {
9083 // Non-branch
9084 if(i>0)
9085 {
9086 int d1=0,d2=0,map=-1,temp=-1;
9087 if(get_reg(regs[i].regmap,rt1[i]|64)>=0)
9088 {
9089 d1=dep1[i];
9090 d2=dep2[i];
9091 }
1edfcc68 9092 if(itype[i]==STORE || itype[i]==STORELR ||
b9b61529 9093 (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
57871462 9094 map=INVCP;
9095 }
9096 if(itype[i]==LOADLR || itype[i]==STORELR ||
b9b61529 9097 itype[i]==C1LS || itype[i]==C2LS)
57871462 9098 temp=FTEMP;
9099 if((regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
9100 (regs[i].regmap[hr]^64)!=us1[i] && (regs[i].regmap[hr]^64)!=us2[i] &&
9101 (regs[i].regmap[hr]^64)!=d1 && (regs[i].regmap[hr]^64)!=d2 &&
9102 regs[i].regmap[hr]!=rs1[i] && regs[i].regmap[hr]!=rs2[i] &&
9103 (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=map &&
9104 (itype[i]!=SPAN||regs[i].regmap[hr]!=CCREG))
9105 {
9106 if(i<slen-1&&!is_ds[i]) {
9107 if(regmap_pre[i+1][hr]!=-1 || regs[i].regmap[hr]!=-1)
9108 if(regmap_pre[i+1][hr]!=regs[i].regmap[hr])
9109 if(regs[i].regmap[hr]<64||!((regs[i].was32>>(regs[i].regmap[hr]&63))&1))
9110 {
c43b5311 9111 SysPrintf("fail: %x (%d %d!=%d)\n",start+i*4,hr,regmap_pre[i+1][hr],regs[i].regmap[hr]);
57871462 9112 assert(regmap_pre[i+1][hr]==regs[i].regmap[hr]);
9113 }
9114 regmap_pre[i+1][hr]=-1;
9115 if(regs[i+1].regmap_entry[hr]==CCREG) regs[i+1].regmap_entry[hr]=-1;
79c75f1b 9116 regs[i+1].wasconst&=~(1<<hr);
57871462 9117 }
9118 regs[i].regmap[hr]=-1;
9119 regs[i].isconst&=~(1<<hr);
9120 }
9121 }
9122 }
9123 }
9124 }
9125 }
9f51b4b9 9126
57871462 9127 /* Pass 5 - Pre-allocate registers */
9f51b4b9 9128
57871462 9129 // If a register is allocated during a loop, try to allocate it for the
9130 // entire loop, if possible. This avoids loading/storing registers
9131 // inside of the loop.
9f51b4b9 9132
57871462 9133 signed char f_regmap[HOST_REGS];
9134 clear_all_regs(f_regmap);
9135 for(i=0;i<slen-1;i++)
9136 {
9137 if(itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
9138 {
9f51b4b9 9139 if(ba[i]>=start && ba[i]<(start+i*4))
57871462 9140 if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU
9141 ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD
9142 ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS
9143 ||itype[i+1]==SHIFT||itype[i+1]==COP1||itype[i+1]==FLOAT
b9b61529 9144 ||itype[i+1]==FCOMP||itype[i+1]==FCONV
9145 ||itype[i+1]==COP2||itype[i+1]==C2LS||itype[i+1]==C2OP)
57871462 9146 {
9147 int t=(ba[i]-start)>>2;
9148 if(t>0&&(itype[t-1]!=UJUMP&&itype[t-1]!=RJUMP&&itype[t-1]!=CJUMP&&itype[t-1]!=SJUMP&&itype[t-1]!=FJUMP)) // loop_preload can't handle jumps into delay slots
198df76f 9149 if(t<2||(itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||rt1[t-2]!=31) // call/ret assumes no registers allocated
57871462 9150 for(hr=0;hr<HOST_REGS;hr++)
9151 {
9152 if(regs[i].regmap[hr]>64) {
9153 if(!((regs[i].dirty>>hr)&1))
9154 f_regmap[hr]=regs[i].regmap[hr];
9155 else f_regmap[hr]=-1;
9156 }
b372a952 9157 else if(regs[i].regmap[hr]>=0) {
9158 if(f_regmap[hr]!=regs[i].regmap[hr]) {
9159 // dealloc old register
9160 int n;
9161 for(n=0;n<HOST_REGS;n++)
9162 {
9163 if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
9164 }
9165 // and alloc new one
9166 f_regmap[hr]=regs[i].regmap[hr];
9167 }
9168 }
57871462 9169 if(branch_regs[i].regmap[hr]>64) {
9170 if(!((branch_regs[i].dirty>>hr)&1))
9171 f_regmap[hr]=branch_regs[i].regmap[hr];
9172 else f_regmap[hr]=-1;
9173 }
b372a952 9174 else if(branch_regs[i].regmap[hr]>=0) {
9175 if(f_regmap[hr]!=branch_regs[i].regmap[hr]) {
9176 // dealloc old register
9177 int n;
9178 for(n=0;n<HOST_REGS;n++)
9179 {
9180 if(f_regmap[n]==branch_regs[i].regmap[hr]) {f_regmap[n]=-1;}
9181 }
9182 // and alloc new one
9183 f_regmap[hr]=branch_regs[i].regmap[hr];
9184 }
9185 }
e1190b87 9186 if(ooo[i]) {
9f51b4b9 9187 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1])
e1190b87 9188 f_regmap[hr]=branch_regs[i].regmap[hr];
9189 }else{
9f51b4b9 9190 if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1])
57871462 9191 f_regmap[hr]=branch_regs[i].regmap[hr];
9192 }
9193 // Avoid dirty->clean transition
e1190b87 9194 #ifdef DESTRUCTIVE_WRITEBACK
57871462 9195 if(t>0) if(get_reg(regmap_pre[t],f_regmap[hr])>=0) if((regs[t].wasdirty>>get_reg(regmap_pre[t],f_regmap[hr]))&1) f_regmap[hr]=-1;
e1190b87 9196 #endif
9197 // This check is only strictly required in the DESTRUCTIVE_WRITEBACK
9198 // case above, however it's always a good idea. We can't hoist the
9199 // load if the register was already allocated, so there's no point
9200 // wasting time analyzing most of these cases. It only "succeeds"
9201 // when the mapping was different and the load can be replaced with
9202 // a mov, which is of negligible benefit. So such cases are
9203 // skipped below.
57871462 9204 if(f_regmap[hr]>0) {
198df76f 9205 if(regs[t].regmap[hr]==f_regmap[hr]||(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0)) {
57871462 9206 int r=f_regmap[hr];
9207 for(j=t;j<=i;j++)
9208 {
9209 //printf("Test %x -> %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r);
9210 if(r<34&&((unneeded_reg[j]>>r)&1)) break;
9211 if(r>63&&((unneeded_reg_upper[j]>>(r&63))&1)) break;
9212 if(r>63) {
9213 // NB This can exclude the case where the upper-half
9214 // register is lower numbered than the lower-half
9215 // register. Not sure if it's worth fixing...
9216 if(get_reg(regs[j].regmap,r&63)<0) break;
e1190b87 9217 if(get_reg(regs[j].regmap_entry,r&63)<0) break;
57871462 9218 if(regs[j].is32&(1LL<<(r&63))) break;
9219 }
9220 if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63)<TEMPREG) {
9221 //printf("Hit %x -> %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r);
9222 int k;
9223 if(regs[i].regmap[hr]==-1&&branch_regs[i].regmap[hr]==-1) {
9224 if(get_reg(regs[i+2].regmap,f_regmap[hr])>=0) break;
9225 if(r>63) {
9226 if(get_reg(regs[i].regmap,r&63)<0) break;
9227 if(get_reg(branch_regs[i].regmap,r&63)<0) break;
9228 }
9229 k=i;
9230 while(k>1&&regs[k-1].regmap[hr]==-1) {
e1190b87 9231 if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) {
9232 //printf("no free regs for store %x\n",start+(k-1)*4);
9233 break;
57871462 9234 }
57871462 9235 if(get_reg(regs[k-1].regmap,f_regmap[hr])>=0) {
9236 //printf("no-match due to different register\n");
9237 break;
9238 }
9239 if(itype[k-2]==UJUMP||itype[k-2]==RJUMP||itype[k-2]==CJUMP||itype[k-2]==SJUMP||itype[k-2]==FJUMP) {
9240 //printf("no-match due to branch\n");
9241 break;
9242 }
9243 // call/ret fast path assumes no registers allocated
198df76f 9244 if(k>2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)&&rt1[k-3]==31) {
57871462 9245 break;
9246 }
9247 if(r>63) {
9248 // NB This can exclude the case where the upper-half
9249 // register is lower numbered than the lower-half
9250 // register. Not sure if it's worth fixing...
9251 if(get_reg(regs[k-1].regmap,r&63)<0) break;
9252 if(regs[k-1].is32&(1LL<<(r&63))) break;
9253 }
9254 k--;
9255 }
9256 if(i<slen-1) {
9257 if((regs[k].is32&(1LL<<f_regmap[hr]))!=
9258 (regs[i+2].was32&(1LL<<f_regmap[hr]))) {
9259 //printf("bad match after branch\n");
9260 break;
9261 }
9262 }
9263 if(regs[k-1].regmap[hr]==f_regmap[hr]&&regmap_pre[k][hr]==f_regmap[hr]) {
9264 //printf("Extend r%d, %x ->\n",hr,start+k*4);
9265 while(k<i) {
9266 regs[k].regmap_entry[hr]=f_regmap[hr];
9267 regs[k].regmap[hr]=f_regmap[hr];
9268 regmap_pre[k+1][hr]=f_regmap[hr];
9269 regs[k].wasdirty&=~(1<<hr);
9270 regs[k].dirty&=~(1<<hr);
9271 regs[k].wasdirty|=(1<<hr)&regs[k-1].dirty;
9272 regs[k].dirty|=(1<<hr)&regs[k].wasdirty;
9273 regs[k].wasconst&=~(1<<hr);
9274 regs[k].isconst&=~(1<<hr);
9275 k++;
9276 }
9277 }
9278 else {
9279 //printf("Fail Extend r%d, %x ->\n",hr,start+k*4);
9280 break;
9281 }
9282 assert(regs[i-1].regmap[hr]==f_regmap[hr]);
9283 if(regs[i-1].regmap[hr]==f_regmap[hr]&&regmap_pre[i][hr]==f_regmap[hr]) {
9284 //printf("OK fill %x (r%d)\n",start+i*4,hr);
9285 regs[i].regmap_entry[hr]=f_regmap[hr];
9286 regs[i].regmap[hr]=f_regmap[hr];
9287 regs[i].wasdirty&=~(1<<hr);
9288 regs[i].dirty&=~(1<<hr);
9289 regs[i].wasdirty|=(1<<hr)&regs[i-1].dirty;
9290 regs[i].dirty|=(1<<hr)&regs[i-1].dirty;
9291 regs[i].wasconst&=~(1<<hr);
9292 regs[i].isconst&=~(1<<hr);
9293 branch_regs[i].regmap_entry[hr]=f_regmap[hr];
9294 branch_regs[i].wasdirty&=~(1<<hr);
9295 branch_regs[i].wasdirty|=(1<<hr)&regs[i].dirty;
9296 branch_regs[i].regmap[hr]=f_regmap[hr];
9297 branch_regs[i].dirty&=~(1<<hr);
9298 branch_regs[i].dirty|=(1<<hr)&regs[i].dirty;
9299 branch_regs[i].wasconst&=~(1<<hr);
9300 branch_regs[i].isconst&=~(1<<hr);
9301 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000) {
9302 regmap_pre[i+2][hr]=f_regmap[hr];
9303 regs[i+2].wasdirty&=~(1<<hr);
9304 regs[i+2].wasdirty|=(1<<hr)&regs[i].dirty;
9305 assert((branch_regs[i].is32&(1LL<<f_regmap[hr]))==
9306 (regs[i+2].was32&(1LL<<f_regmap[hr])));
9307 }
9308 }
9309 }
9310 for(k=t;k<j;k++) {
e1190b87 9311 // Alloc register clean at beginning of loop,
9312 // but may dirty it in pass 6
57871462 9313 regs[k].regmap_entry[hr]=f_regmap[hr];
9314 regs[k].regmap[hr]=f_regmap[hr];
57871462 9315 regs[k].dirty&=~(1<<hr);
9316 regs[k].wasconst&=~(1<<hr);
9317 regs[k].isconst&=~(1<<hr);
e1190b87 9318 if(itype[k]==UJUMP||itype[k]==RJUMP||itype[k]==CJUMP||itype[k]==SJUMP||itype[k]==FJUMP) {
9319 branch_regs[k].regmap_entry[hr]=f_regmap[hr];
9320 branch_regs[k].regmap[hr]=f_regmap[hr];
9321 branch_regs[k].dirty&=~(1<<hr);
9322 branch_regs[k].wasconst&=~(1<<hr);
9323 branch_regs[k].isconst&=~(1<<hr);
9324 if(itype[k]!=RJUMP&&itype[k]!=UJUMP&&(source[k]>>16)!=0x1000) {
9325 regmap_pre[k+2][hr]=f_regmap[hr];
9326 regs[k+2].wasdirty&=~(1<<hr);
9327 assert((branch_regs[k].is32&(1LL<<f_regmap[hr]))==
9328 (regs[k+2].was32&(1LL<<f_regmap[hr])));
9329 }
9330 }
9331 else
9332 {
9333 regmap_pre[k+1][hr]=f_regmap[hr];
9334 regs[k+1].wasdirty&=~(1<<hr);
9335 }
57871462 9336 }
9337 if(regs[j].regmap[hr]==f_regmap[hr])
9338 regs[j].regmap_entry[hr]=f_regmap[hr];
9339 break;
9340 }
9341 if(j==i) break;
9342 if(regs[j].regmap[hr]>=0)
9343 break;
9344 if(get_reg(regs[j].regmap,f_regmap[hr])>=0) {
9345 //printf("no-match due to different register\n");
9346 break;
9347 }
9348 if((regs[j+1].is32&(1LL<<f_regmap[hr]))!=(regs[j].is32&(1LL<<f_regmap[hr]))) {
9349 //printf("32/64 mismatch %x %d\n",start+j*4,hr);
9350 break;
9351 }
e1190b87 9352 if(itype[j]==UJUMP||itype[j]==RJUMP||(source[j]>>16)==0x1000)
9353 {
9354 // Stop on unconditional branch
9355 break;
9356 }
9357 if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP)
9358 {
9359 if(ooo[j]) {
9f51b4b9 9360 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1])
e1190b87 9361 break;
9362 }else{
9f51b4b9 9363 if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1])
e1190b87 9364 break;
9365 }
9366 if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) {
9367 //printf("no-match due to different register (branch)\n");
57871462 9368 break;
9369 }
9370 }
e1190b87 9371 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) {
9372 //printf("No free regs for store %x\n",start+j*4);
9373 break;
9374 }
57871462 9375 if(f_regmap[hr]>=64) {
9376 if(regs[j].is32&(1LL<<(f_regmap[hr]&63))) {
9377 break;
9378 }
9379 else
9380 {
9381 if(get_reg(regs[j].regmap,f_regmap[hr]&63)<0) {
9382 break;
9383 }
9384 }
9385 }
9386 }
9387 }
9388 }
9389 }
9390 }
9391 }else{
198df76f 9392 // Non branch or undetermined branch target
57871462 9393 for(hr=0;hr<HOST_REGS;hr++)
9394 {
9395 if(hr!=EXCLUDE_REG) {
9396 if(regs[i].regmap[hr]>64) {
9397 if(!((regs[i].dirty>>hr)&1))
9398 f_regmap[hr]=regs[i].regmap[hr];
9399 }
b372a952 9400 else if(regs[i].regmap[hr]>=0) {
9401 if(f_regmap[hr]!=regs[i].regmap[hr]) {
9402 // dealloc old register
9403 int n;
9404 for(n=0;n<HOST_REGS;n++)
9405 {
9406 if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
9407 }
9408 // and alloc new one
9409 f_regmap[hr]=regs[i].regmap[hr];
9410 }
9411 }
57871462 9412 }
9413 }
9414 // Try to restore cycle count at branch targets
9415 if(bt[i]) {
9416 for(j=i;j<slen-1;j++) {
9417 if(regs[j].regmap[HOST_CCREG]!=-1) break;
e1190b87 9418 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) {
9419 //printf("no free regs for store %x\n",start+j*4);
9420 break;
57871462 9421 }
57871462 9422 }
9423 if(regs[j].regmap[HOST_CCREG]==CCREG) {
9424 int k=i;
9425 //printf("Extend CC, %x -> %x\n",start+k*4,start+j*4);
9426 while(k<j) {
9427 regs[k].regmap_entry[HOST_CCREG]=CCREG;
9428 regs[k].regmap[HOST_CCREG]=CCREG;
9429 regmap_pre[k+1][HOST_CCREG]=CCREG;
9430 regs[k+1].wasdirty|=1<<HOST_CCREG;
9431 regs[k].dirty|=1<<HOST_CCREG;
9432 regs[k].wasconst&=~(1<<HOST_CCREG);
9433 regs[k].isconst&=~(1<<HOST_CCREG);
9434 k++;
9435 }
9f51b4b9 9436 regs[j].regmap_entry[HOST_CCREG]=CCREG;
57871462 9437 }
9438 // Work backwards from the branch target
9439 if(j>i&&f_regmap[HOST_CCREG]==CCREG)
9440 {
9441 //printf("Extend backwards\n");
9442 int k;
9443 k=i;
9444 while(regs[k-1].regmap[HOST_CCREG]==-1) {
e1190b87 9445 if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) {
9446 //printf("no free regs for store %x\n",start+(k-1)*4);
9447 break;
57871462 9448 }
57871462 9449 k--;
9450 }
9451 if(regs[k-1].regmap[HOST_CCREG]==CCREG) {
9452 //printf("Extend CC, %x ->\n",start+k*4);
9453 while(k<=i) {
9454 regs[k].regmap_entry[HOST_CCREG]=CCREG;
9455 regs[k].regmap[HOST_CCREG]=CCREG;
9456 regmap_pre[k+1][HOST_CCREG]=CCREG;
9457 regs[k+1].wasdirty|=1<<HOST_CCREG;
9458 regs[k].dirty|=1<<HOST_CCREG;
9459 regs[k].wasconst&=~(1<<HOST_CCREG);
9460 regs[k].isconst&=~(1<<HOST_CCREG);
9461 k++;
9462 }
9463 }
9464 else {
9465 //printf("Fail Extend CC, %x ->\n",start+k*4);
9466 }
9467 }
9468 }
9469 if(itype[i]!=STORE&&itype[i]!=STORELR&&itype[i]!=C1LS&&itype[i]!=SHIFT&&
9470 itype[i]!=NOP&&itype[i]!=MOV&&itype[i]!=ALU&&itype[i]!=SHIFTIMM&&
9471 itype[i]!=IMM16&&itype[i]!=LOAD&&itype[i]!=COP1&&itype[i]!=FLOAT&&
e1190b87 9472 itype[i]!=FCONV&&itype[i]!=FCOMP)
57871462 9473 {
9474 memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap));
9475 }
9476 }
9477 }
9f51b4b9 9478
d61de97e 9479 // Cache memory offset or tlb map pointer if a register is available
9480 #ifndef HOST_IMM_ADDR32
9481 #ifndef RAM_OFFSET
1edfcc68 9482 if(0)
d61de97e 9483 #endif
9484 {
9485 int earliest_available[HOST_REGS];
9486 int loop_start[HOST_REGS];
9487 int score[HOST_REGS];
9488 int end[HOST_REGS];
1edfcc68 9489 int reg=ROREG;
d61de97e 9490
9491 // Init
9492 for(hr=0;hr<HOST_REGS;hr++) {
9493 score[hr]=0;earliest_available[hr]=0;
9494 loop_start[hr]=MAXBLOCK;
9495 }
9496 for(i=0;i<slen-1;i++)
9497 {
9498 // Can't do anything if no registers are available
9499 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i]) {
9500 for(hr=0;hr<HOST_REGS;hr++) {
9501 score[hr]=0;earliest_available[hr]=i+1;
9502 loop_start[hr]=MAXBLOCK;
9503 }
9504 }
9505 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
9506 if(!ooo[i]) {
9507 if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1]) {
9508 for(hr=0;hr<HOST_REGS;hr++) {
9509 score[hr]=0;earliest_available[hr]=i+1;
9510 loop_start[hr]=MAXBLOCK;
9511 }
9512 }
198df76f 9513 }else{
9514 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1]) {
9515 for(hr=0;hr<HOST_REGS;hr++) {
9516 score[hr]=0;earliest_available[hr]=i+1;
9517 loop_start[hr]=MAXBLOCK;
9518 }
9519 }
d61de97e 9520 }
9521 }
9522 // Mark unavailable registers
9523 for(hr=0;hr<HOST_REGS;hr++) {
9524 if(regs[i].regmap[hr]>=0) {
9525 score[hr]=0;earliest_available[hr]=i+1;
9526 loop_start[hr]=MAXBLOCK;
9527 }
9528 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
9529 if(branch_regs[i].regmap[hr]>=0) {
9530 score[hr]=0;earliest_available[hr]=i+2;
9531 loop_start[hr]=MAXBLOCK;
9532 }
9533 }
9534 }
9535 // No register allocations after unconditional jumps
9536 if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000)
9537 {
9538 for(hr=0;hr<HOST_REGS;hr++) {
9539 score[hr]=0;earliest_available[hr]=i+2;
9540 loop_start[hr]=MAXBLOCK;
9541 }
9542 i++; // Skip delay slot too
9543 //printf("skip delay slot: %x\n",start+i*4);
9544 }
9545 else
9546 // Possible match
9547 if(itype[i]==LOAD||itype[i]==LOADLR||
9548 itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS) {
9549 for(hr=0;hr<HOST_REGS;hr++) {
9550 if(hr!=EXCLUDE_REG) {
9551 end[hr]=i-1;
9552 for(j=i;j<slen-1;j++) {
9553 if(regs[j].regmap[hr]>=0) break;
9554 if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) {
9555 if(branch_regs[j].regmap[hr]>=0) break;
9556 if(ooo[j]) {
9557 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1]) break;
9558 }else{
9559 if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1]) break;
9560 }
9561 }
9562 else if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) break;
9563 if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) {
9564 int t=(ba[j]-start)>>2;
9565 if(t<j&&t>=earliest_available[hr]) {
198df76f 9566 if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) { // call/ret assumes no registers allocated
9567 // Score a point for hoisting loop invariant
9568 if(t<loop_start[hr]) loop_start[hr]=t;
9569 //printf("set loop_start: i=%x j=%x (%x)\n",start+i*4,start+j*4,start+t*4);
9570 score[hr]++;
9571 end[hr]=j;
9572 }
d61de97e 9573 }
9574 else if(t<j) {
9575 if(regs[t].regmap[hr]==reg) {
9576 // Score a point if the branch target matches this register
9577 score[hr]++;
9578 end[hr]=j;
9579 }
9580 }
9581 if(itype[j+1]==LOAD||itype[j+1]==LOADLR||
9582 itype[j+1]==STORE||itype[j+1]==STORELR||itype[j+1]==C1LS) {
9583 score[hr]++;
9584 end[hr]=j;
9585 }
9586 }
9587 if(itype[j]==UJUMP||itype[j]==RJUMP||(source[j]>>16)==0x1000)
9588 {
9589 // Stop on unconditional branch
9590 break;
9591 }
9592 else
9593 if(itype[j]==LOAD||itype[j]==LOADLR||
9594 itype[j]==STORE||itype[j]==STORELR||itype[j]==C1LS) {
9595 score[hr]++;
9596 end[hr]=j;
9597 }
9598 }
9599 }
9600 }
9601 // Find highest score and allocate that register
9602 int maxscore=0;
9603 for(hr=0;hr<HOST_REGS;hr++) {
9604 if(hr!=EXCLUDE_REG) {
9605 if(score[hr]>score[maxscore]) {
9606 maxscore=hr;
9607 //printf("highest score: %d %d (%x->%x)\n",score[hr],hr,start+i*4,start+end[hr]*4);
9608 }
9609 }
9610 }
9611 if(score[maxscore]>1)
9612 {
9613 if(i<loop_start[maxscore]) loop_start[maxscore]=i;
9614 for(j=loop_start[maxscore];j<slen&&j<=end[maxscore];j++) {
9615 //if(regs[j].regmap[maxscore]>=0) {printf("oops: %x %x was %d=%d\n",loop_start[maxscore]*4+start,j*4+start,maxscore,regs[j].regmap[maxscore]);}
9616 assert(regs[j].regmap[maxscore]<0);
9617 if(j>loop_start[maxscore]) regs[j].regmap_entry[maxscore]=reg;
9618 regs[j].regmap[maxscore]=reg;
9619 regs[j].dirty&=~(1<<maxscore);
9620 regs[j].wasconst&=~(1<<maxscore);
9621 regs[j].isconst&=~(1<<maxscore);
9622 if(itype[j]==UJUMP||itype[j]==RJUMP||itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP) {
9623 branch_regs[j].regmap[maxscore]=reg;
9624 branch_regs[j].wasdirty&=~(1<<maxscore);
9625 branch_regs[j].dirty&=~(1<<maxscore);
9626 branch_regs[j].wasconst&=~(1<<maxscore);
9627 branch_regs[j].isconst&=~(1<<maxscore);
9628 if(itype[j]!=RJUMP&&itype[j]!=UJUMP&&(source[j]>>16)!=0x1000) {
9629 regmap_pre[j+2][maxscore]=reg;
9630 regs[j+2].wasdirty&=~(1<<maxscore);
9631 }
9632 // loop optimization (loop_preload)
9633 int t=(ba[j]-start)>>2;
198df76f 9634 if(t==loop_start[maxscore]) {
9635 if(t==1||(t>1&&itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||(t>1&&rt1[t-2]!=31)) // call/ret assumes no registers allocated
9636 regs[t].regmap_entry[maxscore]=reg;
9637 }
d61de97e 9638 }
9639 else
9640 {
9641 if(j<1||(itype[j-1]!=RJUMP&&itype[j-1]!=UJUMP&&itype[j-1]!=CJUMP&&itype[j-1]!=SJUMP&&itype[j-1]!=FJUMP)) {
9642 regmap_pre[j+1][maxscore]=reg;
9643 regs[j+1].wasdirty&=~(1<<maxscore);
9644 }
9645 }
9646 }
9647 i=j-1;
9648 if(itype[j-1]==RJUMP||itype[j-1]==UJUMP||itype[j-1]==CJUMP||itype[j-1]==SJUMP||itype[j-1]==FJUMP) i++; // skip delay slot
9649 for(hr=0;hr<HOST_REGS;hr++) {
9650 score[hr]=0;earliest_available[hr]=i+i;
9651 loop_start[hr]=MAXBLOCK;
9652 }
9653 }
9654 }
9655 }
9656 }
9657 #endif
9f51b4b9 9658
57871462 9659 // This allocates registers (if possible) one instruction prior
9660 // to use, which can avoid a load-use penalty on certain CPUs.
9661 for(i=0;i<slen-1;i++)
9662 {
9663 if(!i||(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP))
9664 {
9665 if(!bt[i+1])
9666 {
b9b61529 9667 if(itype[i]==ALU||itype[i]==MOV||itype[i]==LOAD||itype[i]==SHIFTIMM||itype[i]==IMM16
9668 ||((itype[i]==COP1||itype[i]==COP2)&&opcode2[i]<3))
57871462 9669 {
9670 if(rs1[i+1]) {
9671 if((hr=get_reg(regs[i+1].regmap,rs1[i+1]))>=0)
9672 {
9673 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
9674 {
9675 regs[i].regmap[hr]=regs[i+1].regmap[hr];
9676 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
9677 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
9678 regs[i].isconst&=~(1<<hr);
9679 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
9680 constmap[i][hr]=constmap[i+1][hr];
9681 regs[i+1].wasdirty&=~(1<<hr);
9682 regs[i].dirty&=~(1<<hr);
9683 }
9684 }
9685 }
9686 if(rs2[i+1]) {
9687 if((hr=get_reg(regs[i+1].regmap,rs2[i+1]))>=0)
9688 {
9689 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
9690 {
9691 regs[i].regmap[hr]=regs[i+1].regmap[hr];
9692 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
9693 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
9694 regs[i].isconst&=~(1<<hr);
9695 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
9696 constmap[i][hr]=constmap[i+1][hr];
9697 regs[i+1].wasdirty&=~(1<<hr);
9698 regs[i].dirty&=~(1<<hr);
9699 }
9700 }
9701 }
198df76f 9702 // Preload target address for load instruction (non-constant)
57871462 9703 if(itype[i+1]==LOAD&&rs1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) {
9704 if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0)
9705 {
9706 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
9707 {
9708 regs[i].regmap[hr]=rs1[i+1];
9709 regmap_pre[i+1][hr]=rs1[i+1];
9710 regs[i+1].regmap_entry[hr]=rs1[i+1];
9711 regs[i].isconst&=~(1<<hr);
9712 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
9713 constmap[i][hr]=constmap[i+1][hr];
9714 regs[i+1].wasdirty&=~(1<<hr);
9715 regs[i].dirty&=~(1<<hr);
9716 }
9717 }
9718 }
9f51b4b9 9719 // Load source into target register
57871462 9720 if(lt1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) {
9721 if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0)
9722 {
9723 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
9724 {
9725 regs[i].regmap[hr]=rs1[i+1];
9726 regmap_pre[i+1][hr]=rs1[i+1];
9727 regs[i+1].regmap_entry[hr]=rs1[i+1];
9728 regs[i].isconst&=~(1<<hr);
9729 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
9730 constmap[i][hr]=constmap[i+1][hr];
9731 regs[i+1].wasdirty&=~(1<<hr);
9732 regs[i].dirty&=~(1<<hr);
9733 }
9734 }
9735 }
198df76f 9736 // Address for store instruction (non-constant)
b9b61529 9737 if(itype[i+1]==STORE||itype[i+1]==STORELR
9738 ||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2
57871462 9739 if(get_reg(regs[i+1].regmap,rs1[i+1])<0) {
9740 hr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1);
9741 if(hr<0) hr=get_reg(regs[i+1].regmap,-1);
9742 else {regs[i+1].regmap[hr]=AGEN1+((i+1)&1);regs[i+1].isconst&=~(1<<hr);}
9743 assert(hr>=0);
9744 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
9745 {
9746 regs[i].regmap[hr]=rs1[i+1];
9747 regmap_pre[i+1][hr]=rs1[i+1];
9748 regs[i+1].regmap_entry[hr]=rs1[i+1];
9749 regs[i].isconst&=~(1<<hr);
9750 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
9751 constmap[i][hr]=constmap[i+1][hr];
9752 regs[i+1].wasdirty&=~(1<<hr);
9753 regs[i].dirty&=~(1<<hr);
9754 }
9755 }
9756 }
b9b61529 9757 if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) { // LWC1/LDC1, LWC2/LDC2
57871462 9758 if(get_reg(regs[i+1].regmap,rs1[i+1])<0) {
9759 int nr;
9760 hr=get_reg(regs[i+1].regmap,FTEMP);
9761 assert(hr>=0);
9762 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
9763 {
9764 regs[i].regmap[hr]=rs1[i+1];
9765 regmap_pre[i+1][hr]=rs1[i+1];
9766 regs[i+1].regmap_entry[hr]=rs1[i+1];
9767 regs[i].isconst&=~(1<<hr);
9768 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
9769 constmap[i][hr]=constmap[i+1][hr];
9770 regs[i+1].wasdirty&=~(1<<hr);
9771 regs[i].dirty&=~(1<<hr);
9772 }
9773 else if((nr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1))>=0)
9774 {
9775 // move it to another register
9776 regs[i+1].regmap[hr]=-1;
9777 regmap_pre[i+2][hr]=-1;
9778 regs[i+1].regmap[nr]=FTEMP;
9779 regmap_pre[i+2][nr]=FTEMP;
9780 regs[i].regmap[nr]=rs1[i+1];
9781 regmap_pre[i+1][nr]=rs1[i+1];
9782 regs[i+1].regmap_entry[nr]=rs1[i+1];
9783 regs[i].isconst&=~(1<<nr);
9784 regs[i+1].isconst&=~(1<<nr);
9785 regs[i].dirty&=~(1<<nr);
9786 regs[i+1].wasdirty&=~(1<<nr);
9787 regs[i+1].dirty&=~(1<<nr);
9788 regs[i+2].wasdirty&=~(1<<nr);
9789 }
9790 }
9791 }
b9b61529 9792 if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||||itype[i+1]==C2LS*/) {
9f51b4b9 9793 if(itype[i+1]==LOAD)
57871462 9794 hr=get_reg(regs[i+1].regmap,rt1[i+1]);
b9b61529 9795 if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2
57871462 9796 hr=get_reg(regs[i+1].regmap,FTEMP);
b9b61529 9797 if(itype[i+1]==STORE||itype[i+1]==STORELR||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1/SWC2/SDC2
57871462 9798 hr=get_reg(regs[i+1].regmap,AGEN1+((i+1)&1));
9799 if(hr<0) hr=get_reg(regs[i+1].regmap,-1);
9800 }
9801 if(hr>=0&&regs[i].regmap[hr]<0) {
9802 int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
9803 if(rs>=0&&((regs[i+1].wasconst>>rs)&1)) {
9804 regs[i].regmap[hr]=AGEN1+((i+1)&1);
9805 regmap_pre[i+1][hr]=AGEN1+((i+1)&1);
9806 regs[i+1].regmap_entry[hr]=AGEN1+((i+1)&1);
9807 regs[i].isconst&=~(1<<hr);
9808 regs[i+1].wasdirty&=~(1<<hr);
9809 regs[i].dirty&=~(1<<hr);
9810 }
9811 }
9812 }
9813 }
9814 }
9815 }
9816 }
9f51b4b9 9817
57871462 9818 /* Pass 6 - Optimize clean/dirty state */
9819 clean_registers(0,slen-1,1);
9f51b4b9 9820
57871462 9821 /* Pass 7 - Identify 32-bit registers */
04fd948a 9822 for (i=slen-1;i>=0;i--)
9823 {
9824 if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
9825 {
9826 // Conditional branch
9827 if((source[i]>>16)!=0x1000&&i<slen-2) {
9828 // Mark this address as a branch target since it may be called
9829 // upon return from interrupt
9830 bt[i+2]=1;
9831 }
9832 }
9833 }
57871462 9834
9835 if(itype[slen-1]==SPAN) {
9836 bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception
9837 }
4600ba03 9838
9839#ifdef DISASM
57871462 9840 /* Debug/disassembly */
57871462 9841 for(i=0;i<slen;i++)
9842 {
9843 printf("U:");
9844 int r;
9845 for(r=1;r<=CCREG;r++) {
9846 if((unneeded_reg[i]>>r)&1) {
9847 if(r==HIREG) printf(" HI");
9848 else if(r==LOREG) printf(" LO");
9849 else printf(" r%d",r);
9850 }
9851 }
57871462 9852 printf("\n");
9853 #if defined(__i386__) || defined(__x86_64__)
9854 printf("pre: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7]);
9855 #endif
9856 #ifdef __arm__
9857 printf("pre: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][4],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7],regmap_pre[i][8],regmap_pre[i][9],regmap_pre[i][10],regmap_pre[i][12]);
9858 #endif
9859 printf("needs: ");
9860 if(needed_reg[i]&1) printf("eax ");
9861 if((needed_reg[i]>>1)&1) printf("ecx ");
9862 if((needed_reg[i]>>2)&1) printf("edx ");
9863 if((needed_reg[i]>>3)&1) printf("ebx ");
9864 if((needed_reg[i]>>5)&1) printf("ebp ");
9865 if((needed_reg[i]>>6)&1) printf("esi ");
9866 if((needed_reg[i]>>7)&1) printf("edi ");
57871462 9867 printf("\n");
57871462 9868 #if defined(__i386__) || defined(__x86_64__)
9869 printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]);
9870 printf("dirty: ");
9871 if(regs[i].wasdirty&1) printf("eax ");
9872 if((regs[i].wasdirty>>1)&1) printf("ecx ");
9873 if((regs[i].wasdirty>>2)&1) printf("edx ");
9874 if((regs[i].wasdirty>>3)&1) printf("ebx ");
9875 if((regs[i].wasdirty>>5)&1) printf("ebp ");
9876 if((regs[i].wasdirty>>6)&1) printf("esi ");
9877 if((regs[i].wasdirty>>7)&1) printf("edi ");
9878 #endif
9879 #ifdef __arm__
9880 printf("entry: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[4],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7],regs[i].regmap_entry[8],regs[i].regmap_entry[9],regs[i].regmap_entry[10],regs[i].regmap_entry[12]);
9881 printf("dirty: ");
9882 if(regs[i].wasdirty&1) printf("r0 ");
9883 if((regs[i].wasdirty>>1)&1) printf("r1 ");
9884 if((regs[i].wasdirty>>2)&1) printf("r2 ");
9885 if((regs[i].wasdirty>>3)&1) printf("r3 ");
9886 if((regs[i].wasdirty>>4)&1) printf("r4 ");
9887 if((regs[i].wasdirty>>5)&1) printf("r5 ");
9888 if((regs[i].wasdirty>>6)&1) printf("r6 ");
9889 if((regs[i].wasdirty>>7)&1) printf("r7 ");
9890 if((regs[i].wasdirty>>8)&1) printf("r8 ");
9891 if((regs[i].wasdirty>>9)&1) printf("r9 ");
9892 if((regs[i].wasdirty>>10)&1) printf("r10 ");
9893 if((regs[i].wasdirty>>12)&1) printf("r12 ");
9894 #endif
9895 printf("\n");
9896 disassemble_inst(i);
9897 //printf ("ccadj[%d] = %d\n",i,ccadj[i]);
9898 #if defined(__i386__) || defined(__x86_64__)
9899 printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7]);
9900 if(regs[i].dirty&1) printf("eax ");
9901 if((regs[i].dirty>>1)&1) printf("ecx ");
9902 if((regs[i].dirty>>2)&1) printf("edx ");
9903 if((regs[i].dirty>>3)&1) printf("ebx ");
9904 if((regs[i].dirty>>5)&1) printf("ebp ");
9905 if((regs[i].dirty>>6)&1) printf("esi ");
9906 if((regs[i].dirty>>7)&1) printf("edi ");
9907 #endif
9908 #ifdef __arm__
9909 printf("r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[4],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7],regs[i].regmap[8],regs[i].regmap[9],regs[i].regmap[10],regs[i].regmap[12]);
9910 if(regs[i].dirty&1) printf("r0 ");
9911 if((regs[i].dirty>>1)&1) printf("r1 ");
9912 if((regs[i].dirty>>2)&1) printf("r2 ");
9913 if((regs[i].dirty>>3)&1) printf("r3 ");
9914 if((regs[i].dirty>>4)&1) printf("r4 ");
9915 if((regs[i].dirty>>5)&1) printf("r5 ");
9916 if((regs[i].dirty>>6)&1) printf("r6 ");
9917 if((regs[i].dirty>>7)&1) printf("r7 ");
9918 if((regs[i].dirty>>8)&1) printf("r8 ");
9919 if((regs[i].dirty>>9)&1) printf("r9 ");
9920 if((regs[i].dirty>>10)&1) printf("r10 ");
9921 if((regs[i].dirty>>12)&1) printf("r12 ");
9922 #endif
9923 printf("\n");
9924 if(regs[i].isconst) {
9925 printf("constants: ");
9926 #if defined(__i386__) || defined(__x86_64__)
9927 if(regs[i].isconst&1) printf("eax=%x ",(int)constmap[i][0]);
9928 if((regs[i].isconst>>1)&1) printf("ecx=%x ",(int)constmap[i][1]);
9929 if((regs[i].isconst>>2)&1) printf("edx=%x ",(int)constmap[i][2]);
9930 if((regs[i].isconst>>3)&1) printf("ebx=%x ",(int)constmap[i][3]);
9931 if((regs[i].isconst>>5)&1) printf("ebp=%x ",(int)constmap[i][5]);
9932 if((regs[i].isconst>>6)&1) printf("esi=%x ",(int)constmap[i][6]);
9933 if((regs[i].isconst>>7)&1) printf("edi=%x ",(int)constmap[i][7]);
9934 #endif
9935 #ifdef __arm__
9936 if(regs[i].isconst&1) printf("r0=%x ",(int)constmap[i][0]);
9937 if((regs[i].isconst>>1)&1) printf("r1=%x ",(int)constmap[i][1]);
9938 if((regs[i].isconst>>2)&1) printf("r2=%x ",(int)constmap[i][2]);
9939 if((regs[i].isconst>>3)&1) printf("r3=%x ",(int)constmap[i][3]);
9940 if((regs[i].isconst>>4)&1) printf("r4=%x ",(int)constmap[i][4]);
9941 if((regs[i].isconst>>5)&1) printf("r5=%x ",(int)constmap[i][5]);
9942 if((regs[i].isconst>>6)&1) printf("r6=%x ",(int)constmap[i][6]);
9943 if((regs[i].isconst>>7)&1) printf("r7=%x ",(int)constmap[i][7]);
9944 if((regs[i].isconst>>8)&1) printf("r8=%x ",(int)constmap[i][8]);
9945 if((regs[i].isconst>>9)&1) printf("r9=%x ",(int)constmap[i][9]);
9946 if((regs[i].isconst>>10)&1) printf("r10=%x ",(int)constmap[i][10]);
9947 if((regs[i].isconst>>12)&1) printf("r12=%x ",(int)constmap[i][12]);
9948 #endif
9949 printf("\n");
9950 }
57871462 9951 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
9952 #if defined(__i386__) || defined(__x86_64__)
9953 printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
9954 if(branch_regs[i].dirty&1) printf("eax ");
9955 if((branch_regs[i].dirty>>1)&1) printf("ecx ");
9956 if((branch_regs[i].dirty>>2)&1) printf("edx ");
9957 if((branch_regs[i].dirty>>3)&1) printf("ebx ");
9958 if((branch_regs[i].dirty>>5)&1) printf("ebp ");
9959 if((branch_regs[i].dirty>>6)&1) printf("esi ");
9960 if((branch_regs[i].dirty>>7)&1) printf("edi ");
9961 #endif
9962 #ifdef __arm__
9963 printf("branch(%d): r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[4],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7],branch_regs[i].regmap[8],branch_regs[i].regmap[9],branch_regs[i].regmap[10],branch_regs[i].regmap[12]);
9964 if(branch_regs[i].dirty&1) printf("r0 ");
9965 if((branch_regs[i].dirty>>1)&1) printf("r1 ");
9966 if((branch_regs[i].dirty>>2)&1) printf("r2 ");
9967 if((branch_regs[i].dirty>>3)&1) printf("r3 ");
9968 if((branch_regs[i].dirty>>4)&1) printf("r4 ");
9969 if((branch_regs[i].dirty>>5)&1) printf("r5 ");
9970 if((branch_regs[i].dirty>>6)&1) printf("r6 ");
9971 if((branch_regs[i].dirty>>7)&1) printf("r7 ");
9972 if((branch_regs[i].dirty>>8)&1) printf("r8 ");
9973 if((branch_regs[i].dirty>>9)&1) printf("r9 ");
9974 if((branch_regs[i].dirty>>10)&1) printf("r10 ");
9975 if((branch_regs[i].dirty>>12)&1) printf("r12 ");
9976 #endif
57871462 9977 }
9978 }
4600ba03 9979#endif // DISASM
57871462 9980
9981 /* Pass 8 - Assembly */
9982 linkcount=0;stubcount=0;
9983 ds=0;is_delayslot=0;
9984 cop1_usable=0;
9985 uint64_t is32_pre=0;
9986 u_int dirty_pre=0;
d148d265 9987 void *beginning=start_block();
57871462 9988 if((u_int)addr&1) {
9989 ds=1;
9990 pagespan_ds();
9991 }
9ad4d757 9992 u_int instr_addr0_override=0;
9993
9ad4d757 9994 if (start == 0x80030000) {
9995 // nasty hack for fastbios thing
96186eba 9996 // override block entry to this code
9ad4d757 9997 instr_addr0_override=(u_int)out;
9998 emit_movimm(start,0);
96186eba 9999 // abuse io address var as a flag that we
10000 // have already returned here once
10001 emit_readword((int)&address,1);
9ad4d757 10002 emit_writeword(0,(int)&pcaddr);
96186eba 10003 emit_writeword(0,(int)&address);
9ad4d757 10004 emit_cmp(0,1);
10005 emit_jne((int)new_dyna_leave);
10006 }
57871462 10007 for(i=0;i<slen;i++)
10008 {
10009 //if(ds) printf("ds: ");
4600ba03 10010 disassemble_inst(i);
57871462 10011 if(ds) {
10012 ds=0; // Skip delay slot
10013 if(bt[i]) assem_debug("OOPS - branch into delay slot\n");
10014 instr_addr[i]=0;
10015 } else {
ffb0b9e0 10016 speculate_register_values(i);
57871462 10017 #ifndef DESTRUCTIVE_WRITEBACK
10018 if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000))
10019 {
57871462 10020 wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,is32_pre,
10021 unneeded_reg[i],unneeded_reg_upper[i]);
10022 }
f776eb14 10023 if((itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)&&!likely[i]) {
10024 is32_pre=branch_regs[i].is32;
10025 dirty_pre=branch_regs[i].dirty;
10026 }else{
10027 is32_pre=regs[i].is32;
10028 dirty_pre=regs[i].dirty;
10029 }
57871462 10030 #endif
10031 // write back
10032 if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000))
10033 {
10034 wb_invalidate(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32,
10035 unneeded_reg[i],unneeded_reg_upper[i]);
10036 loop_preload(regmap_pre[i],regs[i].regmap_entry);
10037 }
10038 // branch target entry point
10039 instr_addr[i]=(u_int)out;
10040 assem_debug("<->\n");
10041 // load regs
10042 if(regs[i].regmap_entry[HOST_CCREG]==CCREG&&regs[i].regmap[HOST_CCREG]!=CCREG)
10043 wb_register(CCREG,regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32);
10044 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i],rs2[i]);
10045 address_generation(i,&regs[i],regs[i].regmap_entry);
10046 load_consts(regmap_pre[i],regs[i].regmap,regs[i].was32,i);
10047 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
10048 {
10049 // Load the delay slot registers if necessary
4ef8f67d 10050 if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i]&&(rs1[i+1]!=rt1[i]||rt1[i]==0))
57871462 10051 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i+1],rs1[i+1]);
4ef8f67d 10052 if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i]&&(rs2[i+1]!=rt1[i]||rt1[i]==0))
57871462 10053 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs2[i+1],rs2[i+1]);
b9b61529 10054 if(itype[i+1]==STORE||itype[i+1]==STORELR||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a)
57871462 10055 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,INVCP,INVCP);
10056 }
10057 else if(i+1<slen)
10058 {
10059 // Preload registers for following instruction
10060 if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i])
10061 if(rs1[i+1]!=rt1[i]&&rs1[i+1]!=rt2[i])
10062 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i+1],rs1[i+1]);
10063 if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i])
10064 if(rs2[i+1]!=rt1[i]&&rs2[i+1]!=rt2[i])
10065 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs2[i+1],rs2[i+1]);
10066 }
10067 // TODO: if(is_ooo(i)) address_generation(i+1);
10068 if(itype[i]==CJUMP||itype[i]==FJUMP)
10069 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG);
b9b61529 10070 if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a)
57871462 10071 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,INVCP,INVCP);
10072 if(bt[i]) cop1_usable=0;
10073 // assemble
10074 switch(itype[i]) {
10075 case ALU:
10076 alu_assemble(i,&regs[i]);break;
10077 case IMM16:
10078 imm16_assemble(i,&regs[i]);break;
10079 case SHIFT:
10080 shift_assemble(i,&regs[i]);break;
10081 case SHIFTIMM:
10082 shiftimm_assemble(i,&regs[i]);break;
10083 case LOAD:
10084 load_assemble(i,&regs[i]);break;
10085 case LOADLR:
10086 loadlr_assemble(i,&regs[i]);break;
10087 case STORE:
10088 store_assemble(i,&regs[i]);break;
10089 case STORELR:
10090 storelr_assemble(i,&regs[i]);break;
10091 case COP0:
10092 cop0_assemble(i,&regs[i]);break;
10093 case COP1:
10094 cop1_assemble(i,&regs[i]);break;
10095 case C1LS:
10096 c1ls_assemble(i,&regs[i]);break;
b9b61529 10097 case COP2:
10098 cop2_assemble(i,&regs[i]);break;
10099 case C2LS:
10100 c2ls_assemble(i,&regs[i]);break;
10101 case C2OP:
10102 c2op_assemble(i,&regs[i]);break;
57871462 10103 case FCONV:
10104 fconv_assemble(i,&regs[i]);break;
10105 case FLOAT:
10106 float_assemble(i,&regs[i]);break;
10107 case FCOMP:
10108 fcomp_assemble(i,&regs[i]);break;
10109 case MULTDIV:
10110 multdiv_assemble(i,&regs[i]);break;
10111 case MOV:
10112 mov_assemble(i,&regs[i]);break;
10113 case SYSCALL:
10114 syscall_assemble(i,&regs[i]);break;
7139f3c8 10115 case HLECALL:
10116 hlecall_assemble(i,&regs[i]);break;
1e973cb0 10117 case INTCALL:
10118 intcall_assemble(i,&regs[i]);break;
57871462 10119 case UJUMP:
10120 ujump_assemble(i,&regs[i]);ds=1;break;
10121 case RJUMP:
10122 rjump_assemble(i,&regs[i]);ds=1;break;
10123 case CJUMP:
10124 cjump_assemble(i,&regs[i]);ds=1;break;
10125 case SJUMP:
10126 sjump_assemble(i,&regs[i]);ds=1;break;
10127 case FJUMP:
10128 fjump_assemble(i,&regs[i]);ds=1;break;
10129 case SPAN:
10130 pagespan_assemble(i,&regs[i]);break;
10131 }
10132 if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000)
10133 literal_pool(1024);
10134 else
10135 literal_pool_jumpover(256);
10136 }
10137 }
10138 //assert(itype[i-2]==UJUMP||itype[i-2]==RJUMP||(source[i-2]>>16)==0x1000);
10139 // If the block did not end with an unconditional branch,
10140 // add a jump to the next instruction.
10141 if(i>1) {
10142 if(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000&&itype[i-1]!=SPAN) {
10143 assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP);
10144 assert(i==slen);
10145 if(itype[i-2]!=CJUMP&&itype[i-2]!=SJUMP&&itype[i-2]!=FJUMP) {
10146 store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4);
10147 if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
10148 emit_loadreg(CCREG,HOST_CCREG);
2573466a 10149 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG);
57871462 10150 }
10151 else if(!likely[i-2])
10152 {
10153 store_regs_bt(branch_regs[i-2].regmap,branch_regs[i-2].is32,branch_regs[i-2].dirty,start+i*4);
10154 assert(branch_regs[i-2].regmap[HOST_CCREG]==CCREG);
10155 }
10156 else
10157 {
10158 store_regs_bt(regs[i-2].regmap,regs[i-2].is32,regs[i-2].dirty,start+i*4);
10159 assert(regs[i-2].regmap[HOST_CCREG]==CCREG);
10160 }
10161 add_to_linker((int)out,start+i*4,0);
10162 emit_jmp(0);
10163 }
10164 }
10165 else
10166 {
10167 assert(i>0);
10168 assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP);
10169 store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4);
10170 if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
10171 emit_loadreg(CCREG,HOST_CCREG);
2573466a 10172 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG);
57871462 10173 add_to_linker((int)out,start+i*4,0);
10174 emit_jmp(0);
10175 }
10176
10177 // TODO: delay slot stubs?
10178 // Stubs
10179 for(i=0;i<stubcount;i++)
10180 {
10181 switch(stubs[i][0])
10182 {
10183 case LOADB_STUB:
10184 case LOADH_STUB:
10185 case LOADW_STUB:
10186 case LOADD_STUB:
10187 case LOADBU_STUB:
10188 case LOADHU_STUB:
10189 do_readstub(i);break;
10190 case STOREB_STUB:
10191 case STOREH_STUB:
10192 case STOREW_STUB:
10193 case STORED_STUB:
10194 do_writestub(i);break;
10195 case CC_STUB:
10196 do_ccstub(i);break;
10197 case INVCODE_STUB:
10198 do_invstub(i);break;
10199 case FP_STUB:
10200 do_cop1stub(i);break;
10201 case STORELR_STUB:
10202 do_unalignedwritestub(i);break;
10203 }
10204 }
10205
9ad4d757 10206 if (instr_addr0_override)
10207 instr_addr[0] = instr_addr0_override;
10208
57871462 10209 /* Pass 9 - Linker */
10210 for(i=0;i<linkcount;i++)
10211 {
10212 assem_debug("%8x -> %8x\n",link_addr[i][0],link_addr[i][1]);
10213 literal_pool(64);
10214 if(!link_addr[i][2])
10215 {
10216 void *stub=out;
10217 void *addr=check_addr(link_addr[i][1]);
10218 emit_extjump(link_addr[i][0],link_addr[i][1]);
10219 if(addr) {
10220 set_jump_target(link_addr[i][0],(int)addr);
10221 add_link(link_addr[i][1],stub);
10222 }
10223 else set_jump_target(link_addr[i][0],(int)stub);
10224 }
10225 else
10226 {
10227 // Internal branch
10228 int target=(link_addr[i][1]-start)>>2;
10229 assert(target>=0&&target<slen);
10230 assert(instr_addr[target]);
10231 //#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
10232 //set_jump_target_fillslot(link_addr[i][0],instr_addr[target],link_addr[i][2]>>1);
10233 //#else
10234 set_jump_target(link_addr[i][0],instr_addr[target]);
10235 //#endif
10236 }
10237 }
10238 // External Branch Targets (jump_in)
10239 if(copy+slen*4>(void *)shadow+sizeof(shadow)) copy=shadow;
10240 for(i=0;i<slen;i++)
10241 {
10242 if(bt[i]||i==0)
10243 {
10244 if(instr_addr[i]) // TODO - delay slots (=null)
10245 {
10246 u_int vaddr=start+i*4;
94d23bb9 10247 u_int page=get_page(vaddr);
10248 u_int vpage=get_vpage(vaddr);
57871462 10249 literal_pool(256);
57871462 10250 {
10251 assem_debug("%8x (%d) <- %8x\n",instr_addr[i],i,start+i*4);
10252 assem_debug("jump_in: %x\n",start+i*4);
10253 ll_add(jump_dirty+vpage,vaddr,(void *)out);
10254 int entry_point=do_dirty_stub(i);
03f55e6b 10255 ll_add_flags(jump_in+page,vaddr,state_rflags,(void *)entry_point);
57871462 10256 // If there was an existing entry in the hash table,
10257 // replace it with the new address.
10258 // Don't add new entries. We'll insert the
10259 // ones that actually get used in check_addr().
581335b0 10260 u_int *ht_bin=hash_table[((vaddr>>16)^vaddr)&0xFFFF];
57871462 10261 if(ht_bin[0]==vaddr) {
10262 ht_bin[1]=entry_point;
10263 }
10264 if(ht_bin[2]==vaddr) {
10265 ht_bin[3]=entry_point;
10266 }
10267 }
57871462 10268 }
10269 }
10270 }
10271 // Write out the literal pool if necessary
10272 literal_pool(0);
10273 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
10274 // Align code
10275 if(((u_int)out)&7) emit_addnop(13);
10276 #endif
d148d265 10277 assert((u_int)out-(u_int)beginning<MAX_OUTPUT_BLOCK_SIZE);
57871462 10278 //printf("shadow buffer: %x-%x\n",(int)copy,(int)copy+slen*4);
10279 memcpy(copy,source,slen*4);
10280 copy+=slen*4;
9f51b4b9 10281
d148d265 10282 end_block(beginning);
9f51b4b9 10283
57871462 10284 // If we're within 256K of the end of the buffer,
10285 // start over from the beginning. (Is 256K enough?)
bdeade46 10286 if((u_int)out>(u_int)BASE_ADDR+(1<<TARGET_SIZE_2)-MAX_OUTPUT_BLOCK_SIZE) out=(u_char *)BASE_ADDR;
9f51b4b9 10287
57871462 10288 // Trap writes to any of the pages we compiled
10289 for(i=start>>12;i<=(start+slen*4)>>12;i++) {
10290 invalid_code[i]=0;
57871462 10291 }
9be4ba64 10292 inv_code_start=inv_code_end=~0;
71e490c5 10293
b96d3df7 10294 // for PCSX we need to mark all mirrors too
b12c9fb8 10295 if(get_page(start)<(RAM_SIZE>>12))
10296 for(i=start>>12;i<=(start+slen*4)>>12;i++)
b96d3df7 10297 invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]=
10298 invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]=
10299 invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0;
9f51b4b9 10300
57871462 10301 /* Pass 10 - Free memory by expiring oldest blocks */
9f51b4b9 10302
bdeade46 10303 int end=((((int)out-(int)BASE_ADDR)>>(TARGET_SIZE_2-16))+16384)&65535;
57871462 10304 while(expirep!=end)
10305 {
10306 int shift=TARGET_SIZE_2-3; // Divide into 8 blocks
bdeade46 10307 int base=(int)BASE_ADDR+((expirep>>13)<<shift); // Base address of this block
57871462 10308 inv_debug("EXP: Phase %d\n",expirep);
10309 switch((expirep>>11)&3)
10310 {
10311 case 0:
10312 // Clear jump_in and jump_dirty
10313 ll_remove_matching_addrs(jump_in+(expirep&2047),base,shift);
10314 ll_remove_matching_addrs(jump_dirty+(expirep&2047),base,shift);
10315 ll_remove_matching_addrs(jump_in+2048+(expirep&2047),base,shift);
10316 ll_remove_matching_addrs(jump_dirty+2048+(expirep&2047),base,shift);
10317 break;
10318 case 1:
10319 // Clear pointers
10320 ll_kill_pointers(jump_out[expirep&2047],base,shift);
10321 ll_kill_pointers(jump_out[(expirep&2047)+2048],base,shift);
10322 break;
10323 case 2:
10324 // Clear hash table
10325 for(i=0;i<32;i++) {
581335b0 10326 u_int *ht_bin=hash_table[((expirep&2047)<<5)+i];
57871462 10327 if((ht_bin[3]>>shift)==(base>>shift) ||
10328 ((ht_bin[3]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) {
10329 inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[2],ht_bin[3]);
10330 ht_bin[2]=ht_bin[3]=-1;
10331 }
10332 if((ht_bin[1]>>shift)==(base>>shift) ||
10333 ((ht_bin[1]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) {
10334 inv_debug("EXP: Remove hash %x -> %x\n",ht_bin[0],ht_bin[1]);
10335 ht_bin[0]=ht_bin[2];
10336 ht_bin[1]=ht_bin[3];
10337 ht_bin[2]=ht_bin[3]=-1;
10338 }
10339 }
10340 break;
10341 case 3:
10342 // Clear jump_out
dd3a91a1 10343 #ifdef __arm__
9f51b4b9 10344 if((expirep&2047)==0)
dd3a91a1 10345 do_clear_cache();
10346 #endif
57871462 10347 ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift);
10348 ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift);
10349 break;
10350 }
10351 expirep=(expirep+1)&65535;
10352 }
10353 return 0;
10354}
b9b61529 10355
10356// vim:shiftwidth=2:expandtab