drc: remove yet more n64 stuff
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / new_dynarec.c
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus - new_dynarec.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
57871462 4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
20
21#include <stdlib.h>
22#include <stdint.h> //include for uint64_t
23#include <assert.h>
d848b60a 24#include <errno.h>
4600ba03 25#include <sys/mman.h>
d148d265 26#ifdef __MACH__
27#include <libkern/OSCacheControl.h>
28#endif
1e212a25 29#ifdef _3DS
30#include <3ds_utils.h>
31#endif
32#ifdef VITA
33#include <psp2/kernel/sysmem.h>
34static int sceBlock;
35#endif
57871462 36
d148d265 37#include "new_dynarec_config.h"
dd79da89 38#include "../psxhle.h" //emulator interface
3d624f89 39#include "emu_if.h" //emulator interface
57871462 40
b14b6a8f 41#ifndef ARRAY_SIZE
42#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
43#endif
44
4600ba03 45//#define DISASM
46//#define assem_debug printf
47//#define inv_debug printf
48#define assem_debug(...)
49#define inv_debug(...)
57871462 50
51#ifdef __i386__
52#include "assem_x86.h"
53#endif
54#ifdef __x86_64__
55#include "assem_x64.h"
56#endif
57#ifdef __arm__
58#include "assem_arm.h"
59#endif
60
61#define MAXBLOCK 4096
62#define MAX_OUTPUT_BLOCK_SIZE 262144
2573466a 63
b14b6a8f 64// stubs
65enum stub_type {
66 CC_STUB = 1,
67 FP_STUB = 2,
68 LOADB_STUB = 3,
69 LOADH_STUB = 4,
70 LOADW_STUB = 5,
71 LOADD_STUB = 6,
72 LOADBU_STUB = 7,
73 LOADHU_STUB = 8,
74 STOREB_STUB = 9,
75 STOREH_STUB = 10,
76 STOREW_STUB = 11,
77 STORED_STUB = 12,
78 STORELR_STUB = 13,
79 INVCODE_STUB = 14,
80};
81
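// register-allocation state tracked per instruction: the host-register mapping
// plus 32-bit/dirty/constant flags for the guest registers held in those host regs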
57871462 82struct regstat
83{
84 signed char regmap_entry[HOST_REGS];
85 signed char regmap[HOST_REGS];
86 uint64_t was32;
87 uint64_t is32;
88 uint64_t wasdirty;
89 uint64_t dirty;
90 uint64_t u;
57871462 91 u_int wasconst;
92 u_int isconst;
8575a877 93 u_int loadedconst; // host regs that have constants loaded
94 u_int waswritten; // MIPS regs that were used as store base before
57871462 95};
96
de5a60c3 97// note: asm depends on this layout
57871462 98struct ll_entry
99{
100 u_int vaddr;
de5a60c3 101 u_int reg_sv_flags;
57871462 102 void *addr;
103 struct ll_entry *next;
104};
105
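// one hash_table bin: two {virtual address, translation-cache address} pairs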
df4dc2b1 106struct ht_entry
107{
108 u_int vaddr[2];
109 void *tcaddr[2];
110};
111
b14b6a8f 112struct code_stub
113{
114 enum stub_type type;
115 void *addr;
116 void *retaddr;
117 u_int a;
118 uintptr_t b;
119 uintptr_t c;
120 u_int d;
121 u_int e;
122};
123
643aeae3 124struct link_entry
125{
126 void *addr;
127 u_int target;
128 u_int ext;
129};
130
e2b5e7aa 131 // used by asm:
132 u_char *out;
df4dc2b1 133 struct ht_entry hash_table[65536] __attribute__((aligned(16)));
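 // per-4KB-page block lists: jump_in holds clean block entry points, jump_dirty holds
 // blocks whose source may have been overwritten (re-verified before reuse), and
 // jump_out records branch sites elsewhere in the cache that link into the page
 // so they can be unlinked when the page is invalidated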
e2b5e7aa 134 struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
135 struct ll_entry *jump_dirty[4096];
136
137 static struct ll_entry *jump_out[4096];
138 static u_int start;
139 static u_int *source;
140 static char insn[MAXBLOCK][10];
141 static u_char itype[MAXBLOCK];
142 static u_char opcode[MAXBLOCK];
143 static u_char opcode2[MAXBLOCK];
144 static u_char bt[MAXBLOCK];
145 static u_char rs1[MAXBLOCK];
146 static u_char rs2[MAXBLOCK];
147 static u_char rt1[MAXBLOCK];
148 static u_char rt2[MAXBLOCK];
149 static u_char us1[MAXBLOCK];
150 static u_char us2[MAXBLOCK];
151 static u_char dep1[MAXBLOCK];
152 static u_char dep2[MAXBLOCK];
153 static u_char lt1[MAXBLOCK];
bedfea38 154 static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
155 static uint64_t gte_rt[MAXBLOCK];
156 static uint64_t gte_unneeded[MAXBLOCK];
ffb0b9e0 157 static u_int smrv[32]; // speculated MIPS register values
 158 static u_int smrv_strong; // mask of regs that are likely to have correct values
159 static u_int smrv_weak; // same, but somewhat less likely
160 static u_int smrv_strong_next; // same, but after current insn executes
161 static u_int smrv_weak_next;
e2b5e7aa 162 static int imm[MAXBLOCK];
163 static u_int ba[MAXBLOCK];
164 static char likely[MAXBLOCK];
165 static char is_ds[MAXBLOCK];
166 static char ooo[MAXBLOCK];
167 static uint64_t unneeded_reg[MAXBLOCK];
e2b5e7aa 168 static uint64_t branch_unneeded_reg[MAXBLOCK];
e2b5e7aa 169 static signed char regmap_pre[MAXBLOCK][HOST_REGS];
956f3129 170 static uint64_t current_constmap[HOST_REGS];
171 static uint64_t constmap[MAXBLOCK][HOST_REGS];
172 static struct regstat regs[MAXBLOCK];
173 static struct regstat branch_regs[MAXBLOCK];
e2b5e7aa 174 static signed char minimum_free_regs[MAXBLOCK];
175 static u_int needed_reg[MAXBLOCK];
176 static u_int wont_dirty[MAXBLOCK];
177 static u_int will_dirty[MAXBLOCK];
178 static int ccadj[MAXBLOCK];
179 static int slen;
df4dc2b1 180 static void *instr_addr[MAXBLOCK];
643aeae3 181 static struct link_entry link_addr[MAXBLOCK];
e2b5e7aa 182 static int linkcount;
b14b6a8f 183 static struct code_stub stubs[MAXBLOCK*3];
e2b5e7aa 184 static int stubcount;
185 static u_int literals[1024][2];
186 static int literalcount;
187 static int is_delayslot;
e2b5e7aa 188 static char shadow[1048576] __attribute__((aligned(16)));
189 static void *copy;
190 static int expirep;
191 static u_int stop_after_jal;
a327ad27 192#ifndef RAM_FIXED
01d26796 193 static uintptr_t ram_offset;
a327ad27 194#else
01d26796 195 static const uintptr_t ram_offset=0;
a327ad27 196#endif
e2b5e7aa 197
198 int new_dynarec_hacks;
199 int new_dynarec_did_compile;
57871462 200 extern u_char restore_candidate[512];
201 extern int cycle_count;
202
203 /* registers that may be allocated */
204 /* 1-31 gpr */
205#define HIREG 32 // hi
206#define LOREG 33 // lo
00fa9369 207//#define FSREG 34 // FPU status (FCSR)
57871462 208#define CSREG 35 // Coprocessor status
209#define CCREG 36 // Cycle count
210#define INVCP 37 // Pointer to invalid_code
1edfcc68 211//#define MMREG 38 // Pointer to memory_map
9c45ca93 212//#define ROREG 39 // ram offset (if rdram!=0x80000000)
619e5ded 213#define TEMPREG 40
214#define FTEMP 40 // FPU temporary register
215#define PTEMP 41 // Prefetch temporary register
1edfcc68 216//#define TLREG 42 // TLB mapping offset
619e5ded 217#define RHASH 43 // Return address hash
218#define RHTBL 44 // Return address hash table address
219#define RTEMP 45 // JR/JALR address register
220#define MAXREG 45
221#define AGEN1 46 // Address generation temporary register
1edfcc68 222//#define AGEN2 47 // Address generation temporary register
223//#define MGEN1 48 // Maptable address generation temporary register
224//#define MGEN2 49 // Maptable address generation temporary register
619e5ded 225#define BTREG 50 // Branch target temporary register
57871462 226
227 /* instruction types */
228#define NOP 0 // No operation
229#define LOAD 1 // Load
230#define STORE 2 // Store
231#define LOADLR 3 // Unaligned load
232#define STORELR 4 // Unaligned store
9f51b4b9 233#define MOV 5 // Move
57871462 234#define ALU 6 // Arithmetic/logic
235#define MULTDIV 7 // Multiply/divide
236#define SHIFT 8 // Shift by register
237#define SHIFTIMM 9// Shift by immediate
238#define IMM16 10 // 16-bit immediate
239#define RJUMP 11 // Unconditional jump to register
240#define UJUMP 12 // Unconditional jump
241#define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ)
242#define SJUMP 14 // Conditional branch (regimm format)
243#define COP0 15 // Coprocessor 0
244#define COP1 16 // Coprocessor 1
245#define C1LS 17 // Coprocessor 1 load/store
246#define FJUMP 18 // Conditional branch (floating point)
00fa9369 247//#define FLOAT 19 // Floating point unit
248//#define FCONV 20 // Convert integer to float
249//#define FCOMP 21 // Floating point compare (sets FSREG)
57871462 250#define SYSCALL 22// SYSCALL
251#define OTHER 23 // Other
252#define SPAN 24 // Branch/delay slot spans 2 pages
253#define NI 25 // Not implemented
7139f3c8 254#define HLECALL 26// PCSX fake opcodes for HLE
b9b61529 255#define COP2 27 // Coprocessor 2 move
256#define C2LS 28 // Coprocessor 2 load/store
257#define C2OP 29 // Coprocessor 2 operation
1e973cb0 258#define INTCALL 30// Call interpreter to handle rare corner cases
57871462 259
57871462 260 /* branch codes */
261#define TAKEN 1
262#define NOTTAKEN 2
263#define NULLDS 3
264
265// asm linkage
266int new_recompile_block(int addr);
267void *get_addr_ht(u_int vaddr);
268void invalidate_block(u_int block);
269void invalidate_addr(u_int addr);
270void remove_hash(int vaddr);
57871462 271void dyna_linker();
272void dyna_linker_ds();
273void verify_code();
274void verify_code_vm();
275void verify_code_ds();
276void cc_interrupt();
277void fp_exception();
278void fp_exception_ds();
7139f3c8 279void jump_syscall_hle();
7139f3c8 280void jump_hlecall();
1e973cb0 281void jump_intcall();
7139f3c8 282void new_dyna_leave();
57871462 283
57871462 284// Needed by assembler
e2b5e7aa 285static void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32);
286static void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty);
287static void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr);
288static void load_all_regs(signed char i_regmap[]);
289static void load_needed_regs(signed char i_regmap[],signed char next_regmap[]);
290static void load_regs_entry(int t);
291static void load_all_consts(signed char regmap[],int is32,u_int dirty,int i);
292
293static int verify_dirty(u_int *ptr);
294static int get_final_value(int hr, int i, int *value);
b14b6a8f 295static void add_stub(enum stub_type type, void *addr, void *retaddr,
296 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e);
297static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
298 int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist);
643aeae3 299static void add_to_linker(void *addr, u_int target, int ext);
57871462 300
d148d265 301static void mprotect_w_x(void *start, void *end, int is_x)
302{
303#ifdef NO_WRITE_EXEC
1e212a25 304 #if defined(VITA)
305 // *Open* enables write on all memory that was
306 // allocated by sceKernelAllocMemBlockForVM()?
307 if (is_x)
308 sceKernelCloseVMDomain();
309 else
310 sceKernelOpenVMDomain();
311 #else
d148d265 312 u_long mstart = (u_long)start & ~4095ul;
313 u_long mend = (u_long)end;
314 if (mprotect((void *)mstart, mend - mstart,
315 PROT_READ | (is_x ? PROT_EXEC : PROT_WRITE)) != 0)
316 SysPrintf("mprotect(%c) failed: %s\n", is_x ? 'x' : 'w', strerror(errno));
1e212a25 317 #endif
d148d265 318#endif
319}
320
321static void start_tcache_write(void *start, void *end)
322{
323 mprotect_w_x(start, end, 0);
324}
325
326static void end_tcache_write(void *start, void *end)
327{
328#ifdef __arm__
329 size_t len = (char *)end - (char *)start;
330 #if defined(__BLACKBERRY_QNX__)
331 msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE);
332 #elif defined(__MACH__)
333 sys_cache_control(kCacheFunctionPrepareForExecution, start, len);
334 #elif defined(VITA)
1e212a25 335 sceKernelSyncVMDomain(sceBlock, start, len);
336 #elif defined(_3DS)
337 ctr_flush_invalidate_cache();
d148d265 338 #else
339 __clear_cache(start, end);
340 #endif
341 (void)len;
342#endif
343
344 mprotect_w_x(start, end, 1);
345}
346
347static void *start_block(void)
348{
349 u_char *end = out + MAX_OUTPUT_BLOCK_SIZE;
643aeae3 350 if (end > translation_cache + (1<<TARGET_SIZE_2))
351 end = translation_cache + (1<<TARGET_SIZE_2);
d148d265 352 start_tcache_write(out, end);
353 return out;
354}
355
356static void end_block(void *start)
357{
358 end_tcache_write(start, out);
359}
360
57871462 361//#define DEBUG_CYCLE_COUNT 1
362
b6e87b2b 363#define NO_CYCLE_PENALTY_THR 12
364
4e9dcd7f 365int cycle_multiplier; // 100 for 1.0
366
367static int CLOCK_ADJUST(int x)
368{
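  // s is +1 for x >= 0 and -1 for x < 0, so the +/-50 rounds the division to nearest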
369 int s=(x>>31)|1;
370 return (x * cycle_multiplier + s * 50) / 100;
371}
372
94d23bb9 373static u_int get_page(u_int vaddr)
57871462 374{
0ce47d46 375 u_int page=vaddr&~0xe0000000;
376 if (page < 0x1000000)
377 page &= ~0x0e00000; // RAM mirrors
378 page>>=12;
57871462 379 if(page>2048) page=2048+(page&2047);
94d23bb9 380 return page;
381}
382
d25604ca 383// no virtual mem in PCSX
384static u_int get_vpage(u_int vaddr)
385{
386 return get_page(vaddr);
387}
94d23bb9 388
df4dc2b1 389static struct ht_entry *hash_table_get(u_int vaddr)
390{
391 return &hash_table[((vaddr>>16)^vaddr)&0xFFFF];
392}
393
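// add to the 2-way bin: the previous slot-0 entry moves to slot 1, the old slot-1 entry is evicted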
394static void hash_table_add(struct ht_entry *ht_bin, u_int vaddr, void *tcaddr)
395{
396 ht_bin->vaddr[1] = ht_bin->vaddr[0];
397 ht_bin->tcaddr[1] = ht_bin->tcaddr[0];
398 ht_bin->vaddr[0] = vaddr;
399 ht_bin->tcaddr[0] = tcaddr;
400}
401
 402// some of ari64's messy code; seems to rely on unsigned 32-bit overflow
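// returns nonzero when tcaddr is not in the stretch of cache just ahead of the
// output pointer (roughly 3/8 of it plus one max-size block), i.e. not about to be reclaimed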
403static int doesnt_expire_soon(void *tcaddr)
404{
405 u_int diff = (u_int)((u_char *)tcaddr - out) << (32-TARGET_SIZE_2);
406 return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2)));
407}
408
94d23bb9 409// Get address from virtual address
410// This is called from the recompiled JR/JALR instructions
411void *get_addr(u_int vaddr)
412{
413 u_int page=get_page(vaddr);
414 u_int vpage=get_vpage(vaddr);
57871462 415 struct ll_entry *head;
416 //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
417 head=jump_in[page];
418 while(head!=NULL) {
de5a60c3 419 if(head->vaddr==vaddr) {
643aeae3 420 //printf("TRACE: count=%d next=%d (get_addr match %x: %p)\n",Count,next_interupt,vaddr,head->addr);
df4dc2b1 421 hash_table_add(hash_table_get(vaddr), vaddr, head->addr);
57871462 422 return head->addr;
423 }
424 head=head->next;
425 }
426 head=jump_dirty[vpage];
427 while(head!=NULL) {
de5a60c3 428 if(head->vaddr==vaddr) {
643aeae3 429 //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %p)\n",Count,next_interupt,vaddr,head->addr);
57871462 430 // Don't restore blocks which are about to expire from the cache
df4dc2b1 431 if (doesnt_expire_soon(head->addr))
432 if (verify_dirty(head->addr)) {
57871462 433 //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
434 invalid_code[vaddr>>12]=0;
9be4ba64 435 inv_code_start=inv_code_end=~0;
57871462 436 if(vpage<2048) {
57871462 437 restore_candidate[vpage>>3]|=1<<(vpage&7);
438 }
439 else restore_candidate[page>>3]|=1<<(page&7);
df4dc2b1 440 struct ht_entry *ht_bin = hash_table_get(vaddr);
441 if (ht_bin->vaddr[0] == vaddr)
442 ht_bin->tcaddr[0] = head->addr; // Replace existing entry
57871462 443 else
df4dc2b1 444 hash_table_add(ht_bin, vaddr, head->addr);
445
57871462 446 return head->addr;
447 }
448 }
449 head=head->next;
450 }
451 //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
452 int r=new_recompile_block(vaddr);
453 if(r==0) return get_addr(vaddr);
57871462 454 // Execute in unmapped page, generate page fault exception
455 Status|=2;
456 Cause=(vaddr<<31)|0x8;
457 EPC=(vaddr&1)?vaddr-5:vaddr;
458 BadVAddr=(vaddr&~1);
459 Context=(Context&0xFF80000F)|((BadVAddr>>9)&0x007FFFF0);
460 EntryHi=BadVAddr&0xFFFFE000;
461 return get_addr_ht(0x80000000);
462}
463// Look up address in hash table first
464void *get_addr_ht(u_int vaddr)
465{
466 //printf("TRACE: count=%d next=%d (get_addr_ht %x)\n",Count,next_interupt,vaddr);
df4dc2b1 467 const struct ht_entry *ht_bin = hash_table_get(vaddr);
468 if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0];
469 if (ht_bin->vaddr[1] == vaddr) return ht_bin->tcaddr[1];
57871462 470 return get_addr(vaddr);
471}
472
57871462 473void clear_all_regs(signed char regmap[])
474{
475 int hr;
476 for (hr=0;hr<HOST_REGS;hr++) regmap[hr]=-1;
477}
478
479signed char get_reg(signed char regmap[],int r)
480{
481 int hr;
482 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap[hr]==r) return hr;
483 return -1;
484}
485
486// Find a register that is available for two consecutive cycles
487signed char get_reg2(signed char regmap1[],signed char regmap2[],int r)
488{
489 int hr;
490 for (hr=0;hr<HOST_REGS;hr++) if(hr!=EXCLUDE_REG&&regmap1[hr]==r&&regmap2[hr]==r) return hr;
491 return -1;
492}
493
494int count_free_regs(signed char regmap[])
495{
496 int count=0;
497 int hr;
498 for(hr=0;hr<HOST_REGS;hr++)
499 {
500 if(hr!=EXCLUDE_REG) {
501 if(regmap[hr]<0) count++;
502 }
503 }
504 return count;
505}
506
507void dirty_reg(struct regstat *cur,signed char reg)
508{
509 int hr;
510 if(!reg) return;
511 for (hr=0;hr<HOST_REGS;hr++) {
512 if((cur->regmap[hr]&63)==reg) {
513 cur->dirty|=1<<hr;
514 }
515 }
516}
517
518// If we dirty the lower half of a 64 bit register which is now being
519// sign-extended, we need to dump the upper half.
520// Note: Do this only after completion of the instruction, because
521// some instructions may need to read the full 64-bit value even if
522// overwriting it (eg SLTI, DSRA32).
523static void flush_dirty_uppers(struct regstat *cur)
524{
525 int hr,reg;
526 for (hr=0;hr<HOST_REGS;hr++) {
527 if((cur->dirty>>hr)&1) {
528 reg=cur->regmap[hr];
9f51b4b9 529 if(reg>=64)
57871462 530 if((cur->is32>>(reg&63))&1) cur->regmap[hr]=-1;
531 }
532 }
533}
534
535void set_const(struct regstat *cur,signed char reg,uint64_t value)
536{
537 int hr;
538 if(!reg) return;
539 for (hr=0;hr<HOST_REGS;hr++) {
540 if(cur->regmap[hr]==reg) {
541 cur->isconst|=1<<hr;
956f3129 542 current_constmap[hr]=value;
57871462 543 }
544 else if((cur->regmap[hr]^64)==reg) {
545 cur->isconst|=1<<hr;
956f3129 546 current_constmap[hr]=value>>32;
57871462 547 }
548 }
549}
550
551void clear_const(struct regstat *cur,signed char reg)
552{
553 int hr;
554 if(!reg) return;
555 for (hr=0;hr<HOST_REGS;hr++) {
556 if((cur->regmap[hr]&63)==reg) {
557 cur->isconst&=~(1<<hr);
558 }
559 }
560}
561
562int is_const(struct regstat *cur,signed char reg)
563{
564 int hr;
79c75f1b 565 if(reg<0) return 0;
57871462 566 if(!reg) return 1;
567 for (hr=0;hr<HOST_REGS;hr++) {
568 if((cur->regmap[hr]&63)==reg) {
569 return (cur->isconst>>hr)&1;
570 }
571 }
572 return 0;
573}
574uint64_t get_const(struct regstat *cur,signed char reg)
575{
576 int hr;
577 if(!reg) return 0;
578 for (hr=0;hr<HOST_REGS;hr++) {
579 if(cur->regmap[hr]==reg) {
956f3129 580 return current_constmap[hr];
57871462 581 }
582 }
c43b5311 583 SysPrintf("Unknown constant in r%d\n",reg);
57871462 584 exit(1);
585}
586
587// Least soon needed registers
588// Look at the next ten instructions and see which registers
589// will be used. Try not to reallocate these.
590void lsn(u_char hsn[], int i, int *preferred_reg)
591{
592 int j;
593 int b=-1;
594 for(j=0;j<9;j++)
595 {
596 if(i+j>=slen) {
597 j=slen-i-1;
598 break;
599 }
600 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
601 {
 602 // Don't go past an unconditional jump
603 j++;
604 break;
605 }
606 }
607 for(;j>=0;j--)
608 {
609 if(rs1[i+j]) hsn[rs1[i+j]]=j;
610 if(rs2[i+j]) hsn[rs2[i+j]]=j;
611 if(rt1[i+j]) hsn[rt1[i+j]]=j;
612 if(rt2[i+j]) hsn[rt2[i+j]]=j;
613 if(itype[i+j]==STORE || itype[i+j]==STORELR) {
614 // Stores can allocate zero
615 hsn[rs1[i+j]]=j;
616 hsn[rs2[i+j]]=j;
617 }
618 // On some architectures stores need invc_ptr
619 #if defined(HOST_IMM8)
b9b61529 620 if(itype[i+j]==STORE || itype[i+j]==STORELR || (opcode[i+j]&0x3b)==0x39 || (opcode[i+j]&0x3b)==0x3a) {
57871462 621 hsn[INVCP]=j;
622 }
623 #endif
624 if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
625 {
626 hsn[CCREG]=j;
627 b=j;
628 }
629 }
630 if(b>=0)
631 {
632 if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
633 {
634 // Follow first branch
635 int t=(ba[i+b]-start)>>2;
636 j=7-b;if(t+j>=slen) j=slen-t-1;
637 for(;j>=0;j--)
638 {
639 if(rs1[t+j]) if(hsn[rs1[t+j]]>j+b+2) hsn[rs1[t+j]]=j+b+2;
640 if(rs2[t+j]) if(hsn[rs2[t+j]]>j+b+2) hsn[rs2[t+j]]=j+b+2;
641 //if(rt1[t+j]) if(hsn[rt1[t+j]]>j+b+2) hsn[rt1[t+j]]=j+b+2;
642 //if(rt2[t+j]) if(hsn[rt2[t+j]]>j+b+2) hsn[rt2[t+j]]=j+b+2;
643 }
644 }
645 // TODO: preferred register based on backward branch
646 }
647 // Delay slot should preferably not overwrite branch conditions or cycle count
648 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
649 if(rs1[i-1]) if(hsn[rs1[i-1]]>1) hsn[rs1[i-1]]=1;
650 if(rs2[i-1]) if(hsn[rs2[i-1]]>1) hsn[rs2[i-1]]=1;
651 hsn[CCREG]=1;
652 // ...or hash tables
653 hsn[RHASH]=1;
654 hsn[RHTBL]=1;
655 }
656 // Coprocessor load/store needs FTEMP, even if not declared
b9b61529 657 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 658 hsn[FTEMP]=0;
659 }
660 // Load L/R also uses FTEMP as a temporary register
661 if(itype[i]==LOADLR) {
662 hsn[FTEMP]=0;
663 }
b7918751 664 // Also SWL/SWR/SDL/SDR
665 if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) {
57871462 666 hsn[FTEMP]=0;
667 }
57871462 668 // Don't remove the miniht registers
669 if(itype[i]==UJUMP||itype[i]==RJUMP)
670 {
671 hsn[RHASH]=0;
672 hsn[RHTBL]=0;
673 }
674}
675
676// We only want to allocate registers if we're going to use them again soon
677int needed_again(int r, int i)
678{
679 int j;
680 int b=-1;
681 int rn=10;
9f51b4b9 682
57871462 683 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000))
684 {
685 if(ba[i-1]<start || ba[i-1]>start+slen*4-4)
686 return 0; // Don't need any registers if exiting the block
687 }
688 for(j=0;j<9;j++)
689 {
690 if(i+j>=slen) {
691 j=slen-i-1;
692 break;
693 }
694 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
695 {
 696 // Don't go past an unconditional jump
697 j++;
698 break;
699 }
1e973cb0 700 if(itype[i+j]==SYSCALL||itype[i+j]==HLECALL||itype[i+j]==INTCALL||((source[i+j]&0xfc00003f)==0x0d))
57871462 701 {
702 break;
703 }
704 }
705 for(;j>=1;j--)
706 {
707 if(rs1[i+j]==r) rn=j;
708 if(rs2[i+j]==r) rn=j;
709 if((unneeded_reg[i+j]>>r)&1) rn=10;
710 if(i+j>=0&&(itype[i+j]==UJUMP||itype[i+j]==CJUMP||itype[i+j]==SJUMP||itype[i+j]==FJUMP))
711 {
712 b=j;
713 }
714 }
715 /*
716 if(b>=0)
717 {
718 if(ba[i+b]>=start && ba[i+b]<(start+slen*4))
719 {
720 // Follow first branch
721 int o=rn;
722 int t=(ba[i+b]-start)>>2;
723 j=7-b;if(t+j>=slen) j=slen-t-1;
724 for(;j>=0;j--)
725 {
726 if(!((unneeded_reg[t+j]>>r)&1)) {
727 if(rs1[t+j]==r) if(rn>j+b+2) rn=j+b+2;
728 if(rs2[t+j]==r) if(rn>j+b+2) rn=j+b+2;
729 }
730 else rn=o;
731 }
732 }
733 }*/
b7217e13 734 if(rn<10) return 1;
581335b0 735 (void)b;
57871462 736 return 0;
737}
738
739// Try to match register allocations at the end of a loop with those
740// at the beginning
741int loop_reg(int i, int r, int hr)
742{
743 int j,k;
744 for(j=0;j<9;j++)
745 {
746 if(i+j>=slen) {
747 j=slen-i-1;
748 break;
749 }
750 if(itype[i+j]==UJUMP||itype[i+j]==RJUMP||(source[i+j]>>16)==0x1000)
751 {
 752 // Don't go past an unconditional jump
753 j++;
754 break;
755 }
756 }
757 k=0;
758 if(i>0){
759 if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)
760 k--;
761 }
762 for(;k<j;k++)
763 {
00fa9369 764 assert(r < 64);
765 if((unneeded_reg[i+k]>>r)&1) return hr;
57871462 766 if(i+k>=0&&(itype[i+k]==UJUMP||itype[i+k]==CJUMP||itype[i+k]==SJUMP||itype[i+k]==FJUMP))
767 {
768 if(ba[i+k]>=start && ba[i+k]<(start+i*4))
769 {
770 int t=(ba[i+k]-start)>>2;
771 int reg=get_reg(regs[t].regmap_entry,r);
772 if(reg>=0) return reg;
773 //reg=get_reg(regs[t+1].regmap_entry,r);
774 //if(reg>=0) return reg;
775 }
776 }
777 }
778 return hr;
779}
780
781
782// Allocate every register, preserving source/target regs
783void alloc_all(struct regstat *cur,int i)
784{
785 int hr;
9f51b4b9 786
57871462 787 for(hr=0;hr<HOST_REGS;hr++) {
788 if(hr!=EXCLUDE_REG) {
789 if(((cur->regmap[hr]&63)!=rs1[i])&&((cur->regmap[hr]&63)!=rs2[i])&&
790 ((cur->regmap[hr]&63)!=rt1[i])&&((cur->regmap[hr]&63)!=rt2[i]))
791 {
792 cur->regmap[hr]=-1;
793 cur->dirty&=~(1<<hr);
794 }
795 // Don't need zeros
796 if((cur->regmap[hr]&63)==0)
797 {
798 cur->regmap[hr]=-1;
799 cur->dirty&=~(1<<hr);
800 }
801 }
802 }
803}
804
57871462 805#ifdef __i386__
806#include "assem_x86.c"
807#endif
808#ifdef __x86_64__
809#include "assem_x64.c"
810#endif
811#ifdef __arm__
812#include "assem_arm.c"
813#endif
814
815// Add virtual address mapping to linked list
816void ll_add(struct ll_entry **head,int vaddr,void *addr)
817{
818 struct ll_entry *new_entry;
819 new_entry=malloc(sizeof(struct ll_entry));
820 assert(new_entry!=NULL);
821 new_entry->vaddr=vaddr;
de5a60c3 822 new_entry->reg_sv_flags=0;
57871462 823 new_entry->addr=addr;
824 new_entry->next=*head;
825 *head=new_entry;
826}
827
de5a60c3 828void ll_add_flags(struct ll_entry **head,int vaddr,u_int reg_sv_flags,void *addr)
57871462 829{
7139f3c8 830 ll_add(head,vaddr,addr);
de5a60c3 831 (*head)->reg_sv_flags=reg_sv_flags;
57871462 832}
833
834// Check if an address is already compiled
835// but don't return addresses which are about to expire from the cache
836void *check_addr(u_int vaddr)
837{
df4dc2b1 838 struct ht_entry *ht_bin = hash_table_get(vaddr);
839 size_t i;
b14b6a8f 840 for (i = 0; i < ARRAY_SIZE(ht_bin->vaddr); i++) {
df4dc2b1 841 if (ht_bin->vaddr[i] == vaddr)
842 if (doesnt_expire_soon((u_char *)ht_bin->tcaddr[i] - MAX_OUTPUT_BLOCK_SIZE))
843 if (isclean(ht_bin->tcaddr[i]))
844 return ht_bin->tcaddr[i];
57871462 845 }
94d23bb9 846 u_int page=get_page(vaddr);
57871462 847 struct ll_entry *head;
848 head=jump_in[page];
df4dc2b1 849 while (head != NULL) {
850 if (head->vaddr == vaddr) {
851 if (doesnt_expire_soon(head->addr)) {
57871462 852 // Update existing entry with current address
df4dc2b1 853 if (ht_bin->vaddr[0] == vaddr) {
854 ht_bin->tcaddr[0] = head->addr;
57871462 855 return head->addr;
856 }
df4dc2b1 857 if (ht_bin->vaddr[1] == vaddr) {
858 ht_bin->tcaddr[1] = head->addr;
57871462 859 return head->addr;
860 }
861 // Insert into hash table with low priority.
862 // Don't evict existing entries, as they are probably
863 // addresses that are being accessed frequently.
df4dc2b1 864 if (ht_bin->vaddr[0] == -1) {
865 ht_bin->vaddr[0] = vaddr;
866 ht_bin->tcaddr[0] = head->addr;
867 }
868 else if (ht_bin->vaddr[1] == -1) {
869 ht_bin->vaddr[1] = vaddr;
870 ht_bin->tcaddr[1] = head->addr;
57871462 871 }
872 return head->addr;
873 }
874 }
875 head=head->next;
876 }
877 return 0;
878}
879
880void remove_hash(int vaddr)
881{
882 //printf("remove hash: %x\n",vaddr);
df4dc2b1 883 struct ht_entry *ht_bin = hash_table_get(vaddr);
884 if (ht_bin->vaddr[1] == vaddr) {
885 ht_bin->vaddr[1] = -1;
886 ht_bin->tcaddr[1] = NULL;
57871462 887 }
df4dc2b1 888 if (ht_bin->vaddr[0] == vaddr) {
889 ht_bin->vaddr[0] = ht_bin->vaddr[1];
890 ht_bin->tcaddr[0] = ht_bin->tcaddr[1];
891 ht_bin->vaddr[1] = -1;
892 ht_bin->tcaddr[1] = NULL;
57871462 893 }
894}
895
643aeae3 896void ll_remove_matching_addrs(struct ll_entry **head,uintptr_t addr,int shift)
57871462 897{
898 struct ll_entry *next;
899 while(*head) {
643aeae3 900 if(((uintptr_t)((*head)->addr)>>shift)==(addr>>shift) ||
901 ((uintptr_t)((*head)->addr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift))
57871462 902 {
643aeae3 903 inv_debug("EXP: Remove pointer to %p (%x)\n",(*head)->addr,(*head)->vaddr);
57871462 904 remove_hash((*head)->vaddr);
905 next=(*head)->next;
906 free(*head);
907 *head=next;
908 }
909 else
910 {
911 head=&((*head)->next);
912 }
913 }
914}
915
916// Remove all entries from linked list
917void ll_clear(struct ll_entry **head)
918{
919 struct ll_entry *cur;
920 struct ll_entry *next;
581335b0 921 if((cur=*head)) {
57871462 922 *head=0;
923 while(cur) {
924 next=cur->next;
925 free(cur);
926 cur=next;
927 }
928 }
929}
930
931// Dereference the pointers and remove if it matches
643aeae3 932static void ll_kill_pointers(struct ll_entry *head,uintptr_t addr,int shift)
57871462 933{
934 while(head) {
643aeae3 935 uintptr_t ptr = (uintptr_t)get_pointer(head->addr);
936 inv_debug("EXP: Lookup pointer to %lx at %p (%x)\n",(long)ptr,head->addr,head->vaddr);
57871462 937 if(((ptr>>shift)==(addr>>shift)) ||
938 (((ptr-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(addr>>shift)))
939 {
643aeae3 940 inv_debug("EXP: Kill pointer at %p (%x)\n",head->addr,head->vaddr);
d148d265 941 void *host_addr=find_extjump_insn(head->addr);
dd3a91a1 942 #ifdef __arm__
d148d265 943 mark_clear_cache(host_addr);
dd3a91a1 944 #endif
df4dc2b1 945 set_jump_target(host_addr, head->addr);
57871462 946 }
947 head=head->next;
948 }
949}
950
951// This is called when we write to a compiled block (see do_invstub)
f76eeef9 952void invalidate_page(u_int page)
57871462 953{
57871462 954 struct ll_entry *head;
955 struct ll_entry *next;
956 head=jump_in[page];
957 jump_in[page]=0;
958 while(head!=NULL) {
959 inv_debug("INVALIDATE: %x\n",head->vaddr);
960 remove_hash(head->vaddr);
961 next=head->next;
962 free(head);
963 head=next;
964 }
965 head=jump_out[page];
966 jump_out[page]=0;
967 while(head!=NULL) {
643aeae3 968 inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr);
d148d265 969 void *host_addr=find_extjump_insn(head->addr);
dd3a91a1 970 #ifdef __arm__
d148d265 971 mark_clear_cache(host_addr);
dd3a91a1 972 #endif
df4dc2b1 973 set_jump_target(host_addr, head->addr);
57871462 974 next=head->next;
975 free(head);
976 head=next;
977 }
57871462 978}
9be4ba64 979
980static void invalidate_block_range(u_int block, u_int first, u_int last)
57871462 981{
94d23bb9 982 u_int page=get_page(block<<12);
57871462 983 //printf("first=%d last=%d\n",first,last);
f76eeef9 984 invalidate_page(page);
57871462 985 assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages)
986 assert(last<page+5);
987 // Invalidate the adjacent pages if a block crosses a 4K boundary
988 while(first<page) {
989 invalidate_page(first);
990 first++;
991 }
992 for(first=page+1;first<last;first++) {
993 invalidate_page(first);
994 }
dd3a91a1 995 #ifdef __arm__
996 do_clear_cache();
997 #endif
9f51b4b9 998
57871462 999 // Don't trap writes
1000 invalid_code[block]=1;
f76eeef9 1001
57871462 1002 #ifdef USE_MINI_HT
1003 memset(mini_ht,-1,sizeof(mini_ht));
1004 #endif
1005}
9be4ba64 1006
1007void invalidate_block(u_int block)
1008{
1009 u_int page=get_page(block<<12);
1010 u_int vpage=get_vpage(block<<12);
1011 inv_debug("INVALIDATE: %x (%d)\n",block<<12,page);
1012 //inv_debug("invalid_code[block]=%d\n",invalid_code[block]);
1013 u_int first,last;
1014 first=last=page;
1015 struct ll_entry *head;
1016 head=jump_dirty[vpage];
1017 //printf("page=%d vpage=%d\n",page,vpage);
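 // scan the dirty blocks that start in this page and widen first/last to cover
 // every RAM page their code spans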
1018 while(head!=NULL) {
9be4ba64 1019 if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision
01d26796 1020 u_char *start, *end;
1021 get_bounds(head->addr, &start, &end);
1022 //printf("start: %p end: %p\n", start, end);
1023 if (page < 2048 && start >= rdram && end < rdram+RAM_SIZE) {
1024 if (((start-rdram)>>12) <= page && ((end-1-rdram)>>12) >= page) {
1025 if ((((start-rdram)>>12)&2047) < first) first = ((start-rdram)>>12)&2047;
1026 if ((((end-1-rdram)>>12)&2047) > last) last = ((end-1-rdram)>>12)&2047;
9be4ba64 1027 }
1028 }
9be4ba64 1029 }
1030 head=head->next;
1031 }
1032 invalidate_block_range(block,first,last);
1033}
1034
57871462 1035void invalidate_addr(u_int addr)
1036{
9be4ba64 1037 //static int rhits;
1038 // this check is done by the caller
1039 //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; }
d25604ca 1040 u_int page=get_vpage(addr);
9be4ba64 1041 if(page<2048) { // RAM
1042 struct ll_entry *head;
1043 u_int addr_min=~0, addr_max=0;
4a35de07 1044 u_int mask=RAM_SIZE-1;
1045 u_int addr_main=0x80000000|(addr&mask);
9be4ba64 1046 int pg1;
4a35de07 1047 inv_code_start=addr_main&~0xfff;
1048 inv_code_end=addr_main|0xfff;
9be4ba64 1049 pg1=page;
1050 if (pg1>0) {
1051 // must check previous page too because of spans..
1052 pg1--;
1053 inv_code_start-=0x1000;
1054 }
1055 for(;pg1<=page;pg1++) {
1056 for(head=jump_dirty[pg1];head!=NULL;head=head->next) {
01d26796 1057 u_char *start_h, *end_h;
1058 u_int start, end;
1059 get_bounds(head->addr, &start_h, &end_h);
1060 start = (uintptr_t)start_h - ram_offset;
1061 end = (uintptr_t)end_h - ram_offset;
4a35de07 1062 if(start<=addr_main&&addr_main<end) {
9be4ba64 1063 if(start<addr_min) addr_min=start;
1064 if(end>addr_max) addr_max=end;
1065 }
4a35de07 1066 else if(addr_main<start) {
9be4ba64 1067 if(start<inv_code_end)
1068 inv_code_end=start-1;
1069 }
1070 else {
1071 if(end>inv_code_start)
1072 inv_code_start=end;
1073 }
1074 }
1075 }
1076 if (addr_min!=~0) {
1077 inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max);
1078 inv_code_start=inv_code_end=~0;
1079 invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12);
1080 return;
1081 }
1082 else {
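 // no compiled code near addr: remember this code-free range so the caller
 // can skip invalidation for future writes that land inside it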
4a35de07 1083 inv_code_start=(addr&~mask)|(inv_code_start&mask);
1084 inv_code_end=(addr&~mask)|(inv_code_end&mask);
d25604ca 1085 inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);
9be4ba64 1086 return;
d25604ca 1087 }
9be4ba64 1088 }
57871462 1089 invalidate_block(addr>>12);
1090}
9be4ba64 1091
dd3a91a1 1092// This is called when loading a save state.
1093// Anything could have changed, so invalidate everything.
57871462 1094void invalidate_all_pages()
1095{
581335b0 1096 u_int page;
57871462 1097 for(page=0;page<4096;page++)
1098 invalidate_page(page);
1099 for(page=0;page<1048576;page++)
1100 if(!invalid_code[page]) {
1101 restore_candidate[(page&2047)>>3]|=1<<(page&7);
1102 restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
1103 }
57871462 1104 #ifdef USE_MINI_HT
1105 memset(mini_ht,-1,sizeof(mini_ht));
1106 #endif
57871462 1107}
1108
1109// Add an entry to jump_out after making a link
1110void add_link(u_int vaddr,void *src)
1111{
94d23bb9 1112 u_int page=get_page(vaddr);
643aeae3 1113 inv_debug("add_link: %p -> %x (%d)\n",src,vaddr,page);
76f71c27 1114 int *ptr=(int *)(src+4);
1115 assert((*ptr&0x0fff0000)==0x059f0000);
581335b0 1116 (void)ptr;
57871462 1117 ll_add(jump_out+page,vaddr,src);
643aeae3 1118 //void *ptr=get_pointer(src);
1119 //inv_debug("add_link: Pointer is to %p\n",ptr);
57871462 1120}
1121
1122// If a code block was found to be unmodified (bit was set in
1123// restore_candidate) and it remains unmodified (bit is clear
1124// in invalid_code) then move the entries for that 4K page from
1125// the dirty list to the clean list.
1126void clean_blocks(u_int page)
1127{
1128 struct ll_entry *head;
1129 inv_debug("INV: clean_blocks page=%d\n",page);
1130 head=jump_dirty[page];
1131 while(head!=NULL) {
1132 if(!invalid_code[head->vaddr>>12]) {
1133 // Don't restore blocks which are about to expire from the cache
df4dc2b1 1134 if (doesnt_expire_soon(head->addr)) {
581335b0 1135 if(verify_dirty(head->addr)) {
01d26796 1136 u_char *start, *end;
643aeae3 1137 //printf("Possibly Restore %x (%p)\n",head->vaddr, head->addr);
57871462 1138 u_int i;
1139 u_int inv=0;
01d26796 1140 get_bounds(head->addr, &start, &end);
1141 if (start - rdram < RAM_SIZE) {
1142 for (i = (start-rdram+0x80000000)>>12; i <= (end-1-rdram+0x80000000)>>12; i++) {
57871462 1143 inv|=invalid_code[i];
1144 }
1145 }
4cb76aa4 1146 else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
57871462 1147 inv=1;
1148 }
1149 if(!inv) {
df4dc2b1 1150 void *clean_addr = get_clean_addr(head->addr);
1151 if (doesnt_expire_soon(clean_addr)) {
57871462 1152 u_int ppage=page;
643aeae3 1153 inv_debug("INV: Restored %x (%p/%p)\n",head->vaddr, head->addr, clean_addr);
57871462 1154 //printf("page=%x, addr=%x\n",page,head->vaddr);
1155 //assert(head->vaddr>>12==(page|0x80000));
de5a60c3 1156 ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
df4dc2b1 1157 struct ht_entry *ht_bin = hash_table_get(head->vaddr);
1158 if (ht_bin->vaddr[0] == head->vaddr)
1159 ht_bin->tcaddr[0] = clean_addr; // Replace existing entry
1160 if (ht_bin->vaddr[1] == head->vaddr)
1161 ht_bin->tcaddr[1] = clean_addr; // Replace existing entry
57871462 1162 }
1163 }
1164 }
1165 }
1166 }
1167 head=head->next;
1168 }
1169}
1170
1171
1172void mov_alloc(struct regstat *current,int i)
1173{
1174 // Note: Don't need to actually alloc the source registers
1175 if((~current->is32>>rs1[i])&1) {
1176 //alloc_reg64(current,i,rs1[i]);
00fa9369 1177 assert(0);
57871462 1178 } else {
1179 //alloc_reg(current,i,rs1[i]);
1180 alloc_reg(current,i,rt1[i]);
1181 current->is32|=(1LL<<rt1[i]);
1182 }
1183 clear_const(current,rs1[i]);
1184 clear_const(current,rt1[i]);
1185 dirty_reg(current,rt1[i]);
1186}
1187
1188void shiftimm_alloc(struct regstat *current,int i)
1189{
57871462 1190 if(opcode2[i]<=0x3) // SLL/SRL/SRA
1191 {
1192 if(rt1[i]) {
1193 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1194 else lt1[i]=rs1[i];
1195 alloc_reg(current,i,rt1[i]);
1196 current->is32|=1LL<<rt1[i];
1197 dirty_reg(current,rt1[i]);
dc49e339 1198 if(is_const(current,rs1[i])) {
1199 int v=get_const(current,rs1[i]);
1200 if(opcode2[i]==0x00) set_const(current,rt1[i],v<<imm[i]);
1201 if(opcode2[i]==0x02) set_const(current,rt1[i],(u_int)v>>imm[i]);
1202 if(opcode2[i]==0x03) set_const(current,rt1[i],v>>imm[i]);
1203 }
1204 else clear_const(current,rt1[i]);
57871462 1205 }
1206 }
dc49e339 1207 else
1208 {
1209 clear_const(current,rs1[i]);
1210 clear_const(current,rt1[i]);
1211 }
1212
57871462 1213 if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
1214 {
9c45ca93 1215 assert(0);
57871462 1216 }
1217 if(opcode2[i]==0x3c) // DSLL32
1218 {
9c45ca93 1219 assert(0);
57871462 1220 }
1221 if(opcode2[i]==0x3e) // DSRL32
1222 {
9c45ca93 1223 assert(0);
57871462 1224 }
1225 if(opcode2[i]==0x3f) // DSRA32
1226 {
9c45ca93 1227 assert(0);
57871462 1228 }
1229}
1230
1231void shift_alloc(struct regstat *current,int i)
1232{
1233 if(rt1[i]) {
1234 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
1235 {
1236 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1237 if(rs2[i]) alloc_reg(current,i,rs2[i]);
1238 alloc_reg(current,i,rt1[i]);
e1190b87 1239 if(rt1[i]==rs2[i]) {
1240 alloc_reg_temp(current,i,-1);
1241 minimum_free_regs[i]=1;
1242 }
57871462 1243 current->is32|=1LL<<rt1[i];
1244 } else { // DSLLV/DSRLV/DSRAV
00fa9369 1245 assert(0);
57871462 1246 }
1247 clear_const(current,rs1[i]);
1248 clear_const(current,rs2[i]);
1249 clear_const(current,rt1[i]);
1250 dirty_reg(current,rt1[i]);
1251 }
1252}
1253
1254void alu_alloc(struct regstat *current,int i)
1255{
1256 if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
1257 if(rt1[i]) {
1258 if(rs1[i]&&rs2[i]) {
1259 alloc_reg(current,i,rs1[i]);
1260 alloc_reg(current,i,rs2[i]);
1261 }
1262 else {
1263 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1264 if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
1265 }
1266 alloc_reg(current,i,rt1[i]);
1267 }
1268 current->is32|=1LL<<rt1[i];
1269 }
1270 if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
1271 if(rt1[i]) {
1272 if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
1273 {
1274 alloc_reg64(current,i,rs1[i]);
1275 alloc_reg64(current,i,rs2[i]);
1276 alloc_reg(current,i,rt1[i]);
1277 } else {
1278 alloc_reg(current,i,rs1[i]);
1279 alloc_reg(current,i,rs2[i]);
1280 alloc_reg(current,i,rt1[i]);
1281 }
1282 }
1283 current->is32|=1LL<<rt1[i];
1284 }
1285 if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
1286 if(rt1[i]) {
1287 if(rs1[i]&&rs2[i]) {
1288 alloc_reg(current,i,rs1[i]);
1289 alloc_reg(current,i,rs2[i]);
1290 }
1291 else
1292 {
1293 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1294 if(rs2[i]&&needed_again(rs2[i],i)) alloc_reg(current,i,rs2[i]);
1295 }
1296 alloc_reg(current,i,rt1[i]);
1297 if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
1298 {
57871462 1299 if(get_reg(current->regmap,rt1[i]|64)>=0) {
00fa9369 1300 assert(0);
57871462 1301 }
1302 current->is32&=~(1LL<<rt1[i]);
1303 } else {
1304 current->is32|=1LL<<rt1[i];
1305 }
1306 }
1307 }
1308 if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
00fa9369 1309 assert(0);
57871462 1310 }
1311 clear_const(current,rs1[i]);
1312 clear_const(current,rs2[i]);
1313 clear_const(current,rt1[i]);
1314 dirty_reg(current,rt1[i]);
1315}
1316
1317void imm16_alloc(struct regstat *current,int i)
1318{
1319 if(rs1[i]&&needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1320 else lt1[i]=rs1[i];
1321 if(rt1[i]) alloc_reg(current,i,rt1[i]);
1322 if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
00fa9369 1323 assert(0);
57871462 1324 }
1325 else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
1326 if((~current->is32>>rs1[i])&1) alloc_reg64(current,i,rs1[i]);
1327 current->is32|=1LL<<rt1[i];
1328 clear_const(current,rs1[i]);
1329 clear_const(current,rt1[i]);
1330 }
1331 else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
1332 if(((~current->is32>>rs1[i])&1)&&opcode[i]>0x0c) {
1333 if(rs1[i]!=rt1[i]) {
1334 if(needed_again(rs1[i],i)) alloc_reg64(current,i,rs1[i]);
1335 alloc_reg64(current,i,rt1[i]);
1336 current->is32&=~(1LL<<rt1[i]);
1337 }
1338 }
1339 else current->is32|=1LL<<rt1[i]; // ANDI clears upper bits
1340 if(is_const(current,rs1[i])) {
1341 int v=get_const(current,rs1[i]);
1342 if(opcode[i]==0x0c) set_const(current,rt1[i],v&imm[i]);
1343 if(opcode[i]==0x0d) set_const(current,rt1[i],v|imm[i]);
1344 if(opcode[i]==0x0e) set_const(current,rt1[i],v^imm[i]);
1345 }
1346 else clear_const(current,rt1[i]);
1347 }
1348 else if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
1349 if(is_const(current,rs1[i])) {
1350 int v=get_const(current,rs1[i]);
1351 set_const(current,rt1[i],v+imm[i]);
1352 }
1353 else clear_const(current,rt1[i]);
1354 current->is32|=1LL<<rt1[i];
1355 }
1356 else {
1357 set_const(current,rt1[i],((long long)((short)imm[i]))<<16); // LUI
1358 current->is32|=1LL<<rt1[i];
1359 }
1360 dirty_reg(current,rt1[i]);
1361}
1362
1363void load_alloc(struct regstat *current,int i)
1364{
1365 clear_const(current,rt1[i]);
1366 //if(rs1[i]!=rt1[i]&&needed_again(rs1[i],i)) clear_const(current,rs1[i]); // Does this help or hurt?
1367 if(!rs1[i]) current->u&=~1LL; // Allow allocating r0 if it's the source register
1368 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
373d1d07 1369 if(rt1[i]&&!((current->u>>rt1[i])&1)) {
57871462 1370 alloc_reg(current,i,rt1[i]);
373d1d07 1371 assert(get_reg(current->regmap,rt1[i])>=0);
57871462 1372 if(opcode[i]==0x27||opcode[i]==0x37) // LWU/LD
1373 {
1374 current->is32&=~(1LL<<rt1[i]);
1375 alloc_reg64(current,i,rt1[i]);
1376 }
1377 else if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
1378 {
1379 current->is32&=~(1LL<<rt1[i]);
1380 alloc_reg64(current,i,rt1[i]);
1381 alloc_all(current,i);
1382 alloc_reg64(current,i,FTEMP);
e1190b87 1383 minimum_free_regs[i]=HOST_REGS;
57871462 1384 }
1385 else current->is32|=1LL<<rt1[i];
1386 dirty_reg(current,rt1[i]);
57871462 1387 // LWL/LWR need a temporary register for the old value
1388 if(opcode[i]==0x22||opcode[i]==0x26)
1389 {
1390 alloc_reg(current,i,FTEMP);
1391 alloc_reg_temp(current,i,-1);
e1190b87 1392 minimum_free_regs[i]=1;
57871462 1393 }
1394 }
1395 else
1396 {
373d1d07 1397 // Load to r0 or unneeded register (dummy load)
57871462 1398 // but we still need a register to calculate the address
535d208a 1399 if(opcode[i]==0x22||opcode[i]==0x26)
1400 {
1401 alloc_reg(current,i,FTEMP); // LWL/LWR need another temporary
1402 }
57871462 1403 alloc_reg_temp(current,i,-1);
e1190b87 1404 minimum_free_regs[i]=1;
535d208a 1405 if(opcode[i]==0x1A||opcode[i]==0x1B) // LDL/LDR
1406 {
1407 alloc_all(current,i);
1408 alloc_reg64(current,i,FTEMP);
e1190b87 1409 minimum_free_regs[i]=HOST_REGS;
535d208a 1410 }
57871462 1411 }
1412}
1413
1414void store_alloc(struct regstat *current,int i)
1415{
1416 clear_const(current,rs2[i]);
1417 if(!(rs2[i])) current->u&=~1LL; // Allow allocating r0 if necessary
1418 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1419 alloc_reg(current,i,rs2[i]);
1420 if(opcode[i]==0x2c||opcode[i]==0x2d||opcode[i]==0x3f) { // 64-bit SDL/SDR/SD
1421 alloc_reg64(current,i,rs2[i]);
1422 if(rs2[i]) alloc_reg(current,i,FTEMP);
1423 }
57871462 1424 #if defined(HOST_IMM8)
1425 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1426 else alloc_reg(current,i,INVCP);
1427 #endif
b7918751 1428 if(opcode[i]==0x2a||opcode[i]==0x2e||opcode[i]==0x2c||opcode[i]==0x2d) { // SWL/SWR/SDL/SDR
57871462 1429 alloc_reg(current,i,FTEMP);
1430 }
1431 // We need a temporary register for address generation
1432 alloc_reg_temp(current,i,-1);
e1190b87 1433 minimum_free_regs[i]=1;
57871462 1434}
1435
1436void c1ls_alloc(struct regstat *current,int i)
1437{
1438 //clear_const(current,rs1[i]); // FIXME
1439 clear_const(current,rt1[i]);
1440 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1441 alloc_reg(current,i,CSREG); // Status
1442 alloc_reg(current,i,FTEMP);
1443 if(opcode[i]==0x35||opcode[i]==0x3d) { // 64-bit LDC1/SDC1
1444 alloc_reg64(current,i,FTEMP);
1445 }
57871462 1446 #if defined(HOST_IMM8)
1447 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1448 else if((opcode[i]&0x3b)==0x39) // SWC1/SDC1
1449 alloc_reg(current,i,INVCP);
1450 #endif
1451 // We need a temporary register for address generation
1452 alloc_reg_temp(current,i,-1);
1453}
1454
b9b61529 1455void c2ls_alloc(struct regstat *current,int i)
1456{
1457 clear_const(current,rt1[i]);
1458 if(needed_again(rs1[i],i)) alloc_reg(current,i,rs1[i]);
1459 alloc_reg(current,i,FTEMP);
b9b61529 1460 #if defined(HOST_IMM8)
1461 // On CPUs without 32-bit immediates we need a pointer to invalid_code
1edfcc68 1462 if((opcode[i]&0x3b)==0x3a) // SWC2/SDC2
b9b61529 1463 alloc_reg(current,i,INVCP);
1464 #endif
1465 // We need a temporary register for address generation
1466 alloc_reg_temp(current,i,-1);
e1190b87 1467 minimum_free_regs[i]=1;
b9b61529 1468}
1469
57871462 1470#ifndef multdiv_alloc
1471void multdiv_alloc(struct regstat *current,int i)
1472{
1473 // case 0x18: MULT
1474 // case 0x19: MULTU
1475 // case 0x1A: DIV
1476 // case 0x1B: DIVU
1477 // case 0x1C: DMULT
1478 // case 0x1D: DMULTU
1479 // case 0x1E: DDIV
1480 // case 0x1F: DDIVU
1481 clear_const(current,rs1[i]);
1482 clear_const(current,rs2[i]);
1483 if(rs1[i]&&rs2[i])
1484 {
1485 if((opcode2[i]&4)==0) // 32-bit
1486 {
1487 current->u&=~(1LL<<HIREG);
1488 current->u&=~(1LL<<LOREG);
1489 alloc_reg(current,i,HIREG);
1490 alloc_reg(current,i,LOREG);
1491 alloc_reg(current,i,rs1[i]);
1492 alloc_reg(current,i,rs2[i]);
1493 current->is32|=1LL<<HIREG;
1494 current->is32|=1LL<<LOREG;
1495 dirty_reg(current,HIREG);
1496 dirty_reg(current,LOREG);
1497 }
1498 else // 64-bit
1499 {
00fa9369 1500 assert(0);
57871462 1501 }
1502 }
1503 else
1504 {
1505 // Multiply by zero is zero.
1506 // MIPS does not have a divide by zero exception.
1507 // The result is undefined, we return zero.
1508 alloc_reg(current,i,HIREG);
1509 alloc_reg(current,i,LOREG);
1510 current->is32|=1LL<<HIREG;
1511 current->is32|=1LL<<LOREG;
1512 dirty_reg(current,HIREG);
1513 dirty_reg(current,LOREG);
1514 }
1515}
1516#endif
1517
1518void cop0_alloc(struct regstat *current,int i)
1519{
1520 if(opcode2[i]==0) // MFC0
1521 {
1522 if(rt1[i]) {
1523 clear_const(current,rt1[i]);
1524 alloc_all(current,i);
1525 alloc_reg(current,i,rt1[i]);
1526 current->is32|=1LL<<rt1[i];
1527 dirty_reg(current,rt1[i]);
1528 }
1529 }
1530 else if(opcode2[i]==4) // MTC0
1531 {
1532 if(rs1[i]){
1533 clear_const(current,rs1[i]);
1534 alloc_reg(current,i,rs1[i]);
1535 alloc_all(current,i);
1536 }
1537 else {
1538 alloc_all(current,i); // FIXME: Keep r0
1539 current->u&=~1LL;
1540 alloc_reg(current,i,0);
1541 }
1542 }
1543 else
1544 {
1545 // TLBR/TLBWI/TLBWR/TLBP/ERET
1546 assert(opcode2[i]==0x10);
1547 alloc_all(current,i);
1548 }
e1190b87 1549 minimum_free_regs[i]=HOST_REGS;
57871462 1550}
1551
00fa9369 1552static void cop12_alloc(struct regstat *current,int i)
57871462 1553{
1554 alloc_reg(current,i,CSREG); // Load status
00fa9369 1555 if(opcode2[i]<3) // MFC1/CFC1
57871462 1556 {
7de557a6 1557 if(rt1[i]){
1558 clear_const(current,rt1[i]);
00fa9369 1559 alloc_reg(current,i,rt1[i]);
1560 current->is32|=1LL<<rt1[i];
7de557a6 1561 dirty_reg(current,rt1[i]);
57871462 1562 }
57871462 1563 alloc_reg_temp(current,i,-1);
1564 }
00fa9369 1565 else if(opcode2[i]>3) // MTC1/CTC1
57871462 1566 {
1567 if(rs1[i]){
1568 clear_const(current,rs1[i]);
00fa9369 1569 alloc_reg(current,i,rs1[i]);
57871462 1570 }
1571 else {
1572 current->u&=~1LL;
1573 alloc_reg(current,i,0);
57871462 1574 }
00fa9369 1575 alloc_reg_temp(current,i,-1);
57871462 1576 }
e1190b87 1577 minimum_free_regs[i]=1;
57871462 1578}
00fa9369 1579
b9b61529 1580void c2op_alloc(struct regstat *current,int i)
1581{
1582 alloc_reg_temp(current,i,-1);
1583}
57871462 1584
1585void syscall_alloc(struct regstat *current,int i)
1586{
1587 alloc_cc(current,i);
1588 dirty_reg(current,CCREG);
1589 alloc_all(current,i);
e1190b87 1590 minimum_free_regs[i]=HOST_REGS;
57871462 1591 current->isconst=0;
1592}
1593
1594void delayslot_alloc(struct regstat *current,int i)
1595{
1596 switch(itype[i]) {
1597 case UJUMP:
1598 case CJUMP:
1599 case SJUMP:
1600 case RJUMP:
1601 case FJUMP:
1602 case SYSCALL:
7139f3c8 1603 case HLECALL:
57871462 1604 case SPAN:
1605 assem_debug("jump in the delay slot. this shouldn't happen.\n");//exit(1);
c43b5311 1606 SysPrintf("Disabled speculative precompilation\n");
57871462 1607 stop_after_jal=1;
1608 break;
1609 case IMM16:
1610 imm16_alloc(current,i);
1611 break;
1612 case LOAD:
1613 case LOADLR:
1614 load_alloc(current,i);
1615 break;
1616 case STORE:
1617 case STORELR:
1618 store_alloc(current,i);
1619 break;
1620 case ALU:
1621 alu_alloc(current,i);
1622 break;
1623 case SHIFT:
1624 shift_alloc(current,i);
1625 break;
1626 case MULTDIV:
1627 multdiv_alloc(current,i);
1628 break;
1629 case SHIFTIMM:
1630 shiftimm_alloc(current,i);
1631 break;
1632 case MOV:
1633 mov_alloc(current,i);
1634 break;
1635 case COP0:
1636 cop0_alloc(current,i);
1637 break;
1638 case COP1:
b9b61529 1639 case COP2:
00fa9369 1640 cop12_alloc(current,i);
57871462 1641 break;
1642 case C1LS:
1643 c1ls_alloc(current,i);
1644 break;
b9b61529 1645 case C2LS:
1646 c2ls_alloc(current,i);
1647 break;
b9b61529 1648 case C2OP:
1649 c2op_alloc(current,i);
1650 break;
57871462 1651 }
1652}
1653
1654// Special case where a branch and delay slot span two pages in virtual memory
1655static void pagespan_alloc(struct regstat *current,int i)
1656{
1657 current->isconst=0;
1658 current->wasconst=0;
1659 regs[i].wasconst=0;
e1190b87 1660 minimum_free_regs[i]=HOST_REGS;
57871462 1661 alloc_all(current,i);
1662 alloc_cc(current,i);
1663 dirty_reg(current,CCREG);
1664 if(opcode[i]==3) // JAL
1665 {
1666 alloc_reg(current,i,31);
1667 dirty_reg(current,31);
1668 }
1669 if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
1670 {
1671 alloc_reg(current,i,rs1[i]);
5067f341 1672 if (rt1[i]!=0) {
1673 alloc_reg(current,i,rt1[i]);
1674 dirty_reg(current,rt1[i]);
57871462 1675 }
1676 }
1677 if((opcode[i]&0x2E)==4) // BEQ/BNE/BEQL/BNEL
1678 {
1679 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1680 if(rs2[i]) alloc_reg(current,i,rs2[i]);
1681 if(!((current->is32>>rs1[i])&(current->is32>>rs2[i])&1))
1682 {
00fa9369 1683 assert(0);
57871462 1684 }
1685 }
1686 else
1687 if((opcode[i]&0x2E)==6) // BLEZ/BGTZ/BLEZL/BGTZL
1688 {
1689 if(rs1[i]) alloc_reg(current,i,rs1[i]);
1690 if(!((current->is32>>rs1[i])&1))
1691 {
00fa9369 1692 assert(0);
57871462 1693 }
1694 }
57871462 1695 //else ...
1696}
1697
b14b6a8f 1698static void add_stub(enum stub_type type, void *addr, void *retaddr,
1699 u_int a, uintptr_t b, uintptr_t c, u_int d, u_int e)
1700{
1701 assert(a < ARRAY_SIZE(stubs));
1702 stubs[stubcount].type = type;
1703 stubs[stubcount].addr = addr;
1704 stubs[stubcount].retaddr = retaddr;
1705 stubs[stubcount].a = a;
1706 stubs[stubcount].b = b;
1707 stubs[stubcount].c = c;
1708 stubs[stubcount].d = d;
1709 stubs[stubcount].e = e;
57871462 1710 stubcount++;
1711}
1712
b14b6a8f 1713static void add_stub_r(enum stub_type type, void *addr, void *retaddr,
1714 int i, int addr_reg, struct regstat *i_regs, int ccadj, u_int reglist)
1715{
1716 add_stub(type, addr, retaddr, i, addr_reg, (uintptr_t)i_regs, ccadj, reglist);
1717}
1718
57871462 1719// Write out a single register
1720void wb_register(signed char r,signed char regmap[],uint64_t dirty,uint64_t is32)
1721{
1722 int hr;
1723 for(hr=0;hr<HOST_REGS;hr++) {
1724 if(hr!=EXCLUDE_REG) {
1725 if((regmap[hr]&63)==r) {
1726 if((dirty>>hr)&1) {
1727 if(regmap[hr]<64) {
1728 emit_storereg(r,hr);
57871462 1729 }else{
1730 emit_storereg(r|64,hr);
1731 }
1732 }
1733 }
1734 }
1735 }
1736}
1737
57871462 1738void rlist()
1739{
1740 int i;
1741 printf("TRACE: ");
1742 for(i=0;i<32;i++)
1743 printf("r%d:%8x%8x ",i,((int *)(reg+i))[1],((int *)(reg+i))[0]);
1744 printf("\n");
57871462 1745}
1746
57871462 1747void alu_assemble(int i,struct regstat *i_regs)
1748{
1749 if(opcode2[i]>=0x20&&opcode2[i]<=0x23) { // ADD/ADDU/SUB/SUBU
1750 if(rt1[i]) {
1751 signed char s1,s2,t;
1752 t=get_reg(i_regs->regmap,rt1[i]);
1753 if(t>=0) {
1754 s1=get_reg(i_regs->regmap,rs1[i]);
1755 s2=get_reg(i_regs->regmap,rs2[i]);
1756 if(rs1[i]&&rs2[i]) {
1757 assert(s1>=0);
1758 assert(s2>=0);
1759 if(opcode2[i]&2) emit_sub(s1,s2,t);
1760 else emit_add(s1,s2,t);
1761 }
1762 else if(rs1[i]) {
1763 if(s1>=0) emit_mov(s1,t);
1764 else emit_loadreg(rs1[i],t);
1765 }
1766 else if(rs2[i]) {
1767 if(s2>=0) {
1768 if(opcode2[i]&2) emit_neg(s2,t);
1769 else emit_mov(s2,t);
1770 }
1771 else {
1772 emit_loadreg(rs2[i],t);
1773 if(opcode2[i]&2) emit_neg(t,t);
1774 }
1775 }
1776 else emit_zeroreg(t);
1777 }
1778 }
1779 }
1780 if(opcode2[i]>=0x2c&&opcode2[i]<=0x2f) { // DADD/DADDU/DSUB/DSUBU
00fa9369 1781 assert(0);
57871462 1782 }
1783 if(opcode2[i]==0x2a||opcode2[i]==0x2b) { // SLT/SLTU
1784 if(rt1[i]) {
1785 signed char s1l,s1h,s2l,s2h,t;
1786 if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1))
1787 {
1788 t=get_reg(i_regs->regmap,rt1[i]);
1789 //assert(t>=0);
1790 if(t>=0) {
1791 s1l=get_reg(i_regs->regmap,rs1[i]);
1792 s1h=get_reg(i_regs->regmap,rs1[i]|64);
1793 s2l=get_reg(i_regs->regmap,rs2[i]);
1794 s2h=get_reg(i_regs->regmap,rs2[i]|64);
1795 if(rs2[i]==0) // rx<r0
1796 {
1797 assert(s1h>=0);
1798 if(opcode2[i]==0x2a) // SLT
1799 emit_shrimm(s1h,31,t);
1800 else // SLTU (unsigned can not be less than zero)
1801 emit_zeroreg(t);
1802 }
1803 else if(rs1[i]==0) // r0<rx
1804 {
1805 assert(s2h>=0);
1806 if(opcode2[i]==0x2a) // SLT
1807 emit_set_gz64_32(s2h,s2l,t);
1808 else // SLTU (set if not zero)
1809 emit_set_nz64_32(s2h,s2l,t);
1810 }
1811 else {
1812 assert(s1l>=0);assert(s1h>=0);
1813 assert(s2l>=0);assert(s2h>=0);
1814 if(opcode2[i]==0x2a) // SLT
1815 emit_set_if_less64_32(s1h,s1l,s2h,s2l,t);
1816 else // SLTU
1817 emit_set_if_carry64_32(s1h,s1l,s2h,s2l,t);
1818 }
1819 }
1820 } else {
1821 t=get_reg(i_regs->regmap,rt1[i]);
1822 //assert(t>=0);
1823 if(t>=0) {
1824 s1l=get_reg(i_regs->regmap,rs1[i]);
1825 s2l=get_reg(i_regs->regmap,rs2[i]);
1826 if(rs2[i]==0) // rx<r0
1827 {
1828 assert(s1l>=0);
1829 if(opcode2[i]==0x2a) // SLT
1830 emit_shrimm(s1l,31,t);
1831 else // SLTU (unsigned can not be less than zero)
1832 emit_zeroreg(t);
1833 }
1834 else if(rs1[i]==0) // r0<rx
1835 {
1836 assert(s2l>=0);
1837 if(opcode2[i]==0x2a) // SLT
1838 emit_set_gz32(s2l,t);
1839 else // SLTU (set if not zero)
1840 emit_set_nz32(s2l,t);
1841 }
1842 else{
1843 assert(s1l>=0);assert(s2l>=0);
1844 if(opcode2[i]==0x2a) // SLT
1845 emit_set_if_less32(s1l,s2l,t);
1846 else // SLTU
1847 emit_set_if_carry32(s1l,s2l,t);
1848 }
1849 }
1850 }
1851 }
1852 }
1853 if(opcode2[i]>=0x24&&opcode2[i]<=0x27) { // AND/OR/XOR/NOR
1854 if(rt1[i]) {
1855 signed char s1l,s1h,s2l,s2h,th,tl;
1856 tl=get_reg(i_regs->regmap,rt1[i]);
1857 th=get_reg(i_regs->regmap,rt1[i]|64);
1858 if(!((i_regs->was32>>rs1[i])&(i_regs->was32>>rs2[i])&1)&&th>=0)
1859 {
1860 assert(tl>=0);
1861 if(tl>=0) {
1862 s1l=get_reg(i_regs->regmap,rs1[i]);
1863 s1h=get_reg(i_regs->regmap,rs1[i]|64);
1864 s2l=get_reg(i_regs->regmap,rs2[i]);
1865 s2h=get_reg(i_regs->regmap,rs2[i]|64);
1866 if(rs1[i]&&rs2[i]) {
1867 assert(s1l>=0);assert(s1h>=0);
1868 assert(s2l>=0);assert(s2h>=0);
1869 if(opcode2[i]==0x24) { // AND
1870 emit_and(s1l,s2l,tl);
1871 emit_and(s1h,s2h,th);
1872 } else
1873 if(opcode2[i]==0x25) { // OR
1874 emit_or(s1l,s2l,tl);
1875 emit_or(s1h,s2h,th);
1876 } else
1877 if(opcode2[i]==0x26) { // XOR
1878 emit_xor(s1l,s2l,tl);
1879 emit_xor(s1h,s2h,th);
1880 } else
1881 if(opcode2[i]==0x27) { // NOR
1882 emit_or(s1l,s2l,tl);
1883 emit_or(s1h,s2h,th);
1884 emit_not(tl,tl);
1885 emit_not(th,th);
1886 }
1887 }
1888 else
1889 {
1890 if(opcode2[i]==0x24) { // AND
1891 emit_zeroreg(tl);
1892 emit_zeroreg(th);
1893 } else
1894 if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
1895 if(rs1[i]){
1896 if(s1l>=0) emit_mov(s1l,tl);
1897 else emit_loadreg(rs1[i],tl);
1898 if(s1h>=0) emit_mov(s1h,th);
1899 else emit_loadreg(rs1[i]|64,th);
1900 }
1901 else
1902 if(rs2[i]){
1903 if(s2l>=0) emit_mov(s2l,tl);
1904 else emit_loadreg(rs2[i],tl);
1905 if(s2h>=0) emit_mov(s2h,th);
1906 else emit_loadreg(rs2[i]|64,th);
1907 }
1908 else{
1909 emit_zeroreg(tl);
1910 emit_zeroreg(th);
1911 }
1912 } else
1913 if(opcode2[i]==0x27) { // NOR
1914 if(rs1[i]){
1915 if(s1l>=0) emit_not(s1l,tl);
1916 else{
1917 emit_loadreg(rs1[i],tl);
1918 emit_not(tl,tl);
1919 }
1920 if(s1h>=0) emit_not(s1h,th);
1921 else{
1922 emit_loadreg(rs1[i]|64,th);
1923 emit_not(th,th);
1924 }
1925 }
1926 else
1927 if(rs2[i]){
1928 if(s2l>=0) emit_not(s2l,tl);
1929 else{
1930 emit_loadreg(rs2[i],tl);
1931 emit_not(tl,tl);
1932 }
1933 if(s2h>=0) emit_not(s2h,th);
1934 else{
1935 emit_loadreg(rs2[i]|64,th);
1936 emit_not(th,th);
1937 }
1938 }
1939 else {
1940 emit_movimm(-1,tl);
1941 emit_movimm(-1,th);
1942 }
1943 }
1944 }
1945 }
1946 }
1947 else
1948 {
1949 // 32 bit
1950 if(tl>=0) {
1951 s1l=get_reg(i_regs->regmap,rs1[i]);
1952 s2l=get_reg(i_regs->regmap,rs2[i]);
1953 if(rs1[i]&&rs2[i]) {
1954 assert(s1l>=0);
1955 assert(s2l>=0);
1956 if(opcode2[i]==0x24) { // AND
1957 emit_and(s1l,s2l,tl);
1958 } else
1959 if(opcode2[i]==0x25) { // OR
1960 emit_or(s1l,s2l,tl);
1961 } else
1962 if(opcode2[i]==0x26) { // XOR
1963 emit_xor(s1l,s2l,tl);
1964 } else
1965 if(opcode2[i]==0x27) { // NOR
1966 emit_or(s1l,s2l,tl);
1967 emit_not(tl,tl);
1968 }
1969 }
1970 else
1971 {
1972 if(opcode2[i]==0x24) { // AND
1973 emit_zeroreg(tl);
1974 } else
1975 if(opcode2[i]==0x25||opcode2[i]==0x26) { // OR/XOR
1976 if(rs1[i]){
1977 if(s1l>=0) emit_mov(s1l,tl);
1978 else emit_loadreg(rs1[i],tl); // CHECK: regmap_entry?
1979 }
1980 else
1981 if(rs2[i]){
1982 if(s2l>=0) emit_mov(s2l,tl);
1983 else emit_loadreg(rs2[i],tl); // CHECK: regmap_entry?
1984 }
1985 else emit_zeroreg(tl);
1986 } else
1987 if(opcode2[i]==0x27) { // NOR
1988 if(rs1[i]){
1989 if(s1l>=0) emit_not(s1l,tl);
1990 else {
1991 emit_loadreg(rs1[i],tl);
1992 emit_not(tl,tl);
1993 }
1994 }
1995 else
1996 if(rs2[i]){
1997 if(s2l>=0) emit_not(s2l,tl);
1998 else {
1999 emit_loadreg(rs2[i],tl);
2000 emit_not(tl,tl);
2001 }
2002 }
2003 else emit_movimm(-1,tl);
2004 }
2005 }
2006 }
2007 }
2008 }
2009 }
2010}
2011
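// Assemble an immediate-operand instruction: LUI, ADDI/ADDIU, DADDI/DADDIU,
// SLTI/SLTIU and ANDI/ORI/XORI. Where the source is a known constant the
// result is folded into a single emit_movimm instead of being computed at
// runtime.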
2012void imm16_assemble(int i,struct regstat *i_regs)
2013{
2014 if (opcode[i]==0x0f) { // LUI
2015 if(rt1[i]) {
2016 signed char t;
2017 t=get_reg(i_regs->regmap,rt1[i]);
2018 //assert(t>=0);
2019 if(t>=0) {
2020 if(!((i_regs->isconst>>t)&1))
2021 emit_movimm(imm[i]<<16,t);
2022 }
2023 }
2024 }
2025 if(opcode[i]==0x08||opcode[i]==0x09) { // ADDI/ADDIU
2026 if(rt1[i]) {
2027 signed char s,t;
2028 t=get_reg(i_regs->regmap,rt1[i]);
2029 s=get_reg(i_regs->regmap,rs1[i]);
2030 if(rs1[i]) {
2031 //assert(t>=0);
2032 //assert(s>=0);
2033 if(t>=0) {
2034 if(!((i_regs->isconst>>t)&1)) {
2035 if(s<0) {
2036 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2037 emit_addimm(t,imm[i],t);
2038 }else{
2039 if(!((i_regs->wasconst>>s)&1))
2040 emit_addimm(s,imm[i],t);
2041 else
2042 emit_movimm(constmap[i][s]+imm[i],t);
2043 }
2044 }
2045 }
2046 } else {
2047 if(t>=0) {
2048 if(!((i_regs->isconst>>t)&1))
2049 emit_movimm(imm[i],t);
2050 }
2051 }
2052 }
2053 }
2054 if(opcode[i]==0x18||opcode[i]==0x19) { // DADDI/DADDIU
2055 if(rt1[i]) {
2056 signed char sh,sl,th,tl;
2057 th=get_reg(i_regs->regmap,rt1[i]|64);
2058 tl=get_reg(i_regs->regmap,rt1[i]);
2059 sh=get_reg(i_regs->regmap,rs1[i]|64);
2060 sl=get_reg(i_regs->regmap,rs1[i]);
2061 if(tl>=0) {
2062 if(rs1[i]) {
2063 assert(sh>=0);
2064 assert(sl>=0);
2065 if(th>=0) {
2066 emit_addimm64_32(sh,sl,imm[i],th,tl);
2067 }
2068 else {
2069 emit_addimm(sl,imm[i],tl);
2070 }
2071 } else {
2072 emit_movimm(imm[i],tl);
2073 if(th>=0) emit_movimm(((signed int)imm[i])>>31,th);
2074 }
2075 }
2076 }
2077 }
2078 else if(opcode[i]==0x0a||opcode[i]==0x0b) { // SLTI/SLTIU
2079 if(rt1[i]) {
2080 //assert(rs1[i]!=0); // r0 might be valid, but it's probably a bug
2081 signed char sh,sl,t;
2082 t=get_reg(i_regs->regmap,rt1[i]);
2083 sh=get_reg(i_regs->regmap,rs1[i]|64);
2084 sl=get_reg(i_regs->regmap,rs1[i]);
2085 //assert(t>=0);
2086 if(t>=0) {
2087 if(rs1[i]>0) {
2088 if(sh<0) assert((i_regs->was32>>rs1[i])&1);
2089 if(sh<0||((i_regs->was32>>rs1[i])&1)) {
2090 if(opcode[i]==0x0a) { // SLTI
2091 if(sl<0) {
2092 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2093 emit_slti32(t,imm[i],t);
2094 }else{
2095 emit_slti32(sl,imm[i],t);
2096 }
2097 }
2098 else { // SLTIU
2099 if(sl<0) {
2100 if(i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2101 emit_sltiu32(t,imm[i],t);
2102 }else{
2103 emit_sltiu32(sl,imm[i],t);
2104 }
2105 }
2106 }else{ // 64-bit
2107 assert(sl>=0);
2108 if(opcode[i]==0x0a) // SLTI
2109 emit_slti64_32(sh,sl,imm[i],t);
2110 else // SLTIU
2111 emit_sltiu64_32(sh,sl,imm[i],t);
2112 }
2113 }else{
 2114 // SLTI(U) with r0 as the source is pointless,
 2115 // but it does turn up in real code, so emit the constant result
2116 if(opcode[i]==0x0a) // SLTI
2117 if(0<imm[i]) emit_movimm(1,t);
2118 else emit_zeroreg(t);
2119 else // SLTIU
2120 {
2121 if(imm[i]) emit_movimm(1,t);
2122 else emit_zeroreg(t);
2123 }
2124 }
2125 }
2126 }
2127 }
2128 else if(opcode[i]>=0x0c&&opcode[i]<=0x0e) { // ANDI/ORI/XORI
2129 if(rt1[i]) {
2130 signed char sh,sl,th,tl;
2131 th=get_reg(i_regs->regmap,rt1[i]|64);
2132 tl=get_reg(i_regs->regmap,rt1[i]);
2133 sh=get_reg(i_regs->regmap,rs1[i]|64);
2134 sl=get_reg(i_regs->regmap,rs1[i]);
2135 if(tl>=0 && !((i_regs->isconst>>tl)&1)) {
2136 if(opcode[i]==0x0c) //ANDI
2137 {
2138 if(rs1[i]) {
2139 if(sl<0) {
2140 if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
2141 emit_andimm(tl,imm[i],tl);
2142 }else{
2143 if(!((i_regs->wasconst>>sl)&1))
2144 emit_andimm(sl,imm[i],tl);
2145 else
2146 emit_movimm(constmap[i][sl]&imm[i],tl);
2147 }
2148 }
2149 else
2150 emit_zeroreg(tl);
2151 if(th>=0) emit_zeroreg(th);
2152 }
2153 else
2154 {
2155 if(rs1[i]) {
2156 if(sl<0) {
2157 if(i_regs->regmap_entry[tl]!=rs1[i]) emit_loadreg(rs1[i],tl);
2158 }
2159 if(th>=0) {
2160 if(sh<0) {
2161 emit_loadreg(rs1[i]|64,th);
2162 }else{
2163 emit_mov(sh,th);
2164 }
2165 }
581335b0 2166 if(opcode[i]==0x0d) { // ORI
2167 if(sl<0) {
2168 emit_orimm(tl,imm[i],tl);
2169 }else{
2170 if(!((i_regs->wasconst>>sl)&1))
2171 emit_orimm(sl,imm[i],tl);
2172 else
2173 emit_movimm(constmap[i][sl]|imm[i],tl);
2174 }
57871462 2175 }
581335b0 2176 if(opcode[i]==0x0e) { // XORI
2177 if(sl<0) {
2178 emit_xorimm(tl,imm[i],tl);
2179 }else{
2180 if(!((i_regs->wasconst>>sl)&1))
2181 emit_xorimm(sl,imm[i],tl);
2182 else
2183 emit_movimm(constmap[i][sl]^imm[i],tl);
2184 }
57871462 2185 }
2186 }
2187 else {
2188 emit_movimm(imm[i],tl);
2189 if(th>=0) emit_zeroreg(th);
2190 }
2191 }
2192 }
2193 }
2194 }
2195}
2196
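// Assemble shift-by-immediate instructions (SLL/SRL/SRA). The 64-bit forms
// (DSLL .. DSRA32) are never generated for the PSX and hit assert(0).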
2197void shiftimm_assemble(int i,struct regstat *i_regs)
2198{
2199 if(opcode2[i]<=0x3) // SLL/SRL/SRA
2200 {
2201 if(rt1[i]) {
2202 signed char s,t;
2203 t=get_reg(i_regs->regmap,rt1[i]);
2204 s=get_reg(i_regs->regmap,rs1[i]);
2205 //assert(t>=0);
dc49e339 2206 if(t>=0&&!((i_regs->isconst>>t)&1)){
57871462 2207 if(rs1[i]==0)
2208 {
2209 emit_zeroreg(t);
2210 }
2211 else
2212 {
2213 if(s<0&&i_regs->regmap_entry[t]!=rs1[i]) emit_loadreg(rs1[i],t);
2214 if(imm[i]) {
2215 if(opcode2[i]==0) // SLL
2216 {
2217 emit_shlimm(s<0?t:s,imm[i],t);
2218 }
2219 if(opcode2[i]==2) // SRL
2220 {
2221 emit_shrimm(s<0?t:s,imm[i],t);
2222 }
2223 if(opcode2[i]==3) // SRA
2224 {
2225 emit_sarimm(s<0?t:s,imm[i],t);
2226 }
2227 }else{
2228 // Shift by zero
2229 if(s>=0 && s!=t) emit_mov(s,t);
2230 }
2231 }
2232 }
2233 //emit_storereg(rt1[i],t); //DEBUG
2234 }
2235 }
2236 if(opcode2[i]>=0x38&&opcode2[i]<=0x3b) // DSLL/DSRL/DSRA
2237 {
9c45ca93 2238 assert(0);
57871462 2239 }
2240 if(opcode2[i]==0x3c) // DSLL32
2241 {
9c45ca93 2242 assert(0);
57871462 2243 }
2244 if(opcode2[i]==0x3e) // DSRL32
2245 {
9c45ca93 2246 assert(0);
57871462 2247 }
2248 if(opcode2[i]==0x3f) // DSRA32
2249 {
9c45ca93 2250 assert(0);
57871462 2251 }
2252}
2253
2254#ifndef shift_assemble
2255void shift_assemble(int i,struct regstat *i_regs)
2256{
2257 printf("Need shift_assemble for this architecture.\n");
2258 exit(1);
2259}
2260#endif
2261
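// Assemble a load (LB/LH/LW/LBU/LHU). For a non-constant address a fast-path
// RAM access is emitted inline and emit_fastpath_cmp_jump() provides the
// branch to the slow-path stub; for known addresses outside RAM the access is
// handled by inline_readstub() instead.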
2262void load_assemble(int i,struct regstat *i_regs)
2263{
9c45ca93 2264 int s,th,tl,addr;
57871462 2265 int offset;
b14b6a8f 2266 void *jaddr=0;
5bf843dc 2267 int memtarget=0,c=0;
b1570849 2268 int fastload_reg_override=0;
57871462 2269 u_int hr,reglist=0;
2270 th=get_reg(i_regs->regmap,rt1[i]|64);
2271 tl=get_reg(i_regs->regmap,rt1[i]);
2272 s=get_reg(i_regs->regmap,rs1[i]);
2273 offset=imm[i];
2274 for(hr=0;hr<HOST_REGS;hr++) {
2275 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2276 }
2277 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2278 if(s>=0) {
2279 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2280 if (c) {
2281 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2282 }
57871462 2283 }
57871462 2284 //printf("load_assemble: c=%d\n",c);
643aeae3 2285 //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset);
57871462 2286 // FIXME: Even if the load is a NOP, we should check for pagefaults...
581335b0 2287 if((tl<0&&(!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80))
f18c0f46 2288 ||rt1[i]==0) {
5bf843dc 2289 // no host reg for the target, but the address may be an I/O FIFO, so the read must still be performed
f18c0f46 2290 // ...or the destination is r0 (dummy read)
5bf843dc 2291 assem_debug("(forced read)\n");
2292 tl=get_reg(i_regs->regmap,-1);
2293 assert(tl>=0);
5bf843dc 2294 }
2295 if(offset||s<0||c) addr=tl;
2296 else addr=s;
535d208a 2297 //if(tl<0) tl=get_reg(i_regs->regmap,-1);
2298 if(tl>=0) {
2299 //printf("load_assemble: c=%d\n",c);
643aeae3 2300 //if(c) printf("load_assemble: const=%lx\n",(long)constmap[i][s]+offset);
535d208a 2301 assert(tl>=0); // Even if the load is a NOP, we must check for pagefaults and I/O
2302 reglist&=~(1<<tl);
2303 if(th>=0) reglist&=~(1<<th);
1edfcc68 2304 if(!c) {
1edfcc68 2305 #ifdef R29_HACK
2306 // Strmnnrmn's speed hack
2307 if(rs1[i]!=29||start<0x80001000||start>=0x80000000+RAM_SIZE)
2308 #endif
2309 {
2310 jaddr=emit_fastpath_cmp_jump(i,addr,&fastload_reg_override);
535d208a 2311 }
1edfcc68 2312 }
2313 else if(ram_offset&&memtarget) {
2314 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2315 fastload_reg_override=HOST_TEMPREG;
535d208a 2316 }
2317 int dummy=(rt1[i]==0)||(tl!=get_reg(i_regs->regmap,rt1[i])); // ignore loads to r0 and unneeded reg
2318 if (opcode[i]==0x20) { // LB
2319 if(!c||memtarget) {
2320 if(!dummy) {
57871462 2321 {
535d208a 2322 int x=0,a=tl;
535d208a 2323 if(!c) a=addr;
b1570849 2324 if(fastload_reg_override) a=fastload_reg_override;
2325
9c45ca93 2326 emit_movsbl_indexed(x,a,tl);
57871462 2327 }
57871462 2328 }
535d208a 2329 if(jaddr)
b14b6a8f 2330 add_stub_r(LOADB_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2331 }
535d208a 2332 else
2333 inline_readstub(LOADB_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2334 }
2335 if (opcode[i]==0x21) { // LH
2336 if(!c||memtarget) {
2337 if(!dummy) {
9c45ca93 2338 int x=0,a=tl;
2339 if(!c) a=addr;
2340 if(fastload_reg_override) a=fastload_reg_override;
2341 emit_movswl_indexed(x,a,tl);
57871462 2342 }
535d208a 2343 if(jaddr)
b14b6a8f 2344 add_stub_r(LOADH_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2345 }
535d208a 2346 else
2347 inline_readstub(LOADH_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2348 }
2349 if (opcode[i]==0x23) { // LW
2350 if(!c||memtarget) {
2351 if(!dummy) {
dadf55f2 2352 int a=addr;
b1570849 2353 if(fastload_reg_override) a=fastload_reg_override;
9c45ca93 2354 emit_readword_indexed(0,a,tl);
57871462 2355 }
535d208a 2356 if(jaddr)
b14b6a8f 2357 add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2358 }
535d208a 2359 else
2360 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2361 }
2362 if (opcode[i]==0x24) { // LBU
2363 if(!c||memtarget) {
2364 if(!dummy) {
9c45ca93 2365 int x=0,a=tl;
2366 if(!c) a=addr;
2367 if(fastload_reg_override) a=fastload_reg_override;
b1570849 2368
9c45ca93 2369 emit_movzbl_indexed(x,a,tl);
57871462 2370 }
535d208a 2371 if(jaddr)
b14b6a8f 2372 add_stub_r(LOADBU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2373 }
535d208a 2374 else
2375 inline_readstub(LOADBU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2376 }
2377 if (opcode[i]==0x25) { // LHU
2378 if(!c||memtarget) {
2379 if(!dummy) {
9c45ca93 2380 int x=0,a=tl;
2381 if(!c) a=addr;
2382 if(fastload_reg_override) a=fastload_reg_override;
2383 emit_movzwl_indexed(x,a,tl);
57871462 2384 }
535d208a 2385 if(jaddr)
b14b6a8f 2386 add_stub_r(LOADHU_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
57871462 2387 }
535d208a 2388 else
2389 inline_readstub(LOADHU_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
2390 }
2391 if (opcode[i]==0x27) { // LWU
2392 assert(th>=0);
2393 if(!c||memtarget) {
2394 if(!dummy) {
dadf55f2 2395 int a=addr;
b1570849 2396 if(fastload_reg_override) a=fastload_reg_override;
9c45ca93 2397 emit_readword_indexed(0,a,tl);
57871462 2398 }
535d208a 2399 if(jaddr)
b14b6a8f 2400 add_stub_r(LOADW_STUB,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
535d208a 2401 }
2402 else {
2403 inline_readstub(LOADW_STUB,i,constmap[i][s]+offset,i_regs->regmap,rt1[i],ccadj[i],reglist);
57871462 2404 }
535d208a 2405 emit_zeroreg(th);
2406 }
2407 if (opcode[i]==0x37) { // LD
9c45ca93 2408 assert(0);
57871462 2409 }
535d208a 2410 }
57871462 2411}
2412
2413#ifndef loadlr_assemble
2414void loadlr_assemble(int i,struct regstat *i_regs)
2415{
2416 printf("Need loadlr_assemble for this architecture.\n");
2417 exit(1);
2418}
2419#endif
2420
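// Assemble a store (SB/SH/SW). Works like load_assemble, but additionally
// compares the target page against invalid_code[] and calls the invalidation
// stub so that blocks overwritten by self-modifying code get recompiled.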
2421void store_assemble(int i,struct regstat *i_regs)
2422{
9c45ca93 2423 int s,tl;
57871462 2424 int addr,temp;
2425 int offset;
b14b6a8f 2426 void *jaddr=0;
2427 enum stub_type type;
666a299d 2428 int memtarget=0,c=0;
57871462 2429 int agr=AGEN1+(i&1);
b1570849 2430 int faststore_reg_override=0;
57871462 2431 u_int hr,reglist=0;
57871462 2432 tl=get_reg(i_regs->regmap,rs2[i]);
2433 s=get_reg(i_regs->regmap,rs1[i]);
2434 temp=get_reg(i_regs->regmap,agr);
2435 if(temp<0) temp=get_reg(i_regs->regmap,-1);
2436 offset=imm[i];
2437 if(s>=0) {
2438 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2439 if(c) {
2440 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2441 }
57871462 2442 }
2443 assert(tl>=0);
2444 assert(temp>=0);
2445 for(hr=0;hr<HOST_REGS;hr++) {
2446 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2447 }
2448 if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<<HOST_CCREG);
2449 if(offset||s<0||c) addr=temp;
2450 else addr=s;
1edfcc68 2451 if(!c) {
2452 jaddr=emit_fastpath_cmp_jump(i,addr,&faststore_reg_override);
2453 }
2454 else if(ram_offset&&memtarget) {
2455 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2456 faststore_reg_override=HOST_TEMPREG;
57871462 2457 }
2458
2459 if (opcode[i]==0x28) { // SB
2460 if(!c||memtarget) {
97a238a6 2461 int x=0,a=temp;
97a238a6 2462 if(!c) a=addr;
b1570849 2463 if(faststore_reg_override) a=faststore_reg_override;
9c45ca93 2464 emit_writebyte_indexed(tl,x,a);
57871462 2465 }
2466 type=STOREB_STUB;
2467 }
2468 if (opcode[i]==0x29) { // SH
2469 if(!c||memtarget) {
97a238a6 2470 int x=0,a=temp;
97a238a6 2471 if(!c) a=addr;
b1570849 2472 if(faststore_reg_override) a=faststore_reg_override;
9c45ca93 2473 emit_writehword_indexed(tl,x,a);
57871462 2474 }
2475 type=STOREH_STUB;
2476 }
2477 if (opcode[i]==0x2B) { // SW
dadf55f2 2478 if(!c||memtarget) {
2479 int a=addr;
b1570849 2480 if(faststore_reg_override) a=faststore_reg_override;
9c45ca93 2481 emit_writeword_indexed(tl,0,a);
dadf55f2 2482 }
57871462 2483 type=STOREW_STUB;
2484 }
2485 if (opcode[i]==0x3F) { // SD
9c45ca93 2486 assert(0);
57871462 2487 type=STORED_STUB;
2488 }
b96d3df7 2489 if(jaddr) {
2490 // PCSX store handlers don't check invcode again
2491 reglist|=1<<addr;
b14b6a8f 2492 add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
b96d3df7 2493 jaddr=0;
2494 }
1edfcc68 2495 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
57871462 2496 if(!c||memtarget) {
2497 #ifdef DESTRUCTIVE_SHIFT
2498 // The x86 shift operation is 'destructive'; it overwrites the
2499 // source register, so we need to make a copy first and use that.
2500 addr=temp;
2501 #endif
2502 #if defined(HOST_IMM8)
2503 int ir=get_reg(i_regs->regmap,INVCP);
2504 assert(ir>=0);
2505 emit_cmpmem_indexedsr12_reg(ir,addr,1);
2506 #else
643aeae3 2507 emit_cmpmem_indexedsr12_imm(invalid_code,addr,1);
57871462 2508 #endif
0bbd1454 2509 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
2510 emit_callne(invalidate_addr_reg[addr]);
2511 #else
b14b6a8f 2512 void *jaddr2 = out;
57871462 2513 emit_jne(0);
b14b6a8f 2514 add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<<HOST_CCREG),addr,0,0,0);
0bbd1454 2515 #endif
57871462 2516 }
2517 }
7a518516 2518 u_int addr_val=constmap[i][s]+offset;
3eaa7048 2519 if(jaddr) {
b14b6a8f 2520 add_stub_r(type,jaddr,out,i,addr,i_regs,ccadj[i],reglist);
3eaa7048 2521 } else if(c&&!memtarget) {
7a518516 2522 inline_writestub(type,i,addr_val,i_regs->regmap,rs2[i],ccadj[i],reglist);
2523 }
 2524 // basic detection of stores that modify the currently compiled block
 2525 // (not looking back, as that code should already be in the MIPS i-cache)
2526 if(c&&start+i*4<addr_val&&addr_val<start+slen*4) {
c43b5311 2527 SysPrintf("write to %08x hits block %08x, pc=%08x\n",addr_val,start,start+i*4);
7a518516 2528 assert(i_regs->regmap==regs[i].regmap); // not delay slot
2529 if(i_regs->regmap==regs[i].regmap) {
2530 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2531 wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
2532 emit_movimm(start+i*4+4,0);
643aeae3 2533 emit_writeword(0,&pcaddr);
b14b6a8f 2534 emit_jmp(do_interrupt);
7a518516 2535 }
3eaa7048 2536 }
57871462 2537}
2538
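// Assemble SWL/SWR (unaligned store left/right). The address alignment is
// tested at runtime and one of four code paths writes only the affected
// bytes; the 64-bit SDL/SDR variants are never generated for the PSX and
// assert(0).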
2539void storelr_assemble(int i,struct regstat *i_regs)
2540{
9c45ca93 2541 int s,tl;
57871462 2542 int temp;
57871462 2543 int offset;
b14b6a8f 2544 void *jaddr=0;
df4dc2b1 2545 void *case1, *case2, *case3;
2546 void *done0, *done1, *done2;
af4ee1fe 2547 int memtarget=0,c=0;
fab5d06d 2548 int agr=AGEN1+(i&1);
57871462 2549 u_int hr,reglist=0;
57871462 2550 tl=get_reg(i_regs->regmap,rs2[i]);
2551 s=get_reg(i_regs->regmap,rs1[i]);
fab5d06d 2552 temp=get_reg(i_regs->regmap,agr);
2553 if(temp<0) temp=get_reg(i_regs->regmap,-1);
57871462 2554 offset=imm[i];
2555 if(s>=0) {
2556 c=(i_regs->isconst>>s)&1;
af4ee1fe 2557 if(c) {
2558 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2559 }
57871462 2560 }
2561 assert(tl>=0);
2562 for(hr=0;hr<HOST_REGS;hr++) {
2563 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2564 }
535d208a 2565 assert(temp>=0);
1edfcc68 2566 if(!c) {
2567 emit_cmpimm(s<0||offset?temp:s,RAM_SIZE);
2568 if(!offset&&s!=temp) emit_mov(s,temp);
b14b6a8f 2569 jaddr=out;
1edfcc68 2570 emit_jno(0);
2571 }
2572 else
2573 {
2574 if(!memtarget||!rs1[i]) {
b14b6a8f 2575 jaddr=out;
535d208a 2576 emit_jmp(0);
57871462 2577 }
535d208a 2578 }
9c45ca93 2579 emit_addimm_no_flags(ram_offset,temp);
535d208a 2580
2581 if (opcode[i]==0x2C||opcode[i]==0x2D) { // SDL/SDR
9c45ca93 2582 assert(0);
535d208a 2583 }
57871462 2584
9c45ca93 2585 emit_xorimm(temp,3,temp);
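  // The two low address bits (inverted by the XOR with 3 above) select one
  // of the four alignment cases below; each case writes only the byte(s)
  // that SWL/SWR should touch, rotating the source register so the right
  // bytes land in memory.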
535d208a 2586 emit_testimm(temp,2);
df4dc2b1 2587 case2=out;
535d208a 2588 emit_jne(0);
2589 emit_testimm(temp,1);
df4dc2b1 2590 case1=out;
535d208a 2591 emit_jne(0);
2592 // 0
2593 if (opcode[i]==0x2A) { // SWL
2594 emit_writeword_indexed(tl,0,temp);
2595 }
2596 if (opcode[i]==0x2E) { // SWR
2597 emit_writebyte_indexed(tl,3,temp);
2598 }
2599 if (opcode[i]==0x2C) { // SDL
9c45ca93 2600 assert(0);
535d208a 2601 }
2602 if (opcode[i]==0x2D) { // SDR
9c45ca93 2603 assert(0);
535d208a 2604 }
df4dc2b1 2605 done0=out;
535d208a 2606 emit_jmp(0);
2607 // 1
df4dc2b1 2608 set_jump_target(case1, out);
535d208a 2609 if (opcode[i]==0x2A) { // SWL
2610 // Write 3 msb into three least significant bytes
2611 if(rs2[i]) emit_rorimm(tl,8,tl);
2612 emit_writehword_indexed(tl,-1,temp);
2613 if(rs2[i]) emit_rorimm(tl,16,tl);
2614 emit_writebyte_indexed(tl,1,temp);
2615 if(rs2[i]) emit_rorimm(tl,8,tl);
2616 }
2617 if (opcode[i]==0x2E) { // SWR
2618 // Write two lsb into two most significant bytes
2619 emit_writehword_indexed(tl,1,temp);
2620 }
2621 if (opcode[i]==0x2C) { // SDL
9c45ca93 2622 assert(0);
535d208a 2623 }
2624 if (opcode[i]==0x2D) { // SDR
9c45ca93 2625 assert(0);
535d208a 2626 }
df4dc2b1 2627 done1=out;
535d208a 2628 emit_jmp(0);
2629 // 2
df4dc2b1 2630 set_jump_target(case2, out);
535d208a 2631 emit_testimm(temp,1);
df4dc2b1 2632 case3=out;
535d208a 2633 emit_jne(0);
2634 if (opcode[i]==0x2A) { // SWL
2635 // Write two msb into two least significant bytes
2636 if(rs2[i]) emit_rorimm(tl,16,tl);
2637 emit_writehword_indexed(tl,-2,temp);
2638 if(rs2[i]) emit_rorimm(tl,16,tl);
2639 }
2640 if (opcode[i]==0x2E) { // SWR
2641 // Write 3 lsb into three most significant bytes
2642 emit_writebyte_indexed(tl,-1,temp);
2643 if(rs2[i]) emit_rorimm(tl,8,tl);
2644 emit_writehword_indexed(tl,0,temp);
2645 if(rs2[i]) emit_rorimm(tl,24,tl);
2646 }
2647 if (opcode[i]==0x2C) { // SDL
9c45ca93 2648 assert(0);
535d208a 2649 }
2650 if (opcode[i]==0x2D) { // SDR
9c45ca93 2651 assert(0);
535d208a 2652 }
df4dc2b1 2653 done2=out;
535d208a 2654 emit_jmp(0);
2655 // 3
df4dc2b1 2656 set_jump_target(case3, out);
535d208a 2657 if (opcode[i]==0x2A) { // SWL
2658 // Write msb into least significant byte
2659 if(rs2[i]) emit_rorimm(tl,24,tl);
2660 emit_writebyte_indexed(tl,-3,temp);
2661 if(rs2[i]) emit_rorimm(tl,8,tl);
2662 }
2663 if (opcode[i]==0x2E) { // SWR
2664 // Write entire word
2665 emit_writeword_indexed(tl,-3,temp);
2666 }
2667 if (opcode[i]==0x2C) { // SDL
9c45ca93 2668 assert(0);
535d208a 2669 }
2670 if (opcode[i]==0x2D) { // SDR
9c45ca93 2671 assert(0);
535d208a 2672 }
df4dc2b1 2673 set_jump_target(done0, out);
2674 set_jump_target(done1, out);
2675 set_jump_target(done2, out);
535d208a 2676 if (opcode[i]==0x2C) { // SDL
9c45ca93 2677 assert(0);
535d208a 2678 }
2679 if (opcode[i]==0x2D) { // SDR
9c45ca93 2680 assert(0);
57871462 2681 }
535d208a 2682 if(!c||!memtarget)
b14b6a8f 2683 add_stub_r(STORELR_STUB,jaddr,out,i,temp,i_regs,ccadj[i],reglist);
1edfcc68 2684 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
9c45ca93 2685 emit_addimm_no_flags(-ram_offset,temp);
57871462 2686 #if defined(HOST_IMM8)
2687 int ir=get_reg(i_regs->regmap,INVCP);
2688 assert(ir>=0);
2689 emit_cmpmem_indexedsr12_reg(ir,temp,1);
2690 #else
643aeae3 2691 emit_cmpmem_indexedsr12_imm(invalid_code,temp,1);
57871462 2692 #endif
535d208a 2693 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
2694 emit_callne(invalidate_addr_reg[temp]);
2695 #else
b14b6a8f 2696 void *jaddr2 = out;
57871462 2697 emit_jne(0);
b14b6a8f 2698 add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<<HOST_CCREG),temp,0,0,0);
535d208a 2699 #endif
57871462 2700 }
57871462 2701}
2702
2703void c1ls_assemble(int i,struct regstat *i_regs)
2704{
3d624f89 2705 cop1_unusable(i, i_regs);
57871462 2706}
2707
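// Assemble LWC2/SWC2: load or store a GTE (COP2) data register. The value is
// staged in FTEMP and moved to/from the GTE via cop2_get_dreg/cop2_put_dreg;
// the memory access itself uses the same fast-path/stub scheme as
// load_assemble/store_assemble.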
b9b61529 2708void c2ls_assemble(int i,struct regstat *i_regs)
2709{
2710 int s,tl;
2711 int ar;
2712 int offset;
1fd1aceb 2713 int memtarget=0,c=0;
b14b6a8f 2714 void *jaddr2=NULL;
2715 enum stub_type type;
b9b61529 2716 int agr=AGEN1+(i&1);
ffb0b9e0 2717 int fastio_reg_override=0;
b9b61529 2718 u_int hr,reglist=0;
2719 u_int copr=(source[i]>>16)&0x1f;
2720 s=get_reg(i_regs->regmap,rs1[i]);
2721 tl=get_reg(i_regs->regmap,FTEMP);
2722 offset=imm[i];
2723 assert(rs1[i]>0);
2724 assert(tl>=0);
b9b61529 2725
2726 for(hr=0;hr<HOST_REGS;hr++) {
2727 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2728 }
2729 if(i_regs->regmap[HOST_CCREG]==CCREG)
2730 reglist&=~(1<<HOST_CCREG);
2731
2732 // get the address
2733 if (opcode[i]==0x3a) { // SWC2
2734 ar=get_reg(i_regs->regmap,agr);
2735 if(ar<0) ar=get_reg(i_regs->regmap,-1);
2736 reglist|=1<<ar;
2737 } else { // LWC2
2738 ar=tl;
2739 }
1fd1aceb 2740 if(s>=0) c=(i_regs->wasconst>>s)&1;
2741 memtarget=c&&(((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE);
b9b61529 2742 if (!offset&&!c&&s>=0) ar=s;
2743 assert(ar>=0);
2744
2745 if (opcode[i]==0x3a) { // SWC2
2746 cop2_get_dreg(copr,tl,HOST_TEMPREG);
1fd1aceb 2747 type=STOREW_STUB;
b9b61529 2748 }
1fd1aceb 2749 else
b9b61529 2750 type=LOADW_STUB;
1fd1aceb 2751
2752 if(c&&!memtarget) {
b14b6a8f 2753 jaddr2=out;
1fd1aceb 2754 emit_jmp(0); // inline_readstub/inline_writestub?
b9b61529 2755 }
1fd1aceb 2756 else {
2757 if(!c) {
ffb0b9e0 2758 jaddr2=emit_fastpath_cmp_jump(i,ar,&fastio_reg_override);
1fd1aceb 2759 }
a327ad27 2760 else if(ram_offset&&memtarget) {
2761 emit_addimm(ar,ram_offset,HOST_TEMPREG);
2762 fastio_reg_override=HOST_TEMPREG;
2763 }
1fd1aceb 2764 if (opcode[i]==0x32) { // LWC2
ffb0b9e0 2765 int a=ar;
2766 if(fastio_reg_override) a=fastio_reg_override;
2767 emit_readword_indexed(0,a,tl);
1fd1aceb 2768 }
2769 if (opcode[i]==0x3a) { // SWC2
2770 #ifdef DESTRUCTIVE_SHIFT
2771 if(!offset&&!c&&s>=0) emit_mov(s,ar);
2772 #endif
ffb0b9e0 2773 int a=ar;
2774 if(fastio_reg_override) a=fastio_reg_override;
2775 emit_writeword_indexed(tl,0,a);
1fd1aceb 2776 }
b9b61529 2777 }
2778 if(jaddr2)
b14b6a8f 2779 add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj[i],reglist);
0ff8c62c 2780 if(opcode[i]==0x3a) // SWC2
2781 if(!(i_regs->waswritten&(1<<rs1[i]))&&!(new_dynarec_hacks&NDHACK_NO_SMC_CHECK)) {
b9b61529 2782#if defined(HOST_IMM8)
2783 int ir=get_reg(i_regs->regmap,INVCP);
2784 assert(ir>=0);
2785 emit_cmpmem_indexedsr12_reg(ir,ar,1);
2786#else
643aeae3 2787 emit_cmpmem_indexedsr12_imm(invalid_code,ar,1);
b9b61529 2788#endif
0bbd1454 2789 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT)
2790 emit_callne(invalidate_addr_reg[ar]);
2791 #else
b14b6a8f 2792 void *jaddr3 = out;
b9b61529 2793 emit_jne(0);
b14b6a8f 2794 add_stub(INVCODE_STUB,jaddr3,out,reglist|(1<<HOST_CCREG),ar,0,0,0);
0bbd1454 2795 #endif
b9b61529 2796 }
2797 if (opcode[i]==0x32) { // LWC2
2798 cop2_put_dreg(copr,tl,HOST_TEMPREG);
2799 }
2800}
2801
57871462 2802#ifndef multdiv_assemble
2803void multdiv_assemble(int i,struct regstat *i_regs)
2804{
2805 printf("Need multdiv_assemble for this architecture.\n");
2806 exit(1);
2807}
2808#endif
2809
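// Assemble a plain register move, apparently used for MFHI/MFLO/MTHI/MTLO
// (see the commented-out checks inside): copy rs1 to rt1, including the
// upper 64-bit half when it has a host register.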
2810void mov_assemble(int i,struct regstat *i_regs)
2811{
2812 //if(opcode2[i]==0x10||opcode2[i]==0x12) { // MFHI/MFLO
2813 //if(opcode2[i]==0x11||opcode2[i]==0x13) { // MTHI/MTLO
57871462 2814 if(rt1[i]) {
2815 signed char sh,sl,th,tl;
2816 th=get_reg(i_regs->regmap,rt1[i]|64);
2817 tl=get_reg(i_regs->regmap,rt1[i]);
2818 //assert(tl>=0);
2819 if(tl>=0) {
2820 sh=get_reg(i_regs->regmap,rs1[i]|64);
2821 sl=get_reg(i_regs->regmap,rs1[i]);
2822 if(sl>=0) emit_mov(sl,tl);
2823 else emit_loadreg(rs1[i],tl);
2824 if(th>=0) {
2825 if(sh>=0) emit_mov(sh,th);
2826 else emit_loadreg(rs1[i]|64,th);
2827 }
2828 }
2829 }
2830}
2831
57871462 2832void syscall_assemble(int i,struct regstat *i_regs)
2833{
2834 signed char ccreg=get_reg(i_regs->regmap,CCREG);
2835 assert(ccreg==HOST_CCREG);
2836 assert(!is_delayslot);
581335b0 2837 (void)ccreg;
57871462 2838 emit_movimm(start+i*4,EAX); // Get PC
2573466a 2839 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
b14b6a8f 2840 emit_jmp(jump_syscall_hle); // XXX
7139f3c8 2841}
2842
2843void hlecall_assemble(int i,struct regstat *i_regs)
2844{
41e82ad4 2845 extern void psxNULL();
7139f3c8 2846 signed char ccreg=get_reg(i_regs->regmap,CCREG);
2847 assert(ccreg==HOST_CCREG);
2848 assert(!is_delayslot);
581335b0 2849 (void)ccreg;
7139f3c8 2850 emit_movimm(start+i*4+4,0); // Get PC
dd79da89 2851 uint32_t hleCode = source[i] & 0x03ffffff;
b14b6a8f 2852 if (hleCode >= ARRAY_SIZE(psxHLEt))
643aeae3 2853 emit_movimm((uintptr_t)psxNULL,1);
dd79da89 2854 else
643aeae3 2855 emit_movimm((uintptr_t)psxHLEt[hleCode],1);
2573466a 2856 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // XXX
b14b6a8f 2857 emit_jmp(jump_hlecall);
57871462 2858}
2859
1e973cb0 2860void intcall_assemble(int i,struct regstat *i_regs)
2861{
2862 signed char ccreg=get_reg(i_regs->regmap,CCREG);
2863 assert(ccreg==HOST_CCREG);
2864 assert(!is_delayslot);
581335b0 2865 (void)ccreg;
1e973cb0 2866 emit_movimm(start+i*4,0); // Get PC
2573466a 2867 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
b14b6a8f 2868 emit_jmp(jump_intcall);
1e973cb0 2869}
2870
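// Assemble the instruction in a branch delay slot. Sets is_delayslot so the
// per-type assemblers can special-case it, then dispatches on itype just
// like the main assembly loop.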
57871462 2871void ds_assemble(int i,struct regstat *i_regs)
2872{
ffb0b9e0 2873 speculate_register_values(i);
57871462 2874 is_delayslot=1;
2875 switch(itype[i]) {
2876 case ALU:
2877 alu_assemble(i,i_regs);break;
2878 case IMM16:
2879 imm16_assemble(i,i_regs);break;
2880 case SHIFT:
2881 shift_assemble(i,i_regs);break;
2882 case SHIFTIMM:
2883 shiftimm_assemble(i,i_regs);break;
2884 case LOAD:
2885 load_assemble(i,i_regs);break;
2886 case LOADLR:
2887 loadlr_assemble(i,i_regs);break;
2888 case STORE:
2889 store_assemble(i,i_regs);break;
2890 case STORELR:
2891 storelr_assemble(i,i_regs);break;
2892 case COP0:
2893 cop0_assemble(i,i_regs);break;
2894 case COP1:
2895 cop1_assemble(i,i_regs);break;
2896 case C1LS:
2897 c1ls_assemble(i,i_regs);break;
b9b61529 2898 case COP2:
2899 cop2_assemble(i,i_regs);break;
2900 case C2LS:
2901 c2ls_assemble(i,i_regs);break;
2902 case C2OP:
2903 c2op_assemble(i,i_regs);break;
57871462 2904 case MULTDIV:
2905 multdiv_assemble(i,i_regs);break;
2906 case MOV:
2907 mov_assemble(i,i_regs);break;
2908 case SYSCALL:
7139f3c8 2909 case HLECALL:
1e973cb0 2910 case INTCALL:
57871462 2911 case SPAN:
2912 case UJUMP:
2913 case RJUMP:
2914 case CJUMP:
2915 case SJUMP:
2916 case FJUMP:
c43b5311 2917 SysPrintf("Jump in the delay slot. This is probably a bug.\n");
57871462 2918 }
2919 is_delayslot=0;
2920}
2921
2922// Is the branch target a valid internal jump?
2923int internal_branch(uint64_t i_is32,int addr)
2924{
2925 if(addr&1) return 0; // Indirect (register) jump
2926 if(addr>=start && addr<start+slen*4-4)
2927 {
71e490c5 2928 return 1;
57871462 2929 }
2930 return 0;
2931}
2932
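// Write back or move host registers whose mapping changes between the
// previous map and the entry map: dirty values whose MIPS register is no
// longer mapped anywhere (and is still needed) are stored back first, then
// values that simply moved to another host register are copied over.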
00fa9369 2933static void wb_invalidate(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u)
57871462 2934{
2935 int hr;
2936 for(hr=0;hr<HOST_REGS;hr++) {
2937 if(hr!=EXCLUDE_REG) {
2938 if(pre[hr]!=entry[hr]) {
2939 if(pre[hr]>=0) {
2940 if((dirty>>hr)&1) {
2941 if(get_reg(entry,pre[hr])<0) {
00fa9369 2942 assert(pre[hr]<64);
2943 if(!((u>>pre[hr])&1))
2944 emit_storereg(pre[hr],hr);
57871462 2945 }
2946 }
2947 }
2948 }
2949 }
2950 }
2951 // Move from one register to another (no writeback)
2952 for(hr=0;hr<HOST_REGS;hr++) {
2953 if(hr!=EXCLUDE_REG) {
2954 if(pre[hr]!=entry[hr]) {
2955 if(pre[hr]>=0&&(pre[hr]&63)<TEMPREG) {
2956 int nr;
2957 if((nr=get_reg(entry,pre[hr]))>=0) {
2958 emit_mov(hr,nr);
2959 }
2960 }
2961 }
2962 }
2963 }
2964}
57871462 2965
2966// Load the specified registers
2967// This only loads the registers given as arguments because
2968// we don't want to load things that will be overwritten
2969void load_regs(signed char entry[],signed char regmap[],int is32,int rs1,int rs2)
2970{
2971 int hr;
2972 // Load 32-bit regs
2973 for(hr=0;hr<HOST_REGS;hr++) {
2974 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
2975 if(entry[hr]!=regmap[hr]) {
2976 if(regmap[hr]==rs1||regmap[hr]==rs2)
2977 {
2978 if(regmap[hr]==0) {
2979 emit_zeroreg(hr);
2980 }
2981 else
2982 {
2983 emit_loadreg(regmap[hr],hr);
2984 }
2985 }
2986 }
2987 }
2988 }
2989 //Load 64-bit regs
2990 for(hr=0;hr<HOST_REGS;hr++) {
2991 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
2992 if(entry[hr]!=regmap[hr]) {
2993 if(regmap[hr]-64==rs1||regmap[hr]-64==rs2)
2994 {
2995 assert(regmap[hr]!=64);
2996 if((is32>>(regmap[hr]&63))&1) {
2997 int lr=get_reg(regmap,regmap[hr]-64);
2998 if(lr>=0)
2999 emit_sarimm(lr,31,hr);
3000 else
3001 emit_loadreg(regmap[hr],hr);
3002 }
3003 else
3004 {
3005 emit_loadreg(regmap[hr],hr);
3006 }
3007 }
3008 }
3009 }
3010 }
3011}
3012
3013// Load registers prior to the start of a loop
3014// so that they are not loaded within the loop
3015static void loop_preload(signed char pre[],signed char entry[])
3016{
3017 int hr;
3018 for(hr=0;hr<HOST_REGS;hr++) {
3019 if(hr!=EXCLUDE_REG) {
3020 if(pre[hr]!=entry[hr]) {
3021 if(entry[hr]>=0) {
3022 if(get_reg(pre,entry[hr])<0) {
3023 assem_debug("loop preload:\n");
3024 //printf("loop preload: %d\n",hr);
3025 if(entry[hr]==0) {
3026 emit_zeroreg(hr);
3027 }
3028 else if(entry[hr]<TEMPREG)
3029 {
3030 emit_loadreg(entry[hr],hr);
3031 }
3032 else if(entry[hr]-64<TEMPREG)
3033 {
3034 emit_loadreg(entry[hr],hr);
3035 }
3036 }
3037 }
3038 }
3039 }
3040 }
3041}
3042
3043// Generate address for load/store instruction
b9b61529 3044// goes to AGEN for writes, FTEMP for LOADLR and cop1/2 loads
57871462 3045void address_generation(int i,struct regstat *i_regs,signed char entry[])
3046{
b9b61529 3047 if(itype[i]==LOAD||itype[i]==LOADLR||itype[i]==STORE||itype[i]==STORELR||itype[i]==C1LS||itype[i]==C2LS) {
5194fb95 3048 int ra=-1;
57871462 3049 int agr=AGEN1+(i&1);
57871462 3050 if(itype[i]==LOAD) {
3051 ra=get_reg(i_regs->regmap,rt1[i]);
9f51b4b9 3052 if(ra<0) ra=get_reg(i_regs->regmap,-1);
535d208a 3053 assert(ra>=0);
57871462 3054 }
3055 if(itype[i]==LOADLR) {
3056 ra=get_reg(i_regs->regmap,FTEMP);
3057 }
3058 if(itype[i]==STORE||itype[i]==STORELR) {
3059 ra=get_reg(i_regs->regmap,agr);
3060 if(ra<0) ra=get_reg(i_regs->regmap,-1);
3061 }
b9b61529 3062 if(itype[i]==C1LS||itype[i]==C2LS) {
3063 if ((opcode[i]&0x3b)==0x31||(opcode[i]&0x3b)==0x32) // LWC1/LDC1/LWC2/LDC2
57871462 3064 ra=get_reg(i_regs->regmap,FTEMP);
1fd1aceb 3065 else { // SWC1/SDC1/SWC2/SDC2
57871462 3066 ra=get_reg(i_regs->regmap,agr);
3067 if(ra<0) ra=get_reg(i_regs->regmap,-1);
3068 }
3069 }
3070 int rs=get_reg(i_regs->regmap,rs1[i]);
57871462 3071 if(ra>=0) {
3072 int offset=imm[i];
3073 int c=(i_regs->wasconst>>rs)&1;
3074 if(rs1[i]==0) {
3075 // Using r0 as a base address
57871462 3076 if(!entry||entry[ra]!=agr) {
3077 if (opcode[i]==0x22||opcode[i]==0x26) {
3078 emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
3079 }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
3080 emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR
3081 }else{
3082 emit_movimm(offset,ra);
3083 }
3084 } // else did it in the previous cycle
3085 }
3086 else if(rs<0) {
3087 if(!entry||entry[ra]!=rs1[i])
3088 emit_loadreg(rs1[i],ra);
3089 //if(!entry||entry[ra]!=rs1[i])
3090 // printf("poor load scheduling!\n");
3091 }
3092 else if(c) {
57871462 3093 if(rs1[i]!=rt1[i]||itype[i]!=LOAD) {
3094 if(!entry||entry[ra]!=agr) {
3095 if (opcode[i]==0x22||opcode[i]==0x26) {
3096 emit_movimm((constmap[i][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR
3097 }else if (opcode[i]==0x1a||opcode[i]==0x1b) {
3098 emit_movimm((constmap[i][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
3099 }else{
57871462 3100 emit_movimm(constmap[i][rs]+offset,ra);
8575a877 3101 regs[i].loadedconst|=1<<ra;
57871462 3102 }
3103 } // else did it in the previous cycle
3104 } // else load_consts already did it
3105 }
3106 if(offset&&!c&&rs1[i]) {
3107 if(rs>=0) {
3108 emit_addimm(rs,offset,ra);
3109 }else{
3110 emit_addimm(ra,offset,ra);
3111 }
3112 }
3113 }
3114 }
3115 // Preload constants for next instruction
b9b61529 3116 if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS||itype[i+1]==C2LS) {
57871462 3117 int agr,ra;
57871462 3118 // Actual address
3119 agr=AGEN1+((i+1)&1);
3120 ra=get_reg(i_regs->regmap,agr);
3121 if(ra>=0) {
3122 int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
3123 int offset=imm[i+1];
3124 int c=(regs[i+1].wasconst>>rs)&1;
3125 if(c&&(rs1[i+1]!=rt1[i+1]||itype[i+1]!=LOAD)) {
3126 if (opcode[i+1]==0x22||opcode[i+1]==0x26) {
3127 emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFFC,ra); // LWL/LWR
3128 }else if (opcode[i+1]==0x1a||opcode[i+1]==0x1b) {
3129 emit_movimm((constmap[i+1][rs]+offset)&0xFFFFFFF8,ra); // LDL/LDR
3130 }else{
57871462 3131 emit_movimm(constmap[i+1][rs]+offset,ra);
8575a877 3132 regs[i+1].loadedconst|=1<<ra;
57871462 3133 }
3134 }
3135 else if(rs1[i+1]==0) {
3136 // Using r0 as a base address
3137 if (opcode[i+1]==0x22||opcode[i+1]==0x26) {
3138 emit_movimm(offset&0xFFFFFFFC,ra); // LWL/LWR
3139 }else if (opcode[i+1]==0x1a||opcode[i+1]==0x1b) {
3140 emit_movimm(offset&0xFFFFFFF8,ra); // LDL/LDR
3141 }else{
3142 emit_movimm(offset,ra);
3143 }
3144 }
3145 }
3146 }
3147}
3148
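// For a host register holding a known constant, look ahead while the same
// constant stays mapped and return the value it must eventually contain
// (folding in a following load's immediate offset where possible), so that
// a single movimm can cover several instructions.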
e2b5e7aa 3149static int get_final_value(int hr, int i, int *value)
57871462 3150{
3151 int reg=regs[i].regmap[hr];
3152 while(i<slen-1) {
3153 if(regs[i+1].regmap[hr]!=reg) break;
3154 if(!((regs[i+1].isconst>>hr)&1)) break;
3155 if(bt[i+1]) break;
3156 i++;
3157 }
3158 if(i<slen-1) {
3159 if(itype[i]==UJUMP||itype[i]==RJUMP||itype[i]==CJUMP||itype[i]==SJUMP) {
3160 *value=constmap[i][hr];
3161 return 1;
3162 }
3163 if(!bt[i+1]) {
3164 if(itype[i+1]==UJUMP||itype[i+1]==RJUMP||itype[i+1]==CJUMP||itype[i+1]==SJUMP) {
3165 // Load in delay slot, out-of-order execution
3166 if(itype[i+2]==LOAD&&rs1[i+2]==reg&&rt1[i+2]==reg&&((regs[i+1].wasconst>>hr)&1))
3167 {
57871462 3168 // Precompute load address
3169 *value=constmap[i][hr]+imm[i+2];
3170 return 1;
3171 }
3172 }
3173 if(itype[i+1]==LOAD&&rs1[i+1]==reg&&rt1[i+1]==reg)
3174 {
57871462 3175 // Precompute load address
3176 *value=constmap[i][hr]+imm[i+1];
643aeae3 3177 //printf("c=%x imm=%lx\n",(long)constmap[i][hr],imm[i+1]);
57871462 3178 return 1;
3179 }
3180 }
3181 }
3182 *value=constmap[i][hr];
643aeae3 3183 //printf("c=%lx\n",(long)constmap[i][hr]);
57871462 3184 if(i==slen-1) return 1;
00fa9369 3185 assert(reg < 64);
3186 return !((unneeded_reg[i+1]>>reg)&1);
57871462 3187}
3188
3189// Load registers with known constants
3190void load_consts(signed char pre[],signed char regmap[],int is32,int i)
3191{
8575a877 3192 int hr,hr2;
3193 // propagate loaded constant flags
3194 if(i==0||bt[i])
3195 regs[i].loadedconst=0;
3196 else {
3197 for(hr=0;hr<HOST_REGS;hr++) {
3198 if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((regs[i-1].isconst>>hr)&1)&&pre[hr]==regmap[hr]
3199 &&regmap[hr]==regs[i-1].regmap[hr]&&((regs[i-1].loadedconst>>hr)&1))
3200 {
3201 regs[i].loadedconst|=1<<hr;
3202 }
3203 }
3204 }
57871462 3205 // Load 32-bit regs
3206 for(hr=0;hr<HOST_REGS;hr++) {
3207 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
3208 //if(entry[hr]!=regmap[hr]) {
8575a877 3209 if(!((regs[i].loadedconst>>hr)&1)) {
57871462 3210 if(((regs[i].isconst>>hr)&1)&&regmap[hr]<64&&regmap[hr]>0) {
8575a877 3211 int value,similar=0;
57871462 3212 if(get_final_value(hr,i,&value)) {
8575a877 3213 // see if some other register has similar value
3214 for(hr2=0;hr2<HOST_REGS;hr2++) {
3215 if(hr2!=EXCLUDE_REG&&((regs[i].loadedconst>>hr2)&1)) {
3216 if(is_similar_value(value,constmap[i][hr2])) {
3217 similar=1;
3218 break;
3219 }
3220 }
3221 }
3222 if(similar) {
3223 int value2;
3224 if(get_final_value(hr2,i,&value2)) // is this needed?
3225 emit_movimm_from(value2,hr2,value,hr);
3226 else
3227 emit_movimm(value,hr);
3228 }
3229 else if(value==0) {
57871462 3230 emit_zeroreg(hr);
3231 }
3232 else {
3233 emit_movimm(value,hr);
3234 }
3235 }
8575a877 3236 regs[i].loadedconst|=1<<hr;
57871462 3237 }
3238 }
3239 }
3240 }
3241 // Load 64-bit regs
3242 for(hr=0;hr<HOST_REGS;hr++) {
3243 if(hr!=EXCLUDE_REG&&regmap[hr]>=0) {
3244 //if(entry[hr]!=regmap[hr]) {
3245 if(i==0||!((regs[i-1].isconst>>hr)&1)||pre[hr]!=regmap[hr]||bt[i]) {
3246 if(((regs[i].isconst>>hr)&1)&&regmap[hr]>64) {
3247 if((is32>>(regmap[hr]&63))&1) {
3248 int lr=get_reg(regmap,regmap[hr]-64);
3249 assert(lr>=0);
3250 emit_sarimm(lr,31,hr);
3251 }
3252 else
3253 {
3254 int value;
3255 if(get_final_value(hr,i,&value)) {
3256 if(value==0) {
3257 emit_zeroreg(hr);
3258 }
3259 else {
3260 emit_movimm(value,hr);
3261 }
3262 }
3263 }
3264 }
3265 }
3266 }
3267 }
3268}
3269void load_all_consts(signed char regmap[],int is32,u_int dirty,int i)
3270{
3271 int hr;
3272 // Load 32-bit regs
3273 for(hr=0;hr<HOST_REGS;hr++) {
3274 if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((dirty>>hr)&1)) {
3275 if(((regs[i].isconst>>hr)&1)&&regmap[hr]<64&&regmap[hr]>0) {
3276 int value=constmap[i][hr];
3277 if(value==0) {
3278 emit_zeroreg(hr);
3279 }
3280 else {
3281 emit_movimm(value,hr);
3282 }
3283 }
3284 }
3285 }
3286 // Load 64-bit regs
3287 for(hr=0;hr<HOST_REGS;hr++) {
3288 if(hr!=EXCLUDE_REG&&regmap[hr]>=0&&((dirty>>hr)&1)) {
3289 if(((regs[i].isconst>>hr)&1)&&regmap[hr]>64) {
3290 if((is32>>(regmap[hr]&63))&1) {
3291 int lr=get_reg(regmap,regmap[hr]-64);
3292 assert(lr>=0);
3293 emit_sarimm(lr,31,hr);
3294 }
3295 else
3296 {
3297 int value=constmap[i][hr];
3298 if(value==0) {
3299 emit_zeroreg(hr);
3300 }
3301 else {
3302 emit_movimm(value,hr);
3303 }
3304 }
3305 }
3306 }
3307 }
3308}
3309
3310// Write out all dirty registers (except cycle count)
3311void wb_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty)
3312{
3313 int hr;
3314 for(hr=0;hr<HOST_REGS;hr++) {
3315 if(hr!=EXCLUDE_REG) {
3316 if(i_regmap[hr]>0) {
3317 if(i_regmap[hr]!=CCREG) {
3318 if((i_dirty>>hr)&1) {
00fa9369 3319 assert(i_regmap[hr]<64);
3320 emit_storereg(i_regmap[hr],hr);
57871462 3321 }
3322 }
3323 }
3324 }
3325 }
3326}
3327// Write out dirty registers that we need to reload (pair with load_needed_regs)
3328// This writes the registers not written by store_regs_bt
3329void wb_needed_dirtys(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
3330{
3331 int hr;
3332 int t=(addr-start)>>2;
3333 for(hr=0;hr<HOST_REGS;hr++) {
3334 if(hr!=EXCLUDE_REG) {
3335 if(i_regmap[hr]>0) {
3336 if(i_regmap[hr]!=CCREG) {
00fa9369 3337 if(i_regmap[hr]==regs[t].regmap_entry[hr] && ((regs[t].dirty>>hr)&1) && !(((i_is32&~regs[t].was32)>>(i_regmap[hr]&63))&1)) {
57871462 3338 if((i_dirty>>hr)&1) {
00fa9369 3339 assert(i_regmap[hr]<64);
3340 emit_storereg(i_regmap[hr],hr);
57871462 3341 }
3342 }
3343 }
3344 }
3345 }
3346 }
3347}
3348
3349// Load all registers (except cycle count)
3350void load_all_regs(signed char i_regmap[])
3351{
3352 int hr;
3353 for(hr=0;hr<HOST_REGS;hr++) {
3354 if(hr!=EXCLUDE_REG) {
3355 if(i_regmap[hr]==0) {
3356 emit_zeroreg(hr);
3357 }
3358 else
ea3d2e6e 3359 if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
57871462 3360 {
3361 emit_loadreg(i_regmap[hr],hr);
3362 }
3363 }
3364 }
3365}
3366
3367// Load all current registers also needed by next instruction
3368void load_needed_regs(signed char i_regmap[],signed char next_regmap[])
3369{
3370 int hr;
3371 for(hr=0;hr<HOST_REGS;hr++) {
3372 if(hr!=EXCLUDE_REG) {
3373 if(get_reg(next_regmap,i_regmap[hr])>=0) {
3374 if(i_regmap[hr]==0) {
3375 emit_zeroreg(hr);
3376 }
3377 else
ea3d2e6e 3378 if(i_regmap[hr]>0 && (i_regmap[hr]&63)<TEMPREG && i_regmap[hr]!=CCREG)
57871462 3379 {
3380 emit_loadreg(i_regmap[hr],hr);
3381 }
3382 }
3383 }
3384 }
3385}
3386
3387// Load all regs, storing cycle count if necessary
3388void load_regs_entry(int t)
3389{
3390 int hr;
2573466a 3391 if(is_ds[t]) emit_addimm(HOST_CCREG,CLOCK_ADJUST(1),HOST_CCREG);
3392 else if(ccadj[t]) emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[t]),HOST_CCREG);
57871462 3393 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) {
3394 emit_storereg(CCREG,HOST_CCREG);
3395 }
3396 // Load 32-bit regs
3397 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 3398 if(regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<TEMPREG) {
57871462 3399 if(regs[t].regmap_entry[hr]==0) {
3400 emit_zeroreg(hr);
3401 }
3402 else if(regs[t].regmap_entry[hr]!=CCREG)
3403 {
3404 emit_loadreg(regs[t].regmap_entry[hr],hr);
3405 }
3406 }
3407 }
3408 // Load 64-bit regs
3409 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 3410 if(regs[t].regmap_entry[hr]>=64&&regs[t].regmap_entry[hr]<TEMPREG+64) {
57871462 3411 assert(regs[t].regmap_entry[hr]!=64);
3412 if((regs[t].was32>>(regs[t].regmap_entry[hr]&63))&1) {
3413 int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
3414 if(lr<0) {
3415 emit_loadreg(regs[t].regmap_entry[hr],hr);
3416 }
3417 else
3418 {
3419 emit_sarimm(lr,31,hr);
3420 }
3421 }
3422 else
3423 {
3424 emit_loadreg(regs[t].regmap_entry[hr],hr);
3425 }
3426 }
3427 }
3428}
3429
3430// Store dirty registers prior to branch
3431void store_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
3432{
3433 if(internal_branch(i_is32,addr))
3434 {
3435 int t=(addr-start)>>2;
3436 int hr;
3437 for(hr=0;hr<HOST_REGS;hr++) {
3438 if(hr!=EXCLUDE_REG) {
3439 if(i_regmap[hr]>0 && i_regmap[hr]!=CCREG) {
00fa9369 3440 if(i_regmap[hr]!=regs[t].regmap_entry[hr] || !((regs[t].dirty>>hr)&1) || (((i_is32&~regs[t].was32)>>(i_regmap[hr]&63))&1)) {
57871462 3441 if((i_dirty>>hr)&1) {
00fa9369 3442 assert(i_regmap[hr]<64);
3443 if(!((unneeded_reg[t]>>i_regmap[hr])&1))
3444 emit_storereg(i_regmap[hr],hr);
57871462 3445 }
3446 }
3447 }
3448 }
3449 }
3450 }
3451 else
3452 {
3453 // Branch out of this block, write out all dirty regs
3454 wb_dirtys(i_regmap,i_is32,i_dirty);
3455 }
3456}
3457
3458// Load all needed registers for branch target
3459void load_regs_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
3460{
3461 //if(addr>=start && addr<(start+slen*4))
3462 if(internal_branch(i_is32,addr))
3463 {
3464 int t=(addr-start)>>2;
3465 int hr;
3466 // Store the cycle count before loading something else
3467 if(i_regmap[HOST_CCREG]!=CCREG) {
3468 assert(i_regmap[HOST_CCREG]==-1);
3469 }
3470 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) {
3471 emit_storereg(CCREG,HOST_CCREG);
3472 }
3473 // Load 32-bit regs
3474 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 3475 if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=0&&regs[t].regmap_entry[hr]<TEMPREG) {
00fa9369 3476 if(i_regmap[hr]!=regs[t].regmap_entry[hr]) {
57871462 3477 if(regs[t].regmap_entry[hr]==0) {
3478 emit_zeroreg(hr);
3479 }
3480 else if(regs[t].regmap_entry[hr]!=CCREG)
3481 {
3482 emit_loadreg(regs[t].regmap_entry[hr],hr);
3483 }
3484 }
3485 }
3486 }
3487 //Load 64-bit regs
3488 for(hr=0;hr<HOST_REGS;hr++) {
ea3d2e6e 3489 if(hr!=EXCLUDE_REG&&regs[t].regmap_entry[hr]>=64&&regs[t].regmap_entry[hr]<TEMPREG+64) {
57871462 3490 if(i_regmap[hr]!=regs[t].regmap_entry[hr]) {
3491 assert(regs[t].regmap_entry[hr]!=64);
3492 if((i_is32>>(regs[t].regmap_entry[hr]&63))&1) {
3493 int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
3494 if(lr<0) {
3495 emit_loadreg(regs[t].regmap_entry[hr],hr);
3496 }
3497 else
3498 {
3499 emit_sarimm(lr,31,hr);
3500 }
3501 }
3502 else
3503 {
3504 emit_loadreg(regs[t].regmap_entry[hr],hr);
3505 }
3506 }
3507 else if((i_is32>>(regs[t].regmap_entry[hr]&63))&1) {
3508 int lr=get_reg(regs[t].regmap_entry,regs[t].regmap_entry[hr]-64);
3509 assert(lr>=0);
3510 emit_sarimm(lr,31,hr);
3511 }
3512 }
3513 }
3514 }
3515}
3516
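// Check whether the register state at a branch (mapping, 32/64-bit status
// and dirty bits) is compatible with the recorded entry state of the branch
// target, i.e. whether the branch can jump straight into the compiled
// target without any writeback or reload.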
3517int match_bt(signed char i_regmap[],uint64_t i_is32,uint64_t i_dirty,int addr)
3518{
3519 if(addr>=start && addr<start+slen*4-4)
3520 {
3521 int t=(addr-start)>>2;
3522 int hr;
3523 if(regs[t].regmap_entry[HOST_CCREG]!=CCREG) return 0;
3524 for(hr=0;hr<HOST_REGS;hr++)
3525 {
3526 if(hr!=EXCLUDE_REG)
3527 {
3528 if(i_regmap[hr]!=regs[t].regmap_entry[hr])
3529 {
ea3d2e6e 3530 if(regs[t].regmap_entry[hr]>=0&&(regs[t].regmap_entry[hr]|64)<TEMPREG+64)
57871462 3531 {
3532 return 0;
3533 }
9f51b4b9 3534 else
57871462 3535 if((i_dirty>>hr)&1)
3536 {
ea3d2e6e 3537 if(i_regmap[hr]<TEMPREG)
57871462 3538 {
3539 if(!((unneeded_reg[t]>>i_regmap[hr])&1))
3540 return 0;
3541 }
ea3d2e6e 3542 else if(i_regmap[hr]>=64&&i_regmap[hr]<TEMPREG+64)
57871462 3543 {
00fa9369 3544 assert(0);
57871462 3545 }
3546 }
3547 }
3548 else // Same register but is it 32-bit or dirty?
3549 if(i_regmap[hr]>=0)
3550 {
3551 if(!((regs[t].dirty>>hr)&1))
3552 {
3553 if((i_dirty>>hr)&1)
3554 {
3555 if(!((unneeded_reg[t]>>i_regmap[hr])&1))
3556 {
3557 //printf("%x: dirty no match\n",addr);
3558 return 0;
3559 }
3560 }
3561 }
57871462 3562 }
3563 }
3564 }
57871462 3565 // Delay slots are not valid branch targets
3566 //if(t>0&&(itype[t-1]==RJUMP||itype[t-1]==UJUMP||itype[t-1]==CJUMP||itype[t-1]==SJUMP||itype[t-1]==FJUMP)) return 0;
3567 // Delay slots require additional processing, so do not match
3568 if(is_ds[t]) return 0;
3569 }
3570 else
3571 {
3572 int hr;
3573 for(hr=0;hr<HOST_REGS;hr++)
3574 {
3575 if(hr!=EXCLUDE_REG)
3576 {
3577 if(i_regmap[hr]>=0)
3578 {
3579 if(hr!=HOST_CCREG||i_regmap[hr]!=CCREG)
3580 {
3581 if((i_dirty>>hr)&1)
3582 {
3583 return 0;
3584 }
3585 }
3586 }
3587 }
3588 }
3589 }
3590 return 1;
3591}
3592
dd114d7d 3593#ifdef DRC_DBG
3594static void drc_dbg_emit_do_cmp(int i)
3595{
3596 extern void do_insn_cmp();
3597 extern int cycle;
3598 u_int hr,reglist=0;
3599
3600 for(hr=0;hr<HOST_REGS;hr++)
3601 if(regs[i].regmap[hr]>=0) reglist|=1<<hr;
3602 save_regs(reglist);
3603 emit_movimm(start+i*4,0);
643aeae3 3604 emit_writeword(0,&pcaddr);
3605 emit_call(do_insn_cmp);
3606 //emit_readword(&cycle,0);
dd114d7d 3607 //emit_addimm(0,2,0);
643aeae3 3608 //emit_writeword(0,&cycle);
dd114d7d 3609 restore_regs(reglist);
3610}
3611#else
3612#define drc_dbg_emit_do_cmp(x)
3613#endif
3614
57871462 3615// Used when a branch jumps into the delay slot of another branch
3616void ds_assemble_entry(int i)
3617{
3618 int t=(ba[i]-start)>>2;
df4dc2b1 3619 if (!instr_addr[t])
3620 instr_addr[t] = out;
57871462 3621 assem_debug("Assemble delay slot at %x\n",ba[i]);
3622 assem_debug("<->\n");
dd114d7d 3623 drc_dbg_emit_do_cmp(t);
57871462 3624 if(regs[t].regmap_entry[HOST_CCREG]==CCREG&&regs[t].regmap[HOST_CCREG]!=CCREG)
3625 wb_register(CCREG,regs[t].regmap_entry,regs[t].wasdirty,regs[t].was32);
3626 load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,rs1[t],rs2[t]);
3627 address_generation(t,&regs[t],regs[t].regmap_entry);
b9b61529 3628 if(itype[t]==STORE||itype[t]==STORELR||(opcode[t]&0x3b)==0x39||(opcode[t]&0x3b)==0x3a)
57871462 3629 load_regs(regs[t].regmap_entry,regs[t].regmap,regs[t].was32,INVCP,INVCP);
57871462 3630 is_delayslot=0;
3631 switch(itype[t]) {
3632 case ALU:
3633 alu_assemble(t,&regs[t]);break;
3634 case IMM16:
3635 imm16_assemble(t,&regs[t]);break;
3636 case SHIFT:
3637 shift_assemble(t,&regs[t]);break;
3638 case SHIFTIMM:
3639 shiftimm_assemble(t,&regs[t]);break;
3640 case LOAD:
3641 load_assemble(t,&regs[t]);break;
3642 case LOADLR:
3643 loadlr_assemble(t,&regs[t]);break;
3644 case STORE:
3645 store_assemble(t,&regs[t]);break;
3646 case STORELR:
3647 storelr_assemble(t,&regs[t]);break;
3648 case COP0:
3649 cop0_assemble(t,&regs[t]);break;
3650 case COP1:
3651 cop1_assemble(t,&regs[t]);break;
3652 case C1LS:
3653 c1ls_assemble(t,&regs[t]);break;
b9b61529 3654 case COP2:
3655 cop2_assemble(t,&regs[t]);break;
3656 case C2LS:
3657 c2ls_assemble(t,&regs[t]);break;
3658 case C2OP:
3659 c2op_assemble(t,&regs[t]);break;
57871462 3660 case MULTDIV:
3661 multdiv_assemble(t,&regs[t]);break;
3662 case MOV:
3663 mov_assemble(t,&regs[t]);break;
3664 case SYSCALL:
7139f3c8 3665 case HLECALL:
1e973cb0 3666 case INTCALL:
57871462 3667 case SPAN:
3668 case UJUMP:
3669 case RJUMP:
3670 case CJUMP:
3671 case SJUMP:
3672 case FJUMP:
c43b5311 3673 SysPrintf("Jump in the delay slot. This is probably a bug.\n");
57871462 3674 }
3675 store_regs_bt(regs[t].regmap,regs[t].is32,regs[t].dirty,ba[i]+4);
3676 load_regs_bt(regs[t].regmap,regs[t].is32,regs[t].dirty,ba[i]+4);
3677 if(internal_branch(regs[t].is32,ba[i]+4))
3678 assem_debug("branch: internal\n");
3679 else
3680 assem_debug("branch: external\n");
3681 assert(internal_branch(regs[t].is32,ba[i]+4));
643aeae3 3682 add_to_linker(out,ba[i]+4,internal_branch(regs[t].is32,ba[i]+4));
57871462 3683 emit_jmp(0);
3684}
3685
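// Emit the cycle-count test for a branch: update or test the counter in
// HOST_CCREG and branch out to a CC_STUB when it runs out, with a special
// case that detects simple idle loops.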
3686void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert)
3687{
3688 int count;
b14b6a8f 3689 void *jaddr;
3690 void *idle=NULL;
b6e87b2b 3691 int t=0;
57871462 3692 if(itype[i]==RJUMP)
3693 {
3694 *adj=0;
3695 }
3696 //if(ba[i]>=start && ba[i]<(start+slen*4))
3697 if(internal_branch(branch_regs[i].is32,ba[i]))
3698 {
b6e87b2b 3699 t=(ba[i]-start)>>2;
57871462 3700 if(is_ds[t]) *adj=-1; // Branch into delay slot adds an extra cycle
3701 else *adj=ccadj[t];
3702 }
3703 else
3704 {
3705 *adj=0;
3706 }
3707 count=ccadj[i];
3708 if(taken==TAKEN && i==(ba[i]-start)>>2 && source[i+1]==0) {
3709 // Idle loop
3710 if(count&1) emit_addimm_and_set_flags(2*(count+2),HOST_CCREG);
b14b6a8f 3711 idle=out;
57871462 3712 //emit_subfrommem(&idlecount,HOST_CCREG); // Count idle cycles
3713 emit_andimm(HOST_CCREG,3,HOST_CCREG);
b14b6a8f 3714 jaddr=out;
57871462 3715 emit_jmp(0);
3716 }
3717 else if(*adj==0||invert) {
b6e87b2b 3718 int cycles=CLOCK_ADJUST(count+2);
3719 // faster loop HACK
3720 if (t&&*adj) {
3721 int rel=t-i;
3722 if(-NO_CYCLE_PENALTY_THR<rel&&rel<0)
3723 cycles=CLOCK_ADJUST(*adj)+count+2-*adj;
3724 }
3725 emit_addimm_and_set_flags(cycles,HOST_CCREG);
b14b6a8f 3726 jaddr=out;
57871462 3727 emit_jns(0);
3728 }
3729 else
3730 {
2573466a 3731 emit_cmpimm(HOST_CCREG,-CLOCK_ADJUST(count+2));
b14b6a8f 3732 jaddr=out;
57871462 3733 emit_jns(0);
3734 }
b14b6a8f 3735 add_stub(CC_STUB,jaddr,idle?idle:out,(*adj==0||invert||idle)?0:(count+2),i,addr,taken,0);
57871462 3736}
3737
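// Out-of-line continuation for do_cc: when the cycle counter expires at a
// branch, write back dirty registers, store the return PC in pcaddr (for
// conditional branches the direction is re-evaluated here), call
// cc_interrupt, then reload registers and jump back.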
b14b6a8f 3738static void do_ccstub(int n)
57871462 3739{
3740 literal_pool(256);
b14b6a8f 3741 assem_debug("do_ccstub %x\n",start+stubs[n].b*4);
3742 set_jump_target(stubs[n].addr, out);
3743 int i=stubs[n].b;
3744 if(stubs[n].d==NULLDS) {
57871462 3745 // Delay slot instruction is nullified ("likely" branch)
3746 wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
3747 }
b14b6a8f 3748 else if(stubs[n].d!=TAKEN) {
57871462 3749 wb_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty);
3750 }
3751 else {
3752 if(internal_branch(branch_regs[i].is32,ba[i]))
3753 wb_needed_dirtys(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
3754 }
b14b6a8f 3755 if(stubs[n].c!=-1)
57871462 3756 {
3757 // Save PC as return address
b14b6a8f 3758 emit_movimm(stubs[n].c,EAX);
643aeae3 3759 emit_writeword(EAX,&pcaddr);
57871462 3760 }
3761 else
3762 {
3763 // Return address depends on which way the branch goes
3764 if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
3765 {
3766 int s1l=get_reg(branch_regs[i].regmap,rs1[i]);
3767 int s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
3768 int s2l=get_reg(branch_regs[i].regmap,rs2[i]);
3769 int s2h=get_reg(branch_regs[i].regmap,rs2[i]|64);
3770 if(rs1[i]==0)
3771 {
3772 s1l=s2l;s1h=s2h;
3773 s2l=s2h=-1;
3774 }
3775 else if(rs2[i]==0)
3776 {
3777 s2l=s2h=-1;
3778 }
3779 if((branch_regs[i].is32>>rs1[i])&(branch_regs[i].is32>>rs2[i])&1) {
3780 s1h=s2h=-1;
3781 }
3782 assert(s1l>=0);
3783 #ifdef DESTRUCTIVE_WRITEBACK
3784 if(rs1[i]) {
3785 if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs1[i])&1)
3786 emit_loadreg(rs1[i],s1l);
9f51b4b9 3787 }
57871462 3788 else {
3789 if((branch_regs[i].dirty>>s1l)&(branch_regs[i].is32>>rs2[i])&1)
3790 emit_loadreg(rs2[i],s1l);
3791 }
3792 if(s2l>=0)
3793 if((branch_regs[i].dirty>>s2l)&(branch_regs[i].is32>>rs2[i])&1)
3794 emit_loadreg(rs2[i],s2l);
3795 #endif
3796 int hr=0;
5194fb95 3797 int addr=-1,alt=-1,ntaddr=-1;
57871462 3798 while(hr<HOST_REGS)
3799 {
3800 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
3801 (branch_regs[i].regmap[hr]&63)!=rs1[i] &&
3802 (branch_regs[i].regmap[hr]&63)!=rs2[i] )
3803 {
3804 addr=hr++;break;
3805 }
3806 hr++;
3807 }
3808 while(hr<HOST_REGS)
3809 {
3810 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
3811 (branch_regs[i].regmap[hr]&63)!=rs1[i] &&
3812 (branch_regs[i].regmap[hr]&63)!=rs2[i] )
3813 {
3814 alt=hr++;break;
3815 }
3816 hr++;
3817 }
3818 if((opcode[i]&0x2E)==6) // BLEZ/BGTZ needs another register
3819 {
3820 while(hr<HOST_REGS)
3821 {
3822 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
3823 (branch_regs[i].regmap[hr]&63)!=rs1[i] &&
3824 (branch_regs[i].regmap[hr]&63)!=rs2[i] )
3825 {
3826 ntaddr=hr;break;
3827 }
3828 hr++;
3829 }
3830 assert(hr<HOST_REGS);
3831 }
3832 if((opcode[i]&0x2f)==4) // BEQ
3833 {
3834 #ifdef HAVE_CMOV_IMM
3835 if(s1h<0) {
3836 if(s2l>=0) emit_cmp(s1l,s2l);
3837 else emit_test(s1l,s1l);
3838 emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr);
3839 }
3840 else
3841 #endif
3842 {
3843 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
3844 if(s1h>=0) {
3845 if(s2h>=0) emit_cmp(s1h,s2h);
3846 else emit_test(s1h,s1h);
3847 emit_cmovne_reg(alt,addr);
3848 }
3849 if(s2l>=0) emit_cmp(s1l,s2l);
3850 else emit_test(s1l,s1l);
3851 emit_cmovne_reg(alt,addr);
3852 }
3853 }
3854 if((opcode[i]&0x2f)==5) // BNE
3855 {
3856 #ifdef HAVE_CMOV_IMM
3857 if(s1h<0) {
3858 if(s2l>=0) emit_cmp(s1l,s2l);
3859 else emit_test(s1l,s1l);
3860 emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr);
3861 }
3862 else
3863 #endif
3864 {
3865 emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt);
3866 if(s1h>=0) {
3867 if(s2h>=0) emit_cmp(s1h,s2h);
3868 else emit_test(s1h,s1h);
3869 emit_cmovne_reg(alt,addr);
3870 }
3871 if(s2l>=0) emit_cmp(s1l,s2l);
3872 else emit_test(s1l,s1l);
3873 emit_cmovne_reg(alt,addr);
3874 }
3875 }
3876 if((opcode[i]&0x2f)==6) // BLEZ
3877 {
3878 //emit_movimm(ba[i],alt);
3879 //emit_movimm(start+i*4+8,addr);
3880 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
3881 emit_cmpimm(s1l,1);
3882 if(s1h>=0) emit_mov(addr,ntaddr);
3883 emit_cmovl_reg(alt,addr);
3884 if(s1h>=0) {
3885 emit_test(s1h,s1h);
3886 emit_cmovne_reg(ntaddr,addr);
3887 emit_cmovs_reg(alt,addr);
3888 }
3889 }
3890 if((opcode[i]&0x2f)==7) // BGTZ
3891 {
3892 //emit_movimm(ba[i],addr);
3893 //emit_movimm(start+i*4+8,ntaddr);
3894 emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr);
3895 emit_cmpimm(s1l,1);
3896 if(s1h>=0) emit_mov(addr,alt);
3897 emit_cmovl_reg(ntaddr,addr);
3898 if(s1h>=0) {
3899 emit_test(s1h,s1h);
3900 emit_cmovne_reg(alt,addr);
3901 emit_cmovs_reg(ntaddr,addr);
3902 }
3903 }
3904 if((opcode[i]==1)&&(opcode2[i]&0x2D)==0) // BLTZ
3905 {
3906 //emit_movimm(ba[i],alt);
3907 //emit_movimm(start+i*4+8,addr);
3908 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
3909 if(s1h>=0) emit_test(s1h,s1h);
3910 else emit_test(s1l,s1l);
3911 emit_cmovs_reg(alt,addr);
3912 }
3913 if((opcode[i]==1)&&(opcode2[i]&0x2D)==1) // BGEZ
3914 {
3915 //emit_movimm(ba[i],addr);
3916 //emit_movimm(start+i*4+8,alt);
3917 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
3918 if(s1h>=0) emit_test(s1h,s1h);
3919 else emit_test(s1l,s1l);
3920 emit_cmovs_reg(alt,addr);
3921 }
3922 if(opcode[i]==0x11 && opcode2[i]==0x08 ) {
3923 if(source[i]&0x10000) // BC1T
3924 {
3925 //emit_movimm(ba[i],alt);
3926 //emit_movimm(start+i*4+8,addr);
3927 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
3928 emit_testimm(s1l,0x800000);
3929 emit_cmovne_reg(alt,addr);
3930 }
3931 else // BC1F
3932 {
3933 //emit_movimm(ba[i],addr);
3934 //emit_movimm(start+i*4+8,alt);
3935 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
3936 emit_testimm(s1l,0x800000);
3937 emit_cmovne_reg(alt,addr);
3938 }
3939 }
643aeae3 3940 emit_writeword(addr,&pcaddr);
57871462 3941 }
3942 else
3943 if(itype[i]==RJUMP)
3944 {
3945 int r=get_reg(branch_regs[i].regmap,rs1[i]);
3946 if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) {
3947 r=get_reg(branch_regs[i].regmap,RTEMP);
3948 }
643aeae3 3949 emit_writeword(r,&pcaddr);
57871462 3950 }
c43b5311 3951 else {SysPrintf("Unknown branch type in do_ccstub\n");exit(1);}
57871462 3952 }
3953 // Update cycle count
3954 assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1);
643aeae3 3955 if(stubs[n].a) emit_addimm(HOST_CCREG,CLOCK_ADJUST((signed int)stubs[n].a),HOST_CCREG);
3956 emit_call(cc_interrupt);
3957 if(stubs[n].a) emit_addimm(HOST_CCREG,-CLOCK_ADJUST((signed int)stubs[n].a),HOST_CCREG);
b14b6a8f 3958 if(stubs[n].d==TAKEN) {
57871462 3959 if(internal_branch(branch_regs[i].is32,ba[i]))
3960 load_needed_regs(branch_regs[i].regmap,regs[(ba[i]-start)>>2].regmap_entry);
3961 else if(itype[i]==RJUMP) {
3962 if(get_reg(branch_regs[i].regmap,RTEMP)>=0)
643aeae3 3963 emit_readword(&pcaddr,get_reg(branch_regs[i].regmap,RTEMP));
57871462 3964 else
3965 emit_loadreg(rs1[i],get_reg(branch_regs[i].regmap,rs1[i]));
3966 }
b14b6a8f 3967 }else if(stubs[n].d==NOTTAKEN) {
57871462 3968 if(i<slen-2) load_needed_regs(branch_regs[i].regmap,regmap_pre[i+2]);
3969 else load_all_regs(branch_regs[i].regmap);
b14b6a8f 3970 }else if(stubs[n].d==NULLDS) {
57871462 3971 // Delay slot instruction is nullified ("likely" branch)
3972 if(i<slen-2) load_needed_regs(regs[i].regmap,regmap_pre[i+2]);
3973 else load_all_regs(regs[i].regmap);
3974 }else{
3975 load_all_regs(branch_regs[i].regmap);
3976 }
b14b6a8f 3977 emit_jmp(stubs[n].retaddr);
57871462 3978}
3979
643aeae3 3980static void add_to_linker(void *addr, u_int target, int ext)
57871462 3981{
643aeae3 3982 assert(linkcount < ARRAY_SIZE(link_addr));
3983 link_addr[linkcount].addr = addr;
3984 link_addr[linkcount].target = target;
3985 link_addr[linkcount].ext = ext;
57871462 3986 linkcount++;
3987}
3988
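// Write the JAL return address (start+i*4+8, i.e. the instruction after the
// delay slot) into whichever host register holds $ra; with USE_MINI_HT an
// internal return address may go through do_miniht_insert() instead.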
eba830cd 3989static void ujump_assemble_write_ra(int i)
3990{
3991 int rt;
3992 unsigned int return_address;
3993 rt=get_reg(branch_regs[i].regmap,31);
3994 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
3995 //assert(rt>=0);
3996 return_address=start+i*4+8;
3997 if(rt>=0) {
3998 #ifdef USE_MINI_HT
3999 if(internal_branch(branch_regs[i].is32,return_address)&&rt1[i+1]!=31) {
4000 int temp=-1; // note: must be ds-safe
4001 #ifdef HOST_TEMPREG
4002 temp=HOST_TEMPREG;
4003 #endif
4004 if(temp>=0) do_miniht_insert(return_address,rt,temp);
4005 else emit_movimm(return_address,rt);
4006 }
4007 else
4008 #endif
4009 {
4010 #ifdef REG_PREFETCH
9f51b4b9 4011 if(temp>=0)
eba830cd 4012 {
643aeae3 4013 if(i_regmap[temp]!=PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp);
eba830cd 4014 }
4015 #endif
4016 emit_movimm(return_address,rt); // PC into link register
4017 #ifdef IMM_PREFETCH
df4dc2b1 4018 emit_prefetch(hash_table_get(return_address));
eba830cd 4019 #endif
4020 }
4021 }
4022}
4023
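// Assemble J/JAL. $ra is written before the delay slot if the slot reads it
// (ra_done), otherwise after it; then the target's register state is
// established, cycles are charged via do_cc(), and control either falls into
// ds_assemble_entry() (internal target that is itself a delay slot) or leaves
// through a jump patched by the linker.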
57871462 4024void ujump_assemble(int i,struct regstat *i_regs)
4025{
eba830cd 4026 int ra_done=0;
57871462 4027 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
4028 address_generation(i+1,i_regs,regs[i].regmap_entry);
4029 #ifdef REG_PREFETCH
4030 int temp=get_reg(branch_regs[i].regmap,PTEMP);
9f51b4b9 4031 if(rt1[i]==31&&temp>=0)
57871462 4032 {
581335b0 4033 signed char *i_regmap=i_regs->regmap;
57871462 4034 int return_address=start+i*4+8;
9f51b4b9 4035 if(get_reg(branch_regs[i].regmap,31)>0)
643aeae3 4036 if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp);
57871462 4037 }
4038 #endif
eba830cd 4039 if(rt1[i]==31&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) {
4040 ujump_assemble_write_ra(i); // writeback ra for DS
4041 ra_done=1;
57871462 4042 }
4ef8f67d 4043 ds_assemble(i+1,i_regs);
4044 uint64_t bc_unneeded=branch_regs[i].u;
4ef8f67d 4045 bc_unneeded|=1|(1LL<<rt1[i]);
00fa9369 4046 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,bc_unneeded);
4ef8f67d 4047 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
eba830cd 4048 if(!ra_done&&rt1[i]==31)
4049 ujump_assemble_write_ra(i);
57871462 4050 int cc,adj;
4051 cc=get_reg(branch_regs[i].regmap,CCREG);
4052 assert(cc==HOST_CCREG);
4053 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4054 #ifdef REG_PREFETCH
4055 if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp);
4056 #endif
4057 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
2573466a 4058 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 4059 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4060 if(internal_branch(branch_regs[i].is32,ba[i]))
4061 assem_debug("branch: internal\n");
4062 else
4063 assem_debug("branch: external\n");
4064 if(internal_branch(branch_regs[i].is32,ba[i])&&is_ds[(ba[i]-start)>>2]) {
4065 ds_assemble_entry(i);
4066 }
4067 else {
643aeae3 4068 add_to_linker(out,ba[i],internal_branch(branch_regs[i].is32,ba[i]));
57871462 4069 emit_jmp(0);
4070 }
4071}
4072
eba830cd 4073static void rjump_assemble_write_ra(int i)
4074{
4075 int rt,return_address;
4076 assert(rt1[i+1]!=rt1[i]);
4077 assert(rt2[i+1]!=rt1[i]);
4078 rt=get_reg(branch_regs[i].regmap,rt1[i]);
4079 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4080 assert(rt>=0);
4081 return_address=start+i*4+8;
4082 #ifdef REG_PREFETCH
9f51b4b9 4083 if(temp>=0)
eba830cd 4084 {
643aeae3 4085 if(i_regmap[temp]!=PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp);
eba830cd 4086 }
4087 #endif
4088 emit_movimm(return_address,rt); // PC into link register
4089 #ifdef IMM_PREFETCH
df4dc2b1 4090 emit_prefetch(hash_table_get(return_address));
eba830cd 4091 #endif
4092}
4093
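// Assemble JR/JALR. If the delay slot overwrites the branch-address register,
// the address is copied to RTEMP first. After the delay slot the cycle
// counter is bumped with flags set so that an expired count diverts to a
// CC_STUB (cc_interrupt check); otherwise the code jumps straight through
// jump_vaddr_reg[rs], or via the mini hash table for jr $ra when USE_MINI_HT
// is enabled.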
57871462 4094void rjump_assemble(int i,struct regstat *i_regs)
4095{
57871462 4096 int temp;
581335b0 4097 int rs,cc;
eba830cd 4098 int ra_done=0;
57871462 4099 rs=get_reg(branch_regs[i].regmap,rs1[i]);
4100 assert(rs>=0);
4101 if(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]) {
4102 // Delay slot abuse, make a copy of the branch address register
4103 temp=get_reg(branch_regs[i].regmap,RTEMP);
4104 assert(temp>=0);
4105 assert(regs[i].regmap[temp]==RTEMP);
4106 emit_mov(rs,temp);
4107 rs=temp;
4108 }
4109 address_generation(i+1,i_regs,regs[i].regmap_entry);
4110 #ifdef REG_PREFETCH
9f51b4b9 4111 if(rt1[i]==31)
57871462 4112 {
4113 if((temp=get_reg(branch_regs[i].regmap,PTEMP))>=0) {
581335b0 4114 signed char *i_regmap=i_regs->regmap;
57871462 4115 int return_address=start+i*4+8;
643aeae3 4116 if(i_regmap[temp]==PTEMP) emit_movimm((uintptr_t)hash_table_get(return_address),temp);
57871462 4117 }
4118 }
4119 #endif
4120 #ifdef USE_MINI_HT
4121 if(rs1[i]==31) {
4122 int rh=get_reg(regs[i].regmap,RHASH);
4123 if(rh>=0) do_preload_rhash(rh);
4124 }
4125 #endif
eba830cd 4126 if(rt1[i]!=0&&(rt1[i]==rs1[i+1]||rt1[i]==rs2[i+1])) {
4127 rjump_assemble_write_ra(i);
4128 ra_done=1;
57871462 4129 }
d5910d5d 4130 ds_assemble(i+1,i_regs);
4131 uint64_t bc_unneeded=branch_regs[i].u;
d5910d5d 4132 bc_unneeded|=1|(1LL<<rt1[i]);
d5910d5d 4133 bc_unneeded&=~(1LL<<rs1[i]);
00fa9369 4134 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,bc_unneeded);
d5910d5d 4135 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],CCREG);
eba830cd 4136 if(!ra_done&&rt1[i]!=0)
4137 rjump_assemble_write_ra(i);
57871462 4138 cc=get_reg(branch_regs[i].regmap,CCREG);
4139 assert(cc==HOST_CCREG);
581335b0 4140 (void)cc;
57871462 4141 #ifdef USE_MINI_HT
4142 int rh=get_reg(branch_regs[i].regmap,RHASH);
4143 int ht=get_reg(branch_regs[i].regmap,RHTBL);
4144 if(rs1[i]==31) {
4145 if(regs[i].regmap[rh]!=RHASH) do_preload_rhash(rh);
4146 do_preload_rhtbl(ht);
4147 do_rhash(rs,rh);
4148 }
4149 #endif
4150 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1);
4151 #ifdef DESTRUCTIVE_WRITEBACK
4152 if((branch_regs[i].dirty>>rs)&(branch_regs[i].is32>>rs1[i])&1) {
4153 if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) {
4154 emit_loadreg(rs1[i],rs);
4155 }
4156 }
4157 #endif
4158 #ifdef REG_PREFETCH
4159 if(rt1[i]==31&&temp>=0) emit_prefetchreg(temp);
4160 #endif
4161 #ifdef USE_MINI_HT
4162 if(rs1[i]==31) {
4163 do_miniht_load(ht,rh);
4164 }
4165 #endif
4166 //do_cc(i,branch_regs[i].regmap,&adj,-1,TAKEN);
4167 //if(adj) emit_addimm(cc,2*(ccadj[i]+2-adj),cc); // ??? - Shouldn't happen
4168 //assert(adj==0);
2573466a 4169 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
b14b6a8f 4170 add_stub(CC_STUB,out,jump_vaddr_reg[rs],0,i,-1,TAKEN,0);
911f2d55 4171 if(itype[i+1]==COP0&&(source[i+1]&0x3f)==0x10)
4172 // special case for RFE
4173 emit_jmp(0);
4174 else
71e490c5 4175 emit_jns(0);
57871462 4176 //load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,-1);
4177 #ifdef USE_MINI_HT
4178 if(rs1[i]==31) {
4179 do_miniht_jump(rs,rh,ht);
4180 }
4181 else
4182 #endif
4183 {
57871462 4184 emit_jmp(jump_vaddr_reg[rs]);
4185 }
57871462 4186 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4187 if(rt1[i]!=31&&i<slen-2&&(((u_int)out)&7)) emit_mov(13,13);
4188 #endif
4189}
4190
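// Assemble BEQ/BNE/BLEZ/BGTZ. Two code shapes are used: out-of-order (ooo[i]),
// where the delay slot is assembled first and the branch becomes a single
// conditional jump, and in-order, where the condition is tested first and the
// delay slot is assembled separately on the taken and not-taken paths (it is
// skipped on the not-taken path for "likely" branches). 'invert', used when
// the target's register mapping does not match, reverses the test so the
// taken path can be emitted as a local block that writes back and reloads
// registers before jumping. The s1h/s2h halves handle operands that are not
// known to be 32-bit.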
4191void cjump_assemble(int i,struct regstat *i_regs)
4192{
4193 signed char *i_regmap=i_regs->regmap;
4194 int cc;
4195 int match;
4196 match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4197 assem_debug("match=%d\n",match);
4198 int s1h,s1l,s2h,s2l;
57871462 4199 int unconditional=0,nop=0;
4200 int only32=0;
57871462 4201 int invert=0;
4202 int internal=internal_branch(branch_regs[i].is32,ba[i]);
4203 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
57871462 4204 if(!match) invert=1;
4205 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4206 if(i>(ba[i]-start)>>2) invert=1;
4207 #endif
9f51b4b9 4208
e1190b87 4209 if(ooo[i]) {
57871462 4210 s1l=get_reg(branch_regs[i].regmap,rs1[i]);
4211 s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
4212 s2l=get_reg(branch_regs[i].regmap,rs2[i]);
4213 s2h=get_reg(branch_regs[i].regmap,rs2[i]|64);
4214 }
4215 else {
4216 s1l=get_reg(i_regmap,rs1[i]);
4217 s1h=get_reg(i_regmap,rs1[i]|64);
4218 s2l=get_reg(i_regmap,rs2[i]);
4219 s2h=get_reg(i_regmap,rs2[i]|64);
4220 }
4221 if(rs1[i]==0&&rs2[i]==0)
4222 {
4223 if(opcode[i]&1) nop=1;
4224 else unconditional=1;
4225 //assert(opcode[i]!=5);
4226 //assert(opcode[i]!=7);
4227 //assert(opcode[i]!=0x15);
4228 //assert(opcode[i]!=0x17);
4229 }
4230 else if(rs1[i]==0)
4231 {
4232 s1l=s2l;s1h=s2h;
4233 s2l=s2h=-1;
4234 only32=(regs[i].was32>>rs2[i])&1;
4235 }
4236 else if(rs2[i]==0)
4237 {
4238 s2l=s2h=-1;
4239 only32=(regs[i].was32>>rs1[i])&1;
4240 }
4241 else {
4242 only32=(regs[i].was32>>rs1[i])&(regs[i].was32>>rs2[i])&1;
4243 }
4244
e1190b87 4245 if(ooo[i]) {
57871462 4246 // Out of order execution (delay slot first)
4247 //printf("OOOE\n");
4248 address_generation(i+1,i_regs,regs[i].regmap_entry);
4249 ds_assemble(i+1,i_regs);
4250 int adj;
4251 uint64_t bc_unneeded=branch_regs[i].u;
57871462 4252 bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
57871462 4253 bc_unneeded|=1;
00fa9369 4254 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,bc_unneeded);
57871462 4255 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],rs2[i]);
4256 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
4257 cc=get_reg(branch_regs[i].regmap,CCREG);
4258 assert(cc==HOST_CCREG);
9f51b4b9 4259 if(unconditional)
57871462 4260 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4261 //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional);
4262 //assem_debug("cycle count (adj)\n");
4263 if(unconditional) {
4264 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
4265 if(i!=(ba[i]-start)>>2 || source[i+1]!=0) {
2573466a 4266 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 4267 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4268 if(internal)
4269 assem_debug("branch: internal\n");
4270 else
4271 assem_debug("branch: external\n");
4272 if(internal&&is_ds[(ba[i]-start)>>2]) {
4273 ds_assemble_entry(i);
4274 }
4275 else {
643aeae3 4276 add_to_linker(out,ba[i],internal);
57871462 4277 emit_jmp(0);
4278 }
4279 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4280 if(((u_int)out)&7) emit_addnop(0);
4281 #endif
4282 }
4283 }
4284 else if(nop) {
2573466a 4285 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
b14b6a8f 4286 void *jaddr=out;
57871462 4287 emit_jns(0);
b14b6a8f 4288 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
57871462 4289 }
4290 else {
df4dc2b1 4291 void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL;
57871462 4292 do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
2573466a 4293 if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 4294 if(!only32)
4295 {
4296 assert(s1h>=0);
4297 if(opcode[i]==4) // BEQ
4298 {
4299 if(s2h>=0) emit_cmp(s1h,s2h);
4300 else emit_test(s1h,s1h);
df4dc2b1 4301 nottaken1=out;
643aeae3 4302 emit_jne((void *)1l);
57871462 4303 }
4304 if(opcode[i]==5) // BNE
4305 {
4306 if(s2h>=0) emit_cmp(s1h,s2h);
4307 else emit_test(s1h,s1h);
df4dc2b1 4308 if(invert) taken=out;
643aeae3 4309 else add_to_linker(out,ba[i],internal);
57871462 4310 emit_jne(0);
4311 }
4312 if(opcode[i]==6) // BLEZ
4313 {
4314 emit_test(s1h,s1h);
df4dc2b1 4315 if(invert) taken=out;
643aeae3 4316 else add_to_linker(out,ba[i],internal);
57871462 4317 emit_js(0);
df4dc2b1 4318 nottaken1=out;
643aeae3 4319 emit_jne((void *)1l);
57871462 4320 }
4321 if(opcode[i]==7) // BGTZ
4322 {
4323 emit_test(s1h,s1h);
df4dc2b1 4324 nottaken1=out;
57871462 4325 emit_js(1);
df4dc2b1 4326 if(invert) taken=out;
643aeae3 4327 else add_to_linker(out,ba[i],internal);
57871462 4328 emit_jne(0);
4329 }
4330 } // if(!only32)
9f51b4b9 4331
57871462 4332 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4333 assert(s1l>=0);
4334 if(opcode[i]==4) // BEQ
4335 {
4336 if(s2l>=0) emit_cmp(s1l,s2l);
4337 else emit_test(s1l,s1l);
4338 if(invert){
df4dc2b1 4339 nottaken=out;
643aeae3 4340 emit_jne((void *)1l);
57871462 4341 }else{
643aeae3 4342 add_to_linker(out,ba[i],internal);
57871462 4343 emit_jeq(0);
4344 }
4345 }
4346 if(opcode[i]==5) // BNE
4347 {
4348 if(s2l>=0) emit_cmp(s1l,s2l);
4349 else emit_test(s1l,s1l);
4350 if(invert){
df4dc2b1 4351 nottaken=out;
57871462 4352 emit_jeq(1);
4353 }else{
643aeae3 4354 add_to_linker(out,ba[i],internal);
57871462 4355 emit_jne(0);
4356 }
4357 }
4358 if(opcode[i]==6) // BLEZ
4359 {
4360 emit_cmpimm(s1l,1);
4361 if(invert){
df4dc2b1 4362 nottaken=out;
57871462 4363 emit_jge(1);
4364 }else{
643aeae3 4365 add_to_linker(out,ba[i],internal);
57871462 4366 emit_jl(0);
4367 }
4368 }
4369 if(opcode[i]==7) // BGTZ
4370 {
4371 emit_cmpimm(s1l,1);
4372 if(invert){
df4dc2b1 4373 nottaken=out;
57871462 4374 emit_jl(1);
4375 }else{
643aeae3 4376 add_to_linker(out,ba[i],internal);
57871462 4377 emit_jge(0);
4378 }
4379 }
4380 if(invert) {
df4dc2b1 4381 if(taken) set_jump_target(taken, out);
57871462 4382 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4383 if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) {
4384 if(adj) {
2573466a 4385 emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
643aeae3 4386 add_to_linker(out,ba[i],internal);
57871462 4387 }else{
4388 emit_addnop(13);
643aeae3 4389 add_to_linker(out,ba[i],internal*2);
57871462 4390 }
4391 emit_jmp(0);
4392 }else
4393 #endif
4394 {
2573466a 4395 if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 4396 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4397 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4398 if(internal)
4399 assem_debug("branch: internal\n");
4400 else
4401 assem_debug("branch: external\n");
4402 if(internal&&is_ds[(ba[i]-start)>>2]) {
4403 ds_assemble_entry(i);
4404 }
4405 else {
643aeae3 4406 add_to_linker(out,ba[i],internal);
57871462 4407 emit_jmp(0);
4408 }
4409 }
df4dc2b1 4410 set_jump_target(nottaken, out);
57871462 4411 }
4412
df4dc2b1 4413 if(nottaken1) set_jump_target(nottaken1, out);
57871462 4414 if(adj) {
2573466a 4415 if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc);
57871462 4416 }
4417 } // (!unconditional)
4418 } // if(ooo)
4419 else
4420 {
4421 // In-order execution (branch first)
4422 //if(likely[i]) printf("IOL\n");
4423 //else
4424 //printf("IOE\n");
df4dc2b1 4425 void *taken = NULL, *nottaken = NULL, *nottaken1 = NULL;
57871462 4426 if(!unconditional&&!nop) {
4427 if(!only32)
4428 {
4429 assert(s1h>=0);
4430 if((opcode[i]&0x2f)==4) // BEQ
4431 {
4432 if(s2h>=0) emit_cmp(s1h,s2h);
4433 else emit_test(s1h,s1h);
df4dc2b1 4434 nottaken1=out;
643aeae3 4435 emit_jne((void *)2l);
57871462 4436 }
4437 if((opcode[i]&0x2f)==5) // BNE
4438 {
4439 if(s2h>=0) emit_cmp(s1h,s2h);
4440 else emit_test(s1h,s1h);
df4dc2b1 4441 taken=out;
643aeae3 4442 emit_jne((void *)1l);
57871462 4443 }
4444 if((opcode[i]&0x2f)==6) // BLEZ
4445 {
4446 emit_test(s1h,s1h);
df4dc2b1 4447 taken=out;
57871462 4448 emit_js(1);
df4dc2b1 4449 nottaken1=out;
643aeae3 4450 emit_jne((void *)2l);
57871462 4451 }
4452 if((opcode[i]&0x2f)==7) // BGTZ
4453 {
4454 emit_test(s1h,s1h);
df4dc2b1 4455 nottaken1=out;
57871462 4456 emit_js(2);
df4dc2b1 4457 taken=out;
643aeae3 4458 emit_jne((void *)1l);
57871462 4459 }
4460 } // if(!only32)
9f51b4b9 4461
57871462 4462 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4463 assert(s1l>=0);
4464 if((opcode[i]&0x2f)==4) // BEQ
4465 {
4466 if(s2l>=0) emit_cmp(s1l,s2l);
4467 else emit_test(s1l,s1l);
df4dc2b1 4468 nottaken=out;
643aeae3 4469 emit_jne((void *)2l);
57871462 4470 }
4471 if((opcode[i]&0x2f)==5) // BNE
4472 {
4473 if(s2l>=0) emit_cmp(s1l,s2l);
4474 else emit_test(s1l,s1l);
df4dc2b1 4475 nottaken=out;
57871462 4476 emit_jeq(2);
4477 }
4478 if((opcode[i]&0x2f)==6) // BLEZ
4479 {
4480 emit_cmpimm(s1l,1);
df4dc2b1 4481 nottaken=out;
57871462 4482 emit_jge(2);
4483 }
4484 if((opcode[i]&0x2f)==7) // BGTZ
4485 {
4486 emit_cmpimm(s1l,1);
df4dc2b1 4487 nottaken=out;
57871462 4488 emit_jl(2);
4489 }
4490 } // if(!unconditional)
4491 int adj;
4492 uint64_t ds_unneeded=branch_regs[i].u;
57871462 4493 ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
57871462 4494 ds_unneeded|=1;
57871462 4495 // branch taken
4496 if(!nop) {
df4dc2b1 4497 if(taken) set_jump_target(taken, out);
57871462 4498 assem_debug("1:\n");
00fa9369 4499 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,ds_unneeded);
57871462 4500 // load regs
4501 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
4502 address_generation(i+1,&branch_regs[i],0);
4503 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP);
4504 ds_assemble(i+1,&branch_regs[i]);
4505 cc=get_reg(branch_regs[i].regmap,CCREG);
4506 if(cc==-1) {
4507 emit_loadreg(CCREG,cc=HOST_CCREG);
4508 // CHECK: Is the following instruction (fall thru) allocated ok?
4509 }
4510 assert(cc==HOST_CCREG);
4511 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4512 do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
4513 assem_debug("cycle count (adj)\n");
2573466a 4514 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 4515 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4516 if(internal)
4517 assem_debug("branch: internal\n");
4518 else
4519 assem_debug("branch: external\n");
4520 if(internal&&is_ds[(ba[i]-start)>>2]) {
4521 ds_assemble_entry(i);
4522 }
4523 else {
643aeae3 4524 add_to_linker(out,ba[i],internal);
57871462 4525 emit_jmp(0);
4526 }
4527 }
4528 // branch not taken
57871462 4529 if(!unconditional) {
df4dc2b1 4530 if(nottaken1) set_jump_target(nottaken1, out);
4531 set_jump_target(nottaken, out);
57871462 4532 assem_debug("2:\n");
4533 if(!likely[i]) {
00fa9369 4534 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,ds_unneeded);
57871462 4535 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
4536 address_generation(i+1,&branch_regs[i],0);
4537 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
4538 ds_assemble(i+1,&branch_regs[i]);
4539 }
4540 cc=get_reg(branch_regs[i].regmap,CCREG);
4541 if(cc==-1&&!likely[i]) {
 4542 // Cycle count isn't in a register; temporarily load it, then write it out
4543 emit_loadreg(CCREG,HOST_CCREG);
2573466a 4544 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
b14b6a8f 4545 void *jaddr=out;
57871462 4546 emit_jns(0);
b14b6a8f 4547 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
57871462 4548 emit_storereg(CCREG,HOST_CCREG);
4549 }
4550 else{
4551 cc=get_reg(i_regmap,CCREG);
4552 assert(cc==HOST_CCREG);
2573466a 4553 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
b14b6a8f 4554 void *jaddr=out;
57871462 4555 emit_jns(0);
b14b6a8f 4556 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
57871462 4557 }
4558 }
4559 }
4560}
4561
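// Assemble the REGIMM branches BLTZ/BGEZ/BLTZAL/BGEZAL (and their "likely"
// forms). The structure mirrors cjump_assemble(); the -AL variants
// additionally write the return address to $31, and do so even when the
// branch is not taken.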
4562void sjump_assemble(int i,struct regstat *i_regs)
4563{
4564 signed char *i_regmap=i_regs->regmap;
4565 int cc;
4566 int match;
4567 match=match_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4568 assem_debug("smatch=%d\n",match);
4569 int s1h,s1l;
57871462 4570 int unconditional=0,nevertaken=0;
4571 int only32=0;
57871462 4572 int invert=0;
4573 int internal=internal_branch(branch_regs[i].is32,ba[i]);
4574 if(i==(ba[i]-start)>>2) assem_debug("idle loop\n");
57871462 4575 if(!match) invert=1;
4576 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4577 if(i>(ba[i]-start)>>2) invert=1;
4578 #endif
4579
4580 //if(opcode2[i]>=0x10) return; // FIXME (BxxZAL)
df894a3a 4581 //assert(opcode2[i]<0x10||rs1[i]==0); // FIXME (BxxZAL)
57871462 4582
e1190b87 4583 if(ooo[i]) {
57871462 4584 s1l=get_reg(branch_regs[i].regmap,rs1[i]);
4585 s1h=get_reg(branch_regs[i].regmap,rs1[i]|64);
4586 }
4587 else {
4588 s1l=get_reg(i_regmap,rs1[i]);
4589 s1h=get_reg(i_regmap,rs1[i]|64);
4590 }
4591 if(rs1[i]==0)
4592 {
4593 if(opcode2[i]&1) unconditional=1;
4594 else nevertaken=1;
4595 // These are never taken (r0 is never less than zero)
4596 //assert(opcode2[i]!=0);
4597 //assert(opcode2[i]!=2);
4598 //assert(opcode2[i]!=0x10);
4599 //assert(opcode2[i]!=0x12);
4600 }
4601 else {
4602 only32=(regs[i].was32>>rs1[i])&1;
4603 }
4604
e1190b87 4605 if(ooo[i]) {
57871462 4606 // Out of order execution (delay slot first)
4607 //printf("OOOE\n");
4608 address_generation(i+1,i_regs,regs[i].regmap_entry);
4609 ds_assemble(i+1,i_regs);
4610 int adj;
4611 uint64_t bc_unneeded=branch_regs[i].u;
57871462 4612 bc_unneeded&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
57871462 4613 bc_unneeded|=1;
00fa9369 4614 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,bc_unneeded);
57871462 4615 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i],rs1[i]);
4616 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
4617 if(rt1[i]==31) {
4618 int rt,return_address;
57871462 4619 rt=get_reg(branch_regs[i].regmap,31);
4620 assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4621 if(rt>=0) {
4622 // Save the PC even if the branch is not taken
4623 return_address=start+i*4+8;
4624 emit_movimm(return_address,rt); // PC into link register
4625 #ifdef IMM_PREFETCH
df4dc2b1 4626 if(!nevertaken) emit_prefetch(hash_table_get(return_address));
57871462 4627 #endif
4628 }
4629 }
4630 cc=get_reg(branch_regs[i].regmap,CCREG);
4631 assert(cc==HOST_CCREG);
9f51b4b9 4632 if(unconditional)
57871462 4633 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4634 //do_cc(i,branch_regs[i].regmap,&adj,unconditional?ba[i]:-1,unconditional);
4635 assem_debug("cycle count (adj)\n");
4636 if(unconditional) {
4637 do_cc(i,branch_regs[i].regmap,&adj,ba[i],TAKEN,0);
4638 if(i!=(ba[i]-start)>>2 || source[i+1]!=0) {
2573466a 4639 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 4640 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4641 if(internal)
4642 assem_debug("branch: internal\n");
4643 else
4644 assem_debug("branch: external\n");
4645 if(internal&&is_ds[(ba[i]-start)>>2]) {
4646 ds_assemble_entry(i);
4647 }
4648 else {
643aeae3 4649 add_to_linker(out,ba[i],internal);
57871462 4650 emit_jmp(0);
4651 }
4652 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4653 if(((u_int)out)&7) emit_addnop(0);
4654 #endif
4655 }
4656 }
4657 else if(nevertaken) {
2573466a 4658 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
b14b6a8f 4659 void *jaddr=out;
57871462 4660 emit_jns(0);
b14b6a8f 4661 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
57871462 4662 }
4663 else {
df4dc2b1 4664 void *nottaken = NULL;
57871462 4665 do_cc(i,branch_regs[i].regmap,&adj,-1,0,invert);
2573466a 4666 if(adj&&!invert) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 4667 if(!only32)
4668 {
4669 assert(s1h>=0);
df894a3a 4670 if((opcode2[i]&0xf)==0) // BLTZ/BLTZAL
57871462 4671 {
4672 emit_test(s1h,s1h);
4673 if(invert){
df4dc2b1 4674 nottaken=out;
57871462 4675 emit_jns(1);
4676 }else{
643aeae3 4677 add_to_linker(out,ba[i],internal);
57871462 4678 emit_js(0);
4679 }
4680 }
df894a3a 4681 if((opcode2[i]&0xf)==1) // BGEZ/BGEZAL
57871462 4682 {
4683 emit_test(s1h,s1h);
4684 if(invert){
df4dc2b1 4685 nottaken=out;
57871462 4686 emit_js(1);
4687 }else{
643aeae3 4688 add_to_linker(out,ba[i],internal);
57871462 4689 emit_jns(0);
4690 }
4691 }
4692 } // if(!only32)
4693 else
4694 {
4695 assert(s1l>=0);
df894a3a 4696 if((opcode2[i]&0xf)==0) // BLTZ/BLTZAL
57871462 4697 {
4698 emit_test(s1l,s1l);
4699 if(invert){
df4dc2b1 4700 nottaken=out;
57871462 4701 emit_jns(1);
4702 }else{
643aeae3 4703 add_to_linker(out,ba[i],internal);
57871462 4704 emit_js(0);
4705 }
4706 }
df894a3a 4707 if((opcode2[i]&0xf)==1) // BGEZ/BGEZAL
57871462 4708 {
4709 emit_test(s1l,s1l);
4710 if(invert){
df4dc2b1 4711 nottaken=out;
57871462 4712 emit_js(1);
4713 }else{
643aeae3 4714 add_to_linker(out,ba[i],internal);
57871462 4715 emit_jns(0);
4716 }
4717 }
4718 } // if(!only32)
9f51b4b9 4719
57871462 4720 if(invert) {
4721 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4722 if(match&&(!internal||!is_ds[(ba[i]-start)>>2])) {
4723 if(adj) {
2573466a 4724 emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
643aeae3 4725 add_to_linker(out,ba[i],internal);
57871462 4726 }else{
4727 emit_addnop(13);
643aeae3 4728 add_to_linker(out,ba[i],internal*2);
57871462 4729 }
4730 emit_jmp(0);
4731 }else
4732 #endif
4733 {
2573466a 4734 if(adj) emit_addimm(cc,-CLOCK_ADJUST(adj),cc);
57871462 4735 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4736 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4737 if(internal)
4738 assem_debug("branch: internal\n");
4739 else
4740 assem_debug("branch: external\n");
4741 if(internal&&is_ds[(ba[i]-start)>>2]) {
4742 ds_assemble_entry(i);
4743 }
4744 else {
643aeae3 4745 add_to_linker(out,ba[i],internal);
57871462 4746 emit_jmp(0);
4747 }
4748 }
df4dc2b1 4749 set_jump_target(nottaken, out);
57871462 4750 }
4751
4752 if(adj) {
2573466a 4753 if(!invert) emit_addimm(cc,CLOCK_ADJUST(adj),cc);
57871462 4754 }
4755 } // (!unconditional)
4756 } // if(ooo)
4757 else
4758 {
4759 // In-order execution (branch first)
4760 //printf("IOE\n");
df4dc2b1 4761 void *nottaken = NULL;
a6491170 4762 if(rt1[i]==31) {
4763 int rt,return_address;
a6491170 4764 rt=get_reg(branch_regs[i].regmap,31);
4765 if(rt>=0) {
4766 // Save the PC even if the branch is not taken
4767 return_address=start+i*4+8;
4768 emit_movimm(return_address,rt); // PC into link register
4769 #ifdef IMM_PREFETCH
df4dc2b1 4770 emit_prefetch(hash_table_get(return_address));
a6491170 4771 #endif
4772 }
4773 }
57871462 4774 if(!unconditional) {
4775 //printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
4776 if(!only32)
4777 {
4778 assert(s1h>=0);
a6491170 4779 if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
57871462 4780 {
4781 emit_test(s1h,s1h);
df4dc2b1 4782 nottaken=out;
57871462 4783 emit_jns(1);
4784 }
a6491170 4785 if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
57871462 4786 {
4787 emit_test(s1h,s1h);
df4dc2b1 4788 nottaken=out;
57871462 4789 emit_js(1);
4790 }
4791 } // if(!only32)
4792 else
4793 {
4794 assert(s1l>=0);
a6491170 4795 if((opcode2[i]&0x0d)==0) // BLTZ/BLTZL/BLTZAL/BLTZALL
57871462 4796 {
4797 emit_test(s1l,s1l);
df4dc2b1 4798 nottaken=out;
57871462 4799 emit_jns(1);
4800 }
a6491170 4801 if((opcode2[i]&0x0d)==1) // BGEZ/BGEZL/BGEZAL/BGEZALL
57871462 4802 {
4803 emit_test(s1l,s1l);
df4dc2b1 4804 nottaken=out;
57871462 4805 emit_js(1);
4806 }
4807 }
4808 } // if(!unconditional)
4809 int adj;
4810 uint64_t ds_unneeded=branch_regs[i].u;
57871462 4811 ds_unneeded&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
57871462 4812 ds_unneeded|=1;
57871462 4813 // branch taken
4814 if(!nevertaken) {
4815 //assem_debug("1:\n");
00fa9369 4816 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,ds_unneeded);
57871462 4817 // load regs
4818 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
4819 address_generation(i+1,&branch_regs[i],0);
4820 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,INVCP);
4821 ds_assemble(i+1,&branch_regs[i]);
4822 cc=get_reg(branch_regs[i].regmap,CCREG);
4823 if(cc==-1) {
4824 emit_loadreg(CCREG,cc=HOST_CCREG);
4825 // CHECK: Is the following instruction (fall thru) allocated ok?
4826 }
4827 assert(cc==HOST_CCREG);
4828 store_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4829 do_cc(i,i_regmap,&adj,ba[i],TAKEN,0);
4830 assem_debug("cycle count (adj)\n");
2573466a 4831 if(adj) emit_addimm(cc,CLOCK_ADJUST(ccadj[i]+2-adj),cc);
57871462 4832 load_regs_bt(branch_regs[i].regmap,branch_regs[i].is32,branch_regs[i].dirty,ba[i]);
4833 if(internal)
4834 assem_debug("branch: internal\n");
4835 else
4836 assem_debug("branch: external\n");
4837 if(internal&&is_ds[(ba[i]-start)>>2]) {
4838 ds_assemble_entry(i);
4839 }
4840 else {
643aeae3 4841 add_to_linker(out,ba[i],internal);
57871462 4842 emit_jmp(0);
4843 }
4844 }
4845 // branch not taken
57871462 4846 if(!unconditional) {
df4dc2b1 4847 set_jump_target(nottaken, out);
57871462 4848 assem_debug("1:\n");
4849 if(!likely[i]) {
00fa9369 4850 wb_invalidate(regs[i].regmap,branch_regs[i].regmap,regs[i].dirty,regs[i].is32,ds_unneeded);
57871462 4851 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,rs1[i+1],rs2[i+1]);
4852 address_generation(i+1,&branch_regs[i],0);
4853 load_regs(regs[i].regmap,branch_regs[i].regmap,regs[i].was32,CCREG,CCREG);
4854 ds_assemble(i+1,&branch_regs[i]);
4855 }
4856 cc=get_reg(branch_regs[i].regmap,CCREG);
4857 if(cc==-1&&!likely[i]) {
 4858 // Cycle count isn't in a register; temporarily load it, then write it out
4859 emit_loadreg(CCREG,HOST_CCREG);
2573466a 4860 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
b14b6a8f 4861 void *jaddr=out;
57871462 4862 emit_jns(0);
b14b6a8f 4863 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,NOTTAKEN,0);
57871462 4864 emit_storereg(CCREG,HOST_CCREG);
4865 }
4866 else{
4867 cc=get_reg(i_regmap,CCREG);
4868 assert(cc==HOST_CCREG);
2573466a 4869 emit_addimm_and_set_flags(CLOCK_ADJUST(ccadj[i]+2),cc);
b14b6a8f 4870 void *jaddr=out;
57871462 4871 emit_jns(0);
b14b6a8f 4872 add_stub(CC_STUB,jaddr,out,0,i,start+i*4+8,likely[i]?NULLDS:NOTTAKEN,0);
57871462 4873 }
4874 }
4875 }
4876}
4877
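// Assemble a branch that is the last instruction of its page: the delay slot
// lies on the next page and cannot be compiled into this block. The eventual
// branch target is left in HOST_BTREG, dirty registers are written back, and
// the block exits to start+i*4+4 with the low bit set, which appears to tag a
// delay-slot entry; pagespan_ds() below compiles that entry at the start of
// the next block.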
4878static void pagespan_assemble(int i,struct regstat *i_regs)
4879{
4880 int s1l=get_reg(i_regs->regmap,rs1[i]);
4881 int s1h=get_reg(i_regs->regmap,rs1[i]|64);
4882 int s2l=get_reg(i_regs->regmap,rs2[i]);
4883 int s2h=get_reg(i_regs->regmap,rs2[i]|64);
df4dc2b1 4884 void *taken = NULL;
4885 void *nottaken = NULL;
57871462 4886 int unconditional=0;
4887 if(rs1[i]==0)
4888 {
4889 s1l=s2l;s1h=s2h;
4890 s2l=s2h=-1;
4891 }
4892 else if(rs2[i]==0)
4893 {
4894 s2l=s2h=-1;
4895 }
4896 if((i_regs->is32>>rs1[i])&(i_regs->is32>>rs2[i])&1) {
4897 s1h=s2h=-1;
4898 }
4899 int hr=0;
581335b0 4900 int addr=-1,alt=-1,ntaddr=-1;
57871462 4901 if(i_regs->regmap[HOST_BTREG]<0) {addr=HOST_BTREG;}
4902 else {
4903 while(hr<HOST_REGS)
4904 {
4905 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG &&
4906 (i_regs->regmap[hr]&63)!=rs1[i] &&
4907 (i_regs->regmap[hr]&63)!=rs2[i] )
4908 {
4909 addr=hr++;break;
4910 }
4911 hr++;
4912 }
4913 }
4914 while(hr<HOST_REGS)
4915 {
4916 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
4917 (i_regs->regmap[hr]&63)!=rs1[i] &&
4918 (i_regs->regmap[hr]&63)!=rs2[i] )
4919 {
4920 alt=hr++;break;
4921 }
4922 hr++;
4923 }
4924 if((opcode[i]&0x2E)==6) // BLEZ/BGTZ needs another register
4925 {
4926 while(hr<HOST_REGS)
4927 {
4928 if(hr!=EXCLUDE_REG && hr!=HOST_CCREG && hr!=HOST_BTREG &&
4929 (i_regs->regmap[hr]&63)!=rs1[i] &&
4930 (i_regs->regmap[hr]&63)!=rs2[i] )
4931 {
4932 ntaddr=hr;break;
4933 }
4934 hr++;
4935 }
4936 }
4937 assert(hr<HOST_REGS);
4938 if((opcode[i]&0x2e)==4||opcode[i]==0x11) { // BEQ/BNE/BEQL/BNEL/BC1
4939 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG);
4940 }
2573466a 4941 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]+2),HOST_CCREG);
57871462 4942 if(opcode[i]==2) // J
4943 {
4944 unconditional=1;
4945 }
4946 if(opcode[i]==3) // JAL
4947 {
4948 // TODO: mini_ht
4949 int rt=get_reg(i_regs->regmap,31);
4950 emit_movimm(start+i*4+8,rt);
4951 unconditional=1;
4952 }
4953 if(opcode[i]==0&&(opcode2[i]&0x3E)==8) // JR/JALR
4954 {
4955 emit_mov(s1l,addr);
4956 if(opcode2[i]==9) // JALR
4957 {
5067f341 4958 int rt=get_reg(i_regs->regmap,rt1[i]);
57871462 4959 emit_movimm(start+i*4+8,rt);
4960 }
4961 }
4962 if((opcode[i]&0x3f)==4) // BEQ
4963 {
4964 if(rs1[i]==rs2[i])
4965 {
4966 unconditional=1;
4967 }
4968 else
4969 #ifdef HAVE_CMOV_IMM
4970 if(s1h<0) {
4971 if(s2l>=0) emit_cmp(s1l,s2l);
4972 else emit_test(s1l,s1l);
4973 emit_cmov2imm_e_ne_compact(ba[i],start+i*4+8,addr);
4974 }
4975 else
4976 #endif
4977 {
4978 assert(s1l>=0);
4979 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
4980 if(s1h>=0) {
4981 if(s2h>=0) emit_cmp(s1h,s2h);
4982 else emit_test(s1h,s1h);
4983 emit_cmovne_reg(alt,addr);
4984 }
4985 if(s2l>=0) emit_cmp(s1l,s2l);
4986 else emit_test(s1l,s1l);
4987 emit_cmovne_reg(alt,addr);
4988 }
4989 }
4990 if((opcode[i]&0x3f)==5) // BNE
4991 {
4992 #ifdef HAVE_CMOV_IMM
4993 if(s1h<0) {
4994 if(s2l>=0) emit_cmp(s1l,s2l);
4995 else emit_test(s1l,s1l);
4996 emit_cmov2imm_e_ne_compact(start+i*4+8,ba[i],addr);
4997 }
4998 else
4999 #endif
5000 {
5001 assert(s1l>=0);
5002 emit_mov2imm_compact(start+i*4+8,addr,ba[i],alt);
5003 if(s1h>=0) {
5004 if(s2h>=0) emit_cmp(s1h,s2h);
5005 else emit_test(s1h,s1h);
5006 emit_cmovne_reg(alt,addr);
5007 }
5008 if(s2l>=0) emit_cmp(s1l,s2l);
5009 else emit_test(s1l,s1l);
5010 emit_cmovne_reg(alt,addr);
5011 }
5012 }
5013 if((opcode[i]&0x3f)==0x14) // BEQL
5014 {
5015 if(s1h>=0) {
5016 if(s2h>=0) emit_cmp(s1h,s2h);
5017 else emit_test(s1h,s1h);
df4dc2b1 5018 nottaken=out;
57871462 5019 emit_jne(0);
5020 }
5021 if(s2l>=0) emit_cmp(s1l,s2l);
5022 else emit_test(s1l,s1l);
df4dc2b1 5023 if(nottaken) set_jump_target(nottaken, out);
5024 nottaken=out;
57871462 5025 emit_jne(0);
5026 }
5027 if((opcode[i]&0x3f)==0x15) // BNEL
5028 {
5029 if(s1h>=0) {
5030 if(s2h>=0) emit_cmp(s1h,s2h);
5031 else emit_test(s1h,s1h);
df4dc2b1 5032 taken=out;
57871462 5033 emit_jne(0);
5034 }
5035 if(s2l>=0) emit_cmp(s1l,s2l);
5036 else emit_test(s1l,s1l);
df4dc2b1 5037 nottaken=out;
57871462 5038 emit_jeq(0);
df4dc2b1 5039 if(taken) set_jump_target(taken, out);
57871462 5040 }
5041 if((opcode[i]&0x3f)==6) // BLEZ
5042 {
5043 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
5044 emit_cmpimm(s1l,1);
5045 if(s1h>=0) emit_mov(addr,ntaddr);
5046 emit_cmovl_reg(alt,addr);
5047 if(s1h>=0) {
5048 emit_test(s1h,s1h);
5049 emit_cmovne_reg(ntaddr,addr);
5050 emit_cmovs_reg(alt,addr);
5051 }
5052 }
5053 if((opcode[i]&0x3f)==7) // BGTZ
5054 {
5055 emit_mov2imm_compact(ba[i],addr,start+i*4+8,ntaddr);
5056 emit_cmpimm(s1l,1);
5057 if(s1h>=0) emit_mov(addr,alt);
5058 emit_cmovl_reg(ntaddr,addr);
5059 if(s1h>=0) {
5060 emit_test(s1h,s1h);
5061 emit_cmovne_reg(alt,addr);
5062 emit_cmovs_reg(ntaddr,addr);
5063 }
5064 }
5065 if((opcode[i]&0x3f)==0x16) // BLEZL
5066 {
5067 assert((opcode[i]&0x3f)!=0x16);
5068 }
5069 if((opcode[i]&0x3f)==0x17) // BGTZL
5070 {
5071 assert((opcode[i]&0x3f)!=0x17);
5072 }
5073 assert(opcode[i]!=1); // BLTZ/BGEZ
5074
5075 //FIXME: Check CSREG
5076 if(opcode[i]==0x11 && opcode2[i]==0x08 ) {
5077 if((source[i]&0x30000)==0) // BC1F
5078 {
5079 emit_mov2imm_compact(ba[i],addr,start+i*4+8,alt);
5080 emit_testimm(s1l,0x800000);
5081 emit_cmovne_reg(alt,addr);
5082 }
5083 if((source[i]&0x30000)==0x10000) // BC1T
5084 {
5085 emit_mov2imm_compact(ba[i],alt,start+i*4+8,addr);
5086 emit_testimm(s1l,0x800000);
5087 emit_cmovne_reg(alt,addr);
5088 }
5089 if((source[i]&0x30000)==0x20000) // BC1FL
5090 {
5091 emit_testimm(s1l,0x800000);
df4dc2b1 5092 nottaken=out;
57871462 5093 emit_jne(0);
5094 }
5095 if((source[i]&0x30000)==0x30000) // BC1TL
5096 {
5097 emit_testimm(s1l,0x800000);
df4dc2b1 5098 nottaken=out;
57871462 5099 emit_jeq(0);
5100 }
5101 }
5102
5103 assert(i_regs->regmap[HOST_CCREG]==CCREG);
5104 wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
5105 if(likely[i]||unconditional)
5106 {
5107 emit_movimm(ba[i],HOST_BTREG);
5108 }
5109 else if(addr!=HOST_BTREG)
5110 {
5111 emit_mov(addr,HOST_BTREG);
5112 }
5113 void *branch_addr=out;
5114 emit_jmp(0);
5115 int target_addr=start+i*4+5;
5116 void *stub=out;
5117 void *compiled_target_addr=check_addr(target_addr);
643aeae3 5118 emit_extjump_ds(branch_addr, target_addr);
57871462 5119 if(compiled_target_addr) {
df4dc2b1 5120 set_jump_target(branch_addr, compiled_target_addr);
57871462 5121 add_link(target_addr,stub);
5122 }
df4dc2b1 5123 else set_jump_target(branch_addr, stub);
57871462 5124 if(likely[i]) {
5125 // Not-taken path
df4dc2b1 5126 set_jump_target(nottaken, out);
57871462 5127 wb_dirtys(regs[i].regmap,regs[i].is32,regs[i].dirty);
5128 void *branch_addr=out;
5129 emit_jmp(0);
5130 int target_addr=start+i*4+8;
5131 void *stub=out;
5132 void *compiled_target_addr=check_addr(target_addr);
643aeae3 5133 emit_extjump_ds(branch_addr, target_addr);
57871462 5134 if(compiled_target_addr) {
df4dc2b1 5135 set_jump_target(branch_addr, compiled_target_addr);
57871462 5136 add_link(target_addr,stub);
5137 }
df4dc2b1 5138 else set_jump_target(branch_addr, stub);
57871462 5139 }
5140}
5141
5142// Assemble the delay slot for the above
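// It registers a delay-slot entry (vaddr = start+1), emits the dirty-check
// stub, assembles instruction 0 as a delay slot, then compares the saved
// branch target in BTREG against start+4: equal continues into the next
// instruction of this block, anything else dispatches through
// jump_vaddr_reg[].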
5143static void pagespan_ds()
5144{
5145 assem_debug("initial delay slot:\n");
5146 u_int vaddr=start+1;
94d23bb9 5147 u_int page=get_page(vaddr);
5148 u_int vpage=get_vpage(vaddr);
57871462 5149 ll_add(jump_dirty+vpage,vaddr,(void *)out);
5150 do_dirty_stub_ds();
5151 ll_add(jump_in+page,vaddr,(void *)out);
5152 assert(regs[0].regmap_entry[HOST_CCREG]==CCREG);
5153 if(regs[0].regmap[HOST_CCREG]!=CCREG)
5154 wb_register(CCREG,regs[0].regmap_entry,regs[0].wasdirty,regs[0].was32);
5155 if(regs[0].regmap[HOST_BTREG]!=BTREG)
643aeae3 5156 emit_writeword(HOST_BTREG,&branch_target);
57871462 5157 load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,rs1[0],rs2[0]);
5158 address_generation(0,&regs[0],regs[0].regmap_entry);
b9b61529 5159 if(itype[0]==STORE||itype[0]==STORELR||(opcode[0]&0x3b)==0x39||(opcode[0]&0x3b)==0x3a)
57871462 5160 load_regs(regs[0].regmap_entry,regs[0].regmap,regs[0].was32,INVCP,INVCP);
57871462 5161 is_delayslot=0;
5162 switch(itype[0]) {
5163 case ALU:
5164 alu_assemble(0,&regs[0]);break;
5165 case IMM16:
5166 imm16_assemble(0,&regs[0]);break;
5167 case SHIFT:
5168 shift_assemble(0,&regs[0]);break;
5169 case SHIFTIMM:
5170 shiftimm_assemble(0,&regs[0]);break;
5171 case LOAD:
5172 load_assemble(0,&regs[0]);break;
5173 case LOADLR:
5174 loadlr_assemble(0,&regs[0]);break;
5175 case STORE:
5176 store_assemble(0,&regs[0]);break;
5177 case STORELR:
5178 storelr_assemble(0,&regs[0]);break;
5179 case COP0:
5180 cop0_assemble(0,&regs[0]);break;
5181 case COP1:
5182 cop1_assemble(0,&regs[0]);break;
5183 case C1LS:
5184 c1ls_assemble(0,&regs[0]);break;
b9b61529 5185 case COP2:
5186 cop2_assemble(0,&regs[0]);break;
5187 case C2LS:
5188 c2ls_assemble(0,&regs[0]);break;
5189 case C2OP:
5190 c2op_assemble(0,&regs[0]);break;
57871462 5191 case MULTDIV:
5192 multdiv_assemble(0,&regs[0]);break;
5193 case MOV:
5194 mov_assemble(0,&regs[0]);break;
5195 case SYSCALL:
7139f3c8 5196 case HLECALL:
1e973cb0 5197 case INTCALL:
57871462 5198 case SPAN:
5199 case UJUMP:
5200 case RJUMP:
5201 case CJUMP:
5202 case SJUMP:
5203 case FJUMP:
c43b5311 5204 SysPrintf("Jump in the delay slot. This is probably a bug.\n");
57871462 5205 }
5206 int btaddr=get_reg(regs[0].regmap,BTREG);
5207 if(btaddr<0) {
5208 btaddr=get_reg(regs[0].regmap,-1);
643aeae3 5209 emit_readword(&branch_target,btaddr);
57871462 5210 }
5211 assert(btaddr!=HOST_CCREG);
5212 if(regs[0].regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
5213#ifdef HOST_IMM8
5214 emit_movimm(start+4,HOST_TEMPREG);
5215 emit_cmp(btaddr,HOST_TEMPREG);
5216#else
5217 emit_cmpimm(btaddr,start+4);
5218#endif
df4dc2b1 5219 void *branch = out;
57871462 5220 emit_jeq(0);
5221 store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,-1);
5222 emit_jmp(jump_vaddr_reg[btaddr]);
df4dc2b1 5223 set_jump_target(branch, out);
57871462 5224 store_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
5225 load_regs_bt(regs[0].regmap,regs[0].is32,regs[0].dirty,start+4);
5226}
5227
5228// Basic liveness analysis for MIPS registers
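// Walks instructions backwards from iend to istart keeping a bitmask 'u'
// over MIPS registers: bit r set means the value of $r is dead (unneeded)
// above this point. A written register becomes unneeded, a read register
// becomes needed again, and bit 0 ($zero) is always set. gte_u tracks the
// same property for GTE/COP2 registers. For example, scanning back over
// "addu $t0,$a0,$a1" sets bit 8 and clears bits 4 and 5. Internal backward
// branches recurse, limited to three levels.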
5229void unneeded_registers(int istart,int iend,int r)
5230{
5231 int i;
00fa9369 5232 uint64_t u,gte_u,b,gte_b;
5233 uint64_t temp_u,temp_gte_u=0;
0ff8c62c 5234 uint64_t gte_u_unknown=0;
5235 if(new_dynarec_hacks&NDHACK_GTE_UNNEEDED)
5236 gte_u_unknown=~0ll;
57871462 5237 if(iend==slen-1) {
00fa9369 5238 u=1;
0ff8c62c 5239 gte_u=gte_u_unknown;
57871462 5240 }else{
00fa9369 5241 //u=unneeded_reg[iend+1];
5242 u=1;
0ff8c62c 5243 gte_u=gte_unneeded[iend+1];
57871462 5244 }
bedfea38 5245
57871462 5246 for (i=iend;i>=istart;i--)
5247 {
5248 //printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
5249 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
5250 {
5251 // If subroutine call, flag return address as a possible branch target
5252 if(rt1[i]==31 && i<slen-2) bt[i+2]=1;
9f51b4b9 5253
57871462 5254 if(ba[i]<start || ba[i]>=(start+slen*4))
5255 {
5256 // Branch out of this block, flush all regs
5257 u=1;
0ff8c62c 5258 gte_u=gte_u_unknown;
57871462 5259 branch_unneeded_reg[i]=u;
57871462 5260 // Merge in delay slot
57871462 5261 u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
57871462 5262 u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
00fa9369 5263 u|=1;
bedfea38 5264 gte_u|=gte_rt[i+1];
5265 gte_u&=~gte_rs[i+1];
57871462 5266 // If branch is "likely" (and conditional)
5267 // then we skip the delay slot on the fall-thru path
5268 if(likely[i]) {
5269 if(i<slen-1) {
5270 u&=unneeded_reg[i+2];
bedfea38 5271 gte_u&=gte_unneeded[i+2];
57871462 5272 }
5273 else
5274 {
5275 u=1;
0ff8c62c 5276 gte_u=gte_u_unknown;
57871462 5277 }
5278 }
5279 }
5280 else
5281 {
5282 // Internal branch, flag target
5283 bt[(ba[i]-start)>>2]=1;
5284 if(ba[i]<=start+i*4) {
5285 // Backward branch
5286 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
5287 {
5288 // Unconditional branch
00fa9369 5289 temp_u=1;
bedfea38 5290 temp_gte_u=0;
57871462 5291 } else {
5292 // Conditional branch (not taken case)
5293 temp_u=unneeded_reg[i+2];
bedfea38 5294 temp_gte_u&=gte_unneeded[i+2];
57871462 5295 }
5296 // Merge in delay slot
57871462 5297 temp_u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
57871462 5298 temp_u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
00fa9369 5299 temp_u|=1;
bedfea38 5300 temp_gte_u|=gte_rt[i+1];
5301 temp_gte_u&=~gte_rs[i+1];
57871462 5302 // If branch is "likely" (and conditional)
5303 // then we skip the delay slot on the fall-thru path
5304 if(likely[i]) {
5305 if(i<slen-1) {
5306 temp_u&=unneeded_reg[i+2];
bedfea38 5307 temp_gte_u&=gte_unneeded[i+2];
57871462 5308 }
5309 else
5310 {
5311 temp_u=1;
0ff8c62c 5312 temp_gte_u=gte_u_unknown;
57871462 5313 }
5314 }
57871462 5315 temp_u|=(1LL<<rt1[i])|(1LL<<rt2[i]);
57871462 5316 temp_u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
00fa9369 5317 temp_u|=1;
bedfea38 5318 temp_gte_u|=gte_rt[i];
5319 temp_gte_u&=~gte_rs[i];
57871462 5320 unneeded_reg[i]=temp_u;
bedfea38 5321 gte_unneeded[i]=temp_gte_u;
57871462 5322 // Only go three levels deep. This recursion can take an
5323 // excessive amount of time if there are a lot of nested loops.
5324 if(r<2) {
5325 unneeded_registers((ba[i]-start)>>2,i-1,r+1);
5326 }else{
5327 unneeded_reg[(ba[i]-start)>>2]=1;
0ff8c62c 5328 gte_unneeded[(ba[i]-start)>>2]=gte_u_unknown;
57871462 5329 }
5330 } /*else*/ if(1) {
5331 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
5332 {
5333 // Unconditional branch
5334 u=unneeded_reg[(ba[i]-start)>>2];
bedfea38 5335 gte_u=gte_unneeded[(ba[i]-start)>>2];
57871462 5336 branch_unneeded_reg[i]=u;
57871462 5337 // Merge in delay slot
57871462 5338 u|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
57871462 5339 u&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
00fa9369 5340 u|=1;
bedfea38 5341 gte_u|=gte_rt[i+1];
5342 gte_u&=~gte_rs[i+1];
57871462 5343 } else {
5344 // Conditional branch
5345 b=unneeded_reg[(ba[i]-start)>>2];
00fa9369 5346 gte_b=gte_unneeded[(ba[i]-start)>>2];
57871462 5347 branch_unneeded_reg[i]=b;
57871462 5348 // Branch delay slot
57871462 5349 b|=(1LL<<rt1[i+1])|(1LL<<rt2[i+1]);
57871462 5350 b&=~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
00fa9369 5351 b|=1;
5352 gte_b|=gte_rt[i+1];
5353 gte_b&=~gte_rs[i+1];
57871462 5354 // If branch is "likely" then we skip the
5355 // delay slot on the fall-thru path
5356 if(likely[i]) {
5357 u=b;
00fa9369 5358 gte_u=gte_b;
57871462 5359 if(i<slen-1) {
5360 u&=unneeded_reg[i+2];
bedfea38 5361 gte_u&=gte_unneeded[i+2];
57871462 5362 }
5363 } else {
5364 u&=b;
00fa9369 5365 gte_u&=gte_b;
57871462 5366 }
5367 if(i<slen-1) {
5368 branch_unneeded_reg[i]&=unneeded_reg[i+2];
57871462 5369 } else {
5370 branch_unneeded_reg[i]=1;
57871462 5371 }
5372 }
5373 }
5374 }
5375 }
1e973cb0 5376 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 5377 {
5378 // SYSCALL instruction (software interrupt)
5379 u=1;
57871462 5380 }
5381 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
5382 {
5383 // ERET instruction (return from interrupt)
5384 u=1;
57871462 5385 }
00fa9369 5386 //u=1; // DEBUG
57871462 5387 // Written registers are unneeded
5388 u|=1LL<<rt1[i];
5389 u|=1LL<<rt2[i];
bedfea38 5390 gte_u|=gte_rt[i];
57871462 5391 // Accessed registers are needed
5392 u&=~(1LL<<rs1[i]);
5393 u&=~(1LL<<rs2[i]);
bedfea38 5394 gte_u&=~gte_rs[i];
eaa11918 5395 if(gte_rs[i]&&rt1[i]&&(unneeded_reg[i+1]&(1ll<<rt1[i])))
cbbd8dd7 5396 gte_u|=gte_rs[i]&gte_unneeded[i+1]; // MFC2/CFC2 to dead register, unneeded
57871462 5397 // Source-target dependencies
57871462 5398 // R0 is always unneeded
00fa9369 5399 u|=1;
57871462 5400 // Save it
5401 unneeded_reg[i]=u;
bedfea38 5402 gte_unneeded[i]=gte_u;
57871462 5403 /*
5404 printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
5405 printf("U:");
5406 int r;
5407 for(r=1;r<=CCREG;r++) {
5408 if((unneeded_reg[i]>>r)&1) {
5409 if(r==HIREG) printf(" HI");
5410 else if(r==LOREG) printf(" LO");
5411 else printf(" r%d",r);
5412 }
5413 }
00fa9369 5414 printf("\n");
5415 */
252c20fc 5416 }
57871462 5417}
5418
71e490c5 5419// Write back dirty registers as soon as we will no longer modify them,
5420// so that we don't end up with lots of writes at the branches.
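// will_dirty_i / wont_dirty_i are per-host-register bitmaps propagated
// backwards through the block; on the 'wr' pass they are merged into the
// dirty bits and into will_dirty[] / wont_dirty[], which the writeback logic
// consults to flush a dirty register at the last point it is modified rather
// than at every branch.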
5421void clean_registers(int istart,int iend,int wr)
57871462 5422{
71e490c5 5423 int i;
5424 int r;
5425 u_int will_dirty_i,will_dirty_next,temp_will_dirty;
5426 u_int wont_dirty_i,wont_dirty_next,temp_wont_dirty;
5427 if(iend==slen-1) {
5428 will_dirty_i=will_dirty_next=0;
5429 wont_dirty_i=wont_dirty_next=0;
5430 }else{
5431 will_dirty_i=will_dirty_next=will_dirty[iend+1];
5432 wont_dirty_i=wont_dirty_next=wont_dirty[iend+1];
5433 }
5434 for (i=iend;i>=istart;i--)
57871462 5435 {
71e490c5 5436 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
57871462 5437 {
71e490c5 5438 if(ba[i]<start || ba[i]>=(start+slen*4))
57871462 5439 {
71e490c5 5440 // Branch out of this block, flush all regs
5441 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
57871462 5442 {
5443 // Unconditional branch
5444 will_dirty_i=0;
5445 wont_dirty_i=0;
5446 // Merge in delay slot (will dirty)
5447 for(r=0;r<HOST_REGS;r++) {
5448 if(r!=EXCLUDE_REG) {
5449 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
5450 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
5451 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
5452 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
5453 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
5454 if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
5455 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
5456 if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
5457 if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
5458 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
5459 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
5460 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
5461 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
5462 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
5463 }
5464 }
5465 }
5466 else
5467 {
5468 // Conditional branch
5469 will_dirty_i=0;
5470 wont_dirty_i=wont_dirty_next;
5471 // Merge in delay slot (will dirty)
5472 for(r=0;r<HOST_REGS;r++) {
5473 if(r!=EXCLUDE_REG) {
5474 if(!likely[i]) {
5475 // Might not dirty if likely branch is not taken
5476 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
5477 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
5478 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
5479 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
5480 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
5481 if(branch_regs[i].regmap[r]==0) will_dirty_i&=~(1<<r);
5482 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
5483 //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
5484 //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
5485 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
5486 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
5487 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
5488 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
5489 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
5490 }
5491 }
5492 }
5493 }
5494 // Merge in delay slot (wont dirty)
5495 for(r=0;r<HOST_REGS;r++) {
5496 if(r!=EXCLUDE_REG) {
5497 if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
5498 if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
5499 if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
5500 if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
5501 if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
5502 if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
5503 if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
5504 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
5505 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
5506 if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
5507 }
5508 }
5509 if(wr) {
5510 #ifndef DESTRUCTIVE_WRITEBACK
5511 branch_regs[i].dirty&=wont_dirty_i;
5512 #endif
5513 branch_regs[i].dirty|=will_dirty_i;
5514 }
5515 }
5516 else
5517 {
5518 // Internal branch
5519 if(ba[i]<=start+i*4) {
5520 // Backward branch
5521 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
5522 {
5523 // Unconditional branch
5524 temp_will_dirty=0;
5525 temp_wont_dirty=0;
5526 // Merge in delay slot (will dirty)
5527 for(r=0;r<HOST_REGS;r++) {
5528 if(r!=EXCLUDE_REG) {
5529 if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
5530 if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
5531 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
5532 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
5533 if((branch_regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
5534 if(branch_regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
5535 if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
5536 if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
5537 if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
5538 if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
5539 if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
5540 if((regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
5541 if(regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
5542 if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
5543 }
5544 }
5545 } else {
5546 // Conditional branch (not taken case)
5547 temp_will_dirty=will_dirty_next;
5548 temp_wont_dirty=wont_dirty_next;
5549 // Merge in delay slot (will dirty)
5550 for(r=0;r<HOST_REGS;r++) {
5551 if(r!=EXCLUDE_REG) {
5552 if(!likely[i]) {
5553 // Will not dirty if likely branch is not taken
5554 if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
5555 if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
5556 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
5557 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
5558 if((branch_regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
5559 if(branch_regs[i].regmap[r]==0) temp_will_dirty&=~(1<<r);
5560 if(branch_regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
5561 //if((regs[i].regmap[r]&63)==rt1[i]) temp_will_dirty|=1<<r;
5562 //if((regs[i].regmap[r]&63)==rt2[i]) temp_will_dirty|=1<<r;
5563 if((regs[i].regmap[r]&63)==rt1[i+1]) temp_will_dirty|=1<<r;
5564 if((regs[i].regmap[r]&63)==rt2[i+1]) temp_will_dirty|=1<<r;
5565 if((regs[i].regmap[r]&63)>33) temp_will_dirty&=~(1<<r);
5566 if(regs[i].regmap[r]<=0) temp_will_dirty&=~(1<<r);
5567 if(regs[i].regmap[r]==CCREG) temp_will_dirty|=1<<r;
5568 }
5569 }
5570 }
5571 }
5572          // Merge in delay slot (won't dirty)
5573 for(r=0;r<HOST_REGS;r++) {
5574 if(r!=EXCLUDE_REG) {
5575 if((regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
5576 if((regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
5577 if((regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
5578 if((regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
5579 if(regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
5580 if((branch_regs[i].regmap[r]&63)==rt1[i]) temp_wont_dirty|=1<<r;
5581 if((branch_regs[i].regmap[r]&63)==rt2[i]) temp_wont_dirty|=1<<r;
5582 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) temp_wont_dirty|=1<<r;
5583 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) temp_wont_dirty|=1<<r;
5584 if(branch_regs[i].regmap[r]==CCREG) temp_wont_dirty|=1<<r;
5585 }
5586 }
5587 // Deal with changed mappings
5588 if(i<iend) {
5589 for(r=0;r<HOST_REGS;r++) {
5590 if(r!=EXCLUDE_REG) {
5591 if(regs[i].regmap[r]!=regmap_pre[i][r]) {
5592 temp_will_dirty&=~(1<<r);
5593 temp_wont_dirty&=~(1<<r);
5594 if((regmap_pre[i][r]&63)>0 && (regmap_pre[i][r]&63)<34) {
5595 temp_will_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
5596 temp_wont_dirty|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
5597 } else {
5598 temp_will_dirty|=1<<r;
5599 temp_wont_dirty|=1<<r;
5600 }
5601 }
5602 }
5603 }
5604 }
5605 if(wr) {
5606 will_dirty[i]=temp_will_dirty;
5607 wont_dirty[i]=temp_wont_dirty;
5608 clean_registers((ba[i]-start)>>2,i-1,0);
5609 }else{
5610 // Limit recursion. It can take an excessive amount
5611 // of time if there are a lot of nested loops.
5612 will_dirty[(ba[i]-start)>>2]=0;
5613 wont_dirty[(ba[i]-start)>>2]=-1;
5614 }
5615 }
5616 /*else*/ if(1)
5617 {
5618 if(itype[i]==RJUMP||itype[i]==UJUMP||(source[i]>>16)==0x1000)
5619 {
5620 // Unconditional branch
5621 will_dirty_i=0;
5622 wont_dirty_i=0;
5623 //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
5624 for(r=0;r<HOST_REGS;r++) {
5625 if(r!=EXCLUDE_REG) {
5626 if(branch_regs[i].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
5627 will_dirty_i|=will_dirty[(ba[i]-start)>>2]&(1<<r);
5628 wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
5629 }
e3234ecf 5630 if(branch_regs[i].regmap[r]>=0) {
5631 will_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<<r;
5632 wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(branch_regs[i].regmap[r]&63))&1)<<r;
5633 }
57871462 5634 }
5635 }
5636 //}
5637 // Merge in delay slot
5638 for(r=0;r<HOST_REGS;r++) {
5639 if(r!=EXCLUDE_REG) {
5640 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
5641 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
5642 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
5643 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
5644 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
5645 if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
5646 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
5647 if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
5648 if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
5649 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
5650 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
5651 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
5652 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
5653 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
5654 }
5655 }
5656 } else {
5657 // Conditional branch
5658 will_dirty_i=will_dirty_next;
5659 wont_dirty_i=wont_dirty_next;
5660 //if(ba[i]>start+i*4) { // Disable recursion (for debugging)
5661 for(r=0;r<HOST_REGS;r++) {
5662 if(r!=EXCLUDE_REG) {
e3234ecf 5663 signed char target_reg=branch_regs[i].regmap[r];
5664 if(target_reg==regs[(ba[i]-start)>>2].regmap_entry[r]) {
57871462 5665 will_dirty_i&=will_dirty[(ba[i]-start)>>2]&(1<<r);
5666 wont_dirty_i|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
5667 }
e3234ecf 5668 else if(target_reg>=0) {
5669 will_dirty_i&=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<<r;
5670 wont_dirty_i|=((unneeded_reg[(ba[i]-start)>>2]>>(target_reg&63))&1)<<r;
57871462 5671 }
5672 // Treat delay slot as part of branch too
5673 /*if(regs[i+1].regmap[r]==regs[(ba[i]-start)>>2].regmap_entry[r]) {
5674 will_dirty[i+1]&=will_dirty[(ba[i]-start)>>2]&(1<<r);
5675 wont_dirty[i+1]|=wont_dirty[(ba[i]-start)>>2]&(1<<r);
5676 }
5677 else
5678 {
5679 will_dirty[i+1]&=~(1<<r);
5680 }*/
5681 }
5682 }
5683 //}
5684 // Merge in delay slot
5685 for(r=0;r<HOST_REGS;r++) {
5686 if(r!=EXCLUDE_REG) {
5687 if(!likely[i]) {
5688 // Might not dirty if likely branch is not taken
5689 if((branch_regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
5690 if((branch_regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
5691 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
5692 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
5693 if((branch_regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
5694 if(branch_regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
5695 if(branch_regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
5696 //if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
5697 //if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
5698 if((regs[i].regmap[r]&63)==rt1[i+1]) will_dirty_i|=1<<r;
5699 if((regs[i].regmap[r]&63)==rt2[i+1]) will_dirty_i|=1<<r;
5700 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
5701 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
5702 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
5703 }
5704 }
5705 }
5706 }
e3234ecf 5707 // Merge in delay slot (won't dirty)
57871462 5708 for(r=0;r<HOST_REGS;r++) {
5709 if(r!=EXCLUDE_REG) {
5710 if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
5711 if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
5712 if((regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
5713 if((regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
5714 if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
5715 if((branch_regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
5716 if((branch_regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
5717 if((branch_regs[i].regmap[r]&63)==rt1[i+1]) wont_dirty_i|=1<<r;
5718 if((branch_regs[i].regmap[r]&63)==rt2[i+1]) wont_dirty_i|=1<<r;
5719 if(branch_regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
5720 }
5721 }
5722 if(wr) {
5723 #ifndef DESTRUCTIVE_WRITEBACK
5724 branch_regs[i].dirty&=wont_dirty_i;
5725 #endif
5726 branch_regs[i].dirty|=will_dirty_i;
5727 }
5728 }
5729 }
5730 }
1e973cb0 5731 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 5732 {
5733 // SYSCALL instruction (software interrupt)
5734 will_dirty_i=0;
5735 wont_dirty_i=0;
5736 }
5737 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
5738 {
5739 // ERET instruction (return from interrupt)
5740 will_dirty_i=0;
5741 wont_dirty_i=0;
5742 }
5743 will_dirty_next=will_dirty_i;
5744 wont_dirty_next=wont_dirty_i;
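    // Merge in the registers written by this instruction itself: host regs
    // holding rt1/rt2 or CCREG become dirty, while mappings to special regs
    // above 33 (other than CCREG) or to nothing are excluded from writeback.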
5745 for(r=0;r<HOST_REGS;r++) {
5746 if(r!=EXCLUDE_REG) {
5747 if((regs[i].regmap[r]&63)==rt1[i]) will_dirty_i|=1<<r;
5748 if((regs[i].regmap[r]&63)==rt2[i]) will_dirty_i|=1<<r;
5749 if((regs[i].regmap[r]&63)>33) will_dirty_i&=~(1<<r);
5750 if(regs[i].regmap[r]<=0) will_dirty_i&=~(1<<r);
5751 if(regs[i].regmap[r]==CCREG) will_dirty_i|=1<<r;
5752 if((regs[i].regmap[r]&63)==rt1[i]) wont_dirty_i|=1<<r;
5753 if((regs[i].regmap[r]&63)==rt2[i]) wont_dirty_i|=1<<r;
5754 if(regs[i].regmap[r]==CCREG) wont_dirty_i|=1<<r;
5755 if(i>istart) {
9f51b4b9 5756 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=FJUMP)
57871462 5757 {
5758            // Don't store a register immediately after writing it,
5759            // as doing so may prevent dual-issue.
5760 if((regs[i].regmap[r]&63)==rt1[i-1]) wont_dirty_i|=1<<r;
5761 if((regs[i].regmap[r]&63)==rt2[i-1]) wont_dirty_i|=1<<r;
5762 }
5763 }
5764 }
5765 }
5766 // Save it
5767 will_dirty[i]=will_dirty_i;
5768 wont_dirty[i]=wont_dirty_i;
5769 // Mark registers that won't be dirtied as not dirty
5770 if(wr) {
5771 /*printf("wr (%d,%d) %x will:",istart,iend,start+i*4);
5772 for(r=0;r<HOST_REGS;r++) {
5773 if((will_dirty_i>>r)&1) {
5774 printf(" r%d",r);
5775 }
5776 }
5777 printf("\n");*/
5778
5779 //if(i==istart||(itype[i-1]!=RJUMP&&itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=FJUMP)) {
5780 regs[i].dirty|=will_dirty_i;
5781 #ifndef DESTRUCTIVE_WRITEBACK
5782 regs[i].dirty&=wont_dirty_i;
5783 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
5784 {
5785 if(i<iend-1&&itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000) {
5786 for(r=0;r<HOST_REGS;r++) {
5787 if(r!=EXCLUDE_REG) {
5788 if(regs[i].regmap[r]==regmap_pre[i+2][r]) {
5789 regs[i+2].wasdirty&=wont_dirty_i|~(1<<r);
581335b0 5790 }else {/*printf("i: %x (%d) mismatch(+2): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
57871462 5791 }
5792 }
5793 }
5794 }
5795 else
5796 {
5797 if(i<iend) {
5798 for(r=0;r<HOST_REGS;r++) {
5799 if(r!=EXCLUDE_REG) {
5800 if(regs[i].regmap[r]==regmap_pre[i+1][r]) {
5801 regs[i+1].wasdirty&=wont_dirty_i|~(1<<r);
581335b0 5802 }else {/*printf("i: %x (%d) mismatch(+1): %d\n",start+i*4,i,r);assert(!((wont_dirty_i>>r)&1));*/}
57871462 5803 }
5804 }
5805 }
5806 }
5807 #endif
5808 //}
5809 }
5810 // Deal with changed mappings
5811 temp_will_dirty=will_dirty_i;
5812 temp_wont_dirty=wont_dirty_i;
5813 for(r=0;r<HOST_REGS;r++) {
5814 if(r!=EXCLUDE_REG) {
5815 int nr;
5816 if(regs[i].regmap[r]==regmap_pre[i][r]) {
5817 if(wr) {
5818 #ifndef DESTRUCTIVE_WRITEBACK
5819 regs[i].wasdirty&=wont_dirty_i|~(1<<r);
5820 #endif
5821 regs[i].wasdirty|=will_dirty_i&(1<<r);
5822 }
5823 }
f776eb14 5824 else if(regmap_pre[i][r]>=0&&(nr=get_reg(regs[i].regmap,regmap_pre[i][r]))>=0) {
57871462 5825 // Register moved to a different register
5826 will_dirty_i&=~(1<<r);
5827 wont_dirty_i&=~(1<<r);
5828 will_dirty_i|=((temp_will_dirty>>nr)&1)<<r;
5829 wont_dirty_i|=((temp_wont_dirty>>nr)&1)<<r;
5830 if(wr) {
5831 #ifndef DESTRUCTIVE_WRITEBACK
5832 regs[i].wasdirty&=wont_dirty_i|~(1<<r);
5833 #endif
5834 regs[i].wasdirty|=will_dirty_i&(1<<r);
5835 }
5836 }
5837 else {
5838 will_dirty_i&=~(1<<r);
5839 wont_dirty_i&=~(1<<r);
5840 if((regmap_pre[i][r]&63)>0 && (regmap_pre[i][r]&63)<34) {
5841 will_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
5842 wont_dirty_i|=((unneeded_reg[i]>>(regmap_pre[i][r]&63))&1)<<r;
5843 } else {
5844 wont_dirty_i|=1<<r;
581335b0 5845 /*printf("i: %x (%d) mismatch: %d\n",start+i*4,i,r);assert(!((will_dirty>>r)&1));*/
57871462 5846 }
5847 }
5848 }
5849 }
5850 }
5851}
5852
4600ba03 5853#ifdef DISASM
57871462 5854 /* disassembly */
5855void disassemble_inst(int i)
5856{
5857 if (bt[i]) printf("*"); else printf(" ");
5858 switch(itype[i]) {
5859 case UJUMP:
5860 printf (" %x: %s %8x\n",start+i*4,insn[i],ba[i]);break;
5861 case CJUMP:
5862 printf (" %x: %s r%d,r%d,%8x\n",start+i*4,insn[i],rs1[i],rs2[i],i?start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14):*ba);break;
5863 case SJUMP:
5864 printf (" %x: %s r%d,%8x\n",start+i*4,insn[i],rs1[i],start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14));break;
5865 case FJUMP:
5866 printf (" %x: %s %8x\n",start+i*4,insn[i],ba[i]);break;
5867 case RJUMP:
74426039 5868 if (opcode[i]==0x9&&rt1[i]!=31)
5067f341 5869 printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],rt1[i],rs1[i]);
5870 else
5871 printf (" %x: %s r%d\n",start+i*4,insn[i],rs1[i]);
5872 break;
57871462 5873 case SPAN:
5874 printf (" %x: %s (pagespan) r%d,r%d,%8x\n",start+i*4,insn[i],rs1[i],rs2[i],ba[i]);break;
5875 case IMM16:
5876 if(opcode[i]==0xf) //LUI
5877 printf (" %x: %s r%d,%4x0000\n",start+i*4,insn[i],rt1[i],imm[i]&0xffff);
5878 else
5879 printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],rt1[i],rs1[i],imm[i]);
5880 break;
5881 case LOAD:
5882 case LOADLR:
5883 printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],rt1[i],rs1[i],imm[i]);
5884 break;
5885 case STORE:
5886 case STORELR:
5887 printf (" %x: %s r%d,r%d+%x\n",start+i*4,insn[i],rs2[i],rs1[i],imm[i]);
5888 break;
5889 case ALU:
5890 case SHIFT:
5891 printf (" %x: %s r%d,r%d,r%d\n",start+i*4,insn[i],rt1[i],rs1[i],rs2[i]);
5892 break;
5893 case MULTDIV:
5894 printf (" %x: %s r%d,r%d\n",start+i*4,insn[i],rs1[i],rs2[i]);
5895 break;
5896 case SHIFTIMM:
5897 printf (" %x: %s r%d,r%d,%d\n",start+i*4,insn[i],rt1[i],rs1[i],imm[i]);
5898 break;
5899 case MOV:
5900 if((opcode2[i]&0x1d)==0x10)
5901 printf (" %x: %s r%d\n",start+i*4,insn[i],rt1[i]);
5902 else if((opcode2[i]&0x1d)==0x11)
5903 printf (" %x: %s r%d\n",start+i*4,insn[i],rs1[i]);
5904 else
5905 printf (" %x: %s\n",start+i*4,insn[i]);
5906 break;
5907 case COP0:
5908 if(opcode2[i]==0)
5909 printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],rt1[i],(source[i]>>11)&0x1f); // MFC0
5910 else if(opcode2[i]==4)
5911 printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],rs1[i],(source[i]>>11)&0x1f); // MTC0
5912 else printf (" %x: %s\n",start+i*4,insn[i]);
5913 break;
5914 case COP1:
5915 if(opcode2[i]<3)
5916 printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],rt1[i],(source[i]>>11)&0x1f); // MFC1
5917 else if(opcode2[i]>3)
5918 printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],rs1[i],(source[i]>>11)&0x1f); // MTC1
5919 else printf (" %x: %s\n",start+i*4,insn[i]);
5920 break;
b9b61529 5921 case COP2:
5922 if(opcode2[i]<3)
5923 printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],rt1[i],(source[i]>>11)&0x1f); // MFC2
5924 else if(opcode2[i]>3)
5925 printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],rs1[i],(source[i]>>11)&0x1f); // MTC2
5926 else printf (" %x: %s\n",start+i*4,insn[i]);
5927 break;
57871462 5928 case C1LS:
5929 printf (" %x: %s cpr1[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,rs1[i],imm[i]);
5930 break;
b9b61529 5931 case C2LS:
5932 printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,rs1[i],imm[i]);
5933 break;
1e973cb0 5934 case INTCALL:
5935 printf (" %x: %s (INTCALL)\n",start+i*4,insn[i]);
5936 break;
57871462 5937 default:
5938 //printf (" %s %8x\n",insn[i],source[i]);
5939 printf (" %x: %s\n",start+i*4,insn[i]);
5940 }
5941}
4600ba03 5942#else
5943static void disassemble_inst(int i) {}
5944#endif // DISASM
57871462 5945
d848b60a 5946#define DRC_TEST_VAL 0x74657374 // "test"
5947
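// Sanity check run at init time: emit a tiny stub that loads DRC_TEST_VAL and
// returns, then call it to verify code in the translation cache is executable.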
5948static int new_dynarec_test(void)
5949{
5950 int (*testfunc)(void) = (void *)out;
d148d265 5951 void *beginning;
d848b60a 5952 int ret;
d148d265 5953
5954 beginning = start_block();
d848b60a 5955 emit_movimm(DRC_TEST_VAL,0); // test
5956 emit_jmpreg(14);
5957 literal_pool(0);
d148d265 5958 end_block(beginning);
d848b60a 5959 SysPrintf("testing if we can run recompiled code..\n");
5960 ret = testfunc();
5961 if (ret == DRC_TEST_VAL)
5962 SysPrintf("test passed.\n");
5963 else
5964 SysPrintf("test failed: %08x\n", ret);
643aeae3 5965 out = translation_cache;
d848b60a 5966 return ret == DRC_TEST_VAL;
5967}
5968
dc990066 5969// clear the state completely, instead of just marking
5970// things invalid like invalidate_all_pages() does
5971void new_dynarec_clear_full()
57871462 5972{
57871462 5973 int n;
643aeae3 5974 out = translation_cache;
35775df7 5975 memset(invalid_code,1,sizeof(invalid_code));
5976 memset(hash_table,0xff,sizeof(hash_table));
57871462 5977 memset(mini_ht,-1,sizeof(mini_ht));
5978 memset(restore_candidate,0,sizeof(restore_candidate));
dc990066 5979 memset(shadow,0,sizeof(shadow));
57871462 5980 copy=shadow;
5981 expirep=16384; // Expiry pointer, +2 blocks
5982 pending_exception=0;
5983 literalcount=0;
57871462 5984 stop_after_jal=0;
9be4ba64 5985 inv_code_start=inv_code_end=~0;
57871462 5986  // clear the jump tables
dc990066 5987 for(n=0;n<4096;n++) ll_clear(jump_in+n);
5988 for(n=0;n<4096;n++) ll_clear(jump_out+n);
5989 for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
5990}
5991
5992void new_dynarec_init()
5993{
d848b60a 5994 SysPrintf("Init new dynarec\n");
1e212a25 5995
5996 // allocate/prepare a buffer for translation cache
5997 // see assem_arm.h for some explanation
5998#if defined(BASE_ADDR_FIXED)
643aeae3 5999 if (mmap(translation_cache, 1 << TARGET_SIZE_2,
dc990066 6000 PROT_READ | PROT_WRITE | PROT_EXEC,
186935dc 6001 MAP_PRIVATE | MAP_ANONYMOUS,
1e212a25 6002 -1, 0) != translation_cache) {
6003 SysPrintf("mmap() failed: %s\n", strerror(errno));
6004 SysPrintf("disable BASE_ADDR_FIXED and recompile\n");
6005 abort();
6006 }
6007#elif defined(BASE_ADDR_DYNAMIC)
6008 #ifdef VITA
6009 sceBlock = sceKernelAllocMemBlockForVM("code", 1 << TARGET_SIZE_2);
6010 if (sceBlock < 0)
6011 SysPrintf("sceKernelAllocMemBlockForVM failed\n");
6012 int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&translation_cache);
6013 if (ret < 0)
6014 SysPrintf("sceKernelGetMemBlockBase failed\n");
6015 #else
6016 translation_cache = mmap (NULL, 1 << TARGET_SIZE_2,
6017 PROT_READ | PROT_WRITE | PROT_EXEC,
6018 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
6019 if (translation_cache == MAP_FAILED) {
d848b60a 6020 SysPrintf("mmap() failed: %s\n", strerror(errno));
1e212a25 6021 abort();
d848b60a 6022 }
1e212a25 6023 #endif
6024#else
6025 #ifndef NO_WRITE_EXEC
bdeade46 6026 // not all systems allow execute in data segment by default
643aeae3 6027 if (mprotect(translation_cache, 1<<TARGET_SIZE_2, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
d848b60a 6028 SysPrintf("mprotect() failed: %s\n", strerror(errno));
1e212a25 6029 #endif
dc990066 6030#endif
643aeae3 6031 out = translation_cache;
2573466a 6032 cycle_multiplier=200;
dc990066 6033 new_dynarec_clear_full();
6034#ifdef HOST_IMM8
6035 // Copy this into local area so we don't have to put it in every literal pool
6036 invc_ptr=invalid_code;
6037#endif
57871462 6038 arch_init();
d848b60a 6039 new_dynarec_test();
a327ad27 6040#ifndef RAM_FIXED
01d26796 6041 ram_offset=(uintptr_t)rdram-0x80000000;
a327ad27 6042#endif
b105cf4f 6043 if (ram_offset!=0)
c43b5311 6044 SysPrintf("warning: RAM is not directly mapped, performance will suffer\n");
57871462 6045}
6046
6047void new_dynarec_cleanup()
6048{
6049 int n;
1e212a25 6050#if defined(BASE_ADDR_FIXED) || defined(BASE_ADDR_DYNAMIC)
6051 #ifdef VITA
6052 sceKernelFreeMemBlock(sceBlock);
6053 sceBlock = -1;
6054 #else
643aeae3 6055 if (munmap(translation_cache, 1<<TARGET_SIZE_2) < 0)
1e212a25 6056 SysPrintf("munmap() failed\n");
bdeade46 6057 #endif
1e212a25 6058#endif
57871462 6059 for(n=0;n<4096;n++) ll_clear(jump_in+n);
6060 for(n=0;n<4096;n++) ll_clear(jump_out+n);
6061 for(n=0;n<4096;n++) ll_clear(jump_dirty+n);
6062 #ifdef ROM_COPY
c43b5311 6063 if (munmap (ROM_COPY, 67108864) < 0) {SysPrintf("munmap() failed\n");}
57871462 6064 #endif
6065}
6066
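// Translate a PSX code address into a host pointer (RAM, or BIOS when HLE is
// off) and report via *limit how far contiguous code may extend; returns NULL
// for addresses that can't be compiled from.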
03f55e6b 6067static u_int *get_source_start(u_int addr, u_int *limit)
57871462 6068{
03f55e6b 6069 if (addr < 0x00200000 ||
6070 (0xa0000000 <= addr && addr < 0xa0200000)) {
6071 // used for BIOS calls mostly?
6072 *limit = (addr&0xa0000000)|0x00200000;
01d26796 6073 return (u_int *)(rdram + (addr&0x1fffff));
03f55e6b 6074 }
6075 else if (!Config.HLE && (
6076 /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/
6077 (0xbfc00000 <= addr && addr < 0xbfc80000))) {
6078 // BIOS
6079 *limit = (addr & 0xfff00000) | 0x80000;
01d26796 6080 return (u_int *)((u_char *)psxR + (addr&0x7ffff));
03f55e6b 6081 }
6082 else if (addr >= 0x80000000 && addr < 0x80000000+RAM_SIZE) {
6083 *limit = (addr & 0x80600000) + 0x00200000;
01d26796 6084 return (u_int *)(rdram + (addr&0x1fffff));
03f55e6b 6085 }
581335b0 6086 return NULL;
03f55e6b 6087}
6088
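// Scan forward (at most 0x1000 bytes) for a "jr $ra" (0x03e00008) and return
// the address just past its delay slot, as a rough estimate of where the
// function starting at addr ends.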
6089static u_int scan_for_ret(u_int addr)
6090{
6091 u_int limit = 0;
6092 u_int *mem;
6093
6094 mem = get_source_start(addr, &limit);
6095 if (mem == NULL)
6096 return addr;
6097
6098 if (limit > addr + 0x1000)
6099 limit = addr + 0x1000;
6100 for (; addr < limit; addr += 4, mem++) {
6101 if (*mem == 0x03e00008) // jr $ra
6102 return addr + 8;
57871462 6103 }
581335b0 6104 return addr;
03f55e6b 6105}
6106
6107struct savestate_block {
6108 uint32_t addr;
6109 uint32_t regflags;
6110};
6111
6112static int addr_cmp(const void *p1_, const void *p2_)
6113{
6114 const struct savestate_block *p1 = p1_, *p2 = p2_;
6115 return p1->addr - p2->addr;
6116}
6117
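// Record the entry addresses and speculation register flags of the currently
// compiled blocks into a savestate buffer; entries falling inside a function
// that is already covered are dropped. Returns the number of bytes written.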
6118int new_dynarec_save_blocks(void *save, int size)
6119{
6120 struct savestate_block *blocks = save;
6121 int maxcount = size / sizeof(blocks[0]);
6122 struct savestate_block tmp_blocks[1024];
6123 struct ll_entry *head;
6124 int p, s, d, o, bcnt;
6125 u_int addr;
6126
6127 o = 0;
b14b6a8f 6128 for (p = 0; p < ARRAY_SIZE(jump_in); p++) {
03f55e6b 6129 bcnt = 0;
6130 for (head = jump_in[p]; head != NULL; head = head->next) {
6131 tmp_blocks[bcnt].addr = head->vaddr;
6132 tmp_blocks[bcnt].regflags = head->reg_sv_flags;
6133 bcnt++;
6134 }
6135 if (bcnt < 1)
6136 continue;
6137 qsort(tmp_blocks, bcnt, sizeof(tmp_blocks[0]), addr_cmp);
6138
6139 addr = tmp_blocks[0].addr;
6140 for (s = d = 0; s < bcnt; s++) {
6141 if (tmp_blocks[s].addr < addr)
6142 continue;
6143 if (d == 0 || tmp_blocks[d-1].addr != tmp_blocks[s].addr)
6144 tmp_blocks[d++] = tmp_blocks[s];
6145 addr = scan_for_ret(tmp_blocks[s].addr);
6146 }
6147
6148 if (o + d > maxcount)
6149 d = maxcount - o;
6150 memcpy(&blocks[o], tmp_blocks, d * sizeof(blocks[0]));
6151 o += d;
6152 }
6153
6154 return o * sizeof(blocks[0]);
6155}
6156
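// Precompile the blocks recorded by new_dynarec_save_blocks. GPRs are
// temporarily forced to RAM-like (0x80000000) or scratchpad-like (0x1f800000)
// values matching the saved regflags so that address speculation compiles the
// same way it originally did; the real GPR values are restored afterwards.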
6157void new_dynarec_load_blocks(const void *save, int size)
6158{
6159 const struct savestate_block *blocks = save;
6160 int count = size / sizeof(blocks[0]);
6161 u_int regs_save[32];
6162 uint32_t f;
6163 int i, b;
6164
6165 get_addr(psxRegs.pc);
6166
6167 // change GPRs for speculation to at least partially work..
6168 memcpy(regs_save, &psxRegs.GPR, sizeof(regs_save));
6169 for (i = 1; i < 32; i++)
6170 psxRegs.GPR.r[i] = 0x80000000;
6171
6172 for (b = 0; b < count; b++) {
6173 for (f = blocks[b].regflags, i = 0; f; f >>= 1, i++) {
6174 if (f & 1)
6175 psxRegs.GPR.r[i] = 0x1f800000;
6176 }
6177
6178 get_addr(blocks[b].addr);
6179
6180 for (f = blocks[b].regflags, i = 0; f; f >>= 1, i++) {
6181 if (f & 1)
6182 psxRegs.GPR.r[i] = 0x80000000;
6183 }
6184 }
6185
6186 memcpy(&psxRegs.GPR, regs_save, sizeof(regs_save));
6187}
6188
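// Main compilation entry point: translate the MIPS block starting at addr
// into the translation cache using the numbered passes listed below.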
6189int new_recompile_block(int addr)
6190{
6191 u_int pagelimit = 0;
6192 u_int state_rflags = 0;
6193 int i;
6194
1a4301c4 6195 assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out);
57871462 6196 //printf("TRACE: count=%d next=%d (compile %x)\n",Count,next_interupt,addr);
9f51b4b9 6197 //if(debug)
57871462 6198 //printf("fpu mapping=%x enabled=%x\n",(Status & 0x04000000)>>26,(Status & 0x20000000)>>29);
03f55e6b 6199
6200 // this is just for speculation
6201 for (i = 1; i < 32; i++) {
6202 if ((psxRegs.GPR.r[i] & 0xffff0000) == 0x1f800000)
6203 state_rflags |= 1 << i;
6204 }
6205
57871462 6206 start = (u_int)addr&~3;
6207 //assert(((u_int)addr&1)==0);
2f546f9a 6208 new_dynarec_did_compile=1;
9ad4d757 6209 if (Config.HLE && start == 0x80001000) // hlecall
560e4a12 6210 {
7139f3c8 6211 // XXX: is this enough? Maybe check hleSoftCall?
d148d265 6212 void *beginning=start_block();
7139f3c8 6213 u_int page=get_page(start);
d148d265 6214
7139f3c8 6215 invalid_code[start>>12]=0;
6216 emit_movimm(start,0);
643aeae3 6217 emit_writeword(0,&pcaddr);
b14b6a8f 6218 emit_jmp(new_dyna_leave);
15776b68 6219 literal_pool(0);
d148d265 6220 end_block(beginning);
03f55e6b 6221 ll_add_flags(jump_in+page,start,state_rflags,(void *)beginning);
7139f3c8 6222 return 0;
6223 }
03f55e6b 6224
6225 source = get_source_start(start, &pagelimit);
6226 if (source == NULL) {
6227 SysPrintf("Compile at bogus memory address: %08x\n", addr);
57871462 6228 exit(1);
6229 }
6230
6231 /* Pass 1: disassemble */
6232 /* Pass 2: register dependencies, branch targets */
6233 /* Pass 3: register allocation */
6234 /* Pass 4: branch dependencies */
6235 /* Pass 5: pre-alloc */
6236 /* Pass 6: optimize clean/dirty state */
6237 /* Pass 7: flag 32-bit registers */
6238 /* Pass 8: assembly */
6239 /* Pass 9: linker */
6240 /* Pass 10: garbage collection / free memory */
6241
03f55e6b 6242 int j;
57871462 6243 int done=0;
6244 unsigned int type,op,op2;
6245
6246 //printf("addr = %x source = %x %x\n", addr,source,source[0]);
9f51b4b9 6247
57871462 6248 /* Pass 1 disassembly */
6249
6250 for(i=0;!done;i++) {
e1190b87 6251 bt[i]=0;likely[i]=0;ooo[i]=0;op2=0;
6252 minimum_free_regs[i]=0;
57871462 6253 opcode[i]=op=source[i]>>26;
6254 switch(op)
6255 {
6256 case 0x00: strcpy(insn[i],"special"); type=NI;
6257 op2=source[i]&0x3f;
6258 switch(op2)
6259 {
6260 case 0x00: strcpy(insn[i],"SLL"); type=SHIFTIMM; break;
6261 case 0x02: strcpy(insn[i],"SRL"); type=SHIFTIMM; break;
6262 case 0x03: strcpy(insn[i],"SRA"); type=SHIFTIMM; break;
6263 case 0x04: strcpy(insn[i],"SLLV"); type=SHIFT; break;
6264 case 0x06: strcpy(insn[i],"SRLV"); type=SHIFT; break;
6265 case 0x07: strcpy(insn[i],"SRAV"); type=SHIFT; break;
6266 case 0x08: strcpy(insn[i],"JR"); type=RJUMP; break;
6267 case 0x09: strcpy(insn[i],"JALR"); type=RJUMP; break;
6268 case 0x0C: strcpy(insn[i],"SYSCALL"); type=SYSCALL; break;
6269 case 0x0D: strcpy(insn[i],"BREAK"); type=OTHER; break;
6270 case 0x0F: strcpy(insn[i],"SYNC"); type=OTHER; break;
6271 case 0x10: strcpy(insn[i],"MFHI"); type=MOV; break;
6272 case 0x11: strcpy(insn[i],"MTHI"); type=MOV; break;
6273 case 0x12: strcpy(insn[i],"MFLO"); type=MOV; break;
6274 case 0x13: strcpy(insn[i],"MTLO"); type=MOV; break;
57871462 6275 case 0x18: strcpy(insn[i],"MULT"); type=MULTDIV; break;
6276 case 0x19: strcpy(insn[i],"MULTU"); type=MULTDIV; break;
6277 case 0x1A: strcpy(insn[i],"DIV"); type=MULTDIV; break;
6278 case 0x1B: strcpy(insn[i],"DIVU"); type=MULTDIV; break;
57871462 6279 case 0x20: strcpy(insn[i],"ADD"); type=ALU; break;
6280 case 0x21: strcpy(insn[i],"ADDU"); type=ALU; break;
6281 case 0x22: strcpy(insn[i],"SUB"); type=ALU; break;
6282 case 0x23: strcpy(insn[i],"SUBU"); type=ALU; break;
6283 case 0x24: strcpy(insn[i],"AND"); type=ALU; break;
6284 case 0x25: strcpy(insn[i],"OR"); type=ALU; break;
6285 case 0x26: strcpy(insn[i],"XOR"); type=ALU; break;
6286 case 0x27: strcpy(insn[i],"NOR"); type=ALU; break;
6287 case 0x2A: strcpy(insn[i],"SLT"); type=ALU; break;
6288 case 0x2B: strcpy(insn[i],"SLTU"); type=ALU; break;
57871462 6289 case 0x30: strcpy(insn[i],"TGE"); type=NI; break;
6290 case 0x31: strcpy(insn[i],"TGEU"); type=NI; break;
6291 case 0x32: strcpy(insn[i],"TLT"); type=NI; break;
6292 case 0x33: strcpy(insn[i],"TLTU"); type=NI; break;
6293 case 0x34: strcpy(insn[i],"TEQ"); type=NI; break;
6294 case 0x36: strcpy(insn[i],"TNE"); type=NI; break;
71e490c5 6295#if 0
7f2607ea 6296 case 0x14: strcpy(insn[i],"DSLLV"); type=SHIFT; break;
6297 case 0x16: strcpy(insn[i],"DSRLV"); type=SHIFT; break;
6298 case 0x17: strcpy(insn[i],"DSRAV"); type=SHIFT; break;
6299 case 0x1C: strcpy(insn[i],"DMULT"); type=MULTDIV; break;
6300 case 0x1D: strcpy(insn[i],"DMULTU"); type=MULTDIV; break;
6301 case 0x1E: strcpy(insn[i],"DDIV"); type=MULTDIV; break;
6302 case 0x1F: strcpy(insn[i],"DDIVU"); type=MULTDIV; break;
6303 case 0x2C: strcpy(insn[i],"DADD"); type=ALU; break;
6304 case 0x2D: strcpy(insn[i],"DADDU"); type=ALU; break;
6305 case 0x2E: strcpy(insn[i],"DSUB"); type=ALU; break;
6306 case 0x2F: strcpy(insn[i],"DSUBU"); type=ALU; break;
57871462 6307 case 0x38: strcpy(insn[i],"DSLL"); type=SHIFTIMM; break;
6308 case 0x3A: strcpy(insn[i],"DSRL"); type=SHIFTIMM; break;
6309 case 0x3B: strcpy(insn[i],"DSRA"); type=SHIFTIMM; break;
6310 case 0x3C: strcpy(insn[i],"DSLL32"); type=SHIFTIMM; break;
6311 case 0x3E: strcpy(insn[i],"DSRL32"); type=SHIFTIMM; break;
6312 case 0x3F: strcpy(insn[i],"DSRA32"); type=SHIFTIMM; break;
7f2607ea 6313#endif
57871462 6314 }
6315 break;
6316 case 0x01: strcpy(insn[i],"regimm"); type=NI;
6317 op2=(source[i]>>16)&0x1f;
6318 switch(op2)
6319 {
6320 case 0x00: strcpy(insn[i],"BLTZ"); type=SJUMP; break;
6321 case 0x01: strcpy(insn[i],"BGEZ"); type=SJUMP; break;
6322 case 0x02: strcpy(insn[i],"BLTZL"); type=SJUMP; break;
6323 case 0x03: strcpy(insn[i],"BGEZL"); type=SJUMP; break;
6324 case 0x08: strcpy(insn[i],"TGEI"); type=NI; break;
6325 case 0x09: strcpy(insn[i],"TGEIU"); type=NI; break;
6326 case 0x0A: strcpy(insn[i],"TLTI"); type=NI; break;
6327 case 0x0B: strcpy(insn[i],"TLTIU"); type=NI; break;
6328 case 0x0C: strcpy(insn[i],"TEQI"); type=NI; break;
6329 case 0x0E: strcpy(insn[i],"TNEI"); type=NI; break;
6330 case 0x10: strcpy(insn[i],"BLTZAL"); type=SJUMP; break;
6331 case 0x11: strcpy(insn[i],"BGEZAL"); type=SJUMP; break;
6332 case 0x12: strcpy(insn[i],"BLTZALL"); type=SJUMP; break;
6333 case 0x13: strcpy(insn[i],"BGEZALL"); type=SJUMP; break;
6334 }
6335 break;
6336 case 0x02: strcpy(insn[i],"J"); type=UJUMP; break;
6337 case 0x03: strcpy(insn[i],"JAL"); type=UJUMP; break;
6338 case 0x04: strcpy(insn[i],"BEQ"); type=CJUMP; break;
6339 case 0x05: strcpy(insn[i],"BNE"); type=CJUMP; break;
6340 case 0x06: strcpy(insn[i],"BLEZ"); type=CJUMP; break;
6341 case 0x07: strcpy(insn[i],"BGTZ"); type=CJUMP; break;
6342 case 0x08: strcpy(insn[i],"ADDI"); type=IMM16; break;
6343 case 0x09: strcpy(insn[i],"ADDIU"); type=IMM16; break;
6344 case 0x0A: strcpy(insn[i],"SLTI"); type=IMM16; break;
6345 case 0x0B: strcpy(insn[i],"SLTIU"); type=IMM16; break;
6346 case 0x0C: strcpy(insn[i],"ANDI"); type=IMM16; break;
6347 case 0x0D: strcpy(insn[i],"ORI"); type=IMM16; break;
6348 case 0x0E: strcpy(insn[i],"XORI"); type=IMM16; break;
6349 case 0x0F: strcpy(insn[i],"LUI"); type=IMM16; break;
6350 case 0x10: strcpy(insn[i],"cop0"); type=NI;
6351 op2=(source[i]>>21)&0x1f;
6352 switch(op2)
6353 {
6354 case 0x00: strcpy(insn[i],"MFC0"); type=COP0; break;
00fa9369 6355 case 0x02: strcpy(insn[i],"CFC0"); type=COP0; break;
57871462 6356 case 0x04: strcpy(insn[i],"MTC0"); type=COP0; break;
00fa9369 6357 case 0x06: strcpy(insn[i],"CTC0"); type=COP0; break;
6358 case 0x10: strcpy(insn[i],"RFE"); type=COP0; break;
57871462 6359 }
6360 break;
00fa9369 6361 case 0x11: strcpy(insn[i],"cop1"); type=COP1;
57871462 6362 op2=(source[i]>>21)&0x1f;
57871462 6363 break;
71e490c5 6364#if 0
57871462 6365 case 0x14: strcpy(insn[i],"BEQL"); type=CJUMP; break;
6366 case 0x15: strcpy(insn[i],"BNEL"); type=CJUMP; break;
6367 case 0x16: strcpy(insn[i],"BLEZL"); type=CJUMP; break;
6368 case 0x17: strcpy(insn[i],"BGTZL"); type=CJUMP; break;
6369 case 0x18: strcpy(insn[i],"DADDI"); type=IMM16; break;
6370 case 0x19: strcpy(insn[i],"DADDIU"); type=IMM16; break;
6371 case 0x1A: strcpy(insn[i],"LDL"); type=LOADLR; break;
6372 case 0x1B: strcpy(insn[i],"LDR"); type=LOADLR; break;
996cc15d 6373#endif
57871462 6374 case 0x20: strcpy(insn[i],"LB"); type=LOAD; break;
6375 case 0x21: strcpy(insn[i],"LH"); type=LOAD; break;
6376 case 0x22: strcpy(insn[i],"LWL"); type=LOADLR; break;
6377 case 0x23: strcpy(insn[i],"LW"); type=LOAD; break;
6378 case 0x24: strcpy(insn[i],"LBU"); type=LOAD; break;
6379 case 0x25: strcpy(insn[i],"LHU"); type=LOAD; break;
6380 case 0x26: strcpy(insn[i],"LWR"); type=LOADLR; break;
71e490c5 6381#if 0
57871462 6382 case 0x27: strcpy(insn[i],"LWU"); type=LOAD; break;
64bd6f82 6383#endif
57871462 6384 case 0x28: strcpy(insn[i],"SB"); type=STORE; break;
6385 case 0x29: strcpy(insn[i],"SH"); type=STORE; break;
6386 case 0x2A: strcpy(insn[i],"SWL"); type=STORELR; break;
6387 case 0x2B: strcpy(insn[i],"SW"); type=STORE; break;
71e490c5 6388#if 0
57871462 6389 case 0x2C: strcpy(insn[i],"SDL"); type=STORELR; break;
6390 case 0x2D: strcpy(insn[i],"SDR"); type=STORELR; break;
996cc15d 6391#endif
57871462 6392 case 0x2E: strcpy(insn[i],"SWR"); type=STORELR; break;
6393 case 0x2F: strcpy(insn[i],"CACHE"); type=NOP; break;
6394 case 0x30: strcpy(insn[i],"LL"); type=NI; break;
6395 case 0x31: strcpy(insn[i],"LWC1"); type=C1LS; break;
71e490c5 6396#if 0
57871462 6397 case 0x34: strcpy(insn[i],"LLD"); type=NI; break;
6398 case 0x35: strcpy(insn[i],"LDC1"); type=C1LS; break;
6399 case 0x37: strcpy(insn[i],"LD"); type=LOAD; break;
996cc15d 6400#endif
57871462 6401 case 0x38: strcpy(insn[i],"SC"); type=NI; break;
6402 case 0x39: strcpy(insn[i],"SWC1"); type=C1LS; break;
71e490c5 6403#if 0
57871462 6404 case 0x3C: strcpy(insn[i],"SCD"); type=NI; break;
6405 case 0x3D: strcpy(insn[i],"SDC1"); type=C1LS; break;
6406 case 0x3F: strcpy(insn[i],"SD"); type=STORE; break;
996cc15d 6407#endif
b9b61529 6408 case 0x12: strcpy(insn[i],"COP2"); type=NI;
6409 op2=(source[i]>>21)&0x1f;
bedfea38 6410 //if (op2 & 0x10) {
6411 if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns
c7abc864 6412 if (gte_handlers[source[i]&0x3f]!=NULL) {
bedfea38 6413 if (gte_regnames[source[i]&0x3f]!=NULL)
6414 strcpy(insn[i],gte_regnames[source[i]&0x3f]);
6415 else
6416 snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f);
c7abc864 6417 type=C2OP;
6418 }
6419 }
6420 else switch(op2)
b9b61529 6421 {
6422 case 0x00: strcpy(insn[i],"MFC2"); type=COP2; break;
6423 case 0x02: strcpy(insn[i],"CFC2"); type=COP2; break;
6424 case 0x04: strcpy(insn[i],"MTC2"); type=COP2; break;
6425 case 0x06: strcpy(insn[i],"CTC2"); type=COP2; break;
b9b61529 6426 }
6427 break;
6428 case 0x32: strcpy(insn[i],"LWC2"); type=C2LS; break;
6429 case 0x3A: strcpy(insn[i],"SWC2"); type=C2LS; break;
6430 case 0x3B: strcpy(insn[i],"HLECALL"); type=HLECALL; break;
90ae6d4e 6431 default: strcpy(insn[i],"???"); type=NI;
c43b5311 6432 SysPrintf("NI %08x @%08x (%08x)\n", source[i], addr + i*4, addr);
90ae6d4e 6433 break;
57871462 6434 }
6435 itype[i]=type;
6436 opcode2[i]=op2;
6437 /* Get registers/immediates */
6438 lt1[i]=0;
6439 us1[i]=0;
6440 us2[i]=0;
6441 dep1[i]=0;
6442 dep2[i]=0;
bedfea38 6443 gte_rs[i]=gte_rt[i]=0;
57871462 6444 switch(type) {
6445 case LOAD:
6446 rs1[i]=(source[i]>>21)&0x1f;
6447 rs2[i]=0;
6448 rt1[i]=(source[i]>>16)&0x1f;
6449 rt2[i]=0;
6450 imm[i]=(short)source[i];
6451 break;
6452 case STORE:
6453 case STORELR:
6454 rs1[i]=(source[i]>>21)&0x1f;
6455 rs2[i]=(source[i]>>16)&0x1f;
6456 rt1[i]=0;
6457 rt2[i]=0;
6458 imm[i]=(short)source[i];
6459 if(op==0x2c||op==0x2d||op==0x3f) us1[i]=rs2[i]; // 64-bit SDL/SDR/SD
6460 break;
6461 case LOADLR:
6462 // LWL/LWR only load part of the register,
6463 // therefore the target register must be treated as a source too
6464 rs1[i]=(source[i]>>21)&0x1f;
6465 rs2[i]=(source[i]>>16)&0x1f;
6466 rt1[i]=(source[i]>>16)&0x1f;
6467 rt2[i]=0;
6468 imm[i]=(short)source[i];
6469 if(op==0x1a||op==0x1b) us1[i]=rs2[i]; // LDR/LDL
6470 if(op==0x26) dep1[i]=rt1[i]; // LWR
6471 break;
6472 case IMM16:
6473 if (op==0x0f) rs1[i]=0; // LUI instruction has no source register
6474 else rs1[i]=(source[i]>>21)&0x1f;
6475 rs2[i]=0;
6476 rt1[i]=(source[i]>>16)&0x1f;
6477 rt2[i]=0;
6478 if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI
6479 imm[i]=(unsigned short)source[i];
6480 }else{
6481 imm[i]=(short)source[i];
6482 }
6483 if(op==0x18||op==0x19) us1[i]=rs1[i]; // DADDI/DADDIU
6484 if(op==0x0a||op==0x0b) us1[i]=rs1[i]; // SLTI/SLTIU
6485 if(op==0x0d||op==0x0e) dep1[i]=rs1[i]; // ORI/XORI
6486 break;
6487 case UJUMP:
6488 rs1[i]=0;
6489 rs2[i]=0;
6490 rt1[i]=0;
6491 rt2[i]=0;
6492 // The JAL instruction writes to r31.
6493 if (op&1) {
6494 rt1[i]=31;
6495 }
6496 rs2[i]=CCREG;
6497 break;
6498 case RJUMP:
6499 rs1[i]=(source[i]>>21)&0x1f;
6500 rs2[i]=0;
6501 rt1[i]=0;
6502 rt2[i]=0;
5067f341 6503 // The JALR instruction writes to rd.
57871462 6504 if (op2&1) {
5067f341 6505 rt1[i]=(source[i]>>11)&0x1f;
57871462 6506 }
6507 rs2[i]=CCREG;
6508 break;
6509 case CJUMP:
6510 rs1[i]=(source[i]>>21)&0x1f;
6511 rs2[i]=(source[i]>>16)&0x1f;
6512 rt1[i]=0;
6513 rt2[i]=0;
6514 if(op&2) { // BGTZ/BLEZ
6515 rs2[i]=0;
6516 }
6517 us1[i]=rs1[i];
6518 us2[i]=rs2[i];
6519 likely[i]=op>>4;
6520 break;
6521 case SJUMP:
6522 rs1[i]=(source[i]>>21)&0x1f;
6523 rs2[i]=CCREG;
6524 rt1[i]=0;
6525 rt2[i]=0;
6526 us1[i]=rs1[i];
6527 if(op2&0x10) { // BxxAL
6528 rt1[i]=31;
6529 // NOTE: If the branch is not taken, r31 is still overwritten
6530 }
6531 likely[i]=(op2&2)>>1;
6532 break;
6533 case FJUMP:
6534 rs1[i]=FSREG;
6535 rs2[i]=CSREG;
6536 rt1[i]=0;
6537 rt2[i]=0;
6538 likely[i]=((source[i])>>17)&1;
6539 break;
6540 case ALU:
6541 rs1[i]=(source[i]>>21)&0x1f; // source
6542 rs2[i]=(source[i]>>16)&0x1f; // subtract amount
6543 rt1[i]=(source[i]>>11)&0x1f; // destination
6544 rt2[i]=0;
6545 if(op2==0x2a||op2==0x2b) { // SLT/SLTU
6546 us1[i]=rs1[i];us2[i]=rs2[i];
6547 }
6548 else if(op2>=0x24&&op2<=0x27) { // AND/OR/XOR/NOR
6549 dep1[i]=rs1[i];dep2[i]=rs2[i];
6550 }
6551 else if(op2>=0x2c&&op2<=0x2f) { // DADD/DSUB
6552 dep1[i]=rs1[i];dep2[i]=rs2[i];
6553 }
6554 break;
6555 case MULTDIV:
6556 rs1[i]=(source[i]>>21)&0x1f; // source
6557 rs2[i]=(source[i]>>16)&0x1f; // divisor
6558 rt1[i]=HIREG;
6559 rt2[i]=LOREG;
6560 if (op2>=0x1c&&op2<=0x1f) { // DMULT/DMULTU/DDIV/DDIVU
6561 us1[i]=rs1[i];us2[i]=rs2[i];
6562 }
6563 break;
6564 case MOV:
6565 rs1[i]=0;
6566 rs2[i]=0;
6567 rt1[i]=0;
6568 rt2[i]=0;
6569 if(op2==0x10) rs1[i]=HIREG; // MFHI
6570 if(op2==0x11) rt1[i]=HIREG; // MTHI
6571 if(op2==0x12) rs1[i]=LOREG; // MFLO
6572 if(op2==0x13) rt1[i]=LOREG; // MTLO
6573 if((op2&0x1d)==0x10) rt1[i]=(source[i]>>11)&0x1f; // MFxx
6574 if((op2&0x1d)==0x11) rs1[i]=(source[i]>>21)&0x1f; // MTxx
6575 dep1[i]=rs1[i];
6576 break;
6577 case SHIFT:
6578 rs1[i]=(source[i]>>16)&0x1f; // target of shift
6579 rs2[i]=(source[i]>>21)&0x1f; // shift amount
6580 rt1[i]=(source[i]>>11)&0x1f; // destination
6581 rt2[i]=0;
6582 // DSLLV/DSRLV/DSRAV are 64-bit
6583 if(op2>=0x14&&op2<=0x17) us1[i]=rs1[i];
6584 break;
6585 case SHIFTIMM:
6586 rs1[i]=(source[i]>>16)&0x1f;
6587 rs2[i]=0;
6588 rt1[i]=(source[i]>>11)&0x1f;
6589 rt2[i]=0;
6590 imm[i]=(source[i]>>6)&0x1f;
6591 // DSxx32 instructions
6592 if(op2>=0x3c) imm[i]|=0x20;
6593 // DSLL/DSRL/DSRA/DSRA32/DSRL32 but not DSLL32 require 64-bit source
6594 if(op2>=0x38&&op2!=0x3c) us1[i]=rs1[i];
6595 break;
6596 case COP0:
6597 rs1[i]=0;
6598 rs2[i]=0;
6599 rt1[i]=0;
6600 rt2[i]=0;
00fa9369 6601 if(op2==0||op2==2) rt1[i]=(source[i]>>16)&0x1F; // MFC0/CFC0
6602 if(op2==4||op2==6) rs1[i]=(source[i]>>16)&0x1F; // MTC0/CTC0
57871462 6603 if(op2==4&&((source[i]>>11)&0x1f)==12) rt2[i]=CSREG; // Status
6604 if(op2==16) if((source[i]&0x3f)==0x18) rs2[i]=CCREG; // ERET
6605 break;
6606 case COP1:
6607 rs1[i]=0;
6608 rs2[i]=0;
6609 rt1[i]=0;
6610 rt2[i]=0;
6611 if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1
6612 if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC1/DMTC1/CTC1
6613 if(op2==5) us1[i]=rs1[i]; // DMTC1
6614 rs2[i]=CSREG;
6615 break;
bedfea38 6616 case COP2:
6617 rs1[i]=0;
6618 rs2[i]=0;
6619 rt1[i]=0;
6620 rt2[i]=0;
6621 if(op2<3) rt1[i]=(source[i]>>16)&0x1F; // MFC2/CFC2
6622 if(op2>3) rs1[i]=(source[i]>>16)&0x1F; // MTC2/CTC2
6623 rs2[i]=CSREG;
6624 int gr=(source[i]>>11)&0x1F;
6625 switch(op2)
6626 {
6627 case 0x00: gte_rs[i]=1ll<<gr; break; // MFC2
6628 case 0x04: gte_rt[i]=1ll<<gr; break; // MTC2
0ff8c62c 6629 case 0x02: gte_rs[i]=1ll<<(gr+32); break; // CFC2
bedfea38 6630 case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2
6631 }
6632 break;
57871462 6633 case C1LS:
6634 rs1[i]=(source[i]>>21)&0x1F;
6635 rs2[i]=CSREG;
6636 rt1[i]=0;
6637 rt2[i]=0;
6638 imm[i]=(short)source[i];
6639 break;
b9b61529 6640 case C2LS:
6641 rs1[i]=(source[i]>>21)&0x1F;
6642 rs2[i]=0;
6643 rt1[i]=0;
6644 rt2[i]=0;
6645 imm[i]=(short)source[i];
bedfea38 6646 if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2
6647 else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2
6648 break;
6649 case C2OP:
6650 rs1[i]=0;
6651 rs2[i]=0;
6652 rt1[i]=0;
6653 rt2[i]=0;
2167bef6 6654 gte_rs[i]=gte_reg_reads[source[i]&0x3f];
6655 gte_rt[i]=gte_reg_writes[source[i]&0x3f];
6656 gte_rt[i]|=1ll<<63; // every op changes flags
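        // MVMVA reads only one of the data vectors (V0, V1, V2, or IR1-IR3
        // when its v field is 3), so narrow the generic read mask accordingly.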
587a5b1c 6657 if((source[i]&0x3f)==GTE_MVMVA) {
6658 int v = (source[i] >> 15) & 3;
6659 gte_rs[i]&=~0xe3fll;
6660 if(v==3) gte_rs[i]|=0xe00ll;
6661 else gte_rs[i]|=3ll<<(v*2);
6662 }
b9b61529 6663 break;
57871462 6664 case SYSCALL:
7139f3c8 6665 case HLECALL:
1e973cb0 6666 case INTCALL:
57871462 6667 rs1[i]=CCREG;
6668 rs2[i]=0;
6669 rt1[i]=0;
6670 rt2[i]=0;
6671 break;
6672 default:
6673 rs1[i]=0;
6674 rs2[i]=0;
6675 rt1[i]=0;
6676 rt2[i]=0;
6677 }
6678 /* Calculate branch target addresses */
6679 if(type==UJUMP)
6680 ba[i]=((start+i*4+4)&0xF0000000)|(((unsigned int)source[i]<<6)>>4);
6681 else if(type==CJUMP&&rs1[i]==rs2[i]&&(op&1))
6682 ba[i]=start+i*4+8; // Ignore never taken branch
6683 else if(type==SJUMP&&rs1[i]==0&&!(op2&1))
6684 ba[i]=start+i*4+8; // Ignore never taken branch
6685 else if(type==CJUMP||type==SJUMP||type==FJUMP)
6686 ba[i]=start+i*4+4+((signed int)((unsigned int)source[i]<<16)>>14);
6687 else ba[i]=-1;
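      // Note: for J/JAL, ((unsigned)source<<6)>>4 drops the 6 opcode bits and
      // leaves the 26-bit target index already shifted left by 2, which is then
      // combined with the top nibble of PC+4. Conditional branches use the
      // sign-extended 16-bit offset multiplied by 4.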
3e535354 6688 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP)) {
6689 int do_in_intrp=0;
6690 // branch in delay slot?
6691 if(type==RJUMP||type==UJUMP||type==CJUMP||type==SJUMP||type==FJUMP) {
6692        // don't compile the first branch; call the interpreter instead when it's hit
c43b5311 6693 SysPrintf("branch in delay slot @%08x (%08x)\n", addr + i*4, addr);
3e535354 6694 do_in_intrp=1;
6695 }
6696 // basic load delay detection
6697 else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&rt1[i]!=0) {
6698 int t=(ba[i-1]-start)/4;
6699 if(0 <= t && t < i &&(rt1[i]==rs1[t]||rt1[i]==rs2[t])&&itype[t]!=CJUMP&&itype[t]!=SJUMP) {
6700 // jump target wants DS result - potential load delay effect
c43b5311 6701 SysPrintf("load delay @%08x (%08x)\n", addr + i*4, addr);
3e535354 6702 do_in_intrp=1;
6703 bt[t+1]=1; // expected return from interpreter
6704 }
6705 else if(i>=2&&rt1[i-2]==2&&rt1[i]==2&&rs1[i]!=2&&rs2[i]!=2&&rs1[i-1]!=2&&rs2[i-1]!=2&&
6706 !(i>=3&&(itype[i-3]==RJUMP||itype[i-3]==UJUMP||itype[i-3]==CJUMP||itype[i-3]==SJUMP))) {
6707 // v0 overwrite like this is a sign of trouble, bail out
c43b5311 6708 SysPrintf("v0 overwrite @%08x (%08x)\n", addr + i*4, addr);
3e535354 6709 do_in_intrp=1;
6710 }
6711 }
3e535354 6712 if(do_in_intrp) {
6713 rs1[i-1]=CCREG;
6714 rs2[i-1]=rt1[i-1]=rt2[i-1]=0;
26869094 6715 ba[i-1]=-1;
6716 itype[i-1]=INTCALL;
6717 done=2;
3e535354 6718 i--; // don't compile the DS
26869094 6719 }
3e535354 6720 }
3e535354 6721 /* Is this the end of the block? */
6722 if(i>0&&(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)) {
5067f341 6723 if(rt1[i-1]==0) { // Continue past subroutine call (JAL)
1e973cb0 6724 done=2;
57871462 6725 }
6726 else {
6727 if(stop_after_jal) done=1;
6728 // Stop on BREAK
6729 if((source[i+1]&0xfc00003f)==0x0d) done=1;
6730 }
6731 // Don't recompile stuff that's already compiled
6732 if(check_addr(start+i*4+4)) done=1;
6733 // Don't get too close to the limit
6734 if(i>MAXBLOCK/2) done=1;
6735 }
75dec299 6736 if(itype[i]==SYSCALL&&stop_after_jal) done=1;
1e973cb0 6737 if(itype[i]==HLECALL||itype[i]==INTCALL) done=2;
6738 if(done==2) {
6739 // Does the block continue due to a branch?
6740 for(j=i-1;j>=0;j--)
6741 {
2a706964 6742 if(ba[j]==start+i*4) done=j=0; // Branch into delay slot
1e973cb0 6743 if(ba[j]==start+i*4+4) done=j=0;
6744 if(ba[j]==start+i*4+8) done=j=0;
6745 }
6746 }
75dec299 6747 //assert(i<MAXBLOCK-1);
57871462 6748 if(start+i*4==pagelimit-4) done=1;
6749 assert(start+i*4<pagelimit);
6750 if (i==MAXBLOCK-1) done=1;
6751 // Stop if we're compiling junk
6752 if(itype[i]==NI&&opcode[i]==0x11) {
6753 done=stop_after_jal=1;
c43b5311 6754 SysPrintf("Disabled speculative precompilation\n");
57871462 6755 }
6756 }
6757 slen=i;
6758 if(itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==RJUMP||itype[i-1]==FJUMP) {
6759 if(start+i*4==pagelimit) {
6760 itype[i-1]=SPAN;
6761 }
6762 }
6763 assert(slen>0);
6764
6765 /* Pass 2 - Register dependencies and branch targets */
6766
6767 unneeded_registers(0,slen-1,0);
9f51b4b9 6768
57871462 6769 /* Pass 3 - Register allocation */
6770
6771 struct regstat current; // Current register allocations/status
6772 current.is32=1;
6773 current.dirty=0;
6774 current.u=unneeded_reg[0];
57871462 6775 clear_all_regs(current.regmap);
6776 alloc_reg(&current,0,CCREG);
6777 dirty_reg(&current,CCREG);
6778 current.isconst=0;
6779 current.wasconst=0;
27727b63 6780 current.waswritten=0;
57871462 6781 int ds=0;
6782 int cc=0;
5194fb95 6783 int hr=-1;
6ebf4adf 6784
57871462 6785 if((u_int)addr&1) {
6786 // First instruction is delay slot
6787 cc=-1;
6788 bt[1]=1;
6789 ds=1;
6790 unneeded_reg[0]=1;
57871462 6791 current.regmap[HOST_BTREG]=BTREG;
6792 }
9f51b4b9 6793
57871462 6794 for(i=0;i<slen;i++)
6795 {
6796 if(bt[i])
6797 {
6798 int hr;
6799 for(hr=0;hr<HOST_REGS;hr++)
6800 {
6801 // Is this really necessary?
6802 if(current.regmap[hr]==0) current.regmap[hr]=-1;
6803 }
6804 current.isconst=0;
27727b63 6805 current.waswritten=0;
57871462 6806 }
6807 if(i>1)
6808 {
6809 if((opcode[i-2]&0x2f)==0x05) // BNE/BNEL
6810 {
6811 if(rs1[i-2]==0||rs2[i-2]==0)
6812 {
6813 if(rs1[i-2]) {
6814 current.is32|=1LL<<rs1[i-2];
6815 int hr=get_reg(current.regmap,rs1[i-2]|64);
6816 if(hr>=0) current.regmap[hr]=-1;
6817 }
6818 if(rs2[i-2]) {
6819 current.is32|=1LL<<rs2[i-2];
6820 int hr=get_reg(current.regmap,rs2[i-2]|64);
6821 if(hr>=0) current.regmap[hr]=-1;
6822 }
6823 }
6824 }
6825 }
24385cae 6826 current.is32=-1LL;
24385cae 6827
57871462 6828 memcpy(regmap_pre[i],current.regmap,sizeof(current.regmap));
6829 regs[i].wasconst=current.isconst;
6830 regs[i].was32=current.is32;
6831 regs[i].wasdirty=current.dirty;
8575a877 6832 regs[i].loadedconst=0;
57871462 6833 if(itype[i]!=UJUMP&&itype[i]!=CJUMP&&itype[i]!=SJUMP&&itype[i]!=RJUMP&&itype[i]!=FJUMP) {
6834 if(i+1<slen) {
6835 current.u=unneeded_reg[i+1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
57871462 6836 current.u|=1;
57871462 6837 } else {
6838 current.u=1;
57871462 6839 }
6840 } else {
6841 if(i+1<slen) {
6842 current.u=branch_unneeded_reg[i]&~((1LL<<rs1[i+1])|(1LL<<rs2[i+1]));
57871462 6843 current.u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
57871462 6844 current.u|=1;
c43b5311 6845 } else { SysPrintf("oops, branch at end of block with no delay slot\n");exit(1); }
57871462 6846 }
6847 is_ds[i]=ds;
6848 if(ds) {
6849 ds=0; // Skip delay slot, already allocated as part of branch
6850 // ...but we need to alloc it in case something jumps here
6851 if(i+1<slen) {
6852 current.u=branch_unneeded_reg[i-1]&unneeded_reg[i+1];
57871462 6853 }else{
6854 current.u=branch_unneeded_reg[i-1];
57871462 6855 }
6856 current.u&=~((1LL<<rs1[i])|(1LL<<rs2[i]));
57871462 6857 current.u|=1;
57871462 6858 struct regstat temp;
6859 memcpy(&temp,&current,sizeof(current));
6860 temp.wasdirty=temp.dirty;
6861 temp.was32=temp.is32;
6862 // TODO: Take into account unconditional branches, as below
6863 delayslot_alloc(&temp,i);
6864 memcpy(regs[i].regmap,temp.regmap,sizeof(temp.regmap));
6865 regs[i].wasdirty=temp.wasdirty;
6866 regs[i].was32=temp.was32;
6867 regs[i].dirty=temp.dirty;
6868 regs[i].is32=temp.is32;
6869 regs[i].isconst=0;
6870 regs[i].wasconst=0;
6871 current.isconst=0;
6872 // Create entry (branch target) regmap
6873 for(hr=0;hr<HOST_REGS;hr++)
6874 {
6875 int r=temp.regmap[hr];
6876 if(r>=0) {
6877 if(r!=regmap_pre[i][hr]) {
6878 regs[i].regmap_entry[hr]=-1;
6879 }
6880 else
6881 {
6882 if(r<64){
6883 if((current.u>>r)&1) {
6884 regs[i].regmap_entry[hr]=-1;
6885 regs[i].regmap[hr]=-1;
6886 //Don't clear regs in the delay slot as the branch might need them
6887 //current.regmap[hr]=-1;
6888 }else
6889 regs[i].regmap_entry[hr]=r;
6890 }
6891 else {
00fa9369 6892 assert(0);
57871462 6893 }
6894 }
6895 } else {
6896 // First instruction expects CCREG to be allocated
9f51b4b9 6897 if(i==0&&hr==HOST_CCREG)
57871462 6898 regs[i].regmap_entry[hr]=CCREG;
6899 else
6900 regs[i].regmap_entry[hr]=-1;
6901 }
6902 }
6903 }
6904 else { // Not delay slot
6905 switch(itype[i]) {
6906 case UJUMP:
6907 //current.isconst=0; // DEBUG
6908 //current.wasconst=0; // DEBUG
6909 //regs[i].wasconst=0; // DEBUG
6910 clear_const(&current,rt1[i]);
6911 alloc_cc(&current,i);
6912 dirty_reg(&current,CCREG);
6913 if (rt1[i]==31) {
6914 alloc_reg(&current,i,31);
6915 dirty_reg(&current,31);
4ef8f67d 6916 //assert(rs1[i+1]!=31&&rs2[i+1]!=31);
6917 //assert(rt1[i+1]!=rt1[i]);
57871462 6918 #ifdef REG_PREFETCH
6919 alloc_reg(&current,i,PTEMP);
6920 #endif
6921 //current.is32|=1LL<<rt1[i];
6922 }
269bb29a 6923 ooo[i]=1;
6924 delayslot_alloc(&current,i+1);
57871462 6925 //current.isconst=0; // DEBUG
6926 ds=1;
6927 //printf("i=%d, isconst=%x\n",i,current.isconst);
6928 break;
6929 case RJUMP:
6930 //current.isconst=0;
6931 //current.wasconst=0;
6932 //regs[i].wasconst=0;
6933 clear_const(&current,rs1[i]);
6934 clear_const(&current,rt1[i]);
6935 alloc_cc(&current,i);
6936 dirty_reg(&current,CCREG);
6937 if(rs1[i]!=rt1[i+1]&&rs1[i]!=rt2[i+1]) {
6938 alloc_reg(&current,i,rs1[i]);
5067f341 6939 if (rt1[i]!=0) {
6940 alloc_reg(&current,i,rt1[i]);
6941 dirty_reg(&current,rt1[i]);
68b3faee 6942 assert(rs1[i+1]!=rt1[i]&&rs2[i+1]!=rt1[i]);
076655d1 6943 assert(rt1[i+1]!=rt1[i]);
57871462 6944 #ifdef REG_PREFETCH
6945 alloc_reg(&current,i,PTEMP);
6946 #endif
6947 }
6948 #ifdef USE_MINI_HT
6949 if(rs1[i]==31) { // JALR
6950 alloc_reg(&current,i,RHASH);
57871462 6951 alloc_reg(&current,i,RHTBL);
57871462 6952 }
6953 #endif
6954 delayslot_alloc(&current,i+1);
6955 } else {
6956 // The delay slot overwrites our source register,
6957          // so allocate a temporary register to hold the old value.
6958 current.isconst=0;
6959 current.wasconst=0;
6960 regs[i].wasconst=0;
6961 delayslot_alloc(&current,i+1);
6962 current.isconst=0;
6963 alloc_reg(&current,i,RTEMP);
6964 }
6965 //current.isconst=0; // DEBUG
e1190b87 6966 ooo[i]=1;
57871462 6967 ds=1;
6968 break;
6969 case CJUMP:
6970 //current.isconst=0;
6971 //current.wasconst=0;
6972 //regs[i].wasconst=0;
6973 clear_const(&current,rs1[i]);
6974 clear_const(&current,rs2[i]);
6975 if((opcode[i]&0x3E)==4) // BEQ/BNE
6976 {
6977 alloc_cc(&current,i);
6978 dirty_reg(&current,CCREG);
6979 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
6980 if(rs2[i]) alloc_reg(&current,i,rs2[i]);
6981 if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1))
6982 {
00fa9369 6983 assert(0);
57871462 6984 }
6985 if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1]))||
6986 (rs2[i]&&(rs2[i]==rt1[i+1]||rs2[i]==rt2[i+1]))) {
6987 // The delay slot overwrites one of our conditions.
6988 // Allocate the branch condition registers instead.
57871462 6989 current.isconst=0;
6990 current.wasconst=0;
6991 regs[i].wasconst=0;
6992 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
6993 if(rs2[i]) alloc_reg(&current,i,rs2[i]);
6994 if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1))
6995 {
00fa9369 6996 assert(0);
57871462 6997 }
6998 }
e1190b87 6999 else
7000 {
7001 ooo[i]=1;
7002 delayslot_alloc(&current,i+1);
7003 }
57871462 7004 }
7005 else
7006 if((opcode[i]&0x3E)==6) // BLEZ/BGTZ
7007 {
7008 alloc_cc(&current,i);
7009 dirty_reg(&current,CCREG);
7010 alloc_reg(&current,i,rs1[i]);
7011 if(!(current.is32>>rs1[i]&1))
7012 {
00fa9369 7013 assert(0);
57871462 7014 }
7015 if(rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) {
7016 // The delay slot overwrites one of our conditions.
7017 // Allocate the branch condition registers instead.
57871462 7018 current.isconst=0;
7019 current.wasconst=0;
7020 regs[i].wasconst=0;
7021 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
7022 if(!((current.is32>>rs1[i])&1))
7023 {
00fa9369 7024 assert(0);
57871462 7025 }
7026 }
e1190b87 7027 else
7028 {
7029 ooo[i]=1;
7030 delayslot_alloc(&current,i+1);
7031 }
57871462 7032 }
7033 else
7034 // Don't alloc the delay slot yet because we might not execute it
7035 if((opcode[i]&0x3E)==0x14) // BEQL/BNEL
7036 {
7037 current.isconst=0;
7038 current.wasconst=0;
7039 regs[i].wasconst=0;
7040 alloc_cc(&current,i);
7041 dirty_reg(&current,CCREG);
7042 alloc_reg(&current,i,rs1[i]);
7043 alloc_reg(&current,i,rs2[i]);
7044 if(!((current.is32>>rs1[i])&(current.is32>>rs2[i])&1))
7045 {
00fa9369 7046 assert(0);
57871462 7047 }
7048 }
7049 else
7050 if((opcode[i]&0x3E)==0x16) // BLEZL/BGTZL
7051 {
7052 current.isconst=0;
7053 current.wasconst=0;
7054 regs[i].wasconst=0;
7055 alloc_cc(&current,i);
7056 dirty_reg(&current,CCREG);
7057 alloc_reg(&current,i,rs1[i]);
7058 if(!(current.is32>>rs1[i]&1))
7059 {
00fa9369 7060 assert(0);
57871462 7061 }
7062 }
7063 ds=1;
7064 //current.isconst=0;
7065 break;
7066 case SJUMP:
7067 //current.isconst=0;
7068 //current.wasconst=0;
7069 //regs[i].wasconst=0;
7070 clear_const(&current,rs1[i]);
7071 clear_const(&current,rt1[i]);
7072 //if((opcode2[i]&0x1E)==0x0) // BLTZ/BGEZ
7073 if((opcode2[i]&0x0E)==0x0) // BLTZ/BGEZ
7074 {
7075 alloc_cc(&current,i);
7076 dirty_reg(&current,CCREG);
7077 alloc_reg(&current,i,rs1[i]);
7078 if(!(current.is32>>rs1[i]&1))
7079 {
00fa9369 7080 assert(0);
57871462 7081 }
7082 if (rt1[i]==31) { // BLTZAL/BGEZAL
7083 alloc_reg(&current,i,31);
7084 dirty_reg(&current,31);
57871462 7085 //#ifdef REG_PREFETCH
7086 //alloc_reg(&current,i,PTEMP);
7087 //#endif
7088 //current.is32|=1LL<<rt1[i];
7089 }
e1190b87 7090 if((rs1[i]&&(rs1[i]==rt1[i+1]||rs1[i]==rt2[i+1])) // The delay slot overwrites the branch condition.
7091 ||(rt1[i]==31&&(rs1[i+1]==31||rs2[i+1]==31||rt1[i+1]==31||rt2[i+1]==31))) { // DS touches $ra
57871462 7092 // Allocate the branch condition registers instead.
57871462 7093 current.isconst=0;
7094 current.wasconst=0;
7095 regs[i].wasconst=0;
7096 if(rs1[i]) alloc_reg(&current,i,rs1[i]);
7097 if(!((current.is32>>rs1[i])&1))
7098 {
00fa9369 7099 assert(0);
57871462 7100 }
7101 }
e1190b87 7102 else
7103 {
7104 ooo[i]=1;
7105 delayslot_alloc(&current,i+1);
7106 }
57871462 7107 }
7108 else
7109 // Don't alloc the delay slot yet because we might not execute it
7110 if((opcode2[i]&0x1E)==0x2) // BLTZL/BGEZL
7111 {
7112 current.isconst=0;
7113 current.wasconst=0;
7114 regs[i].wasconst=0;
7115 alloc_cc(&current,i);
7116 dirty_reg(&current,CCREG);
7117 alloc_reg(&current,i,rs1[i]);
7118 if(!(current.is32>>rs1[i]&1))
7119 {
00fa9369 7120 assert(0);
57871462 7121 }
7122 }
7123 ds=1;
7124 //current.isconst=0;
7125 break;
7126 case FJUMP:
00fa9369 7127 assert(0);
57871462 7128 break;
7129 case IMM16:
7130 imm16_alloc(&current,i);
7131 break;
7132 case LOAD:
7133 case LOADLR:
7134 load_alloc(&current,i);
7135 break;
7136 case STORE:
7137 case STORELR:
7138 store_alloc(&current,i);
7139 break;
7140 case ALU:
7141 alu_alloc(&current,i);
7142 break;
7143 case SHIFT:
7144 shift_alloc(&current,i);
7145 break;
7146 case MULTDIV:
7147 multdiv_alloc(&current,i);
7148 break;
7149 case SHIFTIMM:
7150 shiftimm_alloc(&current,i);
7151 break;
7152 case MOV:
7153 mov_alloc(&current,i);
7154 break;
7155 case COP0:
7156 cop0_alloc(&current,i);
7157 break;
7158 case COP1:
b9b61529 7159 case COP2:
00fa9369 7160 cop12_alloc(&current,i);
57871462 7161 break;
7162 case C1LS:
7163 c1ls_alloc(&current,i);
7164 break;
b9b61529 7165 case C2LS:
7166 c2ls_alloc(&current,i);
7167 break;
7168 case C2OP:
7169 c2op_alloc(&current,i);
7170 break;
57871462 7171 case SYSCALL:
7139f3c8 7172 case HLECALL:
1e973cb0 7173 case INTCALL:
57871462 7174 syscall_alloc(&current,i);
7175 break;
7176 case SPAN:
7177 pagespan_alloc(&current,i);
7178 break;
7179 }
9f51b4b9 7180
57871462 7181 // Create entry (branch target) regmap
7182 for(hr=0;hr<HOST_REGS;hr++)
7183 {
581335b0 7184 int r,or;
57871462 7185 r=current.regmap[hr];
7186 if(r>=0) {
7187 if(r!=regmap_pre[i][hr]) {
7188 // TODO: delay slot (?)
7189 or=get_reg(regmap_pre[i],r); // Get old mapping for this register
7190 if(or<0||(r&63)>=TEMPREG){
7191 regs[i].regmap_entry[hr]=-1;
7192 }
7193 else
7194 {
7195 // Just move it to a different register
7196 regs[i].regmap_entry[hr]=r;
7197 // If it was dirty before, it's still dirty
7198 if((regs[i].wasdirty>>or)&1) dirty_reg(&current,r&63);
7199 }
7200 }
7201 else
7202 {
7203 // Unneeded
7204 if(r==0){
7205 regs[i].regmap_entry[hr]=0;
7206 }
7207 else
7208 if(r<64){
7209 if((current.u>>r)&1) {
7210 regs[i].regmap_entry[hr]=-1;
7211 //regs[i].regmap[hr]=-1;
7212 current.regmap[hr]=-1;
7213 }else
7214 regs[i].regmap_entry[hr]=r;
7215 }
7216 else {
00fa9369 7217 assert(0);
57871462 7218 }
7219 }
7220 } else {
7221 // Branches expect CCREG to be allocated at the target
9f51b4b9 7222 if(regmap_pre[i][hr]==CCREG)
57871462 7223 regs[i].regmap_entry[hr]=CCREG;
7224 else
7225 regs[i].regmap_entry[hr]=-1;
7226 }
7227 }
7228 memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap));
7229 }
27727b63 7230
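    // Track GPRs recently used as the base of a store with a small offset
    // (waswritten); writing the register or storing with a large offset clears
    // the flag. This presumably lets later stores through the same base skip
    // a redundant self-modifying-code check.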
7231 if(i>0&&(itype[i-1]==STORE||itype[i-1]==STORELR||(itype[i-1]==C2LS&&opcode[i-1]==0x3a))&&(u_int)imm[i-1]<0x800)
7232 current.waswritten|=1<<rs1[i-1];
7233 current.waswritten&=~(1<<rt1[i]);
7234 current.waswritten&=~(1<<rt2[i]);
7235 if((itype[i]==STORE||itype[i]==STORELR||(itype[i]==C2LS&&opcode[i]==0x3a))&&(u_int)imm[i]>=0x800)
7236 current.waswritten&=~(1<<rs1[i]);
7237
57871462 7238 /* Branch post-alloc */
7239 if(i>0)
7240 {
7241 current.was32=current.is32;
7242 current.wasdirty=current.dirty;
7243 switch(itype[i-1]) {
7244 case UJUMP:
7245 memcpy(&branch_regs[i-1],&current,sizeof(current));
7246 branch_regs[i-1].isconst=0;
7247 branch_regs[i-1].wasconst=0;
7248 branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
57871462 7249 alloc_cc(&branch_regs[i-1],i-1);
7250 dirty_reg(&branch_regs[i-1],CCREG);
7251 if(rt1[i-1]==31) { // JAL
7252 alloc_reg(&branch_regs[i-1],i-1,31);
7253 dirty_reg(&branch_regs[i-1],31);
7254 branch_regs[i-1].is32|=1LL<<31;
7255 }
7256 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
956f3129 7257 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 7258 break;
7259 case RJUMP:
7260 memcpy(&branch_regs[i-1],&current,sizeof(current));
7261 branch_regs[i-1].isconst=0;
7262 branch_regs[i-1].wasconst=0;
7263 branch_regs[i-1].u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
57871462 7264 alloc_cc(&branch_regs[i-1],i-1);
7265 dirty_reg(&branch_regs[i-1],CCREG);
7266 alloc_reg(&branch_regs[i-1],i-1,rs1[i-1]);
5067f341 7267 if(rt1[i-1]!=0) { // JALR
7268 alloc_reg(&branch_regs[i-1],i-1,rt1[i-1]);
7269 dirty_reg(&branch_regs[i-1],rt1[i-1]);
7270 branch_regs[i-1].is32|=1LL<<rt1[i-1];
57871462 7271 }
7272 #ifdef USE_MINI_HT
7273 if(rs1[i-1]==31) { // JALR
7274 alloc_reg(&branch_regs[i-1],i-1,RHASH);
57871462 7275 alloc_reg(&branch_regs[i-1],i-1,RHTBL);
57871462 7276 }
7277 #endif
7278 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
956f3129 7279 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 7280 break;
7281 case CJUMP:
7282 if((opcode[i-1]&0x3E)==4) // BEQ/BNE
7283 {
7284 alloc_cc(&current,i-1);
7285 dirty_reg(&current,CCREG);
7286 if((rs1[i-1]&&(rs1[i-1]==rt1[i]||rs1[i-1]==rt2[i]))||
7287 (rs2[i-1]&&(rs2[i-1]==rt1[i]||rs2[i-1]==rt2[i]))) {
7288 // The delay slot overwrote one of our conditions
7289 // Delay slot goes after the test (in order)
7290 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
57871462 7291 current.u|=1;
57871462 7292 delayslot_alloc(&current,i);
7293 current.isconst=0;
7294 }
7295 else
7296 {
7297 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i-1])|(1LL<<rs2[i-1]));
57871462 7298 // Alloc the branch condition registers
7299 if(rs1[i-1]) alloc_reg(&current,i-1,rs1[i-1]);
7300 if(rs2[i-1]) alloc_reg(&current,i-1,rs2[i-1]);
7301 if(!((current.is32>>rs1[i-1])&(current.is32>>rs2[i-1])&1))
7302 {
00fa9369 7303 assert(0);
57871462 7304 }
7305 }
7306 memcpy(&branch_regs[i-1],&current,sizeof(current));
7307 branch_regs[i-1].isconst=0;
7308 branch_regs[i-1].wasconst=0;
7309 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
956f3129 7310 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 7311 }
7312 else
7313 if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ
7314 {
7315 alloc_cc(&current,i-1);
7316 dirty_reg(&current,CCREG);
7317 if(rs1[i-1]==rt1[i]||rs1[i-1]==rt2[i]) {
7318 // The delay slot overwrote the branch condition
7319 // Delay slot goes after the test (in order)
7320 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
57871462 7321 current.u|=1;
57871462 7322 delayslot_alloc(&current,i);
7323 current.isconst=0;
7324 }
7325 else
7326 {
7327 current.u=branch_unneeded_reg[i-1]&~(1LL<<rs1[i-1]);
57871462 7328 // Alloc the branch condition register
7329 alloc_reg(&current,i-1,rs1[i-1]);
7330 if(!(current.is32>>rs1[i-1]&1))
7331 {
00fa9369 7332 assert(0);
57871462 7333 }
7334 }
7335 memcpy(&branch_regs[i-1],&current,sizeof(current));
7336 branch_regs[i-1].isconst=0;
7337 branch_regs[i-1].wasconst=0;
7338 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
956f3129 7339 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 7340 }
7341 else
7342 // Alloc the delay slot in case the branch is taken
7343 if((opcode[i-1]&0x3E)==0x14) // BEQL/BNEL
7344 {
7345 memcpy(&branch_regs[i-1],&current,sizeof(current));
7346 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
57871462 7347 alloc_cc(&branch_regs[i-1],i);
7348 dirty_reg(&branch_regs[i-1],CCREG);
7349 delayslot_alloc(&branch_regs[i-1],i);
7350 branch_regs[i-1].isconst=0;
7351 alloc_reg(&current,i,CCREG); // Not taken path
7352 dirty_reg(&current,CCREG);
7353 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
7354 }
7355 else
7356 if((opcode[i-1]&0x3E)==0x16) // BLEZL/BGTZL
7357 {
7358 memcpy(&branch_regs[i-1],&current,sizeof(current));
7359 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
57871462 7360 alloc_cc(&branch_regs[i-1],i);
7361 dirty_reg(&branch_regs[i-1],CCREG);
7362 delayslot_alloc(&branch_regs[i-1],i);
7363 branch_regs[i-1].isconst=0;
7364 alloc_reg(&current,i,CCREG); // Not taken path
7365 dirty_reg(&current,CCREG);
7366 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
7367 }
7368 break;
7369 case SJUMP:
7370 //if((opcode2[i-1]&0x1E)==0) // BLTZ/BGEZ
7371 if((opcode2[i-1]&0x0E)==0) // BLTZ/BGEZ
7372 {
7373 alloc_cc(&current,i-1);
7374 dirty_reg(&current,CCREG);
7375 if(rs1[i-1]==rt1[i]||rs1[i-1]==rt2[i]) {
7376 // The delay slot overwrote the branch condition
7377 // Delay slot goes after the test (in order)
7378 current.u=branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i]));
57871462 7379 current.u|=1;
57871462 7380 delayslot_alloc(&current,i);
7381 current.isconst=0;
7382 }
7383 else
7384 {
7385 current.u=branch_unneeded_reg[i-1]&~(1LL<<rs1[i-1]);
57871462 7386 // Alloc the branch condition register
7387 alloc_reg(&current,i-1,rs1[i-1]);
7388 if(!(current.is32>>rs1[i-1]&1))
7389 {
00fa9369 7390 assert(0);
57871462 7391 }
7392 }
7393 memcpy(&branch_regs[i-1],&current,sizeof(current));
7394 branch_regs[i-1].isconst=0;
7395 branch_regs[i-1].wasconst=0;
7396 memcpy(&branch_regs[i-1].regmap_entry,&current.regmap,sizeof(current.regmap));
956f3129 7397 memcpy(constmap[i],constmap[i-1],sizeof(current_constmap));
57871462 7398 }
7399 else
7400 // Alloc the delay slot in case the branch is taken
7401 if((opcode2[i-1]&0x1E)==2) // BLTZL/BGEZL
7402 {
7403 memcpy(&branch_regs[i-1],&current,sizeof(current));
7404 branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL<<rs1[i])|(1LL<<rs2[i])|(1LL<<rt1[i])|(1LL<<rt2[i])))|1;
57871462 7405 alloc_cc(&branch_regs[i-1],i);
7406 dirty_reg(&branch_regs[i-1],CCREG);
7407 delayslot_alloc(&branch_regs[i-1],i);
7408 branch_regs[i-1].isconst=0;
7409 alloc_reg(&current,i,CCREG); // Not taken path
7410 dirty_reg(&current,CCREG);
7411 memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap));
7412 }
7413 // FIXME: BLTZAL/BGEZAL
7414 if(opcode2[i-1]&0x10) { // BxxZAL
7415 alloc_reg(&branch_regs[i-1],i-1,31);
7416 dirty_reg(&branch_regs[i-1],31);
7417 branch_regs[i-1].is32|=1LL<<31;
7418 }
7419 break;
7420 case FJUMP:
00fa9369 7421 assert(0);
57871462 7422 break;
7423 }
7424
7425 if(itype[i-1]==UJUMP||itype[i-1]==RJUMP||(source[i-1]>>16)==0x1000)
7426 {
7427 if(rt1[i-1]==31) // JAL/JALR
7428 {
7429 // Subroutine call will return here, don't alloc any registers
7430 current.is32=1;
7431 current.dirty=0;
7432 clear_all_regs(current.regmap);
7433 alloc_reg(&current,i,CCREG);
7434 dirty_reg(&current,CCREG);
7435 }
7436 else if(i+1<slen)
7437 {
7438 // Internal branch will jump here, match registers to caller
7439 current.is32=0x3FFFFFFFFLL;
7440 current.dirty=0;
7441 clear_all_regs(current.regmap);
7442 alloc_reg(&current,i,CCREG);
7443 dirty_reg(&current,CCREG);
7444 for(j=i-1;j>=0;j--)
7445 {
7446 if(ba[j]==start+i*4+4) {
7447 memcpy(current.regmap,branch_regs[j].regmap,sizeof(current.regmap));
7448 current.is32=branch_regs[j].is32;
7449 current.dirty=branch_regs[j].dirty;
7450 break;
7451 }
7452 }
7453 while(j>=0) {
7454 if(ba[j]==start+i*4+4) {
7455 for(hr=0;hr<HOST_REGS;hr++) {
7456 if(current.regmap[hr]!=branch_regs[j].regmap[hr]) {
7457 current.regmap[hr]=-1;
7458 }
7459 current.is32&=branch_regs[j].is32;
7460 current.dirty&=branch_regs[j].dirty;
7461 }
7462 }
7463 j--;
7464 }
7465 }
7466 }
7467 }
7468
7469 // Count cycles in between branches
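  // ccadj[i] is the cycle count accumulated since the last branch or
  // syscall; the emitted cycle-counter updates use this value (see the
  // CLOCK_ADJUST(ccadj[...]) adds in pass 8). Most instructions cost one
  // cycle, with rough penalties below for GTE ops, coprocessor
  // loads/stores and runs of stores.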
7470 ccadj[i]=cc;
7139f3c8 7471 if(i>0&&(itype[i-1]==RJUMP||itype[i-1]==UJUMP||itype[i-1]==CJUMP||itype[i-1]==SJUMP||itype[i-1]==FJUMP||itype[i]==SYSCALL||itype[i]==HLECALL))
57871462 7472 {
7473 cc=0;
7474 }
71e490c5 7475#if !defined(DRC_DBG)
054175e9 7476 else if(itype[i]==C2OP&&gte_cycletab[source[i]&0x3f]>2)
7477 {
7478 // GTE runs in parallel until accessed, divide by 2 for a rough guess
7479 cc+=gte_cycletab[source[i]&0x3f]/2;
7480 }
b6e87b2b 7481 else if(/*itype[i]==LOAD||itype[i]==STORE||*/itype[i]==C1LS) // load,store causes weird timing issues
fb407447 7482 {
7483 cc+=2; // 2 cycle penalty (after CLOCK_DIVIDER)
7484 }
5fdcbb5a 7485 else if(i>1&&itype[i]==STORE&&itype[i-1]==STORE&&itype[i-2]==STORE&&!bt[i])
7486 {
7487 cc+=4;
7488 }
fb407447 7489 else if(itype[i]==C2LS)
7490 {
7491 cc+=4;
7492 }
7493#endif
57871462 7494 else
7495 {
7496 cc++;
7497 }
7498
7499 flush_dirty_uppers(&current);
7500 if(!is_ds[i]) {
7501 regs[i].is32=current.is32;
7502 regs[i].dirty=current.dirty;
7503 regs[i].isconst=current.isconst;
956f3129 7504 memcpy(constmap[i],current_constmap,sizeof(current_constmap));
57871462 7505 }
7506 for(hr=0;hr<HOST_REGS;hr++) {
7507 if(hr!=EXCLUDE_REG&&regs[i].regmap[hr]>=0) {
7508 if(regmap_pre[i][hr]!=regs[i].regmap[hr]) {
7509 regs[i].wasconst&=~(1<<hr);
7510 }
7511 }
7512 }
7513 if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1;
27727b63 7514 regs[i].waswritten=current.waswritten;
57871462 7515 }
9f51b4b9 7516
57871462 7517 /* Pass 4 - Cull unused host registers */
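  // Walk the block backwards and compute, per instruction, a bitmask of
  // host registers whose contents are still needed (needed_reg[i]).
  // Host registers outside the mask are deallocated below so that later
  // passes can reuse them.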
9f51b4b9 7518
57871462 7519 uint64_t nr=0;
9f51b4b9 7520
57871462 7521 for (i=slen-1;i>=0;i--)
7522 {
7523 int hr;
7524 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
7525 {
7526 if(ba[i]<start || ba[i]>=(start+slen*4))
7527 {
7528 // Branch out of this block, don't need anything
7529 nr=0;
7530 }
7531 else
7532 {
7533 // Internal branch
7534 // Need whatever matches the target
7535 nr=0;
7536 int t=(ba[i]-start)>>2;
7537 for(hr=0;hr<HOST_REGS;hr++)
7538 {
7539 if(regs[i].regmap_entry[hr]>=0) {
7540 if(regs[i].regmap_entry[hr]==regs[t].regmap_entry[hr]) nr|=1<<hr;
7541 }
7542 }
7543 }
7544 // Conditional branch may need registers for following instructions
7545 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
7546 {
7547 if(i<slen-2) {
7548 nr|=needed_reg[i+2];
7549 for(hr=0;hr<HOST_REGS;hr++)
7550 {
7551 if(regmap_pre[i+2][hr]>=0&&get_reg(regs[i+2].regmap_entry,regmap_pre[i+2][hr])<0) nr&=~(1<<hr);
7552 //if((regmap_entry[i+2][hr])>=0) if(!((nr>>hr)&1)) printf("%x-bogus(%d=%d)\n",start+i*4,hr,regmap_entry[i+2][hr]);
7553 }
7554 }
7555 }
7556 // Don't need stuff which is overwritten
f5955059 7557 //if(regs[i].regmap[hr]!=regmap_pre[i][hr]) nr&=~(1<<hr);
7558 //if(regs[i].regmap[hr]<0) nr&=~(1<<hr);
57871462 7559 // Merge in delay slot
7560 for(hr=0;hr<HOST_REGS;hr++)
7561 {
7562 if(!likely[i]) {
7563 // These are overwritten unless the branch is "likely"
7564 // and the delay slot is nullified if not taken
7565 if(rt1[i+1]&&rt1[i+1]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
7566 if(rt2[i+1]&&rt2[i+1]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
7567 }
7568 if(us1[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
7569 if(us2[i+1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
7570 if(rs1[i+1]==regmap_pre[i][hr]) nr|=1<<hr;
7571 if(rs2[i+1]==regmap_pre[i][hr]) nr|=1<<hr;
7572 if(us1[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
7573 if(us2[i+1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
7574 if(rs1[i+1]==regs[i].regmap_entry[hr]) nr|=1<<hr;
7575 if(rs2[i+1]==regs[i].regmap_entry[hr]) nr|=1<<hr;
b9b61529 7576 if(itype[i+1]==STORE || itype[i+1]==STORELR || (opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) {
57871462 7577 if(regmap_pre[i][hr]==INVCP) nr|=1<<hr;
7578 if(regs[i].regmap_entry[hr]==INVCP) nr|=1<<hr;
7579 }
7580 }
7581 }
1e973cb0 7582 else if(itype[i]==SYSCALL||itype[i]==HLECALL||itype[i]==INTCALL)
57871462 7583 {
7584 // SYSCALL instruction (software interrupt)
7585 nr=0;
7586 }
7587 else if(itype[i]==COP0 && (source[i]&0x3f)==0x18)
7588 {
7589 // ERET instruction (return from interrupt)
7590 nr=0;
7591 }
7592 else // Non-branch
7593 {
7594 if(i<slen-1) {
7595 for(hr=0;hr<HOST_REGS;hr++) {
7596 if(regmap_pre[i+1][hr]>=0&&get_reg(regs[i+1].regmap_entry,regmap_pre[i+1][hr])<0) nr&=~(1<<hr);
7597 if(regs[i].regmap[hr]!=regmap_pre[i+1][hr]) nr&=~(1<<hr);
7598 if(regs[i].regmap[hr]!=regmap_pre[i][hr]) nr&=~(1<<hr);
7599 if(regs[i].regmap[hr]<0) nr&=~(1<<hr);
7600 }
7601 }
7602 }
7603 for(hr=0;hr<HOST_REGS;hr++)
7604 {
7605 // Overwritten registers are not needed
7606 if(rt1[i]&&rt1[i]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
7607 if(rt2[i]&&rt2[i]==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
7608 if(FTEMP==(regs[i].regmap[hr]&63)) nr&=~(1<<hr);
7609 // Source registers are needed
7610 if(us1[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
7611 if(us2[i]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
7612 if(rs1[i]==regmap_pre[i][hr]) nr|=1<<hr;
7613 if(rs2[i]==regmap_pre[i][hr]) nr|=1<<hr;
7614 if(us1[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
7615 if(us2[i]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
7616 if(rs1[i]==regs[i].regmap_entry[hr]) nr|=1<<hr;
7617 if(rs2[i]==regs[i].regmap_entry[hr]) nr|=1<<hr;
b9b61529 7618 if(itype[i]==STORE || itype[i]==STORELR || (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) {
57871462 7619 if(regmap_pre[i][hr]==INVCP) nr|=1<<hr;
7620 if(regs[i].regmap_entry[hr]==INVCP) nr|=1<<hr;
7621 }
 7622 // Don't store a register immediately after writing it,
 7623 // as that may prevent dual-issue.
 7624 // But do so if this is a branch target; otherwise we
 7625 // might have to load the register before the branch.
7626 if(i>0&&!bt[i]&&((regs[i].wasdirty>>hr)&1)) {
00fa9369 7627 if((regmap_pre[i][hr]>0&&regmap_pre[i][hr]<64&&!((unneeded_reg[i]>>regmap_pre[i][hr])&1))) {
57871462 7628 if(rt1[i-1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
7629 if(rt2[i-1]==(regmap_pre[i][hr]&63)) nr|=1<<hr;
7630 }
00fa9369 7631 if((regs[i].regmap_entry[hr]>0&&regs[i].regmap_entry[hr]<64&&!((unneeded_reg[i]>>regs[i].regmap_entry[hr])&1))) {
57871462 7632 if(rt1[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
7633 if(rt2[i-1]==(regs[i].regmap_entry[hr]&63)) nr|=1<<hr;
7634 }
7635 }
7636 }
7637 // Cycle count is needed at branches. Assume it is needed at the target too.
7638 if(i==0||bt[i]||itype[i]==CJUMP||itype[i]==FJUMP||itype[i]==SPAN) {
7639 if(regmap_pre[i][HOST_CCREG]==CCREG) nr|=1<<HOST_CCREG;
7640 if(regs[i].regmap_entry[HOST_CCREG]==CCREG) nr|=1<<HOST_CCREG;
7641 }
7642 // Save it
7643 needed_reg[i]=nr;
9f51b4b9 7644
57871462 7645 // Deallocate unneeded registers
7646 for(hr=0;hr<HOST_REGS;hr++)
7647 {
7648 if(!((nr>>hr)&1)) {
7649 if(regs[i].regmap_entry[hr]!=CCREG) regs[i].regmap_entry[hr]=-1;
7650 if((regs[i].regmap[hr]&63)!=rs1[i] && (regs[i].regmap[hr]&63)!=rs2[i] &&
7651 (regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
7652 (regs[i].regmap[hr]&63)!=PTEMP && (regs[i].regmap[hr]&63)!=CCREG)
7653 {
7654 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
7655 {
7656 if(likely[i]) {
7657 regs[i].regmap[hr]=-1;
7658 regs[i].isconst&=~(1<<hr);
79c75f1b 7659 if(i<slen-2) {
7660 regmap_pre[i+2][hr]=-1;
7661 regs[i+2].wasconst&=~(1<<hr);
7662 }
57871462 7663 }
7664 }
7665 }
7666 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
7667 {
7668 int d1=0,d2=0,map=0,temp=0;
7669 if(get_reg(regs[i].regmap,rt1[i+1]|64)>=0||get_reg(branch_regs[i].regmap,rt1[i+1]|64)>=0)
7670 {
7671 d1=dep1[i+1];
7672 d2=dep2[i+1];
7673 }
b9b61529 7674 if(itype[i+1]==STORE || itype[i+1]==STORELR ||
7675 (opcode[i+1]&0x3b)==0x39 || (opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
57871462 7676 map=INVCP;
7677 }
7678 if(itype[i+1]==LOADLR || itype[i+1]==STORELR ||
b9b61529 7679 itype[i+1]==C1LS || itype[i+1]==C2LS)
57871462 7680 temp=FTEMP;
7681 if((regs[i].regmap[hr]&63)!=rs1[i] && (regs[i].regmap[hr]&63)!=rs2[i] &&
7682 (regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
7683 (regs[i].regmap[hr]&63)!=rt1[i+1] && (regs[i].regmap[hr]&63)!=rt2[i+1] &&
7684 (regs[i].regmap[hr]^64)!=us1[i+1] && (regs[i].regmap[hr]^64)!=us2[i+1] &&
7685 (regs[i].regmap[hr]^64)!=d1 && (regs[i].regmap[hr]^64)!=d2 &&
7686 regs[i].regmap[hr]!=rs1[i+1] && regs[i].regmap[hr]!=rs2[i+1] &&
7687 (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=PTEMP &&
7688 regs[i].regmap[hr]!=RHASH && regs[i].regmap[hr]!=RHTBL &&
7689 regs[i].regmap[hr]!=RTEMP && regs[i].regmap[hr]!=CCREG &&
7690 regs[i].regmap[hr]!=map )
7691 {
7692 regs[i].regmap[hr]=-1;
7693 regs[i].isconst&=~(1<<hr);
7694 if((branch_regs[i].regmap[hr]&63)!=rs1[i] && (branch_regs[i].regmap[hr]&63)!=rs2[i] &&
7695 (branch_regs[i].regmap[hr]&63)!=rt1[i] && (branch_regs[i].regmap[hr]&63)!=rt2[i] &&
7696 (branch_regs[i].regmap[hr]&63)!=rt1[i+1] && (branch_regs[i].regmap[hr]&63)!=rt2[i+1] &&
7697 (branch_regs[i].regmap[hr]^64)!=us1[i+1] && (branch_regs[i].regmap[hr]^64)!=us2[i+1] &&
7698 (branch_regs[i].regmap[hr]^64)!=d1 && (branch_regs[i].regmap[hr]^64)!=d2 &&
7699 branch_regs[i].regmap[hr]!=rs1[i+1] && branch_regs[i].regmap[hr]!=rs2[i+1] &&
7700 (branch_regs[i].regmap[hr]&63)!=temp && branch_regs[i].regmap[hr]!=PTEMP &&
7701 branch_regs[i].regmap[hr]!=RHASH && branch_regs[i].regmap[hr]!=RHTBL &&
7702 branch_regs[i].regmap[hr]!=RTEMP && branch_regs[i].regmap[hr]!=CCREG &&
7703 branch_regs[i].regmap[hr]!=map)
7704 {
7705 branch_regs[i].regmap[hr]=-1;
7706 branch_regs[i].regmap_entry[hr]=-1;
7707 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000)
7708 {
7709 if(!likely[i]&&i<slen-2) {
7710 regmap_pre[i+2][hr]=-1;
79c75f1b 7711 regs[i+2].wasconst&=~(1<<hr);
57871462 7712 }
7713 }
7714 }
7715 }
7716 }
7717 else
7718 {
7719 // Non-branch
7720 if(i>0)
7721 {
7722 int d1=0,d2=0,map=-1,temp=-1;
7723 if(get_reg(regs[i].regmap,rt1[i]|64)>=0)
7724 {
7725 d1=dep1[i];
7726 d2=dep2[i];
7727 }
1edfcc68 7728 if(itype[i]==STORE || itype[i]==STORELR ||
b9b61529 7729 (opcode[i]&0x3b)==0x39 || (opcode[i]&0x3b)==0x3a) { // SWC1/SDC1 || SWC2/SDC2
57871462 7730 map=INVCP;
7731 }
7732 if(itype[i]==LOADLR || itype[i]==STORELR ||
b9b61529 7733 itype[i]==C1LS || itype[i]==C2LS)
57871462 7734 temp=FTEMP;
7735 if((regs[i].regmap[hr]&63)!=rt1[i] && (regs[i].regmap[hr]&63)!=rt2[i] &&
7736 (regs[i].regmap[hr]^64)!=us1[i] && (regs[i].regmap[hr]^64)!=us2[i] &&
7737 (regs[i].regmap[hr]^64)!=d1 && (regs[i].regmap[hr]^64)!=d2 &&
7738 regs[i].regmap[hr]!=rs1[i] && regs[i].regmap[hr]!=rs2[i] &&
7739 (regs[i].regmap[hr]&63)!=temp && regs[i].regmap[hr]!=map &&
7740 (itype[i]!=SPAN||regs[i].regmap[hr]!=CCREG))
7741 {
7742 if(i<slen-1&&!is_ds[i]) {
7743 if(regmap_pre[i+1][hr]!=-1 || regs[i].regmap[hr]!=-1)
7744 if(regmap_pre[i+1][hr]!=regs[i].regmap[hr])
7745 if(regs[i].regmap[hr]<64||!((regs[i].was32>>(regs[i].regmap[hr]&63))&1))
7746 {
c43b5311 7747 SysPrintf("fail: %x (%d %d!=%d)\n",start+i*4,hr,regmap_pre[i+1][hr],regs[i].regmap[hr]);
57871462 7748 assert(regmap_pre[i+1][hr]==regs[i].regmap[hr]);
7749 }
7750 regmap_pre[i+1][hr]=-1;
7751 if(regs[i+1].regmap_entry[hr]==CCREG) regs[i+1].regmap_entry[hr]=-1;
79c75f1b 7752 regs[i+1].wasconst&=~(1<<hr);
57871462 7753 }
7754 regs[i].regmap[hr]=-1;
7755 regs[i].isconst&=~(1<<hr);
7756 }
7757 }
7758 }
7759 }
7760 }
7761 }
9f51b4b9 7762
57871462 7763 /* Pass 5 - Pre-allocate registers */
9f51b4b9 7764
57871462 7765 // If a register is allocated during a loop, try to allocate it for the
7766 // entire loop, if possible. This avoids loading/storing registers
7767 // inside of the loop.
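  // f_regmap[] tracks the preferred guest register for each host register.
  // When a backwards branch (i.e. a loop) is found, the code below tries to
  // extend the allocation at the branch over the whole loop body, giving up
  // if any instruction in between conflicts or runs out of free registers.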
9f51b4b9 7768
57871462 7769 signed char f_regmap[HOST_REGS];
7770 clear_all_regs(f_regmap);
7771 for(i=0;i<slen-1;i++)
7772 {
7773 if(itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
7774 {
9f51b4b9 7775 if(ba[i]>=start && ba[i]<(start+i*4))
57871462 7776 if(itype[i+1]==NOP||itype[i+1]==MOV||itype[i+1]==ALU
7777 ||itype[i+1]==SHIFTIMM||itype[i+1]==IMM16||itype[i+1]==LOAD
7778 ||itype[i+1]==STORE||itype[i+1]==STORELR||itype[i+1]==C1LS
00fa9369 7779 ||itype[i+1]==SHIFT||itype[i+1]==COP1
b9b61529 7780 ||itype[i+1]==COP2||itype[i+1]==C2LS||itype[i+1]==C2OP)
57871462 7781 {
7782 int t=(ba[i]-start)>>2;
7783 if(t>0&&(itype[t-1]!=UJUMP&&itype[t-1]!=RJUMP&&itype[t-1]!=CJUMP&&itype[t-1]!=SJUMP&&itype[t-1]!=FJUMP)) // loop_preload can't handle jumps into delay slots
198df76f 7784 if(t<2||(itype[t-2]!=UJUMP&&itype[t-2]!=RJUMP)||rt1[t-2]!=31) // call/ret assumes no registers allocated
57871462 7785 for(hr=0;hr<HOST_REGS;hr++)
7786 {
7787 if(regs[i].regmap[hr]>64) {
7788 if(!((regs[i].dirty>>hr)&1))
7789 f_regmap[hr]=regs[i].regmap[hr];
7790 else f_regmap[hr]=-1;
7791 }
b372a952 7792 else if(regs[i].regmap[hr]>=0) {
7793 if(f_regmap[hr]!=regs[i].regmap[hr]) {
7794 // dealloc old register
7795 int n;
7796 for(n=0;n<HOST_REGS;n++)
7797 {
7798 if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
7799 }
7800 // and alloc new one
7801 f_regmap[hr]=regs[i].regmap[hr];
7802 }
7803 }
57871462 7804 if(branch_regs[i].regmap[hr]>64) {
7805 if(!((branch_regs[i].dirty>>hr)&1))
7806 f_regmap[hr]=branch_regs[i].regmap[hr];
7807 else f_regmap[hr]=-1;
7808 }
b372a952 7809 else if(branch_regs[i].regmap[hr]>=0) {
7810 if(f_regmap[hr]!=branch_regs[i].regmap[hr]) {
7811 // dealloc old register
7812 int n;
7813 for(n=0;n<HOST_REGS;n++)
7814 {
7815 if(f_regmap[n]==branch_regs[i].regmap[hr]) {f_regmap[n]=-1;}
7816 }
7817 // and alloc new one
7818 f_regmap[hr]=branch_regs[i].regmap[hr];
7819 }
7820 }
e1190b87 7821 if(ooo[i]) {
9f51b4b9 7822 if(count_free_regs(regs[i].regmap)<=minimum_free_regs[i+1])
e1190b87 7823 f_regmap[hr]=branch_regs[i].regmap[hr];
7824 }else{
9f51b4b9 7825 if(count_free_regs(branch_regs[i].regmap)<=minimum_free_regs[i+1])
57871462 7826 f_regmap[hr]=branch_regs[i].regmap[hr];
7827 }
7828 // Avoid dirty->clean transition
e1190b87 7829 #ifdef DESTRUCTIVE_WRITEBACK
57871462 7830 if(t>0) if(get_reg(regmap_pre[t],f_regmap[hr])>=0) if((regs[t].wasdirty>>get_reg(regmap_pre[t],f_regmap[hr]))&1) f_regmap[hr]=-1;
e1190b87 7831 #endif
 7832 // This check is only strictly required in the DESTRUCTIVE_WRITEBACK
 7833 // case above; however, it's always a good idea. We can't hoist the
7834 // load if the register was already allocated, so there's no point
7835 // wasting time analyzing most of these cases. It only "succeeds"
7836 // when the mapping was different and the load can be replaced with
7837 // a mov, which is of negligible benefit. So such cases are
7838 // skipped below.
57871462 7839 if(f_regmap[hr]>0) {
198df76f 7840 if(regs[t].regmap[hr]==f_regmap[hr]||(regs[t].regmap_entry[hr]<0&&get_reg(regmap_pre[t],f_regmap[hr])<0)) {
57871462 7841 int r=f_regmap[hr];
7842 for(j=t;j<=i;j++)
7843 {
7844 //printf("Test %x -> %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r);
7845 if(r<34&&((unneeded_reg[j]>>r)&1)) break;
00fa9369 7846 assert(r < 64);
57871462 7847 if(regs[j].regmap[hr]==f_regmap[hr]&&(f_regmap[hr]&63)<TEMPREG) {
7848 //printf("Hit %x -> %x, %x %d/%d\n",start+i*4,ba[i],start+j*4,hr,r);
7849 int k;
7850 if(regs[i].regmap[hr]==-1&&branch_regs[i].regmap[hr]==-1) {
7851 if(get_reg(regs[i+2].regmap,f_regmap[hr])>=0) break;
7852 if(r>63) {
7853 if(get_reg(regs[i].regmap,r&63)<0) break;
7854 if(get_reg(branch_regs[i].regmap,r&63)<0) break;
7855 }
7856 k=i;
7857 while(k>1&&regs[k-1].regmap[hr]==-1) {
e1190b87 7858 if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) {
7859 //printf("no free regs for store %x\n",start+(k-1)*4);
7860 break;
57871462 7861 }
57871462 7862 if(get_reg(regs[k-1].regmap,f_regmap[hr])>=0) {
7863 //printf("no-match due to different register\n");
7864 break;
7865 }
7866 if(itype[k-2]==UJUMP||itype[k-2]==RJUMP||itype[k-2]==CJUMP||itype[k-2]==SJUMP||itype[k-2]==FJUMP) {
7867 //printf("no-match due to branch\n");
7868 break;
7869 }
7870 // call/ret fast path assumes no registers allocated
198df76f 7871 if(k>2&&(itype[k-3]==UJUMP||itype[k-3]==RJUMP)&&rt1[k-3]==31) {
57871462 7872 break;
7873 }
7874 if(r>63) {
7875 // NB This can exclude the case where the upper-half
7876 // register is lower numbered than the lower-half
7877 // register. Not sure if it's worth fixing...
7878 if(get_reg(regs[k-1].regmap,r&63)<0) break;
7879 if(regs[k-1].is32&(1LL<<(r&63))) break;
7880 }
7881 k--;
7882 }
7883 if(i<slen-1) {
7884 if((regs[k].is32&(1LL<<f_regmap[hr]))!=
7885 (regs[i+2].was32&(1LL<<f_regmap[hr]))) {
7886 //printf("bad match after branch\n");
7887 break;
7888 }
7889 }
7890 if(regs[k-1].regmap[hr]==f_regmap[hr]&&regmap_pre[k][hr]==f_regmap[hr]) {
7891 //printf("Extend r%d, %x ->\n",hr,start+k*4);
7892 while(k<i) {
7893 regs[k].regmap_entry[hr]=f_regmap[hr];
7894 regs[k].regmap[hr]=f_regmap[hr];
7895 regmap_pre[k+1][hr]=f_regmap[hr];
7896 regs[k].wasdirty&=~(1<<hr);
7897 regs[k].dirty&=~(1<<hr);
7898 regs[k].wasdirty|=(1<<hr)&regs[k-1].dirty;
7899 regs[k].dirty|=(1<<hr)&regs[k].wasdirty;
7900 regs[k].wasconst&=~(1<<hr);
7901 regs[k].isconst&=~(1<<hr);
7902 k++;
7903 }
7904 }
7905 else {
7906 //printf("Fail Extend r%d, %x ->\n",hr,start+k*4);
7907 break;
7908 }
7909 assert(regs[i-1].regmap[hr]==f_regmap[hr]);
7910 if(regs[i-1].regmap[hr]==f_regmap[hr]&&regmap_pre[i][hr]==f_regmap[hr]) {
7911 //printf("OK fill %x (r%d)\n",start+i*4,hr);
7912 regs[i].regmap_entry[hr]=f_regmap[hr];
7913 regs[i].regmap[hr]=f_regmap[hr];
7914 regs[i].wasdirty&=~(1<<hr);
7915 regs[i].dirty&=~(1<<hr);
7916 regs[i].wasdirty|=(1<<hr)&regs[i-1].dirty;
7917 regs[i].dirty|=(1<<hr)&regs[i-1].dirty;
7918 regs[i].wasconst&=~(1<<hr);
7919 regs[i].isconst&=~(1<<hr);
7920 branch_regs[i].regmap_entry[hr]=f_regmap[hr];
7921 branch_regs[i].wasdirty&=~(1<<hr);
7922 branch_regs[i].wasdirty|=(1<<hr)&regs[i].dirty;
7923 branch_regs[i].regmap[hr]=f_regmap[hr];
7924 branch_regs[i].dirty&=~(1<<hr);
7925 branch_regs[i].dirty|=(1<<hr)&regs[i].dirty;
7926 branch_regs[i].wasconst&=~(1<<hr);
7927 branch_regs[i].isconst&=~(1<<hr);
7928 if(itype[i]!=RJUMP&&itype[i]!=UJUMP&&(source[i]>>16)!=0x1000) {
7929 regmap_pre[i+2][hr]=f_regmap[hr];
7930 regs[i+2].wasdirty&=~(1<<hr);
7931 regs[i+2].wasdirty|=(1<<hr)&regs[i].dirty;
7932 assert((branch_regs[i].is32&(1LL<<f_regmap[hr]))==
7933 (regs[i+2].was32&(1LL<<f_regmap[hr])));
7934 }
7935 }
7936 }
7937 for(k=t;k<j;k++) {
e1190b87 7938 // Alloc register clean at beginning of loop,
7939 // but may dirty it in pass 6
57871462 7940 regs[k].regmap_entry[hr]=f_regmap[hr];
7941 regs[k].regmap[hr]=f_regmap[hr];
57871462 7942 regs[k].dirty&=~(1<<hr);
7943 regs[k].wasconst&=~(1<<hr);
7944 regs[k].isconst&=~(1<<hr);
e1190b87 7945 if(itype[k]==UJUMP||itype[k]==RJUMP||itype[k]==CJUMP||itype[k]==SJUMP||itype[k]==FJUMP) {
7946 branch_regs[k].regmap_entry[hr]=f_regmap[hr];
7947 branch_regs[k].regmap[hr]=f_regmap[hr];
7948 branch_regs[k].dirty&=~(1<<hr);
7949 branch_regs[k].wasconst&=~(1<<hr);
7950 branch_regs[k].isconst&=~(1<<hr);
7951 if(itype[k]!=RJUMP&&itype[k]!=UJUMP&&(source[k]>>16)!=0x1000) {
7952 regmap_pre[k+2][hr]=f_regmap[hr];
7953 regs[k+2].wasdirty&=~(1<<hr);
7954 assert((branch_regs[k].is32&(1LL<<f_regmap[hr]))==
7955 (regs[k+2].was32&(1LL<<f_regmap[hr])));
7956 }
7957 }
7958 else
7959 {
7960 regmap_pre[k+1][hr]=f_regmap[hr];
7961 regs[k+1].wasdirty&=~(1<<hr);
7962 }
57871462 7963 }
7964 if(regs[j].regmap[hr]==f_regmap[hr])
7965 regs[j].regmap_entry[hr]=f_regmap[hr];
7966 break;
7967 }
7968 if(j==i) break;
7969 if(regs[j].regmap[hr]>=0)
7970 break;
7971 if(get_reg(regs[j].regmap,f_regmap[hr])>=0) {
7972 //printf("no-match due to different register\n");
7973 break;
7974 }
7975 if((regs[j+1].is32&(1LL<<f_regmap[hr]))!=(regs[j].is32&(1LL<<f_regmap[hr]))) {
7976 //printf("32/64 mismatch %x %d\n",start+j*4,hr);
7977 break;
7978 }
e1190b87 7979 if(itype[j]==UJUMP||itype[j]==RJUMP||(source[j]>>16)==0x1000)
7980 {
7981 // Stop on unconditional branch
7982 break;
7983 }
7984 if(itype[j]==CJUMP||itype[j]==SJUMP||itype[j]==FJUMP)
7985 {
7986 if(ooo[j]) {
9f51b4b9 7987 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j+1])
e1190b87 7988 break;
7989 }else{
9f51b4b9 7990 if(count_free_regs(branch_regs[j].regmap)<=minimum_free_regs[j+1])
e1190b87 7991 break;
7992 }
7993 if(get_reg(branch_regs[j].regmap,f_regmap[hr])>=0) {
7994 //printf("no-match due to different register (branch)\n");
57871462 7995 break;
7996 }
7997 }
e1190b87 7998 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) {
7999 //printf("No free regs for store %x\n",start+j*4);
8000 break;
8001 }
57871462 8002 if(f_regmap[hr]>=64) {
8003 if(regs[j].is32&(1LL<<(f_regmap[hr]&63))) {
8004 break;
8005 }
8006 else
8007 {
8008 if(get_reg(regs[j].regmap,f_regmap[hr]&63)<0) {
8009 break;
8010 }
8011 }
8012 }
8013 }
8014 }
8015 }
8016 }
8017 }
8018 }else{
198df76f 8019 // Non-branch or undetermined branch target
57871462 8020 for(hr=0;hr<HOST_REGS;hr++)
8021 {
8022 if(hr!=EXCLUDE_REG) {
8023 if(regs[i].regmap[hr]>64) {
8024 if(!((regs[i].dirty>>hr)&1))
8025 f_regmap[hr]=regs[i].regmap[hr];
8026 }
b372a952 8027 else if(regs[i].regmap[hr]>=0) {
8028 if(f_regmap[hr]!=regs[i].regmap[hr]) {
8029 // dealloc old register
8030 int n;
8031 for(n=0;n<HOST_REGS;n++)
8032 {
8033 if(f_regmap[n]==regs[i].regmap[hr]) {f_regmap[n]=-1;}
8034 }
8035 // and alloc new one
8036 f_regmap[hr]=regs[i].regmap[hr];
8037 }
8038 }
57871462 8039 }
8040 }
8041 // Try to restore cycle count at branch targets
8042 if(bt[i]) {
8043 for(j=i;j<slen-1;j++) {
8044 if(regs[j].regmap[HOST_CCREG]!=-1) break;
e1190b87 8045 if(count_free_regs(regs[j].regmap)<=minimum_free_regs[j]) {
8046 //printf("no free regs for store %x\n",start+j*4);
8047 break;
57871462 8048 }
57871462 8049 }
8050 if(regs[j].regmap[HOST_CCREG]==CCREG) {
8051 int k=i;
8052 //printf("Extend CC, %x -> %x\n",start+k*4,start+j*4);
8053 while(k<j) {
8054 regs[k].regmap_entry[HOST_CCREG]=CCREG;
8055 regs[k].regmap[HOST_CCREG]=CCREG;
8056 regmap_pre[k+1][HOST_CCREG]=CCREG;
8057 regs[k+1].wasdirty|=1<<HOST_CCREG;
8058 regs[k].dirty|=1<<HOST_CCREG;
8059 regs[k].wasconst&=~(1<<HOST_CCREG);
8060 regs[k].isconst&=~(1<<HOST_CCREG);
8061 k++;
8062 }
9f51b4b9 8063 regs[j].regmap_entry[HOST_CCREG]=CCREG;
57871462 8064 }
8065 // Work backwards from the branch target
8066 if(j>i&&f_regmap[HOST_CCREG]==CCREG)
8067 {
8068 //printf("Extend backwards\n");
8069 int k;
8070 k=i;
8071 while(regs[k-1].regmap[HOST_CCREG]==-1) {
e1190b87 8072 if(count_free_regs(regs[k-1].regmap)<=minimum_free_regs[k-1]) {
8073 //printf("no free regs for store %x\n",start+(k-1)*4);
8074 break;
57871462 8075 }
57871462 8076 k--;
8077 }
8078 if(regs[k-1].regmap[HOST_CCREG]==CCREG) {
8079 //printf("Extend CC, %x ->\n",start+k*4);
8080 while(k<=i) {
8081 regs[k].regmap_entry[HOST_CCREG]=CCREG;
8082 regs[k].regmap[HOST_CCREG]=CCREG;
8083 regmap_pre[k+1][HOST_CCREG]=CCREG;
8084 regs[k+1].wasdirty|=1<<HOST_CCREG;
8085 regs[k].dirty|=1<<HOST_CCREG;
8086 regs[k].wasconst&=~(1<<HOST_CCREG);
8087 regs[k].isconst&=~(1<<HOST_CCREG);
8088 k++;
8089 }
8090 }
8091 else {
8092 //printf("Fail Extend CC, %x ->\n",start+k*4);
8093 }
8094 }
8095 }
8096 if(itype[i]!=STORE&&itype[i]!=STORELR&&itype[i]!=C1LS&&itype[i]!=SHIFT&&
8097 itype[i]!=NOP&&itype[i]!=MOV&&itype[i]!=ALU&&itype[i]!=SHIFTIMM&&
00fa9369 8098 itype[i]!=IMM16&&itype[i]!=LOAD&&itype[i]!=COP1)
57871462 8099 {
8100 memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap));
8101 }
8102 }
8103 }
9f51b4b9 8104
57871462 8105 // This allocates registers (if possible) one instruction prior
8106 // to use, which can avoid a load-use penalty on certain CPUs.
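  // Roughly: if instruction i+1 reads a guest register that it has mapped to
  // host register hr, and hr is still unused at instruction i, pull the
  // mapping back one slot so the value is already loaded when i+1 runs.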
8107 for(i=0;i<slen-1;i++)
8108 {
8109 if(!i||(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP))
8110 {
8111 if(!bt[i+1])
8112 {
b9b61529 8113 if(itype[i]==ALU||itype[i]==MOV||itype[i]==LOAD||itype[i]==SHIFTIMM||itype[i]==IMM16
8114 ||((itype[i]==COP1||itype[i]==COP2)&&opcode2[i]<3))
57871462 8115 {
8116 if(rs1[i+1]) {
8117 if((hr=get_reg(regs[i+1].regmap,rs1[i+1]))>=0)
8118 {
8119 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
8120 {
8121 regs[i].regmap[hr]=regs[i+1].regmap[hr];
8122 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
8123 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
8124 regs[i].isconst&=~(1<<hr);
8125 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
8126 constmap[i][hr]=constmap[i+1][hr];
8127 regs[i+1].wasdirty&=~(1<<hr);
8128 regs[i].dirty&=~(1<<hr);
8129 }
8130 }
8131 }
8132 if(rs2[i+1]) {
8133 if((hr=get_reg(regs[i+1].regmap,rs2[i+1]))>=0)
8134 {
8135 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
8136 {
8137 regs[i].regmap[hr]=regs[i+1].regmap[hr];
8138 regmap_pre[i+1][hr]=regs[i+1].regmap[hr];
8139 regs[i+1].regmap_entry[hr]=regs[i+1].regmap[hr];
8140 regs[i].isconst&=~(1<<hr);
8141 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
8142 constmap[i][hr]=constmap[i+1][hr];
8143 regs[i+1].wasdirty&=~(1<<hr);
8144 regs[i].dirty&=~(1<<hr);
8145 }
8146 }
8147 }
198df76f 8148 // Preload target address for load instruction (non-constant)
57871462 8149 if(itype[i+1]==LOAD&&rs1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) {
8150 if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0)
8151 {
8152 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
8153 {
8154 regs[i].regmap[hr]=rs1[i+1];
8155 regmap_pre[i+1][hr]=rs1[i+1];
8156 regs[i+1].regmap_entry[hr]=rs1[i+1];
8157 regs[i].isconst&=~(1<<hr);
8158 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
8159 constmap[i][hr]=constmap[i+1][hr];
8160 regs[i+1].wasdirty&=~(1<<hr);
8161 regs[i].dirty&=~(1<<hr);
8162 }
8163 }
8164 }
9f51b4b9 8165 // Load source into target register
57871462 8166 if(lt1[i+1]&&get_reg(regs[i+1].regmap,rs1[i+1])<0) {
8167 if((hr=get_reg(regs[i+1].regmap,rt1[i+1]))>=0)
8168 {
8169 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
8170 {
8171 regs[i].regmap[hr]=rs1[i+1];
8172 regmap_pre[i+1][hr]=rs1[i+1];
8173 regs[i+1].regmap_entry[hr]=rs1[i+1];
8174 regs[i].isconst&=~(1<<hr);
8175 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
8176 constmap[i][hr]=constmap[i+1][hr];
8177 regs[i+1].wasdirty&=~(1<<hr);
8178 regs[i].dirty&=~(1<<hr);
8179 }
8180 }
8181 }
198df76f 8182 // Address for store instruction (non-constant)
b9b61529 8183 if(itype[i+1]==STORE||itype[i+1]==STORELR
8184 ||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SB/SH/SW/SD/SWC1/SDC1/SWC2/SDC2
57871462 8185 if(get_reg(regs[i+1].regmap,rs1[i+1])<0) {
8186 hr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1);
8187 if(hr<0) hr=get_reg(regs[i+1].regmap,-1);
8188 else {regs[i+1].regmap[hr]=AGEN1+((i+1)&1);regs[i+1].isconst&=~(1<<hr);}
8189 assert(hr>=0);
8190 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
8191 {
8192 regs[i].regmap[hr]=rs1[i+1];
8193 regmap_pre[i+1][hr]=rs1[i+1];
8194 regs[i+1].regmap_entry[hr]=rs1[i+1];
8195 regs[i].isconst&=~(1<<hr);
8196 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
8197 constmap[i][hr]=constmap[i+1][hr];
8198 regs[i+1].wasdirty&=~(1<<hr);
8199 regs[i].dirty&=~(1<<hr);
8200 }
8201 }
8202 }
b9b61529 8203 if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) { // LWC1/LDC1, LWC2/LDC2
57871462 8204 if(get_reg(regs[i+1].regmap,rs1[i+1])<0) {
8205 int nr;
8206 hr=get_reg(regs[i+1].regmap,FTEMP);
8207 assert(hr>=0);
8208 if(regs[i].regmap[hr]<0&&regs[i+1].regmap_entry[hr]<0)
8209 {
8210 regs[i].regmap[hr]=rs1[i+1];
8211 regmap_pre[i+1][hr]=rs1[i+1];
8212 regs[i+1].regmap_entry[hr]=rs1[i+1];
8213 regs[i].isconst&=~(1<<hr);
8214 regs[i].isconst|=regs[i+1].isconst&(1<<hr);
8215 constmap[i][hr]=constmap[i+1][hr];
8216 regs[i+1].wasdirty&=~(1<<hr);
8217 regs[i].dirty&=~(1<<hr);
8218 }
8219 else if((nr=get_reg2(regs[i].regmap,regs[i+1].regmap,-1))>=0)
8220 {
8221 // move it to another register
8222 regs[i+1].regmap[hr]=-1;
8223 regmap_pre[i+2][hr]=-1;
8224 regs[i+1].regmap[nr]=FTEMP;
8225 regmap_pre[i+2][nr]=FTEMP;
8226 regs[i].regmap[nr]=rs1[i+1];
8227 regmap_pre[i+1][nr]=rs1[i+1];
8228 regs[i+1].regmap_entry[nr]=rs1[i+1];
8229 regs[i].isconst&=~(1<<nr);
8230 regs[i+1].isconst&=~(1<<nr);
8231 regs[i].dirty&=~(1<<nr);
8232 regs[i+1].wasdirty&=~(1<<nr);
8233 regs[i+1].dirty&=~(1<<nr);
8234 regs[i+2].wasdirty&=~(1<<nr);
8235 }
8236 }
8237 }
b9b61529 8238 if(itype[i+1]==LOAD||itype[i+1]==LOADLR||itype[i+1]==STORE||itype[i+1]==STORELR/*||itype[i+1]==C1LS||itype[i+1]==C2LS*/) {
9f51b4b9 8239 if(itype[i+1]==LOAD)
57871462 8240 hr=get_reg(regs[i+1].regmap,rt1[i+1]);
b9b61529 8241 if(itype[i+1]==LOADLR||(opcode[i+1]&0x3b)==0x31||(opcode[i+1]&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2
57871462 8242 hr=get_reg(regs[i+1].regmap,FTEMP);
b9b61529 8243 if(itype[i+1]==STORE||itype[i+1]==STORELR||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a) { // SWC1/SDC1/SWC2/SDC2
57871462 8244 hr=get_reg(regs[i+1].regmap,AGEN1+((i+1)&1));
8245 if(hr<0) hr=get_reg(regs[i+1].regmap,-1);
8246 }
8247 if(hr>=0&&regs[i].regmap[hr]<0) {
8248 int rs=get_reg(regs[i+1].regmap,rs1[i+1]);
8249 if(rs>=0&&((regs[i+1].wasconst>>rs)&1)) {
8250 regs[i].regmap[hr]=AGEN1+((i+1)&1);
8251 regmap_pre[i+1][hr]=AGEN1+((i+1)&1);
8252 regs[i+1].regmap_entry[hr]=AGEN1+((i+1)&1);
8253 regs[i].isconst&=~(1<<hr);
8254 regs[i+1].wasdirty&=~(1<<hr);
8255 regs[i].dirty&=~(1<<hr);
8256 }
8257 }
8258 }
8259 }
8260 }
8261 }
8262 }
9f51b4b9 8263
57871462 8264 /* Pass 6 - Optimize clean/dirty state */
8265 clean_registers(0,slen-1,1);
9f51b4b9 8266
57871462 8267 /* Pass 7 - Identify 32-bit registers */
04fd948a 8268 for (i=slen-1;i>=0;i--)
8269 {
8270 if(itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
8271 {
8272 // Conditional branch
8273 if((source[i]>>16)!=0x1000&&i<slen-2) {
8274 // Mark this address as a branch target since it may be called
8275 // upon return from interrupt
8276 bt[i+2]=1;
8277 }
8278 }
8279 }
57871462 8280
8281 if(itype[slen-1]==SPAN) {
8282 bt[slen-1]=1; // Mark as a branch target so instruction can restart after exception
8283 }
4600ba03 8284
8285#ifdef DISASM
57871462 8286 /* Debug/disassembly */
57871462 8287 for(i=0;i<slen;i++)
8288 {
8289 printf("U:");
8290 int r;
8291 for(r=1;r<=CCREG;r++) {
8292 if((unneeded_reg[i]>>r)&1) {
8293 if(r==HIREG) printf(" HI");
8294 else if(r==LOREG) printf(" LO");
8295 else printf(" r%d",r);
8296 }
8297 }
57871462 8298 printf("\n");
8299 #if defined(__i386__) || defined(__x86_64__)
8300 printf("pre: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7]);
8301 #endif
8302 #ifdef __arm__
8303 printf("pre: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regmap_pre[i][0],regmap_pre[i][1],regmap_pre[i][2],regmap_pre[i][3],regmap_pre[i][4],regmap_pre[i][5],regmap_pre[i][6],regmap_pre[i][7],regmap_pre[i][8],regmap_pre[i][9],regmap_pre[i][10],regmap_pre[i][12]);
8304 #endif
8305 printf("needs: ");
8306 if(needed_reg[i]&1) printf("eax ");
8307 if((needed_reg[i]>>1)&1) printf("ecx ");
8308 if((needed_reg[i]>>2)&1) printf("edx ");
8309 if((needed_reg[i]>>3)&1) printf("ebx ");
8310 if((needed_reg[i]>>5)&1) printf("ebp ");
8311 if((needed_reg[i]>>6)&1) printf("esi ");
8312 if((needed_reg[i]>>7)&1) printf("edi ");
57871462 8313 printf("\n");
57871462 8314 #if defined(__i386__) || defined(__x86_64__)
8315 printf("entry: eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7]);
8316 printf("dirty: ");
8317 if(regs[i].wasdirty&1) printf("eax ");
8318 if((regs[i].wasdirty>>1)&1) printf("ecx ");
8319 if((regs[i].wasdirty>>2)&1) printf("edx ");
8320 if((regs[i].wasdirty>>3)&1) printf("ebx ");
8321 if((regs[i].wasdirty>>5)&1) printf("ebp ");
8322 if((regs[i].wasdirty>>6)&1) printf("esi ");
8323 if((regs[i].wasdirty>>7)&1) printf("edi ");
8324 #endif
8325 #ifdef __arm__
8326 printf("entry: r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d\n",regs[i].regmap_entry[0],regs[i].regmap_entry[1],regs[i].regmap_entry[2],regs[i].regmap_entry[3],regs[i].regmap_entry[4],regs[i].regmap_entry[5],regs[i].regmap_entry[6],regs[i].regmap_entry[7],regs[i].regmap_entry[8],regs[i].regmap_entry[9],regs[i].regmap_entry[10],regs[i].regmap_entry[12]);
8327 printf("dirty: ");
8328 if(regs[i].wasdirty&1) printf("r0 ");
8329 if((regs[i].wasdirty>>1)&1) printf("r1 ");
8330 if((regs[i].wasdirty>>2)&1) printf("r2 ");
8331 if((regs[i].wasdirty>>3)&1) printf("r3 ");
8332 if((regs[i].wasdirty>>4)&1) printf("r4 ");
8333 if((regs[i].wasdirty>>5)&1) printf("r5 ");
8334 if((regs[i].wasdirty>>6)&1) printf("r6 ");
8335 if((regs[i].wasdirty>>7)&1) printf("r7 ");
8336 if((regs[i].wasdirty>>8)&1) printf("r8 ");
8337 if((regs[i].wasdirty>>9)&1) printf("r9 ");
8338 if((regs[i].wasdirty>>10)&1) printf("r10 ");
8339 if((regs[i].wasdirty>>12)&1) printf("r12 ");
8340 #endif
8341 printf("\n");
8342 disassemble_inst(i);
8343 //printf ("ccadj[%d] = %d\n",i,ccadj[i]);
8344 #if defined(__i386__) || defined(__x86_64__)
8345 printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7]);
8346 if(regs[i].dirty&1) printf("eax ");
8347 if((regs[i].dirty>>1)&1) printf("ecx ");
8348 if((regs[i].dirty>>2)&1) printf("edx ");
8349 if((regs[i].dirty>>3)&1) printf("ebx ");
8350 if((regs[i].dirty>>5)&1) printf("ebp ");
8351 if((regs[i].dirty>>6)&1) printf("esi ");
8352 if((regs[i].dirty>>7)&1) printf("edi ");
8353 #endif
8354 #ifdef __arm__
8355 printf("r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",regs[i].regmap[0],regs[i].regmap[1],regs[i].regmap[2],regs[i].regmap[3],regs[i].regmap[4],regs[i].regmap[5],regs[i].regmap[6],regs[i].regmap[7],regs[i].regmap[8],regs[i].regmap[9],regs[i].regmap[10],regs[i].regmap[12]);
8356 if(regs[i].dirty&1) printf("r0 ");
8357 if((regs[i].dirty>>1)&1) printf("r1 ");
8358 if((regs[i].dirty>>2)&1) printf("r2 ");
8359 if((regs[i].dirty>>3)&1) printf("r3 ");
8360 if((regs[i].dirty>>4)&1) printf("r4 ");
8361 if((regs[i].dirty>>5)&1) printf("r5 ");
8362 if((regs[i].dirty>>6)&1) printf("r6 ");
8363 if((regs[i].dirty>>7)&1) printf("r7 ");
8364 if((regs[i].dirty>>8)&1) printf("r8 ");
8365 if((regs[i].dirty>>9)&1) printf("r9 ");
8366 if((regs[i].dirty>>10)&1) printf("r10 ");
8367 if((regs[i].dirty>>12)&1) printf("r12 ");
8368 #endif
8369 printf("\n");
8370 if(regs[i].isconst) {
8371 printf("constants: ");
8372 #if defined(__i386__) || defined(__x86_64__)
643aeae3 8373 if(regs[i].isconst&1) printf("eax=%x ",(u_int)constmap[i][0]);
8374 if((regs[i].isconst>>1)&1) printf("ecx=%x ",(u_int)constmap[i][1]);
8375 if((regs[i].isconst>>2)&1) printf("edx=%x ",(u_int)constmap[i][2]);
8376 if((regs[i].isconst>>3)&1) printf("ebx=%x ",(u_int)constmap[i][3]);
8377 if((regs[i].isconst>>5)&1) printf("ebp=%x ",(u_int)constmap[i][5]);
8378 if((regs[i].isconst>>6)&1) printf("esi=%x ",(u_int)constmap[i][6]);
8379 if((regs[i].isconst>>7)&1) printf("edi=%x ",(u_int)constmap[i][7]);
57871462 8380 #endif
8381 #ifdef __arm__
643aeae3 8382 int r;
8383 for (r = 0; r < ARRAY_SIZE(constmap[i]); r++)
8384 if ((regs[i].isconst >> r) & 1)
8385 printf(" r%d=%x", r, (u_int)constmap[i][r]);
57871462 8386 #endif
8387 printf("\n");
8388 }
57871462 8389 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP) {
8390 #if defined(__i386__) || defined(__x86_64__)
8391 printf("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]);
8392 if(branch_regs[i].dirty&1) printf("eax ");
8393 if((branch_regs[i].dirty>>1)&1) printf("ecx ");
8394 if((branch_regs[i].dirty>>2)&1) printf("edx ");
8395 if((branch_regs[i].dirty>>3)&1) printf("ebx ");
8396 if((branch_regs[i].dirty>>5)&1) printf("ebp ");
8397 if((branch_regs[i].dirty>>6)&1) printf("esi ");
8398 if((branch_regs[i].dirty>>7)&1) printf("edi ");
8399 #endif
8400 #ifdef __arm__
8401 printf("branch(%d): r0=%d r1=%d r2=%d r3=%d r4=%d r5=%d r6=%d r7=%d r8=%d r9=%d r10=%d r12=%d dirty: ",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[4],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7],branch_regs[i].regmap[8],branch_regs[i].regmap[9],branch_regs[i].regmap[10],branch_regs[i].regmap[12]);
8402 if(branch_regs[i].dirty&1) printf("r0 ");
8403 if((branch_regs[i].dirty>>1)&1) printf("r1 ");
8404 if((branch_regs[i].dirty>>2)&1) printf("r2 ");
8405 if((branch_regs[i].dirty>>3)&1) printf("r3 ");
8406 if((branch_regs[i].dirty>>4)&1) printf("r4 ");
8407 if((branch_regs[i].dirty>>5)&1) printf("r5 ");
8408 if((branch_regs[i].dirty>>6)&1) printf("r6 ");
8409 if((branch_regs[i].dirty>>7)&1) printf("r7 ");
8410 if((branch_regs[i].dirty>>8)&1) printf("r8 ");
8411 if((branch_regs[i].dirty>>9)&1) printf("r9 ");
8412 if((branch_regs[i].dirty>>10)&1) printf("r10 ");
8413 if((branch_regs[i].dirty>>12)&1) printf("r12 ");
8414 #endif
57871462 8415 }
8416 }
4600ba03 8417#endif // DISASM
57871462 8418
8419 /* Pass 8 - Assembly */
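  // For each instruction: write back dirty registers that are no longer
  // mapped, load the registers required on entry, do address generation and
  // constant loading, then emit code through the itype-specific *_assemble()
  // routine. Branch targets record their output address in instr_addr[].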
8420 linkcount=0;stubcount=0;
8421 ds=0;is_delayslot=0;
57871462 8422 uint64_t is32_pre=0;
8423 u_int dirty_pre=0;
d148d265 8424 void *beginning=start_block();
57871462 8425 if((u_int)addr&1) {
8426 ds=1;
8427 pagespan_ds();
8428 }
df4dc2b1 8429 void *instr_addr0_override = NULL;
9ad4d757 8430
9ad4d757 8431 if (start == 0x80030000) {
8432 // nasty hack for fastbios thing
96186eba 8433 // override block entry to this code
df4dc2b1 8434 instr_addr0_override = out;
9ad4d757 8435 emit_movimm(start,0);
96186eba 8436 // abuse io address var as a flag that we
8437 // have already returned here once
643aeae3 8438 emit_readword(&address,1);
8439 emit_writeword(0,&pcaddr);
8440 emit_writeword(0,&address);
9ad4d757 8441 emit_cmp(0,1);
643aeae3 8442 emit_jne(new_dyna_leave);
9ad4d757 8443 }
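  // The prologue emitted above compares the saved 'address' value with this
  // block's start pc: on the first visit they differ, so pcaddr is set and
  // control returns to the emulator through new_dyna_leave; on the next
  // visit the values match and the block executes normally.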
57871462 8444 for(i=0;i<slen;i++)
8445 {
8446 //if(ds) printf("ds: ");
4600ba03 8447 disassemble_inst(i);
57871462 8448 if(ds) {
8449 ds=0; // Skip delay slot
8450 if(bt[i]) assem_debug("OOPS - branch into delay slot\n");
df4dc2b1 8451 instr_addr[i] = NULL;
57871462 8452 } else {
ffb0b9e0 8453 speculate_register_values(i);
57871462 8454 #ifndef DESTRUCTIVE_WRITEBACK
8455 if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000))
8456 {
57871462 8457 wb_valid(regmap_pre[i],regs[i].regmap_entry,dirty_pre,regs[i].wasdirty,is32_pre,
00fa9369 8458 unneeded_reg[i]);
57871462 8459 }
f776eb14 8460 if((itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)&&!likely[i]) {
8461 is32_pre=branch_regs[i].is32;
8462 dirty_pre=branch_regs[i].dirty;
8463 }else{
8464 is32_pre=regs[i].is32;
8465 dirty_pre=regs[i].dirty;
8466 }
57871462 8467 #endif
8468 // write back
8469 if(i<2||(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000))
8470 {
00fa9369 8471 wb_invalidate(regmap_pre[i],regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32,unneeded_reg[i]);
57871462 8472 loop_preload(regmap_pre[i],regs[i].regmap_entry);
8473 }
8474 // branch target entry point
df4dc2b1 8475 instr_addr[i] = out;
57871462 8476 assem_debug("<->\n");
dd114d7d 8477 drc_dbg_emit_do_cmp(i);
8478
57871462 8479 // load regs
8480 if(regs[i].regmap_entry[HOST_CCREG]==CCREG&&regs[i].regmap[HOST_CCREG]!=CCREG)
8481 wb_register(CCREG,regs[i].regmap_entry,regs[i].wasdirty,regs[i].was32);
8482 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i],rs2[i]);
8483 address_generation(i,&regs[i],regs[i].regmap_entry);
8484 load_consts(regmap_pre[i],regs[i].regmap,regs[i].was32,i);
8485 if(itype[i]==RJUMP||itype[i]==UJUMP||itype[i]==CJUMP||itype[i]==SJUMP||itype[i]==FJUMP)
8486 {
8487 // Load the delay slot registers if necessary
4ef8f67d 8488 if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i]&&(rs1[i+1]!=rt1[i]||rt1[i]==0))
57871462 8489 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i+1],rs1[i+1]);
4ef8f67d 8490 if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i]&&(rs2[i+1]!=rt1[i]||rt1[i]==0))
57871462 8491 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs2[i+1],rs2[i+1]);
b9b61529 8492 if(itype[i+1]==STORE||itype[i+1]==STORELR||(opcode[i+1]&0x3b)==0x39||(opcode[i+1]&0x3b)==0x3a)
57871462 8493 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,INVCP,INVCP);
8494 }
8495 else if(i+1<slen)
8496 {
8497 // Preload registers for following instruction
8498 if(rs1[i+1]!=rs1[i]&&rs1[i+1]!=rs2[i])
8499 if(rs1[i+1]!=rt1[i]&&rs1[i+1]!=rt2[i])
8500 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs1[i+1],rs1[i+1]);
8501 if(rs2[i+1]!=rs1[i+1]&&rs2[i+1]!=rs1[i]&&rs2[i+1]!=rs2[i])
8502 if(rs2[i+1]!=rt1[i]&&rs2[i+1]!=rt2[i])
8503 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,rs2[i+1],rs2[i+1]);
8504 }
8505 // TODO: if(is_ooo(i)) address_generation(i+1);
8506 if(itype[i]==CJUMP||itype[i]==FJUMP)
8507 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,CCREG,CCREG);
b9b61529 8508 if(itype[i]==STORE||itype[i]==STORELR||(opcode[i]&0x3b)==0x39||(opcode[i]&0x3b)==0x3a)
57871462 8509 load_regs(regs[i].regmap_entry,regs[i].regmap,regs[i].was32,INVCP,INVCP);
57871462 8510 // assemble
8511 switch(itype[i]) {
8512 case ALU:
8513 alu_assemble(i,&regs[i]);break;
8514 case IMM16:
8515 imm16_assemble(i,&regs[i]);break;
8516 case SHIFT:
8517 shift_assemble(i,&regs[i]);break;
8518 case SHIFTIMM:
8519 shiftimm_assemble(i,&regs[i]);break;
8520 case LOAD:
8521 load_assemble(i,&regs[i]);break;
8522 case LOADLR:
8523 loadlr_assemble(i,&regs[i]);break;
8524 case STORE:
8525 store_assemble(i,&regs[i]);break;
8526 case STORELR:
8527 storelr_assemble(i,&regs[i]);break;
8528 case COP0:
8529 cop0_assemble(i,&regs[i]);break;
8530 case COP1:
8531 cop1_assemble(i,&regs[i]);break;
8532 case C1LS:
8533 c1ls_assemble(i,&regs[i]);break;
b9b61529 8534 case COP2:
8535 cop2_assemble(i,&regs[i]);break;
8536 case C2LS:
8537 c2ls_assemble(i,&regs[i]);break;
8538 case C2OP:
8539 c2op_assemble(i,&regs[i]);break;
57871462 8540 case MULTDIV:
8541 multdiv_assemble(i,&regs[i]);break;
8542 case MOV:
8543 mov_assemble(i,&regs[i]);break;
8544 case SYSCALL:
8545 syscall_assemble(i,&regs[i]);break;
7139f3c8 8546 case HLECALL:
8547 hlecall_assemble(i,&regs[i]);break;
1e973cb0 8548 case INTCALL:
8549 intcall_assemble(i,&regs[i]);break;
57871462 8550 case UJUMP:
8551 ujump_assemble(i,&regs[i]);ds=1;break;
8552 case RJUMP:
8553 rjump_assemble(i,&regs[i]);ds=1;break;
8554 case CJUMP:
8555 cjump_assemble(i,&regs[i]);ds=1;break;
8556 case SJUMP:
8557 sjump_assemble(i,&regs[i]);ds=1;break;
8558 case FJUMP:
00fa9369 8559 assert(0);ds=1;break;
57871462 8560 case SPAN:
8561 pagespan_assemble(i,&regs[i]);break;
8562 }
8563 if(itype[i]==UJUMP||itype[i]==RJUMP||(source[i]>>16)==0x1000)
8564 literal_pool(1024);
8565 else
8566 literal_pool_jumpover(256);
8567 }
8568 }
8569 //assert(itype[i-2]==UJUMP||itype[i-2]==RJUMP||(source[i-2]>>16)==0x1000);
8570 // If the block did not end with an unconditional branch,
8571 // add a jump to the next instruction.
8572 if(i>1) {
8573 if(itype[i-2]!=UJUMP&&itype[i-2]!=RJUMP&&(source[i-2]>>16)!=0x1000&&itype[i-1]!=SPAN) {
8574 assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP);
8575 assert(i==slen);
8576 if(itype[i-2]!=CJUMP&&itype[i-2]!=SJUMP&&itype[i-2]!=FJUMP) {
8577 store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4);
8578 if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
8579 emit_loadreg(CCREG,HOST_CCREG);
2573466a 8580 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG);
57871462 8581 }
8582 else if(!likely[i-2])
8583 {
8584 store_regs_bt(branch_regs[i-2].regmap,branch_regs[i-2].is32,branch_regs[i-2].dirty,start+i*4);
8585 assert(branch_regs[i-2].regmap[HOST_CCREG]==CCREG);
8586 }
8587 else
8588 {
8589 store_regs_bt(regs[i-2].regmap,regs[i-2].is32,regs[i-2].dirty,start+i*4);
8590 assert(regs[i-2].regmap[HOST_CCREG]==CCREG);
8591 }
643aeae3 8592 add_to_linker(out,start+i*4,0);
57871462 8593 emit_jmp(0);
8594 }
8595 }
8596 else
8597 {
8598 assert(i>0);
8599 assert(itype[i-1]!=UJUMP&&itype[i-1]!=CJUMP&&itype[i-1]!=SJUMP&&itype[i-1]!=RJUMP&&itype[i-1]!=FJUMP);
8600 store_regs_bt(regs[i-1].regmap,regs[i-1].is32,regs[i-1].dirty,start+i*4);
8601 if(regs[i-1].regmap[HOST_CCREG]!=CCREG)
8602 emit_loadreg(CCREG,HOST_CCREG);
2573466a 8603 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i-1]+1),HOST_CCREG);
643aeae3 8604 add_to_linker(out,start+i*4,0);
57871462 8605 emit_jmp(0);
8606 }
8607
8608 // TODO: delay slot stubs?
8609 // Stubs
8610 for(i=0;i<stubcount;i++)
8611 {
b14b6a8f 8612 switch(stubs[i].type)
57871462 8613 {
8614 case LOADB_STUB:
8615 case LOADH_STUB:
8616 case LOADW_STUB:
8617 case LOADD_STUB:
8618 case LOADBU_STUB:
8619 case LOADHU_STUB:
8620 do_readstub(i);break;
8621 case STOREB_STUB:
8622 case STOREH_STUB:
8623 case STOREW_STUB:
8624 case STORED_STUB:
8625 do_writestub(i);break;
8626 case CC_STUB:
8627 do_ccstub(i);break;
8628 case INVCODE_STUB:
8629 do_invstub(i);break;
8630 case FP_STUB:
8631 do_cop1stub(i);break;
8632 case STORELR_STUB:
8633 do_unalignedwritestub(i);break;
8634 }
8635 }
8636
9ad4d757 8637 if (instr_addr0_override)
8638 instr_addr[0] = instr_addr0_override;
8639
57871462 8640 /* Pass 9 - Linker */
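  // Resolve branches recorded in link_addr[]: targets inside this block are
  // patched directly to instr_addr[]; external targets go through
  // check_addr()/emit_extjump() so they can be linked (or re-linked after
  // invalidation) later.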
8641 for(i=0;i<linkcount;i++)
8642 {
643aeae3 8643 assem_debug("%p -> %8x\n",link_addr[i].addr,link_addr[i].target);
57871462 8644 literal_pool(64);
643aeae3 8645 if (!link_addr[i].ext)
57871462 8646 {
643aeae3 8647 void *stub = out;
8648 void *addr = check_addr(link_addr[i].target);
8649 emit_extjump(link_addr[i].addr, link_addr[i].target);
8650 if (addr) {
8651 set_jump_target(link_addr[i].addr, addr);
8652 add_link(link_addr[i].target,stub);
57871462 8653 }
643aeae3 8654 else
8655 set_jump_target(link_addr[i].addr, stub);
57871462 8656 }
8657 else
8658 {
8659 // Internal branch
643aeae3 8660 int target=(link_addr[i].target-start)>>2;
57871462 8661 assert(target>=0&&target<slen);
8662 assert(instr_addr[target]);
8663 //#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
643aeae3 8664 //set_jump_target_fillslot(link_addr[i].addr,instr_addr[target],link_addr[i].ext>>1);
57871462 8665 //#else
643aeae3 8666 set_jump_target(link_addr[i].addr, instr_addr[target]);
57871462 8667 //#endif
8668 }
8669 }
8670 // External Branch Targets (jump_in)
8671 if(copy+slen*4>(void *)shadow+sizeof(shadow)) copy=shadow;
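  // Publish each branch target (and the block entry): emit a verification
  // ("dirty") stub, record it in jump_dirty/jump_in for this page, and
  // refresh an existing hash-table slot for the address if there is one.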
8672 for(i=0;i<slen;i++)
8673 {
8674 if(bt[i]||i==0)
8675 {
8676 if(instr_addr[i]) // TODO - delay slots (=null)
8677 {
8678 u_int vaddr=start+i*4;
94d23bb9 8679 u_int page=get_page(vaddr);
8680 u_int vpage=get_vpage(vaddr);
57871462 8681 literal_pool(256);
57871462 8682 {
df4dc2b1 8683 assem_debug("%p (%d) <- %8x\n",instr_addr[i],i,start+i*4);
57871462 8684 assem_debug("jump_in: %x\n",start+i*4);
df4dc2b1 8685 ll_add(jump_dirty+vpage,vaddr,out);
8686 void *entry_point = do_dirty_stub(i);
8687 ll_add_flags(jump_in+page,vaddr,state_rflags,entry_point);
57871462 8688 // If there was an existing entry in the hash table,
8689 // replace it with the new address.
8690 // Don't add new entries. We'll insert the
8691 // ones that actually get used in check_addr().
df4dc2b1 8692 struct ht_entry *ht_bin = hash_table_get(vaddr);
8693 if (ht_bin->vaddr[0] == vaddr)
8694 ht_bin->tcaddr[0] = entry_point;
8695 if (ht_bin->vaddr[1] == vaddr)
8696 ht_bin->tcaddr[1] = entry_point;
57871462 8697 }
57871462 8698 }
8699 }
8700 }
8701 // Write out the literal pool if necessary
8702 literal_pool(0);
8703 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
8704 // Align code
8705 if(((u_int)out)&7) emit_addnop(13);
8706 #endif
01d26796 8707 assert(out - (u_char *)beginning < MAX_OUTPUT_BLOCK_SIZE);
643aeae3 8708 //printf("shadow buffer: %p-%p\n",copy,(u_char *)copy+slen*4);
57871462 8709 memcpy(copy,source,slen*4);
8710 copy+=slen*4;
9f51b4b9 8711
d148d265 8712 end_block(beginning);
9f51b4b9 8713
57871462 8714 // If we're within 256K of the end of the buffer,
8715 // start over from the beginning. (Is 256K enough?)
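  // (The margin equals MAX_OUTPUT_BLOCK_SIZE; the assert above guarantees a
  // single block never emits more than that, so wrapping here means the next
  // block is sure to fit.)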
643aeae3 8716 if (out > translation_cache+(1<<TARGET_SIZE_2)-MAX_OUTPUT_BLOCK_SIZE)
8717 out = translation_cache;
9f51b4b9 8718
57871462 8719 // Trap writes to any of the pages we compiled
8720 for(i=start>>12;i<=(start+slen*4)>>12;i++) {
8721 invalid_code[i]=0;
57871462 8722 }
9be4ba64 8723 inv_code_start=inv_code_end=~0;
71e490c5 8724
b96d3df7 8725 // for PCSX we need to mark all mirrors too
b12c9fb8 8726 if(get_page(start)<(RAM_SIZE>>12))
8727 for(i=start>>12;i<=(start+slen*4)>>12;i++)
b96d3df7 8728 invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]=
8729 invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]=
8730 invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0;
9f51b4b9 8731
57871462 8732 /* Pass 10 - Free memory by expiring oldest blocks */
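  // expirep is a 16-bit phase counter advanced a little further after every
  // compile. Each position selects one slice of the translation cache
  // (base/shift below) and one of four cleanup actions, so stale jump_in,
  // jump_dirty and jump_out entries and hash-table slots are discarded
  // gradually instead of all at once.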
9f51b4b9 8733
643aeae3 8734 int end=(((out-translation_cache)>>(TARGET_SIZE_2-16))+16384)&65535;
57871462 8735 while(expirep!=end)
8736 {
8737 int shift=TARGET_SIZE_2-3; // Divide into 8 blocks
643aeae3 8738 uintptr_t base=(uintptr_t)translation_cache+((expirep>>13)<<shift); // Base address of this block
57871462 8739 inv_debug("EXP: Phase %d\n",expirep);
8740 switch((expirep>>11)&3)
8741 {
8742 case 0:
8743 // Clear jump_in and jump_dirty
8744 ll_remove_matching_addrs(jump_in+(expirep&2047),base,shift);
8745 ll_remove_matching_addrs(jump_dirty+(expirep&2047),base,shift);
8746 ll_remove_matching_addrs(jump_in+2048+(expirep&2047),base,shift);
8747 ll_remove_matching_addrs(jump_dirty+2048+(expirep&2047),base,shift);
8748 break;
8749 case 1:
8750 // Clear pointers
8751 ll_kill_pointers(jump_out[expirep&2047],base,shift);
8752 ll_kill_pointers(jump_out[(expirep&2047)+2048],base,shift);
8753 break;
8754 case 2:
8755 // Clear hash table
8756 for(i=0;i<32;i++) {
df4dc2b1 8757 struct ht_entry *ht_bin = &hash_table[((expirep&2047)<<5)+i];
8758 if (((uintptr_t)ht_bin->tcaddr[1]>>shift) == (base>>shift) ||
8759 (((uintptr_t)ht_bin->tcaddr[1]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) {
8760 inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[1],ht_bin->tcaddr[1]);
8761 ht_bin->vaddr[1] = -1;
8762 ht_bin->tcaddr[1] = NULL;
8763 }
8764 if (((uintptr_t)ht_bin->tcaddr[0]>>shift) == (base>>shift) ||
8765 (((uintptr_t)ht_bin->tcaddr[0]-MAX_OUTPUT_BLOCK_SIZE)>>shift)==(base>>shift)) {
8766 inv_debug("EXP: Remove hash %x -> %p\n",ht_bin->vaddr[0],ht_bin->tcaddr[0]);
8767 ht_bin->vaddr[0] = ht_bin->vaddr[1];
8768 ht_bin->tcaddr[0] = ht_bin->tcaddr[1];
8769 ht_bin->vaddr[1] = -1;
8770 ht_bin->tcaddr[1] = NULL;
57871462 8771 }
8772 }
8773 break;
8774 case 3:
8775 // Clear jump_out
dd3a91a1 8776 #ifdef __arm__
9f51b4b9 8777 if((expirep&2047)==0)
dd3a91a1 8778 do_clear_cache();
8779 #endif
57871462 8780 ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift);
8781 ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift);
8782 break;
8783 }
8784 expirep=(expirep+1)&65535;
8785 }
8786 return 0;
8787}
b9b61529 8788
8789// vim:shiftwidth=2:expandtab