X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=067decb7d7e5dd86bffbe19f9350d2afa0876208;hb=a5cd72d0e598f037fd9d9f23948af5b2fb06e2eb;hp=cabf4871dccc300fb6975f6a30724b0cc558fb0c;hpb=3039c914c0ac33ee89127db3bd4c53eb3c25cafd;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index cabf4871..067decb7 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -39,9 +39,10 @@ static Jit g_jit; #include "../psxinterpreter.h" #include "../gte.h" #include "emu_if.h" // emulator interface +#include "linkage_offsets.h" +#include "compiler_features.h" #include "arm_features.h" -#define noinline __attribute__((noinline,noclone)) #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #endif @@ -54,6 +55,8 @@ static Jit g_jit; //#define DISASM //#define ASSEM_PRINT +//#define REGMAP_PRINT // with DISASM only +//#define INV_DEBUG_W //#define STAT_PRINT #ifdef ASSEM_PRINT @@ -95,14 +98,16 @@ static Jit g_jit; #define TC_REDUCE_BYTES 0 #endif +struct ndrc_tramp +{ + struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; + const void *f[2048 / sizeof(void *)]; +}; + struct ndrc_mem { u_char translation_cache[(1 << TARGET_SIZE_2) - TC_REDUCE_BYTES]; - struct - { - struct tramp_insns ops[2048 / sizeof(struct tramp_insns)]; - const void *f[2048 / sizeof(void *)]; - } tramp; + struct ndrc_tramp tramp; }; #ifdef BASE_ADDR_DYNAMIC @@ -111,7 +116,7 @@ static struct ndrc_mem *ndrc; static struct ndrc_mem ndrc_ __attribute__((aligned(4096))); static struct ndrc_mem *ndrc = &ndrc_; #endif -#ifdef NDRC_WRITE_OFFSET +#ifdef TC_WRITE_OFFSET # ifdef __GLIBC__ # include # include @@ -119,26 +124,28 @@ static struct ndrc_mem *ndrc = &ndrc_; # include # endif static long ndrc_write_ofs; +#define NDRC_WRITE_OFFSET(x) (void *)((char *)(x) + ndrc_write_ofs) #else -#define ndrc_write_ofs 0 +#define NDRC_WRITE_OFFSET(x) (x) #endif // stubs enum stub_type { CC_STUB = 1, - FP_STUB = 2, + //FP_STUB = 2, LOADB_STUB = 3, LOADH_STUB = 4, LOADW_STUB = 5, - LOADD_STUB = 6, + //LOADD_STUB = 6, LOADBU_STUB = 7, LOADHU_STUB = 8, STOREB_STUB = 9, STOREH_STUB = 10, STOREW_STUB = 11, - STORED_STUB = 12, + //STORED_STUB = 12, STORELR_STUB = 13, INVCODE_STUB = 14, + OVERFLOW_STUB = 15, }; // regmap_pre[i] - regs before [i] insn starts; dirty things here that @@ -154,9 +161,9 @@ struct regstat uint64_t dirty; uint64_t u; u_int wasconst; // before; for example 'lw r2, (r2)' wasconst is true - u_int isconst; // ... but isconst is false when r2 is known + u_int isconst; // ... but isconst is false when r2 is known (hr) u_int loadedconst; // host regs that have constants loaded - u_int waswritten; // MIPS regs that were used as store base before + //u_int waswritten; // MIPS regs that were used as store base before }; struct ht_entry @@ -216,8 +223,8 @@ struct jump_info static struct decoded_insn { u_char itype; - u_char opcode; - u_char opcode2; + u_char opcode; // bits 31-26 + u_char opcode2; // (depends on opcode) u_char rs1; u_char rs2; u_char rt1; @@ -230,9 +237,13 @@ static struct decoded_insn u_char is_ujump:1; u_char is_load:1; u_char is_store:1; + u_char is_delay_load:1; // is_load + MFC/CFC + u_char is_exception:1; // unconditional, also interp. fallback + u_char may_except:1; // might generate an exception } dops[MAXBLOCK]; static u_char *out; + static char invalid_code[0x100000]; static struct ht_entry hash_table[65536]; static struct block_info *blocks[PAGE_COUNT]; static struct jump_info *jumps[PAGE_COUNT]; @@ -340,7 +351,7 @@ static struct decoded_insn #define STORE 2 // Store #define LOADLR 3 // Unaligned load #define STORELR 4 // Unaligned store -#define MOV 5 // Move +#define MOV 5 // Move (hi/lo only) #define ALU 6 // Arithmetic/logic #define MULTDIV 7 // Multiply/divide #define SHIFT 8 // Shift by register @@ -351,16 +362,9 @@ static struct decoded_insn #define CJUMP 13 // Conditional branch (BEQ/BNE/BGTZ/BLEZ) #define SJUMP 14 // Conditional branch (regimm format) #define COP0 15 // Coprocessor 0 -#define COP1 16 // Coprocessor 1 -#define C1LS 17 // Coprocessor 1 load/store -//#define FJUMP 18 // Conditional branch (floating point) -//#define FLOAT 19 // Floating point unit -//#define FCONV 20 // Convert integer to float -//#define FCOMP 21 // Floating point compare (sets FSREG) +#define RFE 16 #define SYSCALL 22// SYSCALL,BREAK -#define OTHER 23 // Other -//#define SPAN 24 // Branch/delay slot spans 2 pages -#define NI 25 // Not implemented +#define OTHER 23 // Other/unknown - do nothing #define HLECALL 26// PCSX fake opcodes for HLE #define COP2 27 // Coprocessor 2 move #define C2LS 28 // Coprocessor 2 load/store @@ -378,23 +382,25 @@ static struct decoded_insn // asm linkage void dyna_linker(); void cc_interrupt(); -void fp_exception(); -void fp_exception_ds(); void jump_syscall (u_int u0, u_int u1, u_int pc); void jump_syscall_ds(u_int u0, u_int u1, u_int pc); void jump_break (u_int u0, u_int u1, u_int pc); void jump_break_ds(u_int u0, u_int u1, u_int pc); +void jump_overflow (u_int u0, u_int u1, u_int pc); +void jump_overflow_ds(u_int u0, u_int u1, u_int pc); void jump_to_new_pc(); void call_gteStall(); void new_dyna_leave(); void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile); void *ndrc_get_addr_ht(u_int vaddr); -void ndrc_invalidate_addr(u_int addr); void ndrc_add_jump_out(u_int vaddr, void *src); +void ndrc_write_invalidate_one(u_int addr); +static void ndrc_write_invalidate_many(u_int addr, u_int end); static int new_recompile_block(u_int addr); static void invalidate_block(struct block_info *block); +static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_); // Needed by assembler static void wb_register(signed char r, const signed char regmap[], uint64_t dirty); @@ -441,13 +447,16 @@ static void mprotect_w_x(void *start, void *end, int is_x) sceKernelOpenVMDomain(); #elif defined(HAVE_LIBNX) Result rc; - if (is_x) - rc = jitTransitionToExecutable(&g_jit); - else - rc = jitTransitionToWritable(&g_jit); - if (R_FAILED(rc)) - SysPrintf("jitTransition %d %08x\n", is_x, rc); - #elif defined(NDRC_WRITE_OFFSET) + // check to avoid the full flush in jitTransitionToExecutable() + if (g_jit.type != JitType_CodeMemory) { + if (is_x) + rc = jitTransitionToExecutable(&g_jit); + else + rc = jitTransitionToWritable(&g_jit); + if (R_FAILED(rc)) + ;//SysPrintf("jitTransition %d %08x\n", is_x, rc); + } + #elif defined(TC_WRITE_OFFSET) // separated rx and rw areas are always available #else u_long mstart = (u_long)start & ~4095ul; @@ -459,10 +468,9 @@ static void mprotect_w_x(void *start, void *end, int is_x) #endif } -static void *start_tcache_write(void *start, void *end) +static void start_tcache_write(void *start, void *end) { mprotect_w_x(start, end, 0); - return (char *)start + ndrc_write_ofs; } static void end_tcache_write(void *start, void *end) @@ -478,7 +486,12 @@ static void end_tcache_write(void *start, void *end) #elif defined(_3DS) ctr_flush_invalidate_cache(); #elif defined(HAVE_LIBNX) - // handled in mprotect_w_x() + if (g_jit.type == JitType_CodeMemory) { + armDCacheClean(start, len); + armICacheInvalidate((char *)start - ndrc_write_ofs, len); + // as of v4.2.1 libnx lacks isb + __asm__ volatile("isb" ::: "memory"); + } #elif defined(__aarch64__) // as of 2021, __clear_cache() is still broken on arm64 // so here is a custom one :( @@ -497,15 +510,37 @@ static void *start_block(void) u_char *end = out + MAX_OUTPUT_BLOCK_SIZE; if (end > ndrc->translation_cache + sizeof(ndrc->translation_cache)) end = ndrc->translation_cache + sizeof(ndrc->translation_cache); - start_tcache_write(out, end); + start_tcache_write(NDRC_WRITE_OFFSET(out), NDRC_WRITE_OFFSET(end)); return out; } static void end_block(void *start) { - end_tcache_write(start, out); + end_tcache_write(NDRC_WRITE_OFFSET(start), NDRC_WRITE_OFFSET(out)); +} + +#ifdef NDRC_CACHE_FLUSH_ALL + +static int needs_clear_cache; + +static void mark_clear_cache(void *target) +{ + if (!needs_clear_cache) { + start_tcache_write(NDRC_WRITE_OFFSET(ndrc), NDRC_WRITE_OFFSET(ndrc + 1)); + needs_clear_cache = 1; + } +} + +static void do_clear_cache(void) +{ + if (needs_clear_cache) { + end_tcache_write(NDRC_WRITE_OFFSET(ndrc), NDRC_WRITE_OFFSET(ndrc + 1)); + needs_clear_cache = 0; + } } +#else + // also takes care of w^x mappings when patching code static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; @@ -514,7 +549,7 @@ static void mark_clear_cache(void *target) uintptr_t offset = (u_char *)target - ndrc->translation_cache; u_int mask = 1u << ((offset >> 12) & 31); if (!(needs_clear_cache[offset >> 17] & mask)) { - char *start = (char *)((uintptr_t)target & ~4095l); + char *start = (char *)NDRC_WRITE_OFFSET((uintptr_t)target & ~4095l); start_tcache_write(start, start + 4095); needs_clear_cache[offset >> 17] |= mask; } @@ -543,18 +578,16 @@ static void do_clear_cache(void) break; end += 4096; } - end_tcache_write(start, end); + end_tcache_write(NDRC_WRITE_OFFSET(start), NDRC_WRITE_OFFSET(end)); } needs_clear_cache[i] = 0; } } -//#define DEBUG_CYCLE_COUNT 1 +#endif // NDRC_CACHE_FLUSH_ALL #define NO_CYCLE_PENALTY_THR 12 -int cycle_multiplier = CYCLE_MULT_DEFAULT; // 100 for 1.0 -int cycle_multiplier_override; int cycle_multiplier_old; static int cycle_multiplier_active; @@ -651,6 +684,28 @@ static int doesnt_expire_soon(u_char *tcaddr) return diff > EXPIRITY_OFFSET + MAX_OUTPUT_BLOCK_SIZE; } +static unused void check_for_block_changes(u_int start, u_int end) +{ + u_int start_page = get_page_prev(start); + u_int end_page = get_page(end - 1); + u_int page; + + for (page = start_page; page <= end_page; page++) { + struct block_info *block; + for (block = blocks[page]; block != NULL; block = block->next) { + if (block->is_dirty) + continue; + if (memcmp(block->source, block->copy, block->len)) { + printf("bad block %08x-%08x %016llx %016llx @%08x\n", + block->start, block->start + block->len, + *(long long *)block->source, *(long long *)block->copy, psxRegs.pc); + fflush(stdout); + abort(); + } + } + } +} + static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) { void *found_clean = NULL; @@ -724,16 +779,17 @@ static void noinline *get_addr(u_int vaddr, int can_compile) return ndrc_get_addr_ht(vaddr); // generate an address error - Status|=2; - Cause=(vaddr<<31)|(4<<2); - EPC=(vaddr&1)?vaddr-5:vaddr; - BadVAddr=(vaddr&~1); + psxRegs.CP0.n.Cause &= 0x300; + psxRegs.CP0.n.Cause |= R3000E_AdEL << 2; + psxRegs.CP0.n.EPC = vaddr; + psxRegs.pc = 0x80000080; return ndrc_get_addr_ht(0x80000080); } // Look up address in hash table first void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile) { + //check_for_block_changes(vaddr, vaddr + MAXBLOCK); const struct ht_entry *ht_bin = hash_table_get(vaddr); stat_inc(stat_ht_lookups); if (ht_bin->vaddr[0] == vaddr) return ht_bin->tcaddr[0]; @@ -773,6 +829,12 @@ static signed char get_reg(const signed char regmap[], signed char r) #endif +// get reg suitable for writing +static signed char get_reg_w(const signed char regmap[], signed char r) +{ + return r == 0 ? -1 : get_reg(regmap, r); +} + // get reg as mask bit (1 << hr) static u_int get_regm(const signed char regmap[], signed char r) { @@ -1008,7 +1070,7 @@ static int needed_again(int r, int i) j++; break; } - if(dops[i+j].itype==SYSCALL||dops[i+j].itype==HLECALL||dops[i+j].itype==INTCALL||((source[i+j]&0xfc00003f)==0x0d)) + if (dops[i+j].is_exception) { break; } @@ -1129,12 +1191,15 @@ static const struct { FUNCNAME(jump_handler_write8), FUNCNAME(jump_handler_write16), FUNCNAME(jump_handler_write32), - FUNCNAME(ndrc_invalidate_addr), + FUNCNAME(ndrc_write_invalidate_one), + FUNCNAME(ndrc_write_invalidate_many), FUNCNAME(jump_to_new_pc), FUNCNAME(jump_break), FUNCNAME(jump_break_ds), FUNCNAME(jump_syscall), FUNCNAME(jump_syscall_ds), + FUNCNAME(jump_overflow), + FUNCNAME(jump_overflow_ds), FUNCNAME(call_gteStall), FUNCNAME(new_dyna_leave), FUNCNAME(pcsx_mtc0), @@ -1152,8 +1217,43 @@ static const char *func_name(const void *a) return function_names[i].name; return ""; } + +static const char *fpofs_name(u_int ofs) +{ + u_int *p = (u_int *)&dynarec_local + ofs/sizeof(u_int); + static char buf[64]; + switch (ofs) { + #define ofscase(x) case LO_##x: return " ; " #x + ofscase(next_interupt); + ofscase(last_count); + ofscase(pending_exception); + ofscase(stop); + ofscase(address); + ofscase(lo); + ofscase(hi); + ofscase(PC); + ofscase(cycle); + ofscase(mem_rtab); + ofscase(mem_wtab); + ofscase(psxH_ptr); + ofscase(invc_ptr); + ofscase(ram_offset); + #undef ofscase + } + buf[0] = 0; + if (psxRegs.GPR.r <= p && p < &psxRegs.GPR.r[32]) + snprintf(buf, sizeof(buf), " ; r%d", (int)(p - psxRegs.GPR.r)); + else if (psxRegs.CP0.r <= p && p < &psxRegs.CP0.r[32]) + snprintf(buf, sizeof(buf), " ; cp0 $%d", (int)(p - psxRegs.CP0.r)); + else if (psxRegs.CP2D.r <= p && p < &psxRegs.CP2D.r[32]) + snprintf(buf, sizeof(buf), " ; cp2d $%d", (int)(p - psxRegs.CP2D.r)); + else if (psxRegs.CP2C.r <= p && p < &psxRegs.CP2C.r[32]) + snprintf(buf, sizeof(buf), " ; cp2c $%d", (int)(p - psxRegs.CP2C.r)); + return buf; +} #else #define func_name(x) "" +#define fpofs_name(x) "" #endif #ifdef __i386__ @@ -1171,20 +1271,25 @@ static const char *func_name(const void *a) static void *get_trampoline(const void *f) { + struct ndrc_tramp *tramp = NDRC_WRITE_OFFSET(&ndrc->tramp); size_t i; - for (i = 0; i < ARRAY_SIZE(ndrc->tramp.f); i++) { - if (ndrc->tramp.f[i] == f || ndrc->tramp.f[i] == NULL) + for (i = 0; i < ARRAY_SIZE(tramp->f); i++) { + if (tramp->f[i] == f || tramp->f[i] == NULL) break; } - if (i == ARRAY_SIZE(ndrc->tramp.f)) { + if (i == ARRAY_SIZE(tramp->f)) { SysPrintf("trampoline table is full, last func %p\n", f); abort(); } - if (ndrc->tramp.f[i] == NULL) { - const void **d = start_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]); - *d = f; - end_tcache_write(&ndrc->tramp.f[i], &ndrc->tramp.f[i + 1]); + if (tramp->f[i] == NULL) { + start_tcache_write(&tramp->f[i], &tramp->f[i + 1]); + tramp->f[i] = f; + end_tcache_write(&tramp->f[i], &tramp->f[i + 1]); +#ifdef HAVE_LIBNX + // invalidate the RX mirror (unsure if necessary, but just in case...) + armDCacheFlush(&ndrc->tramp.f[i], sizeof(ndrc->tramp.f[i])); +#endif } return &ndrc->tramp.ops[i]; } @@ -1291,7 +1396,7 @@ static int blocks_remove_matching_addrs(struct block_info **head, int hit = 0; while (*head) { if ((((*head)->tc_offs ^ base_offs) >> shift) == 0) { - inv_debug("EXP: rm block %08x (tc_offs %zx)\n", (*head)->start, (*head)->tc_offs); + inv_debug("EXP: rm block %08x (tc_offs %x)\n", (*head)->start, (*head)->tc_offs); invalidate_block(*head); next = (*head)->next; free(*head); @@ -1352,7 +1457,7 @@ static void unlink_jumps_tc_range(struct jump_info *ji, u_int base_offs, int shi continue; } - inv_debug("EXP: rm link to %08x (tc_offs %zx)\n", ji->e[i].target_vaddr, tc_offs); + inv_debug("EXP: rm link to %08x (tc_offs %x)\n", ji->e[i].target_vaddr, tc_offs); stat_dec(stat_links); ji->count--; if (i < ji->count) { @@ -1387,7 +1492,7 @@ static int invalidate_range(u_int start, u_int end, int hit = 0; // additional area without code (to supplement invalid_code[]), [start, end) - // avoids excessive ndrc_invalidate_addr() calls + // avoids excessive ndrc_write_invalidate*() calls inv_start = start_m & ~0xfff; inv_end = end_m | 0xfff; @@ -1446,16 +1551,28 @@ void new_dynarec_invalidate_range(unsigned int start, unsigned int end) invalidate_range(start, end, NULL, NULL); } -void ndrc_invalidate_addr(u_int addr) +static void ndrc_write_invalidate_many(u_int start, u_int end) { // this check is done by the caller //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } - int ret = invalidate_range(addr, addr + 4, &inv_code_start, &inv_code_end); + int ret = invalidate_range(start, end, &inv_code_start, &inv_code_end); +#ifdef INV_DEBUG_W + int invc = invalid_code[start >> 12]; + u_int len = end - start; if (ret) - inv_debug("INV ADDR: %08x hit %d blocks\n", addr, ret); + printf("INV ADDR: %08x/%02x hit %d blocks\n", start, len, ret); else - inv_debug("INV ADDR: %08x miss, inv %08x-%08x\n", addr, inv_code_start, inv_code_end); + printf("INV ADDR: %08x/%02x miss, inv %08x-%08x invc %d->%d\n", start, len, + inv_code_start, inv_code_end, invc, invalid_code[start >> 12]); + check_for_block_changes(start, end); +#endif stat_inc(stat_inv_addr_calls); + (void)ret; +} + +void ndrc_write_invalidate_one(u_int addr) +{ + ndrc_write_invalidate_many(addr, addr + 4); } // This is called when loading a save state. @@ -1480,26 +1597,6 @@ void new_dynarec_invalidate_all_pages(void) do_clear_cache(); } -static void do_invstub(int n) -{ - literal_pool(20); - u_int reglist = stubs[n].a; - set_jump_target(stubs[n].addr, out); - save_regs(reglist); - if (stubs[n].b != 0) - emit_mov(stubs[n].b, 0); - emit_readword(&inv_code_start, 1); - emit_readword(&inv_code_end, 2); - emit_cmp(0, 1); - emit_cmpcs(2, 0); - void *jaddr = out; - emit_jc(0); - emit_far_call(ndrc_invalidate_addr); - set_jump_target(jaddr, out); - restore_regs(reglist); - emit_jmp(stubs[n].retaddr); // return address -} - // Add an entry to jump_out after making a link // src should point to code by emit_extjump() void ndrc_add_jump_out(u_int vaddr, void *src) @@ -1874,6 +1971,10 @@ static void alu_alloc(struct regstat *current,int i) } alloc_reg(current,i,dops[i].rt1); } + if (!(dops[i].opcode2 & 1)) { + alloc_cc(current,i); // for exceptions + dirty_reg(current,CCREG); + } } if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU if(dops[i].rt1) { @@ -1896,9 +1997,6 @@ static void alu_alloc(struct regstat *current,int i) alloc_reg(current,i,dops[i].rt1); } } - if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU - assert(0); - } clear_const(current,dops[i].rs1); clear_const(current,dops[i].rs2); clear_const(current,dops[i].rt1); @@ -1910,10 +2008,7 @@ static void imm16_alloc(struct regstat *current,int i) if(dops[i].rs1&&needed_again(dops[i].rs1,i)) alloc_reg(current,i,dops[i].rs1); else dops[i].use_lt1=!!dops[i].rs1; if(dops[i].rt1) alloc_reg(current,i,dops[i].rt1); - if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU - assert(0); - } - else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU + if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU clear_const(current,dops[i].rs1); clear_const(current,dops[i].rt1); } @@ -1932,6 +2027,14 @@ static void imm16_alloc(struct regstat *current,int i) set_const(current,dops[i].rt1,v+imm[i]); } else clear_const(current,dops[i].rt1); + if (dops[i].opcode == 0x08) { + alloc_cc(current,i); // for exceptions + dirty_reg(current,CCREG); + if (dops[i].rt1 == 0) { + alloc_reg_temp(current,i,-1); + minimum_free_regs[i]=1; + } + } } else { set_const(current,dops[i].rt1,imm[i]<<16); // LUI @@ -1950,15 +2053,7 @@ static void load_alloc(struct regstat *current,int i) alloc_reg(current, i, ROREG); if(dops[i].rt1&&!((current->u>>dops[i].rt1)&1)) { alloc_reg(current,i,dops[i].rt1); - assert(get_reg(current->regmap,dops[i].rt1)>=0); - if(dops[i].opcode==0x27||dops[i].opcode==0x37) // LWU/LD - { - assert(0); - } - else if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR - { - assert(0); - } + assert(get_reg_w(current->regmap, dops[i].rt1)>=0); dirty_reg(current,dops[i].rt1); // LWL/LWR need a temporary register for the old value if(dops[i].opcode==0x22||dops[i].opcode==0x26) @@ -1978,10 +2073,6 @@ static void load_alloc(struct regstat *current,int i) } alloc_reg_temp(current,i,-1); minimum_free_regs[i]=1; - if(dops[i].opcode==0x1A||dops[i].opcode==0x1B) // LDL/LDR - { - assert(0); - } } } @@ -2008,12 +2099,6 @@ static void store_alloc(struct regstat *current,int i) minimum_free_regs[i]=1; } -static void c1ls_alloc(struct regstat *current,int i) -{ - clear_const(current,dops[i].rt1); - alloc_reg(current,i,CSREG); // Status -} - static void c2ls_alloc(struct regstat *current,int i) { clear_const(current,dops[i].rt1); @@ -2082,7 +2167,6 @@ static void cop0_alloc(struct regstat *current,int i) { if(dops[i].rt1) { clear_const(current,dops[i].rt1); - alloc_all(current,i); alloc_reg(current,i,dops[i].rt1); dirty_reg(current,dops[i].rt1); } @@ -2099,14 +2183,14 @@ static void cop0_alloc(struct regstat *current,int i) current->u&=~1LL; alloc_reg(current,i,0); } + minimum_free_regs[i] = HOST_REGS; } - else - { - // RFE - assert(dops[i].opcode2==0x10); - alloc_all(current,i); - } - minimum_free_regs[i]=HOST_REGS; +} + +static void rfe_alloc(struct regstat *current, int i) +{ + alloc_all(current, i); + minimum_free_regs[i] = HOST_REGS; } static void cop2_alloc(struct regstat *current,int i) @@ -2190,14 +2274,12 @@ static void delayslot_alloc(struct regstat *current,int i) case COP0: cop0_alloc(current,i); break; - case COP1: + case RFE: + rfe_alloc(current,i); break; case COP2: cop2_alloc(current,i); break; - case C1LS: - c1ls_alloc(current,i); - break; case C2LS: c2ls_alloc(current,i); break; @@ -2274,47 +2356,77 @@ static void pass_args(int a0, int a1) } } -static void alu_assemble(int i, const struct regstat *i_regs) +static void alu_assemble(int i, const struct regstat *i_regs, int ccadj_) { if(dops[i].opcode2>=0x20&&dops[i].opcode2<=0x23) { // ADD/ADDU/SUB/SUBU - if(dops[i].rt1) { - signed char s1,s2,t; - t=get_reg(i_regs->regmap,dops[i].rt1); - if(t>=0) { - s1=get_reg(i_regs->regmap,dops[i].rs1); - s2=get_reg(i_regs->regmap,dops[i].rs2); - if(dops[i].rs1&&dops[i].rs2) { + int do_oflow = dops[i].may_except; // ADD/SUB with exceptions enabled + if (dops[i].rt1 || do_oflow) { + int do_exception_check = 0; + signed char s1, s2, t, tmp; + t = get_reg_w(i_regs->regmap, dops[i].rt1); + tmp = get_reg_temp(i_regs->regmap); + if (t < 0 && do_oflow) + t = tmp; + if (t >= 0) { + s1 = get_reg(i_regs->regmap, dops[i].rs1); + s2 = get_reg(i_regs->regmap, dops[i].rs2); + if (dops[i].rs1 && dops[i].rs2) { assert(s1>=0); assert(s2>=0); - if(dops[i].opcode2&2) emit_sub(s1,s2,t); - else emit_add(s1,s2,t); + if (dops[i].opcode2 & 2) { + if (do_oflow) { + emit_subs(s1, s2, tmp); + do_exception_check = 1; + } + else + emit_sub(s1,s2,t); + } + else { + if (do_oflow) { + emit_adds(s1, s2, tmp); + do_exception_check = 1; + } + else + emit_add(s1,s2,t); + } } else if(dops[i].rs1) { if(s1>=0) emit_mov(s1,t); else emit_loadreg(dops[i].rs1,t); } else if(dops[i].rs2) { - if(s2>=0) { - if(dops[i].opcode2&2) emit_neg(s2,t); - else emit_mov(s2,t); + if (s2 < 0) { + emit_loadreg(dops[i].rs2, t); + s2 = t; } - else { - emit_loadreg(dops[i].rs2,t); - if(dops[i].opcode2&2) emit_neg(t,t); + if (dops[i].opcode2 & 2) { + if (do_oflow) { + emit_negs(s2, tmp); + do_exception_check = 1; + } + else + emit_neg(s2, t); } + else if (s2 != t) + emit_mov(s2, t); } - else emit_zeroreg(t); + else + emit_zeroreg(t); + } + if (do_exception_check) { + void *jaddr = out; + emit_jo(0); + if (t >= 0 && tmp != t) + emit_mov(tmp, t); + add_stub_r(OVERFLOW_STUB, jaddr, out, i, 0, i_regs, ccadj_, 0); } } } - if(dops[i].opcode2>=0x2c&&dops[i].opcode2<=0x2f) { // DADD/DADDU/DSUB/DSUBU - assert(0); - } - if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU + else if(dops[i].opcode2==0x2a||dops[i].opcode2==0x2b) { // SLT/SLTU if(dops[i].rt1) { signed char s1l,s2l,t; { - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); //assert(t>=0); if(t>=0) { s1l=get_reg(i_regs->regmap,dops[i].rs1); @@ -2347,10 +2459,10 @@ static void alu_assemble(int i, const struct regstat *i_regs) } } } - if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR + else if(dops[i].opcode2>=0x24&&dops[i].opcode2<=0x27) { // AND/OR/XOR/NOR if(dops[i].rt1) { signed char s1l,s2l,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); { if(tl>=0) { s1l=get_reg(i_regs->regmap,dops[i].rs1); @@ -2414,12 +2526,12 @@ static void alu_assemble(int i, const struct regstat *i_regs) } } -static void imm16_assemble(int i, const struct regstat *i_regs) +static void imm16_assemble(int i, const struct regstat *i_regs, int ccadj_) { if (dops[i].opcode==0x0f) { // LUI if(dops[i].rt1) { signed char t; - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); //assert(t>=0); if(t>=0) { if(!((i_regs->isconst>>t)&1)) @@ -2428,23 +2540,55 @@ static void imm16_assemble(int i, const struct regstat *i_regs) } } if(dops[i].opcode==0x08||dops[i].opcode==0x09) { // ADDI/ADDIU - if(dops[i].rt1) { - signed char s,t; - t=get_reg(i_regs->regmap,dops[i].rt1); + int is_addi = (dops[i].opcode == 0x08); + if (dops[i].rt1 || is_addi) { + signed char s, t, tmp; + t=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); if(dops[i].rs1) { - //assert(t>=0); - //assert(s>=0); + tmp = get_reg_temp(i_regs->regmap); + if (is_addi) { + assert(tmp >= 0); + if (t < 0) t = tmp; + } if(t>=0) { if(!((i_regs->isconst>>t)&1)) { - if(s<0) { + int sum, do_exception_check = 0; + if (s < 0) { if(i_regs->regmap_entry[t]!=dops[i].rs1) emit_loadreg(dops[i].rs1,t); - emit_addimm(t,imm[i],t); - }else{ - if(!((i_regs->wasconst>>s)&1)) - emit_addimm(s,imm[i],t); + if (is_addi) { + emit_addimm_and_set_flags3(t, imm[i], tmp); + do_exception_check = 1; + } else - emit_movimm(constmap[i][s]+imm[i],t); + emit_addimm(t, imm[i], t); + } else { + if (!((i_regs->wasconst >> s) & 1)) { + if (is_addi) { + emit_addimm_and_set_flags3(s, imm[i], tmp); + do_exception_check = 1; + } + else + emit_addimm(s, imm[i], t); + } + else { + int oflow = add_overflow(constmap[i][s], imm[i], sum); + if (is_addi && oflow) + do_exception_check = 2; + else + emit_movimm(sum, t); + } + } + if (do_exception_check) { + void *jaddr = out; + if (do_exception_check == 2) + emit_jmp(0); + else { + emit_jo(0); + if (tmp != t) + emit_mov(tmp, t); + } + add_stub_r(OVERFLOW_STUB, jaddr, out, i, 0, i_regs, ccadj_, 0); } } } @@ -2456,26 +2600,11 @@ static void imm16_assemble(int i, const struct regstat *i_regs) } } } - if(dops[i].opcode==0x18||dops[i].opcode==0x19) { // DADDI/DADDIU - if(dops[i].rt1) { - signed char sl,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); - sl=get_reg(i_regs->regmap,dops[i].rs1); - if(tl>=0) { - if(dops[i].rs1) { - assert(sl>=0); - emit_addimm(sl,imm[i],tl); - } else { - emit_movimm(imm[i],tl); - } - } - } - } else if(dops[i].opcode==0x0a||dops[i].opcode==0x0b) { // SLTI/SLTIU if(dops[i].rt1) { //assert(dops[i].rs1!=0); // r0 might be valid, but it's probably a bug signed char sl,t; - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); sl=get_reg(i_regs->regmap,dops[i].rs1); //assert(t>=0); if(t>=0) { @@ -2514,7 +2643,7 @@ static void imm16_assemble(int i, const struct regstat *i_regs) else if(dops[i].opcode>=0x0c&&dops[i].opcode<=0x0e) { // ANDI/ORI/XORI if(dops[i].rt1) { signed char sl,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); sl=get_reg(i_regs->regmap,dops[i].rs1); if(tl>=0 && !((i_regs->isconst>>tl)&1)) { if(dops[i].opcode==0x0c) //ANDI @@ -2575,7 +2704,7 @@ static void shiftimm_assemble(int i, const struct regstat *i_regs) { if(dops[i].rt1) { signed char s,t; - t=get_reg(i_regs->regmap,dops[i].rt1); + t=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); //assert(t>=0); if(t>=0&&!((i_regs->isconst>>t)&1)){ @@ -2881,7 +3010,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) int offset_reg = -1; int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); offset=imm[i]; if(i_regs->regmap[HOST_CCREG]==CCREG) reglist&=~(1<>16)==0x1f80)) - ||dops[i].rt1==0) { + if(tl<0 && ((!c||(((u_int)constmap[i][s]+offset)>>16)==0x1f80) || dops[i].rt1==0)) { // could be FIFO, must perform the read // ||dummy read assem_debug("(forced read)\n"); @@ -2923,7 +3050,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) else if (ram_offset && memtarget) { offset_reg = get_ro_reg(i_regs, 0); } - int dummy=(dops[i].rt1==0)||(tl!=get_reg(i_regs->regmap,dops[i].rt1)); // ignore loads to r0 and unneeded reg + int dummy=(dops[i].rt1==0)||(tl!=get_reg_w(i_regs->regmap, dops[i].rt1)); // ignore loads to r0 and unneeded reg switch (dops[i].opcode) { case 0x20: // LB if(!c||memtarget) { @@ -3018,7 +3145,7 @@ static void load_assemble(int i, const struct regstat *i_regs, int ccadj_) default: assert(0); } - } + } // tl >= 0 if (fastio_reg_override == HOST_TEMPREG || offset_reg == HOST_TEMPREG) host_tempreg_release(); } @@ -3033,7 +3160,7 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) int offset_reg = -1; int fastio_reg_override = -1; u_int reglist=get_host_reglist(i_regs->regmap); - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); s=get_reg(i_regs->regmap,dops[i].rs1); temp=get_reg_temp(i_regs->regmap); temp2=get_reg(i_regs->regmap,FTEMP); @@ -3106,6 +3233,89 @@ static void loadlr_assemble(int i, const struct regstat *i_regs, int ccadj_) } #endif +static void do_invstub(int n) +{ + literal_pool(20); + assem_debug("do_invstub\n"); + u_int reglist = stubs[n].a; + u_int addrr = stubs[n].b; + int ofs_start = stubs[n].c; + int ofs_end = stubs[n].d; + int len = ofs_end - ofs_start; + u_int rightr = 0; + + set_jump_target(stubs[n].addr, out); + save_regs(reglist); + if (addrr != 0 || ofs_start != 0) + emit_addimm(addrr, ofs_start, 0); + emit_readword(&inv_code_start, 2); + emit_readword(&inv_code_end, 3); + if (len != 0) + emit_addimm(0, len + 4, (rightr = 1)); + emit_cmp(0, 2); + emit_cmpcs(3, rightr); + void *jaddr = out; + emit_jc(0); + void *func = (len != 0) + ? (void *)ndrc_write_invalidate_many + : (void *)ndrc_write_invalidate_one; + emit_far_call(func); + set_jump_target(jaddr, out); + restore_regs(reglist); + emit_jmp(stubs[n].retaddr); +} + +static void do_store_smc_check(int i, const struct regstat *i_regs, u_int reglist, int addr) +{ + if (HACK_ENABLED(NDHACK_NO_SMC_CHECK)) + return; + // this can't be used any more since we started to check exact + // block boundaries in invalidate_range() + //if (i_regs->waswritten & (1<= 0; j--) { + if (!dops[j].is_store || dops[j].rs1 != dops[i].rs1 + || abs(imm[j] - imm[j+1]) > imm_maxdiff) + break; + count++; + if (imm_min > imm[j]) + imm_min = imm[j]; + if (imm_max < imm[j]) + imm_max = imm[j]; + } +#if defined(HOST_IMM8) + int ir = get_reg(i_regs->regmap, INVCP); + assert(ir >= 0); + host_tempreg_acquire(); + emit_ldrb_indexedsr12_reg(ir, addr, HOST_TEMPREG); +#else + emit_cmpmem_indexedsr12_imm(invalid_code, addr, 1); + #error not handled +#endif +#ifdef INVALIDATE_USE_COND_CALL + if (count == 1) { + emit_cmpimm(HOST_TEMPREG, 1); + emit_callne(invalidate_addr_reg[addr]); + host_tempreg_release(); + return; + } +#endif + void *jaddr = emit_cbz(HOST_TEMPREG, 0); + host_tempreg_release(); + imm_min -= imm[i]; + imm_max -= imm[i]; + add_stub(INVCODE_STUB, jaddr, out, reglist|(1<waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,addr,1); - #else - emit_cmpmem_indexedsr12_imm(invalid_code,addr,1); - #endif - #ifdef INVALIDATE_USE_COND_CALL - emit_callne(invalidate_addr_reg[addr]); - #else - void *jaddr2 = out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,temp,1); - #else - emit_cmpmem_indexedsr12_imm(invalid_code,temp,1); - #endif - #ifdef INVALIDATE_USE_COND_CALL - emit_callne(invalidate_addr_reg[temp]); - #else - void *jaddr2 = out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr2,out,reglist|(1<regmap,dops[i].rt1); + signed char t=get_reg_w(i_regs->regmap, dops[i].rt1); u_int copr=(source[i]>>11)&0x1f; - //assert(t>=0); // Why does this happen? OOT is weird if(t>=0&&dops[i].rt1!=0) { emit_readword(®_cop0[copr],t); } @@ -3387,7 +3568,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); emit_addimm(HOST_CCREG,ccadj_,HOST_CCREG); - emit_writeword(HOST_CCREG,&Count); + emit_writeword(HOST_CCREG,&psxRegs.cycle); } // What a mess. The status register (12) can enable interrupts, // so needs a special case to handle a pending interrupt. @@ -3419,7 +3600,7 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) emit_movimm(copr,0); emit_far_call(pcsx_mtc0); if(copr==9||copr==11||copr==12||copr==13) { - emit_readword(&Count,HOST_CCREG); + emit_readword(&psxRegs.cycle,HOST_CCREG); emit_readword(&next_interupt,HOST_TEMPREG); emit_addimm(HOST_CCREG,-ccadj_,HOST_CCREG); emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG); @@ -3428,8 +3609,8 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) } if(copr==12||copr==13) { assert(!is_delayslot); - emit_readword(&pending_exception,14); - emit_test(14,14); + emit_readword(&pending_exception,HOST_TEMPREG); + emit_test(HOST_TEMPREG,HOST_TEMPREG); void *jaddr = out; emit_jeq(0); emit_readword(&pcaddr, 0); @@ -3440,61 +3621,15 @@ static void cop0_assemble(int i, const struct regstat *i_regs, int ccadj_) } emit_loadreg(dops[i].rs1,s); } - else - { - assert(dops[i].opcode2==0x10); - //if((source[i]&0x3f)==0x10) // RFE - { - emit_readword(&Status,0); - emit_andimm(0,0x3c,1); - emit_andimm(0,~0xf,0); - emit_orrshr_imm(1,2,0); - emit_writeword(0,&Status); - } - } -} - -static void cop1_unusable(int i, const struct regstat *i_regs) -{ - // XXX: should just just do the exception instead - //if(!cop1_usable) - { - void *jaddr=out; - emit_jmp(0); - add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0); - } -} - -static void cop1_assemble(int i, const struct regstat *i_regs) -{ - cop1_unusable(i, i_regs); } -static void c1ls_assemble(int i, const struct regstat *i_regs) +static void rfe_assemble(int i, const struct regstat *i_regs, int ccadj_) { - cop1_unusable(i, i_regs); -} - -// FP_STUB -static void do_cop1stub(int n) -{ - literal_pool(256); - assem_debug("do_cop1stub %x\n",start+stubs[n].a*4); - set_jump_target(stubs[n].addr, out); - int i=stubs[n].a; -// int rs=stubs[n].b; - struct regstat *i_regs=(struct regstat *)stubs[n].c; - int ds=stubs[n].d; - if(!ds) { - load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i); - //if(i_regs!=®s[i]) printf("oops: regs[i]=%x i_regs=%x",(int)®s[i],(int)i_regs); - } - //else {printf("fp exception in delay slot\n");} - wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty); - if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG); - emit_movimm(start+(i-ds)*4,EAX); // Get PC - emit_addimm(HOST_CCREG,ccadj[i],HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle... - emit_far_jump(ds?fp_exception_ds:fp_exception); + emit_readword(&psxRegs.CP0.n.SR, 0); + emit_andimm(0, 0x3c, 1); + emit_andimm(0, ~0xf, 0); + emit_orrshr_imm(1, 2, 0); + emit_writeword(0, &psxRegs.CP0.n.SR); } static int cop2_is_stalling_op(int i, int *cycles) @@ -3882,22 +4017,7 @@ static void c2ls_assemble(int i, const struct regstat *i_regs, int ccadj_) if(jaddr2) add_stub_r(type,jaddr2,out,i,ar,i_regs,ccadj_,reglist); if(dops[i].opcode==0x3a) // SWC2 - if(!(i_regs->waswritten&(1<regmap,INVCP); - assert(ir>=0); - emit_cmpmem_indexedsr12_reg(ir,ar,1); -#else - emit_cmpmem_indexedsr12_imm(invalid_code,ar,1); -#endif - #ifdef INVALIDATE_USE_COND_CALL - emit_callne(invalidate_addr_reg[ar]); - #else - void *jaddr3 = out; - emit_jne(0); - add_stub(INVCODE_STUB,jaddr3,out,reglist|(1<regmap,dops[i].rt1); + signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1); if(tl>=0&&dops[i].rt1!=0) cop2_get_dreg(copr,tl,temp); } @@ -3929,7 +4049,7 @@ static void cop2_assemble(int i, const struct regstat *i_regs) } else if (dops[i].opcode2==2) // CFC2 { - signed char tl=get_reg(i_regs->regmap,dops[i].rt1); + signed char tl=get_reg_w(i_regs->regmap, dops[i].rt1); if(tl>=0&&dops[i].rt1!=0) emit_readword(®_cop2c[copr],tl); } @@ -3993,6 +4113,18 @@ static void do_unalignedwritestub(int n) emit_jmp(stubs[n].retaddr); // return address } +static void do_overflowstub(int n) +{ + assem_debug("do_overflowstub %x\n", start + (u_int)stubs[n].a * 4); + literal_pool(24); + int i = stubs[n].a; + struct regstat *i_regs = (struct regstat *)stubs[n].c; + int ccadj = stubs[n].d; + set_jump_target(stubs[n].addr, out); + wb_dirtys(regs[i].regmap, regs[i].dirty); + exception_assemble(i, i_regs, ccadj); +} + #ifndef multdiv_assemble void multdiv_assemble(int i,struct regstat *i_regs) { @@ -4007,7 +4139,7 @@ static void mov_assemble(int i, const struct regstat *i_regs) //if(dops[i].opcode2==0x11||dops[i].opcode2==0x13) { // MTHI/MTLO if(dops[i].rt1) { signed char sl,tl; - tl=get_reg(i_regs->regmap,dops[i].rt1); + tl=get_reg_w(i_regs->regmap, dops[i].rt1); //assert(tl>=0); if(tl>=0) { sl=get_reg(i_regs->regmap,dops[i].rs1); @@ -4033,26 +4165,36 @@ static void call_c_cpu_handler(int i, const struct regstat *i_regs, int ccadj_, emit_addimm(HOST_CCREG,ccadj_,HOST_CCREG); emit_add(2,HOST_CCREG,2); emit_writeword(2,&psxRegs.cycle); + emit_addimm_ptr(FP,(u_char *)&psxRegs - (u_char *)&dynarec_local,0); emit_far_call(func); emit_far_jump(jump_to_new_pc); } -static void syscall_assemble(int i, const struct regstat *i_regs, int ccadj_) +static void exception_assemble(int i, const struct regstat *i_regs, int ccadj_) { // 'break' tends to be littered around to catch things like // division by 0 and is almost never executed, so don't emit much code here - void *func = (dops[i].opcode2 == 0x0C) - ? (is_delayslot ? jump_syscall_ds : jump_syscall) - : (is_delayslot ? jump_break_ds : jump_break); + void *func; + if (dops[i].itype == ALU || dops[i].itype == IMM16) + func = is_delayslot ? jump_overflow_ds : jump_overflow; + else if (dops[i].opcode2 == 0x0C) + func = is_delayslot ? jump_syscall_ds : jump_syscall; + else + func = is_delayslot ? jump_break_ds : jump_break; assert(get_reg(i_regs->regmap, CCREG) == HOST_CCREG); emit_movimm(start + i*4, 2); // pc emit_addimm(HOST_CCREG, ccadj_ + CLOCK_ADJUST(1), HOST_CCREG); emit_far_jump(func); } +static void hlecall_bad() +{ + assert(0); +} + static void hlecall_assemble(int i, const struct regstat *i_regs, int ccadj_) { - void *hlefunc = psxNULL; + void *hlefunc = hlecall_bad; uint32_t hleCode = source[i] & 0x03ffffff; if (hleCode < ARRAY_SIZE(psxHLEt)) hlefunc = psxHLEt[hleCode]; @@ -4109,7 +4251,7 @@ static void speculate_register_values(int i) // fallthrough case IMM16: if(dops[i].rt1&&is_const(®s[i],dops[i].rt1)) { - int value,hr=get_reg(regs[i].regmap,dops[i].rt1); + int value,hr=get_reg_w(regs[i].regmap, dops[i].rt1); if(hr>=0) { if(get_final_value(hr,i,&value)) smrv[dops[i].rt1]=value; @@ -4167,10 +4309,10 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) int ds = 0; switch (dops[i].itype) { case ALU: - alu_assemble(i, i_regs); + alu_assemble(i, i_regs, ccadj_); break; case IMM16: - imm16_assemble(i, i_regs); + imm16_assemble(i, i_regs, ccadj_); break; case SHIFT: shift_assemble(i, i_regs); @@ -4193,11 +4335,8 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) case COP0: cop0_assemble(i, i_regs, ccadj_); break; - case COP1: - cop1_assemble(i, i_regs); - break; - case C1LS: - c1ls_assemble(i, i_regs); + case RFE: + rfe_assemble(i, i_regs, ccadj_); break; case COP2: cop2_assemble(i, i_regs); @@ -4216,7 +4355,7 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) mov_assemble(i, i_regs); break; case SYSCALL: - syscall_assemble(i, i_regs, ccadj_); + exception_assemble(i, i_regs, ccadj_); break; case HLECALL: hlecall_assemble(i, i_regs, ccadj_); @@ -4242,7 +4381,6 @@ static int assemble(int i, const struct regstat *i_regs, int ccadj_) break; case NOP: case OTHER: - case NI: // not handled, just skip break; default: @@ -4355,7 +4493,7 @@ static void address_generation(int i, const struct regstat *i_regs, signed char int ra=-1; int agr=AGEN1+(i&1); if(dops[i].itype==LOAD) { - ra=get_reg(i_regs->regmap,dops[i].rt1); + ra=get_reg_w(i_regs->regmap, dops[i].rt1); if(ra<0) ra=get_reg_temp(i_regs->regmap); assert(ra>=0); } @@ -4973,8 +5111,8 @@ static void do_ccstub(int n) if(stubs[n].c!=-1) { // Save PC as return address - emit_movimm(stubs[n].c,EAX); - emit_writeword(EAX,&pcaddr); + emit_movimm(stubs[n].c,0); + emit_writeword(0,&pcaddr); } else { @@ -5259,7 +5397,7 @@ static void rjump_assemble_write_ra(int i) int rt,return_address; assert(dops[i+1].rt1!=dops[i].rt1); assert(dops[i+1].rt2!=dops[i].rt1); - rt=get_reg(branch_regs[i].regmap,dops[i].rt1); + rt=get_reg_w(branch_regs[i].regmap, dops[i].rt1); assem_debug("branch(%d): eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",i,branch_regs[i].regmap[0],branch_regs[i].regmap[1],branch_regs[i].regmap[2],branch_regs[i].regmap[3],branch_regs[i].regmap[5],branch_regs[i].regmap[6],branch_regs[i].regmap[7]); assert(rt>=0); return_address=start+i*4+8; @@ -5352,7 +5490,7 @@ static void rjump_assemble(int i, const struct regstat *i_regs) //assert(adj==0); emit_addimm_and_set_flags(ccadj[i] + CLOCK_ADJUST(2), HOST_CCREG); add_stub(CC_STUB,out,NULL,0,i,-1,TAKEN,rs); - if(dops[i+1].itype==COP0 && dops[i+1].opcode2==0x10) + if (dops[i+1].itype == RFE) // special case for RFE emit_jmp(0); else @@ -6026,13 +6164,6 @@ void disassemble_inst(int i) printf (" %x: %s r%d,cpr0[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC0 else printf (" %x: %s\n",start+i*4,insn[i]); break; - case COP1: - if(dops[i].opcode2<3) - printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC1 - else if(dops[i].opcode2>3) - printf (" %x: %s r%d,cpr1[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC1 - else printf (" %x: %s\n",start+i*4,insn[i]); - break; case COP2: if(dops[i].opcode2<3) printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rt1,(source[i]>>11)&0x1f); // MFC2 @@ -6040,9 +6171,6 @@ void disassemble_inst(int i) printf (" %x: %s r%d,cpr2[%d]\n",start+i*4,insn[i],dops[i].rs1,(source[i]>>11)&0x1f); // MTC2 else printf (" %x: %s\n",start+i*4,insn[i]); break; - case C1LS: - printf (" %x: %s cpr1[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]); - break; case C2LS: printf (" %x: %s cpr2[%d],r%d+%x\n",start+i*4,insn[i],(source[i]>>16)&0x1f,dops[i].rs1,imm[i]); break; @@ -6053,9 +6181,12 @@ void disassemble_inst(int i) //printf (" %s %8x\n",insn[i],source[i]); printf (" %x: %s\n",start+i*4,insn[i]); } + #ifndef REGMAP_PRINT return; - printf("D: %"PRIu64" WD: %"PRIu64" U: %"PRIu64"\n", - regs[i].dirty, regs[i].wasdirty, unneeded_reg[i]); + #endif + printf("D: %"PRIx64" WD: %"PRIx64" U: %"PRIx64" hC: %x hWC: %x hLC: %x\n", + regs[i].dirty, regs[i].wasdirty, unneeded_reg[i], + regs[i].isconst, regs[i].wasconst, regs[i].loadedconst); print_regmap("pre: ", regmap_pre[i]); print_regmap("entry: ", regs[i].regmap_entry); print_regmap("map: ", regs[i].regmap); @@ -6071,7 +6202,7 @@ static void disassemble_inst(int i) {} #define DRC_TEST_VAL 0x74657374 -static void new_dynarec_test(void) +static noinline void new_dynarec_test(void) { int (*testfunc)(void); void *beginning; @@ -6084,8 +6215,9 @@ static void new_dynarec_test(void) SysPrintf("linkage_arm* miscompilation/breakage detected.\n"); } - SysPrintf("testing if we can run recompiled code @%p...\n", out); - ((volatile u_int *)(out + ndrc_write_ofs))[0]++; // make the cache dirty + SysPrintf("(%p) testing if we can run recompiled code @%p...\n", + new_dynarec_test, out); + ((volatile u_int *)NDRC_WRITE_OFFSET(out))[0]++; // make the cache dirty for (i = 0; i < ARRAY_SIZE(ret); i++) { out = ndrc->translation_cache; @@ -6132,7 +6264,7 @@ void new_dynarec_clear_full(void) stat_clear(stat_blocks); stat_clear(stat_links); - cycle_multiplier_old = cycle_multiplier; + cycle_multiplier_old = Config.cycle_multiplier; new_dynarec_hacks_old = new_dynarec_hacks; } @@ -6161,8 +6293,10 @@ void new_dynarec_init(void) if (R_FAILED(rc)) SysPrintf("jitCreate failed: %08x\n", rc); SysPrintf("jitCreate: RX: %p RW: %p type: %d\n", g_jit.rx_addr, g_jit.rw_addr, g_jit.type); + jitTransitionToWritable(&g_jit); ndrc = g_jit.rx_addr; ndrc_write_ofs = (char *)g_jit.rw_addr - (char *)ndrc; + memset(NDRC_WRITE_OFFSET(&ndrc->tramp), 0, sizeof(ndrc->tramp)); #else uintptr_t desired_addr = 0; int prot = PROT_READ | PROT_WRITE | PROT_EXEC; @@ -6172,7 +6306,7 @@ void new_dynarec_init(void) extern char _end; desired_addr = ((uintptr_t)&_end + 0xffffff) & ~0xffffffl; #endif - #ifdef NDRC_WRITE_OFFSET + #ifdef TC_WRITE_OFFSET // mostly for testing fd = open("/dev/shm/pcsxr", O_CREAT | O_RDWR, 0600); ftruncate(fd, sizeof(*ndrc)); @@ -6186,7 +6320,7 @@ void new_dynarec_init(void) SysPrintf("mmap() failed: %s\n", strerror(errno)); abort(); } - #ifdef NDRC_WRITE_OFFSET + #ifdef TC_WRITE_OFFSET ndrc_write_ofs = (char *)mw - (char *)ndrc; #endif #endif @@ -6200,7 +6334,6 @@ void new_dynarec_init(void) #endif #endif out = ndrc->translation_cache; - cycle_multiplier=200; new_dynarec_clear_full(); #ifdef HOST_IMM8 // Copy this into local area so we don't have to put it in every literal pool @@ -6240,9 +6373,6 @@ void new_dynarec_cleanup(void) } stat_clear(stat_blocks); stat_clear(stat_links); - #ifdef ROM_COPY - if (munmap (ROM_COPY, 67108864) < 0) {SysPrintf("munmap() failed\n");} - #endif new_dynarec_print_stats(); } @@ -6260,7 +6390,7 @@ static u_int *get_source_start(u_int addr, u_int *limit) (0xbfc00000 <= addr && addr < 0xbfc80000))) { // BIOS. The multiplier should be much higher as it's uncached 8bit mem, - // but timings in PCSX are too tied to the interpreter's BIAS + // but timings in PCSX are too tied to the interpreter's 2-per-insn assumption if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M)) cycle_multiplier_active = 200; @@ -6447,17 +6577,20 @@ static int apply_hacks(void) static noinline void pass1_disassemble(u_int pagelimit) { int i, j, done = 0, ni_count = 0; - unsigned int type,op,op2; + unsigned int type,op,op2,op3; for (i = 0; !done; i++) { + int force_prev_to_interpreter = 0; memset(&dops[i], 0, sizeof(dops[i])); - op2=0; - minimum_free_regs[i]=0; - dops[i].opcode=op=source[i]>>26; + op2 = 0; + minimum_free_regs[i] = 0; + dops[i].opcode = op = source[i] >> 26; + type = INTCALL; + set_mnemonic(i, "???"); switch(op) { - case 0x00: set_mnemonic(i, "special"); type=NI; + case 0x00: set_mnemonic(i, "special"); op2=source[i]&0x3f; switch(op2) { @@ -6490,51 +6623,20 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x27: set_mnemonic(i, "NOR"); type=ALU; break; case 0x2A: set_mnemonic(i, "SLT"); type=ALU; break; case 0x2B: set_mnemonic(i, "SLTU"); type=ALU; break; - case 0x30: set_mnemonic(i, "TGE"); type=NI; break; - case 0x31: set_mnemonic(i, "TGEU"); type=NI; break; - case 0x32: set_mnemonic(i, "TLT"); type=NI; break; - case 0x33: set_mnemonic(i, "TLTU"); type=NI; break; - case 0x34: set_mnemonic(i, "TEQ"); type=NI; break; - case 0x36: set_mnemonic(i, "TNE"); type=NI; break; -#if 0 - case 0x14: set_mnemonic(i, "DSLLV"); type=SHIFT; break; - case 0x16: set_mnemonic(i, "DSRLV"); type=SHIFT; break; - case 0x17: set_mnemonic(i, "DSRAV"); type=SHIFT; break; - case 0x1C: set_mnemonic(i, "DMULT"); type=MULTDIV; break; - case 0x1D: set_mnemonic(i, "DMULTU"); type=MULTDIV; break; - case 0x1E: set_mnemonic(i, "DDIV"); type=MULTDIV; break; - case 0x1F: set_mnemonic(i, "DDIVU"); type=MULTDIV; break; - case 0x2C: set_mnemonic(i, "DADD"); type=ALU; break; - case 0x2D: set_mnemonic(i, "DADDU"); type=ALU; break; - case 0x2E: set_mnemonic(i, "DSUB"); type=ALU; break; - case 0x2F: set_mnemonic(i, "DSUBU"); type=ALU; break; - case 0x38: set_mnemonic(i, "DSLL"); type=SHIFTIMM; break; - case 0x3A: set_mnemonic(i, "DSRL"); type=SHIFTIMM; break; - case 0x3B: set_mnemonic(i, "DSRA"); type=SHIFTIMM; break; - case 0x3C: set_mnemonic(i, "DSLL32"); type=SHIFTIMM; break; - case 0x3E: set_mnemonic(i, "DSRL32"); type=SHIFTIMM; break; - case 0x3F: set_mnemonic(i, "DSRA32"); type=SHIFTIMM; break; -#endif } break; - case 0x01: set_mnemonic(i, "regimm"); type=NI; - op2=(source[i]>>16)&0x1f; + case 0x01: set_mnemonic(i, "regimm"); + type = SJUMP; + op2 = (source[i] >> 16) & 0x1f; switch(op2) { - case 0x00: set_mnemonic(i, "BLTZ"); type=SJUMP; break; - case 0x01: set_mnemonic(i, "BGEZ"); type=SJUMP; break; - //case 0x02: set_mnemonic(i, "BLTZL"); type=SJUMP; break; - //case 0x03: set_mnemonic(i, "BGEZL"); type=SJUMP; break; - //case 0x08: set_mnemonic(i, "TGEI"); type=NI; break; - //case 0x09: set_mnemonic(i, "TGEIU"); type=NI; break; - //case 0x0A: set_mnemonic(i, "TLTI"); type=NI; break; - //case 0x0B: set_mnemonic(i, "TLTIU"); type=NI; break; - //case 0x0C: set_mnemonic(i, "TEQI"); type=NI; break; - //case 0x0E: set_mnemonic(i, "TNEI"); type=NI; break; - case 0x10: set_mnemonic(i, "BLTZAL"); type=SJUMP; break; - case 0x11: set_mnemonic(i, "BGEZAL"); type=SJUMP; break; - //case 0x12: set_mnemonic(i, "BLTZALL"); type=SJUMP; break; - //case 0x13: set_mnemonic(i, "BGEZALL"); type=SJUMP; break; + case 0x10: set_mnemonic(i, "BLTZAL"); break; + case 0x11: set_mnemonic(i, "BGEZAL"); break; + default: + if (op2 & 1) + set_mnemonic(i, "BGEZ"); + else + set_mnemonic(i, "BLTZ"); } break; case 0x02: set_mnemonic(i, "J"); type=UJUMP; break; @@ -6551,68 +6653,40 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x0D: set_mnemonic(i, "ORI"); type=IMM16; break; case 0x0E: set_mnemonic(i, "XORI"); type=IMM16; break; case 0x0F: set_mnemonic(i, "LUI"); type=IMM16; break; - case 0x10: set_mnemonic(i, "cop0"); type=NI; - op2=(source[i]>>21)&0x1f; + case 0x10: set_mnemonic(i, "COP0"); + op2 = (source[i]>>21) & 0x1f; + if (op2 & 0x10) { + op3 = source[i] & 0x1f; + switch (op3) + { + case 0x01: case 0x02: case 0x06: case 0x08: type = INTCALL; break; + case 0x10: set_mnemonic(i, "RFE"); type=RFE; break; + default: type = OTHER; break; + } + break; + } switch(op2) { - case 0x00: set_mnemonic(i, "MFC0"); type=COP0; break; - case 0x02: set_mnemonic(i, "CFC0"); type=COP0; break; + u32 rd; + case 0x00: + set_mnemonic(i, "MFC0"); + rd = (source[i] >> 11) & 0x1F; + if (!(0x00000417u & (1u << rd))) + type = COP0; + break; case 0x04: set_mnemonic(i, "MTC0"); type=COP0; break; - case 0x06: set_mnemonic(i, "CTC0"); type=COP0; break; - case 0x10: set_mnemonic(i, "RFE"); type=COP0; break; + case 0x02: + case 0x06: type = INTCALL; break; + default: type = OTHER; break; } break; - case 0x11: set_mnemonic(i, "cop1"); type=COP1; + case 0x11: set_mnemonic(i, "COP1"); op2=(source[i]>>21)&0x1f; break; -#if 0 - case 0x14: set_mnemonic(i, "BEQL"); type=CJUMP; break; - case 0x15: set_mnemonic(i, "BNEL"); type=CJUMP; break; - case 0x16: set_mnemonic(i, "BLEZL"); type=CJUMP; break; - case 0x17: set_mnemonic(i, "BGTZL"); type=CJUMP; break; - case 0x18: set_mnemonic(i, "DADDI"); type=IMM16; break; - case 0x19: set_mnemonic(i, "DADDIU"); type=IMM16; break; - case 0x1A: set_mnemonic(i, "LDL"); type=LOADLR; break; - case 0x1B: set_mnemonic(i, "LDR"); type=LOADLR; break; -#endif - case 0x20: set_mnemonic(i, "LB"); type=LOAD; break; - case 0x21: set_mnemonic(i, "LH"); type=LOAD; break; - case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break; - case 0x23: set_mnemonic(i, "LW"); type=LOAD; break; - case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break; - case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break; - case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break; -#if 0 - case 0x27: set_mnemonic(i, "LWU"); type=LOAD; break; -#endif - case 0x28: set_mnemonic(i, "SB"); type=STORE; break; - case 0x29: set_mnemonic(i, "SH"); type=STORE; break; - case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break; - case 0x2B: set_mnemonic(i, "SW"); type=STORE; break; -#if 0 - case 0x2C: set_mnemonic(i, "SDL"); type=STORELR; break; - case 0x2D: set_mnemonic(i, "SDR"); type=STORELR; break; -#endif - case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break; - case 0x2F: set_mnemonic(i, "CACHE"); type=NOP; break; - case 0x30: set_mnemonic(i, "LL"); type=NI; break; - case 0x31: set_mnemonic(i, "LWC1"); type=C1LS; break; -#if 0 - case 0x34: set_mnemonic(i, "LLD"); type=NI; break; - case 0x35: set_mnemonic(i, "LDC1"); type=C1LS; break; - case 0x37: set_mnemonic(i, "LD"); type=LOAD; break; -#endif - case 0x38: set_mnemonic(i, "SC"); type=NI; break; - case 0x39: set_mnemonic(i, "SWC1"); type=C1LS; break; -#if 0 - case 0x3C: set_mnemonic(i, "SCD"); type=NI; break; - case 0x3D: set_mnemonic(i, "SDC1"); type=C1LS; break; - case 0x3F: set_mnemonic(i, "SD"); type=STORE; break; -#endif - case 0x12: set_mnemonic(i, "COP2"); type=NI; + case 0x12: set_mnemonic(i, "COP2"); op2=(source[i]>>21)&0x1f; - //if (op2 & 0x10) - if (source[i]&0x3f) { // use this hack to support old savestates with patched gte insns + if (op2 & 0x10) { + type = OTHER; if (gte_handlers[source[i]&0x3f]!=NULL) { #ifdef DISASM if (gte_regnames[source[i]&0x3f]!=NULL) @@ -6620,7 +6694,7 @@ static noinline void pass1_disassemble(u_int pagelimit) else snprintf(insn[i], sizeof(insn[i]), "COP2 %x", source[i]&0x3f); #endif - type=C2OP; + type = C2OP; } } else switch(op2) @@ -6631,32 +6705,53 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x06: set_mnemonic(i, "CTC2"); type=COP2; break; } break; + case 0x13: set_mnemonic(i, "COP3"); + op2=(source[i]>>21)&0x1f; + break; + case 0x20: set_mnemonic(i, "LB"); type=LOAD; break; + case 0x21: set_mnemonic(i, "LH"); type=LOAD; break; + case 0x22: set_mnemonic(i, "LWL"); type=LOADLR; break; + case 0x23: set_mnemonic(i, "LW"); type=LOAD; break; + case 0x24: set_mnemonic(i, "LBU"); type=LOAD; break; + case 0x25: set_mnemonic(i, "LHU"); type=LOAD; break; + case 0x26: set_mnemonic(i, "LWR"); type=LOADLR; break; + case 0x28: set_mnemonic(i, "SB"); type=STORE; break; + case 0x29: set_mnemonic(i, "SH"); type=STORE; break; + case 0x2A: set_mnemonic(i, "SWL"); type=STORELR; break; + case 0x2B: set_mnemonic(i, "SW"); type=STORE; break; + case 0x2E: set_mnemonic(i, "SWR"); type=STORELR; break; case 0x32: set_mnemonic(i, "LWC2"); type=C2LS; break; case 0x3A: set_mnemonic(i, "SWC2"); type=C2LS; break; - case 0x3B: set_mnemonic(i, "HLECALL"); type=HLECALL; break; - default: set_mnemonic(i, "???"); type=NI; - SysPrintf("NI %08x @%08x (%08x)\n", source[i], start + i*4, start); + case 0x3B: + if (Config.HLE && (source[i] & 0x03ffffff) < ARRAY_SIZE(psxHLEt)) { + set_mnemonic(i, "HLECALL"); + type = HLECALL; + } + break; + default: break; } + if (type == INTCALL) + SysPrintf("NI %08x @%08x (%08x)\n", source[i], start + i*4, start); dops[i].itype=type; dops[i].opcode2=op2; /* Get registers/immediates */ dops[i].use_lt1=0; gte_rs[i]=gte_rt[i]=0; + dops[i].rs1 = 0; + dops[i].rs2 = 0; + dops[i].rt1 = 0; + dops[i].rt2 = 0; switch(type) { case LOAD: dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=0; dops[i].rt1=(source[i]>>16)&0x1f; - dops[i].rt2=0; imm[i]=(short)source[i]; break; case STORE: case STORELR: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; - dops[i].rt1=0; - dops[i].rt2=0; imm[i]=(short)source[i]; break; case LOADLR: @@ -6665,7 +6760,6 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; dops[i].rt1=(source[i]>>16)&0x1f; - dops[i].rt2=0; imm[i]=(short)source[i]; break; case IMM16: @@ -6673,7 +6767,6 @@ static noinline void pass1_disassemble(u_int pagelimit) else dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=0; dops[i].rt1=(source[i]>>16)&0x1f; - dops[i].rt2=0; if(op>=0x0c&&op<=0x0e) { // ANDI/ORI/XORI imm[i]=(unsigned short)source[i]; }else{ @@ -6681,10 +6774,6 @@ static noinline void pass1_disassemble(u_int pagelimit) } break; case UJUMP: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; // The JAL instruction writes to r31. if (op&1) { dops[i].rt1=31; @@ -6693,9 +6782,6 @@ static noinline void pass1_disassemble(u_int pagelimit) break; case RJUMP: dops[i].rs1=(source[i]>>21)&0x1f; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; // The JALR instruction writes to rd. if (op2&1) { dops[i].rt1=(source[i]>>11)&0x1f; @@ -6705,8 +6791,6 @@ static noinline void pass1_disassemble(u_int pagelimit) case CJUMP: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=(source[i]>>16)&0x1f; - dops[i].rt1=0; - dops[i].rt2=0; if(op&2) { // BGTZ/BLEZ dops[i].rs2=0; } @@ -6714,10 +6798,8 @@ static noinline void pass1_disassemble(u_int pagelimit) case SJUMP: dops[i].rs1=(source[i]>>21)&0x1f; dops[i].rs2=CCREG; - dops[i].rt1=0; - dops[i].rt2=0; - if(op2&0x10) { // BxxAL - dops[i].rt1=31; + if (op2 == 0x10 || op2 == 0x11) { // BxxAL + dops[i].rt1 = 31; // NOTE: If the branch is not taken, r31 is still overwritten } break; @@ -6725,7 +6807,6 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>21)&0x1f; // source dops[i].rs2=(source[i]>>16)&0x1f; // subtract amount dops[i].rt1=(source[i]>>11)&0x1f; // destination - dops[i].rt2=0; break; case MULTDIV: dops[i].rs1=(source[i]>>21)&0x1f; // source @@ -6734,10 +6815,6 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rt2=LOREG; break; case MOV: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; if(op2==0x10) dops[i].rs1=HIREG; // MFHI if(op2==0x11) dops[i].rt1=HIREG; // MTHI if(op2==0x12) dops[i].rs1=LOREG; // MFLO @@ -6749,41 +6826,19 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].rs1=(source[i]>>16)&0x1f; // target of shift dops[i].rs2=(source[i]>>21)&0x1f; // shift amount dops[i].rt1=(source[i]>>11)&0x1f; // destination - dops[i].rt2=0; break; case SHIFTIMM: dops[i].rs1=(source[i]>>16)&0x1f; dops[i].rs2=0; dops[i].rt1=(source[i]>>11)&0x1f; - dops[i].rt2=0; imm[i]=(source[i]>>6)&0x1f; - // DSxx32 instructions - if(op2>=0x3c) imm[i]|=0x20; break; case COP0: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; - if(op2==0||op2==2) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0/CFC0 - if(op2==4||op2==6) dops[i].rs1=(source[i]>>16)&0x1F; // MTC0/CTC0 + if(op2==0) dops[i].rt1=(source[i]>>16)&0x1F; // MFC0 + if(op2==4) dops[i].rs1=(source[i]>>16)&0x1F; // MTC0 if(op2==4&&((source[i]>>11)&0x1f)==12) dops[i].rt2=CSREG; // Status - if(op2==16) if((source[i]&0x3f)==0x18) dops[i].rs2=CCREG; // ERET - break; - case COP1: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; - if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC1/DMFC1/CFC1 - if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC1/DMTC1/CTC1 - dops[i].rs2=CSREG; break; case COP2: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; if(op2<3) dops[i].rt1=(source[i]>>16)&0x1F; // MFC2/CFC2 if(op2>3) dops[i].rs1=(source[i]>>16)&0x1F; // MTC2/CTC2 dops[i].rs2=CSREG; @@ -6796,27 +6851,13 @@ static noinline void pass1_disassemble(u_int pagelimit) case 0x06: gte_rt[i]=1ll<<(gr+32); break; // CTC2 } break; - case C1LS: - dops[i].rs1=(source[i]>>21)&0x1F; - dops[i].rs2=CSREG; - dops[i].rt1=0; - dops[i].rt2=0; - imm[i]=(short)source[i]; - break; case C2LS: dops[i].rs1=(source[i]>>21)&0x1F; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; imm[i]=(short)source[i]; if(op==0x32) gte_rt[i]=1ll<<((source[i]>>16)&0x1F); // LWC2 else gte_rs[i]=1ll<<((source[i]>>16)&0x1F); // SWC2 break; case C2OP: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; gte_rs[i]=gte_reg_reads[source[i]&0x3f]; gte_rt[i]=gte_reg_writes[source[i]&0x3f]; gte_rt[i]|=1ll<<63; // every op changes flags @@ -6831,15 +6872,9 @@ static noinline void pass1_disassemble(u_int pagelimit) case HLECALL: case INTCALL: dops[i].rs1=CCREG; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; break; default: - dops[i].rs1=0; - dops[i].rs2=0; - dops[i].rt1=0; - dops[i].rt2=0; + break; } /* Calculate branch target addresses */ if(type==UJUMP) @@ -6866,41 +6901,48 @@ static noinline void pass1_disassemble(u_int pagelimit) dops[i].is_jump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP || dops[i].itype == CJUMP || dops[i].itype == SJUMP); dops[i].is_ujump = (dops[i].itype == RJUMP || dops[i].itype == UJUMP); // || (source[i] >> 16) == 0x1000 // beq r0,r0 dops[i].is_load = (dops[i].itype == LOAD || dops[i].itype == LOADLR || op == 0x32); // LWC2 + dops[i].is_delay_load = (dops[i].is_load || (source[i] & 0xf3d00000) == 0x40000000); // MFC/CFC dops[i].is_store = (dops[i].itype == STORE || dops[i].itype == STORELR || op == 0x3a); // SWC2 + dops[i].is_exception = (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL); + dops[i].may_except = dops[i].is_exception || (dops[i].itype == ALU && (op2 == 0x20 || op2 == 0x22)) || op == 8; - /* messy cases to just pass over to the interpreter */ + /* rare messy cases to just pass over to the interpreter */ if (i > 0 && dops[i-1].is_jump) { - int do_in_intrp=0; // branch in delay slot? if (dops[i].is_jump) { // don't handle first branch and call interpreter if it's hit - SysPrintf("branch in delay slot @%08x (%08x)\n", start + i*4, start); - do_in_intrp=1; + SysPrintf("branch in DS @%08x (%08x)\n", start + i*4, start); + force_prev_to_interpreter = 1; } - // basic load delay detection - else if((type==LOAD||type==LOADLR||type==COP0||type==COP2||type==C2LS)&&dops[i].rt1!=0) { + // basic load delay detection through a branch + else if (dops[i].is_delay_load && dops[i].rt1 != 0) { int t=(ba[i-1]-start)/4; if(0 <= t && t < i &&(dops[i].rt1==dops[t].rs1||dops[i].rt1==dops[t].rs2)&&dops[t].itype!=CJUMP&&dops[t].itype!=SJUMP) { // jump target wants DS result - potential load delay effect - SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); - do_in_intrp=1; + SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start); + force_prev_to_interpreter = 1; dops[t+1].bt=1; // expected return from interpreter } else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&& !(i>=3&&dops[i-3].is_jump)) { // v0 overwrite like this is a sign of trouble, bail out SysPrintf("v0 overwrite @%08x (%08x)\n", start + i*4, start); - do_in_intrp=1; + force_prev_to_interpreter = 1; } } - if (do_in_intrp) { - memset(&dops[i-1], 0, sizeof(dops[i-1])); - dops[i-1].itype = INTCALL; - dops[i-1].rs1 = CCREG; - ba[i-1] = -1; - done = 2; - i--; // don't compile the DS - } + } + else if (i > 0 && dops[i-1].is_delay_load && dops[i-1].rt1 != 0 + && (dops[i].rs1 == dops[i-1].rt1 || dops[i].rs2 == dops[i-1].rt1)) { + SysPrintf("load delay @%08x (%08x)\n", start + i*4, start); + force_prev_to_interpreter = 1; + } + if (force_prev_to_interpreter) { + memset(&dops[i-1], 0, sizeof(dops[i-1])); + dops[i-1].itype = INTCALL; + dops[i-1].rs1 = CCREG; + ba[i-1] = -1; + done = 2; + i--; // don't compile the DS/problematic load/etc } /* Is this the end of the block? */ @@ -6934,7 +6976,11 @@ static noinline void pass1_disassemble(u_int pagelimit) // Don't get too close to the limit if(i>MAXBLOCK/2) done=1; } - if (dops[i].itype == SYSCALL || dops[i].itype == HLECALL || dops[i].itype == INTCALL) + if (dops[i].itype == HLECALL) + stop = 1; + else if (dops[i].itype == INTCALL) + stop = 2; + else if (dops[i].is_exception) done = stop_after_jal ? 1 : 2; if (done == 2) { // Does the block continue due to a branch? @@ -6950,7 +6996,7 @@ static noinline void pass1_disassemble(u_int pagelimit) assert(start+i*4 8 || dops[i].opcode == 0x11)) { + if (dops[i].itype == INTCALL && (++ni_count > 8 || dops[i].opcode == 0x11)) { done=stop_after_jal=1; SysPrintf("Disabled speculative precompilation\n"); } @@ -7073,14 +7119,13 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r) } } } - else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL) + else if(dops[i].may_except) { - // SYSCALL instruction (software interrupt) + // SYSCALL instruction, etc or conditional exception u=1; } - else if(dops[i].itype==COP0 && dops[i].opcode2==0x10) + else if (dops[i].itype == RFE) { - // RFE u=1; } //u=1; // DEBUG @@ -7128,7 +7173,7 @@ static noinline void pass3_register_alloc(u_int addr) current.wasconst = 0; current.isconst = 0; current.loadedconst = 0; - current.waswritten = 0; + //current.waswritten = 0; int ds=0; int cc=0; int hr; @@ -7153,7 +7198,7 @@ static noinline void pass3_register_alloc(u_int addr) if(current.regmap[hr]==0) current.regmap[hr]=-1; } current.isconst=0; - current.waswritten=0; + //current.waswritten=0; } memcpy(regmap_pre[i],current.regmap,sizeof(current.regmap)); @@ -7252,7 +7297,6 @@ static noinline void pass3_register_alloc(u_int addr) delayslot_alloc(¤t,i+1); //current.isconst=0; // DEBUG ds=1; - //printf("i=%d, isconst=%x\n",i,current.isconst); break; case RJUMP: //current.isconst=0; @@ -7368,13 +7412,8 @@ static noinline void pass3_register_alloc(u_int addr) //current.isconst=0; break; case SJUMP: - //current.isconst=0; - //current.wasconst=0; - //regs[i].wasconst=0; clear_const(¤t,dops[i].rs1); clear_const(¤t,dops[i].rt1); - //if((dops[i].opcode2&0x1E)==0x0) // BLTZ/BGEZ - if((dops[i].opcode2&0x0E)==0x0) // BLTZ/BGEZ { alloc_cc(¤t,i); dirty_reg(¤t,CCREG); @@ -7382,9 +7421,6 @@ static noinline void pass3_register_alloc(u_int addr) if (dops[i].rt1==31) { // BLTZAL/BGEZAL alloc_reg(¤t,i,31); dirty_reg(¤t,31); - //#ifdef REG_PREFETCH - //alloc_reg(¤t,i,PTEMP); - //#endif } if((dops[i].rs1&&(dops[i].rs1==dops[i+1].rt1||dops[i].rs1==dops[i+1].rt2)) // The delay slot overwrites the branch condition. ||(dops[i].rt1==31&&(dops[i+1].rs1==31||dops[i+1].rs2==31||dops[i+1].rt1==31||dops[i+1].rt2==31))) { // DS touches $ra @@ -7400,17 +7436,6 @@ static noinline void pass3_register_alloc(u_int addr) delayslot_alloc(¤t,i+1); } } - else - // Don't alloc the delay slot yet because we might not execute it - if((dops[i].opcode2&0x1E)==0x2) // BLTZL/BGEZL - { - current.isconst=0; - current.wasconst=0; - regs[i].wasconst=0; - alloc_cc(¤t,i); - dirty_reg(¤t,CCREG); - alloc_reg(¤t,i,dops[i].rs1); - } ds=1; //current.isconst=0; break; @@ -7443,14 +7468,12 @@ static noinline void pass3_register_alloc(u_int addr) case COP0: cop0_alloc(¤t,i); break; - case COP1: + case RFE: + rfe_alloc(¤t,i); break; case COP2: cop2_alloc(¤t,i); break; - case C1LS: - c1ls_alloc(¤t,i); - break; case C2LS: c2ls_alloc(¤t,i); break; @@ -7512,12 +7535,14 @@ static noinline void pass3_register_alloc(u_int addr) memcpy(regs[i].regmap,current.regmap,sizeof(current.regmap)); } +#if 0 // see do_store_smc_check() if(i>0&&(dops[i-1].itype==STORE||dops[i-1].itype==STORELR||(dops[i-1].itype==C2LS&&dops[i-1].opcode==0x3a))&&(u_int)imm[i-1]<0x800) current.waswritten|=1<=0x800) current.waswritten&=~(1<0) @@ -7640,8 +7665,6 @@ static noinline void pass3_register_alloc(u_int addr) } break; case SJUMP: - //if((dops[i-1].opcode2&0x1E)==0) // BLTZ/BGEZ - if((dops[i-1].opcode2&0x0E)==0) // BLTZ/BGEZ { alloc_cc(¤t,i-1); dirty_reg(¤t,CCREG); @@ -7665,22 +7688,8 @@ static noinline void pass3_register_alloc(u_int addr) memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } - else - // Alloc the delay slot in case the branch is taken - if((dops[i-1].opcode2&0x1E)==2) // BLTZL/BGEZL - { - memcpy(&branch_regs[i-1],¤t,sizeof(current)); - branch_regs[i-1].u=(branch_unneeded_reg[i-1]&~((1LL< 0 && (dops[i-1].is_jump || dops[i].itype == SYSCALL || dops[i].itype == HLECALL)) + if (i > 0 && (dops[i-1].is_jump || dops[i].is_exception)) { cc=0; } @@ -7769,7 +7778,7 @@ static noinline void pass3_register_alloc(u_int addr) } } if(current.regmap[HOST_BTREG]==BTREG) current.regmap[HOST_BTREG]=-1; - regs[i].waswritten=current.waswritten; + //regs[i].waswritten=current.waswritten; } } @@ -7834,14 +7843,9 @@ static noinline void pass4_cull_unused_regs(void) nr |= get_regm(regs[i].regmap_entry, INVCP); } } - else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL) + else if (dops[i].may_except) { - // SYSCALL instruction (software interrupt) - nr=0; - } - else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18) - { - // ERET instruction (return from interrupt) + // SYSCALL instruction, etc or conditional exception nr=0; } else // Non-branch @@ -8004,8 +8008,8 @@ static noinline void pass5a_preallocate1(void) if(ba[i]>=start && ba[i]<(start+i*4)) if(dops[i+1].itype==NOP||dops[i+1].itype==MOV||dops[i+1].itype==ALU ||dops[i+1].itype==SHIFTIMM||dops[i+1].itype==IMM16||dops[i+1].itype==LOAD - ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR||dops[i+1].itype==C1LS - ||dops[i+1].itype==SHIFT||dops[i+1].itype==COP1 + ||dops[i+1].itype==STORE||dops[i+1].itype==STORELR + ||dops[i+1].itype==SHIFT ||dops[i+1].itype==COP2||dops[i+1].itype==C2LS||dops[i+1].itype==C2OP) { int t=(ba[i]-start)>>2; @@ -8273,9 +8277,9 @@ static noinline void pass5a_preallocate1(void) } } } - if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=C1LS&&dops[i].itype!=SHIFT&& + if(dops[i].itype!=STORE&&dops[i].itype!=STORELR&&dops[i].itype!=SHIFT&& dops[i].itype!=NOP&&dops[i].itype!=MOV&&dops[i].itype!=ALU&&dops[i].itype!=SHIFTIMM&& - dops[i].itype!=IMM16&&dops[i].itype!=LOAD&&dops[i].itype!=COP1) + dops[i].itype!=IMM16&&dops[i].itype!=LOAD) { memcpy(f_regmap,regs[i].regmap,sizeof(f_regmap)); } @@ -8295,7 +8299,7 @@ static noinline void pass5b_preallocate2(void) if(!dops[i+1].bt) { if(dops[i].itype==ALU||dops[i].itype==MOV||dops[i].itype==LOAD||dops[i].itype==SHIFTIMM||dops[i].itype==IMM16 - ||((dops[i].itype==COP1||dops[i].itype==COP2)&&dops[i].opcode2<3)) + ||(dops[i].itype==COP2&&dops[i].opcode2<3)) { if(dops[i+1].rs1) { if((hr=get_reg(regs[i+1].regmap,dops[i+1].rs1))>=0) @@ -8331,7 +8335,7 @@ static noinline void pass5b_preallocate2(void) } // Preload target address for load instruction (non-constant) if(dops[i+1].itype==LOAD&&dops[i+1].rs1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { - if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0) + if((hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { @@ -8348,7 +8352,7 @@ static noinline void pass5b_preallocate2(void) } // Load source into target register if(dops[i+1].use_lt1&&get_reg(regs[i+1].regmap,dops[i+1].rs1)<0) { - if((hr=get_reg(regs[i+1].regmap,dops[i+1].rt1))>=0) + if((hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1))>=0) { if(regs[i].regmap[hr]<0&®s[i+1].regmap_entry[hr]<0) { @@ -8422,10 +8426,10 @@ static noinline void pass5b_preallocate2(void) } } } - if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR/*||dops[i+1].itype==C1LS||||dops[i+1].itype==C2LS*/) { + if(dops[i+1].itype==LOAD||dops[i+1].itype==LOADLR||dops[i+1].itype==STORE||dops[i+1].itype==STORELR/*||dops[i+1].itype==C2LS*/) { hr = -1; if(dops[i+1].itype==LOAD) - hr=get_reg(regs[i+1].regmap,dops[i+1].rt1); + hr=get_reg_w(regs[i+1].regmap, dops[i+1].rt1); if(dops[i+1].itype==LOADLR||(dops[i+1].opcode&0x3b)==0x31||(dops[i+1].opcode&0x3b)==0x32) // LWC1/LDC1, LWC2/LDC2 hr=get_reg(regs[i+1].regmap,FTEMP); if(dops[i+1].itype==STORE||dops[i+1].itype==STORELR||(dops[i+1].opcode&0x3b)==0x39||(dops[i+1].opcode&0x3b)==0x3a) { // SWC1/SDC1/SWC2/SDC2 @@ -8702,15 +8706,9 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) } } } - else if(dops[i].itype==SYSCALL||dops[i].itype==HLECALL||dops[i].itype==INTCALL) + else if (dops[i].may_except) { - // SYSCALL instruction (software interrupt) - will_dirty_i=0; - wont_dirty_i=0; - } - else if(dops[i].itype==COP0 && (source[i]&0x3f)==0x18) - { - // ERET instruction (return from interrupt) + // SYSCALL instruction, etc or conditional exception will_dirty_i=0; wont_dirty_i=0; } @@ -8822,8 +8820,8 @@ static noinline void pass10_expire_blocks(void) u_int block_i = expirep / step & (PAGE_COUNT - 1); u_int phase = (expirep >> (base_shift - 1)) & 1u; if (!(expirep & (MAX_OUTPUT_BLOCK_SIZE / 2 - 1))) { - inv_debug("EXP: base_offs %x/%x phase %u\n", base_offs, - out - ndrc->translation_cache, phase); + inv_debug("EXP: base_offs %x/%lx phase %u\n", base_offs, + (long)(out - ndrc->translation_cache), phase); } if (!phase) { @@ -8881,14 +8879,21 @@ static int new_recompile_block(u_int addr) assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out); + if (addr & 3) { + if (addr != hack_addr) { + SysPrintf("game crash @%08x, ra=%08x\n", addr, psxRegs.GPR.n.ra); + hack_addr = addr; + } + return -1; + } + // this is just for speculation for (i = 1; i < 32; i++) { if ((psxRegs.GPR.r[i] & 0xffff0000) == 0x1f800000) state_rflags |= 1 << i; } - assert(!(addr & 3)); - start = addr & ~3; + start = addr; new_dynarec_did_compile=1; if (Config.HLE && start == 0x80001000) // hlecall { @@ -8929,8 +8934,8 @@ static int new_recompile_block(u_int addr) return 0; } - cycle_multiplier_active = cycle_multiplier_override && cycle_multiplier == CYCLE_MULT_DEFAULT - ? cycle_multiplier_override : cycle_multiplier; + cycle_multiplier_active = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT + ? Config.cycle_multiplier_override : Config.cycle_multiplier; source = get_source_start(start, &pagelimit); if (source == NULL) { @@ -9142,32 +9147,31 @@ static int new_recompile_block(u_int addr) emit_jmp(0); } - // TODO: delay slot stubs? // Stubs - for(i=0;i