#define dynarec_local ESYM(dynarec_local)
#define add_jump_out ESYM(add_jump_out)
#define new_recompile_block ESYM(new_recompile_block)
+#define ndrc_try_restore_block ESYM(ndrc_try_restore_block)
#define get_addr ESYM(get_addr)
#define get_addr_ht ESYM(get_addr_ht)
-#define clean_blocks ESYM(clean_blocks)
#define gen_interupt ESYM(gen_interupt)
#define invalidate_addr ESYM(invalidate_addr)
#define gteCheckStallRaw ESYM(gteCheckStallRaw)
DRC_VAR(scratch_buf_ptr, 4)
DRC_VAR(ram_offset, 4)
DRC_VAR(mini_ht, 256)
-DRC_VAR(restore_candidate, 512)
#ifdef TEXRELS_FORBIDDEN
.align 2
ptr_jump_in:
.word ESYM(jump_in)
-ptr_jump_dirty:
- .word ESYM(jump_dirty)
ptr_hash_table:
.word ESYM(hash_table)
#endif
#endif
.endm
-/* r0 = virtual target address */
-/* r1 = instruction to patch */
+/* r4 = virtual target address */
+/* r5 = instruction to patch */
.macro dyna_linker_main
#ifndef NO_WRITE_EXEC
load_varadr_ext r3, jump_in
/* get_page */
- lsr r2, r0, #12
+ lsr r2, r4, #12
mov r6, #4096
bic r2, r2, #0xe0000
sub r6, r6, #1
cmp r2, #0x1000
- ldr r7, [r1]
+ ldr r7, [r5]
biclt r2, #0x0e00
and r6, r6, r2
cmp r2, #2048
add r12, r7, #2
orrcs r2, r6, #2048
- ldr r5, [r3, r2, lsl #2]
+ ldr r1, [r3, r2, lsl #2]
lsl r12, r12, #8
- add r6, r1, r12, asr #6 /* old target */
+ add r6, r5, r12, asr #6 /* old target */
mov r8, #0
/* jump_in lookup */
1:
- movs r4, r5
+ movs r0, r1
beq 2f
- ldr r3, [r5] /* ll_entry .vaddr */
- ldrd r4, r5, [r4, #8] /* ll_entry .next, .addr */
- teq r3, r0
+ ldr r3, [r1] /* ll_entry .vaddr */
+ ldrd r0, r1, [r0, #8] /* ll_entry .addr, .next */
+ teq r3, r4
bne 1b
- teq r4, r6
- moveq pc, r4 /* Stale i-cache */
- mov r8, r4
+ teq r0, r6
+ moveq pc, r0 /* Stale i-cache */
+ mov r8, r0
b 1b /* jump_in may have dupes, continue search */
2:
tst r8, r8
- beq 3f /* r0 not in jump_in */
+ beq 3f /* r4 not in jump_in */
- mov r5, r1
+ mov r0, r4
mov r1, r6
bl add_jump_out
sub r2, r8, r5
str r1, [r5]
mov pc, r8
3:
- /* hash_table lookup */
- cmp r2, #2048
- load_varadr_ext r3, jump_dirty
- eor r4, r0, r0, lsl #16
- lslcc r2, r0, #9
- load_varadr_ext r6, hash_table
- lsr r4, r4, #12
- lsrcc r2, r2, #21
- bic r4, r4, #15
- ldr r5, [r3, r2, lsl #2]
- ldr r7, [r6, r4]!
- teq r7, r0
- ldreq pc, [r6, #8]
- ldr r7, [r6, #4]
- teq r7, r0
- ldreq pc, [r6, #12]
- /* jump_dirty lookup */
-6:
- movs r4, r5
- beq 8f
- ldr r3, [r5]
- ldr r5, [r4, #12]
- teq r3, r0
- bne 6b
-7:
- ldr r1, [r4, #8]
- /* hash_table insert */
- ldr r2, [r6]
- ldr r3, [r6, #8]
- str r0, [r6]
- str r1, [r6, #8]
- str r2, [r6, #4]
- str r3, [r6, #12]
- mov pc, r1
-8:
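+ /* not in jump_in: try to restore a matching dirty block */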
+ mov r0, r4
+ bl ndrc_try_restore_block
+ tst r0, r0
+ movne pc, r0
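+ /* restore failed: fall through, the caller recompiles */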
#else
/* XXX: should be able to do better than this... */
+ mov r0, r4
bl get_addr_ht
mov pc, r0
#endif
FUNCTION(dyna_linker):
/* r0 = virtual target address */
/* r1 = instruction to patch */
- dyna_linker_main
-
mov r4, r0
mov r5, r1
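+ /* lookup is retried from 10: after the block is recompiled */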
+10:
+ dyna_linker_main
+
+ mov r0, r4
bl new_recompile_block
tst r0, r0
- mov r0, r4
- mov r1, r5
- beq dyna_linker
+ beq 10b
+
/* pagefault */
+ mov r0, r4
mov r1, r0
mov r2, #(4<<2) /* Address error (fetch) */
.size dyna_linker, .-dyna_linker
FUNCTION(dyna_linker_ds):
/* r0 = virtual target address */
/* r1 = instruction to patch */
- dyna_linker_main
-
mov r4, r0
- bic r0, r0, #7
mov r5, r1
+10:
+ dyna_linker_main
+
+ bic r0, r4, #7
orr r0, r0, #1
bl new_recompile_block
tst r0, r0
- mov r0, r4
- mov r1, r5
- beq dyna_linker_ds
+ beq 10b
+
/* pagefault */
+ mov r0, r4
bic r1, r0, #7
mov r2, #0x80000008 /* High bit set indicates pagefault in delay slot */
sub r0, r1, #4
FUNCTION(cc_interrupt):
ldr r0, [fp, #LO_last_count]
mov r1, #0
- mov r2, #0x1fc
add r10, r0, r10
str r1, [fp, #LO_pending_exception]
- and r2, r2, r10, lsr #17
- add r3, fp, #LO_restore_candidate
str r10, [fp, #LO_cycle] /* PCSX cycles */
@@ str r10, [fp, #LO_reg_cop0+36] /* Count - not on PSX */
- ldr r4, [r2, r3]
mov r10, lr
- tst r4, r4
- bne .E4
-.E1:
+
bl gen_interupt
mov lr, r10
ldr r10, [fp, #LO_cycle]
ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc}
tst r1, r1
moveq pc, lr
-.E2:
ldr r0, [fp, #LO_pcaddr]
bl get_addr_ht
mov pc, r0
-.E4:
- /* Move 'dirty' blocks to the 'clean' list */
- lsl r5, r2, #3
- str r1, [r2, r3]
-.E5:
- lsrs r4, r4, #1
- mov r0, r5
- add r5, r5, #1
- blcs clean_blocks
- tst r5, #31
- bne .E5
- b .E1
.size cc_interrupt, .-cc_interrupt
.align 2
} dops[MAXBLOCK];
// used by asm:
- u_char *out;
struct ht_entry hash_table[65536] __attribute__((aligned(16)));
struct ll_entry *jump_in[4096] __attribute__((aligned(16)));
- struct ll_entry *jump_dirty[4096];
+ static u_char *out;
+ static struct ll_entry *jump_dirty[4096];
static struct ll_entry *jump_out[4096];
static u_int start;
static u_int *source;
extern int branch_target;
extern uintptr_t ram_offset;
extern uintptr_t mini_ht[32][2];
- extern u_char restore_candidate[512];
/* registers that may be allocated */
/* 1-31 gpr */
void jump_break_ds(u_int u0, u_int u1, u_int pc);
void jump_to_new_pc();
void call_gteStall();
-void clean_blocks(u_int page);
void add_jump_out(u_int vaddr, void *src);
void new_dyna_leave();
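+// forward declarations for helpers used by ndrc_try_restore_block() below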
+static void *get_clean_addr(void *addr);
+static void get_bounds(void *addr, u_char **start, u_char **end);
+static void ll_add_flags(struct ll_entry **head, int vaddr, u_int reg_sv_flags, void *addr);
+
// Needed by assembler
static void wb_register(signed char r, const signed char regmap[], uint64_t dirty);
static void wb_dirtys(const signed char i_regmap[], uint64_t i_dirty);
ht_bin->tcaddr[0] = tcaddr;
}
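+// mark all pages covered by [vaddr, vaddr+len) as containing valid code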
+static void mark_valid_code(u_int vaddr, u_int len)
+{
+ u_int i, j;
+ vaddr &= 0x1fffffff;
+ for (i = vaddr & ~0xfff; i < vaddr + len; i += 0x1000) {
+ // ram mirrors, but should not hurt bios
+ for (j = 0; j < 0x800000; j += 0x200000) {
+ invalid_code[(i|j) >> 12] =
+ invalid_code[(i|j|0x80000000u) >> 12] =
+ invalid_code[(i|j|0xa0000000u) >> 12] = 0;
+ }
+ }
+ inv_code_start = inv_code_end = ~0;
+}
+
// some of ari64's messy code; seems to rely on unsigned 32-bit overflow
static int doesnt_expire_soon(void *tcaddr)
{
return diff > (u_int)(0x60000000 + (MAX_OUTPUT_BLOCK_SIZE << (32-TARGET_SIZE_2)));
}
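+// Scan jump_dirty for a block at vaddr whose code is still unmodified; if
+// found, re-register it in jump_in and the hash table and return its clean
+// entry point, otherwise return NULL.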
+void *ndrc_try_restore_block(u_int vaddr)
+{
+ u_int page = get_page(vaddr);
+ struct ll_entry *head;
+
+ for (head = jump_dirty[page]; head != NULL; head = head->next)
+ {
+ if (head->vaddr != vaddr)
+ continue;
+ // don't restore blocks which are about to expire from the cache
+ if (!doesnt_expire_soon(head->addr))
+ continue;
+ if (!verify_dirty(head->addr))
+ continue;
+
+ // restore
+ u_char *start, *end;
+ get_bounds(head->addr, &start, &end);
+ mark_valid_code(vaddr, end - start);
+
+ void *clean_addr = get_clean_addr(head->addr);
+ ll_add_flags(jump_in + page, vaddr, head->reg_sv_flags, clean_addr);
+
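+ // point the hash table at the clean entry, adding an entry if needed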
+ struct ht_entry *ht_bin = hash_table_get(vaddr);
+ int in_ht = 0;
+ if (ht_bin->vaddr[0] == vaddr) {
+ ht_bin->tcaddr[0] = clean_addr; // Replace existing entry
+ in_ht = 1;
+ }
+ if (ht_bin->vaddr[1] == vaddr) {
+ ht_bin->tcaddr[1] = clean_addr; // Replace existing entry
+ in_ht = 1;
+ }
+ if (!in_ht)
+ hash_table_add(ht_bin, vaddr, clean_addr);
+ inv_debug("INV: Restored %08x (%p/%p)\n", head->vaddr, head->addr, clean_addr);
+ return clean_addr;
+ }
+ return NULL;
+}
+
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
void noinline *get_addr(u_int vaddr)
{
- u_int page=get_page(vaddr);
- u_int vpage=get_vpage(vaddr);
+ u_int page = get_page(vaddr);
struct ll_entry *head;
- //printf("TRACE: count=%d next=%d (get_addr %x,page %d)\n",Count,next_interupt,vaddr,page);
- head=jump_in[page];
- while(head!=NULL) {
- if(head->vaddr==vaddr) {
- //printf("TRACE: count=%d next=%d (get_addr match %x: %p)\n",Count,next_interupt,vaddr,head->addr);
+ void *code;
+
+ for (head = jump_in[page]; head != NULL; head = head->next) {
+ if (head->vaddr == vaddr) {
hash_table_add(hash_table_get(vaddr), vaddr, head->addr);
return head->addr;
}
- head=head->next;
}
- head=jump_dirty[vpage];
- while(head!=NULL) {
- if(head->vaddr==vaddr) {
- //printf("TRACE: count=%d next=%d (get_addr match dirty %x: %p)\n",Count,next_interupt,vaddr,head->addr);
- // Don't restore blocks which are about to expire from the cache
- if (doesnt_expire_soon(head->addr))
- if (verify_dirty(head->addr)) {
- //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]);
- invalid_code[vaddr>>12]=0;
- inv_code_start=inv_code_end=~0;
- if(vpage<2048) {
- restore_candidate[vpage>>3]|=1<<(vpage&7);
- }
- else restore_candidate[page>>3]|=1<<(page&7);
- struct ht_entry *ht_bin = hash_table_get(vaddr);
- if (ht_bin->vaddr[0] == vaddr)
- ht_bin->tcaddr[0] = head->addr; // Replace existing entry
- else
- hash_table_add(ht_bin, vaddr, head->addr);
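+ // no compiled block: try to restore a dirty one before recompiling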
+ code = ndrc_try_restore_block(vaddr);
+ if (code)
+ return code;
+
+ int r = new_recompile_block(vaddr);
+ if (r == 0)
+ return get_addr(vaddr);
- return head->addr;
- }
- }
- head=head->next;
- }
- //printf("TRACE: count=%d next=%d (get_addr no-match %x)\n",Count,next_interupt,vaddr);
- int r=new_recompile_block(vaddr);
- if(r==0) return get_addr(vaddr);
// generate an address error
Status|=2;
Cause=(vaddr<<31)|(4<<2);
FUNCNAME(jump_syscall),
FUNCNAME(jump_syscall_ds),
FUNCNAME(call_gteStall),
- FUNCNAME(clean_blocks),
FUNCNAME(new_dyna_leave),
FUNCNAME(pcsx_mtc0),
FUNCNAME(pcsx_mtc0_ds),
u_int page;
for(page=0;page<4096;page++)
invalidate_page(page);
- for(page=0;page<1048576;page++)
- if(!invalid_code[page]) {
- restore_candidate[(page&2047)>>3]|=1<<(page&7);
- restore_candidate[((page&2047)>>3)+256]|=1<<(page&7);
- }
#ifdef USE_MINI_HT
memset(mini_ht,-1,sizeof(mini_ht));
#endif
//inv_debug("add_jump_out: to %p\n",get_pointer(src));
}
-// If a code block was found to be unmodified (bit was set in
-// restore_candidate) and it remains unmodified (bit is clear
-// in invalid_code) then move the entries for that 4K page from
-// the dirty list to the clean list.
-void clean_blocks(u_int page)
-{
- struct ll_entry *head;
- inv_debug("INV: clean_blocks page=%d\n",page);
- head=jump_dirty[page];
- while(head!=NULL) {
- if(!invalid_code[head->vaddr>>12]) {
- // Don't restore blocks which are about to expire from the cache
- if (doesnt_expire_soon(head->addr)) {
- if(verify_dirty(head->addr)) {
- u_char *start, *end;
- //printf("Possibly Restore %x (%p)\n",head->vaddr, head->addr);
- u_int i;
- u_int inv=0;
- get_bounds(head->addr, &start, &end);
- if (start - rdram < RAM_SIZE) {
- for (i = (start-rdram+0x80000000)>>12; i <= (end-1-rdram+0x80000000)>>12; i++) {
- inv|=invalid_code[i];
- }
- }
- else if((signed int)head->vaddr>=(signed int)0x80000000+RAM_SIZE) {
- inv=1;
- }
- if(!inv) {
- void *clean_addr = get_clean_addr(head->addr);
- if (doesnt_expire_soon(clean_addr)) {
- u_int ppage=page;
- inv_debug("INV: Restored %x (%p/%p)\n",head->vaddr, head->addr, clean_addr);
- //printf("page=%x, addr=%x\n",page,head->vaddr);
- //assert(head->vaddr>>12==(page|0x80000));
- ll_add_flags(jump_in+ppage,head->vaddr,head->reg_sv_flags,clean_addr);
- struct ht_entry *ht_bin = hash_table_get(head->vaddr);
- if (ht_bin->vaddr[0] == head->vaddr)
- ht_bin->tcaddr[0] = clean_addr; // Replace existing entry
- if (ht_bin->vaddr[1] == head->vaddr)
- ht_bin->tcaddr[1] = clean_addr; // Replace existing entry
- }
- }
- }
- }
- }
- head=head->next;
- }
-}
-
/* Register allocation */
// Note: registers are allocated clean (unmodified state)
memset(invalid_code,1,sizeof(invalid_code));
memset(hash_table,0xff,sizeof(hash_table));
memset(mini_ht,-1,sizeof(mini_ht));
- memset(restore_candidate,0,sizeof(restore_candidate));
memset(shadow,0,sizeof(shadow));
copy=shadow;
expirep=16384; // Expiry pointer, +2 blocks
ram_offset=(uintptr_t)rdram-0x80000000;
if (ram_offset!=0)
SysPrintf("warning: RAM is not directly mapped, performance will suffer\n");
+ SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n");
+ SysPrintf("%p/%p/%p/%p/%p\n", psxM, psxH, psxR, mem_rtab, out);
}
void new_dynarec_cleanup(void)
out = ndrc->translation_cache;
// Trap writes to any of the pages we compiled
- for(i=start>>12;i<=(start+slen*4)>>12;i++) {
- invalid_code[i]=0;
- }
- inv_code_start=inv_code_end=~0;
-
- // for PCSX we need to mark all mirrors too
- if(get_page(start)<(RAM_SIZE>>12))
- for(i=start>>12;i<=(start+slen*4)>>12;i++)
- invalid_code[((u_int)0x00000000>>12)|(i&0x1ff)]=
- invalid_code[((u_int)0x80000000>>12)|(i&0x1ff)]=
- invalid_code[((u_int)0xa0000000>>12)|(i&0x1ff)]=0;
+ mark_valid_code(start, slen*4);
/* Pass 10 - Free memory by expiring oldest blocks */