From c6c3b1b36e53f576f540cbf99fb9f8d66ae1e92a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 2 Oct 2011 18:46:31 +0300 Subject: [PATCH 1/1] rewrite memhandlers (read) --- libpcsxcore/new_dynarec/assem_arm.c | 214 +++++++++++++++++++++++++- libpcsxcore/new_dynarec/emu_if.h | 7 + libpcsxcore/new_dynarec/linkage_arm.s | 50 +++++- libpcsxcore/new_dynarec/pcsxmem.c | 142 +++++++++++++++++ 4 files changed, 403 insertions(+), 10 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index adbde599..cda420fc 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1,6 +1,7 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Mupen64plus - assem_arm.c * + * Mupen64plus/PCSX - assem_arm.c * * Copyright (C) 2009-2011 Ari64 * + * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -1324,6 +1325,14 @@ void emit_shlimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } +void emit_lsls_imm(int rs,int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); +} + void emit_shrimm(int rs,u_int imm,int rt) { assert(imm>0); @@ -1383,6 +1392,17 @@ void emit_signextend16(int rs,int rt) #endif } +void emit_signextend8(int rs,int rt) +{ + #ifdef ARMv5_ONLY + emit_shlimm(rs,24,rt); + emit_sarimm(rt,24,rt); + #else + assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe6af0070|rd_rn_rm(rt,0,rs)); + #endif +} + void emit_shl(u_int rs,u_int shift,u_int rt) { assert(rs<16); @@ -1756,8 +1776,9 @@ void emit_popreg(u_int r) } void emit_callreg(u_int r) { - assem_debug("call *%%%s\n",regname[r]); - assert(0); + assert(r<15); + assem_debug("blx %s\n",regname[r]); + 
output_w32(0xe12fff30|r); } void emit_jmpreg(u_int r) { @@ -1780,6 +1801,31 @@ void emit_readword_dualindexedx4(int rs1, int rs2, int rt) assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100); } +void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2)); +} +void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2)); +} +void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2)); +} +void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2)); +} +void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); +} void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) { if(map<0) emit_readword_indexed(addr, rs, rt); @@ -2641,6 +2687,76 @@ do_readstub(int n) rt=get_reg(i_regmap,rt1[i]); } assert(rs>=0); +#ifdef PCSX + int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0; + reglist|=(1<=0) + reglist&=~(1<=0&&rt1[i]!=0)) { + switch(type) { + case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break; + case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break; + case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break; + case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break; + case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break; + } + } + if(regs_saved) { + restore_jump=(int)out; + emit_jcc(0); // jump to reg restore + } + else + emit_jcc(stubs[n][2]); // return address 
+ + if(!regs_saved) + save_regs(reglist); + int handler=0; + if(type==LOADB_STUB||type==LOADBU_STUB) + handler=(int)jump_handler_read8; + if(type==LOADH_STUB||type==LOADHU_STUB) + handler=(int)jump_handler_read16; + if(type==LOADW_STUB) + handler=(int)jump_handler_read32; + assert(handler!=0); + if(rs!=0) + emit_mov(rs,0); + if(temp2!=1) + emit_mov(temp2,1); + int cc=get_reg(i_regmap,CCREG); + if(cc<0) + emit_loadreg(CCREG,2); + emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*stubs[n][6]+2,2); + emit_call(handler); + if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { + switch(type) { + case LOADB_STUB: emit_signextend8(0,rt); break; + case LOADBU_STUB: emit_andimm(0,0xff,rt); break; + case LOADH_STUB: emit_signextend16(0,rt); break; + case LOADHU_STUB: emit_andimm(0,0xffff,rt); break; + case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break; + } + } + if(restore_jump) + set_jump_target(restore_jump,(int)out); + restore_regs(reglist); + emit_jmp(stubs[n][2]); // return address +#else // !PCSX if(addr<0) addr=rt; if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1); assert(addr>=0); @@ -2719,7 +2835,37 @@ do_readstub(int n) } } emit_jmp(stubs[n][2]); // return address +#endif // !PCSX +} + +#ifdef PCSX +// return memhandler, or store the directly accessible host address in *addr_host and return 0 +u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host) +{ + u_int l1,l2=0; + l1=((u_int *)table)[addr>>12]; + if((l1&(1u<<31))==0) { + u_int v=l1<<1; + *addr_host=v+addr; + return 0; + } + else { + l1<<=1; // bit 31 was set: the entry is a 2nd level table pointer stored >>1 + if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB) + l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)]; + else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB) + l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2]; + else + l2=((u_int *)l1)[(addr&0xfff)/4]; + if((l2&(1u<<31))==0) { + u_int v=l2<<1; + *addr_host=v+(addr&0xfff); + return 0; + } + return l2<<1; + } } +#endif inline_readstub(int type, int i, u_int addr, signed char regmap[], 
int target, int adj, u_int reglist) { @@ -2728,6 +2874,63 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i int rt=get_reg(regmap,target); if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); +#ifdef PCSX + u_int handler,host_addr=0; + if(pcsx_direct_read(type,addr,target?rs:-1,rt)) + return; + handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr); + if (handler==0) { + if(rt<0) + return; + if(target==0||addr!=host_addr) + emit_movimm(host_addr,rs); + switch(type) { + case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break; + case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; + case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break; + case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break; + case LOADW_STUB: emit_readword_indexed(0,rs,rt); break; + default: assert(0); + } + return; + } + + // call a memhandler + if(rt>=0) + reglist&=~(1<=33554432) { + // unreachable memhandler, a plugin func perhaps + emit_movimm(handler,1); + emit_callreg(1); + } + else + emit_call(handler); + if(rt>=0) { + switch(type) { + case LOADB_STUB: emit_signextend8(0,rt); break; + case LOADBU_STUB: emit_andimm(0,0xff,rt); break; + case LOADH_STUB: emit_signextend16(0,rt); break; + case LOADHU_STUB: emit_andimm(0,0xffff,rt); break; + case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break; + default: assert(0); + } + } + restore_regs(reglist); +#else // if !PCSX int ftable=0; if(type==LOADB_STUB||type==LOADBU_STUB) ftable=(int)readmemb; @@ -2740,10 +2943,6 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i ftable=(int)readmemd; #endif assert(ftable!=0); -#ifdef PCSX - if(pcsx_direct_read(type,addr,target?rs:-1,rt)) - return; -#endif if(target==0) emit_movimm(addr,rs); emit_writeword(rs,(int)&address); @@ -2811,6 +3010,7 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth); } } +#endif // !PCSX } do_writestub(int n) diff --git 
a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 7f625a7c..f71efc4b 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -34,6 +34,13 @@ extern const char gte_cycletab[64]; extern int FCR0, FCR31; /* mem */ +extern void *mem_rtab; +extern void *mem_wtab; + +void jump_handler_read8(u32 addr, u32 *table, u32 cycles); +void jump_handler_read16(u32 addr, u32 *table, u32 cycles); +void jump_handler_read32(u32 addr, u32 *table, u32 cycles); + extern void (*readmem[0x10000])(); extern void (*readmemb[0x10000])(); extern void (*readmemh[0x10000])(); diff --git a/libpcsxcore/new_dynarec/linkage_arm.s b/libpcsxcore/new_dynarec/linkage_arm.s index f5af0f59..1a4b33fe 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.s +++ b/libpcsxcore/new_dynarec/linkage_arm.s @@ -53,6 +53,8 @@ rdram = 0x80000000 .global memory_map /* psx */ .global psxRegs + .global mem_rtab + .global mem_wtab .global nd_pcsx_io .global psxH_ptr .global inv_code_start @@ -148,8 +150,15 @@ intCycle = interrupt + 4 .size intCycle, 256 psxRegs_end = intCycle + 256 +mem_rtab = psxRegs_end + .type mem_rtab, %object + .size mem_rtab, 4 +mem_wtab = mem_rtab + 4 + .type mem_wtab, %object + .size mem_wtab, 4 + /* nd_pcsx_io */ -nd_pcsx_io = psxRegs_end +nd_pcsx_io = mem_wtab + 4 .type nd_pcsx_io, %object .size nd_pcsx_io, nd_pcsx_io_end-nd_pcsx_io tab_read8 = nd_pcsx_io @@ -189,8 +198,8 @@ inv_code_end = inv_code_start + 4 .size inv_code_end, 4 align0 = inv_code_end + 4 /* just for alignment */ .type align0, %object - .size align0, 12 -branch_target = align0 + 12 + .size align0, 4 +branch_target = align0 + 4 .type branch_target, %object .size branch_target, 4 mini_ht = branch_target + 4 @@ -848,6 +857,9 @@ new_dyna_start: .global ari_write_io8 .global ari_write_io16 .global ari_write_io32 +.global jump_handler_read8 +.global jump_handler_read16 +.global jump_handler_read32 .macro ari_read_ram bic_const op ldr r0, [fp, #address-dynarec_local] @@ -1112,4 
+1124,36 @@ ari_write_io16: ari_write_io32: ari_write_io , word, tab_write32, 0 +/* */ + +.macro pcsx_read_mem readop tab_shift + /* r0 = address, r1 = handler_tab, r2 = cycles */ + lsl r3, r0, #20 + lsr r3, #(20+\tab_shift) + ldr r12, [fp, #last_count-dynarec_local] + ldr r1, [r1, r3, lsl #2] + add r2, r2, r12 + lsls r1, #1 +.if \tab_shift == 1 + lsl r3, #1 + \readop r0, [r1, r3] +.else + \readop r0, [r1, r3, lsl #\tab_shift] +.endif + movcc pc, lr + str r2, [fp, #cycle-dynarec_local] + bx r1 +.endm + +jump_handler_read8: + add r1, #0x1000/4*4 + 0x1000/2*4 @ shift to r8 part + pcsx_read_mem ldrccb, 0 + +jump_handler_read16: + add r1, #0x1000/4*4 @ shift to r16 part + pcsx_read_mem ldrcch, 1 + +jump_handler_read32: + pcsx_read_mem ldrcc, 2 + @ vim:filetype=armasm diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c index 601a6ee8..30a04e3a 100644 --- a/libpcsxcore/new_dynarec/pcsxmem.c +++ b/libpcsxcore/new_dynarec/pcsxmem.c @@ -6,6 +6,7 @@ */ #include +#include #include "../psxhw.h" #include "../cdrom.h" #include "../mdec.h" @@ -15,6 +16,7 @@ //#define memprintf printf #define memprintf(...) 
+static u8 unmapped_mem[0x1000]; int pcsx_ram_is_ro; static void read_mem8() @@ -314,10 +316,142 @@ struct { void *spu_writef; } nd_pcsx_io; +static u32 *mem_readtab; +static u32 *mem_writetab; +static u32 mem_iortab[(1+2+4) * 0x1000 / 4]; +static u32 mem_iowtab[(1+2+4) * 0x1000 / 4]; +//static u32 mem_unmrtab[(1+2+4) * 0x1000 / 4]; +static u32 mem_unmwtab[(1+2+4) * 0x1000 / 4]; + +static void map_item(u32 *out, const void *h, u32 flag) +{ + u32 hv = (u32)h; + if (hv & 1) + fprintf(stderr, "%p has LSB set\n", h); + *out = (hv >> 1) | (flag << 31); // entry = value >> 1 with the flag in bit 31, so bit 0 of h is lost +} + +// size must be a power of 2, at least 4k +#define map_l1_mem(tab, i, addr, size, base) \ + map_item(&tab[((addr)>>12) + i], (u8 *)(base) - (u32)(addr) - ((i << 12) & ~(size - 1)), 0) + +#define IOMEM32(a) (((a) & 0xfff) / 4) +#define IOMEM16(a) (0x1000/4 + (((a) & 0xfff) / 2)) +#define IOMEM8(a) (0x1000/4 + 0x1000/2 + ((a) & 0xfff)) + void new_dyna_pcsx_mem_init(void) { int i; +#if 1 + // have to map these further to keep tcache close to .text + mem_readtab = mmap((void *)0x08000000, 0x200000 * 4, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mem_readtab == MAP_FAILED) { + fprintf(stderr, "failed to map mem tables\n"); + exit(1); + } + mem_writetab = mem_readtab + 0x100000; + + // 1st level lookup: + // 0: direct mem + // 1: use 2nd lookup + // 2nd level lookup: + // 0: direct mem variable + // 1: memhandler + + // default/unmapped memhandlers + for (i = 0; i < 0x100000; i++) { + //map_item(&mem_readtab[i], mem_unmrtab, 1); + map_l1_mem(mem_readtab, i, 0, 0x1000, unmapped_mem); + map_item(&mem_writetab[i], mem_unmwtab, 1); + } + + // RAM and its mirrors + for (i = 0; i < (0x800000 >> 12); i++) { + map_l1_mem(mem_readtab, i, 0x80000000, 0x200000, psxM); + map_l1_mem(mem_writetab, i, 0x80000000, 0x200000, psxM); + map_l1_mem(mem_readtab, i, 0x00000000, 0x200000, psxM); + map_l1_mem(mem_writetab, i, 0x00000000, 0x200000, psxM); + map_l1_mem(mem_readtab, i, 0xa0000000, 0x200000, psxM); 
+ map_l1_mem(mem_writetab, i, 0xa0000000, 0x200000, psxM); + } + + // stupid BIOS RAM check + // TODO + + // BIOS and its mirrors + for (i = 0; i < (0x80000 >> 12); i++) { + map_l1_mem(mem_readtab, i, 0x1fc00000, 0x80000, psxR); + map_l1_mem(mem_readtab, i, 0xbfc00000, 0x80000, psxR); + } + + // scratchpad + map_l1_mem(mem_readtab, 0, 0x1f800000, 0x1000, psxH); + map_l1_mem(mem_writetab, 0, 0x1f800000, 0x1000, psxH); + + // I/O + map_item(&mem_readtab[0x1f801000 >> 12], mem_iortab, 1); + map_item(&mem_writetab[0x1f801000 >> 12], mem_iowtab, 1); + // L2 + // unmapped tables (fill every entry: the 32-, 16- and 8-bit parts) + for (i = 0; i < (1+2+4) * 0x1000 / 4; i++) + map_item(&mem_unmwtab[i], write_mem_dummy, 1); + + // fill IO tables + for (i = 0; i < 0x1000/4; i++) { + map_item(&mem_iortab[i], &psxH[0x1000], 0); + map_item(&mem_iowtab[i], &psxH[0x1000], 0); + } + for (; i < 0x1000/4 + 0x1000/2; i++) { + map_item(&mem_iortab[i], &psxH[0x1000], 0); + map_item(&mem_iowtab[i], &psxH[0x1000], 0); + } + for (; i < 0x1000/4 + 0x1000/2 + 0x1000; i++) { + map_item(&mem_iortab[i], &psxH[0x1000], 0); + map_item(&mem_iowtab[i], &psxH[0x1000], 0); + } + + map_item(&mem_iortab[IOMEM32(0x1040)], io_read_sio32, 1); + map_item(&mem_iortab[IOMEM32(0x1100)], io_rcnt_read_count0, 1); + map_item(&mem_iortab[IOMEM32(0x1104)], io_rcnt_read_mode0, 1); + map_item(&mem_iortab[IOMEM32(0x1108)], io_rcnt_read_target0, 1); + map_item(&mem_iortab[IOMEM32(0x1110)], io_rcnt_read_count1, 1); + map_item(&mem_iortab[IOMEM32(0x1114)], io_rcnt_read_mode1, 1); + map_item(&mem_iortab[IOMEM32(0x1118)], io_rcnt_read_target1, 1); + map_item(&mem_iortab[IOMEM32(0x1120)], io_rcnt_read_count2, 1); + map_item(&mem_iortab[IOMEM32(0x1124)], io_rcnt_read_mode2, 1); + map_item(&mem_iortab[IOMEM32(0x1128)], io_rcnt_read_target2, 1); +// map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); +// map_item(&mem_iortab[IOMEM32(0x1814)], GPU_readStatus, 1); + map_item(&mem_iortab[IOMEM32(0x1820)], mdecRead0, 1); + map_item(&mem_iortab[IOMEM32(0x1824)], mdecRead1, 1); + + 
map_item(&mem_iortab[IOMEM16(0x1040)], io_read_sio16, 1); + map_item(&mem_iortab[IOMEM16(0x1044)], sioReadStat16, 1); + map_item(&mem_iortab[IOMEM16(0x1048)], sioReadMode16, 1); + map_item(&mem_iortab[IOMEM16(0x104a)], sioReadCtrl16, 1); + map_item(&mem_iortab[IOMEM16(0x104e)], sioReadBaud16, 1); + map_item(&mem_iortab[IOMEM16(0x1100)], io_rcnt_read_count0, 1); + map_item(&mem_iortab[IOMEM16(0x1104)], io_rcnt_read_mode0, 1); + map_item(&mem_iortab[IOMEM16(0x1108)], io_rcnt_read_target0, 1); + map_item(&mem_iortab[IOMEM16(0x1110)], io_rcnt_read_count1, 1); + map_item(&mem_iortab[IOMEM16(0x1114)], io_rcnt_read_mode1, 1); + map_item(&mem_iortab[IOMEM16(0x1118)], io_rcnt_read_target1, 1); + map_item(&mem_iortab[IOMEM16(0x1120)], io_rcnt_read_count2, 1); + map_item(&mem_iortab[IOMEM16(0x1124)], io_rcnt_read_mode2, 1); + map_item(&mem_iortab[IOMEM16(0x1128)], io_rcnt_read_target2, 1); + + map_item(&mem_iortab[IOMEM8(0x1040)], sioRead8, 1); + map_item(&mem_iortab[IOMEM8(0x1800)], cdrRead0, 1); + map_item(&mem_iortab[IOMEM8(0x1801)], cdrRead1, 1); + map_item(&mem_iortab[IOMEM8(0x1802)], cdrRead2, 1); + map_item(&mem_iortab[IOMEM8(0x1803)], cdrRead3, 1); + + mem_rtab = mem_readtab; + mem_wtab = mem_writetab; +#endif +/// // default/unmapped handlers for (i = 0; i < 0x10000; i++) { readmemb[i] = read_mem8; @@ -383,7 +517,15 @@ void new_dyna_pcsx_mem_init(void) void new_dyna_pcsx_mem_reset(void) { + int i; + // plugins might change so update the pointers + map_item(&mem_iortab[IOMEM32(0x1810)], GPU_readData, 1); + map_item(&mem_iortab[IOMEM32(0x1814)], GPU_readStatus, 1); + + for (i = 0x1c00; i < 0x1e00; i += 2) + map_item(&mem_iortab[IOMEM16(i)], SPU_readRegister, 1); + nd_pcsx_io.spu_readf = SPU_readRegister; nd_pcsx_io.spu_writef = SPU_writeRegister; -- 2.39.2