X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fassem_arm.c;h=e29f6c3f9a9d770281f6b0c5cc9f60d86e412e32;hp=95304103c8527f4507b36cf62072e915b1893070;hb=ffb0b9e0f051789f97f5efdcfab0b261e62688f9;hpb=009faf24e665b66283558234920faab03b781d6c diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 95304103..e29f6c3f 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -1,6 +1,7 @@ /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * - * Mupen64plus - assem_arm.c * - * Copyright (C) 2009-2010 Ari64 * + * Mupen64plus/PCSX - assem_arm.c * + * Copyright (C) 2009-2011 Ari64 * + * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * @@ -170,6 +171,7 @@ void set_jump_target_fillslot(int addr,u_int target,int copy) /* Literal pool */ add_literal(int addr,int val) { + assert(literalcountregmap[n]==reg) {cur->regmap[n]=-1;} + if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) { + dirty=(cur->dirty>>n)&1; + cur->regmap[n]=-1; + } } cur->regmap[hr]=reg; cur->dirty&=~(1<dirty|=dirty<isconst&=~(1<0) { @@ -1002,7 +1014,8 @@ void emit_loadreg(int r, int hr) #ifdef FORCE32 if(r&64) { printf("64bit load in 32bit mode!\n"); - exit(1); + assert(0); + return; } #endif if((r&63)==0) @@ -1026,7 +1039,8 @@ void emit_storereg(int r, int hr) #ifdef FORCE32 if(r&64) { printf("64bit store in 32bit mode!\n"); - exit(1); + assert(0); + return; } #endif int addr=((int)reg)+((r&63)<>4); @@ -1049,7 +1063,7 @@ void emit_test(int rs, int rt) void emit_testimm(int rs,int imm) { u_int armval; - assem_debug("tst %s,$%d\n",regname[rs],imm); + assem_debug("tst %s,#%d\n",regname[rs],imm); genimm_checked(imm,&armval); output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval); } @@ -1120,7 +1134,6 @@ void emit_addimm(u_int rs,int imm,u_int rt) assert(rs<16); assert(rt<16); if(imm!=0) { - assert(imm>-65536&&imm<65536); u_int armval; if(genimm(imm,&armval)) { assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm); @@ -1129,11 +1142,13 @@ void emit_addimm(u_int rs,int imm,u_int rt) assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],imm); output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval); }else if(imm<0) { + assert(imm>-65536); assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00); assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF); output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8)); output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0)); }else{ + assert(imm<65536); assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00); assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8)); @@ -1311,6 +1326,14 @@ void emit_shlimm(int rs,u_int imm,int rt) output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } +void emit_lsls_imm(int rs,int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); +} + void emit_shrimm(int rs,u_int imm,int rt) { assert(imm>0); @@ -1370,6 +1393,17 @@ void emit_signextend16(int rs,int rt) #endif } +void emit_signextend8(int rs,int rt) +{ + #ifdef ARMv5_ONLY + emit_shlimm(rs,24,rt); + emit_sarimm(rt,24,rt); + #else + assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]); + output_w32(0xe6af0070|rd_rn_rm(rt,0,rs)); + #endif +} + void emit_shl(u_int rs,u_int shift,u_int rt) { assert(rs<16); @@ -1442,10 +1476,10 @@ void emit_cmpimm(int rs,int imm) { u_int armval; if(genimm(imm,&armval)) { - assem_debug("cmp %s,$%d\n",regname[rs],imm); + assem_debug("cmp %s,#%d\n",regname[rs],imm); output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval); }else if(genimm(-imm,&armval)) { - assem_debug("cmn %s,$%d\n",regname[rs],imm); + assem_debug("cmn %s,#%d\n",regname[rs],imm); output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval); }else if(imm>0) { assert(imm<65536); @@ -1743,8 +1777,9 @@ void emit_popreg(u_int r) } void emit_callreg(u_int r) { - assem_debug("call *%%%s\n",regname[r]); - assert(0); + assert(r<15); + assem_debug("blx %s\n",regname[r]); + output_w32(0xe12fff30|r); } void emit_jmpreg(u_int r) { @@ -1767,6 +1802,31 @@ void emit_readword_dualindexedx4(int rs1, int rs2, int rt) assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]); output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100); } +void emit_ldrcc_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2)); +} +void emit_ldrccb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2)); +} +void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2)); +} +void emit_ldrcch_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2)); +} +void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2)); +} void emit_readword_indexed_tlb(int addr, int rs, int map, int rt) { if(map<0) emit_readword_indexed(addr, rs, rt); @@ -1982,6 +2042,21 @@ void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp) } } } +void emit_strcc_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2)); +} +void emit_strccb_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2)); +} +void emit_strcch_dualindexed(int rs1, int rs2, int rt) +{ + assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2)); +} void emit_writeword(int rt, int addr) { u_int offset = addr-(u_int)&dynarec_local; @@ -2606,6 +2681,23 @@ emit_extjump_ds(int addr, int target) #include "pcsxmem_inline.c" #endif +// trashes r2 +static void pass_args(int a0, int a1) +{ + if(a0==1&&a1==0) { + // must swap + emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0); + } + else if(a0!=0&&a1==0) { + emit_mov(a1,1); + if (a0>=0) emit_mov(a0,0); + } + else { + if(a0>=0&&a0!=0) emit_mov(a0,0); + if(a1>=0&&a1!=1) emit_mov(a1,1); + } +} + do_readstub(int n) { assem_debug("do_readstub %x\n",start+stubs[n][3]*4); @@ -2628,6 +2720,73 @@ do_readstub(int n) rt=get_reg(i_regmap,rt1[i]); } assert(rs>=0); +#ifdef PCSX + int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0; + reglist|=(1<=0) + reglist&=~(1<=0&&rt1[i]!=0)) { + switch(type) { + case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break; + case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break; + case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break; + case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break; + case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break; + } + } + if(regs_saved) { + restore_jump=(int)out; + emit_jcc(0); // jump to reg restore + } + else + emit_jcc(stubs[n][2]); // return address + + if(!regs_saved) + save_regs(reglist); + int handler=0; + if(type==LOADB_STUB||type==LOADBU_STUB) + handler=(int)jump_handler_read8; + if(type==LOADH_STUB||type==LOADHU_STUB) + handler=(int)jump_handler_read16; + if(type==LOADW_STUB) + handler=(int)jump_handler_read32; + assert(handler!=0); + pass_args(rs,temp2); + int cc=get_reg(i_regmap,CCREG); + if(cc<0) + emit_loadreg(CCREG,2); + emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*stubs[n][6]+2,2); + emit_call(handler); + if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) { + switch(type) { + case LOADB_STUB: emit_signextend8(0,rt); break; + case LOADBU_STUB: emit_andimm(0,0xff,rt); break; + case LOADH_STUB: emit_signextend16(0,rt); break; + case LOADHU_STUB: emit_andimm(0,0xffff,rt); break; + case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break; + } + } + if(restore_jump) + set_jump_target(restore_jump,(int)out); + restore_regs(reglist); + emit_jmp(stubs[n][2]); // return address +#else // !PCSX if(addr<0) addr=rt; if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1); assert(addr>=0); @@ -2646,12 +2805,14 @@ do_readstub(int n) emit_writeword(rs,(int)&address); //emit_pusha(); save_regs(reglist); +#ifndef PCSX ds=i_regs!=®s[i]; int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]); u_int cmask=ds?-1:(0x100f|~i_regs->wasconst); if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<>12]; + if((l1&(1<<31))==0) { + u_int v=l1<<1; + *addr_host=v+addr; + return 0; + } + else { + l1<<=1; + if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB) + l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)]; + else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB) + l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2]; + else + l2=((u_int *)l1)[(addr&0xfff)/4]; + if((l2&(1<<31))==0) { + u_int v=l2<<1; + *addr_host=v+(addr&0xfff); + return 0; + } + return l2<<1; + } } +#endif inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist) { @@ -2713,6 +2904,63 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i int rt=get_reg(regmap,target); if(rs<0) rs=get_reg(regmap,-1); assert(rs>=0); +#ifdef PCSX + u_int handler,host_addr=0; + if(pcsx_direct_read(type,addr,target?rs:-1,rt)) + return; + handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr); + if (handler==0) { + if(rt<0) + return; + if(target==0||addr!=host_addr) + emit_movimm(host_addr,rs); + switch(type) { + case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break; + case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break; + case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break; + case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break; + case LOADW_STUB: emit_readword_indexed(0,rs,rt); break; + default: assert(0); + } + return; + } + + // call a memhandler + if(rt>=0) + reglist&=~(1<=33554432) { + // unreachable memhandler, a plugin func perhaps + emit_movimm(handler,1); + emit_callreg(1); + } + else + emit_call(handler); + if(rt>=0) { + switch(type) { + case LOADB_STUB: emit_signextend8(0,rt); break; + case LOADBU_STUB: emit_andimm(0,0xff,rt); break; + case LOADH_STUB: emit_signextend16(0,rt); break; + case LOADHU_STUB: emit_andimm(0,0xffff,rt); break; + case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break; + default: assert(0); + } + } + restore_regs(reglist); +#else // if !PCSX int ftable=0; if(type==LOADB_STUB||type==LOADBU_STUB) ftable=(int)readmemb; @@ -2725,15 +2973,23 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i ftable=(int)readmemd; #endif assert(ftable!=0); -#ifdef PCSX - if(pcsx_direct_read(type,addr,target?rs:-1,rt)) - return; -#endif if(target==0) emit_movimm(addr,rs); emit_writeword(rs,(int)&address); //emit_pusha(); save_regs(reglist); +#ifndef PCSX + if((signed int)addr>=(signed int)0xC0000000) { + // Theoretically we can have a pagefault here, if the TLB has never + // been enabled and the address is outside the range 80000000..BFFFFFFF + // Write out the registers so the pagefault can be handled. This is + // a very rare case and likely represents a bug. + int ds=regmap!=regs[i].regmap; + if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); + if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); + else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty); + } +#endif //emit_shrimm(rs,16,1); int cc=get_reg(regmap,CCREG); if(cc<0) { @@ -2784,6 +3040,7 @@ inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, i if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth); } } +#endif // !PCSX } do_writestub(int n) @@ -2809,6 +3066,68 @@ do_writestub(int n) } assert(rs>=0); assert(rt>=0); +#ifdef PCSX + int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra; + int reglist2=reglist|(1<=0); int ftable=0; @@ -2842,12 +3161,14 @@ do_writestub(int n) } //emit_pusha(); save_regs(reglist); +#ifndef PCSX ds=i_regs!=®s[i]; int real_rs=get_reg(i_regmap,rs1[i]); u_int cmask=ds?-1:(0x100f|~i_regs->wasconst); if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<=0); assert(rt>=0); #ifdef PCSX + u_int handler,host_addr=0; if(pcsx_direct_write(type,addr,rs,rt,regmap)) return; -#endif + handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr); + if (handler==0) { + if(target==0||addr!=host_addr) + emit_movimm(host_addr,rs); + switch(type) { + case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break; + case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break; + case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break; + default: assert(0); + } + return; + } + + // call a memhandler + save_regs(reglist); + pass_args(target!=0?rs:-1,rt); + if(target==0) + emit_movimm(addr,0); + int cc=get_reg(regmap,CCREG); + if(cc<0) + emit_loadreg(CCREG,2); + emit_addimm(cc<0?2:cc,CLOCK_DIVIDER*(adj+1),2); + emit_movimm(handler,3); + // returns new cycle_count + emit_call((int)jump_handler_write_h); + emit_addimm(0,-CLOCK_DIVIDER*(adj+1),cc<0?2:cc); + if(cc<0) + emit_storereg(CCREG,2); + restore_regs(reglist); +#else // if !pcsx int ftable=0; if(type==STOREB_STUB) ftable=(int)writememb; @@ -2922,6 +3274,19 @@ inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, } //emit_pusha(); save_regs(reglist); +#ifndef PCSX + // rearmed note: load_all_consts prevents BIOS boot, some bug? + if((signed int)addr>=(signed int)0xC0000000) { + // Theoretically we can have a pagefault here, if the TLB has never + // been enabled and the address is outside the range 80000000..BFFFFFFF + // Write out the registers so the pagefault can be handled. This is + // a very rare case and likely represents a bug. + int ds=regmap!=regs[i].regmap; + if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i); + if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty); + else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty); + } +#endif //emit_shrimm(rs,16,1); int cc=get_reg(regmap,CCREG); if(cc<0) { @@ -2952,6 +3317,7 @@ inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, } //emit_popa(); restore_regs(reglist); +#endif } do_unalignedwritestub(int n) @@ -2975,16 +3341,33 @@ do_unalignedwritestub(int n) reglist|=(1<wasconst); if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); + else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]); + else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); + else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]); + else { + smrv_strong_next&=~(1<=0) { + if(get_final_value(hr,i,&value)) + smrv[rt1[i]]=value; + else smrv[rt1[i]]=constmap[i][hr]; + smrv_strong_next|=1<>rs1[i])&1) speculate_mov(rs1[i],rt1[i]); + else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]); + } + break; + case LOAD: + if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) { + // special case for BIOS + smrv[rt1[i]]=0xa0000000; + smrv_strong_next|=1<>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst); +#endif +} + +enum { + MTYPE_8000 = 0, + MTYPE_8020, + MTYPE_0000, + MTYPE_A000, + MTYPE_1F80, +}; + +static int get_ptr_mem_type(u_int a) +{ + if(a < 0x00200000) { + if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0)) + // return wrong, must use memhandler for BIOS self-test to pass + // 007 does similar stuff from a00 mirror, weird stuff + return MTYPE_8000; + return MTYPE_0000; + } + if(0x1f800000 <= a && a < 0x1f801000) + return MTYPE_1F80; + if(0x80200000 <= a && a < 0x80800000) + return MTYPE_8020; + if(0xa0000000 <= a && a < 0xa0200000) + return MTYPE_A000; + return MTYPE_8000; +} +#endif + +static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override) +{ + int jaddr,type=0; + +#ifdef PCSX + int mr=rs1[i]; + if(((smrv_strong|smrv_weak)>>mr)&1) { + type=get_ptr_mem_type(smrv[mr]); + //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type); + } + else { + // use the mirror we are running on + type=get_ptr_mem_type(start); + //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type); + } + + if(type==MTYPE_8020) { // RAM 80200000+ mirror + emit_andimm(addr,~0x00e00000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_0000) { // RAM 0 mirror + emit_orimm(addr,0x80000000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_A000) { // RAM A mirror + emit_andimm(addr,~0x20000000,HOST_TEMPREG); + addr=*addr_reg_override=HOST_TEMPREG; + type=0; + } + else if(type==MTYPE_1F80) { // scratchpad + emit_addimm(addr,-0x1f800000,HOST_TEMPREG); + emit_cmpimm(HOST_TEMPREG,0x1000); + jaddr=(int)out; + emit_jc(0); + } +#endif + + if(type==0) + { + emit_cmpimm(addr,RAM_SIZE); + jaddr=(int)out; + #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK + // Hint to branch predictor that the branch is unlikely to be taken + if(rs1[i]>=28) + emit_jno_unlikely(0); + else + #endif + emit_jno(0); + } + + return jaddr; +} + #define shift_assemble shift_assemble_arm void loadlr_assemble_arm(int i,struct regstat *i_regs) @@ -3322,7 +3882,8 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) int s,th,tl,temp,temp2,addr,map=-1; int offset; int jaddr=0; - int memtarget,c=0; + int memtarget=0,c=0; + int fastload_reg_override=0; u_int hr,reglist=0; th=get_reg(i_regs->regmap,rt1[i]|64); tl=get_reg(i_regs->regmap,rt1[i]); @@ -3340,8 +3901,10 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) else addr=s; if(s>=0) { c=(i_regs->wasconst>>s)&1; - memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; - if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + if(c) { + memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE; + if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1; + } } if(!using_tlb) { if(!c) { @@ -3355,9 +3918,7 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) }else{ emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR } - emit_cmpimm(addr,RAM_SIZE); - jaddr=(int)out; - emit_jno(0); + jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override); } else { if (opcode[i]==0x22||opcode[i]==0x26) { @@ -3377,6 +3938,7 @@ void loadlr_assemble_arm(int i,struct regstat *i_regs) } map=get_reg(i_regs->regmap,TLREG); assert(map>=0); + reglist&=~(1<regmap,FTEMP|64); if(!c||memtarget) { //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h); @@ -3749,6 +4313,7 @@ void c2op_assemble(int i,struct regstat *i_regs) signed char temp=get_reg(i_regs->regmap,-1); u_int c2op=source[i]&0x3f; u_int hr,reglist=0; + int need_flags; for(hr=0;hrregmap[hr]>=0) reglist|=1<>63); // +1 because of how liveness detection works + assem_debug("gte unneeded %016llx, need_flags %d\n",gte_unneeded[i+1],need_flags); +#ifdef ARMv5_ONLY + // let's take more risk here + need_flags=need_flags&>e_reads_flags; +#endif + emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op])); } if(i>=slen-1||itype[i+1]!=C2OP) @@ -4451,7 +5022,9 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) assert(quotient>=0); assert(remainder>=0); emit_movs(d1,remainder); - emit_negmi(remainder,remainder); + emit_movimm(0xffffffff,quotient); + emit_negmi(quotient,quotient); // .. quotient and .. + emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump) emit_movs(d2,HOST_TEMPREG); emit_jeq((int)out+52); // Division by zero emit_negmi(HOST_TEMPREG,HOST_TEMPREG); @@ -4479,12 +5052,13 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) signed char remainder=get_reg(i_regs->regmap,HIREG); assert(quotient>=0); assert(remainder>=0); + emit_mov(d1,remainder); + emit_movimm(0xffffffff,quotient); // div0 case emit_test(d2,d2); - emit_jeq((int)out+44); // Division by zero + emit_jeq((int)out+40); // Division by zero emit_clz(d2,HOST_TEMPREG); emit_movimm(1<<31,quotient); emit_shl(d2,HOST_TEMPREG,d2); - emit_mov(d1,remainder); emit_shr(quotient,HOST_TEMPREG,quotient); emit_cmp(remainder,d2); emit_subcs(remainder,d2,remainder); @@ -4494,6 +5068,7 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) } } else // 64-bit +#ifndef FORCE32 { if(opcode2[i]==0x1C) // DMULT { @@ -4667,6 +5242,9 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) if(lol>=0) emit_loadreg(LOREG,lol); } } +#else + assert(0); +#endif } else { @@ -4761,7 +5339,7 @@ void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty, if(hr!=EXCLUDE_REG) { reg=pre[hr]; if(((~u)>>(reg&63))&1) { - if(reg==entry[hr]||(reg>0&&entry[hr]<0)) { + if(reg>0) { if(((dirty_pre&~dirty)>>hr)&1) { if(reg>0&®<34) { emit_storereg(reg,hr); @@ -4775,21 +5353,6 @@ void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty, } } } - else // Check if register moved to a different register - if((new_hr=get_reg(entry,reg))>=0) { - if((dirty_pre>>hr)&(~dirty>>new_hr)&1) { - if(reg>0&®<34) { - emit_storereg(reg,hr); - if( ((is32_pre&~uu)>>reg)&1 ) { - emit_sarimm(hr,31,HOST_TEMPREG); - emit_storereg(reg|64,HOST_TEMPREG); - } - } - else if(reg>=64) { - emit_storereg(reg,hr); - } - } - } } } }