From 665f33e1e8ce2e40a7939a33075c3bce1c90790c Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 30 Nov 2012 02:53:25 +0200 Subject: [PATCH] improve ARM feature detection --- configure | 12 ---- include/arm_features.h | 28 ++++++++++ jni/Android.mk | 6 -- libpcsxcore/{gte_arm.s => gte_arm.S} | 43 ++++++++++----- libpcsxcore/new_dynarec/assem_arm.c | 79 +++++++++++++++++++-------- libpcsxcore/new_dynarec/assem_arm.h | 1 - libpcsxcore/new_dynarec/emu_if.c | 4 +- libpcsxcore/new_dynarec/linkage_arm.S | 10 ++++ plugins/dfsound/arm_utils.S | 3 +- plugins/dfsound/spu.c | 3 +- plugins/gpu_unai/gpu_fixedpoint.h | 4 +- 11 files changed, 132 insertions(+), 61 deletions(-) create mode 100644 include/arm_features.h rename libpcsxcore/{gte_arm.s => gte_arm.S} (97%) diff --git a/configure b/configure index 4cff8980..053dba90 100755 --- a/configure +++ b/configure @@ -231,12 +231,6 @@ if [ "$ARCH" = "arm" ]; then if check_define __thumb__; then CFLAGS="$CFLAGS -marm" fi - - if [ "$have_armv7" = "yes" ]; then - ASFLAGS="$ASFLAGS --defsym HAVE_ARMV7=1" - else - ASFLAGS="$ASFLAGS --defsym HAVE_ARMV7=0" - fi else # dynarec only available on ARM enable_dynarec="no" @@ -439,12 +433,6 @@ echo "PLATFORM = $platform" >> $config_mak echo "BUILTIN_GPU = $builtin_gpu" >> $config_mak echo "SOUND_DRIVERS = $sound_drivers" >> $config_mak echo "PLUGINS = $plugins" >> $config_mak -if [ "$have_armv6" = "yes" ]; then - echo "HAVE_ARMV6 = 1" >> $config_mak -fi -if [ "$have_armv7" = "yes" ]; then - echo "HAVE_ARMV7 = 1" >> $config_mak -fi if [ "$have_arm_neon" = "yes" ]; then echo "HAVE_NEON = 1" >> $config_mak fi diff --git a/include/arm_features.h b/include/arm_features.h new file mode 100644 index 00000000..dcdda917 --- /dev/null +++ b/include/arm_features.h @@ -0,0 +1,28 @@ +#ifndef __ARM_FEATURES_H__ +#define __ARM_FEATURES_H__ + +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) + +#define HAVE_ARMV7 
+#define HAVE_ARMV6 +#define HAVE_ARMV5 + +#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ + || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) + +#define HAVE_ARMV6 +#define HAVE_ARMV5 + +#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \ + || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) + +#define HAVE_ARMV5 + +#endif + +/* no need for HAVE_NEON - GCC defines __ARM_NEON__ consistently */ + +#endif /* __ARM_FEATURES_H__ */ diff --git a/jni/Android.mk b/jni/Android.mk index 98f59f83..60db810e 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -19,12 +19,6 @@ endif ifeq ($(TARGET_ARCH),arm) LOCAL_ARM_MODE := arm - ifeq ($(TARGET_ARCH_EABI),armeabi-v7a) - LOCAL_CFLAGS += -DHAVE_ARMV7=1 - else - LOCAL_CFLAGS += -DHAVE_ARMV7=0 - endif - LOCAL_CFLAGS += -DANDROID_ARM LOCAL_SRC_FILES += ../libpcsxcore/gte_arm.S diff --git a/libpcsxcore/gte_arm.s b/libpcsxcore/gte_arm.S similarity index 97% rename from libpcsxcore/gte_arm.s rename to libpcsxcore/gte_arm.S index 8700f69c..e711e829 100644 --- a/libpcsxcore/gte_arm.s +++ b/libpcsxcore/gte_arm.S @@ -5,37 +5,37 @@ * See the COPYING file in the top-level directory. 
*/ -/* .equiv HAVE_ARMV7, 1 */ +#include "arm_features.h" .text .align 2 .macro sgnxt16 rd rs -.if HAVE_ARMV7 +#ifdef HAVE_ARMV7 sxth \rd, \rs -.else +#else lsl \rd, \rs, #16 asr \rd, \rd, #16 -.endif +#endif .endm @ prepare work reg for ssatx @ in: wr reg, bit to saturate to .macro ssatx_prep wr bit -.if !HAVE_ARMV7 +#ifndef HAVE_ARMV7 mov \wr, #(1<<(\bit-1)) -.endif +#endif .endm .macro ssatx rd wr bit -.if HAVE_ARMV7 +#ifdef HAVE_ARMV7 ssat \rd, #\bit, \rd -.else +#else cmp \rd, \wr subge \rd, \wr, #1 cmn \rd, \wr rsblt \rd, \wr, #0 -.endif +#endif .endm @ prepare work reg for ssatx0 (sat to 0..2^(bit-1)) @@ -52,17 +52,19 @@ .endm .macro usat16_ rd rs -.if HAVE_ARMV7 +#ifdef HAVE_ARMV7 usat \rd, #16, \rs -.else +#else subs \rd, \rs, #0 movlt \rd, #0 cmp \rd, #0x10000 movge \rd, #0x0ff00 orrge \rd, #0x000ff -.endif +#endif .endm +#ifdef HAVE_ARMV5 + .macro udiv_ rd rm rs lsl \rm, #16 clz \rd, \rs @@ -408,6 +410,7 @@ gteMVMVA_part_cv3sh12_arm: bx lr .size gteMVMVA_part_cv3sh12_arm, .-gteMVMVA_part_cv3sh12_arm +#endif /* HAVE_ARMV5 */ .global gteNCLIP_arm @ r0=CP2 (d,c), gteNCLIP_arm: @@ -430,12 +433,12 @@ gteNCLIP_arm: movs r2, r1, lsl #1 adc r5, r5 cmp r5, #0 -.if HAVE_ARMV7 +#ifdef HAVE_ARMV7 movtgt lr, #((1<<31)|(1<<16))>>16 -.else +#else movgt lr, #(1<<31) orrgt lr, #(1<<16) -.endif +#endif cmn r5, #1 orrmi lr, r6 str r1, [r0, #4*24] @@ -460,7 +463,12 @@ gteNCLIP_arm: rsblt r2, r1, #0 .endif str r2, [r0, #4*9] +#ifdef HAVE_ARMV5 ldrd r2, [r0, #4*26] @ gteMAC23 +#else + ldr r2, [r0, #4*26] + ldr r3, [r0, #4*27] +#endif orrlt r12, #(1<<31)|(1<<24) cmp r2, r1 subge r2, r1, #1 @@ -486,7 +494,12 @@ gteNCLIP_arm: rsblt r3, r1, #0 .endif orrlt r12, #1<<22 +#ifdef HAVE_ARMV5 strd r2, [r0, #4*10] @ gteIR23 +#else + str r2, [r0, #4*10] + str r3, [r0, #4*11] +#endif str r12,[r0, #4*(32+31)] @ gteFLAG bx lr .endm diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 77cfafa7..51cceec4 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c 
+++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -28,6 +28,7 @@ #include "../gte_neon.h" #include "pcnt.h" #endif +#include "arm_features.h" #if !BASE_ADDR_FIXED char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096))); @@ -223,7 +224,7 @@ int get_pointer(void *stub) u_int get_clean_addr(int addr) { int *ptr=(int *)addr; - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 ptr+=4; #else ptr+=6; @@ -240,7 +241,7 @@ u_int get_clean_addr(int addr) int verify_dirty(int addr) { u_int *ptr=(u_int *)addr; - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 // get from literal pool assert((*ptr&0xFFFF0000)==0xe59f0000); u_int offset=*ptr&0xfff; @@ -279,7 +280,7 @@ int verify_dirty(int addr) // guarantees that it's not dirty int isclean(int addr) { - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 int *ptr=((u_int *)addr)+4; #else int *ptr=((u_int *)addr)+6; @@ -296,7 +297,7 @@ int isclean(int addr) void get_bounds(int addr,u_int *start,u_int *end) { u_int *ptr=(u_int *)addr; - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 // get from literal pool assert((*ptr&0xFFFF0000)==0xe59f0000); u_int offset=*ptr&0xfff; @@ -1005,7 +1006,7 @@ void emit_movimm(u_int imm,u_int rt) assem_debug("mvn %s,#%d\n",regname[rt],imm); output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval); }else if(imm<65536) { - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00); output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8)); assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF); @@ -1014,7 +1015,7 @@ void emit_movimm(u_int imm,u_int rt) emit_movw(imm,rt); #endif }else{ - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 emit_loadlp(imm,rt); #else emit_movw(imm&0x0000FFFF,rt); @@ -1278,7 +1279,7 @@ void emit_andimm(int rs,int imm,int rt) assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm); output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval); }else if(imm==65535) { - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]); 
output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF); assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]); @@ -1289,7 +1290,7 @@ void emit_andimm(int rs,int imm,int rt) #endif }else{ assert(imm>0&&imm<65535); - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 assem_debug("mov r14,#%d\n",imm&0xFF00); output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8)); assem_debug("add r14,r14,#%d\n",imm&0xFF); @@ -1353,6 +1354,14 @@ void emit_lsls_imm(int rs,int imm,int rt) output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); } +void emit_lslpls_imm(int rs,int imm,int rt) +{ + assert(imm>0); + assert(imm<32); + assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7)); +} + void emit_shrimm(int rs,u_int imm,int rt) { assert(imm>0); @@ -1403,7 +1412,7 @@ void emit_shrdimm(int rs,int rs2,u_int imm,int rt) void emit_signextend16(int rs,int rt) { - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 emit_shlimm(rs,16,rt); emit_sarimm(rt,16,rt); #else @@ -1414,7 +1423,7 @@ void emit_signextend16(int rs,int rt) void emit_signextend8(int rs,int rt) { - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 emit_shlimm(rs,24,rt); emit_sarimm(rt,24,rt); #else @@ -1502,20 +1511,12 @@ void emit_cmpimm(int rs,int imm) output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval); }else if(imm>0) { assert(imm<65536); - #ifdef ARMv5_ONLY emit_movimm(imm,HOST_TEMPREG); - #else - emit_movw(imm,HOST_TEMPREG); - #endif assem_debug("cmp %s,r14\n",regname[rs]); output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG)); }else{ assert(imm>-65536); - #ifdef ARMv5_ONLY emit_movimm(-imm,HOST_TEMPREG); - #else - emit_movw(-imm,HOST_TEMPREG); - #endif assem_debug("cmn %s,r14\n",regname[rs]); output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG)); } @@ -2295,7 +2296,7 @@ void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt) output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval); } else { - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 emit_movimm(imm1,rt); add_literal((int)out,imm2); assem_debug("ldrne %s,pc+? 
[=%x]\n",regname[rt],imm2); @@ -2586,6 +2587,14 @@ void emit_andne_imm(int rs,int imm,int rt) output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval); } +void emit_addpl_imm(int rs,int imm,int rt) +{ + u_int armval; + genimm_checked(imm,&armval); + assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm); + output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval); +} + void emit_jno_unlikely(int a) { //emit_jno(a); @@ -3565,7 +3574,7 @@ int do_dirty_stub(int i) addr=(u_int)source; #endif // Careful about the code output here, verify_dirty needs to parse it. - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 emit_loadlp(addr,1); emit_loadlp((int)copy,2); emit_loadlp(slen*4,3); @@ -3588,7 +3597,7 @@ int do_dirty_stub(int i) void do_dirty_stub_ds() { // Careful about the code output here, verify_dirty needs to parse it. - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1); emit_loadlp((int)copy,2); emit_loadlp(slen*4,3); @@ -4391,7 +4400,16 @@ static void cop2_put_dreg(u_int copr,signed char sl,signed char temp) case 30: emit_movs(sl,temp); emit_mvnmi(temp,temp); +#ifdef HAVE_ARMV5 emit_clz(temp,temp); +#else + emit_movs(temp,HOST_TEMPREG); + emit_movimm(0,temp); + emit_jeq((int)out+4*4); + emit_addpl_imm(temp,1,temp); + emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_jns((int)out-2*4); +#endif emit_writeword(sl,(int)&reg_cop2d[30]); emit_writeword(temp,(int)&reg_cop2d[31]); break; @@ -4513,6 +4531,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) int lm = (source[i] >> 10) & 1; switch(c2op) { #ifndef DRC_DBG +#ifdef HAVE_ARMV5 case GTE_MVMVA: { int v = (source[i] >> 15) & 3; int cv = (source[i] >> 13) & 3; @@ -4555,6 +4574,7 @@ static void c2op_assemble(int i,struct regstat *i_regs) #endif break; } +#endif /* HAVE_ARMV5 */ case GTE_OP: c2op_prologue(c2op,reglist); emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift)); @@ -5293,8 +5313,15 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) 
emit_movs(d2,HOST_TEMPREG); emit_jeq((int)out+52); // Division by zero emit_negmi(HOST_TEMPREG,HOST_TEMPREG); +#ifdef HAVE_ARMV5 emit_clz(HOST_TEMPREG,quotient); emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); +#else + emit_movimm(0,quotient); + emit_addpl_imm(quotient,1,quotient); + emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG); + emit_jns((int)out-2*4); +#endif emit_orimm(quotient,1<<31,quotient); emit_shr(quotient,quotient,quotient); emit_cmp(remainder,HOST_TEMPREG); @@ -5321,9 +5348,17 @@ void multdiv_assemble_arm(int i,struct regstat *i_regs) emit_movimm(0xffffffff,quotient); // div0 case emit_test(d2,d2); emit_jeq((int)out+40); // Division by zero +#ifdef HAVE_ARMV5 emit_clz(d2,HOST_TEMPREG); emit_movimm(1<<31,quotient); emit_shl(d2,HOST_TEMPREG,d2); +#else + emit_movimm(0,HOST_TEMPREG); /* clz emulation: HOST_TEMPREG counts the shifts */ + emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG); /* one more leading zero while bit 31 of d2 is clear */ + emit_lslpls_imm(d2,1,d2); /* normalize d2; sets N once bit 31 is reached */ + emit_jns((int)out-2*4); /* loop back to the addpl above */ + emit_movimm(1<<31,quotient); +#endif emit_shr(quotient,HOST_TEMPREG,quotient); emit_cmp(remainder,d2); emit_subcs(remainder,d2,remainder); @@ -5554,7 +5589,7 @@ void do_miniht_jump(int rs,int rh,int ht) { } void do_miniht_insert(u_int return_address,int rt,int temp) { - #ifdef ARMv5_ONLY + #ifndef HAVE_ARMV7 emit_movimm(return_address,rt); // PC into link register add_to_linker((int)out,return_address,1); emit_pcreladdr(temp); diff --git a/libpcsxcore/new_dynarec/assem_arm.h b/libpcsxcore/new_dynarec/assem_arm.h index f4e36a95..22546386 100644 --- a/libpcsxcore/new_dynarec/assem_arm.h +++ b/libpcsxcore/new_dynarec/assem_arm.h @@ -17,7 +17,6 @@ #define RAM_SIZE 0x200000 #ifndef __ARM_ARCH_7A__ -#define ARMv5_ONLY //#undef CORTEX_A8_BRANCH_PREDICTION_HACK //#undef USE_MINI_HT #endif diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 588bc631..b8e98836 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -268,9 +268,9 @@ static int ari64_init() if (psxCP2[i] != psxNULL) gte_handlers[i] = psxCP2[i]; -#if 
!defined(DRC_DBG) -#ifdef __arm__ +#if defined(__arm__) && !defined(DRC_DBG) gte_handlers[0x06] = gteNCLIP_arm; +#ifdef HAVE_ARMV5 gte_handlers_nf[0x01] = gteRTPS_nf_arm; gte_handlers_nf[0x30] = gteRTPT_nf_arm; #endif diff --git a/libpcsxcore/new_dynarec/linkage_arm.S b/libpcsxcore/new_dynarec/linkage_arm.S index 47480789..5b707455 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.S +++ b/libpcsxcore/new_dynarec/linkage_arm.S @@ -19,6 +19,9 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ +#include "arm_features.h" + + .global dynarec_local .global reg .global hi @@ -163,6 +166,13 @@ FCR31 = align0 .type FCR31, %object .size FCR31, 4 +#ifndef HAVE_ARMV5 +.macro blx rd + mov lr, pc + bx \rd +.endm +#endif + .macro load_varadr reg var #if defined(__ARM_ARCH_7A__) && !defined(__PIC__) movw \reg, #:lower16:\var diff --git a/plugins/dfsound/arm_utils.S b/plugins/dfsound/arm_utils.S index 22e58443..f4ef243a 100644 --- a/plugins/dfsound/arm_utils.S +++ b/plugins/dfsound/arm_utils.S @@ -8,6 +8,7 @@ * See the COPYING file in the top-level directory. 
*/ +#include "arm_features.h" .text .align 2 @@ -100,7 +101,7 @@ mcr_finish: vstmiage r3!, {d8} bx lr -#else +#elif defined(HAVE_ARMV5) .global mix_chan @ (int start, int count, int lv, int rv) mix_chan: diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index 45a78869..24665698 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -25,6 +25,7 @@ #include "externals.h" #include "registers.h" #include "out.h" +#include "arm_features.h" #ifdef ENABLE_NLS #include <libintl.h> @@ -615,7 +616,7 @@ static int do_samples_noise(int ch, int ns, int ns_to) return ret; } -#ifdef __arm__ +#ifdef HAVE_ARMV5 // asm code; lv and rv must be 0-3fff extern void mix_chan(int start, int count, int lv, int rv); extern void mix_chan_rvb(int start, int count, int lv, int rv); diff --git a/plugins/gpu_unai/gpu_fixedpoint.h b/plugins/gpu_unai/gpu_fixedpoint.h index 03f07d21..e72fda12 100644 --- a/plugins/gpu_unai/gpu_fixedpoint.h +++ b/plugins/gpu_unai/gpu_fixedpoint.h @@ -21,6 +21,8 @@ #ifndef FIXED_H #define FIXED_H +#include "arm_features.h" + typedef s32 fixed; #ifdef GPU_TABLE_10_BITS @@ -55,7 +57,7 @@ INLINE u32 Log2(u32 _a) } */ -#ifdef __arm__ +#ifdef HAVE_ARMV5 INLINE u32 Log2(u32 x) { u32 res; asm("clz %0,%1" : "=r" (res) : "r" (x)); return 32-res; } #else INLINE u32 Log2(u32 x) { u32 i = 0; for ( ; x > 0; ++i, x >>= 1); return i - 1; } -- 2.39.2