From 59774ed0120d20c731ee20da88ba6356d184dc8a Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 25 Sep 2011 16:47:14 +0300 Subject: [PATCH] gte: split arm code for pollux, generate flagless handlers flagless are unused but will be used later, at least I hope so. --- Makefile | 10 ++++- libpcsxcore/gte.c | 39 ++++++++++++++---- libpcsxcore/gte.h | 27 +++++++++++++ libpcsxcore/gte_arm.h | 1 + libpcsxcore/gte_arm.s | 68 ++++++++++++++++++++++++++++++++ libpcsxcore/gte_neon.h | 1 - libpcsxcore/gte_neon.s | 39 +----------------- libpcsxcore/new_dynarec/emu_if.c | 28 ++++++++++--- libpcsxcore/new_dynarec/emu_if.h | 1 + 9 files changed, 161 insertions(+), 53 deletions(-) create mode 100644 libpcsxcore/gte_arm.h create mode 100644 libpcsxcore/gte_arm.s diff --git a/Makefile b/Makefile index d942e1cd..25cd3e98 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,10 @@ OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o libpcsxcore/psxhle.o \ libpcsxcore/psxhw.o libpcsxcore/psxinterpreter.o libpcsxcore/psxmem.o libpcsxcore/r3000a.o \ libpcsxcore/sio.o libpcsxcore/socket.o libpcsxcore/spu.o -OBJS += libpcsxcore/gte.o libpcsxcore/gte_divider.o +OBJS += libpcsxcore/gte.o libpcsxcore/gte_nf.o libpcsxcore/gte_divider.o +ifeq "$(ARCH)" "arm" +OBJS += libpcsxcore/gte_arm.o +endif ifeq "$(HAVE_NEON)" "1" OBJS += libpcsxcore/gte_neon.o endif @@ -56,7 +59,6 @@ libpcsxcore/misc.o libpcsxcore/psxbios.o: CFLAGS += -Wno-nonnull # dynarec ifndef NO_NEW_DRC -libpcsxcore/new_dynarec/linkage_arm.o: ASFLAGS += --defsym HAVE_ARMV7=$(HAVE_ARMV7) OBJS += libpcsxcore/new_dynarec/new_dynarec.o libpcsxcore/new_dynarec/linkage_arm.o OBJS += libpcsxcore/new_dynarec/pcsxmem.o endif @@ -149,9 +151,13 @@ ifndef NO_TSLIB frontend/%.o: CFLAGS += -DHAVE_TSLIB OBJS += frontend/pl_gun_ts.o endif +%.o: ASFLAGS += --defsym HAVE_ARMV7=$(HAVE_ARMV7) frontend/%.o: CFLAGS += -DIN_EVDEV frontend/menu.o: frontend/revision.h 
+libpcsxcore/gte_nf.o: libpcsxcore/gte.c + $(CC) -c -o $@ $^ $(CFLAGS) -DFLAGLESS + frontend/revision.h: FORCE @(git describe || echo) | sed -e 's/.*/#define REV "\0"/' > $@_ @diff -q $@_ $@ > /dev/null 2>&1 || cp $@_ $@ diff --git a/libpcsxcore/gte.c b/libpcsxcore/gte.c index 3002a90b..0acca653 100644 --- a/libpcsxcore/gte.c +++ b/libpcsxcore/gte.c @@ -168,6 +168,8 @@ #define gteop (psxRegs.code & 0x1ffffff) +#ifndef FLAGLESS + static inline s64 BOUNDS(s64 n_value, s64 n_max, int n_maxflag, s64 n_min, int n_minflag) { if (n_value > n_max) { gteFLAG |= n_maxflag; @@ -189,6 +191,31 @@ static inline s32 LIM(s32 value, s32 max, s32 min, u32 flag) { return ret; } +static inline u32 limE(u32 result) { + if (result > 0x1ffff) { + gteFLAG |= (1 << 31) | (1 << 17); + return 0x1ffff; + } + return result; +} + +#else + +#define BOUNDS(a, ...) (a) + +static inline s32 LIM(s32 value, s32 max, s32 min, u32 flag_unused) { + s32 ret = value; + if (value > max) + ret = max; + else if (value < min) + ret = min; + return ret; +} + +#define limE(a) ((a) & 0x1ffff) + +#endif + #define A1(a) BOUNDS((a), 0x7fffffff, (1 << 30), -(s64)0x80000000, (1 << 31) | (1 << 27)) #define A2(a) BOUNDS((a), 0x7fffffff, (1 << 29), -(s64)0x80000000, (1 << 31) | (1 << 26)) #define A3(a) BOUNDS((a), 0x7fffffff, (1 << 28), -(s64)0x80000000, (1 << 31) | (1 << 25)) @@ -200,14 +227,6 @@ static inline s32 LIM(s32 value, s32 max, s32 min, u32 flag) { #define limC3(a) LIM((a), 0x00ff, 0x0000, (1 << 19)) #define limD(a) LIM((a), 0xffff, 0x0000, (1 << 31) | (1 << 18)) -static inline u32 limE(u32 result) { - if (result > 0x1ffff) { - gteFLAG |= (1 << 31) | (1 << 17); - return 0x1ffff; - } - return result; -} - #define F(a) BOUNDS((a), 0x7fffffff, (1 << 31) | (1 << 16), -(s64)0x80000000, (1 << 31) | (1 << 15)) #define limG1(a) LIM((a), 0x3ff, -0x400, (1 << 31) | (1 << 14)) #define limG2(a) LIM((a), 0x3ff, -0x400, (1 << 31) | (1 << 13)) @@ -215,6 +234,8 @@ static inline u32 limE(u32 result) { #include "gte_divider.h" 
+#ifndef FLAGLESS + static inline u32 MFC2(int reg) { switch (reg) { case 1: @@ -343,6 +364,8 @@ void gteSWC2() { psxMemWrite32(_oB_, MFC2(_Rt_)); } +#endif // FLAGLESS + #if 0 #define DIVIDE DIVIDE_ static u32 DIVIDE_(s16 n, u16 d) { diff --git a/libpcsxcore/gte.h b/libpcsxcore/gte.h index 2cea0264..22f98a10 100644 --- a/libpcsxcore/gte.h +++ b/libpcsxcore/gte.h @@ -19,6 +19,33 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. * ***************************************************************************/ +#ifdef FLAGLESS + +#define gteRTPS gteRTPS_nf +#define gteOP gteOP_nf +#define gteNCLIP gteNCLIP_nf +#define gteDPCS gteDPCS_nf +#define gteINTPL gteINTPL_nf +#define gteMVMVA gteMVMVA_nf +#define gteNCDS gteNCDS_nf +#define gteNCDT gteNCDT_nf +#define gteCDP gteCDP_nf +#define gteNCCS gteNCCS_nf +#define gteCC gteCC_nf +#define gteNCS gteNCS_nf +#define gteNCT gteNCT_nf +#define gteSQR gteSQR_nf +#define gteDCPL gteDCPL_nf +#define gteDPCT gteDPCT_nf +#define gteAVSZ3 gteAVSZ3_nf +#define gteAVSZ4 gteAVSZ4_nf +#define gteRTPT gteRTPT_nf +#define gteGPF gteGPF_nf +#define gteGPL gteGPL_nf +#define gteNCCT gteNCCT_nf + +#endif + #ifndef __GTE_H__ #define __GTE_H__ diff --git a/libpcsxcore/gte_arm.h b/libpcsxcore/gte_arm.h new file mode 100644 index 00000000..69924c8d --- /dev/null +++ b/libpcsxcore/gte_arm.h @@ -0,0 +1 @@ +void gteNCLIP_arm(void *cp2_regs, int opcode); diff --git a/libpcsxcore/gte_arm.s b/libpcsxcore/gte_arm.s new file mode 100644 index 00000000..c8a7279a --- /dev/null +++ b/libpcsxcore/gte_arm.s @@ -0,0 +1,68 @@ +/* + * (C) Gražvydas "notaz" Ignotas, 2011 + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - GNU LGPL, version 2.1 or later. + * See the COPYING file in the top-level directory. 
+ */ + +/* .equiv HAVE_ARMV7, 1 */ + +.text +.align 2 + +.macro sgnxt16 reg +.if HAVE_ARMV7 + sxth \reg, \reg +.else + lsl \reg, \reg, #16 + asr \reg, \reg, #16 +.endif +.endm + + +.global gteNCLIP_arm @ r0=CP2 (d,c), +gteNCLIP_arm: + push {r4-r6,lr} + + add r1, r0, #4*12 + ldmia r1, {r1-r3} + mov r4, r1, asr #16 + mov r5, r2, asr #16 + mov r6, r3, asr #16 + sub r12, r4, r5 @ 3: gteSY0 - gteSY1 + sub r5, r5, r6 @ 1: gteSY1 - gteSY2 + sgnxt16 r1 + smull r1, r5, r1, r5 @ RdLo, RdHi + sub r6, r4 @ 2: gteSY2 - gteSY0 + sgnxt16 r2 + smlal r1, r5, r2, r6 + mov lr, #0 @ gteFLAG + sgnxt16 r3 + smlal r1, r5, r3, r12 + mov r6, #1<<31 + orr r6, #1<<15 + movs r2, r1, lsl #1 + adc r5, r5 + cmp r5, #0 +.if HAVE_ARMV7 + movtgt lr, #((1<<31)|(1<<16))>>16 +.else + movgt lr, #(1<<31) + orrgt lr, #(1<<16) +.endif + mvngt r1, #1<<31 @ maxint + cmn r5, #1 + movmi r1, #1<<31 @ minint + orrmi lr, r6 + str r1, [r0, #4*24] + str lr, [r0, #4*(32+31)] @ gteFLAG + + pop {r4-r6,pc} + .size gteNCLIP_arm, .-gteNCLIP_arm + + +@ vim:filetype=armasm + diff --git a/libpcsxcore/gte_neon.h b/libpcsxcore/gte_neon.h index 59455f54..7266496c 100644 --- a/libpcsxcore/gte_neon.h +++ b/libpcsxcore/gte_neon.h @@ -1,4 +1,3 @@ void gteRTPS_neon(void *cp2_regs, int opcode); void gteRTPT_neon(void *cp2_regs, int opcode); void gteMVMVA_neon(void *cp2_regs, int opcode); -void gteNCLIP_neon(void *cp2_regs, int opcode); diff --git a/libpcsxcore/gte_neon.s b/libpcsxcore/gte_neon.s index 0db89171..ab8c1b6e 100644 --- a/libpcsxcore/gte_neon.s +++ b/libpcsxcore/gte_neon.s @@ -20,6 +20,8 @@ scratch: .text .align 2 +@ XXX: gteMAC calc shouldn't be saturating, but it is here + @ approximate gteMAC|123 flags @ in: rr 123 as gteMAC|123 @ trash: nothing @@ -627,41 +629,4 @@ gteMVMVA_neon: -@ the name is misnormer, this doesn't use NEON but oh well.. 
-.global gteNCLIP_neon @ r0=CP2 (d,c), -gteNCLIP_neon: - push {r4-r6,lr} - - add r1, r0, #4*12 - ldmia r1, {r1-r3} - mov r4, r1, asr #16 - mov r5, r2, asr #16 - mov r6, r3, asr #16 - sub r12, r4, r5 @ 3: gteSY0 - gteSY1 - sub r5, r5, r6 @ 1: gteSY1 - gteSY2 - sxth r1, r1 - smull r1, r5, r1, r5 @ RdLo, RdHi - sub r6, r4 @ 2: gteSY2 - gteSY0 - sxth r2, r2 - smlal r1, r5, r2, r6 - mov lr, #0 @ gteFLAG - sxth r3, r3 - smlal r1, r5, r3, r12 - mov r6, #1<<31 - orr r6, #1<<15 - movs r2, r1, lsl #1 - adc r5, r5 - cmp r5, #0 - movtgt lr, #((1<<31)|(1<<16))>>16 - mvngt r1, #1<<31 @ maxint - cmn r5, #1 - movmi r1, #1<<31 @ minint - orrmi lr, r6 - str r1, [r0, #4*24] - str lr, [r0, #4*(32+31)] @ gteFLAG - - pop {r4-r6,pc} - .size gteNCLIP_neon, .-gteNCLIP_neon - - @ vim:filetype=armasm diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c index 8a068ea0..fbd4f964 100644 --- a/libpcsxcore/new_dynarec/emu_if.c +++ b/libpcsxcore/new_dynarec/emu_if.c @@ -14,7 +14,10 @@ #include "../cdrom.h" #include "../psxdma.h" #include "../mdec.h" +#include "../gte_arm.h" #include "../gte_neon.h" +#define FLAGLESS +#include "../gte.h" #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) @@ -140,6 +143,17 @@ void new_dyna_restore(void) void *gte_handlers[64]; +void *gte_handlers_nf[64] = { + NULL , gteRTPS_nf , NULL , NULL , NULL , NULL , gteNCLIP_nf, NULL , // 00 + NULL , NULL , NULL , NULL , gteOP_nf , NULL , NULL , NULL , // 08 + gteDPCS_nf, gteINTPL_nf, gteMVMVA_nf, gteNCDS_nf, gteCDP_nf, NULL , gteNCDT_nf , NULL , // 10 + NULL , NULL , NULL , gteNCCS_nf, gteCC_nf , NULL , gteNCS_nf , NULL , // 18 + gteNCT_nf , NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 20 + gteSQR_nf , gteDCPL_nf , gteDPCT_nf , NULL , NULL , gteAVSZ3_nf, gteAVSZ4_nf, NULL , // 28 + gteRTPT_nf, NULL , NULL , NULL , NULL , NULL , NULL , NULL , // 30 + NULL , NULL , NULL , NULL , NULL , gteGPF_nf , gteGPL_nf , gteNCCT_nf, // 38 +}; + /* from gte.txt.. not sure if this is any good. 
*/ const char gte_cycletab[64] = { /* 1 2 3 4 5 6 7 8 9 a b c d e f */ @@ -161,12 +175,16 @@ static int ari64_init() for (i = 0; i < ARRAY_SIZE(gte_handlers); i++) if (psxCP2[i] != psxNULL) gte_handlers[i] = psxCP2[i]; -#ifndef DRC_DBG + +#if !defined(DRC_DBG) && !defined(PCNT) +#ifdef __arm__ + gte_handlers[0x06] = gteNCLIP_arm; +#endif #ifdef __ARM_NEON__ - gte_handlers[0x01] = gteRTPS_neon; - gte_handlers[0x30] = gteRTPT_neon; - gte_handlers[0x12] = gteMVMVA_neon; - gte_handlers[0x06] = gteNCLIP_neon; + // compiler's _nf version is still a lot slower than neon + gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon; + gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon; + gte_handlers[0x12] = gte_handlers_nf[0x12] = gteMVMVA_neon; #endif #endif psxH_ptr = psxH; diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index 8acd1f5a..e16cca54 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -30,6 +30,7 @@ extern int reg_cop0[]; /* COP2/GTE */ extern int reg_cop2d[], reg_cop2c[]; extern void *gte_handlers[64]; +extern void *gte_handlers_nf[64]; extern const char gte_cycletab[64]; /* dummy */ -- 2.39.5