The flagless handlers are unused for now, but will be used later, at least I hope so.
libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o libpcsxcore/psxhle.o \
libpcsxcore/psxhw.o libpcsxcore/psxinterpreter.o libpcsxcore/psxmem.o libpcsxcore/r3000a.o \
libpcsxcore/sio.o libpcsxcore/socket.o libpcsxcore/spu.o
-OBJS += libpcsxcore/gte.o libpcsxcore/gte_divider.o
+OBJS += libpcsxcore/gte.o libpcsxcore/gte_nf.o libpcsxcore/gte_divider.o
+ifeq "$(ARCH)" "arm"
+OBJS += libpcsxcore/gte_arm.o
+endif
ifeq "$(HAVE_NEON)" "1"
OBJS += libpcsxcore/gte_neon.o
endif
# dynarec
ifndef NO_NEW_DRC
-libpcsxcore/new_dynarec/linkage_arm.o: ASFLAGS += --defsym HAVE_ARMV7=$(HAVE_ARMV7)
OBJS += libpcsxcore/new_dynarec/new_dynarec.o libpcsxcore/new_dynarec/linkage_arm.o
OBJS += libpcsxcore/new_dynarec/pcsxmem.o
endif
frontend/%.o: CFLAGS += -DHAVE_TSLIB
OBJS += frontend/pl_gun_ts.o
endif
+%.o: ASFLAGS += --defsym HAVE_ARMV7=$(HAVE_ARMV7)
frontend/%.o: CFLAGS += -DIN_EVDEV
frontend/menu.o: frontend/revision.h
+libpcsxcore/gte_nf.o: libpcsxcore/gte.c
+ $(CC) -c -o $@ $^ $(CFLAGS) -DFLAGLESS
+
frontend/revision.h: FORCE
@(git describe || echo) | sed -e 's/.*/#define REV "\0"/' > $@_
@diff -q $@_ $@ > /dev/null 2>&1 || cp $@_ $@
#define gteop (psxRegs.code & 0x1ffffff)
+#ifndef FLAGLESS
+
static inline s64 BOUNDS(s64 n_value, s64 n_max, int n_maxflag, s64 n_min, int n_minflag) {
if (n_value > n_max) {
gteFLAG |= n_maxflag;
return ret;
}
+/*
+ * Clamp an unsigned value to at most 0x1ffff.
+ * When the input exceeds that limit, bits 31 and 17 are OR-ed into gteFLAG
+ * and the limit itself is returned; otherwise the input is returned as-is.
+ */
+static inline u32 limE(u32 result) {
+	if (result <= 0x1ffff)
+		return result;
+
+	gteFLAG |= (1 << 31) | (1 << 17);
+	return 0x1ffff;
+}
+
+#else
+
+/* Flagless build: BOUNDS() yields its first argument unchanged; the limit and flag arguments are discarded. */
+#define BOUNDS(a, ...) (a)
+
+/*
+ * Flagless clamp: returns value limited to the inclusive range [min, max].
+ * The fourth argument is accepted only so the shared limXX() macros, which
+ * pass a flag mask, keep working; it is ignored here.
+ */
+static inline s32 LIM(s32 value, s32 max, s32 min, u32 flag_unused) {
+	if (value > max)
+		return max;
+	if (value < min)
+		return min;
+	return value;
+}
+
+/*
+ * Flagless limE: saturate to 0x1ffff exactly like the flag-setting version,
+ * only without touching gteFLAG.  Masking with 0x1ffff would wrap
+ * out-of-range values (e.g. 0x20000 -> 0) instead of clamping them.
+ * A function is used rather than a macro so the argument (typically a
+ * divide result) is evaluated once.
+ */
+static inline u32 limE(u32 result) {
+	return result > 0x1ffff ? 0x1ffff : result;
+}
+
+#endif
+
#define A1(a) BOUNDS((a), 0x7fffffff, (1 << 30), -(s64)0x80000000, (1 << 31) | (1 << 27))
#define A2(a) BOUNDS((a), 0x7fffffff, (1 << 29), -(s64)0x80000000, (1 << 31) | (1 << 26))
#define A3(a) BOUNDS((a), 0x7fffffff, (1 << 28), -(s64)0x80000000, (1 << 31) | (1 << 25))
#define limC3(a) LIM((a), 0x00ff, 0x0000, (1 << 19))
#define limD(a) LIM((a), 0xffff, 0x0000, (1 << 31) | (1 << 18))
-static inline u32 limE(u32 result) {
- if (result > 0x1ffff) {
- gteFLAG |= (1 << 31) | (1 << 17);
- return 0x1ffff;
- }
- return result;
-}
-
#define F(a) BOUNDS((a), 0x7fffffff, (1 << 31) | (1 << 16), -(s64)0x80000000, (1 << 31) | (1 << 15))
#define limG1(a) LIM((a), 0x3ff, -0x400, (1 << 31) | (1 << 14))
#define limG2(a) LIM((a), 0x3ff, -0x400, (1 << 31) | (1 << 13))
#include "gte_divider.h"
+#ifndef FLAGLESS
+
static inline u32 MFC2(int reg) {
switch (reg) {
case 1:
psxMemWrite32(_oB_, MFC2(_Rt_));
}
+#endif // FLAGLESS
+
#if 0
#define DIVIDE DIVIDE_
static u32 DIVIDE_(s16 n, u16 d) {
* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
***************************************************************************/
+/*
+ * When FLAGLESS is defined, every GTE operation name listed here is
+ * remapped to a *_nf name, so any later declaration, definition or use of
+ * e.g. gteRTPS refers to gteRTPS_nf instead.
+ * NOTE(review): these defines sit above the include guard, so they take
+ * effect even if this header was already included without FLAGLESS; in that
+ * case the guarded part is skipped and no *_nf prototypes are declared --
+ * confirm the include order in users.
+ */
+#ifdef FLAGLESS
+
+#define gteRTPS gteRTPS_nf
+#define gteOP gteOP_nf
+#define gteNCLIP gteNCLIP_nf
+#define gteDPCS gteDPCS_nf
+#define gteINTPL gteINTPL_nf
+#define gteMVMVA gteMVMVA_nf
+#define gteNCDS gteNCDS_nf
+#define gteNCDT gteNCDT_nf
+#define gteCDP gteCDP_nf
+#define gteNCCS gteNCCS_nf
+#define gteCC gteCC_nf
+#define gteNCS gteNCS_nf
+#define gteNCT gteNCT_nf
+#define gteSQR gteSQR_nf
+#define gteDCPL gteDCPL_nf
+#define gteDPCT gteDPCT_nf
+#define gteAVSZ3 gteAVSZ3_nf
+#define gteAVSZ4 gteAVSZ4_nf
+#define gteRTPT gteRTPT_nf
+#define gteGPF gteGPF_nf
+#define gteGPL gteGPL_nf
+#define gteNCCT gteNCCT_nf
+
+#endif
+
#ifndef __GTE_H__
#define __GTE_H__
--- /dev/null
+void gteNCLIP_arm(void *cp2_regs, int opcode);
--- /dev/null
+/*
+ * (C) Gražvydas "notaz" Ignotas, 2011
+ *
+ * This work is licensed under the terms of any of these licenses
+ * (at your option):
+ * - GNU GPL, version 2 or later.
+ * - GNU LGPL, version 2.1 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+/* .equiv HAVE_ARMV7, 1 */
+
+.text
+.align 2
+
+@ sgnxt16 reg: sign-extend the low 16 bits of \reg to 32 bits, in place.
+@ Uses a single sxth when HAVE_ARMV7 is non-zero, otherwise a
+@ shift-left / arithmetic-shift-right pair.
+@ HAVE_ARMV7 must be defined at assembly time (e.g. via --defsym).
+.macro sgnxt16 reg
+.if HAVE_ARMV7
+ sxth \reg, \reg
+.else
+ lsl \reg, \reg, #16
+ asr \reg, \reg, #16
+.endif
+.endm
+
+
+@ gteNCLIP_arm: r0 = pointer to the CP2 register block. r1 is overwritten
+@ without being read.
+@ Forms the 64-bit signed sum
+@   lo0*(SY1-SY2) + lo1*(SY2-SY0) + lo2*(SY0-SY1)
+@ where SYn / lon are the signed high / low halfwords of the three words
+@ loaded from r0+4*12 (low halves are presumably gteSX0..2 -- TODO confirm),
+@ saturates it to a signed 32-bit value, stores that at r0+4*24 and stores
+@ the resulting overflow flag word at r0+4*(32+31).
+@ Clobbers r1-r3, r12 and flags; r4-r6 and lr are saved and restored.
+.global gteNCLIP_arm @ r0=CP2 (d,c),
+gteNCLIP_arm:
+ push {r4-r6,lr}
+
+ add r1, r0, #4*12 @ r1 = address of register word 12
+ ldmia r1, {r1-r3} @ r1..r3 = words 12..14
+ mov r4, r1, asr #16 @ r4 = signed high half of word 12
+ mov r5, r2, asr #16 @ r5 = signed high half of word 13
+ mov r6, r3, asr #16 @ r6 = signed high half of word 14
+ sub r12, r4, r5 @ 3: gteSY0 - gteSY1
+ sub r5, r5, r6 @ 1: gteSY1 - gteSY2
+ sgnxt16 r1 @ r1 = signed low half of word 12
+ smull r1, r5, r1, r5 @ RdLo, RdHi
+ sub r6, r4 @ 2: gteSY2 - gteSY0
+ sgnxt16 r2 @ r2 = signed low half of word 13
+ smlal r1, r5, r2, r6 @ r5:r1 += r2 * r6
+ mov lr, #0 @ gteFLAG
+ sgnxt16 r3 @ r3 = signed low half of word 14
+ smlal r1, r5, r3, r12 @ r5:r1 += r3 * r12
+ mov r6, #1<<31 @ r6 = flag bits used on negative overflow...
+ orr r6, #1<<15 @ ...(1<<31)|(1<<15)
+ movs r2, r1, lsl #1 @ carry = bit 31 of the low word (r2 is scratch)
+ adc r5, r5 @ r5 = 2*hi + carry: 0 or -1 when the sum fits in 32 bits
+ cmp r5, #0 @ gt: sum is above the signed 32-bit maximum
+.if HAVE_ARMV7
+ movtgt lr, #((1<<31)|(1<<16))>>16 @ top half only; low half of lr is already 0
+.else
+ movgt lr, #(1<<31)
+ orrgt lr, #(1<<16)
+.endif
+ mvngt r1, #1<<31 @ maxint
+ cmn r5, #1 @ mi: r5 < -1, sum is below the signed 32-bit minimum
+ movmi r1, #1<<31 @ minint
+ orrmi lr, r6 @ add the negative-overflow flag bits
+ str r1, [r0, #4*24] @ store the (possibly saturated) result in word 24
+ str lr, [r0, #4*(32+31)] @ gteFLAG
+
+ pop {r4-r6,pc}
+ .size gteNCLIP_arm, .-gteNCLIP_arm
+
+
+@ vim:filetype=armasm
+
void gteRTPS_neon(void *cp2_regs, int opcode);
void gteRTPT_neon(void *cp2_regs, int opcode);
void gteMVMVA_neon(void *cp2_regs, int opcode);
-void gteNCLIP_neon(void *cp2_regs, int opcode);
.text
.align 2
+@ XXX: gteMAC calc shouldn't be saturating, but it is here
+
@ approximate gteMAC|123 flags
@ in: rr 123 as gteMAC|123
@ trash: nothing
-@ the name is misnormer, this doesn't use NEON but oh well..
-.global gteNCLIP_neon @ r0=CP2 (d,c),
-gteNCLIP_neon:
- push {r4-r6,lr}
-
- add r1, r0, #4*12
- ldmia r1, {r1-r3}
- mov r4, r1, asr #16
- mov r5, r2, asr #16
- mov r6, r3, asr #16
- sub r12, r4, r5 @ 3: gteSY0 - gteSY1
- sub r5, r5, r6 @ 1: gteSY1 - gteSY2
- sxth r1, r1
- smull r1, r5, r1, r5 @ RdLo, RdHi
- sub r6, r4 @ 2: gteSY2 - gteSY0
- sxth r2, r2
- smlal r1, r5, r2, r6
- mov lr, #0 @ gteFLAG
- sxth r3, r3
- smlal r1, r5, r3, r12
- mov r6, #1<<31
- orr r6, #1<<15
- movs r2, r1, lsl #1
- adc r5, r5
- cmp r5, #0
- movtgt lr, #((1<<31)|(1<<16))>>16
- mvngt r1, #1<<31 @ maxint
- cmn r5, #1
- movmi r1, #1<<31 @ minint
- orrmi lr, r6
- str r1, [r0, #4*24]
- str lr, [r0, #4*(32+31)] @ gteFLAG
-
- pop {r4-r6,pc}
- .size gteNCLIP_neon, .-gteNCLIP_neon
-
-
@ vim:filetype=armasm
#include "../cdrom.h"
#include "../psxdma.h"
#include "../mdec.h"
+#include "../gte_arm.h"
#include "../gte_neon.h"
+#define FLAGLESS
+#include "../gte.h"
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
void *gte_handlers[64];
+/*
+ * Flagless GTE handlers, laid out in the same 64 slots as gte_handlers.
+ * Slots that are not listed stay NULL.
+ */
+void *gte_handlers_nf[64] = {
+	[0x01] = gteRTPS_nf,
+	[0x06] = gteNCLIP_nf,
+	[0x0c] = gteOP_nf,
+	[0x10] = gteDPCS_nf,
+	[0x11] = gteINTPL_nf,
+	[0x12] = gteMVMVA_nf,
+	[0x13] = gteNCDS_nf,
+	[0x14] = gteCDP_nf,
+	[0x16] = gteNCDT_nf,
+	[0x1b] = gteNCCS_nf,
+	[0x1c] = gteCC_nf,
+	[0x1e] = gteNCS_nf,
+	[0x20] = gteNCT_nf,
+	[0x28] = gteSQR_nf,
+	[0x29] = gteDCPL_nf,
+	[0x2a] = gteDPCT_nf,
+	[0x2d] = gteAVSZ3_nf,
+	[0x2e] = gteAVSZ4_nf,
+	[0x30] = gteRTPT_nf,
+	[0x3d] = gteGPF_nf,
+	[0x3e] = gteGPL_nf,
+	[0x3f] = gteNCCT_nf,
+};
+
/* from gte.txt.. not sure if this is any good. */
const char gte_cycletab[64] = {
/* 1 2 3 4 5 6 7 8 9 a b c d e f */
for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
if (psxCP2[i] != psxNULL)
gte_handlers[i] = psxCP2[i];
-#ifndef DRC_DBG
+
+#if !defined(DRC_DBG) && !defined(PCNT)
+#ifdef __arm__
+ gte_handlers[0x06] = gteNCLIP_arm;
+#endif
#ifdef __ARM_NEON__
- gte_handlers[0x01] = gteRTPS_neon;
- gte_handlers[0x30] = gteRTPT_neon;
- gte_handlers[0x12] = gteMVMVA_neon;
- gte_handlers[0x06] = gteNCLIP_neon;
+ // compiler's _nf version is still a lot slower than neon
+ gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
+ gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
+ gte_handlers[0x12] = gte_handlers_nf[0x12] = gteMVMVA_neon;
#endif
#endif
psxH_ptr = psxH;
/* COP2/GTE */
extern int reg_cop2d[], reg_cop2c[];
extern void *gte_handlers[64];
+extern void *gte_handlers_nf[64];
extern const char gte_cycletab[64];
/* dummy */