attributes:
label: PCSX-ReARMed Version
description: Version number of the emulator as shown in the menus and printed in logs.
- placeholder: r24l 3a52f747
+ placeholder: r25 ef0559d4
validations:
required: true
- type: input
--- /dev/null
+name: CI (libretro)
+on: [push, pull_request]
+jobs:
+ build-libretro:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: true
+ - name: make
+ run: make -j$(getconf _NPROCESSORS_ONLN) -f Makefile.libretro
+
+ build-libretro-win32:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: true
+ - name: Install dependencies
+ run: |
+ sudo apt-get update -qq
+ sudo apt-get install -y gcc-mingw-w64
+ - name: make
+ run: make -j$(getconf _NPROCESSORS_ONLN) -f Makefile.libretro platform=win32 CC=x86_64-w64-mingw32-gcc
+
+ build-libretro-android:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ submodules: true
+ - name: ndk-build
+ run: ${ANDROID_NDK_HOME}/ndk-build -j$(getconf _NPROCESSORS_ONLN) --no-print-directory -C jni/
name: CI (Linux arm64)
on: [push, pull_request]
jobs:
- build-linux:
- runs-on: ubuntu-latest
+ build-linux-arm64:
+ runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
with:
- name: configure
run: DUMP_CONFIG_LOG=1 CROSS_COMPILE=aarch64-linux-gnu- SDL_CONFIG=usr/bin/sdl-config PATH=$PATH:usr/bin CFLAGS='-Iusr/include/ -Iusr/include/SDL' LDFLAGS='-Lusr/lib/aarch64-linux-gnu/ -Llib/aarch64-linux-gnu/ -Wl,-rpath-link=lib/aarch64-linux-gnu/,-rpath-link=usr/lib/aarch64-linux-gnu/,-rpath-link=usr/lib/aarch64-linux-gnu/pulseaudio/' ./configure
- name: make
- run: make
+ run: make -j$(getconf _NPROCESSORS_ONLN)
name: CI (Linux armhf)
on: [push, pull_request]
jobs:
- build-linux:
- runs-on: ubuntu-latest
+ build-linux-armhf:
+ runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
with:
- name: configure
run: DUMP_CONFIG_LOG=1 CROSS_COMPILE=arm-linux-gnueabihf- SDL_CONFIG=usr/bin/sdl-config PATH=$PATH:usr/bin CFLAGS='-Iusr/include/ -Iusr/include/SDL' LDFLAGS='-Lusr/lib/arm-linux-gnueabihf/ -Llib/arm-linux-gnueabihf/ -Wl,-rpath-link=lib/arm-linux-gnueabihf/,-rpath-link=usr/lib/arm-linux-gnueabihf/,-rpath-link=usr/lib/arm-linux-gnueabihf/pulseaudio/' ./configure
- name: make
- run: make
+ run: make -j$(getconf _NPROCESSORS_ONLN)
- name: configure
run: DUMP_CONFIG_LOG=1 ./configure
- name: make
- run: make
+ run: make -j$(getconf _NPROCESSORS_ONLN)
config.mak
config.log
cheatpops.db
-frontend/revision.h
-tools
+/include/revision.h
+/tools
obj/
.vscode/
.vscode/ipch/*
[submodule "libchdr"]
path = deps/libchdr
url = https://github.com/rtissera/libchdr.git
+[submodule "deps/lightrec"]
+ path = deps/lightrec
+ url = https://github.com/pcercuei/lightrec.git
+[submodule "deps/lightning"]
+ path = deps/lightning
+ url = https://github.com/notaz/gnu_lightning.git
+[submodule "deps/libretro-common"]
+ path = deps/libretro-common
+ url = https://github.com/libretro/libretro-common.git
+[submodule "deps/mman"]
+ path = deps/mman
+ url = https://github.com/witwall/mman-win32
# default stuff goes here, so that config can override
TARGET ?= pcsx
-CFLAGS += -Wall -ggdb -Iinclude -ffast-math
-ifndef DEBUG
-CFLAGS += -O2 -DNDEBUG
+CFLAGS += -Wall -Iinclude -ffast-math
+
+DEBUG ?= 0
+DEBUG_SYMS ?= 0
+ASSERTS ?= 0
+HAVE_CHD ?= 1
+ifneq ($(DEBUG)$(DEBUG_SYMS), 00)
+CFLAGS += -ggdb
+endif
+ifneq ($(DEBUG), 1)
+CFLAGS += -O3
+ifneq ($(ASSERTS), 1)
+CFLAGS += -DNDEBUG
+endif
endif
ifeq ($(DEBUG_ASAN), 1)
CFLAGS += -fsanitize=address
+LDFLAGS += -fsanitize=address
+#LDFLAGS += -static-libasan
+endif
+ifeq ($(DEBUG_UBSAN), 1)
+CFLAGS += -fsanitize=undefined -fno-sanitize=shift-base
+LDFLAGS += -fsanitize=undefined
endif
-CFLAGS += -DP_HAVE_MMAP=$(if $(NO_MMAP),0,1) \
- -DP_HAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \
- -DP_HAVE_POSIX_MEMALIGN=$(if $(NO_POSIX_MEMALIGN),0,1) \
- -DDISABLE_MEM_LUTS=0
-CXXFLAGS += $(CFLAGS)
#DRC_DBG = 1
#PCNT = 1
+# Suppress minor warnings for dependencies
+deps/%: CFLAGS += -Wno-unused -Wno-unused-function
+
all: config.mak target_ plugins_
ifndef NO_CONFIG_MAK
CC_LINK ?= $(CC)
CC_AS ?= $(CC)
LDFLAGS += $(MAIN_LDFLAGS)
-ifeq ($(DEBUG_ASAN), 1)
-LDFLAGS += -static-libasan
-endif
-EXTRA_LDFLAGS ?= -Wl,-Map=$@.map
+#EXTRA_LDFLAGS ?= -Wl,-Map=$@.map # not on some linkers
LDLIBS += $(MAIN_LDLIBS)
ifdef PCNT
CFLAGS += -DPCNT
endif
+ifneq ($(NO_FSECTIONS), 1)
+CFLAGS += -ffunction-sections -fdata-sections
+ifeq ($(GNU_LINKER),1)
+FSECTIONS_LDFLAGS ?= -Wl,--gc-sections
+LDFLAGS += $(FSECTIONS_LDFLAGS)
+endif
+endif # NO_FSECTIONS
+CFLAGS += -DP_HAVE_MMAP=$(if $(NO_MMAP),0,1) \
+ -DP_HAVE_PTHREAD=$(if $(NO_PTHREAD),0,1) \
+ -DP_HAVE_POSIX_MEMALIGN=$(if $(NO_POSIX_MEMALIGN),0,1) \
+ -DDISABLE_MEM_LUTS=0
+CXXFLAGS += $(CFLAGS)
+
# core
-OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cheat.o libpcsxcore/database.o \
+OBJS += libpcsxcore/cdriso.o libpcsxcore/cdrom.o libpcsxcore/cdrom-async.o \
+ libpcsxcore/cheat.o libpcsxcore/database.o \
libpcsxcore/decode_xa.o libpcsxcore/mdec.o \
libpcsxcore/misc.o libpcsxcore/plugins.o libpcsxcore/ppf.o libpcsxcore/psxbios.o \
libpcsxcore/psxcommon.o libpcsxcore/psxcounters.o libpcsxcore/psxdma.o \
libpcsxcore/sio.o libpcsxcore/spu.o libpcsxcore/gpu.o
OBJS += libpcsxcore/gte.o libpcsxcore/gte_nf.o libpcsxcore/gte_divider.o
#OBJS += libpcsxcore/debug.o libpcsxcore/socket.o libpcsxcore/disr3000a.o
+
+ifeq ($(WANT_ZLIB),1)
+ZLIB_DIR = deps/libchdr/deps/zlib-1.3.1
+CFLAGS += -I$(ZLIB_DIR)
+OBJS += $(ZLIB_DIR)/adler32.o \
+ $(ZLIB_DIR)/compress.o \
+ $(ZLIB_DIR)/crc32.o \
+ $(ZLIB_DIR)/deflate.o \
+ $(ZLIB_DIR)/gzclose.o \
+ $(ZLIB_DIR)/gzlib.o \
+ $(ZLIB_DIR)/gzread.o \
+ $(ZLIB_DIR)/gzwrite.o \
+ $(ZLIB_DIR)/infback.o \
+ $(ZLIB_DIR)/inffast.o \
+ $(ZLIB_DIR)/inflate.o \
+ $(ZLIB_DIR)/inftrees.o \
+ $(ZLIB_DIR)/trees.o \
+ $(ZLIB_DIR)/uncompr.o \
+ $(ZLIB_DIR)/zutil.o
+$(ZLIB_DIR)/%.o: CFLAGS += -DHAVE_UNISTD_H
+endif
+
ifeq "$(ARCH)" "arm"
OBJS += libpcsxcore/gte_arm.o
endif
endif
libpcsxcore/psxbios.o: CFLAGS += -Wno-nonnull
+ifeq ($(MMAP_WIN32),1)
+CFLAGS += -Iinclude/mman -Ideps/mman
+OBJS += deps/mman/mman.o
+endif
+ifeq "$(USE_ASYNC_CDROM)" "1"
+libpcsxcore/cdrom-async.o: CFLAGS += -DUSE_ASYNC_CDROM
+frontend/libretro.o: CFLAGS += -DUSE_ASYNC_CDROM
+frontend/menu.o: CFLAGS += -DUSE_ASYNC_CDROM
+USE_RTHREADS := 1
+endif
+
# dynarec
-ifeq "$(USE_DYNAREC)" "1"
+ifeq "$(DYNAREC)" "lightrec"
+CFLAGS += -Ideps/lightning/include -Ideps/lightrec -Iinclude/lightning -Iinclude/lightrec \
+ -DLIGHTREC -DLIGHTREC_STATIC
+ifeq ($(LIGHTREC_DEBUG),1)
+deps/lightrec/%.o: CFLAGS += -DLOG_LEVEL=DEBUG_L
+libpcsxcore/lightrec/plugin.o: CFLAGS += -DLIGHTREC_DEBUG=1
+frontend/main.o: CFLAGS += -DLIGHTREC_DEBUG=1
+deps/lightning/%.o: CFLAGS += -DDISASSEMBLER=1 -DBINUTILS_2_38=1 -DBINUTILS_2_29=1 \
+ -DHAVE_DISASSEMBLE_INIT_FOR_TARGET=1 -DPACKAGE_VERSION=1
+LDFLAGS += -lopcodes -lbfd
+endif
+LIGHTREC_CUSTOM_MAP ?= 0
+LIGHTREC_CUSTOM_MAP_OBJ ?= libpcsxcore/lightrec/mem.o
+LIGHTREC_THREADED_COMPILER ?= 0
+LIGHTREC_CODE_INV ?= 0
+CFLAGS += -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP) \
+ -DLIGHTREC_CODE_INV=$(LIGHTREC_CODE_INV) \
+ -DLIGHTREC_ENABLE_THREADED_COMPILER=$(LIGHTREC_THREADED_COMPILER) \
+ -DLIGHTREC_ENABLE_DISASSEMBLER=$(or $(LIGHTREC_DEBUG),0) \
+ -DLIGHTREC_NO_DEBUG=$(if $(LIGHTREC_DEBUG),0,1)
+ifeq ($(LIGHTREC_CUSTOM_MAP),1)
+LDLIBS += -lrt
+OBJS += $(LIGHTREC_CUSTOM_MAP_OBJ)
+endif
+ifeq ($(NEED_SYSCONF),1)
+OBJS += libpcsxcore/lightrec/sysconf.o
+endif
+ifeq ($(LIGHTREC_THREADED_COMPILER),1)
+OBJS += deps/lightrec/recompiler.o \
+ deps/lightrec/reaper.o
+endif
+OBJS += deps/lightrec/tlsf/tlsf.o
+OBJS += libpcsxcore/lightrec/plugin.o
+OBJS += deps/lightning/lib/jit_disasm.o \
+ deps/lightning/lib/jit_memory.o \
+ deps/lightning/lib/jit_names.o \
+ deps/lightning/lib/jit_note.o \
+ deps/lightning/lib/jit_print.o \
+ deps/lightning/lib/jit_size.o \
+ deps/lightning/lib/lightning.o \
+ deps/lightrec/blockcache.o \
+ deps/lightrec/constprop.o \
+ deps/lightrec/disassembler.o \
+ deps/lightrec/emitter.o \
+ deps/lightrec/interpreter.o \
+ deps/lightrec/lightrec.o \
+ deps/lightrec/memmanager.o \
+ deps/lightrec/optimizer.o \
+ deps/lightrec/regcache.o
+deps/lightning/%.o: CFLAGS += -DHAVE_MMAP=P_HAVE_MMAP
+deps/lightning/%: CFLAGS += -Wno-uninitialized
+deps/lightrec/%: CFLAGS += -Wno-uninitialized
+libpcsxcore/lightrec/mem.o: CFLAGS += -D_GNU_SOURCE
+ifeq ($(MMAP_WIN32),1)
+deps/lightning/lib/lightning.o: CFLAGS += -Dmprotect=_mprotect # deps/mman
+deps/lightning/lib/jit_print.o: CFLAGS += -w
+endif
+else ifeq "$(DYNAREC)" "ari64"
OBJS += libpcsxcore/new_dynarec/new_dynarec.o
OBJS += libpcsxcore/new_dynarec/pcsxmem.o
ifeq "$(ARCH)" "arm"
else
$(error no dynarec support for architecture $(ARCH))
endif
+ ifeq "$(NDRC_THREAD)" "1"
+ libpcsxcore/new_dynarec/new_dynarec.o: CFLAGS += -DNDRC_THREAD
+ libpcsxcore/new_dynarec/emu_if.o: CFLAGS += -DNDRC_THREAD
+ frontend/libretro.o: CFLAGS += -DNDRC_THREAD
+ USE_RTHREADS := 1
+ endif
else
CFLAGS += -DDRC_DISABLE
endif
plugins/dfsound/out.o: CFLAGS += -DHAVE_LIBRETRO
endif
+# supported gpu list in menu
+ifeq "$(HAVE_NEON_GPU)" "1"
+frontend/menu.o: CFLAGS += -DGPU_NEON
+endif
+ifeq "$(HAVE_GLES)" "1"
+frontend/menu.o: CFLAGS += -DHAVE_GLES
+endif
+
# builtin gpu
-OBJS += plugins/gpulib/gpu.o plugins/gpulib/vout_pl.o
+OBJS += plugins/gpulib/gpu.o plugins/gpulib/vout_pl.o plugins/gpulib/prim.o
ifeq "$(BUILTIN_GPU)" "neon"
+CFLAGS += -DGPU_NEON
OBJS += plugins/gpu_neon/psx_gpu_if.o
plugins/gpu_neon/psx_gpu_if.o: CFLAGS += -DNEON_BUILD -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP
plugins/gpu_neon/psx_gpu_if.o: plugins/gpu_neon/psx_gpu/*.c
endif
endif
ifeq "$(BUILTIN_GPU)" "peops"
+CFLAGS += -DGPU_PEOPS
# note: code is not safe for strict-aliasing? (Castlevania problems)
plugins/dfxvideo/gpulib_if.o: CFLAGS += -fno-strict-aliasing
plugins/dfxvideo/gpulib_if.o: plugins/dfxvideo/prim.c plugins/dfxvideo/soft.c
+frontend/menu.o frontend/plugin_lib.o: CFLAGS += -DBUILTIN_GPU_PEOPS
OBJS += plugins/dfxvideo/gpulib_if.o
+ifeq "$(THREAD_RENDERING)" "1"
+CFLAGS += -DTHREAD_RENDERING
+OBJS += plugins/gpulib/gpulib_thread_if.o
endif
-ifeq "$(BUILTIN_GPU)" "unai_old"
-OBJS += plugins/gpu_unai_old/gpulib_if.o
-ifeq "$(ARCH)" "arm"
-OBJS += plugins/gpu_unai_old/gpu_arm.o
-endif
-plugins/gpu_unai_old/gpulib_if.o: CFLAGS += -DREARMED -O3
-CC_LINK = $(CXX)
endif
ifeq "$(BUILTIN_GPU)" "unai"
+CFLAGS += -DGPU_UNAI
+CFLAGS += -DUSE_GPULIB=1
OBJS += plugins/gpu_unai/gpulib_if.o
ifeq "$(ARCH)" "arm"
OBJS += plugins/gpu_unai/gpu_arm.o
endif
-plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -DUSE_GPULIB=1 -O3
+ifeq "$(THREAD_RENDERING)" "1"
+CFLAGS += -DTHREAD_RENDERING
+OBJS += plugins/gpulib/gpulib_thread_if.o
+endif
+ifneq "$(GPU_UNAI_NO_OLD)" "1"
+OBJS += plugins/gpu_unai/old/if.o
+else
+CFLAGS += -DGPU_UNAI_NO_OLD
+endif
+plugins/gpu_unai/gpulib_if.o: plugins/gpu_unai/*.h
+plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -DUSE_GPULIB=1
+frontend/menu.o frontend/plugin_lib.o: CFLAGS += -DBUILTIN_GPU_UNAI
+ifneq ($(DEBUG), 1)
+plugins/gpu_unai/gpulib_if.o \
+plugins/gpu_unai/old/if.o: CFLAGS += -O3
+endif
CC_LINK = $(CXX)
endif
-# cdrcimg
-OBJS += plugins/cdrcimg/cdrcimg.o
-
# libchdr
-#ifeq "$(HAVE_CHD)" "1"
+ifeq "$(HAVE_CHD)" "1"
LCHDR = deps/libchdr
-LCHDR_LZMA = $(LCHDR)/deps/lzma-22.01
-LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.5/lib
+LCHDR_LZMA = $(LCHDR)/deps/lzma-24.05
+LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.6/lib
OBJS += $(LCHDR)/src/libchdr_bitstream.o
OBJS += $(LCHDR)/src/libchdr_cdrom.o
OBJS += $(LCHDR)/src/libchdr_chd.o
OBJS += $(LCHDR)/src/libchdr_flac.o
OBJS += $(LCHDR)/src/libchdr_huffman.o
-$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -std=gnu11
+$(LCHDR)/src/%.o: CFLAGS += -Wno-unused -Wno-maybe-uninitialized -std=gnu11
OBJS += $(LCHDR_LZMA)/src/Alloc.o
-OBJS += $(LCHDR_LZMA)/src/Bra86.o
-OBJS += $(LCHDR_LZMA)/src/BraIA64.o
OBJS += $(LCHDR_LZMA)/src/CpuArch.o
OBJS += $(LCHDR_LZMA)/src/Delta.o
OBJS += $(LCHDR_LZMA)/src/LzFind.o
-OBJS += $(LCHDR_LZMA)/src/Lzma86Dec.o
OBJS += $(LCHDR_LZMA)/src/LzmaDec.o
OBJS += $(LCHDR_LZMA)/src/LzmaEnc.o
OBJS += $(LCHDR_LZMA)/src/Sort.o
-$(LCHDR_LZMA)/src/%.o: CFLAGS += -Wno-unused -D_7ZIP_ST -I$(LCHDR_LZMA)/include
+$(LCHDR_LZMA)/src/%.o: CFLAGS += -Wno-unused -DZ7_ST -I$(LCHDR_LZMA)/include
$(LCHDR)/src/%.o: CFLAGS += -I$(LCHDR_LZMA)/include
-OBJS += $(LCHDR_ZSTD)/common/debug.o
OBJS += $(LCHDR_ZSTD)/common/entropy_common.o
OBJS += $(LCHDR_ZSTD)/common/error_private.o
OBJS += $(LCHDR_ZSTD)/common/fse_decompress.o
-OBJS += $(LCHDR_ZSTD)/common/pool.o
-OBJS += $(LCHDR_ZSTD)/common/threading.o
OBJS += $(LCHDR_ZSTD)/common/xxhash.o
OBJS += $(LCHDR_ZSTD)/common/zstd_common.o
OBJS += $(LCHDR_ZSTD)/decompress/huf_decompress.o
+OBJS += $(LCHDR_ZSTD)/decompress/huf_decompress_amd64.o
OBJS += $(LCHDR_ZSTD)/decompress/zstd_ddict.o
OBJS += $(LCHDR_ZSTD)/decompress/zstd_decompress_block.o
OBJS += $(LCHDR_ZSTD)/decompress/zstd_decompress.o
$(LCHDR_ZSTD)/common/%.o \
-$(LCHDR_ZSTD)/decompress/%.o: CFLAGS += -DZSTD_DISABLE_ASM -I$(LCHDR_ZSTD)
+$(LCHDR_ZSTD)/decompress/%.o: CFLAGS += -I$(LCHDR_ZSTD)
$(LCHDR)/src/%.o: CFLAGS += -I$(LCHDR_ZSTD)
libpcsxcore/cdriso.o: CFLAGS += -Wno-unused-function
CFLAGS += -DHAVE_CHD -I$(LCHDR)/include
-#endif
+endif
# frontend/gui
OBJS += frontend/cspace.o
ifeq "$(PLATFORM)" "generic"
OBJS += frontend/libpicofe/in_sdl.o
-OBJS += frontend/libpicofe/plat_sdl.o
+#OBJS += frontend/libpicofe/plat_sdl.o
OBJS += frontend/libpicofe/plat_dummy.o
-OBJS += frontend/libpicofe/linux/in_evdev.o
OBJS += frontend/plat_sdl.o
+frontend/plat_sdl.o frontend/libpicofe/plat_sdl.o: CFLAGS += -DSDL_OVERLAY_2X
+frontend/menu.o: CFLAGS += -DSDL_OVERLAY_2X -DMENU_SHOW_VARSCALER=1
+ifeq "$(HAVE_EVDEV)" "1"
+OBJS += frontend/libpicofe/linux/in_evdev.o
+endif
ifeq "$(HAVE_GLES)" "1"
OBJS += frontend/libpicofe/gl.o frontend/libpicofe/gl_platform.o
-LDLIBS += $(LDLIBS_GLES)
+OBJS += frontend/libpicofe/gl_loader.o
+#LDLIBS += $(LDLIBS_GLES) # loaded dynamically now
frontend/libpicofe/plat_sdl.o: CFLAGS += -DHAVE_GLES $(CFLAGS_GLES)
frontend/libpicofe/gl_platform.o: CFLAGS += -DHAVE_GLES $(CFLAGS_GLES)
frontend/libpicofe/gl.o: CFLAGS += -DHAVE_GLES $(CFLAGS_GLES)
OBJS += frontend/libpicofe/linux/in_evdev.o
OBJS += frontend/plat_pandora.o frontend/plat_omap.o
frontend/main.o frontend/menu.o: CFLAGS += -include frontend/pandora/ui_feat.h
+frontend/main.o frontend/plugin_lib.o: CFLAGS += -DPANDORA
frontend/libpicofe/linux/plat.o: CFLAGS += -DPANDORA
USE_PLUGIN_LIB = 1
USE_FRONTEND = 1
-CFLAGS += -gdwarf-3 -ffunction-sections -fdata-sections
-LDFLAGS += -Wl,--gc-sections
+CFLAGS += -gdwarf-3
endif
ifeq "$(PLATFORM)" "caanoo"
OBJS += frontend/libpicofe/gp2x/in_gp2x.o frontend/warm/warm.o
OBJS += frontend/libretro-cdrom.o
OBJS += deps/libretro-common/lists/string_list.o
OBJS += deps/libretro-common/memmap/memalign.o
-OBJS += deps/libretro-common/rthreads/rthreads.o
OBJS += deps/libretro-common/vfs/vfs_implementation_cdrom.o
CFLAGS += -DHAVE_CDROM
endif
CFLAGS += -DUSE_LIBRETRO_VFS
endif
OBJS += frontend/libretro.o
-CFLAGS += -DFRONTEND_SUPPORTS_RGB565
+CFLAGS += -DHAVE_LIBRETRO
+INC_LIBRETRO_COMMON := 1
-ifeq ($(MMAP_WIN32),1)
-OBJS += libpcsxcore/memmap_win32.o
+endif # $(PLATFORM) == "libretro"
+
+ifeq "$(USE_RTHREADS)" "1"
+OBJS += frontend/libretro-rthreads.o
+OBJS += deps/libretro-common/features/features_cpu.o
+frontend/main.o: CFLAGS += -DHAVE_RTHREADS
+INC_LIBRETRO_COMMON := 1
endif
+ifeq "$(INC_LIBRETRO_COMMON)" "1"
+CFLAGS += -Ideps/libretro-common/include
endif
ifeq "$(USE_PLUGIN_LIB)" "1"
ifeq "$(USE_FRONTEND)" "1"
OBJS += frontend/menu.o
OBJS += frontend/libpicofe/input.o
+frontend/libpicofe/input.o: CFLAGS += -Wno-array-bounds
frontend/menu.o: frontend/libpicofe/menu.c
ifeq "$(HAVE_TSLIB)" "1"
frontend/%.o: CFLAGS += -DHAVE_TSLIB
# misc
OBJS += frontend/main.o frontend/plugin.o
-frontend/main.o: CFLAGS += -DBUILTIN_GPU=$(BUILTIN_GPU)
+frontend/main.o libpcsxcore/misc.o: CFLAGS += -DBUILTIN_GPU=$(BUILTIN_GPU)
+
+frontend/menu.o frontend/main.o: include/revision.h
+frontend/plat_sdl.o frontend/libretro.o: include/revision.h
+libpcsxcore/misc.o: include/revision.h
-frontend/menu.o frontend/main.o: frontend/revision.h
-frontend/plat_sdl.o frontend/libretro.o: frontend/revision.h
+CFLAGS += $(CFLAGS_LAST)
frontend/libpicofe/%.c:
@echo "libpicofe module is missing, please run:"
libpcsxcore/gte_nf.o: libpcsxcore/gte.c
$(CC) -c -o $@ $^ $(CFLAGS) -DFLAGLESS
-frontend/revision.h: FORCE
+include/revision.h: FORCE
@(git describe --always || echo) | sed -e 's/.*/#define REV "\0"/' > $@_
@diff -q $@_ $@ > /dev/null 2>&1 || cp $@_ $@
@rm $@_
target_: $(TARGET)
$(TARGET): $(OBJS)
+ifeq ($(PARTIAL_LINKING), 1)
+ $(LD) -o $(basename $(TARGET))1.o -r --gc-sections $(addprefix -u ,$(shell cat frontend/libretro-extern)) $(addprefix -u ,$(EXTRA_EXTERN_SYMS)) $^
+ $(OBJCOPY) --keep-global-symbols=frontend/libretro-extern $(addprefix -G ,$(EXTRA_EXTERN_SYMS)) $(basename $(TARGET))1.o $(basename $(TARGET)).o
+ $(AR) rcs $@ $(basename $(TARGET)).o
+else ifeq ($(STATIC_LINKING), 1)
+ $(AR) rcs $@ $^
+else
$(CC_LINK) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LDLIBS) $(EXTRA_LDFLAGS)
+endif
clean: $(PLAT_CLEAN) clean_plugins
- $(RM) $(TARGET) $(OBJS) $(TARGET).map frontend/revision.h
+ $(RM) $(TARGET) *.o $(OBJS) $(TARGET).map include/revision.h
ifneq ($(PLUGINS),)
plugins_: $(PLUGINS)
-$(PLUGINS):
+plugins/gpulib/gpulib.$(ARCH).a:
+ $(MAKE) -C plugins/gpulib/
+
+$(PLUGINS): plugins/gpulib/gpulib.$(ARCH).a
$(MAKE) -C $(dir $@)
clean_plugins:
mkdir -p $(OUT)/plugins
mkdir -p $(OUT)/bios
cp -r $^ $(OUT)/
- mv $(OUT)/*.so* $(OUT)/plugins/
+ -mv $(OUT)/*.so* $(OUT)/plugins/
zip -9 -r $(OUT).zip $(OUT)
endif
cp -r $^ out/
sed -e 's/%PR%/$(VER)/g' out/pcsx.pxml.templ > out/pcsx.pxml
rm out/pcsx.pxml.templ
- mv out/*.so out/plugins/
+ -mv out/*.so out/plugins/
$(PND_MAKE) -p pcsx_rearmed_$(VER).pnd -d out -x out/pcsx.pxml -i frontend/pandora/pcsx.png -c
endif
rm -rf out
mkdir -p out/pcsx_rearmed/plugins
cp -r $^ out/pcsx_rearmed/
- mv out/pcsx_rearmed/*.so out/pcsx_rearmed/plugins/
+ -mv out/pcsx_rearmed/*.so out/pcsx_rearmed/plugins/
mv out/pcsx_rearmed/caanoo.gpe out/pcsx_rearmed/pcsx.gpe
mv out/pcsx_rearmed/pcsx_rearmed.ini out/
mkdir out/pcsx_rearmed/lib/
-$(error This file is unmaintained. Please use the libretro fork: https://github.com/libretro/pcsx_rearmed)
+# Makefile for PCSX ReARMed (libretro)
+
+DEBUG ?= 0
+WANT_ZLIB ?= 1
+HAVE_CHD ?= 1
+HAVE_PHYSICAL_CDROM ?= 1
+USE_ASYNC_CDROM ?= 1
+USE_LIBRETRO_VFS ?= 0
+NDRC_THREAD ?= 1
+GNU_LINKER ?= 1
+
+# Dynarec options: lightrec, ari64
+DYNAREC ?= lightrec
+
+ifeq ($(platform),)
+ platform = unix
+ ifeq ($(shell uname -a),)
+ platform = win
+ else ifneq ($(findstring MINGW,$(shell uname -a)),)
+ platform = win
+ else ifneq ($(findstring Darwin,$(shell uname -a)),)
+ platform = osx
+ else ifneq ($(findstring win,$(shell uname -a)),)
+ platform = win
+ endif
+endif
+
+CC ?= gcc
+CXX ?= g++
+AS ?= as
+LD ?= ld
+CFLAGS ?=
+
+# early compiler overrides
+ifeq ($(platform),ios-arm64)
+ CC = cc -arch arm64 -isysroot $(IOSSDK)
+ CXX = c++ -arch arm64 -isysroot $(IOSSDK)
+else ifneq (,$(findstring ios,$(platform)))
+ CC = cc -arch armv7 -isysroot $(IOSSDK)
+ CXX = c++ -arch armv7 -isysroot $(IOSSDK)
+else ifeq ($(platform), tvos-arm64)
+ ifeq ($(IOSSDK),)
+ IOSSDK := $(shell xcodebuild -version -sdk appletvos Path)
+ endif
+ CC = cc -arch arm64 -isysroot $(IOSSDK)
+ CXX = c++ -arch arm64 -isysroot $(IOSSDK)
+else ifeq ($(platform), osx)
+ ifeq ($(CROSS_COMPILE),1)
+ TARGET_RULE = -target $(LIBRETRO_APPLE_PLATFORM) -isysroot $(LIBRETRO_APPLE_ISYSROOT)
+ CFLAGS += $(TARGET_RULE)
+ CXXFLAGS += $(TARGET_RULE)
+ LDFLAGS += $(TARGET_RULE)
+ endif
+else ifeq ($(platform), psl1ght)
+ ifeq ($(strip $(PS3DEV)),)
+ $(error "PS3DEV env var is not set")
+ endif
+ CC = $(PS3DEV)/ppu/bin/ppu-gcc$(EXE_EXT)
+ AR = $(PS3DEV)/ppu/bin/ppu-ar$(EXE_EXT)
+else ifeq ($(platform), psp1)
+ CC = psp-gcc$(EXE_EXT)
+ AR = psp-ar$(EXE_EXT)
+ LD = psp-ld$(EXE_EXT)
+else ifeq ($(platform), vita)
+ CC = arm-vita-eabi-gcc$(EXE_EXT)
+ AR = arm-vita-eabi-ar$(EXE_EXT)
+ LD = arm-vita-eabi-ld$(EXE_EXT)
+ OBJCOPY = arm-vita-eabi-objcopy$(EXE_EXT)
+else ifeq ($(platform), ctr)
+ ifneq ($(strip $(DEVKITPRO)),)
+ DEVKITARM ?= $(DEVKITPRO)/devkitARM
+ CTRULIB ?= $(DEVKITPRO)/libctru
+ endif
+ ifeq ($(strip $(DEVKITARM)),)
+ $(error "DEVKITARM env var is not set")
+ endif
+ CC = $(DEVKITARM)/bin/arm-none-eabi-gcc$(EXE_EXT)
+ CXX = $(DEVKITARM)/bin/arm-none-eabi-g++$(EXE_EXT)
+ AR = $(DEVKITARM)/bin/arm-none-eabi-ar$(EXE_EXT)
+ LD = $(DEVKITARM)/bin/arm-none-eabi-ld$(EXE_EXT)
+ OBJCOPY = $(DEVKITARM)/bin/arm-none-eabi-objcopy$(EXE_EXT)
+else ifeq ($(platform), libnx)
+ export DEPSDIR := $(CURDIR)/
+ ifeq ($(strip $(DEVKITPRO)),)
+ $(error "DEVKITPRO env var is not set")
+ endif
+ include $(DEVKITPRO)/libnx/switch_rules
+ SHELL := PATH=$(PATH) $(SHELL)
+ LD = $(PREFIX)ld
+else ifeq ($(platform), xenon)
+ CC = xenon-gcc$(EXE_EXT)
+ AR = xenon-ar$(EXE_EXT)
+ LD = xenon-ld$(EXE_EXT)
+else ifneq (,$(filter $(platform),ngc wii wiiu))
+ ifeq ($(strip $(DEVKITPPC)),)
+ $(error "DEVKITPPC env var is not set")
+ endif
+ CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT)
+ CXX = $(DEVKITPPC)/bin/powerpc-eabi-g++$(EXE_EXT)
+ AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT)
+ LD = $(DEVKITPPC)/bin/powerpc-eabi-ld$(EXE_EXT)
+else ifeq ($(platform), qnx)
+ CC = qcc -Vgcc_ntoarmv7le
+else ifeq ($(platform), miyoo)
+ CC = /opt/miyoo/usr/bin/arm-linux-gcc
+ CXX = /opt/miyoo/usr/bin/arm-linux-g++
+endif
+CC_AS ?= $(CC)
+
+# workaround wrong owner in libretro infra
+GIT_VERSION1 := $(shell test -d /builds/libretro/pcsx_rearmed && git rev-parse --short HEAD 2>&1)
+ifneq ($(findstring dubious ownership,$(GIT_VERSION1)),)
+DUMMY := $(shell git config --global --add safe.directory /builds/libretro/pcsx_rearmed)
+endif
+
+TARGET_NAME := pcsx_rearmed
+ARCH_DETECTED := $(shell $(CC) $(CFLAGS) -dumpmachine | awk -F- '{print $$1}')
+GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)"
+ifneq ($(GIT_VERSION)," unknown")
+ CFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\"
+endif
+ifneq ($(WANT_ZLIB),1)
+LIBZ := -lz
+endif
+LIBPTHREAD := -lpthread
+ifneq ($(findstring Haiku,$(shell uname -s)),)
+LIBDL := -lroot -lnetwork
+# easiest way to prevent libretro-common from breaking on Haiku
+HAVE_PHYSICAL_CDROM := 0
+else
+LIBDL := -ldl
+endif
+LIBM := -lm
+MMAP_WIN32 = 0
+EXTRA_LDFLAGS =
+
+# select some defaults
+ifneq (,$(findstring $(ARCH_DETECTED),arm aarch64))
+ DYNAREC = ari64
+ifneq (,$(shell $(CC) -E -dD $(CFLAGS) include/arm_features.h | grep 'define __thumb__'))
+ # must disable thumb as ari64 can't handle it
+ CFLAGS += -marm
+endif
+ifneq (,$(shell $(CC) -E -dD $(CFLAGS) include/arm_features.h | grep 'HAVE_NEON32'))
+ BUILTIN_GPU = neon
+endif
+endif
+ifneq (,$(filter $(ARCH_DETECTED),i686 x86_64 arm64 aarch64))
+ BUILTIN_GPU = neon
+endif
+
+# platform specific options
+
+# Unix
+ifeq ($(platform), unix)
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ THREAD_RENDERING = 1
+ifeq ($(shell uname),Linux)
+ LIGHTREC_CUSTOM_MAP := 1
+endif
+
+# ODROIDN2
+else ifneq (,$(findstring CortexA73_G12B,$(platform)))
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ ARCH = arm64
+ BUILTIN_GPU = neon
+ HAVE_NEON = 1
+ DYNAREC = ari64
+ CFLAGS += -fomit-frame-pointer -ffast-math -DARM
+ CFLAGS += -march=armv8-a+crc -mcpu=cortex-a73 -mtune=cortex-a73.cortex-a53
+
+# ALLWINNER H5
+else ifneq (,$(findstring h5,$(platform)))
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ ARCH = arm64
+ BUILTIN_GPU = neon
+ HAVE_NEON = 1
+ DYNAREC = ari64
+ CFLAGS += -fomit-frame-pointer -ffast-math -DARM
+ CFLAGS += -march=armv8-a+crc -mcpu=cortex-a53 -mtune=cortex-a53
+
+else ifeq ($(platform), linux-portable)
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC -nostdlib
+ EXTRA_LDFLAGS += -fPIC -nostdlib
+ LIBZ :=
+ LIBPTHREAD :=
+ LIBDL :=
+ LIBM :=
+ NO_UNDEF_CHECK = 1
+ LIGHTREC_CUSTOM_MAP := 1
+
+# OS X
+else ifeq ($(platform), osx)
+ DYNAREC := 0
+ TARGET := $(TARGET_NAME)_libretro.dylib
+ fpic := -fPIC
+ MACSOSVER = `sw_vers -productVersion | cut -d. -f 1`
+ OSXVER = `sw_vers -productVersion | cut -d. -f 2`
+ OSX_LT_MAVERICKS = `(( $(OSXVER) <= 9)) && echo "YES"`
+ ifeq ($(OSX_LT_MAVERICKS),YES)
+ fpic += -mmacosx-version-min=10.1
+ endif
+ CFLAGS += $(ARCHFLAGS)
+ CXXFLAGS += $(ARCHFLAGS)
+ LDFLAGS += $(ARCHFLAGS)
+ HAVE_PHYSICAL_CDROM = 0
+ FSECTIONS_LDFLAGS = -Wl,-dead_strip
+ GNU_LINKER = 0
+
+# iOS
+else ifneq (,$(findstring ios,$(platform)))
+ TARGET := $(TARGET_NAME)_libretro_ios.dylib
+ MINVERSION :=
+ifeq ($(DYNAREC),lightrec)
+ # Override
+ DYNAREC := 0
+endif
+ fpic := -fPIC
+
+ ifeq ($(IOSSDK),)
+ IOSSDK := $(shell xcodebuild -version -sdk iphoneos Path)
+ endif
+
+ CFLAGS += -DIOS
+ifeq ($(platform),ios-arm64)
+ ARCH := arm64
+ BUILTIN_GPU = neon
+ HAVE_NEON = 1
+ DYNAREC = 0
+else
+ ARCH := arm
+ HAVE_NEON = 1
+ HAVE_NEON_ASM = 1
+ BUILTIN_GPU = neon
+ CFLAGS += -marm -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon
+ ASFLAGS += -marm -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon
+endif
+ HAVE_PHYSICAL_CDROM = 0
+ CC_AS = perl ./tools/gas-preprocessor.pl $(CC)
+ifeq ($(platform),$(filter $(platform),ios9 ios-arm64))
+ MINVERSION = -miphoneos-version-min=8.0
+else
+ MINVERSION = -miphoneos-version-min=5.0
+endif
+ CFLAGS += $(MINVERSION)
+ FSECTIONS_LDFLAGS = -Wl,-dead_strip
+ GNU_LINKER = 0
+
+else ifeq ($(platform), tvos-arm64)
+ TARGET := $(TARGET_NAME)_libretro_tvos.dylib
+ MINVERSION :=
+ifeq ($(DYNAREC),lightrec)
+ # Override
+ DYNAREC := 0
+endif
+ fpic := -fPIC
+
+ ifeq ($(IOSSDK),)
+ IOSSDK := $(shell xcodebuild -version -sdk appletvos Path)
+ endif
+
+ CFLAGS += -DIOS -DTVOS
+ ARCH := arm64
+ BUILTIN_GPU = neon
+ HAVE_NEON = 1
+ DYNAREC = 0
+ HAVE_PHYSICAL_CDROM = 0
+ CC_AS = perl ./tools/gas-preprocessor.pl $(CC)
+ MINVERSION = -mappletvos-version-min=11.0
+ CFLAGS += $(MINVERSION)
+ FSECTIONS_LDFLAGS = -Wl,-dead_strip
+ GNU_LINKER = 0
+
+# Nintendo Switch (libnx)
+else ifeq ($(platform), libnx)
+ TARGET := $(TARGET_NAME)_libretro_$(platform).a
+ ARCH := arm64
+ HAVE_VFS_FD = 0
+ CFLAGS += -O3 -fomit-frame-pointer -ffast-math -I$(DEVKITPRO)/libnx/include/ -fPIE
+ CFLAGS += -specs=$(DEVKITPRO)/libnx/switch.specs -DNO_DYLIB -D__arm64__ -D__ARM_NEON__
+ CFLAGS += -D__SWITCH__ -DSWITCH -DHAVE_LIBNX
+ CFLAGS += -DARM -D__aarch64__=1 -march=armv8-a -mtune=cortex-a57 -mtp=soft -DHAVE_INTTYPES -DLSB_FIRST -ffast-math -mcpu=cortex-a57+crc+fp+simd
+ CFLAGS += -ftree-vectorize
+ CFLAGS += -Ifrontend/switch
+ NO_POSIX_MEMALIGN := 1
+ NO_PTHREAD=1
+ NO_MMAP := 1 # for psxmem
+ LIBPTHREAD :=
+ WANT_ZLIB = 0
+ PARTIAL_LINKING = 1
+ BUILTIN_GPU = neon
+ HAVE_NEON = 1
+ DYNAREC = ari64
+ HAVE_PHYSICAL_CDROM = 0
+
+# Lakka Switch (arm64)
+else ifeq ($(platform), arm64)
+ TARGET := $(TARGET_NAME)_libretro.so
+ ARCH := arm64
+ BUILTIN_GPU = neon
+ HAVE_NEON = 1
+ DYNAREC = ari64
+ HAVE_PHYSICAL_CDROM = 0
+ fpic := -fPIC
+ CFLAGS := $(filter-out -O2, $(CFLAGS))
+ CFLAGS += -O3 -ftree-vectorize
+
+# Lightweight PS3 Homebrew SDK
+else ifeq ($(platform), psl1ght)
+ TARGET := $(TARGET_NAME)_libretro_psl1ght.a
+ CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__
+ CFLAGS += -DNO_DYLIB
+ NO_UNDEF_CHECK := 1
+ STATIC_LINKING := 1
+ NO_MMAP := 1
+ NO_PTHREAD := 1
+ LIBPTHREAD :=
+ LIBDL :=
+ NEED_SYSCONF := 1
+ HAVE_PHYSICAL_CDROM = 0
+ USE_ASYNC_CDROM = 0
+
+# PSP
+else ifeq ($(platform), psp1)
+ TARGET := $(TARGET_NAME)_libretro_psp1.a
+ CFLAGS += -DPSP -G0
+ HAVE_PHYSICAL_CDROM = 0
+
+# Vita
+else ifeq ($(platform), vita)
+ TARGET := $(TARGET_NAME)_libretro_vita.a
+ CFLAGS += -DVITA
+ CFLAGS += -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -marm
+ CFLAGS += -fsingle-precision-constant -mword-relocations -fno-unwind-tables
+ CFLAGS += -fno-asynchronous-unwind-tables -ftree-vectorize
+ #CFLAGS += -funroll-loops # ~280K bloat
+ CFLAGS += -fno-optimize-sibling-calls # broken arm->thumb tailcalls?
+ CFLAGS += -I$(VITASDK)/include -Ifrontend/vita
+ CFLAGS += -DNO_DYLIB
+ CFLAGS_LAST += -O3
+ ASFLAGS += -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon
+
+ HAVE_NEON = 1
+ HAVE_NEON_ASM = 1
+ BUILTIN_GPU = neon
+
+ DYNAREC = ari64
+ ARCH = arm
+ PARTIAL_LINKING = 1
+ NO_MMAP := 1
+ NO_POSIX_MEMALIGN := 1
+ NDRC_THREAD := 0 # can't write to tc from thread
+ HAVE_PHYSICAL_CDROM = 0
+ EXTRA_EXTERN_SYMS += _newlib_vm_size_user
+
+# CTR(3DS)
+else ifeq ($(platform), ctr)
+ ifeq ($(strip $(CTRULIB)),)
+ $(error "CTRULIB env var is not set")
+ endif
+ TARGET := $(TARGET_NAME)_libretro_ctr.a
+ CFLAGS += -DARM11 -D_3DS -D__3DS__
+ CFLAGS += -DGPU_UNAI_USE_FLOATMATH -DGPU_UNAI_USE_FLOAT_DIV_MULTINV
+ CFLAGS += -march=armv6k -mtune=mpcore -mfloat-abi=hard -marm -mfpu=vfp -mtp=soft
+ CFLAGS += -mword-relocations
+ CFLAGS += -fomit-frame-pointer
+ CFLAGS_LAST += -O3
+ # CFLAGS += -funroll-loops # ~500K of bloat
+ CFLAGS += -Ifrontend/3ds -I$(CTRULIB)/include
+ CFLAGS += -Werror=implicit-function-declaration
+ CFLAGS += -Wno-format
+ CFLAGS += -DHAVE_UNISTD_H
+ CFLAGS += -DZ7_DECL_Int32_AS_long
+ CFLAGS += -DUSE_CTRULIB_2
+ CFLAGS += -DNO_DYLIB
+
+ OBJS += deps/arm-mem/memcpymove-v6l.o
+ OBJS += deps/arm-mem/memset-v6l.o
+ OBJS += frontend/3ds/utils.o
+
+ BUILTIN_GPU = unai
+ THREAD_RENDERING = 1
+ DYNAREC = ari64
+ ARCH = arm
+ HAVE_NEON = 0
+ PARTIAL_LINKING = 1
+ WANT_ZLIB = 0
+ NO_POSIX_MEMALIGN := 1
+ NO_MMAP := 1 # for psxmem
+ HAVE_PHYSICAL_CDROM = 0
+
+# Xbox 360
+else ifeq ($(platform), xenon)
+ TARGET := $(TARGET_NAME)_libretro_xenon360.a
+ CFLAGS += -D__LIBXENON__ -m32 -D__ppc__
+ HAVE_PHYSICAL_CDROM = 0
+
+# Nintendo GC/Wii/WiiU
+else ifneq (,$(filter $(platform),ngc wii wiiu))
+ TARGET := $(TARGET_NAME)_libretro_$(platform).a
+ ifeq ($(platform), ngc)
+ CFLAGS += -DHW_DOL -mogc
+ NEED_SYSCONF := 1
+ else ifeq ($(platform), wii)
+ CFLAGS += -DHW_RVL -mrvl
+ NEED_SYSCONF := 1
+ else ifeq ($(platform), wiiu)
+ # -mwup was removed in newer devkitPPC versions
+ CFLAGS += -DHW_WUP
+ CFLAGS += -I frontend/wiiu
+ CFLAGS += -DZ7_DECL_Int32_AS_long
+ CFLAGS += -Wno-format
+ LIGHTREC_CUSTOM_MAP := 1
+ LIGHTREC_CUSTOM_MAP_OBJ := libpcsxcore/lightrec/mem_wiiu.o
+ LIGHTREC_CODE_INV := 1
+ endif
+ ARCH = powerpc
+ BUILTIN_GPU = peops
+ CFLAGS += -D__ppc__ -D__powerpc__
+ CFLAGS += -DGEKKO -mcpu=750 -meabi -mhard-float
+ CFLAGS += -DNO_DYLIB
+ STATIC_LINKING := 1
+ THREAD_RENDERING := 0
+ NO_PTHREAD := 1
+ NO_MMAP := 1
+ NO_POSIX_MEMALIGN := 1
+ LIBDL :=
+ LIBPTHREAD :=
+ LIBRT :=
+ HAVE_PHYSICAL_CDROM = 0
+ USE_ASYNC_CDROM = 0
+
+# QNX
+else ifeq ($(platform), qnx)
+ TARGET := $(TARGET_NAME)_libretro_qnx.so
+ fpic := -fPIC
+ HAVE_NEON = 1
+ HAVE_NEON_ASM = 1
+ DYNAREC = ari64
+ BUILTIN_GPU = neon
+ ARCH = arm
+ CFLAGS += -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp
+ ASFLAGS += -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp
+ MAIN_LDLIBS += -lsocket
+ LIBPTHREAD :=
+ LIBDL :=
+ LIBM :=
+ HAVE_PHYSICAL_CDROM = 0
+
+#Raspberry Pi 1
+else ifeq ($(platform), rpi1)
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ CFLAGS += -marm -mcpu=arm1176jzf-s -mfpu=vfp -mfloat-abi=hard
+ ASFLAGS += -mcpu=arm1176jzf-s -mfpu=vfp -mfloat-abi=hard
+ HAVE_NEON = 0
+ ARCH = arm
+ BUILTIN_GPU = unai
+ DYNAREC = ari64
+
+#Raspberry Pi 2
+else ifeq ($(platform), rpi2)
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ CFLAGS += -marm -mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard
+ ASFLAGS += -mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard
+ HAVE_NEON = 1
+ HAVE_NEON_ASM = 1
+ ARCH = arm
+ BUILTIN_GPU = neon
+ DYNAREC = ari64
+
+#Raspberry Pi 3
+else ifeq ($(platform), rpi3)
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ CFLAGS += -marm -mcpu=cortex-a53 -mfpu=neon-fp-armv8 -mfloat-abi=hard
+ ASFLAGS += -mcpu=cortex-a53 -mfpu=neon-fp-armv8 -mfloat-abi=hard
+ HAVE_NEON = 1
+ HAVE_NEON_ASM = 1
+ ARCH = arm
+ BUILTIN_GPU = neon
+ DYNAREC = ari64
+
+#Raspberry Pi 3 with a 64bit GNU/Linux OS
+else ifeq ($(platform), rpi3_64)
+ TARGET := $(TARGET_NAME)_libretro.so
+ ARCH := arm64
+ BUILTIN_GPU = neon
+ HAVE_NEON = 1
+ DYNAREC = ari64
+ fpic := -fPIC
+ CFLAGS += -march=armv8-a+crc+simd -mtune=cortex-a53 -ftree-vectorize
+
+#Raspberry Pi 4 with a 32bit GNU/Linux OS
+else ifeq ($(platform), rpi4)
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ CFLAGS += -marm -mcpu=cortex-a72 -mfpu=neon-fp-armv8 -mfloat-abi=hard
+ ASFLAGS += -mcpu=cortex-a72 -mfpu=neon-fp-armv8 -mfloat-abi=hard
+ HAVE_NEON = 1
+ HAVE_NEON_ASM = 1
+ ARCH = arm
+ BUILTIN_GPU = neon
+ DYNAREC = ari64
+
+#Raspberry Pi 4 with a 64bit GNU/Linux OS
+else ifeq ($(platform), rpi4_64)
+ TARGET := $(TARGET_NAME)_libretro.so
+ ARCH := arm64
+ BUILTIN_GPU = neon
+ HAVE_NEON = 1
+ DYNAREC = ari64
+ fpic := -fPIC
+ CFLAGS += -march=armv8-a+crc+simd -mtune=cortex-a72 -ftree-vectorize
+
+# Classic Platforms ####################
+# Platform affix = classic_<ISA>_<µARCH>
+# Help at https://modmyclassic.com/comp
+
+# (armv7 a7, hard point, neon based) ###
+# NESC, SNESC, C64 mini
+else ifeq ($(platform), classic_armv7_a7)
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ CFLAGS += -Ofast \
+ -flto=auto -fuse-linker-plugin \
+ -fno-stack-protector -fno-ident -fomit-frame-pointer \
+ -falign-functions=1 -falign-jumps=1 -falign-loops=1 \
+ -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops \
+ -fmerge-all-constants -fno-math-errno \
+ -marm -mtune=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard
+ CXXFLAGS += $(CFLAGS)
+ CPPFLAGS += $(CFLAGS)
+ ASFLAGS += $(CFLAGS)
+ HAVE_NEON = 1
+ HAVE_NEON_ASM = 1
+ ARCH = arm
+ BUILTIN_GPU = neon
+ DYNAREC = ari64
+ ifeq ($(shell echo `$(CC) -dumpversion` "< 4.9" | bc -l), 1)
+ CFLAGS += -march=armv7-a
+ else
+ CFLAGS += -march=armv7ve
+ # If gcc is 5.0 or later
+ ifeq ($(shell echo `$(CC) -dumpversion` ">= 5" | bc -l), 1)
+ LDFLAGS += -static-libgcc -static-libstdc++
+ endif
+ endif
+
+# (armv8 a35, hard point, neon based) ###
+# PlayStation Classic
+else ifeq ($(platform), classic_armv8_a35)
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ CFLAGS += -Ofast \
+ -fmerge-all-constants -fno-math-errno \
+ -fno-stack-protector -fomit-frame-pointer \
+ -marm -mcpu=cortex-a35 -mtune=cortex-a35 -mfpu=neon-fp-armv8 -mfloat-abi=hard
+ HAVE_NEON = 1
+ HAVE_NEON_ASM = 1
+ ARCH = arm
+ BUILTIN_GPU = neon
+ DYNAREC = ari64
+ LDFLAGS += -static-libgcc -static-libstdc++ -fPIC
+
+#######################################
+
+# ARM
+else ifneq (,$(findstring armv,$(platform)))
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ HAVE_NEON = 0
+ BUILTIN_GPU = peops
+ ifneq (,$(findstring cortexa8,$(platform)))
+ CFLAGS += -mcpu=cortex-a8
+ ASFLAGS += -mcpu=cortex-a8
+ else ifneq (,$(findstring cortexa7,$(platform)))
+ CFLAGS += -mcpu=cortex-a7
+ ASFLAGS += -mcpu=cortex-a7
+ LIBZ :=
+ else ifneq (,$(findstring cortexa9,$(platform)))
+ CFLAGS += -mcpu=cortex-a9
+ ASFLAGS += -mcpu=cortex-a9
+ endif
+ CFLAGS += -marm
+ ifneq (,$(findstring neon,$(platform)))
+ CFLAGS += -mfpu=neon
+ ASFLAGS += -mfpu=neon
+ HAVE_NEON = 1
+ HAVE_NEON_ASM = 1
+ BUILTIN_GPU = neon
+ endif
+ ifneq (,$(findstring softfloat,$(platform)))
+ CFLAGS += -mfloat-abi=softfp
+ ASFLAGS += -mfloat-abi=softfp
+ else ifneq (,$(findstring hardfloat,$(platform)))
+ CFLAGS += -mfloat-abi=hard
+ ASFLAGS += -mfloat-abi=hard
+ endif
+ ARCH = arm
+ DYNAREC = ari64
+
+else ifeq ($(platform), miyoo)
+ TARGET := $(TARGET_NAME)_libretro.so
+ fpic := -fPIC
+ CFLAGS += -mcpu=arm926ej-s -fsingle-precision-constant
+ CFLAGS += -DGPU_UNAI_USE_INT_DIV_MULTINV -D_MIYOO
+ ARCH = arm
+ BUILTIN_GPU = unai
+ DYNAREC = ari64
+ HAVE_NEON = 0
+
+# Emscripten
+else ifeq ($(platform), emscripten)
+ TARGET := $(TARGET_NAME)_libretro_$(platform).bc
+ pthread ?= 0
+ fpic := -fPIC
+ NO_MMAP = 1
+ # we can use -lz for emscripten's built-in zlib port
+ WANT_ZLIB=0
+ CFLAGS += -DNO_DYLIB -DNO_SOCKET
+ CFLAGS += -msimd128 -ftree-vectorize
+ # when compiling with pthreads...
+ ifneq ($(pthread), 0)
+ # use -lpthread
+ LIBPTHREAD := -lpthread
+ NO_PTHREAD=0
+ # but we don't want to include libretro-common's rthread object files here
+ USE_RTHREADS=0
+ USE_ASYNC_CDROM=0
+ # so we disable some uses of threads within pcsx_rearmed.
+ # is this a good solution? I don't know!
+ else
+ LIBPTHREAD :=
+ USE_RTHREADS=0
+ USE_ASYNC_CDROM=0
+ NO_PTHREAD=1
+ endif
+ DYNAREC =
+ STATIC_LINKING = 1
+ HAVE_PHYSICAL_CDROM = 0
+
+# Windows
+else
+ TARGET := $(TARGET_NAME)_libretro.dll
+ PLATFORM = libretro
+ MAIN_LDFLAGS += -static-libgcc -static-libstdc++
+ifneq ($(DEBUG),1)
+ MAIN_LDFLAGS += -s
+endif
+ CFLAGS += -D__WIN32__ -DNO_DYLIB
+ MMAP_WIN32=1
+ NO_PTHREAD=1
+ MAIN_LDLIBS += -lws2_32
+ LIBPTHREAD :=
+ LIBDL :=
+ LIBM :=
+ USE_LIBRETRO_VFS = 1
+endif
+
+CFLAGS += $(fpic)
+MAIN_LDFLAGS += -shared
+MAIN_LDLIBS += $(LIBPTHREAD) $(LIBM) $(LIBDL) $(LIBZ)
+
+# enable large file support if available
+ifeq ($(shell $(CC) -E -dD $(CFLAGS) include/arm_features.h | grep __SIZEOF_LONG__ | awk '{print $$3}'),4)
+CFLAGS += -D_FILE_OFFSET_BITS=64
+endif
+
+# try to autodetect stuff for the lazy
+ifndef ARCH
+ARCH = $(ARCH_DETECTED)
+endif
+ifndef HAVE_NEON_ASM
+# asm for 32bit only
+HAVE_NEON_ASM = $(shell $(CC) -E -dD $(CFLAGS) include/arm_features.h | grep -q HAVE_NEON32 && echo 1 || echo 0)
+endif
+ifeq ($(NO_UNDEF_CHECK)$(shell $(LD) -v 2> /dev/null | awk '{print $$1}'),GNU)
+ ifeq (,$(findstring $(platform),win32))
+ MAIN_LDFLAGS += -Wl,-version-script=frontend/libretro-version-script
+ endif
+MAIN_LDFLAGS += -Wl,--no-undefined
+endif
+ifdef ALLOW_LIGHTREC_ON_ARM
+CFLAGS += -DALLOW_LIGHTREC_ON_ARM
+endif
+ifeq ($(BUILTIN_GPU),neon)
+ifneq (,$(findstring $(ARCH),x86 i686))
+ CFLAGS_GPU_NEON ?= -msse2 # required
+endif
+ifeq ($(ARCH),x86_64)
+ CFLAGS_GPU_NEON ?= -mssse3 # optional, for more perf
+endif
+CFLAGS += $(CFLAGS_GPU_NEON)
+endif
+
+TARGET ?= libretro.so
+PLATFORM = libretro
+BUILTIN_GPU ?= peops
+SOUND_DRIVERS = libretro
+PLUGINS =
+NO_CONFIG_MAK = yes
+
+$(info TARGET: $(TARGET))
+$(info platform: $(platform))
+$(info ARCH: $(ARCH))
+$(info DYNAREC: $(DYNAREC))
+$(info BUILTIN_GPU: $(BUILTIN_GPU))
+$(info CC: $(CC) : $(shell $(CC) --version | head -1))
+$(info CFLAGS: $(CFLAGS))
+$(info MAIN_LDLIBS: $(MAIN_LDLIBS))
+$(info )
+
+include Makefile
+
+# no special AS needed for gpu_neon
+plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.o: plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+ $(CC) $(CFLAGS) -c $^ -o $@
check_define()
{
- $CC -E -dD $CFLAGS include/arm_features.h | grep -q "$1" || return 1
+ $CC -E -dD $CFLAGS include/arm_features.h | grep -v '#undef' | grep -q "$1" || return 1
return 0
}
check_define_val()
{
- $CC -E -dD $CFLAGS include/arm_features.h | grep "$1" | awk '{print $3}'
+ $CC -E -dD $CFLAGS include/arm_features.h | grep -v '#undef' | grep "$1" | awk '{print $3}'
return 0
}
# setting options to "yes" or "no" will make that choice default,
# "" means "autodetect".
-platform_list="generic pandora maemo caanoo libretro"
+platform_list="generic pandora maemo caanoo"
platform="generic"
-builtin_gpu_list="neon peops unai unai_old"
+builtin_gpu_list="neon peops unai"
+dynarec_list="ari64 lightrec none"
builtin_gpu=""
-sound_driver_list="oss alsa pulseaudio sdl libretro"
+sound_driver_list="oss alsa pulseaudio sdl"
sound_drivers=""
-plugins="plugins/spunull/spunull.so \
-plugins/dfxvideo/gpu_peops.so plugins/gpu_unai_old/gpu_unai_old.so plugins/gpu_unai/gpu_unai.so"
+plugins=""
drc_cache_base="no"
have_armv5=""
have_armv6=""
have_armv7=""
have_arm_neon=""
have_arm_neon_asm=""
+have_neon_gpu=""
have_tslib=""
+have_evdev=""
have_gles=""
have_c64x_dsp=""
-enable_dynarec="yes"
+have_fsections="yes"
+have_dynamic="yes"
+gnu_linker="yes"
+dynarec=""
+multithreading="yes"
need_sdl="no"
need_xlib="no"
need_libpicofe="yes"
optimize_arm926ej="no"
# hardcoded stuff
-if [ "${OSTYPE}" = "FreeBSD" ]; then
- CC="clang"
- CXX="clang++"
- CFLAGS="-I/usr/local/include -L/usr/local/lib"
- MAKE=gmake
-else
- CC="${CC-${CROSS_COMPILE}gcc}"
- CXX="${CXX-${CROSS_COMPILE}g++}"
-fi
+case "${OSTYPE}" in
+ *BSD*|*darwin*)
+ CC="clang"
+ CXX="clang++"
+ if test -d /opt/local/include; then
+ # MacPorts
+ CFLAGS="$CFLAGS -I/opt/local/include"
+ LDFLAGS="$LDFLAGS -L/opt/local/lib"
+ fi
+ if test -d /usr/local/include; then
+ CFLAGS="$CFLAGS -I/usr/local/include"
+ LDFLAGS="$LDFLAGS -L/usr/local/lib"
+ fi
+ MAKE=gmake
+ gnu_linker="no"
+ ;;
+ *)
+ CC="${CC-${CROSS_COMPILE}gcc}"
+ CXX="${CXX-${CROSS_COMPILE}g++}"
+ ;;
+esac
AS="${AS-${CROSS_COMPILE}as}"
AR="${AR-${CROSS_COMPILE}ar}"
MAIN_LDLIBS="$LDLIBS -ldl -lm -lpthread"
config_mak="config.mak"
-if [ "${OSTYPE}" = "FreeBSD" ]; then
- SYSROOT="$sysroot"
- [ "x${SDL_CONFIG}" = "x" ] && SDL_CONFIG="${SYSROOT}/usr/local/bin/sdl-config"
-else
- SYSROOT="$(${CC} --print-sysroot)"
- [ "x${SDL_CONFIG}" = "x" ] && SDL_CONFIG="${SYSROOT}/usr/bin/sdl-config"
-fi
+case "${OSTYPE}" in
+ *BSD*|*darwin*)
+ SYSROOT="$sysroot"
+ [ "x${SDL_CONFIG}" = "x" ] && SDL_CONFIG="${SYSROOT}/usr/local/bin/sdl-config"
+ ;;
+ *)
+ SYSROOT="$(${CC} --print-sysroot 2> /dev/null || true)"
+ [ "x${SDL_CONFIG}" = "x" ] && SDL_CONFIG="${SYSROOT}/usr/bin/sdl-config"
+ ;;
+esac
fail()
{
drc_cache_base="yes"
optimize_cortexa8="yes"
have_arm_neon="yes"
+ have_neon_gpu="yes"
need_xlib="yes"
+ multithreading="no"
;;
maemo)
drc_cache_base="yes"
optimize_cortexa8="yes"
have_arm_neon="yes"
+ have_neon_gpu="yes"
+ multithreading="no"
;;
caanoo)
sound_drivers="oss"
drc_cache_base="yes"
optimize_arm926ej="yes"
need_warm="yes"
- ;;
- libretro)
- sound_drivers="libretro"
- need_libpicofe="no"
+ multithreading="no"
;;
*)
fail "unsupported platform: $platform"
;;
--disable-neon) have_arm_neon="no"
;;
- --disable-dynarec) enable_dynarec="no"
+ --enable-threads) multithreading="yes"
+ ;;
+ --disable-threads) multithreading="no"
+ ;;
+ --enable-dynamic) have_dynamic="yes"
+ ;;
+ --disable-dynamic) have_dynamic="no"
+ ;;
+ --dynarec=*) dynarec="$optarg"
+ ;;
+ --disable-dynarec) dynarec="no"
;;
*) echo "ERROR: unknown option $opt"; show_help="yes"
;;
echo " available: $sound_driver_list"
echo " --enable-neon"
echo " --disable-neon enable/disable ARM NEON optimizations [guessed]"
- echo " --disable-dynarec disable dynamic recompiler"
- echo " (dynarec is only available and enabled on ARM)"
+ echo " --enable-threads"
+ echo " --disable-threads enable/disable multithreaded features [guessed]"
+ echo " --enable-dynamic"
+ echo " --disable-dynamic enable/disable dynamic loading obj. eg.plugins [guessed]"
+ echo " --dynarec=NAME select dynamic recompiler [guessed]"
+ echo " available: $dynarec_list"
echo "influential environment variables:"
echo " CROSS_COMPILE CC CXX AS AR CFLAGS ASFLAGS LDFLAGS LDLIBS"
exit 1
fi
case "$ARCH" in
+aarch64|arm64)
+ have_arm_neon="yes"
+ have_neon_gpu="yes"
+ have_arm_neon_asm="no"
+ if [ "x$dynarec" = "x" ]; then
+ dynarec="ari64"
+ fi
+ ;;
arm*)
# ARM stuff
ARCH="arm"
have_armv5=`check_define HAVE_ARMV5 && echo yes` || true
fi
- if [ "x$builtin_gpu" = "x" ]; then
- if [ "$have_arm_neon" = "yes" ]; then
- builtin_gpu="neon"
- elif [ "$have_armv7" != "yes" ]; then
- # pre-ARMv7 hardware is usually not fast enough for peops
- builtin_gpu="unai_old"
- else
- builtin_gpu="peops"
- fi
+ if [ "x$dynarec" = "x" ]; then
+ dynarec="ari64"
fi
- # automatically set mfpu and mfloat-abi if they are not set
if [ "$have_arm_neon" = "yes" ]; then
+ have_neon_gpu="yes"
fpu="neon"
elif [ "$have_armv6" = "yes" ]; then
fpu="vfp"
fi
+ # automatically set mfpu and mfloat-abi if they are not set
if [ "x$fpu" != "x" ]; then
echo "$CFLAGS" | grep -q -- '-mfpu=' || CFLAGS="$CFLAGS -mfpu=$fpu"
echo "$ASFLAGS" | grep -q -- '-mfpu=' || ASFLAGS="$ASFLAGS -mfpu=$fpu"
fi
have_arm_neon_asm=$have_arm_neon
;;
-aarch64)
- have_arm_neon="yes"
- have_arm_neon_asm="no"
- if [ "x$builtin_gpu" = "x" ]; then
- builtin_gpu="neon"
- fi
- ;;
x86_64)
- enable_dynarec="no"
- if [ "x$builtin_gpu" = "x" ]; then
- builtin_gpu="neon"
+ if [ "x$dynarec" = "x" ]; then
+ dynarec="lightrec"
fi
+ have_neon_gpu="yes"
;;
*)
- # dynarec only available on ARM
- enable_dynarec="no"
+ if [ "x$dynarec" = "x" ]; then
+ dynarec="lightrec"
+ fi
;;
esac
if [ "x$builtin_gpu" = "x" ]; then
- builtin_gpu="peops"
+ if [ "$have_neon_gpu" = "yes" ]; then
+ builtin_gpu="neon"
+ elif [ "$ARCH" = "arm" -a "$have_armv7" != "yes" ]; then
+ # pre-ARMv7 hardware is usually not fast enough for peops
+ builtin_gpu="unai"
+ else
+ builtin_gpu="peops"
+ fi
fi
# supposedly we can avoid -fPIC on armv5 for slightly better performance?
if [ "$ARCH" != "arm" -o "$have_armv6" = "yes" ]; then
PLUGIN_CFLAGS="$PLUGIN_CFLAGS -fPIC"
+else
+ PLUGIN_CFLAGS="$PLUGIN_CFLAGS -fno-PIC"
fi
case "$platform" in
maemo)
CFLAGS="$CFLAGS -DMAEMO -DMAEMO_CHANGES"
;;
-libretro)
- CFLAGS="$CFLAGS -fPIC"
- MAIN_LDFLAGS="$MAIN_LDFLAGS -shared -Wl,--no-undefined"
- ;;
esac
# header/library presence tests
{
cat > $TMPC <<EOF
#include <SDL.h>
- void main() { SDL_OpenAudio(0, 0); }
+ int main(int argc, char *argv[]) { SDL_OpenAudio(0, 0); }
EOF
compile_binary "$@"
}
fi
fi
-# check for VideoCore stuff for Raspberry Pi
-if [ -d /opt/vc/include -a -d /opt/vc/lib -a "$VIDEOCORE" != "no" ]; then
- CFLAGS_GLES="$CFLAGS_GLES -I/opt/vc/include -I/opt/vc/include/interface/vcos/pthreads -I/opt/vc/include/interface/vmcs_host/linux"
- LDLIBS_GLES="$LDLIBS_GLES -L/opt/vc/lib"
- if [ -f /opt/vc/lib/libbcm_host.so ]; then
- LDLIBS_GLES="$LDLIBS_GLES -lbcm_host"
+# evdev
+if [ "x$have_evdev" = "x" ]; then
+ cat > $TMPC <<EOF
+ #include <linux/input.h>
+EOF
+ have_evdev="no"
+ if compile_object; then
+ have_evdev="yes"
+ else
+ cat > $TMPC <<EOF
+ #include <dev/evdev/input.h>
+EOF
+ if compile_object; then
+ have_evdev="yes"
+ fi
fi
- need_xlib="yes"
- VIDEOCORE="yes"
fi
# check for GLES headers
return (int)eglGetDisplay( (EGLNativeDisplayType)0 );
}
EOF
-if [ "$VIDEOCORE" = "yes" ] && compile_binary $CFLAGS_GLES -lbrcmEGL -lbrcmGLESv2 $LDLIBS_GLES; then
- have_gles="yes"
- LDLIBS_GLES="-lbrcmEGL -lbrcmGLESv2 $LDLIBS_GLES"
-elif compile_binary $CFLAGS_GLES -lEGL -lGLES_CM $LDLIBS_GLES; then
+if compile_binary $CFLAGS_GLES -lEGL -lGLES_CM $LDLIBS_GLES; then
have_gles="yes"
LDLIBS_GLES="-lEGL -lGLES_CM $LDLIBS_GLES"
elif compile_binary $CFLAGS_GLES -lEGL -lGLESv1_CM $LDLIBS_GLES; then
have_gles="yes"
LDLIBS_GLES="-lEGL -lGLESv1_CM $LDLIBS_GLES"
+elif compile_object $CFLAGS_GLES; then
+ have_gles="yes"
fi
if check_c64_tools; then
have_c64x_dsp="yes"
fi
-if [ "$have_gles" = "yes" ]; then
- plugins="$plugins plugins/gpu-gles/gpu_gles.so"
-fi
-if [ "$have_arm_neon" = "yes" -a "$builtin_gpu" != "neon" ]; then
- plugins="$plugins plugins/gpu_neon/gpu_neon.so"
+# declare available dynamic plugins
+if [ "$have_dynamic" = "yes" ]; then
+ plugins="plugins/spunull/spunull.so"
+
+ if [ "$builtin_gpu" != "peops" ]; then
+ plugins="$plugins plugins/dfxvideo/gpu_peops.so"
+ fi
+ if [ "$builtin_gpu" != "unai" ]; then
+ plugins="$plugins plugins/gpu_unai/gpu_unai.so"
+ fi
+ if [ "$have_gles" = "yes" -a "x$LDLIBS_GLES" != "x" ]; then
+ plugins="$plugins plugins/gpu-gles/gpu_gles.so"
+ fi
+ if [ "$have_neon_gpu" = "yes" -a "$builtin_gpu" != "neon" ]; then
+ plugins="$plugins plugins/gpu_neon/gpu_neon.so"
+ fi
+else
+ have_dynamic="no"
+ CFLAGS="$CFLAGS -DNO_DYLIB"
fi
# check for xlib (only headers needed)
echo "C compiler flags $CFLAGS"
echo "libraries $MAIN_LDLIBS"
echo "linker flags $LDFLAGS$MAIN_LDFLAGS"
-echo "enable dynarec $enable_dynarec"
+echo "dynarec $dynarec"
if [ "$ARCH" = "arm" -o "$ARCH" = "aarch64" ]; then
echo "enable ARM NEON $have_arm_neon"
fi
echo "ARMv7 optimizations $have_armv7"
echo "TI C64x DSP support $have_c64x_dsp"
fi
-echo "tslib support $have_tslib"
+if [ "$have_dynamic" = "yes" ]; then
+ echo "tslib support $have_tslib"
+else
+ echo "tslib does NOT support statically linked build"
+fi
if [ "$platform" = "generic" ]; then
echo "OpenGL ES output $have_gles"
fi
+echo "multithreading $multithreading"
echo "# Automatically generated by configure" > $config_mak
printf "# Configured with:" >> $config_mak
echo "PLUGIN_CFLAGS += $PLUGIN_CFLAGS" >> $config_mak
echo >> $config_mak
-if [ "$platform" = "libretro" ]; then
- echo "TARGET = libretro.so" >> $config_mak
-fi
echo "ARCH = $ARCH" >> $config_mak
echo "PLATFORM = $platform" >> $config_mak
echo "BUILTIN_GPU = $builtin_gpu" >> $config_mak
echo "SOUND_DRIVERS = $sound_drivers" >> $config_mak
echo "PLUGINS = $plugins" >> $config_mak
+if [ "$have_neon_gpu" = "yes" ]; then
+ echo "HAVE_NEON_GPU = 1" >> $config_mak
+fi
if [ "$have_arm_neon" = "yes" ]; then
echo "HAVE_NEON = 1" >> $config_mak
fi
if [ "$have_arm_neon_asm" = "yes" ]; then
echo "HAVE_NEON_ASM = 1" >> $config_mak
fi
-if [ "$have_tslib" = "yes" ]; then
+if [ "$have_tslib" = "yes" -a "$have_dynamic" = "yes" ]; then
echo "HAVE_TSLIB = 1" >> $config_mak
fi
+if [ "$have_evdev" = "yes" ]; then
+ echo "HAVE_EVDEV = 1" >> $config_mak
+fi
if [ "$have_gles" = "yes" ]; then
echo "HAVE_GLES = 1" >> $config_mak
echo "CFLAGS_GLES = $CFLAGS_GLES" >> $config_mak
echo "LDLIBS_GLES = $LDLIBS_GLES" >> $config_mak
fi
-if [ "$enable_dynarec" = "yes" ]; then
- echo "USE_DYNAREC = 1" >> $config_mak
+if [ "$have_fsections" = "no" ]; then
+ echo "NO_FSECTIONS = 1" >> $config_mak
+fi
+if [ "$gnu_linker" = "yes" ]; then
+ echo "GNU_LINKER = 1" >> $config_mak
fi
+echo "DYNAREC = $dynarec" >> $config_mak
if [ "$drc_cache_base" = "yes" ]; then
echo "BASE_ADDR_DYNAMIC = 1" >> $config_mak
fi
if [ "$have_c64x_dsp" = "yes" ]; then
echo "HAVE_C64_TOOLS = 1" >> $config_mak
fi
+if [ "$multithreading" = "yes" ]; then
+ echo "USE_ASYNC_CDROM = 1" >> $config_mak
+ echo "NDRC_THREAD = 1" >> $config_mak
+fi
# use pandora's skin (for now)
test -e skin || ln -s frontend/pandora/skin skin
-Subproject commit 5c598c2df3a7717552a76410d79f5af01ff51b1d
+Subproject commit b3974651d869c2f804e9879b063c23280d2ae617
--- /dev/null
+Subproject commit 0abedaac6a795c093f2e1a22f3028fca9efdf3c9
--- /dev/null
+Subproject commit a6bb2b5a7cf36e074e12ccaed32990b437deb784
--- /dev/null
+Subproject commit e38798e0154434bc797b24f8e15f770e0ba5faec
--- /dev/null
+Subproject commit 2d1c576e62b99e85d99407e1a88794c6e44c3310
--- /dev/null
+#ifndef _3DS_UTILS_H
+#define _3DS_UTILS_H
+
+#ifndef USE_CTRULIB_2
+#error CTRULIB_2 is required
+#endif
+
+#define MEMOP_PROT 6
+#define MEMOP_MAP 4
+#define MEMOP_UNMAP 5
+
+#define DEBUG_HOLD() do{printf("%s@%s:%d.\n",__FUNCTION__, __FILE__, __LINE__);fflush(stdout);wait_for_input();}while(0)
+
+void wait_for_input(void);
+void ctr_clear_cache(void);
+void ctr_clear_cache_range(void *start, void *end);
+void ctr_invalidate_icache(void); // only icache
+int ctr_get_tlbdesc(void *ptr);
+
+int svcCustomBackdoor(void *callback, void *a0, void *a1, void *a2);
+int svcConvertVAToPA(const void *VA, int writeCheck);
+
+extern __attribute__((weak)) int __ctr_svchax;
+
+#endif // _3DS_UTILS_H
--- /dev/null
+/* Copyright (C) 2010-2020 The RetroArch team
+ *
+ * ---------------------------------------------------------------------------------------
+ * The following license statement only applies to this file (gx_pthread.h).
+ * ---------------------------------------------------------------------------------------
+ *
+ * Permission is hereby granted, free of charge,
+ * to any person obtaining a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _CTR_PTHREAD_WRAP_CTR_
+#define _CTR_PTHREAD_WRAP_CTR_
+
+#include <3ds/thread.h>
+#include <3ds/services/apt.h>
+#include "3ds_utils.h"
+
+#include <sys/time.h>
+#include <time.h>
+#include <errno.h>
+
+#define STACKSIZE (4 * 1024)
+#define FALSE 0
+
+#ifndef PTHREAD_SCOPE_PROCESS
+/* An earlier version of devkitARM does not define the pthread types. Can remove in r54+. */
+
+typedef Thread pthread_t;
+typedef LightLock pthread_mutex_t;
+typedef void* pthread_mutexattr_t;
+typedef int pthread_attr_t;
+typedef LightEvent pthread_cond_t;
+typedef int pthread_condattr_t;
+#endif
+
+#ifndef USE_CTRULIB_2
+/* Backported CondVar API from libctru 2.0, and under its license:
+ https://github.com/devkitPro/libctru
+ Slightly modified for compatibility with older libctru. */
+
+typedef s32 CondVar;
+
+static inline Result syncArbitrateAddress(s32* addr, ArbitrationType type, s32 value)
+{
+ return svcArbitrateAddress(__sync_get_arbiter(), (u32)addr, type, value, 0);
+}
+
+static inline Result syncArbitrateAddressWithTimeout(s32* addr, ArbitrationType type, s32 value, s64 timeout_ns)
+{
+ return svcArbitrateAddress(__sync_get_arbiter(), (u32)addr, type, value, timeout_ns);
+}
+
+static inline void __dmb(void)
+{
+ __asm__ __volatile__("mcr p15, 0, %[val], c7, c10, 5" :: [val] "r" (0) : "memory");
+}
+
+static inline void CondVar_BeginWait(CondVar* cv, LightLock* lock)
+{
+ s32 val;
+ do
+ val = __ldrex(cv) - 1;
+ while (__strex(cv, val));
+ LightLock_Unlock(lock);
+}
+
+static inline bool CondVar_EndWait(CondVar* cv, s32 num_threads)
+{
+ bool hasWaiters;
+ s32 val;
+
+ do {
+ val = __ldrex(cv);
+ hasWaiters = val < 0;
+ if (hasWaiters)
+ {
+ if (num_threads < 0)
+ val = 0;
+ else if (val <= -num_threads)
+ val += num_threads;
+ else
+ val = 0;
+ }
+ } while (__strex(cv, val));
+
+ return hasWaiters;
+}
+
+static inline void CondVar_Init(CondVar* cv)
+{
+ *cv = 0;
+}
+
+static inline void CondVar_Wait(CondVar* cv, LightLock* lock)
+{
+ CondVar_BeginWait(cv, lock);
+ syncArbitrateAddress(cv, ARBITRATION_WAIT_IF_LESS_THAN, 0);
+ LightLock_Lock(lock);
+}
+
+static inline int CondVar_WaitTimeout(CondVar* cv, LightLock* lock, s64 timeout_ns)
+{
+ CondVar_BeginWait(cv, lock);
+
+ bool timedOut = false;
+ Result rc = syncArbitrateAddressWithTimeout(cv, ARBITRATION_WAIT_IF_LESS_THAN_TIMEOUT, 0, timeout_ns);
+ if (R_DESCRIPTION(rc) == RD_TIMEOUT)
+ {
+ timedOut = CondVar_EndWait(cv, 1);
+ __dmb();
+ }
+
+ LightLock_Lock(lock);
+ return timedOut;
+}
+
+static inline void CondVar_WakeUp(CondVar* cv, s32 num_threads)
+{
+ __dmb();
+ if (CondVar_EndWait(cv, num_threads))
+ syncArbitrateAddress(cv, ARBITRATION_SIGNAL, num_threads);
+ else
+ __dmb();
+}
+
+static inline void CondVar_Signal(CondVar* cv)
+{
+ CondVar_WakeUp(cv, 1);
+}
+
+static inline void CondVar_Broadcast(CondVar* cv)
+{
+ CondVar_WakeUp(cv, ARBITRATION_SIGNAL_ALL);
+}
+/* End libctru 2.0 backport */
+#endif
+
+/* libctru threads return void but pthreads return void pointer */
+static bool mutex_inited = false;
+static LightLock safe_double_thread_launch;
+static void *(*start_routine_jump)(void*);
+
+static void ctr_thread_launcher(void* data)
+{
+ void *(*start_routine_jump_safe)(void*) = start_routine_jump;
+ LightLock_Unlock(&safe_double_thread_launch);
+ start_routine_jump_safe(data);
+}
+
+static inline int pthread_create(pthread_t *thread,
+ const pthread_attr_t *attr, void *(*start_routine)(void*), void *arg)
+{
+ s32 prio = 0;
+ Thread new_ctr_thread;
+ int procnum = -2; // use default cpu
+ bool isNew3DS;
+
+ APT_CheckNew3DS(&isNew3DS);
+
+ if (isNew3DS)
+ procnum = 2;
+
+ if (!mutex_inited)
+ {
+ LightLock_Init(&safe_double_thread_launch);
+ mutex_inited = true;
+ }
+
+ /*Must wait if attempting to launch 2 threads at once to prevent corruption of function pointer*/
+ while (LightLock_TryLock(&safe_double_thread_launch) != 0);
+
+ svcGetThreadPriority(&prio, CUR_THREAD_HANDLE);
+
+ start_routine_jump = start_routine;
+ new_ctr_thread = threadCreate(ctr_thread_launcher, arg, STACKSIZE, prio - 1, procnum, FALSE);
+
+ if (!new_ctr_thread)
+ {
+ LightLock_Unlock(&safe_double_thread_launch);
+ return EAGAIN;
+ }
+
+ *thread = (pthread_t)new_ctr_thread;
+ return 0;
+}
+
+static inline pthread_t pthread_self(void)
+{
+ return (pthread_t)threadGetCurrent();
+}
+
+static inline int pthread_mutex_init(pthread_mutex_t *mutex,
+ const pthread_mutexattr_t *attr)
+{
+ LightLock_Init((LightLock *)mutex);
+ return 0;
+}
+
+static inline int pthread_mutex_destroy(pthread_mutex_t *mutex)
+{
+ /*Nothing to destroy*/
+ return 0;
+}
+
+static inline int pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+ LightLock_Lock((LightLock *)mutex);
+ return 0;
+}
+
+static inline int pthread_mutex_unlock(pthread_mutex_t *mutex)
+{
+ LightLock_Unlock((LightLock *)mutex);
+ return 0;
+}
+
+static inline void pthread_exit(void *retval)
+{
+ /*Yes the pointer to int cast is not ideal*/
+ /*threadExit((int)retval);*/
+ (void)retval;
+
+ threadExit(0);
+}
+
+static inline int pthread_detach(pthread_t thread)
+{
+ threadDetach((Thread)thread);
+ return 0;
+}
+
+static inline int pthread_join(pthread_t thread, void **retval)
+{
+ /*retval is ignored*/
+ if(threadJoin((Thread)thread, INT64_MAX))
+ return -1;
+
+ threadFree((Thread)thread);
+
+ return 0;
+}
+
+static inline int pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+ return LightLock_TryLock((LightLock *)mutex);
+}
+
+static inline int pthread_cond_wait(pthread_cond_t *cond,
+ pthread_mutex_t *mutex)
+{
+ CondVar_Wait((CondVar *)cond, (LightLock *)mutex);
+ return 0;
+}
+
+static inline int pthread_cond_timedwait(pthread_cond_t *cond,
+ pthread_mutex_t *mutex, const struct timespec *abstime)
+{
+ struct timespec now = {0};
+ /* Missing clock_gettime*/
+ struct timeval tm;
+ int retval = 0;
+
+ gettimeofday(&tm, NULL);
+ now.tv_sec = tm.tv_sec;
+ now.tv_nsec = tm.tv_usec * 1000;
+ s64 timeout = (abstime->tv_sec - now.tv_sec) * 1000000000 + (abstime->tv_nsec - now.tv_nsec);
+
+ if (timeout < 0)
+ {
+ retval = ETIMEDOUT;
+ }
+ else if (CondVar_WaitTimeout((CondVar *)cond, (LightLock *)mutex, timeout))
+ {
+ retval = ETIMEDOUT;
+ }
+
+ return retval;
+}
+
+static inline int pthread_cond_init(pthread_cond_t *cond,
+ const pthread_condattr_t *attr)
+{
+ CondVar_Init((CondVar *)cond);
+ return 0;
+}
+
+static inline int pthread_cond_signal(pthread_cond_t *cond)
+{
+ CondVar_Signal((CondVar *)cond);
+ return 0;
+}
+
+static inline int pthread_cond_broadcast(pthread_cond_t *cond)
+{
+ CondVar_Broadcast((CondVar *)cond);
+ return 0;
+}
+
+static inline int pthread_cond_destroy(pthread_cond_t *cond)
+{
+ /*Nothing to destroy*/
+ return 0;
+}
+
+static inline int pthread_equal(pthread_t t1, pthread_t t2)
+{
+ if (threadGetHandle((Thread)t1) == threadGetHandle((Thread)t2))
+ return 1;
+ return 0;
+}
+
+#endif
--- /dev/null
+
+#ifndef _3DS_SEMAPHORE_WRAP__
+#define _3DS_SEMAPHORE_WRAP__
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "3ds_utils.h"
+
+typedef uint32_t sem_t;
+
+static inline int sem_init(sem_t *sem, int pshared, unsigned int value)
+{
+ return svcCreateSemaphore(sem, value, INT32_MAX);
+}
+
+static inline int sem_post(sem_t *sem)
+{
+ int32_t count;
+ return svcReleaseSemaphore(&count, *sem, 1);
+}
+
+static inline int sem_wait(sem_t *sem)
+{
+ return svcWaitSynchronization(*sem, INT64_MAX);
+}
+
+static inline int sem_destroy(sem_t *sem)
+{
+ return svcCloseHandle(*sem);
+}
+
+#endif //_3DS_SEMAPHORE_WRAP__
+
--- /dev/null
+#ifndef MMAN_H
+#define MMAN_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include <stdint.h>
+
+#include <3ds/svc.h>
+#include "3ds_utils.h"
+
+#define PROT_READ 0b001
+#define PROT_WRITE 0b010
+#define PROT_EXEC 0b100
+#define MAP_PRIVATE 2
+#define MAP_FIXED 0x10
+#define MAP_ANONYMOUS 0x20
+
+#define MAP_FAILED ((void *)-1)
+
+void SysPrintf(const char *fmt, ...);
+
+#if 0 // not used
+static void* dynarec_cache = NULL;
+static void* dynarec_cache_mapping = NULL;
+
+static inline void* mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset)
+{
+ (void)fd;
+ (void)offset;
+
+ void* addr_out;
+
+ if((prot == (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
+ (flags == (MAP_PRIVATE | MAP_ANONYMOUS)))
+ {
+ if(__ctr_svchax)
+ {
+ /* this hack works only for pcsx_rearmed */
+ uint32_t currentHandle;
+
+ if (!dynarec_cache) {
+ dynarec_cache = memalign(0x1000, len);
+ if (!dynarec_cache)
+ return MAP_FAILED;
+ }
+
+ svcDuplicateHandle(&currentHandle, 0xFFFF8001);
+ svcControlProcessMemory(currentHandle, (uintptr_t)addr, (uintptr_t)dynarec_cache,
+ len, MEMOP_MAP, prot);
+ svcCloseHandle(currentHandle);
+ dynarec_cache_mapping = addr;
+ memset(addr, 0, len);
+ return addr;
+ }
+ else
+ {
+ printf("tried to mmap RWX pages without svcControlProcessMemory access !\n");
+ return MAP_FAILED;
+ }
+
+ }
+
+ addr_out = memalign(0x1000, len);
+ if (!addr_out)
+ return MAP_FAILED;
+
+ memset(addr_out, 0, len);
+ return addr_out;
+}
+
+static inline int munmap(void *addr, size_t len)
+{
+ if((addr == dynarec_cache_mapping) && __ctr_svchax)
+ {
+ uint32_t currentHandle;
+ svcDuplicateHandle(&currentHandle, 0xFFFF8001);
+ svcControlProcessMemory(currentHandle,
+ (uintptr_t)dynarec_cache, (uintptr_t)dynarec_cache_mapping,
+ len, MEMOP_UNMAP, 0b111);
+ svcCloseHandle(currentHandle);
+ dynarec_cache_mapping = NULL;
+
+ }
+ else
+ free(addr);
+
+ return 0;
+}
+#endif
+
+static inline int mprotect(void *addr, size_t len, int prot)
+{
+ if (__ctr_svchax)
+ {
+ uint32_t currentHandle = 0;
+ int r;
+ svcDuplicateHandle(&currentHandle, 0xFFFF8001);
+ r = svcControlProcessMemory(currentHandle, (uintptr_t)addr, 0,
+ len, MEMOP_PROT, prot);
+ svcCloseHandle(currentHandle);
+ if (r < 0) {
+ SysPrintf("svcControlProcessMemory failed for %p %u %x: %d\n",
+ addr, len, prot, r);
+ return -1;
+ }
+ return 0;
+ }
+
+ SysPrintf("mprotect called without svcControlProcessMemory access!\n");
+ return -1;
+}
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif // MMAN_H
+
--- /dev/null
+ .text
+ .arm
+ .balign 4
+
+ .func ctr_clear_cache_kernel
+ctr_clear_cache_kernel:
+ @ this less than what B2.7.3 of DDI0100I_ARM_ARM recommends, but so is Linux
+ mrs r3, cpsr
+ cpsid aif
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 0 @ Clean entire data cache
+ mcr p15, 0, r0, c7, c10, 4 @ Data Sync Barrier
+ mcr p15, 0, r0, c7, c5, 0 @ Invalidate entire instruction cache / Flush BTB
+ msr cpsr, r3
+ bx lr
+ .endfunc
+
+ .func ctr_clear_cache_range_kernel
+ctr_clear_cache_range_kernel:
+ bic r0, r0, #31
+ mov r12, r0
+ mov r2, #0
+ mrs r3, cpsr
+ cpsid aif
+0:
+ mcr p15, 0, r0, c7, c10, 1 @ Clean Data Cache Line (using MVA)
+ add r0, r0, #32
+ cmp r0, r1
+ blo 0b
+ mcr p15, 0, r2, c7, c10, 4 @ Data Sync Barrier
+ mov r0, r12
+0:
+ mcr p15, 0, r0, c7, c5, 1 @ Invalidate Instruction Cache Line (using MVA)
+ add r0, r0, #32
+ cmp r0, r1
+ blo 0b
+ mcr p15, 0, r2, c7, c5, 6 @ Flush Entire Branch Target Cache
+
+ msr cpsr, r3
+ bx lr
+ .endfunc
+
+ @@ Clear the entire data cache / invalidate the instruction cache. Uses
+ @@ Rosalina svcCustomBackdoor to avoid svcBackdoor stack corruption
+ @@ during interrupts.
+ .global ctr_clear_cache
+ .func ctr_clear_cache
+ctr_clear_cache:
+ adr r0, ctr_clear_cache_kernel
+ svc 0x80 @ svcCustomBackdoor
+ bx lr
+ .endfunc
+
+ .global ctr_clear_cache_range
+ .func ctr_clear_cache_range
+ctr_clear_cache_range:
+ mov r2, r1
+ mov r1, r0
+ adr r0, ctr_clear_cache_range_kernel
+ svc 0x80 @ svcCustomBackdoor
+ bx lr
+ .endfunc
+
+ .func ctr_invalidate_icache_kernel
+ctr_invalidate_icache_kernel:
+ mrs r3, cpsr
+ cpsid aif
+ mov r0, #0
+ mcr p15, 0, r0, c7, c10, 4 @ Data Sync Barrier
+ mcr p15, 0, r0, c7, c5, 0 @ Invalidate entire instruction cache / Flush BTB
+ msr cpsr, r3
+ bx lr
+ .endfunc
+
+ .global ctr_invalidate_icache
+ .func ctr_invalidate_icache
+ctr_invalidate_icache:
+ adr r0, ctr_invalidate_icache_kernel
+ svc 0x80 @ svcCustomBackdoor
+ bx lr
+ .endfunc
+
+ @@ Raw syscall stub for Rosalina's svcCustomBackdoor (svc 0x80):
+ @@ executes the function whose address is in r0 in kernel mode.
+ @@ Remaining arguments pass through in r1..r3 untouched.
+ .global svcCustomBackdoor
+ .func svcCustomBackdoor
+svcCustomBackdoor:
+ svc 0x80 @ svcCustomBackdoor
+ bx lr
+ .endfunc
+
+ @@ Raw syscall stub for svcConvertVAToPA (svc 0x90): translates a
+ @@ virtual address to a physical address. Arguments/result pass through
+ @@ in registers per the kernel's SVC ABI.
+ .global svcConvertVAToPA
+ .func svcConvertVAToPA
+svcConvertVAToPA:
+ svc 0x90 @ svcConvertVAToPA
+ bx lr
+ .endfunc
--- /dev/null
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2013 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZCONF_H
+#define ZCONF_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ * Even better than compiling with -DZ_PREFIX would be to use configure to set
+ * this permanently in zconf.h using "./configure --zprefix".
+ */
+#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */
+# define Z_PREFIX_SET
+
+/* all linked symbols */
+# define _dist_code z__dist_code
+# define _length_code z__length_code
+# define _tr_align z__tr_align
+# define _tr_flush_bits z__tr_flush_bits
+# define _tr_flush_block z__tr_flush_block
+# define _tr_init z__tr_init
+# define _tr_stored_block z__tr_stored_block
+# define _tr_tally z__tr_tally
+# define adler32 z_adler32
+# define adler32_combine z_adler32_combine
+# define adler32_combine64 z_adler32_combine64
+# ifndef Z_SOLO
+# define compress z_compress
+# define compress2 z_compress2
+# define compressBound z_compressBound
+# endif
+# define crc32 z_crc32
+# define crc32_combine z_crc32_combine
+# define crc32_combine64 z_crc32_combine64
+# define deflate z_deflate
+# define deflateBound z_deflateBound
+# define deflateCopy z_deflateCopy
+# define deflateEnd z_deflateEnd
+# define deflateInit2_ z_deflateInit2_
+# define deflateInit_ z_deflateInit_
+# define deflateParams z_deflateParams
+# define deflatePending z_deflatePending
+# define deflatePrime z_deflatePrime
+# define deflateReset z_deflateReset
+# define deflateResetKeep z_deflateResetKeep
+# define deflateSetDictionary z_deflateSetDictionary
+# define deflateSetHeader z_deflateSetHeader
+# define deflateTune z_deflateTune
+# define deflate_copyright z_deflate_copyright
+# define get_crc_table z_get_crc_table
+# ifndef Z_SOLO
+# define gz_error z_gz_error
+# define gz_intmax z_gz_intmax
+# define gz_strwinerror z_gz_strwinerror
+# define gzbuffer z_gzbuffer
+# define gzclearerr z_gzclearerr
+# define gzclose z_gzclose
+# define gzclose_r z_gzclose_r
+# define gzclose_w z_gzclose_w
+# define gzdirect z_gzdirect
+# define gzdopen z_gzdopen
+# define gzeof z_gzeof
+# define gzerror z_gzerror
+# define gzflush z_gzflush
+# define gzgetc z_gzgetc
+# define gzgetc_ z_gzgetc_
+# define gzgets z_gzgets
+# define gzoffset z_gzoffset
+# define gzoffset64 z_gzoffset64
+# define gzopen z_gzopen
+# define gzopen64 z_gzopen64
+# ifdef _WIN32
+# define gzopen_w z_gzopen_w
+# endif
+# define gzprintf z_gzprintf
+# define gzvprintf z_gzvprintf
+# define gzputc z_gzputc
+# define gzputs z_gzputs
+# define gzread z_gzread
+# define gzrewind z_gzrewind
+# define gzseek z_gzseek
+# define gzseek64 z_gzseek64
+# define gzsetparams z_gzsetparams
+# define gztell z_gztell
+# define gztell64 z_gztell64
+# define gzungetc z_gzungetc
+# define gzwrite z_gzwrite
+# endif
+# define inflate z_inflate
+# define inflateBack z_inflateBack
+# define inflateBackEnd z_inflateBackEnd
+# define inflateBackInit_ z_inflateBackInit_
+# define inflateCopy z_inflateCopy
+# define inflateEnd z_inflateEnd
+# define inflateGetHeader z_inflateGetHeader
+# define inflateInit2_ z_inflateInit2_
+# define inflateInit_ z_inflateInit_
+# define inflateMark z_inflateMark
+# define inflatePrime z_inflatePrime
+# define inflateReset z_inflateReset
+# define inflateReset2 z_inflateReset2
+# define inflateSetDictionary z_inflateSetDictionary
+# define inflateGetDictionary z_inflateGetDictionary
+# define inflateSync z_inflateSync
+# define inflateSyncPoint z_inflateSyncPoint
+# define inflateUndermine z_inflateUndermine
+# define inflateResetKeep z_inflateResetKeep
+# define inflate_copyright z_inflate_copyright
+# define inflate_fast z_inflate_fast
+# define inflate_table z_inflate_table
+# ifndef Z_SOLO
+# define uncompress z_uncompress
+# endif
+# define zError z_zError
+# ifndef Z_SOLO
+# define zcalloc z_zcalloc
+# define zcfree z_zcfree
+# endif
+# define zlibCompileFlags z_zlibCompileFlags
+# define zlibVersion z_zlibVersion
+
+/* all zlib typedefs in zlib.h and zconf.h */
+# define Byte z_Byte
+# define Bytef z_Bytef
+# define alloc_func z_alloc_func
+# define charf z_charf
+# define free_func z_free_func
+# ifndef Z_SOLO
+# define gzFile z_gzFile
+# endif
+# define gz_header z_gz_header
+# define gz_headerp z_gz_headerp
+# define in_func z_in_func
+# define intf z_intf
+# define out_func z_out_func
+# define uInt z_uInt
+# define uIntf z_uIntf
+# define uLong z_uLong
+# define uLongf z_uLongf
+# define voidp z_voidp
+# define voidpc z_voidpc
+# define voidpf z_voidpf
+
+/* all zlib structs in zlib.h and zconf.h */
+# define gz_header_s z_gz_header_s
+# define internal_state z_internal_state
+
+#endif
+
+#if defined(__MSDOS__) && !defined(MSDOS)
+# define MSDOS
+#endif
+#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
+# define OS2
+#endif
+#if defined(_WINDOWS) && !defined(WINDOWS)
+# define WINDOWS
+#endif
+#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
+# ifndef WIN32
+# define WIN32
+# endif
+#endif
+#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
+# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
+# ifndef SYS16BIT
+# define SYS16BIT
+# endif
+# endif
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#ifdef SYS16BIT
+# define MAXSEG_64K
+#endif
+#ifdef MSDOS
+# define UNALIGNED_OK
+#endif
+
+#ifdef __STDC_VERSION__
+# ifndef STDC
+# define STDC
+# endif
+# if __STDC_VERSION__ >= 199901L
+# ifndef STDC99
+# define STDC99
+# endif
+# endif
+#endif
+#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
+# define STDC
+#endif
+#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
+# define STDC
+#endif
+#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
+# define STDC
+#endif
+#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
+# define STDC
+#endif
+
+#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */
+# define STDC
+#endif
+
+#ifndef STDC
+# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+# define const /* note: need a more gentle solution here */
+# endif
+#endif
+
+#if defined(ZLIB_CONST) && !defined(z_const)
+# define z_const const
+#else
+# define z_const
+#endif
+
+/* Some Mac compilers merge all .h files incorrectly: */
+#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__)
+# define NO_DUMMY_DECL
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+# ifdef MAXSEG_64K
+# define MAX_MEM_LEVEL 8
+# else
+# define MAX_MEM_LEVEL 9
+# endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+# define MAX_WBITS 15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+ (1 << (windowBits+2)) + (1 << (memLevel+9))
+ that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+ make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+ The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ for small objects.
+*/
+
+ /* Type declarations */
+
+#ifndef OF /* function prototypes */
+# ifdef STDC
+# define OF(args) args
+# else
+# define OF(args) ()
+# endif
+#endif
+
+#ifndef Z_ARG /* function prototypes for stdarg */
+# if defined(STDC) || defined(Z_HAVE_STDARG_H)
+# define Z_ARG(args) args
+# else
+# define Z_ARG(args) ()
+# endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h. If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#ifdef SYS16BIT
+# if defined(M_I86SM) || defined(M_I86MM)
+ /* MSC small or medium model */
+# define SMALL_MEDIUM
+# ifdef _MSC_VER
+# define FAR _far
+# else
+# define FAR far
+# endif
+# endif
+# if (defined(__SMALL__) || defined(__MEDIUM__))
+ /* Turbo C small or medium model */
+# define SMALL_MEDIUM
+# ifdef __BORLANDC__
+# define FAR _far
+# else
+# define FAR far
+# endif
+# endif
+#endif
+
+#if defined(WINDOWS) || defined(WIN32)
+ /* If building or using zlib as a DLL, define ZLIB_DLL.
+ * This is not mandatory, but it offers a little performance increase.
+ */
+# ifdef ZLIB_DLL
+# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
+# ifdef ZLIB_INTERNAL
+# define ZEXTERN extern __declspec(dllexport)
+# else
+# define ZEXTERN extern __declspec(dllimport)
+# endif
+# endif
+# endif /* ZLIB_DLL */
+ /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+ * define ZLIB_WINAPI.
+ * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+ */
+# ifdef ZLIB_WINAPI
+# ifdef FAR
+# undef FAR
+# endif
+# include <windows.h>
+ /* No need for _export, use ZLIB.DEF instead. */
+ /* For complete Windows compatibility, use WINAPI, not __stdcall. */
+# define ZEXPORT WINAPI
+# ifdef WIN32
+# define ZEXPORTVA WINAPIV
+# else
+# define ZEXPORTVA FAR CDECL
+# endif
+# endif
+#endif
+
+#if defined (__BEOS__)
+# ifdef ZLIB_DLL
+# ifdef ZLIB_INTERNAL
+# define ZEXPORT __declspec(dllexport)
+# define ZEXPORTVA __declspec(dllexport)
+# else
+# define ZEXPORT __declspec(dllimport)
+# define ZEXPORTVA __declspec(dllimport)
+# endif
+# endif
+#endif
+
+#ifndef ZEXTERN
+# define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+# define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+# define ZEXPORTVA
+#endif
+
+#ifndef FAR
+# define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char Byte; /* 8 bits */
+#endif
+typedef unsigned int uInt; /* 16 bits or more */
+typedef unsigned long uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+ /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+# define Bytef Byte FAR
+#else
+ typedef Byte FAR Bytef;
+#endif
+typedef char FAR charf;
+typedef int FAR intf;
+typedef uInt FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+ typedef void const *voidpc;
+ typedef void FAR *voidpf;
+ typedef void *voidp;
+#else
+ typedef Byte const *voidpc;
+ typedef Byte FAR *voidpf;
+ typedef Byte *voidp;
+#endif
+
+#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
+# include <limits.h>
+# if (UINT_MAX == 0xffffffffUL)
+# define Z_U4 unsigned
+# elif (ULONG_MAX == 0xffffffffUL)
+# define Z_U4 unsigned long
+# elif (USHRT_MAX == 0xffffffffUL)
+# define Z_U4 unsigned short
+# endif
+#endif
+
+#ifdef Z_U4
+ typedef Z_U4 z_crc_t;
+#else
+ typedef unsigned long z_crc_t;
+#endif
+
+#if 1 /* was set to #if 1 by ./configure */
+ /* NOTE(review): hand-configured in this vendored copy, so <unistd.h>
+  * is assumed on every target toolchain — confirm before building with
+  * a compiler that lacks it (e.g. MSVC). */
+# define Z_HAVE_UNISTD_H
+#endif
+
+#if 1 /* was set to #if 1 by ./configure */
+# define Z_HAVE_STDARG_H
+#endif
+
+#ifdef STDC
+# ifndef Z_SOLO
+# include <sys/types.h> /* for off_t */
+# endif
+#endif
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+# ifndef Z_SOLO
+# include <stdarg.h> /* for va_list */
+# endif
+#endif
+
+#ifdef _WIN32
+# ifndef Z_SOLO
+# include <stddef.h> /* for wchar_t */
+# endif
+#endif
+
+/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
+ * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
+ * though the former does not conform to the LFS document), but considering
+ * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
+ * equivalently requesting no 64-bit operations
+ */
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+# undef _LARGEFILE64_SOURCE
+#endif
+
+#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H)
+# define Z_HAVE_UNISTD_H
+#endif
+#ifndef Z_SOLO
+# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE)
+# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
+# ifdef VMS
+# include <unixio.h> /* for off_t */
+# endif
+# ifndef z_off_t
+# define z_off_t off_t
+# endif
+# endif
+#endif
+
+#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
+# define Z_LFS64
+#endif
+
+#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
+# define Z_LARGE64
+#endif
+
+#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
+# define Z_WANT64
+#endif
+
+#if !defined(SEEK_SET) && !defined(Z_SOLO)
+# define SEEK_SET 0 /* Seek from beginning of file. */
+# define SEEK_CUR 1 /* Seek from current position. */
+# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */
+#endif
+
+#ifndef z_off_t
+# define z_off_t long
+#endif
+
+#if !defined(_WIN32) && defined(Z_LARGE64)
+# define z_off64_t off64_t
+#else
+# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+# define z_off64_t __int64
+# else
+# define z_off64_t z_off_t
+# endif
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+ #pragma map(deflateInit_,"DEIN")
+ #pragma map(deflateInit2_,"DEIN2")
+ #pragma map(deflateEnd,"DEEND")
+ #pragma map(deflateBound,"DEBND")
+ #pragma map(inflateInit_,"ININ")
+ #pragma map(inflateInit2_,"ININ2")
+ #pragma map(inflateEnd,"INEND")
+ #pragma map(inflateSync,"INSY")
+ #pragma map(inflateSetDictionary,"INSEDI")
+ #pragma map(compressBound,"CMBND")
+ #pragma map(inflate_table,"INTABL")
+ #pragma map(inflate_fast,"INFA")
+ #pragma map(inflate_copyright,"INCOPY")
+#endif
+
+#endif /* ZCONF_H */
--- /dev/null
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+ version 1.2.8, April 28th, 2013
+
+ Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Jean-loup Gailly Mark Adler
+ jloup@gzip.org madler@alumni.caltech.edu
+
+
+ The data format used by the zlib library is described by RFCs (Request for
+ Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
+ (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
+*/
+
+#ifndef ZLIB_H
+#define ZLIB_H
+
+#include "zconf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZLIB_VERSION "1.2.8"
+#define ZLIB_VERNUM 0x1280
+#define ZLIB_VER_MAJOR 1
+#define ZLIB_VER_MINOR 2
+#define ZLIB_VER_REVISION 8
+#define ZLIB_VER_SUBREVISION 0
+
+/*
+ The 'zlib' compression library provides in-memory compression and
+ decompression functions, including integrity checks of the uncompressed data.
+ This version of the library supports only one compression method (deflation)
+ but other algorithms will be added later and will have the same stream
+ interface.
+
+ Compression can be done in a single step if the buffers are large enough,
+ or can be done by repeated calls of the compression function. In the latter
+ case, the application must provide more input and/or consume the output
+ (providing more output space) before each call.
+
+ The compressed data format used by default by the in-memory functions is
+ the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
+ around a deflate stream, which is itself documented in RFC 1951.
+
+ The library also supports reading and writing files in gzip (.gz) format
+ with an interface similar to that of stdio using the functions that start
+ with "gz". The gzip format is different from the zlib format. gzip is a
+ gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
+
+ This library can optionally read and write gzip streams in memory as well.
+
+ The zlib format was designed to be compact and fast for use in memory
+ and on communications channels. The gzip format was designed for single-
+ file compression on file systems, has a larger header than zlib to maintain
+ directory information, and uses a different, slower check method than zlib.
+
+ The library does not install any signal handler. The decoder checks
+ the consistency of the compressed data, so the library should never crash
+ even in case of corrupted input.
+*/
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void (*free_func) OF((voidpf opaque, voidpf address));
+
+struct internal_state;
+
+typedef struct z_stream_s {
+ z_const Bytef *next_in; /* next input byte */
+ uInt avail_in; /* number of bytes available at next_in */
+ uLong total_in; /* total number of input bytes read so far */
+
+ Bytef *next_out; /* next output byte should be put there */
+ uInt avail_out; /* remaining free space at next_out */
+ uLong total_out; /* total number of bytes output so far */
+
+ z_const char *msg; /* last error message, NULL if no error */
+ struct internal_state FAR *state; /* not visible by applications */
+
+ alloc_func zalloc; /* used to allocate the internal state */
+ free_func zfree; /* used to free the internal state */
+ voidpf opaque; /* private data object passed to zalloc and zfree */
+
+ int data_type; /* best guess about the data type: binary or text */
+ uLong adler; /* adler32 value of the uncompressed data */
+ uLong reserved; /* reserved for future use */
+} z_stream;
+
+typedef z_stream FAR *z_streamp;
+
+/*
+ gzip header information passed to and from zlib routines. See RFC 1952
+ for more details on the meanings of these fields.
+*/
+typedef struct gz_header_s {
+ int text; /* true if compressed data believed to be text */
+ uLong time; /* modification time */
+ int xflags; /* extra flags (not used when writing a gzip file) */
+ int os; /* operating system */
+ Bytef *extra; /* pointer to extra field or Z_NULL if none */
+ uInt extra_len; /* extra field length (valid if extra != Z_NULL) */
+ uInt extra_max; /* space at extra (only when reading header) */
+ Bytef *name; /* pointer to zero-terminated file name or Z_NULL */
+ uInt name_max; /* space at name (only when reading header) */
+ Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */
+ uInt comm_max; /* space at comment (only when reading header) */
+ int hcrc; /* true if there was or will be a header crc */
+ int done; /* true when done reading gzip header (not used
+ when writing a gzip file) */
+} gz_header;
+
+typedef gz_header FAR *gz_headerp;
+
+/*
+ The application must update next_in and avail_in when avail_in has dropped
+ to zero. It must update next_out and avail_out when avail_out has dropped
+ to zero. The application must initialize zalloc, zfree and opaque before
+ calling the init function. All other fields are set by the compression
+ library and must not be updated by the application.
+
+ The opaque value provided by the application will be passed as the first
+ parameter for calls of zalloc and zfree. This can be useful for custom
+ memory management. The compression library attaches no meaning to the
+ opaque value.
+
+ zalloc must return Z_NULL if there is not enough memory for the object.
+ If zlib is used in a multi-threaded application, zalloc and zfree must be
+ thread safe.
+
+ On 16-bit systems, the functions zalloc and zfree must be able to allocate
+ exactly 65536 bytes, but will not be required to allocate more than this if
+ the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers
+ returned by zalloc for objects of exactly 65536 bytes *must* have their
+ offset normalized to zero. The default allocation function provided by this
+ library ensures this (see zutil.c). To reduce memory requirements and avoid
+ any allocation of 64K objects, at the expense of compression ratio, compile
+ the library with -DMAX_WBITS=14 (see zconf.h).
+
+ The fields total_in and total_out can be used for statistics or progress
+ reports. After compression, total_in holds the total size of the
+ uncompressed data and may be saved for use in the decompressor (particularly
+ if the decompressor wants to decompress everything in a single step).
+*/
+
+ /* constants */
+
+#define Z_NO_FLUSH 0
+#define Z_PARTIAL_FLUSH 1
+#define Z_SYNC_FLUSH 2
+#define Z_FULL_FLUSH 3
+#define Z_FINISH 4
+#define Z_BLOCK 5
+#define Z_TREES 6
+/* Allowed flush values; see deflate() and inflate() below for details */
+
+#define Z_OK 0
+#define Z_STREAM_END 1
+#define Z_NEED_DICT 2
+#define Z_ERRNO (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR (-3)
+#define Z_MEM_ERROR (-4)
+#define Z_BUF_ERROR (-5)
+#define Z_VERSION_ERROR (-6)
+/* Return codes for the compression/decompression functions. Negative values
+ * are errors, positive values are used for special but normal events.
+ */
+
+#define Z_NO_COMPRESSION 0
+#define Z_BEST_SPEED 1
+#define Z_BEST_COMPRESSION 9
+#define Z_DEFAULT_COMPRESSION (-1)
+/* compression levels */
+
+#define Z_FILTERED 1
+#define Z_HUFFMAN_ONLY 2
+#define Z_RLE 3
+#define Z_FIXED 4
+#define Z_DEFAULT_STRATEGY 0
+/* compression strategy; see deflateInit2() below for details */
+
+#define Z_BINARY 0
+#define Z_TEXT 1
+#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */
+#define Z_UNKNOWN 2
+/* Possible values of the data_type field (though see inflate()) */
+
+#define Z_DEFLATED 8
+/* The deflate compression method (the only one supported in this version) */
+
+#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */
+
+#define zlib_version zlibVersion()
+/* for compatibility with versions < 1.0.2 */
+
+
+ /* basic functions */
+
+ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+ If the first character differs, the library code actually used is not
+ compatible with the zlib.h header file used by the application. This check
+ is automatically made by deflateInit and inflateInit.
+ */
+
+/*
+ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+
+ Initializes the internal stream state for compression. The fields
+ zalloc, zfree and opaque must be initialized before by the caller. If
+ zalloc and zfree are set to Z_NULL, deflateInit updates them to use default
+ allocation functions.
+
+ The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+ 1 gives best speed, 9 gives best compression, 0 gives no compression at all
+ (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION
+ requests a default compromise between speed and compression (currently
+ equivalent to level 6).
+
+ deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_STREAM_ERROR if level is not a valid compression level, or
+ Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+ with the version assumed by the caller (ZLIB_VERSION). msg is set to null
+ if there is no error message. deflateInit does not perform any compression:
+ this will be done by deflate().
+*/
+
+
+ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+/*
+ deflate compresses as much data as possible, and stops when the input
+ buffer becomes empty or the output buffer becomes full. It may introduce
+ some output latency (reading input without producing any output) except when
+ forced to flush.
+
+ The detailed semantics are as follows. deflate performs one or both of the
+ following actions:
+
+ - Compress more input starting at next_in and update next_in and avail_in
+ accordingly. If not all input can be processed (because there is not
+ enough room in the output buffer), next_in and avail_in are updated and
+ processing will resume at this point for the next call of deflate().
+
+ - Provide more output starting at next_out and update next_out and avail_out
+ accordingly. This action is forced if the parameter flush is non zero.
+ Forcing flush frequently degrades the compression ratio, so this parameter
+ should be set only when necessary (in interactive applications). Some
+ output may be provided even if flush is not set.
+
+ Before the call of deflate(), the application should ensure that at least
+ one of the actions is possible, by providing more input and/or consuming more
+ output, and updating avail_in or avail_out accordingly; avail_out should
+ never be zero before the call. The application can consume the compressed
+ output when it wants, for example when the output buffer is full (avail_out
+ == 0), or after each call of deflate(). If deflate returns Z_OK and with
+ zero avail_out, it must be called again after making room in the output
+ buffer because there might be more output pending.
+
+ Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
+ decide how much data to accumulate before producing output, in order to
+ maximize compression.
+
+ If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
+ flushed to the output buffer and the output is aligned on a byte boundary, so
+ that the decompressor can get all input data available so far. (In
+ particular avail_in is zero after the call if enough output space has been
+ provided before the call.) Flushing may degrade compression for some
+ compression algorithms and so it should be used only when necessary. This
+ completes the current deflate block and follows it with an empty stored block
+ that is three bits plus filler bits to the next byte, followed by four bytes
+ (00 00 ff ff).
+
+ If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the
+ output buffer, but the output is not aligned to a byte boundary. All of the
+ input data so far will be available to the decompressor, as for Z_SYNC_FLUSH.
+ This completes the current deflate block and follows it with an empty fixed
+ codes block that is 10 bits long. This assures that enough bytes are output
+ in order for the decompressor to finish the block before the empty fixed code
+ block.
+
+ If flush is set to Z_BLOCK, a deflate block is completed and emitted, as
+ for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to
+ seven bits of the current block are held to be written as the next byte after
+ the next deflate block is completed. In this case, the decompressor may not
+ be provided enough bits at this point in order to complete decompression of
+ the data provided so far to the compressor. It may need to wait for the next
+ block to be emitted. This is for advanced applications that need to control
+ the emission of deflate blocks.
+
+ If flush is set to Z_FULL_FLUSH, all output is flushed as with
+ Z_SYNC_FLUSH, and the compression state is reset so that decompression can
+ restart from this point if previous compressed data has been damaged or if
+ random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
+ compression.
+
+ If deflate returns with avail_out == 0, this function must be called again
+ with the same value of the flush parameter and more output space (updated
+ avail_out), until the flush is complete (deflate returns with non-zero
+ avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
+ avail_out is greater than six to avoid repeated flush markers due to
+ avail_out == 0 on return.
+
+ If the parameter flush is set to Z_FINISH, pending input is processed,
+ pending output is flushed and deflate returns with Z_STREAM_END if there was
+ enough output space; if deflate returns with Z_OK, this function must be
+ called again with Z_FINISH and more output space (updated avail_out) but no
+ more input data, until it returns with Z_STREAM_END or an error. After
+ deflate has returned Z_STREAM_END, the only possible operations on the stream
+ are deflateReset or deflateEnd.
+
+ Z_FINISH can be used immediately after deflateInit if all the compression
+ is to be done in a single step. In this case, avail_out must be at least the
+ value returned by deflateBound (see below). Then deflate is guaranteed to
+ return Z_STREAM_END. If not enough output space is provided, deflate will
+ not return Z_STREAM_END, and it must be called again as described above.
+
+ deflate() sets strm->adler to the adler32 checksum of all input read
+ so far (that is, total_in bytes).
+
+ deflate() may update strm->data_type if it can make a good guess about
+ the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered
+ binary. This field is only for information purposes and does not affect the
+ compression algorithm in any manner.
+
+ deflate() returns Z_OK if some progress has been made (more input
+ processed or more output produced), Z_STREAM_END if all input has been
+ consumed and all output has been produced (only when flush is set to
+ Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+ if next_in or next_out was Z_NULL), Z_BUF_ERROR if no progress is possible
+ (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not
+ fatal, and deflate() can be called again with more input and more output
+ space to continue compressing.
+*/
+
+
+ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+/*
+ All dynamically allocated data structures for this stream are freed.
+ This function discards any unprocessed input and does not flush any pending
+ output.
+
+ deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+ stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+ prematurely (some input or output was discarded). In the error case, msg
+ may be set but then points to a static string (which must not be
+ deallocated).
+*/
+
+
+/*
+ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+
+ Initializes the internal stream state for decompression. The fields
+ next_in, avail_in, zalloc, zfree and opaque must be initialized before by
+ the caller. If next_in is not Z_NULL and avail_in is large enough (the
+ exact value depends on the compression method), inflateInit determines the
+ compression method from the zlib header and allocates all data structures
+ accordingly; otherwise the allocation will be deferred to the first call of
+ inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to
+ use default allocation functions.
+
+ inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+ version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+ invalid, such as a null pointer to the structure. msg is set to null if
+ there is no error message. inflateInit does not perform any decompression
+ apart from possibly reading the zlib header if present: actual decompression
+ will be done by inflate(). (So next_in and avail_in may be modified, but
+ next_out and avail_out are unused and unchanged.) The current implementation
+ of inflateInit() does not process any header information -- that is deferred
+ until inflate() is called.
+*/
+
+
+ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+/*
+ inflate decompresses as much data as possible, and stops when the input
+ buffer becomes empty or the output buffer becomes full. It may introduce
+ some output latency (reading input without producing any output) except when
+ forced to flush.
+
+ The detailed semantics are as follows. inflate performs one or both of the
+ following actions:
+
+ - Decompress more input starting at next_in and update next_in and avail_in
+ accordingly. If not all input can be processed (because there is not
+ enough room in the output buffer), next_in is updated and processing will
+ resume at this point for the next call of inflate().
+
+ - Provide more output starting at next_out and update next_out and avail_out
+ accordingly. inflate() provides as much output as possible, until there is
+ no more input data or no more space in the output buffer (see below about
+ the flush parameter).
+
+ Before the call of inflate(), the application should ensure that at least
+ one of the actions is possible, by providing more input and/or consuming more
+ output, and updating the next_* and avail_* values accordingly. The
+ application can consume the uncompressed output when it wants, for example
+ when the output buffer is full (avail_out == 0), or after each call of
+ inflate(). If inflate returns Z_OK and with zero avail_out, it must be
+ called again after making room in the output buffer because there might be
+ more output pending.
+
+ The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH,
+ Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much
+ output as possible to the output buffer. Z_BLOCK requests that inflate()
+ stop if and when it gets to the next deflate block boundary. When decoding
+ the zlib or gzip format, this will cause inflate() to return immediately
+ after the header and before the first block. When doing a raw inflate,
+ inflate() will go ahead and process the first block, and will return when it
+ gets to the end of that block, or when it runs out of data.
+
+ The Z_BLOCK option assists in appending to or combining deflate streams.
+ Also to assist in this, on return inflate() will set strm->data_type to the
+ number of unused bits in the last byte taken from strm->next_in, plus 64 if
+ inflate() is currently decoding the last block in the deflate stream, plus
+ 128 if inflate() returned immediately after decoding an end-of-block code or
+ decoding the complete header up to just before the first byte of the deflate
+ stream. The end-of-block will not be indicated until all of the uncompressed
+ data from that block has been written to strm->next_out. The number of
+ unused bits may in general be greater than seven, except when bit 7 of
+ data_type is set, in which case the number of unused bits will be less than
+ eight. data_type is set as noted here every time inflate() returns for all
+ flush options, and so can be used to determine the amount of currently
+ consumed input in bits.
+
+ The Z_TREES option behaves as Z_BLOCK does, but it also returns when the
+ end of each deflate block header is reached, before any actual data in that
+ block is decoded. This allows the caller to determine the length of the
+ deflate block header for later use in random access within a deflate block.
+ 256 is added to the value of strm->data_type when inflate() returns
+ immediately after reaching the end of the deflate block header.
+
+ inflate() should normally be called until it returns Z_STREAM_END or an
+ error. However if all decompression is to be performed in a single step (a
+ single call of inflate), the parameter flush should be set to Z_FINISH. In
+ this case all pending input is processed and all pending output is flushed;
+ avail_out must be large enough to hold all of the uncompressed data for the
+ operation to complete. (The size of the uncompressed data may have been
+ saved by the compressor for this purpose.) The use of Z_FINISH is not
+ required to perform an inflation in one step. However it may be used to
+ inform inflate that a faster approach can be used for the single inflate()
+ call. Z_FINISH also informs inflate to not maintain a sliding window if the
+ stream completes, which reduces inflate's memory footprint. If the stream
+ does not complete, either because not all of the stream is provided or not
+ enough output space is provided, then a sliding window will be allocated and
+ inflate() can be called again to continue the operation as if Z_NO_FLUSH had
+ been used.
+
+ In this implementation, inflate() always flushes as much output as
+ possible to the output buffer, and always uses the faster approach on the
+ first call. So the effects of the flush parameter in this implementation are
+ on the return value of inflate() as noted below, when inflate() returns early
+ when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of
+ memory for a sliding window when Z_FINISH is used.
+
+ If a preset dictionary is needed after this call (see inflateSetDictionary
+ below), inflate sets strm->adler to the Adler-32 checksum of the dictionary
+ chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
+ strm->adler to the Adler-32 checksum of all output produced so far (that is,
+ total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
+ below. At the end of the stream, inflate() checks that its computed adler32
+ checksum is equal to that saved by the compressor and returns Z_STREAM_END
+ only if the checksum is correct.
+
+ inflate() can decompress and check either zlib-wrapped or gzip-wrapped
+ deflate data. The header type is detected automatically, if requested when
+ initializing with inflateInit2(). Any information contained in the gzip
+ header is not retained, so applications that need that information should
+ instead use raw inflate, see inflateInit2() below, or inflateBack() and
+ perform their own processing of the gzip header and trailer. When processing
+ gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output
+ produced so far. The CRC-32 is checked against the gzip trailer.
+
+ inflate() returns Z_OK if some progress has been made (more input processed
+ or more output produced), Z_STREAM_END if the end of the compressed data has
+ been reached and all uncompressed output has been produced, Z_NEED_DICT if a
+ preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
+ corrupted (input stream not conforming to the zlib format or incorrect check
+ value), Z_STREAM_ERROR if the stream structure was inconsistent (for example
+ next_in or next_out was Z_NULL), Z_MEM_ERROR if there was not enough memory,
+ Z_BUF_ERROR if no progress is possible or if there was not enough room in the
+ output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and
+ inflate() can be called again with more input and more output space to
+ continue decompressing. If Z_DATA_ERROR is returned, the application may
+ then call inflateSync() to look for a good compression block if a partial
+ recovery of the data is desired.
+*/
+
+
+ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+/*
+ All dynamically allocated data structures for this stream are freed.
+ This function discards any unprocessed input and does not flush any pending
+ output.
+
+ inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
+ was inconsistent. In the error case, msg may be set but then points to a
+ static string (which must not be deallocated).
+*/
+
+
+ /* Advanced functions */
+
+/*
+ The following functions are needed only in some special applications.
+*/
+
+/*
+ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
+ int level,
+ int method,
+ int windowBits,
+ int memLevel,
+ int strategy));
+
+ This is another version of deflateInit with more compression options. The
+ fields next_in, zalloc, zfree and opaque must be initialized before by the
+ caller.
+
+ The method parameter is the compression method. It must be Z_DEFLATED in
+ this version of the library.
+
+ The windowBits parameter is the base two logarithm of the window size
+ (the size of the history buffer). It should be in the range 8..15 for this
+ version of the library. Larger values of this parameter result in better
+ compression at the expense of memory usage. The default value is 15 if
+ deflateInit is used instead.
+
+ windowBits can also be -8..-15 for raw deflate. In this case, -windowBits
+ determines the window size. deflate() will then generate raw deflate data
+ with no zlib header or trailer, and will not compute an adler32 check value.
+
+ windowBits can also be greater than 15 for optional gzip encoding. Add
+ 16 to windowBits to write a simple gzip header and trailer around the
+ compressed data instead of a zlib wrapper. The gzip header will have no
+ file name, no extra data, no comment, no modification time (set to zero), no
+ header crc, and the operating system will be set to 255 (unknown). If a
+ gzip stream is being written, strm->adler is a crc32 instead of an adler32.
+
+ The memLevel parameter specifies how much memory should be allocated
+ for the internal compression state. memLevel=1 uses minimum memory but is
+ slow and reduces compression ratio; memLevel=9 uses maximum memory for
+ optimal speed. The default value is 8. See zconf.h for total memory usage
+ as a function of windowBits and memLevel.
+
+ The strategy parameter is used to tune the compression algorithm. Use the
+ value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+ filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
+ string match), or Z_RLE to limit match distances to one (run-length
+ encoding). Filtered data consists mostly of small values with a somewhat
+ random distribution. In this case, the compression algorithm is tuned to
+ compress them better. The effect of Z_FILTERED is to force more Huffman
+ coding and less string matching; it is somewhat intermediate between
+ Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as
+ fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The
+ strategy parameter only affects the compression ratio but not the
+ correctness of the compressed output even if it is not set appropriately.
+ Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler
+ decoder for special applications.
+
+ deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid
+ method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is
+ incompatible with the version assumed by the caller (ZLIB_VERSION). msg is
+ set to null if there is no error message. deflateInit2 does not perform any
+ compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
+ const Bytef *dictionary,
+ uInt dictLength));
+/*
+ Initializes the compression dictionary from the given byte sequence
+ without producing any compressed output. When using the zlib format, this
+ function must be called immediately after deflateInit, deflateInit2 or
+ deflateReset, and before any call of deflate. When doing raw deflate, this
+ function must be called either before any call of deflate, or immediately
+ after the completion of a deflate block, i.e. after all input has been
+ consumed and all output has been delivered when using any of the flush
+ options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The
+ compressor and decompressor must use exactly the same dictionary (see
+ inflateSetDictionary).
+
+ The dictionary should consist of strings (byte sequences) that are likely
+ to be encountered later in the data to be compressed, with the most commonly
+ used strings preferably put towards the end of the dictionary. Using a
+ dictionary is most useful when the data to be compressed is short and can be
+ predicted with good accuracy; the data can then be compressed better than
+ with the default empty dictionary.
+
+ Depending on the size of the compression data structures selected by
+ deflateInit or deflateInit2, a part of the dictionary may in effect be
+ discarded, for example if the dictionary is larger than the window size
+ provided in deflateInit or deflateInit2. Thus the strings most likely to be
+ useful should be put at the end of the dictionary, not at the front. In
+ addition, the current implementation of deflate will use at most the window
+ size minus 262 bytes of the provided dictionary.
+
+ Upon return of this function, strm->adler is set to the adler32 value
+ of the dictionary; the decompressor may later use this value to determine
+ which dictionary has been used by the compressor. (The adler32 value
+ applies to the whole dictionary even if only a subset of the dictionary is
+ actually used by the compressor.) If a raw deflate was requested, then the
+ adler32 value is not computed and strm->adler is not set.
+
+ deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
+ parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is
+ inconsistent (for example if deflate has already been called for this stream
+ or if not at a block boundary for raw deflate). deflateSetDictionary does
+ not perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
+ z_streamp source));
+/*
+ Sets the destination stream as a complete copy of the source stream.
+
+ This function can be useful when several compression strategies will be
+ tried, for example when there are several ways of pre-processing the input
+ data with a filter. The streams that will be discarded should then be freed
+ by calling deflateEnd. Note that deflateCopy duplicates the internal
+ compression state which can be quite large, so this strategy is slow and can
+ consume lots of memory.
+
+ deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+ (such as zalloc being Z_NULL). msg is left unchanged in both source and
+ destination.
+*/
+
+ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+/*
+ This function is equivalent to deflateEnd followed by deflateInit,
+ but does not free and reallocate all the internal compression state. The
+ stream will keep the same compression level and any other attributes that
+ may have been set by deflateInit2.
+
+ deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
+ int level,
+ int strategy));
+/*
+ Dynamically update the compression level and compression strategy. The
+ interpretation of level and strategy is as in deflateInit2. This can be
+ used to switch between compression and straight copy of the input data, or
+ to switch to a different kind of input data requiring a different strategy.
+ If the compression level is changed, the input available so far is
+ compressed with the old level (and may be flushed); the new level will take
+ effect only at the next call of deflate().
+
+ Before the call of deflateParams, the stream state must be set as for
+ a call of deflate(), since the currently available input may have to be
+ compressed and flushed. In particular, strm->avail_out must be non-zero.
+
+ deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
+ stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR if
+ strm->avail_out was zero.
+*/
+
+ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
+ int good_length,
+ int max_lazy,
+ int nice_length,
+ int max_chain));
+/*
+ Fine tune deflate's internal compression parameters. This should only be
+ used by someone who understands the algorithm used by zlib's deflate for
+ searching for the best matching string, and even then only by the most
+ fanatic optimizer trying to squeeze out the last compressed bit for their
+ specific input data. Read the deflate.c source code for the meaning of the
+ max_lazy, good_length, nice_length, and max_chain parameters.
+
+ deflateTune() can be called after deflateInit() or deflateInit2(), and
+ returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
+ */
+
+ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
+ uLong sourceLen));
+/*
+ deflateBound() returns an upper bound on the compressed size after
+ deflation of sourceLen bytes. It must be called after deflateInit() or
+ deflateInit2(), and after deflateSetHeader(), if used. This would be used
+ to allocate an output buffer for deflation in a single pass, and so would be
+ called before deflate(). If that first deflate() call is provided the
+ sourceLen input bytes, an output buffer allocated to the size returned by
+ deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed
+ to return Z_STREAM_END. Note that it is possible for the compressed size to
+ be larger than the value returned by deflateBound() if flush options other
+ than Z_FINISH or Z_NO_FLUSH are used.
+*/
+
+ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
+ unsigned *pending,
+ int *bits));
+/*
+ deflatePending() returns the number of bytes and bits of output that have
+ been generated, but not yet provided in the available output. The bytes not
+ provided would be due to the available output space having been consumed.
+ The number of bits of output not provided are between 0 and 7, where they
+ await more bits to join them in order to fill out a full byte. If pending
+ or bits are Z_NULL, then those values are not set.
+
+ deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent.
+ */
+
+ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
+ int bits,
+ int value));
+/*
+ deflatePrime() inserts bits in the deflate output stream. The intent
+ is that this function is used to start off the deflate output with the bits
+ leftover from a previous deflate stream when appending to it. As such, this
+ function can only be used for raw deflate, and must be used before the first
+ deflate() call after a deflateInit2() or deflateReset(). bits must be less
+ than or equal to 16, and that many of the least significant bits of value
+ will be inserted in the output.
+
+ deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough
+ room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the
+ source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
+ gz_headerp head));
+/*
+ deflateSetHeader() provides gzip header information for when a gzip
+ stream is requested by deflateInit2(). deflateSetHeader() may be called
+ after deflateInit2() or deflateReset() and before the first call of
+ deflate(). The text, time, os, extra field, name, and comment information
+ in the provided gz_header structure are written to the gzip header (xflag is
+ ignored -- the extra flags are set according to the compression level). The
+ caller must assure that, if not Z_NULL, name and comment are terminated with
+ a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
+ available there. If hcrc is true, a gzip header crc is included. Note that
+ the current versions of the command-line version of gzip (up through version
+ 1.3.x) do not support header crc's, and will report that it is a "multi-part
+ gzip file" and give up.
+
+ If deflateSetHeader is not used, the default gzip header has text false,
+ the time set to zero, and os set to 255, with no extra, name, or comment
+ fields. The gzip header is returned to the default state by deflateReset().
+
+ deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
+ int windowBits));
+
+ This is another version of inflateInit with an extra parameter. The
+ fields next_in, avail_in, zalloc, zfree and opaque must be initialized
+ before by the caller.
+
+ The windowBits parameter is the base two logarithm of the maximum window
+ size (the size of the history buffer). It should be in the range 8..15 for
+ this version of the library. The default value is 15 if inflateInit is used
+ instead. windowBits must be greater than or equal to the windowBits value
+ provided to deflateInit2() while compressing, or it must be equal to 15 if
+ deflateInit2() was not used. If a compressed stream with a larger window
+ size is given as input, inflate() will return with the error code
+ Z_DATA_ERROR instead of trying to allocate a larger window.
+
+ windowBits can also be zero to request that inflate use the window size in
+ the zlib header of the compressed stream.
+
+ windowBits can also be -8..-15 for raw inflate. In this case, -windowBits
+ determines the window size. inflate() will then process raw deflate data,
+ not looking for a zlib or gzip header, not generating a check value, and not
+ looking for any check values for comparison at the end of the stream. This
+ is for use with other formats that use the deflate compressed data format
+ such as zip. Those formats provide their own check values. If a custom
+ format is developed using the raw deflate format for compressed data, it is
+ recommended that a check value such as an adler32 or a crc32 be applied to
+ the uncompressed data as is done in the zlib, gzip, and zip formats. For
+ most applications, the zlib format should be used as is. Note that comments
+ above on the use in deflateInit2() apply to the magnitude of windowBits.
+
+ windowBits can also be greater than 15 for optional gzip decoding. Add
+ 32 to windowBits to enable zlib and gzip decoding with automatic header
+ detection, or add 16 to decode only the gzip format (the zlib format will
+ return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a
+ crc32 instead of an adler32.
+
+ inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+ version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+ invalid, such as a null pointer to the structure. msg is set to null if
+ there is no error message. inflateInit2 does not perform any decompression
+ apart from possibly reading the zlib header if present: actual decompression
+ will be done by inflate(). (So next_in and avail_in may be modified, but
+ next_out and avail_out are unused and unchanged.) The current implementation
+ of inflateInit2() does not process any header information -- that is
+ deferred until inflate() is called.
+*/
+
+ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
+ const Bytef *dictionary,
+ uInt dictLength));
+/*
+ Initializes the decompression dictionary from the given uncompressed byte
+ sequence. This function must be called immediately after a call of inflate,
+ if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
+ can be determined from the adler32 value returned by that call of inflate.
+ The compressor and decompressor must use exactly the same dictionary (see
+ deflateSetDictionary). For raw inflate, this function can be called at any
+ time to set the dictionary. If the provided dictionary is smaller than the
+ window and there is already data in the window, then the provided dictionary
+ will amend what's there. The application must ensure that the dictionary
+ that was used for compression is provided.
+
+ inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
+ parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is
+ inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
+ expected one (incorrect adler32 value). inflateSetDictionary does not
+ perform any decompression: this will be done by subsequent calls of
+ inflate().
+*/
+
+ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
+ Bytef *dictionary,
+ uInt *dictLength));
+/*
+ Returns the sliding dictionary being maintained by inflate. dictLength is
+ set to the number of bytes in the dictionary, and that many bytes are copied
+ to dictionary. dictionary must have enough space, where 32768 bytes is
+ always enough. If inflateGetDictionary() is called with dictionary equal to
+ Z_NULL, then only the dictionary length is returned, and nothing is copied.
+ Similarly, if dictLength is Z_NULL, then it is not set.
+
+ inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+ stream state is inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+/*
+ Skips invalid compressed data until a possible full flush point (see above
+ for the description of deflate with Z_FULL_FLUSH) can be found, or until all
+ available input is skipped. No output is provided.
+
+ inflateSync searches for a 00 00 FF FF pattern in the compressed data.
+ All full flush points have this pattern, but not all occurrences of this
+ pattern are full flush points.
+
+ inflateSync returns Z_OK if a possible full flush point has been found,
+ Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point
+ has been found, or Z_STREAM_ERROR if the stream structure was inconsistent.
+ In the success case, the application may save the current value of
+ total_in which indicates where valid compressed data was found. In the
+ error case, the application may repeatedly call inflateSync, providing more
+ input each time, until success or end of the input data.
+*/
+
+ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
+ z_streamp source));
+/*
+ Sets the destination stream as a complete copy of the source stream.
+
+ This function can be useful when randomly accessing a large stream. The
+ first pass through the stream can periodically record the inflate state,
+ allowing restarting inflate at those points when randomly accessing the
+ stream.
+
+ inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+ (such as zalloc being Z_NULL). msg is left unchanged in both source and
+ destination.
+*/
+
+ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+/*
+ This function is equivalent to inflateEnd followed by inflateInit,
+ but does not free and reallocate all the internal decompression state. The
+ stream will keep attributes that may have been set by inflateInit2.
+
+ inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
+ int windowBits));
+/*
+ This function is the same as inflateReset, but it also permits changing
+ the wrap and window size requests. The windowBits parameter is interpreted
+ the same as it is for inflateInit2.
+
+ inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent (such as zalloc or state being Z_NULL), or if
+ the windowBits parameter is invalid.
+*/
+
+ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
+ int bits,
+ int value));
+/*
+ This function inserts bits in the inflate input stream. The intent is
+ that this function is used to start inflating at a bit position in the
+ middle of a byte. The provided bits will be used before any bytes are used
+ from next_in. This function should only be used with raw inflate, and
+ should be used before the first inflate() call after inflateInit2() or
+ inflateReset(). bits must be less than or equal to 16, and that many of the
+ least significant bits of value will be inserted in the input.
+
+ If bits is negative, then the input stream bit buffer is emptied. Then
+ inflatePrime() can be called again to put bits in the buffer. This is used
+ to clear out bits leftover after feeding inflate a block description prior
+ to feeding inflate codes.
+
+ inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent.
+*/
+
+ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
+/*
+ This function returns two values, one in the lower 16 bits of the return
+ value, and the other in the remaining upper bits, obtained by shifting the
+ return value down 16 bits. If the upper value is -1 and the lower value is
+ zero, then inflate() is currently decoding information outside of a block.
+ If the upper value is -1 and the lower value is non-zero, then inflate is in
+ the middle of a stored block, with the lower value equaling the number of
+ bytes from the input remaining to copy. If the upper value is not -1, then
+ it is the number of bits back from the current bit position in the input of
+ the code (literal or length/distance pair) currently being processed. In
+ that case the lower value is the number of bytes already emitted for that
+ code.
+
+ A code is being processed if inflate is waiting for more input to complete
+ decoding of the code, or if it has completed decoding but is waiting for
+ more output space to write the literal or match data.
+
+ inflateMark() is used to mark locations in the input data for random
+ access, which may be at bit positions, and to note those cases where the
+ output of a code may span boundaries of random access blocks. The current
+ location in the input stream can be determined from avail_in and data_type
+ as noted in the description for the Z_BLOCK flush parameter for inflate.
+
+ inflateMark returns the value noted above or -1 << 16 if the provided
+ source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
+ gz_headerp head));
+/*
+ inflateGetHeader() requests that gzip header information be stored in the
+ provided gz_header structure. inflateGetHeader() may be called after
+ inflateInit2() or inflateReset(), and before the first call of inflate().
+ As inflate() processes the gzip stream, head->done is zero until the header
+ is completed, at which time head->done is set to one. If a zlib stream is
+ being decoded, then head->done is set to -1 to indicate that there will be
+ no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be
+ used to force inflate() to return immediately after header processing is
+ complete and before any actual data is decompressed.
+
+ The text, time, xflags, and os fields are filled in with the gzip header
+ contents. hcrc is set to true if there is a header CRC. (The header CRC
+ was valid if done is set to one.) If extra is not Z_NULL, then extra_max
+ contains the maximum number of bytes to write to extra. Once done is true,
+ extra_len contains the actual extra field length, and extra contains the
+ extra field, or that field truncated if extra_max is less than extra_len.
+ If name is not Z_NULL, then up to name_max characters are written there,
+ terminated with a zero unless the length is greater than name_max. If
+ comment is not Z_NULL, then up to comm_max characters are written there,
+ terminated with a zero unless the length is greater than comm_max. When any
+ of extra, name, or comment are not Z_NULL and the respective field is not
+ present in the header, then that field is set to Z_NULL to signal its
+ absence. This allows the use of deflateSetHeader() with the returned
+ structure to duplicate the header. However if those fields are set to
+ allocated memory, then the application will need to save those pointers
+ elsewhere so that they can be eventually freed.
+
+ If inflateGetHeader is not used, then the header information is simply
+ discarded. The header is always checked for validity, including the header
+ CRC if present. inflateReset() will reset the process to discard the header
+ information. The application would need to call inflateGetHeader() again to
+ retrieve the header from the next gzip stream.
+
+ inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
+ unsigned char FAR *window));
+
+ Initialize the internal stream state for decompression using inflateBack()
+ calls. The fields zalloc, zfree and opaque in strm must be initialized
+ before the call. If zalloc and zfree are Z_NULL, then the default library-
+ derived memory allocation routines are used. windowBits is the base two
+ logarithm of the window size, in the range 8..15. window is a caller
+ supplied buffer of that size. Except for special applications where it is
+ assured that deflate was used with small window sizes, windowBits must be 15
+ and a 32K byte window must be supplied to be able to decompress general
+ deflate streams.
+
+ See inflateBack() for the usage of these routines.
+
+ inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
+ the parameters are invalid, Z_MEM_ERROR if the internal state could not be
+ allocated, or Z_VERSION_ERROR if the version of the library does not match
+ the version of the header file.
+*/
+
+typedef unsigned (*in_func) OF((void FAR *,
+ z_const unsigned char FAR * FAR *));
+typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
+
+ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
+ in_func in, void FAR *in_desc,
+ out_func out, void FAR *out_desc));
+/*
+ inflateBack() does a raw inflate with a single call using a call-back
+ interface for input and output. This is potentially more efficient than
+ inflate() for file i/o applications, in that it avoids copying between the
+ output and the sliding window by simply making the window itself the output
+ buffer. inflate() can be faster on modern CPUs when used with large
+ buffers. inflateBack() trusts the application to not change the output
+ buffer passed by the output function, at least until inflateBack() returns.
+
+ inflateBackInit() must be called first to allocate the internal state
+ and to initialize the state with the user-provided window buffer.
+ inflateBack() may then be used multiple times to inflate a complete, raw
+ deflate stream with each call. inflateBackEnd() is then called to free the
+ allocated state.
+
+ A raw deflate stream is one with no zlib or gzip header or trailer.
+ This routine would normally be used in a utility that reads zip or gzip
+ files and writes out uncompressed files. The utility would decode the
+ header and process the trailer on its own, hence this routine expects only
+ the raw deflate stream to decompress. This is different from the normal
+ behavior of inflate(), which expects either a zlib or gzip header and
+ trailer around the deflate stream.
+
+ inflateBack() uses two subroutines supplied by the caller that are then
+ called by inflateBack() for input and output. inflateBack() calls those
+ routines until it reads a complete deflate stream and writes out all of the
+ uncompressed data, or until it encounters an error. The function's
+ parameters and return types are defined above in the in_func and out_func
+ typedefs. inflateBack() will call in(in_desc, &buf) which should return the
+ number of bytes of provided input, and a pointer to that input in buf. If
+ there is no input available, in() must return zero--buf is ignored in that
+ case--and inflateBack() will return a buffer error. inflateBack() will call
+ out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out()
+ should return zero on success, or non-zero on failure. If out() returns
+ non-zero, inflateBack() will return with an error. Neither in() nor out()
+ are permitted to change the contents of the window provided to
+ inflateBackInit(), which is also the buffer that out() uses to write from.
+ The length written by out() will be at most the window size. Any non-zero
+ amount of input may be provided by in().
+
+ For convenience, inflateBack() can be provided input on the first call by
+ setting strm->next_in and strm->avail_in. If that input is exhausted, then
+ in() will be called. Therefore strm->next_in must be initialized before
+ calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called
+ immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in
+ must also be initialized, and then if strm->avail_in is not zero, input will
+ initially be taken from strm->next_in[0 .. strm->avail_in - 1].
+
+ The in_desc and out_desc parameters of inflateBack() are passed as the
+ first parameter of in() and out() respectively when they are called. These
+ descriptors can be optionally used to pass any information that the caller-
+ supplied in() and out() functions need to do their job.
+
+ On return, inflateBack() will set strm->next_in and strm->avail_in to
+ pass back any unused input that was provided by the last in() call. The
+ return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
+ if in() or out() returned an error, Z_DATA_ERROR if there was a format error
+ in the deflate stream (in which case strm->msg is set to indicate the nature
+ of the error), or Z_STREAM_ERROR if the stream was not properly initialized.
+ In the case of Z_BUF_ERROR, an input or output error can be distinguished
+ using strm->next_in which will be Z_NULL only if in() returned an error. If
+ strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning
+ non-zero. (in() will always be called before out(), so strm->next_in is
+ assured to be defined if out() returns non-zero.) Note that inflateBack()
+ cannot return Z_OK.
+*/
+
+ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+/*
+ All memory allocated by inflateBackInit() is freed.
+
+ inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
+ state was inconsistent.
+*/
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+/* Return flags indicating compile-time options.
+
+ Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
+ 1.0: size of uInt
+ 3.2: size of uLong
+ 5.4: size of voidpf (pointer)
+ 7.6: size of z_off_t
+
+ Compiler, assembler, and debug options:
+ 8: DEBUG
+ 9: ASMV or ASMINF -- use ASM code
+ 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
+ 11: 0 (reserved)
+
+ One-time table building (smaller code, but not thread-safe if true):
+ 12: BUILDFIXED -- build static block decoding tables when needed
+ 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
+ 14,15: 0 (reserved)
+
+ Library content (indicates missing functionality):
+ 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
+ deflate code when not needed)
+ 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
+ and decode gzip streams (to avoid linking crc code)
+ 18-19: 0 (reserved)
+
+ Operation variations (changes in library functionality):
+ 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
+ 21: FASTEST -- deflate algorithm with only one, lowest compression level
+ 22,23: 0 (reserved)
+
+ The sprintf variant used by gzprintf (zero is best):
+ 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
+ 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
+ 26: 0 = returns value, 1 = void -- 1 means inferred string length returned
+
+ Remainder:
+ 27-31: 0 (reserved)
+ */
+
+#ifndef Z_SOLO
+
+ /* utility functions */
+
+/*
+ The following utility functions are implemented on top of the basic
+ stream-oriented functions. To simplify the interface, some default options
+ are assumed (compression level and memory usage, standard memory allocation
+ functions). The source code of these utility functions can be modified if
+ you need special options.
+*/
+
+ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+/*
+ Compresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total size
+ of the destination buffer, which must be at least the value returned by
+ compressBound(sourceLen). Upon exit, destLen is the actual size of the
+ compressed buffer.
+
+ compress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer.
+*/
+
+ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen,
+ int level));
+/*
+ Compresses the source buffer into the destination buffer. The level
+ parameter has the same meaning as in deflateInit. sourceLen is the byte
+ length of the source buffer. Upon entry, destLen is the total size of the
+ destination buffer, which must be at least the value returned by
+ compressBound(sourceLen). Upon exit, destLen is the actual size of the
+ compressed buffer.
+
+ compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+ Z_STREAM_ERROR if the level parameter is invalid.
+*/
+
+ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
+/*
+ compressBound() returns an upper bound on the compressed size after
+ compress() or compress2() on sourceLen bytes. It would be used before a
+ compress() or compress2() call to allocate the destination buffer.
+*/
+
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+/*
+ Decompresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total size
+ of the destination buffer, which must be large enough to hold the entire
+ uncompressed data. (The size of the uncompressed data must have been saved
+ previously by the compressor and transmitted to the decompressor by some
+ mechanism outside the scope of this compression library.) Upon exit, destLen
+ is the actual size of the uncompressed buffer.
+
+ uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In
+ the case where there is not enough room, uncompress() will fill the output
+ buffer with the uncompressed data up to that point.
+*/
+
+ /* gzip file access functions */
+
+/*
+ This library supports reading and writing files in gzip (.gz) format with
+ an interface similar to that of stdio, using the functions that start with
+ "gz". The gzip format is different from the zlib format. gzip is a gzip
+ wrapper, documented in RFC 1952, wrapped around a deflate stream.
+*/
+
+typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */
+
+/*
+ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
+
+ Opens a gzip (.gz) file for reading or writing. The mode parameter is as
+ in fopen ("rb" or "wb") but can also include a compression level ("wb9") or
+ a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only
+ compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F'
+ for fixed code compression as in "wb9F". (See the description of
+ deflateInit2 for more information about the strategy parameter.) 'T' will
+ request transparent writing or appending with no compression and not using
+ the gzip format.
+
+ "a" can be used instead of "w" to request that the gzip stream that will
+ be written be appended to the file. "+" will result in an error, since
+ reading and writing to the same gzip file is not supported. The addition of
+ "x" when writing will create the file exclusively, which fails if the file
+ already exists. On systems that support it, the addition of "e" when
+ reading or writing will set the flag to close the file on an execve() call.
+
+ These functions, as well as gzip, will read and decode a sequence of gzip
+ streams in a file. The append function of gzopen() can be used to create
+ such a file. (Also see gzflush() for another way to do this.) When
+ appending, gzopen does not test whether the file begins with a gzip stream,
+ nor does it look for the end of the gzip streams to begin appending. gzopen
+ will simply append a gzip stream to the existing file.
+
+ gzopen can be used to read a file which is not in gzip format; in this
+ case gzread will directly read from the file without decompression. When
+ reading, this will be detected automatically by looking for the magic two-
+ byte gzip header.
+
+ gzopen returns NULL if the file could not be opened, if there was
+ insufficient memory to allocate the gzFile state, or if an invalid mode was
+ specified (an 'r', 'w', or 'a' was not provided, or '+' was provided).
+ errno can be checked to determine if the reason gzopen failed was that the
+ file could not be opened.
+*/
+
+ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
+/*
+ gzdopen associates a gzFile with the file descriptor fd. File descriptors
+ are obtained from calls like open, dup, creat, pipe or fileno (if the file
+ has been previously opened with fopen). The mode parameter is as in gzopen.
+
+ The next call of gzclose on the returned gzFile will also close the file
+ descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor
+ fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd,
+ mode);. The duplicated descriptor should be saved to avoid a leak, since
+ gzdopen does not close fd if it fails. If you are using fileno() to get the
+ file descriptor from a FILE *, then you will have to use dup() to avoid
+ double-close()ing the file descriptor. Both gzclose() and fclose() will
+ close the associated file descriptor, so they need to have different file
+ descriptors.
+
+ gzdopen returns NULL if there was insufficient memory to allocate the
+ gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not
+ provided, or '+' was provided), or if fd is -1. The file descriptor is not
+ used until the next gz* read, write, seek, or close operation, so gzdopen
+ will not detect if fd is invalid (unless fd is -1).
+*/
+
+ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
+/*
+ Set the internal buffer size used by this library's functions. The
+ default buffer size is 8192 bytes. This function must be called after
+ gzopen() or gzdopen(), and before any other calls that read or write the
+ file. The buffer memory allocation is always deferred to the first read or
+ write. Two buffers are allocated, either both of the specified size when
+ writing, or one of the specified size and the other twice that size when
+ reading. A larger buffer size of, for example, 64K or 128K bytes will
+ noticeably increase the speed of decompression (reading).
+
+ The new buffer size also affects the maximum length for gzprintf().
+
+ gzbuffer() returns 0 on success, or -1 on failure, such as being called
+ too late.
+*/
+
+ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+/*
+ Dynamically update the compression level or strategy. See the description
+ of deflateInit2 for the meaning of these parameters.
+
+ gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
+ opened for writing.
+*/
+
+ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
+/*
+ Reads the given number of uncompressed bytes from the compressed file. If
+ the input file is not in gzip format, gzread copies the given number of
+ bytes into the buffer directly from the file.
+
+ After reaching the end of a gzip stream in the input, gzread will continue
+ to read, looking for another gzip stream. Any number of gzip streams may be
+ concatenated in the input file, and will all be decompressed by gzread().
+ If something other than a gzip stream is encountered after a gzip stream,
+ that remaining trailing garbage is ignored (and no error is returned).
+
+ gzread can be used to read a gzip file that is being concurrently written.
+ Upon reaching the end of the input, gzread will return with the available
+ data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then
+ gzclearerr can be used to clear the end of file indicator in order to permit
+ gzread to be tried again. Z_OK indicates that a gzip stream was completed
+ on the last gzread. Z_BUF_ERROR indicates that the input file ended in the
+ middle of a gzip stream. Note that gzread does not return -1 in the event
+ of an incomplete gzip stream. This error is deferred until gzclose(), which
+ will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip
+ stream. Alternatively, gzerror can be used before gzclose to detect this
+ case.
+
+ gzread returns the number of uncompressed bytes actually read, less than
+ len for end of file, or -1 for error.
+*/
+
+ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
+ voidpc buf, unsigned len));
+/*
+ Writes the given number of uncompressed bytes into the compressed file.
+ gzwrite returns the number of uncompressed bytes written or 0 in case of
+ error.
+*/
+
+ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
+/*
+ Converts, formats, and writes the arguments to the compressed file under
+ control of the format string, as in fprintf. gzprintf returns the number of
+ uncompressed bytes actually written, or 0 in case of error. The number of
+ uncompressed bytes written is limited to 8191, or one less than the buffer
+ size given to gzbuffer(). The caller should assure that this limit is not
+ exceeded. If it is exceeded, then gzprintf() will return an error (0) with
+ nothing written. In this case, there may also be a buffer overflow with
+ unpredictable consequences, which is possible only if zlib was compiled with
+ the insecure functions sprintf() or vsprintf() because the secure snprintf()
+ or vsnprintf() functions were not available. This can be determined using
+ zlibCompileFlags().
+*/
+
+ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+/*
+ Writes the given null-terminated string to the compressed file, excluding
+ the terminating null character.
+
+ gzputs returns the number of characters written, or -1 in case of error.
+*/
+
+ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+/*
+ Reads bytes from the compressed file until len-1 characters are read, or a
+ newline character is read and transferred to buf, or an end-of-file
+ condition is encountered. If any characters are read or if len == 1, the
+ string is terminated with a null character. If no characters are read due
+ to an end-of-file or len < 1, then the buffer is left untouched.
+
+ gzgets returns buf which is a null-terminated string, or it returns NULL
+ for end-of-file or in case of error. If there was an error, the contents at
+ buf are indeterminate.
+*/
+
+ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
+/*
+ Writes c, converted to an unsigned char, into the compressed file. gzputc
+ returns the value that was written, or -1 in case of error.
+*/
+
+ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
+/*
+ Reads one byte from the compressed file. gzgetc returns this byte or -1
+ in case of end of file or error. This is implemented as a macro for speed.
+ As such, it does not do all of the checking the other functions do. I.e.
+ it does not check to see if file is NULL, nor whether the structure file
+ points to has been clobbered or not.
+*/
+
+ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
+/*
+ Push one character back onto the stream to be read as the first character
+ on the next read. At least one character of push-back is allowed.
+ gzungetc() returns the character pushed, or -1 on failure. gzungetc() will
+ fail if c is -1, and may fail if a character has been pushed but not read
+ yet. If gzungetc is used immediately after gzopen or gzdopen, at least the
+ output buffer size of pushed characters is allowed. (See gzbuffer above.)
+ The pushed character will be discarded if the stream is repositioned with
+ gzseek() or gzrewind().
+*/
+
+ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
+/*
+ Flushes all pending output into the compressed file. The parameter flush
+ is as in the deflate() function. The return value is the zlib error number
+ (see function gzerror below). gzflush is only permitted when writing.
+
+ If the flush parameter is Z_FINISH, the remaining data is written and the
+ gzip stream is completed in the output. If gzwrite() is called again, a new
+ gzip stream will be started in the output. gzread() is able to read such
+ concatenated gzip streams.
+
+ gzflush should be called only when strictly necessary because it will
+ degrade compression if called too often.
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
+ z_off_t offset, int whence));
+
+ Sets the starting position for the next gzread or gzwrite on the given
+ compressed file. The offset represents a number of bytes in the
+ uncompressed data stream. The whence parameter is defined as in lseek(2);
+ the value SEEK_END is not supported.
+
+ If the file is opened for reading, this function is emulated but can be
+ extremely slow. If the file is opened for writing, only forward seeks are
+ supported; gzseek then compresses a sequence of zeroes up to the new
+ starting position.
+
+ gzseek returns the resulting offset location as measured in bytes from
+ the beginning of the uncompressed stream, or -1 in case of error, in
+ particular if the file is opened for writing and the new starting position
+ would be before the current position.
+*/
+
+ZEXTERN int ZEXPORT gzrewind OF((gzFile file));
+/*
+ Rewinds the given file. This function is supported only for reading.
+
+ gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file));
+
+ Returns the starting position for the next gzread or gzwrite on the given
+ compressed file. This position represents a number of bytes in the
+ uncompressed data stream, and is zero when starting, even if appending or
+ reading a gzip stream from the middle of a file using gzdopen().
+
+ gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
+
+ Returns the current offset in the file being read or written. This offset
+ includes the count of bytes that precede the gzip stream, for example when
+ appending or when using gzdopen() for reading. When reading, the offset
+ does not include as yet unused buffered input. This information can be used
+ for a progress indicator. On error, gzoffset() returns -1.
+*/
+
+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+/*
+ Returns true (1) if the end-of-file indicator has been set while reading,
+ false (0) otherwise. Note that the end-of-file indicator is set only if the
+ read tried to go past the end of the input, but came up short. Therefore,
+ just like feof(), gzeof() may return false even if there is no more data to
+ read, in the event that the last read request was for the exact number of
+ bytes remaining in the input file. This will happen if the input file size
+ is an exact multiple of the buffer size.
+
+ If gzeof() returns true, then the read functions will return no more data,
+ unless the end-of-file indicator is reset by gzclearerr() and the input file
+ has grown since the previous end of file was detected.
+*/
+
+ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+/*
+ Returns true (1) if file is being copied directly while reading, or false
+ (0) if file is a gzip stream being decompressed.
+
+ If the input file is empty, gzdirect() will return true, since the input
+ does not contain a gzip stream.
+
+ If gzdirect() is used immediately after gzopen() or gzdopen() it will
+ cause buffers to be allocated to allow reading the file to determine if it
+ is a gzip file. Therefore if gzbuffer() is used, it should be called before
+ gzdirect().
+
+ When writing, gzdirect() returns true (1) if transparent writing was
+ requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note:
+ gzdirect() is not needed when writing. Transparent writing must be
+ explicitly requested, so the application already knows the answer. When
+ linking statically, using gzdirect() will include all of the zlib code for
+ gzip file reading and decompression, which may not be desired.)
+*/
+
+ZEXTERN int ZEXPORT gzclose OF((gzFile file));
+/*
+ Flushes all pending output if necessary, closes the compressed file and
+ deallocates the (de)compression state. Note that once file is closed, you
+ cannot call gzerror with file, since its structures have been deallocated.
+ gzclose must not be called more than once on the same file, just as free
+ must not be called more than once on the same allocation.
+
+ gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a
+ file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the
+ last read ended in the middle of a gzip stream, or Z_OK on success.
+*/
+
+ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
+ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
+/*
+ Same as gzclose(), but gzclose_r() is only for use when reading, and
+ gzclose_w() is only for use when writing or appending. The advantage to
+ using these instead of gzclose() is that they avoid linking in zlib
+ compression or decompression code that is not used when only reading or only
+ writing respectively. If gzclose() is used, then both compression and
+ decompression code will be included in the application when linking to a static
+ zlib library.
+*/
+
+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+/*
+ Returns the error message for the last error which occurred on the given
+ compressed file. errnum is set to zlib error number. If an error occurred
+ in the file system and not in the compression library, errnum is set to
+ Z_ERRNO and the application may consult errno to get the exact error code.
+
+ The application must not modify the returned string. Future calls to
+ this function may invalidate the previously returned string. If file is
+ closed, then the string previously returned by gzerror will no longer be
+ available.
+
+ gzerror() should be used to distinguish errors from end-of-file for those
+ functions above that do not distinguish those cases in their return values.
+*/
+
+ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+/*
+ Clears the error and end-of-file flags for file. This is analogous to the
+ clearerr() function in stdio. This is useful for continuing to read a gzip
+ file that is being written concurrently.
+*/
+
+#endif /* !Z_SOLO */
+
+ /* checksum functions */
+
+/*
+ These functions are not related to compression but are exported
+ anyway because they might be useful in applications using the compression
+ library.
+*/
+
+ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+/*
+ Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+ return the updated checksum. If buf is Z_NULL, this function returns the
+ required initial value for the checksum.
+
+ An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+ much faster.
+
+ Usage example:
+
+ uLong adler = adler32(0L, Z_NULL, 0);
+
+ while (read_buffer(buffer, length) != EOF) {
+ adler = adler32(adler, buffer, length);
+ }
+ if (adler != original_adler) error();
+*/
+
+/*
+ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
+ z_off_t len2));
+
+ Combine two Adler-32 checksums into one. For two sequences of bytes, seq1
+ and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
+ each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of
+ seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note
+ that the z_off_t type (like off_t) is a signed integer. If len2 is
+ negative, the result has no meaning or utility.
+*/
+
+ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
+/*
+ Update a running CRC-32 with the bytes buf[0..len-1] and return the
+ updated CRC-32. If buf is Z_NULL, this function returns the required
+ initial value for the crc. Pre- and post-conditioning (one's complement) is
+ performed within this function so it shouldn't be done by the application.
+
+ Usage example:
+
+ uLong crc = crc32(0L, Z_NULL, 0);
+
+ while (read_buffer(buffer, length) != EOF) {
+ crc = crc32(crc, buffer, length);
+ }
+ if (crc != original_crc) error();
+*/
+
+/*
+ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
+
+ Combine two CRC-32 check values into one. For two sequences of bytes,
+ seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
+ calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32
+ check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
+ len2.
+*/
+
+
+ /* various hacks, don't look :) */
+
+/* deflateInit and inflateInit are macros to allow checking the zlib version
+ * and the compiler's view of z_stream:
+ */
+ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
+ const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
+ const char *version, int stream_size));
+ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method,
+ int windowBits, int memLevel,
+ int strategy, const char *version,
+ int stream_size));
+ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits,
+ const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
+ unsigned char FAR *window,
+ const char *version,
+ int stream_size));
+#define deflateInit(strm, level) \
+ deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#define inflateInit(strm) \
+ inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+ deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+ (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#define inflateInit2(strm, windowBits) \
+ inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+ (int)sizeof(z_stream))
+#define inflateBackInit(strm, windowBits, window) \
+ inflateBackInit_((strm), (windowBits), (window), \
+ ZLIB_VERSION, (int)sizeof(z_stream))
+
+#ifndef Z_SOLO
+
+/* gzgetc() macro and its supporting function and exposed data structure. Note
+ * that the real internal state is much larger than the exposed structure.
+ * This abbreviated structure exposes just enough for the gzgetc() macro. The
+ * user should not mess with these exposed elements, since their names or
+ * behavior could change in the future, perhaps even capriciously. They can
+ * only be used by the gzgetc() macro. You have been warned.
+ */
+struct gzFile_s {
+ unsigned have;
+ unsigned char *next;
+ z_off64_t pos;
+};
+ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */
+#ifdef Z_PREFIX_SET
+# undef z_gzgetc
+# define z_gzgetc(g) \
+ ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g))
+#else
+# define gzgetc(g) \
+ ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g))
+#endif
+
+/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or
+ * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if
+ * both are true, the application gets the *64 functions, and the regular
+ * functions are changed to 64 bits) -- in case these are set on systems
+ * without large file support, _LFS64_LARGEFILE must also be true
+ */
+#ifdef Z_LARGE64
+ ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+ ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
+ ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
+ ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+ ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
+ ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
+#endif
+
+#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
+# ifdef Z_PREFIX_SET
+# define z_gzopen z_gzopen64
+# define z_gzseek z_gzseek64
+# define z_gztell z_gztell64
+# define z_gzoffset z_gzoffset64
+# define z_adler32_combine z_adler32_combine64
+# define z_crc32_combine z_crc32_combine64
+# else
+# define gzopen gzopen64
+# define gzseek gzseek64
+# define gztell gztell64
+# define gzoffset gzoffset64
+# define adler32_combine adler32_combine64
+# define crc32_combine crc32_combine64
+# endif
+# ifndef Z_LARGE64
+ ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+ ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int));
+ ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile));
+ ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
+ ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
+ ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
+# endif
+#else
+ ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
+ ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int));
+ ZEXTERN z_off_t ZEXPORT gztell OF((gzFile));
+ ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
+ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
+ ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+#endif
+
+#else /* Z_SOLO */
+
+ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
+ ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+
+#endif /* !Z_SOLO */
+
+/* hack for buggy compilers */
+#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL)
+ struct internal_state {int dummy;};
+#endif
+
+/* undocumented functions */
+ZEXTERN const char * ZEXPORT zError OF((int));
+ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp));
+ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void));
+ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int));
+ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp));
+ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp));
+#if defined(_WIN32) && !defined(Z_SOLO)
+ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path,
+ const char *mode));
+#endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+# ifndef Z_SOLO
+ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file,
+ const char *format,
+ va_list va));
+# endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ZLIB_H */
#include <stdint.h>
#include "cspace.h"
+#include "compiler_features.h"
/*
* note: these are intended for testing and should be avoided
#ifndef HAVE_bgr888_to_x
-void bgr888_to_rgb565(void *dst_, const void *src_, int bytes)
+void attr_weak bgr888_to_rgb565(void *dst_, const void *src_, int bytes)
{
const unsigned char *src = src_;
unsigned int *dst = dst_;
void rgb888_to_rgb565(void *dst, const void *src, int bytes) {}
void bgr888_to_rgb888(void *dst, const void *src, int bytes) {}
-#endif // __ARM_NEON__
+#endif // HAVE_bgr888_to_x
+
+void bgr555_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
+{
+ const uint16_t * __restrict__ src = src_;
+ uint32_t * __restrict__ dst = dst_;
+
+ for (; bytes >= 2; bytes -= 2, src++, dst++)
+ {
+ uint32_t t = ((*src << 19) | (*src >> 7)) & 0xf800f8;
+ t |= (*src << 6) & 0xf800;
+ *dst = t | ((t >> 5) & 0x070707);
+ }
+}
+
+void bgr888_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
+{
+ const uint8_t * __restrict__ src = src_;
+ uint32_t * __restrict__ dst = dst_;
+
+ for (; bytes >= 3; bytes -= 3, src += 3, dst++)
+ *dst = (src[0] << 16) | (src[1] << 8) | src[2];
+}
/* YUV stuff */
static int yuv_ry[32], yuv_gy[32], yuv_by[32];
static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];
+static struct uyvy { uint32_t y:8; uint32_t vyu:24; } yuv_uyvy[32768];
void bgr_to_uyvy_init(void)
{
- int i, v;
-
- /* init yuv converter:
- y0 = (int)((0.299f * r0) + (0.587f * g0) + (0.114f * b0));
- y1 = (int)((0.299f * r1) + (0.587f * g1) + (0.114f * b1));
- u = (int)(8 * 0.565f * (b0 - y0)) + 128;
- v = (int)(8 * 0.713f * (r0 - y0)) + 128;
- */
- for (i = 0; i < 32; i++) {
- yuv_ry[i] = (int)(0.299f * i * 65536.0f + 0.5f);
- yuv_gy[i] = (int)(0.587f * i * 65536.0f + 0.5f);
- yuv_by[i] = (int)(0.114f * i * 65536.0f + 0.5f);
- }
- for (i = -32; i < 32; i++) {
- v = (int)(8 * 0.565f * i) + 128;
- if (v < 0)
- v = 0;
- if (v > 255)
- v = 255;
- yuv_u[i + 32] = v;
- v = (int)(8 * 0.713f * i) + 128;
- if (v < 0)
- v = 0;
- if (v > 255)
- v = 255;
- yuv_v[i + 32] = v;
- }
+ unsigned char yuv_y[256];
+ int i, v;
+
+ /* init yuv converter:
+ y0 = (int)((0.299f * r0) + (0.587f * g0) + (0.114f * b0));
+ y1 = (int)((0.299f * r1) + (0.587f * g1) + (0.114f * b1));
+ u = (int)(8 * 0.565f * (b0 - y0)) + 128;
+ v = (int)(8 * 0.713f * (r0 - y0)) + 128;
+ */
+ for (i = 0; i < 32; i++) {
+ yuv_ry[i] = (int)(0.299f * i * 65536.0f + 0.5f);
+ yuv_gy[i] = (int)(0.587f * i * 65536.0f + 0.5f);
+ yuv_by[i] = (int)(0.114f * i * 65536.0f + 0.5f);
+ }
+ for (i = -32; i < 32; i++) {
+ v = (int)(8 * 0.565f * i) + 128;
+ if (v < 0)
+ v = 0;
+ if (v > 255)
+ v = 255;
+ yuv_u[i + 32] = v;
+ v = (int)(8 * 0.713f * i) + 128;
+ if (v < 0)
+ v = 0;
+ if (v > 255)
+ v = 255;
+ yuv_v[i + 32] = v;
+ }
+ // valid Y range seems to be 16..235
+ for (i = 0; i < 256; i++) {
+ yuv_y[i] = 16 + 219 * i / 32;
+ }
+ // everything combined into one large array for speed
+ for (i = 0; i < 32768; i++) {
+ int r = (i >> 0) & 0x1f, g = (i >> 5) & 0x1f, b = (i >> 10) & 0x1f;
+ int y = (yuv_ry[r] + yuv_gy[g] + yuv_by[b]) >> 16;
+ yuv_uyvy[i].y = yuv_y[y];
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ yuv_uyvy[i].vyu = (yuv_v[b-y + 32] << 16) | (yuv_y[y] << 8) | yuv_u[r-y + 32];
+#else
+ yuv_uyvy[i].vyu = (yuv_v[r-y + 32] << 16) | (yuv_y[y] << 8) | yuv_u[b-y + 32];
+#endif
+ }
}
void rgb565_to_uyvy(void *d, const void *s, int pixels)
}
}
-void bgr555_to_uyvy(void *d, const void *s, int pixels)
+void bgr555_to_uyvy(void *d, const void *s, int pixels, int x2)
{
- unsigned int *dst = d;
- const unsigned short *src = s;
- const unsigned char *yu = yuv_u + 32;
- const unsigned char *yv = yuv_v + 32;
- int r0, g0, b0, r1, g1, b1;
- int y0, y1, u, v;
-
- for (; pixels > 1; src += 2, dst++, pixels -= 2)
- {
- b0 = (src[0] >> 10) & 0x1f;
- g0 = (src[0] >> 5) & 0x1f;
- r0 = src[0] & 0x1f;
- b1 = (src[1] >> 10) & 0x1f;
- g1 = (src[1] >> 5) & 0x1f;
- r1 = src[1] & 0x1f;
- y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16;
- y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16;
- u = yu[b0 - y0];
- v = yv[r0 - y0];
- y0 = 16 + 219 * y0 / 31;
- y1 = 16 + 219 * y1 / 31;
-
- *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u;
- }
+ uint32_t *dst = d;
+ const uint16_t *src = s;
+ int i;
+
+ if (x2) {
+ for (i = pixels; i >= 4; src += 4, dst += 4, i -= 4)
+ {
+ const struct uyvy *uyvy0 = yuv_uyvy + (src[0] & 0x7fff);
+ const struct uyvy *uyvy1 = yuv_uyvy + (src[1] & 0x7fff);
+ const struct uyvy *uyvy2 = yuv_uyvy + (src[2] & 0x7fff);
+ const struct uyvy *uyvy3 = yuv_uyvy + (src[3] & 0x7fff);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ dst[0] = uyvy0->y | (uyvy0->vyu << 8);
+ dst[1] = uyvy1->y | (uyvy1->vyu << 8);
+ dst[2] = uyvy2->y | (uyvy2->vyu << 8);
+ dst[3] = uyvy3->y | (uyvy3->vyu << 8);
+#else
+ dst[0] = (uyvy0->y << 24) | uyvy0->vyu;
+ dst[1] = (uyvy1->y << 24) | uyvy1->vyu;
+ dst[2] = (uyvy2->y << 24) | uyvy2->vyu;
+ dst[3] = (uyvy3->y << 24) | uyvy3->vyu;
+#endif
+ }
+ } else {
+ for (i = pixels; i >= 4; src += 4, dst += 2, i -= 4)
+ {
+ const struct uyvy *uyvy0 = yuv_uyvy + (src[0] & 0x7fff);
+ const struct uyvy *uyvy1 = yuv_uyvy + (src[1] & 0x7fff);
+ const struct uyvy *uyvy2 = yuv_uyvy + (src[2] & 0x7fff);
+ const struct uyvy *uyvy3 = yuv_uyvy + (src[3] & 0x7fff);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ dst[0] = uyvy1->y | (uyvy0->vyu << 8);
+ dst[1] = uyvy3->y | (uyvy2->vyu << 8);
+#else
+ dst[0] = (uyvy1->y << 24) | uyvy0->vyu;
+ dst[1] = (uyvy3->y << 24) | uyvy2->vyu;
+#endif
+ }
+ }
}
-void bgr888_to_uyvy(void *d, const void *s, int pixels)
+void bgr888_to_uyvy(void *d, const void *s, int pixels, int x2)
{
- unsigned int *dst = d;
- const unsigned char *src8 = s;
- const unsigned char *yu = yuv_u + 32;
- const unsigned char *yv = yuv_v + 32;
- int r0, g0, b0, r1, g1, b1;
- int y0, y1, u, v;
-
- for (; pixels > 0; src8 += 3*2, dst++, pixels -= 2)
- {
- r0 = src8[0], g0 = src8[1], b0 = src8[2];
- r1 = src8[3], g1 = src8[4], b1 = src8[5];
- y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16;
- y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16;
- u = yu[(b0 - y0) / 8];
- v = yv[(r0 - y0) / 8];
- y0 = 16 + 219 * y0 / 255;
- y1 = 16 + 219 * y1 / 255;
-
- *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u;
- }
+ unsigned int *dst = d;
+ const unsigned char *src8 = s;
+ const unsigned char *yu = yuv_u + 32;
+ const unsigned char *yv = yuv_v + 32;
+ int r0, g0, b0, r1, g1, b1;
+ int y0, y1, u0, u1, v0, v1;
+
+ if (x2) {
+ for (; pixels >= 2; src8 += 3*2, pixels -= 2)
+ {
+ r0 = src8[0], g0 = src8[1], b0 = src8[2];
+ r1 = src8[3], g1 = src8[4], b1 = src8[5];
+ y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16;
+ y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16;
+ u0 = yu[(b0 - y0) / 8];
+ u1 = yu[(b1 - y1) / 8];
+ v0 = yv[(r0 - y0) / 8];
+ v1 = yv[(r1 - y1) / 8];
+ y0 = 16 + 219 * y0 / 255;
+ y1 = 16 + 219 * y1 / 255;
+
+ *dst++ = (y0 << 24) | (v0 << 16) | (y0 << 8) | u0;
+ *dst++ = (y1 << 24) | (v1 << 16) | (y1 << 8) | u1;
+ }
+ }
+ else {
+ for (; pixels >= 2; src8 += 3*2, dst++, pixels -= 2)
+ {
+ r0 = src8[0], g0 = src8[1], b0 = src8[2];
+ r1 = src8[3], g1 = src8[4], b1 = src8[5];
+ y0 = (r0 * 19595 + g0 * 38470 + b0 * 7471) >> 16;
+ y1 = (r1 * 19595 + g1 * 38470 + b1 * 7471) >> 16;
+ u0 = yu[(b0 - y0) / 8];
+ v0 = yv[(r0 - y0) / 8];
+ y0 = 16 + 219 * y0 / 255;
+ y1 = 16 + 219 * y1 / 255;
+
+ *dst = (y1 << 24) | (v0 << 16) | (y0 << 8) | u0;
+ }
+ }
}
void bgr555_to_rgb565_b(void *dst, const void *src, int bytes,
int brightness2k); // 0-0x0800
+void bgr555_to_xrgb8888(void *dst, const void *src, int bytes);
+void bgr888_to_xrgb8888(void *dst, const void *src, int bytes);
+
void bgr_to_uyvy_init(void);
void rgb565_to_uyvy(void *d, const void *s, int pixels);
-void bgr555_to_uyvy(void *d, const void *s, int pixels);
-void bgr888_to_uyvy(void *d, const void *s, int pixels);
+void bgr555_to_uyvy(void *d, const void *s, int pixels, int x2);
+void bgr888_to_uyvy(void *d, const void *s, int pixels, int x2);
#ifdef __cplusplus
}
orr \rn, r12, lsl #6
.endm
+.macro bgr555_to_rgb565_one_i rn1 rn2
+ and r12, lr, \rn1, lsr #5
+ and \rn1,lr, \rn1, lsr #10
+ orr r12, r11, lsl #5
+ and r11, lr, \rn2
+ orr \rn1,r12, lsl #6
+.endm
+
.macro pld_ reg offs=#0
#ifdef HAVE_ARMV6
pld [\reg, \offs]
.endm
FUNCTION(bgr555_to_rgb565): @ void *dst, const void *src, int bytes
- pld_ r1
push {r4-r11,lr}
mov lr, #0x001f
subs r2, #4*8
0:
ldmia r1!, {r3-r10}
subs r2, #4*8
- bgr555_to_rgb565_one r3
-
- pld_ r1, #32*2
- bgr555_to_rgb565_one r4
- bgr555_to_rgb565_one r5
- bgr555_to_rgb565_one r6
- bgr555_to_rgb565_one r7
- bgr555_to_rgb565_one r8
- bgr555_to_rgb565_one r9
- bgr555_to_rgb565_one r10
+ bic r12, r1, #0x1f
+ pld_ r12, #32*1
+ and r11, lr, r3
+ bgr555_to_rgb565_one_i r3 r4
+ bgr555_to_rgb565_one_i r4 r5
+ bgr555_to_rgb565_one_i r5 r6
+ bgr555_to_rgb565_one_i r6 r7
+ bgr555_to_rgb565_one_i r7 r8
+ bgr555_to_rgb565_one_i r8 r9
+ bgr555_to_rgb565_one_i r9 r10
+ bgr555_to_rgb565_one_i r10 r10
stmia r0!, {r3-r10}
bge 0b
bgt 2b
pop {r4-r11,pc}
+
+
+#ifdef HAVE_ARMV6 /* v6-only due to potential misaligned reads */
+
+# r1b0g0r0 g2r2b1g1 b3g3r3b2
+FUNCTION(bgr888_to_rgb565):
+ pld [r1]
+ push {r4-r10,lr}
+
+ mov r10, #0x001f @ b mask
+ mov r12, #0x07e0 @ g mask
+ mov lr, #0xf800 @ r mask
+
+0:
+ ldr r3, [r1], #4 @ may be unaligned
+ ldr r4, [r1], #4
+ ldr r5, [r1], #4
+ pld [r1, #32*1]
+ and r6, r10,r3, lsr #16+3 @ b0
+ and r7, r12,r3, lsr #5 @ g0
+ and r8, lr, r3, lsl #8 @ r0
+ and r9, lr, r3, lsr #16 @ r1
+ orr r6, r6, r7
+ orr r6, r6, r8 @ r0g0b0
+
+ and r7, r12,r4, lsl #3 @ g1
+ and r8, r10,r4, lsr #11 @ b1
+ orr r9, r9, r7
+ orr r9, r9, r8 @ r1g1b1
+ and r7, lr, r4, lsr #8 @ r2
+ and r8, r12,r4, lsr #21 @ g2
+ pkhbt r9, r6, r9, lsl #16
+ str r9, [r0], #4
+
+ and r6, r10,r5, lsr #3 @ b2
+ orr r7, r7, r8
+ orr r6, r6, r7 @ r2g2b2
+ and r7, lr, r5 @ r3
+ and r8, r12,r5, lsr #13 @ g3
+ orr r7, r7, r5, lsr #27 @ r3b3
+ orr r7, r7, r8 @ r3g3b3
+ pkhbt r7, r6, r7, lsl #16
+ str r7, [r0], #4
+ subs r2, r2, #12
+ bgt 0b
+
+ pop {r4-r10,pc}
+
+#endif /* HAVE_ARMV6 */
-Subproject commit a8ded55fc9df952b5582a6da72e1de887e65a34b
+Subproject commit da09f8f20da6548e1debf4abb9840eb9f130678d
//#include <linux/cdrom.h>
#endif
+#include "../libpcsxcore/psxcommon.h"
+#include "../libpcsxcore/cdrom.h"
+
+//#include "vfs/vfs_implementation.h"
+#include "vfs/vfs_implementation_cdrom.h"
+
static int cdrom_send_command_dummy(const libretro_vfs_implementation_file *stream,
CDROM_CMD_Direction dir, void *buf, size_t len, unsigned char *cmd, size_t cmd_len,
unsigned char *sense, size_t sense_len)
return ret;
}
-int cdrom_read_sector(libretro_vfs_implementation_file *stream,
- unsigned int lba, void *b)
+int rcdrom_readSector(void *stream, unsigned int lba, void *b)
{
unsigned char cmd[] = {0xBE, 0, 0, 0, 0, 0, 0, 0, 1, 0xF8, 0, 0};
cmd[2] = lba >> 24;
return cdrom_send_command_once(stream, DIRECTION_IN, b, 2352, cmd, sizeof(cmd));
}
+void *rcdrom_open(const char *name, u32 *total_lba, u32 *have_subchannel)
+{
+ void *g_cd_handle = retro_vfs_file_open_impl(name, RETRO_VFS_FILE_ACCESS_READ,
+ RETRO_VFS_FILE_ACCESS_HINT_NONE);
+ if (!g_cd_handle) {
+ SysPrintf("retro_vfs_file_open failed for '%s'\n", name);
+ return NULL;
+ }
+ else {
+ int ret = cdrom_set_read_speed_x(g_cd_handle, 4);
+ if (ret) SysPrintf("CD speed set failed\n");
+ const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc();
+ const cdrom_track_t *last = &toc->track[toc->num_tracks - 1];
+ unsigned int lba = MSF2SECT(last->min, last->sec, last->frame);
+ *total_lba = lba + last->track_size;
+ *have_subchannel = 0;
+ //cdrom_get_current_config_random_readable(acdrom.h);
+ //cdrom_get_current_config_multiread(acdrom.h);
+ //cdrom_get_current_config_cdread(acdrom.h);
+ //cdrom_get_current_config_profiles(acdrom.h);
+ return g_cd_handle;
+ }
+}
+
+void rcdrom_close(void *stream)
+{
+ retro_vfs_file_close_impl(stream);
+}
+
+int rcdrom_getTN(void *stream, u8 *tn)
+{
+ const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc();
+ if (toc) {
+ tn[0] = 1;
+ tn[1] = toc->num_tracks;
+ return 0;
+ }
+ return -1;
+}
+
+int rcdrom_getTD(void *stream, u32 total_lba, u8 track, u8 *rt)
+{
+ const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc();
+ rt[0] = 0, rt[1] = 2, rt[2] = 0;
+ if (track == 0) {
+ lba2msf(total_lba + 150, &rt[0], &rt[1], &rt[2]);
+ }
+ else if (track <= toc->num_tracks) {
+ int i = track - 1;
+ rt[0] = toc->track[i].min;
+ rt[1] = toc->track[i].sec;
+ rt[2] = toc->track[i].frame;
+ }
+ return 0;
+}
+
+int rcdrom_getStatus(void *stream, struct CdrStat *stat)
+{
+ const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc();
+ stat->Type = toc->track[0].audio ? 2 : 1;
+ return 0;
+}
+
+int rcdrom_isMediaInserted(void *stream)
+{
+ return cdrom_is_media_inserted(stream);
+}
+
+int rcdrom_readSub(void *stream, unsigned int lba, void *b)
+{
+ return -1;
+}
+
// vim:sw=3:ts=3:expandtab
--- /dev/null
+retro_api_version
+retro_cheat_reset
+retro_cheat_set
+retro_deinit
+retro_get_memory_data
+retro_get_memory_size
+retro_get_region
+retro_get_system_av_info
+retro_get_system_info
+retro_init
+retro_load_game
+retro_load_game_special
+retro_reset
+retro_run
+retro_serialize
+retro_serialize_size
+retro_set_audio_sample
+retro_set_audio_sample_batch
+retro_set_controller_port_device
+retro_set_environment
+retro_set_input_poll
+retro_set_input_state
+retro_set_video_refresh
+retro_unload_game
+retro_unserialize
--- /dev/null
+EXTERN(retro_api_version)
+EXTERN(retro_cheat_reset)
+EXTERN(retro_cheat_set)
+EXTERN(retro_deinit)
+EXTERN(retro_get_memory_data)
+EXTERN(retro_get_memory_size)
+EXTERN(retro_get_region)
+EXTERN(retro_get_system_av_info)
+EXTERN(retro_get_system_info)
+EXTERN(retro_init)
+EXTERN(retro_load_game)
+EXTERN(retro_load_game_special)
+EXTERN(retro_reset)
+EXTERN(retro_run)
+EXTERN(retro_serialize)
+EXTERN(retro_serialize_size)
+EXTERN(retro_set_audio_sample)
+EXTERN(retro_set_audio_sample_batch)
+EXTERN(retro_set_controller_port_device)
+EXTERN(retro_set_environment)
+EXTERN(retro_set_input_poll)
+EXTERN(retro_set_input_state)
+EXTERN(retro_set_video_refresh)
+EXTERN(retro_unload_game)
+EXTERN(retro_unserialize)
--- /dev/null
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE // *_np
+#endif
+#ifdef _3DS
+#include <3ds/svc.h>
+#include <3ds/os.h>
+#include <3ds/services/apt.h>
+#include <sys/time.h>
+#endif
+
+#include "../deps/libretro-common/rthreads/rthreads.c"
+#include "features/features_cpu.h"
+#include "libretro-rthreads.h"
+
+// pcsxr "extensions"
+extern void SysPrintf(const char *fmt, ...);
+
+#ifdef _3DS
+static bool is_new_3ds;
+#endif
+
+void pcsxr_sthread_init(void)
+{
+ SysPrintf("%d cpu core(s) detected\n", cpu_features_get_core_amount());
+#ifdef _3DS
+ int64_t version = 0;
+ int fpscr = -1;
+
+ APT_CheckNew3DS(&is_new_3ds);
+ svcGetSystemInfo(&version, 0x10000, 0);
+
+ APT_SetAppCpuTimeLimit(35);
+ u32 percent = -1;
+ APT_GetAppCpuTimeLimit(&percent);
+
+ __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
+ SysPrintf("%s3ds detected, v%d.%d, AppCpuTimeLimit=%ld fpscr=%08x\n",
+ is_new_3ds ? "new" : "old", (int)GET_VERSION_MAJOR(version),
+ (int)GET_VERSION_MINOR(version), percent, fpscr);
+#endif
+}
+
+sthread_t *pcsxr_sthread_create(void (*thread_func)(void *),
+ enum pcsxr_thread_type type)
+{
+ sthread_t *h = NULL;
+#ifdef _3DS
+ Thread ctr_thread;
+ int core_id = 0;
+ s32 prio = 0x30;
+
+ h = calloc(1, sizeof(*h));
+ if (!h)
+ return NULL;
+
+ svcGetThreadPriority(&prio, CUR_THREAD_HANDLE);
+
+ switch (type) {
+ case PCSXRT_CDR:
+ case PCSXRT_SPU:
+ core_id = 1;
+ break;
+ case PCSXRT_DRC:
+ case PCSXRT_GPU:
+ core_id = is_new_3ds ? 2 : 1;
+ break;
+ case PCSXRT_COUNT:
+ break;
+ }
+
+ ctr_thread = threadCreate(thread_func, NULL, STACKSIZE, prio, core_id, false);
+ if (!ctr_thread) {
+ if (core_id == 1) {
+ SysPrintf("threadCreate pcsxt %d core %d failed\n",
+ type, core_id);
+ core_id = is_new_3ds ? 2 : -1;
+ ctr_thread = threadCreate(thread_func, NULL, STACKSIZE,
+ prio, core_id, false);
+ }
+ }
+ if (!ctr_thread) {
+ SysPrintf("threadCreate pcsxt %d core %d failed\n", type, core_id);
+ free(h);
+ return NULL;
+ }
+ h->id = (pthread_t)ctr_thread;
+#else
+ h = sthread_create(thread_func, NULL);
+ #if defined(__GLIBC__) || \
+ (defined(__ANDROID_API__) && __ANDROID_API__ >= 26)
+ if (h && (unsigned int)type < (unsigned int)PCSXRT_COUNT)
+ {
+ const char * const pcsxr_tnames[PCSXRT_COUNT] = {
+ "pcsxr-cdrom", "pcsxr-drc", "pcsxr-gpu", "pcsxr-spu"
+ };
+ pthread_setname_np(h->id, pcsxr_tnames[type]);
+ }
+ #endif
+#endif
+ return h;
+}
--- /dev/null
+#ifndef __LIBRETRO_PCSXR_RTHREADS_H__
+#define __LIBRETRO_PCSXR_RTHREADS_H__
+
+#include "rthreads/rthreads.h"
+
+enum pcsxr_thread_type
+{
+ PCSXRT_CDR = 0,
+ PCSXRT_DRC,
+ PCSXRT_GPU,
+ PCSXRT_SPU,
+ PCSXRT_COUNT // must be last
+};
+
+void pcsxr_sthread_init(void);
+sthread_t *pcsxr_sthread_create(void (*thread_func)(void*),
+ enum pcsxr_thread_type type);
+
+#endif // __LIBRETRO_PCSXR_RTHREADS_H__
#define _GNU_SOURCE 1 // strcasestr
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include <strings.h>
+#include <assert.h>
#ifdef __MACH__
#include <unistd.h>
#include <sys/syscall.h>
#include "../libpcsxcore/psxmem_map.h"
#include "../libpcsxcore/new_dynarec/new_dynarec.h"
#include "../libpcsxcore/cdrom.h"
+#include "../libpcsxcore/cdrom-async.h"
#include "../libpcsxcore/cdriso.h"
#include "../libpcsxcore/cheat.h"
#include "../libpcsxcore/r3000a.h"
#endif
#ifdef _3DS
+#include <3ds/svc.h>
+#include <3ds/services/apt.h>
+#include <3ds/allocator/linear.h>
#include "3ds/3ds_utils.h"
#endif
+#ifndef MAP_FAILED
+#define MAP_FAILED ((void *)(intptr_t)-1)
+#endif
+
#define PORTS_NUMBER 8
#ifndef MIN
#define ISHEXDEC ((buf[cursor] >= '0') && (buf[cursor] <= '9')) || ((buf[cursor] >= 'a') && (buf[cursor] <= 'f')) || ((buf[cursor] >= 'A') && (buf[cursor] <= 'F'))
-#define INTERNAL_FPS_SAMPLE_PERIOD 64
-
//hack to prevent retroarch freezing when reseting in the menu but not while running with the hot key
static int rebootemu = 0;
static void *vout_buf;
static void *vout_buf_ptr;
-static int vout_width = 256, vout_height = 240, vout_pitch = 256;
+static int vout_width = 256, vout_height = 240, vout_pitch_b = 256*2;
static int vout_fb_dirty;
static int psx_w, psx_h;
static bool vout_can_dupe;
static bool found_bios;
-static bool display_internal_fps = false;
-static unsigned frame_count = 0;
+static int display_internal_fps;
static bool libretro_supports_bitmasks = false;
static bool libretro_supports_option_categories = false;
static bool show_input_settings = true;
static unsigned retro_audio_latency = 0;
static int update_audio_latency = false;
-static unsigned previous_width = 0;
-static unsigned previous_height = 0;
+static unsigned int current_width;
+static unsigned int current_height;
+static enum retro_pixel_format current_fmt;
static int plugins_opened;
static bool axis_bounds_modifier;
/* PSX max resolution is 640x512, but with enhancement it's 1024x512 */
+#ifdef GPU_NEON
#define VOUT_MAX_WIDTH 1024
+#else
+#define VOUT_MAX_WIDTH 640
+#endif
#define VOUT_MAX_HEIGHT 512
//Dummy functions
}
}
-static void set_vout_fb()
+static void bgr_to_fb_empty(void *dst, const void *src, int bytes)
+{
+}
+
+typedef void (bgr_to_fb_func)(void *dst, const void *src, int bytes);
+static bgr_to_fb_func *g_bgr_to_fb = bgr_to_fb_empty;
+
+static void set_bgr_to_fb_func(int bgr24)
+{
+ switch (current_fmt)
+ {
+ case RETRO_PIXEL_FORMAT_XRGB8888:
+ g_bgr_to_fb = bgr24 ? bgr888_to_xrgb8888 : bgr555_to_xrgb8888;
+ break;
+ case RETRO_PIXEL_FORMAT_RGB565:
+ g_bgr_to_fb = bgr24 ? bgr888_to_rgb565 : bgr555_to_rgb565;
+ break;
+ default:
+ LogErr("unsupported current_fmt: %d\n", current_fmt);
+ g_bgr_to_fb = bgr_to_fb_empty;
+ break;
+ }
+}
+
+static void set_vout_fb(void)
{
struct retro_framebuffer fb = { 0 };
+ bool ret;
fb.width = vout_width;
fb.height = vout_height;
fb.access_flags = RETRO_MEMORY_ACCESS_WRITE;
- vout_pitch = vout_width;
- if (environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, &fb)
- && fb.format == RETRO_PIXEL_FORMAT_RGB565
- && vout_can_dupe)
+ ret = environ_cb(RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER, &fb);
+ if (ret && vout_can_dupe &&
+ (fb.format == RETRO_PIXEL_FORMAT_RGB565 || fb.format == RETRO_PIXEL_FORMAT_XRGB8888))
{
+ int bytes_pp = (fb.format == RETRO_PIXEL_FORMAT_XRGB8888) ? 4 : 2;
+ if (current_fmt != fb.format) {
+ LogWarn("fb.format changed: %d->%d\n", current_fmt, fb.format);
+ current_fmt = fb.format;
+ }
vout_buf_ptr = fb.data;
- if (fb.pitch / 2 != vout_pitch && fb.pitch != vout_width * 2)
- LogWarn("got unusual pitch %zd for resolution %dx%d\n", fb.pitch, vout_width, vout_height);
- vout_pitch = fb.pitch / 2;
+ vout_pitch_b = fb.pitch;
+ if (fb.pitch != vout_width * bytes_pp)
+ LogWarn("got unusual pitch %zd for fmt %d resolution %dx%d\n",
+ fb.pitch, fb.format, vout_width, vout_height);
}
else
+ {
+ int bytes_pp = (current_fmt == RETRO_PIXEL_FORMAT_XRGB8888) ? 4 : 2;
vout_buf_ptr = vout_buf;
+ vout_pitch_b = vout_width * bytes_pp;
+ }
}
static void vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp)
psx_w = raw_w;
psx_h = raw_h;
- if (previous_width != vout_width || previous_height != vout_height)
+ /* it may seem like we could do RETRO_ENVIRONMENT_SET_PIXEL_FORMAT here to
+ * switch to something that can accommodate bgr24 for FMVs, but although it
+ * succeeds it doesn't actually change the format at least on Linux, and the
+ * docs say only retro_load_game() can do it */
+
+ if (current_width != vout_width || current_height != vout_height)
{
- previous_width = vout_width;
- previous_height = vout_height;
+ current_width = vout_width;
+ current_height = vout_height;
struct retro_system_av_info info;
retro_get_system_av_info(&info);
}
set_vout_fb();
+ set_bgr_to_fb_func(bpp == 24);
}
-#ifndef FRONTEND_SUPPORTS_RGB565
-static void convert(void *buf, size_t bytes)
-{
- unsigned int i, v, *p = buf;
-
- for (i = 0; i < bytes / 4; i++)
- {
- v = p[i];
- p[i] = (v & 0x001f001f) | ((v >> 1) & 0x7fe07fe0);
- }
-}
-#endif
-
// Function to add crosshairs
-static void addCrosshair(int port, int crosshair_color, unsigned short *buffer, int bufferStride, int pos_x, int pos_y, int thickness, int size_x, int size_y) {
+static void addCrosshair(int port, int crosshair_color, unsigned short *buffer, int bufferStride, int pos_x, int pos_y, int thickness, int size_x, int size_y)
+{
for (port = 0; port < 2; port++) {
// Draw the horizontal line of the crosshair
int i, j;
info->size_y = psx_h * (pl_rearmed_cbs.gpu_neon.enhancement_enable ? 2 : 1) * (4.0f / 3.0f) / 40.0f;
}
-static void vout_flip(const void *vram, int stride, int bgr24,
+static void vout_flip(const void *vram_, int vram_ofs, int bgr24,
int x, int y, int w, int h, int dims_changed)
{
- unsigned short *dest = vout_buf_ptr;
- const unsigned short *src = vram;
- int dstride = vout_pitch, h1 = h;
- int port = 0;
+ int bytes_pp = (current_fmt == RETRO_PIXEL_FORMAT_XRGB8888) ? 4 : 2;
+ int bytes_pp_s = bgr24 ? 3 : 2;
+ bgr_to_fb_func *bgr_to_fb = g_bgr_to_fb;
+ unsigned char *dest = vout_buf_ptr;
+ const unsigned char *vram = vram_;
+ int dstride = vout_pitch_b, h1 = h;
+ int enhres = w > psx_w;
+ u32 vram_mask = enhres ? ~0 : 0xfffff;
+ int port = 0, hwrapped;
if (vram == NULL || dims_changed || (in_enable_crosshair[0] + in_enable_crosshair[1]) > 0)
{
- memset(vout_buf_ptr, 0, dstride * vout_height * 2);
+ unsigned char *dest2 = dest;
+ int h2 = h, ll = vout_width * bytes_pp;
+ if (dstride == ll)
+ memset(dest2, 0, dstride * vout_height);
+ else
+ for (; h2-- > 0; dest2 += dstride)
+ memset(dest2, 0, ll);
// blanking
if (vram == NULL)
goto out;
}
- dest += x + y * dstride;
+ dest += x * bytes_pp + y * dstride;
- if (bgr24)
- {
- // XXX: could we switch to RETRO_PIXEL_FORMAT_XRGB8888 here?
- for (; h1-- > 0; dest += dstride, src += stride)
- {
- bgr888_to_rgb565(dest, src, w * 3);
- }
+ for (; h1-- > 0; dest += dstride) {
+ bgr_to_fb(dest, vram + vram_ofs, w * bytes_pp_s);
+ vram_ofs = (vram_ofs + 2048) & vram_mask;
}
- else
- {
- for (; h1-- > 0; dest += dstride, src += stride)
- {
- bgr555_to_rgb565(dest, src, w * 2);
+
+ hwrapped = (vram_ofs & 2047) + w * bytes_pp_s - 2048;
+ if (!enhres && hwrapped > 0) {
+ // this is super-rare so just fix-up
+ vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+ dest -= dstride * h;
+ dest += (w - hwrapped / bytes_pp_s) * bytes_pp;
+ for (h1 = h; h1-- > 0; dest += dstride) {
+ bgr_to_fb(dest, vram + vram_ofs, hwrapped);
+ vram_ofs = (vram_ofs + 2048) & 0xfffff;
}
}
+ if (current_fmt == RETRO_PIXEL_FORMAT_RGB565)
for (port = 0; port < 2; port++) {
if (in_enable_crosshair[port] > 0 && (in_type[port] == PSE_PAD_TYPE_GUNCON || in_type[port] == PSE_PAD_TYPE_GUN))
{
struct CrosshairInfo crosshairInfo;
CrosshairDimensions(port, &crosshairInfo);
- addCrosshair(port, in_enable_crosshair[port], dest, dstride, crosshairInfo.pos_x, crosshairInfo.pos_y, crosshairInfo.thickness, crosshairInfo.size_x, crosshairInfo.size_y);
+ addCrosshair(port, in_enable_crosshair[port], (unsigned short *)dest,
+ dstride / 2, crosshairInfo.pos_x, crosshairInfo.pos_y,
+ crosshairInfo.thickness, crosshairInfo.size_x, crosshairInfo.size_y);
}
}
out:
-#ifndef FRONTEND_SUPPORTS_RGB565
- convert(vout_buf_ptr, vout_pitch * vout_height * 2);
-#endif
vout_fb_dirty = 1;
pl_rearmed_cbs.flip_cnt++;
}
#ifdef _3DS
-typedef struct
-{
- void *buffer;
- uint32_t target_map;
- size_t size;
- enum psxMapTag tag;
-} psx_map_t;
-
-psx_map_t custom_psx_maps[] = {
- { NULL, 0x13000000, 0x210000, MAP_TAG_RAM }, // 0x80000000
- { NULL, 0x12800000, 0x010000, MAP_TAG_OTHER }, // 0x1f800000
- { NULL, 0x12c00000, 0x080000, MAP_TAG_OTHER }, // 0x1fc00000
- { NULL, 0x11000000, 0x800000, MAP_TAG_LUTS }, // 0x08000000
- { NULL, 0x12000000, 0x201000, MAP_TAG_VRAM }, // 0x00000000
-};
+static u32 mapped_addrs[8];
+static u32 mapped_ram, mapped_ram_src;
+static void *vram_mem;
-void *pl_3ds_mmap(unsigned long addr, size_t size, int is_fixed,
- enum psxMapTag tag)
+// http://3dbrew.org/wiki/Memory_layout#ARM11_User-land_memory_regions
+static void *pl_3ds_mmap(unsigned long addr, size_t size,
+ enum psxMapTag tag, int *can_retry_addr)
{
- (void)is_fixed;
+ void *ret = MAP_FAILED;
+ *can_retry_addr = 0;
(void)addr;
- if (__ctr_svchax)
+ if (tag == MAP_TAG_VRAM && vram_mem)
+ return vram_mem;
+
+ if (__ctr_svchax) do
{
- psx_map_t *custom_map = custom_psx_maps;
+ // idea from fbalpha2012_neogeo
+ s32 addr = 0x10000000 - 0x1000;
+ u32 found_addr = 0;
+ MemInfo mem_info;
+ PageInfo page_info;
+ size_t i;
+ int r;
- for (; custom_map->size; custom_map++)
+ for (i = 0; i < sizeof(mapped_addrs) / sizeof(mapped_addrs[0]); i++)
+ if (mapped_addrs[i] == 0)
+ break;
+ if (i == sizeof(mapped_addrs) / sizeof(mapped_addrs[0]))
+ break;
+
+ size = (size + 0xfff) & ~0xfff;
+
+ while (addr >= 0x08000000)
{
- if ((custom_map->size == size) && (custom_map->tag == tag))
- {
- uint32_t ptr_aligned, tmp;
- void *ret;
+ if ((r = svcQueryMemory(&mem_info, &page_info, addr)) < 0) {
+ LogErr("svcQueryMemory failed: %d\n", r);
+ break;
+ }
- custom_map->buffer = malloc(size + 0x1000);
- ptr_aligned = (((u32)custom_map->buffer) + 0xFFF) & ~0xFFF;
+ if (mem_info.state == MEMSTATE_FREE && mem_info.size >= size) {
+ found_addr = mem_info.base_addr + mem_info.size - size;
+ break;
+ }
- if (svcControlMemory(&tmp, (void *)custom_map->target_map, (void *)ptr_aligned, size, MEMOP_MAP, 0x3) < 0)
- {
- LogErr("could not map memory @0x%08X\n", custom_map->target_map);
- exit(1);
- }
+ addr = mem_info.base_addr - 0x1000;
+ }
+ if (found_addr == 0) {
+ LogErr("no addr space for %u bytes\n", size);
+ break;
+ }
- ret = (void *)custom_map->target_map;
- memset(ret, 0, size);
- return ret;
+ // https://libctru.devkitpro.org/svc_8h.html#a8046e9b23b1b209a4e278cb1c19c7a5a
+ if ((r = svcControlMemory(&mapped_addrs[i], found_addr, 0, size, MEMOP_ALLOC, MEMPERM_READWRITE)) < 0) {
+ LogErr("svcControlMemory failed for %08x %u: %d\n", found_addr, size, r);
+ break;
+ }
+ if (mapped_addrs[i] == 0) // needed?
+ mapped_addrs[i] = found_addr;
+ ret = (void *)mapped_addrs[i];
+
+ // "round" address helps the dynarec slightly, map ram at 0x13000000
+ if (tag == MAP_TAG_RAM && !mapped_ram) {
+ u32 target = 0x13000000;
+ if ((r = svcControlMemory(&mapped_ram, target, mapped_addrs[i], size, MEMOP_MAP, MEMPERM_READWRITE)) < 0)
+ LogErr("could not map ram %08x -> %08x: %d\n", mapped_addrs[i], target, r);
+ else {
+ mapped_ram_src = mapped_addrs[i];
+ mapped_ram = target;
+ ret = (void *)mapped_ram;
}
}
+ memset(ret, 0, size);
+ return ret;
}
+ while (0);
- return calloc(size, 1);
+ ret = calloc(size, 1);
+ return ret ? ret : MAP_FAILED;
}
-void pl_3ds_munmap(void *ptr, size_t size, enum psxMapTag tag)
+static void pl_3ds_munmap(void *ptr, size_t size, enum psxMapTag tag)
{
(void)tag;
- if (__ctr_svchax)
- {
- psx_map_t *custom_map = custom_psx_maps;
-
- for (; custom_map->size; custom_map++)
- {
- if ((custom_map->target_map == (uint32_t)ptr))
- {
- uint32_t ptr_aligned, tmp;
+ if (ptr && ptr == vram_mem)
+ return;
- ptr_aligned = (((u32)custom_map->buffer) + 0xFFF) & ~0xFFF;
+ if (ptr && __ctr_svchax)
+ {
+ size_t i;
+ u32 tmp;
- svcControlMemory(&tmp, (void *)custom_map->target_map, (void *)ptr_aligned, size, MEMOP_UNMAP, 0x3);
+ size = (size + 0xfff) & ~0xfff;
- free(custom_map->buffer);
- custom_map->buffer = NULL;
+ if (ptr == (void *)mapped_ram) {
+ svcControlMemory(&tmp, mapped_ram, mapped_ram_src, size, MEMOP_UNMAP, 0);
+ ptr = (void *)mapped_ram_src;
+ mapped_ram = mapped_ram_src = 0;
+ }
+ for (i = 0; i < sizeof(mapped_addrs) / sizeof(mapped_addrs[0]); i++) {
+ if (ptr == (void *)mapped_addrs[i]) {
+ svcControlMemory(&tmp, mapped_addrs[i], 0, size, MEMOP_FREE, 0);
+ mapped_addrs[i] = 0;
return;
}
}
free(ptr);
}
+
+// debug
+static int ctr_get_tlbe_k(u32 ptr)
+{
+ u32 tlb_base = -1, tlb_ctl = -1, *l1;
+ s32 tlb_mask = 0xffffc000;
+
+ asm volatile("mrc p15, 0, %0, c2, c0, 0" : "=r"(tlb_base));
+ asm volatile("mrc p15, 0, %0, c2, c0, 2" : "=r"(tlb_ctl));
+ tlb_mask >>= tlb_ctl & 7;
+ l1 = (u32 *)((tlb_base & tlb_mask) | 0xe0000000);
+ return l1[ptr >> 20];
+}
+
+static int ctr_get_tlbe(void *ptr)
+{
+ if (svcConvertVAToPA((void *)0xe0000000, 0) != 0x20000000)
+ return -1;
+ return svcCustomBackdoor(ctr_get_tlbe_k, ptr, NULL, NULL);
+}
+#endif
+
+#ifdef HAVE_LIBNX
+static void *pl_switch_mmap(unsigned long addr, size_t size,
+ enum psxMapTag tag, int *can_retry_addr)
+{
+ void *ret = MAP_FAILED;
+ *can_retry_addr = 0;
+ (void)addr;
+
+ // there's svcMapPhysicalMemory() but user logs show it doesn't hand out
+ // any desired addresses, so don't even bother
+ ret = aligned_alloc(0x1000, size);
+ if (!ret)
+ return MAP_FAILED;
+ memset(ret, 0, size);
+ return ret;
+}
+
+static void pl_switch_munmap(void *ptr, size_t size, enum psxMapTag tag)
+{
+ (void)size;
+ (void)tag;
+ free(ptr);
+}
#endif
#ifdef VITA
static void *addr = NULL;
-psx_map_t custom_psx_maps[] = {
+static psx_map_t custom_psx_maps[] = {
{ NULL, 0x800000, MAP_TAG_LUTS },
{ NULL, 0x080000, MAP_TAG_OTHER },
{ NULL, 0x010000, MAP_TAG_OTHER },
{ NULL, 0x210000, MAP_TAG_RAM },
};
-int init_vita_mmap()
+static int init_vita_mmap()
{
- int n;
void *tmpaddr;
addr = malloc(64 * 1024 * 1024);
if (addr == NULL)
custom_psx_maps[5].buffer = tmpaddr + 0x2000000;
memset(tmpaddr, 0, 0x2210000);
#if 0
+ int n;
for(n = 0; n < 5; n++){
sceClibPrintf("addr reserved %x\n",custom_psx_maps[n].buffer);
}
return 0;
}
-void deinit_vita_mmap()
+static void deinit_vita_mmap()
{
size_t i;
for (i = 0; i < sizeof(custom_psx_maps) / sizeof(custom_psx_maps[0]); i++) {
free(addr);
}
-void *pl_vita_mmap(unsigned long addr, size_t size, int is_fixed,
- enum psxMapTag tag)
+static void *pl_vita_mmap(unsigned long addr, size_t size,
+ enum psxMapTag tag, int *can_retry_addr)
{
- (void)is_fixed;
+ void *ret;
(void)addr;
+ *can_retry_addr = 0;
psx_map_t *custom_map = custom_psx_maps;
}
}
- return calloc(size, 1);
+ ret = calloc(size, 1);
+ return ret ? ret : MAP_FAILED;
}
-void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag)
+static void pl_vita_munmap(void *ptr, size_t size, enum psxMapTag tag)
{
(void)tag;
}
#endif
+// Log a one-line memory usage summary via SysPrintf. Only implemented
+// for 3DS (system/app memory, heap, linear heap, stack and exe size);
+// compiles to a no-op on other platforms.
+static void log_mem_usage(void)
+{
+#ifdef _3DS
+ extern u32 __heap_size, __linear_heap_size, __stacksize__;
+ extern char __end__; // 3dsx.ld
+ // 0x1FF80040: presumably the app memory amount from the kernel's
+ // shared config page — TODO confirm against libctru docs
+ u32 app_memory = *((volatile u32 *)0x1FF80040);
+ s64 mem_used = 0;
+ // svcGetSystemInfo needs elevated svc access (svchax)
+ if (__ctr_svchax)
+ svcGetSystemInfo(&mem_used, 0, 1);
+
+ SysPrintf("mem: %d/%d heap: %d linear: %d/%d stack: %d exe: %d\n",
+ (int)mem_used, app_memory, __heap_size, __linear_heap_size - linearSpaceFree(),
+ __linear_heap_size, __stacksize__, (int)&__end__ - 0x100000);
+#endif
+}
+
static void *pl_mmap(unsigned int size)
{
return psxMap(0, size, 0, MAP_TAG_VRAM);
void pl_frame_limit(void)
{
/* called once per frame, make psxCpu->Execute() above return */
- stop++;
+ psxRegs.stop++;
}
void pl_timing_prepare(int is_pal)
struct retro_core_option_display option_display;
char gpu_unai_option[][40] = {
"pcsx_rearmed_gpu_unai_blending",
+ "pcsx_rearmed_gpu_unai_skipline",
"pcsx_rearmed_gpu_unai_lighting",
"pcsx_rearmed_gpu_unai_fast_lighting",
"pcsx_rearmed_gpu_unai_scale_hires",
#endif
memset(info, 0, sizeof(*info));
info->library_name = "PCSX-ReARMed";
- info->library_version = "r24l" GIT_VERSION;
+ info->library_version = "r25" GIT_VERSION;
info->valid_extensions = "bin|cue|img|mdf|pbp|toc|cbn|m3u|chd|iso|exe";
info->need_fullpath = true;
}
}
}
-#ifdef HAVE_CDROM
-static long CALLBACK rcdrom_open(void);
-static long CALLBACK rcdrom_close(void);
-static void rcdrom_stop_thread(void);
-#endif
-
static bool disk_set_eject_state(bool ejected)
{
if (ejected != disk_ejected)
LidInterrupt();
#ifdef HAVE_CDROM
- if (CDR_open == rcdrom_open && ejected != disk_ejected) {
- rcdrom_stop_thread();
+ if (cdra_is_physical() && ejected != disk_ejected) {
+ cdra_stop_thread();
if (!ejected) {
// likely the real cd was also changed - rescan
- rcdrom_close();
- rcdrom_open();
+ cdra_close();
+ cdra_open();
}
}
#endif
if (disks[index].fname == NULL)
{
LogErr("missing disk #%u\n", index);
- CDR_shutdown();
+ cdra_shutdown();
// RetroArch specifies "no disk" with index == count,
// so don't fail here..
LogErr("failed to load cdr plugin\n");
return false;
}
- if (CDR_open() < 0)
+ if (cdra_open() < 0)
{
LogErr("failed to open cdr plugin\n");
return false;
}
}
-// raw cdrom support
-#ifdef HAVE_CDROM
-#include "vfs/vfs_implementation.h"
-#include "vfs/vfs_implementation_cdrom.h"
-#include "libretro-cdrom.h"
-#include "rthreads/rthreads.h"
-#include "retro_timers.h"
-struct cached_buf {
- unsigned char buf[2352];
- unsigned int lba;
-};
-static struct {
- libretro_vfs_implementation_file *h;
- sthread_t *thread;
- slock_t *read_lock;
- slock_t *buf_lock;
- scond_t *cond;
- struct cached_buf *buf;
- unsigned int buf_cnt, thread_exit, do_prefetch;
- unsigned int total_lba, prefetch_lba;
- int check_eject_delay;
-} rcdrom;
-
-static void lbacache_do(unsigned int lba)
-{
- unsigned char m, s, f, buf[2352];
- unsigned int i = lba % rcdrom.buf_cnt;
- int ret;
-
- cdrom_lba_to_msf(lba + 150, &m, &s, &f);
- slock_lock(rcdrom.read_lock);
- ret = cdrom_read_sector(rcdrom.h, lba, buf);
- slock_lock(rcdrom.buf_lock);
- slock_unlock(rcdrom.read_lock);
- //printf("%d:%02d:%02d m%d f%d\n", m, s, f, buf[12+3], ((buf[12+4+2] >> 5) & 1) + 1);
- if (ret) {
- rcdrom.do_prefetch = 0;
- slock_unlock(rcdrom.buf_lock);
- LogErr("prefetch: cdrom_read_sector failed for lba %d\n", lba);
- return;
- }
- rcdrom.check_eject_delay = 100;
-
- if (lba != rcdrom.buf[i].lba) {
- memcpy(rcdrom.buf[i].buf, buf, sizeof(rcdrom.buf[i].buf));
- rcdrom.buf[i].lba = lba;
- }
- slock_unlock(rcdrom.buf_lock);
- retro_sleep(0); // why does the main thread stall without this?
-}
-
-static int lbacache_get(unsigned int lba, void *buf)
-{
- unsigned int i;
- int ret = 0;
-
- i = lba % rcdrom.buf_cnt;
- slock_lock(rcdrom.buf_lock);
- if (lba == rcdrom.buf[i].lba) {
- memcpy(buf, rcdrom.buf[i].buf, 2352);
- ret = 1;
- }
- slock_unlock(rcdrom.buf_lock);
- return ret;
-}
-
-static void rcdrom_prefetch_thread(void *unused)
-{
- unsigned int buf_cnt, lba, lba_to;
-
- slock_lock(rcdrom.buf_lock);
- while (!rcdrom.thread_exit)
- {
-#ifdef __GNUC__
- __asm__ __volatile__("":::"memory"); // barrier
-#endif
- if (!rcdrom.do_prefetch)
- scond_wait(rcdrom.cond, rcdrom.buf_lock);
- if (!rcdrom.do_prefetch || !rcdrom.h || rcdrom.thread_exit)
- continue;
-
- buf_cnt = rcdrom.buf_cnt;
- lba = rcdrom.prefetch_lba;
- lba_to = lba + buf_cnt;
- if (lba_to > rcdrom.total_lba)
- lba_to = rcdrom.total_lba;
- for (; lba < lba_to; lba++) {
- if (lba != rcdrom.buf[lba % buf_cnt].lba)
- break;
- }
- if (lba == lba_to) {
- // caching complete
- rcdrom.do_prefetch = 0;
- continue;
- }
-
- slock_unlock(rcdrom.buf_lock);
- lbacache_do(lba);
- slock_lock(rcdrom.buf_lock);
- }
- slock_unlock(rcdrom.buf_lock);
-}
-
-static void rcdrom_stop_thread(void)
-{
- rcdrom.thread_exit = 1;
- if (rcdrom.buf_lock) {
- slock_lock(rcdrom.buf_lock);
- rcdrom.do_prefetch = 0;
- if (rcdrom.cond)
- scond_signal(rcdrom.cond);
- slock_unlock(rcdrom.buf_lock);
- }
- if (rcdrom.thread) {
- sthread_join(rcdrom.thread);
- rcdrom.thread = NULL;
- }
- if (rcdrom.cond) { scond_free(rcdrom.cond); rcdrom.cond = NULL; }
- if (rcdrom.buf_lock) { slock_free(rcdrom.buf_lock); rcdrom.buf_lock = NULL; }
- if (rcdrom.read_lock) { slock_free(rcdrom.read_lock); rcdrom.read_lock = NULL; }
- free(rcdrom.buf);
- rcdrom.buf = NULL;
-}
-
-// the thread is optional, if anything fails we can do direct reads
-static void rcdrom_start_thread(void)
-{
- rcdrom_stop_thread();
- rcdrom.thread_exit = rcdrom.prefetch_lba = rcdrom.do_prefetch = 0;
- if (rcdrom.buf_cnt == 0)
- return;
- rcdrom.buf = calloc(rcdrom.buf_cnt, sizeof(rcdrom.buf[0]));
- rcdrom.buf_lock = slock_new();
- rcdrom.read_lock = slock_new();
- rcdrom.cond = scond_new();
- if (rcdrom.buf && rcdrom.buf_lock && rcdrom.read_lock && rcdrom.cond) {
- rcdrom.thread = sthread_create(rcdrom_prefetch_thread, NULL);
- rcdrom.buf[0].lba = ~0;
- }
- if (!rcdrom.thread) {
- LogErr("cdrom precache thread init failed.\n");
- rcdrom_stop_thread();
- }
-}
-
-static long CALLBACK rcdrom_open(void)
-{
- const char *name = GetIsoFile();
- //printf("%s %s\n", __func__, name);
- rcdrom.h = retro_vfs_file_open_impl(name, RETRO_VFS_FILE_ACCESS_READ,
- RETRO_VFS_FILE_ACCESS_HINT_NONE);
- if (rcdrom.h) {
- int ret = cdrom_set_read_speed_x(rcdrom.h, 4);
- if (ret) LogErr("CD speed set failed\n");
- const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc();
- const cdrom_track_t *last = &toc->track[toc->num_tracks - 1];
- unsigned int lba = cdrom_msf_to_lba(last->min, last->sec, last->frame) - 150;
- rcdrom.total_lba = lba + last->track_size;
- //cdrom_get_current_config_random_readable(rcdrom.h);
- //cdrom_get_current_config_multiread(rcdrom.h);
- //cdrom_get_current_config_cdread(rcdrom.h);
- //cdrom_get_current_config_profiles(rcdrom.h);
- rcdrom_start_thread();
- return 0;
- }
- LogErr("retro_vfs_file_open failed for '%s'\n", name);
- return -1;
-}
-
-static long CALLBACK rcdrom_close(void)
-{
- //printf("%s\n", __func__);
- if (rcdrom.h) {
- rcdrom_stop_thread();
- retro_vfs_file_close_impl(rcdrom.h);
- rcdrom.h = NULL;
- }
- return 0;
-}
-
-static long CALLBACK rcdrom_getTN(unsigned char *tn)
-{
- const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc();
- tn[0] = 1;
- tn[1] = toc->num_tracks;
- //printf("%s -> %d %d\n", __func__, tn[0], tn[1]);
- return 0;
-}
-
-static long CALLBACK rcdrom_getTD(unsigned char track, unsigned char *rt)
-{
- const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc();
- rt[0] = 0, rt[1] = 2, rt[2] = 0;
- if (track == 0) {
- cdrom_lba_to_msf(rcdrom.total_lba + 150, &rt[2], &rt[1], &rt[0]);
- }
- else if (track <= toc->num_tracks) {
- int i = track - 1;
- rt[2] = toc->track[i].min;
- rt[1] = toc->track[i].sec;
- rt[0] = toc->track[i].frame;
- }
- //printf("%s %d -> %d:%02d:%02d\n", __func__, track, rt[2], rt[1], rt[0]);
- return 0;
-}
-
-static long CALLBACK rcdrom_prefetch(unsigned char m, unsigned char s, unsigned char f)
-{
- unsigned int lba = cdrom_msf_to_lba(m, s, f) - 150;
- if (rcdrom.cond && rcdrom.h) {
- rcdrom.prefetch_lba = lba;
- rcdrom.do_prefetch = 1;
- scond_signal(rcdrom.cond);
- }
- if (rcdrom.buf) {
- unsigned int c = rcdrom.buf_cnt;
- if (c)
- return rcdrom.buf[lba % c].lba == lba;
- }
- return 1;
-}
-
-static int rcdrom_read_msf(unsigned char m, unsigned char s, unsigned char f,
- void *buf, const char *func)
-{
- unsigned int lba = cdrom_msf_to_lba(m, s, f) - 150;
- int hit = 0, ret = -1;
- if (rcdrom.buf_lock)
- hit = lbacache_get(lba, buf);
- if (!hit && rcdrom.read_lock) {
- // maybe still prefetching
- slock_lock(rcdrom.read_lock);
- slock_unlock(rcdrom.read_lock);
- hit = lbacache_get(lba, buf);
- if (hit)
- hit = 2;
- }
- if (!hit) {
- slock_t *lock = rcdrom.read_lock;
- rcdrom.do_prefetch = 0;
- if (lock)
- slock_lock(lock);
- if (rcdrom.h) {
- ret = cdrom_read_sector(rcdrom.h, lba, buf);
- if (ret)
- LogErr("cdrom_read_sector failed for lba %d\n", lba);
- }
- if (lock)
- slock_unlock(lock);
- }
- else
- ret = 0;
- rcdrom.check_eject_delay = ret ? 0 : 100;
- //printf("%s %d:%02d:%02d -> %d hit %d\n", func, m, s, f, ret, hit);
- return ret;
-}
-
-static boolean CALLBACK rcdrom_readTrack(unsigned char *time)
-{
- unsigned char m = btoi(time[0]), s = btoi(time[1]), f = btoi(time[2]);
- return !rcdrom_read_msf(m, s, f, ISOgetBuffer() - 12, __func__);
-}
-
-static long CALLBACK rcdrom_readCDDA(unsigned char m, unsigned char s, unsigned char f,
- unsigned char *buffer)
-{
- return rcdrom_read_msf(m, s, f, buffer, __func__);
-}
-
-static unsigned char * CALLBACK rcdrom_getBuffer(void)
-{
- //printf("%s\n", __func__);
- return ISOgetBuffer();
-}
-
-static unsigned char * CALLBACK rcdrom_getBufferSub(int sector)
-{
- //printf("%s %d %d\n", __func__, sector, rcdrom_h->cdrom.last_frame_lba);
- return NULL;
-}
-
-static long CALLBACK rcdrom_getStatus(struct CdrStat *stat)
-{
- const cdrom_toc_t *toc = retro_vfs_file_get_cdrom_toc();
- //printf("%s %p\n", __func__, stat);
- CDR__getStatus(stat);
- stat->Type = toc->track[0].audio ? 2 : 1;
- return 0;
-}
-
-static void rcdrom_check_eject(void)
-{
- bool media_inserted;
- if (!rcdrom.h || rcdrom.do_prefetch || rcdrom.check_eject_delay-- > 0)
- return;
- rcdrom.check_eject_delay = 100;
- media_inserted = cdrom_is_media_inserted(rcdrom.h); // 1-2ms
- if (!media_inserted != disk_ejected)
- disk_set_eject_state(!media_inserted);
-}
-#endif // HAVE_CDROM
-
#if defined(__QNX__) || defined(_WIN32)
/* Blackberry QNX doesn't have strcasestr */
static void set_retro_memmap(void)
{
-#ifndef NDEBUG
+ uint64_t flags_ram = RETRO_MEMDESC_SYSTEM_RAM;
struct retro_memory_map retromap = { 0 };
- struct retro_memory_descriptor mmap = {
- 0, psxM, 0, 0, 0, 0, 0x200000
+ struct retro_memory_descriptor descs[] = {
+ { flags_ram, psxM, 0, 0x00000000, 0x5fe00000, 0, 0x200000 },
+ { flags_ram, psxH, 0, 0x1f800000, 0x7ffffc00, 0, 0x000400 },
+ // not ram but let the frontend patch it if it wants; should be last
+ { flags_ram, psxR, 0, 0x1fc00000, 0x5ff80000, 0, 0x080000 },
};
- retromap.descriptors = &mmap;
- retromap.num_descriptors = 1;
+ retromap.descriptors = descs;
+ retromap.num_descriptors = sizeof(descs) / sizeof(descs[0]);
+ if (Config.HLE)
+ retromap.num_descriptors--;
environ_cb(RETRO_ENVIRONMENT_SET_MEMORY_MAPS, &retromap);
-#endif
}
static void show_notification(const char *msg_str,
}
static void update_variables(bool in_flight);
+
+// Query the libretro core option 'key' from the frontend and report
+// whether it is set to "enabled". Returns 1 only for the exact value
+// "enabled"; 0 for any other value or when the frontend doesn't know
+// the key.
+static int get_bool_variable(const char *key)
+{
+ struct retro_variable var = { NULL, };
+
+ var.key = key;
+ if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+ {
+ if (strcmp(var.value, "enabled") == 0)
+ return 1;
+ }
+ return 0;
+}
+
bool retro_load_game(const struct retro_game_info *info)
{
size_t i;
unsigned int cd_index = 0;
- bool is_m3u = (strcasestr(info->path, ".m3u") != NULL);
- bool is_exe = (strcasestr(info->path, ".exe") != NULL);
+ bool is_m3u, is_exe;
int ret;
struct retro_input_descriptor desc[] = {
{ 0 },
};
- frame_count = 0;
-
environ_cb(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, desc);
-#ifdef FRONTEND_SUPPORTS_RGB565
- enum retro_pixel_format fmt = RETRO_PIXEL_FORMAT_RGB565;
+ enum retro_pixel_format fmt = get_bool_variable("pcsx_rearmed_rgb32_output")
+ ? RETRO_PIXEL_FORMAT_XRGB8888 : RETRO_PIXEL_FORMAT_RGB565;
if (environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &fmt))
- {
- SysPrintf("RGB565 supported, using it\n");
- }
-#endif
+ current_fmt = fmt;
+ else
+ LogErr("SET_PIXEL_FORMAT failed\n");
+ SysPrintf("Using PIXEL_FORMAT %d\n", current_fmt);
+ set_bgr_to_fb_func(0);
if (info == NULL || info->path == NULL)
{
LogErr("info->path required\n");
return false;
}
+ is_m3u = (strcasestr(info->path, ".m3u") != NULL);
+ is_exe = (strcasestr(info->path, ".exe") != NULL);
update_variables(false);
}
if (!strncmp(info->path, "cdrom:", 6))
{
-#ifdef HAVE_CDROM
- CDR_open = rcdrom_open;
- CDR_close = rcdrom_close;
- CDR_getTN = rcdrom_getTN;
- CDR_getTD = rcdrom_getTD;
- CDR_readTrack = rcdrom_readTrack;
- CDR_getBuffer = rcdrom_getBuffer;
- CDR_getBufferSub = rcdrom_getBufferSub;
- CDR_getStatus = rcdrom_getStatus;
- CDR_readCDDA = rcdrom_readCDDA;
- CDR_prefetch = rcdrom_prefetch;
-#elif !defined(USE_LIBRETRO_VFS)
+#if !defined(HAVE_CDROM) && !defined(USE_LIBRETRO_VFS)
ReleasePlugins();
LogErr("%s\n", "Physical CD-ROM support is not compiled in.");
show_notification("Physical CD-ROM support is not compiled in.", 6000, 3);
}
plugins_opened = 1;
- NetOpened = 0;
if (OpenPlugins() == -1)
{
LogErr("failed to reload cdr plugins\n");
return false;
}
- if (CDR_open() < 0)
+ if (cdra_open() < 0)
{
LogErr("failed to open cdr plugin\n");
return false;
for (i = 0; i < 8; ++i)
in_type[i] = PSE_PAD_TYPE_STANDARD;
- plugin_call_rearmed_cbs();
- /* dfinput_activate(); */
-
if (!is_exe && CheckCdrom() == -1)
{
LogErr("unsupported/invalid CD image: %s\n", info->path);
return false;
}
+ plugin_call_rearmed_cbs();
SysReset();
if (is_exe)
set_retro_memmap();
retro_set_audio_buff_status_cb();
+ log_mem_usage();
if (check_unsatisfied_libcrypt())
show_notification("LibCrypt protected game with missing SBI detected", 3000, 3);
+ if (Config.TurboCD)
+ show_notification("TurboCD is ON", 700, 2);
return true;
}
{
axis_bounds_modifier = true;
}
- else if (strcmp(var.value, "circle") == 0)
+ else
{
axis_bounds_modifier = false;
}
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
- if (strcmp(var.value, "disabled") == 0)
+ if (strcmp(var.value, "force") == 0)
{
- pl_rearmed_cbs.gpu_peops.iUseDither = 0;
+ pl_rearmed_cbs.dithering = 2;
+ pl_rearmed_cbs.gpu_peopsgl.bDrawDither = 1;
+ }
+ else if (strcmp(var.value, "disabled") == 0)
+ {
+ pl_rearmed_cbs.dithering = 0;
pl_rearmed_cbs.gpu_peopsgl.bDrawDither = 0;
- pl_rearmed_cbs.gpu_unai.dithering = 0;
-#ifdef GPU_NEON
- pl_rearmed_cbs.gpu_neon.allow_dithering = 0;
-#endif
}
- else if (strcmp(var.value, "enabled") == 0)
+ else
{
- pl_rearmed_cbs.gpu_peops.iUseDither = 1;
+ pl_rearmed_cbs.dithering = 1;
pl_rearmed_cbs.gpu_peopsgl.bDrawDither = 1;
- pl_rearmed_cbs.gpu_unai.dithering = 1;
-#ifdef GPU_NEON
- pl_rearmed_cbs.gpu_neon.allow_dithering = 1;
-#endif
}
}
}
var.value = NULL;
- var.key = "pcsx_rearmed_neon_enhancement_tex_adj";
+ var.key = "pcsx_rearmed_neon_enhancement_tex_adj_v2";
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
#endif
var.value = NULL;
- var.key = "pcsx_rearmed_display_internal_fps";
+ var.key = "pcsx_rearmed_display_fps_v2";
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
- if (strcmp(var.value, "disabled") == 0)
- display_internal_fps = false;
+ if (strcmp(var.value, "extra") == 0)
+ display_internal_fps = 2;
else if (strcmp(var.value, "enabled") == 0)
- display_internal_fps = true;
+ display_internal_fps = 1;
+ else
+ display_internal_fps = 0;
}
-#ifdef HAVE_CDROM
var.value = NULL;
- var.key = "pcsx_rearmed_phys_cd_readahead";
+ var.key = "pcsx_rearmed_cd_turbo";
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
- long newval = strtol(var.value, NULL, 10);
- bool changed = rcdrom.buf_cnt != newval;
- if (rcdrom.h && changed)
- rcdrom_stop_thread();
- rcdrom.buf_cnt = newval;
- if (rcdrom.h && changed) {
- rcdrom_start_thread();
- if (rcdrom.cond && rcdrom.prefetch_lba) {
- rcdrom.do_prefetch = 1;
- scond_signal(rcdrom.cond);
- }
- }
+ if (strcmp(var.value, "enabled") == 0)
+ Config.TurboCD = true;
+ else
+ Config.TurboCD = false;
+ }
+
+#if defined(HAVE_CDROM) || defined(USE_ASYNC_CDROM)
+ var.value = NULL;
+ var.key = "pcsx_rearmed_cd_readahead";
+ if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+ {
+ cdra_set_buf_count(strtol(var.value, NULL, 10));
}
#endif
prev_cpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL);
prev_cpu->Shutdown();
psxCpu->Init();
- psxCpu->Reset();
psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
}
}
-#endif /* !DRC_DISABLE */
+#endif // !DRC_DISABLE
var.value = NULL;
var.key = "pcsx_rearmed_psxclock";
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
int psxclock = atoi(var.value);
- Config.cycle_multiplier = 10000 / psxclock;
+ if (strcmp(var.value, "auto") == 0 || psxclock == 0)
+ Config.cycle_multiplier = CYCLE_MULT_DEFAULT;
+ else
+ Config.cycle_multiplier = 10000 / psxclock;
}
#if !defined(DRC_DISABLE) && !defined(LIGHTREC)
+#ifdef NDRC_THREAD
+ var.value = NULL;
+ var.key = "pcsx_rearmed_drc_thread";
+ if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+ {
+ ndrc_g.hacks &= ~(NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON);
+ if (strcmp(var.value, "disabled") == 0)
+ ndrc_g.hacks |= NDHACK_THREAD_FORCE;
+ else if (strcmp(var.value, "enabled") == 0)
+ ndrc_g.hacks |= NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON;
+ // psxCpu->ApplyConfig(); will start/stop the thread
+ }
+#endif
+
var.value = NULL;
var.key = "pcsx_rearmed_nosmccheck";
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
if (strcmp(var.value, "enabled") == 0)
- new_dynarec_hacks |= NDHACK_NO_SMC_CHECK;
+ ndrc_g.hacks |= NDHACK_NO_SMC_CHECK;
else
- new_dynarec_hacks &= ~NDHACK_NO_SMC_CHECK;
+ ndrc_g.hacks &= ~NDHACK_NO_SMC_CHECK;
}
var.value = NULL;
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
if (strcmp(var.value, "enabled") == 0)
- new_dynarec_hacks |= NDHACK_GTE_UNNEEDED;
+ ndrc_g.hacks |= NDHACK_GTE_UNNEEDED;
else
- new_dynarec_hacks &= ~NDHACK_GTE_UNNEEDED;
+ ndrc_g.hacks &= ~NDHACK_GTE_UNNEEDED;
}
var.value = NULL;
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
if (strcmp(var.value, "enabled") == 0)
- new_dynarec_hacks |= NDHACK_GTE_NO_FLAGS;
+ ndrc_g.hacks |= NDHACK_GTE_NO_FLAGS;
else
- new_dynarec_hacks &= ~NDHACK_GTE_NO_FLAGS;
+ ndrc_g.hacks &= ~NDHACK_GTE_NO_FLAGS;
}
var.value = NULL;
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
if (strcmp(var.value, "enabled") == 0)
- new_dynarec_hacks |= NDHACK_NO_COMPAT_HACKS;
+ ndrc_g.hacks |= NDHACK_NO_COMPAT_HACKS;
else
- new_dynarec_hacks &= ~NDHACK_NO_COMPAT_HACKS;
+ ndrc_g.hacks &= ~NDHACK_NO_COMPAT_HACKS;
}
#endif /* !DRC_DISABLE && !LIGHTREC */
}
#endif
-#if 0 // currently disabled, see USE_READ_THREAD in libpcsxcore/cdriso.c
- if (P_HAVE_PTHREAD) {
- var.value = NULL;
- var.key = "pcsx_rearmed_async_cd";
- if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
- {
- if (strcmp(var.value, "async") == 0)
- {
- Config.AsyncCD = 1;
- Config.CHD_Precache = 0;
- }
- else if (strcmp(var.value, "sync") == 0)
- {
- Config.AsyncCD = 0;
- Config.CHD_Precache = 0;
- }
- else if (strcmp(var.value, "precache") == 0)
- {
- Config.AsyncCD = 0;
- Config.CHD_Precache = 1;
- }
- }
- }
-#endif
-
var.value = NULL;
var.key = "pcsx_rearmed_noxadecoding";
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
pl_rearmed_cbs.screen_centering_y = atoi(var.value);
}
+ var.value = NULL;
+ var.key = "pcsx_rearmed_screen_centering_h_adj";
+ if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+ {
+ pl_rearmed_cbs.screen_centering_h_adj = atoi(var.value);
+ }
+
+ var.value = NULL;
+ var.key = "pcsx_rearmed_show_overscan";
+ if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+ {
+ if (strcmp(var.value, "auto") == 0)
+ pl_rearmed_cbs.show_overscan = 1;
+ else if (strcmp(var.value, "hack") == 0)
+ pl_rearmed_cbs.show_overscan = 2;
+ else
+ pl_rearmed_cbs.show_overscan = 0;
+ }
+
#ifdef THREAD_RENDERING
var.key = "pcsx_rearmed_gpu_thread_rendering";
var.value = NULL;
* (480i, 512i) and has been obsoleted by
* pcsx_rearmed_gpu_unai_scale_hires */
pl_rearmed_cbs.gpu_unai.ilace_force = 0;
- /* Note: This used to be an option, but it has no
- * discernable effect and has been obsoleted by
- * pcsx_rearmed_gpu_unai_scale_hires */
- pl_rearmed_cbs.gpu_unai.pixel_skip = 0;
+
+ var.key = "pcsx_rearmed_gpu_unai_old_renderer";
+ var.value = NULL;
+
+ if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+ {
+ if (strcmp(var.value, "enabled") == 0)
+ pl_rearmed_cbs.gpu_unai.old_renderer = 1;
+ else
+ pl_rearmed_cbs.gpu_unai.old_renderer = 0;
+ }
+
+ var.key = "pcsx_rearmed_gpu_unai_skipline";
+ var.value = NULL;
+
+ if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+ {
+ if (strcmp(var.value, "disabled") == 0)
+ pl_rearmed_cbs.gpu_unai.ilace_force = 0;
+ else if (strcmp(var.value, "enabled") == 0)
+ pl_rearmed_cbs.gpu_unai.ilace_force = 1;
+ }
var.key = "pcsx_rearmed_gpu_unai_lighting";
var.value = NULL;
mouse_sensitivity = atof(var.value);
}
+#ifdef _3DS
+ var.value = NULL;
+ var.key = "pcsx_rearmed_3ds_appcputime";
+ if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+ {
+ APT_SetAppCpuTimeLimit(strtol(var.value, NULL, 10));
+ }
+#endif
+
if (found_bios)
{
var.value = NULL;
update_option_visibility();
- if (old_fps != psxGetFps())
+ if (in_flight && old_fps != psxGetFps())
{
struct retro_system_av_info info;
retro_get_system_av_info(&info);
return button;
}
-unsigned char axis_range_modifier(int16_t axis_value, bool is_square)
+static unsigned char axis_range_modifier(int axis_value, bool is_square)
{
- float modifier_axis_range = 0;
+ int modifier_axis_range;
if (is_square)
- {
- modifier_axis_range = round((axis_value >> 8) / 0.785) + 128;
- if (modifier_axis_range < 0)
- {
- modifier_axis_range = 0;
- }
- else if (modifier_axis_range > 255)
- {
- modifier_axis_range = 255;
- }
- }
+ modifier_axis_range = roundf((axis_value >> 8) / 0.785f) + 128;
else
- {
- modifier_axis_range = MIN(((axis_value >> 8) + 128), 255);
- }
+ modifier_axis_range = (axis_value >> 8) + 128;
+
+ if (modifier_axis_range < 0)
+ modifier_axis_range = 0;
+ else if (modifier_axis_range > 255)
+ modifier_axis_range = 255;
return modifier_axis_range;
}
{
if (display_internal_fps)
{
- frame_count++;
+ static u32 fps, frame_count_s;
+ static time_t last_time;
+ static u32 psx_vsync_count;
+ u32 psx_vsync_rate = is_pal_mode ? 50 : 60;
+ time_t now;
- if (frame_count % INTERNAL_FPS_SAMPLE_PERIOD == 0)
+ psx_vsync_count++;
+ frame_count_s++;
+ now = time(NULL);
+ if (now != last_time)
{
- unsigned internal_fps = pl_rearmed_cbs.flip_cnt * (is_pal_mode ? 50 : 60) / INTERNAL_FPS_SAMPLE_PERIOD;
- char str[64];
- const char *strc = (const char *)str;
+ fps = frame_count_s;
+ frame_count_s = 0;
+ last_time = now;
+ }
- str[0] = '\0';
+ if (psx_vsync_count >= psx_vsync_rate)
+ {
+ int pos = 0, cd_count;
+ char str[64];
- snprintf(str, sizeof(str), "Internal FPS: %2d", internal_fps);
+ if (display_internal_fps > 1) {
+#if !defined(DRC_DISABLE) && !defined(LIGHTREC)
+ if (ndrc_g.did_compile) {
+ pos = snprintf(str, sizeof(str), "DRC: %d ", ndrc_g.did_compile);
+ ndrc_g.did_compile = 0;
+ }
+#endif
+ cd_count = cdra_get_buf_count();
+ if (cd_count) {
+ pos += snprintf(str + pos, sizeof(str) - pos, "CD: %2d/%d ",
+ cdra_get_buf_cached_approx(), cd_count);
+ }
+ }
+ snprintf(str + pos, sizeof(str) - pos, "FPS: %2d/%2d",
+ pl_rearmed_cbs.flip_cnt, fps);
pl_rearmed_cbs.flip_cnt = 0;
+ psx_vsync_count = 0;
if (msg_interface_version >= 1)
{
struct retro_message_ext msg = {
- strc,
+ str,
3000,
1,
RETRO_LOG_INFO,
else
{
struct retro_message msg = {
- strc,
+ str,
180
};
environ_cb(RETRO_ENVIRONMENT_SET_MESSAGE, &msg);
}
}
}
- else
- frame_count = 0;
}
void retro_run(void)
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated) && updated)
update_variables(true);
- stop = 0;
- psxCpu->Execute();
+ psxRegs.stop = 0;
+ psxCpu->Execute(&psxRegs);
if (pl_rearmed_cbs.fskip_dirty == 1) {
if (frameskip_counter < frameskip_interval)
}
video_cb((vout_fb_dirty || !vout_can_dupe) ? vout_buf_ptr : NULL,
- vout_width, vout_height, vout_pitch * 2);
+ vout_width, vout_height, vout_pitch_b);
vout_fb_dirty = 0;
#ifdef HAVE_CDROM
- if (CDR_open == rcdrom_open)
- rcdrom_check_eject();
+ int inserted;
+ if (cdra_check_eject(&inserted) > 0) {
+ bool media_inserted = inserted != 0;
+ if (!media_inserted != disk_ejected)
+ disk_set_eject_state(!media_inserted);
+ }
#endif
}
struct retro_rumble_interface rumble;
int ret;
+ log_mem_usage();
+
msg_interface_version = 0;
environ_cb(RETRO_ENVIRONMENT_GET_MESSAGE_INTERFACE_VERSION, &msg_interface_version);
syscall(SYS_ptrace, 0 /*PTRACE_TRACEME*/, 0, 0, 0);
#endif
-#ifdef _3DS
+#if defined(_3DS)
psxMapHook = pl_3ds_mmap;
psxUnmapHook = pl_3ds_munmap;
-#endif
-#ifdef VITA
+#elif defined(HAVE_LIBNX)
+ psxMapHook = pl_switch_mmap;
+ psxUnmapHook = pl_switch_munmap;
+#elif defined(VITA)
if (init_vita_mmap() < 0)
abort();
psxMapHook = pl_vita_mmap;
exit(1);
}
+ // alloc enough for RETRO_PIXEL_FORMAT_XRGB8888
+ size_t vout_buf_size = VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 4;
#ifdef _3DS
- vout_buf = linearMemAlign(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2, 0x80);
+ // Place psx vram in linear mem to take advantage of its supersection mapping.
+ // The emu allocs 2x (0x201000 to be exact) but doesn't really need that much,
+ // so place vout_buf below to also act as an overdraw guard.
+ vram_mem = linearMemAlign(1024*1024 + 4096 + vout_buf_size, 4096);
+ if (vram_mem) {
+ vout_buf = (char *)vram_mem + 1024*1024 + 4096;
+ if (__ctr_svchax)
+ SysPrintf("vram: %p PA %08x tlb %08x\n", vram_mem,
+ svcConvertVAToPA(vram_mem, 0), ctr_get_tlbe(vram_mem));
+ }
#elif defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200112L) && P_HAVE_POSIX_MEMALIGN
- if (posix_memalign(&vout_buf, 16, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2) != 0)
- vout_buf = (void *) 0;
+ if (posix_memalign(&vout_buf, 16, vout_buf_size) != 0)
+ vout_buf = NULL;
else
- memset(vout_buf, 0, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2);
+ memset(vout_buf, 0, vout_buf_size);
#else
- vout_buf = calloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT, 2);
+ vout_buf = calloc(vout_buf_size, 1);
#endif
+ if (vout_buf == NULL)
+ {
+ LogErr("OOM for vout_buf.\n");
+ // may be able to continue if we get retro_framebuffer access
+ }
vout_buf_ptr = vout_buf;
if (environ_cb(RETRO_ENVIRONMENT_GET_RUMBLE_INTERFACE, &rumble))
rumble_cb = rumble.set_rumble_state;
- /* Set how much slower PSX CPU runs * 100 (so that 200 is 2 times)
- * we have to do this because cache misses and some IO penalties
- * are not emulated. Warning: changing this may break compatibility. */
- Config.cycle_multiplier = CYCLE_MULT_DEFAULT;
-#if defined(HAVE_PRE_ARMV7) && !defined(_3DS)
- Config.cycle_multiplier = 200;
-#endif
- pl_rearmed_cbs.gpu_peops.iUseDither = 1;
pl_rearmed_cbs.gpu_peops.dwActFixes = GPU_PEOPS_OLD_FRAME_SKIP;
SaveFuncs.open = save_open;
}
SysClose();
#ifdef _3DS
- linearFree(vout_buf);
+ linearFree(vram_mem);
+ vram_mem = NULL;
#else
free(vout_buf);
#endif
update_audio_latency = false;
}
-#ifdef VITA
-#include <psp2/kernel/threadmgr.h>
-int usleep(unsigned long us)
-{
- sceKernelDelayThread(us);
-}
-#endif
-
void SysPrintf(const char *fmt, ...)
{
va_list list;
"Compatibility Fixes",
"Configure settings/workarounds required for correct operation of specific games."
},
-#if !defined(DRC_DISABLE) && !defined(LIGHTREC)
{
"speed_hack",
"Speed Hacks (Advanced)",
"Configure hacks that may improve performance at the expense of decreased accuracy/stability."
},
-#endif
{ NULL, NULL, NULL },
};
},
{
"pcsx_rearmed_memcard2",
- "Enable Second Memory Card (Shared)",
+ "Second Memory Card (Shared)",
NULL,
"Emulate a second memory card in slot 2. This will be shared by all games.",
NULL,
},
"disabled",
},
-#ifndef _WIN32
- {
- "pcsx_rearmed_async_cd",
- "CD Access Method (Restart)",
- NULL,
- "Select method used to read data from content disk images. 'Synchronous' mimics original hardware. 'Asynchronous' can reduce stuttering on devices with slow storage. 'Pre-Cache (CHD)' loads disk image into memory for faster access (CHD files only).",
- NULL,
- "system",
- {
- { "sync", "Synchronous" },
- { "async", "Asynchronous" },
- { "precache", "Pre-Cache (CHD)" },
- { NULL, NULL},
- },
- "sync",
- },
-#endif
-#ifdef HAVE_CDROM
+#if defined(HAVE_CDROM) || defined(USE_ASYNC_CDROM)
#define V(x) { #x, NULL }
{
- "pcsx_rearmed_phys_cd_readahead",
- "Physical CD read-ahead",
+ "pcsx_rearmed_cd_readahead",
+ "CD read-ahead",
NULL,
- "(Hardware CD-ROM only) Reads the specified amount of sectors ahead of time to try to avoid later stalls. 333000 will try to read the complete disk (requires an additional 750MB of RAM).",
+ "Reads the specified amount of sectors ahead of time to try to avoid later stalls. "
+#ifdef HAVE_CDROM
+ "Affects both physical CD-ROM and CD images. "
+#endif
+#if !defined(_3DS) && !defined(VITA)
+ "333000 will try to read the complete disk (requires an additional 750MB of RAM)."
+#endif
+ ,
NULL,
"system",
{
V(0), V(1), V(2), V(3), V(4), V(5), V(6), V(7),
V(8), V(9), V(10), V(11), V(12), V(13), V(14), V(15),
- V(16), V(32), V(64), V(128), V(256), V(512), V(1024), V(333000),
+ V(16), V(32), V(64), V(128), V(256), V(512), V(1024),
+#if !defined(_3DS) && !defined(VITA)
+ V(333000),
+#endif
{ NULL, NULL},
},
"12",
},
"enabled",
},
+#if !defined(LIGHTREC) && defined(NDRC_THREAD)
+ {
+ "pcsx_rearmed_drc_thread",
+ "DynaRec threading",
+ NULL,
+ "Run the dynarec on another thread.",
+ NULL,
+ "system",
+ {
+ { "auto", "Auto" },
+ { "disabled", NULL },
+ { "enabled", NULL },
+ { NULL, NULL },
+ },
+ "auto",
+ },
#endif
+#endif // DRC_DISABLE
{
"pcsx_rearmed_psxclock",
- "PSX CPU Clock Speed",
+ "PSX CPU Clock Speed (%)",
NULL,
- "Overclock or under-clock the PSX CPU. Try adjusting this if the game is too slow, too fast or hangs."
-#if defined(HAVE_PRE_ARMV7) && !defined(_3DS)
- " Default is 50."
-#else
- " Default is 57."
-#endif
- ,
+ "Overclock or under-clock the PSX CPU. Should be much less than 100 (something like 57) due to some real hardware slowdowns not being emulated. Usually should be left at 'Auto', else glitches or hangs are likely.",
NULL,
"system",
{
+ { "auto", "Auto" },
{ "30", NULL },
{ "31", NULL },
{ "32", NULL },
{ "100", NULL },
{ NULL, NULL },
},
-#if defined(HAVE_PRE_ARMV7) && !defined(_3DS)
- "50",
-#else
- "57",
-#endif
+ "auto",
},
{
"pcsx_rearmed_dithering",
"Dithering Pattern",
NULL,
- "Enable emulation of the dithering technique used by the PSX to smooth out color banding artifacts. Increases performance requirements.",
+ "Enable emulation of the dithering technique used by the PSX to smooth out color banding artifacts. \"Force\" enables it even if the game turns it off. Increases performance requirements.",
NULL,
"video",
{
{ "disabled", NULL },
{ "enabled", NULL },
+ { "force", "Force" },
{ NULL, NULL },
},
-#if defined HAVE_LIBNX || defined _3DS
+#if defined(_3DS)
"disabled",
#else
"enabled",
"3"
},
{
- "pcsx_rearmed_display_internal_fps",
+ "pcsx_rearmed_display_fps_v2",
"Display Internal FPS",
NULL,
"Show the internal frame rate at which the emulated PlayStation system is rendering content. Note: Requires on-screen notifications to be enabled in the libretro frontend.",
{
{ "disabled", NULL },
{ "enabled", NULL },
+ { "extra", NULL },
{ NULL, NULL },
},
"disabled",
},
"auto",
},
+ {
+ "pcsx_rearmed_rgb32_output",
+ "RGB32 output",
+ NULL,
+ "Improves color depth for true color modes (most FMVs and occasional title screens). Causes higher CPU usage due to double memory bandwidth requirement, even in 15bpp modes. Takes effect on game reload only (libretro limitation).",
+ NULL,
+ "video",
+ {
+ { "disabled", NULL },
+ { "enabled", NULL },
+ { NULL, NULL },
+ },
+ "disabled",
+ },
{
"pcsx_rearmed_gpu_slow_llists",
"(GPU) Slow linked list processing",
},
"auto",
},
+ {
+ "pcsx_rearmed_show_overscan",
+ "(GPU) Horizontal overscan",
+ NULL,
+ "The PSX can display graphics way into the horizontal borders, even if most screens would crop it. This option tries to display all such graphics. Note that this may result in unusual resolutions that your device might not handle well. The 'Hack' option is intended for the widescreen hacks.",
+ NULL,
+ "video",
+ {
+ { "disabled", NULL },
+ { "auto", "Auto" },
+ { "hack", "Hack" },
+ { NULL, NULL },
+ },
+ "disabled",
+ },
{
"pcsx_rearmed_screen_centering",
"(GPU) Screen centering",
#define V(x) { #x, NULL }
{
"pcsx_rearmed_screen_centering_x",
- "(GPU) Manual screen centering X",
+ "(GPU) Manual position X",
NULL,
"X offset of the frame buffer. Only effective when 'Screen centering' is set to 'Manual'.",
NULL,
},
{
"pcsx_rearmed_screen_centering_y",
- "(GPU) Manual screen centering Y",
+ "(GPU) Manual position Y",
NULL,
"Y offset of the frame buffer. Only effective when 'Screen centering' is set to 'Manual'.",
NULL,
},
"0",
},
+ {
+ "pcsx_rearmed_screen_centering_h_adj",
+ "(GPU) Manual height adjustment",
+ NULL,
+ "Height adjustment. Only effective when 'Screen centering' is set to 'Manual'.",
+ NULL,
+ "video",
+ {
+ V(-64), V(-48), V(-40), V(-32), V(-24), V(-16), V(-8), V(-7), V(-6), V(-5), V(-4), V(-3), V(-2), V(-1), V(0),
+ { NULL, NULL },
+ },
+ "0",
+ },
#undef V
#ifdef GPU_NEON
{
{
"pcsx_rearmed_neon_enhancement_no_main",
"(GPU) Enhanced Resolution Speed Hack",
- "Enhanced Resolution Speed Hack",
+ "Enh. Res. Speed Hack",
"('Enhanced Resolution' Hack) Improves performance but reduces compatibility and may cause rendering errors.",
NULL,
"gpu_neon",
"disabled",
},
{
- "pcsx_rearmed_neon_enhancement_tex_adj",
+ "pcsx_rearmed_neon_enhancement_tex_adj_v2",
"(GPU) Enhanced Resolution Texture Adjustment",
- "Enhanced Resolution Texture Adjustment",
- "('Enhanced Resolution' Hack) Attempts to solve some texturing issues in some games, but causes new ones in others.",
+ "Enh. Res. Texture Fixup",
+ "('Enhanced Resolution' Hack) Solves some texturing issues in some games in Enhanced Resolution mode. May cause a small performance hit.",
NULL,
"gpu_neon",
{
{ "enabled", NULL },
{ NULL, NULL },
},
- "disabled",
+ "enabled",
},
#endif /* GPU_NEON */
#ifdef GPU_PEOPS
},
"disabled",
},
+#ifndef GPU_UNAI_NO_OLD
+ {
+ "pcsx_rearmed_gpu_unai_old_renderer",
+ "(GPU) Old renderer",
+ "Old renderer",
+ "This enables faster, but less accurate code.",
+ NULL,
+ "gpu_unai",
+ {
+ { "disabled", NULL },
+ { "enabled", NULL },
+ { NULL, NULL},
+ },
+ "disabled",
+ },
+#endif
{
"pcsx_rearmed_gpu_unai_blending",
"(GPU) Texture Blending",
},
"enabled",
},
+ {
+ "pcsx_rearmed_gpu_unai_skipline",
+ "(GPU) Skip every 2nd line",
+ "Skip every 2nd line",
+ "Skips every second scanline. Can be enabled to improve performance at the expense of display inaccuracies (artifacts etc.).",
+ NULL,
+ "gpu_unai",
+ {
+ { "disabled", NULL },
+ { "enabled", NULL },
+ { NULL, NULL},
+ },
+ "disabled",
+ },
{
"pcsx_rearmed_gpu_unai_lighting",
"(GPU) Lighting Effects",
{ "square", "Square" },
{ NULL, NULL },
},
- "circle",
+ "square",
},
{
"pcsx_rearmed_vibration",
"pcsx_rearmed_crosshair1",
"Player 1 Lightgun Crosshair",
NULL,
- "Toggle player 1's crosshair for the Guncon or Konami Gun",
+ "Toggle player 1's crosshair for the Guncon or Konami Gun. Only works if RGB32 output is off (video options).",
NULL,
"input",
{
"pcsx_rearmed_crosshair2",
"Player 2 Lightgun Crosshair",
NULL,
- "Toggle player 2's crosshair for the Guncon or Konami Gun",
+ "Toggle player 2's crosshair for the Guncon or Konami Gun. Only works if RGB32 output is off (video options).",
NULL,
"input",
{
},
"disabled",
},
+#ifdef _3DS
+#define V(x) { #x, NULL }
+ {
+ "pcsx_rearmed_3ds_appcputime",
+ "3DS AppCpuTimeLimit",
+ NULL,
+ "% of syscore (core #1) CPU time allocated to the emulator",
+ NULL,
+ "speed_hack",
+ {
+ V( 5), V(10),
+ V(15), V(20),
+ V(25), V(30),
+ V(35), V(40),
+ V(45), V(50),
+ V(55), V(60),
+ V(65), V(70),
+ V(75), V(80),
+ { NULL, NULL},
+ },
+ "35",
+ },
+#undef V
+#endif // _3DS
+ {
+ "pcsx_rearmed_cd_turbo",
+ "Turbo CD",
+ NULL,
+ "This makes the emulated CD-ROM extremely fast and can reduce loading times in some cases. Warning: many games were not programmed to handle such a speed. The game (or even the emulator) MAY CRASH at ANY TIME if this is enabled.",
+ NULL,
+ "speed_hack",
+ {
+ { "disabled", NULL },
+ { "enabled", NULL },
+ { NULL, NULL },
+ },
+ "disabled",
+ },
#if !defined(DRC_DISABLE) && !defined(LIGHTREC)
{
"pcsx_rearmed_nocompathacks",
"pcsx_rearmed_nostalls",
"Disable CPU/GTE Stalls",
NULL,
- "Will cause some games to run too quickly."
+ "Will cause some games to run too quickly. Should be disabled in almost all cases."
#if defined(LIGHTREC)
" Interpreter only."
#endif
,
NULL,
- "compat_hack",
+ "speed_hack",
{
{ "disabled", NULL },
{ "enabled", NULL },
#if !defined(_WIN32) && !defined(NO_DYLIB)
#include <dlfcn.h>
#endif
+#ifdef HAVE_RTHREADS
+#include "../frontend/libretro-rthreads.h"
+#endif
#include "main.h"
#include "plugin.h"
#include "../libpcsxcore/cheat.h"
#include "../libpcsxcore/sio.h"
#include "../libpcsxcore/database.h"
+#include "../libpcsxcore/cdrom-async.h"
#include "../libpcsxcore/new_dynarec/new_dynarec.h"
#include "../plugins/cdrcimg/cdrcimg.h"
#include "../plugins/dfsound/spu_config.h"
#include "arm_features.h"
#include "revision.h"
-#if defined(__has_builtin)
+#if defined(__EMSCRIPTEN__)
+#define DO_CPU_CHECKS 0
+#elif defined(__has_builtin)
#define DO_CPU_CHECKS __has_builtin(__builtin_cpu_init)
#elif defined(__x86_64__) || defined(__i386__)
#define DO_CPU_CHECKS 1
#endif
#ifndef NO_FRONTEND
+#include <sys/stat.h>
#include "libpicofe/input.h"
#include "libpicofe/plat.h"
#include "libpicofe/readpng.h"
static void toggle_fast_forward(int force_off);
static void check_profile(void);
static void check_memcards(void);
+static int get_gameid_filename(char *buf, int size, const char *fmt, int i);
+static const char *get_home_dir(void);
+#define MAKE_PATH(buf, dir, fname) \
+ emu_make_path(buf, sizeof(buf), dir, fname)
+
#endif
#ifndef BOOT_MSG
#define BOOT_MSG "Booting up..."
char hud_msg[64];
int hud_new_msg;
-static void make_path(char *buf, size_t size, const char *dir, const char *fname)
-{
- if (fname)
- snprintf(buf, size, ".%s%s", dir, fname);
- else
- snprintf(buf, size, ".%s", dir);
-}
-#define MAKE_PATH(buf, dir, fname) \
- make_path(buf, sizeof(buf), dir, fname)
-
-static int get_gameid_filename(char *buf, int size, const char *fmt, int i) {
- char trimlabel[33];
- int j;
-
- strncpy(trimlabel, CdromLabel, 32);
- trimlabel[32] = 0;
- for (j = 31; j >= 0; j--)
- if (trimlabel[j] == ' ')
- trimlabel[j] = 0;
- else
- continue;
-
- snprintf(buf, size, fmt, trimlabel, CdromId, i);
-
- return 0;
-}
-
void set_cd_image(const char *fname)
{
- const char *ext = NULL;
-
- if (fname != NULL)
- ext = strrchr(fname, '.');
-
- if (ext && (
- strcasecmp(ext, ".z") == 0 || strcasecmp(ext, ".bz") == 0 ||
- strcasecmp(ext, ".znx") == 0 /*|| strcasecmp(ext, ".pbp") == 0*/)) {
- SetIsoFile(NULL);
- cdrcimg_set_fname(fname);
- strcpy(Config.Cdr, "builtin_cdrcimg");
- } else {
- SetIsoFile(fname);
- strcpy(Config.Cdr, "builtin_cdr");
- }
+ SetIsoFile(fname);
}
static void set_default_paths(void)
{
#ifndef NO_FRONTEND
- snprintf(Config.PatchesDir, sizeof(Config.PatchesDir), "." PATCHES_DIR);
+ const char *home = get_home_dir();
+ struct stat st;
+ MAKE_PATH(Config.PatchesDir, PATCHES_DIR, NULL);
MAKE_PATH(Config.Mcd1, MEMCARD_DIR, "card1.mcd");
MAKE_PATH(Config.Mcd2, MEMCARD_DIR, "card2.mcd");
- strcpy(Config.BiosDir, "bios");
+ MAKE_PATH(Config.BiosDir, BIOS_DIR, NULL);
+
+ emu_make_data_path(Config.PluginsDir, "plugins", sizeof(Config.PluginsDir));
+
+ // prefer bios in working dir for compatibility
+ if (!strcmp(home, ".") && !stat("bios", &st))
+ strcpy(Config.BiosDir, "bios");
+
+ SysPrintf("dirs: profile=%s" PCSX_DOT_DIR ", bios=%s, plugins=%s\n",
+ home, Config.BiosDir, Config.PluginsDir);
#endif
- strcpy(Config.PluginsDir, "plugins");
strcpy(Config.Gpu, "builtin_gpu");
strcpy(Config.Spu, "builtin_spu");
- strcpy(Config.Cdr, "builtin_cdr");
- strcpy(Config.Pad1, "builtin_pad");
- strcpy(Config.Pad2, "builtin_pad");
- strcpy(Config.Net, "Disabled");
}
void emu_set_default_config(void)
Config.GpuListWalking = -1;
Config.FractionalFramerate = -1;
+ pl_rearmed_cbs.dithering = 1;
pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto
pl_rearmed_cbs.gpu_neon.enhancement_enable =
pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0;
- pl_rearmed_cbs.gpu_peops.iUseDither = 0;
+ pl_rearmed_cbs.gpu_neon.enhancement_tex_adj = 1;
pl_rearmed_cbs.gpu_peops.dwActFixes = 1<<7;
+ pl_rearmed_cbs.gpu_unai.old_renderer = 0;
pl_rearmed_cbs.gpu_unai.ilace_force = 0;
- pl_rearmed_cbs.gpu_unai.pixel_skip = 0;
pl_rearmed_cbs.gpu_unai.lighting = 1;
pl_rearmed_cbs.gpu_unai.fast_lighting = 0;
pl_rearmed_cbs.gpu_unai.blending = 1;
- pl_rearmed_cbs.gpu_unai.dithering = 0;
- pl_rearmed_cbs.gpu_unai_old.abe_hack =
- pl_rearmed_cbs.gpu_unai_old.no_light =
- pl_rearmed_cbs.gpu_unai_old.no_blend = 0;
memset(&pl_rearmed_cbs.gpu_peopsgl, 0, sizeof(pl_rearmed_cbs.gpu_peopsgl));
pl_rearmed_cbs.gpu_peopsgl.iVRamSize = 64;
pl_rearmed_cbs.gpu_peopsgl.iTexGarbageCollection = 1;
spu_config.iTempo = 1;
#endif
#endif
- new_dynarec_hacks = 0;
+ ndrc_g.hacks = 0;
in_type[0] = PSE_PAD_TYPE_STANDARD;
in_type[1] = PSE_PAD_TYPE_STANDARD;
}
+#ifndef NO_FRONTEND
+
void do_emu_action(void)
{
int ret;
ret = emu_save_state(state_slot);
snprintf(hud_msg, sizeof(hud_msg), ret == 0 ? "SAVED" : "FAIL!");
break;
-#ifndef NO_FRONTEND
case SACTION_ENTER_MENU:
toggle_fast_forward(1);
menu_loop();
scrbuf = pl_prepare_screenshot(&w, &h, &bpp);
get_gameid_filename(buf, sizeof(buf),
- "screenshots/%.32s-%.9s.%d.png", ti);
- ret = -1;
+ "%s" SCREENSHOTS_DIR "%.32s-%.9s.%d.png", ti);
+ ret = -2;
if (scrbuf != 0 && bpp == 16)
ret = writepng(buf, scrbuf, w, h);
if (ret == 0)
snprintf(hud_msg, sizeof(hud_msg), "SCREENSHOT TAKEN");
+ else
+ SysPrintf("writepng %s: %d\n", buf, ret);
break;
}
case SACTION_VOLUME_UP:
ret = padToggleAnalog(0);
snprintf(hud_msg, sizeof(hud_msg), "ANALOG %s", ret ? "ON" : "OFF");
break;
-#endif
default:
return;
}
hud_new_msg = 3;
}
+#endif
+
static char basic_lcase(char c)
{
if ('A' <= c && c <= 'Z')
SysPrintf("note: running with HLE BIOS, expect compatibility problems\n");
SysPrintf("----------------------------------------------------------\n");
}
+ if (Config.TurboCD)
+ SysPrintf("note: TurboCD is enabled, this breaks games\n");
if (show_hud_msg) {
if (check_unsatisfied_libcrypt())
#endif // DO_CPU_CHECKS
}
-#define MKSTR2(x) #x
-#define MKSTR(x) MKSTR2(x)
-static const char *get_build_info(void)
-{
- return " ("
-#ifdef __VERSION__
- "cc " __VERSION__ " "
-#endif
-#if defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 8
- "64bit "
-#elif defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 4
- "32bit "
-#endif
-#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- "be "
-#endif
-#if defined(__PIC__) || defined(__pic__)
- "pic "
-#endif
-#if defined(__aarch64__)
- "arm64"
-#elif defined(__arm__)
- "arm"
-#endif
-#ifdef __ARM_ARCH
- "v" MKSTR(__ARM_ARCH) " "
-#endif
-#if defined(__AVX__)
- "avx "
-#elif defined(__SSSE3__)
- "ssse3 "
-#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
- "neon "
-#endif
-#if defined(LIGHTREC)
- "lightrec "
-#elif !defined(DRC_DISABLE)
- "ari64 "
-#endif
- "gpu=" MKSTR(BUILTIN_GPU)
- ")";
-}
-
int emu_core_preinit(void)
{
// what is the name of the config file?
int emu_core_init(void)
{
- SysPrintf("Starting PCSX-ReARMed " REV "%s\n", get_build_info());
+ SysPrintf("Starting PCSX-ReARMed " REV " (%s)\n", get_build_info());
+ SysPrintf("build time: " __DATE__ " " __TIME__ "\n");
+
+#if defined(__arm__) && defined(__ARM_FP)
+ // RunFast mode
+ u32 fpscr = ~0;
+ __asm__ volatile("vmrs %0, fpscr" : "=r"(fpscr));
+ SysPrintf("old fpscr = %08x\n", fpscr);
+ fpscr &= ~0x00009f9f;
+ fpscr |= 0x03000000; // DN | FZ
+ __asm__ volatile("vmsr fpscr, %0" :: "r"(fpscr));
+#endif
+#ifdef HAVE_RTHREADS
+ pcsxr_sthread_init();
+#endif
#ifndef NO_FRONTEND
check_profile();
check_memcards();
void emu_core_ask_exit(void)
{
- stop++;
+ psxRegs.stop++;
g_emu_want_quit = 1;
}
#include <sys/stat.h>
#include <sys/types.h>
+static const char *get_home_dir(void)
+{
+#if defined(PANDORA) || !defined(__unix__)
+ return ".";
+#else
+ static const char *home = NULL;
+ struct stat st;
+ if (home)
+ return home;
+ // for compatibility with older versions, look for .pcsx in the working dir
+ if (stat(PCSX_DOT_DIR + 1, &st) != 0)
+ home = getenv("HOME");
+ if (home == NULL)
+ home = ".";
+ return home;
+#endif
+}
+
+void emu_make_path(char *buf, size_t size, const char *dir, const char *fname)
+{
+ const char *home = get_home_dir();
+ if (fname)
+ snprintf(buf, size, "%s%s%s", home, dir, fname);
+ else
+ snprintf(buf, size, "%s%s", home, dir);
+}
+
+void emu_make_data_path(char *buff, const char *end, int size)
+{
+ int pos, end_len;
+
+ end_len = strlen(end);
+ pos = plat_get_root_dir(buff, size);
+ strncpy(buff + pos, end, size - pos);
+ buff[size - 1] = 0;
+ if (pos + end_len > size - 1)
+ printf("Warning: path truncated: %s\n", buff);
+}
+
static void create_profile_dir(const char *directory) {
char path[MAXPATHLEN];
create_profile_dir(BIOS_DIR);
create_profile_dir(MEMCARD_DIR);
create_profile_dir(STATES_DIR);
- create_profile_dir(PLUGINS_DIR);
- create_profile_dir(PLUGINS_CFG_DIR);
create_profile_dir(CHEATS_DIR);
create_profile_dir(PATCHES_DIR);
- create_profile_dir(PCSX_DOT_DIR "cfg");
- create_profile_dir("/screenshots/");
+ create_profile_dir(CFG_DIR);
+ create_profile_dir(SCREENSHOTS_DIR);
}
static void check_memcards(void)
int i;
for (i = 1; i <= 9; i++) {
- snprintf(buf, sizeof(buf), ".%scard%d.mcd", MEMCARD_DIR, i);
+ snprintf(buf, sizeof(buf), "%s%scard%d.mcd",
+ get_home_dir(), MEMCARD_DIR, i);
f = fopen(buf, "rb");
if (f == NULL) {
// FIXME: this recovery doesn't work, just delete bad config and bail out
// SysMessage("could not load plugins, retrying with defaults\n");
set_default_paths();
- snprintf(path, sizeof(path), "." PCSX_DOT_DIR "%s", cfgfile_basename);
+ snprintf(path, sizeof(path), "%s" PCSX_DOT_DIR "%s",
+ get_home_dir(), cfgfile_basename);
remove(path);
SysMessage("Failed loading plugins!");
return 1;
if (OpenPlugins() == -1) {
return 1;
}
- plugin_call_rearmed_cbs();
CheckCdrom();
+ plugin_call_rearmed_cbs();
SysReset();
if (file[0] != '\0') {
else
menu_loop();
+#ifndef LIGHTREC_DEBUG
pl_start_watchdog();
+#endif
while (!g_emu_want_quit)
{
- stop = 0;
+ psxRegs.stop = 0;
emu_action = SACTION_NONE;
- psxCpu->Execute();
+ psxCpu->Execute(&psxRegs);
if (emu_action != SACTION_NONE)
do_emu_action();
}
}
static void SignalExit(int sig) {
+ SysPrintf("got signal %d\n", sig);
// only to restore framebuffer/resolution on some devices
plat_finish();
_exit(1);
}
-#endif
-
-void SysRunGui() {
- printf("SysRunGui\n");
-}
-
-static void CALLBACK dummy_lace()
-{
-}
-
-void SysReset() {
- // rearmed hack: EmuReset() runs some code when real BIOS is used,
- // but we usually do reset from menu while GPU is not open yet,
- // so we need to prevent updateLace() call..
- void *real_lace = GPU_updateLace;
- GPU_updateLace = dummy_lace;
- g_emu_resetting = 1;
-
- // reset can run code, timing must be set
- pl_timing_prepare(Config.PsxType);
- // hmh core forgets this
- CDR_stop();
-
- EmuReset();
-
- GPU_updateLace = real_lace;
- g_emu_resetting = 0;
-}
+static int get_gameid_filename(char *buf, int size, const char *fmt, int i) {
+ char trimlabel[33];
+ int j;
-void SysClose() {
- EmuShutdown();
- ReleasePlugins();
+ strncpy(trimlabel, CdromLabel, 32);
+ trimlabel[32] = 0;
+ for (j = 31; j >= 0; j--)
+ if (trimlabel[j] == ' ')
+ trimlabel[j] = 0;
+ else
+ continue;
- StopDebugger();
+ snprintf(buf, size, fmt, get_home_dir(), trimlabel, CdromId, i);
- if (emuLog != NULL && emuLog != stdout && emuLog != stderr) {
- fclose(emuLog);
- emuLog = NULL;
- }
+ return 0;
}
int get_state_filename(char *buf, int size, int i) {
return get_gameid_filename(buf, size,
- "." STATES_DIR "%.32s-%.9s.%3.3d", i);
+ "%s" STATES_DIR "%.32s-%.9s.%3.3d", i);
}
int emu_check_state(int slot)
return LoadState(fname);
}
+#endif // NO_FRONTEND
+
+static void CALLBACK dummy_lace(void)
+{
+}
+
+void SysReset() {
+ // rearmed hack: EmuReset() runs some code when real BIOS is used,
+ // but we usually do reset from menu while GPU is not open yet,
+ // so we need to prevent updateLace() call..
+ void *real_lace = GPU_updateLace;
+ GPU_updateLace = dummy_lace;
+ g_emu_resetting = 1;
+
+ // reset can run code, timing must be set
+ pl_timing_prepare(Config.PsxType);
+
+ EmuReset();
+
+ GPU_updateLace = real_lace;
+ g_emu_resetting = 0;
+}
+
+void SysClose() {
+ EmuShutdown();
+ ReleasePlugins();
+
+ StopDebugger();
+
+ if (emuLog != NULL && emuLog != stdout && emuLog != stderr) {
+ fclose(emuLog);
+ emuLog = NULL;
+ }
+}
+
#ifndef HAVE_LIBRETRO
#ifndef ANDROID
signal(SIGPIPE, SignalExit);
#endif
- ret = CDR_open();
- if (ret < 0) { SysMessage(_("Error opening CD-ROM plugin!")); return -1; }
+ ret = cdra_open();
+ if (UsingIso() && ret < 0) { SysMessage(_("Error opening CD-ROM plugin!")); return -1; }
ret = SPU_open();
if (ret < 0) { SysMessage(_("Error opening SPU plugin!")); return -1; }
SPU_registerCallback(SPUirq);
// pcsx-rearmed: we handle gpu elsewhere
//ret = GPU_open(&gpuDisp, "PCSX", NULL);
//if (ret < 0) { SysMessage(_("Error opening GPU plugin!")); return -1; }
- ret = PAD1_open(&gpuDisp);
- if (ret < 0) { SysMessage(_("Error opening Controller 1 plugin!")); return -1; }
- ret = PAD2_open(&gpuDisp);
- if (ret < 0) { SysMessage(_("Error opening Controller 2 plugin!")); return -1; }
-
- if (Config.UseNet && !NetOpened) {
- netInfo info;
- char path[MAXPATHLEN * 2];
- char dotdir[MAXPATHLEN];
-
- MAKE_PATH(dotdir, "/.pcsx/plugins/", NULL);
-
- strcpy(info.EmuName, "PCSX");
- memcpy(info.CdromID, CdromId, 9); /* no \0 trailing character? */
- memcpy(info.CdromLabel, CdromLabel, 9);
- info.CdromLabel[9] = '\0';
- info.psxMem = psxM;
- info.GPU_showScreenPic = GPU_showScreenPic;
- info.GPU_displayText = GPU_displayText;
- info.GPU_showScreenPic = GPU_showScreenPic;
- info.PAD_setSensitive = PAD1_setSensitive;
- sprintf(path, "%s%s", Config.BiosDir, Config.Bios);
- strcpy(info.BIOSpath, path);
- strcpy(info.MCD1path, Config.Mcd1);
- strcpy(info.MCD2path, Config.Mcd2);
- sprintf(path, "%s%s", dotdir, Config.Gpu);
- strcpy(info.GPUpath, path);
- sprintf(path, "%s%s", dotdir, Config.Spu);
- strcpy(info.SPUpath, path);
- sprintf(path, "%s%s", dotdir, Config.Cdr);
- strcpy(info.CDRpath, path);
- NET_setInfo(&info);
-
- ret = NET_open(&gpuDisp);
- if (ret < 0) {
- if (ret == -2) {
- // -2 is returned when something in the info
- // changed and needs to be synced
- char *ptr;
-
- PARSEPATH(Config.Bios, info.BIOSpath);
- PARSEPATH(Config.Gpu, info.GPUpath);
- PARSEPATH(Config.Spu, info.SPUpath);
- PARSEPATH(Config.Cdr, info.CDRpath);
-
- strcpy(Config.Mcd1, info.MCD1path);
- strcpy(Config.Mcd2, info.MCD2path);
- return -2;
- } else {
- Config.UseNet = FALSE;
- }
- } else {
- if (NET_queryPlayer() == 1) {
- if (SendPcsxInfo() == -1) Config.UseNet = FALSE;
- } else {
- if (RecvPcsxInfo() == -1) Config.UseNet = FALSE;
- }
- }
- NetOpened = TRUE;
- } else if (Config.UseNet) {
- NET_resume();
- }
return 0;
}
signal(SIGPIPE, SIG_DFL);
#endif
- ret = CDR_close();
- if (ret < 0) { SysMessage(_("Error closing CD-ROM plugin!")); return; }
+ cdra_close();
ret = SPU_close();
- if (ret < 0) { SysMessage(_("Error closing SPU plugin!")); return; }
- ret = PAD1_close();
- if (ret < 0) { SysMessage(_("Error closing Controller 1 Plugin!")); return; }
- ret = PAD2_close();
- if (ret < 0) { SysMessage(_("Error closing Controller 2 plugin!")); return; }
+ if (ret < 0) { SysMessage(_("Error closing SPU plugin!")); }
// pcsx-rearmed: we handle gpu elsewhere
//ret = GPU_close();
//if (ret < 0) { SysMessage(_("Error closing GPU plugin!")); return; }
-
- if (Config.UseNet) {
- NET_pause();
- }
}
/* we hook statically linked plugins here */
static const char *builtin_plugins[] = {
- "builtin_gpu", "builtin_spu", "builtin_cdr", "builtin_pad",
- "builtin_cdrcimg",
+ "builtin_gpu", "builtin_spu"
};
static const int builtin_plugin_ids[] = {
- PLUGIN_GPU, PLUGIN_SPU, PLUGIN_CDR, PLUGIN_PAD,
- PLUGIN_CDRCIMG,
+ PLUGIN_GPU, PLUGIN_SPU
};
void *SysLoadLibrary(const char *lib) {
#ifndef __FRONTEND_MAIN_H__
#define __FRONTEND_MAIN_H__
+#include <stdlib.h>
#include "config.h"
-#define DEFAULT_MEM_CARD_1 "/.pcsx/memcards/card1.mcd"
-#define DEFAULT_MEM_CARD_2 "/.pcsx/memcards/card2.mcd"
-#define MEMCARD_DIR "/.pcsx/memcards/"
-#define PLUGINS_DIR "/.pcsx/plugins/"
-#define PLUGINS_CFG_DIR "/.pcsx/plugins/cfg/"
#define PCSX_DOT_DIR "/.pcsx/"
-#define STATES_DIR "/.pcsx/sstates/"
-#define CHEATS_DIR "/.pcsx/cheats/"
-#define PATCHES_DIR "/.pcsx/patches/"
-#define BIOS_DIR "/bios/"
+#define DEFAULT_MEM_CARD_1 PCSX_DOT_DIR "memcards/card1.mcd"
+#define DEFAULT_MEM_CARD_2 PCSX_DOT_DIR "memcards/card2.mcd"
+#define MEMCARD_DIR PCSX_DOT_DIR "memcards/"
+#define STATES_DIR PCSX_DOT_DIR "sstates/"
+#define CHEATS_DIR PCSX_DOT_DIR "cheats/"
+#define PATCHES_DIR PCSX_DOT_DIR "patches/"
+#define CFG_DIR PCSX_DOT_DIR "cfg/"
+#ifndef PANDORA
+#define BIOS_DIR PCSX_DOT_DIR "bios/"
+#define SCREENSHOTS_DIR PCSX_DOT_DIR "screenshots/"
+#else
+#define BIOS_DIR "/bios/"
+#define SCREENSHOTS_DIR "/screenshots/"
+#endif
extern char cfgfile_basename[MAXPATHLEN];
void emu_set_default_config(void);
void emu_on_new_cd(int show_hud_msg);
+void emu_make_path(char *buf, size_t size, const char *dir, const char *fname);
+void emu_make_data_path(char *buff, const char *end, int size);
+
int get_state_filename(char *buf, int size, int i);
int emu_check_state(int slot);
int emu_save_state(int slot);
#define SACTION_GUN_MASK (0x0f << SACTION_GUN_TRIGGER)
-static inline void emu_set_action(enum sched_action action_)
-{
- extern enum sched_action emu_action, emu_action_old;
- extern int stop;
-
- if (action_ == SACTION_NONE)
- emu_action_old = 0;
- else if (action_ != emu_action_old)
- stop++;
- emu_action = action_;
-}
-
#endif /* __FRONTEND_MAIN_H__ */
*/
#define _GNU_SOURCE 1
-#ifdef __FreeBSD__
-#define STAT stat
-#else
-#define STAT stat64
-#endif
#include <stdio.h>
#include <string.h>
#include <errno.h>
+#ifndef NO_DYLIB
#include <dlfcn.h>
+#endif
#include <zlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "libpicofe/plat.h"
#include "../libpcsxcore/misc.h"
#include "../libpcsxcore/cdrom.h"
+#include "../libpcsxcore/cdrom-async.h"
#include "../libpcsxcore/cdriso.h"
#include "../libpcsxcore/cheat.h"
+#include "../libpcsxcore/ppf.h"
#include "../libpcsxcore/new_dynarec/new_dynarec.h"
#include "../plugins/dfsound/spu_config.h"
#include "psemu_plugin_defs.h"
+#include "compiler_features.h"
#include "arm_features.h"
#include "revision.h"
#define REARMED_BIRTHDAY_TIME 1293306830 /* 25 Dec 2010 */
+#if defined(__linux__) && (!defined(__SIZEOF_POINTER__) || __SIZEOF_POINTER__ == 4)
+#define STAT stat64
+#else
+#define STAT stat
+#endif
#define array_size(x) (sizeof(x) / sizeof(x[0]))
MA_OPT_SWFILTER,
MA_OPT_GAMMA,
MA_OPT_VOUT_MODE,
+ MA_OPT_VOUT_FULL,
MA_OPT_SCANLINES,
MA_OPT_SCANLINE_LEVEL,
MA_OPT_CENTERING,
+ MA_OPT_OVERSCAN,
+ MA_OPT_VSYNC,
} menu_id;
static int last_vout_w, last_vout_h, last_vout_bpp;
static int config_save_counter, region, in_type_sel1, in_type_sel2;
static int psx_clock;
static int memcard1_sel = -1, memcard2_sel = -1;
+static int cd_buf_count;
extern int g_autostateld_opt;
static int menu_iopts[8];
int g_opts, g_scaler, g_gamma = 100;
int scanlines, scanline_level = 20;
int soft_scaling, analog_deadzone; // for Caanoo
int soft_filter;
+int in_evdev_allow_abs_only attr_weak; // FIXME
#ifndef HAVE_PRE_ARMV7
#define DEFAULT_PSX_CLOCK (10000 / CYCLE_MULT_DEFAULT)
static int bios_sel, gpu_plugsel, spu_plugsel;
#ifndef UI_FEATURES_H
-#define MENU_BIOS_PATH "bios/"
-#define MENU_SHOW_VARSCALER 0
#define MENU_SHOW_VOUTMODE 1
#define MENU_SHOW_SCALER2 0
#define MENU_SHOW_NUBS_BTNS 0
#define MENU_SHOW_FULLSCREEN 1
#define MENU_SHOW_VOLUME 0
#endif
+#ifndef MENU_SHOW_VARSCALER
+#define MENU_SHOW_VARSCALER 0
+#endif
+#ifndef MENU_SHOW_VARSCALER_C
+#define MENU_SHOW_VARSCALER_C 0
+#endif
static int min(int x, int y) { return x < y ? x : y; }
static int max(int x, int y) { return x > y ? x : y; }
-void emu_make_path(char *buff, const char *end, int size)
-{
- int pos, end_len;
-
- end_len = strlen(end);
- pos = plat_get_root_dir(buff, size);
- strncpy(buff + pos, end, size - pos);
- buff[size - 1] = 0;
- if (pos + end_len > size - 1)
- printf("Warning: path truncated: %s\n", buff);
-}
-
static int emu_check_save_file(int slot, int *time)
{
char fname[MAXPATHLEN];
CE_CONFIG_VAL(GpuListWalking),
CE_CONFIG_VAL(FractionalFramerate),
CE_CONFIG_VAL(PreciseExceptions),
+ CE_CONFIG_VAL(TurboCD),
+ CE_CONFIG_VAL(SlowBoot),
CE_INTVAL(region),
CE_INTVAL_V(g_scaler, 3),
CE_INTVAL(g_gamma),
CE_INTVAL(memcard1_sel),
CE_INTVAL(memcard2_sel),
CE_INTVAL(g_autostateld_opt),
+ CE_INTVAL(cd_buf_count),
+ CE_INTVAL_N("adev0_axis0", in_adev_axis[0][0]),
+ CE_INTVAL_N("adev0_axis1", in_adev_axis[0][1]),
+ CE_INTVAL_N("adev1_axis0", in_adev_axis[1][0]),
+ CE_INTVAL_N("adev1_axis1", in_adev_axis[1][1]),
CE_INTVAL_N("adev0_is_nublike", in_adev_is_nublike[0]),
CE_INTVAL_N("adev1_is_nublike", in_adev_is_nublike[1]),
CE_INTVAL_V(frameskip, 4),
- CE_INTVAL_P(gpu_peops.iUseDither),
+ CE_INTVAL_PV(dithering, 2),
CE_INTVAL_P(gpu_peops.dwActFixes),
- CE_INTVAL_P(gpu_unai_old.lineskip),
- CE_INTVAL_P(gpu_unai_old.abe_hack),
- CE_INTVAL_P(gpu_unai_old.no_light),
- CE_INTVAL_P(gpu_unai_old.no_blend),
+ CE_INTVAL_P(gpu_unai.old_renderer),
CE_INTVAL_P(gpu_unai.ilace_force),
- CE_INTVAL_P(gpu_unai.pixel_skip),
CE_INTVAL_P(gpu_unai.lighting),
CE_INTVAL_P(gpu_unai.fast_lighting),
CE_INTVAL_P(gpu_unai.blending),
- CE_INTVAL_P(gpu_unai.dithering),
CE_INTVAL_P(gpu_unai.scale_hires),
CE_INTVAL_P(gpu_neon.allow_interlace),
CE_INTVAL_P(gpu_neon.enhancement_enable),
CE_INTVAL_P(gpu_neon.enhancement_no_main),
- CE_INTVAL_P(gpu_neon.enhancement_tex_adj),
+ CE_INTVAL_PV(gpu_neon.enhancement_tex_adj, 2),
CE_INTVAL_P(gpu_peopsgl.bDrawDither),
CE_INTVAL_P(gpu_peopsgl.iFilterType),
CE_INTVAL_P(gpu_peopsgl.iFrameTexType),
CE_INTVAL_P(screen_centering_type),
CE_INTVAL_P(screen_centering_x),
CE_INTVAL_P(screen_centering_y),
+ CE_INTVAL_P(screen_centering_h_adj),
+ CE_INTVAL_P(show_overscan),
CE_INTVAL(spu_config.iUseReverb),
CE_INTVAL(spu_config.iXAPitch),
CE_INTVAL(spu_config.iUseInterpolation),
CE_INTVAL(in_evdev_allow_abs_only),
CE_INTVAL(volume_boost),
CE_INTVAL(psx_clock),
- CE_INTVAL(new_dynarec_hacks),
+ CE_INTVAL(ndrc_g.hacks),
CE_INTVAL(in_enable_vibration),
};
static void make_cfg_fname(char *buf, size_t size, int is_game)
{
- if (is_game)
- snprintf(buf, size, "." PCSX_DOT_DIR "cfg/%.32s-%.9s.cfg", get_cd_label(), CdromId);
+ char id_buf[64];
+ if (is_game) {
+ snprintf(id_buf, sizeof(id_buf), "%.32s-%.9s.cfg",
+ get_cd_label(), CdromId);
+ emu_make_path(buf, size, CFG_DIR, id_buf);
+ }
else
- snprintf(buf, size, "." PCSX_DOT_DIR "%s", cfgfile_basename);
+ emu_make_path(buf, size, PCSX_DOT_DIR, cfgfile_basename);
}
static void keys_write_all(FILE *f);
return -1;
}
+ cd_buf_count = cdra_get_buf_count();
+
for (i = 0; i < ARRAY_SIZE(config_data); i++) {
fprintf(f, "%s = ", config_data[i].name);
switch (config_data[i].len) {
FILE *f;
int i, ret = -1;
- snprintf(path, sizeof(path), "." PCSX_DOT_DIR "lastcdimg.txt");
+ emu_make_path(path, sizeof(path), PCSX_DOT_DIR, "lastcdimg.txt");
f = fopen(path, is_get ? "r" : "w");
if (f == NULL) {
ret = -1;
}
keys_load_all(cfg);
+ cdra_set_buf_count(cd_buf_count);
ret = 0;
fail_read:
free(cfg);
#ifdef HAVE_CHD
"chd",
#endif
- "bz", "znx", "pbp", "cbn", NULL
+ "bz", "znx", "pbp", "cbn", "ppf", NULL
};
// rrrr rggg gggb bbbb
static int key_config_loop_wrap(int id, int keys)
{
+ int d;
+
+ for (d = 0; d < IN_MAX_DEVS; d++)
+ in_set_config_int(d, IN_CFG_ANALOG_MAP_ULDR, 0);
switch (id) {
case MA_CTRL_PLAYER1:
key_config_loop(me_ctrl_actions, array_size(me_ctrl_actions) - 1, 0);
default:
break;
}
+ for (d = 0; d < IN_MAX_DEVS; d++)
+ in_set_config_int(d, IN_CFG_ANALOG_MAP_ULDR, 1);
+
return 0;
}
// ------------ gfx options menu ------------
static const char *men_scaler[] = {
- "1x1", "integer scaled 2x", "scaled 4:3", "integer scaled 4:3", "fullscreen", "custom", NULL
+ "1x1", "integer scaled 2x", "scaled 4:3", "integer scaled 4:3", "fullscreen",
+#if MENU_SHOW_VARSCALER_C
+ "custom",
+#endif
+ NULL
};
static const char *men_soft_filter[] = { "None",
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON32
"scale2x", "eagle2x",
#endif
NULL };
static const char *men_dummy[] = { NULL };
static const char *men_centering[] = { "Auto", "Ingame", "Borderless", "Force", NULL };
+static const char *men_overscan[] = { "OFF", "Auto", "Hack", NULL };
static const char h_scaler[] = "int. 2x - scales w. or h. 2x if it fits on screen\n"
"int. 4:3 - uses integer if possible, else fractional";
static const char h_cscaler[] = "Displays the scaler layer, you can resize it\n"
"using d-pad or move it using R+d-pad";
static const char h_soft_filter[] = "Works only if game uses low resolution modes";
static const char h_gamma[] = "Gamma/brightness adjustment (default 100)";
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON32
static const char *men_scanlines[] = { "OFF", "1", "2", "3", NULL };
static const char h_scanline_l[] = "Scanline brightness, 0-100%";
#endif
static menu_entry e_menu_gfx_options[] =
{
- mee_enum ("Screen centering", MA_OPT_CENTERING, pl_rearmed_cbs.screen_centering_type, men_centering),
+ mee_enum ("PSX Screen centering", MA_OPT_CENTERING, pl_rearmed_cbs.screen_centering_type, men_centering),
+ mee_enum ("Show overscan", MA_OPT_OVERSCAN, pl_rearmed_cbs.show_overscan, men_overscan),
mee_enum_h ("Scaler", MA_OPT_VARSCALER, g_scaler, men_scaler, h_scaler),
mee_enum ("Video output mode", MA_OPT_VOUT_MODE, plat_target.vout_method, men_dummy),
+ mee_onoff ("Fullscreen mode", MA_OPT_VOUT_FULL, plat_target.vout_fullscreen, 1),
mee_onoff ("Software Scaling", MA_OPT_SCALER2, soft_scaling, 1),
- mee_enum ("Hardware Filter", MA_OPT_HWFILTER, plat_target.hwfilter, men_dummy),
mee_enum_h ("Software Filter", MA_OPT_SWFILTER, soft_filter, men_soft_filter, h_soft_filter),
-#ifdef __ARM_NEON__
+ mee_enum ("Hardware Filter", MA_OPT_HWFILTER, plat_target.hwfilter, men_dummy),
+#ifdef HAVE_NEON32
mee_enum ("Scanlines", MA_OPT_SCANLINES, scanlines, men_scanlines),
mee_range_h ("Scanline brightness", MA_OPT_SCANLINE_LEVEL, scanline_level, 0, 100, h_scanline_l),
#endif
mee_range_h ("Gamma adjustment", MA_OPT_GAMMA, g_gamma, 1, 200, h_gamma),
-// mee_onoff ("Vsync", 0, vsync, 1),
+ mee_onoff ("OpenGL Vsync", MA_OPT_VSYNC, g_opts, OPT_VSYNC),
mee_cust_h ("Setup custom scaler", MA_OPT_VARSCALER_C, menu_loop_cscaler, NULL, h_cscaler),
mee_end,
};
// ------------ bios/plugins ------------
-#ifdef BUILTIN_GPU_NEON
-
-static const char h_gpu_neon[] =
- "Configure built-in NEON GPU plugin";
static const char h_gpu_neon_enhanced[] =
- "Renders in double resolution at the cost of lower performance\n"
+ "Renders in double resolution at perf. cost\n"
"(not available for high resolution games)";
static const char h_gpu_neon_enhanced_hack[] =
"Speed hack for above option (glitches some games)";
+static const char h_gpu_neon_enhanced_texadj[] =
+ "Solves some Enh. res. texture issues, some perf hit";
static const char *men_gpu_interlace[] = { "Off", "On", "Auto", NULL };
static menu_entry e_menu_plugin_gpu_neon[] =
{
- mee_enum ("Enable interlace mode", 0, pl_rearmed_cbs.gpu_neon.allow_interlace, men_gpu_interlace),
mee_onoff_h ("Enhanced resolution", 0, pl_rearmed_cbs.gpu_neon.enhancement_enable, 1, h_gpu_neon_enhanced),
mee_onoff_h ("Enhanced res. speed hack", 0, pl_rearmed_cbs.gpu_neon.enhancement_no_main, 1, h_gpu_neon_enhanced_hack),
- mee_onoff ("Enh. res. texture adjust", 0, pl_rearmed_cbs.gpu_neon.enhancement_tex_adj, 1),
- mee_end,
-};
-
-static int menu_loop_plugin_gpu_neon(int id, int keys)
-{
- static int sel = 0;
- me_loop(e_menu_plugin_gpu_neon, &sel);
- return 0;
-}
-
-#endif
-
-static menu_entry e_menu_plugin_gpu_unai_old[] =
-{
- mee_onoff ("Skip every 2nd line", 0, pl_rearmed_cbs.gpu_unai_old.lineskip, 1),
- mee_onoff ("Abe's Odyssey hack", 0, pl_rearmed_cbs.gpu_unai_old.abe_hack, 1),
- mee_onoff ("Disable lighting", 0, pl_rearmed_cbs.gpu_unai_old.no_light, 1),
- mee_onoff ("Disable blending", 0, pl_rearmed_cbs.gpu_unai_old.no_blend, 1),
+ mee_onoff_h ("Enh. res. texture adjust", 0, pl_rearmed_cbs.gpu_neon.enhancement_tex_adj, 1, h_gpu_neon_enhanced_texadj),
+ mee_enum ("Enable interlace mode", 0, pl_rearmed_cbs.gpu_neon.allow_interlace, men_gpu_interlace),
mee_end,
};
-static int menu_loop_plugin_gpu_unai_old(int id, int keys)
-{
- int sel = 0;
- me_loop(e_menu_plugin_gpu_unai_old, &sel);
- return 0;
-}
-
static menu_entry e_menu_plugin_gpu_unai[] =
{
- mee_onoff ("Interlace", 0, pl_rearmed_cbs.gpu_unai.ilace_force, 1),
- mee_onoff ("Dithering", 0, pl_rearmed_cbs.gpu_unai.dithering, 1),
+ mee_onoff ("Old renderer", 0, pl_rearmed_cbs.gpu_unai.old_renderer, 1),
+ mee_onoff ("Skip every 2nd line", 0, pl_rearmed_cbs.gpu_unai.ilace_force, 1),
mee_onoff ("Lighting", 0, pl_rearmed_cbs.gpu_unai.lighting, 1),
mee_onoff ("Fast lighting", 0, pl_rearmed_cbs.gpu_unai.fast_lighting, 1),
mee_onoff ("Blending", 0, pl_rearmed_cbs.gpu_unai.blending, 1),
- mee_onoff ("Pixel skip", 0, pl_rearmed_cbs.gpu_unai.pixel_skip, 1),
mee_end,
};
-static int menu_loop_plugin_gpu_unai(int id, int keys)
-{
- int sel = 0;
- me_loop(e_menu_plugin_gpu_unai, &sel);
- return 0;
-}
-
-
-static const char *men_gpu_dithering[] = { "None", "Game dependant", "Always", NULL };
//static const char h_gpu_0[] = "Needed for Chrono Cross";
static const char h_gpu_1[] = "Capcom fighting games";
static const char h_gpu_2[] = "Black screens in Lunar";
static menu_entry e_menu_plugin_gpu_peops[] =
{
- mee_enum ("Dithering", 0, pl_rearmed_cbs.gpu_peops.iUseDither, men_gpu_dithering),
// mee_onoff_h ("Odd/even bit hack", 0, pl_rearmed_cbs.gpu_peops.dwActFixes, 1<<0, h_gpu_0),
mee_onoff_h ("Expand screen width", 0, pl_rearmed_cbs.gpu_peops.dwActFixes, 1<<1, h_gpu_1),
mee_onoff_h ("Ignore brightness color", 0, pl_rearmed_cbs.gpu_peops.dwActFixes, 1<<2, h_gpu_2),
mee_end,
};
-static int menu_loop_plugin_gpu_peops(int id, int keys)
-{
- static int sel = 0;
- me_loop(e_menu_plugin_gpu_peops, &sel);
- return 0;
-}
-
static const char *men_peopsgl_texfilter[] = { "None", "Standard", "Extended",
"Standard-sprites", "Extended-sprites", "Standard+sprites", "Extended+sprites", NULL };
static const char *men_peopsgl_fbtex[] = { "Emulated VRam", "Black", "Card", "Card+soft" };
mee_end,
};
-static int menu_loop_plugin_gpu_peopsgl(int id, int keys)
-{
- static int sel = 0;
- me_loop(e_menu_plugin_gpu_peopsgl, &sel);
- return 0;
-}
-
static const char *men_spu_interp[] = { "None", "Simple", "Gaussian", "Cubic", NULL };
static const char h_spu_volboost[] = "Large values cause distortion";
static const char h_spu_tempo[] = "Slows down audio if emu is too slow\n"
return 0;
}
+static const char *men_gpu_dithering[] = { "OFF", "ON", "Force", NULL };
+
static const char h_bios[] = "HLE is simulated BIOS. BIOS selection is saved in\n"
"savestates and can't be changed there. Must save\n"
"config and reload the game for change to take effect";
static const char h_plugin_gpu[] =
-#ifdef BUILTIN_GPU_NEON
+#if defined(BUILTIN_GPU_NEON)
"builtin_gpu is the NEON GPU, very fast and accurate\n"
+#elif defined(BUILTIN_GPU_PEOPS)
+ "builtin_gpu is the P.E.Op.S GPU, slow but accurate\n"
+#elif defined(BUILTIN_GPU_UNAI)
+ "builtin_gpu is the Unai GPU, very fast\n"
+#endif
+#ifndef NO_DYLIB
+#if !defined(BUILTIN_GPU_NEON) && defined(GPU_NEON)
+ "gpu_neon is Exophase's NEON GPU, fast and accurate\n"
#endif
+#ifndef BUILTIN_GPU_PEOPS
"gpu_peops is Pete's soft GPU, slow but accurate\n"
- "gpu_unai_old is from old PCSX4ALL, fast but glitchy\n"
- "gpu_unai is newer, more accurate but slower\n"
+#endif
+#ifndef BUILTIN_GPU_UNAI
+ "gpu_unai is the GPU renderer from PCSX4ALL\n"
+#endif
+#ifdef HAVE_GLES
"gpu_gles Pete's hw GPU, uses 3D chip but is glitchy\n"
- "must save config and reload the game if changed";
-static const char h_plugin_spu[] = "spunull effectively disables sound\n"
- "must save config and reload the game if changed";
-static const char h_gpu_peops[] = "Configure P.E.Op.S. SoftGL Driver V1.17";
-static const char h_gpu_peopsgl[]= "Configure P.E.Op.S. MesaGL Driver V1.78";
-static const char h_gpu_unai_old[] = "Configure Unai/PCSX4ALL Team GPU plugin (old)";
-static const char h_gpu_unai[] = "Configure Unai/PCSX4ALL Team plugin (new)";
+#endif
+ "must save config and reload the game if changed"
+#endif
+ ;
+static const char h_plugin_spu[] = ""
+#ifndef NO_DYLIB
+ "spunull effectively disables sound\n"
+ "must save config and reload the game if changed"
+#endif
+;
+// static const char h_gpu_peops[] = "Configure P.E.Op.S. SoftGL Driver V1.17";
+// static const char h_gpu_peopsgl[]= "Configure P.E.Op.S. MesaGL Driver V1.78";
+// static const char h_gpu_unai[] = "Configure Unai/PCSX4ALL Team plugin (new)";
static const char h_spu[] = "Configure built-in P.E.Op.S. Sound Driver V1.7";
+static int menu_loop_pluginsel_options(int id, int keys)
+{
+ static int sel = 0;
+ if (strcmp(gpu_plugins[gpu_plugsel], "gpu_peops.so") == 0)
+ me_loop(e_menu_plugin_gpu_peops, &sel);
+ else if (strcmp(gpu_plugins[gpu_plugsel], "gpu_unai.so") == 0)
+ me_loop(e_menu_plugin_gpu_unai, &sel);
+ else if (strcmp(gpu_plugins[gpu_plugsel], "gpu_gles.so") == 0)
+ me_loop(e_menu_plugin_gpu_peopsgl, &sel);
+ else if (strcmp(gpu_plugins[gpu_plugsel], "gpu_neon.so") == 0)
+ me_loop(e_menu_plugin_gpu_neon, &sel);
+ else
+#if defined(BUILTIN_GPU_NEON)
+ me_loop(e_menu_plugin_gpu_neon, &sel);
+#elif defined(BUILTIN_GPU_PEOPS)
+ me_loop(e_menu_plugin_gpu_peops, &sel);
+#elif defined(BUILTIN_GPU_UNAI)
+ me_loop(e_menu_plugin_gpu_unai, &sel);
+#endif
+ return 0;
+}
+
static menu_entry e_menu_plugin_options[] =
{
mee_enum_h ("BIOS", 0, bios_sel, bioses, h_bios),
+ mee_enum ("GPU Dithering", 0, pl_rearmed_cbs.dithering, men_gpu_dithering),
mee_enum_h ("GPU plugin", 0, gpu_plugsel, gpu_plugins, h_plugin_gpu),
mee_enum_h ("SPU plugin", 0, spu_plugsel, spu_plugins, h_plugin_spu),
-#ifdef BUILTIN_GPU_NEON
- mee_handler_h ("Configure built-in GPU plugin", menu_loop_plugin_gpu_neon, h_gpu_neon),
-#endif
- mee_handler_h ("Configure gpu_peops plugin", menu_loop_plugin_gpu_peops, h_gpu_peops),
- mee_handler_h ("Configure gpu_unai_old GPU plugin", menu_loop_plugin_gpu_unai_old, h_gpu_unai_old),
- mee_handler_h ("Configure gpu_unai GPU plugin", menu_loop_plugin_gpu_unai, h_gpu_unai),
- mee_handler_h ("Configure gpu_gles GPU plugin", menu_loop_plugin_gpu_peopsgl, h_gpu_peopsgl),
+ mee_handler ("Configure selected GPU plugin", menu_loop_pluginsel_options),
mee_handler_h ("Configure built-in SPU plugin", menu_loop_plugin_spu, h_spu),
mee_end,
};
static menu_entry e_menu_speed_hacks[] =
{
#ifndef DRC_DISABLE
- mee_onoff_h ("Disable compat hacks", 0, new_dynarec_hacks, NDHACK_NO_COMPAT_HACKS, h_cfg_noch),
- mee_onoff_h ("Disable SMC checks", 0, new_dynarec_hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc),
- mee_onoff_h ("Assume GTE regs unneeded", 0, new_dynarec_hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn),
- mee_onoff_h ("Disable GTE flags", 0, new_dynarec_hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs),
+ mee_onoff_h ("Disable compat hacks", 0, ndrc_g.hacks, NDHACK_NO_COMPAT_HACKS, h_cfg_noch),
+ mee_onoff_h ("Disable SMC checks", 0, ndrc_g.hacks, NDHACK_NO_SMC_CHECK, h_cfg_nosmc),
+ mee_onoff_h ("Assume GTE regs unneeded", 0, ndrc_g.hacks, NDHACK_GTE_UNNEEDED, h_cfg_gteunn),
+ mee_onoff_h ("Disable GTE flags", 0, ndrc_g.hacks, NDHACK_GTE_NO_FLAGS, h_cfg_gteflgs),
#endif
mee_onoff_h ("Disable CPU/GTE stalls", 0, menu_iopts[0], 1, h_cfg_stalls),
mee_end,
"causes a performance hit";
static const char h_cfg_ffps[] = "Instead of 50/60fps for PAL/NTSC use ~49.75/59.81\n"
"Closer to real hw but doesn't match modern displays.";
+static const char h_cfg_tcd[] = "Greatly reduce CD load times. Breaks some games.";
static const char h_cfg_psxclk[] = "Over/under-clock the PSX, default is " DEFAULT_PSX_CLOCK_S "\n"
"(adjust this if the game is too slow/too fast/hangs)";
-enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_BP, AMO_CPU, AMO_GPUL, AMO_FFPS };
+enum { AMO_XA, AMO_CDDA, AMO_IC, AMO_BP, AMO_CPU, AMO_GPUL, AMO_FFPS, AMO_TCD };
static menu_entry e_menu_adv_options[] =
{
mee_onoff_h ("BP exception emulation", 0, menu_iopts[AMO_BP], 1, h_cfg_exc),
mee_enum_h ("GPU l-list slow walking",0, menu_iopts[AMO_GPUL], men_autooo, h_cfg_gpul),
mee_enum_h ("Fractional framerate", 0, menu_iopts[AMO_FFPS], men_autooo, h_cfg_ffps),
+ mee_onoff_h ("Turbo CD-ROM ", 0, menu_iopts[AMO_TCD], 1, h_cfg_tcd),
+#ifdef USE_ASYNC_CDROM
+ mee_range ("CD-ROM read-ahead", 0, cd_buf_count, 0, 1024),
+#endif
#if !defined(DRC_DISABLE) || defined(LIGHTREC)
mee_onoff_h ("Disable dynarec (slow!)",0, menu_iopts[AMO_CPU], 1, h_cfg_nodrc),
#endif
{ &Config.icache_emulation, &menu_iopts[AMO_IC] },
{ &Config.PreciseExceptions, &menu_iopts[AMO_BP] },
{ &Config.Cpu, &menu_iopts[AMO_CPU] },
+ { &Config.TurboCD, &menu_iopts[AMO_TCD] },
};
int i;
for (i = 0; i < ARRAY_SIZE(opts); i++)
*opts[i].opt = *opts[i].mopt;
Config.GpuListWalking = menu_iopts[AMO_GPUL] - 1;
Config.FractionalFramerate = menu_iopts[AMO_FFPS] - 1;
+ cdra_set_buf_count(cd_buf_count);
return 0;
}
{
strcpy(Config.Mcd1, "none");
if (memcard1_sel != 0)
- snprintf(Config.Mcd1, sizeof(Config.Mcd1), ".%s%s", MEMCARD_DIR, memcards[memcard1_sel]);
+ emu_make_path(Config.Mcd1, sizeof(Config.Mcd1), MEMCARD_DIR, memcards[memcard1_sel]);
strcpy(Config.Mcd2, "none");
if (memcard2_sel != 0)
- snprintf(Config.Mcd2, sizeof(Config.Mcd2), ".%s%s", MEMCARD_DIR, memcards[memcard2_sel]);
+ emu_make_path(Config.Mcd2, sizeof(Config.Mcd2), MEMCARD_DIR, memcards[memcard2_sel]);
LoadMcds(Config.Mcd1, Config.Mcd2);
draw_mc_bg();
}
int inp;
static const char msg[] =
"You don't seem to have copied any BIOS\n"
- "files to\n"
- MENU_BIOS_PATH "\n\n"
+ "files to\n%s\n\n"
"While many games work fine with fake\n"
"(HLE) BIOS, others (like MGS and FF8)\n"
"Press %s or %s to continue";
char tmp_msg[sizeof(msg) + 64];
- snprintf(tmp_msg, sizeof(tmp_msg), msg,
+ snprintf(tmp_msg, sizeof(tmp_msg), msg, Config.BiosDir,
in_get_key_name(-1, -PBTN_MOK), in_get_key_name(-1, -PBTN_MBACK));
while (1)
{
"(C) 2005-2009 PCSX-df Team\n"
"(C) 2009-2011 PCSX-Reloaded Team\n\n"
"ARM recompiler (C) 2009-2011 Ari64\n"
-#ifdef BUILTIN_GPU_NEON
"ARM NEON GPU (c) 2011-2012 Exophase\n"
-#endif
"PEOpS GPU and SPU by Pete Bernert\n"
" and the P.E.Op.S. team\n"
"PCSX4ALL plugin by PCSX4ALL team\n"
set_cd_image(cdimg);
LoadPlugins();
pcnt_hook_plugins();
- NetOpened = 0;
if (OpenPlugins() == -1) {
menu_update_msg("failed to open plugins");
return -1;
static int run_cd_image(const char *fname)
{
int autoload_state = g_autostateld_opt;
+ size_t fname_len = strlen(fname);
+ const char *ppfname = NULL;
+ char fname2[256];
+
+ // simple ppf handling, like game.chd.ppf
+ if (4 < fname_len && fname_len < sizeof(fname2)
+ && strcasecmp(fname + fname_len - 4, ".ppf") == 0) {
+ memcpy(fname2, fname, fname_len - 4);
+ fname2[fname_len - 4] = 0;
+ ppfname = fname;
+ fname = fname2;
+ }
ready_to_go = 0;
reload_plugins(fname);
menu_update_msg("unsupported/invalid CD image");
return -1;
}
+ if (ppfname)
+ BuildPPFCache(ppfname);
SysReset();
if (autoload_state) {
unsigned int newest = 0;
- int time, slot, newest_slot = -1;
+ int time = 0, slot, newest_slot = -1;
for (slot = 0; slot < 10; slot++) {
if (emu_check_save_file(slot, &time)) {
printf("selected file: %s\n", fname);
- new_dynarec_clear_full();
+ ndrc_clear_full();
if (run_cd_image(fname) != 0)
return -1;
menu_update_msg("failed to load cdr plugin");
return -1;
}
- if (CDR_open() < 0) {
+ if (cdra_open() < 0) {
menu_update_msg("failed to open cdr plugin");
return -1;
}
CdromId[0] = '\0';
CdromLabel[0] = '\0';
- CDR_close();
- if (CDR_open() < 0) {
+ cdra_close();
+ if (cdra_open() < 0) {
menu_update_msg("failed to open cdr plugin");
return -1;
}
char fname[MAXPATHLEN];
struct dirent *ent;
int bios_i, gpu_i, spu_i, mc_i;
- char *p;
DIR *dir;
bioses[0] = "HLE";
dir = opendir(fname);
if (dir == NULL) {
perror("scan_bios_plugins bios opendir");
+#ifndef NO_DYLIB
goto do_plugins;
+#else
+ goto do_memcards;
+#endif
}
while (1) {
closedir(dir);
+#ifndef NO_DYLIB
do_plugins:
snprintf(fname, sizeof(fname), "%s/", Config.PluginsDir);
dir = opendir(fname);
while (1) {
void *h, *tmp;
+ char *p;
errno = 0;
ent = readdir(dir);
}
closedir(dir);
+#endif
do_memcards:
- dir = opendir("." MEMCARD_DIR);
+ emu_make_path(fname, sizeof(fname), MEMCARD_DIR, NULL);
+ dir = opendir(fname);
if (dir == NULL) {
perror("scan_bios_plugins memcards opendir");
return;
if (ent->d_type != DT_REG && ent->d_type != DT_LNK)
continue;
- snprintf(fname, sizeof(fname), "." MEMCARD_DIR "%s", ent->d_name);
+ emu_make_path(fname, sizeof(fname), MEMCARD_DIR, ent->d_name);
if (stat(fname, &st) != 0) {
printf("bad memcard file: %s\n", ent->d_name);
continue;
exit(1);
}
- emu_make_path(buff, "skin/background.png", sizeof(buff));
+ emu_make_data_path(buff, "skin/background.png", sizeof(buff));
readpng(g_menubg_src_ptr, buff, READPNG_BG, g_menuscreen_w, g_menuscreen_h);
i = plat_target.cpu_clock_set != NULL
me_enable(e_menu_gfx_options, MA_OPT_VOUT_MODE,
plat_target.vout_methods != NULL);
+#ifndef SDL_OVERLAY_2X
+ i = me_id2offset(e_menu_gfx_options, MA_OPT_VOUT_FULL);
+ e_menu_gfx_options[i].data = plat_target.vout_methods;
+ me_enable(e_menu_gfx_options, MA_OPT_VOUT_FULL, 0);
+#endif
+
i = me_id2offset(e_menu_gfx_options, MA_OPT_HWFILTER);
e_menu_gfx_options[i].data = plat_target.hwfilters;
- me_enable(e_menu_gfx_options, MA_OPT_HWFILTER,
- plat_target.hwfilters != NULL);
-
- me_enable(e_menu_gfx_options, MA_OPT_GAMMA,
- plat_target.gamma_set != NULL);
+ me_enable(e_menu_gfx_options, MA_OPT_HWFILTER, plat_target.hwfilters != NULL);
+ if (plat_target.hwfilters && !strcmp(plat_target.hwfilters[0], "linear"))
+ e_menu_gfx_options[i].name = "OpenGL filter";
+ else
+ me_enable(e_menu_gfx_options, MA_OPT_VSYNC, 0);
-#ifdef HAVE_PRE_ARMV7
+ me_enable(e_menu_gfx_options, MA_OPT_GAMMA, plat_target.gamma_set != NULL);
+#ifndef HAVE_NEON32
me_enable(e_menu_gfx_options, MA_OPT_SWFILTER, 0);
#endif
me_enable(e_menu_gfx_options, MA_OPT_VARSCALER, MENU_SHOW_VARSCALER);
me_enable(e_menu_gfx_options, MA_OPT_VOUT_MODE, MENU_SHOW_VOUTMODE);
- me_enable(e_menu_gfx_options, MA_OPT_VARSCALER_C, MENU_SHOW_VARSCALER);
+ me_enable(e_menu_gfx_options, MA_OPT_VARSCALER_C, MENU_SHOW_VARSCALER_C);
me_enable(e_menu_gfx_options, MA_OPT_SCALER2, MENU_SHOW_SCALER2);
me_enable(e_menu_keyconfig, MA_CTRL_NUBS_BTNS, MENU_SHOW_NUBS_BTNS);
me_enable(e_menu_keyconfig, MA_CTRL_VIBRATION, MENU_SHOW_VIBRATION);
prev_cpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL);
prev_cpu->Shutdown();
psxCpu->Init();
- psxCpu->Reset();
psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
}
menu_sync_config();
psxCpu->ApplyConfig();
- // core doesn't care about Config.Cdda changes,
- // so handle them manually here
- if (Config.Cdda)
- CDR_stop();
-
if (cpu_clock > 0)
plat_target_cpu_clock_set(cpu_clock);
OPT_NO_FRAMELIM = 1 << 2,
OPT_SHOWSPU = 1 << 3,
OPT_TSGUN_NOTRIGGER = 1 << 4,
+ OPT_VSYNC = 1 << 5,
};
enum g_scaler_opts {
#define MENU_BIOS_PATH "<SD card>/pandora/appdata/pcsx_rearmed/bios/"
#define BOOT_MSG "Booting up... (press SPACE for menu)"
#define MENU_SHOW_VARSCALER 1
+#define MENU_SHOW_VARSCALER_C 1
#define MENU_SHOW_VOUTMODE 0
#define MENU_SHOW_SCALER2 0
#define MENU_SHOW_NUBS_BTNS 1
{
}
-static void *pl_emu_mmap(unsigned long addr, size_t size, int is_fixed,
- enum psxMapTag tag)
+static void *pl_emu_mmap(unsigned long addr, size_t size,
+ enum psxMapTag tag, int *can_retry_addr)
{
unsigned int pbase;
void *retval;
int ret;
+ *can_retry_addr = 1;
if (!have_warm)
goto basic_map;
}
basic_map:
- retval = plat_mmap(addr, size, 0, is_fixed);
+ retval = plat_mmap(addr, size, 0, 0);
out:
- if (tag == MAP_TAG_VRAM)
+ if (tag == MAP_TAG_VRAM && retval)
psx_vram = retval;
- return retval;
+ return retval ? retval : MAP_FAILED;
}
static void pl_emu_munmap(void *ptr, size_t size, enum psxMapTag tag)
*/
#include <stdio.h>
+#include <assert.h>
#include <SDL.h>
+#include "../libpcsxcore/plugins.h"
#include "libpicofe/input.h"
#include "libpicofe/in_sdl.h"
#include "libpicofe/menu.h"
#include "libpicofe/fonts.h"
#include "libpicofe/plat_sdl.h"
+#include "libpicofe/plat.h"
#include "libpicofe/gl.h"
#include "cspace.h"
#include "plugin_lib.h"
#include "plugin.h"
+#include "menu.h"
#include "main.h"
#include "plat.h"
#include "revision.h"
+#include "libpicofe/plat_sdl.c"
+
static const struct in_default_bind in_sdl_defbinds[] = {
{ SDLK_UP, IN_BINDTYPE_PLAYER12, DKEY_UP },
{ SDLK_DOWN, IN_BINDTYPE_PLAYER12, DKEY_DOWN },
static int psx_w = 256, psx_h = 240;
static void *shadow_fb, *menubg_img;
+static int vout_fullscreen_old;
+static int forced_clears;
+static int forced_flips;
+static int sdl12_compat;
+static int resized;
static int in_menu;
+static int gl_w_prev, gl_h_prev, gl_quirks_prev;
+static float gl_vertices[] = {
+ -1.0f, 1.0f, 0.0f, // 0 0 1
+ 1.0f, 1.0f, 0.0f, // 1 ^
+ -1.0f, -1.0f, 0.0f, // 2 | 2 3
+ 1.0f, -1.0f, 0.0f, // 3 +-->
+};
+
+static void handle_window_resize(void);
+static void handle_scaler_resize(int w, int h);
static void centered_clear(void);
-static void *setup_blit_callbacks(int w);
-static int change_video_mode(int force)
+static int plugin_owns_display(void)
{
- int w, h, ret;
-
- if (in_menu) {
- w = g_menuscreen_w;
- h = g_menuscreen_h;
- }
- else {
- w = psx_w;
- h = psx_h;
- }
-
- ret = plat_sdl_change_video_mode(w, h, force);
- if (ret == 0 && plat_sdl_overlay == NULL && !plat_sdl_gl_active)
- centered_clear();
- return ret;
+ // if true, a plugin is drawing and flipping
+ return (pl_rearmed_cbs.gpu_caps & GPU_CAP_OWNS_DISPLAY);
}
-static void resize_cb(int w, int h)
+static void plugin_update(void)
{
- // used by some plugins..
- pl_rearmed_cbs.screen_w = w;
- pl_rearmed_cbs.screen_h = h;
+ // used by some plugins...
+ pl_rearmed_cbs.screen_w = plat_sdl_screen->w;
+ pl_rearmed_cbs.screen_h = plat_sdl_screen->h;
pl_rearmed_cbs.gles_display = gl_es_display;
pl_rearmed_cbs.gles_surface = gl_es_surface;
plugin_call_rearmed_cbs();
- setup_blit_callbacks(psx_w);
+}
+
+static void sdl_event_handler(void *event_)
+{
+ SDL_Event *event = event_;
+
+ switch (event->type) {
+ case SDL_VIDEORESIZE:
+ if (window_w != (event->resize.w & ~3) || window_h != (event->resize.h & ~1)) {
+ window_w = event->resize.w & ~3;
+ window_h = event->resize.h & ~1;
+ resized = 1;
+ if (!in_menu && plat_sdl_gl_active && plugin_owns_display()) {
+ // the plugin flips by itself so resize has to be handled here
+ handle_window_resize();
+ if (GPU_open != NULL) {
+ int ret = GPU_open(&gpuDisp, "PCSX", NULL);
+ if (ret)
+ fprintf(stderr, "GPU_open: %d\n", ret);
+ }
+ }
+ }
+ return;
+ case SDL_ACTIVEEVENT:
+ // no need to redraw?
+ return;
+ default:
+ break;
+ }
+ plat_sdl_event_handler(event_);
}
static void quit_cb(void)
void plat_init(void)
{
+ static const char *hwfilters[] = { "linear", "nearest", NULL };
+ const SDL_version *ver;
int shadow_size;
int ret;
plat_sdl_quit_cb = quit_cb;
- plat_sdl_resize_cb = resize_cb;
+
+ old_fullscreen = -1; // hack
ret = plat_sdl_init();
if (ret != 0)
exit(1);
+ ver = SDL_Linked_Version();
+ sdl12_compat = ver->patch >= 50;
+ printf("SDL %u.%u.%u compat=%d\n", ver->major, ver->minor, ver->patch, sdl12_compat);
+
in_menu = 1;
SDL_WM_SetCaption("PCSX-ReARMed " REV, NULL);
exit(1);
}
- in_sdl_init(&in_sdl_platform_data, plat_sdl_event_handler);
+ in_sdl_init(&in_sdl_platform_data, sdl_event_handler);
in_probe();
pl_rearmed_cbs.only_16bpp = 1;
pl_rearmed_cbs.pl_get_layer_pos = get_layer_pos;
bgr_to_uyvy_init();
+
+ assert(plat_sdl_screen);
+ plugin_update();
+ if (plat_target.vout_method == vout_mode_gl)
+ gl_w_prev = plat_sdl_screen->w, gl_h_prev = plat_sdl_screen->h;
+ if (vout_mode_gl != -1)
+ plat_target.hwfilters = hwfilters;
}
void plat_finish(void)
{
}
-static void uyvy_to_rgb565(void *d, const void *s, int pixels)
+static void uyvy_to_rgb565(void *d, int pixels)
{
+ const unsigned int *src = (const void *)plat_sdl_overlay->pixels[0];
+ int x2 = plat_sdl_overlay->w >= psx_w * 2;
unsigned short *dst = d;
- const unsigned int *src = s;
int v;
// no colors, for now
- for (; pixels > 0; src++, dst += 2, pixels -= 2) {
- v = (*src >> 8) & 0xff;
- v = (v - 16) * 255 / 219 / 8;
- dst[0] = (v << 11) | (v << 6) | v;
+ if (x2) {
+ for (; pixels > 0; src++, dst++, pixels--) {
+ v = (*src >> 8) & 0xff;
+ v = (v - 16) * 255 / 219 / 8;
+ *dst = (v << 11) | (v << 6) | v;
+ }
+ }
+ else {
+ for (; pixels > 0; src++, dst += 2, pixels -= 2) {
+ v = (*src >> 8) & 0xff;
+ v = (v - 16) * 255 / 219 / 8;
+ dst[0] = (v << 11) | (v << 6) | v;
+
+ v = (*src >> 24) & 0xff;
+ v = (v - 16) * 255 / 219 / 8;
+ dst[1] = (v << 11) | (v << 6) | v;
+ }
+ }
+}
- v = (*src >> 24) & 0xff;
- v = (v - 16) * 255 / 219 / 8;
- dst[1] = (v << 11) | (v << 6) | v;
+static void overlay_resize(int force)
+{
+ int x2_mul = !in_menu && plat_target.vout_method > 1 ? 2 : 1; // lame
+ int w = in_menu ? g_menuscreen_w : psx_w;
+ int h = in_menu ? g_menuscreen_h : psx_h;
+
+ if (!force && plat_sdl_overlay && w * x2_mul == plat_sdl_overlay->w
+ && h == plat_sdl_overlay->h)
+ return;
+ if (plat_sdl_overlay)
+ SDL_FreeYUVOverlay(plat_sdl_overlay);
+ plat_sdl_overlay = SDL_CreateYUVOverlay(w * x2_mul, h, SDL_UYVY_OVERLAY,
+ plat_sdl_screen);
+ if (plat_sdl_overlay) {
+ //printf("overlay: %dx%d %08x hw=%d\n", plat_sdl_overlay->w, plat_sdl_overlay->h,
+ // plat_sdl_overlay->format, plat_sdl_overlay->hw_overlay);
+ if (SDL_LockYUVOverlay(plat_sdl_overlay) == 0) {
+ plat_sdl_overlay_clear();
+ SDL_UnlockYUVOverlay(plat_sdl_overlay);
+ }
}
+ else {
+ fprintf(stderr, "overlay resize to %dx%d failed\n", w, h);
+ plat_target.vout_method = 0;
+ }
+ handle_scaler_resize(w, h);
}
static void overlay_blit(int doffs, const void *src_, int w, int h,
const unsigned short *src = src_;
unsigned short *dst;
int dstride = plat_sdl_overlay->w;
+ int x2 = dstride >= 2 * w;
SDL_LockYUVOverlay(plat_sdl_overlay);
dst = (void *)plat_sdl_overlay->pixels[0];
dst += doffs;
if (bgr24) {
for (; h > 0; dst += dstride, src += sstride, h--)
- bgr888_to_uyvy(dst, src, w);
+ bgr888_to_uyvy(dst, src, w, x2);
}
else {
for (; h > 0; dst += dstride, src += sstride, h--)
- bgr555_to_uyvy(dst, src, w);
+ bgr555_to_uyvy(dst, src, w, x2);
}
SDL_UnlockYUVOverlay(plat_sdl_overlay);
static void overlay_hud_print(int x, int y, const char *str, int bpp)
{
+ int x2;
SDL_LockYUVOverlay(plat_sdl_overlay);
+ x2 = plat_sdl_overlay->w >= psx_w * 2;
+ if (x2)
+ x *= 2;
basic_text_out_uyvy_nf(plat_sdl_overlay->pixels[0], plat_sdl_overlay->w, x, y, str);
SDL_UnlockYUVOverlay(plat_sdl_overlay);
}
+static void gl_finish_pl(void)
+{
+ if (plugin_owns_display() && GPU_close != NULL)
+ GPU_close();
+ gl_destroy();
+}
+
+static void gl_resize(void)
+{
+ int w = in_menu ? g_menuscreen_w : psx_w;
+ int h = in_menu ? g_menuscreen_h : psx_h;
+
+ gl_quirks &= ~(GL_QUIRK_SCALING_NEAREST | GL_QUIRK_VSYNC_ON);
+ if (plat_target.hwfilter) // inverted from plat_sdl_gl_scaling()
+ gl_quirks |= GL_QUIRK_SCALING_NEAREST;
+ if (g_opts & OPT_VSYNC)
+ gl_quirks |= GL_QUIRK_VSYNC_ON;
+
+ if (plugin_owns_display())
+ w = plat_sdl_screen->w, h = plat_sdl_screen->h;
+ if (plat_sdl_gl_active) {
+ if (w == gl_w_prev && h == gl_h_prev && gl_quirks == gl_quirks_prev)
+ return;
+ gl_finish_pl();
+ }
+ plat_sdl_gl_active = (gl_create(window, &gl_quirks, w, h) == 0);
+ if (plat_sdl_gl_active)
+ gl_w_prev = w, gl_h_prev = h, gl_quirks_prev = gl_quirks;
+ else {
+ fprintf(stderr, "warning: could not init GL.\n");
+ plat_target.vout_method = 0;
+ }
+ handle_scaler_resize(w, h);
+ plugin_update();
+ forced_flips = 0; // interferes with gl
+}
+
+static void overlay_or_gl_check_enable(void)
+{
+ int ovl_on = plat_target.vout_method == vout_mode_overlay ||
+ plat_target.vout_method == vout_mode_overlay2x;
+ int gl_on = plat_target.vout_method == vout_mode_gl;
+ if (!gl_on && plat_sdl_gl_active) {
+ gl_finish_pl();
+ pl_rearmed_cbs.gles_display = gl_es_display;
+ pl_rearmed_cbs.gles_surface = gl_es_surface;
+ plat_sdl_gl_active = 0;
+ }
+ if (!ovl_on && plat_sdl_overlay) {
+ SDL_FreeYUVOverlay(plat_sdl_overlay);
+ plat_sdl_overlay = NULL;
+ }
+ if (ovl_on)
+ overlay_resize(0);
+ else if (gl_on)
+ gl_resize();
+}
+
static void centered_clear(void)
{
int dstride = plat_sdl_screen->pitch / 2;
int h = plat_sdl_screen->h;
unsigned short *dst;
- SDL_LockSurface(plat_sdl_screen);
+ if (plat_sdl_gl_active) {
+ gl_clear();
+ return;
+ }
+
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_LockSurface(plat_sdl_screen);
dst = plat_sdl_screen->pixels;
for (; h > 0; dst += dstride, h--)
memset(dst, 0, w * 2);
- SDL_UnlockSurface(plat_sdl_screen);
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_UnlockSurface(plat_sdl_screen);
+
+ if (plat_sdl_overlay != NULL) {
+ // apply the parts not covered by the overlay
+ forced_flips = 3;
+ }
+}
+
+static int adj_src_dst(const SDL_Surface *sfc, int w, int pp, int *h,
+ unsigned short **dst, const unsigned short **src)
+{
+ int line_w = w;
+ if (sfc->w > w)
+ *dst += (sfc->w - w) / 2;
+ else {
+ *src += (w - sfc->w) / 2;
+ line_w = sfc->w;
+ }
+ if (sfc->h > *h)
+ *dst += sfc->pitch * (sfc->h - *h) / 2 / 2;
+ else {
+ *src += pp * (*h - sfc->h) / 2;
+ *h = sfc->h;
+ }
+ return line_w;
}
static void centered_blit(int doffs, const void *src_, int w, int h,
unsigned short *dst;
int dstride;
- SDL_LockSurface(plat_sdl_screen);
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_LockSurface(plat_sdl_screen);
dst = plat_sdl_screen->pixels;
dstride = plat_sdl_screen->pitch / 2;
+ w = adj_src_dst(plat_sdl_screen, w, sstride, &h, &dst, &src);
- dst += doffs + (plat_sdl_screen->w - w) / 2;
- dst += dstride * (plat_sdl_screen->h - h) / 2;
if (bgr24) {
for (; h > 0; dst += dstride, src += sstride, h--)
bgr888_to_rgb565(dst, src, w * 3);
bgr555_to_rgb565(dst, src, w * 2);
}
- SDL_UnlockSurface(plat_sdl_screen);
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_UnlockSurface(plat_sdl_screen);
}
static void centered_blit_menu(void)
int w = g_menuscreen_w;
int h = g_menuscreen_h;
unsigned short *dst;
- int dstride;
+ int dstride, len;
+
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_LockSurface(plat_sdl_screen);
- SDL_LockSurface(plat_sdl_screen);
dst = plat_sdl_screen->pixels;
dstride = plat_sdl_screen->pitch / 2;
+ len = adj_src_dst(plat_sdl_screen, w, g_menuscreen_pp, &h, &dst, &src);
- dst += (plat_sdl_screen->w - w) / 2;
- dst += dstride * (plat_sdl_screen->h - h) / 2;
for (; h > 0; dst += dstride, src += g_menuscreen_pp, h--)
- memcpy(dst, src, w * 2);
+ memcpy(dst, src, len * 2);
- SDL_UnlockSurface(plat_sdl_screen);
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_UnlockSurface(plat_sdl_screen);
}
static void centered_hud_print(int x, int y, const char *str, int bpp)
{
- x += (plat_sdl_screen->w - psx_w) / 2;
- y += (plat_sdl_screen->h - psx_h) / 2;
- SDL_LockSurface(plat_sdl_screen);
+ int w_diff, h_diff;
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_LockSurface(plat_sdl_screen);
+ w_diff = plat_sdl_screen->w - psx_w;
+ h_diff = plat_sdl_screen->h - psx_h;
+ if (w_diff > 0) x += w_diff / 2;
+ if (h_diff > 0) y += h_diff / 2;
+ if (h_diff < 0) y += h_diff;
+ if (w_diff < 0 && x > 32) x += w_diff;
basic_text_out16_nf(plat_sdl_screen->pixels, plat_sdl_screen->pitch / 2, x, y, str);
- SDL_UnlockSurface(plat_sdl_screen);
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_UnlockSurface(plat_sdl_screen);
}
-static void *setup_blit_callbacks(int w)
+static void *setup_blit_callbacks(int w, int h)
{
pl_plat_clear = NULL;
pl_plat_blit = NULL;
return shadow_fb;
}
else {
- if (w == plat_sdl_screen->w)
+ pl_plat_clear = centered_clear;
+
+ if (!SDL_MUSTLOCK(plat_sdl_screen) && w == plat_sdl_screen->w &&
+ h == plat_sdl_screen->h)
return plat_sdl_screen->pixels;
+
+ pl_plat_blit = centered_blit;
+ pl_plat_hud_print = centered_hud_print;
+ }
+ return NULL;
+}
+
+// not using plat_sdl_change_video_mode() since we need
+// different size overlay vs plat_sdl_screen layer
+static void change_mode(int w, int h)
+{
+ int set_w = w, set_h = h, had_overlay = 0, had_gl = 0;
+ if (plat_target.vout_fullscreen && (plat_target.vout_method != 0 || !sdl12_compat))
+ set_w = fs_w, set_h = fs_h;
+ if (plat_sdl_screen->w != set_w || plat_sdl_screen->h != set_h ||
+ plat_target.vout_fullscreen != vout_fullscreen_old)
+ {
+ Uint32 flags = plat_sdl_screen->flags;
+ if (plat_target.vout_fullscreen)
+ flags |= SDL_FULLSCREEN;
else {
- pl_plat_clear = centered_clear;
- pl_plat_blit = centered_blit;
- pl_plat_hud_print = centered_hud_print;
+ flags &= ~SDL_FULLSCREEN;
+ if (plat_sdl_is_windowed())
+ flags |= SDL_RESIZABLE; // sdl12-compat 1.2.68 loses this flag
+ }
+ if (plat_sdl_overlay) {
+ SDL_FreeYUVOverlay(plat_sdl_overlay);
+ plat_sdl_overlay = NULL;
+ had_overlay = 1;
}
+ if (plat_sdl_gl_active) {
+ gl_finish_pl();
+ plat_sdl_gl_active = 0;
+ had_gl = 1;
+ }
+ SDL_PumpEvents();
+ plat_sdl_screen = SDL_SetVideoMode(set_w, set_h, 16, flags);
+ //printf("mode: %dx%d %x -> %dx%d\n", set_w, set_h, flags,
+ // plat_sdl_screen->w, plat_sdl_screen->h);
+ assert(plat_sdl_screen);
+ if (vout_fullscreen_old && !plat_target.vout_fullscreen)
+ // why is this needed?? (on 1.2.68)
+ SDL_WM_GrabInput(SDL_GRAB_OFF);
+ if (vout_mode_gl != -1)
+ update_wm_display_window();
+ // overlay needs the latest plat_sdl_screen
+ if (had_overlay)
+ overlay_resize(1);
+ if (had_gl)
+ gl_resize();
+ centered_clear();
+ plugin_update();
+ vout_fullscreen_old = plat_target.vout_fullscreen;
}
- return NULL;
+}
+
+static void handle_scaler_resize(int w, int h)
+{
+ int ww = plat_sdl_screen->w;
+ int wh = plat_sdl_screen->h;
+ int layer_w_old = g_layer_w;
+ int layer_h_old = g_layer_h;
+ float w_mul, h_mul;
+ int x, y;
+ pl_update_layer_size(w, h, ww, wh);
+ if (layer_w_old != g_layer_w || layer_h_old != g_layer_h)
+ forced_clears = 3;
+
+ w_mul = 2.0f / ww;
+ h_mul = 2.0f / wh;
+ x = (ww - g_layer_w) / 2;
+ y = (wh - g_layer_h) / 2;
+ gl_vertices[3*0+0] = gl_vertices[3*2+0] = -1.0f + x * w_mul;
+ gl_vertices[3*1+0] = gl_vertices[3*3+0] = -1.0f + (x + g_layer_w) * w_mul;
+ gl_vertices[3*2+1] = gl_vertices[3*3+1] = -1.0f + y * h_mul;
+ gl_vertices[3*0+1] = gl_vertices[3*1+1] = -1.0f + (y + g_layer_h) * h_mul;
+}
+
+static void handle_window_resize(void)
+{
+ // sdl12-compat: a hack to take advantage of sdl2 scaling
+ if (resized && (plat_target.vout_method != 0 || !sdl12_compat)) {
+ change_mode(window_w, window_h);
+ setup_blit_callbacks(psx_w, psx_h);
+ forced_clears = 3;
+ }
+ resized = 0;
}
void *plat_gvideo_set_mode(int *w, int *h, int *bpp)
{
psx_w = *w;
psx_h = *h;
- change_video_mode(0);
- if (plat_sdl_gl_active)
- memset(shadow_fb, 0, psx_w * psx_h * 2);
- return setup_blit_callbacks(*w);
+
+ if (plat_sdl_gl_active && plugin_owns_display())
+ return NULL;
+
+ if (plat_sdl_overlay != NULL)
+ overlay_resize(0);
+ else if (plat_sdl_gl_active) {
+ memset(shadow_fb, 0, (*w) * (*h) * 2);
+ gl_resize();
+ }
+ else if (plat_target.vout_method == 0) // && sdl12_compat
+ change_mode(*w, *h);
+
+ handle_scaler_resize(*w, *h); // override the value from pl_vout_set_mode()
+ return setup_blit_callbacks(*w, *h);
}
void *plat_gvideo_flip(void)
{
+ void *ret = NULL;
+ int do_flip = 0;
if (plat_sdl_overlay != NULL) {
- SDL_Rect dstrect = { 0, 0, plat_sdl_screen->w, plat_sdl_screen->h };
+ SDL_Rect dstrect = {
+ (plat_sdl_screen->w - g_layer_w) / 2,
+ (plat_sdl_screen->h - g_layer_h) / 2,
+ g_layer_w, g_layer_h
+ };
SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect);
- return NULL;
}
else if (plat_sdl_gl_active) {
- gl_flip(shadow_fb, psx_w, psx_h);
- return shadow_fb;
+ gl_flip_v(shadow_fb, psx_w, psx_h, g_scaler != SCALE_FULLSCREEN ? gl_vertices : NULL);
+ ret = shadow_fb;
}
- else {
- // XXX: no locking, but should be fine with SDL_SWSURFACE?
+ else
+ do_flip |= 2;
+
+ if (forced_flips > 0) {
+ forced_flips--;
+ do_flip |= 1;
+ }
+ if (do_flip)
SDL_Flip(plat_sdl_screen);
- return plat_sdl_screen->pixels;
+ handle_window_resize();
+ if (do_flip) {
+ if (forced_clears > 0) {
+ forced_clears--;
+ centered_clear();
+ }
+ if (!SDL_MUSTLOCK(plat_sdl_screen) && plat_sdl_screen->w == psx_w &&
+ plat_sdl_screen->h == psx_h && (do_flip & 2)) {
+ ret = plat_sdl_screen->pixels;
+ }
}
+ assert(ret || pl_plat_clear != NULL);
+ return ret;
}
void plat_gvideo_close(void)
void plat_video_menu_enter(int is_rom_loaded)
{
- int force_mode_change = 0;
+ int d;
in_menu = 1;
/* surface will be lost, must adjust pl_vout_buf for menu bg */
if (plat_sdl_overlay != NULL)
- uyvy_to_rgb565(menubg_img, plat_sdl_overlay->pixels[0], psx_w * psx_h);
+ uyvy_to_rgb565(menubg_img, psx_w * psx_h);
else if (plat_sdl_gl_active)
memcpy(menubg_img, shadow_fb, psx_w * psx_h * 2);
- else
- memcpy(menubg_img, plat_sdl_screen->pixels, psx_w * psx_h * 2);
+ else {
+ unsigned short *dst = menubg_img;
+ const unsigned short *src;
+ int h;
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_LockSurface(plat_sdl_screen);
+ src = plat_sdl_screen->pixels;
+ src += (plat_sdl_screen->w - psx_w) / 2;
+ src += plat_sdl_screen->pitch * (plat_sdl_screen->h - psx_h) / 2 / 2;
+ for (h = psx_h; h > 0; dst += psx_w, src += plat_sdl_screen->pitch / 2, h--)
+ memcpy(dst, src, psx_w * 2);
+ if (SDL_MUSTLOCK(plat_sdl_screen))
+ SDL_UnlockSurface(plat_sdl_screen);
+ }
pl_vout_buf = menubg_img;
- /* gles plugin messes stuff up.. */
- if (pl_rearmed_cbs.gpu_caps & GPU_CAP_OWNS_DISPLAY)
- force_mode_change = 1;
+ if (plat_target.vout_method == 0)
+ change_mode(g_menuscreen_w, g_menuscreen_h);
+ else
+ overlay_or_gl_check_enable();
+ centered_clear();
- change_video_mode(force_mode_change);
+ for (d = 0; d < IN_MAX_DEVS; d++)
+ in_set_config_int(d, IN_CFG_ANALOG_MAP_ULDR, 1);
}
void plat_video_menu_begin(void)
{
+ void *old_ovl = plat_sdl_overlay;
+ static int g_scaler_old;
+ int scaler_changed = g_scaler_old != g_scaler;
+ g_scaler_old = g_scaler;
+ if (plat_target.vout_fullscreen != vout_fullscreen_old ||
+ (plat_target.vout_fullscreen && scaler_changed)) {
+ change_mode(g_menuscreen_w, g_menuscreen_h);
+ }
+ overlay_or_gl_check_enable();
+ handle_scaler_resize(g_menuscreen_w, g_menuscreen_h);
+
+ if (old_ovl != plat_sdl_overlay || scaler_changed)
+ centered_clear();
g_menuscreen_ptr = shadow_fb;
}
void plat_video_menu_end(void)
{
+ int do_flip = 0;
+
if (plat_sdl_overlay != NULL) {
- SDL_Rect dstrect = { 0, 0, plat_sdl_screen->w, plat_sdl_screen->h };
+ SDL_Rect dstrect = {
+ (plat_sdl_screen->w - g_layer_w) / 2,
+ (plat_sdl_screen->h - g_layer_h) / 2,
+ g_layer_w, g_layer_h
+ };
SDL_LockYUVOverlay(plat_sdl_overlay);
rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb,
SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect);
}
else if (plat_sdl_gl_active) {
- gl_flip(g_menuscreen_ptr, g_menuscreen_w, g_menuscreen_h);
+ gl_flip_v(g_menuscreen_ptr, g_menuscreen_w, g_menuscreen_h,
+ g_scaler != SCALE_FULLSCREEN ? gl_vertices : NULL);
}
else {
centered_blit_menu();
- SDL_Flip(plat_sdl_screen);
+ do_flip |= 2;
+ }
+
+ if (forced_flips > 0) {
+ forced_flips--;
+ do_flip |= 1;
}
+ if (do_flip)
+ SDL_Flip(plat_sdl_screen);
+
+ handle_window_resize();
g_menuscreen_ptr = NULL;
}
void plat_video_menu_leave(void)
{
- void *fb = NULL;
- if (plat_sdl_overlay != NULL || plat_sdl_gl_active)
- fb = shadow_fb;
- else if (plat_sdl_screen)
- fb = plat_sdl_screen->pixels;
- if (fb)
- memset(fb, 0, g_menuscreen_w * g_menuscreen_h * 2);
+ int d;
+
in_menu = 0;
+ if (plat_sdl_overlay != NULL || plat_sdl_gl_active)
+ memset(shadow_fb, 0, g_menuscreen_w * g_menuscreen_h * 2);
+
+ if (plat_target.vout_fullscreen)
+ change_mode(fs_w, fs_h);
+ overlay_or_gl_check_enable();
+ centered_clear();
+ setup_blit_callbacks(psx_w, psx_h);
+
+ for (d = 0; d < IN_MAX_DEVS; d++)
+ in_set_config_int(d, IN_CFG_ANALOG_MAP_ULDR, 0);
}
-/* unused stuff */
void *plat_prepare_screenshot(int *w, int *h, int *bpp)
{
- return 0;
+ if (plat_sdl_screen && !SDL_MUSTLOCK(plat_sdl_screen) &&
+ plat_sdl_overlay == NULL && !plat_sdl_gl_active)
+ {
+ *w = plat_sdl_screen->pitch / 2;
+ *h = plat_sdl_screen->h;
+ *bpp = 16;
+ return plat_sdl_screen->pixels;
+ }
+ fprintf(stderr, "screenshot not implemented in current mode\n");
+ return NULL;
}
void plat_trigger_vibrate(int pad, int low, int high)
#undef CALLBACK
#define CALLBACK
-/* CDR */
-struct CdrStat;
-static long CALLBACK CDRinit(void) { return 0; }
-static long CALLBACK CDRshutdown(void) { return 0; }
-static long CALLBACK CDRopen(void) { return 0; }
-static long CALLBACK CDRclose(void) { return 0; }
-static long CALLBACK CDRgetTN(unsigned char *_) { return 0; }
-static long CALLBACK CDRgetTD(unsigned char _, unsigned char *__) { return 0; }
-static boolean CALLBACK CDRreadTrack(unsigned char *_) { return FALSE; }
-static unsigned char * CALLBACK CDRgetBuffer(void) { return NULL; }
-static unsigned char * CALLBACK CDRgetBufferSub(int sector) { return NULL; }
-static long CALLBACK CDRconfigure(void) { return 0; }
-static long CALLBACK CDRtest(void) { return 0; }
-static void CALLBACK CDRabout(void) { return; }
-static long CALLBACK CDRplay(unsigned char *_) { return 0; }
-static long CALLBACK CDRstop(void) { return 0; }
-static long CALLBACK CDRsetfilename(char *_) { return 0; }
-static long CALLBACK CDRgetStatus(struct CdrStat *_) { return 0; }
-static char * CALLBACK CDRgetDriveLetter(void) { return NULL; }
-static long CALLBACK CDRreadCDDA(unsigned char _, unsigned char __, unsigned char ___, unsigned char *____) { return 0; }
-static long CALLBACK CDRgetTE(unsigned char _, unsigned char *__, unsigned char *___, unsigned char *____) { return 0; }
-static long CALLBACK CDRprefetch(unsigned char m, unsigned char s, unsigned char f) { return 1; }
-
-/* GPU */
-static void CALLBACK GPUdisplayText(char *_) { return; }
-
/* SPU */
#include "../plugins/dfsound/spu.h"
/* PAD */
-static long CALLBACK PADinit(long _) { return 0; }
-static long CALLBACK PADopen(unsigned long *_) { return 0; }
-static long CALLBACK PADshutdown(void) { return 0; }
-static long CALLBACK PADclose(void) { return 0; }
-static void CALLBACK PADsetSensitive(int _) { return; }
-
-static long CALLBACK PADreadPort1(PadDataS *pad) {
+long PAD1_readPort(PadDataS *pad) {
int pad_index = pad->requestPadIndex;
pad->controllerType = in_type[pad_index];
return 0;
}
-static long CALLBACK PADreadPort2(PadDataS *pad) {
+long PAD2_readPort(PadDataS *pad) {
int pad_index = pad->requestPadIndex;
pad->controllerType = in_type[pad_index];
#define DIRECT(id, name) \
{ id, #name, name }
-#define DIRECT_CDR(name) DIRECT(PLUGIN_CDR, name)
#define DIRECT_SPU(name) DIRECT(PLUGIN_SPU, name)
#define DIRECT_GPU(name) DIRECT(PLUGIN_GPU, name)
-#define DIRECT_PAD(name) DIRECT(PLUGIN_PAD, name)
static const struct {
int id;
const char *name;
void *func;
} plugin_funcs[] = {
- /* CDR */
- DIRECT_CDR(CDRinit),
- DIRECT_CDR(CDRshutdown),
- DIRECT_CDR(CDRopen),
- DIRECT_CDR(CDRclose),
- DIRECT_CDR(CDRtest),
- DIRECT_CDR(CDRgetTN),
- DIRECT_CDR(CDRgetTD),
- DIRECT_CDR(CDRreadTrack),
- DIRECT_CDR(CDRgetBuffer),
- DIRECT_CDR(CDRgetBufferSub),
- DIRECT_CDR(CDRplay),
- DIRECT_CDR(CDRstop),
- DIRECT_CDR(CDRgetStatus),
- DIRECT_CDR(CDRgetDriveLetter),
- DIRECT_CDR(CDRconfigure),
- DIRECT_CDR(CDRabout),
- DIRECT_CDR(CDRsetfilename),
- DIRECT_CDR(CDRreadCDDA),
- DIRECT_CDR(CDRgetTE),
- DIRECT_CDR(CDRprefetch),
/* SPU */
DIRECT_SPU(SPUinit),
DIRECT_SPU(SPUshutdown),
DIRECT_SPU(SPUasync),
DIRECT_SPU(SPUplayCDDAchannel),
DIRECT_SPU(SPUsetCDvol),
- /* PAD */
- DIRECT_PAD(PADinit),
- DIRECT_PAD(PADshutdown),
- DIRECT_PAD(PADopen),
- DIRECT_PAD(PADclose),
- DIRECT_PAD(PADsetSensitive),
- DIRECT_PAD(PADreadPort1),
- DIRECT_PAD(PADreadPort2),
-/*
- DIRECT_PAD(PADquery),
- DIRECT_PAD(PADconfigure),
- DIRECT_PAD(PADtest),
- DIRECT_PAD(PADabout),
- DIRECT_PAD(PADkeypressed),
- DIRECT_PAD(PADstartPoll),
- DIRECT_PAD(PADpoll),
-*/
/* GPU */
DIRECT_GPU(GPUupdateLace),
DIRECT_GPU(GPUinit),
DIRECT_GPU(GPUvBlank),
DIRECT_GPU(GPUgetScreenInfo),
DIRECT_GPU(GPUrearmedCallbacks),
-
- DIRECT_GPU(GPUdisplayText),
-/*
- DIRECT_GPU(GPUkeypressed),
- DIRECT_GPU(GPUmakeSnapshot),
- DIRECT_GPU(GPUconfigure),
- DIRECT_GPU(GPUgetScreenPic),
- DIRECT_GPU(GPUshowScreenPic),
-*/
};
void *plugin_link(enum builtint_plugins_e id, const char *sym)
{
int i;
- if (id == PLUGIN_CDRCIMG)
- return cdrcimg_get_sym(sym);
-
for (i = 0; i < ARRAY_SIZE(plugin_funcs); i++) {
if (id != plugin_funcs[i].id)
continue;
enum builtint_plugins_e {
PLUGIN_GPU,
PLUGIN_SPU,
- PLUGIN_CDR,
- PLUGIN_PAD,
- PLUGIN_CDRCIMG,
};
void *plugin_link(enum builtint_plugins_e id, const char *sym);
void plugin_call_rearmed_cbs(void);
+struct PadDataS;
+long PAD1_readPort(struct PadDataS *);
+long PAD2_readPort(struct PadDataS *);
+
#endif /* __PLUGIN_H__ */
#include "../libpcsxcore/gpu.h"
#include "../libpcsxcore/r3000a.h"
#include "../libpcsxcore/psxcounters.h"
+#include "arm_features.h"
#define HUD_HEIGHT 10
int multitap2;
int in_analog_left[8][2] = {{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 }};
int in_analog_right[8][2] = {{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 },{ 127, 127 }};
-int in_adev[2] = { -1, -1 }, in_adev_axis[2][2] = {{ 0, 1 }, { 0, 1 }};
+int in_adev[2] = { -1, -1 };
+int in_adev_axis[2][2] =
+#ifdef PANDORA
+ {{ 0, 1 }, { 0, 1 }};
+#else
+ {{ 0, 1 }, { 2, 3 }};
+#endif
int in_adev_is_nublike[2];
unsigned short in_keystate[8];
int in_mouse[8][2];
void *pl_vout_buf;
int g_layer_x, g_layer_y, g_layer_w, g_layer_h;
static int pl_vout_w, pl_vout_h, pl_vout_bpp; /* output display/layer */
-static int pl_vout_scale_w, pl_vout_scale_h, pl_vout_yoffset;
+static int pl_vout_scale_w, pl_vout_scale_h;
static int psx_w, psx_h, psx_bpp;
static int vsync_cnt;
static int is_pal, frame_interval, frame_interval1024;
}
/* update scaler target size according to user settings */
-static void update_layer_size(int w, int h)
+void pl_update_layer_size(int w, int h, int fw, int fh)
{
float mult;
int imult;
case SCALE_2_2:
g_layer_w = w; g_layer_h = h;
- if (w * 2 <= g_menuscreen_w)
+ if (w * 2 <= fw)
g_layer_w = w * 2;
- if (h * 2 <= g_menuscreen_h)
+ if (h * 2 <= fh)
g_layer_h = h * 2;
break;
case SCALE_4_3v2:
- if (h > g_menuscreen_h || (240 < h && h <= 360))
- goto fractional_4_3;
+#ifdef PANDORA
+ if (h <= fh && !(240 < h && h <= 360))
+ {
+#endif
// 4:3 that prefers integer scaling
- imult = g_menuscreen_h / h;
+ imult = fh / h;
+ if (imult < 1)
+ imult = 1;
g_layer_w = w * imult;
g_layer_h = h * imult;
mult = (float)g_layer_w / (float)g_layer_h;
if (mult < 1.25f || mult > 1.666f)
g_layer_w = 4.0f/3.0f * (float)g_layer_h;
- printf(" -> %dx%d %.1f\n", g_layer_w, g_layer_h, mult);
+ //printf(" -> %dx%d %.1f\n", g_layer_w, g_layer_h, mult);
break;
+#ifdef PANDORA
+ }
+#endif
- fractional_4_3:
case SCALE_4_3:
mult = 240.0f / (float)h * 4.0f / 3.0f;
if (h > 256)
mult *= 2.0f;
- g_layer_w = mult * (float)g_menuscreen_h;
- g_layer_h = g_menuscreen_h;
- printf(" -> %dx%d %.1f\n", g_layer_w, g_layer_h, mult);
+ g_layer_w = mult * (float)fh;
+ g_layer_h = fh;
+ //printf(" -> %dx%d %.1f\n", g_layer_w, g_layer_h, mult);
break;
case SCALE_FULLSCREEN:
- g_layer_w = g_menuscreen_w;
- g_layer_h = g_menuscreen_h;
+ g_layer_w = fw;
+ g_layer_h = fh;
break;
default:
}
if (g_scaler != SCALE_CUSTOM) {
- g_layer_x = g_menuscreen_w / 2 - g_layer_w / 2;
- g_layer_y = g_menuscreen_h / 2 - g_layer_h / 2;
+ g_layer_x = fw / 2 - g_layer_w / 2;
+ g_layer_y = fh / 2 - g_layer_h / 2;
}
- if (g_layer_w > g_menuscreen_w * 2) g_layer_w = g_menuscreen_w * 2;
- if (g_layer_h > g_menuscreen_h * 2) g_layer_h = g_menuscreen_h * 2;
+ if (g_layer_w > fw * 2) g_layer_w = fw * 2;
+ if (g_layer_h > fh * 2) g_layer_h = fh * 2;
}
// XXX: this is platform specific really
static void pl_vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp)
{
int vout_w, vout_h, vout_bpp;
- int buf_yoffset = 0;
// special h handling, Wipeout likes to change it by 1-6
static int vsync_cnt_ms_prev;
assert(vout_h >= 192);
pl_vout_scale_w = pl_vout_scale_h = 1;
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON32
if (soft_filter) {
if (resolution_ok(w * 2, h * 2) && bpp == 16) {
pl_vout_scale_w = 2;
vout_w *= pl_vout_scale_w;
vout_h *= pl_vout_scale_h;
- update_layer_size(vout_w, vout_h);
+ pl_update_layer_size(vout_w, vout_h, g_menuscreen_w, g_menuscreen_h);
pl_vout_buf = plat_gvideo_set_mode(&vout_w, &vout_h, &vout_bpp);
if (pl_vout_buf == NULL && pl_plat_blit == NULL)
pl_vout_w = vout_w;
pl_vout_h = vout_h;
pl_vout_bpp = vout_bpp;
- pl_vout_yoffset = buf_yoffset;
}
- if (pl_vout_buf != NULL)
- pl_vout_buf = (char *)pl_vout_buf
- + pl_vout_yoffset * pl_vout_w * pl_vout_bpp / 8;
menu_notify_mode_change(pl_vout_w, pl_vout_h, pl_vout_bpp);
}
flip_clear_counter = 2;
}
-static void pl_vout_flip(const void *vram, int stride, int bgr24,
+static void pl_vout_flip(const void *vram_, int vram_ofs, int bgr24,
int x, int y, int w, int h, int dims_changed)
{
unsigned char *dest = pl_vout_buf;
- const unsigned short *src = vram;
+ const unsigned char *vram = vram_;
int dstride = pl_vout_w, h1 = h;
- int h_full = pl_vout_h - pl_vout_yoffset;
+ int h_full = pl_vout_h;
+ int enhres = w > psx_w;
int xoffs = 0, doffs;
+ int hwrapped;
pcnt_start(PCNT_BLIT);
// offset
xoffs = x * pl_vout_scale_w;
- doffs = xoffs + y * dstride;
+ doffs = xoffs + y * pl_vout_scale_h * dstride;
if (dims_changed)
flip_clear_counter = 3;
if (pl_plat_blit)
{
- pl_plat_blit(doffs, src, w, h, stride, bgr24);
+ pl_plat_blit(doffs, vram + vram_ofs, w, h, 1024, bgr24);
goto out_hud;
}
if (bgr24)
{
+ hwrapped = (vram_ofs & 2047) + w * 3 - 2048;
if (pl_rearmed_cbs.only_16bpp) {
- for (; h1-- > 0; dest += dstride * 2, src += stride)
- {
- bgr888_to_rgb565(dest, src, w * 3);
+ for (; h1-- > 0; dest += dstride * 2) {
+ bgr888_to_rgb565(dest, vram + vram_ofs, w * 3);
+ vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ }
+
+ if (hwrapped > 0) {
+ // this is super-rare so just fix-up
+ vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+ dest -= dstride * 2 * h;
+ dest += (w - hwrapped / 3) * 2;
+ for (h1 = h; h1-- > 0; dest += dstride * 2) {
+ bgr888_to_rgb565(dest, vram + vram_ofs, hwrapped);
+ vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ }
}
}
else {
dest -= doffs * 2;
dest += (doffs / 8) * 24;
- for (; h1-- > 0; dest += dstride * 3, src += stride)
- {
- bgr888_to_rgb888(dest, src, w * 3);
+ for (; h1-- > 0; dest += dstride * 3) {
+ bgr888_to_rgb888(dest, vram + vram_ofs, w * 3);
+ vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ }
+
+ if (hwrapped > 0) {
+ vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+ dest -= dstride * 3 * h;
+ dest += w * 3 - hwrapped;
+ for (h1 = h; h1-- > 0; dest += dstride * 3) {
+ bgr888_to_rgb888(dest, vram + vram_ofs, hwrapped);
+ vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ }
}
}
}
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON32
else if (soft_filter == SOFT_FILTER_SCALE2X && pl_vout_scale_w == 2)
{
- neon_scale2x_16_16(src, (void *)dest, w,
- stride * 2, dstride * 2, h);
+ neon_scale2x_16_16((const void *)(vram + vram_ofs), (void *)dest, w,
+ 2048, dstride * 2, h);
}
else if (soft_filter == SOFT_FILTER_EAGLE2X && pl_vout_scale_w == 2)
{
- neon_eagle2x_16_16(src, (void *)dest, w,
- stride * 2, dstride * 2, h);
+ neon_eagle2x_16_16((const void *)(vram + vram_ofs), (void *)dest, w,
+ 2048, dstride * 2, h);
}
else if (scanlines != 0 && scanline_level != 100)
{
int h2, l = scanline_level * 2048 / 100;
- int stride_0 = pl_vout_scale_h >= 2 ? 0 : stride;
+ int stride_0 = pl_vout_scale_h >= 2 ? 0 : 2048;
h1 *= pl_vout_scale_h;
while (h1 > 0)
{
for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) {
- bgr555_to_rgb565(dest, src, w * 2);
- dest += dstride * 2, src += stride_0;
+ bgr555_to_rgb565(dest, vram + vram_ofs, w * 2);
+ vram_ofs = (vram_ofs + stride_0) & 0xfffff;
+ dest += dstride * 2;
}
for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) {
- bgr555_to_rgb565_b(dest, src, w * 2, l);
- dest += dstride * 2, src += stride;
+ bgr555_to_rgb565_b(dest, vram + vram_ofs, w * 2, l);
+ vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ dest += dstride * 2;
}
}
}
#endif
else
{
- for (; h1-- > 0; dest += dstride * 2, src += stride)
- {
- bgr555_to_rgb565(dest, src, w * 2);
+ unsigned int vram_mask = enhres ? ~0 : 0xfffff;
+ for (; h1-- > 0; dest += dstride * 2) {
+ bgr555_to_rgb565(dest, vram + vram_ofs, w * 2);
+ vram_ofs = (vram_ofs + 2048) & vram_mask;
+ }
+
+ hwrapped = (vram_ofs & 2047) + w * 2 - 2048;
+ if (!enhres && hwrapped > 0) {
+ vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+ dest -= dstride * 2 * h;
+ dest += w * 2 - hwrapped;
+ for (h1 = h; h1-- > 0; dest += dstride * 2) {
+ bgr555_to_rgb565(dest, vram + vram_ofs, hwrapped);
+ vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ }
}
}
// let's flip now
pl_vout_buf = plat_gvideo_flip();
- if (pl_vout_buf != NULL)
- pl_vout_buf = (char *)pl_vout_buf
- + pl_vout_yoffset * pl_vout_w * pl_vout_bpp / 8;
pl_rearmed_cbs.flip_cnt++;
}
return 1;
}
-#ifdef BUILTIN_GPU_NEON
static int dispmode_doubleres(void)
{
if (!(pl_rearmed_cbs.gpu_caps & GPU_CAP_SUPPORTS_2X)
snprintf(hud_msg, sizeof(hud_msg), "double resolution");
return 1;
}
-#endif
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON32
static int dispmode_scale2x(void)
{
if (!resolution_ok(psx_w * 2, psx_h * 2) || psx_bpp != 16)
static int (*dispmode_switchers[])(void) = {
dispmode_default,
-#ifdef BUILTIN_GPU_NEON
dispmode_doubleres,
-#endif
-#ifdef __ARM_NEON__
+#ifdef HAVE_NEON32
dispmode_scale2x,
dispmode_eagle2x,
#endif
}
}
+static void emu_set_action(enum sched_action action_)
+{
+ extern enum sched_action emu_action, emu_action_old;
+
+ if (action_ == SACTION_NONE)
+ emu_action_old = 0;
+ else if (action_ != emu_action_old)
+ psxRegs.stop++;
+ emu_action = action_;
+}
+
static void update_input(void)
{
int actions[IN_BINDTYPE_COUNT] = { 0, };
// recompilation is not that fast and may cause frame skip on
// loading screens and such, resulting in flicker or glitches
- if (new_dynarec_did_compile) {
+ if (ndrc_g.did_compile) {
if (drc_active_vsyncs < 32)
pl_rearmed_cbs.fskip_advice = 0;
drc_active_vsyncs++;
}
else
drc_active_vsyncs = 0;
- new_dynarec_did_compile = 0;
+ ndrc_g.did_compile = 0;
}
pcnt_start(PCNT_ALL);
int seen_dead = 0;
int sleep_time = 5;
-#if !defined(NDEBUG) || defined(DRC_DBG)
- // don't interfere with debug
- return NULL;
-#endif
while (1)
{
sleep(sleep_time);
- if (stop) {
+ if (psxRegs.stop) {
seen_dead = 0;
sleep_time = 5;
continue;
fprintf(stderr, "watchdog: seen_dead %d\n", seen_dead);
if (seen_dead > 4) {
fprintf(stderr, "watchdog: lockup detected, aborting\n");
+ fflush(stderr);
// we can't do any cleanup here really, the main thread is
// likely touching resources and would crash anyway
abort();
void pl_start_watchdog(void)
{
+#if defined(NDEBUG) && !defined(DRC_DBG)
pthread_attr_t attr;
pthread_t tid;
int ret;
+#ifdef __linux__
+ int tpid = 0;
+ char buf[256];
+ FILE *f = fopen("/proc/self/status", "r");
+ if (f) {
+ while (fgets(buf, sizeof(buf), f))
+ if (buf[0] == 'T' && sscanf(buf, "TracerPid: %d", &tpid) == 1)
+ break;
+ fclose(f);
+ }
+ if (tpid) {
+ printf("no watchdog to tracer %d\n", tpid);
+ return;
+ }
+#endif
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
ret = pthread_create(&tid, &attr, watchdog_thread, NULL);
if (ret != 0)
fprintf(stderr, "could not start watchdog: %d\n", ret);
+#endif
+ (void)watchdog_thread;
}
-static void *pl_emu_mmap(unsigned long addr, size_t size, int is_fixed,
- enum psxMapTag tag)
+static void *pl_emu_mmap(unsigned long addr, size_t size,
+ enum psxMapTag tag, int *can_retry_addr)
{
- return plat_mmap(addr, size, 0, is_fixed);
+ *can_retry_addr = 1;
+ return plat_mmap(addr, size, 0, 0);
}
static void pl_emu_munmap(void *ptr, size_t size, enum psxMapTag tag)
static void *pl_mmap(unsigned int size)
{
- return psxMapHook(0, size, 0, MAP_TAG_VRAM);
+ int can_retry_addr;
+ return psxMapHook(0, size, MAP_TAG_VRAM, &can_retry_addr);
}
static void pl_munmap(void *ptr, unsigned int size)
void pl_timing_prepare(int is_pal);
void pl_frame_limit(void);
+void pl_update_layer_size(int w, int h, int fw, int fh);
// for communication with gpulib
struct rearmed_cbs {
void (*pl_get_layer_pos)(int *x, int *y, int *w, int *h);
int (*pl_vout_open)(void);
void (*pl_vout_set_mode)(int w, int h, int raw_w, int raw_h, int bpp);
- void (*pl_vout_flip)(const void *vram, int stride, int bgr24,
+ void (*pl_vout_flip)(const void *vram, int vram_offset, int bgr24,
int x, int y, int w, int h, int dims_changed);
void (*pl_vout_close)(void);
void *(*mmap)(unsigned int size);
void (*pl_vout_set_raw_vram)(void *vram);
void (*pl_set_gpu_caps)(int caps);
// emulation related
- void (*gpu_state_change)(int what);
+ void (*gpu_state_change)(int what, int cycles);
// some stats, for display by some plugins
int flips_per_sec, cpu_usage;
float vsps_cur; // currect vsync/s
// gpu options
int frameskip;
int fskip_advice;
+ int fskip_force;
+ int fskip_dirty;
unsigned int *gpu_frame_count;
unsigned int *gpu_hcnt;
unsigned int flip_cnt; // increment manually if not using pl_vout_flip
unsigned int only_16bpp; // platform is 16bpp-only
+ unsigned int thread_rendering;
+ unsigned int dithering; // 0 off, 1 on, 2 force
struct {
int allow_interlace; // 0 off, 1 on, 2 guess
int enhancement_enable;
int enhancement_tex_adj;
} gpu_neon;
struct {
- int iUseDither;
int dwActFixes;
float fFrameRateHz;
int dwFrameRateTicks;
} gpu_peops;
struct {
- int abe_hack;
- int no_light, no_blend;
- int lineskip;
- } gpu_unai_old;
- struct {
+ int old_renderer;
int ilace_force;
- int pixel_skip;
int lighting;
int fast_lighting;
int blending;
- int dithering;
int scale_hires;
} gpu_unai;
struct {
int screen_centering_type_default;
int screen_centering_x;
int screen_centering_y;
+ int screen_centering_h_adj;
+ int show_overscan;
};
extern struct rearmed_cbs pl_rearmed_cbs;
--- /dev/null
+#ifndef MMAN_H
+#define MMAN_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <switch.h>
+
+#define PROT_READ 0b001
+#define PROT_WRITE 0b010
+#define PROT_EXEC 0b100
+#define MAP_PRIVATE 2
+#define MAP_FIXED 0x10
+#define MAP_ANONYMOUS 0x20
+
+#define MAP_FAILED ((void *)-1)
+
+#define ALIGNMENT 0x1000
+
+#if 0 // not used
+static inline void *mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset)
+{
+ (void)fd;
+ (void)offset;
+
+ // match Linux behavior
+ len = (len + ALIGNMENT - 1) & ~(ALIGNMENT - 1);
+
+ Result rc = svcMapPhysicalMemory(addr, len);
+ if (R_FAILED(rc))
+ {
+ //printf("mmap failed\n");
+ addr = aligned_alloc(ALIGNMENT, len);
+ }
+ if (!addr)
+ return MAP_FAILED;
+ memset(addr, 0, len);
+ return addr;
+}
+
+static inline int munmap(void *addr, size_t len)
+{
+ len = (len + ALIGNMENT - 1) & ~(ALIGNMENT - 1);
+ Result rc = svcUnmapPhysicalMemory(addr, len);
+ if (R_FAILED(rc))
+ {
+ //printf("munmap failed\n");
+ free(addr);
+ }
+ return 0;
+}
+#endif
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif // MMAN_H
+
--- /dev/null
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2013 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZCONF_H
+#define ZCONF_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ * Even better than compiling with -DZ_PREFIX would be to use configure to set
+ * this permanently in zconf.h using "./configure --zprefix".
+ */
+#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */
+# define Z_PREFIX_SET
+
+/* all linked symbols */
+# define _dist_code z__dist_code
+# define _length_code z__length_code
+# define _tr_align z__tr_align
+# define _tr_flush_bits z__tr_flush_bits
+# define _tr_flush_block z__tr_flush_block
+# define _tr_init z__tr_init
+# define _tr_stored_block z__tr_stored_block
+# define _tr_tally z__tr_tally
+# define adler32 z_adler32
+# define adler32_combine z_adler32_combine
+# define adler32_combine64 z_adler32_combine64
+# ifndef Z_SOLO
+# define compress z_compress
+# define compress2 z_compress2
+# define compressBound z_compressBound
+# endif
+# define crc32 z_crc32
+# define crc32_combine z_crc32_combine
+# define crc32_combine64 z_crc32_combine64
+# define deflate z_deflate
+# define deflateBound z_deflateBound
+# define deflateCopy z_deflateCopy
+# define deflateEnd z_deflateEnd
+# define deflateInit2_ z_deflateInit2_
+# define deflateInit_ z_deflateInit_
+# define deflateParams z_deflateParams
+# define deflatePending z_deflatePending
+# define deflatePrime z_deflatePrime
+# define deflateReset z_deflateReset
+# define deflateResetKeep z_deflateResetKeep
+# define deflateSetDictionary z_deflateSetDictionary
+# define deflateSetHeader z_deflateSetHeader
+# define deflateTune z_deflateTune
+# define deflate_copyright z_deflate_copyright
+# define get_crc_table z_get_crc_table
+# ifndef Z_SOLO
+# define gz_error z_gz_error
+# define gz_intmax z_gz_intmax
+# define gz_strwinerror z_gz_strwinerror
+# define gzbuffer z_gzbuffer
+# define gzclearerr z_gzclearerr
+# define gzclose z_gzclose
+# define gzclose_r z_gzclose_r
+# define gzclose_w z_gzclose_w
+# define gzdirect z_gzdirect
+# define gzdopen z_gzdopen
+# define gzeof z_gzeof
+# define gzerror z_gzerror
+# define gzflush z_gzflush
+# define gzgetc z_gzgetc
+# define gzgetc_ z_gzgetc_
+# define gzgets z_gzgets
+# define gzoffset z_gzoffset
+# define gzoffset64 z_gzoffset64
+# define gzopen z_gzopen
+# define gzopen64 z_gzopen64
+# ifdef _WIN32
+# define gzopen_w z_gzopen_w
+# endif
+# define gzprintf z_gzprintf
+# define gzvprintf z_gzvprintf
+# define gzputc z_gzputc
+# define gzputs z_gzputs
+# define gzread z_gzread
+# define gzrewind z_gzrewind
+# define gzseek z_gzseek
+# define gzseek64 z_gzseek64
+# define gzsetparams z_gzsetparams
+# define gztell z_gztell
+# define gztell64 z_gztell64
+# define gzungetc z_gzungetc
+# define gzwrite z_gzwrite
+# endif
+# define inflate z_inflate
+# define inflateBack z_inflateBack
+# define inflateBackEnd z_inflateBackEnd
+# define inflateBackInit_ z_inflateBackInit_
+# define inflateCopy z_inflateCopy
+# define inflateEnd z_inflateEnd
+# define inflateGetHeader z_inflateGetHeader
+# define inflateInit2_ z_inflateInit2_
+# define inflateInit_ z_inflateInit_
+# define inflateMark z_inflateMark
+# define inflatePrime z_inflatePrime
+# define inflateReset z_inflateReset
+# define inflateReset2 z_inflateReset2
+# define inflateSetDictionary z_inflateSetDictionary
+# define inflateGetDictionary z_inflateGetDictionary
+# define inflateSync z_inflateSync
+# define inflateSyncPoint z_inflateSyncPoint
+# define inflateUndermine z_inflateUndermine
+# define inflateResetKeep z_inflateResetKeep
+# define inflate_copyright z_inflate_copyright
+# define inflate_fast z_inflate_fast
+# define inflate_table z_inflate_table
+# ifndef Z_SOLO
+# define uncompress z_uncompress
+# endif
+# define zError z_zError
+# ifndef Z_SOLO
+# define zcalloc z_zcalloc
+# define zcfree z_zcfree
+# endif
+# define zlibCompileFlags z_zlibCompileFlags
+# define zlibVersion z_zlibVersion
+
+/* all zlib typedefs in zlib.h and zconf.h */
+# define Byte z_Byte
+# define Bytef z_Bytef
+# define alloc_func z_alloc_func
+# define charf z_charf
+# define free_func z_free_func
+# ifndef Z_SOLO
+# define gzFile z_gzFile
+# endif
+# define gz_header z_gz_header
+# define gz_headerp z_gz_headerp
+# define in_func z_in_func
+# define intf z_intf
+# define out_func z_out_func
+# define uInt z_uInt
+# define uIntf z_uIntf
+# define uLong z_uLong
+# define uLongf z_uLongf
+# define voidp z_voidp
+# define voidpc z_voidpc
+# define voidpf z_voidpf
+
+/* all zlib structs in zlib.h and zconf.h */
+# define gz_header_s z_gz_header_s
+# define internal_state z_internal_state
+
+#endif
+
+#if defined(__MSDOS__) && !defined(MSDOS)
+# define MSDOS
+#endif
+#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
+# define OS2
+#endif
+#if defined(_WINDOWS) && !defined(WINDOWS)
+# define WINDOWS
+#endif
+#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
+# ifndef WIN32
+# define WIN32
+# endif
+#endif
+#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
+# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
+# ifndef SYS16BIT
+# define SYS16BIT
+# endif
+# endif
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#ifdef SYS16BIT
+# define MAXSEG_64K
+#endif
+#ifdef MSDOS
+# define UNALIGNED_OK
+#endif
+
+#ifdef __STDC_VERSION__
+# ifndef STDC
+# define STDC
+# endif
+# if __STDC_VERSION__ >= 199901L
+# ifndef STDC99
+# define STDC99
+# endif
+# endif
+#endif
+#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
+# define STDC
+#endif
+#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
+# define STDC
+#endif
+#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
+# define STDC
+#endif
+#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
+# define STDC
+#endif
+
+#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */
+# define STDC
+#endif
+
+#ifndef STDC
+# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+# define const /* note: need a more gentle solution here */
+# endif
+#endif
+
+#if defined(ZLIB_CONST) && !defined(z_const)
+# define z_const const
+#else
+# define z_const
+#endif
+
+/* Some Mac compilers merge all .h files incorrectly: */
+#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__)
+# define NO_DUMMY_DECL
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+# ifdef MAXSEG_64K
+# define MAX_MEM_LEVEL 8
+# else
+# define MAX_MEM_LEVEL 9
+# endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+# define MAX_WBITS 15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+ (1 << (windowBits+2)) + (1 << (memLevel+9))
+ that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+ make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+ The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ for small objects.
+*/
+
+ /* Type declarations */
+
+#ifndef OF /* function prototypes */
+# ifdef STDC
+# define OF(args) args
+# else
+# define OF(args) ()
+# endif
+#endif
+
+#ifndef Z_ARG /* function prototypes for stdarg */
+# if defined(STDC) || defined(Z_HAVE_STDARG_H)
+# define Z_ARG(args) args
+# else
+# define Z_ARG(args) ()
+# endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h. If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#ifdef SYS16BIT
+# if defined(M_I86SM) || defined(M_I86MM)
+ /* MSC small or medium model */
+# define SMALL_MEDIUM
+# ifdef _MSC_VER
+# define FAR _far
+# else
+# define FAR far
+# endif
+# endif
+# if (defined(__SMALL__) || defined(__MEDIUM__))
+ /* Turbo C small or medium model */
+# define SMALL_MEDIUM
+# ifdef __BORLANDC__
+# define FAR _far
+# else
+# define FAR far
+# endif
+# endif
+#endif
+
+#if defined(WINDOWS) || defined(WIN32)
+ /* If building or using zlib as a DLL, define ZLIB_DLL.
+ * This is not mandatory, but it offers a little performance increase.
+ */
+# ifdef ZLIB_DLL
+# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
+# ifdef ZLIB_INTERNAL
+# define ZEXTERN extern __declspec(dllexport)
+# else
+# define ZEXTERN extern __declspec(dllimport)
+# endif
+# endif
+# endif /* ZLIB_DLL */
+ /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+ * define ZLIB_WINAPI.
+ * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+ */
+# ifdef ZLIB_WINAPI
+# ifdef FAR
+# undef FAR
+# endif
+# include <windows.h>
+ /* No need for _export, use ZLIB.DEF instead. */
+ /* For complete Windows compatibility, use WINAPI, not __stdcall. */
+# define ZEXPORT WINAPI
+# ifdef WIN32
+# define ZEXPORTVA WINAPIV
+# else
+# define ZEXPORTVA FAR CDECL
+# endif
+# endif
+#endif
+
+#if defined (__BEOS__)
+# ifdef ZLIB_DLL
+# ifdef ZLIB_INTERNAL
+# define ZEXPORT __declspec(dllexport)
+# define ZEXPORTVA __declspec(dllexport)
+# else
+# define ZEXPORT __declspec(dllimport)
+# define ZEXPORTVA __declspec(dllimport)
+# endif
+# endif
+#endif
+
+#ifndef ZEXTERN
+# define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+# define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+# define ZEXPORTVA
+#endif
+
+#ifndef FAR
+# define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char Byte; /* 8 bits */
+#endif
+typedef unsigned int uInt; /* 16 bits or more */
+typedef unsigned long uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+ /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+# define Bytef Byte FAR
+#else
+ typedef Byte FAR Bytef;
+#endif
+typedef char FAR charf;
+typedef int FAR intf;
+typedef uInt FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+ typedef void const *voidpc;
+ typedef void FAR *voidpf;
+ typedef void *voidp;
+#else
+ typedef Byte const *voidpc;
+ typedef Byte FAR *voidpf;
+ typedef Byte *voidp;
+#endif
+
+#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
+# include <limits.h>
+# if (UINT_MAX == 0xffffffffUL)
+# define Z_U4 unsigned
+# elif (ULONG_MAX == 0xffffffffUL)
+# define Z_U4 unsigned long
+# elif (USHRT_MAX == 0xffffffffUL)
+# define Z_U4 unsigned short
+# endif
+#endif
+
+#ifdef Z_U4
+ typedef Z_U4 z_crc_t;
+#else
+ typedef unsigned long z_crc_t;
+#endif
+
+#if 1 /* was set to #if 1 by ./configure */
+# define Z_HAVE_UNISTD_H
+#endif
+
+#if 1 /* was set to #if 1 by ./configure */
+# define Z_HAVE_STDARG_H
+#endif
+
+#ifdef STDC
+# ifndef Z_SOLO
+# include <sys/types.h> /* for off_t */
+# endif
+#endif
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+# ifndef Z_SOLO
+# include <stdarg.h> /* for va_list */
+# endif
+#endif
+
+#ifdef _WIN32
+# ifndef Z_SOLO
+# include <stddef.h> /* for wchar_t */
+# endif
+#endif
+
+/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
+ * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
+ * though the former does not conform to the LFS document), but considering
+ * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
+ * equivalently requesting no 64-bit operations
+ */
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+# undef _LARGEFILE64_SOURCE
+#endif
+
+#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H)
+# define Z_HAVE_UNISTD_H
+#endif
+#ifndef Z_SOLO
+# if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE)
+# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
+# ifdef VMS
+# include <unixio.h> /* for off_t */
+# endif
+# ifndef z_off_t
+# define z_off_t off_t
+# endif
+# endif
+#endif
+
+#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
+# define Z_LFS64
+#endif
+
+#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
+# define Z_LARGE64
+#endif
+
+#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
+# define Z_WANT64
+#endif
+
+#if !defined(SEEK_SET) && !defined(Z_SOLO)
+# define SEEK_SET 0 /* Seek from beginning of file. */
+# define SEEK_CUR 1 /* Seek from current position. */
+# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */
+#endif
+
+#ifndef z_off_t
+# define z_off_t long
+#endif
+
+#if !defined(_WIN32) && defined(Z_LARGE64)
+# define z_off64_t off64_t
+#else
+# if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+# define z_off64_t __int64
+# else
+# define z_off64_t z_off_t
+# endif
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+ #pragma map(deflateInit_,"DEIN")
+ #pragma map(deflateInit2_,"DEIN2")
+ #pragma map(deflateEnd,"DEEND")
+ #pragma map(deflateBound,"DEBND")
+ #pragma map(inflateInit_,"ININ")
+ #pragma map(inflateInit2_,"ININ2")
+ #pragma map(inflateEnd,"INEND")
+ #pragma map(inflateSync,"INSY")
+ #pragma map(inflateSetDictionary,"INSEDI")
+ #pragma map(compressBound,"CMBND")
+ #pragma map(inflate_table,"INTABL")
+ #pragma map(inflate_fast,"INFA")
+ #pragma map(inflate_copyright,"INCOPY")
+#endif
+
+#endif /* ZCONF_H */
--- /dev/null
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+ version 1.2.8, April 28th, 2013
+
+ Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Jean-loup Gailly Mark Adler
+ jloup@gzip.org madler@alumni.caltech.edu
+
+
+ The data format used by the zlib library is described by RFCs (Request for
+ Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
+ (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
+*/
+
+#ifndef ZLIB_H
+#define ZLIB_H
+
+#include "zconf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZLIB_VERSION "1.2.8"
+#define ZLIB_VERNUM 0x1280
+#define ZLIB_VER_MAJOR 1
+#define ZLIB_VER_MINOR 2
+#define ZLIB_VER_REVISION 8
+#define ZLIB_VER_SUBREVISION 0
+
+/*
+ The 'zlib' compression library provides in-memory compression and
+ decompression functions, including integrity checks of the uncompressed data.
+ This version of the library supports only one compression method (deflation)
+ but other algorithms will be added later and will have the same stream
+ interface.
+
+ Compression can be done in a single step if the buffers are large enough,
+ or can be done by repeated calls of the compression function. In the latter
+ case, the application must provide more input and/or consume the output
+ (providing more output space) before each call.
+
+ The compressed data format used by default by the in-memory functions is
+ the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
+ around a deflate stream, which is itself documented in RFC 1951.
+
+ The library also supports reading and writing files in gzip (.gz) format
+ with an interface similar to that of stdio using the functions that start
+ with "gz". The gzip format is different from the zlib format. gzip is a
+ gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
+
+ This library can optionally read and write gzip streams in memory as well.
+
+ The zlib format was designed to be compact and fast for use in memory
+ and on communications channels. The gzip format was designed for single-
+ file compression on file systems, has a larger header than zlib to maintain
+ directory information, and uses a different, slower check method than zlib.
+
+ The library does not install any signal handler. The decoder checks
+ the consistency of the compressed data, so the library should never crash
+ even in case of corrupted input.
+*/
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void (*free_func) OF((voidpf opaque, voidpf address));
+
+struct internal_state;
+
+typedef struct z_stream_s {
+ z_const Bytef *next_in; /* next input byte */
+ uInt avail_in; /* number of bytes available at next_in */
+ uLong total_in; /* total number of input bytes read so far */
+
+ Bytef *next_out; /* next output byte should be put there */
+ uInt avail_out; /* remaining free space at next_out */
+ uLong total_out; /* total number of bytes output so far */
+
+ z_const char *msg; /* last error message, NULL if no error */
+ struct internal_state FAR *state; /* not visible by applications */
+
+ alloc_func zalloc; /* used to allocate the internal state */
+ free_func zfree; /* used to free the internal state */
+ voidpf opaque; /* private data object passed to zalloc and zfree */
+
+ int data_type; /* best guess about the data type: binary or text */
+ uLong adler; /* adler32 value of the uncompressed data */
+ uLong reserved; /* reserved for future use */
+} z_stream;
+
+typedef z_stream FAR *z_streamp;
+
+/*
+ gzip header information passed to and from zlib routines. See RFC 1952
+ for more details on the meanings of these fields.
+*/
+typedef struct gz_header_s {
+ int text; /* true if compressed data believed to be text */
+ uLong time; /* modification time */
+ int xflags; /* extra flags (not used when writing a gzip file) */
+ int os; /* operating system */
+ Bytef *extra; /* pointer to extra field or Z_NULL if none */
+ uInt extra_len; /* extra field length (valid if extra != Z_NULL) */
+ uInt extra_max; /* space at extra (only when reading header) */
+ Bytef *name; /* pointer to zero-terminated file name or Z_NULL */
+ uInt name_max; /* space at name (only when reading header) */
+ Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */
+ uInt comm_max; /* space at comment (only when reading header) */
+ int hcrc; /* true if there was or will be a header crc */
+ int done; /* true when done reading gzip header (not used
+ when writing a gzip file) */
+} gz_header;
+
+typedef gz_header FAR *gz_headerp;
+
+/*
+ The application must update next_in and avail_in when avail_in has dropped
+ to zero. It must update next_out and avail_out when avail_out has dropped
+ to zero. The application must initialize zalloc, zfree and opaque before
+ calling the init function. All other fields are set by the compression
+ library and must not be updated by the application.
+
+ The opaque value provided by the application will be passed as the first
+ parameter for calls of zalloc and zfree. This can be useful for custom
+ memory management. The compression library attaches no meaning to the
+ opaque value.
+
+ zalloc must return Z_NULL if there is not enough memory for the object.
+ If zlib is used in a multi-threaded application, zalloc and zfree must be
+ thread safe.
+
+ On 16-bit systems, the functions zalloc and zfree must be able to allocate
+ exactly 65536 bytes, but will not be required to allocate more than this if
+ the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers
+ returned by zalloc for objects of exactly 65536 bytes *must* have their
+ offset normalized to zero. The default allocation function provided by this
+ library ensures this (see zutil.c). To reduce memory requirements and avoid
+ any allocation of 64K objects, at the expense of compression ratio, compile
+ the library with -DMAX_WBITS=14 (see zconf.h).
+
+ The fields total_in and total_out can be used for statistics or progress
+ reports. After compression, total_in holds the total size of the
+ uncompressed data and may be saved for use in the decompressor (particularly
+ if the decompressor wants to decompress everything in a single step).
+*/
+
+ /* constants */
+
+#define Z_NO_FLUSH 0
+#define Z_PARTIAL_FLUSH 1
+#define Z_SYNC_FLUSH 2
+#define Z_FULL_FLUSH 3
+#define Z_FINISH 4
+#define Z_BLOCK 5
+#define Z_TREES 6
+/* Allowed flush values; see deflate() and inflate() below for details */
+
+#define Z_OK 0
+#define Z_STREAM_END 1
+#define Z_NEED_DICT 2
+#define Z_ERRNO (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR (-3)
+#define Z_MEM_ERROR (-4)
+#define Z_BUF_ERROR (-5)
+#define Z_VERSION_ERROR (-6)
+/* Return codes for the compression/decompression functions. Negative values
+ * are errors, positive values are used for special but normal events.
+ */
+
+#define Z_NO_COMPRESSION 0
+#define Z_BEST_SPEED 1
+#define Z_BEST_COMPRESSION 9
+#define Z_DEFAULT_COMPRESSION (-1)
+/* compression levels */
+
+#define Z_FILTERED 1
+#define Z_HUFFMAN_ONLY 2
+#define Z_RLE 3
+#define Z_FIXED 4
+#define Z_DEFAULT_STRATEGY 0
+/* compression strategy; see deflateInit2() below for details */
+
+#define Z_BINARY 0
+#define Z_TEXT 1
+#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */
+#define Z_UNKNOWN 2
+/* Possible values of the data_type field (though see inflate()) */
+
+#define Z_DEFLATED 8
+/* The deflate compression method (the only one supported in this version) */
+
+#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */
+
+#define zlib_version zlibVersion()
+/* for compatibility with versions < 1.0.2 */
+
+
+ /* basic functions */
+
+ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+ If the first character differs, the library code actually used is not
+ compatible with the zlib.h header file used by the application. This check
+ is automatically made by deflateInit and inflateInit.
+ */
+
+/*
+ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+
+ Initializes the internal stream state for compression. The fields
+ zalloc, zfree and opaque must be initialized before by the caller. If
+ zalloc and zfree are set to Z_NULL, deflateInit updates them to use default
+ allocation functions.
+
+ The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+ 1 gives best speed, 9 gives best compression, 0 gives no compression at all
+ (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION
+ requests a default compromise between speed and compression (currently
+ equivalent to level 6).
+
+ deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_STREAM_ERROR if level is not a valid compression level, or
+ Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+ with the version assumed by the caller (ZLIB_VERSION). msg is set to null
+ if there is no error message. deflateInit does not perform any compression:
+ this will be done by deflate().
+*/
+
+
+ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+/*
+ deflate compresses as much data as possible, and stops when the input
+ buffer becomes empty or the output buffer becomes full. It may introduce
+ some output latency (reading input without producing any output) except when
+ forced to flush.
+
+ The detailed semantics are as follows. deflate performs one or both of the
+ following actions:
+
+ - Compress more input starting at next_in and update next_in and avail_in
+ accordingly. If not all input can be processed (because there is not
+ enough room in the output buffer), next_in and avail_in are updated and
+ processing will resume at this point for the next call of deflate().
+
+ - Provide more output starting at next_out and update next_out and avail_out
+ accordingly. This action is forced if the parameter flush is non zero.
+ Forcing flush frequently degrades the compression ratio, so this parameter
+ should be set only when necessary (in interactive applications). Some
+ output may be provided even if flush is not set.
+
+ Before the call of deflate(), the application should ensure that at least
+ one of the actions is possible, by providing more input and/or consuming more
+ output, and updating avail_in or avail_out accordingly; avail_out should
+ never be zero before the call. The application can consume the compressed
+ output when it wants, for example when the output buffer is full (avail_out
+ == 0), or after each call of deflate(). If deflate returns Z_OK and with
+ zero avail_out, it must be called again after making room in the output
+ buffer because there might be more output pending.
+
+ Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
+ decide how much data to accumulate before producing output, in order to
+ maximize compression.
+
+ If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
+ flushed to the output buffer and the output is aligned on a byte boundary, so
+ that the decompressor can get all input data available so far. (In
+ particular avail_in is zero after the call if enough output space has been
+ provided before the call.) Flushing may degrade compression for some
+ compression algorithms and so it should be used only when necessary. This
+ completes the current deflate block and follows it with an empty stored block
+ that is three bits plus filler bits to the next byte, followed by four bytes
+ (00 00 ff ff).
+
+ If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the
+ output buffer, but the output is not aligned to a byte boundary. All of the
+ input data so far will be available to the decompressor, as for Z_SYNC_FLUSH.
+ This completes the current deflate block and follows it with an empty fixed
+ codes block that is 10 bits long. This assures that enough bytes are output
+ in order for the decompressor to finish the block before the empty fixed code
+ block.
+
+ If flush is set to Z_BLOCK, a deflate block is completed and emitted, as
+ for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to
+ seven bits of the current block are held to be written as the next byte after
+ the next deflate block is completed. In this case, the decompressor may not
+ be provided enough bits at this point in order to complete decompression of
+ the data provided so far to the compressor. It may need to wait for the next
+ block to be emitted. This is for advanced applications that need to control
+ the emission of deflate blocks.
+
+ If flush is set to Z_FULL_FLUSH, all output is flushed as with
+ Z_SYNC_FLUSH, and the compression state is reset so that decompression can
+ restart from this point if previous compressed data has been damaged or if
+ random access is desired. Using Z_FULL_FLUSH too often can seriously degrade
+ compression.
+
+ If deflate returns with avail_out == 0, this function must be called again
+ with the same value of the flush parameter and more output space (updated
+ avail_out), until the flush is complete (deflate returns with non-zero
+ avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
+ avail_out is greater than six to avoid repeated flush markers due to
+ avail_out == 0 on return.
+
+ If the parameter flush is set to Z_FINISH, pending input is processed,
+ pending output is flushed and deflate returns with Z_STREAM_END if there was
+ enough output space; if deflate returns with Z_OK, this function must be
+ called again with Z_FINISH and more output space (updated avail_out) but no
+ more input data, until it returns with Z_STREAM_END or an error. After
+ deflate has returned Z_STREAM_END, the only possible operations on the stream
+ are deflateReset or deflateEnd.
+
+ Z_FINISH can be used immediately after deflateInit if all the compression
+ is to be done in a single step. In this case, avail_out must be at least the
+ value returned by deflateBound (see below). Then deflate is guaranteed to
+ return Z_STREAM_END. If not enough output space is provided, deflate will
+ not return Z_STREAM_END, and it must be called again as described above.
+
+ deflate() sets strm->adler to the adler32 checksum of all input read
+ so far (that is, total_in bytes).
+
+ deflate() may update strm->data_type if it can make a good guess about
+ the input data type (Z_BINARY or Z_TEXT). In doubt, the data is considered
+ binary. This field is only for information purposes and does not affect the
+ compression algorithm in any manner.
+
+ deflate() returns Z_OK if some progress has been made (more input
+ processed or more output produced), Z_STREAM_END if all input has been
+ consumed and all output has been produced (only when flush is set to
+ Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+ if next_in or next_out was Z_NULL), Z_BUF_ERROR if no progress is possible
+ (for example avail_in or avail_out was zero). Note that Z_BUF_ERROR is not
+ fatal, and deflate() can be called again with more input and more output
+ space to continue compressing.
+*/
+
+
+ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+/*
+ All dynamically allocated data structures for this stream are freed.
+ This function discards any unprocessed input and does not flush any pending
+ output.
+
+ deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+ stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+ prematurely (some input or output was discarded). In the error case, msg
+ may be set but then points to a static string (which must not be
+ deallocated).
+*/
+
+
+/*
+ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+
+ Initializes the internal stream state for decompression. The fields
+ next_in, avail_in, zalloc, zfree and opaque must be initialized before by
+ the caller. If next_in is not Z_NULL and avail_in is large enough (the
+ exact value depends on the compression method), inflateInit determines the
+ compression method from the zlib header and allocates all data structures
+ accordingly; otherwise the allocation will be deferred to the first call of
+ inflate. If zalloc and zfree are set to Z_NULL, inflateInit updates them to
+ use default allocation functions.
+
+ inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+ version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+ invalid, such as a null pointer to the structure. msg is set to null if
+ there is no error message. inflateInit does not perform any decompression
+ apart from possibly reading the zlib header if present: actual decompression
+ will be done by inflate(). (So next_in and avail_in may be modified, but
+ next_out and avail_out are unused and unchanged.) The current implementation
+ of inflateInit() does not process any header information -- that is deferred
+ until inflate() is called.
+*/
+
+
+ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+/*
+ inflate decompresses as much data as possible, and stops when the input
+ buffer becomes empty or the output buffer becomes full. It may introduce
+ some output latency (reading input without producing any output) except when
+ forced to flush.
+
+ The detailed semantics are as follows. inflate performs one or both of the
+ following actions:
+
+ - Decompress more input starting at next_in and update next_in and avail_in
+ accordingly. If not all input can be processed (because there is not
+ enough room in the output buffer), next_in is updated and processing will
+ resume at this point for the next call of inflate().
+
+ - Provide more output starting at next_out and update next_out and avail_out
+ accordingly. inflate() provides as much output as possible, until there is
+ no more input data or no more space in the output buffer (see below about
+ the flush parameter).
+
+ Before the call of inflate(), the application should ensure that at least
+ one of the actions is possible, by providing more input and/or consuming more
+ output, and updating the next_* and avail_* values accordingly. The
+ application can consume the uncompressed output when it wants, for example
+ when the output buffer is full (avail_out == 0), or after each call of
+ inflate(). If inflate returns Z_OK and with zero avail_out, it must be
+ called again after making room in the output buffer because there might be
+ more output pending.
+
+ The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH,
+ Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much
+ output as possible to the output buffer. Z_BLOCK requests that inflate()
+ stop if and when it gets to the next deflate block boundary. When decoding
+ the zlib or gzip format, this will cause inflate() to return immediately
+ after the header and before the first block. When doing a raw inflate,
+ inflate() will go ahead and process the first block, and will return when it
+ gets to the end of that block, or when it runs out of data.
+
+ The Z_BLOCK option assists in appending to or combining deflate streams.
+ Also to assist in this, on return inflate() will set strm->data_type to the
+ number of unused bits in the last byte taken from strm->next_in, plus 64 if
+ inflate() is currently decoding the last block in the deflate stream, plus
+ 128 if inflate() returned immediately after decoding an end-of-block code or
+ decoding the complete header up to just before the first byte of the deflate
+ stream. The end-of-block will not be indicated until all of the uncompressed
+ data from that block has been written to strm->next_out. The number of
+ unused bits may in general be greater than seven, except when bit 7 of
+ data_type is set, in which case the number of unused bits will be less than
+ eight. data_type is set as noted here every time inflate() returns for all
+ flush options, and so can be used to determine the amount of currently
+ consumed input in bits.
+
+ The Z_TREES option behaves as Z_BLOCK does, but it also returns when the
+ end of each deflate block header is reached, before any actual data in that
+ block is decoded. This allows the caller to determine the length of the
+ deflate block header for later use in random access within a deflate block.
+ 256 is added to the value of strm->data_type when inflate() returns
+ immediately after reaching the end of the deflate block header.
+
+ inflate() should normally be called until it returns Z_STREAM_END or an
+ error. However if all decompression is to be performed in a single step (a
+ single call of inflate), the parameter flush should be set to Z_FINISH. In
+ this case all pending input is processed and all pending output is flushed;
+ avail_out must be large enough to hold all of the uncompressed data for the
+ operation to complete. (The size of the uncompressed data may have been
+ saved by the compressor for this purpose.) The use of Z_FINISH is not
+ required to perform an inflation in one step. However it may be used to
+ inform inflate that a faster approach can be used for the single inflate()
+ call. Z_FINISH also informs inflate to not maintain a sliding window if the
+ stream completes, which reduces inflate's memory footprint. If the stream
+ does not complete, either because not all of the stream is provided or not
+ enough output space is provided, then a sliding window will be allocated and
+ inflate() can be called again to continue the operation as if Z_NO_FLUSH had
+ been used.
+
+ In this implementation, inflate() always flushes as much output as
+ possible to the output buffer, and always uses the faster approach on the
+ first call. So the effects of the flush parameter in this implementation are
+ on the return value of inflate() as noted below, when inflate() returns early
+ when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of
+ memory for a sliding window when Z_FINISH is used.
+
+ If a preset dictionary is needed after this call (see inflateSetDictionary
+ below), inflate sets strm->adler to the Adler-32 checksum of the dictionary
+ chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
+ strm->adler to the Adler-32 checksum of all output produced so far (that is,
+ total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
+ below. At the end of the stream, inflate() checks that its computed adler32
+ checksum is equal to that saved by the compressor and returns Z_STREAM_END
+ only if the checksum is correct.
+
+ inflate() can decompress and check either zlib-wrapped or gzip-wrapped
+ deflate data. The header type is detected automatically, if requested when
+ initializing with inflateInit2(). Any information contained in the gzip
+ header is not retained, so applications that need that information should
+ instead use raw inflate, see inflateInit2() below, or inflateBack() and
+ perform their own processing of the gzip header and trailer. When processing
+ gzip-wrapped deflate data, strm->adler is set to the CRC-32 of the output
+ produced so far. The CRC-32 is checked against the gzip trailer.
+
+ inflate() returns Z_OK if some progress has been made (more input processed
+ or more output produced), Z_STREAM_END if the end of the compressed data has
+ been reached and all uncompressed output has been produced, Z_NEED_DICT if a
+ preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
+ corrupted (input stream not conforming to the zlib format or incorrect check
+ value), Z_STREAM_ERROR if the stream structure was inconsistent (for example
+ next_in or next_out was Z_NULL), Z_MEM_ERROR if there was not enough memory,
+ Z_BUF_ERROR if no progress is possible or if there was not enough room in the
+ output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and
+ inflate() can be called again with more input and more output space to
+ continue decompressing. If Z_DATA_ERROR is returned, the application may
+ then call inflateSync() to look for a good compression block if a partial
+ recovery of the data is desired.
+*/
+
+
+ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+/*
+ All dynamically allocated data structures for this stream are freed.
+ This function discards any unprocessed input and does not flush any pending
+ output.
+
+ inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
+ was inconsistent. In the error case, msg may be set but then points to a
+ static string (which must not be deallocated).
+*/
+
+
+ /* Advanced functions */
+
+/*
+ The following functions are needed only in some special applications.
+*/
+
+/*
+ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
+ int level,
+ int method,
+ int windowBits,
+ int memLevel,
+ int strategy));
+
+ This is another version of deflateInit with more compression options. The
+ fields next_in, zalloc, zfree and opaque must be initialized before by the
+ caller.
+
+ The method parameter is the compression method. It must be Z_DEFLATED in
+ this version of the library.
+
+ The windowBits parameter is the base two logarithm of the window size
+ (the size of the history buffer). It should be in the range 8..15 for this
+ version of the library. Larger values of this parameter result in better
+ compression at the expense of memory usage. The default value is 15 if
+ deflateInit is used instead.
+
+ windowBits can also be -8..-15 for raw deflate. In this case, -windowBits
+ determines the window size. deflate() will then generate raw deflate data
+ with no zlib header or trailer, and will not compute an adler32 check value.
+
+ windowBits can also be greater than 15 for optional gzip encoding. Add
+ 16 to windowBits to write a simple gzip header and trailer around the
+ compressed data instead of a zlib wrapper. The gzip header will have no
+ file name, no extra data, no comment, no modification time (set to zero), no
+ header crc, and the operating system will be set to 255 (unknown). If a
+ gzip stream is being written, strm->adler is a crc32 instead of an adler32.
+
+ The memLevel parameter specifies how much memory should be allocated
+ for the internal compression state. memLevel=1 uses minimum memory but is
+ slow and reduces compression ratio; memLevel=9 uses maximum memory for
+ optimal speed. The default value is 8. See zconf.h for total memory usage
+ as a function of windowBits and memLevel.
+
+ The strategy parameter is used to tune the compression algorithm. Use the
+ value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+ filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
+ string match), or Z_RLE to limit match distances to one (run-length
+ encoding). Filtered data consists mostly of small values with a somewhat
+ random distribution. In this case, the compression algorithm is tuned to
+ compress them better. The effect of Z_FILTERED is to force more Huffman
+ coding and less string matching; it is somewhat intermediate between
+ Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as
+ fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The
+ strategy parameter only affects the compression ratio but not the
+ correctness of the compressed output even if it is not set appropriately.
+ Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler
+ decoder for special applications.
+
+ deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid
+ method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is
+ incompatible with the version assumed by the caller (ZLIB_VERSION). msg is
+ set to null if there is no error message. deflateInit2 does not perform any
+ compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
+ const Bytef *dictionary,
+ uInt dictLength));
+/*
+ Initializes the compression dictionary from the given byte sequence
+ without producing any compressed output. When using the zlib format, this
+ function must be called immediately after deflateInit, deflateInit2 or
+ deflateReset, and before any call of deflate. When doing raw deflate, this
+ function must be called either before any call of deflate, or immediately
+ after the completion of a deflate block, i.e. after all input has been
+ consumed and all output has been delivered when using any of the flush
+ options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The
+ compressor and decompressor must use exactly the same dictionary (see
+ inflateSetDictionary).
+
+ The dictionary should consist of strings (byte sequences) that are likely
+ to be encountered later in the data to be compressed, with the most commonly
+ used strings preferably put towards the end of the dictionary. Using a
+ dictionary is most useful when the data to be compressed is short and can be
+ predicted with good accuracy; the data can then be compressed better than
+ with the default empty dictionary.
+
+ Depending on the size of the compression data structures selected by
+ deflateInit or deflateInit2, a part of the dictionary may in effect be
+ discarded, for example if the dictionary is larger than the window size
+ provided in deflateInit or deflateInit2. Thus the strings most likely to be
+ useful should be put at the end of the dictionary, not at the front. In
+ addition, the current implementation of deflate will use at most the window
+ size minus 262 bytes of the provided dictionary.
+
+ Upon return of this function, strm->adler is set to the adler32 value
+ of the dictionary; the decompressor may later use this value to determine
+ which dictionary has been used by the compressor. (The adler32 value
+ applies to the whole dictionary even if only a subset of the dictionary is
+ actually used by the compressor.) If a raw deflate was requested, then the
+ adler32 value is not computed and strm->adler is not set.
+
+ deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
+ parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is
+ inconsistent (for example if deflate has already been called for this stream
+ or if not at a block boundary for raw deflate). deflateSetDictionary does
+ not perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
+ z_streamp source));
+/*
+ Sets the destination stream as a complete copy of the source stream.
+
+ This function can be useful when several compression strategies will be
+ tried, for example when there are several ways of pre-processing the input
+ data with a filter. The streams that will be discarded should then be freed
+ by calling deflateEnd. Note that deflateCopy duplicates the internal
+ compression state which can be quite large, so this strategy is slow and can
+ consume lots of memory.
+
+ deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+ (such as zalloc being Z_NULL). msg is left unchanged in both source and
+ destination.
+*/
+
+ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+/*
+ This function is equivalent to deflateEnd followed by deflateInit,
+ but does not free and reallocate all the internal compression state. The
+ stream will keep the same compression level and any other attributes that
+ may have been set by deflateInit2.
+
+ deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
+ int level,
+ int strategy));
+/*
+ Dynamically update the compression level and compression strategy. The
+ interpretation of level and strategy is as in deflateInit2. This can be
+ used to switch between compression and straight copy of the input data, or
+ to switch to a different kind of input data requiring a different strategy.
+ If the compression level is changed, the input available so far is
+ compressed with the old level (and may be flushed); the new level will take
+ effect only at the next call of deflate().
+
+ Before the call of deflateParams, the stream state must be set as for
+ a call of deflate(), since the currently available input may have to be
+ compressed and flushed. In particular, strm->avail_out must be non-zero.
+
+ deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
+ stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR if
+ strm->avail_out was zero.
+*/
+
+ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
+ int good_length,
+ int max_lazy,
+ int nice_length,
+ int max_chain));
+/*
+ Fine tune deflate's internal compression parameters. This should only be
+ used by someone who understands the algorithm used by zlib's deflate for
+ searching for the best matching string, and even then only by the most
+ fanatic optimizer trying to squeeze out the last compressed bit for their
+ specific input data. Read the deflate.c source code for the meaning of the
+ max_lazy, good_length, nice_length, and max_chain parameters.
+
+ deflateTune() can be called after deflateInit() or deflateInit2(), and
+ returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
+ */
+
+ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
+ uLong sourceLen));
+/*
+ deflateBound() returns an upper bound on the compressed size after
+ deflation of sourceLen bytes. It must be called after deflateInit() or
+ deflateInit2(), and after deflateSetHeader(), if used. This would be used
+ to allocate an output buffer for deflation in a single pass, and so would be
+ called before deflate(). If that first deflate() call is provided the
+ sourceLen input bytes, an output buffer allocated to the size returned by
+ deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed
+ to return Z_STREAM_END. Note that it is possible for the compressed size to
+ be larger than the value returned by deflateBound() if flush options other
+ than Z_FINISH or Z_NO_FLUSH are used.
+*/
+
+ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
+ unsigned *pending,
+ int *bits));
+/*
+ deflatePending() returns the number of bytes and bits of output that have
+ been generated, but not yet provided in the available output. The bytes not
+ provided would be due to the available output space having been consumed.
+ The number of bits of output not provided are between 0 and 7, where they
+ await more bits to join them in order to fill out a full byte. If pending
+ or bits are Z_NULL, then those values are not set.
+
+ deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent.
+ */
+
+ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
+ int bits,
+ int value));
+/*
+ deflatePrime() inserts bits in the deflate output stream. The intent
+ is that this function is used to start off the deflate output with the bits
+ leftover from a previous deflate stream when appending to it. As such, this
+ function can only be used for raw deflate, and must be used before the first
+ deflate() call after a deflateInit2() or deflateReset(). bits must be less
+ than or equal to 16, and that many of the least significant bits of value
+ will be inserted in the output.
+
+ deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough
+ room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the
+ source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
+ gz_headerp head));
+/*
+ deflateSetHeader() provides gzip header information for when a gzip
+ stream is requested by deflateInit2(). deflateSetHeader() may be called
+ after deflateInit2() or deflateReset() and before the first call of
+ deflate(). The text, time, os, extra field, name, and comment information
+ in the provided gz_header structure are written to the gzip header (xflag is
+ ignored -- the extra flags are set according to the compression level). The
+ caller must assure that, if not Z_NULL, name and comment are terminated with
+ a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
+ available there. If hcrc is true, a gzip header crc is included. Note that
+ the current versions of the command-line version of gzip (up through version
+ 1.3.x) do not support header crc's, and will report that it is a "multi-part
+ gzip file" and give up.
+
+ If deflateSetHeader is not used, the default gzip header has text false,
+ the time set to zero, and os set to 255, with no extra, name, or comment
+ fields. The gzip header is returned to the default state by deflateReset().
+
+ deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
+ int windowBits));
+
+ This is another version of inflateInit with an extra parameter. The
+ fields next_in, avail_in, zalloc, zfree and opaque must be initialized
+ before by the caller.
+
+ The windowBits parameter is the base two logarithm of the maximum window
+ size (the size of the history buffer). It should be in the range 8..15 for
+ this version of the library. The default value is 15 if inflateInit is used
+ instead. windowBits must be greater than or equal to the windowBits value
+ provided to deflateInit2() while compressing, or it must be equal to 15 if
+ deflateInit2() was not used. If a compressed stream with a larger window
+ size is given as input, inflate() will return with the error code
+ Z_DATA_ERROR instead of trying to allocate a larger window.
+
+ windowBits can also be zero to request that inflate use the window size in
+ the zlib header of the compressed stream.
+
+ windowBits can also be -8..-15 for raw inflate. In this case, -windowBits
+ determines the window size. inflate() will then process raw deflate data,
+ not looking for a zlib or gzip header, not generating a check value, and not
+ looking for any check values for comparison at the end of the stream. This
+ is for use with other formats that use the deflate compressed data format
+ such as zip. Those formats provide their own check values. If a custom
+ format is developed using the raw deflate format for compressed data, it is
+ recommended that a check value such as an adler32 or a crc32 be applied to
+ the uncompressed data as is done in the zlib, gzip, and zip formats. For
+ most applications, the zlib format should be used as is. Note that comments
+ above on the use in deflateInit2() apply to the magnitude of windowBits.
+
+ windowBits can also be greater than 15 for optional gzip decoding. Add
+ 32 to windowBits to enable zlib and gzip decoding with automatic header
+ detection, or add 16 to decode only the gzip format (the zlib format will
+ return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a
+ crc32 instead of an adler32.
+
+ inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+ version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+ invalid, such as a null pointer to the structure. msg is set to null if
+ there is no error message. inflateInit2 does not perform any decompression
+ apart from possibly reading the zlib header if present: actual decompression
+ will be done by inflate(). (So next_in and avail_in may be modified, but
+ next_out and avail_out are unused and unchanged.) The current implementation
+ of inflateInit2() does not process any header information -- that is
+ deferred until inflate() is called.
+*/
+
+ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
+ const Bytef *dictionary,
+ uInt dictLength));
+/*
+ Initializes the decompression dictionary from the given uncompressed byte
+ sequence. This function must be called immediately after a call of inflate,
+ if that call returned Z_NEED_DICT. The dictionary chosen by the compressor
+ can be determined from the adler32 value returned by that call of inflate.
+ The compressor and decompressor must use exactly the same dictionary (see
+ deflateSetDictionary). For raw inflate, this function can be called at any
+ time to set the dictionary. If the provided dictionary is smaller than the
+ window and there is already data in the window, then the provided dictionary
+ will amend what's there. The application must ensure that the dictionary
+ that was used for compression is provided.
+
+ inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
+ parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is
+ inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
+ expected one (incorrect adler32 value). inflateSetDictionary does not
+ perform any decompression: this will be done by subsequent calls of
+ inflate().
+*/
+
+ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
+ Bytef *dictionary,
+ uInt *dictLength));
+/*
+ Returns the sliding dictionary being maintained by inflate. dictLength is
+ set to the number of bytes in the dictionary, and that many bytes are copied
+ to dictionary. dictionary must have enough space, where 32768 bytes is
+ always enough. If inflateGetDictionary() is called with dictionary equal to
+ Z_NULL, then only the dictionary length is returned, and nothing is copied.
+ Similarly, if dictLength is Z_NULL, then it is not set.
+
+ inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+ stream state is inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+/*
+ Skips invalid compressed data until a possible full flush point (see above
+ for the description of deflate with Z_FULL_FLUSH) can be found, or until all
+ available input is skipped. No output is provided.
+
+ inflateSync searches for a 00 00 FF FF pattern in the compressed data.
+ All full flush points have this pattern, but not all occurrences of this
+ pattern are full flush points.
+
+ inflateSync returns Z_OK if a possible full flush point has been found,
+ Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point
+ has been found, or Z_STREAM_ERROR if the stream structure was inconsistent.
+ In the success case, the application may save the current value of
+ total_in which indicates where valid compressed data was found. In the
+ error case, the application may repeatedly call inflateSync, providing more
+ input each time, until success or end of the input data.
+*/
+
+ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
+ z_streamp source));
+/*
+ Sets the destination stream as a complete copy of the source stream.
+
+ This function can be useful when randomly accessing a large stream. The
+ first pass through the stream can periodically record the inflate state,
+ allowing restarting inflate at those points when randomly accessing the
+ stream.
+
+ inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+ (such as zalloc being Z_NULL). msg is left unchanged in both source and
+ destination.
+*/
+
+ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+/*
+ This function is equivalent to inflateEnd followed by inflateInit,
+ but does not free and reallocate all the internal decompression state. The
+ stream will keep attributes that may have been set by inflateInit2.
+
+ inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
+ int windowBits));
+/*
+ This function is the same as inflateReset, but it also permits changing
+ the wrap and window size requests. The windowBits parameter is interpreted
+ the same as it is for inflateInit2.
+
+ inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent (such as zalloc or state being Z_NULL), or if
+ the windowBits parameter is invalid.
+*/
+
+ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
+ int bits,
+ int value));
+/*
+ This function inserts bits in the inflate input stream. The intent is
+ that this function is used to start inflating at a bit position in the
+ middle of a byte. The provided bits will be used before any bytes are used
+ from next_in. This function should only be used with raw inflate, and
+ should be used before the first inflate() call after inflateInit2() or
+ inflateReset(). bits must be less than or equal to 16, and that many of the
+ least significant bits of value will be inserted in the input.
+
+ If bits is negative, then the input stream bit buffer is emptied. Then
+ inflatePrime() can be called again to put bits in the buffer. This is used
+ to clear out bits leftover after feeding inflate a block description prior
+ to feeding inflate codes.
+
+ inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent.
+*/
+
+ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
+/*
+ This function returns two values, one in the lower 16 bits of the return
+ value, and the other in the remaining upper bits, obtained by shifting the
+ return value down 16 bits. If the upper value is -1 and the lower value is
+ zero, then inflate() is currently decoding information outside of a block.
+ If the upper value is -1 and the lower value is non-zero, then inflate is in
+ the middle of a stored block, with the lower value equaling the number of
+ bytes from the input remaining to copy. If the upper value is not -1, then
+ it is the number of bits back from the current bit position in the input of
+ the code (literal or length/distance pair) currently being processed. In
+ that case the lower value is the number of bytes already emitted for that
+ code.
+
+ A code is being processed if inflate is waiting for more input to complete
+ decoding of the code, or if it has completed decoding but is waiting for
+ more output space to write the literal or match data.
+
+ inflateMark() is used to mark locations in the input data for random
+ access, which may be at bit positions, and to note those cases where the
+ output of a code may span boundaries of random access blocks. The current
+ location in the input stream can be determined from avail_in and data_type
+ as noted in the description for the Z_BLOCK flush parameter for inflate.
+
+ inflateMark returns the value noted above or -1 << 16 if the provided
+ source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
+ gz_headerp head));
+/*
+ inflateGetHeader() requests that gzip header information be stored in the
+ provided gz_header structure. inflateGetHeader() may be called after
+ inflateInit2() or inflateReset(), and before the first call of inflate().
+ As inflate() processes the gzip stream, head->done is zero until the header
+ is completed, at which time head->done is set to one. If a zlib stream is
+ being decoded, then head->done is set to -1 to indicate that there will be
+ no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be
+ used to force inflate() to return immediately after header processing is
+ complete and before any actual data is decompressed.
+
+ The text, time, xflags, and os fields are filled in with the gzip header
+ contents. hcrc is set to true if there is a header CRC. (The header CRC
+ was valid if done is set to one.) If extra is not Z_NULL, then extra_max
+ contains the maximum number of bytes to write to extra. Once done is true,
+ extra_len contains the actual extra field length, and extra contains the
+ extra field, or that field truncated if extra_max is less than extra_len.
+ If name is not Z_NULL, then up to name_max characters are written there,
+ terminated with a zero unless the length is greater than name_max. If
+ comment is not Z_NULL, then up to comm_max characters are written there,
+ terminated with a zero unless the length is greater than comm_max. When any
+ of extra, name, or comment are not Z_NULL and the respective field is not
+ present in the header, then that field is set to Z_NULL to signal its
+ absence. This allows the use of deflateSetHeader() with the returned
+ structure to duplicate the header. However if those fields are set to
+ allocated memory, then the application will need to save those pointers
+ elsewhere so that they can be eventually freed.
+
+ If inflateGetHeader is not used, then the header information is simply
+ discarded. The header is always checked for validity, including the header
+ CRC if present. inflateReset() will reset the process to discard the header
+ information. The application would need to call inflateGetHeader() again to
+ retrieve the header from the next gzip stream.
+
+ inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
+ unsigned char FAR *window));
+
+ Initialize the internal stream state for decompression using inflateBack()
+ calls. The fields zalloc, zfree and opaque in strm must be initialized
+ before the call. If zalloc and zfree are Z_NULL, then the default library-
+ derived memory allocation routines are used. windowBits is the base two
+ logarithm of the window size, in the range 8..15. window is a caller
+ supplied buffer of that size. Except for special applications where it is
+ assured that deflate was used with small window sizes, windowBits must be 15
+ and a 32K byte window must be supplied to be able to decompress general
+ deflate streams.
+
+ See inflateBack() for the usage of these routines.
+
+ inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
+ the parameters are invalid, Z_MEM_ERROR if the internal state could not be
+ allocated, or Z_VERSION_ERROR if the version of the library does not match
+ the version of the header file.
+*/
+
+typedef unsigned (*in_func) OF((void FAR *,
+ z_const unsigned char FAR * FAR *));
+typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
+
+ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
+ in_func in, void FAR *in_desc,
+ out_func out, void FAR *out_desc));
+/*
+ inflateBack() does a raw inflate with a single call using a call-back
+ interface for input and output. This is potentially more efficient than
+ inflate() for file i/o applications, in that it avoids copying between the
+ output and the sliding window by simply making the window itself the output
+ buffer. inflate() can be faster on modern CPUs when used with large
+ buffers. inflateBack() trusts the application to not change the output
+ buffer passed by the output function, at least until inflateBack() returns.
+
+ inflateBackInit() must be called first to allocate the internal state
+ and to initialize the state with the user-provided window buffer.
+ inflateBack() may then be used multiple times to inflate a complete, raw
+ deflate stream with each call. inflateBackEnd() is then called to free the
+ allocated state.
+
+ A raw deflate stream is one with no zlib or gzip header or trailer.
+ This routine would normally be used in a utility that reads zip or gzip
+ files and writes out uncompressed files. The utility would decode the
+ header and process the trailer on its own, hence this routine expects only
+ the raw deflate stream to decompress. This is different from the normal
+ behavior of inflate(), which expects either a zlib or gzip header and
+ trailer around the deflate stream.
+
+ inflateBack() uses two subroutines supplied by the caller that are then
+ called by inflateBack() for input and output. inflateBack() calls those
+ routines until it reads a complete deflate stream and writes out all of the
+ uncompressed data, or until it encounters an error. The function's
+ parameters and return types are defined above in the in_func and out_func
+ typedefs. inflateBack() will call in(in_desc, &buf) which should return the
+ number of bytes of provided input, and a pointer to that input in buf. If
+ there is no input available, in() must return zero--buf is ignored in that
+ case--and inflateBack() will return a buffer error. inflateBack() will call
+ out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. out()
+ should return zero on success, or non-zero on failure. If out() returns
+ non-zero, inflateBack() will return with an error. Neither in() nor out()
+ are permitted to change the contents of the window provided to
+ inflateBackInit(), which is also the buffer that out() uses to write from.
+ The length written by out() will be at most the window size. Any non-zero
+ amount of input may be provided by in().
+
+ For convenience, inflateBack() can be provided input on the first call by
+ setting strm->next_in and strm->avail_in. If that input is exhausted, then
+ in() will be called. Therefore strm->next_in must be initialized before
+ calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called
+ immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in
+ must also be initialized, and then if strm->avail_in is not zero, input will
+ initially be taken from strm->next_in[0 .. strm->avail_in - 1].
+
+ The in_desc and out_desc parameters of inflateBack() are passed as the
+ first parameter of in() and out() respectively when they are called. These
+ descriptors can be optionally used to pass any information that the caller-
+ supplied in() and out() functions need to do their job.
+
+ On return, inflateBack() will set strm->next_in and strm->avail_in to
+ pass back any unused input that was provided by the last in() call. The
+ return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
+ if in() or out() returned an error, Z_DATA_ERROR if there was a format error
+ in the deflate stream (in which case strm->msg is set to indicate the nature
+ of the error), or Z_STREAM_ERROR if the stream was not properly initialized.
+ In the case of Z_BUF_ERROR, an input or output error can be distinguished
+ using strm->next_in which will be Z_NULL only if in() returned an error. If
+ strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning
+ non-zero. (in() will always be called before out(), so strm->next_in is
+ assured to be defined if out() returns non-zero.) Note that inflateBack()
+ cannot return Z_OK.
+*/
+
+ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+/*
+ All memory allocated by inflateBackInit() is freed.
+
+ inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
+ state was inconsistent.
+*/
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+/* Return flags indicating compile-time options.
+
+ Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
+ 1.0: size of uInt
+ 3.2: size of uLong
+ 5.4: size of voidpf (pointer)
+ 7.6: size of z_off_t
+
+ Compiler, assembler, and debug options:
+ 8: DEBUG
+ 9: ASMV or ASMINF -- use ASM code
+ 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
+ 11: 0 (reserved)
+
+ One-time table building (smaller code, but not thread-safe if true):
+ 12: BUILDFIXED -- build static block decoding tables when needed
+ 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
+ 14,15: 0 (reserved)
+
+ Library content (indicates missing functionality):
+ 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
+ deflate code when not needed)
+ 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
+ and decode gzip streams (to avoid linking crc code)
+ 18-19: 0 (reserved)
+
+ Operation variations (changes in library functionality):
+ 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
+ 21: FASTEST -- deflate algorithm with only one, lowest compression level
+ 22,23: 0 (reserved)
+
+ The sprintf variant used by gzprintf (zero is best):
+ 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
+ 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
+ 26: 0 = returns value, 1 = void -- 1 means inferred string length returned
+
+ Remainder:
+ 27-31: 0 (reserved)
+ */
+
+#ifndef Z_SOLO
+
+ /* utility functions */
+
+/*
+ The following utility functions are implemented on top of the basic
+ stream-oriented functions. To simplify the interface, some default options
+ are assumed (compression level and memory usage, standard memory allocation
+ functions). The source code of these utility functions can be modified if
+ you need special options.
+*/
+
+ZEXTERN int ZEXPORT compress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+/*
+ Compresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total size
+ of the destination buffer, which must be at least the value returned by
+ compressBound(sourceLen). Upon exit, destLen is the actual size of the
+ compressed buffer.
+
+ compress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer.
+*/
+
+ZEXTERN int ZEXPORT compress2 OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen,
+ int level));
+/*
+ Compresses the source buffer into the destination buffer. The level
+ parameter has the same meaning as in deflateInit. sourceLen is the byte
+ length of the source buffer. Upon entry, destLen is the total size of the
+ destination buffer, which must be at least the value returned by
+ compressBound(sourceLen). Upon exit, destLen is the actual size of the
+ compressed buffer.
+
+ compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+ memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+ Z_STREAM_ERROR if the level parameter is invalid.
+*/
+
+ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
+/*
+ compressBound() returns an upper bound on the compressed size after
+ compress() or compress2() on sourceLen bytes. It would be used before a
+ compress() or compress2() call to allocate the destination buffer.
+*/
+
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest, uLongf *destLen,
+ const Bytef *source, uLong sourceLen));
+/*
+ Decompresses the source buffer into the destination buffer. sourceLen is
+ the byte length of the source buffer. Upon entry, destLen is the total size
+ of the destination buffer, which must be large enough to hold the entire
+ uncompressed data. (The size of the uncompressed data must have been saved
+ previously by the compressor and transmitted to the decompressor by some
+ mechanism outside the scope of this compression library.) Upon exit, destLen
+ is the actual size of the uncompressed buffer.
+
+ uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory, Z_BUF_ERROR if there was not enough room in the output
+ buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In
+ the case where there is not enough room, uncompress() will fill the output
+ buffer with the uncompressed data up to that point.
+*/
+
+ /* gzip file access functions */
+
+/*
+ This library supports reading and writing files in gzip (.gz) format with
+ an interface similar to that of stdio, using the functions that start with
+ "gz". The gzip format is different from the zlib format. gzip is a gzip
+ wrapper, documented in RFC 1952, wrapped around a deflate stream.
+*/
+
+typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */
+
+/*
+ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
+
+ Opens a gzip (.gz) file for reading or writing. The mode parameter is as
+ in fopen ("rb" or "wb") but can also include a compression level ("wb9") or
+ a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only
+ compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F'
+ for fixed code compression as in "wb9F". (See the description of
+ deflateInit2 for more information about the strategy parameter.) 'T' will
+ request transparent writing or appending with no compression and not using
+ the gzip format.
+
+ "a" can be used instead of "w" to request that the gzip stream that will
+ be written be appended to the file. "+" will result in an error, since
+ reading and writing to the same gzip file is not supported. The addition of
+ "x" when writing will create the file exclusively, which fails if the file
+ already exists. On systems that support it, the addition of "e" when
+ reading or writing will set the flag to close the file on an execve() call.
+
+ These functions, as well as gzip, will read and decode a sequence of gzip
+ streams in a file. The append function of gzopen() can be used to create
+ such a file. (Also see gzflush() for another way to do this.) When
+ appending, gzopen does not test whether the file begins with a gzip stream,
+ nor does it look for the end of the gzip streams to begin appending. gzopen
+ will simply append a gzip stream to the existing file.
+
+ gzopen can be used to read a file which is not in gzip format; in this
+ case gzread will directly read from the file without decompression. When
+ reading, this will be detected automatically by looking for the magic two-
+ byte gzip header.
+
+ gzopen returns NULL if the file could not be opened, if there was
+ insufficient memory to allocate the gzFile state, or if an invalid mode was
+ specified (an 'r', 'w', or 'a' was not provided, or '+' was provided).
+ errno can be checked to determine if the reason gzopen failed was that the
+ file could not be opened.
+*/
+
+ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
+/*
+ gzdopen associates a gzFile with the file descriptor fd. File descriptors
+ are obtained from calls like open, dup, creat, pipe or fileno (if the file
+ has been previously opened with fopen). The mode parameter is as in gzopen.
+
+ The next call of gzclose on the returned gzFile will also close the file
+ descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor
+ fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd,
+ mode);. The duplicated descriptor should be saved to avoid a leak, since
+ gzdopen does not close fd if it fails. If you are using fileno() to get the
+ file descriptor from a FILE *, then you will have to use dup() to avoid
+ double-close()ing the file descriptor. Both gzclose() and fclose() will
+ close the associated file descriptor, so they need to have different file
+ descriptors.
+
+ gzdopen returns NULL if there was insufficient memory to allocate the
+ gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not
+ provided, or '+' was provided), or if fd is -1. The file descriptor is not
+ used until the next gz* read, write, seek, or close operation, so gzdopen
+ will not detect if fd is invalid (unless fd is -1).
+*/
+
+ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
+/*
+ Set the internal buffer size used by this library's functions. The
+ default buffer size is 8192 bytes. This function must be called after
+ gzopen() or gzdopen(), and before any other calls that read or write the
+ file. The buffer memory allocation is always deferred to the first read or
+ write. Two buffers are allocated, either both of the specified size when
+ writing, or one of the specified size and the other twice that size when
+ reading. A larger buffer size of, for example, 64K or 128K bytes will
+ noticeably increase the speed of decompression (reading).
+
+ The new buffer size also affects the maximum length for gzprintf().
+
+ gzbuffer() returns 0 on success, or -1 on failure, such as being called
+ too late.
+*/
+
+ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+/*
+ Dynamically update the compression level or strategy. See the description
+ of deflateInit2 for the meaning of these parameters.
+
+ gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
+ opened for writing.
+*/
+
+ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
+/*
+ Reads the given number of uncompressed bytes from the compressed file. If
+ the input file is not in gzip format, gzread copies the given number of
+ bytes into the buffer directly from the file.
+
+ After reaching the end of a gzip stream in the input, gzread will continue
+ to read, looking for another gzip stream. Any number of gzip streams may be
+ concatenated in the input file, and will all be decompressed by gzread().
+ If something other than a gzip stream is encountered after a gzip stream,
+ that remaining trailing garbage is ignored (and no error is returned).
+
+ gzread can be used to read a gzip file that is being concurrently written.
+ Upon reaching the end of the input, gzread will return with the available
+ data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then
+ gzclearerr can be used to clear the end of file indicator in order to permit
+ gzread to be tried again. Z_OK indicates that a gzip stream was completed
+ on the last gzread. Z_BUF_ERROR indicates that the input file ended in the
+ middle of a gzip stream. Note that gzread does not return -1 in the event
+ of an incomplete gzip stream. This error is deferred until gzclose(), which
+ will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip
+ stream. Alternatively, gzerror can be used before gzclose to detect this
+ case.
+
+ gzread returns the number of uncompressed bytes actually read, less than
+ len for end of file, or -1 for error.
+*/
+
+ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
+ voidpc buf, unsigned len));
+/*
+ Writes the given number of uncompressed bytes into the compressed file.
+ gzwrite returns the number of uncompressed bytes written or 0 in case of
+ error.
+*/
+
+ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
+/*
+ Converts, formats, and writes the arguments to the compressed file under
+ control of the format string, as in fprintf. gzprintf returns the number of
+ uncompressed bytes actually written, or 0 in case of error. The number of
+ uncompressed bytes written is limited to 8191, or one less than the buffer
+ size given to gzbuffer(). The caller should assure that this limit is not
+ exceeded. If it is exceeded, then gzprintf() will return an error (0) with
+ nothing written. In this case, there may also be a buffer overflow with
+ unpredictable consequences, which is possible only if zlib was compiled with
+ the insecure functions sprintf() or vsprintf() because the secure snprintf()
+ or vsnprintf() functions were not available. This can be determined using
+ zlibCompileFlags().
+*/
+
+ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+/*
+ Writes the given null-terminated string to the compressed file, excluding
+ the terminating null character.
+
+ gzputs returns the number of characters written, or -1 in case of error.
+*/
+
+ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+/*
+ Reads bytes from the compressed file until len-1 characters are read, or a
+ newline character is read and transferred to buf, or an end-of-file
+ condition is encountered. If any characters are read or if len == 1, the
+ string is terminated with a null character. If no characters are read due
+ to an end-of-file or len < 1, then the buffer is left untouched.
+
+ gzgets returns buf which is a null-terminated string, or it returns NULL
+ for end-of-file or in case of error. If there was an error, the contents at
+ buf are indeterminate.
+*/
+
+ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
+/*
+ Writes c, converted to an unsigned char, into the compressed file. gzputc
+ returns the value that was written, or -1 in case of error.
+*/
+
+ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
+/*
+ Reads one byte from the compressed file. gzgetc returns this byte or -1
+ in case of end of file or error. This is implemented as a macro for speed.
+ As such, it does not do all of the checking the other functions do. I.e.
+ it does not check to see if file is NULL, nor whether the structure file
+ points to has been clobbered or not.
+*/
+
+ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
+/*
+ Push one character back onto the stream to be read as the first character
+ on the next read. At least one character of push-back is allowed.
+ gzungetc() returns the character pushed, or -1 on failure. gzungetc() will
+ fail if c is -1, and may fail if a character has been pushed but not read
+ yet. If gzungetc is used immediately after gzopen or gzdopen, at least the
+ output buffer size of pushed characters is allowed. (See gzbuffer above.)
+ The pushed character will be discarded if the stream is repositioned with
+ gzseek() or gzrewind().
+*/
+
+ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
+/*
+ Flushes all pending output into the compressed file. The parameter flush
+ is as in the deflate() function. The return value is the zlib error number
+ (see function gzerror below). gzflush is only permitted when writing.
+
+ If the flush parameter is Z_FINISH, the remaining data is written and the
+ gzip stream is completed in the output. If gzwrite() is called again, a new
+ gzip stream will be started in the output. gzread() is able to read such
+ concatenated gzip streams.
+
+ gzflush should be called only when strictly necessary because it will
+ degrade compression if called too often.
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
+ z_off_t offset, int whence));
+
+ Sets the starting position for the next gzread or gzwrite on the given
+ compressed file. The offset represents a number of bytes in the
+ uncompressed data stream. The whence parameter is defined as in lseek(2);
+ the value SEEK_END is not supported.
+
+ If the file is opened for reading, this function is emulated but can be
+ extremely slow. If the file is opened for writing, only forward seeks are
+ supported; gzseek then compresses a sequence of zeroes up to the new
+ starting position.
+
+ gzseek returns the resulting offset location as measured in bytes from
+ the beginning of the uncompressed stream, or -1 in case of error, in
+ particular if the file is opened for writing and the new starting position
+ would be before the current position.
+*/
+
+ZEXTERN int ZEXPORT gzrewind OF((gzFile file));
+/*
+ Rewinds the given file. This function is supported only for reading.
+
+ gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gztell OF((gzFile file));
+
+ Returns the starting position for the next gzread or gzwrite on the given
+ compressed file. This position represents a number of bytes in the
+ uncompressed data stream, and is zero when starting, even if appending or
+ reading a gzip stream from the middle of a file using gzdopen().
+
+ gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
+
+ Returns the current offset in the file being read or written. This offset
+ includes the count of bytes that precede the gzip stream, for example when
+ appending or when using gzdopen() for reading. When reading, the offset
+ does not include as yet unused buffered input. This information can be used
+ for a progress indicator. On error, gzoffset() returns -1.
+*/
+
+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+/*
+ Returns true (1) if the end-of-file indicator has been set while reading,
+ false (0) otherwise. Note that the end-of-file indicator is set only if the
+ read tried to go past the end of the input, but came up short. Therefore,
+ just like feof(), gzeof() may return false even if there is no more data to
+ read, in the event that the last read request was for the exact number of
+ bytes remaining in the input file. This will happen if the input file size
+ is an exact multiple of the buffer size.
+
+ If gzeof() returns true, then the read functions will return no more data,
+ unless the end-of-file indicator is reset by gzclearerr() and the input file
+ has grown since the previous end of file was detected.
+*/
+
+ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+/*
+ Returns true (1) if file is being copied directly while reading, or false
+ (0) if file is a gzip stream being decompressed.
+
+ If the input file is empty, gzdirect() will return true, since the input
+ does not contain a gzip stream.
+
+ If gzdirect() is used immediately after gzopen() or gzdopen() it will
+ cause buffers to be allocated to allow reading the file to determine if it
+ is a gzip file. Therefore if gzbuffer() is used, it should be called before
+ gzdirect().
+
+ When writing, gzdirect() returns true (1) if transparent writing was
+ requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note:
+ gzdirect() is not needed when writing. Transparent writing must be
+ explicitly requested, so the application already knows the answer. When
+ linking statically, using gzdirect() will include all of the zlib code for
+ gzip file reading and decompression, which may not be desired.)
+*/
+
+ZEXTERN int ZEXPORT gzclose OF((gzFile file));
+/*
+ Flushes all pending output if necessary, closes the compressed file and
+ deallocates the (de)compression state. Note that once file is closed, you
+ cannot call gzerror with file, since its structures have been deallocated.
+ gzclose must not be called more than once on the same file, just as free
+ must not be called more than once on the same allocation.
+
+ gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a
+ file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the
+ last read ended in the middle of a gzip stream, or Z_OK on success.
+*/
+
+ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
+ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
+/*
+ Same as gzclose(), but gzclose_r() is only for use when reading, and
+ gzclose_w() is only for use when writing or appending. The advantage to
+ using these instead of gzclose() is that they avoid linking in zlib
+ compression or decompression code that is not used when only reading or only
+ writing respectively. If gzclose() is used, then both compression and
+ decompression code will be included in the application when linking to a static
+ zlib library.
+*/
+
+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+/*
+ Returns the error message for the last error which occurred on the given
+ compressed file. errnum is set to zlib error number. If an error occurred
+ in the file system and not in the compression library, errnum is set to
+ Z_ERRNO and the application may consult errno to get the exact error code.
+
+ The application must not modify the returned string. Future calls to
+ this function may invalidate the previously returned string. If file is
+ closed, then the string previously returned by gzerror will no longer be
+ available.
+
+ gzerror() should be used to distinguish errors from end-of-file for those
+ functions above that do not distinguish those cases in their return values.
+*/
+
+ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+/*
+ Clears the error and end-of-file flags for file. This is analogous to the
+ clearerr() function in stdio. This is useful for continuing to read a gzip
+ file that is being written concurrently.
+*/
+
+#endif /* !Z_SOLO */
+
+ /* checksum functions */
+
+/*
+ These functions are not related to compression but are exported
+ anyway because they might be useful in applications using the compression
+ library.
+*/
+
+ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+/*
+ Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+ return the updated checksum. If buf is Z_NULL, this function returns the
+ required initial value for the checksum.
+
+ An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+ much faster.
+
+ Usage example:
+
+ uLong adler = adler32(0L, Z_NULL, 0);
+
+ while (read_buffer(buffer, length) != EOF) {
+ adler = adler32(adler, buffer, length);
+ }
+ if (adler != original_adler) error();
+*/
+
+/*
+ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
+ z_off_t len2));
+
+ Combine two Adler-32 checksums into one. For two sequences of bytes, seq1
+ and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
+ each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of
+ seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note
+ that the z_off_t type (like off_t) is a signed integer. If len2 is
+ negative, the result has no meaning or utility.
+*/
+
+ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len));
+/*
+ Update a running CRC-32 with the bytes buf[0..len-1] and return the
+ updated CRC-32. If buf is Z_NULL, this function returns the required
+ initial value for the crc. Pre- and post-conditioning (one's complement) is
+ performed within this function so it shouldn't be done by the application.
+
+ Usage example:
+
+ uLong crc = crc32(0L, Z_NULL, 0);
+
+ while (read_buffer(buffer, length) != EOF) {
+ crc = crc32(crc, buffer, length);
+ }
+ if (crc != original_crc) error();
+*/
+
+/*
+ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
+
+ Combine two CRC-32 check values into one. For two sequences of bytes,
+ seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
+ calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32
+ check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
+ len2.
+*/
+
+
+ /* various hacks, don't look :) */
+
+/* deflateInit and inflateInit are macros to allow checking the zlib version
+ * and the compiler's view of z_stream:
+ */
+ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
+ const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
+ const char *version, int stream_size));
+ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int level, int method,
+ int windowBits, int memLevel,
+ int strategy, const char *version,
+ int stream_size));
+ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int windowBits,
+ const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
+ unsigned char FAR *window,
+ const char *version,
+ int stream_size));
+#define deflateInit(strm, level) \
+ deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#define inflateInit(strm) \
+ inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+ deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+ (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#define inflateInit2(strm, windowBits) \
+ inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+ (int)sizeof(z_stream))
+#define inflateBackInit(strm, windowBits, window) \
+ inflateBackInit_((strm), (windowBits), (window), \
+ ZLIB_VERSION, (int)sizeof(z_stream))
+
+#ifndef Z_SOLO
+
+/* gzgetc() macro and its supporting function and exposed data structure. Note
+ * that the real internal state is much larger than the exposed structure.
+ * This abbreviated structure exposes just enough for the gzgetc() macro. The
+ * user should not mess with these exposed elements, since their names or
+ * behavior could change in the future, perhaps even capriciously. They can
+ * only be used by the gzgetc() macro. You have been warned.
+ */
+struct gzFile_s {
+ unsigned have;
+ unsigned char *next;
+ z_off64_t pos;
+};
+ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file)); /* backward compatibility */
+#ifdef Z_PREFIX_SET
+# undef z_gzgetc
+# define z_gzgetc(g) \
+ ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g))
+#else
+# define gzgetc(g) \
+ ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g))
+#endif
+
+/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or
+ * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if
+ * both are true, the application gets the *64 functions, and the regular
+ * functions are changed to 64 bits) -- in case these are set on systems
+ * without large file support, _LFS64_LARGEFILE must also be true
+ */
+#ifdef Z_LARGE64
+ ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+ ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
+ ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
+ ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+ ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
+ ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
+#endif
+
+#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
+# ifdef Z_PREFIX_SET
+# define z_gzopen z_gzopen64
+# define z_gzseek z_gzseek64
+# define z_gztell z_gztell64
+# define z_gzoffset z_gzoffset64
+# define z_adler32_combine z_adler32_combine64
+# define z_crc32_combine z_crc32_combine64
+# else
+# define gzopen gzopen64
+# define gzseek gzseek64
+# define gztell gztell64
+# define gzoffset gzoffset64
+# define adler32_combine adler32_combine64
+# define crc32_combine crc32_combine64
+# endif
+# ifndef Z_LARGE64
+ ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+ ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int));
+ ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile));
+ ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
+ ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
+ ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
+# endif
+#else
+ ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
+ ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int));
+ ZEXTERN z_off_t ZEXPORT gztell OF((gzFile));
+ ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
+ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
+ ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+#endif
+
+#else /* Z_SOLO */
+
+ ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
+ ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+
+#endif /* !Z_SOLO */
+
+/* hack for buggy compilers */
+#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL)
+ struct internal_state {int dummy;};
+#endif
+
+/* undocumented functions */
+ZEXTERN const char * ZEXPORT zError OF((int));
+ZEXTERN int ZEXPORT inflateSyncPoint OF((z_streamp));
+ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table OF((void));
+ZEXTERN int ZEXPORT inflateUndermine OF((z_streamp, int));
+ZEXTERN int ZEXPORT inflateResetKeep OF((z_streamp));
+ZEXTERN int ZEXPORT deflateResetKeep OF((z_streamp));
+#if defined(_WIN32) && !defined(Z_SOLO)
+ZEXTERN gzFile ZEXPORT gzopen_w OF((const wchar_t *path,
+ const char *mode));
+#endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+# ifndef Z_SOLO
+ZEXTERN int ZEXPORTVA gzvprintf Z_ARG((gzFile file,
+ const char *format,
+ va_list va));
+# endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ZLIB_H */
--- /dev/null
+/* Copyright (C) 2010-2015 The RetroArch team
+ *
+ * ---------------------------------------------------------------------------------------
+ * The following license statement only applies to this file (retro_inline.h).
+ * ---------------------------------------------------------------------------------------
+ *
+ * Permission is hereby granted, free of charge,
+ * to any person obtaining a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __LIBRETRO_SDK_INLINE_H
+#define __LIBRETRO_SDK_INLINE_H
+
+#ifndef INLINE
+
+#if !defined(__cplusplus) && defined(_WIN32)
+#define INLINE _inline
+#elif defined(__STDC_VERSION__) && __STDC_VERSION__>=199901L
+#define INLINE inline
+#elif defined(__GNUC__)
+#define INLINE __inline__
+#else
+#define INLINE
+#endif
+
+#endif
+#endif
--- /dev/null
+#ifndef MMAN_H
+#define MMAN_H
+
+#include <stdlib.h>
+//#include <psp2/kernel/sysmem.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PROT_READ 0b001
+#define PROT_WRITE 0b010
+#define PROT_EXEC 0b100
+#define MAP_PRIVATE 2
+#define MAP_ANONYMOUS 0x20
+
+#define MAP_FAILED ((void *)-1)
+
+#if 0 // not used
+static inline void* mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset)
+{
+ (void)prot;
+ (void)flags;
+ (void)fd;
+ (void)offset;
+
+ int block, ret;
+
+ block = sceKernelAllocMemBlockForVM("code", len);
+ if(block<=0){
+ sceClibPrintf("could not alloc mem block @0x%08X 0x%08X \n", block, len);
+ exit(1);
+ }
+
+ // get base address
+ ret = sceKernelGetMemBlockBase(block, &addr);
+ if (ret < 0)
+ {
+ sceClibPrintf("could not get address @0x%08X 0x%08X \n", block, addr);
+ exit(1);
+ }
+
+
+ if(!addr)
+ return MAP_FAILED;
+
+ return addr;
+}
+
+static inline int mprotect(void *addr, size_t len, int prot)
+{
+ (void)addr;
+ (void)len;
+ (void)prot;
+ return 0;
+}
+
+static inline int munmap(void *addr, size_t len)
+{
+ int uid = sceKernelFindMemBlockByAddr(addr, len);
+
+ return sceKernelFreeMemBlock(uid);
+
+}
+#endif
+
+#ifdef __cplusplus
+};
+#endif
+
+#endif // MMAN_H
--- /dev/null
+//SPDX-License-Identifier: GPL-2.0-or-later
+/* From wut:
+ * https://github.com/devkitPro/wut/blob/0b196e8abcedeb0238105f3ffab7cb0093638b86/include/coreinit/memorymap.h
+ */
+
+#pragma once
+#include <stdint.h>
+#include <stdbool.h>
+typedef bool BOOL;
+
+/**
+ * \defgroup coreinit_memorymap Memory Map
+ * \ingroup coreinit
+ *
+ * @{
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum OSMemoryMapMode
+{
+ OS_MAP_MEMORY_INVALID = 0,
+ OS_MAP_MEMORY_READ_ONLY = 1,
+ OS_MAP_MEMORY_READ_WRITE = 2,
+ OS_MAP_MEMORY_FREE = 3,
+ OS_MAP_MEMORY_ALLOCATED = 4,
+} OSMemoryMapMode;
+
+#define OS_PAGE_SIZE (128 * 1024)
+
+uint32_t
+OSEffectiveToPhysical(uint32_t virtualAddress);
+
+BOOL
+OSIsAddressValid(uint32_t virtualAddress);
+
+BOOL
+__OSValidateAddressSpaceRange(int /* unused */,
+ uint32_t virtualAddress,
+ uint32_t size);
+
+/**
+ * Allocates virtual address range for later mapping.
+ *
+ * \param virtualAddress
+ * Requested start address for the range. If there is no preference, NULL can be
+ * used.
+ *
+ * \param size
+ * Size of address range to allocate.
+ *
+ * \param align
+ * Alignment of address range to allocate.
+ *
+ * \return
+ * The starting address of the newly allocated range, or NULL on failure.
+ *
+ * \sa
+ * - OSFreeVirtAddr()
+ * - OSMapMemory()
+ */
+uint32_t
+OSAllocVirtAddr(uint32_t virtualAddress,
+ uint32_t size,
+ uint32_t align);
+
+/**
+ * Frees a previously allocated virtual address range back to the system.
+ *
+ * \param virtualAddress
+ * The start of the virtual address range to free.
+ *
+ * \param size
+ * The size of the virtual address range to free.
+ *
+ * \return
+ * \c true on success.
+ */
+BOOL
+OSFreeVirtAddr(uint32_t virtualAddress,
+ uint32_t size);
+
+/**
+ * Determines the status of the given virtual memory address - mapped read-write
+ * or read-only, free, allocated or invalid.
+ *
+ * \param virtualAddress
+ * The virtual address to query.
+ *
+ * \return
+ * The status of the memory address - see #OSMemoryMapMode.
+ */
+OSMemoryMapMode
+OSQueryVirtAddr(uint32_t virtualAddress);
+
+/**
+ * Maps a physical address to a virtual address, with a given size and set of
+ * permissions.
+ *
+ * \param virtualAddress
+ * The target virtual address for the mapping.
+ *
+ * \param physicalAddress
+ * Physical address of the memory to back the mapping.
+ *
+ * \param size
+ * Size, in bytes, of the desired mapping. Likely has an alignment requirement.
+ *
+ * \param mode
+ * Permissions to map the memory with - see #OSMemoryMapMode.
+ *
+ * \return
+ * \c true on success.
+ *
+ * \sa
+ * - OSAllocVirtAddr()
+ * - OSUnmapMemory()
+ */
+BOOL
+OSMapMemory(uint32_t virtualAddress,
+ uint32_t physicalAddress,
+ uint32_t size,
+ OSMemoryMapMode mode);
+
+/**
+ * Unmaps previously mapped memory.
+ *
+ * \param virtualAddress
+ * Starting address of the area to unmap.
+ *
+ * \param size
+ * Size of the memory area to unmap.
+ *
+ * \return
+ * \c true on success.
+ */
+BOOL
+OSUnmapMemory(uint32_t virtualAddress,
+ uint32_t size);
+
+/**
+ * Gets the range of virtual addresses available for mapping.
+ *
+ * \param outVirtualAddress
+ * Pointer to write the starting address of the memory area to.
+ *
+ * \param outSize
+ * Pointer to write the size of the memory area to.
+ *
+ * \sa
+ * - OSMapMemory()
+ */
+void
+OSGetMapVirtAddrRange(uint32_t *outVirtualAddress,
+ uint32_t *outSize);
+
+/**
+ * Gets the range of available physical memory (not reserved for app code or
+ * data).
+ *
+ * \param outPhysicalAddress
+ * Pointer to write the starting physical address of the memory area to.
+ *
+ * \param outSize
+ * Pointer to write the size of the memory area to.
+ *
+ * \if false
+ * Is memory returned by this function actually safe to map and use? couldn't
+ * get a straight answer from decaf-emu's kernel_memory.cpp...
+ * \endif
+ */
+void
+OSGetAvailPhysAddrRange(uint32_t *outPhysicalAddress,
+ uint32_t *outSize);
+
+/**
+ * Gets the range of physical memory used for the application's data.
+ *
+ * \param outPhysicalAddress
+ * Pointer to write the starting physical address of the memory area to.
+ *
+ * \param outSize
+ * Pointer to write the size of the memory area to.
+ *
+ * \if false
+ * does this include the main heap?
+ * \endif
+ */
+void
+OSGetDataPhysAddrRange(uint32_t *outPhysicalAddress,
+ uint32_t *outSize);
+
+#ifdef __cplusplus
+}
+#endif
+
+/** @} */
#ifdef __GNUC__
# define likely(x) __builtin_expect((x),1)
# define unlikely(x) __builtin_expect((x),0)
+# define preload __builtin_prefetch
# ifdef __clang__
# define noinline __attribute__((noinline))
# else
# define noinline __attribute__((noinline,noclone))
# endif
-# define unused __attribute__((unused))
+# define attr_unused __attribute__((unused))
#else
# define likely(x) (x)
# define unlikely(x) (x)
+# define preload(x)
# define noinline
-# define unused
+# define attr_unused
+#endif
+
+// doesn't work on Android, mingw...
+#if defined(__GNUC__) && !defined(ANDROID) && !defined(__MINGW32__)
+# define attr_weak __attribute__((weak))
+#else
+# define attr_weak
#endif
#ifndef __has_builtin
--- /dev/null
+#ifndef __CHDCONFIG_H__
+#define __CHDCONFIG_H__
+
+/* this overrides deps/libchdr/include/libchdr/chdconfig.h */
+#define WANT_SUBCODE 1
+#define NEED_CACHE_HUNK 1
+
+#if defined(__x86_64__) || defined(__aarch64__)
+#define WANT_RAW_DATA_SECTOR 1
+#define VERIFY_BLOCK_CRC 1
+#else
+// assume slower hardware, so skip the ECC checks that most (all?) games don't need
+#endif
+
+#endif
--- /dev/null
+/*
+ * Copyright (C) 2012-2023 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU lightning.
+ *
+ * GNU lightning is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU lightning is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+ * License for more details.
+ *
+ * Authors:
+ * Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _lightning_h
+#define _lightning_h
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+
+#if defined(__hpux) && defined(__hppa__)
+# include <machine/param.h>
+#endif
+#if defined(__alpha__) && defined(__osf__)
+# include <machine/endian.h>
+#endif
+
+#ifndef __WORDSIZE
+# if defined(WORDSIZE) /* ppc darwin */
+# define __WORDSIZE WORDSIZE
+# elif defined(__SIZEOF_POINTER__) /* ppc aix */
+# define __WORDSIZE (__SIZEOF_POINTER__ << 3)
+# elif defined(_ILP32) /* hppa hp-ux */
+# define __WORDSIZE 32
+# elif defined(_LP64) /* ia64 hp-ux (with cc +DD64) */
+# define __WORDSIZE 64
+# elif defined(_MIPS_SZPTR) /* mips irix */
+# if _MIPS_SZPTR == 32
+# define __WORDSIZE 32
+# else
+# define __WORDSIZE 64
+# endif
+# else /* From FreeBSD 9.1 stdint.h */
+# if defined(UINTPTR_MAX) && defined(UINT64_MAX) && \
+ (UINTPTR_MAX == UINT64_MAX)
+# define __WORDSIZE 64
+# else
+# define __WORDSIZE 32
+# endif
+# endif
+#endif
+#ifndef __LITTLE_ENDIAN
+# if defined(LITTLE_ENDIAN) /* ppc darwin */
+# define __LITTLE_ENDIAN LITTLE_ENDIAN
+# elif defined(__ORDER_LITTLE_ENDIAN__) /* ppc aix */
+# define __LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
+# else
+# define __LITTLE_ENDIAN 1234
+# endif
+#endif
+#ifndef __BIG_ENDIAN
+# if defined(BIG_ENDIAN) /* ppc darwin */
+# define __BIG_ENDIAN BIG_ENDIAN
+# elif defined(__ORDER_BIG_ENDIAN__) /* ppc aix */
+# define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
+# else
+# define __BIG_ENDIAN 4321
+# endif
+#endif
+#ifndef __BYTE_ORDER
+# if defined(BYTE_ORDER) /* ppc darwin */
+# define __BYTE_ORDER BYTE_ORDER
+# elif defined(__BYTE_ORDER__) /* ppc aix */
+# define __BYTE_ORDER __BYTE_ORDER__
+# elif defined(_BIG_ENDIAN) /* hppa hp-ux */
+# define __BYTE_ORDER __BIG_ENDIAN
+# elif defined(__BIG_ENDIAN__) /* ia64 hp-ux */
+# define __BYTE_ORDER __BIG_ENDIAN
+# elif defined(__i386__) /* 32 bit x86 solaris */
+# define __BYTE_ORDER __LITTLE_ENDIAN
+# elif defined(__x86_64__) /* 64 bit x86 solaris */
+# define __BYTE_ORDER __LITTLE_ENDIAN
+# elif defined(__MIPSEB) /* mips irix */
+# define __BYTE_ORDER __BIG_ENDIAN
+# else
+# error cannot figure __BYTE_ORDER
+# endif
+#endif
+
+typedef signed char jit_int8_t;
+typedef unsigned char jit_uint8_t;
+typedef signed short jit_int16_t;
+typedef unsigned short jit_uint16_t;
+typedef signed int jit_int32_t;
+typedef unsigned int jit_uint32_t;
+#if __WORDSIZE == 32
+typedef signed long long jit_int64_t;
+typedef unsigned long long jit_uint64_t;
+typedef jit_int32_t jit_word_t;
+typedef jit_uint32_t jit_uword_t;
+#elif (_WIN32 && !__CYGWIN__)
+typedef signed long long jit_int64_t;
+typedef unsigned long long jit_uint64_t;
+typedef jit_int64_t jit_word_t;
+typedef jit_uint64_t jit_uword_t;
+#else
+typedef signed long jit_int64_t;
+typedef unsigned long jit_uint64_t;
+typedef jit_int64_t jit_word_t;
+typedef jit_uint64_t jit_uword_t;
+#endif
+typedef float jit_float32_t;
+typedef double jit_float64_t;
+typedef void* jit_pointer_t;
+typedef jit_int32_t jit_bool_t;
+typedef jit_int32_t jit_gpr_t;
+typedef jit_int32_t jit_fpr_t;
+
+#if !defined(__powerpc__) && \
+ (defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__))
+#define __powerpc__ 1
+#endif
+
+#if defined(__i386__) || defined(__x86_64__)
+# include <lightning/jit_x86.h>
+#elif defined(__mips__)
+# include <lightning/jit_mips.h>
+#elif defined(__arm__)
+# include <lightning/jit_arm.h>
+#elif defined(__powerpc__)
+# include <lightning/jit_ppc.h>
+#elif defined(__sparc__)
+# include <lightning/jit_sparc.h>
+#elif defined(__ia64__)
+# include <lightning/jit_ia64.h>
+#elif defined(__hppa__)
+# include <lightning/jit_hppa.h>
+#elif defined(__aarch64__)
+# include <lightning/jit_aarch64.h>
+#elif defined(__s390__) || defined(__s390x__)
+# include <lightning/jit_s390.h>
+#elif defined(__alpha__)
+# include <lightning/jit_alpha.h>
+#elif defined(__riscv)
+# include <lightning/jit_riscv.h>
+#elif defined(__loongarch__)
+# include <lightning/jit_loongarch.h>
+#endif
+
+#define jit_flag_node 0x0001 /* patch node not absolute */
+#define jit_flag_patch 0x0002 /* jump already patched */
+#define jit_flag_data 0x0004 /* data in the constant pool */
+#define jit_flag_use 0x0008 /* do not remove marker label */
+#define jit_flag_synth 0x0010 /* synthesized instruction */
+#define jit_flag_head 0x1000 /* label reached by normal flow */
+#define jit_flag_varargs 0x2000 /* call{r,i} to varargs function */
+
+#define JIT_R(index) jit_r(index)
+#define JIT_V(index) jit_v(index)
+#define JIT_F(index) jit_f(index)
+#define JIT_R_NUM jit_r_num()
+#define JIT_V_NUM jit_v_num()
+#define JIT_F_NUM jit_f_num()
+
+#define JIT_DISABLE_DATA 1 /* force synthesize of constants */
+#define JIT_DISABLE_NOTE 2 /* disable debug info generation */
+
+#define jit_class_chk 0x02000000 /* just checking */
+#define jit_class_arg 0x08000000 /* argument register */
+#define jit_class_sav 0x10000000 /* callee save */
+#define jit_class_gpr 0x20000000 /* general purpose */
+#define jit_class_fpr 0x40000000 /* float */
+#define jit_class(reg) ((reg) & 0xffff0000)
+#define jit_regno(reg) ((reg) & 0x00007fff)
+
+typedef struct jit_node jit_node_t;
+typedef struct jit_state jit_state_t;
+
+typedef enum {
+ jit_code_data,
+#define jit_live(u) jit_new_node_w(jit_code_live, u)
+#define jit_align(u) jit_new_node_w(jit_code_align, u)
+ jit_code_live, jit_code_align,
+ jit_code_save, jit_code_load,
+#define jit_skip(u) jit_new_node_w(jit_code_skip, u)
+ jit_code_skip,
+#define jit_name(u) _jit_name(_jit,u)
+ jit_code_name,
+#define jit_note(u, v) _jit_note(_jit, u, v)
+#define jit_label() _jit_label(_jit)
+#define jit_forward() _jit_forward(_jit)
+#define jit_indirect() _jit_indirect(_jit)
+#define jit_link(u) _jit_link(_jit,u)
+ jit_code_note, jit_code_label,
+
+#define jit_prolog() _jit_prolog(_jit)
+ jit_code_prolog,
+
+#define jit_ellipsis() _jit_ellipsis(_jit)
+ jit_code_ellipsis,
+#define jit_va_push(u) _jit_va_push(_jit,u)
+ jit_code_va_push,
+#define jit_allocai(u) _jit_allocai(_jit,u)
+#define jit_allocar(u, v) _jit_allocar(_jit,u,v)
+ jit_code_allocai, jit_code_allocar,
+
+#define jit_arg_c() _jit_arg(_jit, jit_code_arg_c)
+#define jit_arg_s() _jit_arg(_jit, jit_code_arg_s)
+#define jit_arg_i() _jit_arg(_jit, jit_code_arg_i)
+# if __WORDSIZE == 32
+# define jit_arg() jit_arg_i()
+#else
+# define jit_arg_l() _jit_arg(_jit, jit_code_arg_l)
+# define jit_arg() jit_arg_l()
+#endif
+ jit_code_arg_c, jit_code_arg_s,
+ jit_code_arg_i, jit_code_arg_l,
+#if __WORDSIZE == 32
+# define jit_code_arg jit_code_arg_i
+#else
+# define jit_code_arg jit_code_arg_l
+#endif
+
+#define jit_getarg_c(u,v) _jit_getarg_c(_jit,u,v)
+#define jit_getarg_uc(u,v) _jit_getarg_uc(_jit,u,v)
+#define jit_getarg_s(u,v) _jit_getarg_s(_jit,u,v)
+#define jit_getarg_us(u,v) _jit_getarg_us(_jit,u,v)
+#define jit_getarg_i(u,v) _jit_getarg_i(_jit,u,v)
+#if __WORDSIZE == 32
+# define jit_getarg(u,v) jit_getarg_i(u,v)
+#else
+# define jit_getarg_ui(u,v) _jit_getarg_ui(_jit,u,v)
+# define jit_getarg_l(u,v) _jit_getarg_l(_jit,u,v)
+# define jit_getarg(u,v) jit_getarg_l(u,v)
+#endif
+ jit_code_getarg_c, jit_code_getarg_uc,
+ jit_code_getarg_s, jit_code_getarg_us,
+ jit_code_getarg_i, jit_code_getarg_ui,
+ jit_code_getarg_l,
+#if __WORDSIZE == 32
+# define jit_code_getarg jit_code_getarg_i
+#else
+# define jit_code_getarg jit_code_getarg_l
+#endif
+
+#define jit_putargr_c(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_c)
+#define jit_putargi_c(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_c)
+#define jit_putargr_uc(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_uc)
+#define jit_putargi_uc(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_uc)
+#define jit_putargr_s(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_s)
+#define jit_putargi_s(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_s)
+#define jit_putargr_us(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_us)
+#define jit_putargi_us(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_us)
+#define jit_putargr_i(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_i)
+#define jit_putargi_i(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_i)
+#if __WORDSIZE == 32
+# define jit_putargr(u,v) jit_putargr_i(u,v)
+# define jit_putargi(u,v) jit_putargi_i(u,v)
+#else
+# define jit_putargr_ui(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_ui)
+# define jit_putargi_ui(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_ui)
+# define jit_putargr_l(u,v) _jit_putargr(_jit,u,v,jit_code_putargr_l)
+# define jit_putargi_l(u,v) _jit_putargi(_jit,u,v,jit_code_putargi_l)
+# define jit_putargr(u,v) jit_putargr_l(u,v)
+# define jit_putargi(u,v) jit_putargi_l(u,v)
+#endif
+ jit_code_putargr_c, jit_code_putargi_c,
+ jit_code_putargr_uc, jit_code_putargi_uc,
+ jit_code_putargr_s, jit_code_putargi_s,
+ jit_code_putargr_us, jit_code_putargi_us,
+ jit_code_putargr_i, jit_code_putargi_i,
+ jit_code_putargr_ui, jit_code_putargi_ui,
+ jit_code_putargr_l, jit_code_putargi_l,
+#if __WORDSIZE == 32
+# define jit_code_putargr jit_code_putargr_i
+# define jit_code_putargi jit_code_putargi_i
+#else
+# define jit_code_putargr jit_code_putargr_l
+# define jit_code_putargi jit_code_putargi_l
+#endif
+
+#define jit_va_start(u) jit_new_node_w(jit_code_va_start, u)
+ jit_code_va_start,
+#define jit_va_arg(u, v) jit_new_node_ww(jit_code_va_arg, u, v)
+#define jit_va_arg_d(u, v) jit_new_node_ww(jit_code_va_arg_d, u, v)
+ jit_code_va_arg, jit_code_va_arg_d,
+#define jit_va_end(u) jit_new_node_w(jit_code_va_end, u)
+ jit_code_va_end,
+
+#define jit_addr(u,v,w) jit_new_node_www(jit_code_addr,u,v,w)
+#define jit_addi(u,v,w) jit_new_node_www(jit_code_addi,u,v,w)
+ jit_code_addr, jit_code_addi,
+#define jit_addcr(u,v,w) jit_new_node_www(jit_code_addcr,u,v,w)
+#define jit_addci(u,v,w) jit_new_node_www(jit_code_addci,u,v,w)
+ jit_code_addcr, jit_code_addci,
+#define jit_addxr(u,v,w) jit_new_node_www(jit_code_addxr,u,v,w)
+#define jit_addxi(u,v,w) jit_new_node_www(jit_code_addxi,u,v,w)
+ jit_code_addxr, jit_code_addxi,
+#define jit_subr(u,v,w) jit_new_node_www(jit_code_subr,u,v,w)
+#define jit_subi(u,v,w) jit_new_node_www(jit_code_subi,u,v,w)
+ jit_code_subr, jit_code_subi,
+#define jit_subcr(u,v,w) jit_new_node_www(jit_code_subcr,u,v,w)
+#define jit_subci(u,v,w) jit_new_node_www(jit_code_subci,u,v,w)
+ jit_code_subcr, jit_code_subci,
+#define jit_subxr(u,v,w) jit_new_node_www(jit_code_subxr,u,v,w)
+#define jit_subxi(u,v,w) jit_new_node_www(jit_code_subxi,u,v,w)
+ jit_code_subxr, jit_code_subxi,
+#define jit_rsbr(u,v,w) jit_subr(u,w,v)
+#define jit_rsbi(u,v,w) jit_new_node_www(jit_code_rsbi,u,v,w)
+ jit_code_rsbi,
+#define jit_mulr(u,v,w) jit_new_node_www(jit_code_mulr,u,v,w)
+#define jit_muli(u,v,w) jit_new_node_www(jit_code_muli,u,v,w)
+ jit_code_mulr, jit_code_muli,
+#define jit_qmulr(l,h,v,w) jit_new_node_qww(jit_code_qmulr,l,h,v,w)
+#define jit_qmuli(l,h,v,w) jit_new_node_qww(jit_code_qmuli,l,h,v,w)
+ jit_code_qmulr, jit_code_qmuli,
+#define jit_qmulr_u(l,h,v,w) jit_new_node_qww(jit_code_qmulr_u,l,h,v,w)
+#define jit_qmuli_u(l,h,v,w) jit_new_node_qww(jit_code_qmuli_u,l,h,v,w)
+ jit_code_qmulr_u, jit_code_qmuli_u,
+#define jit_divr(u,v,w) jit_new_node_www(jit_code_divr,u,v,w)
+#define jit_divi(u,v,w) jit_new_node_www(jit_code_divi,u,v,w)
+ jit_code_divr, jit_code_divi,
+#define jit_divr_u(u,v,w) jit_new_node_www(jit_code_divr_u,u,v,w)
+#define jit_divi_u(u,v,w) jit_new_node_www(jit_code_divi_u,u,v,w)
+ jit_code_divr_u, jit_code_divi_u,
+#define jit_qdivr(l,h,v,w) jit_new_node_qww(jit_code_qdivr,l,h,v,w)
+#define jit_qdivi(l,h,v,w) jit_new_node_qww(jit_code_qdivi,l,h,v,w)
+ jit_code_qdivr, jit_code_qdivi,
+#define jit_qdivr_u(l,h,v,w) jit_new_node_qww(jit_code_qdivr_u,l,h,v,w)
+#define jit_qdivi_u(l,h,v,w) jit_new_node_qww(jit_code_qdivi_u,l,h,v,w)
+ jit_code_qdivr_u, jit_code_qdivi_u,
+#define jit_remr(u,v,w) jit_new_node_www(jit_code_remr,u,v,w)
+#define jit_remi(u,v,w) jit_new_node_www(jit_code_remi,u,v,w)
+ jit_code_remr, jit_code_remi,
+#define jit_remr_u(u,v,w) jit_new_node_www(jit_code_remr_u,u,v,w)
+#define jit_remi_u(u,v,w) jit_new_node_www(jit_code_remi_u,u,v,w)
+ jit_code_remr_u, jit_code_remi_u,
+
+#define jit_andr(u,v,w) jit_new_node_www(jit_code_andr,u,v,w)
+#define jit_andi(u,v,w) jit_new_node_www(jit_code_andi,u,v,w)
+ jit_code_andr, jit_code_andi,
+#define jit_orr(u,v,w) jit_new_node_www(jit_code_orr,u,v,w)
+#define jit_ori(u,v,w) jit_new_node_www(jit_code_ori,u,v,w)
+ jit_code_orr, jit_code_ori,
+#define jit_xorr(u,v,w) jit_new_node_www(jit_code_xorr,u,v,w)
+#define jit_xori(u,v,w) jit_new_node_www(jit_code_xori,u,v,w)
+ jit_code_xorr, jit_code_xori,
+
+#define jit_lshr(u,v,w) jit_new_node_www(jit_code_lshr,u,v,w)
+#define jit_lshi(u,v,w) jit_new_node_www(jit_code_lshi,u,v,w)
+ jit_code_lshr, jit_code_lshi,
+#define jit_rshr(u,v,w) jit_new_node_www(jit_code_rshr,u,v,w)
+#define jit_rshi(u,v,w) jit_new_node_www(jit_code_rshi,u,v,w)
+ jit_code_rshr, jit_code_rshi,
+#define jit_rshr_u(u,v,w) jit_new_node_www(jit_code_rshr_u,u,v,w)
+#define jit_rshi_u(u,v,w) jit_new_node_www(jit_code_rshi_u,u,v,w)
+ jit_code_rshr_u, jit_code_rshi_u,
+
+#define jit_negr(u,v) jit_new_node_ww(jit_code_negr,u,v)
+#define jit_negi(u,v) jit_new_node_ww(jit_code_negi,u,v)
+ jit_code_negr, jit_code_negi,
+#define jit_comr(u,v) jit_new_node_ww(jit_code_comr,u,v)
+#define jit_comi(u,v) jit_new_node_ww(jit_code_comi,u,v)
+ jit_code_comr, jit_code_comi,
+
+#define jit_ltr(u,v,w) jit_new_node_www(jit_code_ltr,u,v,w)
+#define jit_lti(u,v,w) jit_new_node_www(jit_code_lti,u,v,w)
+ jit_code_ltr, jit_code_lti,
+#define jit_ltr_u(u,v,w) jit_new_node_www(jit_code_ltr_u,u,v,w)
+#define jit_lti_u(u,v,w) jit_new_node_www(jit_code_lti_u,u,v,w)
+ jit_code_ltr_u, jit_code_lti_u,
+#define jit_ler(u,v,w) jit_new_node_www(jit_code_ler,u,v,w)
+#define jit_lei(u,v,w) jit_new_node_www(jit_code_lei,u,v,w)
+ jit_code_ler, jit_code_lei,
+#define jit_ler_u(u,v,w) jit_new_node_www(jit_code_ler_u,u,v,w)
+#define jit_lei_u(u,v,w) jit_new_node_www(jit_code_lei_u,u,v,w)
+ jit_code_ler_u, jit_code_lei_u,
+#define jit_eqr(u,v,w) jit_new_node_www(jit_code_eqr,u,v,w)
+#define jit_eqi(u,v,w) jit_new_node_www(jit_code_eqi,u,v,w)
+ jit_code_eqr, jit_code_eqi,
+#define jit_ger(u,v,w) jit_new_node_www(jit_code_ger,u,v,w)
+#define jit_gei(u,v,w) jit_new_node_www(jit_code_gei,u,v,w)
+ jit_code_ger, jit_code_gei,
+#define jit_ger_u(u,v,w) jit_new_node_www(jit_code_ger_u,u,v,w)
+#define jit_gei_u(u,v,w) jit_new_node_www(jit_code_gei_u,u,v,w)
+ jit_code_ger_u, jit_code_gei_u,
+#define jit_gtr(u,v,w) jit_new_node_www(jit_code_gtr,u,v,w)
+#define jit_gti(u,v,w) jit_new_node_www(jit_code_gti,u,v,w)
+ jit_code_gtr, jit_code_gti,
+#define jit_gtr_u(u,v,w) jit_new_node_www(jit_code_gtr_u,u,v,w)
+#define jit_gti_u(u,v,w) jit_new_node_www(jit_code_gti_u,u,v,w)
+ jit_code_gtr_u, jit_code_gti_u,
+#define jit_ner(u,v,w) jit_new_node_www(jit_code_ner,u,v,w)
+#define jit_nei(u,v,w) jit_new_node_www(jit_code_nei,u,v,w)
+ jit_code_ner, jit_code_nei,
+
+#define jit_movr(u,v) jit_new_node_ww(jit_code_movr,u,v)
+#define jit_movi(u,v) jit_new_node_ww(jit_code_movi,u,v)
+ jit_code_movr, jit_code_movi,
+
+#define jit_movnr(u,v,w) jit_new_node_www(jit_code_movnr,u,v,w)
+#define jit_movzr(u,v,w) jit_new_node_www(jit_code_movzr,u,v,w)
+ jit_code_movnr, jit_code_movzr,
+
+ jit_code_casr, jit_code_casi,
+#define jit_casr(u, v, w, x) jit_new_node_wwq(jit_code_casr, u, v, w, x)
+#define jit_casi(u, v, w, x) jit_new_node_wwq(jit_code_casi, u, v, w, x)
+
+#define jit_extr_c(u,v) jit_new_node_ww(jit_code_extr_c,u,v)
+#define jit_exti_c(u,v) jit_new_node_ww(jit_code_exti_c,u,v)
+ jit_code_extr_c, jit_code_exti_c,
+
+#define jit_extr_uc(u,v) jit_new_node_ww(jit_code_extr_uc,u,v)
+#define jit_exti_uc(u,v) jit_new_node_ww(jit_code_exti_uc,u,v)
+ jit_code_extr_uc, jit_code_exti_uc,
+
+#define jit_extr_s(u,v) jit_new_node_ww(jit_code_extr_s,u,v)
+#define jit_exti_s(u,v) jit_new_node_ww(jit_code_exti_s,u,v)
+ jit_code_extr_s, jit_code_exti_s,
+
+#define jit_extr_us(u,v) jit_new_node_ww(jit_code_extr_us,u,v)
+#define jit_exti_us(u,v) jit_new_node_ww(jit_code_exti_us,u,v)
+ jit_code_extr_us, jit_code_exti_us,
+
+#if __WORDSIZE == 64
+# define jit_extr_i(u,v) jit_new_node_ww(jit_code_extr_i,u,v)
+# define jit_exti_i(u,v) jit_new_node_ww(jit_code_exti_i,u,v)
+# define jit_extr_ui(u,v) jit_new_node_ww(jit_code_extr_ui,u,v)
+# define jit_exti_ui(u,v) jit_new_node_ww(jit_code_exti_ui,u,v)
+#endif
+ jit_code_extr_i, jit_code_exti_i,
+ jit_code_extr_ui, jit_code_exti_ui,
+
+#define jit_bswapr_us(u,v) jit_new_node_ww(jit_code_bswapr_us,u,v)
+#define jit_bswapi_us(u,v) jit_new_node_ww(jit_code_bswapi_us,u,v)
+ jit_code_bswapr_us, jit_code_bswapi_us,
+
+#define jit_bswapr_ui(u,v) jit_new_node_ww(jit_code_bswapr_ui,u,v)
+#define jit_bswapi_ui(u,v) jit_new_node_ww(jit_code_bswapi_ui,u,v)
+ jit_code_bswapr_ui, jit_code_bswapi_ui,
+
+#if __WORDSIZE == 64
+# define jit_bswapr_ul(u,v) jit_new_node_ww(jit_code_bswapr_ul,u,v)
+# define jit_bswapi_ul(u,v) jit_new_node_ww(jit_code_bswapi_ul,u,v)
+#endif
+ jit_code_bswapr_ul, jit_code_bswapi_ul,
+
+#if __WORDSIZE == 32
+# define jit_bswapr(u,v) jit_bswapr_ui(u,v)
+# define jit_bswapi(u,v) jit_bswapi_ui(u,v)
+#else
+# define jit_bswapr(u,v) jit_bswapr_ul(u,v)
+# define jit_bswapi(u,v) jit_bswapi_ul(u,v)
+#endif
+
+#define jit_htonr_us(u,v) jit_new_node_ww(jit_code_htonr_us,u,v)
+#define jit_ntohr_us(u,v) jit_htonr_us(u,v)
+#define jit_htoni_us(u,v) jit_new_node_ww(jit_code_htoni_us,u,v)
+#define jit_ntohi_us(u,v) jit_htoni_us(u, v)
+ jit_code_htonr_us, jit_code_htoni_us,
+
+#define jit_htonr_ui(u,v) jit_new_node_ww(jit_code_htonr_ui,u,v)
+#define jit_ntohr_ui(u,v) jit_htonr_ui(u,v)
+#define jit_htoni_ui(u,v) jit_new_node_ww(jit_code_htoni_ui,u,v)
+#define jit_ntohi_ui(u,v) jit_htoni_ui(u, v)
+ jit_code_htonr_ui, jit_code_htoni_ui,
+
+#if __WORDSIZE == 64
+# define jit_htonr_ul(u,v) jit_new_node_ww(jit_code_htonr_ul,u,v)
+# define jit_ntohr_ul(u,v) jit_htonr_ul(u,v)
+# define jit_htoni_ul(u,v) jit_new_node_ww(jit_code_htoni_ul,u,v)
+# define jit_ntohi_ul(u,v) jit_htoni_ul(u, v)
+#endif
+ jit_code_htonr_ul, jit_code_htoni_ul,
+
+#if __WORDSIZE == 32
+# define jit_htonr(u,v) jit_htonr_ui(u,v)
+# define jit_htoni(u,v) jit_htoni_ui(u,v)
+#else
+# define jit_htonr(u,v) jit_htonr_ul(u,v)
+# define jit_htoni(u,v) jit_htoni_ul(u,v)
+#endif
+#define jit_ntohr(u,v) jit_htonr(u,v)
+#define jit_ntohi(u,v) jit_htoni(u,v)
+
+#define jit_ldr_c(u,v) jit_new_node_ww(jit_code_ldr_c,u,v)
+#define jit_ldi_c(u,v) jit_new_node_wp(jit_code_ldi_c,u,v)
+ jit_code_ldr_c, jit_code_ldi_c,
+#define jit_ldr_uc(u,v) jit_new_node_ww(jit_code_ldr_uc,u,v)
+#define jit_ldi_uc(u,v) jit_new_node_wp(jit_code_ldi_uc,u,v)
+ jit_code_ldr_uc, jit_code_ldi_uc,
+#define jit_ldr_s(u,v) jit_new_node_ww(jit_code_ldr_s,u,v)
+#define jit_ldi_s(u,v) jit_new_node_wp(jit_code_ldi_s,u,v)
+ jit_code_ldr_s, jit_code_ldi_s,
+#define jit_ldr_us(u,v) jit_new_node_ww(jit_code_ldr_us,u,v)
+#define jit_ldi_us(u,v) jit_new_node_wp(jit_code_ldi_us,u,v)
+ jit_code_ldr_us, jit_code_ldi_us,
+#define jit_ldr_i(u,v) jit_new_node_ww(jit_code_ldr_i,u,v)
+#define jit_ldi_i(u,v) jit_new_node_wp(jit_code_ldi_i,u,v)
+ jit_code_ldr_i, jit_code_ldi_i,
+#if __WORDSIZE == 32
+# define jit_ldr(u,v) jit_ldr_i(u,v)
+# define jit_ldi(u,v) jit_ldi_i(u,v)
+#else
+# define jit_ldr(u,v) jit_ldr_l(u,v)
+# define jit_ldi(u,v) jit_ldi_l(u,v)
+# define jit_ldr_ui(u,v) jit_new_node_ww(jit_code_ldr_ui,u,v)
+# define jit_ldi_ui(u,v) jit_new_node_wp(jit_code_ldi_ui,u,v)
+#define jit_ldr_l(u,v) jit_new_node_ww(jit_code_ldr_l,u,v)
+#define jit_ldi_l(u,v) jit_new_node_wp(jit_code_ldi_l,u,v)
+#endif
+ jit_code_ldr_ui, jit_code_ldi_ui,
+ jit_code_ldr_l, jit_code_ldi_l,
+
+#define jit_ldxr_c(u,v,w) jit_new_node_www(jit_code_ldxr_c,u,v,w)
+#define jit_ldxi_c(u,v,w) jit_new_node_www(jit_code_ldxi_c,u,v,w)
+ jit_code_ldxr_c, jit_code_ldxi_c,
+#define jit_ldxr_uc(u,v,w) jit_new_node_www(jit_code_ldxr_uc,u,v,w)
+#define jit_ldxi_uc(u,v,w) jit_new_node_www(jit_code_ldxi_uc,u,v,w)
+ jit_code_ldxr_uc, jit_code_ldxi_uc,
+#define jit_ldxr_s(u,v,w) jit_new_node_www(jit_code_ldxr_s,u,v,w)
+#define jit_ldxi_s(u,v,w) jit_new_node_www(jit_code_ldxi_s,u,v,w)
+ jit_code_ldxr_s, jit_code_ldxi_s,
+#define jit_ldxr_us(u,v,w) jit_new_node_www(jit_code_ldxr_us,u,v,w)
+#define jit_ldxi_us(u,v,w) jit_new_node_www(jit_code_ldxi_us,u,v,w)
+ jit_code_ldxr_us, jit_code_ldxi_us,
+#define jit_ldxr_i(u,v,w) jit_new_node_www(jit_code_ldxr_i,u,v,w)
+#define jit_ldxi_i(u,v,w) jit_new_node_www(jit_code_ldxi_i,u,v,w)
+ jit_code_ldxr_i, jit_code_ldxi_i,
+#if __WORDSIZE == 32
+# define jit_ldxr(u,v,w) jit_ldxr_i(u,v,w)
+# define jit_ldxi(u,v,w) jit_ldxi_i(u,v,w)
+#else
+# define jit_ldxr_ui(u,v,w) jit_new_node_www(jit_code_ldxr_ui,u,v,w)
+# define jit_ldxi_ui(u,v,w) jit_new_node_www(jit_code_ldxi_ui,u,v,w)
+# define jit_ldxr_l(u,v,w) jit_new_node_www(jit_code_ldxr_l,u,v,w)
+# define jit_ldxi_l(u,v,w) jit_new_node_www(jit_code_ldxi_l,u,v,w)
+# define jit_ldxr(u,v,w) jit_ldxr_l(u,v,w)
+# define jit_ldxi(u,v,w) jit_ldxi_l(u,v,w)
+#endif
+ jit_code_ldxr_ui, jit_code_ldxi_ui,
+ jit_code_ldxr_l, jit_code_ldxi_l,
+
+#define jit_str_c(u,v) jit_new_node_ww(jit_code_str_c,u,v)
+#define jit_sti_c(u,v) jit_new_node_pw(jit_code_sti_c,u,v)
+ jit_code_str_c, jit_code_sti_c,
+#define jit_str_s(u,v) jit_new_node_ww(jit_code_str_s,u,v)
+#define jit_sti_s(u,v) jit_new_node_pw(jit_code_sti_s,u,v)
+ jit_code_str_s, jit_code_sti_s,
+#define jit_str_i(u,v) jit_new_node_ww(jit_code_str_i,u,v)
+#define jit_sti_i(u,v) jit_new_node_pw(jit_code_sti_i,u,v)
+ jit_code_str_i, jit_code_sti_i,
+#if __WORDSIZE == 32
+# define jit_str(u,v) jit_str_i(u,v)
+# define jit_sti(u,v) jit_sti_i(u,v)
+#else
+# define jit_str(u,v) jit_str_l(u,v)
+# define jit_sti(u,v) jit_sti_l(u,v)
+# define jit_str_l(u,v) jit_new_node_ww(jit_code_str_l,u,v)
+# define jit_sti_l(u,v) jit_new_node_pw(jit_code_sti_l,u,v)
+#endif
+ jit_code_str_l, jit_code_sti_l,
+
+#define jit_stxr_c(u,v,w) jit_new_node_www(jit_code_stxr_c,u,v,w)
+#define jit_stxi_c(u,v,w) jit_new_node_www(jit_code_stxi_c,u,v,w)
+ jit_code_stxr_c, jit_code_stxi_c,
+#define jit_stxr_s(u,v,w) jit_new_node_www(jit_code_stxr_s,u,v,w)
+#define jit_stxi_s(u,v,w) jit_new_node_www(jit_code_stxi_s,u,v,w)
+ jit_code_stxr_s, jit_code_stxi_s,
+#define jit_stxr_i(u,v,w) jit_new_node_www(jit_code_stxr_i,u,v,w)
+#define jit_stxi_i(u,v,w) jit_new_node_www(jit_code_stxi_i,u,v,w)
+ jit_code_stxr_i, jit_code_stxi_i,
+#if __WORDSIZE == 32
+# define jit_stxr(u,v,w) jit_stxr_i(u,v,w)
+# define jit_stxi(u,v,w) jit_stxi_i(u,v,w)
+#else
+# define jit_stxr(u,v,w) jit_stxr_l(u,v,w)
+# define jit_stxi(u,v,w) jit_stxi_l(u,v,w)
+# define jit_stxr_l(u,v,w) jit_new_node_www(jit_code_stxr_l,u,v,w)
+# define jit_stxi_l(u,v,w) jit_new_node_www(jit_code_stxi_l,u,v,w)
+#endif
+ jit_code_stxr_l, jit_code_stxi_l,
+
+#define jit_bltr(v,w) jit_new_node_pww(jit_code_bltr,NULL,v,w)
+#define jit_blti(v,w) jit_new_node_pww(jit_code_blti,NULL,v,w)
+ jit_code_bltr, jit_code_blti,
+#define jit_bltr_u(v,w) jit_new_node_pww(jit_code_bltr_u,NULL,v,w)
+#define jit_blti_u(v,w) jit_new_node_pww(jit_code_blti_u,NULL,v,w)
+ jit_code_bltr_u, jit_code_blti_u,
+#define jit_bler(v,w) jit_new_node_pww(jit_code_bler,NULL,v,w)
+#define jit_blei(v,w) jit_new_node_pww(jit_code_blei,NULL,v,w)
+ jit_code_bler, jit_code_blei,
+#define jit_bler_u(v,w) jit_new_node_pww(jit_code_bler_u,NULL,v,w)
+#define jit_blei_u(v,w) jit_new_node_pww(jit_code_blei_u,NULL,v,w)
+ jit_code_bler_u, jit_code_blei_u,
+#define jit_beqr(v,w) jit_new_node_pww(jit_code_beqr,NULL,v,w)
+#define jit_beqi(v,w) jit_new_node_pww(jit_code_beqi,NULL,v,w)
+ jit_code_beqr, jit_code_beqi,
+#define jit_bger(v,w) jit_new_node_pww(jit_code_bger,NULL,v,w)
+#define jit_bgei(v,w) jit_new_node_pww(jit_code_bgei,NULL,v,w)
+ jit_code_bger, jit_code_bgei,
+#define jit_bger_u(v,w) jit_new_node_pww(jit_code_bger_u,NULL,v,w)
+#define jit_bgei_u(v,w) jit_new_node_pww(jit_code_bgei_u,NULL,v,w)
+ jit_code_bger_u, jit_code_bgei_u,
+#define jit_bgtr(v,w) jit_new_node_pww(jit_code_bgtr,NULL,v,w)
+#define jit_bgti(v,w) jit_new_node_pww(jit_code_bgti,NULL,v,w)
+ jit_code_bgtr, jit_code_bgti,
+#define jit_bgtr_u(v,w) jit_new_node_pww(jit_code_bgtr_u,NULL,v,w)
+#define jit_bgti_u(v,w) jit_new_node_pww(jit_code_bgti_u,NULL,v,w)
+ jit_code_bgtr_u, jit_code_bgti_u,
+#define jit_bner(v,w) jit_new_node_pww(jit_code_bner,NULL,v,w)
+#define jit_bnei(v,w) jit_new_node_pww(jit_code_bnei,NULL,v,w)
+ jit_code_bner, jit_code_bnei,
+
+#define jit_bmsr(v,w) jit_new_node_pww(jit_code_bmsr,NULL,v,w)
+#define jit_bmsi(v,w) jit_new_node_pww(jit_code_bmsi,NULL,v,w)
+ jit_code_bmsr, jit_code_bmsi,
+#define jit_bmcr(v,w) jit_new_node_pww(jit_code_bmcr,NULL,v,w)
+#define jit_bmci(v,w) jit_new_node_pww(jit_code_bmci,NULL,v,w)
+ jit_code_bmcr, jit_code_bmci,
+
+#define jit_boaddr(v,w) jit_new_node_pww(jit_code_boaddr,NULL,v,w)
+#define jit_boaddi(v,w) jit_new_node_pww(jit_code_boaddi,NULL,v,w)
+ jit_code_boaddr, jit_code_boaddi,
+#define jit_boaddr_u(v,w) jit_new_node_pww(jit_code_boaddr_u,NULL,v,w)
+#define jit_boaddi_u(v,w) jit_new_node_pww(jit_code_boaddi_u,NULL,v,w)
+ jit_code_boaddr_u, jit_code_boaddi_u,
+#define jit_bxaddr(v,w) jit_new_node_pww(jit_code_bxaddr,NULL,v,w)
+#define jit_bxaddi(v,w) jit_new_node_pww(jit_code_bxaddi,NULL,v,w)
+ jit_code_bxaddr, jit_code_bxaddi,
+#define jit_bxaddr_u(v,w) jit_new_node_pww(jit_code_bxaddr_u,NULL,v,w)
+#define jit_bxaddi_u(v,w) jit_new_node_pww(jit_code_bxaddi_u,NULL,v,w)
+ jit_code_bxaddr_u, jit_code_bxaddi_u,
+#define jit_bosubr(v,w) jit_new_node_pww(jit_code_bosubr,NULL,v,w)
+#define jit_bosubi(v,w) jit_new_node_pww(jit_code_bosubi,NULL,v,w)
+ jit_code_bosubr, jit_code_bosubi,
+#define jit_bosubr_u(v,w) jit_new_node_pww(jit_code_bosubr_u,NULL,v,w)
+#define jit_bosubi_u(v,w) jit_new_node_pww(jit_code_bosubi_u,NULL,v,w)
+ jit_code_bosubr_u, jit_code_bosubi_u,
+#define jit_bxsubr(v,w) jit_new_node_pww(jit_code_bxsubr,NULL,v,w)
+#define jit_bxsubi(v,w) jit_new_node_pww(jit_code_bxsubi,NULL,v,w)
+ jit_code_bxsubr, jit_code_bxsubi,
+#define jit_bxsubr_u(v,w) jit_new_node_pww(jit_code_bxsubr_u,NULL,v,w)
+#define jit_bxsubi_u(v,w) jit_new_node_pww(jit_code_bxsubi_u,NULL,v,w)
+ jit_code_bxsubr_u, jit_code_bxsubi_u,
+
+#define jit_jmpr(u) jit_new_node_w(jit_code_jmpr,u)
+#define jit_jmpi() jit_new_node_p(jit_code_jmpi,NULL)
+ jit_code_jmpr, jit_code_jmpi,
+#define jit_callr(u) jit_new_node_w(jit_code_callr,u)
+#define jit_calli(u) jit_new_node_p(jit_code_calli,u)
+ jit_code_callr, jit_code_calli,
+
+#define jit_prepare() _jit_prepare(_jit)
+ jit_code_prepare,
+
+#define jit_pushargr_c(u) _jit_pushargr(_jit,u,jit_code_pushargr_c)
+#define jit_pushargi_c(u) _jit_pushargi(_jit,u,jit_code_pushargi_c)
+#define jit_pushargr_uc(u) _jit_pushargr(_jit,u,jit_code_pushargr_uc)
+#define jit_pushargi_uc(u) _jit_pushargi(_jit,u,jit_code_pushargi_uc)
+#define jit_pushargr_s(u) _jit_pushargr(_jit,u,jit_code_pushargr_s)
+#define jit_pushargi_s(u) _jit_pushargi(_jit,u,jit_code_pushargi_s)
+#define jit_pushargr_us(u) _jit_pushargr(_jit,u,jit_code_pushargr_us)
+#define jit_pushargi_us(u) _jit_pushargi(_jit,u,jit_code_pushargi_us)
+#define jit_pushargr_i(u) _jit_pushargr(_jit,u,jit_code_pushargr_i)
+#define jit_pushargi_i(u) _jit_pushargi(_jit,u,jit_code_pushargi_i)
+#if __WORDSIZE == 32
+# define jit_pushargr(u) jit_pushargr_i(u)
+# define jit_pushargi(u) jit_pushargi_i(u)
+#else
+# define jit_pushargr_ui(u) _jit_pushargr(_jit,u,jit_code_pushargr_ui)
+# define jit_pushargi_ui(u) _jit_pushargi(_jit,u,jit_code_pushargi_ui)
+# define jit_pushargr_l(u) _jit_pushargr(_jit,u,jit_code_pushargr_l)
+# define jit_pushargi_l(u) _jit_pushargi(_jit,u,jit_code_pushargi_l)
+# define jit_pushargr(u) jit_pushargr_l(u)
+# define jit_pushargi(u) jit_pushargi_l(u)
+#endif
+ jit_code_pushargr_c, jit_code_pushargi_c,
+ jit_code_pushargr_uc, jit_code_pushargi_uc,
+ jit_code_pushargr_s, jit_code_pushargi_s,
+ jit_code_pushargr_us, jit_code_pushargi_us,
+ jit_code_pushargr_i, jit_code_pushargi_i,
+ jit_code_pushargr_ui, jit_code_pushargi_ui,
+ jit_code_pushargr_l, jit_code_pushargi_l,
+#if __WORDSIZE == 32
+# define jit_code_pushargr jit_code_pushargr_i
+# define jit_code_pushargi jit_code_pushargi_i
+#else
+# define jit_code_pushargr jit_code_pushargr_l
+# define jit_code_pushargi jit_code_pushargi_l
+#endif
+
+#define jit_finishr(u) _jit_finishr(_jit,u)
+#define jit_finishi(u) _jit_finishi(_jit,u)
+ jit_code_finishr, jit_code_finishi,
+#define jit_ret() _jit_ret(_jit)
+ jit_code_ret,
+
+#define jit_retr_c(u) _jit_retr(_jit,u,jit_code_retr_c)
+#define jit_reti_c(u) _jit_reti(_jit,u,jit_code_reti_c)
+#define jit_retr_uc(u) _jit_retr(_jit,u,jit_code_retr_uc)
+#define jit_reti_uc(u) _jit_reti(_jit,u,jit_code_reti_uc)
+#define jit_retr_s(u) _jit_retr(_jit,u,jit_code_retr_s)
+#define jit_reti_s(u) _jit_reti(_jit,u,jit_code_reti_s)
+#define jit_retr_us(u) _jit_retr(_jit,u,jit_code_retr_us)
+#define jit_reti_us(u) _jit_reti(_jit,u,jit_code_reti_us)
+#define jit_retr_i(u) _jit_retr(_jit,u,jit_code_retr_i)
+#define jit_reti_i(u) _jit_reti(_jit,u,jit_code_reti_i)
+#if __WORDSIZE == 32
+# define jit_retr(u) jit_retr_i(u)
+# define jit_reti(u) jit_reti_i(u)
+#else
+# define jit_retr_ui(u) _jit_retr(_jit,u,jit_code_retr_ui)
+# define jit_reti_ui(u) _jit_reti(_jit,u,jit_code_reti_ui)
+# define jit_retr_l(u) _jit_retr(_jit,u,jit_code_retr_l)
+# define jit_reti_l(u) _jit_reti(_jit,u,jit_code_reti_l)
+# define jit_retr(u) jit_retr_l(u)
+# define jit_reti(u) jit_reti_l(u)
+#endif
+ jit_code_retr_c, jit_code_reti_c,
+ jit_code_retr_uc, jit_code_reti_uc,
+ jit_code_retr_s, jit_code_reti_s,
+ jit_code_retr_us, jit_code_reti_us,
+ jit_code_retr_i, jit_code_reti_i,
+ jit_code_retr_ui, jit_code_reti_ui,
+ jit_code_retr_l, jit_code_reti_l,
+#if __WORDSIZE == 32
+# define jit_code_retr jit_code_retr_i
+# define jit_code_reti jit_code_reti_i
+#else
+# define jit_code_retr jit_code_retr_l
+# define jit_code_reti jit_code_reti_l
+#endif
+
+#define jit_retval_c(u) _jit_retval_c(_jit,u)
+#define jit_retval_uc(u) _jit_retval_uc(_jit,u)
+#define jit_retval_s(u) _jit_retval_s(_jit,u)
+#define jit_retval_us(u) _jit_retval_us(_jit,u)
+#define jit_retval_i(u) _jit_retval_i(_jit,u)
+#if __WORDSIZE == 32
+# define jit_retval(u) jit_retval_i(u)
+#else
+# define jit_retval_ui(u) _jit_retval_ui(_jit,u)
+# define jit_retval_l(u) _jit_retval_l(_jit,u)
+# define jit_retval(u) jit_retval_l(u)
+#endif
+ jit_code_retval_c, jit_code_retval_uc,
+ jit_code_retval_s, jit_code_retval_us,
+ jit_code_retval_i, jit_code_retval_ui,
+ jit_code_retval_l,
+#if __WORDSIZE == 32
+# define jit_code_retval jit_code_retval_i
+#else
+# define jit_code_retval jit_code_retval_l
+#endif
+
+#define jit_epilog() _jit_epilog(_jit)
+ jit_code_epilog,
+
+#define jit_arg_f() _jit_arg_f(_jit)
+ jit_code_arg_f,
+#define jit_getarg_f(u,v) _jit_getarg_f(_jit,u,v)
+ jit_code_getarg_f,
+#define jit_putargr_f(u,v) _jit_putargr_f(_jit,u,v)
+#define jit_putargi_f(u,v) _jit_putargi_f(_jit,u,v)
+ jit_code_putargr_f, jit_code_putargi_f,
+
+#define jit_addr_f(u,v,w) jit_new_node_www(jit_code_addr_f,u,v,w)
+#define jit_addi_f(u,v,w) jit_new_node_wwf(jit_code_addi_f,u,v,w)
+ jit_code_addr_f, jit_code_addi_f,
+#define jit_subr_f(u,v,w) jit_new_node_www(jit_code_subr_f,u,v,w)
+#define jit_subi_f(u,v,w) jit_new_node_wwf(jit_code_subi_f,u,v,w)
+ jit_code_subr_f, jit_code_subi_f,
+#define jit_rsbr_f(u,v,w) jit_subr_f(u,w,v)
+#define jit_rsbi_f(u,v,w) jit_new_node_wwf(jit_code_rsbi_f,u,v,w)
+ jit_code_rsbi_f,
+#define jit_mulr_f(u,v,w) jit_new_node_www(jit_code_mulr_f,u,v,w)
+#define jit_muli_f(u,v,w) jit_new_node_wwf(jit_code_muli_f,u,v,w)
+ jit_code_mulr_f, jit_code_muli_f,
+#define jit_divr_f(u,v,w) jit_new_node_www(jit_code_divr_f,u,v,w)
+#define jit_divi_f(u,v,w) jit_new_node_wwf(jit_code_divi_f,u,v,w)
+ jit_code_divr_f, jit_code_divi_f,
+
+#define jit_negr_f(u,v) jit_new_node_ww(jit_code_negr_f,u,v)
+#define jit_negi_f(u,v) _jit_negi_f(_jit,u,v)
+ jit_code_negr_f, jit_code_negi_f,
+#define jit_absr_f(u,v) jit_new_node_ww(jit_code_absr_f,u,v)
+#define jit_absi_f(u,v) _jit_absi_f(_jit,u,v)
+ jit_code_absr_f, jit_code_absi_f,
+#define jit_sqrtr_f(u,v) jit_new_node_ww(jit_code_sqrtr_f,u,v)
+#define jit_sqrti_f(u,v) _jit_sqrti_f(_jit,u,v)
+ jit_code_sqrtr_f, jit_code_sqrti_f,
+
+#define jit_ltr_f(u,v,w) jit_new_node_www(jit_code_ltr_f,u,v,w)
+#define jit_lti_f(u,v,w) jit_new_node_wwf(jit_code_lti_f,u,v,w)
+ jit_code_ltr_f, jit_code_lti_f,
+#define jit_ler_f(u,v,w) jit_new_node_www(jit_code_ler_f,u,v,w)
+#define jit_lei_f(u,v,w) jit_new_node_wwf(jit_code_lei_f,u,v,w)
+ jit_code_ler_f, jit_code_lei_f,
+#define jit_eqr_f(u,v,w) jit_new_node_www(jit_code_eqr_f,u,v,w)
+#define jit_eqi_f(u,v,w) jit_new_node_wwf(jit_code_eqi_f,u,v,w)
+ jit_code_eqr_f, jit_code_eqi_f,
+#define jit_ger_f(u,v,w) jit_new_node_www(jit_code_ger_f,u,v,w)
+#define jit_gei_f(u,v,w) jit_new_node_wwf(jit_code_gei_f,u,v,w)
+ jit_code_ger_f, jit_code_gei_f,
+#define jit_gtr_f(u,v,w) jit_new_node_www(jit_code_gtr_f,u,v,w)
+#define jit_gti_f(u,v,w) jit_new_node_wwf(jit_code_gti_f,u,v,w)
+ jit_code_gtr_f, jit_code_gti_f,
+#define jit_ner_f(u,v,w) jit_new_node_www(jit_code_ner_f,u,v,w)
+#define jit_nei_f(u,v,w) jit_new_node_wwf(jit_code_nei_f,u,v,w)
+ jit_code_ner_f, jit_code_nei_f,
+#define jit_unltr_f(u,v,w) jit_new_node_www(jit_code_unltr_f,u,v,w)
+#define jit_unlti_f(u,v,w) jit_new_node_wwf(jit_code_unlti_f,u,v,w)
+ jit_code_unltr_f, jit_code_unlti_f,
+#define jit_unler_f(u,v,w) jit_new_node_www(jit_code_unler_f,u,v,w)
+#define jit_unlei_f(u,v,w) jit_new_node_wwf(jit_code_unlei_f,u,v,w)
+ jit_code_unler_f, jit_code_unlei_f,
+#define jit_uneqr_f(u,v,w) jit_new_node_www(jit_code_uneqr_f,u,v,w)
+#define jit_uneqi_f(u,v,w) jit_new_node_wwf(jit_code_uneqi_f,u,v,w)
+ jit_code_uneqr_f, jit_code_uneqi_f,
+#define jit_unger_f(u,v,w) jit_new_node_www(jit_code_unger_f,u,v,w)
+#define jit_ungei_f(u,v,w) jit_new_node_wwf(jit_code_ungei_f,u,v,w)
+ jit_code_unger_f, jit_code_ungei_f,
+#define jit_ungtr_f(u,v,w) jit_new_node_www(jit_code_ungtr_f,u,v,w)
+#define jit_ungti_f(u,v,w) jit_new_node_wwf(jit_code_ungti_f,u,v,w)
+ jit_code_ungtr_f, jit_code_ungti_f,
+#define jit_ltgtr_f(u,v,w) jit_new_node_www(jit_code_ltgtr_f,u,v,w)
+#define jit_ltgti_f(u,v,w) jit_new_node_wwf(jit_code_ltgti_f,u,v,w)
+ jit_code_ltgtr_f, jit_code_ltgti_f,
+#define jit_ordr_f(u,v,w) jit_new_node_www(jit_code_ordr_f,u,v,w)
+#define jit_ordi_f(u,v,w) jit_new_node_wwf(jit_code_ordi_f,u,v,w)
+ jit_code_ordr_f, jit_code_ordi_f,
+#define jit_unordr_f(u,v,w) jit_new_node_www(jit_code_unordr_f,u,v,w)
+#define jit_unordi_f(u,v,w) jit_new_node_wwf(jit_code_unordi_f,u,v,w)
+ jit_code_unordr_f, jit_code_unordi_f,
+
+#define jit_truncr_f_i(u,v) jit_new_node_ww(jit_code_truncr_f_i,u,v)
+ jit_code_truncr_f_i,
+#if __WORDSIZE == 32
+# define jit_truncr_f(u,v) jit_truncr_f_i(u,v)
+#else
+# define jit_truncr_f(u,v) jit_truncr_f_l(u,v)
+# define jit_truncr_f_l(u,v) jit_new_node_ww(jit_code_truncr_f_l,u,v)
+#endif
+ jit_code_truncr_f_l,
+#define jit_extr_f(u,v) jit_new_node_ww(jit_code_extr_f,u,v)
+#define jit_extr_d_f(u,v) jit_new_node_ww(jit_code_extr_d_f,u,v)
+ jit_code_extr_f, jit_code_extr_d_f,
+#define jit_movr_f(u,v) jit_new_node_ww(jit_code_movr_f,u,v)
+#define jit_movi_f(u,v) jit_new_node_wf(jit_code_movi_f,u,v)
+ jit_code_movr_f, jit_code_movi_f,
+
+#define jit_ldr_f(u,v) jit_new_node_ww(jit_code_ldr_f,u,v)
+#define jit_ldi_f(u,v) jit_new_node_wp(jit_code_ldi_f,u,v)
+ jit_code_ldr_f, jit_code_ldi_f,
+#define jit_ldxr_f(u,v,w) jit_new_node_www(jit_code_ldxr_f,u,v,w)
+#define jit_ldxi_f(u,v,w) jit_new_node_www(jit_code_ldxi_f,u,v,w)
+ jit_code_ldxr_f, jit_code_ldxi_f,
+#define jit_str_f(u,v) jit_new_node_ww(jit_code_str_f,u,v)
+#define jit_sti_f(u,v) jit_new_node_pw(jit_code_sti_f,u,v)
+ jit_code_str_f, jit_code_sti_f,
+#define jit_stxr_f(u,v,w) jit_new_node_www(jit_code_stxr_f,u,v,w)
+#define jit_stxi_f(u,v,w) jit_new_node_www(jit_code_stxi_f,u,v,w)
+ jit_code_stxr_f, jit_code_stxi_f,
+
+#define jit_bltr_f(v,w) jit_new_node_pww(jit_code_bltr_f,NULL,v,w)
+#define jit_blti_f(v,w) jit_new_node_pwf(jit_code_blti_f,NULL,v,w)
+ jit_code_bltr_f, jit_code_blti_f,
+#define jit_bler_f(v,w) jit_new_node_pww(jit_code_bler_f,NULL,v,w)
+#define jit_blei_f(v,w) jit_new_node_pwf(jit_code_blei_f,NULL,v,w)
+ jit_code_bler_f, jit_code_blei_f,
+#define jit_beqr_f(v,w) jit_new_node_pww(jit_code_beqr_f,NULL,v,w)
+#define jit_beqi_f(v,w) jit_new_node_pwf(jit_code_beqi_f,NULL,v,w)
+ jit_code_beqr_f, jit_code_beqi_f,
+#define jit_bger_f(v,w) jit_new_node_pww(jit_code_bger_f,NULL,v,w)
+#define jit_bgei_f(v,w) jit_new_node_pwf(jit_code_bgei_f,NULL,v,w)
+ jit_code_bger_f, jit_code_bgei_f,
+#define jit_bgtr_f(v,w) jit_new_node_pww(jit_code_bgtr_f,NULL,v,w)
+#define jit_bgti_f(v,w) jit_new_node_pwf(jit_code_bgti_f,NULL,v,w)
+ jit_code_bgtr_f, jit_code_bgti_f,
+#define jit_bner_f(v,w) jit_new_node_pww(jit_code_bner_f,NULL,v,w)
+#define jit_bnei_f(v,w) jit_new_node_pwf(jit_code_bnei_f,NULL,v,w)
+ jit_code_bner_f, jit_code_bnei_f,
+#define jit_bunltr_f(v,w) jit_new_node_pww(jit_code_bunltr_f,NULL,v,w)
+#define jit_bunlti_f(v,w) jit_new_node_pwf(jit_code_bunlti_f,NULL,v,w)
+ jit_code_bunltr_f, jit_code_bunlti_f,
+#define jit_bunler_f(v,w) jit_new_node_pww(jit_code_bunler_f,NULL,v,w)
+#define jit_bunlei_f(v,w) jit_new_node_pwf(jit_code_bunlei_f,NULL,v,w)
+ jit_code_bunler_f, jit_code_bunlei_f,
+#define jit_buneqr_f(v,w) jit_new_node_pww(jit_code_buneqr_f,NULL,v,w)
+#define jit_buneqi_f(v,w) jit_new_node_pwf(jit_code_buneqi_f,NULL,v,w)
+ jit_code_buneqr_f, jit_code_buneqi_f,
+#define jit_bunger_f(v,w) jit_new_node_pww(jit_code_bunger_f,NULL,v,w)
+#define jit_bungei_f(v,w) jit_new_node_pwf(jit_code_bungei_f,NULL,v,w)
+ jit_code_bunger_f, jit_code_bungei_f,
+#define jit_bungtr_f(v,w) jit_new_node_pww(jit_code_bungtr_f,NULL,v,w)
+#define jit_bungti_f(v,w) jit_new_node_pwf(jit_code_bungti_f,NULL,v,w)
+ jit_code_bungtr_f, jit_code_bungti_f,
+#define jit_bltgtr_f(v,w) jit_new_node_pww(jit_code_bltgtr_f,NULL,v,w)
+#define jit_bltgti_f(v,w) jit_new_node_pwf(jit_code_bltgti_f,NULL,v,w)
+ jit_code_bltgtr_f, jit_code_bltgti_f,
+#define jit_bordr_f(v,w) jit_new_node_pww(jit_code_bordr_f,NULL,v,w)
+#define jit_bordi_f(v,w) jit_new_node_pwf(jit_code_bordi_f,NULL,v,w)
+ jit_code_bordr_f, jit_code_bordi_f,
+#define jit_bunordr_f(v,w) jit_new_node_pww(jit_code_bunordr_f,NULL,v,w)
+#define jit_bunordi_f(v,w) jit_new_node_pwf(jit_code_bunordi_f,NULL,v,w)
+ jit_code_bunordr_f, jit_code_bunordi_f,
+
+#define jit_pushargr_f(u) _jit_pushargr_f(_jit,u)
+#define jit_pushargi_f(u) _jit_pushargi_f(_jit,u)
+ jit_code_pushargr_f, jit_code_pushargi_f,
+#define jit_retr_f(u) _jit_retr_f(_jit,u)
+#define jit_reti_f(u) _jit_reti_f(_jit,u)
+ jit_code_retr_f, jit_code_reti_f,
+#define jit_retval_f(u) _jit_retval_f(_jit,u)
+ jit_code_retval_f,
+
+#define jit_arg_d() _jit_arg_d(_jit)
+ jit_code_arg_d,
+#define jit_getarg_d(u,v) _jit_getarg_d(_jit,u,v)
+ jit_code_getarg_d,
+#define jit_putargr_d(u,v) _jit_putargr_d(_jit,u,v)
+#define jit_putargi_d(u,v) _jit_putargi_d(_jit,u,v)
+ jit_code_putargr_d, jit_code_putargi_d,
+
+#define jit_addr_d(u,v,w) jit_new_node_www(jit_code_addr_d,u,v,w)
+#define jit_addi_d(u,v,w) jit_new_node_wwd(jit_code_addi_d,u,v,w)
+ jit_code_addr_d, jit_code_addi_d,
+#define jit_subr_d(u,v,w) jit_new_node_www(jit_code_subr_d,u,v,w)
+#define jit_subi_d(u,v,w) jit_new_node_wwd(jit_code_subi_d,u,v,w)
+ jit_code_subr_d, jit_code_subi_d,
+#define jit_rsbr_d(u,v,w) jit_subr_d(u,w,v)
+#define jit_rsbi_d(u,v,w) jit_new_node_wwd(jit_code_rsbi_d,u,v,w)
+ jit_code_rsbi_d,
+#define jit_mulr_d(u,v,w) jit_new_node_www(jit_code_mulr_d,u,v,w)
+#define jit_muli_d(u,v,w) jit_new_node_wwd(jit_code_muli_d,u,v,w)
+ jit_code_mulr_d, jit_code_muli_d,
+#define jit_divr_d(u,v,w) jit_new_node_www(jit_code_divr_d,u,v,w)
+#define jit_divi_d(u,v,w) jit_new_node_wwd(jit_code_divi_d,u,v,w)
+ jit_code_divr_d, jit_code_divi_d,
+
+#define jit_negr_d(u,v) jit_new_node_ww(jit_code_negr_d,u,v)
+#define jit_negi_d(u,v) _jit_negi_d(_jit,u,v)
+ jit_code_negr_d, jit_code_negi_d,
+#define jit_absr_d(u,v) jit_new_node_ww(jit_code_absr_d,u,v)
+#define jit_absi_d(u,v) _jit_absi_d(_jit,u,v)
+ jit_code_absr_d, jit_code_absi_d,
+#define jit_sqrtr_d(u,v) jit_new_node_ww(jit_code_sqrtr_d,u,v)
+#define jit_sqrti_d(u,v) _jit_sqrti_d(_jit,u,v)
+ jit_code_sqrtr_d, jit_code_sqrti_d,
+
+#define jit_ltr_d(u,v,w) jit_new_node_www(jit_code_ltr_d,u,v,w)
+#define jit_lti_d(u,v,w) jit_new_node_wwd(jit_code_lti_d,u,v,w)
+ jit_code_ltr_d, jit_code_lti_d,
+#define jit_ler_d(u,v,w) jit_new_node_www(jit_code_ler_d,u,v,w)
+#define jit_lei_d(u,v,w) jit_new_node_wwd(jit_code_lei_d,u,v,w)
+ jit_code_ler_d, jit_code_lei_d,
+#define jit_eqr_d(u,v,w) jit_new_node_www(jit_code_eqr_d,u,v,w)
+#define jit_eqi_d(u,v,w) jit_new_node_wwd(jit_code_eqi_d,u,v,w)
+ jit_code_eqr_d, jit_code_eqi_d,
+#define jit_ger_d(u,v,w) jit_new_node_www(jit_code_ger_d,u,v,w)
+#define jit_gei_d(u,v,w) jit_new_node_wwd(jit_code_gei_d,u,v,w)
+ jit_code_ger_d, jit_code_gei_d,
+#define jit_gtr_d(u,v,w) jit_new_node_www(jit_code_gtr_d,u,v,w)
+#define jit_gti_d(u,v,w) jit_new_node_wwd(jit_code_gti_d,u,v,w)
+ jit_code_gtr_d, jit_code_gti_d,
+#define jit_ner_d(u,v,w) jit_new_node_www(jit_code_ner_d,u,v,w)
+#define jit_nei_d(u,v,w) jit_new_node_wwd(jit_code_nei_d,u,v,w)
+ jit_code_ner_d, jit_code_nei_d,
+#define jit_unltr_d(u,v,w) jit_new_node_www(jit_code_unltr_d,u,v,w)
+#define jit_unlti_d(u,v,w) jit_new_node_wwd(jit_code_unlti_d,u,v,w)
+ jit_code_unltr_d, jit_code_unlti_d,
+#define jit_unler_d(u,v,w) jit_new_node_www(jit_code_unler_d,u,v,w)
+#define jit_unlei_d(u,v,w) jit_new_node_wwd(jit_code_unlei_d,u,v,w)
+ jit_code_unler_d, jit_code_unlei_d,
+#define jit_uneqr_d(u,v,w) jit_new_node_www(jit_code_uneqr_d,u,v,w)
+#define jit_uneqi_d(u,v,w) jit_new_node_wwd(jit_code_uneqi_d,u,v,w)
+ jit_code_uneqr_d, jit_code_uneqi_d,
+#define jit_unger_d(u,v,w) jit_new_node_www(jit_code_unger_d,u,v,w)
+#define jit_ungei_d(u,v,w) jit_new_node_wwd(jit_code_ungei_d,u,v,w)
+ jit_code_unger_d, jit_code_ungei_d,
+#define jit_ungtr_d(u,v,w) jit_new_node_www(jit_code_ungtr_d,u,v,w)
+#define jit_ungti_d(u,v,w) jit_new_node_wwd(jit_code_ungti_d,u,v,w)
+ jit_code_ungtr_d, jit_code_ungti_d,
+#define jit_ltgtr_d(u,v,w) jit_new_node_www(jit_code_ltgtr_d,u,v,w)
+#define jit_ltgti_d(u,v,w) jit_new_node_wwd(jit_code_ltgti_d,u,v,w)
+ jit_code_ltgtr_d, jit_code_ltgti_d,
+#define jit_ordr_d(u,v,w) jit_new_node_www(jit_code_ordr_d,u,v,w)
+#define jit_ordi_d(u,v,w) jit_new_node_wwd(jit_code_ordi_d,u,v,w)
+ jit_code_ordr_d, jit_code_ordi_d,
+#define jit_unordr_d(u,v,w) jit_new_node_www(jit_code_unordr_d,u,v,w)
+#define jit_unordi_d(u,v,w) jit_new_node_wwd(jit_code_unordi_d,u,v,w)
+ jit_code_unordr_d, jit_code_unordi_d,
+
+#define jit_truncr_d_i(u,v) jit_new_node_ww(jit_code_truncr_d_i,u,v)
+ jit_code_truncr_d_i,
+#if __WORDSIZE == 32
+# define jit_truncr_d(u,v) jit_truncr_d_i(u,v)
+#else
+# define jit_truncr_d(u,v) jit_truncr_d_l(u,v)
+# define jit_truncr_d_l(u,v) jit_new_node_ww(jit_code_truncr_d_l,u,v)
+#endif
+ jit_code_truncr_d_l,
+#define jit_extr_d(u,v) jit_new_node_ww(jit_code_extr_d,u,v)
+#define jit_extr_f_d(u,v) jit_new_node_ww(jit_code_extr_f_d,u,v)
+ jit_code_extr_d, jit_code_extr_f_d,
+#define jit_movr_d(u,v) jit_new_node_ww(jit_code_movr_d,u,v)
+#define jit_movi_d(u,v) jit_new_node_wd(jit_code_movi_d,u,v)
+ jit_code_movr_d, jit_code_movi_d,
+
+#define jit_ldr_d(u,v) jit_new_node_ww(jit_code_ldr_d,u,v)
+#define jit_ldi_d(u,v) jit_new_node_wp(jit_code_ldi_d,u,v)
+ jit_code_ldr_d, jit_code_ldi_d,
+#define jit_ldxr_d(u,v,w) jit_new_node_www(jit_code_ldxr_d,u,v,w)
+#define jit_ldxi_d(u,v,w) jit_new_node_www(jit_code_ldxi_d,u,v,w)
+ jit_code_ldxr_d, jit_code_ldxi_d,
+#define jit_str_d(u,v) jit_new_node_ww(jit_code_str_d,u,v)
+#define jit_sti_d(u,v) jit_new_node_pw(jit_code_sti_d,u,v)
+ jit_code_str_d, jit_code_sti_d,
+#define jit_stxr_d(u,v,w) jit_new_node_www(jit_code_stxr_d,u,v,w)
+#define jit_stxi_d(u,v,w) jit_new_node_www(jit_code_stxi_d,u,v,w)
+ jit_code_stxr_d, jit_code_stxi_d,
+
+#define jit_bltr_d(v,w) jit_new_node_pww(jit_code_bltr_d,NULL,v,w)
+#define jit_blti_d(v,w) jit_new_node_pwd(jit_code_blti_d,NULL,v,w)
+ jit_code_bltr_d, jit_code_blti_d,
+#define jit_bler_d(v,w) jit_new_node_pww(jit_code_bler_d,NULL,v,w)
+#define jit_blei_d(v,w) jit_new_node_pwd(jit_code_blei_d,NULL,v,w)
+ jit_code_bler_d, jit_code_blei_d,
+#define jit_beqr_d(v,w) jit_new_node_pww(jit_code_beqr_d,NULL,v,w)
+#define jit_beqi_d(v,w) jit_new_node_pwd(jit_code_beqi_d,NULL,v,w)
+ jit_code_beqr_d, jit_code_beqi_d,
+#define jit_bger_d(v,w) jit_new_node_pww(jit_code_bger_d,NULL,v,w)
+#define jit_bgei_d(v,w) jit_new_node_pwd(jit_code_bgei_d,NULL,v,w)
+ jit_code_bger_d, jit_code_bgei_d,
+#define jit_bgtr_d(v,w) jit_new_node_pww(jit_code_bgtr_d,NULL,v,w)
+#define jit_bgti_d(v,w) jit_new_node_pwd(jit_code_bgti_d,NULL,v,w)
+ jit_code_bgtr_d, jit_code_bgti_d,
+#define jit_bner_d(v,w) jit_new_node_pww(jit_code_bner_d,NULL,v,w)
+#define jit_bnei_d(v,w) jit_new_node_pwd(jit_code_bnei_d,NULL,v,w)
+ jit_code_bner_d, jit_code_bnei_d,
+#define jit_bunltr_d(v,w) jit_new_node_pww(jit_code_bunltr_d,NULL,v,w)
+#define jit_bunlti_d(v,w) jit_new_node_pwd(jit_code_bunlti_d,NULL,v,w)
+ jit_code_bunltr_d, jit_code_bunlti_d,
+#define jit_bunler_d(v,w) jit_new_node_pww(jit_code_bunler_d,NULL,v,w)
+#define jit_bunlei_d(v,w) jit_new_node_pwd(jit_code_bunlei_d,NULL,v,w)
+ jit_code_bunler_d, jit_code_bunlei_d,
+#define jit_buneqr_d(v,w) jit_new_node_pww(jit_code_buneqr_d,NULL,v,w)
+#define jit_buneqi_d(v,w) jit_new_node_pwd(jit_code_buneqi_d,NULL,v,w)
+ jit_code_buneqr_d, jit_code_buneqi_d,
+#define jit_bunger_d(v,w) jit_new_node_pww(jit_code_bunger_d,NULL,v,w)
+#define jit_bungei_d(v,w) jit_new_node_pwd(jit_code_bungei_d,NULL,v,w)
+ jit_code_bunger_d, jit_code_bungei_d,
+#define jit_bungtr_d(v,w) jit_new_node_pww(jit_code_bungtr_d,NULL,v,w)
+#define jit_bungti_d(v,w) jit_new_node_pwd(jit_code_bungti_d,NULL,v,w)
+ jit_code_bungtr_d, jit_code_bungti_d,
+#define jit_bltgtr_d(v,w) jit_new_node_pww(jit_code_bltgtr_d,NULL,v,w)
+#define jit_bltgti_d(v,w) jit_new_node_pwd(jit_code_bltgti_d,NULL,v,w)
+ jit_code_bltgtr_d, jit_code_bltgti_d,
+#define jit_bordr_d(v,w) jit_new_node_pww(jit_code_bordr_d,NULL,v,w)
+#define jit_bordi_d(v,w) jit_new_node_pwd(jit_code_bordi_d,NULL,v,w)
+ jit_code_bordr_d, jit_code_bordi_d,
+#define jit_bunordr_d(v,w) jit_new_node_pww(jit_code_bunordr_d,NULL,v,w)
+#define jit_bunordi_d(v,w) jit_new_node_pwd(jit_code_bunordi_d,NULL,v,w)
+ jit_code_bunordr_d, jit_code_bunordi_d,
+
+#define jit_pushargr_d(u) _jit_pushargr_d(_jit,u)
+#define jit_pushargi_d(u) _jit_pushargi_d(_jit,u)
+ jit_code_pushargr_d, jit_code_pushargi_d,
+#define jit_retr_d(u) _jit_retr_d(_jit,u)
+#define jit_reti_d(u) _jit_reti_d(_jit,u)
+ jit_code_retr_d, jit_code_reti_d,
+#define jit_retval_d(u) _jit_retval_d(_jit,u)
+ jit_code_retval_d,
+
+ /* w* -> f|d */
+#define jit_movr_w_f(u, v) jit_new_node_ww(jit_code_movr_w_f, u, v)
+#define jit_movi_w_f(u,v) jit_new_node_ww(jit_code_movi_w_f, u, v)
+ jit_code_movr_w_f, jit_code_movi_w_f,
+#define jit_movr_ww_d(u, v, w) jit_new_node_www(jit_code_movr_ww_d, u, v, w)
+#define jit_movi_ww_d(u,v, w) jit_new_node_www(jit_code_movi_ww_d, u, v, w)
+ jit_code_movr_ww_d, jit_code_movi_ww_d,
+
+ /* w -> d */
+#define jit_movr_w_d(u, v) jit_new_node_ww(jit_code_movr_w_d, u, v)
+#define jit_movi_w_d(u,v) jit_new_node_ww(jit_code_movi_w_d, u, v)
+ jit_code_movr_w_d, jit_code_movi_w_d,
+
+ /* f|d -> w* */
+#define jit_movr_f_w(u, v) jit_new_node_ww(jit_code_movr_f_w, u, v)
+#define jit_movi_f_w(u, v) jit_new_node_wf(jit_code_movi_f_w, u, v)
+ jit_code_movr_f_w, jit_code_movi_f_w,
+#define jit_movr_d_ww(u, v, w) jit_new_node_www(jit_code_movr_d_ww, u, v, w)
+#define jit_movi_d_ww(u, v, w) jit_new_node_wwd(jit_code_movi_d_ww, u, v, w)
+ jit_code_movr_d_ww, jit_code_movi_d_ww,
+
+ /* d -> w */
+#define jit_movr_d_w(u, v) jit_new_node_ww(jit_code_movr_d_w, u, v)
+#define jit_movi_d_w(u, v) jit_new_node_wd(jit_code_movi_d_w, u, v)
+ jit_code_movr_d_w, jit_code_movi_d_w,
+
+#define jit_clor(u,v) jit_new_node_ww(jit_code_clor,u,v)
+#define jit_cloi(u,v) jit_new_node_ww(jit_code_cloi,u,v)
+ jit_code_clor, jit_code_cloi,
+
+#define jit_clzr(u,v) jit_new_node_ww(jit_code_clzr,u,v)
+#define jit_clzi(u,v) jit_new_node_ww(jit_code_clzi,u,v)
+ jit_code_clzr, jit_code_clzi,
+
+#define jit_ctor(u,v) jit_new_node_ww(jit_code_ctor,u,v)
+#define jit_ctoi(u,v) jit_new_node_ww(jit_code_ctoi,u,v)
+ jit_code_ctor, jit_code_ctoi,
+#define jit_ctzr(u,v) jit_new_node_ww(jit_code_ctzr,u,v)
+#define jit_ctzi(u,v) jit_new_node_ww(jit_code_ctzi,u,v)
+ jit_code_ctzr, jit_code_ctzi,
+
+#define jit_rbitr(u,v) jit_new_node_ww(jit_code_rbitr,u,v)
+#define jit_rbiti(u,v) jit_new_node_ww(jit_code_rbiti,u,v)
+ jit_code_rbitr, jit_code_rbiti,
+
+#define jit_popcntr(u,v) jit_new_node_ww(jit_code_popcntr,u,v)
+#define jit_popcnti(u,v) jit_new_node_ww(jit_code_popcnti,u,v)
+ jit_code_popcntr, jit_code_popcnti,
+
+#define jit_lrotr(u,v,w) jit_new_node_www(jit_code_lrotr,u,v,w)
+#define jit_lroti(u,v,w) jit_new_node_www(jit_code_lroti,u,v,w)
+ jit_code_lrotr, jit_code_lroti,
+#define jit_rrotr(u,v,w) jit_new_node_www(jit_code_rrotr,u,v,w)
+#define jit_rroti(u,v,w) jit_new_node_www(jit_code_rroti,u,v,w)
+ jit_code_rrotr, jit_code_rroti,
+
+#define jit_extr(u,v,w,x) jit_new_node_wwq(jit_code_extr, u, v, w, x)
+#define jit_exti(u,v,w,x) jit_new_node_wwq(jit_code_exti, u, v, w, x)
+ jit_code_extr, jit_code_exti,
+#define jit_extr_u(u,v,w,x) jit_new_node_wwq(jit_code_extr_u, u, v, w, x)
+#define jit_exti_u(u,v,w,x) jit_new_node_wwq(jit_code_exti_u, u, v, w, x)
+ jit_code_extr_u, jit_code_exti_u,
+#define jit_depr(u,v,w,x) jit_new_node_wwq(jit_code_depr, u, v, w, x)
+#define jit_depi(u,v,w,x) jit_new_node_wwq(jit_code_depi, u, v, w, x)
+ jit_code_depr, jit_code_depi,
+
+#define jit_qlshr(l,h,v,w) jit_new_node_qww(jit_code_qlshr,l,h,v,w)
+#define jit_qlshi(l,h,v,w) jit_new_node_qww(jit_code_qlshi,l,h,v,w)
+ jit_code_qlshr, jit_code_qlshi,
+#define jit_qlshr_u(l,h,v,w) jit_new_node_qww(jit_code_qlshr_u,l,h,v,w)
+#define jit_qlshi_u(l,h,v,w) jit_new_node_qww(jit_code_qlshi_u,l,h,v,w)
+ jit_code_qlshr_u, jit_code_qlshi_u,
+#define jit_qrshr(l,h,v,w) jit_new_node_qww(jit_code_qrshr,l,h,v,w)
+#define jit_qrshi(l,h,v,w) jit_new_node_qww(jit_code_qrshi,l,h,v,w)
+ jit_code_qrshr, jit_code_qrshi,
+#define jit_qrshr_u(l,h,v,w) jit_new_node_qww(jit_code_qrshr_u,l,h,v,w)
+#define jit_qrshi_u(l,h,v,w) jit_new_node_qww(jit_code_qrshi_u,l,h,v,w)
+ jit_code_qrshr_u, jit_code_qrshi_u,
+
+#define jit_unldr(u,v,w) jit_new_node_www(jit_code_unldr, u, v, w)
+#define jit_unldi(u,v,w) jit_new_node_www(jit_code_unldi, u, v, w)
+ jit_code_unldr, jit_code_unldi,
+#define jit_unldr_u(u,v,w) jit_new_node_www(jit_code_unldr_u, u, v, w)
+#define jit_unldi_u(u,v,w) jit_new_node_www(jit_code_unldi_u, u, v, w)
+ jit_code_unldr_u, jit_code_unldi_u,
+#define jit_unstr(u,v,w) jit_new_node_www(jit_code_unstr, u, v, w)
+#define jit_unsti(u,v,w) jit_new_node_www(jit_code_unsti, u, v, w)
+ jit_code_unstr, jit_code_unsti,
+
+#define jit_unldr_x(u,v,w) jit_new_node_www(jit_code_unldr_x, u, v, w)
+#define jit_unldi_x(u,v,w) jit_new_node_www(jit_code_unldi_x, u, v, w)
+ jit_code_unldr_x, jit_code_unldi_x,
+#define jit_unstr_x(u,v,w) jit_new_node_www(jit_code_unstr_x, u, v, w)
+#define jit_unsti_x(u,v,w) jit_new_node_www(jit_code_unsti_x, u, v, w)
+ jit_code_unstr_x, jit_code_unsti_x,
+
+#define jit_fmar_f(u,v,w,x) jit_new_node_wqw(jit_code_fmar_f, u, v, w, x)
+#define jit_fmai_f(u,v,w,x) _jit_fmai_f(_jit, u, v, w, x)
+ jit_code_fmar_f, jit_code_fmai_f,
+#define jit_fmsr_f(u,v,w,x) jit_new_node_wqw(jit_code_fmsr_f, u, v, w, x)
+#define jit_fmsi_f(u,v,w,x) _jit_fmsi_f(_jit, u, v, w, x)
+ jit_code_fmsr_f, jit_code_fmsi_f,
+#define jit_fmar_d(u,v,w,x) jit_new_node_wqw(jit_code_fmar_d, u, v, w, x)
+#define jit_fmai_d(u,v,w,x) _jit_fmai_d(_jit, u, v, w, x)
+ jit_code_fmar_d, jit_code_fmai_d,
+#define jit_fmsr_d(u,v,w,x) jit_new_node_wqw(jit_code_fmsr_d, u, v, w, x)
+#define jit_fmsi_d(u,v,w,x) _jit_fmsi_d(_jit, u, v, w, x)
+ jit_code_fmsr_d, jit_code_fmsi_d,
+
+#define jit_fnmar_f(u,v,w,x) jit_new_node_wqw(jit_code_fnmar_f, u, v, w, x)
+#define jit_fnmai_f(u,v,w,x) _jit_fnmai_f(_jit, u, v, w, x)
+ jit_code_fnmar_f, jit_code_fnmai_f,
+#define jit_fnmsr_f(u,v,w,x) jit_new_node_wqw(jit_code_fnmsr_f, u, v, w, x)
+#define jit_fnmsi_f(u,v,w,x) _jit_fnmsi_f(_jit, u, v, w, x)
+ jit_code_fnmsr_f, jit_code_fnmsi_f,
+#define jit_fnmar_d(u,v,w,x) jit_new_node_wqw(jit_code_fnmar_d, u, v, w, x)
+#define jit_fnmai_d(u,v,w,x) _jit_fnmai_d(_jit, u, v, w, x)
+ jit_code_fnmar_d, jit_code_fnmai_d,
+#define jit_fnmsr_d(u,v,w,x) jit_new_node_wqw(jit_code_fnmsr_d, u, v, w, x)
+#define jit_fnmsi_d(u,v,w,x) _jit_fnmsi_d(_jit, u, v, w, x)
+ jit_code_fnmsr_d, jit_code_fnmsi_d,
+
+#define jit_hmulr(u,v,w) jit_new_node_www(jit_code_hmulr,u,v,w)
+#define jit_hmuli(u,v,w) jit_new_node_www(jit_code_hmuli,u,v,w)
+ jit_code_hmulr, jit_code_hmuli,
+#define jit_hmulr_u(u,v,w) jit_new_node_www(jit_code_hmulr_u,u,v,w)
+#define jit_hmuli_u(u,v,w) jit_new_node_www(jit_code_hmuli_u,u,v,w)
+ jit_code_hmulr_u, jit_code_hmuli_u,
+
+#define jit_ldxbr_c(u,v,w) jit_new_node_www(jit_code_ldxbr_c,u,v,w)
+#define jit_ldxbi_c(u,v,w) jit_new_node_www(jit_code_ldxbi_c,u,v,w)
+ jit_code_ldxbr_c, jit_code_ldxbi_c,
+#define jit_ldxar_c(u,v,w) jit_new_node_www(jit_code_ldxar_c,u,v,w)
+#define jit_ldxai_c(u,v,w) jit_new_node_www(jit_code_ldxai_c,u,v,w)
+ jit_code_ldxar_c, jit_code_ldxai_c,
+#define jit_ldxbr_uc(u,v,w) jit_new_node_www(jit_code_ldxbr_uc,u,v,w)
+#define jit_ldxbi_uc(u,v,w) jit_new_node_www(jit_code_ldxbi_uc,u,v,w)
+ jit_code_ldxbr_uc, jit_code_ldxbi_uc,
+#define jit_ldxar_uc(u,v,w) jit_new_node_www(jit_code_ldxar_uc,u,v,w)
+#define jit_ldxai_uc(u,v,w) jit_new_node_www(jit_code_ldxai_uc,u,v,w)
+ jit_code_ldxar_uc, jit_code_ldxai_uc,
+#define jit_ldxbr_s(u,v,w) jit_new_node_www(jit_code_ldxbr_s,u,v,w)
+#define jit_ldxbi_s(u,v,w) jit_new_node_www(jit_code_ldxbi_s,u,v,w)
+ jit_code_ldxbr_s, jit_code_ldxbi_s,
+#define jit_ldxar_s(u,v,w) jit_new_node_www(jit_code_ldxar_s,u,v,w)
+#define jit_ldxai_s(u,v,w) jit_new_node_www(jit_code_ldxai_s,u,v,w)
+ jit_code_ldxar_s, jit_code_ldxai_s,
+#define jit_ldxbr_us(u,v,w) jit_new_node_www(jit_code_ldxbr_us,u,v,w)
+#define jit_ldxbi_us(u,v,w) jit_new_node_www(jit_code_ldxbi_us,u,v,w)
+ jit_code_ldxbr_us, jit_code_ldxbi_us,
+#define jit_ldxar_us(u,v,w) jit_new_node_www(jit_code_ldxar_us,u,v,w)
+#define jit_ldxai_us(u,v,w) jit_new_node_www(jit_code_ldxai_us,u,v,w)
+ jit_code_ldxar_us, jit_code_ldxai_us,
+#define jit_ldxbr_i(u,v,w) jit_new_node_www(jit_code_ldxbr_i,u,v,w)
+#define jit_ldxbi_i(u,v,w) jit_new_node_www(jit_code_ldxbi_i,u,v,w)
+ jit_code_ldxbr_i, jit_code_ldxbi_i,
+#define jit_ldxar_i(u,v,w) jit_new_node_www(jit_code_ldxar_i,u,v,w)
+#define jit_ldxai_i(u,v,w) jit_new_node_www(jit_code_ldxai_i,u,v,w)
+ jit_code_ldxar_i, jit_code_ldxai_i,
+#if __WORDSIZE == 32
+# define jit_ldxbr(u,v,w) jit_ldxbr_i(u,v,w)
+# define jit_ldxbi(u,v,w) jit_ldxbi_i(u,v,w)
+# define jit_ldxar(u,v,w) jit_ldxar_i(u,v,w)
+# define jit_ldxai(u,v,w) jit_ldxai_i(u,v,w)
+#else
+# define jit_ldxbr(u,v,w) jit_ldxbr_l(u,v,w)
+# define jit_ldxbi(u,v,w) jit_ldxbi_l(u,v,w)
+# define jit_ldxar(u,v,w) jit_ldxar_l(u,v,w)
+# define jit_ldxai(u,v,w) jit_ldxai_l(u,v,w)
+# define jit_ldxbr_ui(u,v,w) jit_new_node_www(jit_code_ldxbr_ui,u,v,w)
+# define jit_ldxbi_ui(u,v,w) jit_new_node_www(jit_code_ldxbi_ui,u,v,w)
+# define jit_ldxar_ui(u,v,w) jit_new_node_www(jit_code_ldxar_ui,u,v,w)
+# define jit_ldxai_ui(u,v,w) jit_new_node_www(jit_code_ldxai_ui,u,v,w)
+# define jit_ldxbr_l(u,v,w) jit_new_node_www(jit_code_ldxbr_l,u,v,w)
+# define jit_ldxbi_l(u,v,w) jit_new_node_www(jit_code_ldxbi_l,u,v,w)
+# define jit_ldxar_l(u,v,w) jit_new_node_www(jit_code_ldxar_l,u,v,w)
+# define jit_ldxai_l(u,v,w) jit_new_node_www(jit_code_ldxai_l,u,v,w)
+#endif
+ jit_code_ldxbr_ui, jit_code_ldxbi_ui,
+ jit_code_ldxar_ui, jit_code_ldxai_ui,
+ jit_code_ldxbr_l, jit_code_ldxbi_l,
+ jit_code_ldxar_l, jit_code_ldxai_l,
+# define jit_ldxbr_f(u,v,w) jit_new_node_www(jit_code_ldxbr_f,u,v,w)
+# define jit_ldxbi_f(u,v,w) jit_new_node_www(jit_code_ldxbi_f,u,v,w)
+# define jit_ldxar_f(u,v,w) jit_new_node_www(jit_code_ldxar_f,u,v,w)
+# define jit_ldxai_f(u,v,w) jit_new_node_www(jit_code_ldxai_f,u,v,w)
+ jit_code_ldxbr_f, jit_code_ldxbi_f,
+ jit_code_ldxar_f, jit_code_ldxai_f,
+# define jit_ldxbr_d(u,v,w) jit_new_node_www(jit_code_ldxbr_d,u,v,w)
+# define jit_ldxbi_d(u,v,w) jit_new_node_www(jit_code_ldxbi_d,u,v,w)
+# define jit_ldxar_d(u,v,w) jit_new_node_www(jit_code_ldxar_d,u,v,w)
+# define jit_ldxai_d(u,v,w) jit_new_node_www(jit_code_ldxai_d,u,v,w)
+ jit_code_ldxbr_d, jit_code_ldxbi_d,
+ jit_code_ldxar_d, jit_code_ldxai_d,
+#define jit_stxbr_c(u,v,w) jit_new_node_www(jit_code_stxbr_c,u,v,w)
+#define jit_stxbi_c(u,v,w) jit_new_node_www(jit_code_stxbi_c,u,v,w)
+#define jit_stxar_c(u,v,w) jit_new_node_www(jit_code_stxar_c,u,v,w)
+#define jit_stxai_c(u,v,w) jit_new_node_www(jit_code_stxai_c,u,v,w)
+ jit_code_stxbr_c, jit_code_stxbi_c,
+ jit_code_stxar_c, jit_code_stxai_c,
+#define jit_stxbr_s(u,v,w) jit_new_node_www(jit_code_stxbr_s,u,v,w)
+#define jit_stxbi_s(u,v,w) jit_new_node_www(jit_code_stxbi_s,u,v,w)
+#define jit_stxar_s(u,v,w) jit_new_node_www(jit_code_stxar_s,u,v,w)
+#define jit_stxai_s(u,v,w) jit_new_node_www(jit_code_stxai_s,u,v,w)
+ jit_code_stxbr_s, jit_code_stxbi_s,
+ jit_code_stxar_s, jit_code_stxai_s,
+#define jit_stxbr_i(u,v,w) jit_new_node_www(jit_code_stxbr_i,u,v,w)
+#define jit_stxbi_i(u,v,w) jit_new_node_www(jit_code_stxbi_i,u,v,w)
+#define jit_stxar_i(u,v,w) jit_new_node_www(jit_code_stxar_i,u,v,w)
+#define jit_stxai_i(u,v,w) jit_new_node_www(jit_code_stxai_i,u,v,w)
+ jit_code_stxbr_i, jit_code_stxbi_i,
+ jit_code_stxar_i, jit_code_stxai_i,
+#if __WORDSIZE == 32
+# define jit_stxbr(u,v,w) jit_stxbr_i(u,v,w)
+# define jit_stxbi(u,v,w) jit_stxbi_i(u,v,w)
+# define jit_stxar(u,v,w) jit_stxar_i(u,v,w)
+# define jit_stxai(u,v,w) jit_stxai_i(u,v,w)
+#else
+# define jit_stxbr(u,v,w) jit_stxbr_l(u,v,w)
+# define jit_stxbi(u,v,w) jit_stxbi_l(u,v,w)
+# define jit_stxar(u,v,w) jit_stxar_l(u,v,w)
+# define jit_stxai(u,v,w) jit_stxai_l(u,v,w)
+# define jit_stxbr_l(u,v,w) jit_new_node_www(jit_code_stxbr_l,u,v,w)
+# define jit_stxbi_l(u,v,w) jit_new_node_www(jit_code_stxbi_l,u,v,w)
+# define jit_stxar_l(u,v,w) jit_new_node_www(jit_code_stxar_l,u,v,w)
+# define jit_stxai_l(u,v,w) jit_new_node_www(jit_code_stxai_l,u,v,w)
+#endif
+ jit_code_stxbr_l, jit_code_stxbi_l,
+ jit_code_stxar_l, jit_code_stxai_l,
+# define jit_stxbr_f(u,v,w) jit_new_node_www(jit_code_stxbr_f,u,v,w)
+# define jit_stxbi_f(u,v,w) jit_new_node_www(jit_code_stxbi_f,u,v,w)
+# define jit_stxar_f(u,v,w) jit_new_node_www(jit_code_stxar_f,u,v,w)
+# define jit_stxai_f(u,v,w) jit_new_node_www(jit_code_stxai_f,u,v,w)
+ jit_code_stxbr_f, jit_code_stxbi_f,
+ jit_code_stxar_f, jit_code_stxai_f,
+# define jit_stxbr_d(u,v,w) jit_new_node_www(jit_code_stxbr_d,u,v,w)
+# define jit_stxbi_d(u,v,w) jit_new_node_www(jit_code_stxbi_d,u,v,w)
+# define jit_stxar_d(u,v,w) jit_new_node_www(jit_code_stxar_d,u,v,w)
+# define jit_stxai_d(u,v,w) jit_new_node_www(jit_code_stxai_d,u,v,w)
+ jit_code_stxbr_d, jit_code_stxbi_d,
+ jit_code_stxar_d, jit_code_stxai_d,
+
+ jit_code_last_code
+} jit_code_t;
+
+/* Allocator hook types with malloc/realloc/free-compatible signatures.
+ * NOTE(review): presumably installed by the embedder to override the
+ * library's memory allocation (lightning's jit_set_memory_functions
+ * mechanism) -- confirm against the GNU lightning manual. */
+typedef void* (*jit_alloc_func_ptr) (size_t);
+typedef void* (*jit_realloc_func_ptr) (void*, size_t);
+typedef void (*jit_free_func_ptr) (void*);
+
+/*
+ * Prototypes
+ */
+/* Convention used throughout this header: each jit_foo(...) macro
+ * expands to _jit_foo(_jit, ...), passing the caller's implicit
+ * current-state variable `_jit` as the first argument. */
+/* One-time library initialization and teardown. */
+extern void init_jit_with_debug(const char*,FILE*);
+extern void init_jit(const char*);
+extern void finish_jit(void);
+
+/* Per-compilation state object: create, release intermediate data,
+ * and destroy. */
+extern jit_state_t *jit_new_state(void);
+#define jit_clear_state() _jit_clear_state(_jit)
+extern void _jit_clear_state(jit_state_t*);
+#define jit_destroy_state() _jit_destroy_state(_jit)
+extern void _jit_destroy_state(jit_state_t*);
+
+/* Node address lookup, naming/annotation, and label creation. */
+#define jit_address(node) _jit_address(_jit, node)
+extern jit_pointer_t _jit_address(jit_state_t*, jit_node_t*);
+extern jit_node_t *_jit_name(jit_state_t*, const char*);
+extern jit_node_t *_jit_note(jit_state_t*, const char*, int);
+extern jit_node_t *_jit_label(jit_state_t*);
+extern jit_node_t *_jit_forward(jit_state_t*);
+extern jit_node_t *_jit_indirect(jit_state_t*);
+extern void _jit_link(jit_state_t*, jit_node_t*);
+/* Predicates over node kinds (forward/indirect/branch-target). */
+#define jit_forward_p(u) _jit_forward_p(_jit,u)
+extern jit_bool_t _jit_forward_p(jit_state_t*,jit_node_t*);
+#define jit_indirect_p(u) _jit_indirect_p(_jit,u)
+extern jit_bool_t _jit_indirect_p(jit_state_t*,jit_node_t*);
+#define jit_target_p(u) _jit_target_p(_jit,u)
+extern jit_bool_t _jit_target_p(jit_state_t*,jit_node_t*);
+
+extern void _jit_prolog(jit_state_t*);
+
+/* Stack-frame allocation (immediate and register-sized variants). */
+extern jit_int32_t _jit_allocai(jit_state_t*, jit_int32_t);
+extern void _jit_allocar(jit_state_t*, jit_int32_t, jit_int32_t);
+extern void _jit_ellipsis(jit_state_t*);
+
+extern jit_node_t *_jit_arg(jit_state_t*, jit_code_t);
+
+/* Fetch incoming integer arguments into registers; sized variants,
+ * with ui/l forms only present on 64-bit targets. */
+extern void _jit_getarg_c(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_uc(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_s(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_us(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_i(jit_state_t*, jit_gpr_t, jit_node_t*);
+#if __WORDSIZE == 64
+extern void _jit_getarg_ui(jit_state_t*, jit_gpr_t, jit_node_t*);
+extern void _jit_getarg_l(jit_state_t*, jit_gpr_t, jit_node_t*);
+#endif
+
+/* Store back into an argument slot; the jit_code_t parameter selects
+ * the operand width/signedness variant. */
+extern void _jit_putargr(jit_state_t*, jit_gpr_t, jit_node_t*, jit_code_t);
+extern void _jit_putargi(jit_state_t*, jit_word_t, jit_node_t*, jit_code_t);
+
+/* Outgoing-call construction. NOTE(review): presumably the sequence is
+ * prepare -> pushargr/pushargi... -> finishr/finishi; confirm with the
+ * lightning manual. */
+extern void _jit_prepare(jit_state_t*);
+/* NOTE(review): duplicate redeclaration of _jit_ellipsis (also declared
+ * in the allocai group above); redundant but legal C. */
+extern void _jit_ellipsis(jit_state_t*);
+extern void _jit_va_push(jit_state_t*, jit_gpr_t);
+
+extern void _jit_pushargr(jit_state_t*, jit_gpr_t, jit_code_t);
+extern void _jit_pushargi(jit_state_t*, jit_word_t, jit_code_t);
+
+extern void _jit_finishr(jit_state_t*, jit_gpr_t);
+extern jit_node_t *_jit_finishi(jit_state_t*, jit_pointer_t);
+extern void _jit_ret(jit_state_t*);
+
+/* Return a value from the JIT'd function (variant chosen by jit_code_t). */
+extern void _jit_retr(jit_state_t*, jit_gpr_t, jit_code_t);
+extern void _jit_reti(jit_state_t*, jit_word_t, jit_code_t);
+
+/* Retrieve a callee's integer return value; ui/l forms 64-bit only. */
+extern void _jit_retval_c(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_uc(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_s(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_us(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_i(jit_state_t*, jit_gpr_t);
+#if __WORDSIZE == 64
+extern void _jit_retval_ui(jit_state_t*, jit_gpr_t);
+extern void _jit_retval_l(jit_state_t*, jit_gpr_t);
+#endif
+
+extern void _jit_epilog(jit_state_t*);
+
+/* Branch patching, code/data buffer accessors, and final emission. */
+#define jit_patch(u) _jit_patch(_jit,u)
+extern void _jit_patch(jit_state_t*, jit_node_t*);
+#define jit_patch_at(u,v) _jit_patch_at(_jit,u,v)
+extern void _jit_patch_at(jit_state_t*, jit_node_t*, jit_node_t*);
+#define jit_patch_abs(u,v) _jit_patch_abs(_jit,u,v)
+extern void _jit_patch_abs(jit_state_t*, jit_node_t*, jit_pointer_t);
+#define jit_realize() _jit_realize(_jit)
+extern void _jit_realize(jit_state_t*);
+#define jit_get_code(u) _jit_get_code(_jit,u)
+extern jit_pointer_t _jit_get_code(jit_state_t*, jit_word_t*);
+#define jit_set_code(u,v) _jit_set_code(_jit,u,v)
+extern void _jit_set_code(jit_state_t*, jit_pointer_t, jit_word_t);
+#define jit_get_data(u,v) _jit_get_data(_jit,u,v)
+extern jit_pointer_t _jit_get_data(jit_state_t*, jit_word_t*, jit_word_t*);
+#define jit_set_data(u,v,w) _jit_set_data(_jit,u,v,w)
+extern void _jit_set_data(jit_state_t*, jit_pointer_t, jit_word_t, jit_word_t);
+#define jit_frame(u) _jit_frame(_jit,u)
+extern void _jit_frame(jit_state_t*, jit_int32_t);
+#define jit_tramp(u) _jit_tramp(_jit,u)
+extern void _jit_tramp(jit_state_t*, jit_int32_t);
+#define jit_emit() _jit_emit(_jit)
+extern jit_pointer_t _jit_emit(jit_state_t*);
+/* Toggle write-protection on the emitted code buffer. */
+#define jit_unprotect() _jit_unprotect(_jit)
+extern void _jit_unprotect(jit_state_t*);
+#define jit_protect() _jit_protect(_jit)
+extern void _jit_protect(jit_state_t*);
+
+#define jit_print() _jit_print(_jit)
+extern void _jit_print(jit_state_t*);
+
+/* Single-precision float argument/return helpers (list continues past
+ * this hunk). */
+extern jit_node_t *_jit_arg_f(jit_state_t*);
+extern void _jit_getarg_f(jit_state_t*, jit_fpr_t, jit_node_t*);
+extern void _jit_putargr_f(jit_state_t*, jit_fpr_t, jit_node_t*);
+extern void _jit_putargi_f(jit_state_t*, jit_float32_t, jit_node_t*);
+extern void _jit_pushargr_f(jit_state_t*, jit_fpr_t);
+extern void _jit_pushargi_f(jit_state_t*, jit_float32_t);
+extern void _jit_retr_f(jit_state_t*, jit_fpr_t);
+extern void _jit_reti_f(jit_state_t*, jit_float32_t);
+extern void _jit_retval_f(jit_state_t*, jit_fpr_t);
+extern void _jit_negi_f(jit_state_t*, jit_fpr_t, jit_float32_t);
+extern void _jit_absi_f(jit_state_t*, jit_fpr_t, jit_float32_t);
+extern void _jit_sqrti_f(jit_state_t*, jit_fpr_t, jit_float32_t);
+extern void _jit_fmai_f(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
+extern void _jit_fmsi_f(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
+extern void _jit_fnmai_f(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
+extern void _jit_fnmsi_f(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float32_t);
+
+extern jit_node_t *_jit_arg_d(jit_state_t*);
+extern void _jit_getarg_d(jit_state_t*, jit_fpr_t, jit_node_t*);
+extern void _jit_putargr_d(jit_state_t*, jit_fpr_t, jit_node_t*);
+extern void _jit_putargi_d(jit_state_t*, jit_float64_t, jit_node_t*);
+extern void _jit_pushargr_d(jit_state_t*, jit_fpr_t);
+extern void _jit_pushargi_d(jit_state_t*, jit_float64_t);
+extern void _jit_retr_d(jit_state_t*, jit_fpr_t);
+extern void _jit_reti_d(jit_state_t*, jit_float64_t);
+extern void _jit_retval_d(jit_state_t*, jit_fpr_t);
+extern void _jit_negi_d(jit_state_t*, jit_fpr_t, jit_float64_t);
+extern void _jit_absi_d(jit_state_t*, jit_fpr_t, jit_float64_t);
+extern void _jit_sqrti_d(jit_state_t*, jit_fpr_t, jit_float64_t);
+extern void _jit_fmai_d(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
+extern void _jit_fmsi_d(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
+extern void _jit_fnmai_d(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
+extern void _jit_fnmsi_d(jit_state_t*,
+ jit_fpr_t, jit_fpr_t, jit_fpr_t, jit_float64_t);
+
+#define jit_get_reg(s) _jit_get_reg(_jit,s)
+extern jit_int32_t _jit_get_reg(jit_state_t*, jit_int32_t);
+
+#define jit_unget_reg(r) _jit_unget_reg(_jit,r)
+extern void _jit_unget_reg(jit_state_t*, jit_int32_t);
+
+#define jit_new_node(c) _jit_new_node(_jit,c)
+extern jit_node_t *_jit_new_node(jit_state_t*, jit_code_t);
+#define jit_new_node_w(c,u) _jit_new_node_w(_jit,c,u)
+extern jit_node_t *_jit_new_node_w(jit_state_t*, jit_code_t,
+ jit_word_t);
+#define jit_new_node_f(c,u) _jit_new_node_f(_jit,c,u)
+extern jit_node_t *_jit_new_node_f(jit_state_t*, jit_code_t,
+ jit_float32_t);
+#define jit_new_node_d(c,u) _jit_new_node_d(_jit,c,u)
+extern jit_node_t *_jit_new_node_d(jit_state_t*, jit_code_t,
+ jit_float64_t);
+#define jit_new_node_p(c,u) _jit_new_node_p(_jit,c,u)
+extern jit_node_t *_jit_new_node_p(jit_state_t*, jit_code_t,
+ jit_pointer_t);
+#define jit_new_node_ww(c,u,v) _jit_new_node_ww(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_ww(jit_state_t*,jit_code_t,
+ jit_word_t, jit_word_t);
+#define jit_new_node_wp(c,u,v) _jit_new_node_wp(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_wp(jit_state_t*,jit_code_t,
+ jit_word_t, jit_pointer_t);
+#define jit_new_node_fp(c,u,v) _jit_new_node_fp(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_fp(jit_state_t*,jit_code_t,
+ jit_float32_t, jit_pointer_t);
+#define jit_new_node_dp(c,u,v) _jit_new_node_dp(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_dp(jit_state_t*,jit_code_t,
+ jit_float64_t, jit_pointer_t);
+#define jit_new_node_pw(c,u,v) _jit_new_node_pw(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_pw(jit_state_t*,jit_code_t,
+ jit_pointer_t, jit_word_t);
+#define jit_new_node_wf(c,u,v) _jit_new_node_wf(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_wf(jit_state_t*, jit_code_t,
+ jit_word_t, jit_float32_t);
+#define jit_new_node_wd(c,u,v) _jit_new_node_wd(_jit,c,u,v)
+extern jit_node_t *_jit_new_node_wd(jit_state_t*, jit_code_t,
+ jit_word_t, jit_float64_t);
+#define jit_new_node_www(c,u,v,w) _jit_new_node_www(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_www(jit_state_t*, jit_code_t,
+ jit_word_t, jit_word_t, jit_word_t);
+#define jit_new_node_qww(c,l,h,v,w) _jit_new_node_qww(_jit,c,l,h,v,w)
+extern jit_node_t *_jit_new_node_qww(jit_state_t*, jit_code_t,
+ jit_int32_t, jit_int32_t,
+ jit_word_t, jit_word_t);
+#define jit_new_node_wqw(c,u,l,h,w) _jit_new_node_wqw(_jit,c,u,l,h,w)
+extern jit_node_t *_jit_new_node_wqw(jit_state_t*, jit_code_t,
+ jit_word_t, jit_int32_t,
+ jit_int32_t, jit_word_t);
+#define jit_new_node_wwq(c,u,v,l,h) _jit_new_node_wwq(_jit,c,u,v,l,h)
+extern jit_node_t *_jit_new_node_wwq(jit_state_t*, jit_code_t,
+ jit_word_t, jit_word_t,
+ jit_int32_t, jit_int32_t);
+#define jit_new_node_wwf(c,u,v,w) _jit_new_node_wwf(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_wwf(jit_state_t*, jit_code_t,
+ jit_word_t, jit_word_t, jit_float32_t);
+#define jit_new_node_wqf(c,u,l,h,w) _jit_new_node_wqf(_jit,c,u,l,h,w)
+extern jit_node_t *_jit_new_node_wqf(jit_state_t*, jit_code_t,
+ jit_word_t, jit_int32_t,
+ jit_int32_t, jit_float32_t);
+#define jit_new_node_wwd(c,u,v,w) _jit_new_node_wwd(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_wwd(jit_state_t*, jit_code_t,
+ jit_word_t, jit_word_t, jit_float64_t);
+#define jit_new_node_wqd(c,u,l,h,w) _jit_new_node_wqd(_jit,c,u,l,h,w)
+extern jit_node_t *_jit_new_node_wqd(jit_state_t*, jit_code_t,
+ jit_word_t, jit_int32_t,
+ jit_int32_t, jit_float64_t);
+#define jit_new_node_pww(c,u,v,w) _jit_new_node_pww(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_pww(jit_state_t*, jit_code_t,
+ jit_pointer_t, jit_word_t, jit_word_t);
+#define jit_new_node_pwf(c,u,v,w) _jit_new_node_pwf(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_pwf(jit_state_t*, jit_code_t,
+ jit_pointer_t, jit_word_t, jit_float32_t);
+#define jit_new_node_pwd(c,u,v,w) _jit_new_node_pwd(_jit,c,u,v,w)
+extern jit_node_t *_jit_new_node_pwd(jit_state_t*, jit_code_t,
+ jit_pointer_t, jit_word_t, jit_float64_t);
+
+#define jit_arg_register_p(u) _jit_arg_register_p(_jit,u)
+extern jit_bool_t _jit_arg_register_p(jit_state_t*, jit_node_t*);
+#define jit_callee_save_p(u) _jit_callee_save_p(_jit,u)
+extern jit_bool_t _jit_callee_save_p(jit_state_t*, jit_int32_t);
+#define jit_pointer_p(u) _jit_pointer_p(_jit,u)
+extern jit_bool_t _jit_pointer_p(jit_state_t*,jit_pointer_t);
+
+#define jit_get_note(n,u,v,w) _jit_get_note(_jit,n,u,v,w)
+extern jit_bool_t _jit_get_note(jit_state_t*,jit_pointer_t,char**,char**,int*);
+
+#define jit_disassemble() _jit_disassemble(_jit)
+extern void _jit_disassemble(jit_state_t*);
+
+extern void jit_set_memory_functions(jit_alloc_func_ptr,
+ jit_realloc_func_ptr,
+ jit_free_func_ptr);
+extern void jit_get_memory_functions(jit_alloc_func_ptr*,
+ jit_realloc_func_ptr*,
+ jit_free_func_ptr*);
+#endif /* _lightning_h */
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2019-2021 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTREC_CONFIG_H__
+#define __LIGHTREC_CONFIG_H__
+
+#define ENABLE_THREADED_COMPILER LIGHTREC_ENABLE_THREADED_COMPILER
+#define ENABLE_FIRST_PASS 1
+#define ENABLE_DISASSEMBLER LIGHTREC_ENABLE_DISASSEMBLER
+#define ENABLE_CODE_BUFFER 1
+
+#define HAS_DEFAULT_ELM 1
+
+#define OPT_REMOVE_DIV_BY_ZERO_SEQ 1
+#define OPT_REPLACE_MEMSET LIGHTREC_NO_DEBUG
+#define OPT_DETECT_IMPOSSIBLE_BRANCHES 1
+#define OPT_HANDLE_LOAD_DELAYS 1
+#define OPT_TRANSFORM_OPS 1
+#define OPT_LOCAL_BRANCHES 1
+#define OPT_SWITCH_DELAY_SLOTS 1
+#define OPT_FLAG_IO 1
+#define OPT_FLAG_MULT_DIV LIGHTREC_NO_DEBUG
+#define OPT_EARLY_UNLOAD 1
+#define OPT_PRELOAD_PC 1
+
+#define OPT_SH4_USE_GBR 0
+
+#endif /* __LIGHTREC_CONFIG_H__ */
+
--- /dev/null
+#include <mman.h>
#define PSE_PAD_WARN 80
-typedef struct
+typedef struct PadDataS
{
// controller type - fill it withe predefined values above
unsigned char controllerType;
-$(error This file is unmaintained. Please use the libretro fork: https://github.com/libretro/pcsx_rearmed)
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+$(shell cd "$(LOCAL_PATH)" && ((git describe --always || echo) | sed -e 's/.*/#define REV "\0"/' > ../include/revision.h_))
+$(shell cd "$(LOCAL_PATH)" && (diff -q ../include/revision.h_ ../include/revision.h > /dev/null 2>&1 || cp ../include/revision.h_ ../include/revision.h))
+$(shell cd "$(LOCAL_PATH)" && (rm ../include/revision.h_))
+
+USE_LIBRETRO_VFS ?= 0
+USE_ASYNC_CDROM ?= 1
+USE_RTHREADS ?= 0
+NDRC_THREAD ?= 1
+
+ROOT_DIR := $(LOCAL_PATH)/..
+CORE_DIR := $(ROOT_DIR)/libpcsxcore
+SPU_DIR := $(ROOT_DIR)/plugins/dfsound
+GPU_DIR := $(ROOT_DIR)/plugins/gpulib
+CDR_DIR := $(ROOT_DIR)/plugins/cdrcimg
+FRONTEND_DIR := $(ROOT_DIR)/frontend
+NEON_DIR := $(ROOT_DIR)/plugins/gpu_neon
+UNAI_DIR := $(ROOT_DIR)/plugins/gpu_unai
+PEOPS_DIR := $(ROOT_DIR)/plugins/dfxvideo
+DYNAREC_DIR := $(ROOT_DIR)/libpcsxcore/new_dynarec
+DEPS_DIR := $(ROOT_DIR)/deps
+LIBRETRO_COMMON := $(DEPS_DIR)/libretro-common
+EXTRA_INCLUDES :=
+COREFLAGS :=
+SOURCES_ASM :=
+
+# core
+SOURCES_C := $(CORE_DIR)/cdriso.c \
+ $(CORE_DIR)/cdrom.c \
+ $(CORE_DIR)/cdrom-async.c \
+ $(CORE_DIR)/cheat.c \
+ $(CORE_DIR)/database.c \
+ $(CORE_DIR)/decode_xa.c \
+ $(CORE_DIR)/mdec.c \
+ $(CORE_DIR)/misc.c \
+ $(CORE_DIR)/plugins.c \
+ $(CORE_DIR)/ppf.c \
+ $(CORE_DIR)/psxbios.c \
+ $(CORE_DIR)/psxcommon.c \
+ $(CORE_DIR)/psxcounters.c \
+ $(CORE_DIR)/psxdma.c \
+ $(CORE_DIR)/psxevents.c \
+ $(CORE_DIR)/psxhw.c \
+ $(CORE_DIR)/psxinterpreter.c \
+ $(CORE_DIR)/psxmem.c \
+ $(CORE_DIR)/r3000a.c \
+ $(CORE_DIR)/sio.c \
+ $(CORE_DIR)/spu.c \
+ $(CORE_DIR)/gpu.c \
+ $(CORE_DIR)/gte.c \
+ $(CORE_DIR)/gte_nf.c \
+ $(CORE_DIR)/gte_divider.c
+
+# spu
+SOURCES_C += $(SPU_DIR)/dma.c \
+ $(SPU_DIR)/freeze.c \
+ $(SPU_DIR)/registers.c \
+ $(SPU_DIR)/spu.c \
+ $(SPU_DIR)/out.c \
+ $(SPU_DIR)/nullsnd.c
+
+# gpu
+SOURCES_C += $(GPU_DIR)/gpu.c \
+ $(GPU_DIR)/prim.c \
+ $(GPU_DIR)/vout_pl.c
+
+# cdrcimg
+SOURCES_C += $(CDR_DIR)/cdrcimg.c
+
+# frontend
+SOURCES_C += $(FRONTEND_DIR)/main.c \
+ $(FRONTEND_DIR)/plugin.c \
+ $(FRONTEND_DIR)/cspace.c \
+ $(FRONTEND_DIR)/libretro.c
+
+# libchdr
+LCHDR = $(DEPS_DIR)/libchdr
+LCHDR_LZMA = $(LCHDR)/deps/lzma-24.05
+LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.6/lib
+SOURCES_C += \
+ $(LCHDR)/src/libchdr_bitstream.c \
+ $(LCHDR)/src/libchdr_cdrom.c \
+ $(LCHDR)/src/libchdr_chd.c \
+ $(LCHDR)/src/libchdr_flac.c \
+ $(LCHDR)/src/libchdr_huffman.c \
+ $(LCHDR_LZMA)/src/Alloc.c \
+ $(LCHDR_LZMA)/src/CpuArch.c \
+ $(LCHDR_LZMA)/src/Delta.c \
+ $(LCHDR_LZMA)/src/LzFind.c \
+ $(LCHDR_LZMA)/src/LzmaDec.c \
+ $(LCHDR_LZMA)/src/LzmaEnc.c \
+ $(LCHDR_LZMA)/src/Sort.c \
+ $(LCHDR_ZSTD)/common/entropy_common.c \
+ $(LCHDR_ZSTD)/common/error_private.c \
+ $(LCHDR_ZSTD)/common/fse_decompress.c \
+ $(LCHDR_ZSTD)/common/xxhash.c \
+ $(LCHDR_ZSTD)/common/zstd_common.c \
+ $(LCHDR_ZSTD)/decompress/huf_decompress.c \
+ $(LCHDR_ZSTD)/decompress/zstd_ddict.c \
+ $(LCHDR_ZSTD)/decompress/zstd_decompress_block.c \
+ $(LCHDR_ZSTD)/decompress/zstd_decompress.c
+EXTRA_INCLUDES += $(LCHDR)/include $(LCHDR_LZMA)/include $(LCHDR_ZSTD)
+COREFLAGS += -DHAVE_CHD -DZ7_ST -DZSTD_DISABLE_ASM
+ifeq (,$(call gte,$(APP_PLATFORM_LEVEL),18))
+ifneq ($(TARGET_ARCH_ABI),arm64-v8a)
+# HACK
+COREFLAGS += -Dgetauxval=0*
+endif
+endif
+
+COREFLAGS += -ffast-math -DHAVE_LIBRETRO -DNO_FRONTEND -DANDROID -DREARMED
+COREFLAGS += -DP_HAVE_MMAP=1 -DP_HAVE_PTHREAD=1 -DP_HAVE_POSIX_MEMALIGN=1
+
+ifeq ($(USE_LIBRETRO_VFS),1)
+SOURCES_C += \
+ $(LIBRETRO_COMMON)/compat/compat_posix_string.c \
+ $(LIBRETRO_COMMON)/compat/fopen_utf8.c \
+ $(LIBRETRO_COMMON)/encodings/compat_strl.c \
+ $(LIBRETRO_COMMON)/encodings/encoding_utf.c \
+ $(LIBRETRO_COMMON)/file/file_path.c \
+ $(LIBRETRO_COMMON)/streams/file_stream.c \
+ $(LIBRETRO_COMMON)/streams/file_stream_transforms.c \
+ $(LIBRETRO_COMMON)/string/stdstring.c \
+ $(LIBRETRO_COMMON)/time/rtime.c \
+ $(LIBRETRO_COMMON)/vfs/vfs_implementation.c
+COREFLAGS += -DUSE_LIBRETRO_VFS
+endif
+EXTRA_INCLUDES += $(LIBRETRO_COMMON)/include
+
+USE_RTHREADS=0
+HAVE_ARI64=0
+HAVE_LIGHTREC=0
+LIGHTREC_CUSTOM_MAP=0
+LIGHTREC_THREADED_COMPILER=0
+HAVE_GPU_NEON=0
+ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+ HAVE_ARI64=1
+ HAVE_GPU_NEON=1
+else ifeq ($(TARGET_ARCH_ABI),armeabi)
+ HAVE_ARI64=1
+else ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
+ HAVE_ARI64=1
+ HAVE_GPU_NEON=1
+else ifeq ($(TARGET_ARCH_ABI),x86_64)
+ HAVE_LIGHTREC=1
+ HAVE_GPU_NEON=1
+else ifeq ($(TARGET_ARCH_ABI),x86)
+ HAVE_LIGHTREC=1
+ HAVE_GPU_NEON=1
+else
+ COREFLAGS += -DDRC_DISABLE
+endif
+ COREFLAGS += -DLIGHTREC_CUSTOM_MAP=$(LIGHTREC_CUSTOM_MAP)
+ COREFLAGS += -DLIGHTREC_ENABLE_THREADED_COMPILER=$(LIGHTREC_THREADED_COMPILER)
+ COREFLAGS += -DLIGHTREC_ENABLE_DISASSEMBLER=$(or $(LIGHTREC_DEBUG),0)
+ COREFLAGS += -DLIGHTREC_NO_DEBUG=$(if $(LIGHTREC_DEBUG),0,1)
+
+ifeq ($(HAVE_ARI64),1)
+ SOURCES_C += $(DYNAREC_DIR)/new_dynarec.c \
+ $(DYNAREC_DIR)/pcsxmem.c
+ ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
+ SOURCES_ASM += $(DYNAREC_DIR)/linkage_arm64.S
+ else
+ SOURCES_ASM += $(CORE_DIR)/gte_arm.S \
+ $(SPU_DIR)/arm_utils.S \
+ $(DYNAREC_DIR)/linkage_arm.S
+ endif
+ ifeq ($(NDRC_THREAD),1)
+ COREFLAGS += -DNDRC_THREAD
+ USE_RTHREADS := 1
+ endif
+endif
+ SOURCES_C += $(DYNAREC_DIR)/emu_if.c
+
+ifeq ($(HAVE_LIGHTREC),1)
+ COREFLAGS += -DLIGHTREC -DLIGHTREC_STATIC -DLIGHTREC_CODE_INV=0
+ EXTRA_INCLUDES += $(DEPS_DIR)/lightning/include \
+ $(DEPS_DIR)/lightrec \
+ $(DEPS_DIR)/lightrec/tlsf \
+ $(ROOT_DIR)/include/lightning \
+ $(ROOT_DIR)/include/lightrec
+ SOURCES_C += $(DEPS_DIR)/lightrec/blockcache.c \
+ $(DEPS_DIR)/lightrec/constprop.c \
+ $(DEPS_DIR)/lightrec/disassembler.c \
+ $(DEPS_DIR)/lightrec/emitter.c \
+ $(DEPS_DIR)/lightrec/interpreter.c \
+ $(DEPS_DIR)/lightrec/lightrec.c \
+ $(DEPS_DIR)/lightrec/memmanager.c \
+ $(DEPS_DIR)/lightrec/optimizer.c \
+ $(DEPS_DIR)/lightrec/regcache.c \
+ $(DEPS_DIR)/lightrec/recompiler.c \
+ $(DEPS_DIR)/lightrec/reaper.c \
+ $(DEPS_DIR)/lightrec/tlsf/tlsf.c
+ SOURCES_C += $(DEPS_DIR)/lightning/lib/jit_disasm.c \
+ $(DEPS_DIR)/lightning/lib/jit_memory.c \
+ $(DEPS_DIR)/lightning/lib/jit_names.c \
+ $(DEPS_DIR)/lightning/lib/jit_note.c \
+ $(DEPS_DIR)/lightning/lib/jit_print.c \
+ $(DEPS_DIR)/lightning/lib/jit_size.c \
+ $(DEPS_DIR)/lightning/lib/lightning.c
+ SOURCES_C += $(CORE_DIR)/lightrec/plugin.c
+ifeq ($(LIGHTREC_CUSTOM_MAP),1)
+ SOURCES_C += $(CORE_DIR)/lightrec/mem.c
+endif
+endif
+
+
+ifeq ($(HAVE_GPU_NEON),1)
+ COREFLAGS += -DNEON_BUILD -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP -DGPU_NEON
+ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+ SOURCES_ASM += $(NEON_DIR)/psx_gpu/psx_gpu_arm_neon.S
+ else
+ COREFLAGS += -DSIMD_BUILD
+ SOURCES_C += $(NEON_DIR)/psx_gpu/psx_gpu_simd.c
+ endif
+ SOURCES_C += $(NEON_DIR)/psx_gpu_if.c
+else ifeq ($(TARGET_ARCH_ABI),armeabi)
+ COREFLAGS += -DUSE_GPULIB=1 -DGPU_UNAI
+ COREFLAGS += -DHAVE_bgr555_to_rgb565
+ SOURCES_ASM += $(UNAI_DIR)/gpu_arm.S \
+ $(FRONTEND_DIR)/cspace_arm.S
+ SOURCES_C += $(UNAI_DIR)/gpulib_if.cpp
+else
+ COREFLAGS += -fno-strict-aliasing -DGPU_PEOPS
+ SOURCES_C += $(PEOPS_DIR)/gpulib_if.c
+endif
+
+ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+ COREFLAGS += -DHAVE_bgr555_to_rgb565 -DHAVE_bgr888_to_x
+ SOURCES_ASM += $(CORE_DIR)/gte_neon.S \
+ $(FRONTEND_DIR)/cspace_neon.S
+endif
+
+ifeq ($(USE_ASYNC_CDROM),1)
+COREFLAGS += -DUSE_ASYNC_CDROM
+USE_RTHREADS := 1
+endif
+ifeq ($(USE_RTHREADS),1)
+SOURCES_C += \
+ $(FRONTEND_DIR)/libretro-rthreads.c \
+ $(LIBRETRO_COMMON)/features/features_cpu.c
+COREFLAGS += -DHAVE_RTHREADS
+endif
+
+GIT_VERSION := " $(shell git rev-parse --short HEAD || echo unknown)"
+ifneq ($(GIT_VERSION)," unknown")
+ COREFLAGS += -DGIT_VERSION=\"$(GIT_VERSION)\"
+endif
+
+LOCAL_MODULE := retro
+LOCAL_SRC_FILES := $(SOURCES_C) $(SOURCES_ASM)
+LOCAL_CFLAGS := $(COREFLAGS)
+LOCAL_C_INCLUDES := $(ROOT_DIR)/include
+LOCAL_C_INCLUDES += $(DEPS_DIR)/crypto
+LOCAL_C_INCLUDES += $(EXTRA_INCLUDES)
+LOCAL_LDFLAGS := -Wl,-version-script=$(FRONTEND_DIR)/libretro-version-script
+LOCAL_LDFLAGS += -Wl,--script=$(FRONTEND_DIR)/libretro-extern.T
+LOCAL_LDFLAGS += -Wl,--gc-sections
+LOCAL_LDLIBS := -lz -llog
+LOCAL_ARM_MODE := arm
+
+ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+ LOCAL_ARM_NEON := true
+endif
+
+include $(BUILD_SHARED_LIBRARY)
***************************************************************************/
#include "psxcommon.h"
-#include "plugins.h"
#include "cdrom.h"
#include "cdriso.h"
#include "ppf.h"
+#include <errno.h>
+#include <zlib.h>
+#ifdef HAVE_CHD
+#include <libchdr/chd.h>
+#endif
+
#ifdef _WIN32
#define strcasecmp _stricmp
#else
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
-#if P_HAVE_PTHREAD
-#include <pthread.h>
-#include <sys/time.h>
#endif
-#endif
-#include <errno.h>
-#include <zlib.h>
-#ifdef HAVE_CHD
-#include "libchdr/chd.h"
+#ifdef USE_LIBRETRO_VFS
+#include <streams/file_stream_transforms.h>
+#undef fseeko
+#undef ftello
+#undef rewind
+#define ftello rftell
+#define fseeko rfseek
+#define rewind(f_) rfseek(f_, 0, SEEK_SET)
#endif
#define OFF_T_MSB ((off_t)1 << (sizeof(off_t) * 8 - 1))
unsigned int cdrIsoMultidiskSelect;
static FILE *cdHandle = NULL;
-static FILE *cddaHandle = NULL;
static FILE *subHandle = NULL;
static boolean subChanMixed = FALSE;
static boolean multifile = FALSE;
static unsigned char cdbuffer[CD_FRAMESIZE_RAW];
-static unsigned char subbuffer[SUB_FRAMESIZE];
static boolean cddaBigEndian = FALSE;
/* Frame offset into CD image where pregap data would be found if it was there.
#endif
static int (*cdimg_read_func)(FILE *f, unsigned int base, void *dest, int sector);
-static int (*cdimg_read_sub_func)(FILE *f, int sector);
+static int (*cdimg_read_sub_func)(FILE *f, int sector, void *dest);
-char* CALLBACK CDR__getDriveLetter(void);
-long CALLBACK CDR__configure(void);
-long CALLBACK CDR__test(void);
-void CALLBACK CDR__about(void);
-long CALLBACK CDR__setfilename(char *filename);
-long CALLBACK CDR__prefetch(u8 m, u8 s, u8 f);
-
-static void DecodeRawSubData(void);
+static void DecodeRawSubData(unsigned char *subbuffer);
struct trackinfo {
enum {DATA=1, CDDA} type;
static struct trackinfo ti[MAXTRACKS];
// get a sector from a msf-array
-static unsigned int msf2sec(char *msf) {
+static unsigned int msf2sec(const void *msf_) {
+ const unsigned char *msf = msf_;
return ((msf[0] * 60 + msf[1]) * 75) + msf[2];
}
-static void sec2msf(unsigned int s, char *msf) {
+static void sec2msf(unsigned int s, void *msf_) {
+ unsigned char *msf = msf_;
msf[0] = s / 75 / 60;
s = s - msf[0] * 75 * 60;
msf[1] = s / 75;
return size;
}
+// Some c libs like newlib default buffering to just 1k which is less than
+// cd sector size which is bad for performance.
+// Note that NULL setvbuf() is implemented differently by different libs
+// (newlib mallocs a buffer of given size and glibc ignores size and uses it's own).
+static void set_static_stdio_buffer(FILE *f)
+{
+#if !defined(fopen) // no stdio redirect
+ static char buf[16 * 1024];
+ if (f) {
+ int r;
+ errno = 0;
+ r = setvbuf(f, buf, _IOFBF, sizeof(buf));
+ if (r)
+ SysPrintf("cdriso: setvbuf %d %d\n", r, errno);
+ }
+#endif
+}
+
// this function tries to get the .toc file of the given .bin
// the necessary data is put into the ti (trackinformation)-array
static int parsetoc(const char *isofile) {
fclose(cdHandle);
cdHandle = ti[1].handle;
ti[1].handle = NULL;
+ set_static_stdio_buffer(cdHandle);
}
return 0;
}
static int handlechd(const char *isofile) {
int frame_offset = 150;
int file_offset = 0;
+ int is_chd_ext = 0;
+ chd_error err;
+ if (strlen(isofile) >= 3) {
+ const char *ext = isofile + strlen(isofile) - 3;
+ is_chd_ext = !strcasecmp(ext, "chd");
+ }
chd_img = calloc(1, sizeof(*chd_img));
if (chd_img == NULL)
goto fail_io;
- if(chd_open(isofile, CHD_OPEN_READ, NULL, &chd_img->chd) != CHDERR_NONE)
+ err = chd_open_file(cdHandle, CHD_OPEN_READ, NULL, &chd_img->chd);
+ if (err != CHDERR_NONE) {
+ if (is_chd_ext)
+ SysPrintf("chd_open: %d\n", err);
goto fail_io;
+ }
if (Config.CHD_Precache && (chd_precache(chd_img->chd) != CHDERR_NONE))
goto fail_io;
}
subHandle = fopen(subname, "rb");
- if (subHandle == NULL) {
+ if (subHandle == NULL)
return -1;
- }
return 0;
}
strcpy(sbiname + strlen(sbiname) - 4, disknum);
}
else
- strcpy(sbiname + strlen(sbiname) - 4, ".sbi");
+ strcpy(sbiname + strlen(sbiname) - 4, ".sbi");
}
else {
return -1;
int ret;
if (!f)
return -1;
+ if (!dest)
+ dest = cdbuffer;
if (fseeko(f, base + sector * CD_FRAMESIZE_RAW, SEEK_SET))
goto fail_io;
ret = fread(dest, 1, CD_FRAMESIZE_RAW, f);
if (!f)
return -1;
+ if (!dest)
+ dest = cdbuffer;
if (fseeko(f, base + sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE), SEEK_SET))
goto fail_io;
ret = fread(dest, 1, CD_FRAMESIZE_RAW, f);
return -1;
}
-static int cdread_sub_sub_mixed(FILE *f, int sector)
+static int cdread_sub_sub_mixed(FILE *f, int sector, void *buffer)
{
if (!f)
return -1;
if (fseeko(f, sector * (CD_FRAMESIZE_RAW + SUB_FRAMESIZE) + CD_FRAMESIZE_RAW, SEEK_SET))
goto fail_io;
- if (fread(subbuffer, 1, SUB_FRAMESIZE, f) != SUB_FRAMESIZE)
+ if (fread(buffer, 1, SUB_FRAMESIZE, f) != SUB_FRAMESIZE)
goto fail_io;
- return SUB_FRAMESIZE;
+ return 0;
fail_io:
SysPrintf("subchannel: file IO error %d, sector %u\n", errno, sector);
if (fread(is_compressed ? compr_img->buff_compressed : compr_img->buff_raw[0],
1, size, cdHandle) != size) {
- SysPrintf("read error for block %d at %x: ", block, start_byte);
+ SysPrintf("read error for block %d at %lx: ", block, (long)start_byte);
perror(NULL);
return -1;
}
compr_img->current_block = block;
finish:
- if (dest != cdbuffer) // copy avoid HACK
+ if (dest != NULL)
memcpy(dest, compr_img->buff_raw[compr_img->sector_in_blk],
CD_FRAMESIZE_RAW);
return CD_FRAMESIZE_RAW;
chd_img->current_hunk[chd_img->current_buffer] = hunk;
}
- if (dest != cdbuffer) // copy avoid HACK
+ if (dest != NULL)
memcpy(dest, chd_get_sector(chd_img->current_buffer, chd_img->sector_in_hunk),
CD_FRAMESIZE_RAW);
return CD_FRAMESIZE_RAW;
}
-static int cdread_sub_chd(FILE *f, int sector)
+static int cdread_sub_chd(FILE *f, int sector, void *buffer_ptr)
{
unsigned int sector_in_hunk;
unsigned int buffer;
chd_img->current_hunk[buffer] = hunk;
}
- memcpy(subbuffer, chd_get_sector(buffer, sector_in_hunk) + CD_FRAMESIZE_RAW, SUB_FRAMESIZE);
- return SUB_FRAMESIZE;
+ memcpy(buffer_ptr, chd_get_sector(buffer, sector_in_hunk) + CD_FRAMESIZE_RAW, SUB_FRAMESIZE);
+ return 0;
}
#endif
static int cdread_2048(FILE *f, unsigned int base, void *dest, int sector)
{
+ unsigned char *dst = dest ? dest : cdbuffer;
int ret;
if (!f)
return -1;
+
fseeko(f, base + sector * 2048, SEEK_SET);
- ret = fread((char *)dest + 12 * 2, 1, 2048, f);
+ ret = fread(dst + 12 * 2, 1, 2048, f);
// not really necessary, fake mode 2 header
- memset(cdbuffer, 0, 12 * 2);
- sec2msf(sector + 2 * 75, (char *)&cdbuffer[12]);
- cdbuffer[12 + 3] = 1;
+ memset(dst, 0, 12 * 2);
+ sec2msf(sector + 2 * 75, dst + 12);
+ dst[12 + 0] = itob(dst[12 + 0]);
+ dst[12 + 1] = itob(dst[12 + 1]);
+ dst[12 + 2] = itob(dst[12 + 2]);
+ dst[12 + 3] = 1;
return 12*2 + ret;
}
-static unsigned char * CALLBACK ISOgetBuffer_compr(void) {
- return compr_img->buff_raw[compr_img->sector_in_blk] + 12;
+static void * ISOgetBuffer_normal(void) {
+ return cdbuffer + 12;
+}
+
+static void * ISOgetBuffer_compr(void) {
+ return compr_img->buff_raw[compr_img->sector_in_blk] + 12;
}
#ifdef HAVE_CHD
-static unsigned char * CALLBACK ISOgetBuffer_chd(void) {
- return chd_get_sector(chd_img->current_buffer, chd_img->sector_in_hunk) + 12;
+static void * ISOgetBuffer_chd(void) {
+ return chd_get_sector(chd_img->current_buffer, chd_img->sector_in_hunk) + 12;
}
#endif
-unsigned char * CALLBACK ISOgetBuffer(void) {
- return cdbuffer + 12;
-}
+void * (*ISOgetBuffer)(void) = ISOgetBuffer_normal;
static void PrintTracks(void) {
int i;
// This function is invoked by the front-end when opening an ISO
// file for playback
-static long CALLBACK ISOopen(void) {
+int ISOopen(const char *fname)
+{
boolean isMode1ISO = FALSE;
char alt_bin_filename[MAXPATHLEN];
const char *bin_filename;
return 0; // it's already open
}
- cdHandle = fopen(GetIsoFile(), "rb");
+ cdHandle = fopen(fname, "rb");
if (cdHandle == NULL) {
SysPrintf(_("Could't open '%s' for reading: %s\n"),
- GetIsoFile(), strerror(errno));
+ fname, strerror(errno));
return -1;
}
+ set_static_stdio_buffer(cdHandle);
size_main = get_size(cdHandle);
snprintf(image_str, sizeof(image_str) - 6*4 - 1,
- "Loaded CD Image: %s", GetIsoFile());
+ "Loaded CD Image: %s", fname);
cddaBigEndian = FALSE;
subChanMixed = FALSE;
cdrIsoMultidiskCount = 1;
multifile = 0;
- CDR_getBuffer = ISOgetBuffer;
+ ISOgetBuffer = ISOgetBuffer_normal;
cdimg_read_func = cdread_normal;
cdimg_read_sub_func = NULL;
- if (parsetoc(GetIsoFile()) == 0) {
+ if (parsetoc(fname) == 0) {
strcat(image_str, "[+toc]");
}
- else if (parseccd(GetIsoFile()) == 0) {
+ else if (parseccd(fname) == 0) {
strcat(image_str, "[+ccd]");
}
- else if (parsemds(GetIsoFile()) == 0) {
+ else if (parsemds(fname) == 0) {
strcat(image_str, "[+mds]");
}
- else if (parsecue(GetIsoFile()) == 0) {
+ else if (parsecue(fname) == 0) {
strcat(image_str, "[+cue]");
}
- if (handlepbp(GetIsoFile()) == 0) {
+ if (handlepbp(fname) == 0) {
strcat(image_str, "[+pbp]");
- CDR_getBuffer = ISOgetBuffer_compr;
+ ISOgetBuffer = ISOgetBuffer_compr;
cdimg_read_func = cdread_compressed;
}
- else if (handlecbin(GetIsoFile()) == 0) {
+ else if (handlecbin(fname) == 0) {
strcat(image_str, "[+cbin]");
- CDR_getBuffer = ISOgetBuffer_compr;
+ ISOgetBuffer = ISOgetBuffer_compr;
cdimg_read_func = cdread_compressed;
}
#ifdef HAVE_CHD
- else if (handlechd(GetIsoFile()) == 0) {
+ else if (handlechd(fname) == 0) {
strcat(image_str, "[+chd]");
- CDR_getBuffer = ISOgetBuffer_chd;
+ ISOgetBuffer = ISOgetBuffer_chd;
cdimg_read_func = cdread_chd;
cdimg_read_sub_func = cdread_sub_chd;
- fclose(cdHandle);
- cdHandle = NULL;
}
#endif
- if (!subChanMixed && opensubfile(GetIsoFile()) == 0) {
+ if (!subChanMixed && opensubfile(fname) == 0) {
strcat(image_str, "[+sub]");
}
- if (opensbifile(GetIsoFile()) == 0) {
+ if (opensbifile(fname) == 0) {
strcat(image_str, "[+sbi]");
}
// maybe user selected metadata file instead of main .bin ..
- bin_filename = GetIsoFile();
+ bin_filename = fname;
if (cdHandle && size_main < 2352 * 0x10) {
static const char *exts[] = { ".bin", ".BIN", ".img", ".IMG" };
FILE *tmpf = NULL;
bin_filename = alt_bin_filename;
fclose(cdHandle);
cdHandle = tmpf;
+ set_static_stdio_buffer(cdHandle);
size_main = get_size(cdHandle);
}
}
}
}
- SysPrintf("%s.\n", image_str);
+ SysPrintf("%s (%lld bytes).\n", image_str, (long long)size_main);
PrintTracks();
return 0;
}
-static long CALLBACK ISOclose(void) {
+int ISOclose(void)
+{
int i;
if (cdHandle != NULL) {
fclose(subHandle);
subHandle = NULL;
}
- cddaHandle = NULL;
if (compr_img != NULL) {
free(compr_img->index_table);
free(compr_img);
compr_img = NULL;
}
-
+
#ifdef HAVE_CHD
if (chd_img != NULL) {
chd_close(chd_img->chd);
UnloadSBI();
memset(cdbuffer, 0, sizeof(cdbuffer));
- CDR_getBuffer = ISOgetBuffer;
+ ISOgetBuffer = ISOgetBuffer_normal;
return 0;
}
-static long CALLBACK ISOinit(void) {
+int ISOinit(void)
+{
assert(cdHandle == NULL);
assert(subHandle == NULL);
+ numtracks = 0;
return 0; // do nothing
}
-static long CALLBACK ISOshutdown(void) {
- ISOclose();
- return 0;
+int ISOshutdown(void)
+{
+ return ISOclose();
}
// return Starting and Ending Track
// buffer:
// byte 0 - start track
// byte 1 - end track
-static long CALLBACK ISOgetTN(unsigned char *buffer) {
+int ISOgetTN(unsigned char *buffer)
+{
buffer[0] = 1;
if (numtracks > 0) {
// return Track Time
// buffer:
-// byte 0 - frame
+// byte 0 - minute
// byte 1 - second
-// byte 2 - minute
-static long CALLBACK ISOgetTD(unsigned char track, unsigned char *buffer) {
+// byte 2 - frame
+int ISOgetTD(int track, unsigned char *buffer)
+{
if (track == 0) {
unsigned int sect;
- unsigned char time[3];
sect = msf2sec(ti[numtracks].start) + msf2sec(ti[numtracks].length);
- sec2msf(sect, (char *)time);
- buffer[2] = time[0];
- buffer[1] = time[1];
- buffer[0] = time[2];
+ sec2msf(sect, buffer);
}
else if (numtracks > 0 && track <= numtracks) {
- buffer[2] = ti[track].start[0];
- buffer[1] = ti[track].start[1];
- buffer[0] = ti[track].start[2];
+ memcpy(buffer, ti[track].start, 3);
}
else {
buffer[2] = 0;
}
// decode 'raw' subchannel data ripped by cdrdao
-static void DecodeRawSubData(void) {
+static void DecodeRawSubData(unsigned char *subbuffer) {
unsigned char subQData[12];
int i;
}
// read track
-// time: byte 0 - minute; byte 1 - second; byte 2 - frame
-// uses bcd format
-static boolean CALLBACK ISOreadTrack(unsigned char *time) {
- int sector = MSF2SECT(btoi(time[0]), btoi(time[1]), btoi(time[2]));
+// time: byte 0 - minute; byte 1 - second; byte 2 - frame (non-bcd)
+// buf: if NULL, data is kept in internal buffer accessible by ISOgetBuffer()
+int ISOreadTrack(const unsigned char *time, void *buf)
+{
+ int sector = msf2sec(time);
long ret;
if (!cdHandle && !chd_img)
- return 0;
+ return -1;
+
+ if (numtracks > 1 && sector >= msf2sec(ti[2].start))
+ return ISOreadCDDA(time, buf);
+ sector -= 2 * 75;
if (pregapOffset && sector >= pregapOffset)
sector -= 2 * 75;
- ret = cdimg_read_func(cdHandle, 0, cdbuffer, sector);
- if (ret < 12*2 + 2048)
- return 0;
-
- return 1;
-}
+ ret = cdimg_read_func(cdHandle, 0, buf, sector);
+ if (ret < 12*2 + 2048) {
+ if (buf && multifile && sector >= msf2sec(ti[1].length)) {
+ // assume a gap not backed by a file
+ memset(buf, 0, CD_FRAMESIZE_RAW);
+ return 0;
+ }
+ return -1;
+ }
-// plays cdda audio
-// sector: byte 0 - minute; byte 1 - second; byte 2 - frame
-// does NOT uses bcd format
-static long CALLBACK ISOplay(unsigned char *time) {
return 0;
}
-// stops cdda audio
-static long CALLBACK ISOstop(void) {
- return 0;
-}
+// read subchannel data
+int ISOreadSub(const unsigned char *time, void *buffer)
+{
+ int ret, sector = MSF2SECT(time[0], time[1], time[2]);
-// gets subchannel data
-static unsigned char* CALLBACK ISOgetBufferSub(int sector) {
if (pregapOffset && sector >= pregapOffset) {
sector -= 2 * 75;
if (sector < pregapOffset) // ?
- return NULL;
+ return -1;
}
if (cdimg_read_sub_func != NULL) {
- if (cdimg_read_sub_func(cdHandle, sector) != SUB_FRAMESIZE)
- return NULL;
+ if ((ret = cdimg_read_sub_func(cdHandle, sector, buffer)))
+ return ret;
}
else if (subHandle != NULL) {
if (fseeko(subHandle, sector * SUB_FRAMESIZE, SEEK_SET))
- return NULL;
- if (fread(subbuffer, 1, SUB_FRAMESIZE, subHandle) != SUB_FRAMESIZE)
- return NULL;
+ return -1;
+ if (fread(buffer, 1, SUB_FRAMESIZE, subHandle) != SUB_FRAMESIZE)
+ return -1;
}
else {
- return NULL;
+ return -1;
}
- if (subChanRaw) DecodeRawSubData();
- return subbuffer;
+ if (subChanRaw)
+ DecodeRawSubData(buffer);
+ return 0;
}
-static long CALLBACK ISOgetStatus(struct CdrStat *stat) {
+int ISOgetStatus(struct CdrStat *stat)
+{
CDR__getStatus(stat);
// BIOS - boot ID (CD type)
}
// read CDDA sector into buffer
-long CALLBACK ISOreadCDDA(unsigned char m, unsigned char s, unsigned char f, unsigned char *buffer) {
- unsigned char msf[3] = {m, s, f};
+int ISOreadCDDA(const unsigned char *time, void *buffer)
+{
unsigned int track, track_start = 0;
FILE *handle = cdHandle;
unsigned int cddaCurPos;
- int ret;
+ int ret, ret_clear = -1;
- cddaCurPos = msf2sec((char *)msf);
+ cddaCurPos = msf2sec(time);
// find current track index
for (track = numtracks; ; track--) {
// data tracks play silent
if (ti[track].type != CDDA) {
- memset(buffer, 0, CD_FRAMESIZE_RAW);
- return 0;
+ ret_clear = 0;
+ goto clear_return;
}
if (multifile) {
}
}
}
- if (!handle && !chd_img) {
- memset(buffer, 0, CD_FRAMESIZE_RAW);
- return -1;
- }
+ if (!handle && !chd_img)
+ goto clear_return;
ret = cdimg_read_func(handle, ti[track].start_offset,
buffer, cddaCurPos - track_start);
if (ret != CD_FRAMESIZE_RAW) {
- memset(buffer, 0, CD_FRAMESIZE_RAW);
- return -1;
+ if (multifile && cddaCurPos - track_start >= msf2sec(ti[track].length))
+ ret_clear = 0; // gap
+ goto clear_return;
}
- if (cddaBigEndian) {
+ if (cddaBigEndian && buffer) {
+ unsigned char tmp, *buf = buffer;
int i;
- unsigned char tmp;
for (i = 0; i < CD_FRAMESIZE_RAW / 2; i++) {
- tmp = buffer[i * 2];
- buffer[i * 2] = buffer[i * 2 + 1];
- buffer[i * 2 + 1] = tmp;
+ tmp = buf[i * 2];
+ buf[i * 2] = buf[i * 2 + 1];
+ buf[i * 2 + 1] = tmp;
}
}
return 0;
-}
-
-void cdrIsoInit(void) {
- CDR_init = ISOinit;
- CDR_shutdown = ISOshutdown;
- CDR_open = ISOopen;
- CDR_close = ISOclose;
- CDR_getTN = ISOgetTN;
- CDR_getTD = ISOgetTD;
- CDR_readTrack = ISOreadTrack;
- CDR_getBuffer = ISOgetBuffer;
- CDR_play = ISOplay;
- CDR_stop = ISOstop;
- CDR_getBufferSub = ISOgetBufferSub;
- CDR_getStatus = ISOgetStatus;
- CDR_readCDDA = ISOreadCDDA;
-
- CDR_getDriveLetter = CDR__getDriveLetter;
- CDR_configure = CDR__configure;
- CDR_test = CDR__test;
- CDR_about = CDR__about;
- CDR_setfilename = CDR__setfilename;
- CDR_prefetch = CDR__prefetch;
- numtracks = 0;
-}
-
-int cdrIsoActive(void) {
- return (cdHandle || chd_img);
+clear_return:
+ if (buffer)
+ memset(buffer, 0, CD_FRAMESIZE_RAW);
+ return ret_clear;
}
extern "C" {
#endif
-void cdrIsoInit(void);
-int cdrIsoActive(void);
-unsigned char * CALLBACK ISOgetBuffer(void);
+struct CdrStat;
+
+int ISOinit(void);
+int ISOshutdown(void);
+int ISOopen(const char *fname);
+int ISOclose(void);
+int ISOgetTN(unsigned char *buffer);
+int ISOgetTD(int track, unsigned char *buffer);
+int ISOreadTrack(const unsigned char *time, void *buf);
+int ISOreadCDDA(const unsigned char *time, void *buffer);
+int ISOreadSub(const unsigned char *time, void *buffer);
+int ISOgetStatus(struct CdrStat *stat);
+
+extern void * (*ISOgetBuffer)(void);
extern unsigned int cdrIsoMultidiskCount;
extern unsigned int cdrIsoMultidiskSelect;
--- /dev/null
+/***************************************************************************
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ ***************************************************************************/
+
+#include <stdalign.h>
+#include <stdlib.h>
+#include <string.h>
+#include "system.h"
+#include "plugins.h"
+#include "cdriso.h"
+#include "cdrom.h"
+#include "cdrom-async.h"
+
+// debug-trace toggle: flip the 0 to 1 to route acdrom_dbg() through printf;
+// in the default configuration it compiles away to nothing
+#if 0
+#define acdrom_dbg printf
+#else
+#define acdrom_dbg(...)
+#endif
+
+#ifdef USE_ASYNC_CDROM
+
+// handle to the physical CD-ROM backend; NULL when reading a disc image
+static void *g_cd_handle;
+
+#ifndef HAVE_CDROM
+
+// physical CD-ROM support not compiled in: provide failing stubs so the
+// code below can call the rcdrom_* backend unconditionally
+static void *rcdrom_open(const char *name, u32 *total_lba, u32 *have_sub) { return NULL; }
+static void rcdrom_close(void *stream) {}
+static int rcdrom_getTN(void *stream, u8 *tn) { return -1; }
+static int rcdrom_getTD(void *stream, u32 total_lba, u8 track, u8 *rt) { return -1; }
+static int rcdrom_getStatus(void *stream, struct CdrStat *stat) { return -1; }
+static int rcdrom_readSector(void *stream, unsigned int lba, void *b) { return -1; }
+static int rcdrom_readSub(void *stream, unsigned int lba, void *b) { return -1; }
+static int rcdrom_isMediaInserted(void *stream) { return 0; }
+
+#endif
+
+#ifdef USE_C11_THREADS
+#include <threads.h>
+
+// map the libretro rthreads API (slock_*/scond_*/sthread_*) onto C11
+// <threads.h> so the rest of this file can use one set of names
+
+// adapts the rthreads-style void(void*) callback to C11's int(void*)
+static int c11_threads_cb_wrapper(void *cb)
+{
+ ((void (*)(void *))cb)(NULL);
+
+ return 0;
+}
+
+// note: these use GCC statement expressions; allocation failure is passed
+// through as NULL and checked by the caller (cdra_start_thread)
+#define slock_new() ({ \
+ mtx_t *lock = malloc(sizeof(*lock)); \
+ if (lock) mtx_init(lock, mtx_plain); \
+ lock; \
+})
+
+#define scond_new() ({ \
+ cnd_t *cnd = malloc(sizeof(*cnd)); \
+ if (cnd) cnd_init(cnd); \
+ cnd; \
+})
+
+// second argument (thread purpose tag) is unused in the C11 mapping
+#define pcsxr_sthread_create(cb, unused) ({ \
+ thrd_t *thd = malloc(sizeof(*thd)); \
+ if (thd) \
+ thrd_create(thd, c11_threads_cb_wrapper, cb); \
+ thd; \
+})
+
+#define sthread_join(thrd) ({ \
+ thrd_join(*thrd, NULL); \
+ free(thrd); \
+})
+
+#define slock_free(lock) free(lock)
+#define slock_lock(lock) mtx_lock(lock)
+#define slock_unlock(lock) mtx_unlock(lock)
+#define scond_free(cond) free(cond)
+#define scond_wait(cond, lock) cnd_wait(cond, lock)
+#define scond_signal(cond) cnd_signal(cond)
+#define slock_t mtx_t
+#define scond_t cnd_t
+#define sthread_t thrd_t
+#else
+#include "../frontend/libretro-rthreads.h"
+#endif
+
+#include "retro_timers.h"
+
+// one prefetched sector: raw 2352-byte payload plus its subchannel frame
+struct cached_buf {
+ u32 lba;
+ u8 buf[CD_FRAMESIZE_RAW];
+ u8 buf_sub[SUB_FRAMESIZE];
+};
+// all state of the async cdrom layer; buf_cache is a direct-mapped cache
+// indexed by lba % buf_cnt, guarded by buf_lock (read_lock serializes
+// actual backend reads)
+static struct {
+ sthread_t *thread;
+ slock_t *read_lock;
+ slock_t *buf_lock;
+ scond_t *cond;
+ struct cached_buf *buf_cache;
+ u32 buf_cnt, thread_exit, do_prefetch, prefetch_failed, have_subchannel;
+ u32 total_lba, prefetch_lba;
+ int check_eject_delay;
+
+ // single sector cache, not touched by the thread
+ alignas(64) u8 buf_local[CD_FRAMESIZE_RAW_ALIGNED];
+} acdrom;
+
+// read sector `lba` (and its subchannel frame, when available) from the
+// active backend and publish it into cache slot lba % buf_cnt.
+// On read failure prefetching is aborted and prefetch_failed is set.
+static void lbacache_do(u32 lba)
+{
+ alignas(64) unsigned char buf[CD_FRAMESIZE_RAW_ALIGNED];
+ unsigned char msf[3], buf_sub[SUB_FRAMESIZE];
+ u32 i = lba % acdrom.buf_cnt;
+ int ret;
+
+ // +150: MSF addresses are offset by 00:02:00 from lba 0
+ lba2msf(lba + 150, &msf[0], &msf[1], &msf[2]);
+ slock_lock(acdrom.read_lock);
+ if (g_cd_handle)
+ ret = rcdrom_readSector(g_cd_handle, lba, buf);
+ else
+ ret = ISOreadTrack(msf, buf);
+ if (acdrom.have_subchannel) {
+ if (g_cd_handle)
+ ret |= rcdrom_readSub(g_cd_handle, lba, buf_sub);
+ else
+ ret |= ISOreadSub(msf, buf_sub);
+ }
+
+ // take buf_lock before dropping read_lock so the freshly read data is
+ // published atomically with respect to lbacache_get()
+ slock_lock(acdrom.buf_lock);
+ slock_unlock(acdrom.read_lock);
+ acdrom_dbg("c %d:%02d:%02d %2d m%d f%d\n", msf[0], msf[1], msf[2], ret,
+ buf[12+3], ((buf[12+4+2] >> 5) & 1) + 1);
+ if (ret) {
+ acdrom.do_prefetch = 0;
+ acdrom.prefetch_failed = 1;
+ slock_unlock(acdrom.buf_lock);
+ SysPrintf("prefetch: read failed for lba %d: %d\n", lba, ret);
+ return;
+ }
+ acdrom.prefetch_failed = 0;
+ acdrom.check_eject_delay = 100;
+
+ // only overwrite the slot if it does not already hold this lba
+ if (lba != acdrom.buf_cache[i].lba) {
+ acdrom.buf_cache[i].lba = lba;
+ memcpy(acdrom.buf_cache[i].buf, buf, sizeof(acdrom.buf_cache[i].buf));
+ if (acdrom.have_subchannel)
+ memcpy(acdrom.buf_cache[i].buf_sub, buf_sub, sizeof(buf_sub));
+ }
+ slock_unlock(acdrom.buf_lock);
+ if (g_cd_handle)
+ retro_sleep(0); // why does the main thread stall without this?
+}
+
+// look up `lba` in the prefetch cache; on a hit, copy the sector data out
+// (into acdrom.buf_local when buf is NULL) and, if sub_buf is given, the
+// subchannel frame too. Returns 1 on hit, 0 on miss.
+static int lbacache_get(unsigned int lba, void *buf, void *sub_buf)
+{
+ unsigned int slot = lba % acdrom.buf_cnt;
+ int hit = 0;
+
+ slock_lock(acdrom.buf_lock);
+ if (acdrom.buf_cache[slot].lba == lba) {
+ void *dst = buf ? buf : acdrom.buf_local;
+ memcpy(dst, acdrom.buf_cache[slot].buf, CD_FRAMESIZE_RAW);
+ if (sub_buf)
+ memcpy(sub_buf, acdrom.buf_cache[slot].buf_sub, SUB_FRAMESIZE);
+ hit = 1;
+ }
+ slock_unlock(acdrom.buf_lock);
+ return hit;
+}
+
+// note: This has races on some vars but that's ok, main thread can deal
+// with it. Only unsafe buffer accesses and simultaneous reads are prevented.
+// Worker thread: sleeps on the condvar until do_prefetch is set, then
+// caches sectors from prefetch_lba forward until buf_cnt sectors (or the
+// end of disc) are resident, skipping slots that already hold their lba.
+static void cdra_prefetch_thread(void *unused)
+{
+ u32 buf_cnt, lba, lba_to;
+
+ slock_lock(acdrom.buf_lock);
+ while (!acdrom.thread_exit)
+ {
+#ifdef __GNUC__
+ __asm__ __volatile__("":::"memory"); // barrier
+#endif
+ if (!acdrom.do_prefetch)
+ scond_wait(acdrom.cond, acdrom.buf_lock);
+ if (!acdrom.do_prefetch || acdrom.thread_exit)
+ continue;
+
+ buf_cnt = acdrom.buf_cnt;
+ lba = acdrom.prefetch_lba;
+ lba_to = lba + buf_cnt;
+ if (lba_to > acdrom.total_lba)
+ lba_to = acdrom.total_lba;
+ // find the first lba in the window that is not cached yet
+ for (; lba < lba_to; lba++) {
+ if (lba != acdrom.buf_cache[lba % buf_cnt].lba)
+ break;
+ }
+ if (lba == lba_to || lba >= acdrom.total_lba) {
+ // caching complete
+ acdrom.do_prefetch = 0;
+ continue;
+ }
+
+ // drop buf_lock around the actual read so the main thread can
+ // service cache hits while the backend read is in flight
+ slock_unlock(acdrom.buf_lock);
+ lbacache_do(lba);
+ slock_lock(acdrom.buf_lock);
+ }
+ slock_unlock(acdrom.buf_lock);
+}
+
+// stop the prefetch thread (if running) and free all cache/sync objects;
+// safe to call when the thread was never started or only partly set up.
+// Note: acdrom.buf_cnt is deliberately left untouched.
+void cdra_stop_thread(void)
+{
+ acdrom.thread_exit = 1;
+ if (acdrom.buf_lock) {
+ slock_lock(acdrom.buf_lock);
+ acdrom.do_prefetch = 0;
+ if (acdrom.cond)
+ scond_signal(acdrom.cond); // wake the thread so it sees thread_exit
+ slock_unlock(acdrom.buf_lock);
+ }
+ if (acdrom.thread) {
+ sthread_join(acdrom.thread);
+ acdrom.thread = NULL;
+ }
+ if (acdrom.cond) { scond_free(acdrom.cond); acdrom.cond = NULL; }
+ if (acdrom.buf_lock) { slock_free(acdrom.buf_lock); acdrom.buf_lock = NULL; }
+ if (acdrom.read_lock) { slock_free(acdrom.read_lock); acdrom.read_lock = NULL; }
+ free(acdrom.buf_cache);
+ acdrom.buf_cache = NULL;
+}
+
+// the thread is optional, if anything fails we can do direct reads
+// (re)create the cache and the prefetch thread for the current buf_cnt;
+// tears everything down again if any allocation or thread creation fails.
+static void cdra_start_thread(void)
+{
+ cdra_stop_thread();
+ acdrom.thread_exit = acdrom.prefetch_lba = acdrom.do_prefetch = 0;
+ acdrom.prefetch_failed = 0;
+ if (acdrom.buf_cnt == 0)
+ return;
+ acdrom.buf_cache = calloc(acdrom.buf_cnt, sizeof(acdrom.buf_cache[0]));
+ acdrom.buf_lock = slock_new();
+ acdrom.read_lock = slock_new();
+ acdrom.cond = scond_new();
+ if (acdrom.buf_cache && acdrom.buf_lock && acdrom.read_lock && acdrom.cond)
+ {
+ int i;
+ acdrom.thread = pcsxr_sthread_create(cdra_prefetch_thread, PCSXRT_CDR);
+ // mark all slots empty (~0 is never a valid lba)
+ for (i = 0; i < acdrom.buf_cnt; i++)
+ acdrom.buf_cache[i].lba = ~0;
+ }
+ if (acdrom.thread) {
+ SysPrintf("cdrom precache: %d buffers%s\n",
+ acdrom.buf_cnt, acdrom.have_subchannel ? " +sub" : "");
+ }
+ else {
+ SysPrintf("cdrom precache thread init failed.\n");
+ cdra_stop_thread();
+ }
+}
+
+// init/shutdown just forward to the ISO image layer
+int cdra_init(void)
+{
+ return ISOinit();
+}
+
+void cdra_shutdown(void)
+{
+ cdra_close();
+}
+
+// open the configured disc: a physical drive for an empty name or a
+// "cdrom:" path, otherwise (or as fallback) an image via the ISO layer.
+// Probes for subchannel data and starts the prefetch thread on success.
+// Returns 0 on success, negative on failure.
+int cdra_open(void)
+{
+ const char *name = GetIsoFile();
+ u8 buf_sub[SUB_FRAMESIZE];
+ int ret = -1, ret2;
+
+ acdrom_dbg("%s %s\n", __func__, name);
+ acdrom.have_subchannel = 0;
+ if (!name[0] || !strncmp(name, "cdrom:", 6)) {
+ g_cd_handle = rcdrom_open(name, &acdrom.total_lba, &acdrom.have_subchannel);
+ if (!!g_cd_handle)
+ ret = 0;
+ }
+
+ // try ISO even if it's cdrom:// as it might work through libretro vfs
+ if (name[0] && ret < 0) {
+ ret = ISOopen(name);
+ if (ret == 0) {
+ u8 msf[3];
+ // track 0 reports the total disc length
+ ISOgetTD(0, msf);
+ acdrom.total_lba = MSF2SECT(msf[0], msf[1], msf[2]);
+ // probe a known-readable position to detect subchannel support
+ msf[0] = 0; msf[1] = 2; msf[2] = 16;
+ ret2 = ISOreadSub(msf, buf_sub);
+ acdrom.have_subchannel = (ret2 == 0);
+ }
+ }
+ if (ret == 0)
+ cdra_start_thread();
+ return ret;
+}
+
+// close whichever backend is active and stop the prefetch thread
+void cdra_close(void)
+{
+ acdrom_dbg("%s\n", __func__);
+ cdra_stop_thread();
+ if (g_cd_handle) {
+ rcdrom_close(g_cd_handle);
+ g_cd_handle = NULL;
+ }
+ else
+ ISOclose();
+}
+
+// fetch first/last track numbers into tn[0]/tn[1] from the active backend
+int cdra_getTN(unsigned char *tn)
+{
+ int ret = g_cd_handle ? rcdrom_getTN(g_cd_handle, tn)
+ : ISOgetTN(tn);
+ acdrom_dbg("%s -> %d %d\n", __func__, tn[0], tn[1]);
+ return ret;
+}
+
+// fetch the start position of `track` into rt from the active backend
+int cdra_getTD(int track, unsigned char *rt)
+{
+ int ret = g_cd_handle
+ ? rcdrom_getTD(g_cd_handle, acdrom.total_lba, track, rt)
+ : ISOgetTD(track, rt);
+ //acdrom_dbg("%s %d -> %d:%02d:%02d\n", __func__, track, rt[2], rt[1], rt[0]);
+ return ret;
+}
+
+// kick the prefetch thread to start caching from m:s:f; returns 1 when
+// that sector is already cached (or no cache exists), 0 when it is not
+int cdra_prefetch(unsigned char m, unsigned char s, unsigned char f)
+{
+ u32 lba = MSF2SECT(m, s, f);
+ int ret = 1;
+ if (acdrom.cond) {
+ acdrom.prefetch_lba = lba;
+ acdrom.do_prefetch = 1;
+ scond_signal(acdrom.cond);
+ }
+ if (acdrom.buf_cache && !acdrom.prefetch_failed) {
+ u32 c = acdrom.buf_cnt;
+ if (c)
+ ret = acdrom.buf_cache[lba % c].lba == lba;
+ acdrom_dbg("p %d:%02d:%02d %d\n", m, s, f, ret);
+ }
+ return ret;
+}
+
+// common read path: try the cache, then re-check it under read_lock (the
+// prefetch thread may just have fetched it), and finally read directly
+// from the backend. Exactly one of {sector data, subchannel} is read per
+// call: buf_sub non-NULL selects subchannel, else cdda selects audio.
+// A NULL buf stores sector data into acdrom.buf_local. Returns 0 on success.
+static int cdra_do_read(const unsigned char *time, int cdda,
+ void *buf, void *buf_sub)
+{
+ u32 lba = MSF2SECT(time[0], time[1], time[2]);
+ int hit = 0, ret = -1, read_locked = 0;
+ do
+ {
+ if (acdrom.buf_lock) {
+ hit = lbacache_get(lba, buf, buf_sub);
+ if (hit)
+ break;
+ }
+ if (acdrom.read_lock) {
+ // maybe still prefetching
+ slock_lock(acdrom.read_lock);
+ read_locked = 1;
+ hit = lbacache_get(lba, buf, buf_sub);
+ if (hit) {
+ hit = 2; // hit only after waiting for the in-flight read
+ break;
+ }
+ }
+ // stop the thread from competing for the backend while we read
+ acdrom.do_prefetch = 0;
+ if (!buf)
+ buf = acdrom.buf_local;
+ if (g_cd_handle) {
+ if (buf_sub)
+ ret = rcdrom_readSub(g_cd_handle, lba, buf_sub);
+ else
+ ret = rcdrom_readSector(g_cd_handle, lba, buf);
+ }
+ else if (buf_sub)
+ ret = ISOreadSub(time, buf_sub);
+ else if (cdda)
+ ret = ISOreadCDDA(time, buf);
+ else
+ ret = ISOreadTrack(time, buf);
+ if (ret)
+ SysPrintf("cdrom read failed for lba %d: %d\n", lba, ret);
+ }
+ while (0);
+ if (read_locked)
+ slock_unlock(acdrom.read_lock);
+ if (hit)
+ ret = 0;
+ acdrom.check_eject_delay = ret ? 0 : 100;
+ acdrom_dbg("f%c %d:%02d:%02d %d%s\n",
+ buf_sub ? 's' : (cdda ? 'c' : 'd'),
+ time[0], time[1], time[2], hit, ret ? " ERR" : "");
+ return ret;
+}
+
+// time: msf in non-bcd format
+// read a data sector; result is retrieved via cdra_getBuffer()
+int cdra_readTrack(const unsigned char *time)
+{
+ if (!acdrom.thread && !g_cd_handle) {
+ // just forward to ISOreadTrack to avoid extra copying
+ return ISOreadTrack(time, NULL);
+ }
+ return cdra_do_read(time, 0, NULL, NULL);
+}
+
+// read a raw audio (CDDA) sector into buffer
+int cdra_readCDDA(const unsigned char *time, void *buffer)
+{
+ return cdra_do_read(time, 1, buffer, NULL);
+}
+
+// read a subchannel frame into buffer; -1 if the disc has no subchannel
+int cdra_readSub(const unsigned char *time, void *buffer)
+{
+ if (!acdrom.thread && !g_cd_handle)
+ return ISOreadSub(time, buffer);
+ if (!acdrom.have_subchannel)
+ return -1;
+ acdrom_dbg("s %d:%02d:%02d\n", time[0], time[1], time[2]);
+ return cdra_do_read(time, 0, NULL, buffer);
+}
+
+// pointer to cached buffer from last cdra_readTrack() call
+// (+12 skips the leading bytes of the raw sector, mirroring ISOgetBuffer)
+void *cdra_getBuffer(void)
+{
+ //acdrom_dbg("%s\n", __func__);
+ if (!acdrom.thread && !g_cd_handle)
+ return ISOgetBuffer();
+ return acdrom.buf_local + 12;
+}
+
+// fill *stat: generic defaults first, then backend-specific status
+int cdra_getStatus(struct CdrStat *stat)
+{
+ int ret;
+ CDR__getStatus(stat);
+ ret = g_cd_handle ? rcdrom_getStatus(g_cd_handle, stat)
+ : ISOgetStatus(stat);
+ return ret;
+}
+
+// nonzero when a physical drive (not an image) is in use
+int cdra_is_physical(void)
+{
+ return !!g_cd_handle;
+}
+
+// rate-limited media-presence poll for physical drives; returns 1 and sets
+// *inserted roughly once per 100 calls when idle, else 0 without polling
+int cdra_check_eject(int *inserted)
+{
+ if (!g_cd_handle || acdrom.do_prefetch || acdrom.check_eject_delay-- > 0)
+ return 0;
+ acdrom.check_eject_delay = 100;
+ *inserted = rcdrom_isMediaInserted(g_cd_handle); // 1-2ms
+ return 1;
+}
+
+// resize the prefetch cache; restarts the worker thread with the new size
+void cdra_set_buf_count(int newcount)
+{
+ if (acdrom.buf_cnt == newcount)
+ return;
+ cdra_stop_thread();
+ acdrom.buf_cnt = newcount;
+ cdra_start_thread();
+}
+
+int cdra_get_buf_count(void)
+{
+ return acdrom.buf_cnt;
+}
+
+// approximate number of cached sectors in the window starting at the
+// current prefetch position, wrapping to lba 0 past the end of disc;
+// values may be slightly stale since no lock is taken (statistics only)
+int cdra_get_buf_cached_approx(void)
+{
+ u32 buf_cnt = acdrom.buf_cnt, lba = acdrom.prefetch_lba;
+ u32 total = acdrom.total_lba;
+ u32 left = buf_cnt;
+ int buf_use = 0;
+
+ // buf_cache can be NULL with buf_cnt != 0 if cdra_start_thread() failed
+ // (cdra_stop_thread() frees the cache but leaves buf_cnt set), and
+ // buf_cnt == 0 must not reach the % below
+ if (buf_cnt == 0 || acdrom.buf_cache == NULL)
+ return 0;
+ if (left > total)
+ left = total;
+ for (; lba < total && left > 0; lba++, left--)
+ if (lba == acdrom.buf_cache[lba % buf_cnt].lba)
+ buf_use++;
+ for (lba = 0; left > 0; lba++, left--)
+ if (lba == acdrom.buf_cache[lba % buf_cnt].lba)
+ buf_use++;
+
+ return buf_use;
+}
+#else
+
+// !USE_ASYNC_CDROM: no cache and no worker thread, every cdra_* call is a
+// thin synchronous forward to the ISO image layer
+
+// phys. CD-ROM without a cache is unusable so not implemented
+#ifdef HAVE_CDROM
+#error "HAVE_CDROM requires USE_ASYNC_CDROM"
+#endif
+
+// just forward to cdriso
+int cdra_init(void)
+{
+ return ISOinit();
+}
+
+void cdra_shutdown(void)
+{
+ ISOshutdown();
+}
+
+int cdra_open(void)
+{
+ return ISOopen(GetIsoFile());
+}
+
+void cdra_close(void)
+{
+ ISOclose();
+}
+
+int cdra_getTN(unsigned char *tn)
+{
+ return ISOgetTN(tn);
+}
+
+int cdra_getTD(int track, unsigned char *rt)
+{
+ return ISOgetTD(track, rt);
+}
+
+int cdra_prefetch(unsigned char m, unsigned char s, unsigned char f)
+{
+ return 1; // always hit
+}
+
+// time: msf in non-bcd format
+int cdra_readTrack(const unsigned char *time)
+{
+ return ISOreadTrack(time, NULL);
+}
+
+int cdra_readCDDA(const unsigned char *time, void *buffer)
+{
+ return ISOreadCDDA(time, buffer);
+}
+
+int cdra_readSub(const unsigned char *time, void *buffer)
+{
+ return ISOreadSub(time, buffer);
+}
+
+// pointer to cached buffer from last cdra_readTrack() call
+void *cdra_getBuffer(void)
+{
+ return ISOgetBuffer();
+}
+
+int cdra_getStatus(struct CdrStat *stat)
+{
+ return ISOgetStatus(stat);
+}
+
+// stubs for the cache/thread/physical-drive features that only exist in
+// the async build
+int cdra_is_physical(void) { return 0; }
+int cdra_check_eject(int *inserted) { return 0; }
+void cdra_stop_thread(void) {}
+void cdra_set_buf_count(int newcount) {}
+int cdra_get_buf_count(void) { return 0; }
+int cdra_get_buf_cached_approx(void) { return 0; }
+
+#endif
+
+// vim:sw=3:ts=3:expandtab
--- /dev/null
+// public interface of the async cdrom layer (cdrom-async.c)
+#ifndef CDROM_ASYNC_H
+#define CDROM_ASYNC_H
+
+#include "psxcommon.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct CdrStat;
+
+// physical CD-ROM backend, implemented per-frontend when HAVE_CDROM is set
+#ifdef HAVE_CDROM
+void *rcdrom_open(const char *name, u32 *total_lba, u32 *have_sub);
+void rcdrom_close(void *stream);
+int rcdrom_getTN(void *stream, u8 *tn);
+int rcdrom_getTD(void *stream, u32 total_lba, u8 track, u8 *rt);
+int rcdrom_getStatus(void *stream, struct CdrStat *stat);
+int rcdrom_readSector(void *stream, unsigned int lba, void *b);
+int rcdrom_readSub(void *stream, unsigned int lba, void *b);
+int rcdrom_isMediaInserted(void *stream);
+#endif
+
+int cdra_init(void);
+void cdra_shutdown(void);
+int cdra_open(void);
+void cdra_close(void);
+int cdra_getTN(unsigned char *tn);
+int cdra_getTD(int track, unsigned char *rt);
+int cdra_getStatus(struct CdrStat *stat);
+int cdra_readTrack(const unsigned char *time);
+int cdra_readCDDA(const unsigned char *time, void *buffer);
+int cdra_readSub(const unsigned char *time, void *buffer);
+int cdra_prefetch(unsigned char m, unsigned char s, unsigned char f);
+
+int cdra_is_physical(void);
+int cdra_check_eject(int *inserted);
+void cdra_stop_thread(void);
+void cdra_set_buf_count(int count);
+int cdra_get_buf_count(void);
+int cdra_get_buf_cached_approx(void);
+
+void *cdra_getBuffer(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // CDROM_ASYNC_H
* Handles all CD-ROM registers and functions.
*/
+#include <stdalign.h>
#include <assert.h>
#include "cdrom.h"
+#include "cdrom-async.h"
#include "misc.h"
#include "ppf.h"
#include "psxdma.h"
u8 AdpcmActive;
u32 LastReadSeekCycles;
- u8 unused7;
+ u8 RetryDetected;
u8 DriveState; // enum drive_state
u8 FastForward;
u8 AttenuatorLeftToLeftT, AttenuatorLeftToRightT;
u8 AttenuatorRightToRightT, AttenuatorRightToLeftT;
} cdr;
-static s16 read_buf[CD_FRAMESIZE_RAW/2];
+alignas(64) static s16 read_buf[CD_FRAMESIZE_RAW_ALIGNED / 2];
+
+struct SubQ {
+ char res0[12];
+ unsigned char ControlAndADR;
+ unsigned char TrackNumber;
+ unsigned char IndexNumber;
+ unsigned char TrackRelativeAddress[3];
+ unsigned char Filler;
+ unsigned char AbsoluteAddress[3];
+ unsigned char CRC[2];
+ char res1[72];
+};
/* CD-ROM magic numbers */
#define CdlSync 0 /* nocash documentation : "Uh, actually, returns error code 40h = Invalid Command...?" */
DRIVESTATE_SEEK,
};
-static struct CdrStat stat;
+static struct CdrStat cdr_stat;
static unsigned int msf2sec(const u8 *msf) {
return ((msf[0] * 60 + msf[1]) * 75) + msf[2];
}
-// for that weird psemu API..
-static unsigned int fsm2sec(const u8 *msf) {
- return ((msf[2] * 60 + msf[1]) * 75) + msf[0];
-}
-
-static void sec2msf(unsigned int s, u8 *msf) {
- msf[0] = s / 75 / 60;
- s = s - msf[0] * 75 * 60;
- msf[1] = s / 75;
- s = s - msf[1] * 75;
- msf[2] = s;
-}
-
// cdrPlayReadInterrupt
#define CDRPLAYREAD_INT(eCycle, isFirst) { \
u32 e_ = eCycle; \
}
#define StopCdda() { \
- if (cdr.Play && !Config.Cdda) CDR_stop(); \
cdr.Play = FALSE; \
cdr.FastForward = 0; \
cdr.FastBackward = 0; \
//StopReading();
SetPlaySeekRead(cdr.StatP, 0);
- if (CDR_getStatus(&stat) == -1)
+ if (cdra_getStatus(&cdr_stat) == -1)
return;
- if (stat.Status & STATUS_SHELLOPEN)
+ if (cdr_stat.Status & STATUS_SHELLOPEN)
{
memset(cdr.Prev, 0xff, sizeof(cdr.Prev));
cdr.DriveState = DRIVESTATE_LID_OPEN;
break;
case DRIVESTATE_LID_OPEN:
- if (CDR_getStatus(&stat) == -1)
- stat.Status &= ~STATUS_SHELLOPEN;
+ if (cdra_getStatus(&cdr_stat) != 0)
+ cdr_stat.Status &= ~STATUS_SHELLOPEN;
// 02, 12, 10
if (!(cdr.StatP & STATUS_SHELLOPEN)) {
+ int was_reading = cdr.Reading;
StopReading();
SetPlaySeekRead(cdr.StatP, 0);
cdr.StatP |= STATUS_SHELLOPEN;
+ memset(cdr.Prev, 0xff, sizeof(cdr.Prev));
// IIRC this sometimes doesn't happen on real hw
// (when lots of commands are sent?)
SetResultSize(2);
cdr.Result[0] = cdr.StatP | STATUS_SEEKERROR;
cdr.Result[1] = ERROR_SHELLOPEN;
- if (cdr.CmdInProgress) {
+ if (cdr.CmdInProgress || was_reading) {
psxRegs.interrupt &= ~(1 << PSXINT_CDR);
cdr.CmdInProgress = 0;
cdr.Result[0] = cdr.StatP | STATUS_ERROR;
else if (cdr.StatP & STATUS_ROTATING) {
cdr.StatP &= ~STATUS_ROTATING;
}
- else if (!(stat.Status & STATUS_SHELLOPEN)) {
+ else if (!(cdr_stat.Status & STATUS_SHELLOPEN)) {
// closed now
CheckCdrom();
current = msf2sec(time);
for (cdr.CurTrack = 1; cdr.CurTrack < cdr.ResultTN[1]; cdr.CurTrack++) {
- CDR_getTD(cdr.CurTrack + 1, cdr.ResultTD);
- sect = fsm2sec(cdr.ResultTD);
+ cdra_getTD(cdr.CurTrack + 1, cdr.ResultTD);
+ sect = msf2sec(cdr.ResultTD);
if (sect - current >= 150)
break;
}
unsigned int this_s, start_s, next_s, pregap;
int relative_s;
- CDR_getTD(cdr.CurTrack, start);
+ cdra_getTD(cdr.CurTrack, start);
if (cdr.CurTrack + 1 <= cdr.ResultTN[1]) {
pregap = 150;
- CDR_getTD(cdr.CurTrack + 1, next);
+ cdra_getTD(cdr.CurTrack + 1, next);
}
else {
// last track - cd size
pregap = 0;
- next[0] = cdr.SetSectorEnd[2];
- next[1] = cdr.SetSectorEnd[1];
- next[2] = cdr.SetSectorEnd[0];
+ memcpy(next, cdr.SetSectorEnd, 3);
}
this_s = msf2sec(time);
- start_s = fsm2sec(start);
- next_s = fsm2sec(next);
+ start_s = msf2sec(start);
+ next_s = msf2sec(next);
cdr.TrackChanged = FALSE;
cdr.subq.Index = 0;
relative_s = -relative_s;
}
- sec2msf(relative_s, cdr.subq.Relative);
+ lba2msf(relative_s, &cdr.subq.Relative[0],
+ &cdr.subq.Relative[1], &cdr.subq.Relative[2]);
cdr.subq.Track = itob(cdr.CurTrack);
cdr.subq.Relative[0] = itob(cdr.subq.Relative[0]);
static int ReadTrack(const u8 *time)
{
- unsigned char tmp[3];
- int read_ok;
+ int ret;
- tmp[0] = itob(time[0]);
- tmp[1] = itob(time[1]);
- tmp[2] = itob(time[2]);
+ CDR_LOG("ReadTrack *** %02d:%02d:%02d\n", tmp[0], tmp[1], tmp[2]);
- CDR_LOG("ReadTrack *** %02x:%02x:%02x\n", tmp[0], tmp[1], tmp[2]);
-
- if (memcmp(cdr.Prev, tmp, 3) == 0)
+ if (memcmp(cdr.Prev, time, 3) == 0)
return 1;
- read_ok = CDR_readTrack(tmp);
- if (read_ok)
- memcpy(cdr.Prev, tmp, 3);
- return read_ok;
+ ret = cdra_readTrack(time);
+ if (ret == 0)
+ memcpy(cdr.Prev, time, 3);
+ return ret == 0;
}
static void UpdateSubq(const u8 *time)
{
- const struct SubQ *subq;
- int s = MSF2SECT(time[0], time[1], time[2]);
+ int ret = -1, s = MSF2SECT(time[0], time[1], time[2]);
+ struct SubQ subq;
u16 crc;
if (CheckSBI(s))
return;
- subq = (struct SubQ *)CDR_getBufferSub(s);
- if (subq != NULL && cdr.CurTrack == 1) {
- crc = calcCrc((u8 *)subq + 12, 10);
- if (crc == (((u16)subq->CRC[0] << 8) | subq->CRC[1])) {
- cdr.subq.Track = subq->TrackNumber;
- cdr.subq.Index = subq->IndexNumber;
- memcpy(cdr.subq.Relative, subq->TrackRelativeAddress, 3);
- memcpy(cdr.subq.Absolute, subq->AbsoluteAddress, 3);
+ if (cdr.CurTrack == 1)
+ ret = cdra_readSub(time, &subq);
+ if (ret == 0) {
+ crc = calcCrc((u8 *)&subq + 12, 10);
+ if (crc == (((u16)subq.CRC[0] << 8) | subq.CRC[1])) {
+ cdr.subq.Track = subq.TrackNumber;
+ cdr.subq.Index = subq.IndexNumber;
+ memcpy(cdr.subq.Relative, subq.TrackRelativeAddress, 3);
+ memcpy(cdr.subq.Absolute, subq.AbsoluteAddress, 3);
}
else {
CDR_LOG_I("subq bad crc @%02d:%02d:%02d\n",
cdr.ReportDelay--;
}
+static boolean canDoTurbo(void)
+{
+ u32 c = psxRegs.cycle;
+ return Config.TurboCD && !cdr.RetryDetected && !cdr.AdpcmActive
+ //&& c - psxRegs.intCycle[PSXINT_SPUDMA].sCycle > (u32)cdReadTime * 2
+ && c - psxRegs.intCycle[PSXINT_MDECOUTDMA].sCycle > (u32)cdReadTime * 16;
+}
+
static int cdrSeekTime(unsigned char *target)
{
- int diff = msf2sec(cdr.SetSectorPlay) - msf2sec(target);
- int seekTime = abs(diff) * (cdReadTime / 2000);
+ int diff = abs((int)msf2sec(cdr.SetSectorPlay) - (int)msf2sec(target));
+ int seekTime = diff * (cdReadTime / 2000);
int cyclesSinceRS = psxRegs.cycle - cdr.LastReadSeekCycles;
seekTime = MAX_VALUE(seekTime, 20000);
+ // sled seek?
+ if (diff >= 7200)
+ seekTime = PSXCLK / 7 + diff * 64;
+ // add *something* as rotation time until the target sector
+ if (cyclesSinceRS >= cdReadTime)
+ seekTime += (8 - ((cyclesSinceRS >> 18) & 7)) * (cdReadTime / 2);
+
// Transformers Beast Wars Transmetals does Setloc(x),SeekL,Setloc(x),ReadN
// and then wants some slack time
if (cdr.DriveState == DRIVESTATE_PAUSED || cyclesSinceRS < cdReadTime *3/2)
seekTime += cdReadTime;
- seekTime = MIN_VALUE(seekTime, PSXCLK * 2 / 3);
- CDR_LOG("seek: %.2f %.2f (%.2f) st %d di %d\n", (float)seekTime / PSXCLK,
+ //seekTime = MIN_VALUE(seekTime, PSXCLK * 2 / 3);
+ CDR_LOG("seek: %02d:%02d:%02d %.2f %.2f (%.2f) st %d di %d\n",
+ target[0], target[1], target[2], (float)seekTime / PSXCLK,
(float)seekTime / cdReadTime, (float)cyclesSinceRS / cdReadTime,
cdr.DriveState, diff);
return seekTime;
* Note: always enforcing this breaks other games like Crash PAL version
* (inputs get dropped because bios handler doesn't see interrupts).
*/
- u32 vint_rel;
- if (psxRegs.cycle - rcnts[3].cycleStart > 250000)
- return cycles;
- vint_rel = rcnts[3].cycleStart + 63000 - psxRegs.cycle;
- vint_rel += PSXCLK / 60;
+ u32 vint_rel = rcnts[3].cycleStart + 63000 - psxRegs.cycle;
while ((s32)(vint_rel - cycles) < 0)
vint_rel += PSXCLK / 60;
return vint_rel;
}
}
+static int msfiEq(const u8 *a, const u8 *b)
+{
+ return a[0] == b[0] && a[1] == b[1] && a[2] == b[2];
+}
+
void cdrPlayReadInterrupt(void)
{
- int hit = CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]);
- if (!hit && cdr.PhysCdPropagations++ < 222) {
- // this propagates real cdrom delays to the emulated game
+ // this works but causes instability for timing sensitive games
+#if 0
+ int hit = cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]);
+ if (!hit && cdr.PhysCdPropagations < 75/2) {
+ // this propagates the real cdrom delays to the emulated game
CDRPLAYREAD_INT(cdReadTime / 2, 0);
+ cdr.PhysCdPropagations++;
return;
}
- cdr.PhysCdPropagations = 0;
-
+#endif
cdr.LastReadSeekCycles = psxRegs.cycle;
if (cdr.Reading) {
cdr.DriveState = DRIVESTATE_PAUSED;
}
else {
- CDR_readCDDA(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2], (u8 *)read_buf);
+ cdra_readCDDA(cdr.SetSectorPlay, read_buf);
}
if (!cdr.IrqStat && (cdr.Mode & (MODE_AUTOPAUSE|MODE_REPORT)))
}
msfiAdd(cdr.SetSectorPlay, 1);
- CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]);
+ cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]);
// update for CdlGetlocP/autopause
generate_subq(cdr.SetSectorPlay);
CDRPLAYREAD_INT(cdReadTime, 0);
+
+ // stop propagation since it breaks streaming
+ cdr.PhysCdPropagations = 0xff;
}
static void softReset(void)
{
- CDR_getStatus(&stat);
- if (stat.Status & STATUS_SHELLOPEN) {
+ cdra_getStatus(&cdr_stat);
+ if (cdr_stat.Status & STATUS_SHELLOPEN) {
cdr.DriveState = DRIVESTATE_LID_OPEN;
cdr.StatP = STATUS_SHELLOPEN;
}
u32 second_resp_time = 0;
const void *buf;
u8 ParamC;
- u8 set_loc[3];
int read_ok;
u16 not_ready = 0;
u8 IrqStat = Acknowledge;
}
if (cdr.Irq1Pending) {
// hand out the "newest" sector, according to nocash
- cdrUpdateTransferBuf(CDR_getBuffer());
+ cdrUpdateTransferBuf(cdra_getBuffer());
CDR_LOG_I("%x:%02x:%02x loaded on ack, cmd=%02x res=%02x\n",
cdr.Transfer[0], cdr.Transfer[1], cdr.Transfer[2],
cdr.CmdInProgress, cdr.Irq1Pending);
break;
case CdlSetloc:
- // case CdlSetloc + CMD_WHILE_NOT_READY: // or is it?
- CDR_LOG("CDROM setloc command (%02X, %02X, %02X)\n", cdr.Param[0], cdr.Param[1], cdr.Param[2]);
+ case CdlSetloc + CMD_WHILE_NOT_READY: // apparently?
+ if (cdr.StatP & STATUS_SHELLOPEN)
+ // wrong? Driver2 vs Amerzone
+ goto set_error;
// MM must be BCD, SS must be BCD and <0x60, FF must be BCD and <0x75
if (((cdr.Param[0] & 0x0F) > 0x09) || (cdr.Param[0] > 0x99) || ((cdr.Param[1] & 0x0F) > 0x09) || (cdr.Param[1] >= 0x60) || ((cdr.Param[2] & 0x0F) > 0x09) || (cdr.Param[2] >= 0x75))
}
else
{
+ u8 set_loc[3];
for (i = 0; i < 3; i++)
set_loc[i] = btoi(cdr.Param[i]);
+ if ((msfiEq(cdr.SetSector, set_loc)) //|| msfiEq(cdr.Param, cdr.Transfer))
+ && !cdr.SetlocPending)
+ cdr.RetryDetected++;
+ else
+ cdr.RetryDetected = 0;
memcpy(cdr.SetSector, set_loc, 3);
cdr.SetSector[3] = 0;
cdr.SetlocPending = 1;
CDR_LOG("PLAY track %d\n", cdr.CurTrack);
- if (CDR_getTD((u8)cdr.CurTrack, cdr.ResultTD) != -1) {
- for (i = 0; i < 3; i++)
- set_loc[i] = cdr.ResultTD[2 - i];
- seekTime = cdrSeekTime(set_loc);
- memcpy(cdr.SetSectorPlay, set_loc, 3);
+ if (cdra_getTD(cdr.CurTrack, cdr.ResultTD) != -1) {
+ seekTime = cdrSeekTime(cdr.ResultTD);
+ memcpy(cdr.SetSectorPlay, cdr.ResultTD, 3);
}
}
else if (cdr.SetlocPending) {
cdr.ReportDelay = 60;
cdr.sectorsRead = 0;
- if (!Config.Cdda)
- CDR_play(cdr.SetSectorPlay);
-
SetPlaySeekRead(cdr.StatP, STATUS_SEEK | STATUS_ROTATING);
// BIOS player - set flag again
cdr.Play = TRUE;
cdr.DriveState = DRIVESTATE_PLAY_READ;
+ cdr.PhysCdPropagations = 0;
CDRPLAYREAD_INT(cdReadTime + seekTime, 1);
start_rotating = 1;
error = ERROR_BAD_ARGNUM;
goto set_error;
}
- cdr.DriveState = DRIVESTATE_STANDBY;
second_resp_time = cdReadTime * 125 / 2;
start_rotating = 1;
break;
case CdlStop:
if (cdr.Play) {
// grab time for current track
- CDR_getTD((u8)(cdr.CurTrack), cdr.ResultTD);
-
- cdr.SetSectorPlay[0] = cdr.ResultTD[2];
- cdr.SetSectorPlay[1] = cdr.ResultTD[1];
- cdr.SetSectorPlay[2] = cdr.ResultTD[0];
+ cdra_getTD(cdr.CurTrack, cdr.ResultTD);
+ memcpy(cdr.SetSectorPlay, cdr.ResultTD, 3);
}
StopCdda();
cdr.sectorsRead = 0;
/*
- Gundam Battle Assault 2: much slower (*)
- - Fixes boot, gameplay
-
- Hokuto no Ken 2: slower
- - Fixes intro + subtitles
-
- InuYasha - Feudal Fairy Tale: slower
- - Fixes battles
+ Gundam Battle Assault 2
+ Hokuto no Ken 2
+ InuYasha - Feudal Fairy Tale
+ Dance Dance Revolution Konamix
+ Digimon Rumble Arena
+ ...
*/
- /* Gameblabla - Tightening the timings (as taken from Duckstation).
- * The timings from Duckstation are based upon hardware tests.
- * Mednafen's timing don't work for Gundam Battle Assault 2 in PAL/50hz mode,
- * seems to be timing sensitive as it can depend on the CPU's clock speed.
- * */
if (!(cdr.StatP & (STATUS_PLAY | STATUS_READ)))
{
second_resp_time = 7000;
}
else
{
- second_resp_time = (((cdr.Mode & MODE_SPEED) ? 1 : 2) * 1097107);
+ second_resp_time = 2100011;
+ // a hack to try to avoid weird cmd vs irq1 races causing games to retry
+ second_resp_time += (cdr.RetryDetected & 15) * 100001;
}
SetPlaySeekRead(cdr.StatP, 0);
DriveStateOld = cdr.DriveState;
break;
case CdlGetTN:
- if (CDR_getTN(cdr.ResultTN) == -1) {
+ if (cdra_getTN(cdr.ResultTN) != 0) {
assert(0);
}
SetResultSize_(3);
case CdlGetTD:
cdr.Track = btoi(cdr.Param[0]);
- if (CDR_getTD(cdr.Track, cdr.ResultTD) == -1) {
+ if (cdra_getTD(cdr.Track, cdr.ResultTD) != 0) {
error = ERROR_BAD_ARGVAL;
goto set_error;
}
SetResultSize_(3);
- cdr.Result[1] = itob(cdr.ResultTD[2]);
+ cdr.Result[1] = itob(cdr.ResultTD[0]);
cdr.Result[2] = itob(cdr.ResultTD[1]);
// no sector number
- //cdr.Result[3] = itob(cdr.ResultTD[0]);
+ //cdr.Result[3] = itob(cdr.ResultTD[2]);
break;
case CdlSeekL:
StopReading();
SetPlaySeekRead(cdr.StatP, STATUS_SEEK | STATUS_ROTATING);
- seekTime = cdrSeekTime(cdr.SetSector);
+ if (!canDoTurbo())
+ seekTime = cdrSeekTime(cdr.SetSector);
memcpy(cdr.SetSectorPlay, cdr.SetSector, 4);
cdr.DriveState = DRIVESTATE_SEEK;
- CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1],
+ cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1],
cdr.SetSectorPlay[2]);
/*
Crusaders of Might and Magic = 0.5x-4x
Find_CurTrack(cdr.SetSectorPlay);
read_ok = ReadTrack(cdr.SetSectorPlay);
- if (read_ok && (buf = CDR_getBuffer()))
+ if (read_ok && (buf = cdra_getBuffer()))
memcpy(cdr.LocL, buf, 8);
UpdateSubq(cdr.SetSectorPlay);
cdr.DriveState = DRIVESTATE_STANDBY;
cdr.Result[3] = 0;
// 0x10 - audio | 0x40 - disk missing | 0x80 - unlicensed
- if (CDR_getStatus(&stat) == -1 || stat.Type == 0 || stat.Type == 0xff) {
+ if (cdra_getStatus(&cdr_stat) != 0 || cdr_stat.Type == 0 || cdr_stat.Type == 0xff) {
cdr.Result[1] = 0xc0;
}
else {
- if (stat.Type == 2)
+ if (cdr_stat.Type == 2)
cdr.Result[1] |= 0x10;
if (CdromId[0] == '\0')
cdr.Result[1] |= 0x80;
cdr.SubqForwardSectors = 1;
cdr.sectorsRead = 0;
cdr.DriveState = DRIVESTATE_SEEK;
- CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1],
+ cdr.PhysCdPropagations = 0;
+ cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1],
cdr.SetSectorPlay[2]);
cycles = (cdr.Mode & MODE_SPEED) ? cdReadTime : cdReadTime * 2;
cycles += seekTime;
if (Config.hacks.cdr_read_timing)
cycles = cdrAlignTimingHack(cycles);
+ else if (canDoTurbo())
+ cycles = cdReadTime / 2;
CDRPLAYREAD_INT(cycles, 1);
SetPlaySeekRead(cdr.StatP, STATUS_SEEK);
setIrq(IrqStat, Cmd);
}
-#ifdef HAVE_ARMV7
- #define ssat32_to_16(v) \
- asm("ssat %0,#16,%1" : "=r" (v) : "r" (v))
-#else
- #define ssat32_to_16(v) do { \
- if (v < -32768) v = -32768; \
- else if (v > 32767) v = 32767; \
- } while (0)
-#endif
-
static void cdrPrepCdda(s16 *buf, int samples)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2],
cdr.CmdInProgress, cdr.IrqStat);
cdr.Irq1Pending = result;
+ // F1 2000 timing hack :(
+ // compensate for some csum func @80014380 taking too long
+ if (!cdr.AdpcmActive)
+ psxRegs.intCycle[PSXINT_CDREAD].sCycle += cdReadTime / 10;
return;
}
SetResultSize(1);
read_ok = ReadTrack(cdr.SetSectorPlay);
if (read_ok)
- buf = CDR_getBuffer();
+ buf = cdra_getBuffer();
if (buf == NULL)
read_ok = 0;
if ((cdr.Mode & MODE_SF) && (subhdr->mode & 0x44) == 0x44) // according to nocash
deliver_data = 0;
+ if (buf[3] != 1 && buf[3] != 2) { // according to duckstation
+ deliver_data = 0;
+ CDR_LOG_I("%x:%02x:%02x mode %02x ignored\n",
+ buf[0], buf[1], buf[2], buf[3]);
+ }
/*
Croc 2: $40 - only FORM1 (*)
cdrReadInterruptSetResult(cdr.StatP);
msfiAdd(cdr.SetSectorPlay, 1);
- CDR_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]);
+ cdra_prefetch(cdr.SetSectorPlay[0], cdr.SetSectorPlay[1], cdr.SetSectorPlay[2]);
CDRPLAYREAD_INT((cdr.Mode & MODE_SPEED) ? (cdReadTime / 2) : cdReadTime, 0);
}
cdrRead0:
bit 0,1 - reg index
bit 2 - adpcm active
- bit 5 - 1 result ready
- bit 6 - 1 dma ready
+ bit 3 - 1 parameter fifo empty
+ bit 4 - 1 parameter fifo not full
+ bit 5 - 1 response fifo not empty
+ bit 6 - 1 data fifo not empty
bit 7 - 1 command being processed
*/
unsigned char cdrRead0(void) {
- cdr.Ctrl &= ~0x24;
+ cdr.Ctrl &= ~0x64;
cdr.Ctrl |= cdr.AdpcmActive << 2;
cdr.Ctrl |= cdr.ResultReady << 5;
+ cdr.Ctrl |= ((signed int)(cdr.FifoOffset - cdr.FifoSize) >> 31) & 0x40;
- cdr.Ctrl |= 0x40; // data fifo not empty
-
- // What means the 0x10 and the 0x08 bits? I only saw it used by the bios
cdr.Ctrl |= 0x18;
CDR_LOG_IO("cdr r0.sta: %02x\n", cdr.Ctrl);
}
void psxDma3(u32 madr, u32 bcr, u32 chcr) {
- u32 cdsize, max_words;
+ u32 cdsize, max_words, cycles;
int size;
u8 *ptr;
}
psxCpu->Clear(madr, cdsize / 4);
- set_event(PSXINT_CDRDMA, (cdsize / 4) * 24);
+ cycles = (cdsize / 4) * 24;
+ set_event(PSXINT_CDRDMA, cycles);
HW_DMA3_CHCR &= SWAPu32(~0x10000000);
if (chcr & 0x100) {
}
else {
// halted
- psxRegs.cycle += (cdsize/4) * 24 - 20;
+ psxRegs.cycle += cycles - 20;
}
+ if (canDoTurbo() && cdr.Reading && cdr.FifoOffset >= 2048)
+ CDRPLAYREAD_INT(cycles + 4096, 1);
return;
default:
static void getCdInfo(void)
{
- u8 tmp;
-
- CDR_getTN(cdr.ResultTN);
- CDR_getTD(0, cdr.SetSectorEnd);
- tmp = cdr.SetSectorEnd[0];
- cdr.SetSectorEnd[0] = cdr.SetSectorEnd[2];
- cdr.SetSectorEnd[2] = tmp;
+ cdra_getTN(cdr.ResultTN);
+ cdra_getTD(0, cdr.SetSectorEnd);
}
void cdrReset() {
u32 tmp;
u8 tmpp[3];
- if (Mode == 0 && !Config.Cdda)
- CDR_stop();
-
- cdr.freeze_ver = 0x63647202;
+ cdr.freeze_ver = 0x63647203;
gzfreeze(&cdr, sizeof(cdr));
if (Mode == 1) {
cdr.SubqForwardSectors = SUBQ_FORWARD_SECTORS;
// read right sub data
- tmpp[0] = btoi(cdr.Prev[0]);
- tmpp[1] = btoi(cdr.Prev[1]);
- tmpp[2] = btoi(cdr.Prev[2]);
- cdr.Prev[0]++;
- ReadTrack(tmpp);
+ memcpy(tmpp, cdr.Prev, sizeof(tmpp));
+ if (cdr.freeze_ver < 0x63647203) {
+ tmpp[0] = btoi(tmpp[0]);
+ tmpp[1] = btoi(tmpp[1]);
+ tmpp[2] = btoi(tmpp[2]);
+ }
+ cdr.Prev[0] = 0xff;
+ if (tmpp[0] != 0xff)
+ ReadTrack(tmpp);
if (cdr.Play) {
if (cdr.freeze_ver < 0x63647202)
memcpy(cdr.SetSectorPlay, cdr.SetSector, 3);
Find_CurTrack(cdr.SetSectorPlay);
- if (!Config.Cdda)
- CDR_play(cdr.SetSectorPlay);
}
if (!cdr.Muted)
- ll = cdr.AttenuatorLeftToLeft, lr = cdr.AttenuatorLeftToLeft,
+ ll = cdr.AttenuatorLeftToLeft, lr = cdr.AttenuatorLeftToRight,
rl = cdr.AttenuatorRightToLeft, rr = cdr.AttenuatorRightToRight;
SPU_setCDvol(ll, lr, rl, rr, psxRegs.cycle);
}
#define MIN_VALUE(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; })
#define MAX_VALUE(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? _a : _b; })
-#define MSF2SECT(m, s, f) (((m) * 60 + (s) - 2) * 75 + (f))
-
#define CD_FRAMESIZE_RAW 2352
#define DATA_SIZE (CD_FRAMESIZE_RAW - 12)
+/* CD_FRAMESIZE_RAW aligned to a cache line for DMA buffers
+ * (assuming a cache line of max. 64 bytes) */
+#define CD_FRAMESIZE_RAW_ALIGNED 2368
+
#define SUB_FRAMESIZE 96
+#define MSF2SECT(m, s, f) (((m) * 60 + (s) - 2) * 75 + (f))
+
+static inline void lba2msf(unsigned int lba, u8 *m, u8 *s, u8 *f) {
+ *m = lba / 75 / 60;
+ lba = lba - *m * 75 * 60;
+ *s = lba / 75;
+ lba = lba - *s * 75;
+ *f = lba;
+}
+
void cdrReset();
void cdrInterrupt(void);
#include "misc.h"
#include "sio.h"
#include "ppf.h"
+#include "cdrom-async.h"
#include "new_dynarec/new_dynarec.h"
+#include "lightrec/plugin.h"
/* It's duplicated from emu_if.c */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+/* Corresponds to LIGHTREC_OPT_INV_DMA_ONLY of lightrec.h */
+#define LIGHTREC_HACK_INV_DMA_ONLY (1 << 0)
+
+u32 lightrec_hacks;
+
static const char * const MemorycardHack_db[] =
{
/* Lifeforce Tenka, also known as Codename Tenka */
"SLES01712", "SLPS01525", "SLPS91138", "SLPM87102", "SLUS00823",
/* Crash Bash */
"SCES02834", "SCUS94570", "SCUS94616", "SCUS94654",
+ /* F1 2000 - aborting/resuming dma in menus */
+ "SLUS01120", "SLES02722", "SLES02723", "SLES02724", "SLPS02758", "SLPM80564",
/* Final Fantasy IV */
"SCES03840", "SLPM86028", "SLUS01360",
/* Point Blank - calibration cursor */
{
/* Gradius Gaiden */
"SLPM86042", "SLPM86103", "SLPM87323",
+ /* Salamander Deluxe Pack Plus */
+ "SLPM86037",
/* Sexy Parodius */
"SLPM86009",
};
-static const char * const dualshock_timing1024_hack_db[] =
-{
- /* Judge Dredd - could also be poor cdrom+mdec+dma timing */
- "SLUS00630", "SLES00755",
-};
-
static const char * const dualshock_init_analog_hack_db[] =
{
/* Formula 1 Championship Edition */
static const char * const fractional_Framerate_hack_db[] =
{
+ /* Contra - Legacy of War - weird char select hang */
+ "SLUS00288", "SLES00608",
/* Dance Dance Revolution */
"SLPM86503", // 3rd Mix
"SLPM86752", // 4th Mix
"SCUS94425", "SCES02104",
};
+static const char * const f1_hack_db[] =
+{
+ /* Formula One Arcade */
+ "SCES03886",
+ /* Formula One '99 */
+ "SLUS00870", "SCPS10101", "SCES01979", "SLES01979",
+ /* Formula One 2000 */
+ "SLUS01134", "SCES02777", "SCES02778", "SCES02779",
+ /* Formula One 2001 */
+ "SCES03404", "SCES03423", "SCES03424", "SCES03524",
+};
+
#define HACK_ENTRY(var, list) \
{ #var, &Config.hacks.var, list, ARRAY_SIZE(list) }
HACK_ENTRY(cdr_read_timing, cdr_read_hack_db),
HACK_ENTRY(gpu_slow_list_walking, gpu_slow_llist_db),
HACK_ENTRY(gpu_centering, gpu_centering_hack_db),
- HACK_ENTRY(gpu_timing1024, dualshock_timing1024_hack_db),
HACK_ENTRY(dualshock_init_analog, dualshock_init_analog_hack_db),
HACK_ENTRY(fractional_Framerate, fractional_Framerate_hack_db),
+ HACK_ENTRY(f1, f1_hack_db),
};
static const struct
{ 174, { "SLES00477" } },
/* Brave Fencer Musashi - cd sectors arrive too fast */
{ 170, { "SLUS00726", "SLPS01490" } },
-#if defined(DRC_DISABLE) || defined(LIGHTREC) /* new_dynarec has a hack for this game */
+#if defined(DRC_DISABLE) || defined(LIGHTREC) /* ari64 drc has a hack for this game */
/* Parasite Eve II - internal timer checks */
{ 125, { "SLUS01042", "SLUS01055", "SLES02558", "SLES12558" } },
+ { 125, { "SLES02559", "SLES12559", "SLES02560", "SLES12560" } },
+ { 125, { "SLES02561", "SLES12561", "SLES02562", "SLES12562" } },
+ { 125, { "SCPS45467", "SCPS45468", "SLPS02480", "SLPS02481" } },
#endif
/* Discworld Noir - audio skips if CPU runs too fast */
{ 222, { "SLES01549", "SLES02063", "SLES02064" } },
/* Digimon World */
{ 153, { "SLUS01032", "SLES02914" } },
+ /* Power Rangers: Lightspeed Rescue - jumping fails if FPS is over 30 */
+ { 310, { "SLUS01114", "SLES03286" } },
/* Syphon Filter - reportedly hangs under unknown conditions */
{ 169, { "SCUS94240" } },
+#ifndef DRC_DISABLE
/* Psychic Detective - some weird race condition in the game's cdrom code */
- { 222, { "SLUS00165", "SLUS00166", "SLUS00167" } },
- { 222, { "SLES00070", "SLES10070", "SLES20070" } },
+ { 181, { "SLUS00165", "SLUS00166", "SLUS00167" } },
+ { 181, { "SLES00070", "SLES10070", "SLES20070" } },
+#endif
/* Vib-Ribbon - cd timing issues (PAL+ari64drc only?) */
{ 200, { "SCES02873" } },
/* Zero Divide - sometimes too fast */
{ 200, { "SLUS00183", "SLES00159", "SLPS00083", "SLPM80008" } },
+ /* Eagle One: Harrier Attack - hangs (but not in standalone build?) */
+ { 153, { "SLUS00943" } },
+ /* Sol Divide: FMV timing */
+ { 200, { "SLUS01519", "SCPS45260", "SLPS01463" } },
+ /* Legend of Legaia - some attack moves lag and cause a/v desync */
+ { 160, { "SCUS94254", "SCUS94366", "SCES01752" } },
+ { 160, { "SCES01944", "SCES01945", "SCES01946", "SCES01947" } },
+};
+
+static const struct
+{
+ int cycles;
+ const char * const id[4];
+}
+gpu_timing_hack_db[] =
+{
+ /* Judge Dredd - poor cdrom+mdec+dma+gpu timing */
+ { 1024, { "SLUS00630", "SLES00755" } },
+ /* F1 2000 - flooding the GPU in menus */
+ { 300*1024, { "SLUS01120", "SLES02722", "SLES02723", "SLES02724" } },
+ { 300*1024, { "SLPS02758", "SLPM80564" } },
+ /* Soul Blade - same as above */
+ { 512*1024, { "SLUS00240", "SCES00577" } },
+};
+
+static const char * const lightrec_hack_db[] =
+{
+ /* Tomb Raider (Rev 2) - boot menu clears over itself */
+ "SLUS00152",
};
/* Function for automatic patching according to GameID. */
}
/* Dynarec game-specific hacks */
- new_dynarec_hacks_pergame = 0;
+ ndrc_g.hacks_pergame = 0;
+ if (Config.hacks.f1)
+ ndrc_g.hacks_pergame |= NDHACK_THREAD_FORCE; // force without *_ON -> off
Config.cycle_multiplier_override = 0;
for (i = 0; i < ARRAY_SIZE(cycle_multiplier_overrides); i++)
if (j < ARRAY_SIZE(cycle_multiplier_overrides[i].id))
{
Config.cycle_multiplier_override = cycle_multiplier_overrides[i].mult;
- new_dynarec_hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M;
+ ndrc_g.hacks_pergame |= NDHACK_OVERRIDE_CYCLE_M;
SysPrintf("using cycle_multiplier_override: %d\n",
Config.cycle_multiplier_override);
break;
}
}
+
+ Config.gpu_timing_override = 0;
+ for (i = 0; i < ARRAY_SIZE(gpu_timing_hack_db); i++)
+ {
+ const char * const * const ids = gpu_timing_hack_db[i].id;
+ for (j = 0; j < ARRAY_SIZE(gpu_timing_hack_db[i].id); j++)
+ if (ids[j] && strcmp(ids[j], CdromId) == 0)
+ break;
+ if (j < ARRAY_SIZE(gpu_timing_hack_db[i].id))
+ {
+ Config.gpu_timing_override = gpu_timing_hack_db[i].cycles;
+ SysPrintf("using gpu_timing_override: %d\n",
+ Config.gpu_timing_override);
+ break;
+ }
+ }
+
+ if (drc_is_lightrec()) {
+ lightrec_hacks = 0;
+ if (Config.hacks.f1)
+ lightrec_hacks |= LIGHTREC_HACK_INV_DMA_ONLY;
+ for (i = 0; i < ARRAY_SIZE(lightrec_hack_db); i++)
+ if (strcmp(lightrec_hack_db[i], CdromId) == 0)
+ lightrec_hacks |= LIGHTREC_HACK_INV_DMA_ONLY;
+ if (lightrec_hacks)
+ SysPrintf("using lightrec_hacks: 0x%x\n", lightrec_hacks);
+ }
}
// from duckstation's gamedb.json
int check_unsatisfied_libcrypt(void)
{
const char *p = CdromId + 4;
+ u8 buf_sub[SUB_FRAMESIZE];
u16 id, key = 0;
+ u8 msf[3];
size_t i;
if (strncmp(CdromId, "SCE", 3) && strncmp(CdromId, "SLE", 3))
return 0;
// detected a protected game
- if (!CDR_getBufferSub(libcrypt_sectors[0]) && !sbi_sectors) {
+ lba2msf(libcrypt_sectors[0] + 150, &msf[0], &msf[1], &msf[2]);
+ if (!sbi_sectors && cdra_readSub(msf, buf_sub) != 0) {
SysPrintf("==================================================\n");
SysPrintf("LibCrypt game detected with missing SBI/subchannel\n");
SysPrintf("==================================================\n");
#include "gpu.h"
#include "psxdma.h"
-void gpu_state_change(int what)
+void gpu_state_change(int what, int cycles)
{
enum psx_gpu_state state = what;
switch (state)
psxRegs.gpuIdleAfter = psxRegs.cycle + PSXCLK / 50;
break;
case PGS_VRAM_TRANSFER_END:
- psxRegs.gpuIdleAfter = psxRegs.cycle;
+ psxRegs.gpuIdleAfter = psxRegs.cycle - 1;
break;
case PGS_PRIMITIVE_START:
- psxRegs.gpuIdleAfter = psxRegs.cycle + 200;
+ // limit because gpulib delays things with its buffering...
+ if (cycles > 512)
+ cycles = 512;
+ psxRegs.gpuIdleAfter = psxRegs.cycle + cycles - 1;
break;
}
}
#ifndef __GPU_H__
#define __GPU_H__
+#include <stdint.h>
+
+typedef struct GPUFreeze {
+ uint32_t ulFreezeVersion;
+ uint32_t ulStatus;
+ uint32_t ulControl[256];
+ unsigned char psxVRam[1024*512*2];
+} GPUFreeze_t;
+
#define PSXGPU_LCF (1u<<31)
#define PSXGPU_nBUSY (1u<<26)
#define PSXGPU_ILACE (1u<<22)
+#define PSXGPU_RGB24 (1u<<21)
#define PSXGPU_DHEIGHT (1u<<19)
#define PSXGPU_FIELD (1u<<13)
PGS_PRIMITIVE_START, // for non-dma only
};
-void gpu_state_change(int what);
+void gpu_state_change(int what, int cycles);
#endif /* __GPU_H__ */
#include "gte.h"
#include "psxmem.h"
+#include "../include/compiler_features.h"
#define VX(n) (n < 3 ? regs->CP2D.p[n << 1].sw.l : regs->CP2D.p[9].sw.l)
#define VY(n) (n < 3 ? regs->CP2D.p[n << 1].sw.h : regs->CP2D.p[10].sw.l)
static inline u32 limE_(psxCP2Regs *regs, u32 result) {
if (result > 0x1ffff) {
- gteFLAG |= (1 << 31) | (1 << 17);
+ gteFLAG |= (1u << 31) | (1u << 17);
return 0x1ffff;
}
return result;
#define limE(result) \
limE_(regs,result)
-#define A1(a) BOUNDS((a), 0x7fffffff, (1 << 30), -(s64)0x80000000, (1 << 31) | (1 << 27))
-#define A2(a) BOUNDS((a), 0x7fffffff, (1 << 29), -(s64)0x80000000, (1 << 31) | (1 << 26))
-#define A3(a) BOUNDS((a), 0x7fffffff, (1 << 28), -(s64)0x80000000, (1 << 31) | (1 << 25))
-#define limB1(a, l) LIM((a), 0x7fff, -0x8000 * !l, (1 << 31) | (1 << 24))
-#define limB2(a, l) LIM((a), 0x7fff, -0x8000 * !l, (1 << 31) | (1 << 23))
-#define limB3(a, l) LIM((a), 0x7fff, -0x8000 * !l, (1 << 22))
-#define limC1(a) LIM((a), 0x00ff, 0x0000, (1 << 21))
-#define limC2(a) LIM((a), 0x00ff, 0x0000, (1 << 20))
-#define limC3(a) LIM((a), 0x00ff, 0x0000, (1 << 19))
-#define limD(a) LIM((a), 0xffff, 0x0000, (1 << 31) | (1 << 18))
-
-#define F(a) BOUNDS((a), 0x7fffffff, (1 << 31) | (1 << 16), -(s64)0x80000000, (1 << 31) | (1 << 15))
-#define limG1(a) LIM((a), 0x3ff, -0x400, (1 << 31) | (1 << 14))
-#define limG2(a) LIM((a), 0x3ff, -0x400, (1 << 31) | (1 << 13))
-#define limH(a) LIM((a), 0x1000, 0x0000, (1 << 12))
+#define A1(a) BOUNDS((a), 0x7fffffff, (1u << 30), -(s64)0x80000000, (1u << 31) | (1u << 27))
+#define A2(a) BOUNDS((a), 0x7fffffff, (1u << 29), -(s64)0x80000000, (1u << 31) | (1u << 26))
+#define A3(a) BOUNDS((a), 0x7fffffff, (1u << 28), -(s64)0x80000000, (1u << 31) | (1u << 25))
+#define limB1(a, l) LIM((a), 0x7fff, -0x8000 * !l, (1u << 31) | (1u << 24))
+#define limB2(a, l) LIM((a), 0x7fff, -0x8000 * !l, (1u << 31) | (1u << 23))
+#define limB3(a, l) LIM((a), 0x7fff, -0x8000 * !l, (1u << 22))
+#define limC1(a) LIM((a), 0x00ff, 0x0000, (1u << 21))
+#define limC2(a) LIM((a), 0x00ff, 0x0000, (1u << 20))
+#define limC3(a) LIM((a), 0x00ff, 0x0000, (1u << 19))
+#define limD(a) LIM((a), 0xffff, 0x0000, (1u << 31) | (1u << 18))
+
+#define F(a) BOUNDS((a), 0x7fffffff, (1u << 31) | (1u << 16), -(s64)0x80000000, (1u << 31) | (1u << 15))
+#define limG1(a) LIM((a), 0x3ff, -0x400, (1u << 31) | (1u << 14))
+#define limG2(a) LIM((a), 0x3ff, -0x400, (1u << 31) | (1u << 13))
+#define limH(a) LIM((a), 0x1000, 0x0000, (1u << 12))
#ifndef __arm__
#define A1U A1
23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 39,
};
-// warning: called by the dynarec
-int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs) {
+// warning: ari64 drc stores its negative cycles in gteBusyCycle
+static int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs) {
u32 left = regs->gteBusyCycle - regs->cycle;
int stall = 0;
return regs->CP2D.r[reg];
}
+static u32 lzc(s32 val)
+{
+#if __has_builtin(__builtin_clrsb)
+ return 1 + __builtin_clrsb(val);
+#else
+ val ^= val >> 31;
+ return val ? __builtin_clz(val) : 32;
+#endif
+}
+
void MTC2(struct psxCP2Regs *regs, u32 value, int reg) {
switch (reg) {
case 15:
break;
case 30:
- {
- int a;
- gteLZCS = value;
-
- a = gteLZCS;
- if (a > 0) {
- int i;
- for (i = 31; (a & (1 << i)) == 0 && i >= 0; i--);
- gteLZCR = 31 - i;
- } else if (a < 0) {
- int i;
- a ^= 0xffffffff;
- for (i = 31; (a & (1 << i)) == 0 && i >= 0; i--);
- gteLZCR = 31 - i;
- } else {
- gteLZCR = 32;
- }
- }
+ gteLZCS = value;
+ gteLZCR = lzc(value);
break;
case 31:
extern const unsigned char gte_cycletab[64];
-int gteCheckStallRaw(u32 op_cycles, psxRegisters *regs);
void gteCheckStall(u32 op);
u32 MFC2(struct psxCP2Regs *regs, int reg);
subs r2, r4, #1
orrvs lr, #(1<<15) @ F
orrvs lr, #(1<<31)
- cmp r4, #0x1000
+ cmp r4, #0x1000<<12
orrhi lr, #(1<<12) @ limH
str lr, [r0, #4*(32+31)] @ gteFLAG
subs r2, r4, #1
orrvs lr, #(1<<15)
orrvs lr, #(1<<31) @ F
- cmp r4, #0x1000
+ cmp r4, #0x1000<<12
orrhi lr, #(1<<12) @ limH
str lr, [r0, #4*(32+31)] @ gteFLAG
--- /dev/null
+#!/usr/bin/env python3
+
+from time import sleep
+from sys import argv
+from os import environ
+import subprocess
+
+def get_next_line(p):
+ line = ""
+
+ while line[0:5] != "CYCLE":
+ line = p.readline().decode()
+
+ if (len(line) == 0):
+ sleep(0.001)
+ elif line[0:5] != "CYCLE":
+ print(line[:-1])
+
+ return line
+
+def print_differences(inter, dynarec):
+ inter_array = inter.split(" ")
+ inter_dict = dict(zip(inter_array[::2], inter_array[1::2]))
+ dynarec_array = dynarec.split(" ")
+ dynarec_dict = dict(zip(dynarec_array[::2], dynarec_array[1::2]))
+
+ diff = dict([(k, (inter_dict[k], dynarec_dict[k])) for k in inter_dict.keys() if inter_dict[k] != dynarec_dict[k]])
+
+ print("\nDifferences:")
+ print("{:15}{:15}{:15}".format("", "Interpreter", "Dynarec"))
+ for k in diff:
+ print("{:15}{:15}{:15}".format(k, diff[k][0], diff[k][1]))
+
+def print_mismatch(inter, dynarec, oldline):
+ print("\nMismatch!")
+ print(inter + " - Interpreter")
+ print(dynarec + " - Dynarec")
+ print("State before the mismatch:")
+ print(oldline)
+ print_differences(inter, dynarec)
+
+def read_loop(p1, p2):
+ oldline = ""
+
+ while True:
+ line1 = get_next_line(p1)
+ line2 = get_next_line(p2)
+
+ if line1 != line2:
+ # TODO: Proper matching
+
+ # Lightrec might be lagging behind
+ #if line1[0:16] != line2[0:16]:
+ if line1[6:16] != line2[6:16]:
+ cycle1 = int(line1[6:16], 16)
+ cycle2 = int(line2[6:16], 16)
+
+ if cycle1 < cycle2:
+ print(line2[:-1] + " - Dynarec")
+
+ while cycle1 < cycle2:
+ print(line1[:-1] + " - Interpreter lagging behind")
+ print_differences(line1[:-1], line2[:-1])
+ line1 = get_next_line(p1)
+ cycle1 = int(line1[6:16], 16)
+
+ while cycle1 > cycle2:
+ print(line2[:-1] + " - Dynarec lagging behind")
+ print_differences(line1[:-1], line2[:-1])
+ line2 = get_next_line(p2)
+ cycle2 = int(line2[6:16], 16)
+
+ if line1 != line2:
+ print_mismatch(line1[:-1], line2[:-1], oldline)
+ break
+
+ if cycle2 < cycle1:
+ print(line1[:-1] + " - Interpreter")
+
+ while cycle1 > cycle2:
+ print(line2[:-1] + " - Dynarec lagging behind")
+ print_differences(line1[:-1], line2[:-1])
+ line2 = get_next_line(p2)
+ cycle2 = int(line2[6:16], 16)
+
+ while cycle1 < cycle2:
+ print(line1[:-1] + " - Interpreter lagging behind")
+ print_differences(line1[:-1], line2[:-1])
+ line1 = get_next_line(p1)
+ cycle1 = int(line1[6:16], 16)
+
+ if line1 != line2:
+ print_mismatch(line1[:-1], line2[:-1], oldline)
+ break
+
+ if line1 == line2:
+ oldline = line1[:-1]
+ print(oldline[:16] + " - Match")
+ continue
+
+ print_mismatch(line1[:-1], line2[:-1], oldline)
+ break
+ else:
+ oldline = line1[:-1]
+
+def main():
+ with subprocess.Popen(['./pcsx'] + argv[1:], env={ **environ, 'LIGHTREC_DEBUG': '1', 'LIGHTREC_INTERPRETER': '1' }, stdout=subprocess.PIPE, bufsize=1) as fifo_int:
+ with subprocess.Popen(['./pcsx'] + argv[1:], env={ **environ, 'LIGHTREC_DEBUG': '1' }, stdout=subprocess.PIPE, bufsize=1) as fifo_jit:
+ read_loop(fifo_int.stdout, fifo_jit.stdout)
+
+if __name__ == '__main__':
+ main()
--- /dev/null
+// SPDX-License-Identifier: LGPL-2.1-or-later
+/*
+ * Copyright (C) 2022 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../psxhw.h"
+#include "../psxmem.h"
+#include "../r3000a.h"
+
+#include "mem.h"
+
+#define ARRAY_SIZE(a) (sizeof(a) ? (sizeof(a) / sizeof((a)[0])) : 0)
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE 0x100000
+#endif
+
+#ifndef MFD_HUGETLB
+#define MFD_HUGETLB 0x0004
+#endif
+
+static const uintptr_t supported_io_bases[] = {
+ 0x0,
+ 0x10000000,
+ 0x40000000,
+ 0x80000000,
+};
+
+static void * mmap_huge(void *addr, size_t length, int prot, int flags,
+ int fd, off_t offset)
+{
+ void *map = MAP_FAILED;
+
+ if (length >= 0x200000) {
+ map = mmap(addr, length, prot,
+ flags | MAP_HUGETLB | (21 << MAP_HUGE_SHIFT),
+ fd, offset);
+ if (map != MAP_FAILED)
+ printf("Hugetlb mmap to address 0x%" PRIxPTR " succeeded\n",
+ (uintptr_t) addr);
+ }
+
+ if (map == MAP_FAILED) {
+ map = mmap(addr, length, prot, flags, fd, offset);
+ if (map != MAP_FAILED) {
+ printf("Regular mmap to address 0x%" PRIxPTR " succeeded\n",
+ (uintptr_t) addr);
+#ifdef MADV_HUGEPAGE
+ madvise(map, length, MADV_HUGEPAGE);
+#endif
+ }
+ }
+
+ return map;
+}
+
+static int lightrec_mmap_ram(bool hugetlb)
+{
+ unsigned int i, j;
+ int err, memfd, flags = 0;
+ uintptr_t base;
+ void *map;
+
+ if (hugetlb)
+ flags |= MFD_HUGETLB;
+
+ memfd = syscall(SYS_memfd_create, "/lightrec_memfd",
+ flags);
+ if (memfd < 0) {
+ SysMessage("Failed to create memfd: %d", errno);
+ err = -errno;
+ return err;
+ }
+
+ err = ftruncate(memfd, 0x200000);
+ if (err < 0) {
+ SysMessage("Could not trim memfd: %d", errno);
+ err = -errno;
+ goto err_close_memfd;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(supported_io_bases); i++) {
+ base = supported_io_bases[i];
+
+ for (j = 0; j < 4; j++) {
+ void *base_ptr = (void *)(base + j * 0x200000);
+ map = mmap_huge(base_ptr, 0x200000, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED_NOREPLACE, memfd, 0);
+ if (map == MAP_FAILED)
+ break;
+ // some systems ignore MAP_FIXED_NOREPLACE
+ if (map != base_ptr) {
+ munmap(map, 0x200000);
+ break;
+ }
+ }
+
+ /* Impossible to map using this base */
+ if (j == 0)
+ continue;
+
+ /* All mirrors mapped - we got a match! */
+ if (j == 4)
+ break;
+
+ /* Only some mirrors mapped - clean the mess and try again */
+ for (; j > 0; j--)
+ munmap((void *)(base + (j - 1) * 0x200000), 0x200000);
+ }
+
+ if (i == ARRAY_SIZE(supported_io_bases)) {
+ err = -EINVAL;
+ goto err_close_memfd;
+ }
+
+ err = 0;
+ psxM = (s8 *)base;
+
+err_close_memfd:
+ close(memfd);
+ return err;
+}
+
+int lightrec_init_mmap(void)
+{
+ unsigned int i;
+ s8 *base, *target;
+ void *map;
+ int err = lightrec_mmap_ram(true);
+ if (err) {
+ err = lightrec_mmap_ram(false);
+ if (err) {
+ SysMessage("Unable to mmap RAM and mirrors");
+ return err;
+ }
+ }
+
+ base = psxM;
+
+ target = base + 0x1f000000;
+ map = mmap(target, 0x10000,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | /*MAP_FIXED_NOREPLACE |*/ MAP_ANONYMOUS, -1, 0);
+ if (map == MAP_FAILED) {
+ SysMessage("Unable to mmap parallel port: %d", errno);
+ err = -EINVAL;
+ goto err_unmap;
+ }
+ if (map != target)
+ SysMessage("lightrec: mapped parallel port at %p, wanted %p", map, target);
+
+ psxP = (s8 *)map;
+
+ target = base + 0x1fc00000;
+ map = mmap_huge(target, 0x200000,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | /*MAP_FIXED_NOREPLACE |*/ MAP_ANONYMOUS, -1, 0);
+ if (map == MAP_FAILED) {
+ SysMessage("Unable to mmap BIOS: %d", errno);
+ err = -EINVAL;
+ goto err_unmap_parallel;
+ }
+ if (map != target)
+ SysMessage("lightrec: mapped bios at %p, wanted %p", map, target);
+
+ psxR = (s8 *)map;
+
+ target = base + 0x1f800000;
+ map = mmap(target, 0x10000,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | /*MAP_FIXED_NOREPLACE |*/ MAP_ANONYMOUS, 0, 0);
+ if (map == MAP_FAILED) {
+ SysMessage("Unable to mmap scratchpad: %d", errno);
+ err = -EINVAL;
+ goto err_unmap_bios;
+ }
+ if (map != target)
+ SysMessage("lightrec: mapped scratchpad at %p, wanted %p", map, target);
+
+ psxH = (s8 *)map;
+
+ target = base + 0x800000;
+ map = mmap_huge(target, CODE_BUFFER_SIZE,
+ PROT_EXEC | PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | /*MAP_FIXED_NOREPLACE |*/ MAP_ANONYMOUS,
+ -1, 0);
+ if (map == MAP_FAILED) {
+ SysMessage("Unable to mmap code buffer: %d", errno);
+ err = -EINVAL;
+ goto err_unmap_scratch;
+ }
+ if (map != target)
+ SysMessage("lightrec: mapped code at %p, wanted %p", map, target);
+
+ code_buffer = map;
+
+ return 0;
+
+err_unmap_scratch:
+ munmap(psxH, 0x10000);
+err_unmap_bios:
+ munmap(psxR, 0x200000);
+err_unmap_parallel:
+ munmap(psxP, 0x10000);
+err_unmap:
+ for (i = 0; i < 4; i++)
+ munmap((void *)((uintptr_t)psxM + i * 0x200000), 0x200000);
+ return err;
+}
+
+void lightrec_free_mmap(void)
+{
+ unsigned int i;
+
+ munmap(code_buffer, CODE_BUFFER_SIZE);
+ munmap(psxH, 0x10000);
+ munmap(psxR, 0x200000);
+ munmap(psxP, 0x10000);
+ for (i = 0; i < 4; i++)
+ munmap((void *)((uintptr_t)psxM + i * 0x200000), 0x200000);
+}
#ifdef LIGHTREC
-#define CODE_BUFFER_SIZE (8 * 1024 * 1024)
+#ifdef HW_WUP /* WiiU */
+# define WUP_RWX_MEM_BASE 0x00802000
+# define WUP_RWX_MEM_END 0x01000000
+# define CODE_BUFFER_SIZE_DFT (WUP_RWX_MEM_END - WUP_RWX_MEM_BASE)
+#else
+# define CODE_BUFFER_SIZE_DFT (8 * 1024 * 1024)
+#endif
+
+#ifndef CODE_BUFFER_SIZE
+#define CODE_BUFFER_SIZE CODE_BUFFER_SIZE_DFT
+#endif
extern void *code_buffer;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2022 Ash Logan <ash@heyquark.com>
+ */
+
+#include <coreinit/memorymap.h>
+#include <malloc.h>
+#include <stdbool.h>
+
+#include "../memmap.h"
+#include "../psxhw.h"
+#include "../psxmem.h"
+#include "../r3000a.h"
+
+#include "mem.h"
+
+void wiiu_clear_cache(void *start, void *end);
+
+static void* wiiu_mmap(uint32_t requested_va, size_t length, void* backing_mem) {
+ if (length < OS_PAGE_SIZE) length = OS_PAGE_SIZE;
+
+ uint32_t va = OSAllocVirtAddr(requested_va, length, 0);
+ if (!va) return MAP_FAILED;
+
+ BOOL mapped = OSMapMemory(va, OSEffectiveToPhysical((uint32_t)backing_mem),
+ length, OS_MAP_MEMORY_READ_WRITE);
+ if (!mapped) {
+ OSFreeVirtAddr(va, length);
+ return MAP_FAILED;
+ }
+
+ return (void*)va;
+}
+
+static void wiiu_unmap(void* va, size_t length) {
+ if (va == MAP_FAILED) return;
+ OSUnmapMemory((uint32_t)va, length);
+ OSFreeVirtAddr((uint32_t)va, length);
+}
+
+static void* psx_mem;
+static void* psx_parallel;
+static void* psx_scratch;
+static void* psx_bios;
+
+int lightrec_init_mmap(void) {
+ psx_mem = memalign(OS_PAGE_SIZE, 0x200000);
+ psx_parallel = memalign(OS_PAGE_SIZE, 0x10000);
+ psx_scratch = memalign(OS_PAGE_SIZE, 0x10000);
+ psx_bios = memalign(OS_PAGE_SIZE, 0x80000);
+ if (!psx_mem || !psx_parallel || !psx_scratch || !psx_bios)
+ goto cleanup_allocations;
+
+ uint32_t avail_va;
+ uint32_t avail_va_size;
+ OSGetMapVirtAddrRange(&avail_va, &avail_va_size);
+ if (!avail_va || avail_va_size < 0x20000000)
+ goto cleanup_allocations;
+
+ // Map 4x ram mirrors
+ int i;
+ for (i = 0; i < 4; i++) {
+ void* ret = wiiu_mmap(avail_va + 0x200000 * i, 0x200000, psx_mem);
+ if (ret == MAP_FAILED) break;
+ }
+ if (i != 4) {
+ for (int i = 0; i < 4; i++)
+ wiiu_unmap(avail_va + 0x200000 * i, 0x200000);
+ goto cleanup_allocations;
+ }
+ psxM = (void*)avail_va;
+
+ psxP = wiiu_mmap(avail_va + 0x1f000000, 0x10000, psx_parallel);
+ psxH = wiiu_mmap(avail_va + 0x1f800000, 0x10000, psx_scratch);
+ psxR = wiiu_mmap(avail_va + 0x1fc00000, 0x80000, psx_bios);
+
+ if (psxP == MAP_FAILED || psxH == MAP_FAILED || psxR == MAP_FAILED) {
+ for (int i = 0; i < 4; i++)
+ wiiu_unmap(psxM + 0x200000 * i, 0x200000);
+ wiiu_unmap(psxP, 0x10000);
+ wiiu_unmap(psxH, 0x10000);
+ wiiu_unmap(psxR, 0x80000);
+ goto cleanup_allocations;
+ }
+
+ code_buffer = WUP_RWX_MEM_BASE;
+
+ return 0;
+
+cleanup_allocations:
+ free(psx_mem);
+ free(psx_parallel);
+ free(psx_scratch);
+ free(psx_bios);
+ return -1;
+}
+
+void lightrec_free_mmap(void) {
+ for (int i = 0; i < 4; i++)
+ wiiu_unmap(psxM + 0x200000 * i, 0x200000);
+ wiiu_unmap(psxP, 0x10000);
+ wiiu_unmap(psxH, 0x10000);
+ wiiu_unmap(psxR, 0x80000);
+ free(psx_mem);
+ free(psx_parallel);
+ free(psx_scratch);
+ free(psx_bios);
+}
+
+void lightrec_code_inv(void *ptr, uint32_t len)
+{
+ wiiu_clear_cache(ptr, (void *)((uintptr_t)ptr + len));
+}
--- /dev/null
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <assert.h>
+
+#if P_HAVE_MMAP
+#include <sys/mman.h>
+#endif
+
+#include "lightrec.h"
+#include "../cdrom.h"
+#include "../gpu.h"
+#include "../gte.h"
+#include "../mdec.h"
+#include "../psxdma.h"
+#include "../psxhw.h"
+#include "../psxmem.h"
+#include "../r3000a.h"
+#include "../psxinterpreter.h"
+#include "../psxhle.h"
+#include "../psxevents.h"
+
+#include "../frontend/main.h"
+
+#include "mem.h"
+#include "plugin.h"
+
+#if (defined(__arm__) || defined(__aarch64__)) && !defined(ALLOW_LIGHTREC_ON_ARM)
+#error "Lightrec should not be used on ARM (please specify DYNAREC=ari64 to make)"
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x) ? sizeof(x) / sizeof((x)[0]) : 0)
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define LE32TOH(x) __builtin_bswap32(x)
+# define HTOLE32(x) __builtin_bswap32(x)
+# define LE16TOH(x) __builtin_bswap16(x)
+# define HTOLE16(x) __builtin_bswap16(x)
+#else
+# define LE32TOH(x) (x)
+# define HTOLE32(x) (x)
+# define LE16TOH(x) (x)
+# define HTOLE16(x) (x)
+#endif
+
+#ifdef __GNUC__
+# define likely(x) __builtin_expect(!!(x),1)
+# define unlikely(x) __builtin_expect(!!(x),0)
+#else
+# define likely(x) (x)
+# define unlikely(x) (x)
+#endif
+
+#ifndef LIGHTREC_PROG_NAME
+# ifdef __linux__
+# define LIGHTREC_PROG_NAME "/proc/self/exe"
+# else
+# define LIGHTREC_PROG_NAME "retroarch.exe"
+# endif
+#endif
+
+#ifndef GPUSTATUS_POLLING_THRESHOLD
+# define GPUSTATUS_POLLING_THRESHOLD 0
+#endif
+
+psxRegisters psxRegs;
+Rcnt rcnts[4];
+
+void* code_buffer;
+
+static struct lightrec_state *lightrec_state;
+
+static bool use_lightrec_interpreter;
+static bool block_stepping;
+//static bool use_pcsx_interpreter;
+#define use_pcsx_interpreter 0
+static bool ram_disabled;
+static bool lightrec_debug, lightrec_very_debug;
+static u32 lightrec_begin_cycles;
+
+extern u32 lightrec_hacks;
+
+static void lightrec_plugin_apply_config();
+extern void lightrec_code_inv(void *ptr, uint32_t len);
+
+enum my_cp2_opcodes {
+ OP_CP2_RTPS = 0x01,
+ OP_CP2_NCLIP = 0x06,
+ OP_CP2_OP = 0x0c,
+ OP_CP2_DPCS = 0x10,
+ OP_CP2_INTPL = 0x11,
+ OP_CP2_MVMVA = 0x12,
+ OP_CP2_NCDS = 0x13,
+ OP_CP2_CDP = 0x14,
+ OP_CP2_NCDT = 0x16,
+ OP_CP2_NCCS = 0x1b,
+ OP_CP2_CC = 0x1c,
+ OP_CP2_NCS = 0x1e,
+ OP_CP2_NCT = 0x20,
+ OP_CP2_SQR = 0x28,
+ OP_CP2_DCPL = 0x29,
+ OP_CP2_DPCT = 0x2a,
+ OP_CP2_AVSZ3 = 0x2d,
+ OP_CP2_AVSZ4 = 0x2e,
+ OP_CP2_RTPT = 0x30,
+ OP_CP2_GPF = 0x3d,
+ OP_CP2_GPL = 0x3e,
+ OP_CP2_NCCT = 0x3f,
+};
+
+static void (*cp2_ops[])(struct psxCP2Regs *) = {
+ [OP_CP2_RTPS] = gteRTPS,
+ [OP_CP2_NCLIP] = gteNCLIP,
+ [OP_CP2_OP] = gteOP,
+ [OP_CP2_DPCS] = gteDPCS,
+ [OP_CP2_INTPL] = gteINTPL,
+ [OP_CP2_MVMVA] = gteMVMVA,
+ [OP_CP2_NCDS] = gteNCDS,
+ [OP_CP2_CDP] = gteCDP,
+ [OP_CP2_NCDT] = gteNCDT,
+ [OP_CP2_NCCS] = gteNCCS,
+ [OP_CP2_CC] = gteCC,
+ [OP_CP2_NCS] = gteNCS,
+ [OP_CP2_NCT] = gteNCT,
+ [OP_CP2_SQR] = gteSQR,
+ [OP_CP2_DCPL] = gteDCPL,
+ [OP_CP2_DPCT] = gteDPCT,
+ [OP_CP2_AVSZ3] = gteAVSZ3,
+ [OP_CP2_AVSZ4] = gteAVSZ4,
+ [OP_CP2_RTPT] = gteRTPT,
+ [OP_CP2_GPF] = gteGPF,
+ [OP_CP2_GPL] = gteGPL,
+ [OP_CP2_NCCT] = gteNCCT,
+};
+
+static char cache_buf[64 * 1024];
+
+static void cop2_op(struct lightrec_state *state, u32 func)
+{
+ struct lightrec_registers *regs = lightrec_get_registers(state);
+
+ psxRegs.code = func;
+
+ if (unlikely(!cp2_ops[func & 0x3f])) {
+ fprintf(stderr, "Invalid CP2 function %u\n", func);
+ } else {
+ /* This works because regs->cp2c comes right after regs->cp2d,
+ * so it can be cast to a psxCP2Regs pointer. */
+ cp2_ops[func & 0x3f]((psxCP2Regs *) regs->cp2d);
+ }
+}
+
+static bool has_interrupt(void)
+{
+ struct lightrec_registers *regs = lightrec_get_registers(lightrec_state);
+
+ return ((psxHu32(0x1070) & psxHu32(0x1074)) &&
+ (regs->cp0[12] & 0x401) == 0x401) ||
+ (regs->cp0[12] & regs->cp0[13] & 0x0300);
+}
+
+static void lightrec_tansition_to_pcsx(struct lightrec_state *state)
+{
+ psxRegs.cycle += lightrec_current_cycle_count(state) / 1024;
+ lightrec_reset_cycle_count(state, 0);
+}
+
+static void lightrec_tansition_from_pcsx(struct lightrec_state *state)
+{
+ s32 cycles_left = psxRegs.next_interupt - psxRegs.cycle;
+
+ if (block_stepping || cycles_left <= 0 || has_interrupt())
+ lightrec_set_exit_flags(state, LIGHTREC_EXIT_CHECK_INTERRUPT);
+ else {
+ lightrec_set_target_cycle_count(state, cycles_left * 1024);
+ }
+}
+
+static void hw_write_byte(struct lightrec_state *state,
+ u32 op, void *host, u32 mem, u32 val)
+{
+ lightrec_tansition_to_pcsx(state);
+
+ psxHwWrite8(mem, val);
+
+ lightrec_tansition_from_pcsx(state);
+}
+
+static void hw_write_half(struct lightrec_state *state,
+ u32 op, void *host, u32 mem, u32 val)
+{
+ lightrec_tansition_to_pcsx(state);
+
+ psxHwWrite16(mem, val);
+
+ lightrec_tansition_from_pcsx(state);
+}
+
+static void hw_write_word(struct lightrec_state *state,
+ u32 op, void *host, u32 mem, u32 val)
+{
+ lightrec_tansition_to_pcsx(state);
+
+ psxHwWrite32(mem, val);
+
+ lightrec_tansition_from_pcsx(state);
+}
+
+static u8 hw_read_byte(struct lightrec_state *state, u32 op, void *host, u32 mem)
+{
+ u8 val;
+
+ lightrec_tansition_to_pcsx(state);
+
+ val = psxHwRead8(mem);
+
+ lightrec_tansition_from_pcsx(state);
+
+ return val;
+}
+
+static u16 hw_read_half(struct lightrec_state *state,
+ u32 op, void *host, u32 mem)
+{
+ u16 val;
+
+ lightrec_tansition_to_pcsx(state);
+
+ val = psxHwRead16(mem);
+
+ lightrec_tansition_from_pcsx(state);
+
+ return val;
+}
+
+static u32 hw_read_word(struct lightrec_state *state,
+ u32 op, void *host, u32 mem)
+{
+ static u32 old_cycle, oldold_cycle, old_gpusr;
+ u32 val, diff;
+
+ lightrec_tansition_to_pcsx(state);
+
+ val = psxHwRead32(mem);
+
+ if (GPUSTATUS_POLLING_THRESHOLD > 0 && mem == 0x1f801814) {
+ diff = psxRegs.cycle - old_cycle;
+
+ if (diff > 0
+ && diff < GPUSTATUS_POLLING_THRESHOLD
+ && diff == old_cycle - oldold_cycle) {
+ while (psxRegs.next_interupt > psxRegs.cycle && val == old_gpusr) {
+ psxRegs.cycle += diff;
+ val = psxHwRead32(mem);
+ }
+ }
+
+ oldold_cycle = old_cycle;
+ old_cycle = psxRegs.cycle;
+ old_gpusr = val;
+ }
+
+ lightrec_tansition_from_pcsx(state);
+
+ return val;
+}
+
+static struct lightrec_mem_map_ops hw_regs_ops = {
+ .sb = hw_write_byte,
+ .sh = hw_write_half,
+ .sw = hw_write_word,
+ .lb = hw_read_byte,
+ .lh = hw_read_half,
+ .lw = hw_read_word,
+};
+
+static u32 cache_ctrl;
+
+static void cache_ctrl_write_word(struct lightrec_state *state,
+ u32 op, void *host, u32 mem, u32 val)
+{
+ cache_ctrl = val;
+}
+
+static u32 cache_ctrl_read_word(struct lightrec_state *state,
+ u32 op, void *host, u32 mem)
+{
+ return cache_ctrl;
+}
+
+static struct lightrec_mem_map_ops cache_ctrl_ops = {
+ .sw = cache_ctrl_write_word,
+ .lw = cache_ctrl_read_word,
+};
+
+static struct lightrec_mem_map lightrec_map[] = {
+ [PSX_MAP_KERNEL_USER_RAM] = {
+ /* Kernel and user memory */
+ .pc = 0x00000000,
+ .length = 0x200000,
+ },
+ [PSX_MAP_BIOS] = {
+ /* BIOS */
+ .pc = 0x1fc00000,
+ .length = 0x80000,
+ },
+ [PSX_MAP_SCRATCH_PAD] = {
+ /* Scratch pad */
+ .pc = 0x1f800000,
+ .length = 0x400,
+ },
+ [PSX_MAP_PARALLEL_PORT] = {
+ /* Parallel port */
+ .pc = 0x1f000000,
+ .length = 0x10000,
+ },
+ [PSX_MAP_HW_REGISTERS] = {
+ /* Hardware registers */
+ .pc = 0x1f801000,
+ .length = 0x8000,
+ .ops = &hw_regs_ops,
+ },
+ [PSX_MAP_CACHE_CONTROL] = {
+ /* Cache control */
+ .pc = 0x5ffe0130,
+ .length = 4,
+ .ops = &cache_ctrl_ops,
+ },
+
+ /* Mirrors of the kernel/user memory */
+ [PSX_MAP_MIRROR1] = {
+ .pc = 0x00200000,
+ .length = 0x200000,
+ .mirror_of = &lightrec_map[PSX_MAP_KERNEL_USER_RAM],
+ },
+ [PSX_MAP_MIRROR2] = {
+ .pc = 0x00400000,
+ .length = 0x200000,
+ .mirror_of = &lightrec_map[PSX_MAP_KERNEL_USER_RAM],
+ },
+ [PSX_MAP_MIRROR3] = {
+ .pc = 0x00600000,
+ .length = 0x200000,
+ .mirror_of = &lightrec_map[PSX_MAP_KERNEL_USER_RAM],
+ },
+
+ /* Mirror of the parallel port. Only used by the PS2/PS3 BIOS */
+ [PSX_MAP_PPORT_MIRROR] = {
+ .pc = 0x1fa00000,
+ .length = 0x10000,
+ .mirror_of = &lightrec_map[PSX_MAP_PARALLEL_PORT],
+ },
+
+ /* Code buffer */
+ [PSX_MAP_CODE_BUFFER] = {
+ .length = CODE_BUFFER_SIZE,
+ },
+};
+
+static void lightrec_enable_ram(struct lightrec_state *state, bool enable)
+{
+ if (enable)
+ memcpy(psxM, cache_buf, sizeof(cache_buf));
+ else
+ memcpy(cache_buf, psxM, sizeof(cache_buf));
+
+ ram_disabled = !enable;
+}
+
+static bool lightrec_can_hw_direct(u32 kaddr, bool is_write, u8 size)
+{
+ if (is_write && size != 32) {
+ // force32 so must go through handlers
+ if (0x1f801000 <= kaddr && kaddr < 0x1f801024)
+ return false;
+ if ((kaddr & 0x1fffff80) == 0x1f801080) // dma
+ return false;
+ }
+
+ switch (size) {
+ case 8:
+ switch (kaddr) {
+ case 0x1f801040:
+ case 0x1f801050:
+ case 0x1f801800:
+ case 0x1f801801:
+ case 0x1f801802:
+ case 0x1f801803:
+ return false;
+ default:
+ return true;
+ }
+ case 16:
+ switch (kaddr) {
+ case 0x1f801040:
+ case 0x1f801044:
+ case 0x1f801048:
+ case 0x1f80104a:
+ case 0x1f80104e:
+ case 0x1f801050:
+ case 0x1f801054:
+ case 0x1f80105a:
+ case 0x1f80105e:
+ case 0x1f801100:
+ case 0x1f801104:
+ case 0x1f801108:
+ case 0x1f801110:
+ case 0x1f801114:
+ case 0x1f801118:
+ case 0x1f801120:
+ case 0x1f801124:
+ case 0x1f801128:
+ return false;
+ case 0x1f801070:
+ case 0x1f801074:
+ return !is_write;
+ default:
+ return kaddr < 0x1f801c00 || kaddr >= 0x1f801e00;
+ }
+ default:
+ switch (kaddr) {
+ case 0x1f801040:
+ case 0x1f801050:
+ case 0x1f801100:
+ case 0x1f801104:
+ case 0x1f801108:
+ case 0x1f801110:
+ case 0x1f801114:
+ case 0x1f801118:
+ case 0x1f801120:
+ case 0x1f801124:
+ case 0x1f801128:
+ case 0x1f801810:
+ case 0x1f801814:
+ case 0x1f801820:
+ case 0x1f801824:
+ return false;
+ case 0x1f801070:
+ case 0x1f801074:
+ case 0x1f801088:
+ case 0x1f801098:
+ case 0x1f8010a8:
+ case 0x1f8010b8:
+ case 0x1f8010c8:
+ case 0x1f8010e8:
+ case 0x1f8010f4:
+ return !is_write;
+ default:
+ return !is_write || kaddr < 0x1f801c00 || kaddr >= 0x1f801e00;
+ }
+ }
+}
+
+static const struct lightrec_ops lightrec_ops = {
+ .cop2_op = cop2_op,
+ .enable_ram = lightrec_enable_ram,
+ .hw_direct = lightrec_can_hw_direct,
+ .code_inv = LIGHTREC_CODE_INV ? lightrec_code_inv : NULL,
+};
+
+static int lightrec_plugin_init(void)
+{
+ lightrec_map[PSX_MAP_KERNEL_USER_RAM].address = psxM;
+ lightrec_map[PSX_MAP_BIOS].address = psxR;
+ lightrec_map[PSX_MAP_SCRATCH_PAD].address = psxH;
+ lightrec_map[PSX_MAP_HW_REGISTERS].address = psxH + 0x1000;
+ lightrec_map[PSX_MAP_PARALLEL_PORT].address = psxP;
+
+ if (!LIGHTREC_CUSTOM_MAP) {
+#if P_HAVE_MMAP
+ code_buffer = mmap(0, CODE_BUFFER_SIZE,
+ PROT_EXEC | PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (code_buffer == MAP_FAILED)
+ return -ENOMEM;
+#else
+ code_buffer = malloc(CODE_BUFFER_SIZE);
+ if (!code_buffer)
+ return -ENOMEM;
+#endif
+ }
+
+ if (LIGHTREC_CUSTOM_MAP) {
+ lightrec_map[PSX_MAP_MIRROR1].address = psxM + 0x200000;
+ lightrec_map[PSX_MAP_MIRROR2].address = psxM + 0x400000;
+ lightrec_map[PSX_MAP_MIRROR3].address = psxM + 0x600000;
+ }
+
+ lightrec_map[PSX_MAP_CODE_BUFFER].address = code_buffer;
+
+ use_lightrec_interpreter = !!getenv("LIGHTREC_INTERPRETER");
+
+#ifdef LIGHTREC_DEBUG
+ char *cycles = getenv("LIGHTREC_BEGIN_CYCLES");
+
+ lightrec_very_debug = !!getenv("LIGHTREC_VERY_DEBUG");
+ lightrec_debug = lightrec_very_debug || !!getenv("LIGHTREC_DEBUG");
+
+ if (cycles)
+ lightrec_begin_cycles = (unsigned int) strtol(cycles, NULL, 0);
+#endif
+
+ lightrec_state = lightrec_init(LIGHTREC_PROG_NAME,
+ lightrec_map, ARRAY_SIZE(lightrec_map),
+ &lightrec_ops);
+
+ // fprintf(stderr, "M=0x%lx, P=0x%lx, R=0x%lx, H=0x%lx\n",
+ // (uintptr_t) psxM,
+ // (uintptr_t) psxP,
+ // (uintptr_t) psxR,
+ // (uintptr_t) psxH);
+
+#ifndef _WIN32
+ signal(SIGPIPE, exit);
+#endif
+ lightrec_plugin_apply_config();
+ return 0;
+}
+
+static u32 do_calculate_hash(const void *buffer, u32 count, u32 needle, bool le)
+{
+ unsigned int i;
+ const u32 *data = (const u32 *) buffer;
+ u32 hash = needle;
+
+ count /= 4;
+ for(i = 0; i < count; ++i) {
+ hash += le ? LE32TOH(data[i]) : data[i];
+ hash += (hash << 10);
+ hash ^= (hash >> 6);
+ }
+
+ hash += (hash << 3);
+ hash ^= (hash >> 11);
+ hash += (hash << 15);
+
+ return hash;
+}
+
+static u32 hash_calculate_le(const void *buffer, u32 count)
+{
+ return do_calculate_hash(buffer, count, 0xffffffff, true);
+}
+
+u32 hash_calculate(const void *buffer, u32 count)
+{
+ return do_calculate_hash(buffer, count, 0xffffffff, false);
+}
+
+static u32 hash_calculate_ram(const void *buffer, u32 ram_size)
+{
+ u32 hash;
+
+ if (ram_disabled)
+ hash = hash_calculate_le(cache_buf, sizeof(cache_buf));
+ else
+ hash = hash_calculate_le(buffer, sizeof(cache_buf));
+
+ return do_calculate_hash(buffer + sizeof(cache_buf),
+ ram_size - sizeof(cache_buf),
+ hash, true);
+}
+
+static const char * const mips_regs[] = {
+ "zero",
+ "at",
+ "v0", "v1",
+ "a0", "a1", "a2", "a3",
+ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
+ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+ "t8", "t9",
+ "k0", "k1",
+ "gp", "sp", "fp", "ra",
+ "lo", "hi",
+};
+
+static void print_for_big_ass_debugger(void)
+{
+ struct lightrec_registers *regs;
+ unsigned int i;
+
+ regs = lightrec_get_registers(lightrec_state);
+
+ printf("CYCLE 0x%08x PC 0x%08x", psxRegs.cycle, psxRegs.pc);
+
+ if (lightrec_very_debug)
+ printf(" RAM 0x%08x SCRATCH 0x%08x HW 0x%08x",
+ hash_calculate_ram(psxM, 0x200000),
+ hash_calculate_le(psxH, 0x400),
+ hash_calculate_le(psxH + 0x1000, 0x2000));
+
+ printf(" CP0 0x%08x CP2D 0x%08x CP2C 0x%08x INT 0x%04x INTCYCLE 0x%08x GPU 0x%08x",
+ hash_calculate(regs->cp0, sizeof(regs->cp0)),
+ hash_calculate(regs->cp2d, sizeof(regs->cp2d)),
+ hash_calculate(regs->cp2c, sizeof(regs->cp2c)),
+ psxRegs.interrupt,
+ hash_calculate(psxRegs.intCycle, sizeof(psxRegs.intCycle)),
+ LE32TOH(HW_GPU_STATUS));
+
+ if (lightrec_very_debug) {
+ for (i = 0; i < 32; i++)
+ printf(" CP2D%u 0x%08x", i, regs->cp2d[i]);
+ for (i = 0; i < 32; i++)
+ printf(" CP2C%u 0x%08x", i, regs->cp2c[i]);
+ }
+
+ if (lightrec_very_debug)
+ for (i = 0; i < 34; i++)
+ printf(" %s 0x%08x", mips_regs[i], regs->gpr[i]);
+ else
+ printf(" GPR 0x%08x",
+ hash_calculate(regs->gpr, sizeof(regs->gpr)));
+ printf("\n");
+
+ fflush(stdout);
+}
+
+static void lightrec_plugin_sync_regs_to_pcsx(bool need_cp2);
+static void lightrec_plugin_sync_regs_from_pcsx(bool need_cp2);
+
+static void lightrec_plugin_execute_internal(bool block_only)
+{
+ struct lightrec_registers *regs;
+ u32 flags, cycles_pcsx;
+ u32 old_pc = psxRegs.pc;
+
+ regs = lightrec_get_registers(lightrec_state);
+ gen_interupt((psxCP0Regs *)regs->cp0);
+ if (!block_only && psxRegs.stop)
+ return;
+
+ cycles_pcsx = psxRegs.next_interupt - psxRegs.cycle;
+ assert((s32)cycles_pcsx > 0);
+
+ // step during early boot so that 0x80030000 fastboot hack works
+ block_stepping = block_only;
+ if (block_only)
+ cycles_pcsx = 0;
+
+ if (use_pcsx_interpreter) {
+ psxInt.ExecuteBlock(&psxRegs, 0);
+ } else {
+ u32 cycles_lightrec = cycles_pcsx * 1024;
+ if (unlikely(use_lightrec_interpreter)) {
+ psxRegs.pc = lightrec_run_interpreter(lightrec_state,
+ psxRegs.pc,
+ cycles_lightrec);
+ } else {
+ psxRegs.pc = lightrec_execute(lightrec_state,
+ psxRegs.pc, cycles_lightrec);
+ }
+
+ lightrec_tansition_to_pcsx(lightrec_state);
+
+ flags = lightrec_exit_flags(lightrec_state);
+
+ if (flags & LIGHTREC_EXIT_SEGFAULT) {
+ fprintf(stderr, "Exiting at cycle 0x%08x\n",
+ psxRegs.cycle);
+ if (lightrec_debug)
+ print_for_big_ass_debugger();
+ exit(1);
+ }
+
+ if (flags & LIGHTREC_EXIT_SYSCALL)
+ psxException(R3000E_Syscall << 2, 0, (psxCP0Regs *)regs->cp0);
+ if (flags & LIGHTREC_EXIT_BREAK)
+ psxException(R3000E_Bp << 2, 0, (psxCP0Regs *)regs->cp0);
+ else if (flags & LIGHTREC_EXIT_UNKNOWN_OP) {
+ u32 op = intFakeFetch(psxRegs.pc);
+ u32 hlec = op & 0x03ffffff;
+ if ((op >> 26) == 0x3b && hlec < ARRAY_SIZE(psxHLEt) && Config.HLE) {
+ lightrec_plugin_sync_regs_to_pcsx(0);
+ psxHLEt[hlec]();
+ lightrec_plugin_sync_regs_from_pcsx(0);
+ }
+ else
+ psxException(R3000E_RI << 2, 0, (psxCP0Regs *)regs->cp0);
+ }
+ }
+
+ if (lightrec_debug && psxRegs.cycle >= lightrec_begin_cycles && psxRegs.pc != old_pc) {
+ print_for_big_ass_debugger();
+ }
+
+ if ((regs->cp0[13] & regs->cp0[12] & 0x300) && (regs->cp0[12] & 0x1)) {
+ /* Handle software interrupts */
+ regs->cp0[13] &= ~0x7c;
+ psxException(regs->cp0[13], 0, (psxCP0Regs *)regs->cp0);
+ }
+}
+
+static void lightrec_plugin_execute(psxRegisters *regs)
+{
+ while (!regs->stop)
+ lightrec_plugin_execute_internal(lightrec_very_debug);
+}
+
+static void lightrec_plugin_execute_block(psxRegisters *regs,
+ enum blockExecCaller caller)
+{
+ lightrec_plugin_execute_internal(true);
+}
+
+static void lightrec_plugin_clear(u32 addr, u32 size)
+{
+ if ((addr == 0 && size == UINT32_MAX)
+ || (lightrec_hacks & LIGHTREC_OPT_INV_DMA_ONLY))
+ lightrec_invalidate_all(lightrec_state);
+ else
+ /* size * 4: PCSX uses DMA units */
+ lightrec_invalidate(lightrec_state, addr, size * 4);
+}
+
+static void lightrec_plugin_notify(enum R3000Anote note, void *data)
+{
+ switch (note)
+ {
+ case R3000ACPU_NOTIFY_CACHE_ISOLATED:
+ case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
+ /* not used, lightrec calls lightrec_enable_ram() instead */
+ break;
+ case R3000ACPU_NOTIFY_BEFORE_SAVE:
+ /* non-null 'data' means this is HLE related sync */
+ lightrec_plugin_sync_regs_to_pcsx(data == NULL);
+ break;
+ case R3000ACPU_NOTIFY_AFTER_LOAD:
+ lightrec_plugin_sync_regs_from_pcsx(data == NULL);
+ if (data == NULL)
+ lightrec_invalidate_all(lightrec_state);
+ break;
+ }
+}
+
+static void lightrec_plugin_apply_config()
+{
+ static u32 cycles_per_op_old;
+ u32 cycle_mult = Config.cycle_multiplier_override && Config.cycle_multiplier == CYCLE_MULT_DEFAULT
+ ? Config.cycle_multiplier_override : Config.cycle_multiplier;
+ u32 cycles_per_op = cycle_mult * 1024 / 100;
+ assert(cycles_per_op);
+
+ if (cycles_per_op_old && cycles_per_op_old != cycles_per_op) {
+ SysPrintf("lightrec: reinit block cache for cycles_per_op %.2f\n",
+ cycles_per_op / 1024.f);
+ }
+ cycles_per_op_old = cycles_per_op;
+ lightrec_set_cycles_per_opcode(lightrec_state, cycles_per_op);
+
+ lightrec_set_unsafe_opt_flags(lightrec_state, lightrec_hacks);
+ intApplyConfig();
+}
+
+static void lightrec_plugin_shutdown(void)
+{
+ lightrec_destroy(lightrec_state);
+
+ if (!LIGHTREC_CUSTOM_MAP) {
+#if P_HAVE_MMAP
+ munmap(code_buffer, CODE_BUFFER_SIZE);
+#else
+ free(code_buffer);
+#endif
+ }
+}
+
+static void lightrec_plugin_reset(void)
+{
+ struct lightrec_registers *regs;
+
+ regs = lightrec_get_registers(lightrec_state);
+
+ /* Invalidate all blocks */
+ lightrec_invalidate_all(lightrec_state);
+
+ /* Reset registers */
+ memset(regs, 0, sizeof(*regs));
+
+ regs->cp0[12] = 0x10900000; // COP0 enabled | BEV = 1 | TS = 1
+ regs->cp0[15] = 0x00000002; // PRevID = Revision ID, same as R3000A
+}
+
+static void lightrec_plugin_sync_regs_from_pcsx(bool need_cp2)
+{
+ struct lightrec_registers *regs;
+
+ regs = lightrec_get_registers(lightrec_state);
+ memcpy(regs->gpr, &psxRegs.GPR, sizeof(regs->gpr));
+ memcpy(regs->cp0, &psxRegs.CP0, sizeof(regs->cp0));
+ if (need_cp2)
+ memcpy(regs->cp2d, &psxRegs.CP2, sizeof(regs->cp2d) + sizeof(regs->cp2c));
+}
+
+static void lightrec_plugin_sync_regs_to_pcsx(bool need_cp2)
+{
+ struct lightrec_registers *regs;
+
+ regs = lightrec_get_registers(lightrec_state);
+ memcpy(&psxRegs.GPR, regs->gpr, sizeof(regs->gpr));
+ memcpy(&psxRegs.CP0, regs->cp0, sizeof(regs->cp0));
+ if (need_cp2)
+ memcpy(&psxRegs.CP2, regs->cp2d, sizeof(regs->cp2d) + sizeof(regs->cp2c));
+}
+
+R3000Acpu psxRec =
+{
+ lightrec_plugin_init,
+ lightrec_plugin_reset,
+ lightrec_plugin_execute,
+ lightrec_plugin_execute_block,
+ lightrec_plugin_clear,
+ lightrec_plugin_notify,
+ lightrec_plugin_apply_config,
+ lightrec_plugin_shutdown,
+};
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2022 Paul Cercueil <paul@crapouillou.net>
+ */
+
+#ifndef __LIGHTREC_PLUGIN_H__
+#define __LIGHTREC_PLUGIN_H__
+
+#ifdef LIGHTREC
+
+#define drc_is_lightrec() 1
+
+#else /* if !LIGHTREC */
+
+#define drc_is_lightrec() 0
+
+#endif
+
+#endif /* __LIGHTREC_PLUGIN_H__ */
+
--- /dev/null
+#include <errno.h>
+#include <unistd.h>
+
+/* Implement the sysconf() symbol which is needed by GNU Lightning */
+long sysconf(int name)
+{
+ switch (name) {
+ case _SC_PAGE_SIZE:
+ return 4096;
+ default:
+ return -EINVAL;
+ }
+}
*/
/*
- * >= 10 for Galerians
+ * >= 14 for Sol Divide
* <= 18 for "Disney's Treasure Planet"
+ * Psychic Detective may break on *any* change
*/
-#define MDEC_BIAS 10
+#define MDEC_BIAS 14
+#define MDEC_DELAY 1024
#define DSIZE 8
#define DSIZE2 (DSIZE * DSIZE)
#define SCALE8(c) SCALER(c, 20)
#define SCALE5(c) SCALER(c, 23)
-#define CLAMP5(c) ( ((c) < -16) ? 0 : (((c) > (31 - 16)) ? 31 : ((c) + 16)) )
-#define CLAMP8(c) ( ((c) < -128) ? 0 : (((c) > (255 - 128)) ? 255 : ((c) + 128)) )
+static inline int clamp5(int v)
+{
+ v += 16;
+ v = v < 0 ? 0 : (v > 31 ? 31 : v);
+ return v;
+}
+
+static inline int clamp8(int v)
+{
+ v += 128;
+ v = v < 0 ? 0 : (v > 255 ? 255 : v);
+ return v;
+}
-#define CLAMP_SCALE8(a) (CLAMP8(SCALE8(a)))
-#define CLAMP_SCALE5(a) (CLAMP5(SCALE5(a)))
+#define CLAMP_SCALE8(a) (clamp8(SCALE8(a)))
+#define CLAMP_SCALE5(a) (clamp5(SCALE5(a)))
static inline void putlinebw15(u16 *image, int *Yblk) {
int i;
for (i = 0; i < 8; i++, Yblk++) {
int Y = *Yblk;
// missing rounding
- image[i] = SWAP16((CLAMP5(Y >> 3) * 0x421) | A);
+ image[i] = SWAP16((clamp5(Y >> 3) * 0x421) | A);
}
}
int i;
unsigned char Y;
for (i = 0; i < 8 * 3; i += 3, Yblk++) {
- Y = CLAMP8(*Yblk);
+ Y = clamp8(*Yblk);
image[i + 0] = Y;
image[i + 1] = Y;
image[i + 2] = Y;
log_unhandled("mdec: bork\n");
/* define the power of mdec */
- set_event(PSXINT_MDECOUTDMA, words * MDEC_BIAS);
+ set_event(PSXINT_MDECOUTDMA, words * MDEC_BIAS + MDEC_DELAY);
/* some CPU stalling */
- psxRegs.cycle += words;
+ psxRegs.cycle += words * MDEC_BIAS / 4;
}
void mdec1Interrupt() {
+++ /dev/null
-/* Copyright (C) 2010-2020 The RetroArch team
- *
- * ---------------------------------------------------------------------------------------
- * The following license statement only applies to this file (memmap_win32.c).
- * ---------------------------------------------------------------------------------------
- *
- * Permission is hereby granted, free of charge,
- * to any person obtaining a copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation the rights to
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
- * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <windows.h>
-#include <errno.h>
-#include <io.h>
-
-#include "memmap.h"
-
-#ifndef FILE_MAP_EXECUTE
-#define FILE_MAP_EXECUTE 0x0020
-#endif /* FILE_MAP_EXECUTE */
-
-static int __map_mman_error(const DWORD err, const int deferr)
-{
- if (err == 0)
- return 0;
- /* TODO: implement */
- return err;
-}
-
-static DWORD __map_mmap_prot_page(const int prot)
-{
- DWORD protect = 0;
-
- if (prot == PROT_NONE)
- return 0;
-
- if ((prot & PROT_EXEC) != 0)
- protect = ((prot & PROT_WRITE) != 0) ?
- PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ;
- else
- protect = ((prot & PROT_WRITE) != 0) ?
- PAGE_READWRITE : PAGE_READONLY;
-
- return protect;
-}
-
-static DWORD __map_mmap_prot_file(const int prot)
-{
- DWORD desiredAccess = 0;
-
- if (prot == PROT_NONE)
- return 0;
-
- if ((prot & PROT_READ) != 0)
- desiredAccess |= FILE_MAP_READ;
- if ((prot & PROT_WRITE) != 0)
- desiredAccess |= FILE_MAP_WRITE;
- if ((prot & PROT_EXEC) != 0)
- desiredAccess |= FILE_MAP_EXECUTE;
-
- return desiredAccess;
-}
-
-void* mmap(void *addr, size_t len, int prot, int flags, int fildes, off_t off)
-{
- HANDLE fm, h;
-
- void * map = MAP_FAILED;
-
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable: 4293)
-#endif
-
- const DWORD dwFileOffsetLow = (sizeof(off_t) <= sizeof(DWORD)) ?
- (DWORD)off : (DWORD)(off & 0xFFFFFFFFL);
- const DWORD dwFileOffsetHigh = (sizeof(off_t) <= sizeof(DWORD)) ?
- (DWORD)0 : (DWORD)((off >> 32) & 0xFFFFFFFFL);
- const DWORD protect = __map_mmap_prot_page(prot);
- const DWORD desiredAccess = __map_mmap_prot_file(prot);
-
- const off_t maxSize = off + (off_t)len;
-
- const DWORD dwMaxSizeLow = (sizeof(off_t) <= sizeof(DWORD)) ?
- (DWORD)maxSize : (DWORD)(maxSize & 0xFFFFFFFFL);
- const DWORD dwMaxSizeHigh = (sizeof(off_t) <= sizeof(DWORD)) ?
- (DWORD)0 : (DWORD)((maxSize >> 32) & 0xFFFFFFFFL);
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
- errno = 0;
-
- if (len == 0
- /* Unsupported flag combinations */
- || (flags & MAP_FIXED) != 0
- /* Usupported protection combinations */
- || prot == PROT_EXEC)
- {
- errno = EINVAL;
- return MAP_FAILED;
- }
-
- h = ((flags & MAP_ANONYMOUS) == 0) ?
- (HANDLE)_get_osfhandle(fildes) : INVALID_HANDLE_VALUE;
-
- if ((flags & MAP_ANONYMOUS) == 0 && h == INVALID_HANDLE_VALUE)
- {
- errno = EBADF;
- return MAP_FAILED;
- }
-
- fm = CreateFileMapping(h, NULL, protect, dwMaxSizeHigh, dwMaxSizeLow, NULL);
-
- if (!fm)
- goto error;
-
- map = MapViewOfFile(fm, desiredAccess, dwFileOffsetHigh, dwFileOffsetLow, len);
-
- CloseHandle(fm);
-
- if (!map)
- goto error;
-
- return map;
-error:
- errno = __map_mman_error(GetLastError(), EPERM);
- return MAP_FAILED;
-}
-
-int munmap(void *addr, size_t len)
-{
- if (UnmapViewOfFile(addr))
- return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
-
-int mprotect(void *addr, size_t len, int prot)
-{
- DWORD newProtect = __map_mmap_prot_page(prot);
- DWORD oldProtect = 0;
-
- if (VirtualProtect(addr, len, newProtect, &oldProtect))
- return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
-
-int msync(void *addr, size_t len, int flags)
-{
- if (FlushViewOfFile(addr, len))
- return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
-
-int mlock(const void *addr, size_t len)
-{
- if (VirtualLock((LPVOID)addr, len))
- return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
-
-int munlock(const void *addr, size_t len)
-{
- if (VirtualUnlock((LPVOID)addr, len))
- return 0;
-
- errno = __map_mman_error(GetLastError(), EPERM);
-
- return -1;
-}
-
#include <assert.h>
#include "misc.h"
#include "cdrom.h"
+#include "cdrom-async.h"
#include "mdec.h"
#include "gpu.h"
#include "ppf.h"
#include "psxbios.h"
#include "database.h"
#include <zlib.h>
+#include "revision.h"
char CdromId[10] = "";
char CdromLabel[33] = "";
int CdromFrontendId; // for frontend use
+static u32 save_counter;
+
// PSX Executable types
#define PSX_EXE 1
#define CPE_EXE 2
char name [1];
};
-static void mmssdd( char *b, char *p )
+static void mmssdd(const char *b, char *p)
{
- int m, s, d;
- unsigned char *ub = (void *)b;
+ const unsigned char *ub = (void *)b;
int block = (ub[3] << 24) | (ub[2] << 16) | (ub[1] << 8) | ub[0];
+ int m, s, d;
block += 150;
m = block / 4500; // minutes
s = block / 75; // seconds
d = block - s * 75; // seconds rest
- m = ((m / 10) << 4) | m % 10;
- s = ((s / 10) << 4) | s % 10;
- d = ((d / 10) << 4) | d % 10;
-
p[0] = m;
p[1] = s;
p[2] = d;
}
#define incTime() \
- time[0] = btoi(time[0]); time[1] = btoi(time[1]); time[2] = btoi(time[2]); \
time[2]++; \
if(time[2] == 75) { \
time[2] = 0; \
time[0]++; \
} \
} \
- time[0] = itob(time[0]); time[1] = itob(time[1]); time[2] = itob(time[2]);
#define READTRACK() \
- if (!CDR_readTrack(time)) return -1; \
- buf = (void *)CDR_getBuffer(); \
+ if (cdra_readTrack(time)) return -1; \
+ buf = cdra_getBuffer(); \
if (buf == NULL) return -1; \
else CheckPPFCache((u8 *)buf, time[0], time[1], time[2]);
u32 sp = 0;
int i, ret;
+ save_counter = 0;
+
if (!Config.HLE) {
if (psxRegs.pc != 0x80030000) // BiosBootBypass'ed or custom BIOS?
return 0;
return 0;
}
- time[0] = itob(0); time[1] = itob(2); time[2] = itob(0x10);
+ time[0] = 0; time[1] = 2; time[2] = 0x10;
READTRACK();
p1++;
snprintf(exename, sizeof(exename), "%s", p1);
- time[0] = itob(0); time[1] = itob(2); time[2] = itob(0x10);
+ time[0] = 0; time[1] = 2; time[2] = 0x10;
READTRACK();
size -= 2048;
addr += 2048;
}
- if (time_bcd_out)
- memcpy(time_bcd_out, time, 3);
+ if (time_bcd_out) {
+ time_bcd_out[0] = itob(time[0]);
+ time_bcd_out[1] = itob(time[1]);
+ time_bcd_out[2] = itob(time[2]);
+ }
return 0;
}
int CheckCdrom() {
struct iso_directory_record *dir;
struct CdrStat stat = { 0, 0, };
- unsigned char time[4];
+ unsigned char time[4] = { 0, 2, 4 };
char *buf;
unsigned char mdir[4096];
char exename[256];
+ int lic_region_detected = -1;
int i, len, c;
FreePPFCache();
memset(CdromId, 0, sizeof(CdromId));
memset(exename, 0, sizeof(exename));
- time[0] = itob(0);
- time[1] = itob(2);
- time[2] = itob(0x10);
-
if (!Config.HLE && Config.SlowBoot) {
- // boot to BIOS in case of CDDA ir lid open
- CDR_getStatus(&stat);
- if ((stat.Status & 0x10) || stat.Type == 2 || !CDR_readTrack(time))
+ // boot to BIOS in case of CDDA or lid is open
+ cdra_getStatus(&stat);
+ if ((stat.Status & 0x10) || stat.Type == 2 || cdra_readTrack(time))
return 0;
}
+ if (Config.PsxAuto) {
+ time[0] = 0;
+ time[1] = 2;
+ time[2] = 4;
+ READTRACK();
+ if (strcmp((char *)buf + 12 + 46, "Entertainment Euro pe ") == 0)
+ lic_region_detected = PSX_TYPE_PAL;
+ // else it'll default to NTSC anyway
+ }
+
+ time[0] = 0;
+ time[1] = 2;
+ time[2] = 0x10;
READTRACK();
strncpy(CdromLabel, buf + 52, 32);
for (i = 0; i < len; ++i) {
if (exename[i] == ';' || c >= sizeof(CdromId) - 1)
break;
- if (isalnum(exename[i]))
+ if (isalnum((int)exename[i]))
CdromId[c++] = exename[i];
}
}
strcpy(CdromId, "SLUS99999");
if (Config.PsxAuto) { // autodetect system (pal or ntsc)
- if (
+ if (lic_region_detected >= 0)
+ Config.PsxType = lic_region_detected;
+ else if (
/* Make sure Wild Arms SCUS-94608 is not detected as a PAL game. */
((CdromId[0] == 's' || CdromId[0] == 'S') && (CdromId[2] == 'e' || CdromId[2] == 'E')) ||
!strncmp(CdromId, "DTLS3035", 8) ||
Apply_Hacks_Cdrom();
- BuildPPFCache();
+ BuildPPFCache(NULL);
return 0;
}
memcpy(ptr, tmp, size * nmemb);
free(tmp);
}
+ else
+ ret = fread(ptr, size, nmemb, stream);
return ret;
}
case 0: /* End of file */
break;
default:
- SysPrintf(_("Unknown CPE opcode %02x at position %08x.\n"), opcode, ftell(tmpFile) - 1);
+ SysPrintf(_("Unknown CPE opcode %02x at position %08lx.\n"), opcode, ftell(tmpFile) - 1);
retval = -1;
break;
}
zlib_open, zlib_read, zlib_write, zlib_seek, zlib_close
};
-static const char PcsxHeader[32] = "STv4 PCSX v" PCSX_VERSION;
+static const char PcsxHeader[32] = "STv4 PCSXra " REV;
// Savestate Versioning!
// If you make changes to the savestate version, please increment the value below.
static const u32 SaveVersion = 0x8b410006;
+struct origin_info {
+ boolean icache_emulation;
+ boolean DisableStalls;
+ boolean PreciseExceptions;
+ boolean TurboCD;
+ s8 GpuListWalking;
+ s8 FractionalFramerate;
+ u8 Cpu;
+ u8 PsxType;
+ char build_info[64];
+};
+
#define MISC_MAGIC 0x4353494d
struct misc_save_data {
u32 magic;
u32 gpuSr;
u32 frame_counter;
int CdromFrontendId;
+ u32 save_counter;
};
+#define EX_SCREENPIC_SIZE (128 * 96 * 3)
+
int SaveState(const char *file) {
struct misc_save_data *misc = (void *)(psxH + 0xf000);
- void *f;
+ struct origin_info oi = { 0, };
GPUFreeze_t *gpufP = NULL;
SPUFreezeHdr_t spufH;
SPUFreeze_t *spufP = NULL;
- unsigned char *pMem = NULL;
+ u8 buf[EX_SCREENPIC_SIZE];
int result = -1;
int Size;
+ void *f;
assert(!psxRegs.branching);
assert(!psxRegs.cpuInRecursion);
misc->gpuSr = HW_GPU_STATUS;
misc->frame_counter = frame_counter;
misc->CdromFrontendId = CdromFrontendId;
+ misc->save_counter = ++save_counter;
psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL);
SaveFuncs.write(f, (void *)&SaveVersion, sizeof(u32));
SaveFuncs.write(f, (void *)&Config.HLE, sizeof(boolean));
- pMem = (unsigned char *)malloc(128 * 96 * 3);
- if (pMem == NULL) goto cleanup;
- GPU_getScreenPic(pMem);
- SaveFuncs.write(f, pMem, 128 * 96 * 3);
- free(pMem);
+ oi.icache_emulation = Config.icache_emulation;
+ oi.DisableStalls = Config.DisableStalls;
+ oi.PreciseExceptions = Config.PreciseExceptions;
+ oi.TurboCD = Config.TurboCD;
+ oi.GpuListWalking = Config.GpuListWalking;
+ oi.FractionalFramerate = Config.FractionalFramerate;
+ oi.Cpu = Config.Cpu;
+ oi.PsxType = Config.PsxType;
+ snprintf(oi.build_info, sizeof(oi.build_info), "%s", get_build_info());
+
+ // this was space for ScreenPic
+ assert(sizeof(buf) >= EX_SCREENPIC_SIZE);
+ assert(sizeof(oi) - 3 <= EX_SCREENPIC_SIZE);
+ memset(buf, 0, sizeof(buf));
+ memcpy(buf + 3, &oi, sizeof(oi));
+ SaveFuncs.write(f, buf, EX_SCREENPIC_SIZE);
if (Config.HLE)
psxBiosFreeze(1);
psxHwFreeze(f, 1);
psxRcntFreeze(f, 1);
mdecFreeze(f, 1);
- new_dyna_freeze(f, 1);
+ ndrc_freeze(f, 1);
padFreeze(f, 1);
result = 0;
void *f;
GPUFreeze_t *gpufP = NULL;
SPUFreeze_t *spufP = NULL;
+ boolean hle, oldhle;
int Size;
char header[32];
u32 version;
- boolean hle;
int result = -1;
f = SaveFuncs.open(file, "rb");
if (f == NULL) return -1;
- SaveFuncs.read(f, header, sizeof(header));
+ if (!file)
+ file = "(stream)";
+ memset(header, 0, sizeof(header));
+ SaveFuncs.read(f, header, 16);
+ if (strncmp("RASTATE", header, 7) == 0) {
+ // looks like RA header, normal savestate should follow
+ SysPrintf("%s: trying to skip RASTATE header\n", file);
+ SaveFuncs.read(f, header, 16);
+ }
+ SaveFuncs.read(f, header + 16, 16);
SaveFuncs.read(f, &version, sizeof(u32));
SaveFuncs.read(f, &hle, sizeof(boolean));
- if (strncmp("STv4 PCSX", header, 9) != 0 || version != SaveVersion) {
- SysPrintf("incompatible savestate version %x\n", version);
+ if (strncmp("STv4 PCSX", header, 9) != 0) {
+ SysPrintf("%s: is not a savestate?\n", file);
+ goto cleanup;
+ }
+ if (version != SaveVersion) {
+ SysPrintf("%s: incompatible savestate version %x\n", file, version);
goto cleanup;
}
+ oldhle = Config.HLE;
Config.HLE = hle;
if (Config.HLE)
psxBiosInit();
- SaveFuncs.seek(f, 128 * 96 * 3, SEEK_CUR);
+ // ex-ScreenPic space
+ SaveFuncs.seek(f, EX_SCREENPIC_SIZE, SEEK_CUR);
+
SaveFuncs.read(f, psxM, 0x00200000);
SaveFuncs.read(f, psxR, 0x00080000);
SaveFuncs.read(f, psxH, 0x00010000);
SaveFuncs.read(f, &psxRegs, offsetof(psxRegisters, gteBusyCycle));
psxRegs.gteBusyCycle = psxRegs.cycle;
+ psxRegs.branching = 0;
psxRegs.biosBranchCheck = ~0;
+ psxRegs.cpuInRecursion = 0;
psxRegs.gpuIdleAfter = psxRegs.cycle - 1;
HW_GPU_STATUS &= SWAP32(~PSXGPU_nBUSY);
if (misc->magic == MISC_MAGIC) {
HW_GPU_STATUS = misc->gpuSr;
frame_counter = misc->frame_counter;
CdromFrontendId = misc->CdromFrontendId;
+ if (misc->save_counter)
+ save_counter = misc->save_counter;
}
- psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
-
if (Config.HLE)
psxBiosFreeze(0);
psxHwFreeze(f, 0);
psxRcntFreeze(f, 0);
mdecFreeze(f, 0);
- new_dyna_freeze(f, 0);
+
+ if (Config.HLE != oldhle) {
+ // at least ari64 drc compiles differently so hard reset
+ psxCpu->Shutdown();
+ psxCpu->Init();
+ }
+ ndrc_freeze(f, 0);
padFreeze(f, 0);
events_restore();
if (Config.HLE)
psxBiosCheckExe(biosBranchCheckOld, 0x60, 1);
+ psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
+
result = 0;
cleanup:
memset(misc, 0, sizeof(*misc));
f = SaveFuncs.open(file, "rb");
if (f == NULL) return -1;
- SaveFuncs.read(f, header, sizeof(header));
+ memset(header, 0, sizeof(header));
+ SaveFuncs.read(f, header, 16);
+ if (strncmp("RASTATE", header, 7) == 0)
+ SaveFuncs.read(f, header, 16);
+ SaveFuncs.read(f, header + 16, 16);
SaveFuncs.read(f, &version, sizeof(u32));
SaveFuncs.read(f, &hle, sizeof(boolean));
return 0;
}
-// NET Function Helpers
-
-int SendPcsxInfo() {
- if (NET_recvData == NULL || NET_sendData == NULL)
- return 0;
-
- boolean Sio_old = 0;
- boolean SpuIrq_old = 0;
- boolean RCntFix_old = 0;
- NET_sendData(&Config.Xa, sizeof(Config.Xa), PSE_NET_BLOCKING);
- NET_sendData(&Sio_old, sizeof(Sio_old), PSE_NET_BLOCKING);
- NET_sendData(&SpuIrq_old, sizeof(SpuIrq_old), PSE_NET_BLOCKING);
- NET_sendData(&RCntFix_old, sizeof(RCntFix_old), PSE_NET_BLOCKING);
- NET_sendData(&Config.PsxType, sizeof(Config.PsxType), PSE_NET_BLOCKING);
- NET_sendData(&Config.Cpu, sizeof(Config.Cpu), PSE_NET_BLOCKING);
-
- return 0;
-}
-
-int RecvPcsxInfo() {
- int tmp;
-
- if (NET_recvData == NULL || NET_sendData == NULL)
- return 0;
-
- boolean Sio_old = 0;
- boolean SpuIrq_old = 0;
- boolean RCntFix_old = 0;
- NET_recvData(&Config.Xa, sizeof(Config.Xa), PSE_NET_BLOCKING);
- NET_recvData(&Sio_old, sizeof(Sio_old), PSE_NET_BLOCKING);
- NET_recvData(&SpuIrq_old, sizeof(SpuIrq_old), PSE_NET_BLOCKING);
- NET_recvData(&RCntFix_old, sizeof(RCntFix_old), PSE_NET_BLOCKING);
- NET_recvData(&Config.PsxType, sizeof(Config.PsxType), PSE_NET_BLOCKING);
-
- tmp = Config.Cpu;
- NET_recvData(&Config.Cpu, sizeof(Config.Cpu), PSE_NET_BLOCKING);
- if (tmp != Config.Cpu) {
- psxCpu->Shutdown();
-#ifndef DRC_DISABLE
- if (Config.Cpu == CPU_INTERPRETER) psxCpu = &psxInt;
- else psxCpu = &psxRec;
-#else
- psxCpu = &psxInt;
-#endif
- if (psxCpu->Init() == -1) {
- SysClose(); return -1;
- }
- psxCpu->Reset();
- psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
- }
-
- return 0;
-}
-
// remove the leading and trailing spaces in a string
void trim(char *str) {
int pos = 0;
0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
};
-u16 calcCrc(u8 *d, int len) {
+u16 calcCrc(const u8 *d, int len) {
u16 crc = 0;
int i;
return ~crc;
}
+
+#define MKSTR2(x) #x
+#define MKSTR(x) MKSTR2(x)
+const char *get_build_info(void)
+{
+ return ""
+#ifdef __VERSION__
+ "cc " __VERSION__ " "
+#endif
+#if defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 8
+ "64bit "
+#elif defined(__SIZEOF_POINTER__) && __SIZEOF_POINTER__ == 4
+ "32bit "
+#endif
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ "be "
+#endif
+#if defined(__PIE__) || defined(__pie__)
+ "pie "
+#endif
+#if defined(__PIC__) || defined(__pic__)
+ "pic "
+#endif
+#if defined(__aarch64__)
+ "arm64"
+#elif defined(__arm__)
+ "arm"
+#endif
+#ifdef __ARM_ARCH
+ "v" MKSTR(__ARM_ARCH) " "
+#endif
+#ifdef __thumb__
+ "thumb "
+#endif
+#if defined(__AVX__)
+ "avx "
+#elif defined(__SSSE3__)
+ "ssse3 "
+#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
+ "neon "
+#endif
+#if defined(__ARM_FEATURE_SVE) && __ARM_FEATURE_SVE
+ "sve "
+#endif
+#if defined(LIGHTREC)
+ "lightrec "
+#elif !defined(DRC_DISABLE)
+ "ari64 "
+#endif
+ "gpu=" MKSTR(BUILTIN_GPU);
+}
int LoadState(const char *file);
int CheckState(const char *file);
-int SendPcsxInfo();
-int RecvPcsxInfo();
-
void trim(char *str);
-u16 calcCrc(u8 *d, int len);
+u16 calcCrc(const u8 *d, int len);
+
+const char *get_build_info(void);
#ifdef __cplusplus
}
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+#include <strings.h> // ffs
#define FLAGLESS
#include "../gte.h"
#undef FLAGLESS
#include "pcnt.h"
#include "arm_features.h"
+#ifdef TC_WRITE_OFFSET
+#error "not implemented"
+#endif
+
#ifdef DRC_DBG
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
void invalidate_addr_r10();
void invalidate_addr_r12();
-const u_int invalidate_addr_reg[16] = {
- (int)invalidate_addr_r0,
- (int)invalidate_addr_r1,
- (int)invalidate_addr_r2,
- (int)invalidate_addr_r3,
- (int)invalidate_addr_r4,
- (int)invalidate_addr_r5,
- (int)invalidate_addr_r6,
- (int)invalidate_addr_r7,
- (int)invalidate_addr_r8,
- (int)invalidate_addr_r9,
- (int)invalidate_addr_r10,
+const void *invalidate_addr_reg[16] = {
+ invalidate_addr_r0,
+ invalidate_addr_r1,
+ invalidate_addr_r2,
+ invalidate_addr_r3,
+ invalidate_addr_r4,
+ invalidate_addr_r5,
+ invalidate_addr_r6,
+ invalidate_addr_r7,
+ invalidate_addr_r8,
+ invalidate_addr_r9,
+ invalidate_addr_r10,
0,
- (int)invalidate_addr_r12,
+ invalidate_addr_r12,
0,
0,
- 0};
+ 0
+};
/* Linker */
+static void set_jump_target_far1(u_int *insn, void *target)
+{
+ u_int ni = *insn & 0xff000000;
+ ni |= (((u_int)target - (u_int)insn - 8u) << 6) >> 8;
+ assert((ni & 0x0e000000) == 0x0a000000);
+ *insn = ni;
+}
+
static void set_jump_target(void *addr, void *target_)
{
- u_int target = (u_int)target_;
- u_char *ptr = addr;
- u_int *ptr2=(u_int *)ptr;
+ const u_int target = (u_int)target_;
+ const u_char *ptr = addr;
+ u_int *ptr2 = (u_int *)ptr;
if(ptr[3]==0xe2) {
assert((target-(u_int)ptr2-8)<1024);
assert(((uintptr_t)addr&3)==0);
else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
}
else {
- assert((ptr[3]&0x0e)==0xa);
- *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
+ set_jump_target_far1(ptr2, target_);
}
}
return *l_ptr;
}
-// find where external branch is liked to using addr of it's stub:
-// get address that insn one after stub loads (dyna_linker arg1),
-// treat it as a pointer to branch insn,
-// return addr where that branch jumps to
-#if 0
-static void *get_pointer(void *stub)
-{
- //printf("get_pointer(%x)\n",(int)stub);
- int *i_ptr=find_extjump_insn(stub);
- assert((*i_ptr&0x0f000000)==0x0a000000); // b
- return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
-}
-#endif
-
// Allocate a specific ARM register.
static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
{
/* Assembler */
-static unused char regname[16][4] = {
+static attr_unused char regname[16][4] = {
"r0",
"r1",
"r2",
return ((u_int)offset>>2)&0xffffff;
}
-static unused void emit_breakpoint(void)
+static attr_unused void emit_breakpoint(void)
{
assem_debug("bkpt #0\n");
//output_w32(0xe1200070);
static void emit_storereg(int r, int hr)
{
assert(hr != EXCLUDE_REG);
- int addr = (int)&psxRegs.GPR.r[r];
+ void *addr;
switch (r) {
//case HIREG: addr = &hi; break;
//case LOREG: addr = &lo; break;
- case CCREG: addr = (int)&cycle_count; break;
- default: assert(r < 34); break;
+ case CCREG: addr = &cycle_count; break;
+ default: assert(r < 34u); addr = &psxRegs.GPR.r[r]; break;
}
- u_int offset = addr-(u_int)&dynarec_local;
- assert(offset<4096);
+ uintptr_t offset = (char *)addr - (char *)&dynarec_local;
+ assert(offset < 4096u);
assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
}
output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
}
-static unused void emit_lslpls_imm(int rs,int imm,int rt)
+static attr_unused void emit_lslpls_imm(int rs,int imm,int rt)
{
assert(imm>0);
assert(imm<32);
output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
}
-static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
+static attr_unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
{
assert(rs<16);
assert(rt<16);
output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
}
-static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
+static attr_unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
{
assert(rs<16);
assert(rt<16);
output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
}
-static unused void emit_cmovne_reg(int rs,int rt)
+static attr_unused void emit_cmovne_reg(int rs,int rt)
{
assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
static void emit_call(const void *a_)
{
int a = (int)a_;
- assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
+ assem_debug("bl %p%s\n", log_addr(a), func_name(a_));
u_int offset=genjmp(a);
output_w32(0xeb000000|offset);
}
static void emit_jmp(const void *a_)
{
int a = (int)a_;
- assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
+ assem_debug("b %p%s\n", log_addr(a_), func_name(a_));
u_int offset=genjmp(a);
output_w32(0xea000000|offset);
}
static void emit_jne(const void *a_)
{
int a = (int)a_;
- assem_debug("bne %x\n",a);
+ assem_debug("bne %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0x1a000000|offset);
}
static void emit_jeq(const void *a_)
{
int a = (int)a_;
- assem_debug("beq %x\n",a);
+ assem_debug("beq %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0x0a000000|offset);
}
static void emit_js(const void *a_)
{
int a = (int)a_;
- assem_debug("bmi %x\n",a);
+ assem_debug("bmi %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0x4a000000|offset);
}
static void emit_jns(const void *a_)
{
int a = (int)a_;
- assem_debug("bpl %x\n",a);
+ assem_debug("bpl %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0x5a000000|offset);
}
static void emit_jl(const void *a_)
{
int a = (int)a_;
- assem_debug("blt %x\n",a);
+ assem_debug("blt %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0xba000000|offset);
}
static void emit_jge(const void *a_)
{
int a = (int)a_;
- assem_debug("bge %x\n",a);
+ assem_debug("bge %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0xaa000000|offset);
}
static void emit_jo(const void *a_)
{
int a = (int)a_;
- assem_debug("bvs %x\n",a);
+ assem_debug("bvs %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0x6a000000|offset);
}
static void emit_jno(const void *a_)
{
int a = (int)a_;
- assem_debug("bvc %x\n",a);
+ assem_debug("bvc %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0x7a000000|offset);
}
static void emit_jc(const void *a_)
{
int a = (int)a_;
- assem_debug("bcs %x\n",a);
+ assem_debug("bcs %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0x2a000000|offset);
}
static void emit_jcc(const void *a_)
{
int a = (int)a_;
- assem_debug("bcc %x\n",a);
+ assem_debug("bcc %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0x3a000000|offset);
}
return ret;
}
-static unused void emit_callreg(u_int r)
+static attr_unused void emit_callreg(u_int r)
{
assert(r<15);
assem_debug("blx %s\n",regname[r]);
output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
}
-static unused void emit_rsbimm(int rs, int imm, int rt)
+static attr_unused void emit_rsbimm(int rs, int imm, int rt)
{
u_int armval;
genimm_checked(imm,&armval);
output_w32(0xe7d00000|rd_rn_rm(rt,base,r)|0x620);
}
-static void emit_callne(int a)
+static void emit_callne(const void *a_)
{
- assem_debug("blne %x\n",a);
+ int a = (int)a_;
+ assem_debug("blne %p\n", log_addr(a_));
u_int offset=genjmp(a);
output_w32(0x1b000000|offset);
}
// Used to preload hash table entries
-static unused void emit_prefetchreg(int r)
+static attr_unused void emit_prefetchreg(int r)
{
assem_debug("pld %s\n",regname[r]);
output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
}
-static unused void emit_addpl_imm(int rs,int imm,int rt)
+static attr_unused void emit_addpl_imm(int rs,int imm,int rt)
{
u_int armval;
genimm_checked(imm,&armval);
output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
}
-static void emit_jno_unlikely(int a)
+static void emit_jno_unlikely(void *a_)
{
- //emit_jno(a);
- assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
+ //emit_jno(a_);
+ assert(a_ == NULL);
+ assem_debug("addvc pc,pc,#? (%p)\n", /*a-(int)out-8,*/ log_addr(a_));
output_w32(0x72800000|rd_rn_rm(15,15,0));
}
set_jump_target(jaddr, out);
}
-// parsed by get_pointer, find_extjump_insn
+// parsed by find_extjump_insn, check_extjump2
static void emit_extjump(u_char *addr, u_int target)
{
u_char *ptr=(u_char *)addr;
static void do_readstub(int n)
{
- assem_debug("do_readstub %x\n",start+stubs[n].a*4);
+ assem_debug("do_readstub %p\n", log_addr(start + stubs[n].a*4));
literal_pool(256);
set_jump_target(stubs[n].addr, out);
enum stub_type type=stubs[n].type;
static void do_writestub(int n)
{
- assem_debug("do_writestub %x\n",start+stubs[n].a*4);
+ assem_debug("do_writestub %p\n", log_addr(start + stubs[n].a*4));
literal_pool(256);
set_jump_target(stubs[n].addr, out);
enum stub_type type=stubs[n].type;
extern char *invc_ptr;
+// note: max due to branch encoding: arm 32M, arm64 128M
#define TARGET_SIZE_2 24 // 2^24 = 16 megabytes
struct tramp_insns
#include "arm_features.h"
/* Linker */
+static void set_jump_target_far1(u_int *insn_, void *target)
+{
+ u_int *insn = NDRC_WRITE_OFFSET(insn_);
+ u_int in = *insn & 0xfc000000;
+ intptr_t offset = (u_char *)target - (u_char *)insn_;
+ assert(in == 0x14000000);
+ assert(-134217728 <= offset && offset < 134217728);
+ in |= (offset >> 2) & 0x3ffffff;
+ *insn = in;
+}
+
static void set_jump_target(void *addr, void *target)
{
u_int *ptr = NDRC_WRITE_OFFSET(addr);
intptr_t offset = (u_char *)target - (u_char *)addr;
if ((*ptr&0xFC000000) == 0x14000000) { // b
- assert(offset>=-134217728LL&&offset<134217728LL);
- *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
+ set_jump_target_far1(addr, target);
}
else if ((*ptr&0xff000000) == 0x54000000 // b.cond
|| (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
return ptr + offset / 4;
}
-#if 0
-// find where external branch is liked to using addr of it's stub:
-// get address that the stub loads (dyna_linker arg1),
-// treat it as a pointer to branch insn,
-// return addr where that branch jumps to
-static void *get_pointer(void *stub)
-{
- int *i_ptr = find_extjump_insn(stub);
- if ((*i_ptr&0xfc000000) == 0x14000000) // b
- return i_ptr + ((signed int)(*i_ptr<<6)>>6);
- if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
- || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
- return i_ptr + ((signed int)(*i_ptr<<8)>>13);
- assert(0);
- return NULL;
-}
-#endif
-
// Allocate a specific ARM register.
static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
{
/* Assembler */
-static unused const char *regname[32] = {
+static attr_unused const char *regname[32] = {
"w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
"ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
"w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
};
-static unused const char *regname64[32] = {
+static attr_unused const char *regname64[32] = {
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
"ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
};
-static unused const char *condname[16] = {
+static attr_unused const char *condname[16] = {
"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
"hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
};
output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
}
-static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
+static attr_unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
{
assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
static void emit_storereg(u_int r, u_int hr)
{
assert(r < 64);
- void *addr = &psxRegs.GPR.r[r];
+ void *addr;
switch (r) {
//case HIREG: addr = &hi; break;
//case LOREG: addr = &lo; break;
case CCREG: addr = &cycle_count; break;
- default: assert(r < 34); break;
+ default: assert(r < 34u); addr = &psxRegs.GPR.r[r]; break;
}
emit_writeword(hr, addr);
}
static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
{
- unused const char *st = s ? "s" : "";
+ attr_unused const char *st = s ? "s" : "";
s = s ? 0x20000000 : 0;
is64 = is64 ? 0x80000000 : 0;
if (imm < 4096) {
static void emit_call(const void *a)
{
intptr_t diff = (u_char *)a - out;
- assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
+ assem_debug("bl %p%s\n", log_addr(a), func_name(a));
assert(!(diff & 3));
if (-134217728 <= diff && diff <= 134217727)
output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
static void emit_jmp(const void *a)
{
- assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
+ assem_debug("b %p%s\n", log_addr(a), func_name(a));
u_int offset = genjmp(a);
output_w32(0x14000000 | offset);
}
static void emit_jne(const void *a)
{
- assem_debug("bne %p\n", a);
+ assem_debug("bne %p\n", log_addr(a));
u_int offset = genjmpcc(a);
output_w32(0x54000000 | (offset << 5) | COND_NE);
}
static void emit_jeq(const void *a)
{
- assem_debug("beq %p\n", a);
+ assem_debug("beq %p\n", log_addr(a));
u_int offset = genjmpcc(a);
output_w32(0x54000000 | (offset << 5) | COND_EQ);
}
static void emit_js(const void *a)
{
- assem_debug("bmi %p\n", a);
+ assem_debug("bmi %p\n", log_addr(a));
u_int offset = genjmpcc(a);
output_w32(0x54000000 | (offset << 5) | COND_MI);
}
static void emit_jns(const void *a)
{
- assem_debug("bpl %p\n", a);
+ assem_debug("bpl %p\n", log_addr(a));
u_int offset = genjmpcc(a);
output_w32(0x54000000 | (offset << 5) | COND_PL);
}
static void emit_jl(const void *a)
{
- assem_debug("blt %p\n", a);
+ assem_debug("blt %p\n", log_addr(a));
u_int offset = genjmpcc(a);
output_w32(0x54000000 | (offset << 5) | COND_LT);
}
static void emit_jge(const void *a)
{
- assem_debug("bge %p\n", a);
+ assem_debug("bge %p\n", log_addr(a));
u_int offset = genjmpcc(a);
output_w32(0x54000000 | (offset << 5) | COND_GE);
}
static void emit_jo(const void *a)
{
- assem_debug("bvs %p\n", a);
+ assem_debug("bvs %p\n", log_addr(a));
u_int offset = genjmpcc(a);
output_w32(0x54000000 | (offset << 5) | COND_VS);
}
static void emit_jno(const void *a)
{
- assem_debug("bvc %p\n", a);
+ assem_debug("bvc %p\n", log_addr(a));
u_int offset = genjmpcc(a);
output_w32(0x54000000 | (offset << 5) | COND_VC);
}
static void emit_jc(const void *a)
{
- assem_debug("bcs %p\n", a);
+ assem_debug("bcs %p\n", log_addr(a));
u_int offset = genjmpcc(a);
output_w32(0x54000000 | (offset << 5) | COND_CS);
}
static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
{
- assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
+ assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], log_addr(a));
u_int offset = genjmpcc(a);
is64 = is64 ? 0x80000000 : 0;
isnz = isnz ? 0x01000000 : 0;
static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
{
u_int op = 0xb9000000;
- unused const char *ldst = is_st ? "st" : "ld";
- unused char rp = is64 ? 'x' : 'w';
+ attr_unused const char *ldst = is_st ? "st" : "ld";
+ attr_unused char rp = is64 ? 'x' : 'w';
assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
is64 = is64 ? 1 : 0;
assert((ofs & ((1 << (2+is64)) - 1)) == 0);
static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
{
u_int op = 0x29000000;
- unused const char *ldst = is_st ? "st" : "ld";
- unused char rp = is64 ? 'x' : 'w';
+ attr_unused const char *ldst = is_st ? "st" : "ld";
+ attr_unused char rp = is64 ? 'x' : 'w';
assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
is64 = is64 ? 1 : 0;
assert((ofs & ((1 << (2+is64)) - 1)) == 0);
{
}
-// parsed by get_pointer, find_extjump_insn
+// parsed by find_extjump_insn, check_extjump2
static void emit_extjump(u_char *addr, u_int target)
{
assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
{
if (rs != 0)
emit_mov(rs, 0);
+ emit_readptr(&hash_table_ptr, 1);
emit_far_call(ndrc_get_addr_ht);
emit_jmpreg(0);
}
emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
}
-static unused void clear_cache_arm64(char *start, char *end)
+static attr_unused void clear_cache_arm64(char *start, char *end)
{
// Don't rely on GCC's __clear_cache implementation, as it caches
// icache/dcache cache line sizes, that can vary between cores on
#define HOST_IMM8 1
/* calling convention:
- r0 -r17: caller-save
- r19-r29: callee-save */
+ x0 -x17: caller-save
+ x18 : caller-save (platform reg)
+ x19-x29: callee-save */
#define HOST_REGS 29
#define EXCLUDE_REG -1
#include "../psxinterpreter.h"
#include "../psxcounters.h"
#include "../psxevents.h"
+#include "../psxbios.h"
#include "../r3000a.h"
#include "../gte_arm.h"
#include "../gte_neon.h"
+#include "compiler_features.h"
+#include "arm_features.h"
#define FLAGLESS
#include "../gte.h"
+#if defined(NDRC_THREAD) && !defined(DRC_DISABLE) && !defined(LIGHTREC)
+#include "../../frontend/libretro-rthreads.h"
+#include "features/features_cpu.h"
+#include "retro_timers.h"
+#endif
+#ifdef _3DS
+#include <3ds_utils.h>
+#endif
+#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+#endif
//#define evprintf printf
#define evprintf(...)
-void pcsx_mtc0(u32 reg, u32 val)
-{
- evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
- MTC0(&psxRegs, reg, val);
- gen_interupt(&psxRegs.CP0);
-
- //if (psxRegs.CP0.n.Cause & psxRegs.CP0.n.SR & 0x0300) // possible sw irq
- if ((psxRegs.pc & 0x803ffeff) == 0x80000080)
- pending_exception = 1;
-}
-
-void pcsx_mtc0_ds(u32 reg, u32 val)
-{
- evprintf("MTC0 %d #%x @%08x %u\n", reg, val, psxRegs.pc, psxRegs.cycle);
- MTC0(&psxRegs, reg, val);
-}
+static void ari64_thread_sync(void);
-void new_dyna_freeze(void *f, int mode)
+void ndrc_freeze(void *f, int mode)
{
const char header_save[8] = "ariblks";
uint32_t addrs[1024 * 4];
int bytes;
char header[8];
+ ari64_thread_sync();
+
if (mode != 0) { // save
size = new_dynarec_save_blocks(addrs, sizeof(addrs));
if (size == 0)
SaveFuncs.write(f, addrs, size);
}
else {
- new_dyna_pcsx_mem_load_state();
-
bytes = SaveFuncs.read(f, header, sizeof(header));
if (bytes != sizeof(header) || strcmp(header, header_save)) {
if (bytes > 0)
//printf("drc: %d block info entries %s\n", size/8, mode ? "saved" : "loaded");
}
+void ndrc_clear_full(void)
+{
+ ari64_thread_sync();
+ new_dynarec_clear_full();
+}
+
#if !defined(DRC_DISABLE) && !defined(LIGHTREC)
+#include "linkage_offsets.h"
+
+static void ari64_thread_init(void);
+static int ari64_thread_check_range(unsigned int start, unsigned int end);
+
+void pcsx_mtc0(psxRegisters *regs, u32 reg, u32 val)
+{
+ evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle);
+ MTC0(regs, reg, val);
+ gen_interupt(&regs->CP0);
+}
+
+void pcsx_mtc0_ds(psxRegisters *regs, u32 reg, u32 val)
+{
+ evprintf("MTC0 %d #%x @%08x %u\n", reg, val, regs->pc, regs->cycle);
+ MTC0(regs, reg, val);
+}
/* GTE stuff */
void *gte_handlers[64];
[GTE_NCCT] = GDBITS9(9,10,11,20,21,22,25,26,27),
};
-static int ari64_init()
-{
- static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
- size_t i;
-
- new_dynarec_init();
- new_dyna_pcsx_mem_init();
-
- for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
- if (psxCP2[i] != gteNULL)
- gte_handlers[i] = psxCP2[i];
-
-#if defined(__arm__) && !defined(DRC_DBG)
- gte_handlers[0x06] = gteNCLIP_arm;
-#ifdef HAVE_ARMV5
- gte_handlers_nf[0x01] = gteRTPS_nf_arm;
- gte_handlers_nf[0x30] = gteRTPT_nf_arm;
-#endif
-#ifdef __ARM_NEON__
- // compiler's _nf version is still a lot slower than neon
- // _nf_arm RTPS is roughly the same, RTPT slower
- gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
- gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
-#endif
-#endif
-#ifdef DRC_DBG
- memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
-#endif
- psxH_ptr = psxH;
- zeromem_ptr = zero_mem;
- scratch_buf_ptr = scratch_buf;
-
- return 0;
-}
-
static void ari64_reset()
{
+ ari64_thread_sync();
new_dyna_pcsx_mem_reset();
new_dynarec_invalidate_all_pages();
new_dyna_pcsx_mem_load_state();
- pending_exception = 1;
}
// execute until predefined leave points
// (HLE softcall exit and BIOS fastboot end)
-static void ari64_execute_until()
+static void ari64_execute_until(psxRegisters *regs)
{
- evprintf("ari64_execute %08x, %u->%u (%d)\n", psxRegs.pc,
- psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
+ void *drc_local = (char *)regs - LO_psxRegs;
+
+ assert(drc_local == dynarec_local);
+ evprintf("+exec %08x, %u->%u (%d)\n", regs->pc, regs->cycle,
+ regs->next_interupt, regs->next_interupt - regs->cycle);
- new_dyna_start(dynarec_local);
+ new_dyna_start(drc_local);
- evprintf("ari64_execute end %08x, %u->%u (%d)\n", psxRegs.pc,
- psxRegs.cycle, next_interupt, next_interupt - psxRegs.cycle);
+ evprintf("-exec %08x, %u->%u (%d) stop %d \n", regs->pc, regs->cycle,
+ regs->next_interupt, regs->next_interupt - regs->cycle, regs->stop);
}
-static void ari64_execute()
+static void ari64_execute(struct psxRegisters *regs)
{
- while (!stop) {
- schedule_timeslice();
- ari64_execute_until();
- evprintf("drc left @%08x\n", psxRegs.pc);
+ while (!regs->stop) {
+ schedule_timeslice(regs);
+ ari64_execute_until(regs);
+ evprintf("drc left @%08x\n", regs->pc);
}
}
-static void ari64_execute_block(enum blockExecCaller caller)
+static void ari64_execute_block(struct psxRegisters *regs, enum blockExecCaller caller)
{
if (caller == EXEC_CALLER_BOOT)
- stop++;
+ regs->stop++;
- next_interupt = psxRegs.cycle + 1;
- ari64_execute_until();
+ regs->next_interupt = regs->cycle + 1;
+ ari64_execute_until(regs);
if (caller == EXEC_CALLER_BOOT)
- stop--;
+ regs->stop--;
}
static void ari64_clear(u32 addr, u32 size)
{
- size *= 4; /* PCSX uses DMA units (words) */
+ u32 end = addr + size * 4; /* PCSX uses DMA units (words) */
- evprintf("ari64_clear %08x %04x\n", addr, size);
+ evprintf("ari64_clear %08x %04x\n", addr, size * 4);
+
+ if (!new_dynarec_quick_check_range(addr, end) &&
+ !ari64_thread_check_range(addr, end))
+ return;
+
+ ari64_thread_sync();
+ new_dynarec_invalidate_range(addr, end);
+}
- new_dynarec_invalidate_range(addr, addr + size);
+static void ari64_on_ext_change(int ram_replaced, int other_cpu_emu_exec)
+{
+ if (ram_replaced)
+ ari64_reset();
+ else if (other_cpu_emu_exec)
+ new_dyna_pcsx_mem_load_state();
}
static void ari64_notify(enum R3000Anote note, void *data) {
case R3000ACPU_NOTIFY_BEFORE_SAVE:
break;
case R3000ACPU_NOTIFY_AFTER_LOAD:
- if (data == NULL)
- ari64_reset();
+ ari64_on_ext_change(data == NULL, 0);
psxInt.Notify(note, data);
break;
}
static void ari64_apply_config()
{
+ int thread_changed;
+
+ ari64_thread_sync();
intApplyConfig();
if (Config.DisableStalls)
- new_dynarec_hacks |= NDHACK_NO_STALLS;
+ ndrc_g.hacks |= NDHACK_NO_STALLS;
else
- new_dynarec_hacks &= ~NDHACK_NO_STALLS;
+ ndrc_g.hacks &= ~NDHACK_NO_STALLS;
- if (Config.cycle_multiplier != cycle_multiplier_old
- || new_dynarec_hacks != new_dynarec_hacks_old)
+ thread_changed = ((ndrc_g.hacks | ndrc_g.hacks_pergame) ^ ndrc_g.hacks_old)
+ & (NDHACK_THREAD_FORCE | NDHACK_THREAD_FORCE_ON);
+ if (Config.cycle_multiplier != ndrc_g.cycle_multiplier_old
+ || (ndrc_g.hacks | ndrc_g.hacks_pergame) != ndrc_g.hacks_old)
{
new_dynarec_clear_full();
}
+ if (thread_changed)
+ ari64_thread_init();
+}
+
+#ifdef NDRC_THREAD
+static void clear_local_cache(void)
+{
+#if defined(__arm__) || defined(__aarch64__)
+ if (ndrc_g.thread.dirty_start) {
+ // see "Ensuring the visibility of updates to instructions"
+ // in v7/v8 reference manuals (DDI0406, DDI0487 etc.)
+#if defined(__aarch64__) || defined(HAVE_ARMV8)
+ // the actual clean/invalidate is broadcast to all cores,
+ // the manual only prescribes an isb
+ __asm__ volatile("isb");
+//#elif defined(_3DS)
+// ctr_invalidate_icache();
+#else
+ // while on v6 this is always required, on v7 it depends on
+ // "Multiprocessing Extensions" being present, but that is difficult
+ // to detect so do it always for now
+ new_dyna_clear_cache(ndrc_g.thread.dirty_start, ndrc_g.thread.dirty_end);
+#endif
+ ndrc_g.thread.dirty_start = ndrc_g.thread.dirty_end = 0;
+ }
+#endif
+}
+
+static void mixed_execute_block(struct psxRegisters *regs, enum blockExecCaller caller)
+{
+ psxInt.ExecuteBlock(regs, caller);
+}
+
+static void mixed_clear(u32 addr, u32 size)
+{
+ ari64_clear(addr, size);
+ psxInt.Clear(addr, size);
+}
+
+static void mixed_notify(enum R3000Anote note, void *data)
+{
+ ari64_notify(note, data);
+ psxInt.Notify(note, data);
+}
+
+static R3000Acpu psxMixedCpu = {
+ NULL /* Init */, NULL /* Reset */, NULL /* Execute */,
+ mixed_execute_block,
+ mixed_clear,
+ mixed_notify,
+ NULL /* ApplyConfig */, NULL /* Shutdown */
+};
+
+static noinline void ari64_execute_threaded_slow(struct psxRegisters *regs,
+ enum blockExecCaller block_caller)
+{
+ if (ndrc_g.thread.busy_addr == ~0u) {
+ memcpy(ndrc_smrv_regs, regs->GPR.r, sizeof(ndrc_smrv_regs));
+ slock_lock(ndrc_g.thread.lock);
+ ndrc_g.thread.busy_addr = regs->pc;
+ slock_unlock(ndrc_g.thread.lock);
+ scond_signal(ndrc_g.thread.cond);
+ }
+
+ //ari64_notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL);
+ psxInt.Notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
+ assert(psxCpu == &psxRec);
+ psxCpu = &psxMixedCpu;
+ for (;;)
+ {
+ mixed_execute_block(regs, block_caller);
+
+ if (ndrc_g.thread.busy_addr == ~0u)
+ break;
+ if (block_caller == EXEC_CALLER_HLE) {
+ if (!psxBiosSoftcallEnded())
+ continue;
+ break;
+ }
+ else if (block_caller == EXEC_CALLER_BOOT) {
+ if (!psxExecuteBiosEnded())
+ continue;
+ break;
+ }
+ if (regs->stop)
+ break;
+ }
+ psxCpu = &psxRec;
+
+ psxInt.Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, NULL);
+ //ari64_notify(R3000ACPU_NOTIFY_AFTER_LOAD, NULL);
+ ari64_on_ext_change(0, 1);
+}
+
+static void ari64_execute_threaded_once(struct psxRegisters *regs,
+ enum blockExecCaller block_caller)
+{
+ void *drc_local = (char *)regs - LO_psxRegs;
+ struct ht_entry *hash_table =
+ *(void **)((char *)drc_local + LO_hash_table_ptr);
+ void *target;
+
+ if (likely(ndrc_g.thread.busy_addr == ~0u)) {
+ target = ndrc_get_addr_ht_param(hash_table, regs->pc,
+ ndrc_cm_no_compile);
+ if (target) {
+ clear_local_cache();
+ new_dyna_start_at(drc_local, target);
+ return;
+ }
+ }
+ ari64_execute_threaded_slow(regs, block_caller);
+}
+
+static void ari64_execute_threaded(struct psxRegisters *regs)
+{
+ schedule_timeslice(regs);
+ while (!regs->stop)
+ {
+ ari64_execute_threaded_once(regs, EXEC_CALLER_OTHER);
+
+ if ((s32)(regs->cycle - regs->next_interupt) >= 0)
+ schedule_timeslice(regs);
+ }
+}
+
+static void ari64_execute_threaded_block(struct psxRegisters *regs,
+ enum blockExecCaller caller)
+{
+ if (caller == EXEC_CALLER_BOOT)
+ regs->stop++;
+
+ regs->next_interupt = regs->cycle + 1;
+
+ ari64_execute_threaded_once(regs, caller);
+ if (regs->cpuInRecursion) {
+ // must sync since we are returning to compiled code
+ ari64_thread_sync();
+ }
+
+ if (caller == EXEC_CALLER_BOOT)
+ regs->stop--;
+}
+
+static void ari64_thread_sync(void)
+{
+ if (!ndrc_g.thread.lock || ndrc_g.thread.busy_addr == ~0u)
+ return;
+ for (;;) {
+ slock_lock(ndrc_g.thread.lock);
+ slock_unlock(ndrc_g.thread.lock);
+ if (ndrc_g.thread.busy_addr == ~0)
+ break;
+ retro_sleep(0);
+ }
+}
+
+static int ari64_thread_check_range(unsigned int start, unsigned int end)
+{
+ u32 addr = ndrc_g.thread.busy_addr;
+ if (addr == ~0u)
+ return 0;
+
+ addr &= 0x1fffffff;
+ start &= 0x1fffffff;
+ end &= 0x1fffffff;
+ if (addr >= end)
+ return 0;
+ if (addr + MAXBLOCK * 4 <= start)
+ return 0;
+
+ //SysPrintf("%x hits %x-%x\n", addr, start, end);
+ return 1;
+}
+
+static void ari64_compile_thread(void *unused)
+{
+ struct ht_entry *hash_table =
+ *(void **)((char *)dynarec_local + LO_hash_table_ptr);
+ void *target;
+ u32 addr;
+
+ slock_lock(ndrc_g.thread.lock);
+ while (!ndrc_g.thread.exit)
+ {
+ addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr;
+ if (addr == ~0u)
+ scond_wait(ndrc_g.thread.cond, ndrc_g.thread.lock);
+ addr = *(volatile unsigned int *)&ndrc_g.thread.busy_addr;
+ if (addr == ~0u || ndrc_g.thread.exit)
+ continue;
+
+ target = ndrc_get_addr_ht_param(hash_table, addr,
+ ndrc_cm_compile_in_thread);
+ //printf("c %08x -> %p\n", addr, target);
+ ndrc_g.thread.busy_addr = ~0u;
+ }
+ slock_unlock(ndrc_g.thread.lock);
+ (void)target;
+}
+
+static void ari64_thread_shutdown(void)
+{
+ psxRec.Execute = ari64_execute;
+ psxRec.ExecuteBlock = ari64_execute_block;
+
+ if (ndrc_g.thread.lock)
+ slock_lock(ndrc_g.thread.lock);
+ ndrc_g.thread.exit = 1;
+ if (ndrc_g.thread.lock)
+ slock_unlock(ndrc_g.thread.lock);
+ if (ndrc_g.thread.cond)
+ scond_signal(ndrc_g.thread.cond);
+ if (ndrc_g.thread.handle) {
+ sthread_join(ndrc_g.thread.handle);
+ ndrc_g.thread.handle = NULL;
+ }
+ if (ndrc_g.thread.cond) {
+ scond_free(ndrc_g.thread.cond);
+ ndrc_g.thread.cond = NULL;
+ }
+ if (ndrc_g.thread.lock) {
+ slock_free(ndrc_g.thread.lock);
+ ndrc_g.thread.lock = NULL;
+ }
+ ndrc_g.thread.busy_addr = ~0u;
+}
+
+static void ari64_thread_init(void)
+{
+ int enable;
+
+ if (ndrc_g.hacks_pergame & NDHACK_THREAD_FORCE)
+ enable = 0;
+ else if (ndrc_g.hacks & NDHACK_THREAD_FORCE)
+ enable = ndrc_g.hacks & NDHACK_THREAD_FORCE_ON;
+ else {
+ u32 cpu_count = cpu_features_get_core_amount();
+ enable = cpu_count > 1;
+#ifdef _3DS
+ // bad for old3ds, reportedly no improvement for new3ds
+ enable = 0;
+#endif
+ }
+
+ if (!ndrc_g.thread.handle == !enable)
+ return;
+
+ ari64_thread_shutdown();
+ ndrc_g.thread.exit = 0;
+ ndrc_g.thread.busy_addr = ~0u;
+
+ if (enable) {
+ ndrc_g.thread.lock = slock_new();
+ ndrc_g.thread.cond = scond_new();
+ }
+ if (ndrc_g.thread.lock && ndrc_g.thread.cond)
+ ndrc_g.thread.handle = pcsxr_sthread_create(ari64_compile_thread, PCSXRT_DRC);
+ if (ndrc_g.thread.handle) {
+ psxRec.Execute = ari64_execute_threaded;
+ psxRec.ExecuteBlock = ari64_execute_threaded_block;
+ }
+ else {
+ // clean up potential incomplete init
+ ari64_thread_shutdown();
+ }
+ SysPrintf("compiler thread %sabled\n", ndrc_g.thread.handle ? "en" : "dis");
+}
+#else // if !NDRC_THREAD
+static void ari64_thread_init(void) {}
+static void ari64_thread_shutdown(void) {}
+static int ari64_thread_check_range(unsigned int start, unsigned int end) { return 0; }
+#endif
+
+static int ari64_init()
+{
+ static u32 scratch_buf[8*8*2] __attribute__((aligned(64)));
+ size_t i;
+
+ new_dynarec_init();
+ new_dyna_pcsx_mem_init();
+
+ for (i = 0; i < ARRAY_SIZE(gte_handlers); i++)
+ if (psxCP2[i] != gteNULL)
+ gte_handlers[i] = psxCP2[i];
+
+#if defined(__arm__) && !defined(DRC_DBG)
+ gte_handlers[0x06] = gteNCLIP_arm;
+#ifdef HAVE_ARMV5
+ gte_handlers_nf[0x01] = gteRTPS_nf_arm;
+ gte_handlers_nf[0x30] = gteRTPT_nf_arm;
+#endif
+#ifdef __ARM_NEON__
+ // compiler's _nf version is still a lot slower than neon
+ // _nf_arm RTPS is roughly the same, RTPT slower
+ gte_handlers[0x01] = gte_handlers_nf[0x01] = gteRTPS_neon;
+ gte_handlers[0x30] = gte_handlers_nf[0x30] = gteRTPT_neon;
+#endif
+#endif
+#ifdef DRC_DBG
+ memcpy(gte_handlers_nf, gte_handlers, sizeof(gte_handlers_nf));
+#endif
+ psxH_ptr = psxH;
+ zeromem_ptr = zero_mem;
+ scratch_buf_ptr = scratch_buf; // for gte_neon.S
+
+ ndrc_g.cycle_multiplier_old = Config.cycle_multiplier;
+ ndrc_g.hacks_old = ndrc_g.hacks | ndrc_g.hacks_pergame;
+ ari64_apply_config();
+ ari64_thread_init();
+
+ return 0;
}
static void ari64_shutdown()
{
+ ari64_thread_shutdown();
new_dynarec_cleanup();
new_dyna_pcsx_mem_shutdown();
}
#else // if DRC_DISABLE
-unsigned int address;
-int pending_exception, stop;
-u32 next_interupt;
-int new_dynarec_did_compile;
-int cycle_multiplier_old;
-int new_dynarec_hacks_pergame;
-int new_dynarec_hacks_old;
-int new_dynarec_hacks;
-void *psxH_ptr;
-void *zeromem_ptr;
-u32 zero_mem[0x1000/4];
-void *mem_rtab;
-void *scratch_buf_ptr;
+struct ndrc_globals ndrc_g; // dummy
void new_dynarec_init() {}
void new_dyna_start(void *context) {}
void new_dynarec_cleanup() {}
void new_dyna_pcsx_mem_shutdown(void) {}
int new_dynarec_save_blocks(void *save, int size) { return 0; }
void new_dynarec_load_blocks(const void *save, int size) {}
+
+#endif // DRC_DISABLE
+
+#ifndef NDRC_THREAD
+static void ari64_thread_sync(void) {}
#endif
#ifdef DRC_DBG
}
// log event changes
for (i = 0; i < PSXINT_COUNT; i++) {
- if (event_cycles[i] != event_cycles_o[i]) {
+ if (psxRegs.event_cycles[i] != event_cycles_o[i]) {
byte = 0xf8;
fwrite(&byte, 1, 1, f);
fwrite(&i, 1, 1, f);
- fwrite(&event_cycles[i], 1, 4, f);
- event_cycles_o[i] = event_cycles[i];
+ fwrite(&psxRegs.event_cycles[i], 1, 4, f);
+ event_cycles_o[i] = psxRegs.event_cycles[i];
}
}
#define SAVE_IF_CHANGED(code_, name_) { \
//if (psxRegs.cycle == 166172) breakme();
- if (which_event >= 0 && event_cycles[which_event] != ev_cycles) {
+ if (which_event >= 0 && psxRegs.event_cycles[which_event] != ev_cycles) {
printf("bad ev_cycles #%d: %u %u / %u\n", which_event,
- event_cycles[which_event], ev_cycles, psxRegs.cycle);
+ psxRegs.event_cycles[which_event], ev_cycles, psxRegs.cycle);
fatal = 1;
}
for (i = 0; i < 8; i++)
printf("r%d=%08x r%2d=%08x r%2d=%08x r%2d=%08x\n", i, allregs_p[i],
i+8, allregs_p[i+8], i+16, allregs_p[i+16], i+24, allregs_p[i+24]);
- printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc, psxRegs.cycle, next_interupt);
+ printf("PC: %08x/%08x, cycle %u, next %u\n", psxRegs.pc, ppc,
+ psxRegs.cycle, psxRegs.next_interupt);
//dump_mem("/tmp/psxram.dump", psxM, 0x200000);
//dump_mem("/mnt/ntz/dev/pnd/tmp/psxregs.dump", psxH, 0x10000);
exit(1);
badregs_mask_prev = badregs_mask;
}
-#endif
+#endif // DRC_DBG
extern int dynarec_local[];
-/* same as psxRegs.GPR.n.* */
-extern int hi, lo;
-
-/* same as psxRegs.CP0.n.* */
-extern int reg_cop0[];
-
/* COP2/GTE */
enum gte_opcodes {
GTE_RTPS = 0x01,
GTE_NCCT = 0x3f,
};
-extern int reg_cop2d[], reg_cop2c[];
extern void *gte_handlers[64];
extern void *gte_handlers_nf[64];
extern const char *gte_regnames[64];
/* cycles/irqs */
extern u32 next_interupt;
-extern int pending_exception;
/* called by drc */
-void pcsx_mtc0(u32 reg, u32 val);
-void pcsx_mtc0_ds(u32 reg, u32 val);
+struct psxRegisters;
+void pcsx_mtc0(struct psxRegisters *regs, u32 reg, u32 val);
+void pcsx_mtc0_ds(struct psxRegisters *regs, u32 reg, u32 val);
/* misc */
extern void SysPrintf(const char *fmt, ...);
#ifdef __MACH__
#define dynarec_local ESYM(dynarec_local)
-#define ndrc_add_jump_out ESYM(ndrc_add_jump_out)
+#define ndrc_patch_link ESYM(ndrc_patch_link)
#define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht)
#define ndrc_get_addr_ht_param ESYM(ndrc_get_addr_ht_param)
#define ndrc_write_invalidate_one ESYM(ndrc_write_invalidate_one)
#define gen_interupt ESYM(gen_interupt)
-#define gteCheckStallRaw ESYM(gteCheckStallRaw)
#define psxException ESYM(psxException)
#define execI ESYM(execI)
+#endif
+
+/* make mini_ht reachable with a single armv4 insn */
+#if (LO_mini_ht & ~0xff0)
+#error misaligned mini_ht
#endif
.bss
#define DRC_VAR(name, size_) \
DRC_VAR_(name, ESYM(name), size_)
-DRC_VAR(next_interupt, 4)
+@DRC_VAR(next_interupt, 4)
DRC_VAR(cycle_count, 4)
DRC_VAR(last_count, 4)
-DRC_VAR(pending_exception, 4)
-DRC_VAR(stop, 4)
-DRC_VAR(branch_target, 4)
+@DRC_VAR(stop, 4)
DRC_VAR(address, 4)
DRC_VAR(hack_addr, 4)
DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs)
/* psxRegs */
-@DRC_VAR(reg, 128)
-DRC_VAR(lo, 4)
-DRC_VAR(hi, 4)
-DRC_VAR(reg_cop0, 128)
+@DRC_VAR(lo, 4)
+@DRC_VAR(hi, 4)
DRC_VAR(reg_cop2d, 128)
DRC_VAR(reg_cop2c, 128)
-DRC_VAR(pcaddr, 4)
@DRC_VAR(code, 4)
@DRC_VAR(cycle, 4)
@DRC_VAR(interrupt, 4)
DRC_VAR(invc_ptr, 4)
DRC_VAR(scratch_buf_ptr, 4)
DRC_VAR(ram_offset, 4)
+DRC_VAR(hash_table_ptr, 4)
DRC_VAR(mini_ht, 256)
FUNCTION(dyna_linker):
/* r0 = virtual target address */
/* r1 = pointer to an instruction to patch */
-#ifndef NO_WRITE_EXEC
+#if 1
ldr r7, [r1]
mov r4, r0
add r6, r7, #2
mov r5, r1
lsl r6, r6, #8
/* must not compile - that might expire the caller block */
- mov r1, #0
+ ldr r0, [fp, #LO_hash_table_ptr]
+ mov r1, r4
+ mov r2, #0 /* ndrc_compile_mode=ndrc_cm_no_compile */
bl ndrc_get_addr_ht_param
movs r8, r0
beq 0f
add r6, r5, r6, asr #6 /* old target */
teq r0, r6
- moveq pc, r0 /* Stale i-cache */
+ bxeq r0 /* Stale i-cache */
mov r0, r4
- mov r1, r6
- bl ndrc_add_jump_out
-
- sub r2, r8, r5
- and r1, r7, #0xff000000
- lsl r2, r2, #6
- sub r1, r1, #2
- add r1, r1, r2, lsr #8
- str r1, [r5]
- mov pc, r8
+ mov r1, r5
+ mov r2, r6
+ mov r3, r8
+ bl ndrc_patch_link
+ bx r8
0:
mov r0, r4
-#else
- /* XXX: should be able to do better than this... */
#endif
+ ldr r1, [fp, #LO_hash_table_ptr]
bl ndrc_get_addr_ht
- mov pc, r0
+ bx r0
.size dyna_linker, .-dyna_linker
.align 2
add r0, r7, #0
.size jump_vaddr_r7, .-jump_vaddr_r7
FUNCTION(jump_vaddr_r0):
+ ldr r1, [fp, #LO_hash_table_ptr]
bl ndrc_get_addr_ht
- mov pc, r0
+ bx r0
.size jump_vaddr_r0, .-jump_vaddr_r0
.align 2
FUNCTION(cc_interrupt):
ldr r0, [fp, #LO_last_count]
- mov r1, #0
- add r10, r0, r10
- str r1, [fp, #LO_pending_exception]
- str r10, [fp, #LO_cycle] /* PCSX cycles */
+ ldr r9, [fp, #LO_pcaddr]
+ add r1, r0, r10
+ str r1, [fp, #LO_cycle] /* PCSX cycles */
mov r10, lr
add r0, fp, #LO_reg_cop0 /* CP0 */
bl gen_interupt
mov lr, r10
ldr r10, [fp, #LO_cycle]
- ldr r0, [fp, #LO_next_interupt]
- ldr r1, [fp, #LO_pending_exception]
- ldr r2, [fp, #LO_stop]
- str r0, [fp, #LO_last_count]
- sub r10, r10, r0
+ ldr r0, [fp, #LO_pcaddr]
+ ldr r1, [fp, #LO_next_interupt]
+ ldrb r2, [fp, #LO_stop]
+ str r1, [fp, #LO_last_count]
+ sub r10, r10, r1
tst r2, r2
ldmfdne sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, pc}
- tst r1, r1
- moveq pc, lr
- ldr r0, [fp, #LO_pcaddr]
+ cmp r0, r9
+ bxeq lr
+ ldr r1, [fp, #LO_hash_table_ptr]
bl ndrc_get_addr_ht
- mov pc, r0
+ bx r0
.size cc_interrupt, .-cc_interrupt
.align 2
/* note: psxException might do recursive recompiler call from it's HLE code,
* so be ready for this */
FUNCTION(jump_to_new_pc):
- ldr r2, [fp, #LO_stop]
+ ldrb r2, [fp, #LO_stop]
ldr r1, [fp, #LO_next_interupt]
ldr r10, [fp, #LO_cycle]
ldr r0, [fp, #LO_pcaddr]
str r1, [fp, #LO_last_count]
sub r10, r10, r1
bne new_dyna_leave
+ ldr r1, [fp, #LO_hash_table_ptr]
bl ndrc_get_addr_ht
- mov pc, r0
+ bx r0
.size jump_to_new_pc, .-jump_to_new_pc
.align 2
.size invalidate_addr_call, .-invalidate_addr_call
.align 2
-FUNCTION(new_dyna_start):
+FUNCTION(new_dyna_start_at):
/* ip is stored to conform EABI alignment */
+ stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
+ mov fp, r0 /* dynarec_local */
+ mov r0, r1
+ b new_dyna_start_at_e
+
+FUNCTION(new_dyna_start):
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
mov fp, r0 /* dynarec_local */
ldr r0, [fp, #LO_pcaddr]
+ ldr r1, [fp, #LO_hash_table_ptr]
bl ndrc_get_addr_ht
+new_dyna_start_at_e:
ldr r1, [fp, #LO_next_interupt]
ldr r10, [fp, #LO_cycle]
str r1, [fp, #LO_last_count]
sub r10, r10, r1
- mov pc, r0
+ bx r0
.size new_dyna_start, .-new_dyna_start
/* --------------------------------------- */
.else
\readop r0, [r1, r3, lsl #\tab_shift]
.endif
- movcc pc, lr
+ bxcc lr
mov r2, r12
str r12, [fp, #LO_cycle]
.endm
.else
\wrtop r1, [r3, r12, lsl #\tab_shift]
.endif
- movcc pc, lr
+ bxcc lr
ldr r12, [fp, #LO_last_count]
mov r0, r1
add r2, r2, r12
lsr r0, #16 @ /= 8
bx lr
-FUNCTION(call_gteStall):
- /* r0 = op_cycles, r1 = cycles */
- ldr r2, [fp, #LO_last_count]
- str lr, [fp, #LO_saved_lr]
- add r1, r1, r2
- str r1, [fp, #LO_cycle]
- add r1, fp, #LO_psxRegs
- bl gteCheckStallRaw
- ldr lr, [fp, #LO_saved_lr]
- add r10, r10, r0
- bx lr
-
#ifdef HAVE_ARMV6
FUNCTION(get_reg):
#ifdef __MACH__
#define dynarec_local ESYM(dynarec_local)
-#define ndrc_add_jump_out ESYM(ndrc_add_jump_out)
+#define ndrc_patch_link ESYM(ndrc_patch_link)
#define ndrc_get_addr_ht ESYM(ndrc_get_addr_ht)
+#define ndrc_get_addr_ht_param ESYM(ndrc_get_addr_ht_param)
#define gen_interupt ESYM(gen_interupt)
-#define gteCheckStallRaw ESYM(gteCheckStallRaw)
#define psxException ESYM(psxException)
#define execI ESYM(execI)
#endif
#define DRC_VAR(name, size_) \
DRC_VAR_(name, ESYM(name), size_)
-DRC_VAR(next_interupt, 4)
+#DRC_VAR(next_interupt, 4)
DRC_VAR(cycle_count, 4)
DRC_VAR(last_count, 4)
-DRC_VAR(pending_exception, 4)
-DRC_VAR(stop, 4)
-DRC_VAR(branch_target, 4)
+#DRC_VAR(stop, 4)
DRC_VAR(address, 4)
DRC_VAR(hack_addr, 4)
DRC_VAR(psxRegs, LO_psxRegs_end - LO_psxRegs)
/* psxRegs */
-#DRC_VAR(reg, 128)
-DRC_VAR(lo, 4)
-DRC_VAR(hi, 4)
-DRC_VAR(reg_cop0, 128)
+#DRC_VAR(lo, 4)
+#DRC_VAR(hi, 4)
DRC_VAR(reg_cop2d, 128)
DRC_VAR(reg_cop2c, 128)
-DRC_VAR(pcaddr, 4)
#DRC_VAR(code, 4)
#DRC_VAR(cycle, 4)
#DRC_VAR(interrupt, 4)
DRC_VAR(zeromem_ptr, 8)
DRC_VAR(scratch_buf_ptr, 8)
DRC_VAR(ram_offset, 8)
+DRC_VAR(hash_table_ptr, 8)
DRC_VAR(mini_ht, 256)
.align 2
FUNCTION(dyna_linker):
- /* r0 = virtual target address */
- /* r1 = instruction to patch */
+ /* w0 = virtual target address */
+ /* x1 = instruction to patch */
+#if 1
+ mov w19, w0
+ mov x20, x1
+ /* must not compile - that might expire the caller block */
+ ldr x0, [rFP, #LO_hash_table_ptr]
+ mov w1, w19
+ mov w2, #0 /* ndrc_compile_mode=ndrc_cm_no_compile */
+ bl ndrc_get_addr_ht_param
+ cbz x0, 0f
+
+ ldr w2, [x20]
+ mov x3, x0
+ sbfiz x2, x2, 2, 26
+ add x2, x2, x20
+ mov x1, x20
+ mov w0, w19
+ mov x19, x3
+ bl ndrc_patch_link
+ br x19
+0:
+ mov w0, w19
+#endif
+ ldr x1, [rFP, #LO_hash_table_ptr]
bl ndrc_get_addr_ht
br x0
ESIZE(dyna_linker, .-dyna_linker)
.align 2
FUNCTION(cc_interrupt):
ldr w0, [rFP, #LO_last_count]
+ ldr w22, [rFP, #LO_pcaddr]
add rCC, w0, rCC
- str wzr, [rFP, #LO_pending_exception]
str rCC, [rFP, #LO_cycle] /* PCSX cycles */
mov x21, lr
1:
bl gen_interupt
mov lr, x21
ldr rCC, [rFP, #LO_cycle]
- ldr w0, [rFP, #LO_next_interupt]
- ldr w1, [rFP, #LO_pending_exception]
- ldr w2, [rFP, #LO_stop]
- str w0, [rFP, #LO_last_count]
- sub rCC, rCC, w0
+ ldr w0, [rFP, #LO_pcaddr]
+ ldr w1, [rFP, #LO_next_interupt]
+ ldrb w2, [rFP, #LO_stop]
+ str w1, [rFP, #LO_last_count]
+ sub rCC, rCC, w1
cbnz w2, new_dyna_leave
- cbnz w1, 2f
+ cmp w0, w22
+ bne 2f
ret
2:
- ldr w0, [rFP, #LO_pcaddr]
+ ldr x1, [rFP, #LO_hash_table_ptr]
bl ndrc_get_addr_ht
br x0
ESIZE(cc_interrupt, .-cc_interrupt)
/* note: psxException might do recursive recompiler call from it's HLE code,
* so be ready for this */
FUNCTION(jump_to_new_pc):
- ldr w2, [rFP, #LO_stop]
+ ldrb w2, [rFP, #LO_stop]
ldr w1, [rFP, #LO_next_interupt]
ldr rCC, [rFP, #LO_cycle]
ldr w0, [rFP, #LO_pcaddr]
sub rCC, rCC, w1
str w1, [rFP, #LO_last_count]
cbnz w2, new_dyna_leave
+ ldr x1, [rFP, #LO_hash_table_ptr]
bl ndrc_get_addr_ht
br x0
ESIZE(jump_to_new_pc, .-jump_to_new_pc)
/* stack must be aligned by 16, and include space for save_regs() use */
.align 2
+FUNCTION(new_dyna_start_at):
+ stp x29, x30, [sp, #-SSP_ALL]!
+ mov rFP, x0
+ b new_dyna_start_at_e
+
FUNCTION(new_dyna_start):
stp x29, x30, [sp, #-SSP_ALL]!
- ldr w1, [x0, #LO_next_interupt]
- ldr w2, [x0, #LO_cycle]
+ mov rFP, x0
+ ldr w0, [rFP, #LO_pcaddr]
+ ldr x1, [rFP, #LO_hash_table_ptr]
+ bl ndrc_get_addr_ht
+ mov x1, x0
+new_dyna_start_at_e:
+ ldr w3, [rFP, #LO_next_interupt]
+ ldr w2, [rFP, #LO_cycle]
stp x19, x20, [sp, #16*1]
stp x21, x22, [sp, #16*2]
stp x23, x24, [sp, #16*3]
stp x25, x26, [sp, #16*4]
stp x27, x28, [sp, #16*5]
- mov rFP, x0
- ldr w0, [rFP, #LO_pcaddr]
- str w1, [rFP, #LO_last_count]
- sub rCC, w2, w1
- bl ndrc_get_addr_ht
- br x0
+ str w3, [rFP, #LO_last_count]
+ sub rCC, w2, w3
+ br x1
ESIZE(new_dyna_start, .-new_dyna_start)
.align 2
bl execI
b jump_to_new_pc
-FUNCTION(call_gteStall):
- /* w0 = op_cycles, w1 = cycles */
- ldr w2, [rFP, #LO_last_count]
- str lr, [rFP, #LO_saved_lr]
- add w1, w1, w2
- str w1, [rFP, #LO_cycle]
- add x1, rFP, #LO_psxRegs
- bl gteCheckStallRaw
- ldr lr, [rFP, #LO_saved_lr]
- add rCC, rCC, w0
- ret
-
#ifdef DRC_DBG
#undef do_insn_cmp
FUNCTION(do_insn_cmp_arm64):
#define PTRSZ __SIZEOF_POINTER__
-#define LO_next_interupt 64
-#define LO_cycle_count (LO_next_interupt + 4)
+#define LO_cycle_count 64
#define LO_last_count (LO_cycle_count + 4)
-#define LO_pending_exception (LO_last_count + 4)
-#define LO_stop (LO_pending_exception + 4)
-#define LO_branch_target (LO_stop + 4)
-#define LO_address (LO_branch_target + 4)
+#define LO_address (LO_last_count + 4)
#define LO_hack_addr (LO_address + 4)
#define LO_psxRegs (LO_hack_addr + 4)
-#define LO_reg (LO_psxRegs)
-#define LO_lo (LO_reg + 128)
+#define LO_lo (LO_psxRegs + 128)
#define LO_hi (LO_lo + 4)
#define LO_reg_cop0 (LO_hi + 4)
#define LO_reg_cop2d (LO_reg_cop0 + 128)
#define LO_cycle (LO_code + 4)
#define LO_interrupt (LO_cycle + 4)
#define LO_intCycle (LO_interrupt + 4)
-#define LO_gteBusyCycle (LO_intCycle + 256)
+#define LO_next_interupt (LO_intCycle + 4*2*31)
+#define LO_unused (LO_next_interupt + 4)
+#define LO_gteBusyCycle (LO_unused + 4)
#define LO_muldivBusyCycle (LO_gteBusyCycle + 4)
#define LO_psxRegs_subCycle (LO_muldivBusyCycle + 4)
#define LO_psxRegs_biuReg (LO_psxRegs_subCycle + 4*2)
-#define LO_psxRegs_reserved (LO_psxRegs_biuReg + 4)
-#define LO_psxRegs_end (LO_psxRegs_reserved + 4*7)
+#define LO_stop (LO_psxRegs_biuReg + 4)
+#define LO_psxRegs_end (LO_stop + 4*9)
#define LO_rcnts (LO_psxRegs_end)
#define LO_rcnts_end (LO_rcnts + 7*4*4)
#define LO_inv_code_start (LO_rcnts_end)
#define LO_psxH_ptr (LO_mem_wtab + PTRSZ)
#define LO_zeromem_ptr (LO_psxH_ptr + PTRSZ)
#define LO_invc_ptr (LO_zeromem_ptr + PTRSZ)
-#define LO_scratch_buf_ptr (LO_invc_ptr + PTRSZ)
+#define LO_scratch_buf_ptr (LO_invc_ptr + PTRSZ) // for gte_neon.S
#define LO_saved_lr (LO_scratch_buf_ptr + PTRSZ)
#define LO_ram_offset (LO_saved_lr + PTRSZ)
-#define LO_mini_ht (LO_ram_offset + PTRSZ)
+#define LO_hash_table_ptr (LO_ram_offset + PTRSZ)
+#define LO_unused2 (LO_hash_table_ptr + PTRSZ)
+#define LO_mini_ht (LO_unused2 + PTRSZ)
#define LO_dynarec_local_size (LO_mini_ht + PTRSZ*32*2)
#define LO_cop2_to_scratch_buf (LO_scratch_buf_ptr - LO_reg_cop2d)
//#define DISASM
//#define ASSEM_PRINT
+//#define ASSEM_PRINT_ADDRS
//#define REGMAP_PRINT // with DISASM only
//#define INV_DEBUG_W
//#define STAT_PRINT
#else
#define assem_debug(...)
#endif
+#ifdef ASSEM_PRINT_ADDRS
+#define log_addr(a) (a)
+#else
+// for diff-able output
+#define log_addr(a) ((u_long)(a) <= 1024u ? (void *)(a) : (void *)0xadd0l)
+#endif
//#define inv_debug printf
#define inv_debug(...)
+#define SysPrintf_lim(...) do { \
+ if (err_print_count++ < 64u) \
+ SysPrintf(__VA_ARGS__); \
+} while (0)
+
+// from linkage_*
+extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 (CCREG)
+extern int last_count; // last absolute target, often = next_interupt
+
+extern int reg_cop2d[], reg_cop2c[];
+
+extern void *hash_table_ptr;
+extern uintptr_t ram_offset;
+extern uintptr_t mini_ht[32][2];
+
#ifdef __i386__
#include "assem_x86.h"
#endif
#endif
#define RAM_SIZE 0x200000
-#define MAXBLOCK 2048
#define MAX_OUTPUT_BLOCK_SIZE 262144
#define EXPIRITY_OFFSET (MAX_OUTPUT_BLOCK_SIZE * 2)
#define PAGE_COUNT 1024
#define TC_REDUCE_BYTES 0
#endif
+struct ndrc_globals ndrc_g;
+
struct ndrc_tramp
{
struct tramp_insns ops[2048 / sizeof(struct tramp_insns)];
u_char is_delay_load:1; // is_load + MFC/CFC
u_char is_exception:1; // unconditional, also interp. fallback
u_char may_except:1; // might generate an exception
- u_char ls_type:2; // load/store type (ls_width_type)
+ u_char ls_type:2; // load/store type (ls_width_type LS_*)
} dops[MAXBLOCK];
enum ls_width_type {
static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs
static uint64_t gte_rt[MAXBLOCK];
static uint64_t gte_unneeded[MAXBLOCK];
- static u_int smrv[32]; // speculated MIPS register values
+ unsigned int ndrc_smrv_regs[32]; // speculated MIPS register values
static u_int smrv_strong; // mask or regs that are likely to have correct values
static u_int smrv_weak; // same, but somewhat less likely
static u_int smrv_strong_next; // same, but after current insn executes
static void *copy;
static u_int expirep;
static u_int stop_after_jal;
+ static u_int ni_count;
+ static u_int err_print_count;
static u_int f1_hack;
+ static u_int vsync_hack;
#ifdef STAT_PRINT
static int stat_bc_direct;
static int stat_bc_pre;
#define stat_clear(s)
#endif
- int new_dynarec_hacks;
- int new_dynarec_hacks_pergame;
- int new_dynarec_hacks_old;
- int new_dynarec_did_compile;
-
- #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x))
-
- extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 (CCREG)
- extern int last_count; // last absolute target, often = next_interupt
- extern int pcaddr;
- extern int pending_exception;
- extern int branch_target;
- extern uintptr_t ram_offset;
- extern uintptr_t mini_ht[32][2];
+ #define HACK_ENABLED(x) ((ndrc_g.hacks | ndrc_g.hacks_pergame) & (x))
/* registers that may be allocated */
/* 1-31 gpr */
void jump_addrerror (u_int cause, u_int addr, u_int pc);
void jump_addrerror_ds(u_int cause, u_int addr, u_int pc);
void jump_to_new_pc();
-void call_gteStall();
void new_dyna_leave();
-void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile);
-void *ndrc_get_addr_ht(u_int vaddr);
-void ndrc_add_jump_out(u_int vaddr, void *src);
+void *ndrc_get_addr_ht(u_int vaddr, struct ht_entry *ht);
void ndrc_write_invalidate_one(u_int addr);
static void ndrc_write_invalidate_many(u_int addr, u_int end);
#endif
}
-static void start_tcache_write(void *start, void *end)
-{
- mprotect_w_x(start, end, 0);
-}
-
-static void end_tcache_write(void *start, void *end)
+void new_dyna_clear_cache(void *start, void *end)
{
#if defined(__arm__) || defined(__aarch64__)
size_t len = (char *)end - (char *)start;
#elif defined(VITA)
sceKernelSyncVMDomain(sceBlock, start, len);
#elif defined(_3DS)
- ctr_flush_invalidate_cache();
+ // tuned for old3ds' 16k:16k cache (in its mostly clean state...)
+ if ((char *)end - (char *)start <= 2*1024)
+ ctr_clear_cache_range(start, end);
+ else
+ ctr_clear_cache();
#elif defined(HAVE_LIBNX)
if (g_jit.type == JitType_CodeMemory) {
armDCacheClean(start, len);
__asm__ volatile("isb" ::: "memory");
}
#elif defined(__aarch64__)
- // as of 2021, __clear_cache() is still broken on arm64
- // so here is a custom one :(
+ // __clear_cache() doesn't handle differing cacheline sizes on big.LITTLE and
+ // leaves it to the kernel to virtualize ctr_el0, which some old kernels don't do
clear_cache_arm64(start, end);
#else
__clear_cache(start, end);
#endif
(void)len;
#endif
+}
+
+static void start_tcache_write(void *start, void *end)
+{
+ mprotect_w_x(start, end, 0);
+}
+
+static void end_tcache_write(void *start, void *end)
+{
+#ifdef NDRC_THREAD
+ if (!ndrc_g.thread.dirty_start || (size_t)ndrc_g.thread.dirty_start > (size_t)start)
+ ndrc_g.thread.dirty_start = start;
+ if ((size_t)ndrc_g.thread.dirty_end < (size_t)end)
+ ndrc_g.thread.dirty_end = end;
+#endif
+ new_dyna_clear_cache(start, end);
mprotect_w_x(start, end, 1);
}
#define NO_CYCLE_PENALTY_THR 12
-int cycle_multiplier_old;
static int cycle_multiplier_active;
static int CLOCK_ADJUST(int x)
return page;
}
+static struct ht_entry *hash_table_get_p(struct ht_entry *ht, u_int vaddr)
+{
+ return &ht[((vaddr >> 16) ^ vaddr) & 0xFFFF];
+}
+
static struct ht_entry *hash_table_get(u_int vaddr)
{
- return &hash_table[((vaddr>>16)^vaddr)&0xFFFF];
+ return hash_table_get_p(hash_table, vaddr);
}
#define HASH_TABLE_BAD 0xbac
return diff > EXPIRITY_OFFSET + MAX_OUTPUT_BLOCK_SIZE;
}
-static unused void check_for_block_changes(u_int start, u_int end)
+static attr_unused void check_for_block_changes(u_int start, u_int end)
{
u_int start_page = get_page_prev(start);
u_int end_page = get_page(end - 1);
// Get address from virtual address
// This is called from the recompiled JR/JALR instructions
-static void noinline *get_addr(u_int vaddr, int can_compile)
+static void noinline *get_addr(struct ht_entry *ht, const u_int vaddr,
+ enum ndrc_compile_mode compile_mode)
{
u_int start_page = get_page_prev(vaddr);
u_int i, page, end_page = get_page(vaddr);
if (found_clean)
return found_clean;
- if (!can_compile)
+ if (compile_mode == ndrc_cm_no_compile)
return NULL;
+#ifdef NDRC_THREAD
+ if (ndrc_g.thread.handle && compile_mode == ndrc_cm_compile_live) {
+ psxRegs.pc = vaddr;
+ return new_dyna_leave;
+ }
+ if (!ndrc_g.thread.handle)
+#endif
+ memcpy(ndrc_smrv_regs, psxRegs.GPR.r, sizeof(ndrc_smrv_regs));
int r = new_recompile_block(vaddr);
if (likely(r == 0))
- return ndrc_get_addr_ht(vaddr);
+ return ndrc_get_addr_ht(vaddr, ht);
- return ndrc_get_addr_ht(generate_exception(vaddr));
+ if (compile_mode == ndrc_cm_compile_live)
+ return ndrc_get_addr_ht(generate_exception(vaddr), ht);
+
+ return NULL;
}
// Look up address in hash table first
-void *ndrc_get_addr_ht_param(u_int vaddr, int can_compile)
+void *ndrc_get_addr_ht_param(struct ht_entry *ht, unsigned int vaddr,
+ enum ndrc_compile_mode compile_mode)
{
//check_for_block_changes(vaddr, vaddr + MAXBLOCK);
- const struct ht_entry *ht_bin = hash_table_get(vaddr);
+ const struct ht_entry *ht_bin = hash_table_get_p(ht, vaddr);
u_int vaddr_a = vaddr & ~3;
stat_inc(stat_ht_lookups);
if (ht_bin->vaddr[0] == vaddr_a) return ht_bin->tcaddr[0];
if (ht_bin->vaddr[1] == vaddr_a) return ht_bin->tcaddr[1];
- return get_addr(vaddr, can_compile);
+ return get_addr(ht, vaddr, compile_mode);
}
-void *ndrc_get_addr_ht(u_int vaddr)
+// "usual" addr lookup for indirect branches, etc
+// to be used by currently running code only
+void *ndrc_get_addr_ht(u_int vaddr, struct ht_entry *ht)
{
- return ndrc_get_addr_ht_param(vaddr, 1);
+ return ndrc_get_addr_ht_param(ht, vaddr, ndrc_cm_compile_live);
}
static void clear_all_regs(signed char regmap[])
FUNCNAME(cc_interrupt),
FUNCNAME(gen_interupt),
FUNCNAME(ndrc_get_addr_ht),
+ FUNCNAME(ndrc_get_addr_ht_param),
FUNCNAME(jump_handler_read8),
FUNCNAME(jump_handler_read16),
FUNCNAME(jump_handler_read32),
FUNCNAME(jump_overflow_ds),
FUNCNAME(jump_addrerror),
FUNCNAME(jump_addrerror_ds),
- FUNCNAME(call_gteStall),
FUNCNAME(new_dyna_leave),
FUNCNAME(pcsx_mtc0),
FUNCNAME(pcsx_mtc0_ds),
ofscase(next_interupt);
ofscase(cycle_count);
ofscase(last_count);
- ofscase(pending_exception);
ofscase(stop);
ofscase(address);
ofscase(lo);
ofscase(psxH_ptr);
ofscase(invc_ptr);
ofscase(ram_offset);
+ ofscase(hash_table_ptr);
#undef ofscase
}
buf[0] = 0;
invalidate_range(start, end, NULL, NULL);
}
+// check if the range may need invalidation (must be thread-safe)
+int new_dynarec_quick_check_range(unsigned int start, unsigned int end)
+{
+ u_int start_page = get_page_prev(start);
+ u_int end_page = get_page(end - 1);
+ u_int page;
+
+ if (inv_code_start <= start && end <= inv_code_end)
+ return 0;
+ for (page = start_page; page <= end_page; page++) {
+ if (blocks[page]) {
+ //SysPrintf("quick hit %x-%x\n", start, end);
+ return 1;
+ }
+ }
+ return 0;
+}
+
static void ndrc_write_invalidate_many(u_int start, u_int end)
{
// this check is done by the caller
}
// Add an entry to jump_out after making a link
-// src should point to code by emit_extjump()
-void ndrc_add_jump_out(u_int vaddr, void *src)
+// stub should point to stub code by emit_extjump()
+static void ndrc_add_jump_out(u_int vaddr, void *stub)
{
- inv_debug("ndrc_add_jump_out: %p -> %x\n", src, vaddr);
+ inv_debug("ndrc_add_jump_out: %p -> %x\n", stub, vaddr);
u_int page = get_page(vaddr);
struct jump_info *ji;
stat_inc(stat_links);
- check_extjump2(src);
+ check_extjump2(stub);
ji = jumps[page];
if (ji == NULL) {
ji = malloc(sizeof(*ji) + sizeof(ji->e[0]) * 16);
}
jumps[page] = ji;
ji->e[ji->count].target_vaddr = vaddr;
- ji->e[ji->count].stub = src;
+ ji->e[ji->count].stub = stub;
ji->count++;
}
+// Patch the direct-branch instruction at 'insn' to jump to 'target', and
+// record 'stub' (extjump stub code, see emit_extjump) under 'vaddr' via
+// ndrc_add_jump_out so the link can be undone when vaddr is invalidated.
+// Write permission on the translation cache is toggled around the patch;
+// the icache is flushed where required (arm64 / W^X hosts).
+// 'insn' is assumed to be a single 4-byte instruction (insn_end = insn+4).
+void ndrc_patch_link(u_int vaddr, void *insn, void *stub, void *target)
+{
+ void *insn_end = (char *)insn + 4;
+
+ //start_tcache_write(insn, insn_end);
+ mprotect_w_x(insn, insn_end, 0);
+
+ // linking to the stub itself would loop forever
+ assert(target != stub);
+ set_jump_target_far1(insn, target);
+ ndrc_add_jump_out(vaddr, stub);
+
+#if defined(__aarch64__) || defined(NO_WRITE_EXEC)
+ // arm64: no syscall concerns, dyna_linker lacks stale detection
+ // w^x: have to do costly permission switching anyway
+ new_dyna_clear_cache(NDRC_WRITE_OFFSET(insn), NDRC_WRITE_OFFSET(insn_end));
+#endif
+ //end_tcache_write(insn, insn_end);
+ mprotect_w_x(insn, insn_end, 1);
+}
+
/* Register allocation */
static void alloc_set(struct regstat *cur, int reg, int hr)
assert(addr >= 0);
*offset_reg = -1;
if(((smrv_strong|smrv_weak)>>mr)&1) {
- type=get_ptr_mem_type(smrv[mr]);
- //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
+ type=get_ptr_mem_type(ndrc_smrv_regs[mr]);
+ //printf("set %08x @%08x r%d %d\n", ndrc_smrv_regs[mr], start+i*4, mr, type);
}
else {
// use the mirror we are running on
emit_cmpmem_indexedsr12_imm(invalid_code, addr, 1);
#error not handled
#endif
+ (void)count;
#ifdef INVALIDATE_USE_COND_CALL
if (count == 1) {
emit_cmpimm(HOST_TEMPREG, 1);
// not looking back as that should be in mips cache already
// (see Spyro2 title->attract mode)
if (start + i*4 < addr_const && addr_const < start + slen*4) {
- SysPrintf("write to %08x hits block %08x, pc=%08x\n", addr_const, start, start+i*4);
+ SysPrintf_lim("write to %08x hits block %08x, pc=%08x\n",
+ addr_const, start, start+i*4);
assert(i_regs->regmap==regs[i].regmap); // not delay slot
if(i_regs->regmap==regs[i].regmap) {
load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
wb_dirtys(regs[i].regmap_entry,regs[i].wasdirty);
- emit_movimm(start+i*4+4,0);
- emit_writeword(0,&pcaddr);
- emit_addimm(HOST_CCREG,2,HOST_CCREG);
+ emit_readptr(&hash_table_ptr, 1);
+ emit_movimm(start+i*4+4, 0);
+ emit_writeword(0, &psxRegs.pc);
+ emit_addimm(HOST_CCREG, 2, HOST_CCREG);
emit_far_call(ndrc_get_addr_ht);
emit_jmpreg(0);
}
signed char t=get_reg_w(i_regs->regmap, dops[i].rt1);
u_int copr=(source[i]>>11)&0x1f;
if(t>=0&&dops[i].rt1!=0) {
- emit_readword(®_cop0[copr],t);
+ emit_readword(&psxRegs.CP0.r[copr],t);
}
}
else if(dops[i].opcode2==4) // MTC0
emit_writeword(HOST_CCREG,&last_count);
emit_movimm(0,HOST_CCREG);
emit_storereg(CCREG,HOST_CCREG);
- emit_loadreg(dops[i].rs1,1);
- emit_movimm(copr,0);
+ emit_loadreg(dops[i].rs1, 2);
+ emit_movimm(copr, 1);
+ emit_addimm_ptr(FP, (u_char *)&psxRegs - (u_char *)&dynarec_local, 0);
emit_far_call(pcsx_mtc0_ds);
emit_loadreg(dops[i].rs1,s);
return;
}
emit_movimm(start+i*4+4,HOST_TEMPREG);
- emit_writeword(HOST_TEMPREG,&pcaddr);
- emit_movimm(0,HOST_TEMPREG);
- emit_writeword(HOST_TEMPREG,&pending_exception);
+ emit_writeword(HOST_TEMPREG,&psxRegs.pc);
}
- if( s != 1)
- emit_mov(s, 1);
- emit_movimm(copr, 0);
+ if (s != 2)
+ emit_mov(s, 2);
+ emit_movimm(copr, 1);
+ emit_addimm_ptr(FP, (u_char *)&psxRegs - (u_char *)&dynarec_local, 0);
emit_far_call(pcsx_mtc0);
if (copr == 12 || copr == 13) {
emit_readword(&psxRegs.cycle,HOST_CCREG);
emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
//emit_writeword(HOST_TEMPREG,&last_count);
assert(!is_delayslot);
- emit_readword(&pending_exception,HOST_TEMPREG);
- emit_test(HOST_TEMPREG,HOST_TEMPREG);
+ emit_readword(&psxRegs.pc, 0);
+ emit_movimm(start+i*4+4, HOST_TEMPREG);
+ emit_cmp(HOST_TEMPREG, 0);
void *jaddr = out;
emit_jeq(0);
- emit_readword(&pcaddr, 0);
+ emit_readptr(&hash_table_ptr, 1);
emit_far_call(ndrc_get_addr_ht);
emit_jmpreg(0);
set_jump_target(jaddr, out);
static int cop2_is_stalling_op(int i, int *cycles)
{
- if (dops[i].opcode == 0x3a) { // SWC2
- *cycles = 0;
- return 1;
- }
- if (dops[i].itype == COP2 && (dops[i].opcode2 == 0 || dops[i].opcode2 == 2)) { // MFC2/CFC2
+ if (dops[i].itype == COP2 || dops[i].itype == C2LS) {
*cycles = 0;
return 1;
}
static void cop2_do_stall_check(u_int op, int i, const struct regstat *i_regs, u_int reglist)
{
- int j = i, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed;
+ int j = i, cycles, other_gte_op_cycles = -1, stall = -MAXBLOCK, cycles_passed;
int rtmp = reglist_find_free(reglist);
if (HACK_ENABLED(NDHACK_NO_STALLS))
if (other_gte_op_cycles >= 0)
stall = other_gte_op_cycles - cycles_passed;
else if (cycles_passed >= 44)
- stall = 0; // can't stall
+ stall = 0; // can't possibly stall
if (stall == -MAXBLOCK && rtmp >= 0) {
// unknown stall, do the expensive runtime check
assem_debug("; cop2_do_stall_check\n");
-#if 0 // too slow
- save_regs(reglist);
- emit_movimm(gte_cycletab[op], 0);
- emit_addimm(HOST_CCREG, cinfo[i].ccadj, 1);
- emit_far_call(call_gteStall);
- restore_regs(reglist);
-#else
+ // busy - (cc + adj) -> busy - adj - cc
host_tempreg_acquire();
emit_readword(&psxRegs.gteBusyCycle, rtmp);
emit_addimm(rtmp, -cinfo[i].ccadj, rtmp);
emit_cmovb_reg(rtmp, HOST_CCREG);
//emit_log_gte_stall(i, 0, reglist);
host_tempreg_release();
-#endif
}
else if (stall > 0) {
//emit_log_gte_stall(i, stall, reglist);
}
// save gteBusyCycle, if needed
- if (gte_cycletab[op] == 0)
+ cycles = gte_cycletab[op];
+ if (cycles == 0)
return;
other_gte_op_cycles = -1;
for (j = i + 1; j < slen; j++) {
// will handle stall when assembling that op
return;
cycles_passed = cinfo[min(j, slen -1)].ccadj - cinfo[i].ccadj;
- if (cycles_passed >= 44)
+ if (cycles_passed >= cycles)
return;
assem_debug("; save gteBusyCycle\n");
host_tempreg_acquire();
-#if 0
- emit_readword(&last_count, HOST_TEMPREG);
- emit_add(HOST_TEMPREG, HOST_CCREG, HOST_TEMPREG);
- emit_addimm(HOST_TEMPREG, cinfo[i].ccadj, HOST_TEMPREG);
- emit_addimm(HOST_TEMPREG, gte_cycletab[op]), HOST_TEMPREG);
- emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle);
-#else
- emit_addimm(HOST_CCREG, cinfo[i].ccadj + gte_cycletab[op], HOST_TEMPREG);
+ emit_addimm(HOST_CCREG, cinfo[i].ccadj + cycles, HOST_TEMPREG);
emit_writeword(HOST_TEMPREG, &psxRegs.gteBusyCycle);
-#endif
host_tempreg_release();
}
static void speculate_mov(int rs,int rt)
{
- if(rt!=0) {
- smrv_strong_next|=1<<rt;
- smrv[rt]=smrv[rs];
+ if (rt != 0) {
+ smrv_strong_next |= 1 << rt;
+ ndrc_smrv_regs[rt] = ndrc_smrv_regs[rs];
}
}
static void speculate_mov_weak(int rs,int rt)
{
- if(rt!=0) {
- smrv_weak_next|=1<<rt;
- smrv[rt]=smrv[rs];
+ if (rt != 0) {
+ smrv_weak_next |= 1 << rt;
+ ndrc_smrv_regs[rt] = ndrc_smrv_regs[rs];
}
}
static void speculate_register_values(int i)
{
if(i==0) {
- memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
// gp,sp are likely to stay the same throughout the block
smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
smrv_weak_next=~smrv_strong_next;
- //printf(" llr %08x\n", smrv[4]);
+ //printf(" llr %08x\n", ndrc_smrv_regs[4]);
}
smrv_strong=smrv_strong_next;
smrv_weak=smrv_weak_next;
u_int value;
if(hr>=0) {
if(get_final_value(hr,i,&value))
- smrv[dops[i].rt1]=value;
- else smrv[dops[i].rt1]=constmap[i][hr];
+ ndrc_smrv_regs[dops[i].rt1]=value;
+ else ndrc_smrv_regs[dops[i].rt1]=constmap[i][hr];
smrv_strong_next|=1<<dops[i].rt1;
}
}
}
break;
case LOAD:
- if(start<0x2000&&(dops[i].rt1==26||(smrv[dops[i].rt1]>>24)==0xa0)) {
+ if(start<0x2000&&(dops[i].rt1==26||(ndrc_smrv_regs[dops[i].rt1]>>24)==0xa0)) {
// special case for BIOS
- smrv[dops[i].rt1]=0xa0000000;
+ ndrc_smrv_regs[dops[i].rt1]=0xa0000000;
smrv_strong_next|=1<<dops[i].rt1;
break;
}
}
#if 0
int r=4;
- printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
+ printf("x %08x %08x %d %d c %08x %08x\n",ndrc_smrv_regs[r],start+i*4,
((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
#endif
}
emit_storereg(dops[i].rt1, 0);
}
emit_movimm(start+i*4,0);
- emit_writeword(0,&pcaddr);
+ emit_writeword(0,&psxRegs.pc);
int cc = get_reg(regs[i].regmap_entry, CCREG);
if (cc < 0)
emit_loadreg(CCREG, cc = 0);
literal_pool(256);
assem_debug("do_ccstub %x\n",start+(u_int)stubs[n].b*4);
set_jump_target(stubs[n].addr, out);
- int i=stubs[n].b;
+ int i = stubs[n].b;
+ int r_pc = -1;
if (stubs[n].d != TAKEN) {
wb_dirtys(branch_regs[i].regmap,branch_regs[i].dirty);
}
if(stubs[n].c!=-1)
{
// Save PC as return address
- emit_movimm(stubs[n].c,0);
- emit_writeword(0,&pcaddr);
+ emit_movimm(stubs[n].c, (r_pc = 0));
}
else
{
else
emit_movimm((dops[i].opcode2 & 1) ? cinfo[i].ba : start + i*4 + 8, addr);
}
- emit_writeword(addr, &pcaddr);
+ r_pc = addr;
}
else
if(dops[i].itype==RJUMP)
{
- int r=get_reg(branch_regs[i].regmap,dops[i].rs1);
+ r_pc = get_reg(branch_regs[i].regmap, dops[i].rs1);
if (ds_writes_rjump_rs(i)) {
- r=get_reg(branch_regs[i].regmap,RTEMP);
+ r_pc = get_reg(branch_regs[i].regmap, RTEMP);
}
- emit_writeword(r,&pcaddr);
}
else {SysPrintf("Unknown branch type in do_ccstub\n");abort();}
}
+ emit_writeword(r_pc, &psxRegs.pc);
// Update cycle count
assert(branch_regs[i].regmap[HOST_CCREG]==CCREG||branch_regs[i].regmap[HOST_CCREG]==-1);
if(stubs[n].a) emit_addimm(HOST_CCREG,(int)stubs[n].a,HOST_CCREG);
load_needed_regs(branch_regs[i].regmap,regs[(cinfo[i].ba-start)>>2].regmap_entry);
else if(dops[i].itype==RJUMP) {
if(get_reg(branch_regs[i].regmap,RTEMP)>=0)
- emit_readword(&pcaddr,get_reg(branch_regs[i].regmap,RTEMP));
+ emit_readword(&psxRegs.pc,get_reg(branch_regs[i].regmap,RTEMP));
else
emit_loadreg(dops[i].rs1,get_reg(branch_regs[i].regmap,dops[i].rs1));
}
#endif
}
+// Emit host code that fast-forwards a detected vsync busy-wait loop
+// (matched by do_vsync): instead of emulating every polling iteration,
+// batch-decrement the loop counter stored at sp+ld_ofs by 16 and credit
+// CLOCK_ADJUST(9+5)*16 cycles per batch to 'cc', until the counter is
+// nearly drained (< 17) or the timeslice is about to end.
+// i:      index of the loop's branch instruction
+// ld_ofs: stack offset of the counter word (presumably decremented once
+//         per poll by the guest — see the addiu x,-1 check in do_vsync)
+// cc:     host reg holding the cycle count; negative, counting up toward
+//         0 at the end of the timeslice (CCREG convention)
+static void vsync_hack_assemble(int i, int ld_ofs, int cc)
+{
+ // need the guest $sp in a host reg (and ROREG when RAM isn't
+ // directly mapped) or we can't address the counter — bail otherwise
+ int sp = get_reg(branch_regs[i].regmap, 29);
+ int ro = get_reg(branch_regs[i].regmap, ROREG);
+ int cycles = CLOCK_ADJUST(9+5) * 16;
+ void *t_exit[3], *loop_target, *t_loop_break;
+ int j;
+ if (sp < 0 || (ram_offset && ro < 0))
+ return;
+ assem_debug("; vsync hack\n");
+ host_tempreg_acquire();
+ // exit 0: not enough timeslice left for even one 16-iteration batch
+ emit_cmpimm(cc, -cycles);
+ t_exit[0] = out;
+ emit_jge(0);
+ // exit 1: bail unless sp looks like a plain RAM address
+ // (compared against RAM_SIZE — NOTE(review): jno condition, confirm)
+ emit_cmpimm(sp, RAM_SIZE);
+ t_exit[1] = out;
+ emit_jno(0);
+ // load the counter word from sp+ld_ofs, via ram_offset if mapped
+ if (ro >= 0) {
+ emit_addimm(sp, ld_ofs, HOST_TEMPREG);
+ emit_ldr_dualindexed(ro, HOST_TEMPREG, HOST_TEMPREG);
+ }
+ else
+ emit_readword_indexed(ld_ofs, sp, HOST_TEMPREG);
+ // exit 2: counter too low to consume a whole batch of 16
+ emit_cmpimm(HOST_TEMPREG, 17);
+ t_exit[2] = out;
+ emit_jl(0);
+
+ // batch loop: counter -= 16, cc += cycles, while counter >= 17
+ // and the timeslice hasn't run out
+ assem_debug("1:\n");
+ loop_target = out;
+ emit_addimm(HOST_TEMPREG, -16, HOST_TEMPREG);
+ emit_addimm(cc, cycles, cc);
+ emit_cmpimm(HOST_TEMPREG, 17);
+ t_loop_break = out;
+ emit_jl(DJT_2);
+ emit_cmpimm(cc, -cycles);
+ emit_jl(loop_target);
+
+ // store the drained counter back so the guest sees consistent state
+ assem_debug("2:\n");
+ set_jump_target(t_loop_break, out);
+ do_store_word(sp, ld_ofs, HOST_TEMPREG, ro, 1);
+
+ // all early exits skip straight past the hack
+ for (j = 0; j < ARRAY_SIZE(t_exit); j++)
+ set_jump_target(t_exit[j], out);
+ host_tempreg_release();
+}
+
static void cjump_assemble(int i, const struct regstat *i_regs)
{
const signed char *i_regmap = i_regs->regmap;
int internal=internal_branch(cinfo[i].ba);
if(i==(cinfo[i].ba-start)>>2) assem_debug("idle loop\n");
if(!match) invert=1;
+ if (vsync_hack && (vsync_hack >> 16) == i) invert=1;
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
if(i>(cinfo[i].ba-start)>>2) invert=1;
#endif
}
if(invert) {
if(taken) set_jump_target(taken, out);
+ if (vsync_hack && (vsync_hack >> 16) == i)
+ vsync_hack_assemble(i, vsync_hack & 0xffff, cc);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
if (match && (!internal || !dops[(cinfo[i].ba-start)>>2].is_ds)) {
if(adj) {
SysPrintf("(%p) testing if we can run recompiled code @%p...\n",
new_dynarec_test, out);
- ((volatile u_int *)NDRC_WRITE_OFFSET(out))[0]++; // make the cache dirty
for (i = 0; i < ARRAY_SIZE(ret); i++) {
out = ndrc->translation_cache;
beginning = start_block();
+ ((volatile u_int *)NDRC_WRITE_OFFSET(out))[0]++; // make the cache dirty
emit_movimm(DRC_TEST_VAL + i, 0); // test
emit_ret();
literal_pool(0);
mini_ht_clear();
copy=shadow;
expirep = EXPIRITY_OFFSET;
- pending_exception=0;
literalcount=0;
stop_after_jal=0;
+ ni_count=0;
+ err_print_count=0;
inv_code_start=inv_code_end=~0;
hack_addr=0;
f1_hack=0;
stat_clear(stat_blocks);
stat_clear(stat_links);
- if (cycle_multiplier_old != Config.cycle_multiplier
- || new_dynarec_hacks_old != new_dynarec_hacks)
+ if (ndrc_g.cycle_multiplier_old != Config.cycle_multiplier
+ || ndrc_g.hacks_old != (ndrc_g.hacks | ndrc_g.hacks_pergame))
{
SysPrintf("ndrc config: mul=%d, ha=%x, pex=%d\n",
- get_cycle_multiplier(), new_dynarec_hacks, Config.PreciseExceptions);
+ get_cycle_multiplier(), ndrc_g.hacks, Config.PreciseExceptions);
}
- cycle_multiplier_old = Config.cycle_multiplier;
- new_dynarec_hacks_old = new_dynarec_hacks;
+ ndrc_g.cycle_multiplier_old = Config.cycle_multiplier;
+ ndrc_g.hacks_old = ndrc_g.hacks | ndrc_g.hacks_pergame;
}
static int pgsize(void)
{
+ long ret = -1;
#ifdef _SC_PAGESIZE
- return sysconf(_SC_PAGESIZE);
-#else
- return 4096;
+ ret = sysconf(_SC_PAGESIZE);
#endif
+ if (ret < 1)
+ ret = 4096;
+ return ret;
}
void new_dynarec_init(void)
SysPrintf("Init new dynarec, ndrc size %x, pgsize %d\n",
(int)sizeof(*ndrc), align + 1);
-#ifdef _3DS
- check_rosalina();
-#endif
#ifdef BASE_ADDR_DYNAMIC
#ifdef VITA
sceBlock = getVMBlock(); //sceKernelAllocMemBlockForVM("code", sizeof(*ndrc));
if (sceBlock <= 0)
- SysPrintf("sceKernelAllocMemBlockForVM failed: %x\n", sceBlock);
+ SysPrintf("getVMBlock failed: %x\n", sceBlock);
int ret = sceKernelGetMemBlockBase(sceBlock, (void **)&ndrc);
- if (ret < 0)
- SysPrintf("sceKernelGetMemBlockBase failed: %x\n", ret);
- sceKernelOpenVMDomain();
- sceClibPrintf("translation_cache = 0x%08lx\n ", (long)ndrc->translation_cache);
+ if (ret)
+ SysPrintf("sceKernelGetMemBlockBase: %x\n", ret);
+ ret = sceKernelOpenVMDomain();
+ if (ret)
+ SysPrintf("sceKernelOpenVMDomain: %x\n", ret);
#elif defined(_MSC_VER)
ndrc = VirtualAlloc(NULL, sizeof(*ndrc), MEM_COMMIT | MEM_RESERVE,
PAGE_EXECUTE_READWRITE);
void *mw = mmap(NULL, sizeof(*ndrc), PROT_READ | PROT_WRITE,
(flags = MAP_SHARED), fd, 0);
assert(mw != MAP_FAILED);
+ #endif
+ #if defined(NO_WRITE_EXEC) || defined(TC_WRITE_OFFSET)
prot = PROT_READ | PROT_EXEC;
#endif
ndrc = mmap((void *)desired_addr, sizeof(*ndrc), prot, flags, fd, 0);
#endif
#endif
#else
- #ifndef NO_WRITE_EXEC
ndrc = (struct ndrc_mem *)((size_t)(ndrc_bss + align) & ~align);
+ #ifndef NO_WRITE_EXEC
// not all systems allow execute in data segment by default
// size must be 4K aligned for 3DS?
if (mprotect(ndrc, sizeof(*ndrc),
PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
SysPrintf("mprotect(%p) failed: %s\n", ndrc, strerror(errno));
#endif
+ #ifdef TC_WRITE_OFFSET
+ #error "misconfiguration detected"
+ #endif
#endif
out = ndrc->translation_cache;
new_dynarec_clear_full();
+ hash_table_ptr = hash_table;
#ifdef HOST_IMM8
// Copy this into local area so we don't have to put it in every literal pool
invc_ptr=invalid_code;
arch_init();
new_dynarec_test();
ram_offset = (uintptr_t)psxM - 0x80000000;
- if (ram_offset!=0)
- SysPrintf("warning: RAM is not directly mapped, performance will suffer\n");
+ if (ram_offset != 0)
+ SysPrintf("RAM is not directly mapped\n");
SysPrintf("Mapped (RAM/scrp/ROM/LUTs/TC):\n");
SysPrintf("%p/%p/%p/%p/%p\n", psxM, psxH, psxR, mem_rtab, out);
}
static u_int *get_source_start(u_int addr, u_int *limit)
{
- if (addr < 0x00800000
- || (0x80000000 <= addr && addr < 0x80800000)
- || (0xa0000000 <= addr && addr < 0xa0800000))
+ if (addr < 0x00800000u
+ || (0x80000000u <= addr && addr < 0x80800000u)
+ || (0xa0000000u <= addr && addr < 0xa0800000u))
{
// used for BIOS calls mostly?
*limit = (addr & 0xa0600000) + 0x00200000;
return (u_int *)(psxM + (addr & 0x1fffff));
}
else if (
- /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/
- (0xbfc00000 <= addr && addr < 0xbfc80000))
+ (0x9fc00000u <= addr && addr < 0x9fc80000u) ||
+ (0xbfc00000u <= addr && addr < 0xbfc80000u))
{
- // BIOS. The multiplier should be much higher as it's uncached 8bit mem,
- // but timings in PCSX are too tied to the interpreter's 2-per-insn assumption
- if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M))
- cycle_multiplier_active = 200;
+ // BIOS. The multiplier should be much higher as it's uncached 8bit mem
+ // XXX: disabled as this introduces differences from the interpreter
+ // and lightrec multipliers making emu variations act inconsistently
+ //if (!HACK_ENABLED(NDHACK_OVERRIDE_CYCLE_M))
+ // cycle_multiplier_active = 200;
*limit = (addr & 0xfff00000) | 0x80000;
return (u_int *)((u_char *)psxR + (addr&0x7ffff));
psxRegs.GPR.r[i] = 0x1f800000;
}
- ndrc_get_addr_ht(sblocks[b].addr);
+ ndrc_get_addr_ht_param(hash_table, sblocks[b].addr, ndrc_cm_compile_offline);
for (f = sblocks[b].regflags, i = 0; f; f >>= 1, i++) {
if (f & 1)
memset(&dops[i], 0, sizeof(dops[i]));
dops[i].itype = INTCALL;
dops[i].rs1 = CCREG;
- dops[i].is_exception = 1;
+ dops[i].is_exception = dops[i].may_except = 1;
cinfo[i].ba = -1;
}
+// Recognize the SDK's "VSync: timeout" busy-wait so the recompiler can
+// fast-forward it (code is emitted later by vsync_hack_assemble).
+// Called with i at a lui/addiu pair that loads a string address (caller
+// pre-checks the opcode shape — see apply_hacks). Verifies the string is
+// "VSync: timeout", then pattern-matches the surrounding code: three jal
+// calls, a j, then a lw/slt/bnez polling loop whose target loads a
+// counter word from d(sp) and decrements it by 1. On a full match,
+// records vsync_hack = (bnez index << 16) | (stack offset of counter).
+// Any mismatch returns silently, leaving vsync_hack as set by the caller.
+static noinline void do_vsync(int i)
+{
+ // lui a0, x; addiu a0, x; jal puts
+ u32 addr = (cinfo[i].imm << 16) + (signed short)cinfo[i+1].imm;
+ char *str = NULL;
+ int j, t, jals_cnt = 0;
+
+ if (!is_ram_addr(addr))
+ return;
+ str = (char *)psxM + (addr & 0x1fffff);
+ if (!str || strncmp(str, "VSync: timeout", 14))
+ return;
+ // jal clearPad, jal clearRCnt; j return; nop
+ for (j = i+2; j < slen; j++) {
+ if (dops[j].itype == SHIFTIMM || dops[j].itype == IMM16 || dops[j].itype == ALU)
+ continue;
+ if (dops[j].opcode == 0x03) {
+ jals_cnt++; continue;
+ }
+ break;
+ }
+ // expect exactly 3 jal (0x03) then an unconditional j (0x02)
+ if (j >= slen || jals_cnt != 3 || dops[j++].opcode != 0x02)
+ return;
+ for (; j < slen; j++)
+ if (dops[j].itype != SHIFTIMM && dops[j].itype != IMM16)
+ break;
+ if (j >= slen || dops[j].opcode != 0x23) // lw x, condition
+ return;
+ j += 2;
+ if (dops[j].opcode != 0 || dops[j].opcode2 != 0x2A) // slt x, y
+ return;
+ if (dops[++j].opcode != 0x05) // bnez x, loop
+ return;
+ // branch target must be inside this block
+ t = (cinfo[j].ba - start) / 4;
+ if (t < 0 || t >= slen)
+ return;
+ // lw x, d(sp)
+ if (dops[t].opcode != 0x23 || dops[t].rs1 != 29 || (u32)cinfo[t].imm >= 1024)
+ return;
+ if (dops[t+2].opcode != 0x09 || cinfo[t+2].imm != -1) // addiu x, -1
+ return;
+ SysPrintf("vsync @%08x\n", start + t*4);
+ vsync_hack = (j << 16) | (cinfo[t].imm & 0xffff);
+}
+
static int apply_hacks(void)
{
int i;
+ vsync_hack = 0;
if (HACK_ENABLED(NDHACK_NO_COMPAT_HACKS))
return 0;
/* special hack(s) */
SysPrintf("PE2 hack @%08x\n", start + (i+3)*4);
dops[i + 3].itype = NOP;
}
+ // see also: psxBiosCheckExe()
+ if (i > 1 && dops[i].opcode == 0x0f && dops[i].rt1 == 4
+ && dops[i+1].opcode == 0x09 && dops[i+1].rt1 == 4 && dops[i+1].rs1 == 4
+ && dops[i+2].opcode == 0x03)
+ {
+ do_vsync(i);
+ }
+ }
+ if (source[0] == 0x3c05edb8 && source[1] == 0x34a58320)
+ {
+ // lui a1, 0xEDB8; ori a1, 0x8320
+ SysPrintf("F1 2000 hack @%08x\n", start);
+ cycle_multiplier_active = 100;
}
i = slen;
if (i > 10 && source[i-1] == 0 && source[i-2] == 0x03e00008
return 1;
}
}
+#if 0 // alt vsync, not used
if (Config.HLE)
{
if (start <= psxRegs.biosBranchCheck && psxRegs.biosBranchCheck < start + i*4)
}
}
}
+#endif
return 0;
}
break;
}
if (type == INTCALL)
- SysPrintf("NI %08x @%08x (%08x)\n", src, start + i*4, start);
+ SysPrintf_lim("NI %08x @%08x (%08x)\n", src, start + i*4, start);
dops[i].itype = type;
dops[i].opcode2 = op2;
dops[i].ls_type = ls_type;
}
}
-static noinline void pass1_disassemble(u_int pagelimit)
+static noinline void pass1a_disassemble(u_int pagelimit)
{
- int i, j, done = 0, ni_count = 0;
+ int i, j, done = 0;
int ds_next = 0;
for (i = 0; !done; i++)
// branch in delay slot?
if (dops[i].is_jump) {
// don't handle first branch and call interpreter if it's hit
- SysPrintf("branch in DS @%08x (%08x)\n", start + i*4, start);
+ SysPrintf_lim("branch in DS @%08x (%08x)\n", start + i*4, start);
force_j_to_interpreter = 1;
}
// load delay detection through a branch
if ((dop && is_ld_use_hazard(&dops[i], dop))
|| (!dop && Config.PreciseExceptions)) {
// jump target wants DS result - potential load delay effect
- SysPrintf("load delay in DS @%08x (%08x)\n", start + i*4, start);
+ SysPrintf_lim("load delay in DS @%08x (%08x)\n", start + i*4, start);
force_j_to_interpreter = 1;
if (0 <= t && t < i)
dops[t + 1].bt = 1; // expected return from interpreter
else if(i>=2&&dops[i-2].rt1==2&&dops[i].rt1==2&&dops[i].rs1!=2&&dops[i].rs2!=2&&dops[i-1].rs1!=2&&dops[i-1].rs2!=2&&
!(i>=3&&dops[i-3].is_jump)) {
// v0 overwrite like this is a sign of trouble, bail out
- SysPrintf("v0 overwrite @%08x (%08x)\n", start + i*4, start);
+ SysPrintf_lim("v0 overwrite @%08x (%08x)\n", start + i*4, start);
force_j_to_interpreter = 1;
}
}
else if (i > 0 && dops[i-1].is_delay_load
&& is_ld_use_hazard(&dops[i-1], &dops[i])
&& (i < 2 || !dops[i-2].is_ujump)) {
- SysPrintf("load delay @%08x (%08x)\n", start + i*4, start);
+ SysPrintf_lim("load delay @%08x (%08x)\n", start + i*4, start);
for (j = i - 1; j > 0 && dops[j-1].is_delay_load; j--)
if (dops[j-1].rt1 != dops[i-1].rt1)
break;
i = j; // don't compile the problematic branch/load/etc
}
if (dops[i].is_exception && i > 0 && dops[i-1].is_jump) {
- SysPrintf("exception in DS @%08x (%08x)\n", start + i*4, start);
+ SysPrintf_lim("exception in DS @%08x (%08x)\n", start + i*4, start);
i--;
force_intcall(i);
done = 2;
}
- if (i >= 2 && (source[i-2] & 0xffe0f800) == 0x40806000) // MTC0 $12
+ if (i >= 2) {
+ if ((source[i-2] & 0xffe0f800) == 0x40806000 // MTC0 $12
+ || (dops[i-2].is_jump && dops[i-2].rt1 == 31)) // call
dops[i].bt = 1;
+ }
if (i >= 1 && (source[i-1] & 0xffe0f800) == 0x40806800) // MTC0 $13
dops[i].bt = 1;
/* Is this the end of the block? */
if (i > 0 && dops[i-1].is_ujump) {
- if (dops[i-1].rt1 == 0) { // not jal
+ // Don't recompile stuff that's already compiled
+ if (check_addr(start + i*4+4)) {
+ done = 1;
+ continue;
+ }
+ // Don't get too close to the limit
+ if (i > MAXBLOCK - 64)
+ done = 2;
+ if (dops[i-1].opcode2 == 0x08 || dops[i-1].rs1 == 31) // JR; JALR x, lr
+ done = 2;
+ else if (dops[i-1].itype != RJUMP && dops[i-1].rt1 == 0) { // not JAL(R)
int found_bbranch = 0, t = (cinfo[i-1].ba - start) / 4;
if ((u_int)(t - i) < 64 && start + (t+64)*4 < pagelimit) {
// scan for a branch back to i+1
done = 2;
}
else {
- if(stop_after_jal) done=1;
- // Stop on BREAK
- if((source[i+1]&0xfc00003f)==0x0d) done=1;
+ // jal(r) - continue or perf may suffer for platforms without
+ // runtime block linking (like in crash3)
+ if (stop_after_jal)
+ done = 2;
}
- // Don't recompile stuff that's already compiled
- if(check_addr(start+i*4+4)) done=1;
- // Don't get too close to the limit
- if (i > MAXBLOCK - 64)
- done = 1;
}
if (dops[i].itype == HLECALL)
done = 1;
- else if (dops[i].itype == INTCALL)
+ else if (dops[i].itype == INTCALL) {
+ ni_count++;
done = 2;
+ }
else if (dops[i].is_exception)
- done = stop_after_jal ? 1 : 2;
+ done = 2;
if (done == 2) {
// Does the block continue due to a branch?
- for(j=i-1;j>=0;j--)
- {
- if(cinfo[j].ba==start+i*4) done=j=0; // Branch into delay slot
- if(cinfo[j].ba==start+i*4+4) done=j=0;
- if(cinfo[j].ba==start+i*4+8) done=j=0;
+ for (j = i-1; j >= 0; j--) {
+ if (cinfo[j].ba == start+i*4) done=j=0; // Branch into delay slot
+ if (cinfo[j].ba == start+i*4+4) done=j=0;
+ if (cinfo[j].ba == start+i*4+8) done=j=0;
}
}
//assert(i<MAXBLOCK-1);
assert(start+i*4<pagelimit);
if (i == MAXBLOCK - 2)
done = 1;
- // Stop if we're compiling junk
- if (dops[i].itype == INTCALL && (++ni_count > 8 || dops[i].opcode == 0x11)) {
- done=stop_after_jal=1;
- SysPrintf("Disabled speculative precompilation\n");
- }
+ }
+ if (ni_count > 32 && !stop_after_jal) {
+ stop_after_jal = 1;
+ SysPrintf("Disabled speculative precompilation\n");
}
while (i > 0 && dops[i-1].is_jump)
i--;
slen = i;
}
+// Pass 1b: flag internal branch targets.
+// For every branch whose target lies inside this block, set .bt on the
+// target instruction. This was previously done inline during the
+// unneeded-regs pass; split out so pass ordering can change.
+static noinline void pass1b_bt(void)
+{
+ int i;
+ for (i = 0; i < slen; i++)
+ if (dops[i].is_jump && start <= cinfo[i].ba && cinfo[i].ba < start+slen*4)
+ // Internal branch, flag target
+ dops[(cinfo[i].ba - start) >> 2].bt = 1;
+}
+
// Basic liveness analysis for MIPS registers
-static noinline void pass2_unneeded_regs(int istart,int iend,int r)
+static noinline void pass2b_unneeded_regs(int istart, int iend, int r)
{
int i;
uint64_t u,gte_u,b,gte_b;
//printf("unneeded registers i=%d (%d,%d) r=%d\n",i,istart,iend,r);
if(dops[i].is_jump)
{
- // If subroutine call, flag return address as a possible branch target
- if(dops[i].rt1==31 && i<slen-2) dops[i+2].bt=1;
-
if(cinfo[i].ba<start || cinfo[i].ba>=(start+slen*4))
{
// Branch out of this block, flush all regs
}
else
{
- // Internal branch, flag target
- dops[(cinfo[i].ba-start)>>2].bt=1;
if(cinfo[i].ba<=start+i*4) {
// Backward branch
if(dops[i].is_ujump)
// Only go three levels deep. This recursion can take an
// excessive amount of time if there are a lot of nested loops.
if(r<2) {
- pass2_unneeded_regs((cinfo[i].ba-start)>>2,i-1,r+1);
+ pass2b_unneeded_regs((cinfo[i].ba-start)>>2, i-1, r+1);
}else{
unneeded_reg[(cinfo[i].ba-start)>>2]=1;
gte_unneeded[(cinfo[i].ba-start)>>2]=gte_u_unknown;
}
}
-static noinline void pass2a_unneeded_other(void)
+static noinline void pass2a_unneeded(void)
{
int i, j;
for (i = 0; i < slen; i++)
break;
}
}
+ // rm redundant stack loads (unoptimized code, assuming no io mem access through sp)
+ if (i > 0 && dops[i].is_load && dops[i].rs1 == 29 && dops[i].ls_type == LS_32
+ && dops[i-1].is_store && dops[i-1].rs1 == 29 && dops[i-1].ls_type == LS_32
+ && dops[i-1].rs2 == dops[i].rt1 && !dops[i-1].is_ds && i < slen - 1
+ && dops[i+1].rs1 != dops[i].rt1 && dops[i+1].rs2 != dops[i].rt1
+ && !dops[i].bt && cinfo[i].imm == cinfo[i-1].imm)
+ {
+ cinfo[i].imm = 0;
+ memset(&dops[i], 0, sizeof(dops[i]));
+ dops[i].itype = NOP;
+ }
}
}
if(regmap_pre[i+1][hr]!=-1 || regs[i].regmap[hr]>0)
if(regmap_pre[i+1][hr]!=regs[i].regmap[hr])
{
- SysPrintf("fail: %x (%d %d!=%d)\n",start+i*4,hr,regmap_pre[i+1][hr],regs[i].regmap[hr]);
+ SysPrintf_lim("fail: %x (%d %d!=%d)\n",
+ start+i*4, hr, regmap_pre[i+1][hr], regs[i].regmap[hr]);
assert(regmap_pre[i+1][hr]==regs[i].regmap[hr]);
}
regmap_pre[i+1][hr]=-1;
// to use, which can avoid a load-use penalty on certain CPUs.
static noinline void pass5b_preallocate2(void)
{
- int i, hr;
- for(i=0;i<slen-1;i++)
+ int i, hr, limit = min(slen - 1, MAXBLOCK - 2);
+ for (i = 0; i < limit; i++)
{
if (!i || !dops[i-1].is_jump)
{
return block;
}
-static int new_recompile_block(u_int addr)
+static int noinline new_recompile_block(u_int addr)
{
u_int pagelimit = 0;
u_int state_rflags = 0;
int i;
- assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, out);
+ assem_debug("NOTCOMPILED: addr = %x -> %p\n", addr, log_addr(out));
if (addr & 3) {
if (addr != hack_addr) {
- SysPrintf("game crash @%08x, ra=%08x\n", addr, psxRegs.GPR.n.ra);
+ SysPrintf_lim("game crash @%08x, ra=%08x\n", addr, psxRegs.GPR.n.ra);
hack_addr = addr;
}
return -1;
}
start = addr;
- new_dynarec_did_compile=1;
+ ndrc_g.did_compile++;
if (Config.HLE && start == 0x80001000) // hlecall
{
void *beginning = start_block();
emit_movimm(start,0);
- emit_writeword(0,&pcaddr);
+ emit_writeword(0,&psxRegs.pc);
emit_far_jump(new_dyna_leave);
literal_pool(0);
end_block(beginning);
emit_addimm(0, 0x18, 0);
emit_adds_ptr(1, 1, 1);
emit_ldr_dualindexed(1, 0, 0);
+ emit_readptr(&hash_table_ptr, 1);
emit_writeword(0, &psxRegs.GPR.r[26]); // lw k0, 0x18(sp)
emit_far_call(ndrc_get_addr_ht);
emit_jmpreg(0); // jr k0
source = get_source_start(start, &pagelimit);
if (source == NULL) {
if (addr != hack_addr) {
- SysPrintf("Compile at bogus memory address: %08x, ra=%x\n",
+ SysPrintf_lim("Compile at bogus memory address: %08x, ra=%x\n",
addr, psxRegs.GPR.n.ra);
hack_addr = addr;
}
/* Pass 1 disassembly */
- pass1_disassemble(pagelimit);
+ pass1a_disassemble(pagelimit);
+ pass1b_bt();
int clear_hack_addr = apply_hacks();
- /* Pass 2 - Register dependencies and branch targets */
-
- pass2_unneeded_regs(0,slen-1,0);
+ /* Pass 2 - unneeded, register dependencies */
- pass2a_unneeded_other();
+ pass2a_unneeded();
+ pass2b_unneeded_regs(0, slen-1, 0);
/* Pass 3 - Register allocation */
// for BiosBootBypass() to work
// io address var abused as a "already been here" flag
emit_readword(&address, 1);
- emit_writeword(0, &pcaddr);
+ emit_writeword(0, &psxRegs.pc);
emit_writeword(0, &address);
emit_cmp(0, 1);
}
else {
emit_readword(&psxRegs.cpuInRecursion, 1);
- emit_writeword(0, &pcaddr);
+ emit_writeword(0, &psxRegs.pc);
emit_test(1, 1);
}
#ifdef __aarch64__
/* Pass 9 - Linker */
for(i=0;i<linkcount;i++)
{
- assem_debug("%p -> %8x\n",link_addr[i].addr,link_addr[i].target);
+ assem_debug("link: %p -> %08x\n",
+ log_addr(link_addr[i].addr), link_addr[i].target);
literal_pool(64);
if (!link_addr[i].internal)
{
{
if ((i == 0 || dops[i].bt) && instr_addr[i])
{
- assem_debug("%p (%d) <- %8x\n", instr_addr[i], i, start + i*4);
+ assem_debug("%p (%d) <- %8x\n", log_addr(instr_addr[i]), i, start + i*4);
u_int vaddr = start + i*4;
literal_pool(256);
#define NEW_DYNAREC 1
-extern int pcaddr;
-extern int pending_exception;
-extern int stop;
-extern int new_dynarec_did_compile;
-
-extern int cycle_multiplier_old;
+#define MAXBLOCK 2048 // in mips instructions
#define NDHACK_NO_SMC_CHECK (1<<0)
#define NDHACK_GTE_UNNEEDED (1<<1)
#define NDHACK_OVERRIDE_CYCLE_M (1<<3)
#define NDHACK_NO_STALLS (1<<4)
#define NDHACK_NO_COMPAT_HACKS (1<<5)
-extern int new_dynarec_hacks;
-extern int new_dynarec_hacks_pergame;
-extern int new_dynarec_hacks_old;
+#define NDHACK_THREAD_FORCE (1<<6)
+#define NDHACK_THREAD_FORCE_ON (1<<7)
+
+struct ndrc_globals
+{
+ int hacks;
+ int hacks_pergame;
+ int hacks_old;
+ int did_compile;
+ int cycle_multiplier_old;
+ struct {
+ void *handle;
+ void *lock;
+ void *cond;
+ void *dirty_start;
+ void *dirty_end;
+ unsigned int busy_addr; // 0 is valid, ~0 == none
+ int exit;
+ } thread;
+};
+extern struct ndrc_globals ndrc_g;
void new_dynarec_init(void);
void new_dynarec_cleanup(void);
void new_dynarec_clear_full(void);
-void new_dyna_start(void *context);
int new_dynarec_save_blocks(void *save, int size);
void new_dynarec_load_blocks(const void *save, int size);
void new_dynarec_print_stats(void);
+int new_dynarec_quick_check_range(unsigned int start, unsigned int end);
void new_dynarec_invalidate_range(unsigned int start, unsigned int end);
void new_dynarec_invalidate_all_pages(void);
+void new_dyna_clear_cache(void *start, void *end);
+
+void new_dyna_start(void *context);
+void new_dyna_start_at(void *context, void *compiled_code);
+
+struct ht_entry;
+enum ndrc_compile_mode {
+ ndrc_cm_no_compile = 0,
+ ndrc_cm_compile_live, // from executing code, vaddr is the current pc
+ ndrc_cm_compile_offline,
+ ndrc_cm_compile_in_thread,
+};
+void *ndrc_get_addr_ht_param(struct ht_entry *ht, unsigned int vaddr,
+ enum ndrc_compile_mode compile_mode);
+
+extern unsigned int ndrc_smrv_regs[32];
#define USE_MINI_HT 1
//#define REG_PREFETCH 1
+// options:
+//#define NO_WRITE_EXEC 1
+//#define BASE_ADDR_DYNAMIC 1
+//#define TC_WRITE_OFFSET 1
+//#define NDRC_CACHE_FLUSH_ALL 1
+
#if defined(__MACH__) || defined(HAVE_LIBNX)
#define NO_WRITE_EXEC 1
#endif
diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c
-index 74f32ee3..4eec8a83 100644
+index 300a84c8..e4343533 100644
--- a/libpcsxcore/new_dynarec/new_dynarec.c
+++ b/libpcsxcore/new_dynarec/new_dynarec.c
-@@ -325,7 +325,7 @@ static struct compile_info
- int new_dynarec_hacks_old;
- int new_dynarec_did_compile;
+@@ -345,7 +345,7 @@ static struct compile_info
+ #define stat_clear(s)
+ #endif
-- #define HACK_ENABLED(x) ((new_dynarec_hacks | new_dynarec_hacks_pergame) & (x))
-+ #define HACK_ENABLED(x) ((NDHACK_NO_STALLS) & (x))
+- #define HACK_ENABLED(x) ((ndrc_g.hacks | ndrc_g.hacks_pergame) & (x))
++ #define HACK_ENABLED(x) ((NDHACK_NO_STALLS|NDHACK_NO_COMPAT_HACKS) & (x))
- extern int cycle_count; // ... until end of the timeslice, counts -N -> 0 (CCREG)
- extern int last_count; // last absolute target, often = next_interupt
-@@ -603,6 +603,7 @@ static int cycle_multiplier_active;
+ /* registers that may be allocated */
+ /* 1-31 gpr */
+@@ -626,6 +626,7 @@ static int cycle_multiplier_active;
static int CLOCK_ADJUST(int x)
{
int m = cycle_multiplier_active;
int s = (x >> 31) | 1;
return (x * m + s * 50) / 100;
-@@ -808,6 +809,9 @@ static noinline u_int generate_exception(u_int pc)
- // This is called from the recompiled JR/JALR instructions
- static void noinline *get_addr(u_int vaddr, int can_compile)
+@@ -837,6 +838,9 @@ static noinline u_int generate_exception(u_int pc)
+ static void noinline *get_addr(struct ht_entry *ht, const u_int vaddr,
+ enum ndrc_compile_mode compile_mode)
{
+#ifdef DRC_DBG
+printf("get_addr %08x, pc=%08x\n", vaddr, psxRegs.pc);
u_int start_page = get_page_prev(vaddr);
u_int i, page, end_page = get_page(vaddr);
void *found_clean = NULL;
-@@ -7213,7 +7217,7 @@ static noinline void pass2_unneeded_regs(int istart,int iend,int r)
+@@ -7421,7 +7425,7 @@ static noinline void pass2b_unneeded_regs(int istart, int iend, int r)
// R0 is always unneeded
u|=1;
// Save it
gte_unneeded[i]=gte_u;
/*
printf("ur (%d,%d) %x: ",istart,iend,start+i*4);
-@@ -8355,6 +8359,7 @@ static noinline void pass5a_preallocate1(void)
+@@ -8574,6 +8578,7 @@ static noinline void pass5a_preallocate1(void)
+ // to use, which can avoid a load-use penalty on certain CPUs.
static noinline void pass5b_preallocate2(void)
{
- int i, hr;
+ return;
- for(i=0;i<slen-1;i++)
+ int i, hr, limit = min(slen - 1, MAXBLOCK - 2);
+ for (i = 0; i < limit; i++)
{
- if (!i || !dops[i-1].is_jump)
-@@ -9380,6 +9385,10 @@ static int new_recompile_block(u_int addr)
+@@ -9602,6 +9607,10 @@ static int noinline new_recompile_block(u_int addr)
#ifdef ASSEM_PRINT
fflush(stdout);
stat_inc(stat_bc_direct);
return 0;
diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c
-index f4b1d90e..d3975ceb 100644
+index 98e2c6be..edba031e 100644
--- a/libpcsxcore/new_dynarec/pcsxmem.c
+++ b/libpcsxcore/new_dynarec/pcsxmem.c
-@@ -258,6 +258,8 @@ static void write_biu(u32 value)
+@@ -238,6 +238,8 @@ static void write_biu(u32 value)
return;
}
psxRegs.biuReg = value;
}
diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c
-index d0d45ec5..4ed03c40 100644
+index 064c06b6..07e2afb5 100644
--- a/libpcsxcore/psxcounters.c
+++ b/libpcsxcore/psxcounters.c
-@@ -428,9 +428,12 @@ void psxRcntUpdate()
+@@ -455,9 +455,12 @@ void psxRcntUpdate()
/******************************************************************************/
_psxRcntWcount( index, value );
psxRcntSet();
-@@ -439,6 +442,7 @@ void psxRcntWcount( u32 index, u32 value )
+@@ -466,6 +469,7 @@ void psxRcntWcount( u32 index, u32 value )
void psxRcntWmode( u32 index, u32 value )
{
verboseLog( 1, "[RCNT %i] wmode: %x\n", index, value );
_psxRcntWmode( index, value );
_psxRcntWcount( index, 0 );
-@@ -450,6 +454,7 @@ void psxRcntWmode( u32 index, u32 value )
+@@ -477,6 +481,7 @@ void psxRcntWmode( u32 index, u32 value )
void psxRcntWtarget( u32 index, u32 value )
{
verboseLog( 1, "[RCNT %i] wtarget: %x\n", index, value );
rcnts[index].target = value;
-@@ -463,6 +468,7 @@ u32 psxRcntRcount0()
+@@ -490,6 +495,7 @@ u32 psxRcntRcount0()
{
u32 index = 0;
u32 count;
if ((rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) ||
(rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2))
+diff --git a/libpcsxcore/psxevents.c b/libpcsxcore/psxevents.c
+index 1e2d01f6..0ee15974 100644
+--- a/libpcsxcore/psxevents.c
++++ b/libpcsxcore/psxevents.c
+@@ -77,11 +77,13 @@ void irq_test(psxCP0Regs *cp0)
+ }
+ }
+
+- cp0->n.Cause &= ~0x400;
++ u32 c2 = cp0->n.Cause & ~0x400;
+ if (psxHu32(0x1070) & psxHu32(0x1074))
+- cp0->n.Cause |= 0x400;
+- if (((cp0->n.Cause | 1) & cp0->n.SR & 0x401) == 0x401)
++ c2 |= 0x400;
++ if (((c2 | 1) & cp0->n.SR & 0x401) == 0x401) {
++ cp0->n.Cause = c2;
+ psxException(0, 0, cp0);
++ }
+ }
+
+ void gen_interupt(psxCP0Regs *cp0)
diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c
-index f6ff2e8b..2f7147c3 100644
+index 68d79321..50a38f8d 100644
--- a/libpcsxcore/psxinterpreter.c
+++ b/libpcsxcore/psxinterpreter.c
-@@ -245,7 +245,7 @@ static inline void addCycle(psxRegisters *regs)
+@@ -243,7 +243,7 @@ static inline void addCycle(psxRegisters *regs)
{
assert(regs->subCycleStep >= 0x10000);
regs->subCycle += regs->subCycleStep;
regs->subCycle &= 0xffff;
}
-@@ -1348,8 +1348,15 @@ static void intShutdown() {
+@@ -440,7 +440,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) {
+ regs->CP0.n.Target = pc_final;
+ regs->branching = 0;
+
++ psxRegs.cycle += 2;
+ psxBranchTest();
++ psxRegs.cycle -= 2;
+ }
+
+ static void doBranchReg(psxRegisters *regs, u32 tar) {
+@@ -973,7 +975,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) {
+ }
+ }
+
+-OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); }
++OP(psxMTC0) { MTC0(regs_, _Rd_, _u32(_rRt_)); psxBranchTest(); }
+
+ // no exception
+ static inline void psxNULLne(psxRegisters *regs) {
+@@ -1132,6 +1134,7 @@ OP(psxHLE) {
+ dloadFlush(regs_);
+ psxHLEt[hleCode]();
+ regs_->branchSeen = 1;
++ regs_->cycle -= 2;
+ }
+
+ static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = {
+@@ -1182,18 +1185,20 @@ static void intReset() {
+ static inline void execI_(u8 **memRLUT, psxRegisters *regs) {
+ u32 pc = regs->pc;
+
+- addCycle(regs);
++ //addCycle(regs);
+ dloadStep(regs);
+
+ regs->pc += 4;
+ regs->code = fetch(regs, memRLUT, pc);
+ psxBSC[regs->code >> 26](regs, regs->code);
++ psxRegs.cycle += 2;
++ fetchNoCache(regs, memRLUT, regs->pc); // bus err check
+ }
+
+ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) {
+ u32 pc = regs->pc;
+
+- addCycle(regs);
++ //addCycle(regs);
+ dloadStep(regs);
+
+ if (execBreakCheck(regs, pc))
+@@ -1202,6 +1207,8 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) {
+ regs->pc += 4;
+ regs->code = fetch(regs, memRLUT, pc);
+ psxBSC[regs->code >> 26](regs, regs->code);
++ psxRegs.cycle += 2;
++ fetchNoCache(regs, memRLUT, regs->pc); // bus err check
+ }
+
+ static void intExecute(psxRegisters *regs) {
+@@ -1218,20 +1225,28 @@ static void intExecuteBp(psxRegisters *regs) {
+ execIbp(memRLUT, regs);
+ }
+
++ extern int last_count;
++ void do_insn_cmp(void);
+ static void intExecuteBlock(psxRegisters *regs, enum blockExecCaller caller) {
+ u8 **memRLUT = psxMemRLUT;
+
++ last_count = 0;
+ regs->branchSeen = 0;
+- while (!regs->branchSeen)
++ while (!regs->branchSeen || (regs->dloadReg[0] || regs->dloadReg[1])) {
++ do_insn_cmp();
+ execI_(memRLUT, regs);
++ }
+ }
+
+ static void intExecuteBlockBp(psxRegisters *regs, enum blockExecCaller caller) {
+ u8 **memRLUT = psxMemRLUT;
+
++ last_count = 0;
+ regs->branchSeen = 0;
+- while (!regs->branchSeen)
++ while (!regs->branchSeen || (regs->dloadReg[0] || regs->dloadReg[1])) {
++ do_insn_cmp();
+ execIbp(memRLUT, regs);
++ }
+ }
+
+ static void intClear(u32 Addr, u32 Size) {
+@@ -1263,7 +1278,7 @@ static void setupCop(u32 sr)
+ else
+ psxBSC[17] = psxCOPd;
+ if (sr & (1u << 30))
+- psxBSC[18] = Config.DisableStalls ? psxCOP2 : psxCOP2_stall;
++ psxBSC[18] = psxCOP2;
+ else
+ psxBSC[18] = psxCOPd;
+ if (sr & (1u << 31))
+@@ -1282,7 +1297,7 @@ void intApplyConfig() {
+ assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall);
+ assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall);
+
+- if (Config.DisableStalls) {
++ if (1) {
+ psxBSC[18] = psxCOP2;
+ psxBSC[50] = gteLWC2;
+ psxBSC[58] = gteSWC2;
+@@ -1365,8 +1380,12 @@ static void intShutdown() {
// single step (may do several ops in case of a branch or load delay)
// called by asm/dynarec
void execI(psxRegisters *regs) {
-+ extern int last_count;
-+ extern u32 next_interupt;
-+ void do_insn_cmp(void);
-+ printf("execI %08x c %u, ni %u\n", regs->pc, regs->cycle, next_interupt);
++ printf("execI %08x c %u, ni %u\n", regs->pc, regs->cycle, regs->next_interupt);
+ last_count = 0;
do {
execIbp(psxMemRLUT, regs);
diff --git a/libpcsxcore/new_dynarec/emu_if.c b/libpcsxcore/new_dynarec/emu_if.c
-index f879ad8c..0ec366d0 100644
+index cefadd21..63a5c1b1 100644
--- a/libpcsxcore/new_dynarec/emu_if.c
+++ b/libpcsxcore/new_dynarec/emu_if.c
-@@ -323,13 +323,18 @@ static void ari64_shutdown()
- {
+@@ -5,6 +5,7 @@
+ * See the COPYING file in the top-level directory.
+ */
+
++#undef NDRC_THREAD
+ #include <stdio.h>
+
+ #include "emu_if.h"
+@@ -631,13 +632,18 @@ static void ari64_shutdown()
+ ari64_thread_shutdown();
new_dynarec_cleanup();
new_dyna_pcsx_mem_shutdown();
+ (void)ari64_execute;
+ (void)ari64_execute_block;
}
-+extern void intExecuteT();
-+extern void intExecuteBlockT();
++extern void intExecuteT(psxRegisters *regs);
++extern void intExecuteBlockT(psxRegisters *regs, enum blockExecCaller caller);
+
R3000Acpu psxRec = {
ari64_init,
ari64_clear,
ari64_notify,
ari64_apply_config,
-@@ -398,7 +403,7 @@ static u32 memcheck_read(u32 a)
+@@ -699,7 +705,7 @@ static u32 memcheck_read(u32 a)
return *(u32 *)(psxM + (a & 0x1ffffc));
}
{
static psxRegisters oldregs;
diff --git a/libpcsxcore/new_dynarec/pcsxmem.c b/libpcsxcore/new_dynarec/pcsxmem.c
-index 1f37dc29..357f753e 100644
+index 98e2c6be..edba031e 100644
--- a/libpcsxcore/new_dynarec/pcsxmem.c
+++ b/libpcsxcore/new_dynarec/pcsxmem.c
-@@ -289,6 +289,8 @@ static void write_biu(u32 value)
+@@ -238,6 +238,8 @@ static void write_biu(u32 value)
return;
}
psxRegs.biuReg = value;
}
diff --git a/libpcsxcore/psxcounters.c b/libpcsxcore/psxcounters.c
-index 18bd6a4e..bc2eb3f6 100644
+index 064c06b6..07e2afb5 100644
--- a/libpcsxcore/psxcounters.c
+++ b/libpcsxcore/psxcounters.c
-@@ -389,9 +389,12 @@ void psxRcntUpdate()
+@@ -455,9 +455,12 @@ void psxRcntUpdate()
/******************************************************************************/
_psxRcntWcount( index, value );
psxRcntSet();
-@@ -400,6 +403,7 @@ void psxRcntWcount( u32 index, u32 value )
+@@ -466,6 +469,7 @@ void psxRcntWcount( u32 index, u32 value )
void psxRcntWmode( u32 index, u32 value )
{
verboseLog( 1, "[RCNT %i] wmode: %x\n", index, value );
_psxRcntWmode( index, value );
_psxRcntWcount( index, 0 );
-@@ -411,6 +415,7 @@ void psxRcntWmode( u32 index, u32 value )
+@@ -477,6 +481,7 @@ void psxRcntWmode( u32 index, u32 value )
void psxRcntWtarget( u32 index, u32 value )
{
verboseLog( 1, "[RCNT %i] wtarget: %x\n", index, value );
rcnts[index].target = value;
-@@ -423,6 +428,7 @@ void psxRcntWtarget( u32 index, u32 value )
- u32 psxRcntRcount( u32 index )
+@@ -490,6 +495,7 @@ u32 psxRcntRcount0()
{
+ u32 index = 0;
u32 count;
+handler_cycle = psxRegs.cycle;
- count = _psxRcntRcount( index );
+ if ((rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset) ||
+ (rcnts[0].mode & 7) == (RcSyncModeEnable | Rc01UnblankReset2))
+diff --git a/libpcsxcore/psxevents.c b/libpcsxcore/psxevents.c
+index 1e2d01f6..0ee15974 100644
+--- a/libpcsxcore/psxevents.c
++++ b/libpcsxcore/psxevents.c
+@@ -77,11 +77,13 @@ void irq_test(psxCP0Regs *cp0)
+ }
+ }
+
+- cp0->n.Cause &= ~0x400;
++ u32 c2 = cp0->n.Cause & ~0x400;
+ if (psxHu32(0x1070) & psxHu32(0x1074))
+- cp0->n.Cause |= 0x400;
+- if (((cp0->n.Cause | 1) & cp0->n.SR & 0x401) == 0x401)
++ c2 |= 0x400;
++ if (((c2 | 1) & cp0->n.SR & 0x401) == 0x401) {
++ cp0->n.Cause = c2;
+ psxException(0, 0, cp0);
++ }
+ }
+ void gen_interupt(psxCP0Regs *cp0)
diff --git a/libpcsxcore/psxhw.c b/libpcsxcore/psxhw.c
-index 10a2695f..7e4a64da 100644
+index c487b02d..171c447f 100644
--- a/libpcsxcore/psxhw.c
+++ b/libpcsxcore/psxhw.c
-@@ -437,13 +437,14 @@ void psxHwWrite8(u32 add, u8 value) {
- return;
- }
+@@ -323,6 +323,7 @@ void psxHwWrite8(u32 add, u32 value) {
+ log_unhandled("unhandled w8 %08x %08x @%08x\n",
+ add, value, psxRegs.pc);
+ }
++ if (add < 0x1f802000)
+ psxHu8(add) = value;
+ }
-+ if (add < 0x1f802000)
- psxHu8(add) = value;
- #ifdef PSXHW_LOG
- PSXHW_LOG("*Unknown 8bit write at address %x value %x\n", add, value);
- #endif
- return;
+@@ -396,6 +397,7 @@ void psxHwWrite16(u32 add, u32 value) {
+ log_unhandled("unhandled w16 %08x %08x @%08x\n",
+ add, value, psxRegs.pc);
}
-- psxHu8(add) = value;
-+ //psxHu8(add) = value;
- #ifdef PSXHW_LOG
- PSXHW_LOG("*Known 8bit write at address %x value %x\n", add, value);
- #endif
-@@ -565,6 +566,7 @@ void psxHwWrite16(u32 add, u16 value) {
- return;
- }
++ if (add < 0x1f802000)
+ psxHu16ref(add) = SWAPu16(value);
+ }
-+ if (add < 0x1f802000)
- psxHu16ref(add) = SWAPu16(value);
- #ifdef PSXHW_LOG
- PSXHW_LOG("*Unknown 16bit write at address %x value %x\n", add, value);
-@@ -756,9 +758,9 @@ void psxHwWrite32(u32 add, u32 value) {
+@@ -452,6 +454,7 @@ void psxHwWrite32(u32 add, u32 value) {
return;
+ }
+ }
++ if (add < 0x1f802000)
+ psxHu32ref(add) = SWAPu32(value);
+ }
- case 0x1f801820:
-- mdecWrite0(value); break;
-+ mdecWrite0(value); return;
- case 0x1f801824:
-- mdecWrite1(value); break;
-+ mdecWrite1(value); return;
-
- case 0x1f801100:
- #ifdef PSXHW_LOG
-@@ -826,6 +828,7 @@ void psxHwWrite32(u32 add, u32 value) {
- return;
- }
-
-+ if (add < 0x1f802000)
- psxHu32ref(add) = SWAPu32(value);
- #ifdef PSXHW_LOG
- PSXHW_LOG("*Unknown 32bit write at address %x value %x\n", add, value);
diff --git a/libpcsxcore/psxinterpreter.c b/libpcsxcore/psxinterpreter.c
-index 5756bee5..4bf9248d 100644
+index 68d79321..2e3d14ab 100644
--- a/libpcsxcore/psxinterpreter.c
+++ b/libpcsxcore/psxinterpreter.c
-@@ -238,7 +238,7 @@ static inline void addCycle(psxRegisters *regs)
+@@ -243,7 +243,7 @@ static inline void addCycle(psxRegisters *regs)
{
assert(regs->subCycleStep >= 0x10000);
regs->subCycle += regs->subCycleStep;
regs->subCycle &= 0xffff;
}
-@@ -435,7 +435,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) {
+@@ -440,7 +440,9 @@ static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) {
regs->CP0.n.Target = pc_final;
regs->branching = 0;
}
static void doBranchReg(psxRegisters *regs, u32 tar) {
-@@ -960,7 +962,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) {
+@@ -973,7 +975,7 @@ void MTC0(psxRegisters *regs_, int reg, u32 val) {
}
}
// no exception
static inline void psxNULLne(psxRegisters *regs) {
-@@ -1120,6 +1122,7 @@ OP(psxHLE) {
- }
- psxHLEt[hleCode]();
- branchSeen = 1;
-+ psxRegs.cycle -= 2;
- }
-
- static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = {
-@@ -1169,18 +1172,20 @@ static void intReset() {
+@@ -1182,18 +1184,20 @@ static void intReset() {
static inline void execI_(u8 **memRLUT, psxRegisters *regs) {
u32 pc = regs->pc;
dloadStep(regs);
if (execBreakCheck(regs, pc))
-@@ -1189,6 +1194,8 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) {
+@@ -1202,6 +1206,8 @@ static inline void execIbp(u8 **memRLUT, psxRegisters *regs) {
regs->pc += 4;
regs->code = fetch(regs, memRLUT, pc);
psxBSC[regs->code >> 26](regs, regs->code);
+ fetchNoCache(regs, memRLUT, regs->pc); // bus err check
}
- static void intExecute() {
-@@ -1218,6 +1225,30 @@ void intExecuteBlock(enum blockExecCaller caller) {
- execI_(memRLUT, regs_);
+ static void intExecute(psxRegisters *regs) {
+@@ -1234,6 +1240,27 @@ static void intExecuteBlockBp(psxRegisters *regs, enum blockExecCaller caller) {
+ execIbp(memRLUT, regs);
}
+extern void do_insn_trace(void);
+
-+void intExecuteT() {
-+ psxRegisters *regs_ = &psxRegs;
++void intExecuteT(psxRegisters *regs) {
+ u8 **memRLUT = psxMemRLUT;
-+ extern int stop;
+
-+ while (!stop) {
++ while (!regs->stop) {
+ do_insn_trace();
-+ execIbp(memRLUT, regs_);
++ execIbp(memRLUT, regs);
+ }
+}
+
-+void intExecuteBlockT() {
-+ psxRegisters *regs_ = &psxRegs;
++void intExecuteBlockT(psxRegisters *regs, enum blockExecCaller caller) {
+ u8 **memRLUT = psxMemRLUT;
+
-+ branchSeen = 0;
-+ while (!branchSeen) {
++ regs->branchSeen = 0;
++ while (!regs->branchSeen) {
+ do_insn_trace();
-+ execIbp(memRLUT, regs_);
++ execIbp(memRLUT, regs);
+ }
+}
+
static void intClear(u32 Addr, u32 Size) {
}
-@@ -1246,7 +1277,7 @@ static void setupCop(u32 sr)
+@@ -1263,7 +1290,7 @@ static void setupCop(u32 sr)
else
psxBSC[17] = psxCOPd;
if (sr & (1u << 30))
else
psxBSC[18] = psxCOPd;
if (sr & (1u << 31))
-@@ -1265,7 +1296,7 @@ void intApplyConfig() {
+@@ -1282,7 +1309,7 @@ void intApplyConfig() {
assert(psxSPC[26] == psxDIV || psxSPC[26] == psxDIV_stall);
assert(psxSPC[27] == psxDIVU || psxSPC[27] == psxDIVU_stall);
psxBSC[50] = gteLWC2;
psxBSC[58] = gteSWC2;
diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c
-index 42755e52..4fa4316b 100644
+index 13301992..2ccdea74 100644
--- a/libpcsxcore/psxmem.c
+++ b/libpcsxcore/psxmem.c
-@@ -289,10 +289,13 @@ void psxMemOnIsolate(int enable)
+@@ -316,10 +316,13 @@ void psxMemOnIsolate(int enable)
: R3000ACPU_NOTIFY_CACHE_UNISOLATED, NULL);
}
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -318,6 +321,7 @@ u16 psxMemRead16(u32 mem) {
+@@ -345,6 +348,7 @@ u16 psxMemRead16(u32 mem) {
char *p;
u32 t;
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -343,6 +347,7 @@ u32 psxMemRead32(u32 mem) {
+@@ -370,6 +374,7 @@ u32 psxMemRead32(u32 mem) {
char *p;
u32 t;
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -370,6 +375,7 @@ void psxMemWrite8(u32 mem, u8 value) {
+@@ -397,6 +402,7 @@ void psxMemWrite8(u32 mem, u32 value) {
char *p;
u32 t;
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -397,6 +403,7 @@ void psxMemWrite16(u32 mem, u16 value) {
+@@ -424,6 +430,7 @@ void psxMemWrite16(u32 mem, u32 value) {
char *p;
u32 t;
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
if ((mem & 0xffff) < 0x400)
-@@ -424,6 +431,7 @@ void psxMemWrite32(u32 mem, u32 value) {
+@@ -451,6 +458,7 @@ void psxMemWrite32(u32 mem, u32 value) {
char *p;
u32 t;
// if ((mem&0x1fffff) == 0x71E18 || value == 0x48088800) SysPrintf("t2fix!!\n");
t = mem >> 16;
if (t == 0x1f80 || t == 0x9f80 || t == 0xbf80) {
-@@ -442,6 +450,8 @@ void psxMemWrite32(u32 mem, u32 value) {
+@@ -469,6 +477,8 @@ void psxMemWrite32(u32 mem, u32 value) {
#endif
} else {
if (mem == 0xfffe0130) {
return;
}
diff --git a/libpcsxcore/r3000a.c b/libpcsxcore/r3000a.c
-index 48881068..47c40940 100644
+index cfd1ab09..724167e0 100644
--- a/libpcsxcore/r3000a.c
+++ b/libpcsxcore/r3000a.c
-@@ -127,6 +127,8 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) {
+@@ -141,6 +141,8 @@ void psxException(u32 cause, enum R3000Abdt bdt, psxCP0Regs *cp0) {
}
void psxBranchTest() {
+ extern u32 irq_test_cycle;
+ irq_test_cycle = psxRegs.cycle;
- if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter)
+ if ((psxRegs.cycle - psxRegs.psxNextsCounter) >= psxRegs.psxNextCounter)
psxRcntUpdate();
static void io_write_sio16(u32 value)
{
- sioWrite8((unsigned char)value);
- sioWrite8((unsigned char)(value>>8));
+ sioWrite8(value);
}
static void io_write_sio32(u32 value)
{
- sioWrite8((unsigned char)value);
- sioWrite8((unsigned char)(value >> 8));
- sioWrite8((unsigned char)(value >> 16));
- sioWrite8((unsigned char)(value >> 24));
+ sioWrite8(value);
}
#if !defined(DRC_DBG) && defined(__arm__)
make_forcew32_func(10b0)
make_forcew32_func(10c0)
make_forcew32_func(10e0)
-make_forcew32_func(10f0)
void new_dyna_pcsx_mem_load_state(void)
{
map_item(&mem_iowtab[IOMEM32(0x10cc)], psxHwWriteChcr4, 1);
map_item(&mem_iowtab[IOMEM32(0x10e8)], psxHwWriteChcr6, 1);
map_item(&mem_iowtab[IOMEM32(0x10ec)], psxHwWriteChcr6, 1);
+ map_item(&mem_iowtab[IOMEM32(0x10f0)], psxHwWriteDmaPcr32, 1);
map_item(&mem_iowtab[IOMEM32(0x10f4)], psxHwWriteDmaIcr32, 1);
map_item(&mem_iowtab[IOMEM32(0x1100)], io_rcnt_write_count0, 1);
map_item(&mem_iowtab[IOMEM32(0x1104)], io_rcnt_write_mode0, 1);
map_item(&mem_iowtab[IOMEM16(0x10e0)], io_write_force32_10e0, 1);
map_item(&mem_iowtab[IOMEM16(0x10e8)], psxHwWriteChcr6, 1);
map_item(&mem_iowtab[IOMEM16(0x10ec)], psxHwWriteChcr6, 1);
- map_item(&mem_iowtab[IOMEM16(0x10f0)], io_write_force32_10f0, 1);
+ map_item(&mem_iowtab[IOMEM16(0x10f0)], psxHwWriteDmaPcr32, 1);
map_item(&mem_iowtab[IOMEM16(0x10f4)], psxHwWriteDmaIcr32, 1);
map_item(&mem_iowtab[IOMEM16(0x1100)], io_rcnt_write_count0, 1);
map_item(&mem_iowtab[IOMEM16(0x1104)], io_rcnt_write_mode0, 1);
#include "plugins.h"
#include "cdriso.h"
+#include "cdrom-async.h"
#include "psxcounters.h"
static char IsoFile[MAXPATHLEN] = "";
GPUwriteData GPU_writeData;
GPUwriteDataMem GPU_writeDataMem;
GPUdmaChain GPU_dmaChain;
-GPUkeypressed GPU_keypressed;
-GPUdisplayText GPU_displayText;
-GPUmakeSnapshot GPU_makeSnapshot;
GPUfreeze GPU_freeze;
-GPUgetScreenPic GPU_getScreenPic;
-GPUshowScreenPic GPU_showScreenPic;
GPUvBlank GPU_vBlank;
GPUgetScreenInfo GPU_getScreenInfo;
-CDRinit CDR_init;
-CDRshutdown CDR_shutdown;
-CDRopen CDR_open;
-CDRclose CDR_close;
-CDRtest CDR_test;
-CDRgetTN CDR_getTN;
-CDRgetTD CDR_getTD;
-CDRreadTrack CDR_readTrack;
-CDRgetBuffer CDR_getBuffer;
-CDRplay CDR_play;
-CDRstop CDR_stop;
-CDRgetStatus CDR_getStatus;
-CDRgetDriveLetter CDR_getDriveLetter;
-CDRgetBufferSub CDR_getBufferSub;
-CDRconfigure CDR_configure;
-CDRabout CDR_about;
-CDRsetfilename CDR_setfilename;
-CDRreadCDDA CDR_readCDDA;
-CDRgetTE CDR_getTE;
-CDRprefetch CDR_prefetch;
-
SPUinit SPU_init;
SPUshutdown SPU_shutdown;
SPUopen SPU_open;
SPUplayCDDAchannel SPU_playCDDAchannel;
SPUsetCDvol SPU_setCDvol;
-PADconfigure PAD1_configure;
-PADabout PAD1_about;
-PADinit PAD1_init;
-PADshutdown PAD1_shutdown;
-PADtest PAD1_test;
-PADopen PAD1_open;
-PADclose PAD1_close;
-PADquery PAD1_query;
-PADreadPort1 PAD1_readPort1;
-PADkeypressed PAD1_keypressed;
-PADstartPoll PAD1_startPoll;
-PADpoll PAD1_poll;
-PADsetSensitive PAD1_setSensitive;
-
-PADconfigure PAD2_configure;
-PADabout PAD2_about;
-PADinit PAD2_init;
-PADshutdown PAD2_shutdown;
-PADtest PAD2_test;
-PADopen PAD2_open;
-PADclose PAD2_close;
-PADquery PAD2_query;
-PADreadPort2 PAD2_readPort2;
-PADkeypressed PAD2_keypressed;
-PADstartPoll PAD2_startPoll;
-PADpoll PAD2_poll;
-PADsetSensitive PAD2_setSensitive;
-
-NETinit NET_init;
-NETshutdown NET_shutdown;
-NETopen NET_open;
-NETclose NET_close;
-NETtest NET_test;
-NETconfigure NET_configure;
-NETabout NET_about;
-NETpause NET_pause;
-NETresume NET_resume;
-NETqueryPlayer NET_queryPlayer;
-NETsendData NET_sendData;
-NETrecvData NET_recvData;
-NETsendPadData NET_sendPadData;
-NETrecvPadData NET_recvPadData;
-NETsetInfo NET_setInfo;
-NETkeypressed NET_keypressed;
-
#ifdef ENABLE_SIO1API
SIO1init SIO1_init;
void *hGPUDriver = NULL;
-void CALLBACK GPU__displayText(char *pText) {
- SysPrintf("%s\n", pText);
-}
-
-long CALLBACK GPU__configure(void) { return 0; }
-long CALLBACK GPU__test(void) { return 0; }
-void CALLBACK GPU__about(void) {}
-void CALLBACK GPU__makeSnapshot(void) {}
-void CALLBACK GPU__keypressed(int key) {}
-long CALLBACK GPU__getScreenPic(unsigned char *pMem) { return -1; }
-long CALLBACK GPU__showScreenPic(unsigned char *pMem) { return -1; }
-void CALLBACK GPU__vBlank(int val) {}
-void CALLBACK GPU__getScreenInfo(int *y, int *base_hres) {}
+static void CALLBACK GPU__vBlank(int val) {}
+static void CALLBACK GPU__getScreenInfo(int *y, int *base_hres) {}
#define LoadGpuSym1(dest, name) \
LoadSym(GPU_##dest, GPU##dest, name, TRUE);
LoadGpuSym1(writeStatus, "GPUwriteStatus");
LoadGpuSym1(dmaChain, "GPUdmaChain");
LoadGpuSym1(updateLace, "GPUupdateLace");
- LoadGpuSym0(keypressed, "GPUkeypressed");
- LoadGpuSym0(displayText, "GPUdisplayText");
- LoadGpuSym0(makeSnapshot, "GPUmakeSnapshot");
LoadGpuSym1(freeze, "GPUfreeze");
- LoadGpuSym0(getScreenPic, "GPUgetScreenPic");
- LoadGpuSym0(showScreenPic, "GPUshowScreenPic");
LoadGpuSym0(vBlank, "GPUvBlank");
LoadGpuSym0(getScreenInfo, "GPUgetScreenInfo");
return 0;
}
-void *hCDRDriver = NULL;
-
-long CALLBACK CDR__play(unsigned char *sector) { return 0; }
-long CALLBACK CDR__stop(void) { return 0; }
-
-long CALLBACK CDR__getStatus(struct CdrStat *stat) {
+int CDR__getStatus(struct CdrStat *stat) {
if (cdOpenCaseTime < 0 || cdOpenCaseTime > (s64)time(NULL))
stat->Status = 0x10;
else
return 0;
}
-char* CALLBACK CDR__getDriveLetter(void) { return NULL; }
-long CALLBACK CDR__configure(void) { return 0; }
-long CALLBACK CDR__test(void) { return 0; }
-void CALLBACK CDR__about(void) {}
-long CALLBACK CDR__setfilename(char*filename) { return 0; }
-long CALLBACK CDR__prefetch(u8 m, u8 s, u8 f) { return 1; }
-
-#define LoadCdrSym1(dest, name) \
- LoadSym(CDR_##dest, CDR##dest, name, TRUE);
-
-#define LoadCdrSym0(dest, name) \
- LoadSym(CDR_##dest, CDR##dest, name, FALSE); \
- if (CDR_##dest == NULL) CDR_##dest = (CDR##dest) CDR__##dest;
-
-#define LoadCdrSymN(dest, name) \
- LoadSym(CDR_##dest, CDR##dest, name, FALSE);
-
-static int LoadCDRplugin(const char *CDRdll) {
- void *drv;
-
- if (CDRdll == NULL) {
- cdrIsoInit();
- return 0;
- }
-
- hCDRDriver = SysLoadLibrary(CDRdll);
- if (hCDRDriver == NULL) {
- CDR_configure = NULL;
- SysMessage (_("Could not load CD-ROM plugin %s!"), CDRdll); return -1;
- }
- drv = hCDRDriver;
- LoadCdrSym1(init, "CDRinit");
- LoadCdrSym1(shutdown, "CDRshutdown");
- LoadCdrSym1(open, "CDRopen");
- LoadCdrSym1(close, "CDRclose");
- LoadCdrSym1(getTN, "CDRgetTN");
- LoadCdrSym1(getTD, "CDRgetTD");
- LoadCdrSym1(readTrack, "CDRreadTrack");
- LoadCdrSym1(getBuffer, "CDRgetBuffer");
- LoadCdrSym1(getBufferSub, "CDRgetBufferSub");
- LoadCdrSym0(play, "CDRplay");
- LoadCdrSym0(stop, "CDRstop");
- LoadCdrSym0(getStatus, "CDRgetStatus");
- LoadCdrSym0(getDriveLetter, "CDRgetDriveLetter");
- LoadCdrSym0(configure, "CDRconfigure");
- LoadCdrSym0(test, "CDRtest");
- LoadCdrSym0(about, "CDRabout");
- LoadCdrSym0(setfilename, "CDRsetfilename");
- LoadCdrSymN(readCDDA, "CDRreadCDDA");
- LoadCdrSymN(getTE, "CDRgetTE");
- LoadCdrSym0(prefetch, "CDRprefetch");
-
- return 0;
-}
-
static void *hSPUDriver = NULL;\r
static void CALLBACK SPU__registerScheduleCb(void (CALLBACK *cb)(unsigned int)) {}\r
static void CALLBACK SPU__setCDvol(unsigned char ll, unsigned char lr,
extern int in_type[8];
-void *hPAD1Driver = NULL;
-void *hPAD2Driver = NULL;
-
// Pad information, keystate, mode, config mode, vibration
static PadDataS pads[8];
// refresh the button state on port 1.
// int pad is not needed.
-unsigned char CALLBACK PAD1__startPoll(int unused) {
+unsigned char PAD1_startPoll(int unused) {
int i;
reqPos = 0;
pads[0].requestPadIndex = 0;
- PAD1_readPort1(&pads[0]);
+ PAD1_readPort(&pads[0]);
pads[0].multitapLongModeEnabled = 0;
if (pads[0].portMultitap)
// a multitap is plugged and enabled: refresh pads 1-3
for (i = 1; i < 4; i++) {
pads[i].requestPadIndex = i;
- PAD1_readPort1(&pads[i]);
+ PAD1_readPort(&pads[i]);
}
}
return 0xff;
}
-unsigned char CALLBACK PAD1__poll(unsigned char value, int *more_data) {
+unsigned char PAD1_poll(unsigned char value, int *more_data) {
return PADpollMain(0, value, more_data);
}
-long CALLBACK PAD1__configure(void) { return 0; }
-void CALLBACK PAD1__about(void) {}
-long CALLBACK PAD1__test(void) { return 0; }
-long CALLBACK PAD1__query(void) { return 3; }
-long CALLBACK PAD1__keypressed() { return 0; }
-
-#define LoadPad1Sym1(dest, name) \
- LoadSym(PAD1_##dest, PAD##dest, name, TRUE);
-
-#define LoadPad1SymN(dest, name) \
- LoadSym(PAD1_##dest, PAD##dest, name, FALSE);
-
-#define LoadPad1Sym0(dest, name) \
- LoadSym(PAD1_##dest, PAD##dest, name, FALSE); \
- if (PAD1_##dest == NULL) PAD1_##dest = (PAD##dest) PAD1__##dest;
-
-static int LoadPAD1plugin(const char *PAD1dll) {
- void *drv;
- size_t p;
-
- hPAD1Driver = SysLoadLibrary(PAD1dll);
- if (hPAD1Driver == NULL) {
- PAD1_configure = NULL;
- SysMessage (_("Could not load Controller 1 plugin %s!"), PAD1dll); return -1;
- }
- drv = hPAD1Driver;
- LoadPad1Sym1(init, "PADinit");
- LoadPad1Sym1(shutdown, "PADshutdown");
- LoadPad1Sym1(open, "PADopen");
- LoadPad1Sym1(close, "PADclose");
- LoadPad1Sym0(query, "PADquery");
- LoadPad1Sym1(readPort1, "PADreadPort1");
- LoadPad1Sym0(configure, "PADconfigure");
- LoadPad1Sym0(test, "PADtest");
- LoadPad1Sym0(about, "PADabout");
- LoadPad1Sym0(keypressed, "PADkeypressed");
- LoadPad1Sym0(startPoll, "PADstartPoll");
- LoadPad1Sym0(poll, "PADpoll");
- LoadPad1SymN(setSensitive, "PADsetSensitive");
-
- memset(pads, 0, sizeof(pads));
- for (p = 0; p < sizeof(pads) / sizeof(pads[0]); p++) {
- memset(pads[p].ds.cmd4dConfig, 0xff, sizeof(pads[p].ds.cmd4dConfig));
- }
-
- return 0;
-}
-
-unsigned char CALLBACK PAD2__startPoll(int pad) {
+unsigned char PAD2_startPoll(int pad) {
int pad_index = pads[0].portMultitap ? 4 : 1;
int i;
reqPos = 0;
pads[pad_index].requestPadIndex = pad_index;
- PAD2_readPort2(&pads[pad_index]);
+ PAD2_readPort(&pads[pad_index]);
pads[pad_index].multitapLongModeEnabled = 0;
if (pads[pad_index].portMultitap)
} else {
for (i = 1; i < 4; i++) {
pads[pad_index + i].requestPadIndex = pad_index + i;
- PAD2_readPort2(&pads[pad_index + i]);
+ PAD2_readPort(&pads[pad_index + i]);
}
}
return 0xff;
}
-unsigned char CALLBACK PAD2__poll(unsigned char value, int *more_data) {
+unsigned char PAD2_poll(unsigned char value, int *more_data) {
return PADpollMain(pads[0].portMultitap ? 4 : 1, value, more_data);
}
-long CALLBACK PAD2__configure(void) { return 0; }
-void CALLBACK PAD2__about(void) {}
-long CALLBACK PAD2__test(void) { return 0; }
-long CALLBACK PAD2__query(void) { return PSE_PAD_USE_PORT1 | PSE_PAD_USE_PORT2; }
-long CALLBACK PAD2__keypressed() { return 0; }
-
-#define LoadPad2Sym1(dest, name) \
- LoadSym(PAD2_##dest, PAD##dest, name, TRUE);
-
-#define LoadPad2Sym0(dest, name) \
- LoadSym(PAD2_##dest, PAD##dest, name, FALSE); \
- if (PAD2_##dest == NULL) PAD2_##dest = (PAD##dest) PAD2__##dest;
-
-#define LoadPad2SymN(dest, name) \
- LoadSym(PAD2_##dest, PAD##dest, name, FALSE);
-
-static int LoadPAD2plugin(const char *PAD2dll) {
- void *drv;
+static void PAD_init(void) {
+ size_t p;
- hPAD2Driver = SysLoadLibrary(PAD2dll);
- if (hPAD2Driver == NULL) {
- PAD2_configure = NULL;
- SysMessage (_("Could not load Controller 2 plugin %s!"), PAD2dll); return -1;
+ memset(pads, 0, sizeof(pads));
+ for (p = 0; p < sizeof(pads) / sizeof(pads[0]); p++) {
+ memset(pads[p].ds.cmd4dConfig, 0xff, sizeof(pads[p].ds.cmd4dConfig));
}
- drv = hPAD2Driver;
- LoadPad2Sym1(init, "PADinit");
- LoadPad2Sym1(shutdown, "PADshutdown");
- LoadPad2Sym1(open, "PADopen");
- LoadPad2Sym1(close, "PADclose");
- LoadPad2Sym0(query, "PADquery");
- LoadPad2Sym1(readPort2, "PADreadPort2");
- LoadPad2Sym0(configure, "PADconfigure");
- LoadPad2Sym0(test, "PADtest");
- LoadPad2Sym0(about, "PADabout");
- LoadPad2Sym0(keypressed, "PADkeypressed");
- LoadPad2Sym0(startPoll, "PADstartPoll");
- LoadPad2Sym0(poll, "PADpoll");
- LoadPad2SymN(setSensitive, "PADsetSensitive");
-
- return 0;
}
int padFreeze(void *f, int Mode) {
return r;
}
-
-void *hNETDriver = NULL;
-
-void CALLBACK NET__setInfo(netInfo *info) {}
-void CALLBACK NET__keypressed(int key) {}
-long CALLBACK NET__configure(void) { return 0; }
-long CALLBACK NET__test(void) { return 0; }
-void CALLBACK NET__about(void) {}
-
-#define LoadNetSym1(dest, name) \
- LoadSym(NET_##dest, NET##dest, name, TRUE);
-
-#define LoadNetSymN(dest, name) \
- LoadSym(NET_##dest, NET##dest, name, FALSE);
-
-#define LoadNetSym0(dest, name) \
- LoadSym(NET_##dest, NET##dest, name, FALSE); \
- if (NET_##dest == NULL) NET_##dest = (NET##dest) NET__##dest;
-
-static int LoadNETplugin(const char *NETdll) {
- void *drv;
-
- hNETDriver = SysLoadLibrary(NETdll);
- if (hNETDriver == NULL) {
- SysMessage (_("Could not load NetPlay plugin %s!"), NETdll); return -1;
- }
- drv = hNETDriver;
- LoadNetSym1(init, "NETinit");
- LoadNetSym1(shutdown, "NETshutdown");
- LoadNetSym1(open, "NETopen");
- LoadNetSym1(close, "NETclose");
- LoadNetSymN(sendData, "NETsendData");
- LoadNetSymN(recvData, "NETrecvData");
- LoadNetSym1(sendPadData, "NETsendPadData");
- LoadNetSym1(recvPadData, "NETrecvPadData");
- LoadNetSym1(queryPlayer, "NETqueryPlayer");
- LoadNetSym1(pause, "NETpause");
- LoadNetSym1(resume, "NETresume");
- LoadNetSym0(setInfo, "NETsetInfo");
- LoadNetSym0(keypressed, "NETkeypressed");
- LoadNetSym0(configure, "NETconfigure");
- LoadNetSym0(test, "NETtest");
- LoadNetSym0(about, "NETabout");
-
- return 0;
-}
-
#ifdef ENABLE_SIO1API
void *hSIO1Driver = NULL;
#endif
int LoadPlugins() {
- int ret;
char Plugin[MAXPATHLEN * 2];
+ int ret;
ReleasePlugins();
SysLibError();
- if (UsingIso()) {
- LoadCDRplugin(NULL);
- } else {
- sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Cdr);
- if (LoadCDRplugin(Plugin) == -1) return -1;
- }
-
sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Gpu);
if (LoadGPUplugin(Plugin) == -1) return -1;
sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Spu);
if (LoadSPUplugin(Plugin) == -1) return -1;
- sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Pad1);
- if (LoadPAD1plugin(Plugin) == -1) return -1;
-
- sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Pad2);
- if (LoadPAD2plugin(Plugin) == -1) return -1;
-
- if (strcmp("Disabled", Config.Net) == 0 || strcmp("", Config.Net) == 0)
- Config.UseNet = FALSE;
- else {
- Config.UseNet = TRUE;
- sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Net);
- if (LoadNETplugin(Plugin) == -1) Config.UseNet = FALSE;
- }
-
#ifdef ENABLE_SIO1API
sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Sio1);
if (LoadSIO1plugin(Plugin) == -1) return -1;
#endif
- ret = CDR_init();
+ ret = cdra_init();
if (ret < 0) { SysMessage (_("Error initializing CD-ROM plugin: %d"), ret); return -1; }
ret = GPU_init();
if (ret < 0) { SysMessage (_("Error initializing GPU plugin: %d"), ret); return -1; }
ret = SPU_init();
if (ret < 0) { SysMessage (_("Error initializing SPU plugin: %d"), ret); return -1; }
- ret = PAD1_init(1);
- if (ret < 0) { SysMessage (_("Error initializing Controller 1 plugin: %d"), ret); return -1; }
- ret = PAD2_init(2);
- if (ret < 0) { SysMessage (_("Error initializing Controller 2 plugin: %d"), ret); return -1; }
-
- if (Config.UseNet) {
- ret = NET_init();
- if (ret < 0) { SysMessage (_("Error initializing NetPlay plugin: %d"), ret); return -1; }
- }
+ PAD_init();
#ifdef ENABLE_SIO1API
ret = SIO1_init();
}
void ReleasePlugins() {
- if (Config.UseNet) {
- int ret = NET_close();
- if (ret < 0) Config.UseNet = FALSE;
- }
- NetOpened = FALSE;
-
- if (hCDRDriver != NULL || cdrIsoActive()) CDR_shutdown();
+ cdra_shutdown();
if (hGPUDriver != NULL) GPU_shutdown();
if (hSPUDriver != NULL) SPU_shutdown();
- if (hPAD1Driver != NULL) PAD1_shutdown();
- if (hPAD2Driver != NULL) PAD2_shutdown();
- if (Config.UseNet && hNETDriver != NULL) NET_shutdown();
-
- if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; }
if (hGPUDriver != NULL) { SysCloseLibrary(hGPUDriver); hGPUDriver = NULL; }
if (hSPUDriver != NULL) { SysCloseLibrary(hSPUDriver); hSPUDriver = NULL; }
- if (hPAD1Driver != NULL) { SysCloseLibrary(hPAD1Driver); hPAD1Driver = NULL; }
- if (hPAD2Driver != NULL) { SysCloseLibrary(hPAD2Driver); hPAD2Driver = NULL; }
-
- if (Config.UseNet && hNETDriver != NULL) {
- SysCloseLibrary(hNETDriver); hNETDriver = NULL;
- }
#ifdef ENABLE_SIO1API
if (hSIO1Driver != NULL) {
// for CD swap
int ReloadCdromPlugin()
{
- if (hCDRDriver != NULL || cdrIsoActive()) CDR_shutdown();
- if (hCDRDriver != NULL) { SysCloseLibrary(hCDRDriver); hCDRDriver = NULL; }
-
- if (UsingIso()) {
- LoadCDRplugin(NULL);
- } else {
- char Plugin[MAXPATHLEN * 2];
- sprintf(Plugin, "%s/%s", Config.PluginsDir, Config.Cdr);
- if (LoadCDRplugin(Plugin) == -1) return -1;
- }
-
- return CDR_init();
+ cdra_shutdown();
+ return cdra_init();
}
void SetIsoFile(const char *filename) {
\r
typedef long (CALLBACK *GPUopen)(unsigned long *, char *, char *);\r
typedef long (CALLBACK *SPUopen)(void);\r
-typedef long (CALLBACK *PADopen)(unsigned long *);\r
-typedef long (CALLBACK *NETopen)(unsigned long *);\r
typedef long (CALLBACK *SIO1open)(unsigned long *);\r
\r
#include "spu.h"\r
+#include "gpu.h"\r
#include "decode_xa.h"\r
\r
int LoadPlugins();\r
typedef void (CALLBACK* GPUreadDataMem)(uint32_t *, int);\r
typedef long (CALLBACK* GPUdmaChain)(uint32_t *, uint32_t, uint32_t *, int32_t *);\r
typedef void (CALLBACK* GPUupdateLace)(void);\r
-typedef void (CALLBACK* GPUmakeSnapshot)(void);\r
-typedef void (CALLBACK* GPUkeypressed)(int);\r
-typedef void (CALLBACK* GPUdisplayText)(char *);\r
-typedef struct {\r
- uint32_t ulFreezeVersion;\r
- uint32_t ulStatus;\r
- uint32_t ulControl[256];\r
- unsigned char psxVRam[1024*512*2];\r
-} GPUFreeze_t;\r
typedef long (CALLBACK* GPUfreeze)(uint32_t, GPUFreeze_t *);\r
-typedef long (CALLBACK* GPUgetScreenPic)(unsigned char *);\r
-typedef long (CALLBACK* GPUshowScreenPic)(unsigned char *);\r
typedef void (CALLBACK* GPUvBlank)(int, int);\r
typedef void (CALLBACK* GPUgetScreenInfo)(int *, int *);\r
\r
extern GPUwriteData GPU_writeData;\r
extern GPUwriteDataMem GPU_writeDataMem;\r
extern GPUdmaChain GPU_dmaChain;\r
-extern GPUkeypressed GPU_keypressed;\r
-extern GPUdisplayText GPU_displayText;\r
-extern GPUmakeSnapshot GPU_makeSnapshot;\r
extern GPUfreeze GPU_freeze;\r
-extern GPUgetScreenPic GPU_getScreenPic;\r
-extern GPUshowScreenPic GPU_showScreenPic;\r
extern GPUvBlank GPU_vBlank;\r
extern GPUgetScreenInfo GPU_getScreenInfo;\r
\r
-// CD-ROM Functions\r
-typedef long (CALLBACK* CDRinit)(void);\r
-typedef long (CALLBACK* CDRshutdown)(void);\r
-typedef long (CALLBACK* CDRopen)(void);\r
-typedef long (CALLBACK* CDRclose)(void);\r
-typedef long (CALLBACK* CDRgetTN)(unsigned char *);\r
-typedef long (CALLBACK* CDRgetTD)(unsigned char, unsigned char *);\r
-typedef boolean (CALLBACK* CDRreadTrack)(unsigned char *);\r
-typedef unsigned char* (CALLBACK* CDRgetBuffer)(void);\r
-typedef unsigned char* (CALLBACK* CDRgetBufferSub)(int sector);\r
-typedef long (CALLBACK* CDRconfigure)(void);\r
-typedef long (CALLBACK* CDRtest)(void);\r
-typedef void (CALLBACK* CDRabout)(void);\r
-typedef long (CALLBACK* CDRplay)(unsigned char *);\r
-typedef long (CALLBACK* CDRstop)(void);\r
-typedef long (CALLBACK* CDRsetfilename)(char *);\r
+// CD-ROM\r
struct CdrStat {\r
uint32_t Type; // DATA, CDDA\r
uint32_t Status; // same as cdr.StatP\r
unsigned char Time_[3]; // unused\r
};\r
-typedef long (CALLBACK* CDRgetStatus)(struct CdrStat *);\r
-typedef char* (CALLBACK* CDRgetDriveLetter)(void);\r
-struct SubQ {\r
- char res0[12];\r
- unsigned char ControlAndADR;\r
- unsigned char TrackNumber;\r
- unsigned char IndexNumber;\r
- unsigned char TrackRelativeAddress[3];\r
- unsigned char Filler;\r
- unsigned char AbsoluteAddress[3];\r
- unsigned char CRC[2];\r
- char res1[72];\r
-};\r
-typedef long (CALLBACK* CDRreadCDDA)(unsigned char, unsigned char, unsigned char, unsigned char *);\r
-typedef long (CALLBACK* CDRgetTE)(unsigned char, unsigned char *, unsigned char *, unsigned char *);\r
-typedef long (CALLBACK* CDRprefetch)(unsigned char, unsigned char, unsigned char);\r
-\r
-// CD-ROM function pointers\r
-extern CDRinit CDR_init;\r
-extern CDRshutdown CDR_shutdown;\r
-extern CDRopen CDR_open;\r
-extern CDRclose CDR_close; \r
-extern CDRtest CDR_test;\r
-extern CDRgetTN CDR_getTN;\r
-extern CDRgetTD CDR_getTD;\r
-extern CDRreadTrack CDR_readTrack;\r
-extern CDRgetBuffer CDR_getBuffer;\r
-extern CDRgetBufferSub CDR_getBufferSub;\r
-extern CDRplay CDR_play;\r
-extern CDRstop CDR_stop;\r
-extern CDRgetStatus CDR_getStatus;\r
-extern CDRgetDriveLetter CDR_getDriveLetter;\r
-extern CDRconfigure CDR_configure;\r
-extern CDRabout CDR_about;\r
-extern CDRsetfilename CDR_setfilename;\r
-extern CDRreadCDDA CDR_readCDDA;\r
-extern CDRgetTE CDR_getTE;\r
-extern CDRprefetch CDR_prefetch;\r
\r
-long CALLBACK CDR__getStatus(struct CdrStat *stat);\r
+int CDR__getStatus(struct CdrStat *stat);\r
\r
// SPU Functions\r
typedef long (CALLBACK* SPUinit)(void); \r
extern SPUsetCDvol SPU_setCDvol;\r
\r
// PAD Functions\r
-typedef long (CALLBACK* PADconfigure)(void);\r
-typedef void (CALLBACK* PADabout)(void);\r
-typedef long (CALLBACK* PADinit)(long);\r
-typedef long (CALLBACK* PADshutdown)(void); \r
-typedef long (CALLBACK* PADtest)(void); \r
-typedef long (CALLBACK* PADclose)(void);\r
-typedef long (CALLBACK* PADquery)(void);\r
-typedef long (CALLBACK* PADreadPort1)(PadDataS*);\r
-typedef long (CALLBACK* PADreadPort2)(PadDataS*);\r
-typedef long (CALLBACK* PADkeypressed)(void);\r
-typedef unsigned char (CALLBACK* PADstartPoll)(int);\r
-typedef unsigned char (CALLBACK* PADpoll)(unsigned char, int *);\r
-typedef void (CALLBACK* PADsetSensitive)(int);\r
-\r
-// PAD function pointers\r
-extern PADconfigure PAD1_configure;\r
-extern PADabout PAD1_about;\r
-extern PADinit PAD1_init;\r
-extern PADshutdown PAD1_shutdown;\r
-extern PADtest PAD1_test;\r
-extern PADopen PAD1_open;\r
-extern PADclose PAD1_close;\r
-extern PADquery PAD1_query;\r
-extern PADreadPort1 PAD1_readPort1;\r
-extern PADkeypressed PAD1_keypressed;\r
-extern PADstartPoll PAD1_startPoll;\r
-extern PADpoll PAD1_poll;\r
-extern PADsetSensitive PAD1_setSensitive;\r
-\r
-extern PADconfigure PAD2_configure;\r
-extern PADabout PAD2_about;\r
-extern PADinit PAD2_init;\r
-extern PADshutdown PAD2_shutdown;\r
-extern PADtest PAD2_test;\r
-extern PADopen PAD2_open;\r
-extern PADclose PAD2_close;\r
-extern PADquery PAD2_query;\r
-extern PADreadPort2 PAD2_readPort2;\r
-extern PADkeypressed PAD2_keypressed;\r
-extern PADstartPoll PAD2_startPoll;\r
-extern PADpoll PAD2_poll;\r
-extern PADsetSensitive PAD2_setSensitive;\r
-\r
-// NET Functions\r
-typedef long (CALLBACK* NETinit)(void);\r
-typedef long (CALLBACK* NETshutdown)(void);\r
-typedef long (CALLBACK* NETclose)(void);\r
-typedef long (CALLBACK* NETconfigure)(void);\r
-typedef long (CALLBACK* NETtest)(void);\r
-typedef void (CALLBACK* NETabout)(void);\r
-typedef void (CALLBACK* NETpause)(void);\r
-typedef void (CALLBACK* NETresume)(void);\r
-typedef long (CALLBACK* NETqueryPlayer)(void);\r
-typedef long (CALLBACK* NETsendData)(void *, int, int);\r
-typedef long (CALLBACK* NETrecvData)(void *, int, int);\r
-typedef long (CALLBACK* NETsendPadData)(void *, int);\r
-typedef long (CALLBACK* NETrecvPadData)(void *, int);\r
-\r
-typedef struct {\r
- char EmuName[32];\r
- char CdromID[9]; // ie. 'SCPH12345', no \0 trailing character\r
- char CdromLabel[11];\r
- void *psxMem;\r
- GPUshowScreenPic GPU_showScreenPic;\r
- GPUdisplayText GPU_displayText;\r
- PADsetSensitive PAD_setSensitive;\r
- char GPUpath[256]; // paths must be absolute\r
- char SPUpath[256];\r
- char CDRpath[256];\r
- char MCD1path[256];\r
- char MCD2path[256];\r
- char BIOSpath[256]; // 'HLE' for internal bios\r
- char Unused[1024];\r
-} netInfo;\r
-\r
-typedef long (CALLBACK* NETsetInfo)(netInfo *);\r
-typedef long (CALLBACK* NETkeypressed)(int);\r
+long PAD1_readPort(PadDataS *);\r
+unsigned char PAD1_startPoll(int);\r
+unsigned char PAD1_poll(unsigned char, int *);\r
\r
-// NET function pointers \r
-extern NETinit NET_init;\r
-extern NETshutdown NET_shutdown;\r
-extern NETopen NET_open;\r
-extern NETclose NET_close; \r
-extern NETtest NET_test;\r
-extern NETconfigure NET_configure;\r
-extern NETabout NET_about;\r
-extern NETpause NET_pause;\r
-extern NETresume NET_resume;\r
-extern NETqueryPlayer NET_queryPlayer;\r
-extern NETsendData NET_sendData;\r
-extern NETrecvData NET_recvData;\r
-extern NETsendPadData NET_sendPadData;\r
-extern NETrecvPadData NET_recvPadData;\r
-extern NETsetInfo NET_setInfo;\r
-extern NETkeypressed NET_keypressed;\r
+long PAD2_readPort(PadDataS *);\r
+unsigned char PAD2_startPoll(int);\r
+unsigned char PAD2_poll(unsigned char, int *);\r
\r
#ifdef ENABLE_SIO1API\r
\r
\r
extern void pl_gun_byte2(int port, unsigned char byte);\r
extern void plat_trigger_vibrate(int pad, int low, int high);\r
-extern void plat_get_psx_resolution(int *xres, int *yres);\r
\r
#ifdef __cplusplus\r
}\r
void CheckPPFCache(unsigned char *pB, unsigned char m, unsigned char s, unsigned char f) {
PPF_CACHE *pcstart, *pcend, *pcpos;
- int addr = MSF2SECT(btoi(m), btoi(s), btoi(f)), pos, anz, start;
+ int addr = MSF2SECT(m, s, f), pos, anz, start;
if (ppfCache == NULL) return;
}
}
-void BuildPPFCache() {
+void BuildPPFCache(const char *fname) {
FILE *ppffile;
char buffer[12];
char method, undo = 0, blockcheck = 0;
if (CdromId[0] == '\0') return;
- // Generate filename in the format of SLUS_123.45
- buffer[0] = toupper(CdromId[0]);
- buffer[1] = toupper(CdromId[1]);
- buffer[2] = toupper(CdromId[2]);
- buffer[3] = toupper(CdromId[3]);
- buffer[4] = '_';
- buffer[5] = CdromId[4];
- buffer[6] = CdromId[5];
- buffer[7] = CdromId[6];
- buffer[8] = '.';
- buffer[9] = CdromId[7];
- buffer[10] = CdromId[8];
- buffer[11] = '\0';
-
- sprintf(szPPF, "%s%s", Config.PatchesDir, buffer);
-
- ppffile = fopen(szPPF, "rb");
+ if (!fname) {
+ // Generate filename in the format of SLUS_123.45
+ buffer[0] = toupper(CdromId[0]);
+ buffer[1] = toupper(CdromId[1]);
+ buffer[2] = toupper(CdromId[2]);
+ buffer[3] = toupper(CdromId[3]);
+ buffer[4] = '_';
+ buffer[5] = CdromId[4];
+ buffer[6] = CdromId[5];
+ buffer[7] = CdromId[6];
+ buffer[8] = '.';
+ buffer[9] = CdromId[7];
+ buffer[10] = CdromId[8];
+ buffer[11] = '\0';
+
+ sprintf(szPPF, "%s%s", Config.PatchesDir, buffer);
+ fname = szPPF;
+ }
+ ppffile = fopen(fname, "rb");
if (ppffile == NULL) return;
memset(buffer, 0, 5);
goto fail_io;
if (strcmp(buffer, "PPF") != 0) {
- SysPrintf(_("Invalid PPF patch: %s.\n"), szPPF);
+ SysPrintf(_("Invalid PPF patch: %s.\n"), fname);
fclose(ppffile);
return;
}
FillPPFCache(); // build address array
- SysPrintf(_("Loaded PPF %d.0 patch: %s.\n"), method + 1, szPPF);
+ SysPrintf(_("Loaded PPF %d.0 patch: %s.\n"), method + 1, fname);
+ return;
fail_io:
#ifndef NDEBUG
extern "C" {
#endif
-void BuildPPFCache();
+void BuildPPFCache(const char *fname);
void FreePPFCache();
void CheckPPFCache(unsigned char *pB, unsigned char m, unsigned char s, unsigned char f);
#define CARD_HARDLER_READM 0x5689 // fake, for psxBios_read()
#define CARD_HARDLER_INFO 0x5B64
-#define HLEOP(n) SWAPu32((0x3b << 26) | (n));
+#define HLEOP(n) SWAPu32((0x3bu << 26) | (n));
static u8 loadRam8(u32 addr)
{
static int returned_from_exception(void)
{
// 0x80000080 means it took another exception just after return
- return pc0 == k0 || pc0 == 0x80000080;
+ return pc0 == k0 || pc0 == 0x80000080
+#ifdef LIGHTREC
+ // lightrec doesn't return at 0x80000080, so look
+ // for the next block too
+ || pc0 == A_EXCEPTION
+#endif
+ ;
+}
+
+int psxBiosSoftcallEnded(void)
+{
+ return pc0 == 0x80001000 || returned_from_exception();
}
+// TODO: get rid of this softCall() thing as recursive cpu calls cause
+// complications with dynarecs
static inline void softCall(u32 pc) {
u32 sra = ra;
u32 ssr = psxRegs.CP0.n.SR;
psxRegs.cpuInRecursion++;
psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, PTR_1);
- while (pc0 != 0x80001000 && ++lim < 0x100000)
- psxCpu->ExecuteBlock(EXEC_CALLER_HLE);
+ while (!psxBiosSoftcallEnded() && ++lim < 0x100000)
+ psxCpu->ExecuteBlock(&psxRegs, EXEC_CALLER_HLE);
psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, PTR_1);
psxRegs.cpuInRecursion--;
- if (lim == 0x100000)
- PSXBIOS_LOG("softCall @%x hit lim\n", pc);
+ if (pc0 != 0x80001000)
+ log_unhandled("%s @%x did not return (@%x cnt=%d)\n",
+ __func__, pc, pc0, lim);
ra = sra;
psxRegs.CP0.n.SR |= ssr & 0x404;
}
psxRegs.cpuInRecursion++;
psxCpu->Notify(R3000ACPU_NOTIFY_AFTER_LOAD, PTR_1);
- while (!returned_from_exception() && pc0 != 0x80001000 && ++lim < 0x100000)
- psxCpu->ExecuteBlock(EXEC_CALLER_HLE);
+ while (!psxBiosSoftcallEnded() && ++lim < 0x100000)
+ psxCpu->ExecuteBlock(&psxRegs, EXEC_CALLER_HLE);
psxCpu->Notify(R3000ACPU_NOTIFY_BEFORE_SAVE, PTR_1);
psxRegs.cpuInRecursion--;
- if (lim == 0x100000)
- PSXBIOS_LOG("softCallInException @%x hit lim\n", pc);
+ if (pc0 != 0x80001000 && !psxBiosSoftcallEnded())
+ log_unhandled("%s @%x did not return (@%x cnt=%d)\n",
+ __func__, pc, pc0, lim);
if (pc0 == 0x80001000)
ra = sra;
}
{
u8 r = 0;
if (psxRegs.interrupt & (1u << PSXINT_CDR)) {
- if ((s32)(psxRegs.cycle - event_cycles[PSXINT_CDR]) < 0)
- psxRegs.cycle = event_cycles[PSXINT_CDR] + 1;
+ if ((s32)(psxRegs.cycle - psxRegs.event_cycles[PSXINT_CDR]) < 0)
+ psxRegs.cycle = psxRegs.event_cycles[PSXINT_CDR] + 1;
irq_test(&psxRegs.CP0);
}
if (do_ack) {
// retrigger this hlecall after the next emulation event
pc0 -= 4;
- if ((s32)(next_interupt - psxRegs.cycle) > 0)
- psxRegs.cycle = next_interupt;
+ if ((s32)(psxRegs.next_interupt - psxRegs.cycle) > 0)
+ psxRegs.cycle = psxRegs.next_interupt;
psxBranchTest();
}
biosA0[0x03] = biosB0[0x35] = psxBios_write_psxout;
biosA0[0x3c] = biosB0[0x3d] = psxBios_putchar_psxout;
biosA0[0x3e] = biosB0[0x3f] = psxBios_puts_psxout;
- biosA0[0x3f] = psxBios_printf_psxout;
+ // calls putchar() internally so no need to override
+ //biosA0[0x3f] = psxBios_printf_psxout;
if (!Config.HLE) {
char verstr[0x24+1];
if (cycles_passed < 10 || cycles_passed > 50 || v0 != v0_expect)
return;
- waste_cycles = schedule_timeslice() - psxRegs.cycle;
+ waste_cycles = schedule_timeslice(&psxRegs) - psxRegs.cycle;
loops = waste_cycles / cycles_passed;
if (loops > v0)
loops = v0;
void psxBiosSetupBootState(void);
void psxBiosCheckExe(u32 t_addr, u32 t_size, int loading_state);
void psxBiosCheckBranch(void);
+int psxBiosSoftcallEnded(void);
extern void (*biosA0[256])();
extern void (**biosB0)();
void __Log(char *fmt, ...);
+// lots of timing depends on this and makes or breaks compatibility,
+// don't change unless you're going to retest hundreds of games
#define CYCLE_MULT_DEFAULT 175
typedef struct {
char Gpu[MAXPATHLEN];
char Spu[MAXPATHLEN];
- char Cdr[MAXPATHLEN];
- char Pad1[MAXPATHLEN];
- char Pad2[MAXPATHLEN];
- char Net[MAXPATHLEN];
char Sio1[MAXPATHLEN];
char Mcd1[MAXPATHLEN];
char Mcd2[MAXPATHLEN];
boolean Mdec;
boolean PsxAuto;
boolean Cdda;
- boolean AsyncCD;
boolean CHD_Precache; /* loads disk image into memory, works with CHD only. */
boolean HLE;
boolean SlowBoot;
boolean Debug;
boolean PsxOut;
- boolean UseNet;
boolean icache_emulation;
boolean DisableStalls;
boolean PreciseExceptions;
+ boolean TurboCD;
int cycle_multiplier; // 100 for 1.0
int cycle_multiplier_override;
+ int gpu_timing_override;
s8 GpuListWalking;
s8 FractionalFramerate; // ~49.75 and ~59.81 instead of 50 and 60
u8 Cpu; // CPU_DYNAREC or CPU_INTERPRETER
boolean gpu_slow_list_walking;
boolean gpu_centering;
boolean dualshock_init_analog;
- boolean gpu_timing1024;
boolean fractional_Framerate;
+ boolean f1;
} hacks;
} PcsxConfig;
extern PcsxConfig Config;
-extern boolean NetOpened;
struct PcsxSaveFuncs {
void *(*open)(const char *name, const char *mode);
#ifdef DRC_DISABLE
Rcnt rcnts[ CounterQuantity ];
#endif
-u32 hSyncCount = 0;
-u32 frame_counter = 0;
+unsigned int hSyncCount = 0;
+unsigned int frame_counter = 0;
static u32 hsync_steps = 0;
-u32 psxNextCounter = 0, psxNextsCounter = 0;
-
/******************************************************************************/
#define FPS_FRACTIONAL_PAL (53203425/314./3406) // ~49.75
s32 countToUpdate;
u32 i;
- psxNextsCounter = psxRegs.cycle;
- psxNextCounter = 0x7fffffff;
+ psxRegs.psxNextsCounter = psxRegs.cycle;
+ psxRegs.psxNextCounter = 0x7fffffff;
for( i = 0; i < CounterQuantity; ++i )
{
- countToUpdate = rcnts[i].cycle - (psxNextsCounter - rcnts[i].cycleStart);
+ countToUpdate = rcnts[i].cycle - (psxRegs.psxNextsCounter - rcnts[i].cycleStart);
if( countToUpdate < 0 )
{
- psxNextCounter = 0;
+ psxRegs.psxNextCounter = 0;
break;
}
- if( countToUpdate < (s32)psxNextCounter )
+ if( countToUpdate < (s32)psxRegs.psxNextCounter )
{
- psxNextCounter = countToUpdate;
+ psxRegs.psxNextCounter = countToUpdate;
}
}
- set_event(PSXINT_RCNT, psxNextCounter);
+ set_event(PSXINT_RCNT, psxRegs.psxNextCounter);
}
/******************************************************************************/
gzfreeze( &rcnts, sizeof(Rcnt) * CounterQuantity );
gzfreeze( &hSyncCount, sizeof(hSyncCount) );
gzfreeze( &spuSyncCount, sizeof(spuSyncCount) );
- gzfreeze( &psxNextCounter, sizeof(psxNextCounter) );
- gzfreeze( &psxNextsCounter, sizeof(psxNextsCounter) );
+ gzfreeze( &psxRegs.psxNextCounter, sizeof(psxRegs.psxNextCounter) );
+ gzfreeze( &psxRegs.psxNextsCounter, sizeof(psxRegs.psxNextsCounter) );
if (Mode == 0)
{
#include "psxmem.h"
#include "plugins.h"
-extern u32 psxNextCounter, psxNextsCounter;
-
-extern u32 hSyncCount, frame_counter;
+extern unsigned int hSyncCount, frame_counter;
typedef struct Rcnt
{
void psxDma2(u32 madr, u32 bcr, u32 chcr) { // GPU
u32 *ptr, madr_next, *madr_next_p;
u32 words, words_left, words_max, words_copy;
- int cycles_sum, cycles_last_cmd = 0, do_walking;
+ s32 cycles_last_cmd = 0;
+ int do_walking;
+ long cycles_sum;
madr &= ~3;
switch (chcr) {
case 0x01000200: // vram2mem
- PSXDMA_LOG("*** DMA2 GPU - vram2mem *** %lx addr = %lx size = %lx\n", chcr, madr, bcr);
+ PSXDMA_LOG("*** DMA2 GPU - vram2mem *** %x addr = %x size = %x\n", chcr, madr, bcr);
ptr = getDmaRam(madr, &words_max);
if (ptr == INVALID_PTR) {
log_unhandled("bad dma2 madr %x\n", madr);
return;
case 0x01000201: // mem2vram
- PSXDMA_LOG("*** DMA 2 - GPU mem2vram *** %lx addr = %lx size = %lx\n", chcr, madr, bcr);
+ PSXDMA_LOG("*** DMA 2 - GPU mem2vram *** %x addr = %x size = %x\n", chcr, madr, bcr);
words = words_left = (bcr >> 16) * (bcr & 0xffff);
while (words_left > 0) {
ptr = getDmaRam(madr, &words_max);
return;
case 0x01000401: // dma chain
- PSXDMA_LOG("*** DMA 2 - GPU dma chain *** %lx addr = %lx size = %lx\n", chcr, madr, bcr);
+ PSXDMA_LOG("*** DMA 2 - GPU dma chain *** %x addr = %x size = %x\n", chcr, madr, bcr);
// when not emulating walking progress, end immediately
+ // (some games abort the dma and read madr so break out of that logic)
madr_next = 0xffffff;
do_walking = Config.GpuListWalking;
- if (do_walking < 0 || Config.hacks.gpu_timing1024)
+ if (do_walking < 0)
do_walking = Config.hacks.gpu_slow_list_walking;
madr_next_p = do_walking ? &madr_next : NULL;
HW_DMA2_MADR = SWAPu32(madr_next);
- // a hack for Judge Dredd which is annoyingly sensitive to timing
- if (Config.hacks.gpu_timing1024)
- cycles_sum = 1024;
+ // timing hack with some lame heuristics
+ if (Config.gpu_timing_override && (do_walking || cycles_sum > 64)
+ && !(HW_GPU_STATUS & SWAP32(PSXGPU_DHEIGHT | PSXGPU_RGB24)))
+ cycles_sum = Config.gpu_timing_override;
psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd;
set_event(PSXINT_GPUDMA, cycles_sum);
- //printf("%u dma2cf: %6d,%4d %08x %08x %08x %08x\n", psxRegs.cycle,
+ //printf("%u dma2cf: %6ld,%4d %08x %08x %08x %08x\n", psxRegs.cycle,
// cycles_sum, cycles_last_cmd, madr, bcr, chcr, HW_DMA2_MADR);
return;
void gpuInterrupt() {
if (HW_DMA2_CHCR == SWAP32(0x01000401) && !(HW_DMA2_MADR & SWAP32(0x800000)))
{
- u32 madr_next = 0xffffff, madr = SWAPu32(HW_DMA2_MADR);
- int cycles_sum, cycles_last_cmd = 0;
- cycles_sum = GPU_dmaChain((u32 *)psxM, madr & 0x1fffff,
- &madr_next, &cycles_last_cmd);
+ u32 madr_next = SWAPu32(HW_DMA2_MADR);
+ s32 cycles_sum = psxRegs.gpuIdleAfter - psxRegs.cycle;
+ s32 cycles_last_cmd = 0;
+
+ do {
+ cycles_sum += cycles_last_cmd;
+ cycles_sum += GPU_dmaChain((u32 *)psxM, madr_next & 0x1fffff,
+ &madr_next, &cycles_last_cmd);
+ }
+ while (cycles_sum <= 0 && !(madr_next & 0x800000));
HW_DMA2_MADR = SWAPu32(madr_next);
- if ((s32)(psxRegs.gpuIdleAfter - psxRegs.cycle) > 0)
- cycles_sum += psxRegs.gpuIdleAfter - psxRegs.cycle;
psxRegs.gpuIdleAfter = psxRegs.cycle + cycles_sum + cycles_last_cmd;
set_event(PSXINT_GPUDMA, cycles_sum);
//printf("%u dma2cn: %6d,%4d %08x\n", psxRegs.cycle, cycles_sum,
- // cycles_last_cmd, HW_DMA2_MADR);
+ // cycles_last_cmd, HW_DMA2_MADR);
return;
}
if (HW_DMA2_CHCR & SWAP32(0x01000000))
+#include <stddef.h>
#include <stdio.h>
#include "r3000a.h"
#include "cdrom.h"
#include "mdec.h"
#include "psxevents.h"
-extern int pending_exception;
-
//#define evprintf printf
#define evprintf(...)
-u32 event_cycles[PSXINT_COUNT];
+static psxRegisters *cp0TOpsxRegs(psxCP0Regs *cp0)
+{
+#ifndef LIGHTREC
+ return (void *)((char *)cp0 - offsetof(psxRegisters, CP0));
+#else
+ // lightrec has it's own cp0
+ return &psxRegs;
+#endif
+}
-u32 schedule_timeslice(void)
+u32 schedule_timeslice(psxRegisters *regs)
{
- u32 i, c = psxRegs.cycle;
- u32 irqs = psxRegs.interrupt;
+ u32 i, c = regs->cycle;
+ u32 irqs = regs->interrupt;
s32 min, dif;
min = PSXCLK;
for (i = 0; irqs != 0; i++, irqs >>= 1) {
if (!(irqs & 1))
continue;
- dif = event_cycles[i] - c;
+ dif = regs->event_cycles[i] - c;
//evprintf(" ev %d\n", dif);
if (0 < dif && dif < min)
min = dif;
}
- next_interupt = c + min;
- return next_interupt;
+ regs->next_interupt = c + min;
+ return regs->next_interupt;
}
static void irqNoOp() {
[PSXINT_RCNT] = psxRcntUpdate,
};
-/* local dupe of psxBranchTest, using event_cycles */
void irq_test(psxCP0Regs *cp0)
{
- u32 cycle = psxRegs.cycle;
+ psxRegisters *regs = cp0TOpsxRegs(cp0);
+ u32 cycle = regs->cycle;
u32 irq, irq_bits;
- for (irq = 0, irq_bits = psxRegs.interrupt; irq_bits != 0; irq++, irq_bits >>= 1) {
+ for (irq = 0, irq_bits = regs->interrupt; irq_bits != 0; irq++, irq_bits >>= 1) {
if (!(irq_bits & 1))
continue;
- if ((s32)(cycle - event_cycles[irq]) >= 0) {
- // note: irq_funcs() also modify psxRegs.interrupt
- psxRegs.interrupt &= ~(1u << irq);
+ if ((s32)(cycle - regs->event_cycles[irq]) >= 0) {
+ // note: irq_funcs() also modify regs->interrupt
+ regs->interrupt &= ~(1u << irq);
irq_funcs[irq]();
}
}
cp0->n.Cause &= ~0x400;
if (psxHu32(0x1070) & psxHu32(0x1074))
cp0->n.Cause |= 0x400;
- if (((cp0->n.Cause | 1) & cp0->n.SR & 0x401) == 0x401) {
+ if (((cp0->n.Cause | 1) & cp0->n.SR & 0x401) == 0x401)
psxException(0, 0, cp0);
- pending_exception = 1;
- }
}
void gen_interupt(psxCP0Regs *cp0)
{
- evprintf(" +ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
- next_interupt, next_interupt - psxRegs.cycle);
+ psxRegisters *regs = cp0TOpsxRegs(cp0);
- irq_test(cp0);
- //pending_exception = 1;
+ evprintf(" +ge %08x, %u->%u (%d)\n", regs->pc, regs->cycle,
+ regs->next_interupt, regs->next_interupt - regs->cycle);
- schedule_timeslice();
+ irq_test(cp0);
+ schedule_timeslice(regs);
- evprintf(" -ge %08x, %u->%u (%d)\n", psxRegs.pc, psxRegs.cycle,
- next_interupt, next_interupt - psxRegs.cycle);
+ evprintf(" -ge %08x, %u->%u (%d)\n", regs->pc, regs->cycle,
+ regs->next_interupt, regs->next_interupt - regs->cycle);
}
void events_restore(void)
{
int i;
for (i = 0; i < PSXINT_COUNT; i++)
- event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
+ psxRegs.event_cycles[i] = psxRegs.intCycle[i].sCycle + psxRegs.intCycle[i].cycle;
- event_cycles[PSXINT_RCNT] = psxNextsCounter + psxNextCounter;
+ psxRegs.event_cycles[PSXINT_RCNT] = psxRegs.psxNextsCounter + psxRegs.psxNextCounter;
psxRegs.interrupt |= 1 << PSXINT_RCNT;
psxRegs.interrupt &= (1 << PSXINT_COUNT) - 1;
}
PSXINT_COUNT
};
-extern u32 event_cycles[PSXINT_COUNT];
-extern u32 next_interupt;
-extern int stop;
-
#define set_event_raw_abs(e, abs) { \
u32 abs_ = abs; \
- s32 di_ = next_interupt - abs_; \
- event_cycles[e] = abs_; \
+ s32 di_ = psxRegs.next_interupt - abs_; \
+ psxRegs.event_cycles[e] = abs_; \
if (di_ > 0) { \
- /*printf("%u: next_interupt %u -> %u\n", psxRegs.cycle, next_interupt, abs_);*/ \
- next_interupt = abs_; \
+ /*printf("%u: next_interupt %u -> %u\n", psxRegs.cycle, psxRegs.next_interupt, abs_);*/ \
+ psxRegs.next_interupt = abs_; \
} \
}
} while (0)
union psxCP0Regs_;
-u32 schedule_timeslice(void);
+struct psxRegisters;
+
+u32 schedule_timeslice(struct psxRegisters *regs);
void irq_test(union psxCP0Regs_ *cp0);
void gen_interupt(union psxCP0Regs_ *cp0);
void events_restore(void);
#include "mdec.h"
#include "cdrom.h"
#include "gpu.h"
+#include "../include/compiler_features.h"
void psxHwReset() {
memset(psxH, 0, 0x10000);
if ((value ^ old) & 0x01000000) { \
if (!(value & 0x01000000)) \
abort_func; \
- else if (SWAPu32(HW_DMA_PCR) & (8u << (n * 4))) \
+ else if (HW_DMA_PCR & SWAPu32(8u << (n * 4))) \
psxDma##n(SWAPu32(HW_DMA##n##_MADR), SWAPu32(HW_DMA##n##_BCR), value); \
} \
}
make_dma_func(4,)
make_dma_func(6,)
+void psxHwWriteDmaPcr32(u32 value)
+{
+ // todo: can this also pause/stop live dma?
+ u32 on = (SWAPu32(HW_DMA_PCR) ^ value) & value & 0x08888888;
+ u32 chcr;
+ HW_DMA_PCR = SWAPu32(value);
+ if (likely(!on))
+ return;
+ #define DO(n) \
+ chcr = SWAPu32(HW_DMA##n##_CHCR); \
+ if ((on & (8u << 4*n)) && (chcr & 0x01000000)) \
+ psxDma##n(SWAPu32(HW_DMA##n##_MADR), SWAPu32(HW_DMA##n##_BCR), chcr)
+ DO(0);
+ DO(1);
+ // breaks Kyuutenkai. Probably needs better timing or
+ // proper gpu side dma enable handling
+ //DO(2);
+ if ((on & (8u << 4*2)) && (SWAPu32(HW_DMA2_CHCR) & 0x01000000))
+ log_unhandled("dma2 pcr write ignored\n");
+ DO(3);
+ DO(4);
+ DO(6);
+ #undef DO
+}
+
void psxHwWriteDmaIcr32(u32 value)
{
u32 tmp = value & 0x00ff803f;
case 0x10cc: psxHwWriteChcr4(value); return;
case 0x10e8: // DMA6 chcr (OT clear)
case 0x10ec: psxHwWriteChcr6(value); return;
+ case 0x10f0: psxHwWriteDmaPcr32(value); return;
case 0x10f4: psxHwWriteDmaIcr32(value); return;
// forced write32 with no immediate effect:
case 0x10c0:
case 0x10d0:
case 0x10e0:
- case 0x10f0:
psxHu32ref(add) = SWAPu32(value);
return;
case 0x10cc: psxHwWriteChcr4(value); return;
case 0x10e8: // DMA6 chcr (OT clear)
case 0x10ec: psxHwWriteChcr6(value); return;
+ case 0x10f0: psxHwWriteDmaPcr32(value); return;
case 0x10f4: psxHwWriteDmaIcr32(value); return;
case 0x1810: GPU_writeData(value); return;
#define HW_DMA_PCR (psxHu32ref(0x10f0))
#define HW_DMA_ICR (psxHu32ref(0x10f4))
-#define HW_DMA_ICR_BUS_ERROR (1<<15)
-#define HW_DMA_ICR_GLOBAL_ENABLE (1<<23)
-#define HW_DMA_ICR_IRQ_SENT (1<<31)
+#define HW_DMA_ICR_BUS_ERROR (1u << 15)
+#define HW_DMA_ICR_GLOBAL_ENABLE (1u << 23)
+#define HW_DMA_ICR_IRQ_SENT (1u << 31)
#define DMA_INTERRUPT(n) { \
u32 icr = SWAPu32(HW_DMA_ICR); \
- if (icr & (1 << (16 + n))) { \
- icr |= 1 << (24 + n); \
+ if (icr & (1u << (16 + n))) { \
+ icr |= 1u << (24 + n); \
if (icr & HW_DMA_ICR_GLOBAL_ENABLE && !(icr & HW_DMA_ICR_IRQ_SENT)) { \
psxHu32ref(0x1070) |= SWAP32(8); \
icr |= HW_DMA_ICR_IRQ_SENT; \
void psxHwWriteChcr3(u32 value);
void psxHwWriteChcr4(u32 value);
void psxHwWriteChcr6(u32 value);
+void psxHwWriteDmaPcr32(u32 value);
void psxHwWriteDmaIcr32(u32 value);
void psxHwWriteGpuSR(u32 value);
u32 psxHwReadGpuSR(void);
#define DO_EXCEPTION_RESERVEDI
#define HANDLE_LOAD_DELAY
-static int branchSeen = 0;
-
#ifdef __i386__
#define INT_ATTR __attribute__((regparm(2)))
#else
cp0->n.Cause |= (regs->branching << 30) | (R3000E_Bp << 2);
cp0->n.SR = (cp0->n.SR & ~0x3f) | ((cp0->n.SR & 0x0f) << 2);
cp0->n.EPC = regs->branching ? pc - 4 : pc;
- psxRegs.pc = 0x80000040;
+ regs->pc = 0x80000040;
}
static int execBreakCheck(psxRegisters *regs, u32 pc)
static void doBranch(psxRegisters *regs, u32 tar, enum R3000Abdt taken) {
u32 code, pc, pc_final;
- branchSeen = regs->branching = taken;
+ regs->branchSeen = regs->branching = taken;
pc_final = taken == R3000A_BRANCH_TAKEN ? tar : regs->pc + 4;
// fetch the delay slot
return 0;
}
if (unlikely(BUS_LOCKED_ADDR(addr))) {
+ log_unhandled("bus error read addr=%08x @%08x ra=%08x\n",
+ addr, regs->pc - 4, regs->GPR.n.ra);
intException(regs, regs->pc - 4, R3000E_DBE << 2);
return 0;
}
return 0;
}
if (unlikely(BUS_LOCKED_ADDR(addr))) {
+ log_unhandled("bus error write addr=%08x @%08x ra=%08x\n",
+ addr, regs->pc - 4, regs->GPR.n.ra);
intException(regs, regs->pc - 4, R3000E_DBE << 2);
return 0;
}
// SysPrintf("MTC0 %d: %x\n", reg, val);
switch (reg) {
case 12: // SR
- if (unlikely((regs_->CP0.n.SR ^ val) & (1 << 16)))
+ if (unlikely((regs_->CP0.n.SR ^ val) & (1u << 16)))
psxMemOnIsolate((val >> 16) & 1);
- if (unlikely((regs_->CP0.n.SR ^ val) & (7 << 29)))
+ if (unlikely((regs_->CP0.n.SR ^ val) & (7u << 29)))
setupCop(val);
regs_->CP0.n.SR = val;
psxTestSWInts(regs_, 1);
}
dloadFlush(regs_);
psxHLEt[hleCode]();
- branchSeen = 1;
+ regs_->branchSeen = 1;
}
static void (INT_ATTR *psxBSC[64])(psxRegisters *regs_, u32 code) = {
///////////////////////////////////////////
static int intInit() {
+ intApplyConfig();
return 0;
}
psxBSC[regs->code >> 26](regs, regs->code);
}
-static void intExecute() {
- psxRegisters *regs_ = &psxRegs;
+static void intExecute(psxRegisters *regs) {
+ u8 **memRLUT = psxMemRLUT;
+
+ while (!regs->stop)
+ execI_(memRLUT, regs);
+}
+
+static void intExecuteBp(psxRegisters *regs) {
u8 **memRLUT = psxMemRLUT;
- extern int stop;
- while (!stop)
- execI_(memRLUT, regs_);
+ while (!regs->stop)
+ execIbp(memRLUT, regs);
}
-static void intExecuteBp() {
- psxRegisters *regs_ = &psxRegs;
+static void intExecuteBlock(psxRegisters *regs, enum blockExecCaller caller) {
u8 **memRLUT = psxMemRLUT;
- extern int stop;
- while (!stop)
- execIbp(memRLUT, regs_);
+ regs->branchSeen = 0;
+ while (!regs->branchSeen)
+ execI_(memRLUT, regs);
}
-void intExecuteBlock(enum blockExecCaller caller) {
- psxRegisters *regs_ = &psxRegs;
+static void intExecuteBlockBp(psxRegisters *regs, enum blockExecCaller caller) {
u8 **memRLUT = psxMemRLUT;
- branchSeen = 0;
- while (!branchSeen)
- execI_(memRLUT, regs_);
+ regs->branchSeen = 0;
+ while (!regs->branchSeen)
+ execIbp(memRLUT, regs);
}
static void intClear(u32 Addr, u32 Size) {
setupCop(psxRegs.CP0.n.SR);
// fallthrough
case R3000ACPU_NOTIFY_CACHE_ISOLATED: // Armored Core?
- memset(&ICache, 0xff, sizeof(ICache));
+ if (fetch == fetchICache)
+ memset(&ICache, 0xff, sizeof(ICache));
break;
case R3000ACPU_NOTIFY_CACHE_UNISOLATED:
break;
psxSPC[0x08] = psxJRe;
psxSPC[0x09] = psxJALRe;
psxInt.Execute = intExecuteBp;
+ psxInt.ExecuteBlock = intExecuteBlockBp;
} else {
psxBSC[0x20] = psxLB;
psxBSC[0x21] = psxLH;
psxSPC[0x08] = psxJR;
psxSPC[0x09] = psxJALR;
psxInt.Execute = intExecute;
+ psxInt.ExecuteBlock = intExecuteBlock;
}
// the dynarec may occasionally call the interpreter, in such a case the
// cache won't work (cache only works right if all fetches go through it)
- if (!Config.icache_emulation || psxCpu != &psxInt)
+ if (!Config.icache_emulation || psxCpu != &psxInt) {
fetch = fetchNoCache;
+ memset(&ICache, 0xff, sizeof(ICache));
+ }
else
fetch = fetchICache;
#ifndef __PSXINTERPRETER_H__
#define __PSXINTERPRETER_H__
+struct psxRegisters;
+struct psxCP2Regs;
+
// get an opcode without triggering exceptions or affecting cache
u32 intFakeFetch(u32 pc);
// called by "new_dynarec"
-void execI(psxRegisters *regs);
+void execI(struct psxRegisters *regs);
void intApplyConfig();
-void MTC0(psxRegisters *regs_, int reg, u32 val);
+void MTC0(struct psxRegisters *regs, int reg, u32 val);
void gteNULL(struct psxCP2Regs *regs);
extern void (*psxCP2[64])(struct psxCP2Regs *regs);
-// called by lightrec
-void intExecuteBlock(enum blockExecCaller caller);
-
#endif // __PSXINTERPRETER_H__
#endif
static void * psxMapDefault(unsigned long addr, size_t size,
- int is_fixed, enum psxMapTag tag)
+ enum psxMapTag tag, int *can_retry_addr)
{
void *ptr;
#if !P_HAVE_MMAP
+ *can_retry_addr = 0;
ptr = calloc(1, size);
return ptr ? ptr : MAP_FAILED;
#else
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+ *can_retry_addr = 1;
ptr = mmap((void *)(uintptr_t)addr, size,
PROT_READ | PROT_WRITE, flags, -1, 0);
#ifdef MADV_HUGEPAGE
if (size >= 2*1024*1024) {
if (ptr != MAP_FAILED && ((uintptr_t)ptr & (2*1024*1024 - 1))) {
- // try to manually realign assuming bottom-to-top alloc
+ // try to manually realign assuming decreasing addr alloc
munmap(ptr, size);
addr = (uintptr_t)ptr & ~(2*1024*1024 - 1);
ptr = mmap((void *)(uintptr_t)addr, size,
#endif
}
-void *(*psxMapHook)(unsigned long addr, size_t size, int is_fixed,
- enum psxMapTag tag) = psxMapDefault;
+void *(*psxMapHook)(unsigned long addr, size_t size,
+ enum psxMapTag tag, int *can_retry_addr) = psxMapDefault;
void (*psxUnmapHook)(void *ptr, size_t size,
enum psxMapTag tag) = psxUnmapDefault;
void *psxMap(unsigned long addr, size_t size, int is_fixed,
enum psxMapTag tag)
{
- int try_ = 0;
- unsigned long mask;
- void *ret;
+ int try_, can_retry_addr = 0;
+ void *ret = MAP_FAILED;
-retry:
- ret = psxMapHook(addr, size, 0, tag);
- if (ret == NULL)
- return MAP_FAILED;
-
- if (addr != 0 && ret != (void *)(uintptr_t)addr) {
- SysMessage("psxMap: warning: wanted to map @%08x, got %p\n",
- addr, ret);
-
- if (is_fixed) {
+ for (try_ = 0; try_ < 3; try_++)
+ {
+ if (ret != MAP_FAILED)
psxUnmap(ret, size, tag);
+ ret = psxMapHook(addr, size, tag, &can_retry_addr);
+ if (ret == MAP_FAILED)
return MAP_FAILED;
- }
- if (((addr ^ (unsigned long)(uintptr_t)ret) & ~0xff000000l) && try_ < 2)
- {
- psxUnmap(ret, size, tag);
+ if (addr != 0 && ret != (void *)(uintptr_t)addr) {
+ SysMessage("psxMap: tried to map @%08lx, got %p\n",
+ addr, ret);
+ if (is_fixed) {
+ psxUnmap(ret, size, tag);
+ return MAP_FAILED;
+ }
- // try to use similarly aligned memory instead
- // (recompiler needs this)
- mask = try_ ? 0xffff : 0xffffff;
- addr = ((uintptr_t)ret + mask) & ~mask;
- try_++;
- goto retry;
+ if (can_retry_addr && ((addr ^ (uintptr_t)ret) & ~0xff000000l)) {
+ unsigned long mask;
+
+ // try to use similarly aligned memory instead
+ // (recompiler prefers this)
+ mask = try_ ? 0xffff : 0xffffff;
+ addr = ((uintptr_t)ret + mask) & ~mask;
+ continue;
+ }
}
+ break;
}
return ret;
if (psxM == MAP_FAILED)
psxM = psxMap(0x77000000, 0x00210000, 0, MAP_TAG_RAM);
if (psxM == MAP_FAILED) {
- SysMessage(_("mapping main RAM failed"));
+ SysMessage("mapping main RAM failed");
psxM = NULL;
return -1;
}
psxH = psxMap(0x1f800000, 0x10000, 0, MAP_TAG_OTHER);
if (psxH == MAP_FAILED) {
- SysMessage(_("Error allocating memory!"));
- psxMemShutdown();
+ SysMessage("Error allocating psxH");
+ psxH = NULL;
return -1;
}
psxR = psxMap(0x1fc00000, 0x80000, 0, MAP_TAG_OTHER);
if (psxR == MAP_FAILED) {
- SysMessage(_("Error allocating memory!"));
- psxMemShutdown();
+ SysMessage("Error allocating psxR");
+ psxR = NULL;
return -1;
}
else
ret = psxMemInitMap();
if (ret) {
- SysMessage(_("Error allocating memory!"));
+ if (LIGHTREC_CUSTOM_MAP)
+ SysMessage("lightrec_init_mmap failed");
psxMemShutdown();
return -1;
}
psxMemWLUT = (u8 **)malloc(0x10000 * sizeof(void *));
if (psxMemRLUT == NULL || psxMemWLUT == NULL) {
- SysMessage(_("Error allocating memory!"));
+ SysMessage("Error allocating psxMem LUTs");
psxMemShutdown();
return -1;
}
if (f == NULL) {
SysMessage(_("Could not open BIOS:\"%s\". Enabling HLE Bios!\n"), bios);
- memset(psxR, 0, 0x80000);
} else {
if (fread(psxR, 1, 0x80000, f) == 0x80000) {
Config.HLE = FALSE;
fclose(f);
}
}
+ if (Config.HLE)
+ memset(psxR, 0, 0x80000);
}
void psxMemShutdown() {
MAP_TAG_LUTS,
};
-extern void *(*psxMapHook)(unsigned long addr, size_t size, int is_fixed,
- enum psxMapTag tag);
+extern void *(*psxMapHook)(unsigned long addr, size_t size,
+ enum psxMapTag tag, int *can_retry_addr);
extern void (*psxUnmapHook)(void *ptr, size_t size, enum psxMapTag tag);
void *psxMap(unsigned long addr, size_t size, int is_fixed,
#include "psxbios.h"
#include "psxevents.h"
#include "../include/compiler_features.h"
+#include <assert.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+#endif
R3000Acpu *psxCpu = NULL;
#ifdef DRC_DISABLE
#endif
int psxInit() {
+ assert(PSXINT_COUNT <= ARRAY_SIZE(psxRegs.intCycle));
+ assert(ARRAY_SIZE(psxRegs.intCycle) == ARRAY_SIZE(psxRegs.event_cycles));
+
#ifndef DRC_DISABLE
if (Config.Cpu == CPU_INTERPRETER) {
psxCpu = &psxInt;
void psxReset() {
boolean introBypassed = FALSE;
+ boolean oldhle = Config.HLE;
+
psxMemReset();
memset(&psxRegs, 0, sizeof(psxRegs));
psxRegs.CP0.n.SR &= ~(1u << 22); // RAM exception vector
}
+ if (Config.HLE != oldhle) {
+ // at least ari64 drc compiles differently so hard reset
+ psxCpu->Shutdown();
+ psxCpu->Init();
+ }
psxCpu->ApplyConfig();
psxCpu->Reset();
}
void psxBranchTest() {
- if ((psxRegs.cycle - psxNextsCounter) >= psxNextCounter)
+ if ((psxRegs.cycle - psxRegs.psxNextsCounter) >= psxRegs.psxNextCounter)
psxRcntUpdate();
irq_test(&psxRegs.CP0);
}
}
+int psxExecuteBiosEnded(void) {
+ return (psxRegs.pc & 0xff800000) == 0x80000000;
+}
+
void psxExecuteBios() {
int i;
for (i = 0; i < 5000000; i++) {
- psxCpu->ExecuteBlock(EXEC_CALLER_BOOT);
- if ((psxRegs.pc & 0xff800000) == 0x80000000)
+ psxCpu->ExecuteBlock(&psxRegs, EXEC_CALLER_BOOT);
+ if (psxExecuteBiosEnded())
break;
}
if (psxRegs.pc != 0x80030000)
enum blockExecCaller {
EXEC_CALLER_BOOT,
EXEC_CALLER_HLE,
+ EXEC_CALLER_OTHER,
};
+struct psxRegisters;
+
typedef struct {
int (*Init)();
void (*Reset)();
- void (*Execute)();
- void (*ExecuteBlock)(enum blockExecCaller caller); /* executes up to a jump */
+ void (*Execute)(struct psxRegisters *regs);
+ /* executes up to a jump */
+ void (*ExecuteBlock)(struct psxRegisters *regs, enum blockExecCaller caller);
void (*Clear)(u32 Addr, u32 Size);
void (*Notify)(enum R3000Anote note, void *data);
void (*ApplyConfig)();
psxCP2Ctrl CP2C; /* Cop2 control registers */
} psxCP2Regs;
-typedef struct {
+typedef struct psxRegisters {
// note: some cores like lightrec don't keep their data here,
// so use R3000ACPU_NOTIFY_BEFORE_SAVE to sync
psxGPRRegs GPR; /* General Purpose Registers */
u32 code; /* The instruction */
u32 cycle;
u32 interrupt;
- struct { u32 sCycle, cycle; } intCycle[32];
+ struct { u32 sCycle, cycle; } intCycle[20];
+ u32 event_cycles[20];
+ u32 psxNextCounter;
+ u32 psxNextsCounter;
+ u32 next_interupt; /* cycle */
+ u32 unused;
u32 gteBusyCycle;
u32 muldivBusyCycle;
u32 subCycle; /* interpreter cycle counting */
u32 subCycleStep;
u32 biuReg;
+ u8 stop;
+ u8 branchSeen; /* interp. */
u8 branching; /* interp. R3000A_BRANCH_TAKEN / not, 0 if not branch */
u8 dloadSel; /* interp. delay load state */
u8 dloadReg[2];
+ u8 unused2[2];
u32 dloadVal[2];
u32 biosBranchCheck;
u32 cpuInRecursion;
u32 gpuIdleAfter;
- u32 reserved[1];
+ u32 unused3[2];
// warning: changing anything in psxRegisters requires update of all
- // asm in libpcsxcore/new_dynarec/
+ // asm in libpcsxcore/new_dynarec/ and may break savestates
} psxRegisters;
extern psxRegisters psxRegs;
/* new_dynarec stuff */
-void new_dyna_freeze(void *f, int mode);
+void ndrc_freeze(void *f, int mode);
+void ndrc_clear_full(void);
int psxInit();
void psxReset();
void psxException(u32 code, enum R3000Abdt bdt, psxCP0Regs *cp0);
void psxBranchTest();
void psxExecuteBios();
+int psxExecuteBiosEnded(void);
void psxJumpTest();
void irq10Interrupt();
int i = 0, j;
f = fopen(mcd, "wb");
- if (f == NULL)
+ if (f == NULL) {
+ SysPrintf("CreateMcd: couldn't open %s\n", mcd);
return;
+ }
if (stat(mcd, &buf) != -1) {
if ((buf.st_size == MCD_SIZE + 3904) || strstr(mcd, ".gme")) {
int SysInit(); // Init mem and plugins
void SysReset(); // Resets mem
-void SysPrintf(const char *fmt, ...); // Printf used by bios syscalls
-void SysMessage(const char *fmt, ...); // Message used to print msg to users
+void SysPrintf(const char *fmt, ...)
+#if defined(__GNUC__) && defined(__x86_64__) // some platforms have int32_t as long
+ __attribute__((format(printf, 1, 2)))
+#endif
+ ;
+void SysMessage(const char *fmt, ...) // Message used to print msg to users
+#if defined(__GNUC__) && defined(__x86_64__)
+ __attribute__((format(printf, 1, 2)))
+#endif
+ ;
void *SysLoadLibrary(const char *lib); // Loads Library
void *SysLoadSym(void *lib, const char *sym); // Loads Symbol from Library
const char *SysLibError(); // Gets previous error loading sysbols
// might also need more delay like in set_dma_end()\r
do_irq_io(irq_after * 4);\r
}\r
- for (i = 0; i < 24; i++) {\r
+ for (i = 0; i < MAXCHAN; i++) {\r
size_t ediff, p = spu.s_chan[i].pCurr - spu.spuMemC;\r
if (spu.s_chan[i].ADSRX.State == ADSR_RELEASE && !spu.s_chan[i].ADSRX.EnvelopeVol)\r
continue;\r
int VolLeft;\r
int VolRight;\r
\r
- int FB_SRC_A; // (offset)\r
- int FB_SRC_B; // (offset)\r
- int IIR_ALPHA; // (coef.)\r
- int ACC_COEF_A; // (coef.)\r
- int ACC_COEF_B; // (coef.)\r
- int ACC_COEF_C; // (coef.)\r
- int ACC_COEF_D; // (coef.)\r
- int IIR_COEF; // (coef.)\r
- int FB_ALPHA; // (coef.)\r
- int FB_X; // (coef.)\r
- int IIR_DEST_A0; // (offset)\r
- int IIR_DEST_A1; // (offset)\r
- int ACC_SRC_A0; // (offset)\r
- int ACC_SRC_A1; // (offset)\r
- int ACC_SRC_B0; // (offset)\r
- int ACC_SRC_B1; // (offset)\r
- int IIR_SRC_A0; // (offset)\r
- int IIR_SRC_A1; // (offset)\r
- int IIR_DEST_B0; // (offset)\r
- int IIR_DEST_B1; // (offset)\r
- int ACC_SRC_C0; // (offset)\r
- int ACC_SRC_C1; // (offset)\r
- int ACC_SRC_D0; // (offset)\r
- int ACC_SRC_D1; // (offset)\r
- int IIR_SRC_B1; // (offset)\r
- int IIR_SRC_B0; // (offset)\r
- int MIX_DEST_A0; // (offset)\r
- int MIX_DEST_A1; // (offset)\r
- int MIX_DEST_B0; // (offset)\r
- int MIX_DEST_B1; // (offset)\r
- int IN_COEF_L; // (coef.)\r
- int IN_COEF_R; // (coef.)\r
-\r
- int dirty; // registers changed\r
-\r
- // MIX_DEST_xx - FB_SRC_x\r
- int FB_SRC_A0, FB_SRC_A1, FB_SRC_B0, FB_SRC_B1;\r
+ // directly from nocash docs\r
+ //int dAPF1; // 1DC0 disp Reverb APF Offset 1\r
+ //int dAPF2; // 1DC2 disp Reverb APF Offset 2\r
+ int vIIR; // 1DC4 volume Reverb Reflection Volume 1\r
+ int vCOMB1; // 1DC6 volume Reverb Comb Volume 1\r
+ int vCOMB2; // 1DC8 volume Reverb Comb Volume 2\r
+ int vCOMB3; // 1DCA volume Reverb Comb Volume 3\r
+ int vCOMB4; // 1DCC volume Reverb Comb Volume 4\r
+ int vWALL; // 1DCE volume Reverb Reflection Volume 2\r
+ int vAPF1; // 1DD0 volume Reverb APF Volume 1\r
+ int vAPF2; // 1DD2 volume Reverb APF Volume 2\r
+ int mLSAME; // 1DD4 src/dst Reverb Same Side Reflection Address 1 Left\r
+ int mRSAME; // 1DD6 src/dst Reverb Same Side Reflection Address 1 Right\r
+ int mLCOMB1; // 1DD8 src Reverb Comb Address 1 Left\r
+ int mRCOMB1; // 1DDA src Reverb Comb Address 1 Right\r
+ int mLCOMB2; // 1DDC src Reverb Comb Address 2 Left\r
+ int mRCOMB2; // 1DDE src Reverb Comb Address 2 Right\r
+ int dLSAME; // 1DE0 src Reverb Same Side Reflection Address 2 Left\r
+ int dRSAME; // 1DE2 src Reverb Same Side Reflection Address 2 Right\r
+ int mLDIFF; // 1DE4 src/dst Reverb Different Side Reflect Address 1 Left\r
+ int mRDIFF; // 1DE6 src/dst Reverb Different Side Reflect Address 1 Right\r
+ int mLCOMB3; // 1DE8 src Reverb Comb Address 3 Left\r
+ int mRCOMB3; // 1DEA src Reverb Comb Address 3 Right\r
+ int mLCOMB4; // 1DEC src Reverb Comb Address 4 Left\r
+ int mRCOMB4; // 1DEE src Reverb Comb Address 4 Right\r
+ int dLDIFF; // 1DF0 src Reverb Different Side Reflect Address 2 Left\r
+ int dRDIFF; // 1DF2 src Reverb Different Side Reflect Address 2 Right\r
+ int mLAPF1; // 1DF4 src/dst Reverb APF Address 1 Left\r
+ int mRAPF1; // 1DF6 src/dst Reverb APF Address 1 Right\r
+ int mLAPF2; // 1DF8 src/dst Reverb APF Address 2 Left\r
+ int mRAPF2; // 1DFA src/dst Reverb APF Address 2 Right\r
+ int vLIN; // 1DFC volume Reverb Input Volume Left\r
+ int vRIN; // 1DFE volume Reverb Input Volume Right\r
+\r
+ // subtracted offsets\r
+ int mLAPF1_dAPF1, mRAPF1_dAPF1, mLAPF2_dAPF2, mRAPF2_dAPF2;\r
+\r
+ int dirty; // registers changed\r
} REVERBInfo;\r
\r
///////////////////////////////////////////////////////////\r
typedef union\r
{\r
int SB[28 + 4 + 4];\r
+ int SB_rvb[2][4*2]; // for reverb filtering\r
struct {\r
int sample[28];\r
union {\r
\r
unsigned short regArea[0x400];\r
\r
- sample_buf sb[MAXCHAN];\r
+ sample_buf sb[MAXCHAN+1]; // last entry is used for reverb filter\r
int interpolation;\r
\r
#if P_HAVE_PTHREAD || defined(WANT_THREAD_CODE)\r
sample_buf * sb_thread;\r
- sample_buf sb_thread_[MAXCHAN];\r
+ sample_buf sb_thread_[MAXCHAN+1];\r
#endif\r
} SPUInfo;\r
\r
-#define regAreaGet(offset) \\r
+#define regAreaRef(offset) \\r
spu.regArea[((offset) - 0xc00) >> 1]\r
+#define regAreaGet(offset) \\r
+ regAreaRef(offset)\r
#define regAreaGetCh(ch, offset) \\r
spu.regArea[(((ch) << 4) | (offset)) >> 1]\r
\r
* *\r
***************************************************************************/\r
\r
+#include <stddef.h>\r
#include <assert.h>\r
#include "stdafx.h"\r
\r
uint32_t XARepeat;\r
uint32_t XALastVal;\r
uint32_t last_keyon_cycles;\r
+ uint32_t rvb_sb[2][4];\r
+ int32_t interpolation; // which interpolation's data is in SPUCHAN_orig::SB\r
\r
} SPUOSSFreeze_t;\r
\r
// SPUFREEZE: called by main emu on savestate load/save\r
////////////////////////////////////////////////////////////////////////\r
\r
-long CALLBACK SPUfreeze(unsigned int ulFreezeMode, SPUFreeze_t * pF,\r
+long DoFreeze(unsigned int ulFreezeMode, SPUFreeze_t * pF,\r
unsigned int cycles)\r
{\r
SPUOSSFreeze_t * pFO = NULL;\r
- int i;\r
+ sample_buf *sb_rvb = &spu.sb[MAXCHAN];\r
+ int i, j;\r
\r
if(!pF) return 0; // first check\r
\r
+#if P_HAVE_PTHREAD || defined(WANT_THREAD_CODE)\r
+ sb_rvb = &spu.sb_thread[MAXCHAN];\r
+#endif\r
if(ulFreezeMode) // info or save?\r
{//--------------------------------------------------//\r
int xa_left = 0, cdda_left = 0;\r
pFO->XARepeat = spu.XARepeat;\r
pFO->XALastVal = spu.XALastVal;\r
pFO->last_keyon_cycles = spu.last_keyon_cycles;\r
+ for (i = 0; i < 2; i++)\r
+ memcpy(&pFO->rvb_sb[i], sb_rvb->SB_rvb[i], sizeof(pFO->rvb_sb[i]));\r
+ pFO->interpolation = spu.interpolation;\r
\r
for(i=0;i<MAXCHAN;i++)\r
{\r
spu.XARepeat = 0;\r
spu.XALastVal = 0;\r
spu.last_keyon_cycles = cycles - 16*786u;\r
- if (pFO && pF->ulFreezeSize >= sizeof(*pF) + sizeof(*pFO)) {\r
+ spu.interpolation = -1;\r
+ if (pFO && pF->ulFreezeSize >= sizeof(*pF) + offsetof(SPUOSSFreeze_t, rvb_sb)) {\r
spu.cycles_dma_end = pFO->cycles_dma_end;\r
spu.decode_dirty_ch = pFO->decode_dirty_ch;\r
spu.dwNoiseVal = pFO->dwNoiseVal;\r
spu.XALastVal = pFO->XALastVal;\r
spu.last_keyon_cycles = pFO->last_keyon_cycles;\r
}\r
+ if (pFO && pF->ulFreezeSize >= sizeof(*pF) + sizeof(*pFO)) {\r
+ for (i = 0; i < 2; i++)\r
+ for (j = 0; j < 2; j++)\r
+ memcpy(&sb_rvb->SB_rvb[i][j*4], pFO->rvb_sb[i], 4 * sizeof(sb_rvb->SB_rvb[i][0]));\r
+ spu.interpolation = pFO->interpolation;\r
+ }\r
+ for (i = 0; i <= 2; i += 2)\r
+ if (!regAreaGet(H_SPUcmvolL+i) && regAreaGet(H_SPUmvolL+i) < 0x8000u)\r
+ regAreaRef(H_SPUcmvolL+i) = regAreaGet(H_SPUmvolL+i) << 1;\r
\r
// repair some globals\r
for(i=0;i<=62;i+=2)\r
spu.rvb->StartAddr = regAreaGet(H_SPUReverbAddr) << 2;\r
if (spu.rvb->CurrAddr < spu.rvb->StartAddr)\r
spu.rvb->CurrAddr = spu.rvb->StartAddr;\r
- // fix to prevent new interpolations from crashing\r
- spu.interpolation = -1;\r
\r
ClearWorkingState();\r
\r
\r
\r
// Mednafen's table (PSX) 99-100%\r
-const int gauss[]={\r
- 0x12c7, 0x59b3, 0x1307, 0xffffffff, \r
- 0x1288, 0x59b2, 0x1347, 0xffffffff, \r
- 0x1249, 0x59b0, 0x1388, 0xffffffff, \r
- 0x120b, 0x59ad, 0x13c9, 0xffffffff, \r
- 0x11cd, 0x59a9, 0x140b, 0xffffffff, \r
- 0x118f, 0x59a4, 0x144d, 0xffffffff, \r
- 0x1153, 0x599e, 0x1490, 0xffffffff, \r
- 0x1116, 0x5997, 0x14d4, 0xffffffff, \r
- 0x10db, 0x598f, 0x1517, 0xffffffff, \r
- 0x109f, 0x5986, 0x155c, 0xffffffff, \r
- 0x1065, 0x597c, 0x15a0, 0xffffffff, \r
- 0x102a, 0x5971, 0x15e6, 0xffffffff, \r
- 0x0ff1, 0x5965, 0x162c, 0xffffffff, \r
- 0x0fb7, 0x5958, 0x1672, 0xffffffff, \r
- 0x0f7f, 0x5949, 0x16b9, 0xffffffff, \r
- 0x0f46, 0x593a, 0x1700, 0xffffffff, \r
+static const signed short gauss[] =\r
+{\r
+ 0x12c7, 0x59b3, 0x1307, 0xffff,\r
+ 0x1288, 0x59b2, 0x1347, 0xffff,\r
+ 0x1249, 0x59b0, 0x1388, 0xffff,\r
+ 0x120b, 0x59ad, 0x13c9, 0xffff,\r
+ 0x11cd, 0x59a9, 0x140b, 0xffff,\r
+ 0x118f, 0x59a4, 0x144d, 0xffff,\r
+ 0x1153, 0x599e, 0x1490, 0xffff,\r
+ 0x1116, 0x5997, 0x14d4, 0xffff,\r
+ 0x10db, 0x598f, 0x1517, 0xffff,\r
+ 0x109f, 0x5986, 0x155c, 0xffff,\r
+ 0x1065, 0x597c, 0x15a0, 0xffff,\r
+ 0x102a, 0x5971, 0x15e6, 0xffff,\r
+ 0x0ff1, 0x5965, 0x162c, 0xffff,\r
+ 0x0fb7, 0x5958, 0x1672, 0xffff,\r
+ 0x0f7f, 0x5949, 0x16b9, 0xffff,\r
+ 0x0f46, 0x593a, 0x1700, 0xffff,\r
0x0f0f, 0x592a, 0x1747, 0x0000, \r
0x0ed7, 0x5919, 0x1790, 0x0000, \r
0x0ea1, 0x5907, 0x17d8, 0x0000, \r
0x0000, 0x17d8, 0x5907, 0x0ea1, \r
0x0000, 0x1790, 0x5919, 0x0ed7, \r
0x0000, 0x1747, 0x592a, 0x0f0f, \r
- 0xffffffff, 0x1700, 0x593a, 0x0f46, \r
- 0xffffffff, 0x16b9, 0x5949, 0x0f7f, \r
- 0xffffffff, 0x1672, 0x5958, 0x0fb7, \r
- 0xffffffff, 0x162c, 0x5965, 0x0ff1, \r
- 0xffffffff, 0x15e6, 0x5971, 0x102a, \r
- 0xffffffff, 0x15a0, 0x597c, 0x1065, \r
- 0xffffffff, 0x155c, 0x5986, 0x109f, \r
- 0xffffffff, 0x1517, 0x598f, 0x10db, \r
- 0xffffffff, 0x14d4, 0x5997, 0x1116, \r
- 0xffffffff, 0x1490, 0x599e, 0x1153, \r
- 0xffffffff, 0x144d, 0x59a4, 0x118f, \r
- 0xffffffff, 0x140b, 0x59a9, 0x11cd, \r
- 0xffffffff, 0x13c9, 0x59ad, 0x120b, \r
- 0xffffffff, 0x1388, 0x59b0, 0x1249, \r
- 0xffffffff, 0x1347, 0x59b2, 0x1288, \r
- 0xffffffff, 0x1307, 0x59b3, 0x12c7, \r
+ 0xffff, 0x1700, 0x593a, 0x0f46,\r
+ 0xffff, 0x16b9, 0x5949, 0x0f7f,\r
+ 0xffff, 0x1672, 0x5958, 0x0fb7,\r
+ 0xffff, 0x162c, 0x5965, 0x0ff1,\r
+ 0xffff, 0x15e6, 0x5971, 0x102a,\r
+ 0xffff, 0x15a0, 0x597c, 0x1065,\r
+ 0xffff, 0x155c, 0x5986, 0x109f,\r
+ 0xffff, 0x1517, 0x598f, 0x10db,\r
+ 0xffff, 0x14d4, 0x5997, 0x1116,\r
+ 0xffff, 0x1490, 0x599e, 0x1153,\r
+ 0xffff, 0x144d, 0x59a4, 0x118f,\r
+ 0xffff, 0x140b, 0x59a9, 0x11cd,\r
+ 0xffff, 0x13c9, 0x59ad, 0x120b,\r
+ 0xffff, 0x1388, 0x59b0, 0x1249,\r
+ 0xffff, 0x1347, 0x59b2, 0x1288,\r
+ 0xffff, 0x1307, 0x59b3, 0x12c7,\r
};\r
\r
#endif\r
}
out_current = &out_drivers[i];
- printf("selected sound output driver: %s\n", out_current->name);
+ // printf("selected sound output driver: %s\n", out_current->name);
}
int changed = spu.regArea[rofs] != val;\r
spu.regArea[rofs] = val;\r
\r
- if (!changed && (ignore_dupe[rofs >> 5] & (1 << (rofs & 0x1f))))\r
+ if (!changed && (ignore_dupe[rofs >> 5] & (1u << (rofs & 0x1f))))\r
return;\r
// zero keyon/keyoff?\r
if (val == 0 && (r & 0xff8) == 0xd88)\r
//-------------------------------------------------//\r
\r
case H_SPUmvolL:\r
- case H_SPUmvolR:\r
- if (val & 0x8000)\r
+ case H_SPUmvolR: {\r
+ int ofs = H_SPUcmvolL - H_SPUmvolL;\r
+ unsigned short *cur = ®AreaGet(r + ofs);\r
+ if (val & 0x8000) {\r
+ // this (for now?) lacks an update mechanism, so is instant\r
log_unhandled("w master sweep: %08lx %04x\n", reg, val);\r
+ int was_neg = (*cur >> 14) & 1;\r
+ int dec = (val >> 13) & 1;\r
+ int inv = (val >> 12) & 1;\r
+ *cur = (was_neg ^ dec ^ inv) ? 0x7fff : 0;\r
+ }\r
+ else\r
+ *cur = val << 1;\r
break;\r
+ }\r
\r
case 0x0dac:\r
if (val != 4)\r
//auxprintf("ER %d\n",val);\r
break;\r
//-------------------------------------------------//\r
- case H_SPUmvolL:\r
- //auxprintf("ML %d\n",val);\r
- break;\r
- //-------------------------------------------------//\r
- case H_SPUmvolR:\r
- //auxprintf("MR %d\n",val);\r
- break;\r
- //-------------------------------------------------//\r
case H_SPUMute1:\r
//auxprintf("M0 %04x\n",val);\r
break;\r
ReverbOn(16,24,val);\r
break;\r
//-------------------------------------------------//\r
- case H_Reverb+0 : goto rvbd;\r
- case H_Reverb+2 : goto rvbd;\r
- case H_Reverb+4 : spu.rvb->IIR_ALPHA=(short)val; break;\r
- case H_Reverb+6 : spu.rvb->ACC_COEF_A=(short)val; break;\r
- case H_Reverb+8 : spu.rvb->ACC_COEF_B=(short)val; break;\r
- case H_Reverb+10 : spu.rvb->ACC_COEF_C=(short)val; break;\r
- case H_Reverb+12 : spu.rvb->ACC_COEF_D=(short)val; break;\r
- case H_Reverb+14 : spu.rvb->IIR_COEF=(short)val; break;\r
- case H_Reverb+16 : spu.rvb->FB_ALPHA=(short)val; break;\r
- case H_Reverb+18 : spu.rvb->FB_X=(short)val; break;\r
- case H_Reverb+20 : goto rvbd;\r
- case H_Reverb+22 : goto rvbd;\r
- case H_Reverb+24 : goto rvbd;\r
- case H_Reverb+26 : goto rvbd;\r
- case H_Reverb+28 : goto rvbd;\r
- case H_Reverb+30 : goto rvbd;\r
- case H_Reverb+32 : goto rvbd;\r
- case H_Reverb+34 : goto rvbd;\r
- case H_Reverb+36 : goto rvbd;\r
- case H_Reverb+38 : goto rvbd;\r
- case H_Reverb+40 : goto rvbd;\r
- case H_Reverb+42 : goto rvbd;\r
- case H_Reverb+44 : goto rvbd;\r
- case H_Reverb+46 : goto rvbd;\r
- case H_Reverb+48 : goto rvbd;\r
- case H_Reverb+50 : goto rvbd;\r
- case H_Reverb+52 : goto rvbd;\r
- case H_Reverb+54 : goto rvbd;\r
- case H_Reverb+56 : goto rvbd;\r
- case H_Reverb+58 : goto rvbd;\r
- case H_Reverb+60 : spu.rvb->IN_COEF_L=(short)val; break;\r
- case H_Reverb+62 : spu.rvb->IN_COEF_R=(short)val; break;\r
+ case H_Reverb + 0x00 : goto rvbd;\r
+ case H_Reverb + 0x02 : goto rvbd;\r
+ case H_Reverb + 0x04 : spu.rvb->vIIR = (signed short)val; break;\r
+ case H_Reverb + 0x06 : spu.rvb->vCOMB1 = (signed short)val; break;\r
+ case H_Reverb + 0x08 : spu.rvb->vCOMB2 = (signed short)val; break;\r
+ case H_Reverb + 0x0a : spu.rvb->vCOMB3 = (signed short)val; break;\r
+ case H_Reverb + 0x0c : spu.rvb->vCOMB4 = (signed short)val; break;\r
+ case H_Reverb + 0x0e : spu.rvb->vWALL = (signed short)val; break;\r
+ case H_Reverb + 0x10 : spu.rvb->vAPF1 = (signed short)val; break;\r
+ case H_Reverb + 0x12 : spu.rvb->vAPF2 = (signed short)val; break;\r
+ case H_Reverb + 0x14 : goto rvbd;\r
+ case H_Reverb + 0x16 : goto rvbd;\r
+ case H_Reverb + 0x18 : goto rvbd;\r
+ case H_Reverb + 0x1a : goto rvbd;\r
+ case H_Reverb + 0x1c : goto rvbd;\r
+ case H_Reverb + 0x1e : goto rvbd;\r
+ case H_Reverb + 0x20 : goto rvbd;\r
+ case H_Reverb + 0x22 : goto rvbd;\r
+ case H_Reverb + 0x24 : goto rvbd;\r
+ case H_Reverb + 0x26 : goto rvbd;\r
+ case H_Reverb + 0x28 : goto rvbd;\r
+ case H_Reverb + 0x2a : goto rvbd;\r
+ case H_Reverb + 0x2c : goto rvbd;\r
+ case H_Reverb + 0x2e : goto rvbd;\r
+ case H_Reverb + 0x30 : goto rvbd;\r
+ case H_Reverb + 0x32 : goto rvbd;\r
+ case H_Reverb + 0x34 : goto rvbd;\r
+ case H_Reverb + 0x36 : goto rvbd;\r
+ case H_Reverb + 0x38 : goto rvbd;\r
+ case H_Reverb + 0x3a : goto rvbd;\r
+ case H_Reverb + 0x3c : spu.rvb->vLIN = (signed short)val; break;\r
+ case H_Reverb + 0x3e : spu.rvb->vRIN = (signed short)val; break;\r
}\r
return;\r
\r
\r
case H_SPUMute1:\r
case H_SPUMute2:\r
- log_unhandled("r isOn: %08lx\n", reg);\r
+ log_unhandled("spu r isOn: %08lx %04x\n", reg, regAreaGet(r));\r
+ break;\r
+\r
+ case H_SPUmvolL:\r
+ case H_SPUmvolR:\r
+ log_unhandled("spu r mvol: %08lx %04x\n", reg, regAreaGet(r));\r
break;\r
\r
case 0x0dac:\r
\r
default:\r
if (r >= 0xda0)\r
- log_unhandled("spu r %08lx\n", reg);\r
+ log_unhandled("spu r %08lx %04x\n", reg, regAreaGet(r));\r
break;\r
}\r
\r
#define H_CDRight 0x0db2\r
#define H_ExtLeft 0x0db4\r
#define H_ExtRight 0x0db6\r
+#define H_SPUcmvolL 0x0db8\r
+#define H_SPUcmvolR 0x0dba\r
#define H_Reverb 0x0dc0\r
#define H_SPUPitch0 0x0c04\r
#define H_SPUPitch1 0x0c14\r
\r
#include "stdafx.h"\r
#include "spu.h"\r
+#include <assert.h>\r
\r
#define _IN_REVERB\r
\r
\r
////////////////////////////////////////////////////////////////////////\r
\r
-INLINE int rvb2ram_offs(int curr, int space, int iOff)\r
+INLINE int rvb_wrap(int ofs, int space)\r
{\r
- iOff += curr;\r
- if (iOff >= 0x40000) iOff -= space;\r
- return iOff;\r
+#if 0\r
+ int mask = (0x3ffff - ofs) >> 31;\r
+ ofs = ofs - (space & mask);\r
+#else\r
+ if (ofs >= 0x40000)\r
+ ofs -= space;\r
+#endif\r
+ //assert(ofs >= 0x40000 - space);\r
+ //assert(ofs < 0x40000);\r
+ return ofs;\r
+}\r
+\r
+INLINE int rvb2ram_offs(int curr, int space, int ofs)\r
+{\r
+ ofs += curr;\r
+ return rvb_wrap(ofs, space);\r
}\r
\r
// get_buffer content helper: takes care about wraps\r
#define g_buffer(var) \\r
- ((int)(signed short)LE16TOH(spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var)]))\r
+ ((int)(signed short)LE16TOH(spuMem[rvb2ram_offs(curr_addr, space, var)]))\r
\r
// saturate iVal and store it as var\r
-#define s_buffer(var, iVal) \\r
+#define s_buffer_w(var, iVal) \\r
ssat32_to_16(iVal); \\r
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var)] = HTOLE16(iVal)\r
-\r
-#define s_buffer1(var, iVal) \\r
- ssat32_to_16(iVal); \\r
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->var + 1)] = HTOLE16(iVal)\r
+ spuMem[rvb2ram_offs(curr_addr, space, var)] = HTOLE16(iVal)\r
\r
////////////////////////////////////////////////////////////////////////\r
\r
-// portions based on spu2-x from PCSX2\r
-static void MixREVERB(int *SSumLR, int *RVB, int ns_to, int curr_addr)\r
+static void reverb_interpolate(sample_buf *sb, int curr_addr,\r
+ int out0[2], int out1[2])\r
+{\r
+ int spos = (curr_addr - 3) & 3;\r
+ int dpos = curr_addr & 3;\r
+ int i;\r
+\r
+ for (i = 0; i < 2; i++)\r
+ sb->SB_rvb[i][dpos] = sb->SB_rvb[i][4 | dpos] = out0[i];\r
+\r
+ // mednafen uses some 20 coefs here, we just reuse gauss [0] and [128]\r
+ for (i = 0; i < 2; i++)\r
+ {\r
+ const int *s;\r
+ s = &sb->SB_rvb[i][spos];\r
+ out0[i] = (s[0] * 0x12c7 + s[1] * 0x59b3 + s[2] * 0x1307) >> 15;\r
+ out1[i] = (s[0] * 0x019c + s[1] * 0x3def + s[2] * 0x3e4c + s[3] * 0x01a8) >> 15;\r
+ }\r
+}\r
+\r
+static void MixREVERB(int *SSumLR, int *RVB, int ns_to, int curr_addr,\r
+ int do_filter)\r
{\r
+ unsigned short *spuMem = spu.spuMem;\r
const REVERBInfo *rvb = spu.rvb;\r
- int IIR_ALPHA = rvb->IIR_ALPHA;\r
- int IIR_COEF = rvb->IIR_COEF;\r
+ sample_buf *sb = &spu.sb[MAXCHAN];\r
int space = 0x40000 - rvb->StartAddr;\r
- int l, r, ns;\r
+ int mlsame_m2o = rvb->mLSAME + space - 1;\r
+ int mrsame_m2o = rvb->mRSAME + space - 1;\r
+ int mldiff_m2o = rvb->mLDIFF + space - 1;\r
+ int mrdiff_m2o = rvb->mRDIFF + space - 1;\r
+ int vCOMB1 = rvb->vCOMB1 >> 1, vCOMB2 = rvb->vCOMB2 >> 1;\r
+ int vCOMB3 = rvb->vCOMB3 >> 1, vCOMB4 = rvb->vCOMB4 >> 1;\r
+ int vAPF1 = rvb->vAPF1 >> 1, vAPF2 = rvb->vAPF2 >> 1;\r
+ int vLIN = rvb->vLIN >> 1, vRIN = rvb->vRIN >> 1;\r
+ int vWALL = rvb->vWALL >> 1;\r
+ int vIIR = rvb->vIIR;\r
+ int ns;\r
+\r
+#if P_HAVE_PTHREAD || defined(WANT_THREAD_CODE)\r
+ sb = &spu.sb_thread[MAXCHAN];\r
+#endif\r
+ if (mlsame_m2o >= space) mlsame_m2o -= space;\r
+ if (mrsame_m2o >= space) mrsame_m2o -= space;\r
+ if (mldiff_m2o >= space) mldiff_m2o -= space;\r
+ if (mrdiff_m2o >= space) mrdiff_m2o -= space;\r
\r
for (ns = 0; ns < ns_to * 2; )\r
{\r
- int ACC0, ACC1, FB_A0, FB_A1, FB_B0, FB_B1;\r
- int mix_dest_a0, mix_dest_a1, mix_dest_b0, mix_dest_b1;\r
-\r
- int input_L = RVB[ns] * rvb->IN_COEF_L;\r
- int input_R = RVB[ns+1] * rvb->IN_COEF_R;\r
-\r
- int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * IIR_COEF) + input_L) >> 15;\r
- int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * IIR_COEF) + input_R) >> 15;\r
- int IIR_INPUT_B0 = ((g_buffer(IIR_SRC_B0) * IIR_COEF) + input_L) >> 15;\r
- int IIR_INPUT_B1 = ((g_buffer(IIR_SRC_B1) * IIR_COEF) + input_R) >> 15;\r
-\r
- int iir_dest_a0 = g_buffer(IIR_DEST_A0);\r
- int iir_dest_a1 = g_buffer(IIR_DEST_A1);\r
- int iir_dest_b0 = g_buffer(IIR_DEST_B0);\r
- int iir_dest_b1 = g_buffer(IIR_DEST_B1);\r
-\r
- int IIR_A0 = iir_dest_a0 + ((IIR_INPUT_A0 - iir_dest_a0) * IIR_ALPHA >> 15);\r
- int IIR_A1 = iir_dest_a1 + ((IIR_INPUT_A1 - iir_dest_a1) * IIR_ALPHA >> 15);\r
- int IIR_B0 = iir_dest_b0 + ((IIR_INPUT_B0 - iir_dest_b0) * IIR_ALPHA >> 15);\r
- int IIR_B1 = iir_dest_b1 + ((IIR_INPUT_B1 - iir_dest_b1) * IIR_ALPHA >> 15);\r
+ int Lin = RVB[ns];\r
+ int Rin = RVB[ns+1];\r
+ int mlsame_m2 = g_buffer(mlsame_m2o) << (15-1);\r
+ int mrsame_m2 = g_buffer(mrsame_m2o) << (15-1);\r
+ int mldiff_m2 = g_buffer(mldiff_m2o) << (15-1);\r
+ int mrdiff_m2 = g_buffer(mrdiff_m2o) << (15-1);\r
+ int Lout, Rout, out0[2], out1[2];\r
+\r
+ ssat32_to_16(Lin); Lin *= vLIN;\r
+ ssat32_to_16(Rin); Rin *= vRIN;\r
+\r
+ // from nocash psx-spx\r
+ mlsame_m2 += ((Lin + g_buffer(rvb->dLSAME) * vWALL - mlsame_m2) >> 15) * vIIR;\r
+ mrsame_m2 += ((Rin + g_buffer(rvb->dRSAME) * vWALL - mrsame_m2) >> 15) * vIIR;\r
+ mldiff_m2 += ((Lin + g_buffer(rvb->dLDIFF) * vWALL - mldiff_m2) >> 15) * vIIR;\r
+ mrdiff_m2 += ((Rin + g_buffer(rvb->dRDIFF) * vWALL - mrdiff_m2) >> 15) * vIIR;\r
+ mlsame_m2 >>= (15-1); s_buffer_w(rvb->mLSAME, mlsame_m2);\r
+ mrsame_m2 >>= (15-1); s_buffer_w(rvb->mRSAME, mrsame_m2);\r
+ mldiff_m2 >>= (15-1); s_buffer_w(rvb->mLDIFF, mldiff_m2);\r
+ mrdiff_m2 >>= (15-1); s_buffer_w(rvb->mRDIFF, mrdiff_m2);\r
+\r
+ Lout = vCOMB1 * g_buffer(rvb->mLCOMB1) + vCOMB2 * g_buffer(rvb->mLCOMB2)\r
+ + vCOMB3 * g_buffer(rvb->mLCOMB3) + vCOMB4 * g_buffer(rvb->mLCOMB4);\r
+ Rout = vCOMB1 * g_buffer(rvb->mRCOMB1) + vCOMB2 * g_buffer(rvb->mRCOMB2)\r
+ + vCOMB3 * g_buffer(rvb->mRCOMB3) + vCOMB4 * g_buffer(rvb->mRCOMB4);\r
\r
preload(SSumLR + ns + 64*2/4 - 4);\r
\r
- s_buffer1(IIR_DEST_A0, IIR_A0);\r
- s_buffer1(IIR_DEST_A1, IIR_A1);\r
- s_buffer1(IIR_DEST_B0, IIR_B0);\r
- s_buffer1(IIR_DEST_B1, IIR_B1);\r
+ Lout -= vAPF1 * g_buffer(rvb->mLAPF1_dAPF1); Lout >>= (15-1);\r
+ Rout -= vAPF1 * g_buffer(rvb->mRAPF1_dAPF1); Rout >>= (15-1);\r
+ s_buffer_w(rvb->mLAPF1, Lout);\r
+ s_buffer_w(rvb->mRAPF1, Rout);\r
+ Lout = Lout * vAPF1 + (g_buffer(rvb->mLAPF1_dAPF1) << (15-1));\r
+ Rout = Rout * vAPF1 + (g_buffer(rvb->mRAPF1_dAPF1) << (15-1));\r
\r
preload(RVB + ns + 64*2/4 - 4);\r
\r
- ACC0 = (g_buffer(ACC_SRC_A0) * rvb->ACC_COEF_A +\r
- g_buffer(ACC_SRC_B0) * rvb->ACC_COEF_B +\r
- g_buffer(ACC_SRC_C0) * rvb->ACC_COEF_C +\r
- g_buffer(ACC_SRC_D0) * rvb->ACC_COEF_D) >> 15;\r
- ACC1 = (g_buffer(ACC_SRC_A1) * rvb->ACC_COEF_A +\r
- g_buffer(ACC_SRC_B1) * rvb->ACC_COEF_B +\r
- g_buffer(ACC_SRC_C1) * rvb->ACC_COEF_C +\r
- g_buffer(ACC_SRC_D1) * rvb->ACC_COEF_D) >> 15;\r
-\r
- FB_A0 = g_buffer(FB_SRC_A0);\r
- FB_A1 = g_buffer(FB_SRC_A1);\r
- FB_B0 = g_buffer(FB_SRC_B0);\r
- FB_B1 = g_buffer(FB_SRC_B1);\r
+ Lout -= vAPF2 * g_buffer(rvb->mLAPF2_dAPF2); Lout >>= (15-1);\r
+ Rout -= vAPF2 * g_buffer(rvb->mRAPF2_dAPF2); Rout >>= (15-1);\r
+ s_buffer_w(rvb->mLAPF2, Lout);\r
+ s_buffer_w(rvb->mRAPF2, Rout);\r
+ Lout = Lout * vAPF2 + (g_buffer(rvb->mLAPF2_dAPF2) << (15-1));\r
+ Rout = Rout * vAPF2 + (g_buffer(rvb->mRAPF2_dAPF2) << (15-1));\r
\r
- mix_dest_a0 = ACC0 - ((FB_A0 * rvb->FB_ALPHA) >> 15);\r
- mix_dest_a1 = ACC1 - ((FB_A1 * rvb->FB_ALPHA) >> 15);\r
+ out0[0] = out1[0] = (Lout >> (15-1)) * rvb->VolLeft >> 15;\r
+ out0[1] = out1[1] = (Rout >> (15-1)) * rvb->VolRight >> 15;\r
+ if (do_filter)\r
+ reverb_interpolate(sb, curr_addr, out0, out1);\r
\r
- mix_dest_b0 = FB_A0 + (((ACC0 - FB_A0) * rvb->FB_ALPHA - FB_B0 * rvb->FB_X) >> 15);\r
- mix_dest_b1 = FB_A1 + (((ACC1 - FB_A1) * rvb->FB_ALPHA - FB_B1 * rvb->FB_X) >> 15);\r
-\r
- s_buffer(MIX_DEST_A0, mix_dest_a0);\r
- s_buffer(MIX_DEST_A1, mix_dest_a1);\r
- s_buffer(MIX_DEST_B0, mix_dest_b0);\r
- s_buffer(MIX_DEST_B1, mix_dest_b1);\r
-\r
- l = (mix_dest_a0 + mix_dest_b0) / 2;\r
- r = (mix_dest_a1 + mix_dest_b1) / 2;\r
-\r
- l = (l * rvb->VolLeft) >> 15; // 15?\r
- r = (r * rvb->VolRight) >> 15;\r
-\r
- SSumLR[ns++] += l;\r
- SSumLR[ns++] += r;\r
- SSumLR[ns++] += l;\r
- SSumLR[ns++] += r;\r
+ SSumLR[ns++] += out0[0];\r
+ SSumLR[ns++] += out0[1];\r
+ SSumLR[ns++] += out1[0];\r
+ SSumLR[ns++] += out1[1];\r
\r
curr_addr++;\r
- if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr;\r
+ curr_addr = rvb_wrap(curr_addr, space);\r
}\r
}\r
\r
static void MixREVERB_off(int *SSumLR, int ns_to, int curr_addr)\r
{\r
const REVERBInfo *rvb = spu.rvb;\r
+ unsigned short *spuMem = spu.spuMem;\r
int space = 0x40000 - rvb->StartAddr;\r
- int l, r, ns;\r
+ int Lout, Rout, ns;\r
\r
for (ns = 0; ns < ns_to * 2; )\r
{\r
preload(SSumLR + ns + 64*2/4 - 4);\r
\r
- l = (g_buffer(MIX_DEST_A0) + g_buffer(MIX_DEST_B0)) / 2;\r
- r = (g_buffer(MIX_DEST_A1) + g_buffer(MIX_DEST_B1)) / 2;\r
+ // todo: is this missing COMB and APF1?\r
+ Lout = g_buffer(rvb->mLAPF2_dAPF2);\r
+ Rout = g_buffer(rvb->mLAPF2_dAPF2);\r
\r
- l = (l * rvb->VolLeft) >> 15;\r
- r = (r * rvb->VolRight) >> 15;\r
+ Lout = (Lout * rvb->VolLeft) >> 15;\r
+ Rout = (Rout * rvb->VolRight) >> 15;\r
\r
- SSumLR[ns++] += l;\r
- SSumLR[ns++] += r;\r
- SSumLR[ns++] += l;\r
- SSumLR[ns++] += r;\r
+ SSumLR[ns++] += Lout;\r
+ SSumLR[ns++] += Rout;\r
+ SSumLR[ns++] += Lout;\r
+ SSumLR[ns++] += Rout;\r
\r
curr_addr++;\r
if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr;\r
t -= space; \\r
rvb->d = t\r
\r
- prep_offs(IIR_SRC_A0, 32);\r
- prep_offs(IIR_SRC_A1, 34);\r
- prep_offs(IIR_SRC_B0, 36);\r
- prep_offs(IIR_SRC_B1, 38);\r
- prep_offs(IIR_DEST_A0, 20);\r
- prep_offs(IIR_DEST_A1, 22);\r
- prep_offs(IIR_DEST_B0, 36);\r
- prep_offs(IIR_DEST_B1, 38);\r
- prep_offs(ACC_SRC_A0, 24);\r
- prep_offs(ACC_SRC_A1, 26);\r
- prep_offs(ACC_SRC_B0, 28);\r
- prep_offs(ACC_SRC_B1, 30);\r
- prep_offs(ACC_SRC_C0, 40);\r
- prep_offs(ACC_SRC_C1, 42);\r
- prep_offs(ACC_SRC_D0, 44);\r
- prep_offs(ACC_SRC_D1, 46);\r
- prep_offs(MIX_DEST_A0, 52);\r
- prep_offs(MIX_DEST_A1, 54);\r
- prep_offs(MIX_DEST_B0, 56);\r
- prep_offs(MIX_DEST_B1, 58);\r
- prep_offs2(FB_SRC_A0, 52, 0);\r
- prep_offs2(FB_SRC_A1, 54, 0);\r
- prep_offs2(FB_SRC_B0, 56, 2);\r
- prep_offs2(FB_SRC_B1, 58, 2);\r
+ prep_offs(mLSAME, 0x14);\r
+ prep_offs(mRSAME, 0x16);\r
+ prep_offs(mLCOMB1, 0x18);\r
+ prep_offs(mRCOMB1, 0x1a);\r
+ prep_offs(mLCOMB2, 0x1c);\r
+ prep_offs(mRCOMB2, 0x1e);\r
+ prep_offs(dLSAME, 0x20);\r
+ prep_offs(dRSAME, 0x22);\r
+ prep_offs(mLDIFF, 0x24);\r
+ prep_offs(mRDIFF, 0x26);\r
+ prep_offs(mLCOMB3, 0x28);\r
+ prep_offs(mRCOMB3, 0x2a);\r
+ prep_offs(mLCOMB4, 0x2c);\r
+ prep_offs(mRCOMB4, 0x2e);\r
+ prep_offs(dLDIFF, 0x30);\r
+ prep_offs(dRDIFF, 0x32);\r
+ prep_offs(mLAPF1, 0x34);\r
+ prep_offs(mRAPF1, 0x36);\r
+ prep_offs(mLAPF2, 0x38);\r
+ prep_offs(mRAPF2, 0x3a);\r
+ prep_offs2(mLAPF1_dAPF1, 0x34, 0);\r
+ prep_offs2(mRAPF1_dAPF1, 0x36, 0);\r
+ prep_offs2(mLAPF2_dAPF2, 0x38, 2);\r
+ prep_offs2(mRAPF2_dAPF2, 0x3a, 2);\r
\r
#undef prep_offs\r
#undef prep_offs2\r
{\r
if (spu.spuCtrl & 0x80) // -> reverb on? oki\r
{\r
- MixREVERB(SSumLR, RVB, ns_to, curr_addr);\r
+ MixREVERB(SSumLR, RVB, ns_to, curr_addr, 0); //spu.interpolation > 1);\r
}\r
else if (spu.rvb->VolLeft || spu.rvb->VolRight)\r
{\r
\r
#endif\r
\r
-/*\r
------------------------------------------------------------------------------\r
-PSX reverb hardware notes\r
-by Neill Corlett\r
------------------------------------------------------------------------------\r
-\r
-Yadda yadda disclaimer yadda probably not perfect yadda well it's okay anyway\r
-yadda yadda.\r
-\r
------------------------------------------------------------------------------\r
-\r
-Basics\r
-------\r
-\r
-- The reverb buffer is 22khz 16-bit mono PCM.\r
-- It starts at the reverb address given by 1DA2, extends to\r
- the end of sound RAM, and wraps back to the 1DA2 address.\r
-\r
-Setting the address at 1DA2 resets the current reverb work address.\r
-\r
-This work address ALWAYS increments every 1/22050 sec., regardless of\r
-whether reverb is enabled (bit 7 of 1DAA set).\r
-\r
-And the contents of the reverb buffer ALWAYS play, scaled by the\r
-"reverberation depth left/right" volumes (1D84/1D86).\r
-(which, by the way, appear to be scaled so 3FFF=approx. 1.0, 4000=-1.0)\r
-\r
------------------------------------------------------------------------------\r
-\r
-Register names\r
---------------\r
-\r
-These are probably not their real names.\r
-These are probably not even correct names.\r
-We will use them anyway, because we can.\r
-\r
-1DC0: FB_SRC_A (offset)\r
-1DC2: FB_SRC_B (offset)\r
-1DC4: IIR_ALPHA (coef.)\r
-1DC6: ACC_COEF_A (coef.)\r
-1DC8: ACC_COEF_B (coef.)\r
-1DCA: ACC_COEF_C (coef.)\r
-1DCC: ACC_COEF_D (coef.)\r
-1DCE: IIR_COEF (coef.)\r
-1DD0: FB_ALPHA (coef.)\r
-1DD2: FB_X (coef.)\r
-1DD4: IIR_DEST_A0 (offset)\r
-1DD6: IIR_DEST_A1 (offset)\r
-1DD8: ACC_SRC_A0 (offset)\r
-1DDA: ACC_SRC_A1 (offset)\r
-1DDC: ACC_SRC_B0 (offset)\r
-1DDE: ACC_SRC_B1 (offset)\r
-1DE0: IIR_SRC_A0 (offset)\r
-1DE2: IIR_SRC_A1 (offset)\r
-1DE4: IIR_DEST_B0 (offset)\r
-1DE6: IIR_DEST_B1 (offset)\r
-1DE8: ACC_SRC_C0 (offset)\r
-1DEA: ACC_SRC_C1 (offset)\r
-1DEC: ACC_SRC_D0 (offset)\r
-1DEE: ACC_SRC_D1 (offset)\r
-1DF0: IIR_SRC_B1 (offset)\r
-1DF2: IIR_SRC_B0 (offset)\r
-1DF4: MIX_DEST_A0 (offset)\r
-1DF6: MIX_DEST_A1 (offset)\r
-1DF8: MIX_DEST_B0 (offset)\r
-1DFA: MIX_DEST_B1 (offset)\r
-1DFC: IN_COEF_L (coef.)\r
-1DFE: IN_COEF_R (coef.)\r
-\r
-The coefficients are signed fractional values.\r
--32768 would be -1.0\r
- 32768 would be 1.0 (if it were possible... the highest is of course 32767)\r
-\r
-The offsets are (byte/8) offsets into the reverb buffer.\r
-i.e. you multiply them by 8, you get byte offsets.\r
-You can also think of them as (samples/4) offsets.\r
-They appear to be signed. They can be negative.\r
-None of the documented presets make them negative, though.\r
-\r
-Yes, 1DF0 and 1DF2 appear to be backwards. Not a typo.\r
-\r
------------------------------------------------------------------------------\r
-\r
-What it does\r
-------------\r
-\r
-We take all reverb sources:\r
-- regular channels that have the reverb bit on\r
-- cd and external sources, if their reverb bits are on\r
-and mix them into one stereo 44100hz signal.\r
-\r
-Lowpass/downsample that to 22050hz. The PSX uses a proper bandlimiting\r
-algorithm here, but I haven't figured out the hysterically exact specifics.\r
-I use an 8-tap filter with these coefficients, which are nice but probably\r
-not the real ones:\r
-\r
-0.037828187894\r
-0.157538631280\r
-0.321159685278\r
-0.449322115345\r
-0.449322115345\r
-0.321159685278\r
-0.157538631280\r
-0.037828187894\r
-\r
-So we have two input samples (INPUT_SAMPLE_L, INPUT_SAMPLE_R) every 22050hz.\r
-\r
-* IN MY EMULATION, I divide these by 2 to make it clip less.\r
- (and of course the L/R output coefficients are adjusted to compensate)\r
- The real thing appears to not do this.\r
-\r
-At every 22050hz tick:\r
-- If the reverb bit is enabled (bit 7 of 1DAA), execute the reverb\r
- steady-state algorithm described below\r
-- AFTERWARDS, retrieve the "wet out" L and R samples from the reverb buffer\r
- (This part may not be exactly right and I guessed at the coefs. TODO: check later.)\r
- L is: 0.333 * (buffer[MIX_DEST_A0] + buffer[MIX_DEST_B0])\r
- R is: 0.333 * (buffer[MIX_DEST_A1] + buffer[MIX_DEST_B1])\r
-- Advance the current buffer position by 1 sample\r
-\r
-The wet out L and R are then upsampled to 44100hz and played at the\r
-"reverberation depth left/right" (1D84/1D86) volume, independent of the main\r
-volume.\r
-\r
------------------------------------------------------------------------------\r
-\r
-Reverb steady-state\r
--------------------\r
-\r
-The reverb steady-state algorithm is fairly clever, and of course by\r
-"clever" I mean "batshit insane".\r
-\r
-buffer[x] is relative to the current buffer position, not the beginning of\r
-the buffer. Note that all buffer offsets must wrap around so they're\r
-contained within the reverb work area.\r
-\r
-Clipping is performed at the end... maybe also sooner, but definitely at\r
-the end.\r
-\r
-IIR_INPUT_A0 = buffer[IIR_SRC_A0] * IIR_COEF + INPUT_SAMPLE_L * IN_COEF_L;\r
-IIR_INPUT_A1 = buffer[IIR_SRC_A1] * IIR_COEF + INPUT_SAMPLE_R * IN_COEF_R;\r
-IIR_INPUT_B0 = buffer[IIR_SRC_B0] * IIR_COEF + INPUT_SAMPLE_L * IN_COEF_L;\r
-IIR_INPUT_B1 = buffer[IIR_SRC_B1] * IIR_COEF + INPUT_SAMPLE_R * IN_COEF_R;\r
-\r
-IIR_A0 = IIR_INPUT_A0 * IIR_ALPHA + buffer[IIR_DEST_A0] * (1.0 - IIR_ALPHA);\r
-IIR_A1 = IIR_INPUT_A1 * IIR_ALPHA + buffer[IIR_DEST_A1] * (1.0 - IIR_ALPHA);\r
-IIR_B0 = IIR_INPUT_B0 * IIR_ALPHA + buffer[IIR_DEST_B0] * (1.0 - IIR_ALPHA);\r
-IIR_B1 = IIR_INPUT_B1 * IIR_ALPHA + buffer[IIR_DEST_B1] * (1.0 - IIR_ALPHA);\r
-\r
-buffer[IIR_DEST_A0 + 1sample] = IIR_A0;\r
-buffer[IIR_DEST_A1 + 1sample] = IIR_A1;\r
-buffer[IIR_DEST_B0 + 1sample] = IIR_B0;\r
-buffer[IIR_DEST_B1 + 1sample] = IIR_B1;\r
-\r
-ACC0 = buffer[ACC_SRC_A0] * ACC_COEF_A +\r
- buffer[ACC_SRC_B0] * ACC_COEF_B +\r
- buffer[ACC_SRC_C0] * ACC_COEF_C +\r
- buffer[ACC_SRC_D0] * ACC_COEF_D;\r
-ACC1 = buffer[ACC_SRC_A1] * ACC_COEF_A +\r
- buffer[ACC_SRC_B1] * ACC_COEF_B +\r
- buffer[ACC_SRC_C1] * ACC_COEF_C +\r
- buffer[ACC_SRC_D1] * ACC_COEF_D;\r
-\r
-FB_A0 = buffer[MIX_DEST_A0 - FB_SRC_A];\r
-FB_A1 = buffer[MIX_DEST_A1 - FB_SRC_A];\r
-FB_B0 = buffer[MIX_DEST_B0 - FB_SRC_B];\r
-FB_B1 = buffer[MIX_DEST_B1 - FB_SRC_B];\r
-\r
-buffer[MIX_DEST_A0] = ACC0 - FB_A0 * FB_ALPHA;\r
-buffer[MIX_DEST_A1] = ACC1 - FB_A1 * FB_ALPHA;\r
-buffer[MIX_DEST_B0] = (FB_ALPHA * ACC0) - FB_A0 * (FB_ALPHA^0x8000) - FB_B0 * FB_X;\r
-buffer[MIX_DEST_B1] = (FB_ALPHA * ACC1) - FB_A1 * (FB_ALPHA^0x8000) - FB_B1 * FB_X;\r
-\r
------------------------------------------------------------------------------\r
-*/\r
-\r
// vim:shiftwidth=1:expandtab\r
#include "arm_features.h"
#endif
-#ifdef HAVE_ARMV7
+#ifdef HAVE_ARMV6
#define ssat32_to_16(v) \
asm("ssat %0,#16,%1" : "=r" (v) : "r" (v))
#else
int gpos = sb->interp.gauss.pos;
int vl = (spos >> 6) & ~3;
int vr;
- vr = (gauss[vl+0] * gval(0)) >> 15;
- vr += (gauss[vl+1] * gval(1)) >> 15;
- vr += (gauss[vl+2] * gval(2)) >> 15;
- vr += (gauss[vl+3] * gval(3)) >> 15;
- return vr;
+ vr = gauss[vl+0] * gval(0);
+ vr += gauss[vl+1] * gval(1);
+ vr += gauss[vl+2] * gval(2);
+ vr += gauss[vl+3] * gval(3);
+ return vr >> 15;
}
static void decode_block_data(int *dest, const unsigned char *src, int predict_nr, int shift_factor)
static void do_samples_finish(int *SSumLR, int ns_to,
int silentch, int decode_pos)
{
- int vol_l = ((int)regAreaGet(H_SPUmvolL) << 17) >> 17;
- int vol_r = ((int)regAreaGet(H_SPUmvolR) << 17) >> 17;
+ int vol_l = ((int)regAreaGet(H_SPUcmvolL) << 16) >> 17;
+ int vol_r = ((int)regAreaGet(H_SPUcmvolR) << 16) >> 17;
int ns;
int d;
#endif
+long CALLBACK SPUfreeze(unsigned int ulFreezeMode, struct SPUFreeze * pF,
+ unsigned int cycles)
+{
+ if (worker != NULL)
+ sync_worker_thread(1);
+ return DoFreeze(ulFreezeMode, pF, cycles);
+}
+
// SPUINIT: this func will be called first by the main emu
long CALLBACK SPUinit(void)
{
*run_chans = ~spu.dwChannelsAudible & ~spu.dwChannelDead & irq_chans;
*fmod_chans_out = fmod_chans;
*noise_chans_out = noise_chans;
+ // sometimes unused
+ (void)do_samples_skip_fmod;
+ (void)SkipADSR;
}
// vim:shiftwidth=1:expandtab
\r
// internal\r
void ClearWorkingState(void);\r
+long DoFreeze(unsigned int, struct SPUFreeze *, unsigned int);\r
\r
#endif /* __P_SPU_H__ */\r
static void thread_sync_caches(void)
{
if (f.stale_caches) {
- f.dsp_cache_inv_virt(spu.sb_thread, sizeof(spu.sb_thread[0]) * MAXCHAN);
+ f.dsp_cache_inv_virt(spu.sb_thread, sizeof(spu.sb_thread[0]) * (MAXCHAN+1));
f.dsp_cache_inv_virt(spu.spuMemC + 0x800, 0x800);
if (spu.rvb->StartAddr) {
int left = 0x40000 - spu.rvb->StartAddr;
struct region_mem *mem;
int ret;
+ spu.sb_thread = spu.sb_thread_;
if (f.handle == NULL) {
const char lib[] = "libc64.so.1";
int failed = 0;
struct region_mem {
unsigned char spu_ram[512 * 1024];
- sample_buf sb_thread[MAXCHAN];
+ sample_buf sb_thread[MAXCHAN+1];
// careful not to lose ARM writes by DSP overwriting
// with old data when it's writing out neighbor cachelines
- int _pad1[128/4 - ((sizeof(sample_buf) * MAXCHAN / 4) & (128/4 - 1))];
+ int _pad1[128/4 - ((sizeof(sample_buf) * (MAXCHAN+1) / 4) & (128/4 - 1))];
struct spu_in {
// these are not to be modified by DSP
SPUCHAN s_chan[24 + 1];
// nothing to do? Write out non-critical caches
if (dirty) {
syscalls.cache_wb(spu.spuMemC + 0x800, 0x800, 1);
- syscalls.cache_wb(spu.sb_thread, sizeof(spu.sb_thread[0]) * MAXCHAN, 1);
+ syscalls.cache_wb(spu.sb_thread, sizeof(spu.sb_thread_), 1);
if (had_rvb) {
left = 0x40000 - spu.rvb->StartAddr;
syscalls.cache_wb(spu.spuMem + spu.rvb->StartAddr, left * 2, 1);
// account for centering
h -= PreviousPSXDisplay.Range.y0;
+ #error out of date
rcbs->pl_vout_flip(srcs, 1024, PSXDisplay.RGB24, w, h);
}
#pragma GCC diagnostic ignored "-Wmisleading-indentation"
#endif
+#ifdef THREAD_RENDERING
+#include "../gpulib/gpulib_thread_if.h"
+#define do_cmd_list real_do_cmd_list
+#define renderer_init real_renderer_init
+#define renderer_finish real_renderer_finish
+#define renderer_sync_ecmds real_renderer_sync_ecmds
+#define renderer_update_caches real_renderer_update_caches
+#define renderer_flush_queues real_renderer_flush_queues
+#define renderer_set_interlace real_renderer_set_interlace
+#define renderer_set_config real_renderer_set_config
+#define renderer_notify_res_change real_renderer_notify_res_change
+#define renderer_notify_update_lace real_renderer_notify_update_lace
+#define renderer_sync real_renderer_sync
+#define ex_regs scratch_ex_regs
+#endif
+
#define u32 uint32_t
#define INFO_TW 0
{
}
+void renderer_sync(void)
+{
+}
+
+void renderer_notify_update_lace(int updated)
+{
+}
+
#include "../../frontend/plugin_lib.h"
void renderer_set_config(const struct rearmed_cbs *cbs)
{
- iUseDither = cbs->gpu_peops.iUseDither;
+ iUseDither = cbs->dithering;
dwActFixes = cbs->gpu_peops.dwActFixes;
if (cbs->pl_set_gpu_caps)
cbs->pl_set_gpu_caps(0);
-CFLAGS += -ggdb -fPIC -O2 # -Wall\r
+CFLAGS += -ggdb -O2 # -Wall\r
\r
include ../../config.mak\r
\r
\r
#define bool unsigned short\r
\r
+#if 0\r
typedef struct {\r
unsigned int ulFreezeVersion;\r
unsigned int ulStatus;\r
unsigned char psxVRam[1024*1024*2];\r
} GPUFreeze_t;\r
\r
-#if 0\r
long CALLBACK GPUinit();\r
long CALLBACK GPUshutdown();\r
long CALLBACK GPUopen(unsigned long *disp, char *cap, char *cfg);\r
#include "gpuPrim.c"
#include "hud.c"
+static int is_opened;
+
static const short dispWidths[8] = {256,320,512,640,368,384,512,640};
short g_m1,g_m2,g_m3;
short DrawSemiTrans;
#define GPUwriteStatus_ext GPUwriteStatus_ext // for gpulib to see this
void GPUwriteStatus_ext(unsigned int gdata)
{
+ if (!is_opened)
+ return;
+
switch((gdata>>24)&0xff)
{
case 0x00:
#include "../gpulib/gpu.c"
-static int is_opened;
-
static void set_vram(void *vram)
{
psxVub=vram;
return 0;
}
+static void clear_gl_state_for_menu(void)
+{
+ static const GLenum caps[] = {
+ GL_ALPHA_TEST, GL_BLEND, GL_COLOR_LOGIC_OP, GL_COLOR_MATERIAL,
+ GL_CULL_FACE, GL_DEPTH_TEST, GL_FOG, GL_LIGHTING, GL_NORMALIZE,
+ GL_POLYGON_OFFSET_FILL, GL_RESCALE_NORMAL, GL_SAMPLE_ALPHA_TO_COVERAGE,
+ GL_SAMPLE_ALPHA_TO_ONE, GL_SAMPLE_COVERAGE, GL_SCISSOR_TEST, GL_STENCIL_TEST
+ };
+ static const GLenum cstates[] = {
+ GL_COLOR_ARRAY, GL_NORMAL_ARRAY, GL_POINT_SIZE_ARRAY_OES
+ };
+ size_t i;
+ for (i = 0; i < sizeof(caps) / sizeof(caps[0]); i++)
+ glDisable(caps[i]);
+ for (i = 0; i < 6; i++)
+ glDisable(GL_CLIP_PLANE0 + i);
+ for (i = 0; i < 8; i++)
+ glDisable(GL_LIGHT0 + i);
+ for (i = 0; i < sizeof(cstates) / sizeof(cstates[0]); i++)
+ glDisableClientState(cstates[i]);
+
+ glColor4ub(255, 255, 255, 255);
+ glLoadIdentity();
+ glEnable(GL_TEXTURE_2D);
+ glEnableClientState(GL_TEXTURE_COORD_ARRAY);
+ glEnableClientState(GL_VERTEX_ARRAY);
+}
+
void renderer_finish(void)
{
}
return 0;
}
-void vout_update(void)
+int vout_update(void)
{
if(PSXDisplay.Interlaced) // interlaced mode?
{
if(PSXDisplay.DisplayMode.x>0 && PSXDisplay.DisplayMode.y>0)
{
updateDisplay(); // -> swap buffers (new frame)
+ return 1;
}
}
else if(bRenderFrontBuffer) // no interlace mode? and some stuff in front has changed?
{
updateFrontDisplay(); // -> update front buffer
+ return 1;
}
+ return 0;
}
void vout_blank(void)
{
int ret;
+ if (is_opened) {
+ fprintf(stderr, "double GPUopen\n");
+ return -1;
+ }
iResX = cbs->screen_w;
iResY = cbs->screen_h;
rRatioRect.left = rRatioRect.top=0;
long GPUclose(void)
{
+ if (!is_opened)
+ return 0;
is_opened = 0;
KillDisplayLists();
+ clear_gl_state_for_menu();
GLcleanup(); // close OGL
return 0;
}
cbs->pl_set_gpu_caps(GPU_CAP_OWNS_DISPLAY);
if (is_opened && cbs->gles_display != NULL && cbs->gles_surface != NULL) {
- // HACK..
- GPUclose();
- GPUopen(NULL, NULL, NULL);
+ if (cbs->gles_display != display || cbs->gles_surface != surface) {
+ // HACK...
+ fprintf(stderr, "gles reinit hack\n");
+ GPUclose();
+ GPUopen(NULL, NULL, NULL);
+ }
}
set_vram(gpu.vram);
DisplayText(buf, 1);
}
}
+
+void renderer_sync(void)
+{
+}
+
+void renderer_notify_update_lace(int updated)
+{
+}
CFLAGS += -I../../include
CFLAGS += -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP
-CFLAGS += -DNEON_BUILD
-ifeq "$(HAVE_NEON)" "1"
- ifeq "$(HAVE_NEON_ASM)" "1"
- SRC += psx_gpu/psx_gpu_arm_neon.S
- else
- OBJS += psx_gpu/psx_gpu_simd.c
- CFLAGS += -DSIMD_BUILD
- endif
+CFLAGS += -DGPU_NEON -DNEON_BUILD
+ifeq "$(HAVE_NEON_ASM)" "1"
+SRC += psx_gpu/psx_gpu_arm_neon.S
else
-CFLAGS += -fno-strict-aliasing
+SRC += psx_gpu/psx_gpu_simd.c
+CFLAGS += -DSIMD_BUILD
endif
BIN_GPULIB = gpu_neon.so
#include "vector_types.h"
#include "psx_gpu.h"
-#define unlikely(x) __builtin_expect((x), 0)
-
#endif
coverage_x = x1 >> 6;
- mask_up_left = 0xFFFF0000 << coverage_x;
- if(coverage_x < 0)
- mask_up_left = 0xFFFF0000;
+ mask_up_left = 0xFFFF0000;
+ if(coverage_x > 0)
+ mask_up_left <<= coverage_x;
coverage_y = y1 >> 8;
if(coverage_y <= 0)
render_block_handler_struct *render_block_handler =
psx_gpu->render_block_handler;
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+ // the asm doesn't bother to save callee-save vector regs, so do it here
+ __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
+
render_block_handler->texture_blocks(psx_gpu);
render_block_handler->shade_blocks(psx_gpu);
render_block_handler->blend_blocks(psx_gpu);
#endif
psx_gpu->num_blocks = 0;
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+ __asm__ __volatile__("":::"q4","q5","q6","q7");
+#endif
}
}
y##set##_b.e[1] = vertex->b \
-void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
- vertex_struct *b, vertex_struct *c)
+void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu,
+ const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b,
+ const vertex_struct * __restrict__ c)
{
u32 triangle_area = psx_gpu->triangle_area;
u32 winding_mask_scalar;
setup_spans_set_x4(alternate, down, alternate_active); \
height -= 4; \
} while(height > 0); \
+ if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)) \
+ span_uvrg_offset[height - 1].low = span_uvrg_offset[height - 2].low; \
} \
setup_spans_set_x4(alternate, up, alternate_active); \
height -= 4; \
} \
+ if (psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_V) \
+ psx_gpu->span_uvrg_offset[0].low = psx_gpu->span_uvrg_offset[1].low; \
} \
#define index_left 0
setup_spans_set_x4(none, down, no);
height_minor_b -= 4;
}
+ if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V))
+ {
+ span_uvrg_offset[height_minor_b - 1].low =
+ span_uvrg_offset[height_minor_b - 2].low;
+ }
}
left_split_triangles++;
#endif
+// this is some hacky mess, can this be improved somehow?
+// ideally change things to not have to do this hack at all
+void __attribute__((noinline))
+setup_blocks_uv_adj_hack(psx_gpu_struct *psx_gpu, block_struct *block,
+ edge_data_struct *span_edge_data, vec_4x32u *span_uvrg_offset)
+{
+ size_t span_i = span_uvrg_offset - psx_gpu->span_uvrg_offset;
+ if (span_i != 0 && span_i != psx_gpu->num_spans - 1
+ && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U))
+ return;
+ u32 num_blocks = span_edge_data->num_blocks - 1;
+ s32 offset = __builtin_ctz(span_edge_data->right_mask | 0x100) - 1;
+ s32 toffset = 8 * num_blocks + offset - 1;
+ if (toffset < 0 && !(psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_U))
+ return;
+
+ toffset += span_edge_data->left_x;
+ s32 u_dx = psx_gpu->uvrg_dx.low.e[0];
+ s32 v_dx = psx_gpu->uvrg_dx.low.e[1];
+ u32 u = span_uvrg_offset->low.e[0];
+ u32 v = span_uvrg_offset->low.e[1];
+ u += u_dx * toffset;
+ v += v_dx * toffset;
+ u = (u >> 16) & psx_gpu->texture_mask_width;
+ v = (v >> 16) & psx_gpu->texture_mask_height;
+ if (!(psx_gpu->render_state_base & (TEXTURE_MODE_16BPP << 8))) {
+ // 4bpp 8bpp are swizzled
+ u32 u_ = u;
+ u = (u & 0x0f) | ((v & 0x0f) << 4);
+ v = (v & 0xf0) | (u_ >> 4);
+ }
+ assert(offset >= 0);
+ //assert(block->uv.e[offset] == ((v << 8) | u));
+ block->uv.e[offset] = (v << 8) | u;
+}
#define dither_table_entry_normal(value) \
(value) \
vec_8x8s dither_offsets_short; \
\
dither_row = \
- (dither_row >> dither_shift) | (dither_row << (32 - dither_shift)); \
+ (dither_row >> dither_shift) | ((u64)dither_row << (32 - dither_shift)); \
dup_2x32b(vector_cast(vec_2x32u, dither_offsets_short), dither_row); \
setup_blocks_span_initialize_dithered_##texturing() \
#define setup_blocks_store_draw_mask_untextured_direct(_block, bits) \
+#define setup_blocks_uv_adj_hack_untextured(_block, edge_data, uvrg_offset) \
+
+#define setup_blocks_uv_adj_hack_textured(_block, edge_data, uvrg_offset) \
+{ \
+ u32 m_ = AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V; \
+ if (unlikely(psx_gpu->hacks_active & m_)) \
+ setup_blocks_uv_adj_hack(psx_gpu, _block, edge_data, uvrg_offset); \
+} \
#define setup_blocks_add_blocks_indirect() \
num_blocks += span_num_blocks; \
#define setup_blocks_add_blocks_direct() \
stats_add(texel_blocks_untextured, span_num_blocks); \
- span_pixel_blocks += span_num_blocks \
+ stats_add(span_pixel_blocks, span_num_blocks); \
#define setup_blocks_builder(shading, texturing, dithering, sw, target) \
setup_blocks_store_##shading##_##texturing(sw, dithering, target, edge); \
setup_blocks_store_draw_mask_##texturing##_##target(block, \
span_edge_data->right_mask); \
+ setup_blocks_uv_adj_hack_##texturing(block, span_edge_data, \
+ span_uvrg_offset); \
\
block++; \
} \
(triangle_winding_##winding << 6)) \
static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
- vertex_struct *vertexes_out[3])
+ prepared_triangle *triangle_out)
{
- s32 y_top, y_bottom;
+ s32 y_top, y_bottom, offset_x, offset_y, i;
s32 triangle_area;
u32 triangle_winding = 0;
y_bottom = c->y;
y_top = a->y;
+ offset_y = sign_extend_11bit(y_top + psx_gpu->offset_y) - y_top;
if((y_bottom - y_top) >= 512)
{
vertex_swap(a, b);
}
- if((c->x - psx_gpu->offset_x) >= 1024 || (c->x - a->x) >= 1024)
+ if(c->x - a->x >= 1024)
{
#ifdef PROFILE
trivial_rejects++;
return 0;
}
- if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x,
- y_bottom) == 0)
+ offset_x = sign_extend_11bit(a->x + psx_gpu->offset_x) - a->x;
+ if(invalidate_texture_cache_region_viewport(psx_gpu,
+ a->x + offset_x, y_top + offset_y,
+ c->x + offset_x, y_bottom + offset_y) == 0)
{
#ifdef PROFILE
trivial_rejects++;
return 0;
}
+ for (i = 0; i < 3; i++)
+ {
+ vertexes[i].x += offset_x;
+ vertexes[i].y += offset_y;
+ }
+
psx_gpu->triangle_area = triangle_area;
psx_gpu->triangle_winding = triangle_winding;
- vertexes_out[0] = a;
- vertexes_out[1] = b;
- vertexes_out[2] = c;
+ triangle_out->vertexes[0] = a;
+ triangle_out->vertexes[1] = b;
+ triangle_out->vertexes[2] = c;
+ triangle_out->offset_x = offset_x;
+ triangle_out->offset_y = offset_y;
return 1;
}
triangle_set_direction(y_direction_b, y_delta_b);
triangle_set_direction(y_direction_c, y_delta_c);
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+ // the asm doesn't bother to save callee-save vector regs, so do it here
+ __asm__ __volatile__("vstmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7) : "memory");
+#endif
+
compute_all_gradients(psx_gpu, a, b, c);
switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) |
&(render_triangle_block_handlers[render_state]);
((setup_blocks_function_type *)psx_gpu->render_block_handler->setup_blocks)
(psx_gpu);
+
+#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
+ __asm__ __volatile__("vldmia %0, {q4-q7}" :: "r"(psx_gpu->saved_q4_q7));
+#endif
}
void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
u32 flags)
{
- vertex_struct *vertex_ptrs[3];
- if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs))
- render_triangle_p(psx_gpu, vertex_ptrs, flags);
+ prepared_triangle triangle;
+ if (prepare_triangle(psx_gpu, vertexes, &triangle))
+ render_triangle_p(psx_gpu, triangle.vertexes, flags);
}
#if !defined(NEON_BUILD) || defined(SIMD_BUILD)
u32 r = color & 0xFF;
u32 g = (color >> 8) & 0xFF;
u32 b = (color >> 16) & 0xFF;
- u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
- psx_gpu->mask_msb;
+ u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
u32 color_32bpp = color_16bpp | (color_16bpp << 16);
u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
u32 r = color & 0xFF;
u32 g = (color >> 8) & 0xFF;
u32 b = (color >> 16) & 0xFF;
- u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
- psx_gpu->mask_msb;
+ u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
u32 color_32bpp = color_16bpp | (color_16bpp << 16);
u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024));
#define dither_table_row(a, b, c, d) \
- ((a & 0xFF) | ((b & 0xFF) << 8) | ((c & 0xFF) << 16) | ((d & 0xFF) << 24)) \
+ ((a & 0xFF) | ((b & 0xFF) << 8) | ((c & 0xFF) << 16) | ((u32)(d & 0xFF) << 24)) \
void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
{
psx_gpu->dither_table[1] = dither_table_row(2, -2, 3, -1);
psx_gpu->dither_table[2] = dither_table_row(-3, 1, -4, 0);
psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2);
+ psx_gpu->allow_dithering = 1;
psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN;
psx_gpu->saved_hres = 256;
+ psx_gpu->hacks_active = 0;
- // check some offset
+ // check some offsets, asm relies on these
+ psx_gpu->reserved_a[(offsetof(psx_gpu_struct, test_mask) == 0) - 1] = 0;
psx_gpu->reserved_a[(offsetof(psx_gpu_struct, blocks) == psx_gpu_blocks_offset) - 1] = 0;
}
#define SPAN_DATA_BLOCKS_SIZE 32
+#define AHACK_TEXTURE_ADJ_U (1 << 0)
+#define AHACK_TEXTURE_ADJ_V (1 << 1)
+
#ifndef __ASSEMBLER__
#include "vector_types.h"
+#ifndef unlikely
+#define unlikely(x) __builtin_expect((x), 0)
+#endif
+
+#define sign_extend_11bit(value) \
+ (((s32)((value) << 21)) >> 21)
+
typedef enum
{
PRIMITIVE_TYPE_TRIANGLE = 0,
// enhancement stuff
u16 *enhancement_buf_ptr; // main alloc
u16 *enhancement_current_buf_ptr; // offset into above, 4 bufs
+ u32 hacks_active; // AHACK_TEXTURE_ADJ_U ...
u32 saved_hres;
s16 saved_viewport_start_x;
s16 saved_viewport_start_y;
u16 enhancement_scanout_eselect; // eviction selector
u16 enhancement_current_buf;
+ u32 allow_dithering:1;
+ u32 force_dithering:1;
u32 hack_disable_main:1;
u32 hack_texture_adj:1;
// Align up to 64 byte boundary to keep the upcoming buffers cache line
// aligned, also make reachable with single immediate addition
- u8 reserved_a[184 + 9*4 - 9*sizeof(void *)];
+ u8 reserved_a[68 + 9*4 - 9*sizeof(void *)];
+
+ // space for saving regs on c call to flush_render_block_buffer() and asm
+ u32 saved_tmp[48 / sizeof(u32)];
+ u32 saved_q4_q7[64 / sizeof(u32)];
// 8KB
block_struct blocks[MAX_BLOCKS_PER_ROW];
u32 padding;
} vertex_struct;
+typedef struct
+{
+ vertex_struct *vertexes[3];
+ s16 offset_x;
+ s16 offset_y;
+} prepared_triangle;
+
void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
u32 width, u32 height);
void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
void update_texture_8bpp_cache(psx_gpu_struct *psx_gpu);
void flush_render_block_buffer(psx_gpu_struct *psx_gpu);
+void setup_blocks_uv_adj_hack(psx_gpu_struct *psx_gpu, block_struct *block,
+ edge_data_struct *span_edge_data, vec_4x32u *span_uvrg_offset);
+
void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram);
u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command);
#ifdef __MACH__
#define flush_render_block_buffer _flush_render_block_buffer
#define update_texture_8bpp_cache _update_texture_8bpp_cache
+#define setup_blocks_uv_adj_hack _setup_blocks_uv_adj_hack
#endif
@ r0: psx_gpu
#define uvrg q14
#define uvrg_dy q15
+#define uv d28
#define alternate_x_16 d4
subs height, height, #4; \
bhi 2b; \
\
+ nop; \
+ ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]; \
+ tst temp, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V); \
+ beq 1f; \
+ add temp, span_uvrg_offset, height, lsl #4; \
+ vldr uv, [temp, #(-16*2)]; \
+ vstr uv, [temp, #(-16)]; \
+ \
1: \
subs height, height, #4; \
bhi 2b; \
\
+ nop; \
+ ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]; \
+ tst temp, #AHACK_TEXTURE_ADJ_V; \
+ beq 1f; \
+ add temp, psx_gpu, #psx_gpu_span_uvrg_offset_offset; \
+ vldr uv, [temp, #16]; \
+ vstr uv, [temp, #0]; \
+ \
1: \
subs height_minor_b, height_minor_b, #4
bhi 2b
+ nop
+ ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]
+ tst temp, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)
+ beq 1f
+ add temp, span_uvrg_offset, height, lsl #4
+ vldr uv, [temp, #(-16*2)]
+ vstr uv, [temp, #(-16)]
+
1:
setup_spans_epilogue()
#define uvrg_dx_ptr r2
#define texture_mask_ptr r3
+#define hacks_active r6
#define dither_shift r8
#define dither_row r10
#define color_b r5
#undef uvrg
+#undef uv
#define u_block q0
#define v_block q1
#define setup_blocks_texture_unswizzled() \
+#define setup_blocks_uv_adj_hack_textured(hacks_active) \
+ tst hacks_active, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V); \
+ beq 91f; \
+ \
+ /* pushing odd num of regs here realigns our unaligned stack */ \
+ vstr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vstr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
+ push { r0 - r4, EXTRA_UNSAVED_REGS r12, r14 }; \
+ mov r12, span_uvrg_offset; \
+ sub r1, block_ptr_a, #64; \
+ mov r2, span_edge_data; \
+ mov r3, r12; \
+ bl setup_blocks_uv_adj_hack; /* psx_gpu=r0 */ \
+ pop { r0 - r4, EXTRA_UNSAVED_REGS r12, r14 }; \
+ vldr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vldr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
+ \
+ vadd.u32 uvrg_dx8, uvrg_dx4, uvrg_dx4; \
+91: \
+
#define setup_blocks_shaded_textured_builder(swizzling) \
.align 3; \
vld1.u32 { test_mask }, [psx_gpu, :128]; \
vdup.u8 draw_mask, right_mask; \
\
+ ldr hacks_active, [psx_gpu, #psx_gpu_hacks_active_offset]; \
vmov.u32 fb_mask_ptrs[0], right_mask; \
vtst.u16 draw_mask, draw_mask, test_mask; \
vzip.u8 u_whole_8, v_whole_8; \
vst1.u32 { dither_offsets }, [block_ptr_b, :128], c_32; \
vst1.u32 { b_whole_8, fb_mask_ptrs }, [block_ptr_a, :128], c_32; \
\
+ setup_blocks_uv_adj_hack_textured(hacks_active); \
+ \
1: \
add span_uvrg_offset, span_uvrg_offset, #16; \
add span_b_offset, span_b_offset, #4; \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
- /* TODO: Load from psx_gpu instead of saving/restoring these */\
- vpush { texture_mask }; \
- vpush { uvrg_dx4 }; \
- \
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
+ vstr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vstr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
+ /* pushing odd num of regs here realigns our unaligned stack */ \
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
- \
- vpop { uvrg_dx4 }; \
- vpop { texture_mask }; \
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
+ vldr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vldr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
\
vadd.u32 uvrg_dx8, uvrg_dx4, uvrg_dx4; \
vmov.u8 fb_mask_ptrs, #0; \
vld1.u32 { test_mask }, [psx_gpu, :128]; \
vdup.u8 draw_mask, right_mask; \
\
+ ldr hacks_active, [psx_gpu, #psx_gpu_hacks_active_offset]; \
vmov.u32 fb_mask_ptrs[0], right_mask; \
vtst.u16 draw_mask, draw_mask, test_mask; \
vzip.u8 u_whole_8, v_whole_8; \
vst1.u32 { dither_offsets }, [block_ptr_b, :128], c_32; \
vst1.u32 { b_whole_8, fb_mask_ptrs }, [block_ptr_a, :128], c_32; \
\
+ setup_blocks_uv_adj_hack_textured(hacks_active); \
+ \
1: \
add span_uvrg_offset, span_uvrg_offset, #16; \
add span_edge_data, span_edge_data, #8; \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
- /* TODO: Load from psx_gpu instead of saving/restoring these */\
- vpush { texture_mask }; \
- vpush { uvrg_dx4 }; \
- \
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
+ vstr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vstr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
- \
- vpop { uvrg_dx4 }; \
- vpop { texture_mask }; \
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
+ vldr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vldr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
\
vadd.u32 uvrg_dx8, uvrg_dx4, uvrg_dx4; \
vmov.u8 fb_mask_ptrs, #0; \
ldmia sp!, { r4 - r11, pc }
2:
- vpush { colors }
-
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
+ vstr d4, [r0, #psx_gpu_saved_tmp_offset] /* colors */
+ vstr d5, [r0, #psx_gpu_saved_tmp_offset + 8]
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12 }
bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
-
- vpop { colors }
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 }
+ vldr d4, [r0, #psx_gpu_saved_tmp_offset]
+ vldr d5, [r0, #psx_gpu_saved_tmp_offset + 8]
vld1.u32 { test_mask }, [psx_gpu, :128]
veor.u32 draw_mask, draw_mask, draw_mask
bne 0b; \
\
restore_abi_regs(); \
- ldmia sp!, { r4 - r11, pc }; \
+ pop { r4 - r11, pc }; \
\
2: \
- /* TODO: Load from psx_gpu instead of saving/restoring these */\
- vpush { rg_dx4 }; \
- \
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ vstr rg_dx4, [r0, #psx_gpu_saved_tmp_offset]; \
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
- \
- vpop { rg_dx4 }; \
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
+ vldr rg_dx4, [r0, #psx_gpu_saved_tmp_offset]; \
\
vmov.u8 d64_1, #1; \
vmov.u8 d128_4, #4; \
.align 3
function(texture_blocks_8bpp)
- stmdb sp!, { r3 - r11, r14 }
+ push { r4 - r11, lr }
add block_ptr, psx_gpu, #psx_gpu_blocks_offset
ldr texture_ptr, [psx_gpu, #psx_gpu_texture_page_ptr_offset]
add block_ptr, block_ptr, #64
bne 0b
- ldmia sp!, { r3 - r11, pc }
+ pop { r4 - r11, pc }
1:
- stmdb sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 }
-
- bl update_texture_8bpp_cache
-
- ldmia sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 }
- bal 0b
+ /* pushing odd num of regs here realigns our unaligned stack */
+ push { r1 - r2, EXTRA_UNSAVED_REGS r12 }
+ bl update_texture_8bpp_cache
+ pop { r1 - r2, EXTRA_UNSAVED_REGS r12 }
+ bal 0b
#undef uv_0
#undef vram_ptr
#undef color
-#undef width
-#undef height
-#undef pitch
-
-#define vram_ptr r0
-#define color r1
-#define width r2
-#define height r3
-
-#define pitch r1
-
-#define num_width r12
-
-#undef colors_a
-#undef colors_b
-
-#define colors_a q0
-#define colors_b q1
-
-.align 3
-
-function(render_block_fill_body)
- vdup.u16 colors_a, color
- mov pitch, #2048
-
- vmov colors_b, colors_a
- sub pitch, pitch, width, lsl #1
-
- mov num_width, width
-
- 0:
- vst1.u32 { colors_a, colors_b }, [vram_ptr, :256]!
-
- subs num_width, num_width, #16
- bne 0b
-
- add vram_ptr, vram_ptr, pitch
- mov num_width, width
-
- subs height, height, #1
- bne 0b
-
- bx lr
-
-
#undef x
#undef y
#undef width
#define texels_wide_high d15
#define texels_wide q7
+.align 3
setup_sprite_flush_blocks:
- vpush { q1 - q5 }
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12, lr }
+ add block, r0, #psx_gpu_saved_tmp_offset /* r5 */
+ vstmia block, { q1 - q3 }
+ bl flush_render_block_buffer
+ vldmia block, { q1 - q3 }
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12, lr }
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
- bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
-
- vpop { q1 - q5 }
-
- add block, psx_gpu, #psx_gpu_blocks_offset
- bx lr
+ add block, psx_gpu, #psx_gpu_blocks_offset
+ bx lr
setup_sprite_update_texture_4bpp_cache:
- stmdb sp!, { r0 - r3, r14 }
+ push { r0 - r4, lr }
bl update_texture_4bpp_cache
- ldmia sp!, { r0 - r3, pc }
+ pop { r0 - r4, pc }
setup_sprite_update_texture_8bpp_cache:
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r14 }
+ push { r0 - r4, EXTRA_UNSAVED_REGS lr }
bl update_texture_8bpp_cache
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS pc }
+ pop { r0 - r4, EXTRA_UNSAVED_REGS pc }
#define setup_sprite_tiled_initialize_4bpp() \
setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \
\
setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \
- restore_abi_regs(); \
- ldmia sp!, { r4 - r11, pc } \
+ vpop { q4 - q7 }; \
+ pop { r3 - r11, pc } \
#define setup_sprite_tiled_advance_column() \
add texture_offset_base, texture_offset_base, #0x100; \
\
setup_sprite_tiled_advance_column(); \
setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\
- restore_abi_regs(); \
- ldmia sp!, { r4 - r11, pc } \
+ vpop { q4 - q7 }; \
+ pop { r3 - r11, pc } \
#define setup_sprite_offset_u_adjust() \
.align 4; \
\
function(setup_sprite_##texture_mode##x4mode) \
- stmdb sp!, { r4 - r11, r14 }; \
+ push { r3 - r11, lr }; \
setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
\
- ldr v, [sp, #36]; \
+ ldr v, [sp, #4*(10+0)]; \
and offset_u, u, #0xF; \
\
- ldr width, [sp, #40]; \
+ ldr width, [sp, #4*(10+1)]; \
ldr fb_ptr, [psx_gpu, #psx_gpu_vram_out_ptr_offset]; \
\
- ldr height, [sp, #44]; \
+ ldr height, [sp, #4*(10+2)]; \
add fb_ptr, fb_ptr, y, lsl #11; \
\
- save_abi_regs(); \
+ vpush { q4 - q7 }; \
\
add fb_ptr, fb_ptr, x, lsl #1; \
and offset_v, v, #0xF; \
#define texels_67 r9
function(texture_sprite_blocks_8bpp)
- stmdb sp!, { r4 - r11, r14 }
+ push { r4 - r11, r14 }
movw texel_shift_mask, #(0xFF << 1)
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
add block_ptr, block_ptr, #64
bne 0b
+ nop
- ldmia sp!, { r4 - r11, pc }
+ pop { r4 - r11, pc }
#undef width_rounded
setup_sprites_16bpp_flush:
- vpush { d0 - d3 }
-
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
- bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
-
- vpop { d0 - d3 }
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
+ add r1, r0, #psx_gpu_saved_tmp_offset
+ vstmia r1, { d0 - d3 }
+ bl flush_render_block_buffer
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 }
+ add lr, r0, #psx_gpu_saved_tmp_offset
+ vldmia lr, { d0 - d3 }
add block, psx_gpu, #psx_gpu_blocks_offset
mov num_blocks, block_width
- bx lr
+ pop { pc }
function(setup_sprite_16bpp)
- stmdb sp!, { r4 - r11, r14 }
+ push { r3 - r11, lr }
ldr fb_ptr, [psx_gpu, #psx_gpu_vram_out_ptr_offset]
- ldr v, [sp, #36]
+ ldr v, [sp, #4*(10+0)]
add fb_ptr, fb_ptr, y, lsl #11
- ldr width, [sp, #40]
+ ldr width, [sp, #4*(10+1)]
add fb_ptr, fb_ptr, x, lsl #1
- ldr height, [sp, #44]
+ ldr height, [sp, #4*(10+2)]
and left_offset, u, #0x7
add texture_offset_base, u, u
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bne 1b
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
0:
add num_blocks, num_blocks, block_width
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bne 0b
+ nop
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
// 4x version
#undef draw_mask_fb_ptr
function(setup_sprite_16bpp_4x)
- stmdb sp!, { r4 - r11, r14 }
+ push { r3 - r11, lr }
ldr fb_ptr, [psx_gpu, #psx_gpu_vram_out_ptr_offset]
- ldr v, [sp, #36]
+ ldr v, [sp, #4*(10+0)]
add fb_ptr, fb_ptr, y, lsl #11
- ldr width, [sp, #40]
+ ldr width, [sp, #4*(10+1)]
add fb_ptr, fb_ptr, x, lsl #1
- ldr height, [sp, #44]
+ ldr height, [sp, #4*(10+2)]
and left_offset, u, #0x7
add texture_offset_base, u, u
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bne 1b
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
0:
add num_blocks, num_blocks, block_width
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bne 0b
+ nop
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
#undef width
.align 3
function(setup_sprite_untextured_512)
- stmdb sp!, { r4 - r11, r14 }
+ push { r4 - r11, r14 }
- ldr width, [sp, #40]
+ ldr width, [sp, #4*(9+1)]
ldr fb_ptr, [psx_gpu, #psx_gpu_vram_out_ptr_offset]
- ldr height, [sp, #44]
+ ldr height, [sp, #4*(9+2)]
add fb_ptr, fb_ptr, y, lsl #11
add fb_ptr, fb_ptr, x, lsl #1
sub right_width, width, #1
- ldr color, [sp, #48]
+ ldr color, [sp, #4*(9+3)]
and right_width, #7
add block_width, width, #7
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bgt setup_sprite_untextured_height_loop
- ldmia sp!, { r4 - r11, pc }
+ pop { r4 - r11, pc }
#define texel_block_expanded_cd q3
function(update_texture_4bpp_cache)
- stmdb sp!, { r4 - r11, r14 }
+ push { r3 - r11, r14 }
vpush { q0 - q3 }
ldrb current_texture_page, [psx_gpu, #psx_gpu_current_texture_page_offset]
bne 0b
vpop { q0 - q3 }
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
#undef current_texture_page
function(update_texture_8bpp_cache_slice)
stmdb sp!, { r4 - r11, r14 }
- vpush { q0 - q3 }
ldrb current_texture_page, [psx_gpu, #psx_gpu_current_texture_page_offset]
ldr vram_ptr_a, [psx_gpu, #psx_gpu_vram_ptr_offset]
bne 0b
- vpop { q0 - q3 }
ldmia sp!, { r4 - r11, pc }
#include "SDL.h"
#include "common.h"
+#include "../../gpulib/gpu.h"
+#include "psx_gpu.c"
+#include "psx_gpu_parse.c"
+
+#pragma GCC diagnostic ignored "-Wunused-result"
+
extern u32 span_pixels;
extern u32 span_pixel_blocks;
extern u32 spans;
FILE *list_file;
u32 no_display = 0;
s32 dummy0 = 0;
- u32 dummy1 = 0;
+ s32 dummy1 = 0;
+ u32 dummy2 = 0;
if((argc != 3) && (argc != 4))
{
u32 fbdev_handle = open("/dev/fb1", O_RDWR);
vram_ptr = (mmap((void *)0x50000000, 1024 * 1024 * 2, PROT_READ | PROT_WRITE,
MAP_SHARED | 0xA0000000, fbdev_handle, 0));
-#elif 1
+#elif 0
#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000 /* arch specific */
#endif
clear_stats();
-#ifdef NEON_BUILD
+#ifdef CYCLE_COUNTER
init_counter();
#endif
- gpu_parse(psx_gpu, list, size, &dummy0, &dummy1);
+ gpu_parse(psx_gpu, list, size, &dummy0, &dummy1, &dummy2);
flush_render_block_buffer(psx_gpu);
clear_stats();
-#ifdef NEON_BUILD
+#ifdef CYCLE_COUNTER
u32 cycles = get_counter();
#endif
- gpu_parse(psx_gpu, list, size, &dummy0, &dummy1);
+ gpu_parse(psx_gpu, list, size, &dummy0, &dummy1, &dummy2);
flush_render_block_buffer(psx_gpu);
-#ifdef NEON_BUILD
+#ifdef CYCLE_COUNTER
u32 cycles_elapsed = get_counter() - cycles;
printf("%-64s: %d\n", argv[1], cycles_elapsed);
#define psx_gpu_texture_mask_width_offset 0xfa
#define psx_gpu_texture_mask_height_offset 0xfb
#define psx_gpu_reciprocal_table_ptr_offset 0x108
+#define psx_gpu_hacks_active_offset 0x114
+#define psx_gpu_saved_tmp_offset 0x190
#define psx_gpu_blocks_offset 0x200
#define psx_gpu_span_uvrg_offset_offset 0x2200
#define psx_gpu_span_edge_data_offset 0x4200
//WRITE_OFFSET(f, clut_settings);
//WRITE_OFFSET(f, texture_settings);
WRITE_OFFSET(f, reciprocal_table_ptr);
+ WRITE_OFFSET(f, hacks_active);
+ WRITE_OFFSET(f, saved_tmp);
+ //WRITE_OFFSET(f, saved_q4_q7);
WRITE_OFFSET(f, blocks);
WRITE_OFFSET(f, span_uvrg_offset);
WRITE_OFFSET(f, span_edge_data);
#include "common.h"
#include "../../gpulib/gpu_timing.h"
+#include "../../gpulib/gpu.h"
#ifndef command_lengths
const u8 command_lengths[256] =
};
#endif
-void update_texture_ptr(psx_gpu_struct *psx_gpu)
+static void update_texture_ptr(psx_gpu_struct *psx_gpu)
{
u8 *texture_base;
u8 *texture_ptr;
psx_gpu->texture_page_ptr = texture_ptr;
}
-void set_texture(psx_gpu_struct *psx_gpu, u32 texture_settings)
+static void set_texture(psx_gpu_struct *psx_gpu, u32 texture_settings)
{
texture_settings &= 0x1FF;
if(psx_gpu->texture_settings != texture_settings)
psx_gpu->render_state_base = render_state_base;
- psx_gpu->current_texture_mask = 0x1 << new_texture_page;
+ psx_gpu->current_texture_mask = 1u << new_texture_page;
if(texture_mode == TEXTURE_MODE_8BPP)
{
// In 8bpp mode 256x256 takes up two pages. If it's on the right edge it
// wraps back around to the left edge.
u32 adjacent_texture_page = ((texture_settings + 1) & 0xF) | (texture_settings & 0x10);
- psx_gpu->current_texture_mask |= 0x1 << adjacent_texture_page;
+ psx_gpu->current_texture_mask |= 1u << adjacent_texture_page;
if((psx_gpu->last_8bpp_texture_page ^ new_texture_page) & 0x1)
{
}
}
-void set_clut(psx_gpu_struct *psx_gpu, u32 clut_settings)
+static void set_clut(psx_gpu_struct *psx_gpu, u32 clut_settings)
{
- if(psx_gpu->clut_settings != clut_settings)
+ clut_settings &= 0x7FFF;
+ if (psx_gpu->clut_settings != clut_settings)
{
flush_render_block_buffer(psx_gpu);
psx_gpu->clut_settings = clut_settings;
- psx_gpu->clut_ptr = psx_gpu->vram_ptr + ((clut_settings & 0x7FFF) * 16);
+ psx_gpu->clut_ptr = psx_gpu->vram_ptr + clut_settings * 16;
}
}
-void set_triangle_color(psx_gpu_struct *psx_gpu, u32 triangle_color)
+static void set_triangle_color(psx_gpu_struct *psx_gpu, u32 triangle_color)
{
if(psx_gpu->triangle_color != triangle_color)
{
}
}
-#define sign_extend_12bit(value) \
- (((s32)((value) << 20)) >> 20) \
-
-#define sign_extend_11bit(value) \
- (((s32)((value) << 21)) >> 21) \
-
-#define sign_extend_10bit(value) \
- (((s32)((value) << 22)) >> 22) \
-
-
#define get_vertex_data_xy(vertex_number, offset16) \
- vertexes[vertex_number].x = \
- sign_extend_12bit(list_s16[offset16]) + psx_gpu->offset_x; \
- vertexes[vertex_number].y = \
- sign_extend_12bit(list_s16[(offset16) + 1]) + psx_gpu->offset_y; \
+ vertexes[vertex_number].x = sign_extend_11bit(list_s16[offset16]); \
+ vertexes[vertex_number].y = sign_extend_11bit(list_s16[(offset16) + 1]); \
#define get_vertex_data_uv(vertex_number, offset16) \
vertexes[vertex_number].u = list_s16[offset16] & 0xFF; \
#define SET_Ex(r, v)
#endif
+// Draw one textured sprite/rectangle primitive.
+// list[0] = command byte (bits 24+) and color word, list[1] = packed x|y
+// (low|high 16 bits), list[2] = packed u|v in the low half and the CLUT
+// address in the high half. width/height are the requested extents.
+static void textured_sprite(psx_gpu_struct *psx_gpu, const u32 *list,
+ s32 width, s32 height, u32 *cpu_cycles_sum, u32 *cpu_cycles)
+{
+ // Apply the current draw offset, then wrap into the GPU's signed
+ // 11-bit coordinate range.
+ s32 x = sign_extend_11bit(list[1] + psx_gpu->offset_x);
+ s32 y = sign_extend_11bit((list[1] >> 16) + psx_gpu->offset_y);
+ u8 v = (list[2] >> 8) & 0xff;
+ u8 u = list[2] & 0xff;
+
+ set_clut(psx_gpu, list[2] >> 16);
+
+ // render_sprite takes width/height by pointer; presumably it clips them
+ // to the draw area so the cycle charge below reflects the visible size
+ // -- TODO confirm against render_sprite (not visible in this chunk).
+ render_sprite(psx_gpu, x, y, u, v, &width, &height, list[0] >> 24, list[0]);
+ gput_sum(*cpu_cycles_sum, *cpu_cycles, gput_sprite(width, height));
+}
+
+// Subtract the per-triangle offset recorded in `triangle` back out of the
+// first three vertexes. Used between the two halves of a quad so the
+// shared vertexes (1, 2) are re-prepared from their original coordinates.
+// NOTE(review): assumes prepare_triangle added exactly
+// triangle->offset_x/offset_y to each vertex it accepted -- confirm in
+// prepare_triangle (not visible in this chunk).
+static void undo_offset(vertex_struct *vertexes, prepared_triangle *triangle)
+{
+ s32 i;
+ for (i = 0; i < 3; i++)
+ {
+ vertexes[i].x -= triangle->offset_x;
+ vertexes[i].y -= triangle->offset_y;
+ }
+}
+
+// Prepare and render a single triangle. prepare_triangle() vets the
+// primitive (it may reject it -- exact criteria live in prepare_triangle,
+// not visible here) and fills `triangle` with the vertex pointers that
+// render_triangle_p() consumes.
+static void do_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
+ u32 current_command)
+{
+ prepared_triangle triangle;
+ if (prepare_triangle(psx_gpu, vertexes, &triangle))
+ render_triangle_p(psx_gpu, triangle.vertexes, current_command);
+}
+
+// Render a quad as two triangles: vertexes[0..2] and vertexes[1..3].
+// The two calls share vertexes 1 and 2; prepare_triangle() appears to
+// bias vertex coordinates in place (see undo_offset), so the first
+// triangle's offset is undone before preparing the second. The undo is
+// only done when the first prepare succeeded -- presumably the offset is
+// only applied on success (TODO confirm in prepare_triangle).
+static void do_quad(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
+ u32 current_command)
+{
+ prepared_triangle triangle;
+ if (prepare_triangle(psx_gpu, vertexes, &triangle))
+ {
+ render_triangle_p(psx_gpu, triangle.vertexes, current_command);
+ undo_offset(vertexes, &triangle);
+ }
+ if (prepare_triangle(psx_gpu, vertexes + 1, &triangle))
+ render_triangle_p(psx_gpu, triangle.vertexes, current_command);
+}
+
u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
u32 current_command = 0, command_length;
u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last;
+ u32 simplified_prim[4*4];
u32 *list_start = list;
u32 *list_end = list + (size / 4);
get_vertex_data_xy(1, 4);
get_vertex_data_xy(2, 6);
- render_triangle(psx_gpu, vertexes, current_command);
+ do_triangle(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
break;
}
get_vertex_data_xy_uv(1, 6);
get_vertex_data_xy_uv(2, 10);
- render_triangle(psx_gpu, vertexes, current_command);
+ do_triangle(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
break;
}
get_vertex_data_xy(1, 4);
get_vertex_data_xy(2, 6);
get_vertex_data_xy(3, 8);
-
- render_triangle(psx_gpu, vertexes, current_command);
- render_triangle(psx_gpu, &(vertexes[1]), current_command);
+
+ do_quad(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
break;
}
case 0x2C ... 0x2F:
{
- set_clut(psx_gpu, list_s16[5]);
- set_texture(psx_gpu, list_s16[9]);
+ u32 i, simplified_count;
+ set_texture(psx_gpu, list[4] >> 16);
+ if (!(psx_gpu->render_state_base & RENDER_STATE_DITHER) &&
+ (simplified_count = prim_try_simplify_quad_t(simplified_prim, list)))
+ {
+ for (i = 0; i < simplified_count; i++) {
+ const u32 *list_ = &simplified_prim[i * 4];
+ textured_sprite(psx_gpu, list_, list_[3] & 0x3FF,
+ (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
+ }
+ break;
+ }
+
+ set_clut(psx_gpu, list[2] >> 16);
set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
get_vertex_data_xy_uv(0, 2);
get_vertex_data_xy_uv(1, 6);
get_vertex_data_xy_uv(2, 10);
get_vertex_data_xy_uv(3, 14);
-
- render_triangle(psx_gpu, vertexes, current_command);
- render_triangle(psx_gpu, &(vertexes[1]), current_command);
+
+ do_quad(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
break;
}
get_vertex_data_xy_rgb(1, 4);
get_vertex_data_xy_rgb(2, 8);
- render_triangle(psx_gpu, vertexes, current_command);
+ do_triangle(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
break;
}
get_vertex_data_xy_uv_rgb(1, 6);
get_vertex_data_xy_uv_rgb(2, 12);
- render_triangle(psx_gpu, vertexes, current_command);
+ do_triangle(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
break;
}
get_vertex_data_xy_rgb(1, 4);
get_vertex_data_xy_rgb(2, 8);
get_vertex_data_xy_rgb(3, 12);
-
- render_triangle(psx_gpu, vertexes, current_command);
- render_triangle(psx_gpu, &(vertexes[1]), current_command);
+
+ do_quad(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
break;
}
case 0x3C ... 0x3F:
{
- set_clut(psx_gpu, list_s16[5]);
- set_texture(psx_gpu, list_s16[11]);
+ u32 i, simplified_count;
+ set_texture(psx_gpu, list[5] >> 16);
+ if (!(psx_gpu->render_state_base & RENDER_STATE_DITHER) &&
+ (simplified_count = prim_try_simplify_quad_gt(simplified_prim, list)))
+ {
+ for (i = 0; i < simplified_count; i++) {
+ const u32 *list_ = &simplified_prim[i * 4];
+ textured_sprite(psx_gpu, list_, list_[3] & 0x3FF,
+ (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
+ }
+ break;
+ }
+
+ set_clut(psx_gpu, list[2] >> 16);
get_vertex_data_xy_uv_rgb(0, 0);
get_vertex_data_xy_uv_rgb(1, 6);
get_vertex_data_xy_uv_rgb(2, 12);
get_vertex_data_xy_uv_rgb(3, 18);
-
- render_triangle(psx_gpu, vertexes, current_command);
- render_triangle(psx_gpu, &(vertexes[1]), current_command);
+
+ do_quad(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
break;
}
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
-
- case 0x64 ... 0x67:
- {
- u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u32 uv = list_s16[4];
- s32 width = list_s16[6] & 0x3FF;
- s32 height = list_s16[7] & 0x1FF;
-
- set_clut(psx_gpu, list_s16[5]);
- render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
+ case 0x64 ... 0x67:
+ textured_sprite(psx_gpu, list, list[3] & 0x3FF, (list[3] >> 16) & 0x1FF,
+ &cpu_cycles_sum, &cpu_cycles);
break;
- }
-
+
case 0x68 ... 0x6B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
break;
}
-
- case 0x74 ... 0x77:
- {
- s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u32 uv = list_s16[4];
- s32 width = 8, height = 8;
-
- set_clut(psx_gpu, list_s16[5]);
- render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
+ case 0x74 ... 0x77:
+ textured_sprite(psx_gpu, list, 8, 8, &cpu_cycles_sum, &cpu_cycles);
break;
- }
-
+
case 0x78 ... 0x7B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
}
case 0x7C ... 0x7F:
- {
- s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u32 uv = list_s16[4];
- s32 width = 16, height = 16;
-
- set_clut(psx_gpu, list_s16[5]);
-
- render_sprite(psx_gpu, x, y, uv & 0xFF, (uv >> 8) & 0xFF,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
+ textured_sprite(psx_gpu, list, 16, 16, &cpu_cycles_sum, &cpu_cycles);
break;
- }
#ifdef PCSX
case 0x1F: // irq?
case 0xE1:
set_texture(psx_gpu, list[0]);
- if(list[0] & (1 << 9))
+ if ((psx_gpu->allow_dithering && (list[0] & (1 << 9)))
+ || psx_gpu->force_dithering)
psx_gpu->render_state_base |= RENDER_STATE_DITHER;
else
psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
case 0xE5:
{
- s32 offset_x = list[0] << 21;
- s32 offset_y = list[0] << 10;
- psx_gpu->offset_x = offset_x >> 21;
- psx_gpu->offset_y = offset_y >> 21;
+ psx_gpu->offset_x = sign_extend_11bit(list[0]);
+ psx_gpu->offset_y = sign_extend_11bit(list[0] >> 11);
SET_Ex(5, list[0]);
break;
psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y; \
psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x; \
psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y; \
+ psx_gpu->hacks_active = 0; \
psx_gpu->uvrgb_phase = 0x8000; \
}
psx_gpu->viewport_end_y = psx_gpu->saved_viewport_end_y * 2 + 1;
if (psx_gpu->viewport_end_x - psx_gpu->viewport_start_x + 1 > 1024)
psx_gpu->viewport_end_x = psx_gpu->viewport_start_x + 1023;
- psx_gpu->uvrgb_phase = 0x7fff;
+ //psx_gpu->uvrgb_phase = 0x7fff;
return 1;
}
return 1;
}
-static int is_in_array(int val, int array[], int len)
-{
- int i;
- for (i = 0; i < len; i++)
- if (array[i] == val)
- return 1;
- return 0;
-}
-
-static int make_members_unique(int array[], int len)
+static u32 uv_hack(psx_gpu_struct *psx_gpu, const vertex_struct *vertex_ptrs)
{
- int i, j;
- for (i = j = 1; i < len; i++)
- if (!is_in_array(array[i], array, j))
- array[j++] = array[i];
-
- if (array[0] > array[1]) {
- i = array[0]; array[0] = array[1]; array[1] = i;
- }
- return j;
-}
-
-static void patch_u(vertex_struct *vertex_ptrs, int count, int old, int new)
-{
- int i;
- for (i = 0; i < count; i++)
- if (vertex_ptrs[i].u == old)
- vertex_ptrs[i].u = new;
-}
-
-static void patch_v(vertex_struct *vertex_ptrs, int count, int old, int new)
-{
- int i;
- for (i = 0; i < count; i++)
- if (vertex_ptrs[i].v == old)
- vertex_ptrs[i].v = new;
-}
-
-// this sometimes does more harm than good, like in PE2
-static void uv_hack(vertex_struct *vertex_ptrs, int vertex_count)
-{
- int i, u[4], v[4];
-
- for (i = 0; i < vertex_count; i++) {
- u[i] = vertex_ptrs[i].u;
- v[i] = vertex_ptrs[i].v;
- }
- if (make_members_unique(u, vertex_count) == 2 && u[1] - u[0] >= 8) {
- if ((u[0] & 7) == 7) {
- patch_u(vertex_ptrs, vertex_count, u[0], u[0] + 1);
- //printf("u hack: %3u-%3u -> %3u-%3u\n", u[0], u[1], u[0]+1, u[1]);
- }
- else if ((u[1] & 7) == 0 || u[1] - u[0] > 128) {
- patch_u(vertex_ptrs, vertex_count, u[1], u[1] - 1);
- //printf("u hack: %3u-%3u -> %3u-%3u\n", u[0], u[1], u[0], u[1]-1);
- }
- }
- if (make_members_unique(v, vertex_count) == 2 && ((v[0] - v[1]) & 7) == 0) {
- if ((v[0] & 7) == 7) {
- patch_v(vertex_ptrs, vertex_count, v[0], v[0] + 1);
- //printf("v hack: %3u-%3u -> %3u-%3u\n", v[0], v[1], v[0]+1, v[1]);
- }
- else if ((v[1] & 7) == 0) {
- patch_v(vertex_ptrs, vertex_count, v[1], v[1] - 1);
- //printf("v hack: %3u-%3u -> %3u-%3u\n", v[0], v[1], v[0], v[1]-1);
- }
+ int i, have_right_edge = 0, have_bottom_edge = 0, bad_u = 0, bad_v = 0;
+ u32 hacks = 0;
+
+ for (i = 0; i < 3; i++) {
+ int j = (i + 1) % 3, k = (i + 2) % 3;
+ int du = abs((int)vertex_ptrs[i].u - (int)vertex_ptrs[j].u);
+ int dv = abs((int)vertex_ptrs[i].v - (int)vertex_ptrs[j].v);
+ if (du && (du & 7) != 7)
+ bad_u = 1;
+ if (dv && (dv & 7) != 7)
+ bad_v = 1;
+ if (vertex_ptrs[i].x == vertex_ptrs[j].x && vertex_ptrs[k].x < vertex_ptrs[j].x)
+ have_right_edge = 1;
+ if (vertex_ptrs[i].y == vertex_ptrs[j].y)// && vertex_ptrs[k].y < vertex_ptrs[j].y)
+ have_bottom_edge = 1;
}
+ if (have_right_edge && bad_u)
+ hacks |= AHACK_TEXTURE_ADJ_U;
+ if (have_bottom_edge && bad_v)
+ hacks |= AHACK_TEXTURE_ADJ_V;
+ return hacks;
}
// Render one triangle at native resolution (skipped when
// hack_disable_main is set), then again through the enhanced path when
// it falls inside the enhanced x-range. Textured triangles may
// additionally arm the U/V edge-adjustment hacks via uv_hack().
static void do_triangle_enhanced(psx_gpu_struct *psx_gpu,
vertex_struct *vertexes, u32 current_command)
{
- vertex_struct *vertex_ptrs[3];
+ prepared_triangle triangle;
- if (!prepare_triangle(psx_gpu, vertexes, vertex_ptrs))
+ if (!prepare_triangle(psx_gpu, vertexes, &triangle))
return;
if (!psx_gpu->hack_disable_main)
- render_triangle_p(psx_gpu, vertex_ptrs, current_command);
+ render_triangle_p(psx_gpu, triangle.vertexes, current_command);
- if (!check_enhanced_range(psx_gpu, vertex_ptrs[0]->x, vertex_ptrs[2]->x))
+ if (!check_enhanced_range(psx_gpu, triangle.vertexes[0]->x,
+ triangle.vertexes[2]->x))
return;
if (!enhancement_enable(psx_gpu))
return;
- shift_vertices3(vertex_ptrs);
+ if ((current_command & RENDER_FLAGS_TEXTURE_MAP) && psx_gpu->hack_texture_adj)
+ psx_gpu->hacks_active |= uv_hack(psx_gpu, vertexes);
+ shift_vertices3(triangle.vertexes);
shift_triangle_area();
- render_triangle_p(psx_gpu, vertex_ptrs, current_command);
- unshift_vertices3(vertex_ptrs);
+ render_triangle_p(psx_gpu, triangle.vertexes, current_command);
+ // NOTE(review): vertices are intentionally left shifted here (unshift
+ // was dropped); callers that reuse them, e.g. do_quad_enhanced, must
+ // save/restore coordinates themselves.
+ //unshift_vertices3(triangle.vertexes);
}
// Enhanced-path quad: two triangles sharing vertexes 1 and 2.
static void do_quad_enhanced(psx_gpu_struct *psx_gpu, vertex_struct *vertexes,
u32 current_command)
{
+ // The first enhanced pass can scale the shared vertices in place
+ // without restoring them, so save/restore x,y of vertexes 1 and 2
+ // around it before drawing the second triangle.
+ s16 x12_save[2] = { vertexes[1].x, vertexes[2].x };
+ s16 y12_save[2] = { vertexes[1].y, vertexes[2].y };
do_triangle_enhanced(psx_gpu, vertexes, current_command);
enhancement_disable();
+ vertexes[1].x = x12_save[0], vertexes[2].x = x12_save[1];
+ vertexes[1].y = y12_save[0], vertexes[2].y = y12_save[1];
do_triangle_enhanced(psx_gpu, &vertexes[1], current_command);
}
}
#endif
+// Enhanced-parser counterpart of textured_sprite(): draws the sprite at
+// native resolution, then re-renders it through the enhanced path when
+// in range. width_b/height_b keep the pre-call extents because
+// render_sprite() takes width/height by pointer and may clip them in
+// place -- the enhanced pass uses the original extents (TODO confirm
+// clipping behavior in render_sprite; not visible in this chunk).
+static void textured_sprite_enh(psx_gpu_struct *psx_gpu, const u32 *list,
+ s32 width, s32 height, u32 *cpu_cycles_sum, u32 *cpu_cycles)
+{
+ s32 x = sign_extend_11bit(list[1] + psx_gpu->offset_x);
+ s32 y = sign_extend_11bit((list[1] >> 16) + psx_gpu->offset_y);
+ s32 width_b = width, height_b = height;
+ u8 v = (list[2] >> 8) & 0xff;
+ u8 u = list[2] & 0xff;
+
+ // CLUT address lives in the high half of list[2].
+ set_clut(psx_gpu, list[2] >> 16);
+
+ render_sprite(psx_gpu, x, y, u, v, &width, &height, list[0] >> 24, list[0]);
+ gput_sum(*cpu_cycles_sum, *cpu_cycles, gput_sprite(width, height));
+
+ if (check_enhanced_range(psx_gpu, x, x + width))
+ do_sprite_enhanced(psx_gpu, x, y, u, v, width_b, height_b, list[0]);
+}
+
u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size,
s32 *cpu_cycles_sum_out, s32 *cpu_cycles_last, u32 *last_command)
{
vertex_struct vertexes[4] __attribute__((aligned(16))) = {};
u32 current_command = 0, command_length;
u32 cpu_cycles_sum = 0, cpu_cycles = *cpu_cycles_last;
+ u32 simplified_prim[4*4];
u32 *list_start = list;
u32 *list_end = list + (size / 4);
case 0x2C ... 0x2F:
{
- set_clut(psx_gpu, list_s16[5]);
- set_texture(psx_gpu, list_s16[9]);
+ u32 i, simplified_count;
+ set_texture(psx_gpu, list[4] >> 16);
+ if (!(psx_gpu->render_state_base & RENDER_STATE_DITHER) &&
+ (simplified_count = prim_try_simplify_quad_t(simplified_prim, list)))
+ {
+ for (i = 0; i < simplified_count; i++) {
+ const u32 *list_ = &simplified_prim[i * 4];
+ textured_sprite_enh(psx_gpu, list_, list_[3] & 0x3FF,
+ (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
+ }
+ break;
+ }
+
+ set_clut(psx_gpu, list[2] >> 16);
set_triangle_color(psx_gpu, list[0] & 0xFFFFFF);
get_vertex_data_xy_uv(0, 2);
get_vertex_data_xy_uv(2, 10);
get_vertex_data_xy_uv(3, 14);
- if (psx_gpu->hack_texture_adj)
- uv_hack(vertexes, 4);
do_quad_enhanced(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
break;
case 0x3C ... 0x3F:
{
- set_clut(psx_gpu, list_s16[5]);
- set_texture(psx_gpu, list_s16[11]);
+ u32 i, simplified_count;
+ set_texture(psx_gpu, list[5] >> 16);
+ if (!(psx_gpu->render_state_base & RENDER_STATE_DITHER) &&
+ (simplified_count = prim_try_simplify_quad_gt(simplified_prim, list)))
+ {
+ for (i = 0; i < simplified_count; i++) {
+ const u32 *list_ = &simplified_prim[i * 4];
+ textured_sprite_enh(psx_gpu, list_, list_[3] & 0x3FF,
+ (list_[3] >> 16) & 0x1FF, &cpu_cycles_sum, &cpu_cycles);
+ }
+ break;
+ }
+
+ set_clut(psx_gpu, list[2] >> 16);
get_vertex_data_xy_uv_rgb(0, 0);
get_vertex_data_xy_uv_rgb(1, 6);
get_vertex_data_xy_uv_rgb(2, 12);
get_vertex_data_xy_uv_rgb(3, 18);
- if (psx_gpu->hack_texture_adj)
- uv_hack(vertexes, 4);
do_quad_enhanced(psx_gpu, vertexes, current_command);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
break;
}
break;
}
-
- case 0x64 ... 0x67:
- {
- u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u8 u = list_s16[4];
- u8 v = list_s16[4] >> 8;
- s32 width = list_s16[6] & 0x3FF;
- s32 height = list_s16[7] & 0x1FF;
-
- set_clut(psx_gpu, list_s16[5]);
- render_sprite(psx_gpu, x, y, u, v,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
-
- if (check_enhanced_range(psx_gpu, x, x + width)) {
- width = list_s16[6] & 0x3FF;
- height = list_s16[7] & 0x1FF;
- do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]);
- }
+ case 0x64 ... 0x67:
+ textured_sprite_enh(psx_gpu, list, list[3] & 0x3FF, (list[3] >> 16) & 0x1FF,
+ &cpu_cycles_sum, &cpu_cycles);
break;
- }
-
+
case 0x68 ... 0x6B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]);
break;
}
-
- case 0x74 ... 0x77:
- {
- s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u8 u = list_s16[4];
- u8 v = list_s16[4] >> 8;
- s32 width = 8, height = 8;
-
- set_clut(psx_gpu, list_s16[5]);
-
- render_sprite(psx_gpu, x, y, u, v,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
- if (check_enhanced_range(psx_gpu, x, x + 8))
- do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]);
+ case 0x74 ... 0x77:
+ textured_sprite_enh(psx_gpu, list, 8, 8, &cpu_cycles_sum, &cpu_cycles);
break;
- }
-
+
case 0x78 ... 0x7B:
{
s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]);
break;
}
-
- case 0x7C ... 0x7F:
- {
- s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x);
- s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y);
- u8 u = list_s16[4];
- u8 v = list_s16[4] >> 8;
- s32 width = 16, height = 16;
-
- set_clut(psx_gpu, list_s16[5]);
- render_sprite(psx_gpu, x, y, u, v,
- &width, &height, current_command, list[0]);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(width, height));
-
- if (check_enhanced_range(psx_gpu, x, x + 16))
- do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]);
+ case 0x7C ... 0x7F:
+ textured_sprite_enh(psx_gpu, list, 16, 16, &cpu_cycles_sum, &cpu_cycles);
break;
- }
case 0x80 ... 0x9F: // vid -> vid
case 0xA0 ... 0xBF: // sys -> vid
case 0xE1:
set_texture(psx_gpu, list[0]);
- if(list[0] & (1 << 9))
+ if ((psx_gpu->allow_dithering && (list[0] & (1 << 9)))
+ || psx_gpu->force_dithering)
psx_gpu->render_state_base |= RENDER_STATE_DITHER;
else
psx_gpu->render_state_base &= ~RENDER_STATE_DITHER;
case 0xE5:
{
- s32 offset_x = list[0] << 21;
- s32 offset_y = list[0] << 10;
- psx_gpu->offset_x = offset_x >> 21;
- psx_gpu->offset_y = offset_y >> 21;
+ psx_gpu->offset_x = sign_extend_11bit(list[0]);
+ psx_gpu->offset_y = sign_extend_11bit(list[0] >> 11);
SET_Ex(5, list[0]);
break;
#define gvld1_u8(d, s) d.u8 = vld1_u8(s)
#define gvld1_u32(d, s) d.u32 = vld1_u32((const u32 *)(s))
+#define gvld1_u64(d, s) d.u64 = vld1_u64((const u64 *)(s))
#define gvld1q_u8(d, s) d.u8 = vld1q_u8(s)
#define gvld1q_u16(d, s) d.u16 = vld1q_u16(s)
#define gvld1q_u32(d, s) d.u32 = vld1q_u32((const u32 *)(s))
#define gvst1_u8(v, p) \
vst1_u8(p, v.u8)
+#define gvst1_u64(v, p) \
+ vst1_u64((u64 *)(p), v.u64)
#define gvst1q_u16(v, p) \
vst1q_u16(p, v.u16)
#define gvst1q_inc_u32(v, p, i) { \
#define gvld1_u8(d, s) d.m = _mm_loadu_si64(s)
#define gvld1_u32 gvld1_u8
+#define gvld1_u64 gvld1_u8
#define gvld1q_u8(d, s) d.m = _mm_loadu_si128((__m128i *)(s))
#define gvld1q_u16 gvld1q_u8
#define gvld1q_u32 gvld1q_u8
+#define gvst1_u8(v, p) _mm_storeu_si64(p, v.m)
+#define gvst1_u64 gvst1_u8
+
#define gvst4_4_inc_u32(v0, v1, v2, v3, p, i) { \
__m128i t0 = _mm_unpacklo_epi32(v0.m, v1.m); \
__m128i t1 = _mm_unpacklo_epi32(v2.m, v3.m); \
setup_spans_set_x4(alternate, down, alternate_active); \
height -= 4; \
} while(height > 0); \
+ if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)) \
+ { \
+ vec_2x32u tmp; \
+ gvld1_u64(tmp, &span_uvrg_offset[height - 2]); \
+ gvst1_u64(tmp, &span_uvrg_offset[height - 1]); \
+ } \
} \
setup_spans_set_x4(alternate, up, alternate_active); \
height -= 4; \
} \
+ if (psx_gpu->hacks_active & AHACK_TEXTURE_ADJ_V) \
+ { \
+ vec_2x32u tmp; \
+ gvld1_u64(tmp, &psx_gpu->span_uvrg_offset[1]); \
+ gvst1_u64(tmp, &psx_gpu->span_uvrg_offset[0]); \
+ } \
} \
#define half_left lo
setup_spans_set_x4(none, down, no);
height_minor_b -= 4;
}
+ if (psx_gpu->hacks_active & (AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V))
+ {
+ vec_2x32u tmp;
+ gvld1_u64(tmp, &span_uvrg_offset[height_minor_b - 2]);
+ gvst1_u64(tmp, &span_uvrg_offset[height_minor_b - 1]);
+ }
}
}
vec_8x8s dither_offsets_short; \
\
dither_row = \
- (dither_row >> dither_shift) | (dither_row << (32 - dither_shift)); \
+ (dither_row >> dither_shift) | ((u64)dither_row << (32 - dither_shift)); \
gvdup_n_u32(dither_offsets_short, dither_row); \
setup_blocks_span_initialize_dithered_##texturing() \
gvaddq_u32(u_block, u_block, block_span); \
gvld1q_u32(block_span, psx_gpu->v_block_span.e); \
gvaddq_u32(v_block, v_block, block_span); \
+ (void)(span_b_offset); \
} \
#define setup_blocks_span_initialize_unshaded_untextured() \
+ (void)(span_uvrg_offset); \
+ (void)(span_b_offset) \
#define setup_blocks_texture_swizzled() \
{ \
} \
#define setup_blocks_store_draw_mask_untextured_direct(_block, bits) \
+ (void)(_block) \
+
+#define setup_blocks_uv_adj_hack_untextured(_block, edge_data, uvrg_offset) \
+
+#define setup_blocks_uv_adj_hack_textured(_block, edge_data, uvrg_offset) \
+{ \
+ u32 m_ = AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V; \
+ if (unlikely(psx_gpu->hacks_active & m_)) \
+ setup_blocks_uv_adj_hack(psx_gpu, _block, edge_data, (void *)uvrg_offset); \
+} \
#define setup_blocks_add_blocks_indirect() \
num_blocks += span_num_blocks; \
setup_blocks_store_##shading##_##texturing(sw, dithering, target, edge); \
setup_blocks_store_draw_mask_##texturing##_##target(block, \
span_edge_data->right_mask); \
+ setup_blocks_uv_adj_hack_##texturing(block, span_edge_data, \
+ span_uvrg_offset); \
\
block++; \
} \
CC = $(CROSS_COMPILE)gcc
-CFLAGS += -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP
+CFLAGS += -DTEXTURE_CACHE_4BPP -DTEXTURE_CACHE_8BPP -DGPU_NEON
CFLAGS += -Wall -ggdb
CFLAGS += -fno-strict-aliasing
CFLAGS += `sdl-config --cflags`
-LDFLAGS += `sdl-config --libs`
-
-VPATH += ..
+LDLIBS += `sdl-config --libs`
ifdef NEON
CFLAGS += -mcpu=cortex-a8 -mfpu=neon -DNEON_BUILD
ASFLAGS = $(CFLAGS)
-OBJ += psx_gpu_arm_neon.o
+OBJ += ../psx_gpu_arm_neon.o
+else
+CFLAGS += -DNEON_BUILD -DSIMD_BUILD
+OBJ += ../psx_gpu_simd.o
endif
ifndef DEBUG
CFLAGS += -O2 -DNDEBUG
endif
-OBJ += psx_gpu.o psx_gpu_parse.o psx_gpu_main.o
+OBJ += ../psx_gpu_main.o ../../../gpulib/prim.o
all: psx_gpu
psx_gpu: $(OBJ)
+ $(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) $(LDLIBS)
clean:
$(RM) psx_gpu $(OBJ)
#include <stdio.h>
#include <assert.h>
-#include <sys/mman.h>
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#ifndef min
{
int ret;
-#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
- // the asm doesn't bother to save callee-save vector regs, so do it here
- __asm__ __volatile__("":::"q4","q5","q6","q7");
-#endif
-
if (gpu.state.enhancement_active)
ret = gpu_parse_enhanced(&egpu, list, count * 4,
cycles_sum, cycles_last, (u32 *)last_cmd);
ret = gpu_parse(&egpu, list, count * 4,
cycles_sum, cycles_last, (u32 *)last_cmd);
-#if defined(__arm__) && defined(NEON_BUILD) && !defined(SIMD_BUILD)
- __asm__ __volatile__("":::"q4","q5","q6","q7");
-#endif
-
ex_regs[1] &= ~0x1ff;
ex_regs[1] |= egpu.texture_settings & 0x1ff;
return ret;
// to be able to reuse 1024-width code better (triangle setup,
// dithering phase, lines).
egpu.enhancement_buf_ptr = gpu.mmap(ENHANCEMENT_BUF_SIZE);
- if (egpu.enhancement_buf_ptr == NULL) {
+ if (egpu.enhancement_buf_ptr == NULL || egpu.enhancement_buf_ptr == (void *)(intptr_t)-1) {
fprintf(stderr, "failed to map enhancement buffer\n");
+ egpu.enhancement_buf_ptr = NULL;
gpu.get_enhancement_bufer = NULL;
}
else {
}
if (cbs->pl_set_gpu_caps)
cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X);
+
+ egpu.allow_dithering = cbs->dithering;
+ egpu.force_dithering = cbs->dithering >> 1;
+ /*
+ if (!egpu.allow_dithering) {
+ egpu.dither_table[0] = dither_table_row(0, 0, 0, 0);
+ egpu.dither_table[1] = dither_table_row(0, 0, 0, 0);
+ egpu.dither_table[2] = dither_table_row(0, 0, 0, 0);
+ egpu.dither_table[3] = dither_table_row(0, 0, 0, 0);
+ } else {
+ egpu.dither_table[0] = dither_table_row(-4, 0, -3, 1);
+ egpu.dither_table[1] = dither_table_row(2, -2, 3, -1);
+ egpu.dither_table[2] = dither_table_row(-3, 1, -4, 0);
+ egpu.dither_table[3] = dither_table_row(3, -1, 2, -2);
+ }
+ */
egpu.hack_disable_main = cbs->gpu_neon.enhancement_no_main;
egpu.hack_texture_adj = cbs->gpu_neon.enhancement_tex_adj;
}
}
+void renderer_sync(void)
+{
+}
+
+void renderer_notify_update_lace(int updated)
+{
+}
+
// vim:ts=2:sw=2:expandtab
include ../../config.mak
SRC_STANDALONE += gpu.cpp
-SRC_GPULIB += gpulib_if.cpp
+SRC_GPULIB += gpulib_if.cpp old/if.cpp
ifeq "$(ARCH)" "arm"
SRC += gpu_arm.S
uint8_t fast_lighting:1;
uint8_t blending:1;
uint8_t dithering:1;
+ uint8_t force_dithering:1;
+ uint8_t old_renderer:1;
//senquack Only PCSX Rearmed's version of gpu_unai had this, and I
// don't think it's necessary. It would require adding 'AH' flag to
/*
- * (C) Gražvydas "notaz" Ignotas, 2011
+ * (C) Gražvydas "notaz" Ignotas, 2011,2024
*
* This work is licensed under the terms of GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
#include "arm_features.h"
+.syntax unified
.text
.align 2
+.macro pld_ reg offs=#0
+#ifdef HAVE_ARMV6
+ pld [\reg, \offs]
+#endif
+.endm
+
+#ifdef HAVE_ARMV6
+
+.macro modulate rp mbr mg t0 t1 t2
+ and \t0, \rp, #0x001f
+ and \t1, \rp, #0x03e0
+ and \t2, \rp, #0x7c00
+ smulbb \t0, \t0, \mbr @ -> 0000 0000 0000 orrr rrxx xxxx xxxx xxxx
+ smulbt \t1, \t1, \mg @ -> 0000 000o gggg gxxx xxxx xxxx xxx0 0000
+ smulbt \t2, \t2, \mbr @ -> 00ob bbbb xxxx xxxx xxxx xx00 0000 0000
+ and \rp, \rp, #0x8000 @ retain msb
+ usat \t0, #5, \t0, asr #14
+ usat \t1, #5, \t1, asr #19
+ usat \t2, #5, \t2, asr #24
+ orr \rp, \rp, \t0
+ orr \rp, \rp, \t1, lsl #5
+ orr \rp, \rp, \t2, lsl #10
+.endm
+
+@ http://www.slack.net/~ant/info/rgb_mixing.html
+@ p0 = (p0 + p1) / 2; p1 |= 0x8000
+@ msb of input p0 is assumed to be set
+.macro semitrans0 p0 p1 t
+ eor \t, \p0, \p1
+ and \t, \t, #0x0420
+ sub \p0, \p0, \t
+ orr \p1, \p1, #0x8000
+ uhadd16 \p0, \p0, \p1
+.endm
+
+.macro semitrans0p p0 p1 m421 t
+ eor \t, \p0, \p1
+ and \t, \t, \m421
+ add \p0, \p0, \p1
+ uhsub16 \p0, \p0, \t @ sub because of borrow into hi16
+.endm
+
+@ p0 - {p1|r,g,b} // p1* - premasked rgb
+.macro semitrans2p p0 p1r p1g p1b m1f t0 t1
+ and \t0, \p0, \m1f
+ and \t1, \p0, \m1f, lsl #5
+ and \p0, \p0, \m1f, lsl #10
+ uqsub16 \t0, \t0, \p1r
+ uqsub16 \t1, \t1, \p1g
+ uqsub16 \p0, \p0, \p1b
+ orr \t0, \t0, \t1
+ orr \p0, \p0, \t0
+.endm
+
+#else
+
+@ msb of input p0 is assumed to be set
+.macro semitrans0 p0 p1 t
+ eor \t, \p0, \p1
+ and \t, \t, #0x0420
+ orr \p1, \p1, #0x8000
+ sub \p0, \p0, \t
+ add \p0, \p0, \p1
+ orr \p0, \p0, #0x10000
+ mov \p0, \p0, lsr #1
+.endm
+
+.macro semitrans0p p0 p1 m421 t
+ eor \t, \p0, \p1
+ and \t, \t, \m421
+ add \p0, \p0, \p1
+ sub \p0, \p0, \t
+ mov \p0, \p0, lsr #1
+.endm
+
+#endif // HAVE_ARMV6
+
+.macro semitrans13p p0 p1 m421 t0
+ add \t0, \p0, \p1
+ eor \p0, \p0, \p1
+ and \p0, \p0, \m421 @ low_bits
+ sub \p0, \t0, \p0
+ and \p0, \p0, \m421, lsl #5 @ carries
+ sub \t0, \t0, \p0 @ modulo
+ sub \p0, \p0, \p0, lsr #5 @ clamp
+ orr \p0, \t0, \p0
+.endm
+
+
@ in: r0=dst, r2=pal, r12=0x1e
@ trashes r6-r8,lr,flags
-.macro do_4_pixels rs ibase obase
+.macro do_4x_4bpp rs ibase obase
.if \ibase - 1 < 0
and r6, r12, \rs, lsl #1
.else
ldrh r8, [r2, r8]
ldrh lr, [r2, lr]
tst r6, r6
- strneh r6, [r0, #\obase+0]
+ strhne r6, [r0, #\obase+0]
tst r7, r7
- strneh r7, [r0, #\obase+2]
+ strhne r7, [r0, #\obase+2]
tst r8, r8
- strneh r8, [r0, #\obase+4]
+ strhne r8, [r0, #\obase+4]
tst lr, lr
- strneh lr, [r0, #\obase+6]
+ strhne lr, [r0, #\obase+6]
+.endm
+
+@ in: r0=dst, r2=pal, r12=0x1fe
+@ loads/stores \rs,r6-r8
+.macro do_4x_8bpp rs
+ and r6, r12, \rs, lsl #1
+ and r7, r12, \rs, lsr #7
+ and r8, r12, \rs, lsr #15
+ and \rs,r12, \rs, lsr #23
+ ldrh r6, [r2, r6]
+ ldrh r7, [r2, r7]
+ ldrh r8, [r2, r8]
+ ldrh \rs,[r2, \rs]
+ tst r6, r6
+ strhne r6, [r0, #0]
+ tst r7, r7
+ strhne r7, [r0, #2]
+ tst r8, r8
+ strhne r8, [r0, #4]
+ tst \rs,\rs
+ strhne \rs,[r0, #6]
+.endm
+
+
+@ (void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn)
+@ see also poly_untex_st_m
+.macro tile_driver_st_m name semit
+FUNCTION(\name):
+ .cfi_startproc
+ stmfd sp!, {r4-r9,lr}
+ .cfi_def_cfa_offset 4*7
+ .cfi_rel_offset lr, 4*6
+ ldr r7, [r3, #0x18] @ y0
+ ldr r8, [r3, #0x1c] @ y1
+.if \semit != 2
+ mov r4, #0x8000
+ orr r4, r4, r4, lsl #16 @ mask 8000
+ mov r6, #0x420
+ orr r6, r6, #1
+ orr r6, r6, r6, lsl #16 @ mask 0421
+.endif
+.if \semit == 2
+ and r4, r1, #0x03e0
+ and r5, r1, #0x7c00
+ and r1, r1, #0x001f
+ orr r4, r4, r4, lsl #16 @ premasked g
+ orr r5, r5, r5, lsl #16 @ premasked b
+ mov r6, #0x00001f
+ orr r6, #0x1f0000 @ mask
+.elseif \semit == 3
+ mov r1, r1, lsr #2
+ bic r1, r1, #(0x0c60>>2)
+.endif
+ orr r1, r1, r1, lsl #16
+ sub r3, r8, r7 @ h
+ mov r7, r2 @ save w
+0:
+ ldrh r8, [r0]
+ pld_ r0, #2048
+ tst r0, #2
+ beq 1f
+ sub r2, #1
+.if \semit == 0
+ bic r8, r8, r4
+ semitrans0p r8, r1, r6, lr
+.elseif \semit == 1 || \semit == 3
+ bic r8, r8, r4
+ semitrans13p r8, r1, r6, lr
+.elseif \semit == 2
+ semitrans2p r8, r1, r4, r5, r6, r9, lr
+.endif
+ strh r8, [r0], #2
+1:
+ ldr r8, [r0]
+ pld_ r0, #32
+ subs r2, r2, #2
+.if \semit == 0
+ bic r8, r8, r4
+ semitrans0p r8, r1, r6, lr
+.elseif \semit == 1 || \semit == 3
+ bic r8, r8, r4
+ semitrans13p r8, r1, r6, lr
+.elseif \semit == 2
+ semitrans2p r8, r1, r4, r5, r6, r9, lr
+.endif
+ strpl r8, [r0], #4
+ bpl 1b
+2:
+ tst r2, #1
+ strhne r8, [r0], #2
+ mov r2, r7 @ w
+ add r0, r0, #2048
+ sub r0, r0, r7, lsl #1
+ subs r3, r3, #1
+ bgt 0b
+
+ ldmfd sp!, {r4-r9,pc}
+ .cfi_endproc
.endm
-.global draw_spr16_full @ (u16 *d, void *s, u16 *pal, int lines)
-draw_spr16_full:
+
+tile_driver_st_m tile_driver_st0_asm, 0
+tile_driver_st_m tile_driver_st1_asm, 1
+tile_driver_st_m tile_driver_st3_asm, 3
+#ifdef HAVE_ARMV6
+tile_driver_st_m tile_driver_st2_asm, 2
+#endif
+
+@ (u16 *d, void *s, u16 *pal, int lines)
+sprite_4bpp_x16_asm_:
+ ldr r12,[r3, #0x18] @ y0
+ ldr r2, [r3, #0x04] @ pal
+ ldr r3, [r3, #0x1c] @ y1
+ sub r3, r3, r12
+FUNCTION(sprite_4bpp_x16_asm):
+ .cfi_startproc
stmfd sp!, {r4-r8,lr}
- mov r12, #0x1e @ empty pixel
+ .cfi_def_cfa_offset 4*6
+ .cfi_rel_offset lr, 4*5
+ mov r12, #0x1e
0:
ldmia r1, {r4,r5}
- do_4_pixels r4, 0, 0
- do_4_pixels r4, 16, 8
- do_4_pixels r5, 0, 16
- do_4_pixels r5, 16, 24
+ pld_ r1, #2048
+ do_4x_4bpp r4, 0, 0
+ do_4x_4bpp r4, 16, 8
+ do_4x_4bpp r5, 0, 16
+ do_4x_4bpp r5, 16, 24
subs r3, r3, #1
add r0, r0, #2048
add r1, r1, #2048
bgt 0b
ldmfd sp!, {r4-r8,pc}
+ .cfi_endproc
+
+
+@
+.macro sprite_driver_part1 is8bpp
+ stmfd sp!, {r4-r11,lr}
+ .cfi_def_cfa_offset 4*9
+ .cfi_rel_offset lr, 4*8
+ mov r12, #0x01e
+.if \is8bpp
+ orr r12, r12, #0x1f0 @ mask=0x01fe
+.endif
+ ldr r4, [r3, #0x08] @ u
+ ldr r5, [r3, #0x1c] @ v1
+ ldr r6, [r3, #0x18] @ v0
+ and r4, r4, #((8 >> \is8bpp) - 1)
+ sub r5, r5, r6
+ sub r5, r5, #1
+ orr r5, r4, r5, lsl #8 @ ((h-1) << 8) | u0_fraction
+ mov r9, r2 @ saved_w
+ mov r10, r0 @ saved_dst
+ mov r11, r1 @ saved_src
+ ldr r2, [r3, #0x04] @ pal
+11: @ line_loop:
+ pld_ r11, #2048
+ mov r0, r10
+ mov r1, r11
+ mov r3, r9
+ ands r6, r5, #(7 >> \is8bpp)
+ bne 15f @ fractional_u
+12:
+ subs r3, r3, #(8 >> \is8bpp) @ w
+ bmi 14f @ fractional_w
+.endm
+.macro sprite_driver_part2 is8bpp
+ cmn r3, #(8 >> \is8bpp)
+ bne 14f @ fractional_w
+13: @ eol:
+ add r10, r10, #2048
+ add r11, r11, #2048
+ subs r5, r5, #0x100
+ bpl 11b @ line_loop
+ ldmfd sp!, {r4-r11,pc}
+14: @ fractional_w:
+ ldr r4, [r1], #4
+ add r8, r3, #(8 >> \is8bpp)
+ mov r3, #0
+ mov r4, r4, lsl #1
+ b 16f @ fractional_loop
+15: @ fractional_u:
+ bic r1, r1, #3
+ rsb r8, r6, #(8 >> \is8bpp)
+ ldr r4, [r1], #4
+ cmp r8, r3
+ movgt r8, r3
+ mov r7, r6, lsl #(2 + \is8bpp)
+ sub r3, r3, r8
+ sub r7, r7, #1
+ mov r4, r4, lsr r7
+16: @ fractional_loop:
+.endm
+.macro sprite_driver_part3
+ tst r3, r3
+ beq 13b @ eol
+ b 12b @ return from fractional_u
+.endm
+
+@ (u16 *d, const void *s, int width, const gpu_unai_inner_t *)
+FUNCTION(sprite_driver_4bpp_asm):
+ .cfi_startproc
+ ldr r12, [r3, #8] @ u
+ mov r12, r12, lsl #29
+ orr r12, r12, r2 @ w
+ cmp r12, #16
+ beq sprite_4bpp_x16_asm_ @ use specialized aligned x16 version
+ sprite_driver_part1 0
+0:
+ ldr r4, [r1], #4
+ pld_ r1, #28
+ do_4x_4bpp r4, 0, 0
+ do_4x_4bpp r4, 16, 8
+ add r0, r0, #16
+ subs r3, r3, #8
+ bpl 0b
+ sprite_driver_part2 0
+0:
+ and r7, r12, r4
+ mov r4, r4, lsr #4
+ ldrh r7, [r2, r7]
+ add r0, r0, #2
+ tst r7, r7
+ strhne r7, [r0, #-2]
+ subs r8, r8, #1
+ bgt 0b
+ sprite_driver_part3
+ .cfi_endproc
+
+
+@ (u16 *d, const void *s, int width, const gpu_unai_inner_t *)
+FUNCTION(sprite_driver_8bpp_asm):
+ .cfi_startproc
+ sprite_driver_part1 1
+0:
+ ldr r4, [r1], #4
+ pld_ r1, #28
+ do_4x_8bpp r4
+ add r0, r0, #8
+ subs r3, r3, #4
+ bpl 0b
+ sprite_driver_part2 1
+0:
+ and r7, r12, r4
+ mov r4, r4, lsr #8
+ ldrh r7, [r2, r7]
+ add r0, r0, #2
+ tst r7, r7
+ strhne r7, [r0, #-2]
+ subs r8, r8, #1
+ bgt 0b
+ sprite_driver_part3
+ .cfi_endproc
+
+
+@ (u16 *d, const void *s, int width, const gpu_unai_inner_t *)
+.macro sprite_driver_l_st name bpp light semit
+FUNCTION(\name):
+ .cfi_startproc
+ stmfd sp!, {r4-r11,lr}
+ .cfi_def_cfa_offset 4*9
+ .cfi_rel_offset lr, 4*8
+ ldr r5, [r3, #0x18] @ y0
+ ldr r7, [r3, #0x1c] @ y1
+ ldr r8, [r3, #0x20] @ rbg5
+ mov r6, r2 @ saved_w
+ ldr r2, [r3, #0x04] @ pal
+ ldr r10,[r3, #0x08] @ u
+ ldr r11,[r3, #0x10] @ u_msk
+ sub r5, r7, r5 @ h
+ mov r7, r8, lsl #(8+2) @ 0bbb bb00 0ggg gg00 0rrr rr00 0000 0000
+ mov r8, r8, lsl #(16+2)@ 0ggg gg00 ...
+ mov r3, r11,lsr #10
+ orr r6, r3, r6, lsl #16 @ (w << 16) | u_mask
+ mov r3, r6
+ and r10,r10,r6
+
+3: @ line_loop:
+.if \bpp == 4
+ add r9, r1, r10, lsr #1
+.elseif \bpp == 8
+ add r9, r1, r10
+ pld_ r9, #2048
+.endif
+0:
+.if \bpp == 4
+ ldrb r4, [r1, r10, lsr #1]
+.elseif \bpp == 8
+ ldrb r4, [r1, r10]
+.endif
+ subs r3, r3, #1<<16
+ bmi 1f
+.if \bpp == 4
+ tst r10, #1
+ movne r4, r4, lsr #3
+ addeq r4, r4, r4
+ and r4, r4, #0x1e
+.elseif \bpp == 8
+ add r4, r4, r4 @ <<= 1
+.endif
+ ldrsh r12,[r2, r4]
+ add r10,r10,#1
+ and r10,r10,r6
+ add r0, r0, #2
+ tst r12,r12
+ beq 0b
+.if \light && \semit != 1
+ modulate r12, r7, r8, r4, r9, lr
+.endif
+.if \semit == 0
+ ldrhmi lr, [r0, #-2]
+ strhpl r12,[r0, #-2]
+ bpl 0b
+ semitrans0 r12, lr, r9
+.elseif \light && \semit == 1
+ and r4, r12, #0x001f
+ and r9, r12, #0x03e0
+ and r12, r12, #0x7c00
+ ldrhmi r11, [r0, #-2]
+ smulbb r4, r4, r7 @ -> 0000 0000 0000 orrr rrxx xxxx xxxx xxxx
+ smulbt r9, r9, r8 @ -> 0000 000o gggg gxxx xxxx xxxx xxx0 0000
+ smulbt r12, r12, r7 @ -> 00ob bbbb xxxx xxxx xxxx xx00 0000 0000
+ and r8, r11, #0x001f
+ and lr, r11, #0x03e0
+ and r11, r11, #0x7c00
+ addmi r4, r4, r8, lsl #14
+ addmi r9, r9, lr, lsl #14
+ addmi r12, r12, r11, lsl #14
+ usat r4, #5, r4, asr #14
+ usat r9, #5, r9, asr #19
+ usat r12, #5, r12, asr #24
+ orrmi r4, r4, #0x8000
+ orr r4, r4, r9, lsl #5
+ orr r12, r4, r12, lsl #10
+ mov r8, r7, lsl #8 @ restore r8
+.endif
+ strh r12,[r0, #-2]
+ b 0b
+1:
+ add r0, r0, #2048
+ add r1, r1, #2048
+ sub r0, r0, r6, lsr #15 @ dst
+ sub r10,r10,r6, lsr #16 @ u
+ mov r3, r6 @ (w << 16) | u_mask
+ and r10,r6, r10
+ subs r5, r5, #1
+ and r10,r10,#0xff
+ bgt 3b @ line_loop
+
+ ldmfd sp!, {r4-r11,pc}
+ .cfi_endproc
+.endm
+
+sprite_driver_l_st sprite_driver_4bpp_l0_std_asm, 4, 0, -1
+sprite_driver_l_st sprite_driver_4bpp_l0_st0_asm, 4, 0, 0
+sprite_driver_l_st sprite_driver_8bpp_l0_std_asm, 8, 0, -1
+sprite_driver_l_st sprite_driver_8bpp_l0_st0_asm, 8, 0, 0
+
+#ifdef HAVE_ARMV6
+
+sprite_driver_l_st sprite_driver_4bpp_l1_std_asm, 4, 1, -1
+sprite_driver_l_st sprite_driver_4bpp_l1_st0_asm, 4, 1, 0
+sprite_driver_l_st sprite_driver_4bpp_l1_st1_asm, 4, 1, 1
+sprite_driver_l_st sprite_driver_8bpp_l1_std_asm, 8, 1, -1
+sprite_driver_l_st sprite_driver_8bpp_l1_st0_asm, 8, 1, 0
+sprite_driver_l_st sprite_driver_8bpp_l1_st1_asm, 8, 1, 1
+
+#endif // HAVE_ARMV6
+
+
+@ (u16 *d, const void *s, int width, const gpu_unai_inner_t *)
+FUNCTION(sprite_driver_16bpp_asm):
+ .cfi_startproc
+ stmfd sp!, {r4-r6,lr}
+ .cfi_def_cfa_offset 4*4
+ .cfi_rel_offset lr, 4*3
+ ldr r4, [r3, #0x1c] @ v1
+ ldr r5, [r3, #0x18] @ v0
+ mov r12, #0x00ff
+ orr r12, r12, #0xff00 @ mask
+ mov r6, r2 @ saved_w
+ sub r5, r4, r5
+ sub r5, r5, #1 @ h-1
+3: @ line_loop:
+ pld_ r1, #2048
+ mov r2, r6 @ w
+ tst r1, #2
+ beq 0f
+2: @ 1pix:
+ ldrh lr, [r1], #2
+ add r0, r0, #2
+ sub r2, r2, #1
+ tst lr, lr
+ strhne lr, [r0, #-2]
+0:
+ subs r2, r2, #4
+ bmi 1f
+0:
+ ldmia r1!, {r3,r4}
+ add r0, r0, #2*4
+ pld_ r1, #24
+ tst r3, r12
+ strhne r3, [r0, #-8]
+ movs lr, r3, lsr #16
+ strhne lr, [r0, #-6]
+ tst r4, r12
+ strhne r4, [r0, #-4]
+ movs lr, r4, lsr #16
+ strhne lr, [r0, #-2]
+ subs r2, r2, #4
+ bpl 0b
+1:
+ adds r2, r2, #4
+ bne 2b @ 1pix
+ add r0, r0, #2048
+ add r1, r1, #2048
+ sub r0, r0, r6, lsl #1 @ dst
+ sub r1, r1, r6, lsl #1
+ subs r5, r5, #1
+ bpl 3b @ line_loop
+
+ ldmfd sp!, {r4-r6,pc}
+ .cfi_endproc
+
+
+@ (void *d, const gpu_unai_inner_t *inn, int count)
+@ see also tile_driver_st_m
+.macro poly_untex_st_m name semit
+FUNCTION(\name):
+ .cfi_startproc
+ ldrh r1, [r1, #0x38] @ rgb
+ stmfd sp!, {r4-r7,lr}
+ .cfi_def_cfa_offset 4*5
+ .cfi_rel_offset lr, 4*4
+.if \semit != 2
+ mov r4, #0x8000
+ orr r4, r4, r4, lsl #16 @ mask 8000
+ mov r6, #0x420
+ orr r6, r6, #1
+ orr r6, r6, r6, lsl #16 @ mask 0421
+.endif
+.if \semit == 2
+ and r4, r1, #0x03e0
+ and r5, r1, #0x7c00
+ and r1, r1, #0x001f
+ orr r4, r4, r4, lsl #16 @ premasked g
+ orr r5, r5, r5, lsl #16 @ premasked b
+ mov r6, #0x00001f
+ orr r6, #0x1f0000 @ mask
+.elseif \semit == 3
+ mov r1, r1, lsr #2
+ bic r1, r1, #(0x0c60>>2)
+.endif
+ orr r1, r1, r1, lsl #16
+0:
+ ldrh r3, [r0]
+ pld_ r0, #2048
+ tst r0, #2
+ beq 1f
+ sub r2, #1
+.if \semit == 0
+ bic r3, r3, r4
+ semitrans0p r3, r1, r6, lr
+.elseif \semit == 1 || \semit == 3
+ bic r3, r3, r4
+ semitrans13p r3, r1, r6, lr
+.elseif \semit == 2
+ semitrans2p r3, r1, r4, r5, r6, r7, lr
+.endif
+ strh r3, [r0], #2
+1:
+ ldr r3, [r0]
+ pld_ r0, #32
+ subs r2, r2, #2
+.if \semit == 0
+ bic r3, r3, r4
+ semitrans0p r3, r1, r6, lr
+.elseif \semit == 1 || \semit == 3
+ bic r3, r3, r4
+ semitrans13p r3, r1, r6, lr
+.elseif \semit == 2
+ semitrans2p r3, r1, r4, r5, r6, r7, lr
+.endif
+ strpl r3, [r0], #4
+ bpl 1b
+2:
+ tst r2, #1
+ strhne r3, [r0], #2
+
+ ldmfd sp!, {r4-r7,pc}
+ .cfi_endproc
+.endm
+
+poly_untex_st_m poly_untex_st0_asm, 0
+poly_untex_st_m poly_untex_st1_asm, 1
+poly_untex_st_m poly_untex_st3_asm, 3
+#ifdef HAVE_ARMV6
+poly_untex_st_m poly_untex_st2_asm, 2
+#endif
+
+
+.macro poly_4_8bpp_asm_m name bpp light semit
+FUNCTION(\name): @ (void *d, const gpu_unai_inner_t *inn, int count)
+ .cfi_startproc
+ stmfd sp!, {r4-r11,lr}
+ .cfi_def_cfa_offset 4*9
+ .cfi_rel_offset lr, 4*8
+ add r12, r1, #4
+ ldmia r12, {r3, r4, r7, r12, lr} @ clut, u, v, u_msk, v_msk
+ ldr r5, [r1, #0x18] @ u_inc
+.if \light
+ ldr r10,[r1, #0x24] @ rbg
+.endif
+ mov r6, r12 @ u_msk
+ ldr r12,[r1, #0x1c] @ v_inc
+.if \light
+ mov r10,r10,lsl #7 @ 0bbb bbbb 0ggg gggg 0rrr rrrr r000 0000
+ bic r10,r10,#1<<23
+ bic r10,r10,#1<<15
+ mov r11,r10,lsl #8 @ 0ggg gggg ...
+.endif
+ and r4, r4, r6
+ and lr, lr, r7 @ v_msk & v
+ and lr, lr, #0xff<<10
+ tst r12,r12
+ bne v_\name
+ ldr r1, [r1] @ src
+ mov r7, r4, lsr #(13 - (\bpp / 8 * 3))
+ add r1, r1, lr, lsl #1
+#ifdef HAVE_ARMV6
+ add r12,r1, r7, lsl #(2 - (\bpp / 8 * 2))
+ pld_ r12,#2048 @ next line
+#endif
+0:
+.if \light || \semit >= 0
+ mov r7, r4, lsr #(13 - (\bpp / 8 * 3))
+ subs r2, r2, #1
+ bmi 1f
+.endif
+.if \bpp == 4
+ ldr lr, [r1, r7, lsl #2]
+ lsr r12,r4, #8
+ and r12,r12,#0x1c
+ sub r12,r12,#1
+ mov r12,lr, ror r12
+ add r4, r4, r5
+ and r12,r12,#0x1e
+.else
+ ldrb r12,[r1, r7]
+ add r4, r4, r5
+ add r12,r12,r12
+.endif
+ and r4, r4, r6
+ ldrsh r12,[r3, r12]
+ add r0, r0, #2
+.if !\light && \semit < 0
+ mov r7, r4, lsr #(13 - (\bpp / 8 * 3))
+ tst r12,r12
+ strhne r12,[r0, #-2]
+ subs r2, r2, #1
+ bgt 0b
+ @ end
+.else
+ tst r12,r12
+ beq 0b
+.if \light && \semit != 1
+ modulate r12, r10, r11, r7, r8, lr
+.endif
+.if \semit == 0
+ ldrhmi r7, [r0, #-2]
+ strhpl r12,[r0, #-2]
+ bpl 0b
+ semitrans0 r12, r7, lr
+.endif
+ strh r12,[r0, #-2]
+ b 0b
+.endif @ \light || \semit >= 0
+1:
+ ldmfd sp!, {r4-r11,pc}
+
+v_\name: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked
+.if \light || \semit >= 0
+ sub sp, sp, #4*2
+ stmia sp, {r5,r6}
+ .cfi_def_cfa_offset 4*(9+2)
+ .cfi_rel_offset lr, 4*(8+2)
+.endif
+ ldr r9, [r1, #0x14] @ v_msk
+ ldr r1, [r1] @ src
+ mov r8, r12 @ v_inc
+ and r9, r9, #0xff<<10 @ v_msk_final
+.if !\light && \semit < 0
+ and lr, r7, r9
+ mov r12,r4, lsr #(13 - (\bpp / 8 * 3))
+ add lr, r1, lr, lsl #1
+.endif
+0:
+.if \light || \semit >= 0
+ and lr, r7, r9
+ mov r12,r4, lsr #(13 - (\bpp / 8 * 3))
+ add lr, r1, lr, lsl #1
+ subs r2, r2, #1
+ bmi 1f
+.endif
+.if \bpp == 4
+ ldr lr, [lr, r12, lsl #2]
+ lsr r12,r4, #8
+ and r12,r12,#0x1c
+ sub r12,r12,#1
+ mov r12,lr, ror r12
+ add r4, r4, r5
+ and r12,r12,#0x1e
+.else
+ ldrb r12,[lr, r12]
+ add r4, r4, r5
+ add r12,r12,r12
+.endif
+ and r4, r4, r6
+ ldrsh r12,[r3, r12]
+ add r0, r0, #2
+ add r7, r7, r8
+.if !\light && \semit < 0
+ and lr, r7, r9
+ tst r12,r12
+ add lr, r1, lr, lsl #1
+ strhne r12,[r0, #-2]
+ mov r12,r4, lsr #(13 - (\bpp / 8 * 3))
+ subs r2, r2, #1
+ bgt 0b
+ @ end
+.else
+ tst r12,r12
+ beq 0b
+.if \light && \semit != 1
+ modulate r12, r10, r11, r5, r6, lr
+.endif
+.if \semit == 0
+ ldrhmi r6, [r0, #-2]
+ strhpl r12,[r0, #-2]
+ ldmiapl sp, {r5,r6}
+ bpl 0b
+ semitrans0 r12, r6, lr
+.endif
+ strh r12,[r0, #-2]
+ ldmia sp, {r5,r6}
+ b 0b
+.endif @ \light || \semit >= 0
+1:
+.if \light || \semit >= 0
+ add sp, sp, #4*2
+.endif
+ ldmfd sp!, {r4-r11,pc}
+ .cfi_endproc
+.endm
+
+poly_4_8bpp_asm_m poly_4bpp_asm, 4, 0, -1
+poly_4_8bpp_asm_m poly_4bpp_l0_st0_asm, 4, 0, 0
+poly_4_8bpp_asm_m poly_8bpp_asm, 8, 0, -1
+poly_4_8bpp_asm_m poly_8bpp_l0_st0_asm, 8, 0, 0
+
+#ifdef HAVE_ARMV6
+
+poly_4_8bpp_asm_m poly_4bpp_l1_std_asm, 4, 1, -1
+poly_4_8bpp_asm_m poly_4bpp_l1_st0_asm, 4, 1, 0
+poly_4_8bpp_asm_m poly_8bpp_l1_std_asm, 8, 1, -1
+poly_4_8bpp_asm_m poly_8bpp_l1_st0_asm, 8, 1, 0
+
+#endif // HAVE_ARMV6
@ vim:filetype=armasm
extern "C" {
#endif
-void draw_spr16_full(void *d, void *s, void *pal, int lines);
+struct gpu_unai_inner_t;
+
+void tile_driver_st0_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn);
+void tile_driver_st1_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn);
+void tile_driver_st3_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn);
+
+void sprite_driver_4bpp_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_8bpp_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_16bpp_asm(void *pPixel, const void *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_4bpp_x16_asm(void *d, const void *s, void *pal, int lines);
+
+void sprite_driver_4bpp_l0_std_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_4bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_8bpp_l0_std_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_8bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+
+void poly_untex_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_untex_st1_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_untex_st3_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_4bpp_asm (void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_4bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_8bpp_asm (void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_8bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+
+#ifdef HAVE_ARMV6
+
+void tile_driver_st2_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn);
+
+void sprite_driver_4bpp_l1_std_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_4bpp_l1_st0_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_4bpp_l1_st1_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_8bpp_l1_std_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_8bpp_l1_st0_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+void sprite_driver_8bpp_l1_st1_asm(void *pPixel, const u8 *pTxt_base,
+ u32 count, const struct gpu_unai_inner_t *inn);
+
+void poly_untex_st2_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_4bpp_l1_std_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_4bpp_l1_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_8bpp_l1_std_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_8bpp_l1_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+
+#endif // HAVE_ARMV6
#ifdef __cplusplus
}
gpu_unai.BLEND_MODE = ((tpage>>5) & 3) << 3;
gpu_unai.TEXT_MODE = (tmode + 1) << 5; // gpu_unai.TEXT_MODE should be values 1..3, so add one
- gpu_unai.TBA = &gpu_unai.vram[FRAME_OFFSET(tx, ty)];
+ gpu_unai.inn.TBA = &gpu_unai.vram[FRAME_OFFSET(tx, ty)];
}
///////////////////////////////////////////////////////////////////////////////
INLINE void gpuSetCLUT(u16 clut)
{
- gpu_unai.CBA = &gpu_unai.vram[(clut & 0x7FFF) << 4];
+ gpu_unai.inn.CBA = &gpu_unai.vram[(clut & 0x7FFF) << 4];
}
#ifdef ENABLE_GPU_NULL_SUPPORT
///////////////////////////////////////////////////////////////////////////
// --- BEGIN INVERSE APPROXIMATION SECTION ---
///////////////////////////////////////////////////////////////////////////
-#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || (!defined(GPU_UNAI_NO_OLD) && !defined(GPU_UNAI_USE_FLOATMATH))
// big precision inverse table.
#define TABLE_BITS 16
s32 s_invTable[(1<<TABLE_BITS)];
+#endif
+#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
//senquack - MIPS32 happens to have same instruction/format:
#if defined(__arm__) || (__mips == 32)
INLINE u32 Log2(u32 x) { u32 res; asm("clz %0,%1" : "=r" (res) : "r" (x)); return 32-res; }
#include "gpu_inner_quantization.h"
#include "gpu_inner_light.h"
+#include "arm_features.h"
+#include "compiler_features.h"
#ifdef __arm__
+#include "gpu_arm.h"
#include "gpu_inner_blend_arm.h"
#include "gpu_inner_light_arm.h"
#define gpuBlending gpuBlendingARM
// GPU Tiles innerloops generator
template<int CF>
-static void gpuTileSpanFn(le16_t *pDst, u32 count, u16 data)
+static inline void gpuTileSpanFn(le16_t *pDst, u16 data, u32 count)
{
le16_t ldata;
}
}
-static void TileNULL(le16_t *pDst, u32 count, u16 data)
+template<int CF>
+static noinline void gpuTileDriverFn(le16_t *pDst, u16 data, u32 count,
+ const gpu_unai_inner_t &inn)
+{
+ const int li=gpu_unai.inn.ilace_mask;
+ const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
+ const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
+ const int y1 = inn.y1;
+ int y0 = inn.y0;
+
+ for (; y0 < y1; ++y0) {
+ if (!(y0&li) && (y0&pi) != pif)
+ gpuTileSpanFn<CF>(pDst, data, count);
+ pDst += FRAME_WIDTH;
+ }
+}
+
+#ifdef __arm__
+
+template<int CF>
+static void TileAsm(le16_t *pDst, u16 data, u32 count, const gpu_unai_inner_t &inn)
+{
+ switch (CF) {
+ case 0x02: tile_driver_st0_asm(pDst, data, count, &inn); return;
+ case 0x0a: tile_driver_st1_asm(pDst, data, count, &inn); return;
+ case 0x1a: tile_driver_st3_asm(pDst, data, count, &inn); return;
+#ifdef HAVE_ARMV6
+ case 0x12: tile_driver_st2_asm(pDst, data, count, &inn); return;
+#endif
+ }
+ gpuTileDriverFn<CF>(pDst, data, count, inn);
+}
+
+#endif
+
+static void TileNULL(le16_t *pDst, u16 data, u32 count, const gpu_unai_inner_t &inn)
{
#ifdef ENABLE_GPU_LOG_SUPPORT
fprintf(stdout,"TileNULL()\n");
///////////////////////////////////////////////////////////////////////////////
// Tiles innerloops driver
-typedef void (*PT)(le16_t *pDst, u32 count, u16 data);
+typedef void (*PT)(le16_t *pDst, u16 data, u32 count, const gpu_unai_inner_t &inn);
// Template instantiation helper macros
-#define TI(cf) gpuTileSpanFn<(cf)>
+#define TI(cf) gpuTileDriverFn<(cf)>
#define TN TileNULL
+#ifdef __arm__
+#define TA(cf) TileAsm<(cf)>
+#else
+#define TA(cf) TI(cf)
+#endif
+#ifdef HAVE_ARMV6
+#define TA6(cf) TileAsm<(cf)>
+#else
+#define TA6(cf) TI(cf)
+#endif
#define TIBLOCK(ub) \
- TI((ub)|0x00), TI((ub)|0x02), TI((ub)|0x04), TI((ub)|0x06), \
- TN, TI((ub)|0x0a), TN, TI((ub)|0x0e), \
- TN, TI((ub)|0x12), TN, TI((ub)|0x16), \
- TN, TI((ub)|0x1a), TN, TI((ub)|0x1e)
+ TI((ub)|0x00), TA6((ub)|0x02), TI((ub)|0x04), TI((ub)|0x06), \
+ TN, TA ((ub)|0x0a), TN, TI((ub)|0x0e), \
+ TN, TA6((ub)|0x12), TN, TI((ub)|0x16), \
+ TN, TA ((ub)|0x1a), TN, TI((ub)|0x1e)
-const PT gpuTileSpanDrivers[32] = {
+const PT gpuTileDrivers[32] = {
TIBLOCK(0<<8), TIBLOCK(1<<8)
};
#undef TI
#undef TN
+#undef TA
+#undef TA6
#undef TIBLOCK
///////////////////////////////////////////////////////////////////////////////
// GPU Sprites innerloops generator
+typedef void (*PS)(le16_t *pPixel, u32 count, const u8 *pTxt,
+ const gpu_unai_inner_t &inn);
+
template<int CF>
-static void gpuSpriteSpanFn(le16_t *pDst, u32 count, u8* pTxt, u32 u0)
+static noinline void gpuSpriteDriverFn(le16_t *pPixel, u32 count, const u8 *pTxt_base,
+ const gpu_unai_inner_t &inn)
{
// Blend func can save an operation if it knows uSrc MSB is unset.
// Untextured prims can always skip (source color always comes with MSB=0).
uint_fast16_t uSrc, uDst, srcMSB;
bool should_blend;
- u32 u0_mask = gpu_unai.TextureWindow[2];
+ u32 u0_mask = inn.u_msk >> 10;
u8 r5, g5, b5;
if (CF_LIGHT) {
- r5 = gpu_unai.r5;
- g5 = gpu_unai.g5;
- b5 = gpu_unai.b5;
+ r5 = inn.r5;
+ g5 = inn.g5;
+ b5 = inn.b5;
}
+ const le16_t *CBA_; if (CF_TEXTMODE!=3) CBA_ = inn.CBA;
+ const u32 v0_mask = inn.v_msk >> 10;
+ s32 y0 = inn.y0, y1 = inn.y1, li = inn.ilace_mask;
+ u32 u0_ = inn.u, v0 = inn.v;
+
if (CF_TEXTMODE==3) {
- // Texture is accessed byte-wise, so adjust mask if 16bpp
+ // Texture is accessed byte-wise, so adjust to 16bpp
+ u0_ <<= 1;
u0_mask <<= 1;
}
- const le16_t *CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
-
- do
+ for (; y0 < y1; ++y0, pPixel += FRAME_WIDTH, ++v0)
{
+ if (y0 & li) continue;
+ const u8 *pTxt = pTxt_base + ((v0 & v0_mask) * 2048);
+ le16_t *pDst = pPixel;
+ u32 u0 = u0_;
+ u32 count1 = count;
+ do
+ {
if (CF_MASKCHECK || CF_BLEND) { uDst = le16_to_u16(*pDst); }
if (CF_MASKCHECK) if (uDst&0x8000) { goto endsprite; }
endsprite:
u0 += (CF_TEXTMODE==3) ? 2 : 1;
pDst++;
+ }
+ while (--count1);
}
- while (--count);
}
-static void SpriteNULL(le16_t *pDst, u32 count, u8* pTxt, u32 u0)
+#ifdef __arm__
+
+template<int CF>
+static void SpriteMaybeAsm(le16_t *pPixel, u32 count, const u8 *pTxt_base,
+ const gpu_unai_inner_t &inn)
+{
+#if 1
+ s32 lines = inn.y1 - inn.y0;
+ u32 u1m = inn.u + count - 1, v1m = inn.v + lines - 1;
+ if (u1m == (u1m & (inn.u_msk >> 10)) && v1m == (v1m & (inn.v_msk >> 10))) {
+ const u8 *pTxt = pTxt_base + inn.v * 2048;
+ switch (CF) {
+ case 0x20: sprite_driver_4bpp_asm (pPixel, pTxt + inn.u / 2, count, &inn); return;
+ case 0x40: sprite_driver_8bpp_asm (pPixel, pTxt + inn.u, count, &inn); return;
+ case 0x60: sprite_driver_16bpp_asm(pPixel, pTxt + inn.u * 2, count, &inn); return;
+ }
+ }
+ if (v1m == (v1m & (inn.v_msk >> 10))) {
+ const u8 *pTxt = pTxt_base + inn.v * 2048;
+ switch (CF) {
+ case 0x20: sprite_driver_4bpp_l0_std_asm(pPixel, pTxt, count, &inn); return;
+ case 0x22: sprite_driver_4bpp_l0_st0_asm(pPixel, pTxt, count, &inn); return;
+ case 0x40: sprite_driver_8bpp_l0_std_asm(pPixel, pTxt, count, &inn); return;
+ case 0x42: sprite_driver_8bpp_l0_st0_asm(pPixel, pTxt, count, &inn); return;
+#ifdef HAVE_ARMV6
+ case 0x21: sprite_driver_4bpp_l1_std_asm(pPixel, pTxt, count, &inn); return;
+ case 0x23: sprite_driver_4bpp_l1_st0_asm(pPixel, pTxt, count, &inn); return;
+ case 0x2b: sprite_driver_4bpp_l1_st1_asm(pPixel, pTxt, count, &inn); return;
+ case 0x41: sprite_driver_8bpp_l1_std_asm(pPixel, pTxt, count, &inn); return;
+ case 0x43: sprite_driver_8bpp_l1_st0_asm(pPixel, pTxt, count, &inn); return;
+ case 0x4b: sprite_driver_8bpp_l1_st1_asm(pPixel, pTxt, count, &inn); return;
+#endif
+ }
+ }
+#endif
+ gpuSpriteDriverFn<CF>(pPixel, count, pTxt_base, inn);
+}
+#endif // __arm__
+
+static void SpriteNULL(le16_t *pPixel, u32 count, const u8 *pTxt_base,
+ const gpu_unai_inner_t &inn)
{
#ifdef ENABLE_GPU_LOG_SUPPORT
fprintf(stdout,"SpriteNULL()\n");
///////////////////////////////////////////////////////////////////////////////
// Sprite innerloops driver
-typedef void (*PS)(le16_t *pDst, u32 count, u8* pTxt, u32 u0);
// Template instantiation helper macros
-#define TI(cf) gpuSpriteSpanFn<(cf)>
+#define TI(cf) gpuSpriteDriverFn<(cf)>
#define TN SpriteNULL
+#ifdef __arm__
+#define TA(cf) SpriteMaybeAsm<(cf)>
+#else
+#define TA(cf) TI(cf)
+#endif
+#ifdef HAVE_ARMV6
+#define TA6(cf) SpriteMaybeAsm<(cf)>
+#else
+#define TA6(cf) TI(cf)
+#endif
#define TIBLOCK(ub) \
TN, TN, TN, TN, TN, TN, TN, TN, \
TN, TN, TN, TN, TN, TN, TN, TN, \
TN, TN, TN, TN, TN, TN, TN, TN, \
TN, TN, TN, TN, TN, TN, TN, TN, \
- TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
- TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \
+ TA((ub)|0x20), TA6((ub)|0x21),TA6((ub)|0x22),TA6((ub)|0x23),TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
+ TN, TN, TI((ub)|0x2a), TA6((ub)|0x2b),TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \
TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \
TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \
- TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \
- TN, TN, TI((ub)|0x4a), TI((ub)|0x4b), TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \
+ TA((ub)|0x40), TA6((ub)|0x41),TA6((ub)|0x42),TA6((ub)|0x43),TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \
+ TN, TN, TI((ub)|0x4a), TA6((ub)|0x4b),TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \
TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \
TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \
- TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \
+ TA((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \
TN, TN, TI((ub)|0x6a), TI((ub)|0x6b), TN, TN, TI((ub)|0x6e), TI((ub)|0x6f), \
TN, TN, TI((ub)|0x72), TI((ub)|0x73), TN, TN, TI((ub)|0x76), TI((ub)|0x77), \
TN, TN, TI((ub)|0x7a), TI((ub)|0x7b), TN, TN, TI((ub)|0x7e), TI((ub)|0x7f)
-const PS gpuSpriteSpanDrivers[256] = {
+const PS gpuSpriteDrivers[256] = {
TIBLOCK(0<<8), TIBLOCK(1<<8)
};
#undef TI
#undef TN
#undef TIBLOCK
+#undef TA
+#undef TA6
///////////////////////////////////////////////////////////////////////////////
// GPU Polygon innerloops generator
// relevant blend/light headers.
// (see README_senquack.txt)
template<int CF>
-static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
+static noinline void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
{
// Blend func can save an operation if it knows uSrc MSB is unset.
// Untextured prims can always skip this (src color MSB is always 0).
const bool skip_uSrc_mask = MSB_PRESERVED ? (!CF_TEXTMODE) : (!CF_TEXTMODE) || CF_LIGHT;
bool should_blend;
- u32 bMsk; if (CF_BLITMASK) bMsk = gpu_unai.blit_mask;
+ u32 bMsk; if (CF_BLITMASK) bMsk = gpu_unai.inn.blit_mask;
if (!CF_TEXTMODE)
{
if (!CF_GOURAUD)
{
// UNTEXTURED, NO GOURAUD
- const u16 pix15 = gpu_unai.PixelData;
+ const u16 pix15 = gpu_unai.inn.PixelData;
do {
uint_fast16_t uSrc, uDst;
else
{
// UNTEXTURED, GOURAUD
- gcol_t l_gCol = gpu_unai.gCol;
- gcol_t l_gInc = gpu_unai.gInc;
+ gcol_t l_gCol = gpu_unai.inn.gCol;
+ gcol_t l_gInc = gpu_unai.inn.gInc;
do {
uint_fast16_t uDst, uSrc;
//senquack - note: original UNAI code had gpu_unai.{u4/v4} packed into
// one 32-bit unsigned int, but this proved to lose too much accuracy
// (pixel drouputs noticeable in NFS3 sky), so now are separate vars.
- u32 l_u_msk = gpu_unai.u_msk; u32 l_v_msk = gpu_unai.v_msk;
- u32 l_u = gpu_unai.u & l_u_msk; u32 l_v = gpu_unai.v & l_v_msk;
- s32 l_u_inc = gpu_unai.u_inc; s32 l_v_inc = gpu_unai.v_inc;
+ u32 l_u_msk = gpu_unai.inn.u_msk; u32 l_v_msk = gpu_unai.inn.v_msk;
+ u32 l_u = gpu_unai.inn.u & l_u_msk; u32 l_v = gpu_unai.inn.v & l_v_msk;
+ s32 l_u_inc = gpu_unai.inn.u_inc; s32 l_v_inc = gpu_unai.inn.v_inc;
+ l_v <<= 1;
+ l_v_inc <<= 1;
+ l_v_msk = (l_v_msk & (0xff<<10)) << 1;
- const le16_t* TBA_ = gpu_unai.TBA;
- const le16_t* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.CBA;
+ const le16_t* TBA_ = gpu_unai.inn.TBA;
+ const le16_t* CBA_; if (CF_TEXTMODE!=3) CBA_ = gpu_unai.inn.CBA;
u8 r5, g5, b5;
u8 r8, g8, b8;
if (CF_LIGHT) {
if (CF_GOURAUD) {
- l_gInc = gpu_unai.gInc;
- l_gCol = gpu_unai.gCol;
+ l_gInc = gpu_unai.inn.gInc;
+ l_gCol = gpu_unai.inn.gCol;
} else {
if (CF_DITHER) {
- r8 = gpu_unai.r8;
- g8 = gpu_unai.g8;
- b8 = gpu_unai.b8;
+ r8 = gpu_unai.inn.r8;
+ g8 = gpu_unai.inn.g8;
+ b8 = gpu_unai.inn.b8;
} else {
- r5 = gpu_unai.r5;
- g5 = gpu_unai.g5;
- b5 = gpu_unai.b5;
+ r5 = gpu_unai.inn.r5;
+ g5 = gpu_unai.inn.g5;
+ b5 = gpu_unai.inn.b5;
}
}
}
// (UNAI originally used 16.16)
if (CF_TEXTMODE==1) { // 4bpp (CLUT)
u32 tu=(l_u>>10);
- u32 tv=(l_v<<1)&(0xff<<11);
+ u32 tv=l_v&l_v_msk;
u8 rgb=((u8*)TBA_)[tv+(tu>>1)];
uSrc=le16_to_u16(CBA_[(rgb>>((tu&1)<<2))&0xf]);
if (!uSrc) goto endpolytext;
}
if (CF_TEXTMODE==2) { // 8bpp (CLUT)
- uSrc = le16_to_u16(CBA_[(((u8*)TBA_)[(l_u>>10)+((l_v<<1)&(0xff<<11))])]);
+ u32 tv=l_v&l_v_msk;
+ uSrc = le16_to_u16(CBA_[((u8*)TBA_)[tv+(l_u>>10)]]);
if (!uSrc) goto endpolytext;
}
if (CF_TEXTMODE==3) { // 16bpp
- uSrc = le16_to_u16(TBA_[(l_u>>10)+((l_v)&(0xff<<10))]);
+ u32 tv=(l_v&l_v_msk)>>1;
+ uSrc = le16_to_u16(TBA_[tv+(l_u>>10)]);
if (!uSrc) goto endpolytext;
}
endpolytext:
pDst++;
l_u = (l_u + l_u_inc) & l_u_msk;
- l_v = (l_v + l_v_inc) & l_v_msk;
+ l_v += l_v_inc;
if (CF_LIGHT && CF_GOURAUD)
l_gCol.raw += l_gInc.raw;
}
}
}
+#ifdef __arm__
+template<int CF>
+static void PolySpanMaybeAsm(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
+{
+ switch (CF) {
+ case 0x02: poly_untex_st0_asm (pDst, &gpu_unai.inn, count); break;
+ case 0x0a: poly_untex_st1_asm (pDst, &gpu_unai.inn, count); break;
+ case 0x1a: poly_untex_st3_asm (pDst, &gpu_unai.inn, count); break;
+ case 0x20: poly_4bpp_asm (pDst, &gpu_unai.inn, count); break;
+ case 0x22: poly_4bpp_l0_st0_asm(pDst, &gpu_unai.inn, count); break;
+ case 0x40: poly_8bpp_asm (pDst, &gpu_unai.inn, count); break;
+ case 0x42: poly_8bpp_l0_st0_asm(pDst, &gpu_unai.inn, count); break;
+#ifdef HAVE_ARMV6
+ case 0x12: poly_untex_st2_asm (pDst, &gpu_unai.inn, count); break;
+ case 0x21: poly_4bpp_l1_std_asm(pDst, &gpu_unai.inn, count); break;
+ case 0x23: poly_4bpp_l1_st0_asm(pDst, &gpu_unai.inn, count); break;
+ case 0x41: poly_8bpp_l1_std_asm(pDst, &gpu_unai.inn, count); break;
+ case 0x43: poly_8bpp_l1_st0_asm(pDst, &gpu_unai.inn, count); break;
+#endif
+ default: gpuPolySpanFn<CF>(gpu_unai, pDst, count);
+ }
+}
+#endif
+
static void PolyNULL(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
{
#ifdef ENABLE_GPU_LOG_SUPPORT
// Template instantiation helper macros
#define TI(cf) gpuPolySpanFn<(cf)>
#define TN PolyNULL
+#ifdef __arm__
+#define TA(cf) PolySpanMaybeAsm<(cf)>
+#else
+#define TA(cf) TI(cf)
+#endif
+#ifdef HAVE_ARMV6
+#define TA6(cf) PolySpanMaybeAsm<(cf)>
+#else
+#define TA6(cf) TI(cf)
+#endif
#define TIBLOCK(ub) \
- TI((ub)|0x00), TI((ub)|0x01), TI((ub)|0x02), TI((ub)|0x03), TI((ub)|0x04), TI((ub)|0x05), TI((ub)|0x06), TI((ub)|0x07), \
- TN, TN, TI((ub)|0x0a), TI((ub)|0x0b), TN, TN, TI((ub)|0x0e), TI((ub)|0x0f), \
- TN, TN, TI((ub)|0x12), TI((ub)|0x13), TN, TN, TI((ub)|0x16), TI((ub)|0x17), \
- TN, TN, TI((ub)|0x1a), TI((ub)|0x1b), TN, TN, TI((ub)|0x1e), TI((ub)|0x1f), \
- TI((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
+ TI((ub)|0x00), TI((ub)|0x01), TA6((ub)|0x02),TI((ub)|0x03), TI((ub)|0x04), TI((ub)|0x05), TI((ub)|0x06), TI((ub)|0x07), \
+ TN, TN, TA((ub)|0x0a), TI((ub)|0x0b), TN, TN, TI((ub)|0x0e), TI((ub)|0x0f), \
+ TN, TN, TA6((ub)|0x12),TI((ub)|0x13), TN, TN, TI((ub)|0x16), TI((ub)|0x17), \
+ TN, TN, TA((ub)|0x1a), TI((ub)|0x1b), TN, TN, TI((ub)|0x1e), TI((ub)|0x1f), \
+ TA((ub)|0x20), TA6((ub)|0x21),TA6((ub)|0x22),TA6((ub)|0x23),TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \
TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \
TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \
- TI((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \
+ TA((ub)|0x40), TA6((ub)|0x41),TA6((ub)|0x42),TA6((ub)|0x43),TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \
TN, TN, TI((ub)|0x4a), TI((ub)|0x4b), TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \
TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \
TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \
#undef TI
#undef TN
#undef TIBLOCK
+#undef TA
+#undef TA6
#endif /* __GPU_UNAI_GPU_INNER_H__ */
asm ("eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst
"and %[mix], %[mix], %[mask]\n\t" // ... & 0x0421
"sub %[mix], %[uDst], %[mix]\n\t" // uDst - ...
+ #ifdef HAVE_ARMV6
+ "uhadd16 %[mix], %[uSrc], %[mix]\n\t"
+ #else
"add %[mix], %[uSrc], %[mix]\n\t" // uSrc + ...
"mov %[mix], %[mix], lsr #0x1\n\t" // ... >> 1
+ #endif
: [mix] "=&r" (mix)
- : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421));
+ : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0420)); // 421
}
if (BLENDMODE == 1 || BLENDMODE == 3) {
#ifndef _OP_LIGHT_ARM_H_
#define _OP_LIGHT_ARM_H_
+#include "arm_features.h"
+
////////////////////////////////////////////////////////////////////////////////
// Extract bgr555 color from Gouraud u32 fixed-pt 8.3:8.3:8.2 rgb triplet
//
// u16 output: mbbbbbgggggrrrrr
// Where 'X' are fixed-pt bits.
////////////////////////////////////////////////////////////////////////////////
+#ifdef HAVE_ARMV6
+// clang uses smulbb but not gcc, so we need this
+GPU_INLINE int_fast16_t smulbb(int_fast16_t a, int_fast16_t b)
+{
+ int_fast16_t r;
+ asm("smulbb %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
+ return r;
+}
+
+GPU_INLINE uint_fast16_t gpuLightingTXTARM(uint_fast16_t uSrc, u8 r5, u8 g5, u8 b5)
+{
+ // on v6 we have single-cycle mul and sat which is better than the lut
+ int_fast16_t r = smulbb(uSrc & 0x001f, r5);
+ int_fast16_t g = smulbb(uSrc & 0x03e0, g5);
+ int_fast16_t b = smulbb(uSrc & 0x7c00, b5);
+ asm volatile("usat %0, #5, %0, asr #4" : "=r"(r) : "0"(r));
+ asm volatile("usat %0, #5, %0, asr #9" : "=r"(g) : "0"(g));
+ asm volatile("usat %0, #5, %0, asr #14" : "=r"(b) : "0"(b));
+ return (uSrc & 0x8000) | (b << 10) | (g << 5) | r;
+}
+#else
GPU_INLINE uint_fast16_t gpuLightingTXTARM(uint_fast16_t uSrc, u8 r5, u8 g5, u8 b5)
{
uint_fast16_t out = 0x03E0;
: "cc");
return out;
}
+#endif
////////////////////////////////////////////////////////////////////////////////
// Apply fast (low-precision) 5-bit Gouraud lighting to bgr555 texture color:
w0 = le16_to_s16(packet.U2[4]) & 0x3ff;
h0 = le16_to_s16(packet.U2[5]) & 0x1ff;
+ x0 &= ~0xF;
+ w0 = ((w0 + 0xF) & ~0xF);
+
w0 += x0;
if (x0 < 0) x0 = 0;
if (w0 > FRAME_WIDTH) w0 = FRAME_WIDTH;
fprintf(stdout,"gpuClearImage(x0=%d,y0=%d,w0=%d,h0=%d)\n",x0,y0,w0,h0);
#endif
- if (x0&1)
- {
- le16_t* pixel = gpu_unai.vram + FRAME_OFFSET(x0, y0);
- le16_t rgb = u16_to_le16(GPU_RGB16(le32_to_u32(packet.U4[0])));
- y0 = FRAME_WIDTH - w0;
- do {
- x0=w0;
- do { *pixel++ = rgb; } while (--x0);
- pixel += y0;
- } while (--h0);
- }
- else
{
le32_t* pixel = (le32_t*)gpu_unai.vram + ((FRAME_OFFSET(x0, y0))>>1);
u32 _rgb = GPU_RGB16(le32_to_u32(packet.U4[0]));
le32_t rgb = u32_to_le32(_rgb | (_rgb << 16));
- if (w0&1)
- {
- y0 = (FRAME_WIDTH - w0 +1)>>1;
- w0>>=1;
- do {
- x0=w0;
- do { *pixel++ = rgb; } while (--x0);
- *((u16*)pixel) = (u16)le32_raw(rgb);
- pixel += y0;
- } while (--h0);
- }
- else
{
y0 = (FRAME_WIDTH - w0)>>1;
- w0>>=1;
+ w0>>=3;
do {
x0=w0;
- do { *pixel++ = rgb; } while (--x0);
+ do {
+ pixel[0] = rgb;
+ pixel[1] = rgb;
+ pixel[2] = rgb;
+ pixel[3] = rgb;
+ pixel += 4;
+ } while (--x0);
pixel += y0;
} while (--h0);
}
int num_verts = (is_quad) ? 4 : 3;
le32_t *ptr;
- // X,Y coords, adjusted by draw offsets
- s32 x_off = gpu_unai.DrawingOffset[0];
- s32 y_off = gpu_unai.DrawingOffset[1];
+ // X,Y coords
ptr = &packet.U4[1];
for (int i=0; i < num_verts; ++i, ptr += vert_stride) {
u32 coords = le32_to_u32(*ptr);
- vbuf[i].x = GPU_EXPANDSIGN((s16)coords) + x_off;
- vbuf[i].y = GPU_EXPANDSIGN((s16)(coords >> 16)) + y_off;
+ vbuf[i].x = GPU_EXPANDSIGN(coords);
+ vbuf[i].y = GPU_EXPANDSIGN(coords >> 16);
}
// U,V texture coords (if applicable)
// or 1 for second triangle of a quad (idx 1,2,3 of vbuf[]).
// Returns true if triangle should be rendered, false if not.
///////////////////////////////////////////////////////////////////////////////
-static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs)
+static bool polyUseTriangle(const PolyVertex *vbuf, int tri_num, const PolyVertex **vert_ptrs, s32 &x_off, s32 &y_off)
{
// Using verts 0,1,2 or is this the 2nd pass of a quad (verts 1,2,3)?
const PolyVertex *tri_ptr = &vbuf[(tri_num == 0) ? 0 : 1];
(highest_y - lowest_y) >= CHKMAX_Y)
return false;
+ // Determine offsets
+ x_off = gpu_unai.DrawingOffset[0];
+ y_off = gpu_unai.DrawingOffset[1];
+ x_off = GPU_EXPANDSIGN(lowest_x + x_off) - lowest_x;
+ y_off = GPU_EXPANDSIGN(lowest_y + y_off) - lowest_y;
+
// Determine if triangle is completely outside clipping range
- int xmin, xmax, ymin, ymax;
+ s32 xmin, xmax, ymin, ymax;
xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
- int clipped_lowest_x = Max2(xmin,lowest_x);
- int clipped_lowest_y = Max2(ymin,lowest_y);
- int clipped_highest_x = Min2(xmax,highest_x);
- int clipped_highest_y = Min2(ymax,highest_y);
+ int clipped_lowest_x = Max2(xmin, lowest_x + x_off);
+ int clipped_lowest_y = Max2(ymin, lowest_y + y_off);
+ int clipped_highest_x = Min2(xmax, highest_x + x_off);
+ int clipped_highest_y = Min2(ymax, highest_y + y_off);
if (clipped_lowest_x >= clipped_highest_x ||
clipped_lowest_y >= clipped_highest_y)
return false;
/*----------------------------------------------------------------------
gpuDrawPolyF - Flat-shaded, untextured poly
----------------------------------------------------------------------*/
-void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
+void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
+ PolyType ptype = POLYTYPE_F)
{
// Set up bgr555 color to be used across calls in inner driver
- gpu_unai.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
+ gpu_unai.inn.PixelData = GPU_RGB16(le32_to_u32(packet.U4[0]));
PolyVertex vbuf[4];
- polyInitVertexBuffer(vbuf, packet, POLYTYPE_F, is_quad);
+ polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
int total_passes = is_quad ? 2 : 1;
int cur_pass = 0;
do
{
const PolyVertex* vptrs[3];
- if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ s32 x_off, y_off;
+ if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
continue;
s32 xa, xb, ya, yb;
s32 x3, dx3, x4, dx4, dx;
s32 x0, x1, x2, y0, y1, y2;
- x0 = vptrs[0]->x; y0 = vptrs[0]->y;
- x1 = vptrs[1]->x; y1 = vptrs[1]->y;
- x2 = vptrs[2]->x; y2 = vptrs[2]->y;
+ x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
+ x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
+ x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
ya = y2 - y0;
yb = y2 - y1;
continue;
le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
- int li=gpu_unai.ilace_mask;
- int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
- int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+ int li=gpu_unai.inn.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH,
x3 += dx3, x4 += dx4 )
/*----------------------------------------------------------------------
gpuDrawPolyFT - Flat-shaded, textured poly
----------------------------------------------------------------------*/
-void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad)
+void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad,
+ PolyType ptype = POLYTYPE_FT)
{
// r8/g8/b8 used if texture-blending & dithering is applied (24-bit light)
- gpu_unai.r8 = packet.U1[0];
- gpu_unai.g8 = packet.U1[1];
- gpu_unai.b8 = packet.U1[2];
+ gpu_unai.inn.r8 = packet.U1[0];
+ gpu_unai.inn.g8 = packet.U1[1];
+ gpu_unai.inn.b8 = packet.U1[2];
// r5/g5/b5 used if just texture-blending is applied (15-bit light)
- gpu_unai.r5 = packet.U1[0] >> 3;
- gpu_unai.g5 = packet.U1[1] >> 3;
- gpu_unai.b5 = packet.U1[2] >> 3;
+ gpu_unai.inn.r5 = packet.U1[0] >> 3;
+ gpu_unai.inn.g5 = packet.U1[1] >> 3;
+ gpu_unai.inn.b5 = packet.U1[2] >> 3;
PolyVertex vbuf[4];
- polyInitVertexBuffer(vbuf, packet, POLYTYPE_FT, is_quad);
+ polyInitVertexBuffer(vbuf, packet, ptype, is_quad);
int total_passes = is_quad ? 2 : 1;
int cur_pass = 0;
do
{
const PolyVertex* vptrs[3];
- if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ s32 x_off, y_off;
+ if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
continue;
s32 xa, xb, ya, yb;
s32 u0, u1, u2, v0, v1, v2;
s32 du4, dv4;
- x0 = vptrs[0]->x; y0 = vptrs[0]->y;
- u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
- x1 = vptrs[1]->x; y1 = vptrs[1]->y;
- u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
- x2 = vptrs[2]->x; y2 = vptrs[2]->y;
- u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
+ x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
+ u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
+ x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
+ u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
+ x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
+ u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
ya = y2 - y0;
yb = y2 - y1;
#endif
#endif
// Set u,v increments for inner driver
- gpu_unai.u_inc = du4;
- gpu_unai.v_inc = dv4;
+ gpu_unai.inn.u_inc = du4;
+ gpu_unai.inn.v_inc = dv4;
//senquack - TODO: why is it always going through 2 iterations when sometimes one would suffice here?
// (SAME ISSUE ELSEWHERE)
continue;
le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
- int li=gpu_unai.ilace_mask;
- int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
- int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+ int li=gpu_unai.inn.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
x3 += dx3, x4 += dx4,
}
// Set u,v coords for inner driver
- gpu_unai.u = u4;
- gpu_unai.v = v4;
+ gpu_unai.inn.u = u4;
+ gpu_unai.inn.v = v4;
if (xb > xmax) xb = xmax;
if ((xb - xa) > 0)
do
{
const PolyVertex* vptrs[3];
- if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ s32 x_off, y_off;
+ if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
continue;
s32 xa, xb, ya, yb;
s32 r0, r1, r2, g0, g1, g2, b0, b1, b2;
s32 dr4, dg4, db4;
- x0 = vptrs[0]->x; y0 = vptrs[0]->y;
- r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
- x1 = vptrs[1]->x; y1 = vptrs[1]->y;
- r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
- x2 = vptrs[2]->x; y2 = vptrs[2]->y;
- r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
+ x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
+ r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
+ x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
+ r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
+ x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
+ r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
ya = y2 - y0;
yb = y2 - y1;
#endif
#endif
// Setup packed Gouraud increment for inner driver
- gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
+ gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
for (s32 loop0 = 2; loop0; loop0--) {
if (loop0 == 2) {
continue;
le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
- int li=gpu_unai.ilace_mask;
- int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
- int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+ int li=gpu_unai.inn.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
x3 += dx3, x4 += dx4,
}
// Setup packed Gouraud color for inner driver
- gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
+ gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
if (xb > xmax) xb = xmax;
if ((xb - xa) > 0)
do
{
const PolyVertex* vptrs[3];
- if (polyUseTriangle(vbuf, cur_pass, vptrs) == false)
+ s32 x_off, y_off;
+ if (!polyUseTriangle(vbuf, cur_pass, vptrs, x_off, y_off))
continue;
s32 xa, xb, ya, yb;
s32 du4, dv4;
s32 dr4, dg4, db4;
- x0 = vptrs[0]->x; y0 = vptrs[0]->y;
- u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
- r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
- x1 = vptrs[1]->x; y1 = vptrs[1]->y;
- u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
- r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
- x2 = vptrs[2]->x; y2 = vptrs[2]->y;
- u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
- r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
+ x0 = vptrs[0]->x + x_off; y0 = vptrs[0]->y + y_off;
+ u0 = vptrs[0]->tex.u; v0 = vptrs[0]->tex.v;
+ r0 = vptrs[0]->col.r; g0 = vptrs[0]->col.g; b0 = vptrs[0]->col.b;
+ x1 = vptrs[1]->x + x_off; y1 = vptrs[1]->y + y_off;
+ u1 = vptrs[1]->tex.u; v1 = vptrs[1]->tex.v;
+ r1 = vptrs[1]->col.r; g1 = vptrs[1]->col.g; b1 = vptrs[1]->col.b;
+ x2 = vptrs[2]->x + x_off; y2 = vptrs[2]->y + y_off;
+ u2 = vptrs[2]->tex.u; v2 = vptrs[2]->tex.v;
+ r2 = vptrs[2]->col.r; g2 = vptrs[2]->col.g; b2 = vptrs[2]->col.b;
ya = y2 - y0;
yb = y2 - y1;
#endif
#endif
// Set u,v increments and packed Gouraud increment for inner driver
- gpu_unai.u_inc = du4;
- gpu_unai.v_inc = dv4;
- gpu_unai.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
+ gpu_unai.inn.u_inc = du4;
+ gpu_unai.inn.v_inc = dv4;
+ gpu_unai.inn.gInc = gpuPackGouraudColInc(dr4, dg4, db4);
for (s32 loop0 = 2; loop0; loop0--) {
if (loop0 == 2) {
continue;
le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)];
- int li=gpu_unai.ilace_mask;
- int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
- int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
+ int li=gpu_unai.inn.ilace_mask;
+ int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0);
+ int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1);
for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH,
x3 += dx3, x4 += dx4,
}
// Set packed Gouraud color and u,v coords for inner driver
- gpu_unai.u = u4;
- gpu_unai.v = v4;
- gpu_unai.gCol = gpuPackGouraudCol(r4, g4, b4);
+ gpu_unai.inn.u = u4;
+ gpu_unai.inn.v = v4;
+ gpu_unai.inn.gCol = gpuPackGouraudCol(r4, g4, b4);
if (xb > xmax) xb = xmax;
if ((xb - xa) > 0)
///////////////////////////////////////////////////////////////////////////////
// GPU internal sprite drawing functions
-void gpuDrawS(PtrUnion packet, const PS gpuSpriteSpanDriver, s32 *w_out, s32 *h_out)
+void gpuDrawS(PtrUnion packet, const PS gpuSpriteDriver, s32 *w_out, s32 *h_out)
{
s32 x0, x1, y0, y1;
u32 u0, v0;
*w_out = x1;
*h_out = y1 - y0;
- gpu_unai.r5 = packet.U1[0] >> 3;
- gpu_unai.g5 = packet.U1[1] >> 3;
- gpu_unai.b5 = packet.U1[2] >> 3;
-
le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)];
- const int li=gpu_unai.ilace_mask;
- const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
- const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
- unsigned int tmode = gpu_unai.TEXT_MODE >> 5;
- const u32 v0_mask = gpu_unai.TextureWindow[3];
- u8* pTxt_base = (u8*)gpu_unai.TBA;
-
- // Texture is accessed byte-wise, so adjust idx if 16bpp
- if (tmode == 3) u0 <<= 1;
-
- for (; y0<y1; ++y0) {
- u8* pTxt = pTxt_base + ((v0 & v0_mask) * 2048);
- if (!(y0&li) && (y0&pi)!=pif)
- gpuSpriteSpanDriver(Pixel, x1, pTxt, u0);
- Pixel += FRAME_WIDTH;
- v0++;
- }
-}
-
-#ifdef __arm__
-#include "gpu_arm.h"
-
-/* Notaz 4bit sprites optimization */
-void gpuDrawS16(PtrUnion packet, s32 *w_out, s32 *h_out)
-{
- s32 x0, y0;
- s32 u0, v0;
- s32 xmin, xmax;
- s32 ymin, ymax;
- u32 h = 16;
- //NOTE: Must 11-bit sign-extend the whole sum here, not just packet X/Y,
- // or sprites in 1st level of SkullMonkeys disappear when walking right.
- // This now matches behavior of Mednafen and PCSX Rearmed's gpu_neon:
- x0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[2]) + gpu_unai.DrawingOffset[0]);
- y0 = GPU_EXPANDSIGN(le16_to_s16(packet.U2[3]) + gpu_unai.DrawingOffset[1]);
-
- xmin = gpu_unai.DrawingArea[0]; xmax = gpu_unai.DrawingArea[2];
- ymin = gpu_unai.DrawingArea[1]; ymax = gpu_unai.DrawingArea[3];
- u0 = packet.U1[8];
- v0 = packet.U1[9];
-
- if (x0 > xmax - 16 || x0 < xmin ||
- ((u0 | v0) & 15) || !(gpu_unai.TextureWindow[2] & gpu_unai.TextureWindow[3] & 8)) {
- // send corner cases to general handler
- packet.U4[3] = u32_to_le32(0x00100010);
- gpuDrawS(packet, gpuSpriteSpanFn<0x20>, w_out, h_out);
- return;
- }
-
- if (y0 >= ymax || y0 <= ymin - 16)
- return;
- if (y0 < ymin) {
- h -= ymin - y0;
- v0 += ymin - y0;
- y0 = ymin;
- }
- else if (ymax - y0 < 16)
- h = ymax - y0;
- *w_out = 16;
- *h_out = h;
-
- draw_spr16_full(&gpu_unai.vram[FRAME_OFFSET(x0, y0)], &gpu_unai.TBA[FRAME_OFFSET(u0/4, v0)], gpu_unai.CBA, h);
+ gpu_unai.inn.r5 = packet.U1[0] >> 3;
+ gpu_unai.inn.g5 = packet.U1[1] >> 3;
+ gpu_unai.inn.b5 = packet.U1[2] >> 3;
+ gpu_unai.inn.u = u0;
+ gpu_unai.inn.v = v0;
+ gpu_unai.inn.y0 = y0;
+ gpu_unai.inn.y1 = y1;
+ gpuSpriteDriver(Pixel, x1, (u8 *)gpu_unai.inn.TBA, gpu_unai.inn);
}
-#endif // __arm__
-void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver, s32 *w_out, s32 *h_out)
+void gpuDrawT(PtrUnion packet, const PT gpuTileDriver, s32 *w_out, s32 *h_out)
{
s32 x0, x1, y0, y1;
const u16 Data = GPU_RGB16(le32_to_u32(packet.U4[0]));
le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)];
- const int li=gpu_unai.ilace_mask;
- const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0);
- const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1);
-
- for (; y0<y1; ++y0) {
- if (!(y0&li) && (y0&pi)!=pif)
- gpuTileSpanDriver(Pixel,x1,Data);
- Pixel += FRAME_WIDTH;
- }
+
+ gpu_unai.inn.y0 = y0;
+ gpu_unai.inn.y1 = y1;
+ gpuTileDriver(Pixel, Data, x1, gpu_unai.inn);
}
#endif /* __GPU_UNAI_GPU_RASTER_SPRITE_H__ */
#ifndef GPU_UNAI_H
#define GPU_UNAI_H
+#include <stdint.h>
#include "gpu.h"
// Header shared between both standalone gpu_unai (gpu.cpp) and new
u64 raw;
} gcol_t;
+#ifndef NDEBUG
+
typedef struct {
u32 v;
} le32_t;
u16 v;
} le16_t;
+#define LExRead(v_) (v_.v)
+
+#else
+
+typedef u32 le32_t;
+typedef u16 le16_t;
+#define LExRead(v) (v)
+
+#endif
+
static inline u32 le32_to_u32(le32_t le)
{
- return LE32TOH(le.v);
+ return LE32TOH(LExRead(le));
}
static inline s32 le32_to_s32(le32_t le)
{
- return (int32_t) LE32TOH(le.v);
+ return (int32_t) LE32TOH(LExRead(le));
}
static inline u32 le32_raw(le32_t le)
{
- return le.v;
+ return LExRead(le);
}
static inline le32_t u32_to_le32(u32 u)
{
- return (le32_t){ .v = HTOLE32(u) };
+ return (le32_t){ HTOLE32(u) };
}
static inline u16 le16_to_u16(le16_t le)
{
- return LE16TOH(le.v);
+ return LE16TOH(LExRead(le));
}
static inline s16 le16_to_s16(le16_t le)
{
- return (int16_t) LE16TOH(le.v);
+ return (int16_t) LE16TOH(LExRead(le));
}
static inline u16 le16_raw(le16_t le)
{
- return le.v;
+ return LExRead(le);
}
static inline le16_t u16_to_le16(u16 u)
{
- return (le16_t){ .v = HTOLE16(u) };
+ return (le16_t){ HTOLE16(u) };
}
union PtrUnion
// 'Unsafe' version of above that doesn't check for div-by-zero
#define GPU_FAST_DIV(rs, rt) ((signed)(rs) / (signed)(rt))
+// warning: gpu_arm.S asm uses this struct, update the asm if you change this
+struct gpu_unai_inner_t {
+ le16_t* TBA; // 00 Ptr to current texture in VRAM
+ le16_t* CBA; // 04 Ptr to current CLUT in VRAM
+
+ // 22.10 Fixed-pt texture coords, mask, scanline advance
+ // NOTE: U,V are no longer packed together into one u32, this proved to be
+ // too imprecise, leading to pixel dropouts. Example: NFS3's skybox.
+ u32 u, v; // 08 not fractional for sprites
+ u32 u_msk, v_msk; // 10 always 22.10
+ union {
+ struct {
+ s32 u_inc, v_inc; // 18 poly uv increment, 22.10
+ };
+ struct {
+ s32 y0, y1; // 18 sprite y range
+ };
+ };
+
+ // Color for flat-shaded, texture-blended prims
+ u8 r5, g5, b5, pad5; // 20 5-bit light for undithered prims
+ u8 r8, g8, b8, pad8; // 24 8-bit light for dithered prims
+
+ // Color for Gouraud-shaded prims
+ // Fixed-pt 8.8 rgb triplet
+ // Packed fixed-pt 8.3:8.3:8.2 rgb triplet
+ // layout: ccccccccXXXXXXXX for c in [r, g, b]
+ // ^ bit 16
+ gcol_t gCol; // 28
+ gcol_t gInc; // 30 Increment along scanline for gCol
+
+ // Color for flat-shaded, untextured prims
+ u16 PixelData; // 38 bgr555 color for untextured flat-shaded polys
+
+ u8 blit_mask; // Determines what pixels to skip when rendering.
+ // Only useful on low-resolution devices using
+ // a simple pixel-dropping downscaler for PS1
+ // high-res modes. See 'pixel_skip' option.
+
+ u8 ilace_mask; // Determines what lines to skip when rendering.
+ // Normally 0 when PS1 240 vertical res is in
+ // use and ilace_force is 0. When running in
+ // PS1 480 vertical res on a low-resolution
+ // device (320x240), will usually be set to 1
+ // so odd lines are not rendered. (Unless future
+ // full-screen scaling option is in use ..TODO)
+};
+
struct gpu_unai_t {
u32 GPU_GP1;
GPUPacket PacketBuffer;
s16 DrawingOffset[2]; // [0] : Drawing offset X (signed)
// [1] : Drawing offset Y (signed)
- le16_t* TBA; // Ptr to current texture in VRAM
- le16_t* CBA; // Ptr to current CLUT in VRAM
-
////////////////////////////////////////////////////////////////////////////
// Inner Loop parameters
- // 22.10 Fixed-pt texture coords, mask, scanline advance
- // NOTE: U,V are no longer packed together into one u32, this proved to be
- // too imprecise, leading to pixel dropouts. Example: NFS3's skybox.
- u32 u, v;
- u32 u_msk, v_msk;
- s32 u_inc, v_inc;
-
- // Color for Gouraud-shaded prims
- // Fixed-pt 8.8 rgb triplet
- // Packed fixed-pt 8.3:8.3:8.2 rgb triplet
- // layout: ccccccccXXXXXXXX for c in [r, g, b]
- // ^ bit 16
- gcol_t gCol;
- gcol_t gInc; // Increment along scanline for gCol
-
- // Color for flat-shaded, texture-blended prims
- u8 r5, g5, b5; // 5-bit light for undithered prims
- u8 r8, g8, b8; // 8-bit light for dithered prims
-
- // Color for flat-shaded, untextured prims
- u16 PixelData; // bgr555 color for untextured flat-shaded polys
+ __attribute__((aligned(32)))
+ gpu_unai_inner_t inn;
// End of inner Loop parameters
////////////////////////////////////////////////////////////////////////////
-
- u8 blit_mask; // Determines what pixels to skip when rendering.
- // Only useful on low-resolution devices using
- // a simple pixel-dropping downscaler for PS1
- // high-res modes. See 'pixel_skip' option.
-
- u8 ilace_mask; // Determines what lines to skip when rendering.
- // Normally 0 when PS1 240 vertical res is in
- // use and ilace_force is 0. When running in
- // PS1 480 vertical res on a low-resolution
- // device (320x240), will usually be set to 1
- // so odd lines are not rendered. (Unless future
- // full-screen scaling option is in use ..TODO)
-
bool prog_ilace_flag; // Tracks successive frames for 'prog_ilace' option
u8 BLEND_MODE;
u32 DitherMatrix[64]; // Matrix of dither coefficients
};
-static gpu_unai_t gpu_unai;
+static __attribute__((aligned(32))) gpu_unai_t gpu_unai;
// Global config that frontend can alter.. Values are read in GPU_init().
// TODO: if frontend menu modifies a setting, add a function that can notify
return gpu_unai.config.dithering;
}
-// For now, this is just for development/experimentation purposes..
-// If modified to return true, it will allow ignoring the status register
-// bit 9 setting (dither enable). It will still restrict dithering only
-// to Gouraud-shaded or texture-blended polys.
static inline bool ForcedDitheringEnabled()
{
- return false;
+ return gpu_unai.config.force_dithering;
}
static inline bool ProgressiveInterlaceEnabled()
#include <stdlib.h>
#include <string.h>
#include "../gpulib/gpu.h"
+#include "old/if.h"
#ifdef THREAD_RENDERING
#include "../gpulib/gpulib_thread_if.h"
/////////////////////////////////////////////////////////////////////////////
+#ifndef GPU_UNAI_NO_OLD
+#define IS_OLD_RENDERER() gpu_unai.config.old_renderer
+#else
+#define IS_OLD_RENDERER() false
+#endif
+
#define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096)
INLINE void scale_640_to_320(le16_t *dest, const le16_t *src, bool isRGB24) {
lines = *h;
// Ensure start at a non-skipped line
- while (*y & gpu_unai.ilace_mask) ++*y;
+ while (*y & gpu_unai.inn.ilace_mask) ++*y;
}
unsigned int fb_offset_src = (*y * dstride + *x) & fb_mask;
gpu_unai.downscale_vram = (le16_t*)gpu.mmap(DOWNSCALE_VRAM_SIZE);
- if (gpu_unai.downscale_vram == NULL) {
+ if (gpu_unai.downscale_vram == NULL || gpu_unai.downscale_vram == (le16_t *)(intptr_t)-1) {
fprintf(stderr, "failed to map downscale buffer\n");
+ gpu_unai.downscale_vram = NULL;
gpu.get_downscale_buffer = NULL;
}
else {
//senquack - new vars must be updated whenever texture window is changed:
// (used for polygon-drawing in gpu_inner.h, gpu_raster_polygon.h)
const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4
- gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1);
- gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
+ gpu_unai.inn.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1);
+ gpu_unai.inn.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
// Configuration options
gpu_unai.config = gpu_unai_config_ext;
// sprite-span functions, perhaps unnecessarily. No Abe Oddysey hack was
// present in latest PCSX4ALL sources we were using.
//gpu_unai.config.enableAbbeyHack = gpu_unai_config_ext.abe_hack;
- gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
+ gpu_unai.inn.ilace_mask = gpu_unai.config.ilace_force;
-#ifdef GPU_UNAI_USE_INT_DIV_MULTINV
+#if defined(GPU_UNAI_USE_INT_DIV_MULTINV) || (!defined(GPU_UNAI_NO_OLD) && !defined(GPU_UNAI_USE_FLOATMATH))
// s_invTable
for(int i=1;i<=(1<<TABLE_BITS);++i)
{
void renderer_notify_res_change(void)
{
- if (PixelSkipEnabled()) {
- // Set blit_mask for high horizontal resolutions. This allows skipping
- // rendering pixels that would never get displayed on low-resolution
- // platforms that use simple pixel-dropping scaler.
-
- switch (gpu.screen.hres)
- {
- case 512: gpu_unai.blit_mask = 0xa4; break; // GPU_BlitWWSWWSWS
- case 640: gpu_unai.blit_mask = 0xaa; break; // GPU_BlitWS
- default: gpu_unai.blit_mask = 0; break;
- }
- } else {
- gpu_unai.blit_mask = 0;
- }
+ gpu_unai.inn.ilace_mask = gpu_unai.config.ilace_force;
- if (LineSkipEnabled()) {
- // Set rendering line-skip (only render every other line in high-res
- // 480 vertical mode, or, optionally, force it for all video modes)
-
- if (gpu.screen.vres == 480) {
- if (gpu_unai.config.ilace_force) {
- gpu_unai.ilace_mask = 3; // Only need 1/4 of lines
- } else {
- gpu_unai.ilace_mask = 1; // Only need 1/2 of lines
- }
- } else {
- // Vert resolution changed from 480 to lower one
- gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
- }
- } else {
- gpu_unai.ilace_mask = 0;
+#ifndef HAVE_PRE_ARMV7 /* XXX */
+ if (gpu_unai.config.scale_hires)
+#endif
+ {
+ gpu_unai.inn.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE);
}
/*
// Inner loop vars must be updated whenever texture window is changed:
const u32 fb = FIXED_BITS; // # of fractional fixed-pt bits of u4/v4
- gpu_unai.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1);
- gpu_unai.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
+ gpu_unai.inn.u_msk = (((u32)gpu_unai.TextureWindow[2]) << fb) | ((1 << fb) - 1);
+ gpu_unai.inn.v_msk = (((u32)gpu_unai.TextureWindow[3]) << fb) | ((1 << fb) - 1);
gpuSetTexture(gpu_unai.GPU_GP1);
}
case 5: {
// GP0(E5h) - Set Drawing Offset (X,Y)
- gpu_unai.DrawingOffset[0] = ((s32)cmd_word<<(32-11))>>(32-11);
- gpu_unai.DrawingOffset[1] = ((s32)cmd_word<<(32-22))>>(32-11);
+ gpu_unai.DrawingOffset[0] = GPU_EXPANDSIGN(cmd_word);
+ gpu_unai.DrawingOffset[1] = GPU_EXPANDSIGN(cmd_word >> 11);
} break;
case 6: {
#endif
#include "../gpulib/gpu_timing.h"
+
+// Strip lower 3 bits of each color and determine if lighting should be used:
+static inline bool need_lighting(u32 rgb_raw)
+{
+ return (rgb_raw & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080);
+}
+
+static inline void textured_sprite(int &cpu_cycles_sum, int &cpu_cycles)
+{
+ u32 PRIM = le32_to_u32(gpu_unai.PacketBuffer.U4[0]) >> 24;
+ gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
+ s32 w = 0, h = 0;
+
+ //senquack - Only color 808080h-878787h allows skipping lighting calculation:
+ // This fixes Silent Hill running animation on loading screens:
+ // (On PSX, color values 0x00-0x7F darken the source texture's color,
+ // 0x81-FF lighten textures (ultimately clamped to 0x1F),
+ // 0x80 leaves source texture color unchanged, HOWEVER,
+ // gpu_unai uses a simple lighting LUT whereby only the upper
+ // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as
+ // 0x80.
+ //
+ // NOTE: I've changed all textured sprite draw commands here and
+ // elsewhere to use proper behavior, but left poly commands
+ // alone, I don't want to slow rendering down too much. (TODO)
+ if (need_lighting(le32_raw(gpu_unai.PacketBuffer.U4[0])))
+ driver_idx |= Lighting;
+ PS driver = gpuSpriteDrivers[driver_idx];
+ PtrUnion packet = { .ptr = (void*)&gpu_unai.PacketBuffer };
+ gpuDrawS(packet, driver, &w, &h);
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+}
+
extern const unsigned char cmd_lengths[256];
int do_cmd_list(u32 *list_, int list_len,
le32_t *list_start = list;
le32_t *list_end = list + list_len;
- //TODO: set ilace_mask when resolution changes instead of every time,
- // eliminate #ifdef below.
- gpu_unai.ilace_mask = gpu_unai.config.ilace_force;
-
-#ifdef HAVE_PRE_ARMV7 /* XXX */
- gpu_unai.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE);
-#endif
- if (gpu_unai.config.scale_hires) {
- gpu_unai.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE);
- }
+ if (IS_OLD_RENDERER())
+ return oldunai_do_cmd_list(list_, list_len, cycles_sum_out, cycles_last, last_cmd);
for (; list < list_end; list += 1 + len)
{
case 0x22:
case 0x23: { // Monochrome 3-pt poly
PP driver = gpuPolySpanDrivers[
- (gpu_unai.blit_mask?1024:0) |
+ //(gpu_unai.blit_mask?1024:0) |
Blending_Mode |
gpu_unai.Masking | Blending | gpu_unai.PixelMSB
];
gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
u32 driver_idx =
- (gpu_unai.blit_mask?1024:0) |
+ //(gpu_unai.blit_mask?1024:0) |
Dithering |
Blending_Mode | gpu_unai.TEXT_MODE |
gpu_unai.Masking | Blending | gpu_unai.PixelMSB;
case 0x2A:
case 0x2B: { // Monochrome 4-pt poly
PP driver = gpuPolySpanDrivers[
- (gpu_unai.blit_mask?1024:0) |
+ //(gpu_unai.blit_mask?1024:0) |
Blending_Mode |
gpu_unai.Masking | Blending | gpu_unai.PixelMSB
];
case 0x2D:
case 0x2E:
case 0x2F: { // Textured 4-pt poly
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ u32 simplified_count;
gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[4]) >> 16);
+ if ((simplified_count = prim_try_simplify_quad_t(gpu_unai.PacketBuffer.U4,
+ gpu_unai.PacketBuffer.U4)))
+ {
+ for (i = 0;; ) {
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
+ if (++i >= simplified_count)
+ break;
+ memcpy(&gpu_unai.PacketBuffer.U4[0], &gpu_unai.PacketBuffer.U4[i * 4], 16);
+ }
+ break;
+ }
+ gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
u32 driver_idx =
- (gpu_unai.blit_mask?1024:0) |
+ //(gpu_unai.blit_mask?1024:0) |
Dithering |
Blending_Mode | gpu_unai.TEXT_MODE |
gpu_unai.Masking | Blending | gpu_unai.PixelMSB;
// this is an untextured poly, so CF_LIGHT (texture blend)
// shouldn't apply. Until the original array of template
// instantiation ptrs is fixed, we're stuck with this. (TODO)
+ u8 gouraud = 129;
+ u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]);
+ for (i = 1; i < 3; i++)
+ xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 2]);
+ if ((xor_ & HTOLE32(0xf8f8f8)) == 0)
+ gouraud = 0;
PP driver = gpuPolySpanDrivers[
- (gpu_unai.blit_mask?1024:0) |
+ //(gpu_unai.blit_mask?1024:0) |
Dithering |
Blending_Mode |
- gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
+ gpu_unai.Masking | Blending | gouraud | gpu_unai.PixelMSB
];
- gpuDrawPolyG(packet, driver, false);
+ if (gouraud)
+ gpuDrawPolyG(packet, driver, false);
+ else
+ gpuDrawPolyF(packet, driver, false, POLYTYPE_G);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
} break;
case 0x37: { // Gouraud-shaded, textured 3-pt poly
gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
+ u8 lighting = Lighting;
+ u8 gouraud = lighting ? (1<<7) : 0;
+ if (lighting) {
+ u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]);
+ for (i = 1; i < 3; i++)
+ xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 3]);
+ if ((xor_ & HTOLE32(0xf8f8f8)) == 0) {
+ gouraud = 0;
+ if (!need_lighting(rgb0))
+ lighting = 0;
+ }
+ }
PP driver = gpuPolySpanDrivers[
- (gpu_unai.blit_mask?1024:0) |
+ //(gpu_unai.blit_mask?1024:0) |
Dithering |
Blending_Mode | gpu_unai.TEXT_MODE |
- gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
+ gpu_unai.Masking | Blending | gouraud | lighting | gpu_unai.PixelMSB
];
- gpuDrawPolyGT(packet, driver, false);
+ if (gouraud)
+ gpuDrawPolyGT(packet, driver, false); // is_quad = false
+ else
+ gpuDrawPolyFT(packet, driver, false, POLYTYPE_GT);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
} break;
case 0x3A:
case 0x3B: { // Gouraud-shaded 4-pt poly
// See notes regarding '129' for 0x30..0x33 further above -senquack
+ u8 gouraud = 129;
+ u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]);
+ for (i = 1; i < 4; i++)
+ xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 2]);
+ if ((xor_ & HTOLE32(0xf8f8f8)) == 0)
+ gouraud = 0;
PP driver = gpuPolySpanDrivers[
- (gpu_unai.blit_mask?1024:0) |
+ //(gpu_unai.blit_mask?1024:0) |
Dithering |
Blending_Mode |
- gpu_unai.Masking | Blending | 129 | gpu_unai.PixelMSB
+ gpu_unai.Masking | Blending | gouraud | gpu_unai.PixelMSB
];
- gpuDrawPolyG(packet, driver, true); // is_quad = true
+ if (gouraud)
+ gpuDrawPolyG(packet, driver, true); // is_quad = true
+ else
+ gpuDrawPolyF(packet, driver, true, POLYTYPE_G);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
} break;
case 0x3D:
case 0x3E:
case 0x3F: { // Gouraud-shaded, textured 4-pt poly
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
- gpuSetTexture (le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
+ u32 simplified_count;
+ gpuSetTexture(le32_to_u32(gpu_unai.PacketBuffer.U4[5]) >> 16);
+ if ((simplified_count = prim_try_simplify_quad_gt(gpu_unai.PacketBuffer.U4,
+ gpu_unai.PacketBuffer.U4)))
+ {
+ for (i = 0;; ) {
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
+ if (++i >= simplified_count)
+ break;
+ memcpy(&gpu_unai.PacketBuffer.U4[0], &gpu_unai.PacketBuffer.U4[i * 4], 16);
+ }
+ break;
+ }
+ gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
+ u8 lighting = Lighting;
+ u8 gouraud = lighting ? (1<<7) : 0;
+ if (lighting) {
+ u32 xor_ = 0, rgb0 = le32_raw(gpu_unai.PacketBuffer.U4[0]);
+ for (i = 1; i < 4; i++)
+ xor_ |= rgb0 ^ le32_raw(gpu_unai.PacketBuffer.U4[i * 3]);
+ if ((xor_ & HTOLE32(0xf8f8f8)) == 0) {
+ gouraud = 0;
+ if (!need_lighting(rgb0))
+ lighting = 0;
+ }
+ }
PP driver = gpuPolySpanDrivers[
- (gpu_unai.blit_mask?1024:0) |
+ //(gpu_unai.blit_mask?1024:0) |
Dithering |
Blending_Mode | gpu_unai.TEXT_MODE |
- gpu_unai.Masking | Blending | ((Lighting)?129:0) | gpu_unai.PixelMSB
+ gpu_unai.Masking | Blending | gouraud | lighting | gpu_unai.PixelMSB
];
- gpuDrawPolyGT(packet, driver, true); // is_quad = true
+ if (gouraud)
+ gpuDrawPolyGT(packet, driver, true); // is_quad = true
+ else
+ gpuDrawPolyFT(packet, driver, true, POLYTYPE_GT);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
} break;
case 0x61:
case 0x62:
case 0x63: { // Monochrome rectangle (variable size)
- PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ PT driver = gpuTileDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
s32 w = 0, h = 0;
gpuDrawT(packet, driver, &w, &h);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
case 0x64:
case 0x65:
case 0x66:
- case 0x67: { // Textured rectangle (variable size)
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
- u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
- s32 w = 0, h = 0;
-
- //senquack - Only color 808080h-878787h allows skipping lighting calculation:
- // This fixes Silent Hill running animation on loading screens:
- // (On PSX, color values 0x00-0x7F darken the source texture's color,
- // 0x81-FF lighten textures (ultimately clamped to 0x1F),
- // 0x80 leaves source texture color unchanged, HOWEVER,
- // gpu_unai uses a simple lighting LUT whereby only the upper
- // 5 bits of an 8-bit color are used, so 0x80-0x87 all behave as
- // 0x80.
- //
- // NOTE: I've changed all textured sprite draw commands here and
- // elsewhere to use proper behavior, but left poly commands
- // alone, I don't want to slow rendering down too much. (TODO)
- //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
- // Strip lower 3 bits of each color and determine if lighting should be used:
- if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
- driver_idx |= Lighting;
- PS driver = gpuSpriteSpanDrivers[driver_idx];
- gpuDrawS(packet, driver, &w, &h);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
- } break;
+ case 0x67: // Textured rectangle (variable size)
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
+ break;
case 0x68:
case 0x69:
case 0x6A:
case 0x6B: { // Monochrome rectangle (1x1 dot)
gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001);
- PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ PT driver = gpuTileDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
s32 w = 0, h = 0;
gpuDrawT(packet, driver, &w, &h);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
case 0x72:
case 0x73: { // Monochrome rectangle (8x8)
gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008);
- PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ PT driver = gpuTileDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
s32 w = 0, h = 0;
gpuDrawT(packet, driver, &w, &h);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
case 0x76:
case 0x77: { // Textured rectangle (8x8)
gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00080008);
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
- u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
- s32 w = 0, h = 0;
-
- //senquack - Only color 808080h-878787h allows skipping lighting calculation:
- //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
- // Strip lower 3 bits of each color and determine if lighting should be used:
- if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
- driver_idx |= Lighting;
- PS driver = gpuSpriteSpanDrivers[driver_idx];
- gpuDrawS(packet, driver, &w, &h);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
} break;
case 0x78:
case 0x7A:
case 0x7B: { // Monochrome rectangle (16x16)
gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010);
- PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
+ PT driver = gpuTileDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1];
s32 w = 0, h = 0;
gpuDrawT(packet, driver, &w, &h);
gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
case 0x7C:
case 0x7D:
-#ifdef __arm__
- if ((gpu_unai.GPU_GP1 & 0x180) == 0 && (gpu_unai.Masking | gpu_unai.PixelMSB) == 0)
- {
- s32 w = 0, h = 0;
- gpuSetCLUT(le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
- gpuDrawS16(packet, &w, &h);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
- break;
- }
- // fallthrough
-#endif
case 0x7E:
case 0x7F: { // Textured rectangle (16x16)
gpu_unai.PacketBuffer.U4[3] = u32_to_le32(0x00100010);
- gpuSetCLUT (le32_to_u32(gpu_unai.PacketBuffer.U4[2]) >> 16);
- u32 driver_idx = Blending_Mode | gpu_unai.TEXT_MODE | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>1);
- s32 w = 0, h = 0;
- //senquack - Only color 808080h-878787h allows skipping lighting calculation:
- //if ((gpu_unai.PacketBuffer.U1[0]>0x5F) && (gpu_unai.PacketBuffer.U1[1]>0x5F) && (gpu_unai.PacketBuffer.U1[2]>0x5F))
- // Strip lower 3 bits of each color and determine if lighting should be used:
- if ((le32_raw(gpu_unai.PacketBuffer.U4[0]) & HTOLE32(0xF8F8F8)) != HTOLE32(0x808080))
- driver_idx |= Lighting;
- PS driver = gpuSpriteSpanDrivers[driver_idx];
- gpuDrawS(packet, driver, &w, &h);
- gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h));
+ textured_sprite(cpu_cycles_sum, cpu_cycles);
} break;
#ifdef TEST
void renderer_sync_ecmds(u32 *ecmds)
{
- int dummy;
- do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
+ if (!IS_OLD_RENDERER()) {
+ int dummy;
+ do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
+ }
+ else
+ oldunai_renderer_sync_ecmds(ecmds);
}
void renderer_update_caches(int x, int y, int w, int h, int state_changed)
void renderer_set_interlace(int enable, int is_odd)
{
+ renderer_notify_res_change();
}
#include "../../frontend/plugin_lib.h"
void renderer_set_config(const struct rearmed_cbs *cbs)
{
gpu_unai.vram = (le16_t *)gpu.vram;
+ gpu_unai.config.old_renderer = cbs->gpu_unai.old_renderer;
gpu_unai.config.ilace_force = cbs->gpu_unai.ilace_force;
- gpu_unai.config.pixel_skip = cbs->gpu_unai.pixel_skip;
gpu_unai.config.lighting = cbs->gpu_unai.lighting;
gpu_unai.config.fast_lighting = cbs->gpu_unai.fast_lighting;
gpu_unai.config.blending = cbs->gpu_unai.blending;
- gpu_unai.config.dithering = cbs->gpu_unai.dithering;
gpu_unai.config.scale_hires = cbs->gpu_unai.scale_hires;
+ gpu_unai.config.dithering = cbs->dithering != 0;
+ gpu_unai.config.force_dithering = cbs->dithering >> 1;
gpu.state.downscale_enable = gpu_unai.config.scale_hires;
if (gpu_unai.config.scale_hires) {
} else {
unmap_downscale_buffer();
}
+ oldunai_renderer_set_config(cbs);
}
void renderer_sync(void)
cbs->pl_vout_set_mode(w0, h1, w0, h1, isRGB24 ? 24 : 16);
}
+#error out of date
cbs->pl_vout_flip(base, 1024, isRGB24, w0, h1);
}
#define fixed_TWO ((fixed)2<<FIXED_BITS)
#define fixed_HALF ((fixed)((1<<FIXED_BITS)>>1))
-// big precision inverse table.
-s32 s_invTable[(1<<TABLE_BITS)];
-
INLINE fixed i2x(const int _x) { return ((_x)<<FIXED_BITS); }
INLINE fixed x2i(const fixed _x) { return ((_x)>>FIXED_BITS); }
}
*/
+#ifdef GPU_UNAI_USE_FLOATMATH
+
+#define inv_type float
+
+INLINE void xInv (const fixed _b, float & factor_, float & shift_)
+{
+ factor_ = 1.0f / _b;
+ shift_ = 0.0f; // not used
+}
+
+INLINE fixed xInvMulx (const fixed _a, const float fact, const float shift)
+{
+ return (fixed)((_a << FIXED_BITS) * fact);
+}
+
+INLINE fixed xLoDivx (const fixed _a, const fixed _b)
+{
+ return (fixed)((_a << FIXED_BITS) / (float)_b);
+}
+
+#else
+
+#define inv_type s32
+
#ifdef HAVE_ARMV5
INLINE u32 Log2(u32 x) { u32 res; asm("clz %0,%1" : "=r" (res) : "r" (x)); return 32-res; }
#else
INLINE u32 Log2(u32 x) { u32 i = 0; for ( ; x > 0; ++i, x >>= 1); return i - 1; }
#endif
+// big precision inverse table.
+extern s32 s_invTable[(1<<TABLE_BITS)];
+
#ifdef GPU_TABLE_10_BITS
INLINE void xInv (const fixed _b, s32& iFactor_, s32& iShift_)
{
return xInvMulx(_a, iFact, iShift);
}
+#endif // GPU_UNAI_USE_FLOATMATH
+
///////////////////////////////////////////////////////////////////////////
template<typename T>
INLINE T Min2 (const T _a, const T _b) { return (_a<_b)?_a:_b; }
w0 = PacketBuffer.S2[4] & 0x3ff;
h0 = PacketBuffer.S2[5] & 0x3ff;
+ x0 &= ~0xF;
+ w0 = ((w0 + 0xF) & ~0xF);
+
w0 += x0;
if (x0 < 0) x0 = 0;
if (w0 > FRAME_WIDTH) w0 = FRAME_WIDTH;
h0 -= y0;
if (h0 <= 0) return;
- if (x0&1)
- {
- u16* pixel = (u16*)GPU_FrameBuffer + FRAME_OFFSET(x0, y0);
- u16 rgb = GPU_RGB16(PacketBuffer.S4[0]);
- y0 = FRAME_WIDTH - w0;
- do {
- x0=w0;
- do { *pixel++ = rgb; } while (--x0);
- pixel += y0;
- } while (--h0);
- }
- else
{
u32* pixel = (u32*)(void*)GPU_FrameBuffer + ((FRAME_OFFSET(x0, y0))>>1);
u32 rgb = GPU_RGB16(PacketBuffer.S4[0]);
rgb |= (rgb<<16);
- if (w0&1)
- {
- y0 = (FRAME_WIDTH - w0 +1)>>1;
- w0>>=1;
- do {
- x0=w0;
- do { *pixel++ = rgb; } while (--x0);
- *((u16*)pixel) = (u16)rgb;
- pixel += y0;
- } while (--h0);
- }
- else
{
y0 = (FRAME_WIDTH - w0)>>1;
- w0>>=1;
+ w0>>=3;
do {
x0=w0;
- do { *pixel++ = rgb; } while (--x0);
+ do {
+ pixel[0] = rgb;
+ pixel[1] = rgb;
+ pixel[2] = rgb;
+ pixel[3] = rgb;
+ pixel += 4;
+ } while (--x0);
pixel += y0;
} while (--h0);
}
du4 = (u2 - u1) * ya - (u2 - u0) * yb;
dv4 = (v2 - v1) * ya - (v2 - v0) * yb;
- s32 iF,iS;
+ inv_type iF,iS;
xInv( dx, iF, iS);
du4 = xInvMulx( du4, iF, iS);
dv4 = xInvMulx( dv4, iF, iS);
dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
db4 = (b2 - b1) * ya - (b2 - b0) * yb;
- s32 iF,iS;
+ inv_type iF,iS;
xInv( dx, iF, iS);
dr4 = xInvMulx( dr4, iF, iS);
dg4 = xInvMulx( dg4, iF, iS);
dg4 = (g2 - g1) * ya - (g2 - g0) * yb;
db4 = (b2 - b1) * ya - (b2 - b0) * yb;
- s32 iF,iS;
+ inv_type iF,iS;
xInv( dx, iF, iS);
du4 = xInvMulx( du4, iF, iS);
}
#ifdef __arm__
-#include "gpu_arm.h"
+#include "../gpu_arm.h"
void gpuDrawS16(void)
{
else if (ymax - y0 < 16)
h = ymax - y0;
- draw_spr16_full(&GPU_FrameBuffer[FRAME_OFFSET(x0, y0)], &TBA[FRAME_OFFSET(u0/4, v0)], CBA, h);
+ sprite_4bpp_x16_asm(&GPU_FrameBuffer[FRAME_OFFSET(x0, y0)], &TBA[FRAME_OFFSET(u0/4, v0)], CBA, h);
}
#endif // __arm__
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include "../gpulib/gpu.h"
+#include "../../gpulib/gpu.h"
#include "arm_features.h"
+#include "if.h"
#define u8 uint8_t
#define s8 int8_t
static bool FrameToRead = false; /* load image in progress */
static bool FrameToWrite = false; /* store image in progress */
-static bool enableAbbeyHack = false; /* Abe's Odyssey hack */
+//static bool enableAbbeyHack = false; /* Abe's Odyssey hack */
+#define enableAbbeyHack false
static u8 BLEND_MODE;
static u8 TEXT_MODE;
/////////////////////////////////////////////////////////////////////////////
-int renderer_init(void)
+void oldunai_renderer_init(void)
{
GPU_FrameBuffer = (u16 *)gpu.vram;
+#if 0 // shared with "new" unai
// s_invTable
for(int i=1;i<=(1<<TABLE_BITS);++i)
{
#endif
s_invTable[i-1]=s32(v);
}
-
- return 0;
-}
-
-void renderer_finish(void)
-{
-}
-
-void renderer_notify_res_change(void)
-{
-}
-
-void renderer_notify_scanout_change(int x, int y)
-{
+#endif
}
extern const unsigned char cmd_lengths[256];
-int do_cmd_list(uint32_t *list, int list_len,
+int oldunai_do_cmd_list(uint32_t *list, int list_len,
int *cycles_sum_out, int *cycles_last, int *last_cmd)
{
unsigned int cmd = 0, len, i;
- unsigned int *list_start = list;
- unsigned int *list_end = list + list_len;
+ uint32_t *list_start = list;
+ uint32_t *list_end = list + list_len;
linesInterlace = force_interlace;
#ifdef HAVE_PRE_ARMV7 /* XXX */
return list - list_start;
}
-void renderer_sync_ecmds(uint32_t *ecmds)
+void oldunai_renderer_sync_ecmds(uint32_t *ecmds)
{
int dummy;
do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
}
-void renderer_update_caches(int x, int y, int w, int h, int state_changed)
-{
-}
-
-void renderer_flush_queues(void)
-{
-}
-
-void renderer_set_interlace(int enable, int is_odd)
-{
-}
-
#ifndef TEST
-#include "../../frontend/plugin_lib.h"
+#include "../../../frontend/plugin_lib.h"
-void renderer_set_config(const struct rearmed_cbs *cbs)
+void oldunai_renderer_set_config(const struct rearmed_cbs *cbs)
{
- force_interlace = cbs->gpu_unai_old.lineskip;
- enableAbbeyHack = cbs->gpu_unai_old.abe_hack;
- light = !cbs->gpu_unai_old.no_light;
- blend = !cbs->gpu_unai_old.no_blend;
+ force_interlace = cbs->gpu_unai.ilace_force;
+ //enableAbbeyHack = cbs->gpu_unai_old.abe_hack;
+ light = cbs->gpu_unai.lighting;
+ blend = cbs->gpu_unai.blending;
GPU_FrameBuffer = (u16 *)gpu.vram;
}
--- /dev/null
+#ifndef GPU_UNAI_NO_OLD
+
+struct rearmed_cbs;
+
+void oldunai_renderer_init(void);
+int oldunai_do_cmd_list(uint32_t *list, int list_len,
+ int *cycles_sum_out, int *cycles_last, int *last_cmd);
+void oldunai_renderer_sync_ecmds(uint32_t *ecmds);
+void oldunai_renderer_set_config(const struct rearmed_cbs *cbs);
+
+#else
+
+#define oldunai_renderer_init()
+#define oldunai_do_cmd_list(...) 0
+#define oldunai_renderer_sync_ecmds(x)
+#define oldunai_renderer_set_config(x)
+
+#endif
+++ /dev/null
-CFLAGS += -ggdb -Wall -O3 -ffast-math
-CFLAGS += -DREARMED
-CFLAGS += -I../../include
-
-include ../../config.mak
-
-SRC_STANDALONE += gpu.cpp
-SRC_GPULIB += gpulib_if.cpp
-
-ifeq "$(ARCH)" "arm"
-SRC += gpu_arm.s
-endif
-
-#BIN_STANDALONE = gpuPCSX4ALL.so
-BIN_GPULIB = gpu_unai_old.so
-include ../gpulib/gpulib.mak
+++ /dev/null
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void draw_spr16_full(u16 *d, void *s, u16 *pal, int lines);
-
-#ifdef __cplusplus
-}
-#endif
+++ /dev/null
-/*
- * (C) Gražvydas "notaz" Ignotas, 2011
- *
- * This work is licensed under the terms of GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-
-.text
-.align 2
-
-@ in: r0=dst, r2=pal, r12=0x1e
-@ trashes r6-r8,lr,flags
-.macro do_4_pixels rs ibase obase
-.if \ibase - 1 < 0
- and r6, r12, \rs, lsl #1
-.else
- and r6, r12, \rs, lsr #\ibase-1
-.endif
- and r7, r12, \rs, lsr #\ibase+3
- and r8, r12, \rs, lsr #\ibase+7
- and lr, r12, \rs, lsr #\ibase+11
- ldrh r6, [r2, r6]
- ldrh r7, [r2, r7]
- ldrh r8, [r2, r8]
- ldrh lr, [r2, lr]
- tst r6, r6
- strneh r6, [r0, #\obase+0]
- tst r7, r7
- strneh r7, [r0, #\obase+2]
- tst r8, r8
- strneh r8, [r0, #\obase+4]
- tst lr, lr
- strneh lr, [r0, #\obase+6]
-.endm
-
-.global draw_spr16_full @ (u16 *d, void *s, u16 *pal, int lines)
-draw_spr16_full:
- stmfd sp!, {r4-r8,lr}
- mov r12, #0x1e @ empty pixel
-
-0:
- ldmia r1, {r4,r5}
- do_4_pixels r4, 0, 0
- do_4_pixels r4, 16, 8
- do_4_pixels r5, 0, 16
- do_4_pixels r5, 16, 24
- subs r3, r3, #1
- add r0, r0, #2048
- add r1, r1, #2048
- bgt 0b
-
- ldmfd sp!, {r4-r8,pc}
-
-@ vim:filetype=armasm
include ../../config.mak
-OBJS += gpu.o
+OBJS += gpu.o prim.o
-ifeq "$(ARCH)" "arm"
-OBJS += vout_pl.o
-EXT = a
-else
#CFLAGS += `sdl-config --cflags`
#OBJS += vout_sdl.o
OBJS += vout_pl.o
-EXT = $(ARCH).a
-endif
CFLAGS += $(PLUGIN_CFLAGS)
# need to compile to another dir, same files are compiled
# into main binary without PIC
OBJS2 = $(patsubst %.o,obj/%.o,$(OBJS))
-TARGET = gpulib.$(EXT)
+TARGET = gpulib.$(ARCH).a
all: ../../config.mak obj $(TARGET)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <stdlib.h> /* for calloc */
+
#include "gpu.h"
#include "gpu_timing.h"
#include "../../libpcsxcore/gpu.h" // meh
#include "../../frontend/plugin_lib.h"
+#include "../../include/compiler_features.h"
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
-#ifdef __GNUC__
-#define unlikely(x) __builtin_expect((x), 0)
-#define preload __builtin_prefetch
-#define noinline __attribute__((noinline))
-#else
-#define unlikely(x)
-#define preload(...)
-#define noinline
-#endif
//#define log_io gpu_log
#define log_io(...)
struct psx_gpu gpu;
-static noinline int do_cmd_buffer(uint32_t *data, int count,
+static noinline int do_cmd_buffer(struct psx_gpu *gpu, uint32_t *data, int count,
int *cycles_sum, int *cycles_last);
-static void finish_vram_transfer(int is_read);
+static noinline void finish_vram_transfer(struct psx_gpu *gpu, int is_read);
-static noinline void do_cmd_reset(void)
+static noinline void do_cmd_reset(struct psx_gpu *gpu)
{
int dummy = 0;
- if (unlikely(gpu.cmd_len > 0))
- do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
- gpu.cmd_len = 0;
-
- if (unlikely(gpu.dma.h > 0))
- finish_vram_transfer(gpu.dma_start.is_read);
- gpu.dma.h = 0;
+ renderer_sync();
+ if (unlikely(gpu->cmd_len > 0))
+ do_cmd_buffer(gpu, gpu->cmd_buffer, gpu->cmd_len, &dummy, &dummy);
+ gpu->cmd_len = 0;
+
+ if (unlikely(gpu->dma.h > 0))
+ finish_vram_transfer(gpu, gpu->dma_start.is_read);
+ gpu->dma.h = 0;
}
-static noinline void do_reset(void)
+static noinline void do_reset(struct psx_gpu *gpu)
{
unsigned int i;
- do_cmd_reset();
-
- memset(gpu.regs, 0, sizeof(gpu.regs));
- for (i = 0; i < sizeof(gpu.ex_regs) / sizeof(gpu.ex_regs[0]); i++)
- gpu.ex_regs[i] = (0xe0 + i) << 24;
- gpu.status = 0x14802000;
- gpu.gp0 = 0;
- gpu.regs[3] = 1;
- gpu.screen.hres = gpu.screen.w = 256;
- gpu.screen.vres = gpu.screen.h = 240;
- gpu.screen.x = gpu.screen.y = 0;
- renderer_sync_ecmds(gpu.ex_regs);
+ do_cmd_reset(gpu);
+
+ memset(gpu->regs, 0, sizeof(gpu->regs));
+ for (i = 0; i < sizeof(gpu->ex_regs) / sizeof(gpu->ex_regs[0]); i++)
+ gpu->ex_regs[i] = (0xe0 + i) << 24;
+ gpu->status = 0x14802000;
+ gpu->gp0 = 0;
+ gpu->regs[3] = 1;
+ gpu->screen.hres = gpu->screen.w = 256;
+ gpu->screen.vres = gpu->screen.h = 240;
+ gpu->screen.x = gpu->screen.y = 0;
+ renderer_sync_ecmds(gpu->ex_regs);
renderer_notify_res_change();
}
-static noinline void update_width(void)
+static noinline void update_width(struct psx_gpu *gpu)
{
static const short hres_all[8] = { 256, 368, 320, 368, 512, 368, 640, 368 };
static const uint8_t hdivs[8] = { 10, 7, 8, 7, 5, 7, 4, 7 };
- uint8_t hdiv = hdivs[(gpu.status >> 16) & 7];
- int hres = hres_all[(gpu.status >> 16) & 7];
- int pal = gpu.status & PSX_GPU_STATUS_PAL;
- int sw = gpu.screen.x2 - gpu.screen.x1;
- int type = gpu.state.screen_centering_type;
+ uint8_t hdiv = hdivs[(gpu->status >> 16) & 7];
+ int hres = hres_all[(gpu->status >> 16) & 7];
+ int pal = gpu->status & PSX_GPU_STATUS_PAL;
+ int sw = gpu->screen.x2 - gpu->screen.x1;
+ int type = gpu->state.screen_centering_type;
int x = 0, x_auto;
if (type == C_AUTO)
- type = gpu.state.screen_centering_type_default;
+ type = gpu->state.screen_centering_type_default;
if (sw <= 0)
/* nothing displayed? */;
else {
int s = pal ? 656 : 608; // or 600? pal is just a guess
- x = (gpu.screen.x1 - s) / hdiv;
+ x = (gpu->screen.x1 - s) / hdiv;
x = (x + 1) & ~1; // blitter limitation
sw /= hdiv;
sw = (sw + 2) & ~3; // according to nocash
+
+ if (gpu->state.show_overscan == 2) // widescreen hack
+ sw = (sw + 63) & ~63;
+ if (gpu->state.show_overscan && sw >= hres)
+ x = 0, hres = sw;
switch (type) {
case C_INGAME:
break;
case C_MANUAL:
- x = gpu.state.screen_centering_x;
+ x = gpu->state.screen_centering_x;
break;
default:
// correct if slightly miscentered
// .x range check is done in vout_update()
}
// reduce the unpleasant right border that a few games have
- if (gpu.state.screen_centering_type == 0
+ if (gpu->state.screen_centering_type == 0
&& x <= 4 && hres - (x + sw) >= 4)
hres -= 4;
- gpu.screen.x = x;
- gpu.screen.w = sw;
- gpu.screen.hres = hres;
- gpu.state.dims_changed = 1;
- //printf("xx %d %d -> %2d, %d / %d\n",
- // gpu.screen.x1, gpu.screen.x2, x, sw, hres);
+ gpu->screen.x = x;
+ gpu->screen.w = sw;
+ gpu->screen.hres = hres;
+ gpu->state.dims_changed = 1;
+ //printf("xx %d %d (%d) -> %2d, %d / %d\n", gpu->screen.x1,
+ // gpu->screen.x2, gpu->screen.x2 - gpu->screen.x1, x, sw, hres);
}
-static noinline void update_height(void)
+static noinline void update_height(struct psx_gpu *gpu)
{
- int pal = gpu.status & PSX_GPU_STATUS_PAL;
- int dheight = gpu.status & PSX_GPU_STATUS_DHEIGHT;
- int y = gpu.screen.y1 - (pal ? 39 : 16); // 39 for spyro
- int sh = gpu.screen.y2 - gpu.screen.y1;
+ int pal = gpu->status & PSX_GPU_STATUS_PAL;
+ int dheight = gpu->status & PSX_GPU_STATUS_DHEIGHT;
+ int y = gpu->screen.y1 - (pal ? 39 : 16); // 39 for spyro
+ int sh = gpu->screen.y2 - gpu->screen.y1;
int center_tol = 16;
int vres = 240;
- if (pal && (sh > 240 || gpu.screen.vres == 256))
+ if (pal && (sh > 240 || gpu->screen.vres == 256))
vres = 256;
if (dheight)
y *= 2, sh *= 2, vres *= 2, center_tol *= 2;
if (sh <= 0)
/* nothing displayed? */;
else {
- switch (gpu.state.screen_centering_type) {
+ switch (gpu->state.screen_centering_type) {
case C_INGAME:
break;
case C_BORDERLESS:
y = 0;
break;
case C_MANUAL:
- y = gpu.state.screen_centering_y;
+ y = gpu->state.screen_centering_y;
+ vres += gpu->state.screen_centering_h_adj;
break;
default:
// correct if slightly miscentered
if (y + sh > vres)
sh = vres - y;
}
- gpu.screen.y = y;
- gpu.screen.h = sh;
- gpu.screen.vres = vres;
- gpu.state.dims_changed = 1;
+ gpu->screen.y = y;
+ gpu->screen.h = sh;
+ gpu->screen.vres = vres;
+ gpu->state.dims_changed = 1;
//printf("yy %d %d -> %d, %d / %d\n",
- // gpu.screen.y1, gpu.screen.y2, y, sh, vres);
+ // gpu->screen.y1, gpu->screen.y2, y, sh, vres);
}
-static noinline void decide_frameskip(void)
+static noinline void decide_frameskip(struct psx_gpu *gpu)
{
- if (gpu.frameskip.active)
- gpu.frameskip.cnt++;
+ *gpu->frameskip.dirty = 1;
+
+ if (gpu->frameskip.active)
+ gpu->frameskip.cnt++;
else {
- gpu.frameskip.cnt = 0;
- gpu.frameskip.frame_ready = 1;
+ gpu->frameskip.cnt = 0;
+ gpu->frameskip.frame_ready = 1;
}
- if (!gpu.frameskip.active && *gpu.frameskip.advice)
- gpu.frameskip.active = 1;
- else if (gpu.frameskip.set > 0 && gpu.frameskip.cnt < gpu.frameskip.set)
- gpu.frameskip.active = 1;
+ if (*gpu->frameskip.force)
+ gpu->frameskip.active = 1;
+ else if (!gpu->frameskip.active && *gpu->frameskip.advice)
+ gpu->frameskip.active = 1;
+ else if (gpu->frameskip.set > 0 && gpu->frameskip.cnt < gpu->frameskip.set)
+ gpu->frameskip.active = 1;
else
- gpu.frameskip.active = 0;
+ gpu->frameskip.active = 0;
- if (!gpu.frameskip.active && gpu.frameskip.pending_fill[0] != 0) {
+ if (!gpu->frameskip.active && gpu->frameskip.pending_fill[0] != 0) {
int dummy = 0;
- do_cmd_list(gpu.frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
- gpu.frameskip.pending_fill[0] = 0;
+ do_cmd_list(gpu->frameskip.pending_fill, 3, &dummy, &dummy, &dummy);
+ gpu->frameskip.pending_fill[0] = 0;
}
}
-static noinline int decide_frameskip_allow(uint32_t cmd_e3)
+static noinline int decide_frameskip_allow(struct psx_gpu *gpu)
{
// no frameskip if it decides to draw to display area,
// but not for interlace since it'll most likely always do that
+ uint32_t cmd_e3 = gpu->ex_regs[3];
uint32_t x = cmd_e3 & 0x3ff;
uint32_t y = (cmd_e3 >> 10) & 0x3ff;
- gpu.frameskip.allow = (gpu.status & PSX_GPU_STATUS_INTERLACE) ||
- (uint32_t)(x - gpu.screen.src_x) >= (uint32_t)gpu.screen.w ||
- (uint32_t)(y - gpu.screen.src_y) >= (uint32_t)gpu.screen.h;
- return gpu.frameskip.allow;
+ gpu->frameskip.allow = (gpu->status & PSX_GPU_STATUS_INTERLACE) ||
+ (uint32_t)(x - gpu->screen.src_x) >= (uint32_t)gpu->screen.w ||
+ (uint32_t)(y - gpu->screen.src_y) >= (uint32_t)gpu->screen.h;
+ return gpu->frameskip.allow;
}
-static void flush_cmd_buffer(void);
+static void flush_cmd_buffer(struct psx_gpu *gpu);
-static noinline void get_gpu_info(uint32_t data)
+static noinline void get_gpu_info(struct psx_gpu *gpu, uint32_t data)
{
- if (unlikely(gpu.cmd_len > 0))
- flush_cmd_buffer();
+ if (unlikely(gpu->cmd_len > 0))
+ flush_cmd_buffer(gpu);
switch (data & 0x0f) {
case 0x02:
case 0x03:
case 0x04:
- gpu.gp0 = gpu.ex_regs[data & 7] & 0xfffff;
+ gpu->gp0 = gpu->ex_regs[data & 7] & 0xfffff;
break;
case 0x05:
- gpu.gp0 = gpu.ex_regs[5] & 0x3fffff;
+ gpu->gp0 = gpu->ex_regs[5] & 0x3fffff;
break;
case 0x07:
- gpu.gp0 = 2;
+ gpu->gp0 = 2;
break;
default:
- // gpu.gp0 unchanged
+ // gpu->gp0 unchanged
break;
}
}
-// double, for overdraw guard
-#define VRAM_SIZE (1024 * 512 * 2 * 2)
+#ifndef max
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+// Minimum 16-byte VRAM alignment needed by gpu_unai's pixel-skipping
+// renderer/downscaler it uses in high res modes:
+#ifdef GCW_ZERO
+ // On GCW platform (MIPS), align to 8192 bytes (1 TLB entry) to reduce # of
+ // fills. (Will change this value if it ever gets large page support)
+ #define VRAM_ALIGN 8192
+#else
+ #define VRAM_ALIGN 64
+#endif
+
+// double, for overdraw/overscan guard + at least 1 page before
+#define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))
+
+// vram ptr received from mmap/malloc/alloc (will deallocate using this)
+static uint16_t *vram_ptr_orig = NULL;
+#ifndef GPULIB_USE_MMAP
+# if defined(__linux__) || defined(_3DS) || defined(HAVE_LIBNX) || defined(VITA)
+# define GPULIB_USE_MMAP 1
+# else
+# define GPULIB_USE_MMAP 0
+# endif
+#endif
static int map_vram(void)
{
- gpu.vram = gpu.mmap(VRAM_SIZE);
- if (gpu.vram != NULL) {
- gpu.vram += 4096 / 2;
+#if GPULIB_USE_MMAP
+ gpu.vram = vram_ptr_orig = gpu.mmap(VRAM_SIZE);
+#else
+ gpu.vram = vram_ptr_orig = calloc(VRAM_SIZE, 1);
+#endif
+ if (gpu.vram != NULL && gpu.vram != (void *)(intptr_t)-1) {
+ // 4kb guard in front
+ gpu.vram += (4096 / 2);
+ // Align
+ gpu.vram = (uint16_t*)(((uintptr_t)gpu.vram + (VRAM_ALIGN-1)) & ~(VRAM_ALIGN-1));
return 0;
}
else {
fprintf(stderr, "could not map vram, expect crashes\n");
+ gpu.vram = NULL;
return -1;
}
}
gpu.state.frame_count = &gpu.zero;
gpu.state.hcnt = &gpu.zero;
gpu.cmd_len = 0;
- do_reset();
+ do_reset(&gpu);
- if (gpu.mmap != NULL) {
- if (map_vram() != 0)
- ret = -1;
- }
return ret;
}
renderer_finish();
ret = vout_finish();
- if (gpu.vram != NULL) {
- gpu.vram -= 4096 / 2;
- gpu.munmap(gpu.vram, VRAM_SIZE);
+
+ if (vram_ptr_orig != NULL) {
+#if GPULIB_USE_MMAP
+ gpu.munmap(vram_ptr_orig, VRAM_SIZE);
+#else
+ free(vram_ptr_orig);
+#endif
}
- gpu.vram = NULL;
+ vram_ptr_orig = gpu.vram = NULL;
return ret;
}
void GPUwriteStatus(uint32_t data)
{
uint32_t cmd = data >> 24;
+ uint32_t fb_dirty = 1;
int src_x, src_y;
if (cmd < ARRAY_SIZE(gpu.regs)) {
gpu.regs[cmd] = data;
}
- gpu.state.fb_dirty = 1;
-
switch (cmd) {
case 0x00:
- do_reset();
+ do_reset(&gpu);
break;
case 0x01:
- do_cmd_reset();
+ do_cmd_reset(&gpu);
+ fb_dirty = 0;
break;
case 0x03:
if (data & 1) {
case 0x04:
gpu.status &= ~PSX_GPU_STATUS_DMA_MASK;
gpu.status |= PSX_GPU_STATUS_DMA(data & 3);
+ fb_dirty = 0;
break;
case 0x05:
src_x = data & 0x3ff; src_y = (data >> 10) & 0x1ff;
gpu.screen.src_y = src_y;
renderer_notify_scanout_change(src_x, src_y);
if (gpu.frameskip.set) {
- decide_frameskip_allow(gpu.ex_regs[3]);
+ decide_frameskip_allow(&gpu);
if (gpu.frameskip.last_flip_frame != *gpu.state.frame_count) {
- decide_frameskip();
+ decide_frameskip(&gpu);
gpu.frameskip.last_flip_frame = *gpu.state.frame_count;
}
}
case 0x06:
gpu.screen.x1 = data & 0xfff;
gpu.screen.x2 = (data >> 12) & 0xfff;
- update_width();
+ update_width(&gpu);
break;
case 0x07:
gpu.screen.y1 = data & 0x3ff;
gpu.screen.y2 = (data >> 10) & 0x3ff;
- update_height();
+ update_height(&gpu);
break;
case 0x08:
gpu.status = (gpu.status & ~0x7f0000) | ((data & 0x3F) << 17) | ((data & 0x40) << 10);
- update_width();
- update_height();
+ update_width(&gpu);
+ update_height(&gpu);
renderer_notify_res_change();
break;
default:
if ((cmd & 0xf0) == 0x10)
- get_gpu_info(data);
+ get_gpu_info(&gpu, data);
+ fb_dirty = 0;
break;
}
+ gpu.state.fb_dirty |= fb_dirty;
+
#ifdef GPUwriteStatus_ext
GPUwriteStatus_ext(data);
#endif
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
-#define VRAM_MEM_XY(x, y) &gpu.vram[(y) * 1024 + (x)]
+#define VRAM_MEM_XY(vram_, x, y) &vram_[(y) * 1024 + (x)]
-static void cpy_msb(uint16_t *dst, const uint16_t *src, int l, uint16_t msb)
+// this isn't very useful so should be rare
+static void cpy_mask(uint16_t *dst, const uint16_t *src, int l, uint32_t r6)
{
int i;
- for (i = 0; i < l; i++)
- dst[i] = src[i] | msb;
+ if (r6 == 1) {
+ for (i = 0; i < l; i++)
+ dst[i] = src[i] | 0x8000;
+ }
+ else {
+ uint16_t msb = r6 << 15;
+ for (i = 0; i < l; i++) {
+ uint16_t mask = (int16_t)dst[i] >> 15;
+ dst[i] = (dst[i] & mask) | ((src[i] | msb) & ~mask);
+ }
+ }
}
-static inline void do_vram_line(int x, int y, uint16_t *mem, int l,
- int is_read, uint16_t msb)
+static inline void do_vram_line(uint16_t *vram_, int x, int y,
+ uint16_t *mem, int l, int is_read, uint32_t r6)
{
- uint16_t *vram = VRAM_MEM_XY(x, y);
+ uint16_t *vram = VRAM_MEM_XY(vram_, x, y);
if (unlikely(is_read))
memcpy(mem, vram, l * 2);
- else if (unlikely(msb))
- cpy_msb(vram, mem, l, msb);
+ else if (unlikely(r6))
+ cpy_mask(vram, mem, l, r6);
else
memcpy(vram, mem, l * 2);
}
-static int do_vram_io(uint32_t *data, int count, int is_read)
+static int do_vram_io(struct psx_gpu *gpu, uint32_t *data, int count, int is_read)
{
int count_initial = count;
- uint16_t msb = gpu.ex_regs[6] << 15;
+ uint32_t r6 = gpu->ex_regs[6] & 3;
uint16_t *sdata = (uint16_t *)data;
- int x = gpu.dma.x, y = gpu.dma.y;
- int w = gpu.dma.w, h = gpu.dma.h;
- int o = gpu.dma.offset;
+ uint16_t *vram = gpu->vram;
+ int x = gpu->dma.x, y = gpu->dma.y;
+ int w = gpu->dma.w, h = gpu->dma.h;
+ int o = gpu->dma.offset;
int l;
count *= 2; // operate in 16bpp pixels
- if (gpu.dma.offset) {
- l = w - gpu.dma.offset;
+ renderer_sync();
+
+ if (gpu->dma.offset) {
+ l = w - gpu->dma.offset;
if (count < l)
l = count;
- do_vram_line(x + o, y, sdata, l, is_read, msb);
+ do_vram_line(vram, x + o, y, sdata, l, is_read, r6);
if (o + l < w)
o += l;
for (; h > 0 && count >= w; sdata += w, count -= w, y++, h--) {
y &= 511;
- do_vram_line(x, y, sdata, w, is_read, msb);
+ do_vram_line(vram, x, y, sdata, w, is_read, r6);
}
if (h > 0) {
if (count > 0) {
y &= 511;
- do_vram_line(x, y, sdata, count, is_read, msb);
+ do_vram_line(vram, x, y, sdata, count, is_read, r6);
o = count;
count = 0;
}
}
else
- finish_vram_transfer(is_read);
- gpu.dma.y = y;
- gpu.dma.h = h;
- gpu.dma.offset = o;
+ finish_vram_transfer(gpu, is_read);
+ gpu->dma.y = y;
+ gpu->dma.h = h;
+ gpu->dma.offset = o;
return count_initial - count / 2;
}
-static void start_vram_transfer(uint32_t pos_word, uint32_t size_word, int is_read)
+static noinline void start_vram_transfer(struct psx_gpu *gpu, uint32_t pos_word,
+ uint32_t size_word, int is_read)
{
- if (gpu.dma.h)
- log_anomaly("start_vram_transfer while old unfinished\n");
+ if (gpu->dma.h)
+ log_anomaly(gpu, "start_vram_transfer while old unfinished\n");
- gpu.dma.x = pos_word & 0x3ff;
- gpu.dma.y = (pos_word >> 16) & 0x1ff;
- gpu.dma.w = ((size_word - 1) & 0x3ff) + 1;
- gpu.dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
- gpu.dma.offset = 0;
- gpu.dma.is_read = is_read;
- gpu.dma_start = gpu.dma;
+ gpu->dma.x = pos_word & 0x3ff;
+ gpu->dma.y = (pos_word >> 16) & 0x1ff;
+ gpu->dma.w = ((size_word - 1) & 0x3ff) + 1;
+ gpu->dma.h = (((size_word >> 16) - 1) & 0x1ff) + 1;
+ gpu->dma.offset = 0;
+ gpu->dma.is_read = is_read;
+ gpu->dma_start = gpu->dma;
renderer_flush_queues();
if (is_read) {
- gpu.status |= PSX_GPU_STATUS_IMG;
+ const uint16_t *mem = VRAM_MEM_XY(gpu->vram, gpu->dma.x, gpu->dma.y);
+ gpu->status |= PSX_GPU_STATUS_IMG;
// XXX: wrong for width 1
- gpu.gp0 = LE32TOH(*(uint32_t *) VRAM_MEM_XY(gpu.dma.x, gpu.dma.y));
- gpu.state.last_vram_read_frame = *gpu.state.frame_count;
+ gpu->gp0 = LE16TOH(mem[0]) | ((uint32_t)LE16TOH(mem[1]) << 16);
+ gpu->state.last_vram_read_frame = *gpu->state.frame_count;
}
- log_io("start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
- gpu.dma.x, gpu.dma.y, gpu.dma.w, gpu.dma.h);
- if (gpu.gpu_state_change)
- gpu.gpu_state_change(PGS_VRAM_TRANSFER_START);
+ log_io(gpu, "start_vram_transfer %c (%d, %d) %dx%d\n", is_read ? 'r' : 'w',
+ gpu->dma.x, gpu->dma.y, gpu->dma.w, gpu->dma.h);
+ if (gpu->gpu_state_change)
+ gpu->gpu_state_change(PGS_VRAM_TRANSFER_START, 0);
}
-static void finish_vram_transfer(int is_read)
+static void finish_vram_transfer(struct psx_gpu *gpu, int is_read)
{
if (is_read)
- gpu.status &= ~PSX_GPU_STATUS_IMG;
+ gpu->status &= ~PSX_GPU_STATUS_IMG;
else {
- gpu.state.fb_dirty = 1;
- renderer_update_caches(gpu.dma_start.x, gpu.dma_start.y,
- gpu.dma_start.w, gpu.dma_start.h, 0);
+ int32_t screen_r = gpu->screen.src_x + gpu->screen.hres;
+ int32_t screen_b = gpu->screen.src_y + gpu->screen.vres;
+ int32_t dma_r = gpu->dma_start.x + gpu->dma_start.w;
+ int32_t dma_b = gpu->dma_start.y + gpu->dma_start.h;
+ int32_t not_dirty;
+ not_dirty = screen_r - gpu->dma_start.x - 1;
+ not_dirty |= screen_b - gpu->dma_start.y - 1;
+ not_dirty |= dma_r - gpu->screen.src_x - 1;
+ not_dirty |= dma_b - gpu->screen.src_y - 1;
+ not_dirty >>= 31;
+ log_io(gpu, "dma %3d,%3d %dx%d scr %3d,%3d %3dx%3d -> dirty %d\n",
+ gpu->dma_start.x, gpu->dma_start.y, gpu->dma_start.w, gpu->dma_start.h,
+ gpu->screen.src_x, gpu->screen.src_y, gpu->screen.hres, gpu->screen.vres, !not_dirty);
+ gpu->state.fb_dirty |= !not_dirty;
+ renderer_update_caches(gpu->dma_start.x, gpu->dma_start.y,
+ gpu->dma_start.w, gpu->dma_start.h, 0);
}
- if (gpu.gpu_state_change)
- gpu.gpu_state_change(PGS_VRAM_TRANSFER_END);
+ if (gpu->gpu_state_change)
+ gpu->gpu_state_change(PGS_VRAM_TRANSFER_END, 0);
}
-static void do_vram_copy(const uint32_t *params, int *cpu_cycles)
+static void do_vram_copy(struct psx_gpu *gpu, const uint32_t *params, int *cpu_cycles)
{
const uint32_t sx = LE32TOH(params[0]) & 0x3FF;
const uint32_t sy = (LE32TOH(params[0]) >> 16) & 0x1FF;
const uint32_t dy = (LE32TOH(params[1]) >> 16) & 0x1FF;
uint32_t w = ((LE32TOH(params[2]) - 1) & 0x3FF) + 1;
uint32_t h = (((LE32TOH(params[2]) >> 16) - 1) & 0x1FF) + 1;
- uint16_t msb = gpu.ex_regs[6] << 15;
+ uint16_t msb = gpu->ex_regs[6] << 15;
+ uint16_t *vram = gpu->vram;
uint16_t lbuf[128];
uint32_t x, y;
{
for (y = 0; y < h; y++)
{
- const uint16_t *src = VRAM_MEM_XY(0, (sy + y) & 0x1ff);
- uint16_t *dst = VRAM_MEM_XY(0, (dy + y) & 0x1ff);
+ const uint16_t *src = VRAM_MEM_XY(vram, 0, (sy + y) & 0x1ff);
+ uint16_t *dst = VRAM_MEM_XY(vram, 0, (dy + y) & 0x1ff);
for (x = 0; x < w; x += ARRAY_SIZE(lbuf))
{
uint32_t x1, w1 = w - x;
else
{
uint32_t sy1 = sy, dy1 = dy;
- for (y = 0; y < h; y++, sy1++, dy1++)
- memcpy(VRAM_MEM_XY(dx, dy1 & 0x1ff), VRAM_MEM_XY(sx, sy1 & 0x1ff), w * 2);
+ for (y = 0; y < h; y++, sy1++, dy1++) {
+ memcpy(VRAM_MEM_XY(vram, dx, dy1 & 0x1ff),
+ VRAM_MEM_XY(vram, sx, sy1 & 0x1ff), w * 2);
+ }
}
renderer_update_caches(dx, dy, w, h, 0);
}
-static noinline int do_cmd_list_skip(uint32_t *data, int count, int *last_cmd)
+static noinline int do_cmd_list_skip(struct psx_gpu *gpu, uint32_t *data,
+ int count, int *last_cmd)
{
int cmd = 0, pos = 0, len, dummy = 0, v;
int skip = 1;
- gpu.frameskip.pending_fill[0] = 0;
+ gpu->frameskip.pending_fill[0] = 0;
while (pos < count && skip) {
uint32_t *list = data + pos;
switch (cmd) {
case 0x02:
- if ((LE32TOH(list[2]) & 0x3ff) > gpu.screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu.screen.h)
+ if ((LE32TOH(list[2]) & 0x3ff) > gpu->screen.w || ((LE32TOH(list[2]) >> 16) & 0x1ff) > gpu->screen.h)
// clearing something large, don't skip
do_cmd_list(list, 3, &dummy, &dummy, &dummy);
else
- memcpy(gpu.frameskip.pending_fill, list, 3 * 4);
+ memcpy(gpu->frameskip.pending_fill, list, 3 * 4);
break;
case 0x24 ... 0x27:
case 0x2c ... 0x2f:
case 0x34 ... 0x37:
case 0x3c ... 0x3f:
- gpu.ex_regs[1] &= ~0x1ff;
- gpu.ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
+ gpu->ex_regs[1] &= ~0x1ff;
+ gpu->ex_regs[1] |= LE32TOH(list[4 + ((cmd >> 4) & 1)]) & 0x1ff;
break;
case 0x48 ... 0x4F:
for (v = 3; pos + v < count; v++)
len += v - 4;
break;
default:
- if (cmd == 0xe3)
- skip = decide_frameskip_allow(LE32TOH(list[0]));
- if ((cmd & 0xf8) == 0xe0)
- gpu.ex_regs[cmd & 7] = LE32TOH(list[0]);
+ if ((cmd & 0xf8) == 0xe0) {
+ gpu->ex_regs[cmd & 7] = LE32TOH(list[0]);
+ if (cmd == 0xe3)
+ skip = decide_frameskip_allow(gpu);
+ }
break;
}
if (0x80 <= cmd && cmd <= 0xdf)
pos += len;
}
- renderer_sync_ecmds(gpu.ex_regs);
+ renderer_sync_ecmds(gpu->ex_regs);
*last_cmd = cmd;
return pos;
}
-static noinline int do_cmd_buffer(uint32_t *data, int count,
+static noinline int do_cmd_buffer(struct psx_gpu *gpu, uint32_t *data, int count,
int *cycles_sum, int *cycles_last)
{
int cmd, pos;
- uint32_t old_e3 = gpu.ex_regs[3];
+ uint32_t old_e3 = gpu->ex_regs[3];
int vram_dirty = 0;
// process buffer
for (pos = 0; pos < count; )
{
- if (gpu.dma.h && !gpu.dma_start.is_read) { // XXX: need to verify
- vram_dirty = 1;
- pos += do_vram_io(data + pos, count - pos, 0);
+ if (gpu->dma.h && !gpu->dma_start.is_read) { // XXX: need to verify
+ // vram_dirty = 1; // handled in finish_vram_transfer()
+ pos += do_vram_io(gpu, data + pos, count - pos, 0);
if (pos == count)
break;
}
cmd = LE32TOH(data[pos]) >> 24;
- if (0xa0 <= cmd && cmd <= 0xdf) {
+ switch (cmd & 0xe0) {
+ case 0xe0:
+ if (cmd < 0xe8) {
+ if (gpu->ex_regs[cmd & 7] == LE32TOH(data[pos])) {
+ pos++;
+ continue;
+ }
+ }
+ break;
+ case 0xc0:
+ case 0xa0:
if (unlikely((pos+2) >= count)) {
// incomplete vram write/read cmd, can't consume yet
cmd = -1;
}
// consume vram write/read cmd
- start_vram_transfer(LE32TOH(data[pos + 1]), LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
+ start_vram_transfer(gpu, LE32TOH(data[pos + 1]),
+ LE32TOH(data[pos + 2]), (cmd & 0xe0) == 0xc0);
pos += 3;
continue;
- }
- else if ((cmd & 0xe0) == 0x80) {
+ case 0x80:
if (unlikely((pos+3) >= count)) {
cmd = -1; // incomplete cmd, can't consume yet
break;
}
+ renderer_sync();
*cycles_sum += *cycles_last;
*cycles_last = 0;
- do_vram_copy(data + pos + 1, cycles_last);
+ do_vram_copy(gpu, data + pos + 1, cycles_last);
vram_dirty = 1;
pos += 4;
continue;
- }
- else if (cmd == 0x1f) {
- log_anomaly("irq1?\n");
+ case 0x00:
+ if (cmd == 2)
+ break;
+ if (cmd == 0x1f)
+ log_anomaly(gpu, "irq1?\n");
pos++;
continue;
}
// 0xex cmds might affect frameskip.allow, so pass to do_cmd_list_skip
- if (gpu.frameskip.active && (gpu.frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0))
- pos += do_cmd_list_skip(data + pos, count - pos, &cmd);
+ if (gpu->frameskip.active &&
+ (gpu->frameskip.allow || ((LE32TOH(data[pos]) >> 24) & 0xf0) == 0xe0)) {
+ pos += do_cmd_list_skip(gpu, data + pos, count - pos, &cmd);
+ }
else {
pos += do_cmd_list(data + pos, count - pos, cycles_sum, cycles_last, &cmd);
vram_dirty = 1;
break;
}
- gpu.status &= ~0x1fff;
- gpu.status |= gpu.ex_regs[1] & 0x7ff;
- gpu.status |= (gpu.ex_regs[6] & 3) << 11;
+ gpu->status &= ~0x1fff;
+ gpu->status |= gpu->ex_regs[1] & 0x7ff;
+ gpu->status |= (gpu->ex_regs[6] & 3) << 11;
- gpu.state.fb_dirty |= vram_dirty;
+ gpu->state.fb_dirty |= vram_dirty;
- if (old_e3 != gpu.ex_regs[3])
- decide_frameskip_allow(gpu.ex_regs[3]);
+ if (old_e3 != gpu->ex_regs[3])
+ decide_frameskip_allow(gpu);
return count - pos;
}
-static noinline void flush_cmd_buffer(void)
+static noinline void flush_cmd_buffer(struct psx_gpu *gpu)
{
+ int cycles_last = 0;
int dummy = 0, left;
- left = do_cmd_buffer(gpu.cmd_buffer, gpu.cmd_len, &dummy, &dummy);
+ left = do_cmd_buffer(gpu, gpu->cmd_buffer, gpu->cmd_len, &dummy, &cycles_last);
if (left > 0)
- memmove(gpu.cmd_buffer, gpu.cmd_buffer + gpu.cmd_len - left, left * 4);
- if (left != gpu.cmd_len) {
- if (!gpu.dma.h && gpu.gpu_state_change)
- gpu.gpu_state_change(PGS_PRIMITIVE_START);
- gpu.cmd_len = left;
+ memmove(gpu->cmd_buffer, gpu->cmd_buffer + gpu->cmd_len - left, left * 4);
+ if (left != gpu->cmd_len) {
+ gpu->cmd_len = left;
+ if (!gpu->dma.h && gpu->gpu_state_change)
+ gpu->gpu_state_change(PGS_PRIMITIVE_START, cycles_last);
}
}
{
int dummy = 0, left;
- log_io("gpu_dma_write %p %d\n", mem, count);
+ log_io(&gpu, "gpu_dma_write %p %d\n", mem, count);
if (unlikely(gpu.cmd_len > 0))
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
- left = do_cmd_buffer(mem, count, &dummy, &dummy);
+ left = do_cmd_buffer(&gpu, mem, count, &dummy, &dummy);
if (left)
- log_anomaly("GPUwriteDataMem: discarded %d/%d words\n", left, count);
+ log_anomaly(&gpu, "GPUwriteDataMem: discarded %d/%d words\n", left, count);
}
void GPUwriteData(uint32_t data)
{
- log_io("gpu_write %08x\n", data);
+ log_io(&gpu, "gpu_write %08x\n", data);
gpu.cmd_buffer[gpu.cmd_len++] = HTOLE32(data);
if (gpu.cmd_len >= CMD_BUFFER_LEN)
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
}
long GPUdmaChain(uint32_t *rambase, uint32_t start_addr,
preload(rambase + (start_addr & 0x1fffff) / 4);
if (unlikely(gpu.cmd_len > 0))
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
- log_io("gpu_dma_chain\n");
+ log_io(&gpu, "gpu_dma_chain\n");
addr = ld_addr = start_addr & 0xffffff;
for (count = 0; (addr & 0x800000) == 0; count++)
{
if (len > 0)
cpu_cycles_sum += 5 + len;
- log_io(".chain %08lx #%d+%d %u+%u\n",
+ log_io(&gpu, ".chain %08lx #%d+%d %u+%u\n",
(long)(list - rambase) * 4, len, gpu.cmd_len, cpu_cycles_sum, cpu_cycles_last);
if (unlikely(gpu.cmd_len > 0)) {
if (gpu.cmd_len + len > ARRAY_SIZE(gpu.cmd_buffer)) {
- log_anomaly("cmd_buffer overflow, likely garbage commands\n");
+ log_anomaly(&gpu, "cmd_buffer overflow, likely garbage commands\n");
gpu.cmd_len = 0;
}
memcpy(gpu.cmd_buffer + gpu.cmd_len, list + 1, len * 4);
gpu.cmd_len += len;
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
continue;
}
if (len) {
- left = do_cmd_buffer(list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
+ left = do_cmd_buffer(&gpu, list + 1, len, &cpu_cycles_sum, &cpu_cycles_last);
if (left) {
memcpy(gpu.cmd_buffer, list + 1 + len - left, left * 4);
gpu.cmd_len = left;
- log_anomaly("GPUdmaChain: %d/%d words left\n", left, len);
+ log_anomaly(&gpu, "GPUdmaChain: %d/%d words left\n", left, len);
}
}
if (progress_addr) {
- *progress_addr = addr;
- break;
+ // hack for bios boot logo race (must be not too fast or too slow)
+ if (gpu.status & PSX_GPU_STATUS_DHEIGHT)
+ cpu_cycles_sum += 5;
+ if (cpu_cycles_sum > 512)
+ break;
}
if (addr == ld_addr) {
- log_anomaly("GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
+ log_anomaly(&gpu, "GPUdmaChain: loop @ %08x, cnt=%u\n", addr, count);
break;
}
if (count == ld_count) {
gpu.state.last_list.cycles = cpu_cycles_sum + cpu_cycles_last;
gpu.state.last_list.addr = start_addr;
+ if (progress_addr)
+ *progress_addr = addr;
*cycles_last_cmd = cpu_cycles_last;
return cpu_cycles_sum;
}
void GPUreadDataMem(uint32_t *mem, int count)
{
- log_io("gpu_dma_read %p %d\n", mem, count);
+ log_io(&gpu, "gpu_dma_read %p %d\n", mem, count);
if (unlikely(gpu.cmd_len > 0))
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
if (gpu.dma.h)
- do_vram_io(mem, count, 1);
+ do_vram_io(&gpu, mem, count, 1);
}
uint32_t GPUreadData(void)
uint32_t ret;
if (unlikely(gpu.cmd_len > 0))
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
ret = gpu.gp0;
if (gpu.dma.h) {
ret = HTOLE32(ret);
- do_vram_io(&ret, 1, 1);
+ do_vram_io(&gpu, &ret, 1, 1);
ret = LE32TOH(ret);
}
- log_io("gpu_read %08x\n", ret);
+ log_io(&gpu, "gpu_read %08x\n", ret);
return ret;
}
uint32_t ret;
if (unlikely(gpu.cmd_len > 0))
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
ret = gpu.status;
- log_io("gpu_read_status %08x\n", ret);
+ log_io(&gpu, "gpu_read_status %08x\n", ret);
return ret;
}
-struct GPUFreeze
-{
- uint32_t ulFreezeVersion; // should be always 1 for now (set by main emu)
- uint32_t ulStatus; // current gpu status
- uint32_t ulControl[256]; // latest control register values
- unsigned char psxVRam[1024*1024*2]; // current VRam image (full 2 MB for ZN)
-};
-
-long GPUfreeze(uint32_t type, struct GPUFreeze *freeze)
+long GPUfreeze(uint32_t type, GPUFreeze_t *freeze)
{
int i;
switch (type) {
case 1: // save
if (gpu.cmd_len > 0)
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
+
+ renderer_sync();
memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2);
memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs));
memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs));
freeze->ulStatus = gpu.status;
break;
case 0: // load
+ renderer_sync();
memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2);
- memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
+ //memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs));
memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs));
gpu.status = freeze->ulStatus;
gpu.cmd_len = 0;
- for (i = 8; i > 0; i--) {
- gpu.regs[i] ^= 1; // avoid reg change detection
- GPUwriteStatus((i << 24) | (gpu.regs[i] ^ 1));
- }
+ for (i = 8; i > 1; i--)
+ GPUwriteStatus((i << 24) | freeze->ulControl[i]);
renderer_sync_ecmds(gpu.ex_regs);
renderer_update_caches(0, 0, 1024, 512, 0);
break;
void GPUupdateLace(void)
{
+ int updated = 0;
+
if (gpu.cmd_len > 0)
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
renderer_flush_queues();
#ifndef RAW_FB_DISPLAY
return;
}
+ renderer_notify_update_lace(0);
+
if (!gpu.state.fb_dirty)
return;
#endif
gpu.frameskip.frame_ready = 0;
}
- vout_update();
+ updated = vout_update();
if (gpu.state.enhancement_active && !gpu.state.enhancement_was_active)
renderer_update_caches(0, 0, 1024, 512, 1);
gpu.state.enhancement_was_active = gpu.state.enhancement_active;
- gpu.state.fb_dirty = 0;
- gpu.state.blanked = 0;
+ if (updated) {
+ gpu.state.fb_dirty = 0;
+ gpu.state.blanked = 0;
+ }
+ renderer_notify_update_lace(1);
}
void GPUvBlank(int is_vblank, int lcf)
gpu.state.old_interlace = interlace;
if (gpu.cmd_len > 0)
- flush_cmd_buffer();
+ flush_cmd_buffer(&gpu);
renderer_flush_queues();
renderer_set_interlace(interlace, !lcf);
}
{
gpu.frameskip.set = cbs->frameskip;
gpu.frameskip.advice = &cbs->fskip_advice;
+ gpu.frameskip.force = &cbs->fskip_force;
+ gpu.frameskip.dirty = (void *)&cbs->fskip_dirty;
gpu.frameskip.active = 0;
gpu.frameskip.frame_ready = 1;
- gpu.state.hcnt = cbs->gpu_hcnt;
- gpu.state.frame_count = cbs->gpu_frame_count;
+ gpu.state.hcnt = (uint32_t *)cbs->gpu_hcnt;
+ gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
if (gpu.state.screen_centering_type != cbs->screen_centering_type
|| gpu.state.screen_centering_x != cbs->screen_centering_x
- || gpu.state.screen_centering_y != cbs->screen_centering_y) {
+ || gpu.state.screen_centering_y != cbs->screen_centering_y
+ || gpu.state.screen_centering_h_adj != cbs->screen_centering_h_adj
+ || gpu.state.show_overscan != cbs->show_overscan) {
gpu.state.screen_centering_type = cbs->screen_centering_type;
gpu.state.screen_centering_x = cbs->screen_centering_x;
gpu.state.screen_centering_y = cbs->screen_centering_y;
- update_width();
- update_height();
+ gpu.state.screen_centering_h_adj = cbs->screen_centering_h_adj;
+ gpu.state.show_overscan = cbs->show_overscan;
+ update_width(&gpu);
+ update_height(&gpu);
}
gpu.mmap = cbs->mmap;
//#define RAW_FB_DISPLAY
-#define gpu_log(fmt, ...) \
- printf("%d:%03d: " fmt, *gpu.state.frame_count, *gpu.state.hcnt, ##__VA_ARGS__)
+#define gpu_log(gpu, fmt, ...) \
+ printf("%d:%03d: " fmt, *(gpu)->state.frame_count, *(gpu)->state.hcnt, ##__VA_ARGS__)
//#define log_anomaly gpu_log
#define log_anomaly(...)
#define LE16TOH(x) (x)
#endif
-#define BIT(x) (1 << (x))
+#undef BIT
+#define BIT(x) (1u << (x))
#define PSX_GPU_STATUS_DHEIGHT BIT(19)
#define PSX_GPU_STATUS_PAL BIT(20)
#define PSX_GPU_STATUS_DMA_MASK (BIT(29) | BIT(30))
struct psx_gpu {
- uint32_t cmd_buffer[CMD_BUFFER_LEN];
uint32_t regs[16];
uint16_t *vram;
uint32_t status;
uint32_t downscale_enable:1;
uint32_t downscale_active:1;
uint32_t dims_changed:1;
+ uint32_t show_overscan:2;
uint32_t *frame_count;
uint32_t *hcnt; /* hsync count */
struct {
uint32_t w_out_old, h_out_old, status_vo_old;
short screen_centering_type;
short screen_centering_type_default;
- int screen_centering_x;
- int screen_centering_y;
+ short screen_centering_x;
+ short screen_centering_y;
+ int screen_centering_h_adj;
} state;
struct {
int32_t set:3; /* -1 auto, 0 off, 1-3 fixed */
uint32_t allow:1;
uint32_t frame_ready:1;
const int *advice;
+ const int *force;
+ int *dirty;
uint32_t last_flip_frame;
uint32_t pending_fill[3];
} frameskip;
+ uint32_t scratch_ex_regs[8]; // for threaded rendering
+ uint32_t cmd_buffer[CMD_BUFFER_LEN];
void *(*get_enhancement_bufer)
(int *x, int *y, int *w, int *h, int *vram_h);
uint16_t *(*get_downscale_buffer)
(int *x, int *y, int *w, int *h, int *vram_h);
void *(*mmap)(unsigned int size);
void (*munmap)(void *ptr, unsigned int size);
- void (*gpu_state_change)(int what); // psx_gpu_state
+ void (*gpu_state_change)(int what, int cycles); // psx_gpu_state
};
extern struct psx_gpu gpu;
void renderer_set_interlace(int enable, int is_odd);
void renderer_set_config(const struct rearmed_cbs *config);
void renderer_notify_res_change(void);
+void renderer_notify_update_lace(int updated);
+void renderer_sync(void);
void renderer_notify_scanout_change(int x, int y);
int vout_init(void);
int vout_finish(void);
-void vout_update(void);
+int vout_update(void);
void vout_blank(void);
void vout_set_config(const struct rearmed_cbs *config);
+int prim_try_simplify_quad_t (void *simplified, const void *prim);
+int prim_try_simplify_quad_gt(void *simplified, const void *prim);
+
/* listing these here for correct linkage if rasterizer uses c++ */
struct GPUFreeze;
# always adding gpulib to deps (XXX might be no longer needed)
# users must include ../../config.mak
-LDFLAGS += -shared -Wl,--no-undefined
-CFLAGS += $(PLUGIN_CFLAGS)
-ifeq "$(ARCH)" "arm"
- EXT =
-else
- #LDLIBS_GPULIB += `sdl-config --libs`
- EXT = .$(ARCH)
+LDFLAGS += -shared
+ifeq ($(GNU_LINKER),1)
+LDFLAGS += -Wl,--no-undefined
endif
+CFLAGS += $(PLUGIN_CFLAGS)
+#LDLIBS_GPULIB += `sdl-config --libs`
ifdef DEBUG
CFLAGS += -O0
endif
-GPULIB_A = ../gpulib/gpulib$(EXT).a
+GPULIB_A = ../gpulib/gpulib.$(ARCH).a
ifdef BIN_STANDALONE
TARGETS += $(BIN_STANDALONE)
--- /dev/null
+/**************************************************************************
+* Copyright (C) 2020 The RetroArch Team *
+* *
+* This program is free software; you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published by *
+* the Free Software Foundation; either version 2 of the License, or *
+* (at your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with this program; if not, write to the *
+* Free Software Foundation, Inc., *
+* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
+***************************************************************************/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include "../gpulib/gpu.h"
+#include "../../frontend/plugin_lib.h"
+#include "gpu.h"
+#include "gpu_timing.h"
+#include "gpulib_thread_if.h"
+
+extern void SysPrintf(const char *fmt, ...);
+
+#define FALSE 0
+#define TRUE 1
+#define BOOL unsigned short
+
+typedef struct {
+ uint32_t *cmd_list;
+ int count;
+ int last_cmd;
+} video_thread_cmd;
+
+#define QUEUE_SIZE 0x2000
+
+typedef struct {
+ size_t start;
+ size_t end;
+ size_t used;
+ video_thread_cmd queue[QUEUE_SIZE];
+} video_thread_queue;
+
+typedef struct {
+ pthread_t thread;
+ pthread_mutex_t queue_lock;
+ pthread_cond_t cond_msg_avail;
+ pthread_cond_t cond_msg_done;
+ pthread_cond_t cond_queue_empty;
+ video_thread_queue *queue;
+ video_thread_queue *bg_queue;
+ BOOL running;
+} video_thread_state;
+
+static video_thread_state thread;
+static video_thread_queue queues[2];
+static int thread_rendering;
+static BOOL hold_cmds;
+static BOOL needs_display;
+static BOOL flushed;
+
+extern const unsigned char cmd_lengths[];
+
+/* Render-worker entry point. Sleeps on cond_msg_avail until commands are
+ * queued, executes them with real_do_cmd_list(), and signals
+ * cond_msg_done / cond_queue_empty as the queue drains. Exits when
+ * thread.running is cleared (by video_thread_stop). */
+static void *video_thread_main(void *arg) {
+ video_thread_cmd *cmd;
+ int i;
+
+#ifdef _3DS
+ static int processed = 0;
+#endif /* _3DS */
+
+#if defined(__arm__) && defined(__ARM_FP)
+ // RunFast mode
+ uint32_t fpscr = ~0;
+ __asm__ volatile("vmrs %0, fpscr" : "=r"(fpscr));
+ fpscr &= ~0x00009f9f;
+ fpscr |= 0x03000000; // DN | FZ
+ __asm__ volatile("vmsr fpscr, %0" :: "r"(fpscr));
+#endif
+
+ while(1) {
+ int result, cycles_dummy = 0, last_cmd, start, end;
+ video_thread_queue *queue;
+ pthread_mutex_lock(&thread.queue_lock);
+
+ while (!thread.queue->used && thread.running) {
+ pthread_cond_wait(&thread.cond_msg_avail, &thread.queue_lock);
+ }
+
+ if (!thread.running) {
+ pthread_mutex_unlock(&thread.queue_lock);
+ break;
+ }
+
+ /* Claim a contiguous run of slots; if the ring buffer wraps, stop at
+ * QUEUE_SIZE and pick up the wrapped remainder on the next pass. */
+ queue = thread.queue;
+ start = queue->start;
+ end = queue->end > queue->start ? queue->end : QUEUE_SIZE;
+ queue->start = end % QUEUE_SIZE;
+ pthread_mutex_unlock(&thread.queue_lock);
+
+ /* Execute the claimed slots without holding the lock so producers
+ * can keep queueing into the remaining slots. */
+ for (i = start; i < end; i++) {
+ cmd = &queue->queue[i];
+ result = real_do_cmd_list(cmd->cmd_list, cmd->count,
+ &cycles_dummy, &cycles_dummy, &last_cmd);
+ if (result != cmd->count) {
+ fprintf(stderr, "Processed wrong cmd count: expected %d, got %d\n", cmd->count, result);
+ }
+
+#ifdef _3DS
+ /* Periodically yield so as not to starve other threads */
+ processed += cmd->count;
+ if (processed >= 512) {
+ svcSleepThread(1);
+ processed %= 512;
+ }
+#endif /* _3DS */
+ }
+
+ pthread_mutex_lock(&thread.queue_lock);
+ queue->used -= (end - start);
+
+ if (!queue->used)
+ pthread_cond_signal(&thread.cond_queue_empty);
+
+ pthread_cond_signal(&thread.cond_msg_done);
+ pthread_mutex_unlock(&thread.queue_lock);
+ }
+
+ return 0;
+}
+
+/* Promote the background (held-back) queue to the active queue, but only
+ * when the active queue has fully drained, so frames are not interleaved.
+ * No-op if the background queue is empty. */
+static void cmd_queue_swap() {
+ video_thread_queue *tmp;
+ if (!thread.bg_queue->used) return;
+
+ pthread_mutex_lock(&thread.queue_lock);
+ if (!thread.queue->used) {
+ tmp = thread.queue;
+ thread.queue = thread.bg_queue;
+ thread.bg_queue = tmp;
+ pthread_cond_signal(&thread.cond_msg_avail);
+ }
+ pthread_mutex_unlock(&thread.queue_lock);
+}
+
+/* Blocks until the worker has drained the active (foreground) queue.
+ * Does not touch the background queue; see renderer_sync for that.
+ * No-op when the render thread is not running. */
+void renderer_wait() {
+ if (!thread.running) return;
+
+ /* Not completely safe, but should be fine since the render thread
+ * only decreases used, and we check again inside the lock. */
+ if (!thread.queue->used) {
+ return;
+ }
+
+ pthread_mutex_lock(&thread.queue_lock);
+
+ while (thread.queue->used) {
+ pthread_cond_wait(&thread.cond_queue_empty, &thread.queue_lock);
+ }
+
+ pthread_mutex_unlock(&thread.queue_lock);
+}
+
+/* Waits for all GPU commands in both queues to finish, bringing VRAM
+ * completely up-to-date. Also releases hold_cmds, so any commands held
+ * for a future frame are executed now. */
+void renderer_sync(void) {
+ if (!thread.running) return;
+
+ /* Not completely safe, but should be fine since the render thread
+ * only decreases used, and we check again inside the lock. */
+ if (!thread.queue->used && !thread.bg_queue->used) {
+ return;
+ }
+
+ if (thread.bg_queue->used) {
+ /* When we flush the background queue, the vblank handler can't
+ * know that we had a frame pending, and we delay rendering too
+ * long. Force it. */
+ flushed = TRUE;
+ }
+
+ /* Flush both queues. This is necessary because gpulib could be
+ * trying to process a DMA write that a command in the queue should
+ * run beforehand. For example, Xenogears sprites write a black
+ * rectangle over the to-be-DMA'd spot in VRAM -- if this write
+ * happens after the DMA, it will clear the DMA, resulting in
+ * flickering sprites. We need to be totally up-to-date. This may
+ * drop a frame. */
+ renderer_wait();
+ cmd_queue_swap();
+ hold_cmds = FALSE;
+ renderer_wait();
+}
+
+/* Fully shuts down the render thread: drains both queues, joins the
+ * worker, destroys the sync primitives and frees every queued command
+ * buffer. Also called from the video_thread_start error path.
+ * NOTE(review): on that error path some mutex/cond objects may never have
+ * been initialized before being destroyed here -- confirm this is safe on
+ * all supported pthread implementations. */
+static void video_thread_stop() {
+ int i;
+ renderer_sync();
+
+ if (thread.running) {
+ thread.running = FALSE;
+ pthread_cond_signal(&thread.cond_msg_avail);
+ pthread_join(thread.thread, NULL);
+ }
+
+ pthread_mutex_destroy(&thread.queue_lock);
+ pthread_cond_destroy(&thread.cond_msg_avail);
+ pthread_cond_destroy(&thread.cond_msg_done);
+ pthread_cond_destroy(&thread.cond_queue_empty);
+
+ for (i = 0; i < QUEUE_SIZE; i++) {
+ video_thread_cmd *cmd = &thread.queue->queue[i];
+ free(cmd->cmd_list);
+ cmd->cmd_list = NULL;
+ }
+
+ for (i = 0; i < QUEUE_SIZE; i++) {
+ video_thread_cmd *cmd = &thread.bg_queue->queue[i];
+ free(cmd->cmd_list);
+ cmd->cmd_list = NULL;
+ }
+}
+
+/* Initializes the queues and sync primitives and spawns the worker
+ * thread. On any init failure, logs and tears everything back down via
+ * video_thread_stop (thread.running is cleared first so stop does not
+ * try to join a thread that was never created). */
+static void video_thread_start() {
+ SysPrintf("Starting render thread\n");
+
+ thread.queue = &queues[0];
+ thread.bg_queue = &queues[1];
+ thread.running = TRUE;
+
+ if (pthread_cond_init(&thread.cond_msg_avail, NULL) ||
+ pthread_cond_init(&thread.cond_msg_done, NULL) ||
+ pthread_cond_init(&thread.cond_queue_empty, NULL) ||
+ pthread_mutex_init(&thread.queue_lock, NULL) ||
+ pthread_create(&thread.thread, NULL, video_thread_main, &thread)) {
+ goto error;
+ }
+
+ return;
+
+ error:
+ SysPrintf("Failed to start rendering thread\n");
+ thread.running = FALSE;
+ video_thread_stop();
+}
+
+/* Copies `count` words of `list` into a heap buffer and appends it to the
+ * active queue (or to the background queue while hold_cmds is set, in
+ * which case no locking is needed -- only this thread touches bg_queue).
+ * `last_cmd` is the last opcode seen by scan_cmd_list, kept per slot.
+ * Blocks while the foreground queue is full until the worker catches up. */
+static void video_thread_queue_cmd(uint32_t *list, int count, int last_cmd) {
+ video_thread_cmd *cmd;
+ uint32_t *cmd_list;
+ video_thread_queue *queue;
+ BOOL lock;
+ int cycles_dummy = 0, cmd_dummy;
+
+ cmd_list = (uint32_t *)calloc(count, sizeof(uint32_t));
+
+ if (!cmd_list) {
+ /* Out of memory, disable the thread and run sync from now on */
+ SysPrintf("Failed to allocate render thread command list, stopping thread\n");
+ video_thread_stop();
+ /* Fix: the old code fell through and memcpy'd into the NULL
+ * pointer. Execute this batch synchronously instead so the
+ * commands are not lost, then bail out. */
+ real_do_cmd_list(list, count, &cycles_dummy, &cycles_dummy, &cmd_dummy);
+ return;
+ }
+
+ memcpy(cmd_list, list, count * sizeof(uint32_t));
+
+ if (hold_cmds && thread.bg_queue->used >= QUEUE_SIZE) {
+ /* If the bg queue is full, do a full sync to empty both queues
+ * and clear space. This should be very rare, I've only seen it in
+ * Tekken 3 post-battle-replay. */
+ renderer_sync();
+ }
+
+ if (hold_cmds) {
+ queue = thread.bg_queue;
+ lock = FALSE;
+ } else {
+ queue = thread.queue;
+ lock = TRUE;
+ }
+
+ if (lock) {
+ pthread_mutex_lock(&thread.queue_lock);
+
+ while (queue->used >= QUEUE_SIZE) {
+ pthread_cond_wait(&thread.cond_msg_done, &thread.queue_lock);
+ }
+ }
+
+ cmd = &queue->queue[queue->end];
+ free(cmd->cmd_list); /* slot reuse: release any stale buffer from a prior wrap */
+ cmd->cmd_list = cmd_list;
+ cmd->count = count;
+ cmd->last_cmd = last_cmd;
+ queue->end = (queue->end + 1) % QUEUE_SIZE;
+ queue->used++;
+
+ if (lock) {
+ pthread_cond_signal(&thread.cond_msg_avail);
+ pthread_mutex_unlock(&thread.queue_lock);
+ }
+}
+
+/* Slice off just the part of the list that can be handled async, and
+ * update ex_regs. Accumulates per-primitive cost estimates (gput_*)
+ * into *cycles_sum_out / *cycles_last and reports the last opcode in
+ * *last_cmd (-1 for an incomplete command). Returns the number of words
+ * consumed; stops before image i/o commands (0x80-0xdf), which must be
+ * handled synchronously. */
+static int scan_cmd_list(uint32_t *data, int count,
+ int *cycles_sum_out, int *cycles_last, int *last_cmd)
+{
+ int cpu_cycles_sum = 0, cpu_cycles = *cycles_last;
+ int cmd = 0, pos = 0, len, v;
+
+ while (pos < count) {
+ uint32_t *list = data + pos;
+ short *slist = (void *)list;
+ cmd = LE32TOH(list[0]) >> 24; // opcode is the top byte
+ len = 1 + cmd_lengths[cmd];
+
+ switch (cmd) {
+ case 0x02: // fill rectangle
+ gput_sum(cpu_cycles_sum, cpu_cycles,
+ gput_fill(LE16TOH(slist[4]) & 0x3ff,
+ LE16TOH(slist[5]) & 0x1ff));
+ break;
+ case 0x20 ... 0x23: // flat triangle
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base());
+ break;
+ case 0x24 ... 0x27: // textured triangle; word 4 carries texpage -> ex_regs[1]
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_t());
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+ break;
+ case 0x28 ... 0x2b: // flat quad
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base());
+ break;
+ case 0x2c ... 0x2f: // textured quad
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_t());
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= LE32TOH(list[4]) & 0x1ff;
+ break;
+ case 0x30 ... 0x33: // gouraud triangle
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_g());
+ break;
+ case 0x34 ... 0x37: // gouraud textured triangle; texpage in word 5
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_poly_base_gt());
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+ break;
+ case 0x38 ... 0x3b: // gouraud quad
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_g());
+ break;
+ case 0x3c ... 0x3f: // gouraud textured quad
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_quad_base_gt());
+ gpu.ex_regs[1] &= ~0x1ff;
+ gpu.ex_regs[1] |= LE32TOH(list[5]) & 0x1ff;
+ break;
+ case 0x40 ... 0x47: // line
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+ break;
+ case 0x48 ... 0x4F: // polyline: variable length, 0x5000,5000 terminator
+ // NOTE(review): terminator word is compared without LE32TOH here,
+ // unlike the other list[] reads -- verify on big-endian hosts.
+ for (v = 3; pos + v < count; v++)
+ {
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+ if ((list[v] & 0xf000f000) == 0x50005000)
+ break;
+ }
+ len += v - 3;
+ break;
+ case 0x50 ... 0x57: // gouraud line
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+ break;
+ case 0x58 ... 0x5F: // gouraud polyline (two words per segment)
+ for (v = 4; pos + v < count; v += 2)
+ {
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_line(0));
+ if ((list[v] & 0xf000f000) == 0x50005000)
+ break;
+ }
+ len += v - 4;
+ break;
+ case 0x60 ... 0x63: // variable-size sprite
+ gput_sum(cpu_cycles_sum, cpu_cycles,
+ gput_sprite(LE16TOH(slist[4]) & 0x3ff,
+ LE16TOH(slist[5]) & 0x1ff));
+ break;
+ case 0x64 ... 0x67: // textured sprite (size words shifted by uv/clut word)
+ gput_sum(cpu_cycles_sum, cpu_cycles,
+ gput_sprite(LE16TOH(slist[6]) & 0x3ff,
+ LE16TOH(slist[7]) & 0x1ff));
+ break;
+ case 0x68 ... 0x6b: // 1x1 sprite
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1));
+ break;
+ case 0x70 ... 0x77: // 8x8 sprite
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(8, 8));
+ break;
+ case 0x78 ... 0x7f: // 16x16 sprite
+ gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(16, 16));
+ break;
+ default:
+ if ((cmd & 0xf8) == 0xe0) // e0-e7: environment regs, mirror to ex_regs
+ gpu.ex_regs[cmd & 7] = list[0];
+ break;
+ }
+
+ if (pos + len > count) {
+ cmd = -1;
+ break; /* incomplete cmd */
+ }
+ if (0x80 <= cmd && cmd <= 0xdf)
+ break; /* image i/o */
+
+ pos += len;
+ }
+
+ *cycles_sum_out += cpu_cycles_sum;
+ *cycles_last = cpu_cycles;
+ *last_cmd = cmd;
+ return pos;
+}
+
+/* gpulib entry point: when the render thread runs, pre-scan the list
+ * (updating ex_regs and cycle estimates) and hand the async-safe slice
+ * to the worker; otherwise execute synchronously. Returns words consumed. */
+int do_cmd_list(uint32_t *list, int count,
+ int *cycles_sum, int *cycles_last, int *last_cmd)
+{
+ int pos = 0;
+
+ if (thread.running) {
+ pos = scan_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
+ video_thread_queue_cmd(list, pos, *last_cmd);
+ } else {
+ pos = real_do_cmd_list(list, count, cycles_sum, cycles_last, last_cmd);
+ memcpy(gpu.ex_regs, gpu.scratch_ex_regs, sizeof(gpu.ex_regs));
+ }
+ return pos;
+}
+
+/* Starts the worker (if threaded rendering was configured) and then
+ * initializes the real renderer. */
+int renderer_init(void) {
+ if (thread_rendering) {
+ video_thread_start();
+ }
+ return real_renderer_init();
+}
+
+/* Tears down the real renderer, then the worker thread if it is running. */
+void renderer_finish(void) {
+ real_renderer_finish();
+
+ if (thread_rendering && thread.running) {
+ video_thread_stop();
+ }
+}
+
+/* Replays the 6 environment commands (e1-e6) after a state restore.
+ * When threaded, route them through do_cmd_list so they are ordered
+ * with the queued commands; otherwise pass straight through. */
+void renderer_sync_ecmds(uint32_t * ecmds) {
+ if (thread.running) {
+ int dummy = 0;
+ do_cmd_list(&ecmds[1], 6, &dummy, &dummy, &dummy);
+ } else {
+ real_renderer_sync_ecmds(ecmds);
+ }
+}
+
+/* VRAM was written directly (DMA/cpu); drain the queues first so the
+ * renderer's caches are rebuilt from up-to-date VRAM. */
+void renderer_update_caches(int x, int y, int w, int h, int state_changed) {
+ renderer_sync();
+ real_renderer_update_caches(x, y, w, h, state_changed);
+}
+
+/* Called during DMA and updateLace. We want to sync if it's DMA,
+ * but not if it's updateLace. Instead of syncing here, there's a
+ * renderer_sync call during DMA. So this only forwards to the real
+ * renderer's own flush. */
+void renderer_flush_queues(void) {
+ real_renderer_flush_queues();
+}
+
+/*
+ * Normally all GPU commands are processed before rendering the
+ * frame. For games that naturally run < 50/60fps, this is unnecessary
+ * -- it forces the game to render as if it was 60fps and leaves the
+ * GPU idle half the time on a 30fps game, for example.
+ *
+ * Allowing the renderer to wait until a frame is done before
+ * rendering it would give it double, triple, or quadruple the amount
+ * of time to finish before we have to wait for it.
+ *
+ * We can use a heuristic to figure out when to force a render.
+ *
+ * - If a frame isn't done when we're asked to render, wait for it and
+ * put future GPU commands in a separate buffer (for the next frame)
+ *
+ * - If the frame is done, and had no future GPU commands, render it.
+ *
+ * - If we do have future GPU commands, it meant the frame took too
+ * long to render and there's another frame waiting. Stop until the
+ * first frame finishes, render it, and start processing the next
+ * one.
+ *
+ * This may possibly add a frame or two of latency that shouldn't be
+ * different than the real device. It may skip rendering a frame
+ * entirely if a VRAM transfer happens while a frame is waiting, or in
+ * games that natively run at 60fps if frames are coming in too
+ * quickly to process. Depending on how the game treats "60fps," this
+ * may not be noticeable.
+ */
+/* Vblank-time scheduling hook implementing the heuristic described in
+ * the comment block above: decides whether to display the frame now,
+ * hold incoming commands for the next frame, or wait for the worker.
+ * `updated` is nonzero when the display was just flipped. */
+void renderer_notify_update_lace(int updated) {
+ if (!thread.running) return;
+
+ if (thread_rendering == THREAD_RENDERING_SYNC) {
+ renderer_sync();
+ return;
+ }
+
+ if (updated) {
+ /* Frame was shown; let the held-back commands start rendering. */
+ cmd_queue_swap();
+ return;
+ }
+
+ pthread_mutex_lock(&thread.queue_lock);
+ if (thread.bg_queue->used || flushed) {
+ /* We have commands for a future frame to run. Force a wait until
+ * the current frame is finished, and start processing the next
+ * frame after it's drawn (see the `updated` clause above). */
+ pthread_mutex_unlock(&thread.queue_lock);
+ renderer_wait();
+ pthread_mutex_lock(&thread.queue_lock);
+
+ /* We are no longer holding commands back, so the next frame may
+ * get mixed into the following frame. This is usually fine, but can
+ * result in frameskip-like effects for 60fps games. */
+ flushed = FALSE;
+ hold_cmds = FALSE;
+ needs_display = TRUE;
+ gpu.state.fb_dirty = TRUE;
+ } else if (thread.queue->used) {
+ /* We are still drawing during a vblank. Cut off the current frame
+ * by sending new commands to the background queue and skip
+ * drawing our partly rendered frame to the display. */
+ hold_cmds = TRUE;
+ needs_display = TRUE;
+ gpu.state.fb_dirty = FALSE;
+ } else if (needs_display && !thread.queue->used) {
+ /* We have processed all commands in the queue, render the
+ * buffer. We know we have something to render, because
+ * needs_display is TRUE. */
+ hold_cmds = FALSE;
+ needs_display = FALSE;
+ gpu.state.fb_dirty = TRUE;
+ } else {
+ /* Everything went normally, so do the normal thing. */
+ }
+
+ pthread_mutex_unlock(&thread.queue_lock);
+}
+
+/* Pass-through: interlace changes need no queue synchronization here. */
+void renderer_set_interlace(int enable, int is_odd) {
+ real_renderer_set_interlace(enable, is_odd);
+}
+
+/* Applies frontend config: drains outstanding work, then starts or
+ * stops the worker thread to match the new thread_rendering mode
+ * before forwarding the config to the real renderer. */
+void renderer_set_config(const struct rearmed_cbs *cbs) {
+ renderer_sync();
+ thread_rendering = cbs->thread_rendering;
+ if (!thread.running && thread_rendering != THREAD_RENDERING_OFF) {
+ video_thread_start();
+ } else if (thread.running && thread_rendering == THREAD_RENDERING_OFF) {
+ video_thread_stop();
+ }
+ real_renderer_set_config(cbs);
+}
+
+/* Resolution changed: drain queued work first so the real renderer
+ * reconfigures against consistent state. */
+void renderer_notify_res_change(void) {
+ renderer_sync();
+ real_renderer_notify_res_change();
+}
--- /dev/null
+/**************************************************************************
+* Copyright (C) 2020 The RetroArch Team *
+* *
+* This program is free software; you can redistribute it and/or modify *
+* it under the terms of the GNU General Public License as published by *
+* the Free Software Foundation; either version 2 of the License, or *
+* (at your option) any later version. *
+* *
+* This program is distributed in the hope that it will be useful, *
+* but WITHOUT ANY WARRANTY; without even the implied warranty of *
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+* GNU General Public License for more details. *
+* *
+* You should have received a copy of the GNU General Public License *
+* along with this program; if not, write to the *
+* Free Software Foundation, Inc., *
+* 51 Franklin Street, Fifth Floor, Boston, MA 02111-1307 USA. *
+***************************************************************************/
+
+#ifndef __GPULIB_THREAD_H__
+#define __GPULIB_THREAD_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int real_do_cmd_list(uint32_t *list, int count,
+ int *cycles_sum_out, int *cycles_last, int *last_cmd);
+int real_renderer_init(void);
+void real_renderer_finish(void);
+void real_renderer_sync_ecmds(uint32_t * ecmds);
+void real_renderer_update_caches(int x, int y, int w, int h, int state_changed);
+void real_renderer_flush_queues(void);
+void real_renderer_set_interlace(int enable, int is_odd);
+void real_renderer_set_config(const struct rearmed_cbs *config);
+void real_renderer_notify_res_change(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __GPULIB_THREAD_H__ */
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "../../include/compiler_features.h"
+#include "gpu.h"
+
+// retain neon's ability to sample textures pixel-perfectly
+#ifdef GPU_NEON
+#define STRICT
+#endif
+
+// textured vertex: screen position and texture coords, each also
+// accessible as one packed 32-bit word for fast compares/copies
+struct vert_t
+{
+ union {
+ struct {
+ int16_t x, y; // screen position
+ };
+ uint32_t xy; // packed position
+ };
+ union {
+ struct {
+ uint8_t u, v; // texture coords
+ int16_t clut; // palette (CLUT) selector
+ };
+ uint32_t uvclut; // packed uv+clut
+ };
+};
+
+// gt ~ gouraud textured
+struct vert_gt
+{
+ uint32_t rgb; // per-vertex color (command byte in top byte of v[0])
+ struct vert_t t;
+};
+
+// flat textured quad: one color+command word, four vertices
+struct quad_t
+{
+ uint32_t rgb_c;
+ struct vert_t v[4];
+};
+
+// gouraud textured quad: four colored vertices, no separate header word
+struct quad_gt
+{
+ struct vert_gt v[4];
+};
+
+// textured sprite command layout (0x64 class) that quads simplify into
+struct sprite
+{
+ uint32_t rgb_c; // color + command byte
+ union {
+ struct {
+ int16_t x, y;
+ };
+ uint32_t xy;
+ };
+ union {
+ struct {
+ uint8_t u, v;
+ int16_t clut;
+ };
+ uint32_t uvclut;
+ };
+ int16_t w, h; // sprite dimensions
+};
+
+// debug
+#if 0
+static void log_quad_t(const struct quad_t *q, int ret)
+{
+#if 1
+ printf("quad_t %08x", q->rgb_c);
+ int i;
+ for (i = 0; i < 4; i++)
+ printf(" | %3d,%3d %3d,%3d",
+ q->v[i].x, q->v[i].y, q->v[i].u, q->v[i].v);
+ printf(" -> %d\n", ret);
+#endif
+}
+
+static void log_quad_gt(const struct vert_gt *v, int ret)
+{
+#if 1
+ printf("quad_gt %02x", v[0].rgb >> 24);
+ int i;
+ for (i = 0; i < 4; i++)
+ printf(" | %3d,%3d %3d,%3d %06x",
+ v[i].t.x, v[i].t.y, v[i].t.u, v[i].t.v, v[i].rgb & 0xffffff);
+ printf(" -> %d\n", ret);
+#endif
+}
+
+int prim_try_simplify_quad_t_(void *simplified, const void *prim_);
+int prim_try_simplify_quad_t(void *simplified, const void *prim_)
+{
+ struct quad_t prim = *(struct quad_t *)prim_;
+ int ret = prim_try_simplify_quad_t_(simplified, prim_);
+ #define prim_try_simplify_quad_t prim_try_simplify_quad_t_
+ ///if (!ret)
+ log_quad_t(&prim, ret);
+ return ret;
+}
+
+int prim_try_simplify_quad_gt_(void *simplified, const void *prim_);
+int prim_try_simplify_quad_gt(void *simplified, const void *prim_)
+{
+ struct quad_gt prim = *(struct quad_gt *)prim_;
+ int ret = prim_try_simplify_quad_gt_(simplified, prim_);
+ #define prim_try_simplify_quad_gt prim_try_simplify_quad_gt_
+ ///if (!ret)
+ log_quad_gt(prim.v, ret);
+ return ret;
+}
+#endif // debug
+
+/* Emits 1, 2 or 4 sprite commands equivalent to an axis-aligned textured
+ * quad. xd/ud and yd/vd are the x/u and y/v extents; the command byte is
+ * rewritten to 0x64 (textured sprite) keeping the low color/flag bits.
+ * In non-STRICT mode a 1-pixel extent mismatch between position and
+ * texture is absorbed by splitting the sprite at the midpoint along the
+ * mismatched axis. Returns the number of sprites written to `simplified`. */
+static noinline int simplify_quad_t(void *simplified, const struct vert_t *v,
+ int xd, int ud, int yd, int vd, uint32_t rgb_c, uint16_t clut)
+{
+ struct sprite *s = simplified;
+ int ret = 1;
+ rgb_c &= HTOLE32(0x03ffffff); // keep color + raw/blend flags
+ rgb_c |= HTOLE32(0x64000000); // command: textured sprite
+ xd = abs(xd);
+ ud = abs(ud);
+ s[0].rgb_c = rgb_c;
+ s[0].xy = v->xy;
+ s[0].u = v->u;
+ s[0].v = v->v;
+ s[0].clut = clut;
+ s[0].w = HTOLE16(xd);
+ s[0].h = HTOLE16(yd);
+#ifndef STRICT
+ if (xd != ud) {
+ // horizontal extent mismatch: split into left/right halves,
+ // shifting the right half's u by the (ud - xd) difference
+ int mid = xd / 2;
+ s[0].w = HTOLE16(mid);
+ s[1].rgb_c = rgb_c;
+ s[1].x = HTOLE16(LE16TOH(s[0].x) + mid);
+ s[1].y = s[0].y;
+ s[1].u = s[0].u + mid + ud - xd;
+ s[1].v = s[0].v;
+ s[1].clut = clut;
+ s[1].w = HTOLE16(xd - mid);
+ s[1].h = s[0].h;
+ ret = 2;
+ }
+ if (yd != vd) {
+ // vertical extent mismatch: duplicate the sprite(s) into a bottom
+ // row with v shifted by (vd - yd), doubling the count
+ int i, mid = yd / 2, y = LE16TOH(s[0].y);
+ memcpy(s + ret, s, sizeof(s[0]) * ret);
+ for (i = 0; i < ret; i++) {
+ s[i].h = HTOLE16(mid);
+ s[ret+i].y = HTOLE16(y + mid);
+ s[ret+i].h = HTOLE16(yd - mid);
+ s[ret+i].v = s[0].v + mid + vd - yd;
+ }
+ ret *= 2;
+ }
+#endif
+ return ret;
+}
+
+// this is split to reduce gcc spilling
+static noinline int prim_try_simplify_quad_t2(void *simplified,
+ const struct vert_t *v, uint32_t rgb_c)
+{
+ do {
+ int yd = LE16TOH(v[2].y) - LE16TOH(v[0].y);
+ int xd, ud, vd;
+ if (yd < 0)
+ break;
+ xd = LE16TOH(v[1].x) - LE16TOH(v[0].x);
+ ud = LE16TOH(v[1].u) - LE16TOH(v[0].u);
+ vd = LE16TOH(v[2].v) - LE16TOH(v[0].v);
+#ifdef STRICT
+ if (xd != ud || yd != vd)
+#else
+ if (abs(xd - ud) > 1 || abs(yd - vd) > 1)
+#endif
+ break;
+ return simplify_quad_t(simplified, xd < 0 ? &v[1] : &v[0],
+ xd, ud, yd, vd, rgb_c, v[0].clut);
+ }
+ while (0);
+ return 0;
+}
+
+/* Second stage for gouraud textured quads: same extent checks as the
+ * flat case, plus a color check -- when modulation ("lighting") is
+ * active the four vertex colors must agree in their upper 5 bits per
+ * channel, otherwise the gouraud shading is visible and the quad cannot
+ * become a flat sprite. Returns 0 when not simplifiable. */
+static noinline int prim_try_simplify_quad_gt2(void *simplified,
+ const struct vert_gt *v)
+{
+ do {
+ int yd = LE16TOH(v[2].t.y) - LE16TOH(v[0].t.y);
+ int xd, ud, vd;
+ if (yd < 0)
+ break;
+ xd = LE16TOH(v[1].t.x) - LE16TOH(v[0].t.x);
+ ud = LE16TOH(v[1].t.u) - LE16TOH(v[0].t.u);
+ vd = LE16TOH(v[2].t.v) - LE16TOH(v[0].t.v);
+#ifdef STRICT
+ if (xd != ud || yd != vd)
+#else
+ if (abs(xd - ud) > 1 || abs(yd - vd) > 1)
+#endif
+ break;
+ if (!(v[0].rgb & HTOLE32(1 << 24))) { // modulation/"lighting"
+ uint32_t i, xor = 0, rgb0 = v[0].rgb;
+ for (i = 1; i < 4; i++)
+ xor |= rgb0 ^ v[i].rgb;
+ if (xor & HTOLE32(0xf8f8f8)) // any visible per-channel difference?
+ break;
+ }
+ return simplify_quad_t(simplified, xd < 0 ? &v[1].t : &v[0].t,
+ xd, ud, yd, vd, v[0].rgb, v[0].t.clut);
+ }
+ while (0);
+ return 0;
+}
+
+// 2c-2f
+/* Public entry for flat textured quads (GP0 0x2c-0x2f): verifies the
+ * quad is an axis-aligned rectangle in both screen and texture space
+ * (vertex order 0-1 top, 2-3 bottom), then defers to the stage-2 check.
+ * Returns the number of sprites written, or 0 if not simplifiable. */
+int prim_try_simplify_quad_t(void *simplified, const void *prim_)
+{
+ const struct quad_t *prim = prim_;
+ const struct vert_t *v = prim->v;
+ int ret = 0;
+ do {
+ if (v[0].y != v[1].y || v[0].x != v[2].x || v[2].y != v[3].y || v[1].x != v[3].x)
+ break;
+ if (v[0].v != v[1].v || v[0].u != v[2].u || v[2].v != v[3].v || v[1].u != v[3].u)
+ break;
+ ret = prim_try_simplify_quad_t2(simplified, v, prim->rgb_c);
+ }
+ while (0);
+ return ret;
+}
+
+// 3c-3f
+/* Public entry for gouraud textured quads (GP0 0x3c-0x3f): same
+ * rectangle checks as the flat variant, on the embedded vert_t of each
+ * gouraud vertex. Returns sprite count, or 0 if not simplifiable. */
+int prim_try_simplify_quad_gt(void *simplified, const void *prim)
+{
+ const struct vert_gt *v = prim;
+ int ret = 0;
+ do {
+ if (v[0].t.y != v[1].t.y || v[0].t.x != v[2].t.x || v[2].t.y != v[3].t.y || v[1].t.x != v[3].t.x)
+ break;
+ if (v[0].t.v != v[1].t.v || v[0].t.u != v[2].t.u || v[2].t.v != v[3].t.v || v[1].t.u != v[3].t.u)
+ break;
+ ret = prim_try_simplify_quad_gt2(simplified, v);
+ }
+ while (0);
+ return ret;
+}
+
+// vim:shiftwidth=2:expandtab
h = gpu.screen.h;
w_out = w, h_out = h;
#ifdef RAW_FB_DISPLAY
- w = w_out = 1024, h = h_out = 512;
+ w = w_out = (gpu.status & PSX_GPU_STATUS_RGB24) ? 2048/3 : 1024;
+ h = h_out = 512;
#endif
gpu.state.enhancement_active =
gpu.get_enhancement_bufer != NULL && gpu.state.enhancement_enable
bpp = 24;
}
+ gpu.state.downscale_active =
+ gpu.get_downscale_buffer != NULL && gpu.state.downscale_enable
+ && (w >= 512 || h >= 256);
+
+ if (gpu.state.downscale_active) {
+ w_out = w < 512 ? w : 320;
+ h_out = h < 256 ? h : h / 2;
+ }
+
// width|rgb24 change?
if (force || (gpu.status ^ gpu.state.status_vo_old) & ((7<<16)|(1<<21))
|| w_out != gpu.state.w_out_old || h_out != gpu.state.h_out_old)
}
}
-void vout_update(void)
+int vout_update(void)
{
int bpp = (gpu.status & PSX_GPU_STATUS_RGB24) ? 24 : 16;
uint8_t *vram = (uint8_t *)gpu.vram;
int h = gpu.screen.h;
int vram_h = 512;
int src_x2 = 0;
+ int offset;
#ifdef RAW_FB_DISPLAY
- w = 1024, h = 512, x = src_x = y = src_y = 0;
+ w = (gpu.status & PSX_GPU_STATUS_RGB24) ? 2048/3 : 1024;
+ h = 512, x = src_x = y = src_y = 0;
#endif
if (x < 0) { w += x; src_x2 = -x; x = 0; }
if (y < 0) { h += y; src_y -= y; y = 0; }
if (w <= 0 || h <= 0)
- return;
+ return 0;
check_mode_change(0);
if (gpu.state.enhancement_active) {
if (!gpu.state.enhancement_was_active)
- return; // buffer not ready yet
+ return 0; // buffer not ready yet
vram = gpu.get_enhancement_bufer(&src_x, &src_y, &w, &h, &vram_h);
if (vram == NULL)
- return;
+ return 0;
x *= 2; y *= 2;
src_x2 *= 2;
}
+ if (gpu.state.downscale_active)
+ vram = (void *)gpu.get_downscale_buffer(&src_x, &src_y, &w, &h, &vram_h);
+
if (src_y + h > vram_h) {
if (src_y + h - vram_h > h / 2) {
// wrap
h = vram_h - src_y;
}
- vram += (src_y * 1024 + src_x) * 2;
- vram += src_x2 * bpp / 8;
+ offset = (src_y * 1024 + src_x) * 2;
+ offset += src_x2 * bpp / 8;
- cbs->pl_vout_flip(vram, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24),
+ cbs->pl_vout_flip(vram, offset, !!(gpu.status & PSX_GPU_STATUS_RGB24),
x, y, w, h, gpu.state.dims_changed);
gpu.state.dims_changed = 0;
+ return 1;
}
void vout_blank(void)
w *= 2;
h *= 2;
}
- cbs->pl_vout_flip(NULL, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24), 0, 0, w, h, 0);
+ cbs->pl_vout_flip(NULL, 0, !!(gpu.status & PSX_GPU_STATUS_RGB24), 0, 0, w, h, 0);
}
long GPUopen(unsigned long *disp, char *cap, char *cfg)
return 0;
}
-void vout_update(void)
+int vout_update(void)
{
uint32_t *d;
int i;
}
SDL_UnlockSurface(screen);
SDL_UpdateRect(screen, 0, 0, 1024, 512);
+ return 1;
}
void vout_blank(void)
all: ../../config.mak $(TARGET)
+CFLAGS += $(PLUGIN_CFLAGS)
+
$(TARGET): spunull.c
- $(CC) $(CFLAGS) -shared -fPIC -ggdb -O2 -o $@ $^
+ $(CC) $(CFLAGS) -shared -ggdb -O2 -o $@ $^
ln -fs $(PLUGINDIR)/$(TARGET) ../
clean:
////////////////////////////////////////////////////////////////////////\r
-unsigned short CALLBACK SPUreadRegister(unsigned long reg)
+unsigned short CALLBACK SPUreadRegister(unsigned long reg, unsigned int cycles)
{
unsigned long r=reg&0xfff;
// XA AUDIO
////////////////////////////////////////////////////////////////////////
-void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap)
+void CALLBACK SPUplayADPCMchannel(xa_decode_t *xap, unsigned int cycles, int is_start)
{
}
{
}
-int SPUplayCDDAchannel(short *pcm, int nbytes)
+int SPUplayCDDAchannel(short *pcm, int nbytes, unsigned int cycle, int is_start)
{
return -1;
}
{
//SoundOff(16,24,channels);
}
+
+/* Intentionally empty: the null SPU plugin performs no scheduling, so
+ * the callback registration is a stub kept for interface completeness. */
+void CALLBACK SPUregisterScheduleCb(void (CALLBACK *callback)(unsigned int))
+{
+}
Supported CD image formats:
- .bin/.cue
- .bin/.toc
+- .chd
- .img/.ccd/.sub
- .mdf/.mds
-- .Z/.Z.table
-- .bz/.bz.table
-- .ZNX/.ZNX.table (partial)
- EBOOT.PBP (PSP, partial)
- .cbn
builtin_gpu - this is either Exophase's ARM NEON GPU (accurate and fast,
available if platform supports NEON, like on pandora),
gpu_peops or gpu_unai (depends on compile options).
+gpu_neon.so - Exophase's ARM NEON GPU with ARMv7 assembly optimizations.
gpu_peops.so - P.E.Op.S. soft GPU, reasonably accurate but slow
(also found with older emulators on PC)
-gpu_unai_old.so- Unai's plugin from PCSX4ALL project. Faster than P.E.Op.S.
- but has some glitches (old version).
gpu_gles.so - experimental port of P.E.Op.S. MesaGL plugin to OpenGL ES.
Occasionally faster but has lots of glitches and seems to
be rather unstable (may crash the driver/system).
Changelog
---------
+r25 (2025-02-26)
+* bug fixes and compatibility improvements
+
r24 (2024-01-22)
* HLE compatibility has been greatly improved
* various compatibility and accuracy improvements