From: notaz Date: Fri, 2 Nov 2012 01:07:05 +0000 (+0200) Subject: Merge branch 'enhancement' X-Git-Tag: r16~15 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=commitdiff_plain;h=2857d72e4ca743bba3cf55e298949e24d97dff02;hp=0e53ec55691229fee1cbb7c23b305be6a59431c4 Merge branch 'enhancement' Conflicts: frontend/libretro.c --- diff --git a/.gitmodules b/.gitmodules index 650250d6..f93599e3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ -[submodule "frontend/warm"] +[submodule "libpicofe"] + path = frontend/libpicofe + url = git://notaz.gp2x.de/~notaz/libpicofe.git +[submodule "warm"] path = frontend/warm url = git://notaz.gp2x.de/~notaz/warm.git diff --git a/Makefile b/Makefile index c10f7393..1f3e7361 100644 --- a/Makefile +++ b/Makefile @@ -127,6 +127,14 @@ OBJS += plugins/cdrcimg/cdrcimg.o # dfinput OBJS += plugins/dfinput/main.o plugins/dfinput/pad.o plugins/dfinput/guncon.o +# misc +ifeq "$(HAVE_NEON)" "1" +OBJS += frontend/libpicofe/arm/neon_scale2x.o +OBJS += frontend/libpicofe/arm/neon_eagle2x.o +frontend/libpicofe/arm/neon_scale2x.o: CFLAGS += -DDO_BGR_TO_RGB +frontend/libpicofe/arm/neon_eagle2x.o: CFLAGS += -DDO_BGR_TO_RGB +endif + # gui OBJS += frontend/main.o frontend/plugin.o OBJS += frontend/common/readpng.o frontend/common/fonts.o @@ -178,6 +186,12 @@ endif frontend/%.o: CFLAGS += -DIN_EVDEV frontend/menu.o frontend/main.o frontend/plat_sdl.o: frontend/revision.h +frontend/libpicofe/arm/neon_scale2x.S frontend/libpicofe/menu.c: + @echo "libpicofe module is missing, please run:" + @echo "git submodule init && git submodule update" + @exit 1 + + libpcsxcore/gte_nf.o: libpcsxcore/gte.c $(CC) -c -o $@ $^ $(CFLAGS) -DFLAGLESS @@ -185,7 +199,6 @@ frontend/revision.h: FORCE @(git describe || echo) | sed -e 's/.*/#define REV "\0"/' > $@_ @diff -q $@_ $@ > /dev/null 2>&1 || cp $@_ $@ @rm $@_ -.PHONY: FORCE %.o: %.S $(CC) $(CFLAGS) -c $^ -o $@ @@ -213,9 +226,11 @@ plugins_: clean_plugins: endif +.PHONY: all clean target_ 
plugins_ clean_plugins FORCE + # ----------- release ----------- -VER ?= $(shell git describe master) +VER ?= $(shell git describe HEAD) ifeq "$(PLATFORM)" "generic" OUT = pcsx_rearmed_$(VER) diff --git a/frontend/common/plat.h b/frontend/common/plat.h index 0a9fc0b2..1fb87676 100644 --- a/frontend/common/plat.h +++ b/frontend/common/plat.h @@ -45,6 +45,10 @@ int plat_is_dir(const char *path); int plat_wait_event(int *fds_hnds, int count, int timeout_ms); void plat_sleep_ms(int ms); +void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed); +void *plat_mremap(void *ptr, size_t oldsize, size_t newsize); +void plat_munmap(void *ptr, size_t size); + /* timers, to be used for time diff and must refer to the same clock */ unsigned int plat_get_ticks_ms(void); unsigned int plat_get_ticks_us(void); diff --git a/frontend/libpicofe b/frontend/libpicofe new file mode 160000 index 00000000..6ce097ba --- /dev/null +++ b/frontend/libpicofe @@ -0,0 +1 @@ +Subproject commit 6ce097ba2f3cd1c269bacd032b775b6d296433fc diff --git a/frontend/libretro.c b/frontend/libretro.c index 4305aa72..1eb2147f 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -13,6 +13,7 @@ #include "../libpcsxcore/psxcounters.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../plugins/dfsound/out.h" +#include "../plugins/gpulib/cspace.h" #include "main.h" #include "plugin.h" #include "plugin_lib.h" @@ -26,7 +27,6 @@ static retro_environment_t environ_cb; static retro_audio_sample_batch_t audio_batch_cb; static void *vout_buf; -static int vout_width, vout_height; static int samples_sent, samples_to_send; static int plugins_opened; static int native_rgb565; @@ -42,14 +42,10 @@ static int vout_open(void) return 0; } -static void *vout_set_mode(int w, int h, int bpp) +static void vout_set_mode(int w, int h, int bpp) { - vout_width = w; - vout_height = h; - return vout_buf; } -/* FIXME: either teach PCSX to blit to RGB1555 or RetroArch to support RGB565 */ static void 
convert(void *buf, size_t bytes) { unsigned int i, v, *p = buf; @@ -60,14 +56,39 @@ static void convert(void *buf, size_t bytes) } } -static void *vout_flip(void) +static void vout_flip(const void *vram, int stride, int bgr24, int w, int h) { - pl_rearmed_cbs.flip_cnt++; - if (!native_rgb565) - convert(vout_buf, vout_width * vout_height * 2); - video_cb(vout_buf, vout_width, vout_height, vout_width * 2); + unsigned short *dest = vout_buf; + const unsigned short *src = vram; + int dstride = w, h1 = h; + + if (vram == NULL) { + // blanking + memset(pl_vout_buf, 0, dstride * h * 2); + goto out; + } - return vout_buf; + if (bgr24) + { + // XXX: could we switch to RETRO_PIXEL_FORMAT_XRGB8888 here? + for (; h1-- > 0; dest += dstride, src += stride) + { + bgr888_to_rgb565(dest, src, w * 3); + } + } + else + { + for (; h1-- > 0; dest += dstride, src += stride) + { + bgr555_to_rgb565(dest, src, w * 2); + } + } + +out: + if (!native_rgb565) + convert(vout_buf, w * h * 2); + video_cb(vout_buf, w, h, w * 2); + pl_rearmed_cbs.flip_cnt++; } static void vout_close(void) diff --git a/frontend/linux/plat.c b/frontend/linux/plat.c index b7152b55..4ed1e65d 100644 --- a/frontend/linux/plat.c +++ b/frontend/linux/plat.c @@ -17,9 +17,17 @@ #include #include #include +#include #include "../common/plat.h" +/* XXX: maybe unhardcode pagesize? 
*/ +#define HUGETLB_PAGESIZE (2 * 1024 * 1024) +#define HUGETLB_THRESHOLD (HUGETLB_PAGESIZE / 2) +#ifndef MAP_HUGETLB +#define MAP_HUGETLB 0x40000 /* arch specific */ +#endif + int plat_is_dir(const char *path) { @@ -126,16 +134,36 @@ int plat_wait_event(int *fds_hnds, int count, int timeout_ms) return ret; } -void *plat_mmap(unsigned long addr, size_t size) +void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed) { + static int hugetlb_disabled; + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; void *req, *ret; req = (void *)addr; - ret = mmap(req, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (need_exec) + prot |= PROT_EXEC; + if (is_fixed) + flags |= MAP_FIXED; + if (size >= HUGETLB_THRESHOLD && !hugetlb_disabled) + flags |= MAP_HUGETLB; + + ret = mmap(req, size, prot, flags, -1, 0); + if (ret == MAP_FAILED && (flags & MAP_HUGETLB)) { + fprintf(stderr, + "warning: failed to do hugetlb mmap (%p, %zu): %d\n", + req, size, errno); + hugetlb_disabled = 1; + flags &= ~MAP_HUGETLB; + ret = mmap(req, size, prot, flags, -1, 0); + } if (ret == MAP_FAILED) return NULL; - if (ret != req) - printf("warning: mmaped to %p, requested %p\n", ret, req); + + if (req != NULL && ret != req) + fprintf(stderr, + "warning: mmaped to %p, requested %p\n", ret, req); return ret; } @@ -155,7 +183,18 @@ void *plat_mremap(void *ptr, size_t oldsize, size_t newsize) void plat_munmap(void *ptr, size_t size) { - munmap(ptr, size); + int ret; + + ret = munmap(ptr, size); + if (ret != 0 && (size & (HUGETLB_PAGESIZE - 1))) { + // prehaps an autorounded hugetlb mapping? 
+ size = (size + HUGETLB_PAGESIZE - 1) & ~(HUGETLB_PAGESIZE - 1); + ret = munmap(ptr, size); + } + if (ret != 0) { + fprintf(stderr, + "munmap(%p, %zu) failed: %d\n", ptr, size, errno); + } } /* lprintf */ diff --git a/frontend/main.c b/frontend/main.c index 19e8319b..56b5cb76 100644 --- a/frontend/main.c +++ b/frontend/main.c @@ -143,6 +143,8 @@ void emu_set_default_config(void) Config.PsxAuto = 1; pl_rearmed_cbs.gpu_neon.allow_interlace = 2; // auto + pl_rearmed_cbs.gpu_neon.enhancement_enable = + pl_rearmed_cbs.gpu_neon.enhancement_no_main = 0; pl_rearmed_cbs.gpu_peops.iUseDither = 0; pl_rearmed_cbs.gpu_peops.dwActFixes = 1<<7; pl_rearmed_cbs.gpu_unai.abe_hack = @@ -230,6 +232,14 @@ do_state_slot: pl_rearmed_cbs.frameskip == 0 ? "OFF" : "1" ); plugin_call_rearmed_cbs(); break; + case SACTION_SWITCH_DISPMODE: + pl_switch_dispmode(); + plugin_call_rearmed_cbs(); + if (GPU_open != NULL && GPU_close != NULL) { + GPU_close(); + GPU_open(&gpuDisp, "PCSX", NULL); + } + break; case SACTION_SCREENSHOT: { char buf[MAXPATHLEN]; diff --git a/frontend/main.h b/frontend/main.h index bdb48702..a03db8b2 100644 --- a/frontend/main.h +++ b/frontend/main.h @@ -65,6 +65,7 @@ enum sched_action { SACTION_NEXT_SSLOT, SACTION_PREV_SSLOT, SACTION_TOGGLE_FSKIP, + SACTION_SWITCH_DISPMODE, SACTION_SCREENSHOT, SACTION_VOLUME_UP, SACTION_VOLUME_DOWN, diff --git a/frontend/menu.c b/frontend/menu.c index 42a53e1d..d3ce06c3 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -75,6 +75,7 @@ typedef enum MA_OPT_SCALER, MA_OPT_SCALER2, MA_OPT_FILTERING, + MA_OPT_FILTERING2, MA_OPT_SCALER_C, } menu_id; @@ -87,7 +88,7 @@ static int psx_clock; static int memcard1_sel, memcard2_sel; int g_opts, g_scaler; int soft_scaling, analog_deadzone; // for Caanoo -int filter; +int filter, soft_filter; #ifdef __ARM_ARCH_7A__ #define DEFAULT_PSX_CLOCK 57 @@ -213,6 +214,7 @@ static void menu_set_defconfig(void) frameskip = 0; analog_deadzone = 50; soft_scaling = 1; + soft_filter = 0; psx_clock = 
DEFAULT_PSX_CLOCK; region = 0; @@ -274,6 +276,7 @@ static const struct { CE_INTVAL(g_layer_w), CE_INTVAL(g_layer_h), CE_INTVAL(filter), + CE_INTVAL(soft_filter), CE_INTVAL(state_slot), CE_INTVAL(cpu_clock), CE_INTVAL(g_opts), @@ -288,6 +291,8 @@ static const struct { CE_INTVAL_P(gpu_unai.no_light), CE_INTVAL_P(gpu_unai.no_blend), CE_INTVAL_P(gpu_neon.allow_interlace), + CE_INTVAL_P(gpu_neon.enhancement_enable), + CE_INTVAL_P(gpu_neon.enhancement_no_main), CE_INTVAL_P(gpu_peopsgl.bDrawDither), CE_INTVAL_P(gpu_peopsgl.iFilterType), CE_INTVAL_P(gpu_peopsgl.iFrameTexType), @@ -661,6 +666,7 @@ me_bind_action emuctrl_actions[] = { "Next Save Slot ", 1 << SACTION_NEXT_SSLOT }, { "Toggle Frameskip ", 1 << SACTION_TOGGLE_FSKIP }, { "Take Screenshot ", 1 << SACTION_SCREENSHOT }, + { "Switch Renderer ", 1 << SACTION_SWITCH_DISPMODE }, { "Enter Menu ", 1 << SACTION_ENTER_MENU }, #ifdef __ARM_ARCH_7A__ /* XXX */ { "Minimize ", 1 << SACTION_MINIMIZE }, @@ -1029,9 +1035,15 @@ static int menu_loop_keyconfig(int id, int keys) // ------------ gfx options menu ------------ static const char *men_scaler[] = { "1x1", "scaled 4:3", "integer scaled 4:3", "fullscreen", "custom", NULL }; +static const char *men_soft_filter[] = { "None", +#ifdef __ARM_NEON__ + "scale2x", "eagle2x", +#endif + NULL }; +static const char *men_dummy[] = { NULL }; static const char h_cscaler[] = "Displays the scaler layer, you can resize it\n" "using d-pad or move it using R+d-pad"; -static const char *men_dummy[] = { NULL }; +static const char h_soft_filter[] = "Works only if game uses low resolution modes"; static int menu_loop_cscaler(int id, int keys) { @@ -1090,6 +1102,7 @@ static menu_entry e_menu_gfx_options[] = mee_enum ("Scaler", MA_OPT_SCALER, g_scaler, men_scaler), mee_onoff ("Software Scaling", MA_OPT_SCALER2, soft_scaling, 1), mee_enum ("Filter", MA_OPT_FILTERING, filter, men_dummy), + mee_enum_h ("Software Filter", MA_OPT_FILTERING2, soft_filter, men_soft_filter, h_soft_filter), // mee_onoff 
("Vsync", 0, vsync, 1), mee_cust_h ("Setup custom scaler", MA_OPT_SCALER_C, menu_loop_cscaler, NULL, h_cscaler), mee_end, @@ -1118,18 +1131,26 @@ void menu_set_filter_list(void *filters) #ifdef __ARM_NEON__ -static const char h_gpu_neon[] = "Configure built-in NEON GPU plugin"; +static const char h_gpu_neon[] = + "Configure built-in NEON GPU plugin"; +static const char h_gpu_neon_enhanced[] = + "Renders in double resolution at the cost of lower performance\n" + "(not available for high resolution games)"; +static const char h_gpu_neon_enhanced_hack[] = + "Speed hack for above option (glitches some games)"; static const char *men_gpu_interlace[] = { "Off", "On", "Auto", NULL }; static menu_entry e_menu_plugin_gpu_neon[] = { mee_enum ("Enable interlace mode", 0, pl_rearmed_cbs.gpu_neon.allow_interlace, men_gpu_interlace), + mee_onoff_h ("Enhanced resolution (slow)", 0, pl_rearmed_cbs.gpu_neon.enhancement_enable, 1, h_gpu_neon_enhanced), + mee_onoff_h ("Enhanced res. speed hack", 0, pl_rearmed_cbs.gpu_neon.enhancement_no_main, 1, h_gpu_neon_enhanced_hack), mee_end, }; static int menu_loop_plugin_gpu_neon(int id, int keys) { - int sel = 0; + static int sel = 0; me_loop(e_menu_plugin_gpu_neon, &sel); return 0; } @@ -2247,6 +2268,7 @@ void menu_init(void) #ifndef __ARM_ARCH_7A__ /* XXX */ me_enable(e_menu_gfx_options, MA_OPT_SCALER, 0); me_enable(e_menu_gfx_options, MA_OPT_FILTERING, 0); + me_enable(e_menu_gfx_options, MA_OPT_FILTERING2, 0); me_enable(e_menu_gfx_options, MA_OPT_SCALER_C, 0); me_enable(e_menu_keyconfig, MA_CTRL_NUBS_BTNS, 0); #else diff --git a/frontend/menu.h b/frontend/menu.h index 2062acdf..221be15c 100644 --- a/frontend/menu.h +++ b/frontend/menu.h @@ -22,9 +22,15 @@ enum g_scaler_opts { SCALE_CUSTOM, }; +enum g_soft_filter_opts { + SOFT_FILTER_NONE, + SOFT_FILTER_SCALE2X, + SOFT_FILTER_EAGLE2X, +}; + extern int g_opts, g_scaler; extern int soft_scaling, analog_deadzone; -extern int filter; +extern int filter, soft_filter; extern int g_menuscreen_w; 
extern int g_menuscreen_h; diff --git a/frontend/pandora/pcsx.sh b/frontend/pandora/pcsx.sh index 0957b945..bc1d6c53 100755 --- a/frontend/pandora/pcsx.sh +++ b/frontend/pandora/pcsx.sh @@ -5,10 +5,15 @@ nub0mode=`cat /proc/pandora/nub0/mode` nub1mode=`cat /proc/pandora/nub1/mode` /usr/pandora/scripts/op_nubchange.sh absolute absolute +# 4MB for RAM (2+align) + 2MB for vram (1+overdraw) + 10MB for gpu_neon (8+overdraw) +# no big deal if this fails, only performance loss +sudo -n /usr/pandora/scripts/op_hugetlb.sh 16 + ./pcsx "$@" # restore stuff if pcsx crashes ./picorestore sudo -n /usr/pandora/scripts/op_lcdrate.sh 60 +sudo -n /usr/pandora/scripts/op_hugetlb.sh 0 /usr/pandora/scripts/op_nubchange.sh $nub0mode $nub1mode diff --git a/frontend/plat_omap.c b/frontend/plat_omap.c index b01c6343..e5b6c04e 100644 --- a/frontend/plat_omap.c +++ b/frontend/plat_omap.c @@ -52,8 +52,9 @@ static int omap_setup_layer_(int fd, int enabled, int x, int y, int w, int h) perror("SETUP_PLANE"); } - if (mi.size < 640*512*3*3) { - mi.size = 640*512*3*3; + // upto 1024x512 (2x resolution enhancement) + if (mi.size < 1024*512*2 * 3) { + mi.size = 1024*512*2 * 3; ret = ioctl(fd, OMAPFB_SETUP_MEM, &mi); if (ret != 0) { perror("SETUP_MEM"); diff --git a/frontend/plat_pandora.c b/frontend/plat_pandora.c index 9ec747db..b82450c6 100644 --- a/frontend/plat_pandora.c +++ b/frontend/plat_pandora.c @@ -65,6 +65,7 @@ static const struct in_default_bind in_evdev_defbinds[] = { { KEY_4, IN_BINDTYPE_EMU, SACTION_NEXT_SSLOT }, { KEY_5, IN_BINDTYPE_EMU, SACTION_TOGGLE_FSKIP }, { KEY_6, IN_BINDTYPE_EMU, SACTION_SCREENSHOT }, + { KEY_7, IN_BINDTYPE_EMU, SACTION_SWITCH_DISPMODE }, { 0, 0, 0 } }; diff --git a/frontend/plat_pollux.c b/frontend/plat_pollux.c index 1dafb7cd..52a09b14 100644 --- a/frontend/plat_pollux.c +++ b/frontend/plat_pollux.c @@ -305,12 +305,13 @@ static void spend_cycles(int loops) #define DMA_REG(x) memregl[(DMA_BASE6 + x) >> 2] /* this takes ~1.5ms, while ldm/stm ~1.95ms */ -static 
void raw_flip_dma(int x, int y) +static void raw_flip_dma(const void *vram, int stride, int bgr24, int w, int h) { + unsigned int pixel_offset = psx_vram - (unsigned short *)vram; unsigned int dst = fb_paddrs[fb_work_buf] + (fb_offset_y * 320 + fb_offset_x) * psx_bpp / 8; - int spsx_line = y + psx_offset_y; - int spsx_offset = (x + psx_offset_x) & 0x3f8; + int spsx_line = pixel_offset / 1024 + psx_offset_y; + int spsx_offset = (pixel_offset + psx_offset_x) & 0x3f8; int dst_stride = 320 * psx_bpp / 8; int len = psx_src_width * psx_bpp / 8; int i; @@ -344,7 +345,7 @@ static void raw_flip_dma(int x, int y) if (psx_bpp == 16) { pl_vout_buf = g_menuscreen_ptr; - pl_print_hud(fb_offset_x); + pl_print_hud(w, h, fb_offset_x); } g_menuscreen_ptr = fb_flip(); @@ -354,26 +355,24 @@ static void raw_flip_dma(int x, int y) } #define make_flip_func(name, blitfunc) \ -static void name(int x, int y) \ +static void name(const void *vram_, int stride, int bgr24, int w, int h) \ { \ - unsigned short *vram = psx_vram; \ + const unsigned short *vram = vram_; \ unsigned char *dst = (unsigned char *)g_menuscreen_ptr + \ (fb_offset_y * 320 + fb_offset_x) * psx_bpp / 8; \ - unsigned int src = (y + psx_offset_y) * 1024 + x + psx_offset_x; \ int dst_stride = 320 * psx_bpp / 8; \ int len = psx_src_width * psx_bpp / 8; \ int i; \ \ pcnt_start(PCNT_BLIT); \ \ - for (i = psx_src_height; i > 0; i--, src += psx_step * 1024, dst += dst_stride) { \ - src &= 1024*512-1; \ - blitfunc(dst, vram + src, len); \ - } \ + vram += psx_offset_y * 1024 + psx_offset_x; \ + for (i = psx_src_height; i > 0; i--, vram += psx_step * 1024, dst += dst_stride)\ + blitfunc(dst, vram, len); \ \ if (psx_bpp == 16) { \ pl_vout_buf = g_menuscreen_ptr; \ - pl_print_hud(fb_offset_x); \ + pl_print_hud(w, h, fb_offset_x); \ } \ \ g_menuscreen_ptr = fb_flip(); \ @@ -402,20 +401,20 @@ void *plat_gvideo_set_mode(int *w_, int *h_, int *bpp_) switch (w + (bpp != 16) + !soft_scaling) { case 640: - pl_rearmed_cbs.pl_vout_raw_flip = 
raw_flip_soft_640; + pl_rearmed_cbs.pl_vout_flip = raw_flip_soft_640; w_max = 640; break; case 512: - pl_rearmed_cbs.pl_vout_raw_flip = raw_flip_soft_512; + pl_rearmed_cbs.pl_vout_flip = raw_flip_soft_512; w_max = 512; break; case 384: case 368: - pl_rearmed_cbs.pl_vout_raw_flip = raw_flip_soft_368; + pl_rearmed_cbs.pl_vout_flip = raw_flip_soft_368; w_max = 368; break; default: - pl_rearmed_cbs.pl_vout_raw_flip = have_warm ? raw_flip_dma : raw_flip_soft; + pl_rearmed_cbs.pl_vout_flip = have_warm ? raw_flip_dma : raw_flip_soft; w_max = 320; break; } @@ -621,7 +620,7 @@ void plat_init(void) if (mixerdev == -1) perror("open(/dev/mixer)"); - pl_rearmed_cbs.pl_vout_raw_flip = have_warm ? raw_flip_dma : raw_flip_soft; + pl_rearmed_cbs.pl_vout_flip = have_warm ? raw_flip_dma : raw_flip_soft; pl_rearmed_cbs.pl_vout_set_raw_vram = pl_vout_set_raw_vram; psx_src_width = 320; diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 4dbb9a7d..3ee59472 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -21,13 +21,17 @@ #include "linux/fbdev.h" #include "common/fonts.h" #include "common/input.h" +#include "common/plat.h" #include "menu.h" #include "main.h" #include "plat.h" #include "pcnt.h" #include "pl_gun_ts.h" +#include "libpicofe/arm/neon_scale2x.h" +#include "libpicofe/arm/neon_eagle2x.h" #include "../libpcsxcore/new_dynarec/new_dynarec.h" #include "../libpcsxcore/psemu_plugin_defs.h" +#include "../plugins/gpulib/cspace.h" int in_type1, in_type2; int in_a1[2] = { 127, 127 }, in_a2[2] = { 127, 127 }; @@ -38,6 +42,7 @@ void *tsdev; void *pl_vout_buf; int g_layer_x, g_layer_y, g_layer_w, g_layer_h; static int pl_vout_w, pl_vout_h, pl_vout_bpp; /* output display/layer */ +static int pl_vout_scale; static int psx_w, psx_h, psx_bpp; static int vsync_cnt; static int is_pal, frame_interval, frame_interval1024; @@ -113,10 +118,8 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h) } } -void pl_print_hud(int xborder) +void 
pl_print_hud(int w, int h, int xborder) { - int w = pl_vout_w, h = pl_vout_h; - if (h < 16) return; @@ -184,55 +187,142 @@ static void update_layer_size(int w, int h) if (g_layer_h > g_menuscreen_h) g_layer_h = g_menuscreen_h; } -static void *pl_vout_set_mode(int w, int h, int bpp) +// XXX: this is platform specific really +static int resolution_ok(int w, int h) { + return w <= 1024 && h <= 512; +} + +static void pl_vout_set_mode(int w, int h, int bpp) +{ + int vout_w, vout_h, vout_bpp; + // special h handling, Wipeout likes to change it by 1-6 static int vsync_cnt_ms_prev; if ((unsigned int)(vsync_cnt - vsync_cnt_ms_prev) < 5*60) h = (h + 7) & ~7; vsync_cnt_ms_prev = vsync_cnt; - if (w == psx_w && h == psx_h && bpp == psx_bpp) - return pl_vout_buf; + vout_w = psx_w = w; + vout_h = psx_h = h; + vout_bpp = psx_bpp = bpp; + + pl_vout_scale = 1; +#ifdef __ARM_NEON__ + if (soft_filter) { + if (resolution_ok(w * 2, h * 2) && bpp == 16) { + vout_w *= 2; + vout_h *= 2; + pl_vout_scale = 2; + } + else { + // filter unavailable + hud_msg[0] = 0; + } + } +#endif - pl_vout_w = psx_w = w; - pl_vout_h = psx_h = h; - pl_vout_bpp = psx_bpp = bpp; + if (pl_vout_buf != NULL && vout_w == pl_vout_w && vout_h == pl_vout_h + && vout_bpp == pl_vout_bpp) + return; - update_layer_size(pl_vout_w, pl_vout_h); + update_layer_size(vout_w, vout_h); - pl_vout_buf = plat_gvideo_set_mode(&pl_vout_w, &pl_vout_h, &pl_vout_bpp); - if (pl_vout_buf == NULL && pl_rearmed_cbs.pl_vout_raw_flip == NULL) + pl_vout_buf = plat_gvideo_set_mode(&vout_w, &vout_h, &vout_bpp); + if (pl_vout_buf == NULL) fprintf(stderr, "failed to set mode %dx%d@%d\n", psx_w, psx_h, psx_bpp); + else { + pl_vout_w = vout_w; + pl_vout_h = vout_h; + pl_vout_bpp = vout_bpp; + } menu_notify_mode_change(pl_vout_w, pl_vout_h, pl_vout_bpp); - - return pl_vout_buf; } -// only used if raw flip is not defined -static void *pl_vout_flip(void) +static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) { - 
pl_rearmed_cbs.flip_cnt++; + static int doffs_old, clear_counter; + unsigned char *dest = pl_vout_buf; + const unsigned short *src = vram; + int dstride = pl_vout_w, h1 = h; + int doffs; + + if (dest == NULL) + goto out; + + if (vram == NULL) { + // blanking + memset(pl_vout_buf, 0, dstride * pl_vout_h * pl_vout_bpp / 8); + goto out; + } + + // borders + doffs = (dstride - w * pl_vout_scale) / 2 & ~1; + dest += doffs * 2; + + if (doffs > doffs_old) + clear_counter = 2; + doffs_old = doffs; + + if (clear_counter > 0) { + memset(pl_vout_buf, 0, dstride * pl_vout_h * pl_vout_bpp / 8); + clear_counter--; + } - if (pl_vout_buf != NULL) - pl_print_hud(0); + if (bgr24) + { + if (pl_rearmed_cbs.only_16bpp) { + for (; h1-- > 0; dest += dstride * 2, src += stride) + { + bgr888_to_rgb565(dest, src, w * 3); + } + } + else { + dest -= doffs * 2; + dest += (doffs / 8) * 24; + for (; h1-- > 0; dest += dstride * 3, src += stride) + { + bgr888_to_rgb888(dest, src, w * 3); + } + } + } +#ifdef __ARM_NEON__ + else if (soft_filter == SOFT_FILTER_SCALE2X && pl_vout_scale == 2) + { + neon_scale2x_16_16(src, (void *)dest, w, + stride * 2, dstride * 2, h1); + } + else if (soft_filter == SOFT_FILTER_EAGLE2X && pl_vout_scale == 2) + { + neon_eagle2x_16_16(src, (void *)dest, w, + stride * 2, dstride * 2, h1); + } +#endif + else + { + for (; h1-- > 0; dest += dstride * 2, src += stride) + { + bgr555_to_rgb565(dest, src, w * 2); + } + } + + pl_print_hud(w * pl_vout_scale, h * pl_vout_scale, 0); + +out: // let's flip now pl_vout_buf = plat_gvideo_flip(); - return pl_vout_buf; + pl_rearmed_cbs.flip_cnt++; } static int pl_vout_open(void) { struct timeval now; - int h; - // force mode update - h = psx_h; - psx_h--; - pl_vout_buf = pl_vout_set_mode(psx_w, h, psx_bpp); + // force mode update on pl_vout_set_mode() call from gpulib/vout_pl + pl_vout_buf = NULL; plat_gvideo_open(is_pal); @@ -249,6 +339,11 @@ static void pl_vout_close(void) plat_gvideo_close(); } +static void pl_set_gpu_caps(int caps) +{ 
+ pl_rearmed_cbs.gpu_caps = caps; +} + void *pl_prepare_screenshot(int *w, int *h, int *bpp) { void *ret = plat_prepare_screenshot(w, h, bpp); @@ -262,6 +357,75 @@ void *pl_prepare_screenshot(int *w, int *h, int *bpp) return pl_vout_buf; } +/* display/redering mode switcher */ +static int dispmode_default(void) +{ + pl_rearmed_cbs.gpu_neon.enhancement_enable = 0; + soft_filter = SOFT_FILTER_NONE; + snprintf(hud_msg, sizeof(hud_msg), "default mode"); + return 1; +} + +int dispmode_doubleres(void) +{ + if (!(pl_rearmed_cbs.gpu_caps & GPU_CAP_SUPPORTS_2X) + || !resolution_ok(psx_w * 2, psx_h * 2) || psx_bpp != 16) + return 0; + + dispmode_default(); + pl_rearmed_cbs.gpu_neon.enhancement_enable = 1; + snprintf(hud_msg, sizeof(hud_msg), "double resolution"); + return 1; +} + +int dispmode_scale2x(void) +{ + if (psx_bpp != 16) + return 0; + + dispmode_default(); + soft_filter = SOFT_FILTER_SCALE2X; + snprintf(hud_msg, sizeof(hud_msg), "scale2x"); + return 1; +} + +int dispmode_eagle2x(void) +{ + if (psx_bpp != 16) + return 0; + + dispmode_default(); + soft_filter = SOFT_FILTER_EAGLE2X; + snprintf(hud_msg, sizeof(hud_msg), "eagle2x"); + return 1; +} + +static int (*dispmode_switchers[])(void) = { + dispmode_default, +#ifdef __ARM_NEON__ + dispmode_doubleres, + dispmode_scale2x, + dispmode_eagle2x, +#endif +}; + +static int dispmode_current; + +void pl_switch_dispmode(void) +{ + if (pl_rearmed_cbs.gpu_caps & GPU_CAP_OWNS_DISPLAY) + return; + + while (1) { + dispmode_current++; + if (dispmode_current >= + sizeof(dispmode_switchers) / sizeof(dispmode_switchers[0])) + dispmode_current = 0; + if (dispmode_switchers[dispmode_current]()) + break; + } +} + #ifndef MAEMO static void update_analogs(void) { @@ -442,16 +606,31 @@ void pl_timing_prepare(int is_pal_) static void pl_text_out16_(int x, int y, const char *text) { - int i, l, len = strlen(text), w = pl_vout_w; - unsigned short *screen = (unsigned short *)pl_vout_buf + x + y * w; + int i, l, w = pl_vout_w; + unsigned short 
*screen; unsigned short val = 0xffff; - for (i = 0; i < len; i++, screen += 8) + x &= ~1; + screen = (unsigned short *)pl_vout_buf + x + y * w; + for (i = 0; ; i++, screen += 8) { + char c = text[i]; + if (c == 0) + break; + if (c == ' ') + continue; + for (l = 0; l < 8; l++) { - unsigned char fd = fontdata8x8[text[i] * 8 + l]; + unsigned char fd = fontdata8x8[c * 8 + l]; unsigned short *s = screen + l * w; + unsigned int *s32 = (void *)s; + + s32[0] = (s32[0] >> 1) & 0x7bef7bef; + s32[1] = (s32[1] >> 1) & 0x7bef7bef; + s32[2] = (s32[2] >> 1) & 0x7bef7bef; + s32[3] = (s32[3] >> 1) & 0x7bef7bef; + if (fd&0x80) s[0] = val; if (fd&0x40) s[1] = val; if (fd&0x20) s[2] = val; @@ -484,12 +663,26 @@ static void pl_get_layer_pos(int *x, int *y, int *w, int *h) *h = g_layer_h; } +static void *pl_mmap(unsigned int size) +{ + return plat_mmap(0, size, 0, 0); +} + +static void pl_munmap(void *ptr, unsigned int size) +{ + plat_munmap(ptr, size); +} + struct rearmed_cbs pl_rearmed_cbs = { pl_get_layer_pos, pl_vout_open, pl_vout_set_mode, pl_vout_flip, pl_vout_close, + + .mmap = pl_mmap, + .munmap = pl_munmap, + .pl_set_gpu_caps = pl_set_gpu_caps, }; /* watchdog */ diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index bcf74acc..332fbc2e 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -31,7 +31,8 @@ void pl_text_out16(int x, int y, const char *texto, ...); void pl_start_watchdog(void); void *pl_prepare_screenshot(int *w, int *h, int *bpp); void pl_init(void); -void pl_print_hud(int xborder); +void pl_print_hud(int width, int height, int xborder); +void pl_switch_dispmode(void); void pl_timing_prepare(int is_pal); void pl_frame_limit(void); @@ -41,12 +42,15 @@ void pl_update_gun(int *xn, int *xres, int *y, int *in); struct rearmed_cbs { void (*pl_get_layer_pos)(int *x, int *y, int *w, int *h); int (*pl_vout_open)(void); - void *(*pl_vout_set_mode)(int w, int h, int bpp); - void *(*pl_vout_flip)(void); + void (*pl_vout_set_mode)(int w, int h, int bpp); + 
void (*pl_vout_flip)(const void *vram, int stride, int bgr24, + int w, int h); void (*pl_vout_close)(void); - // these are only used by some frontends - void (*pl_vout_raw_flip)(int x, int y); + void *(*mmap)(unsigned int size); + void (*munmap)(void *ptr, unsigned int size); + // only used by some frontends void (*pl_vout_set_raw_vram)(void *vram); + void (*pl_set_gpu_caps)(int caps); // some stats, for display by some plugins int flips_per_sec, cpu_usage; float vsps_cur; // currect vsync/s @@ -60,6 +64,8 @@ struct rearmed_cbs { unsigned int only_16bpp; // platform is 16bpp-only struct { int allow_interlace; // 0 off, 1 on, 2 guess + int enhancement_enable; + int enhancement_no_main; } gpu_neon; struct { int iUseDither; @@ -78,10 +84,17 @@ struct rearmed_cbs { int iUseMask, bOpaquePass, bAdvancedBlend, bUseFastMdec; int iVRamSize, iTexGarbageCollection; } gpu_peopsgl; + // misc + int gpu_caps; }; extern struct rearmed_cbs pl_rearmed_cbs; +enum gpu_plugin_caps { + GPU_CAP_OWNS_DISPLAY = (1 << 0), + GPU_CAP_SUPPORTS_2X = (1 << 1), +}; + #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #endif diff --git a/libpcsxcore/psxmem.c b/libpcsxcore/psxmem.c index 1cabd53c..ddcd05be 100644 --- a/libpcsxcore/psxmem.c +++ b/libpcsxcore/psxmem.c @@ -60,6 +60,16 @@ u8 **psxMemRLUT = NULL; 0xbfc0_0000-0xbfc7_ffff BIOS Mirror (512K) Uncached */ +#if 1 +void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed); +void plat_munmap(void *ptr, size_t size); +#else +#define plat_mmap(addr, size, need_exec, is_fixed) \ + mmap((void *)addr, size, PROT_WRITE | PROT_READ, \ + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) +#define plat_munmap munmap +#endif + int psxMemInit() { int i; @@ -68,8 +78,7 @@ int psxMemInit() { memset(psxMemRLUT, 0, 0x10000 * sizeof(void *)); memset(psxMemWLUT, 0, 0x10000 * sizeof(void *)); - psxM = mmap((void *)0x80000000, 0x00210000, - PROT_WRITE | PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + psxM = 
plat_mmap(0x80000000, 0x00210000, 0, 1); #ifndef RAM_FIXED if (psxM == MAP_FAILED) psxM = mmap((void *)0x70000000, 0x00210000, @@ -144,7 +153,7 @@ void psxMemReset() { } void psxMemShutdown() { - munmap(psxM, 0x00210000); + plat_munmap(psxM, 0x00210000); munmap(psxH, 0x1f800000); munmap(psxR, 0x80000); diff --git a/plugins/dfxvideo/draw_pl.c b/plugins/dfxvideo/draw_pl.c index dffd52b4..06a635da 100644 --- a/plugins/dfxvideo/draw_pl.c +++ b/plugins/dfxvideo/draw_pl.c @@ -19,56 +19,26 @@ BOOL bCheckMask = FALSE; unsigned short sSetMask; unsigned long lSetMask; -static void blit(void *vout_buf) +static void blit(void) { int px = PSXDisplay.DisplayPosition.x & ~1; // XXX: align needed by bgr*_to_... int py = PSXDisplay.DisplayPosition.y; int w = PreviousPSXDisplay.Range.x1; int h = PreviousPSXDisplay.DisplayMode.y; - int pitch = PreviousPSXDisplay.DisplayMode.x; unsigned short *srcs = psxVuw + py * 1024 + px; - unsigned char *dest = vout_buf; if (w <= 0) return; - pitch *= (PSXDisplay.RGB24 && !rcbs->only_16bpp) ? 3 : 2; - // account for centering h -= PreviousPSXDisplay.Range.y0; - dest += PreviousPSXDisplay.Range.y0 / 2 * pitch; - dest += (PreviousPSXDisplay.Range.x0 & ~3) * 2; // must align here too.. 
- - if (PSXDisplay.RGB24) - { - if (!rcbs->only_16bpp) - { - for (; h-- > 0; dest += pitch, srcs += 1024) - { - bgr888_to_rgb888(dest, srcs, w * 3); - } - } - else - { - for (; h-- > 0; dest += pitch, srcs += 1024) - { - bgr888_to_rgb565(dest, srcs, w * 3); - } - } - } - else - { - for (; h-- > 0; dest += pitch, srcs += 1024) - { - bgr555_to_rgb565(dest, srcs, w * 2); - } - } + + rcbs->pl_vout_flip(srcs, 1024, PSXDisplay.RGB24, w, h); } void DoBufferSwap(void) { static int fbw, fbh, fb24bpp; - static void *vout_buf; if (PreviousPSXDisplay.DisplayMode.x == 0 || PreviousPSXDisplay.DisplayMode.y == 0) return; @@ -80,17 +50,12 @@ void DoBufferSwap(void) fbw = PreviousPSXDisplay.DisplayMode.x; fbh = PreviousPSXDisplay.DisplayMode.y; fb24bpp = PSXDisplay.RGB24; - vout_buf = rcbs->pl_vout_set_mode(fbw, fbh, fb24bpp ? 24 : 16); + rcbs->pl_vout_set_mode(fbw, fbh, fb24bpp ? 24 : 16); } pcnt_start(PCNT_BLIT); - if (rcbs->pl_vout_raw_flip != NULL) - rcbs->pl_vout_raw_flip(PSXDisplay.DisplayPosition.x, PSXDisplay.DisplayPosition.y); - else - blit(vout_buf); + blit(); pcnt_end(PCNT_BLIT); - - vout_buf = rcbs->pl_vout_flip(); } void DoClearScreenBuffer(void) diff --git a/plugins/dfxvideo/gpu.c b/plugins/dfxvideo/gpu.c index 9fa08fe3..3d20dfa4 100644 --- a/plugins/dfxvideo/gpu.c +++ b/plugins/dfxvideo/gpu.c @@ -1143,6 +1143,8 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) dwFrameRateTicks = cbs->gpu_peops.dwFrameRateTicks; if (cbs->pl_vout_set_raw_vram) cbs->pl_vout_set_raw_vram(psxVub); + if (cbs->pl_set_gpu_caps) + cbs->pl_set_gpu_caps(0); skip_advice = &cbs->fskip_advice; fps_skip = 100.0f; diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c index 12aa0a3f..d98520cb 100644 --- a/plugins/dfxvideo/gpulib_if.c +++ b/plugins/dfxvideo/gpulib_if.c @@ -265,9 +265,9 @@ long lLowerpart; ///////////////////////////////////////////////////////////////////////////// -int renderer_init(void) +static void set_vram(void *vram) { - psxVub=(void *)gpu.vram; + 
psxVub=vram; psxVsb=(signed char *)psxVub; // different ways of accessing PSX VRAM psxVsw=(signed short *)psxVub; @@ -276,6 +276,11 @@ int renderer_init(void) psxVul=(uint32_t *)psxVub; psxVuw_eom=psxVuw+1024*512; // pre-calc of end of vram +} + +int renderer_init(void) +{ + set_vram(gpu.vram); PSXDisplay.RGB24 = FALSE; // init some stuff PSXDisplay.Interlaced = FALSE; @@ -294,6 +299,14 @@ int renderer_init(void) return 0; } +void renderer_finish(void) +{ +} + +void renderer_notify_res_change(void) +{ +} + extern const unsigned char cmd_lengths[256]; int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) @@ -408,4 +421,7 @@ void renderer_set_config(const struct rearmed_cbs *cbs) { iUseDither = cbs->gpu_peops.iUseDither; dwActFixes = cbs->gpu_peops.dwActFixes; + if (cbs->pl_set_gpu_caps) + cbs->pl_set_gpu_caps(0); + set_vram(gpu.vram); } diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c index c25ad8bb..068dc411 100644 --- a/plugins/gpu-gles/gpulib_if.c +++ b/plugins/gpu-gles/gpulib_if.c @@ -479,10 +479,15 @@ switch((gdata>>24)&0xff) static int is_opened; -int renderer_init(void) +static void set_vram(void *vram) { - psxVub=(void *)gpu.vram; + psxVub=vram; psxVuw=(unsigned short *)psxVub; +} + +int renderer_init(void) +{ + set_vram(gpu.vram); PSXDisplay.RGB24 = FALSE; // init some stuff PSXDisplay.Interlaced = FALSE; @@ -500,6 +505,14 @@ int renderer_init(void) return 0; } +void renderer_finish(void) +{ +} + +void renderer_notify_res_change(void) +{ +} + extern const unsigned char cmd_lengths[256]; // XXX: mostly dupe code from soft peops @@ -702,6 +715,10 @@ void renderer_set_config(const struct rearmed_cbs *cbs_) bUseFastMdec = cbs->gpu_peopsgl.bUseFastMdec; iTexGarbageCollection = cbs->gpu_peopsgl.iTexGarbageCollection; iVRamSize = cbs->gpu_peopsgl.iVRamSize; + if (cbs->pl_set_gpu_caps) + cbs->pl_set_gpu_caps(GPU_CAP_OWNS_DISPLAY); + + set_vram(gpu.vram); } void SetAspectRatio(void) diff --git a/plugins/gpu_neon/Makefile 
b/plugins/gpu_neon/Makefile index 8a7342bd..08bf0ee6 100644 --- a/plugins/gpu_neon/Makefile +++ b/plugins/gpu_neon/Makefile @@ -1,4 +1,4 @@ -CFLAGS += -ggdb -Wall -O2 +CFLAGS += -ggdb -Wall -O2 -DNDEBUG include ../../config.mak diff --git a/plugins/gpu_neon/psx_gpu/common.h b/plugins/gpu_neon/psx_gpu/common.h index f299f794..d5cf3e91 100644 --- a/plugins/gpu_neon/psx_gpu/common.h +++ b/plugins/gpu_neon/psx_gpu/common.h @@ -18,5 +18,7 @@ typedef unsigned long long int u64; #include "vector_ops.h" #include "psx_gpu.h" +#define unlikely(x) __builtin_expect((x), 0) + #endif diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 68996c16..2cba8781 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -47,7 +47,8 @@ u32 zero_block_spans = 0; u32 texture_cache_loads = 0; u32 false_modulated_blocks = 0; -u32 reciprocal_table[512]; +/* double size for enhancement */ +u32 reciprocal_table[512 * 2]; typedef s32 fixed_type; @@ -453,7 +454,7 @@ void setup_blocks_shaded_untextured_undithered_unswizzled_indirect( void flush_render_block_buffer(psx_gpu_struct *psx_gpu) { - if((psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) && + if((psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) && (psx_gpu->primitive_type == PRIMITIVE_TYPE_SPRITE)) { u32 num_blocks_dest = 0; @@ -463,7 +464,7 @@ void flush_render_block_buffer(psx_gpu_struct *psx_gpu) u16 *vram_ptr = psx_gpu->vram_ptr; u32 i; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) { for(i = 0; i < psx_gpu->num_blocks; i++) { @@ -566,7 +567,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, vec_4x32u uvrg_base; vec_4x32u b_base; - vec_4x32u const_0x8000; + vec_4x32u uvrgb_phase; vec_4x16s d0_a_d3_c, d0_b, d0_c; vec_4x16s d1_a, d1_b, d1_c_d2_a; @@ -595,12 +596,12 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, setup_gradient_calculation_input(1, b); 
setup_gradient_calculation_input(2, c); - dup_4x32b(const_0x8000, 0x8000); + dup_4x32b(uvrgb_phase, psx_gpu->uvrgb_phase); shl_long_4x16b(uvrg_base, x0_a_y0_c, 16); shl_long_4x16b(b_base, x0_b, 16); - add_4x32b(uvrg_base, uvrg_base, const_0x8000); - add_4x32b(b_base, b_base, const_0x8000); + add_4x32b(uvrg_base, uvrg_base, uvrgb_phase); + add_4x32b(b_base, b_base, uvrgb_phase); // Can probably pair these, but it'll require careful register allocation sub_4x16b(d0_a_d3_c, x1_a_y1_c, x0_a_y0_c); @@ -766,6 +767,26 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b) \ +#ifndef NDEBUG +#define setup_spans_debug_check(span_edge_data_element) \ +{ \ + u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \ + if (_num_spans > MAX_SPANS) \ + *(int *)0 = 1; \ + if (_num_spans < psx_gpu->num_spans) \ + { \ + if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \ + *(int *)0 = 1; \ + if(span_edge_data_element.y > 2048) \ + *(int *)0 = 1; \ + } \ +} \ + +#else +#define setup_spans_debug_check(span_edge_data_element) \ + +#endif + #define setup_spans_prologue_alternate_yes() \ vec_2x64s alternate_x; \ vec_2x64s alternate_dx_dy; \ @@ -854,7 +875,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ dup_2x32b(edge_shifts, edge_shift); \ sub_2x32b(heights_b, heights, c_0x01); \ - shr_2x32b(height_reciprocals, edge_shifts, 12); \ + shr_2x32b(height_reciprocals, edge_shifts, 10); \ \ mla_2x32b(heights_b, x_starts, heights); \ bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \ @@ -883,8 +904,8 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, sub_2x32b(widths, x_ends, x_starts); \ width_alt = x_c - start_c; \ \ - shr_2x32b(height_reciprocals, edge_shifts, 12); \ - height_reciprocal_alt = edge_shift_alt >> 12; \ + shr_2x32b(height_reciprocals, edge_shifts, 10); \ + height_reciprocal_alt = edge_shift_alt >> 10; \ \ 
bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \ edge_shift_alt &= 0x1F; \ @@ -1069,6 +1090,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_edge_data[i].num_blocks = left_right_x_16.high.e[i]; \ span_edge_data[i].right_mask = span_shift.e[i]; \ span_edge_data[i].y = y_x4.e[i]; \ + setup_spans_debug_check(span_edge_data[i]); \ } \ \ span_edge_data += 4; \ @@ -1406,12 +1428,16 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, y_x4.e[3] = y_a + 3; setup_spans_adjust_edges_alternate_no(index_left, index_right); + // FIXME: overflow corner case + if(psx_gpu->num_spans + height_minor_b == MAX_SPANS) + height_minor_b &= ~3; + psx_gpu->num_spans += height_minor_b; - do + while(height_minor_b > 0) { setup_spans_set_x4(none, down, no); height_minor_b -= 4; - } while(height_minor_b > 0); + } } left_split_triangles++; @@ -1872,7 +1898,7 @@ void setup_blocks_##shading##_##texturing##_##dithering##_##sw##_##target( \ if(span_num_blocks) \ { \ y = span_edge_data->y; \ - fb_ptr = psx_gpu->vram_ptr + span_edge_data->left_x + (y * 1024); \ + fb_ptr = psx_gpu->vram_out_ptr + span_edge_data->left_x + (y * 1024); \ \ setup_blocks_span_initialize_##shading##_##texturing(); \ setup_blocks_span_initialize_##dithering(texturing); \ @@ -2905,8 +2931,8 @@ char *render_block_flag_strings[] = (triangle_y_direction_##direction_c << 4) | \ (triangle_winding_##winding << 6)) \ -void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, - u32 flags) +static int prepare_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + vertex_struct *vertexes_out[3]) { s32 y_top, y_bottom; s32 triangle_area; @@ -2927,7 +2953,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(b->y < a->y) @@ -2949,7 +2975,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + 
return 0; } if(triangle_area < 0) @@ -2975,7 +3001,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } if(invalidate_texture_cache_region_viewport(psx_gpu, a->x, y_top, c->x, @@ -2984,13 +3010,28 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, #ifdef PROFILE trivial_rejects++; #endif - return; + return 0; } - psx_gpu->num_spans = 0; psx_gpu->triangle_area = triangle_area; psx_gpu->triangle_winding = triangle_winding; + vertexes_out[0] = a; + vertexes_out[1] = b; + vertexes_out[2] = c; + + return 1; +} + +static void render_triangle_p(psx_gpu_struct *psx_gpu, + vertex_struct *vertex_ptrs[3], u32 flags) +{ + psx_gpu->num_spans = 0; + + vertex_struct *a = vertex_ptrs[0]; + vertex_struct *b = vertex_ptrs[1]; + vertex_struct *c = vertex_ptrs[2]; + s32 y_delta_a = b->y - a->y; s32 y_delta_b = c->y - b->y; s32 y_delta_c = c->y - a->y; @@ -3002,7 +3043,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, compute_all_gradients(psx_gpu, a, b, c); switch(y_direction_a | (y_direction_b << 2) | (y_direction_c << 4) | - (triangle_winding << 6)) + (psx_gpu->triangle_winding << 6)) { triangle_case(up, up, up, negative): triangle_case(up, up, flat, negative): @@ -3081,11 +3122,11 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, spans += psx_gpu->num_spans; #endif - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) + if(unlikely(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED)) { u32 i; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) { for(i = 0; i < psx_gpu->num_spans; i++) { @@ -3126,6 +3167,14 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, (psx_gpu); } +void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, + u32 flags) +{ + vertex_struct *vertex_ptrs[3]; + if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) + 
render_triangle_p(psx_gpu, vertex_ptrs, flags); +} + void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu); @@ -3161,14 +3210,17 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #endif -#define setup_sprite_tiled_initialize_4bpp() \ +#define setup_sprite_tiled_initialize_4bpp_clut() \ u16 *clut_ptr = psx_gpu->clut_ptr; \ vec_8x16u clut_a, clut_b; \ vec_16x8u clut_low, clut_high; \ \ load_8x16b(clut_a, clut_ptr); \ load_8x16b(clut_b, clut_ptr + 8); \ - unzip_16x8b(clut_low, clut_high, clut_a, clut_b); \ + unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \ + +#define setup_sprite_tiled_initialize_4bpp() \ + setup_sprite_tiled_initialize_4bpp_clut(); \ \ if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) \ update_texture_4bpp_cache(psx_gpu) \ @@ -3185,10 +3237,6 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) load_64b(texels, texture_block_ptr) \ -#define setup_sprite_tile_setup_block_yes(side, offset, texture_mode) \ - -#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \ - #define setup_sprite_tile_add_blocks(tile_num_blocks) \ num_blocks += tile_num_blocks; \ sprite_blocks += tile_num_blocks; \ @@ -3334,34 +3382,36 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_column_edge_post_adjust_full(edge) \ -#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \ + x4mode) \ do \ { \ sub_tile_height = column_data; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ } while(0) \ -#define 
setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \ + x4mode) \ do \ { \ u32 tiles_remaining = column_data >> 16; \ sub_tile_height = column_data & 0xFF; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ tiles_remaining -= 1; \ \ while(tiles_remaining) \ { \ sub_tile_height = 16; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ tiles_remaining--; \ } \ \ sub_tile_height = (column_data >> 8) & 0xFF; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ } while(0) \ @@ -3374,15 +3424,18 @@ do \ column_data |= (tile_height - 1) << 16 \ +#define RIGHT_MASK_BIT_SHIFT 8 +#define RIGHT_MASK_BIT_SHIFT_4x 16 + #define setup_sprite_tile_column_width_single(texture_mode, multi_height, \ - edge_mode, edge) \ + edge_mode, edge, x4mode) \ { \ setup_sprite_column_data_##multi_height(); \ left_mask_bits = left_block_mask | right_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \ - texture_mode); \ + texture_mode, x4mode); \ } \ #define setup_sprite_tiled_advance_column() \ @@ -3390,18 +3443,22 @@ do \ if((texture_offset_base & 0xF00) == 0) \ texture_offset_base -= (0x100 + 0xF00) \ +#define FB_PTR_MULTIPLIER 1 +#define FB_PTR_MULTIPLIER_4x 2 + #define setup_sprite_tile_column_width_multi(texture_mode, multi_height, \ - left_mode, right_mode) \ + 
left_mode, right_mode, x4mode) \ { \ setup_sprite_column_data_##multi_height(); \ - s32 fb_ptr_advance_column = 16 - (1024 * height); \ + s32 fb_ptr_advance_column = (16 - (1024 * height)) \ + * FB_PTR_MULTIPLIER##x4mode; \ \ tile_width -= 2; \ left_mask_bits = left_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tile_column_height_##multi_height(left_mode, right, \ - texture_mode); \ + texture_mode, x4mode); \ fb_ptr += fb_ptr_advance_column; \ \ left_mask_bits = 0x00; \ @@ -3410,22 +3467,297 @@ do \ while(tile_width) \ { \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(full, none, texture_mode); \ + setup_sprite_tile_column_height_##multi_height(full, none, \ + texture_mode, x4mode); \ fb_ptr += fb_ptr_advance_column; \ tile_width--; \ } \ \ left_mask_bits = right_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tiled_advance_column(); \ setup_sprite_tile_column_height_##multi_height(right_mode, left, \ - texture_mode); \ + texture_mode, x4mode); \ +} \ + + +/* 4x stuff */ +#define setup_sprite_tiled_initialize_4bpp_4x() \ + setup_sprite_tiled_initialize_4bpp_clut() \ + +#define setup_sprite_tiled_initialize_8bpp_4x() \ + + +#define setup_sprite_tile_full_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + 
zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 24; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ } \ +#define setup_sprite_tile_half_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + 
while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ -#define setup_sprite_tiled_builder(texture_mode) \ -void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ + +#define setup_sprite_tile_full_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + vec_16x8u texels_wide; \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; 
\ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + vec_16x8u texels_wide; \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset 
+= 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + + +#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ + texture_offset = texture_offset_base + 8; \ + fb_ptr += 16 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ + fb_ptr -= 16 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ + + +#define setup_sprite_offset_u_adjust() \ + +#define setup_sprite_comapre_left_block_mask() \ + ((left_block_mask & 0xFF) == 0xFF) \ + +#define setup_sprite_comapre_right_block_mask() \ + (((right_block_mask >> 8) & 0xFF) == 0xFF) \ + + +#define setup_sprite_offset_u_adjust_4x() \ + offset_u *= 2; \ + offset_u_right = offset_u_right * 2 + 1 \ + +#define setup_sprite_comapre_left_block_mask_4x() \ + ((left_block_mask & 0xFFFF) == 0xFFFF) \ + +#define setup_sprite_comapre_right_block_mask_4x() \ + (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) \ + + +#define setup_sprite_tiled_builder(texture_mode, x4mode) \ +void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\ s32 u, s32 v, s32 width, s32 height, u32 color) \ { \ s32 offset_u = u & 0xF; \ @@ -3437,8 +3769,10 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ s32 tile_width = width_rounded / 16; \ u32 offset_u_right = width_rounded & 0xF; \ \ - u32 left_block_mask = ~(0xFFFF << offset_u); \ - u32 right_block_mask = 0xFFFE << offset_u_right; \ + 
setup_sprite_offset_u_adjust##x4mode(); \ + \ + u32 left_block_mask = ~(0xFFFFFFFF << offset_u); \ + u32 right_block_mask = 0xFFFFFFFE << offset_u_right; \ \ u32 left_mask_bits; \ u32 right_mask_bits; \ @@ -3455,19 +3789,19 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ u32 texture_offset_base = texture_offset; \ u32 control_mask; \ \ - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - offset_u); \ + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u); \ u32 num_blocks = psx_gpu->num_blocks; \ block_struct *block = psx_gpu->blocks + num_blocks; \ \ u16 *texture_block_ptr; \ vec_8x8u texels; \ \ - setup_sprite_tiled_initialize_##texture_mode(); \ + setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ \ control_mask = tile_width == 1; \ control_mask |= (tile_height == 1) << 1; \ - control_mask |= ((left_block_mask & 0xFF) == 0xFF) << 2; \ - control_mask |= (((right_block_mask >> 8) & 0xFF) == 0xFF) << 3; \ + control_mask |= setup_sprite_comapre_left_block_mask##x4mode() << 2; \ + control_mask |= setup_sprite_comapre_right_block_mask##x4mode() << 3; \ \ sprites_##texture_mode++; \ \ @@ -3475,64 +3809,77 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ { \ default: \ case 0x0: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \ + x4mode); \ break; \ \ case 0x1: \ - setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \ + setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \ + x4mode); \ break; \ \ case 0x2: \ - setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \ + x4mode); \ break; \ \ case 0x3: \ - setup_sprite_tile_column_width_single(texture_mode, single, full, none); \ + setup_sprite_tile_column_width_single(texture_mode, single, full, none, \ + x4mode); \ 
break; \ \ case 0x4: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \ + x4mode); \ break; \ \ case 0x5: \ - setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \ + x4mode); \ break; \ \ case 0x6: \ - setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \ + setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \ + x4mode); \ break; \ \ case 0x7: \ - setup_sprite_tile_column_width_single(texture_mode, single, half, right);\ + setup_sprite_tile_column_width_single(texture_mode, single, half, right, \ + x4mode); \ break; \ \ case 0x8: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \ + x4mode); \ break; \ \ case 0x9: \ - setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \ + x4mode); \ break; \ \ case 0xA: \ - setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \ + x4mode); \ break; \ \ case 0xB: \ - setup_sprite_tile_column_width_single(texture_mode, single, half, left); \ + setup_sprite_tile_column_width_single(texture_mode, single, half, left, \ + x4mode); \ break; \ \ case 0xC: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \ + x4mode); \ break; \ \ case 0xE: \ - setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \ + setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \ + x4mode); \ break; \ } \ } \ - void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 
color); void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, @@ -3540,9 +3887,24 @@ void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); +void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); + +void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color); +void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 height, u32 color); + #ifndef NEON_BUILD -setup_sprite_tiled_builder(4bpp); -setup_sprite_tiled_builder(8bpp); +setup_sprite_tiled_builder(4bpp,); +setup_sprite_tiled_builder(8bpp,); + +setup_sprite_tiled_builder(4bpp,_4x); +setup_sprite_tiled_builder(8bpp,_4x); void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) @@ -3550,7 +3912,7 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, u32 left_offset = u & 0x7; u32 width_rounded = width + left_offset + 7; - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (s32)(x - left_offset); + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset); u32 right_width = width_rounded & 0x7; u32 block_width = width_rounded / 8; u32 fb_ptr_pitch = (1024 + 8) - (block_width * 8); @@ -3665,14 +4027,19 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } -#endif - void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { + if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | + 
RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0) + { + setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); + return; + } + u32 right_width = ((width - 1) & 0x7) + 1; u32 right_mask_bits = (0xFF << right_width); - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + x; + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; u32 block_width = (width + 7) / 8; u32 fb_ptr_pitch = 1024 - ((block_width - 1) * 8); u32 blocks_remaining; @@ -3735,6 +4102,66 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, } } +#endif + +void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 height, u32 color) +{ + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); + + u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024); + u32 *vram_ptr; + + u32 num_width; + + if(psx_gpu->num_blocks > MAX_BLOCKS) + { + flush_render_block_buffer(psx_gpu); + } + + while(height) + { + num_width = width; + + vram_ptr = (void *)vram_ptr16; + if((long)vram_ptr16 & 2) + { + *vram_ptr16 = color_32bpp; + vram_ptr = (void *)(vram_ptr16 + 1); + num_width--; + } + + while(num_width >= 4 * 2) + { + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + + vram_ptr += 4; + num_width -= 4 * 2; + } + + while(num_width >= 2) + { + *vram_ptr++ = color_32bpp; + num_width -= 2; + } + + if(num_width > 0) + { + *(u16 *)vram_ptr = color_32bpp; + } + + vram_ptr16 += 1024; + height--; + } +} #define setup_sprite_blocks_switch_textured(texture_mode) \ @@ -4155,9 +4582,6 @@ do \ { \ delta_y *= -1; \ \ - if(delta_y >= 512) \ - return; \ - \ if(delta_x > delta_y) \ { \ draw_line_span_horizontal(decrement, shading, blending, dithering, \ @@ -4171,9 +4595,6 @@ do \ } \ else \ { \ - if(delta_y >= 
512) \ - return; \ - \ if(delta_x > delta_y) \ { \ draw_line_span_horizontal(increment, shading, blending, dithering, \ @@ -4188,7 +4609,7 @@ do \ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, - u32 color) + u32 color, int double_resolution) { s32 color_r, color_g, color_b; u32 triangle_winding = 0; @@ -4240,12 +4661,22 @@ void render_line(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, u32 flags, delta_x = x_b - x_a; delta_y = y_b - y_a; - if(delta_x >= 1024) + if(delta_x >= 1024 || delta_y >= 512 || delta_y <= -512) return; + if(double_resolution) + { + x_a *= 2; + x_b *= 2; + y_a *= 2; + y_b *= 2; + delta_x *= 2; + delta_y *= 2; + } + flags &= ~RENDER_FLAGS_TEXTURE_MAP; - vram_ptr = psx_gpu->vram_ptr + (y_a * 1024) + x_a; + vram_ptr = psx_gpu->vram_out_ptr + (y_a * 1024) + x_a; control_mask = 0x0; @@ -4435,7 +4866,6 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, if((width == 0) || (height == 0)) return; - flush_render_block_buffer(psx_gpu); invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1); u32 r = color & 0xFF; @@ -4445,17 +4875,17 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, psx_gpu->mask_msb; u32 color_32bpp = color_16bpp | (color_16bpp << 16); - u32 *vram_ptr = (u32 *)(psx_gpu->vram_ptr + x + (y * 1024)); + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); u32 pitch = 512 - (width / 2); u32 num_width; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED) + if(psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) { pitch += 512; height /= 2; - if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD) + if(psx_gpu->render_mode & RENDER_INTERLACE_ODD) vram_ptr += 512; } @@ -4482,6 +4912,50 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, } } +void render_block_fill_enh(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y, + u32 width, u32 height) +{ + if((width == 0) || (height == 0)) + return; + + if(width > 
1024) + width = 1024; + + u32 r = color & 0xFF; + u32 g = (color >> 8) & 0xFF; + u32 b = (color >> 16) & 0xFF; + u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) | + psx_gpu->mask_msb; + u32 color_32bpp = color_16bpp | (color_16bpp << 16); + + u32 *vram_ptr = (u32 *)(psx_gpu->vram_out_ptr + x + (y * 1024)); + + u32 pitch = 1024 / 2 - (width / 2); + u32 num_width; + + while(height) + { + num_width = width; + while(num_width) + { + vram_ptr[0] = color_32bpp; + vram_ptr[1] = color_32bpp; + vram_ptr[2] = color_32bpp; + vram_ptr[3] = color_32bpp; + vram_ptr[4] = color_32bpp; + vram_ptr[5] = color_32bpp; + vram_ptr[6] = color_32bpp; + vram_ptr[7] = color_32bpp; + + vram_ptr += 8; + num_width -= 16; + } + + vram_ptr += pitch; + height--; + } +} + void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y, u32 width, u32 height, u32 pitch) { @@ -4522,16 +4996,17 @@ void initialize_reciprocal_table(void) u32 height_reciprocal; s32 shift; - for(height = 1; height < 512; height++) + for(height = 1; height < sizeof(reciprocal_table) + / sizeof(reciprocal_table[0]); height++) { shift = __builtin_clz(height); height_normalized = height << shift; - height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) / + height_reciprocal = ((1ULL << 51) + (height_normalized - 1)) / height_normalized; - shift = 32 - (50 - shift); + shift = 32 - (51 - shift); - reciprocal_table[height] = (height_reciprocal << 12) | shift; + reciprocal_table[height] = (height_reciprocal << 10) | shift; } } @@ -4559,8 +5034,10 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->render_state = 0; psx_gpu->render_state_base = 0; psx_gpu->num_blocks = 0; + psx_gpu->uvrgb_phase = 0x8000; psx_gpu->vram_ptr = vram; + psx_gpu->vram_out_ptr = vram; psx_gpu->texture_page_base = psx_gpu->vram_ptr; psx_gpu->texture_page_ptr = psx_gpu->vram_ptr; @@ -4573,7 +5050,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->texture_mask_width = 0xFF; 
psx_gpu->texture_mask_height = 0xFF; - psx_gpu->interlace_mode = 0; + psx_gpu->render_mode = 0; memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512); @@ -4596,6 +5073,8 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram) psx_gpu->dither_table[3] = dither_table_row(3, -1, 2, -2); psx_gpu->primitive_type = PRIMITIVE_TYPE_UNKNOWN; + + psx_gpu->enhancement_x_threshold = 256; } u64 get_us(void) @@ -4660,3 +5139,4 @@ void triangle_benchmark(psx_gpu_struct *psx_gpu) #endif +#include "psx_gpu_4x.c" diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 53a87177..846658cc 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -56,8 +56,8 @@ typedef enum typedef enum { RENDER_INTERLACE_ENABLED = 0x1, - RENDER_INTERLACE_ODD = 0x2 -} render_interlace_enum; + RENDER_INTERLACE_ODD = 0x2, +} render_mode_enum; typedef struct { @@ -122,7 +122,6 @@ typedef struct vec_4x32u g_block_span; vec_4x32u b_block_span; - // 72 bytes u32 b; u32 b_dy; @@ -138,25 +137,21 @@ typedef struct u32 triangle_color; u32 dither_table[4]; + u32 uvrgb_phase; + struct render_block_handler_struct *render_block_handler; void *texture_page_ptr; void *texture_page_base; u16 *clut_ptr; u16 *vram_ptr; + u16 *vram_out_ptr; - // 26 bytes u16 render_state_base; u16 render_state; u16 num_spans; u16 num_blocks; - s16 offset_x; - s16 offset_y; - - u16 clut_settings; - u16 texture_settings; - s16 viewport_start_x; s16 viewport_start_y; s16 viewport_end_x; @@ -164,7 +159,6 @@ typedef struct u16 mask_msb; - // 8 bytes u8 triangle_winding; u8 display_area_draw_enable; @@ -178,11 +172,27 @@ typedef struct u8 texture_window_y; u8 primitive_type; - u8 interlace_mode; + u8 render_mode; + + s16 offset_x; + s16 offset_y; + + u16 clut_settings; + u16 texture_settings; + + // enhancement stuff + u16 *enhancement_buf_ptr; + u16 *enhancement_current_buf_ptr; + u32 enhancement_x_threshold; + s16 saved_viewport_start_x; + s16 saved_viewport_start_y; 
+ s16 saved_viewport_end_x; + s16 saved_viewport_end_y; + u8 enhancement_buf_by_x16[64]; // Align up to 64 byte boundary to keep the upcoming buffers cache line - // aligned - //u8 reserved_a[0]; + // aligned, also make reachable with single immediate addition + u8 reserved_a[164]; // 8KB block_struct blocks[MAX_BLOCKS_PER_ROW]; @@ -224,7 +234,7 @@ void render_triangle(psx_gpu_struct *psx_gpu, vertex_struct *vertexes, void render_sprite(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, s32 width, s32 height, u32 flags, u32 color); void render_line(psx_gpu_struct *gpu, vertex_struct *vertexes, u32 flags, - u32 color); + u32 color, int double_resolution); u32 texture_region_mask(s32 x1, s32 y1, s32 x2, s32 y2); diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c new file mode 100644 index 00000000..83c6680f --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -0,0 +1,384 @@ +#define select_enhancement_buf_ptr(psx_gpu, x) \ + ((psx_gpu)->enhancement_buf_ptr + \ + ((psx_gpu)->enhancement_buf_by_x16[(x) / 16] << 20)) + +#ifndef NEON_BUILD +void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) +{ + u32 left_offset = u & 0x7; + u32 width_rounded = width + left_offset + 7; + + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (s32)(x - left_offset * 2); + u32 right_width = width_rounded & 0x7; + u32 block_width = width_rounded / 8; + u32 fb_ptr_pitch = (2048 + 16) - (block_width * 16); + + u32 left_mask_bits = ~(0xFFFF << (left_offset * 2)); + u32 right_mask_bits = 0xFFFC << (right_width * 2); + + u32 texture_offset_base = u + (v * 1024); + u32 texture_mask = + psx_gpu->texture_mask_width | (psx_gpu->texture_mask_height * 1024); + + u32 blocks_remaining; + u32 num_blocks = psx_gpu->num_blocks; + block_struct *block = psx_gpu->blocks + num_blocks; + + u16 *texture_page_ptr = psx_gpu->texture_page_ptr; + u16 *texture_block_ptr; + + texture_offset_base &= ~0x7; + + 
sprites_16bpp++; + + if(block_width == 1) + { + u32 mask_bits = left_mask_bits | right_mask_bits; + u32 mask_bits_a = mask_bits & 0xFF; + u32 mask_bits_b = mask_bits >> 8; + + vec_8x16u texels; + vec_8x16u texels_wide; + + while(height) + { + num_blocks += 4; + sprite_blocks += 4; + + if(num_blocks > MAX_BLOCKS) + { + flush_render_block_buffer(psx_gpu); + num_blocks = 4; + block = psx_gpu->blocks; + } + + texture_block_ptr = + texture_page_ptr + (texture_offset_base & texture_mask); + + load_128b(texels, texture_block_ptr); + + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + block->texels = texels_wide; + block->draw_mask_bits = mask_bits_a; + block->fb_ptr = fb_ptr; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = mask_bits_a; + block->fb_ptr = fb_ptr + 1024; + block++; + + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + block->texels = texels_wide; + block->draw_mask_bits = mask_bits_b; + block->fb_ptr = fb_ptr + 8; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = mask_bits_b; + block->fb_ptr = fb_ptr + 8 + 1024; + block++; + + texture_offset_base += 1024; + fb_ptr += 2048; + + height--; + psx_gpu->num_blocks = num_blocks; + } + } + else + { + u32 texture_offset; + + u32 left_mask_bits_a = left_mask_bits & 0xFF; + u32 left_mask_bits_b = left_mask_bits >> 8; + u32 right_mask_bits_a = right_mask_bits & 0xFF; + u32 right_mask_bits_b = right_mask_bits >> 8; + + vec_8x16u texels; + vec_8x16u texels_wide; + + while(height) + { + blocks_remaining = block_width - 2; + num_blocks += block_width * 4; + sprite_blocks += block_width * 4; + + if(num_blocks > MAX_BLOCKS) + { + flush_render_block_buffer(psx_gpu); + num_blocks = block_width * 4; + block = psx_gpu->blocks; + } + + texture_offset = texture_offset_base; + texture_offset_base += 1024; + + texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); + + load_128b(texels, texture_block_ptr); + + 
zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + block->texels = texels_wide; + block->draw_mask_bits = left_mask_bits_a; + block->fb_ptr = fb_ptr; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = left_mask_bits_a; + block->fb_ptr = fb_ptr + 1024; + block++; + + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + block->texels = texels_wide; + block->draw_mask_bits = left_mask_bits_b; + block->fb_ptr = fb_ptr + 8; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = left_mask_bits_b; + block->fb_ptr = fb_ptr + 8 + 1024; + block++; + + texture_offset += 8; + fb_ptr += 16; + + while(blocks_remaining) + { + texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); + load_128b(texels, texture_block_ptr); + + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + block->texels = texels_wide; + block->draw_mask_bits = 0; + block->fb_ptr = fb_ptr; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = 0; + block->fb_ptr = fb_ptr + 1024; + block++; + + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + block->texels = texels_wide; + block->draw_mask_bits = 0; + block->fb_ptr = fb_ptr + 8; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = 0; + block->fb_ptr = fb_ptr + 8 + 1024; + block++; + + texture_offset += 8; + fb_ptr += 16; + + blocks_remaining--; + } + + texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask); + load_128b(texels, texture_block_ptr); + + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low); + block->texels = texels_wide; + block->draw_mask_bits = right_mask_bits_a; + block->fb_ptr = fb_ptr; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = right_mask_bits_a; + block->fb_ptr = fb_ptr + 1024; + block++; + + zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.high, texels.high); + block->texels = texels_wide; + 
block->draw_mask_bits = right_mask_bits_b; + block->fb_ptr = fb_ptr + 8; + block++; + + block->texels = texels_wide; + block->draw_mask_bits = right_mask_bits_b; + block->fb_ptr = fb_ptr + 8 + 1024; + block++; + + fb_ptr += fb_ptr_pitch; + + height--; + psx_gpu->num_blocks = num_blocks; + } + } +} + +#endif + +static void setup_sprite_untextured_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, + s32 u, s32 v, s32 width, s32 height, u32 color) +{ + setup_sprite_untextured(psx_gpu, x, y, u, v, width * 2, height * 2, color); +} + +#define setup_sprite_blocks_switch_textured_4x(texture_mode) \ + setup_sprite_##texture_mode##_4x \ + +#define setup_sprite_blocks_switch_untextured_4x(texture_mode) \ + setup_sprite_untextured_4x \ + +#define setup_sprite_blocks_switch_4x(texturing, texture_mode) \ + setup_sprite_blocks_switch_##texturing##_4x(texture_mode) \ + + +#define render_sprite_blocks_switch_block_modulation_4x(texture_mode, \ + blend_mode, mask_evaluate, shading, dithering, texturing, blending, \ + modulation) \ +{ \ + setup_sprite_blocks_switch_4x(texturing, texture_mode), \ + texture_sprite_blocks_switch_##texturing(texture_mode), \ + shade_blocks_switch(unshaded, texturing, modulation, undithered, blending, \ + mask_evaluate), \ + blend_blocks_switch(texturing, blending, blend_mode, mask_evaluate) \ +} \ + +#define render_sprite_blocks_switch_block_blending_4x(texture_mode, \ + blend_mode, mask_evaluate, shading, dithering, texturing, blending) \ + render_sprite_blocks_switch_block_modulation_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, texturing, blending, modulated), \ + render_sprite_blocks_switch_block_modulation_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, texturing, blending, unmodulated) \ + +#define render_sprite_blocks_switch_block_texturing_4x(texture_mode, \ + blend_mode, mask_evaluate, shading, dithering, texturing) \ + render_sprite_blocks_switch_block_blending_4x(texture_mode, blend_mode, \ + mask_evaluate, 
shading, dithering, texturing, unblended), \ + render_sprite_blocks_switch_block_blending_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, texturing, blended) \ + +#define render_sprite_blocks_switch_block_dithering_4x(texture_mode, \ + blend_mode, mask_evaluate, shading, dithering) \ + render_sprite_blocks_switch_block_texturing_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, untextured), \ + render_sprite_blocks_switch_block_texturing_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithering, textured) \ + +#define render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \ + mask_evaluate, shading) \ + render_sprite_blocks_switch_block_dithering_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, undithered), \ + render_sprite_blocks_switch_block_dithering_4x(texture_mode, blend_mode, \ + mask_evaluate, shading, dithered) \ + +#define render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, \ + blend_mode, mask_evaluate) \ + render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \ + mask_evaluate, unshaded), \ + render_sprite_blocks_switch_block_shading_4x(texture_mode, blend_mode, \ + mask_evaluate, shaded) \ + +#define render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, \ + blend_mode) \ + render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, blend_mode, \ + off), \ + render_sprite_blocks_switch_block_mask_evaluate_4x(texture_mode, blend_mode, \ + on) \ + +#define render_sprite_blocks_switch_block_texture_mode_4x(texture_mode) \ + render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, average), \ + render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add), \ + render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, subtract), \ + render_sprite_blocks_switch_block_blend_mode_4x(texture_mode, add_fourth) \ + +#define render_sprite_blocks_switch_block_4x() \ + render_sprite_blocks_switch_block_texture_mode_4x(4bpp), \ + 
render_sprite_blocks_switch_block_texture_mode_4x(8bpp), \ + render_sprite_blocks_switch_block_texture_mode_4x(16bpp), \ + render_sprite_blocks_switch_block_texture_mode_4x(4bpp) \ + + +render_block_handler_struct render_sprite_block_handlers_4x[] = +{ + render_sprite_blocks_switch_block_4x() +}; + + +void render_sprite_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 u, u32 v, + s32 width, s32 height, u32 flags, u32 color) +{ + s32 x_right = x + width - 1; + s32 y_bottom = y + height - 1; + +#ifdef PROFILE + sprites++; +#endif + + if(x < psx_gpu->viewport_start_x) + { + u32 clip = psx_gpu->viewport_start_x - x; + x += clip; + u += clip; + width -= clip; + } + + if(y < psx_gpu->viewport_start_y) + { + s32 clip = psx_gpu->viewport_start_y - y; + y += clip; + v += clip; + height -= clip; + } + + if(x_right > psx_gpu->viewport_end_x) + width -= x_right - psx_gpu->viewport_end_x; + + if(y_bottom > psx_gpu->viewport_end_y) + height -= y_bottom - psx_gpu->viewport_end_y; + + if((width <= 0) || (height <= 0)) + return; + + psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); + + x *= 2; + y *= 2; + +#ifdef PROFILE + span_pixels += width * height; + spans += height; +#endif + + u32 render_state = flags & + (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | + RENDER_FLAGS_TEXTURE_MAP); + render_state |= + (psx_gpu->render_state_base & ~RENDER_STATE_DITHER); + + if((psx_gpu->render_state != render_state) || + (psx_gpu->primitive_type != PRIMITIVE_TYPE_SPRITE)) + { + psx_gpu->render_state = render_state; + flush_render_block_buffer(psx_gpu); +#ifdef PROFILE + state_changes++; +#endif + } + + psx_gpu->primitive_type = PRIMITIVE_TYPE_SPRITE; + + color &= 0xFFFFFF; + + if(psx_gpu->triangle_color != color) + { + flush_render_block_buffer(psx_gpu); + psx_gpu->triangle_color = color; + } + + if(color == 0x808080) + render_state |= RENDER_FLAGS_MODULATE_TEXELS; + + render_block_handler_struct *render_block_handler = + &(render_sprite_block_handlers_4x[render_state]); + 
psx_gpu->render_block_handler = render_block_handler; + + ((setup_sprite_function_type *)render_block_handler->setup_blocks) + (psx_gpu, x, y, u, v, width, height, color); +} + diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 294685ae..4e1e4032 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -1,5 +1,6 @@ /* * Copyright (C) 2011 Gilead Kutnick "Exophase" + * Copyright (C) 2012 Gražvydas Ignotas "notaz" * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -16,65 +17,13 @@ #define MAX_BLOCKS 64 #define MAX_BLOCKS_PER_ROW 128 -#define psx_gpu_test_mask_offset 0 -#define psx_gpu_uvrg_offset 16 -#define psx_gpu_uvrg_dx_offset 32 -#define psx_gpu_uvrg_dy_offset 48 -#define psx_gpu_u_block_span_offset 64 -#define psx_gpu_v_block_span_offset 80 -#define psx_gpu_r_block_span_offset 96 -#define psx_gpu_g_block_span_offset 112 -#define psx_gpu_b_block_span_offset 128 - -#define psx_gpu_b_dx_offset 132 - -#define psx_gpu_b_offset 144 -#define psx_gpu_b_dy_offset 148 -#define psx_gpu_triangle_area_offset 152 -#define psx_gpu_texture_window_settings_offset 156 -#define psx_gpu_current_texture_mask_offset 160 -#define psx_gpu_viewport_mask_offset 164 -#define psx_gpu_dirty_textures_4bpp_mask_offset 168 -#define psx_gpu_dirty_textures_8bpp_mask_offset 172 -#define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 176 -#define psx_gpu_triangle_color_offset 180 -#define psx_gpu_dither_table_offset 184 -#define psx_gpu_render_block_handler_offset 200 -#define psx_gpu_texture_page_ptr_offset 204 -#define psx_gpu_texture_page_base_offset 208 -#define psx_gpu_clut_ptr_offset 212 -#define psx_gpu_vram_ptr_offset 216 - -#define psx_gpu_render_state_base_offset 220 -#define psx_gpu_render_state_offset 222 -#define psx_gpu_num_spans_offset 224 -#define psx_gpu_num_blocks_offset 226 -#define 
psx_gpu_offset_x_offset 228 -#define psx_gpu_offset_y_offset 230 -#define psx_gpu_clut_settings_offset 232 -#define psx_gpu_texture_settings_offset 234 -#define psx_gpu_viewport_start_x_offset 236 -#define psx_gpu_viewport_start_y_offset 238 -#define psx_gpu_viewport_end_x_offset 240 -#define psx_gpu_viewport_end_y_offset 242 -#define psx_gpu_mask_msb_offset 244 - -#define psx_gpu_triangle_winding_offset 246 -#define psx_gpu_display_area_draw_enable_offset 247 -#define psx_gpu_current_texture_page_offset 248 -#define psx_gpu_last_8bpp_texture_page_offset 249 -#define psx_gpu_texture_mask_width_offset 250 -#define psx_gpu_texture_mask_height_offset 251 -#define psx_gpu_texture_window_x_offset 252 -#define psx_gpu_texture_window_y_offset 253 -#define psx_gpu_primitive_type_offset 254 - -#define psx_gpu_reserved_a_offset 255 - -#define psx_gpu_blocks_offset 0x0100 -#define psx_gpu_span_uvrg_offset_offset 0x2100 -#define psx_gpu_span_edge_data_offset 0x4100 -#define psx_gpu_span_b_offset_offset 0x5100 +#define RENDER_STATE_MASK_EVALUATE 0x20 +#define RENDER_FLAGS_MODULATE_TEXELS 0x1 +#define RENDER_FLAGS_BLEND 0x2 + +#include "psx_gpu_offsets.h" + +#define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4) #define edge_data_left_x_offset 0 #define edge_data_num_blocks_offset 2 @@ -238,6 +187,7 @@ #define uvrg_dx3l d6 #define uvrg_dx3h d7 +#define uvrgb_phase q13 .align 4 @@ -369,11 +319,16 @@ function(compute_all_gradients) vmull.s16 ga_uvrg_y, d0_b, d1_b rsbmi ga_bx, ga_bx, #0 + @ r12 = psx_gpu->uvrgb_phase + ldr r12, [ psx_gpu, #psx_gpu_uvrgb_phase_offset ] + vmlsl.s16 ga_uvrg_y, d2_b, d3_b movs gs_by, ga_by, asr #31 vshr.u64 d0, d30, #22 - mov b_base, b0, lsl #16 + add b_base, r12, b0, lsl #16 + + vdup.u32 uvrgb_phase, r12 rsbmi ga_by, ga_by, #0 vclt.s32 gs_uvrg_x, ga_uvrg_x, #0 @ gs_uvrg_x = ga_uvrg_x < 0 @@ -382,7 +337,6 @@ function(compute_all_gradients) ldrb r12, [ psx_gpu, #psx_gpu_triangle_winding_offset ] vclt.s32 gs_uvrg_y, ga_uvrg_y, #0 @ gs_uvrg_y = 
ga_uvrg_y < 0 - add b_base, b_base, #0x8000 rsb r12, r12, #0 @ r12 = -(triangle->winding) vdup.u32 w_mask, r12 @ w_mask = { -w, -w, -w, -w } @@ -391,7 +345,7 @@ function(compute_all_gradients) vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16 vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift } - vorr.u32 uvrg_base, #0x8000 + vadd.u32 uvrg_base, uvrgb_phase vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x) vmov area_r_s, s0 @ area_r_s = triangle_reciprocal @@ -657,7 +611,7 @@ function(compute_all_gradients) \ vdup.u32 edge_shifts, temp; \ vsub.u32 heights_b, heights, c_0x01; \ - vshr.u32 height_reciprocals, edge_shifts, #12; \ + vshr.u32 height_reciprocals, edge_shifts, #10; \ \ vmla.s32 heights_b, x_starts, heights; \ vbic.u16 edge_shifts, #0xE0; \ @@ -682,8 +636,8 @@ function(compute_all_gradients) vsub.u32 heights_b, heights, c_0x01; \ sub height_b_alt, height_minor_b, #1; \ \ - vshr.u32 height_reciprocals, edge_shifts, #12; \ - lsr height_reciprocal_alt, edge_shift_alt, #12; \ + vshr.u32 height_reciprocals, edge_shifts, #10; \ + lsr height_reciprocal_alt, edge_shift_alt, #10; \ \ vmla.s32 heights_b, x_starts, heights; \ mla height_b_alt, height_minor_b, start_c, height_b_alt; \ @@ -1221,6 +1175,10 @@ function(setup_spans_up_down) ldrh temp, [ psx_gpu, #psx_gpu_num_spans_offset ] add temp, temp, height_minor_b + + cmp temp, #MAX_SPANS + beq 5f + strh temp, [ psx_gpu, #psx_gpu_num_spans_offset ] 2: @@ -1236,6 +1194,15 @@ function(setup_spans_up_down) setup_spans_prologue_b() bal 4b + 5: + // FIXME: overflow corner case + sub temp, temp, height_minor_b + bics height_minor_b, #3 + add temp, temp, height_minor_b + strh temp, [ psx_gpu, #psx_gpu_num_spans_offset ] + bne 2b + bal 1b + .pool #undef span_uvrg_offset @@ -1393,7 +1360,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ 
psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -1660,7 +1627,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -1855,7 +1822,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ] ldrh y, [ span_edge_data, #edge_data_y_offset ] - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] cmp span_num_blocks, #0 beq 1f @@ -1975,7 +1942,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct) ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ] ldrh y, [ span_edge_data, #edge_data_y_offset ] - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] cmp span_num_blocks, #0 beq 1f @@ -2162,7 +2129,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -2402,7 +2369,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset; \ \ ldrh y, [ span_edge_data, #edge_data_y_offset ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ cmp span_num_blocks, #0; \ beq 1f; \ @@ -3239,6 +3206,7 @@ 
function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ shade_blocks_textured_modulated_load_bdm_##shading(); \ vshrn.u16 texels_b, texels, #7; \ \ + pld [ block_ptr_load_a ]; \ vmovn.u16 texels_r, texels; \ vmlal.u8 pixels, pixels_r_low, d64_1; \ \ @@ -3437,10 +3405,12 @@ function(shade_blocks_textured_unmodulated_direct) [ draw_mask_bits_ptr, :16 ], c_64 vbif.u16 fb_pixels, pixels, draw_mask_combined - vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64 - sub fb_ptr_cmp, fb_ptr_next, fb_ptr + pld [ fb_ptr_next, #64 ] + add fb_ptr_cmp, fb_ptr_cmp, #14 + vld1.u32 { pixels }, [ block_ptr_load, :128 ], c_64 + cmp fb_ptr_cmp, #28 bls 4f @@ -3799,11 +3769,15 @@ function(blend_blocks_textured_add_##mask_evaluate) \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ vand.u16 pixels_mg, pixels, d128_0x83E0; \ \ - vbit.u16 blend_pixels, fb_pixels, draw_mask; \ - vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \ + sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \ + pld [ fb_ptr_next, #64 ]; \ \ sub fb_ptr_cmp, fb_ptr_next, fb_ptr; \ + vbit.u16 blend_pixels, fb_pixels, draw_mask; \ + \ add fb_ptr_cmp, fb_ptr_cmp, #14; \ + vld1.u32 { draw_mask }, [ draw_mask_ptr, :128 ], c_64; \ + \ cmp fb_ptr_cmp, #28; \ bls 2f; \ \ @@ -4456,6 +4430,12 @@ function(render_block_fill_body) #define draw_mask_fb_ptr_left d2 #define draw_mask_fb_ptr_right d3 +#define draw_mask_fb_ptr_left_a d2 +#define draw_mask_fb_ptr_left_b d3 +#define draw_mask_fb_ptr_right_a d10 +#define draw_mask_fb_ptr_right_b d11 +#define draw_masks_fb_ptrs2 q5 + #define clut_low_a d4 #define clut_low_b d5 #define clut_high_a d6 @@ -4467,37 +4447,24 @@ function(render_block_fill_body) #define clut_a q2 #define clut_b q3 -#define texels_low d10 -#define texels_high d11 - +#define texels_low d12 +#define texels_high d13 -setup_sprite_flush_blocks_single: - vpush { q1 - q4 } - - stmdb sp!, { r0 - r3, r12, r14 } - bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } - - vpop { q1 - q4 } - - add 
block, psx_gpu, #psx_gpu_blocks_offset - - mov num_blocks, sub_tile_height - bx lr +#define texels_wide_low d14 +#define texels_wide_high d15 +#define texels_wide q7 -setup_sprite_flush_blocks_double: - vpush { q1 - q4 } +setup_sprite_flush_blocks: + vpush { q1 - q5 } stmdb sp!, { r0 - r3, r12, r14 } bl flush_render_block_buffer ldmia sp!, { r0 - r3, r12, r14 } - vpop { q1 - q4 } + vpop { q1 - q5 } add block, psx_gpu, #psx_gpu_blocks_offset - - mov num_blocks, sub_tile_height, lsl #1 bx lr @@ -4535,8 +4502,6 @@ setup_sprite_update_texture_8bpp_cache: blne setup_sprite_update_texture_8bpp_cache \ -#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \ - #define setup_sprite_block_count_single() \ sub_tile_height \ @@ -4547,7 +4512,8 @@ setup_sprite_update_texture_8bpp_cache: add num_blocks, num_blocks, setup_sprite_block_count_##type(); \ cmp num_blocks, #MAX_BLOCKS; \ \ - blgt setup_sprite_flush_blocks_##type \ + movgt num_blocks, setup_sprite_block_count_##type(); \ + blgt setup_sprite_flush_blocks \ #define setup_sprite_tile_full_4bpp(edge) \ @@ -4729,31 +4695,33 @@ setup_sprite_update_texture_8bpp_cache: #define setup_sprite_tile_column_edge_post_adjust_full(edge) \ -#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \ + x4mode) \ mov sub_tile_height, column_data; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \ -#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \ + x4mode) \ and 
sub_tile_height, column_data, #0xFF; \ mov tiles_remaining, column_data, lsr #16; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ \ subs tiles_remaining, tiles_remaining, #1; \ beq 2f; \ \ 3: \ mov sub_tile_height, #16; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ subs tiles_remaining, tiles_remaining, #1; \ bne 3b; \ \ 2: \ uxtb sub_tile_height, column_data, ror #8; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \ #define setup_sprite_column_data_single() \ @@ -4772,17 +4740,30 @@ setup_sprite_update_texture_8bpp_cache: \ orr column_data, column_data, height_rounded, lsl #8 \ -#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \ - edge_mode, edge) \ - setup_sprite_##texture_mode##_single_##multi_height##_##edge_mode##_##edge: \ +#define setup_sprite_setup_left_draw_mask_fb_ptr() \ + vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ + vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \ + +#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column() \ + mov fb_ptr_advance_column, #32; \ + vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ + \ + sub fb_ptr_advance_column, height, lsl #11; \ + vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \ + +#define setup_sprite_setup_right_draw_mask_fb_ptr() \ + vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \ + vdup.u8 draw_mask_fb_ptr_right, block_masks[5] \ + +#define setup_sprite_tile_column_width_single(tm, multi_height, edge_mode, \ + edge, x4mode) \ + 
setup_sprite_##tm##_single_##multi_height##_##edge_mode##_##edge##x4mode: \ setup_sprite_column_data_##multi_height(); \ vext.32 block_masks_shifted, block_masks, block_masks, #1; \ vorr.u32 block_masks, block_masks, block_masks_shifted; \ - vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ - vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \ + setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \ \ - setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \ - texture_mode); \ + setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \ ldmia sp!, { r4 - r11, pc } \ #define setup_sprite_tiled_advance_column() \ @@ -4791,39 +4772,337 @@ setup_sprite_update_texture_8bpp_cache: subeq texture_offset_base, texture_offset_base, #(0x100 + 0xF00) \ #define setup_sprite_tile_column_width_multi(tm, multi_height, left_mode, \ - right_mode) \ - setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode: \ + right_mode, x4mode) \ + setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode##x4mode:\ setup_sprite_column_data_##multi_height(); \ - mov fb_ptr_advance_column, #32; \ \ - sub fb_ptr_advance_column, height, lsl #11; \ - vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ + setup_sprite_setup_left_draw_mask_fb_ptr_advance_column##x4mode(); \ \ - vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \ - setup_sprite_tile_column_height_##multi_height(left_mode, right, tm); \ + setup_sprite_tile_column_height_##multi_height(left_mode, right, tm, x4mode);\ \ subs tile_width, tile_width, #2; \ add fb_ptr, fb_ptr, fb_ptr_advance_column; \ \ - vmov.u8 draw_masks_fb_ptrs, #0; \ beq 1f; \ \ + vmov.u8 draw_masks_fb_ptrs, #0; \ + vmov.u8 draw_masks_fb_ptrs2, #0; \ + \ 0: \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(full, none, tm); \ + setup_sprite_tile_column_height_##multi_height(full, none, tm, x4mode); \ add fb_ptr, fb_ptr, fb_ptr_advance_column; \ subs tile_width, tile_width, #1; \ bne 0b; \ 
\ 1: \ - vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \ - vdup.u8 draw_mask_fb_ptr_right, block_masks[5]; \ + setup_sprite_setup_right_draw_mask_fb_ptr##x4mode(); \ \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(right_mode, left, tm); \ + setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\ ldmia sp!, { r4 - r11, pc } \ +#define setup_sprite_offset_u_adjust() \ + +#define setup_sprite_get_left_block_mask() \ + and left_block_mask, left_block_mask, #0xFF \ + +#define setup_sprite_compare_left_block_mask() \ + cmp left_block_mask, #0xFF \ + +#define setup_sprite_get_right_block_mask() \ + uxtb right_block_mask, right_block_mask, ror #8 \ + +#define setup_sprite_compare_right_block_mask() \ + cmp right_block_mask, #0xFF \ + + + +/* 4x stuff */ +#define fb_ptr2 column_data + +#define setup_sprite_offset_u_adjust_4x() \ + sub fb_ptr, fb_ptr, offset_u, lsl #1; \ + lsl offset_u_right, #1; \ + lsl offset_u, #1; \ + add offset_u_right, #1 \ + +#define setup_sprite_get_left_block_mask_4x() \ + sxth left_block_mask, left_block_mask \ + +#define setup_sprite_compare_left_block_mask_4x() \ + cmp left_block_mask, #0xFFFFFFFF \ + +#define setup_sprite_get_right_block_mask_4x() \ + sxth right_block_mask, right_block_mask, ror #16 \ + +#define setup_sprite_compare_right_block_mask_4x() \ + cmp right_block_mask, #0xFFFFFFFF \ + + +#define widen_texels_16bpp(texels_) \ + vmov texels_wide_low, texels_; \ + vmov texels_wide_high, texels_; \ + vzip.16 texels_wide_low, texels_wide_high \ + +#define widen_texels_8bpp(texels_) \ + vmov texels_wide_low, texels_; \ + vmov texels_wide_high, texels_; \ + vzip.8 texels_wide_low, texels_wide_high \ + +#define write_block_16bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) \ + vst1.u32 { texels_ }, [ block_, :128 ]; \ + add block_, block_, #40; \ + \ + vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \ + vst1.u32 { draw_mask_fb_ptr_ }, [ block_, :64 ]; \ + add block_, block_, #24 \ + 
+/* assumes 16-byte offset already added to block_ */ +#define write_block_8bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) \ + vst1.u32 { texels_ }, [ block_, :64 ]; \ + add block_, block_, #24; \ + \ + vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \ + vst1.u32 { draw_mask_fb_ptr_ }, [ block_, :64 ]; \ + add block_, block_, #40 \ + +#define do_texture_block_16bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \ + draw_mask_fb_ptr_b_) \ + widen_texels_16bpp(texels_low); \ + add fb_ptr_tmp, fb_ptr, #1024*2; \ + \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr); \ + \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \ + widen_texels_16bpp(texels_high); \ + \ + add fb_ptr_tmp, fb_ptr, #8*2; \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \ + \ + add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \ + +#define do_texture_block_8bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \ + draw_mask_fb_ptr_b_) \ + widen_texels_8bpp(texels); \ + add fb_ptr_tmp, fb_ptr, #1024*2; \ + \ + write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr); \ + write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \ + \ + add fb_ptr_tmp, fb_ptr, #8*2; \ + write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \ + \ + add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \ + write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \ + + +#define setup_sprite_tiled_initialize_4bpp_4x() \ + ldr clut_ptr, [ psx_gpu, #psx_gpu_clut_ptr_offset ]; \ + vld1.u32 { clut_a, clut_b }, [ clut_ptr, :128 ]; \ + \ + vuzp.u8 clut_a, clut_b \ + +#define setup_sprite_tiled_initialize_8bpp_4x() \ + + +#define setup_sprite_block_count_single_4x() \ + sub_tile_height, lsl #2 \ + +#define setup_sprite_block_count_double_4x() \ + sub_tile_height, lsl #(1+2) \ + +#define setup_sprite_tile_full_4bpp_4x(edge) \ + setup_sprite_tile_add_blocks(double_4x); \ + 
str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + add texture_block_ptr, texture_offset, #8; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + \ + and texture_block_ptr, texture_block_ptr, texture_mask; \ + vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ + \ + vzip.8 texels_low, texels_high; \ + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \ + draw_mask_fb_ptr_left_b); \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + pld [ fb_ptr, #2048 ]; \ + \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + add fb_ptr, fb_ptr, #16*2; \ + \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ + \ + vzip.8 texels_low, texels_high; \ + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \ + draw_mask_fb_ptr_right_b); \ + \ + add texture_offset, texture_offset, #0x10; \ + add fb_ptr, fb_ptr, #(2048 - 16) * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_half_4bpp_4x(edge) \ + setup_sprite_tile_add_blocks(single_4x); \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + \ + vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ + add texture_offset, texture_offset, #0x10; \ + \ + vzip.8 texels_low, 
texels_high; \ + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ + draw_mask_fb_ptr_##edge##_b); \ + \ + pld [ fb_ptr, #2048 ]; \ + add fb_ptr, fb_ptr, #2048 * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_full_8bpp_4x(edge) \ + setup_sprite_tile_add_blocks(double_4x); \ + add block, block, #16; \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + add texture_block_ptr, texture_offset, #8; \ + do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \ + draw_mask_fb_ptr_left_b); \ + \ + pld [ fb_ptr, #2048 ]; \ + and texture_block_ptr, texture_block_ptr, texture_mask; \ + \ + add fb_ptr, fb_ptr, #16*2; \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \ + draw_mask_fb_ptr_right_b); \ + \ + add texture_offset, texture_offset, #0x10; \ + add fb_ptr, fb_ptr, #(2048 - 16) * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + sub block, block, #16; \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_half_8bpp_4x(edge) \ + setup_sprite_tile_add_blocks(single_4x); \ + add block, block, #16; \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ 
+ pld [ fb_ptr, #2048 ]; \ + do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ + draw_mask_fb_ptr_##edge##_b); \ + \ + add texture_offset, texture_offset, #0x10; \ + add fb_ptr, fb_ptr, #2048 * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + sub block, block, #16; \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ + add texture_offset, texture_offset_base, #8; \ + add fb_ptr, fb_ptr, #16 * 2 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ + mov texture_offset, texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ + mov texture_offset, texture_offset_base \ + +#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ + sub fb_ptr, fb_ptr, #16 * 2 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ + + +#define setup_sprite_setup_left_draw_mask_fb_ptr_4x() \ + vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \ + vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \ + vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \ + vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \ + +#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column_4x() \ + mov fb_ptr_advance_column, #32 * 2; \ + vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \ + vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \ + sub fb_ptr_advance_column, height, lsl #11 + 1; \ + vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \ + vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] 
\ + +#define setup_sprite_setup_right_draw_mask_fb_ptr_4x() \ + vdup.u8 draw_mask_fb_ptr_left_a, block_masks[4]; \ + vdup.u8 draw_mask_fb_ptr_left_b, block_masks[5]; \ + vdup.u8 draw_mask_fb_ptr_right_a, block_masks[6]; \ + vdup.u8 draw_mask_fb_ptr_right_b, block_masks[7] \ + + // r0: psx_gpu // r1: x // r2: y @@ -4833,34 +5112,48 @@ setup_sprite_update_texture_8bpp_cache: // [ sp + 8 ]: height // [ sp + 12 ]: color (unused) -#define setup_sprite_tiled_builder(texture_mode) \ - \ -setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \ -setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \ -setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \ -setup_sprite_tile_column_width_single(texture_mode, single, full, none); \ -setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \ -setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \ -setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \ -setup_sprite_tile_column_width_single(texture_mode, single, half, right); \ -setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \ -setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \ -setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \ -setup_sprite_tile_column_width_single(texture_mode, single, half, left); \ -setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \ -setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \ +#define setup_sprite_tiled_builder(texture_mode, x4mode) \ + \ +setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, single, full, none, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, 
multi, half, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, single, half, right, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, single, half, left, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \ + x4mode); \ \ .align 4; \ \ -function(setup_sprite_##texture_mode) \ +function(setup_sprite_##texture_mode##x4mode) \ stmdb sp!, { r4 - r11, r14 }; \ - setup_sprite_tiled_initialize_##texture_mode(); \ + setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ \ ldr v, [ sp, #36 ]; \ and offset_u, u, #0xF; \ \ ldr width, [ sp, #40 ]; \ - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]; \ + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]; \ \ ldr height, [ sp, #44 ]; \ add fb_ptr, fb_ptr, y, lsl #11; \ @@ -4883,11 +5176,13 @@ function(setup_sprite_##texture_mode) \ \ /* texture_offset_base = VH-UH-UL-00 */\ bfi texture_offset_base, u, #4, #8; \ - movw right_block_mask, #0xFFFE; \ + mov right_block_mask, #0xFFFFFFFE; \ + \ + setup_sprite_offset_u_adjust##x4mode(); \ \ /* texture_offset_base = VH-UH-VL-00 */\ bfi texture_offset_base, v, #4, #4; \ - movw left_block_mask, #0xFFFF; \ + mov left_block_mask, #0xFFFFFFFF; \ \ mov tile_height, height_rounded, lsr #4; \ mvn left_block_mask, left_block_mask, lsl offset_u; \ @@ -4907,16 +5202,16 @@ function(setup_sprite_##texture_mode) \ \ /* texture_mask = HH-WH-HL-WL */\ bfi texture_mask, texture_mask_rev, #8, #4; \ - and 
left_block_mask, left_block_mask, #0xFF; \ + setup_sprite_get_left_block_mask##x4mode(); \ \ mov control_mask, #0; \ - cmp left_block_mask, #0xFF; \ + setup_sprite_compare_left_block_mask##x4mode(); \ \ - uxtb right_block_mask, right_block_mask, ror #8; \ + setup_sprite_get_right_block_mask##x4mode(); \ orreq control_mask, control_mask, #0x4; \ \ ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]; \ - cmp right_block_mask, #0xFF; \ + setup_sprite_compare_right_block_mask##x4mode(); \ \ orreq control_mask, control_mask, #0x8; \ cmp tile_width, #1; \ @@ -4931,25 +5226,31 @@ function(setup_sprite_##texture_mode) \ ldr pc, [ pc, control_mask, lsl #2 ]; \ nop; \ \ - .word setup_sprite_##texture_mode##_multi_multi_full_full; \ - .word setup_sprite_##texture_mode##_single_multi_full_none; \ - .word setup_sprite_##texture_mode##_multi_single_full_full; \ - .word setup_sprite_##texture_mode##_single_single_full_none; \ - .word setup_sprite_##texture_mode##_multi_multi_half_full; \ - .word setup_sprite_##texture_mode##_single_multi_half_right; \ - .word setup_sprite_##texture_mode##_multi_single_half_full; \ - .word setup_sprite_##texture_mode##_single_single_half_right; \ - .word setup_sprite_##texture_mode##_multi_multi_full_half; \ - .word setup_sprite_##texture_mode##_single_multi_half_left; \ - .word setup_sprite_##texture_mode##_multi_single_full_half; \ - .word setup_sprite_##texture_mode##_single_single_half_left; \ - .word setup_sprite_##texture_mode##_multi_multi_half_half; \ + .word setup_sprite_##texture_mode##_multi_multi_full_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_multi_full_none##x4mode; \ + .word setup_sprite_##texture_mode##_multi_single_full_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_single_full_none##x4mode; \ + .word setup_sprite_##texture_mode##_multi_multi_half_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_multi_half_right##x4mode; \ + .word 
setup_sprite_##texture_mode##_multi_single_half_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_single_half_right##x4mode; \ + .word setup_sprite_##texture_mode##_multi_multi_full_half##x4mode; \ + .word setup_sprite_##texture_mode##_single_multi_half_left##x4mode; \ + .word setup_sprite_##texture_mode##_multi_single_full_half##x4mode; \ + .word setup_sprite_##texture_mode##_single_single_half_left##x4mode; \ + .word setup_sprite_##texture_mode##_multi_multi_half_half##x4mode; \ .word 0x00000000; \ - .word setup_sprite_##texture_mode##_multi_single_half_half \ + .word setup_sprite_##texture_mode##_multi_single_half_half##x4mode; \ -setup_sprite_tiled_builder(4bpp); -setup_sprite_tiled_builder(8bpp); +setup_sprite_tiled_builder(4bpp,); +setup_sprite_tiled_builder(8bpp,); + +#undef draw_mask_fb_ptr_left +#undef draw_mask_fb_ptr_right + +setup_sprite_tiled_builder(4bpp, _4x); +setup_sprite_tiled_builder(8bpp, _4x); #undef block_ptr @@ -5038,6 +5339,12 @@ function(texture_sprite_blocks_8bpp) #undef texture_mask #undef num_blocks #undef texture_offset +#undef texels_low +#undef texels_high +#undef texels_wide_low +#undef texels_wide_high +#undef texels_wide +#undef fb_ptr2 #define psx_gpu r0 #define x r1 @@ -5049,6 +5356,7 @@ function(texture_sprite_blocks_8bpp) #define left_offset r8 #define width_rounded r9 #define right_width r10 + #define block_width r11 #define texture_offset_base r1 @@ -5059,6 +5367,7 @@ function(texture_sprite_blocks_8bpp) #define fb_ptr r7 #define texture_offset r8 #define blocks_remaining r9 +#define fb_ptr2 r10 #define fb_ptr_pitch r12 #define texture_block_ptr r14 @@ -5077,29 +5386,23 @@ function(texture_sprite_blocks_8bpp) #define draw_mask_fb_ptr d2 #define texels q2 +#define draw_mask_fb_ptr_a d2 +#define draw_mask_fb_ptr_b d3 +#define texels_low d4 +#define texels_high d5 +#define texels_wide_low d6 +#define texels_wide_high d7 +#define texels_wide q3 -setup_sprites_16bpp_flush_single: - vpush { d0 - d2 } - - stmdb sp!, { r0 
- r3, r12, r14 } - bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } - - vpop { d0 - d2 } - - add block, psx_gpu, #psx_gpu_blocks_offset - mov num_blocks, #1 - - bx lr -setup_sprites_16bpp_flush_row: - vpush { d0 - d2 } +setup_sprites_16bpp_flush: + vpush { d0 - d3 } stmdb sp!, { r0 - r3, r12, r14 } bl flush_render_block_buffer ldmia sp!, { r0 - r3, r12, r14 } - vpop { d0 - d2 } + vpop { d0 - d3 } add block, psx_gpu, #psx_gpu_blocks_offset mov num_blocks, block_width @@ -5108,7 +5411,7 @@ setup_sprites_16bpp_flush_row: function(setup_sprite_16bpp) stmdb sp!, { r4 - r11, r14 } - ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] ldr v, [ sp, #36 ] add fb_ptr, fb_ptr, y, lsl #11 @@ -5164,7 +5467,7 @@ function(setup_sprite_16bpp) 1: add num_blocks, num_blocks, #1 cmp num_blocks, #MAX_BLOCKS - blgt setup_sprites_16bpp_flush_single + blgt setup_sprites_16bpp_flush and texture_block_ptr, texture_offset_base, texture_mask subs height, height, #1 @@ -5193,7 +5496,7 @@ function(setup_sprite_16bpp) mov texture_offset, texture_offset_base cmp num_blocks, #MAX_BLOCKS - blgt setup_sprites_16bpp_flush_row + blgt setup_sprites_16bpp_flush add texture_offset_base, texture_offset_base, #2048 and texture_block_ptr, texture_offset, texture_mask @@ -5264,6 +5567,290 @@ function(setup_sprite_16bpp) ldmia sp!, { r4 - r11, pc } +// 4x version +// FIXME: duplicate code with normal version :( +#undef draw_mask_fb_ptr + +function(setup_sprite_16bpp_4x) + stmdb sp!, { r4 - r11, r14 } + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] + + ldr v, [ sp, #36 ] + add fb_ptr, fb_ptr, y, lsl #11 + + ldr width, [ sp, #40 ] + add fb_ptr, fb_ptr, x, lsl #1 + + ldr height, [ sp, #44 ] + and left_offset, u, #0x7 + + add texture_offset_base, u, u + add width_rounded, width, #7 + + add texture_offset_base, v, lsl #11 + movw left_mask_bits, #0xFFFF + + ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ] + add 
width_rounded, width_rounded, left_offset + + lsl left_offset, #1 + + ldrb texture_mask_height, [ psx_gpu, #psx_gpu_texture_mask_height_offset ] + sub fb_ptr, fb_ptr, left_offset, lsl #1 + + add texture_mask, texture_mask_width, texture_mask_width + movw right_mask_bits, #0xFFFC + + and right_width, width_rounded, #0x7 + mvn left_mask_bits, left_mask_bits, lsl left_offset + + lsl right_width, #1 + + add texture_mask, texture_mask_height, lsl #11 + mov block_width, width_rounded, lsr #3 + + mov right_mask_bits, right_mask_bits, lsl right_width + movw fb_ptr_pitch, #(2048 + 16) * 2 + + sub fb_ptr_pitch, fb_ptr_pitch, block_width, lsl #4+1 + vmov block_masks, left_mask_bits, right_mask_bits + + ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + add block, psx_gpu, #psx_gpu_blocks_offset + + bic texture_offset_base, texture_offset_base, #0xF + cmp block_width, #1 + + ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] + add block, block, num_blocks, lsl #6 + + lsl block_width, #2 + bne 0f + + vext.32 block_masks_shifted, block_masks, block_masks, #1 + vorr.u32 block_masks, block_masks, block_masks_shifted + vdup.u8 draw_mask_fb_ptr_a, block_masks[0] + vdup.u8 draw_mask_fb_ptr_b, block_masks[1] + + 1: + add num_blocks, num_blocks, block_width + cmp num_blocks, #MAX_BLOCKS + blgt setup_sprites_16bpp_flush + + and texture_block_ptr, texture_offset_base, texture_mask + subs height, height, #1 + + add texture_block_ptr, texture_page_ptr, texture_block_ptr + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + + add texture_offset_base, texture_offset_base, #2048 + add fb_ptr, fb_ptr, #2048*2 + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + bne 1b + + ldmia sp!, { r4 - r11, pc } + + 0: + add num_blocks, num_blocks, block_width + mov texture_offset, texture_offset_base + + vdup.u8 draw_mask_fb_ptr_a, block_masks[0] // left_mask_bits + vdup.u8 draw_mask_fb_ptr_b, 
block_masks[1] + + cmp num_blocks, #MAX_BLOCKS + blgt setup_sprites_16bpp_flush + + add texture_offset_base, texture_offset_base, #2048 + and texture_block_ptr, texture_offset, texture_mask + + add texture_block_ptr, texture_page_ptr, texture_block_ptr + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + + subs blocks_remaining, block_width, #2*4 + add texture_offset, texture_offset, #16 + + vmov.u8 draw_mask_fb_ptr_a, #0 + vmov.u8 draw_mask_fb_ptr_b, #0 + + add fb_ptr, fb_ptr, #16*2 + beq 2f + + 1: + and texture_block_ptr, texture_offset, texture_mask + subs blocks_remaining, blocks_remaining, #4 + + add texture_block_ptr, texture_page_ptr, texture_block_ptr + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + add texture_offset, texture_offset, #16 + + add fb_ptr, fb_ptr, #16*2 + bgt 1b + + 2: + vdup.u8 draw_mask_fb_ptr_a, block_masks[4] // right_mask_bits + vdup.u8 draw_mask_fb_ptr_b, block_masks[5] + + and texture_block_ptr, texture_offset, texture_mask + add texture_block_ptr, texture_page_ptr, texture_block_ptr + + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + subs height, height, #1 + + add fb_ptr, fb_ptr, fb_ptr_pitch + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + + bne 0b + + ldmia sp!, { r4 - r11, pc } + + +#undef width +#undef right_width +#undef right_mask_bits +#undef color +#undef height +#undef blocks_remaining +#undef colors +#undef right_mask +#undef test_mask +#undef draw_mask + +#define psx_gpu r0 +#define x r1 +#define y r2 +#define width r3 +#define right_width r5 +#define right_mask_bits r6 +#define fb_ptr r7 +#define color r8 +#define height r9 +#define fb_ptr_pitch r12 + +// referenced by setup_sprites_16bpp_flush +#define num_blocks r4 +#define block r5 +#define block_width r11 + +#define 
color_r r1 +#define color_g r2 +#define color_b r8 +#define blocks_remaining r6 + +#define colors q0 +#define right_mask q1 +#define test_mask q2 +#define draw_mask q2 +#define draw_mask_bits_fb_ptr d6 + + +.align 3 + +function(setup_sprite_untextured) + ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ] + tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \ + | RENDER_FLAGS_BLEND) + beq setup_sprite_untextured_simple + + stmdb sp!, { r4 - r11, r14 } + + ldr width, [ sp, #40 ] + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] + + ldr height, [ sp, #44 ] + add fb_ptr, fb_ptr, y, lsl #11 + + add fb_ptr, fb_ptr, x, lsl #1 + sub right_width, width, #1 + + ldr color, [ sp, #48 ] + and right_width, #7 + + add block_width, width, #7 + add right_width, #1 + + lsr block_width, #3 + mov right_mask_bits, #0xff + + sub fb_ptr_pitch, block_width, #1 + lsl right_mask_bits, right_width + + lsl fb_ptr_pitch, #3+1 + ubfx color_r, color, #3, #5 + + rsb fb_ptr_pitch, #1024*2 + ubfx color_g, color, #11, #5 + + vld1.u32 { test_mask }, [ psx_gpu, :128 ] + ubfx color_b, color, #19, #5 + + vdup.u16 right_mask, right_mask_bits + orr color, color_r, color_b, lsl #10 + + ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + orr color, color, color_g, lsl #5 + + vtst.u16 right_mask, right_mask, test_mask + add block, psx_gpu, #psx_gpu_blocks_offset + + vdup.u16 colors, color + add block, block, num_blocks, lsl #6 + + +setup_sprite_untextured_height_loop: + add num_blocks, block_width + sub blocks_remaining, block_width, #1 + + cmp num_blocks, #MAX_BLOCKS + blgt setup_sprites_16bpp_flush + + cmp blocks_remaining, #0 + ble 1f + + vmov.u8 draw_mask, #0 /* zero_mask */ + vmov.u8 draw_mask_bits_fb_ptr, #0 + + 0: + vst1.u32 { draw_mask }, [ block, :128 ]! 
+ subs blocks_remaining, #1 + + vst1.u32 { colors }, [ block, :128 ] + add block, block, #24 + + vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr + vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ] + + add block, block, #24 + add fb_ptr, #8*2 + bgt 0b + + 1: + vst1.u32 { right_mask }, [ block, :128 ]! + subs height, #1 + + vst1.u32 { colors }, [ block, :128 ] + add block, block, #24 + + vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr + vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ] + + add block, block, #24 + add fb_ptr, fb_ptr_pitch + + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + bgt setup_sprite_untextured_height_loop + + ldmia sp!, { r4 - r11, pc } + + + #undef texture_page_ptr #undef vram_ptr #undef dirty_textures_mask @@ -5461,3 +6048,40 @@ function(update_texture_8bpp_cache_slice) vpop { q0 - q3 } ldmia sp!, { r4 - r11, pc } + +/* void scale2x_tiles8(void *dst, const void *src, int w8, int h) */ +function(scale2x_tiles8) + push { r4, r14 } + + mov r4, r1 + add r12, r0, #1024*2 + mov r14, r2 + +0: + vld1.u16 { q0 }, [ r1, :128 ]! + vld1.u16 { q2 }, [ r1, :128 ]! + vmov q1, q0 + vmov q3, q2 + vzip.16 q0, q1 + vzip.16 q2, q3 + subs r14, #2 + vst1.u16 { q0, q1 }, [ r0, :128 ]! + vst1.u16 { q0, q1 }, [ r12, :128 ]! + blt 1f + vst1.u16 { q2, q3 }, [ r0, :128 ]! + vst1.u16 { q2, q3 }, [ r12, :128 ]! 
+ bgt 0b +1: + subs r3, #1 + mov r14, r2 + add r0, #1024*2*2 + add r4, #1024*2 + sub r0, r2, lsl #4+1 + mov r1, r4 + add r12, r0, #1024*2 + bgt 0b + nop + + pop { r4, pc } + +// vim:filetype=armasm diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h new file mode 100644 index 00000000..1307891e --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h @@ -0,0 +1,57 @@ +#define psx_gpu_test_mask_offset 0x0 +#define psx_gpu_uvrg_offset 0x10 +#define psx_gpu_uvrg_dx_offset 0x20 +#define psx_gpu_uvrg_dy_offset 0x30 +#define psx_gpu_u_block_span_offset 0x40 +#define psx_gpu_v_block_span_offset 0x50 +#define psx_gpu_r_block_span_offset 0x60 +#define psx_gpu_g_block_span_offset 0x70 +#define psx_gpu_b_block_span_offset 0x80 +#define psx_gpu_b_offset 0x90 +#define psx_gpu_b_dy_offset 0x94 +#define psx_gpu_triangle_area_offset 0x98 +#define psx_gpu_texture_window_settings_offset 0x9c +#define psx_gpu_current_texture_mask_offset 0xa0 +#define psx_gpu_viewport_mask_offset 0xa4 +#define psx_gpu_dirty_textures_4bpp_mask_offset 0xa8 +#define psx_gpu_dirty_textures_8bpp_mask_offset 0xac +#define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 0xb0 +#define psx_gpu_triangle_color_offset 0xb4 +#define psx_gpu_dither_table_offset 0xb8 +#define psx_gpu_uvrgb_phase_offset 0xc8 +#define psx_gpu_render_block_handler_offset 0xcc +#define psx_gpu_texture_page_ptr_offset 0xd0 +#define psx_gpu_texture_page_base_offset 0xd4 +#define psx_gpu_clut_ptr_offset 0xd8 +#define psx_gpu_vram_ptr_offset 0xdc +#define psx_gpu_vram_out_ptr_offset 0xe0 +#define psx_gpu_render_state_base_offset 0xe4 +#define psx_gpu_render_state_offset 0xe6 +#define psx_gpu_num_spans_offset 0xe8 +#define psx_gpu_num_blocks_offset 0xea +#define psx_gpu_viewport_start_x_offset 0xec +#define psx_gpu_viewport_start_y_offset 0xee +#define psx_gpu_viewport_end_x_offset 0xf0 +#define psx_gpu_viewport_end_y_offset 0xf2 +#define psx_gpu_mask_msb_offset 0xf4 +#define 
psx_gpu_triangle_winding_offset 0xf6 +#define psx_gpu_display_area_draw_enable_offset 0xf7 +#define psx_gpu_current_texture_page_offset 0xf8 +#define psx_gpu_last_8bpp_texture_page_offset 0xf9 +#define psx_gpu_texture_mask_width_offset 0xfa +#define psx_gpu_texture_mask_height_offset 0xfb +#define psx_gpu_texture_window_x_offset 0xfc +#define psx_gpu_texture_window_y_offset 0xfd +#define psx_gpu_primitive_type_offset 0xfe +#define psx_gpu_render_mode_offset 0xff +#define psx_gpu_offset_x_offset 0x100 +#define psx_gpu_offset_y_offset 0x102 +#define psx_gpu_clut_settings_offset 0x104 +#define psx_gpu_texture_settings_offset 0x106 +#define psx_gpu_blocks_offset 0x200 +#define psx_gpu_span_uvrg_offset_offset 0x2200 +#define psx_gpu_span_edge_data_offset 0x4200 +#define psx_gpu_span_b_offset_offset 0x5200 +#define psx_gpu_texture_4bpp_cache_offset 0x5a00 +#define psx_gpu_texture_8bpp_even_cache_offset 0x205a00 +#define psx_gpu_texture_8bpp_odd_cache_offset 0x305a00 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c new file mode 100644 index 00000000..5adfb75f --- /dev/null +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c @@ -0,0 +1,86 @@ +#include +#include + +#include "common.h" + +#define WRITE_OFFSET(f, member) \ + fprintf(f, "#define %-50s0x%x\n", \ + "psx_gpu_" #member "_offset", \ + offsetof(psx_gpu_struct, member)); + +int main() +{ + FILE *f; + + if (sizeof(f) != 4) { + fprintf(stderr, "bad pointer size\n"); + return 1; + } + + f = fopen("psx_gpu_offsets.h", "w"); + if (f == NULL) { + perror("fopen"); + return 1; + } + + WRITE_OFFSET(f, test_mask); + WRITE_OFFSET(f, uvrg); + WRITE_OFFSET(f, uvrg_dx); + WRITE_OFFSET(f, uvrg_dy); + WRITE_OFFSET(f, u_block_span); + WRITE_OFFSET(f, v_block_span); + WRITE_OFFSET(f, r_block_span); + WRITE_OFFSET(f, g_block_span); + WRITE_OFFSET(f, b_block_span); + WRITE_OFFSET(f, b); + WRITE_OFFSET(f, b_dy); + WRITE_OFFSET(f, triangle_area); + WRITE_OFFSET(f, 
texture_window_settings); + WRITE_OFFSET(f, current_texture_mask); + WRITE_OFFSET(f, viewport_mask); + WRITE_OFFSET(f, dirty_textures_4bpp_mask); + WRITE_OFFSET(f, dirty_textures_8bpp_mask); + WRITE_OFFSET(f, dirty_textures_8bpp_alternate_mask); + WRITE_OFFSET(f, triangle_color); + WRITE_OFFSET(f, dither_table); + WRITE_OFFSET(f, uvrgb_phase); + WRITE_OFFSET(f, render_block_handler); + WRITE_OFFSET(f, texture_page_ptr); + WRITE_OFFSET(f, texture_page_base); + WRITE_OFFSET(f, clut_ptr); + WRITE_OFFSET(f, vram_ptr); + WRITE_OFFSET(f, vram_out_ptr); + WRITE_OFFSET(f, render_state_base); + WRITE_OFFSET(f, render_state); + WRITE_OFFSET(f, num_spans); + WRITE_OFFSET(f, num_blocks); + WRITE_OFFSET(f, viewport_start_x); + WRITE_OFFSET(f, viewport_start_y); + WRITE_OFFSET(f, viewport_end_x); + WRITE_OFFSET(f, viewport_end_y); + WRITE_OFFSET(f, mask_msb); + WRITE_OFFSET(f, triangle_winding); + WRITE_OFFSET(f, display_area_draw_enable); + WRITE_OFFSET(f, current_texture_page); + WRITE_OFFSET(f, last_8bpp_texture_page); + WRITE_OFFSET(f, texture_mask_width); + WRITE_OFFSET(f, texture_mask_height); + WRITE_OFFSET(f, texture_window_x); + WRITE_OFFSET(f, texture_window_y); + WRITE_OFFSET(f, primitive_type); + WRITE_OFFSET(f, render_mode); + WRITE_OFFSET(f, offset_x); + WRITE_OFFSET(f, offset_y); + WRITE_OFFSET(f, clut_settings); + WRITE_OFFSET(f, texture_settings); + WRITE_OFFSET(f, blocks); + WRITE_OFFSET(f, span_uvrg_offset); + WRITE_OFFSET(f, span_edge_data); + WRITE_OFFSET(f, span_b_offset); + WRITE_OFFSET(f, texture_4bpp_cache); + WRITE_OFFSET(f, texture_8bpp_even_cache); + WRITE_OFFSET(f, texture_8bpp_odd_cache); + fclose(f); + + return 0; +} diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c index 920c6388..67da86ea 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_parse.c @@ -92,6 +92,7 @@ void update_texture_ptr(psx_gpu_struct *psx_gpu) void set_texture(psx_gpu_struct *psx_gpu, u32 
texture_settings) { + texture_settings &= 0x1FF; if(psx_gpu->texture_settings != texture_settings) { u32 new_texture_page = texture_settings & 0x1F; @@ -152,6 +153,52 @@ void set_triangle_color(psx_gpu_struct *psx_gpu, u32 triangle_color) } } +static void do_fill(psx_gpu_struct *psx_gpu, u32 x, u32 y, + u32 width, u32 height, u32 color) +{ + x &= ~0xF; + width = ((width + 0xF) & ~0xF); + + flush_render_block_buffer(psx_gpu); + + if(unlikely((x + width) > 1024)) + { + u32 width_a = 1024 - x; + u32 width_b = width - width_a; + + if(unlikely((y + height) > 512)) + { + u32 height_a = 512 - y; + u32 height_b = height - height_a; + + render_block_fill(psx_gpu, color, x, y, width_a, height_a); + render_block_fill(psx_gpu, color, 0, y, width_b, height_a); + render_block_fill(psx_gpu, color, x, 0, width_a, height_b); + render_block_fill(psx_gpu, color, 0, 0, width_b, height_b); + } + else + { + render_block_fill(psx_gpu, color, x, y, width_a, height); + render_block_fill(psx_gpu, color, 0, y, width_b, height); + } + } + else + { + if(unlikely((y + height) > 512)) + { + u32 height_a = 512 - y; + u32 height_b = height - height_a; + + render_block_fill(psx_gpu, color, x, y, width, height_a); + render_block_fill(psx_gpu, color, x, 0, width, height_b); + } + else + { + render_block_fill(psx_gpu, color, x, y, width, height); + } + } +} + #define sign_extend_12bit(value) \ (((s32)((value) << 20)) >> 20) \ @@ -235,45 +282,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) u32 height = list_s16[5] & 0x1FF; u32 color = list[0] & 0xFFFFFF; - x &= ~0xF; - width = ((width + 0xF) & ~0xF); - - if((x + width) > 1024) - { - u32 width_a = 1024 - x; - u32 width_b = width - width_a; - - if((y + height) > 512) - { - u32 height_a = 512 - y; - u32 height_b = height - height_a; - - render_block_fill(psx_gpu, color, x, y, width_a, height_a); - render_block_fill(psx_gpu, color, 0, y, width_b, height_a); - render_block_fill(psx_gpu, color, x, 0, width_a, height_b); - 
render_block_fill(psx_gpu, color, 0, 0, width_b, height_b); - } - else - { - render_block_fill(psx_gpu, color, x, y, width_a, height); - render_block_fill(psx_gpu, color, 0, y, width_b, height); - } - } - else - { - if((y + height) > 512) - { - u32 height_a = 512 - y; - u32 height_b = height - height_a; - - render_block_fill(psx_gpu, color, x, y, width, height_a); - render_block_fill(psx_gpu, color, x, 0, width, height_b); - } - else - { - render_block_fill(psx_gpu, color, x, y, width, height); - } - } + do_fill(psx_gpu, x, y, width, height, color); break; } @@ -399,7 +408,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].x = list_s16[4] + psx_gpu->offset_x; vertexes[1].y = list_s16[5] + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, list[0]); + render_line(psx_gpu, vertexes, current_command, list[0], 0); break; } @@ -420,7 +429,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, list[0]); + render_line(psx_gpu, vertexes, current_command, list[0], 0); list_position++; num_vertexes++; @@ -451,7 +460,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].x = list_s16[6] + psx_gpu->offset_x; vertexes[1].y = list_s16[7] + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, 0); + render_line(psx_gpu, vertexes, current_command, 0, 0); break; } @@ -481,7 +490,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; - render_line(psx_gpu, vertexes, current_command, 0); + render_line(psx_gpu, vertexes, current_command, 0, 0); list_position += 2; num_vertexes++; @@ -592,12 +601,22 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 
*last_command) break; } - case 0x80: // vid -> vid - render_block_move(psx_gpu, list_s16[2] & 0x3FF, list_s16[3] & 0x1FF, - list_s16[4] & 0x3FF, list_s16[5] & 0x1FF, - ((list_s16[6] - 1) & 0x3FF) + 1, ((list_s16[7] - 1) & 0x1FF) + 1); - break; - + case 0x80: // vid -> vid + { + u32 sx = list_s16[2] & 0x3FF; + u32 sy = list_s16[3] & 0x1FF; + u32 dx = list_s16[4] & 0x3FF; + u32 dy = list_s16[5] & 0x1FF; + u32 w = ((list_s16[6] - 1) & 0x3FF) + 1; + u32 h = ((list_s16[7] - 1) & 0x1FF) + 1; + + if (sx == dx && sy == dy && psx_gpu->mask_msb == 0) + break; + + render_block_move(psx_gpu, sx, sy, dx, dy, w, h); + break; + } + #ifdef PCSX case 0xA0: // sys -> vid case 0xC0: // vid -> sys @@ -626,7 +645,7 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) #endif case 0xE1: - set_texture(psx_gpu, list[0] & 0x1FF); + set_texture(psx_gpu, list[0]); if(list[0] & (1 << 9)) psx_gpu->render_state_base |= RENDER_STATE_DITHER; @@ -669,11 +688,21 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) } SET_Ex(2, list[0]); break; - } + } + + case 0xE3: + { + s16 viewport_start_x = list[0] & 0x3FF; + s16 viewport_start_y = (list[0] >> 10) & 0x1FF; + + if(viewport_start_x == psx_gpu->viewport_start_x && + viewport_start_y == psx_gpu->viewport_start_y) + { + break; + } - case 0xE3: - psx_gpu->viewport_start_x = list[0] & 0x3FF; - psx_gpu->viewport_start_y = (list[0] >> 10) & 0x1FF; + psx_gpu->viewport_start_x = viewport_start_x; + psx_gpu->viewport_start_y = viewport_start_y; #ifdef TEXTURE_CACHE_4BPP psx_gpu->viewport_mask = @@ -681,12 +710,23 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->viewport_start_y, psx_gpu->viewport_end_x, psx_gpu->viewport_end_y); #endif - SET_Ex(3, list[0]); - break; - - case 0xE4: - psx_gpu->viewport_end_x = list[0] & 0x3FF; - psx_gpu->viewport_end_y = (list[0] >> 10) & 0x1FF; + SET_Ex(3, list[0]); + break; + } + + case 0xE4: + { + s16 viewport_end_x = list[0] 
& 0x3FF; + s16 viewport_end_y = (list[0] >> 10) & 0x1FF; + + if(viewport_end_x == psx_gpu->viewport_end_x && + viewport_end_y == psx_gpu->viewport_end_y) + { + break; + } + + psx_gpu->viewport_end_x = viewport_end_x; + psx_gpu->viewport_end_y = viewport_end_y; #ifdef TEXTURE_CACHE_4BPP psx_gpu->viewport_mask = @@ -694,10 +734,11 @@ u32 gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size, u32 *last_command) psx_gpu->viewport_start_y, psx_gpu->viewport_end_x, psx_gpu->viewport_end_y); #endif - SET_Ex(4, list[0]); - break; + SET_Ex(4, list[0]); + break; + } - case 0xE5: + case 0xE5: { s32 offset_x = list[0] << 21; s32 offset_y = list[0] << 10; @@ -741,3 +782,786 @@ breakloop: return list - list_start; } +#ifdef PCSX + +#define ENH_BUF_TABLE_STEP (1024 / sizeof(psx_gpu->enhancement_buf_by_x16)) + +static void update_enhancement_buf_table_from_hres(psx_gpu_struct *psx_gpu) +{ + u32 b, x, s; + + b = 0; + s = psx_gpu->enhancement_x_threshold; + for (x = 0; x < sizeof(psx_gpu->enhancement_buf_by_x16); x++) + { + if (b < 3 && x * ENH_BUF_TABLE_STEP >= s - ENH_BUF_TABLE_STEP - 1) + { + s += psx_gpu->enhancement_x_threshold; + b++; + } + psx_gpu->enhancement_buf_by_x16[x] = b; + } +} + +static void update_enhancement_buf_table_from_x(psx_gpu_struct *psx_gpu, + u32 x0, u32 len) +{ + u32 x, b; + + for (x = x0, b = 0; x >= len; b++) + x -= len; + if (b > 3) + b = 3; + + memset(psx_gpu->enhancement_buf_by_x16 + x0 / ENH_BUF_TABLE_STEP, + b, (len + ENH_BUF_TABLE_STEP - 1) / ENH_BUF_TABLE_STEP); +} + +#define select_enhancement_buf(psx_gpu) \ + psx_gpu->enhancement_current_buf_ptr = \ + select_enhancement_buf_ptr(psx_gpu, psx_gpu->saved_viewport_start_x) + +#define enhancement_disable() { \ + psx_gpu->vram_out_ptr = psx_gpu->vram_ptr; \ + psx_gpu->viewport_start_x = psx_gpu->saved_viewport_start_x; \ + psx_gpu->viewport_start_y = psx_gpu->saved_viewport_start_y; \ + psx_gpu->viewport_end_x = psx_gpu->saved_viewport_end_x; \ + psx_gpu->viewport_end_y = 
extern void scale2x_tiles8(void *dst, const void *src, int w8, int h);

#ifndef NEON_BUILD
/*
 * Portable fallback for the NEON scale2x_tiles8() routine.
 *
 * Copies a block of 16-bit pixels from src to dst at twice the size: every
 * source pixel becomes a 2x2 square in the destination.
 *
 *   dst - top-left destination pixel; rows are 1024 pixels apart
 *   src - top-left source pixel; rows are 1024 pixels apart
 *   w8  - block width in units of 8 source pixels
 *   h   - block height in source rows
 *
 * This used to be an empty stub, which left the enhancement buffers stale
 * on builds without NEON.  Non-positive w8 or h copies nothing.
 */
void scale2x_tiles8(void *dst, const void *src, int w8, int h)
{
  const unsigned short *src_row = src;
  unsigned short *dst_row = dst;
  int x, y;

  for (y = 0; y < h; y++)
  {
    unsigned short *upper = dst_row;
    unsigned short *lower = dst_row + 1024;

    for (x = 0; x < w8 * 8; x++)
    {
      unsigned short pixel = src_row[x];

      upper[x * 2] = pixel;
      upper[x * 2 + 1] = pixel;
      lower[x * 2] = pixel;
      lower[x * 2 + 1] = pixel;
    }

    /* one source row maps to two destination rows */
    src_row += 1024;
    dst_row += 1024 * 2;
  }
}
#endif
enhancement_disable(); + if (prepare_triangle(psx_gpu, &vertexes[1], vertex_ptrs)) { + if (!disable_main_render) + render_triangle_p(psx_gpu, vertex_ptrs, current_command); + + enhancement_enable(); + shift_vertices3(vertex_ptrs); + shift_triangle_area(); + render_triangle_p(psx_gpu, vertex_ptrs, current_command); + } +} + +#if 0 + +#define fill_vertex(i, x_, y_, u_, v_, rgb_) \ + vertexes[i].x = x_; \ + vertexes[i].y = y_; \ + vertexes[i].u = u_; \ + vertexes[i].v = v_; \ + vertexes[i].r = rgb_; \ + vertexes[i].g = (rgb_) >> 8; \ + vertexes[i].b = (rgb_) >> 16 + +static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, + u32 u, u32 v, u32 w, u32 h, u32 cmd_rgb) +{ + vertex_struct *vertex_ptrs[3]; + u32 flags = (cmd_rgb >> 24); + u32 color = cmd_rgb & 0xffffff; + u32 render_state_base_saved = psx_gpu->render_state_base; + int x1, y1; + u8 u1, v1; + + flags &= + (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | + RENDER_FLAGS_TEXTURE_MAP); + + set_triangle_color(psx_gpu, color); + if(color == 0x808080) + flags |= RENDER_FLAGS_MODULATE_TEXELS; + + psx_gpu->render_state_base &= ~RENDER_STATE_DITHER; + enhancement_enable(); + + x1 = x + w; + y1 = y + h; + u1 = u + w; + v1 = v + h; + // FIXME.. 
+ if (u1 < u) u1 = 0xff; + if (v1 < v) v1 = 0xff; + + // 0-2 + // |/ + // 1 + fill_vertex(0, x, y, u, v, color); + fill_vertex(1, x, y1, u, v1, color); + fill_vertex(2, x1, y, u1, v, color); + if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) { + shift_vertices3(vertex_ptrs); + shift_triangle_area(); + render_triangle_p(psx_gpu, vertex_ptrs, flags); + } + + // 0 + // /| + // 1-2 + fill_vertex(0, x1, y, u1, v, color); + fill_vertex(1, x, y1, u, v1, color); + fill_vertex(2, x1, y1, u1, v1, color); + if (prepare_triangle(psx_gpu, vertexes, vertex_ptrs)) { + shift_vertices3(vertex_ptrs); + shift_triangle_area(); + render_triangle_p(psx_gpu, vertex_ptrs, flags); + } + + psx_gpu->render_state_base = render_state_base_saved; +} +#else +static void do_sprite_enhanced(psx_gpu_struct *psx_gpu, int x, int y, + u32 u, u32 v, u32 w, u32 h, u32 cmd_rgb) +{ + u32 flags = (cmd_rgb >> 24); + u32 color = cmd_rgb & 0xffffff; + + render_sprite_4x(psx_gpu, x, y, u, v, w, h, flags, color); +} +#endif + +u32 gpu_parse_enhanced(psx_gpu_struct *psx_gpu, u32 *list, u32 size, + u32 *last_command) +{ + u32 current_command = 0, command_length; + + u32 *list_start = list; + u32 *list_end = list + (size / 4); + + psx_gpu->saved_viewport_start_x = psx_gpu->viewport_start_x; + psx_gpu->saved_viewport_start_y = psx_gpu->viewport_start_y; + psx_gpu->saved_viewport_end_x = psx_gpu->viewport_end_x; + psx_gpu->saved_viewport_end_y = psx_gpu->viewport_end_y; + select_enhancement_buf(psx_gpu); + + for(; list < list_end; list += 1 + command_length) + { + s16 *list_s16 = (void *)list; + current_command = *list >> 24; + command_length = command_lengths[current_command]; + if (list + 1 + command_length > list_end) { + current_command = (u32)-1; + break; + } + + enhancement_disable(); + + switch(current_command) + { + case 0x00: + break; + + case 0x02: + { + u32 x = list_s16[2] & 0x3FF; + u32 y = list_s16[3] & 0x1FF; + u32 width = list_s16[4] & 0x3FF; + u32 height = list_s16[5] & 0x1FF; + u32 color = 
list[0] & 0xFFFFFF; + + x &= ~0xF; + width = ((width + 0xF) & ~0xF); + + do_fill(psx_gpu, x, y, width, height, color); + + psx_gpu->vram_out_ptr = select_enhancement_buf_ptr(psx_gpu, x); + x *= 2; + y *= 2; + width *= 2; + height *= 2; + render_block_fill_enh(psx_gpu, color, x, y, width, height); + break; + } + + case 0x20 ... 0x23: + { + set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); + + get_vertex_data_xy(0, 2); + get_vertex_data_xy(1, 4); + get_vertex_data_xy(2, 6); + + do_triangle_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x24 ... 0x27: + { + set_clut(psx_gpu, list_s16[5]); + set_texture(psx_gpu, list_s16[9]); + set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); + + get_vertex_data_xy_uv(0, 2); + get_vertex_data_xy_uv(1, 6); + get_vertex_data_xy_uv(2, 10); + + do_triangle_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x28 ... 0x2B: + { + set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); + + get_vertex_data_xy(0, 2); + get_vertex_data_xy(1, 4); + get_vertex_data_xy(2, 6); + get_vertex_data_xy(3, 8); + + do_quad_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x2C ... 0x2F: + { + set_clut(psx_gpu, list_s16[5]); + set_texture(psx_gpu, list_s16[9]); + set_triangle_color(psx_gpu, list[0] & 0xFFFFFF); + + get_vertex_data_xy_uv(0, 2); + get_vertex_data_xy_uv(1, 6); + get_vertex_data_xy_uv(2, 10); + get_vertex_data_xy_uv(3, 14); + + do_quad_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x30 ... 
0x33: + { + get_vertex_data_xy_rgb(0, 0); + get_vertex_data_xy_rgb(1, 4); + get_vertex_data_xy_rgb(2, 8); + + do_triangle_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x34: + case 0x35: + case 0x36: + case 0x37: + { + set_clut(psx_gpu, list_s16[5]); + set_texture(psx_gpu, list_s16[11]); + + get_vertex_data_xy_uv_rgb(0, 0); + get_vertex_data_xy_uv_rgb(1, 6); + get_vertex_data_xy_uv_rgb(2, 12); + + do_triangle_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x38: + case 0x39: + case 0x3A: + case 0x3B: + { + get_vertex_data_xy_rgb(0, 0); + get_vertex_data_xy_rgb(1, 4); + get_vertex_data_xy_rgb(2, 8); + get_vertex_data_xy_rgb(3, 12); + + do_quad_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x3C: + case 0x3D: + case 0x3E: + case 0x3F: + { + set_clut(psx_gpu, list_s16[5]); + set_texture(psx_gpu, list_s16[11]); + + get_vertex_data_xy_uv_rgb(0, 0); + get_vertex_data_xy_uv_rgb(1, 6); + get_vertex_data_xy_uv_rgb(2, 12); + get_vertex_data_xy_uv_rgb(3, 18); + + do_quad_enhanced(psx_gpu, vertexes, current_command); + break; + } + + case 0x40 ... 0x47: + { + vertexes[0].x = list_s16[2] + psx_gpu->offset_x; + vertexes[0].y = list_s16[3] + psx_gpu->offset_y; + vertexes[1].x = list_s16[4] + psx_gpu->offset_x; + vertexes[1].y = list_s16[5] + psx_gpu->offset_y; + + render_line(psx_gpu, vertexes, current_command, list[0], 0); + enhancement_enable(); + render_line(psx_gpu, vertexes, current_command, list[0], 1); + break; + } + + case 0x48 ... 
0x4F: + { + u32 num_vertexes = 1; + u32 *list_position = &(list[2]); + u32 xy = list[1]; + + vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; + vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; + + xy = *list_position; + while(1) + { + vertexes[0] = vertexes[1]; + + vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; + vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; + + enhancement_disable(); + render_line(psx_gpu, vertexes, current_command, list[0], 0); + enhancement_enable(); + render_line(psx_gpu, vertexes, current_command, list[0], 1); + + list_position++; + num_vertexes++; + + if(list_position >= list_end) + break; + + xy = *list_position; + if((xy & 0xF000F000) == 0x50005000) + break; + } + + command_length += (num_vertexes - 2); + break; + } + + case 0x50 ... 0x57: + { + vertexes[0].r = list[0] & 0xFF; + vertexes[0].g = (list[0] >> 8) & 0xFF; + vertexes[0].b = (list[0] >> 16) & 0xFF; + vertexes[0].x = list_s16[2] + psx_gpu->offset_x; + vertexes[0].y = list_s16[3] + psx_gpu->offset_y; + + vertexes[1].r = list[2] & 0xFF; + vertexes[1].g = (list[2] >> 8) & 0xFF; + vertexes[1].b = (list[2] >> 16) & 0xFF; + vertexes[1].x = list_s16[6] + psx_gpu->offset_x; + vertexes[1].y = list_s16[7] + psx_gpu->offset_y; + + render_line(psx_gpu, vertexes, current_command, 0, 0); + enhancement_enable(); + render_line(psx_gpu, vertexes, current_command, 0, 1); + break; + } + + case 0x58 ... 
0x5F: + { + u32 num_vertexes = 1; + u32 *list_position = &(list[2]); + u32 color = list[0]; + u32 xy = list[1]; + + vertexes[1].r = color & 0xFF; + vertexes[1].g = (color >> 8) & 0xFF; + vertexes[1].b = (color >> 16) & 0xFF; + vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; + vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; + + color = list_position[0]; + while(1) + { + xy = list_position[1]; + + vertexes[0] = vertexes[1]; + + vertexes[1].r = color & 0xFF; + vertexes[1].g = (color >> 8) & 0xFF; + vertexes[1].b = (color >> 16) & 0xFF; + vertexes[1].x = (xy & 0xFFFF) + psx_gpu->offset_x; + vertexes[1].y = (xy >> 16) + psx_gpu->offset_y; + + enhancement_disable(); + render_line(psx_gpu, vertexes, current_command, 0, 0); + enhancement_enable(); + render_line(psx_gpu, vertexes, current_command, 0, 1); + + list_position += 2; + num_vertexes++; + + if(list_position >= list_end) + break; + + color = list_position[0]; + if((color & 0xF000F000) == 0x50005000) + break; + } + + command_length += ((num_vertexes - 2) * 2); + break; + } + + case 0x60 ... 0x63: + { + u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + u32 width = list_s16[4] & 0x3FF; + u32 height = list_s16[5] & 0x1FF; + + render_sprite(psx_gpu, x, y, 0, 0, width, height, current_command, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, width, height, list[0]); + break; + } + + case 0x64 ... 
0x67: + { + u32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + u32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + u8 u = list_s16[4]; + u8 v = list_s16[4] >> 8; + u32 width = list_s16[6] & 0x3FF; + u32 height = list_s16[7] & 0x1FF; + + set_clut(psx_gpu, list_s16[5]); + + render_sprite(psx_gpu, x, y, u, v, width, height, + current_command, list[0]); + do_sprite_enhanced(psx_gpu, x, y, u, v, width, height, list[0]); + break; + } + + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + + render_sprite(psx_gpu, x, y, 0, 0, 1, 1, current_command, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 1, 1, list[0]); + break; + } + + case 0x70: + case 0x71: + case 0x72: + case 0x73: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + + render_sprite(psx_gpu, x, y, 0, 0, 8, 8, current_command, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 8, 8, list[0]); + break; + } + + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + u8 u = list_s16[4]; + u8 v = list_s16[4] >> 8; + + set_clut(psx_gpu, list_s16[5]); + + render_sprite(psx_gpu, x, y, u, v, 8, 8, + current_command, list[0]); + do_sprite_enhanced(psx_gpu, x, y, u, v, 8, 8, list[0]); + break; + } + + case 0x78: + case 0x79: + case 0x7A: + case 0x7B: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + + render_sprite(psx_gpu, x, y, 0, 0, 16, 16, current_command, list[0]); + do_sprite_enhanced(psx_gpu, x, y, 0, 0, 16, 16, list[0]); + break; + } + + case 0x7C: + case 0x7D: + case 0x7E: + case 0x7F: + { + s32 x = sign_extend_11bit(list_s16[2] + psx_gpu->offset_x); + s32 y = 
sign_extend_11bit(list_s16[3] + psx_gpu->offset_y); + u8 u = list_s16[4]; + u8 v = list_s16[4] >> 8; + + set_clut(psx_gpu, list_s16[5]); + + render_sprite(psx_gpu, x, y, u, v, 16, 16, current_command, list[0]); + do_sprite_enhanced(psx_gpu, x, y, u, v, 16, 16, list[0]); + break; + } + + case 0x80: // vid -> vid + { + u32 sx = list_s16[2] & 0x3FF; + u32 sy = list_s16[3] & 0x1FF; + u32 dx = list_s16[4] & 0x3FF; + u32 dy = list_s16[5] & 0x1FF; + u32 w = ((list_s16[6] - 1) & 0x3FF) + 1; + u32 h = ((list_s16[7] - 1) & 0x1FF) + 1; + u16 *buf; + + if (sx == dx && sy == dy && psx_gpu->mask_msb == 0) + break; + + render_block_move(psx_gpu, sx, sy, dx, dy, w, h); + if (dy + h > 512) + h = 512 - dy; + sx = sx & ~7; // FIXME? + dx = dx * 2 & ~7; + dy *= 2; + w = (w + 7) / 8; + buf = select_enhancement_buf_ptr(psx_gpu, dx / 2); + scale2x_tiles8(buf + dy * 1024 + dx, + psx_gpu->vram_ptr + sy * 1024 + sx, w, h); + break; + } + + case 0xA0: // sys -> vid + case 0xC0: // vid -> sys + goto breakloop; + + case 0xE1: + set_texture(psx_gpu, list[0]); + + if(list[0] & (1 << 9)) + psx_gpu->render_state_base |= RENDER_STATE_DITHER; + else + psx_gpu->render_state_base &= ~RENDER_STATE_DITHER; + + psx_gpu->display_area_draw_enable = (list[0] >> 10) & 0x1; + SET_Ex(1, list[0]); + break; + + case 0xE2: + { + // TODO: Clean + u32 texture_window_settings = list[0]; + u32 tmp, x, y, w, h; + + if(texture_window_settings != psx_gpu->texture_window_settings) + { + tmp = (texture_window_settings & 0x1F) | 0x20; + for(w = 8; (tmp & 1) == 0; tmp >>= 1, w <<= 1); + + tmp = ((texture_window_settings >> 5) & 0x1f) | 0x20; + for (h = 8; (tmp & 1) == 0; tmp >>= 1, h <<= 1); + + tmp = 32 - (w >> 3); + x = ((texture_window_settings >> 10) & tmp) << 3; + + tmp = 32 - (h >> 3); + y = ((texture_window_settings >> 15) & tmp) << 3; + + flush_render_block_buffer(psx_gpu); + + psx_gpu->texture_window_settings = texture_window_settings; + psx_gpu->texture_window_x = x; + psx_gpu->texture_window_y = y; + 
psx_gpu->texture_mask_width = w - 1; + psx_gpu->texture_mask_height = h - 1; + + update_texture_ptr(psx_gpu); + } + SET_Ex(2, list[0]); + break; + } + + case 0xE3: + { + s16 viewport_start_x = list[0] & 0x3FF; + s16 viewport_start_y = (list[0] >> 10) & 0x1FF; + u32 d; + + if(viewport_start_x == psx_gpu->viewport_start_x && + viewport_start_y == psx_gpu->viewport_start_y) + { + break; + } + psx_gpu->viewport_start_x = viewport_start_x; + psx_gpu->viewport_start_y = viewport_start_y; + psx_gpu->saved_viewport_start_x = viewport_start_x; + psx_gpu->saved_viewport_start_y = viewport_start_y; + + d = (u32)psx_gpu->viewport_end_x - (u32)viewport_start_x + 1; + if((u32)psx_gpu->enhancement_x_threshold - d <= 16) + { + update_enhancement_buf_table_from_x(psx_gpu, + viewport_start_x, d); + } + select_enhancement_buf(psx_gpu); + +#ifdef TEXTURE_CACHE_4BPP + psx_gpu->viewport_mask = + texture_region_mask(psx_gpu->viewport_start_x, + psx_gpu->viewport_start_y, psx_gpu->viewport_end_x, + psx_gpu->viewport_end_y); +#endif + SET_Ex(3, list[0]); + break; + } + + case 0xE4: + { + s16 viewport_end_x = list[0] & 0x3FF; + s16 viewport_end_y = (list[0] >> 10) & 0x1FF; + u32 d; + + if(viewport_end_x == psx_gpu->viewport_end_x && + viewport_end_y == psx_gpu->viewport_end_y) + { + break; + } + + psx_gpu->viewport_end_x = viewport_end_x; + psx_gpu->viewport_end_y = viewport_end_y; + psx_gpu->saved_viewport_end_x = viewport_end_x; + psx_gpu->saved_viewport_end_y = viewport_end_y; + + d = (u32)viewport_end_x - (u32)psx_gpu->viewport_start_x + 1; + if((u32)psx_gpu->enhancement_x_threshold - d <= 16) + { + update_enhancement_buf_table_from_x(psx_gpu, + psx_gpu->viewport_start_x, d); + } + select_enhancement_buf(psx_gpu); + +#ifdef TEXTURE_CACHE_4BPP + psx_gpu->viewport_mask = + texture_region_mask(psx_gpu->viewport_start_x, + psx_gpu->viewport_start_y, psx_gpu->viewport_end_x, + psx_gpu->viewport_end_y); +#endif + SET_Ex(4, list[0]); + break; + } + + case 0xE5: + { + s32 offset_x = list[0] << 
21; + s32 offset_y = list[0] << 10; + psx_gpu->offset_x = offset_x >> 21; + psx_gpu->offset_y = offset_y >> 21; + + SET_Ex(5, list[0]); + break; + } + + case 0xE6: + { + u32 mask_settings = list[0]; + u16 mask_msb = mask_settings << 15; + + if(list[0] & 0x2) + psx_gpu->render_state_base |= RENDER_STATE_MASK_EVALUATE; + else + psx_gpu->render_state_base &= ~RENDER_STATE_MASK_EVALUATE; + + if(mask_msb != psx_gpu->mask_msb) + { + flush_render_block_buffer(psx_gpu); + psx_gpu->mask_msb = mask_msb; + } + + SET_Ex(6, list[0]); + break; + } + + default: + break; + } + } + + enhancement_disable(); + +breakloop: + if (last_command != NULL) + *last_command = current_command; + return list - list_start; +} + +#endif /* PCSX */ + +// vim:shiftwidth=2:expandtab diff --git a/plugins/gpu_neon/psx_gpu/tests/Makefile b/plugins/gpu_neon/psx_gpu/tests/Makefile index 210f44d6..58cca296 100644 --- a/plugins/gpu_neon/psx_gpu/tests/Makefile +++ b/plugins/gpu_neon/psx_gpu/tests/Makefile @@ -13,7 +13,7 @@ ASFLAGS = $(CFLAGS) OBJ += psx_gpu_arm_neon.o endif ifndef DEBUG -CFLAGS += -O2 -fno-strict-aliasing +CFLAGS += -O2 -DNDEBUG -fno-strict-aliasing endif OBJ += psx_gpu.o psx_gpu_parse.o psx_gpu_main.o diff --git a/plugins/gpu_neon/psx_gpu/vector_ops.h b/plugins/gpu_neon/psx_gpu/vector_ops.h index c11955d0..c91e7d95 100644 --- a/plugins/gpu_neon/psx_gpu/vector_ops.h +++ b/plugins/gpu_neon/psx_gpu/vector_ops.h @@ -394,6 +394,10 @@ build_vector_types(s); foreach_element(8, (dest).e[_i] = \ (u8)(source_a).e[_i] | ((u8)(source_b).e[_i] << 8)) \ +#define zip_4x32b(dest, source_a, source_b) \ + foreach_element(4, (dest).e[_i] = \ + (u16)(source_a).e[_i] | ((u16)(source_b).e[_i] << 16)) \ + #define zip_2x64b(dest, source_a, source_b) \ foreach_element(2, (dest).e[_i] = \ (u64)(source_a).e[_i] | ((u64)(source_b).e[_i] << 32)) \ diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c index ff31c273..ad017614 100644 --- a/plugins/gpu_neon/psx_gpu_if.c +++ 
b/plugins/gpu_neon/psx_gpu_if.c @@ -9,11 +9,13 @@ */ #include +#include extern const unsigned char cmd_lengths[256]; #define command_lengths cmd_lengths static unsigned int *ex_regs; +static int initialized; #define PCSX #define SET_Ex(r, v) \ @@ -27,20 +29,102 @@ static psx_gpu_struct egpu __attribute__((aligned(256))); int do_cmd_list(uint32_t *list, int count, int *last_cmd) { - int ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd); + int ret; + + if (gpu.state.enhancement_active) + ret = gpu_parse_enhanced(&egpu, list, count * 4, (u32 *)last_cmd); + else + ret = gpu_parse(&egpu, list, count * 4, (u32 *)last_cmd); ex_regs[1] &= ~0x1ff; ex_regs[1] |= egpu.texture_settings & 0x1ff; return ret; } +#define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096 * 2) + +static uint16_t *get_enhancement_bufer(int *x, int *y, int *w, int *h, + int *vram_h) +{ + uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x); + + *x *= 2; + *y *= 2; + *w = *w * 2; + *h = *h * 2; + *vram_h = 1024; + return ret; +} + +static void map_enhancement_buffer(void) +{ + // currently we use 4x 1024*1024 buffers instead of single 2048*1024 + // to be able to reuse 1024-width code better (triangle setup, + // dithering phase, lines). 
+ egpu.enhancement_buf_ptr = gpu.mmap(ENHANCEMENT_BUF_SIZE); + if (egpu.enhancement_buf_ptr == NULL) { + fprintf(stderr, "failed to map enhancement buffer\n"); + gpu.get_enhancement_bufer = NULL; + } + else { + egpu.enhancement_buf_ptr += 4096 / 2; + gpu.get_enhancement_bufer = get_enhancement_bufer; + } +} + int renderer_init(void) { - initialize_psx_gpu(&egpu, gpu.vram); + if (gpu.vram != NULL) { + initialize_psx_gpu(&egpu, gpu.vram); + initialized = 1; + } + + if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL) + map_enhancement_buffer(); + ex_regs = gpu.ex_regs; return 0; } +void renderer_finish(void) +{ + if (egpu.enhancement_buf_ptr != NULL) { + egpu.enhancement_buf_ptr -= 4096 / 2; + gpu.munmap(egpu.enhancement_buf_ptr, ENHANCEMENT_BUF_SIZE); + } + egpu.enhancement_buf_ptr = NULL; + egpu.enhancement_current_buf_ptr = NULL; + initialized = 0; +} + +static __attribute__((noinline)) void +sync_enhancement_buffers(int x, int y, int w, int h) +{ + const int step_x = 1024 / sizeof(egpu.enhancement_buf_by_x16); + u16 *src, *dst; + int w1, fb_index; + + w += x & (step_x - 1); + x &= ~(step_x - 1); + w = (w + step_x - 1) & ~(step_x - 1); + if (y + h > 512) + h = 512 - y; + + while (w > 0) { + fb_index = egpu.enhancement_buf_by_x16[x / step_x]; + for (w1 = 0; w > 0; w1++, w -= step_x) + if (fb_index != egpu.enhancement_buf_by_x16[x / step_x + w1]) + break; + + src = gpu.vram + y * 1024 + x; + dst = select_enhancement_buf_ptr(&egpu, x); + dst += (y * 1024 + x) * 2; + scale2x_tiles8(dst, src, w1 * step_x / 8, h); + + x += w1 * step_x; + } +} + void renderer_sync_ecmds(uint32_t *ecmds) { gpu_parse(&egpu, ecmds + 1, 6 * 4, NULL); @@ -49,6 +133,8 @@ void renderer_sync_ecmds(uint32_t *ecmds) void renderer_update_caches(int x, int y, int w, int h) { update_texture_cache_region(&egpu, x, y, x + w - 1, y + h - 1); + if (gpu.state.enhancement_active && !gpu.status.rgb24) + sync_enhancement_buffers(x, y, w, h); } void renderer_flush_queues(void) @@ -58,13 +144,44 @@ void 
renderer_flush_queues(void) void renderer_set_interlace(int enable, int is_odd) { - egpu.interlace_mode &= ~(RENDER_INTERLACE_ENABLED|RENDER_INTERLACE_ODD); + egpu.render_mode &= ~(RENDER_INTERLACE_ENABLED|RENDER_INTERLACE_ODD); if (enable) - egpu.interlace_mode |= RENDER_INTERLACE_ENABLED; + egpu.render_mode |= RENDER_INTERLACE_ENABLED; if (is_odd) - egpu.interlace_mode |= RENDER_INTERLACE_ODD; + egpu.render_mode |= RENDER_INTERLACE_ODD; } +void renderer_notify_res_change(void) +{ + // note: must keep it multiple of 8 + if (egpu.enhancement_x_threshold != gpu.screen.hres) + { + egpu.enhancement_x_threshold = gpu.screen.hres; + update_enhancement_buf_table_from_hres(&egpu); + } +} + +#include "../../frontend/plugin_lib.h" + void renderer_set_config(const struct rearmed_cbs *cbs) { + static int enhancement_was_on; + + disable_main_render = cbs->gpu_neon.enhancement_no_main; + if (egpu.enhancement_buf_ptr != NULL && cbs->gpu_neon.enhancement_enable + && !enhancement_was_on) + { + sync_enhancement_buffers(0, 0, 1024, 512); + } + enhancement_was_on = cbs->gpu_neon.enhancement_enable; + + if (!initialized) { + initialize_psx_gpu(&egpu, gpu.vram); + initialized = 1; + } + + if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL) + map_enhancement_buffer(); + if (cbs->pl_set_gpu_caps) + cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X); } diff --git a/plugins/gpu_unai/gpu.cpp b/plugins/gpu_unai/gpu.cpp index 46552acd..c111d78e 100644 --- a/plugins/gpu_unai/gpu.cpp +++ b/plugins/gpu_unai/gpu.cpp @@ -824,7 +824,6 @@ void GPU_updateLace(void) extern "C" { static const struct rearmed_cbs *cbs; -static void *screen_buf; static s16 old_res_horz, old_res_vert, old_rgb24; static void blit(void) @@ -832,12 +831,10 @@ static void blit(void) u16 *base = (u16 *)GPU_FrameBuffer; s16 isRGB24 = (GPU_GP1 & 0x00200000) ? 
1 : 0; s16 h0, x0, y0, w0, h1; - u32 fb_offs; - u8 *dest; x0 = DisplayArea[0] & ~1; // alignment needed by blitter y0 = DisplayArea[1]; - fb_offs = FRAME_OFFSET(x0, y0); + base += FRAME_OFFSET(x0, y0); w0 = DisplayArea[2]; h0 = DisplayArea[3]; // video mode @@ -853,62 +850,10 @@ static void blit(void) old_res_horz = w0; old_res_vert = h1; old_rgb24 = (s16)isRGB24; - screen_buf = cbs->pl_vout_set_mode(w0, h1, isRGB24 ? 24 : 16); + cbs->pl_vout_set_mode(w0, h1, isRGB24 ? 24 : 16); } - dest = (u8 *)screen_buf; - if (isRGB24) - { - if (!cbs->only_16bpp) - { - for (; h1-- > 0; dest += w0 * 3, fb_offs += 1024) - { - fb_offs &= 1024*512-1; - bgr888_to_rgb888(dest, base + fb_offs, w0 * 3); - } - } - else - { - for (; h1-- > 0; dest += w0 * 2, fb_offs += 1024) - { - fb_offs &= 1024*512-1; - bgr888_to_rgb565(dest, base + fb_offs, w0 * 3); - } - } - } - else - { - for (; h1-- > 0; dest += w0 * 2, fb_offs += 1024) - { - fb_offs &= 1024*512-1; - bgr555_to_rgb565(dest, base + fb_offs, w0 * 2); - } - } - - screen_buf = cbs->pl_vout_flip(); -} - -static void blit_raw(void) -{ - s16 isRGB24 = (GPU_GP1 & 0x00200000) ? 1 : 0; - s16 h0, w0, h1; - - w0 = DisplayArea[2]; - h0 = DisplayArea[3]; // video mode - h1 = DisplayArea[5] - DisplayArea[4]; // display needed - if (h0 == 480) h1 = Min2(h1*2,480); - - if (h1 <= 0) - return; - - if (w0 != old_res_horz || h1 != old_res_vert || isRGB24 != old_rgb24) - { - old_res_horz = w0; - old_res_vert = h1; - old_rgb24 = (s16)isRGB24; - screen_buf = cbs->pl_vout_set_mode(w0, h1, isRGB24 ? 
24 : 16); - } - cbs->pl_vout_raw_flip(DisplayArea[0], DisplayArea[1]); + cbs->pl_vout_flip(base, 1024, isRGB24, w0, h1); } void GPU_updateLace(void) @@ -920,10 +865,7 @@ void GPU_updateLace(void) return; if (!wasSkip) { - if (cbs->pl_vout_raw_flip != NULL) - blit_raw(); - else - blit(); + blit(); fb_dirty = false; skCount = 0; } @@ -939,7 +881,6 @@ void GPU_updateLace(void) long GPUopen(unsigned long *, char *, char *) { cbs->pl_vout_open(); - screen_buf = cbs->pl_vout_flip(); return 0; } @@ -966,6 +907,8 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_) cbs_->pl_vout_set_raw_vram((void *)GPU_FrameBuffer); cbs = cbs_; + if (cbs->pl_set_gpu_caps) + cbs->pl_set_gpu_caps(0); } } /* extern "C" */ diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 38e7ce1a..de167214 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -154,6 +154,14 @@ int renderer_init(void) return 0; } +void renderer_finish(void) +{ +} + +void renderer_notify_res_change(void) +{ +} + extern const unsigned char cmd_lengths[256]; int do_cmd_list(unsigned int *list, int list_len, int *last_cmd) @@ -525,6 +533,8 @@ void renderer_set_config(const struct rearmed_cbs *cbs) enableAbbeyHack = cbs->gpu_unai.abe_hack; light = !cbs->gpu_unai.no_light; blend = !cbs->gpu_unai.no_blend; + + GPU_FrameBuffer = (u16 *)gpu.vram; } #endif diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 46e92d1b..b61bff60 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -24,7 +24,7 @@ //#define log_anomaly gpu_log #define log_anomaly(...) 
-struct psx_gpu gpu __attribute__((aligned(2048))); +struct psx_gpu gpu; static noinline int do_cmd_buffer(uint32_t *data, int count); static void finish_vram_transfer(int is_read); @@ -133,6 +133,22 @@ static noinline void get_gpu_info(uint32_t data) } } +// double, for overdraw guard +#define VRAM_SIZE (1024 * 512 * 2 * 2) + +static int map_vram(void) +{ + gpu.vram = gpu.mmap(VRAM_SIZE); + if (gpu.vram != NULL) { + gpu.vram += 4096 / 2; + return 0; + } + else { + fprintf(stderr, "could not map vram, expect crashes\n"); + return -1; + } +} + long GPUinit(void) { int ret; @@ -145,12 +161,26 @@ long GPUinit(void) gpu.cmd_len = 0; do_reset(); + if (gpu.mmap != NULL) { + if (map_vram() != 0) + ret = -1; + } return ret; } long GPUshutdown(void) { - return vout_finish(); + long ret; + + renderer_finish(); + ret = vout_finish(); + if (gpu.vram != NULL) { + gpu.vram -= 4096 / 2; + gpu.munmap(gpu.vram, VRAM_SIZE); + } + gpu.vram = NULL; + + return ret; } void GPUwriteStatus(uint32_t data) @@ -207,6 +237,7 @@ void GPUwriteStatus(uint32_t data) gpu.screen.vres = vres[(gpu.status.reg >> 19) & 3]; update_width(); update_height(); + renderer_notify_res_change(); break; default: if ((cmd & 0xf0) == 0x10) @@ -582,13 +613,13 @@ long GPUfreeze(uint32_t type, struct GPUFreeze *freeze) case 1: // save if (gpu.cmd_len > 0) flush_cmd_buffer(); - memcpy(freeze->psxVRam, gpu.vram, sizeof(gpu.vram)); + memcpy(freeze->psxVRam, gpu.vram, 1024 * 512 * 2); memcpy(freeze->ulControl, gpu.regs, sizeof(gpu.regs)); memcpy(freeze->ulControl + 0xe0, gpu.ex_regs, sizeof(gpu.ex_regs)); freeze->ulStatus = gpu.status.reg; break; case 0: // load - memcpy(gpu.vram, freeze->psxVRam, sizeof(gpu.vram)); + memcpy(gpu.vram, freeze->psxVRam, 1024 * 512 * 2); memcpy(gpu.regs, freeze->ulControl, sizeof(gpu.regs)); memcpy(gpu.ex_regs, freeze->ulControl + 0xe0, sizeof(gpu.ex_regs)); gpu.status.reg = freeze->ulStatus; @@ -669,6 +700,14 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) gpu.state.hcnt = 
cbs->gpu_hcnt; gpu.state.frame_count = cbs->gpu_frame_count; gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace; + gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable; + + gpu.mmap = cbs->mmap; + gpu.munmap = cbs->munmap; + + // delayed vram mmap + if (gpu.vram == NULL) + map_vram(); if (cbs->pl_vout_set_raw_vram) cbs->pl_vout_set_raw_vram(gpu.vram); diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 1cbe38cd..d11f991c 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -17,10 +17,9 @@ extern "C" { #define CMD_BUFFER_LEN 1024 struct psx_gpu { - uint16_t vram[1024 * 512]; - uint16_t guard[1024 * 512]; // overdraw guard uint32_t cmd_buffer[CMD_BUFFER_LEN]; uint32_t regs[16]; + uint16_t *vram; union { uint32_t reg; struct { @@ -67,6 +66,8 @@ struct psx_gpu { uint32_t old_interlace:1; uint32_t allow_interlace:2; uint32_t blanked:1; + uint32_t enhancement_enable:1; + uint32_t enhancement_active:1; uint32_t *frame_count; uint32_t *hcnt; /* hsync count */ struct { @@ -87,6 +88,10 @@ struct psx_gpu { uint32_t last_flip_frame; uint32_t pending_fill[3]; } frameskip; + uint16_t *(*get_enhancement_bufer) + (int *x, int *y, int *w, int *h, int *vram_h); + void *(*mmap)(unsigned int size); + void (*munmap)(void *ptr, unsigned int size); }; extern struct psx_gpu gpu; @@ -98,11 +103,13 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd); struct rearmed_cbs; int renderer_init(void); +void renderer_finish(void); void renderer_sync_ecmds(uint32_t * ecmds); void renderer_update_caches(int x, int y, int w, int h); void renderer_flush_queues(void); void renderer_set_interlace(int enable, int is_odd); void renderer_set_config(const struct rearmed_cbs *config); +void renderer_notify_res_change(void); int vout_init(void); int vout_finish(void); diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 0bd1ecf6..11307e2a 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -15,7 +15,6 @@ #include 
"../../frontend/plugin_lib.h" static const struct rearmed_cbs *cbs; -static void *screen_buf; int vout_init(void) { @@ -27,90 +26,74 @@ int vout_finish(void) return 0; } -static void check_mode_change(void) +static void check_mode_change(int force) { static uint32_t old_status; static int old_h; + int w = gpu.screen.hres; + int h = gpu.screen.h; + + gpu.state.enhancement_active = + gpu.get_enhancement_bufer != NULL && gpu.state.enhancement_enable + && w <= 512 && h <= 256 && !gpu.status.rgb24; + + if (gpu.state.enhancement_active) { + w *= 2; + h *= 2; + } // width|rgb24 change? - if ((gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || gpu.screen.h != old_h) + if (force || (gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || h != old_h) { old_status = gpu.status.reg; - old_h = gpu.screen.h; - screen_buf = cbs->pl_vout_set_mode(gpu.screen.hres, gpu.screen.h, + old_h = h; + + cbs->pl_vout_set_mode(w, h, (gpu.status.rgb24 && !cbs->only_16bpp) ? 24 : 16); } } -static void blit(void) +void vout_update(void) { int x = gpu.screen.x & ~1; // alignment needed by blitter int y = gpu.screen.y; int w = gpu.screen.w; int h = gpu.screen.h; uint16_t *vram = gpu.vram; - int stride = gpu.screen.hres; - int fb_offs, doffs; - uint8_t *dest; + int vram_h = 512; - dest = (uint8_t *)screen_buf; - if (dest == NULL) + if (w == 0 || h == 0) return; - fb_offs = y * 1024 + x; + check_mode_change(0); + if (gpu.state.enhancement_active) + vram = gpu.get_enhancement_bufer(&x, &y, &w, &h, &vram_h); - // only do centering, at least for now - doffs = (stride - w) / 2 & ~1; - - if (gpu.status.rgb24) - { - if (cbs->only_16bpp) { - dest += doffs * 2; - for (; h-- > 0; dest += stride * 2, fb_offs += 1024) - { - fb_offs &= 1024*512-1; - bgr888_to_rgb565(dest, vram + fb_offs, w * 3); - } - } - else { - dest += (doffs / 8) * 24; - for (; h-- > 0; dest += stride * 3, fb_offs += 1024) - { - fb_offs &= 1024*512-1; - bgr888_to_rgb888(dest, vram + fb_offs, w * 3); - } - } - } - else - { - dest += doffs * 2; 
- for (; h-- > 0; dest += stride * 2, fb_offs += 1024) - { - fb_offs &= 1024*512-1; - bgr555_to_rgb565(dest, vram + fb_offs, w * 2); + if (y + h > vram_h) { + if (y + h - vram_h > h / 2) { + // wrap + y = 0; + h -= vram_h - y; } + else + // clip + h = vram_h - y; } - screen_buf = cbs->pl_vout_flip(); -} + vram += y * 1024 + x; -void vout_update(void) -{ - check_mode_change(); - if (cbs->pl_vout_raw_flip) - cbs->pl_vout_raw_flip(gpu.screen.x, gpu.screen.y); - else - blit(); + cbs->pl_vout_flip(vram, 1024, gpu.status.rgb24, w, h); } void vout_blank(void) { - check_mode_change(); - if (cbs->pl_vout_raw_flip == NULL) { - int bytespp = gpu.status.rgb24 ? 3 : 2; - memset(screen_buf, 0, gpu.screen.hres * gpu.screen.h * bytespp); - screen_buf = cbs->pl_vout_flip(); + int w = gpu.screen.hres; + int h = gpu.screen.h; + if (gpu.state.enhancement_active) { + w *= 2; + h *= 2; } + cbs->pl_vout_flip(NULL, 1024, gpu.status.rgb24, w, h); } long GPUopen(void **unused) @@ -119,7 +102,7 @@ long GPUopen(void **unused) gpu.frameskip.frame_ready = 1; cbs->pl_vout_open(); - screen_buf = cbs->pl_vout_flip(); + check_mode_change(1); return 0; }