From: notaz <notasas@gmail.com>
Date: Sun, 28 Oct 2012 23:08:35 +0000 (+0200)
Subject: move blit to core, allow filtering while blitting
X-Git-Tag: r16~15^2~2
X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=fa56d36096cd4ab2b227ce2aa61c8404b8874689;p=pcsx_rearmed.git

move blit to core, allow filtering while blitting

also adds libpicofe to pull filters from, and filter related UI stuff
---

diff --git a/.gitmodules b/.gitmodules
index 650250d6..f93599e3 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,6 @@
-[submodule "frontend/warm"]
+[submodule "libpicofe"]
+	path = frontend/libpicofe
+	url = git://notaz.gp2x.de/~notaz/libpicofe.git
+[submodule "warm"]
 	path = frontend/warm
 	url = git://notaz.gp2x.de/~notaz/warm.git
diff --git a/Makefile b/Makefile
index af110879..1f3e7361 100644
--- a/Makefile
+++ b/Makefile
@@ -127,6 +127,14 @@ OBJS += plugins/cdrcimg/cdrcimg.o
 # dfinput
 OBJS += plugins/dfinput/main.o plugins/dfinput/pad.o plugins/dfinput/guncon.o
 
+# misc
+ifeq "$(HAVE_NEON)" "1"
+OBJS += frontend/libpicofe/arm/neon_scale2x.o
+OBJS += frontend/libpicofe/arm/neon_eagle2x.o
+frontend/libpicofe/arm/neon_scale2x.o: CFLAGS += -DDO_BGR_TO_RGB
+frontend/libpicofe/arm/neon_eagle2x.o: CFLAGS += -DDO_BGR_TO_RGB
+endif
+
 # gui
 OBJS += frontend/main.o frontend/plugin.o
 OBJS += frontend/common/readpng.o frontend/common/fonts.o
@@ -178,6 +186,12 @@ endif
 frontend/%.o: CFLAGS += -DIN_EVDEV
 frontend/menu.o frontend/main.o frontend/plat_sdl.o: frontend/revision.h
 
+frontend/libpicofe/arm/neon_scale2x.S frontend/libpicofe/menu.c:
+	@echo "libpicofe module is missing, please run:"
+	@echo "git submodule init && git submodule update"
+	@exit 1
+
+
 libpcsxcore/gte_nf.o: libpcsxcore/gte.c
 	$(CC) -c -o $@ $^ $(CFLAGS) -DFLAGLESS
 
@@ -185,7 +199,6 @@ frontend/revision.h: FORCE
 	@(git describe || echo) | sed -e 's/.*/#define REV "\0"/' > $@_
 	@diff -q $@_ $@ > /dev/null 2>&1 || cp $@_ $@
 	@rm $@_
-.PHONY: FORCE
 
 %.o: %.S
 	$(CC) $(CFLAGS) -c $^ -o $@
@@ -213,6 +226,8 @@ plugins_:
 clean_plugins:
 endif
 
+.PHONY: all clean target_ plugins_ clean_plugins FORCE
+
 # ----------- release -----------
 
 VER ?= $(shell git describe HEAD)
diff --git a/frontend/libpicofe b/frontend/libpicofe
new file mode 160000
index 00000000..6ce097ba
--- /dev/null
+++ b/frontend/libpicofe
@@ -0,0 +1 @@
+Subproject commit 6ce097ba2f3cd1c269bacd032b775b6d296433fc
diff --git a/frontend/libretro.c b/frontend/libretro.c
index 9bbea5ba..d1cb4006 100644
--- a/frontend/libretro.c
+++ b/frontend/libretro.c
@@ -13,6 +13,7 @@
 #include "../libpcsxcore/psxcounters.h"
 #include "../libpcsxcore/new_dynarec/new_dynarec.h"
 #include "../plugins/dfsound/out.h"
+#include "../plugins/gpulib/cspace.h"
 #include "main.h"
 #include "plugin.h"
 #include "plugin_lib.h"
@@ -26,7 +27,6 @@ static retro_environment_t environ_cb;
 static retro_audio_sample_batch_t audio_batch_cb;
 
 static void *vout_buf;
-static int vout_width, vout_height;
 static int samples_sent, samples_to_send;
 static int plugins_opened;
 
@@ -41,11 +41,8 @@ static int vout_open(void)
 	return 0;
 }
 
-static void *vout_set_mode(int w, int h, int bpp)
+static void vout_set_mode(int w, int h, int bpp)
 {
-	vout_width = w;
-	vout_height = h;
-	return vout_buf;
 }
 
 /* FIXME: either teach PCSX to blit to RGB1555 or RetroArch to support RGB565 */
@@ -59,13 +56,39 @@ static void convert(void *buf, size_t bytes)
 	}
 }
 
-static void *vout_flip(void)
+static void vout_flip(const void *vram, int stride, int bgr24, int w, int h)
 {
-	pl_rearmed_cbs.flip_cnt++;
-	convert(vout_buf,  vout_width * vout_height * 2);
-	video_cb(vout_buf, vout_width, vout_height, vout_width * 2);
+	unsigned short *dest = vout_buf;
+	const unsigned short *src = vram;
+	int dstride = w, h1 = h;
+
+	if (vram == NULL) {
+		// blanking: clear the buffer that is actually sent to video_cb below
+		memset(vout_buf, 0, dstride * h * 2);
+		goto out;
+	}
+
+	if (bgr24)
+	{
+		// XXX: could we switch to RETRO_PIXEL_FORMAT_XRGB8888 here?
+		for (; h1-- > 0; dest += dstride, src += stride)
+		{
+			bgr888_to_rgb565(dest, src, w * 3);
+		}
+	}
+	else
+	{
+		for (; h1-- > 0; dest += dstride, src += stride)
+		{
+			bgr555_to_rgb565(dest, src, w * 2);
+		}
+	}
+
 
-	return vout_buf;
+out:
+	convert(vout_buf, w * h * 2);
+	video_cb(vout_buf, w, h, w * 2);
+	pl_rearmed_cbs.flip_cnt++;
 }
 
 static void vout_close(void)
diff --git a/frontend/main.c b/frontend/main.c
index 7c15cb09..56b5cb76 100644
--- a/frontend/main.c
+++ b/frontend/main.c
@@ -232,10 +232,13 @@ do_state_slot:
 			pl_rearmed_cbs.frameskip == 0 ? "OFF" : "1" );
 		plugin_call_rearmed_cbs();
 		break;
-	case SACTION_TOGGLE_RENDERER:
-		pl_rearmed_cbs.gpu_neon.enhancement_enable =
-			!pl_rearmed_cbs.gpu_neon.enhancement_enable;
+	case SACTION_SWITCH_DISPMODE:
+		pl_switch_dispmode();
 		plugin_call_rearmed_cbs();
+		if (GPU_open != NULL && GPU_close != NULL) {
+			GPU_close();
+			GPU_open(&gpuDisp, "PCSX", NULL);
+		}
 		break;
 	case SACTION_SCREENSHOT:
 		{
diff --git a/frontend/main.h b/frontend/main.h
index 56e1e735..a03db8b2 100644
--- a/frontend/main.h
+++ b/frontend/main.h
@@ -65,7 +65,7 @@ enum sched_action {
 	SACTION_NEXT_SSLOT,
 	SACTION_PREV_SSLOT,
 	SACTION_TOGGLE_FSKIP,
-	SACTION_TOGGLE_RENDERER,
+	SACTION_SWITCH_DISPMODE,
 	SACTION_SCREENSHOT,
 	SACTION_VOLUME_UP,
 	SACTION_VOLUME_DOWN,
diff --git a/frontend/menu.c b/frontend/menu.c
index f0fd17c4..d3ce06c3 100644
--- a/frontend/menu.c
+++ b/frontend/menu.c
@@ -75,6 +75,7 @@ typedef enum
 	MA_OPT_SCALER,
 	MA_OPT_SCALER2,
 	MA_OPT_FILTERING,
+	MA_OPT_FILTERING2,
 	MA_OPT_SCALER_C,
 } menu_id;
 
@@ -87,7 +88,7 @@ static int psx_clock;
 static int memcard1_sel, memcard2_sel;
 int g_opts, g_scaler;
 int soft_scaling, analog_deadzone; // for Caanoo
-int filter;
+int filter, soft_filter;
 
 #ifdef __ARM_ARCH_7A__
 #define DEFAULT_PSX_CLOCK 57
@@ -213,6 +214,7 @@ static void menu_set_defconfig(void)
 	frameskip = 0;
 	analog_deadzone = 50;
 	soft_scaling = 1;
+	soft_filter = 0;
 	psx_clock = DEFAULT_PSX_CLOCK;
 
 	region = 0;
@@ -274,6 +276,7 @@ static const struct {
 	CE_INTVAL(g_layer_w),
 	CE_INTVAL(g_layer_h),
 	CE_INTVAL(filter),
+	CE_INTVAL(soft_filter),
 	CE_INTVAL(state_slot),
 	CE_INTVAL(cpu_clock),
 	CE_INTVAL(g_opts),
@@ -663,7 +666,7 @@ me_bind_action emuctrl_actions[] =
 	{ "Next Save Slot   ", 1 << SACTION_NEXT_SSLOT },
 	{ "Toggle Frameskip ", 1 << SACTION_TOGGLE_FSKIP },
 	{ "Take Screenshot  ", 1 << SACTION_SCREENSHOT },
-	{ "Toggle Renderer  ", 1 << SACTION_TOGGLE_RENDERER },
+	{ "Switch Renderer  ", 1 << SACTION_SWITCH_DISPMODE },
 	{ "Enter Menu       ", 1 << SACTION_ENTER_MENU },
 #ifdef __ARM_ARCH_7A__ /* XXX */
 	{ "Minimize         ", 1 << SACTION_MINIMIZE },
@@ -1032,9 +1035,15 @@ static int menu_loop_keyconfig(int id, int keys)
 // ------------ gfx options menu ------------
 
 static const char *men_scaler[] = { "1x1", "scaled 4:3", "integer scaled 4:3", "fullscreen", "custom", NULL };
+static const char *men_soft_filter[] = { "None",
+#ifdef __ARM_NEON__
+	"scale2x", "eagle2x",
+#endif
+	NULL };
+static const char *men_dummy[] = { NULL };
 static const char h_cscaler[]   = "Displays the scaler layer, you can resize it\n"
 				  "using d-pad or move it using R+d-pad";
-static const char *men_dummy[] = { NULL };
+static const char h_soft_filter[] = "Works only if game uses low resolution modes";
 
 static int menu_loop_cscaler(int id, int keys)
 {
@@ -1093,6 +1102,7 @@ static menu_entry e_menu_gfx_options[] =
 	mee_enum      ("Scaler",                   MA_OPT_SCALER, g_scaler, men_scaler),
 	mee_onoff     ("Software Scaling",         MA_OPT_SCALER2, soft_scaling, 1),
 	mee_enum      ("Filter",                   MA_OPT_FILTERING, filter, men_dummy),
+	mee_enum_h    ("Software Filter",          MA_OPT_FILTERING2, soft_filter, men_soft_filter, h_soft_filter),
 //	mee_onoff     ("Vsync",                    0, vsync, 1),
 	mee_cust_h    ("Setup custom scaler",      MA_OPT_SCALER_C, menu_loop_cscaler, NULL, h_cscaler),
 	mee_end,
@@ -1121,9 +1131,13 @@ void menu_set_filter_list(void *filters)
 
 #ifdef __ARM_NEON__
 
-static const char h_gpu_neon[] = "Configure built-in NEON GPU plugin";
-static const char h_gpu_neon_enhanced[] = "Renders in double resolution at the cost of lower performance";
-static const char h_gpu_neon_enhanced_hack[] = "Speed hack for above option (glitches some games)";
+static const char h_gpu_neon[] =
+	"Configure built-in NEON GPU plugin";
+static const char h_gpu_neon_enhanced[] =
+	"Renders in double resolution at the cost of lower performance\n"
+	"(not available for high resolution games)";
+static const char h_gpu_neon_enhanced_hack[] =
+	"Speed hack for above option (glitches some games)";
 static const char *men_gpu_interlace[] = { "Off", "On", "Auto", NULL };
 
 static menu_entry e_menu_plugin_gpu_neon[] =
@@ -2254,6 +2268,7 @@ void menu_init(void)
 #ifndef __ARM_ARCH_7A__ /* XXX */
 	me_enable(e_menu_gfx_options, MA_OPT_SCALER, 0);
 	me_enable(e_menu_gfx_options, MA_OPT_FILTERING, 0);
+	me_enable(e_menu_gfx_options, MA_OPT_FILTERING2, 0);
 	me_enable(e_menu_gfx_options, MA_OPT_SCALER_C, 0);
 	me_enable(e_menu_keyconfig, MA_CTRL_NUBS_BTNS, 0);
 #else
diff --git a/frontend/menu.h b/frontend/menu.h
index 2062acdf..221be15c 100644
--- a/frontend/menu.h
+++ b/frontend/menu.h
@@ -22,9 +22,15 @@ enum g_scaler_opts {
 	SCALE_CUSTOM,
 };
 
+enum g_soft_filter_opts {
+	SOFT_FILTER_NONE,
+	SOFT_FILTER_SCALE2X,
+	SOFT_FILTER_EAGLE2X,
+};
+
 extern int g_opts, g_scaler;
 extern int soft_scaling, analog_deadzone;
-extern int filter;
+extern int filter, soft_filter;
 
 extern int g_menuscreen_w;
 extern int g_menuscreen_h;
diff --git a/frontend/plat_pandora.c b/frontend/plat_pandora.c
index b6447cf9..b82450c6 100644
--- a/frontend/plat_pandora.c
+++ b/frontend/plat_pandora.c
@@ -65,7 +65,7 @@ static const struct in_default_bind in_evdev_defbinds[] = {
 	{ KEY_4,        IN_BINDTYPE_EMU, SACTION_NEXT_SSLOT },
 	{ KEY_5,        IN_BINDTYPE_EMU, SACTION_TOGGLE_FSKIP },
 	{ KEY_6,        IN_BINDTYPE_EMU, SACTION_SCREENSHOT },
-	{ KEY_7,        IN_BINDTYPE_EMU, SACTION_TOGGLE_RENDERER },
+	{ KEY_7,        IN_BINDTYPE_EMU, SACTION_SWITCH_DISPMODE },
 	{ 0, 0, 0 }
 };
 
diff --git a/frontend/plat_pollux.c b/frontend/plat_pollux.c
index 1dafb7cd..52a09b14 100644
--- a/frontend/plat_pollux.c
+++ b/frontend/plat_pollux.c
@@ -305,12 +305,13 @@ static void spend_cycles(int loops)
 #define DMA_REG(x) memregl[(DMA_BASE6 + x) >> 2]
 
 /* this takes ~1.5ms, while ldm/stm ~1.95ms */
-static void raw_flip_dma(int x, int y)
+static void raw_flip_dma(const void *vram, int stride, int bgr24, int w, int h)
 {
+	unsigned int pixel_offset = (const unsigned short *)vram - psx_vram; /* display start, in pixels from vram base */
 	unsigned int dst = fb_paddrs[fb_work_buf] +
 			(fb_offset_y * 320 + fb_offset_x) * psx_bpp / 8;
-	int spsx_line = y + psx_offset_y;
-	int spsx_offset = (x + psx_offset_x) & 0x3f8;
+	int spsx_line = pixel_offset / 1024 + psx_offset_y;
+	int spsx_offset = (pixel_offset + psx_offset_x) & 0x3f8;
 	int dst_stride = 320 * psx_bpp / 8;
 	int len = psx_src_width * psx_bpp / 8;
 	int i;
@@ -344,7 +345,7 @@ static void raw_flip_dma(int x, int y)
 
 	if (psx_bpp == 16) {
 		pl_vout_buf = g_menuscreen_ptr;
-		pl_print_hud(fb_offset_x);
+		pl_print_hud(w, h, fb_offset_x);
 	}
 
 	g_menuscreen_ptr = fb_flip();
@@ -354,26 +355,24 @@ static void raw_flip_dma(int x, int y)
 }
 
 #define make_flip_func(name, blitfunc)                                                  \
-static void name(int x, int y)                                                          \
+static void name(const void *vram_, int stride, int bgr24, int w, int h)                \
 {                                                                                       \
-        unsigned short *vram = psx_vram;                                                \
+        const unsigned short *vram = vram_;                                             \
         unsigned char *dst = (unsigned char *)g_menuscreen_ptr +                        \
                         (fb_offset_y * 320 + fb_offset_x) * psx_bpp / 8;                \
-        unsigned int src = (y + psx_offset_y) * 1024 + x + psx_offset_x;                \
         int dst_stride = 320 * psx_bpp / 8;                                             \
         int len = psx_src_width * psx_bpp / 8;                                          \
         int i;                                                                          \
                                                                                         \
         pcnt_start(PCNT_BLIT);                                                          \
                                                                                         \
-        for (i = psx_src_height; i > 0; i--, src += psx_step * 1024, dst += dst_stride) { \
-                src &= 1024*512-1;                                                      \
-                blitfunc(dst, vram + src, len);                                         \
-        }                                                                               \
+        vram += psx_offset_y * 1024 + psx_offset_x;                                     \
+        for (i = psx_src_height; i > 0; i--, vram += psx_step * 1024, dst += dst_stride)\
+                blitfunc(dst, vram, len);                                               \
                                                                                         \
         if (psx_bpp == 16) {                                                            \
                 pl_vout_buf = g_menuscreen_ptr;                                         \
-                pl_print_hud(fb_offset_x);                                              \
+                pl_print_hud(w, h, fb_offset_x);                                        \
         }                                                                               \
                                                                                         \
         g_menuscreen_ptr = fb_flip();                                                   \
@@ -402,20 +401,20 @@ void *plat_gvideo_set_mode(int *w_, int *h_, int *bpp_)
 
 	switch (w + (bpp != 16) + !soft_scaling) {
 	case 640:
-		pl_rearmed_cbs.pl_vout_raw_flip = raw_flip_soft_640;
+		pl_rearmed_cbs.pl_vout_flip = raw_flip_soft_640;
 		w_max = 640;
 		break;
 	case 512:
-		pl_rearmed_cbs.pl_vout_raw_flip = raw_flip_soft_512;
+		pl_rearmed_cbs.pl_vout_flip = raw_flip_soft_512;
 		w_max = 512;
 		break;
 	case 384:
 	case 368:
-		pl_rearmed_cbs.pl_vout_raw_flip = raw_flip_soft_368;
+		pl_rearmed_cbs.pl_vout_flip = raw_flip_soft_368;
 		w_max = 368;
 		break;
 	default:
-		pl_rearmed_cbs.pl_vout_raw_flip = have_warm ? raw_flip_dma : raw_flip_soft;
+		pl_rearmed_cbs.pl_vout_flip = have_warm ? raw_flip_dma : raw_flip_soft;
 		w_max = 320;
 		break;
 	}
@@ -621,7 +620,7 @@ void plat_init(void)
 	if (mixerdev == -1)
 		perror("open(/dev/mixer)");
 
-	pl_rearmed_cbs.pl_vout_raw_flip = have_warm ? raw_flip_dma : raw_flip_soft;
+	pl_rearmed_cbs.pl_vout_flip = have_warm ? raw_flip_dma : raw_flip_soft;
 	pl_rearmed_cbs.pl_vout_set_raw_vram = pl_vout_set_raw_vram;
 
 	psx_src_width = 320;
diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c
index 484d67bf..bf213dd4 100644
--- a/frontend/plugin_lib.c
+++ b/frontend/plugin_lib.c
@@ -27,8 +27,11 @@
 #include "plat.h"
 #include "pcnt.h"
 #include "pl_gun_ts.h"
+#include "libpicofe/arm/neon_scale2x.h"
+#include "libpicofe/arm/neon_eagle2x.h"
 #include "../libpcsxcore/new_dynarec/new_dynarec.h"
 #include "../libpcsxcore/psemu_plugin_defs.h"
+#include "../plugins/gpulib/cspace.h"
 
 int in_type1, in_type2;
 int in_a1[2] = { 127, 127 }, in_a2[2] = { 127, 127 };
@@ -39,6 +42,7 @@ void *tsdev;
 void *pl_vout_buf;
 int g_layer_x, g_layer_y, g_layer_w, g_layer_h;
 static int pl_vout_w, pl_vout_h, pl_vout_bpp; /* output display/layer */
+static int pl_vout_scale;
 static int psx_w, psx_h, psx_bpp;
 static int vsync_cnt;
 static int is_pal, frame_interval, frame_interval1024;
@@ -114,10 +118,8 @@ static __attribute__((noinline)) void draw_active_chans(int vout_w, int vout_h)
 	}
 }
 
-void pl_print_hud(int xborder)
+void pl_print_hud(int w, int h, int xborder)
 {
-	int w = pl_vout_w, h = pl_vout_h;
-
 	if (h < 16)
 		return;
 
@@ -185,55 +187,142 @@ static void update_layer_size(int w, int h)
 	if (g_layer_h > g_menuscreen_h) g_layer_h = g_menuscreen_h;
 }
 
-static void *pl_vout_set_mode(int w, int h, int bpp)
+// XXX: this is platform specific really
+static int resolution_ok(int w, int h)
 {
+	return w <= 1024 && h <= 512;
+}
+
+static void pl_vout_set_mode(int w, int h, int bpp)
+{
+	int vout_w, vout_h, vout_bpp;
+
 	// special h handling, Wipeout likes to change it by 1-6
 	static int vsync_cnt_ms_prev;
 	if ((unsigned int)(vsync_cnt - vsync_cnt_ms_prev) < 5*60)
 		h = (h + 7) & ~7;
 	vsync_cnt_ms_prev = vsync_cnt;
 
-	if (w == psx_w && h == psx_h && bpp == psx_bpp)
-		return pl_vout_buf;
+	vout_w = psx_w = w;
+	vout_h = psx_h = h;
+	vout_bpp = psx_bpp = bpp;
+
+	pl_vout_scale = 1;
+#ifdef __ARM_NEON__
+	if (soft_filter) {
+		if (resolution_ok(w * 2, h * 2) && bpp == 16) {
+			vout_w *= 2;
+			vout_h *= 2;
+			pl_vout_scale = 2;
+		}
+		else {
+			// filter unavailable
+			hud_msg[0] = 0;
+		}
+	}
+#endif
 
-	pl_vout_w = psx_w = w;
-	pl_vout_h = psx_h = h;
-	pl_vout_bpp = psx_bpp = bpp;
+	if (pl_vout_buf != NULL && vout_w == pl_vout_w && vout_h == pl_vout_h
+	    && vout_bpp == pl_vout_bpp)
+		return;
 
-	update_layer_size(pl_vout_w, pl_vout_h);
+	update_layer_size(vout_w, vout_h);
 
-	pl_vout_buf = plat_gvideo_set_mode(&pl_vout_w, &pl_vout_h, &pl_vout_bpp);
-	if (pl_vout_buf == NULL && pl_rearmed_cbs.pl_vout_raw_flip == NULL)
+	pl_vout_buf = plat_gvideo_set_mode(&vout_w, &vout_h, &vout_bpp);
+	if (pl_vout_buf == NULL)
 		fprintf(stderr, "failed to set mode %dx%d@%d\n",
 			psx_w, psx_h, psx_bpp);
+	else {
+		pl_vout_w = vout_w;
+		pl_vout_h = vout_h;
+		pl_vout_bpp = vout_bpp;
+	}
 
 	menu_notify_mode_change(pl_vout_w, pl_vout_h, pl_vout_bpp);
-
-	return pl_vout_buf;
 }
 
-// only used if raw flip is not defined
-static void *pl_vout_flip(void)
+static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h)
 {
-	pl_rearmed_cbs.flip_cnt++;
+	static int doffs_old, clear_counter;
+	unsigned char *dest = pl_vout_buf;
+	const unsigned short *src = vram;
+	int dstride = pl_vout_w, h1 = h;
+	int doffs;
+
+	if (dest == NULL)
+		goto out;
+
+	if (vram == NULL) {
+		// blanking
+		memset(pl_vout_buf, 0, dstride * pl_vout_h * pl_vout_bpp / 8);
+		goto out;
+	}
+
+	// borders
+	doffs = (dstride - w * pl_vout_scale) / 2 & ~1;
+	dest += doffs * 2;
+
+	if (doffs > doffs_old)
+		clear_counter = 2;
+	doffs_old = doffs;
+
+	if (clear_counter > 0) {
+		memset(pl_vout_buf, 0, dstride * pl_vout_h * pl_vout_bpp / 8);
+		clear_counter--;
+	}
+
+	if (bgr24)
+	{
+		if (pl_rearmed_cbs.only_16bpp) {
+			for (; h1-- > 0; dest += dstride * 2, src += stride)
+			{
+				bgr888_to_rgb565(dest, src, w * 3);
+			}
+		}
+		else {
+			dest -= doffs * 2;
+			dest += (doffs / 8) * 24;
+
+			for (; h1-- > 0; dest += dstride * 3, src += stride)
+			{
+				bgr888_to_rgb888(dest, src, w * 3);
+			}
+		}
+	}
+#ifdef __ARM_NEON__
+	else if (soft_filter == SOFT_FILTER_SCALE2X && pl_vout_scale == 2)
+	{
+		neon_scale2x_16_16(src, (void *)dest, w,
+			stride * 2, dstride * 2, h1);
+	}
+	else if (soft_filter == SOFT_FILTER_EAGLE2X && pl_vout_scale == 2)
+	{
+		neon_eagle2x_16_16(src, (void *)dest, w,
+			stride * 2, dstride * 2, h1);
+	}
+#endif
+	else
+	{
+		for (; h1-- > 0; dest += dstride * 2, src += stride)
+		{
+			bgr555_to_rgb565(dest, src, w * 2);
+		}
+	}
 
-	if (pl_vout_buf != NULL)
-		pl_print_hud(0);
+	pl_print_hud(w * pl_vout_scale, h * pl_vout_scale, 0);
 
+out:
 	// let's flip now
 	pl_vout_buf = plat_gvideo_flip();
-	return pl_vout_buf;
+	pl_rearmed_cbs.flip_cnt++;
 }
 
 static int pl_vout_open(void)
 {
 	struct timeval now;
-	int h;
 
-	// force mode update
-	h = psx_h;
-	psx_h--;
-	pl_vout_buf = pl_vout_set_mode(psx_w, h, psx_bpp);
+	// force mode update on pl_vout_set_mode() call from gpulib/vout_pl
+	pl_vout_buf = NULL;
 
 	plat_gvideo_open(is_pal);
 
@@ -250,6 +339,11 @@ static void pl_vout_close(void)
 	plat_gvideo_close();
 }
 
+static void pl_set_gpu_caps(int caps)
+{
+	pl_rearmed_cbs.gpu_caps = caps;
+}
+
 void *pl_prepare_screenshot(int *w, int *h, int *bpp)
 {
 	void *ret = plat_prepare_screenshot(w, h, bpp);
@@ -263,6 +357,75 @@ void *pl_prepare_screenshot(int *w, int *h, int *bpp)
 	return pl_vout_buf;
 }
 
+/* display/rendering mode switcher */
+static int dispmode_default(void)
+{
+	pl_rearmed_cbs.gpu_neon.enhancement_enable = 0;
+	soft_filter = SOFT_FILTER_NONE;
+	snprintf(hud_msg, sizeof(hud_msg), "default mode");
+	return 1;
+}
+
+int dispmode_doubleres(void)
+{
+	if (!(pl_rearmed_cbs.gpu_caps & GPU_CAP_SUPPORTS_2X)
+	    || !resolution_ok(psx_w * 2, psx_h * 2) || psx_bpp != 16)
+		return 0;
+
+	dispmode_default();
+	pl_rearmed_cbs.gpu_neon.enhancement_enable = 1;
+	snprintf(hud_msg, sizeof(hud_msg), "double resolution");
+	return 1;
+}
+
+int dispmode_scale2x(void)
+{
+	if (psx_bpp != 16)
+		return 0;
+
+	dispmode_default();
+	soft_filter = SOFT_FILTER_SCALE2X;
+	snprintf(hud_msg, sizeof(hud_msg), "scale2x");
+	return 1;
+}
+
+int dispmode_eagle2x(void)
+{
+	if (psx_bpp != 16)
+		return 0;
+
+	dispmode_default();
+	soft_filter = SOFT_FILTER_EAGLE2X;
+	snprintf(hud_msg, sizeof(hud_msg), "eagle2x");
+	return 1;
+}
+
+static int (*dispmode_switchers[])(void) = {
+	dispmode_default,
+#ifdef __ARM_NEON__
+	dispmode_doubleres,
+	dispmode_scale2x,
+	dispmode_eagle2x,
+#endif
+};
+
+static int dispmode_current;
+
+void pl_switch_dispmode(void)
+{
+	if (pl_rearmed_cbs.gpu_caps & GPU_CAP_OWNS_DISPLAY)
+		return;
+
+	while (1) {
+		dispmode_current++;
+		if (dispmode_current >=
+		    sizeof(dispmode_switchers) / sizeof(dispmode_switchers[0]))
+			dispmode_current = 0;
+		if (dispmode_switchers[dispmode_current]())
+			break;
+	}
+}
+
 #ifndef MAEMO
 static void update_analogs(void)
 {
@@ -501,8 +664,10 @@ struct rearmed_cbs pl_rearmed_cbs = {
 	pl_vout_set_mode,
 	pl_vout_flip,
 	pl_vout_close,
-	pl_mmap,
-	pl_munmap,
+
+	.mmap = pl_mmap,
+	.munmap = pl_munmap,
+	.pl_set_gpu_caps = pl_set_gpu_caps,
 };
 
 /* watchdog */
diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h
index 1701d06c..332fbc2e 100644
--- a/frontend/plugin_lib.h
+++ b/frontend/plugin_lib.h
@@ -31,7 +31,8 @@ void  pl_text_out16(int x, int y, const char *texto, ...);
 void  pl_start_watchdog(void);
 void *pl_prepare_screenshot(int *w, int *h, int *bpp);
 void  pl_init(void);
-void  pl_print_hud(int xborder);
+void  pl_print_hud(int width, int height, int xborder);
+void  pl_switch_dispmode(void);
 
 void  pl_timing_prepare(int is_pal);
 void  pl_frame_limit(void);
@@ -41,14 +42,15 @@ void  pl_update_gun(int *xn, int *xres, int *y, int *in);
 struct rearmed_cbs {
 	void  (*pl_get_layer_pos)(int *x, int *y, int *w, int *h);
 	int   (*pl_vout_open)(void);
-	void *(*pl_vout_set_mode)(int w, int h, int bpp);
-	void *(*pl_vout_flip)(void);
+	void  (*pl_vout_set_mode)(int w, int h, int bpp);
+	void  (*pl_vout_flip)(const void *vram, int stride, int bgr24,
+			      int w, int h);
 	void  (*pl_vout_close)(void);
 	void *(*mmap)(unsigned int size);
 	void  (*munmap)(void *ptr, unsigned int size);
-	// these are only used by some frontends
-	void  (*pl_vout_raw_flip)(int x, int y);
+	// only used by some frontends
 	void  (*pl_vout_set_raw_vram)(void *vram);
+	void  (*pl_set_gpu_caps)(int caps);
 	// some stats, for display by some plugins
 	int flips_per_sec, cpu_usage;
 	float vsps_cur; // currect vsync/s
@@ -82,10 +84,17 @@ struct rearmed_cbs {
 		int   iUseMask, bOpaquePass, bAdvancedBlend, bUseFastMdec;
 		int   iVRamSize, iTexGarbageCollection;
 	} gpu_peopsgl;
+	// misc
+	int gpu_caps;
 };
 
 extern struct rearmed_cbs pl_rearmed_cbs;
 
+enum gpu_plugin_caps {
+	GPU_CAP_OWNS_DISPLAY = (1 << 0),
+	GPU_CAP_SUPPORTS_2X = (1 << 1),
+};
+
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
 #endif
diff --git a/plugins/dfxvideo/draw_pl.c b/plugins/dfxvideo/draw_pl.c
index dffd52b4..06a635da 100644
--- a/plugins/dfxvideo/draw_pl.c
+++ b/plugins/dfxvideo/draw_pl.c
@@ -19,56 +19,26 @@ BOOL           bCheckMask = FALSE;
 unsigned short sSetMask;
 unsigned long  lSetMask;
 
-static void blit(void *vout_buf)
+static void blit(void)
 {
  int px = PSXDisplay.DisplayPosition.x & ~1; // XXX: align needed by bgr*_to_...
  int py = PSXDisplay.DisplayPosition.y;
  int w = PreviousPSXDisplay.Range.x1;
  int h = PreviousPSXDisplay.DisplayMode.y;
- int pitch = PreviousPSXDisplay.DisplayMode.x;
  unsigned short *srcs = psxVuw + py * 1024 + px;
- unsigned char *dest = vout_buf;
 
  if (w <= 0)
    return;
 
- pitch *= (PSXDisplay.RGB24 && !rcbs->only_16bpp) ? 3 : 2;
-
  // account for centering
  h -= PreviousPSXDisplay.Range.y0;
- dest += PreviousPSXDisplay.Range.y0 / 2 * pitch;
- dest += (PreviousPSXDisplay.Range.x0 & ~3) * 2; // must align here too..
-
- if (PSXDisplay.RGB24)
- {
-   if (!rcbs->only_16bpp)
-   {
-     for (; h-- > 0; dest += pitch, srcs += 1024)
-     {
-       bgr888_to_rgb888(dest, srcs, w * 3);
-     }
-   }
-   else
-   {
-     for (; h-- > 0; dest += pitch, srcs += 1024)
-     {
-       bgr888_to_rgb565(dest, srcs, w * 3);
-     }
-   }
- }
- else
- {
-   for (; h-- > 0; dest += pitch, srcs += 1024)
-   {
-     bgr555_to_rgb565(dest, srcs, w * 2);
-   }
- }
+
+ rcbs->pl_vout_flip(srcs, 1024, PSXDisplay.RGB24, w, h);
 }
 
 void DoBufferSwap(void)
 {
  static int fbw, fbh, fb24bpp;
- static void *vout_buf;
 
  if (PreviousPSXDisplay.DisplayMode.x == 0 || PreviousPSXDisplay.DisplayMode.y == 0)
   return;
@@ -80,17 +50,12 @@ void DoBufferSwap(void)
   fbw = PreviousPSXDisplay.DisplayMode.x;
   fbh = PreviousPSXDisplay.DisplayMode.y;
   fb24bpp = PSXDisplay.RGB24;
-  vout_buf = rcbs->pl_vout_set_mode(fbw, fbh, fb24bpp ? 24 : 16);
+  rcbs->pl_vout_set_mode(fbw, fbh, fb24bpp ? 24 : 16);
  }
 
  pcnt_start(PCNT_BLIT);
- if (rcbs->pl_vout_raw_flip != NULL)
-  rcbs->pl_vout_raw_flip(PSXDisplay.DisplayPosition.x, PSXDisplay.DisplayPosition.y);
- else
-  blit(vout_buf);
+ blit();
  pcnt_end(PCNT_BLIT);
-
- vout_buf = rcbs->pl_vout_flip();
 }
 
 void DoClearScreenBuffer(void)
diff --git a/plugins/dfxvideo/gpu.c b/plugins/dfxvideo/gpu.c
index 9fa08fe3..3d20dfa4 100644
--- a/plugins/dfxvideo/gpu.c
+++ b/plugins/dfxvideo/gpu.c
@@ -1143,6 +1143,8 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs)
  dwFrameRateTicks = cbs->gpu_peops.dwFrameRateTicks;
  if (cbs->pl_vout_set_raw_vram)
   cbs->pl_vout_set_raw_vram(psxVub);
+ if (cbs->pl_set_gpu_caps)
+  cbs->pl_set_gpu_caps(0);
 
  skip_advice = &cbs->fskip_advice;
  fps_skip = 100.0f;
diff --git a/plugins/dfxvideo/gpulib_if.c b/plugins/dfxvideo/gpulib_if.c
index 50130f6f..d98520cb 100644
--- a/plugins/dfxvideo/gpulib_if.c
+++ b/plugins/dfxvideo/gpulib_if.c
@@ -421,5 +421,7 @@ void renderer_set_config(const struct rearmed_cbs *cbs)
 {
  iUseDither = cbs->gpu_peops.iUseDither;
  dwActFixes = cbs->gpu_peops.dwActFixes;
+ if (cbs->pl_set_gpu_caps)
+  cbs->pl_set_gpu_caps(0);
  set_vram(gpu.vram);
 }
diff --git a/plugins/gpu-gles/gpulib_if.c b/plugins/gpu-gles/gpulib_if.c
index d245c44b..068dc411 100644
--- a/plugins/gpu-gles/gpulib_if.c
+++ b/plugins/gpu-gles/gpulib_if.c
@@ -715,6 +715,8 @@ void renderer_set_config(const struct rearmed_cbs *cbs_)
  bUseFastMdec = cbs->gpu_peopsgl.bUseFastMdec;
  iTexGarbageCollection = cbs->gpu_peopsgl.iTexGarbageCollection;
  iVRamSize = cbs->gpu_peopsgl.iVRamSize;
+ if (cbs->pl_set_gpu_caps)
+  cbs->pl_set_gpu_caps(GPU_CAP_OWNS_DISPLAY);
 
  set_vram(gpu.vram);
 }
diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c
index 3ce9c1cf..8900d4e8 100644
--- a/plugins/gpu_neon/psx_gpu_if.c
+++ b/plugins/gpu_neon/psx_gpu_if.c
@@ -44,7 +44,7 @@ int do_cmd_list(uint32_t *list, int count, int *last_cmd)
 #define ENHANCEMENT_BUF_SIZE (1024 * 1024 * 2 * 4 + 4096 * 2)
 
 static uint16_t *get_enhancement_bufer(int *x, int *y, int *w, int *h,
- int *stride, int *mask)
+ int *vram_h)
 {
   uint16_t *ret = select_enhancement_buf_ptr(&egpu, *x);
 
@@ -52,8 +52,7 @@ static uint16_t *get_enhancement_bufer(int *x, int *y, int *w, int *h,
   *y *= 2;
   *w = *w * 2;
   *h = *h * 2;
-  *stride *= 2;
-  *mask = 1024 * 1024 - 1;
+  *vram_h = 1024;
   return ret;
 }
 
@@ -183,4 +182,6 @@ void renderer_set_config(const struct rearmed_cbs *cbs)
 
   if (gpu.mmap != NULL && egpu.enhancement_buf_ptr == NULL)
     map_enhancement_buffer();
+  if (cbs->pl_set_gpu_caps)
+    cbs->pl_set_gpu_caps(GPU_CAP_SUPPORTS_2X);
 }
diff --git a/plugins/gpu_unai/gpu.cpp b/plugins/gpu_unai/gpu.cpp
index 46552acd..c111d78e 100644
--- a/plugins/gpu_unai/gpu.cpp
+++ b/plugins/gpu_unai/gpu.cpp
@@ -824,7 +824,6 @@ void  GPU_updateLace(void)
 extern "C" {
 
 static const struct rearmed_cbs *cbs;
-static void *screen_buf;
 static s16 old_res_horz, old_res_vert, old_rgb24;
 
 static void blit(void)
@@ -832,12 +831,10 @@ static void blit(void)
 	u16 *base = (u16 *)GPU_FrameBuffer;
 	s16 isRGB24 = (GPU_GP1 & 0x00200000) ? 1 : 0;
 	s16 h0, x0, y0, w0, h1;
-	u32 fb_offs;
-	u8  *dest;
 
 	x0 = DisplayArea[0] & ~1; // alignment needed by blitter
 	y0 = DisplayArea[1];
-	fb_offs = FRAME_OFFSET(x0, y0);
+	base += FRAME_OFFSET(x0, y0);
 
 	w0 = DisplayArea[2];
 	h0 = DisplayArea[3];  // video mode
@@ -853,62 +850,10 @@ static void blit(void)
 		old_res_horz = w0;
 		old_res_vert = h1;
 		old_rgb24 = (s16)isRGB24;
-		screen_buf = cbs->pl_vout_set_mode(w0, h1, isRGB24 ? 24 : 16);
+		cbs->pl_vout_set_mode(w0, h1, isRGB24 ? 24 : 16);
 	}
-	dest = (u8 *)screen_buf;
 
-	if (isRGB24)
-	{
-		if (!cbs->only_16bpp)
-		{
-			for (; h1-- > 0; dest += w0 * 3, fb_offs += 1024)
-			{
-				fb_offs &= 1024*512-1;
-				bgr888_to_rgb888(dest, base + fb_offs, w0 * 3);
-			}
-		}
-		else
-		{
-			for (; h1-- > 0; dest += w0 * 2, fb_offs += 1024)
-			{
-				fb_offs &= 1024*512-1;
-				bgr888_to_rgb565(dest, base + fb_offs, w0 * 3);
-			}
-		}
-	}
-	else
-	{
-		for (; h1-- > 0; dest += w0 * 2, fb_offs += 1024)
-		{
-			fb_offs &= 1024*512-1;
-			bgr555_to_rgb565(dest, base + fb_offs, w0 * 2);
-		}
-	}
-
-	screen_buf = cbs->pl_vout_flip();
-}
-
-static void blit_raw(void)
-{
-	s16 isRGB24 = (GPU_GP1 & 0x00200000) ? 1 : 0;
-	s16 h0, w0, h1;
-
-	w0 = DisplayArea[2];
-	h0 = DisplayArea[3];  // video mode
-	h1 = DisplayArea[5] - DisplayArea[4]; // display needed
-	if (h0 == 480) h1 = Min2(h1*2,480);
-
-	if (h1 <= 0)
-		return;
-
-	if (w0 != old_res_horz || h1 != old_res_vert || isRGB24 != old_rgb24)
-	{
-		old_res_horz = w0;
-		old_res_vert = h1;
-		old_rgb24 = (s16)isRGB24;
-		screen_buf = cbs->pl_vout_set_mode(w0, h1, isRGB24 ? 24 : 16);
-	}
-	cbs->pl_vout_raw_flip(DisplayArea[0], DisplayArea[1]);
+	cbs->pl_vout_flip(base, 1024, isRGB24, w0, h1);
 }
 
 void GPU_updateLace(void)
@@ -920,10 +865,7 @@ void GPU_updateLace(void)
 		return;
 
 	if (!wasSkip) {
-		if (cbs->pl_vout_raw_flip != NULL)
-			blit_raw();
-		else
-			blit();
+		blit();
 		fb_dirty = false;
 		skCount = 0;
 	}
@@ -939,7 +881,6 @@ void GPU_updateLace(void)
 long GPUopen(unsigned long *, char *, char *)
 {
 	cbs->pl_vout_open();
-	screen_buf = cbs->pl_vout_flip();
 	return 0;
 }
 
@@ -966,6 +907,8 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs_)
 		cbs_->pl_vout_set_raw_vram((void *)GPU_FrameBuffer);
 
 	cbs = cbs_;
+	if (cbs->pl_set_gpu_caps)
+		cbs->pl_set_gpu_caps(0);
 }
 
 } /* extern "C" */
diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h
index ea5051ed..d11f991c 100644
--- a/plugins/gpulib/gpu.h
+++ b/plugins/gpulib/gpu.h
@@ -89,7 +89,7 @@ struct psx_gpu {
     uint32_t pending_fill[3];
   } frameskip;
   uint16_t *(*get_enhancement_bufer)
-    (int *x, int *y, int *w, int *h, int *stride, int *mask);
+    (int *x, int *y, int *w, int *h, int *vram_h);
   void *(*mmap)(unsigned int size);
   void  (*munmap)(void *ptr, unsigned int size);
 };
diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c
index 9a844324..11307e2a 100644
--- a/plugins/gpulib/vout_pl.c
+++ b/plugins/gpulib/vout_pl.c
@@ -15,7 +15,6 @@
 #include "../../frontend/plugin_lib.h"
 
 static const struct rearmed_cbs *cbs;
-static void *screen_buf;
 
 int vout_init(void)
 {
@@ -27,7 +26,7 @@ int vout_finish(void)
   return 0;
 }
 
-static void check_mode_change(void)
+static void check_mode_change(int force)
 {
   static uint32_t old_status;
   static int old_h;
@@ -44,95 +43,57 @@ static void check_mode_change(void)
   }
 
   // width|rgb24 change?
-  if ((gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || h != old_h)
+  if (force || (gpu.status.reg ^ old_status) & ((7<<16)|(1<<21)) || h != old_h)
   {
     old_status = gpu.status.reg;
     old_h = h;
 
-    screen_buf = cbs->pl_vout_set_mode(w, h,
+    cbs->pl_vout_set_mode(w, h,
       (gpu.status.rgb24 && !cbs->only_16bpp) ? 24 : 16);
   }
 }
 
-static void blit(void)
+void vout_update(void)
 {
   int x = gpu.screen.x & ~1; // alignment needed by blitter
   int y = gpu.screen.y;
   int w = gpu.screen.w;
   int h = gpu.screen.h;
   uint16_t *vram = gpu.vram;
-  int stride = gpu.screen.hres;
-  int vram_stride = 1024;
-  int vram_mask = 1024 * 512 - 1;
-  int fb_offs, doffs;
-  uint8_t *dest;
-
-  dest = (uint8_t *)screen_buf;
-  if (dest == NULL || w == 0 || stride == 0)
+  int vram_h = 512;
+
+  if (w == 0 || h == 0)
     return;
 
+  check_mode_change(0);
   if (gpu.state.enhancement_active)
-    vram = gpu.get_enhancement_bufer(&x, &y, &w, &h, &stride, &vram_mask);
-
-  fb_offs = y * vram_stride + x;
+    vram = gpu.get_enhancement_bufer(&x, &y, &w, &h, &vram_h);
 
-  // only do centering, at least for now
-  doffs = (stride - w) / 2 & ~1;
-
-  if (gpu.status.rgb24)
-  {
-    if (cbs->only_16bpp) {
-      dest += doffs * 2;
-      for (; h-- > 0; dest += stride * 2, fb_offs += vram_stride)
-      {
-        fb_offs &= vram_mask;
-        bgr888_to_rgb565(dest, vram + fb_offs, w * 3);
-      }
-    }
-    else {
-      dest += (doffs / 8) * 24;
-      for (; h-- > 0; dest += stride * 3, fb_offs += vram_stride)
-      {
-        fb_offs &= vram_mask;
-        bgr888_to_rgb888(dest, vram + fb_offs, w * 3);
-      }
-    }
-  }
-  else
-  {
-    dest += doffs * 2;
-    for (; h-- > 0; dest += stride * 2, fb_offs += vram_stride)
-    {
-      fb_offs &= vram_mask;
-      bgr555_to_rgb565(dest, vram + fb_offs, w * 2);
+  if (y + h > vram_h) {
+    if (y + h - vram_h > h / 2) {
+      // wrap: keep only the rows past the vram end and show them from row 0
+      h -= vram_h - y;
+      y = 0;
+    }
+    else
+      // clip
+      h = vram_h - y;
+  }
 
-  screen_buf = cbs->pl_vout_flip();
-}
+  vram += y * 1024 + x;
 
-void vout_update(void)
-{
-  check_mode_change();
-  if (cbs->pl_vout_raw_flip)
-    cbs->pl_vout_raw_flip(gpu.screen.x, gpu.screen.y);
-  else
-    blit();
+  cbs->pl_vout_flip(vram, 1024, gpu.status.rgb24, w, h);
 }
 
 void vout_blank(void)
 {
-  if (cbs->pl_vout_raw_flip == NULL) {
-    int w = gpu.screen.hres;
-    int h = gpu.screen.h;
-    int bytespp = gpu.status.rgb24 ? 3 : 2;
-    if (gpu.state.enhancement_active) {
-      w *= 2;
-      h *= 2;
-    }
-    memset(screen_buf, 0, w * h * bytespp);
-    screen_buf = cbs->pl_vout_flip();
+  int w = gpu.screen.hres;
+  int h = gpu.screen.h;
+  if (gpu.state.enhancement_active) {
+    w *= 2;
+    h *= 2;
   }
+  cbs->pl_vout_flip(NULL, 1024, gpu.status.rgb24, w, h);
 }
 
 long GPUopen(void **unused)
@@ -141,7 +102,7 @@ long GPUopen(void **unused)
   gpu.frameskip.frame_ready = 1;
 
   cbs->pl_vout_open();
-  screen_buf = cbs->pl_vout_flip();
+  check_mode_change(1);
   return 0;
 }