From: notaz <notasas@gmail.com>
Date: Sat, 29 Mar 2025 23:27:32 +0000 (+0200)
Subject: implement scanout wrapping
X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cdc1d78f3e3e22c45e8461368b3aa58fce23c09c;p=pcsx_rearmed.git

implement scanout wrapping

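... somewhat at least: the plain and bgr24 blit loops now wrap both
vertically and horizontally, the scanline path wraps only vertically,
and the pl_plat_blit and NEON scale2x/eagle2x paths still read VRAM
linearly.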
libretro/pcsx_rearmed#884
---

diff --git a/frontend/libretro.c b/frontend/libretro.c
index 03db4b40..a5caf79b 100644
--- a/frontend/libretro.c
+++ b/frontend/libretro.c
@@ -371,16 +371,16 @@ static void CrosshairDimensions(int port, struct CrosshairInfo *info) {
    info->size_y = psx_h * (pl_rearmed_cbs.gpu_neon.enhancement_enable ? 2 : 1) * (4.0f / 3.0f) / 40.0f;
 }
 
-static void vout_flip(const void *vram, int stride, int bgr24,
+static void vout_flip(const void *vram_, int vram_ofs, int bgr24,
       int x, int y, int w, int h, int dims_changed)
 {
    int bytes_pp = (current_fmt == RETRO_PIXEL_FORMAT_XRGB8888) ? 4 : 2;
    int bytes_pp_s = bgr24 ? 3 : 2;
    bgr_to_fb_func *bgr_to_fb = g_bgr_to_fb;
    unsigned char *dest = vout_buf_ptr;
-   const unsigned short *src = vram;
+   const unsigned char *vram = vram_;
    int dstride = vout_pitch_b, h1 = h;
-   int port = 0;
+   int port = 0, hwrapped;
 
    if (vram == NULL || dims_changed || (in_enable_crosshair[0] + in_enable_crosshair[1]) > 0)
    {
@@ -398,8 +398,22 @@ static void vout_flip(const void *vram, int stride, int bgr24,
 
    dest += x * bytes_pp + y * dstride;
 
-   for (; h1-- > 0; dest += dstride, src += stride)
-      bgr_to_fb(dest, src, w * bytes_pp_s);
+   for (; h1-- > 0; dest += dstride) {
+      bgr_to_fb(dest, vram + vram_ofs, w * bytes_pp_s);
+      vram_ofs = (vram_ofs + 2048) & 0xfffff;
+   }
+
+   hwrapped = (vram_ofs & 2047) + w * bytes_pp_s - 2048;
+   if (hwrapped > 0) {
+      // rows running past the 2048-byte VRAM line end are super-rare, so just re-blit the wrapped tail from the row start
+      vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+      dest -= dstride * h;
+      dest += (w - hwrapped / bytes_pp_s) * bytes_pp;
+      for (h1 = h; h1-- > 0; dest += dstride) {
+         bgr_to_fb(dest, vram + vram_ofs, hwrapped);
+         vram_ofs = (vram_ofs + 2048) & 0xfffff;
+      }
+   }
 
    if (current_fmt == RETRO_PIXEL_FORMAT_RGB565)
    for (port = 0; port < 2; port++) {
diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c
index cacaf943..7efcf46c 100644
--- a/frontend/plugin_lib.c
+++ b/frontend/plugin_lib.c
@@ -311,14 +311,15 @@ void pl_force_clear(void)
 	flip_clear_counter = 2;
 }
 
-static void pl_vout_flip(const void *vram, int stride, int bgr24,
+static void pl_vout_flip(const void *vram_, int vram_ofs, int bgr24,
 	int x, int y, int w, int h, int dims_changed)
 {
 	unsigned char *dest = pl_vout_buf;
-	const unsigned short *src = vram;
+	const unsigned char *vram = vram_;
 	int dstride = pl_vout_w, h1 = h;
 	int h_full = pl_vout_h - pl_vout_yoffset;
 	int xoffs = 0, doffs;
+	int hwrapped;
 
 	pcnt_start(PCNT_BLIT);
 
@@ -353,7 +354,7 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24,
 
 	if (pl_plat_blit)
 	{
-		pl_plat_blit(doffs, src, w, h, stride, bgr24);
+		pl_plat_blit(doffs, vram + vram_ofs, w, h, 1024, bgr24);
 		goto out_hud;
 	}
 
@@ -364,58 +365,93 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24,
 
 	if (bgr24)
 	{
+		hwrapped = (vram_ofs & 2047) + w * 3 - 2048;
 		if (pl_rearmed_cbs.only_16bpp) {
-			for (; h1-- > 0; dest += dstride * 2, src += stride)
-			{
-				bgr888_to_rgb565(dest, src, w * 3);
+			for (; h1-- > 0; dest += dstride * 2) {
+				bgr888_to_rgb565(dest, vram + vram_ofs, w * 3);
+				vram_ofs = (vram_ofs + 2048) & 0xfffff;
+			}
+
+			if (hwrapped > 0) {
+				// this is super-rare so just fix-up
+				vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+				dest -= dstride * 2 * h;
+				dest += (w - hwrapped / 3) * 2;
+				for (h1 = h; h1-- > 0; dest += dstride * 2) {
+					bgr888_to_rgb565(dest, vram + vram_ofs, hwrapped);
+					vram_ofs = (vram_ofs + 2048) & 0xfffff;
+				}
 			}
 		}
 		else {
 			dest -= doffs * 2;
 			dest += (doffs / 8) * 24;
 
-			for (; h1-- > 0; dest += dstride * 3, src += stride)
-			{
-				bgr888_to_rgb888(dest, src, w * 3);
+			for (; h1-- > 0; dest += dstride * 3) {
+				bgr888_to_rgb888(dest, vram + vram_ofs, w * 3);
+				vram_ofs = (vram_ofs + 2048) & 0xfffff;
+			}
+
+			if (hwrapped > 0) {
+				vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+				dest -= dstride * 3 * h;
+				dest += w * 3 - hwrapped;
+				for (h1 = h; h1-- > 0; dest += dstride * 3) {
+					bgr888_to_rgb888(dest, vram + vram_ofs, hwrapped);
+					vram_ofs = (vram_ofs + 2048) & 0xfffff;
+				}
 			}
 		}
 	}
 #ifdef HAVE_NEON32
 	else if (soft_filter == SOFT_FILTER_SCALE2X && pl_vout_scale_w == 2)
 	{
-		neon_scale2x_16_16(src, (void *)dest, w,
-			stride * 2, dstride * 2, h);
+		neon_scale2x_16_16((const void *)(vram + vram_ofs), (void *)dest, w,
+			2048, dstride * 2, h);
 	}
 	else if (soft_filter == SOFT_FILTER_EAGLE2X && pl_vout_scale_w == 2)
 	{
-		neon_eagle2x_16_16(src, (void *)dest, w,
-			stride * 2, dstride * 2, h);
+		neon_eagle2x_16_16((const void *)(vram + vram_ofs), (void *)dest, w,
+			2048, dstride * 2, h);
 	}
 	else if (scanlines != 0 && scanline_level != 100)
 	{
 		int h2, l = scanline_level * 2048 / 100;
-		int stride_0 = pl_vout_scale_h >= 2 ? 0 : stride;
+		int stride_0 = pl_vout_scale_h >= 2 ? 0 : 2048;
 
 		h1 *= pl_vout_scale_h;
 		while (h1 > 0)
 		{
 			for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) {
-				bgr555_to_rgb565(dest, src, w * 2);
-				dest += dstride * 2, src += stride_0;
+				bgr555_to_rgb565(dest, vram + vram_ofs, w * 2);
+				vram_ofs = (vram_ofs + stride_0) & 0xfffff;
+				dest += dstride * 2;
 			}
 
 			for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) {
-				bgr555_to_rgb565_b(dest, src, w * 2, l);
-				dest += dstride * 2, src += stride;
+				bgr555_to_rgb565_b(dest, vram + vram_ofs, w * 2, l);
+				vram_ofs = (vram_ofs + 2048) & 0xfffff;
+				dest += dstride * 2;
 			}
 		}
 	}
 #endif
 	else
 	{
-		for (; h1-- > 0; dest += dstride * 2, src += stride)
-		{
-			bgr555_to_rgb565(dest, src, w * 2);
+		for (; h1-- > 0; dest += dstride * 2) {
+			bgr555_to_rgb565(dest, vram + vram_ofs, w * 2);
+			vram_ofs = (vram_ofs + 2048) & 0xfffff;
+		}
+
+		hwrapped = (vram_ofs & 2047) + w * 2 - 2048;
+		if (hwrapped > 0) {
+			vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+			dest -= dstride * 2 * h;
+			dest += w * 2 - hwrapped;
+			for (h1 = h; h1-- > 0; dest += dstride * 2) {
+				bgr555_to_rgb565(dest, vram + vram_ofs, hwrapped);
+				vram_ofs = (vram_ofs + 2048) & 0xfffff;
+			}
 		}
 	}
 
diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h
index ef63cc21..855c716d 100644
--- a/frontend/plugin_lib.h
+++ b/frontend/plugin_lib.h
@@ -54,7 +54,7 @@ struct rearmed_cbs {
 	void  (*pl_get_layer_pos)(int *x, int *y, int *w, int *h);
 	int   (*pl_vout_open)(void);
 	void  (*pl_vout_set_mode)(int w, int h, int raw_w, int raw_h, int bpp);
-	void  (*pl_vout_flip)(const void *vram, int stride, int bgr24,
+	void  (*pl_vout_flip)(const void *vram, int vram_offset, int bgr24,
 			      int x, int y, int w, int h, int dims_changed);
 	void  (*pl_vout_close)(void);
 	void *(*mmap)(unsigned int size);
diff --git a/plugins/dfxvideo/draw_pl.c b/plugins/dfxvideo/draw_pl.c
index 37dbfff1..f2436c44 100644
--- a/plugins/dfxvideo/draw_pl.c
+++ b/plugins/dfxvideo/draw_pl.c
@@ -32,6 +32,7 @@ static void blit(void)
  // account for centering
  h -= PreviousPSXDisplay.Range.y0;
 
+ #error out of date
  rcbs->pl_vout_flip(srcs, 1024, PSXDisplay.RGB24, w, h);
 }
 
diff --git a/plugins/gpu_unai/old/gpu.cpp b/plugins/gpu_unai/old/gpu.cpp
index 1552bed9..439a6c25 100644
--- a/plugins/gpu_unai/old/gpu.cpp
+++ b/plugins/gpu_unai/old/gpu.cpp
@@ -852,6 +852,7 @@ static void blit(void)
 		cbs->pl_vout_set_mode(w0, h1, w0, h1, isRGB24 ? 24 : 16);
 	}
 
+#error out of date
 	cbs->pl_vout_flip(base, 1024, isRGB24, w0, h1);
 }
 
diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c
index 40682b30..e82cc0e6 100644
--- a/plugins/gpulib/gpu.c
+++ b/plugins/gpulib/gpu.c
@@ -237,7 +237,7 @@ static noinline void get_gpu_info(struct psx_gpu *gpu, uint32_t data)
 	#define VRAM_ALIGN 64
 #endif
 
-// double, for overdraw guard + at least 1 page before
+// double, for overdraw/overscan guard + at least 1 page before
 #define VRAM_SIZE ((1024 * 512 * 2 * 2) + max(VRAM_ALIGN, 4096))
 
 // vram ptr received from mmap/malloc/alloc (will deallocate using this)
diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c
index f2004825..65e3de53 100644
--- a/plugins/gpulib/vout_pl.c
+++ b/plugins/gpulib/vout_pl.c
@@ -86,6 +86,7 @@ int vout_update(void)
   int h = gpu.screen.h;
   int vram_h = 512;
   int src_x2 = 0;
+  int offset;
 
 #ifdef RAW_FB_DISPLAY
   w = (gpu.status & PSX_GPU_STATUS_RGB24) ? 2048/3 : 1024;
@@ -122,10 +123,10 @@ int vout_update(void)
       h = vram_h - src_y;
   }
 
-  vram += (src_y * 1024 + src_x) * 2;
-  vram += src_x2 * bpp / 8;
+  offset = (src_y * 1024 + src_x) * 2;
+  offset += src_x2 * bpp / 8;
 
-  cbs->pl_vout_flip(vram, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24),
+  cbs->pl_vout_flip(vram, offset, !!(gpu.status & PSX_GPU_STATUS_RGB24),
       x, y, w, h, gpu.state.dims_changed);
   gpu.state.dims_changed = 0;
   return 1;
@@ -141,7 +142,7 @@ void vout_blank(void)
     w *= 2;
     h *= 2;
   }
-  cbs->pl_vout_flip(NULL, 1024, !!(gpu.status & PSX_GPU_STATUS_RGB24), 0, 0, w, h, 0);
+  cbs->pl_vout_flip(NULL, 0, !!(gpu.status & PSX_GPU_STATUS_RGB24), 0, 0, w, h, 0);
 }
 
 long GPUopen(unsigned long *disp, char *cap, char *cfg)