From 4a96d0a990781fece6598561a79757cfc00d6c0d Mon Sep 17 00:00:00 2001
From: notaz <notasas@gmail.com>
Date: Mon, 13 Nov 2023 21:04:37 +0200
Subject: [PATCH] gpu_neon: enforce alignment required for asm

libretro/pcsx_rearmed#798
---
 plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 1 +
 plugins/gpu_neon/psx_gpu_if.c               | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
index f0ba39f3..ffbea043 100644
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -6129,6 +6129,7 @@ function(scale2x_tiles8)
   mov r14, r2
 
 0:
+  pld [r1, #1024*2]
   vld1.u16 { q0 }, [r1, :128]!
   vld1.u16 { q2 }, [r1, :128]!
   vmov q1, q0
diff --git a/plugins/gpu_neon/psx_gpu_if.c b/plugins/gpu_neon/psx_gpu_if.c
index 3dab827c..429b327b 100644
--- a/plugins/gpu_neon/psx_gpu_if.c
+++ b/plugins/gpu_neon/psx_gpu_if.c
@@ -142,6 +142,9 @@ sync_enhancement_buffers(int x, int y, int w, int h)
     x2 = min(right, s->x + s_w);
     y1 = max(y, s->y);
     y2 = min(bottom, s->y + s_h);
+    // round x1 down to 8 pixels (16 bytes) - alignment the asm version needs
+    x2 += x1 & 7;
+    x1 &= ~7;
     scale2x_tiles8(dst + y1 * 1024*2 + x1 * 2,
         src + y1 * 1024 + x1, (x2 - x1 + 7) / 8u, y2 - y1);
   }
-- 
2.39.5