From f189413eb7a73f42e27184ebf3609d77cf9d13a7 Mon Sep 17 00:00:00 2001 From: notaz Date: Sat, 8 Jul 2023 00:18:08 +0300 Subject: [PATCH] cscpace: fix more alignment issues libretro/pcsx_rearmed#719 --- frontend/blit320.s | 3 +++ frontend/cspace.c | 5 +++-- frontend/cspace_arm.S | 6 ++++++ frontend/cspace_neon.S | 8 +++++--- frontend/plugin_lib.c | 2 -- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/frontend/blit320.s b/frontend/blit320.s index 201fdea6..2d50dfd7 100644 --- a/frontend/blit320.s +++ b/frontend/blit320.s @@ -25,6 +25,7 @@ blit320_640: stmfd sp!, {r4-r8,lr} mov r12, #40 + bic r1, r1, #3 0: ldmia r1!, {r2-r8,lr} lhw_str r2, r3 @@ -40,6 +41,7 @@ blit320_640: blit320_512: stmfd sp!, {r4-r8,lr} mov r12, #32 + bic r1, r1, #3 0: ldmia r1!, {r2-r8,lr} lsl r2, #16 @@ -73,6 +75,7 @@ blit320_512: blit320_368: stmfd sp!, {r4-r8,lr} mov r12, #23 + bic r1, r1, #3 0: ldmia r1!, {r2-r8,lr} unaligned_str r2, r3 @ 1,2 diff --git a/frontend/cspace.c b/frontend/cspace.c index 2b528a5a..785b3d13 100644 --- a/frontend/cspace.c +++ b/frontend/cspace.c @@ -8,6 +8,7 @@ * See the COPYING file in the top-level directory. */ +#include #include "cspace.h" /* @@ -30,7 +31,6 @@ || (defined(__GNUC__) && __GNUC__ >= 5)) \ && __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ -#include #include #if defined(__ARM_NEON) || defined(__ARM_NEON__) @@ -93,7 +93,8 @@ void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) { - const unsigned int *src = src_; + // source can be misaligned, but it's very rare, so just force + const unsigned int *src = (const void *)((intptr_t)src_ & ~3); unsigned int *dst = dst_; unsigned int x, p, r, g, b; diff --git a/frontend/cspace_arm.S b/frontend/cspace_arm.S index 67778da5..177b0858 100644 --- a/frontend/cspace_arm.S +++ b/frontend/cspace_arm.S @@ -34,6 +34,12 @@ FUNCTION(bgr555_to_rgb565): @ void *dst, const void *src, int bytes orr lr, lr, lsl #16 blt 1f + @ src can be unaligned, but that's very rare, so just force it. + @ The manual says unaligned ldm should fault, and it does on + @ cortex-a78's 32bit mode, but curiously on cortex-a8 it just + @ works and loads the data correctly. + bic r1, r1, #3 + 0: ldmia r1!, {r3-r10} subs r2, #4*8 diff --git a/frontend/cspace_neon.S b/frontend/cspace_neon.S index 4928b44a..3a89fdb9 100644 --- a/frontend/cspace_neon.S +++ b/frontend/cspace_neon.S @@ -40,7 +40,8 @@ FUNCTION(bgr555_to_rgb565): @ dst, src, bytes pld [r1, #64*2] @ Pulls 15-bit BGR color values (which are actually 16 bits) into q0-q3. @ example: q0 = 0111 1110 0101 0011 - vldmia r1!, {q0-q3} + vld1.16 {d0-d3}, [r1]! + vld1.16 {d4-d7}, [r1]! @ Shift BGR color 1 bit to the left, discarding MSB and preparing for vbit. @ MSB is used for transparency (not needed here, and can mess with green). @ example: q0 = 1111 1100 1010 0110 @@ -113,7 +114,8 @@ FUNCTION(bgr555_to_rgb565_b): @ dst, src, bytes, int brightness2k // 0-0x0800 vdup.16 q14, r3 0: pld [r1, #64*2] - vldmia r1!, {q0-q3} + vld1.16 {d0-d3}, [r1]! + vld1.16 {d4-d7}, [r1]! vand.u16 q8, q0, q14 vand.u16 q9, q1, q14 vand.u16 q10, q2, q14 @@ -270,4 +272,4 @@ FUNCTION(rgb888_to_rgb565): @ dst, src, bytes bx lr -@ vim:filetype=armasm +@ vim:filetype=armasm:expandtab diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 171296d2..d5cec766 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -402,8 +402,6 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h) #endif else { - src = (void *)((uintptr_t)src & ~3); // align for the blitter - for (; h1-- > 0; dest += dstride * 2, src += stride) { bgr555_to_rgb565(dest, src, w * 2); -- 2.39.2