frontend: remove src alignment requirements in asm
author notaz <notasas@gmail.com>
Sun, 21 Nov 2021 14:44:35 +0000 (16:44 +0200)
committer notaz <notasas@gmail.com>
Sun, 21 Nov 2021 14:44:35 +0000 (16:44 +0200)
No measurable performance improvement was seen from the :64 alignment hint
alone, on either Cortex-A8 or Cortex-A72, and Psybadek uses an unaligned VRAM location.

frontend/cspace_neon.S
frontend/plat_pollux.c
frontend/plugin_lib.c
plugins/gpulib/vout_pl.c

index 56ab304..4cb3d4c 100644 (file)
@@ -183,8 +183,8 @@ FUNCTION(bgr888_to_rgb888): @ dst, src, bytes
     umull       r12,r2, r3, r2
 0:
     pld         [r1, #48*3]
     umull       r12,r2, r3, r2
 0:
     pld         [r1, #48*3]
-    vld3.8      {d0-d2}, [r1, :64]!
-    vld3.8      {d3-d5}, [r1, :64]!
+    vld3.8      {d0-d2}, [r1]!
+    vld3.8      {d3-d5}, [r1]!
     vswp        d0, d2
     vswp        d3, d5
     vst3.8      {d0-d2}, [r0, :64]!
     vswp        d0, d2
     vswp        d3, d5
     vst3.8      {d0-d2}, [r0, :64]!
@@ -207,8 +207,8 @@ FUNCTION(bgr888_to_rgb565): @ dst, src, bytes
     vdup.16     q15, r3
 0:
     pld         [r1, #48*3]
     vdup.16     q15, r3
 0:
     pld         [r1, #48*3]
-    vld3.8      {d1-d3}, [r1, :64]!
-    vld3.8      {d5-d7}, [r1, :64]!
+    vld3.8      {d1-d3}, [r1]!
+    vld3.8      {d5-d7}, [r1]!
 
     vshll.u8    q8, d2, #3      @ g
     vshll.u8    q9, d6, #3
 
     vshll.u8    q8, d2, #3      @ g
     vshll.u8    q9, d6, #3
index 18b8053..326a40f 100644 (file)
@@ -309,6 +309,7 @@ static void name(int doffs, const void *vram_, int w, int h, int sstride, int bg
         int i;                                                                          \
                                                                                         \
         vram += psx_offset_y * 1024 + psx_offset_x;                                     \
         int i;                                                                          \
                                                                                         \
         vram += psx_offset_y * 1024 + psx_offset_x;                                     \
+        vram = (void *)((long)vram & ~3);                                               \
         for (i = psx_src_height; i > 0; i--, vram += psx_step * 1024, dst += dst_stride)\
                 blitfunc(dst, vram, len);                                               \
 }
         for (i = psx_src_height; i > 0; i--, vram += psx_step * 1024, dst += dst_stride)\
                 blitfunc(dst, vram, len);                                               \
 }
index ab4d415..c6a2bf0 100644 (file)
@@ -396,6 +396,8 @@ static void pl_vout_flip(const void *vram, int stride, int bgr24, int w, int h)
 #endif
        else
        {
 #endif
        else
        {
+               src = (void *)((uintptr_t)src & ~3); // align for the blitter
+
                for (; h1-- > 0; dest += dstride * 2, src += stride)
                {
                        bgr555_to_rgb565(dest, src, w * 2);
                for (; h1-- > 0; dest += dstride * 2, src += stride)
                {
                        bgr555_to_rgb565(dest, src, w * 2);
index a9437cb..d1fdefb 100644 (file)
@@ -55,7 +55,7 @@ static void check_mode_change(int force)
 
 void vout_update(void)
 {
 
 void vout_update(void)
 {
-  int x = gpu.screen.x & ~1; // alignment needed by blitter
+  int x = gpu.screen.x;
   int y = gpu.screen.y;
   int w = gpu.screen.w;
   int h = gpu.screen.h;
   int y = gpu.screen.y;
   int w = gpu.screen.w;
   int h = gpu.screen.h;