tune the preloads a bit (refs: master, github/master)
author notaz <notasas@gmail.com>
Mon, 30 May 2016 00:24:11 +0000 (03:24 +0300)
committer notaz <notasas@gmail.com>
Mon, 30 May 2016 00:24:11 +0000 (03:24 +0300)
src/video/SDL_blit_A.c
src/video/SDL_blit_neon.S

index b013ed3..504451b 100644 (file)
@@ -77,6 +77,7 @@ static void name(SDL_BlitInfo *info) \
        int dststride = width * dstBpp + info->d_skip; \
 \
        while ( height-- ) { \
        int dststride = width * dstBpp + info->d_skip; \
 \
        while ( height-- ) { \
+           __builtin_prefetch(dst + dststride); \
            neon_name(dst, src, width); \
            src += srcstride; \
            dst += dststride; \
            neon_name(dst, src, width); \
            src += srcstride; \
            dst += dststride; \
index 979bb2a..dcbfd74 100644 (file)
     vdup.i16   q12, r12
 
 0:
     vdup.i16   q12, r12
 
 0:
-    pld        [r1, #64*2]
     cmp        r2, #8
     cmp        r2, #8
-    pld        [r0, #64*2]
     blt        3f
 1:
     vld4.8     {d4-d7}, [r1]!
     blt        3f
 1:
     vld4.8     {d4-d7}, [r1]!
+    pld        [r1, #64*2]
     vld4.8     {d0-d3}, [r0]
     vld4.8     {d0-d3}, [r0]
+    pld        [r0, #64+32]
 2:
 .if \bgr2rgb
     vswp       d4, d6          @ BGR->RGB
 2:
 .if \bgr2rgb
     vswp       d4, d6          @ BGR->RGB
@@ -164,13 +164,13 @@ do_argb_finish:
 .endif
     vdup.i16   q12, r12
 0:
 .endif
     vdup.i16   q12, r12
 0:
-    pld        [r1, #64*2]
     cmp        r2, #8
     cmp        r2, #8
-    pld        [r0, #64*2]
     blt        3f
 1:
     vld4.8     {d4-d7}, [r1]!
     blt        3f
 1:
     vld4.8     {d4-d7}, [r1]!
+    pld        [r1, #64*2]
     vld2.8     {d1-d2}, [r0]
     vld2.8     {d1-d2}, [r0]
+    pld        [r0, #64+32]
 .if \bgr2rgb
     vswp       d4, d6          @ BGR->RGB
 .endif
 .if \bgr2rgb
     vswp       d4, d6          @ BGR->RGB
 .endif