gpu_neon: new intrinsics-only implementation
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu_4x.c
index 85e972c..942b3d3 100644 (file)
@@ -2,7 +2,21 @@
   ((psx_gpu)->enhancement_buf_ptr + \\r
    ((psx_gpu)->enhancement_buf_by_x16[(x) / 16] << 20))\r
 \r
-#ifndef NEON_BUILD\r
+#if !defined(NEON_BUILD) || defined(SIMD_BUILD)\r
+\r
+#ifndef zip_4x32b\r
+/* Fallback definitions, compiled only when zip_4x32b is not already defined. */\r
+#define vector_cast(vec_to, source) source  /* vec_to is ignored here */\r
+/* Interleave 4 elements: dest.e[2i] = source_a.e[i], dest.e[2i+1] = source_b.e[i]. */\r
+#define zip_4x32b(dest, source_a, source_b) {                                  \\r
+  u32 _i; for(_i = 0; _i < 4; _i++) {                                          \\r
+    (dest).e[_i * 2 + 0] = (source_a).e[_i];                                   \\r
+    (dest).e[_i * 2 + 1] = (source_b).e[_i];                                   \\r
+  }                                                                            \\r
+}\r
+\r
+#endif\r
+\r
 void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,\r
  s32 v, s32 width, s32 height, u32 color)\r
 {\r
@@ -56,7 +70,8 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
       texture_block_ptr =\r
        texture_page_ptr + (texture_offset_base & texture_mask);\r
 \r
-      load_128b(texels, texture_block_ptr);\r
+      //load_128b(texels, texture_block_ptr);\r
+      texels = *(vec_8x16u *)texture_block_ptr;\r
       \r
       zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
       block->texels = texels_wide;\r
@@ -117,7 +132,8 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
 \r
       texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
       \r
-      load_128b(texels, texture_block_ptr);\r
+      //load_128b(texels, texture_block_ptr);\r
+      texels = *(vec_8x16u *)texture_block_ptr;\r
 \r
       zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
       block->texels = texels_wide;\r
@@ -147,7 +163,8 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
       while(blocks_remaining)\r
       {\r
         texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
-        load_128b(texels, texture_block_ptr);\r
+        //load_128b(texels, texture_block_ptr);\r
+        texels = *(vec_8x16u *)texture_block_ptr;\r
 \r
         zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
         block->texels = texels_wide;\r
@@ -178,7 +195,8 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
       }\r
 \r
       texture_block_ptr = texture_page_ptr + (texture_offset & texture_mask);\r
-      load_128b(texels, texture_block_ptr);\r
+      //load_128b(texels, texture_block_ptr);\r
+      texels = *(vec_8x16u *)texture_block_ptr;\r
       \r
       zip_4x32b(vector_cast(vec_4x32u, texels_wide), texels.low, texels.low);\r
       block->texels = texels_wide;\r