psx_gpu: change fill handling again
author Exophase <exophase@gmail.com>
Wed, 4 Jan 2012 01:01:19 +0000 (03:01 +0200)
committer notaz <notasas@gmail.com>
Wed, 4 Jan 2012 16:10:41 +0000 (18:10 +0200)
plugins/gpu_neon/psx_gpu/psx_gpu.c
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S

index 76bfb15..7c1503b 100644 (file)
@@ -4322,30 +4322,48 @@ void render_block_fill(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
   flush_render_block_buffer(psx_gpu);
   invalidate_texture_cache_region(psx_gpu, x, y, x + width - 1, y + height - 1);
 
-#ifndef NEON_BUILD
   u32 r = color & 0xFF;
   u32 g = (color >> 8) & 0xFF;
   u32 b = (color >> 16) & 0xFF;
-  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10);
+  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
+   psx_gpu->mask_msb;
+  u32 color_32bpp = color_16bpp | (color_16bpp << 16);
 
-  u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024);
-  u32 draw_x, draw_y;
+  u32 *vram_ptr = (u32 *)(psx_gpu->vram_ptr + x + (y * 1024));
 
-  for(draw_y = 0; draw_y < height; draw_y++)
+  u32 pitch = 512 - (width / 2);
+  u32 num_width;
+
+  if(psx_gpu->interlace_mode & RENDER_INTERLACE_ENABLED)
   {
-    for(draw_x = 0; draw_x < width; draw_x++)
+    pitch += 512;
+    height /= 2;
+
+    if(psx_gpu->interlace_mode & RENDER_INTERLACE_ODD)
+      vram_ptr += 512; 
+  }
+
+  while(height)
+  {
+    num_width = width;
+    while(num_width)
     {
-      vram_ptr[draw_x] = color_16bpp;
+      vram_ptr[0] = color_32bpp;
+      vram_ptr[1] = color_32bpp;
+      vram_ptr[2] = color_32bpp;
+      vram_ptr[3] = color_32bpp;
+      vram_ptr[4] = color_32bpp;
+      vram_ptr[5] = color_32bpp;
+      vram_ptr[6] = color_32bpp;
+      vram_ptr[7] = color_32bpp;
+
+      vram_ptr += 8;
+      num_width -= 16;
     }
 
-    vram_ptr += 1024;
+    vram_ptr += pitch;
+    height--;
   }
-#else
-  void render_block_fill_body(psx_gpu_struct *psx_gpu, u32 color, u32 x, u32 y,
-   u32 width, u32 height);
-
-  render_block_fill_body(psx_gpu, color, x, y, width, height);
-#endif
 }
 
 void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
index 973a8b3..11a11b1 100644 (file)
@@ -4337,31 +4337,20 @@ function(warmup)
 
   bx lr
 
+#undef vram_ptr
 #undef color
-#undef y
+#undef width
 #undef height
-
-#define psx_gpu                                           r0
-#define color                                             r1
-#define x                                                 r2
-#define y                                                 r3
+#undef pitch
 
 #define vram_ptr                                          r0
-#define width                                             r3
-#define height                                            r12
-
-#define parameter_width_offset                            0
-#define parameter_height_offset                           4
+#define color                                             r1  /* r1: fill color on entry; the register is reused as pitch once color is consumed */
+#define width                                             r2  /* r2: row width, counted in 16-bit elements (shifted left by 1 to get bytes) */
+#define height                                            r3  /* r3: number of rows still to fill */
 
-#define color_r                                           r14
-#define color_g                                           r4
-#define color_b                                           r5  
+#define pitch                                             r1  /* aliases r1 with color: only meaningful after color has been broadcast */
 
-#define left_unaligned                                    r14
-#define right_unaligned                                   r4
-#define pitch                                             r5
-#define num_unaligned                                     r2
-#define num_width                                         r6
+#define num_width                                         r12 /* r12: per-row countdown; caller-saved under AAPCS, so no push/pop is needed */
 
 #undef colors_a
 #undef colors_b
@@ -4372,44 +4361,28 @@ function(warmup)
 .align 3
 
 function(render_block_fill_body)
-  ldr vram_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
-  ldr height, [ sp, #parameter_height_offset ]
-
-  add vram_ptr, vram_ptr, y, lsl #11
-  ldr width, [ sp, #parameter_width_offset ]
-
-  add vram_ptr, vram_ptr, x, lsl #1
-  stmdb sp!, { r4 - r6, r14 }
-
-  ubfx color_r, color, #3, #5
-  ubfx color_g, color, #11, #5
-
-  ubfx color_b, color, #19, #5
-  orr color, color_r, color_g, lsl #5
-
-  orr color, color, color_b, lsl #10
   vdup.u16 colors_a, color
+  mov pitch, #2048
 
   vmov colors_b, colors_a
-  mov pitch, #2048
   sub pitch, pitch, width, lsl #1
 
- 0:
-  mov num_width, width, lsr #4
+  mov num_width, width
 
- 1:
-  vst1.u32 { colors_a, colors_b }, [ vram_ptr, :128 ]!
+ 0:  
+  vst1.u32 { colors_a, colors_b }, [ vram_ptr, :256 ]!
 
-  subs num_width, num_width, #1
-  bne 1b
+  subs num_width, num_width, #2
+  bne 0b
 
   add vram_ptr, vram_ptr, pitch
+  mov num_width, width
+
   subs height, height, #1
   bne 0b
- 1: 
-  ldmia sp!, { r4 - r6, pc }
 
+  bx lr
 
 #undef x
 #undef y