psx_gpu: improve fills
author Exophase <exophase@gmail.com>
Sun, 1 Jan 2012 21:07:31 +0000 (23:07 +0200)
committer notaz <notasas@gmail.com>
Sun, 1 Jan 2012 21:07:31 +0000 (23:07 +0200)
plugins/gpu_neon/psx_gpu/psx_gpu.c
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
plugins/gpu_neon/psx_gpu/psx_gpu_main.c
plugins/gpu_neon/psx_gpu/psx_gpu_parse.c

index 75deb30..76bfb15 100644 (file)
@@ -4352,8 +4352,8 @@ void render_block_copy(psx_gpu_struct *psx_gpu, u16 *source, u32 x, u32 y,
  u32 width, u32 height, u32 pitch)
 {
   u16 *vram_ptr = psx_gpu->vram_ptr + x + (y * 1024);
-  u32 mask_msb = psx_gpu->mask_msb;
   u32 draw_x, draw_y;
+  u32 mask_msb = psx_gpu->mask_msb;
 
   if((width == 0) || (height == 0))
     return;
index 0dc7ece..973a8b3 100644 (file)
@@ -4363,9 +4363,11 @@ function(warmup)
 #define num_unaligned                                     r2
 #define num_width                                         r6
 
-#undef colors
+#undef colors_a
+#undef colors_b
 
-#define colors                                            q0
+#define colors_a                                          q0
+#define colors_b                                          q1
 
 .align 3
 
@@ -4386,51 +4388,26 @@ function(render_block_fill_body)
   orr color, color_r, color_g, lsl #5
 
   orr color, color, color_b, lsl #10
-  add left_unaligned, x, #0x7
+  vdup.u16 colors_a, color
 
-  bic left_unaligned, left_unaligned, #0x7
-  vdup.u16 colors, color
-
-  sub left_unaligned, left_unaligned, x
+  vmov colors_b, colors_a
   mov pitch, #2048
-
   sub pitch, pitch, width, lsl #1
-  sub width, width, left_unaligned
-
-  and right_unaligned, width, #0x7
-  bic width, width, #0x7
 
  0:
-  mov num_width, width, lsr #3
-
-  movs num_unaligned, left_unaligned
-  beq 2f
+  mov num_width, width, lsr #4
 
  1:
-  strh color, [ vram_ptr ], #2
-
-  subs num_unaligned, num_unaligned, #1
-  bne 1b
+  vst1.u32 { colors_a, colors_b }, [ vram_ptr, :128 ]!
 
- 2:
-  vst1.u32 { colors }, [ vram_ptr, :128 ]!
   subs num_width, num_width, #1
-  bne 2b
-
-  movs num_unaligned, right_unaligned
-  beq 4f
-
- 3:
-  strh color, [ vram_ptr ], #2
-  
-  subs num_unaligned, num_unaligned, #1
-  bne 3b
+  bne 1b
 
- 4:
   add vram_ptr, vram_ptr, pitch
   subs height, height, #1
   bne 0b
-  
+ 1: 
   ldmia sp!, { r4 - r6, pc }
 
 
index f1f7944..6c17b0a 100644 (file)
@@ -174,7 +174,7 @@ int main(int argc, char *argv[])
    MAP_SHARED | 0xA0000000, fbdev_handle, 0));
   vram_ptr += 64;
 
-  initialize_psx_gpu(psx_gpu, vram_ptr + 64);
+  initialize_psx_gpu(psx_gpu, vram_ptr);
 #else
   initialize_psx_gpu(psx_gpu, _vram + 64);
 #endif
@@ -204,14 +204,15 @@ int main(int argc, char *argv[])
   gpu_parse(psx_gpu, list, size);
   flush_render_block_buffer(psx_gpu);
 
-  printf("%-64s: ", argv[1]);
 #ifdef NEON_BUILD
   u32 cycles_elapsed = get_counter() - cycles;
 
-  printf("%d\n", cycles_elapsed);
+  printf("%-64s: %d\n", argv[1], cycles_elapsed);
+#else
+  printf("%-64s: ", argv[1]);
 #endif
 
-#if 0
+#if 1
   u32 i;
 
   for(i = 0; i < 1024 * 512; i++)
@@ -238,7 +239,7 @@ int main(int argc, char *argv[])
   }
 #endif
 
-#if 1
+#if 0
   printf("\n");
   printf("  %d pixels, %d pixel blocks, %d spans\n"
    "   (%lf pixels per block, %lf pixels per span),\n"
index 7fee2eb..4f3dd12 100644 (file)
@@ -210,16 +210,61 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
        s16 *list_s16 = (void *)list;
        current_command = *list >> 24;
        command_length = command_lengths[current_command];
-  
+
        switch(current_command)
        {
                case 0x00:
                        break;
   
                case 0x02:
-        render_block_fill(psx_gpu, list[0] & 0xFFFFFF, list_s16[2] & 0x3FF,
-         list_s16[3] & 0x1FF, list_s16[4] & 0x3FF, list_s16[5] & 0x1FF);
+      {
+        u32 x = list_s16[2] & 0x3FF;
+        u32 y = list_s16[3] & 0x1FF;
+        u32 width = list_s16[4] & 0x3FF;
+        u32 height = list_s16[5] & 0x1FF;
+        u32 color = list[0] & 0xFFFFFF;
+
+        x &= ~0xF;
+        width = ((width + 0xF) & ~0xF);
+
+        if((x + width) > 1024)
+        {
+          u32 width_a = 1024 - x;
+          u32 width_b = width - width_a;
+
+          if((y + height) > 512)
+          {
+            u32 height_a = 512 - y;
+            u32 height_b = height - height_a;
+
+            render_block_fill(psx_gpu, color, x, y, width_a, height_a);
+            render_block_fill(psx_gpu, color, 0, y, width_b, height_a);
+            render_block_fill(psx_gpu, color, x, 0, width_a, height_b);
+            render_block_fill(psx_gpu, color, 0, 0, width_b, height_b);
+          }
+          else
+          {
+            render_block_fill(psx_gpu, color, x, y, width_a, height);
+            render_block_fill(psx_gpu, color, 0, y, width_b, height);
+          }
+        }
+        else
+        {
+          if((y + height) > 512)
+          {
+            u32 height_a = 512 - y;
+            u32 height_b = height - height_a;
+
+            render_block_fill(psx_gpu, color, x, y, width, height_a);
+            render_block_fill(psx_gpu, color, x, 0, width, height_b);
+          }
+          else
+          {
+            render_block_fill(psx_gpu, color, x, y, width, height);
+          }
+        }
                        break;
+      }
   
                case 0x20 ... 0x23:
       {
@@ -567,6 +612,7 @@ void gpu_parse(psx_gpu_struct *psx_gpu, u32 *list, u32 size)
   
                case 0xE1:
         set_texture(psx_gpu, list[0] & 0x1FF);
+
         if(list[0] & (1 << 9))
           psx_gpu->render_state_base |= RENDER_STATE_DITHER;
         else