psx_gpu: implement setup_sprite_untextured in asm
author notaz <notasas@gmail.com>
Sat, 27 Oct 2012 19:14:16 +0000 (22:14 +0300)
committer notaz <notasas@gmail.com>
Sat, 27 Oct 2012 19:14:16 +0000 (22:14 +0300)
plugins/gpu_neon/psx_gpu/psx_gpu.c
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S

index ce72af5..485ef27 100644 (file)
@@ -3885,6 +3885,11 @@ void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
 void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v,
  s32 width, s32 height, u32 color);
 
+// Flat-colour (untextured) sprite setup.
+// setup_sprite_untextured forwards to setup_sprite_untextured_simple when
+// render_state has none of RENDER_STATE_MASK_EVALUATE,
+// RENDER_FLAGS_MODULATE_TEXELS or RENDER_FLAGS_BLEND set.
+// setup_sprite_untextured_simple does not read u or v.
+void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
+ s32 v, s32 width, s32 height, u32 color);
+void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color);
+
 #ifndef NEON_BUILD
 setup_sprite_tiled_builder(4bpp,);
 setup_sprite_tiled_builder(8bpp,);
@@ -4013,11 +4018,16 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
   }
 }
 
-#endif
-
 void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
  s32 v, s32 width, s32 height, u32 color)
 {
+  if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE |
+   RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0)
+  {
+    setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color);
+    return;
+  }
+
   u32 right_width = ((width - 1) & 0x7) + 1;
   u32 right_mask_bits = (0xFF << right_width);
   u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x;
@@ -4083,6 +4093,66 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u,
   }
 }
 
+#endif
+
+// Fast path for flat-colour sprites that need no mask evaluation, texel
+// modulation or blending: fills the width x height pixel rectangle at (x, y)
+// by writing 16bpp pixels straight into VRAM (row pitch 1024 pixels) instead
+// of queueing render blocks.
+//
+//   psx_gpu        - GPU state; vram_out_ptr, mask_msb and num_blocks are read
+//   x, y           - top-left pixel of the rectangle
+//   u, v           - unused; kept so the signature matches
+//                    setup_sprite_untextured, which forwards here
+//   width, height  - rectangle size in pixels; nothing is drawn if either
+//                    is <= 0
+//   color          - 24-bit colour, R in bits 0-7, G in 8-15, B in 16-23
+//
+// Because the pixels are written immediately, flush_render_block_buffer is
+// called first whenever any blocks are pending, so that previously queued
+// work is not emitted after (and on top of) this fill.
+void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y,
+ s32 u, s32 v, s32 width, s32 height, u32 color)
+{
+  // Nothing to draw. This also keeps the unsigned per-row pixel counter from
+  // wrapping when a zero-width row starts on an odd pixel, and keeps the row
+  // loop from running away on a negative height.
+  if((width <= 0) || (height <= 0))
+  {
+    return;
+  }
+
+  // Reduce each 8-bit channel to 5 bits and pack as R | G << 5 | B << 10,
+  // then merge in mask_msb.
+  u32 r = color & 0xFF;
+  u32 g = (color >> 8) & 0xFF;
+  u32 b = (color >> 16) & 0xFF;
+  u32 color_16bpp = (r >> 3) | ((g >> 3) << 5) | ((b >> 3) << 10) |
+   psx_gpu->mask_msb;
+  // Two copies of the pixel so one 32-bit store writes two pixels.
+  u32 color_32bpp = color_16bpp | (color_16bpp << 16);
+
+  u16 *vram_ptr16 = psx_gpu->vram_out_ptr + x + (y * 1024);
+  u32 *vram_ptr;
+
+  u32 num_width;
+
+  // Flush whenever anything is queued. Comparing against MAX_BLOCKS here
+  // would never fire, because the queueing code flushes before the count can
+  // exceed MAX_BLOCKS.
+  if(psx_gpu->num_blocks)
+  {
+    flush_render_block_buffer(psx_gpu);
+  }
+
+  while(height)
+  {
+    num_width = width;
+
+    // Write one leading pixel if the row does not start on a 4-byte boundary,
+    // so the 32-bit stores below are aligned.
+    vram_ptr = (void *)vram_ptr16;
+    if((long)vram_ptr16 & 2)
+    {
+      *vram_ptr16 = color_32bpp;
+      vram_ptr = (void *)(vram_ptr16 + 1);
+      num_width--;
+    }
+
+    // Bulk of the row: 8 pixels (four 32-bit stores) per iteration.
+    while(num_width >= 4 * 2)
+    {
+      vram_ptr[0] = color_32bpp;
+      vram_ptr[1] = color_32bpp;
+      vram_ptr[2] = color_32bpp;
+      vram_ptr[3] = color_32bpp;
+
+      vram_ptr += 4;
+      num_width -= 4 * 2;
+    }
+
+    // Remaining pixel pairs.
+    while(num_width >= 2)
+    {
+      *vram_ptr++ = color_32bpp;
+      num_width -= 2;
+    }
+
+    // Trailing single pixel.
+    if(num_width > 0)
+    {
+      *(u16 *)vram_ptr = color_32bpp;
+    }
+
+    vram_ptr16 += 1024;
+    height--;
+  }
+}
 
 
 #define setup_sprite_blocks_switch_textured(texture_mode)                      \
index 103483a..085e11b 100644 (file)
 #define MAX_BLOCKS                                        64
 #define MAX_BLOCKS_PER_ROW                                128
 
+// Bits of the psx_gpu render_state halfword that setup_sprite_untextured
+// tests to decide whether it can hand off to setup_sprite_untextured_simple.
+// NOTE(review): presumably these must stay equal to the identically named
+// C-side constants - verify against the C header.
+#define RENDER_STATE_MASK_EVALUATE                        0x20
+#define RENDER_FLAGS_MODULATE_TEXELS                      0x1
+#define RENDER_FLAGS_BLEND                                0x2
+
 #include "psx_gpu_offsets.h"
 
 #define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4)
@@ -5687,6 +5691,145 @@ function(setup_sprite_16bpp_4x)
   ldmia sp!, { r4 - r11, pc }
 
 
+// Register aliases for setup_sprite_untextured. Names that are redefined
+// below are dropped first so the new definitions do not clash with earlier
+// ones in this file.
+#undef width
+#undef right_width
+#undef right_mask_bits
+#undef color
+#undef height
+#undef blocks_remaining
+#undef colors
+#undef right_mask
+#undef test_mask
+#undef draw_mask
+
+// Several aliases below share one physical register (x/color_r = r1,
+// y/color_g = r2, right_width/block = r5, right_mask_bits/blocks_remaining =
+// r6, color/color_b = r8, test_mask/draw_mask = q2). In each pair the first
+// name is no longer needed by the time the second one is written.
+// width lives in r3, which holds the u argument on entry; u is not used.
+#define psx_gpu                                           r0
+#define x                                                 r1
+#define y                                                 r2
+#define width                                             r3
+#define right_width                                       r5
+#define right_mask_bits                                   r6
+#define fb_ptr                                            r7
+#define color                                             r8
+#define height                                            r9
+#define fb_ptr_pitch                                      r12
+
+// referenced by setup_sprites_16bpp_flush
+#define num_blocks                                        r4
+#define block                                             r5
+#define block_width                                       r11
+
+// 5-bit colour channels extracted from the 24-bit color argument.
+#define color_r                                           r1
+#define color_g                                           r2
+#define color_b                                           r8
+#define blocks_remaining                                  r6
+
+// NEON registers. d6 is the low half of q3, so it does not overlap q0-q2.
+#define colors                                            q0
+#define right_mask                                        q1
+#define test_mask                                         q2
+#define draw_mask                                         q2
+#define draw_mask_bits_fb_ptr                             d6
+
+// setup_sprite_untextured(psx_gpu, x, y, u, v, width, height, color)
+//
+// Queues a flat-colour sprite as render blocks, one 64-byte block per span
+// of up to 8 pixels, row by row. Each block written here gets:
+//   +0   16 bytes  per-pixel mask (zero for full spans, right_mask for the
+//                  last span of a row)
+//   +16  16 bytes  the 16bpp colour replicated into 8 lanes
+//   +40   8 bytes  d6: lane 0 zeroed below, lane 1 = framebuffer address
+// If render_state requests no mask evaluation, modulation or blending, the
+// work is handed to setup_sprite_untextured_simple instead.
+.align 3
+
+function(setup_sprite_untextured)
+  // Fast path check. Nothing has been pushed yet, so the branch hands the
+  // original arguments and return address to the simple variant unchanged.
+  ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ]
+  tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS         \
+    | RENDER_FLAGS_BLEND)
+  beq setup_sprite_untextured_simple
+
+  // Save 9 registers (36 bytes). Given the C prototype, the stack arguments
+  // are then at sp+36 (v, never read), sp+40 (width), sp+44 (height) and
+  // sp+48 (color). Loading width into r3 discards u.
+  stmdb sp!, { r4 - r11, r14 }
+
+  ldr width, [ sp, #40 ]
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
+
+  // fb_ptr = vram_out_ptr + y * 2048 + x * 2 (byte offsets of 16bpp pixels
+  // on a 1024-pixel pitch).
+  ldr height, [ sp, #44 ]
+  add fb_ptr, fb_ptr, y, lsl #11
+
+  add fb_ptr, fb_ptr, x, lsl #1
+  sub right_width, width, #1
+
+  // Interleaved from here on:
+  //   right_width     = ((width - 1) & 7) + 1   pixels in the last span
+  //   block_width     = (width + 7) >> 3        spans (blocks) per row
+  //   right_mask_bits = 0xff << right_width
+  //   fb_ptr_pitch    = 1024 * 2 - (block_width - 1) * 16
+  //                     (bytes from the last span of a row to the first span
+  //                     of the next row)
+  ldr color, [ sp, #48 ]
+  and right_width, #7
+
+  add block_width, width, #7
+  add right_width, #1
+
+  lsr block_width, #3
+  mov right_mask_bits, #0xff
+
+  sub fb_ptr_pitch, block_width, #1
+  lsl right_mask_bits, right_width
+
+  // Take the top 5 bits of each 8-bit channel (bit offsets 3, 11, 19).
+  // color_r and color_g reuse the x and y registers, which are no longer
+  // needed.
+  lsl fb_ptr_pitch, #3+1
+  ubfx color_r, color, #3, #5
+
+  rsb fb_ptr_pitch, #1024*2
+  ubfx color_g, color, #11, #5
+
+  // Loads 128 bits from offset 0 of the psx_gpu struct.
+  // NOTE(review): presumably that is the test_mask field - confirm against
+  // psx_gpu_offsets.h.
+  vld1.u32 { test_mask }, [ psx_gpu, :128 ]
+  ubfx color_b, color, #19, #5
+
+  // color = r | g << 5 | b << 10
+  vdup.u16 right_mask, right_mask_bits
+  orr color, color_r, color_b, lsl #10
+
+  ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+  orr color, color, color_g, lsl #5
+
+  // right_mask lane = all ones where (right_mask_bits & test_mask lane) is
+  // non-zero, else zero.
+  // block = psx_gpu + blocks offset + num_blocks * 64: the next free block.
+  vtst.u16 right_mask, right_mask, test_mask
+  add block, psx_gpu, #psx_gpu_blocks_offset
+
+  vdup.u16 colors, color
+  add block, block, num_blocks, lsl #6
+
+
+// One iteration per sprite row.
+setup_sprite_untextured_height_loop:
+  add num_blocks, block_width
+  sub blocks_remaining, block_width, #1
+
+  // Flush when this row would push the count above MAX_BLOCKS.
+  // NOTE(review): setup_sprites_16bpp_flush is defined outside this view;
+  // presumably it resets num_blocks and block for the new row - confirm.
+  cmp num_blocks, #MAX_BLOCKS
+  blgt setup_sprites_16bpp_flush
+
+  // A row with a single span has no full spans: go straight to the last one.
+  // NOTE(review): in that case the two zeroing moves below are skipped, so
+  // lane 0 of d6 keeps whatever value it had - confirm that is acceptable.
+  cmp blocks_remaining, #0
+  ble 1f
+
+  // draw_mask shares q2 with test_mask, which is no longer needed.
+  vmov.u8 draw_mask, #0 /* zero_mask */
+  vmov.u8 draw_mask_bits_fb_ptr, #0
+
+ // Full 8-pixel spans. The flags set by subs are consumed by the bgt at the
+ // end; nothing in between modifies them.
+ 0:
+  vst1.u32 { draw_mask }, [ block, :128 ]!
+  subs blocks_remaining, #1
+
+  vst1.u32 { colors }, [ block, :128 ]
+  add block, block, #24
+
+  vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr
+  vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ]
+  
+  // Step to the next block (16 + 24 + 24 = 64 bytes) and the next 8 pixels.
+  add block, block, #24
+  add fb_ptr, #8*2
+  bgt 0b
+
+ // Last span of the row, stored with right_mask instead of the zero mask.
+ // The flags set by subs height are consumed by the final bgt.
+ 1:
+  vst1.u32 { right_mask }, [ block, :128 ]!
+  subs height, #1
+
+  vst1.u32 { colors }, [ block, :128 ]
+  add block, block, #24
+
+  vmov.u32 draw_mask_bits_fb_ptr[1], fb_ptr
+  vst1.u32 { draw_mask_bits_fb_ptr }, [ block, :64 ]
+  
+  // Advance to the next block and to the first span of the next row.
+  add block, block, #24
+  add fb_ptr, fb_ptr_pitch
+
+  // Publish the updated block count after every row.
+  strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]
+  bgt setup_sprite_untextured_height_loop
+
+  // Restore the saved registers and return (saved r14 is popped into pc).
+  ldmia sp!, { r4 - r11, pc }
+
+
+
 #undef texture_page_ptr
 #undef vram_ptr
 #undef dirty_textures_mask