psx_gpu: convert to UAL, load everything from context

author notaz <notasas@gmail.com>

Mon, 1 Apr 2013 00:03:52 +0000 (03:03 +0300)

committer notaz <notasas@gmail.com>

Mon, 1 Apr 2013 15:33:15 +0000 (18:33 +0300)
author notaz <notasas@gmail.com>
Mon, 1 Apr 2013 00:03:52 +0000 (03:03 +0300)
committer notaz <notasas@gmail.com>
Mon, 1 Apr 2013 15:33:15 +0000 (18:33 +0300)
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c

index f52e842..e113f06 100644 (file)
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c
@@ -5056,6 +5056,7 @@ void initialize_psx_gpu(psx_gpu_struct *psx_gpu, u16 *vram)
    memset(psx_gpu->vram_ptr, 0, sizeof(u16) * 1024 * 512);
  
    initialize_reciprocal_table();
+  psx_gpu->reciprocal_table_ptr = reciprocal_table;
  
    //    00 01 10 11
    // 00  0  4  1  5
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h

index 846658c..1eaa99a 100644 (file)
--- a/plugins/gpu_neon/psx_gpu/psx_gpu.h
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h
@@ -180,6 +180,8 @@ typedef struct
    u16 clut_settings;
    u16 texture_settings;
  
+  u32 *reciprocal_table_ptr;
+
    // enhancement stuff
    u16 *enhancement_buf_ptr;
    u16 *enhancement_current_buf_ptr;
@@ -192,7 +194,7 @@ typedef struct
  
    // Align up to 64 byte boundary to keep the upcoming buffers cache line
    // aligned, also make reachable with single immediate addition
-  u8 reserved_a[164];
+  u8 reserved_a[160];
  
    // 8KB
    block_struct blocks[MAX_BLOCKS_PER_ROW];
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S

index d8fb153..8df7aca 100644 (file)
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S
@@ -31,6 +31,8 @@
  #define edge_data_right_mask_offset                       4
  #define edge_data_y_offset                                6
  
+.syntax unified
+.text
  
  #define psx_gpu                                           r0
  #define v_a                                               r1
@@ -192,18 +194,6 @@
  
  .align 4
  
-/* FIXME: users of this should be in psx_gpu instead */
-#ifndef __PIC__
-#define load_pointer(register, pointer)                                        \
-  movw register, :lower16:pointer;                                             \
-  movt register, :upper16:pointer;                                             \
-
-#else
-#define load_pointer(register, pointer)                                        \
-  ldr  register, =pointer                                                      \
-
-#endif
-
  #define function(name)                                                         \
    .global name;                                                                \
    name:                                                                        \
@@ -576,7 +566,7 @@ function(compute_all_gradients)
    vld1.32 { uvrg }, [ temp ];                                                  \
    add temp, psx_gpu, #psx_gpu_uvrg_dy_offset;                                  \
    vld1.32 { uvrg_dy }, [ temp ];                                               \
-  load_pointer(reciprocal_table_ptr, reciprocal_table);                        \
+  ldr reciprocal_table_ptr, [ psx_gpu, #psx_gpu_reciprocal_table_ptr_offset ]; \
                                                                                 \
    vmov.u32 c_0x01, #0x01                                                       \
  
@@ -624,7 +614,7 @@ function(compute_all_gradients)
  #define height_b_alt              r12
  
  #define compute_edge_delta_x3(start_c, height_a, height_b)                     \
-  vmov.u32 heights, height_a, height_b;                                        \
+  vmov heights, height_a, height_b;                                            \
    ldr temp, [ reciprocal_table_ptr, height_a, lsl #2 ];                        \
    vmov.u32 edge_shifts[0], temp;                                               \
    ldr temp, [ reciprocal_table_ptr, height_b, lsl #2 ];                        \
@@ -884,7 +874,7 @@ function(compute_all_gradients)
    add temp, temp, #(1 << 16);                                                  \
    add y_a, temp, #2;                                                           \
    add y_a, y_a, #(2 << 16);                                                    \
-  vmov.u32 y_x4, temp, y_a;                                                    \
+  vmov y_x4, temp, y_a;                                                        \
                                                                                 \
    setup_spans_adjust_edges_alternate_##alternate_active(left_index,            \
     right_index);                                                               \
@@ -939,7 +929,7 @@ function(compute_all_gradients)
    sub temp, temp, #(1 << 16);                                                  \
    sub y_a, temp, #2;                                                           \
    sub y_a, y_a, #(2 << 16);                                                    \
-  vmov.u32 y_x4, temp, y_a;                                                    \
+  vmov y_x4, temp, y_a;                                                        \
                                                                                 \
    vaddw.s32 edges_xy, edges_xy, edges_dx_dy;                                   \
                                                                                 \
@@ -970,7 +960,7 @@ function(compute_all_gradients)
    sub height, y_a, y_c;                                                        \
                                                                                 \
    vdup.u32 x_starts, x_a;                                                      \
-  vmov.u32 x_ends, x_c, x_b;                                                   \
+  vmov x_ends, x_c, x_b;                                                       \
                                                                                 \
    compute_edge_delta_x3(x_b, height_major, height_minor_a);                    \
    setup_spans_up(major, minor, minor, yes);                                    \
@@ -982,8 +972,6 @@ function(setup_spans_up_left)
  function(setup_spans_up_right)
    setup_spans_up_up(right, left)
  
-.pool
-
  #define setup_spans_down_down(minor, major)                                    \
    setup_spans_prologue();                                                      \
    sub height_minor_a, y_b, y_a;                                                \
@@ -991,7 +979,7 @@ function(setup_spans_up_right)
    sub height, y_c, y_a;                                                        \
                                                                                 \
    vdup.u32 x_starts, x_a;                                                      \
-  vmov.u32 x_ends, x_c, x_b;                                                   \
+  vmov x_ends, x_c, x_b;                                                       \
                                                                                 \
    compute_edge_delta_x3(x_b, height_major, height_minor_a);                    \
    setup_spans_down(major, minor, minor, yes);                                  \
@@ -1014,7 +1002,7 @@ function(setup_spans_down_right)
  function(setup_spans_up_a)
    setup_spans_prologue()
  
-  vmov.u32 x_starts, x_a, x_b
+  vmov x_starts, x_a, x_b
    vdup.u32 x_ends, x_c
  
    setup_spans_up_flat()
@@ -1023,7 +1011,7 @@ function(setup_spans_up_b)
    setup_spans_prologue()
  
    vdup.u32 x_starts, x_a
-  vmov.u32 x_ends, x_b, x_c
+  vmov x_ends, x_b, x_c
  
    setup_spans_up_flat()
  
@@ -1037,7 +1025,7 @@ function(setup_spans_up_b)
  function(setup_spans_down_a)
    setup_spans_prologue()
  
-  vmov.u32 x_starts, x_a, x_b
+  vmov x_starts, x_a, x_b
    vdup.u32 x_ends, x_c
  
    setup_spans_down_flat()
@@ -1046,7 +1034,7 @@ function(setup_spans_down_b)
    setup_spans_prologue()
  
    vdup.u32 x_starts, x_a
-  vmov.u32 x_ends, x_b, x_c
+  vmov x_ends, x_b, x_c
  
    setup_spans_down_flat()
  
@@ -1077,13 +1065,13 @@ function(setup_spans_up_down)
    sub height_minor_b, y_c, y_a
    sub height_major, y_c, y_b
  
-  vmov.u32 x_starts, x_a, x_c
+  vmov x_starts, x_a, x_c
    vdup.u32 x_ends, x_b
  
    compute_edge_delta_x3(x_a, height_minor_a, height_major)
  
    mov temp, #0
-  vmov.u32 height_increment, temp, height_minor_b
+  vmov height_increment, temp, height_minor_b
    vmlal.s32 edges_xy, edges_dx_dy, height_increment
  
    vmov edges_xy_b_left, edge_alt_low, edge_alt_high
@@ -1120,7 +1108,7 @@ function(setup_spans_up_down)
    sub temp, temp, #(1 << 16)
    sub y_a, temp, #2
    sub y_a, y_a, #(2 << 16)
-  vmov.u32 y_x4, temp, y_a
+  vmov y_x4, temp, y_a
  
    vaddw.s32 edges_xy, edges_xy, edges_dx_dy
  
@@ -1170,7 +1158,7 @@ function(setup_spans_up_down)
    add temp, temp, #(1 << 16) 
    add y_a, temp, #2
    add y_a, y_a, #(2 << 16)
-  vmov.u32 y_x4, temp, y_a
+  vmov y_x4, temp, y_a
  
    setup_spans_adjust_edges_alternate_no(left, right)
  
@@ -1204,8 +1192,6 @@ function(setup_spans_up_down)
    bne 2b
    bal 1b
  
-.pool
-
  #undef span_uvrg_offset
  #undef span_edge_data
  #undef span_b_offset
@@ -1936,7 +1922,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
    vdup.u16 colors, color
  
    add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset
-  orr color, color, lsl #16
+  orr color, color, color, lsl #16
  
  
   0:
@@ -1978,7 +1964,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
    moveq right_mask, right_mask, lsr #2
  
    tst right_mask, #0x1
-  streqh color, [ fb_ptr ]
+  strheq color, [ fb_ptr ]
  
   1:
    add span_edge_data, span_edge_data, #8
@@ -2690,7 +2676,7 @@ function(texture_blocks_4bpp)
    orr pixels_a, pixels_a, pixel_3, lsl #24
  
    orr pixels_b, pixels_b, pixel_7, lsl #24
-  vmov.u32 texels, pixels_a, pixels_b
+  vmov texels, pixels_a, pixels_b
  
    vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels
    vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels
@@ -4751,7 +4737,7 @@ setup_sprite_update_texture_8bpp_cache:
    mov fb_ptr_advance_column, #32;                                              \
    vdup.u8 draw_mask_fb_ptr_left, block_masks[0];                               \
                                                                                 \
-  sub fb_ptr_advance_column, height, lsl #11;                                  \
+  sub fb_ptr_advance_column, fb_ptr_advance_column, height, lsl #11;           \
    vdup.u8 draw_mask_fb_ptr_right, block_masks[1]                               \
  
  #define setup_sprite_setup_right_draw_mask_fb_ptr()                            \
@@ -5095,7 +5081,7 @@ setup_sprite_update_texture_8bpp_cache:
    mov fb_ptr_advance_column, #32 * 2;                                          \
    vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0];                             \
    vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1];                             \
-  sub fb_ptr_advance_column, height, lsl #11 + 1;                              \
+  sub fb_ptr_advance_column, fb_ptr_advance_column, height, lsl #11 + 1;       \
    vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2];                            \
    vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3]                             \
  
@@ -5428,7 +5414,7 @@ function(setup_sprite_16bpp)
    add texture_offset_base, u, u
    add width_rounded, width, #7
  
-  add texture_offset_base, v, lsl #11
+  add texture_offset_base, texture_offset_base, v, lsl #11
    mov left_mask_bits, #0xFF
    
    ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
@@ -5443,7 +5429,7 @@ function(setup_sprite_16bpp)
    and right_width, width_rounded, #0x7
    mvn left_mask_bits, left_mask_bits, lsl left_offset
  
-  add texture_mask, texture_mask_height, lsl #11
+  add texture_mask, texture_mask, texture_mask_height, lsl #11
    mov block_width, width_rounded, lsr #3
  
    mov right_mask_bits, right_mask_bits, lsl right_width
@@ -5590,7 +5576,7 @@ function(setup_sprite_16bpp_4x)
    add texture_offset_base, u, u
    add width_rounded, width, #7
  
-  add texture_offset_base, v, lsl #11
+  add texture_offset_base, texture_offset_base, v, lsl #11
    movw left_mask_bits, #0xFFFF
    
    ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
@@ -5609,7 +5595,7 @@ function(setup_sprite_16bpp_4x)
  
    lsl right_width, #1
  
-  add texture_mask, texture_mask_height, lsl #11
+  add texture_mask, texture_mask, texture_mask_height, lsl #11
    mov block_width, width_rounded, lsr #3
  
    mov right_mask_bits, right_mask_bits, lsl right_width
@@ -5760,7 +5746,7 @@ function(setup_sprite_untextured)
    ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ]
    tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS         \
      | RENDER_FLAGS_BLEND)
-  ldreqb r12, [ psx_gpu, #psx_gpu_render_mode_offset ]
+  ldrbeq r12, [ psx_gpu, #psx_gpu_render_mode_offset ]
    tsteq r12, #RENDER_INTERLACE_ENABLED
    beq setup_sprite_untextured_simple
  
@@ -6081,7 +6067,7 @@ function(scale2x_tiles8)
    mov r14, r2
    add r0, #1024*2*2
    add r4, #1024*2
-  sub r0, r2, lsl #4+1
+  sub r0, r0, r2, lsl #4+1
    mov r1, r4
    add r12, r0, #1024*2
    bgt 0b
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h

index 1307891..5460e40 100644 (file)
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h
@@ -48,6 +48,7 @@
  #define psx_gpu_offset_y_offset                           0x102
  #define psx_gpu_clut_settings_offset                      0x104
  #define psx_gpu_texture_settings_offset                   0x106
+#define psx_gpu_reciprocal_table_ptr_offset               0x108
  #define psx_gpu_blocks_offset                             0x200
  #define psx_gpu_span_uvrg_offset_offset                   0x2200
  #define psx_gpu_span_edge_data_offset                     0x4200
diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c

index 5adfb75..b1de121 100644 (file)
--- a/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
+++ b/plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c
@@ -73,6 +73,7 @@ int main()
         WRITE_OFFSET(f, offset_y);
         WRITE_OFFSET(f, clut_settings);
         WRITE_OFFSET(f, texture_settings);
+       WRITE_OFFSET(f, reciprocal_table_ptr);
         WRITE_OFFSET(f, blocks);
         WRITE_OFFSET(f, span_uvrg_offset);
         WRITE_OFFSET(f, span_edge_data);
author	notaz <notasas@gmail.com>
	Mon, 1 Apr 2013 00:03:52 +0000 (03:03 +0300)
committer	notaz <notasas@gmail.com>
	Mon, 1 Apr 2013 15:33:15 +0000 (18:33 +0300)
plugins/gpu_neon/psx_gpu/psx_gpu.c		patch | blob | blame | history
plugins/gpu_neon/psx_gpu/psx_gpu.h		patch | blob | blame | history
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S		patch | blob | blame | history
plugins/gpu_neon/psx_gpu/psx_gpu_offsets.h		patch | blob | blame | history
plugins/gpu_neon/psx_gpu/psx_gpu_offsets_update.c		patch | blob | blame | history