psx_gpu: use different uvrgb phase for enhancement
[pcsx_rearmed.git] / plugins / gpu_neon / psx_gpu / psx_gpu_arm_neon.S
index 294685a..3239412 100644 (file)
 #define MAX_BLOCKS                                        64
 #define MAX_BLOCKS_PER_ROW                                128
 
-#define psx_gpu_test_mask_offset                          0
-#define psx_gpu_uvrg_offset                               16
-#define psx_gpu_uvrg_dx_offset                            32
-#define psx_gpu_uvrg_dy_offset                            48
-#define psx_gpu_u_block_span_offset                       64
-#define psx_gpu_v_block_span_offset                       80
-#define psx_gpu_r_block_span_offset                       96
-#define psx_gpu_g_block_span_offset                       112
-#define psx_gpu_b_block_span_offset                       128
-
-#define psx_gpu_b_dx_offset                               132
-
-#define psx_gpu_b_offset                                  144
-#define psx_gpu_b_dy_offset                               148
-#define psx_gpu_triangle_area_offset                      152
-#define psx_gpu_texture_window_settings_offset            156
-#define psx_gpu_current_texture_mask_offset               160
-#define psx_gpu_viewport_mask_offset                      164
-#define psx_gpu_dirty_textures_4bpp_mask_offset           168
-#define psx_gpu_dirty_textures_8bpp_mask_offset           172
-#define psx_gpu_dirty_textures_8bpp_alternate_mask_offset 176
-#define psx_gpu_triangle_color_offset                     180
-#define psx_gpu_dither_table_offset                       184
-#define psx_gpu_render_block_handler_offset               200
-#define psx_gpu_texture_page_ptr_offset                   204
-#define psx_gpu_texture_page_base_offset                  208
-#define psx_gpu_clut_ptr_offset                           212
-#define psx_gpu_vram_ptr_offset                           216
-
-#define psx_gpu_render_state_base_offset                  220
-#define psx_gpu_render_state_offset                       222
-#define psx_gpu_num_spans_offset                          224
-#define psx_gpu_num_blocks_offset                         226
-#define psx_gpu_offset_x_offset                           228
-#define psx_gpu_offset_y_offset                           230
-#define psx_gpu_clut_settings_offset                      232
-#define psx_gpu_texture_settings_offset                   234
-#define psx_gpu_viewport_start_x_offset                   236
-#define psx_gpu_viewport_start_y_offset                   238
-#define psx_gpu_viewport_end_x_offset                     240
-#define psx_gpu_viewport_end_y_offset                     242
-#define psx_gpu_mask_msb_offset                           244
-                                                          
-#define psx_gpu_triangle_winding_offset                   246
-#define psx_gpu_display_area_draw_enable_offset           247
-#define psx_gpu_current_texture_page_offset               248
-#define psx_gpu_last_8bpp_texture_page_offset             249
-#define psx_gpu_texture_mask_width_offset                 250
-#define psx_gpu_texture_mask_height_offset                251
-#define psx_gpu_texture_window_x_offset                   252
-#define psx_gpu_texture_window_y_offset                   253
-#define psx_gpu_primitive_type_offset                     254
-
-#define psx_gpu_reserved_a_offset                         255
-
-#define psx_gpu_blocks_offset                             0x0100
-#define psx_gpu_span_uvrg_offset_offset                   0x2100
-#define psx_gpu_span_edge_data_offset                     0x4100
-#define psx_gpu_span_b_offset_offset                      0x5100
+#include "psx_gpu_offsets.h"
+
+#define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4) /* keeps the old 128 -> 132 spacing */
 
 #define edge_data_left_x_offset                           0
 #define edge_data_num_blocks_offset                       2
 #define uvrg_dx3l                                         d6
 #define uvrg_dx3h                                         d7
 
+#define uvrgb_phase                                       q13 /* psx_gpu->uvrgb_phase in all four u32 lanes */
 
 .align 4
 
@@ -369,11 +314,16 @@ function(compute_all_gradients)
   vmull.s16 ga_uvrg_y, d0_b, d1_b
   rsbmi ga_bx, ga_bx, #0
 
+  @ r12 = psx_gpu->uvrgb_phase
+  ldr r12, [ psx_gpu, #psx_gpu_uvrgb_phase_offset ]
+
   vmlsl.s16 ga_uvrg_y, d2_b, d3_b
   movs gs_by, ga_by, asr #31
 
   vshr.u64 d0, d30, #22
-  mov b_base, b0, lsl #16
+  add b_base, r12, b0, lsl #16       @ b_base = (b0 << 16) + r12
+
+  vdup.u32 uvrgb_phase, r12          @ uvrgb_phase = { r12, r12, r12, r12 }
 
   rsbmi ga_by, ga_by, #0
   vclt.s32 gs_uvrg_x, ga_uvrg_x, #0  @ gs_uvrg_x = ga_uvrg_x < 0
@@ -382,7 +332,6 @@ function(compute_all_gradients)
   ldrb r12, [ psx_gpu, #psx_gpu_triangle_winding_offset ]
   vclt.s32 gs_uvrg_y, ga_uvrg_y, #0  @ gs_uvrg_y = ga_uvrg_y < 0
 
-  add b_base, b_base, #0x8000
   rsb r12, r12, #0                   @ r12 = -(triangle->winding)
 
   vdup.u32 w_mask, r12               @ w_mask = { -w, -w, -w, -w }
@@ -391,7 +340,7 @@ function(compute_all_gradients)
   vshll.u16 uvrg_base, uvrg0, #16    @ uvrg_base = uvrg0 << 16
   vdup.u32 r_shift, r14              @ r_shift = { shift, shift, shift, shift }
 
-  vorr.u32 uvrg_base, #0x8000
+  vadd.u32 uvrg_base, uvrgb_phase    @ uvrg_base += uvrgb_phase
   vabs.s32 ga_uvrg_x, ga_uvrg_x      @ ga_uvrg_x = abs(ga_uvrg_x)
 
   vmov area_r_s, s0                  @ area_r_s = triangle_reciprocal
@@ -657,7 +606,7 @@ function(compute_all_gradients)
                                                                                \
   vdup.u32 edge_shifts, temp;                                                  \
   vsub.u32 heights_b, heights, c_0x01;                                         \
-  vshr.u32 height_reciprocals, edge_shifts, #12;                               \
+  vshr.u32 height_reciprocals, edge_shifts, #10;                               \
                                                                                \
   vmla.s32 heights_b, x_starts, heights;                                       \
   vbic.u16 edge_shifts, #0xE0;                                                 \
@@ -682,8 +631,8 @@ function(compute_all_gradients)
   vsub.u32 heights_b, heights, c_0x01;                                         \
   sub height_b_alt, height_minor_b, #1;                                        \
                                                                                \
-  vshr.u32 height_reciprocals, edge_shifts, #12;                               \
-  lsr height_reciprocal_alt, edge_shift_alt, #12;                              \
+  vshr.u32 height_reciprocals, edge_shifts, #10;                               \
+  lsr height_reciprocal_alt, edge_shift_alt, #10;                              \
                                                                                \
   vmla.s32 heights_b, x_starts, heights;                                       \
   mla height_b_alt, height_minor_b, start_c, height_b_alt;                     \
@@ -1393,7 +1342,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect)         \
   add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset;                \
                                                                                \
   ldrh y, [ span_edge_data, #edge_data_y_offset ];                             \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   cmp span_num_blocks, #0;                                                     \
   beq 1f;                                                                      \
@@ -1660,7 +1609,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect)       \
   add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset;                \
                                                                                \
   ldrh y, [ span_edge_data, #edge_data_y_offset ];                             \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   cmp span_num_blocks, #0;                                                     \
   beq 1f;                                                                      \
@@ -1855,7 +1804,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect)
   ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ]
   ldrh y, [ span_edge_data, #edge_data_y_offset ]
 
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
 
   cmp span_num_blocks, #0
   beq 1f
@@ -1975,7 +1924,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_direct)
   ldrh span_num_blocks, [ span_edge_data, #edge_data_num_blocks_offset ]
   ldrh y, [ span_edge_data, #edge_data_y_offset ]
 
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
 
   cmp span_num_blocks, #0
   beq 1f
@@ -2162,7 +2111,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect)     \
   add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset;                \
                                                                                \
   ldrh y, [ span_edge_data, #edge_data_y_offset ];                             \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   cmp span_num_blocks, #0;                                                     \
   beq 1f;                                                                      \
@@ -2402,7 +2351,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct)       \
   add dither_offset_ptr, psx_gpu, #psx_gpu_dither_table_offset;                \
                                                                                \
   ldrh y, [ span_edge_data, #edge_data_y_offset ];                             \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   cmp span_num_blocks, #0;                                                     \
   beq 1f;                                                                      \
@@ -4860,7 +4809,7 @@ function(setup_sprite_##texture_mode)                                          \
   and offset_u, u, #0xF;                                                       \
                                                                                \
   ldr width, [ sp, #40 ];                                                      \
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ];                           \
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ];                       \
                                                                                \
   ldr height, [ sp, #44 ];                                                     \
   add fb_ptr, fb_ptr, y, lsl #11;                                              \
@@ -5108,7 +5057,7 @@ setup_sprites_16bpp_flush_row:
 
 function(setup_sprite_16bpp)
   stmdb sp!, { r4 - r11, r14 }
-  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_ptr_offset ]
+  ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ]
 
   ldr v, [ sp, #36 ]
   add fb_ptr, fb_ptr, y, lsl #11