psx_gpu: increase reciprocal accuracy
authorExophase <exophase@gmail.com>
Sun, 7 Oct 2012 17:13:22 +0000 (20:13 +0300)
committernotaz <notasas@gmail.com>
Sun, 7 Oct 2012 17:32:09 +0000 (20:32 +0300)
fixes 448 height issue

plugins/gpu_neon/psx_gpu/psx_gpu.c
plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S

index 68996c1..98aacc3 100644 (file)
@@ -854,7 +854,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
                                                                                \
   dup_2x32b(edge_shifts, edge_shift);                                          \
   sub_2x32b(heights_b, heights, c_0x01);                                       \
                                                                                \
   dup_2x32b(edge_shifts, edge_shift);                                          \
   sub_2x32b(heights_b, heights, c_0x01);                                       \
-  shr_2x32b(height_reciprocals, edge_shifts, 12);                              \
+  shr_2x32b(height_reciprocals, edge_shifts, 10);                              \
                                                                                \
   mla_2x32b(heights_b, x_starts, heights);                                     \
   bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0);              \
                                                                                \
   mla_2x32b(heights_b, x_starts, heights);                                     \
   bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0);              \
@@ -883,8 +883,8 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a,
   sub_2x32b(widths, x_ends, x_starts);                                         \
   width_alt = x_c - start_c;                                                   \
                                                                                \
   sub_2x32b(widths, x_ends, x_starts);                                         \
   width_alt = x_c - start_c;                                                   \
                                                                                \
-  shr_2x32b(height_reciprocals, edge_shifts, 12);                              \
-  height_reciprocal_alt = edge_shift_alt >> 12;                                \
+  shr_2x32b(height_reciprocals, edge_shifts, 10);                              \
+  height_reciprocal_alt = edge_shift_alt >> 10;                                \
                                                                                \
   bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0);              \
   edge_shift_alt &= 0x1F;                                                      \
                                                                                \
   bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0);              \
   edge_shift_alt &= 0x1F;                                                      \
@@ -4526,12 +4526,12 @@ void initialize_reciprocal_table(void)
   {
     shift = __builtin_clz(height);
     height_normalized = height << shift;
   {
     shift = __builtin_clz(height);
     height_normalized = height << shift;
-    height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) /
+    height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) /
      height_normalized;
 
      height_normalized;
 
-    shift = 32 - (50 - shift);
+    shift = 32 - (52 - shift);
 
 
-    reciprocal_table[height] = (height_reciprocal << 12) | shift;
+    reciprocal_table[height] = (height_reciprocal << 10) | shift;
   }
 }
 
   }
 }
 
index 294685a..3331d5d 100644 (file)
@@ -657,7 +657,7 @@ function(compute_all_gradients)
                                                                                \
   vdup.u32 edge_shifts, temp;                                                  \
   vsub.u32 heights_b, heights, c_0x01;                                         \
                                                                                \
   vdup.u32 edge_shifts, temp;                                                  \
   vsub.u32 heights_b, heights, c_0x01;                                         \
-  vshr.u32 height_reciprocals, edge_shifts, #12;                               \
+  vshr.u32 height_reciprocals, edge_shifts, #10;                               \
                                                                                \
   vmla.s32 heights_b, x_starts, heights;                                       \
   vbic.u16 edge_shifts, #0xE0;                                                 \
                                                                                \
   vmla.s32 heights_b, x_starts, heights;                                       \
   vbic.u16 edge_shifts, #0xE0;                                                 \
@@ -682,8 +682,8 @@ function(compute_all_gradients)
   vsub.u32 heights_b, heights, c_0x01;                                         \
   sub height_b_alt, height_minor_b, #1;                                        \
                                                                                \
   vsub.u32 heights_b, heights, c_0x01;                                         \
   sub height_b_alt, height_minor_b, #1;                                        \
                                                                                \
-  vshr.u32 height_reciprocals, edge_shifts, #12;                               \
-  lsr height_reciprocal_alt, edge_shift_alt, #12;                              \
+  vshr.u32 height_reciprocals, edge_shifts, #10;                               \
+  lsr height_reciprocal_alt, edge_shift_alt, #10;                              \
                                                                                \
   vmla.s32 heights_b, x_starts, heights;                                       \
   mla height_b_alt, height_minor_b, start_c, height_b_alt;                     \
                                                                                \
   vmla.s32 heights_b, x_starts, heights;                                       \
   mla height_b_alt, height_minor_b, start_c, height_b_alt;                     \