\
dup_2x32b(edge_shifts, edge_shift); \
sub_2x32b(heights_b, heights, c_0x01); \
- shr_2x32b(height_reciprocals, edge_shifts, 12); \
+ shr_2x32b(height_reciprocals, edge_shifts, 10); \
\
mla_2x32b(heights_b, x_starts, heights); \
bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \
sub_2x32b(widths, x_ends, x_starts); \
width_alt = x_c - start_c; \
\
- shr_2x32b(height_reciprocals, edge_shifts, 12); \
- height_reciprocal_alt = edge_shift_alt >> 12; \
+ shr_2x32b(height_reciprocals, edge_shifts, 10); \
+ height_reciprocal_alt = edge_shift_alt >> 10; \
\
bic_immediate_4x16b(vector_cast(vec_4x16u, edge_shifts), 0xE0); \
edge_shift_alt &= 0x1F; \
{
shift = __builtin_clz(height);
height_normalized = height << shift;
- height_reciprocal = ((1ULL << 50) + (height_normalized - 1)) /
+ height_reciprocal = ((1ULL << 52) + (height_normalized - 1)) /
height_normalized;
- shift = 32 - (50 - shift);
+ shift = 32 - (52 - shift);
- reciprocal_table[height] = (height_reciprocal << 12) | shift;
+ reciprocal_table[height] = (height_reciprocal << 10) | shift;
}
}
\
vdup.u32 edge_shifts, temp; \
vsub.u32 heights_b, heights, c_0x01; \
- vshr.u32 height_reciprocals, edge_shifts, #12; \
+ vshr.u32 height_reciprocals, edge_shifts, #10; \
\
vmla.s32 heights_b, x_starts, heights; \
vbic.u16 edge_shifts, #0xE0; \
vsub.u32 heights_b, heights, c_0x01; \
sub height_b_alt, height_minor_b, #1; \
\
- vshr.u32 height_reciprocals, edge_shifts, #12; \
- lsr height_reciprocal_alt, edge_shift_alt, #12; \
+ vshr.u32 height_reciprocals, edge_shifts, #10; \
+ lsr height_reciprocal_alt, edge_shift_alt, #10; \
\
vmla.s32 heights_b, x_starts, heights; \
mla height_b_alt, height_minor_b, start_c, height_b_alt; \