#define edge_data_right_mask_offset 4
#define edge_data_y_offset 6
+.syntax unified
+.text
#define psx_gpu r0
#define v_a r1
.align 4
-/* FIXME: users of this should be in psx_gpu instead */
-#ifndef __PIC__
-#define load_pointer(register, pointer) \
- movw register, :lower16:pointer; \
- movt register, :upper16:pointer; \
-
-#else
-#define load_pointer(register, pointer) \
- ldr register, =pointer \
-
-#endif
-
#define function(name) \
.global name; \
name: \
vld1.32 { uvrg }, [ temp ]; \
add temp, psx_gpu, #psx_gpu_uvrg_dy_offset; \
vld1.32 { uvrg_dy }, [ temp ]; \
- load_pointer(reciprocal_table_ptr, reciprocal_table); \
+ ldr reciprocal_table_ptr, [ psx_gpu, #psx_gpu_reciprocal_table_ptr_offset ]; \
\
vmov.u32 c_0x01, #0x01 \
#define height_b_alt r12
#define compute_edge_delta_x3(start_c, height_a, height_b) \
- vmov.u32 heights, height_a, height_b; \
+ vmov heights, height_a, height_b; \
ldr temp, [ reciprocal_table_ptr, height_a, lsl #2 ]; \
vmov.u32 edge_shifts[0], temp; \
ldr temp, [ reciprocal_table_ptr, height_b, lsl #2 ]; \
add temp, temp, #(1 << 16); \
add y_a, temp, #2; \
add y_a, y_a, #(2 << 16); \
- vmov.u32 y_x4, temp, y_a; \
+ vmov y_x4, temp, y_a; \
\
setup_spans_adjust_edges_alternate_##alternate_active(left_index, \
right_index); \
sub temp, temp, #(1 << 16); \
sub y_a, temp, #2; \
sub y_a, y_a, #(2 << 16); \
- vmov.u32 y_x4, temp, y_a; \
+ vmov y_x4, temp, y_a; \
\
vaddw.s32 edges_xy, edges_xy, edges_dx_dy; \
\
sub height, y_a, y_c; \
\
vdup.u32 x_starts, x_a; \
- vmov.u32 x_ends, x_c, x_b; \
+ vmov x_ends, x_c, x_b; \
\
compute_edge_delta_x3(x_b, height_major, height_minor_a); \
setup_spans_up(major, minor, minor, yes); \
function(setup_spans_up_right)
setup_spans_up_up(right, left)
-.pool
-
#define setup_spans_down_down(minor, major) \
setup_spans_prologue(); \
sub height_minor_a, y_b, y_a; \
sub height, y_c, y_a; \
\
vdup.u32 x_starts, x_a; \
- vmov.u32 x_ends, x_c, x_b; \
+ vmov x_ends, x_c, x_b; \
\
compute_edge_delta_x3(x_b, height_major, height_minor_a); \
setup_spans_down(major, minor, minor, yes); \
function(setup_spans_up_a)
setup_spans_prologue()
- vmov.u32 x_starts, x_a, x_b
+ vmov x_starts, x_a, x_b
vdup.u32 x_ends, x_c
setup_spans_up_flat()
setup_spans_prologue()
vdup.u32 x_starts, x_a
- vmov.u32 x_ends, x_b, x_c
+ vmov x_ends, x_b, x_c
setup_spans_up_flat()
function(setup_spans_down_a)
setup_spans_prologue()
- vmov.u32 x_starts, x_a, x_b
+ vmov x_starts, x_a, x_b
vdup.u32 x_ends, x_c
setup_spans_down_flat()
setup_spans_prologue()
vdup.u32 x_starts, x_a
- vmov.u32 x_ends, x_b, x_c
+ vmov x_ends, x_b, x_c
setup_spans_down_flat()
sub height_minor_b, y_c, y_a
sub height_major, y_c, y_b
- vmov.u32 x_starts, x_a, x_c
+ vmov x_starts, x_a, x_c
vdup.u32 x_ends, x_b
compute_edge_delta_x3(x_a, height_minor_a, height_major)
mov temp, #0
- vmov.u32 height_increment, temp, height_minor_b
+ vmov height_increment, temp, height_minor_b
vmlal.s32 edges_xy, edges_dx_dy, height_increment
vmov edges_xy_b_left, edge_alt_low, edge_alt_high
sub temp, temp, #(1 << 16)
sub y_a, temp, #2
sub y_a, y_a, #(2 << 16)
- vmov.u32 y_x4, temp, y_a
+ vmov y_x4, temp, y_a
vaddw.s32 edges_xy, edges_xy, edges_dx_dy
add temp, temp, #(1 << 16)
add y_a, temp, #2
add y_a, y_a, #(2 << 16)
- vmov.u32 y_x4, temp, y_a
+ vmov y_x4, temp, y_a
setup_spans_adjust_edges_alternate_no(left, right)
bne 2b
bal 1b
-.pool
-
#undef span_uvrg_offset
#undef span_edge_data
#undef span_b_offset
vdup.u16 colors, color
add span_edge_data, psx_gpu, #psx_gpu_span_edge_data_offset
- orr color, color, lsl #16
+ orr color, color, color, lsl #16
0:
moveq right_mask, right_mask, lsr #2
tst right_mask, #0x1
- streqh color, [ fb_ptr ]
+ strheq color, [ fb_ptr ]
1:
add span_edge_data, span_edge_data, #8
orr pixels_a, pixels_a, pixel_3, lsl #24
orr pixels_b, pixels_b, pixel_7, lsl #24
- vmov.u32 texels, pixels_a, pixels_b
+ vmov texels, pixels_a, pixels_b
vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels
vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels
mov fb_ptr_advance_column, #32; \
vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \
\
- sub fb_ptr_advance_column, height, lsl #11; \
+ sub fb_ptr_advance_column, fb_ptr_advance_column, height, lsl #11; \
vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \
#define setup_sprite_setup_right_draw_mask_fb_ptr() \
mov fb_ptr_advance_column, #32 * 2; \
vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \
vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \
- sub fb_ptr_advance_column, height, lsl #11 + 1; \
+ sub fb_ptr_advance_column, fb_ptr_advance_column, height, lsl #11 + 1; \
vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \
vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \
add texture_offset_base, u, u
add width_rounded, width, #7
- add texture_offset_base, v, lsl #11
+ add texture_offset_base, texture_offset_base, v, lsl #11
mov left_mask_bits, #0xFF
ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
and right_width, width_rounded, #0x7
mvn left_mask_bits, left_mask_bits, lsl left_offset
- add texture_mask, texture_mask_height, lsl #11
+ add texture_mask, texture_mask, texture_mask_height, lsl #11
mov block_width, width_rounded, lsr #3
mov right_mask_bits, right_mask_bits, lsl right_width
add texture_offset_base, u, u
add width_rounded, width, #7
- add texture_offset_base, v, lsl #11
+ add texture_offset_base, texture_offset_base, v, lsl #11
movw left_mask_bits, #0xFFFF
ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ]
lsl right_width, #1
- add texture_mask, texture_mask_height, lsl #11
+ add texture_mask, texture_mask, texture_mask_height, lsl #11
mov block_width, width_rounded, lsr #3
mov right_mask_bits, right_mask_bits, lsl right_width
ldrh r12, [ psx_gpu, #psx_gpu_render_state_offset ]
tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \
| RENDER_FLAGS_BLEND)
- ldreqb r12, [ psx_gpu, #psx_gpu_render_mode_offset ]
+ ldrbeq r12, [ psx_gpu, #psx_gpu_render_mode_offset ]
tsteq r12, #RENDER_INTERLACE_ENABLED
beq setup_sprite_untextured_simple
mov r14, r2
add r0, #1024*2*2
add r4, #1024*2
- sub r0, r2, lsl #4+1
+ sub r0, r0, r2, lsl #4+1
mov r1, r4
add r12, r0, #1024*2
bgt 0b