* General Public License for more details.
*/
-#define MAX_SPANS 512
-#define MAX_BLOCKS 64
-#define MAX_BLOCKS_PER_ROW 128
-
-#define RENDER_STATE_MASK_EVALUATE 0x20
-#define RENDER_FLAGS_MODULATE_TEXELS 0x1
-#define RENDER_FLAGS_BLEND 0x2
#define RENDER_INTERLACE_ENABLED 0x1
+#include "psx_gpu.h" /* NOTE(review): presumably now supplies the MAX_* and RENDER_* constants whose local definitions are dropped above - confirm */
#include "psx_gpu_offsets.h"
#define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4)
#ifdef __MACH__ /* Mach-O: exported symbols carry a leading underscore, so alias the external names referenced from this file */
#define flush_render_block_buffer _flush_render_block_buffer
-#define setup_sprite_untextured_simple _setup_sprite_untextured_simple
#define update_texture_8bpp_cache _update_texture_8bpp_cache
#endif
sub r14, r14, #(62 - 12) @ r14 = shift - (62 - FIXED_BITS)
vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16
- vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift }
-
+ vdup.u32 r_shift, r14 @ r_shift = { shift, shift*, shift, shift* }
+ @ * - odd u32 lanes are ignored by hw: vshl.u64 takes its
+ @     shift count from the low byte of each 64-bit lane
vadd.u32 uvrg_base, uvrgb_phase
vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x)
#define left_x_32_low d22
#define left_x_32_high d23
+#define tmp_max_blocks d20 /* scratch d-reg: loaded with MAX_BLOCKS_PER_ROW and used as the vmin.u16 upper bound on left_right_x_16_high */
+
#define edges_xy q0
#define edges_dx_dy d2
#define edge_shifts d3
str b, [span_b_offset], #4; \
setup_spans_adjust_interpolants_##direction(); \
\
+ vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \
vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \
vshl.u16 span_shifts, c_0xFFFE, span_shifts; \
+ vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \
\
vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \
\
str b, [span_b_offset], #4; \
setup_spans_adjust_interpolants_##direction(); \
\
- vshl.u16 span_shifts, c_0xFFFE, span_shifts; \
+ vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \
vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \
+ vshl.u16 span_shifts, c_0xFFFE, span_shifts; \
+ vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \
\
vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \
\
ble 1f; \
\
orr temp, y_a, y_a, lsl #16; \
+ cmp height, #512; \
add temp, temp, #(1 << 16); \
+ movgt height, #512; \
add y_a, temp, #2; \
add y_a, y_a, #(2 << 16); \
vmov y_x4, temp, y_a; \
ble 1f; \
\
orr temp, y_a, y_a, lsl #16; \
+ cmp height, #512; \
sub temp, temp, #(1 << 16); \
+ movgt height, #512; \
sub y_a, temp, #2; \
sub y_a, y_a, #(2 << 16); \
vmov y_x4, temp, y_a; \
vpush { texture_mask }; \
vpush { uvrg_dx4 }; \
\
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
\
vpop { uvrg_dx4 }; \
vpop { texture_mask }; \
vpush { texture_mask }; \
vpush { uvrg_dx4 }; \
\
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
\
vpop { uvrg_dx4 }; \
vpop { texture_mask }; \
.align 3; \
\
function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \
+ save_abi_regs(); \
shade_blocks_textured_modulated_prologue_##shading(dithering, target); \
stmdb sp!, { r4 - r5, lr }; \
- save_abi_regs(); \
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
\
vld1.u32 { test_mask }, [psx_gpu, :128]; \
shade_blocks_textured_modulated_store_draw_mask_##target(28); \
shade_blocks_textured_modulated_store_pixels_##target(); \
\
+ ldmia sp!, { r4 - r5, lr }; \
restore_abi_regs(); \
- ldmia sp!, { r4 - r5, pc } \
+ bx lr \
shade_blocks_textured_modulated_builder(shaded, dithered, direct);
.align 3
-function(setup_sprite_untextured)
- ldrh r12, [psx_gpu, #psx_gpu_render_state_offset]
- tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \
- | RENDER_FLAGS_BLEND)
- ldrbeq r12, [psx_gpu, #psx_gpu_render_mode_offset]
- tsteq r12, #RENDER_INTERLACE_ENABLED
- beq setup_sprite_untextured_simple
-
+function(setup_sprite_untextured_512)
stmdb sp!, { r4 - r11, r14 }
ldr width, [sp, #40]
#define texel_block_expanded_b q2
#define texel_block_expanded_ab q2
#define texel_block_expanded_c q3
-#define texel_block_expanded_d q4
+#define texel_block_expanded_d q0 /* NOTE(review): q4 (d8-d9) is callee-saved under AAPCS while q0 is caller-saved; presumably the reason for this move - confirm q0 is not live where this alias is used */
#define texel_block_expanded_cd q3
function(update_texture_4bpp_cache)