* General Public License for more details.
*/
-#define MAX_SPANS 512
-#define MAX_BLOCKS 64
-#define MAX_BLOCKS_PER_ROW 128
-
-#define RENDER_STATE_MASK_EVALUATE 0x20
-#define RENDER_FLAGS_MODULATE_TEXELS 0x1
-#define RENDER_FLAGS_BLEND 0x2
#define RENDER_INTERLACE_ENABLED 0x1
+#include "psx_gpu.h"
#include "psx_gpu_offsets.h"
#define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4)
#ifdef __MACH__
#define flush_render_block_buffer _flush_render_block_buffer
-#define setup_sprite_untextured_simple _setup_sprite_untextured_simple
#define update_texture_8bpp_cache _update_texture_8bpp_cache
+#define setup_blocks_uv_adj_hack _setup_blocks_uv_adj_hack
#endif
@ r0: psx_gpu
#define uvrg q14
#define uvrg_dy q15
+#define uv d28
#define alternate_x_16 d4
#define left_x_32_low d22
#define left_x_32_high d23
+#define tmp_max_blocks d20
+
#define edges_xy q0
#define edges_dx_dy d2
#define edge_shifts d3
str b, [span_b_offset], #4; \
setup_spans_adjust_interpolants_##direction(); \
\
+ vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \
vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \
vshl.u16 span_shifts, c_0xFFFE, span_shifts; \
+ vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \
\
vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \
\
str b, [span_b_offset], #4; \
setup_spans_adjust_interpolants_##direction(); \
\
- vshl.u16 span_shifts, c_0xFFFE, span_shifts; \
+ vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \
vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \
+ vshl.u16 span_shifts, c_0xFFFE, span_shifts; \
+ vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \
\
vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \
\
ble 1f; \
\
orr temp, y_a, y_a, lsl #16; \
+ cmp height, #512; \
add temp, temp, #(1 << 16); \
+ movgt height, #512; \
add y_a, temp, #2; \
add y_a, y_a, #(2 << 16); \
vmov y_x4, temp, y_a; \
subs height, height, #4; \
bhi 2b; \
\
+ nop; \
+ ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]; \
+ tst temp, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V); \
+ beq 1f; \
+ add temp, span_uvrg_offset, height, lsl #4; \
+ vldr uv, [temp, #(-16*2)]; \
+ vstr uv, [temp, #(-16)]; \
+ \
1: \
ble 1f; \
\
orr temp, y_a, y_a, lsl #16; \
+ cmp height, #512; \
sub temp, temp, #(1 << 16); \
+ movgt height, #512; \
sub y_a, temp, #2; \
sub y_a, y_a, #(2 << 16); \
vmov y_x4, temp, y_a; \
subs height, height, #4; \
bhi 2b; \
\
+ nop; \
+ ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]; \
+ tst temp, #AHACK_TEXTURE_ADJ_V; \
+ beq 1f; \
+ add temp, psx_gpu, #psx_gpu_span_uvrg_offset_offset; \
+ vldr uv, [temp, #16]; \
+ vstr uv, [temp, #0]; \
+ \
1: \
subs height_minor_b, height_minor_b, #4
bhi 2b
+ nop
+ ldr temp, [psx_gpu, #psx_gpu_hacks_active_offset]
+ tst temp, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V)
+ beq 1f
+ add temp, span_uvrg_offset, height, lsl #4
+ vldr uv, [temp, #(-16*2)]
+ vstr uv, [temp, #(-16)]
+
1:
setup_spans_epilogue()
#define uvrg_dx_ptr r2
#define texture_mask_ptr r3
+#define hacks_active r6
#define dither_shift r8
#define dither_row r10
#define color_b r5
#undef uvrg
+#undef uv
#define u_block q0
#define v_block q1
#define setup_blocks_texture_unswizzled() \
+#define setup_blocks_uv_adj_hack_textured(hacks_active) \
+ tst hacks_active, #(AHACK_TEXTURE_ADJ_U | AHACK_TEXTURE_ADJ_V); /* is either texture-adjust hack flag set? */ \
+ beq 91f; /* neither flag set: skip to the end of this macro */ \
+ \
+ /* pushing odd num of regs here realigns our unaligned stack */ \
+ vstr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; /* park both texture masks at r0 + saved_tmp offset for the duration of the call */ \
+ vstr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
+ push { r0 - r4, EXTRA_UNSAVED_REGS r12, r14 }; /* restored by the matching pop after the call */ \
+ mov r12, span_uvrg_offset; /* copied to r12 before r1-r3 are overwritten; NOTE(review): presumably span_uvrg_offset lives in one of r1-r3 - confirm */ \
+ sub r1, block_ptr_a, #64; /* r1 = block_ptr_a - 64 */ \
+ mov r2, span_edge_data; /* r2 = span_edge_data */ \
+ mov r3, r12; /* r3 = span_uvrg_offset (via r12) */ \
+ bl setup_blocks_uv_adj_hack; /* psx_gpu=r0; r1-r3 were set up just above */ \
+ pop { r0 - r4, EXTRA_UNSAVED_REGS r12, r14 }; \
+ vldr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; /* reload the texture masks parked before the call */ \
+ vldr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
+ \
+ vadd.u32 uvrg_dx8, uvrg_dx4, uvrg_dx4; /* uvrg_dx8 = 2 * uvrg_dx4, recomputed after the call */ \
+91: \
+
#define setup_blocks_shaded_textured_builder(swizzling) \
.align 3; \
vld1.u32 { test_mask }, [psx_gpu, :128]; \
vdup.u8 draw_mask, right_mask; \
\
+ ldr hacks_active, [psx_gpu, #psx_gpu_hacks_active_offset]; \
vmov.u32 fb_mask_ptrs[0], right_mask; \
vtst.u16 draw_mask, draw_mask, test_mask; \
vzip.u8 u_whole_8, v_whole_8; \
vst1.u32 { dither_offsets }, [block_ptr_b, :128], c_32; \
vst1.u32 { b_whole_8, fb_mask_ptrs }, [block_ptr_a, :128], c_32; \
\
+ setup_blocks_uv_adj_hack_textured(hacks_active); \
+ \
1: \
add span_uvrg_offset, span_uvrg_offset, #16; \
add span_b_offset, span_b_offset, #4; \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
- /* TODO: Load from psx_gpu instead of saving/restoring these */\
- vpush { texture_mask }; \
- vpush { uvrg_dx4 }; \
- \
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
+ vstr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vstr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
+ /* pushing odd num of regs here realigns our unaligned stack */ \
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
- \
- vpop { uvrg_dx4 }; \
- vpop { texture_mask }; \
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
+ vldr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vldr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
\
vadd.u32 uvrg_dx8, uvrg_dx4, uvrg_dx4; \
vmov.u8 fb_mask_ptrs, #0; \
vld1.u32 { test_mask }, [psx_gpu, :128]; \
vdup.u8 draw_mask, right_mask; \
\
+ ldr hacks_active, [psx_gpu, #psx_gpu_hacks_active_offset]; \
vmov.u32 fb_mask_ptrs[0], right_mask; \
vtst.u16 draw_mask, draw_mask, test_mask; \
vzip.u8 u_whole_8, v_whole_8; \
vst1.u32 { dither_offsets }, [block_ptr_b, :128], c_32; \
vst1.u32 { b_whole_8, fb_mask_ptrs }, [block_ptr_a, :128], c_32; \
\
+ setup_blocks_uv_adj_hack_textured(hacks_active); \
+ \
1: \
add span_uvrg_offset, span_uvrg_offset, #16; \
add span_edge_data, span_edge_data, #8; \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
- /* TODO: Load from psx_gpu instead of saving/restoring these */\
- vpush { texture_mask }; \
- vpush { uvrg_dx4 }; \
- \
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
+ vstr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vstr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
- \
- vpop { uvrg_dx4 }; \
- vpop { texture_mask }; \
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
+ vldr texture_mask_u, [r0, #psx_gpu_saved_tmp_offset]; \
+ vldr texture_mask_v, [r0, #psx_gpu_saved_tmp_offset + 8]; \
\
vadd.u32 uvrg_dx8, uvrg_dx4, uvrg_dx4; \
vmov.u8 fb_mask_ptrs, #0; \
ldmia sp!, { r4 - r11, pc }
2:
- vpush { colors }
-
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
+ vstr d4, [r0, #psx_gpu_saved_tmp_offset] /* colors */
+ vstr d5, [r0, #psx_gpu_saved_tmp_offset + 8]
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12 }
bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
-
- vpop { colors }
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 }
+ vldr d4, [r0, #psx_gpu_saved_tmp_offset]
+ vldr d5, [r0, #psx_gpu_saved_tmp_offset + 8]
vld1.u32 { test_mask }, [psx_gpu, :128]
veor.u32 draw_mask, draw_mask, draw_mask
bne 0b; \
\
restore_abi_regs(); \
- ldmia sp!, { r4 - r11, pc }; \
+ pop { r4 - r11, pc }; \
\
2: \
- /* TODO: Load from psx_gpu instead of saving/restoring these */\
- vpush { rg_dx4 }; \
- \
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ vstr rg_dx4, [r0, #psx_gpu_saved_tmp_offset]; \
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
- \
- vpop { rg_dx4 }; \
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
+ vldr rg_dx4, [r0, #psx_gpu_saved_tmp_offset]; \
\
vmov.u8 d64_1, #1; \
vmov.u8 d128_4, #4; \
.align 3
function(texture_blocks_8bpp)
- stmdb sp!, { r3 - r11, r14 }
+ push { r4 - r11, lr }
add block_ptr, psx_gpu, #psx_gpu_blocks_offset
ldr texture_ptr, [psx_gpu, #psx_gpu_texture_page_ptr_offset]
add block_ptr, block_ptr, #64
bne 0b
- ldmia sp!, { r3 - r11, pc }
+ pop { r4 - r11, pc }
1:
- stmdb sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 }
-
- bl update_texture_8bpp_cache
-
- ldmia sp!, { r1 - r2, EXTRA_UNSAVED_REGS r12 }
- bal 0b
+ /* pushing odd num of regs here realigns our unaligned stack */
+ push { r1 - r2, EXTRA_UNSAVED_REGS r12 }
+ bl update_texture_8bpp_cache
+ pop { r1 - r2, EXTRA_UNSAVED_REGS r12 }
+ bal 0b
#undef uv_0
#undef vram_ptr
#undef color
-#undef width
-#undef height
-#undef pitch
-
-#define vram_ptr r0
-#define color r1
-#define width r2
-#define height r3
-
-#define pitch r1
-
-#define num_width r12
-
-#undef colors_a
-#undef colors_b
-
-#define colors_a q0
-#define colors_b q1
-
-.align 3
-
-function(render_block_fill_body)
- vdup.u16 colors_a, color
- mov pitch, #2048
-
- vmov colors_b, colors_a
- sub pitch, pitch, width, lsl #1
-
- mov num_width, width
-
- 0:
- vst1.u32 { colors_a, colors_b }, [vram_ptr, :256]!
-
- subs num_width, num_width, #16
- bne 0b
-
- add vram_ptr, vram_ptr, pitch
- mov num_width, width
-
- subs height, height, #1
- bne 0b
-
- bx lr
-
-
#undef x
#undef y
#undef width
#define texels_wide_high d15
#define texels_wide q7
+.align 3
setup_sprite_flush_blocks:
- vpush { q1 - q5 }
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12, lr } /* keep r0-r3, r12 and lr intact across the flush call */
+ add block, r0, #psx_gpu_saved_tmp_offset /* block (r5) = r0 + saved_tmp offset; used again after the call */
+ vstmia block, { q1 - q3 } /* park q1-q3 in that scratch area rather than on the stack */
+ bl flush_render_block_buffer
+ vldmia block, { q1 - q3 } /* reload q1-q3 from the scratch area */
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12, lr }
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
- bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
-
- vpop { q1 - q5 }
-
- add block, psx_gpu, #psx_gpu_blocks_offset
- bx lr
+ add block, psx_gpu, #psx_gpu_blocks_offset /* block = psx_gpu + psx_gpu_blocks_offset */
+ bx lr
setup_sprite_update_texture_4bpp_cache:
- stmdb sp!, { r0 - r3, r14 }
+ push { r0 - r4, lr } /* wrapper: keep r0-r4 and lr across the call; six registers now (r4 added); NOTE(review): presumably r4 only pads the stack to 8 bytes - confirm */
 bl update_texture_4bpp_cache
- ldmia sp!, { r0 - r3, pc }
+ pop { r0 - r4, pc } /* restore r0-r4 and return by loading the saved lr into pc */
setup_sprite_update_texture_8bpp_cache:
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r14 }
+ push { r0 - r4, EXTRA_UNSAVED_REGS lr } /* wrapper: keep r0-r4, any EXTRA_UNSAVED_REGS and lr across the call (r4 added to the list) */
 bl update_texture_8bpp_cache
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS pc }
+ pop { r0 - r4, EXTRA_UNSAVED_REGS pc } /* restore the same list and return by loading the saved lr into pc */
#define setup_sprite_tiled_initialize_4bpp() \
setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \
\
setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \
- restore_abi_regs(); \
- ldmia sp!, { r4 - r11, pc } \
+ vpop { q4 - q7 }; \
+ pop { r3 - r11, pc } \
#define setup_sprite_tiled_advance_column() \
add texture_offset_base, texture_offset_base, #0x100; \
\
setup_sprite_tiled_advance_column(); \
setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\
- restore_abi_regs(); \
- ldmia sp!, { r4 - r11, pc } \
+ vpop { q4 - q7 }; \
+ pop { r3 - r11, pc } \
#define setup_sprite_offset_u_adjust() \
.align 4; \
\
function(setup_sprite_##texture_mode##x4mode) \
- stmdb sp!, { r4 - r11, r14 }; \
+ push { r3 - r11, lr }; \
setup_sprite_tiled_initialize_##texture_mode##x4mode(); \
\
- ldr v, [sp, #36]; \
+ ldr v, [sp, #4*(10+0)]; \
and offset_u, u, #0xF; \
\
- ldr width, [sp, #40]; \
+ ldr width, [sp, #4*(10+1)]; \
ldr fb_ptr, [psx_gpu, #psx_gpu_vram_out_ptr_offset]; \
\
- ldr height, [sp, #44]; \
+ ldr height, [sp, #4*(10+2)]; \
add fb_ptr, fb_ptr, y, lsl #11; \
\
- save_abi_regs(); \
+ vpush { q4 - q7 }; \
\
add fb_ptr, fb_ptr, x, lsl #1; \
and offset_v, v, #0xF; \
#define texels_67 r9
function(texture_sprite_blocks_8bpp)
- stmdb sp!, { r4 - r11, r14 }
+ push { r4 - r11, r14 }
movw texel_shift_mask, #(0xFF << 1)
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
add block_ptr, block_ptr, #64
bne 0b
+ nop
- ldmia sp!, { r4 - r11, pc }
+ pop { r4 - r11, pc }
#undef width_rounded
setup_sprites_16bpp_flush:
- vpush { d0 - d3 }
-
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
- bl flush_render_block_buffer
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }
-
- vpop { d0 - d3 }
+ push { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 } /* r14 stays on the stack until the final pop { pc } */
+ add r1, r0, #psx_gpu_saved_tmp_offset /* r1 is free as scratch here: its incoming value was just pushed */
+ vstmia r1, { d0 - d3 } /* park d0-d3 at r0 + saved_tmp offset instead of on the stack */
+ bl flush_render_block_buffer
+ pop { r0 - r3, EXTRA_UNSAVED_REGS r12 } /* r14 is not popped here: the return address remains on the stack */
+ add lr, r0, #psx_gpu_saved_tmp_offset /* lr is reusable as a pointer because the return address is still stacked */
+ vldmia lr, { d0 - d3 } /* reload d0-d3 */
 add block, psx_gpu, #psx_gpu_blocks_offset
 mov num_blocks, block_width
- bx lr
+ pop { pc } /* return by popping the r14 value saved by the first push */
function(setup_sprite_16bpp)
- stmdb sp!, { r4 - r11, r14 }
+ push { r3 - r11, lr }
ldr fb_ptr, [psx_gpu, #psx_gpu_vram_out_ptr_offset]
- ldr v, [sp, #36]
+ ldr v, [sp, #4*(10+0)]
add fb_ptr, fb_ptr, y, lsl #11
- ldr width, [sp, #40]
+ ldr width, [sp, #4*(10+1)]
add fb_ptr, fb_ptr, x, lsl #1
- ldr height, [sp, #44]
+ ldr height, [sp, #4*(10+2)]
and left_offset, u, #0x7
add texture_offset_base, u, u
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bne 1b
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
0:
add num_blocks, num_blocks, block_width
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bne 0b
+ nop
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
// 4x version
#undef draw_mask_fb_ptr
function(setup_sprite_16bpp_4x)
- stmdb sp!, { r4 - r11, r14 }
+ push { r3 - r11, lr }
ldr fb_ptr, [psx_gpu, #psx_gpu_vram_out_ptr_offset]
- ldr v, [sp, #36]
+ ldr v, [sp, #4*(10+0)]
add fb_ptr, fb_ptr, y, lsl #11
- ldr width, [sp, #40]
+ ldr width, [sp, #4*(10+1)]
add fb_ptr, fb_ptr, x, lsl #1
- ldr height, [sp, #44]
+ ldr height, [sp, #4*(10+2)]
and left_offset, u, #0x7
add texture_offset_base, u, u
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bne 1b
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
0:
add num_blocks, num_blocks, block_width
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bne 0b
+ nop
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
#undef width
.align 3
-function(setup_sprite_untextured)
- ldrh r12, [psx_gpu, #psx_gpu_render_state_offset]
- tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \
- | RENDER_FLAGS_BLEND)
- ldrbeq r12, [psx_gpu, #psx_gpu_render_mode_offset]
- tsteq r12, #RENDER_INTERLACE_ENABLED
- beq setup_sprite_untextured_simple
-
- stmdb sp!, { r4 - r11, r14 }
+function(setup_sprite_untextured_512)
+ push { r4 - r11, r14 }
- ldr width, [sp, #40]
+ ldr width, [sp, #4*(9+1)]
ldr fb_ptr, [psx_gpu, #psx_gpu_vram_out_ptr_offset]
- ldr height, [sp, #44]
+ ldr height, [sp, #4*(9+2)]
add fb_ptr, fb_ptr, y, lsl #11
add fb_ptr, fb_ptr, x, lsl #1
sub right_width, width, #1
- ldr color, [sp, #48]
+ ldr color, [sp, #4*(9+3)]
and right_width, #7
add block_width, width, #7
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bgt setup_sprite_untextured_height_loop
- ldmia sp!, { r4 - r11, pc }
+ pop { r4 - r11, pc }
#define texel_block_expanded_cd q3
function(update_texture_4bpp_cache)
- stmdb sp!, { r4 - r11, r14 }
+ push { r3 - r11, r14 }
vpush { q0 - q3 }
ldrb current_texture_page, [psx_gpu, #psx_gpu_current_texture_page_offset]
bne 0b
vpop { q0 - q3 }
- ldmia sp!, { r4 - r11, pc }
+ pop { r3 - r11, pc }
#undef current_texture_page
function(update_texture_8bpp_cache_slice)
stmdb sp!, { r4 - r11, r14 }
- vpush { q0 - q3 }
ldrb current_texture_page, [psx_gpu, #psx_gpu_current_texture_page_offset]
ldr vram_ptr_a, [psx_gpu, #psx_gpu_vram_ptr_offset]
bne 0b
- vpop { q0 - q3 }
ldmia sp!, { r4 - r11, pc }
mov r14, r2
0:
+ pld [r1, #1024*2]
vld1.u16 { q0 }, [r1, :128]!
vld1.u16 { q2 }, [r1, :128]!
vmov q1, q0