.syntax unified
.text
+#if 0 /* disabled branch: set to 1 to make the functions that use these macros preserve q4-q7 (d8-d15), which AAPCS lists as callee-saved */
+#define save_abi_regs() \
+ vpush {q4-q7} /* 64-byte push; each call site pairs it with restore_abi_regs() */
+#define restore_abi_regs() \
+ vpop {q4-q7}
+#else /* active branch: both macros expand to nothing, so the call sites emit no extra instructions */
+#define save_abi_regs()
+#define restore_abi_regs()
+#endif /* #if 0 */
+
#define psx_gpu r0
#define v_a r1
#define v_b r2
.align 4
-#ifndef __MACH__
+#include "arm_features.h" /* NOTE(review): presumably supplies FUNCTION(), TEXRELS_FORBIDDEN and EXTRA_UNSAVED_REGS, none of which are defined in the visible text - confirm */
-#define function(name) \
- .global name; \
- .type name, %function; \
- name: \
+#define function(name) FUNCTION(name): /* entry label = whatever FUNCTION(name) expands to, followed by ':' */
+
+#ifndef TEXRELS_FORBIDDEN /* this branch: JT_OP_REL emits nothing, JT_OP passes its operand through, JTE yields the target itself */
#define JT_OP_REL(table_label, index_reg, temp)
#define JT_OP(x...) x
#define JTE(start, target) target
-#define EXTRA_UNSAVED_REGS
-
#else
-#define function(name) \
- .globl _##name; \
- name: \
- _##name: \
-
#define JT_OP_REL(table_label, index_reg, temp) \
adr temp, table_label; \
ldr temp, [temp, index_reg, lsl #2]; \
#define JT_OP(x...)
#define JTE(start, target) (target - start)
-// r7 is preserved, but add it for EABI alignment..
-#define EXTRA_UNSAVED_REGS r7, r9,
+#endif /* TEXRELS_FORBIDDEN */
+#ifdef __MACH__ /* alias the three external symbols referenced here to their underscore-prefixed names */
#define flush_render_block_buffer _flush_render_block_buffer
#define setup_sprite_untextured_simple _setup_sprite_untextured_simple
#define update_texture_8bpp_cache _update_texture_8bpp_cache
-
#endif
@ r0: psx_gpu
@ r12 = psx_gpu->triangle_area
ldr r12, [psx_gpu, #psx_gpu_triangle_area_offset]
stmdb sp!, { r4 - r11, lr }
+ save_abi_regs()
@ load exponent of 62 into upper half of double
movw r4, #0
sub r14, r14, #(62 - 12) @ r14 = shift - (62 - FIXED_BITS)
vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16
- vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift }
-
+ vdup.u32 r_shift, r14 @ r_shift = { shift, shift*, shift, shift* }
+ @ * = upper 32-bit half of each 64-bit lane; vshl.u64 ignores it (hw reads the shift count from the low byte of each lane)
vadd.u32 uvrg_base, uvrgb_phase
vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x)
stmia store_b, { g_bx0, g_bx, g_bx2, g_bx3, b_base, g_by }
+ restore_abi_regs()
ldmia sp!, { r4 - r11, pc }
#define setup_spans_prologue() \
stmdb sp!, { r4 - r11, lr }; \
+ save_abi_regs(); \
\
ldrsh x_a, [v_a, #8]; \
ldrsh x_b, [v_b, #8]; \
#define setup_spans_epilogue() \
+ restore_abi_regs(); \
ldmia sp!, { r4 - r11, pc } \
bxeq lr; \
\
stmdb sp!, { r4 - r11, r14 }; \
+ save_abi_regs(); \
vshl.u32 uvrg_dx4, uvrg_dx, #2; \
\
ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
bne 0b; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
vpush { texture_mask }; \
vpush { uvrg_dx4 }; \
\
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
\
vpop { uvrg_dx4 }; \
vpop { texture_mask }; \
bxeq lr; \
\
stmdb sp!, { r4 - r11, r14 }; \
+ save_abi_regs(); \
vshl.u32 uvrg_dx4, uvrg_dx, #2; \
\
vshl.u32 uvrg_dx8, uvrg_dx, #3; \
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
bne 0b; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
vpush { texture_mask }; \
vpush { uvrg_dx4 }; \
\
- stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \
bl flush_render_block_buffer; \
- ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \
+ ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \
\
vpop { uvrg_dx4 }; \
vpop { texture_mask }; \
bxeq lr
stmdb sp!, { r4 - r11, r14 }
+ save_abi_regs()
vld1.u32 { test_mask }, [psx_gpu, :128]
ldr color, [psx_gpu, #psx_gpu_triangle_color_offset]
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
bne 0b
+ restore_abi_regs()
ldmia sp!, { r4 - r11, pc }
2:
bxeq lr; \
\
stmdb sp!, { r4 - r11, r14 }; \
+ save_abi_regs(); \
vshl.u32 rg_dx4, rg_dx, #2; \
\
ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \
strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
bne 0b; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4 - r11, pc }; \
\
2: \
bxeq lr; \
\
stmdb sp!, { r4 - r11, r14 }; \
+ save_abi_regs(); \
vshl.u32 rg_dx4, rg_dx, #2; \
\
ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \
\
bne 0b; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4 - r11, pc } \
setup_blocks_shaded_untextured_direct_builder(undithered)
.align 3; \
\
function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \
+ save_abi_regs(); /* placed before the integer push below, so it has to be undone after the matching pop */ \
shade_blocks_textured_modulated_prologue_##shading(dithering, target); \
stmdb sp!, { r4 - r5, lr }; \
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
shade_blocks_textured_modulated_store_draw_mask_##target(28); \
shade_blocks_textured_modulated_store_pixels_##target(); \
\
- ldmia sp!, { r4 - r5, pc } \
+ ldmia sp!, { r4 - r5, lr }; /* reload lr rather than pc: returning from this pop would skip restore_abi_regs() */ \
+ restore_abi_regs(); \
+ bx lr /* return only after the save/restore pair is balanced */ \
shade_blocks_textured_modulated_builder(shaded, dithered, direct);
.align 3
function(shade_blocks_textured_unmodulated_indirect)
- str r14, [sp, #-4]
+ stmdb sp!, { r4, r14 } /* real push: the old store to [sp, #-4] left sp unchanged, so a vpush from save_abi_regs() would overwrite the saved lr */
+ save_abi_regs()
add draw_mask_bits_ptr, psx_gpu, #(psx_gpu_blocks_offset + 40)
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
vorr.u16 draw_mask_combined, draw_mask, zero_mask
vst1.u32 { draw_mask_combined }, [draw_mask_store_ptr, :128], c_64
- ldr pc, [sp, #-4]
+ restore_abi_regs()
+ ldmia sp!, { r4, pc } /* pops r4 and returns by loading the saved lr into pc */
.align 3
function(shade_blocks_textured_unmodulated_direct)
stmdb sp!, { r4, r14 }
+ save_abi_regs()
add draw_mask_bits_ptr, psx_gpu, #(psx_gpu_blocks_offset + 40)
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
vst1.u16 { fb_pixels_next }, [fb_ptr_next]
+ restore_abi_regs()
ldmia sp!, { r4, pc }
4:
function(shade_blocks_unshaded_untextured_direct)
stmdb sp!, { r4, r14 }
+ save_abi_regs()
add draw_mask_ptr, psx_gpu, #psx_gpu_blocks_offset
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
vbif.u16 fb_pixels_next, pixels, draw_mask
vst1.u16 { fb_pixels_next }, [fb_ptr_next]
+ restore_abi_regs()
ldmia sp!, { r4, pc }
4:
\
function(blend_blocks_##texturing##_average_##mask_evaluate) \
stmdb sp!, { r4, r14 }; \
+ save_abi_regs(); \
add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
\
vbif.u16 fb_pixels_next, blend_pixels, draw_mask_next; \
vst1.u16 { fb_pixels_next }, [fb_ptr_next]; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4, pc }; \
\
2: \
\
function(blend_blocks_textured_add_##mask_evaluate) \
stmdb sp!, { r4, r14 }; \
+ save_abi_regs(); \
add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
\
vbit.u16 blend_pixels, fb_pixels, draw_mask; \
vst1.u16 { blend_pixels }, [fb_ptr_next]; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4, pc }; \
\
2: \
\
function(blend_blocks_untextured_add_##mask_evaluate) \
stmdb sp!, { r4, r14 }; \
+ save_abi_regs(); \
add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
\
vbit.u16 blend_pixels, fb_pixels, draw_mask; \
vst1.u16 { blend_pixels }, [fb_ptr_next]; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4, pc }; \
\
2: \
#define blend_blocks_subtract_combine_textured() \
vbif.u16 blend_pixels, pixels, blend_mask \
-#define blend_blocks_subtract_set_stb_textured() \
+#define blend_blocks_subtract_set_stp_textured() \
vorr.u16 blend_pixels, #0x8000 \
#define blend_blocks_subtract_msb_mask_textured() \
#define blend_blocks_subtract_combine_untextured() \
-#define blend_blocks_subtract_set_stb_untextured() \
+#define blend_blocks_subtract_set_stp_untextured() \
vorr.u16 blend_pixels, blend_pixels, msb_mask \
#define blend_blocks_subtract_msb_mask_untextured() \
\
function(blend_blocks_##texturing##_subtract_##mask_evaluate) \
stmdb sp!, { r4, r14 }; \
+ save_abi_regs(); \
add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
\
vld1.u32 { pixels_next }, [pixel_ptr, :128], c_64; \
vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \
vand.u16 pixels_rb, pixels_next, d128_0x7C1F; \
- blend_blocks_subtract_set_stb_##texturing(); \
+ blend_blocks_subtract_set_stp_##texturing(); \
vand.u16 pixels_g, pixels_next, d128_0x03E0; \
blend_blocks_subtract_combine_##texturing(); \
blend_blocks_subtract_set_blend_mask_##texturing(); \
\
blend_blocks_subtract_msb_mask_##texturing(); \
vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \
- blend_blocks_subtract_set_stb_##texturing(); \
+ blend_blocks_subtract_set_stp_##texturing(); \
blend_blocks_subtract_combine_##texturing(); \
vbit.u16 blend_pixels, fb_pixels, draw_mask; \
vst1.u16 { blend_pixels }, [fb_ptr_next]; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4, pc }; \
\
2: \
\
function(blend_blocks_textured_add_fourth_##mask_evaluate) \
stmdb sp!, { r4, r14 }; \
+ save_abi_regs(); \
add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
\
ldr fb_ptr_next, [pixel_ptr, #28]; \
\
vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \
+ vorr.u16 blend_pixels, #0x8000; /* stp */ \
vbif.u16 blend_pixels, pixels, blend_mask; \
\
vld1.u32 { pixels }, [pixel_ptr, :128], c_64; \
\
1: \
vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \
- vorr.u16 blend_pixels, blend_pixels, msb_mask; \
+ vorr.u16 blend_pixels, #0x8000; /* stp */ \
vbif.u16 blend_pixels, pixels, blend_mask; \
+ vorr.u16 blend_pixels, blend_pixels, msb_mask; \
vbit.u16 blend_pixels, fb_pixels, draw_mask; \
vst1.u16 { blend_pixels }, [fb_ptr_next]; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4, pc }; \
\
2: \
\
function(blend_blocks_untextured_add_fourth_##mask_evaluate) \
stmdb sp!, { r4, r14 }; \
+ save_abi_regs(); \
add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \
\
vbit.u16 blend_pixels, fb_pixels, draw_mask; \
vst1.u16 { blend_pixels }, [fb_ptr_next]; \
\
+ restore_abi_regs(); \
ldmia sp!, { r4, pc }; \
\
2: \
function(blend_blocks_textured_unblended_on)
stmdb sp!, { r4, r14 }
+ save_abi_regs()
add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset
ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]
vbif.u16 fb_pixels, pixels, draw_mask
vst1.u16 { fb_pixels }, [fb_ptr]
+ restore_abi_regs()
ldmia sp!, { r4, pc }
setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \
\
setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \
+ restore_abi_regs(); \
ldmia sp!, { r4 - r11, pc } \
#define setup_sprite_tiled_advance_column() \
\
setup_sprite_tiled_advance_column(); \
setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\
+ restore_abi_regs(); \
ldmia sp!, { r4 - r11, pc } \
ldr height, [sp, #44]; \
add fb_ptr, fb_ptr, y, lsl #11; \
\
+ save_abi_regs(); \
+ \
add fb_ptr, fb_ptr, x, lsl #1; \
and offset_v, v, #0xF; \
\
#define texel_block_expanded_b q2
#define texel_block_expanded_ab q2
#define texel_block_expanded_c q3
-#define texel_block_expanded_d q4
+#define texel_block_expanded_d q0 /* was q4; NOTE(review): presumably moved so this code no longer writes a callee-saved q4-q7 register - confirm q0 is not live where this alias is used */
#define texel_block_expanded_cd q3
function(update_texture_4bpp_cache)