+#define setup_sprite_offset_u_adjust() /* empty body: expands to nothing (the _4x counterpart does the adjusting) */ \
+
+#define setup_sprite_get_left_block_mask() /* left_block_mask = its low byte, zero-extended */ \
+ uxtb left_block_mask, left_block_mask \
+
+#define setup_sprite_compare_left_block_mask() /* set flags from left_block_mask vs 0xFF; the branch is left to the user of this macro */ \
+ cmp left_block_mask, #0xFF \
+
+#define setup_sprite_get_right_block_mask() /* right_block_mask = its bits [15:8], zero-extended */ \
+ uxtb right_block_mask, right_block_mask, ror #8 \
+
+#define setup_sprite_compare_right_block_mask() /* set flags from right_block_mask vs 0xFF; the branch is left to the user of this macro */ \
+ cmp right_block_mask, #0xFF \
+
+
+
+/* 4x variants: each source texel block is expanded into four output blocks (see do_texture_block_*_4x) */
+#define fb_ptr2 column_data /* fb_ptr2 is a second name for column_data; the *_4x tile macros push/pop column_data around their loops */
+
+#define setup_sprite_offset_u_adjust_4x() /* move fb_ptr back by 2 * offset_u, then double both offsets (right one becomes 2n + 1) */ \
+ sub fb_ptr, fb_ptr, offset_u, lsl #1; /* uses offset_u before it is doubled below */ \
+ lsl offset_u_right, offset_u_right, #1; \
+ lsl offset_u, offset_u, #1; \
+ add offset_u_right, offset_u_right, #1 \
+
+#define setup_sprite_get_left_block_mask_4x() /* left_block_mask = its low 16 bits, sign-extended to 32 */ \
+ sxth left_block_mask, left_block_mask \
+
+#define setup_sprite_compare_left_block_mask_4x() /* set flags from left_block_mask vs all-ones (what sxth produces from 0xFFFF) */ \
+ cmp left_block_mask, #0xFFFFFFFF /* NOTE(review): presumably assembled as cmn #1 by the assembler - confirm with the toolchain in use */ \
+
+#define setup_sprite_get_right_block_mask_4x() /* right_block_mask = its bits [31:16], sign-extended to 32 */ \
+ sxth right_block_mask, right_block_mask, ror #16 \
+
+#define setup_sprite_compare_right_block_mask_4x() /* set flags from right_block_mask vs all-ones (what sxth produces from 0xFFFF) */ \
+ cmp right_block_mask, #0xFFFFFFFF /* NOTE(review): presumably assembled as cmn #1 by the assembler - confirm with the toolchain in use */ \
+
+
+#define widen_texels_16bpp(texels_) /* duplicate every 16-bit element of texels_ into texels_wide_low/high */ \
+ vmov texels_wide_low, texels_; /* two identical copies of the input ... */ \
+ vmov texels_wide_high, texels_; \
+ vzip.16 texels_wide_low, texels_wide_high /* ... interleaved: each element now appears twice in a row, low register first */ \
+
+#define widen_texels_8bpp(texels_) /* duplicate every byte of texels_ into texels_wide_low/high */ \
+ vmov texels_wide_low, texels_; /* two identical copies of the input ... */ \
+ vmov texels_wide_high, texels_; \
+ vzip.8 texels_wide_low, texels_wide_high /* ... interleaved: each byte now appears twice in a row, low register first */ \
+
+#define write_block_16bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) /* store texels_ and draw_mask_fb_ptr_ through block_; block_ += 64 */ \
+ vst1.u32 { texels_ }, [block_, :128]; /* texels at block_ + 0, address hinted 128-bit aligned */ \
+ add block_, block_, #40; \
+ /* lane 1 of draw_mask_fb_ptr_ := fb_ptr_ (the register is modified), then stored at original block_ + 40 */ \
+ vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \
+ vst1.u32 { draw_mask_fb_ptr_ }, [block_, :64]; \
+ add block_, block_, #24 /* 40 + 24 = 64 bytes advanced per call */ \
+
+/* assumes 16-byte offset already added to block_ (the 8bpp tile macros add 16 before their loop and subtract it afterwards) */
+#define write_block_8bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) /* store texels_ and draw_mask_fb_ptr_ through block_; block_ += 64 */ \
+ vst1.u32 { texels_ }, [block_, :64]; /* texels at the incoming block_, address hinted 64-bit aligned */ \
+ add block_, block_, #24; \
+ /* lane 1 of draw_mask_fb_ptr_ := fb_ptr_ (the register is modified), then stored 24 bytes after the texels */ \
+ vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \
+ vst1.u32 { draw_mask_fb_ptr_ }, [block_, :64]; \
+ add block_, block_, #40 /* 24 + 40 = 64 bytes advanced per call */ \
+
+#define do_texture_block_16bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \
+ draw_mask_fb_ptr_b_) /* widen texels_low/texels_high and emit four blocks; fb_ptr_tmp is scratch, fb_ptr is not modified */ \
+ widen_texels_16bpp(texels_low); \
+ add fb_ptr_tmp, fb_ptr, #1024*2; \
+ /* widened texels_low goes out twice with mask a: at fb_ptr and at fb_ptr + 1024*2 */ \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr); \
+ /* texels_wide: presumably the register pair written by widen_texels_16bpp - defined outside this chunk */ \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \
+ widen_texels_16bpp(texels_high); \
+ /* widened texels_high goes out twice with mask b: at fb_ptr + 8*2 and at fb_ptr + 8*2 + 1024*2 */ \
+ add fb_ptr_tmp, fb_ptr, #8*2; \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \
+ \
+ add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \
+ write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \
+
+#define do_texture_block_8bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \
+ draw_mask_fb_ptr_b_) /* widen texels once and emit four blocks; fb_ptr_tmp is scratch, fb_ptr is not modified */ \
+ widen_texels_8bpp(texels); \
+ add fb_ptr_tmp, fb_ptr, #1024*2; \
+ /* texels_wide_low goes out twice with mask a: at fb_ptr and at fb_ptr + 1024*2 */ \
+ write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr); \
+ write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \
+ /* texels_wide_high goes out twice with mask b: at fb_ptr + 8*2 and at fb_ptr + 8*2 + 1024*2 */ \
+ add fb_ptr_tmp, fb_ptr, #8*2; \
+ write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \
+ \
+ add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \
+ write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \
+
+
+#define setup_sprite_tiled_initialize_4bpp_4x() /* load the CLUT through psx_gpu and split it into even/odd bytes */ \
+ ldr clut_ptr, [psx_gpu, #psx_gpu_clut_ptr_offset]; /* clut_ptr read from the psx_gpu structure */ \
+ vld1.u32 { clut_a, clut_b }, [clut_ptr, :128]; /* address hinted 128-bit aligned */ \
+ /* de-interleave: even-indexed bytes end up in clut_a, odd-indexed bytes in clut_b */ \
+ vuzp.u8 clut_a, clut_b \
+
+#define setup_sprite_tiled_initialize_8bpp_4x() /* empty body: the 8bpp 4x path emits no setup instructions here */ \
+
+
+#define setup_sprite_block_count_single_4x() /* operand fragment, not a whole instruction: sub_tile_height * 4 */ \
+ sub_tile_height, lsl #2 /* presumably pasted into an instruction by setup_sprite_tile_add_blocks - defined outside this chunk */ \
+
+#define setup_sprite_block_count_double_4x() /* operand fragment, not a whole instruction: sub_tile_height * 8 */ \
+ sub_tile_height, lsl #(1+2) /* presumably pasted into an instruction by setup_sprite_tile_add_blocks - defined outside this chunk */ \
+
+#define setup_sprite_tile_full_4bpp_4x(edge) /* 4bpp tile column, both 8-byte halves per row; the edge argument is not used */ \
+ setup_sprite_tile_add_blocks(double_4x); \
+ str column_data, [sp, #-8]!; /* push column_data: fb_ptr2 aliases it and is used as scratch below */ \
+ /* loop body runs until sub_tile_height counts down to zero */ \
+ 4: \
+ and texture_block_ptr, texture_offset, texture_mask; \
+ pld [fb_ptr]; \
+ /* first half: load 8 bytes from texture_page_ptr + (texture_offset & texture_mask) */ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ vld1.u32 { texels }, [texture_block_ptr, :64]; \
+ /* second-half address (texture_offset + 8, masked) is computed in between the table lookups */ \
+ add texture_block_ptr, texture_offset, #8; \
+ vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \
+ \
+ and texture_block_ptr, texture_block_ptr, texture_mask; \
+ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \
+ /* interleave the two lookup results byte by byte, then emit four blocks with the left masks */ \
+ vzip.8 texels_low, texels_high; \
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \
+ draw_mask_fb_ptr_left_b); \
+ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ pld [fb_ptr, #2048]; \
+ /* second half: load 8 bytes and move fb_ptr forward by 16*2 for its blocks */ \
+ vld1.u32 { texels }, [texture_block_ptr, :64]; \
+ add fb_ptr, fb_ptr, #16*2; \
+ \
+ vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \
+ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \
+ /* same interleave, emitted with the right masks */ \
+ vzip.8 texels_low, texels_high; \
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \
+ draw_mask_fb_ptr_right_b); \
+ /* per iteration: texture_offset += 0x10; fb_ptr net += 2048*2 (16*2 above plus (2048-16)*2 here) */ \
+ add texture_offset, texture_offset, #0x10; \
+ add fb_ptr, fb_ptr, #(2048 - 16) * 2; \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ /* after the loop: restore column_data, bump texture_offset by 0xF00, store num_blocks as a halfword */ \
+ ldr column_data, [sp], #8; /* pop column_data (was fb_ptr2 scratch) */ \
+ add texture_offset, texture_offset, #0xF00; \
+ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] \
+
+
+#define setup_sprite_tile_half_4bpp_4x(edge) /* 4bpp tile column, one 8-byte half per row; edge (left/right) selects the draw mask registers */ \
+ setup_sprite_tile_add_blocks(single_4x); \
+ str column_data, [sp, #-8]!; /* push column_data: fb_ptr2 aliases it and is used as scratch below */ \
+ /* loop body runs until sub_tile_height counts down to zero */ \
+ 4: \
+ and texture_block_ptr, texture_offset, texture_mask; \
+ pld [fb_ptr]; \
+ /* load 8 bytes from texture_page_ptr + (texture_offset & texture_mask) */ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ vld1.u32 { texels }, [texture_block_ptr, :64]; \
+ /* NOTE(review): the add below looks dead - texture_block_ptr is recomputed at label 4 and not read again in this macro; confirm before removing */ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \
+ \
+ vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \
+ add texture_offset, texture_offset, #0x10; \
+ /* interleave the two lookup results byte by byte, then emit four blocks with the masks for this edge */ \
+ vzip.8 texels_low, texels_high; \
+ do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \
+ draw_mask_fb_ptr_##edge##_b); \
+ /* per iteration: fb_ptr += 2048*2 */ \
+ pld [fb_ptr, #2048]; \
+ add fb_ptr, fb_ptr, #2048 * 2; \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ /* after the loop: restore column_data, bump texture_offset by 0xF00, store num_blocks as a halfword */ \
+ ldr column_data, [sp], #8; /* pop column_data (was fb_ptr2 scratch) */ \
+ add texture_offset, texture_offset, #0xF00; \
+ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] \
+
+
+#define setup_sprite_tile_full_8bpp_4x(edge) /* 8bpp tile column, both 8-byte halves per row; the edge argument is not used */ \
+ setup_sprite_tile_add_blocks(double_4x); \
+ add block, block, #16; /* write_block_8bpp expects block pre-offset by 16; undone after the loop */ \
+ str column_data, [sp, #-8]!; /* push column_data: fb_ptr2 aliases it and is used as scratch below */ \
+ /* loop body runs until sub_tile_height counts down to zero */ \
+ 4: \
+ and texture_block_ptr, texture_offset, texture_mask; \
+ pld [fb_ptr]; \
+ /* first half: load 8 bytes from texture_page_ptr + (texture_offset & texture_mask) */ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ vld1.u32 { texels }, [texture_block_ptr, :64]; \
+ /* start on the second-half address (texture_offset + 8), then emit four blocks with the left masks */ \
+ add texture_block_ptr, texture_offset, #8; \
+ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \
+ draw_mask_fb_ptr_left_b); \
+ \
+ pld [fb_ptr, #2048]; \
+ and texture_block_ptr, texture_block_ptr, texture_mask; \
+ /* second half: fb_ptr moves forward by 16*2 for its blocks */ \
+ add fb_ptr, fb_ptr, #16*2; \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ \
+ vld1.u32 { texels }, [texture_block_ptr, :64]; \
+ /* emit four blocks with the right masks */ \
+ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \
+ draw_mask_fb_ptr_right_b); \
+ /* per iteration: texture_offset += 0x10; fb_ptr net += 2048*2 (16*2 above plus (2048-16)*2 here) */ \
+ add texture_offset, texture_offset, #0x10; \
+ add fb_ptr, fb_ptr, #(2048 - 16) * 2; \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ /* after the loop: undo the block pre-offset, restore column_data, bump texture_offset, store num_blocks */ \
+ sub block, block, #16; \
+ ldr column_data, [sp], #8; /* pop column_data (was fb_ptr2 scratch) */ \
+ add texture_offset, texture_offset, #0xF00; \
+ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] \
+
+
+#define setup_sprite_tile_half_8bpp_4x(edge) /* 8bpp tile column, one 8-byte half per row; edge (left/right) selects the draw mask registers */ \
+ setup_sprite_tile_add_blocks(single_4x); \
+ add block, block, #16; /* write_block_8bpp expects block pre-offset by 16; undone after the loop */ \
+ str column_data, [sp, #-8]!; /* push column_data: fb_ptr2 aliases it and is used as scratch below */ \
+ /* loop body runs until sub_tile_height counts down to zero */ \
+ 4: \
+ and texture_block_ptr, texture_offset, texture_mask; \
+ pld [fb_ptr]; \
+ /* load 8 bytes from texture_page_ptr + (texture_offset & texture_mask) */ \
+ add texture_block_ptr, texture_page_ptr, texture_block_ptr; \
+ vld1.u32 { texels }, [texture_block_ptr, :64]; \
+ /* emit four blocks with the masks for this edge */ \
+ pld [fb_ptr, #2048]; \
+ do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \
+ draw_mask_fb_ptr_##edge##_b); \
+ /* per iteration: texture_offset += 0x10; fb_ptr += 2048*2 */ \
+ add texture_offset, texture_offset, #0x10; \
+ add fb_ptr, fb_ptr, #2048 * 2; \
+ \
+ subs sub_tile_height, sub_tile_height, #1; \
+ bne 4b; \
+ /* after the loop: undo the block pre-offset, restore column_data, bump texture_offset, store num_blocks */ \
+ sub block, block, #16; \
+ ldr column_data, [sp], #8; /* pop column_data (was fb_ptr2 scratch) */ \
+ add texture_offset, texture_offset, #0xF00; \
+ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] \
+
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() /* right half: texture_offset = base + 8 and fb_ptr += 16*2 (undone by the matching post_adjust) */ \
+ add texture_offset, texture_offset_base, #8; \
+ add fb_ptr, fb_ptr, #16 * 2 \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() /* left half: texture_offset = base, fb_ptr untouched */ \
+ mov texture_offset, texture_offset_base \
+
+#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) /* dispatch by token pasting: edge must be left or right */ \
+ setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \
+
+#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) /* full tile: texture_offset = base; the edge argument is not used */ \
+ mov texture_offset, texture_offset_base \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() /* undo the fb_ptr += 16*2 done by the right-half pre_adjust */ \
+ sub fb_ptr, fb_ptr, #16 * 2 \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() /* empty body: the left-half pre_adjust did not move fb_ptr */ \
+
+#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) /* dispatch by token pasting: edge must be left or right */ \
+ setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \
+
+#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) /* empty body: the full-tile pre_adjust did not move fb_ptr; edge is not used */ \
+
+
+#define setup_sprite_setup_left_draw_mask_fb_ptr_4x() /* broadcast byte lanes 0..3 of block_masks into the four draw mask registers */ \
+ vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; /* every byte of the destination = lane 0 */ \
+ vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \
+ vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \
+ vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \
+
+#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column_4x() /* as the macro without _advance_column, plus fb_ptr_advance_column = 32*2 - height * 4096 */ \
+ mov fb_ptr_advance_column, #32 * 2; \
+ vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \
+ vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \
+ sub fb_ptr_advance_column, fb_ptr_advance_column, height, lsl #11 + 1; /* shift amount is the expression 11 + 1 = 12; 4096 matches the 2048*2 the tile loops add to fb_ptr per iteration */ \
+ vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \
+ vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \
+
+#define setup_sprite_setup_right_draw_mask_fb_ptr_4x() /* reloads the same four draw mask registers, this time from byte lanes 4..7 of block_masks */ \
+ vdup.u8 draw_mask_fb_ptr_left_a, block_masks[4]; /* every byte of the destination = lane 4 */ \
+ vdup.u8 draw_mask_fb_ptr_left_b, block_masks[5]; \
+ vdup.u8 draw_mask_fb_ptr_right_a, block_masks[6]; \
+ vdup.u8 draw_mask_fb_ptr_right_b, block_masks[7] \
+
+