From 59d15d23d97d4347d8046057013f8979db0914f0 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 21 Oct 2012 02:42:03 +0300 Subject: [PATCH] psx_gpu: consolidate C code, implement enhancement asm --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 410 ++++++++++-- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 511 +------------- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 698 ++++++++++++++++---- 3 files changed, 947 insertions(+), 672 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index 2d552aac..3de2ecee 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -3185,14 +3185,17 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #endif -#define setup_sprite_tiled_initialize_4bpp() \ +#define setup_sprite_tiled_initialize_4bpp_clut() \ u16 *clut_ptr = psx_gpu->clut_ptr; \ vec_8x16u clut_a, clut_b; \ vec_16x8u clut_low, clut_high; \ \ load_8x16b(clut_a, clut_ptr); \ load_8x16b(clut_b, clut_ptr + 8); \ - unzip_16x8b(clut_low, clut_high, clut_a, clut_b); \ + unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \ + +#define setup_sprite_tiled_initialize_4bpp() \ + setup_sprite_tiled_initialize_4bpp_clut(); \ \ if(psx_gpu->current_texture_mask & psx_gpu->dirty_textures_4bpp_mask) \ update_texture_4bpp_cache(psx_gpu) \ @@ -3209,10 +3212,6 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) load_64b(texels, texture_block_ptr) \ -#define setup_sprite_tile_setup_block_yes(side, offset, texture_mode) \ - -#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \ - #define setup_sprite_tile_add_blocks(tile_num_blocks) \ num_blocks += tile_num_blocks; \ sprite_blocks += tile_num_blocks; \ @@ -3358,34 +3357,36 @@ void texture_sprite_blocks_8bpp(psx_gpu_struct *psx_gpu) #define setup_sprite_tile_column_edge_post_adjust_full(edge) \ -#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_single(edge_mode, edge, 
texture_mode, \ + x4mode) \ do \ { \ sub_tile_height = column_data; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ } while(0) \ -#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \ + x4mode) \ do \ { \ u32 tiles_remaining = column_data >> 16; \ sub_tile_height = column_data & 0xFF; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ tiles_remaining -= 1; \ \ while(tiles_remaining) \ { \ sub_tile_height = 16; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ tiles_remaining--; \ } \ \ sub_tile_height = (column_data >> 8) & 0xFF; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge); \ } while(0) \ @@ -3398,15 +3399,18 @@ do \ column_data |= (tile_height - 1) << 16 \ +#define RIGHT_MASK_BIT_SHIFT 8 +#define RIGHT_MASK_BIT_SHIFT_4x 16 + #define setup_sprite_tile_column_width_single(texture_mode, multi_height, \ - edge_mode, edge) \ + edge_mode, edge, x4mode) \ { \ setup_sprite_column_data_##multi_height(); \ left_mask_bits = left_block_mask | right_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = 
left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \ - texture_mode); \ + texture_mode, x4mode); \ } \ #define setup_sprite_tiled_advance_column() \ @@ -3414,18 +3418,22 @@ do \ if((texture_offset_base & 0xF00) == 0) \ texture_offset_base -= (0x100 + 0xF00) \ +#define FB_PTR_MULTIPLIER 1 +#define FB_PTR_MULTIPLIER_4x 2 + #define setup_sprite_tile_column_width_multi(texture_mode, multi_height, \ - left_mode, right_mode) \ + left_mode, right_mode, x4mode) \ { \ setup_sprite_column_data_##multi_height(); \ - s32 fb_ptr_advance_column = 16 - (1024 * height); \ + s32 fb_ptr_advance_column = (16 - (1024 * height)) \ + * FB_PTR_MULTIPLIER##x4mode; \ \ tile_width -= 2; \ left_mask_bits = left_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tile_column_height_##multi_height(left_mode, right, \ - texture_mode); \ + texture_mode, x4mode); \ fb_ptr += fb_ptr_advance_column; \ \ left_mask_bits = 0x00; \ @@ -3434,22 +3442,297 @@ do \ while(tile_width) \ { \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(full, none, texture_mode); \ + setup_sprite_tile_column_height_##multi_height(full, none, \ + texture_mode, x4mode); \ fb_ptr += fb_ptr_advance_column; \ tile_width--; \ } \ \ left_mask_bits = right_block_mask; \ - right_mask_bits = left_mask_bits >> 8; \ + right_mask_bits = left_mask_bits >> RIGHT_MASK_BIT_SHIFT##x4mode; \ \ setup_sprite_tiled_advance_column(); \ setup_sprite_tile_column_height_##multi_height(right_mode, left, \ - texture_mode); \ + texture_mode, x4mode); \ +} \ + + +/* 4x stuff */ +#define setup_sprite_tiled_initialize_4bpp_4x() \ + setup_sprite_tiled_initialize_4bpp_clut() \ + +#define setup_sprite_tiled_initialize_8bpp_4x() \ + + +#define setup_sprite_tile_full_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + 
setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 
1024 + 24; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ } \ +#define setup_sprite_tile_half_4bpp_4x(edge) \ +{ \ + vec_8x8u texels_low, texels_high; \ + vec_8x16u pixels, pixels_wide; \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + tbl_16(texels_low, texels, clut_low); \ + tbl_16(texels_high, texels, clut_high); \ + zip_8x16b(pixels, texels_low, texels_high); \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->texels = pixels_wide; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ -#define setup_sprite_tiled_builder(texture_mode) \ -void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ + +#define setup_sprite_tile_full_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 2 * 4); \ + vec_16x8u texels_wide; \ + u32 left_mask_bits_a = left_mask_bits & 0xFF; \ + u32 left_mask_bits_b = left_mask_bits >> 8; \ + u32 right_mask_bits_a = right_mask_bits & 0xFF; \ + u32 right_mask_bits_b = right_mask_bits >> 8; \ + \ + 
while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = left_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = left_mask_bits_b; \ + block->fb_ptr = fb_ptr + 1024 + 8; \ + block++; \ + \ + setup_sprite_tile_fetch_texel_block_8bpp(8); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 16; \ + block++; \ + \ + block->r = texels_wide.low; \ + block->draw_mask_bits = right_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024 + 16; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = right_mask_bits_b; \ + block->fb_ptr = fb_ptr + 24 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + +#define setup_sprite_tile_half_8bpp_4x(edge) \ +{ \ + setup_sprite_tile_add_blocks(sub_tile_height * 4); \ + vec_16x8u texels_wide; \ + u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ + u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ + \ + while(sub_tile_height) \ + { \ + setup_sprite_tile_fetch_texel_block_8bpp(0); \ + zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ + block->r = texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr; \ + block++; \ + \ + block->r = 
texels_wide.low; \ + block->draw_mask_bits = edge##_mask_bits_a; \ + block->fb_ptr = fb_ptr + 1024; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8; \ + block++; \ + \ + block->r = texels_wide.high; \ + block->draw_mask_bits = edge##_mask_bits_b; \ + block->fb_ptr = fb_ptr + 8 + 1024; \ + block++; \ + \ + fb_ptr += 2048; \ + texture_offset += 0x10; \ + sub_tile_height--; \ + } \ + texture_offset += 0xF00; \ + psx_gpu->num_blocks = num_blocks; \ +} \ + + +#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ + texture_offset = texture_offset_base + 8; \ + fb_ptr += 16 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ + texture_offset = texture_offset_base \ + +#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ + fb_ptr -= 16 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ + + +#define setup_sprite_offset_u_adjust() \ + +#define setup_sprite_comapre_left_block_mask() \ + ((left_block_mask & 0xFF) == 0xFF) \ + +#define setup_sprite_comapre_right_block_mask() \ + (((right_block_mask >> 8) & 0xFF) == 0xFF) \ + + +#define setup_sprite_offset_u_adjust_4x() \ + offset_u *= 2; \ + offset_u_right = offset_u_right * 2 + 1 \ + +#define setup_sprite_comapre_left_block_mask_4x() \ + ((left_block_mask & 0xFFFF) == 0xFFFF) \ + +#define setup_sprite_comapre_right_block_mask_4x() \ + (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) \ + + +#define setup_sprite_tiled_builder(texture_mode, x4mode) 
\ +void setup_sprite_##texture_mode##x4mode(psx_gpu_struct *psx_gpu, s32 x, s32 y,\ s32 u, s32 v, s32 width, s32 height, u32 color) \ { \ s32 offset_u = u & 0xF; \ @@ -3461,8 +3744,10 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ s32 tile_width = width_rounded / 16; \ u32 offset_u_right = width_rounded & 0xF; \ \ - u32 left_block_mask = ~(0xFFFF << offset_u); \ - u32 right_block_mask = 0xFFFE << offset_u_right; \ + setup_sprite_offset_u_adjust##x4mode(); \ + \ + u32 left_block_mask = ~(0xFFFFFFFF << offset_u); \ + u32 right_block_mask = 0xFFFFFFFE << offset_u_right; \ \ u32 left_mask_bits; \ u32 right_mask_bits; \ @@ -3479,19 +3764,19 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ u32 texture_offset_base = texture_offset; \ u32 control_mask; \ \ - u16 *fb_ptr = psx_gpu->vram_ptr + (y * 1024) + (x - offset_u); \ + u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u); \ u32 num_blocks = psx_gpu->num_blocks; \ block_struct *block = psx_gpu->blocks + num_blocks; \ \ u16 *texture_block_ptr; \ vec_8x8u texels; \ \ - setup_sprite_tiled_initialize_##texture_mode(); \ + setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ \ control_mask = tile_width == 1; \ control_mask |= (tile_height == 1) << 1; \ - control_mask |= ((left_block_mask & 0xFF) == 0xFF) << 2; \ - control_mask |= (((right_block_mask >> 8) & 0xFF) == 0xFF) << 3; \ + control_mask |= setup_sprite_comapre_left_block_mask##x4mode() << 2; \ + control_mask |= setup_sprite_comapre_right_block_mask##x4mode() << 3; \ \ sprites_##texture_mode++; \ \ @@ -3499,64 +3784,77 @@ void setup_sprite_##texture_mode(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ { \ default: \ case 0x0: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \ + x4mode); \ break; \ \ case 0x1: \ - setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \ + 
setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \ + x4mode); \ break; \ \ case 0x2: \ - setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \ + x4mode); \ break; \ \ case 0x3: \ - setup_sprite_tile_column_width_single(texture_mode, single, full, none); \ + setup_sprite_tile_column_width_single(texture_mode, single, full, none, \ + x4mode); \ break; \ \ case 0x4: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \ + x4mode); \ break; \ \ case 0x5: \ - setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \ + x4mode); \ break; \ \ case 0x6: \ - setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \ + setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \ + x4mode); \ break; \ \ case 0x7: \ - setup_sprite_tile_column_width_single(texture_mode, single, half, right);\ + setup_sprite_tile_column_width_single(texture_mode, single, half, right, \ + x4mode); \ break; \ \ case 0x8: \ - setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \ + x4mode); \ break; \ \ case 0x9: \ - setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \ + setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \ + x4mode); \ break; \ \ case 0xA: \ - setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \ + setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \ + x4mode); \ break; \ \ case 0xB: \ - setup_sprite_tile_column_width_single(texture_mode, single, half, left); \ + setup_sprite_tile_column_width_single(texture_mode, single, half, left, \ + x4mode); \ break; \ \ case 0xC: \ - 
setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \ + setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \ + x4mode); \ break; \ \ case 0xE: \ - setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \ + setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \ + x4mode); \ break; \ } \ } \ - void setup_sprite_4bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, @@ -3564,9 +3862,19 @@ void setup_sprite_8bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); +void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); +void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, + s32 width, s32 height, u32 color); + #ifndef NEON_BUILD -setup_sprite_tiled_builder(4bpp); -setup_sprite_tiled_builder(8bpp); +setup_sprite_tiled_builder(4bpp,); +setup_sprite_tiled_builder(8bpp,); + +setup_sprite_tiled_builder(4bpp,_4x); +setup_sprite_tiled_builder(8bpp,_4x); void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index f8afcf1f..19c4a9ef 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -1,513 +1,4 @@ -#define setup_sprite_tiled_initialize_4bpp_4x() \ - u16 *clut_ptr = psx_gpu->clut_ptr; \ - vec_8x16u clut_a, clut_b; \ - vec_16x8u clut_low, clut_high; \ - \ - load_8x16b(clut_a, clut_ptr); \ - load_8x16b(clut_b, clut_ptr + 8); \ - unzip_16x8b(clut_low, clut_high, clut_a, clut_b) \ - - -#define 
setup_sprite_tiled_initialize_8bpp_4x() \ - - -#define setup_sprite_tile_fetch_texel_block_8bpp_4x(offset) \ - texture_block_ptr = psx_gpu->texture_page_ptr + \ - ((texture_offset + offset) & texture_mask); \ - \ - load_64b(texels, texture_block_ptr) \ - - -#define setup_sprite_tile_setup_block_yes_4x(side, offset, texture_mode) \ - -#define setup_sprite_tile_setup_block_no_4x(side, offset, texture_mode) \ - -#define setup_sprite_tile_add_blocks_4x(tile_num_blocks) \ - num_blocks += tile_num_blocks * 4; \ - sprite_blocks += tile_num_blocks * 4; \ - \ - if(num_blocks > MAX_BLOCKS) \ - { \ - flush_render_block_buffer(psx_gpu); \ - num_blocks = tile_num_blocks * 4; \ - block = psx_gpu->blocks; \ - } \ - -#define setup_sprite_tile_full_4bpp_4x(edge) \ -{ \ - vec_8x8u texels_low, texels_high; \ - vec_8x16u pixels, pixels_wide; \ - setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \ - u32 left_mask_bits_a = left_mask_bits & 0xFF; \ - u32 left_mask_bits_b = left_mask_bits >> 8; \ - u32 right_mask_bits_a = right_mask_bits & 0xFF; \ - u32 right_mask_bits_b = right_mask_bits >> 8; \ - \ - while(sub_tile_height) \ - { \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ - tbl_16(texels_low, texels, clut_low); \ - tbl_16(texels_high, texels, clut_high); \ - zip_8x16b(pixels, texels_low, texels_high); \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = left_mask_bits_a; \ - block->fb_ptr = fb_ptr; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = left_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024; \ - block++; \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = left_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = left_mask_bits_b; \ - block->fb_ptr = fb_ptr + 1024 + 8; \ - block++; \ - \ - 
setup_sprite_tile_fetch_texel_block_8bpp_4x(8); \ - tbl_16(texels_low, texels, clut_low); \ - tbl_16(texels_high, texels, clut_high); \ - zip_8x16b(pixels, texels_low, texels_high); \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = right_mask_bits_a; \ - block->fb_ptr = fb_ptr + 16; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = right_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024 + 16; \ - block++; \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = right_mask_bits_b; \ - block->fb_ptr = fb_ptr + 24; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = right_mask_bits_b; \ - block->fb_ptr = fb_ptr + 1024 + 24; \ - block++; \ - \ - fb_ptr += 2048; \ - texture_offset += 0x10; \ - sub_tile_height--; \ - } \ - texture_offset += 0xF00; \ - psx_gpu->num_blocks = num_blocks; \ -} \ - -#define setup_sprite_tile_half_4bpp_4x(edge) \ -{ \ - vec_8x8u texels_low, texels_high; \ - vec_8x16u pixels, pixels_wide; \ - setup_sprite_tile_add_blocks_4x(sub_tile_height); \ - u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ - u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ - \ - while(sub_tile_height) \ - { \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ - tbl_16(texels_low, texels, clut_low); \ - tbl_16(texels_high, texels, clut_high); \ - zip_8x16b(pixels, texels_low, texels_high); \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.low, pixels.low); \ - block->texels = pixels_wide; \ - block->draw_mask_bits = edge##_mask_bits_a; \ - block->fb_ptr = fb_ptr; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = edge##_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024; \ - block++; \ - \ - zip_4x32b(vector_cast(vec_4x32u, pixels_wide), pixels.high, pixels.high); \ - block->texels = pixels_wide; \ - 
block->draw_mask_bits = edge##_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8; \ - block++; \ - \ - block->texels = pixels_wide; \ - block->draw_mask_bits = edge##_mask_bits_b; \ - block->fb_ptr = fb_ptr + 1024 + 8; \ - block++; \ - \ - fb_ptr += 2048; \ - texture_offset += 0x10; \ - sub_tile_height--; \ - } \ - texture_offset += 0xF00; \ - psx_gpu->num_blocks = num_blocks; \ -} \ - - -#define setup_sprite_tile_full_8bpp_4x(edge) \ -{ \ - setup_sprite_tile_add_blocks_4x(sub_tile_height * 2); \ - vec_16x8u texels_wide; \ - u32 left_mask_bits_a = left_mask_bits & 0xFF; \ - u32 left_mask_bits_b = left_mask_bits >> 8; \ - u32 right_mask_bits_a = right_mask_bits & 0xFF; \ - u32 right_mask_bits_b = right_mask_bits >> 8; \ - \ - while(sub_tile_height) \ - { \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ - zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ - block->r = texels_wide.low; \ - block->draw_mask_bits = left_mask_bits_a; \ - block->fb_ptr = fb_ptr; \ - block++; \ - \ - block->r = texels_wide.low; \ - block->draw_mask_bits = left_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = left_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = left_mask_bits_b; \ - block->fb_ptr = fb_ptr + 1024 + 8; \ - block++; \ - \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(8); \ - zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ - block->r = texels_wide.low; \ - block->draw_mask_bits = right_mask_bits_a; \ - block->fb_ptr = fb_ptr + 16; \ - block++; \ - \ - block->r = texels_wide.low; \ - block->draw_mask_bits = right_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024 + 16; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = right_mask_bits_b; \ - block->fb_ptr = fb_ptr + 24; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = right_mask_bits_b; \ - 
block->fb_ptr = fb_ptr + 24 + 1024; \ - block++; \ - \ - fb_ptr += 2048; \ - texture_offset += 0x10; \ - sub_tile_height--; \ - } \ - texture_offset += 0xF00; \ - psx_gpu->num_blocks = num_blocks; \ -} \ - -#define setup_sprite_tile_half_8bpp_4x(edge) \ -{ \ - setup_sprite_tile_add_blocks_4x(sub_tile_height); \ - vec_16x8u texels_wide; \ - u32 edge##_mask_bits_a = edge##_mask_bits & 0xFF; \ - u32 edge##_mask_bits_b = edge##_mask_bits >> 8; \ - \ - while(sub_tile_height) \ - { \ - setup_sprite_tile_fetch_texel_block_8bpp_4x(0); \ - zip_8x16b(vector_cast(vec_8x16u, texels_wide), texels, texels); \ - block->r = texels_wide.low; \ - block->draw_mask_bits = edge##_mask_bits_a; \ - block->fb_ptr = fb_ptr; \ - block++; \ - \ - block->r = texels_wide.low; \ - block->draw_mask_bits = edge##_mask_bits_a; \ - block->fb_ptr = fb_ptr + 1024; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = edge##_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8; \ - block++; \ - \ - block->r = texels_wide.high; \ - block->draw_mask_bits = edge##_mask_bits_b; \ - block->fb_ptr = fb_ptr + 8 + 1024; \ - block++; \ - \ - fb_ptr += 2048; \ - texture_offset += 0x10; \ - sub_tile_height--; \ - } \ - texture_offset += 0xF00; \ - psx_gpu->num_blocks = num_blocks; \ -} \ - - -#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ - texture_offset = texture_offset_base + 8; \ - fb_ptr += 16 \ - -#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ - texture_offset = texture_offset_base \ - -#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ - setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ - -#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ - texture_offset = texture_offset_base \ - -#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ - fb_ptr -= 16 \ - -#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ - -#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ - 
setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ - -#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ - - -#define setup_sprite_tile_column_height_single_4x(edge_mode, edge, \ - texture_mode) \ -do \ -{ \ - sub_tile_height = column_data; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode##_4x(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode##_4x(edge); \ -} while(0) \ - -#define setup_sprite_tile_column_height_multi_4x(edge_mode, edge, \ - texture_mode) \ -do \ -{ \ - u32 tiles_remaining = column_data >> 16; \ - sub_tile_height = column_data & 0xFF; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode##_4x(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ - tiles_remaining -= 1; \ - \ - while(tiles_remaining) \ - { \ - sub_tile_height = 16; \ - setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ - tiles_remaining--; \ - } \ - \ - sub_tile_height = (column_data >> 8) & 0xFF; \ - setup_sprite_tile_##edge_mode##_##texture_mode##_4x(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode##_4x(edge); \ -} while(0) \ - - -#define setup_sprite_column_data_single_4x() \ - column_data = height \ - -#define setup_sprite_column_data_multi_4x() \ - column_data = 16 - offset_v; \ - column_data |= ((height_rounded & 0xF) + 1) << 8; \ - column_data |= (tile_height - 1) << 16 \ - - -#define setup_sprite_tile_column_width_single_4x(texture_mode, multi_height, \ - edge_mode, edge) \ -{ \ - setup_sprite_column_data_##multi_height##_4x(); \ - left_mask_bits = left_block_mask | right_block_mask; \ - right_mask_bits = left_mask_bits >> 16; \ - \ - setup_sprite_tile_column_height_##multi_height##_4x(edge_mode, edge, \ - texture_mode); \ -} \ - -#define setup_sprite_tiled_advance_column_4x() \ - texture_offset_base += 0x100; \ - if((texture_offset_base & 0xF00) == 0) \ - texture_offset_base -= (0x100 + 0xF00) \ - -#define 
setup_sprite_tile_column_width_multi_4x(texture_mode, multi_height, \ - left_mode, right_mode) \ -{ \ - setup_sprite_column_data_##multi_height##_4x(); \ - s32 fb_ptr_advance_column = 32 - (2048 * height); \ - \ - tile_width -= 2; \ - left_mask_bits = left_block_mask; \ - right_mask_bits = left_mask_bits >> 16; \ - \ - setup_sprite_tile_column_height_##multi_height##_4x(left_mode, right, \ - texture_mode); \ - fb_ptr += fb_ptr_advance_column; \ - \ - left_mask_bits = 0x00; \ - right_mask_bits = 0x00; \ - \ - while(tile_width) \ - { \ - setup_sprite_tiled_advance_column_4x(); \ - setup_sprite_tile_column_height_##multi_height##_4x(full, none, \ - texture_mode); \ - fb_ptr += fb_ptr_advance_column; \ - tile_width--; \ - } \ - \ - left_mask_bits = right_block_mask; \ - right_mask_bits = left_mask_bits >> 16; \ - \ - setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height##_4x(right_mode, left, \ - texture_mode); \ -} \ - - -#define setup_sprite_tiled_builder_4x(texture_mode) \ -void setup_sprite_##texture_mode##_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, \ - s32 u, s32 v, s32 width, s32 height, u32 color) \ -{ \ - s32 offset_u = u & 0xF; \ - s32 offset_v = v & 0xF; \ - \ - s32 width_rounded = offset_u + width + 15; \ - s32 height_rounded = offset_v + height + 15; \ - s32 tile_height = height_rounded / 16; \ - s32 tile_width = width_rounded / 16; \ - u32 offset_u_right = width_rounded & 0xF; \ - \ - u32 left_block_mask = ~(0xFFFFFFFF << (offset_u * 2)); \ - u32 right_block_mask = 0xFFFFFFFC << (offset_u_right * 2); \ - \ - u32 left_mask_bits; \ - u32 right_mask_bits; \ - \ - u32 sub_tile_height; \ - u32 column_data; \ - \ - u32 texture_mask = (psx_gpu->texture_mask_width & 0xF) | \ - ((psx_gpu->texture_mask_height & 0xF) << 4) | \ - ((psx_gpu->texture_mask_width >> 4) << 8) | \ - ((psx_gpu->texture_mask_height >> 4) << 12); \ - u32 texture_offset = ((v & 0xF) << 4) | ((u & 0xF0) << 4) | \ - ((v & 0xF0) << 8); \ - u32 texture_offset_base = 
texture_offset; \ - u32 control_mask; \ - \ - u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + (x - offset_u * 2); \ - u32 num_blocks = psx_gpu->num_blocks; \ - block_struct *block = psx_gpu->blocks + num_blocks; \ - \ - u16 *texture_block_ptr; \ - vec_8x8u texels; \ - \ - setup_sprite_tiled_initialize_##texture_mode##_4x(); \ - \ - control_mask = tile_width == 1; \ - control_mask |= (tile_height == 1) << 1; \ - control_mask |= ((left_block_mask & 0xFFFF) == 0xFFFF) << 2; \ - control_mask |= (((right_block_mask >> 16) & 0xFFFF) == 0xFFFF) << 3; \ - \ - sprites_##texture_mode++; \ - \ - switch(control_mask) \ - { \ - default: \ - case 0x0: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, multi, full, \ - full); \ - break; \ - \ - case 0x1: \ - setup_sprite_tile_column_width_single_4x(texture_mode, multi, full, \ - none); \ - break; \ - \ - case 0x2: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, single, full, \ - full); \ - break; \ - \ - case 0x3: \ - setup_sprite_tile_column_width_single_4x(texture_mode, single, full, \ - none); \ - break; \ - \ - case 0x4: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, multi, half, \ - full); \ - break; \ - \ - case 0x5: \ - setup_sprite_tile_column_width_single_4x(texture_mode, multi, half, \ - right); \ - break; \ - \ - case 0x6: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, single, half, \ - full); \ - break; \ - \ - case 0x7: \ - setup_sprite_tile_column_width_single_4x(texture_mode, single, half, \ - right); \ - break; \ - \ - case 0x8: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, multi, full, \ - half); \ - break; \ - \ - case 0x9: \ - setup_sprite_tile_column_width_single_4x(texture_mode, multi, half, \ - left); \ - break; \ - \ - case 0xA: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, single, full, \ - half); \ - break; \ - \ - case 0xB: \ - setup_sprite_tile_column_width_single_4x(texture_mode, single, half, \ - left); \ - break; \ - \ - case 0xC: \ - 
setup_sprite_tile_column_width_multi_4x(texture_mode, multi, half, \ - half); \ - break; \ - \ - case 0xE: \ - setup_sprite_tile_column_width_multi_4x(texture_mode, single, half, \ - half); \ - break; \ - } \ -} \ - - -void setup_sprite_4bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); -void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, - s32 width, s32 height, u32 color); - -//#ifndef NEON_BUILD -#if 1 -setup_sprite_tiled_builder_4x(4bpp); -setup_sprite_tiled_builder_4x(8bpp); - +#ifndef NEON_BUILD void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 87a14f64..103483a8 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -1,5 +1,6 @@ /* * Copyright (C) 2011 Gilead Kutnick "Exophase" + * Copyright (C) 2012 Gražvydas Ignotas "notaz" * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as @@ -3188,6 +3189,7 @@ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ shade_blocks_textured_modulated_load_bdm_##shading(); \ vshrn.u16 texels_b, texels, #7; \ \ + pld [ block_ptr_load_a ]; \ vmovn.u16 texels_r, texels; \ vmlal.u8 pixels, pixels_r_low, d64_1; \ \ @@ -4405,6 +4407,12 @@ function(render_block_fill_body) #define draw_mask_fb_ptr_left d2 #define draw_mask_fb_ptr_right d3 +#define draw_mask_fb_ptr_left_a d2 +#define draw_mask_fb_ptr_left_b d3 +#define draw_mask_fb_ptr_right_a d10 +#define draw_mask_fb_ptr_right_b d11 +#define draw_masks_fb_ptrs2 q5 + #define clut_low_a d4 #define clut_low_b d5 #define clut_high_a d6 @@ -4416,37 +4424,24 @@ 
function(render_block_fill_body) #define clut_a q2 #define clut_b q3 -#define texels_low d10 -#define texels_high d11 - - -setup_sprite_flush_blocks_single: - vpush { q1 - q4 } - - stmdb sp!, { r0 - r3, r12, r14 } - bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } - - vpop { q1 - q4 } - - add block, psx_gpu, #psx_gpu_blocks_offset +#define texels_low d12 +#define texels_high d13 - mov num_blocks, sub_tile_height - bx lr +#define texels_wide_low d14 +#define texels_wide_high d15 +#define texels_wide q7 -setup_sprite_flush_blocks_double: - vpush { q1 - q4 } +setup_sprite_flush_blocks: + vpush { q1 - q5 } stmdb sp!, { r0 - r3, r12, r14 } bl flush_render_block_buffer ldmia sp!, { r0 - r3, r12, r14 } - vpop { q1 - q4 } + vpop { q1 - q5 } add block, psx_gpu, #psx_gpu_blocks_offset - - mov num_blocks, sub_tile_height, lsl #1 bx lr @@ -4484,8 +4479,6 @@ setup_sprite_update_texture_8bpp_cache: blne setup_sprite_update_texture_8bpp_cache \ -#define setup_sprite_tile_setup_block_no(side, offset, texture_mode) \ - #define setup_sprite_block_count_single() \ sub_tile_height \ @@ -4496,7 +4489,8 @@ setup_sprite_update_texture_8bpp_cache: add num_blocks, num_blocks, setup_sprite_block_count_##type(); \ cmp num_blocks, #MAX_BLOCKS; \ \ - blgt setup_sprite_flush_blocks_##type \ + movgt num_blocks, setup_sprite_block_count_##type(); \ + blgt setup_sprite_flush_blocks \ #define setup_sprite_tile_full_4bpp(edge) \ @@ -4678,31 +4672,33 @@ setup_sprite_update_texture_8bpp_cache: #define setup_sprite_tile_column_edge_post_adjust_full(edge) \ -#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_single(edge_mode, edge, texture_mode, \ + x4mode) \ mov sub_tile_height, column_data; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \ + 
setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \ -#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode) \ +#define setup_sprite_tile_column_height_multi(edge_mode, edge, texture_mode, \ + x4mode) \ and sub_tile_height, column_data, #0xFF; \ mov tiles_remaining, column_data, lsr #16; \ - setup_sprite_tile_column_edge_pre_adjust_##edge_mode(edge); \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_column_edge_pre_adjust_##edge_mode##x4mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ \ subs tiles_remaining, tiles_remaining, #1; \ beq 2f; \ \ 3: \ mov sub_tile_height, #16; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ subs tiles_remaining, tiles_remaining, #1; \ bne 3b; \ \ 2: \ uxtb sub_tile_height, column_data, ror #8; \ - setup_sprite_tile_##edge_mode##_##texture_mode(edge); \ - setup_sprite_tile_column_edge_post_adjust_##edge_mode(edge) \ + setup_sprite_tile_##edge_mode##_##texture_mode##x4mode(edge); \ + setup_sprite_tile_column_edge_post_adjust_##edge_mode##x4mode(edge) \ #define setup_sprite_column_data_single() \ @@ -4721,17 +4717,30 @@ setup_sprite_update_texture_8bpp_cache: \ orr column_data, column_data, height_rounded, lsl #8 \ -#define setup_sprite_tile_column_width_single(texture_mode, multi_height, \ - edge_mode, edge) \ - setup_sprite_##texture_mode##_single_##multi_height##_##edge_mode##_##edge: \ +#define setup_sprite_setup_left_draw_mask_fb_ptr() \ + vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ + vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \ + +#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column() \ + mov fb_ptr_advance_column, #32; \ + vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ + \ + sub 
fb_ptr_advance_column, height, lsl #11; \ + vdup.u8 draw_mask_fb_ptr_right, block_masks[1] \ + +#define setup_sprite_setup_right_draw_mask_fb_ptr() \ + vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \ + vdup.u8 draw_mask_fb_ptr_right, block_masks[5] \ + +#define setup_sprite_tile_column_width_single(tm, multi_height, edge_mode, \ + edge, x4mode) \ + setup_sprite_##tm##_single_##multi_height##_##edge_mode##_##edge##x4mode: \ setup_sprite_column_data_##multi_height(); \ vext.32 block_masks_shifted, block_masks, block_masks, #1; \ vorr.u32 block_masks, block_masks, block_masks_shifted; \ - vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ - vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \ + setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \ \ - setup_sprite_tile_column_height_##multi_height(edge_mode, edge, \ - texture_mode); \ + setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \ ldmia sp!, { r4 - r11, pc } \ #define setup_sprite_tiled_advance_column() \ @@ -4740,39 +4749,335 @@ setup_sprite_update_texture_8bpp_cache: subeq texture_offset_base, texture_offset_base, #(0x100 + 0xF00) \ #define setup_sprite_tile_column_width_multi(tm, multi_height, left_mode, \ - right_mode) \ - setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode: \ + right_mode, x4mode) \ + setup_sprite_##tm##_multi_##multi_height##_##left_mode##_##right_mode##x4mode:\ setup_sprite_column_data_##multi_height(); \ - mov fb_ptr_advance_column, #32; \ \ - sub fb_ptr_advance_column, height, lsl #11; \ - vdup.u8 draw_mask_fb_ptr_left, block_masks[0]; \ + setup_sprite_setup_left_draw_mask_fb_ptr_advance_column##x4mode(); \ \ - vdup.u8 draw_mask_fb_ptr_right, block_masks[1]; \ - setup_sprite_tile_column_height_##multi_height(left_mode, right, tm); \ + setup_sprite_tile_column_height_##multi_height(left_mode, right, tm, x4mode);\ \ subs tile_width, tile_width, #2; \ add fb_ptr, fb_ptr, fb_ptr_advance_column; \ \ - vmov.u8 draw_masks_fb_ptrs, #0; \ beq 1f; \ \ + 
vmov.u8 draw_masks_fb_ptrs, #0; \ + vmov.u8 draw_masks_fb_ptrs2, #0; \ + \ 0: \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(full, none, tm); \ + setup_sprite_tile_column_height_##multi_height(full, none, tm, x4mode); \ add fb_ptr, fb_ptr, fb_ptr_advance_column; \ subs tile_width, tile_width, #1; \ bne 0b; \ \ 1: \ - vdup.u8 draw_mask_fb_ptr_left, block_masks[4]; \ - vdup.u8 draw_mask_fb_ptr_right, block_masks[5]; \ + setup_sprite_setup_right_draw_mask_fb_ptr##x4mode(); \ \ setup_sprite_tiled_advance_column(); \ - setup_sprite_tile_column_height_##multi_height(right_mode, left, tm); \ + setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\ ldmia sp!, { r4 - r11, pc } \ +#define setup_sprite_offset_u_adjust() \ + +#define setup_sprite_get_left_block_mask() \ + and left_block_mask, left_block_mask, #0xFF \ + +#define setup_sprite_compare_left_block_mask() \ + cmp left_block_mask, #0xFF \ + +#define setup_sprite_get_right_block_mask() \ + uxtb right_block_mask, right_block_mask, ror #8 \ + +#define setup_sprite_compare_right_block_mask() \ + cmp right_block_mask, #0xFF \ + + + +/* 4x stuff */ +#define fb_ptr2 column_data + +#define setup_sprite_offset_u_adjust_4x() \ + sub fb_ptr, fb_ptr, offset_u, lsl #1; \ + lsl offset_u_right, #1; \ + lsl offset_u, #1; \ + add offset_u_right, #1 \ + +#define setup_sprite_get_left_block_mask_4x() \ + sxth left_block_mask, left_block_mask \ + +#define setup_sprite_compare_left_block_mask_4x() \ + cmp left_block_mask, #0xFFFFFFFF \ + +#define setup_sprite_get_right_block_mask_4x() \ + sxth right_block_mask, right_block_mask, ror #16 \ + +#define setup_sprite_compare_right_block_mask_4x() \ + cmp right_block_mask, #0xFFFFFFFF \ + + +#define widen_texels_16bpp(texels_) \ + vmov texels_wide_low, texels_; \ + vmov texels_wide_high, texels_; \ + vzip.16 texels_wide_low, texels_wide_high \ + +#define widen_texels_8bpp(texels_) \ + vmov texels_wide_low, texels_; \ + vmov 
texels_wide_high, texels_; \ + vzip.8 texels_wide_low, texels_wide_high \ + +#define write_block_16bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) \ + vst1.u32 { texels_ }, [ block_, :128 ]; \ + add block_, block_, #40; \ + \ + vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \ + vst1.u32 { draw_mask_fb_ptr_ }, [ block_, :64 ]; \ + add block_, block_, #24 \ + +/* assumes 16-byte offset already added to block_ */ +#define write_block_8bpp(texels_, block_, draw_mask_fb_ptr_, fb_ptr_) \ + vst1.u32 { texels_ }, [ block_, :64 ]; \ + add block_, block_, #24; \ + \ + vmov.u32 draw_mask_fb_ptr_[1], fb_ptr_; \ + vst1.u32 { draw_mask_fb_ptr_ }, [ block_, :64 ]; \ + add block_, block_, #40 \ + +#define do_texture_block_16bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \ + draw_mask_fb_ptr_b_) \ + widen_texels_16bpp(texels_low); \ + add fb_ptr_tmp, fb_ptr, #1024*2; \ + \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr); \ + \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \ + widen_texels_16bpp(texels_high); \ + \ + add fb_ptr_tmp, fb_ptr, #8*2; \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \ + \ + add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \ + write_block_16bpp(texels_wide, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \ + +#define do_texture_block_8bpp_4x(fb_ptr_tmp, draw_mask_fb_ptr_a_, \ + draw_mask_fb_ptr_b_) \ + widen_texels_8bpp(texels); \ + add fb_ptr_tmp, fb_ptr, #1024*2; \ + \ + write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr); \ + write_block_8bpp(texels_wide_low, block, draw_mask_fb_ptr_a_, fb_ptr_tmp); \ + \ + add fb_ptr_tmp, fb_ptr, #8*2; \ + write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp); \ + \ + add fb_ptr_tmp, fb_ptr_tmp, #1024*2; \ + write_block_8bpp(texels_wide_high, block, draw_mask_fb_ptr_b_, fb_ptr_tmp) \ + + +#define setup_sprite_tiled_initialize_4bpp_4x() \ + ldr clut_ptr, [ psx_gpu, #psx_gpu_clut_ptr_offset ]; \ + vld1.u32 { clut_a, clut_b }, [ clut_ptr, 
:128 ]; \ + \ + vuzp.u8 clut_a, clut_b \ + +#define setup_sprite_tiled_initialize_8bpp_4x() \ + + +#define setup_sprite_block_count_single_4x() \ + sub_tile_height, lsl #2 \ + +#define setup_sprite_block_count_double_4x() \ + sub_tile_height, lsl #(1+2) \ + +#define setup_sprite_tile_full_4bpp_4x(edge) \ + setup_sprite_tile_add_blocks(double_4x); \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + add texture_block_ptr, texture_offset, #8; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + \ + and texture_block_ptr, texture_block_ptr, texture_mask; \ + vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ + \ + vzip.8 texels_low, texels_high; \ + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \ + draw_mask_fb_ptr_left_b); \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + add fb_ptr, fb_ptr, #16*2; \ + \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + \ + pld [ fb_ptr ]; \ + vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ + \ + vzip.8 texels_low, texels_high; \ + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \ + draw_mask_fb_ptr_right_b); \ + \ + add texture_offset, texture_offset, #0x10; \ + add fb_ptr, fb_ptr, #(2048 - 16) * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_half_4bpp_4x(edge) \ + setup_sprite_tile_add_blocks(single_4x); \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, 
texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vtbl.8 texels_low, { clut_low_a, clut_low_b }, texels; \ + \ + vtbl.8 texels_high, { clut_high_a, clut_high_b }, texels; \ + add texture_offset, texture_offset, #0x10; \ + \ + vzip.8 texels_low, texels_high; \ + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ + draw_mask_fb_ptr_##edge##_b); \ + \ + add fb_ptr, fb_ptr, #2048 * 2; \ + subs sub_tile_height, sub_tile_height, #1; \ + \ + bne 4b; \ + \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_full_8bpp_4x(edge) \ + setup_sprite_tile_add_blocks(double_4x); \ + add block, block, #16; \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + add texture_block_ptr, texture_offset, #8; \ + do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_left_a, \ + draw_mask_fb_ptr_left_b); \ + \ + and texture_block_ptr, texture_block_ptr, texture_mask; \ + \ + add fb_ptr, fb_ptr, #16*2; \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + pld [ fb_ptr ]; \ + \ + do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_right_a, \ + draw_mask_fb_ptr_right_b); \ + \ + add texture_offset, texture_offset, #0x10; \ + add fb_ptr, fb_ptr, #(2048 - 16) * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + sub block, block, #16; \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_half_8bpp_4x(edge) \ + 
setup_sprite_tile_add_blocks(single_4x); \ + add block, block, #16; \ + str column_data, [sp, #-8]!; /* fb_ptr2 */ \ + \ + 4: \ + and texture_block_ptr, texture_offset, texture_mask; \ + pld [ fb_ptr ]; \ + \ + add texture_block_ptr, texture_page_ptr, texture_block_ptr; \ + vld1.u32 { texels }, [ texture_block_ptr, :64 ]; \ + \ + do_texture_block_8bpp_4x(fb_ptr2, draw_mask_fb_ptr_##edge##_a, \ + draw_mask_fb_ptr_##edge##_b); \ + \ + add texture_offset, texture_offset, #0x10; \ + add fb_ptr, fb_ptr, #2048 * 2; \ + \ + subs sub_tile_height, sub_tile_height, #1; \ + bne 4b; \ + \ + sub block, block, #16; \ + ldr column_data, [sp], #8; /* fb_ptr2 */ \ + add texture_offset, texture_offset, #0xF00; \ + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] \ + + +#define setup_sprite_tile_column_edge_pre_adjust_half_right_4x() \ + add texture_offset, texture_offset_base, #8; \ + add fb_ptr, fb_ptr, #16 * 2 \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_left_4x() \ + mov texture_offset, texture_offset_base \ + +#define setup_sprite_tile_column_edge_pre_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_pre_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_pre_adjust_full_4x(edge) \ + mov texture_offset, texture_offset_base \ + +#define setup_sprite_tile_column_edge_post_adjust_half_right_4x() \ + sub fb_ptr, fb_ptr, #16 * 2 \ + +#define setup_sprite_tile_column_edge_post_adjust_half_left_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_half_4x(edge) \ + setup_sprite_tile_column_edge_post_adjust_half_##edge##_4x() \ + +#define setup_sprite_tile_column_edge_post_adjust_full_4x(edge) \ + + +#define setup_sprite_setup_left_draw_mask_fb_ptr_4x() \ + vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \ + vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \ + vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \ + vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \ + +#define setup_sprite_setup_left_draw_mask_fb_ptr_advance_column_4x() \ 
+ mov fb_ptr_advance_column, #32 * 2; \ + vdup.u8 draw_mask_fb_ptr_left_a, block_masks[0]; \ + vdup.u8 draw_mask_fb_ptr_left_b, block_masks[1]; \ + sub fb_ptr_advance_column, height, lsl #11 + 1; \ + vdup.u8 draw_mask_fb_ptr_right_a, block_masks[2]; \ + vdup.u8 draw_mask_fb_ptr_right_b, block_masks[3] \ + +#define setup_sprite_setup_right_draw_mask_fb_ptr_4x() \ + vdup.u8 draw_mask_fb_ptr_left_a, block_masks[4]; \ + vdup.u8 draw_mask_fb_ptr_left_b, block_masks[5]; \ + vdup.u8 draw_mask_fb_ptr_right_a, block_masks[6]; \ + vdup.u8 draw_mask_fb_ptr_right_b, block_masks[7] \ + + // r0: psx_gpu // r1: x // r2: y @@ -4782,28 +5087,42 @@ setup_sprite_update_texture_8bpp_cache: // [ sp + 8 ]: height // [ sp + 12 ]: color (unused) -#define setup_sprite_tiled_builder(texture_mode) \ - \ -setup_sprite_tile_column_width_multi(texture_mode, multi, full, full); \ -setup_sprite_tile_column_width_single(texture_mode, multi, full, none); \ -setup_sprite_tile_column_width_multi(texture_mode, single, full, full); \ -setup_sprite_tile_column_width_single(texture_mode, single, full, none); \ -setup_sprite_tile_column_width_multi(texture_mode, multi, half, full); \ -setup_sprite_tile_column_width_single(texture_mode, multi, half, right); \ -setup_sprite_tile_column_width_multi(texture_mode, single, half, full); \ -setup_sprite_tile_column_width_single(texture_mode, single, half, right); \ -setup_sprite_tile_column_width_multi(texture_mode, multi, full, half); \ -setup_sprite_tile_column_width_single(texture_mode, multi, half, left); \ -setup_sprite_tile_column_width_multi(texture_mode, single, full, half); \ -setup_sprite_tile_column_width_single(texture_mode, single, half, left); \ -setup_sprite_tile_column_width_multi(texture_mode, multi, half, half); \ -setup_sprite_tile_column_width_multi(texture_mode, single, half, half); \ +#define setup_sprite_tiled_builder(texture_mode, x4mode) \ + \ +setup_sprite_tile_column_width_multi(texture_mode, multi, full, full, \ + x4mode); \ 
+setup_sprite_tile_column_width_single(texture_mode, multi, full, none, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, full, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, single, full, none, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, multi, half, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, multi, half, right, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, half, full, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, single, half, right, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, multi, full, half, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, multi, half, left, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, full, half, \ + x4mode); \ +setup_sprite_tile_column_width_single(texture_mode, single, half, left, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, multi, half, half, \ + x4mode); \ +setup_sprite_tile_column_width_multi(texture_mode, single, half, half, \ + x4mode); \ \ .align 4; \ \ -function(setup_sprite_##texture_mode) \ +function(setup_sprite_##texture_mode##x4mode) \ stmdb sp!, { r4 - r11, r14 }; \ - setup_sprite_tiled_initialize_##texture_mode(); \ + setup_sprite_tiled_initialize_##texture_mode##x4mode(); \ \ ldr v, [ sp, #36 ]; \ and offset_u, u, #0xF; \ @@ -4832,11 +5151,13 @@ function(setup_sprite_##texture_mode) \ \ /* texture_offset_base = VH-UH-UL-00 */\ bfi texture_offset_base, u, #4, #8; \ - movw right_block_mask, #0xFFFE; \ + mov right_block_mask, #0xFFFFFFFE; \ + \ + setup_sprite_offset_u_adjust##x4mode(); \ \ /* texture_offset_base = VH-UH-VL-00 */\ bfi texture_offset_base, v, #4, #4; \ - movw left_block_mask, #0xFFFF; \ + mov left_block_mask, #0xFFFFFFFF; \ \ mov tile_height, height_rounded, lsr #4; \ mvn left_block_mask, left_block_mask, lsl offset_u; \ @@ -4856,16 +5177,16 @@ 
function(setup_sprite_##texture_mode) \ \ /* texture_mask = HH-WH-HL-WL */\ bfi texture_mask, texture_mask_rev, #8, #4; \ - and left_block_mask, left_block_mask, #0xFF; \ + setup_sprite_get_left_block_mask##x4mode(); \ \ mov control_mask, #0; \ - cmp left_block_mask, #0xFF; \ + setup_sprite_compare_left_block_mask##x4mode(); \ \ - uxtb right_block_mask, right_block_mask, ror #8; \ + setup_sprite_get_right_block_mask##x4mode(); \ orreq control_mask, control_mask, #0x4; \ \ ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ]; \ - cmp right_block_mask, #0xFF; \ + setup_sprite_compare_right_block_mask##x4mode(); \ \ orreq control_mask, control_mask, #0x8; \ cmp tile_width, #1; \ @@ -4880,25 +5201,31 @@ function(setup_sprite_##texture_mode) \ ldr pc, [ pc, control_mask, lsl #2 ]; \ nop; \ \ - .word setup_sprite_##texture_mode##_multi_multi_full_full; \ - .word setup_sprite_##texture_mode##_single_multi_full_none; \ - .word setup_sprite_##texture_mode##_multi_single_full_full; \ - .word setup_sprite_##texture_mode##_single_single_full_none; \ - .word setup_sprite_##texture_mode##_multi_multi_half_full; \ - .word setup_sprite_##texture_mode##_single_multi_half_right; \ - .word setup_sprite_##texture_mode##_multi_single_half_full; \ - .word setup_sprite_##texture_mode##_single_single_half_right; \ - .word setup_sprite_##texture_mode##_multi_multi_full_half; \ - .word setup_sprite_##texture_mode##_single_multi_half_left; \ - .word setup_sprite_##texture_mode##_multi_single_full_half; \ - .word setup_sprite_##texture_mode##_single_single_half_left; \ - .word setup_sprite_##texture_mode##_multi_multi_half_half; \ + .word setup_sprite_##texture_mode##_multi_multi_full_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_multi_full_none##x4mode; \ + .word setup_sprite_##texture_mode##_multi_single_full_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_single_full_none##x4mode; \ + .word setup_sprite_##texture_mode##_multi_multi_half_full##x4mode; \ + 
.word setup_sprite_##texture_mode##_single_multi_half_right##x4mode; \ + .word setup_sprite_##texture_mode##_multi_single_half_full##x4mode; \ + .word setup_sprite_##texture_mode##_single_single_half_right##x4mode; \ + .word setup_sprite_##texture_mode##_multi_multi_full_half##x4mode; \ + .word setup_sprite_##texture_mode##_single_multi_half_left##x4mode; \ + .word setup_sprite_##texture_mode##_multi_single_full_half##x4mode; \ + .word setup_sprite_##texture_mode##_single_single_half_left##x4mode; \ + .word setup_sprite_##texture_mode##_multi_multi_half_half##x4mode; \ .word 0x00000000; \ - .word setup_sprite_##texture_mode##_multi_single_half_half \ + .word setup_sprite_##texture_mode##_multi_single_half_half##x4mode; \ + + +setup_sprite_tiled_builder(4bpp,); +setup_sprite_tiled_builder(8bpp,); +#undef draw_mask_fb_ptr_left +#undef draw_mask_fb_ptr_right -setup_sprite_tiled_builder(4bpp); -setup_sprite_tiled_builder(8bpp); +setup_sprite_tiled_builder(4bpp, _4x); +setup_sprite_tiled_builder(8bpp, _4x); #undef block_ptr @@ -4987,6 +5314,12 @@ function(texture_sprite_blocks_8bpp) #undef texture_mask #undef num_blocks #undef texture_offset +#undef texels_low +#undef texels_high +#undef texels_wide_low +#undef texels_wide_high +#undef texels_wide +#undef fb_ptr2 #define psx_gpu r0 #define x r1 @@ -4998,6 +5331,7 @@ function(texture_sprite_blocks_8bpp) #define left_offset r8 #define width_rounded r9 #define right_width r10 + #define block_width r11 #define texture_offset_base r1 @@ -5008,6 +5342,7 @@ function(texture_sprite_blocks_8bpp) #define fb_ptr r7 #define texture_offset r8 #define blocks_remaining r9 +#define fb_ptr2 r10 #define fb_ptr_pitch r12 #define texture_block_ptr r14 @@ -5026,29 +5361,23 @@ function(texture_sprite_blocks_8bpp) #define draw_mask_fb_ptr d2 #define texels q2 +#define draw_mask_fb_ptr_a d2 +#define draw_mask_fb_ptr_b d3 +#define texels_low d4 +#define texels_high d5 +#define texels_wide_low d6 +#define texels_wide_high d7 +#define texels_wide 
q3 -setup_sprites_16bpp_flush_single: - vpush { d0 - d2 } - - stmdb sp!, { r0 - r3, r12, r14 } - bl flush_render_block_buffer - ldmia sp!, { r0 - r3, r12, r14 } - - vpop { d0 - d2 } - - add block, psx_gpu, #psx_gpu_blocks_offset - mov num_blocks, #1 - - bx lr -setup_sprites_16bpp_flush_row: - vpush { d0 - d2 } +setup_sprites_16bpp_flush: + vpush { d0 - d3 } stmdb sp!, { r0 - r3, r12, r14 } bl flush_render_block_buffer ldmia sp!, { r0 - r3, r12, r14 } - vpop { d0 - d2 } + vpop { d0 - d3 } add block, psx_gpu, #psx_gpu_blocks_offset mov num_blocks, block_width @@ -5113,7 +5442,7 @@ function(setup_sprite_16bpp) 1: add num_blocks, num_blocks, #1 cmp num_blocks, #MAX_BLOCKS - blgt setup_sprites_16bpp_flush_single + blgt setup_sprites_16bpp_flush and texture_block_ptr, texture_offset_base, texture_mask subs height, height, #1 @@ -5142,7 +5471,7 @@ function(setup_sprite_16bpp) mov texture_offset, texture_offset_base cmp num_blocks, #MAX_BLOCKS - blgt setup_sprites_16bpp_flush_row + blgt setup_sprites_16bpp_flush add texture_offset_base, texture_offset_base, #2048 and texture_block_ptr, texture_offset, texture_mask @@ -5213,6 +5542,151 @@ function(setup_sprite_16bpp) ldmia sp!, { r4 - r11, pc } +// 4x version +// FIXME: duplicate code with normal version :( +#undef draw_mask_fb_ptr + +function(setup_sprite_16bpp_4x) + stmdb sp!, { r4 - r11, r14 } + ldr fb_ptr, [ psx_gpu, #psx_gpu_vram_out_ptr_offset ] + + ldr v, [ sp, #36 ] + add fb_ptr, fb_ptr, y, lsl #11 + + ldr width, [ sp, #40 ] + add fb_ptr, fb_ptr, x, lsl #1 + + ldr height, [ sp, #44 ] + and left_offset, u, #0x7 + + add texture_offset_base, u, u + add width_rounded, width, #7 + + add texture_offset_base, v, lsl #11 + movw left_mask_bits, #0xFFFF + + ldrb texture_mask_width, [ psx_gpu, #psx_gpu_texture_mask_width_offset ] + add width_rounded, width_rounded, left_offset + + lsl left_offset, #1 + + ldrb texture_mask_height, [ psx_gpu, #psx_gpu_texture_mask_height_offset ] + sub fb_ptr, fb_ptr, left_offset, lsl #1 + + add 
texture_mask, texture_mask_width, texture_mask_width + movw right_mask_bits, #0xFFFC + + and right_width, width_rounded, #0x7 + mvn left_mask_bits, left_mask_bits, lsl left_offset + + lsl right_width, #1 + + add texture_mask, texture_mask_height, lsl #11 + mov block_width, width_rounded, lsr #3 + + mov right_mask_bits, right_mask_bits, lsl right_width + movw fb_ptr_pitch, #(2048 + 16) * 2 + + sub fb_ptr_pitch, fb_ptr_pitch, block_width, lsl #4+1 + vmov block_masks, left_mask_bits, right_mask_bits + + ldrh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + add block, psx_gpu, #psx_gpu_blocks_offset + + bic texture_offset_base, texture_offset_base, #0xF + cmp block_width, #1 + + ldr texture_page_ptr, [ psx_gpu, #psx_gpu_texture_page_ptr_offset ] + add block, block, num_blocks, lsl #6 + + lsl block_width, #2 + bne 0f + + vext.32 block_masks_shifted, block_masks, block_masks, #1 + vorr.u32 block_masks, block_masks, block_masks_shifted + vdup.u8 draw_mask_fb_ptr_a, block_masks[0] + vdup.u8 draw_mask_fb_ptr_b, block_masks[1] + + 1: + add num_blocks, num_blocks, block_width + cmp num_blocks, #MAX_BLOCKS + blgt setup_sprites_16bpp_flush + + and texture_block_ptr, texture_offset_base, texture_mask + subs height, height, #1 + + add texture_block_ptr, texture_page_ptr, texture_block_ptr + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + + add texture_offset_base, texture_offset_base, #2048 + add fb_ptr, fb_ptr, #2048*2 + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + bne 1b + + ldmia sp!, { r4 - r11, pc } + + 0: + add num_blocks, num_blocks, block_width + mov texture_offset, texture_offset_base + + vdup.u8 draw_mask_fb_ptr_a, block_masks[0] // left_mask_bits + vdup.u8 draw_mask_fb_ptr_b, block_masks[1] + + cmp num_blocks, #MAX_BLOCKS + blgt setup_sprites_16bpp_flush + + add texture_offset_base, texture_offset_base, #2048 + and texture_block_ptr, texture_offset, texture_mask + + add 
texture_block_ptr, texture_page_ptr, texture_block_ptr + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + + subs blocks_remaining, block_width, #2*4 + add texture_offset, texture_offset, #16 + + vmov.u8 draw_mask_fb_ptr_a, #0 + vmov.u8 draw_mask_fb_ptr_b, #0 + + add fb_ptr, fb_ptr, #16*2 + beq 2f + + 1: + and texture_block_ptr, texture_offset, texture_mask + subs blocks_remaining, blocks_remaining, #4 + + add texture_block_ptr, texture_page_ptr, texture_block_ptr + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + add texture_offset, texture_offset, #16 + + add fb_ptr, fb_ptr, #16*2 + bgt 1b + + 2: + vdup.u8 draw_mask_fb_ptr_a, block_masks[4] // right_mask_bits + vdup.u8 draw_mask_fb_ptr_b, block_masks[5] + + and texture_block_ptr, texture_offset, texture_mask + add texture_block_ptr, texture_page_ptr, texture_block_ptr + + vld1.u32 { texels }, [ texture_block_ptr, :128 ] + + do_texture_block_16bpp_4x(fb_ptr2, draw_mask_fb_ptr_a, draw_mask_fb_ptr_b) + subs height, height, #1 + + add fb_ptr, fb_ptr, fb_ptr_pitch + strh num_blocks, [ psx_gpu, #psx_gpu_num_blocks_offset ] + + bne 0b + + ldmia sp!, { r4 - r11, pc } + + #undef texture_page_ptr #undef vram_ptr #undef dirty_textures_mask @@ -5445,3 +5919,5 @@ function(scale2x_tiles8) nop pop { r4, pc } + +// vim:filetype=armasm -- 2.39.5