From 530ba0614a3e883a28129decc1a6f8bae5a904d7 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 24 Aug 2023 23:07:56 +0300 Subject: [PATCH] gpu_neon: don't crash on large primitives in enhancement mode --- plugins/gpu_neon/psx_gpu/psx_gpu.c | 96 +++++++++++++-------- plugins/gpu_neon/psx_gpu/psx_gpu.h | 18 ++-- plugins/gpu_neon/psx_gpu/psx_gpu_4x.c | 6 +- plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S | 30 +++---- plugins/gpu_neon/psx_gpu/psx_gpu_simd.c | 40 +++++---- plugins/gpu_neon/psx_gpu/psx_gpu_simd.h | 10 +-- plugins/gpu_neon/psx_gpu/vector_ops.h | 9 ++ 7 files changed, 125 insertions(+), 84 deletions(-) diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.c b/plugins/gpu_neon/psx_gpu/psx_gpu.c index e252d04e..370d8f2a 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "common.h" #ifndef NEON_BUILD @@ -772,24 +773,23 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, printf("mismatch on %s %s: %x vs %x\n", #_a, #_b, _a, _b) \ -#ifndef NDEBUG -#define setup_spans_debug_check(span_edge_data_element) \ -{ \ - u32 _num_spans = &span_edge_data_element - psx_gpu->span_edge_data; \ - if (_num_spans > MAX_SPANS) \ - *(volatile int *)0 = 1; \ - if (_num_spans < psx_gpu->num_spans) \ - { \ - if(span_edge_data_element.num_blocks > MAX_BLOCKS_PER_ROW) \ - *(volatile int *)0 = 2; \ - if(span_edge_data_element.y >= 2048) \ - *(volatile int *)0 = 3; \ - } \ -} \ - +#if !defined(NEON_BUILD) && !defined(NDEBUG) +static void setup_spans_debug_check(psx_gpu_struct *psx_gpu, + edge_data_struct *span_edge_data_element) +{ + u32 _num_spans = span_edge_data_element - psx_gpu->span_edge_data; + if (_num_spans > MAX_SPANS) + *(volatile int *)0 = 1; + if (_num_spans < psx_gpu->num_spans) + { + if(span_edge_data_element->num_blocks > MAX_BLOCKS_PER_ROW) + *(volatile int *)0 = 2; + if(span_edge_data_element->y >= 2048) + *(volatile int *)0 = 3; + } +} #else -#define setup_spans_debug_check(span_edge_data_element) \ - +#define setup_spans_debug_check(psx_gpu, span_edge_data_element) #endif #define setup_spans_prologue_alternate_yes() \ @@ -856,6 +856,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_b_offset = psx_gpu->span_b_offset; \ \ vec_8x16u c_0x0001; \ + vec_4x16u c_max_blocks_per_row; \ \ dup_8x16b(c_0x0001, 0x0001); \ dup_8x16b(left_edge, psx_gpu->viewport_start_x); \ @@ -864,6 +865,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, dup_4x16b(c_0x04, 0x04); \ dup_4x16b(c_0x07, 0x07); \ dup_4x16b(c_0xFFFE, 0xFFFE); \ + dup_4x16b(c_max_blocks_per_row, MAX_BLOCKS_PER_ROW); \ #define compute_edge_delta_x2() \ @@ -1087,6 +1089,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, and_4x16b(span_shift, left_right_x_16.high, c_0x07); \ shl_variable_4x16b(span_shift, c_0xFFFE, span_shift); \ shr_4x16b(left_right_x_16.high, left_right_x_16.high, 3); \ + min_4x16b(left_right_x_16.high, left_right_x_16.high, c_max_blocks_per_row); \ \ u32 i; \ for(i = 0; i < 4; i++) \ @@ -1095,7 +1098,7 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, span_edge_data[i].num_blocks = left_right_x_16.high.e[i]; \ span_edge_data[i].right_mask = span_shift.e[i]; \ span_edge_data[i].y = y_x4.e[i]; \ - setup_spans_debug_check(span_edge_data[i]); \ + setup_spans_debug_check(psx_gpu, &span_edge_data[i]); \ } \ \ span_edge_data += 4; \ @@ -1125,7 +1128,9 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ y_x4.e[0] = y_a; \ y_x4.e[1] = y_a + 1; \ @@ -1173,7 +1178,9 @@ void compute_all_gradients(psx_gpu_struct *psx_gpu, vertex_struct *a, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ y_x4.e[0] = y_a; \ y_x4.e[1] = y_a - 1; \ @@ -1363,7 +1370,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_prologue_b(); - if(height_minor_a > 0) + if (height_minor_a > 512) + height_minor_a = 512; + if (height_minor_a > 0) { y_x4.e[0] = y_a; y_x4.e[1] = y_a - 1; @@ -1405,7 +1414,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_clip(increment, no); } - if(height_minor_b > 0) + if (height_minor_b > 512) + height_minor_b = 512; + if (height_minor_b > 0) { y_x4.e[0] = y_a; y_x4.e[1] = y_a + 1; @@ -3045,6 +3056,7 @@ static void render_triangle_p(psx_gpu_struct *psx_gpu, } } } + assert(psx_gpu->span_edge_data[0].y < 1024u); u32 render_state = flags & (RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND | @@ -3914,17 +3926,9 @@ void setup_sprite_16bpp(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, #ifndef NEON_BUILD -void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { - if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | - RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && - (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) - { - setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); - return; - } - u32 right_width = ((width - 1) & 0x7) + 1; u32 right_mask_bits = (0xFF << right_width); u16 *fb_ptr = psx_gpu->vram_out_ptr + (y * 1024) + x; @@ -3992,8 +3996,9 @@ void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, #endif -void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, - s32 u, s32 v, s32 width, s32 height, u32 color) +static void __attribute__((noinline)) +setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) { u32 r = color & 0xFF; u32 g = (color >> 8) & 0xFF; @@ -4007,7 +4012,7 @@ void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, u32 num_width; - if(psx_gpu->num_blocks > MAX_BLOCKS) + if(psx_gpu->num_blocks) { flush_render_block_buffer(psx_gpu); } @@ -4051,6 +4056,29 @@ void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, } } +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color); + +void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, + s32 v, s32 width, s32 height, u32 color) +{ + if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | + RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && + (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) + { + setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); + return; + } + + while (width > 0) + { + s32 w1 = width > 512 ? 512 : width; + setup_sprite_untextured_512(psx_gpu, x, y, 0, 0, w1, height, color); + x += 512; + width -= 512; + } +} + #define setup_sprite_blocks_switch_textured(texture_mode) \ setup_sprite_##texture_mode \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu.h b/plugins/gpu_neon/psx_gpu/psx_gpu.h index 88e40ac4..957b434c 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu.h @@ -15,6 +15,14 @@ #ifndef PSX_GPU_H #define PSX_GPU_H +#define MAX_SPANS 512 +#define MAX_BLOCKS 64 +#define MAX_BLOCKS_PER_ROW 128 + +#define SPAN_DATA_BLOCKS_SIZE 32 + +#ifndef __ASSEMBLER__ + #include "vector_types.h" typedef enum @@ -101,12 +109,6 @@ typedef struct vec_8x16u dither_offsets; } block_struct; -#define MAX_SPANS 512 -#define MAX_BLOCKS 64 -#define MAX_BLOCKS_PER_ROW 128 - -#define SPAN_DATA_BLOCKS_SIZE 32 - typedef struct render_block_handler_struct render_block_handler_struct; typedef struct @@ -261,5 +263,5 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, const vertex_struct * __restrict__ a, const vertex_struct * __restrict__ b, const vertex_struct * __restrict__ c); -#endif - +#endif // __ASSEMBLER__ +#endif // PSX_GPU_H diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c index d7ec3409..bd6c7a1f 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_4x.c @@ -237,7 +237,11 @@ void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, static void setup_sprite_untextured_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { - setup_sprite_untextured(psx_gpu, x, y, u, v, width * 2, height * 2, color); + width *= 2; + height *= 2; + if (width > 1024) + width = 1024; + setup_sprite_untextured(psx_gpu, x, y, u, v, width, height, color); } #define setup_sprite_blocks_switch_textured_4x(texture_mode) \ diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index c62c1baa..f0ba39f3 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -13,15 +13,9 @@ * General Public License for more details. */ -#define MAX_SPANS 512 -#define MAX_BLOCKS 64 -#define MAX_BLOCKS_PER_ROW 128 - -#define RENDER_STATE_MASK_EVALUATE 0x20 -#define RENDER_FLAGS_MODULATE_TEXELS 0x1 -#define RENDER_FLAGS_BLEND 0x2 #define RENDER_INTERLACE_ENABLED 0x1 +#include "psx_gpu.h" #include "psx_gpu_offsets.h" #define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4) @@ -228,7 +222,6 @@ #ifdef __MACH__ #define flush_render_block_buffer _flush_render_block_buffer -#define setup_sprite_untextured_simple _setup_sprite_untextured_simple #define update_texture_8bpp_cache _update_texture_8bpp_cache #endif @@ -565,6 +558,8 @@ function(compute_all_gradients) #define left_x_32_low d22 #define left_x_32_high d23 +#define tmp_max_blocks d20 + #define edges_xy q0 #define edges_dx_dy d2 #define edge_shifts d3 @@ -819,8 +814,10 @@ function(compute_all_gradients) str b, [span_b_offset], #4; \ setup_spans_adjust_interpolants_##direction(); \ \ + vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \ vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \ vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \ \ vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \ \ @@ -867,8 +864,10 @@ function(compute_all_gradients) str b, [span_b_offset], #4; \ setup_spans_adjust_interpolants_##direction(); \ \ - vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \ vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \ + vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \ \ vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \ \ @@ -908,7 +907,9 @@ function(compute_all_gradients) ble 1f; \ \ orr temp, y_a, y_a, lsl #16; \ + cmp height, #512; \ add temp, temp, #(1 << 16); \ + movgt height, #512; \ add y_a, temp, #2; \ add y_a, y_a, #(2 << 16); \ vmov y_x4, temp, y_a; \ @@ -963,7 +964,9 @@ function(compute_all_gradients) ble 1f; \ \ orr temp, y_a, y_a, lsl #16; \ + cmp height, #512; \ sub temp, temp, #(1 << 16); \ + movgt height, #512; \ sub y_a, temp, #2; \ sub y_a, y_a, #(2 << 16); \ vmov y_x4, temp, y_a; \ @@ -5826,14 +5829,7 @@ function(setup_sprite_16bpp_4x) .align 3 -function(setup_sprite_untextured) - ldrh r12, [psx_gpu, #psx_gpu_render_state_offset] - tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \ - | RENDER_FLAGS_BLEND) - ldrbeq r12, [psx_gpu, #psx_gpu_render_mode_offset] - tsteq r12, #RENDER_INTERLACE_ENABLED - beq setup_sprite_untextured_simple - +function(setup_sprite_untextured_512) stmdb sp!, { r4 - r11, r14 } ldr width, [sp, #40] diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c index 00392549..ac4af9da 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.c @@ -115,6 +115,7 @@ typedef union #define gvhaddq_u16(d, a, b) d.u16 = vhaddq_u16(a.u16, b.u16) #define gvmax_s16(d, a, b) d.s16 = vmax_s16(a.s16, b.s16) #define gvmin_s16(d, a, b) d.s16 = vmin_s16(a.s16, b.s16) +#define gvmin_u16(d, a, b) d.u16 = vmin_u16(a.u16, b.u16) #define gvminq_u8(d, a, b) d.u8 = vminq_u8(a.u8, b.u8) #define gvminq_u16(d, a, b) d.u16 = vminq_u16(a.u16, b.u16) #define gvmla_s32(d, a, b) d.s32 = vmla_s32(d.s32, a.s32, b.s32) @@ -353,7 +354,8 @@ typedef union } #endif // !__SSSE3__ #ifdef __SSE4_1__ -#define gvminq_u16(d, a, b) d.m = _mm_min_epu16(a.m, b.m) +#define gvmin_u16(d, a, b) d.m = _mm_min_epu16(a.m, b.m) +#define gvminq_u16 gvmin_u16 #define gvmovl_u8(d, s) d.m = _mm_cvtepu8_epi16(s.m) #define gvmovl_s8(d, s) d.m = _mm_cvtepi8_epi16(s.m) #define gvmovl_s32(d, s) d.m = _mm_cvtepi32_epi64(s.m) @@ -463,11 +465,12 @@ typedef union // can do this because the caller needs the msb clear #define gvhaddq_u16(d, a, b) d.u16 = (a.u16 + b.u16) >> 1 #endif -#ifndef gvminq_u16 -#define gvminq_u16(d, a, b) { \ +#ifndef gvmin_u16 +#define gvmin_u16(d, a, b) { \ gvu16 t_ = a.u16 < b.u16; \ d.u16 = (a.u16 & t_) | (b.u16 & ~t_); \ } +#define gvminq_u16 gvmin_u16 #endif #ifndef gvmlsq_s32 #define gvmlsq_s32(d, a, b) d.s32 -= a.s32 * b.s32 @@ -1093,6 +1096,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, span_b_offset = psx_gpu->span_b_offset; \ \ vec_8x16u c_0x0001; \ + vec_4x16u c_max_blocks_per_row; \ \ gvdupq_n_u16(c_0x0001, 0x0001); \ gvdupq_n_u16(left_edge, psx_gpu->viewport_start_x); \ @@ -1101,6 +1105,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvdup_n_u16(c_0x04, 0x04); \ gvdup_n_u16(c_0x07, 0x07); \ gvdup_n_u16(c_0xFFFE, 0xFFFE); \ + gvdup_n_u16(c_max_blocks_per_row, MAX_BLOCKS_PER_ROW); \ #if defined(__ARM_NEON) || defined(__ARM_NEON__) // better encoding, remaining bits are unused anyway @@ -1351,6 +1356,7 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, gvand(span_shift, left_right_x_16_hi, c_0x07); \ setup_spans_make_span_shift(span_shift); \ gvshr_n_u16(left_right_x_16_hi, left_right_x_16_hi, 3); \ + gvmin_u16(left_right_x_16_hi, left_right_x_16_hi, c_max_blocks_per_row); \ \ gvst4_pi_u16(left_right_x_16_lo, left_right_x_16_hi, span_shift, y_x4, \ span_edge_data); \ @@ -1380,7 +1386,9 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ u64 y_x4_ = ((u64)(y_a + 3) << 48) | ((u64)(u16)(y_a + 2) << 32) \ | (u32)((y_a + 1) << 16) | (u16)y_a; \ @@ -1426,7 +1434,9 @@ void compute_all_gradients(psx_gpu_struct * __restrict__ psx_gpu, \ setup_spans_prologue_b(); \ \ - if(height > 0) \ + if (height > 512) \ + height = 512; \ + if (height > 0) \ { \ u64 y_x4_ = ((u64)(y_a - 3) << 48) | ((u64)(u16)(y_a - 2) << 32) \ | (u32)((y_a - 1) << 16) | (u16)y_a; \ @@ -1642,7 +1652,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_prologue_b(); - if(height_minor_a > 0) + if (height_minor_a > 512) + height_minor_a = 512; + if (height_minor_a > 0) { u64 y_x4_ = ((u64)(y_a - 3) << 48) | ((u64)(u16)(y_a - 2) << 32) | (u32)((y_a - 1) << 16) | (u16)y_a; @@ -1683,7 +1695,9 @@ void setup_spans_up_down(psx_gpu_struct *psx_gpu, vertex_struct *v_a, setup_spans_clip(increment, no); } - if(height_minor_b > 0) + if (height_minor_b > 512) + height_minor_b = 512; + if (height_minor_b > 0) { u64 y_x4_ = ((u64)(y_a + 3) << 48) | ((u64)(u16)(y_a + 2) << 32) | (u32)((y_a + 1) << 16) | (u16)y_a; @@ -3167,19 +3181,11 @@ void blend_blocks_textured_unblended_off(psx_gpu_struct *psx_gpu) { } -void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color) { - if((psx_gpu->render_state & (RENDER_STATE_MASK_EVALUATE | - RENDER_FLAGS_MODULATE_TEXELS | RENDER_FLAGS_BLEND)) == 0 && - (psx_gpu->render_mode & RENDER_INTERLACE_ENABLED) == 0) - { - setup_sprite_untextured_simple(psx_gpu, x, y, u, v, width, height, color); - return; - } - #if 0 - setup_sprite_untextured_(psx_gpu, x, y, u, v, width, height, color); + setup_sprite_untextured_512_(psx_gpu, x, y, u, v, width, height, color); return; #endif u32 right_width = ((width - 1) & 0x7) + 1; diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h index a8080aff..3d1d1bdd 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_simd.h @@ -84,8 +84,7 @@ #define setup_sprite_4bpp_4x setup_sprite_4bpp_4x_ #define setup_sprite_8bpp_4x setup_sprite_8bpp_4x_ #define setup_sprite_16bpp_4x setup_sprite_16bpp_4x_ -#define setup_sprite_untextured setup_sprite_untextured_ -#define setup_sprite_untextured_simple setup_sprite_untextured_simple_ +#define setup_sprite_untextured_512 setup_sprite_untextured_512_ #define scale2x_tiles8 scale2x_tiles8_ #endif @@ -205,10 +204,8 @@ void setup_sprite_8bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, void setup_sprite_16bpp_4x(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); -void setup_sprite_untextured(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, +void setup_sprite_untextured_512(psx_gpu_struct *psx_gpu, s32 x, s32 y, s32 u, s32 v, s32 width, s32 height, u32 color); -void setup_sprite_untextured_simple(psx_gpu_struct *psx_gpu, s32 x, s32 y, - s32 u, s32 v, s32 width, s32 height, u32 color); void scale2x_tiles8(void *dst, const void *src, int w8, int h); @@ -275,7 +272,6 @@ void scale2x_tiles8(void *dst, const void *src, int w8, int h); #undef setup_sprite_4bpp_4x #undef setup_sprite_8bpp_4x #undef setup_sprite_16bpp_4x -#undef setup_sprite_untextured -#undef setup_sprite_untextured_simple +#undef setup_sprite_untextured_512 #undef scale2x_tiles8 #endif diff --git a/plugins/gpu_neon/psx_gpu/vector_ops.h b/plugins/gpu_neon/psx_gpu/vector_ops.h index 6f2bcbf7..6bc76433 100644 --- a/plugins/gpu_neon/psx_gpu/vector_ops.h +++ b/plugins/gpu_neon/psx_gpu/vector_ops.h @@ -525,6 +525,15 @@ (dest).e[_i] = result; \ }) \ +#define min_4x16b(dest, source_a, source_b) \ + foreach_element(4, \ + { \ + s32 result = (source_a).e[_i]; \ + if((source_b).e[_i] < result) \ + result = (source_b).e[_i]; \ + (dest).e[_i] = result; \ + }) \ + #define min_8x16b(dest, source_a, source_b) \ foreach_element(8, \ { \ -- 2.39.5