X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=plugins%2Fgpu_neon%2Fpsx_gpu%2Fpsx_gpu_arm_neon.S;h=7c820d273cecd041e709df8b0e0ad18a0391774b;hp=110c868a438dbbee5496dc682878afef62fa699e;hb=refs%2Fheads%2Fmaster;hpb=25e52b2c51afd3609aa2a0e218036d27520af510 diff --git a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S index 110c868a..ffbea043 100644 --- a/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S +++ b/plugins/gpu_neon/psx_gpu/psx_gpu_arm_neon.S @@ -13,15 +13,9 @@ * General Public License for more details. */ -#define MAX_SPANS 512 -#define MAX_BLOCKS 64 -#define MAX_BLOCKS_PER_ROW 128 - -#define RENDER_STATE_MASK_EVALUATE 0x20 -#define RENDER_FLAGS_MODULATE_TEXELS 0x1 -#define RENDER_FLAGS_BLEND 0x2 #define RENDER_INTERLACE_ENABLED 0x1 +#include "psx_gpu.h" #include "psx_gpu_offsets.h" #define psx_gpu_b_dx_offset (psx_gpu_b_block_span_offset + 4) @@ -34,6 +28,16 @@ .syntax unified .text +#if 0 +#define save_abi_regs() \ + vpush {q4-q7} +#define restore_abi_regs() \ + vpop {q4-q7} +#else +#define save_abi_regs() +#define restore_abi_regs() +#endif + #define psx_gpu r0 #define v_a r1 #define v_b r2 @@ -194,26 +198,18 @@ .align 4 -#ifndef __MACH__ +#include "arm_features.h" + +#define function(name) FUNCTION(name): -#define function(name) \ - .global name; \ - .type name, %function; \ - name: \ +#ifndef TEXRELS_FORBIDDEN #define JT_OP_REL(table_label, index_reg, temp) #define JT_OP(x...) x #define JTE(start, target) target -#define EXTRA_UNSAVED_REGS - #else -#define function(name) \ - .globl _##name; \ - name: \ - _##name: \ - #define JT_OP_REL(table_label, index_reg, temp) \ adr temp, table_label; \ ldr temp, [temp, index_reg, lsl #2]; \ @@ -222,13 +218,11 @@ #define JT_OP(x...) #define JTE(start, target) (target - start) -// r7 is preserved, but add it for EABI alignment.. -#define EXTRA_UNSAVED_REGS r7, r9, +#endif +#ifdef __MACH__ #define flush_render_block_buffer _flush_render_block_buffer -#define setup_sprite_untextured_simple _setup_sprite_untextured_simple #define update_texture_8bpp_cache _update_texture_8bpp_cache - #endif @ r0: psx_gpu @@ -242,6 +236,7 @@ function(compute_all_gradients) @ r12 = psx_gpu->triangle_area ldr r12, [psx_gpu, #psx_gpu_triangle_area_offset] stmdb sp!, { r4 - r11, lr } + save_abi_regs() @ load exponent of 62 into upper half of double movw r4, #0 @@ -367,8 +362,8 @@ function(compute_all_gradients) sub r14, r14, #(62 - 12) @ r14 = shift - (62 - FIXED_BITS) vshll.u16 uvrg_base, uvrg0, #16 @ uvrg_base = uvrg0 << 16 - vdup.u32 r_shift, r14 @ r_shift = { shift, shift, shift, shift } - + vdup.u32 r_shift, r14 @ r_shift = { shift, shift*, shift, shift* } + @ * - vshl.u64: ignored by hw vadd.u32 uvrg_base, uvrgb_phase vabs.s32 ga_uvrg_x, ga_uvrg_x @ ga_uvrg_x = abs(ga_uvrg_x) @@ -457,6 +452,7 @@ function(compute_all_gradients) stmia store_b, { g_bx0, g_bx, g_bx2, g_bx3, b_base, g_by } + restore_abi_regs() ldmia sp!, { r4 - r11, pc } @@ -562,6 +558,8 @@ function(compute_all_gradients) #define left_x_32_low d22 #define left_x_32_high d23 +#define tmp_max_blocks d20 + #define edges_xy q0 #define edges_dx_dy d2 #define edge_shifts d3 @@ -587,6 +585,7 @@ function(compute_all_gradients) #define setup_spans_prologue() \ stmdb sp!, { r4 - r11, lr }; \ + save_abi_regs(); \ \ ldrsh x_a, [v_a, #8]; \ ldrsh x_b, [v_b, #8]; \ @@ -815,8 +814,10 @@ function(compute_all_gradients) str b, [span_b_offset], #4; \ setup_spans_adjust_interpolants_##direction(); \ \ + vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \ vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \ vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \ \ vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \ \ @@ -863,8 +864,10 @@ function(compute_all_gradients) str b, [span_b_offset], #4; \ setup_spans_adjust_interpolants_##direction(); \ \ - vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmov.u16 tmp_max_blocks, #MAX_BLOCKS_PER_ROW; \ vshr.u16 left_right_x_16_high, left_right_x_16_high, #3; \ + vshl.u16 span_shifts, c_0xFFFE, span_shifts; \ + vmin.u16 left_right_x_16_high, left_right_x_16_high, tmp_max_blocks; \ \ vst4.u16 { left_right_x_16, span_shifts_y }, [span_edge_data]!; \ \ @@ -904,7 +907,9 @@ function(compute_all_gradients) ble 1f; \ \ orr temp, y_a, y_a, lsl #16; \ + cmp height, #512; \ add temp, temp, #(1 << 16); \ + movgt height, #512; \ add y_a, temp, #2; \ add y_a, y_a, #(2 << 16); \ vmov y_x4, temp, y_a; \ @@ -959,7 +964,9 @@ function(compute_all_gradients) ble 1f; \ \ orr temp, y_a, y_a, lsl #16; \ + cmp height, #512; \ sub temp, temp, #(1 << 16); \ + movgt height, #512; \ sub y_a, temp, #2; \ sub y_a, y_a, #(2 << 16); \ vmov y_x4, temp, y_a; \ @@ -983,6 +990,7 @@ function(compute_all_gradients) #define setup_spans_epilogue() \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ @@ -1357,6 +1365,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 uvrg_dx4, uvrg_dx, #2; \ \ ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \ @@ -1586,6 +1595,7 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ @@ -1593,9 +1603,9 @@ function(setup_blocks_shaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ @@ -1626,6 +1636,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 uvrg_dx4, uvrg_dx, #2; \ \ vshl.u32 uvrg_dx8, uvrg_dx, #3; \ @@ -1783,6 +1794,7 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ @@ -1790,9 +1802,9 @@ function(setup_blocks_unshaded_textured_dithered_##swizzling##_indirect) \ vpush { texture_mask }; \ vpush { uvrg_dx4 }; \ \ - stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + stmdb sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; /* r14=num_blocks */ \ bl flush_render_block_buffer; \ - ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12, r14 }; \ + ldmia sp!, { r0 - r3, EXTRA_UNSAVED_REGS r12 }; \ \ vpop { uvrg_dx4 }; \ vpop { texture_mask }; \ @@ -1819,6 +1831,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) bxeq lr stmdb sp!, { r4 - r11, r14 } + save_abi_regs() vld1.u32 { test_mask }, [psx_gpu, :128] ldr color, [psx_gpu, #psx_gpu_triangle_color_offset] @@ -1901,6 +1914,7 @@ function(setup_blocks_unshaded_untextured_undithered_unswizzled_indirect) strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] bne 0b + restore_abi_regs() ldmia sp!, { r4 - r11, pc } 2: @@ -2123,6 +2137,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 rg_dx4, rg_dx, #2; \ \ ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \ @@ -2315,6 +2330,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_indirect) \ strh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc }; \ \ 2: \ @@ -2366,6 +2382,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ bxeq lr; \ \ stmdb sp!, { r4 - r11, r14 }; \ + save_abi_regs(); \ vshl.u32 rg_dx4, rg_dx, #2; \ \ ldr b_dx, [psx_gpu, #psx_gpu_b_dx_offset]; \ @@ -2586,6 +2603,7 @@ function(setup_blocks_shaded_untextured_##dithering##_unswizzled_direct) \ \ bne 0b; \ \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ setup_blocks_shaded_untextured_direct_builder(undithered) @@ -3159,6 +3177,7 @@ function(texture_blocks_16bpp) .align 3; \ \ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ + save_abi_regs(); \ shade_blocks_textured_modulated_prologue_##shading(dithering, target); \ stmdb sp!, { r4 - r5, lr }; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ @@ -3276,7 +3295,9 @@ function(shade_blocks_##shading##_textured_modulated_##dithering##_##target) \ shade_blocks_textured_modulated_store_draw_mask_##target(28); \ shade_blocks_textured_modulated_store_pixels_##target(); \ \ - ldmia sp!, { r4 - r5, pc } \ + ldmia sp!, { r4 - r5, lr }; \ + restore_abi_regs(); \ + bx lr \ shade_blocks_textured_modulated_builder(shaded, dithered, direct); @@ -3341,7 +3362,8 @@ shade_blocks_textured_modulated_builder(unshaded, undithered, indirect); .align 3 function(shade_blocks_textured_unmodulated_indirect) - str r14, [sp, #-4] + stmdb sp!, { r4, r14 } + save_abi_regs() add draw_mask_bits_ptr, psx_gpu, #(psx_gpu_blocks_offset + 40) ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -3384,13 +3406,15 @@ function(shade_blocks_textured_unmodulated_indirect) vorr.u16 draw_mask_combined, draw_mask, zero_mask vst1.u32 { draw_mask_combined }, [draw_mask_store_ptr, :128], c_64 - ldr pc, [sp, #-4] + restore_abi_regs() + ldmia sp!, { r4, pc } .align 3 function(shade_blocks_textured_unmodulated_direct) stmdb sp!, { r4, r14 } + save_abi_regs() add draw_mask_bits_ptr, psx_gpu, #(psx_gpu_blocks_offset + 40) ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -3452,6 +3476,7 @@ function(shade_blocks_textured_unmodulated_direct) vst1.u16 { fb_pixels_next }, [fb_ptr_next] + restore_abi_regs() ldmia sp!, { r4, pc } 4: @@ -3471,6 +3496,7 @@ function(shade_blocks_unshaded_untextured_indirect) function(shade_blocks_unshaded_untextured_direct) stmdb sp!, { r4, r14 } + save_abi_regs() add draw_mask_ptr, psx_gpu, #psx_gpu_blocks_offset ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -3517,6 +3543,7 @@ function(shade_blocks_unshaded_untextured_direct) vbif.u16 fb_pixels_next, pixels, draw_mask vst1.u16 { fb_pixels_next }, [fb_ptr_next] + restore_abi_regs() ldmia sp!, { r4, pc } 4: @@ -3622,6 +3649,7 @@ function(shade_blocks_unshaded_untextured_direct) \ function(blend_blocks_##texturing##_average_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -3703,6 +3731,7 @@ function(blend_blocks_##texturing##_average_##mask_evaluate) \ vbif.u16 fb_pixels_next, blend_pixels, draw_mask_next; \ vst1.u16 { fb_pixels_next }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -3741,6 +3770,7 @@ blend_blocks_average_builder(untextured, on) \ function(blend_blocks_textured_add_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -3826,6 +3856,7 @@ function(blend_blocks_textured_add_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -3845,6 +3876,7 @@ function(blend_blocks_textured_add_##mask_evaluate) \ \ function(blend_blocks_untextured_add_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -3920,6 +3952,7 @@ function(blend_blocks_untextured_add_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -3944,7 +3977,7 @@ blend_blocks_add_untextured_builder(on) #define blend_blocks_subtract_combine_textured() \ vbif.u16 blend_pixels, pixels, blend_mask \ -#define blend_blocks_subtract_set_stb_textured() \ +#define blend_blocks_subtract_set_stp_textured() \ vorr.u16 blend_pixels, #0x8000 \ #define blend_blocks_subtract_msb_mask_textured() \ @@ -3954,7 +3987,7 @@ blend_blocks_add_untextured_builder(on) #define blend_blocks_subtract_combine_untextured() \ -#define blend_blocks_subtract_set_stb_untextured() \ +#define blend_blocks_subtract_set_stp_untextured() \ vorr.u16 blend_pixels, blend_pixels, msb_mask \ #define blend_blocks_subtract_msb_mask_untextured() \ @@ -3977,6 +4010,7 @@ blend_blocks_add_untextured_builder(on) \ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -4019,7 +4053,7 @@ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ vld1.u32 { pixels_next }, [pixel_ptr, :128], c_64; \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ vand.u16 pixels_rb, pixels_next, d128_0x7C1F; \ - blend_blocks_subtract_set_stb_##texturing(); \ + blend_blocks_subtract_set_stp_##texturing(); \ vand.u16 pixels_g, pixels_next, d128_0x03E0; \ blend_blocks_subtract_combine_##texturing(); \ blend_blocks_subtract_set_blend_mask_##texturing(); \ @@ -4047,11 +4081,12 @@ function(blend_blocks_##texturing##_subtract_##mask_evaluate) \ \ blend_blocks_subtract_msb_mask_##texturing(); \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ - blend_blocks_subtract_set_stb_##texturing(); \ + blend_blocks_subtract_set_stp_##texturing(); \ blend_blocks_subtract_combine_##texturing(); \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -4076,6 +4111,7 @@ blend_blocks_subtract_builder(untextured, on) \ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -4119,6 +4155,7 @@ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ ldr fb_ptr_next, [pixel_ptr, #28]; \ \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ + vorr.u16 blend_pixels, #0x8000; /* stp */ \ vbif.u16 blend_pixels, pixels, blend_mask; \ \ vld1.u32 { pixels }, [pixel_ptr, :128], c_64; \ @@ -4154,11 +4191,13 @@ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ \ 1: \ vorr.u16 blend_pixels, fb_pixels_rb, fb_pixels_g; \ - vorr.u16 blend_pixels, blend_pixels, msb_mask; \ + vorr.u16 blend_pixels, #0x8000; /* stp */ \ vbif.u16 blend_pixels, pixels, blend_mask; \ + vorr.u16 blend_pixels, blend_pixels, msb_mask; \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -4178,6 +4217,7 @@ function(blend_blocks_textured_add_fourth_##mask_evaluate) \ \ function(blend_blocks_untextured_add_fourth_##mask_evaluate) \ stmdb sp!, { r4, r14 }; \ + save_abi_regs(); \ add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset; \ ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset]; \ \ @@ -4257,6 +4297,7 @@ function(blend_blocks_untextured_add_fourth_##mask_evaluate) \ vbit.u16 blend_pixels, fb_pixels, draw_mask; \ vst1.u16 { blend_pixels }, [fb_ptr_next]; \ \ + restore_abi_regs(); \ ldmia sp!, { r4, pc }; \ \ 2: \ @@ -4282,6 +4323,7 @@ blend_blocks_add_fourth_untextured_builder(on) function(blend_blocks_textured_unblended_on) stmdb sp!, { r4, r14 } + save_abi_regs() add mask_msb_ptr, psx_gpu, #psx_gpu_mask_msb_offset ldrh num_blocks, [psx_gpu, #psx_gpu_num_blocks_offset] @@ -4321,6 +4363,7 @@ function(blend_blocks_textured_unblended_on) vbif.u16 fb_pixels, pixels, draw_mask vst1.u16 { fb_pixels }, [fb_ptr] + restore_abi_regs() ldmia sp!, { r4, pc } @@ -4790,6 +4833,7 @@ setup_sprite_update_texture_8bpp_cache: setup_sprite_setup_left_draw_mask_fb_ptr##x4mode(); \ \ setup_sprite_tile_column_height_##multi_height(edge_mode, edge, tm, x4mode); \ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ #define setup_sprite_tiled_advance_column() \ @@ -4826,6 +4870,7 @@ setup_sprite_update_texture_8bpp_cache: \ setup_sprite_tiled_advance_column(); \ setup_sprite_tile_column_height_##multi_height(right_mode, left, tm, x4mode);\ + restore_abi_regs(); \ ldmia sp!, { r4 - r11, pc } \ @@ -5184,6 +5229,8 @@ function(setup_sprite_##texture_mode##x4mode) \ ldr height, [sp, #44]; \ add fb_ptr, fb_ptr, y, lsl #11; \ \ + save_abi_regs(); \ + \ add fb_ptr, fb_ptr, x, lsl #1; \ and offset_v, v, #0xF; \ \ @@ -5782,14 +5829,7 @@ function(setup_sprite_16bpp_4x) .align 3 -function(setup_sprite_untextured) - ldrh r12, [psx_gpu, #psx_gpu_render_state_offset] - tst r12, #(RENDER_STATE_MASK_EVALUATE | RENDER_FLAGS_MODULATE_TEXELS \ - | RENDER_FLAGS_BLEND) - ldrbeq r12, [psx_gpu, #psx_gpu_render_mode_offset] - tsteq r12, #RENDER_INTERLACE_ENABLED - beq setup_sprite_untextured_simple - +function(setup_sprite_untextured_512) stmdb sp!, { r4 - r11, r14 } ldr width, [sp, #40] @@ -5907,7 +5947,7 @@ setup_sprite_untextured_height_loop: #define texel_block_expanded_b q2 #define texel_block_expanded_ab q2 #define texel_block_expanded_c q3 -#define texel_block_expanded_d q4 +#define texel_block_expanded_d q0 #define texel_block_expanded_cd q3 function(update_texture_4bpp_cache) @@ -6089,6 +6129,7 @@ function(scale2x_tiles8) mov r14, r2 0: + pld [r1, #1024*2] vld1.u16 { q0 }, [r1, :128]! vld1.u16 { q2 }, [r1, :128]! vmov q1, q0