From aaf0bb597561573da4caa8fcf6a2f969bf317fb3 Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 2 Dec 2024 02:56:09 +0200 Subject: [PATCH] gpu_unai: asm part 5 --- plugins/gpu_unai/gpu_arm.S | 228 ++++++++++++++++++++++++++- plugins/gpu_unai/gpu_arm.h | 29 ++-- plugins/gpu_unai/gpu_inner.h | 87 +++++++--- plugins/gpu_unai/gpu_raster_sprite.h | 15 +- plugins/gpu_unai/gpu_unai.h | 6 +- plugins/gpu_unai/gpulib_if.cpp | 8 +- 6 files changed, 321 insertions(+), 52 deletions(-) diff --git a/plugins/gpu_unai/gpu_arm.S b/plugins/gpu_unai/gpu_arm.S index 9970c028..a516f08f 100644 --- a/plugins/gpu_unai/gpu_arm.S +++ b/plugins/gpu_unai/gpu_arm.S @@ -40,14 +40,65 @@ @ msb of input p0 is assumed to be set .macro semitrans0 p0 p1 t eor \t, \p0, \p1 - and \t, \t, #0x0420 + and \t, \t, #0x0420 sub \p0, \p0, \t orr \p1, \p1, #0x8000 uhadd16 \p0, \p0, \p1 .endm +.macro semitrans0p p0 p1 m421 t + eor \t, \p0, \p1 + and \t, \t, \m421 + add \p0, \p0, \p1 + uhsub16 \p0, \p0, \t @ sub because of borrow into hi16 +.endm + +@ p0 - {p1|r,g,b} // p1* - premasked rgb +.macro semitrans2p p0 p1r p1g p1b m1f t0 t1 + and \t0, \p0, \m1f + and \t1, \p0, \m1f, lsl #5 + and \p0, \p0, \m1f, lsl #10 + uqsub16 \t0, \t0, \p1r + uqsub16 \t1, \t1, \p1g + uqsub16 \p0, \p0, \p1b + orr \t0, \t0, \t1 + orr \p0, \p0, \t0 +.endm + +#else + +@ msb of input p0 is assumed to be set +.macro semitrans0 p0 p1 t + eor \t, \p0, \p1 + and \t, \t, #0x0420 + orr \p1, \p1, #0x8000 + sub \p0, \p0, \t + add \p0, \p0, \p1 + orr \p0, \p0, #0x10000 + mov \p0, \p0, lsr #1 +.endm + +.macro semitrans0p p0 p1 m421 t + eor \t, \p0, \p1 + and \t, \t, \m421 + add \p0, \p0, \p1 + sub \p0, \p0, \t + mov \p0, \p0, lsr #1 +.endm + #endif // HAVE_ARMV6 +.macro semitrans13p p0 p1 m421 t0 + add \t0, \p0, \p1 + eor \p0, \p0, \p1 + and \p0, \p0, \m421 @ low_bits + sub \p0, \t0, \p0 + and \p0, \p0, \m421, lsl #5 @ carries + sub \t0, \t0, \p0 @ modulo + sub \p0, \p0, \p0, lsr #5 @ clamp + orr \p0, \t0, \p0 +.endm + @ in: r0=dst, r2=pal, r12=0x1e @ trashes r6-r8,lr,flags @@ -95,6 +146,91 @@ strhne \rs,[r0, #6] .endm + +@ (void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn) +@ see also poly_untex_st_m +.macro tile_driver_st_m name semit +FUNCTION(\name): + .cfi_startproc + stmfd sp!, {r4-r9,lr} + .cfi_def_cfa_offset 4*7 + .cfi_rel_offset lr, 4*6 + ldr r7, [r3, #0x18] @ y0 + ldr r8, [r3, #0x1c] @ y1 +.if \semit != 2 + mov r4, #0x8000 + orr r4, r4, r4, lsl #16 @ mask 8000 + mov r6, #0x420 + orr r6, r6, #1 + orr r6, r6, r6, lsl #16 @ mask 0421 +.endif +.if \semit == 2 + and r4, r1, #0x03e0 + and r5, r1, #0x7c00 + and r1, r1, #0x001f + orr r4, r4, r4, lsl #16 @ premasked g + orr r5, r5, r5, lsl #16 @ premasked b + mov r6, #0x00001f + orr r6, #0x1f0000 @ mask +.elseif \semit == 3 + mov r1, r1, lsr #2 + bic r1, r1, #(0x0c60>>2) +.endif + orr r1, r1, r1, lsl #16 + sub r3, r8, r7 @ h + mov r7, r2 @ save w +0: + ldrh r8, [r0] + pld_ r0, #2048 + tst r0, #2 + beq 1f + sub r2, #1 +.if \semit == 0 + bic r8, r8, r4 + semitrans0p r8, r1, r6, lr +.elseif \semit == 1 || \semit == 3 + bic r8, r8, r4 + semitrans13p r8, r1, r6, lr +.elseif \semit == 2 + semitrans2p r8, r1, r4, r5, r6, r9, lr +.endif + strh r8, [r0], #2 +1: + ldr r8, [r0] + pld_ r0, #32 + subs r2, r2, #2 +.if \semit == 0 + bic r8, r8, r4 + semitrans0p r8, r1, r6, lr +.elseif \semit == 1 || \semit == 3 + bic r8, r8, r4 + semitrans13p r8, r1, r6, lr +.elseif \semit == 2 + semitrans2p r8, r1, r4, r5, r6, r9, lr +.endif + strpl r8, [r0], #4 + bpl 1b +2: + tst r2, #1 + strhne r8, [r0], #2 + mov r2, r7 @ w + add r0, r0, #2048 + sub r0, r0, r7, lsl #1 + subs r3, r3, #1 + bgt 0b + + ldmfd sp!, {r4-r9,pc} + .cfi_endproc +.endm + + +tile_driver_st_m tile_driver_st0_asm, 0 +tile_driver_st_m tile_driver_st1_asm, 1 +tile_driver_st_m tile_driver_st3_asm, 3 +#ifdef HAVE_ARMV6 +tile_driver_st_m tile_driver_st2_asm, 2 +#endif + @ (u16 *d, void *s, u16 *pal, int lines) sprite_4bpp_x16_asm_: ldr r12,[r3, #0x18] @ y0 @@ -106,7 +242,7 @@ FUNCTION(sprite_4bpp_x16_asm): stmfd sp!, {r4-r8,lr} .cfi_def_cfa_offset 4*6 .cfi_rel_offset lr, 4*5 - mov r12, #0x1e @ empty pixel + mov r12, #0x1e 0: ldmia r1, {r4,r5} @@ -343,15 +479,15 @@ FUNCTION(\name): .endm sprite_driver_l_st sprite_driver_4bpp_l0_std_asm, 4, 0, -1 +sprite_driver_l_st sprite_driver_4bpp_l0_st0_asm, 4, 0, 0 sprite_driver_l_st sprite_driver_8bpp_l0_std_asm, 8, 0, -1 +sprite_driver_l_st sprite_driver_8bpp_l0_st0_asm, 8, 0, 0 #ifdef HAVE_ARMV6 -sprite_driver_l_st sprite_driver_4bpp_l0_st0_asm, 4, 0, 0 sprite_driver_l_st sprite_driver_4bpp_l1_std_asm, 4, 1, -1 sprite_driver_l_st sprite_driver_4bpp_l1_st0_asm, 4, 1, 0 sprite_driver_l_st sprite_driver_4bpp_l1_st1_asm, 4, 1, 1 -sprite_driver_l_st sprite_driver_8bpp_l0_st0_asm, 8, 0, 0 sprite_driver_l_st sprite_driver_8bpp_l1_std_asm, 8, 1, -1 sprite_driver_l_st sprite_driver_8bpp_l1_st0_asm, 8, 1, 0 sprite_driver_l_st sprite_driver_8bpp_l1_st1_asm, 8, 1, 1 @@ -414,6 +550,82 @@ FUNCTION(sprite_driver_16bpp_asm): .cfi_endproc +@ (void *d, const gpu_unai_inner_t *inn, int count) +@ see also tile_driver_st_m +.macro poly_untex_st_m name semit +FUNCTION(\name): + .cfi_startproc + ldrh r1, [r1, #0x38] @ rgb + stmfd sp!, {r4-r7,lr} + .cfi_def_cfa_offset 4*5 + .cfi_rel_offset lr, 4*4 +.if \semit != 2 + mov r4, #0x8000 + orr r4, r4, r4, lsl #16 @ mask 8000 + mov r6, #0x420 + orr r6, r6, #1 + orr r6, r6, r6, lsl #16 @ mask 0421 +.endif +.if \semit == 2 + and r4, r1, #0x03e0 + and r5, r1, #0x7c00 + and r1, r1, #0x001f + orr r4, r4, r4, lsl #16 @ premasked g + orr r5, r5, r5, lsl #16 @ premasked b + mov r6, #0x00001f + orr r6, #0x1f0000 @ mask +.elseif \semit == 3 + mov r1, r1, lsr #2 + bic r1, r1, #(0x0c60>>2) +.endif + orr r1, r1, r1, lsl #16 +0: + ldrh r3, [r0] + pld_ r0, #2048 + tst r0, #2 + beq 1f + sub r2, #1 +.if \semit == 0 + bic r3, r3, r4 + semitrans0p r3, r1, r6, lr +.elseif \semit == 1 || \semit == 3 + bic r3, r3, r4 + semitrans13p r3, r1, r6, lr +.elseif \semit == 2 + semitrans2p r3, r1, r4, r5, r6, r7, lr +.endif + strh r3, [r0], #2 +1: + ldr r3, [r0] + pld_ r0, #32 + subs r2, r2, #2 +.if \semit == 0 + bic r3, r3, r4 + semitrans0p r3, r1, r6, lr +.elseif \semit == 1 || \semit == 3 + bic r3, r3, r4 + semitrans13p r3, r1, r6, lr +.elseif \semit == 2 + semitrans2p r3, r1, r4, r5, r6, r7, lr +.endif + strpl r3, [r0], #4 + bpl 1b +2: + tst r2, #1 + strhne r3, [r0], #2 + + ldmfd sp!, {r4-r7,pc} + .cfi_endproc +.endm + +poly_untex_st_m poly_untex_st0_asm, 0 +poly_untex_st_m poly_untex_st1_asm, 1 +poly_untex_st_m poly_untex_st3_asm, 3 +#ifdef HAVE_ARMV6 +poly_untex_st_m poly_untex_st2_asm, 2 +#endif + + .macro poly_4_8bpp_asm_m name bpp light semit FUNCTION(\name): @ (void *d, const gpu_unai_inner_t *inn, int count) .cfi_startproc @@ -568,15 +780,15 @@ v_\name: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked .cfi_endproc .endm -poly_4_8bpp_asm_m poly_4bpp_asm, 4, 0, -1 -poly_4_8bpp_asm_m poly_8bpp_asm, 8, 0, -1 +poly_4_8bpp_asm_m poly_4bpp_asm, 4, 0, -1 +poly_4_8bpp_asm_m poly_4bpp_l0_st0_asm, 4, 0, 0 +poly_4_8bpp_asm_m poly_8bpp_asm, 8, 0, -1 +poly_4_8bpp_asm_m poly_8bpp_l0_st0_asm, 8, 0, 0 #ifdef HAVE_ARMV6 -poly_4_8bpp_asm_m poly_4bpp_l0_st0_asm, 4, 0, 0 poly_4_8bpp_asm_m poly_4bpp_l1_std_asm, 4, 1, -1 poly_4_8bpp_asm_m poly_4bpp_l1_st0_asm, 4, 1, 0 -poly_4_8bpp_asm_m poly_8bpp_l0_st0_asm, 8, 0, 0 poly_4_8bpp_asm_m poly_8bpp_l1_std_asm, 8, 1, -1 poly_4_8bpp_asm_m poly_8bpp_l1_st0_asm, 8, 1, 0 diff --git a/plugins/gpu_unai/gpu_arm.h b/plugins/gpu_unai/gpu_arm.h index 6b8c81a9..d69490ff 100644 --- a/plugins/gpu_unai/gpu_arm.h +++ b/plugins/gpu_unai/gpu_arm.h @@ -7,6 +7,10 @@ extern "C" { struct gpu_unai_inner_t; +void tile_driver_st0_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn); +void tile_driver_st1_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn); +void tile_driver_st3_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn); + void sprite_driver_4bpp_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); void sprite_driver_8bpp_asm(void *pPixel, const u8 *pTxt_base, @@ -15,36 +19,43 @@ void sprite_driver_16bpp_asm(void *pPixel, const void *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); void sprite_4bpp_x16_asm(void *d, const void *s, void *pal, int lines); -void poly_4bpp_asm(void *d, const struct gpu_unai_inner_t *inn, int count); -void poly_8bpp_asm(void *d, const struct gpu_unai_inner_t *inn, int count); - void sprite_driver_4bpp_l0_std_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_4bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); void sprite_driver_8bpp_l0_std_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_8bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); + +void poly_untex_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_untex_st1_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_untex_st3_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_4bpp_asm (void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_4bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_8bpp_asm (void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_8bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); #ifdef HAVE_ARMV6 -void sprite_driver_4bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base, - u32 count, const struct gpu_unai_inner_t *inn); +void tile_driver_st2_asm(void *d, u16 c, u32 cnt, const struct gpu_unai_inner_t *inn); + void sprite_driver_4bpp_l1_std_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); void sprite_driver_4bpp_l1_st0_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); void sprite_driver_4bpp_l1_st1_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); -void sprite_driver_8bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base, - u32 count, const struct gpu_unai_inner_t *inn); void sprite_driver_8bpp_l1_std_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); void sprite_driver_8bpp_l1_st0_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); void sprite_driver_8bpp_l1_st1_asm(void *pPixel, const u8 *pTxt_base, u32 count, const struct gpu_unai_inner_t *inn); -void poly_4bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); + +void poly_untex_st2_asm(void *d, const struct gpu_unai_inner_t *inn, int count); void poly_4bpp_l1_std_asm(void *d, const struct gpu_unai_inner_t *inn, int count); void poly_4bpp_l1_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); -void poly_8bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); void poly_8bpp_l1_std_asm(void *d, const struct gpu_unai_inner_t *inn, int count); void poly_8bpp_l1_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h index 14d66444..3281d0fa 100644 --- a/plugins/gpu_unai/gpu_inner.h +++ b/plugins/gpu_unai/gpu_inner.h @@ -58,6 +58,7 @@ #include "arm_features.h" #include "compiler_features.h" #ifdef __arm__ +#include "gpu_arm.h" #include "gpu_inner_blend_arm.h" #include "gpu_inner_light_arm.h" #define gpuBlending gpuBlendingARM @@ -278,7 +279,7 @@ const PSD gpuPixelSpanDrivers[64] = // GPU Tiles innerloops generator template -static void gpuTileSpanFn(le16_t *pDst, u32 count, u16 data) +static inline void gpuTileSpanFn(le16_t *pDst, u16 data, u32 count) { le16_t ldata; @@ -330,7 +331,42 @@ endtile: } } -static void TileNULL(le16_t *pDst, u32 count, u16 data) +template +static noinline void gpuTileDriverFn(le16_t *pDst, u16 data, u32 count, + const gpu_unai_inner_t &inn) +{ + const int li=gpu_unai.inn.ilace_mask; + const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0); + const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1); + const int y1 = inn.y1; + int y0 = inn.y0; + + for (; y0 < y1; ++y0) { + if (!(y0&li) && (y0&pi) != pif) + gpuTileSpanFn(pDst, data, count); + pDst += FRAME_WIDTH; + } +} + +#ifdef __arm__ + +template +static void TileAsm(le16_t *pDst, u16 data, u32 count, const gpu_unai_inner_t &inn) +{ + switch (CF) { + case 0x02: tile_driver_st0_asm(pDst, data, count, &inn); return; + case 0x0a: tile_driver_st1_asm(pDst, data, count, &inn); return; + case 0x1a: tile_driver_st3_asm(pDst, data, count, &inn); return; +#ifdef HAVE_ARMV6 + case 0x12: tile_driver_st2_asm(pDst, data, count, &inn); return; +#endif + } + gpuTileDriverFn(pDst, data, count, inn); +} + +#endif + +static void TileNULL(le16_t *pDst, u16 data, u32 count, const gpu_unai_inner_t &inn) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"TileNULL()\n"); @@ -339,23 +375,35 @@ static void TileNULL(le16_t *pDst, u32 count, u16 data) /////////////////////////////////////////////////////////////////////////////// // Tiles innerloops driver -typedef void (*PT)(le16_t *pDst, u32 count, u16 data); +typedef void (*PT)(le16_t *pDst, u16 data, u32 count, const gpu_unai_inner_t &inn); // Template instantiation helper macros -#define TI(cf) gpuTileSpanFn<(cf)> +#define TI(cf) gpuTileDriverFn<(cf)> #define TN TileNULL +#ifdef __arm__ +#define TA(cf) TileAsm<(cf)> +#else +#define TA(cf) TI(cf) +#endif +#ifdef HAVE_ARMV6 +#define TA6(cf) TileAsm<(cf)> +#else +#define TA6(cf) TI(cf) +#endif #define TIBLOCK(ub) \ - TI((ub)|0x00), TI((ub)|0x02), TI((ub)|0x04), TI((ub)|0x06), \ - TN, TI((ub)|0x0a), TN, TI((ub)|0x0e), \ - TN, TI((ub)|0x12), TN, TI((ub)|0x16), \ - TN, TI((ub)|0x1a), TN, TI((ub)|0x1e) + TI((ub)|0x00), TA6((ub)|0x02), TI((ub)|0x04), TI((ub)|0x06), \ + TN, TA ((ub)|0x0a), TN, TI((ub)|0x0e), \ + TN, TA6((ub)|0x12), TN, TI((ub)|0x16), \ + TN, TA ((ub)|0x1a), TN, TI((ub)|0x1e) -const PT gpuTileSpanDrivers[32] = { +const PT gpuTileDrivers[32] = { TIBLOCK(0<<8), TIBLOCK(1<<8) }; #undef TI #undef TN +#undef TA +#undef TA6 #undef TIBLOCK @@ -446,7 +494,6 @@ endsprite: } #ifdef __arm__ -#include "gpu_arm.h" template static void SpriteMaybeAsm(le16_t *pPixel, u32 count, const u8 *pTxt_base, @@ -467,14 +514,14 @@ static void SpriteMaybeAsm(le16_t *pPixel, u32 count, const u8 *pTxt_base, const u8 *pTxt = pTxt_base + inn.v * 2048; switch (CF) { case 0x20: sprite_driver_4bpp_l0_std_asm(pPixel, pTxt, count, &inn); return; + case 0x22: sprite_driver_4bpp_l0_st0_asm(pPixel, pTxt, count, &inn); return; case 0x40: sprite_driver_8bpp_l0_std_asm(pPixel, pTxt, count, &inn); return; + case 0x42: sprite_driver_8bpp_l0_st0_asm(pPixel, pTxt, count, &inn); return; #ifdef HAVE_ARMV6 case 0x21: sprite_driver_4bpp_l1_std_asm(pPixel, pTxt, count, &inn); return; - case 0x22: sprite_driver_4bpp_l0_st0_asm(pPixel, pTxt, count, &inn); return; case 0x23: sprite_driver_4bpp_l1_st0_asm(pPixel, pTxt, count, &inn); return; case 0x2b: sprite_driver_4bpp_l1_st1_asm(pPixel, pTxt, count, &inn); return; case 0x41: sprite_driver_8bpp_l1_std_asm(pPixel, pTxt, count, &inn); return; - case 0x42: sprite_driver_8bpp_l0_st0_asm(pPixel, pTxt, count, &inn); return; case 0x43: sprite_driver_8bpp_l1_st0_asm(pPixel, pTxt, count, &inn); return; case 0x4b: sprite_driver_8bpp_l1_st1_asm(pPixel, pTxt, count, &inn); return; #endif @@ -761,14 +808,18 @@ template static void PolySpanMaybeAsm(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) { switch (CF) { + case 0x02: poly_untex_st0_asm (pDst, &gpu_unai.inn, count); break; + case 0x0a: poly_untex_st1_asm (pDst, &gpu_unai.inn, count); break; + case 0x1a: poly_untex_st3_asm (pDst, &gpu_unai.inn, count); break; case 0x20: poly_4bpp_asm (pDst, &gpu_unai.inn, count); break; + case 0x22: poly_4bpp_l0_st0_asm(pDst, &gpu_unai.inn, count); break; case 0x40: poly_8bpp_asm (pDst, &gpu_unai.inn, count); break; + case 0x42: poly_8bpp_l0_st0_asm(pDst, &gpu_unai.inn, count); break; #ifdef HAVE_ARMV6 + case 0x12: poly_untex_st2_asm (pDst, &gpu_unai.inn, count); break; case 0x21: poly_4bpp_l1_std_asm(pDst, &gpu_unai.inn, count); break; - case 0x22: poly_4bpp_l0_st0_asm(pDst, &gpu_unai.inn, count); break; case 0x23: poly_4bpp_l1_st0_asm(pDst, &gpu_unai.inn, count); break; case 0x41: poly_8bpp_l1_std_asm(pDst, &gpu_unai.inn, count); break; - case 0x42: poly_8bpp_l0_st0_asm(pDst, &gpu_unai.inn, count); break; case 0x43: poly_8bpp_l1_st0_asm(pDst, &gpu_unai.inn, count); break; #endif default: gpuPolySpanFn(gpu_unai, pDst, count); @@ -801,10 +852,10 @@ typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count); #define TA6(cf) TI(cf) #endif #define TIBLOCK(ub) \ - TI((ub)|0x00), TI((ub)|0x01), TI((ub)|0x02), TI((ub)|0x03), TI((ub)|0x04), TI((ub)|0x05), TI((ub)|0x06), TI((ub)|0x07), \ - TN, TN, TI((ub)|0x0a), TI((ub)|0x0b), TN, TN, TI((ub)|0x0e), TI((ub)|0x0f), \ - TN, TN, TI((ub)|0x12), TI((ub)|0x13), TN, TN, TI((ub)|0x16), TI((ub)|0x17), \ - TN, TN, TI((ub)|0x1a), TI((ub)|0x1b), TN, TN, TI((ub)|0x1e), TI((ub)|0x1f), \ + TI((ub)|0x00), TI((ub)|0x01), TA6((ub)|0x02),TI((ub)|0x03), TI((ub)|0x04), TI((ub)|0x05), TI((ub)|0x06), TI((ub)|0x07), \ + TN, TN, TA((ub)|0x0a), TI((ub)|0x0b), TN, TN, TI((ub)|0x0e), TI((ub)|0x0f), \ + TN, TN, TA6((ub)|0x12),TI((ub)|0x13), TN, TN, TI((ub)|0x16), TI((ub)|0x17), \ + TN, TN, TA((ub)|0x1a), TI((ub)|0x1b), TN, TN, TI((ub)|0x1e), TI((ub)|0x1f), \ TA((ub)|0x20), TA6((ub)|0x21),TA6((ub)|0x22),TA6((ub)|0x23),TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \ TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \ diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h index e314e974..5c7b67ce 100644 --- a/plugins/gpu_unai/gpu_raster_sprite.h +++ b/plugins/gpu_unai/gpu_raster_sprite.h @@ -73,7 +73,7 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteDriver, s32 *w_out, s32 *h_out) gpuSpriteDriver(Pixel, x1, (u8 *)gpu_unai.inn.TBA, gpu_unai.inn); } -void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver, s32 *w_out, s32 *h_out) +void gpuDrawT(PtrUnion packet, const PT gpuTileDriver, s32 *w_out, s32 *h_out) { s32 x0, x1, y0, y1; @@ -103,15 +103,10 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver, s32 *w_out, s32 *h_ou const u16 Data = GPU_RGB16(le32_to_u32(packet.U4[0])); le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; - const int li=gpu_unai.inn.ilace_mask; - const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0); - const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1); - - for (; y0>3)) >> 1]; + PT driver = gpuTileDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); @@ -766,7 +766,7 @@ int do_cmd_list(u32 *list_, int list_len, case 0x6A: case 0x6B: { // Monochrome rectangle (1x1 dot) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00010001); - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + PT driver = gpuTileDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(1, 1)); @@ -777,7 +777,7 @@ int do_cmd_list(u32 *list_, int list_len, case 0x72: case 0x73: { // Monochrome rectangle (8x8) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00080008); - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + PT driver = gpuTileDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); @@ -796,7 +796,7 @@ int do_cmd_list(u32 *list_, int list_len, case 0x7A: case 0x7B: { // Monochrome rectangle (16x16) gpu_unai.PacketBuffer.U4[2] = u32_to_le32(0x00100010); - PT driver = gpuTileSpanDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; + PT driver = gpuTileDrivers[(Blending_Mode | gpu_unai.Masking | Blending | (gpu_unai.PixelMSB>>3)) >> 1]; s32 w = 0, h = 0; gpuDrawT(packet, driver, &w, &h); gput_sum(cpu_cycles_sum, cpu_cycles, gput_sprite(w, h)); -- 2.39.5