From: notaz Date: Sun, 1 Dec 2024 16:14:18 +0000 (+0200) Subject: gpu_unai: asm part 4 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8177857bcdbfaa55095150bbd7e9aad7aa44da02;p=pcsx_rearmed.git gpu_unai: asm part 4 --- diff --git a/Makefile b/Makefile index 4000d489..52275723 100644 --- a/Makefile +++ b/Makefile @@ -272,6 +272,7 @@ OBJS += plugins/gpu_unai/old/if.o else CFLAGS += -DGPU_UNAI_NO_OLD endif +plugins/gpu_unai/gpulib_if.o: plugins/gpu_unai/*.h plugins/gpu_unai/gpulib_if.o: CFLAGS += -DREARMED -DUSE_GPULIB=1 ifneq ($(DEBUG), 1) plugins/gpu_unai/gpulib_if.o \ diff --git a/plugins/gpu_unai/gpu_arm.S b/plugins/gpu_unai/gpu_arm.S index b56951f9..9970c028 100644 --- a/plugins/gpu_unai/gpu_arm.S +++ b/plugins/gpu_unai/gpu_arm.S @@ -17,6 +17,38 @@ #endif .endm +#ifdef HAVE_ARMV6 + +.macro modulate rp mbr mg t0 t1 t2 + and \t0, \rp, #0x001f + and \t1, \rp, #0x03e0 + and \t2, \rp, #0x7c00 + smulbb \t0, \t0, \mbr @ -> 0000 0000 0000 orrr rrxx xxxx xxxx xxxx + smulbt \t1, \t1, \mg @ -> 0000 000o gggg gxxx xxxx xxxx xxx0 0000 + smulbt \t2, \t2, \mbr @ -> 00ob bbbb xxxx xxxx xxxx xx00 0000 0000 + and \rp, \rp, #0x8000 @ retain msb + usat \t0, #5, \t0, asr #14 + usat \t1, #5, \t1, asr #19 + usat \t2, #5, \t2, asr #24 + orr \rp, \rp, \t0 + orr \rp, \rp, \t1, lsl #5 + orr \rp, \rp, \t2, lsl #10 +.endm + +@ http://www.slack.net/~ant/info/rgb_mixing.html +@ p0 = (p0 + p1) / 2; p1 |= 0x8000 +@ msb of input p0 is assumed to be set +.macro semitrans0 p0 p1 t + eor \t, \p0, \p1 + and \t, \t, #0x0420 + sub \p0, \p0, \t + orr \p1, \p1, #0x8000 + uhadd16 \p0, \p0, \p1 +.endm + +#endif // HAVE_ARMV6 + + @ in: r0=dst, r2=pal, r12=0x1e @ trashes r6-r8,lr,flags .macro do_4x_4bpp rs ibase obase @@ -63,11 +95,13 @@ strhne \rs,[r0, #6] .endm -.global sprite_4bpp_x16_asm @ (u16 *d, void *s, u16 *pal, int lines) +@ (u16 *d, void *s, u16 *pal, int lines) sprite_4bpp_x16_asm_: - ldr r2, [r3] @ pal - ldr r3, [r3, #0x1c] @ lines -sprite_4bpp_x16_asm: + ldr r12,[r3, 
#0x18] @ y0 + ldr r2, [r3, #0x04] @ pal + ldr r3, [r3, #0x1c] @ y1 + sub r3, r3, r12 +FUNCTION(sprite_4bpp_x16_asm): .cfi_startproc stmfd sp!, {r4-r8,lr} .cfi_def_cfa_offset 4*6 @@ -99,15 +133,17 @@ sprite_4bpp_x16_asm: .if \is8bpp orr r12, r12, #0x1f0 @ mask=0x01fe .endif - ldr r4, [r3, #4] @ u0 - ldr r5, [r3, #0x1c] @ h + ldr r4, [r3, #0x08] @ u + ldr r5, [r3, #0x1c] @ v1 + ldr r6, [r3, #0x18] @ v0 and r4, r4, #((8 >> \is8bpp) - 1) + sub r5, r5, r6 sub r5, r5, #1 orr r5, r4, r5, lsl #8 @ ((h-1) << 8) | u0_fraction mov r9, r2 @ saved_w mov r10, r0 @ saved_dst mov r11, r1 @ saved_src - ldr r2, [r3] @ pal + ldr r2, [r3, #0x04] @ pal 11: @ line_loop: pld_ r11, #2048 mov r0, r10 @@ -152,10 +188,10 @@ sprite_4bpp_x16_asm: b 12b @ return from fractional_u .endm -.global sprite_driver_4bpp_asm @ (u16 *d, const void *s, int width, spriteDriverArg) -sprite_driver_4bpp_asm: +@ (u16 *d, const void *s, int width, const gpu_unai_inner_t *) +FUNCTION(sprite_driver_4bpp_asm): .cfi_startproc - ldr r12, [r3, #4] @ u0 + ldr r12, [r3, #8] @ u mov r12, r12, lsl #29 orr r12, r12, r2 @ w cmp r12, #16 @@ -183,8 +219,8 @@ sprite_driver_4bpp_asm: .cfi_endproc -.global sprite_driver_8bpp_asm @ (u16 *d, const void *s, int width, spriteDriverArg) -sprite_driver_8bpp_asm: +@ (u16 *d, const void *s, int width, const gpu_unai_inner_t *) +FUNCTION(sprite_driver_8bpp_asm): .cfi_startproc sprite_driver_part1 1 0: @@ -208,41 +244,215 @@ sprite_driver_8bpp_asm: .cfi_endproc -.macro poly_4bpp_init v_target need_rgb +@ (u16 *d, const void *s, int width, const gpu_unai_inner_t *) +.macro sprite_driver_l_st name bpp light semit +FUNCTION(\name): + .cfi_startproc + stmfd sp!, {r4-r11,lr} + .cfi_def_cfa_offset 4*9 @ 9 regs pushed (r4-r11,lr) + .cfi_rel_offset lr, 4*8 + ldr r5, [r3, #0x18] @ y0 + ldr r7, [r3, #0x1c] @ y1 + ldr r8, [r3, #0x20] @ rbg5 + mov r6, r2 @ saved_w + ldr r2, [r3, #0x04] @ pal + ldr r10,[r3, #0x08] @ u + ldr r11,[r3, #0x10] @ u_msk + sub r5, r7, r5 @ h + mov r7, r8, lsl #(8+2) @ 0bbb bb00 0ggg gg00 0rrr rr00
0000 0000 + mov r8, r8, lsl #(16+2)@ 0ggg gg00 ... + mov r3, r11,lsr #10 + orr r6, r3, r6, lsl #16 @ (w << 16) | u_mask + mov r3, r6 + and r10,r10,r6 + +3: @ line_loop: +.if \bpp == 4 + add r9, r1, r10, lsr #1 +.elseif \bpp == 8 + add r9, r1, r10 + pld_ r9, #2048 +.endif +0: +.if \bpp == 4 + ldrb r4, [r1, r10, lsr #1] +.elseif \bpp == 8 + ldrb r4, [r1, r10] +.endif + subs r3, r3, #1<<16 + bmi 1f +.if \bpp == 4 + tst r10, #1 + movne r4, r4, lsr #3 + addeq r4, r4, r4 + and r4, r4, #0x1e +.elseif \bpp == 8 + add r4, r4, r4 @ <<= 1 +.endif + ldrsh r12,[r2, r4] + add r10,r10,#1 + and r10,r10,r6 + add r0, r0, #2 + tst r12,r12 + beq 0b +.if \light && \semit != 1 + modulate r12, r7, r8, r4, r9, lr +.endif +.if \semit == 0 + ldrhmi lr, [r0, #-2] + strhpl r12,[r0, #-2] + bpl 0b + semitrans0 r12, lr, r9 +.elseif \light && \semit == 1 + and r4, r12, #0x001f + and r9, r12, #0x03e0 + and r12, r12, #0x7c00 + ldrhmi r11, [r0, #-2] + smulbb r4, r4, r7 @ -> 0000 0000 0000 orrr rrxx xxxx xxxx xxxx + smulbt r9, r9, r8 @ -> 0000 000o gggg gxxx xxxx xxxx xxx0 0000 + smulbt r12, r12, r7 @ -> 00ob bbbb xxxx xxxx xxxx xx00 0000 0000 + and r8, r11, #0x001f + and lr, r11, #0x03e0 + and r11, r11, #0x7c00 + addmi r4, r4, r8, lsl #14 + addmi r9, r9, lr, lsl #14 + addmi r12, r12, r11, lsl #14 + usat r4, #5, r4, asr #14 + usat r9, #5, r9, asr #19 + usat r12, #5, r12, asr #24 + orrmi r4, r4, #0x8000 + orr r4, r4, r9, lsl #5 + orr r12, r4, r12, lsl #10 + mov r8, r7, lsl #8 @ restore r8 +.endif + strh r12,[r0, #-2] + b 0b +1: + add r0, r0, #2048 + add r1, r1, #2048 + sub r0, r0, r6, lsr #15 @ dst + sub r10,r10,r6, lsr #16 @ u + mov r3, r6 @ (w << 16) | u_mask + and r10,r6, r10 + subs r5, r5, #1 + and r10,r10,#0xff + bgt 3b @ line_loop + + ldmfd sp!, {r4-r11,pc} + .cfi_endproc +.endm + +sprite_driver_l_st sprite_driver_4bpp_l0_std_asm, 4, 0, -1 +sprite_driver_l_st sprite_driver_8bpp_l0_std_asm, 8, 0, -1 + +#ifdef HAVE_ARMV6 + +sprite_driver_l_st sprite_driver_4bpp_l0_st0_asm, 4, 0, 0 
+sprite_driver_l_st sprite_driver_4bpp_l1_std_asm, 4, 1, -1 +sprite_driver_l_st sprite_driver_4bpp_l1_st0_asm, 4, 1, 0 +sprite_driver_l_st sprite_driver_4bpp_l1_st1_asm, 4, 1, 1 +sprite_driver_l_st sprite_driver_8bpp_l0_st0_asm, 8, 0, 0 +sprite_driver_l_st sprite_driver_8bpp_l1_std_asm, 8, 1, -1 +sprite_driver_l_st sprite_driver_8bpp_l1_st0_asm, 8, 1, 0 +sprite_driver_l_st sprite_driver_8bpp_l1_st1_asm, 8, 1, 1 + +#endif // HAVE_ARMV6 + + +@ (u16 *d, const void *s, int width, const gpu_unai_inner_t *) +FUNCTION(sprite_driver_16bpp_asm): + .cfi_startproc + stmfd sp!, {r4-r6,lr} + .cfi_def_cfa_offset 4*4 + .cfi_rel_offset lr, 4*3 + ldr r4, [r3, #0x1c] @ v1 + ldr r5, [r3, #0x18] @ v0 + mov r12, #0x00ff + orr r12, r12, #0xff00 @ mask + mov r6, r2 @ saved_w + sub r5, r4, r5 + sub r5, r5, #1 @ h-1 +3: @ line_loop: + pld_ r1, #2048 + mov r2, r6 @ w + tst r1, #2 + beq 0f +2: @ 1pix: + ldrh lr, [r1], #2 + add r0, r0, #2 + sub r2, r2, #1 + tst lr, lr + strhne lr, [r0, #-2] +0: + subs r2, r2, #4 + bmi 1f +0: + ldmia r1!, {r3,r4} + add r0, r0, #2*4 + pld_ r1, #24 + tst r3, r12 + strhne r3, [r0, #-8] + movs lr, r3, lsr #16 + strhne lr, [r0, #-6] + tst r4, r12 + strhne r4, [r0, #-4] + movs lr, r4, lsr #16 + strhne lr, [r0, #-2] + subs r2, r2, #4 + bpl 0b +1: + adds r2, r2, #4 + bne 2b @ 1pix + add r0, r0, #2048 + add r1, r1, #2048 + sub r0, r0, r6, lsl #1 @ dst + sub r1, r1, r6, lsl #1 + subs r5, r5, #1 + bpl 3b @ line_loop + + ldmfd sp!, {r4-r6,pc} + .cfi_endproc + + +.macro poly_4_8bpp_asm_m name bpp light semit +FUNCTION(\name): @ (void *d, const gpu_unai_inner_t *inn, int count) + .cfi_startproc + stmfd sp!, {r4-r11,lr} + .cfi_def_cfa_offset 4*9 + .cfi_rel_offset lr, 4*8 add r12, r1, #4 ldmia r12, {r3, r4, r7, r12, lr} @ clut, u, v, u_msk, v_msk ldr r5, [r1, #0x18] @ u_inc -.if \need_rgb +.if \light ldr r10,[r1, #0x24] @ rbg .endif - mov r6, r12 + mov r6, r12 @ u_msk ldr r12,[r1, #0x1c] @ v_inc -.if \need_rgb +.if \light mov r10,r10,lsl #7 @ 0bbb bbbb 0ggg gggg 0rrr rrrr 
r000 0000 bic r10,r10,#1<<23 bic r10,r10,#1<<15 - mov r11,r10,lsl #8 + mov r11,r10,lsl #8 @ 0ggg gggg ... .endif and r4, r4, r6 and lr, lr, r7 @ v_msk & v and lr, lr, #0xff<<10 tst r12,r12 - bne \v_target + bne v_\name ldr r1, [r1] @ src - mov r7, r4, lsr #13 + mov r7, r4, lsr #(13 - (\bpp / 8 * 3)) add r1, r1, lr, lsl #1 - add r12,r1, r7, lsl #2 - pld_ r12,#2048 -.endm - -.global poly_4bpp_asm @ (void *d, const struct gpu_unai_inner_t *inn, int count) -poly_4bpp_asm: - .cfi_startproc - stmfd sp!, {r4-r7,lr} - .cfi_def_cfa_offset 4*5 - .cfi_rel_offset lr, 4*4 - poly_4bpp_init poly_4bpp_v_asm 0 +#ifdef HAVE_ARMV6 + add r12,r1, r7, lsl #(2 - (\bpp / 8 * 2)) + pld_ r12,#2048 @ next line +#endif 0: +.if \light || \semit >= 0 + mov r7, r4, lsr #(13 - (\bpp / 8 * 3)) + subs r2, r2, #1 + bmi 1f +.endif +.if \bpp == 4 ldr lr, [r1, r7, lsl #2] lsr r12,r4, #8 and r12,r12,#0x1c @@ -250,135 +460,64 @@ poly_4bpp_asm: mov r12,lr, ror r12 add r4, r4, r5 and r12,r12,#0x1e - and r4, r4, r6 - ldrh r12,[r3, r12] - add r0, r0, #2 - mov r7, r4, lsr #13 - tst r12,r12 - strhne r12,[r0, #-2] - subs r2, r2, #1 - bgt 0b - - ldmfd sp!, {r4-r7,pc} - -poly_4bpp_v_asm: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked - stmfd sp!, {r8-r9} - .cfi_def_cfa_offset 4*7 - .cfi_rel_offset lr, 4*6 - ldr r9, [r1, #0x14] @ v_msk - ldr r1, [r1] @ src - mov r8, r12 @ v_inc - mov r12,r4, lsr #13 - add lr, r1, lr, lsl #1 - and r9, r9, #0xff<<10 @ v_msk_final -0: - ldr lr, [lr, r12, lsl #2] - lsr r12,r4, #8 - and r12,r12,#0x1c - sub r12,r12,#1 - mov r12,lr, ror r12 +.else + ldrb r12,[r1, r7] add r4, r4, r5 - and r12,r12,#0x1e + add r12,r12,r12 +.endif and r4, r4, r6 - ldrh r12,[r3, r12] + ldrsh r12,[r3, r12] add r0, r0, #2 - add r7, r7, r8 - and lr, r7, r9 +.if !\light && \semit < 0 + mov r7, r4, lsr #(13 - (\bpp / 8 * 3)) tst r12,r12 - add lr, r1, lr, lsl #1 strhne r12,[r0, #-2] - mov r12,r4, lsr #13 subs r2, r2, #1 bgt 0b - - ldmfd sp!, {r8-r9} - ldmfd sp!, {r4-r7,pc} - .cfi_endproc - - -#ifdef 
HAVE_ARMV6 - -.macro modulate rp mbr mg t0 t1 t2 - and \t0, \rp, #0x001f - and \t1, \rp, #0x03e0 - and \t2, \rp, #0x7c00 - smulbb \t0, \t0, \mbr @ -> 0000 0000 0000 orrr rrxx xxxx xxxx xxxx - smulbt \t1, \t1, \mg @ -> 0000 000o gggg gxxx xxxx xxxx xxx0 0000 - smulbt \t2, \t2, \mbr @ -> 00ob bbbb xxxx xxxx xxxx xx00 0000 0000 - ands \rp, \rp, #0x8000 @ retain msb + semi-transparency test - usat \t0, #5, \t0, asr #14 - usat \t1, #5, \t1, asr #19 - usat \t2, #5, \t2, asr #24 - orr \rp, \rp, \t0 - orr \rp, \rp, \t1, lsl #5 - orr \rp, \rp, \t2, lsl #10 -.endm - -@ http://www.slack.net/~ant/info/rgb_mixing.html -@ p0 = (p0 + p1) / 2; p1 |= 0x8000 -@ msb of input p0 is assumed to be set -.macro semitrans0 p0 p1 t - eor \t, \p0, \p1 - and \t, \t, #0x0420 - sub \p0, \p0, \t - orr \p1, \p1, #0x8000 - uhadd16 \p0, \p0, \p1 -.endm - -.macro poly_4bpp_asm_m name semitrans -.global \name @ (void *d, const struct gpu_unai_inner_t *inn, int count) -\name: - .cfi_startproc - stmfd sp!, {r4-r11,lr} - .cfi_def_cfa_offset 4*9 - .cfi_rel_offset lr, 4*8 - poly_4bpp_init v_\name 1 -0: - mov r12,r4, lsr #13 - subs r2, r2, #1 - bmi 1f - ldr lr, [r1, r12, lsl #2] - lsr r12,r4, #8 - and r12,r12,#0x1c - sub r12,r12,#1 - mov r12,lr, ror r12 - add r4, r4, r5 - and r12,r12,#0x1e - and r4, r4, r6 - ldrh r12,[r3, r12] - add r0, r0, #2 + @ end +.else tst r12,r12 beq 0b +.if \light && \semit != 1 modulate r12, r10, r11, r7, r8, lr -.if \semitrans < 0 - @ no semi-transparency -.elseif \semitrans == 0 - ldrhne r7, [r0, #-2] - strheq r12,[r0, #-2] - beq 0b +.endif +.if \semit == 0 + ldrhmi r7, [r0, #-2] + strhpl r12,[r0, #-2] + bpl 0b semitrans0 r12, r7, lr .endif strh r12,[r0, #-2] b 0b +.endif @ \light || \semit >= 0 1: ldmfd sp!, {r4-r11,pc} v_\name: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked +.if \light || \semit >= 0 sub sp, sp, #4*2 + stmia sp, {r5,r6} .cfi_def_cfa_offset 4*(9+2) .cfi_rel_offset lr, 4*(8+2) +.endif ldr r9, [r1, #0x14] @ v_msk ldr r1, [r1] @ src mov r8, r12 @ v_inc - 
mov r12,r4, lsr #13 and r9, r9, #0xff<<10 @ v_msk_final - stmia sp, {r5,r6} +.if !\light && \semit < 0 + and lr, r7, r9 + mov r12,r4, lsr #(13 - (\bpp / 8 * 3)) + add lr, r1, lr, lsl #1 +.endif 0: +.if \light || \semit >= 0 and lr, r7, r9 - mov r12,r4, lsr #13 + mov r12,r4, lsr #(13 - (\bpp / 8 * 3)) add lr, r1, lr, lsl #1 subs r2, r2, #1 bmi 1f +.endif +.if \bpp == 4 ldr lr, [lr, r12, lsl #2] lsr r12,r4, #8 and r12,r12,#0x1c @@ -386,32 +525,60 @@ v_\name: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked mov r12,lr, ror r12 add r4, r4, r5 and r12,r12,#0x1e +.else + ldrb r12,[lr, r12] + add r4, r4, r5 + add r12,r12,r12 +.endif and r4, r4, r6 - ldrh r12,[r3, r12] + ldrsh r12,[r3, r12] add r0, r0, #2 add r7, r7, r8 +.if !\light && \semit < 0 + and lr, r7, r9 + tst r12,r12 + add lr, r1, lr, lsl #1 + strhne r12,[r0, #-2] + mov r12,r4, lsr #(13 - (\bpp / 8 * 3)) + subs r2, r2, #1 + bgt 0b + @ end +.else tst r12,r12 beq 0b +.if \light && \semit != 1 modulate r12, r10, r11, r5, r6, lr -.if \semitrans < 0 - @ no semi-transparency -.elseif \semitrans == 0 - ldrhne r7, [r0, #-2] - strheq r12,[r0, #-2] - beq 0b - semitrans0 r12, r7, lr +.endif +.if \semit == 0 + ldrhmi r6, [r0, #-2] + strhpl r12,[r0, #-2] + ldmiapl sp, {r5,r6} + bpl 0b + semitrans0 r12, r6, lr .endif strh r12,[r0, #-2] ldmia sp, {r5,r6} b 0b +.endif @ \light || \semit >= 0 1: +.if \light || \semit >= 0 add sp, sp, #4*2 +.endif ldmfd sp!, {r4-r11,pc} .cfi_endproc .endm -poly_4bpp_asm_m poly_4bpp_l_asm, -1 -poly_4bpp_asm_m poly_4bpp_l_st0_asm, 0 +poly_4_8bpp_asm_m poly_4bpp_asm, 4, 0, -1 +poly_4_8bpp_asm_m poly_8bpp_asm, 8, 0, -1 + +#ifdef HAVE_ARMV6 + +poly_4_8bpp_asm_m poly_4bpp_l0_st0_asm, 4, 0, 0 +poly_4_8bpp_asm_m poly_4bpp_l1_std_asm, 4, 1, -1 +poly_4_8bpp_asm_m poly_4bpp_l1_st0_asm, 4, 1, 0 +poly_4_8bpp_asm_m poly_8bpp_l0_st0_asm, 8, 0, 0 +poly_4_8bpp_asm_m poly_8bpp_l1_std_asm, 8, 1, -1 +poly_4_8bpp_asm_m poly_8bpp_l1_st0_asm, 8, 1, 0 #endif // HAVE_ARMV6 diff --git a/plugins/gpu_unai/gpu_arm.h 
b/plugins/gpu_unai/gpu_arm.h index 027aa53c..6b8c81a9 100644 --- a/plugins/gpu_unai/gpu_arm.h +++ b/plugins/gpu_unai/gpu_arm.h @@ -6,17 +6,49 @@ extern "C" { #endif struct gpu_unai_inner_t; -struct spriteDriverArg; void sprite_driver_4bpp_asm(void *pPixel, const u8 *pTxt_base, - u32 count, const struct spriteDriverArg *arg); + u32 count, const struct gpu_unai_inner_t *inn); void sprite_driver_8bpp_asm(void *pPixel, const u8 *pTxt_base, - u32 count, const struct spriteDriverArg *arg); + u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_16bpp_asm(void *pPixel, const void *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); void sprite_4bpp_x16_asm(void *d, const void *s, void *pal, int lines); void poly_4bpp_asm(void *d, const struct gpu_unai_inner_t *inn, int count); -void poly_4bpp_l_asm(void *d, const struct gpu_unai_inner_t *inn, int count); -void poly_4bpp_l_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_8bpp_asm(void *d, const struct gpu_unai_inner_t *inn, int count); + +void sprite_driver_4bpp_l0_std_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_8bpp_l0_std_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); + +#ifdef HAVE_ARMV6 + +void sprite_driver_4bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_4bpp_l1_std_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_4bpp_l1_st0_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_4bpp_l1_st1_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_8bpp_l0_st0_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_8bpp_l1_std_asm(void *pPixel, const u8 *pTxt_base, + u32 count, 
const struct gpu_unai_inner_t *inn); +void sprite_driver_8bpp_l1_st0_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); +void sprite_driver_8bpp_l1_st1_asm(void *pPixel, const u8 *pTxt_base, + u32 count, const struct gpu_unai_inner_t *inn); +void poly_4bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_4bpp_l1_std_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_4bpp_l1_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_8bpp_l0_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_8bpp_l1_std_asm(void *d, const struct gpu_unai_inner_t *inn, int count); +void poly_8bpp_l1_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count); + +#endif // HAVE_ARMV6 #ifdef __cplusplus } diff --git a/plugins/gpu_unai/gpu_inner.h b/plugins/gpu_unai/gpu_inner.h index 87324b90..14d66444 100644 --- a/plugins/gpu_unai/gpu_inner.h +++ b/plugins/gpu_unai/gpu_inner.h @@ -362,19 +362,12 @@ const PT gpuTileSpanDrivers[32] = { /////////////////////////////////////////////////////////////////////////////// // GPU Sprites innerloops generator -// warning: gpu_arm.S asm uses this, update it if you change this -typedef struct spriteDriverArg { - const le16_t *CBA; // 00 - u32 u0, v0, u0_mask, v0_mask; // 04 08 0c 10 - s32 y0, y1, lines, li; // 14 -} spriteDriverArg; - typedef void (*PS)(le16_t *pPixel, u32 count, const u8 *pTxt, - const spriteDriverArg *arg); + const gpu_unai_inner_t &inn); template static noinline void gpuSpriteDriverFn(le16_t *pPixel, u32 count, const u8 *pTxt_base, - const spriteDriverArg *arg) + const gpu_unai_inner_t &inn) { // Blend func can save an operation if it knows uSrc MSB is unset. // Untextured prims can always skip (source color always comes with MSB=0). 
@@ -383,25 +376,26 @@ static noinline void gpuSpriteDriverFn(le16_t *pPixel, u32 count, const u8 *pTxt uint_fast16_t uSrc, uDst, srcMSB; bool should_blend; - u32 u0_mask = arg->u0_mask; + u32 u0_mask = inn.u_msk >> 10; u8 r5, g5, b5; if (CF_LIGHT) { - r5 = gpu_unai.inn.r5; - g5 = gpu_unai.inn.g5; - b5 = gpu_unai.inn.b5; + r5 = inn.r5; + g5 = inn.g5; + b5 = inn.b5; } + const le16_t *CBA_; if (CF_TEXTMODE!=3) CBA_ = inn.CBA; + const u32 v0_mask = inn.v_msk >> 10; + s32 y0 = inn.y0, y1 = inn.y1, li = inn.ilace_mask; + u32 u0_ = inn.u, v0 = inn.v; + if (CF_TEXTMODE==3) { - // Texture is accessed byte-wise, so adjust mask if 16bpp + // Texture is accessed byte-wise, so adjust to 16bpp + u0_ <<= 1; u0_mask <<= 1; } - const le16_t *CBA_; if (CF_TEXTMODE!=3) CBA_ = arg->CBA; - const u32 v0_mask = arg->v0_mask; - s32 y0 = arg->y0, y1 = arg->y1, li = arg->li; - u32 u0_ = arg->u0, v0 = arg->v0; - for (; y0 < y1; ++y0, pPixel += FRAME_WIDTH, ++v0) { if (y0 & li) continue; @@ -454,39 +448,45 @@ endsprite: #ifdef __arm__ #include "gpu_arm.h" -static void Sprite4bppMaybeAsm(le16_t *pPixel, u32 count, const u8 *pTxt_base, - const spriteDriverArg *arg) +template +static void SpriteMaybeAsm(le16_t *pPixel, u32 count, const u8 *pTxt_base, + const gpu_unai_inner_t &inn) { #if 1 - s32 lines = arg->lines; - u32 u1m = arg->u0 + count - 1, v1m = arg->v0 + lines - 1; - if (u1m == (u1m & arg->u0_mask) && v1m == (v1m & arg->v0_mask)) { - pTxt_base += arg->u0 / 2 + arg->v0 * 2048; - sprite_driver_4bpp_asm(pPixel, pTxt_base, count, arg); - } - else + s32 lines = inn.y1 - inn.y0; + u32 u1m = inn.u + count - 1, v1m = inn.v + lines - 1; + if (u1m == (u1m & (inn.u_msk >> 10)) && v1m == (v1m & (inn.v_msk >> 10))) { + const u8 *pTxt = pTxt_base + inn.v * 2048; + switch (CF) { + case 0x20: sprite_driver_4bpp_asm (pPixel, pTxt + inn.u / 2, count, &inn); return; + case 0x40: sprite_driver_8bpp_asm (pPixel, pTxt + inn.u, count, &inn); return; + case 0x60: sprite_driver_16bpp_asm(pPixel, pTxt + inn.u * 
2, count, &inn); return; + } + } + if (v1m == (v1m & (inn.v_msk >> 10))) { + const u8 *pTxt = pTxt_base + inn.v * 2048; + switch (CF) { + case 0x20: sprite_driver_4bpp_l0_std_asm(pPixel, pTxt, count, &inn); return; + case 0x40: sprite_driver_8bpp_l0_std_asm(pPixel, pTxt, count, &inn); return; +#ifdef HAVE_ARMV6 + case 0x21: sprite_driver_4bpp_l1_std_asm(pPixel, pTxt, count, &inn); return; + case 0x22: sprite_driver_4bpp_l0_st0_asm(pPixel, pTxt, count, &inn); return; + case 0x23: sprite_driver_4bpp_l1_st0_asm(pPixel, pTxt, count, &inn); return; + case 0x2b: sprite_driver_4bpp_l1_st1_asm(pPixel, pTxt, count, &inn); return; + case 0x41: sprite_driver_8bpp_l1_std_asm(pPixel, pTxt, count, &inn); return; + case 0x42: sprite_driver_8bpp_l0_st0_asm(pPixel, pTxt, count, &inn); return; + case 0x43: sprite_driver_8bpp_l1_st0_asm(pPixel, pTxt, count, &inn); return; + case 0x4b: sprite_driver_8bpp_l1_st1_asm(pPixel, pTxt, count, &inn); return; #endif - gpuSpriteDriverFn<0x20>(pPixel, count, pTxt_base, arg); -} - -static void Sprite8bppMaybeAsm(le16_t *pPixel, u32 count, const u8 *pTxt_base, - const spriteDriverArg *arg) -{ -#if 1 - s32 lines = arg->lines; - u32 u1m = arg->u0 + count - 1, v1m = arg->v0 + lines - 1; - if (u1m == (u1m & arg->u0_mask) && v1m == (v1m & arg->v0_mask)) { - pTxt_base += arg->u0 + arg->v0 * 2048; - sprite_driver_8bpp_asm(pPixel, pTxt_base, count, arg); - } - else + } + } #endif - gpuSpriteDriverFn<0x40>(pPixel, count, pTxt_base, arg); + gpuSpriteDriverFn(pPixel, count, pTxt_base, inn); } #endif // __arm__ static void SpriteNULL(le16_t *pPixel, u32 count, const u8 *pTxt_base, - const spriteDriverArg *arg) + const gpu_unai_inner_t &inn) { #ifdef ENABLE_GPU_LOG_SUPPORT fprintf(stdout,"SpriteNULL()\n"); @@ -502,29 +502,32 @@ static void SpriteNULL(le16_t *pPixel, u32 count, const u8 *pTxt_base, #define TI(cf) gpuSpriteDriverFn<(cf)> #define TN SpriteNULL #ifdef __arm__ -#define TA4(cf) Sprite4bppMaybeAsm -#define TA8(cf) Sprite8bppMaybeAsm +#define TA(cf) 
SpriteMaybeAsm<(cf)> +#else +#define TA(cf) TI(cf) +#endif +#ifdef HAVE_ARMV6 +#define TA6(cf) SpriteMaybeAsm<(cf)> #else -#define TA4(cf) TI(cf) -#define TA8(cf) TI(cf) +#define TA6(cf) TI(cf) #endif #define TIBLOCK(ub) \ - TN, TN, TN, TN, TN, TN, TN, TN, \ - TN, TN, TN, TN, TN, TN, TN, TN, \ - TN, TN, TN, TN, TN, TN, TN, TN, \ - TN, TN, TN, TN, TN, TN, TN, TN, \ - TA4((ub)|0x20), TI((ub)|0x21), TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ - TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \ - TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \ - TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \ - TA8((ub)|0x40), TI((ub)|0x41), TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \ - TN, TN, TI((ub)|0x4a), TI((ub)|0x4b), TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \ - TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \ - TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \ - TI((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \ - TN, TN, TI((ub)|0x6a), TI((ub)|0x6b), TN, TN, TI((ub)|0x6e), TI((ub)|0x6f), \ - TN, TN, TI((ub)|0x72), TI((ub)|0x73), TN, TN, TI((ub)|0x76), TI((ub)|0x77), \ - TN, TN, TI((ub)|0x7a), TI((ub)|0x7b), TN, TN, TI((ub)|0x7e), TI((ub)|0x7f) + TN, TN, TN, TN, TN, TN, TN, TN, \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TN, TN, TN, TN, TN, TN, TN, TN, \ + TA((ub)|0x20), TA6((ub)|0x21),TA6((ub)|0x22),TA6((ub)|0x23),TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ + TN, TN, TI((ub)|0x2a), TA6((ub)|0x2b),TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \ + TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \ + TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \ + TA((ub)|0x40), 
TA6((ub)|0x41),TA6((ub)|0x42),TA6((ub)|0x43),TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \ + TN, TN, TI((ub)|0x4a), TA6((ub)|0x4b),TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \ + TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \ + TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \ + TA((ub)|0x60), TI((ub)|0x61), TI((ub)|0x62), TI((ub)|0x63), TI((ub)|0x64), TI((ub)|0x65), TI((ub)|0x66), TI((ub)|0x67), \ + TN, TN, TI((ub)|0x6a), TI((ub)|0x6b), TN, TN, TI((ub)|0x6e), TI((ub)|0x6f), \ + TN, TN, TI((ub)|0x72), TI((ub)|0x73), TN, TN, TI((ub)|0x76), TI((ub)|0x77), \ + TN, TN, TI((ub)|0x7a), TI((ub)|0x7b), TN, TN, TI((ub)|0x7e), TI((ub)|0x7f) const PS gpuSpriteDrivers[256] = { TIBLOCK(0<<8), TIBLOCK(1<<8) @@ -533,8 +536,8 @@ const PS gpuSpriteDrivers[256] = { #undef TI #undef TN #undef TIBLOCK -#undef TA4 -#undef TA8 +#undef TA +#undef TA6 /////////////////////////////////////////////////////////////////////////////// // GPU Polygon innerloops generator @@ -566,7 +569,7 @@ static noinline void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 const bool skip_uSrc_mask = MSB_PRESERVED ? 
(!CF_TEXTMODE) : (!CF_TEXTMODE) || CF_LIGHT; bool should_blend; - u32 bMsk; if (CF_BLITMASK) bMsk = gpu_unai.blit_mask; + u32 bMsk; if (CF_BLITMASK) bMsk = gpu_unai.inn.blit_mask; if (!CF_TEXTMODE) { @@ -758,9 +761,16 @@ template static void PolySpanMaybeAsm(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) { switch (CF) { - case 0x20: poly_4bpp_asm (pDst, &gpu_unai.inn, count); break; - case 0x21: poly_4bpp_l_asm (pDst, &gpu_unai.inn, count); break; - case 0x23: poly_4bpp_l_st0_asm(pDst, &gpu_unai.inn, count); break; + case 0x20: poly_4bpp_asm (pDst, &gpu_unai.inn, count); break; + case 0x40: poly_8bpp_asm (pDst, &gpu_unai.inn, count); break; +#ifdef HAVE_ARMV6 + case 0x21: poly_4bpp_l1_std_asm(pDst, &gpu_unai.inn, count); break; + case 0x22: poly_4bpp_l0_st0_asm(pDst, &gpu_unai.inn, count); break; + case 0x23: poly_4bpp_l1_st0_asm(pDst, &gpu_unai.inn, count); break; + case 0x41: poly_8bpp_l1_std_asm(pDst, &gpu_unai.inn, count); break; + case 0x42: poly_8bpp_l0_st0_asm(pDst, &gpu_unai.inn, count); break; + case 0x43: poly_8bpp_l1_st0_asm(pDst, &gpu_unai.inn, count); break; +#endif default: gpuPolySpanFn(gpu_unai, pDst, count); } } @@ -795,11 +805,11 @@ typedef void (*PP)(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count); TN, TN, TI((ub)|0x0a), TI((ub)|0x0b), TN, TN, TI((ub)|0x0e), TI((ub)|0x0f), \ TN, TN, TI((ub)|0x12), TI((ub)|0x13), TN, TN, TI((ub)|0x16), TI((ub)|0x17), \ TN, TN, TI((ub)|0x1a), TI((ub)|0x1b), TN, TN, TI((ub)|0x1e), TI((ub)|0x1f), \ - TA((ub)|0x20), TA6((ub)|0x21),TI((ub)|0x22), TA6((ub)|0x23),TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ + TA((ub)|0x20), TA6((ub)|0x21),TA6((ub)|0x22),TA6((ub)|0x23),TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \ TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \ TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \ TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \ - TI((ub)|0x40), TI((ub)|0x41), 
TI((ub)|0x42), TI((ub)|0x43), TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \ + TA((ub)|0x40), TA6((ub)|0x41),TA6((ub)|0x42),TA6((ub)|0x43),TI((ub)|0x44), TI((ub)|0x45), TI((ub)|0x46), TI((ub)|0x47), \ TN, TN, TI((ub)|0x4a), TI((ub)|0x4b), TN, TN, TI((ub)|0x4e), TI((ub)|0x4f), \ TN, TN, TI((ub)|0x52), TI((ub)|0x53), TN, TN, TI((ub)|0x56), TI((ub)|0x57), \ TN, TN, TI((ub)|0x5a), TI((ub)|0x5b), TN, TN, TI((ub)|0x5e), TI((ub)|0x5f), \ diff --git a/plugins/gpu_unai/gpu_raster_polygon.h b/plugins/gpu_unai/gpu_raster_polygon.h index 1457afde..0479d0fa 100644 --- a/plugins/gpu_unai/gpu_raster_polygon.h +++ b/plugins/gpu_unai/gpu_raster_polygon.h @@ -352,9 +352,9 @@ void gpuDrawPolyF(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad continue; le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; - int li=gpu_unai.ilace_mask; - int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); - int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + int li=gpu_unai.inn.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1); for (; loop1; --loop1, ya++, PixelBase += FRAME_WIDTH, x3 += dx3, x4 += dx4 ) @@ -663,9 +663,9 @@ void gpuDrawPolyFT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua continue; le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; - int li=gpu_unai.ilace_mask; - int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); - int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + int li=gpu_unai.inn.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1); for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, x3 += dx3, x4 += dx4, @@ -1008,9 +1008,9 @@ void 
gpuDrawPolyG(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_quad continue; le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; - int li=gpu_unai.ilace_mask; - int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); - int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + int li=gpu_unai.inn.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1); for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, x3 += dx3, x4 += dx4, @@ -1403,9 +1403,9 @@ void gpuDrawPolyGT(const PtrUnion packet, const PP gpuPolySpanDriver, u32 is_qua continue; le16_t* PixelBase = &gpu_unai.vram[FRAME_OFFSET(0, ya)]; - int li=gpu_unai.ilace_mask; - int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); - int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + int li=gpu_unai.inn.ilace_mask; + int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0); + int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1); for (; loop1; --loop1, ++ya, PixelBase += FRAME_WIDTH, x3 += dx3, x4 += dx4, diff --git a/plugins/gpu_unai/gpu_raster_sprite.h b/plugins/gpu_unai/gpu_raster_sprite.h index 13d783e6..e314e974 100644 --- a/plugins/gpu_unai/gpu_raster_sprite.h +++ b/plugins/gpu_unai/gpu_raster_sprite.h @@ -61,31 +61,16 @@ void gpuDrawS(PtrUnion packet, const PS gpuSpriteDriver, s32 *w_out, s32 *h_out) *w_out = x1; *h_out = y1 - y0; + le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; + gpu_unai.inn.r5 = packet.U1[0] >> 3; gpu_unai.inn.g5 = packet.U1[1] >> 3; gpu_unai.inn.b5 = packet.U1[2] >> 3; - - le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; - const int li=gpu_unai.ilace_mask; - //const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); - //const int 
pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); - unsigned int tmode = gpu_unai.TEXT_MODE >> 5; - u8* pTxt_base = (u8*)gpu_unai.inn.TBA; - - // Texture is accessed byte-wise, so adjust idx if 16bpp - if (tmode == 3) u0 <<= 1; - - spriteDriverArg arg; - arg.CBA = gpu_unai.inn.CBA; - arg.u0 = u0; - arg.v0 = v0; - arg.u0_mask = gpu_unai.TextureWindow[2]; - arg.v0_mask = gpu_unai.TextureWindow[3]; - arg.y0 = y0; - arg.y1 = y1; - arg.lines = y1 - y0; - arg.li = li; - gpuSpriteDriver(Pixel, x1, pTxt_base, &arg); + gpu_unai.inn.u = u0; + gpu_unai.inn.v = v0; + gpu_unai.inn.y0 = y0; + gpu_unai.inn.y1 = y1; + gpuSpriteDriver(Pixel, x1, (u8 *)gpu_unai.inn.TBA, gpu_unai.inn); } void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver, s32 *w_out, s32 *h_out) @@ -118,9 +103,9 @@ void gpuDrawT(PtrUnion packet, const PT gpuTileSpanDriver, s32 *w_out, s32 *h_ou const u16 Data = GPU_RGB16(le32_to_u32(packet.U4[0])); le16_t *Pixel = &gpu_unai.vram[FRAME_OFFSET(x0, y0)]; - const int li=gpu_unai.ilace_mask; - const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.ilace_mask+1):0); - const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.ilace_mask+1):0):1); + const int li=gpu_unai.inn.ilace_mask; + const int pi=(ProgressiveInterlaceEnabled()?(gpu_unai.inn.ilace_mask+1):0); + const int pif=(ProgressiveInterlaceEnabled()?(gpu_unai.prog_ilace_flag?(gpu_unai.inn.ilace_mask+1):0):1); for (; y0