#include "arm_features.h"
+.syntax unified
.text
.align 2
ldrh r8, [r2, r8]
ldrh lr, [r2, lr]
tst r6, r6
- strneh r6, [r0, #\obase+0]
+ strhne r6, [r0, #\obase+0]
tst r7, r7
- strneh r7, [r0, #\obase+2]
+ strhne r7, [r0, #\obase+2]
tst r8, r8
- strneh r8, [r0, #\obase+4]
+ strhne r8, [r0, #\obase+4]
tst lr, lr
- strneh lr, [r0, #\obase+6]
+ strhne lr, [r0, #\obase+6]
.endm
@ in: r0=dst, r2=pal, r12=0x1fe
ldrh r8, [r2, r8]
ldrh \rs,[r2, \rs]
tst r6, r6
- strneh r6, [r0, #0]
+ strhne r6, [r0, #0]
tst r7, r7
- strneh r7, [r0, #2]
+ strhne r7, [r0, #2]
tst r8, r8
- strneh r8, [r0, #4]
+ strhne r8, [r0, #4]
tst \rs,\rs
- strneh \rs,[r0, #6]
+ strhne \rs,[r0, #6]
.endm
.global sprite_4bpp_x16_asm @ (u16 *d, void *s, u16 *pal, int lines)
ldrh r7, [r2, r7]
add r0, r0, #2
tst r7, r7
- strneh r7, [r0, #-2]
+ strhne r7, [r0, #-2]
subs r8, r8, #1
bgt 0b
sprite_driver_part3
ldrh r7, [r2, r7]
add r0, r0, #2
tst r7, r7
- strneh r7, [r0, #-2]
+ strhne r7, [r0, #-2]
subs r8, r8, #1
bgt 0b
sprite_driver_part3
add r0, r0, #2
mov r7, r4, lsr #13
tst r12,r12
- strneh r12,[r0, #-2]
+ strhne r12,[r0, #-2]
subs r2, r2, #1
bgt 0b
and lr, r7, r9
tst r12,r12
add lr, r1, lr, lsl #1
- strneh r12,[r0, #-2]
+ strhne r12,[r0, #-2]
mov r12,r4, lsr #13
subs r2, r2, #1
bgt 0b
smulbb \t0, \t0, \mbr @ -> 0000 0000 0000 orrr rrxx xxxx xxxx xxxx
smulbt \t1, \t1, \mg @ -> 0000 000o gggg gxxx xxxx xxxx xxx0 0000
smulbt \t2, \t2, \mbr @ -> 00ob bbbb xxxx xxxx xxxx xx00 0000 0000
- and \rp, \rp, #0x8000
+ ands \rp, \rp, #0x8000 @ retain msb + semi-transparency test
usat \t0, #5, \t0, asr #14
usat \t1, #5, \t1, asr #19
usat \t2, #5, \t2, asr #24
orr \rp, \rp, \t2, lsl #10
.endm
-.global poly_4bpp_l_asm @ (void *d, const struct gpu_unai_inner_t *inn, int count)
-poly_4bpp_l_asm:
+@ http://www.slack.net/~ant/info/rgb_mixing.html
+@ p0 = (p0 + p1) / 2 without carries leaking across masked field boundaries; side effects: p1 |= 0x8000, t is scratch
+@ msb of input p0 is assumed to be set (with the msb of p1 forced on as well, the halved sum keeps bit 15 set)
+.macro semitrans0 p0 p1 t
+ eor \t, \p0, \p1 @ bits in which the two pixels differ
+ and \t, \t, #0x0420 @ keep bits 5 and 10 only - presumably the lowest bit of the two upper 5-bit colour fields
+ sub \p0, \p0, \t @ drop them from p0 so the sum is even at those bits and halving shifts nothing into the field below
+ orr \p1, \p1, #0x8000
+ uhadd16 \p0, \p0, \p1 @ unsigned halving add, done separately on each 16-bit half
+.endm
+
+@ Emits two routines: the global span function named by the first argument
+@ and a local companion v_<name>, whose label is handed to poly_4bpp_init.
+@ The semitrans argument selects what happens after modulate:
+@   < 0  - the modulated pixel is stored unconditionally
+@   == 0 - a pixel that still has bit 15 set is averaged with the destination
+@          halfword (semitrans0), any other pixel is stored unchanged
+@ The ne/eq conditions in the semitrans path use the flags left by the
+@ "ands ..., #0x8000" inside modulate; this assumes nothing later in modulate
+@ alters the flags (the visible tail only has usat/orr) - confirm.
+.macro poly_4bpp_asm_m name semitrans
+.global \name @ (void *d, const struct gpu_unai_inner_t *inn, int count)
+\name:
.cfi_startproc
stmfd sp!, {r4-r11,lr}
.cfi_def_cfa_offset 4*9
.cfi_rel_offset lr, 4*8
- poly_4bpp_init poly_4bpp_lv_asm 1
+ poly_4bpp_init v_\name 1
0:
mov r12,r4, lsr #13
subs r2, r2, #1
tst r12,r12
beq 0b
modulate r12, r10, r11, r7, r8, lr
+.if \semitrans < 0
+ @ no semi-transparency
+.elseif \semitrans == 0
+ @ r7 was only a modulate temporary here, so it can hold the dst pixel
+ ldrhne r7, [r0, #-2]
+ strheq r12,[r0, #-2]
+ beq 0b
+ semitrans0 r12, r7, lr
+.endif
strh r12,[r0, #-2]
b 0b
1:
ldmfd sp!, {r4-r11,pc}
-poly_4bpp_lv_asm: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked
+v_\name: @ r3=clut, r4=u, r5=u_inc, r6=u_msk, r7=v, lr=v_masked
sub sp, sp, #4*2
.cfi_def_cfa_offset 4*(9+2)
.cfi_rel_offset lr, 4*(8+2)
tst r12,r12
beq 0b
modulate r12, r10, r11, r5, r6, lr
+.if \semitrans < 0
+ @ no semi-transparency
+.elseif \semitrans == 0
+ @ modulate used r5/r6 as temporaries, so they must be reloaded from the
+ @ stack on every path back to the loop, including the early exit below.
+ @ The dst pixel goes into the already-dead r5 rather than r7, which the
+ @ entry comment documents as v.
+ @ NOTE(review): the loop head is not visible here; if it already reloads
+ @ r5/r6 the extra ldmiaeq is merely redundant.
+ ldrhne r5, [r0, #-2]
+ strheq r12,[r0, #-2]
+ ldmiaeq sp, {r5,r6} @ ldm leaves the flags alone, beq still sees eq
+ beq 0b
+ semitrans0 r12, r5, lr
+.endif
strh r12,[r0, #-2]
ldmia sp, {r5,r6}
b 0b
add sp, sp, #4*2
ldmfd sp!, {r4-r11,pc}
.cfi_endproc
+.endm
+
+poly_4bpp_asm_m poly_4bpp_l_asm, -1
+poly_4bpp_asm_m poly_4bpp_l_st0_asm, 0
#endif // HAVE_ARMV6
void poly_4bpp_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
void poly_4bpp_l_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
+void poly_4bpp_l_st0_asm(void *d, const struct gpu_unai_inner_t *inn, int count);
#ifdef __cplusplus
}
#include "gpu_inner_light.h"
#include "arm_features.h"
+#include "compiler_features.h"
#ifdef __arm__
#include "gpu_inner_blend_arm.h"
#include "gpu_inner_light_arm.h"
const spriteDriverArg *arg);
template<int CF>
-static void gpuSpriteDriverFn(le16_t *pPixel, u32 count, const u8 *pTxt_base,
+static noinline void gpuSpriteDriverFn(le16_t *pPixel, u32 count, const u8 *pTxt_base,
const spriteDriverArg *arg)
{
// Blend func can save an operation if it knows uSrc MSB is unset.
// relevant blend/light headers.
// (see README_senquack.txt)
template<int CF>
-static void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
+static noinline void gpuPolySpanFn(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
{
// Blend func can save an operation if it knows uSrc MSB is unset.
// Untextured prims can always skip this (src color MSB is always 0).
#ifdef __arm__
template<int CF>
-static void PolySpanAsm(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count)
+static void PolySpanMaybeAsm(const gpu_unai_t &gpu_unai, le16_t *pDst, u32 count) // uses the asm span routine when one exists for CF, the generic gpuPolySpanFn<CF> otherwise
{
switch (CF) {
- case 0x20: poly_4bpp_asm (pDst, &gpu_unai.inn, count); break;
- case 0x21: poly_4bpp_l_asm(pDst, &gpu_unai.inn, count); break;
+ case 0x20: poly_4bpp_asm (pDst, &gpu_unai.inn, count); break;
+ case 0x21: poly_4bpp_l_asm (pDst, &gpu_unai.inn, count); break;
+ case 0x23: poly_4bpp_l_st0_asm(pDst, &gpu_unai.inn, count); break; // asm variant instantiated with semitrans == 0
+ default: gpuPolySpanFn<CF>(gpu_unai, pDst, count); // last label in the switch, so no break is needed
}
}
#endif
#define TI(cf) gpuPolySpanFn<(cf)>
#define TN PolyNULL
#ifdef __arm__
-#define TA(cf) PolySpanAsm<(cf)>
+#define TA(cf) PolySpanMaybeAsm<(cf)>
#else
#define TA(cf) TI(cf)
#endif
#ifdef HAVE_ARMV6
-#define TA6(cf) PolySpanAsm<(cf)>
+#define TA6(cf) PolySpanMaybeAsm<(cf)>
#else
#define TA6(cf) TI(cf)
#endif
TN, TN, TI((ub)|0x0a), TI((ub)|0x0b), TN, TN, TI((ub)|0x0e), TI((ub)|0x0f), \
TN, TN, TI((ub)|0x12), TI((ub)|0x13), TN, TN, TI((ub)|0x16), TI((ub)|0x17), \
TN, TN, TI((ub)|0x1a), TI((ub)|0x1b), TN, TN, TI((ub)|0x1e), TI((ub)|0x1f), \
- TA((ub)|0x20), TA6((ub)|0x21),TI((ub)|0x22), TI((ub)|0x23), TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
+ TA((ub)|0x20), TA6((ub)|0x21),TI((ub)|0x22), TA6((ub)|0x23),TI((ub)|0x24), TI((ub)|0x25), TI((ub)|0x26), TI((ub)|0x27), \
TN, TN, TI((ub)|0x2a), TI((ub)|0x2b), TN, TN, TI((ub)|0x2e), TI((ub)|0x2f), \
TN, TN, TI((ub)|0x32), TI((ub)|0x33), TN, TN, TI((ub)|0x36), TI((ub)|0x37), \
TN, TN, TI((ub)|0x3a), TI((ub)|0x3b), TN, TN, TI((ub)|0x3e), TI((ub)|0x3f), \
asm ("eor %[mix], %[uSrc], %[uDst]\n\t" // uSrc ^ uDst
"and %[mix], %[mix], %[mask]\n\t" // ... & 0x0421
"sub %[mix], %[uDst], %[mix]\n\t" // uDst - ...
+ #ifdef HAVE_ARMV6
+ "uhadd16 %[mix], %[uSrc], %[mix]\n\t" // (uSrc + ...) >> 1 in a single halving add per 16-bit half
+ #else
"add %[mix], %[uSrc], %[mix]\n\t" // uSrc + ...
"mov %[mix], %[mix], lsr #0x1\n\t" // ... >> 1
+ #endif
: [mix] "=&r" (mix)
- : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0421));
+ : [uSrc] "r" (uSrc), [uDst] "r" (uDst), [mask] "r" (0x0420)); // 0x0420 rather than 0x0421: bit 0 of the sum is discarded by the halving either way
}
if (BLENDMODE == 1 || BLENDMODE == 3) {