X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=platform%2Fgizmondo%2Fasm_utils.s;h=1cee30e1f643c1df679072c0a2e3814b10dcfd27;hb=fda2f31020bf0d6cf7b5dd70ec01cf390b7e1483;hp=8f168e97f23a5cee8a625c1cc3535d6692f596e2;hpb=fd34fdd0f4f7e0883be4944738b55419a066dab4;p=picodrive.git diff --git a/platform/gizmondo/asm_utils.s b/platform/gizmondo/asm_utils.s index 8f168e9..1cee30e 100644 --- a/platform/gizmondo/asm_utils.s +++ b/platform/gizmondo/asm_utils.s @@ -1,30 +1,45 @@ @ vim:filetype=armasm -.global vidCpy8to16_40 @ void *dest, void *src, short *pal, int lines +.global vidCpy8to16 @ void *dest, void *src, short *pal, int lines|(flags<<16), + @ flags=is32col[0], no_even_lines[1], no_odd_lines[2] -vidCpy8to16_40: - stmfd sp!, {r4-r9,lr} +vidCpy8to16: + stmfd sp!, {r4-r8,lr} + and r4, r3, #0xff0000 + and r3, r3, #0xff + tst r4, #0x10000 mov r3, r3, lsr #1 orr r3, r3, r3, lsl #8 + orreq r3, r3, #(320/8-1)<<24 @ 40 col mode + orrne r3, r3, #(256/8-1)<<24 @ 32 col mode + addne r0, r0, #32*2 + orr r3, r3, r4 add r1, r1, #8 - orr r3, r3, #(320/8-1)<<24 + mov lr, #0xff + mov lr, lr, lsl #1 - @ even lines -vcloop_40_aligned: + @ no even lines? + tst r3, #0x20000 + addne r0, r0, #320*2 + addne r1, r1, #328 + bne vcloop_odd + + @ even lines first +vcloop_aligned: ldr r12, [r1], #4 ldr r7, [r1], #4 - and r4, lr, r12, lsl #1 + and r4, lr, r12,lsl #1 ldrh r4, [r2, r4] - and r5, lr, r12, lsr #7 + and r5, lr, r12,lsr #7 ldrh r5, [r2, r5] - and r6, lr, r12, lsr #15 + and r6, lr, r12,lsr #15 ldrh r6, [r2, r6] orr r4, r4, r5, lsl #16 - and r5, lr, r12, lsr #23 + and r5, lr, r12,lsr #23 ldrh r5, [r2, r5] and r8, lr, r7, lsl #1 ldrh r8, [r2, r8] @@ -34,22 +49,30 @@ vcloop_40_aligned: ldrh r6, [r2, r6] and r12,lr, r7, lsr #15 ldrh r12,[r2, r12] - and r9, lr, r7, lsr #23 - ldrh r9, [r2, r9] + and r7, lr, r7, lsr #23 + ldrh r7, [r2, r7] orr r8, r8, r6, lsl #16 subs r3, r3, #1<<24 - orr r12,r12, r9, lsl #16 + orr r12,r12, r7, lsl #16 stmia r0!, {r4,r5,r8,r12} - bpl vcloop_40_aligned + bpl vcloop_aligned + tst r3, #0x10000 add r1, r1, #336 @ skip a line and 1 col - add r0, r0, #320*2+2*2 - add r3, r3, #(320/8)<<24 + addne r1, r1, #64 @ skip more for 32col mode + add r0, r0, #(320+2)*2 + addne r0, r0, #64*2 + addeq r3, r3, #(320/8)<<24 + addne r3, r3, #(256/8)<<24 sub r3, r3, #1 tst r3, #0xff - bne vcloop_40_aligned + bne vcloop_aligned + + @ no odd lines? + tst r3, #0x40000 + ldmnefd sp!, {r4-r8,pc} and r4, r3, #0xff00 orr r3, r3, r4, lsr #8 @@ -63,54 +86,59 @@ vcloop_40_aligned: mul r4, r5, r6 sub r1, r1, r4 - @ FIXME FIXME FIXME - ldmfd sp!, {r4-r9,lr} - bx lr + sub r0, r0, #2 +vcloop_odd: + mov r8, #0 -vcloop_40_unaligned: +vcloop_unaligned: ldr r12, [r1], #4 ldr r7, [r1], #4 - and r4, lr, r12, lsl #1 - ldrh r4, [r2, r4] + and r6, lr, r12, lsl #1 + ldrh r6, [r2, r6] and r5, lr, r12, lsr #7 ldrh r5, [r2, r5] - strh r4, [r0]! + orr r4, r8, r6, lsl #16 + and r6, lr, r12, lsr #15 ldrh r6, [r2, r6] - - and r4, lr, r12, lsr #23 - ldrh r4, [r2, r4] - orr r5, r5, r6, lsl #16 - - and r8, lr, r7, lsl #1 + and r8, lr, r12, lsr #23 ldrh r8, [r2, r8] + orr r5, r5, r6, lsl #16 - and r6, lr, r7, lsr #7 + and r6, lr, r7, lsl #1 ldrh r6, [r2, r6] - orr r8, r4, r8, lsl #16 - - and r12,lr, r7, lsr #15 + and r12,lr, r7, lsr #7 ldrh r12,[r2, r12] + orr r6, r8, r6, lsl #16 + + and r8, lr, r7, lsr #15 + ldrh r8, [r2, r8] - and r4, lr, r7, lsr #23 - ldrh r4, [r2, r6] - orr r12,r6, r12,lsl #16 subs r3, r3, #1<<24 + and r7, lr, r7, lsr #23 + orr r12,r12,r8, lsl #16 + + ldrh r8, [r2, r7] - stmia r0!, {r5,r8,r12} - strh r4, [r0]! - bpl vcloop_40_unaligned + stmia r0!, {r4,r5,r6,r12} + bpl vcloop_unaligned + strh r8, [r0] + mov r8, #0 + + tst r3, #0x10000 add r1, r1, #336 @ skip a line and 1 col - add r0, r0, #320*2+2*2 - add r3, r3, #(320/8)<<24 + addne r1, r1, #64 @ skip more for 32col mode + add r0, r0, #(320+2)*2 + addne r0, r0, #64*2 + addeq r3, r3, #(320/8)<<24 + addne r3, r3, #(256/8)<<24 sub r3, r3, #1 tst r3, #0xff - bne vcloop_40_unaligned + bne vcloop_unaligned - ldmfd sp!, {r4-r9,lr} + ldmfd sp!, {r4-r8,lr} bx lr -