X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=platform%2Fgizmondo%2Fasm_utils.s;h=91e57109fba0fca2273b25cd93323f988cc3bbf8;hb=fa283c9a387276360ffb881f2125cbdb7adfa382;hp=ec2d55bdd35b0ab5f6eb0a1261c36b17e7c86097;hpb=499a0be394af5257bbec4999d552be2ac0ec75b7;p=picodrive.git

diff --git a/platform/gizmondo/asm_utils.s b/platform/gizmondo/asm_utils.s
index ec2d55b..91e5710 100644
--- a/platform/gizmondo/asm_utils.s
+++ b/platform/gizmondo/asm_utils.s
@@ -1,32 +1,37 @@
 @ vim:filetype=armasm
 
-.global vidCpy8to16_40 @ void *dest, void *src, short *pal, int lines
+.global vidCpy8to16 @ void *dest, void *src, short *pal, int lines|(is32col<<8)
 
-vidCpy8to16_40:
-    stmfd   sp!, {r4-r9,lr}
+vidCpy8to16:
+    stmfd   sp!, {r4-r8,lr}
 
+    tst     r3, #0x100
+    and     r3, r3, #0xff
     mov     r3, r3, lsr #1
     orr     r3, r3, r3, lsl #8
-    orr     r3, r3, #(320/8-1)<<24
+    orreq   r3, r3, #(320/8-1)<<24   @ 40 col mode
+    orrne   r3, r3, #(256/8-1)<<24   @ 32 col mode
+    orrne   r3, r3, #0x10000
+    addne   r0, r0, #32*2
 
     add     r1, r1, #8
     mov     lr, #0xff
    mov     lr, lr, lsl #1
 
-    @ even lines
-vcloop_40_aligned:
+    @ even lines first
+vcloop_aligned:
     ldr     r12, [r1], #4
     ldr     r7, [r1], #4
 
-    and     r4, lr, r12, lsl #1
+    and     r4, lr, r12,lsl #1
     ldrh    r4, [r2, r4]
-    and     r5, lr, r12, lsr #7
+    and     r5, lr, r12,lsr #7
     ldrh    r5, [r2, r5]
-    and     r6, lr, r12, lsr #15
+    and     r6, lr, r12,lsr #15
     ldrh    r6, [r2, r6]
     orr     r4, r4, r5, lsl #16
 
-    and     r5, lr, r12, lsr #23
+    and     r5, lr, r12,lsr #23
     ldrh    r5, [r2, r5]
     and     r8, lr, r7, lsl #1
     ldrh    r8, [r2, r8]
@@ -36,22 +41,26 @@ vcloop_40_aligned:
     ldrh    r6, [r2, r6]
     and     r12,lr, r7, lsr #15
     ldrh    r12,[r2, r12]
-    and     r9, lr, r7, lsr #23
-    ldrh    r9, [r2, r9]
+    and     r7, lr, r7, lsr #23
+    ldrh    r7, [r2, r7]
     orr     r8, r8, r6, lsl #16
 
     subs    r3, r3, #1<<24
-    orr     r12,r12, r9, lsl #16
+    orr     r12,r12, r7, lsl #16
 
     stmia   r0!, {r4,r5,r8,r12}
-    bpl     vcloop_40_aligned
+    bpl     vcloop_aligned
 
+    tst     r3, #0x10000
     add     r1, r1, #336         @ skip a line and 1 col
-    add     r0, r0, #320*2+2*2
-    add     r3, r3, #(320/8)<<24
+    addne   r1, r1, #64          @ skip more for 32col mode
+    add     r0, r0, #(320+2)*2
+    addne   r0, r0, #64*2
+    addeq   r3, r3, #(320/8)<<24
+    addne   r3, r3, #(256/8)<<24
     sub     r3, r3, #1
     tst     r3, #0xff
-    bne     vcloop_40_aligned
+    bne     vcloop_aligned
 
     and     r4, r3, #0xff00
     orr     r3, r3, r4, lsr #8
@@ -65,18 +74,10 @@ vcloop_40_aligned:
     mul     r4, r5, r6
     sub     r1, r1, r4
 
-vcloop_40_unaligned_outer:
-    ldr     r12, [r1], #4
-    ldr     r7, [r1], #4
-
-    and     r4, lr, r12, lsl #1
-    ldrh    r4, [r2, r4]
-    and     r5, lr, r12, lsr #7
-    ldrh    r5, [r2, r5]
-    strh    r4, [r0], #2
-    b       vcloop_40_unaligned_enter
+    sub     r0, r0, #2
+    mov     r8, #0
 
-vcloop_40_unaligned:
+vcloop_unaligned:
     ldr     r12, [r1], #4
     ldr     r7, [r1], #4
 
@@ -84,44 +85,47 @@ vcloop_40_unaligned:
     ldrh    r6, [r2, r6]
     and     r5, lr, r12, lsr #7
     ldrh    r5, [r2, r5]
-    orr     r4, r4, r6, lsl #16
-    str     r4, [r0], #4
+    orr     r4, r8, r6, lsl #16
 
-vcloop_40_unaligned_enter:
     and     r6, lr, r12, lsr #15
     ldrh    r6, [r2, r6]
-
-    and     r4, lr, r12, lsr #23
-    ldrh    r4, [r2, r4]
+    and     r8, lr, r12, lsr #23
+    ldrh    r8, [r2, r8]
     orr     r5, r5, r6, lsl #16
 
-    and     r8, lr, r7, lsl #1
-    ldrh    r8, [r2, r8]
-    and     r6, lr, r7, lsr #7
+    and     r6, lr, r7, lsl #1
     ldrh    r6, [r2, r6]
-    orr     r8, r4, r8, lsl #16
-
-    and     r12,lr, r7, lsr #15
+    and     r12,lr, r7, lsr #7
     ldrh    r12,[r2, r12]
+    orr     r6, r8, r6, lsl #16
+
+    and     r8, lr, r7, lsr #15
+    ldrh    r8, [r2, r8]
 
-    and     r4, lr, r7, lsr #23
-    ldrh    r4, [r2, r4]
-    orr     r12,r6, r12,lsl #16
     subs    r3, r3, #1<<24
+    and     r7, lr, r7, lsr #23
+    orr     r12,r12,r8, lsl #16
+
+    ldrh    r8, [r2, r7]
 
-    stmia   r0!, {r5,r8,r12}
-    bpl     vcloop_40_unaligned
+    stmia   r0!, {r4,r5,r6,r12}
+    bpl     vcloop_unaligned
 
-    strh    r4, [r0], #2
+    strh    r8, [r0]
+    mov     r8, #0
 
+    tst     r3, #0x10000
     add     r1, r1, #336         @ skip a line and 1 col
-    add     r0, r0, #320*2+2*2
-    add     r3, r3, #(320/8)<<24
+    addne   r1, r1, #64          @ skip more for 32col mode
+    add     r0, r0, #(320+2)*2
+    addne   r0, r0, #64*2
+    addeq   r3, r3, #(320/8)<<24
+    addne   r3, r3, #(256/8)<<24
     sub     r3, r3, #1
     tst     r3, #0xff
-    bne     vcloop_40_unaligned_outer
+    bne     vcloop_unaligned
 
-    ldmfd   sp!, {r4-r9,lr}
+    ldmfd   sp!, {r4-r8,lr}
     bx      lr
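
Note: the routine above is essentially a palette-lookup blit. As a rough orientation for readers less familiar with ARM assembly, the following is a minimal C sketch of the per-line operation it performs (names are hypothetical, illustration only): each 8-bit source pixel selects an entry in a 256-entry table of 16-bit colors, and the looked-up value is stored to the 16bpp destination. The 8-pixel unrolling, the separate passes over even and odd lines, the source/destination line strides and the 32-column-mode offsets handled by the assembly are deliberately left out.

    /* Minimal illustrative sketch (hypothetical names), not part of picodrive. */
    typedef unsigned char  u8;
    typedef unsigned short u16;

    /* Convert one line of 8bpp indexed pixels to 16bpp through a palette
       lookup, the core operation of vidCpy8to16 above. */
    static void copy_line_8to16(u16 *dest, const u8 *src, const u16 *pal, int pixels)
    {
        int x;
        for (x = 0; x < pixels; x++)
            dest[x] = pal[src[x]];   /* 8-bit index -> 16-bit color */
    }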