@ vim:filetype=armasm
@
@ picodrive.git: platform/gizmondo/asm_utils.s
@ giz wip (acc 16bit faster than 8!)


@ --------------------------------------------------------------------------
@ void vidCpy8to16_40(void *dest, void *src, short *pal, int lines)
@
@ Converts 8-bit palette-indexed pixels to 16-bit pixels by looking each
@ pixel up in pal[]. 320 pixels (40 groups of 8) are converted per line.
@
@ In:    r0 = dest   16-bit output. Lines are 642 bytes (321 pixels) apart,
@                    of which 320 pixels are written. Assumed 4-byte
@                    aligned, since word stores are used.
@        r1 = src    8-bit input. Lines are 328 bytes apart; the first
@                    8 bytes of every line are skipped. Assumed 4-byte
@                    aligned, since word loads are used.
@        r2 = pal    halfword table indexed by pixel value (0..255).
@        r3 = lines  line count. Only lines/2 line pairs are converted, so
@                    an odd trailing line is left untouched. lines/2 must
@                    fit in 8 bits (it is packed into single bytes of r3).
@ Out:   nothing.
@ Clobb: r0-r3, r12, flags. r4-r9 and lr are saved and restored.
@
@ The work is done in two passes because the 642-byte destination pitch is
@ not a multiple of 4:
@   pass 1 - even lines (0, 2, 4, ...): dest is word aligned, every store
@            is a 4-word stmia.
@   pass 2 - odd lines (1, 3, 5, ...): dest is only halfword aligned, so
@            the first and last pixel of the line are stored with strh and
@            everything in between with word stores.
@
@ r3 is used as a packed counter:
@   bits 31..24  8-pixel groups left in the current line, minus 1
@   bits 15..8   copy of lines/2 (used to restart the count for pass 2)
@   bits  7..0   lines left in the current pass
@ --------------------------------------------------------------------------

.global vidCpy8to16_40 @ void *dest, void *src, short *pal, int lines

vidCpy8to16_40:
    movs    r3, r3, lsr #1          @ r3 = lines / 2
    bxeq    lr                      @ no complete line pair: nothing to do
                                    @ (a zero count would wrap the 8-bit
                                    @ line counter and overrun the buffers)
    stmfd   sp!, {r4-r9,lr}

    orr     r3, r3, r3, lsl #8      @ keep a copy of lines/2 in bits 15..8
    orr     r3, r3, #(320/8-1)<<24  @ bits 31..24 = 39: groups per line - 1
    add     r1, r1, #8              @ skip the first 8 source bytes
    mov     lr, #0xff
    mov     lr, lr, lsl #1          @ lr = 0x1fe, mask for (pixel * 2),
                                    @ i.e. the byte offset into pal

    @ ---- pass 1: even lines, word-aligned destination ----
    @ Per iteration: 8 source pixels in r12 (0..3) and r7 (4..7), pixel 0
    @ in bits 7..0; 8 output pixels packed into r4, r5, r8, r12.
vcloop_40_aligned:
    ldr     r12, [r1], #4
    ldr     r7, [r1], #4

    and     r4, lr, r12, lsl #1
    ldrh    r4, [r2, r4]            @ pal[pixel 0]
    and     r5, lr, r12, lsr #7
    ldrh    r5, [r2, r5]            @ pal[pixel 1]
    and     r6, lr, r12, lsr #15
    ldrh    r6, [r2, r6]            @ pal[pixel 2]
    orr     r4, r4, r5, lsl #16     @ r4 = pixels 0,1

    and     r5, lr, r12, lsr #23
    ldrh    r5, [r2, r5]            @ pal[pixel 3]
    and     r8, lr, r7, lsl #1
    ldrh    r8, [r2, r8]            @ pal[pixel 4]
    orr     r5, r6, r5, lsl #16     @ r5 = pixels 2,3

    and     r6, lr, r7, lsr #7
    ldrh    r6, [r2, r6]            @ pal[pixel 5]
    and     r12,lr, r7, lsr #15
    ldrh    r12,[r2, r12]           @ pal[pixel 6]
    and     r9, lr, r7, lsr #23
    ldrh    r9, [r2, r9]            @ pal[pixel 7]
    orr     r8, r8, r6, lsl #16     @ r8 = pixels 4,5

    subs    r3, r3, #1<<24          @ one group done; goes negative after
                                    @ the 40th group of the line
    orr     r12,r12, r9, lsl #16    @ r12 = pixels 6,7 (flags untouched)

    stmia   r0!, {r4,r5,r8,r12}
    bpl     vcloop_40_aligned

    add     r1, r1, #336 @ skip a line and 1 col
    add     r0, r0, #320*2+2*2      @ 640 written + 644 = two 642-byte lines
    add     r3, r3, #(320/8)<<24    @ group counter: -1 + 40 = 39 again
    sub     r3, r3, #1              @ one line of this pass done
    tst     r3, #0xff
    bne     vcloop_40_aligned

    @ ---- rewind both pointers to line 1 for the odd-line pass ----
    and     r4, r3, #0xff00         @ r4 = (lines/2) << 8
    orr     r3, r3, r4, lsr #8      @ restart the per-pass line counter
    mov     r4, r4, lsr #7          @ r4 = 2 * (lines/2)
    sub     r6, r4, #1              @ r6 = number of lines to step back
    mov     r5, #320*2
    add     r5, r5, #2              @ r5 = 642, destination pitch in bytes
    mul     r4, r5, r6
    sub     r0, r0, r4              @ r0 -> start of destination line 1
    mov     r5, #328                @ source pitch in bytes
    mul     r4, r5, r6
    sub     r1, r1, r4              @ r1 -> source line 1, past the 8 bytes

    @ ---- pass 2: odd lines, halfword-aligned destination ----
    @ Line start: pixel 0 is stored alone with strh, which brings r0 to a
    @ word boundary; pixel 1 stays in r5 for the shared code below.
vcloop_40_unaligned_outer:
    ldr     r12, [r1], #4
    ldr     r7, [r1], #4

    and     r4, lr, r12, lsl #1
    ldrh    r4, [r2, r4]            @ pal[pixel 0]
    and     r5, lr, r12, lsr #7
    ldrh    r5, [r2, r5]            @ pal[pixel 1]
    strh    r4, [r0], #2
    b       vcloop_40_unaligned_enter

    @ Groups 2..40 of a line: r4 still holds pixel 7 of the previous
    @ group; it is paired with pixel 0 of this group and stored as a word.
vcloop_40_unaligned:
    ldr     r12, [r1], #4
    ldr     r7, [r1], #4

    and     r6, lr, r12, lsl #1
    ldrh    r6, [r2, r6]            @ pal[pixel 0]
    and     r5, lr, r12, lsr #7
    ldrh    r5, [r2, r5]            @ pal[pixel 1]
    orr     r4, r4, r6, lsl #16     @ previous pixel 7, this pixel 0
    str     r4, [r0], #4

    @ Shared tail: packs pixels 1..6 into r5, r8, r12 and leaves pixel 7
    @ in r4 for the next group (or for the final strh of the line).
vcloop_40_unaligned_enter:
    and     r6, lr, r12, lsr #15
    ldrh    r6, [r2, r6]            @ pal[pixel 2]

    and     r4, lr, r12, lsr #23
    ldrh    r4, [r2, r4]            @ pal[pixel 3]
    orr     r5, r5, r6, lsl #16     @ r5 = pixels 1,2

    and     r8, lr, r7, lsl #1
    ldrh    r8, [r2, r8]            @ pal[pixel 4]
    and     r6, lr, r7, lsr #7
    ldrh    r6, [r2, r6]            @ pal[pixel 5]
    orr     r8, r4, r8, lsl #16     @ r8 = pixels 3,4

    and     r12,lr, r7, lsr #15
    ldrh    r12,[r2, r12]           @ pal[pixel 6]

    and     r4, lr, r7, lsr #23
    ldrh    r4, [r2, r4]            @ pal[pixel 7], carried in r4
    orr     r12,r6, r12,lsl #16     @ r12 = pixels 5,6
    subs    r3, r3, #1<<24          @ one group done; negative after the 40th

    stmia   r0!, {r5,r8,r12}        @ does not alter the flags from subs
    bpl     vcloop_40_unaligned

    strh    r4, [r0], #2            @ last pixel of the line

    add     r1, r1, #336 @ skip a line and 1 col
    add     r0, r0, #320*2+2*2      @ 640 written + 644 = two 642-byte lines
    add     r3, r3, #(320/8)<<24    @ group counter: -1 + 40 = 39 again
    sub     r3, r3, #1              @ one line of this pass done
    tst     r3, #0xff
    bne     vcloop_40_unaligned_outer

    ldmfd   sp!, {r4-r9,lr}
    bx      lr
