merge an assortment of calc84maniac's fixes.
[gpsp.git] / arm / video_blend.S
CommitLineData
@ ARM (A32) source for GNU as, run through the C preprocessor.
@ "@" starts a comment.

.align 2

.global expand_blend
.global expand_normal

@ ---------------------------------------------------------------------------
@ expand_blend
@
@ Converts a run of 32-bit source pixel pairs into 16-bit output pixels,
@ alpha blending the two palette colors of a pair when both of its flag
@ bits are set.
@
@ Input:
@   r0 = screen_src_ptr   (one 32-bit word per pixel, indexed by start)
@   r1 = screen_dest_ptr  (one 16-bit halfword per pixel, indexed by start)
@   r2 = start
@   r3 = end              (exclusive; end - start pixels are processed)
@
@ Source word layout, as used by the code below:
@   bits  0-8   palette index of the top pixel
@   bits 16-24  palette index of the bottom pixel
@   bits 9 and 26 must both be set for the pair to be blended; otherwise
@   the top pixel color is written unmodified.
@
@ Blend weights are the two 5-bit fields of the halfword at byte offset
@ 0x52 of io_registers (bits 0-4 = blend_a, bits 8-12 = blend_b), each
@ clamped to 16.
@
@ Preserved: r4-r6, r9-r11 (saved on the stack together with lr).
@ Clobbered: r0-r3, r12, flags.
@
@ A run with end <= start returns immediately without touching memory.
@ ---------------------------------------------------------------------------

@ Literal pool for expand_blend; loaded pc-relative.
.Lblend_io_registers:
  .word io_registers
.Lblend_palette:
  .word palette_ram_converted
.Lblend_combine_mask:
  .word 0x04000200                @ both flag bits needed to blend a pair
.Lblend_clamp_mask:
  .word 0x07E0F81F                @ color fields spread over 32 bits
.Lblend_index_mask:
  .word 0x000003FE                @ 9-bit palette index, pre-scaled by 2
.Lblend_saturation_mask:
  .word 0x08010020                @ overflow bit just above each field

expand_blend:
  stmdb sp!, { r4, r5, r6, r9, r10, r11, r14 }

  add r0, r0, r2, lsl #2          @ screen_src_ptr += start (4 bytes/pixel)
  add r1, r1, r2, lsl #1          @ screen_dest_ptr += start (2 bytes/pixel)
  subs r2, r3, r2                 @ r2 = end - start = pixels to process
  ble .Lblend_exit                @ empty or inverted run: nothing to do

  ldr r3, .Lblend_io_registers    @ r3 = io_registers
  ldrh r3, [r3, #0x52]            @ r3 = bldalpha
  mov r4, r3, lsr #8              @ r4 = bldalpha >> 8
  and r3, r3, #0x1F               @ r3 = blend_a
  and r4, r4, #0x1F               @ r4 = blend_b
  cmp r3, #16                     @ clamp blend_a to 16
  movgt r3, #16
  cmp r4, #16                     @ clamp blend_b to 16
  movgt r4, #16

  ldr r14, .Lblend_palette        @ r14 = palette_ram_converted
  ldr r12, .Lblend_combine_mask   @ r12 = 0x04000200
  ldr r11, .Lblend_clamp_mask     @ r11 = 0x07E0F81F
  ldr r10, .Lblend_index_mask     @ r10 = 0x000003FE

  add r5, r3, r4                  @ r5 = blend_a + blend_b
  cmp r5, #16                     @ a weight sum above 16 can overflow a
  bgt .Lblend_sat_setup           @ color field, so use the clamping loop


  @ ---- loop without saturation (blend_a + blend_b <= 16) ----
.Lblend_loop:
  ldr r5, [r0], #4                @ r5 = pixel_pair, screen_src_ptr++
  and r6, r5, r12                 @ isolate the two flag bits
  cmp r6, r12                     @ both set?
  bne .Lblend_copy                @ no: write the top pixel as is

  and r6, r10, r5, lsl #1         @ r6 = (pixel_pair & 0x1FF) << 1
  ldrh r6, [r14, r6]              @ r6 = pixel_top
  orr r6, r6, r6, lsl #16         @ duplicate into both halfwords
  and r6, r6, r11                 @ r6 = pixel_top, fields spread apart

  and r5, r10, r5, lsr #15        @ r5 = ((pixel_pair >> 16) & 0x1FF) << 1
  ldrh r5, [r14, r5]              @ r5 = pixel_bottom
  orr r5, r5, r5, lsl #16         @ duplicate into both halfwords
  and r5, r5, r11                 @ r5 = pixel_bottom, fields spread apart

  mul r5, r4, r5                  @ r5 = pixel_bottom * blend_b
  mla r5, r3, r6, r5              @ r5 += pixel_top * blend_a

  and r5, r11, r5, lsr #4         @ divide by 16, drop the fraction bits
  orr r5, r5, r5, lsr #16         @ fold the high halfword back down

  strh r5, [r1], #2               @ *screen_dest_ptr++ = blended color
  subs r2, r2, #1                 @ one pixel done
  bne .Lblend_loop

  ldmia sp!, { r4, r5, r6, r9, r10, r11, pc }

.Lblend_copy:
  and r5, r10, r5, lsl #1         @ r5 = (pixel_pair & 0x1FF) << 1
  ldrh r5, [r14, r5]              @ r5 = pixel_top
  strh r5, [r1], #2               @ *screen_dest_ptr++ = pixel_top

  subs r2, r2, #1                 @ one pixel done
  bne .Lblend_loop

  ldmia sp!, { r4, r5, r6, r9, r10, r11, pc }


  @ ---- loop with saturation (blend_a + blend_b > 16) ----
.Lblend_sat_setup:
  ldr r9, .Lblend_saturation_mask @ r9 = 0x08010020

.Lblend_sat_loop:
  ldr r5, [r0], #4                @ r5 = pixel_pair, screen_src_ptr++
  and r6, r5, r12                 @ isolate the two flag bits
  cmp r6, r12                     @ both set?
  bne .Lblend_sat_copy            @ no: write the top pixel as is

  and r6, r10, r5, lsl #1         @ r6 = (pixel_pair & 0x1FF) << 1
  ldrh r6, [r14, r6]              @ r6 = pixel_top
  orr r6, r6, r6, lsl #16         @ duplicate into both halfwords
  and r6, r6, r11                 @ r6 = pixel_top, fields spread apart

  and r5, r10, r5, lsr #15        @ r5 = ((pixel_pair >> 16) & 0x1FF) << 1
  ldrh r5, [r14, r5]              @ r5 = pixel_bottom
  orr r5, r5, r5, lsl #16         @ duplicate into both halfwords
  and r5, r5, r11                 @ r5 = pixel_bottom, fields spread apart

  mul r5, r4, r5                  @ r5 = pixel_bottom * blend_b
  mla r5, r3, r6, r5              @ r5 += pixel_top * blend_a

  @ After the shift by 4 an overflowed field has the bit directly above it
  @ set. Smear that bit over the 6 bits below so the field becomes all
  @ ones; bits landing outside a field are removed by the clamp mask.
  and r6, r9, r5, lsr #4          @ r6 = overflow bits
  orr r6, r6, r6, lsr #1          @ propagate down one bit
  orr r6, r6, r6, lsr #2          @ propagate down two more bits
  orr r6, r6, r6, lsr #3          @ propagate down three more bits
  orr r5, r6, r5, lsr #4          @ overlay saturation on the result

  and r5, r11, r5                 @ keep only the color fields
  orr r5, r5, r5, lsr #16         @ fold the high halfword back down
  strh r5, [r1], #2               @ *screen_dest_ptr++ = blended color

  subs r2, r2, #1                 @ one pixel done
  bne .Lblend_sat_loop

  ldmia sp!, { r4, r5, r6, r9, r10, r11, pc }

.Lblend_sat_copy:
  and r5, r10, r5, lsl #1         @ r5 = (pixel_pair & 0x1FF) << 1
  ldrh r5, [r14, r5]              @ r5 = pixel_top
  strh r5, [r1], #2               @ *screen_dest_ptr++ = pixel_top

  subs r2, r2, #1                 @ one pixel done
  bne .Lblend_sat_loop

.Lblend_exit:
  ldmia sp!, { r4, r5, r6, r9, r10, r11, pc }
133\r
134\r
135\r
@ ---------------------------------------------------------------------------
@ expand_normal
@
@ Replaces, in place, each 16-bit palette index in a run of the screen
@ buffer with the 16-bit color read from palette_ram_converted. Only the
@ low 9 bits of each halfword are used as the index.
@
@ Input:
@   r0 = screen_ptr  (16-bit entries; assumed at least halfword aligned)
@   r1 = start
@   r2 = end         (exclusive; end - start pixels are processed)
@
@ Preserved: r4-r7 (saved on the stack together with lr).
@ Clobbered: r0-r3, flags.
@
@ Runs of any length are handled: a single pixel is converted first when
@ the run does not start on a word boundary, then groups of 8 pixels are
@ converted with ldmia/stmia, and any remaining 0-7 pixels are converted
@ one at a time. A run with end <= start returns without touching memory.
@ ---------------------------------------------------------------------------

@ Converts the two palette indices held in the halfwords of reg into their
@ two colors, keeping each color in the halfword its index came from.
@ Expects r2 = palette_ram_converted and r3 = 0x3FE. Clobbers temp.
#define expand_pixel_pair(reg, temp)                                        ;\
  and temp, r3, reg, lsr #15                                                ;\
  ldrh temp, [r2, temp]                                                     ;\
                                                                            ;\
  and reg, r3, reg, lsl #1                                                  ;\
  ldrh reg, [r2, reg]                                                       ;\
                                                                            ;\
  orr reg, reg, temp, lsl #16                                               ;\


@ Literal pool for expand_normal; loaded pc-relative.
.Lnormal_palette:
  .word palette_ram_converted
.Lnormal_index_mask:
  .word 0x3FE                     @ 9-bit palette index, pre-scaled by 2

expand_normal:
  stmdb sp!, { r4, r5, r6, r7, r14 }

  add r0, r0, r1, lsl #1          @ screen_ptr += start (2 bytes/pixel)
  subs r1, r2, r1                 @ r1 = end - start = pixels remaining
  ble .Lnormal_exit               @ empty or inverted run: nothing to do
  ldr r2, .Lnormal_palette        @ r2 = palette_ram_converted
  ldr r3, .Lnormal_index_mask     @ r3 = 0x3FE

  @ ldmia/stmia need a word-aligned pointer; convert one leading pixel if
  @ the run starts on an odd halfword.
  tst r0, #2
  beq .Lnormal_bulk_check
  ldrh r4, [r0]                   @ r4 = palette index
  and r4, r3, r4, lsl #1          @ r4 = (index & 0x1FF) << 1
  ldrh r4, [r2, r4]               @ r4 = color
  strh r4, [r0], #2               @ store color, screen_ptr++
  sub r1, r1, #1

.Lnormal_bulk_check:
  subs r1, r1, #8                 @ bias the count: negative means < 8 left
  blt .Lnormal_tail_check

  @ 8 pixels (4 words) per iteration.
.Lnormal_bulk:
  ldmia r0, { r4, r5, r6, r7 }

  expand_pixel_pair(r4, r14)
  expand_pixel_pair(r5, r14)
  expand_pixel_pair(r6, r14)
  expand_pixel_pair(r7, r14)

  stmia r0!, { r4, r5, r6, r7 }

  subs r1, r1, #8
  bge .Lnormal_bulk

.Lnormal_tail_check:
  adds r1, r1, #8                 @ undo the bias: r1 = 0..7 pixels left
  beq .Lnormal_exit

  @ Remaining pixels, one halfword at a time.
.Lnormal_tail:
  ldrh r4, [r0]                   @ r4 = palette index
  and r4, r3, r4, lsl #1          @ r4 = (index & 0x1FF) << 1
  ldrh r4, [r2, r4]               @ r4 = color
  strh r4, [r0], #2               @ store color, screen_ptr++
  subs r1, r1, #1
  bne .Lnormal_tail

.Lnormal_exit:
  ldmia sp!, { r4, r5, r6, r7, pc }
181\r