renderers (interlace and stuff)
[picodrive.git] / platform / uiq3 / engine / blit.s
CommitLineData
cc68a136 1@ some color conversion and blitting routines\r
2\r
3@ (c) Copyright 2006, notaz\r
4@ All Rights Reserved\r
5\r
6\r
7@ Convert 0000bbb0 ggg0rrr0 0000bbb0 ggg0rrr0\r
8@ to 00000000 rrr00000 ggg00000 bbb00000 ...\r
9\r
@ convRGB32_2 rin, sh: takes one word holding two 16-bit source pixels in
@ the register named by rin, converts both to 32-bit 00RRGGBB words and
@ stores them through r0 (low pixel first, r0 advances by 8 bytes in total).
@   sh=0 (default): plain conversion; each channel's top 3 bits are also
@                   copied into the 3 bits below them (orr ..., lsr #3).
@   sh=1:           the converted word is shifted right by one bit, halving
@                   every channel (the "shadow mode" path); no replication.
@   sh=2:           0x40 is added to every channel and the channel's top
@                   3 bits are forced to 111 when the add carries out, then
@                   the bits are replicated as for sh=0.
@ Clobbers r2, r3 and rin.
10@ lr = 0x00e000e0, out: r3=lower_pix, r2=higher_pix; trashes rin\r
11@ only sh==2 destroys flags (it uses adds); sh==0 and sh==1 leave them untouched. r8 is not used by this macro.\r
12.macro convRGB32_2 rin sh=0\r
13 and r2, lr, \rin, lsr #4 @ blue\r
14 and r3, \rin, lr\r
15 orr r2, r2, r3, lsl #8 @ g0b0g0b0\r
16\r
17 mov r3, r2, lsl #16 @ g0b00000\r
18 and \rin,lr, \rin, ror #12 @ 00r000r0 (reversed)\r
19 orr r3, r3, \rin, lsr #16 @ g0b000r0\r
20.if \sh == 1\r
21 mov r3, r3, ror #17 @ shadow mode\r
22.elseif \sh == 2\r
 @ each channel is rotated into the top byte in turn so that the carry
 @ out of adds tells us when that channel overflowed
23 adds r3, r3, #0x40000000 @ green\r
24 orrcs r3, r3, #0xe0000000\r
25 mov r3, r3, ror #8\r
26 adds r3, r3, #0x40000000\r
27 orrcs r3, r3, #0xe0000000\r
28 mov r3, r3, ror #16\r
29 adds r3, r3, #0x40000000\r
30 orrcs r3, r3, #0xe0000000\r
31 mov r3, r3, ror #24\r
32 orr r3, r3, r3, lsr #3\r
33.else\r
34 mov r3, r3, ror #16 @ r3=low\r
35 orr r3, r3, r3, lsr #3\r
36.endif\r
37\r
38 str r3, [r0], #4\r
39\r
 @ high pixel: green/blue come from the upper halfword of r2, red from
 @ the low byte of the (reversed) red word left in rin
40 mov r2, r2, lsr #16\r
41 orr r2, r2, \rin, lsl #16\r
42.if \sh == 1\r
43 mov r2, r2, lsr #1\r
44.elseif \sh == 2\r
45 mov r2, r2, ror #8\r
46 adds r2, r2, #0x40000000 @ blue\r
47 orrcs r2, r2, #0xe0000000\r
48 mov r2, r2, ror #8\r
49 adds r2, r2, #0x40000000\r
50 orrcs r2, r2, #0xe0000000\r
51 mov r2, r2, ror #8\r
52 adds r2, r2, #0x40000000\r
53 orrcs r2, r2, #0xe0000000\r
54 mov r2, r2, ror #8\r
55 orr r2, r2, r2, lsr #3\r
56.else\r
57 orr r2, r2, r2, lsr #3\r
58.endif\r
59\r
60 str r2, [r0], #4\r
61.endm\r
62\r
63\r
@ void vidConvCpyRGB32(void *to, void *from, int pixels)
@ In:  r0 = to (32-bit pixels are stored here), r1 = from (16-bit source
@      pixels, two per word), r2 = pixels
@ Converts 8 pixels per loop pass: four source words are loaded with ldmia
@ and each convRGB32_2 expansion stores two output words.
@ The pass count is pixels/8 (remainder dropped). The body runs before the
@ decremented count is acted on, so one pass (8 pixels) is always done,
@ even when pixels < 8.
@ Scratch: r2, r3, r12. r4-r7 and lr are saved on entry and restored on exit.
64.global vidConvCpyRGB32 @ void *to, void *from, int pixels\r
65\r
66vidConvCpyRGB32:\r
67 stmfd sp!, {r4-r7,lr}\r
68\r
69 mov r12, r2, lsr #3 @ repeats\r
 @ lr = 0x00e000e0, the channel mask convRGB32_2 expects
70 mov lr, #0x00e00000\r
71 orr lr, lr, #0x00e0\r
72\r
73.loopRGB32:\r
74 subs r12, r12, #1\r
 @ the flags from subs are still valid at the bgt below: the sh=0
 @ expansion of convRGB32_2 contains no flag-setting instruction
75\r
76 ldmia r1!, {r4-r7}\r
77 convRGB32_2 r4\r
78 convRGB32_2 r5\r
79 convRGB32_2 r6\r
80 convRGB32_2 r7\r
81\r
82 bgt .loopRGB32\r
83\r
84 ldmfd sp!, {r4-r7,lr}\r
85 bx lr\r
86\r
87\r
@ void vidConvCpyRGB32sh(void *to, void *from, int pixels)
@ Same loop as vidConvCpyRGB32, but every convRGB32_2 is expanded with sh=1,
@ so each converted pixel word is shifted right by one bit (channels halved).
@ In:  r0 = to, r1 = from, r2 = pixels
@ pixels/8 passes of 8 pixels each; at least one pass is always executed.
@ Scratch: r2, r3, r12. r4-r7 and lr are saved on entry and restored on exit.
88.global vidConvCpyRGB32sh @ void *to, void *from, int pixels\r
89\r
90vidConvCpyRGB32sh:\r
91 stmfd sp!, {r4-r7,lr}\r
92\r
93 mov r12, r2, lsr #3 @ repeats\r
 @ lr = 0x00e000e0, the channel mask convRGB32_2 expects
94 mov lr, #0x00e00000\r
95 orr lr, lr, #0x00e0\r
96\r
97.loopRGB32sh:\r
98 subs r12, r12, #1\r
 @ safe to test these flags after the macros: the sh=1 expansion only
 @ uses non-flag-setting mov/and/orr/str
99\r
100 ldmia r1!, {r4-r7}\r
101 convRGB32_2 r4, 1\r
102 convRGB32_2 r5, 1\r
103 convRGB32_2 r6, 1\r
104 convRGB32_2 r7, 1\r
105\r
106 bgt .loopRGB32sh\r
107\r
108 ldmfd sp!, {r4-r7,lr}\r
109 bx lr\r
110\r
111\r
@ void vidConvCpyRGB32hi(void *to, void *from, int pixels)
@ Same loop as vidConvCpyRGB32, but every convRGB32_2 is expanded with sh=2,
@ which adds 0x40 to each colour channel with clamping.
@ In:  r0 = to, r1 = from, r2 = pixels
@ pixels/8 passes of 8 pixels each; at least one pass is always executed.
@ Scratch: r2, r3, r12. r4-r7 and lr are saved on entry and restored on exit.
112.global vidConvCpyRGB32hi @ void *to, void *from, int pixels\r
113\r
114vidConvCpyRGB32hi:\r
115 stmfd sp!, {r4-r7,lr}\r
116\r
117 mov r12, r2, lsr #3 @ repeats\r
 @ lr = 0x00e000e0, the channel mask convRGB32_2 expects
118 mov lr, #0x00e00000\r
119 orr lr, lr, #0x00e0\r
120\r
121.loopRGB32hi:\r
122 ldmia r1!, {r4-r7}\r
123 convRGB32_2 r4, 2\r
124 convRGB32_2 r5, 2\r
125 convRGB32_2 r6, 2\r
126 convRGB32_2 r7, 2\r
127\r
 @ unlike the other two variants the counter is decremented here, after
 @ the macros, because the sh=2 expansion overwrites the flags with adds
128 subs r12, r12, #1\r
129 bgt .loopRGB32hi\r
130\r
131 ldmfd sp!, {r4-r7,lr}\r
132 bx lr\r
133\r
134\r
135@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
136\r
137@ -------- M2 stuff ---------\r
138/*\r
139.global vidConvCpy_90 @ void *to, void *from, int width\r
140\r
141vidConvCpy_90:\r
142 stmfd sp!, {r4-r10,lr}\r
143\r
144 mov lr, #0x00F00000\r
145 orr lr, lr, #0x00F0\r
146\r
147 mov r12, #224/4 @ row counter\r
148 mov r10, r2, lsl #2 @ we do 2 pixel wide copies\r
149\r
150 add r8, r0, #256*4 @ parallel line\r
151 add r1, r1, #0x23000\r
152 add r1, r1, #0x00B80 @ r1+=328*223*2+8*2\r
153 mov r9, r1\r
154\r
155 mov r4, #0 @ fill bottom border\r
156 mov r5, #0\r
157 mov r6, #0\r
158 mov r7, #0\r
159 stmia r0!, {r4-r7}\r
160 stmia r0!, {r4-r7}\r
161 stmia r8!, {r4-r7}\r
162 stmia r8!, {r4-r7}\r
163\r
164.loopM2RGB32_90:\r
165 subs r12, r12, #1\r
166\r
167 @ at first this loop was written differently: src pixels were fetched with ldm's and\r
168 @ dest was not sequential. It ran nearly 2 times slower. It seems it is very important\r
169 @ to do sequential memory access on those items, which we have more (to offload addressing bus?).\r
170\r
171 ldr r4, [r1], #-328*2\r
172 ldr r5, [r1], #-328*2\r
173 ldr r6, [r1], #-328*2\r
174 ldr r7, [r1], #-328*2\r
175\r
176 convRGB32_2 r4, 1\r
177 convRGB32_2 r5, 1\r
178 convRGB32_2 r6, 1\r
179 convRGB32_2 r7, 1\r
180\r
181 str r4, [r8], #4\r
182 str r5, [r8], #4\r
183 str r6, [r8], #4\r
184 str r7, [r8], #4\r
185\r
186 bne .loopM2RGB32_90\r
187\r
188 mov r4, #0 @ top border\r
189 mov r5, #0\r
190 mov r6, #0\r
191 stmia r0!, {r4-r6,r12}\r
192 stmia r0!, {r4-r6,r12}\r
193 stmia r8!, {r4-r6,r12}\r
194 stmia r8!, {r4-r6,r12}\r
195\r
196 subs r10, r10, #1\r
197 ldmeqfd sp!, {r4-r10,pc} @ return\r
198\r
199 add r0, r8, #16*4 @ set new dst pointer\r
200 add r8, r0, #256*4\r
201 add r9, r9, #2*2 @ fix src pointer\r
202 mov r1, r9\r
203\r
204 stmia r0!, {r4-r6,r12} @ bottom border\r
205 stmia r0!, {r4-r6,r12}\r
206 stmia r8!, {r4-r6,r12}\r
207 stmia r8!, {r4-r6,r12}\r
208\r
209 mov r12, #224/4 @ restore row counter\r
210 b .loopM2RGB32_90\r
211\r
212\r
213\r
214@ converter for vidConvCpy_270\r
215@ lr = 0x00F000F0, out: r3=lower_pix, r2=higher_pix; trashes rin\r
216.macro convRGB32_3 rin\r
217 and r2, lr, \rin, lsr #4 @ blue\r
218 and r3, \rin, lr\r
219 orr r2, r2, r3, lsl #8 @ g0b0g0b0\r
220\r
221 mov r3, r2, lsl #16 @ g0b00000\r
222 and \rin,lr, \rin, ror #12 @ 00r000r0 (reversed)\r
223 orr r3, r3, \rin, lsr #16 @ g0b000r0\r
224\r
225 mov r2, r2, lsr #16\r
226 orr r2, r2, \rin, lsl #16\r
227 str r2, [r0], #4\r
228\r
229 mov \rin,r3, ror #16 @ r3=low\r
230.endm\r
231*/\r
232@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
233\r
234\r
235@ takes byte-sized pixels from r3-r6, fetches from pal and stores to r7,r8,r10,lr\r
236@ r2=pal\r
@ shift selects which byte of each of r3-r6 is used as the palette index
@ (the callers in this file pass 0, 8, 16 or 24). r11 must hold the 0xff
@ byte mask. The index is scaled by 4 (lsl #2), i.e. pal is read as an
@ array of 32-bit words.
@ No instruction here sets the flags, so a condition computed before the
@ expansion is still valid after it.
237.macro mode2_4pix shift\r
238 and r7, r11, r3, lsr #\shift\r
239 ldr r7, [r2, r7, lsl #2]\r
240\r
241 and r8, r11, r4, lsr #\shift\r
242 ldr r8, [r2, r8, lsl #2]\r
243\r
244 and r10,r11, r5, lsr #\shift\r
245 ldr r10,[r2, r10,lsl #2]\r
246\r
247 and lr, r11, r6, lsr #\shift\r
248 ldr lr, [r2, lr, lsl #2]\r
249.endm\r
250\r
@ mode2_4pix_getpix0 dreg, sreg: dreg = pal[byte 0 (bits 0-7) of sreg].
@ dreg is used as the temporary index register before it receives the
@ 32-bit palette word. Flags are not changed.
251@ r2=pal, r11=0xff\r
252.macro mode2_4pix_getpix0 dreg sreg\r
253 and \dreg, r11, \sreg\r
254 ldr \dreg, [r2, \dreg, lsl #2]\r
255.endm\r
256\r
@ mode2_4pix_getpix1 dreg, sreg: dreg = pal[byte 1 (bits 8-15) of sreg].
@ Expects r2 = pal (32-bit entries) and r11 = 0xff. Flags are not changed.
257.macro mode2_4pix_getpix1 dreg sreg\r
258 and \dreg, r11, \sreg, lsr #8\r
259 ldr \dreg, [r2, \dreg, lsl #2]\r
260.endm\r
261\r
@ mode2_4pix_getpix2 dreg, sreg: dreg = pal[byte 2 (bits 16-23) of sreg].
@ Expects r2 = pal (32-bit entries) and r11 = 0xff. Flags are not changed.
262.macro mode2_4pix_getpix2 dreg sreg\r
263 and \dreg, r11, \sreg, lsr #16\r
264 ldr \dreg, [r2, \dreg, lsl #2]\r
265.endm\r
266\r
@ mode2_4pix_getpix3 dreg, sreg: dreg = pal[byte 3 (bits 24-31) of sreg].
@ Expects r2 = pal (32-bit entries) and r11 = 0xff. Flags are not changed.
267.macro mode2_4pix_getpix3 dreg sreg\r
268 and \dreg, r11, \sreg, lsr #24\r
269 ldr \dreg, [r2, \dreg, lsl #2]\r
270.endm\r
271\r
272@ takes byte-sized pixels from reg, fetches from pal and stores to r3-r6\r
273@ r11=0xFF, r2=pal\r
@ Byte order is preserved: byte 0 of reg -> r3, byte 1 -> r4, byte 2 -> r5,
@ byte 3 -> r6. reg itself is left unchanged and must not be one of r3-r6.
274.macro mode2_4pix2_0 reg\r
275 mode2_4pix_getpix0 r3, \reg\r
276 mode2_4pix_getpix1 r4, \reg\r
277 mode2_4pix_getpix2 r5, \reg\r
278 mode2_4pix_getpix3 r6, \reg\r
279.endm\r
280\r
281@ same as mode2_4pix2_0 but with the byte order reversed: byte 3 of reg -> r3, byte 2 -> r4, byte 1 -> r5, byte 0 -> r6 (r11=0xFF, r2=pal)\r
282.macro mode2_4pix2_180 reg\r
283 mode2_4pix_getpix3 r3, \reg\r
284 mode2_4pix_getpix2 r4, \reg\r
285 mode2_4pix_getpix1 r5, \reg\r
286 mode2_4pix_getpix0 r6, \reg\r
287.endm\r
288\r
289@ takes byte-sized pixels from reg, fetches from pal and stores to r3-r5\r
290@ r11=0xFF, r2=pal, r10=0xfcfcfc, r6=tmp\r
@ Squeezes four source pixels p0..p3 into three output pixels:
@   r3 = 0.75*p0 + 0.25*p1   (masked with r10)
@   r4 = (p1 + p2) / 2       (not masked, the and is commented out)
@   r5 = 0.75*p3 + 0.25*p2   (masked with r10)
@ With is180 = 0, p0..p3 are bytes 0..3 of reg; with is180 = 1 they are
@ bytes 3..0, i.e. the pixel order is reversed.
@ The scaling is done with shifts on the whole 32-bit palette word; see the
@ note further down in this file about (pal[x] & 0x030303) == 0.
@ NOTE(review): presumably that zero-low-bits property is what keeps the
@ shifted bits from leaking between colour channels - confirm.
@ is180 has no default and must always be passed. Flags are not changed.
291.macro mode2_4pix_to3 reg is180\r
292.if \is180\r
293 mode2_4pix_getpix3 r3, \reg\r
294 mode2_4pix_getpix2 r4, \reg\r
295.else\r
296 mode2_4pix_getpix0 r3, \reg @ gathering loads cause a weird-hang\r
297 mode2_4pix_getpix1 r4, \reg\r
298.endif\r
299\r
300 sub r3, r3, r3, lsr #2 @ r3 *= 0.75\r
301 add r3, r3, r4, lsr #2 @ r3 += r4 * 0.25\r
302 and r3, r3, r10\r
303\r
304.if \is180\r
305 mode2_4pix_getpix1 r5, \reg\r
306 mode2_4pix_getpix0 r6, \reg\r
307.else\r
308 mode2_4pix_getpix2 r5, \reg\r
309 mode2_4pix_getpix3 r6, \reg\r
310.endif\r
311\r
312 mov r4, r4, lsr #1\r
313 add r4, r4, r5, lsr #1 @ r4 = (r4 + r5) / 2;\r
314@ and r4, r4, r10\r
315 sub r6, r6, r6, lsr #2 @ r6 *= 0.75\r
316 add r5, r6, r5, lsr #2 @ r5 = r6 + r5 * 0.25\r
317 and r5, r5, r10\r
318.endm\r
319\r
320\r
@ vidConvCpyM2_landscape is270: complete function body shared by
@ vidConvCpy_90 (is270 = 0) and vidConvCpy_270 (is270 = 1).
@ In:  r0 = to, r1 = from, r2 = pal, r3 = width
@ Reads 8-bit palette indices from a source with a 328-byte line stride and
@ writes 32-bit palette words to a destination with a 256*4-byte line
@ stride. Byte k of the four loaded source words (four consecutive source
@ rows) becomes four adjacent pixels of one destination line, so source
@ columns end up as destination lines.
@ Register roles:
@   r11             0xff byte mask for mode2_4pix
@   r12             packed counters: the upper half holds the inner pass
@                   count (starts at 224/4-1 and runs until it goes
@                   negative), the lower half holds the outer count (r3*2)
@   r9              source pointer at the start of the current outer pass;
@                   moved by 4 bytes between outer passes
@   r3-r6           four source words (zero while borders are written)
@   r7,r8,r10,lr    palette words produced by mode2_4pix
@ Every outer pass fills four destination lines, each with 8 zero words,
@ 224 pixel words and 8 zero words; r0 then skips 16 words to reach the
@ next group of four lines.
@ NOTE(review): r3 == 0 is not checked; the outer count would wrap instead
@ of reaching zero - callers presumably always pass a positive width.
321@ void *to, void *from, void *pal, int width\r
322.macro vidConvCpyM2_landscape is270\r
323 stmfd sp!, {r4-r11,lr}\r
324\r
325 mov r11, #0xff\r
326\r
327 mov r12, #(224/4-1)<<16 @ row counter\r
328 orr r12, r12, r3, lsl #1 @ we do 4 pixel wide copies (right to left)\r
329\r
 @ starting point in the source: is270 walks downwards from offset 324,
 @ the other variant walks upwards from offset 328*224+8
330.if \is270\r
331 add r1, r1, #324\r
332.else\r
333 add r1, r1, #0x11c00\r
334 add r1, r1, #0x00308 @ 328*224+8\r
335.endif\r
336 mov r9, r1\r
337\r
338 mov r3, #0 @ fill top border\r
339 mov r4, #0\r
340 mov r5, #0\r
341 mov r6, #0\r
342 stmia r0!, {r3-r6}\r
343 stmia r0!, {r3-r6}\r
 @ r7 visits the same 8-word border on the next three destination lines
 @ while r0 stays on the first one
344 add r7, r0, #256*4-8*4\r
345 stmia r7!, {r3-r6}\r
346 stmia r7!, {r3-r6}\r
347 add r7, r7, #256*4-8*4\r
348 stmia r7!, {r3-r6}\r
349 stmia r7!, {r3-r6}\r
350 add r7, r7, #256*4-8*4\r
351 stmia r7!, {r3-r6}\r
352 stmia r7!, {r3-r6}\r
353\r
3540: @ .loopM2RGB32_270:\r
355 subs r12, r12, #1<<16\r
 @ the flags set here are consumed by the bpl at the bottom; nothing in
 @ between sets flags
356\r
357.if \is270\r
358 ldr r3, [r1], #328\r
359 ldr r4, [r1], #328\r
360 ldr r5, [r1], #328\r
361 ldr r6, [r1], #328\r
362.else\r
363 ldr r3, [r1, #-328]!\r
364 ldr r4, [r1, #-328]!\r
365 ldr r5, [r1, #-328]!\r
366 ldr r6, [r1, #-328]!\r
367.endif\r
368\r
 @ first destination line: byte 3 of each word for is270, byte 0 otherwise
369.if \is270\r
370 mode2_4pix 24\r
371.else\r
372 mode2_4pix 0\r
373.endif\r
374 stmia r0, {r7,r8,r10,lr}\r
375 add r0, r0, #256*4\r
376\r
377.if \is270\r
378 mode2_4pix 16\r
379.else\r
380 mode2_4pix 8\r
381.endif\r
382 stmia r0, {r7,r8,r10,lr}\r
383 add r0, r0, #256*4\r
384\r
385.if \is270\r
386 mode2_4pix 8\r
387.else\r
388 mode2_4pix 16\r
389.endif\r
390 stmia r0, {r7,r8,r10,lr}\r
391 add r0, r0, #256*4\r
392\r
393.if \is270\r
394 mode2_4pix 0\r
395.else\r
396 mode2_4pix 24\r
397.endif\r
 @ the last store uses writeback (+16 bytes), then r0 goes back up three
 @ lines: net effect is four pixels to the right on the first line
398 stmia r0!,{r7,r8,r10,lr}\r
399 sub r0, r0, #256*4*3\r
400\r
401 bpl 0b @ .loopM2RGB32_270\r
402\r
403 mov r3, #0 @ bottom border\r
404 mov r4, #0\r
405 mov r5, #0\r
406 mov r6, #0\r
407 stmia r0!, {r3-r6}\r
408 stmia r0!, {r3-r6}\r
409 add r0, r0, #256*4-8*4\r
410 stmia r0!, {r3-r6}\r
411 stmia r0!, {r3-r6}\r
412 add r0, r0, #256*4-8*4\r
413 stmia r0!, {r3-r6}\r
414 stmia r0!, {r3-r6}\r
415 add r0, r0, #256*4-8*4\r
416 stmia r0!, {r3-r6}\r
417 nop @ phone crashes if this is commented out. Do I stress it too much?\r
418 stmia r0!, {r3-r6}\r
419\r
 @ undo the inner counter's final step below zero, then count one outer
 @ pass; return once the outer count reaches zero
420 add r12, r12, #1<<16\r
421 subs r12, r12, #1\r
422 ldmeqfd sp!, {r4-r11,pc} @ return\r
423\r
424 add r0, r0, #16*4\r
425.if \is270\r
426 sub r9, r9, #4 @ fix src pointer\r
427.else\r
428 add r9, r9, #4\r
429.endif\r
430 mov r1, r9\r
431\r
432 stmia r0!, {r3-r6} @ top border\r
433 stmia r0!, {r3-r6}\r
434 add r7, r0, #256*4-8*4\r
435 stmia r7!, {r3-r6}\r
436 stmia r7!, {r3-r6}\r
437 add r7, r7, #256*4-8*4\r
438 stmia r7!, {r3-r6}\r
439 stmia r7!, {r3-r6}\r
440 add r7, r7, #256*4-8*4\r
441 stmia r7!, {r3-r6}\r
442 stmia r7!, {r3-r6}\r
443\r
444 orr r12, r12, #(224/4-1)<<16 @ restore row counter\r
445 b 0b @ .loopM2RGB32_270\r
446.endm\r
447\r
448\r
@ void vidConvCpy_90(void *to, void *from, void *pal, int width)
@ The whole body, including the return, is the is270 = 0 expansion of
@ vidConvCpyM2_landscape: the source is walked upwards from offset
@ 328*224+8 and the source pointer advances by 4 bytes per outer pass.
449.global vidConvCpy_90 @ void *to, void *from, void *pal, int width\r
450\r
451vidConvCpy_90:\r
452 vidConvCpyM2_landscape 0\r
453\r
454\r
@ void vidConvCpy_270(void *to, void *from, void *pal, int width)
@ The whole body, including the return, is the is270 = 1 expansion of
@ vidConvCpyM2_landscape: the source is walked downwards from offset 324
@ and the source pointer moves back by 4 bytes per outer pass.
455.global vidConvCpy_270 @ void *to, void *from, void *pal, int width\r
456\r
457vidConvCpy_270:\r
458 vidConvCpyM2_landscape 1\r
459\r
460\r
@ void vidConvCpy_center_0(void *to, void *from, void *pal)
@ In:  r0 = to, r1 = from (8-bit palette indices), r2 = pal (32-bit entries)
@ Copies a fixed 240 x 224 window through the palette: 224 lines of
@ 240/4 = 60 source words, starting 8 bytes into each source line.
@ After every line the destination skips 16 words (240 + 16 = 256 words
@ per line) and the source skips 88 bytes (240 + 88 = 328 bytes per line).
@ r12 packs two counters: the upper half counts words within the line
@ (runs until it goes negative), the lower half counts remaining lines.
@ lr is saved and then reused as the register for the loaded source word.
461.global vidConvCpy_center_0 @ void *to, void *from, void *pal\r
462\r
463vidConvCpy_center_0:\r
464 stmfd sp!, {r4-r6,r11,lr}\r
465\r
466 mov r11, #0xff\r
467 add r1, r1, #8 @ not border (centering 32col here)\r
468\r
469 mov r12, #(240/4-1)<<16\r
470 orr r12, r12, #224\r
471\r
472.loopRGB32_c0:\r
473 ldr lr, [r1], #4\r
474 subs r12, r12, #1<<16\r
475\r
476 mode2_4pix2_0 lr\r
477 stmia r0!, {r3-r6}\r
478 bpl .loopRGB32_c0\r
479\r
 @ end of line: count one line down and carry the word counter back from
 @ -1 to 0; the result is zero only when no lines remain
480 sub r12, r12, #1\r
481 adds r12, r12, #1<<16\r
482 ldmeqfd sp!, {r4-r6,r11,pc} @ return\r
483 add r0, r0, #16*4\r
484 add r1, r1, #88\r
485 orr r12, #(240/4-1)<<16\r
486 b .loopRGB32_c0\r
487\r
488\r
@ void vidConvCpy_center_180(void *to, void *from, void *pal)
@ In:  r0 = to, r1 = from (8-bit palette indices), r2 = pal (32-bit entries)
@ Same 240 x 224 copy as vidConvCpy_center_0, but the source is read
@ backwards: it starts at from + 328*224 - 72 and uses pre-decrementing
@ loads, and mode2_4pix2_180 reverses the four pixels inside each word.
@ The destination is still written forwards (60 words, then a 16-word skip).
@ r12 packs the per-line word counter (upper half) and the remaining-line
@ counter (lower half).
@ NOTE(review): given the same from pointer, this routine reads bytes
@ 16..255 of every source line whereas vidConvCpy_center_0 reads bytes
@ 8..247 - confirm that the 8-byte difference is intended or that the
@ callers pass different pointers.
489.global vidConvCpy_center_180 @ void *to, void *from, void *pal\r
490\r
491vidConvCpy_center_180:\r
492 stmfd sp!, {r4-r6,r11,lr}\r
493\r
494 mov r11, #0xff\r
495 add r1, r1, #0x11c00\r
496 add r1, r1, #0x002B8 @ #328*224-72\r
497\r
498 mov r12, #(240/4-1)<<16\r
499 orr r12, r12, #224\r
500\r
501.loopRGB32_c180:\r
502 ldr lr, [r1, #-4]!\r
503 subs r12, r12, #1<<16\r
504\r
505 mode2_4pix2_180 lr\r
506 stmia r0!, {r3-r6}\r
507 bpl .loopRGB32_c180\r
508\r
 @ end of line: count one line down and carry the word counter back from
 @ -1 to 0; the result is zero only when no lines remain
509 sub r12, r12, #1\r
510 adds r12, r12, #1<<16\r
511 ldmeqfd sp!, {r4-r6,r11,pc} @ return\r
512 add r0, r0, #16*4\r
513 sub r1, r1, #88\r
514 orr r12, #(240/4-1)<<16\r
515 b .loopRGB32_c180\r
516\r
517\r
518@ note: the following code assumes that (pal[x] & 0x030303) == 0\r
519\r
@ void vidConvCpy_center2_40c_0(void *to, void *from, void *pal, int lines)
@ In:  r0 = to, r1 = from (8-bit palette indices), r2 = pal, r3 = lines
@ Shrinks every source line from 320 to 240 pixels: each pass loads one
@ word (4 pixels) and mode2_4pix_to3 blends it down to 3 output words;
@ 240/3 = 80 passes per line. The first 8 bytes of every 328-byte source
@ line are skipped, and the destination skips 16 words after each line.
@ r10 = 0x00fcfcfc (mask used by mode2_4pix_to3), r11 = 0xff.
@ r12 packs the per-line pass counter (upper half) and the remaining-line
@ counter (lower half, taken from r3).
@ NOTE(review): lines == 0 is not checked; the line counter would wrap
@ instead of reaching zero - callers presumably pass a positive count.
520.global vidConvCpy_center2_40c_0 @ void *to, void *from, void *pal, int lines\r
521\r
522vidConvCpy_center2_40c_0:\r
523 stmfd sp!, {r4-r6,r10,r11,lr}\r
524\r
525 mov r11, #0xff\r
 @ build r10 = 0xfcfcfc by or-ing the value with itself shifted left twice
526 mov r10, #0xfc\r
527 orr r10, r10, lsl #8\r
528 orr r10, r10, lsl #8\r
529 add r1, r1, #8 @ border\r
530\r
531 mov r12, #(240/3-1)<<16\r
532 orr r12, r12, r3\r
533\r
534.loopRGB32_c2_40c_0:\r
535 ldr lr, [r1], #4\r
536 subs r12, r12, #1<<16\r
537\r
538 mode2_4pix_to3 lr, 0\r
539\r
540 stmia r0!, {r3-r5}\r
541 bpl .loopRGB32_c2_40c_0\r
542\r
 @ end of line: count one line down and carry the pass counter back from
 @ -1 to 0; the result is zero only when no lines remain
543 sub r12, r12, #1\r
544 adds r12, r12, #1<<16\r
545 ldmeqfd sp!, {r4-r6,r10,r11,pc} @ return\r
546 add r0, r0, #16*4\r
547 add r1, r1, #8\r
548 orr r12, #(240/3-1)<<16\r
549 b .loopRGB32_c2_40c_0\r
550\r
551\r
@ void vidConvCpy_center2_40c_180(void *to, void *from, void *pal, int lines)
@ In:  r0 = to, r1 = from (8-bit palette indices), r2 = pal, r3 = lines
@ Same 320 -> 240 shrink as vidConvCpy_center2_40c_0, but the source is read
@ backwards: r1 starts at from + lines*328 and is pre-decremented, and
@ mode2_4pix_to3 is expanded with is180 = 1 so the pixels inside each word
@ are taken in reverse order. The destination is written forwards.
@ r10 = 0x00fcfcfc, r11 = 0xff; r12 packs the per-line pass counter (upper
@ half) and the remaining-line counter (lower half, taken from r3).
@ NOTE(review): lines == 0 is not checked; the line counter would wrap
@ instead of reaching zero - callers presumably pass a positive count.
552.global vidConvCpy_center2_40c_180 @ void *to, void *from, void *pal, int lines\r
553\r
554vidConvCpy_center2_40c_180:\r
555 stmfd sp!, {r4-r6,r10,r11,lr}\r
556\r
557 mov r11, #0xff\r
 @ build r10 = 0xfcfcfc by or-ing the value with itself shifted left twice
558 mov r10, #0xfc\r
559 orr r10, r10, lsl #8\r
560 orr r10, r10, lsl #8\r
561\r
 @ r1 = from + lines * 328, i.e. just past the last source line to convert
562 mov r4, #328\r
563 mla r1, r3, r4, r1\r
564@ add r1, r1, #0x11000\r
565@ add r1, r1, #0x00f00 @ #328*224\r
566\r
567 mov r12, #(240/3-1)<<16\r
568 orr r12, r12, r3\r
569\r
570.loop_c2_40c_180:\r
571 ldr lr, [r1, #-4]!\r
572 subs r12, r12, #1<<16\r
573\r
574 mode2_4pix_to3 lr, 1\r
575\r
576 stmia r0!, {r3-r5}\r
577 bpl .loop_c2_40c_180\r
578\r
 @ end of line: count one line down and carry the pass counter back from
 @ -1 to 0; the result is zero only when no lines remain
579 sub r12, r12, #1\r
580 adds r12, r12, #1<<16\r
581 ldmeqfd sp!, {r4-r6,r10,r11,pc} @ return\r
582 add r0, r0, #16*4\r
583 sub r1, r1, #8\r
584 orr r12, #(240/3-1)<<16\r
585 b .loop_c2_40c_180\r
586\r
587\r
@ void vidConvCpy_center2_32c_0(void *to, void *from, void *pal, int lines)
@ In:  r0 = to, r1 = from (8-bit palette indices), r2 = pal, r3 = lines
@ Shrinks every source line from 256 to 240 pixels: each pass loads four
@ words (16 pixels); the first three are copied 1:1 through the palette
@ (12 output words) and the fourth is blended from 4 pixels down to 3 by
@ mode2_4pix_to3, giving 15 output words. 240/15 = 16 passes per line.
@ The first 8 bytes of every 328-byte source line are skipped; after a
@ line the source skips 64+8 bytes and the destination skips 16 words.
@ r10 = 0x00fcfcfc, r11 = 0xff; r12 packs the per-line pass counter (upper
@ half) and the remaining-line counter (lower half, taken from r3).
@ NOTE(review): lines == 0 is not checked; the line counter would wrap
@ instead of reaching zero - callers presumably pass a positive count.
588.global vidConvCpy_center2_32c_0 @ void *to, void *from, void *pal, int lines\r
589\r
590vidConvCpy_center2_32c_0:\r
591 stmfd sp!, {r4-r11,lr}\r
592\r
 @ build r10 = 0xfcfcfc by or-ing the value with itself shifted left twice
593 mov r10, #0xfc\r
594 orr r10, r10, lsl #8\r
595 orr r10, r10, lsl #8\r
596 mov r11, #0xff\r
597 add r1, r1, #8 @ border\r
598\r
599 mov r12, #(240/15-1)<<16\r
600 orr r12, r12, r3\r
601\r
602.loop_c2_32c_0:\r
603 ldmia r1!, {r7-r9,lr}\r
604 subs r12, r12, #1<<16\r
605\r
606 mode2_4pix2_0 r7\r
607 stmia r0!, {r3-r6}\r
608 mode2_4pix2_0 r8\r
609 stmia r0!, {r3-r6}\r
610 mode2_4pix2_0 r9\r
611 stmia r0!, {r3-r6}\r
612 mode2_4pix_to3 lr, 0\r
613 stmia r0!, {r3-r5}\r
614 bpl .loop_c2_32c_0\r
615\r
 @ end of line: count one line down and carry the pass counter back from
 @ -1 to 0; the result is zero only when no lines remain
616 sub r12, r12, #1\r
617 adds r12, r12, #1<<16\r
618 ldmeqfd sp!, {r4-r11,pc} @ return\r
619 add r0, r0, #16*4\r
620 add r1, r1, #64+8\r
621 orr r12, #(240/15-1)<<16\r
622 b .loop_c2_32c_0\r
623\r
624\r
@ void vidConvCpy_center2_32c_180(void *to, void *from, void *pal, int lines)
@ In:  r0 = to, r1 = from (8-bit palette indices), r2 = pal, r3 = lines
@ Same 256 -> 240 shrink as vidConvCpy_center2_32c_0, but the source is read
@ backwards: r1 starts at from + lines*328 and ldmdb fetches four words
@ below it per pass. The words are consumed from the highest address (lr)
@ to the lowest (r7) with the byte order reversed inside each word; the
@ lowest word is the one blended from 4 pixels to 3.
@ r10 = 0x00fcfcfc, r11 = 0xff; r12 packs the per-line pass counter (upper
@ half) and the remaining-line counter (lower half, taken from r3).
@ NOTE(review): given the same from pointer, this routine reads bytes
@ 72..327 of every source line whereas vidConvCpy_center2_32c_0 reads bytes
@ 8..263 - confirm the callers pass a suitably adjusted pointer.
@ NOTE(review): lines == 0 is not checked; the line counter would wrap
@ instead of reaching zero - callers presumably pass a positive count.
625.global vidConvCpy_center2_32c_180 @ void *to, void *from, void *pal, int lines\r
626\r
627vidConvCpy_center2_32c_180:\r
628 stmfd sp!, {r4-r11,lr}\r
629\r
 @ build r10 = 0xfcfcfc by or-ing the value with itself shifted left twice
630 mov r10, #0xfc\r
631 orr r10, r10, lsl #8\r
632 orr r10, r10, lsl #8\r
633 mov r11, #0xff\r
634\r
 @ r1 = from + lines * 328, i.e. just past the last source line to convert
635 mov r4, #328\r
636 mla r1, r3, r4, r1\r
637@ add r1, r1, #0x11000\r
638@ add r1, r1, #0x00f00 @ #328*224\r
639\r
640 mov r12, #(240/15-1)<<16\r
641 orr r12, r12, r3\r
642\r
643.loop_c2_32c_180:\r
644 ldmdb r1!, {r7-r9,lr}\r
645 subs r12, r12, #1<<16\r
646\r
647 mode2_4pix2_180 lr\r
648 stmia r0!, {r3-r6}\r
649 mode2_4pix2_180 r9\r
650 stmia r0!, {r3-r6}\r
651 mode2_4pix2_180 r8\r
652 stmia r0!, {r3-r6}\r
653 mode2_4pix_to3 r7, 1\r
654 stmia r0!, {r3-r5}\r
655 bpl .loop_c2_32c_180\r
656\r
 @ end of line: count one line down and carry the pass counter back from
 @ -1 to 0; the result is zero only when no lines remain
657 sub r12, r12, #1\r
658 adds r12, r12, #1<<16\r
659 ldmeqfd sp!, {r4-r11,pc} @ return\r
660 add r0, r0, #16*4\r
661 sub r1, r1, #64+8\r
662 orr r12, #(240/15-1)<<16\r
663 b .loop_c2_32c_180\r
664\r
665\r
666@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
667\r
668\r
@ void vidClear(void *to, int lines)
@ In:  r0 = to, r1 = lines
@ Writes zero words to `lines` destination lines. Per line, 240/16 = 15
@ passes store 16 words each (240 words), then 16 words are skipped, so a
@ destination line is 256 words apart from the next.
@ r12 packs two counters: the upper half counts passes within the line
@ (runs until it goes negative), the lower half counts remaining lines.
@ r1-r3 and lr hold the zeros being stored; lr is saved on the stack first.
@ Returns immediately, without writing anything, when lines <= 0. Without
@ this check a zero count still cleared one line and then wrapped the packed
@ line counter instead of terminating.
669.global vidClear @ void *to, int lines\r
670\r
671vidClear:\r
 cmp r1, #0 @ nothing to do for lines <= 0
 bxle lr
672 stmfd sp!, {lr}\r
673 mov r12, #240/16-1\r
674 orr r12, r1, r12, lsl #16\r
675 mov r1, #0\r
676 mov r2, #0\r
677 mov r3, #0\r
678 mov lr, #0\r
679\r
680.loopVidClear:\r
681 subs r12, r12, #1<<16\r
 @ stmia does not change the flags, so the bpl below still sees this result
682\r
683 stmia r0!, {r1-r3,lr}\r
684 stmia r0!, {r1-r3,lr}\r
685 stmia r0!, {r1-r3,lr}\r
686 stmia r0!, {r1-r3,lr}\r
687 bpl .loopVidClear\r
688\r
 @ end of line: count one line down and carry the pass counter back from
 @ -1 to 0; the result is zero only when no lines remain
689 sub r12, r12, #1\r
690 adds r12, r12, #1<<16\r
691 ldmeqfd sp!, {pc} @ return\r
692 add r0, r0, #16*4\r
693 orr r12, #(240/16-1)<<16\r
694 b .loopVidClear\r
695\r