/*
 * 240x160 -> 320x213 upscaler for ARM with interpolation
 *
 * Written by Gražvydas "notaz" Ignotas
 * Prototyped by Rokas
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of the organization nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Each 3x3 block of source pixels (above the 'v') expands to a 4x4 block
 * of output pixels, indexed as follows:
 *
 *   0  1  2 :  3  4  5
 *   6  7  8 :  9 10 11
 *  12 13 14 : 15 16 17
 *           v
 *   0  1  2  3 :  4  5  6  7
 *   8  9 10 11 : 12 13 14 15
 *  16 17 18 19 : 20 21 22 23
 *  24 25 26 27 : 28 29 30 31
 */
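
/*
 * Overview (annotation, not part of the original source): both axes scale
 * 3:4 -- 240 -> 320 columns, and 53 three-line bands -> 212 lines, plus a
 * final line (handled by loop2) for 213 total. A hedged C outline of the
 * traversal only; blend_3x3_to_4x4 is a hypothetical helper standing in
 * for the per-position weights implemented by the macros below:
 *
 *   void upscale_aspect_c(unsigned short *dst, unsigned short *src)
 *   {
 *       int bx, by;
 *       dst += 320 * 13;                  // center 213 lines in 240
 *       for (by = 0; by < 160 / 3; by++)  // 53 rows of 3x3 blocks
 *           for (bx = 0; bx < 240 / 3; bx++)
 *               blend_3x3_to_4x4(dst + by*4*320 + bx*4,
 *                                src + by*3*320 + bx*3); // src stride 320
 *       // the final source line is expanded 3:4 horizontally (loop2)
 *   }
 */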

.macro unpack_hi dst, src
        mov     \dst, \src, lsr #16
        orr     \dst, \dst, \dst, lsl #16
        and     \dst, \dst, lr
.endm

.macro unpack_lo dst, src
        mov     \dst, \src, lsl #16
        orr     \dst, \dst, \dst, lsr #16
        and     \dst, \dst, lr
.endm
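
/*
 * C model of the unpack macros (annotation, not part of the original
 * source): one RGB565 pixel is spread over a 32-bit word as
 * 00000ggg'ggg00000'rrrrr000'000bbbbb, so all three channels can be
 * weighted at once without carrying into each other:
 *
 *   static inline unsigned int unpack(unsigned int px)
 *   {
 *       px &= 0xffff;                    // keep one RGB565 pixel
 *       return (px | px << 16) & 0x07e0f81f;
 *   }
 *
 * unpack_hi is unpack(word >> 16). The store sequences below repack with
 * roughly (v | v >> 16) & 0xffff, folding green back into the low half.
 */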

@ do 3:5 summing: r2 = (s1*3 + s2*5 + 4) / 8
@ s2 != r2
.macro do_3_5 s1, s2
        add     r2,\s1,\s1, lsl #1      @ r2 = s1 * 3
        add     r2, r2,\s2, lsl #2
        add     r2, r2,\s2              @ r2 += s2 * 5
        add     r2, r2, r12,lsl #2      @ sum += round * 4
        and     r2, lr, r2, lsr #3      @ mask_to_unpacked(sum / 8)
.endm
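
/*
 * C model of do_3_5 (annotation, not part of the original source); r12
 * holds the per-channel rounding constant 0x00200801 built at function
 * entry, one lsb per channel of the unpacked form:
 *
 *   static inline unsigned int do_3_5(unsigned int s1, unsigned int s2)
 *   {
 *       return ((s1 * 3 + s2 * 5 + 0x00200801 * 4) >> 3) & 0x07e0f81f;
 *   }
 */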

@ do 14:7:7:4 blending: r2 = (s1*14 + s2*7 + s3*7 + s4*4 + 16) / 32
@ {s2,s3,s4} != r2
.macro do_14_7_7_4 s1, s2, s3, s4
        mov     r2,\s1, lsl #4
        sub     r2, r2,\s1, lsl #1      @ r2 = s1 * 14
        add     r2, r2,\s2, lsl #3
        sub     r2, r2,\s2              @ r2 += s2 * 7
        add     r2, r2,\s3, lsl #3
        sub     r2, r2,\s3              @ r2 += s3 * 7
        add     r2, r2,\s4, lsl #2      @ r2 += s4 * 4
        add     r2, r2, r12,lsl #3      @ sum += round * 16
        and     r2, lr, r2, lsr #5      @ mask_to_unpacked(sum / 32)
.endm
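
/*
 * C model of do_14_7_7_4 (annotation, not part of the original source).
 * The weights sum to 32; even the widest field (6-bit green at bits
 * 21..26) reaches at most 63 * 32 = 2016 < 2048, so no channel overflows
 * into its neighbour within the 32-bit word:
 *
 *   static inline unsigned int
 *   do_14_7_7_4(unsigned int s1, unsigned int s2,
 *               unsigned int s3, unsigned int s4)
 *   {
 *       unsigned int sum = s1*14 + s2*7 + s3*7 + s4*4 + 0x00200801*16;
 *       return (sum >> 5) & 0x07e0f81f;
 *   }
 */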

.global upscale_aspect @ u16 *dst, u16 *src
upscale_aspect:
        stmfd   sp!,{r4-r11,lr}
        mov     lr, #0x0000001f
        orr     lr, lr, #0x0000f800     @ for "unpacked" form of
        orr     lr, lr, #0x07e00000     @ 00000ggg'ggg00000'rrrrr000'000bbbbb
        mov     r12,#0x00000001
        orr     r12,r12,#0x00000800
        orr     r12,r12,#0x00200000     @ rounding constant

        mov     r8, #((240/6)-1) << 24  @ cols
        orr     r8, r8, #160/3          @ rows

        add     r0, r0, #320*2*13
loop1:
        ldr     r10,[r1]
        ldr     r11,[r1, #320*2*1]

        unpack_lo r4, r10
        unpack_hi r5, r10
        unpack_lo r6, r11
        unpack_hi r7, r11

        ldr     r11,[r1, #4]

        do_3_5  r4, r5
        orr     r2, r2, r2, lsr #16
        mov     r3, r10, lsl #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0]                @ 0,1

        ldr     r10,[r1, #320*2*2]

        do_3_5  r4, r6
        orr     r3, r2, r2, lsl #16
        mov     r3, r3, lsr #16         @ 8

        do_14_7_7_4 r7, r5, r6, r4
        orr     r2, r2, r2, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #320*2*1]      @ 8,9

        unpack_lo r4, r10
        unpack_hi r9, r10

        do_3_5  r4, r6
        orr     r3, r2, r2, lsl #16
        mov     r3, r3, lsr #16

        do_14_7_7_4 r7, r9, r6, r4
        orr     r2, r2, r2, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #320*2*2]      @ 16,17

        do_3_5  r4, r9
        orr     r2, r2, r2, lsr #16
        mov     r3, r10, lsl #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #320*2*3]      @ 24,25

        ldr     r10,[r1, #320*2*1+4]

        unpack_lo r6, r11
        unpack_lo r4, r10

        do_3_5  r6, r5
        orr     r2, r2, r2, lsl #16
        mov     r3, r11, lsl #16
        orr     r2, r3, r2, lsr #16
        str     r2, [r0, #4]            @ 2,3

        do_14_7_7_4 r7, r4, r5, r6
        orr     r2, r2, r2, lsl #16
        mov     r3, r2, lsr #16

        ldr     r5, [r1, #320*2*2+4]

        do_3_5  r6, r4
        orr     r2, r2, r2, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #320*2*1+4]    @ 10,11

        unpack_lo r6, r5

        do_14_7_7_4 r7, r4, r9, r6
        orr     r2, r2, r2, lsl #16
        mov     r3, r2, lsr #16

        do_3_5  r6, r4
        orr     r2, r2, r2, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #320*2*2+4]    @ 18,19

        unpack_hi r4, r10

        ldr     r10,[r1, #8]

        do_3_5  r6, r9
        orr     r2, r2, r2, lsl #16
        mov     r2, r2, lsr #16
        orr     r2, r2, r5, lsl #16
        str     r2, [r0, #320*2*3+4]    @ 26,27

        unpack_hi r6, r11
        unpack_lo r7, r10

        do_3_5  r6, r7
        orr     r2, r2, r2, lsr #16
        mov     r2, r2, lsl #16
        orr     r2, r2, r11,lsr #16
        str     r2, [r0, #8]            @ 4,5

        ldr     r11,[r1, #320*2*1+8]

        unpack_hi r9, r10

        do_3_5  r9, r7
        orr     r2, r2, r2, lsr #16
        mov     r2, r2, lsl #16
        orr     r2, r2, r10,lsr #16
        mov     r2, r2, ror #16
        str     r2, [r0, #12]           @ 6,7

        unpack_lo r10,r11

        do_3_5  r6, r4
        orr     r2, r2, r2, lsl #16
        mov     r3, r2, lsr #16

        do_14_7_7_4 r10, r4, r7, r6
        orr     r2, r2, r2, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #320*2*1+8]    @ 12,13

        unpack_hi r6, r11

        ldr     r11,[r1, #320*2*2+8]

        do_14_7_7_4 r10, r6, r7, r9
        orr     r2, r2, r2, lsl #16
        mov     r3, r2, lsr #16

        do_3_5  r9, r6
        orr     r2, r2, r2, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #320*2*1+12]   @ 14,15

        unpack_hi r7, r5
        unpack_lo r9, r11

        do_3_5  r7, r4
        orr     r2, r2, r2, lsl #16
        mov     r3, r2, lsr #16

        do_14_7_7_4 r10, r4, r9, r7
        orr     r2, r2, r2, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #320*2*2+8]    @ 20,21

        do_3_5  r7, r9
        orr     r2, r2, r2, lsr #16
        mov     r2, r2, lsl #16
        orr     r2, r2, r5, lsr #16
        str     r2, [r0, #320*2*3+8]    @ 28,29

        unpack_hi r5, r11

        do_14_7_7_4 r10, r6, r9, r5
        orr     r2, r2, r2, lsl #16
        mov     r3, r2, lsr #16

        do_3_5  r5, r6
        orr     r2, r2, r2, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #320*2*2+12]   @ 22,23

        do_3_5  r5, r9
        orr     r2, r2, r2, lsr #16
        mov     r3, r11, lsr #16
        orr     r2, r3, r2, lsl #16
        mov     r2, r2, ror #16
        str     r2, [r0, #320*2*3+12]   @ 30,31

        add     r0, r0, #16
        add     r1, r1, #12

        subs    r8, r8, #1<<24
        bpl     loop1

        add     r0, r0, #320*3*2
        add     r1, r1, #(320*2+80)*2
        sub     r8, r8, #1
        tst     r8, #0xff
        add     r8, r8, #(240/6) << 24  @ cols
        bne     loop1

        @@ last line
        mov     r8, #240/6

loop2:
        ldmia   r1!,{r9,r10,r11}

        unpack_lo r4, r9
        unpack_hi r5, r9

        do_3_5  r4, r5
        orr     r2, r2, r2, lsr #16
        mov     r3, r9, lsl #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0], #4

        unpack_lo r6, r10
        unpack_hi r7, r10

        do_3_5  r6, r5
        orr     r2, r2, r2, lsl #16
        mov     r2, r2, lsr #16
        orr     r2, r2, r10,lsl #16
        str     r2, [r0], #4

        unpack_lo r4, r11
        unpack_hi r5, r11

        do_3_5  r7, r4
        orr     r2, r2, r2, lsr #16
        mov     r3, r10, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0], #4

        do_3_5  r5, r4
        orr     r2, r2, r2, lsr #16
        mov     r3, r11, lsr #16
        orr     r2, r3, r2, lsl #16
        mov     r2, r2, ror #16
        str     r2, [r0], #4

        subs    r8, r8, #1
        bne     loop2

        ldmfd   sp!,{r4-r11,pc}
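
/*
 * Usage sketch (an assumption, not from this file): src is the 240x160
 * GBA image stored with a 320-pixel line stride (see the #320*2 source
 * offsets above), dst a 320x240 RGB565 framebuffer; the 213 output lines
 * start at line 13, leaving small top/bottom borders:
 *
 *   u16 fb[320 * 240];
 *   upscale_aspect(fb, gba_frame);   // gba_frame: 240x160, stride 320
 */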

.global upscale_aspect_row @ void *dst, void *linesx4, u32 row
upscale_aspect_row:
        stmfd   sp!,{r4-r11,lr}
        mov     lr, #0x0000001f
        orr     lr, lr, #0x0000f800     @ for "unpacked" form of
        orr     lr, lr, #0x07e00000     @ 00000ggg'ggg00000'rrrrr000'000bbbbb
        mov     r12,#0x00000001
        orr     r12,r12,#0x00000800
        orr     r12,r12,#0x00200000     @ rounding constant

        mov     r8, #(240/6)            @ cols

        add     r0, r0, #(240*320)*2
        add     r0, r0, #12*2
        add     r0, r0, r2, lsl #3

uar_loop:
        ldr     r10,[r1]
        ldr     r11,[r1, #240*2*1]

        unpack_lo r4, r10
        unpack_hi r5, r10
        unpack_lo r6, r11
        unpack_hi r7, r11

        ldr     r11,[r1, #240*2*2]

        do_3_5  r4, r6
        orr     r2, r2, r2, lsr #16
        mov     r3, r10, lsl #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #-240*2]!      @ 0,8

        unpack_lo r10,r11
        unpack_hi r9, r11

        do_3_5  r10,r6
        orr     r2, r2, r2, lsl #16
        mov     r3, r11, lsl #16
        orr     r2, r3, r2, lsr #16
        str     r2, [r0, #4]            @ 16,24

        do_3_5  r4, r5
        orr     r3, r2, r2, lsl #16

        do_14_7_7_4 r7, r5, r6, r4
        orr     r2, r2, r2, lsr #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #-240*2]!      @ 1,9

        ldr     r11,[r1, #4]

        do_14_7_7_4 r7, r6, r9, r10
        orr     r3, r2, r2, lsl #16

        do_3_5  r10,r9
        orr     r2, r2, r2, lsr #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #4]            @ 17,25

        ldr     r10,[r1, #240*2*1+4]

        unpack_lo r4, r11
        unpack_lo r6, r10

        do_3_5  r4, r5
        orr     r3, r2, r2, lsl #16

        do_14_7_7_4 r7, r5, r6, r4
        orr     r2, r2, r2, lsr #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #-240*2]!      @ 2,10

        do_3_5  r4, r6

        ldr     r4, [r1, #240*2*2+4]

        orr     r2, r2, r2, lsr #16
        mov     r3, r11, lsl #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #-240*2]       @ 3,11

        unpack_lo r5, r4

        do_14_7_7_4 r7, r6, r9, r5
        orr     r3, r2, r2, lsl #16

        do_3_5  r5, r9
        orr     r2, r2, r2, lsr #16
        mov     r2, r2, lsl #16
        orr     r2, r2, r3, lsr #16
        str     r2, [r0, #4]            @ 18,26

        do_3_5  r5, r6
        orr     r2, r2, r2, lsl #16
        mov     r3, r4, lsl #16
        orr     r2, r3, r2, lsr #16
        str     r2, [r0, #-240*2+4]     @ 19,27

        unpack_hi r5, r11
        unpack_hi r6, r10
        unpack_hi r7, r4

        ldr     r10,[r1, #8]

        do_3_5  r5, r6
        orr     r2, r2, r2, lsr #16
        mov     r3, r11, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #-240*2*2]!    @ 4,12

        ldr     r11,[r1, #240*2*1+8]

        do_3_5  r7, r6
        orr     r2, r2, r2, lsl #16
        mov     r3, r4, lsr #16
        mov     r3, r3, lsl #16
        orr     r2, r3, r2, lsr #16
        str     r2, [r0, #4]            @ 20,28

        unpack_lo r4, r10
        unpack_lo r9, r11

        ldr     r11,[r1, #240*2*2+8]

        do_3_5  r5, r4
        orr     r3, r2, r2, lsl #16

        do_14_7_7_4 r9, r4, r6, r5
        orr     r2, r2, r2, lsr #16
        mov     r2, r2, lsl #16
        orr     r2, r2, r3, lsr #16
        str     r2, [r0, #-240*2]!      @ 5,13

        unpack_lo r5, r11

        do_14_7_7_4 r9, r5, r6, r7
        orr     r3, r2, r2, lsl #16

        do_3_5  r7, r5
        orr     r2, r2, r2, lsr #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #4]            @ 21,29

        ldr     r7, [r1, #240*2*1+8]

        unpack_hi r6, r10
        unpack_hi r7, r7

        do_3_5  r6, r4
        orr     r3, r2, r2, lsl #16

        do_14_7_7_4 r9, r4, r7, r6
        orr     r2, r2, r2, lsr #16
        mov     r2, r2, lsl #16
        orr     r2, r2, r3, lsr #16
        str     r2, [r0, #-240*2]!      @ 6,14

        unpack_hi r4, r11

        do_14_7_7_4 r9, r5, r7, r4
        orr     r3, r2, r2, lsl #16

        do_3_5  r4, r5
        orr     r2, r2, r2, lsr #16
        mov     r3, r3, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #4]            @ 22,30

        do_3_5  r6, r7
        orr     r2, r2, r2, lsr #16
        mov     r3, r10, lsr #16
        orr     r2, r3, r2, lsl #16
        str     r2, [r0, #-240*2]!      @ 7,15

        do_3_5  r4, r7
        orr     r2, r2, r2, lsl #16
        mov     r3, r11, lsr #16
        mov     r3, r3, lsl #16
        orr     r2, r3, r2, lsr #16
        str     r2, [r0, #4]            @ 23,31

        subs    r8, r8, #1
        add     r1, r1, #12
        bne     uar_loop

        ldmfd   sp!,{r4-r11,pc}
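
/*
 * Usage sketch (an assumption, not from this file): the rotated variant
 * renders into a portrait 240x320 framebuffer. Each call consumes 3
 * source lines from a small line buffer ("linesx4", 240-pixel stride)
 * and emits 4 rotated output columns, so a caller might do roughly:
 *
 *   for (row = 0; row < 160 / 3; row++) {
 *       // render/copy the next 3 source lines into line_buf first
 *       upscale_aspect_row(fb, line_buf, row);
 *   }
 */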

@ bonus function

@ input: r2-r5 (2 pixels from each of 4 lines, same x)
@ output: r7,r8 (4 pixels of one rotated output line)
@ trash: r6
.macro rb_line_low
        mov     r6, r2, lsl #16
        mov     r7, r3, lsl #16
        orr     r7, r7, r6, lsr #16
        mov     r6, r4, lsl #16
        mov     r8, r5, lsl #16
        orr     r8, r8, r6, lsr #16
.endm

.macro rb_line_hi
        mov     r6, r2, lsr #16
        mov     r7, r3, lsr #16
        orr     r7, r6, r7, lsl #16
        mov     r6, r4, lsr #16
        mov     r8, r5, lsr #16
        orr     r8, r6, r8, lsl #16
.endm

.global do_rotated_blit @ void *dst, void *linesx4, u32 y
do_rotated_blit:
        stmfd   sp!,{r4-r8,lr}

        add     r0, r0, #(240*320)*2
        sub     r0, r0, #(240*40)*2
        sub     r0, r0, #(240-40+4)*2   @ y starts from 4
        add     r0, r0, r2, lsl #1

        mov     lr, #240/4

rotated_blit_loop:
        ldr     r2, [r1, #240*0*2]
        ldr     r3, [r1, #240*1*2]
        ldr     r4, [r1, #240*2*2]
        ldr     r5, [r1, #240*3*2]
        rb_line_low
        stmia   r0, {r7,r8}
        sub     r0, r0, #240*2
        rb_line_hi
        stmia   r0, {r7,r8}
        sub     r0, r0, #240*2

        ldr     r2, [r1, #240*0*2+4]
        ldr     r3, [r1, #240*1*2+4]
        ldr     r4, [r1, #240*2*2+4]
        ldr     r5, [r1, #240*3*2+4]
        rb_line_low
        stmia   r0, {r7,r8}
        sub     r0, r0, #240*2
        rb_line_hi
        stmia   r0, {r7,r8}
        sub     r0, r0, #240*2

        subs    lr, lr, #1
        add     r1, r1, #8
        bne     rotated_blit_loop

        ldmfd   sp!,{r4-r8,pc}
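
/*
 * C model of do_rotated_blit (annotation, not part of the original
 * source): four 240-pixel source lines are transposed into 240 four-pixel
 * rows of a portrait 240x320 framebuffer, rotating the image for the
 * display:
 *
 *   void rotated_blit_c(unsigned short *dst,
 *                       const unsigned short *lines, unsigned int y)
 *   {
 *       unsigned short *d = dst + 240*320 - 240*40 - (240 - 40 + 4) + y;
 *       int x, l;
 *       for (x = 0; x < 240; x++, d -= 240)  // one output row per column
 *           for (l = 0; l < 4; l++)
 *               d[l] = lines[l * 240 + x];
 *   }
 */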

@ vim:filetype=armasm