pandora: fix readme and pxml version
[picodrive.git] / pico / draw2_arm.S
... / ...
CommitLineData
1/*\r
2 * assembly optimized versions of most functions from draw2.c\r
3 * (C) notaz, 2006-2008\r
4 * (C) irixxxx, 2019-2023\r
5 *\r
6 * This work is licensed under the terms of MAME license.\r
7 * See COPYING file in the top-level directory.\r
8 *\r
9 * this is highly specialized, be careful if changing related C code!\r
10 *\r
11 * NB: this only deals with buffers having line width at 328\r
12 */\r
13\r
14#include "pico_int_offs.h"\r
15\r
@ PDRAW_* flag bits. PDRAW_32_COLS and PDRAW_BORDER_32 are tested against
@ the word loaded from OFS_EST_rendstatus by the renderers in this file;
@ PDRAW_INTERLACE and PDRAW_30_ROWS are defined but not referenced here.
.equ PDRAW_INTERLACE,   (1<<3)
.equ PDRAW_32_COLS,     (1<<8)
.equ PDRAW_BORDER_32,   (1<<9)
.equ PDRAW_30_ROWS,     (1<<11)

@ define these constants in your include file:
@ .equiv START_ROW, 1
@ .equiv END_ROW, 27
@ one row means 8 pixels. If above example was used, (27-1)*8=208 lines would be rendered.
#ifndef START_ROW
#define START_ROW 0
#endif
#ifndef END_ROW
#define END_ROW 28
#endif

.text
.align 2
34\r
@ void BackFillFull(unsigned char *dst, int reg7, int lwidth)
@
@ Fills (END_ROW-START_ROW)*8 lines of 320 bytes each with the byte value
@ (reg7 & 0x3f), starting at dst + 8 + 8*lwidth. The remaining
@ (lwidth - 320) bytes of every line are skipped, not written.
@
@ In:    r0 = dst, r1 = reg7, r2 = lwidth (bytes per line)
@ Out:   nothing
@ Clobb: r0-r3, r12, flags (r4-r10 and lr are saved and restored)
@ Note:  the fill uses stmia, so dst + 8 + 8*lwidth and lwidth are
@        expected to be word aligned.

.global BackFillFull

BackFillFull:
    stmfd   sp!, {r4-r10,lr}

    sub     r10,r2, #320            @ unused bytes in a line
    add     lr, r0, #8              @ 8 px overlap area at start of line
    add     lr, lr, r2, lsl #3      @ 8 lines overlap area at top

    mov     r0, r1, lsl #26
    mov     r0, r0, lsr #26         @ r0 = reg7 & 0x3f

    orr     r0, r0, r0, lsl #8
    orr     r0, r0, r0, lsl #16     @ replicate the byte into all 4 lanes

    mov     r1, r0                  @ r0-r9 all hold the fill pattern so one
    mov     r2, r0                  @ stmia writes 40 bytes
    mov     r3, r0
    mov     r4, r0
    mov     r5, r0
    mov     r6, r0
    mov     r7, r0
    mov     r8, r0
    mov     r9, r0

    mov     r12, #(END_ROW-START_ROW)*8 @ line counter

    @ go go go!
.bff_loop:
    subs    r12, r12, #1            @ flags survive the stores below

    stmia   lr!, {r0-r9}            @ 10*4*8 = 320 bytes per line
    stmia   lr!, {r0-r9}
    stmia   lr!, {r0-r9}
    stmia   lr!, {r0-r9}
    stmia   lr!, {r0-r9}
    stmia   lr!, {r0-r9}
    stmia   lr!, {r0-r9}
    stmia   lr!, {r0-r9}

    add     lr, lr, r10             @ skip unused rest of line
    bne     .bff_loop

    ldmfd   sp!, {r4-r10,lr}
    bx      lr

.pool
84\r
@ -------- some macros --------

@ helpers

@ add_c24 d, s, c:  d = s + (c & 0xffffff)
@ The constant is added one byte lane at a time so that every partial add
@ is a valid ARM immediate. The middle-byte add is always emitted; the
@ low- and high-byte adds are emitted only when that byte is non-zero.
@ Bits of c above bit 23 are ignored.
.macro add_c24 d s c
    add     \d, \s, #(\c & 0x00ff00)
.if \c & 0x0000ff
    add     \d, \d, #(\c & 0x0000ff)
.endif
.if \c & 0xff0000
    add     \d, \d, #(\c & 0xff0000)
.endif
.endm
97\r
@ TileLineSinglecol (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: pixels8_old
@
@ Writes 8 identical pixels (pal | low nibble of r2) at r1; r1 is unchanged
@ on exit. r2 is reduced to its low nibble.
@ With notsinglecol=0 the macro additionally compares this line's colour
@ with the colour kept in r0[31:28], clears the "tile singlecolor" flag
@ (bit 1 of r9) on mismatch, and leaves r0 = 0xf | (colour << 28).
@ With notsinglecol=1, r0 and r9 are not touched.
@ NOTE(review): if r0 is plain 0xf when this runs for the first line of a
@ tile, the compare is against 0 and any non-zero colour mismatches, so the
@ flag would be cleared right away -- confirm against the caller.
.macro TileLineSinglecol notsinglecol=0
    and     r2, r2, #0xf            @ #0x0000000f
.if !\notsinglecol
    cmp     r2, r0, lsr #28         @ if these don't match,
    bicne   r9, r9, #2              @ it is a sign that whole tile is not singlecolor (only its lines may be)
.endif
    orr     r4, r3, r2
    orr     r4, r4, r4, lsl #8      @ two copies of the pixel for strh

    tst     r1, #1                  @ not aligned? (flags stay valid through the stores)
    strneb  r4, [r1], #1            @ odd address: 1 + 2 + 2 + 2 + 1 bytes
    streqh  r4, [r1], #2            @ even address: 2 + 2 + 2 + 2 bytes
    strh    r4, [r1], #2
    strh    r4, [r1], #2
    strh    r4, [r1], #2
    strneb  r4, [r1], #1            @ have a remaining unaligned pixel?
    sub     r1, r1, #8              @ back to the start of the line
.if !\notsinglecol
    mov     r0, #0xf
    orr     r0, r0, r2, lsl #28     @ we will need the old palindex later
.endif
.endm
121\r
@ TileLineNorm (r1=pdest, r2=pixels8, r3=pal) r0 = 0xf (nibble mask), r4: scratch
@
@ Draws one 8-pixel tile line left to right. Each pixel is one nibble of
@ r2; a zero nibble is skipped (nothing is stored), any other value is
@ stored as (pal | nibble). r0 is only read, so it must hold 0xf on entry.
@ r1, r2 and r3 are preserved.
.macro TileLineNorm
    ands    r4, r0, r2, lsr #12     @ #0x0000f000
    orrne   r4, r3, r4
    strneb  r4, [r1]
    ands    r4, r0, r2, lsr #8      @ #0x00000f00
    orrne   r4, r3, r4
    strneb  r4, [r1,#1]
    ands    r4, r0, r2, lsr #4      @ #0x000000f0
    orrne   r4, r3, r4
    strneb  r4, [r1,#2]
    ands    r4, r0, r2              @ #0x0000000f
    orrne   r4, r3, r4
    strneb  r4, [r1,#3]
    ands    r4, r0, r2, lsr #28     @ #0xf0000000
    orrne   r4, r3, r4
    strneb  r4, [r1,#4]
    ands    r4, r0, r2, lsr #24     @ #0x0f000000
    orrne   r4, r3, r4
    strneb  r4, [r1,#5]
    ands    r4, r0, r2, lsr #20     @ #0x00f00000
    orrne   r4, r3, r4
    strneb  r4, [r1,#6]
    ands    r4, r0, r2, lsr #16     @ #0x000f0000
    orrne   r4, r3, r4
    strneb  r4, [r1,#7]
.endm
149\r
@ TileLineFlip (r1=pdest, r2=pixels8, r3=pal) r0 = 0xf (nibble mask), r4: scratch
@
@ Same as TileLineNorm but with the nibbles taken in the opposite order,
@ i.e. the tile line is drawn horizontally mirrored. Zero nibbles are
@ skipped. r0 is only read, so it must hold 0xf on entry.
@ r1, r2 and r3 are preserved.
.macro TileLineFlip
    ands    r4, r0, r2, lsr #16     @ #0x000f0000
    orrne   r4, r3, r4
    strneb  r4, [r1]
    ands    r4, r0, r2, lsr #20     @ #0x00f00000
    orrne   r4, r3, r4
    strneb  r4, [r1,#1]
    ands    r4, r0, r2, lsr #24     @ #0x0f000000
    orrne   r4, r3, r4
    strneb  r4, [r1,#2]
    ands    r4, r0, r2, lsr #28     @ #0xf0000000
    orrne   r4, r3, r4
    strneb  r4, [r1,#3]
    ands    r4, r0, r2              @ #0x0000000f
    orrne   r4, r3, r4
    strneb  r4, [r1,#4]
    ands    r4, r0, r2, lsr #4      @ #0x000000f0
    orrne   r4, r3, r4
    strneb  r4, [r1,#5]
    ands    r4, r0, r2, lsr #8      @ #0x00000f00
    orrne   r4, r3, r4
    strneb  r4, [r1,#6]
    ands    r4, r0, r2, lsr #12     @ #0x0000f000
    orrne   r4, r3, r4
    strneb  r4, [r1,#7]
.endm
177\r
@ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
@
@ Draws one 8x8 tile at r1 (line pitch 328 bytes), reading its 8 packed
@ lines (one 32-bit word each) from r10 + ((r9 >> 3) & 0xffff), i.e. the
@ low 11 bits of the code kept in r9 at bits 8+ select a 32-byte tile.
@   hflip: lines are drawn with TileLineFlip instead of TileLineNorm
@   vflip: the 8 line words are read last-to-first
@ On exit r1 and r0 (= 0xf) are restored, r9[31:24] is 0 again and the low
@ flag bits of r9 describe the tile: bit 0 = all lines empty,
@ bit 1 = singlecolor. r9[26:24] is used as the line counter meanwhile.
@ Bits 3..7 of r9 must be 0 on entry or they would leak into the address.
@ NOTE(review): r0 is 0xf on entry, so the first TileLineSinglecol compares
@ a non-zero line colour against r0>>28 == 0 and clears bit 1; it looks
@ like the singlecolor flag can never survive a non-empty tile -- confirm
@ whether this is intended before relying on the singlecolor fast path.
.macro Tile hflip vflip
    mov     r7, r9, lsl #13         @ r9=code<<8; addr=(code&0x7ff)<<4;
    add     r7, r10, r7, lsr #16
    orr     r9, r9, #3              @ emptytile=singlecolor=1, r9 must be <code_16> 00000xxx
.if \vflip
    @ we read tilecodes in reverse order if we have vflip
    add     r7, r7, #8*4
.endif
    @ loop through 8 lines
    orr     r9, r9, #(7<<24)
    b       1f                      @ loop_enter

0:  @ singlecol_loop
    subs    r9, r9, #(1<<24)
    add     r1, r1, #328            @ set pointer to next line
    bmi     8f                      @ loop_exit with r0 restore
1:
.if \vflip
    ldr     r2, [r7, #-4]!          @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels
.else
    ldr     r2, [r7], #4
.endif
    tst     r2, r2
    beq     2f                      @ empty line
    bic     r9, r9, #1
    cmp     r2, r2, ror #4          @ equal only if all 8 nibbles are the same
    bne     3f                      @ not singlecolor
    TileLineSinglecol
    b       0b

2:
    bic     r9, r9, #2              @ an empty line means the tile is not singlecolor
2:  @ empty_loop
    subs    r9, r9, #(1<<24)
    add     r1, r1, #328            @ set pointer to next line
    bmi     8f                      @ loop_exit with r0 restore
.if \vflip
    ldr     r2, [r7, #-4]!          @ next pack
.else
    ldr     r2, [r7], #4
.endif
    mov     r0, #0xf                @ singlecol_loop might have messed r0
    tst     r2, r2
    beq     2b

    bic     r9, r9, #3              @ if we are here, it means we have empty and not empty line
    b       5f

3:  @ not empty, not singlecol
    mov     r0, #0xf
    bic     r9, r9, #3
    b       6f

4:  @ not empty, not singlecol loop
    subs    r9, r9, #(1<<24)
    add     r1, r1, #328            @ set pointer to next line
    bmi     9f                      @ loop_exit
.if \vflip
    ldr     r2, [r7, #-4]!          @ next pack
.else
    ldr     r2, [r7], #4
.endif
    tst     r2, r2
    beq     4b                      @ empty line
5:
    cmp     r2, r2, ror #4
    beq     7f                      @ singlecolor line
6:
.if \hflip
    TileLineFlip
.else
    TileLineNorm
.endif
    b       4b
7:
    TileLineSinglecol 1
    b       4b

8:
    mov     r0, #0xf
9:  @ loop_exit
    add     r9, r9, #(1<<24)        @ fix r9 (counter went from 0 to -1)
    sub     r1, r1, #328*8          @ restore pdest pointer
.endm
263\r
264\r
@ TileLineSinglecolAl<n> (r1=pdest, r4,r7=color)
@
@ Each variant stores 8 bytes of the replicated colour word r4 (r7 holds
@ the same value for the two-register store) and then advances r1 by a
@ total of 328 bytes, i.e. to the same column on the next line.
@ <n> is the value of (r1 & 3) the variant is meant for, so that every
@ halfword/word store inside it is naturally aligned.

@ r1 & 3 == 0: one 8-byte store
.macro TileLineSinglecolAl0
    stmia   r1!, {r4,r7}
    add     r1, r1, #320
.endm

@ r1 & 3 == 1: byte, halfword, word, byte
.macro TileLineSinglecolAl1
    strb    r4, [r1], #1
    strh    r4, [r1], #2
    str     r4, [r1], #4
    strb    r4, [r1], #1+320        @ last store also steps to the next line
.endm

@ r1 & 3 == 2: halfword, word, halfword
.macro TileLineSinglecolAl2
    strh    r4, [r1], #2
    str     r4, [r1], #4
    strh    r4, [r1], #2
    add     r1, r1, #320
.endm

@ r1 & 3 == 3: byte, word, halfword, byte
.macro TileLineSinglecolAl3
    strb    r4, [r1], #1
    str     r4, [r1], #4
    strh    r4, [r1], #2
    strb    r4, [r1], #1+320        @ last store also steps to the next line
.endm
293\r
@ TileSinglecol (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=0xf
@ kaligned==1, if dest is always aligned
@
@ Fills a whole 8x8 tile at r1 (line pitch 328 bytes) with the single
@ colour (pal | (r2 & 0xf)). r2 is not reloaded here; it is taken as left
@ behind by the previous tile draw. With kaligned=0 the macro dispatches on
@ (r1 & 3) to the matching TileLineSinglecolAl<n> variant; with kaligned=1
@ only the word-aligned variant is emitted. r1 is restored on exit.
.macro TileSinglecol kaligned=0
    and     r4, r2, #0xf            @ we assume we have good r2 from previous time
    orr     r4, r4, r3
    orr     r4, r4, r4, lsl #8
    orr     r4, r4, r4, lsl #16     @ colour replicated into all 4 bytes
    mov     r7, r4

.if !\kaligned
    tst     r1, #2                  @ not aligned?
    bne     2f
    tst     r1, #1
    bne     1f
.endif

    TileLineSinglecolAl0
    TileLineSinglecolAl0
    TileLineSinglecolAl0
    TileLineSinglecolAl0
    TileLineSinglecolAl0
    TileLineSinglecolAl0
    TileLineSinglecolAl0
    TileLineSinglecolAl0

.if !\kaligned
    b       4f
1:
    TileLineSinglecolAl1
    TileLineSinglecolAl1
    TileLineSinglecolAl1
    TileLineSinglecolAl1
    TileLineSinglecolAl1
    TileLineSinglecolAl1
    TileLineSinglecolAl1
    TileLineSinglecolAl1
    b       4f

2:
    tst     r1, #1
    bne     3f

    TileLineSinglecolAl2
    TileLineSinglecolAl2
    TileLineSinglecolAl2
    TileLineSinglecolAl2
    TileLineSinglecolAl2
    TileLineSinglecolAl2
    TileLineSinglecolAl2
    TileLineSinglecolAl2
    b       4f

3:
    TileLineSinglecolAl3
    TileLineSinglecolAl3
    TileLineSinglecolAl3
    TileLineSinglecolAl3
    TileLineSinglecolAl3
    TileLineSinglecolAl3
    TileLineSinglecolAl3
    TileLineSinglecolAl3

4:
.endif
    sub     r1, r1, #328*8          @ restore pdest pointer
.endm
360\r
361\r
362\r
@ DrawLayerTiles(*hcache, *scrpos, (cells<<24)|(nametab<<9)|(vscroll&0x3ff)<<11|(shift[width]<<8)|planeend, (ymask<<24)|(planestart<<16)|[htab||hscroll]

@ void DrawLayerFull(int plane, int *hcache, int planestart, int planeend,
@                    struct PicoEState *est)
@
@ Renders the low-priority tiles of one scroll plane into the Draw2 frame
@ buffer (line pitch 328 bytes) and records every high-priority tile
@ (code bit 15 set) in hcache for a later pass.
@
@ In:    r0 = plane, r1 = hcache, r2 = planestart, r3 = planeend,
@        [sp] = est
@        planestart/planeend carry the row in bits 0-7 and the cell in
@        bits 16-23 (only those fields are used here).
@ Out:   hcache[0] = 8 - (vscroll & 7); then one word per cached tile,
@        code|(dx<<16)|(trow<<27); then a 0 terminator.
@ Clobb: r0-r3, r12, flags (r4-r11 and lr are saved and restored)
@
@ Register roles in the loops:
@   r5  = (ymask<<24)|(trow<<16)|[htab||hscroll]
@   r6  = hcache write pointer       r8  = (xmask<<24)|tilex
@   r9  = (prevcode<<8)|flags        r10 = vram base
@   r11 = scrpos of the current row  r12 = (cell counter<<24)|nametab_row*2
@   lr  = packed cells/nametab/vscroll/shift[width]/planeend

.global DrawLayerFull

DrawLayerFull:
    ldr     r12,[sp]                @ est
    stmfd   sp!, {r4-r11,lr}

    mov     r6, r1                  @ hcache

    ldr     r11, [r12, #OFS_EST_Pico]
    ldr     r10, [r12, #OFS_EST_PicoMem_vram]
    ldrb    r5, [r11, #OFS_Pico_video_reg+13] @ pvid->reg[13]
    ldrb    r7, [r11, #OFS_Pico_video_reg+11]

    sub     lr, r3, r2
    and     lr, lr, #0x00ff0000     @ lr=cells

    mov     r5, r5, lsl #10         @ htab=pvid->reg[13]<<9; (halfwords)
    add     r5, r5, r0, lsl #1      @ htab+=plane
    bic     r5, r5, #0x00ff0000     @ just in case

    tst     r7, #3                  @ full screen scroll? (if ==0)
    ldrb    r7, [r11, #OFS_Pico_video_reg+16] @ ??hh??ww
    ldreqh  r5, [r10, r5]
    biceq   r5, r5, #0x0000fc00     @ r5=hscroll (0-0x3ff)
    movne   r5, r5, lsr #1
    orrne   r5, r5, #0x8000         @ this marks that we have htab pointer, not hscroll here

    and     r8, r7, #3              @ r8 = width bits of reg[16]

    orr     r5, r5, r7, lsl #1+24
    orr     r5, r5, #0x1f000000
    cmp     r8, #1
    biclt   r5, r5, #0x80000000
    biceq   r5, r5, #0xc0000000
    bicgt   r5, r5, #0xe0000000

    mov     r9, r2, lsl #24
    orr     r5, r5, r9, lsr #8      @ r5=(ymask<<24)|(trow<<16)|[htab||hscroll]

    add     r4, r8, #5
    cmp     r4, #7
    subge   r4, r4, #1              @ r4=shift[width] (5,6,6,7)

    orr     lr, lr, r4
    orr     lr, lr, r3, lsl #24     @ lr=(planeend<<24)|(cells<<16)|shift[width]

    @ calculate xmask:
    mov     r8, r8, lsl #24+5
    orr     r8, r8, #0x1f000000

    @ Find name table:
    tst     r0, r0
    ldreqb  r4, [r11, #OFS_Pico_video_reg+2]
    moveq   r4, r4, lsr #3
    ldrneb  r4, [r11, #OFS_Pico_video_reg+4]
    and     r4, r4, #7
    orr     lr, lr, r4, lsl #13     @ lr|=nametab_bits{3}<<13

    ldr     r11,[sp, #9*4]          @ est (now above the 9 saved registers)
    ldr     r4, [r11, #OFS_EST_Draw2Start]
    ldr     r7, [r11, #OFS_EST_rendstatus]
    ldr     r11, [r11, #OFS_EST_Draw2FB]
    sub     r4, r9, r4, lsl #24
    tst     r7, #PDRAW_BORDER_32    @ H32 border mode?
    tstne   r7, #PDRAW_32_COLS
    addne   r11, r11, #32           @ only when both flags are set
    mov     r4, r4, asr #24
    mov     r7, #328*8
    mla     r11, r4, r7, r11        @ scrpos+=8*328*(planestart-Draw2Start);

    @ Get vertical scroll value:
    add_c24 r7, r10, (OFS_PMEM_vsram-OFS_PMEM_vram)
    ldr     r7, [r7]
    tst     r0, r0
    moveq   r7, r7, lsl #22
    movne   r7, r7, lsl #6
    mov     r7, r7, lsr #22         @ r7=vscroll (10 bits)

    orr     lr, lr, r7, lsl #3
    mov     lr, lr, ror #24         @ packed: cccccccc nnnvvvvv vvvvvsss pppppppp: cells, nametab, vscroll, shift[width], planeend

    ands    r7, r7, #7
    addne   lr, lr, #1              @ we have vertically clipped tiles due to vscroll, so we need 1 more row

    rsb     r7, r7, #8
    str     r7, [r6], #4            @ push y-offset to tilecache
    mov     r4, #328
    mla     r11, r4, r7, r11        @ scrpos+=(8-(vscroll&7))*328;

    mov     r9, #0xff000000         @ r9=(prevcode<<8)|flags: 1~tile empty, 2~tile singlecolor

.rtrloop_outer:
    mov     r4, lr, lsl #11
    mov     r4, r4, lsr #25         @ r4=vscroll>>3 (7 bits)
    add     r4, r4, r5, lsr #16     @ +trow
    and     r4, r4, r5, lsr #24     @ &=ymask
    mov     r7, lr, lsr #8
    and     r7, r7, #7              @ shift[width]
    mov     r0, lr, lsr #9
    and     r0, r0, #0x7000         @ nametab
    add     r12,r0, r4, lsl r7      @ nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<<shift[width]);

    mov     r4, lr, lsr #24
    orr     r12,r12,r4, lsl #23
    mov     r12,r12,lsl #1          @ (nametab_row|(cells<<24)) (halfword compliant)

    @ htab?
    tst     r5, #0x8000
    moveq   r7, r5, lsl #22         @ hscroll (0-3FFh)
    moveq   r7, r7, lsr #22
    beq     .rtr_hscroll_done

    @ get hscroll from htab
    mov     r7, r5, lsl #17
    ands    r4, r5, #0x00ff0000
    add     r7, r7, r4, lsl #5      @ +=trow<<4
    andne   r4, lr, #0x3800
    subne   r7, r7, r4, lsl #7      @ if(trow) htaddr-=(vscroll&7)<<1;
    mov     r7, r7, lsr #16         @ halfwords
    ldrh    r7, [r10, r7]

.rtr_hscroll_done:
    and     r8, r8, #0xff000000
    rsb     r4, r7, #0              @ r4=tilex=(-ts->hscroll)>>3
    mov     r4, r4, asr #3
    and     r4, r4, #0xff
    orr     r8, r8, r4              @ r8=(xmask<<24)|tilex

    sub     r7, r7, #1
    and     r7, r7, #7
    add     r7, r7, #1              @ r7=dx=((ts->hscroll-1)&7)+1

    cmp     r7, #8
    subeq   r12,r12, #0x01000000    @ we will loop cells+1 times, so loop less when there is no hscroll

    add     r1, r11, r7             @ r1=pdest
    mov     r0, #0xf
    b       .rtrloop_enter

    @ r4 & r7 are scratch in this loop
.rtrloop: @ 40-41 times
    add     r1, r1, #8
    subs    r12,r12, #0x01000000
    add     r8, r8, #1
    bmi     .rtrloop_exit

.rtrloop_enter:
    and     r7, r8, r8, lsr #24
    add     r7, r10, r7, lsl #1
    bic     r4, r12, #0xff000000    @ Pico.vram[nametab_row+(tilex&xmask)];
    ldrh    r7, [r7, r4]            @ r7=code (int, but from unsigned, no sign extend)

    tst     r7, #0x8000
    bne     .rtr_hiprio

    cmp     r7, r9, lsr #8
    bne     .rtr_notsamecode
    @ we know stuff about this tile already
    tst     r9, #1
    bne     .rtrloop                @ empty tile
    tst     r9, #2
    bne     .rtr_singlecolor        @ singlecolor tile
    b       .rtr_samecode

.rtr_notsamecode:
    mov     r9, r7, lsl #8          @ remember new code

    @ update cram
    and     r7, r7, #0x6000
    mov     r3, r7, asr #9          @ r3=pal=((code&0x6000)>>9);

.rtr_samecode:
    tst     r9, #0x100000           @ vflip?
    bne     .rtr_vflip

    tst     r9, #0x080000           @ hflip?
    bne     .rtr_hflip

    @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
    Tile 0, 0
    b       .rtrloop

.rtr_hflip:
    Tile 1, 0
    b       .rtrloop

.rtr_vflip:
    tst     r9, #0x080000           @ hflip?
    bne     .rtr_vflip_hflip

    Tile 0, 1
    b       .rtrloop

.rtr_vflip_hflip:
    Tile 1, 1
    b       .rtrloop

.rtr_singlecolor:
    TileSinglecol
    b       .rtrloop

.rtr_hiprio:
    @ *(*hcache)++ = code|(dx<<16)|(trow<<27);
    sub     r4, r1, r11             @ dx = pdest - start of row
    orr     r7, r7, r4, lsl #16
    and     r4, r5, #0x00ff0000
    orr     r7, r7, r4, lsl #11     @ (trow<<27)
    str     r7, [r6], #4            @ cache hi priority tile
    b       .rtrloop

.rtrloop_exit:
    add     r5, r5, #0x00010000     @ trow++
    mov     r4, r5, lsl #8
    cmp     r4, lr, lsl #24         @ trow >= planeend?
    bge     .rtrloop_outer_exit
    add     r11, r11, #328*8
    b       .rtrloop_outer

.rtrloop_outer_exit:

    @ terminate cache list
    mov     r0, #0
    str     r0, [r6]                @ 0 word marks the end of the cache

    ldmfd   sp!, {r4-r11,lr}
    bx      lr

.pool
597\r
598\r
@ void DrawTilesFromCacheF(int *hc, struct PicoEState *est)
@
@ Draws the tiles listed in hc into the Draw2 frame buffer (line pitch
@ 328 bytes). hc[0] is a y offset in lines; every following word is
@ code|(dx<<16)|(trow<<27); a word whose upper 16 bits are 0 ends the list.
@
@ In:    r0 = hc, r1 = est
@ Out:   nothing
@ Clobb: r0-r3, r12, flags (r4-r11 are restored; lr is used as scratch,
@        the function returns by popping the saved lr into pc)
@
@ Register roles in the loop:
@   r5  = pd, start of the current tile row in the frame buffer
@   r6  = prevy, trow of the previous entry (-1 initially)
@   r8  = hc read pointer            r9  = (prevcode<<8)|flags
@   r10 = vram base                  r11 = Draw2Start
@   r12 = scrpos

.global DrawTilesFromCacheF

DrawTilesFromCacheF:
    stmfd   sp!, {r4-r11,lr}

    mov     r9, #0xff000000         @ r9=prevcode=-1
    mvn     r6, #0                  @ r6=prevy=-1

    ldr     r7, [r1, #OFS_EST_rendstatus]
    ldr     r4, [r1, #OFS_EST_Draw2FB]
    ldr     r11,[r1, #OFS_EST_Draw2Start]
    ldr     r2, [r0], #4            @ read y offset
    tst     r7, #PDRAW_BORDER_32    @ H32 border mode?
    tstne   r7, #PDRAW_32_COLS
    addne   r4, r4, #32             @ only when both flags are set
    mov     r7, #328
    mla     r2, r7, r2, r4
    sub     r12, r2, #(328*8*START_ROW) @ r12=scrpos

    ldr     r10, [r1, #OFS_EST_PicoMem_vram]
    mov     r8, r0                  @ hc
    mov     r0, #0xf

    @ scratch: r4, r7, lr
    @ *hcache++ = code|(dx<<16)|(trow<<27); // cache it

.dtfcf_loop:
    ldr     r7, [r8], #4            @ read code
    movs    r1, r7, lsr #16         @ r1=dx;
    ldmeqfd sp!, {r4-r11,pc}        @ dx is never zero, this must be a terminator, return

    @ row changed?
    @ r6 keeps the raw trow so that the compare keeps working when
    @ Draw2Start != 0; the Draw2Start-relative row only lives in lr.
    cmp     r6, r7, lsr #27
    movne   r6, r7, lsr #27
    subne   lr, r6, r11
    movne   r4, #328*8
    mlane   r5, r4, lr, r12         @ r5=pd = scrpos + (prevy-Draw2Start)*328*8

    bic     r1, r1, #0xf800         @ drop the trow bits, keep dx
    add     r1, r5, r1              @ r1=pdest (halfwords)

    mov     r7, r7, lsl #16
    mov     r7, r7, lsr #16         @ r7 = code (low 16 bits)

    cmp     r7, r9, lsr #8
    bne     .dtfcf_notsamecode
    @ we know stuff about this tile already
    tst     r9, #1
    bne     .dtfcf_loop             @ empty tile
    tst     r9, #2
    bne     .dtfcf_singlecolor      @ singlecolor tile
    b       .dtfcf_samecode

.dtfcf_notsamecode:
    mov     r9, r7, lsl #8          @ remember new code

    @ update cram val
    and     r7, r7, #0x6000
    mov     r3, r7, asr #9          @ r3=pal=((code&0x6000)>>9);


.dtfcf_samecode:

    tst     r9, #0x100000           @ vflip?
    bne     .dtfcf_vflip

    tst     r9, #0x080000           @ hflip?
    bne     .dtfcf_hflip

    @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf
    Tile 0, 0
    b       .dtfcf_loop

.dtfcf_hflip:
    Tile 1, 0
    b       .dtfcf_loop

.dtfcf_vflip:
    tst     r9, #0x080000           @ hflip?
    bne     .dtfcf_vflip_hflip

    Tile 0, 1
    b       .dtfcf_loop

.dtfcf_vflip_hflip:
    Tile 1, 1
    b       .dtfcf_loop

.dtfcf_singlecolor:
    TileSinglecol
    b       .dtfcf_loop

.pool
695\r
696\r
@ @@@@@@@@@@@@@@@

@ (tile_start<<16)|row_start
@ void DrawWindowFull(int start, int end, int prio, struct PicoEState *est)
@
@ Draws the window plane cells from start up to end into the Draw2 frame
@ buffer (line pitch 328 bytes). start/end are packed as
@ (cell<<16)|row. Only the first tile's priority bit is checked: if it
@ differs from prio the function returns without drawing anything.
@
@ In:    r0 = start, r1 = end, r2 = prio, r3 = est
@ Out:   nothing
@ Clobb: r0-r3, r12, flags (r4-r11 are restored; returns by popping pc)
@
@ Register roles in the loops:
@   r5  = nametab_step (bytes per name table row)
@   r6  = name table read pointer    r8  = packed cell/row counters
@   r9  = (prevcode<<8)|flags        r10 = vram base
@   r11 = scrpos of the current row  r12 = name table offset of the row

.global DrawWindowFull

DrawWindowFull:
    stmfd   sp!, {r4-r11,lr}

    ldr     r11, [r3, #OFS_EST_Pico]
    ldrb    r12, [r11, #OFS_Pico_video_reg+3] @ pvid->reg[3]
    mov     r12, r12, lsl #10

    ldrb    r4, [r11, #OFS_Pico_video_reg+12] @ pvid->reg[12], byte access like the other regs
    mov     r5, #1                  @ nametab_step
    ldr     r11, [r3, #OFS_EST_Draw2FB]
    ldr     r6, [r3, #OFS_EST_Draw2Start]
    tst     r4, #1                  @ 40 cell mode?
    andne   r12, r12, #0xf000       @ 0x3c<<10
    movne   r5, r5, lsl #7
    bne     0f
    ldr     r7, [r3, #OFS_EST_rendstatus]
    and     r12, r12, #0xf800
    mov     r5, r5, lsl #6          @ nametab_step
    tst     r7, #PDRAW_BORDER_32
    tstne   r7, #PDRAW_32_COLS
    addne   r11, r11, #32           @ center screen in H32 mode

0:  and     r4, r0, #0xff
    sub     r4, r4, r6
    mla     r12, r5, r4, r12        @ nametab += nametab_step*(start-Draw2Start);

    ldr     r10, [r3, #OFS_EST_PicoMem_vram]
    mov     r4, r0, lsr #16         @ r4=start_cell_h
    add     r7, r12, r4, lsl #1

    @ fetch the first code now
    ldrh    r7, [r10, r7]
    cmp     r2, r7, lsr #15
    ldmnefd sp!, {r4-r11,pc}        @ hack: simply assume that whole window uses same priority

    rsb     r8, r4, r1, lsr #16     @ cells (h)
    orr     r8, r8, r4, lsl #8
    mov     r4, r1, lsl #24
    sub     r4, r4, r0, lsl #24
    orr     r8, r8, r4, lsr #8      @ r8=cells_h|(start_cell_h<<8)|(cells_v<<16)
    sub     r8, r8, #0x010000       @ adjust for algo

    mov     r9, #0xff000000         @ r9=prevcode=-1

    and     r4, r0, #0xff
    add     r11, r11, #328*8        @ skip the 8 overlap lines at the top
    sub     r4, r4, r6
    add     r11, r11, #8            @ skip the 8 overlap pixels on the left

    mov     r7, #328*8
    mla     r11, r7, r4, r11        @ scrpos+=8*328*(start-Draw2Start);
    mov     r0, #0xf

.dwfloop_outer:
    and     r6, r8, #0xff00         @ r6=tilex
    add     r1, r11, r6, lsr #5     @ r1=pdest
    add     r6, r12, r6, lsr #7
    add     r6, r10, r6             @ r6=Pico.vram+nametab+tilex
    orr     r8, r8, r8, lsl #24
    sub     r8, r8, #0x01000000     @ cell loop counter
    b       .dwfloop_enter

    @ r4 & r7 are scratch in this loop
.dwfloop:
    add     r1, r1, #8
    subs    r8, r8, #0x01000000
    bmi     .dwfloop_exit

.dwfloop_enter:
    ldrh    r7, [r6], #2            @ r7=code

    cmp     r7, r9, lsr #8
    bne     .dwf_notsamecode
    @ we know stuff about this tile already
    tst     r9, #1
    bne     .dwfloop                @ empty tile
    tst     r9, #2
    bne     .dwf_singlecolor        @ singlecolor tile
    b       .dwf_samecode

.dwf_notsamecode:
    mov     r9, r7, lsl #8          @ remember new code

    @ update cram val
    and     r7, r7, #0x6000
    mov     r3, r7, asr #9          @ r3=pal=((code&0x6000)>>9);

.dwf_samecode:

    tst     r9, #0x100000           @ vflip?
    bne     .dwf_vflip

    tst     r9, #0x080000           @ hflip?
    bne     .dwf_hflip

    @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=PicoMem.vram)
    @ r2,r4,r7: scratch, r0=0xf
    Tile 0, 0
    b       .dwfloop

.dwf_hflip:
    Tile 1, 0
    b       .dwfloop

.dwf_vflip:
    tst     r9, #0x080000           @ hflip?
    bne     .dwf_vflip_hflip

    Tile 0, 1
    b       .dwfloop

.dwf_vflip_hflip:
    Tile 1, 1
    b       .dwfloop

.dwf_singlecolor:
    TileSinglecol 1
    b       .dwfloop

.dwfloop_exit:
    bic     r8, r8, #0xff000000     @ fix r8
    subs    r8, r8, #0x010000       @ one row done
    ldmmifd sp!, {r4-r11,pc}
    add     r11, r11, #328*8
    add     r12, r12, r5            @ nametab+=nametab_step
    b       .dwfloop_outer

.pool
833\r
834\r
@ ---------------- sprites ---------------

@ SpriteLoop hflip, vflip
@
@ Body of DrawSpriteFull for one flip combination. Walks the sprite
@ column by column (outer loop) and tile by tile within a column (inner
@ loop), skipping tiles that lie outside the visible area, and draws the
@ rest with Tile. The macro never falls through: it leaves by popping
@ {r4-r11,pc}, so the caller must have pushed {r4-r11,lr}.
@
@ In:  r5  = width|(height<<8)|(height<<24), sizes in tiles
@      r8  = sx, r12 = sy (pixels)
@      r9  = tile<<8 (advanced by 1<<8 per tile)
@      r11 = scrpos, r3 = pal, r10 = vram base
@ Uses r0 (= 0xf for Tile), r1 (pdest), r6 (current sy), r2/r4/r7 via Tile.
.macro SpriteLoop hflip vflip
.if \vflip
    mov     r1, r5, lsr #24         @ height
    mov     r0, #328*8
    mla     r11, r1, r0, r11        @ scrpos+=height*328*8;
    add     r12, r12, r1, lsl #3    @ sy+=height*8
.endif
    mov     r0, #0xf
.if \hflip
    and     r1, r5, #0xff
    add     r8, r8, r1, lsl #3      @ sx+=width*8
58:
    cmp     r8, #336                @ skip columns hidden on the right of screen
    blt     51f
    add     r9, r9, r5, lsr #16     @ tile += height (one whole column)
    sub     r5, r5, #1              @ sub width
    sub     r8, r8, #8
    b       58b
.else
    cmp     r8, #0                  @ skip tiles hidden on the left of screen
    bgt     51f
58:
    add     r9, r9, r5, lsr #16     @ tile += height (one whole column)
    sub     r5, r5, #1
    adds    r8, r8, #8
    ble     58b
    b       51f
.endif

50: @ outer
.if !\hflip
    add     r8, r8, #8              @ sx+=8
.endif
    bic     r5, r5, #0xff000000     @ fix height
    orr     r5, r5, r5, lsl #16     @ reload the height counter in the top byte

51: @ outer_enter
    sub     r5, r5, #1              @ width--
    movs    r1, r5, lsl #24
    ldmmifd sp!, {r4-r11,pc}        @ end of tile
.if \hflip
    subs    r8, r8, #8              @ sx-=8
    ldmlefd sp!, {r4-r11,pc}        @ tile offscreen
.else
    cmp     r8, #328
    ldmgefd sp!, {r4-r11,pc}        @ tile offscreen
.endif
    mov     r6, r12                 @ r6=sy
    add     r1, r11, r8             @ pdest=scrpos+sx
    b       53f

52: @ inner
    add     r9, r9, #1<<8           @ tile++
.if !\vflip
    add     r6, r6, #8              @ sy+=8
    add     r1, r1, #328*8
.endif

53: @ inner_enter
    @ end of sprite?
    subs    r5, r5, #0x01000000
    bmi     50b                     @ ->outer
.if \vflip
    sub     r6, r6, #8              @ sy-=8
    sub     r1, r1, #328*8
.endif

    @ offscreen?
    cmp     r6, #(START_ROW*8)
    ble     52b

    cmp     r6, #(END_ROW*8+8)
    bge     52b

    @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=PicoMem.vram)
    @ r2,r4,r7: scratch, r0=0xf
    Tile \hflip, \vflip
    b       52b
.endm
916\r
@ void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est)
@
@ Draws one sprite into the Draw2 frame buffer (line pitch 328 bytes).
@ Fields decoded here:
@   sprite[0]: bits 0-8 = y (+0x78 bias), bits 24-25 = height-1,
@              bits 26-27 = width-1
@   sprite[1]: bits 0-10 = tile, bit 11 = hflip, bit 12 = vflip,
@              bits 13-14 = palette, bits 16-24 = x (+0x78 bias)
@
@ In:    r0 = sprite, r1 = est
@ Out:   nothing
@ Clobb: r0-r3, r12, flags (r4-r11 are restored; lr holds the sprite code
@        and the function returns from inside SpriteLoop by popping pc)

.global DrawSpriteFull

DrawSpriteFull:
    stmfd   sp!, {r4-r11,lr}

    ldr     r3, [r0]                @ sprite[0]
    mov     r5, r3, lsl #4
    mov     r6, r5, lsr #30
    add     r6, r6, #1              @ r6=width
    mov     r5, r5, lsl #2
    mov     r5, r5, lsr #30
    add     r5, r5, #1              @ r5=height

    mov     r12, r3, lsl #23
    mov     r12, r12, lsr #23

    ldr     lr, [r0, #4]            @ lr=code
    sub     r12, r12, #0x78         @ r12=sy
    mov     r8, lr, lsl #7
    mov     r8, r8, lsr #23
    sub     r8, r8, #0x78           @ r8=sx

    mov     r9, lr, lsl #21
    mov     r9, r9, lsr #13         @ r9=tile<<8

    and     r3, lr, #0x6000
    mov     r3, r3, lsr #9          @ r3=pal=((code>>9)&0x30);

    ldr     r0, [r1, #OFS_EST_rendstatus]
    ldr     r11, [r1, #OFS_EST_Draw2FB]
    ldr     r2, [r1, #OFS_EST_Draw2Start]
    ldr     r10, [r1, #OFS_EST_PicoMem_vram]
    tst     r0, #PDRAW_BORDER_32    @ H32 border mode?
    tstne   r0, #PDRAW_32_COLS
    addne   r11, r11, #32           @ only when both flags are set
    sub     r12, r12, r2, lsl #3    @ sy is now relative to Draw2Start
    mov     r0, #328
    mla     r11, r12, r0, r11       @ scrpos+=(sy-Draw2Start*8)*328;

    orr     r5, r5, r5, lsl #16     @ height|(height<<16)
    orr     r5, r6, r5, lsl #8      @ r5=width|(height<<8)|(height<<24)

    tst     lr, #0x1000             @ vflip?
    bne     .dsf_vflip

    tst     lr, #0x0800             @ hflip?
    bne     .dsf_hflip

    SpriteLoop 0, 0                 @ never falls through, returns itself

.dsf_hflip:
    SpriteLoop 1, 0

.dsf_vflip:
    tst     lr, #0x0800             @ hflip?
    bne     .dsf_vflip_hflip

    SpriteLoop 0, 1

.dsf_vflip_hflip:
    SpriteLoop 1, 1

.pool
982\r
983@ vim:filetype=armasm\r