scroll size improvement
[picodrive.git] / pico / draw_arm.S
CommitLineData
cff531af 1/*\r
2 * assembly optimized versions of most functions from draw.c\r
ea38612f 3 * (C) notaz, 2006-2010,2017\r
cff531af 4 *\r
5 * This work is licensed under the terms of MAME license.\r
6 * See COPYING file in the top-level directory.\r
7 *\r
8 * this is highly specialized, be careful if changing related C code!\r
9 */\r
cc68a136 10\r
ea38612f 11#include "pico_int_o32.h"\r
12\r
cc68a136 13.extern DrawStripInterlace\r
5a681086 14\r
283fec1b 15.equ PDRAW_SPRITES_MOVED, (1<<0)\r
16.equ PDRAW_WND_DIFF_PRIO, (1<<1)\r
17.equ PDRAW_ACC_SPRITES, (1<<2)\r
18.equ PDRAW_DIRTY_SPRITES, (1<<4)\r
19.equ PDRAW_PLANE_HI_PRIO, (1<<6)\r
20.equ PDRAW_SHHI_DONE, (1<<7)\r
cc68a136 21\r
22@ helper\r
23.macro TilePixel pat lsrr offs\r
24.if !\lsrr\r
25 ands r4, \pat, r2\r
26.else\r
27 ands r4, \pat, r2, lsr #\lsrr\r
28.endif\r
29 orrne r4, r3, r4\r
30 strneb r4, [r1,#\offs]\r
31.endm\r
32\r
33@ TileNorm (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf\r
34.macro TileNorm pat\r
35 TilePixel \pat, 12, 0 @ #0x0000f000\r
36 TilePixel \pat, 8, 1 @ #0x00000f00\r
37 TilePixel \pat, 4, 2 @ #0x000000f0\r
38 TilePixel \pat, 0, 3 @ #0x0000000f\r
39 TilePixel \pat, 28, 4 @ #0xf0000000\r
40 TilePixel \pat, 24, 5 @ #0x0f000000\r
41 TilePixel \pat, 20, 6 @ #0x00f00000\r
42 TilePixel \pat, 16, 7 @ #0x000f0000\r
43.endm\r
44\r
45@ TileFlip (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf\r
46.macro TileFlip pat\r
47 TilePixel \pat, 16, 0 @ #0x000f0000\r
48 TilePixel \pat, 20, 1 @ #0x00f00000\r
49 TilePixel \pat, 24, 2 @ #0x0f000000\r
50 TilePixel \pat, 28, 3 @ #0xf0000000\r
51 TilePixel \pat, 0, 4 @ #0x0000000f\r
52 TilePixel \pat, 4, 5 @ #0x000000f0\r
53 TilePixel \pat, 8, 6 @ #0x00000f00\r
54 TilePixel \pat, 12, 7 @ #0x0000f000\r
55.endm\r
56\r
57@ shadow/hilight mode\r
58\r
59@ this one is for hi priority layer\r
60.macro TilePixelShHP lsrr offs\r
61.if !\lsrr\r
62 ands r4, r12, r2\r
63.else\r
64 ands r4, r12, r2, lsr #\lsrr\r
65.endif\r
66 ldreqb r4, [r1,#\offs]\r
67 orrne r4, r3, r4\r
bfa12428 68 andeq r4, r4, #0xbf\r
07abbab1 69 strb r4, [r1,#\offs]\r
cc68a136 70.endm\r
71\r
7a7c6476 72@ TileNormShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf, touches r3 high bits\r
cc68a136 73.macro TileNormShHP\r
74 TilePixelShHP 12, 0 @ #0x0000f000\r
75 TilePixelShHP 8, 1 @ #0x00000f00\r
76 TilePixelShHP 4, 2 @ #0x000000f0\r
77 TilePixelShHP 0, 3 @ #0x0000000f\r
78 TilePixelShHP 28, 4 @ #0xf0000000\r
79 TilePixelShHP 24, 5 @ #0x0f000000\r
80 TilePixelShHP 20, 6 @ #0x00f00000\r
81 TilePixelShHP 16, 7 @ #0x000f0000\r
82.endm\r
83\r
7a7c6476 84@ TileFlipShHP (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: register with helper pattern 0xf\r
@ Takes no macro arguments: TilePixelShHP masks with r12 directly.\r
@ Emits the same 8 pixels as TileNormShHP, but with the nibble-to-offset order mirrored.\r
cc68a136 85.macro TileFlipShHP\r
86 TilePixelShHP 16, 0 @ #0x000f0000\r
87 TilePixelShHP 20, 1 @ #0x00f00000\r
88 TilePixelShHP 24, 2 @ #0x0f000000\r
89 TilePixelShHP 28, 3 @ #0xf0000000\r
90 TilePixelShHP 0, 4 @ #0x0000000f\r
91 TilePixelShHP 4, 5 @ #0x000000f0\r
92 TilePixelShHP 8, 6 @ #0x00000f00\r
93 TilePixelShHP 12, 7 @ #0x0000f000\r
94.endm\r
95\r
96\r
97@ TileSingleSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx; r12: helper pattern 0xf\r
98.macro TileSingleSh\r
99 tst r0, #1 @ not aligned?\r
100 mov r7, #0x00c000\r
101 orr r7, r7, #0xc0\r
102 ldrneb r4, [r1]\r
103 ldreqh r4, [r1]\r
104 orr r4, r4, r7\r
105 strneb r4, [r1], #1\r
106 streqh r4, [r1], #2\r
107 ldrh r4, [r1]\r
108 orr r4, r4, r7\r
109 strh r4, [r1], #2\r
110 ldrh r4, [r1]\r
111 orr r4, r4, r7\r
112 strh r4, [r1], #2\r
113 ldrh r4, [r1]\r
114 orr r4, r4, r7\r
115 strh r4, [r1], #2\r
116 ldrneb r4, [r1]\r
117 orr r4, r4, r7\r
118 strneb r4, [r1], #1\r
119.endm\r
120\r
121@ TileSingleHi (r1=pdest, r2=pixels8, r3=pal) r4,r7,r12: scratch, r0=sx; r12 is reloaded with helper pattern 0xf on exit\r
@ Rewrites 8 destination bytes in place: bit 6 of each byte is cleared and bit 7 is set.\r
@ r2 and r3 are not read here. Loads and stores are interleaved, alternating r4 and r12,\r
@ so each store lands behind the post-incremented r1 (hence the negative offsets).\r
@ r1 ends up advanced by 8.\r
122.macro TileSingleHi\r
123 tst r1, #1 @ not aligned?\r
124 mov r7, #0x008000\r
125 orr r7, r7, #0x80 @ r7=0x8080: bit 7 of both bytes; r7 lsr #1 = 0x4040 is bit 6 of both bytes\r
126 ldrneb r4, [r1], #1 @ odd address: start with a single byte\r
127 ldreqh r4, [r1], #2 @ 1ci\r
128 ldrh r12, [r1], #2\r
129 bic r4, r4, r7, lsr #1\r
130 orr r4, r4, r7\r
131 strneb r4, [r1, #-3] @ write back the leading byte (r1 has moved on by 1+2)\r
132 streqh r4, [r1, #-4] @ write back the leading halfword (r1 has moved on by 2+2)\r
133 ldrh r4, [r1], #2\r
134 bic r12, r12, r7, lsr #1\r
135 orr r12, r12, r7\r
136 strh r12, [r1, #-4]\r
137 ldrh r12, [r1], #2\r
138 bic r4, r4, r7, lsr #1\r
139 orr r4, r4, r7\r
140 strh r4, [r1, #-4]\r
141 ldrneb r4, [r1] @ odd address: one trailing byte is still left\r
142 bic r12, r12, r7, lsr #1\r
143 orr r12, r12, r7\r
144 strh r12, [r1, #-2]\r
145 bicne r4, r4, r7, lsr #1\r
146 orrne r4, r4, r7\r
147 strneb r4, [r1], #1\r
148 mov r12, #0xf @ r12 was used as scratch above; put the helper pattern back\r
149.endm\r
150\r
151.macro TileDoShGenPixel shift ofs\r
152.if \shift\r
153 ands r4, r12, r2, lsr #\shift\r
154.else\r
155 ands r4, r12, r2\r
156.endif\r
07abbab1 157 beq 0f\r
cc68a136 158 cmp r4, #0xe\r
bfa12428 159 ldrgeb r7, [r1,#\ofs]\r
160 orrlt r7, r3, r4 @ normal\r
07abbab1 161\r
bfa12428 162 bicge r7, r7, #0xc0\r
163 orrge r7, r7, r4, lsl #6\r
164 strb r7, [r1,#\ofs]\r
07abbab1 1650:\r
cc68a136 166.endm\r
167\r
168@ TileFlipSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf\r
169.macro TileFlipSh\r
170 TileDoShGenPixel 16, 0 @ #0x000f0000\r
171 TileDoShGenPixel 20, 1 @ #0x00f00000\r
172 TileDoShGenPixel 24, 2 @ #0x0f000000\r
173 TileDoShGenPixel 28, 3 @ #0xf0000000\r
174 TileDoShGenPixel 0, 4 @ #0x0000000f\r
175 TileDoShGenPixel 4, 5 @ #0x000000f0\r
176 TileDoShGenPixel 8, 6 @ #0x00000f00\r
177 TileDoShGenPixel 12, 7 @ #0x0000f000\r
178.endm\r
179\r
180@ TileNormSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf\r
181.macro TileNormSh\r
182 TileDoShGenPixel 12, 0 @ #0x0000f000\r
183 TileDoShGenPixel 8, 1 @ #0x00000f00\r
184 TileDoShGenPixel 4, 2 @ #0x000000f0\r
185 TileDoShGenPixel 0, 3 @ #0x0000000f\r
186 TileDoShGenPixel 28, 4 @ #0xf0000000\r
187 TileDoShGenPixel 24, 5 @ #0x0f000000\r
188 TileDoShGenPixel 20, 6 @ #0x00f00000\r
189 TileDoShGenPixel 16, 7 @ #0x000f0000\r
190.endm\r
191\r
bfa12428 192.macro TileDoShGenPixel_markop shift ofs\r
07abbab1 193.if \shift\r
bfa12428 194 ands r4, r12, r2, lsr #\shift\r
07abbab1 195.else\r
bfa12428 196 ands r4, r12, r2\r
07abbab1 197.endif\r
bfa12428 198 beq 0f\r
199 cmp r4, #0xe\r
200 ldrgeb r4, [r1,#\ofs]\r
201 orrlt r4, r3, r4\r
e54507e8 202 orrge r4, r4, #0x80\r
bfa12428 203 strb r4, [r1,#\ofs]\r
2040:\r
07abbab1 205.endm\r
206\r
e54507e8 207.macro TileFlipSh_markop\r
bfa12428 208 TileDoShGenPixel_markop 16, 0 @ #0x000f0000\r
209 TileDoShGenPixel_markop 20, 1 @ #0x00f00000\r
210 TileDoShGenPixel_markop 24, 2 @ #0x0f000000\r
211 TileDoShGenPixel_markop 28, 3 @ #0xf0000000\r
212 TileDoShGenPixel_markop 0, 4 @ #0x0000000f\r
213 TileDoShGenPixel_markop 4, 5 @ #0x000000f0\r
214 TileDoShGenPixel_markop 8, 6 @ #0x00000f00\r
215 TileDoShGenPixel_markop 12, 7 @ #0x0000f000\r
07abbab1 216.endm\r
217\r
e54507e8 218.macro TileNormSh_markop\r
bfa12428 219 TileDoShGenPixel_markop 12, 0 @ #0x0000f000\r
220 TileDoShGenPixel_markop 8, 1 @ #0x00000f00\r
221 TileDoShGenPixel_markop 4, 2 @ #0x000000f0\r
222 TileDoShGenPixel_markop 0, 3 @ #0x0000000f\r
223 TileDoShGenPixel_markop 28, 4 @ #0xf0000000\r
224 TileDoShGenPixel_markop 24, 5 @ #0x0f000000\r
225 TileDoShGenPixel_markop 20, 6 @ #0x00f00000\r
226 TileDoShGenPixel_markop 16, 7 @ #0x000f0000\r
07abbab1 227.endm\r
228\r
229.macro TileDoShGenPixel_onlyop_lp shift ofs\r
230.if \shift\r
231 ands r7, r12, r2, lsr #\shift\r
232.else\r
233 ands r7, r12, r2\r
234.endif\r
235 ldrneb r4, [r1,#\ofs]\r
07abbab1 236 cmp r7, #0xe\r
bfa12428 237 blt 0f\r
238\r
239 tst r4, #0xc0\r
240 bicne r4, r4, #0xc0\r
241 orrne r4, r4, r7, lsl #6\r
242 strneb r4, [r1,#\ofs]\r
07abbab1 2430:\r
244.endm\r
245\r
246.macro TileFlipSh_onlyop_lp\r
247 TileDoShGenPixel_onlyop_lp 16, 0 @ #0x000f0000\r
248 TileDoShGenPixel_onlyop_lp 20, 1 @ #0x00f00000\r
249 TileDoShGenPixel_onlyop_lp 24, 2 @ #0x0f000000\r
250 TileDoShGenPixel_onlyop_lp 28, 3 @ #0xf0000000\r
251 TileDoShGenPixel_onlyop_lp 0, 4 @ #0x0000000f\r
252 TileDoShGenPixel_onlyop_lp 4, 5 @ #0x000000f0\r
253 TileDoShGenPixel_onlyop_lp 8, 6 @ #0x00000f00\r
254 TileDoShGenPixel_onlyop_lp 12, 7 @ #0x0000f000\r
255.endm\r
256\r
257.macro TileNormSh_onlyop_lp\r
258 TileDoShGenPixel_onlyop_lp 12, 0 @ #0x0000f000\r
259 TileDoShGenPixel_onlyop_lp 8, 1 @ #0x00000f00\r
260 TileDoShGenPixel_onlyop_lp 4, 2 @ #0x000000f0\r
261 TileDoShGenPixel_onlyop_lp 0, 3 @ #0x0000000f\r
262 TileDoShGenPixel_onlyop_lp 28, 4 @ #0xf0000000\r
263 TileDoShGenPixel_onlyop_lp 24, 5 @ #0x0f000000\r
264 TileDoShGenPixel_onlyop_lp 20, 6 @ #0x00f00000\r
265 TileDoShGenPixel_onlyop_lp 16, 7 @ #0x000f0000\r
266.endm\r
267\r
cc68a136 268\r
269@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
270\r
271@ struct TileStrip\r
272@ {\r
273@ int nametab; // 0x00\r
274@ int line; // 0x04\r
275@ int hscroll; // 0x08\r
276@ int xmask; // 0x0C\r
277@ int *hc; // 0x10 (pointer to cache buffer)\r
278@ int cells; // 0x14\r
279@ };\r
280\r
ea38612f 281@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells,\r
282@ struct PicoEState *est)\r
cc68a136 283\r
83c093a4 284.global DrawLayer\r
cc68a136 285\r
286DrawLayer:\r
ea38612f 287 ldr r12, [sp] @ est\r
cc68a136 288 stmfd sp!, {r4-r11,lr}\r
289\r
ea38612f 290 ldr r11, [r12, #OFS_Pico_video]\r
cc68a136 291 mov r8, #1\r
292\r
83c093a4 293 ldrb r7, [r11, #16] @ ??vv??hh\r
cc68a136 294\r
295 mov r6, r1 @ hcache\r
83c093a4 296 orr r9, r3, r0, lsl #30\r
297 orr r9, r9, r2, lsl #8 @ r9=sh[31]|cellskip[15:8]|maxcells[7:0] (tmp)\r
cc68a136 298\r
299 mov r1, r7, lsl #4\r
300 orr r1, r1, #0x00ff\r
301\r
302 and r10, r7, #3\r
303 cmp r10, #1\r
304 biclt r1, r1, #0xfc00\r
305 biceq r1, r1, #0xfe00\r
eced0190 306 cmp r10, #2\r
307 moveq r1, #0x0007\r
308 movgt r1, #0x00ff @ r1=ymask=(height<<8)|0xff; ...; // Y Mask in pixels\r
cc68a136 309\r
310 add r10, r10, #5\r
311 cmp r10, #7\r
312 subge r10, r10, #1 @ r10=shift[width] (5,6,6,7)\r
313\r
ea38612f 314 ldr r2, [r12, #OFS_DrawScanline]\r
315 ldr lr, [r12, #OFS_Pico_vram]\r
cc68a136 316\r
317 @ Find name table:\r
83c093a4 318 ands r0, r0, #1\r
cc68a136 319 ldreqb r12, [r11, #2]\r
320 ldrneb r12, [r11, #4]\r
321\r
ea38612f 322 @ calculate xmask:\r
323 mov r5, r8, lsl r10\r
324 sub r5, r5, #1 @ r5=xmask\r
cc68a136 325\r
326 moveq r12, r12, lsl #10\r
327 movne r12, r12, lsl #13\r
328 and r12, r12, #(7<<13) @ r12=(ts->nametab<<1) (halfword compliant)\r
329\r
330 ldrh r8, [r11, #12]\r
331 ldrb r7, [r11, #11]\r
cc68a136 332\r
333 mov r4, r8, lsr #8 @ pvid->reg[13]\r
334 mov r4, r4, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords)\r
335 tst r7, #2\r
b6d7ac70 336 addne r4, r4, r2, lsl #2 @ htab+=DrawScanline<<1; // Offset by line\r
cc68a136 337 tst r7, #1\r
338 biceq r4, r4, #0x1f @ htab&=~0xf; // Offset by tile\r
339 add r4, r4, r0, lsl #1 @ htab+=plane\r
340 bic r4, r4, #0x00ff0000 @ just in case\r
341 ldrh r3, [lr, r4] @ r3=hscroll\r
342\r
343 tst r7, #4\r
344 bne .DrawStrip_vsscroll\r
345\r
346 @ Get vertical scroll value:\r
347 add r7, lr, #0x012000\r
348 add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)\r
349 ldr r7, [r7]\r
350\r
351 tst r8, #2\r
352 tstne r8, #4\r
353 bne .DrawStrip_interlace\r
354\r
355 tst r0, r0\r
356 movne r7, r7, lsr #16\r
357\r
358 @ Find the line in the name table\r
359 add r2, r2, r7\r
360 and r2, r2, r1\r
361 mov r4, r2, lsr #3\r
362 add r10, r10, #1 @ shift[width]++\r
363 add r12, r12, r4, lsl r10 @ nametab+=(ts.line>>3)<<shift[width];\r
364\r
365 @ ldmia r0, {r1,r2,r3,r5,r6,r9} @ r2=line, r3=ts->hscroll, r5=ts->xmask, r6=ts->hc, r9=ts->cells\r
cc68a136 366\r
367 and r10,r2, #7\r
368 mov r10,r10, lsl #1 @ r10=ty=(ts->line&7)<<1;\r
369 orr r10,r10, r9, lsl #24\r
370\r
371 rsb r8, r3, #0\r
372 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3\r
373\r
374 sub r1, r3, #1\r
375 and r1, r1, #7\r
376 add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1\r
377\r
378 tst r9, #1<<31\r
379 mov r3, #0\r
740da8c6 380 orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty)\r
cc68a136 381 movne r3, #0x40 @ default to shadowed pal on sh mode\r
382\r
cc68a136 383 cmp r7, #8\r
384 addne r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll\r
385\r
83c093a4 386 and r9, r9, #0xff00\r
387 add r8, r8, r9, lsr #8 @ tilex+=cellskip\r
388 add r7, r7, r9, lsr #5 @ dx+=cellskip<<3;\r
389 sub r10,r10,r9, lsl #16 @ cells-=cellskip\r
83c093a4 390\r
cc68a136 391 @ cache some stuff to avoid mem access\r
99bdfd31 392 ldr r11,[sp, #9*4] @ est\r
cc68a136 393 mov r0, #0xf\r
99bdfd31 394 ldr r11,[r11, #OFS_HighCol]\r
ea8c405f 395\r
396 mvn r9, #0 @ r9=prevcode=-1\r
99bdfd31 397 add r1, r11, r7 @ r1=pdest\r
cc68a136 398\r
399\r
400 @ r4 & r7 are scratch in this loop\r
401.dsloop_subr1:\r
402 sub r1, r1, #8\r
403.dsloop: @ 40-41 times\r
404 subs r10,r10, #0x01000000\r
405 bmi .dsloop_exit\r
406\r
407.dsloop_enter:\r
408 and r7, r5, r8\r
409 add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords)\r
410 ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend)\r
411\r
412 add r1, r1, #8\r
413 add r8, r8, #1\r
414\r
415 tst r7, #0x8000\r
416 bne .DrawStrip_hiprio\r
417\r
418 cmp r7, r9\r
419 beq .DrawStrip_samecode @ we know stuff about this tile already\r
420\r
421 mov r9, r7 @ remember code\r
740da8c6 422 orr r10, r10, #1<<21 @ seen non hi-prio tile\r
cc68a136 423\r
424 movs r2, r9, lsl #20 @ if (code&0x1000)\r
425 mov r2, r2, lsl #1\r
426 add r2, r2, r10, lsl #17\r
427 mov r2, r2, lsr #17\r
428 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
429\r
430 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
431\r
432 bic r7, r3, #0x3f\r
433 and r3, r9, #0x6000\r
434 add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
435\r
436.DrawStrip_samecode:\r
437 tst r2, r2\r
438 beq .dsloop @ tileline blank\r
439\r
440 cmp r2, r2, ror #4\r
441 beq .DrawStrip_SingleColor @ tileline singlecolor \r
442\r
443 tst r9, #0x0800\r
7a7c6476 444 bne .DrawStrip_TileFlip\r
cc68a136 445\r
446 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
cc68a136 447.DrawStrip_TileNorm:\r
448 TileNorm r0\r
449 b .dsloop\r
450\r
7a7c6476 451.DrawStrip_TileFlip:\r
452 TileFlip r0\r
453 b .dsloop\r
454\r
cc68a136 455.DrawStrip_SingleColor:\r
456 and r4, r2, #0xf\r
457 orr r4, r3, r4\r
458 orr r4, r4, r4, lsl #8\r
459 tst r1, #1 @ not aligned?\r
460 strneb r4, [r1], #1\r
461 streqh r4, [r1], #2\r
462 strh r4, [r1], #2\r
463 strh r4, [r1], #2\r
464 strh r4, [r1], #2\r
465 strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
466 b .dsloop_subr1\r
467\r
cc68a136 468.DrawStrip_hiprio_maybempt:\r
469 cmp r7, r9\r
470 beq .dsloop @ must've been empty, otherwise we wouldn't get here\r
471 movs r2, r7, lsl #20 @ if (code&0x1000)\r
472 mov r2, r2, lsl #1\r
473 add r2, r2, r10, lsl #17\r
474 mov r2, r2, lsr #17\r
475 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
476 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
477 mov r9, r7 @ remember code\r
478 tst r2, r2\r
740da8c6 479 beq .dsloop\r
480 orr r10, r10, #1<<22\r
481\r
482.DrawStrip_hiprio:\r
483 tst r10, #0x00c00000\r
484 beq .DrawStrip_hiprio_maybempt\r
485 sub r0, r1, r11\r
486 orr r7, r7, r0, lsl #16\r
487 orr r7, r7, r10, lsl #25 @ (ty<<25)\r
488 tst r7, #0x1000\r
489 eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26;\r
490 str r7, [r6], #4 @ cache hi priority tile\r
491 mov r0, #0xf\r
cc68a136 492 b .dsloop\r
493\r
494.dsloop_exit:\r
740da8c6 495 tst r10, #1<<21 @ seen non hi-prio tile\r
ea38612f 496 ldr r1, [sp, #9*4] @ est\r
cc68a136 497 mov r0, #0\r
ea38612f 498 ldreq r2, [r1, #OFS_rendstatus]\r
cc68a136 499 str r0, [r6] @ terminate the cache list\r
283fec1b 500 orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles\r
ea38612f 501 streq r2, [r1, #OFS_rendstatus]\r
cc68a136 502\r
503 ldmfd sp!, {r4-r11,lr}\r
504 bx lr\r
505\r
6d7acf9e 506@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
cc68a136 507\r
508.DrawStrip_vsscroll:\r
6d7acf9e 509 rsb r8, r3, #0\r
510 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3\r
7b802576 511 bic r8, r8, #0x3fc00000\r
512 orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[21:0])\r
cc68a136 513\r
ea38612f 514 ldr r11, [sp, #9*4] @ est\r
6d7acf9e 515 orr r5, r1, r10, lsl #24\r
ea38612f 516 ldr r4, [r11, #OFS_DrawScanline]\r
6d7acf9e 517 sub r1, r3, #1\r
518 orr r5, r5, r4, lsl #16 @ r5=(shift_width[31:24]|scanline[23:16]|ymask[15:0])\r
519 and r1, r1, #7\r
520 add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1\r
cc68a136 521\r
6d7acf9e 522 mov r10,r9, lsl #16\r
83c093a4 523 tst r0, #1\r
6d7acf9e 524 orrne r10,r10, #0x8000\r
525 tst r9, #1<<31\r
526 mov r3, #0\r
527 orr r10,r10, #0xff000000 @ will be adjusted on entering loop\r
83c093a4 528 orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0])\r
6d7acf9e 529 movne r3, #0x40 @ default to shadowed pal on sh mode\r
cc68a136 530\r
83c093a4 531 cmp r7, #8\r
532 subne r10,r10, #0x01000000 @ have hscroll, start with negative cell\r
533\r
534 and r9, r9, #0xff00\r
535 add r8, r8, r9, lsr #8 @ tilex+=cellskip\r
536 add r7, r7, r9, lsr #5 @ dx+=cellskip<<3;\r
537 add r10,r10,r9, lsl #16 @ cell+=cellskip\r
cc68a136 538\r
6d7acf9e 539 @ cache some stuff to avoid mem access\r
99bdfd31 540 ldr r11,[sp, #9*4] @ est\r
ea8c405f 541 mov r0, #0xf\r
99bdfd31 542 ldr r11,[r11, #OFS_HighCol]\r
6d7acf9e 543\r
ea8c405f 544 mvn r9, #0 @ r9=prevcode=-1\r
545 add r1, r11, r7 @ r1=pdest\r
6d7acf9e 546\r
547 @ r4 & r7 are scratch in this loop\r
548.dsloop_vs_subr1:\r
549 sub r1, r1, #8\r
550.dsloop_vs: @ 40-41 times\r
551 add r10,r10, #0x01000000\r
552 and r4, r10, #0x003f0000\r
553 cmp r4, r10, asr #8\r
740da8c6 554 ble .dsloop_vs_exit\r
6d7acf9e 555\r
556 @ calc offset and read tileline code to r7, also calc ty\r
557 add r7, lr, #0x012000\r
558 add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180)\r
559 add r7, r7, r10,asr #23 @ vsram + ((cell&~1)<<1)\r
560 bic r7, r7, #3\r
561 tst r10,#0x8000 @ plane1?\r
562 addne r7, r7, #2\r
563 ldrh r7, [r7] @ r7=vscroll\r
564\r
565 bic r10,r10,#0xff @ clear old ty\r
7b802576 566 and r4, r5, #0xff0000 @ scanline\r
567 add r4, r4, r7, lsl #16 @ ... += vscroll\r
568 and r4, r4, r5, lsl #16 @ ... &= ymask\r
6d7acf9e 569 and r7, r4, #0x70000\r
570 orr r10,r10,r7, lsr #15 @ new ty\r
571\r
572 mov r4, r4, lsr #19\r
573 mov r7, r5, lsr #24\r
574 mov r4, r4, lsl r7 @ nametabadd\r
575\r
576 and r7, r8, r8, lsr #25\r
577 add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords)\r
578 add r7, r7, r4, lsl #1\r
579 ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend)\r
580\r
581 add r1, r1, #8\r
582 add r8, r8, #1\r
583\r
584 tst r7, #0x8000\r
585 bne .DrawStrip_vs_hiprio\r
586\r
587 cmp r7, r9\r
588 beq .DrawStrip_vs_samecode @ we know stuff about this tile already\r
589\r
590 mov r9, r7 @ remember code\r
7b802576 591 orr r8, r8, #(1<<24)@ seen non hi-prio tile\r
6d7acf9e 592\r
593 movs r2, r9, lsl #20 @ if (code&0x1000)\r
594 mov r2, r2, lsl #1\r
595 add r2, r2, r10, lsl #17\r
596 mov r2, r2, lsr #17\r
597 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
598\r
599 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
600\r
601 bic r7, r3, #0x3f\r
602 and r3, r9, #0x6000\r
603 add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
604\r
605.DrawStrip_vs_samecode:\r
606 tst r2, r2\r
607 beq .dsloop_vs @ tileline blank\r
608\r
609 cmp r2, r2, ror #4\r
610 beq .DrawStrip_vs_SingleColor @ tileline singlecolor \r
611\r
612 tst r9, #0x0800\r
7a7c6476 613 bne .DrawStrip_vs_TileFlip\r
6d7acf9e 614\r
615 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
6d7acf9e 616.DrawStrip_vs_TileNorm:\r
617 TileNorm r0\r
618 b .dsloop_vs\r
619\r
7a7c6476 620.DrawStrip_vs_TileFlip:\r
621 TileFlip r0\r
622 b .dsloop_vs\r
623\r
6d7acf9e 624.DrawStrip_vs_SingleColor:\r
625 and r4, r2, #0xf\r
626 orr r4, r3, r4\r
627 orr r4, r4, r4, lsl #8\r
628 tst r1, #1 @ not aligned?\r
629 strneb r4, [r1], #1\r
630 streqh r4, [r1], #2\r
631 strh r4, [r1], #2\r
632 strh r4, [r1], #2\r
633 strh r4, [r1], #2\r
634 strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
635 b .dsloop_vs_subr1\r
636\r
637.DrawStrip_vs_hiprio:\r
638 tst r10, #0x00c00000\r
639 beq .DrawStrip_vs_hiprio_maybempt\r
640 sub r0, r1, r11\r
641 orr r7, r7, r0, lsl #16\r
642 orr r7, r7, r10, lsl #25 @ (ty<<25)\r
643 tst r7, #0x1000\r
644 eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26;\r
645 str r7, [r6], #4 @ cache hi priority tile\r
646 mov r0, #0xf\r
647 b .dsloop_vs\r
648\r
649.DrawStrip_vs_hiprio_maybempt:\r
650 cmp r7, r9\r
651 beq .dsloop_vs @ must've been empty, otherwise we wouldn't get here\r
652 movs r2, r7, lsl #20 @ if (code&0x1000)\r
653 mov r2, r2, lsl #1\r
654 add r2, r2, r10, lsl #17\r
655 mov r2, r2, lsr #17\r
656 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
657 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
658 mov r9, r7 @ remember code\r
659 tst r2, r2\r
660 orrne r10, r10, #1<<22\r
661 bne .DrawStrip_vs_hiprio\r
662 b .dsloop_vs\r
663\r
740da8c6 664.dsloop_vs_exit:\r
7b802576 665 tst r8, #(1<<24) @ seen non hi-prio tile\r
ea38612f 666 ldr r1, [sp, #9*4] @ est\r
740da8c6 667 mov r0, #0\r
ea38612f 668 ldreq r2, [r1, #OFS_rendstatus]\r
740da8c6 669 str r0, [r6] @ terminate the cache list\r
283fec1b 670 orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles\r
ea38612f 671 streq r2, [r1, #OFS_rendstatus]\r
740da8c6 672\r
673 ldmfd sp!, {r4-r11,lr}\r
674 bx lr\r
675\r
6d7acf9e 676\r
677@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
cc68a136 678\r
679@ interlace mode 2? Sonic 2?\r
680.DrawStrip_interlace:\r
@ Tail of DrawLayer, reached when bits 1 and 2 of the halfword read from video+12 are both set.\r
@ Builds a struct TileStrip on the stack and lets the C routine DrawStripInterlace draw the line.\r
@ On entry (set up earlier in DrawLayer): r0=plane (0/1), r1=ymask, r2=DrawScanline, r3=hscroll,\r
@ r5=xmask, r6=hcache, r7=first vsram word, r9=sh|cellskip|maxcells, r10=shift[width], r12=nametab<<1.\r
681 tst r0, r0\r
682 moveq r7, r7, lsl #21 @ plane 0: low halfword of the vsram word, 11 bits moved to the top\r
683 movne r7, r7, lsl #5 @ plane 1: high halfword of the vsram word, 11 bits moved to the top\r
684\r
685 @ Find the line in the name table\r
b6d7ac70 686 add r2, r7, r2, lsl #22 @ r2=(vscroll+(DrawScanline<<1))<<21 (11 bits);\r
cc68a136 687 orr r1, r1, #0x80000000\r
688 and r2, r2, r1, ror #10 @ &((ymask<<1)|1)<<21;\r
689 mov r2, r2, lsr #21\r
690 mov r4, r2, lsr #4\r
691 mov r12, r12, lsr #1 @ halfwords\r
692 add r0, r12, r4, lsl r10 @ nametab+=(ts.line>>4)<<shift[width];\r
693 and r9, r9, #0xff @ keep only maxcells for ts->cells\r
694\r
@ The struct needs 6 words; 7 are reserved so that, with the 9 registers pushed at entry\r
@ (9*4 + 7*4 = 64 bytes), sp is 8-byte aligned at the call as AAPCS requires at a public\r
@ interface. The previous 6*4 left sp at 4 mod 8 (assuming an 8-byte aligned sp on entry).\r
695 sub sp, sp, #7*4\r
696 stmia sp, {r0,r2,r3,r5,r6,r9} @ nametab, line, hscroll, xmask, hc, cells; top word is padding\r
697\r
698 mov r0, sp\r
699 bl DrawStripInterlace @ struct TileStrip *ts\r
700\r
701 add sp, sp, #7*4\r
702 ldmfd sp!, {r4-r11,lr}\r
703 bx lr\r
704\r
705.pool\r
706\r
707@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
708\r
99bdfd31 709@ void BackFill(int reg7, int sh, struct PicoEState *est)\r
cc68a136 710\r
99bdfd31 711.global BackFill\r
cc68a136 712\r
713BackFill:\r
@ In: r0=reg7, r1=sh, r2=est. No value is returned.\r
@ Fills 320 bytes starting at est->HighCol+8 with the byte (reg7 & 0x3f) | (sh << 6).\r
714 stmfd sp!, {r4-r9,lr} @ r4-r7 become fill registers; r8 and r9 are saved but not touched below\r
715\r
ea8c405f 716 mov r0, r0, lsl #26 @ with the lsr #26 below: r0 &= 0x3f\r
99bdfd31 717 ldr lr, [r2, #OFS_HighCol]\r
ea8c405f 718 mov r0, r0, lsr #26\r
719 add lr, lr, #8 @ lr=destination, 8 bytes into HighCol\r
ea8c405f 720\r
cc68a136 721 orr r0, r0, r1, lsl #6 @ merge sh in from bit 6 upwards\r
722 orr r0, r0, r0, lsl #8 @ replicate the low byte ...\r
723 orr r0, r0, r0, lsl #16 @ ... into all four bytes of the word\r
724\r
725 mov r1, r0\r
726 mov r2, r0\r
727 mov r3, r0\r
728 mov r4, r0\r
729 mov r5, r0\r
730 mov r6, r0\r
731 mov r7, r0\r
732\r
733 @ go go go!\r
734 stmia lr!, {r0-r7} @ 10*8*4 = 320 bytes in total, 32 per store\r
735 stmia lr!, {r0-r7}\r
736 stmia lr!, {r0-r7}\r
737 stmia lr!, {r0-r7}\r
738 stmia lr!, {r0-r7}\r
739 stmia lr!, {r0-r7}\r
740 stmia lr!, {r0-r7}\r
741 stmia lr!, {r0-r7}\r
742 stmia lr!, {r0-r7}\r
743 stmia lr!, {r0-r7}\r
744\r
99bdfd31 745 ldmfd sp!, {r4-r9,lr}\r
746 bx lr\r
cc68a136 747\r
748\r
749@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
750\r
ea38612f 751@ void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est)\r
cc68a136 752\r
ea38612f 753.global DrawTilesFromCache\r
cc68a136 754\r
755DrawTilesFromCache:\r
ea38612f 756 stmfd sp!, {r4-r9,r11,lr}\r
cc68a136 757\r
cc68a136 758 @ cache some stuff to avoid mem access\r
99bdfd31 759 ldr r11,[r3, #OFS_HighCol]\r
cc68a136 760 mov r12,#0xf\r
ea38612f 761 ldr lr, [r3, #OFS_Pico_vram]\r
762 mov r9, r3 @ est\r
cc68a136 763\r
740da8c6 764 mvn r5, #0 @ r5=prevcode=-1\r
7a7c6476 765 ands r8, r1, #1\r
766 orr r8, r8, r2, lsl #1\r
740da8c6 767 bne .dtfc_check_rendflags\r
768\r
cc68a136 769 @ scratch: r4, r7\r
770.dtfc_loop:\r
771 ldr r6, [r0], #4 @ read code\r
772 movs r1, r6, lsr #16 @ r1=dx;\r
ea38612f 773 ldmeqfd sp!, {r4-r9,r11,pc} @ dx is never zero, this must be a terminator, return\r
7a7c6476 774 bic r4, r1, #0xfe00\r
775 add r1, r11, r4 @ r1=pdest\r
cc68a136 776\r
777 mov r7, r6, lsl #16\r
778 cmp r5, r7, lsr #16\r
779 beq .dtfc_samecode @ if (code==prevcode)\r
780\r
781 mov r5, r7, lsr #16\r
782\r
783 mov r2, r5, lsl #21\r
784 mov r2, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4;\r
785 add r2, r2, r6, lsr #25 @ addr+=ty\r
786\r
787 and r3, r5, #0x6000\r
788 mov r3, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
789\r
790 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
791\r
792.dtfc_samecode:\r
7a7c6476 793 rsbs r4, r4, r8, lsr #1\r
794 bmi .dtfc_cut_tile\r
795\r
796 tst r8, #1\r
cc68a136 797 bne .dtfc_shadow\r
798\r
799 tst r2, r2\r
800 beq .dtfc_loop\r
801\r
802 cmp r2, r2, ror #4\r
803 beq .dtfc_SingleColor @ tileline singlecolor \r
804\r
805 tst r5, #0x0800\r
7a7c6476 806 bne .dtfc_TileFlip\r
cc68a136 807\r
808 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
cc68a136 809.dtfc_TileNorm:\r
810 TileNorm r12\r
811 b .dtfc_loop\r
812\r
7a7c6476 813.dtfc_TileFlip:\r
814 TileFlip r12\r
815 b .dtfc_loop\r
816\r
cc68a136 817.dtfc_SingleColor:\r
818 and r4, r2, #0xf\r
819 orr r4, r3, r4\r
820 orr r4, r4, r4, lsl #8\r
821 tst r1, #1 @ not aligned?\r
822 strneb r4, [r1], #1\r
823 streqh r4, [r1], #2\r
824 strh r4, [r1], #2\r
825 strh r4, [r1], #2\r
826 strh r4, [r1], #2\r
827 strneb r4, [r1], #1 @ have a remaining unaligned pixel?\r
828 b .dtfc_loop\r
829\r
830.dtfc_shadow:\r
831 tst r2, r2\r
832 beq .dtfc_shadow_blank\r
833\r
834 cmp r2, r2, ror #4\r
835 beq .dtfc_SingleColor @ tileline singlecolor \r
836\r
837 tst r5, #0x0800\r
7a7c6476 838 bne .dtfc_TileFlipShHP\r
cc68a136 839\r
840 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
cc68a136 841.dtfc_TileNormShHP:\r
842 TileNormShHP\r
843 b .dtfc_loop\r
844\r
7a7c6476 845.dtfc_TileFlipShHP:\r
846 TileFlipShHP\r
847 b .dtfc_loop\r
848\r
cc68a136 849.dtfc_shadow_blank:\r
07abbab1 850 tst r1, #1\r
851 ldrneb r4, [r1]\r
bfa12428 852 mov r6, #0xbf\r
853 and r4, r4, #0xbf\r
07abbab1 854 strneb r4, [r1], #1\r
855 ldrh r4, [r1]\r
856 orr r6, r6, r6, lsl #8\r
857 and r4, r4, r6\r
858 strh r4, [r1], #2\r
859 ldrh r4, [r1]\r
860 and r4, r4, r6\r
861 strh r4, [r1], #2\r
862 ldrh r4, [r1]\r
863 and r4, r4, r6\r
864 strh r4, [r1], #2\r
865 ldrh r4, [r1]\r
866 and r4, r4, r6\r
867 streqh r4, [r1]\r
868 strneb r4, [r1]\r
cc68a136 869 b .dtfc_loop\r
870\r
7a7c6476 871.dtfc_cut_tile:\r
872 add r4, r4, #7 @ 0-6\r
873 mov r4, r4, lsl #2\r
874 mov r12,#0xf<<28\r
875 mov r12,r12,asr r4\r
876 mov r2, r2, ror #16\r
877 tst r5, #0x0800 @ flipped?\r
878 mvnne r12,r12\r
879 and r2, r2, r12\r
880 mov r2, r2, ror #16\r
881 mov r12,#0xf\r
882 tst r8, #1\r
883 bne .dtfc_shadow\r
884 tst r2, r2\r
885 beq .dtfc_loop\r
886 tst r5, #0x0800\r
887 beq .dtfc_TileNorm\r
888 b .dtfc_TileFlip\r
889\r
740da8c6 890@ check if we have detected layer covered with hi-prio tiles:\r
891.dtfc_check_rendflags:\r
ea38612f 892 ldr r2, [r9, #OFS_rendstatus]\r
283fec1b 893 tst r2, #(PDRAW_PLANE_HI_PRIO|PDRAW_SHHI_DONE)\r
740da8c6 894 beq .dtfc_loop\r
7a7c6476 895 bic r8, r8, #1 @ sh/hi mode off\r
283fec1b 896 tst r2, #PDRAW_SHHI_DONE\r
740da8c6 897 bne .dtfc_loop @ already processed\r
283fec1b 898 orr r2, r2, #PDRAW_SHHI_DONE\r
ea38612f 899 str r2, [r9, #OFS_rendstatus]\r
740da8c6 900\r
901 add r1, r11,#8\r
07abbab1 902 mov r3, #320/4/4\r
bfa12428 903 mov r6, #0xbf\r
740da8c6 904 orr r6, r6, r6, lsl #8\r
905 orr r6, r6, r6, lsl #16\r
906.dtfc_loop_shprep:\r
07abbab1 907 ldmia r1, {r2,r4,r5,r7}\r
740da8c6 908 subs r3, r3, #1\r
07abbab1 909 and r2, r2, r6\r
910 and r4, r4, r6\r
911 and r5, r5, r6\r
912 and r7, r7, r6\r
913 stmia r1!,{r2,r4,r5,r7}\r
914 bne .dtfc_loop_shprep\r
915\r
916 mvn r5, #0 @ r5=prevcode=-1\r
917 b .dtfc_loop\r
740da8c6 918\r
cc68a136 919.pool\r
920\r
921@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
922\r
923\r
ea38612f 924@ void DrawSpritesSHi(unsigned char *sprited, struct PicoEState *est)\r
925\r
926.global DrawSpritesSHi\r
e352c3af 927\r
928DrawSpritesSHi:\r
@ Entry section: r0=sprited, r1=est. Returns at once when the low 7 bits of the first\r
@ word at sprited are zero (presumably the per-line sprite count - confirm against the C side).\r
@ Otherwise sets up r10/r11/r12/lr and falls through into the per-sprite loop below.\r
fbc65db7 929 ldr r3, [r0]\r
930 mov r12,#0xff\r
e352c3af 931 ands r3, r3, #0x7f\r
932 bxeq lr @ nothing to draw\r
cc68a136 933\r
ea38612f 934 stmfd sp!, {r1,r4-r11,lr} @ +est\r
fbc65db7 935 strb r12,[r0,#2] @ set end marker\r
936 add r10,r0, #3 @ r10=sprited+3, the byte right after the end marker\r
e352c3af 937 add r10,r10,r3 @ r10=HighLnSpr end\r
938\r
99bdfd31 939 ldr r11,[r1, #OFS_HighCol]\r
ea8c405f 940 mov r12,#0xf @ helper pattern for the Tile* macros\r
ea38612f 941 ldr lr, [r1, #OFS_Pico_vram]\r
cc68a136 942\r
cc68a136 943\r
e352c3af 944DrawSpriteSHi:\r
945 @ draw next sprite\r
@ Per-sprite setup. Expects r10=list cursor, r11=HighCol, r12=0xf, lr=vram and est saved at [sp].\r
@ Leaves r0=sx, r3=pal, r5=tile address delta, r6=width, r8=tile address, r9=code with flag bits\r
@ on top, then jumps into the tile loop with the flags of sx set.\r
946 ldrb r0, [r10,#-1]! @ pre-decrement: the list is walked backwards towards the 0xff end marker\r
ea38612f 947 ldr r7, [sp] @ est\r
99bdfd31 948 ldr r1, [r7, #OFS_HighPreSpr]\r
e352c3af 949 cmp r0, #0xff\r
ea38612f 950 ldmeqfd sp!, {r1,r4-r11,pc} @ end of list\r
e352c3af 951 and r0, r0, #0x7f\r
952 add r0, r1, r0, lsl #3 @ r0=HighPreSpr entry, 8 bytes each\r
953\r
954 ldr r9, [r0, #4] @ sprite[1]\r
955 mov r2, r9, asr #16 @ r2=sx\r
cc68a136 956\r
e352c3af 957 mov r9, r9, lsl #16\r
958 mov r3, r9, lsr #31 @ priority\r
959 mov r9, r9, lsr #16\r
fbc65db7 960@ orr r9, r9, r8, lsl #31 @ r9=code|sh[31] @@ sh is always on here now\r
e352c3af 961 and r4, r9, #0x6000\r
962 orr r9, r9, r4, lsl #16\r
fbc65db7 963 orr r9, r9, #0x90000000 @ r9=scc1 ???? ... <code> (s=shadow/hilight, cc=pal)\r
e352c3af 964 cmp r12,r9, lsr #28 @ sh/hi with pal3?\r
e54507e8 965 cmpne r3, #1 @ if not, is it hi prio?\r
e352c3af 966 bne DrawSpriteSHi @ non-operator low sprite, already drawn\r
cc68a136 967\r
e352c3af 968 ldr r3, [r0] @ sprite[0]\r
e352c3af 969 mov r6, r3, lsr #28\r
970 sub r6, r6, #1 @ r6=width-1 (inc later)\r
971 mov r5, r3, lsr #24\r
972 and r5, r5, #7 @ r5=height\r
cc68a136 973\r
ea38612f 974 ldr r7, [r7, #OFS_DrawScanline]\r
e352c3af 975 mov r0, r3, lsl #16 @ r0=sy<<16 (tmp)\r
cc68a136 976\r
e352c3af 977 sub r7, r7, r0, asr #16 @ r7=row=DrawScanline-sy\r
cc68a136 978\r
e352c3af 979 tst r9, #0x1000\r
980 movne r0, r5, lsl #3\r
981 subne r0, r0, #1\r
982 subne r7, r0, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y\r
cc68a136 983\r
e352c3af 984 add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down\r
985 tst r9, #0x0800\r
986 mlane r8, r5, r6, r8 @ if (code&0x0800) { tile+=delta*(width-1);\r
987 rsbne r5, r5, #0 @ delta=-delta; } // r5=delta now\r
cc68a136 988\r
e352c3af 989 mov r8, r8, lsl #21\r
990 mov r8, r8, lsr #17\r
991 and r7, r7, #7\r
992 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address\r
993\r
994 mov r5, r5, lsl #4 @ delta<<=4; // Delta of address\r
e352c3af 995 mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30);\r
996\r
997 add r6, r6, #1 @ inc now\r
998 adds r0, r2, #0 @ mov sx to r0 and set NZV flags for the ble at .dsprShi_loop_enter\r
999 b .dsprShi_loop_enter\r
1000\r
1001.dsprShi_loop:\r
1002 subs r6, r6, #1 @ width--\r
1003 beq DrawSpriteSHi\r
1004 adds r0, r0, #8 @ sx+=8\r
1005 add r8, r8, r5 @ tile+=delta\r
1006\r
1007.dsprShi_loop_enter:\r
1008 ble .dsprShi_loop @ sx <= 0\r
cc68a136 1009 cmp r0, #328\r
e352c3af 1010 bge DrawSpriteSHi\r
cc68a136 1011\r
1012 mov r8, r8, lsl #17\r
e352c3af 1013 mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address\r
cc68a136 1014\r
e352c3af 1015 ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
1016 add r1, r11, r0 @ r1=pdest\r
cc68a136 1017 tst r2, r2\r
e352c3af 1018 beq .dsprShi_loop\r
cc68a136 1019\r
e352c3af 1020 cmp r12, r9, lsr #28\r
1021 beq .dsprShi_shadow\r
cc68a136 1022\r
1023 cmp r2, r2, ror #4\r
e352c3af 1024 beq .dsprShi_SingleColor @ tileline singlecolor \r
cc68a136 1025\r
e352c3af 1026 tst r9, #0x0800\r
1027 bne .dsprShi_TileFlip\r
cc68a136 1028\r
e352c3af 1029 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
1030@ scratch: r4, r7\r
1031.dsprShi_TileNorm:\r
cc68a136 1032 TileNorm r12\r
e352c3af 1033 b .dsprShi_loop\r
cc68a136 1034\r
e352c3af 1035.dsprShi_TileFlip:\r
7a7c6476 1036 TileFlip r12\r
e352c3af 1037 b .dsprShi_loop\r
7a7c6476 1038\r
e352c3af 1039.dsprShi_SingleColor:\r
cc68a136 1040 and r4, r2, #0xf\r
1041 orr r4, r3, r4\r
1042 orr r4, r4, r4, lsl #8\r
e352c3af 1043 tst r0, #1 @ not aligned?\r
cc68a136 1044 strneb r4, [r1], #1\r
1045 streqh r4, [r1], #2\r
1046 strh r4, [r1], #2\r
1047 strh r4, [r1], #2\r
1048 strh r4, [r1], #2\r
1049 strneb r4, [r1], #1\r
e352c3af 1050 b .dsprShi_loop\r
cc68a136 1051\r
e352c3af 1052.dsprShi_shadow:\r
1053 tst r9, #0x8000\r
1054 beq .dsprShi_shadow_lowpri\r
07abbab1 1055\r
cc68a136 1056 cmp r2, r2, ror #4\r
e352c3af 1057 beq .dsprShi_singlec_sh\r
cc68a136 1058\r
e352c3af 1059 tst r9, #0x0800\r
1060 bne .dsprShi_TileFlip_sh\r
cc68a136 1061\r
e54507e8 1062 @ (r1=pdest, r2=pixels8, r3=pal) r4, r7: scratch, r12: helper pattern\r
e352c3af 1063.dsprShi_TileNorm_sh:\r
cc68a136 1064 TileNormSh\r
e352c3af 1065 b .dsprShi_loop\r
cc68a136 1066\r
e352c3af 1067.dsprShi_TileFlip_sh:\r
7a7c6476 1068 TileFlipSh\r
e352c3af 1069 b .dsprShi_loop\r
7a7c6476 1070\r
e352c3af 1071.dsprShi_singlec_sh:\r
cc68a136 1072 cmp r2, #0xe0000000\r
e352c3af 1073 bcc .dsprShi_SingleColor @ normal singlecolor tileline (carry inverted in ARM)\r
cc68a136 1074 tst r2, #0x10000000\r
e352c3af 1075 bne .dsprShi_sh_sh\r
cc68a136 1076 TileSingleHi\r
e352c3af 1077 b .dsprShi_loop\r
cc68a136 1078\r
e352c3af 1079.dsprShi_sh_sh:\r
cc68a136 1080 TileSingleSh\r
e352c3af 1081 b .dsprShi_loop\r
cc68a136 1082\r
e352c3af 1083.dsprShi_shadow_lowpri:\r
1084 tst r9, #0x800\r
1085 bne .dsprShi_TileFlip_sh_lp\r
07abbab1 1086\r
e352c3af 1087.dsprShi_TileNorm_sh_lp:\r
07abbab1 1088 TileNormSh_onlyop_lp\r
e352c3af 1089 b .dsprShi_loop\r
07abbab1 1090\r
e352c3af 1091.dsprShi_TileFlip_sh_lp:\r
07abbab1 1092 TileFlipSh_onlyop_lp\r
e352c3af 1093 b .dsprShi_loop\r
07abbab1 1094\r
cc68a136 1095.pool\r
1096\r
1097@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1098\r
ea38612f 1099@ void DrawAllSprites(unsigned char *sprited, int prio, int sh,\r
1100@ struct PicoEState *est)\r
@ Draws the current scanline's row of every listed sprite whose priority bit\r
@ equals prio into the HighCol line buffer taken from est. The byte list at\r
@ sprited is walked backwards, from its last entry down to the 0xff end marker\r
@ that this routine stores at sprited[2].\r
@ In: r0=sprited, r1=prio, r2=sh, r3=est. Returns at once when the low 7 bits\r
@ of the first word at sprited (the entry count) are zero.\r
1101\r
1102.global DrawAllSprites\r
283fec1b 1103\r
1104DrawAllSprites:\r
fbc65db7 1105 orr r1, r2, r1, lsl #1 @ r1=sh|prio<<1\r
ea38612f 1106 ldr r2, [r0]\r
1107 ands r2, r2, #0x7f @ r2=number of list entries\r
283fec1b 1108 bxeq lr @ empty list, nothing to draw\r
1109\r
1110 @ time to do some real work\r
ea38612f 1111 stmfd sp!, {r1,r3-r11,lr} @ +sh|prio<<1 +est\r
283fec1b 1112 mov r12,#0xff\r
fbc65db7 1113 strb r12,[r0,#2] @ set end marker\r
1114 add r10,r0, #3 @ entries start at sprited+3\r
ea38612f 1115 add r10,r10,r2 @ r10=HighLnSpr end\r
283fec1b 1116\r
99bdfd31 1117 ldr r11,[r3, #OFS_HighCol]\r
283fec1b 1118 mov r12,#0xf @ r12=nibble mask, kept for the whole routine\r
ea38612f 1119 ldr lr, [r3, #OFS_Pico_vram]\r
283fec1b 1120\r
1121@ + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: vert./horiz. size\r
cc68a136 1122@ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8\r
1123\r
ea38612f 1124DrawSprite:\r
283fec1b 1125 @ draw next sprite\r
1126 ldrb r0, [r10,#-1]! @ r0=previous list entry (list is walked backwards)\r
ea38612f 1127 ldr r8, [sp] @ sh|prio<<1\r
1128 ldr r7, [sp, #4] @ est\r
e352c3af 1129 mov r2, r0, lsr #7 @ r2=priority bit of this entry\r
1130 cmp r0, #0xff\r
ea38612f 1131 ldmeqfd sp!, {r1,r3-r11,pc} @ end of list\r
97a7f774 1132 cmp r2, r8, lsr #1 @ compare with requested prio\r
e352c3af 1133 bne DrawSprite @ wrong priority\r
99bdfd31 1134 ldr r1, [r7, #OFS_HighPreSpr]\r
283fec1b 1135 and r0, r0, #0x7f @ r0=sprite index\r
1136 add r0, r1, r0, lsl #3 @ r0=HighPreSpr+index*8 (two words per sprite)\r
cc68a136 1137\r
cc68a136 1138 ldr r3, [r0] @ sprite[0]\r
ea38612f 1139 ldr r7, [r7, #OFS_DrawScanline]\r
cc68a136 1140 mov r6, r3, lsr #28\r
1141 sub r6, r6, #1 @ r6=width-1 (inc later)\r
1142 mov r5, r3, lsr #24\r
1143 and r5, r5, #7 @ r5=height\r
1144\r
1145 mov r4, r3, lsl #16 @ r4=sy<<16 (tmp)\r
1146\r
cc68a136 1147 ldr r9, [r0, #4]\r
b6d7ac70 1148 sub r7, r7, r4, asr #16 @ r7=row=DrawScanline-sy\r
cc68a136 1149\r
cc68a136 1150 mov r2, r9, asr #16 @ r2=sx\r
e5fa9817 1151 mov r9, r9, lsl #16\r
1152 mov r9, r9, lsr #16 @ keep only the low 16 bits (code)\r
97a7f774 1153 orr r9, r9, r8, lsl #31 @ r9=code|sh[31]\r
cc68a136 1154\r
1155 tst r9, #0x1000\r
1156 movne r4, r5, lsl #3\r
1157 subne r4, r4, #1\r
1158 subne r7, r4, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y\r
1159\r
07abbab1 1160 add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down\r
cc68a136 1161 tst r9, #0x0800\r
1162 mlane r8, r5, r6, r8 @ if (code&0x0800) { tile+=delta*(width-1);\r
1163 rsbne r5, r5, #0 @ delta=-delta; } // r5=delta now\r
1164\r
07abbab1 1165 mov r8, r8, lsl #21\r
1166 mov r8, r8, lsr #17 @ r8=(tile&0x7ff)<<4\r
cc68a136 1167 and r7, r7, #7\r
1168 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address\r
1169\r
e5fa9817 1170.dspr_continue:\r
cc68a136 1171 @ cache some stuff to avoid mem access\r
cc68a136 1172 mov r5, r5, lsl #4 @ delta<<=4; // Delta of address\r
1173 and r4, r9, #0x6000\r
1174 orr r9, r9, r4, lsl #16 @ copy the palette bits to r9[30:29]\r
97a7f774 1175 orrs r9, r9, #0x10000000 @ r9=scc1 ???? ... <code> (s=shadow/hilight, cc=pal)\r
cc68a136 1176\r
cc68a136 1177 mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30);\r
97a7f774 1178 orrmi r3, r3, #0x40 @ for sh/hi\r
cc68a136 1179\r
1180 add r6, r6, #1 @ inc now\r
1181 adds r0, r2, #0 @ mov sx to r0 and set ZV flags\r
1182 b .dspr_loop_enter\r
1183\r
1184.dspr_loop:\r
1185 subs r6, r6, #1 @ width--\r
283fec1b 1186 beq DrawSprite\r
cc68a136 1187 adds r0, r0, #8 @ sx+=8\r
1188 add r8, r8, r5 @ tile+=delta\r
1189\r
1190.dspr_loop_enter:\r
1191 ble .dspr_loop @ sx <= 0\r
1192 cmp r0, #328\r
283fec1b 1193 bge DrawSprite\r
cc68a136 1194\r
1195 mov r8, r8, lsl #17\r
1196 mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address\r
1197\r
1198 ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
1199 add r1, r11, r0 @ r1=pdest\r
1200 tst r2, r2\r
1201 beq .dspr_loop @ all 8 pixels transparent\r
1202\r
1203 cmp r12, r9, lsr #28 @ top nibble 0xf: sh mode and palette 3\r
1204 beq .dspr_shadow\r
1205\r
1206 cmp r2, r2, ror #4\r
1207 beq .dspr_SingleColor @ tileline singlecolor \r
1208\r
1209 tst r9, #0x0800\r
7a7c6476 1210 bne .dspr_TileFlip\r
cc68a136 1211\r
1212 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
cc68a136 1213@ scratch: r4, r7\r
1214.dspr_TileNorm:\r
1215 TileNorm r12\r
1216 b .dspr_loop\r
1217\r
7a7c6476 1218.dspr_TileFlip:\r
1219 TileFlip r12\r
1220 b .dspr_loop\r
1221\r
07abbab1 1222.dspr_singlec_sh:\r
1223 cmp r2, #0xe0000000\r
e54507e8 1224 bcs .dspr_TileNorm_sh @ op. tileline, markop. XXX: maybe add a spec. handler?\r
07abbab1 1225\r
cc68a136 1226.dspr_SingleColor:\r
1227 and r4, r2, #0xf\r
1228 orr r4, r3, r4\r
1229 orr r4, r4, r4, lsl #8 @ r4=pal|colour in both bytes of a halfword\r
1230 tst r0, #1 @ not aligned?\r
1231 strneb r4, [r1], #1 @ odd sx: byte + 3 halfwords + byte = 8 pixels\r
1232 streqh r4, [r1], #2 @ even sx: 4 halfwords = 8 pixels\r
1233 strh r4, [r1], #2\r
1234 strh r4, [r1], #2\r
1235 strh r4, [r1], #2\r
1236 strneb r4, [r1], #1\r
1237 b .dspr_loop\r
1238\r
1239.dspr_shadow:\r
1240 cmp r2, r2, ror #4\r
1241 beq .dspr_singlec_sh\r
1242\r
1243 tst r9, #0x0800\r
7a7c6476 1244 bne .dspr_TileFlip_sh\r
cc68a136 1245\r
1246 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern\r
cc68a136 1247.dspr_TileNorm_sh:\r
e54507e8 1248 TileNormSh_markop\r
cc68a136 1249 b .dspr_loop\r
1250\r
7a7c6476 1251.dspr_TileFlip_sh:\r
e54507e8 1252 TileFlipSh_markop\r
cc68a136 1253 b .dspr_loop\r
1254\r
1255\r
cc68a136 1256@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1257\r
ea38612f 1258@ void DrawWindow(int tstart, int tend, int prio, int sh,\r
1259@ struct PicoEState *est)\r
@ Draws the window plane tiles from tstart up to tend for the current scanline\r
@ into HighCol+8. In: r0=tstart, r1=tend, r2=prio, r3=sh, [sp]=est.\r
@ Tiles whose priority bit differs from prio are skipped and reported by\r
@ OR-ing PDRAW_WND_DIFF_PRIO into est's rendstatus on exit.\r
1260\r
1261.global DrawWindow\r
cc68a136 1262\r
1263DrawWindow:\r
ea38612f 1264 ldr r12, [sp] @ est\r
cc68a136 1265 stmfd sp!, {r4-r11,lr}\r
1266\r
ea38612f 1267 ldr r6, [r12, #OFS_Pico_video]\r
1268 ldr r10, [r12, #OFS_DrawScanline]\r
1269 mov r11, r12 @ est\r
1270 ldrb r12, [r6, #3] @ pvid->reg[3]\r
cc68a136 1271\r
ea38612f 1272 ldr r4, [r6, #12] @ word at pvid->reg[12]\r
cc68a136 1273 mov r5, r10, lsr #3 @ r5=DrawScanline>>3\r
1274 and r10, r10, #7\r
1275 mov r10, r10, lsl #1 @ r10=ty\r
1276\r
1277 mov r12, r12, lsl #10\r
1278\r
1279 tst r4, #1 @ 40 cell mode?\r
1280 andne r12, r12, #0xf000 @ 0x3c<<10\r
1281 andeq r12, r12, #0xf800\r
1282 addne r12, r12, r5, lsl #7\r
1283 addeq r12, r12, r5, lsl #6 @ nametab\r
1284 add r12, r12, r0, lsl #2 @ +starttile\r
1285\r
ea38612f 1286 ldr lr, [r11, #OFS_Pico_vram]\r
1287 ldr r6, [r11, #OFS_rendstatus]\r
cc68a136 1288\r
1289 @ fetch the first code now\r
1290 ldrh r7, [lr, r12]\r
1291\r
283fec1b 1292 ands r6, r6, #PDRAW_WND_DIFF_PRIO @ Z set: difference not flagged so far\r
cc68a136 1293 orr r6, r6, r2 @ r6 bit0=prio (flags untouched)\r
cc68a136 1294\r
7292c709 1295 eoreq r8, r2, r7, lsr #15 @ do prio bits differ?\r
1296 cmpeq r8, #1\r
1297 ldmeqfd sp!, {r4-r11,pc} @ yes, assume that whole window uses same priority\r
cc68a136 1298\r
cc68a136 1299 orr r6, r6, r3, lsl #8 @ shadow mode\r
1300\r
1301 sub r8, r1, r0 @ r8=tend-tstart\r
cc68a136 1302\r
1303 @ cache some stuff to avoid mem access\r
99bdfd31 1304 ldr r11, [r11, #OFS_HighCol]\r
ea8c405f 1305 mov r8, r8, lsl #1 @ cells\r
ea8c405f 1306 add r11,r11,#8\r
ea8c405f 1307 mvn r9, #0 @ r9=prevcode=-1\r
07abbab1 1308 add r1, r11, r0, lsl #4 @ r1=pdest\r
cc68a136 1309 mov r0, #0xf @ r0=nibble mask for the tile macros\r
1310 b .dwloop_enter\r
1311\r
07abbab1 1312 @ r4,r5 are scratch in this loop\r
cc68a136 1313.dwloop:\r
1314 add r1, r1, #8\r
1315.dwloop_nor1:\r
1316 add r12, r12, #2 @ halfwords\r
1317 ldrh r7, [lr, r12] @ r7=code (int, but from unsigned, no sign extend)\r
1318 subs r8, r8, #1\r
1319 beq .dwloop_end @ done\r
1320\r
1321 eor r5, r6, r7, lsr #15\r
1322 tst r5, #1\r
1323 orrne r6, r6, #2 @ wrong pri: #2 is PDRAW_WND_DIFF_PRIO (1<<1)\r
1324 bne .dwloop\r
1325\r
1326 cmp r7, r9\r
1327 beq .dw_samecode @ we know stuff about this tile already\r
1328\r
1329.dwloop_enter:\r
1330 mov r9, r7 @ remember code\r
1331\r
1332 movs r2, r9, lsl #20 @ if (code&0x1000)\r
1333 mov r2, r2, lsl #1\r
1334 add r2, r10, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4; addr+=ty\r
1335 eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe;\r
1336\r
1337 and r3, r9, #0x6000\r
1338 mov r3, r3, lsr #9 @ r3=pal=((code&0x6000)>>9);\r
1339\r
1340 ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels\r
1341\r
1342.dw_samecode:\r
1343 tst r6, #0x100 @ shadow mode bit set from sh above\r
1344 bne .dw_shadow\r
1345.dw_shadow_done:\r
1346 tst r2, r2\r
1347 beq .dwloop @ tileline blank\r
1348\r
1349 cmp r2, r2, ror #4\r
1350 beq .dw_SingleColor @ tileline singlecolor \r
1351\r
1352 tst r9, #0x0800\r
7a7c6476 1353 bne .dw_TileFlip\r
cc68a136 1354\r
1355 @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: helper pattern\r
cc68a136 1356.dw_TileNorm:\r
1357 TileNorm r0\r
1358 b .dwloop\r
1359\r
7a7c6476 1360.dw_TileFlip:\r
1361 TileFlip r0\r
1362 b .dwloop\r
1363\r
cc68a136 1364.dw_SingleColor:\r
1365 and r4, r0, r2 @ #0x0000000f\r
1366 orr r4, r3, r4\r
1367 orr r4, r4, r4, lsl #8\r
1368 orr r4, r4, r4, lsl #16 @ pal|colour replicated into 4 bytes\r
1369 mov r5, r4\r
1370 stmia r1!, {r4,r5} @ write all 8 pixels at once\r
1371 b .dwloop_nor1 @ we incremented r1 ourselves\r
1372\r
1373.dw_shadow:\r
1374 tst r6, #1 @ hi pri?\r
1375 orreq r3, r3, #0x40 @ low pri: add 0x40 to the palette bits\r
1376 beq .dw_shadow_done\r
1377 ldr r4, [r1] @ hi pri: clear bits 6-7 of the 8 dest pixels\r
07abbab1 1378 mov r5, #0x3f\r
1379 orr r5, r5, r5, lsl #8\r
1380 orr r5, r5, r5, lsl #16 @ r5=0x3f3f3f3f\r
1381 and r4, r4, r5\r
cc68a136 1382 str r4, [r1]\r
1383 ldr r4, [r1,#4]\r
07abbab1 1384 and r4, r4, r5\r
cc68a136 1385 str r4, [r1,#4]\r
1386 b .dw_shadow_done\r
1387\r
1388.dwloop_end:\r
ea38612f 1389 and r2, r6, #PDRAW_WND_DIFF_PRIO\r
1390 ldmfd sp!, {r4-r11,lr}\r
1391 ldr r0, [sp] @ est (stack argument, reachable again after the pop)\r
1392 ldr r1, [r0, #OFS_rendstatus]\r
1393 orr r1, r1, r2 @ report a priority mismatch, if one was seen\r
1394 str r1, [r0, #OFS_rendstatus]\r
cc68a136 1395\r
ea38612f 1396 bx lr\r
cc68a136 1397\r
1398\r
1399@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1400\r
1401\r
@ Hilights 2 pixels in RGB444/BGR444 format held in \reg.
@ Each of the six colour nibbles (bits 0-11 and 16-27) gets 4 added and is
@ clamped to 0xf on overflow; bits 12-15 and 28-31 pass through unchanged.
@ The rotations add up to a multiple of 32, so \reg ends in its original
@ bit order. Clobbers the condition flags.
.macro TileDoShHi2Pixels444 reg
    mov     \reg, \reg, ror #12         @ bring bits 8-11 up to bits 28-31
    @ one pass per colour nibble; the list holds the rotation applied after it
.irp next_ror, 28, 28, 24, 28, 28, 12
    adds    \reg, \reg, #0x40000000     @ top nibble += 4
    orrcs   \reg, \reg, #0xf0000000     @ carry out: saturate the nibble to 0xf
    mov     \reg, \reg, ror #\next_ror
.endr
.endm
1424\r
1425\r
cc68a136 1426@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1427\r
1428\r
cc68a136 1429@ Convert 0000bbb0 ggg0rrr0\r
1430@ to rrrrrggg gggbbbbb\r
@ Works on both halfwords of \reg at once (two pixels per word).\r
1431\r
a39d8ba5 1432@ r2,r3 - scratch, lr = 0x001c001c, r8 = 0x08610861\r
cc68a136 1433.macro convRGB565 reg\r
a39d8ba5 1434 and r2, lr, \reg,lsr #7 @ b\r
1435 and r3, lr, \reg,lsr #3 @ g\r
1436 and \reg, lr, \reg,lsl #1 @ r\r
1437 orr r2, r2, r3, lsl #6 @ r2 = b | g<<6\r
1438 orr \reg, r2, \reg,lsl #11 @ \reg = r<<11 | g<<6 | b\r
1439\r
1440 and r2, r8, \reg,lsr #4 @ r8 selects bits 0,5,6,11 of the shifted value\r
1441 orr \reg, \reg, r2 @ fill low bits of each field from its high bits\r
cc68a136 1442.endm\r
1443\r
a39d8ba5 1444@ trashes: r2-r7,r12,lr; r8 = 0x08610861 (input, only read); r0,r1 are advanced\r
@ in: r0 = dest, r1 = source, r2 = pixel count; 8 pixels are converted per pass\r
@ and the loop body always runs at least once, even when r2 < 8.\r
b2305d08 1445.macro vidConvCpyRGB565_local\r
2ec14aec 1446 mov r12, r2, lsr #3 @ repeats\r
cc68a136 1447 mov lr, #0x001c0000\r
1448 orr lr, lr, #0x01c @ lr == pattern 0x001c001c\r
cc68a136 1449\r
b2305d08 14500:\r
cc68a136 1451 ldmia r1!, {r4-r7} @ 4 words = 8 source pixels\r
1452 subs r12, r12, #1 @ flags survive until the bgt: convRGB565 sets none\r
1453 convRGB565 r4\r
1454 str r4, [r0], #4\r
1455 convRGB565 r5\r
1456 str r5, [r0], #4\r
1457 convRGB565 r6\r
1458 str r6, [r0], #4\r
1459 convRGB565 r7\r
1460 str r7, [r0], #4\r
1461\r
b2305d08 1462 bgt 0b\r
1463.endm\r
1464\r
1465\r
.global vidConvCpyRGB565

@ void vidConvCpyRGB565(void *to, void *from, int pixels)
@ Converts palette-format pixels at 'from' to RGB565 at 'to' by expanding
@ vidConvCpyRGB565_local, which works in groups of 8 pixels (pixels>>3
@ passes, and at least one pass is always executed).
@ In: r0=to, r1=from, r2=pixels.
vidConvCpyRGB565:
    stmdb   sp!, {r4-r9,lr}
    mov     r8, #0x0800
    orr     r8, r8, #0x0061
    orr     r8, r8, r8, lsl #16         @ r8 = 0x08610861, the mask the macro expects
    vidConvCpyRGB565_local
    ldmia   sp!, {r4-r9,lr}
    bx      lr
1476\r
1477\r
ea38612f 1478@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est)\r
@ Rebuilds the HighPal table in est from Pico.cram (0x40 entries converted with\r
@ vidConvCpyRGB565_local) and clears dirtyPal. When sh is set it also writes\r
@ the shadowed copies at entries 0x40 and 0xc0 and the hilighted copy at 0x80.\r
@ The 'line' argument in r1 is overwritten without being read.\r
1479\r
1480.global PicoDoHighPal555\r
cc68a136 1481\r
b2305d08 1482PicoDoHighPal555:\r
ea38612f 1483 stmfd sp!, {r4-r10,lr}\r
1484 mov r10,r2 @ est\r
b2305d08 1485 mov r1, #0 @ bit 0 clear: not entered from FinalizeLine555\r
ea38612f 1486 ldr r8, [r10, #OFS_Pico_video]\r
cc68a136 1487\r
b2305d08 1488PicoDoHighPal555_nopush:\r
a39d8ba5 1489 orr r9, r1, r0, lsl #31 @ 0:called from FinalizeLine555, 31: s/h\r
b2305d08 1490\r
98a27142 1491 add r0, r10, #OFS_HighPal @ r0=conversion destination\r
b2305d08 1492\r
cc68a136 1493 mov r1, #0\r
b2305d08 1494 strb r1, [r8, #-0x1a] @ 0x2220e ~ dirtyPal\r
1495\r
3d48f143 1496 sub r1, r8, #0x128 @ r1=Pico.cram\r
cc68a136 1497 mov r2, #0x40 @ 0x40 palette entries to convert\r
a39d8ba5 1498 mov r8, #0x0061\r
1499 orr r8, r8, #0x0800\r
f4750ee0 1500 orr r8, r8, r8, lsl #16 @ r8=0x08610861\r
a39d8ba5 1501\r
b2305d08 1502 vidConvCpyRGB565_local\r
cc68a136 1503\r
a39d8ba5 1504 tst r9, #(1<<31)\r
b2305d08 1505 beq PicoDoHighPal555_end @ no shadow/hilight: base palette only\r
1506\r
98a27142 1507 add r3, r10, #OFS_HighPal\r
cc68a136 1508\r
1509 @ shadowed pixels:\r
1510 mov r12, #0x008e\r
cc68a136 1511 add r4, r3, #0x40*2\r
b2305d08 1512 orr r12,r12,#0x7300\r
cc68a136 1513 add r5, r3, #0xc0*2\r
b2305d08 1514 orr r12,r12,r12,lsl #16 @ r12=0x738e738e\r
cc68a136 1515 mov lr, #0x40/4 @ 4 entries (2 words) per pass\r
1516.fl_loopcpRGB555_sh:\r
1517 ldmia r3!, {r1,r6}\r
1518 subs lr, lr, #1\r
1519 and r1, r12, r1, lsr #1\r
1520 and r6, r12, r6, lsr #1\r
1521 stmia r4!, {r1,r6} @ copy at entry 0x40\r
1522 stmia r5!, {r1,r6} @ same data again at entry 0xc0\r
1523 bne .fl_loopcpRGB555_sh\r
1524\r
1525 @ hilighted pixels:\r
a39d8ba5 1526 @ t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e;\r
1527 @ t |= (t >> 4) & 0x08610861;\r
1528 @ r8=0x08610861\r
cc68a136 1529 sub r3, r3, #0x40*2 @ rewind r3 to the start of HighPal\r
a39d8ba5 1530 mov lr, #0x40/4\r
cc68a136 1531.fl_loopcpRGB555_hi:\r
a39d8ba5 1532 ldmia r3!, {r1,r6}\r
1533 and r1, r12, r1, lsr #1\r
1534 and r6, r12, r6, lsr #1\r
1535 add r1, r12, r1\r
1536 add r6, r12, r6\r
1537 and r5, r8, r1, lsr #4\r
1538 and r7, r8, r6, lsr #4\r
1539 orr r1, r1, r5\r
1540 orr r6, r6, r7\r
1541 stmia r4!, {r1,r6} @ r4 carries on at entry 0x80 after the shadow loop\r
cc68a136 1542 subs lr, lr, #1\r
1543 bne .fl_loopcpRGB555_hi\r
b2305d08 1544 mov r0, #1 @ r0=1 so FinalizeLineRGB555_pal_done sees sh set\r
cc68a136 1545\r
b2305d08 1546PicoDoHighPal555_end:\r
a39d8ba5 1547 tst r9, #1\r
ea38612f 1548 ldmeqfd sp!, {r4-r10,pc} @ direct call: return to the caller\r
b2305d08 1549\r
ea38612f 1550 ldr r8, [r10, #OFS_Pico_video] @ r8 was reused as a mask; reload it\r
b2305d08 1551 b FinalizeLineRGB555_pal_done\r
1552\r
1553\r
ea38612f 1554@ void FinalizeLine555(int sh, int line, struct PicoEState *est)\r
@ Converts the 8-bit line at HighCol+8 to 16-bit pixels at DrawLineDest by\r
@ looking each byte up in HighPal; the palette is rebuilt first (through\r
@ PicoDoHighPal555_nopush) when dirtyPal is nonzero.\r
@ The 'line' argument in r1 is overwritten without being read.\r
1555\r
1556.global FinalizeLine555\r
b2305d08 1557\r
5a681086 1558FinalizeLine555:\r
ea38612f 1559 stmfd sp!, {r4-r10,lr}\r
1560 mov r10,r2 @ est\r
1561 ldr r8, [r10, #OFS_Pico_video]\r
b2305d08 1562\r
1563 ldrb r2, [r8, #-0x1a] @ 0x2220e ~ dirtyPal\r
1564 mov r1, #1 @ bit 0 set: tells PicoDoHighPal555_nopush to come back here\r
1565 tst r2, r2\r
1566 bne PicoDoHighPal555_nopush\r
1567\r
1568FinalizeLineRGB555_pal_done:\r
98a27142 1569 add r3, r10, #OFS_HighPal\r
cc68a136 1570\r
ea38612f 1571 ldr r12, [r10, #OFS_rendstatus]\r
b2305d08 1572 eors r0, r0, #1 @ EQ only when r0 was 1 (sh); NE: sh is 0\r
e5fa9817 1573 mov lr, #0xff\r
283fec1b 1574 tstne r12,#PDRAW_ACC_SPRITES\r
e5fa9817 1575 movne lr, #0x3f @ sh off and ACC_SPRITES set: use only the low 6 index bits\r
1576\r
99bdfd31 1577 ldr r1, [r10, #OFS_HighCol]\r
1578 ldr r0, [r10, #OFS_DrawLineDest]\r
ea8c405f 1579 add r1, r1, #8 @ r1=source, starting 8 bytes into HighCol\r
3d48f143 1580\r
1581 ldrb r12, [r8, #12] @ byte 12 of the Pico_video data\r
cc68a136 1582 mov lr, lr, lsl #1 @ index mask -> byte-offset mask for halfword entries\r
1583\r
3d48f143 1584 tst r12, #1\r
1585 movne r2, #320/8 @ len\r
1586 bne .fl_no32colRGB555\r
99bdfd31 1587 ldr r4, [r10, #OFS_PicoOpt]\r
3d48f143 1588 mov r2, #256/8 @ len in 8-pixel groups for the 256 pixel line\r
1589 ldr r4, [r4]\r
1590 tst r4, #0x4000\r
1591 bne .fl_32scale_RGB555 @ option bit 0x4000: scaled output (8 src -> 10 dst pixels)\r
1592 tst r4, #0x0100\r
1593 addeq r0, r0, #32*2 @ option bit 0x0100 clear: start 32 pixels into the dest line\r
1594\r
1595.fl_no32colRGB555:\r
cc68a136 1596\r
f62850ba 1597#ifdef UNALIGNED_DRAWLINEDEST\r
499a0be3 1598 @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer\r
1599 tst r0, #2\r
1600 bne .fl_RGB555u\r
f62850ba 1601#endif\r
499a0be3 1602\r
1603.fl_loopRGB555:\r
@ per pass: 8 source bytes -> 8 halfword pixels (4 words); r2 = passes left,\r
@ r3 = HighPal, lr = byte-offset mask\r
cc68a136 1604 ldr r12, [r1], #4 @ source pixels 0-3\r
1605 ldr r7, [r1], #4 @ source pixels 4-7\r
1606\r
1607 and r4, lr, r12, lsl #1 @ byte offset of pixel 0's palette entry\r
1608 ldrh r4, [r3, r4]\r
1609 and r5, lr, r12, lsr #7\r
1610 ldrh r5, [r3, r5]\r
1611 and r6, lr, r12, lsr #15\r
1612 ldrh r6, [r3, r6]\r
1613 orr r4, r4, r5, lsl #16 @ r4 = pixels 0,1\r
1614\r
1615 and r5, lr, r12, lsr #23\r
1616 ldrh r5, [r3, r5]\r
1617 and r8, lr, r7, lsl #1\r
1618 ldrh r8, [r3, r8]\r
1619 orr r5, r6, r5, lsl #16 @ r5 = pixels 2,3\r
1620\r
1621 and r6, lr, r7, lsr #7\r
1622 ldrh r6, [r3, r6]\r
1623 and r12,lr, r7, lsr #15\r
1624 ldrh r12,[r3, r12]\r
499a0be3 1625 and r7, lr, r7, lsr #23\r
1626 ldrh r7, [r3, r7]\r
cc68a136 1627 orr r8, r8, r6, lsl #16 @ r8 = pixels 4,5\r
1628\r
cc68a136 1629 subs r2, r2, #1\r
499a0be3 1630 orr r12,r12, r7, lsl #16 @ r12 = pixels 6,7\r
cc68a136 1631\r
1632 stmia r0!, {r4,r5,r8,r12}\r
1633 bne .fl_loopRGB555\r
1634\r
ea38612f 1635 ldmfd sp!, {r4-r10,lr}\r
3d48f143 1636 bx lr\r
1637\r
1638\r
1639.fl_32scale_RGB555:\r
@ scaled path: every 8 source pixels become 10 destination pixels (5 words),\r
@ blending neighbours with 1/4, 1/2 and 3/4 weights. r8 and r10 are reused\r
@ as scratch here, so est and Pico_video are no longer available.\r
3d48f143 1640 mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007\r
1641 orr r9, r9, #0x00e7 @ r9=0x39e7; r9<<2 drops the low 2 bits of each field\r
1642\r
f62850ba 1643#ifdef UNALIGNED_DRAWLINEDEST\r
499a0be3 1644 tst r0, #2\r
1645 bne .fl_32scale_RGB555u\r
f62850ba 1646#endif\r
499a0be3 1647\r
3d48f143 1648.fl_loop32scale_RGB555:\r
1649 ldr r12, [r1], #4\r
1650 ldr r7, [r1], #4\r
cc68a136 1651\r
3d48f143 1652 and r4, lr, r12,lsl #1\r
1653 ldrh r4, [r3, r4]\r
1654 and r5, lr, r12,lsr #7\r
1655 ldrh r5, [r3, r5]\r
1656 and r4, r4, r9, lsl #2\r
1657 orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0\r
1658 and r5, r5, r9, lsl #2\r
1659 sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1\r
1660 add r4, r4, r6, lsl #16 @ pix_d 0, 1\r
1661 and r6, lr, r12,lsr #15\r
1662 ldrh r6, [r3, r6]\r
1663 and r12,lr, r12,lsr #23\r
1664 ldrh r12,[r3, r12]\r
1665 and r6, r6, r9, lsl #2\r
1666 add r5, r5, r6\r
1667 mov r5, r5, lsr #1 @ r5 = 1/2 (pix_s 1 + pix_s 2)\r
1668 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2\r
1669 orr r5, r5, r6, lsl #16\r
1670\r
1671 and r6, lr, r7, lsl #1\r
1672 ldrh r6, [r3, r6]\r
1673 and r12,r12,r9, lsl #2\r
1674 add r5, r5, r12,lsl #14 @ pix_d 2, 3\r
1675 and r6, r6, r9, lsl #2\r
1676 orr r6, r12,r6, lsl #16 @ pix_d 4, 5\r
1677\r
1678 and r12,lr, r7, lsr #7\r
1679 ldrh r12,[r3, r12]\r
1680 and r10,lr, r7, lsr #15\r
1681 ldrh r10,[r3, r10]\r
1682 and r12,r12,r9, lsl #2\r
1683 sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 5\r
1684 add r8, r8, r6, lsr #18 @ += 1/4 pix_s 4\r
1685 and r7, lr, r7, lsr #23\r
1686 ldrh r7, [r3, r7]\r
1687 and r10,r10,r9, lsl #2\r
1688 orr r8, r8, r10,lsl #15\r
1689 add r8, r8, r12,lsl #15 @ pix_d 6, 7\r
1690 sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 6\r
1691 and r7, r7, r9, lsl #2\r
1692 add r10,r10,r7, lsr #2 @ += 1/4 pix_s 7\r
1693 orr r10,r10,r7, lsl #16 @ pix_d 8, 9\r
1694\r
1695 subs r2, r2, #1\r
1696\r
1697 stmia r0!, {r4,r5,r6,r8,r10}\r
1698 bne .fl_loop32scale_RGB555\r
1699\r
ea38612f 1700 ldmfd sp!, {r4-r10,lr}\r
3d48f143 1701 bx lr\r
1702\r
f62850ba 1703#ifdef UNALIGNED_DRAWLINEDEST\r
499a0be3 1704 @ unaligned versions of loops\r
9839d126 1705 @ warning: starts drawing 2bytes before dst\r
@ both loops store whole words shifted by one pixel: the last pixel of each\r
@ group is carried in a register into the next pass and the final carried\r
@ pixel is written with a single strh after the loop\r
499a0be3 1706\r
1707.fl_RGB555u:\r
9839d126 1708 sub r0, r0, #2 @ initial adjustment\r
1709 mov r8, #0 @ carried pixel; 0 goes into the halfword before dst\r
499a0be3 1710\r
1711.fl_loopRGB555u:\r
1712 ldr r12, [r1], #4\r
1713 ldr r7, [r1], #4\r
1714\r
9839d126 1715 and r6, lr, r12,lsl #1\r
1716 ldrh r6, [r3, r6]\r
1717 and r5, lr, r12,lsr #7\r
499a0be3 1718 ldrh r5, [r3, r5]\r
9839d126 1719 orr r4, r8, r6, lsl #16 @ r4 = carried pixel, pixel 0\r
499a0be3 1720\r
9839d126 1721 and r6, lr, r12,lsr #15\r
499a0be3 1722 ldrh r6, [r3, r6]\r
9839d126 1723 and r8, lr, r12,lsr #23\r
499a0be3 1724 ldrh r8, [r3, r8]\r
9839d126 1725 orr r5, r5, r6, lsl #16 @ r5 = pixels 1,2\r
499a0be3 1726\r
9839d126 1727 and r6, lr, r7, lsl #1\r
499a0be3 1728 ldrh r6, [r3, r6]\r
9839d126 1729 and r12,lr, r7, lsr #7\r
499a0be3 1730 ldrh r12,[r3, r12]\r
9839d126 1731 orr r6, r8, r6, lsl #16 @ r6 = pixels 3,4\r
1732\r
1733 and r8, lr, r7, lsr #15\r
499a0be3 1734 ldrh r8, [r3, r8]\r
9839d126 1735 and r7, lr, r7, lsr #23\r
499a0be3 1736\r
1737 subs r2, r2, #1\r
9839d126 1738 orr r12,r12,r8, lsl #16 @ r12 = pixels 5,6\r
1739 ldrh r8, [r3, r7] @ r8 = pixel 7, carried into the next pass\r
1740\r
1741 stmia r0!, {r4,r5,r6,r12}\r
499a0be3 1742 bne .fl_loopRGB555u\r
1743\r
1744 strh r8, [r0], #2 @ flush the last carried pixel\r
1745\r
ea38612f 1746 ldmfd sp!, {r4-r10,lr}\r
499a0be3 1747 bx lr\r
1748\r
1749\r
1750.fl_32scale_RGB555u:\r
9839d126 1751 sub r0, r0, #2 @ initial adjustment\r
1752 mov r4, #0 @ carried pixel; 0 goes into the halfword before dst\r
499a0be3 1753\r
1754 @ r9 f800 07e0 001f | e000 0780 001c | 3800 01e0 0007\r
1755.fl_loop32scale_RGB555u:\r
1756 ldr r12, [r1], #4\r
1757 ldr r7, [r1], #4\r
1758\r
9839d126 1759 and r6, lr, r12,lsl #1\r
1760 ldrh r6, [r3, r6]\r
499a0be3 1761 and r5, lr, r12,lsr #7\r
1762 ldrh r5, [r3, r5]\r
9839d126 1763 and r6, r6, r9, lsl #2\r
1764 orr r4, r4, r6, lsl #16 @ r4 = pix_d -1, 0\r
499a0be3 1765\r
1766 and r5, r5, r9, lsl #2\r
9839d126 1767 sub r8, r5, r5, lsr #2 @ r8 = 3/4 pix_s 1\r
1768 add r6, r8, r6, lsr #2 @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1)\r
1769 orr r5, r6, r5, lsl #15\r
499a0be3 1770\r
1771 and r6, lr, r12,lsr #15\r
1772 ldrh r6, [r3, r6]\r
1773 and r12,lr, r12,lsr #23\r
1774 ldrh r12,[r3, r12]\r
1775 and r6, r6, r9, lsl #2\r
9839d126 1776 add r5, r5, r6, lsl #15 @ r5 = pix_d 1, 2\r
499a0be3 1777\r
9839d126 1778 and r8, lr, r7, lsl #1\r
1779 ldrh r8, [r3, r8]\r
1780 and r10,lr, r7, lsr #7\r
1781 ldrh r10,[r3, r10]\r
499a0be3 1782 and r12,r12,r9, lsl #2\r
9839d126 1783 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2\r
1784 add r6, r6, r12,lsr #2 @ += 1/4 pix_s 3\r
1785 orr r6, r6, r12,lsl #16 @ r6 = pix_d 3, 4\r
499a0be3 1786\r
9839d126 1787 and r8, r8, r9, lsl #2\r
1788 and r10,r10,r9, lsl #2\r
1789 sub r12,r10,r10,lsr #2 @ r12 = 3/4 pix_s 5\r
1790 orr r8, r8, r8, lsl #14 @ r8[31:16] = 1/4 pix_s 4\r
1791 add r8, r8, r12,lsl #16 @ r8 = pix_d 5, 6\r
1792 and r12,lr, r7, lsr #15\r
499a0be3 1793 ldrh r12,[r3, r12]\r
499a0be3 1794 and r7, lr, r7, lsr #23\r
1795 ldrh r7, [r3, r7]\r
9839d126 1796 and r12,r12,r9, lsl #2\r
1797 add r10,r10,r12\r
1798 mov r10,r10, lsr #1 @ r10 = 1/2 (pix_s 5 + pix_s 6)\r
1799 sub r12,r12,r12,lsr #2 @ r12 = 3/4 pix_s 6\r
1800 orr r10,r10,r12,lsl #16\r
499a0be3 1801 and r7, r7, r9, lsl #2\r
9839d126 1802 add r10,r10,r7, lsl #14 @ r10 = pix_d 7, 8\r
499a0be3 1803\r
1804 subs r2, r2, #1\r
1805\r
1806 stmia r0!, {r4,r5,r6,r8,r10}\r
9839d126 1807 mov r4, r7 @ carry pix_s 7 (pix_d 9) into the next pass\r
499a0be3 1808 bne .fl_loop32scale_RGB555u\r
1809\r
9839d126 1810 strh r4, [r0], #2 @ flush the last carried pixel\r
1811\r
ea38612f 1812 ldmfd sp!, {r4-r10,lr}\r
499a0be3 1813 bx lr\r
1814\r
f62850ba 1815#endif /* UNALIGNED_DRAWLINEDEST */\r
499a0be3 1816\r
cc68a136 1817\r
1818@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\r
1819\r
1820@ utility\r
.global blockcpy @ void *dst, void *src, size_t n

@ void blockcpy(void *dst, void *src, size_t n)
@ Copies n/16 blocks of 16 bytes from src to dst using ldmia/stmia; the
@ n%16 remainder bytes are not copied.
@ Returns without touching memory when n < 16. Without this check the
@ block counter would start at 0 and wrap around on the first subs.
@ In: r0=dst, r1=src, r2=n.  Clobbers: r0-r3, r12, flags.
blockcpy:
    movs    r2, r2, lsr #4          @ r2 = number of 16-byte blocks
    bxeq    lr                      @ no full block: nothing to do
    stmfd   sp!, {r4,r5}
blockcpy_loop:
    ldmia   r1!, {r3-r5,r12}
    subs    r2, r2, #1
    stmia   r0!, {r3-r5,r12}
    bne     blockcpy_loop
    ldmfd   sp!, {r4,r5}
    bx      lr
1833\r
1834\r
.global blockcpy_or @ void *dst, void *src, size_t n, int pat

@ void blockcpy_or(void *dst, void *src, size_t n, int pat)
@ Copies n/16 blocks of 16 bytes from src to dst, OR-ing every word with
@ pat | pat<<8 | pat<<16 | pat<<24 (for a byte-sized pat: pat in every byte).
@ The n%16 remainder bytes are not copied.
@ Returns without touching memory when n < 16. Without this check the
@ block counter would start at 0 and wrap around on the first subs.
@ In: r0=dst, r1=src, r2=n, r3=pat.  Clobbers: r0-r3, r12, flags.
blockcpy_or:
    movs    r2, r2, lsr #4          @ r2 = number of 16-byte blocks
    bxeq    lr                      @ no full block: nothing to do
    stmfd   sp!, {r4-r6}
    orr     r3, r3, r3, lsl #8
    orr     r3, r3, r3, lsl #16     @ spread pat over all four bytes
blockcpy_loop_or:
    ldmia   r1!, {r4-r6,r12}
    subs    r2, r2, #1
    orr     r4, r4, r3
    orr     r5, r5, r3
    orr     r6, r6, r3
    orr     r12,r12,r3
    stmia   r0!, {r4-r6,r12}
    bne     blockcpy_loop_or
    ldmfd   sp!, {r4-r6}
    bx      lr
1853\r
cff531af 1854@ vim:filetype=armasm\r