X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=Pico%2FDraw.s;h=5ddd5e6e982990c0d3479d9c167a63801591684b;hb=99464b6230bbe515dffb8893f6b6b51031be9157;hp=5032f150818ff58464aad7b88d56e58e8c69a768;hpb=7a7c6476f31d4b88b0aae876e94bc49733b36e83;p=picodrive.git diff --git a/Pico/Draw.s b/Pico/Draw.s index 5032f15..5ddd5e6 100644 --- a/Pico/Draw.s +++ b/Pico/Draw.s @@ -6,6 +6,7 @@ @ (c) Copyright 2007, Grazvydas "notaz" Ignotas @ All Rights Reserved +.include "port_config.s" .extern Pico .extern PicoOpt @@ -211,9 +212,9 @@ @ int cells; // 0x14 @ }; -@ int DrawLayer(int plane, int *hcache, int maxcells, int sh) +@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells); -.global DrawLayer @ int plane, int *hcache, int maxcells, int sh +.global DrawLayer DrawLayer: stmfd sp!, {r4-r11,lr} @@ -221,10 +222,11 @@ DrawLayer: ldr r11, =(Pico+0x22228) @ Pico.video mov r8, #1 - ldrb r7, [r11, #16] @ ??hh??ww + ldrb r7, [r11, #16] @ ??vv??hh mov r6, r1 @ hcache - orr r9, r2, r3, lsl #31 @ r9=maxcells|(sh<<31) + orr r9, r3, r0, lsl #30 + orr r9, r9, r2, lsl #8 @ r9=sh[31]|cellskip[15:8]|maxcells[7:0] (tmp) mov r1, r7, lsl #4 orr r1, r1, #0x00ff @@ -244,7 +246,7 @@ DrawLayer: sub r5, r5, #1 @ r5=xmask @ Find name table: - tst r0, r0 + ands r0, r0, #1 ldreqb r12, [r11, #2] ldrneb r12, [r11, #4] @@ -309,14 +311,25 @@ DrawLayer: orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty) movne r3, #0x40 @ default to shadowed pal on sh mode - mvn r9, #0 @ r9=prevcode=-1 - cmp r7, #8 addne r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll + and r9, r9, #0xff00 + add r8, r8, r9, lsr #8 @ tilex+=cellskip + add r7, r7, r9, lsr #5 @ dx+=cellskip<<3; + sub r10,r10,r9, lsl #16 @ cells-=cellskip + @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL + ldr r11,=HighCol + mov r0, #0xf + ldr r11,[r11] +.else ldr r11,=HighCol mov r0, #0xf +.endif + + mvn r9, #0 @ r9=prevcode=-1 add r1, r11, r7 @ r1=pdest @@ -431,8 +444,8 @@ DrawLayer: .DrawStrip_vsscroll: rsb r8, r3, #0 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3 - bic r8, r8, #0xff000000 - orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[15:0]) + bic r8, r8, #0x3fc00000 + orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[21:0]) ldr r4, =Scanline orr r5, r1, r10, lsl #24 @@ -443,24 +456,34 @@ DrawLayer: add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1 mov r10,r9, lsl #16 - tst r0, r0 + tst r0, #1 orrne r10,r10, #0x8000 tst r9, #1<<31 mov r3, #0 orr r10,r10, #0xff000000 @ will be adjusted on entering loop - orrne r10,r10, #1<<23 @ r10=(cells[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]) + orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]) movne r3, #0x40 @ default to shadowed pal on sh mode - mvn r9, #0 @ r9=prevcode=-1 + cmp r7, #8 + subne r10,r10, #0x01000000 @ have hscroll, start with negative cell + + and r9, r9, #0xff00 + add r8, r8, r9, lsr #8 @ tilex+=cellskip + add r7, r7, r9, lsr #5 @ dx+=cellskip<<3; + add r10,r10,r9, lsl #16 @ cell+=cellskip @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL ldr r11,=HighCol mov r0, #0xf - add r1, r11, r7 @ r1=pdest - - cmp r7, #8 - subne r10,r10, #0x01000000 @ have hscroll, start with negative cell + ldr r11,[r11] +.else + ldr r11,=HighCol + mov r0, #0xf +.endif + mvn r9, #0 @ r9=prevcode=-1 + add r1, r11, r7 @ r1=pdest @ r4 & r7 are scratch in this loop .dsloop_vs_subr1: @@ -481,9 +504,9 @@ DrawLayer: ldrh r7, [r7] @ r7=vscroll bic r10,r10,#0xff @ clear old ty - and r4, r5, #0xff0000 - add r4, r4, r7, lsl #16 - and r4, r4, r5, lsl #16 @ r4=line<<16 + and r4, r5, #0xff0000 @ scanline + add r4, r4, r7, lsl #16 @ ... += vscroll + and r4, r4, r5, lsl #16 @ ... &= ymask and r7, r4, #0x70000 orr r10,r10,r7, lsr #15 @ new ty @@ -506,7 +529,7 @@ DrawLayer: beq .DrawStrip_vs_samecode @ we know stuff about this tile already mov r9, r7 @ remember code - orr r8, r8, #1<<24 @ seen non hi-prio tile + orr r8, r8, #(1<<24)@ seen non hi-prio tile movs r2, r9, lsl #20 @ if (code&0x1000) mov r2, r2, lsl #1 @@ -580,7 +603,7 @@ DrawLayer: b .dsloop_vs .dsloop_vs_exit: - tst r8, #1<<24 @ seen non hi-prio tile + tst r8, #(1<<24) @ seen non hi-prio tile ldreq r1, =rendstatus mov r0, #0 ldreq r2, [r1] @@ -630,10 +653,18 @@ DrawLayer: BackFill: stmfd sp!, {r4-r9,lr} +.if OVERRIDE_HIGHCOL + ldr lr, =HighCol + mov r0, r0, lsl #26 + ldr lr, [lr] + mov r0, r0, lsr #26 + add lr, lr, #8 +.else ldr lr, =(HighCol+8) - mov r0, r0, lsl #26 mov r0, r0, lsr #26 +.endif + orr r0, r0, r1, lsl #6 orr r0, r0, r0, lsl #8 orr r0, r0, r0, lsl #16 @@ -671,9 +702,15 @@ DrawTilesFromCache: stmfd sp!, {r4-r8,r11,lr} @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL + ldr r11,=HighCol + mov r12,#0xf + ldr r11,[r11] +.else ldr r11,=HighCol - ldr lr, =(Pico+0x10000) @ lr=Pico.vram mov r12,#0xf +.endif + ldr lr, =(Pico+0x10000) @ lr=Pico.vram mvn r5, #0 @ r5=prevcode=-1 ands r8, r1, #1 @@ -865,11 +902,17 @@ DrawSpritesFromCache: stmfd sp!, {r4-r11,lr} @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL ldr r11,=HighCol + mov r12,#0xf + ldr r11,[r11] +.else + ldr r11,=HighCol + mov r12,#0xf +.endif ldr lr, =(Pico+0x10000) @ lr=Pico.vram mov r6, r1, lsl #31 orr r6, r6, #1<<30 - mov r12,#0xf mov r10, r0 @@ -1033,9 +1076,15 @@ DrawSprite: bne .dspr_cache @ if(code&0x8000) // high priority - cache it @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL + ldr r11,=HighCol + mov r12,#0xf + ldr r11,[r11] +.else ldr r11,=HighCol - ldr lr, =(Pico+0x10000) @ lr=Pico.vram mov r12,#0xf +.endif + ldr lr, =(Pico+0x10000) @ lr=Pico.vram mov r5, r5, lsl #4 @ delta<<=4; // Delta of address and r4, r9, #0x6000 @@ -1193,12 +1242,19 @@ DrawWindow: orr r6, r6, r3, lsl #8 @ shadow mode sub r8, r1, r0 - mov r8, r8, lsl #1 @ cells - - mvn r9, #0 @ r9=prevcode=-1 @ cache some stuff to avoid mem access +.if OVERRIDE_HIGHCOL + ldr r11,=HighCol + mov r8, r8, lsl #1 @ cells + ldr r11,[r11] + mvn r9, #0 @ r9=prevcode=-1 + add r11,r11,#8 +.else ldr r11,=(HighCol+8) + mov r8, r8, lsl #1 @ cells + mvn r9, #0 @ r9=prevcode=-1 +.endif add r1, r11, r0, lsl #4 @ r1=pdest mov r0, #0xf b .dwloop_enter @@ -1399,9 +1455,17 @@ FinalizeLineBGR444: .fl_noshBGR444: +.if OVERRIDE_HIGHCOL + ldr r1, =HighCol + mov lr, #0xff + ldr r1, [r1] + mov lr, lr, lsl #1 + add r1, r1, #8 +.else ldr r1, =(HighCol+8) mov lr, #0xff mov lr, lr, lsl #1 +.endif .fl_loopBGR444: @@ -1471,14 +1535,16 @@ FinalizeLineBGR444: orr \reg, \reg, r3 @ add blue back .endm +.global vidConvCpyRGB565 + vidConvCpyRGB565: @ void *to, void *from, int pixels stmfd sp!, {r4-r9,lr} - mov r12, r2, lsr #3 @ repeats + mov r12, r2, lsr #3 @ repeats mov lr, #0x001c0000 orr lr, lr, #0x01c @ lr == pattern 0x001c001c mov r8, #0x00030000 - orr r8, r8, #0x003 @ lr == pattern 0x001c001c + orr r8, r8, #0x003 .loopRGB565: ldmia r1!, {r4-r7} @@ -1553,9 +1619,17 @@ FinalizeLineRGB555: sub r3, r3, #0x40*2 .fl_noshRGB555: +.if OVERRIDE_HIGHCOL + ldr r1, =HighCol + ldr r0, =DrawLineDest + ldr r1, [r1] + ldr r0, [r0] + add r1, r1, #8 +.else ldr r0, =DrawLineDest ldr r1, =(HighCol+8) ldr r0, [r0] +.endif ldrb r12, [r8, #12] mov lr, #0xff @@ -1573,8 +1647,14 @@ FinalizeLineRGB555: addeq r0, r0, #32*2 .fl_no32colRGB555: -.fl_loopRGB555: +.if UNALIGNED_DRAWLINEDEST + @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer + tst r0, #2 + bne .fl_RGB555u +.endif + +.fl_loopRGB555: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1596,12 +1676,12 @@ FinalizeLineRGB555: ldrh r6, [r3, r6] and r12,lr, r7, lsr #15 ldrh r12,[r3, r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] orr r8, r8, r6, lsl #16 - and r6, lr, r7, lsr #23 - ldrh r6, [r3, r6] @ 1 cycle interlock here (r6) subs r2, r2, #1 - orr r12,r12, r6, lsl #16 + orr r12,r12, r7, lsl #16 stmia r0!, {r4,r5,r8,r12} bne .fl_loopRGB555 @@ -1615,6 +1695,11 @@ FinalizeLineRGB555: mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 orr r9, r9, #0x00e7 +.if UNALIGNED_DRAWLINEDEST + tst r0, #2 + bne .fl_32scale_RGB555u +.endif + .fl_loop32scale_RGB555: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1671,6 +1756,121 @@ FinalizeLineRGB555: ldmfd sp!, {r4-r8,lr} bx lr +.if UNALIGNED_DRAWLINEDEST + @ unaligned versions of loops + @ warning: starts drawing 2bytes before dst + +.fl_RGB555u: + sub r0, r0, #2 @ initial adjustment + mov r8, #0 + +.fl_loopRGB555u: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r6, lr, r12,lsl #1 + ldrh r6, [r3, r6] + and r5, lr, r12,lsr #7 + ldrh r5, [r3, r5] + orr r4, r8, r6, lsl #16 + + and r6, lr, r12,lsr #15 + ldrh r6, [r3, r6] + and r8, lr, r12,lsr #23 + ldrh r8, [r3, r8] + orr r5, r5, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r12,lr, r7, lsr #7 + ldrh r12,[r3, r12] + orr r6, r8, r6, lsl #16 + + and r8, lr, r7, lsr #15 + ldrh r8, [r3, r8] + and r7, lr, r7, lsr #23 + + subs r2, r2, #1 + orr r12,r12,r8, lsl #16 + ldrh r8, [r3, r7] + + stmia r0!, {r4,r5,r6,r12} + bne .fl_loopRGB555u + + strh r8, [r0], #2 + + ldmfd sp!, {r4-r8,lr} + bx lr + + +.fl_32scale_RGB555u: + sub r0, r0, #2 @ initial adjustment + mov r4, #0 + + @ r9 f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 +.fl_loop32scale_RGB555u: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r6, lr, r12,lsl #1 + ldrh r6, [r3, r6] + and r5, lr, r12,lsr #7 + ldrh r5, [r3, r5] + and r6, r6, r9, lsl #2 + orr r4, r4, r6, lsl #16 @ r4 = pix_d -1, 0 + + and r5, r5, r9, lsl #2 + sub r8, r5, r5, lsr #2 @ r8 = 3/4 pix_s 1 + add r6, r8, r6, lsr #2 @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1) + orr r5, r6, r5, lsl #15 + + and r6, lr, r12,lsr #15 + ldrh r6, [r3, r6] + and r12,lr, r12,lsr #23 + ldrh r12,[r3, r12] + and r6, r6, r9, lsl #2 + add r5, r5, r6, lsl #15 @ r5 = pix_d 1, 2 + + and r8, lr, r7, lsl #1 + ldrh r8, [r3, r8] + and r10,lr, r7, lsr #7 + ldrh r10,[r3, r10] + and r12,r12,r9, lsl #2 + sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 + add r6, r6, r12,lsr #2 + orr r6, r6, r12,lsl #16 @ r6 = pix_d 3, 4 + + and r8, r8, r9, lsl #2 + and r10,r10,r9, lsl #2 + sub r12,r10,r10,lsr #2 @ r12 = 3/4 pix_s 5 + orr r8, r8, r8, lsl #14 + add r8, r8, r12,lsl #16 @ r8 = pix_d 5, 6 + and r12,lr, r7, lsr #15 + ldrh r12,[r3, r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + and r12,r12,r9, lsl #2 + add r10,r10,r12 + mov r10,r10, lsr #1 + sub r12,r12,r12,lsr #2 @ r12 = 3/4 pix_s 6 + orr r10,r10,r12,lsl #16 + and r7, r7, r9, lsl #2 + add r10,r10,r7, lsl #14 @ r10 = pix_d 7, 8 + + subs r2, r2, #1 + + stmia r0!, {r4,r5,r6,r8,r10} + mov r4, r7 + bne .fl_loop32scale_RGB555u + + strh r4, [r0], #2 + + ldmfd sp!, {r9,r10} + ldmfd sp!, {r4-r8,lr} + bx lr + +.endif @ UNALIGNED_DRAWLINEDEST + @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@