X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=Pico%2FDraw.s;h=3cafde9274ffd5f102158113dcdde999610691c5;hb=1832075ec7b43b166a9d5f83753832a05e7d4239;hp=6338aabd8bce926fc4e88fe506ff026bbfcbf05e;hpb=2ec14aec8ff1068bf6265f3ae9c502bf58cfc686;p=picodrive.git diff --git a/Pico/Draw.s b/Pico/Draw.s index 6338aab..3cafde9 100644 --- a/Pico/Draw.s +++ b/Pico/Draw.s @@ -444,8 +444,8 @@ DrawLayer: .DrawStrip_vsscroll: rsb r8, r3, #0 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3 - bic r8, r8, #0xff000000 - orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[23:0]) + bic r8, r8, #0x3fc00000 + orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[21:0]) ldr r4, =Scanline orr r5, r1, r10, lsl #24 @@ -504,9 +504,9 @@ DrawLayer: ldrh r7, [r7] @ r7=vscroll bic r10,r10,#0xff @ clear old ty - and r4, r5, #0xff0000 - add r4, r4, r7, lsl #16 - and r4, r4, r5, lsl #16 @ r4=line<<16 + and r4, r5, #0xff0000 @ scanline + add r4, r4, r7, lsl #16 @ ... += vscroll + and r4, r4, r5, lsl #16 @ ... &= ymask and r7, r4, #0x70000 orr r10,r10,r7, lsr #15 @ new ty @@ -529,7 +529,7 @@ DrawLayer: beq .DrawStrip_vs_samecode @ we know stuff about this tile already mov r9, r7 @ remember code - orr r8, r8, #1<<24 @ seen non hi-prio tile + orr r8, r8, #(1<<24)@ seen non hi-prio tile movs r2, r9, lsl #20 @ if (code&0x1000) mov r2, r2, lsl #1 @@ -603,7 +603,7 @@ DrawLayer: b .dsloop_vs .dsloop_vs_exit: - tst r8, #1<<24 @ seen non hi-prio tile + tst r8, #(1<<24) @ seen non hi-prio tile ldreq r1, =rendstatus mov r0, #0 ldreq r2, [r1] @@ -896,7 +896,7 @@ DrawTilesFromCache: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawSpritesFromCache @ int *hc, int sh +.global DrawSpritesFromCache @ int *hc, int maxwidth, int prio, int sh DrawSpritesFromCache: stmfd sp!, {r4-r11,lr} @@ -911,7 +911,7 @@ DrawSpritesFromCache: mov r12,#0xf .endif ldr lr, =(Pico+0x10000) @ lr=Pico.vram - mov r6, r1, lsl #31 + mov r6, r3, lsl #31 orr r6, r6, #1<<30 mov r10, r0 @@ -1032,11 +1032,12 @@ DrawSpritesFromCache: @ + 0 : hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. size @ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -.global DrawSprite @ unsigned int *sprite, int **hc, int sh +.global DrawSprite @ unsigned int *sprite, int **hc, int sh, int acc_sprites DrawSprite: stmfd sp!, {r4-r9,r11,lr} + orr r8, r3, r2, lsl #4 ldr r3, [r0] @ sprite[0] ldr r7, =Scanline mov r6, r3, lsr #28 @@ -1050,10 +1051,10 @@ DrawSprite: ldr r9, [r0, #4] sub r7, r7, r4, asr #16 @ r7=row=Scanline-sy - tst r2, r2 mov r2, r9, asr #16 @ r2=sx - bic r9, r9, #0xfe000000 - orrne r9, r9, #1<<31 @ r9=code|(sh<<31) + mov r9, r9, lsl #16 + mov r9, r9, lsr #16 + orr r9, r9, r8, lsl #27 @ r9=code|sh[31]|as[27] tst r9, #0x1000 movne r4, r5, lsl #3 @@ -1075,6 +1076,7 @@ DrawSprite: tst r9, #0x8000 bne .dspr_cache @ if(code&0x8000) // high priority - cache it +.dspr_continue: @ cache some stuff to avoid mem access .if OVERRIDE_HIGHCOL ldr r11,=HighCol @@ -1089,11 +1091,10 @@ DrawSprite: mov r5, r5, lsl #4 @ delta<<=4; // Delta of address and r4, r9, #0x6000 orr r9, r9, r4, lsl #16 - orr r9, r9, #0x10000000 @ r9=scc1 ???? ... (s=shadow/hilight, cc=pal) + orrs r9, r9, #0x10000000 @ r9=scc1 a??? ... (s=shadow/hilight, cc=pal, a=acc_spr) - tst r9, #1<<31 mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); - orrne r3, r3, #0x40 @ shadow by default + orrmi r3, r3, #0x40 @ shadow by default add r6, r6, #1 @ inc now adds r0, r2, #0 @ mov sx to r0 and set ZV flags @@ -1184,19 +1185,21 @@ DrawSprite: mov r4, r8, lsl #16 @ tile tst r9, #0x0800 orrne r4, r4, #0x10000 @ code&0x0800 - mov r2, r2, lsl #22 - orr r4, r4, r2, lsr #16 @ (sx<<6)&0x0000ffc0 - and r2, r9, #0x6000 - orr r4, r4, r2, lsr #9 @ (code>>9)&0x30 + mov r0, r2, lsl #22 + orr r4, r4, r0, lsr #16 @ (sx<<6)&0x0000ffc0 + and r0, r9, #0x6000 + orr r4, r4, r0, lsr #9 @ (code>>9)&0x30 mov r3, r3, lsl #12 - ldr r2, [r1] + ldr r0, [r1] orr r4, r4, r3, lsr #28 @ (sprite[0]>>24)&0xf - str r4, [r2], #4 - str r2, [r1] + str r4, [r0], #4 + str r0, [r1] - ldmfd sp!, {r4-r9,r11,lr} - bx lr + tst r9, #(1<<27) + ldmeqfd sp!, {r4-r9,r11,lr} + bne .dspr_continue @ draw anyway if accurate sprites enabled + bxeq lr @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -1452,23 +1455,29 @@ FinalizeLineBGR444: bne .fl_loopcpBGR444_hi sub r3, r4, #0x40*3*2 + mov r6, #1 .fl_noshBGR444: + ldr r12,=rendstatus + eors r6, r6, #1 @ sh is 0 + ldr r12,[r12] + mov lr, #0xff + tstne r12,#(1<<2) @ and PDRAW_ACC_SPRITES + .if OVERRIDE_HIGHCOL ldr r1, =HighCol - mov lr, #0xff + movne lr, #0x3f ldr r1, [r1] mov lr, lr, lsl #1 add r1, r1, #8 .else ldr r1, =(HighCol+8) - mov lr, #0xff + movne lr, #0x3f mov lr, lr, lsl #1 .endif .fl_loopBGR444: - ldr r12, [r1], #4 subs r2, r2, #1 @@ -1478,11 +1487,10 @@ FinalizeLineBGR444: ldrh r5, [r3, r5] and r6, lr, r12, lsr #15 ldrh r6, [r3, r6] + and r12,lr, r12, lsr #23 + ldrh r12,[r3, r12] @ 1c.i. orr r4, r4, r5, lsl #16 - - and r5, lr, r12, lsr #23 - ldrh r5, [r3, r5] @ 2c.i. - orr r5, r6, r5, lsl #16 + orr r5, r6, r12,lsl #16 stmia r0!, {r4,r5} bne .fl_loopBGR444 @@ -1617,8 +1625,16 @@ FinalizeLineRGB555: bne .fl_loopcpRGB555_hi sub r3, r3, #0x40*2 + mov r6, #1 .fl_noshRGB555: + ldr r12,=rendstatus + eors r6, r6, #1 @ sh is 0 + ldr r12,[r12] + mov lr, #0xff + tstne r12,#(1<<2) @ and PDRAW_ACC_SPRITES + movne lr, #0x3f + .if OVERRIDE_HIGHCOL ldr r1, =HighCol ldr r0, =DrawLineDest @@ -1632,7 +1648,6 @@ FinalizeLineRGB555: .endif ldrb r12, [r8, #12] - mov lr, #0xff mov lr, lr, lsl #1 tst r12, #1 @@ -1647,8 +1662,14 @@ FinalizeLineRGB555: addeq r0, r0, #32*2 .fl_no32colRGB555: -.fl_loopRGB555: +.if UNALIGNED_DRAWLINEDEST + @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer + tst r0, #2 + bne .fl_RGB555u +.endif + +.fl_loopRGB555: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1670,12 +1691,12 @@ FinalizeLineRGB555: ldrh r6, [r3, r6] and r12,lr, r7, lsr #15 ldrh r12,[r3, r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] orr r8, r8, r6, lsl #16 - and r6, lr, r7, lsr #23 - ldrh r6, [r3, r6] @ 1 cycle interlock here (r6) subs r2, r2, #1 - orr r12,r12, r6, lsl #16 + orr r12,r12, r7, lsl #16 stmia r0!, {r4,r5,r8,r12} bne .fl_loopRGB555 @@ -1689,6 +1710,11 @@ FinalizeLineRGB555: mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 orr r9, r9, #0x00e7 +.if UNALIGNED_DRAWLINEDEST + tst r0, #2 + bne .fl_32scale_RGB555u +.endif + .fl_loop32scale_RGB555: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1745,6 +1771,121 @@ FinalizeLineRGB555: ldmfd sp!, {r4-r8,lr} bx lr +.if UNALIGNED_DRAWLINEDEST + @ unaligned versions of loops + @ warning: starts drawing 2bytes before dst + +.fl_RGB555u: + sub r0, r0, #2 @ initial adjustment + mov r8, #0 + +.fl_loopRGB555u: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r6, lr, r12,lsl #1 + ldrh r6, [r3, r6] + and r5, lr, r12,lsr #7 + ldrh r5, [r3, r5] + orr r4, r8, r6, lsl #16 + + and r6, lr, r12,lsr #15 + ldrh r6, [r3, r6] + and r8, lr, r12,lsr #23 + ldrh r8, [r3, r8] + orr r5, r5, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r12,lr, r7, lsr #7 + ldrh r12,[r3, r12] + orr r6, r8, r6, lsl #16 + + and r8, lr, r7, lsr #15 + ldrh r8, [r3, r8] + and r7, lr, r7, lsr #23 + + subs r2, r2, #1 + orr r12,r12,r8, lsl #16 + ldrh r8, [r3, r7] + + stmia r0!, {r4,r5,r6,r12} + bne .fl_loopRGB555u + + strh r8, [r0], #2 + + ldmfd sp!, {r4-r8,lr} + bx lr + + +.fl_32scale_RGB555u: + sub r0, r0, #2 @ initial adjustment + mov r4, #0 + + @ r9 f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 +.fl_loop32scale_RGB555u: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r6, lr, r12,lsl #1 + ldrh r6, [r3, r6] + and r5, lr, r12,lsr #7 + ldrh r5, [r3, r5] + and r6, r6, r9, lsl #2 + orr r4, r4, r6, lsl #16 @ r4 = pix_d -1, 0 + + and r5, r5, r9, lsl #2 + sub r8, r5, r5, lsr #2 @ r8 = 3/4 pix_s 1 + add r6, r8, r6, lsr #2 @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1) + orr r5, r6, r5, lsl #15 + + and r6, lr, r12,lsr #15 + ldrh r6, [r3, r6] + and r12,lr, r12,lsr #23 + ldrh r12,[r3, r12] + and r6, r6, r9, lsl #2 + add r5, r5, r6, lsl #15 @ r5 = pix_d 1, 2 + + and r8, lr, r7, lsl #1 + ldrh r8, [r3, r8] + and r10,lr, r7, lsr #7 + ldrh r10,[r3, r10] + and r12,r12,r9, lsl #2 + sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 + add r6, r6, r12,lsr #2 + orr r6, r6, r12,lsl #16 @ r6 = pix_d 3, 4 + + and r8, r8, r9, lsl #2 + and r10,r10,r9, lsl #2 + sub r12,r10,r10,lsr #2 @ r12 = 3/4 pix_s 5 + orr r8, r8, r8, lsl #14 + add r8, r8, r12,lsl #16 @ r8 = pix_d 5, 6 + and r12,lr, r7, lsr #15 + ldrh r12,[r3, r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + and r12,r12,r9, lsl #2 + add r10,r10,r12 + mov r10,r10, lsr #1 + sub r12,r12,r12,lsr #2 @ r12 = 3/4 pix_s 6 + orr r10,r10,r12,lsl #16 + and r7, r7, r9, lsl #2 + add r10,r10,r7, lsl #14 @ r10 = pix_d 7, 8 + + subs r2, r2, #1 + + stmia r0!, {r4,r5,r6,r8,r10} + mov r4, r7 + bne .fl_loop32scale_RGB555u + + strh r4, [r0], #2 + + ldmfd sp!, {r9,r10} + ldmfd sp!, {r4-r8,lr} + bx lr + +.endif @ UNALIGNED_DRAWLINEDEST + @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@