From 959ea39b232ee01a24a016e8844fc4536254e368 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 21 Jan 2021 19:09:17 +0100 Subject: [PATCH] vdp renderer, partial sync 8bit fast ARM asm with C code --- pico/draw2.c | 37 ++++++++++++++++--------------- pico/draw2_arm.S | 54 ++++++++++++++++++++++++++++++---------------- pico/pico_int.h | 4 ++-- tools/mkoffsets.sh | 2 ++ 4 files changed, 59 insertions(+), 38 deletions(-) diff --git a/pico/draw2.c b/pico/draw2.c index 9d65ea54..1e27af3d 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -34,7 +34,7 @@ void (*PicoPrepareCram)()=0; // prepares PicoCramHigh for renderer to // stuff available in asm: #ifdef _ASM_DRAW_C -void BackFillFull(void *dst, int reg7); +void BackFillFull(void *dst, int reg7, int lwidth); void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend, struct PicoEState *est); void DrawTilesFromCacheF(u32 *hc, struct PicoEState *est); @@ -183,7 +183,7 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) nametab += nametab_step*(start-scrstart); // check priority - code=PicoMem.vram[nametab+tile_start]; + code=est->PicoMem_vram[nametab+tile_start]; if ((code>>15) != prio) return; // hack: just assume that whole window uses same priority scrpos+=8*est->Draw2Width+8; @@ -197,7 +197,7 @@ static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) // unsigned short *pal=NULL; unsigned char pal; - code=PicoMem.vram[nametab+tilex]; + code=est->PicoMem_vram[nametab+tilex]; if (code==blank) continue; // Get tile address/2: @@ -250,7 +250,7 @@ static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend, if(!(pvid->reg[11]&3)) { // full screen scroll // Get horizontal scroll value - hscroll=PicoMem.vram[htab&0x7fff]; + hscroll=est->PicoMem_vram[htab&0x7fff]; htab = 0; // this marks that we don't have to update scroll value } @@ -297,7 +297,7 @@ static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend, if(htab) { int 
htaddr=htab+(trow<<4); if(trow) htaddr-=(vscroll&7)<<1; - hscroll=PicoMem.vram[htaddr&0x7fff]; + hscroll=est->PicoMem_vram[htaddr&0x7fff]; } // Draw tiles across screen: @@ -323,7 +323,7 @@ static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend, #endif vsidx++; - code=PicoMem.vram[nametab_row+(tilex&xmask)]; + code=est->PicoMem_vram[nametab_row+(tilex&xmask)]; if (code==blank) continue; if (code>>15) { // high priority tile @@ -442,18 +442,19 @@ static void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est) pal=(unsigned char)((code>>9)&0x30); // goto first vertically visible tile - while(sy <= scrstart*8) { sy+=8; tile+=tdeltay; height--; } + sy -= scrstart*8; + while(sy <= 0) { sy+=8; tile+=tdeltay; height--; } scrpos = est->Draw2FB; if (est->rendstatus&PDRAW_BORDER_32) scrpos += 32; - scrpos+=(sy-scrstart*8)*est->Draw2Width; + scrpos+=sy*est->Draw2Width; for (; height > 0; height--, sy+=8, tile+=tdeltay) { int w = width, x=sx, t=tile, s; - if((sy-scrstart*8) >= END_ROW*8+8) return; // offscreen + if(sy >= END_ROW*8+8) return; // offscreen for (; w; w--,x+=8,t+=tdeltax) { @@ -502,7 +503,7 @@ static void DrawAllSpritesFull(int prio, int maxwidth, struct PicoEState *est) unsigned int *sprite=NULL; int code, code2, sx, sy, height; - sprite=(u32 *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(u32 *)(est->PicoMem_vram+((table+(link<<2))&0x7ffc)); // Find sprite // get sprite info code = sprite[0]; @@ -559,16 +560,18 @@ static void DrawAllSpritesFull(int prio, int maxwidth, struct PicoEState *est) } #ifndef _ASM_DRAW_C -static void BackFillFull(void *dst, int reg7) +static void BackFillFull(unsigned char *dst, int reg7, int lwidth) { unsigned int back; + int i; // Start with a background color: back=reg7&0x3f; back|=back<<8; back|=back<<16; - memset32(dst, back, Pico.est.Draw2Width*(8+(END_ROW-START_ROW)*8)/4); + for (i = 0, dst += 8*lwidth; i < (END_ROW-START_ROW)*8; i++, dst += lwidth) + memset32(dst+8, back, 
320/4); } #endif @@ -589,19 +592,19 @@ static void DrawDisplayFull(void) } if(est->rendstatus & PDRAW_30_ROWS) { // In 240 line mode, the top and bottom 8 lines are omitted - // since this renderer always renderers 224 lines + // since this renderer always renders 224 lines scrstart ++, scrend ++; } est->Draw2Start = scrstart; - planestart = scrstart, planeend = scrend; - winstart = scrstart, winend = scrend; - // 32C border for centering? (for asm) est->rendstatus &= ~PDRAW_BORDER_32; if ((est->rendstatus&PDRAW_32_COLS) && !(PicoIn.opt&POPT_DIS_32C_BORDER)) est->rendstatus |= PDRAW_BORDER_32; + planestart = scrstart, planeend = scrend; + winstart = scrstart, winend = scrend; + // horizontal window? if ((win=pvid->reg[0x12])) { @@ -716,7 +719,7 @@ PICO_INTERNAL void PicoFrameFull() if (PicoPrepareCram) PicoPrepareCram(); // Draw screen: - BackFillFull(Pico.est.Draw2FB, Pico.video.reg[7]); + BackFillFull(Pico.est.Draw2FB, Pico.video.reg[7], Pico.est.Draw2Width); if (Pico.video.reg[1] & 0x40) DrawDisplayFull(); diff --git a/pico/draw2_arm.S b/pico/draw2_arm.S index ded0d5a5..3101a823 100644 --- a/pico/draw2_arm.S +++ b/pico/draw2_arm.S @@ -6,10 +6,17 @@ * See COPYING file in the top-level directory. * * this is highly specialized, be careful if changing related C code! 
+ * + * NB: this only deals with buffers having a line width of 328 */ #include "pico_int_offs.h" +.equ PDRAW_INTERLACE, (1<<3) +.equ PDRAW_32_COLS, (1<<8) +.equ PDRAW_BORDER_32, (1<<9) +.equ PDRAW_30_ROWS, (1<<11) + @ define these constants in your include file: @ .equiv START_ROW, 1 @ .equiv END_ROW, 27 @@ -24,14 +31,17 @@ .text .align 2 -@ void BackFillFull(void *dst, int reg7) +@ void BackFillFull(unsigned char *dst, int reg7, int lwidth) .global BackFillFull BackFillFull: - stmfd sp!, {r4-r9,lr} + stmfd sp!, {r4-r10,lr} + + sub r10,r2, #320 @ unused bytes in a line + add lr, r0, #8 @ 8 px overlap area at start of line + add lr, lr, r2, lsl #3 @ 8 lines overlap area at top - add lr, r0, #328*8 mov r0, r1, lsl #26 mov r0, r0, lsr #26 @@ -52,7 +62,6 @@ BackFillFull: @ go go go! .bff_loop: - add lr, lr, #8 subs r12, r12, #1 stmia lr!, {r0-r9} @ 10*4*8 @@ -64,9 +73,10 @@ BackFillFull: stmia lr!, {r0-r9} stmia lr!, {r0-r9} + add lr, lr, r10 @ skip unused rest of line bne .bff_loop - ldmfd sp!, {r4-r9,lr} + ldmfd sp!, {r4-r10,lr} bx lr .pool @@ -413,14 +423,15 @@ DrawLayerFull: orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13 ldr r11,[sp, #9*4] @ est - sub r4, r9, #(START_ROW<<24) + ldr r4, [r11, #OFS_EST_Draw2Start] ldr r7, [r11, #OFS_EST_rendstatus] ldr r11, [r11, #OFS_EST_Draw2FB] - tst r7, #0x100 @ H32 border mode? + sub r4, r9, r4, lsl #24 + tst r7, #PDRAW_BORDER_32 @ H32 border mode? 
addne r11, r11, #32 mov r4, r4, asr #24 mov r7, #328*8 - mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW); + mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-Draw2Start); @ Get vertical scroll value: add_c24 r7, r10, (OFS_PMEM_vsram-OFS_PMEM_vram) @@ -588,15 +599,16 @@ DrawLayerFull: .global DrawTilesFromCacheF DrawTilesFromCacheF: - stmfd sp!, {r4-r10,lr} + stmfd sp!, {r4-r11,lr} mov r9, #0xff000000 @ r9=prevcode=-1 mvn r6, #0 @ r6=prevy=-1 ldr r7, [r1, #OFS_EST_rendstatus] ldr r4, [r1, #OFS_EST_Draw2FB] + ldr r11,[r1, #OFS_EST_Draw2Start] ldr r2, [r0], #4 @ read y offset - tst r7, #0x100 @ H32 border mode? + tst r7, #PDRAW_BORDER_32 @ H32 border mode? addne r4, r4, #32 mov r7, #328 mla r2, r7, r2, r4 @@ -612,13 +624,14 @@ DrawTilesFromCacheF: .dtfcf_loop: ldr r7, [r8], #4 @ read code movs r1, r7, lsr #16 @ r1=dx; - ldmeqfd sp!, {r4-r10,pc} @ dx is never zero, this must be a terminator, return + ldmeqfd sp!, {r4-r11,pc} @ dx is never zero, this must be a terminator, return @ row changed? cmp r6, r7, lsr #27 movne r6, r7, lsr #27 + subne r6, r6, r11 movne r4, #328*8 - mlane r5, r4, r6, r12 @ r5=pd = scrpos + prevy*328*8 + mlane r5, r4, r6, r12 @ r5=pd = scrpos + (prevy-Draw2Start)*328*8 bic r1, r1, #0xf800 add r1, r5, r1 @ r1=pdest (halfwords) @@ -695,6 +708,7 @@ DrawWindowFull: ldr r4, [r11, #OFS_Pico_video_reg+12] mov r5, #1 @ nametab_step ldr r11, [r3, #OFS_EST_Draw2FB] + ldr r6, [r3, #OFS_EST_Draw2Start] tst r4, #1 @ 40 cell mode? 
andne r12, r12, #0xf000 @ 0x3c<<10 movne r5, r5, lsl #7 @@ -702,11 +716,12 @@ DrawWindowFull: ldr r7, [r3, #OFS_EST_rendstatus] and r12, r12, #0xf800 mov r5, r5, lsl #6 @ nametab_step - tst r7, #0x100 + tst r7, #PDRAW_BORDER_32 addne r11, r11, #32 @ center screen in H32 mode 0: and r4, r0, #0xff - mla r12, r5, r4, r12 @ nametab += nametab_step*start; + sub r4, r4, r6 + mla r12, r5, r4, r12 @ nametab += nametab_step*(start-Draw2Start); ldr r10, [r3, #OFS_EST_PicoMem_vram] mov r4, r0, lsr #16 @ r4=start_cell_h @@ -728,11 +743,11 @@ DrawWindowFull: and r4, r0, #0xff add r11, r11, #328*8 - sub r4, r4, #START_ROW + sub r4, r4, r6 add r11, r11, #8 mov r7, #328*8 - mla r11, r7, r4, r11 @ scrpos+=8*328*(start-START_ROW); + mla r11, r7, r4, r11 @ scrpos+=8*328*(start-Draw2Start); mov r0, #0xf .dwfloop_outer: @@ -927,12 +942,13 @@ DrawSpriteFull: ldr r0, [r1, #OFS_EST_rendstatus] ldr r11, [r1, #OFS_EST_Draw2FB] + ldr r2, [r1, #OFS_EST_Draw2Start] ldr r10, [r1, #OFS_EST_PicoMem_vram] - tst r0, #0x100 @ H32 border mode? + tst r0, #PDRAW_BORDER_32 @ H32 border mode? 
addne r11, r11, #32 - sub r1, r12, #(START_ROW*8) + sub r12, r12, r2, lsl #3 mov r0, #328 - mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328; + mla r11, r12, r0, r11 @ scrpos+=(sy-Draw2Start*8)*328; orr r5, r5, r5, lsl #16 @ orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24) diff --git a/pico/pico_int.h b/pico/pico_int.h index d66b67e2..26fb60a1 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -355,8 +355,8 @@ struct PicoEState unsigned char *HighCol; u32 *HighPreSpr; struct Pico *Pico; - void *PicoMem_vram; - void *PicoMem_cram; + unsigned short *PicoMem_vram; + unsigned short *PicoMem_cram; unsigned int *PicoOpt; unsigned char *Draw2FB; int Draw2Width; diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index c347587c..e45567f5 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -144,6 +144,8 @@ get_define OFS_EST_ PicoEState PicoMem_vram ; echo "$line" >>$fn get_define OFS_EST_ PicoEState PicoMem_cram ; echo "$line" >>$fn get_define OFS_EST_ PicoEState PicoOpt ; echo "$line" >>$fn get_define OFS_EST_ PicoEState Draw2FB ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState Draw2Width ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState Draw2Start ; echo "$line" >>$fn get_define OFS_EST_ PicoEState HighPal ; echo "$line" >>$fn get_define OFS_PMEM_ PicoMem vram ; echo "$line" >>$fn -- 2.39.2