From 52e4a905c846cedafff6c7623c6ecf505d47c275 Mon Sep 17 00:00:00 2001 From: kub Date: Mon, 22 Nov 2021 19:18:12 +0100 Subject: [PATCH] 32x, add support for h32 mode rendering --- pico/32x/32x.c | 3 +- pico/32x/draw.c | 20 ++++++------ pico/32x/draw_arm.S | 2 +- pico/draw.c | 10 ++++-- pico/draw_arm.S | 74 +++++++++++++++++++++++++++++++++++---------- pico/pico.h | 1 + 6 files changed, 79 insertions(+), 31 deletions(-) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index a34e8075..a674fb6f 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -230,7 +230,6 @@ static void p32x_start_blank(void) // XXX: no proper handling of 32col mode.. if ((Pico32x.vdp_regs[0] & P32XV_Mx) != 0 && // 32x not blanking - (Pico.video.reg[12] & 1) && // 40col mode (!(Pico.video.debug_p & PVD_KILL_32X))) { int md_bg = Pico.video.reg[7] & 0x3f; @@ -238,7 +237,7 @@ static void p32x_start_blank(void) // we draw full layer (not line-by-line) PicoDraw32xLayer(offs, lines, md_bg); } - else if (Pico32xDrawMode != PDM32X_32X_ONLY) + else if (Pico32xDrawMode == PDM32X_BOTH) PicoDraw32xLayerMdOnly(offs, lines); pprof_end(draw); diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 5143af88..adc51659 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -8,6 +8,14 @@ */ #include "../pico_int.h" +// NB: 32X officially doesn't support H32 mode. However, it does work since the +// cartridge slot carries the EDCLK signal which is always H40 clock and is used +// as video clock by the 32X. The H32 MD image is overlaid with the 320 px 32X +// image which has the same on-screen width. How the /YS signal on the cartridge +// slot (signalling the display of background color) is processed in this case +// is however unclear and might lead to glitches due to race conditions caused by the +// different video clocks for H32 and H40. 
+ // BGR555 to native conversion #if defined(USE_BGR555) #define PXCONV(t) ((t)&(mr|mg|mb|mp)) @@ -122,8 +130,6 @@ void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est) FinalizeLine555(sh, line, est); if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 0 || // 32x blanking - // XXX: how is 32col mode handled by real hardware? - !(Pico.video.reg[12] & 1) || // 32col mode (Pico.video.debug_p & PVD_KILL_32X)) { return; @@ -310,14 +316,6 @@ void PicoDraw32xLayerMdOnly(int offs, int lines) int poffs = 0, plen = 320; int l, p; - if (!(Pico.video.reg[12] & 1)) { - // 32col mode. for some render modes MD pixel data carries an offset - if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) - pmd += 32; - poffs = 32; - plen = 256; - } - PicoDrawUpdateHighPal(); dst += poffs; @@ -350,6 +348,8 @@ void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode) PicoDrawSetInternalBuf(NULL, 0); PicoDrawSetOutBufMD(Pico.est.Draw2FB, 328); } + // always need upscaling for H32, before mixing in 32X layer + PicoIn.opt |= POPT_EN_SOFTSCALE; if (use_32x_line_mode) // we'll draw via FinalizeLine32xRGB555 (rare) diff --git a/pico/32x/draw_arm.S b/pico/32x/draw_arm.S index e226d2a5..5c19329f 100644 --- a/pico/32x/draw_arm.S +++ b/pico/32x/draw_arm.S @@ -57,7 +57,7 @@ add r0, r0, r4 mov lr, pc ldr pc, [sp, #(4+1)*4] - ldr r1, [sp, #(3+2)*4] @ &Pico.est + ldr r1, [sp, #(4+2)*4] @ &Pico.est ldr r0, [r1, #OFS_EST_DrawLineDest] ldr r2, [r1, #OFS_EST_DrawLineDestIncr] add r0, r0, r2 diff --git a/pico/draw.c b/pico/draw.c index 6dce27c9..bf4ef38e 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1652,14 +1652,18 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) else len = 256; if ((*est->PicoOpt & POPT_EN_SOFTSCALE) && len < 320) { - if (len == 256) + if (len == 256) { switch (PicoIn.filter) { case 3: h_upscale_bl4_4_5(pd, 320, ps, 256, len, f_pal); break; case 2: h_upscale_bl2_4_5(pd, 320, ps, 256, len, f_pal); break; case 1: h_upscale_snn_4_5(pd, 320, ps, 256, len, f_pal); break; default: 
h_upscale_nn_4_5(pd, 320, ps, 256, len, f_pal); break; } - else if (len == 160) + if (est->rendstatus & PDRAW_32X_SCALE) { // 32X needs scaled CLUT data + unsigned char *psc = ps - 256, *pdc = psc; + rh_upscale_nn_4_5(pdc, 320, psc, 256, 256, f_nop); + } + } else if (len == 160) switch (PicoIn.filter) { case 3: case 2: h_upscale_bl2_1_2(pd, 320, ps, 160, len, f_pal); break; @@ -1886,6 +1890,8 @@ PICO_INTERNAL void PicoFrameStart(void) Pico.est.rendstatus |= PDRAW_SKIP_FRAME; if (sprep | skipped) Pico.est.rendstatus |= PDRAW_PARSE_SPRITES; + if (PicoIn.AHW & PAHW_32X) + Pico.est.rendstatus |= PDRAW_32X_SCALE; Pico.est.HighCol = HighColBase + loffs * HighColIncrement; Pico.est.DrawLineDest = (char *)DrawLineDestBase + loffs * DrawLineDestIncrement; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index a9915635..11d01716 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -21,6 +21,7 @@ .equ PDRAW_DIRTY_SPRITES, (1<<4) .equ PDRAW_PLANE_HI_PRIO, (1<<6) .equ PDRAW_SHHI_DONE, (1<<7) +.equ PDRAW_32X_SCALE, (1<<12) @ helpers .macro add_c24 d s c @@ -1633,20 +1634,20 @@ PicoDoHighPal555_end: .global FinalizeLine555 FinalizeLine555: - stmfd sp!, {r4-r10,lr} - mov r10,r2 @ est - ldr r8, [r10, #OFS_EST_Pico] + stmfd sp!, {r4-r11,lr} + mov r11,r2 @ est + ldr r8, [r11, #OFS_EST_Pico] bl PicoDrawUpdateHighPal - add r3, r10, #OFS_EST_HighPal + add r3, r11, #OFS_EST_HighPal mov lr, #0xff mov lr, lr, lsl #1 - ldr r5, [r10, #OFS_EST_PicoOpt] - ldr r1, [r10, #OFS_EST_HighCol] - ldr r0, [r10, #OFS_EST_DrawLineDest] + ldr r5, [r11, #OFS_EST_PicoOpt] + ldr r1, [r11, #OFS_EST_HighCol] + ldr r0, [r11, #OFS_EST_DrawLineDest] ldr r4, [r5] ldr r7, [r5, #OFS_PicoIn_AHW-OFS_PicoIn_opt] ldrb r12,[r8, #OFS_Pico_video_reg+12] @@ -1716,7 +1717,7 @@ FinalizeLine555: stmia r0!, {r4,r5,r8,r12} bne .fl_loopRGB555 - ldmfd sp!, {r4-r10,pc} + ldmfd sp!, {r4-r11,pc} .fl_32scale_RGB555: @@ -1773,7 +1774,7 @@ FinalizeLine555: stmia r0!, {r4,r5,r6,r8,r10} bne .fl_32scale_nn - ldmfd sp!, {r4-r10,pc} + b 
.fl_32scale_8bit .fl_32scale_snn: ldr r12, [r1], #4 @@ -1822,7 +1823,7 @@ FinalizeLine555: stmia r0!, {r4,r5,r6,r8,r10} bne .fl_32scale_snn - ldmfd sp!, {r4-r10,pc} + b .fl_32scale_8bit .fl_32scale_bl2: ldr r12, [r1], #4 @@ -1878,7 +1879,7 @@ FinalizeLine555: stmia r0!, {r4,r5,r6,r8,r10} bne .fl_32scale_bl2 - ldmfd sp!, {r4-r10,pc} + b .fl_32scale_8bit .fl_32scale_bl4: // TODO this should reflect the bl4 C algorithm, but it doesn't, it's bln. @@ -1946,7 +1947,48 @@ FinalizeLine555: stmia r0!, {r4,r5,r6,r8,r10} bne .fl_32loop_bl4 - ldmfd sp!, {r4-r10,pc} +.fl_32scale_8bit: + ldr r4, [r11, #OFS_EST_rendstatus] + add r0, r1, #320-256 + mov r2, #256/8 + tst r4, #PDRAW_32X_SCALE + ldmeqfd sp!, {r4-r11,pc} + mov lr, #0xff + +.fl_32scale_8bit_nn: + ldr r7, [r1, #-4]! + ldr r12, [r1, #-4]! + + and r4, lr, r12, lsl #0 + and r5, lr, r12, lsr #8 + and r6, lr, r12, lsr #16 + and r10,lr, r12, lsr #24 + + orr r4, r4, r5, lsl #8 + orr r5, r6, r6, lsl #8 + + and r6, lr, r7, lsl #0 + and r8, lr, r7, lsr #8 + and r12,lr, r7, lsr #16 + and r7, lr, r7, lsr #24 + + orr r6, r10,r6, lsl #8 + orr r8, r8,r12, lsl #8 + + subs r2, r2, #1 + + orr r10,r12,r7, lsl #8 + + strh r10, [r0, #-2]! + strh r8, [r0, #-2]! + strh r6, [r0, #-2]! + strh r5, [r0, #-2]! + strh r4, [r0, #-2]! 
+ + bne .fl_32scale_8bit_nn + + ldmfd sp!, {r4-r11,pc} + .fl_20scale_RGB555: ldr r5, [r5, #OFS_PicoIn_filter-OFS_PicoIn_opt] @@ -2002,7 +2044,7 @@ FinalizeLine555: subs r2, r2, #1 bne .fl_20scale_nn - ldmfd sp!, {r4-r10,pc} + ldmfd sp!, {r4-r11,pc} .fl_20scale_bl2: @@ -2082,7 +2124,7 @@ FinalizeLine555: stmia r0!, {r4,r5,r6,r8} bne .fl_20loop_bl2 - ldmfd sp!, {r4-r10,pc} + ldmfd sp!, {r4-r11,pc} #ifdef UNALIGNED_DRAWLINEDEST @@ -2128,7 +2170,7 @@ FinalizeLine555: strh r8, [r0], #2 - ldmfd sp!, {r4-r10,pc} + ldmfd sp!, {r4-r11,pc} .fl_32scale_RGB555u: @@ -2193,7 +2235,7 @@ FinalizeLine555: strh r4, [r0], #2 - ldmfd sp!, {r4-r10,pc} + ldmfd sp!, {r4-r11,pc} #endif /* UNALIGNED_DRAWLINEDEST */ diff --git a/pico/pico.h b/pico/pico.h index 51021070..b35ce2fe 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -223,6 +223,7 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est); #define PDRAW_BORDER_32 (1<<9) // center H32 in buffer (32 px border) #define PDRAW_SKIP_FRAME (1<<10) // frame is skipped #define PDRAW_30_ROWS (1<<11) // 30 rows mode (240 lines) +#define PDRAW_32X_SCALE (1<<12) // scale CLUT layer for 32X extern int rendstatus_old; extern int rendlines; -- 2.39.5