From: kub Date: Fri, 10 Sep 2021 16:46:05 +0000 (+0200) Subject: sdl, complete overhaul of hardware/software scaling X-Git-Tag: v2.00~495 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d5d1778252407f7110a868d2aad669905d6b4b6b;p=picodrive.git sdl, complete overhaul of hardware/software scaling --- diff --git a/Makefile b/Makefile index b0cc83ec..56dba332 100644 --- a/Makefile +++ b/Makefile @@ -210,7 +210,7 @@ endif ifeq "$(USE_FRONTEND)" "1" # common -OBJS += platform/common/main.o platform/common/emu.o \ +OBJS += platform/common/main.o platform/common/emu.o platform/common/upscale.o \ platform/common/menu_pico.o platform/common/config_file.o # libpicofe diff --git a/pico/32x/draw.c b/pico/32x/draw.c index cc98a692..5143af88 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -122,7 +122,7 @@ void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est) FinalizeLine555(sh, line, est); if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 0 || // 32x blanking - // XXX: how is 32col mode hadled by real hardware? + // XXX: how is 32col mode handled by real hardware? 
!(Pico.video.reg[12] & 1) || // 32col mode (Pico.video.debug_p & PVD_KILL_32X)) { @@ -162,6 +162,7 @@ void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est) #define PICOSCAN_POST \ PicoScan32xEnd(l + (lines_sft_offs & 0xff)); \ + Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement32x; \ #define make_do_loop(name, pre_code, post_code, md_code) \ /* Direct Color Mode */ \ @@ -260,6 +261,7 @@ void PicoDraw32xLayer(int offs, int lines, int md_bg) int which_func; Pico.est.DrawLineDest = (char *)DrawLineDestBase32x + offs * DrawLineDestIncrement32x; + Pico.est.DrawLineDestIncr = DrawLineDestIncrement32x; dram = Pico32xMem->dram[Pico32x.vdp_regs[0x0a/2] & P32XV_FS]; if (Pico32xDrawMode == PDM32X_BOTH) @@ -330,7 +332,7 @@ void PicoDraw32xLayerMdOnly(int offs, int lines) dst[p + 2] = pal[*pmd++]; dst[p + 3] = pal[*pmd++]; } - dst = (void *)((char *)dst + DrawLineDestIncrement32x); + dst = Pico.est.DrawLineDest = (char *)dst + DrawLineDestIncrement32x; pmd += 328 - plen; if (have_scan) PicoScan32xEnd(l + offs); diff --git a/pico/32x/draw_arm.S b/pico/32x/draw_arm.S index cdcb211a..8d6ee738 100644 --- a/pico/32x/draw_arm.S +++ b/pico/32x/draw_arm.S @@ -20,11 +20,11 @@ PIC_LDR_INIT() -.macro call_scan_prep cond est @ &Pico.est +.macro call_scan_prep cond pico @ &Pico .if \cond PIC_LDR(r4, r6, PicoScan32xBegin) PIC_LDR(r5, r6, PicoScan32xEnd) - ldr r6, [\est, #OFS_EST_DrawLineDest] + add r6, \pico, #OFS_Pico_est ldr r4, [r4] ldr r5, [r5] stmfd sp!, {r4,r5,r6} @@ -44,9 +44,9 @@ add r0, r0, r4 mov lr, pc ldr pc, [sp, #(3+0)*4] - ldr r0, [sp, #(3+2)*4] @ &DrawLineDest + ldr r0, [sp, #(3+2)*4] @ &Pico.est ldmfd sp!, {r1-r3} - ldr r0, [r0] + ldr r0, [r0, #OFS_EST_DrawLineDest] .endif .endm @@ -57,6 +57,11 @@ add r0, r0, r4 mov lr, pc ldr pc, [sp, #(4+1)*4] + ldr r1, [sp, #(3+2)*4] @ &Pico.est + ldr r0, [r1, #OFS_EST_DrawLineDest] + ldr r2, [r1, #OFS_EST_DrawLineDestIncr] + add r0, r0, r2 + str r0, [r1] ldmfd sp!, {r0-r3} .endif .endm @@ -71,6 
+76,7 @@ PIC_LDR(lr, r9, Pico) PIC_LDR(r10,r9, Pico32x) ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] + ldr r12, [lr, #OFS_Pico_est+OFS_EST_DrawLineDestIncr] ldrh r10,[r10, #0x40] @ Pico32x.vdp_regs[0] add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd @@ -88,6 +94,9 @@ 0: @ loop_outer: call_scan_end \call_scan + ldr r12, [r9, #OFS_EST_DrawLineDestIncr-OFS_EST_HighPal] + sub r0, r0, #320*2 + add r0, r0, r12 add r4, r4, #1 cmp r4, r2, lsr #16 call_scan_fin_ge \call_scan @@ -231,7 +240,7 @@ PIC_LDR(lr, r9, Pico) PIC_LDR(r10,r9, Pico32xMem) - ldr r9,=OFS_PMEM32x_pal_native + ldr r9, =OFS_PMEM32x_pal_native ldr r10, [r10] ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] add r10,r10,r9 @@ -248,6 +257,9 @@ 0: @ loop_outer: call_scan_end \call_scan + ldr r12, [r9, #OFS_EST_DrawLineDestIncr-OFS_EST_HighPal] + sub r0, r0, #320*2 + add r0, r0, r12 add r4, r4, #1 cmp r4, r2, lsr #16 call_scan_fin_ge \call_scan @@ -400,7 +412,7 @@ PIC_LDR(lr, r9, Pico) PIC_LDR(r10,r9, Pico32xMem) - ldr r9,=OFS_PMEM32x_pal_native + ldr r9, =OFS_PMEM32x_pal_native ldr r10, [r10] ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] add r10,r10,r9 @@ -417,6 +429,9 @@ 0: @ loop_outer: call_scan_end \call_scan + ldr r12, [r9, #OFS_EST_DrawLineDestIncr-OFS_EST_HighPal] + sub r0, r0, #320*2 + add r0, r0, r12 add r4, r4, #1 cmp r4, r2, lsr #16 call_scan_fin_ge \call_scan diff --git a/pico/draw.c b/pico/draw.c index 7e279dbf..54e48512 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -44,6 +44,8 @@ */ #include "pico_int.h" +#include + #define FORCE // layer forcing via debug register? int (*PicoScanBegin)(unsigned int num) = NULL; @@ -1407,7 +1409,7 @@ static NOINLINE void PrepareSprites(int max_lines) if (!(Pico.video.reg[12]&1)) max_sprites = 64, max_line_sprites = 16, max_width = 264; - if (PicoIn.opt & POPT_DIS_SPRITE_LIM) + if (*est->PicoOpt & POPT_DIS_SPRITE_LIM) max_line_sprites = MAX_LINE_SPRITES; sh = Pico.video.reg[0xC]&8; // shadow/hilight? 
@@ -1643,20 +1645,20 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est) if (Pico.video.reg[12]&1) { len = 320; } else { - if (!(PicoIn.opt&POPT_DIS_32C_BORDER)) pd+=32; len = 256; } - { -#if 1 - int i; - - for (i = len; i > 0; i-=4) { - *pd++ = pal[*ps++]; - *pd++ = pal[*ps++]; - *pd++ = pal[*ps++]; - *pd++ = pal[*ps++]; + if ((*est->PicoOpt & POPT_EN_SOFTSCALE) && len == 256) { + switch (PicoIn.filter) { + case 3: h_upscale_bl4_4_5(pd, 320, ps, 256, 256, f_pal); break; + case 2: h_upscale_bl2_4_5(pd, 320, ps, 256, 256, f_pal); break; + case 1: h_upscale_snn_4_5(pd, 320, ps, 256, 256, f_pal); break; + default: h_upscale_nn_4_5(pd, 320, ps, 256, 256, f_pal); break; } + } else { + if (!(*est->PicoOpt & POPT_DIS_32C_BORDER) && len == 256) pd += 32; +#if 1 + h_copy(pd, 320, ps, 320, len, f_pal); #else extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); @@ -1691,19 +1693,32 @@ static void FinalizeLine8bit(int sh, int line, struct PicoEState *est) if (Pico.video.reg[12]&1) { len = 320; } else { - if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) - pd += 32; len = 256; } - if (DrawLineDestIncrement == 0) { + if ((PicoIn.opt & POPT_EN_SOFTSCALE) && len == 256) { + unsigned char *ps = est->HighCol+8; + unsigned char pal = 0; + + if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) + pal = est->SonicPalCount*0x40; + if (DrawLineDestIncrement == 0) + pd = est->HighCol+8; + // Smoothing can't be used with CLUT, hence it's always Nearest Neighbour. + // use reverse version since src and dest ptr may be the same. 
+ rh_upscale_nn_4_5(pd, 320, ps, 256, len, f_or); + } else if (DrawLineDestIncrement == 0) { if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) - blockcpy_or(pd+8, est->HighCol+8, len, est->SonicPalCount*0x40); - } else if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) { - // select active backup palette - blockcpy_or(pd, est->HighCol+8, len, est->SonicPalCount*0x40); + blockcpy_or(est->HighCol+8, est->HighCol+8, len, est->SonicPalCount*0x40); } else { - blockcpy(pd, est->HighCol+8, len); + if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) + pd += 32; + if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) { + // select active backup palette + blockcpy_or(pd, est->HighCol+8, len, est->SonicPalCount*0x40); + } else { + blockcpy(pd, est->HighCol+8, len); + } } } @@ -1828,7 +1843,7 @@ static int DrawDisplay(int sh) // MUST be called every frame PICO_INTERNAL void PicoFrameStart(void) { - int offs = 8, lines = 224; + int loffs = 8, lines = 224, coffs = 0, columns = 320; int dirty = ((Pico.est.rendstatus & PDRAW_SONIC_MODE) || Pico.m.dirtyPal); int sprep = Pico.est.rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); int skipped = Pico.est.rendstatus & PDRAW_SKIP_FRAME; @@ -1837,19 +1852,25 @@ PICO_INTERNAL void PicoFrameStart(void) Pico.est.rendstatus = 0; if ((Pico.video.reg[12] & 6) == 6) Pico.est.rendstatus |= PDRAW_INTERLACE; // interlace mode - if (!(Pico.video.reg[12] & 1)) + if (!(Pico.video.reg[12] & 1)) { Pico.est.rendstatus |= PDRAW_32_COLS; + if (!(PicoIn.opt & POPT_EN_SOFTSCALE)) { + columns = 256; + coffs = 32; + } + } if (Pico.video.reg[1] & 8) { Pico.est.rendstatus |= PDRAW_30_ROWS; - offs = 0; lines = 240; + loffs = 0; } + if (PicoIn.opt & POPT_DIS_32C_BORDER) + coffs = 0; if (Pico.est.rendstatus != rendstatus_old || lines != rendlines) { rendlines = lines; // mode_change() might reset rendstatus_old by calling SetColorFormat - emu_video_mode_change((lines == 240) ? 0 : 8, - lines, (Pico.video.reg[12] & 1) ? 
0 : 1); + emu_video_mode_change(loffs, lines, coffs, columns); rendstatus_old = Pico.est.rendstatus; } if (PicoIn.skipFrame) // preserve this until something is rendered at last @@ -1857,8 +1878,8 @@ PICO_INTERNAL void PicoFrameStart(void) if (sprep | skipped) Pico.est.rendstatus |= PDRAW_PARSE_SPRITES; - Pico.est.HighCol = HighColBase + offs * HighColIncrement; - Pico.est.DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; + Pico.est.HighCol = HighColBase + loffs * HighColIncrement; + Pico.est.DrawLineDest = (char *)DrawLineDestBase + loffs * DrawLineDestIncrement; Pico.est.DrawScanline = 0; skip_next_line = 0; diff --git a/pico/draw_arm.S b/pico/draw_arm.S index e05c2fd4..38d831f6 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1653,9 +1653,9 @@ FinalizeLine555: tst r12, #1 movne r2, #320/8 @ len bne .fl_no32colRGB555 - ldr r4, [r10, #OFS_EST_PicoOpt] + ldr r5, [r10, #OFS_EST_PicoOpt] mov r2, #256/8 - ldr r4, [r4] + ldr r4, [r5] tst r4, #0x4000 bne .fl_32scale_RGB555 tst r4, #0x0100 @@ -1705,15 +1705,167 @@ FinalizeLine555: .fl_32scale_RGB555: - mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 - orr r9, r9, #0x00e7 + ldr r5, [r5, #OFS_PicoIn_filter-OFS_PicoIn_opt] + + mov r9, #0xf700 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 + orr r9, r9, #0x00de #ifdef UNALIGNED_DRAWLINEDEST tst r0, #2 bne .fl_32scale_RGB555u #endif -.fl_loop32scale_RGB555: + ands r5, r5, #0x3 + addne pc, pc, r5, lsl #2 + b .fl_32scale_nn + b .fl_32scale_nn + b .fl_32scale_snn + b .fl_32scale_bl2 + b .fl_32scale_bl4 + +.fl_32scale_nn: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + and r6, lr, r12, lsr #15 + ldrh r6, [r3, r6] + and r10,lr, r12, lsr #23 + ldrh r10,[r3, r10] + + orr r4, r4, r5, lsl #16 + orr r5, r6, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r8, lr, r7, lsr #7 + ldrh r8, [r3, r8] + and r12,lr, r7, lsr #15 + ldrh r12,[r3, 
r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + + orr r6, r10,r6, lsl #16 + orr r8, r8,r12, lsl #16 + + subs r2, r2, #1 + + orr r10,r12,r7, lsl #16 + + stmia r0!, {r4,r5,r6,r8,r10} + bne .fl_32scale_nn + + ldmfd sp!, {r4-r10,pc} + +.fl_32scale_snn: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + and r6, lr, r12, lsr #15 + ldrh r6, [r3, r6] + and r10,lr, r12, lsr #23 + ldrh r10,[r3, r10] + + and r4, r4, r9 + and r5, r5, r9 + orr r4, r4, r5, lsl #16 + and r6, r6, r9 + add r5, r5, r6 + mov r5, r5, lsr #1 + orr r5, r5, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r8, lr, r7, lsr #7 + ldrh r8, [r3, r8] + and r12,lr, r7, lsr #15 + ldrh r12,[r3, r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + + and r6, r6, r9 + and r10,r10,r9 + orr r6, r10,r6, lsl #16 + and r12,r12,r9 + and r7, r7, r9 + orr r10,r12,r7, lsl #16 + + and r8, r8, r9 + add r12,r12,r8 + mov r12,r12,lsr #1 + orr r8, r8,r12, lsl #16 + + subs r2, r2, #1 + + stmia r0!, {r4,r5,r6,r8,r10} + bne .fl_32scale_snn + + ldmfd sp!, {r4-r10,pc} + +.fl_32scale_bl2: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + and r6, lr, r12, lsr #15 + ldrh r6, [r3, r6] + + and r4, r4, r9 + and r5, r5, r9 + add r10,r4, r5 + mov r10,r10,lsr #1 + orr r4, r4, r10,lsl #16 @ px0 | (px0+px1)/2 + + and r6, r6, r9 + add r5, r5, r6 + mov r5, r5, lsr #1 + orr r5, r5, r6, lsl #16 @ (px1+px2)/2 | px2 + + and r10,lr, r12, lsr #23 + ldrh r10,[r3, r10] + and r8, lr, r7, lsl #1 + ldrh r8, [r3, r8] + + and r10,r10,r9 + and r8, r8, r9 + orr r6, r10,r8, lsl #16 @ px3 | px4 + + and r12,lr, r7, lsr #15 + ldrh r12,[r3, r12] + and r10, lr, r7, lsr #23 + ldrh r10, [r3, r10] + and r7, lr, r7, lsr #7 + ldrh r7, [r3, r7] + + and r12,r12,r9 + and r10,r10,r9 + orr r10,r12,r10, lsl #16 @ px6 | px7 + + and r7, r7, r9 + add r12,r12,r7 + add r8, r8, r7 + mov r8, r8, lsr 
#1 + mov r12,r12,lsr #1 + orr r8, r8,r12, lsl #16 @ (px4+px5)/2 | (px5+px6)/2 + + subs r2, r2, #1 + + stmia r0!, {r4,r5,r6,r8,r10} + bne .fl_32scale_bl2 + + ldmfd sp!, {r4-r10,pc} + +.fl_32scale_bl4: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1721,16 +1873,21 @@ FinalizeLine555: ldrh r4, [r3, r4] and r5, lr, r12,lsr #7 ldrh r5, [r3, r5] - and r4, r4, r9, lsl #2 + + @ r4 = 1/4px0+3/4px1 : px0 + and r4, r4, r9 orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0 - and r5, r5, r9, lsl #2 + and r5, r5, r9 sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1 add r4, r4, r6, lsl #16 @ pix_d 0, 1 + and r6, lr, r12,lsr #15 ldrh r6, [r3, r6] and r12,lr, r12,lsr #23 ldrh r12,[r3, r12] - and r6, r6, r9, lsl #2 + + @ r5 = 3/4px2+1/4px3 : (px1+px2)/2 + and r6, r6, r9 add r5, r5, r6 mov r5, r5, lsr #1 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 @@ -1738,32 +1895,38 @@ FinalizeLine555: and r6, lr, r7, lsl #1 ldrh r6, [r3, r6] - and r12,r12,r9, lsl #2 + and r12,r12,r9 add r5, r5, r12,lsl #14 @ pix_d 2, 3 - and r6, r6, r9, lsl #2 + + @ r6 = px4 : px3 + and r6, r6, r9 orr r6, r12,r6, lsl #16 @ pix_d 4, 5 + @ r8 = (px5+px6)/2 : 1/4px4+3/4px5 and r12,lr, r7, lsr #7 ldrh r12,[r3, r12] and r10,lr, r7, lsr #15 ldrh r10,[r3, r10] - and r12,r12,r9, lsl #2 + and r12,r12,r9 sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1 add r8, r8, r6, lsr #18 + and r7, lr, r7, lsr #23 ldrh r7, [r3, r7] - and r10,r10,r9, lsl #2 + and r10,r10,r9 orr r8, r8, r10,lsl #15 add r8, r8, r12,lsl #15 @ pix_d 6, 7 + + @ r10 = px7 : 3/4px6+1/4px7 sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2 - and r7, r7, r9, lsl #2 + and r7, r7, r9 add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3 orr r10,r10,r7, lsl #16 @ pix_d 8, 9 subs r2, r2, #1 stmia r0!, {r4,r5,r6,r8,r10} - bne .fl_loop32scale_RGB555 + bne .fl_32scale_bl4 ldmfd sp!, {r4-r10,pc} @@ -1826,10 +1989,10 @@ FinalizeLine555: ldrh r6, [r3, r6] and r5, lr, r12,lsr #7 ldrh r5, [r3, r5] - and r6, r6, r9, lsl #2 + and r6, r6, r9 orr r4, r4, r6, lsl #16 @ r4 = pix_d -1, 0 - and r5, r5, r9, lsl #2 + and r5, r5, r9 
sub r8, r5, r5, lsr #2 @ r8 = 3/4 pix_s 1 add r6, r8, r6, lsr #2 @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1) orr r5, r6, r5, lsl #15 @@ -1838,20 +2001,20 @@ FinalizeLine555: ldrh r6, [r3, r6] and r12,lr, r12,lsr #23 ldrh r12,[r3, r12] - and r6, r6, r9, lsl #2 + and r6, r6, r9 add r5, r5, r6, lsl #15 @ r5 = pix_d 1, 2 and r8, lr, r7, lsl #1 ldrh r8, [r3, r8] and r10,lr, r7, lsr #7 ldrh r10,[r3, r10] - and r12,r12,r9, lsl #2 + and r12,r12,r9 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 add r6, r6, r12,lsr #2 orr r6, r6, r12,lsl #16 @ r6 = pix_d 3, 4 - and r8, r8, r9, lsl #2 - and r10,r10,r9, lsl #2 + and r8, r8, r9 + and r10,r10,r9 sub r12,r10,r10,lsr #2 @ r12 = 3/4 pix_s 5 orr r8, r8, r8, lsl #14 add r8, r8, r12,lsl #16 @ r8 = pix_d 5, 6 @@ -1859,12 +2022,12 @@ FinalizeLine555: ldrh r12,[r3, r12] and r7, lr, r7, lsr #23 ldrh r7, [r3, r7] - and r12,r12,r9, lsl #2 + and r12,r12,r9 add r10,r10,r12 mov r10,r10, lsr #1 sub r12,r12,r12,lsr #2 @ r12 = 3/4 pix_s 6 orr r10,r10,r12,lsl #16 - and r7, r7, r9, lsl #2 + and r7, r7, r9 add r10,r10,r7, lsl #14 @ r10 = pix_d 7, 8 subs r2, r2, #1 diff --git a/pico/mode4.c b/pico/mode4.c index 00a4aef9..76cbfabc 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -291,7 +291,7 @@ void PicoFrameStartMode4(void) } if (Pico.est.rendstatus != rendstatus_old || lines != rendlines) { - emu_video_mode_change(screen_offset, lines, 1); + emu_video_mode_change(screen_offset, lines, line_offset, 256); rendstatus_old = Pico.est.rendstatus; rendlines = lines; } @@ -352,6 +352,8 @@ void PicoDoHighPal555M4(void) Pico.est.HighPal[0xe0] = 0; } +#include + static void FinalizeLineRGB555M4(int line) { if (Pico.m.dirtyPal) @@ -364,15 +366,20 @@ static void FinalizeLineRGB555M4(int line) static void FinalizeLine8bitM4(int line) { - unsigned char *pd = Pico.est.DrawLineDest; - - if (DrawLineDestIncrement) - memcpy(pd + line_offset, Pico.est.HighCol + line_offset + 8, 256); + unsigned char *pd = Pico.est.DrawLineDest + line_offset; + unsigned char *ps = Pico.est.HighCol 
+ line_offset + 8; + + if (DrawLineDestIncrement) { + if (PicoIn.opt & POPT_EN_SOFTSCALE) + rh_upscale_nn_4_5(pd, 320, ps, 256, 256, f_nop); + else + memcpy(pd, ps, 256); + } } void PicoDrawSetOutputMode4(pdso_t which) { - line_offset = PicoIn.opt & POPT_DIS_32C_BORDER ? 0 : 32; + line_offset = PicoIn.opt & (POPT_DIS_32C_BORDER|POPT_EN_SOFTSCALE) ? 0 : 32; switch (which) { case PDF_8BIT: FinalizeLineM4 = FinalizeLine8bitM4; break; diff --git a/pico/pico.h b/pico/pico.h index 912d9502..76786d61 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -41,7 +41,7 @@ extern void *plat_mem_get_for_drc(size_t size); extern int plat_mem_set_exec(void *ptr, size_t size); // this one should handle display mode changes -extern void emu_video_mode_change(int start_line, int line_count, int is_32cols); +extern void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count); // this must switch to 16bpp mode extern void emu_32x_startup(void); @@ -86,7 +86,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; // the emulator is configured and some status is reported // through this global state (not saved in savestates) -typedef struct +typedef struct PicoInterface { unsigned int opt; // POPT_* bitfield @@ -101,6 +101,8 @@ typedef struct unsigned short quirks; // game-specific quirks: PQUIRK_* unsigned short overclockM68k; // overclock the emulated 68k, in % + unsigned short filter; // softscale filter type + int sndRate; // rate in Hz int sndFilterAlpha; // Low pass sound filter alpha (Q16) short *sndOut; // PCM output buffer diff --git a/pico/pico_int.h b/pico/pico_int.h index 83e81418..2038b6d9 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -356,6 +356,7 @@ struct PicoEState int DrawScanline; int rendstatus; void *DrawLineDest; // draw destination + int DrawLineDestIncr; unsigned char *HighCol; s32 *HighPreSpr; struct Pico *Pico; diff --git a/platform/common/emu.c b/platform/common/emu.c index 3f9ccff4..fd4a2519 100644 --- a/platform/common/emu.c 
+++ b/platform/common/emu.c @@ -1219,7 +1219,7 @@ void emu_cmn_forced_frame(int no_scale, int do_emu, void *buf) memset32((short *)g_screen_ptr + g_screen_ppitch * y, 0, g_screen_width * 2 / 4); - PicoIn.opt &= ~POPT_ALT_RENDERER; + PicoIn.opt &= ~(POPT_ALT_RENDERER|POPT_EN_SOFTSCALE); PicoIn.opt |= POPT_ACC_SPRITES; if (!no_scale && currentConfig.scaling) PicoIn.opt |= POPT_EN_SOFTSCALE; diff --git a/platform/common/emu.h b/platform/common/emu.h index a2110026..9e47e44a 100644 --- a/platform/common/emu.h +++ b/platform/common/emu.h @@ -37,10 +37,19 @@ extern int g_screen_ppitch; // pitch in pixels enum { EOPT_SCALE_NONE = 0, - EOPT_SCALE_SW, + // linux, GP2X: + EOPT_SCALE_SW = 1, EOPT_SCALE_HW, }; +enum { + EOPT_FILTER_NONE = 0, + // software scalers + EOPT_FILTER_SMOOTHER = 1, + EOPT_FILTER_BILINEAR1, + EOPT_FILTER_BILINEAR2, +}; + enum { EOPT_CONFIRM_NONE = 0, EOPT_CONFIRM_SAVE = 1, @@ -63,7 +72,7 @@ typedef struct _currentConfig_t { int CPUclock; int volume; int gamma; - int scaling; // gp2x: EOPT_SCALE_*; psp: bilinear filtering + int scaling; // EOPT_SCALE_* int vscaling; int rotation; // for UIQ float scale; // psp: screen scale @@ -72,7 +81,7 @@ typedef struct _currentConfig_t { int turbo_rate; int renderer; int renderer32x; - int filter; // pandora + int filter; // EOPT_FILTER_* video filter int analog_deadzone; int msh2_khz; int ssh2_khz; @@ -180,6 +189,7 @@ void plat_update_volume(int has_changed, int is_up); /* should be in libpicofe/plat.h */ void plat_video_clear_status(void); void plat_video_clear_buffers(void); +void plat_video_set_size(int w, int h); #ifdef __cplusplus } // extern "C" diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 5edeb79e..f8bf78c3 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -22,6 +22,7 @@ #include static void *shadow_fb; +static struct area { int w, h; } area; static struct in_pdata in_sdl_platform_data = { .defbinds = in_sdl_defbinds, @@ -81,54 +82,77 @@ void 
bgr_to_uyvy_init(void) } } -void rgb565_to_uyvy(void *d, const void *s, int pixels, int x2) +void rgb565_to_uyvy(void *d, const void *s, int w, int h, int pitch, int x2) { uint32_t *dst = d; const uint16_t *src = s; + int i; - if (x2) - for (; pixels > 0; src += 4, dst += 4, pixels -= 4) - { - struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; - struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; + if (x2) while (h--) { + for (i = w; i > 0; src += 4, dst += 4, i -= 4) + { + struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; + struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; #if CPU_IS_LE - dst[0] = (uyvy0->y << 24) | uyvy0->vyu; - dst[1] = (uyvy1->y << 24) | uyvy1->vyu; - dst[2] = (uyvy2->y << 24) | uyvy2->vyu; - dst[3] = (uyvy3->y << 24) | uyvy3->vyu; + dst[0] = (uyvy0->y << 24) | uyvy0->vyu; + dst[1] = (uyvy1->y << 24) | uyvy1->vyu; + dst[2] = (uyvy2->y << 24) | uyvy2->vyu; + dst[3] = (uyvy3->y << 24) | uyvy3->vyu; #else - dst[0] = uyvy0->y | (uyvy0->vyu << 8); - dst[1] = uyvy1->y | (uyvy1->vyu << 8); - dst[2] = uyvy2->y | (uyvy2->vyu << 8); - dst[3] = uyvy3->y | (uyvy3->vyu << 8); + dst[0] = uyvy0->y | (uyvy0->vyu << 8); + dst[1] = uyvy1->y | (uyvy1->vyu << 8); + dst[2] = uyvy2->y | (uyvy2->vyu << 8); + dst[3] = uyvy3->y | (uyvy3->vyu << 8); #endif - } else - for (; pixels > 0; src += 4, dst += 2, pixels -= 4) - { - struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; - struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; + } + src += pitch - w; + } else while (h--) { + for (i = w; i > 0; src += 4, dst += 2, i -= 4) + { + struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; + struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; #if CPU_IS_LE - dst[0] = (uyvy1->y << 24) | uyvy0->vyu; - dst[1] = (uyvy3->y << 24) | uyvy2->vyu; + dst[0] = (uyvy1->y << 24) | uyvy0->vyu; + dst[1] = (uyvy3->y << 24) | uyvy2->vyu; #else - dst[0] = uyvy1->y | 
(uyvy0->vyu << 8); - dst[1] = uyvy3->y | (uyvy2->vyu << 8); + dst[0] = uyvy1->y | (uyvy0->vyu << 8); + dst[1] = uyvy3->y | (uyvy2->vyu << 8); #endif + } + src += pitch - w; } } static int clear_buf_cnt, clear_stat_cnt; +void plat_video_set_size(int w, int h) +{ + if (area.w != w || area.h != h) { + area = (struct area) { w, h }; + + if (plat_sdl_change_video_mode(w, h, 0) < 0) { + // failed, revert to original resolution + plat_sdl_change_video_mode(g_screen_width, g_screen_height, 0); + w = g_screen_width, h = g_screen_height; + } + if (!plat_sdl_overlay && !plat_sdl_gl_active) { + g_screen_width = w; + g_screen_height = h; + g_screen_ppitch = w; + g_screen_ptr = plat_sdl_screen->pixels; + } + } +} + void plat_video_flip(void) { if (plat_sdl_overlay != NULL) { SDL_Rect dstrect = { 0, 0, plat_sdl_screen->w, plat_sdl_screen->h }; - SDL_LockYUVOverlay(plat_sdl_overlay); rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, - g_screen_ppitch * g_screen_height, - plat_sdl_overlay->w > 2*plat_sdl_overlay->h); + area.w, area.h, g_screen_ppitch, + plat_sdl_overlay->w >= 2*area.w); SDL_UnlockYUVOverlay(plat_sdl_overlay); SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect); } @@ -205,7 +229,7 @@ void plat_video_menu_end(void) SDL_LockYUVOverlay(plat_sdl_overlay); rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, - g_menuscreen_pp * g_menuscreen_h, 0); + g_menuscreen_w, g_menuscreen_h, g_menuscreen_pp, 0); SDL_UnlockYUVOverlay(plat_sdl_overlay); SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect); @@ -227,10 +251,10 @@ void plat_video_menu_leave(void) void plat_video_loop_prepare(void) { - // take over any new vout settings XXX ask plat_sdl for scaling instead! 
+ // take over any new vout settings plat_sdl_change_video_mode(g_menuscreen_w, g_menuscreen_h, 0); // switch over to scaled output if available - if (plat_sdl_overlay != NULL || plat_sdl_gl_active || currentConfig.scaling != EOPT_SCALE_NONE) { + if (plat_sdl_overlay != NULL || plat_sdl_gl_active) { g_screen_width = 320; g_screen_height = 240; g_screen_ppitch = g_screen_width; @@ -246,6 +270,7 @@ void plat_video_loop_prepare(void) g_screen_ptr = plat_sdl_screen->pixels; } plat_video_set_buffer(g_screen_ptr); + plat_video_set_size(g_screen_width, g_screen_height); } void plat_early_init(void) diff --git a/platform/common/upscale.c b/platform/common/upscale.c index b84c096e..ed3129f3 100644 --- a/platform/common/upscale.c +++ b/platform/common/upscale.c @@ -7,7 +7,7 @@ * nn: nearest neighbour * snn: "smoothed" nearest neighbour (see below) * bln: n-level-bilinear with n quantized weights - * quantization: 0: a<1/2*n, 1/n: 1/2*n<=a<3/2*n, etc + * quantization: 0: a<1/(2*n), 1/n: 1/(2*n)<=a<3/(2*n), etc * currently n=2, n=4 are implemented (there's n=8 mixing, but no filters) * [NB this has been brought to my attn, which is probably the same as bl2: * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] @@ -18,490 +18,586 @@ * a sharper look than a bilinear filter, at the price of some visible jags * on diagonal edges. * - * scaling modes: - * 256x___ -> 320x___ only horizontal scaling. Produces an aspect error of - * ~7% for NTSC 224 line modes, but is correct for PAL - * 256/320x224/240 - * -> 320x240 always produces 320x240 at DAR 4:3 - * 160x144 -> 320x240 game gear (currently unused) + * example scaling modes: + * 256x_Y_ -> 320x_Y_, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (NTSC 7% aspect err) + * 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (PAL 7% aspect err) + * 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (PAL 7% etc etc...) 
+ * 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 * * (C) 2021 kub */ #include "upscale.h" -/* 256x___ -> 320x___, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (wrong for NTSC) */ -void upscale_clut_nn_256_320x___(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int height) +/* X x Y -> X*5/4 x Y */ +void upscale_clut_nn_x_4_5(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) { int y; for (y = 0; y < height; y++) { - h_upscale_nn_4_5(di, ds, si, ss, 256, f_nop); + h_upscale_nn_4_5(di, ds, si, ss, width, f_nop); } } -void upscale_rgb_nn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) +void upscale_rgb_nn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y; for (y = 0; y < height; y++) { - h_upscale_nn_4_5(di, ds, si, ss, 256, f_pal); + h_upscale_nn_4_5(di, ds, si, ss, width, f_pal); } } -void upscale_rgb_snn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) +void upscale_rgb_snn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y; for (y = 0; y < height; y++) { - h_upscale_snn_4_5(di, ds, si, ss, 256, f_pal); + h_upscale_snn_4_5(di, ds, si, ss, width, f_pal); } } -void upscale_rgb_bl2_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) +void upscale_rgb_bl2_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y; for (y = 0; y < height; y++) { - h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal); + h_upscale_bl2_4_5(di, ds, si, ss, width, f_pal); } } -void upscale_rgb_bl4_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) +void upscale_rgb_bl4_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y; for (y = 0; y < height; y++) { - h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal); + 
h_upscale_bl4_4_5(di, ds, si, ss, width, f_pal); } } -/* 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (wrong for PAL) */ -void upscale_clut_nn_256_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss) +/* X x Y -> X*5/4 x Y*17/16 */ +void upscale_clut_nn_x_4_5_y_16_17(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) { + int swidth = width * 5/4; int y, j; - /* 14:15, 0 1 2 3 4 5 6 6 7 8 9 10 11 12 13 */ - for (y = 0; y < 224; y += 14) { - /* lines 0-6 */ - for (j = 0; j < 7; j++) { - h_upscale_nn_4_5(di, ds, si, ss, 256, f_nop); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_nop); } - /* lines 8-14 */ - di += ds; - for (j = 0; j < 7; j++) { - h_upscale_nn_4_5(di, ds, si, ss, 256, f_nop); + di += ds; + for (j = 0; j < 8; j++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_nop); } - /* line 7 */ - di -= 8*ds; - v_copy(&di[0], &di[-ds], 320, f_nop); - di += 8*ds; + + di -= 9*ds; + v_copy(&di[0], &di[-ds], swidth, f_nop); + di += 9*ds; } } -void upscale_rgb_nn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_nn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { + int swidth = width * 5/4; int y, j; - for (y = 0; y < 224; y += 14) { - for (j = 0; j < 7; j++) { - h_upscale_nn_4_5(di, ds, si, ss, 256, f_pal); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_pal); } di += ds; - for (j = 0; j < 7; j++) { - h_upscale_nn_4_5(di, ds, si, ss, 256, f_pal); + for (j = 0; j < 8; j++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_pal); } - di -= 8*ds; - v_copy(&di[0], &di[-ds], 320, f_nop); - di += 8*ds; + di -= 9*ds; + v_copy(&di[0], &di[-ds], swidth, f_nop); + di += 9*ds; } } -void upscale_rgb_snn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void 
upscale_rgb_snn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { + int swidth = width * 5/4; int y, j; - /* 14:15, 0 1 2 3 4 5 5+6 6+7 7+8 8 9 10 11 12 13 */ - for (y = 0; y < 224; y += 14) { - for (j = 0; j < 7; j++) { - h_upscale_snn_4_5(di, ds, si, ss, 256, f_pal); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_upscale_snn_4_5(di, ds, si, ss, width, f_pal); } di += ds; - for (j = 0; j < 7; j++) { - h_upscale_snn_4_5(di, ds, si, ss, 256, f_pal); + for (j = 0; j < 8; j++) { + h_upscale_snn_4_5(di, ds, si, ss, width, f_pal); } /* mix lines 6-8 */ - di -= 8*ds; - v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop); - v_mix(&di[-ds], &di[-2*ds], &di[-ds], 320, p_05, f_nop); - v_mix(&di[ ds], &di[ ds], &di[ 2*ds], 320, p_05, f_nop); - di += 8*ds; + di -= 9*ds; + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_05, f_nop); + v_mix(&di[-ds], &di[-2*ds], &di[-ds], swidth, p_05, f_nop); + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], swidth, p_05, f_nop); + di += 9*ds; } } -void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_bl2_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { + int swidth = width * 5/4; int y, j; - /* 14:15, 0 1 2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11 12 13 */ - for (y = 0; y < 224; y += 14) { - /* lines 0-2 */ - for (j = 0; j < 3; j++) { - h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal); - } - /* lines 3-11 mixing prep */ - di += ds; - for (j = 0; j < 11; j++) { - h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 4; j++) { + h_upscale_bl2_4_5(di, ds, si, ss, width, f_pal); } - di -= 12*ds; - /* mixing line 3: line 2 = -ds, line 3 = +ds */ - v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop); - di += ds; - /* mixing lines 4-5: line n-1 = 0, line n = +ds */ - for (j = 0; j < 2; j++) { - v_mix(&di[0], &di[0], &di[ds], 
320, p_025, f_nop); - di += ds; - } - /* mixing line 6-8 */ - for (j = 0; j < 3; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop); - di += ds; + di += ds; + for (j = 0; j < 12; j++) { + h_upscale_bl2_4_5(di, ds, si, ss, width, f_pal); } - /* mixing lines 9-11 */ - for (j = 0; j < 3; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop); + /* mix lines 3-10 */ + di -= 13*ds; + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_05, f_nop); + for (j = 0; j < 7; j++) { di += ds; + v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop); } - /* lines 12-14, already in place */ - di += 3*ds; + di += 6*ds; } } -void upscale_rgb_bl2_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_bl4_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { + int swidth = width * 5/4; int y, j; - /* 14:15, 0 1 2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10 11 12 13 */ - for (y = 0; y < 224; y += 14) { - for (j = 0; j < 3; j++) { - h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 2; j++) { + h_upscale_bl4_4_5(di, ds, si, ss, width, f_pal); } - di += ds; - for (j = 0; j < 11; j++) { - h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal); + di += ds; + for (j = 0; j < 14; j++) { + h_upscale_bl4_4_5(di, ds, si, ss, width, f_pal); } - /* mix lines 3-10 */ - di -= 12*ds; - v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop); - for (j = 0; j < 7; j++) { + di -= 15*ds; + /* mixing line 2: line 1 = -ds, line 2 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_025, f_nop); + di += ds; + /* mixing lines 3-5: line n-1 = 0, line n = +ds */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_025, f_nop); + di += ds; + } + /* mixing lines 6-9 */ + for (j = 0; j < 4; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop); + di += ds; + } + /* mixing lines 10-13 */ + for (j = 0; j < 4; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_075, 
f_nop); di += ds; - v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop); } - di += 5*ds; + /* lines 14-16, already in place */ + di += 3*ds; } } -void upscale_rgb_bl4_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +/* "classic" upscaler as found in several emulators. It's really more like a + * x*4/3, y*16/15 upscaler, with an additional 5th row/17th line just inserted + * from the source image. That gives nice n/4,n/16 alpha values plus better + * symmetry in each block and avoids "borrowing" a row/line between blocks. + */ +void upscale_rgb_bln_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { + int swidth = width * 5/4; int y, j; - /* 14:15, 0 0+1 1+2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11+12 12 13 */ - for (y = 0; y < 224; y += 14) { - /* line 0 */ - h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal); - /* lines 1-14 mixing prep */ + for (y = 0; y < height; y += 16) { + for (j = 0; j < 4; j++) { + h_upscale_bln_4_5(di, ds, si, ss, width, f_pal); + } di += ds; - for (j = 0; j < 13; j++) { - h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal); + for (j = 0; j < 12; j++) { + h_upscale_bln_4_5(di, ds, si, ss, width, f_pal); } - di -= 14*ds; - /* mixing line 1: line 0 = -ds, line 1 = +ds */ - v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop); + di -= 13*ds; + /* mixing line 4: line 3 = -ds, line 4 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_025, f_nop); di += ds; - /* mixing lines 2-4: line n-1 = 0, line n = +ds */ - for (j = 0; j < 3; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop); + /* mixing lines 5-6: line n-1 = 0, line n = +ds */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_025, f_nop); di += ds; } - /* mixing lines 5-8 */ - for (j = 0; j < 4; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop); + /* mixing line 7-9 */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop); di += ds; } - /* mixing lines 
9-12 */ - for (j = 0; j < 4; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop); + /* mixing lines 10-12 */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_075, f_nop); di += ds; } - /* lines 13-14, already in place */ - di += 2*ds; + /* lines 13-16, already in place */ + di += 4*ds; } } -void upscale_rgb_bl8_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +/* experimental 8 level bilinear for quality assessment */ +void upscale_rgb_bl8_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { - int y, j, d; + int swidth = width * 5/4; + int y, j; - /* 14:15, -1+0 0+1 1+2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11+12 12+13 13 */ - for (y = 0, d = ds; y < 224; y += 14, d = -ds) { - /* lines 0-14 mixing prep */ + for (y = 0; y < height; y += 16) { + for (j = 0; j < 2; j++) { + h_upscale_bl8_4_5(di, ds, si, ss, width, f_pal); + } di += ds; for (j = 0; j < 14; j++) { - h_upscale_bl8_4_5(di, ds, si, ss, 256, f_pal); + h_upscale_bl8_4_5(di, ds, si, ss, width, f_pal); } di -= 15*ds; - /* mixing line 0: line 0 = -ds, line 1 = +ds */ - v_mix(&di[0], &di[d], &di[ds], 320, p_0125, f_nop); + /* mixing line 2: line 2 = -ds, line 3 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_0125, f_nop); di += ds; - /* mixing line 1: line 1 = 0, line 2 = +ds */ - v_mix(&di[0], &di[0], &di[ds], 320, p_0125, f_nop); + /* mixing line 3: line 3 = 0, line 4 = +ds */ + v_mix(&di[0], &di[0], &di[ds], swidth, p_0125, f_nop); di += ds; - /* mixing lines 2-3: line n-1 = 0, line n = +ds */ + /* mixing lines 4-5: line n-1 = 0, line n = +ds */ for (j = 0; j < 2; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop); + v_mix(&di[0], &di[0], &di[ds], swidth, p_025, f_nop); di += ds; } - /* mixing lines 4-5 */ + /* mixing lines 6-7 */ for (j = 0; j < 2; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_0375, f_nop); + v_mix(&di[0], &di[0], &di[ds], swidth, p_0375, f_nop); di += ds; } - /* mixing lines 6-7 */ + /* mixing lines 8-9 */ for (j = 0; j < 2; 
j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop); + v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop); di += ds; } - /* mixing lines 8-9 */ + /* mixing lines 10-11 */ for (j = 0; j < 2; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_0625, f_nop); + v_mix(&di[0], &di[0], &di[ds], swidth, p_0625, f_nop); di += ds; } - /* mixing lines 10-11 */ + /* mixing lines 12-13 */ for (j = 0; j < 2; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop); + v_mix(&di[0], &di[0], &di[ds], swidth, p_075, f_nop); di += ds; } - /* mixing lines 12-13 */ + /* mixing lines 14-15 */ for (j = 0; j < 2; j++) { - v_mix(&di[0], &di[0], &di[ds], 320, p_0875, f_nop); + v_mix(&di[0], &di[0], &di[ds], swidth, p_0875, f_nop); di += ds; } - /* line 14, already in place */ + /* line 16, already in place */ di += ds; } } -/* 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (wrong for PAL) */ -void upscale_clut_nn_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss) +/* X x Y -> X x Y*17/16 */ +void upscale_clut_nn_y_16_17(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) { int y, j; - for (y = 0; y < 224; y += 14) { - for (j = 0; j < 7; j++) { - h_copy(di, ds, si, ss, 320, f_nop); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_nop); } - di += ds; - for (j = 0; j < 7; j++) { - h_copy(di, ds, si, ss, 320, f_nop); + di += ds; + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_nop); } - di -= 8*ds; - v_copy(&di[0], &di[-ds], 320, f_nop); - di += 8*ds; - + di -= 9*ds; + v_copy(&di[0], &di[-ds], width, f_nop); + di += 9*ds; } } -void upscale_rgb_nn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_nn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y, j; - for (y = 0; y < 224; y += 14) { - for (j = 0; j < 7; j++) { - h_copy(di, ds, si, ss, 320, f_pal); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_pal); } di += ds; - for (j = 0; j < 7; j++) { - h_copy(di, ds, si, ss, 320, f_pal); + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_pal); } - di -= 8*ds; - 
v_copy(&di[0], &di[-ds], 320, f_nop); - di += 8*ds; + di -= 9*ds; + v_copy(&di[0], &di[-ds], width, f_nop); + di += 9*ds; } } -void upscale_rgb_snn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_snn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y, j; - for (y = 0; y < 224; y += 14) { - for (j = 0; j < 7; j++) { - h_copy(di, ds, si, ss, 320, f_pal); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_pal); } di += ds; - for (j = 0; j < 7; j++) { - h_copy(di, ds, si, ss, 320, f_pal); + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_pal); } - di -= 8*ds; - v_mix(&di[ 0], &di[-ds], &di[ds], 320, p_05, f_nop); - v_mix(&di[-ds], &di[-2*ds], &di[-ds], 320, p_05, f_nop); - v_mix(&di[ ds], &di[ ds], &di[ 2*ds], 320, p_05, f_nop); - di += 8*ds; + /* mix lines 6-8 */ + di -= 9*ds; + v_mix(&di[0], &di[-ds], &di[ds], width, p_05, f_nop); + v_mix(&di[-ds], &di[-2*ds], &di[-ds], width, p_05, f_nop); + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], width, p_05, f_nop); + di += 9*ds; } } -void upscale_rgb_bl2_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_bl2_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y, j; - for (y = 0; y < 224; y += 14) { - for (j = 0; j < 3; j++) { - h_copy(di, ds, si, ss, 320, f_pal); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 4; j++) { + h_copy(di, ds, si, ss, width, f_pal); } - for (j = 0; j < 8; j++) { - v_mix(&di[0], &si[-ss], &si[0], 320, p_05, f_pal); - di += ds; - si += ss; + di += ds; + for (j = 0; j < 12; j++) { + h_copy(di, ds, si, ss, width, f_pal); } - si -= ss; - for (j = 0; j < 4; j++) { - h_copy(di, ds, si, ss, 320, f_pal); + /* mix lines 3-10 */ + di -= 13*ds; + v_mix(&di[0], &di[-ds], &di[ds], width, p_05, f_nop); + for (j = 0; j < 7; j++) { + di += ds; + v_mix(&di[0], 
&di[0], &di[ds], width, p_05, f_nop); } + di += 6*ds; } } -void upscale_rgb_bl4_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_bl4_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y, j; - for (y = 0; y < 224; y += 14) { - h_copy(di, ds, si, ss, 320, f_pal); + for (y = 0; y < height; y += 16) { + for (j = 0; j < 2; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 14; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + di -= 15*ds; + /* mixing line 2: line 1 = -ds, line 2 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], width, p_025, f_nop); + di += ds; + /* mixing lines 3-5: line n-1 = 0, line n = +ds */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], width, p_025, f_nop); + di += ds; + } + /* mixing lines 6-9 */ for (j = 0; j < 4; j++) { - v_mix(&di[0], &si[-ss], &si[0], 320, p_025, f_pal); + v_mix(&di[0], &di[0], &di[ds], width, p_05, f_nop); di += ds; - si += ss; } + /* mixing lines 10-13 */ for (j = 0; j < 4; j++) { - v_mix(&di[0], &si[-ss], &si[0], 320, p_05, f_pal); + v_mix(&di[0], &di[0], &di[ds], width, p_075, f_nop); di += ds; - si += ss; } - for (j = 0; j < 4; j++) { - v_mix(&di[0], &si[-ss], &si[0], 320, p_075, f_pal); + /* lines 14-16, already in place */ + di += 3*ds; + } +} + +/* X x Y -> X*2/1 x Y, e.g. 
for X 160->320 (GG) */ +void upscale_clut_nn_x_1_2(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_nn_1_2(di, ds, si, ss, width, f_nop); + } +} + +void upscale_rgb_nn_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_nn_1_2(di, ds, si, ss, width, f_pal); + } +} + +void upscale_rgb_bl2_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_bl2_1_2(di, ds, si, ss, width, f_pal); + } +} + +/* X x Y -> X*2/1 x Y*5/3 (GG) */ +void upscale_clut_nn_x_1_2_y_3_5(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) +{ + int swidth = width * 2; + int y, j; + + for (y = 0; y < height; y += 3) { + /* lines 0,2,4 */ + for (j = 0; j < 3; j++) { + h_upscale_nn_1_2(di, ds, si, ss, width, f_nop); di += ds; - si += ss; } - si -= ss; + /* lines 1,3 */ + di -= 5*ds; for (j = 0; j < 2; j++) { - h_copy(di, ds, si, ss, 320, f_pal); + v_copy(&di[0], &di[-ds], swidth, f_nop); + di += 2*ds; } } } -/* 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 */ -/* NB for smoother image could scale to 288x216, x*9/5, y*3/2 ? 
- * h: 11111 11112 22222 22233 33333 33444 44444 45555 55555 - * 1 1 2 2+3 3 3+4 4 5 5 - * v: 11 12 22 - * 1 1+2 2 - */ -void upscale_clut_nn_160_320x144_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss) +void upscale_rgb_nn_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { + int swidth = width * 2; int y, j; - /* 3:5, 0 0 1 1 2 */ - for (y = 0; y < 144; y += 3) { - /* lines 0,2,4 */ + for (y = 0; y < height; y += 3) { for (j = 0; j < 3; j++) { - h_upscale_nn_1_2(di, ds, si, ss, 160, f_nop); + h_upscale_nn_1_2(di, ds, si, ss, width, f_pal); + di += ds; + } + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_copy(&di[0], &di[-ds], swidth, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_bl2_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 2; + int y, j; + + for (y = 0; y < height; y += 3) { + for (j = 0; j < 3; j++) { + h_upscale_bl2_1_2(di, ds, si, ss, width, f_pal); di += ds; } - /* lines 1,3 */ di -= 5*ds; for (j = 0; j < 2; j++) { - v_copy(&di[0], &di[-ds], 320, f_nop); + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_05, f_nop); di += 2*ds; } } } -void upscale_rgb_nn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_bl4_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 2; + int y, j, d; + + /* for 1st block backwards reference virtually duplicate source line 0 */ + for (y = 0, d = 2*ds; y < height; y += 3, d = -ds) { + di += 2*ds; + for (j = 0; j < 3; j++) { + h_upscale_bl2_1_2(di, ds, si, ss, width, f_pal); + } + di -= 5*ds; + v_mix(&di[0], &di[d ], &di[2*ds], swidth, p_05, f_nop); /*-1+0 */ + di += ds; + v_mix(&di[0], &di[ds], &di[2*ds], swidth, p_075, f_nop);/* 0+1 */ + di += ds; + v_mix(&di[0], &di[ 0], &di[ ds], swidth, p_025, f_nop);/* 0+1 */ + di += ds; + v_mix(&di[0], &di[ 0], &di[ ds], swidth, p_05, 
f_nop); /* 1+2 */ + di += 2*ds; + } +} + +/* X x Y -> X x Y*5/3, e.g. for Y 144->240 (GG) */ +void upscale_clut_nn_y_3_5(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) { int y, j; - for (y = 0; y < 144; y += 3) { + for (y = 0; y < height; y += 3) { + /* lines 0,2,4 */ for (j = 0; j < 3; j++) { - h_upscale_nn_1_2(di, ds, si, ss, 160, f_pal); + h_copy(di, ds, si, ss, width, f_nop); di += ds; } + /* lines 1,3 */ di -= 5*ds; for (j = 0; j < 2; j++) { - v_copy(&di[0], &di[-ds], 320, f_nop); + v_copy(&di[0], &di[-ds], width, f_nop); di += 2*ds; } } } -void upscale_rgb_snn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_nn_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y, j; - /* 3:5, 0 0+1 1 1+2 2 */ - for (y = 0; y < 144; y += 3) { + for (y = 0; y < height; y += 3) { for (j = 0; j < 3; j++) { - h_upscale_nn_1_2(di, ds, si, ss, 160, f_pal); + h_copy(di, ds, si, ss, width, f_pal); di += ds; } di -= 5*ds; for (j = 0; j < 2; j++) { - v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop); + v_copy(&di[0], &di[-ds], width, f_nop); di += 2*ds; } } } -void upscale_rgb_bl2_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_bl2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) { int y, j; - /* 3:5, 0 0+1 1 1+2 2 */ - for (y = 0; y < 144; y += 3) { + for (y = 0; y < height; y += 3) { for (j = 0; j < 3; j++) { - h_upscale_bl2_1_2(di, ds, si, ss, 160, f_pal); + h_copy(di, ds, si, ss, width, f_pal); di += ds; } di -= 5*ds; for (j = 0; j < 2; j++) { - v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop); + v_mix(&di[0], &di[-ds], &di[ds], width, p_05, f_nop); di += 2*ds; } } } -void upscale_rgb_bl4_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_bl4_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int 
width, int height, u16 *pal) { int y, j, d; - /* 3:5, -1+0, 0+1 0+1 1+2 2 - * for 1st block backwards reference virtually duplicate source line 0 */ - for (y = 0, d = 2*ds; y < 144; y += 3, d = -ds) { + /* for 1st block backwards reference virtually duplicate source line 0 */ + for (y = 0, d = 2*ds; y < height; y += 3, d = -ds) { di += 2*ds; for (j = 0; j < 3; j++) { - h_upscale_bl2_1_2(di, ds, si, ss, 160, f_pal); + h_copy(di, ds, si, ss, width, f_pal); } di -= 5*ds; - v_mix(&di[0], &di[d ], &di[2*ds], 320, p_05, f_nop); /*-1+0 */ + v_mix(&di[0], &di[d ], &di[2*ds], width, p_05, f_nop); /*-1+0 */ di += ds; - v_mix(&di[0], &di[ds], &di[2*ds], 320, p_075, f_nop); /* 0+1 */ + v_mix(&di[0], &di[ds], &di[2*ds], width, p_075, f_nop);/* 0+1 */ di += ds; - v_mix(&di[0], &di[ 0], &di[ ds], 320, p_025, f_nop); /* 0+1 */ + v_mix(&di[0], &di[ 0], &di[ ds], width, p_025, f_nop);/* 0+1 */ di += ds; - v_mix(&di[0], &di[ 0], &di[ ds], 320, p_05, f_nop); /* 1+2 */ + v_mix(&di[0], &di[ 0], &di[ ds], width, p_05, f_nop); /* 1+2 */ di += 2*ds; } } diff --git a/platform/common/upscale.h b/platform/common/upscale.h index 58076610..db342c47 100644 --- a/platform/common/upscale.h +++ b/platform/common/upscale.h @@ -7,7 +7,7 @@ * nn: nearest neighbour * snn: "smoothed" nearest neighbour (see below) * bln: n-level-bilinear with n quantized weights - * quantization: 0: a<1/2*n, 1/n: 1/2*n<=a<3/2*n, etc + * quantization: 0: a<1/(2*n), 1/n: 1/(2*n)<=a<3/(2*n), etc * currently n=2, n=4 are implemented (there's n=8 mixing, but no filters) * [NB this has been brought to my attn, which is probably the same as bl2: * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] @@ -18,34 +18,37 @@ * a sharper look than a bilinear filter, at the price of some visible jags * on diagonal edges. * - * scaling modes: - * 256x___ -> 320x___ only horizontal scaling. 
Produces an aspect error of - * ~7% for NTSC 224 line modes, but is correct for PAL - * 256/320x224/240 - * -> 320x240 always produces 320x240 at DAR 4:3 -* 160x144 -> 320x240 game gear (currently unused) -* + * example scaling modes: + * 256x_Y_ -> 320x_Y_, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (NTSC 7% aspect err) + * 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (PAL 7% aspect err) + * 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (PAL 7% etc etc...) + * 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 + * + * * (C) 2021 kub + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. */ #include /* RGB565 pixel mixing, see https://www.compuphase.com/graphic/scale3.htm and http://blargg.8bitalley.com/info/rgb_mixing.html */ /* 2-level mixing */ -//#define p_05(p1,p2) (((p1)+(p2) + ( ((p1)^(p2))&0x0821))>>1) // round up -//#define p_05(p1,p2) (((p1)+(p2) - ( ((p1)^(p2))&0x0821))>>1) // round down -#define p_05(p1,p2) (((p1)&(p2)) + ((((p1)^(p2))&~0x0821)>>1)) +//#define p_05(d,p1,p2) d=(((p1)+(p2) + ( ((p1)^(p2))&0x0821))>>1) // round up +//#define p_05(d,p1,p2) d=(((p1)+(p2) - ( ((p1)^(p2))&0x0821))>>1) // round down +#define p_05(d,p1,p2) d=(((p1)&(p2)) + ((((p1)^(p2))&~0x0821)>>1)) /* 4-level mixing, 2 times slower */ // 1/4*p1 + 3/4*p2 = 1/2*(1/2*(p1+p2) + p2) -#define p_025(p1,p2) (t=p_05(p1, p2), p_05( t, p2)) -#define p_075(p1,p2) p_025(p2,p1) +#define p_025(d,p1,p2) p_05(t, p1, p2); p_05( d, t, p2) +#define p_075(d,p1,p2) p_025(d,p2,p1) /* 8-level mixing, 3 times slower */ // 1/8*p1 + 7/8*p2 = 1/2*(1/2*(1/2*(p1+p2) + p2) + p2) -#define p_0125(p1,p2) (t=p_05(p1, p2), u=p_05( t, p2), p_05( u, p2)) +#define p_0125(d,p1,p2) p_05(t, p1, p2); p_05( u, t, p2); p_05( d, u, p2) // 3/8*p1 + 5/8*p2 = 1/2*(1/2*(1/2*(p1+p2) + p2) + 1/2*(p1+p2)) -#define p_0375(p1,p2) (t=p_05(p1, p2), u=p_05( t, p2), p_05( u, t)) -#define p_0625(p1,p2) p_0375(p2,p1) -#define p_0875(p1,p2) p_0125(p2,p1) 
+#define p_0375(d,p1,p2) p_05(t, p1, p2); p_05( u, t, p2); p_05( d, u, t) +#define p_0625(d,p1,p2) p_0375(d,p2,p1) +#define p_0875(d,p1,p2) p_0125(d,p2,p1) /* pixel transforms */ #define f_pal(v) pal[v] // convert CLUT index -> RGB565 @@ -55,7 +58,7 @@ /* scalers h: 256->320: - (4:5) (256x224/240 -> 320x224/240) -256->299: - (6:7) (256x224 -> 299x224, DAR 4:3, 10.5 px border ) +256->299: - (6:7) (256x224 -> 299x224, alt?) 160->320: - (1:2) 2x (160x144 -> 320x240, GG) 160->288: - (5:9) (160x144 -> 288x216, GG alt?) */ @@ -95,7 +98,7 @@ scalers h: for (i = w/4; i > 0; i--, si += 4, di += 5) { \ di[0] = f(si[0]); \ di[1] = f(si[1]); \ - di[2] = p_05(f(si[1]),f(si[2])); \ + p_05(di[2], f(si[1]),f(si[2])); \ di[3] = f(si[2]); \ di[4] = f(si[3]); \ } \ @@ -104,12 +107,12 @@ scalers h: } while (0) #define h_upscale_bln_4_5(di,ds,si,ss,w,f) do { \ - int i, t; \ + int i; u16 t; \ for (i = w/4; i > 0; i--, si += 4, di += 5) { \ di[0] = f(si[0]); \ - di[1] = p_025(f(si[0]),f(si[1])); \ - di[2] = p_05 (f(si[1]),f(si[2])); \ - di[3] = p_075(f(si[2]),f(si[3])); \ + p_025(di[1], f(si[0]),f(si[1])); \ + p_05 (di[2], f(si[1]),f(si[2])); \ + p_075(di[3], f(si[2]),f(si[3])); \ di[4] = f(si[3]); \ } \ di += ds - w/4*5; \ @@ -120,8 +123,8 @@ scalers h: int i; \ for (i = w/4; i > 0; i--, si += 4, di += 5) { \ di[0] = f(si[0]); \ - di[1] = p_05(f(si[0]),f(si[1])); \ - di[2] = p_05(f(si[1]),f(si[2])); \ + p_05(di[1], f(si[0]),f(si[1])); \ + p_05(di[2], f(si[1]),f(si[2])); \ di[3] = f(si[2]); \ di[4] = f(si[3]); \ } \ @@ -130,12 +133,12 @@ scalers h: } while (0) #define h_upscale_bl4_4_5(di,ds,si,ss,w,f) do { \ - int i, t; uint p = f(si[0]); \ + int i; u16 t, p = f(si[0]); \ for (i = w/4; i > 0; i--, si += 4, di += 5) { \ - di[0] = p_025(p, f(si[0])); \ - di[1] = p_05 (f(si[0]),f(si[1])); \ - di[2] = p_05 (f(si[1]),f(si[2])); \ - di[3] = p_075(f(si[2]),f(si[3])); \ + p_025(di[0], p, f(si[0])); \ + p_05 (di[1], f(si[0]),f(si[1])); \ + p_05 (di[2], f(si[1]),f(si[2])); \ + p_075(di[3], 
f(si[2]),f(si[3])); \ di[4] = p = f(si[3]); \ } \ di += ds - w/4*5; \ @@ -143,12 +146,12 @@ scalers h: } while (0) #define h_upscale_bl8_4_5(di,ds,si,ss,w,f) do { \ - int i, t, u; uint p = f(si[0]); \ + int i; u16 t, u, p = f(si[0]); \ for (i = w/4; i > 0; i--, si += 4, di += 5) { \ - di[0] = p_025(p, f(si[0])); \ - di[1] = p_0375(f(si[0]),f(si[1])); \ - di[2] = p_0625(f(si[1]),f(si[2])); \ - di[3] = p_075(f(si[2]),f(si[3])); \ + p_025 (di[0], p, f(si[0])); \ + p_0375(di[1], f(si[0]),f(si[1])); \ + p_0625(di[2], f(si[1]),f(si[2])); \ + p_075 (di[3], f(si[2]),f(si[3])); \ di[4] = p = f(si[3]); \ } \ di += ds - w/4*5; \ @@ -195,7 +198,7 @@ scalers h: di[0] = f(si[0]); \ di[1] = f(si[1]); \ di[2] = f(si[2]); \ - di[3] = p_05(f(si[2]),f(si[3])); \ + p_05(di[3], f(si[2]),f(si[3])); \ di[4] = f(si[3]); \ di[5] = f(si[4]); \ di[6] = f(si[5]); \ @@ -208,10 +211,10 @@ scalers h: int i; \ for (i = w/6; i > 0; i--, si += 6, di += 7) { \ di[0] = f(si[0]); \ - di[1] = p_05(f(si[0]),f(si[1])); \ - di[2] = p_05(f(si[1]),f(si[2])); \ - di[3] = p_05(f(si[2]),f(si[3])); \ - di[4] = p_05(f(si[3]),f(si[4])); \ + p_05(di[1], f(si[0]),f(si[1])); \ + p_05(di[2], f(si[1]),f(si[2])); \ + p_05(di[3], f(si[2]),f(si[3])); \ + p_05(di[4], f(si[3]),f(si[4])); \ di[5] = f(si[4]); \ di[6] = f(si[5]); \ } \ @@ -220,14 +223,14 @@ scalers h: } while (0) #define h_upscale_bl4_6_7(di,ds,si,ss,w,f) do { \ - int i, t; uint p = f(si[0]); \ + int i; u16 t, p = f(si[0]); \ for (i = w/6; i > 0; i--, si += 6, di += 7) { \ - di[0] = p_025(p,f(si[0])); \ - di[1] = p_025(f(si[0]),f(si[1])); \ - di[2] = p_05 (f(si[1]),f(si[2])); \ - di[3] = p_05 (f(si[2]),f(si[3])); \ - di[4] = p_075(f(si[3]),f(si[4])); \ - di[5] = p_075(f(si[4]),f(si[5])); \ + p_025(di[0], p, f(si[0])); \ + p_025(di[1], f(si[0]),f(si[1])); \ + p_05 (di[2], f(si[1]),f(si[2])); \ + p_05 (di[3], f(si[2]),f(si[3])); \ + p_075(di[4], f(si[3]),f(si[4])); \ + p_075(di[5], f(si[4]),f(si[5])); \ di[6] = p = f(si[5]); \ } \ di += ds - w/6*7; \ @@ -258,9 
+261,9 @@ scalers h: di[0] = f(si[0]); \ di[1] = f(si[0]); \ di[2] = f(si[1]); \ - di[3] = p_05(f(si[1]),f(si[2])); \ + p_05(di[3], f(si[1]),f(si[2])); \ di[4] = f(si[2]); \ - di[5] = p_05(f(si[2]),f(si[3])); \ + p_05(di[5], f(si[2]),f(si[3])); \ di[6] = f(si[3]); \ di[7] = f(si[4]); \ di[8] = f(si[4]); \ @@ -273,13 +276,13 @@ scalers h: int i; \ for (i = w/5; i > 0; i--, si += 5, di += 9) { \ di[0] = f(si[0]); \ - di[1] = p_05(f(si[0]),f(si[1])); \ + p_05(di[1], f(si[0]),f(si[1])); \ di[2] = f(si[1]); \ - di[3] = p_05(f(si[1]),f(si[2])); \ + p_05(di[3], f(si[1]),f(si[2])); \ di[4] = f(si[2]); \ - di[5] = p_05(f(si[2]),f(si[3])); \ + p_05(di[5], f(si[2]),f(si[3])); \ di[6] = f(si[3]); \ - di[7] = p_05(f(si[3]),f(si[4])); \ + p_05(di[7], f(si[3]),f(si[4])); \ di[8] = f(si[4]); \ } \ di += ds - w/5*9; \ @@ -287,16 +290,16 @@ scalers h: } while (0) #define h_upscale_bl4_5_9(di,ds,si,ss,w,f) do { \ - int i, t; uint p = f(si[0]); \ + int i; u16 t, p = f(si[0]); \ for (i = w/5; i > 0; i--, si += 5, di += 9) { \ - di[0] = p_05 (p,f(si[0])); \ + p_05 (di[0], p, f(si[0])); \ di[1] = f(si[0]); \ - di[2] = p_025(f(si[0]),f(si[1])); \ - di[3] = p_075(f(si[1]),f(si[2])); \ - di[4] = p_025(f(si[1]),f(si[2])); \ - di[5] = p_075(f(si[2]),f(si[3])); \ + p_025(di[2], f(si[0]),f(si[1])); \ + p_075(di[3], f(si[1]),f(si[2])); \ + p_025(di[4], f(si[1]),f(si[2])); \ + p_075(di[5], f(si[2]),f(si[3])); \ di[6] = f(si[3]); \ - di[7] = p_05 (f(si[3]),f(si[4])); \ + p_05 (di[7], f(si[3]),f(si[4])); \ di[8] = p = f(si[4]); \ } \ di += ds - w/5*9; \ @@ -319,9 +322,9 @@ scalers h: #define h_upscale_bl2_1_2(di,ds,si,ss,w,f) do { \ int i; uint p = f(si[0]); \ for (i = w/2; i > 0; i--, si += 2, di += 4) { \ - di[0] = p_05 (p, f(si[0])); \ + p_05 (di[0], p, f(si[0])); \ di[1] = f(si[0]); \ - di[2] = p_05 (f(si[0]), f(si[1])); \ + p_05 (di[2], f(si[0]),f(si[1])); \ di[3] = p = f(si[1]); \ } \ di += ds - w*2; \ @@ -350,12 +353,12 @@ scalers v: */ #define v_mix(di,li,ri,w,p_mix,f) do { \ - int i, t, u; 
(void)t, (void)u; \ + u16 i, t, u; (void)t, (void)u; \ for (i = 0; i < w; i += 4) { \ - (di)[i ] = p_mix(f((li)[i ]), f((ri)[i ])); \ - (di)[i+1] = p_mix(f((li)[i+1]), f((ri)[i+1])); \ - (di)[i+2] = p_mix(f((li)[i+2]), f((ri)[i+2])); \ - (di)[i+3] = p_mix(f((li)[i+3]), f((ri)[i+3])); \ + p_mix((di)[i ], f((li)[i ]),f((ri)[i ])); \ + p_mix((di)[i+1], f((li)[i+1]),f((ri)[i+1])); \ + p_mix((di)[i+2], f((li)[i+2]),f((ri)[i+2])); \ + p_mix((di)[i+3], f((li)[i+3]),f((ri)[i+3])); \ } \ } while (0) @@ -369,32 +372,222 @@ scalers v: } \ } while (0) +/* scale 14:15 */ +#define v_upscale_nn_14_15(di,ds,w,l) do { \ + if (++l == 7) { \ + di += ds; \ + } else if (l >= 14) { \ + l = 0; \ + di -= 7*ds; \ + v_copy(&di[0], &di[-ds], w, f_nop); \ + di += 7*ds; \ + } \ +} while (0) + +#define v_upscale_snn_14_15(di,ds,w,l) do { \ + if (++l == 7) { \ + di += ds; \ + } else if (l >= 14) { \ + l = 0; \ + di -= 7*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + v_mix(&di[-ds], &di[-2*ds], &di[-ds], w, p_05, f_nop); \ + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], w, p_05, f_nop); \ + di += 7*ds; \ + } \ +} while (0) + +#define v_upscale_bl2_14_15(di,ds,w,l) do { \ + if (++l == 3) { \ + di += ds; \ + } else if (l >= 14) { \ + int j; \ + l = 0; \ + di -= 11*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + for (j = 0; j < 7; j++) { \ + di += ds; \ + v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \ + } \ + di += 4*ds; \ + } \ +} while (0) + +#define v_upscale_bl4_14_15(di,ds,w,l) do { \ + if (++l == 1) { \ + di += ds; \ + } else if (l >= 14) { \ + int j; \ + l = 0; \ + di -= 13*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_025, f_nop); \ + di += ds; \ + for (j = 0; j < 3; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_025, f_nop); \ + di += ds; \ + } \ + for (j = 0; j < 4; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \ + di += ds; \ + } \ + for (j = 0; j < 4; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_075, f_nop); \ + di += ds; \ + } \ + di += 1*ds; \ + } 
\ +} while (0) + +/* scale 16:17 */ +#define v_upscale_nn_16_17(di,ds,w,l) do { \ + if (++l == 8) { \ + di += ds; \ + } else if (l >= 16) { \ + l = 0; \ + di -= 8*ds; \ + v_copy(&di[0], &di[-ds], w, f_nop); \ + di += 8*ds; \ + } \ +} while (0) + +#define v_upscale_snn_16_17(di,ds,w,l) do { \ + if (++l == 8) { \ + di += ds; \ + } else if (l >= 16) { \ + l = 0; \ + di -= 8*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + v_mix(&di[-ds], &di[-2*ds], &di[-ds], w, p_05, f_nop); \ + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], w, p_05, f_nop); \ + di += 8*ds; \ + } \ +} while (0) + +#define v_upscale_bl2_16_17(di,ds,w,l) do { \ + if (++l == 4) { \ + di += ds; \ + } else if (l >= 16) { \ + int j; \ + l = 0; \ + di -= 12*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + for (j = 0; j < 7; j++) { \ + di += ds; \ + v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \ + } \ + di += 5*ds; \ + } \ +} while (0) + +#define v_upscale_bl4_16_17(di,ds,w,l) do { \ + if (++l == 2) { \ + di += ds; \ + } else if (l >= 16) { \ + int j; \ + l = 0; \ + di -= 14*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_025, f_nop); \ + di += ds; \ + for (j = 0; j < 3; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_025, f_nop); \ + di += ds; \ + } \ + for (j = 0; j < 4; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \ + di += ds; \ + } \ + for (j = 0; j < 4; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_075, f_nop); \ + di += ds; \ + } \ + di += 2*ds; \ + } \ +} while (0) + +/* scale 3:5 */ +#define v_upscale_nn_3_5(di,ds,w,l) do { \ + if (++l < 3) { \ + di += ds; \ + } else { \ + int j; \ + l = 0; \ + di -= 4*ds; \ + for (j = 0; j < 2; j++) { \ + v_copy(&di[0], &di[-ds], w, f_nop); \ + di += 2*ds; \ + } \ + } \ +} while (0) + +#define v_upscale_snn_3_5(di,ds,w,l) do { \ + if (++l < 3) { \ + di += ds; \ + } else { \ + int j; \ + l = 0; \ + di -= 4*ds; \ + for (j = 0; j < 2; j++) { \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + di += 2*ds; \ + } \ + } \ +} while (0) 
+ +/* scale 2:3 */ +#define v_upscale_nn_2_3(di,ds,w,l) do { \ + if (++l < 2) { \ + di += ds; \ + } else { \ + int j; \ + l = 0; \ + di -= 2*ds; \ + v_copy(&di[0], &di[-ds], w, f_nop); \ + di += 2*ds; \ + } \ +} while (0) + +#define v_upscale_snn_2_3(di,ds,w,l) do { \ + if (++l < 2) { \ + di += ds; \ + } else { \ + int j; \ + l = 0; \ + di -= 2*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + di += 2*ds; \ + } \ +} while (0) + + +/* X x Y -> X*5/4 x Y, for X 256->320 */ +void upscale_rgb_nn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_snn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); + +/* X x Y -> X x Y*17/16, for Y 224->238 or 192->204 (SMS) */ +void upscale_rgb_nn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_snn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +/* X x Y -> X*5/4 x Y*17/16 */ +void upscale_rgb_nn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_snn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int 
width, int height, u16 *pal); -/* 256x___ -> 320x___, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (wrong for NTSC) */ -void upscale_clut_nn_256_320x___(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int height); -void upscale_rgb_nn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); -void upscale_rgb_snn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); -void upscale_rgb_bl2_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); -void upscale_rgb_bl4_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); +/* X x Y -> X*2/1 x Y, e.g. for X 160->320 (GG) */ +void upscale_rgb_nn_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); -/* 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (wrong for PAL) */ -void upscale_clut_nn_256_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss); -void upscale_rgb_nn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_snn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_bl2_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_bl4_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +/* X x Y -> X x Y*5/3, e.g. 
for Y 144->240 (GG) */ +void upscale_rgb_nn_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); -/* 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (wrong for PAL) */ -void upscale_clut_nn_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss); -void upscale_rgb_nn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_snn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_bl2_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_bl4_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +/* X x Y -> X*2/1 x Y*5/3 (GG) */ +void upscale_rgb_nn_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); -/* 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 */ -void upscale_clut_nn_160_320x144_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss); -void upscale_rgb_nn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_snn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_bl2_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_bl4_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 88379985..0622691b 100644 
--- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -558,14 +558,16 @@ static void vid_reset_mode(void) Pico.m.dirtyPal = 1; PicoIn.opt &= ~POPT_EN_SOFTSCALE; - if (currentConfig.scaling == EOPT_SCALE_SW) + if (currentConfig.scaling == EOPT_SCALE_SW) { PicoIn.opt |= POPT_EN_SOFTSCALE; + PicoIn.filter = EOPT_FILTER_BILINEAR2; + } // palette converters for 8bit modes make_local_pal = (PicoIn.AHW & PAHW_SMS) ? make_local_pal_sms : make_local_pal_md; } -void emu_video_mode_change(int start_line, int line_count, int is_32cols) +void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count) { int scalex = 320, scaley = 240; int ln_offs = 0; @@ -578,10 +580,10 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) /* set up hwscaling here */ PicoIn.opt &= ~POPT_DIS_32C_BORDER; - if (is_32cols && currentConfig.scaling == EOPT_SCALE_HW) { - scalex = 256; + if (col_count < 320 && currentConfig.scaling == EOPT_SCALE_HW) { + scalex = col_count; PicoIn.opt |= POPT_DIS_32C_BORDER; - osd_fps_x = OSD_FPS_X - 64; + osd_fps_x = col_count - (320-OSD_FPS_X); } if (currentConfig.vscaling == EOPT_SCALE_HW) { diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 0969017b..7c63c97b 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -84,7 +84,6 @@ static retro_environment_t environ_cb; static retro_audio_sample_batch_t audio_batch_cb; #define VOUT_MAX_WIDTH 320 -#define VOUT_32COL_WIDTH 256 #define VOUT_MAX_HEIGHT 240 #define INITIAL_SND_RATE 44100 @@ -99,7 +98,8 @@ static bool old_show_overscan = false; /* Required to allow on the fly changes to 'show overscan' */ static int vm_current_start_line = -1; static int vm_current_line_count = -1; -static int vm_current_is_32cols = -1; +static int vm_current_start_col = -1; +static int vm_current_col_count = -1; static int vout_16bit = 1; static int vout_format = PDF_RGB555; @@ -615,13 +615,14 @@ int plat_mem_set_exec(void *ptr, size_t size) 
return ret; } -void emu_video_mode_change(int start_line, int line_count, int is_32cols) +void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count) { struct retro_system_av_info av_info; vm_current_start_line = start_line; vm_current_line_count = line_count; - vm_current_is_32cols = is_32cols; + vm_current_start_col = start_col; + vm_current_col_count = col_count; // 8bit renderes create a 328x256 CLUT image, while 16bit creates 320x240 RGB vout_16bit = vout_format == PDF_RGB555 || (PicoIn.AHW & PAHW_32X); @@ -631,13 +632,7 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) vout_width = (vout_16bit ? VOUT_MAX_WIDTH : VOUT_8BIT_WIDTH); vout_height = (vout_16bit ? VOUT_MAX_HEIGHT : VOUT_8BIT_HEIGHT); vout_offset = (vout_16bit ? 0 : 8); // 8bit has 8 px overlap area on the left - if (is_32cols) { - // 256x240, with or w/o overlap on the left and 64 px on the right - padding = (struct retro_hw_ps2_insets){start_line, vout_offset, vout_height - line_count - start_line, vout_width - 256.0f - vout_offset}; - } else { - // 320x240, with or w/o overlap on the left and none on the right - padding = (struct retro_hw_ps2_insets){start_line, vout_offset, vout_height - line_count - start_line, vout_width - 320.0f - vout_offset}; - } + padding = (struct retro_hw_ps2_insets){start_line, vout_offset, vout_height - line_count - start_line, vout_width - col_count - vout_offset}; int pxsz = (vout_16bit ? 2 : 1); // pixel size: RGB = 16 bits, CLUT = 8 bits memset(vout_buf, 0, pxsz * vout_width * vout_height); @@ -651,7 +646,7 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) ps2->padding = padding; } #else - vout_width = is_32cols ? 
VOUT_32COL_WIDTH : VOUT_MAX_WIDTH; + vout_width = col_count; memset(vout_buf, 0, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); if (vout_16bit) PicoDrawSetOutBuf(vout_buf, vout_width * 2); @@ -686,11 +681,11 @@ void emu_32x_startup(void) PicoDrawSetOutFormat(vout_format, 0); if ((vm_current_start_line != -1) && (vm_current_line_count != -1) && - (vm_current_is_32cols != -1)) + (vm_current_start_col != -1) && + (vm_current_col_count != -1)) emu_video_mode_change( - vm_current_start_line, - vm_current_line_count, - vm_current_is_32cols); + vm_current_start_line, vm_current_line_count, + vm_current_start_col, vm_current_col_count); } void lprintf(const char *fmt, ...) @@ -1621,11 +1616,11 @@ static void update_variables(bool first_run) { if ((vm_current_start_line != -1) && (vm_current_line_count != -1) && - (vm_current_is_32cols != -1)) + (vm_current_start_col != -1) && + (vm_current_col_count != -1)) emu_video_mode_change( - vm_current_start_line, - vm_current_line_count, - vm_current_is_32cols); + vm_current_start_line, vm_current_line_count, + vm_current_start_col, vm_current_col_count); } /* Reinitialise frameskipping, if required */ diff --git a/platform/linux/emu.c b/platform/linux/emu.c index fa15d3f8..0b0c53d1 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -14,6 +14,7 @@ #include "../libpicofe/plat.h" #include "../common/emu.h" #include "../common/arm_utils.h" +#include "../common/upscale.h" #include "../common/version.h" #include @@ -23,8 +24,9 @@ const char *renderer_names[] = { "16bit accurate", " 8bit accurate", " 8bit fast const char *renderer_names32x[] = { "accurate", "faster", "fastest", NULL }; enum renderer_types { RT_16BIT, RT_8BIT_ACC, RT_8BIT_FAST, RT_COUNT }; -static int out_x, out_y; -static int out_w, out_h; +static int out_x, out_y, out_w, out_h; // renderer output in render buffer +static int screen_x, screen_y, screen_w, screen_h; // final render destination +static int render_bg; // force 16bit mode for bg render void 
pemu_prep_defconfig(void) { @@ -38,7 +40,7 @@ void pemu_validate_config(void) } #define is_16bit_mode() \ - (currentConfig.renderer == RT_16BIT || (PicoIn.AHW & PAHW_32X)) + (currentConfig.renderer == RT_16BIT || (PicoIn.AHW & PAHW_32X) || render_bg) static int get_renderer(void) { @@ -82,31 +84,79 @@ static void draw_cd_leds(void) #undef p } -static unsigned short *get_16bit_start(unsigned short *buf) +/* render/screen buffer handling: + * In 16 bit mode, render output is directly placed in the screen buffer. + * SW scaling is handled in renderer (x) and in vscaling callbacks here (y). + * In 8 bit modes, output goes to the internal Draw2FB buffer in alternate + * renderer format (8 pix overscan at left/top/bottom), left aligned (DIS_32C). + * It is converted to 16 bit and SW scaled in pemu_finalize_frame. + * + * HW scaling always aligns the image to the left/top, since selecting an area + * for display isn't always possible. + */ + +static u16 *screen_buffer(u16 *buf) +{ + // center the emulator display on the screen if screen is larger + if (currentConfig.scaling != EOPT_SCALE_HW) + buf += (g_screen_width-320)/2; + if (currentConfig.vscaling != EOPT_SCALE_HW) + buf += (g_screen_height-240)/2 * g_screen_ppitch; + return buf; +} + +void screen_blit(u16 *pd, int pp, u8* ps, int ss, u16 *pal) { - // center the output on the screen - int offs = (g_screen_height-240)/2 * g_screen_ppitch + (g_screen_width-320)/2; - return buf + offs; + typedef void (*upscale_t) + (u16 *di,int ds, u8 *si,int ss, int w,int h, u16 *pal); + upscale_t upscale_hv[] = { + upscale_rgb_nn_x_4_5_y_16_17, upscale_rgb_snn_x_4_5_y_16_17, + upscale_rgb_bl2_x_4_5_y_16_17, upscale_rgb_bl4_x_4_5_y_16_17, + }; + upscale_t upscale_h[] = { + upscale_rgb_nn_x_4_5, upscale_rgb_snn_x_4_5, + upscale_rgb_bl2_x_4_5, upscale_rgb_bl4_x_4_5, + }; + upscale_t upscale_v[] = { + upscale_rgb_nn_y_16_17, upscale_rgb_snn_y_16_17, + upscale_rgb_bl2_y_16_17, upscale_rgb_bl4_y_16_17, + }; + upscale_t *upscale; + int y; + 
+ // handle software upscaling + upscale = NULL; + if (currentConfig.scaling == EOPT_SCALE_SW && out_w == 256) { + if (currentConfig.vscaling == EOPT_SCALE_SW && out_h <= 224) + // h+v scaling + upscale = upscale_hv; + else + // h scaling + upscale = upscale_h; + } else if (currentConfig.vscaling == EOPT_SCALE_SW && out_h <= 224) { + // v scaling + upscale = upscale_v; + } else { + // no scaling + for (y = 0; y < out_h; y++) + h_copy(pd, pp, ps, ss, out_w, f_pal); + return; + } + + upscale[currentConfig.filter & 0x3](pd, pp, ps, ss, out_w, out_h, pal); } void pemu_finalize_frame(const char *fps, const char *notice) { if (!is_16bit_mode()) { // convert the 8 bit CLUT output to 16 bit RGB - unsigned short *pd = (unsigned short *)g_screen_ptr + - out_y * g_screen_ppitch + out_x; - unsigned char *ps = Pico.est.Draw2FB + 328*out_y + 8; - unsigned short *pal = Pico.est.HighPal; - int i, x; + u16 *pd = screen_buffer(g_screen_ptr) + + screen_y * g_screen_ppitch + screen_x; + u8 *ps = Pico.est.Draw2FB + 328*out_y + out_x + 8; - pd = get_16bit_start(pd); PicoDrawUpdateHighPal(); - for (i = 0; i < out_h; i++, ps += 8) { - for (x = 0; x < out_w; x++) - *pd++ = pal[*ps++]; - pd += g_screen_ppitch - out_w; - ps += 320 - out_w; - } + + screen_blit(pd, g_screen_ppitch, ps, 328, Pico.est.HighPal); } if (notice) @@ -120,33 +170,44 @@ void pemu_finalize_frame(const char *fps, const char *notice) void plat_video_set_buffer(void *buf) { if (is_16bit_mode()) - PicoDrawSetOutBuf(get_16bit_start(buf), g_screen_ppitch * 2); + PicoDrawSetOutBuf(screen_buffer(buf), g_screen_ppitch * 2); } static void apply_renderer(void) { + PicoIn.opt &= ~(POPT_ALT_RENDERER|POPT_EN_SOFTSCALE|POPT_DIS_32C_BORDER); switch (get_renderer()) { case RT_16BIT: - PicoIn.opt &= ~POPT_ALT_RENDERER; - PicoIn.opt &= ~POPT_DIS_32C_BORDER; - PicoDrawSetOutFormat(PDF_RGB555, 0); - PicoDrawSetOutBuf(get_16bit_start(g_screen_ptr), g_screen_ppitch * 2); + // 32X uses line mode for vscaling with accurate renderer, since + // 
the MD VDP layer must be unscaled and merging the scaled 32X + // image data will fail. + PicoDrawSetOutFormat(PDF_RGB555, + (PicoIn.AHW & PAHW_32X) && currentConfig.vscaling); + PicoDrawSetOutBuf(screen_buffer(g_screen_ptr), g_screen_ppitch * 2); break; case RT_8BIT_ACC: - PicoIn.opt &= ~POPT_ALT_RENDERER; - PicoIn.opt |= POPT_DIS_32C_BORDER; + // for simplification the 8 bit accurate renderer uses the same + // storage format as the fast renderer PicoDrawSetOutFormat(PDF_8BIT, 0); PicoDrawSetOutBuf(Pico.est.Draw2FB, 328); break; case RT_8BIT_FAST: PicoIn.opt |= POPT_ALT_RENDERER; - PicoIn.opt |= POPT_DIS_32C_BORDER; PicoDrawSetOutFormat(PDF_NONE, 0); break; } if (PicoIn.AHW & PAHW_32X) - PicoDrawSetOutBuf(get_16bit_start(g_screen_ptr), g_screen_ppitch * 2); + PicoDrawSetOutBuf(screen_buffer(g_screen_ptr), g_screen_ppitch * 2); + else if (is_16bit_mode()) { + if (currentConfig.scaling == EOPT_SCALE_SW) { + PicoIn.opt |= POPT_EN_SOFTSCALE; + PicoIn.filter = currentConfig.filter; + } else if (currentConfig.scaling == EOPT_SCALE_HW) + // hw scaling, render without any padding + PicoIn.opt |= POPT_DIS_32C_BORDER; + } else + PicoIn.opt |= POPT_DIS_32C_BORDER; Pico.m.dirtyPal = 1; } @@ -188,37 +249,125 @@ void plat_update_volume(int has_changed, int is_up) { } +void pemu_sound_start(void) +{ + emu_sound_start(); +} + +void plat_debug_cat(char *str) +{ +} + void pemu_forced_frame(int no_scale, int do_emu) { - unsigned short *pd = get_16bit_start(g_screen_ptr); + u16 *pd = screen_buffer(g_screen_ptr); + int hs = currentConfig.scaling, vs = currentConfig.vscaling; + // create centered and sw scaled (if scaling enabled) 16 bit output PicoIn.opt &= ~POPT_DIS_32C_BORDER; - PicoDrawSetCallbacks(NULL, NULL); Pico.m.dirtyPal = 1; + if (currentConfig.scaling) currentConfig.scaling = EOPT_SCALE_SW; + if (currentConfig.vscaling) currentConfig.vscaling = EOPT_SCALE_SW; + plat_video_set_size(320, 240); + // render a frame in 16 bit mode + render_bg = 1; emu_cmn_forced_frame(no_scale, 
do_emu, pd); + render_bg = 0; g_menubg_src_ptr = g_screen_ptr; + currentConfig.scaling = hs, currentConfig.vscaling = vs; } -void pemu_sound_start(void) +/* vertical sw scaling, 16 bit mode */ +static int vscale_state; + +static int cb_vscaling_begin(unsigned int line) { - emu_sound_start(); + static int prevline = 999; + + // at start of new frame? + if (line < prevline) { + // set y frame offset (see emu_video_mode_change) + u16 *dest = g_screen_ptr; + Pico.est.DrawLineDest = dest + screen_y * g_screen_ppitch; + vscale_state = 0; + } + prevline = line; + return 0; } -void plat_debug_cat(char *str) +static int cb_vscaling_nop(unsigned int line) +{ + return 0; +} + +static int cb_vscaling_end(unsigned int line) { + u16 *dest = Pico.est.DrawLineDest; + switch (currentConfig.filter) { + case 3: v_upscale_bl4_16_17(dest, g_screen_ppitch, 320, vscale_state); + break; + case 2: v_upscale_bl2_16_17(dest, g_screen_ppitch, 320, vscale_state); + break; + case 1: v_upscale_snn_16_17(dest, g_screen_ppitch, 320, vscale_state); + break; + default: v_upscale_nn_16_17(dest, g_screen_ppitch, 320, vscale_state); + break; + } + Pico.est.DrawLineDest = dest; + return 0; } -void emu_video_mode_change(int start_line, int line_count, int is_32cols) +void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count) { + // relative position in core fb and screen fb + out_y = start_line; out_x = start_col; + out_h = line_count; out_w = col_count; + + PicoDrawSetCallbacks(NULL, NULL); + screen_x = screen_y = 0; + screen_w = 320, screen_h = 240; + + switch (currentConfig.scaling) { + case EOPT_SCALE_HW: + screen_w = out_w; + break; + case EOPT_SCALE_NONE: + // center output in screen + screen_x = (screen_w - out_w)/2; + break; + } + switch (currentConfig.vscaling) { + case EOPT_SCALE_HW: + // NTSC always has 224 visible lines, anything smaller has bars + screen_h = (out_h < 224 ? 
224 : out_h); + // handle vertical centering for 16 bit mode + screen_y = (screen_h - out_h) / 2; + if (is_16bit_mode()) + PicoDrawSetCallbacks(cb_vscaling_begin, cb_vscaling_nop); + break; + case EOPT_SCALE_SW: + // NTSC always has 224 visible lines, anything smaller has bars + if (out_y > 7) + screen_y = out_y - 7; + // in 16 bit mode sw scaling is divided between core and platform + if (is_16bit_mode() && out_h < 240) + PicoDrawSetCallbacks(cb_vscaling_begin, cb_vscaling_end); + break; + case EOPT_SCALE_NONE: + // center output in screen + screen_y = (screen_h - out_h)/2; + break; + } + + plat_video_set_size(screen_w, screen_h); + plat_video_set_buffer(g_screen_ptr); + // clear whole screen in all buffers if (!is_16bit_mode()) memset32(Pico.est.Draw2FB, 0xe0e0e0e0, (320+8) * (8+240+8) / 4); plat_video_clear_buffers(); - - out_y = start_line; out_x = (is_32cols ? 32 : 0); - out_h = line_count; out_w = (is_32cols ? 256:320); } void pemu_loop_prep(void) diff --git a/platform/linux/menu.c b/platform/linux/menu.c index b9bf4cee..236ebd92 100644 --- a/platform/linux/menu.c +++ b/platform/linux/menu.c @@ -1,12 +1,14 @@ -#include "../libpicofe/gp2x/plat_gp2x.h" - // ------------ gfx options menu ------------ +static const char *men_scaling_opts[] = { "OFF", "software", "hardware", NULL }; +static const char *men_filter_opts[] = { "nearest", "smoother", "bilinear 1", "bilinear 2", NULL }; -const char *men_scaling_opts[] = { "OFF", "ON", NULL }; +static const char h_scale[] = "hardware scaling may not be working on some devices"; #define MENU_OPTIONS_GFX \ - // mee_enum ("screen scaling", MA_OPT_SCALING, currentConfig.scaling, men_scaling_opts), \ + mee_enum_h ("Horizontal scaling", MA_OPT_SCALING, currentConfig.scaling, men_scaling_opts, h_scale), \ + mee_enum_h ("Vertical scaling", MA_OPT_VSCALING, currentConfig.vscaling, men_scaling_opts, h_scale), \ + mee_enum_h ("Scaler type", MA_OPT3_FILTERING, currentConfig.filter, men_filter_opts, NULL), \ #define MENU_OPTIONS_ADV 
diff --git a/platform/pandora/plat.c b/platform/pandora/plat.c index 0bd151cc..5532543f 100644 --- a/platform/pandora/plat.c +++ b/platform/pandora/plat.c @@ -331,17 +331,16 @@ void pnd_restore_layer_data(void) plat_video_flip(); } -void emu_video_mode_change(int start_line, int line_count, int is_32cols) +void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count) { int fb_w = 320, fb_h = 240, fb_left = 0, fb_right = 0, fb_top = 0, fb_bottom = 0; if (doing_bg_frame) return; - if (is_32cols) { - fb_w = 256; - fb_left = fb_right = 32; - } + fb_w = col_count; + fb_left = start_col; + fb_right = 320 - (fb_w+fb_left); switch (currentConfig.scaling) { case SCALE_1x1: @@ -349,7 +348,7 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) g_layer_h = fb_h; break; case SCALE_2x2_3x2: - g_layer_w = fb_w * (is_32cols ? 3 : 2); + g_layer_w = fb_w * (col_count < 320 ? 3 : 2); g_layer_h = fb_h * 2; break; case SCALE_2x2_2x2: @@ -381,7 +380,7 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) fb_h = line_count; break; } - g_osd_fps_x = is_32cols ? 232 : 264; + g_osd_fps_x = col_count < 320 ? 232 : 264; g_osd_y = fb_top + fb_h - 8; pnd_setup_layer(1, g_layer_x, g_layer_y, g_layer_w, g_layer_h); diff --git a/platform/psp/emu.c b/platform/psp/emu.c index 6c6c5b17..45f0623a 100644 --- a/platform/psp/emu.c +++ b/platform/psp/emu.c @@ -674,11 +674,11 @@ void plat_update_volume(int has_changed, int is_up) } /* prepare for MD screen mode change */ -void emu_video_mode_change(int start_line, int line_count, int is_32cols) +void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count) { - h32_mode = is_32cols; - out_y = start_line; out_x = (is_32cols ? 32 : 0); - out_h = line_count; out_w = (is_32cols ? 256:320); + h32_mode = col_count < 320; + out_y = start_line; out_x = (h32_mode ? 32 : 0); + out_h = line_count; out_w = (h32_mode ? 
256:320); vidResetMode(); if (h32_mode) // clear borders from h40 remnants diff --git a/platform/win32/plat.c b/platform/win32/plat.c index 70d4027f..2c82ef7e 100644 --- a/platform/win32/plat.c +++ b/platform/win32/plat.c @@ -107,10 +107,10 @@ void plat_video_toggle_renderer(int change, int is_menu) PicoDrawSetOutFormat(PDF_RGB555, 1); } -void emu_video_mode_change(int start_line, int line_count, int is_32cols) +void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count) { - EmuScreenRect.left = is_32cols ? 32 : 0; - EmuScreenRect.right = is_32cols ? 256+32 : 320; + EmuScreenRect.left = start_col; + EmuScreenRect.right = start_col + col_count; EmuScreenRect.top = start_line; EmuScreenRect.bottom = start_line + line_count; diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh index e45567f5..615e347f 100755 --- a/tools/mkoffsets.sh +++ b/tools/mkoffsets.sh @@ -134,9 +134,13 @@ get_define OFS_Pico_ Pico rom ; echo "$line" >>$fn get_define OFS_Pico_ Pico romsize ; echo "$line" >>$fn get_define OFS_Pico_ Pico est ; echo "$line" >>$fn +get_define OFS_PicoIn_ PicoInterface opt ; echo "$line" >>$fn +get_define OFS_PicoIn_ PicoInterface filter ; echo "$line" >>$fn + get_define OFS_EST_ PicoEState DrawScanline ; echo "$line" >>$fn get_define OFS_EST_ PicoEState rendstatus ; echo "$line" >>$fn get_define OFS_EST_ PicoEState DrawLineDest ; echo "$line" >>$fn +get_define OFS_EST_ PicoEState DrawLineDestIncr ; echo "$line" >>$fn get_define OFS_EST_ PicoEState HighCol ; echo "$line" >>$fn get_define OFS_EST_ PicoEState HighPreSpr ; echo "$line" >>$fn get_define OFS_EST_ PicoEState Pico ; echo "$line" >>$fn