From: kub Date: Thu, 22 Jul 2021 22:38:38 +0000 (+0200) Subject: platform support, add upscaling code X-Git-Tag: v2.00~500 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e6ecc1ef14d4369ed48f89ebd2e2ceb6f44ee903;p=picodrive.git platform support, add upscaling code --- diff --git a/platform/common/upscale.c b/platform/common/upscale.c new file mode 100644 index 00000000..1d8c3f9e --- /dev/null +++ b/platform/common/upscale.c @@ -0,0 +1,341 @@ +/* + * upscale.c image upscaling + * + * This file contains upscalers for picodrive. + * + * scaler types: + * nn: nearest neighbour + * snn: "smoothed" nearest neighbour (see below) + * bln: bilinear (using only 0.25, 0.5, 0.75 as weight for better performance) + * + * "smoothed" nearest neighbour: uses the average of the source pixels if no + * source pixel covers more than 65% of the result pixel. It definitely + * looks better than nearest neighbour and is still quite fast. It creates + * a sharper look than a bilinear filter, at the price of some visible jags + * on diagonal edges. + * [NB this has been brought to my attn, which is probably very similar: + * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] + * + * scaling modes: + * 256x___ -> 320x___ only horizontal scaling. Produces an aspect error of + * ~7% for NTSC 224 line modes, but is correct for PAL + * 256/320x224/240 + * -> 320x240 always produces 320x240 at DAR 4:3 + * 160x144 -> 320x240 game gear (currently unused) + * + * (C) 2021 kub + */ + +#include "upscale.h" + +/* 256x___ -> 320x___, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (wrong for NTSC) */ +void upscale_clut_nn_256_320x___(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int height) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_nn_4_5(di, ds, si, ss, 256, f_nop); + } +} + +void upscale_rgb_nn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_nn_4_5(di, ds, si, ss, 256, f_pal); + } +} + +void upscale_rgb_snn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_snn_4_5(di, ds, si, ss, 256, f_pal); + } +} + +void upscale_rgb_bln_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal); + } +} + +/* 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (wrong for PAL) */ +void upscale_clut_nn_256_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss) +{ + int y, j; + + /* 14:15, 0 1 2 3 4 5 6 6 7 8 9 10 11 12 13 */ + for (y = 0; y < 224; y += 14) { + /* lines 0-6 */ + for (j = 0; j < 7; j++) { + h_upscale_nn_4_5(di, ds, si, ss, 256, f_nop); + } + /* lines 8-14 */ + di += ds; + for (j = 0; j < 7; j++) { + h_upscale_nn_4_5(di, ds, si, ss, 256, f_nop); + } + /* line 7 */ + di += 8*ds; + v_copy(&di[0], &di[-ds], 320, f_nop); + di += 8*ds; + } +} + +void upscale_rgb_nn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + for (y = 0; y < 224; y += 14) { + for (j = 0; j < 7; j++) { + h_upscale_nn_4_5(di, ds, si, ss, 256, f_pal); + } + di += ds; + for (j = 0; j < 7; j++) { + h_upscale_nn_4_5(di, ds, si, ss, 256, f_pal); + } + + di -= 8*ds; + v_copy(&di[0], &di[-ds], 320, f_nop); + di += 8*ds; + } +} + +void upscale_rgb_snn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + /* 14:15, 0 1 2 3 4 5 5+6 6+7 7+8 8 9 10 11 12 13 */ + for (y = 0; y < 224; y += 14) { + for (j = 0; j < 7; j++) { + h_upscale_snn_4_5(di, ds, si, ss, 256, f_pal); + } + di += ds; + for (j = 0; j < 7; j++) { + h_upscale_snn_4_5(di, ds, si, ss, 256, f_pal); + } + + /* mix lines 6-8 */ + di -= 8*ds; + v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop); + v_mix(&di[-ds], &di[-2*ds], &di[-ds], 320, p_05, f_nop); + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], 320, p_05, f_nop); + di += 8*ds; + } +} + +void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + /* 14:15, 0 1 2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11 12 13 */ + for (y = 0; y < 224; y += 14) { + /* lines 0-2 */ + for (j = 0; j < 3; j++) { + h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal); + } + /* lines 3-14 mixing prep */ + di += ds; + for (j = 0; j < 11; j++) { + h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal); + } + di -= 12*ds; + /* mixing line 3: line 2 = -ds, line 3 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop); + di += ds; + /* mixing lines 4-5: line n = 0, line n+1 = +ds */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop); + di += ds; + } + /* mixing lines 6-8 */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop); + di += ds; + } + /* mixing lines 9-11 */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop); + di += ds; + } + /* lines 12-14, already in place */ + di += 3*ds; + } +} + +/* 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (wrong for PAL) */ +void upscale_clut_nn_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss) +{ + int y, j; + + for (y = 0; y < 224; y += 14) { + for (j = 0; j < 7; j++) { + h_copy(di, ds, si, ss, 320, f_nop); + } + di += ds; + for (j = 0; j < 7; j++) { + h_copy(di, ds, si, ss, 320, f_nop); + } + + di += 8*ds; + v_copy(&di[0], &di[-ds], 320, f_nop); + di += 8*ds; + + } +} + +void upscale_rgb_nn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + for (y = 0; y < 224; y += 14) { + for (j = 0; j < 7; j++) { + h_copy(di, ds, si, ss, 320, f_pal); + } + di += ds; + for (j = 0; j < 7; j++) { + h_copy(di, ds, si, ss, 320, f_pal); + } + + di -= 8*ds; + v_copy(&di[0], &di[-ds], 320, f_nop); + di += 8*ds; + } +} + +void upscale_rgb_snn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + for (y = 0; y < 224; y += 14) { + for (j = 0; j < 7; j++) { + h_copy(di, ds, si, ss, 320, f_pal); + } + di += ds; + for (j = 0; j < 7; j++) { + h_copy(di, ds, si, ss, 320, f_pal); + } + + di -= 8*ds; + v_mix(&di[ 0], &di[-ds], &di[ds], 320, p_05, f_nop); + v_mix(&di[-ds], &di[-2*ds], &di[-ds], 320, p_05, f_nop); + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], 320, p_05, f_nop); + di += 8*ds; + } +} + +void upscale_rgb_bln_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + for (y = 0; y < 224; y += 14) { + for (j = 0; j < 3; j++) { + h_copy(di, ds, si, ss, 320, f_pal); + } + for (j = 0; j < 3; j++) { + v_mix(&di[0], &si[-ss], &si[0], 320, p_025, f_pal); + di += ds; + si += ss; + } + for (j = 0; j < 3; j++) { + v_mix(&di[0], &si[-ss], &si[0], 320, p_05, f_pal); + di += ds; + si += ss; + } + for (j = 0; j < 3; j++) { + v_mix(&di[0], &si[-ss], &si[0], 320, p_075, f_pal); + di += ds; + si += ss; + } + si -= ss; + for (j = 0; j < 3; j++) { + h_copy(di, ds, si, ss, 320, f_pal); + } + } +} + +/* 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 */ +/* NB for smoother image could scale to 288x216, x*9/5, y*3/2 ? + * h: 11111 11112 22222 22233 33333 33444 44444 45555 55555 + * 1 1 2 2+3 3 3+4 4 5 5 + * v: 11 12 22 + * 1 1+2 2 + */ +void upscale_clut_nn_160_320x144_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss) +{ + int y, j; + + /* 3:5, 0 0 1 1 2 */ + for (y = 0; y < 144; y += 3) { + /* lines 0,2,4 */ + for (j = 0; j < 3; j++) { + h_upscale_nn_1_2(di, ds, si, ss, 160, f_nop); + di += ds; + } + /* lines 1,3 */ + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_copy(&di[0], &di[-ds], 320, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_nn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + for (y = 0; y < 144; y += 3) { + for (j = 0; j < 3; j++) { + h_upscale_nn_1_2(di, ds, si, ss, 160, f_pal); + di += ds; + } + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_copy(&di[0], &di[-ds], 320, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_snn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + /* 3:5, 0 0+1 1 1+2 2 */ + for (y = 0; y < 144; y += 3) { + for (j = 0; j < 3; j++) { + h_upscale_nn_1_2(di, ds, si, ss, 160, f_pal); + di += ds; + } + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_bln_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + for (y = 0; y < 144; y += 3) { + for (j = 0; j < 3; j++) { + h_upscale_nn_1_2(di, ds, si, ss, 160, f_pal); + di += ds; + } + di -= 5*ds; + v_mix(&di[0], &di[-ds], &di[ds], 320, p_075, f_nop); + di += 2*ds; + v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop); + di += 2*ds; + } +} + diff --git a/platform/common/upscale.h b/platform/common/upscale.h new file mode 100644 index 00000000..bb284df4 --- /dev/null +++ b/platform/common/upscale.h @@ -0,0 +1,171 @@ +/* + * upscale.h image upscaling + * + * This file contains upscalers for picodrive. + * + * scaler types: + * nn: nearest neighbour + * snn: "smoothed" nearest neighbour (see below) + * bln: bilinear (using only 0.25, 0.5, 0.75 as weight for better performance) + * + * "smoothed" nearest neighbour: uses the average of the source pixels if no + * source pixel covers more than 65% of the result pixel. It definitely + * looks better than nearest neighbour and is still quite fast. It creates + * a sharper look than a bilinear filter, at the price of some visible jags + * on diagonal edges. + * [NB this has been brought to my attn, which is probably very similar: + * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] + * + * scaling modes: + * 256x___ -> 320x___ only horizontal scaling. Produces an aspect error of + * ~7% for NTSC 224 line modes, but is correct for PAL + * 256/320x224/240 + * -> 320x240 always produces 320x240 at DAR 4:3 + * 160x144 -> 320x240 game gear (currently unused) + * + * (C) 2021 kub + */ +#include + +/* RGB565 pixel mixing, see https://www.compuphase.com/graphic/scale3.htm and + http://blargg.8bitalley.com/info/rgb_mixing.html */ +//#define p_05(p1,p2) (t=((p1)&(p2)) + ((((p1)^(p2))&~0x0821)>>1)) +#define p_05(p1,p2) (t=((p1)+(p2) + ( ((p1)^(p2))&0x0821))>>1) // round up +//#define p_05(p1,p2) (t=((p1)+(p2) - ( ((p1)^(p2))&0x0821))>>1) // round down +#define p_025(p1,p2) (t=((p1)+(p2) + ( ((p1)^(p2))&0x0821))>>1, \ + (( t)+(p2) + ( (( t)^(p2))&0x0821))>>1) +#define p_075(p1,p2) p_025(p2,p1) + +/* pixel transforms, result must be RGB565 */ +#define f_pal(v) pal[v] // convert CLUT index -> RGB565 +#define f_nop(v) (v) // source already in RGB565 + +/* +scalers h: +256->320: - (4:5) (256x224/240 -> 320x224/240) +160->320: - (1:2) 2x (160x144 -> 320x240, GG) +*/ + +/* scale 4:5 */ +#define h_upscale_nn_4_5(di,ds,si,ss,w,f) do { \ + /* 1111 1222 2233 3334 4444 */ \ + /* 1 2 2 3 4 */ \ + int i; \ + for (i = 0; i < w; i += 4, si += 4, di += 5) { \ + di[0] = f(si[0]); \ + di[1] = f(si[1]); \ + di[2] = f(si[1]); \ + di[3] = f(si[2]); \ + di[4] = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +#define h_upscale_snn_4_5(di,ds,si,ss,w,f) do { \ + /* 1111 1222 2233 3334 4444 */ \ + /* 1 2 2+3 3 4 */ \ + int i, t; \ + for (i = 0; i < w; i += 4, si += 4, di += 5) { \ + di[0] = f(si[0]); \ + di[1] = f(si[1]); \ + di[2] = p_05(f(si[1]),f(si[2])); \ + di[3] = f(si[2]); \ + di[4] = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bln_4_5(di,ds,si,ss,w,f) do { \ + /* 1111 1222 2233 3334 4444 */ \ + /* 1 2+3 2+3 3+4 4 */ \ + int i, t; \ + for (i = 0; i < w; i += 4, si += 4, di += 5) { \ + di[0] = f(si[0]); \ + di[1] = p_025(f(si[0]),f(si[1])); \ + di[2] = p_05 (f(si[1]),f(si[2])); \ + di[3] = p_075(f(si[2]),f(si[3])); \ + di[4] = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +/* scale 1:2 integer scale */ +#define h_upscale_nn_1_2(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = 0; i < w; i += 2, si += 2, di += 4) { \ + di[0] = f(si[0]); \ + di[1] = f(si[0]); \ + di[2] = f(si[1]); \ + di[3] = f(si[1]); \ + } \ + di += ds - w*2; \ + si += ss - w; \ +} while (0) + +/* scale 1:1, copy */ +#define h_copy(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = 0; i < w; i += 4, si += 4, di += 4) { \ + di[0] = f(si[0]); \ + di[1] = f(si[1]); \ + di[2] = f(si[2]); \ + di[3] = f(si[3]); \ + } \ + di += ds - w; \ + si += ss - w; \ +} while (0) + +/* +scalers v: +224->240: - (14:15) (256/320x224 -> 320x240) +144->240: - (3:5) (160x144 -> 320x240, GG) +*/ + +#define v_mix(di,li,ri,w,p_mix,f) do { \ + int i, t; \ + for (i = 0; i < w; i += 4) { \ + (di)[i ] = p_mix(f((li)[i ]), f((ri)[i ])); \ + (di)[i+1] = p_mix(f((li)[i+1]), f((ri)[i+1])); \ + (di)[i+2] = p_mix(f((li)[i+2]), f((ri)[i+2])); \ + (di)[i+3] = p_mix(f((li)[i+3]), f((ri)[i+3])); \ + } \ +} while (0) + +#define v_copy(di,ri,w,f) do { \ + int i; \ + for (i = 0; i < w; i += 4) { \ + (di)[i ] = f((ri)[i ]); \ + (di)[i+1] = f((ri)[i+1]); \ + (di)[i+2] = f((ri)[i+2]); \ + (di)[i+3] = f((ri)[i+3]); \ + } \ +} while (0) + + + +/* 256x___ -> 320x___, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (wrong for NTSC) */ +void upscale_clut_nn_256_320x___(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int height); +void upscale_rgb_nn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); +void upscale_rgb_snn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); +void upscale_rgb_bln_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); + +/* 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (wrong for PAL) */ +void upscale_clut_nn_256_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss); +void upscale_rgb_nn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_snn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); + +/* 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (wrong for PAL) */ +void upscale_clut_nn_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss); +void upscale_rgb_nn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_snn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_bln_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); + +/* 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 */ +void upscale_clut_nn_160_320x144_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss); +void upscale_rgb_nn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_snn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_bln_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);