From: kub Date: Fri, 20 Aug 2021 17:55:36 +0000 (+0200) Subject: platform support, more upscaling basics X-Git-Tag: v2.00~497 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ff42e515aea4534baedd235de6ad01797e87f7ee;p=picodrive.git platform support, more upscaling basics --- diff --git a/platform/common/upscale.c b/platform/common/upscale.c index a84b68c7..b84c096e 100644 --- a/platform/common/upscale.c +++ b/platform/common/upscale.c @@ -6,15 +6,17 @@ * scaler types: * nn: nearest neighbour * snn: "smoothed" nearest neighbour (see below) - * bln: bilinear (using only 0.25, 0.5, 0.75 as weight for better performance) + * bln: n-level-bilinear with n quantized weights + * quantization: 0: a<1/2*n, 1/n: 1/2*n<=a<3/2*n, etc + * currently n=2, n=4 are implemented (there's n=8 mixing, but no filters) + * [NB this has been brought to my attn, which is probably the same as bl2: + * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] * * "smoothed" nearest neighbour: uses the average of the source pixels if no * source pixel covers more than 65% of the result pixel. It definitely * looks better than nearest neighbour and is still quite fast. It creates * a sharper look than a bilinear filter, at the price of some visible jags * on diagonal edges. - * [NB this has been brought to my attn, which is probably very similar: - * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] * * scaling modes: * 256x___ -> 320x___ only horizontal scaling. Produces an aspect error of @@ -56,12 +58,21 @@ void upscale_rgb_snn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, } } -void upscale_rgb_bln_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) +void upscale_rgb_bl2_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) { int y; for (y = 0; y < height; y++) { - h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal); + h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal); + } +} + +void upscale_rgb_bl4_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal); } } @@ -140,7 +151,7 @@ void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict for (j = 0; j < 3; j++) { h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal); } - /* lines 3-14 mixing prep */ + /* lines 3-11 mixing prep */ di += ds; for (j = 0; j < 11; j++) { h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal); @@ -154,7 +165,7 @@ void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop); di += ds; } - /* mixing lines 6-8 */ + /* mixing line 6-8 */ for (j = 0; j < 3; j++) { v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop); di += ds; @@ -169,6 +180,120 @@ void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict } } +void upscale_rgb_bl2_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + /* 14:15, 0 1 2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10 11 12 13 */ + for (y = 0; y < 224; y += 14) { + for (j = 0; j < 3; j++) { + h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal); + } + di += ds; + for (j = 0; j < 11; j++) { + h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal); + } + /* mix lines 3-10 */ + di -= 12*ds; + v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop); + for (j = 0; j < 7; j++) { + di += ds; + v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop); + } + di += 5*ds; + } +} + +void upscale_rgb_bl4_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + /* 14:15, 0 0+1 1+2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11+12 12 13 */ + for (y = 0; y < 224; y += 14) { + /* line 0 */ + h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal); + /* lines 1-14 mixing prep */ + di += ds; + for (j = 0; j < 13; j++) { + h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal); + } + di -= 14*ds; + /* mixing line 1: line 0 = -ds, line 1 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop); + di += ds; + /* mixing lines 2-4: line n-1 = 0, line n = +ds */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop); + di += ds; + } + /* mixing lines 5-8 */ + for (j = 0; j < 4; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop); + di += ds; + } + /* mixing lines 9-12 */ + for (j = 0; j < 4; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop); + di += ds; + } + /* lines 13-14, already in place */ + di += 2*ds; + } +} + +void upscale_rgb_bl8_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j, d; + + /* 14:15, -1+0 0+1 1+2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11+12 12+13 13 */ + for (y = 0, d = ds; y < 224; y += 14, d = -ds) { + /* lines 0-14 mixing prep */ + di += ds; + for (j = 0; j < 14; j++) { + h_upscale_bl8_4_5(di, ds, si, ss, 256, f_pal); + } + di -= 15*ds; + /* mixing line 0: line 0 = -ds, line 1 = +ds */ + v_mix(&di[0], &di[d], &di[ds], 320, p_0125, f_nop); + di += ds; + /* mixing line 1: line 1 = 0, line 2 = +ds */ + v_mix(&di[0], &di[0], &di[ds], 320, p_0125, f_nop); + di += ds; + /* mixing lines 2-3: line n-1 = 0, line n = +ds */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop); + di += ds; + } + /* mixing lines 4-5 */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_0375, f_nop); + di += ds; + } + /* mixing lines 6-7 */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop); + di += ds; + } + /* mixing lines 8-9 */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_0625, f_nop); + di += ds; + } + /* mixing lines 10-11 */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop); + di += ds; + } + /* mixing lines 12-13 */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], 320, p_0875, f_nop); + di += ds; + } + /* line 14, already in place */ + di += ds; + } +} + /* 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (wrong for PAL) */ void upscale_clut_nn_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss) { @@ -230,7 +355,7 @@ void upscale_rgb_snn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, } } -void upscale_rgb_bln_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_bl2_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) { int y, j; @@ -238,23 +363,41 @@ void upscale_rgb_bln_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, for (j = 0; j < 3; j++) { h_copy(di, ds, si, ss, 320, f_pal); } - for (j = 0; j < 3; j++) { + for (j = 0; j < 8; j++) { + v_mix(&di[0], &si[-ss], &si[0], 320, p_05, f_pal); + di += ds; + si += ss; + } + si -= ss; + for (j = 0; j < 4; j++) { + h_copy(di, ds, si, ss, 320, f_pal); + } + } +} + +void upscale_rgb_bl4_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j; + + for (y = 0; y < 224; y += 14) { + h_copy(di, ds, si, ss, 320, f_pal); + for (j = 0; j < 4; j++) { v_mix(&di[0], &si[-ss], &si[0], 320, p_025, f_pal); di += ds; si += ss; } - for (j = 0; j < 3; j++) { + for (j = 0; j < 4; j++) { v_mix(&di[0], &si[-ss], &si[0], 320, p_05, f_pal); di += ds; si += ss; } - for (j = 0; j < 3; j++) { + for (j = 0; j < 4; j++) { v_mix(&di[0], &si[-ss], &si[0], 320, p_075, f_pal); di += ds; si += ss; } si -= ss; - for (j = 0; j < 3; j++) { + for (j = 0; j < 2; j++) { h_copy(di, ds, si, ss, 320, f_pal); } } @@ -322,19 +465,43 @@ void upscale_rgb_snn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict } } -void upscale_rgb_bln_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +void upscale_rgb_bl2_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) { int y, j; + /* 3:5, 0 0+1 1 1+2 2 */ for (y = 0; y < 144; y += 3) { for (j = 0; j < 3; j++) { - h_upscale_nn_1_2(di, ds, si, ss, 160, f_pal); + h_upscale_bl2_1_2(di, ds, si, ss, 160, f_pal); di += ds; } di -= 5*ds; - v_mix(&di[0], &di[-ds], &di[ds], 320, p_075, f_nop); + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_bl4_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal) +{ + int y, j, d; + + /* 3:5, -1+0, 0+1 0+1 1+2 2 + * for 1st block backwards reference virtually duplicate source line 0 */ + for (y = 0, d = 2*ds; y < 144; y += 3, d = -ds) { di += 2*ds; - v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop); + for (j = 0; j < 3; j++) { + h_upscale_bl2_1_2(di, ds, si, ss, 160, f_pal); + } + di -= 5*ds; + v_mix(&di[0], &di[d ], &di[2*ds], 320, p_05, f_nop); /*-1+0 */ + di += ds; + v_mix(&di[0], &di[ds], &di[2*ds], 320, p_075, f_nop); /* 0+1 */ + di += ds; + v_mix(&di[0], &di[ 0], &di[ ds], 320, p_025, f_nop); /* 0+1 */ + di += ds; + v_mix(&di[0], &di[ 0], &di[ ds], 320, p_05, f_nop); /* 1+2 */ di += 2*ds; } } diff --git a/platform/common/upscale.h b/platform/common/upscale.h index 2c22edfe..58076610 100644 --- a/platform/common/upscale.h +++ b/platform/common/upscale.h @@ -6,41 +6,51 @@ * scaler types: * nn: nearest neighbour * snn: "smoothed" nearest neighbour (see below) - * bln: bilinear (using only 0.25, 0.5, 0.75 as weight for better performance) + * bln: n-level-bilinear with n quantized weights + * quantization: 0: a<1/2*n, 1/n: 1/2*n<=a<3/2*n, etc + * currently n=2, n=4 are implemented (there's n=8 mixing, but no filters) + * [NB this has been brought to my attn, which is probably the same as bl2: + * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] * * "smoothed" nearest neighbour: uses the average of the source pixels if no * source pixel covers more than 65% of the result pixel. It definitely * looks better than nearest neighbour and is still quite fast. It creates * a sharper look than a bilinear filter, at the price of some visible jags * on diagonal edges. - * [NB this has been brought to my attn, which is probably very similar: - * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] * * scaling modes: * 256x___ -> 320x___ only horizontal scaling. Produces an aspect error of * ~7% for NTSC 224 line modes, but is correct for PAL * 256/320x224/240 * -> 320x240 always produces 320x240 at DAR 4:3 - * 160x144 -> 320x240 game gear (currently unused) - * +* 160x144 -> 320x240 game gear (currently unused) +* * (C) 2021 kub */ #include /* RGB565 pixel mixing, see https://www.compuphase.com/graphic/scale3.htm and http://blargg.8bitalley.com/info/rgb_mixing.html */ -#define p_05(p1,p2) (t=((p1)&(p2)) + ((((p1)^(p2))&~0x0821)>>1)) -//#define p_05(p1,p2) (t=((p1)+(p2) + ( ((p1)^(p2))&0x0821))>>1) // round up -//#define p_05(p1,p2) (t=((p1)+(p2) - ( ((p1)^(p2))&0x0821))>>1) // round down -#define p_025(p1,p2) (t=((p1)&(p2)) + ((((p1)^(p2))&~0x0821)>>1), \ - (( t)&(p2)) + (((( t)^(p2))&~0x0821)>>1)) -//#define p_025(p1,p2) (t=((p1)+(p2) + ( ((p1)^(p2))&0x0821))>>1, \ -// (( t)+(p2) + ( (( t)^(p2))&0x0821))>>1) +/* 2-level mixing */ +//#define p_05(p1,p2) (((p1)+(p2) + ( ((p1)^(p2))&0x0821))>>1) // round up +//#define p_05(p1,p2) (((p1)+(p2) - ( ((p1)^(p2))&0x0821))>>1) // round down +#define p_05(p1,p2) (((p1)&(p2)) + ((((p1)^(p2))&~0x0821)>>1)) +/* 4-level mixing, 2 times slower */ +// 1/4*p1 + 3/4*p2 = 1/2*(1/2*(p1+p2) + p2) +#define p_025(p1,p2) (t=p_05(p1, p2), p_05( t, p2)) #define p_075(p1,p2) p_025(p2,p1) +/* 8-level mixing, 3 times slower */ +// 1/8*p1 + 7/8*p2 = 1/2*(1/2*(1/2*(p1+p2) + p2) + p2) +#define p_0125(p1,p2) (t=p_05(p1, p2), u=p_05( t, p2), p_05( u, p2)) +// 3/8*p1 + 5/8*p2 = 1/2*(1/2*(1/2*(p1+p2) + p2) + 1/2*(p1+p2)) +#define p_0375(p1,p2) (t=p_05(p1, p2), u=p_05( t, p2), p_05( u, t)) +#define p_0625(p1,p2) p_0375(p2,p1) +#define p_0875(p1,p2) p_0125(p2,p1) -/* pixel transforms, result must be RGB565 */ +/* pixel transforms */ #define f_pal(v) pal[v] // convert CLUT index -> RGB565 -#define f_nop(v) (v) // source already in RGB565 +#define f_nop(v) (v) // source already in dest format (CLUT/RGB) +#define f_or(v) (v|pal) // CLUT, add palette selection /* scalers h: @@ -52,8 +62,6 @@ scalers h: /* scale 4:5 */ #define h_upscale_nn_4_5(di,ds,si,ss,w,f) do { \ - /* 1111 1222 2233 3334 4444 */ \ - /* 1 2 2 3 4 */ \ int i; \ for (i = w/4; i > 0; i--, si += 4, di += 5) { \ di[0] = f(si[0]); \ @@ -66,10 +74,24 @@ scalers h: si += ss - w; \ } while (0) +// reverse version for overlapping buffers +#define rh_upscale_nn_4_5(di,ds,si,ss,w,f) do { \ + int i; \ + di += w/4*5; \ + si += w; \ + for (i = w/4; i > 0; i--, si -= 4, di -= 5) { \ + di[-1] = f(si[-1]); \ + di[-2] = f(si[-2]); \ + di[-3] = f(si[-3]); \ + di[-4] = f(si[-3]); \ + di[-5] = f(si[-4]); \ + } \ + di += ds; \ + si += ss; \ +} while (0) + #define h_upscale_snn_4_5(di,ds,si,ss,w,f) do { \ - /* 1111 1222 2233 3334 4444 */ \ - /* 1 2 2+3 3 4 */ \ - int i, t; \ + int i; \ for (i = w/4; i > 0; i--, si += 4, di += 5) { \ di[0] = f(si[0]); \ di[1] = f(si[1]); \ @@ -82,9 +104,7 @@ scalers h: } while (0) #define h_upscale_bln_4_5(di,ds,si,ss,w,f) do { \ - /* 1111 1222 2233 3334 4444 */ \ - /* 1 2+3 2+3 3+4 4 */ \ - int i, t; \ + int i, t; \ for (i = w/4; i > 0; i--, si += 4, di += 5) { \ di[0] = f(si[0]); \ di[1] = p_025(f(si[0]),f(si[1])); \ @@ -96,10 +116,47 @@ scalers h: si += ss - w; \ } while (0) +#define h_upscale_bl2_4_5(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/4; i > 0; i--, si += 4, di += 5) { \ + di[0] = f(si[0]); \ + di[1] = p_05(f(si[0]),f(si[1])); \ + di[2] = p_05(f(si[1]),f(si[2])); \ + di[3] = f(si[2]); \ + di[4] = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bl4_4_5(di,ds,si,ss,w,f) do { \ + int i, t; uint p = f(si[0]); \ + for (i = w/4; i > 0; i--, si += 4, di += 5) { \ + di[0] = p_025(p, f(si[0])); \ + di[1] = p_05 (f(si[0]),f(si[1])); \ + di[2] = p_05 (f(si[1]),f(si[2])); \ + di[3] = p_075(f(si[2]),f(si[3])); \ + di[4] = p = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bl8_4_5(di,ds,si,ss,w,f) do { \ + int i, t, u; uint p = f(si[0]); \ + for (i = w/4; i > 0; i--, si += 4, di += 5) { \ + di[0] = p_025(p, f(si[0])); \ + di[1] = p_0375(f(si[0]),f(si[1])); \ + di[2] = p_0625(f(si[1]),f(si[2])); \ + di[3] = p_075(f(si[2]),f(si[3])); \ + di[4] = p = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + /* scale 6:7 */ #define h_upscale_nn_6_7(di,ds,si,ss,w,f) do { \ - /* 111111 122222 223333 333444 444455 555556 666666 */ \ - /* 1 2 3 3 4 5 6 */ \ int i; \ for (i = w/6; i > 0; i--, si += 6, di += 7) { \ di[0] = f(si[0]); \ @@ -114,10 +171,26 @@ scalers h: si += ss - w; \ } while (0) +// reverse version for overlapping buffers +#define rh_upscale_nn_6_7(di,ds,si,ss,w,f) do { \ + int i; \ + di += w/6*7; \ + si += w; \ + for (i = w/6; i > 0; i--, si -= 6, di -= 7) { \ + di[-1] = f(si[-1]); \ + di[-2] = f(si[-2]); \ + di[-3] = f(si[-3]); \ + di[-4] = f(si[-4]); \ + di[-5] = f(si[-4]); \ + di[-6] = f(si[-5]); \ + di[-7] = f(si[-6]); \ + } \ + di += ds; \ + si += ss; \ +} while (0) + #define h_upscale_snn_6_7(di,ds,si,ss,w,f) do { \ - /* 111111 122222 223333 333444 444455 555556 666666 */ \ - /* 1 2 3 3+4 4 5 6 */ \ - int i, t; \ + int i; \ for (i = w/6; i > 0; i--, si += 6, di += 7) { \ di[0] = f(si[0]); \ di[1] = f(si[1]); \ @@ -131,16 +204,14 @@ scalers h: si += ss - w; \ } while (0) -#define h_upscale_bln_6_7(di,ds,si,ss,w,f) do { \ - /* 111111 122222 223333 333444 444455 555556 666666 */ \ - /* 1 2 2+3 3+4 4+5 5 6 */ \ - int i, t; \ +#define h_upscale_bl2_6_7(di,ds,si,ss,w,f) do { \ + int i; \ for (i = w/6; i > 0; i--, si += 6, di += 7) { \ di[0] = f(si[0]); \ - di[1] = f(si[1]); \ - di[2] = p_025(f(si[1]),f(si[2])); \ - di[3] = p_05 (f(si[3]),f(si[3])); \ - di[4] = p_075(f(si[2]),f(si[4])); \ + di[1] = p_05(f(si[0]),f(si[1])); \ + di[2] = p_05(f(si[1]),f(si[2])); \ + di[3] = p_05(f(si[2]),f(si[3])); \ + di[4] = p_05(f(si[3]),f(si[4])); \ di[5] = f(si[4]); \ di[6] = f(si[5]); \ } \ @@ -148,10 +219,23 @@ scalers h: si += ss - w; \ } while (0) +#define h_upscale_bl4_6_7(di,ds,si,ss,w,f) do { \ + int i, t; uint p = f(si[0]); \ + for (i = w/6; i > 0; i--, si += 6, di += 7) { \ + di[0] = p_025(p,f(si[0])); \ + di[1] = p_025(f(si[0]),f(si[1])); \ + di[2] = p_05 (f(si[1]),f(si[2])); \ + di[3] = p_05 (f(si[2]),f(si[3])); \ + di[4] = p_075(f(si[3]),f(si[4])); \ + di[5] = p_075(f(si[4]),f(si[5])); \ + di[6] = p = f(si[5]); \ + } \ + di += ds - w/6*7; \ + si += ss - w; \ +} while (0) + /* scale 5:9 */ #define h_upscale_nn_5_9(di,ds,si,ss,w,f) do { \ - /* 11111 11112 22222 22233 33333 33444 44444 45555 55555 */ \ - /* 1 1 2 2 3 4 4 5 5 */ \ int i; \ for (i = w/5; i > 0; i--, si += 5, di += 9) { \ di[0] = f(si[0]); \ @@ -169,9 +253,7 @@ scalers h: } while (0) #define h_upscale_snn_5_9(di,ds,si,ss,w,f) do { \ - /* 11111 11112 22222 22233 33333 33444 44444 45555 55555 */ \ - /* 1 1 2 2+3 3 3+4 4 5 5 */ \ - int i, t; \ + int i; \ for (i = w/5; i > 0; i--, si += 5, di += 9) { \ di[0] = f(si[0]); \ di[1] = f(si[0]); \ @@ -187,25 +269,40 @@ scalers h: si += ss - w; \ } while (0) -#define h_upscale_bln_5_9(di,ds,si,ss,w,f) do { \ - /* 11111 11112 22222 22233 33333 33444 44444 45555 55555 */ \ - /* 1 1+2 2 2+3 3 3+4 4 4+5 5 */ \ - int i, t; \ +#define h_upscale_bl2_5_9(di,ds,si,ss,w,f) do { \ + int i; \ for (i = w/5; i > 0; i--, si += 5, di += 9) { \ di[0] = f(si[0]); \ - di[1] = p_075(f(si[0]),f(si[1])); \ + di[1] = p_05(f(si[0]),f(si[1])); \ di[2] = f(si[1]); \ - di[3] = p_075(f(si[1]),f(si[2])); \ + di[3] = p_05(f(si[1]),f(si[2])); \ di[4] = f(si[2]); \ - di[5] = p_025(f(si[2]),f(si[3])); \ + di[5] = p_05(f(si[2]),f(si[3])); \ di[6] = f(si[3]); \ - di[5] = p_025(f(si[3]),f(si[4])); \ + di[7] = p_05(f(si[3]),f(si[4])); \ di[8] = f(si[4]); \ } \ di += ds - w/5*9; \ si += ss - w; \ } while (0) +#define h_upscale_bl4_5_9(di,ds,si,ss,w,f) do { \ + int i, t; uint p = f(si[0]); \ + for (i = w/5; i > 0; i--, si += 5, di += 9) { \ + di[0] = p_05 (p,f(si[0])); \ + di[1] = f(si[0]); \ + di[2] = p_025(f(si[0]),f(si[1])); \ + di[3] = p_075(f(si[1]),f(si[2])); \ + di[4] = p_025(f(si[1]),f(si[2])); \ + di[5] = p_075(f(si[2]),f(si[3])); \ + di[6] = f(si[3]); \ + di[7] = p_05 (f(si[3]),f(si[4])); \ + di[8] = p = f(si[4]); \ + } \ + di += ds - w/5*9; \ + si += ss - w; \ +} while (0) + /* scale 1:2 integer scale */ #define h_upscale_nn_1_2(di,ds,si,ss,w,f) do { \ int i; \ @@ -219,6 +316,18 @@ scalers h: si += ss - w; \ } while (0) +#define h_upscale_bl2_1_2(di,ds,si,ss,w,f) do { \ + int i; uint p = f(si[0]); \ + for (i = w/2; i > 0; i--, si += 2, di += 4) { \ + di[0] = p_05 (p, f(si[0])); \ + di[1] = f(si[0]); \ + di[2] = p_05 (f(si[0]), f(si[1])); \ + di[3] = p = f(si[1]); \ + } \ + di += ds - w*2; \ + si += ss - w; \ +} while (0) + /* scale 1:1, copy */ #define h_copy(di,ds,si,ss,w,f) do { \ int i; \ @@ -241,7 +350,7 @@ scalers v: */ #define v_mix(di,li,ri,w,p_mix,f) do { \ - int i, t; \ + int i, t, u; (void)t, (void)u; \ for (i = 0; i < w; i += 4) { \ (di)[i ] = p_mix(f((li)[i ]), f((ri)[i ])); \ (di)[i+1] = p_mix(f((li)[i+1]), f((ri)[i+1])); \ @@ -266,22 +375,26 @@ scalers v: void upscale_clut_nn_256_320x___(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int height); void upscale_rgb_nn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); void upscale_rgb_snn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); -void upscale_rgb_bln_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); +void upscale_rgb_bl2_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); +void upscale_rgb_bl4_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal); /* 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (wrong for PAL) */ void upscale_clut_nn_256_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss); void upscale_rgb_nn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); void upscale_rgb_snn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_bl2_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_bl4_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); /* 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (wrong for PAL) */ void upscale_clut_nn_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss); void upscale_rgb_nn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); void upscale_rgb_snn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_bln_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_bl2_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_bl4_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); /* 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 */ void upscale_clut_nn_160_320x144_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss); void upscale_rgb_nn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); void upscale_rgb_snn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); -void upscale_rgb_bln_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_bl2_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal); +void upscale_rgb_bl4_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);