From: notaz Date: Tue, 30 Dec 2025 21:59:19 +0000 (+0200) Subject: gpu: rework downscale stuff X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b7354977448629cc7e8b5c7879dbc9d22f8f03cf;p=pcsx_rearmed.git gpu: rework downscale stuff no more need for extra buffer, works with all plugins --- diff --git a/frontend/cspace.c b/frontend/cspace.c index 8249f80f..c9d5a51a 100644 --- a/frontend/cspace.c +++ b/frontend/cspace.c @@ -20,10 +20,39 @@ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define SWAP16(x) __builtin_bswap16(x) #define LE16TOHx2(x) ((SWAP16((x) >> 16) << 16) | SWAP16(x)) +#define LE32TOH(x) __builtin_bswap32(x) #else #define LE16TOHx2(x) (x) +#define LE32TOH(x) (x) #endif +static inline uint32_t bgr555_to_rgb565_pair(uint32_t p) +{ + uint32_t r, g, b; + r = (p & 0x001f001f) << 11; + g = (p & 0x03e003e0) << 1; + b = (p & 0x7c007c00) >> 10; + return r | g | b; +} + +static inline uint32_t bgr888_to_rgb565_pair(const uint8_t * __restrict__ src, int o2) +{ + uint32_t r1, g1, b1, r2, g2, b2; + r1 = src[0] & 0xf8; + g1 = src[1] & 0xfc; + b1 = src[2] & 0xf8; + r2 = src[o2 + 0] & 0xf8; + g2 = src[o2 + 1] & 0xfc; + b2 = src[o2 + 2] & 0xf8; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return (r1 << 24) | (g1 << 19) | (b1 << 13) | + (r2 << 8) | (g2 << 3) | (b2 >> 3); +#else + return (r2 << 24) | (g2 << 19) | (b2 << 13) | + (r1 << 8) | (g1 << 3) | (b1 >> 3); +#endif +} + #if defined(HAVE_bgr555_to_rgb565) /* have bgr555_to_rgb565 somewhere else */ @@ -56,13 +85,14 @@ typedef uint16_t gvu16u __attribute__((vector_size(16),aligned(2))); gsri(d_, s1, 11); \ } -void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, int bytes) +void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, + int pixels) { const uint16_t * __restrict__ src = src_; uint16_t * __restrict__ dst = dst_; gvu16 c0x07c0 = gdup(0x07c0); - assert(!(((uintptr_t)dst | (uintptr_t)src | bytes) & 1)); + assert(!(((uintptr_t)dst | 
(uintptr_t)src) & 1)); // align the destination if ((uintptr_t)dst & 0x0e) @@ -73,10 +103,10 @@ void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, *(gvu16u *)dst = d; dst += left / 2; src += left / 2; - bytes -= left; + pixels -= left / 2; } // go - for (; bytes >= 16; dst += 8, src += 8, bytes -= 16) + for (; pixels >= 8; dst += 8, src += 8, pixels -= 8) { gvu16 d, s = *(const gvu16u *)src; do_one_simd(d, s, c0x07c0); @@ -84,7 +114,7 @@ void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, __builtin_prefetch(src + 128/2); } // finish it - for (; bytes > 0; dst++, src++, bytes -= 2) + for (; pixels > 0; dst++, src++, pixels--) *dst = do_one(*src); } #undef do_one @@ -92,79 +122,217 @@ void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, #else -void bgr555_to_rgb565(void *dst_, const void *src_, int bytes) +void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, + int pixels) { // source can be misaligned, but it's very rare, so just force - const unsigned int *src = (const void *)((intptr_t)src_ & ~3); - unsigned int *dst = dst_; - unsigned int x, p, r, g, b; + const uint32_t * __restrict__ src = (const void *)((intptr_t)src_ & ~3); + uint32_t x, * __restrict__ dst = dst_; - for (x = 0; x < bytes / 4; x++) { - p = LE16TOHx2(src[x]); - - r = (p & 0x001f001f) << 11; - g = (p & 0x03e003e0) << 1; - b = (p & 0x7c007c00) >> 10; - - dst[x] = r | g | b; - } + for (x = 0; x < pixels / 2; x++) + dst[x] = bgr555_to_rgb565_pair(LE16TOHx2(src[x])); } #endif +static inline void bgr888_to_rgb888_one(uint8_t * __restrict__ dst, + const uint8_t * __restrict__ src) +{ + dst[0] = src[2]; + dst[1] = src[1]; + dst[2] = src[0]; +} + #ifndef HAVE_bgr888_to_x -void attr_weak bgr888_to_rgb565(void *dst_, const void *src_, int bytes) +void attr_weak bgr888_to_rgb565(void * __restrict__ dst_, + const void * __restrict__ src_, int pixels) { - const unsigned char *src = src_; - unsigned 
int *dst = dst_; - unsigned int r1, g1, b1, r2, g2, b2; - - for (; bytes >= 6; bytes -= 6, src += 6, dst++) { - r1 = src[0] & 0xf8; - g1 = src[1] & 0xfc; - b1 = src[2] & 0xf8; - r2 = src[3] & 0xf8; - g2 = src[4] & 0xfc; - b2 = src[5] & 0xf8; -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - *dst = (r1 << 24) | (g1 << 19) | (b1 << 13) | - (r2 << 8) | (g2 << 3) | (b2 >> 3); -#else - *dst = (r2 << 24) | (g2 << 19) | (b2 << 13) | - (r1 << 8) | (g1 << 3) | (b1 >> 3); -#endif - } + const uint8_t * __restrict__ src = src_; + uint32_t * __restrict__ dst = dst_; + + for (; pixels >= 2; pixels -= 2, src += 3*2, dst++) + *dst = bgr888_to_rgb565_pair(src, 3); } // TODO? -void rgb888_to_rgb565(void *dst, const void *src, int bytes) {} -void bgr888_to_rgb888(void *dst, const void *src, int bytes) {} +void rgb888_to_rgb565(void *dst, const void *src, int pixels) {} + +void bgr888_to_rgb888(void * __restrict__ dst_, + const void * __restrict__ src_, int pixels) +{ + const uint8_t * __restrict__ src = src_; + uint8_t * __restrict__ dst = dst_; + + for (; pixels >= 1; pixels--, src += 3, dst += 3) + bgr888_to_rgb888_one(dst, src); +} #endif // HAVE_bgr888_to_x -void bgr555_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes) +static inline uint32_t bgr555_to_xrgb8888_one(uint16_t p) +{ + uint32_t t = (((uint32_t)p << 19) | (p >> 7)) & 0xf800f8; /* widen before shifting: p promotes to int and << 19 could overflow it */ + t |= (p << 6) & 0xf800; + return t | ((t >> 5) & 0x070707); +} + +static inline uint32_t bgr888_to_xrgb8888_one(const uint8_t * __restrict__ src) +{ + return (src[0] << 16) | (src[1] << 8) | src[2]; +} + +void bgr555_to_xrgb8888(void * __restrict__ dst_, + const void * __restrict__ src_, int pixels) { const uint16_t * __restrict__ src = src_; uint32_t * __restrict__ dst = dst_; - for (; bytes >= 2; bytes -= 2, src++, dst++) - { - uint32_t t = ((*src << 19) | (*src >> 7)) & 0xf800f8; - t |= (*src << 6) & 0xf800; - *dst = t | ((t >> 5) & 0x070707); + for (; pixels >= 1; pixels--, src++, dst++) + *dst =
bgr555_to_xrgb8888_one(*src); +} + +void bgr888_to_xrgb8888(void * __restrict__ dst_, + const void * __restrict__ src_, int pixels) +{ + const uint8_t * __restrict__ src = src_; + uint32_t * __restrict__ dst = dst_; + + for (; pixels >= 1; pixels--, src += 3, dst++) + *dst = bgr888_to_xrgb8888_one(src); +} + +/* downscale */ +void bgr555_to_rgb565_640_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) +{ + const uint16_t * __restrict__ src = src_; + uint32_t * __restrict__ dst = dst_; + + for (; dpixels >= 2; dpixels -= 2, src += 4, dst++) { + uint32_t p = LE32TOH(src[0] | ((uint32_t)src[2] << 16)); /* widen before shifting: int promotion would overflow when bit 15 is set */ + *dst = bgr555_to_rgb565_pair(p); } } -void bgr888_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes) +void bgr888_to_rgb565_640_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) { const uint8_t * __restrict__ src = src_; uint32_t * __restrict__ dst = dst_; - for (; bytes >= 3; bytes -= 3, src += 3, dst++) + for (; dpixels >= 2; dpixels -= 2, src += 4*3, dst++) + *dst = bgr888_to_rgb565_pair(src, 2*3); +} + +void bgr888_to_rgb888_640_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) +{ + const uint8_t * __restrict__ src = src_; + uint8_t * __restrict__ dst = dst_; + + for (; dpixels >= 1; dpixels--, src += 2*3, dst += 3) + bgr888_to_rgb888_one(dst, src); +} + +void bgr555_to_xrgb8888_640_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) +{ + const uint16_t * __restrict__ src = src_; + uint32_t * __restrict__ dst = dst_; + + for (; dpixels >= 1; dpixels--, src += 2, dst++) + *dst = bgr555_to_xrgb8888_one(*src); +} + +void bgr888_to_xrgb8888_640_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) +{ + const uint8_t * __restrict__ src = src_; + uint32_t * __restrict__ dst = dst_; + + for (; dpixels >= 1; dpixels--, src += 3*2, dst++) *dst = (src[0] << 16) | (src[1] << 8) | src[2]; } +void
bgr555_to_rgb565_512_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) +{ + const uint16_t * __restrict__ src = src_; + uint32_t * __restrict__ dst = dst_; + + // 16 -> 10 to keep dst aligned + for (; dpixels >= 10; dpixels -= 10, src += 16, dst += 5) { + // picks a src pixel nearest to the center of the dst pixel; the high half is widened to uint32_t before the shift so a set bit 15 cannot overflow int + dst[0] = bgr555_to_rgb565_pair(LE32TOH(src[0] | ((uint32_t)src[2] << 16))); + dst[1] = bgr555_to_rgb565_pair(LE32TOH(src[4] | ((uint32_t)src[5] << 16))); + dst[2] = bgr555_to_rgb565_pair(LE32TOH(src[7] | ((uint32_t)src[8] << 16))); + dst[3] = bgr555_to_rgb565_pair(LE32TOH(src[10] | ((uint32_t)src[12] << 16))); + dst[4] = bgr555_to_rgb565_pair(LE32TOH(src[13] | ((uint32_t)src[15] << 16))); + } +} + +void bgr888_to_rgb565_512_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) +{ + const uint8_t * __restrict__ src = src_; + uint32_t * __restrict__ dst = dst_; + + for (; dpixels >= 10; dpixels -= 10, src += 16*3, dst += 5) { /* the pair helper's 2nd argument is a byte offset from its 1st argument, so pass index differences; picks src pixels 0,2 4,5 7,8 10,12 13,15 like the bgr555 variant */ + dst[0] = bgr888_to_rgb565_pair(src + 3*0, 3*(2-0)); + dst[1] = bgr888_to_rgb565_pair(src + 3*4, 3*(5-4)); + dst[2] = bgr888_to_rgb565_pair(src + 3*7, 3*(8-7)); + dst[3] = bgr888_to_rgb565_pair(src + 3*10, 3*(12-10)); + dst[4] = bgr888_to_rgb565_pair(src + 3*13, 3*(15-13)); + } +} + +void bgr888_to_rgb888_512_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) +{ + const uint8_t * __restrict__ src = src_; + uint8_t * __restrict__ dst = dst_; + + for (; dpixels >= 5; dpixels -= 5, src += 8*3, dst += 5*3) { + bgr888_to_rgb888_one(dst + 3*0, src + 3*0); + bgr888_to_rgb888_one(dst + 3*1, src + 3*2); + bgr888_to_rgb888_one(dst + 3*2, src + 3*4); + bgr888_to_rgb888_one(dst + 3*3, src + 3*5); + bgr888_to_rgb888_one(dst + 3*4, src + 3*7); + } +} + +void bgr555_to_xrgb8888_512_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) +{ + const uint16_t * __restrict__ src = src_; + uint32_t * __restrict__ dst = dst_; + + // 8 -> 5 + for (; dpixels >= 5; dpixels -= 5, src += 8, dst += 5) { + dst[0] =
bgr555_to_xrgb8888_one(src[0]); + dst[1] = bgr555_to_xrgb8888_one(src[2]); + dst[2] = bgr555_to_xrgb8888_one(src[4]); + dst[3] = bgr555_to_xrgb8888_one(src[5]); + dst[4] = bgr555_to_xrgb8888_one(src[7]); + } +} + +void bgr888_to_xrgb8888_512_to_320(void * __restrict__ dst_, + const void * __restrict__ src_, int dpixels) +{ + const uint8_t * __restrict__ src = src_; + uint32_t * __restrict__ dst = dst_; + + for (; dpixels >= 5; dpixels -= 5, src += 8*3, dst += 5) { + dst[0] = bgr888_to_xrgb8888_one(src + 0*3); + dst[1] = bgr888_to_xrgb8888_one(src + 2*3); + dst[2] = bgr888_to_xrgb8888_one(src + 4*3); + dst[3] = bgr888_to_xrgb8888_one(src + 5*3); + dst[4] = bgr888_to_xrgb8888_one(src + 7*3); + } +} + /* YUV stuff */ static int yuv_ry[32], yuv_gy[32], yuv_by[32]; static unsigned char yuv_u[32 * 2], yuv_v[32 * 2]; diff --git a/frontend/cspace.h b/frontend/cspace.h index e5931960..55325f9f 100644 --- a/frontend/cspace.h +++ b/frontend/cspace.h @@ -6,16 +6,27 @@ extern "C" { #endif -void bgr555_to_rgb565(void *dst, const void *src, int bytes); -void bgr888_to_rgb888(void *dst, const void *src, int bytes); -void bgr888_to_rgb565(void *dst, const void *src, int bytes); -void rgb888_to_rgb565(void *dst, const void *src, int bytes); +void bgr555_to_rgb565(void *dst, const void *src, int pixels); +void bgr888_to_rgb888(void *dst, const void *src, int pixels); +void bgr888_to_rgb565(void *dst, const void *src, int pixels); +void rgb888_to_rgb565(void *dst, const void *src, int pixels); -void bgr555_to_rgb565_b(void *dst, const void *src, int bytes, +void bgr555_to_rgb565_b(void *dst, const void *src, int pixels, int brightness2k); // 0-0x0800 -void bgr555_to_xrgb8888(void *dst, const void *src, int bytes); -void bgr888_to_xrgb8888(void *dst, const void *src, int bytes); +void bgr555_to_xrgb8888(void *dst, const void *src, int pixels); +void bgr888_to_xrgb8888(void *dst, const void *src, int pixels); + +void bgr555_to_rgb565_640_to_320(void *dst, const void *src, int 
dst_pixels); +void bgr888_to_rgb565_640_to_320(void *dst, const void *src, int dst_pixels); +void bgr888_to_rgb888_640_to_320(void *dst, const void *src, int dst_pixels); +void bgr555_to_xrgb8888_640_to_320(void *dst, const void *src, int dst_pixels); +void bgr888_to_xrgb8888_640_to_320(void *dst, const void *src, int dst_pixels); +void bgr555_to_rgb565_512_to_320(void *dst, const void *src, int dst_pixels); +void bgr888_to_rgb565_512_to_320(void *dst, const void *src, int dst_pixels); +void bgr888_to_rgb888_512_to_320(void *dst, const void *src, int dst_pixels); +void bgr555_to_xrgb8888_512_to_320(void *dst, const void *src, int dst_pixels); +void bgr888_to_xrgb8888_512_to_320(void *dst, const void *src, int dst_pixels); void bgr_to_uyvy_init(void); void rgb565_to_uyvy(void *d, const void *s, int pixels); diff --git a/frontend/cspace_arm.S b/frontend/cspace_arm.S index 41b1e691..0d09a736 100644 --- a/frontend/cspace_arm.S +++ b/frontend/cspace_arm.S @@ -34,8 +34,9 @@ #endif .endm -FUNCTION(bgr555_to_rgb565): @ void *dst, const void *src, int bytes +FUNCTION(bgr555_to_rgb565): @ void *dst, const void *src, int pixels push {r4-r11,lr} + mov r2, r2, lsl #1 mov lr, #0x001f subs r2, #4*8 orr lr, lr, lsl #16 @@ -119,7 +120,7 @@ FUNCTION(bgr888_to_rgb565): orr r7, r7, r8 @ r3g3b3 pkhbt r7, r6, r7, lsl #16 str r7, [r0], #4 - subs r2, r2, #12 + subs r2, r2, #4 bgt 0b pop {r4-r10,pc} diff --git a/frontend/cspace_neon.S b/frontend/cspace_neon.S index 3a89fdb9..923651ab 100644 --- a/frontend/cspace_neon.S +++ b/frontend/cspace_neon.S @@ -19,22 +19,22 @@ .text .align 2 -FUNCTION(bgr555_to_rgb565): @ dst, src, bytes - pld [r1] +FUNCTION(bgr555_to_rgb565): @ dst, src, pixels + pld [r1, #2048] mov r3, #0x07c0 vdup.16 q15, r3 tst r0, #8 beq 0f @ align the dst vld1.16 {d0}, [r1]! - sub r2, r2, #8 + sub r2, r2, #4 vshl.u16 d0, d0, #1 vshl.u16 d1, d0, #10 vsri.u16 d1, d0, #11 vbit d1, d0, d30 vst1.16 {d1}, [r0]! 
0: - subs r2, r2, #64 + subs r2, r2, #32 blt btr16_end64 0: pld [r1, #64*2] @@ -69,13 +69,13 @@ FUNCTION(bgr555_to_rgb565): @ dst, src, bytes vbit q10, q2, q15 vbit q11, q3, q15 vstmia r0!, {q8-q11} - subs r2, r2, #64 + subs r2, r2, #32 bge 0b btr16_end64: - adds r2, r2, #64 + adds r2, r2, #32 bxeq lr - subs r2, r2, #16 + subs r2, r2, #8 blt btr16_end16 @ handle the remainder (reasonably rare) @@ -85,14 +85,14 @@ btr16_end64: vshl.u16 q1, q0, #10 vsri.u16 q1, q0, #11 vbit q1, q0, q15 - subs r2, r2, #16 + subs r2, r2, #8 vst1.16 {q1}, [r0]! bge 0b btr16_end16: - adds r2, r2, #16 + adds r2, r2, #8 bxeq lr - subs r2, r2, #8 + subs r2, r2, #4 bxlt lr @ very rare @@ -106,8 +106,8 @@ btr16_end16: @ note: may overflow source -FUNCTION(bgr555_to_rgb565_b): @ dst, src, bytes, int brightness2k // 0-0x0800 - pld [r1] +FUNCTION(bgr555_to_rgb565_b): @ dst, src, pixels, int brightness2k // 0-0x0800 + pld [r1, #2048] vdup.16 q15, r3 vpush {q4-q7} mov r3, #0x1f @@ -159,7 +159,7 @@ FUNCTION(bgr555_to_rgb565_b): @ dst, src, bytes, int brightness2k // 0-0x0800 vsri.u16 q6, q10, #11 vsri.u16 q7, q11, #11 - subs r2, r2, #64 + subs r2, r2, #32 ble 1f vstmia r0!, {q4-q7} b 0b @@ -169,16 +169,16 @@ FUNCTION(bgr555_to_rgb565_b): @ dst, src, bytes, int brightness2k // 0-0x0800 vstmia r0!, {q4-q7} b btr16b_end 0: - subs r2, r2, #8 + subs r2, r2, #4 blt btr16b_end vst1.16 {q4}, [r0]! - subs r2, r2, #8 + subs r2, r2, #4 blt btr16b_end vst1.16 {q5}, [r0]! - subs r2, r2, #8 + subs r2, r2, #4 blt btr16b_end vst1.16 {q6}, [r0]! - subs r2, r2, #8 + subs r2, r2, #4 blt btr16b_end vst1.16 {q7}, [r0]! @@ -187,13 +187,10 @@ btr16b_end: bx lr -FUNCTION(bgr888_to_rgb888): @ dst, src, bytes - pld [r1] - @ r2 /= 48 +FUNCTION(bgr888_to_rgb888): @ dst, src, pixels + pld [r1, #2048] + @ r2 /= 16 mov r2, r2, lsr #4 - movw r3, #0x5556 - movt r3, #0x5555 - umull r12,r2, r3, r2 0: pld [r1, #48*3] vld3.8 {d0-d2}, [r1]! @@ -203,19 +200,16 @@ FUNCTION(bgr888_to_rgb888): @ dst, src, bytes vst3.8 {d0-d2}, [r0, :64]! 
vst3.8 {d3-d5}, [r0, :64]! subs r2, r2, #1 - bne 0b + bgt 0b + nop bx lr -FUNCTION(bgr888_to_rgb565): @ dst, src, bytes - pld [r1] - @ r2 /= 48 +FUNCTION(bgr888_to_rgb565): @ dst, src, pixels + pld [r1, #2048] + @ r2 /= 16 mov r2, r2, lsr #4 - movw r3, #0x5556 - movt r3, #0x5555 - umull r12,r2, r3, r2 - mov r3, #0x07e0 vdup.16 q15, r3 0: @@ -235,19 +229,16 @@ FUNCTION(bgr888_to_rgb565): @ dst, src, bytes vstmia r0!, {d0,d1} vstmia r0!, {d4,d5} subs r2, r2, #1 - bne 0b + bgt 0b + nop bx lr -FUNCTION(rgb888_to_rgb565): @ dst, src, bytes - pld [r1] - @ r2 /= 48 +FUNCTION(rgb888_to_rgb565): @ dst, src, pixels + pld [r1, #2048] + @ r2 /= 16 mov r2, r2, lsr #4 - movw r3, #0x5556 - movt r3, #0x5555 - umull r12,r2, r3, r2 - mov r3, #0x07e0 vdup.16 q15, r3 0: @@ -267,8 +258,9 @@ FUNCTION(rgb888_to_rgb565): @ dst, src, bytes vstmia r0!, {d2,d3} vstmia r0!, {d6,d7} subs r2, r2, #1 - bne 0b + bgt 0b + nop bx lr diff --git a/frontend/libretro.c b/frontend/libretro.c index e3f60c38..aad3c00b 100644 --- a/frontend/libretro.c +++ b/frontend/libretro.c @@ -60,6 +60,9 @@ #include "3ds/3ds_utils.h" #endif +#ifndef min +#define min(a, b) ((b) < (a) ? 
(b) : (a)) +#endif #ifndef MAP_FAILED #define MAP_FAILED ((void *)(intptr_t)-1) #endif @@ -137,8 +140,6 @@ static int retro_audio_buff_underrun = false; static unsigned retro_audio_latency = 0; static int update_audio_latency = false; -static unsigned int current_width; -static unsigned int current_height; static enum retro_pixel_format current_fmt; static int plugins_opened; @@ -251,27 +252,44 @@ static void init_memcard(char *mcd_data) } } -static void bgr_to_fb_empty(void *dst, const void *src, int bytes) +static void bgr_to_fb_empty(void *dst, const void *src, int dst_pixels) { } -typedef void (bgr_to_fb_func)(void *dst, const void *src, int bytes); -static bgr_to_fb_func *g_bgr_to_fb = bgr_to_fb_empty; +typedef void (bgr_to_fb_func)(void *dst, const void *src, int dst_pixels); + +static const struct cspace_func_type { + void (*blit)(void *dst, const void *src, int dst_pixels); + void (*blit_dscale640)(void *dst, const void *src, int dst_pixels); + void (*blit_dscale512)(void *dst, const void *src, int dst_pixels); +} cspace_funcs[] = { + { bgr555_to_rgb565, bgr555_to_rgb565_640_to_320, bgr555_to_rgb565_512_to_320 }, + { bgr888_to_rgb565, bgr888_to_rgb565_640_to_320, bgr888_to_rgb565_512_to_320 }, + { bgr555_to_xrgb8888, bgr555_to_xrgb8888_640_to_320, bgr555_to_xrgb8888_512_to_320 }, + { bgr888_to_xrgb8888, bgr888_to_xrgb8888_640_to_320, bgr888_to_xrgb8888_512_to_320 }, +}; static void set_bgr_to_fb_func(int bgr24) { + int func_id = bgr24; switch (current_fmt) { - case RETRO_PIXEL_FORMAT_XRGB8888: - g_bgr_to_fb = bgr24 ? bgr888_to_xrgb8888 : bgr555_to_xrgb8888; - break; case RETRO_PIXEL_FORMAT_RGB565: - g_bgr_to_fb = bgr24 ? 
bgr888_to_rgb565 : bgr555_to_rgb565; + break; + case RETRO_PIXEL_FORMAT_XRGB8888: + func_id += 2; break; default: LogErr("unsupported current_fmt: %d\n", current_fmt); - g_bgr_to_fb = bgr_to_fb_empty; - break; + pl_rearmed_cbs.cspace_blit = bgr_to_fb_empty; + return; + } + pl_rearmed_cbs.cspace_blit = cspace_funcs[func_id].blit; + if (vout_width == 320) { + if (psx_w >= 640-4) + pl_rearmed_cbs.cspace_blit = cspace_funcs[func_id].blit_dscale640; + else if (psx_w >= 512-4) + pl_rearmed_cbs.cspace_blit = cspace_funcs[func_id].blit_dscale512; } } @@ -309,11 +327,20 @@ static void set_vout_fb(void) static void vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp) { + static unsigned int current_width; + static unsigned int current_height; vout_width = w; vout_height = h; psx_w = raw_w; psx_h = raw_h; + if (pl_rearmed_cbs.scale_hires) { + if (raw_w >= 512-4 && w > 320) + vout_width = 320; + if (h > 256) + vout_height = h / 2; + } + /* it may seem like we could do RETRO_ENVIRONMENT_SET_PIXEL_FORMAT here to * switch to something that can accommodate bgr24 for FMVs, but although it * succeeds it doesn't actually change the format at least on Linux, and the @@ -378,14 +405,16 @@ static void vout_flip(const void *vram_, int vram_ofs, int bgr24, int x, int y, int w, int h, int dims_changed) { int bytes_pp = (current_fmt == RETRO_PIXEL_FORMAT_XRGB8888) ? 4 : 2; + bgr_to_fb_func *bgr_to_fb = pl_rearmed_cbs.cspace_blit; int bytes_pp_s = bgr24 ? 3 : 2; - bgr_to_fb_func *bgr_to_fb = g_bgr_to_fb; unsigned char *dest = vout_buf_ptr; const unsigned char *vram = vram_; - int dstride = vout_pitch_b, h1 = h; + int dstride = vout_pitch_b, h1; int enhres = w > psx_w; u32 vram_mask = enhres ? 
~0 : 0xfffff; + int w_blit = min(w, vout_width); int port = 0, hwrapped; + int sstride = 2048; if (vram == NULL || dims_changed || (in_enable_crosshair[0] + in_enable_crosshair[1]) > 0) { @@ -401,22 +430,28 @@ static void vout_flip(const void *vram_, int vram_ofs, int bgr24, goto out; } + if (h >= vout_height * 3 / 2) { + sstride = 4096; + h /= 2; + } + h = min(h, vout_height); dest += x * bytes_pp + y * dstride; - for (; h1-- > 0; dest += dstride) { - bgr_to_fb(dest, vram + vram_ofs, w * bytes_pp_s); - vram_ofs = (vram_ofs + 2048) & vram_mask; + for (h1 = h; h1-- > 0; dest += dstride) { + bgr_to_fb(dest, vram + vram_ofs, w_blit); + vram_ofs = (vram_ofs + sstride) & vram_mask; } hwrapped = (vram_ofs & 2047) + w * bytes_pp_s - 2048; if (!enhres && hwrapped > 0) { // this is super-rare so just fix-up - vram_ofs = (vram_ofs - h * 2048) & 0xff800; + w_blit = hwrapped / bytes_pp_s; + vram_ofs = (vram_ofs - h * sstride) & 0xff800; dest -= dstride * h; dest += (w - hwrapped / bytes_pp_s) * bytes_pp; for (h1 = h; h1-- > 0; dest += dstride) { - bgr_to_fb(dest, vram + vram_ofs, hwrapped); - vram_ofs = (vram_ofs + 2048) & 0xfffff; + bgr_to_fb(dest, vram + vram_ofs, w_blit); + vram_ofs = (vram_ofs + sstride) & 0xfffff; } } @@ -721,6 +756,7 @@ struct rearmed_cbs pl_rearmed_cbs = { .pl_vout_set_mode = vout_set_mode, .pl_vout_flip = vout_flip, .pl_vout_close = vout_close, + .cspace_blit = bgr_to_fb_empty, .mmap = pl_mmap, .munmap = pl_munmap, .gpu_state_change = gpu_state_change, @@ -971,7 +1007,6 @@ static bool update_option_visibility(void) "pcsx_rearmed_gpu_unai_skipline", "pcsx_rearmed_gpu_unai_lighting", "pcsx_rearmed_gpu_unai_fast_lighting", - "pcsx_rearmed_gpu_unai_scale_hires", }; option_display.visible = show_advanced_gpu_unai_settings; @@ -2605,6 +2640,17 @@ static void update_variables(bool in_flight) pl_rearmed_cbs.show_overscan = 0; } + var.key = "pcsx_rearmed_scale_hires"; + var.value = NULL; + + if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) + { 
+ if (strcmp(var.value, "disabled") == 0) + pl_rearmed_cbs.scale_hires = 0; + else if (strcmp(var.value, "enabled") == 0) + pl_rearmed_cbs.scale_hires = 1; + } + #ifdef USE_ASYNC_GPU var.key = "pcsx_rearmed_gpu_thread_rendering"; var.value = NULL; @@ -2699,7 +2745,7 @@ static void update_variables(bool in_flight) /* Note: This used to be an option, but it only works * (correctly) when running high resolution games * (480i, 512i) and has been obsoleted by - * pcsx_rearmed_gpu_unai_scale_hires */ + * pcsx_rearmed_scale_hires */ pl_rearmed_cbs.gpu_unai.ilace_force = 0; var.key = "pcsx_rearmed_gpu_unai_old_renderer"; @@ -2756,17 +2802,6 @@ static void update_variables(bool in_flight) else if (strcmp(var.value, "enabled") == 0) pl_rearmed_cbs.gpu_unai.blending = 1; } - - var.key = "pcsx_rearmed_gpu_unai_scale_hires"; - var.value = NULL; - - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) - { - if (strcmp(var.value, "disabled") == 0) - pl_rearmed_cbs.gpu_unai.scale_hires = 0; - else if (strcmp(var.value, "enabled") == 0) - pl_rearmed_cbs.gpu_unai.scale_hires = 1; - } #endif // GPU_UNAI var.value = NULL; diff --git a/frontend/libretro_core_options.h b/frontend/libretro_core_options.h index 8fa70bb8..dc0d4d7a 100644 --- a/frontend/libretro_core_options.h +++ b/frontend/libretro_core_options.h @@ -455,6 +455,24 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, + { + "pcsx_rearmed_scale_hires", + "Hi-Res Downscaling", + NULL, + "When enabled, games that run in high resolution video modes (480i, 512i) will be downscaled to 320x240 by skipping lines and/or columns. 
May be useful on some devices with native 240p display resolutions that lack efficient hardware scaling.", + NULL, + "video", + { + { "disabled", NULL }, + { "enabled", NULL }, + { NULL, NULL}, + }, +#ifdef _MIYOO + "enabled", +#else + "disabled", +#endif + }, { "pcsx_rearmed_gpu_slow_llists", "(GPU) Slow linked list processing", @@ -817,24 +835,6 @@ struct retro_core_option_v2_definition option_defs_us[] = { }, "disabled", }, - { - "pcsx_rearmed_gpu_unai_scale_hires", - "(GPU) Hi-Res Downscaling", - "Hi-Res Downscaling", - "When enabled, games that run in high resolution video modes (480i, 512i) will be downscaled to 320x240. Can improve performance, and is recommended on devices with native 240p display resolutions.", - NULL, - "gpu_unai", - { - { "disabled", NULL }, - { "enabled", NULL }, - { NULL, NULL}, - }, -#ifdef _MIYOO - "enabled", -#else - "disabled", -#endif - }, #endif /* GPU_UNAI */ { "pcsx_rearmed_spu_reverb", diff --git a/frontend/menu.c b/frontend/menu.c index c22f3d85..3607fd5c 100644 --- a/frontend/menu.c +++ b/frontend/menu.c @@ -454,13 +454,13 @@ static const struct { CE_INTVAL_V(frameskip, 4), CE_INTVAL_PV(dithering, 2), CE_INTVAL_P(thread_rendering), + CE_INTVAL_P(scale_hires), CE_INTVAL_P(gpu_peops.dwActFixes), CE_INTVAL_P(gpu_unai.old_renderer), CE_INTVAL_P(gpu_unai.ilace_force), CE_INTVAL_P(gpu_unai.lighting), CE_INTVAL_P(gpu_unai.fast_lighting), CE_INTVAL_P(gpu_unai.blending), - CE_INTVAL_P(gpu_unai.scale_hires), CE_INTVAL_P(gpu_neon.allow_interlace), CE_INTVAL_P(gpu_neon.enhancement_enable), CE_INTVAL_P(gpu_neon.enhancement_no_main), @@ -857,9 +857,9 @@ static void draw_savestate_bg(int slot) for (; h > 0; h--, d += g_menuscreen_w, s += 1024) { if (gpu->ulStatus & 0x200000) - bgr888_to_rgb565(d, s, w * 3); + bgr888_to_rgb565(d, s, w); else - bgr555_to_rgb565(d, s, w * 2); + bgr555_to_rgb565(d, s, w); // darken this so that menu text is visible if (g_menuscreen_w - w < 320) @@ -1312,6 +1312,8 @@ static const char h_cscaler[] = "Displays 
the scaler layer, you can resize it\ "using d-pad or move it using R+d-pad"; static const char h_soft_filter[] = "Works only if game uses low resolution modes"; static const char h_gamma[] = "Gamma/brightness adjustment (default 100)"; +static const char h_lowres[] = "Forces all PSX high resolutions to 320x240 or lower\n" + "by skipping lines and pixels"; #ifdef HAVE_NEON32 static const char *men_scanlines[] = { "OFF", "1", "2", "3", NULL }; static const char h_scanline_l[] = "Scanline brightness, 0-100%"; @@ -1416,6 +1418,7 @@ static menu_entry e_menu_gfx_options[] = mee_range_h ("Gamma adjustment", MA_OPT_GAMMA, g_gamma, 1, 200, h_gamma), mee_onoff ("OpenGL Vsync", MA_OPT_VSYNC, g_opts, OPT_VSYNC), mee_cust_h ("Setup custom scaler", MA_OPT_VARSCALER_C, menu_loop_cscaler, NULL, h_cscaler), + mee_onoff_h ("Force low resolution", 0, pl_rearmed_cbs.scale_hires, 1, h_lowres), mee_end, }; @@ -1455,7 +1458,6 @@ static menu_entry e_menu_plugin_gpu_unai[] = mee_onoff ("Lighting", 0, pl_rearmed_cbs.gpu_unai.lighting, 1), mee_onoff ("Fast lighting", 0, pl_rearmed_cbs.gpu_unai.fast_lighting, 1), mee_onoff ("Blending", 0, pl_rearmed_cbs.gpu_unai.blending, 1), - mee_onoff ("Downscale Hi-Res", 0, pl_rearmed_cbs.gpu_unai.scale_hires, 1), mee_end, }; @@ -1823,7 +1825,7 @@ static void draw_frame_debug(GPUFreeze_t *gpuf, int x, int y) GPU_freeze(1, gpuf); for (; h > 0; h--, d += g_menuscreen_w, s += 1024) - bgr555_to_rgb565(d, s, w * 2); + bgr555_to_rgb565(d, s, w); smalltext_out16(4, 1, "build: "__DATE__ " " __TIME__ " " REV, 0xe7fc); snprintf(buff, sizeof(buff), "GPU sr: %08x", gpuf->ulStatus); @@ -2769,7 +2771,7 @@ static void menu_leave_emu(void) } else { for (; h > 0; h--, d += g_menuscreen_w, s += last_vout_w * 3) { - rgb888_to_rgb565(d, s, w * 3); + rgb888_to_rgb565(d, s, w); menu_darken_bg(d, d, w, 0); } } diff --git a/frontend/plat_pollux.c b/frontend/plat_pollux.c index a27b410d..0aa9924e 100644 --- a/frontend/plat_pollux.c +++ b/frontend/plat_pollux.c @@ -193,7 +193,7 @@ 
void plat_video_menu_enter(int is_rom_loaded) if (pl_vout_buf != NULL) { if (psx_bpp == 16) // have to do rgb conversion for menu bg - bgr555_to_rgb565(pl_vout_buf, pl_vout_buf, 320*240*2); + bgr555_to_rgb565(pl_vout_buf, pl_vout_buf, 320*240); else memset(pl_vout_buf, 0, 320*240*2); } @@ -228,7 +228,7 @@ void plat_video_menu_leave(void) void *plat_prepare_screenshot(int *w, int *h, int *bpp) { - bgr555_to_rgb565(pl_vout_buf, pl_vout_buf, 320*240*2); + bgr555_to_rgb565(pl_vout_buf, pl_vout_buf, 320*240); *w = 320; *h = 240; *bpp = psx_bpp; diff --git a/frontend/plat_sdl.c b/frontend/plat_sdl.c index a7dd0a03..ca0bdf74 100644 --- a/frontend/plat_sdl.c +++ b/frontend/plat_sdl.c @@ -484,11 +484,11 @@ static void centered_blit(int doffs, const void *src_, int w, int h, if (bgr24) { for (; h > 0; dst += dstride, src += sstride, h--) - bgr888_to_rgb565(dst, src, w * 3); + bgr888_to_rgb565(dst, src, w); } else { for (; h > 0; dst += dstride, src += sstride, h--) - bgr555_to_rgb565(dst, src, w * 2); + bgr555_to_rgb565(dst, src, w); } if (SDL_MUSTLOCK(plat_sdl_screen)) diff --git a/frontend/plugin_lib.c b/frontend/plugin_lib.c index 324fede5..d55b861e 100644 --- a/frontend/plugin_lib.c +++ b/frontend/plugin_lib.c @@ -249,6 +249,16 @@ void pl_update_layer_size(int w, int h, int fw, int fh) if (g_layer_h > fh * 2) g_layer_h = fh * 2; } +static const struct cspace_func_type { + void (*blit)(void *dst, const void *src, int dst_pixels); + void (*blit_dscale640)(void *dst, const void *src, int dst_pixels); + void (*blit_dscale512)(void *dst, const void *src, int dst_pixels); +} cspace_funcs[] = { + { bgr555_to_rgb565, bgr555_to_rgb565_640_to_320, bgr555_to_rgb565_512_to_320 }, + { bgr888_to_rgb888, bgr888_to_rgb888_640_to_320, bgr888_to_rgb888_512_to_320 }, + { bgr888_to_rgb565, bgr888_to_rgb565_640_to_320, bgr888_to_rgb565_512_to_320 }, +}; + // XXX: this is platform specific really static inline int resolution_ok(int w, int h) { @@ -257,6 +267,7 @@ static inline int 
resolution_ok(int w, int h) static void pl_vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp) { + const struct cspace_func_type *cspace_f = cspace_funcs; int vout_w, vout_h, vout_bpp; // special h handling, Wipeout likes to change it by 1-6 @@ -271,8 +282,26 @@ static void pl_vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp) vout_w = w; vout_h = h; vout_bpp = bpp; - if (pl_rearmed_cbs.only_16bpp) - vout_bpp = 16; + if (bpp > 16) { + cspace_f = &cspace_funcs[1]; + if (pl_rearmed_cbs.only_16bpp) { + cspace_f = &cspace_funcs[2]; + vout_bpp = 16; + } + } + pl_rearmed_cbs.cspace_blit = cspace_f->blit; + if (pl_rearmed_cbs.scale_hires) { + if (raw_w >= 640-4) { + pl_rearmed_cbs.cspace_blit = cspace_f->blit_dscale640; + vout_w = 320; + } + else if (raw_w >= 512-4) { + pl_rearmed_cbs.cspace_blit = cspace_f->blit_dscale512; + vout_w = 320; + } + if (vout_h > 256) + vout_h /= 2; + } assert(vout_h >= 192); @@ -321,9 +350,11 @@ void pl_force_clear(void) static void pl_vout_flip(const void *vram_, int vram_ofs, int bgr24, int x, int y, int w, int h, int dims_changed) { + void (*blit)(void *dst, const void *src, int bytes); unsigned char *dest = pl_vout_buf; const unsigned char *vram = vram_; - int dstride = pl_vout_w, h1 = h; + int dstride = pl_vout_w, h1; + int sstride = 2048; int h_full = pl_vout_h; int enhres = w > psx_w; int xoffs = 0, doffs; @@ -341,9 +372,6 @@ static void pl_vout_flip(const void *vram_, int vram_ofs, int bgr24, goto out_hud; } - assert(x + w <= pl_vout_w); - assert(y + h <= pl_vout_h); - // offset xoffs = x * pl_vout_scale_w; doffs = xoffs + y * pl_vout_scale_h * dstride; @@ -371,23 +399,32 @@ static void pl_vout_flip(const void *vram_, int vram_ofs, int bgr24, dest += doffs * 2; + if (x + w > pl_vout_w) + w = pl_vout_w - x; + if (h >= pl_vout_h * 3 / 2) { + sstride = 4096; + h /= 2; + } + assert(y + h <= pl_vout_h); + blit = pl_rearmed_cbs.cspace_blit; + if (bgr24) { hwrapped = (vram_ofs & 2047) + w * 3 - 2048; if 
(pl_rearmed_cbs.only_16bpp) { - for (; h1-- > 0; dest += dstride * 2) { - bgr888_to_rgb565(dest, vram + vram_ofs, w * 3); - vram_ofs = (vram_ofs + 2048) & 0xfffff; + for (h1 = h; h1-- > 0; dest += dstride * 2) { + blit(dest, vram + vram_ofs, w); + vram_ofs = (vram_ofs + sstride) & 0xfffff; } if (hwrapped > 0) { // this is super-rare so just fix-up - vram_ofs = (vram_ofs - h * 2048) & 0xff800; + vram_ofs = (vram_ofs - h * sstride) & 0xff800; dest -= dstride * 2 * h; dest += (w - hwrapped / 3) * 2; for (h1 = h; h1-- > 0; dest += dstride * 2) { - bgr888_to_rgb565(dest, vram + vram_ofs, hwrapped); - vram_ofs = (vram_ofs + 2048) & 0xfffff; + blit(dest, vram + vram_ofs, hwrapped / 3); /* hwrapped is in bytes of 3-byte source pixels; blit takes a pixel count */ + vram_ofs = (vram_ofs + sstride) & 0xfffff; } } } @@ -395,18 +432,18 @@ static void pl_vout_flip(const void *vram_, int vram_ofs, int bgr24, dest -= doffs * 2; dest += (doffs / 8) * 24; - for (; h1-- > 0; dest += dstride * 3) { - bgr888_to_rgb888(dest, vram + vram_ofs, w * 3); - vram_ofs = (vram_ofs + 2048) & 0xfffff; + for (h1 = h; h1-- > 0; dest += dstride * 3) { + blit(dest, vram + vram_ofs, w); + vram_ofs = (vram_ofs + sstride) & 0xfffff; } if (hwrapped > 0) { - vram_ofs = (vram_ofs - h * 2048) & 0xff800; + vram_ofs = (vram_ofs - h * sstride) & 0xff800; dest -= dstride * 3 * h; dest += w * 3 - hwrapped; for (h1 = h; h1-- > 0; dest += dstride * 3) { - bgr888_to_rgb888(dest, vram + vram_ofs, hwrapped); - vram_ofs = (vram_ofs + 2048) & 0xfffff; + blit(dest, vram + vram_ofs, hwrapped / 3); + vram_ofs = (vram_ofs + sstride) & 0xfffff; } } } @@ -425,20 +462,20 @@ static void pl_vout_flip(const void *vram_, int vram_ofs, int bgr24, else if (scanlines != 0 && scanline_level != 100) { int h2, l = scanline_level * 2048 / 100; - int stride_0 = pl_vout_scale_h >= 2 ? 0 : 2048; + int stride_0 = pl_vout_scale_h >= 2 ?
0 : sstride; - h1 *= pl_vout_scale_h; + h1 = h * pl_vout_scale_h; while (h1 > 0) { for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) { - bgr555_to_rgb565(dest, vram + vram_ofs, w * 2); + bgr555_to_rgb565(dest, vram + vram_ofs, w); vram_ofs = (vram_ofs + stride_0) & 0xfffff; dest += dstride * 2; } for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) { - bgr555_to_rgb565_b(dest, vram + vram_ofs, w * 2, l); - vram_ofs = (vram_ofs + 2048) & 0xfffff; + bgr555_to_rgb565_b(dest, vram + vram_ofs, w, l); + vram_ofs = (vram_ofs + sstride) & 0xfffff; dest += dstride * 2; } } @@ -447,19 +484,19 @@ static void pl_vout_flip(const void *vram_, int vram_ofs, int bgr24, else { unsigned int vram_mask = enhres ? ~0 : 0xfffff; - for (; h1-- > 0; dest += dstride * 2) { - bgr555_to_rgb565(dest, vram + vram_ofs, w * 2); - vram_ofs = (vram_ofs + 2048) & vram_mask; + for (h1 = h; h1-- > 0; dest += dstride * 2) { + blit(dest, vram + vram_ofs, w); + vram_ofs = (vram_ofs + sstride) & vram_mask; } hwrapped = (vram_ofs & 2047) + w * 2 - 2048; if (!enhres && hwrapped > 0) { - vram_ofs = (vram_ofs - h * 2048) & 0xff800; + vram_ofs = (vram_ofs - h * sstride) & 0xff800; dest -= dstride * 2 * h; dest += w * 2 - hwrapped; for (h1 = h; h1-- > 0; dest += dstride * 2) { - bgr555_to_rgb565(dest, vram + vram_ofs, hwrapped); - vram_ofs = (vram_ofs + 2048) & 0xfffff; + blit(dest, vram + vram_ofs, hwrapped / 2); + vram_ofs = (vram_ofs + sstride) & 0xfffff; } } } @@ -863,6 +900,7 @@ struct rearmed_cbs pl_rearmed_cbs = { pl_vout_flip, pl_vout_close, + .cspace_blit = bgr555_to_rgb565, .mmap = pl_mmap, .munmap = pl_munmap, .pl_set_gpu_caps = pl_set_gpu_caps, diff --git a/frontend/plugin_lib.h b/frontend/plugin_lib.h index b75443a6..11282d49 100644 --- a/frontend/plugin_lib.h +++ b/frontend/plugin_lib.h @@ -54,6 +54,7 @@ struct rearmed_cbs { void (*pl_vout_flip)(const void *vram, int vram_offset, int bgr24, int x, int y, int w, int h, int dims_changed); void (*pl_vout_close)(void); + void (*cspace_blit)(void *dst, 
const void *src, int bytes); void *(*mmap)(unsigned int size); void (*munmap)(void *ptr, unsigned int size); // only used by some frontends @@ -78,6 +79,7 @@ struct rearmed_cbs { unsigned int only_16bpp; // platform is 16bpp-only unsigned int thread_rendering; unsigned int dithering; // 0 off, 1 on, 2 force + unsigned int scale_hires; struct { int allow_interlace; // 0 off, 1 on, 2 guess int enhancement_enable; @@ -95,7 +97,6 @@ struct rearmed_cbs { int lighting; int fast_lighting; int blending; - int scale_hires; } gpu_unai; struct { int dwActFixes; diff --git a/plugins/gpu_unai/gpu.h b/plugins/gpu_unai/gpu.h index 009cdfc4..55b5abab 100644 --- a/plugins/gpu_unai/gpu.h +++ b/plugins/gpu_unai/gpu.h @@ -38,10 +38,6 @@ struct gpu_unai_config_t { // Normally 0. Value '1' will skip rendering // odd lines. - uint8_t scale_hires:1; // If 1, will scale hi-res output to - // 320x240 when gpulib reads the frame. - // Implies pixel_skip and ilace_force - // (when height > 240). uint8_t lighting:1; uint8_t fast_lighting:1; uint8_t blending:1; diff --git a/plugins/gpu_unai/gpu_unai.h b/plugins/gpu_unai/gpu_unai.h index ce2dac42..91cdb8af 100644 --- a/plugins/gpu_unai/gpu_unai.h +++ b/plugins/gpu_unai/gpu_unai.h @@ -376,18 +376,4 @@ static inline bool ProgressiveInterlaceEnabled() #endif } -// For now, 320x240 output resolution is assumed, using simple line-skipping -// and pixel-skipping downscaler. -// TODO: Flesh these out so they return useful values based on whether -// running on higher-res device or a resampling downscaler is enabled. 
-static inline bool PixelSkipEnabled() -{ - return gpu_unai.config.pixel_skip || gpu_unai.config.scale_hires; -} - -static inline bool LineSkipEnabled() -{ - return true; -} - #endif // GPU_UNAI_H diff --git a/plugins/gpu_unai/gpulib_if.cpp b/plugins/gpu_unai/gpulib_if.cpp index 03b04c2f..71eccb1a 100644 --- a/plugins/gpu_unai/gpulib_if.cpp +++ b/plugins/gpu_unai/gpulib_if.cpp @@ -59,159 +59,6 @@ #define IS_OLD_RENDERER() false #endif -#define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096) - -INLINE void scale_640_to_320(le16_t *dest, const le16_t *src, bool isRGB24) { - size_t uCount = 320; - - if(isRGB24) { - const uint8_t* src8 = (const uint8_t *)src; - uint8_t* dst8 = (uint8_t *)dest; - - do { - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8; - src8 += 4; - } while(--uCount); - } else { - const le16_t* src16 = src; - le16_t* dst16 = dest; - - do { - *dst16++ = *src16; - src16 += 2; - } while(--uCount); - } -} - -INLINE void scale_512_to_320(le16_t *dest, const le16_t *src, bool isRGB24) { - size_t uCount = 64; - - if(isRGB24) { - const uint8_t* src8 = (const uint8_t *)src; - uint8_t* dst8 = (uint8_t *)dest; - - do { - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8; - src8 += 4; - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8; - src8 += 4; - *dst8++ = *src8++; - *dst8++ = *src8++; - *dst8++ = *src8; - src8 += 4; - } while(--uCount); - } else { - const le16_t* src16 = src; - le16_t* dst16 = dest; - - do { - *dst16++ = *src16++; - *dst16++ = *src16; - src16 += 2; - *dst16++ = *src16++; - *dst16++ = *src16; - src16 += 2; - *dst16++ = *src16; - src16 += 2; - } while(--uCount); - } -} - -static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_h) -{ - le16_t *dest = gpu_unai.downscale_vram; - const le16_t *src = gpu_unai.vram; - bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? 
true : false); - int stride = 1024, dstride = 1024, lines = *h, orig_w = *w; - - // PS1 fb read wraps around (fixes black screen in 'Tobal no. 1') - unsigned int fb_mask = 1024 * 512 - 1; - - if (*h > 240) { - *h /= 2; - stride *= 2; - lines = *h; - - // Ensure start at a non-skipped line - while (*y & gpu_unai.inn.ilace_mask) ++*y; - } - - unsigned int fb_offset_src = (*y * dstride + *x) & fb_mask; - unsigned int fb_offset_dest = fb_offset_src; - - if (*w == 512 || *w == 640) { - *w = 320; - } - - switch(orig_w) { - case 640: - do { - scale_640_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24); - fb_offset_src = (fb_offset_src + stride) & fb_mask; - fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; - } while(--lines); - - break; - case 512: - do { - scale_512_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24); - fb_offset_src = (fb_offset_src + stride) & fb_mask; - fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; - } while(--lines); - break; - default: - size_t size = isRGB24 ? 
*w * 3 : *w * 2; - - do { - memcpy(dest + fb_offset_dest, src + fb_offset_src, size); - fb_offset_src = (fb_offset_src + stride) & fb_mask; - fb_offset_dest = (fb_offset_dest + dstride) & fb_mask; - } while(--lines); - break; - } - - return (uint16_t *)gpu_unai.downscale_vram; -} - -static void map_downscale_buffer(void) -{ - if (gpu_unai.downscale_vram) - return; - - gpu_unai.downscale_vram = (le16_t*)gpu.mmap(DOWNSCALE_VRAM_SIZE); - - if (gpu_unai.downscale_vram == NULL || gpu_unai.downscale_vram == (le16_t *)(intptr_t)-1) { - SysPrintf("failed to map downscale buffer\n"); - gpu_unai.downscale_vram = NULL; - gpu.get_downscale_buffer = NULL; - } - else { - gpu.get_downscale_buffer = get_downscale_buffer; - } -} - -static void unmap_downscale_buffer(void) -{ - if (gpu_unai.downscale_vram == NULL) - return; - - gpu.munmap(gpu_unai.downscale_vram, DOWNSCALE_VRAM_SIZE); - gpu_unai.downscale_vram = NULL; - gpu.get_downscale_buffer = NULL; -} - int renderer_init(void) { memset((void*)&gpu_unai, 0, sizeof(gpu_unai)); @@ -255,25 +102,18 @@ int renderer_init(void) SetupLightLUT(); SetupDitheringConstants(); - if (gpu_unai.config.scale_hires) { - map_downscale_buffer(); - } - return 0; } void renderer_finish(void) { - unmap_downscale_buffer(); } void renderer_notify_screen_change(const struct psx_gpu_screen *screen) { gpu_unai.inn.ilace_mask = gpu_unai.config.ilace_force; -#ifndef HAVE_PRE_ARMV7 /* XXX */ - if (gpu_unai.config.scale_hires) -#endif + if (gpu.state.downscale_enable) { gpu_unai.inn.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE); } @@ -866,16 +706,10 @@ void renderer_set_config(const struct rearmed_cbs *cbs) gpu_unai.config.lighting = cbs->gpu_unai.lighting; gpu_unai.config.fast_lighting = cbs->gpu_unai.fast_lighting; gpu_unai.config.blending = cbs->gpu_unai.blending; - gpu_unai.config.scale_hires = cbs->gpu_unai.scale_hires; gpu_unai.config.dithering = cbs->dithering != 0; gpu_unai.config.force_dithering = cbs->dithering >> 1; - 
gpu.state.downscale_enable = gpu_unai.config.scale_hires; - if (gpu_unai.config.scale_hires) { - map_downscale_buffer(); - } else { - unmap_downscale_buffer(); - } + renderer_notify_screen_change(&gpu.screen); oldunai_renderer_set_config(cbs); } diff --git a/plugins/gpulib/gpu.c b/plugins/gpulib/gpu.c index 966aeeb4..dab5d53b 100644 --- a/plugins/gpulib/gpu.c +++ b/plugins/gpulib/gpu.c @@ -1064,6 +1064,7 @@ void GPUrearmedCallbacks(const struct rearmed_cbs *cbs) gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count; gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace; gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable; + gpu.state.downscale_enable = cbs->scale_hires; gpu.state.screen_centering_type_default = cbs->screen_centering_type_default; if (gpu.state.screen_centering_type != cbs->screen_centering_type || gpu.state.screen_centering_x != cbs->screen_centering_x diff --git a/plugins/gpulib/gpu.h b/plugins/gpulib/gpu.h index 163546d5..7a2bcab0 100644 --- a/plugins/gpulib/gpu.h +++ b/plugins/gpulib/gpu.h @@ -83,7 +83,6 @@ struct psx_gpu { uint32_t enhancement_active:1; uint32_t enhancement_was_active:1; uint32_t downscale_enable:1; - uint32_t downscale_active:1; uint32_t dims_changed:1; uint32_t show_overscan:2; uint32_t *frame_count; diff --git a/plugins/gpulib/gpu_async.c b/plugins/gpulib/gpu_async.c index f2ba9d48..137066b7 100644 --- a/plugins/gpulib/gpu_async.c +++ b/plugins/gpulib/gpu_async.c @@ -458,7 +458,7 @@ void gpu_async_notify_screen_change(struct psx_gpu *gpu) { union cmd_screen_change cmd; - if (!gpu->async || !gpu->state.enhancement_active) // gpu_neon only + if (!gpu->async) return; cmd.cmd = HTOLE32(FAKECMD_SCREEN_CHANGE << 24); cmd.x = gpu->screen.x; diff --git a/plugins/gpulib/vout_pl.c b/plugins/gpulib/vout_pl.c index 65e3de53..1baf9435 100644 --- a/plugins/gpulib/vout_pl.c +++ b/plugins/gpulib/vout_pl.c @@ -52,15 +52,6 @@ static void check_mode_change(int force) bpp = 24; } - gpu.state.downscale_active = - 
gpu.get_downscale_buffer != NULL && gpu.state.downscale_enable - && (w >= 512 || h >= 256); - - if (gpu.state.downscale_active) { - w_out = w < 512 ? w : 320; - h_out = h < 256 ? h : h / 2; - } - // width|rgb24 change? if (force || (gpu.status ^ gpu.state.status_vo_old) & ((7<<16)|(1<<21)) || w_out != gpu.state.w_out_old || h_out != gpu.state.h_out_old) @@ -109,9 +100,6 @@ int vout_update(void) src_x2 *= 2; } - if (gpu.state.downscale_active) - vram = (void *)gpu.get_downscale_buffer(&src_x, &src_y, &w, &h, &vram_h); - if (src_y + h > vram_h) { if (src_y + h - vram_h > h / 2) { // wrap @@ -123,6 +111,10 @@ int vout_update(void) h = vram_h - src_y; } + // gpu_unai skips drawing odd lines + if (h > 256 && gpu.state.downscale_enable && (src_y & 1)) + src_y++; + offset = (src_y * 1024 + src_x) * 2; offset += src_x2 * bpp / 8;