#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define SWAP16(x) __builtin_bswap16(x)
#define LE16TOHx2(x) ((SWAP16((x) >> 16) << 16) | SWAP16(x))
+#define LE32TOH(x) __builtin_bswap32(x)
#else
#define LE16TOHx2(x) (x)
+#define LE32TOH(x) (x)
#endif
+/*
+ * Convert two packed 15-bit pixels (one per 16-bit half of p) at once:
+ * the 5-bit field at bits 0-4 moves to bits 11-15, the field at bits 5-9
+ * moves up by one bit, and the field at bits 10-14 moves down to bits 0-4.
+ * Bit 15 of each half is discarded.
+ */
+static inline uint32_t bgr555_to_rgb565_pair(uint32_t p)
+{
+  const uint32_t lo5  = p & 0x001f001f;
+  const uint32_t mid5 = p & 0x03e003e0;
+  const uint32_t hi5  = p & 0x7c007c00;
+
+  return (lo5 << 11) | (mid5 << 1) | (hi5 >> 10);
+}
+
+/*
+ * Pack two 3-byte pixels into one 32-bit word holding two 16-bit 5:6:5
+ * pixels. The first pixel is read from src[0..2]; the second is read from
+ * src[o2..o2+2], i.e. o2 is a byte offset relative to src.
+ * The half-word order is chosen so that, once the word is stored to memory,
+ * the first pixel sits at the lower address on either endianness.
+ */
+static inline uint32_t bgr888_to_rgb565_pair(const uint8_t * __restrict__ src, int o2)
+{
+  const uint8_t *second = src + o2;
+  uint32_t px1, px2;
+
+  px1 = ((src[0] & 0xf8u) << 8) | ((src[1] & 0xfcu) << 3) | ((src[2] & 0xf8u) >> 3);
+  px2 = ((second[0] & 0xf8u) << 8) | ((second[1] & 0xfcu) << 3) | ((second[2] & 0xf8u) >> 3);
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  return (px1 << 16) | px2;
+#else
+  return (px2 << 16) | px1;
+#endif
+}
+
#if defined(HAVE_bgr555_to_rgb565)
/* have bgr555_to_rgb565 somewhere else */
gsri(d_, s1, 11); \
}
-void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
+void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_,
+ int pixels)
{
const uint16_t * __restrict__ src = src_;
uint16_t * __restrict__ dst = dst_;
gvu16 c0x07c0 = gdup(0x07c0);
- assert(!(((uintptr_t)dst | (uintptr_t)src | bytes) & 1));
+ assert(!(((uintptr_t)dst | (uintptr_t)src) & 1));
// align the destination
if ((uintptr_t)dst & 0x0e)
*(gvu16u *)dst = d;
dst += left / 2;
src += left / 2;
- bytes -= left;
+ pixels -= left / 2;
}
// go
- for (; bytes >= 16; dst += 8, src += 8, bytes -= 16)
+ for (; pixels >= 8; dst += 8, src += 8, pixels -= 8)
{
gvu16 d, s = *(const gvu16u *)src;
do_one_simd(d, s, c0x07c0);
__builtin_prefetch(src + 128/2);
}
// finish it
- for (; bytes > 0; dst++, src++, bytes -= 2)
+ for (; pixels > 0; dst++, src++, pixels--)
*dst = do_one(*src);
}
#undef do_one
#else
-void bgr555_to_rgb565(void *dst_, const void *src_, int bytes)
+void bgr555_to_rgb565(void * __restrict__ dst_, const void * __restrict__ src_,
+ int pixels)
{
// source can be misaligned, but it's very rare, so just force
- const unsigned int *src = (const void *)((intptr_t)src_ & ~3);
- unsigned int *dst = dst_;
- unsigned int x, p, r, g, b;
+ const uint32_t * __restrict__ src = (const void *)((intptr_t)src_ & ~3);
+ uint32_t x, * __restrict__ dst = dst_;
- for (x = 0; x < bytes / 4; x++) {
- p = LE16TOHx2(src[x]);
-
- r = (p & 0x001f001f) << 11;
- g = (p & 0x03e003e0) << 1;
- b = (p & 0x7c007c00) >> 10;
-
- dst[x] = r | g | b;
- }
+ for (x = 0; x < pixels / 2; x++)
+ dst[x] = bgr555_to_rgb565_pair(LE16TOHx2(src[x]));
}
#endif
+/* Copy one 3-byte pixel from src to dst with the byte order reversed. */
+static inline void bgr888_to_rgb888_one(uint8_t * __restrict__ dst,
+  const uint8_t * __restrict__ src)
+{
+  int i;
+
+  for (i = 0; i < 3; i++)
+    dst[i] = src[2 - i];
+}
+
#ifndef HAVE_bgr888_to_x
-void attr_weak bgr888_to_rgb565(void *dst_, const void *src_, int bytes)
+void attr_weak bgr888_to_rgb565(void * __restrict__ dst_,
+ const void * __restrict__ src_, int pixels)
{
- const unsigned char *src = src_;
- unsigned int *dst = dst_;
- unsigned int r1, g1, b1, r2, g2, b2;
-
- for (; bytes >= 6; bytes -= 6, src += 6, dst++) {
- r1 = src[0] & 0xf8;
- g1 = src[1] & 0xfc;
- b1 = src[2] & 0xf8;
- r2 = src[3] & 0xf8;
- g2 = src[4] & 0xfc;
- b2 = src[5] & 0xf8;
-#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- *dst = (r1 << 24) | (g1 << 19) | (b1 << 13) |
- (r2 << 8) | (g2 << 3) | (b2 >> 3);
-#else
- *dst = (r2 << 24) | (g2 << 19) | (b2 << 13) |
- (r1 << 8) | (g1 << 3) | (b1 >> 3);
-#endif
- }
+ const uint8_t * __restrict__ src = src_;
+ uint32_t * __restrict__ dst = dst_;
+
+ for (; pixels >= 2; pixels -= 2, src += 3*2, dst++)
+ *dst = bgr888_to_rgb565_pair(src, 3);
}
// TODO?
-void rgb888_to_rgb565(void *dst, const void *src, int bytes) {}
-void bgr888_to_rgb888(void *dst, const void *src, int bytes) {}
+void rgb888_to_rgb565(void *dst, const void *src, int pixels) {}
+
+/*
+ * Portable fallback: convert `pixels` 3-byte pixels from src_ to dst_,
+ * reversing the byte order of each one. Nothing is written when
+ * pixels <= 0.
+ */
+void bgr888_to_rgb888(void * __restrict__ dst_,
+  const void * __restrict__ src_, int pixels)
+{
+  const uint8_t * __restrict__ in = src_;
+  uint8_t * __restrict__ out = dst_;
+  int i;
+
+  for (i = 0; i < pixels; i++)
+    bgr888_to_rgb888_one(out + i * 3, in + i * 3);
+}
#endif // HAVE_bgr888_to_x
-void bgr555_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
+/*
+ * Expand one 15-bit pixel to a 32-bit 8:8:8 value: bits 0-4 of p end up in
+ * bits 16-23 of the result, bits 5-9 in bits 8-15 and bits 10-14 in
+ * bits 0-7. The top 3 bits of each 5-bit field are replicated into the low
+ * 3 bits of its 8-bit channel so that 0x1f maps to 0xff.
+ */
+static inline uint32_t bgr555_to_xrgb8888_one(uint16_t p)
+{
+  // Widen before shifting: a uint16_t promotes to (signed) int, and
+  // p << 19 overflows int - undefined behaviour - for any p >= 0x1000.
+  const uint32_t v = p;
+  uint32_t t = ((v << 19) | (v >> 7)) & 0xf800f8;
+  t |= (v << 6) & 0xf800;
+  return t | ((t >> 5) & 0x070707);
+}
+
+/*
+ * Build one 32-bit pixel from three source bytes: src[0] lands in
+ * bits 16-23, src[1] in bits 8-15 and src[2] in bits 0-7; the top byte
+ * is zero.
+ */
+static inline uint32_t bgr888_to_xrgb8888_one(const uint8_t * __restrict__ src)
+{
+  uint32_t v = src[0];
+
+  v = (v << 8) | src[1];
+  v = (v << 8) | src[2];
+  return v;
+}
+
+void bgr555_to_xrgb8888(void * __restrict__ dst_,
+ const void * __restrict__ src_, int pixels)
{
const uint16_t * __restrict__ src = src_;
uint32_t * __restrict__ dst = dst_;
- for (; bytes >= 2; bytes -= 2, src++, dst++)
- {
- uint32_t t = ((*src << 19) | (*src >> 7)) & 0xf800f8;
- t |= (*src << 6) & 0xf800;
- *dst = t | ((t >> 5) & 0x070707);
+ for (; pixels >= 1; pixels--, src++, dst++)
+ *dst = bgr555_to_xrgb8888_one(*src);
+}
+
+/*
+ * Convert `pixels` 3-byte source pixels to 32-bit destination pixels,
+ * one at a time. Nothing is written when pixels <= 0.
+ */
+void bgr888_to_xrgb8888(void * __restrict__ dst_,
+  const void * __restrict__ src_, int pixels)
+{
+  const uint8_t * __restrict__ in = src_;
+  uint32_t * __restrict__ out = dst_;
+  int i;
+
+  for (i = 0; i < pixels; i++)
+    out[i] = bgr888_to_xrgb8888_one(in + i * 3);
+}
+
+
+/* downscale */
+/*
+ * 2:1 horizontal downscale with 15-bit -> 16-bit conversion: every other
+ * source pixel is dropped. Two destination pixels (one 32-bit store) are
+ * produced from four source pixels per iteration; an odd trailing
+ * destination pixel is not written.
+ */
+void bgr555_to_rgb565_640_to_320(void * __restrict__ dst_,
+  const void * __restrict__ src_, int dpixels)
+{
+  const uint16_t * __restrict__ src = src_;
+  uint32_t * __restrict__ dst = dst_;
+
+  for (; dpixels >= 2; dpixels -= 2, src += 4, dst++) {
+    // widen before shifting: src[2] promotes to signed int and << 16
+    // overflows it (undefined behaviour) when bit 15 of the pixel is set
+    uint32_t p = LE32TOH(src[0] | ((uint32_t)src[2] << 16));
+    *dst = bgr555_to_rgb565_pair(p);
}
}
-void bgr888_to_xrgb8888(void * __restrict__ dst_, const void * __restrict__ src_, int bytes)
+void bgr888_to_rgb565_640_to_320(void * __restrict__ dst_,
+ const void * __restrict__ src_, int dpixels)
{
const uint8_t * __restrict__ src = src_;
uint32_t * __restrict__ dst = dst_;
- for (; bytes >= 3; bytes -= 3, src += 3, dst++)
+ for (; dpixels >= 2; dpixels -= 2, src += 4*3, dst++)
+ *dst = bgr888_to_rgb565_pair(src, 2*3);
+}
+
+/*
+ * 2:1 horizontal downscale of 3-byte pixels with byte-order reversal:
+ * destination pixel i is taken from source pixel 2*i.
+ */
+void bgr888_to_rgb888_640_to_320(void * __restrict__ dst_,
+  const void * __restrict__ src_, int dpixels)
+{
+  const uint8_t * __restrict__ in = src_;
+  uint8_t * __restrict__ out = dst_;
+  int i;
+
+  for (i = 0; i < dpixels; i++)
+    bgr888_to_rgb888_one(out + i * 3, in + i * (2 * 3));
+}
+
+
+/*
+ * 2:1 horizontal downscale from 16-bit source pixels to 32-bit
+ * destination pixels: destination pixel i is taken from source pixel 2*i.
+ */
+void bgr555_to_xrgb8888_640_to_320(void * __restrict__ dst_,
+  const void * __restrict__ src_, int dpixels)
+{
+  const uint16_t * __restrict__ in = src_;
+  uint32_t * __restrict__ out = dst_;
+  int i;
+
+  for (i = 0; i < dpixels; i++)
+    out[i] = bgr555_to_xrgb8888_one(in[i * 2]);
+}
+
+
+void bgr888_to_xrgb8888_640_to_320(void * __restrict__ dst_,
+ const void * __restrict__ src_, int dpixels)
+{
+ const uint8_t * __restrict__ src = src_;
+ uint32_t * __restrict__ dst = dst_;
+
+ for (; dpixels >= 1; dpixels--, src += 3*2, dst++)
*dst = (src[0] << 16) | (src[1] << 8) | src[2];
}
+/*
+ * 8:5 horizontal downscale with 15-bit -> 16-bit conversion.
+ * Each iteration consumes 16 source pixels and stores 10 destination
+ * pixels as five 32-bit words; a remainder of fewer than 10 destination
+ * pixels is not written.
+ */
+void bgr555_to_rgb565_512_to_320(void * __restrict__ dst_,
+  const void * __restrict__ src_, int dpixels)
+{
+  const uint16_t * __restrict__ src = src_;
+  uint32_t * __restrict__ dst = dst_;
+
+  // 16 -> 10 to keep dst aligned
+  for (; dpixels >= 10; dpixels -= 10, src += 16, dst += 5) {
+    // picks a src pixel nearest to the center of the dst pixel.
+    // The high pixel is widened to uint32_t before the shift: as a
+    // promoted signed int, << 16 would overflow (undefined behaviour)
+    // whenever bit 15 of that pixel is set.
+    dst[0] = bgr555_to_rgb565_pair(LE32TOH(src[0] | ((uint32_t)src[2] << 16)));
+    dst[1] = bgr555_to_rgb565_pair(LE32TOH(src[4] | ((uint32_t)src[5] << 16)));
+    dst[2] = bgr555_to_rgb565_pair(LE32TOH(src[7] | ((uint32_t)src[8] << 16)));
+    dst[3] = bgr555_to_rgb565_pair(LE32TOH(src[10] | ((uint32_t)src[12] << 16)));
+    dst[4] = bgr555_to_rgb565_pair(LE32TOH(src[13] | ((uint32_t)src[15] << 16)));
+  }
+}
+
+/*
+ * 8:5 horizontal downscale from 3-byte source pixels to 16-bit 5:6:5
+ * pixels. Each iteration consumes 16 source pixels and stores 10
+ * destination pixels as five 32-bit words; a remainder of fewer than 10
+ * destination pixels is not written.
+ *
+ * Source pixels used per group of 16: 0,2, 4,5, 7,8, 10,12, 13,15 - the
+ * same selection as bgr555_to_rgb565_512_to_320().
+ */
+void bgr888_to_rgb565_512_to_320(void * __restrict__ dst_,
+  const void * __restrict__ src_, int dpixels)
+{
+  const uint8_t * __restrict__ src = src_;
+  uint32_t * __restrict__ dst = dst_;
+
+  for (; dpixels >= 10; dpixels -= 10, src += 16*3, dst += 5) {
+    // The second argument of bgr888_to_rgb565_pair() is a byte offset
+    // relative to its first argument, so it must be the distance between
+    // the two chosen pixels, not the absolute index of the second one
+    // (which would read beyond the current 16-pixel group).
+    dst[0] = bgr888_to_rgb565_pair(src + 3*0,  3*(2 - 0));
+    dst[1] = bgr888_to_rgb565_pair(src + 3*4,  3*(5 - 4));
+    dst[2] = bgr888_to_rgb565_pair(src + 3*7,  3*(8 - 7));
+    dst[3] = bgr888_to_rgb565_pair(src + 3*10, 3*(12 - 10));
+    dst[4] = bgr888_to_rgb565_pair(src + 3*13, 3*(15 - 13));
+  }
+}
+
+/*
+ * 8:5 horizontal downscale of 3-byte pixels with byte-order reversal.
+ * Each iteration turns 8 source pixels into 5 destination pixels by
+ * keeping source pixels 0, 2, 4, 5 and 7; a remainder of fewer than 5
+ * destination pixels is not written.
+ */
+void bgr888_to_rgb888_512_to_320(void * __restrict__ dst_,
+  const void * __restrict__ src_, int dpixels)
+{
+  static const uint8_t pick[5] = { 0, 2, 4, 5, 7 };
+  const uint8_t * __restrict__ src = src_;
+  uint8_t * __restrict__ dst = dst_;
+  int i;
+
+  while (dpixels >= 5) {
+    for (i = 0; i < 5; i++)
+      bgr888_to_rgb888_one(dst + 3 * i, src + 3 * pick[i]);
+    dpixels -= 5;
+    src += 8 * 3;
+    dst += 5 * 3;
+  }
+}
+
+/*
+ * 8:5 horizontal downscale from 16-bit source pixels to 32-bit
+ * destination pixels. Each iteration keeps source pixels 0, 2, 4, 5 and 7
+ * of a group of 8; a remainder of fewer than 5 destination pixels is not
+ * written.
+ */
+void bgr555_to_xrgb8888_512_to_320(void * __restrict__ dst_,
+  const void * __restrict__ src_, int dpixels)
+{
+  static const uint8_t pick[5] = { 0, 2, 4, 5, 7 };
+  const uint16_t * __restrict__ src = src_;
+  uint32_t * __restrict__ dst = dst_;
+  int i;
+
+  // 8 -> 5
+  while (dpixels >= 5) {
+    for (i = 0; i < 5; i++)
+      dst[i] = bgr555_to_xrgb8888_one(src[pick[i]]);
+    dpixels -= 5;
+    src += 8;
+    dst += 5;
+  }
+}
+
+/*
+ * 8:5 horizontal downscale from 3-byte source pixels to 32-bit
+ * destination pixels. Each iteration keeps source pixels 0, 2, 4, 5 and 7
+ * of a group of 8; a remainder of fewer than 5 destination pixels is not
+ * written.
+ */
+void bgr888_to_xrgb8888_512_to_320(void * __restrict__ dst_,
+  const void * __restrict__ src_, int dpixels)
+{
+  static const uint8_t pick[5] = { 0, 2, 4, 5, 7 };
+  const uint8_t * __restrict__ src = src_;
+  uint32_t * __restrict__ dst = dst_;
+  int i;
+
+  while (dpixels >= 5) {
+    for (i = 0; i < 5; i++)
+      dst[i] = bgr888_to_xrgb8888_one(src + pick[i] * 3);
+    dpixels -= 5;
+    src += 8 * 3;
+    dst += 5;
+  }
+}
+
/* YUV stuff */
static int yuv_ry[32], yuv_gy[32], yuv_by[32];
static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];
{
#endif
-void bgr555_to_rgb565(void *dst, const void *src, int bytes);
-void bgr888_to_rgb888(void *dst, const void *src, int bytes);
-void bgr888_to_rgb565(void *dst, const void *src, int bytes);
-void rgb888_to_rgb565(void *dst, const void *src, int bytes);
+void bgr555_to_rgb565(void *dst, const void *src, int pixels);
+void bgr888_to_rgb888(void *dst, const void *src, int pixels);
+void bgr888_to_rgb565(void *dst, const void *src, int pixels);
+void rgb888_to_rgb565(void *dst, const void *src, int pixels);
-void bgr555_to_rgb565_b(void *dst, const void *src, int bytes,
+void bgr555_to_rgb565_b(void *dst, const void *src, int pixels,
int brightness2k); // 0-0x0800
-void bgr555_to_xrgb8888(void *dst, const void *src, int bytes);
-void bgr888_to_xrgb8888(void *dst, const void *src, int bytes);
+void bgr555_to_xrgb8888(void *dst, const void *src, int pixels);
+void bgr888_to_xrgb8888(void *dst, const void *src, int pixels);
+
+void bgr555_to_rgb565_640_to_320(void *dst, const void *src, int dst_pixels);
+void bgr888_to_rgb565_640_to_320(void *dst, const void *src, int dst_pixels);
+void bgr888_to_rgb888_640_to_320(void *dst, const void *src, int dst_pixels);
+void bgr555_to_xrgb8888_640_to_320(void *dst, const void *src, int dst_pixels);
+void bgr888_to_xrgb8888_640_to_320(void *dst, const void *src, int dst_pixels);
+void bgr555_to_rgb565_512_to_320(void *dst, const void *src, int dst_pixels);
+void bgr888_to_rgb565_512_to_320(void *dst, const void *src, int dst_pixels);
+void bgr888_to_rgb888_512_to_320(void *dst, const void *src, int dst_pixels);
+void bgr555_to_xrgb8888_512_to_320(void *dst, const void *src, int dst_pixels);
+void bgr888_to_xrgb8888_512_to_320(void *dst, const void *src, int dst_pixels);
void bgr_to_uyvy_init(void);
void rgb565_to_uyvy(void *d, const void *s, int pixels);
#endif
.endm
-FUNCTION(bgr555_to_rgb565): @ void *dst, const void *src, int bytes
+FUNCTION(bgr555_to_rgb565): @ void *dst, const void *src, int pixels
push {r4-r11,lr}
+ mov r2, r2, lsl #1
mov lr, #0x001f
subs r2, #4*8
orr lr, lr, lsl #16
orr r7, r7, r8 @ r3g3b3
pkhbt r7, r6, r7, lsl #16
str r7, [r0], #4
- subs r2, r2, #12
+ subs r2, r2, #4
bgt 0b
pop {r4-r10,pc}
.text
.align 2
-FUNCTION(bgr555_to_rgb565): @ dst, src, bytes
- pld [r1]
+FUNCTION(bgr555_to_rgb565): @ dst, src, pixels
+ pld [r1, #2048]
mov r3, #0x07c0
vdup.16 q15, r3
tst r0, #8
beq 0f
@ align the dst
vld1.16 {d0}, [r1]!
- sub r2, r2, #8
+ sub r2, r2, #4
vshl.u16 d0, d0, #1
vshl.u16 d1, d0, #10
vsri.u16 d1, d0, #11
vbit d1, d0, d30
vst1.16 {d1}, [r0]!
0:
- subs r2, r2, #64
+ subs r2, r2, #32
blt btr16_end64
0:
pld [r1, #64*2]
vbit q10, q2, q15
vbit q11, q3, q15
vstmia r0!, {q8-q11}
- subs r2, r2, #64
+ subs r2, r2, #32
bge 0b
btr16_end64:
- adds r2, r2, #64
+ adds r2, r2, #32
bxeq lr
- subs r2, r2, #16
+ subs r2, r2, #8
blt btr16_end16
@ handle the remainder (reasonably rare)
vshl.u16 q1, q0, #10
vsri.u16 q1, q0, #11
vbit q1, q0, q15
- subs r2, r2, #16
+ subs r2, r2, #8
vst1.16 {q1}, [r0]!
bge 0b
btr16_end16:
- adds r2, r2, #16
+ adds r2, r2, #8
bxeq lr
- subs r2, r2, #8
+ subs r2, r2, #4
bxlt lr
@ very rare
@ note: may overflow source
-FUNCTION(bgr555_to_rgb565_b): @ dst, src, bytes, int brightness2k // 0-0x0800
- pld [r1]
+FUNCTION(bgr555_to_rgb565_b): @ dst, src, pixels, int brightness2k // 0-0x0800
+ pld [r1, #2048]
vdup.16 q15, r3
vpush {q4-q7}
mov r3, #0x1f
vsri.u16 q6, q10, #11
vsri.u16 q7, q11, #11
- subs r2, r2, #64
+ subs r2, r2, #32
ble 1f
vstmia r0!, {q4-q7}
b 0b
vstmia r0!, {q4-q7}
b btr16b_end
0:
- subs r2, r2, #8
+ subs r2, r2, #4
blt btr16b_end
vst1.16 {q4}, [r0]!
- subs r2, r2, #8
+ subs r2, r2, #4
blt btr16b_end
vst1.16 {q5}, [r0]!
- subs r2, r2, #8
+ subs r2, r2, #4
blt btr16b_end
vst1.16 {q6}, [r0]!
- subs r2, r2, #8
+ subs r2, r2, #4
blt btr16b_end
vst1.16 {q7}, [r0]!
bx lr
-FUNCTION(bgr888_to_rgb888): @ dst, src, bytes
- pld [r1]
- @ r2 /= 48
+FUNCTION(bgr888_to_rgb888): @ dst, src, pixels
+ pld [r1, #2048]
+ @ r2 /= 16
mov r2, r2, lsr #4
- movw r3, #0x5556
- movt r3, #0x5555
- umull r12,r2, r3, r2
0:
pld [r1, #48*3]
vld3.8 {d0-d2}, [r1]!
vst3.8 {d0-d2}, [r0, :64]!
vst3.8 {d3-d5}, [r0, :64]!
subs r2, r2, #1
- bne 0b
+ bgt 0b
+ nop
bx lr
-FUNCTION(bgr888_to_rgb565): @ dst, src, bytes
- pld [r1]
- @ r2 /= 48
+FUNCTION(bgr888_to_rgb565): @ dst, src, pixels
+ pld [r1, #2048]
+ @ r2 /= 16
mov r2, r2, lsr #4
- movw r3, #0x5556
- movt r3, #0x5555
- umull r12,r2, r3, r2
-
mov r3, #0x07e0
vdup.16 q15, r3
0:
vstmia r0!, {d0,d1}
vstmia r0!, {d4,d5}
subs r2, r2, #1
- bne 0b
+ bgt 0b
+ nop
bx lr
-FUNCTION(rgb888_to_rgb565): @ dst, src, bytes
- pld [r1]
- @ r2 /= 48
+FUNCTION(rgb888_to_rgb565): @ dst, src, pixels
+ pld [r1, #2048]
+ @ r2 /= 16
mov r2, r2, lsr #4
- movw r3, #0x5556
- movt r3, #0x5555
- umull r12,r2, r3, r2
-
mov r3, #0x07e0
vdup.16 q15, r3
0:
vstmia r0!, {d2,d3}
vstmia r0!, {d6,d7}
subs r2, r2, #1
- bne 0b
+ bgt 0b
+ nop
bx lr
#include "3ds/3ds_utils.h"
#endif
+#ifndef min
+#define min(a, b) ((b) < (a) ? (b) : (a))
+#endif
#ifndef MAP_FAILED
#define MAP_FAILED ((void *)(intptr_t)-1)
#endif
static unsigned retro_audio_latency = 0;
static int update_audio_latency = false;
-static unsigned int current_width;
-static unsigned int current_height;
static enum retro_pixel_format current_fmt;
static int plugins_opened;
}
}
-static void bgr_to_fb_empty(void *dst, const void *src, int bytes)
+static void bgr_to_fb_empty(void *dst, const void *src, int dst_pixels)
{
}
-typedef void (bgr_to_fb_func)(void *dst, const void *src, int bytes);
-static bgr_to_fb_func *g_bgr_to_fb = bgr_to_fb_empty;
+typedef void (bgr_to_fb_func)(void *dst, const void *src, int dst_pixels);
+
+/*
+ * Blitters per (source depth, output format) combination.
+ * Index = bgr24 (0 or 1) + 2 when the output format is XRGB8888,
+ * as computed by set_bgr_to_fb_func().
+ */
+static const struct cspace_func_type {
+  void (*blit)(void *dst, const void *src, int dst_pixels);
+  void (*blit_dscale640)(void *dst, const void *src, int dst_pixels);
+  void (*blit_dscale512)(void *dst, const void *src, int dst_pixels);
+} cspace_funcs[] = {
+  {
+    .blit           = bgr555_to_rgb565,
+    .blit_dscale640 = bgr555_to_rgb565_640_to_320,
+    .blit_dscale512 = bgr555_to_rgb565_512_to_320,
+  },
+  {
+    .blit           = bgr888_to_rgb565,
+    .blit_dscale640 = bgr888_to_rgb565_640_to_320,
+    .blit_dscale512 = bgr888_to_rgb565_512_to_320,
+  },
+  {
+    .blit           = bgr555_to_xrgb8888,
+    .blit_dscale640 = bgr555_to_xrgb8888_640_to_320,
+    .blit_dscale512 = bgr555_to_xrgb8888_512_to_320,
+  },
+  {
+    .blit           = bgr888_to_xrgb8888,
+    .blit_dscale640 = bgr888_to_xrgb8888_640_to_320,
+    .blit_dscale512 = bgr888_to_xrgb8888_512_to_320,
+  },
+};
static void set_bgr_to_fb_func(int bgr24)
{
+ int func_id = bgr24;
switch (current_fmt)
{
- case RETRO_PIXEL_FORMAT_XRGB8888:
- g_bgr_to_fb = bgr24 ? bgr888_to_xrgb8888 : bgr555_to_xrgb8888;
- break;
case RETRO_PIXEL_FORMAT_RGB565:
- g_bgr_to_fb = bgr24 ? bgr888_to_rgb565 : bgr555_to_rgb565;
+ break;
+ case RETRO_PIXEL_FORMAT_XRGB8888:
+ func_id += 2;
break;
default:
LogErr("unsupported current_fmt: %d\n", current_fmt);
- g_bgr_to_fb = bgr_to_fb_empty;
- break;
+ pl_rearmed_cbs.cspace_blit = bgr_to_fb_empty;
+ return;
+ }
+ pl_rearmed_cbs.cspace_blit = cspace_funcs[func_id].blit;
+ if (vout_width == 320) {
+ if (psx_w >= 640-4)
+ pl_rearmed_cbs.cspace_blit = cspace_funcs[func_id].blit_dscale640;
+ else if (psx_w >= 512-4)
+ pl_rearmed_cbs.cspace_blit = cspace_funcs[func_id].blit_dscale512;
}
}
static void vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp)
{
+ static unsigned int current_width;
+ static unsigned int current_height;
vout_width = w;
vout_height = h;
psx_w = raw_w;
psx_h = raw_h;
+ if (pl_rearmed_cbs.scale_hires) {
+ if (raw_w >= 512-4 && w > 320)
+ vout_width = 320;
+ if (h > 256)
+ vout_height = h / 2;
+ }
+
/* it may seem like we could do RETRO_ENVIRONMENT_SET_PIXEL_FORMAT here to
* switch to something that can accommodate bgr24 for FMVs, but although it
* succeeds it doesn't actually change the format at least on Linux, and the
int x, int y, int w, int h, int dims_changed)
{
int bytes_pp = (current_fmt == RETRO_PIXEL_FORMAT_XRGB8888) ? 4 : 2;
+ bgr_to_fb_func *bgr_to_fb = pl_rearmed_cbs.cspace_blit;
int bytes_pp_s = bgr24 ? 3 : 2;
- bgr_to_fb_func *bgr_to_fb = g_bgr_to_fb;
unsigned char *dest = vout_buf_ptr;
const unsigned char *vram = vram_;
- int dstride = vout_pitch_b, h1 = h;
+ int dstride = vout_pitch_b, h1;
int enhres = w > psx_w;
u32 vram_mask = enhres ? ~0 : 0xfffff;
+ int w_blit = min(w, vout_width);
int port = 0, hwrapped;
+ int sstride = 2048;
if (vram == NULL || dims_changed || (in_enable_crosshair[0] + in_enable_crosshair[1]) > 0)
{
goto out;
}
+ if (h >= vout_height * 3 / 2) {
+ sstride = 4096;
+ h /= 2;
+ }
+ h = min(h, vout_height);
dest += x * bytes_pp + y * dstride;
- for (; h1-- > 0; dest += dstride) {
- bgr_to_fb(dest, vram + vram_ofs, w * bytes_pp_s);
- vram_ofs = (vram_ofs + 2048) & vram_mask;
+ for (h1 = h; h1-- > 0; dest += dstride) {
+ bgr_to_fb(dest, vram + vram_ofs, w_blit);
+ vram_ofs = (vram_ofs + sstride) & vram_mask;
}
hwrapped = (vram_ofs & 2047) + w * bytes_pp_s - 2048;
if (!enhres && hwrapped > 0) {
// this is super-rare so just fix-up
- vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+ w_blit = hwrapped / bytes_pp_s;
+ vram_ofs = (vram_ofs - h * sstride) & 0xff800;
dest -= dstride * h;
dest += (w - hwrapped / bytes_pp_s) * bytes_pp;
for (h1 = h; h1-- > 0; dest += dstride) {
- bgr_to_fb(dest, vram + vram_ofs, hwrapped);
- vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ bgr_to_fb(dest, vram + vram_ofs, w_blit);
+ vram_ofs = (vram_ofs + sstride) & 0xfffff;
}
}
.pl_vout_set_mode = vout_set_mode,
.pl_vout_flip = vout_flip,
.pl_vout_close = vout_close,
+ .cspace_blit = bgr_to_fb_empty,
.mmap = pl_mmap,
.munmap = pl_munmap,
.gpu_state_change = gpu_state_change,
"pcsx_rearmed_gpu_unai_skipline",
"pcsx_rearmed_gpu_unai_lighting",
"pcsx_rearmed_gpu_unai_fast_lighting",
- "pcsx_rearmed_gpu_unai_scale_hires",
};
option_display.visible = show_advanced_gpu_unai_settings;
pl_rearmed_cbs.show_overscan = 0;
}
+ var.key = "pcsx_rearmed_scale_hires";
+ var.value = NULL;
+
+ if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
+ {
+ if (strcmp(var.value, "disabled") == 0)
+ pl_rearmed_cbs.scale_hires = 0;
+ else if (strcmp(var.value, "enabled") == 0)
+ pl_rearmed_cbs.scale_hires = 1;
+ }
+
#ifdef USE_ASYNC_GPU
var.key = "pcsx_rearmed_gpu_thread_rendering";
var.value = NULL;
/* Note: This used to be an option, but it only works
* (correctly) when running high resolution games
* (480i, 512i) and has been obsoleted by
- * pcsx_rearmed_gpu_unai_scale_hires */
+ * pcsx_rearmed_scale_hires */
pl_rearmed_cbs.gpu_unai.ilace_force = 0;
var.key = "pcsx_rearmed_gpu_unai_old_renderer";
else if (strcmp(var.value, "enabled") == 0)
pl_rearmed_cbs.gpu_unai.blending = 1;
}
-
- var.key = "pcsx_rearmed_gpu_unai_scale_hires";
- var.value = NULL;
-
- if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
- {
- if (strcmp(var.value, "disabled") == 0)
- pl_rearmed_cbs.gpu_unai.scale_hires = 0;
- else if (strcmp(var.value, "enabled") == 0)
- pl_rearmed_cbs.gpu_unai.scale_hires = 1;
- }
#endif // GPU_UNAI
var.value = NULL;
},
"disabled",
},
+ {
+ "pcsx_rearmed_scale_hires",
+ "Hi-Res Downscaling",
+ NULL,
+ "When enabled, games that run in high resolution video modes (480i, 512i) will be downscaled to 320x240 by skipping lines and/or columns. May be useful on some devices with native 240p display resolutions that lack efficient hardware scaling.",
+ NULL,
+ "video",
+ {
+ { "disabled", NULL },
+ { "enabled", NULL },
+ { NULL, NULL},
+ },
+#ifdef _MIYOO
+ "enabled",
+#else
+ "disabled",
+#endif
+ },
{
"pcsx_rearmed_gpu_slow_llists",
"(GPU) Slow linked list processing",
},
"disabled",
},
- {
- "pcsx_rearmed_gpu_unai_scale_hires",
- "(GPU) Hi-Res Downscaling",
- "Hi-Res Downscaling",
- "When enabled, games that run in high resolution video modes (480i, 512i) will be downscaled to 320x240. Can improve performance, and is recommended on devices with native 240p display resolutions.",
- NULL,
- "gpu_unai",
- {
- { "disabled", NULL },
- { "enabled", NULL },
- { NULL, NULL},
- },
-#ifdef _MIYOO
- "enabled",
-#else
- "disabled",
-#endif
- },
#endif /* GPU_UNAI */
{
"pcsx_rearmed_spu_reverb",
CE_INTVAL_V(frameskip, 4),
CE_INTVAL_PV(dithering, 2),
CE_INTVAL_P(thread_rendering),
+ CE_INTVAL_P(scale_hires),
CE_INTVAL_P(gpu_peops.dwActFixes),
CE_INTVAL_P(gpu_unai.old_renderer),
CE_INTVAL_P(gpu_unai.ilace_force),
CE_INTVAL_P(gpu_unai.lighting),
CE_INTVAL_P(gpu_unai.fast_lighting),
CE_INTVAL_P(gpu_unai.blending),
- CE_INTVAL_P(gpu_unai.scale_hires),
CE_INTVAL_P(gpu_neon.allow_interlace),
CE_INTVAL_P(gpu_neon.enhancement_enable),
CE_INTVAL_P(gpu_neon.enhancement_no_main),
for (; h > 0; h--, d += g_menuscreen_w, s += 1024) {
if (gpu->ulStatus & 0x200000)
- bgr888_to_rgb565(d, s, w * 3);
+ bgr888_to_rgb565(d, s, w);
else
- bgr555_to_rgb565(d, s, w * 2);
+ bgr555_to_rgb565(d, s, w);
// darken this so that menu text is visible
if (g_menuscreen_w - w < 320)
"using d-pad or move it using R+d-pad";
static const char h_soft_filter[] = "Works only if game uses low resolution modes";
static const char h_gamma[] = "Gamma/brightness adjustment (default 100)";
+static const char h_lowres[] = "Forces all PSX high resolutions to 320x240 or lower\n"
+ "by skipping lines and pixels";
#ifdef HAVE_NEON32
static const char *men_scanlines[] = { "OFF", "1", "2", "3", NULL };
static const char h_scanline_l[] = "Scanline brightness, 0-100%";
mee_range_h ("Gamma adjustment", MA_OPT_GAMMA, g_gamma, 1, 200, h_gamma),
mee_onoff ("OpenGL Vsync", MA_OPT_VSYNC, g_opts, OPT_VSYNC),
mee_cust_h ("Setup custom scaler", MA_OPT_VARSCALER_C, menu_loop_cscaler, NULL, h_cscaler),
+ mee_onoff_h ("Force low resolution", 0, pl_rearmed_cbs.scale_hires, 1, h_lowres),
mee_end,
};
mee_onoff ("Lighting", 0, pl_rearmed_cbs.gpu_unai.lighting, 1),
mee_onoff ("Fast lighting", 0, pl_rearmed_cbs.gpu_unai.fast_lighting, 1),
mee_onoff ("Blending", 0, pl_rearmed_cbs.gpu_unai.blending, 1),
- mee_onoff ("Downscale Hi-Res", 0, pl_rearmed_cbs.gpu_unai.scale_hires, 1),
mee_end,
};
GPU_freeze(1, gpuf);
for (; h > 0; h--, d += g_menuscreen_w, s += 1024)
- bgr555_to_rgb565(d, s, w * 2);
+ bgr555_to_rgb565(d, s, w);
smalltext_out16(4, 1, "build: "__DATE__ " " __TIME__ " " REV, 0xe7fc);
snprintf(buff, sizeof(buff), "GPU sr: %08x", gpuf->ulStatus);
}
else {
for (; h > 0; h--, d += g_menuscreen_w, s += last_vout_w * 3) {
- rgb888_to_rgb565(d, s, w * 3);
+ rgb888_to_rgb565(d, s, w);
menu_darken_bg(d, d, w, 0);
}
}
if (pl_vout_buf != NULL) {
if (psx_bpp == 16)
// have to do rgb conversion for menu bg
- bgr555_to_rgb565(pl_vout_buf, pl_vout_buf, 320*240*2);
+ bgr555_to_rgb565(pl_vout_buf, pl_vout_buf, 320*240);
else
memset(pl_vout_buf, 0, 320*240*2);
}
void *plat_prepare_screenshot(int *w, int *h, int *bpp)
{
- bgr555_to_rgb565(pl_vout_buf, pl_vout_buf, 320*240*2);
+ bgr555_to_rgb565(pl_vout_buf, pl_vout_buf, 320*240);
*w = 320;
*h = 240;
*bpp = psx_bpp;
if (bgr24) {
for (; h > 0; dst += dstride, src += sstride, h--)
- bgr888_to_rgb565(dst, src, w * 3);
+ bgr888_to_rgb565(dst, src, w);
}
else {
for (; h > 0; dst += dstride, src += sstride, h--)
- bgr555_to_rgb565(dst, src, w * 2);
+ bgr555_to_rgb565(dst, src, w);
}
if (SDL_MUSTLOCK(plat_sdl_screen))
if (g_layer_h > fh * 2) g_layer_h = fh * 2;
}
+/*
+ * Blitters selected by pl_vout_set_mode():
+ * [0] 16bpp source, [1] 24bpp source kept as 24bpp,
+ * [2] 24bpp source on a 16bpp-only platform.
+ */
+static const struct cspace_func_type {
+  void (*blit)(void *dst, const void *src, int dst_pixels);
+  void (*blit_dscale640)(void *dst, const void *src, int dst_pixels);
+  void (*blit_dscale512)(void *dst, const void *src, int dst_pixels);
+} cspace_funcs[] = {
+  {
+    .blit           = bgr555_to_rgb565,
+    .blit_dscale640 = bgr555_to_rgb565_640_to_320,
+    .blit_dscale512 = bgr555_to_rgb565_512_to_320,
+  },
+  {
+    .blit           = bgr888_to_rgb888,
+    .blit_dscale640 = bgr888_to_rgb888_640_to_320,
+    .blit_dscale512 = bgr888_to_rgb888_512_to_320,
+  },
+  {
+    .blit           = bgr888_to_rgb565,
+    .blit_dscale640 = bgr888_to_rgb565_640_to_320,
+    .blit_dscale512 = bgr888_to_rgb565_512_to_320,
+  },
+};
+
// XXX: this is platform specific really
static inline int resolution_ok(int w, int h)
{
static void pl_vout_set_mode(int w, int h, int raw_w, int raw_h, int bpp)
{
+ const struct cspace_func_type *cspace_f = cspace_funcs;
int vout_w, vout_h, vout_bpp;
// special h handling, Wipeout likes to change it by 1-6
vout_w = w;
vout_h = h;
vout_bpp = bpp;
- if (pl_rearmed_cbs.only_16bpp)
- vout_bpp = 16;
+ if (bpp > 16) {
+ cspace_f = &cspace_funcs[1];
+ if (pl_rearmed_cbs.only_16bpp) {
+ cspace_f = &cspace_funcs[2];
+ vout_bpp = 16;
+ }
+ }
+ pl_rearmed_cbs.cspace_blit = cspace_f->blit;
+ if (pl_rearmed_cbs.scale_hires) {
+ if (raw_w >= 640-4) {
+ pl_rearmed_cbs.cspace_blit = cspace_f->blit_dscale640;
+ vout_w = 320;
+ }
+ else if (raw_w >= 512-4) {
+ pl_rearmed_cbs.cspace_blit = cspace_f->blit_dscale512;
+ vout_w = 320;
+ }
+ if (vout_h > 256)
+ vout_h /= 2;
+ }
assert(vout_h >= 192);
static void pl_vout_flip(const void *vram_, int vram_ofs, int bgr24,
int x, int y, int w, int h, int dims_changed)
{
+  // count is in destination pixels, not bytes
+  void (*blit)(void *dst, const void *src, int dst_pixels);
unsigned char *dest = pl_vout_buf;
const unsigned char *vram = vram_;
- int dstride = pl_vout_w, h1 = h;
+ int dstride = pl_vout_w, h1;
+ int sstride = 2048;
int h_full = pl_vout_h;
int enhres = w > psx_w;
int xoffs = 0, doffs;
goto out_hud;
}
- assert(x + w <= pl_vout_w);
- assert(y + h <= pl_vout_h);
-
// offset
xoffs = x * pl_vout_scale_w;
doffs = xoffs + y * pl_vout_scale_h * dstride;
dest += doffs * 2;
+ if (x + w > pl_vout_w)
+ w = pl_vout_w - x;
+ if (h >= pl_vout_h * 3 / 2) {
+ sstride = 4096;
+ h /= 2;
+ }
+ assert(y + h <= pl_vout_h);
+ blit = pl_rearmed_cbs.cspace_blit;
+
if (bgr24)
{
hwrapped = (vram_ofs & 2047) + w * 3 - 2048;
if (pl_rearmed_cbs.only_16bpp) {
- for (; h1-- > 0; dest += dstride * 2) {
- bgr888_to_rgb565(dest, vram + vram_ofs, w * 3);
- vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ for (h1 = h; h1-- > 0; dest += dstride * 2) {
+ blit(dest, vram + vram_ofs, w);
+ vram_ofs = (vram_ofs + sstride) & 0xfffff;
}
if (hwrapped > 0) {
// this is super-rare so just fix-up
- vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+ vram_ofs = (vram_ofs - h * sstride) & 0xff800;
dest -= dstride * 2 * h;
dest += (w - hwrapped / 3) * 2;
for (h1 = h; h1-- > 0; dest += dstride * 2) {
- bgr888_to_rgb565(dest, vram + vram_ofs, hwrapped);
- vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ // hwrapped is a byte count of 3-byte source pixels (see the dest
+ // adjustment above), so the pixel count is hwrapped / 3
+ blit(dest, vram + vram_ofs, hwrapped / 3);
+ vram_ofs = (vram_ofs + sstride) & 0xfffff;
}
}
}
dest -= doffs * 2;
dest += (doffs / 8) * 24;
- for (; h1-- > 0; dest += dstride * 3) {
- bgr888_to_rgb888(dest, vram + vram_ofs, w * 3);
- vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ for (h1 = h; h1-- > 0; dest += dstride * 3) {
+ blit(dest, vram + vram_ofs, w);
+ vram_ofs = (vram_ofs + sstride) & 0xfffff;
}
if (hwrapped > 0) {
- vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+ vram_ofs = (vram_ofs - h * sstride) & 0xff800;
dest -= dstride * 3 * h;
dest += w * 3 - hwrapped;
for (h1 = h; h1-- > 0; dest += dstride * 3) {
- bgr888_to_rgb888(dest, vram + vram_ofs, hwrapped);
- vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ blit(dest, vram + vram_ofs, hwrapped / 3);
+ vram_ofs = (vram_ofs + sstride) & 0xfffff;
}
}
}
else if (scanlines != 0 && scanline_level != 100)
{
int h2, l = scanline_level * 2048 / 100;
- int stride_0 = pl_vout_scale_h >= 2 ? 0 : 2048;
+ int stride_0 = pl_vout_scale_h >= 2 ? 0 : sstride;
- h1 *= pl_vout_scale_h;
+ h1 = h * pl_vout_scale_h;
while (h1 > 0)
{
for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) {
- bgr555_to_rgb565(dest, vram + vram_ofs, w * 2);
+ bgr555_to_rgb565(dest, vram + vram_ofs, w);
vram_ofs = (vram_ofs + stride_0) & 0xfffff;
dest += dstride * 2;
}
for (h2 = scanlines; h2 > 0 && h1 > 0; h2--, h1--) {
- bgr555_to_rgb565_b(dest, vram + vram_ofs, w * 2, l);
- vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ bgr555_to_rgb565_b(dest, vram + vram_ofs, w, l);
+ vram_ofs = (vram_ofs + sstride) & 0xfffff;
dest += dstride * 2;
}
}
else
{
unsigned int vram_mask = enhres ? ~0 : 0xfffff;
- for (; h1-- > 0; dest += dstride * 2) {
- bgr555_to_rgb565(dest, vram + vram_ofs, w * 2);
- vram_ofs = (vram_ofs + 2048) & vram_mask;
+ for (h1 = h; h1-- > 0; dest += dstride * 2) {
+ blit(dest, vram + vram_ofs, w);
+ vram_ofs = (vram_ofs + sstride) & vram_mask;
}
hwrapped = (vram_ofs & 2047) + w * 2 - 2048;
if (!enhres && hwrapped > 0) {
- vram_ofs = (vram_ofs - h * 2048) & 0xff800;
+ vram_ofs = (vram_ofs - h * sstride) & 0xff800;
dest -= dstride * 2 * h;
dest += w * 2 - hwrapped;
for (h1 = h; h1-- > 0; dest += dstride * 2) {
- bgr555_to_rgb565(dest, vram + vram_ofs, hwrapped);
- vram_ofs = (vram_ofs + 2048) & 0xfffff;
+ blit(dest, vram + vram_ofs, hwrapped / 2);
+ vram_ofs = (vram_ofs + sstride) & 0xfffff;
}
}
}
pl_vout_flip,
pl_vout_close,
+ .cspace_blit = bgr555_to_rgb565,
.mmap = pl_mmap,
.munmap = pl_munmap,
.pl_set_gpu_caps = pl_set_gpu_caps,
void (*pl_vout_flip)(const void *vram, int vram_offset, int bgr24,
int x, int y, int w, int h, int dims_changed);
void (*pl_vout_close)(void);
+  // colour-space converting line blitter; the count is in destination
+  // pixels, not bytes
+  void (*cspace_blit)(void *dst, const void *src, int dst_pixels);
void *(*mmap)(unsigned int size);
void (*munmap)(void *ptr, unsigned int size);
// only used by some frontends
unsigned int only_16bpp; // platform is 16bpp-only
unsigned int thread_rendering;
unsigned int dithering; // 0 off, 1 on, 2 force
+ unsigned int scale_hires;
struct {
int allow_interlace; // 0 off, 1 on, 2 guess
int enhancement_enable;
int lighting;
int fast_lighting;
int blending;
- int scale_hires;
} gpu_unai;
struct {
int dwActFixes;
// Normally 0. Value '1' will skip rendering
// odd lines.
- uint8_t scale_hires:1; // If 1, will scale hi-res output to
- // 320x240 when gpulib reads the frame.
- // Implies pixel_skip and ilace_force
- // (when height > 240).
uint8_t lighting:1;
uint8_t fast_lighting:1;
uint8_t blending:1;
#endif
}
-// For now, 320x240 output resolution is assumed, using simple line-skipping
-// and pixel-skipping downscaler.
-// TODO: Flesh these out so they return useful values based on whether
-// running on higher-res device or a resampling downscaler is enabled.
-static inline bool PixelSkipEnabled()
-{
- return gpu_unai.config.pixel_skip || gpu_unai.config.scale_hires;
-}
-
-static inline bool LineSkipEnabled()
-{
- return true;
-}
-
#endif // GPU_UNAI_H
#define IS_OLD_RENDERER() false
#endif
-#define DOWNSCALE_VRAM_SIZE (1024 * 512 * 2 * 2 + 4096)
-
-INLINE void scale_640_to_320(le16_t *dest, const le16_t *src, bool isRGB24) {
- size_t uCount = 320;
-
- if(isRGB24) {
- const uint8_t* src8 = (const uint8_t *)src;
- uint8_t* dst8 = (uint8_t *)dest;
-
- do {
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8;
- src8 += 4;
- } while(--uCount);
- } else {
- const le16_t* src16 = src;
- le16_t* dst16 = dest;
-
- do {
- *dst16++ = *src16;
- src16 += 2;
- } while(--uCount);
- }
-}
-
-INLINE void scale_512_to_320(le16_t *dest, const le16_t *src, bool isRGB24) {
- size_t uCount = 64;
-
- if(isRGB24) {
- const uint8_t* src8 = (const uint8_t *)src;
- uint8_t* dst8 = (uint8_t *)dest;
-
- do {
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8;
- src8 += 4;
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8;
- src8 += 4;
- *dst8++ = *src8++;
- *dst8++ = *src8++;
- *dst8++ = *src8;
- src8 += 4;
- } while(--uCount);
- } else {
- const le16_t* src16 = src;
- le16_t* dst16 = dest;
-
- do {
- *dst16++ = *src16++;
- *dst16++ = *src16;
- src16 += 2;
- *dst16++ = *src16++;
- *dst16++ = *src16;
- src16 += 2;
- *dst16++ = *src16;
- src16 += 2;
- } while(--uCount);
- }
-}
-
-static uint16_t *get_downscale_buffer(int *x, int *y, int *w, int *h, int *vram_h)
-{
- le16_t *dest = gpu_unai.downscale_vram;
- const le16_t *src = gpu_unai.vram;
- bool isRGB24 = (gpu_unai.GPU_GP1 & 0x00200000 ? true : false);
- int stride = 1024, dstride = 1024, lines = *h, orig_w = *w;
-
- // PS1 fb read wraps around (fixes black screen in 'Tobal no. 1')
- unsigned int fb_mask = 1024 * 512 - 1;
-
- if (*h > 240) {
- *h /= 2;
- stride *= 2;
- lines = *h;
-
- // Ensure start at a non-skipped line
- while (*y & gpu_unai.inn.ilace_mask) ++*y;
- }
-
- unsigned int fb_offset_src = (*y * dstride + *x) & fb_mask;
- unsigned int fb_offset_dest = fb_offset_src;
-
- if (*w == 512 || *w == 640) {
- *w = 320;
- }
-
- switch(orig_w) {
- case 640:
- do {
- scale_640_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24);
- fb_offset_src = (fb_offset_src + stride) & fb_mask;
- fb_offset_dest = (fb_offset_dest + dstride) & fb_mask;
- } while(--lines);
-
- break;
- case 512:
- do {
- scale_512_to_320(dest + fb_offset_dest, src + fb_offset_src, isRGB24);
- fb_offset_src = (fb_offset_src + stride) & fb_mask;
- fb_offset_dest = (fb_offset_dest + dstride) & fb_mask;
- } while(--lines);
- break;
- default:
- size_t size = isRGB24 ? *w * 3 : *w * 2;
-
- do {
- memcpy(dest + fb_offset_dest, src + fb_offset_src, size);
- fb_offset_src = (fb_offset_src + stride) & fb_mask;
- fb_offset_dest = (fb_offset_dest + dstride) & fb_mask;
- } while(--lines);
- break;
- }
-
- return (uint16_t *)gpu_unai.downscale_vram;
-}
-
-static void map_downscale_buffer(void)
-{
- if (gpu_unai.downscale_vram)
- return;
-
- gpu_unai.downscale_vram = (le16_t*)gpu.mmap(DOWNSCALE_VRAM_SIZE);
-
- if (gpu_unai.downscale_vram == NULL || gpu_unai.downscale_vram == (le16_t *)(intptr_t)-1) {
- SysPrintf("failed to map downscale buffer\n");
- gpu_unai.downscale_vram = NULL;
- gpu.get_downscale_buffer = NULL;
- }
- else {
- gpu.get_downscale_buffer = get_downscale_buffer;
- }
-}
-
-static void unmap_downscale_buffer(void)
-{
- if (gpu_unai.downscale_vram == NULL)
- return;
-
- gpu.munmap(gpu_unai.downscale_vram, DOWNSCALE_VRAM_SIZE);
- gpu_unai.downscale_vram = NULL;
- gpu.get_downscale_buffer = NULL;
-}
-
int renderer_init(void)
{
memset((void*)&gpu_unai, 0, sizeof(gpu_unai));
SetupLightLUT();
SetupDitheringConstants();
- if (gpu_unai.config.scale_hires) {
- map_downscale_buffer();
- }
-
return 0;
}
void renderer_finish(void)
{
- unmap_downscale_buffer();
}
void renderer_notify_screen_change(const struct psx_gpu_screen *screen)
{
gpu_unai.inn.ilace_mask = gpu_unai.config.ilace_force;
-#ifndef HAVE_PRE_ARMV7 /* XXX */
- if (gpu_unai.config.scale_hires)
-#endif
+ if (gpu.state.downscale_enable)
{
gpu_unai.inn.ilace_mask |= !!(gpu.status & PSX_GPU_STATUS_INTERLACE);
}
gpu_unai.config.lighting = cbs->gpu_unai.lighting;
gpu_unai.config.fast_lighting = cbs->gpu_unai.fast_lighting;
gpu_unai.config.blending = cbs->gpu_unai.blending;
- gpu_unai.config.scale_hires = cbs->gpu_unai.scale_hires;
gpu_unai.config.dithering = cbs->dithering != 0;
gpu_unai.config.force_dithering = cbs->dithering >> 1;
- gpu.state.downscale_enable = gpu_unai.config.scale_hires;
- if (gpu_unai.config.scale_hires) {
- map_downscale_buffer();
- } else {
- unmap_downscale_buffer();
- }
+ renderer_notify_screen_change(&gpu.screen);
oldunai_renderer_set_config(cbs);
}
gpu.state.frame_count = (uint32_t *)cbs->gpu_frame_count;
gpu.state.allow_interlace = cbs->gpu_neon.allow_interlace;
gpu.state.enhancement_enable = cbs->gpu_neon.enhancement_enable;
+ gpu.state.downscale_enable = cbs->scale_hires;
gpu.state.screen_centering_type_default = cbs->screen_centering_type_default;
if (gpu.state.screen_centering_type != cbs->screen_centering_type
|| gpu.state.screen_centering_x != cbs->screen_centering_x
uint32_t enhancement_active:1;
uint32_t enhancement_was_active:1;
uint32_t downscale_enable:1;
- uint32_t downscale_active:1;
uint32_t dims_changed:1;
uint32_t show_overscan:2;
uint32_t *frame_count;
{
union cmd_screen_change cmd;
- if (!gpu->async || !gpu->state.enhancement_active) // gpu_neon only
+ if (!gpu->async)
return;
cmd.cmd = HTOLE32(FAKECMD_SCREEN_CHANGE << 24);
cmd.x = gpu->screen.x;
bpp = 24;
}
- gpu.state.downscale_active =
- gpu.get_downscale_buffer != NULL && gpu.state.downscale_enable
- && (w >= 512 || h >= 256);
-
- if (gpu.state.downscale_active) {
- w_out = w < 512 ? w : 320;
- h_out = h < 256 ? h : h / 2;
- }
-
// width|rgb24 change?
if (force || (gpu.status ^ gpu.state.status_vo_old) & ((7<<16)|(1<<21))
|| w_out != gpu.state.w_out_old || h_out != gpu.state.h_out_old)
src_x2 *= 2;
}
- if (gpu.state.downscale_active)
- vram = (void *)gpu.get_downscale_buffer(&src_x, &src_y, &w, &h, &vram_h);
-
if (src_y + h > vram_h) {
if (src_y + h - vram_h > h / 2) {
// wrap
h = vram_h - src_y;
}
+ // gpu_unai skips drawing odd lines, so begin reading on an even line
+ if (h > 256 && gpu.state.downscale_enable && (src_y & 1))
+ src_y++;
+
offset = (src_y * 1024 + src_x) * 2;
offset += src_x2 * bpp / 8;