sdl, complete overhaul of hardware/software scaling

author kub <derkub@gmail.com>

Fri, 10 Sep 2021 16:46:05 +0000 (18:46 +0200)

committer kub <derkub@gmail.com>

Fri, 10 Sep 2021 16:54:05 +0000 (18:54 +0200)
author kub <derkub@gmail.com>
Fri, 10 Sep 2021 16:46:05 +0000 (18:46 +0200)
committer kub <derkub@gmail.com>
Fri, 10 Sep 2021 16:54:05 +0000 (18:54 +0200)
diff --git a/Makefile b/Makefile

index b0cc83e..56dba33 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -210,7 +210,7 @@ endif
  ifeq "$(USE_FRONTEND)" "1"
  
  # common
-OBJS += platform/common/main.o platform/common/emu.o \
+OBJS += platform/common/main.o platform/common/emu.o platform/common/upscale.o \
         platform/common/menu_pico.o platform/common/config_file.o
  
  # libpicofe
diff --git a/pico/32x/draw.c b/pico/32x/draw.c

index cc98a69..5143af8 100644 (file)
--- a/pico/32x/draw.c
+++ b/pico/32x/draw.c
@@ -122,7 +122,7 @@ void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est)
    FinalizeLine555(sh, line, est);
  
    if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 0 || // 32x blanking
-      // XXX: how is 32col mode hadled by real hardware?
+      // XXX: how is 32col mode handled by real hardware?
        !(Pico.video.reg[12] & 1) || // 32col mode
        (Pico.video.debug_p & PVD_KILL_32X))
    {
@@ -162,6 +162,7 @@ void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est)
  
  #define PICOSCAN_POST \
    PicoScan32xEnd(l + (lines_sft_offs & 0xff)); \
+  Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement32x; \
  
  #define make_do_loop(name, pre_code, post_code, md_code)        \
  /* Direct Color Mode */                                         \
@@ -260,6 +261,7 @@ void PicoDraw32xLayer(int offs, int lines, int md_bg)
    int which_func;
  
    Pico.est.DrawLineDest = (char *)DrawLineDestBase32x + offs * DrawLineDestIncrement32x;
+  Pico.est.DrawLineDestIncr = DrawLineDestIncrement32x;
    dram = Pico32xMem->dram[Pico32x.vdp_regs[0x0a/2] & P32XV_FS];
  
    if (Pico32xDrawMode == PDM32X_BOTH)
@@ -330,7 +332,7 @@ void PicoDraw32xLayerMdOnly(int offs, int lines)
        dst[p + 2] = pal[*pmd++];
        dst[p + 3] = pal[*pmd++];
      }
-    dst = (void *)((char *)dst + DrawLineDestIncrement32x);
+    dst = Pico.est.DrawLineDest = (char *)dst + DrawLineDestIncrement32x;
      pmd += 328 - plen;
      if (have_scan)
        PicoScan32xEnd(l + offs);
diff --git a/pico/32x/draw_arm.S b/pico/32x/draw_arm.S

index cdcb211..8d6ee73 100644 (file)
--- a/pico/32x/draw_arm.S
+++ b/pico/32x/draw_arm.S
@@ -20,11 +20,11 @@
  
      PIC_LDR_INIT()
  
-.macro call_scan_prep cond est   @ &Pico.est
+.macro call_scan_prep cond pico   @ &Pico
  .if \cond
      PIC_LDR(r4, r6, PicoScan32xBegin)
      PIC_LDR(r5, r6, PicoScan32xEnd)
-    ldr     r6, [\est, #OFS_EST_DrawLineDest]
+    add     r6, \pico, #OFS_Pico_est
      ldr     r4, [r4]
      ldr     r5, [r5]
      stmfd   sp!, {r4,r5,r6}
@@ -44,9 +44,9 @@
      add     r0, r0, r4
      mov     lr, pc
      ldr     pc, [sp, #(3+0)*4]
-    ldr     r0, [sp, #(3+2)*4] @ &DrawLineDest
+    ldr     r0, [sp, #(3+2)*4] @ &Pico.est
      ldmfd   sp!, {r1-r3}
-    ldr     r0, [r0]
+    ldr     r0, [r0, #OFS_EST_DrawLineDest]
  .endif
  .endm
  
@@ -57,6 +57,11 @@
      add     r0, r0, r4
      mov     lr, pc
      ldr     pc, [sp, #(4+1)*4]
+    ldr     r1, [sp, #(4+2)*4] @ &Pico.est: 4 words (r0-r3, popped below) lie above r4,r5,r6 saved by call_scan_prep
+    ldr     r0, [r1, #OFS_EST_DrawLineDest]
+    ldr     r2, [r1, #OFS_EST_DrawLineDestIncr]
+    add     r0, r0, r2                      @ advance the line destination by one increment
+    str     r0, [r1, #OFS_EST_DrawLineDest] @ store back to the field call_scan_begin reloads
      ldmfd   sp!, {r0-r3}
  .endif
  .endm
@@ -71,6 +76,7 @@
      PIC_LDR(lr, r9, Pico)
      PIC_LDR(r10,r9, Pico32x)
      ldr     r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB]
+    ldr     r12, [lr, #OFS_Pico_est+OFS_EST_DrawLineDestIncr]
      ldrh    r10,[r10, #0x40] @ Pico32x.vdp_regs[0]
      add     r9, lr, #OFS_Pico_est+OFS_EST_HighPal   @ palmd
  
@@ -88,6 +94,9 @@
  
  0: @ loop_outer:
      call_scan_end \call_scan
+    ldr     r12, [r9, #OFS_EST_DrawLineDestIncr-OFS_EST_HighPal]
+    sub     r0, r0, #320*2
+    add     r0, r0, r12
      add     r4, r4, #1
      cmp     r4, r2, lsr #16
      call_scan_fin_ge \call_scan
@@ -231,7 +240,7 @@
  
      PIC_LDR(lr, r9, Pico)
      PIC_LDR(r10,r9, Pico32xMem)
-    ldr     r9,=OFS_PMEM32x_pal_native
+    ldr     r9, =OFS_PMEM32x_pal_native
      ldr     r10, [r10]
      ldr     r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB]
      add     r10,r10,r9
@@ -248,6 +257,9 @@
  
  0: @ loop_outer:
      call_scan_end \call_scan
+    ldr     r12, [r9, #OFS_EST_DrawLineDestIncr-OFS_EST_HighPal]
+    sub     r0, r0, #320*2
+    add     r0, r0, r12
      add     r4, r4, #1
      cmp     r4, r2, lsr #16
      call_scan_fin_ge \call_scan
@@ -400,7 +412,7 @@
  
      PIC_LDR(lr, r9, Pico)
      PIC_LDR(r10,r9, Pico32xMem)
-    ldr     r9,=OFS_PMEM32x_pal_native
+    ldr     r9, =OFS_PMEM32x_pal_native
      ldr     r10, [r10]
      ldr     r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB]
      add     r10,r10,r9
@@ -417,6 +429,9 @@
  
  0: @ loop_outer:
      call_scan_end \call_scan
+    ldr     r12, [r9, #OFS_EST_DrawLineDestIncr-OFS_EST_HighPal]
+    sub     r0, r0, #320*2
+    add     r0, r0, r12
      add     r4, r4, #1
      cmp     r4, r2, lsr #16
      call_scan_fin_ge \call_scan
diff --git a/pico/draw.c b/pico/draw.c

index 7e279db..54e4851 100644 (file)
--- a/pico/draw.c
+++ b/pico/draw.c
@@ -44,6 +44,8 @@
   */\r
  \r
  #include "pico_int.h"\r
+#include <platform/common/upscale.h>\r
+\r
  #define FORCE  // layer forcing via debug register?\r
  \r
  int (*PicoScanBegin)(unsigned int num) = NULL;\r
@@ -1407,7 +1409,7 @@ static NOINLINE void PrepareSprites(int max_lines)
  \r
    if (!(Pico.video.reg[12]&1))\r
      max_sprites = 64, max_line_sprites = 16, max_width = 264;\r
-  if (PicoIn.opt & POPT_DIS_SPRITE_LIM)\r
+  if (*est->PicoOpt & POPT_DIS_SPRITE_LIM)\r
      max_line_sprites = MAX_LINE_SPRITES;\r
  \r
    sh = Pico.video.reg[0xC]&8; // shadow/hilight?\r
@@ -1643,20 +1645,20 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est)
    if (Pico.video.reg[12]&1) {\r
      len = 320;\r
    } else {\r
-    if (!(PicoIn.opt&POPT_DIS_32C_BORDER)) pd+=32;\r
      len = 256;\r
    }\r
  \r
-  {\r
-#if 1\r
-    int i;\r
-\r
-    for (i = len; i > 0; i-=4) {\r
-      *pd++ = pal[*ps++];\r
-      *pd++ = pal[*ps++];\r
-      *pd++ = pal[*ps++];\r
-      *pd++ = pal[*ps++];\r
+  if ((*est->PicoOpt & POPT_EN_SOFTSCALE) && len == 256) {\r
+    switch (PicoIn.filter) {\r
+    case 3: h_upscale_bl4_4_5(pd, 320, ps, 256, 256, f_pal); break;\r
+    case 2: h_upscale_bl2_4_5(pd, 320, ps, 256, 256, f_pal); break;\r
+    case 1: h_upscale_snn_4_5(pd, 320, ps, 256, 256, f_pal); break;\r
+    default: h_upscale_nn_4_5(pd, 320, ps, 256, 256, f_pal); break;\r
      }\r
+  } else {\r
+    if (!(*est->PicoOpt & POPT_DIS_32C_BORDER) && len == 256) pd += 32;\r
+#if 1\r
+    h_copy(pd, 320, ps, 320, len, f_pal);\r
  #else\r
      extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);\r
      extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count);\r
@@ -1691,19 +1693,32 @@ static void FinalizeLine8bit(int sh, int line, struct PicoEState *est)
    if (Pico.video.reg[12]&1) {\r
      len = 320;\r
    } else {\r
-    if (!(PicoIn.opt & POPT_DIS_32C_BORDER))\r
-      pd += 32;\r
      len = 256;\r
    }\r
  \r
-  if (DrawLineDestIncrement == 0) {\r
+  if ((PicoIn.opt & POPT_EN_SOFTSCALE) && len == 256) {\r
+    unsigned char *ps = est->HighCol+8;\r
+    unsigned char pal = 0;\r
+\r
+    if (!sh && (est->rendstatus & PDRAW_SONIC_MODE))\r
+      pal = est->SonicPalCount*0x40;\r
+    if (DrawLineDestIncrement == 0)\r
+      pd = est->HighCol+8;\r
+    // Smoothing can't be used with CLUT, hence it's always Nearest Neighbour.\r
+    // use reverse version since src and dest ptr may be the same.\r
+    rh_upscale_nn_4_5(pd, 320, ps, 256, len, f_or);\r
+  } else if (DrawLineDestIncrement == 0) {\r
      if (!sh && (est->rendstatus & PDRAW_SONIC_MODE))\r
-      blockcpy_or(pd+8, est->HighCol+8, len, est->SonicPalCount*0x40);\r
-  } else if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) {\r
-    // select active backup palette\r
-    blockcpy_or(pd, est->HighCol+8, len, est->SonicPalCount*0x40);\r
+      blockcpy_or(est->HighCol+8, est->HighCol+8, len, est->SonicPalCount*0x40);\r
    } else {\r
-    blockcpy(pd, est->HighCol+8, len);\r
+    if (!(PicoIn.opt & POPT_DIS_32C_BORDER))\r
+      pd += 32;\r
+    if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) {\r
+      // select active backup palette\r
+      blockcpy_or(pd, est->HighCol+8, len, est->SonicPalCount*0x40);\r
+    } else {\r
+      blockcpy(pd, est->HighCol+8, len);\r
+    }\r
    }\r
  }\r
  \r
@@ -1828,7 +1843,7 @@ static int DrawDisplay(int sh)
  // MUST be called every frame\r
  PICO_INTERNAL void PicoFrameStart(void)\r
  {\r
-  int offs = 8, lines = 224;\r
+  int loffs = 8, lines = 224, coffs = 0, columns = 320;\r
    int dirty = ((Pico.est.rendstatus & PDRAW_SONIC_MODE) || Pico.m.dirtyPal);\r
    int sprep = Pico.est.rendstatus & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES);\r
    int skipped = Pico.est.rendstatus & PDRAW_SKIP_FRAME;\r
@@ -1837,19 +1852,25 @@ PICO_INTERNAL void PicoFrameStart(void)
    Pico.est.rendstatus = 0;\r
    if ((Pico.video.reg[12] & 6) == 6)\r
      Pico.est.rendstatus |= PDRAW_INTERLACE; // interlace mode\r
-  if (!(Pico.video.reg[12] & 1))\r
+  if (!(Pico.video.reg[12] & 1)) {\r
      Pico.est.rendstatus |= PDRAW_32_COLS;\r
+    if (!(PicoIn.opt & POPT_EN_SOFTSCALE)) {\r
+      columns = 256;\r
+      coffs = 32;\r
+    }\r
+  }\r
    if (Pico.video.reg[1] & 8) {\r
      Pico.est.rendstatus |= PDRAW_30_ROWS;\r
-    offs = 0;\r
      lines = 240;\r
+    loffs = 0;\r
    }\r
+  if (PicoIn.opt & POPT_DIS_32C_BORDER)\r
+    coffs = 0;\r
  \r
    if (Pico.est.rendstatus != rendstatus_old || lines != rendlines) {\r
      rendlines = lines;\r
      // mode_change() might reset rendstatus_old by calling SetColorFormat\r
-    emu_video_mode_change((lines == 240) ? 0 : 8,\r
-      lines, (Pico.video.reg[12] & 1) ? 0 : 1);\r
+    emu_video_mode_change(loffs, lines, coffs, columns);\r
      rendstatus_old = Pico.est.rendstatus;\r
    }\r
    if (PicoIn.skipFrame) // preserve this until something is rendered at last\r
@@ -1857,8 +1878,8 @@ PICO_INTERNAL void PicoFrameStart(void)
    if (sprep | skipped)\r
      Pico.est.rendstatus |= PDRAW_PARSE_SPRITES;\r
  \r
-  Pico.est.HighCol = HighColBase + offs * HighColIncrement;\r
-  Pico.est.DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement;\r
+  Pico.est.HighCol = HighColBase + loffs * HighColIncrement;\r
+  Pico.est.DrawLineDest = (char *)DrawLineDestBase + loffs * DrawLineDestIncrement;\r
    Pico.est.DrawScanline = 0;\r
    skip_next_line = 0;\r
  \r
diff --git a/pico/draw_arm.S b/pico/draw_arm.S

index e05c2fd..38d831f 100644 (file)
--- a/pico/draw_arm.S
+++ b/pico/draw_arm.S
@@ -1653,9 +1653,9 @@ FinalizeLine555:
      tst     r12, #1\r
      movne   r2, #320/8           @ len\r
      bne     .fl_no32colRGB555\r
-    ldr     r4, [r10, #OFS_EST_PicoOpt]\r
+    ldr     r5, [r10, #OFS_EST_PicoOpt]\r
      mov     r2, #256/8\r
-    ldr     r4, [r4]\r
+    ldr     r4, [r5]\r
      tst     r4, #0x4000\r
      bne     .fl_32scale_RGB555\r
      tst     r4, #0x0100\r
@@ -1705,15 +1705,167 @@ FinalizeLine555:
  \r
  \r
  .fl_32scale_RGB555:\r
-    mov     r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007\r
-    orr     r9, r9, #0x00e7\r
+    ldr     r5, [r5, #OFS_PicoIn_filter-OFS_PicoIn_opt]\r
+\r
+    mov     r9, #0xf700 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007\r
+    orr     r9, r9, #0x00de\r
  \r
  #ifdef UNALIGNED_DRAWLINEDEST\r
      tst     r0, #2\r
      bne     .fl_32scale_RGB555u\r
  #endif\r
  \r
-.fl_loop32scale_RGB555:\r
+    ands    r5, r5, #0x3\r
+    addne   pc, pc, r5, lsl #2\r
+    b       .fl_32scale_nn\r
+    b       .fl_32scale_nn\r
+    b       .fl_32scale_snn\r
+    b       .fl_32scale_bl2\r
+    b       .fl_32scale_bl4\r
+\r
+.fl_32scale_nn:\r
+    ldr     r12, [r1], #4\r
+    ldr     r7,  [r1], #4\r
+\r
+    and     r4, lr, r12, lsl #1\r
+    ldrh    r4, [r3, r4]\r
+    and     r5, lr, r12, lsr #7\r
+    ldrh    r5, [r3, r5]\r
+    and     r6, lr, r12, lsr #15\r
+    ldrh    r6, [r3, r6]\r
+    and     r10,lr, r12, lsr #23\r
+    ldrh    r10,[r3, r10]\r
+\r
+    orr     r4, r4, r5, lsl #16\r
+    orr     r5, r6, r6, lsl #16\r
+\r
+    and     r6, lr, r7, lsl #1\r
+    ldrh    r6, [r3, r6]\r
+    and     r8, lr, r7, lsr #7\r
+    ldrh    r8, [r3, r8]\r
+    and     r12,lr, r7, lsr #15\r
+    ldrh    r12,[r3, r12]\r
+    and     r7, lr, r7, lsr #23\r
+    ldrh    r7, [r3, r7]\r
+\r
+    orr     r6, r10,r6, lsl  #16\r
+    orr     r8, r8,r12, lsl #16\r
+\r
+    subs    r2, r2, #1\r
+\r
+    orr     r10,r12,r7, lsl #16\r
+\r
+    stmia   r0!, {r4,r5,r6,r8,r10}\r
+    bne     .fl_32scale_nn\r
+\r
+    ldmfd   sp!, {r4-r10,pc}\r
+\r
+.fl_32scale_snn:\r
+    ldr     r12, [r1], #4\r
+    ldr     r7,  [r1], #4\r
+\r
+    and     r4, lr, r12, lsl #1\r
+    ldrh    r4, [r3, r4]\r
+    and     r5, lr, r12, lsr #7\r
+    ldrh    r5, [r3, r5]\r
+    and     r6, lr, r12, lsr #15\r
+    ldrh    r6, [r3, r6]\r
+    and     r10,lr, r12, lsr #23\r
+    ldrh    r10,[r3, r10]\r
+\r
+    and     r4, r4, r9\r
+    and     r5, r5, r9\r
+    orr     r4, r4, r5, lsl #16\r
+    and     r6, r6, r9\r
+    add     r5, r5, r6\r
+    mov     r5, r5, lsr #1\r
+    orr     r5, r5, r6, lsl #16\r
+\r
+    and     r6, lr, r7, lsl #1\r
+    ldrh    r6, [r3, r6]\r
+    and     r8, lr, r7, lsr #7\r
+    ldrh    r8, [r3, r8]\r
+    and     r12,lr, r7, lsr #15\r
+    ldrh    r12,[r3, r12]\r
+    and     r7, lr, r7, lsr #23\r
+    ldrh    r7, [r3, r7]\r
+\r
+    and     r6, r6, r9\r
+    and     r10,r10,r9\r
+    orr     r6, r10,r6, lsl  #16\r
+    and     r12,r12,r9\r
+    and     r7, r7, r9\r
+    orr     r10,r12,r7, lsl #16\r
+\r
+    and     r8, r8, r9\r
+    add     r12,r12,r8\r
+    mov     r12,r12,lsr #1\r
+    orr     r8, r8,r12, lsl #16\r
+\r
+    subs    r2, r2, #1\r
+\r
+    stmia   r0!, {r4,r5,r6,r8,r10}\r
+    bne     .fl_32scale_snn\r
+\r
+    ldmfd   sp!, {r4-r10,pc}\r
+\r
+.fl_32scale_bl2:\r
+    ldr     r12, [r1], #4\r
+    ldr     r7,  [r1], #4\r
+\r
+    and     r4, lr, r12, lsl #1\r
+    ldrh    r4, [r3, r4]\r
+    and     r5, lr, r12, lsr #7\r
+    ldrh    r5, [r3, r5]\r
+    and     r6, lr, r12, lsr #15\r
+    ldrh    r6, [r3, r6]\r
+\r
+    and     r4, r4, r9\r
+    and     r5, r5, r9\r
+    add     r10,r4, r5\r
+    mov     r10,r10,lsr #1\r
+    orr     r4, r4, r10,lsl #16                @ px0 | (px0+px1)/2\r
+\r
+    and     r6, r6, r9\r
+    add     r5, r5, r6\r
+    mov     r5, r5, lsr #1\r
+    orr     r5, r5, r6, lsl #16                @ (px1+px2)/2 | px2\r
+\r
+    and     r10,lr, r12, lsr #23\r
+    ldrh    r10,[r3, r10]\r
+    and     r8, lr, r7, lsl #1\r
+    ldrh    r8, [r3, r8]\r
+\r
+    and     r10,r10,r9\r
+    and     r8, r8, r9\r
+    orr     r6, r10,r8, lsl  #16       @ px3 | px4\r
+\r
+    and     r12,lr, r7, lsr #15\r
+    ldrh    r12,[r3, r12]\r
+    and     r10, lr, r7, lsr #23\r
+    ldrh    r10, [r3, r10]\r
+    and     r7, lr, r7, lsr #7\r
+    ldrh    r7, [r3, r7]\r
+\r
+    and     r12,r12,r9\r
+    and     r10,r10,r9\r
+    orr     r10,r12,r10, lsl #16       @ px6 | px7\r
+\r
+    and     r7, r7, r9\r
+    add     r12,r12,r7\r
+    add     r8, r8, r7\r
+    mov     r8, r8, lsr #1\r
+    mov     r12,r12,lsr #1\r
+    orr     r8, r8,r12, lsl #16                @ (px4+px5)/2 | (px5+px6)/2\r
+\r
+    subs    r2, r2, #1\r
+\r
+    stmia   r0!, {r4,r5,r6,r8,r10}\r
+    bne     .fl_32scale_bl2\r
+\r
+    ldmfd   sp!, {r4-r10,pc}\r
+\r
+.fl_32scale_bl4:\r
      ldr     r12, [r1], #4\r
      ldr     r7,  [r1], #4\r
  \r
@@ -1721,16 +1873,21 @@ FinalizeLine555:
      ldrh    r4, [r3, r4]\r
      and     r5, lr, r12,lsr #7\r
      ldrh    r5, [r3, r5]\r
-    and     r4, r4, r9, lsl #2\r
+\r
+    @ r4 = 1/4px0+3/4px1 : px0\r
+    and     r4, r4, r9\r
      orr     r4, r4, r4, lsl #14       @ r4[31:16] = 1/4 pix_s 0\r
-    and     r5, r5, r9, lsl #2\r
+    and     r5, r5, r9\r
      sub     r6, r5, r5, lsr #2        @ r6 = 3/4 pix_s 1\r
      add     r4, r4, r6, lsl #16       @ pix_d 0, 1\r
+\r
      and     r6, lr, r12,lsr #15\r
      ldrh    r6, [r3, r6]\r
      and     r12,lr, r12,lsr #23\r
      ldrh    r12,[r3, r12]\r
-    and     r6, r6, r9, lsl #2\r
+\r
+    @ r5 = 3/4px2+1/4px3 : (px1+px2)/2\r
+    and     r6, r6, r9\r
      add     r5, r5, r6\r
      mov     r5, r5, lsr #1\r
      sub     r6, r6, r6, lsr #2        @ r6 = 3/4 pix_s 2\r
@@ -1738,32 +1895,38 @@ FinalizeLine555:
  \r
      and     r6, lr, r7, lsl #1\r
      ldrh    r6, [r3, r6]\r
-    and     r12,r12,r9, lsl #2\r
+    and     r12,r12,r9\r
      add     r5, r5, r12,lsl #14       @ pix_d 2, 3\r
-    and     r6, r6, r9, lsl #2\r
+\r
+    @ r6 = px4 : px3\r
+    and     r6, r6, r9\r
      orr     r6, r12,r6, lsl #16       @ pix_d 4, 5\r
  \r
+    @ r8 = (px5+px6)/2 : 1/4px4+3/4px5\r
      and     r12,lr, r7, lsr #7\r
      ldrh    r12,[r3, r12]\r
      and     r10,lr, r7, lsr #15\r
      ldrh    r10,[r3, r10]\r
-    and     r12,r12,r9, lsl #2\r
+    and     r12,r12,r9\r
      sub     r8, r12,r12,lsr #2        @ r8 = 3/4 pix_s 1\r
      add     r8, r8, r6, lsr #18\r
+\r
      and     r7, lr, r7, lsr #23\r
      ldrh    r7, [r3, r7]\r
-    and     r10,r10,r9, lsl #2\r
+    and     r10,r10,r9\r
      orr     r8, r8, r10,lsl #15\r
      add     r8, r8, r12,lsl #15       @ pix_d 6, 7\r
+\r
+    @ r10 = px7 : 3/4px6+1/4px7\r
      sub     r10,r10,r10,lsr #2        @ r10= 3/4 pix_s 2\r
-    and     r7, r7, r9, lsl #2\r
+    and     r7, r7, r9\r
      add     r10,r10,r7, lsr #2        @ += 1/4 pix_s 3\r
      orr     r10,r10,r7, lsl #16       @ pix_d 8, 9\r
  \r
      subs    r2, r2, #1\r
  \r
      stmia   r0!, {r4,r5,r6,r8,r10}\r
-    bne     .fl_loop32scale_RGB555\r
+    bne     .fl_32scale_bl4\r
  \r
      ldmfd   sp!, {r4-r10,pc}\r
  \r
@@ -1826,10 +1989,10 @@ FinalizeLine555:
      ldrh    r6, [r3, r6]\r
      and     r5, lr, r12,lsr #7\r
      ldrh    r5, [r3, r5]\r
-    and     r6, r6, r9, lsl #2\r
+    and     r6, r6, r9\r
      orr     r4, r4, r6, lsl #16       @ r4 = pix_d -1, 0\r
  \r
-    and     r5, r5, r9, lsl #2\r
+    and     r5, r5, r9\r
      sub     r8, r5, r5, lsr #2        @ r8 = 3/4 pix_s 1\r
      add     r6, r8, r6, lsr #2        @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1)\r
      orr     r5, r6, r5, lsl #15\r
@@ -1838,20 +2001,20 @@ FinalizeLine555:
      ldrh    r6, [r3, r6]\r
      and     r12,lr, r12,lsr #23\r
      ldrh    r12,[r3, r12]\r
-    and     r6, r6, r9, lsl #2\r
+    and     r6, r6, r9\r
      add     r5, r5, r6, lsl #15       @ r5 = pix_d 1, 2\r
  \r
      and     r8, lr, r7, lsl #1\r
      ldrh    r8, [r3, r8]\r
      and     r10,lr, r7, lsr #7\r
      ldrh    r10,[r3, r10]\r
-    and     r12,r12,r9, lsl #2\r
+    and     r12,r12,r9\r
      sub     r6, r6, r6, lsr #2        @ r6 = 3/4 pix_s 2\r
      add     r6, r6, r12,lsr #2\r
      orr     r6, r6, r12,lsl #16       @ r6 = pix_d 3, 4\r
  \r
-    and     r8, r8, r9, lsl #2\r
-    and     r10,r10,r9, lsl #2\r
+    and     r8, r8, r9\r
+    and     r10,r10,r9\r
      sub     r12,r10,r10,lsr #2        @ r12 = 3/4 pix_s 5\r
      orr     r8, r8, r8, lsl #14\r
      add     r8, r8, r12,lsl #16       @ r8 = pix_d 5, 6\r
@@ -1859,12 +2022,12 @@ FinalizeLine555:
      ldrh    r12,[r3, r12]\r
      and     r7, lr, r7, lsr #23\r
      ldrh    r7, [r3, r7]\r
-    and     r12,r12,r9, lsl #2\r
+    and     r12,r12,r9\r
      add     r10,r10,r12\r
      mov     r10,r10,    lsr #1\r
      sub     r12,r12,r12,lsr #2        @ r12 = 3/4 pix_s 6\r
      orr     r10,r10,r12,lsl #16\r
-    and     r7, r7, r9, lsl #2\r
+    and     r7, r7, r9\r
      add     r10,r10,r7, lsl #14       @ r10 = pix_d 7, 8\r
  \r
      subs    r2, r2, #1\r
diff --git a/pico/mode4.c b/pico/mode4.c

index 00a4aef..76cbfab 100644 (file)
--- a/pico/mode4.c
+++ b/pico/mode4.c
@@ -291,7 +291,7 @@ void PicoFrameStartMode4(void)
    }
  
    if (Pico.est.rendstatus != rendstatus_old || lines != rendlines) {
-    emu_video_mode_change(screen_offset, lines, 1);
+    emu_video_mode_change(screen_offset, lines, line_offset, 256);
      rendstatus_old = Pico.est.rendstatus;
      rendlines = lines;
    }
@@ -352,6 +352,8 @@ void PicoDoHighPal555M4(void)
    Pico.est.HighPal[0xe0] = 0;
  }
  
+#include <platform/common/upscale.h>
+
  static void FinalizeLineRGB555M4(int line)
  {
    if (Pico.m.dirtyPal)
@@ -364,15 +366,20 @@ static void FinalizeLineRGB555M4(int line)
  
  static void FinalizeLine8bitM4(int line)
  {
-  unsigned char *pd = Pico.est.DrawLineDest;
-
-  if (DrawLineDestIncrement)
-    memcpy(pd + line_offset, Pico.est.HighCol + line_offset + 8, 256);
+  unsigned char *pd = Pico.est.DrawLineDest + line_offset;
+  unsigned char *ps = Pico.est.HighCol + line_offset + 8;
+
+  if (DrawLineDestIncrement) {
+    if (PicoIn.opt & POPT_EN_SOFTSCALE)
+      rh_upscale_nn_4_5(pd, 320, ps, 256, 256, f_nop);
+    else
+      memcpy(pd, ps, 256);
+  }
  }
  
  void PicoDrawSetOutputMode4(pdso_t which)
  {
-  line_offset = PicoIn.opt & POPT_DIS_32C_BORDER ? 0 : 32;
+  line_offset = PicoIn.opt & (POPT_DIS_32C_BORDER|POPT_EN_SOFTSCALE) ? 0 : 32;
    switch (which)
    {
      case PDF_8BIT:   FinalizeLineM4 = FinalizeLine8bitM4; break;
diff --git a/pico/pico.h b/pico/pico.h

index 912d950..76786d6 100644 (file)
--- a/pico/pico.h
+++ b/pico/pico.h
@@ -41,7 +41,7 @@ extern void *plat_mem_get_for_drc(size_t size);
  extern int   plat_mem_set_exec(void *ptr, size_t size);\r
  \r
  // this one should handle display mode changes\r
-extern void emu_video_mode_change(int start_line, int line_count, int is_32cols);\r
+extern void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count);\r
  \r
  // this must switch to 16bpp mode\r
  extern void emu_32x_startup(void);\r
@@ -86,7 +86,7 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s;
  \r
  // the emulator is configured and some status is reported\r
  // through this global state (not saved in savestates)\r
-typedef struct\r
+typedef struct PicoInterface\r
  {\r
         unsigned int opt; // POPT_* bitfield\r
  \r
@@ -101,6 +101,8 @@ typedef struct
         unsigned short quirks;         // game-specific quirks: PQUIRK_*\r
         unsigned short overclockM68k;  // overclock the emulated 68k, in %\r
  \r
+       unsigned short filter;         // softscale filter type\r
+\r
         int sndRate;                   // rate in Hz\r
         int sndFilterAlpha;            // Low pass sound filter alpha (Q16)\r
         short *sndOut;                 // PCM output buffer\r
diff --git a/pico/pico_int.h b/pico/pico_int.h

index 83e8141..2038b6d 100644 (file)
--- a/pico/pico_int.h
+++ b/pico/pico_int.h
@@ -356,6 +356,7 @@ struct PicoEState
    int DrawScanline;\r
    int rendstatus;\r
    void *DrawLineDest;          // draw destination\r
+  int DrawLineDestIncr;\r
    unsigned char *HighCol;\r
    s32 *HighPreSpr;\r
    struct Pico *Pico;\r
diff --git a/platform/common/emu.c b/platform/common/emu.c

index 3f9ccff..fd4a251 100644 (file)
--- a/platform/common/emu.c
+++ b/platform/common/emu.c
@@ -1219,7 +1219,7 @@ void emu_cmn_forced_frame(int no_scale, int do_emu, void *buf)
                 memset32((short *)g_screen_ptr + g_screen_ppitch * y, 0,\r
                          g_screen_width * 2 / 4);\r
  \r
-       PicoIn.opt &= ~POPT_ALT_RENDERER;\r
+       PicoIn.opt &= ~(POPT_ALT_RENDERER|POPT_EN_SOFTSCALE);\r
         PicoIn.opt |= POPT_ACC_SPRITES;\r
         if (!no_scale && currentConfig.scaling)\r
                 PicoIn.opt |= POPT_EN_SOFTSCALE;\r
diff --git a/platform/common/emu.h b/platform/common/emu.h

index a211002..9e47e44 100644 (file)
--- a/platform/common/emu.h
+++ b/platform/common/emu.h
@@ -37,10 +37,19 @@ extern int g_screen_ppitch; // pitch in pixels
  
  enum {
         EOPT_SCALE_NONE = 0,
-       EOPT_SCALE_SW,
+       // linux, GP2X:
+       EOPT_SCALE_SW = 1,
         EOPT_SCALE_HW,
  };
  
+enum {
+       EOPT_FILTER_NONE = 0,
+       // software scalers
+       EOPT_FILTER_SMOOTHER = 1,
+       EOPT_FILTER_BILINEAR1,
+       EOPT_FILTER_BILINEAR2,
+};
+
  enum {
         EOPT_CONFIRM_NONE = 0,
         EOPT_CONFIRM_SAVE = 1,
@@ -63,7 +72,7 @@ typedef struct _currentConfig_t {
         int CPUclock;
         int volume;
         int gamma;
-       int scaling;  // gp2x: EOPT_SCALE_*; psp: bilinear filtering
+       int scaling;  // EOPT_SCALE_*
         int vscaling;
         int rotation; // for UIQ
         float scale; // psp: screen scale
@@ -72,7 +81,7 @@ typedef struct _currentConfig_t {
         int turbo_rate;
         int renderer;
         int renderer32x;
-       int filter; // pandora
+       int filter;  // EOPT_FILTER_* video filter
         int analog_deadzone;
         int msh2_khz;
         int ssh2_khz;
@@ -180,6 +189,7 @@ void plat_update_volume(int has_changed, int is_up);
  /* should be in libpicofe/plat.h */
  void plat_video_clear_status(void);
  void plat_video_clear_buffers(void);
+void plat_video_set_size(int w, int h);
  
  #ifdef __cplusplus
  } // extern "C"
diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c

index 5edeb79..f8bf78c 100644 (file)
--- a/platform/common/plat_sdl.c
+++ b/platform/common/plat_sdl.c
@@ -22,6 +22,7 @@
  #include <pico/pico_int.h>
  
  static void *shadow_fb;
+static struct area { int w, h; } area;
  
  static struct in_pdata in_sdl_platform_data = {
         .defbinds = in_sdl_defbinds,
@@ -81,54 +82,77 @@ void bgr_to_uyvy_init(void)
    }
  }
  
-void rgb565_to_uyvy(void *d, const void *s, int pixels, int x2)
+void rgb565_to_uyvy(void *d, const void *s, int w, int h, int pitch, int x2)
  {
    uint32_t *dst = d;
    const uint16_t *src = s;
+  int i;
  
-  if (x2)
-  for (; pixels > 0; src += 4, dst += 4, pixels -= 4)
-  {
-    struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1];
-    struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3];
+  if (x2) while (h--) {
+    for (i = w; i > 0; src += 4, dst += 4, i -= 4)
+    {
+      struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1];
+      struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3];
  #if CPU_IS_LE
-    dst[0] = (uyvy0->y << 24) | uyvy0->vyu;
-    dst[1] = (uyvy1->y << 24) | uyvy1->vyu;
-    dst[2] = (uyvy2->y << 24) | uyvy2->vyu;
-    dst[3] = (uyvy3->y << 24) | uyvy3->vyu;
+      dst[0] = (uyvy0->y << 24) | uyvy0->vyu;
+      dst[1] = (uyvy1->y << 24) | uyvy1->vyu;
+      dst[2] = (uyvy2->y << 24) | uyvy2->vyu;
+      dst[3] = (uyvy3->y << 24) | uyvy3->vyu;
  #else
-    dst[0] = uyvy0->y | (uyvy0->vyu << 8);
-    dst[1] = uyvy1->y | (uyvy1->vyu << 8);
-    dst[2] = uyvy2->y | (uyvy2->vyu << 8);
-    dst[3] = uyvy3->y | (uyvy3->vyu << 8);
+      dst[0] = uyvy0->y | (uyvy0->vyu << 8);
+      dst[1] = uyvy1->y | (uyvy1->vyu << 8);
+      dst[2] = uyvy2->y | (uyvy2->vyu << 8);
+      dst[3] = uyvy3->y | (uyvy3->vyu << 8);
  #endif
-  } else 
-  for (; pixels > 0; src += 4, dst += 2, pixels -= 4)
-  {
-    struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1];
-    struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3];
+    }
+    src += pitch - w;
+  } else while (h--) {
+    for (i = w; i > 0; src += 4, dst += 2, i -= 4)
+    {
+      struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1];
+      struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3];
  #if CPU_IS_LE
-    dst[0] = (uyvy1->y << 24) | uyvy0->vyu;
-    dst[1] = (uyvy3->y << 24) | uyvy2->vyu;
+      dst[0] = (uyvy1->y << 24) | uyvy0->vyu;
+      dst[1] = (uyvy3->y << 24) | uyvy2->vyu;
  #else
-    dst[0] = uyvy1->y | (uyvy0->vyu << 8);
-    dst[1] = uyvy3->y | (uyvy2->vyu << 8);
+      dst[0] = uyvy1->y | (uyvy0->vyu << 8);
+      dst[1] = uyvy3->y | (uyvy2->vyu << 8);
  #endif
+    }
+    src += pitch - w;
    }
  }
  
  static int clear_buf_cnt, clear_stat_cnt;
  
+void plat_video_set_size(int w, int h) // request a w x h video output area
+{
+       if (area.w != w || area.h != h) { // nothing to do if the requested size is unchanged
+               area = (struct area) { w, h }; // stays at the requested size even if the switch below fails
+
+               if (plat_sdl_change_video_mode(w, h, 0) < 0) {
+                       // failed, revert to original resolution
+                       plat_sdl_change_video_mode(g_screen_width, g_screen_height, 0);
+                       w = g_screen_width, h = g_screen_height;
+               }
+               if (!plat_sdl_overlay && !plat_sdl_gl_active) { // neither overlay nor GL: point the screen globals at the SDL surface
+                       g_screen_width = w;
+                       g_screen_height = h;
+                       g_screen_ppitch = w; // NOTE(review): assumes the surface pitch equals w pixels - confirm
+                       g_screen_ptr = plat_sdl_screen->pixels;
+               }
+       }
+}
+
  void plat_video_flip(void)
  {
         if (plat_sdl_overlay != NULL) {
                 SDL_Rect dstrect =
                         { 0, 0, plat_sdl_screen->w, plat_sdl_screen->h };
-
                 SDL_LockYUVOverlay(plat_sdl_overlay);
                 rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb,
-                               g_screen_ppitch * g_screen_height,
-                               plat_sdl_overlay->w > 2*plat_sdl_overlay->h);
+                               area.w, area.h, g_screen_ppitch,
+                               plat_sdl_overlay->w >= 2*area.w);
                 SDL_UnlockYUVOverlay(plat_sdl_overlay);
                 SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect);
         }
@@ -205,7 +229,7 @@ void plat_video_menu_end(void)
  
                 SDL_LockYUVOverlay(plat_sdl_overlay);
                 rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb,
-                               g_menuscreen_pp * g_menuscreen_h, 0);
+                       g_menuscreen_w, g_menuscreen_h, g_menuscreen_pp, 0);
                 SDL_UnlockYUVOverlay(plat_sdl_overlay);
  
                 SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect);
@@ -227,10 +251,10 @@ void plat_video_menu_leave(void)
  
  void plat_video_loop_prepare(void)
  {
-       // take over any new vout settings XXX ask plat_sdl for scaling instead!
+       // take over any new vout settings
         plat_sdl_change_video_mode(g_menuscreen_w, g_menuscreen_h, 0);
         // switch over to scaled output if available
-       if (plat_sdl_overlay != NULL || plat_sdl_gl_active || currentConfig.scaling != EOPT_SCALE_NONE) {
+       if (plat_sdl_overlay != NULL || plat_sdl_gl_active) {
                 g_screen_width = 320;
                 g_screen_height = 240;
                 g_screen_ppitch = g_screen_width;
@@ -246,6 +270,7 @@ void plat_video_loop_prepare(void)
                 g_screen_ptr = plat_sdl_screen->pixels;
         }
         plat_video_set_buffer(g_screen_ptr);
+       plat_video_set_size(g_screen_width, g_screen_height);
  }
  
  void plat_early_init(void)
diff --git a/platform/common/upscale.c b/platform/common/upscale.c

index b84c096..ed3129f 100644 (file)
--- a/platform/common/upscale.c
+++ b/platform/common/upscale.c
@@ -7,7 +7,7 @@
   * nn: nearest neighbour
   * snn:        "smoothed" nearest neighbour (see below)
   * bln:        n-level-bilinear with n quantized weights
- *     quantization: 0: a<1/2*n, 1/n: 1/2*n<=a<3/2*n, etc
+ *     quantization: 0: a<1/(2*n), 1/n: 1/(2*n)<=a<3/(2*n), etc
   *     currently n=2, n=4 are implemented (there's n=8 mixing, but no filters)
   *     [NB this has been brought to my attn, which is probably the same as bl2:
   *     https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1]
@@ -18,490 +18,586 @@
   *     a sharper look than a bilinear filter, at the price of some visible jags
   *     on diagonal edges.
   * 
- * scaling modes:
- * 256x___ -> 320x___  only horizontal scaling. Produces an aspect error of
- *                     ~7% for NTSC 224 line modes, but is correct for PAL
- * 256/320x224/240
- *     -> 320x240      always produces 320x240 at DAR 4:3
- * 160x144 -> 320x240  game gear (currently unused)
+ * example scaling modes:
+ * 256x_Y_ -> 320x_Y_, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (NTSC 7% aspect err)
+ * 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (PAL 7% aspect err)
+ * 320x224 -> 320x240, PAR 1:1, for NTSC DAR 4:3 (PAL 7% aspect err)
+ * 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3
   * 
   * (C) 2021 kub <derkub@gmail.com>
   */
  
  #include "upscale.h"
  
-/* 256x___ -> 320x___, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (wrong for NTSC) */
-void upscale_clut_nn_256_320x___(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int height)
+/* X x Y -> X*5/4 x Y */
+void upscale_clut_nn_x_4_5(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height)
  {
         int y;
  
         for (y = 0; y < height; y++) {
-               h_upscale_nn_4_5(di, ds, si, ss, 256, f_nop);
+               h_upscale_nn_4_5(di, ds, si, ss, width, f_nop);
         }
  }
  
-void upscale_rgb_nn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal)
+void upscale_rgb_nn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y;
  
         for (y = 0; y < height; y++) {
-               h_upscale_nn_4_5(di, ds, si, ss, 256, f_pal);
+               h_upscale_nn_4_5(di, ds, si, ss, width, f_pal);
         }
  }
  
-void upscale_rgb_snn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal)
+void upscale_rgb_snn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y;
  
         for (y = 0; y < height; y++) {
-               h_upscale_snn_4_5(di, ds, si, ss, 256, f_pal);
+               h_upscale_snn_4_5(di, ds, si, ss, width, f_pal);
         }
  }
  
-void upscale_rgb_bl2_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal)
+void upscale_rgb_bl2_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y;
  
         for (y = 0; y < height; y++) {
-               h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal);
+               h_upscale_bl2_4_5(di, ds, si, ss, width, f_pal);
         }
  }
  
-void upscale_rgb_bl4_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal)
+void upscale_rgb_bl4_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y;
  
         for (y = 0; y < height; y++) {
-               h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal);
+               h_upscale_bl4_4_5(di, ds, si, ss, width, f_pal);
         }
  }
  
-/* 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (wrong for PAL) */
-void upscale_clut_nn_256_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss)
+/* X x Y -> X*5/4 x Y*17/16 */
+void upscale_clut_nn_x_4_5_y_16_17(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height)
  {
+       int swidth = width * 5/4;
         int y, j;
  
-       /* 14:15, 0 1 2 3 4 5 6 6 7 8 9 10 11 12 13 */
-       for (y = 0; y < 224; y += 14) {
-               /* lines 0-6 */
-               for (j = 0; j < 7; j++) {
-                       h_upscale_nn_4_5(di, ds, si, ss, 256, f_nop);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 8; j++) {
+                       h_upscale_nn_4_5(di, ds, si, ss, width, f_nop);
                 }
-               /* lines 8-14 */
-               di += ds;
-               for (j = 0; j < 7; j++) {
-                       h_upscale_nn_4_5(di, ds, si, ss, 256, f_nop);
+               di +=  ds;
+               for (j = 0; j < 8; j++) {
+                       h_upscale_nn_4_5(di, ds, si, ss, width, f_nop);
                 }
-               /* line 7 */
-               di -= 8*ds;
-               v_copy(&di[0], &di[-ds], 320, f_nop);
-               di += 8*ds;
+
+               di -= 9*ds;
+               v_copy(&di[0], &di[-ds], swidth, f_nop);
+               di += 9*ds;
         }
  }
  
-void upscale_rgb_nn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_nn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
+       int swidth = width * 5/4;
         int y, j;
  
-       for (y = 0; y < 224; y += 14) {
-               for (j = 0; j < 7; j++) {
-                       h_upscale_nn_4_5(di, ds, si, ss, 256, f_pal);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 8; j++) {
+                       h_upscale_nn_4_5(di, ds, si, ss, width, f_pal);
                 }
                 di +=  ds;
-               for (j = 0; j < 7; j++) {
-                       h_upscale_nn_4_5(di, ds, si, ss, 256, f_pal);
+               for (j = 0; j < 8; j++) {
+                       h_upscale_nn_4_5(di, ds, si, ss, width, f_pal);
                 }
  
-               di -= 8*ds;
-               v_copy(&di[0], &di[-ds], 320, f_nop);
-               di += 8*ds;
+               di -= 9*ds;
+               v_copy(&di[0], &di[-ds], swidth, f_nop);
+               di += 9*ds;
         }
  }
  
-void upscale_rgb_snn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_snn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
+       int swidth = width * 5/4;
         int y, j;
  
-       /* 14:15, 0 1 2 3 4 5 5+6 6+7 7+8 8 9 10 11 12 13 */
-       for (y = 0; y < 224; y += 14) {
-               for (j = 0; j < 7; j++) {
-                       h_upscale_snn_4_5(di, ds, si, ss, 256, f_pal);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 8; j++) {
+                       h_upscale_snn_4_5(di, ds, si, ss, width, f_pal);
                 }
                 di +=  ds;
-               for (j = 0; j < 7; j++) {
-                       h_upscale_snn_4_5(di, ds, si, ss, 256, f_pal);
+               for (j = 0; j < 8; j++) {
+                       h_upscale_snn_4_5(di, ds, si, ss, width, f_pal);
                 }
  
-               /* mix lines 6-8 */
+               /* mix lines 7-9 */
-               di -= 8*ds;
-               v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop);
-               v_mix(&di[-ds], &di[-2*ds], &di[-ds], 320, p_05, f_nop);
-               v_mix(&di[ ds], &di[ ds], &di[ 2*ds], 320, p_05, f_nop);
-               di += 8*ds;
+               di -= 9*ds;
+               v_mix(&di[0], &di[-ds], &di[ds], swidth, p_05, f_nop);
+               v_mix(&di[-ds], &di[-2*ds], &di[-ds], swidth, p_05, f_nop);
+               v_mix(&di[ ds], &di[ ds], &di[ 2*ds], swidth, p_05, f_nop);
+               di += 9*ds;
         }
  }
  
-void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_bl2_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
+       int swidth = width * 5/4;
         int y, j;
  
-       /* 14:15, 0 1 2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11 12 13 */
-       for (y = 0; y < 224; y += 14) {
-               /* lines 0-2 */
-               for (j = 0; j < 3; j++) {
-                       h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal);
-               }
-               /* lines 3-11 mixing prep */
-               di += ds;
-               for (j = 0; j < 11; j++) {
-                       h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 4; j++) {
+                       h_upscale_bl2_4_5(di, ds, si, ss, width, f_pal);
                 }
-               di -= 12*ds;
-               /* mixing line 3: line 2 = -ds, line 3 = +ds */
-                       v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop);
-                       di += ds;
-               /* mixing lines 4-5: line n-1 = 0, line n = +ds */
-               for (j = 0; j < 2; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop);
-                       di += ds;
-                       }
-               /* mixing line 6-8 */
-               for (j = 0; j < 3; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop);
-                       di += ds;
+               di +=  ds;
+               for (j = 0; j < 12; j++) {
+                       h_upscale_bl2_4_5(di, ds, si, ss, width, f_pal);
                 }
-               /* mixing lines 9-11 */
-               for (j = 0; j < 3; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop);
+               /* mix lines 4-11 */
+               di -= 13*ds;
+                       v_mix(&di[0], &di[-ds], &di[ds], swidth, p_05, f_nop);
+               for (j = 0; j < 7; j++) {
                         di += ds;
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop);
                 }
-               /* lines 12-14, already in place */
-               di += 3*ds;
+               di += 6*ds;
         }
  }
  
-void upscale_rgb_bl2_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_bl4_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
+       int swidth = width * 5/4;
         int y, j;
  
-       /* 14:15, 0 1 2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10 11 12 13 */
-       for (y = 0; y < 224; y += 14) {
-               for (j = 0; j < 3; j++) {
-                       h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 2; j++) {
+                       h_upscale_bl4_4_5(di, ds, si, ss, width, f_pal);
                 }
-               di +=  ds;
-               for (j = 0; j < 11; j++) {
-                       h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal);
+               di += ds;
+               for (j = 0; j < 14; j++) {
+                       h_upscale_bl4_4_5(di, ds, si, ss, width, f_pal);
                 }
-               /* mix lines 3-10 */
-               di -= 12*ds;
-                       v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop);
-               for (j = 0; j < 7; j++) {
+               di -= 15*ds;
+               /* mixing line 2: line 1 = -ds, line 2 = +ds */
+                       v_mix(&di[0], &di[-ds], &di[ds], swidth, p_025, f_nop);
+                       di += ds;
+               /* mixing lines 3-5: line n-1 = 0, line n = +ds */
+               for (j = 0; j < 3; j++) {
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_025, f_nop);
+                       di += ds;
+                       }
+               /* mixing lines 6-9 */
+               for (j = 0; j < 4; j++) {
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop);
+                       di += ds;
+               }
+               /* mixing lines 10-13 */
+               for (j = 0; j < 4; j++) {
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_075, f_nop);
                         di += ds;
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop);
                 }
-               di += 5*ds;
+               /* lines 14-16, already in place */
+               di += 3*ds;
         }
  }
  
-void upscale_rgb_bl4_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+/* "classic" upscaler as found in several emulators. It's really more like a
+ * x*4/3, y*16/15 upscaler, with an additional 5th row/17th line just inserted
+ * from the source image. That gives nice n/4,n/16 alpha values plus better
+ * symmetry in each block and avoids "borrowing" a row/line between blocks.
+ */
+void upscale_rgb_bln_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
+       int swidth = width * 5/4;
         int y, j;
  
-       /* 14:15, 0 0+1 1+2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11+12 12 13 */
-       for (y = 0; y < 224; y += 14) {
-               /* line 0 */
-                       h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal);
-               /* lines 1-14 mixing prep */
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 4; j++) {
+                       h_upscale_bln_4_5(di, ds, si, ss, width, f_pal);
+               }
                 di += ds;
-               for (j = 0; j < 13; j++) {
-                       h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal);
+               for (j = 0; j < 12; j++) {
+                       h_upscale_bln_4_5(di, ds, si, ss, width, f_pal);
                 }
-               di -= 14*ds;
-               /* mixing line 1: line 0 = -ds, line 1 = +ds */
-                       v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop);
+               di -= 13*ds;
+               /* mixing line 4: line 3 = -ds, line 4 = +ds */
+                       v_mix(&di[0], &di[-ds], &di[ds], swidth, p_025, f_nop);
                         di += ds;
-               /* mixing lines 2-4: line n-1 = 0, line n = +ds */
-               for (j = 0; j < 3; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop);
+               /* mixing lines 5-6: line n-1 = 0, line n = +ds */
+               for (j = 0; j < 2; j++) {
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_025, f_nop);
                         di += ds;
                         }
-               /* mixing lines 5-8 */
-               for (j = 0; j < 4; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop);
+               /* mixing line 7-9 */
+               for (j = 0; j < 3; j++) {
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop);
                         di += ds;
                 }
-               /* mixing lines 9-12 */
-               for (j = 0; j < 4; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop);
+               /* mixing lines 10-12 */
+               for (j = 0; j < 3; j++) {
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_075, f_nop);
                         di += ds;
                 }
-               /* lines 13-14, already in place */
-               di += 2*ds;
+               /* lines 13-16, already in place */
+               di += 4*ds;
         }
  }
  
-void upscale_rgb_bl8_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+/* experimental 8 level bilinear for quality assessment, X x Y -> X*5/4 x Y*17/16 */
+void upscale_rgb_bl8_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
-       int y, j, d;
+       int swidth = width * 5/4;
+       int y, j;
  
-       /* 14:15, -1+0 0+1 1+2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11+12 12+13 13 */
-       for (y = 0, d = ds; y < 224; y += 14, d = -ds) {
-               /* lines 0-14 mixing prep */
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 2; j++) {
+                       h_upscale_bl8_4_5(di, ds, si, ss, width, f_pal);
+               }
                 di += ds;
                 for (j = 0; j < 14; j++) {
-                       h_upscale_bl8_4_5(di, ds, si, ss, 256, f_pal);
+                       h_upscale_bl8_4_5(di, ds, si, ss, width, f_pal);
                 }
                 di -= 15*ds;
-               /* mixing line 0: line 0 = -ds, line 1 = +ds */
-                       v_mix(&di[0], &di[d], &di[ds], 320, p_0125, f_nop);
+               /* mixing line 2: line 1 = -ds, line 2 = +ds */
+                       v_mix(&di[0], &di[-ds], &di[ds], swidth, p_0125, f_nop);
                         di += ds;
-               /* mixing line 1: line 1 = 0, line 2 = +ds */
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_0125, f_nop);
+               /* mixing line 3: line 2 = 0, line 3 = +ds */
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_0125, f_nop);
                         di += ds;
-               /* mixing lines 2-3: line n-1 = 0, line n = +ds */
+               /* mixing lines 4-5: line n-1 = 0, line n = +ds */
                 for (j = 0; j < 2; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop);
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_025, f_nop);
                         di += ds;
                         }
-               /* mixing lines 4-5 */
+               /* mixing lines 6-7 */
                 for (j = 0; j < 2; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_0375, f_nop);
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_0375, f_nop);
                         di += ds;
                 }
-               /* mixing lines 6-7 */
+               /* mixing lines 8-9 */
                 for (j = 0; j < 2; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop);
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop);
                         di += ds;
                 }
-               /* mixing lines 8-9 */
+               /* mixing lines 10-11 */
                 for (j = 0; j < 2; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_0625, f_nop);
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_0625, f_nop);
                         di += ds;
                 }
-               /* mixing lines 10-11 */
+               /* mixing lines 12-13 */
                 for (j = 0; j < 2; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop);
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_075, f_nop);
                         di += ds;
                 }
-               /* mixing lines 12-13 */
+               /* mixing lines 14-15 */
                 for (j = 0; j < 2; j++) {
-                       v_mix(&di[0], &di[0], &di[ds], 320, p_0875, f_nop);
+                       v_mix(&di[0], &di[0], &di[ds], swidth, p_0875, f_nop);
                         di += ds;
                 }
-               /* line 14, already in place */
+               /* line 16, already in place */
                 di += ds;
         }
  }
  
-/* 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (wrong for PAL) */
-void upscale_clut_nn_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss)
+/* X x Y -> X x Y*17/16 */
+void upscale_clut_nn_y_16_17(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height)
  {
         int y, j;
  
-       for (y = 0; y < 224; y += 14) {
-               for (j = 0; j < 7; j++) {
-                       h_copy(di, ds, si, ss, 320, f_nop);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 8; j++) {
+                       h_copy(di, ds, si, ss, width, f_nop);
                 }
-               di += ds;
-               for (j = 0; j < 7; j++) {
-                       h_copy(di, ds, si, ss, 320, f_nop);
+               di +=  ds;
+               for (j = 0; j < 8; j++) {
+                       h_copy(di, ds, si, ss, width, f_nop);
                 }
  
-               di -= 8*ds;
-               v_copy(&di[0], &di[-ds], 320, f_nop);
-               di += 8*ds;
-
+               di -= 9*ds;
+               v_copy(&di[0], &di[-ds], width, f_nop);
+               di += 9*ds;
         }
  }
  
-void upscale_rgb_nn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_nn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y, j;
  
-       for (y = 0; y < 224; y += 14) {
-               for (j = 0; j < 7; j++) {
-                       h_copy(di, ds, si, ss, 320, f_pal);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 8; j++) {
+                       h_copy(di, ds, si, ss, width, f_pal);
                 }
                 di +=  ds;
-               for (j = 0; j < 7; j++) {
-                       h_copy(di, ds, si, ss, 320, f_pal);
+               for (j = 0; j < 8; j++) {
+                       h_copy(di, ds, si, ss, width, f_pal);
                 }
  
-               di -= 8*ds;
-               v_copy(&di[0], &di[-ds], 320, f_nop);
-               di += 8*ds;
+               di -= 9*ds;
+               v_copy(&di[0], &di[-ds], width, f_nop);
+               di += 9*ds;
         }
  }
  
-void upscale_rgb_snn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_snn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y, j;
  
-       for (y = 0; y < 224; y += 14) {
-               for (j = 0; j < 7; j++) {
-                       h_copy(di, ds, si, ss, 320, f_pal);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 8; j++) {
+                       h_copy(di, ds, si, ss, width, f_pal);
                 }
                 di +=  ds;
-               for (j = 0; j < 7; j++) {
-                       h_copy(di, ds, si, ss, 320, f_pal);
+               for (j = 0; j < 8; j++) {
+                       h_copy(di, ds, si, ss, width, f_pal);
                 }
  
-               di -= 8*ds;
-               v_mix(&di[  0], &di[-ds], &di[ds], 320, p_05, f_nop);
-               v_mix(&di[-ds], &di[-2*ds], &di[-ds], 320, p_05, f_nop);
-               v_mix(&di[ ds], &di[ ds], &di[ 2*ds], 320, p_05, f_nop);
-               di += 8*ds;
+               /* mix lines 7-9 */
+               di -= 9*ds;
+               v_mix(&di[0], &di[-ds], &di[ds], width, p_05, f_nop);
+               v_mix(&di[-ds], &di[-2*ds], &di[-ds], width, p_05, f_nop);
+               v_mix(&di[ ds], &di[ ds], &di[ 2*ds], width, p_05, f_nop);
+               di += 9*ds;
         }
  }
  
-void upscale_rgb_bl2_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_bl2_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y, j;
  
-       for (y = 0; y < 224; y += 14) {
-               for (j = 0; j < 3; j++) {
-                       h_copy(di, ds, si, ss, 320, f_pal);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 4; j++) {
+                       h_copy(di, ds, si, ss, width, f_pal);
                 }
-               for (j = 0; j < 8; j++) {
-                       v_mix(&di[0], &si[-ss], &si[0], 320, p_05, f_pal);
-                       di += ds;
-                       si += ss;
+               di +=  ds;
+               for (j = 0; j < 12; j++) {
+                       h_copy(di, ds, si, ss, width, f_pal);
                 }
-               si -= ss;
-               for (j = 0; j < 4; j++) {
-                       h_copy(di, ds, si, ss, 320, f_pal);
+               /* mix lines 4-11 */
+               di -= 13*ds;
+                       v_mix(&di[0], &di[-ds], &di[ds], width, p_05, f_nop);
+               for (j = 0; j < 7; j++) {
+                       di += ds;
+                       v_mix(&di[0], &di[0], &di[ds], width, p_05, f_nop);
                 }
+               di += 6*ds;
         }
  }
  
-void upscale_rgb_bl4_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_bl4_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y, j;
  
-       for (y = 0; y < 224; y += 14) {
-                       h_copy(di, ds, si, ss, 320, f_pal);
+       for (y = 0; y < height; y += 16) {
+               for (j = 0; j < 2; j++) {
+                       h_copy(di, ds, si, ss, width, f_pal);
+               }
+               di += ds;
+               for (j = 0; j < 14; j++) {
+                       h_copy(di, ds, si, ss, width, f_pal);
+               }
+               di -= 15*ds;
+               /* mixing line 2: line 1 = -ds, line 2 = +ds */
+                       v_mix(&di[0], &di[-ds], &di[ds], width, p_025, f_nop);
+                       di += ds;
+               /* mixing lines 3-5: line n-1 = 0, line n = +ds */
+               for (j = 0; j < 3; j++) {
+                       v_mix(&di[0], &di[0], &di[ds], width, p_025, f_nop);
+                       di += ds;
+                       }
+               /* mixing lines 6-9 */
                 for (j = 0; j < 4; j++) {
-                       v_mix(&di[0], &si[-ss], &si[0], 320, p_025, f_pal);
+                       v_mix(&di[0], &di[0], &di[ds], width, p_05, f_nop);
                         di += ds;
-                       si += ss;
                 }
+               /* mixing lines 10-13 */
                 for (j = 0; j < 4; j++) {
-                       v_mix(&di[0], &si[-ss], &si[0], 320, p_05, f_pal);
+                       v_mix(&di[0], &di[0], &di[ds], width, p_075, f_nop);
                         di += ds;
-                       si += ss;
                 }
-               for (j = 0; j < 4; j++) {
-                       v_mix(&di[0], &si[-ss], &si[0], 320, p_075, f_pal);
+               /* lines 14-16, already in place */
+               di += 3*ds;
+       }
+}
+
+/* X x Y -> X*2/1 x Y, e.g. for X 160->320 (GG) */
+void upscale_clut_nn_x_1_2(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height)
+{
+       int y;
+
+       for (y = 0; y < height; y++) {
+               h_upscale_nn_1_2(di, ds, si, ss, width, f_nop);
+       }
+}
+
+void upscale_rgb_nn_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
+{
+       int y;
+
+       for (y = 0; y < height; y++) {
+               h_upscale_nn_1_2(di, ds, si, ss, width, f_pal);
+       }
+}
+
+void upscale_rgb_bl2_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
+{
+       int y;
+
+       for (y = 0; y < height; y++) {
+               h_upscale_bl2_1_2(di, ds, si, ss, width, f_pal);
+       }
+}
+
+/* X x Y -> X*2/1 x Y*5/3 (GG) */
+void upscale_clut_nn_x_1_2_y_3_5(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height)
+{
+       int swidth = width * 2;
+       int y, j;
+
+       for (y = 0; y < height; y += 3) {
+               /* lines 0,2,4 */
+               for (j = 0; j < 3; j++) {
+                       h_upscale_nn_1_2(di, ds, si, ss, width, f_nop);
                         di += ds;
-                       si += ss;
                 }
-               si -= ss;
+               /* lines 1,3 */
+               di -= 5*ds;
                 for (j = 0; j < 2; j++) {
-                       h_copy(di, ds, si, ss, 320, f_pal);
+                       v_copy(&di[0], &di[-ds], swidth, f_nop);
+                       di += 2*ds;
                 }
         }
  }
  
-/* 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 */
-/* NB for smoother image could scale to 288x216, x*9/5, y*3/2 ?
- *      h: 11111 11112 22222 22233 33333 33444 44444 45555 55555
- *            1     1     2    2+3    3    3+4    4     5     5
- *       v: 11  12  22
- *          1   1+2 2
- */
-void upscale_clut_nn_160_320x144_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss)
+void upscale_rgb_nn_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
+       int swidth = width * 2;
         int y, j;
  
-       /* 3:5, 0 0 1 1 2 */
-       for (y = 0; y < 144; y += 3) {
-               /* lines 0,2,4 */
+       for (y = 0; y < height; y += 3) {
                 for (j = 0; j < 3; j++) {
-                       h_upscale_nn_1_2(di, ds, si, ss, 160, f_nop);
+                       h_upscale_nn_1_2(di, ds, si, ss, width, f_pal);
+                       di += ds;
+               }
+               di -= 5*ds;
+               for (j = 0; j < 2; j++) {
+                       v_copy(&di[0], &di[-ds], swidth, f_nop);
+                       di += 2*ds;
+               }
+       }
+}
+
+void upscale_rgb_bl2_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
+{
+       int swidth = width * 2;
+       int y, j;
+
+       for (y = 0; y < height; y += 3) {
+               for (j = 0; j < 3; j++) {
+                       h_upscale_bl2_1_2(di, ds, si, ss, width, f_pal);
                         di += ds;
                 }
-               /* lines 1,3 */
                 di -= 5*ds;
                 for (j = 0; j < 2; j++) {
-                       v_copy(&di[0], &di[-ds], 320, f_nop);
+                       v_mix(&di[0], &di[-ds], &di[ds], swidth, p_05, f_nop);
                         di += 2*ds;
                 }
         }
  }
  
-void upscale_rgb_nn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_bl4_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
+{
+       int swidth = width * 2;
+       int y, j, d;
+
+       /* for 1st block backwards reference virtually duplicate source line 0 */
+       for (y = 0, d = 2*ds; y < height; y += 3, d = -ds) {
+               di += 2*ds;
+               for (j = 0; j < 3; j++) {
+                       h_upscale_bl2_1_2(di, ds, si, ss, width, f_pal);
+               }
+               di -= 5*ds;
+               v_mix(&di[0], &di[d ], &di[2*ds], swidth, p_05, f_nop); /*-1+0 */
+               di += ds;
+               v_mix(&di[0], &di[ds], &di[2*ds], swidth, p_075, f_nop);/* 0+1 */
+               di += ds;
+               v_mix(&di[0], &di[ 0], &di[  ds], swidth, p_025, f_nop);/* 0+1 */
+               di += ds;
+               v_mix(&di[0], &di[ 0], &di[  ds], swidth, p_05, f_nop); /* 1+2 */
+               di += 2*ds;
+       }
+}
+
+/* X x Y -> X x Y*5/3, e.g. for Y 144->240 (GG) */
+void upscale_clut_nn_y_3_5(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height)
  {
         int y, j;
  
-       for (y = 0; y < 144; y += 3) {
+       for (y = 0; y < height; y += 3) {
+               /* lines 0,2,4 */
                 for (j = 0; j < 3; j++) {
-                       h_upscale_nn_1_2(di, ds, si, ss, 160, f_pal);
+                       h_copy(di, ds, si, ss, width, f_nop);
                         di += ds;
                 }
+               /* lines 1,3 */
                 di -= 5*ds;
                 for (j = 0; j < 2; j++) {
-                       v_copy(&di[0], &di[-ds], 320, f_nop);
+                       v_copy(&di[0], &di[-ds], width, f_nop);
                         di += 2*ds;
                 }
         }
  }
  
-void upscale_rgb_snn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_nn_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y, j;
  
-       /* 3:5, 0 0+1 1 1+2 2 */
-       for (y = 0; y < 144; y += 3) {
+       for (y = 0; y < height; y += 3) {
                 for (j = 0; j < 3; j++) {
-                       h_upscale_nn_1_2(di, ds, si, ss, 160, f_pal);
+                       h_copy(di, ds, si, ss, width, f_pal);
                         di += ds;
                 }
                 di -= 5*ds;
                 for (j = 0; j < 2; j++) {
-                       v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop);
+                       v_copy(&di[0], &di[-ds], width, f_nop);
                         di += 2*ds;
                 }
         }
  }
  
-void upscale_rgb_bl2_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_bl2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y, j;
  
-       /* 3:5, 0 0+1 1 1+2 2 */
-       for (y = 0; y < 144; y += 3) {
+       for (y = 0; y < height; y += 3) {
                 for (j = 0; j < 3; j++) {
-                       h_upscale_bl2_1_2(di, ds, si, ss, 160, f_pal);
+                       h_copy(di, ds, si, ss, width, f_pal);
                         di += ds;
                 }
                 di -= 5*ds;
                 for (j = 0; j < 2; j++) {
-                       v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop);
+                       v_mix(&di[0], &di[-ds], &di[ds], width, p_05, f_nop);
                         di += 2*ds;
                 }
         }
  }
  
-void upscale_rgb_bl4_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
+void upscale_rgb_bl4_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal)
  {
         int y, j, d;
  
-       /* 3:5, -1+0, 0+1 0+1 1+2 2
-        * for 1st block backwards reference virtually duplicate source line 0 */
-       for (y = 0, d = 2*ds; y < 144; y += 3, d = -ds) {
+       /* for 1st block backwards reference virtually duplicate source line 0 */
+       for (y = 0, d = 2*ds; y < height; y += 3, d = -ds) {
                 di += 2*ds;
                 for (j = 0; j < 3; j++) {
-                       h_upscale_bl2_1_2(di, ds, si, ss, 160, f_pal);
+                       h_copy(di, ds, si, ss, width, f_pal);
                 }
                 di -= 5*ds;
-               v_mix(&di[0], &di[d ], &di[2*ds], 320, p_05, f_nop);    /*-1+0 */
+               v_mix(&di[0], &di[d ], &di[2*ds], width, p_05, f_nop); /*-1+0 */
                 di += ds;
-               v_mix(&di[0], &di[ds], &di[2*ds], 320, p_075, f_nop);   /* 0+1 */
+               v_mix(&di[0], &di[ds], &di[2*ds], width, p_075, f_nop);/* 0+1 */
                 di += ds;
-               v_mix(&di[0], &di[ 0], &di[  ds], 320, p_025, f_nop);   /* 0+1 */
+               v_mix(&di[0], &di[ 0], &di[  ds], width, p_025, f_nop);/* 0+1 */
                 di += ds;
-               v_mix(&di[0], &di[ 0], &di[  ds], 320, p_05, f_nop);    /* 1+2 */
+               v_mix(&di[0], &di[ 0], &di[  ds], width, p_05, f_nop); /* 1+2 */
                 di += 2*ds;
         }
  }
diff --git a/platform/common/upscale.h b/platform/common/upscale.h

index 5807661..db342c4 100644 (file)
--- a/platform/common/upscale.h
+++ b/platform/common/upscale.h
@@ -7,7 +7,7 @@
   * nn: nearest neighbour
   * snn:        "smoothed" nearest neighbour (see below)
   * bln:        n-level-bilinear with n quantized weights
- *     quantization: 0: a<1/2*n, 1/n: 1/2*n<=a<3/2*n, etc
+ *     quantization: 0: a<1/(2*n), 1/n: 1/(2*n)<=a<3/(2*n), etc
   *     currently n=2, n=4 are implemented (there's n=8 mixing, but no filters)
   *     [NB this has been brought to my attn, which is probably the same as bl2:
   *     https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1]
@@ -18,34 +18,37 @@
   *     a sharper look than a bilinear filter, at the price of some visible jags
   *     on diagonal edges.
   * 
- * scaling modes:
- * 256x___ -> 320x___  only horizontal scaling. Produces an aspect error of
- *                     ~7% for NTSC 224 line modes, but is correct for PAL
- * 256/320x224/240
- *     -> 320x240      always produces 320x240 at DAR 4:3
-* 160x144 -> 320x240   game gear (currently unused)
-* 
+ * example scaling modes:
+ * 256x_Y_ -> 320x_Y_, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (NTSC 7% aspect err)
+ * 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (PAL 7% aspect err)
+ * 320x224 -> 320x240, PAR 1:1, for NTSC DAR 4:3 (PAL 7% aspect err)
+ * 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3
+ *
+ *
   * (C) 2021 kub <derkub@gmail.com>
+ *
+ * This work is licensed under the terms of MAME license.
+ * See COPYING file in the top-level directory.
   */
  #include <pico/pico_types.h>
  
  /* RGB565 pixel mixing, see https://www.compuphase.com/graphic/scale3.htm and
                             http://blargg.8bitalley.com/info/rgb_mixing.html */
  /* 2-level mixing */
-//#define p_05(p1,p2)  (((p1)+(p2)  + ( ((p1)^(p2))&0x0821))>>1) // round up
-//#define p_05(p1,p2)  (((p1)+(p2)  - ( ((p1)^(p2))&0x0821))>>1) // round down
-#define p_05(p1,p2)    (((p1)&(p2)) + ((((p1)^(p2))&~0x0821)>>1))
+//#define p_05(d,p1,p2)        d=(((p1)+(p2)  + ( ((p1)^(p2))&0x0821))>>1) // round up
+//#define p_05(d,p1,p2)        d=(((p1)+(p2)  - ( ((p1)^(p2))&0x0821))>>1) // round down
+#define p_05(d,p1,p2)  d=(((p1)&(p2)) + ((((p1)^(p2))&~0x0821)>>1))
  /* 4-level mixing, 2 times slower */
  // 1/4*p1 + 3/4*p2 = 1/2*(1/2*(p1+p2) + p2)
-#define p_025(p1,p2)   (t=p_05(p1, p2),   p_05( t, p2))
-#define p_075(p1,p2)   p_025(p2,p1)
+#define p_025(d,p1,p2) p_05(t, p1, p2); p_05( d, t, p2)
+#define p_075(d,p1,p2) p_025(d,p2,p1)
  /* 8-level mixing, 3 times slower */
  // 1/8*p1 + 7/8*p2 = 1/2*(1/2*(1/2*(p1+p2) + p2) + p2)
-#define p_0125(p1,p2)  (t=p_05(p1, p2), u=p_05( t, p2), p_05( u, p2))
+#define p_0125(d,p1,p2)        p_05(t, p1, p2); p_05( u, t, p2); p_05( d, u, p2)
  // 3/8*p1 + 5/8*p2 = 1/2*(1/2*(1/2*(p1+p2) + p2) + 1/2*(p1+p2))
-#define p_0375(p1,p2)  (t=p_05(p1, p2), u=p_05( t, p2), p_05( u,  t))
-#define p_0625(p1,p2)  p_0375(p2,p1)
-#define p_0875(p1,p2)  p_0125(p2,p1)
+#define p_0375(d,p1,p2)        p_05(t, p1, p2); p_05( u, t, p2); p_05( d, u,  t)
+#define p_0625(d,p1,p2)        p_0375(d,p2,p1)
+#define p_0875(d,p1,p2)        p_0125(d,p2,p1)
  
  /* pixel transforms */
  #define        f_pal(v)        pal[v]  // convert CLUT index -> RGB565
@@ -55,7 +58,7 @@
  /*
  scalers h:
  256->320:       - (4:5)         (256x224/240 -> 320x224/240)
-256->299:      - (6:7)         (256x224 -> 299x224, DAR 4:3, 10.5 px border )
+256->299:      - (6:7)         (256x224 -> 299x224, alt?)
  160->320:       - (1:2) 2x      (160x144 -> 320x240, GG)
  160->288:      - (5:9)         (160x144 -> 288x216, GG alt?)
  */
@@ -95,7 +98,7 @@ scalers h:
         for (i = w/4; i > 0; i--, si += 4, di += 5) {   \
                 di[0] = f(si[0]);                       \
                 di[1] = f(si[1]);                       \
-               di[2] = p_05(f(si[1]),f(si[2]));        \
+               p_05(di[2], f(si[1]),f(si[2]));         \
                 di[3] = f(si[2]);                       \
                 di[4] = f(si[3]);                       \
         }                                               \
@@ -104,12 +107,12 @@ scalers h:
  } while (0)
  
  #define h_upscale_bln_4_5(di,ds,si,ss,w,f) do {                \
-       int i, t;                                       \
+       int i; u16 t;                                   \
         for (i = w/4; i > 0; i--, si += 4, di += 5) {   \
                 di[0] = f(si[0]);                       \
-               di[1] = p_025(f(si[0]),f(si[1]));       \
-               di[2] = p_05 (f(si[1]),f(si[2]));       \
-               di[3] = p_075(f(si[2]),f(si[3]));       \
+               p_025(di[1], f(si[0]),f(si[1]));        \
+               p_05 (di[2], f(si[1]),f(si[2]));        \
+               p_075(di[3], f(si[2]),f(si[3]));        \
                 di[4] = f(si[3]);                       \
         }                                               \
         di += ds - w/4*5;                               \
@@ -120,8 +123,8 @@ scalers h:
         int i;                                          \
         for (i = w/4; i > 0; i--, si += 4, di += 5) {   \
                 di[0] = f(si[0]);                       \
-               di[1] = p_05(f(si[0]),f(si[1]));        \
-               di[2] = p_05(f(si[1]),f(si[2]));        \
+               p_05(di[1], f(si[0]),f(si[1]));         \
+               p_05(di[2], f(si[1]),f(si[2]));         \
                 di[3] = f(si[2]);                       \
                 di[4] = f(si[3]);                       \
         }                                               \
@@ -130,12 +133,12 @@ scalers h:
  } while (0)
  
  #define h_upscale_bl4_4_5(di,ds,si,ss,w,f) do {                \
-       int i, t; uint p = f(si[0]);                    \
+       int i; u16 t, p = f(si[0]);                     \
         for (i = w/4; i > 0; i--, si += 4, di += 5) {   \
-               di[0] = p_025(p, f(si[0]));             \
-               di[1] = p_05 (f(si[0]),f(si[1]));       \
-               di[2] = p_05 (f(si[1]),f(si[2]));       \
-               di[3] = p_075(f(si[2]),f(si[3]));       \
+               p_025(di[0], p,       f(si[0]));        \
+               p_05 (di[1], f(si[0]),f(si[1]));        \
+               p_05 (di[2], f(si[1]),f(si[2]));        \
+               p_075(di[3], f(si[2]),f(si[3]));        \
                 di[4] = p = f(si[3]);                   \
         }                                               \
         di += ds - w/4*5;                               \
@@ -143,12 +146,12 @@ scalers h:
  } while (0)
  
  #define h_upscale_bl8_4_5(di,ds,si,ss,w,f) do {                \
-       int i, t, u; uint p = f(si[0]);                 \
+       int i; u16 t, u, p = f(si[0]);                  \
         for (i = w/4; i > 0; i--, si += 4, di += 5) {   \
-               di[0] = p_025(p, f(si[0]));             \
-               di[1] = p_0375(f(si[0]),f(si[1]));      \
-               di[2] = p_0625(f(si[1]),f(si[2]));      \
-               di[3] = p_075(f(si[2]),f(si[3]));       \
+               p_025 (di[0], p,       f(si[0]));       \
+               p_0375(di[1], f(si[0]),f(si[1]));       \
+               p_0625(di[2], f(si[1]),f(si[2]));       \
+               p_075 (di[3], f(si[2]),f(si[3]));       \
                 di[4] = p = f(si[3]);                   \
         }                                               \
         di += ds - w/4*5;                               \
@@ -195,7 +198,7 @@ scalers h:
                 di[0] = f(si[0]);                       \
                 di[1] = f(si[1]);                       \
                 di[2] = f(si[2]);                       \
-               di[3] = p_05(f(si[2]),f(si[3]));        \
+               p_05(di[3], f(si[2]),f(si[3]));         \
                 di[4] = f(si[3]);                       \
                 di[5] = f(si[4]);                       \
                 di[6] = f(si[5]);                       \
@@ -208,10 +211,10 @@ scalers h:
         int i;                                          \
         for (i = w/6; i > 0; i--, si += 6, di += 7) {   \
                 di[0] = f(si[0]);                       \
-               di[1] = p_05(f(si[0]),f(si[1]));        \
-               di[2] = p_05(f(si[1]),f(si[2]));        \
-               di[3] = p_05(f(si[2]),f(si[3]));        \
-               di[4] = p_05(f(si[3]),f(si[4]));        \
+               p_05(di[1], f(si[0]),f(si[1]));         \
+               p_05(di[2], f(si[1]),f(si[2]));         \
+               p_05(di[3], f(si[2]),f(si[3]));         \
+               p_05(di[4], f(si[3]),f(si[4]));         \
                 di[5] = f(si[4]);                       \
                 di[6] = f(si[5]);                       \
         }                                               \
@@ -220,14 +223,14 @@ scalers h:
  } while (0)
  
  #define h_upscale_bl4_6_7(di,ds,si,ss,w,f) do {                \
-       int i, t; uint p = f(si[0]);                    \
+       int i; u16 t p = f(si[0]);                      \
         for (i = w/6; i > 0; i--, si += 6, di += 7) {   \
-               di[0] = p_025(p,f(si[0]));              \
-               di[1] = p_025(f(si[0]),f(si[1]));       \
-               di[2] = p_05 (f(si[1]),f(si[2]));       \
-               di[3] = p_05 (f(si[2]),f(si[3]));       \
-               di[4] = p_075(f(si[3]),f(si[4]));       \
-               di[5] = p_075(f(si[4]),f(si[5]));       \
+               p_025(di[0], p,       f(si[0]));        \
+               p_025(di[1], f(si[0]),f(si[1]));        \
+               p_05 (di[2], f(si[1]),f(si[2]));        \
+               p_05 (di[3], f(si[2]),f(si[3]));        \
+               p_075(di[4], f(si[3]),f(si[4]));        \
+               p_075(di[5], f(si[4]),f(si[5]));        \
                 di[6] = p = f(si[5]);                   \
         }                                               \
         di += ds - w/6*7;                               \
@@ -258,9 +261,9 @@ scalers h:
                 di[0] = f(si[0]);                       \
                 di[1] = f(si[0]);                       \
                 di[2] = f(si[1]);                       \
-               di[3] = p_05(f(si[1]),f(si[2]));        \
+               p_05(di[3], f(si[1]),f(si[2]));         \
                 di[4] = f(si[2]);                       \
-               di[5] = p_05(f(si[2]),f(si[3]));        \
+               p_05(di[5], f(si[2]),f(si[3]));         \
                 di[6] = f(si[3]);                       \
                 di[7] = f(si[4]);                       \
                 di[8] = f(si[4]);                       \
@@ -273,13 +276,13 @@ scalers h:
         int i;                                          \
         for (i = w/5; i > 0; i--, si += 5, di += 9) {   \
                 di[0] = f(si[0]);                       \
-               di[1] = p_05(f(si[0]),f(si[1]));        \
+               p_05(di[1], f(si[0]),f(si[1]));         \
                 di[2] = f(si[1]);                       \
-               di[3] = p_05(f(si[1]),f(si[2]));        \
+               p_05(di[3], f(si[1]),f(si[2]));         \
                 di[4] = f(si[2]);                       \
-               di[5] = p_05(f(si[2]),f(si[3]));        \
+               p_05(di[5], f(si[2]),f(si[3]));         \
                 di[6] = f(si[3]);                       \
-               di[7] = p_05(f(si[3]),f(si[4]));        \
+               p_05(di[7], f(si[3]),f(si[4]));         \
                 di[8] = f(si[4]);                       \
         }                                               \
         di += ds - w/5*9;                               \
@@ -287,16 +290,16 @@ scalers h:
  } while (0)
  
  #define h_upscale_bl4_5_9(di,ds,si,ss,w,f) do {                \
-       int i, t; uint p = f(si[0]);                    \
+       int i; u16 t, p = f(si[0]);                     \
         for (i = w/5; i > 0; i--, si += 5, di += 9) {   \
-               di[0] = p_05 (p,f(si[0]));              \
+               p_05 (di[0], p,       f(si[0]));        \
                 di[1] = f(si[0]);                       \
-               di[2] = p_025(f(si[0]),f(si[1]));       \
-               di[3] = p_075(f(si[1]),f(si[2]));       \
-               di[4] = p_025(f(si[1]),f(si[2]));       \
-               di[5] = p_075(f(si[2]),f(si[3]));       \
+               p_025(di[2], f(si[0]),f(si[1]));        \
+               p_075(di[3], f(si[1]),f(si[2]));        \
+               p_025(di[4], f(si[1]),f(si[2]));        \
+               p_075(di[5], f(si[2]),f(si[3]));        \
                 di[6] = f(si[3]);                       \
-               di[7] = p_05 (f(si[3]),f(si[4]));       \
+               p_05 (di[7], f(si[3]),f(si[4]));        \
                 di[8] = p = f(si[4]);                   \
         }                                               \
         di += ds - w/5*9;                               \
@@ -319,9 +322,9 @@ scalers h:
  #define h_upscale_bl2_1_2(di,ds,si,ss,w,f) do {                \
         int i; uint p = f(si[0]);                       \
         for (i = w/2; i > 0; i--, si += 2, di += 4) {   \
-               di[0] = p_05 (p, f(si[0]));             \
+               p_05 (di[0], p,       f(si[0]));        \
                 di[1] = f(si[0]);                       \
-               di[2] = p_05 (f(si[0]), f(si[1]));      \
+               p_05 (di[2], f(si[0]),f(si[1]));        \
                 di[3] = p = f(si[1]);                   \
         }                                               \
         di += ds - w*2;                                 \
@@ -350,12 +353,12 @@ scalers v:
  */
  
  #define v_mix(di,li,ri,w,p_mix,f) do {                 \
-       int i, t, u; (void)t, (void)u;                  \
+       u16 i, t, u; (void)t, (void)u;                  \
         for (i = 0; i < w; i += 4) {                    \
-               (di)[i  ] = p_mix(f((li)[i  ]), f((ri)[i  ])); \
-               (di)[i+1] = p_mix(f((li)[i+1]), f((ri)[i+1])); \
-               (di)[i+2] = p_mix(f((li)[i+2]), f((ri)[i+2])); \
-               (di)[i+3] = p_mix(f((li)[i+3]), f((ri)[i+3])); \
+               p_mix((di)[i  ], f((li)[i  ]),f((ri)[i  ])); \
+               p_mix((di)[i+1], f((li)[i+1]),f((ri)[i+1])); \
+               p_mix((di)[i+2], f((li)[i+2]),f((ri)[i+2])); \
+               p_mix((di)[i+3], f((li)[i+3]),f((ri)[i+3])); \
         }                                               \
  } while (0)
  
@@ -369,32 +372,222 @@ scalers v:
         }                                               \
  } while (0)
  
+/* scale 14:15 */
+#define v_upscale_nn_14_15(di,ds,w,l) do {             \
+       if (++l == 7) {                                 \
+               di += ds;                               \
+       } else if (l >= 14) {                           \
+               l = 0;                                  \
+               di -= 7*ds;                             \
+               v_copy(&di[0], &di[-ds], w, f_nop);     \
+               di += 7*ds;                             \
+       }                                               \
+} while (0)
+
+#define v_upscale_snn_14_15(di,ds,w,l) do {            \
+       if (++l == 7) {                                 \
+               di += ds;                               \
+       } else if (l >= 14) {                           \
+               l = 0;                                  \
+               di -= 7*ds;                             \
+               v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \
+               v_mix(&di[-ds], &di[-2*ds], &di[-ds], w, p_05, f_nop); \
+               v_mix(&di[ ds], &di[ ds], &di[ 2*ds], w, p_05, f_nop); \
+               di += 7*ds;                             \
+       }                                               \
+} while (0)
+
+#define v_upscale_bl2_14_15(di,ds,w,l) do {            \
+       if (++l == 3) {                                 \
+               di += ds;                               \
+       } else if (l >= 14) {                           \
+               int j;                                  \
+               l = 0;                                  \
+               di -= 11*ds;                            \
+                       v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \
+               for (j = 0; j < 7; j++) {               \
+                       di += ds;                       \
+                       v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \
+               }                                       \
+               di += 4*ds;                             \
+       }                                               \
+} while (0)
+
+#define v_upscale_bl4_14_15(di,ds,w,l) do {            \
+       if (++l == 1) {                                 \
+               di += ds;                               \
+       } else if (l >= 14) {                           \
+               int j;                                  \
+               l = 0;                                  \
+               di -= 13*ds;                            \
+                       v_mix(&di[0], &di[-ds], &di[ds], w, p_025, f_nop); \
+                       di += ds;                       \
+               for (j = 0; j < 3; j++) {               \
+                       v_mix(&di[0], &di[0], &di[ds], w, p_025, f_nop); \
+                       di += ds;                       \
+                       }                               \
+               for (j = 0; j < 4; j++) {               \
+                       v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \
+                       di += ds;                       \
+               }                                       \
+               for (j = 0; j < 4; j++) {               \
+                       v_mix(&di[0], &di[0], &di[ds], w, p_075, f_nop); \
+                       di += ds;                       \
+               }                                       \
+               di += 1*ds;                             \
+       }                                               \
+} while (0)
+
+/* scale 16:17 */
+#define v_upscale_nn_16_17(di,ds,w,l) do {             \
+       if (++l == 8) {                                 \
+               di += ds;                               \
+       } else if (l >= 16) {                           \
+               l = 0;                                  \
+               di -= 8*ds;                             \
+               v_copy(&di[0], &di[-ds], w, f_nop);     \
+               di += 8*ds;                             \
+       }                                               \
+} while (0)
+
+#define v_upscale_snn_16_17(di,ds,w,l) do {            \
+       if (++l == 8) {                                 \
+               di += ds;                               \
+       } else if (l >= 16) {                           \
+               l = 0;                                  \
+               di -= 8*ds;                             \
+               v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \
+               v_mix(&di[-ds], &di[-2*ds], &di[-ds], w, p_05, f_nop); \
+               v_mix(&di[ ds], &di[ ds], &di[ 2*ds], w, p_05, f_nop); \
+               di += 8*ds;                             \
+       }                                               \
+} while (0)
+
+#define v_upscale_bl2_16_17(di,ds,w,l) do {            \
+       if (++l == 4) {                                 \
+               di += ds;                               \
+       } else if (l >= 16) {                           \
+               int j;                                  \
+               l = 0;                                  \
+               di -= 12*ds;                            \
+                       v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \
+               for (j = 0; j < 7; j++) {               \
+                       di += ds;                       \
+                       v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \
+               }                                       \
+               di += 5*ds;                             \
+       }                                               \
+} while (0)
+
+#define v_upscale_bl4_16_17(di,ds,w,l) do {            \
+       if (++l == 2) {                                 \
+               di += ds;                               \
+       } else if (l >= 16) {                           \
+               int j;                                  \
+               l = 0;                                  \
+               di -= 14*ds;                            \
+                       v_mix(&di[0], &di[-ds], &di[ds], w, p_025, f_nop); \
+                       di += ds;                       \
+               for (j = 0; j < 3; j++) {               \
+                       v_mix(&di[0], &di[0], &di[ds], w, p_025, f_nop); \
+                       di += ds;                       \
+                       }                               \
+               for (j = 0; j < 4; j++) {               \
+                       v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \
+                       di += ds;                       \
+               }                                       \
+               for (j = 0; j < 4; j++) {               \
+                       v_mix(&di[0], &di[0], &di[ds], w, p_075, f_nop); \
+                       di += ds;                       \
+               }                                       \
+               di += 2*ds;                             \
+       }                                               \
+} while (0)
+
+/* scale 3:5 */
+#define v_upscale_nn_3_5(di,ds,w,l) do {               \
+       if (++l < 3) {                                  \
+               di += ds;                               \
+       } else  {                                       \
+               int j;                                  \
+               l = 0;                                  \
+               di -= 4*ds;                             \
+               for (j = 0; j < 2; j++) {               \
+                       v_copy(&di[0], &di[-ds], w, f_nop); \
+                       di += 2*ds;                     \
+               }                                       \
+       }                                               \
+} while (0)
+
+#define v_upscale_snn_3_5(di,ds,w,l) do {              \
+       if (++l < 3) {                                  \
+               di += ds;                               \
+       } else  {                                       \
+               int j;                                  \
+               l = 0;                                  \
+               di -= 4*ds;                             \
+               for (j = 0; j < 2; j++) {               \
+                       v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \
+                       di += 2*ds;                     \
+               }                                       \
+       }                                               \
+} while (0)
+
+/* scale 2:3 */
+#define v_upscale_nn_2_3(di,ds,w,l) do {               \
+       if (++l < 2) {                                  \
+               di += ds;                               \
+       } else  {                                       \
+               /* 2 lines done: fill line 2*ds back from the one above it */ \
+               l = 0;                                  \
+               di -= 2*ds;                             \
+               v_copy(&di[0], &di[-ds], w, f_nop);     \
+               di += 2*ds;                             \
+       }                                               \
+} while (0)
+
+#define v_upscale_snn_2_3(di,ds,w,l) do {              \
+       if (++l < 2) {                                  \
+               di += ds;                               \
+       } else  {                                       \
+               /* 2 lines done: line 2*ds back = p_05 mix of its neighbours */ \
+               l = 0;                                  \
+               di -= 2*ds;                             \
+               v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \
+               di += 2*ds;                             \
+       }                                               \
+} while (0)
+
+
+/* X x Y -> X*5/4 x Y, for X 256->320 */
+void upscale_rgb_nn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_snn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl2_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl4_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+
+/* X x Y -> X x Y*17/16, for Y 224->238 or 192->204 (SMS) */
+void upscale_rgb_nn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_snn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl2_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl4_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
  
+/* X x Y -> X*5/4 x Y*17/16 */
+void upscale_rgb_nn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_snn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl2_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl4_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
  
-/* 256x___ -> 320x___, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (wrong for NTSC) */
-void upscale_clut_nn_256_320x___(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int height);
-void upscale_rgb_nn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal);
-void upscale_rgb_snn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal);
-void upscale_rgb_bl2_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal);
-void upscale_rgb_bl4_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal);
+/* X x Y -> X*2/1 x Y, e.g. for X 160->320 (GG) */
+void upscale_rgb_nn_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl2_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
  
-/* 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (wrong for PAL) */
-void upscale_clut_nn_256_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss);
-void upscale_rgb_nn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
-void upscale_rgb_snn_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
-void upscale_rgb_bl2_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
-void upscale_rgb_bl4_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
+/* X x Y -> X x Y*5/3, e.g. for Y 144->240 (GG) */
+void upscale_rgb_nn_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl4_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
  
-/* 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (wrong for PAL) */
-void upscale_clut_nn_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss);
-void upscale_rgb_nn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
-void upscale_rgb_snn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
-void upscale_rgb_bl2_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
-void upscale_rgb_bl4_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
+/* X x Y -> X*2/1 x Y*5/3 (GG) */
+void upscale_rgb_nn_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl2_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
+void upscale_rgb_bl4_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal);
  
-/* 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 */
-void upscale_clut_nn_160_320x144_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss);
-void upscale_rgb_nn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
-void upscale_rgb_snn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
-void upscale_rgb_bl2_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
-void upscale_rgb_bl4_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal);
diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c

index 8837998..0622691 100644 (file)
--- a/platform/gp2x/emu.c
+++ b/platform/gp2x/emu.c
@@ -558,14 +558,16 @@ static void vid_reset_mode(void)
         Pico.m.dirtyPal = 1;\r
  \r
         PicoIn.opt &= ~POPT_EN_SOFTSCALE;\r
-       if (currentConfig.scaling == EOPT_SCALE_SW)\r
+       if (currentConfig.scaling == EOPT_SCALE_SW) {\r
                 PicoIn.opt |= POPT_EN_SOFTSCALE;\r
+               PicoIn.filter = EOPT_FILTER_BILINEAR2;\r
+       }\r
  \r
         // palette converters for 8bit modes\r
         make_local_pal = (PicoIn.AHW & PAHW_SMS) ? make_local_pal_sms : make_local_pal_md;\r
  }\r
  \r
-void emu_video_mode_change(int start_line, int line_count, int is_32cols)\r
+void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count)\r
  {\r
         int scalex = 320, scaley = 240;\r
         int ln_offs = 0;\r
@@ -578,10 +580,10 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols)
  \r
         /* set up hwscaling here */\r
         PicoIn.opt &= ~POPT_DIS_32C_BORDER;\r
-       if (is_32cols && currentConfig.scaling == EOPT_SCALE_HW) {\r
-               scalex = 256;\r
+       if (col_count < 320 && currentConfig.scaling == EOPT_SCALE_HW) {\r
+               scalex = col_count;\r
                 PicoIn.opt |= POPT_DIS_32C_BORDER;\r
-               osd_fps_x = OSD_FPS_X - 64;\r
+               osd_fps_x = col_count - (320-OSD_FPS_X);\r
         }\r
  \r
         if (currentConfig.vscaling == EOPT_SCALE_HW) {\r
diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c

index 0969017..7c63c97 100644 (file)
--- a/platform/libretro/libretro.c
+++ b/platform/libretro/libretro.c
@@ -84,7 +84,6 @@ static retro_environment_t environ_cb;
  static retro_audio_sample_batch_t audio_batch_cb;
  
  #define VOUT_MAX_WIDTH 320
-#define VOUT_32COL_WIDTH 256
  #define VOUT_MAX_HEIGHT 240
  
  #define INITIAL_SND_RATE 44100
@@ -99,7 +98,8 @@ static bool old_show_overscan = false;
  /* Required to allow on the fly changes to 'show overscan' */
  static int vm_current_start_line = -1;
  static int vm_current_line_count = -1;
-static int vm_current_is_32cols = -1;
+static int vm_current_start_col = -1;
+static int vm_current_col_count = -1;
  
  static int vout_16bit = 1;
  static int vout_format = PDF_RGB555;
@@ -615,13 +615,14 @@ int plat_mem_set_exec(void *ptr, size_t size)
     return ret;
  }
  
-void emu_video_mode_change(int start_line, int line_count, int is_32cols)
+void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count)
  {
     struct retro_system_av_info av_info;
  
     vm_current_start_line = start_line;
     vm_current_line_count = line_count;
-   vm_current_is_32cols = is_32cols;
+   vm_current_start_col = start_col;
+   vm_current_col_count = col_count;
  
     // 8bit renderes create a 328x256 CLUT image, while 16bit creates 320x240 RGB
     vout_16bit = vout_format == PDF_RGB555 || (PicoIn.AHW & PAHW_32X);
@@ -631,13 +632,7 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols)
     vout_width = (vout_16bit ? VOUT_MAX_WIDTH : VOUT_8BIT_WIDTH);
     vout_height = (vout_16bit ? VOUT_MAX_HEIGHT : VOUT_8BIT_HEIGHT);
     vout_offset = (vout_16bit ? 0 : 8); // 8bit has 8 px overlap area on the left
-   if (is_32cols) {
-      // 256x240, with or w/o overlap on the left and 64 px on the right
-      padding = (struct retro_hw_ps2_insets){start_line, vout_offset, vout_height - line_count - start_line, vout_width - 256.0f - vout_offset};
-   } else {
-      // 320x240, with or w/o overlap on the left and none on the right
-      padding = (struct retro_hw_ps2_insets){start_line, vout_offset, vout_height - line_count - start_line, vout_width - 320.0f - vout_offset};
-   }
+   padding = (struct retro_hw_ps2_insets){start_line, vout_offset, vout_height - line_count - start_line, vout_width - col_count - vout_offset};
  
     int pxsz = (vout_16bit ? 2 : 1); // pixel size: RGB = 16 bits, CLUT = 8 bits
     memset(vout_buf, 0, pxsz * vout_width * vout_height);
@@ -651,7 +646,7 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols)
        ps2->padding = padding;
     }
  #else
-   vout_width = is_32cols ? VOUT_32COL_WIDTH : VOUT_MAX_WIDTH;
+   vout_width = col_count;
     memset(vout_buf, 0, VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2);  
     if (vout_16bit)
        PicoDrawSetOutBuf(vout_buf, vout_width * 2);
@@ -686,11 +681,11 @@ void emu_32x_startup(void)
     PicoDrawSetOutFormat(vout_format, 0);
     if ((vm_current_start_line != -1) &&
         (vm_current_line_count != -1) &&
-       (vm_current_is_32cols != -1))
+       (vm_current_start_col != -1) &&
+       (vm_current_col_count != -1))
        emu_video_mode_change(
-            vm_current_start_line,
-            vm_current_line_count,
-            vm_current_is_32cols);
+            vm_current_start_line, vm_current_line_count,
+            vm_current_start_col, vm_current_col_count);
  }
  
  void lprintf(const char *fmt, ...)
@@ -1621,11 +1616,11 @@ static void update_variables(bool first_run)
     {
        if ((vm_current_start_line != -1) &&
            (vm_current_line_count != -1) &&
-          (vm_current_is_32cols != -1))
+          (vm_current_start_col != -1) &&
+          (vm_current_col_count != -1))
           emu_video_mode_change(
-               vm_current_start_line,
-               vm_current_line_count,
-               vm_current_is_32cols);
+               vm_current_start_line, vm_current_line_count,
+               vm_current_start_col, vm_current_col_count);
     }
  
     /* Reinitialise frameskipping, if required */
diff --git a/platform/linux/emu.c b/platform/linux/emu.c

index fa15d3f..0b0c53d 100644 (file)
--- a/platform/linux/emu.c
+++ b/platform/linux/emu.c
@@ -14,6 +14,7 @@
  #include "../libpicofe/plat.h"\r
  #include "../common/emu.h"\r
  #include "../common/arm_utils.h"\r
+#include "../common/upscale.h"\r
  #include "../common/version.h"\r
  \r
  #include <pico/pico_int.h>\r
@@ -23,8 +24,9 @@ const char *renderer_names[] = { "16bit accurate", " 8bit accurate", " 8bit fast
  const char *renderer_names32x[] = { "accurate", "faster", "fastest", NULL };\r
  enum renderer_types { RT_16BIT, RT_8BIT_ACC, RT_8BIT_FAST, RT_COUNT };\r
  \r
-static int out_x, out_y;\r
-static int out_w, out_h;\r
+static int out_x, out_y, out_w, out_h; // renderer output in render buffer\r
+static int screen_x, screen_y, screen_w, screen_h; // final render destination \r
+static int render_bg;                  // force 16bit mode for bg render\r
  \r
  void pemu_prep_defconfig(void)\r
  {\r
@@ -38,7 +40,7 @@ void pemu_validate_config(void)
  }\r
  \r
  #define is_16bit_mode() \\r
-       (currentConfig.renderer == RT_16BIT || (PicoIn.AHW & PAHW_32X))\r
+       (currentConfig.renderer == RT_16BIT || (PicoIn.AHW & PAHW_32X) || render_bg)\r
  \r
  static int get_renderer(void)\r
  {\r
@@ -82,31 +84,79 @@ static void draw_cd_leds(void)
  #undef p\r
  }\r
  \r
-static unsigned short *get_16bit_start(unsigned short *buf)\r
+/* render/screen buffer handling:\r
+ * In 16 bit mode, render output is directly placed in the screen buffer.\r
+ * SW scaling is handled in renderer (x) and in vscaling callbacks here (y).\r
+ * In 8 bit modes, output goes to the internal Draw2FB buffer in alternate\r
+ * renderer format (8 pix overscan at left/top/bottom), left aligned (DIS_32C).\r
+ * It is converted to 16 bit and SW scaled in pemu_finalize_frame.\r
+ *\r
+ * HW scaling always aligns the image to the left/top, since selecting an area\r
+ * for display isn't always possible.\r
+ */\r
+\r
+static u16 *screen_buffer(u16 *buf)\r
+{\r
+       // center the emulator display on the screen if screen is larger\r
+       if (currentConfig.scaling != EOPT_SCALE_HW)\r
+               buf += (g_screen_width-320)/2;\r
+       if (currentConfig.vscaling != EOPT_SCALE_HW)\r
+               buf += (g_screen_height-240)/2 * g_screen_ppitch;\r
+       return buf;\r
+}\r
+\r
+void screen_blit(u16 *pd, int pp, u8* ps, int ss, u16 *pal)\r
  {\r
-       // center the output on the screen\r
-       int offs = (g_screen_height-240)/2 * g_screen_ppitch + (g_screen_width-320)/2;\r
-       return buf + offs;\r
+       typedef void (*upscale_t)\r
+                       (u16 *di,int ds, u8 *si,int ss, int w,int h, u16 *pal);\r
+       upscale_t upscale_hv[] = {\r
+               upscale_rgb_nn_x_4_5_y_16_17,   upscale_rgb_snn_x_4_5_y_16_17,\r
+               upscale_rgb_bl2_x_4_5_y_16_17,  upscale_rgb_bl4_x_4_5_y_16_17,\r
+       };\r
+       upscale_t upscale_h[] = {\r
+               upscale_rgb_nn_x_4_5,           upscale_rgb_snn_x_4_5,\r
+               upscale_rgb_bl2_x_4_5,          upscale_rgb_bl4_x_4_5,\r
+       };\r
+       upscale_t upscale_v[] = {\r
+               upscale_rgb_nn_y_16_17,         upscale_rgb_snn_y_16_17,\r
+               upscale_rgb_bl2_y_16_17,        upscale_rgb_bl4_y_16_17,\r
+       };\r
+       upscale_t *upscale;\r
+       int y;\r
+\r
+       // handle software upscaling\r
+       upscale = NULL;\r
+       if (currentConfig.scaling == EOPT_SCALE_SW && out_w == 256) {\r
+               if (currentConfig.vscaling == EOPT_SCALE_SW && out_h <= 224)\r
+                       // h+v scaling\r
+                       upscale = upscale_hv;\r
+               else\r
+                       // h scaling\r
+                       upscale = upscale_h;\r
+       } else if (currentConfig.vscaling == EOPT_SCALE_SW && out_h <= 224) {\r
+                       // v scaling\r
+                       upscale = upscale_v;\r
+       } else {\r
+               // no scaling\r
+               for (y = 0; y < out_h; y++)\r
+                       h_copy(pd, pp, ps, 328, out_w, f_pal);\r
+               return;\r
+       }\r
+\r
+       upscale[currentConfig.filter & 0x3](pd, pp, ps, ss, out_w, out_h, pal);\r
  }\r
  \r
  void pemu_finalize_frame(const char *fps, const char *notice)\r
  {\r
         if (!is_16bit_mode()) {\r
                 // convert the 8 bit CLUT output to 16 bit RGB\r
-               unsigned short *pd = (unsigned short *)g_screen_ptr +\r
-                                       out_y * g_screen_ppitch + out_x;\r
-               unsigned char *ps = Pico.est.Draw2FB + 328*out_y + 8;\r
-               unsigned short *pal = Pico.est.HighPal;\r
-               int i, x;\r
+               u16 *pd = screen_buffer(g_screen_ptr) +\r
+                                       screen_y * g_screen_ppitch + screen_x;\r
+               u8  *ps = Pico.est.Draw2FB + 328*out_y + out_x + 8;\r
  \r
-               pd = get_16bit_start(pd);\r
                 PicoDrawUpdateHighPal();\r
-               for (i = 0; i < out_h; i++, ps += 8) {\r
-                       for (x = 0; x < out_w; x++)\r
-                               *pd++ = pal[*ps++];\r
-                       pd += g_screen_ppitch - out_w;\r
-                       ps += 320 - out_w;\r
-               }\r
+\r
+               screen_blit(pd, g_screen_ppitch, ps, 328, Pico.est.HighPal);\r
         }\r
  \r
         if (notice)\r
@@ -120,33 +170,44 @@ void pemu_finalize_frame(const char *fps, const char *notice)
  void plat_video_set_buffer(void *buf)\r
  {\r
         if (is_16bit_mode())\r
-               PicoDrawSetOutBuf(get_16bit_start(buf), g_screen_ppitch * 2);\r
+               PicoDrawSetOutBuf(screen_buffer(buf), g_screen_ppitch * 2);\r
  }\r
  \r
  static void apply_renderer(void)\r
  {\r
+       PicoIn.opt &= ~(POPT_ALT_RENDERER|POPT_EN_SOFTSCALE|POPT_DIS_32C_BORDER);\r
         switch (get_renderer()) {\r
         case RT_16BIT:\r
-               PicoIn.opt &= ~POPT_ALT_RENDERER;\r
-               PicoIn.opt &= ~POPT_DIS_32C_BORDER;\r
-               PicoDrawSetOutFormat(PDF_RGB555, 0);\r
-               PicoDrawSetOutBuf(get_16bit_start(g_screen_ptr), g_screen_ppitch * 2);\r
+               // 32X uses line mode for vscaling with accurate renderer, since\r
+               // the MD VDP layer must be unscaled and merging the scaled 32X\r
+               // image data will fail.\r
+               PicoDrawSetOutFormat(PDF_RGB555,\r
+                       (PicoIn.AHW & PAHW_32X) && currentConfig.vscaling);\r
+               PicoDrawSetOutBuf(screen_buffer(g_screen_ptr), g_screen_ppitch * 2);\r
                 break;\r
         case RT_8BIT_ACC:\r
-               PicoIn.opt &= ~POPT_ALT_RENDERER;\r
-               PicoIn.opt |=  POPT_DIS_32C_BORDER;\r
+               // for simplification the 8 bit accurate renderer uses the same\r
+               // storage format as the fast renderer\r
                 PicoDrawSetOutFormat(PDF_8BIT, 0);\r
                 PicoDrawSetOutBuf(Pico.est.Draw2FB, 328);\r
                 break;\r
         case RT_8BIT_FAST:\r
                 PicoIn.opt |=  POPT_ALT_RENDERER;\r
-               PicoIn.opt |=  POPT_DIS_32C_BORDER;\r
                 PicoDrawSetOutFormat(PDF_NONE, 0);\r
                 break;\r
         }\r
  \r
         if (PicoIn.AHW & PAHW_32X)\r
-               PicoDrawSetOutBuf(get_16bit_start(g_screen_ptr), g_screen_ppitch * 2);\r
+               PicoDrawSetOutBuf(screen_buffer(g_screen_ptr), g_screen_ppitch * 2);\r
+       else if (is_16bit_mode()) {\r
+               if (currentConfig.scaling == EOPT_SCALE_SW) {\r
+                       PicoIn.opt |= POPT_EN_SOFTSCALE;\r
+                       PicoIn.filter = currentConfig.filter;\r
+               } else if (currentConfig.scaling == EOPT_SCALE_HW)\r
+                       // hw scaling, render without any padding\r
+                       PicoIn.opt |= POPT_DIS_32C_BORDER;\r
+       } else\r
+               PicoIn.opt |= POPT_DIS_32C_BORDER;\r
  \r
         Pico.m.dirtyPal = 1;\r
  }\r
@@ -188,37 +249,125 @@ void plat_update_volume(int has_changed, int is_up)
  {\r
  }\r
  \r
+void pemu_sound_start(void)\r
+{\r
+       emu_sound_start();\r
+}\r
+\r
+void plat_debug_cat(char *str)\r
+{\r
+}\r
+\r
  void pemu_forced_frame(int no_scale, int do_emu)\r
  {\r
-       unsigned short *pd = get_16bit_start(g_screen_ptr);\r
+       u16 *pd = screen_buffer(g_screen_ptr);\r
+       int hs = currentConfig.scaling, vs = currentConfig.vscaling;\r
  \r
+       // create centered and sw scaled (if scaling enabled) 16 bit output\r
         PicoIn.opt &= ~POPT_DIS_32C_BORDER;\r
-       PicoDrawSetCallbacks(NULL, NULL);\r
         Pico.m.dirtyPal = 1;\r
+       if (currentConfig.scaling)  currentConfig.scaling  = EOPT_SCALE_SW;\r
+       if (currentConfig.vscaling) currentConfig.vscaling = EOPT_SCALE_SW;\r
+       plat_video_set_size(320, 240);\r
  \r
+       // render a frame in 16 bit mode\r
+       render_bg = 1;\r
         emu_cmn_forced_frame(no_scale, do_emu, pd);\r
+       render_bg = 0;\r
  \r
         g_menubg_src_ptr = g_screen_ptr;\r
+       currentConfig.scaling = hs, currentConfig.vscaling = vs;\r
  }\r
  \r
-void pemu_sound_start(void)\r
+/* vertical sw scaling, 16 bit mode */\r
+static int vscale_state;\r
+\r
+static int cb_vscaling_begin(unsigned int line)\r
  {\r
-       emu_sound_start();\r
+       static int prevline = 999;\r
+\r
+       // at start of new frame?\r
+       if (line < prevline) {\r
+               // set y frame offset (see emu_video_mode_change)\r
+               u16 *dest = g_screen_ptr;\r
+               Pico.est.DrawLineDest = dest + screen_y * g_screen_ppitch;\r
+               vscale_state = 0;\r
+       }\r
+       prevline = line;\r
+       return 0;\r
  }\r
  \r
-void plat_debug_cat(char *str)\r
+static int cb_vscaling_nop(unsigned int line)\r
+{\r
+       return 0;\r
+}\r
+\r
+static int cb_vscaling_end(unsigned int line)\r
  {\r
+       u16 *dest = Pico.est.DrawLineDest;\r
+       switch (currentConfig.filter) {\r
+       case 3: v_upscale_bl4_16_17(dest, g_screen_ppitch, 320, vscale_state);\r
+               break;\r
+       case 2: v_upscale_bl2_16_17(dest, g_screen_ppitch, 320, vscale_state);\r
+               break;\r
+       case 1: v_upscale_snn_16_17(dest, g_screen_ppitch, 320, vscale_state);\r
+               break;\r
+       default: v_upscale_nn_16_17(dest, g_screen_ppitch, 320, vscale_state);\r
+               break;\r
+       }\r
+       Pico.est.DrawLineDest = dest;\r
+       return 0;\r
  }\r
  \r
-void emu_video_mode_change(int start_line, int line_count, int is_32cols)\r
+void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count)\r
  {\r
+       // relative position in core fb and screen fb\r
+       out_y = start_line; out_x = start_col;\r
+       out_h = line_count; out_w = col_count;\r
+\r
+       PicoDrawSetCallbacks(NULL, NULL);\r
+       screen_x = screen_y = 0;\r
+       screen_w = 320, screen_h = 240;\r
+\r
+       switch (currentConfig.scaling) {\r
+       case EOPT_SCALE_HW:\r
+               screen_w = out_w;\r
+               break;\r
+       case EOPT_SCALE_NONE:\r
+               // center output in screen\r
+               screen_x = (screen_w - out_w)/2;\r
+               break;\r
+       }\r
+       switch (currentConfig.vscaling) {\r
+       case EOPT_SCALE_HW:\r
+               // NTSC always has 224 visible lines, anything smaller has bars\r
+               screen_h = (out_h < 224 ? 224 : out_h);\r
+               // handle vertical centering for 16 bit mode\r
+               screen_y = (screen_h - out_h) / 2;\r
+               if (is_16bit_mode())\r
+                       PicoDrawSetCallbacks(cb_vscaling_begin, cb_vscaling_nop);\r
+               break;\r
+       case EOPT_SCALE_SW:\r
+               // NTSC always has 224 visible lines, anything smaller has bars\r
+               if (out_y > 7)\r
+                       screen_y = out_y - 7;\r
+               // in 16 bit mode sw scaling is divided between core and platform\r
+               if (is_16bit_mode() && out_h < 240)\r
+                       PicoDrawSetCallbacks(cb_vscaling_begin, cb_vscaling_end);\r
+               break;\r
+       case EOPT_SCALE_NONE:\r
+               // center output in screen\r
+               screen_y = (screen_h - out_h)/2;\r
+               break;\r
+       }\r
+\r
+       plat_video_set_size(screen_w, screen_h);\r
+       plat_video_set_buffer(g_screen_ptr);\r
+\r
         // clear whole screen in all buffers\r
         if (!is_16bit_mode())\r
                 memset32(Pico.est.Draw2FB, 0xe0e0e0e0, (320+8) * (8+240+8) / 4);\r
         plat_video_clear_buffers();\r
-\r
-       out_y = start_line; out_x = (is_32cols ? 32 : 0);\r
-       out_h = line_count; out_w = (is_32cols ? 256:320);\r
  }\r
  \r
  void pemu_loop_prep(void)\r
diff --git a/platform/linux/menu.c b/platform/linux/menu.c

index b9bf4ce..236ebd9 100644 (file)
--- a/platform/linux/menu.c
+++ b/platform/linux/menu.c
@@ -1,12 +1,14 @@
-#include "../libpicofe/gp2x/plat_gp2x.h"
-
  // ------------ gfx options menu ------------
  
+static const char *men_scaling_opts[] = { "OFF", "software", "hardware", NULL };
+static const char *men_filter_opts[] = { "nearest", "smoother", "bilinear 1", "bilinear 2", NULL };
  
-const char *men_scaling_opts[] = { "OFF", "ON", NULL };
+static const char h_scale[] = "hardware scaling may not be working on some devices";
  
  #define MENU_OPTIONS_GFX \
-       // mee_enum      ("screen scaling",           MA_OPT_SCALING,        currentConfig.scaling, men_scaling_opts), \
+       mee_enum_h    ("Horizontal scaling", MA_OPT_SCALING, currentConfig.scaling, men_scaling_opts, h_scale), \
+       mee_enum_h    ("Vertical scaling",  MA_OPT_VSCALING, currentConfig.vscaling, men_scaling_opts, h_scale), \
+       mee_enum_h    ("Scaler type", MA_OPT3_FILTERING, currentConfig.filter, men_filter_opts, NULL), \
  
  #define MENU_OPTIONS_ADV
  
diff --git a/platform/pandora/plat.c b/platform/pandora/plat.c

index 0bd151c..5532543 100644 (file)
--- a/platform/pandora/plat.c
+++ b/platform/pandora/plat.c
@@ -331,17 +331,16 @@ void pnd_restore_layer_data(void)
         plat_video_flip();\r
  }\r
  \r
-void emu_video_mode_change(int start_line, int line_count, int is_32cols)\r
+void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count)\r
  {\r
         int fb_w = 320, fb_h = 240, fb_left = 0, fb_right = 0, fb_top = 0, fb_bottom = 0;\r
  \r
         if (doing_bg_frame)\r
                 return;\r
  \r
-       if (is_32cols) {\r
-               fb_w = 256;\r
-               fb_left = fb_right = 32;\r
-       }\r
+       fb_w = col_count;\r
+       fb_left = start_col;\r
+       fb_right = 320 - (fb_w+fb_left);;\r
  \r
         switch (currentConfig.scaling) {\r
         case SCALE_1x1:\r
@@ -349,7 +348,7 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols)
                 g_layer_h = fb_h;\r
                 break;\r
         case SCALE_2x2_3x2:\r
-               g_layer_w = fb_w * (is_32cols ? 3 : 2);\r
+               g_layer_w = fb_w * (col_count < 320 ? 3 : 2);\r
                 g_layer_h = fb_h * 2;\r
                 break;\r
         case SCALE_2x2_2x2:\r
@@ -381,7 +380,7 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols)
                 fb_h = line_count;\r
                 break;\r
         }\r
-       g_osd_fps_x = is_32cols ? 232 : 264;\r
+       g_osd_fps_x = col_count < 320 ? 232 : 264;\r
         g_osd_y = fb_top + fb_h - 8;\r
  \r
         pnd_setup_layer(1, g_layer_x, g_layer_y, g_layer_w, g_layer_h);\r
diff --git a/platform/psp/emu.c b/platform/psp/emu.c

index 6c6c5b1..45f0623 100644 (file)
--- a/platform/psp/emu.c
+++ b/platform/psp/emu.c
@@ -674,11 +674,11 @@ void plat_update_volume(int has_changed, int is_up)
  }
  
  /* prepare for MD screen mode change */
-void emu_video_mode_change(int start_line, int line_count, int is_32cols)
+void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count)
  {
-       h32_mode = is_32cols;
-       out_y = start_line; out_x = (is_32cols ? 32 : 0);
-       out_h = line_count; out_w = (is_32cols ? 256:320);
+       h32_mode = col_count < 320;
+       out_y = start_line; out_x = (h32_mode ? 32 : 0);
+       out_h = line_count; out_w = (h32_mode ? 256:320);
  
         vidResetMode();
         if (h32_mode)   // clear borders from h40 remnants
diff --git a/platform/win32/plat.c b/platform/win32/plat.c

index 70d4027..2c82ef7 100644 (file)
--- a/platform/win32/plat.c
+++ b/platform/win32/plat.c
@@ -107,10 +107,10 @@ void plat_video_toggle_renderer(int change, int is_menu)
         PicoDrawSetOutFormat(PDF_RGB555, 1);
  }
  
-void emu_video_mode_change(int start_line, int line_count, int is_32cols)
+void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count)
  {
-       EmuScreenRect.left = is_32cols ? 32 : 0;
-       EmuScreenRect.right = is_32cols ? 256+32 : 320;
+       EmuScreenRect.left = start_col;
+       EmuScreenRect.right = start_col + col_count;
         EmuScreenRect.top = start_line;
         EmuScreenRect.bottom = start_line + line_count;
  
diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh

index e45567f..615e347 100755 (executable)
--- a/tools/mkoffsets.sh
+++ b/tools/mkoffsets.sh
@@ -134,9 +134,13 @@ get_define OFS_Pico_ Pico rom                      ; echo "$line" >>$fn
  get_define OFS_Pico_ Pico romsize              ; echo "$line" >>$fn
  get_define OFS_Pico_ Pico est                  ; echo "$line" >>$fn
  
+get_define OFS_PicoIn_ PicoInterface opt       ; echo "$line" >>$fn
+get_define OFS_PicoIn_ PicoInterface filter    ; echo "$line" >>$fn
+
  get_define OFS_EST_ PicoEState DrawScanline    ; echo "$line" >>$fn
  get_define OFS_EST_ PicoEState rendstatus      ; echo "$line" >>$fn
  get_define OFS_EST_ PicoEState DrawLineDest    ; echo "$line" >>$fn
+get_define OFS_EST_ PicoEState DrawLineDestIncr        ; echo "$line" >>$fn
  get_define OFS_EST_ PicoEState HighCol         ; echo "$line" >>$fn
  get_define OFS_EST_ PicoEState HighPreSpr      ; echo "$line" >>$fn
  get_define OFS_EST_ PicoEState Pico            ; echo "$line" >>$fn
author	kub <derkub@gmail.com>
	Fri, 10 Sep 2021 16:46:05 +0000 (18:46 +0200)
committer	kub <derkub@gmail.com>
	Fri, 10 Sep 2021 16:54:05 +0000 (18:54 +0200)
Makefile		patch \| blob \| blame \| history
pico/32x/draw.c		patch \| blob \| blame \| history
pico/32x/draw_arm.S		patch \| blob \| blame \| history
pico/draw.c		patch \| blob \| blame \| history
pico/draw_arm.S		patch \| blob \| blame \| history
pico/mode4.c		patch \| blob \| blame \| history
pico/pico.h		patch \| blob \| blame \| history
pico/pico_int.h		patch \| blob \| blame \| history
platform/common/emu.c		patch \| blob \| blame \| history
platform/common/emu.h		patch \| blob \| blame \| history
platform/common/plat_sdl.c		patch \| blob \| blame \| history
platform/common/upscale.c		patch \| blob \| blame \| history
platform/common/upscale.h		patch \| blob \| blame \| history
platform/gp2x/emu.c		patch \| blob \| blame \| history
platform/libretro/libretro.c		patch \| blob \| blame \| history
platform/linux/emu.c		patch \| blob \| blame \| history
platform/linux/menu.c		patch \| blob \| blame \| history
platform/pandora/plat.c		patch \| blob \| blame \| history
platform/psp/emu.c		patch \| blob \| blame \| history
platform/win32/plat.c		patch \| blob \| blame \| history
tools/mkoffsets.sh		patch \| blob \| blame \| history