pandora: fix readme and pxml version
[picodrive.git] / pico / 32x / draw.c
index f802150..4426f3a 100644 (file)
@@ -1,33 +1,59 @@
 /*
  * PicoDrive
  * (C) notaz, 2009,2010
+ * (C) irixxxx, 2019-2024
  *
  * This work is licensed under the terms of MAME license.
  * See COPYING file in the top-level directory.
  */
 #include "../pico_int.h"
 
+// NB: the 32X officially doesn't support H32 mode. It works anyway, since the
+// cartridge slot carries the EDCLK signal, which is always the H40 clock and
+// is used as the video clock by the 32X. The H32 MD image is overlaid with the
+// 320 px 32X image, which has the same on-screen width. How the /YS signal on
+// the cartridge slot (signalling the display of the background color) is
+// processed in this case is unclear, however, and the different video clocks
+// for H32 and H40 might cause race conditions and thus glitches.
+// NB: there is an offset of 4 pixels between MD and 32X layers in H32 mode.
+#define H32_OFFSET     4
+
+// BGR555 to native conversion
+#if defined(USE_BGR555)
+#define PXCONV(t)   ((t)&(mr|mg|mb|mp))
+#define PXPRIO      0x8000  // prio in MSB
+#elif defined(USE_BGR565)
+#define PXCONV(t)   (((t)&mr)  | (((t)&(mg|mb)) << 1) | (((t)&mp) >> 10))
+#define PXPRIO      0x0020  // prio in LS green bit
+#else // RGB565 
+#define PXCONV(t)   ((((t)&mr) << 11) | (((t)&mg) << 1) | (((t)&(mp|mb)) >> 10))
+#define PXPRIO      0x0020  // prio in LS green bit
+#endif
+
 int (*PicoScan32xBegin)(unsigned int num);
 int (*PicoScan32xEnd)(unsigned int num);
 int Pico32xDrawMode;
 
+void *DrawLineDestBase32x;
+int DrawLineDestIncrement32x;
+
 static void convert_pal555(int invert_prio)
 {
-  unsigned int *ps = (void *)Pico32xMem->pal;
-  unsigned int *pd = (void *)Pico32xMem->pal_native;
-  unsigned int m1 = 0x001f001f;
-  unsigned int m2 = 0x03e003e0;
-  unsigned int m3 = 0xfc00fc00;
-  unsigned int inv = 0;
+  u32 *ps = (void *)Pico32xMem->pal;
+  u32 *pd = (void *)Pico32xMem->pal_native;
+  u32 mr = 0x001f001f; // masks for red, green, blue, prio
+  u32 mg = 0x03e003e0;
+  u32 mb = 0x7c007c00;
+  u32 mp = 0x80008000;
+  u32 inv = 0;
   int i;
 
   if (invert_prio)
-    inv = 0x00200020;
+    inv = 0x80008000;
 
-  // place prio to LS green bit
   for (i = 0x100/2; i > 0; i--, ps++, pd++) {
-    unsigned int t = *ps;
-    *pd = (((t & m1) << 11) | ((t & m2) << 1) | ((t & m3) >> 10)) ^ inv;
+    u32 t = *ps ^ inv;
+    *pd = PXCONV(t);
   }
 
   Pico32x.dirty_pal = 0;
@@ -36,19 +62,25 @@ static void convert_pal555(int invert_prio)
 // direct color mode
 #define do_line_dc(pd, p32x, pmd, inv, pmd_draw_code)             \
 {                                                                 \
-  const unsigned int m1 = 0x001f;                                 \
-  const unsigned int m2 = 0x03e0;                                 \
-  const unsigned int m3 = 0x7c00;                                 \
-  int i;                                                          \
+  const u16 mr = 0x001f;                                          \
+  const u16 mg = 0x03e0;                                          \
+  const u16 mb = 0x7c00;                                          \
+  const u16 mp = 0x0000;                                          \
+  unsigned short t;                                               \
+  int i = 320;                                                    \
                                                                   \
-  for (i = 320; i > 0; i--, pd++, p32x++, pmd++) {                \
-    unsigned short t = *p32x;                                     \
-    if ((*pmd & 0x3f) != mdbg && !((t ^ inv) & 0x8000)) {         \
-      pmd_draw_code;                                              \
-      continue;                                                   \
+  while (i > 0) {                                                 \
+    for (; i > 0 && (*pmd & 0x3f) == mdbg; pd++, pmd++, i--) {    \
+      t = *p32x++;                                                \
+      *pd = PXCONV(t);                                            \
+    }                                                             \
+    for (; i > 0 && (*pmd & 0x3f) != mdbg; pd++, pmd++, i--) {    \
+      t = *p32x++ ^ inv;                                          \
+      if (t & 0x8000)                                             \
+        *pd = PXCONV(t);                                          \
+      else                                                        \
+        pmd_draw_code;                                            \
     }                                                             \
-                                                                  \
-    *pd = ((t & m1) << 11) | ((t & m2) << 1) | ((t & m3) >> 10);  \
   }                                                               \
 }
 
@@ -56,15 +88,21 @@ static void convert_pal555(int invert_prio)
 #define do_line_pp(pd, p32x, pmd, pmd_draw_code)                  \
 {                                                                 \
   unsigned short t;                                               \
-  int i;                                                          \
-  for (i = 320; i > 0; i--, pd++, p32x++, pmd++) {                \
-    t = pal[*(unsigned char *)((long)p32x ^ 1)];                  \
-    if ((t & 0x20) || (*pmd & 0x3f) == mdbg)                      \
+  int i = 320;                                                    \
+  while (i > 0) {                                                 \
+    for (; i > 0 && (*pmd & 0x3f) == mdbg; pd++, pmd++, i--) {    \
+      t = pal[*(unsigned char *)(MEM_BE2((uintptr_t)(p32x++)))];  \
       *pd = t;                                                    \
-    else                                                          \
-      pmd_draw_code;                                              \
+    }                                                             \
+    for (; i > 0 && (*pmd & 0x3f) != mdbg; pd++, pmd++, i--) {    \
+      t = pal[*(unsigned char *)(MEM_BE2((uintptr_t)(p32x++)))];  \
+      if (t & PXPRIO)                                             \
+        *pd = t;                                                  \
+      else                                                        \
+        pmd_draw_code;                                            \
+    }                                                             \
   }                                                               \
-} 
+}
 
 // run length mode
 #define do_line_rl(pd, p32x, pmd, pmd_draw_code)                  \
@@ -74,7 +112,7 @@ static void convert_pal555(int invert_prio)
   for (i = 320; i > 0; p32x++) {                                  \
     t = pal[*p32x & 0xff];                                        \
     for (len = (*p32x >> 8) + 1; len > 0 && i > 0; len--, i--, pd++, pmd++) { \
-      if ((*pmd & 0x3f) == mdbg || (t & 0x20))                    \
+      if ((*pmd & 0x3f) == mdbg || (t & PXPRIO))                  \
         *pd = t;                                                  \
       else                                                        \
         pmd_draw_code;                                            \
@@ -82,20 +120,23 @@ static void convert_pal555(int invert_prio)
   }                                                               \
 }
 
+#define MD_LAYER_CODE_H32 \
+  *dst = dst[H32_OFFSET]
+
 // this is almost never used (Wiz and menu bg gen only)
 void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est)
 {
-  unsigned short *pd = est->DrawLineDest;
+  unsigned short *dst = est->DrawLineDest;
   unsigned short *pal = Pico32xMem->pal_native;
   unsigned char  *pmd = est->HighCol + 8;
+  unsigned short *palmd = est->HighPal;
   unsigned short *dram, *p32x;
   unsigned char   mdbg;
+  int h32 = !(Pico.video.reg[12] & 0x1);
 
   FinalizeLine555(sh, line, est);
 
   if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 0 || // 32x blanking
-      // XXX: how is 32col mode hadled by real hardware?
-      !(Pico.video.reg[12] & 1) || // 32col mode
       (Pico.video.debug_p & PVD_KILL_32X))
   {
     return;
@@ -104,10 +145,14 @@ void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est)
   dram = (void *)Pico32xMem->dram[Pico32x.vdp_regs[0x0a/2] & P32XV_FS];
   p32x = dram + dram[line];
   mdbg = Pico.video.reg[7] & 0x3f;
+  if (h32) pmd += H32_OFFSET;
 
   if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 2) { // Direct Color Mode
     int inv_bit = (Pico32x.vdp_regs[0] & P32XV_PRI) ? 0x8000 : 0;
-    do_line_dc(pd, p32x, pmd, inv_bit,);
+    if (h32) {
+      do_line_dc(dst, p32x, pmd, inv_bit, MD_LAYER_CODE_H32);
+    } else
+      do_line_dc(dst, p32x, pmd, inv_bit,);
     return;
   }
 
@@ -118,10 +163,16 @@ void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est)
     unsigned char *p32xb = (void *)p32x;
     if (Pico32x.vdp_regs[2 / 2] & P32XV_SFT)
       p32xb++;
-    do_line_pp(pd, p32xb, pmd,);
+    if (h32) {
+      do_line_pp(dst, p32xb, pmd, MD_LAYER_CODE_H32);
+    } else
+      do_line_pp(dst, p32xb, pmd,);
   }
   else { // Run Length Mode
-    do_line_rl(pd, p32x, pmd,);
+    if (h32) {
+      do_line_rl(dst, p32x, pmd, MD_LAYER_CODE_H32);
+    } else
+      do_line_rl(dst, p32x, pmd,);
   }
 }
 
@@ -134,66 +185,73 @@ void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est)
 
 #define PICOSCAN_POST \
   PicoScan32xEnd(l + (lines_sft_offs & 0xff)); \
+  Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement32x; \
 
 #define make_do_loop(name, pre_code, post_code, md_code)        \
 /* Direct Color Mode */                                         \
 static void do_loop_dc##name(unsigned short *dst,               \
-    unsigned short *dram, int lines_sft_offs, int mdbg)         \
+    unsigned short *dram, unsigned lines_sft_offs, int mdbg)    \
 {                                                               \
   int inv_bit = (Pico32x.vdp_regs[0] & P32XV_PRI) ? 0x8000 : 0; \
   unsigned char  *pmd = Pico.est.Draw2FB +                      \
                           328 * (lines_sft_offs & 0xff) + 8;    \
   unsigned short *palmd = Pico.est.HighPal;                     \
   unsigned short *p32x;                                         \
-  int lines = lines_sft_offs >> 16;                             \
+  int lines = (lines_sft_offs >> 16) & 0xff;                    \
   int l;                                                        \
+  if (lines_sft_offs & (2<<8)) pmd += H32_OFFSET;               \
   (void)palmd;                                                  \
   for (l = 0; l < lines; l++, pmd += 8) {                       \
     pre_code;                                                   \
-    p32x = dram + dram[l];                                      \
+    p32x = dram + dram[l + (lines_sft_offs >> 24)];             \
     do_line_dc(dst, p32x, pmd, inv_bit, md_code);               \
     post_code;                                                  \
+    dst += DrawLineDestIncrement32x/2 - 320;                    \
   }                                                             \
 }                                                               \
                                                                 \
 /* Packed Pixel Mode */                                         \
 static void do_loop_pp##name(unsigned short *dst,               \
-    unsigned short *dram, int lines_sft_offs, int mdbg)         \
+    unsigned short *dram, unsigned lines_sft_offs, int mdbg)    \
 {                                                               \
   unsigned short *pal = Pico32xMem->pal_native;                 \
   unsigned char  *pmd = Pico.est.Draw2FB +                      \
                           328 * (lines_sft_offs & 0xff) + 8;    \
   unsigned short *palmd = Pico.est.HighPal;                     \
   unsigned char  *p32x;                                         \
-  int lines = lines_sft_offs >> 16;                             \
+  int lines = (lines_sft_offs >> 16) & 0xff;                    \
   int l;                                                        \
+  if (lines_sft_offs & (2<<8)) pmd += H32_OFFSET;               \
   (void)palmd;                                                  \
   for (l = 0; l < lines; l++, pmd += 8) {                       \
     pre_code;                                                   \
-    p32x = (void *)(dram + dram[l]);                            \
+    p32x = (void *)(dram + dram[l + (lines_sft_offs >> 24)]);   \
     p32x += (lines_sft_offs >> 8) & 1;                          \
     do_line_pp(dst, p32x, pmd, md_code);                        \
     post_code;                                                  \
+    dst += DrawLineDestIncrement32x/2 - 320;                    \
   }                                                             \
 }                                                               \
                                                                 \
 /* Run Length Mode */                                           \
 static void do_loop_rl##name(unsigned short *dst,               \
-    unsigned short *dram, int lines_sft_offs, int mdbg)         \
+    unsigned short *dram, unsigned lines_sft_offs, int mdbg)    \
 {                                                               \
   unsigned short *pal = Pico32xMem->pal_native;                 \
   unsigned char  *pmd = Pico.est.Draw2FB +                      \
                           328 * (lines_sft_offs & 0xff) + 8;    \
   unsigned short *palmd = Pico.est.HighPal;                     \
   unsigned short *p32x;                                         \
-  int lines = lines_sft_offs >> 16;                             \
+  int lines = (lines_sft_offs >> 16) & 0xff;                    \
   int l;                                                        \
+  if (lines_sft_offs & (2<<8)) pmd += H32_OFFSET;               \
   (void)palmd;                                                  \
   for (l = 0; l < lines; l++, pmd += 8) {                       \
     pre_code;                                                   \
-    p32x = dram + dram[l];                                      \
+    p32x = dram + dram[l + (lines_sft_offs >> 24)];             \
     do_line_rl(dst, p32x, pmd, md_code);                        \
     post_code;                                                  \
+    dst += DrawLineDestIncrement32x/2 - 320;                    \
   }                                                             \
 }
 
@@ -201,24 +259,26 @@ static void do_loop_rl##name(unsigned short *dst,               \
 #undef make_do_loop
 #define make_do_loop(name, pre_code, post_code, md_code) \
 extern void do_loop_dc##name(unsigned short *dst,        \
-    unsigned short *dram, int lines_offs, int mdbg);     \
+    unsigned short *dram, unsigned lines_offs, int mdbg);\
 extern void do_loop_pp##name(unsigned short *dst,        \
-    unsigned short *dram, int lines_offs, int mdbg);     \
+    unsigned short *dram, unsigned lines_offs, int mdbg);\
 extern void do_loop_rl##name(unsigned short *dst,        \
-    unsigned short *dram, int lines_offs, int mdbg);
+    unsigned short *dram, unsigned lines_offs, int mdbg);
 #endif
 
 make_do_loop(,,,)
 make_do_loop(_md, , , MD_LAYER_CODE)
+make_do_loop(_h32, , , MD_LAYER_CODE_H32)
 make_do_loop(_scan, PICOSCAN_PRE, PICOSCAN_POST, )
+make_do_loop(_scan_h32, PICOSCAN_PRE, PICOSCAN_POST, MD_LAYER_CODE_H32)
 make_do_loop(_scan_md, PICOSCAN_PRE, PICOSCAN_POST, MD_LAYER_CODE)
 
-typedef void (*do_loop_func)(unsigned short *dst, unsigned short *dram, int lines, int mdbg);
-enum { DO_LOOP, DO_LOOP_MD, DO_LOOP_SCAN, DO_LOOP_MD_SCAN };
+typedef void (*do_loop_func)(unsigned short *dst, unsigned short *dram, unsigned lines, int mdbg);
+enum { DO_LOOP, DO_LOOP_H32, DO_LOOP_MD, DO_LOOP_SCAN, DO_LOOP_H32_SCAN, DO_LOOP_MD_SCAN };
 
-static const do_loop_func do_loop_dc_f[] = { do_loop_dc, do_loop_dc_md, do_loop_dc_scan, do_loop_dc_scan_md };
-static const do_loop_func do_loop_pp_f[] = { do_loop_pp, do_loop_pp_md, do_loop_pp_scan, do_loop_pp_scan_md };
-static const do_loop_func do_loop_rl_f[] = { do_loop_rl, do_loop_rl_md, do_loop_rl_scan, do_loop_rl_scan_md };
+static const do_loop_func do_loop_dc_f[] = { do_loop_dc, do_loop_dc_h32, do_loop_dc_md, do_loop_dc_scan, do_loop_dc_scan_h32, do_loop_dc_scan_md };
+static const do_loop_func do_loop_pp_f[] = { do_loop_pp, do_loop_pp_h32, do_loop_pp_md, do_loop_pp_scan, do_loop_pp_scan_h32, do_loop_pp_scan_md };
+static const do_loop_func do_loop_rl_f[] = { do_loop_rl, do_loop_rl_h32, do_loop_rl_md, do_loop_rl_scan, do_loop_rl_scan_h32, do_loop_rl_scan_md };
 
 void PicoDraw32xLayer(int offs, int lines, int md_bg)
 {
@@ -228,13 +288,14 @@ void PicoDraw32xLayer(int offs, int lines, int md_bg)
   int lines_sft_offs;
   int which_func;
 
-  Pico.est.DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement;
+  offs += Pico32x.sync_line;
+
+  Pico.est.DrawLineDest = (char *)DrawLineDestBase32x + offs * DrawLineDestIncrement32x;
+  Pico.est.DrawLineDestIncr = DrawLineDestIncrement32x;
   dram = Pico32xMem->dram[Pico32x.vdp_regs[0x0a/2] & P32XV_FS];
 
-  if (Pico32xDrawMode == PDM32X_BOTH) {
-    if (Pico.m.dirtyPal)
-      PicoDrawUpdateHighPal();
-  }
+  if (Pico32xDrawMode == PDM32X_BOTH)
+    PicoDrawUpdateHighPal();
 
   if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 2)
   {
@@ -258,13 +319,20 @@ void PicoDraw32xLayer(int offs, int lines, int md_bg)
   }
 
 do_it:
+  // In 8 bit modes the MD and 32X layers are merged by the 32X renderer, while
+  // in 16 bit mode the MD layer is rendered directly into the target buffer and
+  // the 32X layer is overlaid onto it.
   if (Pico32xDrawMode == PDM32X_BOTH)
     which_func = have_scan ? DO_LOOP_MD_SCAN : DO_LOOP_MD;
+  else if (!(Pico.video.reg[12] & 1)) // H32, mind 4 px offset
+    which_func = have_scan ? DO_LOOP_H32_SCAN : DO_LOOP_H32;
   else
     which_func = have_scan ? DO_LOOP_SCAN : DO_LOOP;
-  lines_sft_offs = (lines << 16) | offs;
+  lines_sft_offs = (Pico32x.sync_line << 24) | (lines << 16) | offs;
   if (Pico32x.vdp_regs[2 / 2] & P32XV_SFT)
     lines_sft_offs |= 1 << 8;
+  if (!(Pico.video.reg[12] & 1)) // offset flag for H32
+    lines_sft_offs |= 2 << 8;
 
   do_loop[which_func](Pico.est.DrawLineDest, dram, lines_sft_offs, md_bg);
 }
@@ -273,26 +341,21 @@ do_it:
 void PicoDraw32xLayerMdOnly(int offs, int lines)
 {
   int have_scan = PicoScan32xBegin != NULL && PicoScan32xEnd != NULL;
-  unsigned short *dst = (void *)((char *)DrawLineDestBase + offs * DrawLineDestIncrement);
+  unsigned short *dst = (void *)((char *)DrawLineDestBase32x + offs * DrawLineDestIncrement32x);
   unsigned char  *pmd = Pico.est.Draw2FB + 328 * offs + 8;
   unsigned short *pal = Pico.est.HighPal;
-  int poffs = 0, plen = 320;
+  int plen = 320;
   int l, p;
 
-  if (!(Pico.video.reg[12] & 1)) {
-    // 32col mode
-    poffs = 32;
-    plen = 256;
-  }
+  PicoDrawUpdateHighPal();
 
-  if (Pico.m.dirtyPal)
-    PicoDrawUpdateHighPal();
+  offs += Pico32x.sync_line;
+  dst += Pico32x.sync_line * DrawLineDestIncrement32x;
 
-  dst += poffs;
   for (l = 0; l < lines; l++) {
     if (have_scan) {
       PicoScan32xBegin(l + offs);
-      dst = Pico.est.DrawLineDest + poffs;
+      dst = (unsigned short *)Pico.est.DrawLineDest;
     }
     for (p = 0; p < plen; p += 4) {
       dst[p + 0] = pal[*pmd++];
@@ -300,7 +363,7 @@ void PicoDraw32xLayerMdOnly(int offs, int lines)
       dst[p + 2] = pal[*pmd++];
       dst[p + 3] = pal[*pmd++];
     }
-    dst = (void *)((char *)dst + DrawLineDestIncrement);
+    dst = Pico.est.DrawLineDest = (char *)dst + DrawLineDestIncrement32x;
     pmd += 328 - plen;
     if (have_scan)
       PicoScan32xEnd(l + offs);
@@ -309,21 +372,32 @@ void PicoDraw32xLayerMdOnly(int offs, int lines)
 
 void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode)
 {
-#ifdef _ASM_32X_DRAW
-  extern void *Pico32xNativePal;
-  Pico32xNativePal = Pico32xMem->pal_native;
-#endif
+  if (which == PDF_RGB555) {
+    // CLUT pixels needed as well, for layer priority
+    PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328);
+    PicoDrawSetOutBufMD(NULL, 0);
+  } else {
+    // store CLUT pixels, same layout as alt renderer
+    PicoDrawSetInternalBuf(NULL, 0);
+    PicoDrawSetOutBufMD(Pico.est.Draw2FB, 328);
+  }
 
-  if (which == PDF_RGB555 && use_32x_line_mode) {
+  if (use_32x_line_mode)
     // we'll draw via FinalizeLine32xRGB555 (rare)
-    PicoDrawSetInternalBuf(NULL, 0);
     Pico32xDrawMode = PDM32X_OFF;
-    return;
-  }
+  else
+    // in RGB555 mode the 32x layer is overlaid on the MD layer; in the other
+    // modes the 32x and MD layers are merged together by the 32x renderer
+    Pico32xDrawMode = (which == PDF_RGB555) ? PDM32X_32X_ONLY : PDM32X_BOTH;
+}
 
-  // use the same layout as alt renderer
-  PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328);
-  Pico32xDrawMode = (which == PDF_RGB555) ? PDM32X_32X_ONLY : PDM32X_BOTH;
+void PicoDrawSetOutBuf32X(void *dest, int increment)
+{
+  DrawLineDestBase32x = dest;
+  DrawLineDestIncrement32x = increment;
+  // in RGB555 mode this buffer is also used by the MD renderer
+  if (Pico32xDrawMode != PDM32X_BOTH)
+    PicoDrawSetOutBufMD(DrawLineDestBase32x, DrawLineDestIncrement32x);
 }
 
 // vim:shiftwidth=2:ts=2:expandtab