minor changes

author kub <derkub@gmail.com>

Mon, 25 Mar 2019 18:31:32 +0000 (19:31 +0100)

committer kub <derkub@gmail.com>

Tue, 30 Jul 2019 14:34:40 +0000 (16:34 +0200)
author kub <derkub@gmail.com>
Mon, 25 Mar 2019 18:31:32 +0000 (19:31 +0100)
committer kub <derkub@gmail.com>
Tue, 30 Jul 2019 14:34:40 +0000 (16:34 +0200)
diff --git a/pico/32x/32x.c b/pico/32x/32x.c

index 3ee8c2e..a15cb11 100644 (file)
--- a/pico/32x/32x.c
+++ b/pico/32x/32x.c
@@ -194,11 +194,11 @@ void PicoPower32x(void)
  
  void PicoUnload32x(void)
  {
+  sh2_finish(&msh2);
+  sh2_finish(&ssh2);
    if (Pico32xMem != NULL)
      plat_munmap(Pico32xMem, sizeof(*Pico32xMem));
    Pico32xMem = NULL;
-  sh2_finish(&msh2);
-  sh2_finish(&ssh2);
  
    PicoIn.AHW &= ~PAHW_32X;
  }
diff --git a/pico/m68kif_cyclone.s b/pico/m68kif_cyclone.s

index a0a508c..3a9621d 100644 (file)
--- a/pico/m68kif_cyclone.s
+++ b/pico/m68kif_cyclone.s
@@ -87,19 +87,19 @@ cyclone_fetch32:
      orrcc   r0, r1, r0, lsl #16
      bxcc    lr
  
-    stmfd   sp!,{r0,r1,lr}
+    stmfd   sp!,{r0,r1,r2,lr}
      mov     lr, pc
      bx      r1
      mov     r2, r0, lsl #16
-    ldmia   sp, {r0,r1}
+    ldmfd   sp!, {r0,r1}
      str     r2, [sp]
      add     r0, r0, #2
      mov     lr, pc
      bx      r1
-    ldr     r1, [sp]
+    ldmfd   sp!, {r1,lr}
      mov     r0, r0, lsl #16
      orr     r0, r1, r0, lsr #16
-    ldmfd   sp!,{r1,r2,pc}
+    bx      lr
  
  
  cyclone_write8: @ u32 a, u8 d
diff --git a/pico/pico_int.h b/pico/pico_int.h

index f6d8b37..4d599ce 100644 (file)
--- a/pico/pico_int.h
+++ b/pico/pico_int.h
@@ -241,7 +241,7 @@ extern SH2 sh2s[2];
  # define sh2_pc(sh2) (sh2)->pc\r
  #endif\r
  \r
-#define sh2_cycles_done(sh2) ((unsigned)(sh2)->cycles_timeslice - sh2_cycles_left(sh2))\r
+#define sh2_cycles_done(sh2) (unsigned)((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2))\r
  #define sh2_cycles_done_t(sh2) \\r
    (unsigned)(C_M68K_TO_SH2(sh2, (sh2)->m68krcycles_done) + sh2_cycles_done(sh2))\r
  #define sh2_cycles_done_m68k(sh2) \\r
@@ -650,6 +650,7 @@ PICO_INTERNAL void PicoFrameStart(void);
  void PicoDrawSync(int to, int blank_last_line);\r
  void BackFill(int reg7, int sh, struct PicoEState *est);\r
  void FinalizeLine555(int sh, int line, struct PicoEState *est);\r
+void PicoDrawSetOutBufMD(void *dest, int increment);\r
  extern int (*PicoScanBegin)(unsigned int num);\r
  extern int (*PicoScanEnd)(unsigned int num);\r
  #define MAX_LINE_SPRITES 29\r
diff --git a/pico/sms.c b/pico/sms.c

index 286b8bf..2800e20 100644 (file)
--- a/pico/sms.c
+++ b/pico/sms.c
@@ -46,8 +46,8 @@ static void vdp_data_write(unsigned char d)
    struct PicoVideo *pv = &Pico.video;
  
    if (pv->type == 3) {
+    if (PicoMem.cram[pv->addr & 0x1f] != d) Pico.m.dirtyPal = 1;
      PicoMem.cram[pv->addr & 0x1f] = d;
-    Pico.m.dirtyPal = 1;
    } else {
      PicoMem.vramb[pv->addr] = d;
    }
diff --git a/platform/common/helix/lib.c b/platform/common/helix/lib.c

index d7c511b..d2b0589 100644 (file)
--- a/platform/common/helix/lib.c
+++ b/platform/common/helix/lib.c
@@ -53,70 +53,5 @@ void *memmove (void *dest, const void *src, size_t n)
         return dest;
  }
  #else
-/* memcpy/memmove in C with some simple optimizations.
- * ATTN does dirty aliasing tricks with undefined behaviour by standard.
- * (this works fine with gcc, though...)
- */
-void *memcpy(void *dest, const void *src, size_t n)
-{
-       struct _16 { uint32_t a[4]; };
-       union { const void *v; char *c; uint64_t *l; struct _16 *s; }
-               ss = { src }, ds = { dest };
-       const int lm = sizeof(uint32_t)-1;
-
-       if ((((unsigned)ss.c ^ (unsigned)ds.c) & lm) == 0) {
-               /* fast copy if pointers have the same aligment */
-               while (((unsigned)ss.c & lm) && n > 0)  /* align to word */
-                       *ds.c++ = *ss.c++, n--;
-               while (n >= sizeof(struct _16)) /* copy 16 bytes blocks */
-                       *ds.s++ = *ss.s++, n -= sizeof(struct _16);
-               if (n >= sizeof(uint64_t))      /* copy leftover 8 byte block */
-                       *ds.l++ = *ss.l++, n -= sizeof(uint64_t);
-       } else {
-               /* byte copy if pointers are unaligned */
-               while (n >= 8) {                /* copy 8 byte blocks */
-                       *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
-                       *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
-                       *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
-                       *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
-               }
-       }
-       /* copy max. 8 leftover bytes */
-       while (n > 0)
-               *ds.c++ = *ss.c++, n--;
-       return dest;
-}
-
-void *memmove (void *dest, const void *src, size_t n)
-{
-       struct _16 { uint32_t a[4]; };
-       union { const void *v; char *c; uint64_t *l; struct _16 *s; }
-               ss = { src+n }, ds = { dest+n };
-       const int lm = sizeof(uint32_t)-1;
-
-       if (dest <= src || dest >= src+n)
-               return memcpy(dest, src, n);
-
-       if ((((unsigned)ss.c ^ (unsigned)ds.c) & lm) == 0) {
-               /* fast copy if pointers have the same aligment */
-               while (((unsigned)ss.c & lm) && n > 0)
-                       *--ds.c = *--ss.c, n--;
-               while (n >= sizeof(struct _16))
-                       *--ds.s = *--ss.s, n -= sizeof(struct _16);
-               if (n >= sizeof(uint64_t))
-                       *--ds.l = *--ss.l, n -= sizeof(uint64_t);
-       } else {
-               /* byte copy if pointers are unaligned */
-               while (n >= 8) {
-                       *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
-                       *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
-                       *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
-                       *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
-               }
-       }
-       /* copy max. 8 leftover bytes */
-       while (n > 0)
-               *--ds.c = *--ss.c, n--;
-       return dest;
-}
+#include "../memcpy.c"
  #endif
diff --git a/platform/common/memcpy.c b/platform/common/memcpy.c

new file mode 100644 (file)

index 0000000..b99de4a
--- /dev/null
+++ b/platform/common/memcpy.c
@@ -0,0 +1,125 @@
+/*
+ * (C) 2018 Kai-Uwe Bloem <derkub@gmail.com>
+ *
+ * 32bit ARM/MIPS optimized C implementation of memcpy and memmove, designed for
+ * good performance with gcc.
+ * - if src and dest have the same alignment, 4-word copy is used.
+ * - if src and dest are unaligned to each other, still loads word data and
+ *   stores correctly shifted word data (for all but the first and last bytes
+ *   to avoid under/overstepping the src region).
+ *
+ * ATTN does dirty aliasing tricks with undefined behaviour by standard.
+ *     (however, this was needed to improve the generated code).
+ * ATTN uses struct assignment, which only works if the compiler is inlining
+ *     this (else it would probably call memcpy :-)).
+ */
+#include <stdlib.h>
+#include <stdint.h>
+
+#include <endian.h>
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define        _L_ >>
+#define        _U_ <<
+#else
+#define        _L_ <<
+#define        _U_ >>
+#endif
+
+/*
+ * Copy n bytes from src to dest in ascending address order and return dest.
+ * (memmove in this file calls this for dest <= src or disjoint regions.)
+ *
+ * src is first advanced bytewise to a word boundary. If dest is word aligned
+ * at that point, data is copied in 16 byte blocks plus at most one 8 byte
+ * block. Otherwise, if at least 8 bytes remain, dest is word aligned instead
+ * and each stored word is assembled by shifting two aligned source words.
+ * Whatever is left afterwards is copied bytewise.
+ */
+void *memcpy(void *dest, const void *src, size_t n)
+{
+       struct _16 { uint32_t a[4]; };
+       union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; }
+               ss = { src }, ds = { dest };
+       const int lm = sizeof(uint32_t)-1;
+
+       /* align src to word (uintptr_t: pointer-sized, no truncation on LP64) */
+       while (((uintptr_t)ss.c & lm) && n > 0)
+               *ds.c++ = *ss.c++, n--;
+       if (((uintptr_t)ds.c & lm) == 0) {
+               /* fast copy if pointers have the same alignment */
+               while (n >= sizeof(struct _16)) /* copy 16 byte blocks */
+                       *ds.s++ = *ss.s++, n -= sizeof(struct _16);
+               if (n >= sizeof(uint64_t))      /* copy leftover 8 byte block */
+                       *ds.l++ = *ss.l++, n -= sizeof(uint64_t);
+       } else if (n >= 2*sizeof(uint32_t)) {
+               /* unaligned data big enough to avoid overstepping src */
+               uint32_t v1, v2, b, s;
+               /* align dest to word; this moves src 1..3 bytes off alignment */
+               while (((uintptr_t)ds.c & lm) && n > 0)
+                       *ds.c++ = *ss.c++, n--;
+               /* copy loop: load aligned words and store shifted words.
+                * b = bytes src is above the word boundary below it (1..3),
+                * so both shift counts s and 32-s stay within 8..24.
+                */
+               b = (uintptr_t)ss.c & lm, s = b*8; ss.c -= b;
+               v1 = *ss.i++, v2 = *ss.i++;
+               while (n >= 3*sizeof(uint32_t)) {
+                       *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s)); v1 = *ss.i++;
+                       *ds.i++ = (v2 _L_ s) | (v1 _U_ (32-s)); v2 = *ss.i++;
+                       n -= 2*sizeof(uint32_t);
+               }
+               /* data for one more store is already loaded */
+               if (n >= sizeof(uint32_t)) {
+                       *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s));
+                       n -= sizeof(uint32_t);
+                       ss.c += sizeof(uint32_t);
+               }
+               /* undo the 2 word read-ahead and the round-down by b bytes;
+                * written as a positive step back to avoid unsigned wraparound
+                */
+               ss.c -= 2*sizeof(uint32_t) - b;
+       }
+       /* copy 0-7 leftover bytes */
+       while (n >= 4) {
+               *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
+               *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--;
+       }
+       while (n > 0)
+               *ds.c++ = *ss.c++, n--;
+       return dest;
+}
+
+/*
+ * Copy n bytes from src to dest, where the regions may overlap; returns dest.
+ *
+ * If dest lies at or below src, or at or above src+n, the work is handed to
+ * memcpy. Otherwise the copy runs backwards from the end of both regions:
+ * src is stepped down bytewise to a word boundary; if dest is word aligned
+ * at that point, 16 byte blocks plus at most one 8 byte block are copied.
+ * Otherwise, if at least 8 bytes remain, dest is word aligned instead and
+ * each stored word is assembled by shifting two aligned source words.
+ * Whatever is left afterwards is copied bytewise.
+ */
+void *memmove (void *dest, const void *src, size_t n)
+{
+       struct _16 { uint32_t a[4]; };
+       union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; }
+               ss = { src+n }, ds = { dest+n };
+       const int lm = sizeof(uint32_t)-1;
+
+       if (dest <= src || dest >= src+n)
+               return memcpy(dest, src, n);
+
+       /* align src to word (uintptr_t: pointer-sized, no truncation on LP64) */
+       while (((uintptr_t)ss.c & lm) && n > 0)
+               *--ds.c = *--ss.c, n--;
+       if (((uintptr_t)ds.c & lm) == 0) {
+               /* fast copy if pointers have the same alignment */
+               while (n >= sizeof(struct _16)) /* copy 16 byte blocks */
+                       *--ds.s = *--ss.s, n -= sizeof(struct _16);
+               if (n >= sizeof(uint64_t))      /* copy leftover 8 byte block */
+                       *--ds.l = *--ss.l, n -= sizeof(uint64_t);
+       } else if (n >= 2*sizeof(uint32_t)) {
+               /* unaligned data big enough to avoid understepping src */
+               uint32_t v1, v2, b, s;
+               /* align dest to word; this moves src 1..3 bytes off alignment */
+               while (((uintptr_t)ds.c & lm) && n > 0)
+                       *--ds.c = *--ss.c, n--;
+               /* copy loop: load aligned words and store shifted words.
+                * b = bytes from src up to the word boundary above it (1..3),
+                * so both shift counts s and 32-s stay within 8..24. Rounding
+                * up by the low address bits instead would leave the pointer
+                * misaligned whenever those bits are 1 or 3.
+                */
+               b = sizeof(uint32_t) - ((uintptr_t)ss.c & lm), s = b*8; ss.c += b;
+               v1 = *--ss.i, v2 = *--ss.i;
+               while (n >= 3*sizeof(uint32_t)) {
+                       *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s)); v1 = *--ss.i;
+                       *--ds.i = (v2 _U_ s) | (v1 _L_ (32-s)); v2 = *--ss.i;
+                       n -= 2*sizeof(uint32_t);
+               }
+               /* data for one more store is already loaded */
+               if (n >= sizeof(uint32_t)) {
+                       *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s));
+                       n -= sizeof(uint32_t);
+                       ss.c -= sizeof(uint32_t);
+               }
+               /* undo the 2 word read-ahead and the round-up by b bytes;
+                * written as a positive step forward to avoid unsigned wraparound
+                */
+               ss.c += 2*sizeof(uint32_t) - b;
+       }
+       /* copy 0-7 leftover bytes */
+       while (n >= 4) {
+               *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
+               *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--;
+       }
+       while (n > 0)
+               *--ds.c = *--ss.c, n--;
+       return dest;
+}
diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c

index 4446f72..ef99af2 100644 (file)
--- a/platform/common/plat_sdl.c
+++ b/platform/common/plat_sdl.c
@@ -89,6 +89,7 @@ static const struct in_pdata in_sdl_platform_data = {
  /* YUV stuff */
  static int yuv_ry[32], yuv_gy[32], yuv_by[32];
  static unsigned char yuv_u[32 * 2], yuv_v[32 * 2];
+static int yuv_y[256];
  
  void bgr_to_uyvy_init(void)
  {
@@ -119,6 +120,10 @@ void bgr_to_uyvy_init(void)
        v = 255;
      yuv_v[i + 32] = v;
    }
+  // valid Y range seems to be 16..235
+  for (i = 0; i < 256; i++) {
+    yuv_y[i] = 16 + 219 * i / 32;
+  }
  }
  
  void rgb565_to_uyvy(void *d, const void *s, int pixels)
@@ -143,8 +148,8 @@ void rgb565_to_uyvy(void *d, const void *s, int pixels)
      u = yu[b0 - y0];
      v = yv[r0 - y0];
      // valid Y range seems to be 16..235
-    y0 = 16 + 219 * y0 / 31;
-    y1 = 16 + 219 * y1 / 31;
+    y0 = yuv_y[y0];
+    y1 = yuv_y[y1];
  
      *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u;
    }
diff --git a/platform/common/version.h b/platform/common/version.h

index f65ba1e..8b3adbf 100644 (file)
--- a/platform/common/version.h
+++ b/platform/common/version.h
@@ -1 +1 @@
-#define VERSION "1.93"\r
+#define VERSION "1.93+"\r
diff --git a/tools/mkoffsets.sh b/tools/mkoffsets.sh

index 60088f2..90e6586 100755 (executable)
--- a/tools/mkoffsets.sh
+++ b/tools/mkoffsets.sh
@@ -1,16 +1,21 @@
-# usage: mkoffsets <output dir>
  # automatically compute structure offsets for gcc targets in ELF format
+# (C) 2018 Kai-Uwe Bloem. This work is placed in the public domain.
+#
+# usage: mkoffsets <output dir>
  
  CC=${CC:-gcc}
  
  # endianess of target (automagically determined below)
  ENDIAN=
  
+# compile with target C compiler and extract value from .rodata section
  compile_rodata ()
  {
         $CC $CFLAGS -I .. -c /tmp/getoffs.c -o /tmp/getoffs.o || exit 1
+       # find the name of the .rodata section (in case -fdata-sections is used)
         rosect=$(readelf -S /tmp/getoffs.o | grep '\.rodata' |
                                                 sed 's/^[^.]*././;s/ .*//')
+       # read out .rodata section as hex string (should be only 4 or 8 bytes)
         objcopy --dump-section $rosect=/tmp/getoffs.ro /tmp/getoffs.o || exit 1
         ro=$(xxd -ps /tmp/getoffs.ro)
         if [ "$ENDIAN" = "le" ]; then
@@ -22,9 +27,11 @@ compile_rodata ()
         else
                 hex=$ro
         fi
+       # extract decimal value from hex string
         rodata=$(printf "%d" 0x$hex)
  }
  
+# determine member offset and create #define
  get_define () # prefix struct member member...
  {
         prefix=$1; shift
author	kub <derkub@gmail.com>
	Mon, 25 Mar 2019 18:31:32 +0000 (19:31 +0100)
committer	kub <derkub@gmail.com>
	Tue, 30 Jul 2019 14:34:40 +0000 (16:34 +0200)
pico/32x/32x.c		patch \| blob \| blame \| history
pico/m68kif_cyclone.s		patch \| blob \| blame \| history
pico/pico_int.h		patch \| blob \| blame \| history
pico/sms.c		patch \| blob \| blame \| history
platform/common/helix/lib.c		patch \| blob \| blame \| history
platform/common/memcpy.c	[new file with mode: 0644]	patch \| blob
platform/common/plat_sdl.c		patch \| blob \| blame \| history
platform/common/version.h		patch \| blob \| blame \| history
tools/mkoffsets.sh		patch \| blob \| blame \| history