add a libcrypt warning

[pcsx_rearmed.git] / deps / libchdr / include / dr_libs / dr_flac.h
diff --git a/deps/libchdr/include/dr_libs/dr_flac.h b/deps/libchdr/include/dr_libs/dr_flac.h

index 2dcddb5..0c43eed 100644 (file)
--- a/deps/libchdr/include/dr_libs/dr_flac.h
+++ b/deps/libchdr/include/dr_libs/dr_flac.h
@@ -1,6 +1,6 @@
  /*
  FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_flac - v0.12.28 - 2021-02-21
+dr_flac - v0.12.39 - 2022-09-17
  
  David Reid - mackron@gmail.com
  
@@ -210,6 +210,9 @@ Build Options
  #define DR_FLAC_NO_SIMD
    Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler.
  
+#define DR_FLAC_NO_WCHAR
+  Disables all functions ending with `_w`. Use this if your compiler does not provide wchar.h. Not required if DR_FLAC_NO_STDIO is also defined.
+
  
  
  Notes
@@ -232,7 +235,7 @@ extern "C" {
  
  #define DRFLAC_VERSION_MAJOR     0
  #define DRFLAC_VERSION_MINOR     12
-#define DRFLAC_VERSION_REVISION  28
+#define DRFLAC_VERSION_REVISION  39
  #define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
  
  #include <stddef.h> /* For size_t. */
@@ -244,7 +247,7 @@ typedef   signed short          drflac_int16;
  typedef unsigned short          drflac_uint16;
  typedef   signed int            drflac_int32;
  typedef unsigned int            drflac_uint32;
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && !defined(__clang__)
      typedef   signed __int64    drflac_int64;
      typedef unsigned __int64    drflac_uint64;
  #else
@@ -261,7 +264,7 @@ typedef unsigned int            drflac_uint32;
          #pragma GCC diagnostic pop
      #endif
  #endif
-#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined(_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__)
      typedef drflac_uint64       drflac_uintptr;
  #else
      typedef drflac_uint32       drflac_uintptr;
@@ -383,15 +386,13 @@ typedef enum
      drflac_seek_origin_current
  } drflac_seek_origin;
  
-/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */
-#pragma pack(2)
+/* The order of members in this structure is important because we map this directly to the raw data within the SEEKTABLE metadata block. */
  typedef struct
  {
      drflac_uint64 firstPCMFrame;
      drflac_uint64 flacFrameOffset;   /* The offset from the first byte of the header of the first frame. */
      drflac_uint16 pcmFrameCount;
  } drflac_seekpoint;
-#pragma pack()
  
  typedef struct
  {
@@ -1280,15 +1281,13 @@ typedef struct
      const char* pRunningData;
  } drflac_cuesheet_track_iterator;
  
-/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */
-#pragma pack(4)
+/* The order of members here is important because we map this directly to the raw data within the CUESHEET metadata block. */
  typedef struct
  {
      drflac_uint64 offset;
      drflac_uint8 index;
      drflac_uint8 reserved[3];
  } drflac_cuesheet_track_index;
-#pragma pack()
  
  typedef struct
  {
@@ -1363,9 +1362,15 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
      I am using "__inline__" only when we're compiling in strict ANSI mode.
      */
      #if defined(__STRICT_ANSI__)
-        #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
+        #define DRFLAC_GNUC_INLINE_HINT __inline__
      #else
-        #define DRFLAC_INLINE inline __attribute__((always_inline))
+        #define DRFLAC_GNUC_INLINE_HINT inline
+    #endif
+
+    #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT __attribute__((always_inline))
+    #else
+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT
      #endif
  #elif defined(__WATCOMC__)
      #define DRFLAC_INLINE __inline
@@ -1378,7 +1383,7 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
      #define DRFLAC_X64
  #elif defined(__i386) || defined(_M_IX86)
      #define DRFLAC_X86
-#elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
+#elif defined(__arm__) || defined(_M_ARM) || defined(__arm64) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64)
      #define DRFLAC_ARM
  #endif
  
@@ -1431,16 +1436,6 @@ Unfortuantely dr_flac depends on this for a few things so we're just going to di
      #if defined(DRFLAC_ARM)
          #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
              #define DRFLAC_SUPPORT_NEON
-        #endif
-
-        /* Fall back to looking for the #include file. */
-        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
-            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
-                #define DRFLAC_SUPPORT_NEON
-            #endif
-        #endif
-
-        #if defined(DRFLAC_SUPPORT_NEON)
              #include <arm_neon.h>
          #endif
      #endif
@@ -1519,9 +1514,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
  {
  #if defined(DRFLAC_SUPPORT_SSE41)
      #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
-        #if defined(DRFLAC_X64)
-            return DRFLAC_TRUE;    /* 64-bit targets always support SSE4.1. */
-        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__)
+        #if defined(__SSE4_1__) || defined(__AVX__)
              return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE41 code we can assume support. */
          #else
              #if defined(DRFLAC_NO_CPUID)
@@ -1577,6 +1570,30 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
      #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
          #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
      #endif
+#elif defined(__WATCOMC__) && defined(__386__)
+    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    extern __inline drflac_uint16 _watcom_bswap16(drflac_uint16);
+    extern __inline drflac_uint32 _watcom_bswap32(drflac_uint32);
+    extern __inline drflac_uint64 _watcom_bswap64(drflac_uint64);
+#pragma aux _watcom_bswap16 = \
+    "xchg al, ah" \
+    parm  [ax]    \
+    value [ax]    \
+    modify nomemory;
+#pragma aux _watcom_bswap32 = \
+    "bswap eax" \
+    parm  [eax] \
+    value [eax] \
+    modify nomemory;
+#pragma aux _watcom_bswap64 = \
+    "bswap eax"     \
+    "bswap edx"     \
+    "xchg eax,edx"  \
+    parm [eax edx]  \
+    value [eax edx] \
+    modify nomemory;
  #endif
  
  
@@ -1677,6 +1694,10 @@ typedef drflac_int32 drflac_result;
  #define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE            9
  #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
  
+#define DRFLAC_SEEKPOINT_SIZE_IN_BYTES                  18
+#define DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES             36
+#define DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES       12
+
  #define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
  
  
@@ -1800,6 +1821,8 @@ static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
          return _byteswap_ushort(n);
      #elif defined(__GNUC__) || defined(__clang__)
          return __builtin_bswap16(n);
+    #elif defined(__WATCOMC__) && defined(__386__)
+        return _watcom_bswap16(n);
      #else
          #error "This compiler does not support the byte swap intrinsic."
      #endif
@@ -1829,6 +1852,8 @@ static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
          #else
              return __builtin_bswap32(n);
          #endif
+    #elif defined(__WATCOMC__) && defined(__386__)
+        return _watcom_bswap32(n);
      #else
          #error "This compiler does not support the byte swap intrinsic."
      #endif
@@ -1847,6 +1872,8 @@ static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
          return _byteswap_uint64(n);
      #elif defined(__GNUC__) || defined(__clang__)
          return __builtin_bswap64(n);
+    #elif defined(__WATCOMC__) && defined(__386__)
+        return _watcom_bswap64(n);
      #else
          #error "This compiler does not support the byte swap intrinsic."
      #endif
@@ -1882,6 +1909,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
      return n;
  }
  
+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32_ptr_unaligned(const void* pData)
+{
+    const drflac_uint8* pNum = (drflac_uint8*)pData;
+    return *(pNum) << 24 | *(pNum+1) << 16 | *(pNum+2) << 8 | *(pNum+3);
+}
+
  static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
  {
      if (drflac__is_little_endian()) {
@@ -1901,6 +1934,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
      return n;
  }
  
+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32_ptr_unaligned(const void* pData)
+{
+    const drflac_uint8* pNum = (drflac_uint8*)pData;
+    return *pNum | *(pNum+1) << 8 |  *(pNum+2) << 16 | *(pNum+3) << 24;
+}
+
  
  static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
  {
@@ -2402,6 +2441,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i
          if (!drflac__reload_cache(bs)) {
              return DRFLAC_FALSE;
          }
+        if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+            /* This happens when we get to end of stream */
+            return DRFLAC_FALSE;
+        }
  
          *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
          bs->consumedBits += bitCountLo;
@@ -2654,6 +2697,13 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
  #if  defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(__clang__)
  #define DRFLAC_IMPLEMENT_CLZ_MSVC
  #endif
+#if  defined(__WATCOMC__) && defined(__386__)
+#define DRFLAC_IMPLEMENT_CLZ_WATCOM
+#endif
+#ifdef __MRC__
+#include <intrinsics.h>
+#define DRFLAC_IMPLEMENT_CLZ_MRC
+#endif
  
  static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
  {
@@ -2694,6 +2744,8 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void)
      /* Fast compile time check for ARM. */
  #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
      return DRFLAC_TRUE;
+#elif defined(__MRC__)
+    return DRFLAC_TRUE;
  #else
      /* If the compiler itself does not support the intrinsic then we'll need to return false. */
      #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
@@ -2801,6 +2853,26 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
  }
  #endif
  
+#ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM
+static __inline drflac_uint32 drflac__clz_watcom (drflac_uint32);
+#ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT
+/* Use the LZCNT instruction (only available on some processors since the 2010s). */
+#pragma aux drflac__clz_watcom_lzcnt = \
+    "db 0F3h, 0Fh, 0BDh, 0C0h" /* lzcnt eax, eax */ \
+    parm [eax] \
+    value [eax] \
+    modify nomemory;
+#else
+/* Use the 386+-compatible implementation. */
+#pragma aux drflac__clz_watcom = \
+    "bsr eax, eax" \
+    "xor eax, 31" \
+    parm [eax] nomemory \
+    value [eax] \
+    modify exact [eax] nomemory;
+#endif
+#endif
+
  static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
  {
  #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
@@ -2811,6 +2883,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
      {
  #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
          return drflac__clz_msvc(x);
+#elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT)
+        return drflac__clz_watcom_lzcnt(x);
+#elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM)
+        return (x == 0) ? sizeof(x)*8 : drflac__clz_watcom(x);
+#elif defined(__MRC__)
+        return __cntlzw(x);
  #else
          return drflac__clz_software(x);
  #endif
@@ -2830,9 +2908,24 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs,
          }
      }
  
+    if (bs->cache == 1) {
+        /* Not catching this would lead to undefined behaviour: a shift of a 32-bit number by 32 or more is undefined */
+        *pOffsetOut = zeroCounter + (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs) - 1;
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        return DRFLAC_TRUE;
+    }
+
      setBitOffsetPlus1 = drflac__clz(bs->cache);
      setBitOffsetPlus1 += 1;
  
+    if (setBitOffsetPlus1 > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        /* This happens when we get to end of stream */
+        return DRFLAC_FALSE;
+    }
+
      bs->consumedBits += setBitOffsetPlus1;
      bs->cache <<= setBitOffsetPlus1;
  
@@ -2947,6 +3040,25 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64
  }
  
  
+static DRFLAC_INLINE drflac_uint32 drflac__ilog2_u32(drflac_uint32 x)
+{
+#if 1   /* Needs optimizing. */
+    drflac_uint32 result = 0;
+    while (x > 0) {
+        result += 1;
+        x >>= 1;
+    }
+
+    return result;
+#endif
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__use_64_bit_prediction(drflac_uint32 bitsPerSample, drflac_uint32 order, drflac_uint32 precision)
+{
+    /* https://web.archive.org/web/20220205005724/https://github.com/ietf-wg-cellar/flac-specification/blob/37a49aa48ba4ba12e8757badfc59c0df35435fec/rfc_backmatter.md */
+    return bitsPerSample + precision + drflac__ilog2_u32(order) > 32;
+}
+
  
  /*
  The next two functions are responsible for calculating the prediction.
@@ -2954,6 +3066,9 @@ The next two functions are responsible for calculating the prediction.
  When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
  safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
  */
+#if defined(__clang__)
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
  static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
  {
      drflac_int32 prediction = 0;
@@ -3189,12 +3304,11 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32
  Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
  sake of readability and should only be used as a reference.
  */
-static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
  {
      drflac_uint32 i;
  
      DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
      DRFLAC_ASSERT(pSamplesOut != NULL);
  
      for (i = 0; i < count; ++i) {
@@ -3229,10 +3343,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drfla
          }
  
  
-        if (bitsPerSample+shift >= 32) {
-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
          } else {
-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
          }
      }
  
@@ -3329,6 +3443,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac
              if (!drflac__reload_cache(bs)) {
                  return DRFLAC_FALSE;
              }
+            if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+                /* This happens when we get to end of stream */
+                return DRFLAC_FALSE;
+            }
          }
  
          riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
@@ -3409,6 +3527,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drf
                  if (!drflac__reload_cache(bs)) {
                      return DRFLAC_FALSE;
                  }
+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+                    /* This happens when we get to end of stream */
+                    return DRFLAC_FALSE;
+                }
  
                  bs_cache = bs->cache;
                  bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
@@ -3519,6 +3641,11 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac
                      return DRFLAC_FALSE;
                  }
  
+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+                    /* This happens when we get to end of stream */
+                    return DRFLAC_FALSE;
+                }
+
                  bs_cache = bs->cache;
                  bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
              }
@@ -3576,7 +3703,6 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorde
      drflac_uint32 i;
  
      DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
      DRFLAC_ASSERT(pSamplesOut != NULL);
  
      (void)bitsPerSample;
@@ -3606,7 +3732,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorde
      return DRFLAC_TRUE;
  }
  
-static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
  {
      drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
      drflac_uint32 zeroCountPart0 = 0;
@@ -3622,17 +3748,16 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
      drflac_uint32 i;
  
      DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
      DRFLAC_ASSERT(pSamplesOut != NULL);
  
-    if (order == 0) {
-        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    if (lpcOrder == 0) {
+        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
      }
  
      riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
      pSamplesOutEnd = pSamplesOut + (count & ~3);
  
-    if (bitsPerSample+shift > 32) {
+    if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
          while (pSamplesOut < pSamplesOutEnd) {
              /*
              Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
@@ -3660,10 +3785,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
              riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
              riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
  
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
  
              pSamplesOut += 4;
          }
@@ -3691,10 +3816,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
              riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
              riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
  
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
  
              pSamplesOut += 4;
          }
@@ -3714,10 +3839,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
          /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
  
          /* Sample reconstruction. */
-        if (bitsPerSample+shift > 32) {
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
          } else {
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
          }
  
          i += 1;
@@ -4173,21 +4298,20 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac
      return DRFLAC_TRUE;
  }
  
-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
  {
      DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
      DRFLAC_ASSERT(pSamplesOut != NULL);
  
      /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
-    if (order > 0 && order <= 12) {
-        if (bitsPerSample+shift > 32) {
-            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+    if (lpcOrder > 0 && lpcOrder <= 12) {
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
          } else {
-            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
          }
      } else {
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
      }
  }
  #endif
@@ -4326,7 +4450,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_
  
      const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
  
-    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
      riceParamMask128 = vdupq_n_u32(riceParamMask);
  
      riceParam128 = vdupq_n_s32(riceParam);
@@ -4512,10 +4636,13 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
      int32x4_t riceParam128;
      int64x1_t shift64;
      uint32x4_t one128;
+    int64x2_t prediction128 = { 0 };
+    uint32x4_t zeroCountPart128;
+    uint32x4_t riceParamPart128;
  
      const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
  
-    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
      riceParamMask128 = vdupq_n_u32(riceParamMask);
  
      riceParam128 = vdupq_n_s32(riceParam);
@@ -4524,7 +4651,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
  
      /*
      Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
-    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
+    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
      in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
      so I think there's opportunity for this to be simplified.
      */
@@ -4592,10 +4719,6 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
  
      /* For this version we are doing one sample at a time. */
      while (pDecodedSamples < pDecodedSamplesEnd) {
-        int64x2_t prediction128;
-        uint32x4_t zeroCountPart128;
-        uint32x4_t riceParamPart128;
-
          if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
              !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
              !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
@@ -4672,42 +4795,41 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
      return DRFLAC_TRUE;
  }
  
-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
  {
      DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
      DRFLAC_ASSERT(pSamplesOut != NULL);
  
      /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
-    if (order > 0 && order <= 12) {
-        if (bitsPerSample+shift > 32) {
-            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+    if (lpcOrder > 0 && lpcOrder <= 12) {
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
          } else {
-            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
          }
      } else {
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
      }
  }
  #endif
  
-static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
  {
  #if defined(DRFLAC_SUPPORT_SSE41)
      if (drflac__gIsSSE41Supported) {
-        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
      } else
  #elif defined(DRFLAC_SUPPORT_NEON)
      if (drflac__gIsNEONSupported) {
-        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
      } else
  #endif
      {
          /* Scalar fallback. */
      #if 0
-        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
      #else
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
      #endif
      }
  }
@@ -4718,7 +4840,6 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_
      drflac_uint32 i;
  
      DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
  
      for (i = 0; i < count; ++i) {
          if (!drflac__seek_rice_parts(bs, riceParam)) {
@@ -4729,12 +4850,14 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_
      return DRFLAC_TRUE;
  }
  
-static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+#if defined(__clang__)
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
  {
      drflac_uint32 i;
  
      DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
      DRFLAC_ASSERT(unencodedBitsPerSample <= 31);    /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */
      DRFLAC_ASSERT(pSamplesOut != NULL);
  
@@ -4747,10 +4870,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs*
              pSamplesOut[i] = 0;
          }
  
-        if (bitsPerSample >= 24) {
-            pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            pSamplesOut[i] += drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
          } else {
-            pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+            pSamplesOut[i] += drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
          }
      }
  
@@ -4763,7 +4886,7 @@ Reads and decodes the residual for the sub-frame the decoder is currently sittin
  when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
  <blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
  */
-static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
  {
      drflac_uint8 residualMethod;
      drflac_uint8 partitionOrder;
@@ -4783,7 +4906,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
      }
  
      /* Ignore the first <order> values. */
-    pDecodedSamples += order;
+    pDecodedSamples += lpcOrder;
  
      if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
          return DRFLAC_FALSE;
@@ -4798,11 +4921,11 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
      }
  
      /* Validation check. */
-    if ((blockSize / (1 << partitionOrder)) <= order) {
+    if ((blockSize / (1 << partitionOrder)) < lpcOrder) {
          return DRFLAC_FALSE;
      }
  
-    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - lpcOrder;
      partitionsRemaining = (1 << partitionOrder);
      for (;;) {
          drflac_uint8 riceParam = 0;
@@ -4823,7 +4946,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
          }
  
          if (riceParam != 0xFF) {
-            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
+            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
                  return DRFLAC_FALSE;
              }
          } else {
@@ -4832,7 +4955,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
                  return DRFLAC_FALSE;
              }
  
-            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
+            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
                  return DRFLAC_FALSE;
              }
          }
@@ -5001,7 +5124,7 @@ static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32
          pDecodedSamples[i] = sample;
      }
  
-    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
+    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, 4, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
          return DRFLAC_FALSE;
      }
  
@@ -5056,7 +5179,7 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl
          }
      }
  
-    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
+    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
          return DRFLAC_FALSE;
      }
  
@@ -5184,6 +5307,9 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
                  return DRFLAC_FALSE;
              }
              crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
+            if (header->blockSizeInPCMFrames == 0xFFFF) {
+                return DRFLAC_FALSE;    /* Frame is too big. This is the size of the frame minus 1. The STREAMINFO block defines the max block size which is 16-bits. Adding one will make it 17 bits and therefore too big. */
+            }
              header->blockSizeInPCMFrames += 1;
          } else {
              DRFLAC_ASSERT(blockSize >= 8);
@@ -5222,6 +5348,11 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
              header->bitsPerSample = streaminfoBitsPerSample;
          }
  
+        if (header->bitsPerSample != streaminfoBitsPerSample) {
+            /* If this subframe has a different bitsPerSample then streaminfo or the first frame, reject it */
+            return DRFLAC_FALSE;
+        }
+
          if (!drflac__read_uint8(bs, 8, &header->crc8)) {
              return DRFLAC_FALSE;
          }
@@ -5308,6 +5439,11 @@ static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame,
          subframeBitsPerSample += 1;
      }
  
+    if (subframeBitsPerSample > 32) {
+        /* libFLAC and ffmpeg reject 33-bit subframes as well */
+        return DRFLAC_FALSE;
+    }
+
      /* Need to handle wasted bits per sample. */
      if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
          return DRFLAC_FALSE;
@@ -5978,6 +6114,11 @@ static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac
          return DRFLAC_FALSE;
      }
  
+    /* Do not use the seektable if pcmFramIndex is not coverd by it. */
+    if (pFlac->pSeekpoints[0].firstPCMFrame > pcmFrameIndex) {
+        return DRFLAC_FALSE;
+    }
+
      for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
          if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) {
              break;
@@ -6325,7 +6466,7 @@ static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbac
  }
  
  
-static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks)
+static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeekpointCount, drflac_allocation_callbacks* pAllocationCallbacks)
  {
      /*
      We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
@@ -6385,32 +6526,37 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                  seektableSize = blockSize;
  
                  if (onMeta) {
+                    drflac_uint32 seekpointCount;
                      drflac_uint32 iSeekpoint;
                      void* pRawData;
  
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    seekpointCount = blockSize/DRFLAC_SEEKPOINT_SIZE_IN_BYTES;
+
+                    pRawData = drflac__malloc_from_callbacks(seekpointCount * sizeof(drflac_seekpoint), pAllocationCallbacks);
                      if (pRawData == NULL) {
                          return DRFLAC_FALSE;
                      }
  
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
+                    /* We need to read seekpoint by seekpoint and do some processing. */
+                    for (iSeekpoint = 0; iSeekpoint < seekpointCount; ++iSeekpoint) {
+                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
  
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-                    metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint);
-                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
+                        if (onRead(pUserData, pSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) != DRFLAC_SEEKPOINT_SIZE_IN_BYTES) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
  
-                    /* Endian swap. */
-                    for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
-                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
+                        /* Endian swap. */
                          pSeekpoint->firstPCMFrame   = drflac__be2host_64(pSeekpoint->firstPCMFrame);
                          pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset);
                          pSeekpoint->pcmFrameCount   = drflac__be2host_16(pSeekpoint->pcmFrameCount);
                      }
  
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.seektable.seekpointCount = seekpointCount;
+                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
+
                      onMeta(pUserDataMD, &metadata);
  
                      drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
@@ -6445,7 +6591,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                      pRunningData    = (const char*)pRawData;
                      pRunningDataEnd = (const char*)pRawData + blockSize;
  
-                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
  
                      /* Need space for the rest of the block */
                      if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
@@ -6453,7 +6599,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                          return DRFLAC_FALSE;
                      }
                      metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
-                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
  
                      /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
                      if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
@@ -6471,7 +6617,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                              return DRFLAC_FALSE;
                          }
  
-                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                        commentLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
                          if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
                              drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
                              return DRFLAC_FALSE;
@@ -6495,9 +6641,15 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                      void* pRawData;
                      const char* pRunningData;
                      const char* pRunningDataEnd;
+                    size_t bufferSize;
                      drflac_uint8 iTrack;
                      drflac_uint8 iIndex;
+                    void* pTrackData;
  
+                    /*
+                    This needs to be loaded in two passes. The first pass is used to calculate the size of the memory allocation
+                    we need for storing the necessary data. The second pass will fill that buffer with usable data.
+                    */
                      pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
                      if (pRawData == NULL) {
                          return DRFLAC_FALSE;
@@ -6518,38 +6670,91 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                      metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8;
                      metadata.data.cuesheet.isCD              = (pRunningData[0] & 0x80) != 0;                           pRunningData += 259;
                      metadata.data.cuesheet.trackCount        = pRunningData[0];                                         pRunningData += 1;
-                    metadata.data.cuesheet.pTrackData        = pRunningData;
+                    metadata.data.cuesheet.pTrackData        = NULL;    /* Will be filled later. */
  
-                    /* Check that the cuesheet tracks are valid before passing it to the callback */
-                    for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
-                        drflac_uint8 indexCount;
-                        drflac_uint32 indexPointSize;
+                    /* Pass 1: Calculate the size of the buffer for the track data. */
+                    {
+                        const char* pRunningDataSaved = pRunningData;   /* Will be restored at the end in preparation for the second pass. */
  
-                        if (pRunningDataEnd - pRunningData < 36) {
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
+                        bufferSize = metadata.data.cuesheet.trackCount * DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES;
+
+                        for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                            drflac_uint8 indexCount;
+                            drflac_uint32 indexPointSize;
+
+                            if (pRunningDataEnd - pRunningData < DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES) {
+                                drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                                return DRFLAC_FALSE;
+                            }
+
+                            /* Skip to the index point count */
+                            pRunningData += 35;
+                            
+                            indexCount = pRunningData[0];
+                            pRunningData += 1;
+                            
+                            bufferSize += indexCount * sizeof(drflac_cuesheet_track_index);
+
+                            /* Quick validation check. */
+                            indexPointSize = indexCount * DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES;
+                            if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
+                                drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                                return DRFLAC_FALSE;
+                            }
+
+                            pRunningData += indexPointSize;
                          }
  
-                        /* Skip to the index point count */
-                        pRunningData += 35;
-                        indexCount = pRunningData[0]; pRunningData += 1;
-                        indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index);
-                        if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
+                        pRunningData = pRunningDataSaved;
+                    }
+
+                    /* Pass 2: Allocate a buffer and fill the data. Validation was done in the step above so can be skipped. */
+                    {
+                        char* pRunningTrackData;
+
+                        pTrackData = drflac__malloc_from_callbacks(bufferSize, pAllocationCallbacks);
+                        if (pTrackData == NULL) {
                              drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
                              return DRFLAC_FALSE;
                          }
  
-                        /* Endian swap. */
-                        for (iIndex = 0; iIndex < indexCount; ++iIndex) {
-                            drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData;
-                            pRunningData += sizeof(drflac_cuesheet_track_index);
-                            pTrack->offset = drflac__be2host_64(pTrack->offset);
+                        pRunningTrackData = (char*)pTrackData;
+
+                        for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                            drflac_uint8 indexCount;
+
+                            DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES);
+                            pRunningData      += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1; /* Skip forward, but not beyond the last byte in the CUESHEET_TRACK block which is the index count. */
+                            pRunningTrackData += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1;
+
+                            /* Grab the index count for the next part. */
+                            indexCount = pRunningData[0];
+                            pRunningData      += 1;
+                            pRunningTrackData += 1;
+
+                            /* Extract each track index. */
+                            for (iIndex = 0; iIndex < indexCount; ++iIndex) {
+                                drflac_cuesheet_track_index* pTrackIndex = (drflac_cuesheet_track_index*)pRunningTrackData;
+
+                                DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES);
+                                pRunningData      += DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES;
+                                pRunningTrackData += sizeof(drflac_cuesheet_track_index);
+
+                                pTrackIndex->offset = drflac__be2host_64(pTrackIndex->offset);
+                            }
                          }
+
+                        metadata.data.cuesheet.pTrackData = pTrackData;
                      }
  
+                    /* The original data is no longer needed. */
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                    pRawData = NULL;
+
                      onMeta(pUserDataMD, &metadata);
  
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                    drflac__free_from_callbacks(pTrackData, pAllocationCallbacks);
+                    pTrackData = NULL;
                  }
              } break;
  
@@ -6580,28 +6785,28 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                      pRunningData    = (const char*)pRawData;
                      pRunningDataEnd = (const char*)pRawData + blockSize;
  
-                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.type       = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.mimeLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
  
                      /* Need space for the rest of the block */
                      if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
                          drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
                          return DRFLAC_FALSE;
                      }
-                    metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
-                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.mime              = pRunningData;                                   pRunningData += metadata.data.picture.mimeLength;
+                    metadata.data.picture.descriptionLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
  
                      /* Need space for the rest of the block */
                      if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
                          drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
                          return DRFLAC_FALSE;
                      }
-                    metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
-                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.description     = pRunningData;                                   pRunningData += metadata.data.picture.descriptionLength;
+                    metadata.data.picture.width           = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.height          = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.colorDepth      = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.indexColorCount = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.pictureDataSize = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
                      metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
  
                      /* Need space for the picture after the block */
@@ -6679,9 +6884,9 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
          }
      }
  
-    *pSeektablePos = seektablePos;
-    *pSeektableSize = seektableSize;
-    *pFirstFramePos = runningFilePos;
+    *pSeektablePos   = seektablePos;
+    *pSeekpointCount = seektableSize / DRFLAC_SEEKPOINT_SIZE_IN_BYTES;
+    *pFirstFramePos  = runningFilePos;
  
      return DRFLAC_TRUE;
  }
@@ -7711,11 +7916,11 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
      drflac_uint32 wholeSIMDVectorCountPerChannel;
      drflac_uint32 decodedSamplesAllocationSize;
  #ifndef DR_FLAC_NO_OGG
-    drflac_oggbs oggbs;
+    drflac_oggbs* pOggbs = NULL;
  #endif
      drflac_uint64 firstFramePos;
      drflac_uint64 seektablePos;
-    drflac_uint32 seektableSize;
+    drflac_uint32 seekpointCount;
      drflac_allocation_callbacks allocationCallbacks;
      drflac* pFlac;
  
@@ -7769,18 +7974,21 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
      /* There's additional data required for Ogg streams. */
      if (init.container == drflac_container_ogg) {
          allocationSize += sizeof(drflac_oggbs);
-    }
  
-    DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs));
-    if (init.container == drflac_container_ogg) {
-        oggbs.onRead = onRead;
-        oggbs.onSeek = onSeek;
-        oggbs.pUserData = pUserData;
-        oggbs.currentBytePos = init.oggFirstBytePos;
-        oggbs.firstBytePos = init.oggFirstBytePos;
-        oggbs.serialNumber = init.oggSerial;
-        oggbs.bosPageHeader = init.oggBosHeader;
-        oggbs.bytesRemainingInPage = 0;
+        pOggbs = (drflac_oggbs*)drflac__malloc_from_callbacks(sizeof(*pOggbs), &allocationCallbacks);
+        if (pOggbs == NULL) {
+            return NULL; /*DRFLAC_OUT_OF_MEMORY;*/
+        }
+
+        DRFLAC_ZERO_MEMORY(pOggbs, sizeof(*pOggbs));
+        pOggbs->onRead = onRead;
+        pOggbs->onSeek = onSeek;
+        pOggbs->pUserData = pUserData;
+        pOggbs->currentBytePos = init.oggFirstBytePos;
+        pOggbs->firstBytePos = init.oggFirstBytePos;
+        pOggbs->serialNumber = init.oggSerial;
+        pOggbs->bosPageHeader = init.oggBosHeader;
+        pOggbs->bytesRemainingInPage = 0;
      }
  #endif
  
@@ -7789,9 +7997,9 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
      consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading
      and decoding the metadata.
      */
-    firstFramePos = 42;   /* <-- We know we are at byte 42 at this point. */
-    seektablePos  = 0;
-    seektableSize = 0;
+    firstFramePos  = 42;   /* <-- We know we are at byte 42 at this point. */
+    seektablePos   = 0;
+    seekpointCount = 0;
      if (init.hasMetadataBlocks) {
          drflac_read_proc onReadOverride = onRead;
          drflac_seek_proc onSeekOverride = onSeek;
@@ -7801,20 +8009,26 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
          if (init.container == drflac_container_ogg) {
              onReadOverride = drflac__on_read_ogg;
              onSeekOverride = drflac__on_seek_ogg;
-            pUserDataOverride = (void*)&oggbs;
+            pUserDataOverride = (void*)pOggbs;
          }
  #endif
  
-        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) {
+        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seekpointCount, &allocationCallbacks)) {
+        #ifndef DR_FLAC_NO_OGG
+            drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
+        #endif
              return NULL;
          }
  
-        allocationSize += seektableSize;
+        allocationSize += seekpointCount * sizeof(drflac_seekpoint);
      }
  
  
      pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks);
      if (pFlac == NULL) {
+    #ifndef DR_FLAC_NO_OGG
+        drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
+    #endif
          return NULL;
      }
  
@@ -7824,8 +8038,12 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
  
  #ifndef DR_FLAC_NO_OGG
      if (init.container == drflac_container_ogg) {
-        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
-        *pInternalOggbs = oggbs;
+        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + (seekpointCount * sizeof(drflac_seekpoint)));
+        DRFLAC_COPY_MEMORY(pInternalOggbs, pOggbs, sizeof(*pOggbs));
+
+        /* At this point the pOggbs object has been handed over to pInternalOggbs and can be freed. */
+        drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
+        pOggbs = NULL;
  
          /* The Ogg bistream needs to be layered on top of the original bitstream. */
          pFlac->bs.onRead = drflac__on_read_ogg;
@@ -7849,7 +8067,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
      {
          /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */
          if (seektablePos != 0) {
-            pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints);
+            pFlac->seekpointCount = seekpointCount;
              pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
  
              DRFLAC_ASSERT(pFlac->bs.onSeek != NULL);
@@ -7857,18 +8075,20 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
  
              /* Seek to the seektable, then just read directly into our seektable buffer. */
              if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
-                if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) {
-                    /* Endian swap. */
-                    drflac_uint32 iSeekpoint;
-                    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+                drflac_uint32 iSeekpoint;
+
+                for (iSeekpoint = 0; iSeekpoint < seekpointCount; iSeekpoint += 1) {
+                    if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints + iSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) == DRFLAC_SEEKPOINT_SIZE_IN_BYTES) {
+                        /* Endian swap. */
                          pFlac->pSeekpoints[iSeekpoint].firstPCMFrame   = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame);
                          pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset);
                          pFlac->pSeekpoints[iSeekpoint].pcmFrameCount   = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount);
+                    } else {
+                        /* Failed to read the seektable. Pretend we don't have one. */
+                        pFlac->pSeekpoints = NULL;
+                        pFlac->seekpointCount = 0;
+                        break;
                      }
-                } else {
-                    /* Failed to read the seektable. Pretend we don't have one. */
-                    pFlac->pSeekpoints = NULL;
-                    pFlac->seekpointCount = 0;
                  }
  
                  /* We need to seek back to where we were. If this fails it's a critical error. */
@@ -7917,7 +8137,9 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
  
  #ifndef DR_FLAC_NO_STDIO
  #include <stdio.h>
+#ifndef DR_FLAC_NO_WCHAR
  #include <wchar.h>      /* For wcslen(), wcsrtombs() */
+#endif
  
  /* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
  #include <errno.h>
@@ -8383,6 +8605,7 @@ fallback, so if you notice your compiler not detecting this properly I'm happy t
      #endif
  #endif
  
+#ifndef DR_FLAC_NO_WCHAR
  static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks)
  {
      if (ppFile != NULL) {
@@ -8411,10 +8634,23 @@ static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, cons
      }
  #else
      /*
-    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
-    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
-    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because
+       fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note
+       that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler
+       error I'll look into improving compatibility.
      */
+
+       /*
+       Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just
+       need to abort with an error. If you encounter a compiler lacking such support, add it to this list
+       and submit a bug report and it'll be added to the library upstream.
+       */
+       #if defined(__DJGPP__)
+       {
+               /* Nothing to do here. This will fall through to the error check below. */
+       }
+       #else
      {
          mbstate_t mbs;
          size_t lenMB;
@@ -8456,6 +8692,7 @@ static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, cons
  
          drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
      }
+       #endif
  
      if (*ppFile == NULL) {
          return DRFLAC_ERROR;
@@ -8464,6 +8701,7 @@ static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, cons
  
      return DRFLAC_SUCCESS;
  }
+#endif
  
  static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
  {
@@ -8496,6 +8734,7 @@ DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocati
      return pFlac;
  }
  
+#ifndef DR_FLAC_NO_WCHAR
  DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
  {
      drflac* pFlac;
@@ -8513,6 +8752,7 @@ DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_all
  
      return pFlac;
  }
+#endif
  
  DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
  {
@@ -8532,6 +8772,7 @@ DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_
      return pFlac;
  }
  
+#ifndef DR_FLAC_NO_WCHAR
  DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
  {
      drflac* pFlac;
@@ -8549,6 +8790,7 @@ DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, dr
  
      return pFlac;
  }
+#endif
  #endif  /* DR_FLAC_NO_STDIO */
  
  static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
@@ -11348,6 +11590,7 @@ DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 p
          return drflac__seek_to_first_frame(pFlac);
      } else {
          drflac_bool32 wasSuccessful = DRFLAC_FALSE;
+        drflac_uint64 originalPCMFrame = pFlac->currentPCMFrame;
  
          /* Clamp the sample to the end. */
          if (pcmFrameIndex > pFlac->totalPCMFrameCount) {
@@ -11405,7 +11648,16 @@ DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 p
              }
          }
  
-        pFlac->currentPCMFrame = pcmFrameIndex;
+        if (wasSuccessful) {
+            pFlac->currentPCMFrame = pcmFrameIndex;
+        } else {
+            /* Seek failed. Try putting the decoder back to it's original state. */
+            if (drflac_seek_to_pcm_frame(pFlac, originalPCMFrame) == DRFLAC_FALSE) {
+                /* Failed to seek back to the original PCM frame. Fall back to 0. */
+                drflac_seek_to_pcm_frame(pFlac, 0);
+            }
+        }
+
          return wasSuccessful;
      }
  }
@@ -11471,7 +11723,7 @@ static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned
          DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
      } else {                                                                                                                                                        \
          drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
-        if (dataSize > DRFLAC_SIZE_MAX) {                                                                                                                           \
+        if (dataSize > (drflac_uint64)DRFLAC_SIZE_MAX) {                                                                                                            \
              goto on_error;  /* The decoded data is too big. */                                                                                                      \
          }                                                                                                                                                           \
                                                                                                                                                                      \
@@ -11736,7 +11988,7 @@ DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator
          return NULL;
      }
  
-    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
+    length = drflac__le2host_32_ptr_unaligned(pIter->pRunningData);
      pIter->pRunningData += 4;
  
      pComment = pIter->pRunningData;
@@ -11806,6 +12058,47 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
  /*
  REVISION HISTORY
  ================
+v0.12.39 - 2022-09-17
+  - Fix compilation with DJGPP.
+  - Fix compilation error with Visual Studio 2019 and the ARM build.
+  - Fix an error with SSE 4.1 detection.
+  - Add support for disabling wchar_t with DR_WAV_NO_WCHAR.
+  - Improve compatibility with compilers which lack support for explicit struct packing.
+  - Improve compatibility with low-end and embedded hardware by reducing the amount of stack
+    allocation when loading an Ogg encapsulated file.
+
+v0.12.38 - 2022-04-10
+  - Fix compilation error on older versions of GCC.
+
+v0.12.37 - 2022-02-12
+  - Improve ARM detection.
+
+v0.12.36 - 2022-02-07
+  - Fix a compilation error with the ARM build.
+
+v0.12.35 - 2022-02-06
+  - Fix a bug due to underestimating the amount of precision required for the prediction stage.
+  - Fix some bugs found from fuzz testing.
+
+v0.12.34 - 2022-01-07
+  - Fix some misalignment bugs when reading metadata.
+
+v0.12.33 - 2021-12-22
+  - Fix a bug with seeking when the seek table does not start at PCM frame 0.
+
+v0.12.32 - 2021-12-11
+  - Fix a warning with Clang.
+
+v0.12.31 - 2021-08-16
+  - Silence some warnings.
+
+v0.12.30 - 2021-07-31
+  - Fix platform detection for ARM64.
+
+v0.12.29 - 2021-04-02
+  - Fix a bug where the running PCM frame index is set to an invalid value when over-seeking.
+  - Fix a decoding error due to an incorrect validation check.
+
  v0.12.28 - 2021-02-21
    - Fix a warning due to referencing _MSC_VER when it is undefined.