From b24e7fced81a43e2b992f8615bd61c545884cbf2 Mon Sep 17 00:00:00 2001
From: negativeExponent <negativeExponent@users.noreply.github.com>
Date: Wed, 21 Apr 2021 07:17:03 +0800
Subject: [PATCH] git subrepo clone (merge) https://github.com/rtissera/libchdr
 deps/libchdr

subrepo:
  subdir:   "deps/libchdr"
  merged:   "a17c0da"
upstream:
  origin:   "https://github.com/rtissera/libchdr"
  branch:   "master"
  commit:   "a17c0da"
git-subrepo:
  version:  "0.4.3"
  origin:   "???"
  commit:   "???"
---
 deps/libchdr/.gitrepo                         |    12 +
 deps/libchdr/CMakeLists.txt                   |   126 +
 deps/libchdr/LICENSE.txt                      |    24 +
 deps/libchdr/README.md                        |     7 +
 deps/libchdr/deps/lzma-19.00/CMakeLists.txt   |    32 +
 deps/libchdr/deps/lzma-19.00/LICENSE          |     3 +
 .../libchdr/deps/lzma-19.00/include/7zTypes.h |   375 +
 deps/libchdr/deps/lzma-19.00/include/Alloc.h  |    51 +
 deps/libchdr/deps/lzma-19.00/include/Bra.h    |    64 +
 .../deps/lzma-19.00/include/Compiler.h        |    33 +
 .../libchdr/deps/lzma-19.00/include/CpuArch.h |   336 +
 deps/libchdr/deps/lzma-19.00/include/Delta.h  |    19 +
 deps/libchdr/deps/lzma-19.00/include/LzFind.h |   121 +
 deps/libchdr/deps/lzma-19.00/include/LzHash.h |    57 +
 deps/libchdr/deps/lzma-19.00/include/Lzma86.h |   111 +
 .../libchdr/deps/lzma-19.00/include/LzmaDec.h |   234 +
 .../libchdr/deps/lzma-19.00/include/LzmaEnc.h |    76 +
 .../libchdr/deps/lzma-19.00/include/LzmaLib.h |   131 +
 .../libchdr/deps/lzma-19.00/include/Precomp.h |    10 +
 deps/libchdr/deps/lzma-19.00/include/Sort.h   |    18 +
 deps/libchdr/deps/lzma-19.00/lzma-history.txt |   446 +
 deps/libchdr/deps/lzma-19.00/lzma.txt         |   328 +
 deps/libchdr/deps/lzma-19.00/lzma.vcxproj     |   543 +
 .../deps/lzma-19.00/lzma.vcxproj.filters      |    17 +
 deps/libchdr/deps/lzma-19.00/src/Alloc.c      |   455 +
 deps/libchdr/deps/lzma-19.00/src/Bra86.c      |    82 +
 deps/libchdr/deps/lzma-19.00/src/BraIA64.c    |    53 +
 deps/libchdr/deps/lzma-19.00/src/CpuArch.c    |   218 +
 deps/libchdr/deps/lzma-19.00/src/Delta.c      |    64 +
 deps/libchdr/deps/lzma-19.00/src/LzFind.c     |  1127 ++
 deps/libchdr/deps/lzma-19.00/src/Lzma86Dec.c  |    54 +
 deps/libchdr/deps/lzma-19.00/src/LzmaDec.c    |  1185 ++
 deps/libchdr/deps/lzma-19.00/src/LzmaEnc.c    |  1330 ++
 deps/libchdr/deps/lzma-19.00/src/Sort.c       |   141 +
 deps/libchdr/deps/zlib-1.2.11/CMakeLists.txt  |    27 +
 deps/libchdr/deps/zlib-1.2.11/ChangeLog       |  1515 ++
 deps/libchdr/deps/zlib-1.2.11/FAQ             |   368 +
 deps/libchdr/deps/zlib-1.2.11/INDEX           |    68 +
 deps/libchdr/deps/zlib-1.2.11/Makefile        |     5 +
 deps/libchdr/deps/zlib-1.2.11/Makefile.in     |   410 +
 deps/libchdr/deps/zlib-1.2.11/README          |   115 +
 deps/libchdr/deps/zlib-1.2.11/adler32.c       |   186 +
 deps/libchdr/deps/zlib-1.2.11/compress.c      |    86 +
 deps/libchdr/deps/zlib-1.2.11/configure       |   921 ++
 deps/libchdr/deps/zlib-1.2.11/crc32.c         |   442 +
 deps/libchdr/deps/zlib-1.2.11/crc32.h         |   441 +
 deps/libchdr/deps/zlib-1.2.11/deflate.c       |  2163 +++
 deps/libchdr/deps/zlib-1.2.11/deflate.h       |   349 +
 .../deps/zlib-1.2.11/doc/algorithm.txt        |   209 +
 deps/libchdr/deps/zlib-1.2.11/doc/rfc1950.txt |   619 +
 deps/libchdr/deps/zlib-1.2.11/doc/rfc1951.txt |   955 ++
 deps/libchdr/deps/zlib-1.2.11/doc/rfc1952.txt |   675 +
 .../libchdr/deps/zlib-1.2.11/doc/txtvsbin.txt |   107 +
 deps/libchdr/deps/zlib-1.2.11/gzclose.c       |    25 +
 deps/libchdr/deps/zlib-1.2.11/gzguts.h        |   218 +
 deps/libchdr/deps/zlib-1.2.11/gzlib.c         |   637 +
 deps/libchdr/deps/zlib-1.2.11/gzread.c        |   654 +
 deps/libchdr/deps/zlib-1.2.11/gzwrite.c       |   665 +
 deps/libchdr/deps/zlib-1.2.11/infback.c       |   640 +
 deps/libchdr/deps/zlib-1.2.11/inffast.c       |   323 +
 deps/libchdr/deps/zlib-1.2.11/inffast.h       |    11 +
 deps/libchdr/deps/zlib-1.2.11/inffixed.h      |    94 +
 deps/libchdr/deps/zlib-1.2.11/inflate.c       |  1561 ++
 deps/libchdr/deps/zlib-1.2.11/inflate.h       |   125 +
 deps/libchdr/deps/zlib-1.2.11/inftrees.c      |   304 +
 deps/libchdr/deps/zlib-1.2.11/inftrees.h      |    62 +
 deps/libchdr/deps/zlib-1.2.11/make_vms.com    |   867 ++
 deps/libchdr/deps/zlib-1.2.11/treebuild.xml   |   116 +
 deps/libchdr/deps/zlib-1.2.11/trees.c         |  1203 ++
 deps/libchdr/deps/zlib-1.2.11/trees.h         |   128 +
 deps/libchdr/deps/zlib-1.2.11/uncompr.c       |    93 +
 deps/libchdr/deps/zlib-1.2.11/zconf.h         |   534 +
 deps/libchdr/deps/zlib-1.2.11/zconf.h.cmakein |   536 +
 deps/libchdr/deps/zlib-1.2.11/zconf.h.in      |   534 +
 deps/libchdr/deps/zlib-1.2.11/zlib.3          |   149 +
 deps/libchdr/deps/zlib-1.2.11/zlib.3.pdf      |   Bin 0 -> 19318 bytes
 deps/libchdr/deps/zlib-1.2.11/zlib.h          |  1912 +++
 deps/libchdr/deps/zlib-1.2.11/zlib.map        |    94 +
 deps/libchdr/deps/zlib-1.2.11/zlib.pc.cmakein |    13 +
 deps/libchdr/deps/zlib-1.2.11/zlib.pc.in      |    13 +
 deps/libchdr/deps/zlib-1.2.11/zlib2ansi       |   152 +
 deps/libchdr/deps/zlib-1.2.11/zutil.c         |   325 +
 deps/libchdr/deps/zlib-1.2.11/zutil.h         |   271 +
 deps/libchdr/include/dr_libs/dr_flac.h        | 12214 ++++++++++++++++
 deps/libchdr/include/libchdr/bitstream.h      |    43 +
 deps/libchdr/include/libchdr/cdrom.h          |   110 +
 deps/libchdr/include/libchdr/chd.h            |   427 +
 deps/libchdr/include/libchdr/chdconfig.h      |    10 +
 deps/libchdr/include/libchdr/coretypes.h      |    48 +
 deps/libchdr/include/libchdr/flac.h           |    50 +
 deps/libchdr/include/libchdr/huffman.h        |    90 +
 deps/libchdr/pkg-config.pc.in                 |    10 +
 deps/libchdr/src/libchdr_bitstream.c          |   125 +
 deps/libchdr/src/libchdr_cdrom.c              |   415 +
 deps/libchdr/src/libchdr_chd.c                |  2737 ++++
 deps/libchdr/src/libchdr_flac.c               |   302 +
 deps/libchdr/src/libchdr_huffman.c            |   544 +
 deps/libchdr/src/link.T                       |     5 +
 deps/libchdr/tests/benchmark.c                |    49 +
 deps/libchdr/tests/build_tests.sh             |     2 +
 100 files changed, 46984 insertions(+)
 create mode 100644 deps/libchdr/.gitrepo
 create mode 100644 deps/libchdr/CMakeLists.txt
 create mode 100644 deps/libchdr/LICENSE.txt
 create mode 100644 deps/libchdr/README.md
 create mode 100644 deps/libchdr/deps/lzma-19.00/CMakeLists.txt
 create mode 100644 deps/libchdr/deps/lzma-19.00/LICENSE
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/7zTypes.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/Alloc.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/Bra.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/Compiler.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/CpuArch.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/Delta.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/LzFind.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/LzHash.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/Lzma86.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/LzmaDec.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/LzmaEnc.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/LzmaLib.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/Precomp.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/include/Sort.h
 create mode 100644 deps/libchdr/deps/lzma-19.00/lzma-history.txt
 create mode 100644 deps/libchdr/deps/lzma-19.00/lzma.txt
 create mode 100644 deps/libchdr/deps/lzma-19.00/lzma.vcxproj
 create mode 100644 deps/libchdr/deps/lzma-19.00/lzma.vcxproj.filters
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/Alloc.c
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/Bra86.c
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/BraIA64.c
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/CpuArch.c
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/Delta.c
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/LzFind.c
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/Lzma86Dec.c
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/LzmaDec.c
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/LzmaEnc.c
 create mode 100644 deps/libchdr/deps/lzma-19.00/src/Sort.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/CMakeLists.txt
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/ChangeLog
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/FAQ
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/INDEX
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/Makefile
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/Makefile.in
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/README
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/adler32.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/compress.c
 create mode 100755 deps/libchdr/deps/zlib-1.2.11/configure
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/crc32.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/crc32.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/deflate.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/deflate.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/doc/algorithm.txt
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/doc/rfc1950.txt
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/doc/rfc1951.txt
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/doc/rfc1952.txt
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/doc/txtvsbin.txt
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/gzclose.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/gzguts.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/gzlib.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/gzread.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/gzwrite.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/infback.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/inffast.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/inffast.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/inffixed.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/inflate.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/inflate.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/inftrees.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/inftrees.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/make_vms.com
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/treebuild.xml
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/trees.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/trees.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/uncompr.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zconf.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zconf.h.cmakein
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zconf.h.in
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zlib.3
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zlib.3.pdf
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zlib.h
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zlib.map
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zlib.pc.cmakein
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zlib.pc.in
 create mode 100755 deps/libchdr/deps/zlib-1.2.11/zlib2ansi
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zutil.c
 create mode 100644 deps/libchdr/deps/zlib-1.2.11/zutil.h
 create mode 100644 deps/libchdr/include/dr_libs/dr_flac.h
 create mode 100644 deps/libchdr/include/libchdr/bitstream.h
 create mode 100644 deps/libchdr/include/libchdr/cdrom.h
 create mode 100644 deps/libchdr/include/libchdr/chd.h
 create mode 100644 deps/libchdr/include/libchdr/chdconfig.h
 create mode 100644 deps/libchdr/include/libchdr/coretypes.h
 create mode 100644 deps/libchdr/include/libchdr/flac.h
 create mode 100644 deps/libchdr/include/libchdr/huffman.h
 create mode 100644 deps/libchdr/pkg-config.pc.in
 create mode 100644 deps/libchdr/src/libchdr_bitstream.c
 create mode 100644 deps/libchdr/src/libchdr_cdrom.c
 create mode 100644 deps/libchdr/src/libchdr_chd.c
 create mode 100644 deps/libchdr/src/libchdr_flac.c
 create mode 100644 deps/libchdr/src/libchdr_huffman.c
 create mode 100644 deps/libchdr/src/link.T
 create mode 100644 deps/libchdr/tests/benchmark.c
 create mode 100755 deps/libchdr/tests/build_tests.sh

diff --git a/deps/libchdr/.gitrepo b/deps/libchdr/.gitrepo
new file mode 100644
index 00000000..560b69c8
--- /dev/null
+++ b/deps/libchdr/.gitrepo
@@ -0,0 +1,12 @@
+; DO NOT EDIT (unless you know what you are doing)
+;
+; This subdirectory is a git "subrepo", and this file is maintained by the
+; git-subrepo command. See https://github.com/git-commands/git-subrepo#readme
+;
+[subrepo]
+	remote = https://github.com/rtissera/libchdr
+	branch = master
+	commit = a17c0da7e9efa8cbb752c707df7d5457b2149fb8
+	parent = 2f8b37246c10773b391368f99311a192f1fb331f
+	method = merge
+	cmdver = 0.4.3
diff --git a/deps/libchdr/CMakeLists.txt b/deps/libchdr/CMakeLists.txt
new file mode 100644
index 00000000..9842447a
--- /dev/null
+++ b/deps/libchdr/CMakeLists.txt
@@ -0,0 +1,126 @@
+cmake_minimum_required(VERSION 3.9)
+
+project(chdr C)
+
+set(CHDR_VERSION_MAJOR 0)
+set(CHDR_VERSION_MINOR 1)
+
+if(CMAKE_PROJECT_NAME STREQUAL "chdr")
+  option(BUILD_SHARED_LIBS "Build libchdr also as a shared library" ON)
+endif()
+option(INSTALL_STATIC_LIBS "Install static libraries" OFF)
+option(WITH_SYSTEM_ZLIB "Use system provided zlib library" OFF)
+
+if(CMAKE_C_COMPILER_ID MATCHES "GNU")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fPIC -O3 -flto")
+  set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ON)
+elseif(CMAKE_C_COMPILER_ID MATCHES "Clang")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3")
+  set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF)
+endif()
+
+include(FindPkgConfig)
+include(GNUInstallDirs)
+
+# Detect processor type.
+if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64")
+  set(CPU_ARCH "x64")
+elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
+  # MSVC x86/x64
+  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
+    set(CPU_ARCH "x64")
+  else()
+    set(CPU_ARCH "x86")
+  endif()
+elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386" OR
+       ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686")
+  set(CPU_ARCH "x86")
+elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
+  set(CPU_ARCH "aarch64")
+elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7-a")
+  set(CPU_ARCH "arm")
+else()
+  message(FATAL_ERROR "Unknown system processor: " ${CMAKE_SYSTEM_PROCESSOR})
+endif()
+
+#--------------------------------------------------
+# dependencies
+#--------------------------------------------------
+
+
+# lzma
+add_subdirectory(deps/lzma-19.00 EXCLUDE_FROM_ALL)
+  list(APPEND CHDR_LIBS lzma)
+  list(APPEND CHDR_INCLUDES lzma)
+
+# zlib
+if (WITH_SYSTEM_ZLIB)
+  pkg_check_modules(ZLIB REQUIRED zlib)
+  list(APPEND PLATFORM_INCLUDES ${ZLIB_INCLUDE_DIRS})
+  list(APPEND PLATFORM_LIBS ${ZLIB_LIBRARIES})
+else()
+  add_subdirectory(deps/zlib-1.2.11 EXCLUDE_FROM_ALL)
+  list(APPEND CHDR_LIBS zlib)
+  list(APPEND CHDR_INCLUDES zlib)
+endif()
+
+#--------------------------------------------------
+# chdr
+#--------------------------------------------------
+
+set(CHDR_SOURCES
+  src/libchdr_bitstream.c
+  src/libchdr_cdrom.c
+  src/libchdr_chd.c
+  src/libchdr_flac.c
+  src/libchdr_huffman.c
+)
+
+list(APPEND CHDR_INCLUDES ${CMAKE_CURRENT_BINARY_DIR}/include)
+
+add_library(chdr-static STATIC ${CHDR_SOURCES})
+target_include_directories(chdr-static PRIVATE ${CHDR_INCLUDES} ${PLATFORM_INCLUDES} PUBLIC include)
+target_compile_definitions(chdr-static PRIVATE ${CHDR_DEFS})
+target_link_libraries(chdr-static PRIVATE ${CHDR_LIBS} ${PLATFORM_LIBS})
+
+if (INSTALL_STATIC_LIBS)
+  install(TARGETS chdr-static ${CHDR_LIBS}
+    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  )
+endif()
+
+if (BUILD_SHARED_LIBS)
+  set(CMAKE_CXX_VISIBILITY_PRESET hidden)
+  set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
+
+  add_library(chdr SHARED ${CHDR_SOURCES})
+  target_include_directories(chdr PRIVATE ${CHDR_INCLUDES} ${PLATFORM_INCLUDES} PUBLIC include)
+  target_compile_definitions(chdr PRIVATE ${CHDR_DEFS})
+  target_link_libraries(chdr PRIVATE ${CHDR_LIBS} ${PLATFORM_LIBS})
+
+  if(MSVC)
+    target_compile_definitions(chdr PUBLIC "CHD_DLL")
+    target_compile_definitions(chdr PRIVATE "CHD_DLL_EXPORTS")
+  elseif(APPLE)
+    target_link_options(chdr PRIVATE -Wl,-dead_strip -Wl,-exported_symbol,_chd_*)
+  else()
+    target_link_options(chdr PRIVATE -Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/src/link.T -Wl,--no-undefined)
+  endif()
+
+  set_target_properties(chdr PROPERTIES PUBLIC_HEADER "include/libchdr/bitstream.h;include/libchdr/cdrom.h;include/libchdr/chd.h;include/libchdr/chdconfig.h;include/libchdr/coretypes.h;include/libchdr/flac.h;include/libchdr/huffman.h")
+  set_target_properties(chdr PROPERTIES VERSION "${CHDR_VERSION_MAJOR}.${CHDR_VERSION_MINOR}")
+
+  if (CMAKE_BUILD_TYPE MATCHES Release)
+    #add_custom_command(TARGET chdr POST_BUILD COMMAND ${CMAKE_STRIP} libchdr.so)
+  endif (CMAKE_BUILD_TYPE MATCHES Release)
+
+  install(TARGETS chdr
+    LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+    ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+    PUBLIC_HEADER DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libchdr"
+  )
+
+  configure_file(pkg-config.pc.in ${CMAKE_BINARY_DIR}/libchdr.pc @ONLY)
+  install(FILES ${CMAKE_BINARY_DIR}/libchdr.pc DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+endif()
+
diff --git a/deps/libchdr/LICENSE.txt b/deps/libchdr/LICENSE.txt
new file mode 100644
index 00000000..1c36e5b5
--- /dev/null
+++ b/deps/libchdr/LICENSE.txt
@@ -0,0 +1,24 @@
+Copyright Romain Tisserand
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the <organization> nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/deps/libchdr/README.md b/deps/libchdr/README.md
new file mode 100644
index 00000000..940920a5
--- /dev/null
+++ b/deps/libchdr/README.md
@@ -0,0 +1,7 @@
+# libchdr
+
+libchdr is a standalone library for reading MAME's CHDv1-v5 formats.
+
+The code is based off of MAME's old C codebase which read up to CHDv4 with OS-dependent features removed, and CHDv5 support backported from MAME's current C++ codebase.
+
+libchdr is licensed under the BSD 3-Clause (see [LICENSE.txt](LICENSE.txt)) and uses third party libraries that are each distributed under their own terms (see each library's license in [deps/](deps/)).
diff --git a/deps/libchdr/deps/lzma-19.00/CMakeLists.txt b/deps/libchdr/deps/lzma-19.00/CMakeLists.txt
new file mode 100644
index 00000000..fb01f60f
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/CMakeLists.txt
@@ -0,0 +1,32 @@
+add_library(lzma STATIC
+  include/7zTypes.h
+  include/Alloc.h
+  include/Bra.h
+  include/Compiler.h
+  include/CpuArch.h
+  include/Delta.h
+  include/LzFind.h
+  include/LzHash.h
+  include/Lzma86.h
+  include/LzmaDec.h
+  include/LzmaEnc.h
+  include/LzmaLib.h
+  include/Precomp.h
+  include/Sort.h
+  src/Alloc.c
+  src/Bra86.c
+  src/BraIA64.c
+  src/CpuArch.c
+  src/Delta.c
+  src/LzFind.c
+  src/Lzma86Dec.c
+  src/LzmaDec.c
+  src/LzmaEnc.c
+  src/Sort.c
+)
+
+target_compile_definitions(lzma PRIVATE _7ZIP_ST)
+
+target_include_directories(lzma PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
+target_include_directories(lzma INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/include")
+
diff --git a/deps/libchdr/deps/lzma-19.00/LICENSE b/deps/libchdr/deps/lzma-19.00/LICENSE
new file mode 100644
index 00000000..5f570516
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/LICENSE
@@ -0,0 +1,3 @@
+LZMA SDK is placed in the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute the original LZMA SDK code, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means.
\ No newline at end of file
diff --git a/deps/libchdr/deps/lzma-19.00/include/7zTypes.h b/deps/libchdr/deps/lzma-19.00/include/7zTypes.h
new file mode 100644
index 00000000..65b3af63
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/7zTypes.h
@@ -0,0 +1,375 @@
+/* 7zTypes.h -- Basic types
+2018-08-04 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_TYPES_H
+#define __7Z_TYPES_H
+
+#ifdef _WIN32
+/* #include <windows.h> */
+#endif
+
+#include <stddef.h>
+
+#ifndef EXTERN_C_BEGIN
+#ifdef __cplusplus
+#define EXTERN_C_BEGIN extern "C" {
+#define EXTERN_C_END }
+#else
+#define EXTERN_C_BEGIN
+#define EXTERN_C_END
+#endif
+#endif
+
+EXTERN_C_BEGIN
+
+#define SZ_OK 0
+
+#define SZ_ERROR_DATA 1
+#define SZ_ERROR_MEM 2
+#define SZ_ERROR_CRC 3
+#define SZ_ERROR_UNSUPPORTED 4
+#define SZ_ERROR_PARAM 5
+#define SZ_ERROR_INPUT_EOF 6
+#define SZ_ERROR_OUTPUT_EOF 7
+#define SZ_ERROR_READ 8
+#define SZ_ERROR_WRITE 9
+#define SZ_ERROR_PROGRESS 10
+#define SZ_ERROR_FAIL 11
+#define SZ_ERROR_THREAD 12
+
+#define SZ_ERROR_ARCHIVE 16
+#define SZ_ERROR_NO_ARCHIVE 17
+
+typedef int SRes;
+
+
+#ifdef _WIN32
+
+/* typedef DWORD WRes; */
+typedef unsigned WRes;
+#define MY_SRes_HRESULT_FROM_WRes(x) HRESULT_FROM_WIN32(x)
+
+#else
+
+typedef int WRes;
+#define MY__FACILITY_WIN32 7
+#define MY__FACILITY__WRes MY__FACILITY_WIN32
+#define MY_SRes_HRESULT_FROM_WRes(x) ((HRESULT)(x) <= 0 ? ((HRESULT)(x)) : ((HRESULT) (((x) & 0x0000FFFF) | (MY__FACILITY__WRes << 16) | 0x80000000)))
+
+#endif
+
+
+#ifndef RINOK
+#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; }
+#endif
+
+typedef unsigned char Byte;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#ifdef _LZMA_UINT32_IS_ULONG
+typedef long Int32;
+typedef unsigned long UInt32;
+#else
+typedef int Int32;
+typedef unsigned int UInt32;
+#endif
+
+#ifdef _SZ_NO_INT_64
+
+/* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers.
+   NOTES: Some code will work incorrectly in that case! */
+
+typedef long Int64;
+typedef unsigned long UInt64;
+
+#else
+
+#if defined(_MSC_VER) || defined(__BORLANDC__)
+typedef __int64 Int64;
+typedef unsigned __int64 UInt64;
+#define UINT64_CONST(n) n
+#else
+typedef long long int Int64;
+typedef unsigned long long int UInt64;
+#define UINT64_CONST(n) n ## ULL
+#endif
+
+#endif
+
+#ifdef _LZMA_NO_SYSTEM_SIZE_T
+typedef UInt32 SizeT;
+#else
+typedef size_t SizeT;
+#endif
+
+typedef int BoolInt;
+/* typedef BoolInt Bool; */
+#define True 1
+#define False 0
+
+
+#ifdef _WIN32
+#define MY_STD_CALL __stdcall
+#else
+#define MY_STD_CALL
+#endif
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1300
+#define MY_NO_INLINE __declspec(noinline)
+#else
+#define MY_NO_INLINE
+#endif
+
+#define MY_FORCE_INLINE __forceinline
+
+#define MY_CDECL __cdecl
+#define MY_FAST_CALL __fastcall
+
+#else
+
+#define MY_NO_INLINE
+#define MY_FORCE_INLINE
+#define MY_CDECL
+#define MY_FAST_CALL
+
+/* inline keyword : for C++ / C99 */
+
+/* GCC, clang: */
+/*
+#if defined (__GNUC__) && (__GNUC__ >= 4)
+#define MY_FORCE_INLINE __attribute__((always_inline))
+#define MY_NO_INLINE __attribute__((noinline))
+#endif
+*/
+
+#endif
+
+
+/* The following interfaces use first parameter as pointer to structure */
+
+typedef struct IByteIn IByteIn;
+struct IByteIn
+{
+  Byte (*Read)(const IByteIn *p); /* reads one byte, returns 0 in case of EOF or error */
+};
+#define IByteIn_Read(p) (p)->Read(p)
+
+
+typedef struct IByteOut IByteOut;
+struct IByteOut
+{
+  void (*Write)(const IByteOut *p, Byte b);
+};
+#define IByteOut_Write(p, b) (p)->Write(p, b)
+
+
+typedef struct ISeqInStream ISeqInStream;
+struct ISeqInStream
+{
+  SRes (*Read)(const ISeqInStream *p, void *buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) < input(*size)) is allowed */
+};
+#define ISeqInStream_Read(p, buf, size) (p)->Read(p, buf, size)
+
+/* it can return SZ_ERROR_INPUT_EOF */
+SRes SeqInStream_Read(const ISeqInStream *stream, void *buf, size_t size);
+SRes SeqInStream_Read2(const ISeqInStream *stream, void *buf, size_t size, SRes errorType);
+SRes SeqInStream_ReadByte(const ISeqInStream *stream, Byte *buf);
+
+
+typedef struct ISeqOutStream ISeqOutStream;
+struct ISeqOutStream
+{
+  size_t (*Write)(const ISeqOutStream *p, const void *buf, size_t size);
+    /* Returns: result - the number of actually written bytes.
+       (result < size) means error */
+};
+#define ISeqOutStream_Write(p, buf, size) (p)->Write(p, buf, size)
+
+typedef enum
+{
+  SZ_SEEK_SET = 0,
+  SZ_SEEK_CUR = 1,
+  SZ_SEEK_END = 2
+} ESzSeek;
+
+
+typedef struct ISeekInStream ISeekInStream;
+struct ISeekInStream
+{
+  SRes (*Read)(const ISeekInStream *p, void *buf, size_t *size);  /* same as ISeqInStream::Read */
+  SRes (*Seek)(const ISeekInStream *p, Int64 *pos, ESzSeek origin);
+};
+#define ISeekInStream_Read(p, buf, size)   (p)->Read(p, buf, size)
+#define ISeekInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+typedef struct ILookInStream ILookInStream;
+struct ILookInStream
+{
+  SRes (*Look)(const ILookInStream *p, const void **buf, size_t *size);
+    /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream.
+       (output(*size) > input(*size)) is not allowed
+       (output(*size) < input(*size)) is allowed */
+  SRes (*Skip)(const ILookInStream *p, size_t offset);
+    /* offset must be <= output(*size) of Look */
+
+  SRes (*Read)(const ILookInStream *p, void *buf, size_t *size);
+    /* reads directly (without buffer). It's same as ISeqInStream::Read */
+  SRes (*Seek)(const ILookInStream *p, Int64 *pos, ESzSeek origin);
+};
+
+#define ILookInStream_Look(p, buf, size)   (p)->Look(p, buf, size)
+#define ILookInStream_Skip(p, offset)      (p)->Skip(p, offset)
+#define ILookInStream_Read(p, buf, size)   (p)->Read(p, buf, size)
+#define ILookInStream_Seek(p, pos, origin) (p)->Seek(p, pos, origin)
+
+
+SRes LookInStream_LookRead(const ILookInStream *stream, void *buf, size_t *size);
+SRes LookInStream_SeekTo(const ILookInStream *stream, UInt64 offset);
+
+/* reads via ILookInStream::Read */
+SRes LookInStream_Read2(const ILookInStream *stream, void *buf, size_t size, SRes errorType);
+SRes LookInStream_Read(const ILookInStream *stream, void *buf, size_t size);
+
+
+
+typedef struct
+{
+  ILookInStream vt;
+  const ISeekInStream *realStream;
+ 
+  size_t pos;
+  size_t size; /* it's data size */
+  
+  /* the following variables must be set outside */
+  Byte *buf;
+  size_t bufSize;
+} CLookToRead2;
+
+void LookToRead2_CreateVTable(CLookToRead2 *p, int lookahead);
+
+#define LookToRead2_Init(p) { (p)->pos = (p)->size = 0; }
+
+
+typedef struct
+{
+  ISeqInStream vt;
+  const ILookInStream *realStream;
+} CSecToLook;
+
+void SecToLook_CreateVTable(CSecToLook *p);
+
+
+
+typedef struct
+{
+  ISeqInStream vt;
+  const ILookInStream *realStream;
+} CSecToRead;
+
+void SecToRead_CreateVTable(CSecToRead *p);
+
+
+typedef struct ICompressProgress ICompressProgress;
+
+struct ICompressProgress
+{
+  SRes (*Progress)(const ICompressProgress *p, UInt64 inSize, UInt64 outSize);
+    /* Returns: result. (result != SZ_OK) means break.
+       Value (UInt64)(Int64)-1 for size means unknown value. */
+};
+#define ICompressProgress_Progress(p, inSize, outSize) (p)->Progress(p, inSize, outSize)
+
+
+
+typedef struct ISzAlloc ISzAlloc;
+typedef const ISzAlloc * ISzAllocPtr;
+
+struct ISzAlloc
+{
+  void *(*Alloc)(ISzAllocPtr p, size_t size);
+  void (*Free)(ISzAllocPtr p, void *address); /* address can be 0 */
+};
+
+#define ISzAlloc_Alloc(p, size) (p)->Alloc(p, size)
+#define ISzAlloc_Free(p, a) (p)->Free(p, a)
+
+/* deprecated */
+#define IAlloc_Alloc(p, size) ISzAlloc_Alloc(p, size)
+#define IAlloc_Free(p, a) ISzAlloc_Free(p, a)
+
+
+
+
+
+#ifndef MY_offsetof
+  #ifdef offsetof
+    #define MY_offsetof(type, m) offsetof(type, m)
+    /*
+    #define MY_offsetof(type, m) FIELD_OFFSET(type, m)
+    */
+  #else
+    #define MY_offsetof(type, m) ((size_t)&(((type *)0)->m))
+  #endif
+#endif
+
+
+
+#ifndef MY_container_of
+
+/*
+#define MY_container_of(ptr, type, m) container_of(ptr, type, m)
+#define MY_container_of(ptr, type, m) CONTAINING_RECORD(ptr, type, m)
+#define MY_container_of(ptr, type, m) ((type *)((char *)(ptr) - offsetof(type, m)))
+#define MY_container_of(ptr, type, m) (&((type *)0)->m == (ptr), ((type *)(((char *)(ptr)) - MY_offsetof(type, m))))
+*/
+
+/*
+  GCC shows warning: "perhaps the 'offsetof' macro was used incorrectly"
+    GCC 3.4.4 : classes with constructor
+    GCC 4.8.1 : classes with non-public variable members"
+*/
+
+#define MY_container_of(ptr, type, m) ((type *)((char *)(1 ? (ptr) : &((type *)0)->m) - MY_offsetof(type, m)))
+
+
+#endif
+
+#define CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m) ((type *)(ptr))
+
+/*
+#define CONTAINER_FROM_VTBL(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+*/
+#define CONTAINER_FROM_VTBL(ptr, type, m) MY_container_of(ptr, type, m)
+
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL_SIMPLE(ptr, type, m)
+/*
+#define CONTAINER_FROM_VTBL_CLS(ptr, type, m) CONTAINER_FROM_VTBL(ptr, type, m)
+*/
+
+
+
+#ifdef _WIN32
+
+#define CHAR_PATH_SEPARATOR '\\'
+#define WCHAR_PATH_SEPARATOR L'\\'
+#define STRING_PATH_SEPARATOR "\\"
+#define WSTRING_PATH_SEPARATOR L"\\"
+
+#else
+
+#define CHAR_PATH_SEPARATOR '/'
+#define WCHAR_PATH_SEPARATOR L'/'
+#define STRING_PATH_SEPARATOR "/"
+#define WSTRING_PATH_SEPARATOR L"/"
+
+#endif
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/Alloc.h b/deps/libchdr/deps/lzma-19.00/include/Alloc.h
new file mode 100644
index 00000000..64823764
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/Alloc.h
@@ -0,0 +1,51 @@
+/* Alloc.h -- Memory allocation functions
+2018-02-19 : Igor Pavlov : Public domain */
+
+#ifndef __COMMON_ALLOC_H
+#define __COMMON_ALLOC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+void *MyAlloc(size_t size);
+void MyFree(void *address);
+
+#ifdef _WIN32
+
+void SetLargePageSize();
+
+void *MidAlloc(size_t size);
+void MidFree(void *address);
+void *BigAlloc(size_t size);
+void BigFree(void *address);
+
+#else
+
+#define MidAlloc(size) MyAlloc(size)
+#define MidFree(address) MyFree(address)
+#define BigAlloc(size) MyAlloc(size)
+#define BigFree(address) MyFree(address)
+
+#endif
+
+extern const ISzAlloc g_Alloc;
+extern const ISzAlloc g_BigAlloc;
+extern const ISzAlloc g_MidAlloc;
+extern const ISzAlloc g_AlignedAlloc;
+
+
+typedef struct
+{
+  ISzAlloc vt;
+  ISzAllocPtr baseAlloc;
+  unsigned numAlignBits; /* ((1 << numAlignBits) >= sizeof(void *)) */
+  size_t offset;         /* (offset == (k * sizeof(void *)) && offset < (1 << numAlignBits) */
+} CAlignOffsetAlloc;
+
+void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p);
+
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/Bra.h b/deps/libchdr/deps/lzma-19.00/include/Bra.h
new file mode 100644
index 00000000..855e37a6
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/Bra.h
@@ -0,0 +1,64 @@
+/* Bra.h -- Branch converters for executables
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __BRA_H
+#define __BRA_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+These functions convert relative addresses to absolute addresses
+in CALL instructions to increase the compression ratio.
+  
+  In:
+    data     - data buffer
+    size     - size of data
+    ip       - current virtual Instruction Pinter (IP) value
+    state    - state variable for x86 converter
+    encoding - 0 (for decoding), 1 (for encoding)
+  
+  Out:
+    state    - state variable for x86 converter
+
+  Returns:
+    The number of processed bytes. If you call these functions with multiple calls,
+    you must start next call with first byte after block of processed bytes.
+  
+  Type   Endian  Alignment  LookAhead
+  
+  x86    little      1          4
+  ARMT   little      2          2
+  ARM    little      4          0
+  PPC     big        4          0
+  SPARC   big        4          0
+  IA64   little     16          0
+
+  size must be >= Alignment + LookAhead, if it's not last block.
+  If (size < Alignment + LookAhead), converter returns 0.
+
+  Example:
+
+    UInt32 ip = 0;
+    for ()
+    {
+      ; size must be >= Alignment + LookAhead, if it's not last block
+      SizeT processed = Convert(data, size, ip, 1);
+      data += processed;
+      size -= processed;
+      ip += processed;
+    }
+*/
+
+#define x86_Convert_Init(state) { state = 0; }
+SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding);
+SizeT ARM_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT ARMT_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT PPC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT SPARC_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding);
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/Compiler.h b/deps/libchdr/deps/lzma-19.00/include/Compiler.h
new file mode 100644
index 00000000..0cc409d8
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/Compiler.h
@@ -0,0 +1,33 @@
+/* Compiler.h
+2017-04-03 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_COMPILER_H
+#define __7Z_COMPILER_H
+
+#ifdef _MSC_VER
+
+  #ifdef UNDER_CE
+    #define RPC_NO_WINDOWS_H
+    /* #pragma warning(disable : 4115) // '_RPC_ASYNC_STATE' : named type definition in parentheses */
+    #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union
+    #pragma warning(disable : 4214) // nonstandard extension used : bit field types other than int
+  #endif
+
+  #if _MSC_VER >= 1300
+    #pragma warning(disable : 4996) // This function or variable may be unsafe
+  #else
+    #pragma warning(disable : 4511) // copy constructor could not be generated
+    #pragma warning(disable : 4512) // assignment operator could not be generated
+    #pragma warning(disable : 4514) // unreferenced inline function has been removed
+    #pragma warning(disable : 4702) // unreachable code
+    #pragma warning(disable : 4710) // not inlined
+    #pragma warning(disable : 4714) // function marked as __forceinline not inlined
+    #pragma warning(disable : 4786) // identifier was truncated to '255' characters in the debug information
+  #endif
+
+#endif
+
+#define UNUSED_VAR(x) (void)x;
+/* #define UNUSED_VAR(x) x=x; */
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/CpuArch.h b/deps/libchdr/deps/lzma-19.00/include/CpuArch.h
new file mode 100644
index 00000000..bd429388
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/CpuArch.h
@@ -0,0 +1,336 @@
+/* CpuArch.h -- CPU specific code
+2018-02-18 : Igor Pavlov : Public domain */
+
+#ifndef __CPU_ARCH_H
+#define __CPU_ARCH_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/*
+MY_CPU_LE means that CPU is LITTLE ENDIAN.
+MY_CPU_BE means that CPU is BIG ENDIAN.
+If MY_CPU_LE and MY_CPU_BE are not defined, we don't know about ENDIANNESS of platform.
+
+MY_CPU_LE_UNALIGN means that CPU is LITTLE ENDIAN and CPU supports unaligned memory accesses.
+*/
+
+#if  defined(_M_X64) \
+  || defined(_M_AMD64) \
+  || defined(__x86_64__) \
+  || defined(__AMD64__) \
+  || defined(__amd64__)
+  #define MY_CPU_AMD64
+  #ifdef __ILP32__
+    #define MY_CPU_NAME "x32"
+  #else
+    #define MY_CPU_NAME "x64"
+  #endif
+  #define MY_CPU_64BIT
+#endif
+
+
+#if  defined(_M_IX86) \
+  || defined(__i386__)
+  #define MY_CPU_X86
+  #define MY_CPU_NAME "x86"
+  #define MY_CPU_32BIT
+#endif
+
+
+#if  defined(_M_ARM64) \
+  || defined(__AARCH64EL__) \
+  || defined(__AARCH64EB__) \
+  || defined(__aarch64__)
+  #define MY_CPU_ARM64
+  #define MY_CPU_NAME "arm64"
+  #define MY_CPU_64BIT
+#endif
+
+
+#if  defined(_M_ARM) \
+  || defined(_M_ARM_NT) \
+  || defined(_M_ARMT) \
+  || defined(__arm__) \
+  || defined(__thumb__) \
+  || defined(__ARMEL__) \
+  || defined(__ARMEB__) \
+  || defined(__THUMBEL__) \
+  || defined(__THUMBEB__)
+  #define MY_CPU_ARM
+  #define MY_CPU_NAME "arm"
+  #define MY_CPU_32BIT
+#endif
+
+
+#if  defined(_M_IA64) \
+  || defined(__ia64__)
+  #define MY_CPU_IA64
+  #define MY_CPU_NAME "ia64"
+  #define MY_CPU_64BIT
+#endif
+
+
+#if  defined(__mips64) \
+  || defined(__mips64__) \
+  || (defined(__mips) && (__mips == 64 || __mips == 4 || __mips == 3))
+  #define MY_CPU_NAME "mips64"
+  #define MY_CPU_64BIT
+#elif defined(__mips__)
+  #define MY_CPU_NAME "mips"
+  /* #define MY_CPU_32BIT */
+#endif
+
+
+#if  defined(__ppc64__) \
+  || defined(__powerpc64__)
+  #ifdef __ILP32__
+    #define MY_CPU_NAME "ppc64-32"
+  #else
+    #define MY_CPU_NAME "ppc64"
+  #endif
+  #define MY_CPU_64BIT
+#elif defined(__ppc__) \
+  || defined(__powerpc__)
+  #define MY_CPU_NAME "ppc"
+  #define MY_CPU_32BIT
+#endif
+
+
+#if  defined(__sparc64__)
+  #define MY_CPU_NAME "sparc64"
+  #define MY_CPU_64BIT
+#elif defined(__sparc__)
+  #define MY_CPU_NAME "sparc"
+  /* #define MY_CPU_32BIT */
+#endif
+
+
+#if defined(MY_CPU_X86) || defined(MY_CPU_AMD64)
+#define MY_CPU_X86_OR_AMD64
+#endif
+
+
+#ifdef _WIN32
+
+  #ifdef MY_CPU_ARM
+  #define MY_CPU_ARM_LE
+  #endif
+
+  #ifdef MY_CPU_ARM64
+  #define MY_CPU_ARM64_LE
+  #endif
+
+  #ifdef _M_IA64
+  #define MY_CPU_IA64_LE
+  #endif
+
+#endif
+
+
+#if defined(MY_CPU_X86_OR_AMD64) \
+    || defined(MY_CPU_ARM_LE) \
+    || defined(MY_CPU_ARM64_LE) \
+    || defined(MY_CPU_IA64_LE) \
+    || defined(__LITTLE_ENDIAN__) \
+    || defined(__ARMEL__) \
+    || defined(__THUMBEL__) \
+    || defined(__AARCH64EL__) \
+    || defined(__MIPSEL__) \
+    || defined(__MIPSEL) \
+    || defined(_MIPSEL) \
+    || defined(__BFIN__) \
+    || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
+  #define MY_CPU_LE
+#endif
+
+#if defined(__BIG_ENDIAN__) \
+    || defined(__ARMEB__) \
+    || defined(__THUMBEB__) \
+    || defined(__AARCH64EB__) \
+    || defined(__MIPSEB__) \
+    || defined(__MIPSEB) \
+    || defined(_MIPSEB) \
+    || defined(__m68k__) \
+    || defined(__s390__) \
+    || defined(__s390x__) \
+    || defined(__zarch__) \
+    || (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))
+  #define MY_CPU_BE
+#endif
+
+
+#if defined(MY_CPU_LE) && defined(MY_CPU_BE)
+  #error Stop_Compiling_Bad_Endian
+#endif
+
+
+#if defined(MY_CPU_32BIT) && defined(MY_CPU_64BIT)
+  #error Stop_Compiling_Bad_32_64_BIT
+#endif
+
+
+#ifndef MY_CPU_NAME
+  #ifdef MY_CPU_LE
+    #define MY_CPU_NAME "LE"
+  #elif defined(MY_CPU_BE)
+    #define MY_CPU_NAME "BE"
+  #else
+    /*
+    #define MY_CPU_NAME ""
+    */
+  #endif
+#endif
+
+
+
+
+
+#ifdef MY_CPU_LE
+  #if defined(MY_CPU_X86_OR_AMD64) \
+      || defined(MY_CPU_ARM64) \
+      || defined(__ARM_FEATURE_UNALIGNED)
+    #define MY_CPU_LE_UNALIGN
+  #endif
+#endif
+
+
+#ifdef MY_CPU_LE_UNALIGN
+
+#define GetUi16(p) (*(const UInt16 *)(const void *)(p))
+#define GetUi32(p) (*(const UInt32 *)(const void *)(p))
+#define GetUi64(p) (*(const UInt64 *)(const void *)(p))
+
+#define SetUi16(p, v) { *(UInt16 *)(p) = (v); }
+#define SetUi32(p, v) { *(UInt32 *)(p) = (v); }
+#define SetUi64(p, v) { *(UInt64 *)(p) = (v); }
+
+#else
+
+#define GetUi16(p) ( (UInt16) ( \
+             ((const Byte *)(p))[0] | \
+    ((UInt16)((const Byte *)(p))[1] << 8) ))
+
+#define GetUi32(p) ( \
+             ((const Byte *)(p))[0]        | \
+    ((UInt32)((const Byte *)(p))[1] <<  8) | \
+    ((UInt32)((const Byte *)(p))[2] << 16) | \
+    ((UInt32)((const Byte *)(p))[3] << 24))
+
+#define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte *)(p)) + 4) << 32))
+
+#define SetUi16(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+    _ppp_[0] = (Byte)_vvv_; \
+    _ppp_[1] = (Byte)(_vvv_ >> 8); }
+
+#define SetUi32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+    _ppp_[0] = (Byte)_vvv_; \
+    _ppp_[1] = (Byte)(_vvv_ >> 8); \
+    _ppp_[2] = (Byte)(_vvv_ >> 16); \
+    _ppp_[3] = (Byte)(_vvv_ >> 24); }
+
+#define SetUi64(p, v) { Byte *_ppp2_ = (Byte *)(p); UInt64 _vvv2_ = (v); \
+    SetUi32(_ppp2_    , (UInt32)_vvv2_); \
+    SetUi32(_ppp2_ + 4, (UInt32)(_vvv2_ >> 32)); }
+
+#endif
+
+#ifdef __has_builtin
+  #define MY__has_builtin(x) __has_builtin(x)
+#else
+  #define MY__has_builtin(x) 0
+#endif
+
+#if defined(MY_CPU_LE_UNALIGN) && /* defined(_WIN64) && */ (_MSC_VER >= 1300)
+
+/* Note: we use bswap instruction, that is unsupported in 386 cpu */
+
+#include <stdlib.h>
+
+#pragma intrinsic(_byteswap_ushort)
+#pragma intrinsic(_byteswap_ulong)
+#pragma intrinsic(_byteswap_uint64)
+
+/* #define GetBe16(p) _byteswap_ushort(*(const UInt16 *)(const Byte *)(p)) */
+#define GetBe32(p) _byteswap_ulong(*(const UInt32 *)(const Byte *)(p))
+#define GetBe64(p) _byteswap_uint64(*(const UInt64 *)(const Byte *)(p))
+
+#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = _byteswap_ulong(v)
+
+#elif defined(MY_CPU_LE_UNALIGN) && ( \
+       (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) \
+    || (defined(__clang__) && MY__has_builtin(__builtin_bswap16)) )
+
+/* #define GetBe16(p) __builtin_bswap16(*(const UInt16 *)(const Byte *)(p)) */
+#define GetBe32(p) __builtin_bswap32(*(const UInt32 *)(const Byte *)(p))
+#define GetBe64(p) __builtin_bswap64(*(const UInt64 *)(const Byte *)(p))
+
+#define SetBe32(p, v) (*(UInt32 *)(void *)(p)) = __builtin_bswap32(v)
+
+#else
+
+#define GetBe32(p) ( \
+    ((UInt32)((const Byte *)(p))[0] << 24) | \
+    ((UInt32)((const Byte *)(p))[1] << 16) | \
+    ((UInt32)((const Byte *)(p))[2] <<  8) | \
+             ((const Byte *)(p))[3] )
+
+#define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte *)(p)) + 4))
+
+#define SetBe32(p, v) { Byte *_ppp_ = (Byte *)(p); UInt32 _vvv_ = (v); \
+    _ppp_[0] = (Byte)(_vvv_ >> 24); \
+    _ppp_[1] = (Byte)(_vvv_ >> 16); \
+    _ppp_[2] = (Byte)(_vvv_ >> 8); \
+    _ppp_[3] = (Byte)_vvv_; }
+
+#endif
+
+
+#ifndef GetBe16
+
+#define GetBe16(p) ( (UInt16) ( \
+    ((UInt16)((const Byte *)(p))[0] << 8) | \
+             ((const Byte *)(p))[1] ))
+
+#endif
+
+
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+typedef struct
+{
+  UInt32 maxFunc;
+  UInt32 vendor[3];
+  UInt32 ver;
+  UInt32 b;
+  UInt32 c;
+  UInt32 d;
+} Cx86cpuid;
+
+enum
+{
+  CPU_FIRM_INTEL,
+  CPU_FIRM_AMD,
+  CPU_FIRM_VIA
+};
+
+void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d);
+
+BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p);
+int x86cpuid_GetFirm(const Cx86cpuid *p);
+
+#define x86cpuid_GetFamily(ver) (((ver >> 16) & 0xFF0) | ((ver >> 8) & 0xF))
+#define x86cpuid_GetModel(ver)  (((ver >> 12) &  0xF0) | ((ver >> 4) & 0xF))
+#define x86cpuid_GetStepping(ver) (ver & 0xF)
+
+BoolInt CPU_Is_InOrder();
+BoolInt CPU_Is_Aes_Supported();
+BoolInt CPU_IsSupported_PageGB();
+
+#endif
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/Delta.h b/deps/libchdr/deps/lzma-19.00/include/Delta.h
new file mode 100644
index 00000000..2fa54ad6
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/Delta.h
@@ -0,0 +1,19 @@
+/* Delta.h -- Delta converter
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __DELTA_H
+#define __DELTA_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define DELTA_STATE_SIZE 256
+
+void Delta_Init(Byte *state);
+void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size);
+void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size);
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/LzFind.h b/deps/libchdr/deps/lzma-19.00/include/LzFind.h
new file mode 100644
index 00000000..42c13be1
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/LzFind.h
@@ -0,0 +1,121 @@
+/* LzFind.h -- Match finder for LZ algorithms
+2017-06-10 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_FIND_H
+#define __LZ_FIND_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+typedef UInt32 CLzRef;
+
+typedef struct _CMatchFinder
+{
+  Byte *buffer;
+  UInt32 pos;
+  UInt32 posLimit;
+  UInt32 streamPos;
+  UInt32 lenLimit;
+
+  UInt32 cyclicBufferPos;
+  UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */
+
+  Byte streamEndWasReached;
+  Byte btMode;
+  Byte bigHash;
+  Byte directInput;
+
+  UInt32 matchMaxLen;
+  CLzRef *hash;
+  CLzRef *son;
+  UInt32 hashMask;
+  UInt32 cutValue;
+
+  Byte *bufferBase;
+  ISeqInStream *stream;
+  
+  UInt32 blockSize;
+  UInt32 keepSizeBefore;
+  UInt32 keepSizeAfter;
+
+  UInt32 numHashBytes;
+  size_t directInputRem;
+  UInt32 historySize;
+  UInt32 fixedHashSize;
+  UInt32 hashSizeSum;
+  SRes result;
+  UInt32 crc[256];
+  size_t numRefs;
+
+  UInt64 expectedDataSize;
+} CMatchFinder;
+
+#define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer)
+
+#define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos)
+
+#define Inline_MatchFinder_IsFinishedOK(p) \
+    ((p)->streamEndWasReached \
+        && (p)->streamPos == (p)->pos \
+        && (!(p)->directInput || (p)->directInputRem == 0))
+      
+int MatchFinder_NeedMove(CMatchFinder *p);
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p);
+void MatchFinder_MoveBlock(CMatchFinder *p);
+void MatchFinder_ReadIfRequired(CMatchFinder *p);
+
+void MatchFinder_Construct(CMatchFinder *p);
+
+/* Conditions:
+     historySize <= 3 GB
+     keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB
+*/
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+    ISzAllocPtr alloc);
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc);
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems);
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue);
+
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *buffer, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue,
+    UInt32 *distances, UInt32 maxLen);
+
+/*
+Conditions:
+  Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func.
+  Mf_GetPointerToCurrentPos_Func's result must be used only before any other function
+*/
+
+typedef void (*Mf_Init_Func)(void *object);
+typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void *object);
+typedef const Byte * (*Mf_GetPointerToCurrentPos_Func)(void *object);
+typedef UInt32 (*Mf_GetMatches_Func)(void *object, UInt32 *distances);
+typedef void (*Mf_Skip_Func)(void *object, UInt32);
+
+typedef struct _IMatchFinder
+{
+  Mf_Init_Func Init;
+  Mf_GetNumAvailableBytes_Func GetNumAvailableBytes;
+  Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos;
+  Mf_GetMatches_Func GetMatches;
+  Mf_Skip_Func Skip;
+} IMatchFinder;
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable);
+
+void MatchFinder_Init_LowHash(CMatchFinder *p);
+void MatchFinder_Init_HighHash(CMatchFinder *p);
+void MatchFinder_Init_3(CMatchFinder *p, int readData);
+void MatchFinder_Init(CMatchFinder *p);
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances);
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num);
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/LzHash.h b/deps/libchdr/deps/lzma-19.00/include/LzHash.h
new file mode 100644
index 00000000..e7c94230
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/LzHash.h
@@ -0,0 +1,57 @@
+/* LzHash.h -- HASH functions for LZ algorithms
+2015-04-12 : Igor Pavlov : Public domain */
+
+#ifndef __LZ_HASH_H
+#define __LZ_HASH_H
+
+#define kHash2Size (1 << 10)
+#define kHash3Size (1 << 16)
+#define kHash4Size (1 << 20)
+
+#define kFix3HashSize (kHash2Size)
+#define kFix4HashSize (kHash2Size + kHash3Size)
+#define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size)
+
+#define HASH2_CALC hv = cur[0] | ((UInt32)cur[1] << 8);
+
+#define HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  hv = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; }
+
+#define HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  hv = (temp ^ (p->crc[cur[3]] << 5)) & p->hashMask; }
+
+#define HASH5_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  temp ^= (p->crc[cur[3]] << 5); \
+  h4 = temp & (kHash4Size - 1); \
+  hv = (temp ^ (p->crc[cur[4]] << 3)) & p->hashMask; }
+
+/* #define HASH_ZIP_CALC hv = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */
+#define HASH_ZIP_CALC hv = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF;
+
+
+#define MT_HASH2_CALC \
+  h2 = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1);
+
+#define MT_HASH3_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
+
+#define MT_HASH4_CALC { \
+  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
+  h2 = temp & (kHash2Size - 1); \
+  temp ^= ((UInt32)cur[2] << 8); \
+  h3 = temp & (kHash3Size - 1); \
+  h4 = (temp ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); }
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/Lzma86.h b/deps/libchdr/deps/lzma-19.00/include/Lzma86.h
new file mode 100644
index 00000000..bebed5cb
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/Lzma86.h
@@ -0,0 +1,111 @@
+/* Lzma86.h -- LZMA + x86 (BCJ) Filter
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA86_H
+#define __LZMA86_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define LZMA86_SIZE_OFFSET (1 + 5)
+#define LZMA86_HEADER_SIZE (LZMA86_SIZE_OFFSET + 8)
+
+/*
+It's an example for LZMA + x86 Filter use.
+You can use .lzma86 extension, if you write that stream to file.
+.lzma86 header adds one additional byte to standard .lzma header.
+.lzma86 header (14 bytes):
+  Offset Size  Description
+    0     1    = 0 - no filter, pure LZMA
+               = 1 - x86 filter + LZMA
+    1     1    lc, lp and pb in encoded form
+    2     4    dictSize (little endian)
+    6     8    uncompressed size (little endian)
+
+
+Lzma86_Encode
+-------------
+level - compression level: 0 <= level <= 9, the default value for "level" is 5.
+
+dictSize - The dictionary size in bytes. The maximum value is
+        128 MB = (1 << 27) bytes for 32-bit version
+          1 GB = (1 << 30) bytes for 64-bit version
+     The default value is 16 MB = (1 << 24) bytes, for level = 5.
+     It's recommended to use the dictionary that is larger than 4 KB and
+     that can be calculated as (1 << N) or (3 << N) sizes.
+     For better compression ratio dictSize must be >= inSize.
+
+filterMode:
+    SZ_FILTER_NO   - no Filter
+    SZ_FILTER_YES  - x86 Filter
+    SZ_FILTER_AUTO - it tries both alternatives to select best.
+              Encoder will use 2 or 3 passes:
+              2 passes when FILTER_NO provides better compression.
+              3 passes when FILTER_YES provides better compression.
+
+Lzma86Encode allocates Data with MyAlloc functions.
+RAM Requirements for compressing:
+  RamSize = dictionarySize * 11.5 + 6MB + FilterBlockSize
+      filterMode     FilterBlockSize
+     SZ_FILTER_NO         0
+     SZ_FILTER_YES      inSize
+     SZ_FILTER_AUTO     inSize
+
+
+Return code:
+  SZ_OK               - OK
+  SZ_ERROR_MEM        - Memory allocation error
+  SZ_ERROR_PARAM      - Incorrect paramater
+  SZ_ERROR_OUTPUT_EOF - output buffer overflow
+  SZ_ERROR_THREAD     - errors in multithreading functions (only for Mt version)
+*/
+
+enum ESzFilterMode
+{
+  SZ_FILTER_NO,
+  SZ_FILTER_YES,
+  SZ_FILTER_AUTO
+};
+
+SRes Lzma86_Encode(Byte *dest, size_t *destLen, const Byte *src, size_t srcLen,
+    int level, UInt32 dictSize, int filterMode);
+
+
+/*
+Lzma86_GetUnpackSize:
+  In:
+    src      - input data
+    srcLen   - input data size
+  Out:
+    unpackSize - size of uncompressed stream
+  Return code:
+    SZ_OK               - OK
+    SZ_ERROR_INPUT_EOF  - Error in headers
+*/
+
+SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize);
+
+/*
+Lzma86_Decode:
+  In:
+    dest     - output data
+    destLen  - output data size
+    src      - input data
+    srcLen   - input data size
+  Out:
+    destLen  - processed output size
+    srcLen   - processed input size
+  Return code:
+    SZ_OK           - OK
+    SZ_ERROR_DATA  - Data error
+    SZ_ERROR_MEM   - Memory allocation error
+    SZ_ERROR_UNSUPPORTED - unsupported file
+    SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer
+*/
+
+SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen);
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/LzmaDec.h b/deps/libchdr/deps/lzma-19.00/include/LzmaDec.h
new file mode 100644
index 00000000..1f0927ab
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/LzmaDec.h
@@ -0,0 +1,234 @@
+/* LzmaDec.h -- LZMA Decoder
+2018-04-21 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_DEC_H
+#define __LZMA_DEC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+/* #define _LZMA_PROB32 */
+/* _LZMA_PROB32 can increase the speed on some CPUs,
+   but memory usage for CLzmaDec::probs will be doubled in that case */
+
+typedef
+#ifdef _LZMA_PROB32
+  UInt32
+#else
+  UInt16
+#endif
+  CLzmaProb;
+
+
+/* ---------- LZMA Properties ---------- */
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaProps
+{
+  Byte lc;
+  Byte lp;
+  Byte pb;
+  Byte _pad_;
+  UInt32 dicSize;
+} CLzmaProps;
+
+/* LzmaProps_Decode - decodes properties
+Returns:
+  SZ_OK
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size);
+
+
+/* ---------- LZMA Decoder state ---------- */
+
+/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
+   Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */
+
+#define LZMA_REQUIRED_INPUT_MAX 20
+
+typedef struct
+{
+  /* Don't change this structure. ASM code can use it. */
+  CLzmaProps prop;
+  CLzmaProb *probs;
+  CLzmaProb *probs_1664;
+  Byte *dic;
+  SizeT dicBufSize;
+  SizeT dicPos;
+  const Byte *buf;
+  UInt32 range;
+  UInt32 code;
+  UInt32 processedPos;
+  UInt32 checkDicSize;
+  UInt32 reps[4];
+  UInt32 state;
+  UInt32 remainLen;
+
+  UInt32 numProbs;
+  unsigned tempBufSize;
+  Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
+} CLzmaDec;
+
+#define LzmaDec_Construct(p) { (p)->dic = NULL; (p)->probs = NULL; }
+
+void LzmaDec_Init(CLzmaDec *p);
+
+/* There are two types of LZMA streams:
+     - Stream with end mark. That end mark adds about 6 bytes to compressed size.
+     - Stream without end mark. You must know exact uncompressed size to decompress such stream. */
+
+typedef enum
+{
+  LZMA_FINISH_ANY,   /* finish at any point */
+  LZMA_FINISH_END    /* block must be finished at the end */
+} ELzmaFinishMode;
+
+/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!
+
+   You must use LZMA_FINISH_END, when you know that current output buffer
+   covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.
+
+   If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
+   and output value of destLen will be less than output buffer size limit.
+   You can check status result also.
+
+   You can use multiple checks to test data integrity after full decompression:
+     1) Check Result and "status" variable.
+     2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+     3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
+        You must use correct finish mode in that case. */
+
+typedef enum
+{
+  LZMA_STATUS_NOT_SPECIFIED,               /* use main error code instead */
+  LZMA_STATUS_FINISHED_WITH_MARK,          /* stream was finished with end mark. */
+  LZMA_STATUS_NOT_FINISHED,                /* stream was not finished */
+  LZMA_STATUS_NEEDS_MORE_INPUT,            /* you must provide more input bytes */
+  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK  /* there is probability that stream was finished without end mark */
+} ELzmaStatus;
+
+/* ELzmaStatus is used only as output value for function call */
+
+
+/* ---------- Interfaces ---------- */
+
+/* There are 3 levels of interfaces:
+     1) Dictionary Interface
+     2) Buffer Interface
+     3) One Call Interface
+   You can select any of these interfaces, but don't mix functions from different
+   groups for same object. */
+
+
+/* There are two variants to allocate state for Dictionary Interface:
+     1) LzmaDec_Allocate / LzmaDec_Free
+     2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
+   You can use variant 2, if you set dictionary buffer manually.
+   For Buffer Interface you must always use variant 1.
+
+LzmaDec_Allocate* can return:
+  SZ_OK
+  SZ_ERROR_MEM         - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+*/
+   
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc);
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc);
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc);
+
+/* ---------- Dictionary Interface ---------- */
+
+/* You can use it, if you want to eliminate the overhead for data copying from
+   dictionary to some other external buffer.
+   You must work with CLzmaDec variables directly in this interface.
+
+   STEPS:
+     LzmaDec_Construct()
+     LzmaDec_Allocate()
+     for (each new stream)
+     {
+       LzmaDec_Init()
+       while (it needs more decompression)
+       {
+         LzmaDec_DecodeToDic()
+         use data from CLzmaDec::dic and update CLzmaDec::dicPos
+       }
+     }
+     LzmaDec_Free()
+*/
+
+/* LzmaDec_DecodeToDic
+   
+   The decoding to internal dictionary buffer (CLzmaDec::dic).
+   You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (dicLimit).
+  LZMA_FINISH_ANY - Decode just dicLimit bytes.
+  LZMA_FINISH_END - Stream must be finished after dicLimit.
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_NEEDS_MORE_INPUT
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+*/
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- Buffer Interface ---------- */
+
+/* It's zlib-like interface.
+   See LzmaDec_DecodeToDic description for information about STEPS and return results,
+   but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
+   to work with CLzmaDec variables manually.
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+*/
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen,
+    const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
+
+
+/* ---------- One Call Interface ---------- */
+
+/* LzmaDecode
+
+finishMode:
+  It has meaning only if the decoding reaches output limit (*destLen).
+  LZMA_FINISH_ANY - Decode just destLen bytes.
+  LZMA_FINISH_END - Stream must be finished after (*destLen).
+
+Returns:
+  SZ_OK
+    status:
+      LZMA_STATUS_FINISHED_WITH_MARK
+      LZMA_STATUS_NOT_FINISHED
+      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+  SZ_ERROR_DATA - Data error
+  SZ_ERROR_MEM  - Memory allocation error
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+*/
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAllocPtr alloc);
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/LzmaEnc.h b/deps/libchdr/deps/lzma-19.00/include/LzmaEnc.h
new file mode 100644
index 00000000..9194ee57
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/LzmaEnc.h
@@ -0,0 +1,76 @@
+/*  LzmaEnc.h -- LZMA Encoder
+2017-07-27 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_ENC_H
+#define __LZMA_ENC_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define LZMA_PROPS_SIZE 5
+
+typedef struct _CLzmaEncProps
+{
+  int level;       /* 0 <= level <= 9 */
+  UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version
+                      (1 << 12) <= dictSize <= (3 << 29) for 64-bit version
+                      default = (1 << 24) */
+  int lc;          /* 0 <= lc <= 8, default = 3 */
+  int lp;          /* 0 <= lp <= 4, default = 0 */
+  int pb;          /* 0 <= pb <= 4, default = 2 */
+  int algo;        /* 0 - fast, 1 - normal, default = 1 */
+  int fb;          /* 5 <= fb <= 273, default = 32 */
+  int btMode;      /* 0 - hashChain Mode, 1 - binTree mode - normal, default = 1 */
+  int numHashBytes; /* 2, 3 or 4, default = 4 */
+  UInt32 mc;       /* 1 <= mc <= (1 << 30), default = 32 */
+  unsigned writeEndMark;  /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */
+  int numThreads;  /* 1 or 2, default = 2 */
+
+  UInt64 reduceSize; /* estimated size of data that will be compressed. default = (UInt64)(Int64)-1.
+                        Encoder uses this value to reduce dictionary size */
+} CLzmaEncProps;
+
+void LzmaEncProps_Init(CLzmaEncProps *p);
+void LzmaEncProps_Normalize(CLzmaEncProps *p);
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2);
+
+
+/* ---------- CLzmaEncHandle Interface ---------- */
+
+/* LzmaEnc* functions can return the following exit codes:
+SRes:
+  SZ_OK           - OK
+  SZ_ERROR_MEM    - Memory allocation error
+  SZ_ERROR_PARAM  - Incorrect paramater in props
+  SZ_ERROR_WRITE  - ISeqOutStream write callback error
+  SZ_ERROR_OUTPUT_EOF - output buffer overflow - version with (Byte *) output
+  SZ_ERROR_PROGRESS - some break from progress callback
+  SZ_ERROR_THREAD - error in multithreading functions (only for Mt version)
+*/
+
+typedef void * CLzmaEncHandle;
+
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc);
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps *props);
+void LzmaEnc_SetDataSize(CLzmaEncHandle p, UInt64 expectedDataSiize);
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte *properties, SizeT *size);
+unsigned LzmaEnc_IsWriteEndMark(CLzmaEncHandle p);
+
+SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream *outStream, ISeqInStream *inStream,
+    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    int writeEndMark, ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+
+/* ---------- One Call Interface ---------- */
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark,
+    ICompressProgress *progress, ISzAllocPtr alloc, ISzAllocPtr allocBig);
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/LzmaLib.h b/deps/libchdr/deps/lzma-19.00/include/LzmaLib.h
new file mode 100644
index 00000000..88fa87d3
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/LzmaLib.h
@@ -0,0 +1,131 @@
+/* LzmaLib.h -- LZMA library interface
+2013-01-18 : Igor Pavlov : Public domain */
+
+#ifndef __LZMA_LIB_H
+#define __LZMA_LIB_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+#define MY_STDAPI int MY_STD_CALL
+
+#define LZMA_PROPS_SIZE 5
+
+/*
+RAM requirements for LZMA:
+  for compression:   (dictSize * 11.5 + 6 MB) + state_size
+  for decompression: dictSize + state_size
+    state_size = (4 + (1.5 << (lc + lp))) KB
+    by default (lc=3, lp=0), state_size = 16 KB.
+
+LZMA properties (5 bytes) format
+    Offset Size  Description
+      0     1    lc, lp and pb in encoded form.
+      1     4    dictSize (little endian).
+*/
+
+/*
+LzmaCompress
+------------
+
+outPropsSize -
+     In:  the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
+     Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5.
+
+  LZMA Encoder will use defult values for any parameter, if it is
+  -1  for any from: level, loc, lp, pb, fb, numThreads
+   0  for dictSize
+  
+level - compression level: 0 <= level <= 9;
+
+  level dictSize algo  fb
+    0:    16 KB   0    32
+    1:    64 KB   0    32
+    2:   256 KB   0    32
+    3:     1 MB   0    32
+    4:     4 MB   0    32
+    5:    16 MB   1    32
+    6:    32 MB   1    32
+    7+:   64 MB   1    64
+ 
+  The default value for "level" is 5.
+
+  algo = 0 means fast method
+  algo = 1 means normal method
+
+dictSize - The dictionary size in bytes. The maximum value is
+        128 MB = (1 << 27) bytes for 32-bit version
+          1 GB = (1 << 30) bytes for 64-bit version
+     The default value is 16 MB = (1 << 24) bytes.
+     It's recommended to use the dictionary that is larger than 4 KB and
+     that can be calculated as (1 << N) or (3 << N) sizes.
+
+lc - The number of literal context bits (high bits of previous literal).
+     It can be in the range from 0 to 8. The default value is 3.
+     Sometimes lc=4 gives the gain for big files.
+
+lp - The number of literal pos bits (low bits of current position for literals).
+     It can be in the range from 0 to 4. The default value is 0.
+     The lp switch is intended for periodical data when the period is equal to 2^lp.
+     For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's
+     better to set lc=0, if you change lp switch.
+
+pb - The number of pos bits (low bits of current position).
+     It can be in the range from 0 to 4. The default value is 2.
+     The pb switch is intended for periodical data when the period is equal 2^pb.
+
+fb - Word size (the number of fast bytes).
+     It can be in the range from 5 to 273. The default value is 32.
+     Usually, a big number gives a little bit better compression ratio and
+     slower compression process.
+
+numThreads - The number of thereads. 1 or 2. The default value is 2.
+     Fast mode (algo = 0) can use only 1 thread.
+
+Out:
+  destLen  - processed output size
+Returns:
+  SZ_OK               - OK
+  SZ_ERROR_MEM        - Memory allocation error
+  SZ_ERROR_PARAM      - Incorrect paramater
+  SZ_ERROR_OUTPUT_EOF - output buffer overflow
+  SZ_ERROR_THREAD     - errors in multithreading functions (only for Mt version)
+*/
+
+MY_STDAPI LzmaCompress(unsigned char *dest, size_t *destLen, const unsigned char *src, size_t srcLen,
+  unsigned char *outProps, size_t *outPropsSize, /* *outPropsSize must be = 5 */
+  int level,      /* 0 <= level <= 9, default = 5 */
+  unsigned dictSize,  /* default = (1 << 24) */
+  int lc,        /* 0 <= lc <= 8, default = 3  */
+  int lp,        /* 0 <= lp <= 4, default = 0  */
+  int pb,        /* 0 <= pb <= 4, default = 2  */
+  int fb,        /* 5 <= fb <= 273, default = 32 */
+  int numThreads /* 1 or 2, default = 2 */
+  );
+
+/*
+LzmaUncompress
+--------------
+In:
+  dest     - output data
+  destLen  - output data size
+  src      - input data
+  srcLen   - input data size
+Out:
+  destLen  - processed output size
+  srcLen   - processed input size
+Returns:
+  SZ_OK                - OK
+  SZ_ERROR_DATA        - Data error
+  SZ_ERROR_MEM         - Memory allocation arror
+  SZ_ERROR_UNSUPPORTED - Unsupported properties
+  SZ_ERROR_INPUT_EOF   - it needs more bytes in input buffer (src)
+*/
+
+MY_STDAPI LzmaUncompress(unsigned char *dest, size_t *destLen, const unsigned char *src, SizeT *srcLen,
+  const unsigned char *props, size_t propsSize);
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/Precomp.h b/deps/libchdr/deps/lzma-19.00/include/Precomp.h
new file mode 100644
index 00000000..e8ff8b40
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/Precomp.h
@@ -0,0 +1,10 @@
+/* Precomp.h -- StdAfx
+2013-11-12 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_PRECOMP_H
+#define __7Z_PRECOMP_H
+
+#include "Compiler.h"
+/* #include "7zTypes.h" */
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/include/Sort.h b/deps/libchdr/deps/lzma-19.00/include/Sort.h
new file mode 100644
index 00000000..2e2963a2
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/include/Sort.h
@@ -0,0 +1,18 @@
+/* Sort.h -- Sort functions
+2014-04-05 : Igor Pavlov : Public domain */
+
+#ifndef __7Z_SORT_H
+#define __7Z_SORT_H
+
+#include "7zTypes.h"
+
+EXTERN_C_BEGIN
+
+void HeapSort(UInt32 *p, size_t size);
+void HeapSort64(UInt64 *p, size_t size);
+
+/* void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size); */
+
+EXTERN_C_END
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/lzma-history.txt b/deps/libchdr/deps/lzma-19.00/lzma-history.txt
new file mode 100644
index 00000000..48ee7481
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/lzma-history.txt
@@ -0,0 +1,446 @@
+HISTORY of the LZMA SDK
+-----------------------
+
+19.00          2019-02-21
+-------------------------
+- Encryption strength for 7z archives was increased:
+  the size of random initialization vector was increased from 64-bit to 128-bit,
+  and the pseudo-random number generator was improved.
+- The bug in 7zIn.c code was fixed.
+
+
+18.06          2018-12-30
+-------------------------
+- The speed for LZMA/LZMA2 compressing was increased by 3-10%,
+  and there are minor changes in compression ratio.
+- Some bugs were fixed.
+- The bug in 7-Zip 18.02-18.05 was fixed:
+  There was memory leak in multithreading xz decoder - XzDecMt_Decode(),
+  if xz stream contains only one block.
+- The changes for MSVS compiler makefiles: 
+   - the makefiles now use "PLATFORM" macroname with values (x64, x86, arm64)
+     instead of "CPU" macroname with values (AMD64, ARM64).
+   - the makefiles by default now use static version of the run-time library.
+
+
+18.05          2018-04-30
+-------------------------
+- The speed for LZMA/LZMA2 compressing was increased 
+    by 8% for fastest/fast compression levels and 
+    by 3% for normal/maximum compression levels.
+- Previous versions of 7-Zip could work incorrectly in "Large memory pages" mode in
+  Windows 10 because of some BUG with "Large Pages" in Windows 10. 
+  Now 7-Zip doesn't use "Large Pages" on Windows 10 up to revision 1709 (16299).
+- The BUG was fixed in Lzma2Enc.c
+    Lzma2Enc_Encode2() function worked incorretly,
+      if (inStream == NULL) and the number of block threads is more than 1.
+
+
+18.03 beta     2018-03-04
+-------------------------
+- Asm\x86\LzmaDecOpt.asm: new optimized LZMA decoder written in asm 
+  for x64 with about 30% higher speed than main version of LZMA decoder written in C.
+- The speed for single-thread LZMA/LZMA2 decoder written in C was increased by 3%.
+- 7-Zip now can use multi-threading for 7z/LZMA2 decoding,
+  if there are multiple independent data chunks in LZMA2 stream.
+- 7-Zip now can use multi-threading for xz decoding,
+  if there are multiple blocks in xz stream.
+
+
+18.01          2019-01-28
+-------------------------
+- The BUG in 17.01 - 18.00 beta was fixed:
+  XzDec.c : random block unpacking and XzUnpacker_IsBlockFinished()
+  didn't work correctly for xz archives without checksum (CRC).
+
+
+18.00 beta     2019-01-10
+-------------------------
+- The BUG in xz encoder was fixed:
+  There was memory leak of 16 KB for each file compressed with 
+  xz compression method, if additional filter was used.
+
+
+17.01 beta     2017-08-28
+-------------------------
+- Minor speed optimization for LZMA2 (xz and 7z) multi-threading compression.
+  7-Zip now uses additional memory buffers for multi-block LZMA2 compression.
+  CPU utilization was slightly improved.
+- 7-zip now creates multi-block xz archives by default. Block size can be 
+  specified with -ms[Size]{m|g} switch.
+- xz decoder now can unpack random block from multi-block xz archives.
+- 7-Zip command line: @listfile now doesn't work after -- switch.
+  Use -i@listfile before -- switch instead.
+- The BUGs were fixed:
+  7-Zip 17.00 beta crashed for commands that write anti-item to 7z archive.
+
+
+17.00 beta     2017-04-29
+-------------------------
+- NewHandler.h / NewHandler.cpp: 
+    now it redefines operator new() only for old MSVC compilers (_MSC_VER < 1900).
+- C/7zTypes.h : the names of variables in interface structures were changed (vt).
+- Some bugs were fixed. 7-Zip could crash in some cases.
+- Some internal changes in code.
+
+
+16.04          2016-10-04
+-------------------------
+- The bug was fixed in DllSecur.c.
+
+
+16.03          2016-09-28
+-------------------------
+- SFX modules now use some protection against DLL preloading attack.
+- Some bugs in 7z code were fixed.
+
+
+16.02          2016-05-21
+-------------------------
+- The BUG in 16.00 - 16.01 was fixed:
+  Split Handler (SplitHandler.cpp) returned incorrect 
+  total size value (kpidSize) for split archives.
+
+
+16.01          2016-05-19
+-------------------------	
+- Some internal changes to reduce the number of compiler warnings.
+
+
+16.00          2016-05-10
+-------------------------	
+- Some bugs were fixed.
+
+
+15.12          2015-11-19
+-------------------------	
+- The BUG in C version of 7z decoder was fixed:
+  7zDec.c : SzDecodeLzma2()
+  7z decoder could mistakenly report about decoding error for some 7z archives
+  that use LZMA2 compression method.
+  The probability to get that mistaken decoding error report was about 
+  one error per 16384 solid blocks for solid blocks larger than 16 KB (compressed size). 
+- The BUG (in 9.26-15.11) in C version of 7z decoder was fixed:
+  7zArcIn.c : SzReadHeader2()
+  7z decoder worked incorrectly for 7z archives that contain 
+  empty solid blocks, that can be placed to 7z archive, if some file is 
+  unavailable for reading during archive creation.
+
+
+15.09 beta     2015-10-16
+-------------------------	
+- The BUG in LZMA / LZMA2 encoding code was fixed.
+  The BUG in LzFind.c::MatchFinder_ReadBlock() function.
+  If input data size is larger than (4 GiB - dictionary_size),
+  the following code worked incorrectly:
+  -  LZMA : LzmaEnc_MemEncode(), LzmaEncode() : LZMA encoding functions 
+     for compressing from memory to memory. 
+     That BUG is not related to LZMA encoder version that works via streams.
+  -  LZMA2 : multi-threaded version of LZMA2 encoder worked incorrectly, if 
+     default value of chunk size (CLzma2EncProps::blockSize) is changed 
+     to value larger than (4 GiB - dictionary_size).
+
+
+9.38 beta      2015-01-03
+-------------------------	
+- The BUG in 9.31-9.37 was fixed:
+  IArchiveGetRawProps interface was disabled for 7z archives.
+- The BUG in 9.26-9.36 was fixed:
+  Some code in CPP\7zip\Archive\7z\ worked correctly only under Windows.
+
+
+9.36 beta      2014-12-26
+-------------------------	
+- The BUG in command line version was fixed:
+  7-Zip created temporary archive in current folder during update archive
+  operation, if -w{Path} switch was not specified. 
+  The fixed 7-Zip creates temporary archive in folder that contains updated archive.
+- The BUG in 9.33-9.35 was fixed:
+  7-Zip silently ignored file reading errors during 7z or gz archive creation,
+  and the created archive contained only part of file that was read before error.
+  The fixed 7-Zip stops archive creation and it reports about error.
+
+
+9.35 beta      2014-12-07
+-------------------------	
+- 7zr.exe now support AES encryption.
+- SFX mudules were added to LZMA SDK
+- Some bugs were fixed.
+
+
+9.21 beta      2011-04-11
+-------------------------	
+- New class FString for file names at file systems.
+- Speed optimization in CRC code for big-endian CPUs.
+- The BUG in Lzma2Dec.c was fixed:
+    Lzma2Decode function didn't work.
+
+
+9.18 beta      2010-11-02
+-------------------------	
+- New small SFX module for installers (SfxSetup).
+
+
+9.12 beta      2010-03-24
+-------------------------
+- The BUG in LZMA SDK 9.* was fixed: LZMA2 codec didn't work,
+  if more than 10 threads were used (or more than 20 threads in some modes).
+
+
+9.11 beta      2010-03-15
+-------------------------
+- PPMd compression method support
+   
+
+9.09           2009-12-12
+-------------------------
+- The bug was fixed:
+   Utf16_To_Utf8 funstions in UTFConvert.cpp and 7zMain.c
+   incorrectly converted surrogate characters (the code >= 0x10000) to UTF-8.
+- Some bugs were fixed
+
+
+9.06           2009-08-17
+-------------------------
+- Some changes in ANSI-C 7z Decoder interfaces.
+
+
+9.04           2009-05-30
+-------------------------
+- LZMA2 compression method support
+- xz format support
+
+
+4.65           2009-02-03
+-------------------------
+- Some minor fixes
+
+
+4.63           2008-12-31
+-------------------------
+- Some minor fixes
+
+
+4.61 beta      2008-11-23
+-------------------------
+- The bug in ANSI-C LZMA Decoder was fixed:
+    If encoded stream was corrupted, decoder could access memory 
+    outside of allocated range.
+- Some changes in ANSI-C 7z Decoder interfaces.
+- LZMA SDK is placed in the public domain.
+
+
+4.60 beta      2008-08-19
+-------------------------
+- Some minor fixes.
+
+
+4.59 beta      2008-08-13
+-------------------------
+- The bug was fixed:
+    LZMA Encoder in fast compression mode could access memory outside of 
+    allocated range in some rare cases.
+
+
+4.58 beta      2008-05-05
+-------------------------
+- ANSI-C LZMA Decoder was rewritten for speed optimizations.
+- ANSI-C LZMA Encoder was included to LZMA SDK.
+- C++ LZMA code now is just wrapper over ANSI-C code.
+
+
+4.57           2007-12-12
+-------------------------
+- Speed optimizations in Ñ++ LZMA Decoder. 
+- Small changes for more compatibility with some C/C++ compilers.
+
+
+4.49 beta      2007-07-05
+-------------------------
+- .7z ANSI-C Decoder:
+     - now it supports BCJ and BCJ2 filters
+     - now it supports files larger than 4 GB.
+     - now it supports "Last Write Time" field for files.
+- C++ code for .7z archives compressing/decompressing from 7-zip 
+  was included to LZMA SDK.
+  
+
+4.43           2006-06-04
+-------------------------
+- Small changes for more compatibility with some C/C++ compilers.
+  
+
+4.42           2006-05-15
+-------------------------
+- Small changes in .h files in ANSI-C version.
+  
+
+4.39 beta      2006-04-14
+-------------------------
+- The bug in versions 4.33b:4.38b was fixed:
+  C++ version of LZMA encoder could not correctly compress 
+  files larger than 2 GB with HC4 match finder (-mfhc4).
+  
+
+4.37 beta      2005-04-06
+-------------------------
+- Fixes in C++ code: code could no be compiled if _NO_EXCEPTIONS was defined. 
+
+
+4.35 beta      2005-03-02
+-------------------------
+- The bug was fixed in C++ version of LZMA Decoder:
+    If encoded stream was corrupted, decoder could access memory 
+    outside of allocated range.
+
+
+4.34 beta      2006-02-27
+-------------------------
+- Compressing speed and memory requirements for compressing were increased
+- LZMA now can use only these match finders: HC4, BT2, BT3, BT4
+
+
+4.32           2005-12-09
+-------------------------
+- Java version of LZMA SDK was included
+
+
+4.30           2005-11-20
+-------------------------
+- Compression ratio was improved in -a2 mode
+- Speed optimizations for compressing in -a2 mode
+- -fb switch now supports values up to 273
+- The bug in 7z_C (7zIn.c) was fixed:
+  It used Alloc/Free functions from different memory pools.
+  So if program used two memory pools, it worked incorrectly.
+- 7z_C: .7z format supporting was improved
+- LZMA# SDK (C#.NET version) was included
+
+
+4.27 (Updated) 2005-09-21
+-------------------------
+- Some GUIDs/interfaces in C++ were changed.
+ IStream.h:
+   ISequentialInStream::Read now works as old ReadPart
+   ISequentialOutStream::Write now works as old WritePart
+
+
+4.27           2005-08-07
+-------------------------
+- The bug in LzmaDecodeSize.c was fixed:
+   if _LZMA_IN_CB and _LZMA_OUT_READ were defined,
+   decompressing worked incorrectly.
+
+
+4.26           2005-08-05
+-------------------------
+- Fixes in 7z_C code and LzmaTest.c:
+  previous versions could work incorrectly,
+  if malloc(0) returns 0
+
+
+4.23           2005-06-29
+-------------------------
+- Small fixes in C++ code
+
+
+4.22           2005-06-10
+-------------------------
+- Small fixes
+
+
+4.21           2005-06-08
+-------------------------
+- Interfaces for ANSI-C LZMA Decoder (LzmaDecode.c) were changed
+- New additional version of ANSI-C LZMA Decoder with zlib-like interface:
+    - LzmaStateDecode.h
+    - LzmaStateDecode.c
+    - LzmaStateTest.c
+- ANSI-C LZMA Decoder now can decompress files larger than 4 GB
+
+
+4.17           2005-04-18
+-------------------------
+- New example for RAM->RAM compressing/decompressing: 
+  LZMA + BCJ (filter for x86 code):
+    - LzmaRam.h
+    - LzmaRam.cpp
+    - LzmaRamDecode.h
+    - LzmaRamDecode.c
+    - -f86 switch for lzma.exe
+
+
+4.16           2005-03-29
+-------------------------
+- The bug was fixed in LzmaDecode.c (ANSI-C LZMA Decoder): 
+   If _LZMA_OUT_READ was defined, and if encoded stream was corrupted,
+   decoder could access memory outside of allocated range.
+- Speed optimization of ANSI-C LZMA Decoder (now it's about 20% faster).
+  Old version of LZMA Decoder now is in file LzmaDecodeSize.c. 
+  LzmaDecodeSize.c can provide slightly smaller code than LzmaDecode.c
+- Small speed optimization in LZMA C++ code
+- filter for SPARC's code was added
+- Simplified version of .7z ANSI-C Decoder was included
+
+
+4.06           2004-09-05
+-------------------------
+- The bug in v4.05 was fixed:
+    LZMA-Encoder didn't release output stream in some cases.
+
+
+4.05           2004-08-25
+-------------------------
+- Source code of filters for x86, IA-64, ARM, ARM-Thumb 
+  and PowerPC code was included to SDK
+- Some internal minor changes
+
+
+4.04           2004-07-28
+-------------------------
+- More compatibility with some C++ compilers
+
+
+4.03           2004-06-18
+-------------------------
+- "Benchmark" command was added. It measures compressing 
+  and decompressing speed and shows rating values. 
+  Also it checks hardware errors.
+
+
+4.02           2004-06-10
+-------------------------
+- C++ LZMA Encoder/Decoder code now is more portable
+  and it can be compiled by GCC on Linux.
+
+
+4.01           2004-02-15
+-------------------------
+- Some detection of data corruption was enabled.
+    LzmaDecode.c / RangeDecoderReadByte
+    .....
+    {
+      rd->ExtraBytes = 1;
+      return 0xFF;
+    }
+
+
+4.00           2004-02-13
+-------------------------
+- Original version of LZMA SDK
+
+
+
+HISTORY of the LZMA
+-------------------
+  2001-2008:  Improvements to LZMA compressing/decompressing code, 
+              keeping compatibility with original LZMA format
+  1996-2001:  Development of LZMA compression format
+
+  Some milestones:
+
+  2001-08-30: LZMA compression was added to 7-Zip
+  1999-01-02: First version of 7-Zip was released
+  
+
+End of document
diff --git a/deps/libchdr/deps/lzma-19.00/lzma.txt b/deps/libchdr/deps/lzma-19.00/lzma.txt
new file mode 100644
index 00000000..a65988fe
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/lzma.txt
@@ -0,0 +1,328 @@
+LZMA compression
+----------------
+Version: 9.35
+
+This file describes LZMA encoding and decoding functions written in C language.
+
+LZMA is an improved version of famous LZ77 compression algorithm. 
+It was improved in way of maximum increasing of compression ratio,
+keeping high decompression speed and low memory requirements for 
+decompressing.
+
+Note: you can read also LZMA Specification (lzma-specification.txt from LZMA SDK)
+
+Also you can look source code for LZMA encoding and decoding:
+  C/Util/Lzma/LzmaUtil.c
+
+
+LZMA compressed file format
+---------------------------
+Offset Size Description
+  0     1   Special LZMA properties (lc,lp, pb in encoded form)
+  1     4   Dictionary size (little endian)
+  5     8   Uncompressed size (little endian). -1 means unknown size
+ 13         Compressed data
+
+
+
+ANSI-C LZMA Decoder
+~~~~~~~~~~~~~~~~~~~
+
+Please note that interfaces for ANSI-C code were changed in LZMA SDK 4.58.
+If you want to use old interfaces you can download previous version of LZMA SDK
+from sourceforge.net site.
+
+To use ANSI-C LZMA Decoder you need the following files:
+1) LzmaDec.h + LzmaDec.c + 7zTypes.h + Precomp.h + Compiler.h
+
+Look example code:
+  C/Util/Lzma/LzmaUtil.c
+
+
+Memory requirements for LZMA decoding
+-------------------------------------
+
+Stack usage of LZMA decoding function for local variables is not 
+larger than 200-400 bytes.
+
+LZMA Decoder uses dictionary buffer and internal state structure.
+Internal state structure consumes
+  state_size = (4 + (1.5 << (lc + lp))) KB
+by default (lc=3, lp=0), state_size = 16 KB.
+
+
+How To decompress data
+----------------------
+
+LZMA Decoder (ANSI-C version) now supports 2 interfaces:
+1) Single-call Decompressing
+2) Multi-call State Decompressing (zlib-like interface)
+
+You must use external allocator:
+Example:
+void *SzAlloc(void *p, size_t size) { p = p; return malloc(size); }
+void SzFree(void *p, void *address) { p = p; free(address); }
+ISzAlloc alloc = { SzAlloc, SzFree };
+
+You can use p = p; operator to disable compiler warnings.
+
+
+Single-call Decompressing
+-------------------------
+When to use: RAM->RAM decompressing
+Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h
+Compile defines: no defines
+Memory Requirements:
+  - Input buffer: compressed size
+  - Output buffer: uncompressed size
+  - LZMA Internal Structures: state_size (16 KB for default settings) 
+
+Interface:
+  int LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+      const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode, 
+      ELzmaStatus *status, ISzAlloc *alloc);
+  In: 
+    dest     - output data
+    destLen  - output data size
+    src      - input data
+    srcLen   - input data size
+    propData - LZMA properties  (5 bytes)
+    propSize - size of propData buffer (5 bytes)
+    finishMode - It has meaning only if the decoding reaches output limit (*destLen).
+         LZMA_FINISH_ANY - Decode just destLen bytes.
+         LZMA_FINISH_END - Stream must be finished after (*destLen).
+                           You can use LZMA_FINISH_END, when you know that 
+                           current output buffer covers last bytes of stream. 
+    alloc    - Memory allocator.
+
+  Out: 
+    destLen  - processed output size 
+    srcLen   - processed input size 
+
+  Output:
+    SZ_OK
+      status:
+        LZMA_STATUS_FINISHED_WITH_MARK
+        LZMA_STATUS_NOT_FINISHED 
+        LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
+    SZ_ERROR_DATA - Data error
+    SZ_ERROR_MEM  - Memory allocation error
+    SZ_ERROR_UNSUPPORTED - Unsupported properties
+    SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src).
+
+  If LZMA decoder sees end_marker before reaching output limit, it returns OK result,
+  and output value of destLen will be less than output buffer size limit.
+
+  You can use multiple checks to test data integrity after full decompression:
+    1) Check Result and "status" variable.
+    2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
+    3) Check that output(srcLen) = compressedSize, if you know real compressedSize. 
+       You must use correct finish mode in that case. */ 
+
+
+Multi-call State Decompressing (zlib-like interface)
+----------------------------------------------------
+
+When to use: file->file decompressing 
+Compile files: LzmaDec.h + LzmaDec.c + 7zTypes.h
+
+Memory Requirements:
+ - Buffer for input stream: any size (for example, 16 KB)
+ - Buffer for output stream: any size (for example, 16 KB)
+ - LZMA Internal Structures: state_size (16 KB for default settings) 
+ - LZMA dictionary (dictionary size is encoded in LZMA properties header)
+
+1) read LZMA properties (5 bytes) and uncompressed size (8 bytes, little-endian) to header:
+   unsigned char header[LZMA_PROPS_SIZE + 8];
+   ReadFile(inFile, header, sizeof(header)
+
+2) Allocate CLzmaDec structures (state + dictionary) using LZMA properties
+
+  CLzmaDec state;
+  LzmaDec_Constr(&state);
+  res = LzmaDec_Allocate(&state, header, LZMA_PROPS_SIZE, &g_Alloc);
+  if (res != SZ_OK)
+    return res;
+
+3) Init LzmaDec structure before any new LZMA stream. And call LzmaDec_DecodeToBuf in loop
+
+  LzmaDec_Init(&state);
+  for (;;)
+  {
+    ... 
+    int res = LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, 
+        const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode);
+    ...
+  }
+
+
+4) Free all allocated structures
+  LzmaDec_Free(&state, &g_Alloc);
+
+Look example code:
+  C/Util/Lzma/LzmaUtil.c
+
+
+How To compress data
+--------------------
+
+Compile files: 
+  7zTypes.h
+  Threads.h	
+  LzmaEnc.h
+  LzmaEnc.c
+  LzFind.h
+  LzFind.c
+  LzFindMt.h
+  LzFindMt.c
+  LzHash.h
+
+Memory Requirements:
+  - (dictSize * 11.5 + 6 MB) + state_size
+
+Lzma Encoder can use two memory allocators:
+1) alloc - for small arrays.
+2) allocBig - for big arrays.
+
+For example, you can use Large RAM Pages (2 MB) in allocBig allocator for 
+better compression speed. Note that Windows has bad implementation for 
+Large RAM Pages. 
+It's OK to use same allocator for alloc and allocBig.
+
+
+Single-call Compression with callbacks
+--------------------------------------
+
+Look example code:
+  C/Util/Lzma/LzmaUtil.c
+
+When to use: file->file compressing 
+
+1) you must implement callback structures for interfaces:
+ISeqInStream
+ISeqOutStream
+ICompressProgress
+ISzAlloc
+
+static void *SzAlloc(void *p, size_t size) { p = p; return MyAlloc(size); }
+static void SzFree(void *p, void *address) {  p = p; MyFree(address); }
+static ISzAlloc g_Alloc = { SzAlloc, SzFree };
+
+  CFileSeqInStream inStream;
+  CFileSeqOutStream outStream;
+
+  inStream.funcTable.Read = MyRead;
+  inStream.file = inFile;
+  outStream.funcTable.Write = MyWrite;
+  outStream.file = outFile;
+
+
+2) Create CLzmaEncHandle object;
+
+  CLzmaEncHandle enc;
+
+  enc = LzmaEnc_Create(&g_Alloc);
+  if (enc == 0)
+    return SZ_ERROR_MEM;
+
+
+3) initialize CLzmaEncProps properties;
+
+  LzmaEncProps_Init(&props);
+
+  Then you can change some properties in that structure.
+
+4) Send LZMA properties to LZMA Encoder
+
+  res = LzmaEnc_SetProps(enc, &props);
+
+5) Write encoded properties to header
+
+    Byte header[LZMA_PROPS_SIZE + 8];
+    size_t headerSize = LZMA_PROPS_SIZE;
+    UInt64 fileSize;
+    int i;
+
+    res = LzmaEnc_WriteProperties(enc, header, &headerSize);
+    fileSize = MyGetFileLength(inFile);
+    for (i = 0; i < 8; i++)
+      header[headerSize++] = (Byte)(fileSize >> (8 * i));
+    MyWriteFileAndCheck(outFile, header, headerSize)
+
+6) Call encoding function:
+      res = LzmaEnc_Encode(enc, &outStream.funcTable, &inStream.funcTable, 
+        NULL, &g_Alloc, &g_Alloc);
+
+7) Destroy LZMA Encoder Object
+  LzmaEnc_Destroy(enc, &g_Alloc, &g_Alloc);
+
+
+If callback function return some error code, LzmaEnc_Encode also returns that code
+or it can return the code like SZ_ERROR_READ, SZ_ERROR_WRITE or SZ_ERROR_PROGRESS.
+
+
+Single-call RAM->RAM Compression
+--------------------------------
+
+Single-call RAM->RAM Compression is similar to Compression with callbacks,
+but you provide pointers to buffers instead of pointers to stream callbacks:
+
+SRes LzmaEncode(Byte *dest, SizeT *destLen, const Byte *src, SizeT srcLen,
+    const CLzmaEncProps *props, Byte *propsEncoded, SizeT *propsSize, int writeEndMark, 
+    ICompressProgress *progress, ISzAlloc *alloc, ISzAlloc *allocBig);
+
+Return code:
+  SZ_OK               - OK
+  SZ_ERROR_MEM        - Memory allocation error 
+  SZ_ERROR_PARAM      - Incorrect paramater
+  SZ_ERROR_OUTPUT_EOF - output buffer overflow
+  SZ_ERROR_THREAD     - errors in multithreading functions (only for Mt version)
+
+
+
+Defines
+-------
+
+_LZMA_SIZE_OPT - Enable some optimizations in LZMA Decoder to get smaller executable code.
+
+_LZMA_PROB32   - It can increase the speed on some 32-bit CPUs, but memory usage for 
+                 some structures will be doubled in that case.
+
+_LZMA_UINT32_IS_ULONG  - Define it if int is 16-bit on your compiler and long is 32-bit.
+
+_LZMA_NO_SYSTEM_SIZE_T  - Define it if you don't want to use size_t type.
+
+
+_7ZIP_PPMD_SUPPPORT - Define it if you don't want to support PPMD method in AMSI-C .7z decoder.
+
+
+C++ LZMA Encoder/Decoder 
+~~~~~~~~~~~~~~~~~~~~~~~~
+C++ LZMA code use COM-like interfaces. So if you want to use it, 
+you can study basics of COM/OLE.
+C++ LZMA code is just wrapper over ANSI-C code.
+
+
+C++ Notes
+~~~~~~~~~~~~~~~~~~~~~~~~
+If you use some C++ code folders in 7-Zip (for example, C++ code for .7z handling),
+you must check that you correctly work with "new" operator.
+7-Zip can be compiled with MSVC 6.0 that doesn't throw "exception" from "new" operator.
+So 7-Zip uses "CPP\Common\NewHandler.cpp" that redefines "new" operator:
+operator new(size_t size)
+{
+  void *p = ::malloc(size);
+  if (p == 0)
+    throw CNewException();
+  return p;
+}
+If you use MSCV that throws exception for "new" operator, you can compile without 
+"NewHandler.cpp". So standard exception will be used. Actually some code of 
+7-Zip catches any exception in internal code and converts it to HRESULT code.
+So you don't need to catch CNewException, if you call COM interfaces of 7-Zip.
+
+---
+
+http://www.7-zip.org
+http://www.7-zip.org/sdk.html
+http://www.7-zip.org/support.html
diff --git a/deps/libchdr/deps/lzma-19.00/lzma.vcxproj b/deps/libchdr/deps/lzma-19.00/lzma.vcxproj
new file mode 100644
index 00000000..a4210356
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/lzma.vcxproj
@@ -0,0 +1,543 @@
+ï»¿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="DebugFast|ARM64">
+      <Configuration>DebugFast</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="DebugFast|Win32">
+      <Configuration>DebugFast</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="DebugFast|x64">
+      <Configuration>DebugFast</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|ARM64">
+      <Configuration>Debug</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="ReleaseLTCG|ARM64">
+      <Configuration>ReleaseLTCG</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="ReleaseLTCG|Win32">
+      <Configuration>ReleaseLTCG</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="ReleaseLTCG|x64">
+      <Configuration>ReleaseLTCG</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|ARM64">
+      <Configuration>Release</Configuration>
+      <Platform>ARM64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="src\Alloc.c" />
+    <ClCompile Include="src\Bra86.c" />
+    <ClCompile Include="src\BraIA64.c" />
+    <ClCompile Include="src\CpuArch.c" />
+    <ClCompile Include="src\Delta.c" />
+    <ClCompile Include="src\LzFind.c" />
+    <ClCompile Include="src\Lzma86Dec.c" />
+    <ClCompile Include="src\Lzma86Enc.c" />
+    <ClCompile Include="src\LzmaDec.c" />
+    <ClCompile Include="src\LzmaEnc.c" />
+    <ClCompile Include="src\LzmaLib.c" />
+    <ClCompile Include="src\Sort.c" />
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{DD944834-7899-4C1C-A4C1-064B5009D239}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>lzma</RootNamespace>
+    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>NotSet</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>NotSet</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>NotSet</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>NotSet</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>NotSet</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <CharacterSet>NotSet</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>NotSet</CharacterSet>
+    <SpectreMitigation>false</SpectreMitigation>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>NotSet</CharacterSet>
+    <SpectreMitigation>false</SpectreMitigation>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>NotSet</CharacterSet>
+    <SpectreMitigation>false</SpectreMitigation>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>NotSet</CharacterSet>
+    <SpectreMitigation>false</SpectreMitigation>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>NotSet</CharacterSet>
+    <SpectreMitigation>false</SpectreMitigation>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'" Label="Configuration">
+    <ConfigurationType>StaticLibrary</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v142</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>NotSet</CharacterSet>
+    <SpectreMitigation>false</SpectreMitigation>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'">
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'">
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+    <LinkIncremental>true</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'">
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'">
+    <IntDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</IntDir>
+    <TargetName>$(ProjectName)-$(Platform)-$(Configuration)</TargetName>
+    <LinkIncremental>false</LinkIncremental>
+    <OutDir>$(SolutionDir)build\$(ProjectName)-$(Platform)-$(Configuration)\</OutDir>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level2</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <MinimalRebuild>false</MinimalRebuild>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level2</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <MinimalRebuild>false</MinimalRebuild>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level2</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <MinimalRebuild>false</MinimalRebuild>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level2</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <MinimalRebuild>false</MinimalRebuild>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <SupportJustMyCode>false</SupportJustMyCode>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|x64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level2</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <MinimalRebuild>false</MinimalRebuild>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <SupportJustMyCode>false</SupportJustMyCode>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugFast|ARM64'">
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level2</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_7ZIP_ST;_ITERATOR_DEBUG_LEVEL=1;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUGFAST;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <BasicRuntimeChecks>Default</BasicRuntimeChecks>
+      <MinimalRebuild>false</MinimalRebuild>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <SupportJustMyCode>false</SupportJustMyCode>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level2</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|Win32'">
+    <ClCompile>
+      <WarningLevel>Level2</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <OmitFramePointers>true</OmitFramePointers>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level2</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+    <ClCompile>
+      <WarningLevel>Level2</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <WholeProgramOptimization>false</WholeProgramOptimization>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|x64'">
+    <ClCompile>
+      <WarningLevel>Level2</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <OmitFramePointers>true</OmitFramePointers>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseLTCG|ARM64'">
+    <ClCompile>
+      <WarningLevel>Level2</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>_7ZIP_ST;_CRT_NONSTDC_NO_WARNINGS;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <AdditionalIncludeDirectories>$(ProjectDir)include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+      <LanguageStandard>stdcpp17</LanguageStandard>
+      <OmitFramePointers>true</OmitFramePointers>
+      <MultiProcessorCompilation>true</MultiProcessorCompilation>
+      <ConformanceMode>true</ConformanceMode>
+      <AdditionalOptions>/Zo /utf-8 %(AdditionalOptions)</AdditionalOptions>
+    </ClCompile>
+    <Link>
+      <SubSystem>Windows</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+    </Link>
+  </ItemDefinitionGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/deps/libchdr/deps/lzma-19.00/lzma.vcxproj.filters b/deps/libchdr/deps/lzma-19.00/lzma.vcxproj.filters
new file mode 100644
index 00000000..8725b394
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/lzma.vcxproj.filters
@@ -0,0 +1,17 @@
+ï»¿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <ClCompile Include="src\BraIA64.c" />
+    <ClCompile Include="src\CpuArch.c" />
+    <ClCompile Include="src\Delta.c" />
+    <ClCompile Include="src\LzFind.c" />
+    <ClCompile Include="src\Lzma86Dec.c" />
+    <ClCompile Include="src\Lzma86Enc.c" />
+    <ClCompile Include="src\LzmaDec.c" />
+    <ClCompile Include="src\LzmaEnc.c" />
+    <ClCompile Include="src\LzmaLib.c" />
+    <ClCompile Include="src\Sort.c" />
+    <ClCompile Include="src\Alloc.c" />
+    <ClCompile Include="src\Bra86.c" />
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/deps/libchdr/deps/lzma-19.00/src/Alloc.c b/deps/libchdr/deps/lzma-19.00/src/Alloc.c
new file mode 100644
index 00000000..bcede4b8
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/Alloc.c
@@ -0,0 +1,455 @@
+/* Alloc.c -- Memory allocation functions
+2018-04-27 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <stdio.h>
+
+#ifdef _WIN32
+#include <windows.h>
+#endif
+#include <stdlib.h>
+
+#include "Alloc.h"
+
+/* #define _SZ_ALLOC_DEBUG */
+
+/* use _SZ_ALLOC_DEBUG to debug alloc/free operations */
+#ifdef _SZ_ALLOC_DEBUG
+
+#include <stdio.h>
+int g_allocCount = 0;
+int g_allocCountMid = 0;
+int g_allocCountBig = 0;
+
+
+#define CONVERT_INT_TO_STR(charType, tempSize) \
+  unsigned char temp[tempSize]; unsigned i = 0; \
+  while (val >= 10) { temp[i++] = (unsigned char)('0' + (unsigned)(val % 10)); val /= 10; } \
+  *s++ = (charType)('0' + (unsigned)val); \
+  while (i != 0) { i--; *s++ = temp[i]; } \
+  *s = 0;
+
+static void ConvertUInt64ToString(UInt64 val, char *s)
+{
+  CONVERT_INT_TO_STR(char, 24);
+}
+
+#define GET_HEX_CHAR(t) ((char)(((t < 10) ? ('0' + t) : ('A' + (t - 10)))))
+
+static void ConvertUInt64ToHex(UInt64 val, char *s)
+{
+  UInt64 v = val;
+  unsigned i;
+  for (i = 1;; i++)
+  {
+    v >>= 4;
+    if (v == 0)
+      break;
+  }
+  s[i] = 0;
+  do
+  {
+    unsigned t = (unsigned)(val & 0xF);
+    val >>= 4;
+    s[--i] = GET_HEX_CHAR(t);
+  }
+  while (i);
+}
+
+#define DEBUG_OUT_STREAM stderr
+
+static void Print(const char *s)
+{
+  fputs(s, DEBUG_OUT_STREAM);
+}
+
+static void PrintAligned(const char *s, size_t align)
+{
+  size_t len = strlen(s);
+  for(;;)
+  {
+    fputc(' ', DEBUG_OUT_STREAM);
+    if (len >= align)
+      break;
+    ++len;
+  }
+  Print(s);
+}
+
+static void PrintLn()
+{
+  Print("\n");
+}
+
+static void PrintHex(UInt64 v, size_t align)
+{
+  char s[32];
+  ConvertUInt64ToHex(v, s);
+  PrintAligned(s, align);
+}
+
+static void PrintDec(UInt64 v, size_t align)
+{
+  char s[32];
+  ConvertUInt64ToString(v, s);
+  PrintAligned(s, align);
+}
+
+static void PrintAddr(void *p)
+{
+  PrintHex((UInt64)(size_t)(ptrdiff_t)p, 12);
+}
+
+
+#define PRINT_ALLOC(name, cnt, size, ptr) \
+    Print(name " "); \
+    PrintDec(cnt++, 10); \
+    PrintHex(size, 10); \
+    PrintAddr(ptr); \
+    PrintLn();
+ 
+#define PRINT_FREE(name, cnt, ptr) if (ptr) { \
+    Print(name " "); \
+    PrintDec(--cnt, 10); \
+    PrintAddr(ptr); \
+    PrintLn(); }
+ 
+#else
+
+#define PRINT_ALLOC(name, cnt, size, ptr)
+#define PRINT_FREE(name, cnt, ptr)
+#define Print(s)
+#define PrintLn()
+#define PrintHex(v, align)
+#define PrintDec(v, align)
+#define PrintAddr(p)
+
+#endif
+
+
+
+void *MyAlloc(size_t size)
+{
+  if (size == 0)
+    return NULL;
+  #ifdef _SZ_ALLOC_DEBUG
+  {
+    void *p = malloc(size);
+    PRINT_ALLOC("Alloc    ", g_allocCount, size, p);
+    return p;
+  }
+  #else
+  return malloc(size);
+  #endif
+}
+
+void MyFree(void *address)
+{
+  PRINT_FREE("Free    ", g_allocCount, address);
+  
+  free(address);
+}
+
+#ifdef _WIN32
+
+void *MidAlloc(size_t size)
+{
+  if (size == 0)
+    return NULL;
+  
+  PRINT_ALLOC("Alloc-Mid", g_allocCountMid, size, NULL);
+  
+  return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+}
+
+void MidFree(void *address)
+{
+  PRINT_FREE("Free-Mid", g_allocCountMid, address);
+
+  if (!address)
+    return;
+  VirtualFree(address, 0, MEM_RELEASE);
+}
+
+#ifndef MEM_LARGE_PAGES
+#undef _7ZIP_LARGE_PAGES
+#endif
+
+#ifdef _7ZIP_LARGE_PAGES
+SIZE_T g_LargePageSize = 0;
+typedef SIZE_T (WINAPI *GetLargePageMinimumP)();
+#endif
+
+void SetLargePageSize()
+{
+  #ifdef _7ZIP_LARGE_PAGES
+  SIZE_T size;
+  GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP)
+        GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum");
+  if (!largePageMinimum)
+    return;
+  size = largePageMinimum();
+  if (size == 0 || (size & (size - 1)) != 0)
+    return;
+  g_LargePageSize = size;
+  #endif
+}
+
+
+void *BigAlloc(size_t size)
+{
+  if (size == 0)
+    return NULL;
+
+  PRINT_ALLOC("Alloc-Big", g_allocCountBig, size, NULL);
+  
+  #ifdef _7ZIP_LARGE_PAGES
+  {
+    SIZE_T ps = g_LargePageSize;
+    if (ps != 0 && ps <= (1 << 30) && size > (ps / 2))
+    {
+      size_t size2;
+      ps--;
+      size2 = (size + ps) & ~ps;
+      if (size2 >= size)
+      {
+        void *res = VirtualAlloc(NULL, size2, MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE);
+        if (res)
+          return res;
+      }
+    }
+  }
+  #endif
+
+  return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+}
+
+void BigFree(void *address)
+{
+  PRINT_FREE("Free-Big", g_allocCountBig, address);
+  
+  if (!address)
+    return;
+  VirtualFree(address, 0, MEM_RELEASE);
+}
+
+#endif
+
+
+static void *SzAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MyAlloc(size); }
+static void SzFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MyFree(address); }
+const ISzAlloc g_Alloc = { SzAlloc, SzFree };
+
+static void *SzMidAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return MidAlloc(size); }
+static void SzMidFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); MidFree(address); }
+const ISzAlloc g_MidAlloc = { SzMidAlloc, SzMidFree };
+
+static void *SzBigAlloc(ISzAllocPtr p, size_t size) { UNUSED_VAR(p); return BigAlloc(size); }
+static void SzBigFree(ISzAllocPtr p, void *address) { UNUSED_VAR(p); BigFree(address); }
+const ISzAlloc g_BigAlloc = { SzBigAlloc, SzBigFree };
+
+
+/*
+  uintptr_t : <stdint.h> C99 (optional)
+            : unsupported in VS6
+*/
+
+#ifdef _WIN32
+  typedef UINT_PTR UIntPtr;
+#else
+  /*
+  typedef uintptr_t UIntPtr;
+  */
+  typedef ptrdiff_t UIntPtr;
+#endif
+
+
+#define ADJUST_ALLOC_SIZE 0
+/*
+#define ADJUST_ALLOC_SIZE (sizeof(void *) - 1)
+*/
+/*
+  Use (ADJUST_ALLOC_SIZE = (sizeof(void *) - 1)), if
+     MyAlloc() can return address that is NOT multiple of sizeof(void *).
+*/
+
+
+/*
+#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((char *)(p) - ((size_t)(UIntPtr)(p) & ((align) - 1))))
+*/
+#define MY_ALIGN_PTR_DOWN(p, align) ((void *)((((UIntPtr)(p)) & ~((UIntPtr)(align) - 1))))
+
+#define MY_ALIGN_PTR_UP_PLUS(p, align) MY_ALIGN_PTR_DOWN(((char *)(p) + (align) + ADJUST_ALLOC_SIZE), align)
+
+
+#if (_POSIX_C_SOURCE >= 200112L) && !defined(_WIN32)
+  #define USE_posix_memalign
+#endif
+
+/*
+  This posix_memalign() is for test purposes only.
+  We also need special Free() function instead of free(),
+  if this posix_memalign() is used.
+*/
+
+/*
+static int posix_memalign(void **ptr, size_t align, size_t size)
+{
+  size_t newSize = size + align;
+  void *p;
+  void *pAligned;
+  *ptr = NULL;
+  if (newSize < size)
+    return 12; // ENOMEM
+  p = MyAlloc(newSize);
+  if (!p)
+    return 12; // ENOMEM
+  pAligned = MY_ALIGN_PTR_UP_PLUS(p, align);
+  ((void **)pAligned)[-1] = p;
+  *ptr = pAligned;
+  return 0;
+}
+*/
+
+/*
+  ALLOC_ALIGN_SIZE >= sizeof(void *)
+  ALLOC_ALIGN_SIZE >= cache_line_size
+*/
+
+#define ALLOC_ALIGN_SIZE ((size_t)1 << 7)
+
+static void *SzAlignedAlloc(ISzAllocPtr pp, size_t size)
+{
+  #ifndef USE_posix_memalign
+  
+  void *p;
+  void *pAligned;
+  size_t newSize;
+  UNUSED_VAR(pp);
+
+  /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
+     block to prevent cache line sharing with another allocated blocks */
+
+  newSize = size + ALLOC_ALIGN_SIZE * 1 + ADJUST_ALLOC_SIZE;
+  if (newSize < size)
+    return NULL;
+
+  p = MyAlloc(newSize);
+  
+  if (!p)
+    return NULL;
+  pAligned = MY_ALIGN_PTR_UP_PLUS(p, ALLOC_ALIGN_SIZE);
+
+  Print(" size="); PrintHex(size, 8);
+  Print(" a_size="); PrintHex(newSize, 8);
+  Print(" ptr="); PrintAddr(p);
+  Print(" a_ptr="); PrintAddr(pAligned);
+  PrintLn();
+
+  ((void **)pAligned)[-1] = p;
+
+  return pAligned;
+
+  #else
+
+  void *p;
+  UNUSED_VAR(pp);
+  if (posix_memalign(&p, ALLOC_ALIGN_SIZE, size))
+    return NULL;
+
+  Print(" posix_memalign="); PrintAddr(p);
+  PrintLn();
+
+  return p;
+
+  #endif
+}
+
+
+static void SzAlignedFree(ISzAllocPtr pp, void *address)
+{
+  UNUSED_VAR(pp);
+  #ifndef USE_posix_memalign
+  if (address)
+    MyFree(((void **)address)[-1]);
+  #else
+  free(address);
+  #endif
+}
+
+
+const ISzAlloc g_AlignedAlloc = { SzAlignedAlloc, SzAlignedFree };
+
+
+
+#define MY_ALIGN_PTR_DOWN_1(p) MY_ALIGN_PTR_DOWN(p, sizeof(void *))
+
+/* we align ptr to support cases where CAlignOffsetAlloc::offset is not multiply of sizeof(void *) */
+#define REAL_BLOCK_PTR_VAR(p) ((void **)MY_ALIGN_PTR_DOWN_1(p))[-1]
+/*
+#define REAL_BLOCK_PTR_VAR(p) ((void **)(p))[-1]
+*/
+
+static void *AlignOffsetAlloc_Alloc(ISzAllocPtr pp, size_t size)
+{
+  CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
+  void *adr;
+  void *pAligned;
+  size_t newSize;
+  size_t extra;
+  size_t alignSize = (size_t)1 << p->numAlignBits;
+
+  if (alignSize < sizeof(void *))
+    alignSize = sizeof(void *);
+  
+  if (p->offset >= alignSize)
+    return NULL;
+
+  /* also we can allocate additional dummy ALLOC_ALIGN_SIZE bytes after aligned
+     block to prevent cache line sharing with another allocated blocks */
+  extra = p->offset & (sizeof(void *) - 1);
+  newSize = size + alignSize + extra + ADJUST_ALLOC_SIZE;
+  if (newSize < size)
+    return NULL;
+
+  adr = ISzAlloc_Alloc(p->baseAlloc, newSize);
+  
+  if (!adr)
+    return NULL;
+
+  pAligned = (char *)MY_ALIGN_PTR_DOWN((char *)adr +
+      alignSize - p->offset + extra + ADJUST_ALLOC_SIZE, alignSize) + p->offset;
+
+  PrintLn();
+  Print("- Aligned: ");
+  Print(" size="); PrintHex(size, 8);
+  Print(" a_size="); PrintHex(newSize, 8);
+  Print(" ptr="); PrintAddr(adr);
+  Print(" a_ptr="); PrintAddr(pAligned);
+  PrintLn();
+
+  REAL_BLOCK_PTR_VAR(pAligned) = adr;
+
+  return pAligned;
+}
+
+
+static void AlignOffsetAlloc_Free(ISzAllocPtr pp, void *address)
+{
+  if (address)
+  {
+    CAlignOffsetAlloc *p = CONTAINER_FROM_VTBL(pp, CAlignOffsetAlloc, vt);
+    PrintLn();
+    Print("- Aligned Free: ");
+    PrintLn();
+    ISzAlloc_Free(p->baseAlloc, REAL_BLOCK_PTR_VAR(address));
+  }
+}
+
+
+void AlignOffsetAlloc_CreateVTable(CAlignOffsetAlloc *p)
+{
+  p->vt.Alloc = AlignOffsetAlloc_Alloc;
+  p->vt.Free = AlignOffsetAlloc_Free;
+}
diff --git a/deps/libchdr/deps/lzma-19.00/src/Bra86.c b/deps/libchdr/deps/lzma-19.00/src/Bra86.c
new file mode 100644
index 00000000..93ed4d76
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/Bra86.c
@@ -0,0 +1,82 @@
+/* Bra86.c -- Converter for x86 code (BCJ)
+2017-04-03 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Bra.h"
+
+#define Test86MSByte(b) ((((b) + 1) & 0xFE) == 0)
+
+SizeT x86_Convert(Byte *data, SizeT size, UInt32 ip, UInt32 *state, int encoding)
+{
+  SizeT pos = 0;
+  UInt32 mask = *state & 7;
+  if (size < 5)
+    return 0;
+  size -= 4;
+  ip += 5;
+
+  for (;;)
+  {
+    Byte *p = data + pos;
+    const Byte *limit = data + size;
+    for (; p < limit; p++)
+      if ((*p & 0xFE) == 0xE8)
+        break;
+
+    {
+      SizeT d = (SizeT)(p - data - pos);
+      pos = (SizeT)(p - data);
+      if (p >= limit)
+      {
+        *state = (d > 2 ? 0 : mask >> (unsigned)d);
+        return pos;
+      }
+      if (d > 2)
+        mask = 0;
+      else
+      {
+        mask >>= (unsigned)d;
+        if (mask != 0 && (mask > 4 || mask == 3 || Test86MSByte(p[(size_t)(mask >> 1) + 1])))
+        {
+          mask = (mask >> 1) | 4;
+          pos++;
+          continue;
+        }
+      }
+    }
+
+    if (Test86MSByte(p[4]))
+    {
+      UInt32 v = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]);
+      UInt32 cur = ip + (UInt32)pos;
+      pos += 5;
+      if (encoding)
+        v += cur;
+      else
+        v -= cur;
+      if (mask != 0)
+      {
+        unsigned sh = (mask & 6) << 2;
+        if (Test86MSByte((Byte)(v >> sh)))
+        {
+          v ^= (((UInt32)0x100 << sh) - 1);
+          if (encoding)
+            v += cur;
+          else
+            v -= cur;
+        }
+        mask = 0;
+      }
+      p[1] = (Byte)v;
+      p[2] = (Byte)(v >> 8);
+      p[3] = (Byte)(v >> 16);
+      p[4] = (Byte)(0 - ((v >> 24) & 1));
+    }
+    else
+    {
+      mask = (mask >> 1) | 4;
+      pos++;
+    }
+  }
+}
diff --git a/deps/libchdr/deps/lzma-19.00/src/BraIA64.c b/deps/libchdr/deps/lzma-19.00/src/BraIA64.c
new file mode 100644
index 00000000..d1dbc62c
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/BraIA64.c
@@ -0,0 +1,53 @@
+/* BraIA64.c -- Converter for IA-64 code
+2017-01-26 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+#include "Bra.h"
+
+SizeT IA64_Convert(Byte *data, SizeT size, UInt32 ip, int encoding)
+{
+  SizeT i;
+  if (size < 16)
+    return 0;
+  size -= 16;
+  i = 0;
+  do
+  {
+    unsigned m = ((UInt32)0x334B0000 >> (data[i] & 0x1E)) & 3;
+    if (m)
+    {
+      m++;
+      do
+      {
+        Byte *p = data + (i + (size_t)m * 5 - 8);
+        if (((p[3] >> m) & 15) == 5
+            && (((p[-1] | ((UInt32)p[0] << 8)) >> m) & 0x70) == 0)
+        {
+          unsigned raw = GetUi32(p);
+          unsigned v = raw >> m;
+          v = (v & 0xFFFFF) | ((v & (1 << 23)) >> 3);
+          
+          v <<= 4;
+          if (encoding)
+            v += ip + (UInt32)i;
+          else
+            v -= ip + (UInt32)i;
+          v >>= 4;
+          
+          v &= 0x1FFFFF;
+          v += 0x700000;
+          v &= 0x8FFFFF;
+          raw &= ~((UInt32)0x8FFFFF << m);
+          raw |= (v << m);
+          SetUi32(p, raw);
+        }
+      }
+      while (++m <= 4);
+    }
+    i += 16;
+  }
+  while (i <= size);
+  return i;
+}
diff --git a/deps/libchdr/deps/lzma-19.00/src/CpuArch.c b/deps/libchdr/deps/lzma-19.00/src/CpuArch.c
new file mode 100644
index 00000000..02e482e0
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/CpuArch.c
@@ -0,0 +1,218 @@
+/* CpuArch.c -- CPU specific code
+2018-02-18: Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+
+#if (defined(_MSC_VER) && !defined(MY_CPU_AMD64)) || defined(__GNUC__)
+#define USE_ASM
+#endif
+
+#if !defined(USE_ASM) && _MSC_VER >= 1500
+#include <intrin.h>
+#endif
+
+#if defined(USE_ASM) && !defined(MY_CPU_AMD64)
+static UInt32 CheckFlag(UInt32 flag)
+{
+  #ifdef _MSC_VER
+  __asm pushfd;
+  __asm pop EAX;
+  __asm mov EDX, EAX;
+  __asm xor EAX, flag;
+  __asm push EAX;
+  __asm popfd;
+  __asm pushfd;
+  __asm pop EAX;
+  __asm xor EAX, EDX;
+  __asm push EDX;
+  __asm popfd;
+  __asm and flag, EAX;
+  #else
+  __asm__ __volatile__ (
+    "pushf\n\t"
+    "pop  %%EAX\n\t"
+    "movl %%EAX,%%EDX\n\t"
+    "xorl %0,%%EAX\n\t"
+    "push %%EAX\n\t"
+    "popf\n\t"
+    "pushf\n\t"
+    "pop  %%EAX\n\t"
+    "xorl %%EDX,%%EAX\n\t"
+    "push %%EDX\n\t"
+    "popf\n\t"
+    "andl %%EAX, %0\n\t":
+    "=c" (flag) : "c" (flag) :
+    "%eax", "%edx");
+  #endif
+  return flag;
+}
+#define CHECK_CPUID_IS_SUPPORTED if (CheckFlag(1 << 18) == 0 || CheckFlag(1 << 21) == 0) return False;
+#else
+#define CHECK_CPUID_IS_SUPPORTED
+#endif
+
+void MyCPUID(UInt32 function, UInt32 *a, UInt32 *b, UInt32 *c, UInt32 *d)
+{
+  #ifdef USE_ASM
+
+  #ifdef _MSC_VER
+
+  UInt32 a2, b2, c2, d2;
+  __asm xor EBX, EBX;
+  __asm xor ECX, ECX;
+  __asm xor EDX, EDX;
+  __asm mov EAX, function;
+  __asm cpuid;
+  __asm mov a2, EAX;
+  __asm mov b2, EBX;
+  __asm mov c2, ECX;
+  __asm mov d2, EDX;
+
+  *a = a2;
+  *b = b2;
+  *c = c2;
+  *d = d2;
+
+  #else
+
+  __asm__ __volatile__ (
+  #if defined(MY_CPU_AMD64) && defined(__PIC__)
+    "mov %%rbx, %%rdi;"
+    "cpuid;"
+    "xchg %%rbx, %%rdi;"
+    : "=a" (*a) ,
+      "=D" (*b) ,
+  #elif defined(MY_CPU_X86) && defined(__PIC__)
+    "mov %%ebx, %%edi;"
+    "cpuid;"
+    "xchgl %%ebx, %%edi;"
+    : "=a" (*a) ,
+      "=D" (*b) ,
+  #else
+    "cpuid"
+    : "=a" (*a) ,
+      "=b" (*b) ,
+  #endif
+      "=c" (*c) ,
+      "=d" (*d)
+    : "0" (function)) ;
+
+  #endif
+  
+  #else
+
+  int CPUInfo[4];
+  __cpuid(CPUInfo, function);
+  *a = CPUInfo[0];
+  *b = CPUInfo[1];
+  *c = CPUInfo[2];
+  *d = CPUInfo[3];
+
+  #endif
+}
+
+BoolInt x86cpuid_CheckAndRead(Cx86cpuid *p)
+{
+  CHECK_CPUID_IS_SUPPORTED
+  MyCPUID(0, &p->maxFunc, &p->vendor[0], &p->vendor[2], &p->vendor[1]);
+  MyCPUID(1, &p->ver, &p->b, &p->c, &p->d);
+  return True;
+}
+
+static const UInt32 kVendors[][3] =
+{
+  { 0x756E6547, 0x49656E69, 0x6C65746E},
+  { 0x68747541, 0x69746E65, 0x444D4163},
+  { 0x746E6543, 0x48727561, 0x736C7561}
+};
+
+int x86cpuid_GetFirm(const Cx86cpuid *p)
+{
+  unsigned i;
+  for (i = 0; i < sizeof(kVendors) / sizeof(kVendors[i]); i++)
+  {
+    const UInt32 *v = kVendors[i];
+    if (v[0] == p->vendor[0] &&
+        v[1] == p->vendor[1] &&
+        v[2] == p->vendor[2])
+      return (int)i;
+  }
+  return -1;
+}
+
+BoolInt CPU_Is_InOrder()
+{
+  Cx86cpuid p;
+  int firm;
+  UInt32 family, model;
+  if (!x86cpuid_CheckAndRead(&p))
+    return True;
+
+  family = x86cpuid_GetFamily(p.ver);
+  model = x86cpuid_GetModel(p.ver);
+  
+  firm = x86cpuid_GetFirm(&p);
+
+  switch (firm)
+  {
+    case CPU_FIRM_INTEL: return (family < 6 || (family == 6 && (
+        /* In-Order Atom CPU */
+           model == 0x1C  /* 45 nm, N4xx, D4xx, N5xx, D5xx, 230, 330 */
+        || model == 0x26  /* 45 nm, Z6xx */
+        || model == 0x27  /* 32 nm, Z2460 */
+        || model == 0x35  /* 32 nm, Z2760 */
+        || model == 0x36  /* 32 nm, N2xxx, D2xxx */
+        )));
+    case CPU_FIRM_AMD: return (family < 5 || (family == 5 && (model < 6 || model == 0xA)));
+    case CPU_FIRM_VIA: return (family < 6 || (family == 6 && model < 0xF));
+  }
+  return True;
+}
+
+#if !defined(MY_CPU_AMD64) && defined(_WIN32)
+#include <windows.h>
+static BoolInt CPU_Sys_Is_SSE_Supported()
+{
+  OSVERSIONINFO vi;
+  vi.dwOSVersionInfoSize = sizeof(vi);
+  if (!GetVersionEx(&vi))
+    return False;
+  return (vi.dwMajorVersion >= 5);
+}
+#define CHECK_SYS_SSE_SUPPORT if (!CPU_Sys_Is_SSE_Supported()) return False;
+#else
+#define CHECK_SYS_SSE_SUPPORT
+#endif
+
+BoolInt CPU_Is_Aes_Supported()
+{
+  Cx86cpuid p;
+  CHECK_SYS_SSE_SUPPORT
+  if (!x86cpuid_CheckAndRead(&p))
+    return False;
+  return (p.c >> 25) & 1;
+}
+
+BoolInt CPU_IsSupported_PageGB()
+{
+  Cx86cpuid cpuid;
+  if (!x86cpuid_CheckAndRead(&cpuid))
+    return False;
+  {
+    UInt32 d[4] = { 0 };
+    MyCPUID(0x80000000, &d[0], &d[1], &d[2], &d[3]);
+    if (d[0] < 0x80000001)
+      return False;
+  }
+  {
+    UInt32 d[4] = { 0 };
+    MyCPUID(0x80000001, &d[0], &d[1], &d[2], &d[3]);
+    return (d[3] >> 26) & 1;
+  }
+}
+
+#endif
diff --git a/deps/libchdr/deps/lzma-19.00/src/Delta.c b/deps/libchdr/deps/lzma-19.00/src/Delta.c
new file mode 100644
index 00000000..e3edd21e
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/Delta.c
@@ -0,0 +1,64 @@
+/* Delta.c -- Delta converter
+2009-05-26 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Delta.h"
+
+void Delta_Init(Byte *state)
+{
+  unsigned i;
+  for (i = 0; i < DELTA_STATE_SIZE; i++)
+    state[i] = 0;
+}
+
+static void MyMemCpy(Byte *dest, const Byte *src, unsigned size)
+{
+  unsigned i;
+  for (i = 0; i < size; i++)
+    dest[i] = src[i];
+}
+
+void Delta_Encode(Byte *state, unsigned delta, Byte *data, SizeT size)
+{
+  Byte buf[DELTA_STATE_SIZE];
+  unsigned j = 0;
+  MyMemCpy(buf, state, delta);
+  {
+    SizeT i;
+    for (i = 0; i < size;)
+    {
+      for (j = 0; j < delta && i < size; i++, j++)
+      {
+        Byte b = data[i];
+        data[i] = (Byte)(b - buf[j]);
+        buf[j] = b;
+      }
+    }
+  }
+  if (j == delta)
+    j = 0;
+  MyMemCpy(state, buf + j, delta - j);
+  MyMemCpy(state + delta - j, buf, j);
+}
+
+void Delta_Decode(Byte *state, unsigned delta, Byte *data, SizeT size)
+{
+  Byte buf[DELTA_STATE_SIZE];
+  unsigned j = 0;
+  MyMemCpy(buf, state, delta);
+  {
+    SizeT i;
+    for (i = 0; i < size;)
+    {
+      for (j = 0; j < delta && i < size; i++, j++)
+      {
+        buf[j] = data[i] = (Byte)(buf[j] + data[i]);
+      }
+    }
+  }
+  if (j == delta)
+    j = 0;
+  MyMemCpy(state, buf + j, delta - j);
+  MyMemCpy(state + delta - j, buf, j);
+}
diff --git a/deps/libchdr/deps/lzma-19.00/src/LzFind.c b/deps/libchdr/deps/lzma-19.00/src/LzFind.c
new file mode 100644
index 00000000..df55e86c
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/LzFind.c
@@ -0,0 +1,1127 @@
+/* LzFind.c -- Match finder for LZ algorithms
+2018-07-08 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+#include "LzFind.h"
+#include "LzHash.h"
+
+#define kEmptyHashValue 0
+#define kMaxValForNormalize ((UInt32)0xFFFFFFFF)
+#define kNormalizeStepMin (1 << 10) /* it must be power of 2 */
+#define kNormalizeMask (~(UInt32)(kNormalizeStepMin - 1))
+#define kMaxHistorySize ((UInt32)7 << 29)
+
+#define kStartMaxLen 3
+
+static void LzInWindow_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  if (!p->directInput)
+  {
+    ISzAlloc_Free(alloc, p->bufferBase);
+    p->bufferBase = NULL;
+  }
+}
+
+/* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */
+
+static int LzInWindow_Create(CMatchFinder *p, UInt32 keepSizeReserv, ISzAllocPtr alloc)
+{
+  UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv;
+  if (p->directInput)
+  {
+    p->blockSize = blockSize;
+    return 1;
+  }
+  if (!p->bufferBase || p->blockSize != blockSize)
+  {
+    LzInWindow_Free(p, alloc);
+    p->blockSize = blockSize;
+    p->bufferBase = (Byte *)ISzAlloc_Alloc(alloc, (size_t)blockSize);
+  }
+  return (p->bufferBase != NULL);
+}
+
+Byte *MatchFinder_GetPointerToCurrentPos(CMatchFinder *p) { return p->buffer; }
+
+UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder *p) { return p->streamPos - p->pos; }
+
+void MatchFinder_ReduceOffsets(CMatchFinder *p, UInt32 subValue)
+{
+  p->posLimit -= subValue;
+  p->pos -= subValue;
+  p->streamPos -= subValue;
+}
+
+static void MatchFinder_ReadBlock(CMatchFinder *p)
+{
+  if (p->streamEndWasReached || p->result != SZ_OK)
+    return;
+
+  /* We use (p->streamPos - p->pos) value. (p->streamPos < p->pos) is allowed. */
+
+  if (p->directInput)
+  {
+    UInt32 curSize = 0xFFFFFFFF - (p->streamPos - p->pos);
+    if (curSize > p->directInputRem)
+      curSize = (UInt32)p->directInputRem;
+    p->directInputRem -= curSize;
+    p->streamPos += curSize;
+    if (p->directInputRem == 0)
+      p->streamEndWasReached = 1;
+    return;
+  }
+  
+  for (;;)
+  {
+    Byte *dest = p->buffer + (p->streamPos - p->pos);
+    size_t size = (p->bufferBase + p->blockSize - dest);
+    if (size == 0)
+      return;
+
+    p->result = ISeqInStream_Read(p->stream, dest, &size);
+    if (p->result != SZ_OK)
+      return;
+    if (size == 0)
+    {
+      p->streamEndWasReached = 1;
+      return;
+    }
+    p->streamPos += (UInt32)size;
+    if (p->streamPos - p->pos > p->keepSizeAfter)
+      return;
+  }
+}
+
+void MatchFinder_MoveBlock(CMatchFinder *p)
+{
+  memmove(p->bufferBase,
+      p->buffer - p->keepSizeBefore,
+      (size_t)(p->streamPos - p->pos) + p->keepSizeBefore);
+  p->buffer = p->bufferBase + p->keepSizeBefore;
+}
+
+int MatchFinder_NeedMove(CMatchFinder *p)
+{
+  if (p->directInput)
+    return 0;
+  /* if (p->streamEndWasReached) return 0; */
+  return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter);
+}
+
+void MatchFinder_ReadIfRequired(CMatchFinder *p)
+{
+  if (p->streamEndWasReached)
+    return;
+  if (p->keepSizeAfter >= p->streamPos - p->pos)
+    MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_CheckAndMoveAndRead(CMatchFinder *p)
+{
+  if (MatchFinder_NeedMove(p))
+    MatchFinder_MoveBlock(p);
+  MatchFinder_ReadBlock(p);
+}
+
+static void MatchFinder_SetDefaultSettings(CMatchFinder *p)
+{
+  p->cutValue = 32;
+  p->btMode = 1;
+  p->numHashBytes = 4;
+  p->bigHash = 0;
+}
+
+#define kCrcPoly 0xEDB88320
+
+void MatchFinder_Construct(CMatchFinder *p)
+{
+  unsigned i;
+  p->bufferBase = NULL;
+  p->directInput = 0;
+  p->hash = NULL;
+  p->expectedDataSize = (UInt64)(Int64)-1;
+  MatchFinder_SetDefaultSettings(p);
+
+  for (i = 0; i < 256; i++)
+  {
+    UInt32 r = (UInt32)i;
+    unsigned j;
+    for (j = 0; j < 8; j++)
+      r = (r >> 1) ^ (kCrcPoly & ((UInt32)0 - (r & 1)));
+    p->crc[i] = r;
+  }
+}
+
+static void MatchFinder_FreeThisClassMemory(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->hash);
+  p->hash = NULL;
+}
+
+void MatchFinder_Free(CMatchFinder *p, ISzAllocPtr alloc)
+{
+  MatchFinder_FreeThisClassMemory(p, alloc);
+  LzInWindow_Free(p, alloc);
+}
+
+static CLzRef* AllocRefs(size_t num, ISzAllocPtr alloc)
+{
+  size_t sizeInBytes = (size_t)num * sizeof(CLzRef);
+  if (sizeInBytes / sizeof(CLzRef) != num)
+    return NULL;
+  return (CLzRef *)ISzAlloc_Alloc(alloc, sizeInBytes);
+}
+
+int MatchFinder_Create(CMatchFinder *p, UInt32 historySize,
+    UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter,
+    ISzAllocPtr alloc)
+{
+  UInt32 sizeReserv;
+  
+  if (historySize > kMaxHistorySize)
+  {
+    MatchFinder_Free(p, alloc);
+    return 0;
+  }
+  
+  sizeReserv = historySize >> 1;
+       if (historySize >= ((UInt32)3 << 30)) sizeReserv = historySize >> 3;
+  else if (historySize >= ((UInt32)2 << 30)) sizeReserv = historySize >> 2;
+  
+  sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19);
+
+  p->keepSizeBefore = historySize + keepAddBufferBefore + 1;
+  p->keepSizeAfter = matchMaxLen + keepAddBufferAfter;
+  
+  /* we need one additional byte, since we use MoveBlock after pos++ and before dictionary using */
+  
+  if (LzInWindow_Create(p, sizeReserv, alloc))
+  {
+    UInt32 newCyclicBufferSize = historySize + 1;
+    UInt32 hs;
+    p->matchMaxLen = matchMaxLen;
+    {
+      p->fixedHashSize = 0;
+      if (p->numHashBytes == 2)
+        hs = (1 << 16) - 1;
+      else
+      {
+        hs = historySize;
+        if (hs > p->expectedDataSize)
+          hs = (UInt32)p->expectedDataSize;
+        if (hs != 0)
+          hs--;
+        hs |= (hs >> 1);
+        hs |= (hs >> 2);
+        hs |= (hs >> 4);
+        hs |= (hs >> 8);
+        hs >>= 1;
+        hs |= 0xFFFF; /* don't change it! It's required for Deflate */
+        if (hs > (1 << 24))
+        {
+          if (p->numHashBytes == 3)
+            hs = (1 << 24) - 1;
+          else
+            hs >>= 1;
+          /* if (bigHash) mode, GetHeads4b() in LzFindMt.c needs (hs >= ((1 << 24) - 1))) */
+        }
+      }
+      p->hashMask = hs;
+      hs++;
+      if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size;
+      if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size;
+      if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size;
+      hs += p->fixedHashSize;
+    }
+
+    {
+      size_t newSize;
+      size_t numSons;
+      p->historySize = historySize;
+      p->hashSizeSum = hs;
+      p->cyclicBufferSize = newCyclicBufferSize;
+      
+      numSons = newCyclicBufferSize;
+      if (p->btMode)
+        numSons <<= 1;
+      newSize = hs + numSons;
+
+      if (p->hash && p->numRefs == newSize)
+        return 1;
+      
+      MatchFinder_FreeThisClassMemory(p, alloc);
+      p->numRefs = newSize;
+      p->hash = AllocRefs(newSize, alloc);
+      
+      if (p->hash)
+      {
+        p->son = p->hash + p->hashSizeSum;
+        return 1;
+      }
+    }
+  }
+
+  MatchFinder_Free(p, alloc);
+  return 0;
+}
+
+static void MatchFinder_SetLimits(CMatchFinder *p)
+{
+  UInt32 limit = kMaxValForNormalize - p->pos;
+  UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos;
+  
+  if (limit2 < limit)
+    limit = limit2;
+  limit2 = p->streamPos - p->pos;
+  
+  if (limit2 <= p->keepSizeAfter)
+  {
+    if (limit2 > 0)
+      limit2 = 1;
+  }
+  else
+    limit2 -= p->keepSizeAfter;
+  
+  if (limit2 < limit)
+    limit = limit2;
+  
+  {
+    UInt32 lenLimit = p->streamPos - p->pos;
+    if (lenLimit > p->matchMaxLen)
+      lenLimit = p->matchMaxLen;
+    p->lenLimit = lenLimit;
+  }
+  p->posLimit = p->pos + limit;
+}
+
+
+void MatchFinder_Init_LowHash(CMatchFinder *p)
+{
+  size_t i;
+  CLzRef *items = p->hash;
+  size_t numItems = p->fixedHashSize;
+  for (i = 0; i < numItems; i++)
+    items[i] = kEmptyHashValue;
+}
+
+
+void MatchFinder_Init_HighHash(CMatchFinder *p)
+{
+  size_t i;
+  CLzRef *items = p->hash + p->fixedHashSize;
+  size_t numItems = (size_t)p->hashMask + 1;
+  for (i = 0; i < numItems; i++)
+    items[i] = kEmptyHashValue;
+}
+
+
+void MatchFinder_Init_3(CMatchFinder *p, int readData)
+{
+  p->cyclicBufferPos = 0;
+  p->buffer = p->bufferBase;
+  p->pos =
+  p->streamPos = p->cyclicBufferSize;
+  p->result = SZ_OK;
+  p->streamEndWasReached = 0;
+  
+  if (readData)
+    MatchFinder_ReadBlock(p);
+  
+  MatchFinder_SetLimits(p);
+}
+
+
+void MatchFinder_Init(CMatchFinder *p)
+{
+  MatchFinder_Init_HighHash(p);
+  MatchFinder_Init_LowHash(p);
+  MatchFinder_Init_3(p, True);
+}
+
+  
+static UInt32 MatchFinder_GetSubValue(CMatchFinder *p)
+{
+  return (p->pos - p->historySize - 1) & kNormalizeMask;
+}
+
+void MatchFinder_Normalize3(UInt32 subValue, CLzRef *items, size_t numItems)
+{
+  size_t i;
+  for (i = 0; i < numItems; i++)
+  {
+    UInt32 value = items[i];
+    if (value <= subValue)
+      value = kEmptyHashValue;
+    else
+      value -= subValue;
+    items[i] = value;
+  }
+}
+
+static void MatchFinder_Normalize(CMatchFinder *p)
+{
+  UInt32 subValue = MatchFinder_GetSubValue(p);
+  MatchFinder_Normalize3(subValue, p->hash, p->numRefs);
+  MatchFinder_ReduceOffsets(p, subValue);
+}
+
+
+MY_NO_INLINE
+static void MatchFinder_CheckLimits(CMatchFinder *p)
+{
+  if (p->pos == kMaxValForNormalize)
+    MatchFinder_Normalize(p);
+  if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos)
+    MatchFinder_CheckAndMoveAndRead(p);
+  if (p->cyclicBufferPos == p->cyclicBufferSize)
+    p->cyclicBufferPos = 0;
+  MatchFinder_SetLimits(p);
+}
+
+
+/*
+  (lenLimit > maxLen)
+*/
+MY_FORCE_INLINE
+static UInt32 * Hc_GetMatchesSpec(unsigned lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+    UInt32 *distances, unsigned maxLen)
+{
+  /*
+  son[_cyclicBufferPos] = curMatch;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+      return distances;
+    {
+      const Byte *pb = cur - delta;
+      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+      if (pb[maxLen] == cur[maxLen] && *pb == *cur)
+      {
+        UInt32 len = 0;
+        while (++len != lenLimit)
+          if (pb[len] != cur[len])
+            break;
+        if (maxLen < len)
+        {
+          maxLen = len;
+          *distances++ = len;
+          *distances++ = delta - 1;
+          if (len == lenLimit)
+            return distances;
+        }
+      }
+    }
+  }
+  */
+
+  const Byte *lim = cur + lenLimit;
+  son[_cyclicBufferPos] = curMatch;
+  do
+  {
+    UInt32 delta = pos - curMatch;
+    if (delta >= _cyclicBufferSize)
+      break;
+    {
+      ptrdiff_t diff;
+      curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)];
+      diff = (ptrdiff_t)0 - delta;
+      if (cur[maxLen] == cur[maxLen + diff])
+      {
+        const Byte *c = cur;
+        while (*c == c[diff])
+        {
+          if (++c == lim)
+          {
+            distances[0] = (UInt32)(lim - cur);
+            distances[1] = delta - 1;
+            return distances + 2;
+          }
+        }
+        {
+          unsigned len = (unsigned)(c - cur);
+          if (maxLen < len)
+          {
+            maxLen = len;
+            distances[0] = (UInt32)len;
+            distances[1] = delta - 1;
+            distances += 2;
+          }
+        }
+      }
+    }
+  }
+  while (--cutValue);
+  
+  return distances;
+}
+
+
+MY_FORCE_INLINE
+UInt32 * GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue,
+    UInt32 *distances, UInt32 maxLen)
+{
+  CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+  CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+  unsigned len0 = 0, len1 = 0;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+    {
+      *ptr0 = *ptr1 = kEmptyHashValue;
+      return distances;
+    }
+    {
+      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      const Byte *pb = cur - delta;
+      unsigned len = (len0 < len1 ? len0 : len1);
+      UInt32 pair0 = pair[0];
+      if (pb[len] == cur[len])
+      {
+        if (++len != lenLimit && pb[len] == cur[len])
+          while (++len != lenLimit)
+            if (pb[len] != cur[len])
+              break;
+        if (maxLen < len)
+        {
+          maxLen = (UInt32)len;
+          *distances++ = (UInt32)len;
+          *distances++ = delta - 1;
+          if (len == lenLimit)
+          {
+            *ptr1 = pair0;
+            *ptr0 = pair[1];
+            return distances;
+          }
+        }
+      }
+      if (pb[len] < cur[len])
+      {
+        *ptr1 = curMatch;
+        ptr1 = pair + 1;
+        curMatch = *ptr1;
+        len1 = len;
+      }
+      else
+      {
+        *ptr0 = curMatch;
+        ptr0 = pair;
+        curMatch = *ptr0;
+        len0 = len;
+      }
+    }
+  }
+}
+
+static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte *cur, CLzRef *son,
+    UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue)
+{
+  CLzRef *ptr0 = son + ((size_t)_cyclicBufferPos << 1) + 1;
+  CLzRef *ptr1 = son + ((size_t)_cyclicBufferPos << 1);
+  unsigned len0 = 0, len1 = 0;
+  for (;;)
+  {
+    UInt32 delta = pos - curMatch;
+    if (cutValue-- == 0 || delta >= _cyclicBufferSize)
+    {
+      *ptr0 = *ptr1 = kEmptyHashValue;
+      return;
+    }
+    {
+      CLzRef *pair = son + ((size_t)(_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1);
+      const Byte *pb = cur - delta;
+      unsigned len = (len0 < len1 ? len0 : len1);
+      if (pb[len] == cur[len])
+      {
+        while (++len != lenLimit)
+          if (pb[len] != cur[len])
+            break;
+        {
+          if (len == lenLimit)
+          {
+            *ptr1 = pair[0];
+            *ptr0 = pair[1];
+            return;
+          }
+        }
+      }
+      if (pb[len] < cur[len])
+      {
+        *ptr1 = curMatch;
+        ptr1 = pair + 1;
+        curMatch = *ptr1;
+        len1 = len;
+      }
+      else
+      {
+        *ptr0 = curMatch;
+        ptr0 = pair;
+        curMatch = *ptr0;
+        len0 = len;
+      }
+    }
+  }
+}
+
+#define MOVE_POS \
+  ++p->cyclicBufferPos; \
+  p->buffer++; \
+  if (++p->pos == p->posLimit) MatchFinder_CheckLimits(p);
+
+#define MOVE_POS_RET MOVE_POS return (UInt32)offset;
+
+static void MatchFinder_MovePos(CMatchFinder *p) { MOVE_POS; }
+
+#define GET_MATCHES_HEADER2(minLen, ret_op) \
+  unsigned lenLimit; UInt32 hv; const Byte *cur; UInt32 curMatch; \
+  lenLimit = (unsigned)p->lenLimit; { if (lenLimit < minLen) { MatchFinder_MovePos(p); ret_op; }} \
+  cur = p->buffer;
+
+#define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0)
+#define SKIP_HEADER(minLen)        GET_MATCHES_HEADER2(minLen, continue)
+
+#define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue
+
+#define GET_MATCHES_FOOTER(offset, maxLen) \
+  offset = (unsigned)(GetMatchesSpec1((UInt32)lenLimit, curMatch, MF_PARAMS(p), \
+  distances + offset, (UInt32)maxLen) - distances); MOVE_POS_RET;
+
+#define SKIP_FOOTER \
+  SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS;
+
+#define UPDATE_maxLen { \
+    ptrdiff_t diff = (ptrdiff_t)0 - d2; \
+    const Byte *c = cur + maxLen; \
+    const Byte *lim = cur + lenLimit; \
+    for (; c != lim; c++) if (*(c + diff) != *c) break; \
+    maxLen = (unsigned)(c - cur); }
+
+static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  unsigned offset;
+  GET_MATCHES_HEADER(2)
+  HASH2_CALC;
+  curMatch = p->hash[hv];
+  p->hash[hv] = p->pos;
+  offset = 0;
+  GET_MATCHES_FOOTER(offset, 1)
+}
+
+UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  unsigned offset;
+  GET_MATCHES_HEADER(3)
+  HASH_ZIP_CALC;
+  curMatch = p->hash[hv];
+  p->hash[hv] = p->pos;
+  offset = 0;
+  GET_MATCHES_FOOTER(offset, 2)
+}
+
+static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 h2, d2, pos;
+  unsigned maxLen, offset;
+  UInt32 *hash;
+  GET_MATCHES_HEADER(3)
+
+  HASH3_CALC;
+
+  hash = p->hash;
+  pos = p->pos;
+
+  d2 = pos - hash[h2];
+
+  curMatch = (hash + kFix3HashSize)[hv];
+  
+  hash[h2] = pos;
+  (hash + kFix3HashSize)[hv] = pos;
+
+  maxLen = 2;
+  offset = 0;
+
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    UPDATE_maxLen
+    distances[0] = (UInt32)maxLen;
+    distances[1] = d2 - 1;
+    offset = 2;
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
+      MOVE_POS_RET;
+    }
+  }
+  
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 h2, h3, d2, d3, pos;
+  unsigned maxLen, offset;
+  UInt32 *hash;
+  GET_MATCHES_HEADER(4)
+
+  HASH4_CALC;
+
+  hash = p->hash;
+  pos = p->pos;
+
+  d2 = pos - hash                  [h2];
+  d3 = pos - (hash + kFix3HashSize)[h3];
+
+  curMatch = (hash + kFix4HashSize)[hv];
+
+  hash                  [h2] = pos;
+  (hash + kFix3HashSize)[h3] = pos;
+  (hash + kFix4HashSize)[hv] = pos;
+
+  maxLen = 0;
+  offset = 0;
+  
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    maxLen = 2;
+    distances[0] = 2;
+    distances[1] = d2 - 1;
+    offset = 2;
+  }
+  
+  if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+  {
+    maxLen = 3;
+    distances[(size_t)offset + 1] = d3 - 1;
+    offset += 2;
+    d2 = d3;
+  }
+  
+  if (offset != 0)
+  {
+    UPDATE_maxLen
+    distances[(size_t)offset - 2] = (UInt32)maxLen;
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec((UInt32)lenLimit, curMatch, MF_PARAMS(p));
+      MOVE_POS_RET;
+    }
+  }
+  
+  if (maxLen < 3)
+    maxLen = 3;
+  
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+
+/*
+static UInt32 Bt5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos;
+  UInt32 *hash;
+  GET_MATCHES_HEADER(5)
+
+  HASH5_CALC;
+
+  hash = p->hash;
+  pos = p->pos;
+
+  d2 = pos - hash                  [h2];
+  d3 = pos - (hash + kFix3HashSize)[h3];
+  d4 = pos - (hash + kFix4HashSize)[h4];
+
+  curMatch = (hash + kFix5HashSize)[hv];
+
+  hash                  [h2] = pos;
+  (hash + kFix3HashSize)[h3] = pos;
+  (hash + kFix4HashSize)[h4] = pos;
+  (hash + kFix5HashSize)[hv] = pos;
+
+  maxLen = 0;
+  offset = 0;
+
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    distances[0] = maxLen = 2;
+    distances[1] = d2 - 1;
+    offset = 2;
+    if (*(cur - d2 + 2) == cur[2])
+      distances[0] = maxLen = 3;
+    else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+    {
+      distances[2] = maxLen = 3;
+      distances[3] = d3 - 1;
+      offset = 4;
+      d2 = d3;
+    }
+  }
+  else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+  {
+    distances[0] = maxLen = 3;
+    distances[1] = d3 - 1;
+    offset = 2;
+    d2 = d3;
+  }
+  
+  if (d2 != d4 && d4 < p->cyclicBufferSize
+      && *(cur - d4) == *cur
+      && *(cur - d4 + 3) == *(cur + 3))
+  {
+    maxLen = 4;
+    distances[(size_t)offset + 1] = d4 - 1;
+    offset += 2;
+    d2 = d4;
+  }
+  
+  if (offset != 0)
+  {
+    UPDATE_maxLen
+    distances[(size_t)offset - 2] = maxLen;
+    if (maxLen == lenLimit)
+    {
+      SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p));
+      MOVE_POS_RET;
+    }
+  }
+
+  if (maxLen < 4)
+    maxLen = 4;
+  
+  GET_MATCHES_FOOTER(offset, maxLen)
+}
+*/
+
+static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 h2, h3, d2, d3, pos;
+  unsigned maxLen, offset;
+  UInt32 *hash;
+  GET_MATCHES_HEADER(4)
+
+  HASH4_CALC;
+
+  hash = p->hash;
+  pos = p->pos;
+  
+  d2 = pos - hash                  [h2];
+  d3 = pos - (hash + kFix3HashSize)[h3];
+  curMatch = (hash + kFix4HashSize)[hv];
+
+  hash                  [h2] = pos;
+  (hash + kFix3HashSize)[h3] = pos;
+  (hash + kFix4HashSize)[hv] = pos;
+
+  maxLen = 0;
+  offset = 0;
+
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    maxLen = 2;
+    distances[0] = 2;
+    distances[1] = d2 - 1;
+    offset = 2;
+  }
+  
+  if (d2 != d3 && d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+  {
+    maxLen = 3;
+    distances[(size_t)offset + 1] = d3 - 1;
+    offset += 2;
+    d2 = d3;
+  }
+  
+  if (offset != 0)
+  {
+    UPDATE_maxLen
+    distances[(size_t)offset - 2] = (UInt32)maxLen;
+    if (maxLen == lenLimit)
+    {
+      p->son[p->cyclicBufferPos] = curMatch;
+      MOVE_POS_RET;
+    }
+  }
+  
+  if (maxLen < 3)
+    maxLen = 3;
+
+  offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+      distances + offset, maxLen) - (distances));
+  MOVE_POS_RET
+}
+
+/*
+static UInt32 Hc5_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  UInt32 h2, h3, h4, d2, d3, d4, maxLen, offset, pos
+  UInt32 *hash;
+  GET_MATCHES_HEADER(5)
+
+  HASH5_CALC;
+
+  hash = p->hash;
+  pos = p->pos;
+  
+  d2 = pos - hash                  [h2];
+  d3 = pos - (hash + kFix3HashSize)[h3];
+  d4 = pos - (hash + kFix4HashSize)[h4];
+
+  curMatch = (hash + kFix5HashSize)[hv];
+
+  hash                  [h2] = pos;
+  (hash + kFix3HashSize)[h3] = pos;
+  (hash + kFix4HashSize)[h4] = pos;
+  (hash + kFix5HashSize)[hv] = pos;
+
+  maxLen = 0;
+  offset = 0;
+
+  if (d2 < p->cyclicBufferSize && *(cur - d2) == *cur)
+  {
+    distances[0] = maxLen = 2;
+    distances[1] = d2 - 1;
+    offset = 2;
+    if (*(cur - d2 + 2) == cur[2])
+      distances[0] = maxLen = 3;
+    else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+    {
+      distances[2] = maxLen = 3;
+      distances[3] = d3 - 1;
+      offset = 4;
+      d2 = d3;
+    }
+  }
+  else if (d3 < p->cyclicBufferSize && *(cur - d3) == *cur)
+  {
+    distances[0] = maxLen = 3;
+    distances[1] = d3 - 1;
+    offset = 2;
+    d2 = d3;
+  }
+  
+  if (d2 != d4 && d4 < p->cyclicBufferSize
+      && *(cur - d4) == *cur
+      && *(cur - d4 + 3) == *(cur + 3))
+  {
+    maxLen = 4;
+    distances[(size_t)offset + 1] = d4 - 1;
+    offset += 2;
+    d2 = d4;
+  }
+  
+  if (offset != 0)
+  {
+    UPDATE_maxLen
+    distances[(size_t)offset - 2] = maxLen;
+    if (maxLen == lenLimit)
+    {
+      p->son[p->cyclicBufferPos] = curMatch;
+      MOVE_POS_RET;
+    }
+  }
+  
+  if (maxLen < 4)
+    maxLen = 4;
+
+  offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+      distances + offset, maxLen) - (distances));
+  MOVE_POS_RET
+}
+*/
+
+UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder *p, UInt32 *distances)
+{
+  unsigned offset;
+  GET_MATCHES_HEADER(3)
+  HASH_ZIP_CALC;
+  curMatch = p->hash[hv];
+  p->hash[hv] = p->pos;
+  offset = (unsigned)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p),
+      distances, 2) - (distances));
+  MOVE_POS_RET
+}
+
+static void Bt2_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    SKIP_HEADER(2)
+    HASH2_CALC;
+    curMatch = p->hash[hv];
+    p->hash[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+void Bt3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    SKIP_HEADER(3)
+    HASH_ZIP_CALC;
+    curMatch = p->hash[hv];
+    p->hash[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Bt3_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 h2;
+    UInt32 *hash;
+    SKIP_HEADER(3)
+    HASH3_CALC;
+    hash = p->hash;
+    curMatch = (hash + kFix3HashSize)[hv];
+    hash[h2] =
+    (hash + kFix3HashSize)[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+static void Bt4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 h2, h3;
+    UInt32 *hash;
+    SKIP_HEADER(4)
+    HASH4_CALC;
+    hash = p->hash;
+    curMatch = (hash + kFix4HashSize)[hv];
+    hash                  [h2] =
+    (hash + kFix3HashSize)[h3] =
+    (hash + kFix4HashSize)[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+
+/*
+static void Bt5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 h2, h3, h4;
+    UInt32 *hash;
+    SKIP_HEADER(5)
+    HASH5_CALC;
+    hash = p->hash;
+    curMatch = (hash + kFix5HashSize)[hv];
+    hash                  [h2] =
+    (hash + kFix3HashSize)[h3] =
+    (hash + kFix4HashSize)[h4] =
+    (hash + kFix5HashSize)[hv] = p->pos;
+    SKIP_FOOTER
+  }
+  while (--num != 0);
+}
+*/
+
+static void Hc4_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 h2, h3;
+    UInt32 *hash;
+    SKIP_HEADER(4)
+    HASH4_CALC;
+    hash = p->hash;
+    curMatch = (hash + kFix4HashSize)[hv];
+    hash                  [h2] =
+    (hash + kFix3HashSize)[h3] =
+    (hash + kFix4HashSize)[hv] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch;
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+
+/*
+static void Hc5_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    UInt32 h2, h3, h4;
+    UInt32 *hash;
+    SKIP_HEADER(5)
+    HASH5_CALC;
+    hash = p->hash;
+    curMatch = hash + kFix5HashSize)[hv];
+    hash                  [h2] =
+    (hash + kFix3HashSize)[h3] =
+    (hash + kFix4HashSize)[h4] =
+    (hash + kFix5HashSize)[hv] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch;
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+*/
+
+void Hc3Zip_MatchFinder_Skip(CMatchFinder *p, UInt32 num)
+{
+  do
+  {
+    SKIP_HEADER(3)
+    HASH_ZIP_CALC;
+    curMatch = p->hash[hv];
+    p->hash[hv] = p->pos;
+    p->son[p->cyclicBufferPos] = curMatch;
+    MOVE_POS
+  }
+  while (--num != 0);
+}
+
+void MatchFinder_CreateVTable(CMatchFinder *p, IMatchFinder *vTable)
+{
+  vTable->Init = (Mf_Init_Func)MatchFinder_Init;
+  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes;
+  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos;
+  if (!p->btMode)
+  {
+    /* if (p->numHashBytes <= 4) */
+    {
+      vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches;
+      vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip;
+    }
+    /*
+    else
+    {
+      vTable->GetMatches = (Mf_GetMatches_Func)Hc5_MatchFinder_GetMatches;
+      vTable->Skip = (Mf_Skip_Func)Hc5_MatchFinder_Skip;
+    }
+    */
+  }
+  else if (p->numHashBytes == 2)
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip;
+  }
+  else if (p->numHashBytes == 3)
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip;
+  }
+  else /* if (p->numHashBytes == 4) */
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip;
+  }
+  /*
+  else
+  {
+    vTable->GetMatches = (Mf_GetMatches_Func)Bt5_MatchFinder_GetMatches;
+    vTable->Skip = (Mf_Skip_Func)Bt5_MatchFinder_Skip;
+  }
+  */
+}
diff --git a/deps/libchdr/deps/lzma-19.00/src/Lzma86Dec.c b/deps/libchdr/deps/lzma-19.00/src/Lzma86Dec.c
new file mode 100644
index 00000000..21031745
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/Lzma86Dec.c
@@ -0,0 +1,54 @@
+/* Lzma86Dec.c -- LZMA + x86 (BCJ) Filter Decoder
+2016-05-16 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Lzma86.h"
+
+#include "Alloc.h"
+#include "Bra.h"
+#include "LzmaDec.h"
+
+SRes Lzma86_GetUnpackSize(const Byte *src, SizeT srcLen, UInt64 *unpackSize)
+{
+  unsigned i;
+  if (srcLen < LZMA86_HEADER_SIZE)
+    return SZ_ERROR_INPUT_EOF;
+  *unpackSize = 0;
+  for (i = 0; i < sizeof(UInt64); i++)
+    *unpackSize += ((UInt64)src[LZMA86_SIZE_OFFSET + i]) << (8 * i);
+  return SZ_OK;
+}
+
+SRes Lzma86_Decode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen)
+{
+  SRes res;
+  int useFilter;
+  SizeT inSizePure;
+  ELzmaStatus status;
+
+  if (*srcLen < LZMA86_HEADER_SIZE)
+    return SZ_ERROR_INPUT_EOF;
+
+  useFilter = src[0];
+
+  if (useFilter > 1)
+  {
+    *destLen = 0;
+    return SZ_ERROR_UNSUPPORTED;
+  }
+
+  inSizePure = *srcLen - LZMA86_HEADER_SIZE;
+  res = LzmaDecode(dest, destLen, src + LZMA86_HEADER_SIZE, &inSizePure,
+      src + 1, LZMA_PROPS_SIZE, LZMA_FINISH_ANY, &status, &g_Alloc);
+  *srcLen = inSizePure + LZMA86_HEADER_SIZE;
+  if (res != SZ_OK)
+    return res;
+  if (useFilter == 1)
+  {
+    UInt32 x86State;
+    x86_Convert_Init(x86State);
+    x86_Convert(dest, *destLen, 0, &x86State, 0);
+  }
+  return SZ_OK;
+}
diff --git a/deps/libchdr/deps/lzma-19.00/src/LzmaDec.c b/deps/libchdr/deps/lzma-19.00/src/LzmaDec.c
new file mode 100644
index 00000000..ba3e1dd5
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/LzmaDec.c
@@ -0,0 +1,1185 @@
+/* LzmaDec.c -- LZMA Decoder
+2018-07-04 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #include "CpuArch.h" */
+#include "LzmaDec.h"
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+#define RC_INIT_SIZE 5
+
+#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
+#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
+  { UPDATE_0(p); i = (i + i); A0; } else \
+  { UPDATE_1(p); i = (i + i) + 1; A1; }
+
+#define TREE_GET_BIT(probs, i) { GET_BIT2(probs + i, i, ;, ;); }
+
+#define REV_BIT(p, i, A0, A1) IF_BIT_0(p + i) \
+  { UPDATE_0(p + i); A0; } else \
+  { UPDATE_1(p + i); A1; }
+#define REV_BIT_VAR(  p, i, m) REV_BIT(p, i, i += m; m += m, m += m; i += m; )
+#define REV_BIT_CONST(p, i, m) REV_BIT(p, i, i += m;       , i += m * 2; )
+#define REV_BIT_LAST( p, i, m) REV_BIT(p, i, i -= m        , ; )
+
+#define TREE_DECODE(probs, limit, i) \
+  { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }
+
+/* #define _LZMA_SIZE_OPT */
+
+#ifdef _LZMA_SIZE_OPT
+#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
+#else
+#define TREE_6_DECODE(probs, i) \
+  { i = 1; \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  TREE_GET_BIT(probs, i); \
+  i -= 0x40; }
+#endif
+
+#define NORMAL_LITER_DEC TREE_GET_BIT(prob, symbol)
+#define MATCHED_LITER_DEC \
+  matchByte += matchByte; \
+  bit = offs; \
+  offs &= matchByte; \
+  probLit = prob + (offs + bit + symbol); \
+  GET_BIT2(probLit, symbol, offs ^= bit; , ;)
+
+
+
+#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }
+
+#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * (UInt32)ttt; if (code < bound)
+#define UPDATE_0_CHECK range = bound;
+#define UPDATE_1_CHECK range -= bound; code -= bound;
+#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
+  { UPDATE_0_CHECK; i = (i + i); A0; } else \
+  { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
+#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
+#define TREE_DECODE_CHECK(probs, limit, i) \
+  { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
+
+
+#define REV_BIT_CHECK(p, i, m) IF_BIT_0_CHECK(p + i) \
+  { UPDATE_0_CHECK; i += m; m += m; } else \
+  { UPDATE_1_CHECK; m += m; i += m; }
+
+
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+#define LenLow 0
+#define LenHigh (LenLow + 2 * (kNumPosStatesMax << kLenNumLowBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
+#define LenChoice LenLow
+#define LenChoice2 (LenLow + (1 << kLenNumLowBits))
+
+#define kNumStates 12
+#define kNumStates2 16
+#define kNumLitStates 7
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+#define kMatchMinLen 2
+#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+/* External ASM code needs same CLzmaProb array layout. So don't change it. */
+
+/* (probs_1664) is faster and better for code size at some platforms */
+/*
+#ifdef MY_CPU_X86_OR_AMD64
+*/
+#define kStartOffset 1664
+#define GET_PROBS p->probs_1664
+/*
+#define GET_PROBS p->probs + kStartOffset
+#else
+#define kStartOffset 0
+#define GET_PROBS p->probs
+#endif
+*/
+
+#define SpecPos (-kStartOffset)
+#define IsRep0Long (SpecPos + kNumFullDistances)
+#define RepLenCoder (IsRep0Long + (kNumStates2 << kNumPosBitsMax))
+#define LenCoder (RepLenCoder + kNumLenProbs)
+#define IsMatch (LenCoder + kNumLenProbs)
+#define Align (IsMatch + (kNumStates2 << kNumPosBitsMax))
+#define IsRep (Align + kAlignTableSize)
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define PosSlot (IsRepG2 + kNumStates)
+#define Literal (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define NUM_BASE_PROBS (Literal + kStartOffset)
+
+#if Align != 0 && kStartOffset != 0
+  #error Stop_Compiling_Bad_LZMA_kAlign
+#endif
+
+#if NUM_BASE_PROBS != 1984
+  #error Stop_Compiling_Bad_LZMA_PROBS
+#endif
+
+
+#define LZMA_LIT_SIZE 0x300
+
+#define LzmaProps_GetNumProbs(p) (NUM_BASE_PROBS + ((UInt32)LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))
+
+
+#define CALC_POS_STATE(processedPos, pbMask) (((processedPos) & (pbMask)) << 4)
+#define COMBINED_PS_STATE (posState + state)
+#define GET_LEN_STATE (posState)
+
+#define LZMA_DIC_MIN (1 << 12)
+
+/*
+p->remainLen : shows status of LZMA decoder:
+    < kMatchSpecLenStart : normal remain
+    = kMatchSpecLenStart : finished
+    = kMatchSpecLenStart + 1 : need init range coder
+    = kMatchSpecLenStart + 2 : need init range coder and state
+*/
+
+/* ---------- LZMA_DECODE_REAL ---------- */
+/*
+LzmaDec_DecodeReal_3() can be implemented in external ASM file.
+3 - is the code compatibility version of that function for check at link time.
+*/
+
+#define LZMA_DECODE_REAL LzmaDec_DecodeReal_3
+
+/*
+LZMA_DECODE_REAL()
+In:
+  RangeCoder is normalized
+  if (p->dicPos == limit)
+  {
+    LzmaDec_TryDummy() was called before to exclude LITERAL and MATCH-REP cases.
+    So first symbol can be only MATCH-NON-REP. And if that MATCH-NON-REP symbol
+    is not END_OF_PAYALOAD_MARKER, then function returns error code.
+  }
+
+Processing:
+  first LZMA symbol will be decoded in any case
+  All checks for limits are at the end of main loop,
+  It will decode new LZMA-symbols while (p->buf < bufLimit && dicPos < limit),
+  RangeCoder is still without last normalization when (p->buf < bufLimit) is being checked.
+
+Out:
+  RangeCoder is normalized
+  Result:
+    SZ_OK - OK
+    SZ_ERROR_DATA - Error
+  p->remainLen:
+    < kMatchSpecLenStart : normal remain
+    = kMatchSpecLenStart : finished
+*/
+
+
+#ifdef _LZMA_DEC_OPT
+
+int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit);
+
+#else
+
+static
+int MY_FAST_CALL LZMA_DECODE_REAL(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+  CLzmaProb *probs = GET_PROBS;
+  unsigned state = (unsigned)p->state;
+  UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
+  unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
+  unsigned lc = p->prop.lc;
+  unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
+
+  Byte *dic = p->dic;
+  SizeT dicBufSize = p->dicBufSize;
+  SizeT dicPos = p->dicPos;
+  
+  UInt32 processedPos = p->processedPos;
+  UInt32 checkDicSize = p->checkDicSize;
+  unsigned len = 0;
+
+  const Byte *buf = p->buf;
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+
+  do
+  {
+    CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = CALC_POS_STATE(processedPos, pbMask);
+
+    prob = probs + IsMatch + COMBINED_PS_STATE;
+    IF_BIT_0(prob)
+    {
+      unsigned symbol;
+      UPDATE_0(prob);
+      prob = probs + Literal;
+      if (processedPos != 0 || checkDicSize != 0)
+        prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
+      processedPos++;
+
+      if (state < kNumLitStates)
+      {
+        state -= (state < 4) ? state : 3;
+        symbol = 1;
+        #ifdef _LZMA_SIZE_OPT
+        do { NORMAL_LITER_DEC } while (symbol < 0x100);
+        #else
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        NORMAL_LITER_DEC
+        #endif
+      }
+      else
+      {
+        unsigned matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+        unsigned offs = 0x100;
+        state -= (state < 10) ? 3 : 6;
+        symbol = 1;
+        #ifdef _LZMA_SIZE_OPT
+        do
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          MATCHED_LITER_DEC
+        }
+        while (symbol < 0x100);
+        #else
+        {
+          unsigned bit;
+          CLzmaProb *probLit;
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+          MATCHED_LITER_DEC
+        }
+        #endif
+      }
+
+      dic[dicPos++] = (Byte)symbol;
+      continue;
+    }
+    
+    {
+      UPDATE_1(prob);
+      prob = probs + IsRep + state;
+      IF_BIT_0(prob)
+      {
+        UPDATE_0(prob);
+        state += kNumStates;
+        prob = probs + LenCoder;
+      }
+      else
+      {
+        UPDATE_1(prob);
+        /*
+        // that case was checked before with kBadRepCode
+        if (checkDicSize == 0 && processedPos == 0)
+          return SZ_ERROR_DATA;
+        */
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0(prob)
+        {
+          UPDATE_0(prob);
+          prob = probs + IsRep0Long + COMBINED_PS_STATE;
+          IF_BIT_0(prob)
+          {
+            UPDATE_0(prob);
+            dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+            dicPos++;
+            processedPos++;
+            state = state < kNumLitStates ? 9 : 11;
+            continue;
+          }
+          UPDATE_1(prob);
+        }
+        else
+        {
+          UInt32 distance;
+          UPDATE_1(prob);
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0(prob)
+          {
+            UPDATE_0(prob);
+            distance = rep1;
+          }
+          else
+          {
+            UPDATE_1(prob);
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0(prob)
+            {
+              UPDATE_0(prob);
+              distance = rep2;
+            }
+            else
+            {
+              UPDATE_1(prob);
+              distance = rep3;
+              rep3 = rep2;
+            }
+            rep2 = rep1;
+          }
+          rep1 = rep0;
+          rep0 = distance;
+        }
+        state = state < kNumLitStates ? 8 : 11;
+        prob = probs + RepLenCoder;
+      }
+      
+      #ifdef _LZMA_SIZE_OPT
+      {
+        unsigned lim, offset;
+        CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0(probLen)
+        {
+          UPDATE_0(probLen);
+          probLen = prob + LenLow + GET_LEN_STATE;
+          offset = 0;
+          lim = (1 << kLenNumLowBits);
+        }
+        else
+        {
+          UPDATE_1(probLen);
+          probLen = prob + LenChoice2;
+          IF_BIT_0(probLen)
+          {
+            UPDATE_0(probLen);
+            probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+            offset = kLenNumLowSymbols;
+            lim = (1 << kLenNumLowBits);
+          }
+          else
+          {
+            UPDATE_1(probLen);
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols * 2;
+            lim = (1 << kLenNumHighBits);
+          }
+        }
+        TREE_DECODE(probLen, lim, len);
+        len += offset;
+      }
+      #else
+      {
+        CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0(probLen)
+        {
+          UPDATE_0(probLen);
+          probLen = prob + LenLow + GET_LEN_STATE;
+          len = 1;
+          TREE_GET_BIT(probLen, len);
+          TREE_GET_BIT(probLen, len);
+          TREE_GET_BIT(probLen, len);
+          len -= 8;
+        }
+        else
+        {
+          UPDATE_1(probLen);
+          probLen = prob + LenChoice2;
+          IF_BIT_0(probLen)
+          {
+            UPDATE_0(probLen);
+            probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+            len = 1;
+            TREE_GET_BIT(probLen, len);
+            TREE_GET_BIT(probLen, len);
+            TREE_GET_BIT(probLen, len);
+          }
+          else
+          {
+            UPDATE_1(probLen);
+            probLen = prob + LenHigh;
+            TREE_DECODE(probLen, (1 << kLenNumHighBits), len);
+            len += kLenNumLowSymbols * 2;
+          }
+        }
+      }
+      #endif
+
+      if (state >= kNumStates)
+      {
+        UInt32 distance;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
+        TREE_6_DECODE(prob, distance);
+        if (distance >= kStartPosModelIndex)
+        {
+          unsigned posSlot = (unsigned)distance;
+          unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
+          distance = (2 | (distance & 1));
+          if (posSlot < kEndPosModelIndex)
+          {
+            distance <<= numDirectBits;
+            prob = probs + SpecPos;
+            {
+              UInt32 m = 1;
+              distance++;
+              do
+              {
+                REV_BIT_VAR(prob, distance, m);
+              }
+              while (--numDirectBits);
+              distance -= m;
+            }
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              NORMALIZE
+              range >>= 1;
+              
+              {
+                UInt32 t;
+                code -= range;
+                t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
+                distance = (distance << 1) + (t + 1);
+                code += range & t;
+              }
+              /*
+              distance <<= 1;
+              if (code >= range)
+              {
+                code -= range;
+                distance |= 1;
+              }
+              */
+            }
+            while (--numDirectBits);
+            prob = probs + Align;
+            distance <<= kNumAlignBits;
+            {
+              unsigned i = 1;
+              REV_BIT_CONST(prob, i, 1);
+              REV_BIT_CONST(prob, i, 2);
+              REV_BIT_CONST(prob, i, 4);
+              REV_BIT_LAST (prob, i, 8);
+              distance |= i;
+            }
+            if (distance == (UInt32)0xFFFFFFFF)
+            {
+              len = kMatchSpecLenStart;
+              state -= kNumStates;
+              break;
+            }
+          }
+        }
+        
+        rep3 = rep2;
+        rep2 = rep1;
+        rep1 = rep0;
+        rep0 = distance + 1;
+        state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
+        if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
+        {
+          p->dicPos = dicPos;
+          return SZ_ERROR_DATA;
+        }
+      }
+
+      len += kMatchMinLen;
+
+      {
+        SizeT rem;
+        unsigned curLen;
+        SizeT pos;
+        
+        if ((rem = limit - dicPos) == 0)
+        {
+          p->dicPos = dicPos;
+          return SZ_ERROR_DATA;
+        }
+        
+        curLen = ((rem < len) ? (unsigned)rem : len);
+        pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
+
+        processedPos += (UInt32)curLen;
+
+        len -= curLen;
+        if (curLen <= dicBufSize - pos)
+        {
+          Byte *dest = dic + dicPos;
+          ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
+          const Byte *lim = dest + curLen;
+          dicPos += (SizeT)curLen;
+          do
+            *(dest) = (Byte)*(dest + src);
+          while (++dest != lim);
+        }
+        else
+        {
+          do
+          {
+            dic[dicPos++] = dic[pos];
+            if (++pos == dicBufSize)
+              pos = 0;
+          }
+          while (--curLen != 0);
+        }
+      }
+    }
+  }
+  while (dicPos < limit && buf < bufLimit);
+
+  NORMALIZE;
+  
+  p->buf = buf;
+  p->range = range;
+  p->code = code;
+  p->remainLen = (UInt32)len;
+  p->dicPos = dicPos;
+  p->processedPos = processedPos;
+  p->reps[0] = rep0;
+  p->reps[1] = rep1;
+  p->reps[2] = rep2;
+  p->reps[3] = rep3;
+  p->state = (UInt32)state;
+
+  return SZ_OK;
+}
+#endif
+
+static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec *p, SizeT limit)
+{
+  if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
+  {
+    Byte *dic = p->dic;
+    SizeT dicPos = p->dicPos;
+    SizeT dicBufSize = p->dicBufSize;
+    unsigned len = (unsigned)p->remainLen;
+    SizeT rep0 = p->reps[0]; /* we use SizeT to avoid the BUG of VC14 for AMD64 */
+    SizeT rem = limit - dicPos;
+    if (rem < len)
+      len = (unsigned)(rem);
+
+    if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
+      p->checkDicSize = p->prop.dicSize;
+
+    p->processedPos += (UInt32)len;
+    p->remainLen -= (UInt32)len;
+    while (len != 0)
+    {
+      len--;
+      dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
+      dicPos++;
+    }
+    p->dicPos = dicPos;
+  }
+}
+
+
+#define kRange0 0xFFFFFFFF
+#define kBound0 ((kRange0 >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1))
+#define kBadRepCode (kBound0 + (((kRange0 - kBound0) >> kNumBitModelTotalBits) << (kNumBitModelTotalBits - 1)))
+#if kBadRepCode != (0xC0000000 - 0x400)
+  #error Stop_Compiling_Bad_LZMA_Check
+#endif
+
+static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec *p, SizeT limit, const Byte *bufLimit)
+{
+  do
+  {
+    SizeT limit2 = limit;
+    if (p->checkDicSize == 0)
+    {
+      UInt32 rem = p->prop.dicSize - p->processedPos;
+      if (limit - p->dicPos > rem)
+        limit2 = p->dicPos + rem;
+
+      if (p->processedPos == 0)
+        if (p->code >= kBadRepCode)
+          return SZ_ERROR_DATA;
+    }
+
+    RINOK(LZMA_DECODE_REAL(p, limit2, bufLimit));
+    
+    if (p->checkDicSize == 0 && p->processedPos >= p->prop.dicSize)
+      p->checkDicSize = p->prop.dicSize;
+    
+    LzmaDec_WriteRem(p, limit);
+  }
+  while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
+
+  return 0;
+}
+
+typedef enum
+{
+  DUMMY_ERROR, /* unexpected end of input stream */
+  DUMMY_LIT,
+  DUMMY_MATCH,
+  DUMMY_REP
+} ELzmaDummy;
+
+static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const Byte *buf, SizeT inSize)
+{
+  UInt32 range = p->range;
+  UInt32 code = p->code;
+  const Byte *bufLimit = buf + inSize;
+  const CLzmaProb *probs = GET_PROBS;
+  unsigned state = (unsigned)p->state;
+  ELzmaDummy res;
+
+  {
+    const CLzmaProb *prob;
+    UInt32 bound;
+    unsigned ttt;
+    unsigned posState = CALC_POS_STATE(p->processedPos, (1 << p->prop.pb) - 1);
+
+    prob = probs + IsMatch + COMBINED_PS_STATE;
+    IF_BIT_0_CHECK(prob)
+    {
+      UPDATE_0_CHECK
+
+      /* if (bufLimit - buf >= 7) return DUMMY_LIT; */
+
+      prob = probs + Literal;
+      if (p->checkDicSize != 0 || p->processedPos != 0)
+        prob += ((UInt32)LZMA_LIT_SIZE *
+            ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
+            (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
+
+      if (state < kNumLitStates)
+      {
+        unsigned symbol = 1;
+        do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
+      }
+      else
+      {
+        unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
+            (p->dicPos < p->reps[0] ? p->dicBufSize : 0)];
+        unsigned offs = 0x100;
+        unsigned symbol = 1;
+        do
+        {
+          unsigned bit;
+          const CLzmaProb *probLit;
+          matchByte += matchByte;
+          bit = offs;
+          offs &= matchByte;
+          probLit = prob + (offs + bit + symbol);
+          GET_BIT2_CHECK(probLit, symbol, offs ^= bit; , ; )
+        }
+        while (symbol < 0x100);
+      }
+      res = DUMMY_LIT;
+    }
+    else
+    {
+      unsigned len;
+      UPDATE_1_CHECK;
+
+      prob = probs + IsRep + state;
+      IF_BIT_0_CHECK(prob)
+      {
+        UPDATE_0_CHECK;
+        state = 0;
+        prob = probs + LenCoder;
+        res = DUMMY_MATCH;
+      }
+      else
+      {
+        UPDATE_1_CHECK;
+        res = DUMMY_REP;
+        prob = probs + IsRepG0 + state;
+        IF_BIT_0_CHECK(prob)
+        {
+          UPDATE_0_CHECK;
+          prob = probs + IsRep0Long + COMBINED_PS_STATE;
+          IF_BIT_0_CHECK(prob)
+          {
+            UPDATE_0_CHECK;
+            NORMALIZE_CHECK;
+            return DUMMY_REP;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+          }
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          prob = probs + IsRepG1 + state;
+          IF_BIT_0_CHECK(prob)
+          {
+            UPDATE_0_CHECK;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+            prob = probs + IsRepG2 + state;
+            IF_BIT_0_CHECK(prob)
+            {
+              UPDATE_0_CHECK;
+            }
+            else
+            {
+              UPDATE_1_CHECK;
+            }
+          }
+        }
+        state = kNumStates;
+        prob = probs + RepLenCoder;
+      }
+      {
+        unsigned limit, offset;
+        const CLzmaProb *probLen = prob + LenChoice;
+        IF_BIT_0_CHECK(probLen)
+        {
+          UPDATE_0_CHECK;
+          probLen = prob + LenLow + GET_LEN_STATE;
+          offset = 0;
+          limit = 1 << kLenNumLowBits;
+        }
+        else
+        {
+          UPDATE_1_CHECK;
+          probLen = prob + LenChoice2;
+          IF_BIT_0_CHECK(probLen)
+          {
+            UPDATE_0_CHECK;
+            probLen = prob + LenLow + GET_LEN_STATE + (1 << kLenNumLowBits);
+            offset = kLenNumLowSymbols;
+            limit = 1 << kLenNumLowBits;
+          }
+          else
+          {
+            UPDATE_1_CHECK;
+            probLen = prob + LenHigh;
+            offset = kLenNumLowSymbols * 2;
+            limit = 1 << kLenNumHighBits;
+          }
+        }
+        TREE_DECODE_CHECK(probLen, limit, len);
+        len += offset;
+      }
+
+      if (state < 4)
+      {
+        unsigned posSlot;
+        prob = probs + PosSlot +
+            ((len < kNumLenToPosStates - 1 ? len : kNumLenToPosStates - 1) <<
+            kNumPosSlotBits);
+        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
+        if (posSlot >= kStartPosModelIndex)
+        {
+          unsigned numDirectBits = ((posSlot >> 1) - 1);
+
+          /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */
+
+          if (posSlot < kEndPosModelIndex)
+          {
+            prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits);
+          }
+          else
+          {
+            numDirectBits -= kNumAlignBits;
+            do
+            {
+              NORMALIZE_CHECK
+              range >>= 1;
+              code -= range & (((code - range) >> 31) - 1);
+              /* if (code >= range) code -= range; */
+            }
+            while (--numDirectBits);
+            prob = probs + Align;
+            numDirectBits = kNumAlignBits;
+          }
+          {
+            unsigned i = 1;
+            unsigned m = 1;
+            do
+            {
+              REV_BIT_CHECK(prob, i, m);
+            }
+            while (--numDirectBits);
+          }
+        }
+      }
+    }
+  }
+  NORMALIZE_CHECK;
+  return res;
+}
+
+
+void LzmaDec_InitDicAndState(CLzmaDec *p, BoolInt initDic, BoolInt initState)
+{
+  p->remainLen = kMatchSpecLenStart + 1;
+  p->tempBufSize = 0;
+
+  if (initDic)
+  {
+    p->processedPos = 0;
+    p->checkDicSize = 0;
+    p->remainLen = kMatchSpecLenStart + 2;
+  }
+  if (initState)
+    p->remainLen = kMatchSpecLenStart + 2;
+}
+
+void LzmaDec_Init(CLzmaDec *p)
+{
+  p->dicPos = 0;
+  LzmaDec_InitDicAndState(p, True, True);
+}
+
+
+SRes LzmaDec_DecodeToDic(CLzmaDec *p, SizeT dicLimit, const Byte *src, SizeT *srcLen,
+    ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT inSize = *srcLen;
+  (*srcLen) = 0;
+  
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+
+  if (p->remainLen > kMatchSpecLenStart)
+  {
+    for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
+      p->tempBuf[p->tempBufSize++] = *src++;
+    if (p->tempBufSize != 0 && p->tempBuf[0] != 0)
+      return SZ_ERROR_DATA;
+    if (p->tempBufSize < RC_INIT_SIZE)
+    {
+      *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+      return SZ_OK;
+    }
+    p->code =
+        ((UInt32)p->tempBuf[1] << 24)
+      | ((UInt32)p->tempBuf[2] << 16)
+      | ((UInt32)p->tempBuf[3] << 8)
+      | ((UInt32)p->tempBuf[4]);
+    p->range = 0xFFFFFFFF;
+    p->tempBufSize = 0;
+
+    if (p->remainLen > kMatchSpecLenStart + 1)
+    {
+      SizeT numProbs = LzmaProps_GetNumProbs(&p->prop);
+      SizeT i;
+      CLzmaProb *probs = p->probs;
+      for (i = 0; i < numProbs; i++)
+        probs[i] = kBitModelTotal >> 1;
+      p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
+      p->state = 0;
+    }
+
+    p->remainLen = 0;
+  }
+
+  LzmaDec_WriteRem(p, dicLimit);
+
+  while (p->remainLen != kMatchSpecLenStart)
+  {
+      int checkEndMarkNow = 0;
+
+      if (p->dicPos >= dicLimit)
+      {
+        if (p->remainLen == 0 && p->code == 0)
+        {
+          *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
+          return SZ_OK;
+        }
+        if (finishMode == LZMA_FINISH_ANY)
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_OK;
+        }
+        if (p->remainLen != 0)
+        {
+          *status = LZMA_STATUS_NOT_FINISHED;
+          return SZ_ERROR_DATA;
+        }
+        checkEndMarkNow = 1;
+      }
+
+      if (p->tempBufSize == 0)
+      {
+        SizeT processed;
+        const Byte *bufLimit;
+        if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+        {
+          int dummyRes = LzmaDec_TryDummy(p, src, inSize);
+          if (dummyRes == DUMMY_ERROR)
+          {
+            memcpy(p->tempBuf, src, inSize);
+            p->tempBufSize = (unsigned)inSize;
+            (*srcLen) += inSize;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+          bufLimit = src;
+        }
+        else
+          bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
+        p->buf = src;
+        if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
+          return SZ_ERROR_DATA;
+        processed = (SizeT)(p->buf - src);
+        (*srcLen) += processed;
+        src += processed;
+        inSize -= processed;
+      }
+      else
+      {
+        unsigned rem = p->tempBufSize, lookAhead = 0;
+        while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
+          p->tempBuf[rem++] = src[lookAhead++];
+        p->tempBufSize = rem;
+        if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
+        {
+          int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, (SizeT)rem);
+          if (dummyRes == DUMMY_ERROR)
+          {
+            (*srcLen) += (SizeT)lookAhead;
+            *status = LZMA_STATUS_NEEDS_MORE_INPUT;
+            return SZ_OK;
+          }
+          if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
+          {
+            *status = LZMA_STATUS_NOT_FINISHED;
+            return SZ_ERROR_DATA;
+          }
+        }
+        p->buf = p->tempBuf;
+        if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
+          return SZ_ERROR_DATA;
+        
+        {
+          unsigned kkk = (unsigned)(p->buf - p->tempBuf);
+          if (rem < kkk)
+            return SZ_ERROR_FAIL; /* some internal error */
+          rem -= kkk;
+          if (lookAhead < rem)
+            return SZ_ERROR_FAIL; /* some internal error */
+          lookAhead -= rem;
+        }
+        (*srcLen) += (SizeT)lookAhead;
+        src += lookAhead;
+        inSize -= (SizeT)lookAhead;
+        p->tempBufSize = 0;
+      }
+  }
+  
+  if (p->code != 0)
+    return SZ_ERROR_DATA;
+  *status = LZMA_STATUS_FINISHED_WITH_MARK;
+  return SZ_OK;
+}
+
+
+SRes LzmaDec_DecodeToBuf(CLzmaDec *p, Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
+{
+  SizeT outSize = *destLen;
+  SizeT inSize = *srcLen;
+  *srcLen = *destLen = 0;
+  for (;;)
+  {
+    SizeT inSizeCur = inSize, outSizeCur, dicPos;
+    ELzmaFinishMode curFinishMode;
+    SRes res;
+    if (p->dicPos == p->dicBufSize)
+      p->dicPos = 0;
+    dicPos = p->dicPos;
+    if (outSize > p->dicBufSize - dicPos)
+    {
+      outSizeCur = p->dicBufSize;
+      curFinishMode = LZMA_FINISH_ANY;
+    }
+    else
+    {
+      outSizeCur = dicPos + outSize;
+      curFinishMode = finishMode;
+    }
+
+    res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
+    src += inSizeCur;
+    inSize -= inSizeCur;
+    *srcLen += inSizeCur;
+    outSizeCur = p->dicPos - dicPos;
+    memcpy(dest, p->dic + dicPos, outSizeCur);
+    dest += outSizeCur;
+    outSize -= outSizeCur;
+    *destLen += outSizeCur;
+    if (res != 0)
+      return res;
+    if (outSizeCur == 0 || outSize == 0)
+      return SZ_OK;
+  }
+}
+
+void LzmaDec_FreeProbs(CLzmaDec *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->probs);
+  p->probs = NULL;
+}
+
+static void LzmaDec_FreeDict(CLzmaDec *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->dic);
+  p->dic = NULL;
+}
+
+void LzmaDec_Free(CLzmaDec *p, ISzAllocPtr alloc)
+{
+  LzmaDec_FreeProbs(p, alloc);
+  LzmaDec_FreeDict(p, alloc);
+}
+
+SRes LzmaProps_Decode(CLzmaProps *p, const Byte *data, unsigned size)
+{
+  UInt32 dicSize;
+  Byte d;
+  
+  if (size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_UNSUPPORTED;
+  else
+    dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);
+ 
+  if (dicSize < LZMA_DIC_MIN)
+    dicSize = LZMA_DIC_MIN;
+  p->dicSize = dicSize;
+
+  d = data[0];
+  if (d >= (9 * 5 * 5))
+    return SZ_ERROR_UNSUPPORTED;
+
+  p->lc = (Byte)(d % 9);
+  d /= 9;
+  p->pb = (Byte)(d / 5);
+  p->lp = (Byte)(d % 5);
+
+  return SZ_OK;
+}
+
+static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAllocPtr alloc)
+{
+  UInt32 numProbs = LzmaProps_GetNumProbs(propNew);
+  if (!p->probs || numProbs != p->numProbs)
+  {
+    LzmaDec_FreeProbs(p, alloc);
+    p->probs = (CLzmaProb *)ISzAlloc_Alloc(alloc, numProbs * sizeof(CLzmaProb));
+    if (!p->probs)
+      return SZ_ERROR_MEM;
+    p->probs_1664 = p->probs + 1664;
+    p->numProbs = numProbs;
+  }
+  return SZ_OK;
+}
+
+SRes LzmaDec_AllocateProbs(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+  CLzmaProps propNew;
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+  p->prop = propNew;
+  return SZ_OK;
+}
+
+SRes LzmaDec_Allocate(CLzmaDec *p, const Byte *props, unsigned propsSize, ISzAllocPtr alloc)
+{
+  CLzmaProps propNew;
+  SizeT dicBufSize;
+  RINOK(LzmaProps_Decode(&propNew, props, propsSize));
+  RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
+
+  {
+    UInt32 dictSize = propNew.dicSize;
+    SizeT mask = ((UInt32)1 << 12) - 1;
+         if (dictSize >= ((UInt32)1 << 30)) mask = ((UInt32)1 << 22) - 1;
+    else if (dictSize >= ((UInt32)1 << 22)) mask = ((UInt32)1 << 20) - 1;;
+    dicBufSize = ((SizeT)dictSize + mask) & ~mask;
+    if (dicBufSize < dictSize)
+      dicBufSize = dictSize;
+  }
+
+  if (!p->dic || dicBufSize != p->dicBufSize)
+  {
+    LzmaDec_FreeDict(p, alloc);
+    p->dic = (Byte *)ISzAlloc_Alloc(alloc, dicBufSize);
+    if (!p->dic)
+    {
+      LzmaDec_FreeProbs(p, alloc);
+      return SZ_ERROR_MEM;
+    }
+  }
+  p->dicBufSize = dicBufSize;
+  p->prop = propNew;
+  return SZ_OK;
+}
+
+SRes LzmaDecode(Byte *dest, SizeT *destLen, const Byte *src, SizeT *srcLen,
+    const Byte *propData, unsigned propSize, ELzmaFinishMode finishMode,
+    ELzmaStatus *status, ISzAllocPtr alloc)
+{
+  CLzmaDec p;
+  SRes res;
+  SizeT outSize = *destLen, inSize = *srcLen;
+  *destLen = *srcLen = 0;
+  *status = LZMA_STATUS_NOT_SPECIFIED;
+  if (inSize < RC_INIT_SIZE)
+    return SZ_ERROR_INPUT_EOF;
+  LzmaDec_Construct(&p);
+  RINOK(LzmaDec_AllocateProbs(&p, propData, propSize, alloc));
+  p.dic = dest;
+  p.dicBufSize = outSize;
+  LzmaDec_Init(&p);
+  *srcLen = inSize;
+  res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status);
+  *destLen = p.dicPos;
+  if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
+    res = SZ_ERROR_INPUT_EOF;
+  LzmaDec_FreeProbs(&p, alloc);
+  return res;
+}
diff --git a/deps/libchdr/deps/lzma-19.00/src/LzmaEnc.c b/deps/libchdr/deps/lzma-19.00/src/LzmaEnc.c
new file mode 100644
index 00000000..2d3839df
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/LzmaEnc.c
@@ -0,0 +1,1330 @@
+/* LzmaEnc.c -- LZMA Encoder
+2019-01-10: Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include <string.h>
+
+/* #define SHOW_STAT */
+/* #define SHOW_STAT2 */
+
+#if defined(SHOW_STAT) || defined(SHOW_STAT2)
+#include <stdio.h>
+#endif
+
+#include "LzmaEnc.h"
+
+#include "LzFind.h"
+#ifndef _7ZIP_ST
+#include "LzFindMt.h"
+#endif
+
+#ifdef SHOW_STAT
+static unsigned g_STAT_OFFSET = 0;
+#endif
+
+#define kLzmaMaxHistorySize ((UInt32)3 << 29)
+/* #define kLzmaMaxHistorySize ((UInt32)7 << 29) */
+
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+#define kProbInitValue (kBitModelTotal >> 1)
+
+#define kNumMoveReducingBits 4
+#define kNumBitPriceShiftBits 4
+#define kBitPrice (1 << kNumBitPriceShiftBits)
+
+#define REP_LEN_COUNT 64
+
+void LzmaEncProps_Init(CLzmaEncProps *p)
+{
+  p->level = 5;
+  p->dictSize = p->mc = 0;
+  p->reduceSize = (UInt64)(Int64)-1;
+  p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
+  p->writeEndMark = 0;
+}
+
+void LzmaEncProps_Normalize(CLzmaEncProps *p)
+{
+  int level = p->level;
+  if (level < 0) level = 5;
+  p->level = level;
+  
+  if (p->dictSize == 0) p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level <= 7 ? (1 << 25) : (1 << 26)));
+  if (p->dictSize > p->reduceSize)
+  {
+    unsigned i;
+    UInt32 reduceSize = (UInt32)p->reduceSize;
+    for (i = 11; i <= 30; i++)
+    {
+      if (reduceSize <= ((UInt32)2 << i)) { p->dictSize = ((UInt32)2 << i); break; }
+      if (reduceSize <= ((UInt32)3 << i)) { p->dictSize = ((UInt32)3 << i); break; }
+    }
+  }
+
+  if (p->lc < 0) p->lc = 3;
+  if (p->lp < 0) p->lp = 0;
+  if (p->pb < 0) p->pb = 2;
+
+  if (p->algo < 0) p->algo = (level < 5 ? 0 : 1);
+  if (p->fb < 0) p->fb = (level < 7 ? 32 : 64);
+  if (p->btMode < 0) p->btMode = (p->algo == 0 ? 0 : 1);
+  if (p->numHashBytes < 0) p->numHashBytes = 4;
+  if (p->mc == 0) p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
+  
+  if (p->numThreads < 0)
+    p->numThreads =
+      #ifndef _7ZIP_ST
+      ((p->btMode && p->algo) ? 2 : 1);
+      #else
+      1;
+      #endif
+}
+
+UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps *props2)
+{
+  CLzmaEncProps props = *props2;
+  LzmaEncProps_Normalize(&props);
+  return props.dictSize;
+}
+
+#if (_MSC_VER >= 1400)
+/* BSR code is fast for some new CPUs */
+/* #define LZMA_LOG_BSR */
+#endif
+
+#ifdef LZMA_LOG_BSR
+
+#define kDicLogSizeMaxCompress 32
+
+#define BSR2_RET(pos, res) { unsigned long zz; _BitScanReverse(&zz, (pos)); res = (zz + zz) + ((pos >> (zz - 1)) & 1); }
+
+static unsigned GetPosSlot1(UInt32 pos)
+{
+  unsigned res;
+  BSR2_RET(pos, res);
+  return res;
+}
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < 2) res = pos; else BSR2_RET(pos, res); }
+
+#else
+
+#define kNumLogBits (9 + sizeof(size_t) / 2)
+/* #define kNumLogBits (11 + sizeof(size_t) / 8 * 3) */
+
+#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)
+
+static void LzmaEnc_FastPosInit(Byte *g_FastPos)
+{
+  unsigned slot;
+  g_FastPos[0] = 0;
+  g_FastPos[1] = 1;
+  g_FastPos += 2;
+  
+  for (slot = 2; slot < kNumLogBits * 2; slot++)
+  {
+    size_t k = ((size_t)1 << ((slot >> 1) - 1));
+    size_t j;
+    for (j = 0; j < k; j++)
+      g_FastPos[j] = (Byte)slot;
+    g_FastPos += k;
+  }
+}
+
+/* we can use ((limit - pos) >> 31) only if (pos < ((UInt32)1 << 31)) */
+/*
+#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
+  (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
+  res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
+/*
+#define BSR2_RET(pos, res) { unsigned zz = 6 + ((kNumLogBits - 1) & \
+  (0 - (((((UInt32)1 << (kNumLogBits)) - 1) - (pos >> 6)) >> 31))); \
+  res = p->g_FastPos[pos >> zz] + (zz * 2); }
+*/
+
+#define BSR2_RET(pos, res) { unsigned zz = (pos < (1 << (kNumLogBits + 6))) ? 6 : 6 + kNumLogBits - 1; \
+  res = p->g_FastPos[pos >> zz] + (zz * 2); }
+
+/*
+#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
+  p->g_FastPos[pos >> 6] + 12 : \
+  p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
+*/
+
+#define GetPosSlot1(pos) p->g_FastPos[pos]
+#define GetPosSlot2(pos, res) { BSR2_RET(pos, res); }
+#define GetPosSlot(pos, res) { if (pos < kNumFullDistances) res = p->g_FastPos[pos & (kNumFullDistances - 1)]; else BSR2_RET(pos, res); }
+
+#endif
+
+
+#define LZMA_NUM_REPS 4
+
+typedef UInt16 CState;
+typedef UInt16 CExtra;
+
+typedef struct
+{
+  UInt32 price;
+  CState state;
+  CExtra extra;
+      // 0   : normal
+      // 1   : LIT : MATCH
+      // > 1 : MATCH (extra-1) : LIT : REP0 (len)
+  UInt32 len;
+  UInt32 dist;
+  UInt32 reps[LZMA_NUM_REPS];
+} COptimal;
+
+
+// 18.06
+#define kNumOpts (1 << 11)
+#define kPackReserve (kNumOpts * 8)
+// #define kNumOpts (1 << 12)
+// #define kPackReserve (1 + kNumOpts * 2)
+
+#define kNumLenToPosStates 4
+#define kNumPosSlotBits 6
+#define kDicLogSizeMin 0
+#define kDicLogSizeMax 32
+#define kDistTableSizeMax (kDicLogSizeMax * 2)
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+#define kAlignMask (kAlignTableSize - 1)
+
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+typedef
+#ifdef _LZMA_PROB32
+  UInt32
+#else
+  UInt16
+#endif
+  CLzmaProb;
+
+#define LZMA_PB_MAX 4
+#define LZMA_LC_MAX 8
+#define LZMA_LP_MAX 4
+
+#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)
+
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+#define kLenNumSymbolsTotal (kLenNumLowSymbols * 2 + kLenNumHighSymbols)
+
+#define LZMA_MATCH_LEN_MIN 2
+#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)
+
+#define kNumStates 12
+
+
+typedef struct
+{
+  CLzmaProb low[LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)];
+  CLzmaProb high[kLenNumHighSymbols];
+} CLenEnc;
+
+
+typedef struct
+{
+  unsigned tableSize;
+  UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
+  // UInt32 prices1[LZMA_NUM_PB_STATES_MAX][kLenNumLowSymbols * 2];
+  // UInt32 prices2[kLenNumSymbolsTotal];
+} CLenPriceEnc;
+
+#define GET_PRICE_LEN(p, posState, len) \
+    ((p)->prices[posState][(size_t)(len) - LZMA_MATCH_LEN_MIN])
+
+/*
+#define GET_PRICE_LEN(p, posState, len) \
+    ((p)->prices2[(size_t)(len) - 2] + ((p)->prices1[posState][((len) - 2) & (kLenNumLowSymbols * 2 - 1)] & (((len) - 2 - kLenNumLowSymbols * 2) >> 9)))
+*/
+
+typedef struct
+{
+  UInt32 range;
+  unsigned cache;
+  UInt64 low;
+  UInt64 cacheSize;
+  Byte *buf;
+  Byte *bufLim;
+  Byte *bufBase;
+  ISeqOutStream *outStream;
+  UInt64 processed;
+  SRes res;
+} CRangeEnc;
+
+
+typedef struct
+{
+  CLzmaProb *litProbs;
+
+  unsigned state;
+  UInt32 reps[LZMA_NUM_REPS];
+
+  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+  CLzmaProb isRep[kNumStates];
+  CLzmaProb isRepG0[kNumStates];
+  CLzmaProb isRepG1[kNumStates];
+  CLzmaProb isRepG2[kNumStates];
+  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+
+  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+  CLzmaProb posEncoders[kNumFullDistances];
+  
+  CLenEnc lenProbs;
+  CLenEnc repLenProbs;
+
+} CSaveState;
+
+
+typedef UInt32 CProbPrice;
+
+
+typedef struct
+{
+  void *matchFinderObj;
+  IMatchFinder matchFinder;
+
+  unsigned optCur;
+  unsigned optEnd;
+
+  unsigned longestMatchLen;
+  unsigned numPairs;
+  UInt32 numAvail;
+
+  unsigned state;
+  unsigned numFastBytes;
+  unsigned additionalOffset;
+  UInt32 reps[LZMA_NUM_REPS];
+  unsigned lpMask, pbMask;
+  CLzmaProb *litProbs;
+  CRangeEnc rc;
+
+  UInt32 backRes;
+
+  unsigned lc, lp, pb;
+  unsigned lclp;
+
+  BoolInt fastMode;
+  BoolInt writeEndMark;
+  BoolInt finished;
+  BoolInt multiThread;
+  BoolInt needInit;
+  // BoolInt _maxMode;
+
+  UInt64 nowPos64;
+  
+  unsigned matchPriceCount;
+  // unsigned alignPriceCount;
+  int repLenEncCounter;
+
+  unsigned distTableSize;
+
+  UInt32 dictSize;
+  SRes result;
+
+  #ifndef _7ZIP_ST
+  BoolInt mtMode;
+  // begin of CMatchFinderMt is used in LZ thread
+  CMatchFinderMt matchFinderMt;
+  // end of CMatchFinderMt is used in BT and HASH threads
+  #endif
+
+  CMatchFinder matchFinderBase;
+
+  #ifndef _7ZIP_ST
+  Byte pad[128];
+  #endif
+  
+  // LZ thread
+  CProbPrice ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
+
+  UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
+
+  UInt32 alignPrices[kAlignTableSize];
+  UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
+  UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
+
+  CLzmaProb posAlignEncoder[1 << kNumAlignBits];
+  CLzmaProb isRep[kNumStates];
+  CLzmaProb isRepG0[kNumStates];
+  CLzmaProb isRepG1[kNumStates];
+  CLzmaProb isRepG2[kNumStates];
+  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];
+  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
+  CLzmaProb posEncoders[kNumFullDistances];
+  
+  CLenEnc lenProbs;
+  CLenEnc repLenProbs;
+
+  #ifndef LZMA_LOG_BSR
+  Byte g_FastPos[1 << kNumLogBits];
+  #endif
+
+  CLenPriceEnc lenEnc;
+  CLenPriceEnc repLenEnc;
+
+  COptimal opt[kNumOpts];
+
+  CSaveState saveState;
+
+  #ifndef _7ZIP_ST
+  Byte pad2[128];
+  #endif
+} CLzmaEnc;
+
+
+SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps *props2)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  CLzmaEncProps props = *props2;
+  LzmaEncProps_Normalize(&props);
+
+  if (props.lc > LZMA_LC_MAX
+      || props.lp > LZMA_LP_MAX
+      || props.pb > LZMA_PB_MAX
+      || props.dictSize > ((UInt64)1 << kDicLogSizeMaxCompress)
+      || props.dictSize > kLzmaMaxHistorySize)
+    return SZ_ERROR_PARAM;
+
+  p->dictSize = props.dictSize;
+  {
+    unsigned fb = props.fb;
+    if (fb < 5)
+      fb = 5;
+    if (fb > LZMA_MATCH_LEN_MAX)
+      fb = LZMA_MATCH_LEN_MAX;
+    p->numFastBytes = fb;
+  }
+  p->lc = props.lc;
+  p->lp = props.lp;
+  p->pb = props.pb;
+  p->fastMode = (props.algo == 0);
+  // p->_maxMode = True;
+  p->matchFinderBase.btMode = (Byte)(props.btMode ? 1 : 0);
+  {
+    unsigned numHashBytes = 4;
+    if (props.btMode)
+    {
+      if (props.numHashBytes < 2)
+        numHashBytes = 2;
+      else if (props.numHashBytes < 4)
+        numHashBytes = props.numHashBytes;
+    }
+    p->matchFinderBase.numHashBytes = numHashBytes;
+  }
+
+  p->matchFinderBase.cutValue = props.mc;
+
+  p->writeEndMark = props.writeEndMark;
+
+  #ifndef _7ZIP_ST
+  /*
+  if (newMultiThread != _multiThread)
+  {
+    ReleaseMatchFinder();
+    _multiThread = newMultiThread;
+  }
+  */
+  p->multiThread = (props.numThreads > 1);
+  #endif
+
+  return SZ_OK;
+}
+
+
+void LzmaEnc_SetDataSize(CLzmaEncHandle pp, UInt64 expectedDataSiize)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  p->matchFinderBase.expectedDataSize = expectedDataSiize;
+}
+
+
+#define kState_Start 0
+#define kState_LitAfterMatch 4
+#define kState_LitAfterRep   5
+#define kState_MatchAfterLit 7
+#define kState_RepAfterLit   8
+
+static const Byte kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4,  5,  6,   4, 5};
+static const Byte kMatchNextStates[kNumStates]   = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10};
+static const Byte kRepNextStates[kNumStates]     = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11};
+static const Byte kShortRepNextStates[kNumStates]= {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11};
+
+#define IsLitState(s) ((s) < 7)
+#define GetLenToPosState2(len) (((len) < kNumLenToPosStates - 1) ? (len) : kNumLenToPosStates - 1)
+#define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len) - 2 : kNumLenToPosStates - 1)
+
+#define kInfinityPrice (1 << 30)
+
+static void RangeEnc_Construct(CRangeEnc *p)
+{
+  p->outStream = NULL;
+  p->bufBase = NULL;
+}
+
+#define RangeEnc_GetProcessed(p)       ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize)
+#define RangeEnc_GetProcessed_sizet(p) ((size_t)(p)->processed + ((p)->buf - (p)->bufBase) + (size_t)(p)->cacheSize)
+
+#define RC_BUF_SIZE (1 << 16)
+
+static int RangeEnc_Alloc(CRangeEnc *p, ISzAllocPtr alloc)
+{
+  if (!p->bufBase)
+  {
+    p->bufBase = (Byte *)ISzAlloc_Alloc(alloc, RC_BUF_SIZE);
+    if (!p->bufBase)
+      return 0;
+    p->bufLim = p->bufBase + RC_BUF_SIZE;
+  }
+  return 1;
+}
+
+static void RangeEnc_Free(CRangeEnc *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->bufBase);
+  p->bufBase = 0;
+}
+
+static void RangeEnc_Init(CRangeEnc *p)
+{
+  /* Stream.Init(); */
+  p->range = 0xFFFFFFFF;
+  p->cache = 0;
+  p->low = 0;
+  p->cacheSize = 0;
+
+  p->buf = p->bufBase;
+
+  p->processed = 0;
+  p->res = SZ_OK;
+}
+
+MY_NO_INLINE static void RangeEnc_FlushStream(CRangeEnc *p)
+{
+  size_t num;
+  if (p->res != SZ_OK)
+    return;
+  num = p->buf - p->bufBase;
+  if (num != ISeqOutStream_Write(p->outStream, p->bufBase, num))
+    p->res = SZ_ERROR_WRITE;
+  p->processed += num;
+  p->buf = p->bufBase;
+}
+
+MY_NO_INLINE static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc *p)
+{
+  UInt32 low = (UInt32)p->low;
+  unsigned high = (unsigned)(p->low >> 32);
+  p->low = (UInt32)(low << 8);
+  if (low < (UInt32)0xFF000000 || high != 0)
+  {
+    {
+      Byte *buf = p->buf;
+      *buf++ = (Byte)(p->cache + high);
+      p->cache = (unsigned)(low >> 24);
+      p->buf = buf;
+      if (buf == p->bufLim)
+        RangeEnc_FlushStream(p);
+      if (p->cacheSize == 0)
+        return;
+    }
+    high += 0xFF;
+    for (;;)
+    {
+      Byte *buf = p->buf;
+      *buf++ = (Byte)(high);
+      p->buf = buf;
+      if (buf == p->bufLim)
+        RangeEnc_FlushStream(p);
+      if (--p->cacheSize == 0)
+        return;
+    }
+  }
+  p->cacheSize++;
+}
+
+static void RangeEnc_FlushData(CRangeEnc *p)
+{
+  int i;
+  for (i = 0; i < 5; i++)
+    RangeEnc_ShiftLow(p);
+}
+
+#define RC_NORM(p) if (range < kTopValue) { range <<= 8; RangeEnc_ShiftLow(p); }
+
+#define RC_BIT_PRE(p, prob) \
+  ttt = *(prob); \
+  newBound = (range >> kNumBitModelTotalBits) * ttt;
+
+// #define _LZMA_ENC_USE_BRANCH
+
+#ifdef _LZMA_ENC_USE_BRANCH
+
+#define RC_BIT(p, prob, bit) { \
+  RC_BIT_PRE(p, prob) \
+  if (bit == 0) { range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } \
+  else { (p)->low += newBound; range -= newBound; ttt -= ttt >> kNumMoveBits; } \
+  *(prob) = (CLzmaProb)ttt; \
+  RC_NORM(p) \
+  }
+
+#else
+
+#define RC_BIT(p, prob, bit) { \
+  UInt32 mask; \
+  RC_BIT_PRE(p, prob) \
+  mask = 0 - (UInt32)bit; \
+  range &= mask; \
+  mask &= newBound; \
+  range -= mask; \
+  (p)->low += mask; \
+  mask = (UInt32)bit - 1; \
+  range += newBound & mask; \
+  mask &= (kBitModelTotal - ((1 << kNumMoveBits) - 1)); \
+  mask += ((1 << kNumMoveBits) - 1); \
+  ttt += (Int32)(mask - ttt) >> kNumMoveBits; \
+  *(prob) = (CLzmaProb)ttt; \
+  RC_NORM(p) \
+  }
+
+#endif
+
+
+
+
+#define RC_BIT_0_BASE(p, prob) \
+  range = newBound; *(prob) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
+
+#define RC_BIT_1_BASE(p, prob) \
+  range -= newBound; (p)->low += newBound; *(prob) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); \
+
+#define RC_BIT_0(p, prob) \
+  RC_BIT_0_BASE(p, prob) \
+  RC_NORM(p)
+
+#define RC_BIT_1(p, prob) \
+  RC_BIT_1_BASE(p, prob) \
+  RC_NORM(p)
+
+static void RangeEnc_EncodeBit_0(CRangeEnc *p, CLzmaProb *prob)
+{
+  UInt32 range, ttt, newBound;
+  range = p->range;
+  RC_BIT_PRE(p, prob)
+  RC_BIT_0(p, prob)
+  p->range = range;
+}
+
+static void LitEnc_Encode(CRangeEnc *p, CLzmaProb *probs, UInt32 sym)
+{
+  UInt32 range = p->range;
+  sym |= 0x100;
+  do
+  {
+    UInt32 ttt, newBound;
+    // RangeEnc_EncodeBit(p, probs + (sym >> 8), (sym >> 7) & 1);
+    CLzmaProb *prob = probs + (sym >> 8);
+    UInt32 bit = (sym >> 7) & 1;
+    sym <<= 1;
+    RC_BIT(p, prob, bit);
+  }
+  while (sym < 0x10000);
+  p->range = range;
+}
+
+static void LzmaEnc_InitPriceTables(CProbPrice *ProbPrices)
+{
+  UInt32 i;
+  for (i = 0; i < (kBitModelTotal >> kNumMoveReducingBits); i++)
+  {
+    const unsigned kCyclesBits = kNumBitPriceShiftBits;
+    UInt32 w = (i << kNumMoveReducingBits) + (1 << (kNumMoveReducingBits - 1));
+    unsigned bitCount = 0;
+    unsigned j;
+    for (j = 0; j < kCyclesBits; j++)
+    {
+      w = w * w;
+      bitCount <<= 1;
+      while (w >= ((UInt32)1 << 16))
+      {
+        w >>= 1;
+        bitCount++;
+      }
+    }
+    ProbPrices[i] = (CProbPrice)((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount);
+    // printf("\n%3d: %5d", i, ProbPrices[i]);
+  }
+}
+
+
+#define GET_PRICE(prob, bit) \
+  p->ProbPrices[((prob) ^ (unsigned)(((-(int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+
+#define GET_PRICEa(prob, bit) \
+     ProbPrices[((prob) ^ (unsigned)((-((int)(bit))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits];
+
+#define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+#define GET_PRICEa_0(prob) ProbPrices[(prob) >> kNumMoveReducingBits]
+#define GET_PRICEa_1(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits]
+
+
+static UInt32 LitEnc_GetPrice(const CLzmaProb *probs, UInt32 sym, const CProbPrice *ProbPrices)
+{
+  UInt32 price = 0;
+  sym |= 0x100;
+  do
+  {
+    unsigned bit = sym & 1;
+    sym >>= 1;
+    price += GET_PRICEa(probs[sym], bit);
+  }
+  while (sym >= 2);
+  return price;
+}
+
+
+static UInt32 LitEnc_Matched_GetPrice(const CLzmaProb *probs, UInt32 sym, UInt32 matchByte, const CProbPrice *ProbPrices)
+{
+  UInt32 price = 0;
+  UInt32 offs = 0x100;
+  sym |= 0x100;
+  do
+  {
+    matchByte <<= 1;
+    price += GET_PRICEa(probs[offs + (matchByte & offs) + (sym >> 8)], (sym >> 7) & 1);
+    sym <<= 1;
+    offs &= ~(matchByte ^ sym);
+  }
+  while (sym < 0x10000);
+  return price;
+}
+
+
+
+static void LenEnc_Init(CLenEnc *p)
+{
+  unsigned i;
+  for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << (kLenNumLowBits + 1)); i++)
+    p->low[i] = kProbInitValue;
+  for (i = 0; i < kLenNumHighSymbols; i++)
+    p->high[i] = kProbInitValue;
+}
+
+static void LenEnc_Encode(CLenEnc *p, CRangeEnc *rc, unsigned sym, unsigned posState)
+{
+  UInt32 range, ttt, newBound;
+  CLzmaProb *probs = p->low;
+  range = rc->range;
+  RC_BIT_PRE(rc, probs);
+  if (sym >= kLenNumLowSymbols)
+  {
+    RC_BIT_1(rc, probs);
+    probs += kLenNumLowSymbols;
+    RC_BIT_PRE(rc, probs);
+    if (sym >= kLenNumLowSymbols * 2)
+    {
+      RC_BIT_1(rc, probs);
+      rc->range = range;
+      // RcTree_Encode(rc, p->high, kLenNumHighBits, sym - kLenNumLowSymbols * 2);
+      LitEnc_Encode(rc, p->high, sym - kLenNumLowSymbols * 2);
+      return;
+    }
+    sym -= kLenNumLowSymbols;
+  }
+
+  // RcTree_Encode(rc, probs + (posState << kLenNumLowBits), kLenNumLowBits, sym);
+  {
+    unsigned m;
+    unsigned bit;
+    RC_BIT_0(rc, probs);
+    probs += (posState << (1 + kLenNumLowBits));
+    bit = (sym >> 2)    ; RC_BIT(rc, probs + 1, bit); m = (1 << 1) + bit;
+    bit = (sym >> 1) & 1; RC_BIT(rc, probs + m, bit); m = (m << 1) + bit;
+    bit =  sym       & 1; RC_BIT(rc, probs + m, bit);
+    rc->range = range;
+  }
+}
+
+static void SetPrices_3(const CLzmaProb *probs, UInt32 startPrice, UInt32 *prices, const CProbPrice *ProbPrices)
+{
+  unsigned i;
+  for (i = 0; i < 8; i += 2)
+  {
+    UInt32 price = startPrice;
+    UInt32 prob;
+    price += GET_PRICEa(probs[1           ], (i >> 2));
+    price += GET_PRICEa(probs[2 + (i >> 2)], (i >> 1) & 1);
+    prob = probs[4 + (i >> 1)];
+    prices[i    ] = price + GET_PRICEa_0(prob);
+    prices[i + 1] = price + GET_PRICEa_1(prob);
+  }
+}
+
+
+MY_NO_INLINE static void MY_FAST_CALL LenPriceEnc_UpdateTables(
+    CLenPriceEnc *p,
+    unsigned numPosStates,
+    const CLenEnc *enc,
+    const CProbPrice *ProbPrices)
+{
+  UInt32 b;
+ 
+  {
+    unsigned prob = enc->low[0];
+    UInt32 a, c;
+    unsigned posState;
+    b = GET_PRICEa_1(prob);
+    a = GET_PRICEa_0(prob);
+    c = b + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
+    for (posState = 0; posState < numPosStates; posState++)
+    {
+      UInt32 *prices = p->prices[posState];
+      const CLzmaProb *probs = enc->low + (posState << (1 + kLenNumLowBits));
+      SetPrices_3(probs, a, prices, ProbPrices);
+      SetPrices_3(probs + kLenNumLowSymbols, c, prices + kLenNumLowSymbols, ProbPrices);
+    }
+  }
+
+  /*
+  {
+    unsigned i;
+    UInt32 b;
+    a = GET_PRICEa_0(enc->low[0]);
+    for (i = 0; i < kLenNumLowSymbols; i++)
+      p->prices2[i] = a;
+    a = GET_PRICEa_1(enc->low[0]);
+    b = a + GET_PRICEa_0(enc->low[kLenNumLowSymbols]);
+    for (i = kLenNumLowSymbols; i < kLenNumLowSymbols * 2; i++)
+      p->prices2[i] = b;
+    a += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
+  }
+  */
+ 
+  // p->counter = numSymbols;
+  // p->counter = 64;
+
+  {
+    unsigned i = p->tableSize;
+    
+    if (i > kLenNumLowSymbols * 2)
+    {
+      const CLzmaProb *probs = enc->high;
+      UInt32 *prices = p->prices[0] + kLenNumLowSymbols * 2;
+      i -= kLenNumLowSymbols * 2 - 1;
+      i >>= 1;
+      b += GET_PRICEa_1(enc->low[kLenNumLowSymbols]);
+      do
+      {
+        /*
+        p->prices2[i] = a +
+        // RcTree_GetPrice(enc->high, kLenNumHighBits, i - kLenNumLowSymbols * 2, ProbPrices);
+        LitEnc_GetPrice(probs, i - kLenNumLowSymbols * 2, ProbPrices);
+        */
+        // UInt32 price = a + RcTree_GetPrice(probs, kLenNumHighBits - 1, sym, ProbPrices);
+        unsigned sym = --i + (1 << (kLenNumHighBits - 1));
+        UInt32 price = b;
+        do
+        {
+          unsigned bit = sym & 1;
+          sym >>= 1;
+          price += GET_PRICEa(probs[sym], bit);
+        }
+        while (sym >= 2);
+
+        {
+          unsigned prob = probs[(size_t)i + (1 << (kLenNumHighBits - 1))];
+          prices[(size_t)i * 2    ] = price + GET_PRICEa_0(prob);
+          prices[(size_t)i * 2 + 1] = price + GET_PRICEa_1(prob);
+        }
+      }
+      while (i);
+
+      {
+        unsigned posState;
+        size_t num = (p->tableSize - kLenNumLowSymbols * 2) * sizeof(p->prices[0][0]);
+        for (posState = 1; posState < numPosStates; posState++)
+          memcpy(p->prices[posState] + kLenNumLowSymbols * 2, p->prices[0] + kLenNumLowSymbols * 2, num);
+      }
+    }
+  }
+}
+
+/*
+  #ifdef SHOW_STAT
+  g_STAT_OFFSET += num;
+  printf("\n MovePos %u", num);
+  #endif
+*/
+  
+#define MOVE_POS(p, num) { \
+    p->additionalOffset += (num); \
+    p->matchFinder.Skip(p->matchFinderObj, (UInt32)(num)); }
+
+
+#define MARK_LIT ((UInt32)(Int32)-1)
+
+#define MakeAs_Lit(p)       { (p)->dist = MARK_LIT; (p)->extra = 0; }
+#define MakeAs_ShortRep(p)  { (p)->dist = 0; (p)->extra = 0; }
+#define IsShortRep(p)       ((p)->dist == 0)
+
+
+#define GetPrice_ShortRep(p, state, posState) \
+  ( GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]))
+
+#define GetPrice_Rep_0(p, state, posState) ( \
+    GET_PRICE_1(p->isMatch[state][posState]) \
+  + GET_PRICE_1(p->isRep0Long[state][posState])) \
+  + GET_PRICE_1(p->isRep[state]) \
+  + GET_PRICE_0(p->isRepG0[state])
+  
+MY_FORCE_INLINE
+static UInt32 GetPrice_PureRep(const CLzmaEnc *p, unsigned repIndex, size_t state, size_t posState)
+{
+  UInt32 price;
+  UInt32 prob = p->isRepG0[state];
+  if (repIndex == 0)
+  {
+    price = GET_PRICE_0(prob);
+    price += GET_PRICE_1(p->isRep0Long[state][posState]);
+  }
+  else
+  {
+    price = GET_PRICE_1(prob);
+    prob = p->isRepG1[state];
+    if (repIndex == 1)
+      price += GET_PRICE_0(prob);
+    else
+    {
+      price += GET_PRICE_1(prob);
+      price += GET_PRICE(p->isRepG2[state], repIndex - 2);
+    }
+  }
+  return price;
+}
+
+
+static SRes CheckErrors(CLzmaEnc *p)
+{
+  if (p->result != SZ_OK)
+    return p->result;
+  if (p->rc.res != SZ_OK)
+    p->result = SZ_ERROR_WRITE;
+  if (p->matchFinderBase.result != SZ_OK)
+    p->result = SZ_ERROR_READ;
+  if (p->result != SZ_OK)
+    p->finished = True;
+  return p->result;
+}
+
+
+MY_NO_INLINE static void FillAlignPrices(CLzmaEnc *p)
+{
+  unsigned i;
+  const CProbPrice *ProbPrices = p->ProbPrices;
+  const CLzmaProb *probs = p->posAlignEncoder;
+  // p->alignPriceCount = 0;
+  for (i = 0; i < kAlignTableSize / 2; i++)
+  {
+    UInt32 price = 0;
+    unsigned sym = i;
+    unsigned m = 1;
+    unsigned bit;
+    UInt32 prob;
+    bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+    bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+    bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) + bit;
+    prob = probs[m];
+    p->alignPrices[i    ] = price + GET_PRICEa_0(prob);
+    p->alignPrices[i + 8] = price + GET_PRICEa_1(prob);
+    // p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices);
+  }
+}
+
+
+MY_NO_INLINE static void FillDistancesPrices(CLzmaEnc *p)
+{
+  // int y; for (y = 0; y < 100; y++) {
+
+  UInt32 tempPrices[kNumFullDistances];
+  unsigned i, lps;
+
+  const CProbPrice *ProbPrices = p->ProbPrices;
+  p->matchPriceCount = 0;
+
+  for (i = kStartPosModelIndex / 2; i < kNumFullDistances / 2; i++)
+  {
+    unsigned posSlot = GetPosSlot1(i);
+    unsigned footerBits = (posSlot >> 1) - 1;
+    unsigned base = ((2 | (posSlot & 1)) << footerBits);
+    const CLzmaProb *probs = p->posEncoders + (size_t)base * 2;
+    // tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base, footerBits, i - base, p->ProbPrices);
+    UInt32 price = 0;
+    unsigned m = 1;
+    unsigned sym = i;
+    unsigned offset = (unsigned)1 << footerBits;
+    base += i;
+    
+    if (footerBits)
+    do
+    {
+      unsigned bit = sym & 1;
+      sym >>= 1;
+      price += GET_PRICEa(probs[m], bit);
+      m = (m << 1) + bit;
+    }
+    while (--footerBits);
+
+    {
+      unsigned prob = probs[m];
+      tempPrices[base         ] = price + GET_PRICEa_0(prob);
+      tempPrices[base + offset] = price + GET_PRICEa_1(prob);
+    }
+  }
+
+  for (lps = 0; lps < kNumLenToPosStates; lps++)
+  {
+    unsigned slot;
+    unsigned distTableSize2 = (p->distTableSize + 1) >> 1;
+    UInt32 *posSlotPrices = p->posSlotPrices[lps];
+    const CLzmaProb *probs = p->posSlotEncoder[lps];
+    
+    for (slot = 0; slot < distTableSize2; slot++)
+    {
+      // posSlotPrices[slot] = RcTree_GetPrice(encoder, kNumPosSlotBits, slot, p->ProbPrices);
+      UInt32 price;
+      unsigned bit;
+      unsigned sym = slot + (1 << (kNumPosSlotBits - 1));
+      unsigned prob;
+      bit = sym & 1; sym >>= 1; price  = GET_PRICEa(probs[sym], bit);
+      bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+      bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+      bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+      bit = sym & 1; sym >>= 1; price += GET_PRICEa(probs[sym], bit);
+      prob = probs[(size_t)slot + (1 << (kNumPosSlotBits - 1))];
+      posSlotPrices[(size_t)slot * 2    ] = price + GET_PRICEa_0(prob);
+      posSlotPrices[(size_t)slot * 2 + 1] = price + GET_PRICEa_1(prob);
+    }
+    
+    {
+      UInt32 delta = ((UInt32)((kEndPosModelIndex / 2 - 1) - kNumAlignBits) << kNumBitPriceShiftBits);
+      for (slot = kEndPosModelIndex / 2; slot < distTableSize2; slot++)
+      {
+        posSlotPrices[(size_t)slot * 2    ] += delta;
+        posSlotPrices[(size_t)slot * 2 + 1] += delta;
+        delta += ((UInt32)1 << kNumBitPriceShiftBits);
+      }
+    }
+
+    {
+      UInt32 *dp = p->distancesPrices[lps];
+      
+      dp[0] = posSlotPrices[0];
+      dp[1] = posSlotPrices[1];
+      dp[2] = posSlotPrices[2];
+      dp[3] = posSlotPrices[3];
+
+      for (i = 4; i < kNumFullDistances; i += 2)
+      {
+        UInt32 slotPrice = posSlotPrices[GetPosSlot1(i)];
+        dp[i    ] = slotPrice + tempPrices[i];
+        dp[i + 1] = slotPrice + tempPrices[i + 1];
+      }
+    }
+  }
+  // }
+}
+
+
+
+void LzmaEnc_Construct(CLzmaEnc *p)
+{
+  RangeEnc_Construct(&p->rc);
+  MatchFinder_Construct(&p->matchFinderBase);
+  
+  #ifndef _7ZIP_ST
+  MatchFinderMt_Construct(&p->matchFinderMt);
+  p->matchFinderMt.MatchFinder = &p->matchFinderBase;
+  #endif
+
+  {
+    CLzmaEncProps props;
+    LzmaEncProps_Init(&props);
+    LzmaEnc_SetProps(p, &props);
+  }
+
+  #ifndef LZMA_LOG_BSR
+  LzmaEnc_FastPosInit(p->g_FastPos);
+  #endif
+
+  LzmaEnc_InitPriceTables(p->ProbPrices);
+  p->litProbs = NULL;
+  p->saveState.litProbs = NULL;
+
+}
+
+CLzmaEncHandle LzmaEnc_Create(ISzAllocPtr alloc)
+{
+  void *p;
+  p = ISzAlloc_Alloc(alloc, sizeof(CLzmaEnc));
+  if (p)
+    LzmaEnc_Construct((CLzmaEnc *)p);
+  return p;
+}
+
+void LzmaEnc_FreeLits(CLzmaEnc *p, ISzAllocPtr alloc)
+{
+  ISzAlloc_Free(alloc, p->litProbs);
+  ISzAlloc_Free(alloc, p->saveState.litProbs);
+  p->litProbs = NULL;
+  p->saveState.litProbs = NULL;
+}
+
+void LzmaEnc_Destruct(CLzmaEnc *p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+  #ifndef _7ZIP_ST
+  MatchFinderMt_Destruct(&p->matchFinderMt, allocBig);
+  #endif
+  
+  MatchFinder_Free(&p->matchFinderBase, allocBig);
+  LzmaEnc_FreeLits(p, alloc);
+  RangeEnc_Free(&p->rc, alloc);
+}
+
+void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+  LzmaEnc_Destruct((CLzmaEnc *)p, alloc, allocBig);
+  ISzAlloc_Free(alloc, p);
+}
+
+
+#define kBigHashDicLimit ((UInt32)1 << 24)
+
+static SRes LzmaEnc_Alloc(CLzmaEnc *p, UInt32 keepWindowSize, ISzAllocPtr alloc, ISzAllocPtr allocBig)
+{
+  UInt32 beforeSize = kNumOpts;
+  if (!RangeEnc_Alloc(&p->rc, alloc))
+    return SZ_ERROR_MEM;
+
+  #ifndef _7ZIP_ST
+  p->mtMode = (p->multiThread && !p->fastMode && (p->matchFinderBase.btMode != 0));
+  #endif
+
+  {
+    unsigned lclp = p->lc + p->lp;
+    if (!p->litProbs || !p->saveState.litProbs || p->lclp != lclp)
+    {
+      LzmaEnc_FreeLits(p, alloc);
+      p->litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
+      p->saveState.litProbs = (CLzmaProb *)ISzAlloc_Alloc(alloc, ((UInt32)0x300 << lclp) * sizeof(CLzmaProb));
+      if (!p->litProbs || !p->saveState.litProbs)
+      {
+        LzmaEnc_FreeLits(p, alloc);
+        return SZ_ERROR_MEM;
+      }
+      p->lclp = lclp;
+    }
+  }
+
+  p->matchFinderBase.bigHash = (Byte)(p->dictSize > kBigHashDicLimit ? 1 : 0);
+
+  if (beforeSize + p->dictSize < keepWindowSize)
+    beforeSize = keepWindowSize - p->dictSize;
+
+  #ifndef _7ZIP_ST
+  if (p->mtMode)
+  {
+    RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes,
+        LZMA_MATCH_LEN_MAX
+        + 1  /* 18.04 */
+        , allocBig));
+    p->matchFinderObj = &p->matchFinderMt;
+    p->matchFinderBase.bigHash = (Byte)(
+        (p->dictSize > kBigHashDicLimit && p->matchFinderBase.hashMask >= 0xFFFFFF) ? 1 : 0);
+    MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder);
+  }
+  else
+  #endif
+  {
+    if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig))
+      return SZ_ERROR_MEM;
+    p->matchFinderObj = &p->matchFinderBase;
+    MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder);
+  }
+  
+  return SZ_OK;
+}
+
+void LzmaEnc_Init(CLzmaEnc *p)
+{
+  unsigned i;
+  p->state = 0;
+  p->reps[0] =
+  p->reps[1] =
+  p->reps[2] =
+  p->reps[3] = 1;
+
+  RangeEnc_Init(&p->rc);
+
+  for (i = 0; i < (1 << kNumAlignBits); i++)
+    p->posAlignEncoder[i] = kProbInitValue;
+
+  for (i = 0; i < kNumStates; i++)
+  {
+    unsigned j;
+    for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++)
+    {
+      p->isMatch[i][j] = kProbInitValue;
+      p->isRep0Long[i][j] = kProbInitValue;
+    }
+    p->isRep[i] = kProbInitValue;
+    p->isRepG0[i] = kProbInitValue;
+    p->isRepG1[i] = kProbInitValue;
+    p->isRepG2[i] = kProbInitValue;
+  }
+
+  {
+    for (i = 0; i < kNumLenToPosStates; i++)
+    {
+      CLzmaProb *probs = p->posSlotEncoder[i];
+      unsigned j;
+      for (j = 0; j < (1 << kNumPosSlotBits); j++)
+        probs[j] = kProbInitValue;
+    }
+  }
+  {
+    for (i = 0; i < kNumFullDistances; i++)
+      p->posEncoders[i] = kProbInitValue;
+  }
+
+  {
+    UInt32 num = (UInt32)0x300 << (p->lp + p->lc);
+    UInt32 k;
+    CLzmaProb *probs = p->litProbs;
+    for (k = 0; k < num; k++)
+      probs[k] = kProbInitValue;
+  }
+
+
+  LenEnc_Init(&p->lenProbs);
+  LenEnc_Init(&p->repLenProbs);
+
+  p->optEnd = 0;
+  p->optCur = 0;
+
+  {
+    for (i = 0; i < kNumOpts; i++)
+      p->opt[i].price = kInfinityPrice;
+  }
+
+  p->additionalOffset = 0;
+
+  p->pbMask = (1 << p->pb) - 1;
+  p->lpMask = ((UInt32)0x100 << p->lp) - ((unsigned)0x100 >> p->lc);
+}
+
+
+void LzmaEnc_InitPrices(CLzmaEnc *p)
+{
+  if (!p->fastMode)
+  {
+    FillDistancesPrices(p);
+    FillAlignPrices(p);
+  }
+
+  p->lenEnc.tableSize =
+  p->repLenEnc.tableSize =
+      p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN;
+
+  p->repLenEncCounter = REP_LEN_COUNT;
+
+  LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, &p->lenProbs, p->ProbPrices);
+  LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, &p->repLenProbs, p->ProbPrices);
+}
+
+typedef struct
+{
+  ISeqOutStream vt;
+  Byte *data;
+  SizeT rem;
+  BoolInt overflow;
+} CLzmaEnc_SeqOutStreamBuf;
+
+static size_t SeqOutStreamBuf_Write(const ISeqOutStream *pp, const void *data, size_t size)
+{
+  CLzmaEnc_SeqOutStreamBuf *p = CONTAINER_FROM_VTBL(pp, CLzmaEnc_SeqOutStreamBuf, vt);
+  if (p->rem < size)
+  {
+    size = p->rem;
+    p->overflow = True;
+  }
+  memcpy(p->data, data, size);
+  p->rem -= size;
+  p->data += size;
+  return size;
+}
+
+
+UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp)
+{
+  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj);
+}
+
+
+const Byte *LzmaEnc_GetCurBuf(CLzmaEncHandle pp)
+{
+  const CLzmaEnc *p = (CLzmaEnc *)pp;
+  return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset;
+}
+
+
+SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte *props, SizeT *size)
+{
+  CLzmaEnc *p = (CLzmaEnc *)pp;
+  unsigned i;
+  UInt32 dictSize = p->dictSize;
+  if (*size < LZMA_PROPS_SIZE)
+    return SZ_ERROR_PARAM;
+  *size = LZMA_PROPS_SIZE;
+  props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc);
+
+  if (dictSize >= ((UInt32)1 << 22))
+  {
+    UInt32 kDictMask = ((UInt32)1 << 20) - 1;
+    if (dictSize < (UInt32)0xFFFFFFFF - kDictMask)
+      dictSize = (dictSize + kDictMask) & ~kDictMask;
+  }
+  else for (i = 11; i <= 30; i++)
+  {
+    if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; }
+    if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; }
+  }
+
+  for (i = 0; i < 4; i++)
+    props[1 + i] = (Byte)(dictSize >> (8 * i));
+  return SZ_OK;
+}
+
+
+
+
diff --git a/deps/libchdr/deps/lzma-19.00/src/Sort.c b/deps/libchdr/deps/lzma-19.00/src/Sort.c
new file mode 100644
index 00000000..e1097e38
--- /dev/null
+++ b/deps/libchdr/deps/lzma-19.00/src/Sort.c
@@ -0,0 +1,141 @@
+/* Sort.c -- Sort functions
+2014-04-05 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "Sort.h"
+
+#define HeapSortDown(p, k, size, temp) \
+  { for (;;) { \
+    size_t s = (k << 1); \
+    if (s > size) break; \
+    if (s < size && p[s + 1] > p[s]) s++; \
+    if (temp >= p[s]) break; \
+    p[k] = p[s]; k = s; \
+  } p[k] = temp; }
+
+void HeapSort(UInt32 *p, size_t size)
+{
+  if (size <= 1)
+    return;
+  p--;
+  {
+    size_t i = size / 2;
+    do
+    {
+      UInt32 temp = p[i];
+      size_t k = i;
+      HeapSortDown(p, k, size, temp)
+    }
+    while (--i != 0);
+  }
+  /*
+  do
+  {
+    size_t k = 1;
+    UInt32 temp = p[size];
+    p[size--] = p[1];
+    HeapSortDown(p, k, size, temp)
+  }
+  while (size > 1);
+  */
+  while (size > 3)
+  {
+    UInt32 temp = p[size];
+    size_t k = (p[3] > p[2]) ? 3 : 2;
+    p[size--] = p[1];
+    p[1] = p[k];
+    HeapSortDown(p, k, size, temp)
+  }
+  {
+    UInt32 temp = p[size];
+    p[size] = p[1];
+    if (size > 2 && p[2] < temp)
+    {
+      p[1] = p[2];
+      p[2] = temp;
+    }
+    else
+      p[1] = temp;
+  }
+}
+
+void HeapSort64(UInt64 *p, size_t size)
+{
+  if (size <= 1)
+    return;
+  p--;
+  {
+    size_t i = size / 2;
+    do
+    {
+      UInt64 temp = p[i];
+      size_t k = i;
+      HeapSortDown(p, k, size, temp)
+    }
+    while (--i != 0);
+  }
+  /*
+  do
+  {
+    size_t k = 1;
+    UInt64 temp = p[size];
+    p[size--] = p[1];
+    HeapSortDown(p, k, size, temp)
+  }
+  while (size > 1);
+  */
+  while (size > 3)
+  {
+    UInt64 temp = p[size];
+    size_t k = (p[3] > p[2]) ? 3 : 2;
+    p[size--] = p[1];
+    p[1] = p[k];
+    HeapSortDown(p, k, size, temp)
+  }
+  {
+    UInt64 temp = p[size];
+    p[size] = p[1];
+    if (size > 2 && p[2] < temp)
+    {
+      p[1] = p[2];
+      p[2] = temp;
+    }
+    else
+      p[1] = temp;
+  }
+}
+
+/*
+#define HeapSortRefDown(p, vals, n, size, temp) \
+  { size_t k = n; UInt32 val = vals[temp]; for (;;) { \
+    size_t s = (k << 1); \
+    if (s > size) break; \
+    if (s < size && vals[p[s + 1]] > vals[p[s]]) s++; \
+    if (val >= vals[p[s]]) break; \
+    p[k] = p[s]; k = s; \
+  } p[k] = temp; }
+
+void HeapSortRef(UInt32 *p, UInt32 *vals, size_t size)
+{
+  if (size <= 1)
+    return;
+  p--;
+  {
+    size_t i = size / 2;
+    do
+    {
+      UInt32 temp = p[i];
+      HeapSortRefDown(p, vals, i, size, temp);
+    }
+    while (--i != 0);
+  }
+  do
+  {
+    UInt32 temp = p[size];
+    p[size--] = p[1];
+    HeapSortRefDown(p, vals, 1, size, temp);
+  }
+  while (size > 1);
+}
+*/
diff --git a/deps/libchdr/deps/zlib-1.2.11/CMakeLists.txt b/deps/libchdr/deps/zlib-1.2.11/CMakeLists.txt
new file mode 100644
index 00000000..37b04b94
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/CMakeLists.txt
@@ -0,0 +1,27 @@
+add_library(zlib STATIC
+  zconf.h
+  zlib.h
+  adler32.c
+  compress.c
+  crc32.c
+  crc32.h
+  deflate.c
+  deflate.h
+  gzguts.h
+  infback.c
+  inffast.c
+  inffast.h
+  inffixed.h
+  inflate.c
+  inflate.h
+  inftrees.c
+  inftrees.h
+  trees.c
+  trees.h
+  uncompr.c
+  zutil.c
+  zutil.h
+)
+
+target_include_directories(zlib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}")
+target_include_directories(zlib INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}")
diff --git a/deps/libchdr/deps/zlib-1.2.11/ChangeLog b/deps/libchdr/deps/zlib-1.2.11/ChangeLog
new file mode 100644
index 00000000..30199a65
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/ChangeLog
@@ -0,0 +1,1515 @@
+
+                ChangeLog file for zlib
+
+Changes in 1.2.11 (15 Jan 2017)
+- Fix deflate stored bug when pulling last block from window
+- Permit immediate deflateParams changes before any deflate input
+
+Changes in 1.2.10 (2 Jan 2017)
+- Avoid warnings on snprintf() return value
+- Fix bug in deflate_stored() for zero-length input
+- Fix bug in gzwrite.c that produced corrupt gzip files
+- Remove files to be installed before copying them in Makefile.in
+- Add warnings when compiling with assembler code
+
+Changes in 1.2.9 (31 Dec 2016)
+- Fix contrib/minizip to permit unzipping with desktop API [Zouzou]
+- Improve contrib/blast to return unused bytes
+- Assure that gzoffset() is correct when appending
+- Improve compress() and uncompress() to support large lengths
+- Fix bug in test/example.c where error code not saved
+- Remedy Coverity warning [Randers-Pehrson]
+- Improve speed of gzprintf() in transparent mode
+- Fix inflateInit2() bug when windowBits is 16 or 32
+- Change DEBUG macro to ZLIB_DEBUG
+- Avoid uninitialized access by gzclose_w()
+- Allow building zlib outside of the source directory
+- Fix bug that accepted invalid zlib header when windowBits is zero
+- Fix gzseek() problem on MinGW due to buggy _lseeki64 there
+- Loop on write() calls in gzwrite.c in case of non-blocking I/O
+- Add --warn (-w) option to ./configure for more compiler warnings
+- Reject a window size of 256 bytes if not using the zlib wrapper
+- Fix bug when level 0 used with Z_HUFFMAN or Z_RLE
+- Add --debug (-d) option to ./configure to define ZLIB_DEBUG
+- Fix bugs in creating a very large gzip header
+- Add uncompress2() function, which returns the input size used
+- Assure that deflateParams() will not switch functions mid-block
+- Dramatically speed up deflation for level 0 (storing)
+- Add gzfread(), duplicating the interface of fread()
+- Add gzfwrite(), duplicating the interface of fwrite()
+- Add deflateGetDictionary() function
+- Use snprintf() for later versions of Microsoft C
+- Fix *Init macros to use z_ prefix when requested
+- Replace as400 with os400 for OS/400 support [Monnerat]
+- Add crc32_z() and adler32_z() functions with size_t lengths
+- Update Visual Studio project files [AraHaan]
+
+Changes in 1.2.8 (28 Apr 2013)
+- Update contrib/minizip/iowin32.c for Windows RT [Vollant]
+- Do not force Z_CONST for C++
+- Clean up contrib/vstudio [RoÃ]
+- Correct spelling error in zlib.h
+- Fix mixed line endings in contrib/vstudio
+
+Changes in 1.2.7.3 (13 Apr 2013)
+- Fix version numbers and DLL names in contrib/vstudio/*/zlib.rc
+
+Changes in 1.2.7.2 (13 Apr 2013)
+- Change check for a four-byte type back to hexadecimal
+- Fix typo in win32/Makefile.msc
+- Add casts in gzwrite.c for pointer differences
+
+Changes in 1.2.7.1 (24 Mar 2013)
+- Replace use of unsafe string functions with snprintf if available
+- Avoid including stddef.h on Windows for Z_SOLO compile [Niessink]
+- Fix gzgetc undefine when Z_PREFIX set [Turk]
+- Eliminate use of mktemp in Makefile (not always available)
+- Fix bug in 'F' mode for gzopen()
+- Add inflateGetDictionary() function
+- Correct comment in deflate.h
+- Use _snprintf for snprintf in Microsoft C
+- On Darwin, only use /usr/bin/libtool if libtool is not Apple
+- Delete "--version" file if created by "ar --version" [Richard G.]
+- Fix configure check for veracity of compiler error return codes
+- Fix CMake compilation of static lib for MSVC2010 x64
+- Remove unused variable in infback9.c
+- Fix argument checks in gzlog_compress() and gzlog_write()
+- Clean up the usage of z_const and respect const usage within zlib
+- Clean up examples/gzlog.[ch] comparisons of different types
+- Avoid shift equal to bits in type (caused endless loop)
+- Fix uninitialized value bug in gzputc() introduced by const patches
+- Fix memory allocation error in examples/zran.c [Nor]
+- Fix bug where gzopen(), gzclose() would write an empty file
+- Fix bug in gzclose() when gzwrite() runs out of memory
+- Check for input buffer malloc failure in examples/gzappend.c
+- Add note to contrib/blast to use binary mode in stdio
+- Fix comparisons of differently signed integers in contrib/blast
+- Check for invalid code length codes in contrib/puff
+- Fix serious but very rare decompression bug in inftrees.c
+- Update inflateBack() comments, since inflate() can be faster
+- Use underscored I/O function names for WINAPI_FAMILY
+- Add _tr_flush_bits to the external symbols prefixed by --zprefix
+- Add contrib/vstudio/vc10 pre-build step for static only
+- Quote --version-script argument in CMakeLists.txt
+- Don't specify --version-script on Apple platforms in CMakeLists.txt
+- Fix casting error in contrib/testzlib/testzlib.c
+- Fix types in contrib/minizip to match result of get_crc_table()
+- Simplify contrib/vstudio/vc10 with 'd' suffix
+- Add TOP support to win32/Makefile.msc
+- Suport i686 and amd64 assembler builds in CMakeLists.txt
+- Fix typos in the use of _LARGEFILE64_SOURCE in zconf.h
+- Add vc11 and vc12 build files to contrib/vstudio
+- Add gzvprintf() as an undocumented function in zlib
+- Fix configure for Sun shell
+- Remove runtime check in configure for four-byte integer type
+- Add casts and consts to ease user conversion to C++
+- Add man pages for minizip and miniunzip
+- In Makefile uninstall, don't rm if preceding cd fails
+- Do not return Z_BUF_ERROR if deflateParam() has nothing to write
+
+Changes in 1.2.7 (2 May 2012)
+- Replace use of memmove() with a simple copy for portability
+- Test for existence of strerror
+- Restore gzgetc_ for backward compatibility with 1.2.6
+- Fix build with non-GNU make on Solaris
+- Require gcc 4.0 or later on Mac OS X to use the hidden attribute
+- Include unistd.h for Watcom C
+- Use __WATCOMC__ instead of __WATCOM__
+- Do not use the visibility attribute if NO_VIZ defined
+- Improve the detection of no hidden visibility attribute
+- Avoid using __int64 for gcc or solo compilation
+- Cast to char * in gzprintf to avoid warnings [Zinser]
+- Fix make_vms.com for VAX [Zinser]
+- Don't use library or built-in byte swaps
+- Simplify test and use of gcc hidden attribute
+- Fix bug in gzclose_w() when gzwrite() fails to allocate memory
+- Add "x" (O_EXCL) and "e" (O_CLOEXEC) modes support to gzopen()
+- Fix bug in test/minigzip.c for configure --solo
+- Fix contrib/vstudio project link errors [Mohanathas]
+- Add ability to choose the builder in make_vms.com [Schweda]
+- Add DESTDIR support to mingw32 win32/Makefile.gcc
+- Fix comments in win32/Makefile.gcc for proper usage
+- Allow overriding the default install locations for cmake
+- Generate and install the pkg-config file with cmake
+- Build both a static and a shared version of zlib with cmake
+- Include version symbols for cmake builds
+- If using cmake with MSVC, add the source directory to the includes
+- Remove unneeded EXTRA_CFLAGS from win32/Makefile.gcc [Truta]
+- Move obsolete emx makefile to old [Truta]
+- Allow the use of -Wundef when compiling or using zlib
+- Avoid the use of the -u option with mktemp
+- Improve inflate() documentation on the use of Z_FINISH
+- Recognize clang as gcc
+- Add gzopen_w() in Windows for wide character path names
+- Rename zconf.h in CMakeLists.txt to move it out of the way
+- Add source directory in CMakeLists.txt for building examples
+- Look in build directory for zlib.pc in CMakeLists.txt
+- Remove gzflags from zlibvc.def in vc9 and vc10
+- Fix contrib/minizip compilation in the MinGW environment
+- Update ./configure for Solaris, support --64 [Mooney]
+- Remove -R. from Solaris shared build (possible security issue)
+- Avoid race condition for parallel make (-j) running example
+- Fix type mismatch between get_crc_table() and crc_table
+- Fix parsing of version with "-" in CMakeLists.txt [Snider, Ziegler]
+- Fix the path to zlib.map in CMakeLists.txt
+- Force the native libtool in Mac OS X to avoid GNU libtool [Beebe]
+- Add instructions to win32/Makefile.gcc for shared install [Torri]
+
+Changes in 1.2.6.1 (12 Feb 2012)
+- Avoid the use of the Objective-C reserved name "id"
+- Include io.h in gzguts.h for Microsoft compilers
+- Fix problem with ./configure --prefix and gzgetc macro
+- Include gz_header definition when compiling zlib solo
+- Put gzflags() functionality back in zutil.c
+- Avoid library header include in crc32.c for Z_SOLO
+- Use name in GCC_CLASSIC as C compiler for coverage testing, if set
+- Minor cleanup in contrib/minizip/zip.c [Vollant]
+- Update make_vms.com [Zinser]
+- Remove unnecessary gzgetc_ function
+- Use optimized byte swap operations for Microsoft and GNU [Snyder]
+- Fix minor typo in zlib.h comments [Rzesniowiecki]
+
+Changes in 1.2.6 (29 Jan 2012)
+- Update the Pascal interface in contrib/pascal
+- Fix function numbers for gzgetc_ in zlibvc.def files
+- Fix configure.ac for contrib/minizip [Schiffer]
+- Fix large-entry detection in minizip on 64-bit systems [Schiffer]
+- Have ./configure use the compiler return code for error indication
+- Fix CMakeLists.txt for cross compilation [McClure]
+- Fix contrib/minizip/zip.c for 64-bit architectures [Dalsnes]
+- Fix compilation of contrib/minizip on FreeBSD [Marquez]
+- Correct suggested usages in win32/Makefile.msc [Shachar, Horvath]
+- Include io.h for Turbo C / Borland C on all platforms [Truta]
+- Make version explicit in contrib/minizip/configure.ac [Bosmans]
+- Avoid warning for no encryption in contrib/minizip/zip.c [Vollant]
+- Minor cleanup up contrib/minizip/unzip.c [Vollant]
+- Fix bug when compiling minizip with C++ [Vollant]
+- Protect for long name and extra fields in contrib/minizip [Vollant]
+- Avoid some warnings in contrib/minizip [Vollant]
+- Add -I../.. -L../.. to CFLAGS for minizip and miniunzip
+- Add missing libs to minizip linker command
+- Add support for VPATH builds in contrib/minizip
+- Add an --enable-demos option to contrib/minizip/configure
+- Add the generation of configure.log by ./configure
+- Exit when required parameters not provided to win32/Makefile.gcc
+- Have gzputc return the character written instead of the argument
+- Use the -m option on ldconfig for BSD systems [Tobias]
+- Correct in zlib.map when deflateResetKeep was added
+
+Changes in 1.2.5.3 (15 Jan 2012)
+- Restore gzgetc function for binary compatibility
+- Do not use _lseeki64 under Borland C++ [Truta]
+- Update win32/Makefile.msc to build test/*.c [Truta]
+- Remove old/visualc6 given CMakefile and other alternatives
+- Update AS400 build files and documentation [Monnerat]
+- Update win32/Makefile.gcc to build test/*.c [Truta]
+- Permit stronger flushes after Z_BLOCK flushes
+- Avoid extraneous empty blocks when doing empty flushes
+- Permit Z_NULL arguments to deflatePending
+- Allow deflatePrime() to insert bits in the middle of a stream
+- Remove second empty static block for Z_PARTIAL_FLUSH
+- Write out all of the available bits when using Z_BLOCK
+- Insert the first two strings in the hash table after a flush
+
+Changes in 1.2.5.2 (17 Dec 2011)
+- fix ld error: unable to find version dependency 'ZLIB_1.2.5'
+- use relative symlinks for shared libs
+- Avoid searching past window for Z_RLE strategy
+- Assure that high-water mark initialization is always applied in deflate
+- Add assertions to fill_window() in deflate.c to match comments
+- Update python link in README
+- Correct spelling error in gzread.c
+- Fix bug in gzgets() for a concatenated empty gzip stream
+- Correct error in comment for gz_make()
+- Change gzread() and related to ignore junk after gzip streams
+- Allow gzread() and related to continue after gzclearerr()
+- Allow gzrewind() and gzseek() after a premature end-of-file
+- Simplify gzseek() now that raw after gzip is ignored
+- Change gzgetc() to a macro for speed (~40% speedup in testing)
+- Fix gzclose() to return the actual error last encountered
+- Always add large file support for windows
+- Include zconf.h for windows large file support
+- Include zconf.h.cmakein for windows large file support
+- Update zconf.h.cmakein on make distclean
+- Merge vestigial vsnprintf determination from zutil.h to gzguts.h
+- Clarify how gzopen() appends in zlib.h comments
+- Correct documentation of gzdirect() since junk at end now ignored
+- Add a transparent write mode to gzopen() when 'T' is in the mode
+- Update python link in zlib man page
+- Get inffixed.h and MAKEFIXED result to match
+- Add a ./config --solo option to make zlib subset with no library use
+- Add undocumented inflateResetKeep() function for CAB file decoding
+- Add --cover option to ./configure for gcc coverage testing
+- Add #define ZLIB_CONST option to use const in the z_stream interface
+- Add comment to gzdopen() in zlib.h to use dup() when using fileno()
+- Note behavior of uncompress() to provide as much data as it can
+- Add files in contrib/minizip to aid in building libminizip
+- Split off AR options in Makefile.in and configure
+- Change ON macro to Z_ARG to avoid application conflicts
+- Facilitate compilation with Borland C++ for pragmas and vsnprintf
+- Include io.h for Turbo C / Borland C++
+- Move example.c and minigzip.c to test/
+- Simplify incomplete code table filling in inflate_table()
+- Remove code from inflate.c and infback.c that is impossible to execute
+- Test the inflate code with full coverage
+- Allow deflateSetDictionary, inflateSetDictionary at any time (in raw)
+- Add deflateResetKeep and fix inflateResetKeep to retain dictionary
+- Fix gzwrite.c to accommodate reduced memory zlib compilation
+- Have inflate() with Z_FINISH avoid the allocation of a window
+- Do not set strm->adler when doing raw inflate
+- Fix gzeof() to behave just like feof() when read is not past end of file
+- Fix bug in gzread.c when end-of-file is reached
+- Avoid use of Z_BUF_ERROR in gz* functions except for premature EOF
+- Document gzread() capability to read concurrently written files
+- Remove hard-coding of resource compiler in CMakeLists.txt [Blammo]
+
+Changes in 1.2.5.1 (10 Sep 2011)
+- Update FAQ entry on shared builds (#13)
+- Avoid symbolic argument to chmod in Makefile.in
+- Fix bug and add consts in contrib/puff [Oberhumer]
+- Update contrib/puff/zeros.raw test file to have all block types
+- Add full coverage test for puff in contrib/puff/Makefile
+- Fix static-only-build install in Makefile.in
+- Fix bug in unzGetCurrentFileInfo() in contrib/minizip [Kuno]
+- Add libz.a dependency to shared in Makefile.in for parallel builds
+- Spell out "number" (instead of "nb") in zlib.h for total_in, total_out
+- Replace $(...) with `...` in configure for non-bash sh [Bowler]
+- Add darwin* to Darwin* and solaris* to SunOS\ 5* in configure [Groffen]
+- Add solaris* to Linux* in configure to allow gcc use [Groffen]
+- Add *bsd* to Linux* case in configure [Bar-Lev]
+- Add inffast.obj to dependencies in win32/Makefile.msc
+- Correct spelling error in deflate.h [Kohler]
+- Change libzdll.a again to libz.dll.a (!) in win32/Makefile.gcc
+- Add test to configure for GNU C looking for gcc in output of $cc -v
+- Add zlib.pc generation to win32/Makefile.gcc [Weigelt]
+- Fix bug in zlib.h for _FILE_OFFSET_BITS set and _LARGEFILE64_SOURCE not
+- Add comment in zlib.h that adler32_combine with len2 < 0 makes no sense
+- Make NO_DIVIDE option in adler32.c much faster (thanks to John Reiser)
+- Make stronger test in zconf.h to include unistd.h for LFS
+- Apply Darwin patches for 64-bit file offsets to contrib/minizip [Slack]
+- Fix zlib.h LFS support when Z_PREFIX used
+- Add updated as400 support (removed from old) [Monnerat]
+- Avoid deflate sensitivity to volatile input data
+- Avoid division in adler32_combine for NO_DIVIDE
+- Clarify the use of Z_FINISH with deflateBound() amount of space
+- Set binary for output file in puff.c
+- Use u4 type for crc_table to avoid conversion warnings
+- Apply casts in zlib.h to avoid conversion warnings
+- Add OF to prototypes for adler32_combine_ and crc32_combine_ [Miller]
+- Improve inflateSync() documentation to note indeterminancy
+- Add deflatePending() function to return the amount of pending output
+- Correct the spelling of "specification" in FAQ [Randers-Pehrson]
+- Add a check in configure for stdarg.h, use for gzprintf()
+- Check that pointers fit in ints when gzprint() compiled old style
+- Add dummy name before $(SHAREDLIBV) in Makefile [Bar-Lev, Bowler]
+- Delete line in configure that adds -L. libz.a to LDFLAGS [Weigelt]
+- Add debug records in assmebler code [Londer]
+- Update RFC references to use http://tools.ietf.org/html/... [Li]
+- Add --archs option, use of libtool to configure for Mac OS X [Borstel]
+
+Changes in 1.2.5 (19 Apr 2010)
+- Disable visibility attribute in win32/Makefile.gcc [Bar-Lev]
+- Default to libdir as sharedlibdir in configure [Nieder]
+- Update copyright dates on modified source files
+- Update trees.c to be able to generate modified trees.h
+- Exit configure for MinGW, suggesting win32/Makefile.gcc
+- Check for NULL path in gz_open [Homurlu]
+
+Changes in 1.2.4.5 (18 Apr 2010)
+- Set sharedlibdir in configure [Torok]
+- Set LDFLAGS in Makefile.in [Bar-Lev]
+- Avoid mkdir objs race condition in Makefile.in [Bowler]
+- Add ZLIB_INTERNAL in front of internal inter-module functions and arrays
+- Define ZLIB_INTERNAL to hide internal functions and arrays for GNU C
+- Don't use hidden attribute when it is a warning generator (e.g. Solaris)
+
+Changes in 1.2.4.4 (18 Apr 2010)
+- Fix CROSS_PREFIX executable testing, CHOST extract, mingw* [Torok]
+- Undefine _LARGEFILE64_SOURCE in zconf.h if it is zero, but not if empty
+- Try to use bash or ksh regardless of functionality of /bin/sh
+- Fix configure incompatibility with NetBSD sh
+- Remove attempt to run under bash or ksh since have better NetBSD fix
+- Fix win32/Makefile.gcc for MinGW [Bar-Lev]
+- Add diagnostic messages when using CROSS_PREFIX in configure
+- Added --sharedlibdir option to configure [Weigelt]
+- Use hidden visibility attribute when available [Frysinger]
+
+Changes in 1.2.4.3 (10 Apr 2010)
+- Only use CROSS_PREFIX in configure for ar and ranlib if they exist
+- Use CROSS_PREFIX for nm [Bar-Lev]
+- Assume _LARGEFILE64_SOURCE defined is equivalent to true
+- Avoid use of undefined symbols in #if with && and ||
+- Make *64 prototypes in gzguts.h consistent with functions
+- Add -shared load option for MinGW in configure [Bowler]
+- Move z_off64_t to public interface, use instead of off64_t
+- Remove ! from shell test in configure (not portable to Solaris)
+- Change +0 macro tests to -0 for possibly increased portability
+
+Changes in 1.2.4.2 (9 Apr 2010)
+- Add consistent carriage returns to readme.txt's in masmx86 and masmx64
+- Really provide prototypes for *64 functions when building without LFS
+- Only define unlink() in minigzip.c if unistd.h not included
+- Update README to point to contrib/vstudio project files
+- Move projects/vc6 to old/ and remove projects/
+- Include stdlib.h in minigzip.c for setmode() definition under WinCE
+- Clean up assembler builds in win32/Makefile.msc [Rowe]
+- Include sys/types.h for Microsoft for off_t definition
+- Fix memory leak on error in gz_open()
+- Symbolize nm as $NM in configure [Weigelt]
+- Use TEST_LDSHARED instead of LDSHARED to link test programs [Weigelt]
+- Add +0 to _FILE_OFFSET_BITS and _LFS64_LARGEFILE in case not defined
+- Fix bug in gzeof() to take into account unused input data
+- Avoid initialization of structures with variables in puff.c
+- Updated win32/README-WIN32.txt [Rowe]
+
+Changes in 1.2.4.1 (28 Mar 2010)
+- Remove the use of [a-z] constructs for sed in configure [gentoo 310225]
+- Remove $(SHAREDLIB) from LIBS in Makefile.in [Creech]
+- Restore "for debugging" comment on sprintf() in gzlib.c
+- Remove fdopen for MVS from gzguts.h
+- Put new README-WIN32.txt in win32 [Rowe]
+- Add check for shell to configure and invoke another shell if needed
+- Fix big fat stinking bug in gzseek() on uncompressed files
+- Remove vestigial F_OPEN64 define in zutil.h
+- Set and check the value of _LARGEFILE_SOURCE and _LARGEFILE64_SOURCE
+- Avoid errors on non-LFS systems when applications define LFS macros
+- Set EXE to ".exe" in configure for MINGW [Kahle]
+- Match crc32() in crc32.c exactly to the prototype in zlib.h [Sherrill]
+- Add prefix for cross-compilation in win32/makefile.gcc [Bar-Lev]
+- Add DLL install in win32/makefile.gcc [Bar-Lev]
+- Allow Linux* or linux* from uname in configure [Bar-Lev]
+- Allow ldconfig to be redefined in configure and Makefile.in [Bar-Lev]
+- Add cross-compilation prefixes to configure [Bar-Lev]
+- Match type exactly in gz_load() invocation in gzread.c
+- Match type exactly of zcalloc() in zutil.c to zlib.h alloc_func
+- Provide prototypes for *64 functions when building zlib without LFS
+- Don't use -lc when linking shared library on MinGW
+- Remove errno.h check in configure and vestigial errno code in zutil.h
+
+Changes in 1.2.4 (14 Mar 2010)
+- Fix VER3 extraction in configure for no fourth subversion
+- Update zlib.3, add docs to Makefile.in to make .pdf out of it
+- Add zlib.3.pdf to distribution
+- Don't set error code in gzerror() if passed pointer is NULL
+- Apply destination directory fixes to CMakeLists.txt [Lowman]
+- Move #cmakedefine's to a new zconf.in.cmakein
+- Restore zconf.h for builds that don't use configure or cmake
+- Add distclean to dummy Makefile for convenience
+- Update and improve INDEX, README, and FAQ
+- Update CMakeLists.txt for the return of zconf.h [Lowman]
+- Update contrib/vstudio/vc9 and vc10 [Vollant]
+- Change libz.dll.a back to libzdll.a in win32/Makefile.gcc
+- Apply license and readme changes to contrib/asm686 [Raiter]
+- Check file name lengths and add -c option in minigzip.c [Li]
+- Update contrib/amd64 and contrib/masmx86/ [Vollant]
+- Avoid use of "eof" parameter in trees.c to not shadow library variable
+- Update make_vms.com for removal of zlibdefs.h [Zinser]
+- Update assembler code and vstudio projects in contrib [Vollant]
+- Remove outdated assembler code contrib/masm686 and contrib/asm586
+- Remove old vc7 and vc8 from contrib/vstudio
+- Update win32/Makefile.msc, add ZLIB_VER_SUBREVISION [Rowe]
+- Fix memory leaks in gzclose_r() and gzclose_w(), file leak in gz_open()
+- Add contrib/gcc_gvmat64 for longest_match and inflate_fast [Vollant]
+- Remove *64 functions from win32/zlib.def (they're not 64-bit yet)
+- Fix bug in void-returning vsprintf() case in gzwrite.c
+- Fix name change from inflate.h in contrib/inflate86/inffas86.c
+- Check if temporary file exists before removing in make_vms.com [Zinser]
+- Fix make install and uninstall for --static option
+- Fix usage of _MSC_VER in gzguts.h and zutil.h [Truta]
+- Update readme.txt in contrib/masmx64 and masmx86 to assemble
+
+Changes in 1.2.3.9 (21 Feb 2010)
+- Expunge gzio.c
+- Move as400 build information to old
+- Fix updates in contrib/minizip and contrib/vstudio
+- Add const to vsnprintf test in configure to avoid warnings [Weigelt]
+- Delete zconf.h (made by configure) [Weigelt]
+- Change zconf.in.h to zconf.h.in per convention [Weigelt]
+- Check for NULL buf in gzgets()
+- Return empty string for gzgets() with len == 1 (like fgets())
+- Fix description of gzgets() in zlib.h for end-of-file, NULL return
+- Update minizip to 1.1 [Vollant]
+- Avoid MSVC loss of data warnings in gzread.c, gzwrite.c
+- Note in zlib.h that gzerror() should be used to distinguish from EOF
+- Remove use of snprintf() from gzlib.c
+- Fix bug in gzseek()
+- Update contrib/vstudio, adding vc9 and vc10 [Kuno, Vollant]
+- Fix zconf.h generation in CMakeLists.txt [Lowman]
+- Improve comments in zconf.h where modified by configure
+
+Changes in 1.2.3.8 (13 Feb 2010)
+- Clean up text files (tabs, trailing whitespace, etc.) [Oberhumer]
+- Use z_off64_t in gz_zero() and gz_skip() to match state->skip
+- Avoid comparison problem when sizeof(int) == sizeof(z_off64_t)
+- Revert to Makefile.in from 1.2.3.6 (live with the clutter)
+- Fix missing error return in gzflush(), add zlib.h note
+- Add *64 functions to zlib.map [Levin]
+- Fix signed/unsigned comparison in gz_comp()
+- Use SFLAGS when testing shared linking in configure
+- Add --64 option to ./configure to use -m64 with gcc
+- Fix ./configure --help to correctly name options
+- Have make fail if a test fails [Levin]
+- Avoid buffer overrun in contrib/masmx64/gvmat64.asm [Simpson]
+- Remove assembler object files from contrib
+
+Changes in 1.2.3.7 (24 Jan 2010)
+- Always gzopen() with O_LARGEFILE if available
+- Fix gzdirect() to work immediately after gzopen() or gzdopen()
+- Make gzdirect() more precise when the state changes while reading
+- Improve zlib.h documentation in many places
+- Catch memory allocation failure in gz_open()
+- Complete close operation if seek forward in gzclose_w() fails
+- Return Z_ERRNO from gzclose_r() if close() fails
+- Return Z_STREAM_ERROR instead of EOF for gzclose() being passed NULL
+- Return zero for gzwrite() errors to match zlib.h description
+- Return -1 on gzputs() error to match zlib.h description
+- Add zconf.in.h to allow recovery from configure modification [Weigelt]
+- Fix static library permissions in Makefile.in [Weigelt]
+- Avoid warnings in configure tests that hide functionality [Weigelt]
+- Add *BSD and DragonFly to Linux case in configure [gentoo 123571]
+- Change libzdll.a to libz.dll.a in win32/Makefile.gcc [gentoo 288212]
+- Avoid access of uninitialized data for first inflateReset2 call [Gomes]
+- Keep object files in subdirectories to reduce the clutter somewhat
+- Remove default Makefile and zlibdefs.h, add dummy Makefile
+- Add new external functions to Z_PREFIX, remove duplicates, z_z_ -> z_
+- Remove zlibdefs.h completely -- modify zconf.h instead
+
+Changes in 1.2.3.6 (17 Jan 2010)
+- Avoid void * arithmetic in gzread.c and gzwrite.c
+- Make compilers happier with const char * for gz_error message
+- Avoid unused parameter warning in inflate.c
+- Avoid signed-unsigned comparison warning in inflate.c
+- Indent #pragma's for traditional C
+- Fix usage of strwinerror() in glib.c, change to gz_strwinerror()
+- Correct email address in configure for system options
+- Update make_vms.com and add make_vms.com to contrib/minizip [Zinser]
+- Update zlib.map [Brown]
+- Fix Makefile.in for Solaris 10 make of example64 and minizip64 [Torok]
+- Apply various fixes to CMakeLists.txt [Lowman]
+- Add checks on len in gzread() and gzwrite()
+- Add error message for no more room for gzungetc()
+- Remove zlib version check in gzwrite()
+- Defer compression of gzprintf() result until need to
+- Use snprintf() in gzdopen() if available
+- Remove USE_MMAP configuration determination (only used by minigzip)
+- Remove examples/pigz.c (available separately)
+- Update examples/gun.c to 1.6
+
+Changes in 1.2.3.5 (8 Jan 2010)
+- Add space after #if in zutil.h for some compilers
+- Fix relatively harmless bug in deflate_fast() [Exarevsky]
+- Fix same problem in deflate_slow()
+- Add $(SHAREDLIBV) to LIBS in Makefile.in [Brown]
+- Add deflate_rle() for faster Z_RLE strategy run-length encoding
+- Add deflate_huff() for faster Z_HUFFMAN_ONLY encoding
+- Change name of "write" variable in inffast.c to avoid library collisions
+- Fix premature EOF from gzread() in gzio.c [Brown]
+- Use zlib header window size if windowBits is 0 in inflateInit2()
+- Remove compressBound() call in deflate.c to avoid linking compress.o
+- Replace use of errno in gz* with functions, support WinCE [Alves]
+- Provide alternative to perror() in minigzip.c for WinCE [Alves]
+- Don't use _vsnprintf on later versions of MSVC [Lowman]
+- Add CMake build script and input file [Lowman]
+- Update contrib/minizip to 1.1 [Svensson, Vollant]
+- Moved nintendods directory from contrib to .
+- Replace gzio.c with a new set of routines with the same functionality
+- Add gzbuffer(), gzoffset(), gzclose_r(), gzclose_w() as part of above
+- Update contrib/minizip to 1.1b
+- Change gzeof() to return 0 on error instead of -1 to agree with zlib.h
+
+Changes in 1.2.3.4 (21 Dec 2009)
+- Use old school .SUFFIXES in Makefile.in for FreeBSD compatibility
+- Update comments in configure and Makefile.in for default --shared
+- Fix test -z's in configure [Marquess]
+- Build examplesh and minigzipsh when not testing
+- Change NULL's to Z_NULL's in deflate.c and in comments in zlib.h
+- Import LDFLAGS from the environment in configure
+- Fix configure to populate SFLAGS with discovered CFLAGS options
+- Adapt make_vms.com to the new Makefile.in [Zinser]
+- Add zlib2ansi script for C++ compilation [Marquess]
+- Add _FILE_OFFSET_BITS=64 test to make test (when applicable)
+- Add AMD64 assembler code for longest match to contrib [Teterin]
+- Include options from $SFLAGS when doing $LDSHARED
+- Simplify 64-bit file support by introducing z_off64_t type
+- Make shared object files in objs directory to work around old Sun cc
+- Use only three-part version number for Darwin shared compiles
+- Add rc option to ar in Makefile.in for when ./configure not run
+- Add -WI,-rpath,. to LDFLAGS for OSF 1 V4*
+- Set LD_LIBRARYN32_PATH for SGI IRIX shared compile
+- Protect against _FILE_OFFSET_BITS being defined when compiling zlib
+- Rename Makefile.in targets allstatic to static and allshared to shared
+- Fix static and shared Makefile.in targets to be independent
+- Correct error return bug in gz_open() by setting state [Brown]
+- Put spaces before ;;'s in configure for better sh compatibility
+- Add pigz.c (parallel implementation of gzip) to examples/
+- Correct constant in crc32.c to UL [Leventhal]
+- Reject negative lengths in crc32_combine()
+- Add inflateReset2() function to work like inflateEnd()/inflateInit2()
+- Include sys/types.h for _LARGEFILE64_SOURCE [Brown]
+- Correct typo in doc/algorithm.txt [Janik]
+- Fix bug in adler32_combine() [Zhu]
+- Catch missing-end-of-block-code error in all inflates and in puff
+    Assures that random input to inflate eventually results in an error
+- Added enough.c (calculation of ENOUGH for inftrees.h) to examples/
+- Update ENOUGH and its usage to reflect discovered bounds
+- Fix gzerror() error report on empty input file [Brown]
+- Add ush casts in trees.c to avoid pedantic runtime errors
+- Fix typo in zlib.h uncompress() description [Reiss]
+- Correct inflate() comments with regard to automatic header detection
+- Remove deprecation comment on Z_PARTIAL_FLUSH (it stays)
+- Put new version of gzlog (2.0) in examples with interruption recovery
+- Add puff compile option to permit invalid distance-too-far streams
+- Add puff TEST command options, ability to read piped input
+- Prototype the *64 functions in zlib.h when _FILE_OFFSET_BITS == 64, but
+  _LARGEFILE64_SOURCE not defined
+- Fix Z_FULL_FLUSH to truly erase the past by resetting s->strstart
+- Fix deflateSetDictionary() to use all 32K for output consistency
+- Remove extraneous #define MIN_LOOKAHEAD in deflate.c (in deflate.h)
+- Clear bytes after deflate lookahead to avoid use of uninitialized data
+- Change a limit in inftrees.c to be more transparent to Coverity Prevent
+- Update win32/zlib.def with exported symbols from zlib.h
+- Correct spelling errors in zlib.h [Willem, Sobrado]
+- Allow Z_BLOCK for deflate() to force a new block
+- Allow negative bits in inflatePrime() to delete existing bit buffer
+- Add Z_TREES flush option to inflate() to return at end of trees
+- Add inflateMark() to return current state information for random access
+- Add Makefile for NintendoDS to contrib [Costa]
+- Add -w in configure compile tests to avoid spurious warnings [Beucler]
+- Fix typos in zlib.h comments for deflateSetDictionary()
+- Fix EOF detection in transparent gzread() [Maier]
+
+Changes in 1.2.3.3 (2 October 2006)
+- Make --shared the default for configure, add a --static option
+- Add compile option to permit invalid distance-too-far streams
+- Add inflateUndermine() function which is required to enable above
+- Remove use of "this" variable name for C++ compatibility [Marquess]
+- Add testing of shared library in make test, if shared library built
+- Use ftello() and fseeko() if available instead of ftell() and fseek()
+- Provide two versions of all functions that use the z_off_t type for
+  binary compatibility -- a normal version and a 64-bit offset version,
+  per the Large File Support Extension when _LARGEFILE64_SOURCE is
+  defined; use the 64-bit versions by default when _FILE_OFFSET_BITS
+  is defined to be 64
+- Add a --uname= option to configure to perhaps help with cross-compiling
+
+Changes in 1.2.3.2 (3 September 2006)
+- Turn off silly Borland warnings [Hay]
+- Use off64_t and define _LARGEFILE64_SOURCE when present
+- Fix missing dependency on inffixed.h in Makefile.in
+- Rig configure --shared to build both shared and static [Teredesai, Truta]
+- Remove zconf.in.h and instead create a new zlibdefs.h file
+- Fix contrib/minizip/unzip.c non-encrypted after encrypted [Vollant]
+- Add treebuild.xml (see http://treebuild.metux.de/) [Weigelt]
+
+Changes in 1.2.3.1 (16 August 2006)
+- Add watcom directory with OpenWatcom make files [Daniel]
+- Remove #undef of FAR in zconf.in.h for MVS [Fedtke]
+- Update make_vms.com [Zinser]
+- Use -fPIC for shared build in configure [Teredesai, Nicholson]
+- Use only major version number for libz.so on IRIX and OSF1 [Reinholdtsen]
+- Use fdopen() (not _fdopen()) for Interix in zutil.h [BÃ¤ck]
+- Add some FAQ entries about the contrib directory
+- Update the MVS question in the FAQ
+- Avoid extraneous reads after EOF in gzio.c [Brown]
+- Correct spelling of "successfully" in gzio.c [Randers-Pehrson]
+- Add comments to zlib.h about gzerror() usage [Brown]
+- Set extra flags in gzip header in gzopen() like deflate() does
+- Make configure options more compatible with double-dash conventions
+  [Weigelt]
+- Clean up compilation under Solaris SunStudio cc [Rowe, Reinholdtsen]
+- Fix uninstall target in Makefile.in [Truta]
+- Add pkgconfig support [Weigelt]
+- Use $(DESTDIR) macro in Makefile.in [Reinholdtsen, Weigelt]
+- Replace set_data_type() with a more accurate detect_data_type() in
+  trees.c, according to the txtvsbin.txt document [Truta]
+- Swap the order of #include <stdio.h> and #include "zlib.h" in
+  gzio.c, example.c and minigzip.c [Truta]
+- Shut up annoying VS2005 warnings about standard C deprecation [Rowe,
+  Truta] (where?)
+- Fix target "clean" from win32/Makefile.bor [Truta]
+- Create .pdb and .manifest files in win32/makefile.msc [Ziegler, Rowe]
+- Update zlib www home address in win32/DLL_FAQ.txt [Truta]
+- Update contrib/masmx86/inffas32.asm for VS2005 [Vollant, Van Wassenhove]
+- Enable browse info in the "Debug" and "ASM Debug" configurations in
+  the Visual C++ 6 project, and set (non-ASM) "Debug" as default [Truta]
+- Add pkgconfig support [Weigelt]
+- Add ZLIB_VER_MAJOR, ZLIB_VER_MINOR and ZLIB_VER_REVISION in zlib.h,
+  for use in win32/zlib1.rc [Polushin, Rowe, Truta]
+- Add a document that explains the new text detection scheme to
+  doc/txtvsbin.txt [Truta]
+- Add rfc1950.txt, rfc1951.txt and rfc1952.txt to doc/ [Truta]
+- Move algorithm.txt into doc/ [Truta]
+- Synchronize FAQ with website
+- Fix compressBound(), was low for some pathological cases [Fearnley]
+- Take into account wrapper variations in deflateBound()
+- Set examples/zpipe.c input and output to binary mode for Windows
+- Update examples/zlib_how.html with new zpipe.c (also web site)
+- Fix some warnings in examples/gzlog.c and examples/zran.c (it seems
+  that gcc became pickier in 4.0)
+- Add zlib.map for Linux: "All symbols from zlib-1.1.4 remain
+  un-versioned, the patch adds versioning only for symbols introduced in
+  zlib-1.2.0 or later.  It also declares as local those symbols which are
+  not designed to be exported." [Levin]
+- Update Z_PREFIX list in zconf.in.h, add --zprefix option to configure
+- Do not initialize global static by default in trees.c, add a response
+  NO_INIT_GLOBAL_POINTERS to initialize them if needed [Marquess]
+- Don't use strerror() in gzio.c under WinCE [Yakimov]
+- Don't use errno.h in zutil.h under WinCE [Yakimov]
+- Move arguments for AR to its usage to allow replacing ar [Marot]
+- Add HAVE_VISIBILITY_PRAGMA in zconf.in.h for Mozilla [Randers-Pehrson]
+- Improve inflateInit() and inflateInit2() documentation
+- Fix structure size comment in inflate.h
+- Change configure help option from --h* to --help [Santos]
+
+Changes in 1.2.3 (18 July 2005)
+- Apply security vulnerability fixes to contrib/infback9 as well
+- Clean up some text files (carriage returns, trailing space)
+- Update testzlib, vstudio, masmx64, and masmx86 in contrib [Vollant]
+
+Changes in 1.2.2.4 (11 July 2005)
+- Add inflatePrime() function for starting inflation at bit boundary
+- Avoid some Visual C warnings in deflate.c
+- Avoid more silly Visual C warnings in inflate.c and inftrees.c for 64-bit
+  compile
+- Fix some spelling errors in comments [Betts]
+- Correct inflateInit2() error return documentation in zlib.h
+- Add zran.c example of compressed data random access to examples
+  directory, shows use of inflatePrime()
+- Fix cast for assignments to strm->state in inflate.c and infback.c
+- Fix zlibCompileFlags() in zutil.c to use 1L for long shifts [Oberhumer]
+- Move declarations of gf2 functions to right place in crc32.c [Oberhumer]
+- Add cast in trees.c t avoid a warning [Oberhumer]
+- Avoid some warnings in fitblk.c, gun.c, gzjoin.c in examples [Oberhumer]
+- Update make_vms.com [Zinser]
+- Initialize state->write in inflateReset() since copied in inflate_fast()
+- Be more strict on incomplete code sets in inflate_table() and increase
+  ENOUGH and MAXD -- this repairs a possible security vulnerability for
+  invalid inflate input.  Thanks to Tavis Ormandy and Markus Oberhumer for
+  discovering the vulnerability and providing test cases.
+- Add ia64 support to configure for HP-UX [Smith]
+- Add error return to gzread() for format or i/o error [Levin]
+- Use malloc.h for OS/2 [Necasek]
+
+Changes in 1.2.2.3 (27 May 2005)
+- Replace 1U constants in inflate.c and inftrees.c for 64-bit compile
+- Typecast fread() return values in gzio.c [Vollant]
+- Remove trailing space in minigzip.c outmode (VC++ can't deal with it)
+- Fix crc check bug in gzread() after gzungetc() [Heiner]
+- Add the deflateTune() function to adjust internal compression parameters
+- Add a fast gzip decompressor, gun.c, to examples (use of inflateBack)
+- Remove an incorrect assertion in examples/zpipe.c
+- Add C++ wrapper in infback9.h [Donais]
+- Fix bug in inflateCopy() when decoding fixed codes
+- Note in zlib.h how much deflateSetDictionary() actually uses
+- Remove USE_DICT_HEAD in deflate.c (would mess up inflate if used)
+- Add _WIN32_WCE to define WIN32 in zconf.in.h [Spencer]
+- Don't include stderr.h or errno.h for _WIN32_WCE in zutil.h [Spencer]
+- Add gzdirect() function to indicate transparent reads
+- Update contrib/minizip [Vollant]
+- Fix compilation of deflate.c when both ASMV and FASTEST [Oberhumer]
+- Add casts in crc32.c to avoid warnings [Oberhumer]
+- Add contrib/masmx64 [Vollant]
+- Update contrib/asm586, asm686, masmx86, testzlib, vstudio [Vollant]
+
+Changes in 1.2.2.2 (30 December 2004)
+- Replace structure assignments in deflate.c and inflate.c with zmemcpy to
+  avoid implicit memcpy calls (portability for no-library compilation)
+- Increase sprintf() buffer size in gzdopen() to allow for large numbers
+- Add INFLATE_STRICT to check distances against zlib header
+- Improve WinCE errno handling and comments [Chang]
+- Remove comment about no gzip header processing in FAQ
+- Add Z_FIXED strategy option to deflateInit2() to force fixed trees
+- Add updated make_vms.com [Coghlan], update README
+- Create a new "examples" directory, move gzappend.c there, add zpipe.c,
+  fitblk.c, gzlog.[ch], gzjoin.c, and zlib_how.html.
+- Add FAQ entry and comments in deflate.c on uninitialized memory access
+- Add Solaris 9 make options in configure [Gilbert]
+- Allow strerror() usage in gzio.c for STDC
+- Fix DecompressBuf in contrib/delphi/ZLib.pas [ManChesTer]
+- Update contrib/masmx86/inffas32.asm and gvmat32.asm [Vollant]
+- Use z_off_t for adler32_combine() and crc32_combine() lengths
+- Make adler32() much faster for small len
+- Use OS_CODE in deflate() default gzip header
+
+Changes in 1.2.2.1 (31 October 2004)
+- Allow inflateSetDictionary() call for raw inflate
+- Fix inflate header crc check bug for file names and comments
+- Add deflateSetHeader() and gz_header structure for custom gzip headers
+- Add inflateGetheader() to retrieve gzip headers
+- Add crc32_combine() and adler32_combine() functions
+- Add alloc_func, free_func, in_func, out_func to Z_PREFIX list
+- Use zstreamp consistently in zlib.h (inflate_back functions)
+- Remove GUNZIP condition from definition of inflate_mode in inflate.h
+  and in contrib/inflate86/inffast.S [Truta, Anderson]
+- Add support for AMD64 in contrib/inflate86/inffas86.c [Anderson]
+- Update projects/README.projects and projects/visualc6 [Truta]
+- Update win32/DLL_FAQ.txt [Truta]
+- Avoid warning under NO_GZCOMPRESS in gzio.c; fix typo [Truta]
+- Deprecate Z_ASCII; use Z_TEXT instead [Truta]
+- Use a new algorithm for setting strm->data_type in trees.c [Truta]
+- Do not define an exit() prototype in zutil.c unless DEBUG defined
+- Remove prototype of exit() from zutil.c, example.c, minigzip.c [Truta]
+- Add comment in zlib.h for Z_NO_FLUSH parameter to deflate()
+- Fix Darwin build version identification [Peterson]
+
+Changes in 1.2.2 (3 October 2004)
+- Update zlib.h comments on gzip in-memory processing
+- Set adler to 1 in inflateReset() to support Java test suite [Walles]
+- Add contrib/dotzlib [Ravn]
+- Update win32/DLL_FAQ.txt [Truta]
+- Update contrib/minizip [Vollant]
+- Move contrib/visual-basic.txt to old/ [Truta]
+- Fix assembler builds in projects/visualc6/ [Truta]
+
+Changes in 1.2.1.2 (9 September 2004)
+- Update INDEX file
+- Fix trees.c to update strm->data_type (no one ever noticed!)
+- Fix bug in error case in inflate.c, infback.c, and infback9.c [Brown]
+- Add "volatile" to crc table flag declaration (for DYNAMIC_CRC_TABLE)
+- Add limited multitasking protection to DYNAMIC_CRC_TABLE
+- Add NO_vsnprintf for VMS in zutil.h [Mozilla]
+- Don't declare strerror() under VMS [Mozilla]
+- Add comment to DYNAMIC_CRC_TABLE to use get_crc_table() to initialize
+- Update contrib/ada [Anisimkov]
+- Update contrib/minizip [Vollant]
+- Fix configure to not hardcode directories for Darwin [Peterson]
+- Fix gzio.c to not return error on empty files [Brown]
+- Fix indentation; update version in contrib/delphi/ZLib.pas and
+  contrib/pascal/zlibpas.pas [Truta]
+- Update mkasm.bat in contrib/masmx86 [Truta]
+- Update contrib/untgz [Truta]
+- Add projects/README.projects [Truta]
+- Add project for MS Visual C++ 6.0 in projects/visualc6 [Cadieux, Truta]
+- Update win32/DLL_FAQ.txt [Truta]
+- Update list of Z_PREFIX symbols in zconf.h [Randers-Pehrson, Truta]
+- Remove an unnecessary assignment to curr in inftrees.c [Truta]
+- Add OS/2 to exe builds in configure [Poltorak]
+- Remove err dummy parameter in zlib.h [Kientzle]
+
+Changes in 1.2.1.1 (9 January 2004)
+- Update email address in README
+- Several FAQ updates
+- Fix a big fat bug in inftrees.c that prevented decoding valid
+  dynamic blocks with only literals and no distance codes --
+  Thanks to "Hot Emu" for the bug report and sample file
+- Add a note to puff.c on no distance codes case.
+
+Changes in 1.2.1 (17 November 2003)
+- Remove a tab in contrib/gzappend/gzappend.c
+- Update some interfaces in contrib for new zlib functions
+- Update zlib version number in some contrib entries
+- Add Windows CE definition for ptrdiff_t in zutil.h [Mai, Truta]
+- Support shared libraries on Hurd and KFreeBSD [Brown]
+- Fix error in NO_DIVIDE option of adler32.c
+
+Changes in 1.2.0.8 (4 November 2003)
+- Update version in contrib/delphi/ZLib.pas and contrib/pascal/zlibpas.pas
+- Add experimental NO_DIVIDE #define in adler32.c
+    - Possibly faster on some processors (let me know if it is)
+- Correct Z_BLOCK to not return on first inflate call if no wrap
+- Fix strm->data_type on inflate() return to correctly indicate EOB
+- Add deflatePrime() function for appending in the middle of a byte
+- Add contrib/gzappend for an example of appending to a stream
+- Update win32/DLL_FAQ.txt [Truta]
+- Delete Turbo C comment in README [Truta]
+- Improve some indentation in zconf.h [Truta]
+- Fix infinite loop on bad input in configure script [Church]
+- Fix gzeof() for concatenated gzip files [Johnson]
+- Add example to contrib/visual-basic.txt [Michael B.]
+- Add -p to mkdir's in Makefile.in [vda]
+- Fix configure to properly detect presence or lack of printf functions
+- Add AS400 support [Monnerat]
+- Add a little Cygwin support [Wilson]
+
+Changes in 1.2.0.7 (21 September 2003)
+- Correct some debug formats in contrib/infback9
+- Cast a type in a debug statement in trees.c
+- Change search and replace delimiter in configure from % to # [Beebe]
+- Update contrib/untgz to 0.2 with various fixes [Truta]
+- Add build support for Amiga [Nikl]
+- Remove some directories in old that have been updated to 1.2
+- Add dylib building for Mac OS X in configure and Makefile.in
+- Remove old distribution stuff from Makefile
+- Update README to point to DLL_FAQ.txt, and add comment on Mac OS X
+- Update links in README
+
+Changes in 1.2.0.6 (13 September 2003)
+- Minor FAQ updates
+- Update contrib/minizip to 1.00 [Vollant]
+- Remove test of gz functions in example.c when GZ_COMPRESS defined [Truta]
+- Update POSTINC comment for 68060 [Nikl]
+- Add contrib/infback9 with deflate64 decoding (unsupported)
+- For MVS define NO_vsnprintf and undefine FAR [van Burik]
+- Add pragma for fdopen on MVS [van Burik]
+
+Changes in 1.2.0.5 (8 September 2003)
+- Add OF to inflateBackEnd() declaration in zlib.h
+- Remember start when using gzdopen in the middle of a file
+- Use internal off_t counters in gz* functions to properly handle seeks
+- Perform more rigorous check for distance-too-far in inffast.c
+- Add Z_BLOCK flush option to return from inflate at block boundary
+- Set strm->data_type on return from inflate
+    - Indicate bits unused, if at block boundary, and if in last block
+- Replace size_t with ptrdiff_t in crc32.c, and check for correct size
+- Add condition so old NO_DEFLATE define still works for compatibility
+- FAQ update regarding the Windows DLL [Truta]
+- INDEX update: add qnx entry, remove aix entry [Truta]
+- Install zlib.3 into mandir [Wilson]
+- Move contrib/zlib_dll_FAQ.txt to win32/DLL_FAQ.txt; update [Truta]
+- Adapt the zlib interface to the new DLL convention guidelines [Truta]
+- Introduce ZLIB_WINAPI macro to allow the export of functions using
+  the WINAPI calling convention, for Visual Basic [Vollant, Truta]
+- Update msdos and win32 scripts and makefiles [Truta]
+- Export symbols by name, not by ordinal, in win32/zlib.def [Truta]
+- Add contrib/ada [Anisimkov]
+- Move asm files from contrib/vstudio/vc70_32 to contrib/asm386 [Truta]
+- Rename contrib/asm386 to contrib/masmx86 [Truta, Vollant]
+- Add contrib/masm686 [Truta]
+- Fix offsets in contrib/inflate86 and contrib/masmx86/inffas32.asm
+  [Truta, Vollant]
+- Update contrib/delphi; rename to contrib/pascal; add example [Truta]
+- Remove contrib/delphi2; add a new contrib/delphi [Truta]
+- Avoid inclusion of the nonstandard <memory.h> in contrib/iostream,
+  and fix some method prototypes [Truta]
+- Fix the ZCR_SEED2 constant to avoid warnings in contrib/minizip
+  [Truta]
+- Avoid the use of backslash (\) in contrib/minizip [Vollant]
+- Fix file time handling in contrib/untgz; update makefiles [Truta]
+- Update contrib/vstudio/vc70_32 to comply with the new DLL guidelines
+  [Vollant]
+- Remove contrib/vstudio/vc15_16 [Vollant]
+- Rename contrib/vstudio/vc70_32 to contrib/vstudio/vc7 [Truta]
+- Update README.contrib [Truta]
+- Invert the assignment order of match_head and s->prev[...] in
+  INSERT_STRING [Truta]
+- Compare TOO_FAR with 32767 instead of 32768, to avoid 16-bit warnings
+  [Truta]
+- Compare function pointers with 0, not with NULL or Z_NULL [Truta]
+- Fix prototype of syncsearch in inflate.c [Truta]
+- Introduce ASMINF macro to be enabled when using an ASM implementation
+  of inflate_fast [Truta]
+- Change NO_DEFLATE to NO_GZCOMPRESS [Truta]
+- Modify test_gzio in example.c to take a single file name as a
+  parameter [Truta]
+- Exit the example.c program if gzopen fails [Truta]
+- Add type casts around strlen in example.c [Truta]
+- Remove casting to sizeof in minigzip.c; give a proper type
+  to the variable compared with SUFFIX_LEN [Truta]
+- Update definitions of STDC and STDC99 in zconf.h [Truta]
+- Synchronize zconf.h with the new Windows DLL interface [Truta]
+- Use SYS16BIT instead of __32BIT__ to distinguish between
+  16- and 32-bit platforms [Truta]
+- Use far memory allocators in small 16-bit memory models for
+  Turbo C [Truta]
+- Add info about the use of ASMV, ASMINF and ZLIB_WINAPI in
+  zlibCompileFlags [Truta]
+- Cygwin has vsnprintf [Wilson]
+- In Windows16, OS_CODE is 0, as in MSDOS [Truta]
+- In Cygwin, OS_CODE is 3 (Unix), not 11 (Windows32) [Wilson]
+
+Changes in 1.2.0.4 (10 August 2003)
+- Minor FAQ updates
+- Be more strict when checking inflateInit2's windowBits parameter
+- Change NO_GUNZIP compile option to NO_GZIP to cover deflate as well
+- Add gzip wrapper option to deflateInit2 using windowBits
+- Add updated QNX rule in configure and qnx directory [Bonnefoy]
+- Make inflate distance-too-far checks more rigorous
+- Clean up FAR usage in inflate
+- Add casting to sizeof() in gzio.c and minigzip.c
+
+Changes in 1.2.0.3 (19 July 2003)
+- Fix silly error in gzungetc() implementation [Vollant]
+- Update contrib/minizip and contrib/vstudio [Vollant]
+- Fix printf format in example.c
+- Correct cdecl support in zconf.in.h [Anisimkov]
+- Minor FAQ updates
+
+Changes in 1.2.0.2 (13 July 2003)
+- Add ZLIB_VERNUM in zlib.h for numerical preprocessor comparisons
+- Attempt to avoid warnings in crc32.c for pointer-int conversion
+- Add AIX to configure, remove aix directory [Bakker]
+- Add some casts to minigzip.c
+- Improve checking after insecure sprintf() or vsprintf() calls
+- Remove #elif's from crc32.c
+- Change leave label to inf_leave in inflate.c and infback.c to avoid
+  library conflicts
+- Remove inflate gzip decoding by default--only enable gzip decoding by
+  special request for stricter backward compatibility
+- Add zlibCompileFlags() function to return compilation information
+- More typecasting in deflate.c to avoid warnings
+- Remove leading underscore from _Capital #defines [Truta]
+- Fix configure to link shared library when testing
+- Add some Windows CE target adjustments [Mai]
+- Remove #define ZLIB_DLL in zconf.h [Vollant]
+- Add zlib.3 [Rodgers]
+- Update RFC URL in deflate.c and algorithm.txt [Mai]
+- Add zlib_dll_FAQ.txt to contrib [Truta]
+- Add UL to some constants [Truta]
+- Update minizip and vstudio [Vollant]
+- Remove vestigial NEED_DUMMY_RETURN from zconf.in.h
+- Expand use of NO_DUMMY_DECL to avoid all dummy structures
+- Added iostream3 to contrib [Schwardt]
+- Replace rewind() with fseek() for WinCE [Truta]
+- Improve setting of zlib format compression level flags
+    - Report 0 for huffman and rle strategies and for level == 0 or 1
+    - Report 2 only for level == 6
+- Only deal with 64K limit when necessary at compile time [Truta]
+- Allow TOO_FAR check to be turned off at compile time [Truta]
+- Add gzclearerr() function [Souza]
+- Add gzungetc() function
+
+Changes in 1.2.0.1 (17 March 2003)
+- Add Z_RLE strategy for run-length encoding [Truta]
+    - When Z_RLE requested, restrict matches to distance one
+    - Update zlib.h, minigzip.c, gzopen(), gzdopen() for Z_RLE
+- Correct FASTEST compilation to allow level == 0
+- Clean up what gets compiled for FASTEST
+- Incorporate changes to zconf.in.h [Vollant]
+    - Refine detection of Turbo C need for dummy returns
+    - Refine ZLIB_DLL compilation
+    - Include additional header file on VMS for off_t typedef
+- Try to use _vsnprintf where it supplants vsprintf [Vollant]
+- Add some casts in inffast.c
+- Enchance comments in zlib.h on what happens if gzprintf() tries to
+  write more than 4095 bytes before compression
+- Remove unused state from inflateBackEnd()
+- Remove exit(0) from minigzip.c, example.c
+- Get rid of all those darn tabs
+- Add "check" target to Makefile.in that does the same thing as "test"
+- Add "mostlyclean" and "maintainer-clean" targets to Makefile.in
+- Update contrib/inflate86 [Anderson]
+- Update contrib/testzlib, contrib/vstudio, contrib/minizip [Vollant]
+- Add msdos and win32 directories with makefiles [Truta]
+- More additions and improvements to the FAQ
+
+Changes in 1.2.0 (9 March 2003)
+- New and improved inflate code
+    - About 20% faster
+    - Does not allocate 32K window unless and until needed
+    - Automatically detects and decompresses gzip streams
+    - Raw inflate no longer needs an extra dummy byte at end
+    - Added inflateBack functions using a callback interface--even faster
+      than inflate, useful for file utilities (gzip, zip)
+    - Added inflateCopy() function to record state for random access on
+      externally generated deflate streams (e.g. in gzip files)
+    - More readable code (I hope)
+- New and improved crc32()
+    - About 50% faster, thanks to suggestions from Rodney Brown
+- Add deflateBound() and compressBound() functions
+- Fix memory leak in deflateInit2()
+- Permit setting dictionary for raw deflate (for parallel deflate)
+- Fix const declaration for gzwrite()
+- Check for some malloc() failures in gzio.c
+- Fix bug in gzopen() on single-byte file 0x1f
+- Fix bug in gzread() on concatenated file with 0x1f at end of buffer
+  and next buffer doesn't start with 0x8b
+- Fix uncompress() to return Z_DATA_ERROR on truncated input
+- Free memory at end of example.c
+- Remove MAX #define in trees.c (conflicted with some libraries)
+- Fix static const's in deflate.c, gzio.c, and zutil.[ch]
+- Declare malloc() and free() in gzio.c if STDC not defined
+- Use malloc() instead of calloc() in zutil.c if int big enough
+- Define STDC for AIX
+- Add aix/ with approach for compiling shared library on AIX
+- Add HP-UX support for shared libraries in configure
+- Add OpenUNIX support for shared libraries in configure
+- Use $cc instead of gcc to build shared library
+- Make prefix directory if needed when installing
+- Correct Macintosh avoidance of typedef Byte in zconf.h
+- Correct Turbo C memory allocation when under Linux
+- Use libz.a instead of -lz in Makefile (assure use of compiled library)
+- Update configure to check for snprintf or vsnprintf functions and their
+  return value, warn during make if using an insecure function
+- Fix configure problem with compile-time knowledge of HAVE_UNISTD_H that
+  is lost when library is used--resolution is to build new zconf.h
+- Documentation improvements (in zlib.h):
+    - Document raw deflate and inflate
+    - Update RFCs URL
+    - Point out that zlib and gzip formats are different
+    - Note that Z_BUF_ERROR is not fatal
+    - Document string limit for gzprintf() and possible buffer overflow
+    - Note requirement on avail_out when flushing
+    - Note permitted values of flush parameter of inflate()
+- Add some FAQs (and even answers) to the FAQ
+- Add contrib/inflate86/ for x86 faster inflate
+- Add contrib/blast/ for PKWare Data Compression Library decompression
+- Add contrib/puff/ simple inflate for deflate format description
+
+Changes in 1.1.4 (11 March 2002)
+- ZFREE was repeated on same allocation on some error conditions.
+  This creates a security problem described in
+  http://www.zlib.org/advisory-2002-03-11.txt
+- Returned incorrect error (Z_MEM_ERROR) on some invalid data
+- Avoid accesses before window for invalid distances with inflate window
+  less than 32K.
+- force windowBits > 8 to avoid a bug in the encoder for a window size
+  of 256 bytes. (A complete fix will be available in 1.1.5).
+
+Changes in 1.1.3 (9 July 1998)
+- fix "an inflate input buffer bug that shows up on rare but persistent
+  occasions" (Mark)
+- fix gzread and gztell for concatenated .gz files (Didier Le Botlan)
+- fix gzseek(..., SEEK_SET) in write mode
+- fix crc check after a gzeek (Frank Faubert)
+- fix miniunzip when the last entry in a zip file is itself a zip file
+  (J Lillge)
+- add contrib/asm586 and contrib/asm686 (Brian Raiter)
+  See http://www.muppetlabs.com/~breadbox/software/assembly.html
+- add support for Delphi 3 in contrib/delphi (Bob Dellaca)
+- add support for C++Builder 3 and Delphi 3 in contrib/delphi2 (Davide Moretti)
+- do not exit prematurely in untgz if 0 at start of block (Magnus Holmgren)
+- use macro EXTERN instead of extern to support DLL for BeOS (Sander Stoks)
+- added a FAQ file
+
+- Support gzdopen on Mac with Metrowerks (Jason Linhart)
+- Do not redefine Byte on Mac (Brad Pettit & Jason Linhart)
+- define SEEK_END too if SEEK_SET is not defined (Albert Chin-A-Young)
+- avoid some warnings with Borland C (Tom Tanner)
+- fix a problem in contrib/minizip/zip.c for 16-bit MSDOS (Gilles Vollant)
+- emulate utime() for WIN32 in contrib/untgz  (Gilles Vollant)
+- allow several arguments to configure (Tim Mooney, Frodo Looijaard)
+- use libdir and includedir in Makefile.in (Tim Mooney)
+- support shared libraries on OSF1 V4 (Tim Mooney)
+- remove so_locations in "make clean"  (Tim Mooney)
+- fix maketree.c compilation error (Glenn, Mark)
+- Python interface to zlib now in Python 1.5 (Jeremy Hylton)
+- new Makefile.riscos (Rich Walker)
+- initialize static descriptors in trees.c for embedded targets (Nick Smith)
+- use "foo-gz" in example.c for RISCOS and VMS (Nick Smith)
+- add the OS/2 files in Makefile.in too (Andrew Zabolotny)
+- fix fdopen and halloc macros for Microsoft C 6.0 (Tom Lane)
+- fix maketree.c to allow clean compilation of inffixed.h (Mark)
+- fix parameter check in deflateCopy (Gunther Nikl)
+- cleanup trees.c, use compressed_len only in debug mode (Christian Spieler)
+- Many portability patches by Christian Spieler:
+  . zutil.c, zutil.h: added "const" for zmem*
+  . Make_vms.com: fixed some typos
+  . Make_vms.com: msdos/Makefile.*: removed zutil.h from some dependency lists
+  . msdos/Makefile.msc: remove "default rtl link library" info from obj files
+  . msdos/Makefile.*: use model-dependent name for the built zlib library
+  . msdos/Makefile.emx, nt/Makefile.emx, nt/Makefile.gcc:
+     new makefiles, for emx (DOS/OS2), emx&rsxnt and mingw32 (Windows 9x / NT)
+- use define instead of typedef for Bytef also for MSC small/medium (Tom Lane)
+- replace __far with _far for better portability (Christian Spieler, Tom Lane)
+- fix test for errno.h in configure (Tim Newsham)
+
+Changes in 1.1.2 (19 March 98)
+- added contrib/minzip, mini zip and unzip based on zlib (Gilles Vollant)
+  See http://www.winimage.com/zLibDll/unzip.html
+- preinitialize the inflate tables for fixed codes, to make the code
+  completely thread safe (Mark)
+- some simplifications and slight speed-up to the inflate code (Mark)
+- fix gzeof on non-compressed files (Allan Schrum)
+- add -std1 option in configure for OSF1 to fix gzprintf (Martin Mokrejs)
+- use default value of 4K for Z_BUFSIZE for 16-bit MSDOS (Tim Wegner + Glenn)
+- added os2/Makefile.def and os2/zlib.def (Andrew Zabolotny)
+- add shared lib support for UNIX_SV4.2MP (MATSUURA Takanori)
+- do not wrap extern "C" around system includes (Tom Lane)
+- mention zlib binding for TCL in README (Andreas Kupries)
+- added amiga/Makefile.pup for Amiga powerUP SAS/C PPC (Andreas Kleinert)
+- allow "make install prefix=..." even after configure (Glenn Randers-Pehrson)
+- allow "configure --prefix $HOME" (Tim Mooney)
+- remove warnings in example.c and gzio.c (Glenn Randers-Pehrson)
+- move Makefile.sas to amiga/Makefile.sas
+
+Changes in 1.1.1 (27 Feb 98)
+- fix macros _tr_tally_* in deflate.h for debug mode  (Glenn Randers-Pehrson)
+- remove block truncation heuristic which had very marginal effect for zlib
+  (smaller lit_bufsize than in gzip 1.2.4) and degraded a little the
+  compression ratio on some files. This also allows inlining _tr_tally for
+  matches in deflate_slow.
+- added msdos/Makefile.w32 for WIN32 Microsoft Visual C++ (Bob Frazier)
+
+Changes in 1.1.0 (24 Feb 98)
+- do not return STREAM_END prematurely in inflate (John Bowler)
+- revert to the zlib 1.0.8 inflate to avoid the gcc 2.8.0 bug (Jeremy Buhler)
+- compile with -DFASTEST to get compression code optimized for speed only
+- in minigzip, try mmap'ing the input file first (Miguel Albrecht)
+- increase size of I/O buffers in minigzip.c and gzio.c (not a big gain
+  on Sun but significant on HP)
+
+- add a pointer to experimental unzip library in README (Gilles Vollant)
+- initialize variable gcc in configure (Chris Herborth)
+
+Changes in 1.0.9 (17 Feb 1998)
+- added gzputs and gzgets functions
+- do not clear eof flag in gzseek (Mark Diekhans)
+- fix gzseek for files in transparent mode (Mark Diekhans)
+- do not assume that vsprintf returns the number of bytes written (Jens Krinke)
+- replace EXPORT with ZEXPORT to avoid conflict with other programs
+- added compress2 in zconf.h, zlib.def, zlib.dnt
+- new asm code from Gilles Vollant in contrib/asm386
+- simplify the inflate code (Mark):
+ . Replace ZALLOC's in huft_build() with single ZALLOC in inflate_blocks_new()
+ . ZALLOC the length list in inflate_trees_fixed() instead of using stack
+ . ZALLOC the value area for huft_build() instead of using stack
+ . Simplify Z_FINISH check in inflate()
+
+- Avoid gcc 2.8.0 comparison bug a little differently than zlib 1.0.8
+- in inftrees.c, avoid cc -O bug on HP (Farshid Elahi)
+- in zconf.h move the ZLIB_DLL stuff earlier to avoid problems with
+  the declaration of FAR (Gilles VOllant)
+- install libz.so* with mode 755 (executable) instead of 644 (Marc Lehmann)
+- read_buf buf parameter of type Bytef* instead of charf*
+- zmemcpy parameters are of type Bytef*, not charf* (Joseph Strout)
+- do not redeclare unlink in minigzip.c for WIN32 (John Bowler)
+- fix check for presence of directories in "make install" (Ian Willis)
+
+Changes in 1.0.8 (27 Jan 1998)
+- fixed offsets in contrib/asm386/gvmat32.asm (Gilles Vollant)
+- fix gzgetc and gzputc for big endian systems (Markus Oberhumer)
+- added compress2() to allow setting the compression level
+- include sys/types.h to get off_t on some systems (Marc Lehmann & QingLong)
+- use constant arrays for the static trees in trees.c instead of computing
+  them at run time (thanks to Ken Raeburn for this suggestion). To create
+  trees.h, compile with GEN_TREES_H and run "make test".
+- check return code of example in "make test" and display result
+- pass minigzip command line options to file_compress
+- simplifying code of inflateSync to avoid gcc 2.8 bug
+
+- support CC="gcc -Wall" in configure -s (QingLong)
+- avoid a flush caused by ftell in gzopen for write mode (Ken Raeburn)
+- fix test for shared library support to avoid compiler warnings
+- zlib.lib -> zlib.dll in msdos/zlib.rc (Gilles Vollant)
+- check for TARGET_OS_MAC in addition to MACOS (Brad Pettit)
+- do not use fdopen for Metrowerks on Mac (Brad Pettit))
+- add checks for gzputc and gzputc in example.c
+- avoid warnings in gzio.c and deflate.c (Andreas Kleinert)
+- use const for the CRC table (Ken Raeburn)
+- fixed "make uninstall" for shared libraries
+- use Tracev instead of Trace in infblock.c
+- in example.c use correct compressed length for test_sync
+- suppress +vnocompatwarnings in configure for HPUX (not always supported)
+
+Changes in 1.0.7 (20 Jan 1998)
+- fix gzseek which was broken in write mode
+- return error for gzseek to negative absolute position
+- fix configure for Linux (Chun-Chung Chen)
+- increase stack space for MSC (Tim Wegner)
+- get_crc_table and inflateSyncPoint are EXPORTed (Gilles Vollant)
+- define EXPORTVA for gzprintf (Gilles Vollant)
+- added man page zlib.3 (Rick Rodgers)
+- for contrib/untgz, fix makedir() and improve Makefile
+
+- check gzseek in write mode in example.c
+- allocate extra buffer for seeks only if gzseek is actually called
+- avoid signed/unsigned comparisons (Tim Wegner, Gilles Vollant)
+- add inflateSyncPoint in zconf.h
+- fix list of exported functions in nt/zlib.dnt and mdsos/zlib.def
+
+Changes in 1.0.6 (19 Jan 1998)
+- add functions gzprintf, gzputc, gzgetc, gztell, gzeof, gzseek, gzrewind and
+  gzsetparams (thanks to Roland Giersig and Kevin Ruland for some of this code)
+- Fix a deflate bug occurring only with compression level 0 (thanks to
+  Andy Buckler for finding this one).
+- In minigzip, pass transparently also the first byte for .Z files.
+- return Z_BUF_ERROR instead of Z_OK if output buffer full in uncompress()
+- check Z_FINISH in inflate (thanks to Marc Schluper)
+- Implement deflateCopy (thanks to Adam Costello)
+- make static libraries by default in configure, add --shared option.
+- move MSDOS or Windows specific files to directory msdos
+- suppress the notion of partial flush to simplify the interface
+  (but the symbol Z_PARTIAL_FLUSH is kept for compatibility with 1.0.4)
+- suppress history buffer provided by application to simplify the interface
+  (this feature was not implemented anyway in 1.0.4)
+- next_in and avail_in must be initialized before calling inflateInit or
+  inflateInit2
+- add EXPORT in all exported functions (for Windows DLL)
+- added Makefile.nt (thanks to Stephen Williams)
+- added the unsupported "contrib" directory:
+   contrib/asm386/ by Gilles Vollant <info@winimage.com>
+        386 asm code replacing longest_match().
+   contrib/iostream/ by Kevin Ruland <kevin@rodin.wustl.edu>
+        A C++ I/O streams interface to the zlib gz* functions
+   contrib/iostream2/  by Tyge LÃ¸vset <Tyge.Lovset@cmr.no>
+        Another C++ I/O streams interface
+   contrib/untgz/  by "Pedro A. Aranda Guti\irrez" <paag@tid.es>
+        A very simple tar.gz file extractor using zlib
+   contrib/visual-basic.txt by Carlos Rios <c_rios@sonda.cl>
+        How to use compress(), uncompress() and the gz* functions from VB.
+- pass params -f (filtered data), -h (huffman only), -1 to -9 (compression
+  level) in minigzip (thanks to Tom Lane)
+
+- use const for rommable constants in deflate
+- added test for gzseek and gztell in example.c
+- add undocumented function inflateSyncPoint() (hack for Paul Mackerras)
+- add undocumented function zError to convert error code to string
+  (for Tim Smithers)
+- Allow compilation of gzio with -DNO_DEFLATE to avoid the compression code.
+- Use default memcpy for Symantec MSDOS compiler.
+- Add EXPORT keyword for check_func (needed for Windows DLL)
+- add current directory to LD_LIBRARY_PATH for "make test"
+- create also a link for libz.so.1
+- added support for FUJITSU UXP/DS (thanks to Toshiaki Nomura)
+- use $(SHAREDLIB) instead of libz.so in Makefile.in (for HPUX)
+- added -soname for Linux in configure (Chun-Chung Chen,
+- assign numbers to the exported functions in zlib.def (for Windows DLL)
+- add advice in zlib.h for best usage of deflateSetDictionary
+- work around compiler bug on Atari (cast Z_NULL in call of s->checkfn)
+- allow compilation with ANSI keywords only enabled for TurboC in large model
+- avoid "versionString"[0] (Borland bug)
+- add NEED_DUMMY_RETURN for Borland
+- use variable z_verbose for tracing in debug mode (L. Peter Deutsch).
+- allow compilation with CC
+- defined STDC for OS/2 (David Charlap)
+- limit external names to 8 chars for MVS (Thomas Lund)
+- in minigzip.c, use static buffers only for 16-bit systems
+- fix suffix check for "minigzip -d foo.gz"
+- do not return an error for the 2nd of two consecutive gzflush() (Felix Lee)
+- use _fdopen instead of fdopen for MSC >= 6.0 (Thomas Fanslau)
+- added makelcc.bat for lcc-win32 (Tom St Denis)
+- in Makefile.dj2, use copy and del instead of install and rm (Frank Donahoe)
+- Avoid expanded $Id$. Use "rcs -kb" or "cvs admin -kb" to avoid Id expansion.
+- check for unistd.h in configure (for off_t)
+- remove useless check parameter in inflate_blocks_free
+- avoid useless assignment of s->check to itself in inflate_blocks_new
+- do not flush twice in gzclose (thanks to Ken Raeburn)
+- rename FOPEN as F_OPEN to avoid clash with /usr/include/sys/file.h
+- use NO_ERRNO_H instead of enumeration of operating systems with errno.h
+- work around buggy fclose on pipes for HP/UX
+- support zlib DLL with BORLAND C++ 5.0 (thanks to Glenn Randers-Pehrson)
+- fix configure if CC is already equal to gcc
+
+Changes in 1.0.5 (3 Jan 98)
+- Fix inflate to terminate gracefully when fed corrupted or invalid data
+- Use const for rommable constants in inflate
+- Eliminate memory leaks on error conditions in inflate
+- Removed some vestigial code in inflate
+- Update web address in README
+
+Changes in 1.0.4 (24 Jul 96)
+- In very rare conditions, deflate(s, Z_FINISH) could fail to produce an EOF
+  bit, so the decompressor could decompress all the correct data but went
+  on to attempt decompressing extra garbage data. This affected minigzip too.
+- zlibVersion and gzerror return const char* (needed for DLL)
+- port to RISCOS (no fdopen, no multiple dots, no unlink, no fileno)
+- use z_error only for DEBUG (avoid problem with DLLs)
+
+Changes in 1.0.3 (2 Jul 96)
+- use z_streamp instead of z_stream *, which is now a far pointer in MSDOS
+  small and medium models; this makes the library incompatible with previous
+  versions for these models. (No effect in large model or on other systems.)
+- return OK instead of BUF_ERROR if previous deflate call returned with
+  avail_out as zero but there is nothing to do
+- added memcmp for non STDC compilers
+- define NO_DUMMY_DECL for more Mac compilers (.h files merged incorrectly)
+- define __32BIT__ if __386__ or i386 is defined (pb. with Watcom and SCO)
+- better check for 16-bit mode MSC (avoids problem with Symantec)
+
+Changes in 1.0.2 (23 May 96)
+- added Windows DLL support
+- added a function zlibVersion (for the DLL support)
+- fixed declarations using Bytef in infutil.c (pb with MSDOS medium model)
+- Bytef is define's instead of typedef'd only for Borland C
+- avoid reading uninitialized memory in example.c
+- mention in README that the zlib format is now RFC1950
+- updated Makefile.dj2
+- added algorithm.doc
+
+Changes in 1.0.1 (20 May 96) [1.0 skipped to avoid confusion]
+- fix array overlay in deflate.c which sometimes caused bad compressed data
+- fix inflate bug with empty stored block
+- fix MSDOS medium model which was broken in 0.99
+- fix deflateParams() which could generate bad compressed data.
+- Bytef is define'd instead of typedef'ed (work around Borland bug)
+- added an INDEX file
+- new makefiles for DJGPP (Makefile.dj2), 32-bit Borland (Makefile.b32),
+  Watcom (Makefile.wat), Amiga SAS/C (Makefile.sas)
+- speed up adler32 for modern machines without auto-increment
+- added -ansi for IRIX in configure
+- static_init_done in trees.c is an int
+- define unlink as delete for VMS
+- fix configure for QNX
+- add configure branch for SCO and HPUX
+- avoid many warnings (unused variables, dead assignments, etc...)
+- no fdopen for BeOS
+- fix the Watcom fix for 32 bit mode (define FAR as empty)
+- removed redefinition of Byte for MKWERKS
+- work around an MWKERKS bug (incorrect merge of all .h files)
+
+Changes in 0.99 (27 Jan 96)
+- allow preset dictionary shared between compressor and decompressor
+- allow compression level 0 (no compression)
+- add deflateParams in zlib.h: allow dynamic change of compression level
+  and compression strategy.
+- test large buffers and deflateParams in example.c
+- add optional "configure" to build zlib as a shared library
+- suppress Makefile.qnx, use configure instead
+- fixed deflate for 64-bit systems (detected on Cray)
+- fixed inflate_blocks for 64-bit systems (detected on Alpha)
+- declare Z_DEFLATED in zlib.h (possible parameter for deflateInit2)
+- always return Z_BUF_ERROR when deflate() has nothing to do
+- deflateInit and inflateInit are now macros to allow version checking
+- prefix all global functions and types with z_ with -DZ_PREFIX
+- make falloc completely reentrant (inftrees.c)
+- fixed very unlikely race condition in ct_static_init
+- free in reverse order of allocation to help memory manager
+- use zlib-1.0/* instead of zlib/* inside the tar.gz
+- make zlib warning-free with "gcc -O3 -Wall -Wwrite-strings -Wpointer-arith
+  -Wconversion -Wstrict-prototypes -Wmissing-prototypes"
+- allow gzread on concatenated .gz files
+- deflateEnd now returns Z_DATA_ERROR if it was premature
+- deflate is finally (?) fully deterministic (no matches beyond end of input)
+- Document Z_SYNC_FLUSH
+- add uninstall in Makefile
+- Check for __cpluplus in zlib.h
+- Better test in ct_align for partial flush
+- avoid harmless warnings for Borland C++
+- initialize hash_head in deflate.c
+- avoid warning on fdopen (gzio.c) for HP cc -Aa
+- include stdlib.h for STDC compilers
+- include errno.h for Cray
+- ignore error if ranlib doesn't exist
+- call ranlib twice for NeXTSTEP
+- use exec_prefix instead of prefix for libz.a
+- renamed ct_* as _tr_* to avoid conflict with applications
+- clear z->msg in inflateInit2 before any error return
+- initialize opaque in example.c, gzio.c, deflate.c and inflate.c
+- fixed typo in zconf.h (_GNUC__ => __GNUC__)
+- check for WIN32 in zconf.h and zutil.c (avoid farmalloc in 32-bit mode)
+- fix typo in Make_vms.com (f$trnlnm -> f$getsyi)
+- in fcalloc, normalize pointer if size > 65520 bytes
+- don't use special fcalloc for 32 bit Borland C++
+- use STDC instead of __GO32__ to avoid redeclaring exit, calloc, etc...
+- use Z_BINARY instead of BINARY
+- document that gzclose after gzdopen will close the file
+- allow "a" as mode in gzopen.
+- fix error checking in gzread
+- allow skipping .gz extra-field on pipes
+- added reference to Perl interface in README
+- put the crc table in FAR data (I dislike more and more the medium model :)
+- added get_crc_table
+- added a dimension to all arrays (Borland C can't count).
+- workaround Borland C bug in declaration of inflate_codes_new & inflate_fast
+- guard against multiple inclusion of *.h (for precompiled header on Mac)
+- Watcom C pretends to be Microsoft C small model even in 32 bit mode.
+- don't use unsized arrays to avoid silly warnings by Visual C++:
+     warning C4746: 'inflate_mask' : unsized array treated as  '__far'
+     (what's wrong with far data in far model?).
+- define enum out of inflate_blocks_state to allow compilation with C++
+
+Changes in 0.95 (16 Aug 95)
+- fix MSDOS small and medium model (now easier to adapt to any compiler)
+- inlined send_bits
+- fix the final (:-) bug for deflate with flush (output was correct but
+  not completely flushed in rare occasions).
+- default window size is same for compression and decompression
+  (it's now sufficient to set MAX_WBITS in zconf.h).
+- voidp -> voidpf and voidnp -> voidp (for consistency with other
+  typedefs and because voidnp was not near in large model).
+
+Changes in 0.94 (13 Aug 95)
+- support MSDOS medium model
+- fix deflate with flush (could sometimes generate bad output)
+- fix deflateReset (zlib header was incorrectly suppressed)
+- added support for VMS
+- allow a compression level in gzopen()
+- gzflush now calls fflush
+- For deflate with flush, flush even if no more input is provided.
+- rename libgz.a as libz.a
+- avoid complex expression in infcodes.c triggering Turbo C bug
+- work around a problem with gcc on Alpha (in INSERT_STRING)
+- don't use inline functions (problem with some gcc versions)
+- allow renaming of Byte, uInt, etc... with #define.
+- avoid warning about (unused) pointer before start of array in deflate.c
+- avoid various warnings in gzio.c, example.c, infblock.c, adler32.c, zutil.c
+- avoid reserved word 'new' in trees.c
+
+Changes in 0.93 (25 June 95)
+- temporarily disable inline functions
+- make deflate deterministic
+- give enough lookahead for PARTIAL_FLUSH
+- Set binary mode for stdin/stdout in minigzip.c for OS/2
+- don't even use signed char in inflate (not portable enough)
+- fix inflate memory leak for segmented architectures
+
+Changes in 0.92 (3 May 95)
+- don't assume that char is signed (problem on SGI)
+- Clear bit buffer when starting a stored block
+- no memcpy on Pyramid
+- suppressed inftest.c
+- optimized fill_window, put longest_match inline for gcc
+- optimized inflate on stored blocks.
+- untabify all sources to simplify patches
+
+Changes in 0.91 (2 May 95)
+- Default MEM_LEVEL is 8 (not 9 for Unix) as documented in zlib.h
+- Document the memory requirements in zconf.h
+- added "make install"
+- fix sync search logic in inflateSync
+- deflate(Z_FULL_FLUSH) now works even if output buffer too short
+- after inflateSync, don't scare people with just "lo world"
+- added support for DJGPP
+
+Changes in 0.9 (1 May 95)
+- don't assume that zalloc clears the allocated memory (the TurboC bug
+  was Mark's bug after all :)
+- let again gzread copy uncompressed data unchanged (was working in 0.71)
+- deflate(Z_FULL_FLUSH), inflateReset and inflateSync are now fully implemented
+- added a test of inflateSync in example.c
+- moved MAX_WBITS to zconf.h because users might want to change that.
+- document explicitly that zalloc(64K) on MSDOS must return a normalized
+  pointer (zero offset)
+- added Makefiles for Microsoft C, Turbo C, Borland C++
+- faster crc32()
+
+Changes in 0.8 (29 April 95)
+- added fast inflate (inffast.c)
+- deflate(Z_FINISH) now returns Z_STREAM_END when done. Warning: this
+  is incompatible with previous versions of zlib which returned Z_OK.
+- work around a TurboC compiler bug (bad code for b << 0, see infutil.h)
+  (actually that was not a compiler bug, see 0.81 above)
+- gzread no longer reads one extra byte in certain cases
+- In gzio destroy(), don't reference a freed structure
+- avoid many warnings for MSDOS
+- avoid the ERROR symbol which is used by MS Windows
+
+Changes in 0.71 (14 April 95)
+- Fixed more MSDOS compilation problems :( There is still a bug with
+  TurboC large model.
+
+Changes in 0.7 (14 April 95)
+- Added full inflate support.
+- Simplified the crc32() interface. The pre- and post-conditioning
+  (one's complement) is now done inside crc32(). WARNING: this is
+  incompatible with previous versions; see zlib.h for the new usage.
+
+Changes in 0.61 (12 April 95)
+- workaround for a bug in TurboC. example and minigzip now work on MSDOS.
+
+Changes in 0.6 (11 April 95)
+- added minigzip.c
+- added gzdopen to reopen a file descriptor as gzFile
+- added transparent reading of non-gziped files in gzread.
+- fixed bug in gzread (don't read crc as data)
+- fixed bug in destroy (gzio.c) (don't return Z_STREAM_END for gzclose).
+- don't allocate big arrays in the stack (for MSDOS)
+- fix some MSDOS compilation problems
+
+Changes in 0.5:
+- do real compression in deflate.c. Z_PARTIAL_FLUSH is supported but
+  not yet Z_FULL_FLUSH.
+- support decompression but only in a single step (forced Z_FINISH)
+- added opaque object for zalloc and zfree.
+- added deflateReset and inflateReset
+- added a variable zlib_version for consistency checking.
+- renamed the 'filter' parameter of deflateInit2 as 'strategy'.
+  Added Z_FILTERED and Z_HUFFMAN_ONLY constants.
+
+Changes in 0.4:
+- avoid "zip" everywhere, use zlib instead of ziplib.
+- suppress Z_BLOCK_FLUSH, interpret Z_PARTIAL_FLUSH as block flush
+  if compression method == 8.
+- added adler32 and crc32
+- renamed deflateOptions as deflateInit2, call one or the other but not both
+- added the method parameter for deflateInit2.
+- added inflateInit2
+- simplied considerably deflateInit and inflateInit by not supporting
+  user-provided history buffer. This is supported only in deflateInit2
+  and inflateInit2.
+
+Changes in 0.3:
+- prefix all macro names with Z_
+- use Z_FINISH instead of deflateEnd to finish compression.
+- added Z_HUFFMAN_ONLY
+- added gzerror()
diff --git a/deps/libchdr/deps/zlib-1.2.11/FAQ b/deps/libchdr/deps/zlib-1.2.11/FAQ
new file mode 100644
index 00000000..99b7cf92
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/FAQ
@@ -0,0 +1,368 @@
+
+                Frequently Asked Questions about zlib
+
+
+If your question is not there, please check the zlib home page
+http://zlib.net/ which may have more recent information.
+The lastest zlib FAQ is at http://zlib.net/zlib_faq.html
+
+
+ 1. Is zlib Y2K-compliant?
+
+    Yes. zlib doesn't handle dates.
+
+ 2. Where can I get a Windows DLL version?
+
+    The zlib sources can be compiled without change to produce a DLL.  See the
+    file win32/DLL_FAQ.txt in the zlib distribution.  Pointers to the
+    precompiled DLL are found in the zlib web site at http://zlib.net/ .
+
+ 3. Where can I get a Visual Basic interface to zlib?
+
+    See
+        * http://marknelson.us/1997/01/01/zlib-engine/
+        * win32/DLL_FAQ.txt in the zlib distribution
+
+ 4. compress() returns Z_BUF_ERROR.
+
+    Make sure that before the call of compress(), the length of the compressed
+    buffer is equal to the available size of the compressed buffer and not
+    zero.  For Visual Basic, check that this parameter is passed by reference
+    ("as any"), not by value ("as long").
+
+ 5. deflate() or inflate() returns Z_BUF_ERROR.
+
+    Before making the call, make sure that avail_in and avail_out are not zero.
+    When setting the parameter flush equal to Z_FINISH, also make sure that
+    avail_out is big enough to allow processing all pending input.  Note that a
+    Z_BUF_ERROR is not fatal--another call to deflate() or inflate() can be
+    made with more input or output space.  A Z_BUF_ERROR may in fact be
+    unavoidable depending on how the functions are used, since it is not
+    possible to tell whether or not there is more output pending when
+    strm.avail_out returns with zero.  See http://zlib.net/zlib_how.html for a
+    heavily annotated example.
+
+ 6. Where's the zlib documentation (man pages, etc.)?
+
+    It's in zlib.h .  Examples of zlib usage are in the files test/example.c
+    and test/minigzip.c, with more in examples/ .
+
+ 7. Why don't you use GNU autoconf or libtool or ...?
+
+    Because we would like to keep zlib as a very small and simple package.
+    zlib is rather portable and doesn't need much configuration.
+
+ 8. I found a bug in zlib.
+
+    Most of the time, such problems are due to an incorrect usage of zlib.
+    Please try to reproduce the problem with a small program and send the
+    corresponding source to us at zlib@gzip.org .  Do not send multi-megabyte
+    data files without prior agreement.
+
+ 9. Why do I get "undefined reference to gzputc"?
+
+    If "make test" produces something like
+
+       example.o(.text+0x154): undefined reference to `gzputc'
+
+    check that you don't have old files libz.* in /usr/lib, /usr/local/lib or
+    /usr/X11R6/lib. Remove any old versions, then do "make install".
+
+10. I need a Delphi interface to zlib.
+
+    See the contrib/delphi directory in the zlib distribution.
+
+11. Can zlib handle .zip archives?
+
+    Not by itself, no.  See the directory contrib/minizip in the zlib
+    distribution.
+
+12. Can zlib handle .Z files?
+
+    No, sorry.  You have to spawn an uncompress or gunzip subprocess, or adapt
+    the code of uncompress on your own.
+
+13. How can I make a Unix shared library?
+
+    By default a shared (and a static) library is built for Unix.  So:
+
+    make distclean
+    ./configure
+    make
+
+14. How do I install a shared zlib library on Unix?
+
+    After the above, then:
+
+    make install
+
+    However, many flavors of Unix come with a shared zlib already installed.
+    Before going to the trouble of compiling a shared version of zlib and
+    trying to install it, you may want to check if it's already there!  If you
+    can #include <zlib.h>, it's there.  The -lz option will probably link to
+    it.  You can check the version at the top of zlib.h or with the
+    ZLIB_VERSION symbol defined in zlib.h .
+
+15. I have a question about OttoPDF.
+
+    We are not the authors of OttoPDF. The real author is on the OttoPDF web
+    site: Joel Hainley, jhainley@myndkryme.com.
+
+16. Can zlib decode Flate data in an Adobe PDF file?
+
+    Yes. See http://www.pdflib.com/ . To modify PDF forms, see
+    http://sourceforge.net/projects/acroformtool/ .
+
+17. Why am I getting this "register_frame_info not found" error on Solaris?
+
+    After installing zlib 1.1.4 on Solaris 2.6, running applications using zlib
+    generates an error such as:
+
+        ld.so.1: rpm: fatal: relocation error: file /usr/local/lib/libz.so:
+        symbol __register_frame_info: referenced symbol not found
+
+    The symbol __register_frame_info is not part of zlib, it is generated by
+    the C compiler (cc or gcc).  You must recompile applications using zlib
+    which have this problem.  This problem is specific to Solaris.  See
+    http://www.sunfreeware.com for Solaris versions of zlib and applications
+    using zlib.
+
+18. Why does gzip give an error on a file I make with compress/deflate?
+
+    The compress and deflate functions produce data in the zlib format, which
+    is different and incompatible with the gzip format.  The gz* functions in
+    zlib on the other hand use the gzip format.  Both the zlib and gzip formats
+    use the same compressed data format internally, but have different headers
+    and trailers around the compressed data.
+
+19. Ok, so why are there two different formats?
+
+    The gzip format was designed to retain the directory information about a
+    single file, such as the name and last modification date.  The zlib format
+    on the other hand was designed for in-memory and communication channel
+    applications, and has a much more compact header and trailer and uses a
+    faster integrity check than gzip.
+
+20. Well that's nice, but how do I make a gzip file in memory?
+
+    You can request that deflate write the gzip format instead of the zlib
+    format using deflateInit2().  You can also request that inflate decode the
+    gzip format using inflateInit2().  Read zlib.h for more details.
+
+21. Is zlib thread-safe?
+
+    Yes.  However any library routines that zlib uses and any application-
+    provided memory allocation routines must also be thread-safe.  zlib's gz*
+    functions use stdio library routines, and most of zlib's functions use the
+    library memory allocation routines by default.  zlib's *Init* functions
+    allow for the application to provide custom memory allocation routines.
+
+    Of course, you should only operate on any given zlib or gzip stream from a
+    single thread at a time.
+
+22. Can I use zlib in my commercial application?
+
+    Yes.  Please read the license in zlib.h.
+
+23. Is zlib under the GNU license?
+
+    No.  Please read the license in zlib.h.
+
+24. The license says that altered source versions must be "plainly marked". So
+    what exactly do I need to do to meet that requirement?
+
+    You need to change the ZLIB_VERSION and ZLIB_VERNUM #defines in zlib.h.  In
+    particular, the final version number needs to be changed to "f", and an
+    identification string should be appended to ZLIB_VERSION.  Version numbers
+    x.x.x.f are reserved for modifications to zlib by others than the zlib
+    maintainers.  For example, if the version of the base zlib you are altering
+    is "1.2.3.4", then in zlib.h you should change ZLIB_VERNUM to 0x123f, and
+    ZLIB_VERSION to something like "1.2.3.f-zachary-mods-v3".  You can also
+    update the version strings in deflate.c and inftrees.c.
+
+    For altered source distributions, you should also note the origin and
+    nature of the changes in zlib.h, as well as in ChangeLog and README, along
+    with the dates of the alterations.  The origin should include at least your
+    name (or your company's name), and an email address to contact for help or
+    issues with the library.
+
+    Note that distributing a compiled zlib library along with zlib.h and
+    zconf.h is also a source distribution, and so you should change
+    ZLIB_VERSION and ZLIB_VERNUM and note the origin and nature of the changes
+    in zlib.h as you would for a full source distribution.
+
+25. Will zlib work on a big-endian or little-endian architecture, and can I
+    exchange compressed data between them?
+
+    Yes and yes.
+
+26. Will zlib work on a 64-bit machine?
+
+    Yes.  It has been tested on 64-bit machines, and has no dependence on any
+    data types being limited to 32-bits in length.  If you have any
+    difficulties, please provide a complete problem report to zlib@gzip.org
+
+27. Will zlib decompress data from the PKWare Data Compression Library?
+
+    No.  The PKWare DCL uses a completely different compressed data format than
+    does PKZIP and zlib.  However, you can look in zlib's contrib/blast
+    directory for a possible solution to your problem.
+
+28. Can I access data randomly in a compressed stream?
+
+    No, not without some preparation.  If when compressing you periodically use
+    Z_FULL_FLUSH, carefully write all the pending data at those points, and
+    keep an index of those locations, then you can start decompression at those
+    points.  You have to be careful to not use Z_FULL_FLUSH too often, since it
+    can significantly degrade compression.  Alternatively, you can scan a
+    deflate stream once to generate an index, and then use that index for
+    random access.  See examples/zran.c .
+
+29. Does zlib work on MVS, OS/390, CICS, etc.?
+
+    It has in the past, but we have not heard of any recent evidence.  There
+    were working ports of zlib 1.1.4 to MVS, but those links no longer work.
+    If you know of recent, successful applications of zlib on these operating
+    systems, please let us know.  Thanks.
+
+30. Is there some simpler, easier to read version of inflate I can look at to
+    understand the deflate format?
+
+    First off, you should read RFC 1951.  Second, yes.  Look in zlib's
+    contrib/puff directory.
+
+31. Does zlib infringe on any patents?
+
+    As far as we know, no.  In fact, that was originally the whole point behind
+    zlib.  Look here for some more information:
+
+    http://www.gzip.org/#faq11
+
+32. Can zlib work with greater than 4 GB of data?
+
+    Yes.  inflate() and deflate() will process any amount of data correctly.
+    Each call of inflate() or deflate() is limited to input and output chunks
+    of the maximum value that can be stored in the compiler's "unsigned int"
+    type, but there is no limit to the number of chunks.  Note however that the
+    strm.total_in and strm_total_out counters may be limited to 4 GB.  These
+    counters are provided as a convenience and are not used internally by
+    inflate() or deflate().  The application can easily set up its own counters
+    updated after each call of inflate() or deflate() to count beyond 4 GB.
+    compress() and uncompress() may be limited to 4 GB, since they operate in a
+    single call.  gzseek() and gztell() may be limited to 4 GB depending on how
+    zlib is compiled.  See the zlibCompileFlags() function in zlib.h.
+
+    The word "may" appears several times above since there is a 4 GB limit only
+    if the compiler's "long" type is 32 bits.  If the compiler's "long" type is
+    64 bits, then the limit is 16 exabytes.
+
+33. Does zlib have any security vulnerabilities?
+
+    The only one that we are aware of is potentially in gzprintf().  If zlib is
+    compiled to use sprintf() or vsprintf(), then there is no protection
+    against a buffer overflow of an 8K string space (or other value as set by
+    gzbuffer()), other than the caller of gzprintf() assuring that the output
+    will not exceed 8K.  On the other hand, if zlib is compiled to use
+    snprintf() or vsnprintf(), which should normally be the case, then there is
+    no vulnerability.  The ./configure script will display warnings if an
+    insecure variation of sprintf() will be used by gzprintf().  Also the
+    zlibCompileFlags() function will return information on what variant of
+    sprintf() is used by gzprintf().
+
+    If you don't have snprintf() or vsnprintf() and would like one, you can
+    find a portable implementation here:
+
+        http://www.ijs.si/software/snprintf/
+
+    Note that you should be using the most recent version of zlib.  Versions
+    1.1.3 and before were subject to a double-free vulnerability, and versions
+    1.2.1 and 1.2.2 were subject to an access exception when decompressing
+    invalid compressed data.
+
+34. Is there a Java version of zlib?
+
+    Probably what you want is to use zlib in Java. zlib is already included
+    as part of the Java SDK in the java.util.zip package. If you really want
+    a version of zlib written in the Java language, look on the zlib home
+    page for links: http://zlib.net/ .
+
+35. I get this or that compiler or source-code scanner warning when I crank it
+    up to maximally-pedantic. Can't you guys write proper code?
+
+    Many years ago, we gave up attempting to avoid warnings on every compiler
+    in the universe.  It just got to be a waste of time, and some compilers
+    were downright silly as well as contradicted each other.  So now, we simply
+    make sure that the code always works.
+
+36. Valgrind (or some similar memory access checker) says that deflate is
+    performing a conditional jump that depends on an uninitialized value.
+    Isn't that a bug?
+
+    No.  That is intentional for performance reasons, and the output of deflate
+    is not affected.  This only started showing up recently since zlib 1.2.x
+    uses malloc() by default for allocations, whereas earlier versions used
+    calloc(), which zeros out the allocated memory.  Even though the code was
+    correct, versions 1.2.4 and later was changed to not stimulate these
+    checkers.
+
+37. Will zlib read the (insert any ancient or arcane format here) compressed
+    data format?
+
+    Probably not. Look in the comp.compression FAQ for pointers to various
+    formats and associated software.
+
+38. How can I encrypt/decrypt zip files with zlib?
+
+    zlib doesn't support encryption.  The original PKZIP encryption is very
+    weak and can be broken with freely available programs.  To get strong
+    encryption, use GnuPG, http://www.gnupg.org/ , which already includes zlib
+    compression.  For PKZIP compatible "encryption", look at
+    http://www.info-zip.org/
+
+39. What's the difference between the "gzip" and "deflate" HTTP 1.1 encodings?
+
+    "gzip" is the gzip format, and "deflate" is the zlib format.  They should
+    probably have called the second one "zlib" instead to avoid confusion with
+    the raw deflate compressed data format.  While the HTTP 1.1 RFC 2616
+    correctly points to the zlib specification in RFC 1950 for the "deflate"
+    transfer encoding, there have been reports of servers and browsers that
+    incorrectly produce or expect raw deflate data per the deflate
+    specification in RFC 1951, most notably Microsoft.  So even though the
+    "deflate" transfer encoding using the zlib format would be the more
+    efficient approach (and in fact exactly what the zlib format was designed
+    for), using the "gzip" transfer encoding is probably more reliable due to
+    an unfortunate choice of name on the part of the HTTP 1.1 authors.
+
+    Bottom line: use the gzip format for HTTP 1.1 encoding.
+
+40. Does zlib support the new "Deflate64" format introduced by PKWare?
+
+    No.  PKWare has apparently decided to keep that format proprietary, since
+    they have not documented it as they have previous compression formats.  In
+    any case, the compression improvements are so modest compared to other more
+    modern approaches, that it's not worth the effort to implement.
+
+41. I'm having a problem with the zip functions in zlib, can you help?
+
+    There are no zip functions in zlib.  You are probably using minizip by
+    Giles Vollant, which is found in the contrib directory of zlib.  It is not
+    part of zlib.  In fact none of the stuff in contrib is part of zlib.  The
+    files in there are not supported by the zlib authors.  You need to contact
+    the authors of the respective contribution for help.
+
+42. The match.asm code in contrib is under the GNU General Public License.
+    Since it's part of zlib, doesn't that mean that all of zlib falls under the
+    GNU GPL?
+
+    No.  The files in contrib are not part of zlib.  They were contributed by
+    other authors and are provided as a convenience to the user within the zlib
+    distribution.  Each item in contrib has its own license.
+
+43. Is zlib subject to export controls?  What is its ECCN?
+
+    zlib is not subject to export controls, and so is classified as EAR99.
+
+44. Can you please sign these lengthy legal documents and fax them back to us
+    so that we can use your software in our product?
+
+    No. Go away. Shoo.
diff --git a/deps/libchdr/deps/zlib-1.2.11/INDEX b/deps/libchdr/deps/zlib-1.2.11/INDEX
new file mode 100644
index 00000000..2ba06412
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/INDEX
@@ -0,0 +1,68 @@
+CMakeLists.txt  cmake build file
+ChangeLog       history of changes
+FAQ             Frequently Asked Questions about zlib
+INDEX           this file
+Makefile        dummy Makefile that tells you to ./configure
+Makefile.in     template for Unix Makefile
+README          guess what
+configure       configure script for Unix
+make_vms.com    makefile for VMS
+test/example.c  zlib usages examples for build testing
+test/minigzip.c minimal gzip-like functionality for build testing
+test/infcover.c inf*.c code coverage for build coverage testing
+treebuild.xml   XML description of source file dependencies
+zconf.h.cmakein zconf.h template for cmake
+zconf.h.in      zconf.h template for configure
+zlib.3          Man page for zlib
+zlib.3.pdf      Man page in PDF format
+zlib.map        Linux symbol information
+zlib.pc.in      Template for pkg-config descriptor
+zlib.pc.cmakein zlib.pc template for cmake
+zlib2ansi       perl script to convert source files for C++ compilation
+
+amiga/          makefiles for Amiga SAS C
+as400/          makefiles for AS/400
+doc/            documentation for formats and algorithms
+msdos/          makefiles for MSDOS
+nintendods/     makefile for Nintendo DS
+old/            makefiles for various architectures and zlib documentation
+                files that have not yet been updated for zlib 1.2.x
+qnx/            makefiles for QNX
+watcom/         makefiles for OpenWatcom
+win32/          makefiles for Windows
+
+                zlib public header files (required for library use):
+zconf.h
+zlib.h
+
+                private source files used to build the zlib library:
+adler32.c
+compress.c
+crc32.c
+crc32.h
+deflate.c
+deflate.h
+gzclose.c
+gzguts.h
+gzlib.c
+gzread.c
+gzwrite.c
+infback.c
+inffast.c
+inffast.h
+inffixed.h
+inflate.c
+inflate.h
+inftrees.c
+inftrees.h
+trees.c
+trees.h
+uncompr.c
+zutil.c
+zutil.h
+
+                source files for sample programs
+See examples/README.examples
+
+                unsupported contributions by third parties
+See contrib/README.contrib
diff --git a/deps/libchdr/deps/zlib-1.2.11/Makefile b/deps/libchdr/deps/zlib-1.2.11/Makefile
new file mode 100644
index 00000000..6bba86c7
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/Makefile
@@ -0,0 +1,5 @@
+all:
+	-@echo "Please use ./configure first.  Thank you."
+
+distclean:
+	make -f Makefile.in distclean
diff --git a/deps/libchdr/deps/zlib-1.2.11/Makefile.in b/deps/libchdr/deps/zlib-1.2.11/Makefile.in
new file mode 100644
index 00000000..5a77949f
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/Makefile.in
@@ -0,0 +1,410 @@
+# Makefile for zlib
+# Copyright (C) 1995-2017 Jean-loup Gailly, Mark Adler
+# For conditions of distribution and use, see copyright notice in zlib.h
+
+# To compile and test, type:
+#    ./configure; make test
+# Normally configure builds both a static and a shared library.
+# If you want to build just a static library, use: ./configure --static
+
+# To use the asm code, type:
+#    cp contrib/asm?86/match.S ./match.S
+#    make LOC=-DASMV OBJA=match.o
+
+# To install /usr/local/lib/libz.* and /usr/local/include/zlib.h, type:
+#    make install
+# To install in $HOME instead of /usr/local, use:
+#    make install prefix=$HOME
+
+CC=cc
+
+CFLAGS=-O
+#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7
+#CFLAGS=-g -DZLIB_DEBUG
+#CFLAGS=-O3 -Wall -Wwrite-strings -Wpointer-arith -Wconversion \
+#           -Wstrict-prototypes -Wmissing-prototypes
+
+SFLAGS=-O
+LDFLAGS=
+TEST_LDFLAGS=-L. libz.a
+LDSHARED=$(CC)
+CPP=$(CC) -E
+
+STATICLIB=libz.a
+SHAREDLIB=libz.so
+SHAREDLIBV=libz.so.1.2.11
+SHAREDLIBM=libz.so.1
+LIBS=$(STATICLIB) $(SHAREDLIBV)
+
+AR=ar
+ARFLAGS=rc
+RANLIB=ranlib
+LDCONFIG=ldconfig
+LDSHAREDLIBC=-lc
+TAR=tar
+SHELL=/bin/sh
+EXE=
+
+prefix = /usr/local
+exec_prefix = ${prefix}
+libdir = ${exec_prefix}/lib
+sharedlibdir = ${libdir}
+includedir = ${prefix}/include
+mandir = ${prefix}/share/man
+man3dir = ${mandir}/man3
+pkgconfigdir = ${libdir}/pkgconfig
+SRCDIR=
+ZINC=
+ZINCOUT=-I.
+
+OBJZ = adler32.o crc32.o deflate.o infback.o inffast.o inflate.o inftrees.o trees.o zutil.o
+OBJG = compress.o uncompr.o gzclose.o gzlib.o gzread.o gzwrite.o
+OBJC = $(OBJZ) $(OBJG)
+
+PIC_OBJZ = adler32.lo crc32.lo deflate.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo zutil.lo
+PIC_OBJG = compress.lo uncompr.lo gzclose.lo gzlib.lo gzread.lo gzwrite.lo
+PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG)
+
+# to use the asm code: make OBJA=match.o, PIC_OBJA=match.lo
+OBJA =
+PIC_OBJA =
+
+OBJS = $(OBJC) $(OBJA)
+
+PIC_OBJS = $(PIC_OBJC) $(PIC_OBJA)
+
+all: static shared
+
+static: example$(EXE) minigzip$(EXE)
+
+shared: examplesh$(EXE) minigzipsh$(EXE)
+
+all64: example64$(EXE) minigzip64$(EXE)
+
+check: test
+
+test: all teststatic testshared
+
+teststatic: static
+	@TMPST=tmpst_$$; \
+	if echo hello world | ./minigzip | ./minigzip -d && ./example $$TMPST ; then \
+	  echo '		*** zlib test OK ***'; \
+	else \
+	  echo '		*** zlib test FAILED ***'; false; \
+	fi; \
+	rm -f $$TMPST
+
+testshared: shared
+	@LD_LIBRARY_PATH=`pwd`:$(LD_LIBRARY_PATH) ; export LD_LIBRARY_PATH; \
+	LD_LIBRARYN32_PATH=`pwd`:$(LD_LIBRARYN32_PATH) ; export LD_LIBRARYN32_PATH; \
+	DYLD_LIBRARY_PATH=`pwd`:$(DYLD_LIBRARY_PATH) ; export DYLD_LIBRARY_PATH; \
+	SHLIB_PATH=`pwd`:$(SHLIB_PATH) ; export SHLIB_PATH; \
+	TMPSH=tmpsh_$$; \
+	if echo hello world | ./minigzipsh | ./minigzipsh -d && ./examplesh $$TMPSH; then \
+	  echo '		*** zlib shared test OK ***'; \
+	else \
+	  echo '		*** zlib shared test FAILED ***'; false; \
+	fi; \
+	rm -f $$TMPSH
+
+test64: all64
+	@TMP64=tmp64_$$; \
+	if echo hello world | ./minigzip64 | ./minigzip64 -d && ./example64 $$TMP64; then \
+	  echo '		*** zlib 64-bit test OK ***'; \
+	else \
+	  echo '		*** zlib 64-bit test FAILED ***'; false; \
+	fi; \
+	rm -f $$TMP64
+
+infcover.o: $(SRCDIR)test/infcover.c $(SRCDIR)zlib.h zconf.h
+	$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/infcover.c
+
+infcover: infcover.o libz.a
+	$(CC) $(CFLAGS) -o $@ infcover.o libz.a
+
+cover: infcover
+	rm -f *.gcda
+	./infcover
+	gcov inf*.c
+
+libz.a: $(OBJS)
+	$(AR) $(ARFLAGS) $@ $(OBJS)
+	-@ ($(RANLIB) $@ || true) >/dev/null 2>&1
+
+match.o: match.S
+	$(CPP) match.S > _match.s
+	$(CC) -c _match.s
+	mv _match.o match.o
+	rm -f _match.s
+
+match.lo: match.S
+	$(CPP) match.S > _match.s
+	$(CC) -c -fPIC _match.s
+	mv _match.o match.lo
+	rm -f _match.s
+
+example.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
+	$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/example.c
+
+minigzip.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h
+	$(CC) $(CFLAGS) $(ZINCOUT) -c -o $@ $(SRCDIR)test/minigzip.c
+
+example64.o: $(SRCDIR)test/example.c $(SRCDIR)zlib.h zconf.h
+	$(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/example.c
+
+minigzip64.o: $(SRCDIR)test/minigzip.c $(SRCDIR)zlib.h zconf.h
+	$(CC) $(CFLAGS) $(ZINCOUT) -D_FILE_OFFSET_BITS=64 -c -o $@ $(SRCDIR)test/minigzip.c
+
+
+adler32.o: $(SRCDIR)adler32.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)adler32.c
+
+crc32.o: $(SRCDIR)crc32.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)crc32.c
+
+deflate.o: $(SRCDIR)deflate.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)deflate.c
+
+infback.o: $(SRCDIR)infback.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)infback.c
+
+inffast.o: $(SRCDIR)inffast.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inffast.c
+
+inflate.o: $(SRCDIR)inflate.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inflate.c
+
+inftrees.o: $(SRCDIR)inftrees.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)inftrees.c
+
+trees.o: $(SRCDIR)trees.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)trees.c
+
+zutil.o: $(SRCDIR)zutil.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)zutil.c
+
+compress.o: $(SRCDIR)compress.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)compress.c
+
+uncompr.o: $(SRCDIR)uncompr.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)uncompr.c
+
+gzclose.o: $(SRCDIR)gzclose.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)gzclose.c
+
+gzlib.o: $(SRCDIR)gzlib.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)gzlib.c
+
+gzread.o: $(SRCDIR)gzread.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)gzread.c
+
+gzwrite.o: $(SRCDIR)gzwrite.c
+	$(CC) $(CFLAGS) $(ZINC) -c -o $@ $(SRCDIR)gzwrite.c
+
+
+adler32.lo: $(SRCDIR)adler32.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/adler32.o $(SRCDIR)adler32.c
+	-@mv objs/adler32.o $@
+
+crc32.lo: $(SRCDIR)crc32.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/crc32.o $(SRCDIR)crc32.c
+	-@mv objs/crc32.o $@
+
+deflate.lo: $(SRCDIR)deflate.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/deflate.o $(SRCDIR)deflate.c
+	-@mv objs/deflate.o $@
+
+infback.lo: $(SRCDIR)infback.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/infback.o $(SRCDIR)infback.c
+	-@mv objs/infback.o $@
+
+inffast.lo: $(SRCDIR)inffast.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inffast.o $(SRCDIR)inffast.c
+	-@mv objs/inffast.o $@
+
+inflate.lo: $(SRCDIR)inflate.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inflate.o $(SRCDIR)inflate.c
+	-@mv objs/inflate.o $@
+
+inftrees.lo: $(SRCDIR)inftrees.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/inftrees.o $(SRCDIR)inftrees.c
+	-@mv objs/inftrees.o $@
+
+trees.lo: $(SRCDIR)trees.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/trees.o $(SRCDIR)trees.c
+	-@mv objs/trees.o $@
+
+zutil.lo: $(SRCDIR)zutil.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/zutil.o $(SRCDIR)zutil.c
+	-@mv objs/zutil.o $@
+
+compress.lo: $(SRCDIR)compress.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/compress.o $(SRCDIR)compress.c
+	-@mv objs/compress.o $@
+
+uncompr.lo: $(SRCDIR)uncompr.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/uncompr.o $(SRCDIR)uncompr.c
+	-@mv objs/uncompr.o $@
+
+gzclose.lo: $(SRCDIR)gzclose.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/gzclose.o $(SRCDIR)gzclose.c
+	-@mv objs/gzclose.o $@
+
+gzlib.lo: $(SRCDIR)gzlib.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/gzlib.o $(SRCDIR)gzlib.c
+	-@mv objs/gzlib.o $@
+
+gzread.lo: $(SRCDIR)gzread.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/gzread.o $(SRCDIR)gzread.c
+	-@mv objs/gzread.o $@
+
+gzwrite.lo: $(SRCDIR)gzwrite.c
+	-@mkdir objs 2>/dev/null || test -d objs
+	$(CC) $(SFLAGS) $(ZINC) -DPIC -c -o objs/gzwrite.o $(SRCDIR)gzwrite.c
+	-@mv objs/gzwrite.o $@
+
+
+placebo $(SHAREDLIBV): $(PIC_OBJS) libz.a
+	$(LDSHARED) $(SFLAGS) -o $@ $(PIC_OBJS) $(LDSHAREDLIBC) $(LDFLAGS)
+	rm -f $(SHAREDLIB) $(SHAREDLIBM)
+	ln -s $@ $(SHAREDLIB)
+	ln -s $@ $(SHAREDLIBM)
+	-@rmdir objs
+
+example$(EXE): example.o $(STATICLIB)
+	$(CC) $(CFLAGS) -o $@ example.o $(TEST_LDFLAGS)
+
+minigzip$(EXE): minigzip.o $(STATICLIB)
+	$(CC) $(CFLAGS) -o $@ minigzip.o $(TEST_LDFLAGS)
+
+examplesh$(EXE): example.o $(SHAREDLIBV)
+	$(CC) $(CFLAGS) -o $@ example.o -L. $(SHAREDLIBV)
+
+minigzipsh$(EXE): minigzip.o $(SHAREDLIBV)
+	$(CC) $(CFLAGS) -o $@ minigzip.o -L. $(SHAREDLIBV)
+
+example64$(EXE): example64.o $(STATICLIB)
+	$(CC) $(CFLAGS) -o $@ example64.o $(TEST_LDFLAGS)
+
+minigzip64$(EXE): minigzip64.o $(STATICLIB)
+	$(CC) $(CFLAGS) -o $@ minigzip64.o $(TEST_LDFLAGS)
+
+install-libs: $(LIBS)
+	-@if [ ! -d $(DESTDIR)$(exec_prefix)  ]; then mkdir -p $(DESTDIR)$(exec_prefix); fi
+	-@if [ ! -d $(DESTDIR)$(libdir)       ]; then mkdir -p $(DESTDIR)$(libdir); fi
+	-@if [ ! -d $(DESTDIR)$(sharedlibdir) ]; then mkdir -p $(DESTDIR)$(sharedlibdir); fi
+	-@if [ ! -d $(DESTDIR)$(man3dir)      ]; then mkdir -p $(DESTDIR)$(man3dir); fi
+	-@if [ ! -d $(DESTDIR)$(pkgconfigdir) ]; then mkdir -p $(DESTDIR)$(pkgconfigdir); fi
+	rm -f $(DESTDIR)$(libdir)/$(STATICLIB)
+	cp $(STATICLIB) $(DESTDIR)$(libdir)
+	chmod 644 $(DESTDIR)$(libdir)/$(STATICLIB)
+	-@($(RANLIB) $(DESTDIR)$(libdir)/libz.a || true) >/dev/null 2>&1
+	-@if test -n "$(SHAREDLIBV)"; then \
+	  rm -f $(DESTDIR)$(sharedlibdir)/$(SHAREDLIBV); \
+	  cp $(SHAREDLIBV) $(DESTDIR)$(sharedlibdir); \
+	  echo "cp $(SHAREDLIBV) $(DESTDIR)$(sharedlibdir)"; \
+	  chmod 755 $(DESTDIR)$(sharedlibdir)/$(SHAREDLIBV); \
+	  echo "chmod 755 $(DESTDIR)$(sharedlibdir)/$(SHAREDLIBV)"; \
+	  rm -f $(DESTDIR)$(sharedlibdir)/$(SHAREDLIB) $(DESTDIR)$(sharedlibdir)/$(SHAREDLIBM); \
+	  ln -s $(SHAREDLIBV) $(DESTDIR)$(sharedlibdir)/$(SHAREDLIB); \
+	  ln -s $(SHAREDLIBV) $(DESTDIR)$(sharedlibdir)/$(SHAREDLIBM); \
+	  ($(LDCONFIG) || true)  >/dev/null 2>&1; \
+	fi
+	rm -f $(DESTDIR)$(man3dir)/zlib.3
+	cp $(SRCDIR)zlib.3 $(DESTDIR)$(man3dir)
+	chmod 644 $(DESTDIR)$(man3dir)/zlib.3
+	rm -f $(DESTDIR)$(pkgconfigdir)/zlib.pc
+	cp zlib.pc $(DESTDIR)$(pkgconfigdir)
+	chmod 644 $(DESTDIR)$(pkgconfigdir)/zlib.pc
+# The ranlib in install is needed on NeXTSTEP which checks file times
+# ldconfig is for Linux
+
+install: install-libs
+	-@if [ ! -d $(DESTDIR)$(includedir)   ]; then mkdir -p $(DESTDIR)$(includedir); fi
+	rm -f $(DESTDIR)$(includedir)/zlib.h $(DESTDIR)$(includedir)/zconf.h
+	cp $(SRCDIR)zlib.h zconf.h $(DESTDIR)$(includedir)
+	chmod 644 $(DESTDIR)$(includedir)/zlib.h $(DESTDIR)$(includedir)/zconf.h
+
+uninstall:
+	cd $(DESTDIR)$(includedir) && rm -f zlib.h zconf.h
+	cd $(DESTDIR)$(libdir) && rm -f libz.a; \
+	if test -n "$(SHAREDLIBV)" -a -f $(SHAREDLIBV); then \
+	  rm -f $(SHAREDLIBV) $(SHAREDLIB) $(SHAREDLIBM); \
+	fi
+	cd $(DESTDIR)$(man3dir) && rm -f zlib.3
+	cd $(DESTDIR)$(pkgconfigdir) && rm -f zlib.pc
+
+docs: zlib.3.pdf
+
+zlib.3.pdf: $(SRCDIR)zlib.3
+	groff -mandoc -f H -T ps $(SRCDIR)zlib.3 | ps2pdf - $@
+
+zconf.h.cmakein: $(SRCDIR)zconf.h.in
+	-@ TEMPFILE=zconfh_$$; \
+	echo "/#define ZCONF_H/ a\\\\\n#cmakedefine Z_PREFIX\\\\\n#cmakedefine Z_HAVE_UNISTD_H\n" >> $$TEMPFILE &&\
+	sed -f $$TEMPFILE $(SRCDIR)zconf.h.in > $@ &&\
+	touch -r $(SRCDIR)zconf.h.in $@ &&\
+	rm $$TEMPFILE
+
+zconf: $(SRCDIR)zconf.h.in
+	cp -p $(SRCDIR)zconf.h.in zconf.h
+
+mostlyclean: clean
+clean:
+	rm -f *.o *.lo *~ \
+	   example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
+	   example64$(EXE) minigzip64$(EXE) \
+	   infcover \
+	   libz.* foo.gz so_locations \
+	   _match.s maketree contrib/infback9/*.o
+	rm -rf objs
+	rm -f *.gcda *.gcno *.gcov
+	rm -f contrib/infback9/*.gcda contrib/infback9/*.gcno contrib/infback9/*.gcov
+
+maintainer-clean: distclean
+distclean: clean zconf zconf.h.cmakein docs
+	rm -f Makefile zlib.pc configure.log
+	-@rm -f .DS_Store
+	@if [ -f Makefile.in ]; then \
+	printf 'all:\n\t-@echo "Please use ./configure first.  Thank you."\n' > Makefile ; \
+	printf '\ndistclean:\n\tmake -f Makefile.in distclean\n' >> Makefile ; \
+	touch -r $(SRCDIR)Makefile.in Makefile ; fi
+	@if [ ! -f zconf.h.in ]; then rm -f zconf.h zconf.h.cmakein ; fi
+	@if [ ! -f zlib.3 ]; then rm -f zlib.3.pdf ; fi
+
+tags:
+	etags $(SRCDIR)*.[ch]
+
+adler32.o zutil.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
+gzclose.o gzlib.o gzread.o gzwrite.o: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
+compress.o example.o minigzip.o uncompr.o: $(SRCDIR)zlib.h zconf.h
+crc32.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
+deflate.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
+infback.o inflate.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
+inffast.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h
+inftrees.o: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h
+trees.o: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)trees.h
+
+adler32.lo zutil.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
+gzclose.lo gzlib.lo gzread.lo gzwrite.lo: $(SRCDIR)zlib.h zconf.h $(SRCDIR)gzguts.h
+compress.lo example.lo minigzip.lo uncompr.lo: $(SRCDIR)zlib.h zconf.h
+crc32.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)crc32.h
+deflate.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h
+infback.lo inflate.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h $(SRCDIR)inffixed.h
+inffast.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h $(SRCDIR)inflate.h $(SRCDIR)inffast.h
+inftrees.lo: $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)inftrees.h
+trees.lo: $(SRCDIR)deflate.h $(SRCDIR)zutil.h $(SRCDIR)zlib.h zconf.h $(SRCDIR)trees.h
diff --git a/deps/libchdr/deps/zlib-1.2.11/README b/deps/libchdr/deps/zlib-1.2.11/README
new file mode 100644
index 00000000..51106de4
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/README
@@ -0,0 +1,115 @@
+ZLIB DATA COMPRESSION LIBRARY
+
+zlib 1.2.11 is a general purpose data compression library.  All the code is
+thread safe.  The data format used by the zlib library is described by RFCs
+(Request for Comments) 1950 to 1952 in the files
+http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
+rfc1952 (gzip format).
+
+All functions of the compression library are documented in the file zlib.h
+(volunteer to write man pages welcome, contact zlib@gzip.org).  A usage example
+of the library is given in the file test/example.c which also tests that
+the library is working correctly.  Another example is given in the file
+test/minigzip.c.  The compression library itself is composed of all source
+files in the root directory.
+
+To compile all files and run the test program, follow the instructions given at
+the top of Makefile.in.  In short "./configure; make test", and if that goes
+well, "make install" should work for most flavors of Unix.  For Windows, use
+one of the special makefiles in win32/ or contrib/vstudio/ .  For VMS, use
+make_vms.com.
+
+Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
+<info@winimage.com> for the Windows DLL version.  The zlib home page is
+http://zlib.net/ .  Before reporting a problem, please check this site to
+verify that you have the latest version of zlib; otherwise get the latest
+version and check whether the problem still exists or not.
+
+PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
+
+Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan.  1997
+issue of Dr.  Dobb's Journal; a copy of the article is available at
+http://marknelson.us/1997/01/01/zlib-engine/ .
+
+The changes made in version 1.2.11 are documented in the file ChangeLog.
+
+Unsupported third party contributions are provided in directory contrib/ .
+
+zlib is available in Java using the java.util.zip package, documented at
+http://java.sun.com/developer/technicalArticles/Programming/compression/ .
+
+A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
+at CPAN (Comprehensive Perl Archive Network) sites, including
+http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
+
+A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
+available in Python 1.5 and later versions, see
+http://docs.python.org/library/zlib.html .
+
+zlib is built into tcl: http://wiki.tcl.tk/4610 .
+
+An experimental package to read and write files in .zip format, written on top
+of zlib by Gilles Vollant <info@winimage.com>, is available in the
+contrib/minizip directory of zlib.
+
+
+Notes for some targets:
+
+- For Windows DLL versions, please see win32/DLL_FAQ.txt
+
+- For 64-bit Irix, deflate.c must be compiled without any optimization. With
+  -O, one libpng test fails. The test works in 32 bit mode (with the -n32
+  compiler flag). The compiler bug has been reported to SGI.
+
+- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works
+  when compiled with cc.
+
+- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is
+  necessary to get gzprintf working correctly. This is done by configure.
+
+- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
+  other compilers. Use "make test" to check your compiler.
+
+- gzdopen is not supported on RISCOS or BEOS.
+
+- For PalmOs, see http://palmzlib.sourceforge.net/
+
+
+Acknowledgments:
+
+  The deflate format used by zlib was defined by Phil Katz.  The deflate and
+  zlib specifications were written by L.  Peter Deutsch.  Thanks to all the
+  people who reported problems and suggested various improvements in zlib; they
+  are too numerous to cite here.
+
+Copyright notice:
+
+ (C) 1995-2017 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Jean-loup Gailly        Mark Adler
+  jloup@gzip.org          madler@alumni.caltech.edu
+
+If you use the zlib library in a product, we would appreciate *not* receiving
+lengthy legal documents to sign.  The sources are provided for free but without
+warranty of any kind.  The library has been entirely written by Jean-loup
+Gailly and Mark Adler; it does not include third-party code.
+
+If you redistribute modified sources, we would appreciate that you include in
+the file ChangeLog history information documenting your changes.  Please read
+the FAQ for more information on the distribution of modified source versions.
diff --git a/deps/libchdr/deps/zlib-1.2.11/adler32.c b/deps/libchdr/deps/zlib-1.2.11/adler32.c
new file mode 100644
index 00000000..d0be4380
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/adler32.c
@@ -0,0 +1,186 @@
+/* adler32.c -- compute the Adler-32 checksum of a data stream
+ * Copyright (C) 1995-2011, 2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#include "zutil.h"
+
+local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
+
+#define BASE 65521U     /* largest prime smaller than 65536 */
+#define NMAX 5552
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+
+#define DO1(buf,i)  {adler += (buf)[i]; sum2 += adler;}
+#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
+#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
+#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
+#define DO16(buf)   DO8(buf,0); DO8(buf,8);
+
+/* use NO_DIVIDE if your processor does not do division in hardware --
+   try it both ways to see which is faster */
+#ifdef NO_DIVIDE
+/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
+   (thank you to John Reiser for pointing this out) */
+#  define CHOP(a) \
+    do { \
+        unsigned long tmp = a >> 16; \
+        a &= 0xffffUL; \
+        a += (tmp << 4) - tmp; \
+    } while (0)
+#  define MOD28(a) \
+    do { \
+        CHOP(a); \
+        if (a >= BASE) a -= BASE; \
+    } while (0)
+#  define MOD(a) \
+    do { \
+        CHOP(a); \
+        MOD28(a); \
+    } while (0)
+#  define MOD63(a) \
+    do { /* this assumes a is not negative */ \
+        z_off64_t tmp = a >> 32; \
+        a &= 0xffffffffL; \
+        a += (tmp << 8) - (tmp << 5) + tmp; \
+        tmp = a >> 16; \
+        a &= 0xffffL; \
+        a += (tmp << 4) - tmp; \
+        tmp = a >> 16; \
+        a &= 0xffffL; \
+        a += (tmp << 4) - tmp; \
+        if (a >= BASE) a -= BASE; \
+    } while (0)
+#else
+#  define MOD(a) a %= BASE
+#  define MOD28(a) a %= BASE
+#  define MOD63(a) a %= BASE
+#endif
+
+/* ========================================================================= */
+uLong ZEXPORT adler32_z(adler, buf, len)
+    uLong adler;
+    const Bytef *buf;
+    z_size_t len;
+{
+    unsigned long sum2;
+    unsigned n;
+
+    /* split Adler-32 into component sums */
+    sum2 = (adler >> 16) & 0xffff;
+    adler &= 0xffff;
+
+    /* in case user likes doing a byte at a time, keep it fast */
+    if (len == 1) {
+        adler += buf[0];
+        if (adler >= BASE)
+            adler -= BASE;
+        sum2 += adler;
+        if (sum2 >= BASE)
+            sum2 -= BASE;
+        return adler | (sum2 << 16);
+    }
+
+    /* initial Adler-32 value (deferred check for len == 1 speed) */
+    if (buf == Z_NULL)
+        return 1L;
+
+    /* in case short lengths are provided, keep it somewhat fast */
+    if (len < 16) {
+        while (len--) {
+            adler += *buf++;
+            sum2 += adler;
+        }
+        if (adler >= BASE)
+            adler -= BASE;
+        MOD28(sum2);            /* only added so many BASE's */
+        return adler | (sum2 << 16);
+    }
+
+    /* do length NMAX blocks -- requires just one modulo operation */
+    while (len >= NMAX) {
+        len -= NMAX;
+        n = NMAX / 16;          /* NMAX is divisible by 16 */
+        do {
+            DO16(buf);          /* 16 sums unrolled */
+            buf += 16;
+        } while (--n);
+        MOD(adler);
+        MOD(sum2);
+    }
+
+    /* do remaining bytes (less than NMAX, still just one modulo) */
+    if (len) {                  /* avoid modulos if none remaining */
+        while (len >= 16) {
+            len -= 16;
+            DO16(buf);
+            buf += 16;
+        }
+        while (len--) {
+            adler += *buf++;
+            sum2 += adler;
+        }
+        MOD(adler);
+        MOD(sum2);
+    }
+
+    /* return recombined sums */
+    return adler | (sum2 << 16);
+}
+
+/* ========================================================================= */
+uLong ZEXPORT adler32(adler, buf, len)
+    uLong adler;
+    const Bytef *buf;
+    uInt len;
+{
+    return adler32_z(adler, buf, len);
+}
+
+/* ========================================================================= */
+local uLong adler32_combine_(adler1, adler2, len2)
+    uLong adler1;
+    uLong adler2;
+    z_off64_t len2;
+{
+    unsigned long sum1;
+    unsigned long sum2;
+    unsigned rem;
+
+    /* for negative len, return invalid adler32 as a clue for debugging */
+    if (len2 < 0)
+        return 0xffffffffUL;
+
+    /* the derivation of this formula is left as an exercise for the reader */
+    MOD63(len2);                /* assumes len2 >= 0 */
+    rem = (unsigned)len2;
+    sum1 = adler1 & 0xffff;
+    sum2 = rem * sum1;
+    MOD(sum2);
+    sum1 += (adler2 & 0xffff) + BASE - 1;
+    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
+    if (sum1 >= BASE) sum1 -= BASE;
+    if (sum1 >= BASE) sum1 -= BASE;
+    if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
+    if (sum2 >= BASE) sum2 -= BASE;
+    return sum1 | (sum2 << 16);
+}
+
+/* ========================================================================= */
+uLong ZEXPORT adler32_combine(adler1, adler2, len2)
+    uLong adler1;
+    uLong adler2;
+    z_off_t len2;
+{
+    return adler32_combine_(adler1, adler2, len2);
+}
+
+uLong ZEXPORT adler32_combine64(adler1, adler2, len2)
+    uLong adler1;
+    uLong adler2;
+    z_off64_t len2;
+{
+    return adler32_combine_(adler1, adler2, len2);
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/compress.c b/deps/libchdr/deps/zlib-1.2.11/compress.c
new file mode 100644
index 00000000..e2db404a
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/compress.c
@@ -0,0 +1,86 @@
+/* compress.c -- compress a memory buffer
+ * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#define ZLIB_INTERNAL
+#include "zlib.h"
+
+/* ===========================================================================
+     Compresses the source buffer into the destination buffer. The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer. Upon entry, destLen is the total size of the
+   destination buffer, which must be at least 0.1% larger than sourceLen plus
+   12 bytes. Upon exit, destLen is the actual size of the compressed buffer.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+   Z_STREAM_ERROR if the level parameter is invalid.
+*/
+int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+    int level;
+{
+    z_stream stream;
+    int err;
+    const uInt max = (uInt)-1;
+    uLong left;
+
+    left = *destLen;
+    *destLen = 0;
+
+    stream.zalloc = (alloc_func)0;
+    stream.zfree = (free_func)0;
+    stream.opaque = (voidpf)0;
+
+    err = deflateInit(&stream, level);
+    if (err != Z_OK) return err;
+
+    stream.next_out = dest;
+    stream.avail_out = 0;
+    stream.next_in = (z_const Bytef *)source;
+    stream.avail_in = 0;
+
+    do {
+        if (stream.avail_out == 0) {
+            stream.avail_out = left > (uLong)max ? max : (uInt)left;
+            left -= stream.avail_out;
+        }
+        if (stream.avail_in == 0) {
+            stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen;
+            sourceLen -= stream.avail_in;
+        }
+        err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH);
+    } while (err == Z_OK);
+
+    *destLen = stream.total_out;
+    deflateEnd(&stream);
+    return err == Z_STREAM_END ? Z_OK : err;
+}
+
+/* ===========================================================================
+ */
+int ZEXPORT compress (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
+}
+
+/* ===========================================================================
+     If the default memLevel or windowBits for deflateInit() is changed, then
+   this function needs to be updated.
+ */
+uLong ZEXPORT compressBound (sourceLen)
+    uLong sourceLen;
+{
+    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
+           (sourceLen >> 25) + 13;
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/configure b/deps/libchdr/deps/zlib-1.2.11/configure
new file mode 100755
index 00000000..e974d1fd
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/configure
@@ -0,0 +1,921 @@
+#!/bin/sh
+# configure script for zlib.
+#
+# Normally configure builds both a static and a shared library.
+# If you want to build just a static library, use: ./configure --static
+#
+# To impose specific compiler or flags or install directory, use for example:
+#    prefix=$HOME CC=cc CFLAGS="-O4" ./configure
+# or for csh/tcsh users:
+#    (setenv prefix $HOME; setenv CC cc; setenv CFLAGS "-O4"; ./configure)
+
+# Incorrect settings of CC or CFLAGS may prevent creating a shared library.
+# If you have problems, try without defining CC and CFLAGS before reporting
+# an error.
+
+# start off configure.log
+echo -------------------- >> configure.log
+echo $0 $* >> configure.log
+date >> configure.log
+
+# get source directory
+SRCDIR=`dirname $0`
+if test $SRCDIR = "."; then
+    ZINC=""
+    ZINCOUT="-I."
+    SRCDIR=""
+else
+    ZINC='-include zconf.h'
+    ZINCOUT='-I. -I$(SRCDIR)'
+    SRCDIR="$SRCDIR/"
+fi
+
+# set command prefix for cross-compilation
+if [ -n "${CHOST}" ]; then
+    uname="`echo "${CHOST}" | sed -e 's/^[^-]*-\([^-]*\)$/\1/' -e 's/^[^-]*-[^-]*-\([^-]*\)$/\1/' -e 's/^[^-]*-[^-]*-\([^-]*\)-.*$/\1/'`"
+    CROSS_PREFIX="${CHOST}-"
+fi
+
+# destination name for static library
+STATICLIB=libz.a
+
+# extract zlib version numbers from zlib.h
+VER=`sed -n -e '/VERSION "/s/.*"\(.*\)".*/\1/p' < ${SRCDIR}zlib.h`
+VER3=`sed -n -e '/VERSION "/s/.*"\([0-9]*\\.[0-9]*\\.[0-9]*\).*/\1/p' < ${SRCDIR}zlib.h`
+VER2=`sed -n -e '/VERSION "/s/.*"\([0-9]*\\.[0-9]*\)\\..*/\1/p' < ${SRCDIR}zlib.h`
+VER1=`sed -n -e '/VERSION "/s/.*"\([0-9]*\)\\..*/\1/p' < ${SRCDIR}zlib.h`
+
+# establish commands for library building
+if "${CROSS_PREFIX}ar" --version >/dev/null 2>/dev/null || test $? -lt 126; then
+    AR=${AR-"${CROSS_PREFIX}ar"}
+    test -n "${CROSS_PREFIX}" && echo Using ${AR} | tee -a configure.log
+else
+    AR=${AR-"ar"}
+    test -n "${CROSS_PREFIX}" && echo Using ${AR} | tee -a configure.log
+fi
+ARFLAGS=${ARFLAGS-"rc"}
+if "${CROSS_PREFIX}ranlib" --version >/dev/null 2>/dev/null || test $? -lt 126; then
+    RANLIB=${RANLIB-"${CROSS_PREFIX}ranlib"}
+    test -n "${CROSS_PREFIX}" && echo Using ${RANLIB} | tee -a configure.log
+else
+    RANLIB=${RANLIB-"ranlib"}
+fi
+if "${CROSS_PREFIX}nm" --version >/dev/null 2>/dev/null || test $? -lt 126; then
+    NM=${NM-"${CROSS_PREFIX}nm"}
+    test -n "${CROSS_PREFIX}" && echo Using ${NM} | tee -a configure.log
+else
+    NM=${NM-"nm"}
+fi
+
+# set defaults before processing command line options
+LDCONFIG=${LDCONFIG-"ldconfig"}
+LDSHAREDLIBC="${LDSHAREDLIBC--lc}"
+ARCHS=
+prefix=${prefix-/usr/local}
+exec_prefix=${exec_prefix-'${prefix}'}
+libdir=${libdir-'${exec_prefix}/lib'}
+sharedlibdir=${sharedlibdir-'${libdir}'}
+includedir=${includedir-'${prefix}/include'}
+mandir=${mandir-'${prefix}/share/man'}
+shared_ext='.so'
+shared=1
+solo=0
+cover=0
+zprefix=0
+zconst=0
+build64=0
+gcc=0
+warn=0
+debug=0
+old_cc="$CC"
+old_cflags="$CFLAGS"
+OBJC='$(OBJZ) $(OBJG)'
+PIC_OBJC='$(PIC_OBJZ) $(PIC_OBJG)'
+
+# leave this script, optionally in a bad way
+leave()
+{
+  if test "$*" != "0"; then
+    echo "** $0 aborting." | tee -a configure.log
+  fi
+  rm -f $test.[co] $test $test$shared_ext $test.gcno ./--version
+  echo -------------------- >> configure.log
+  echo >> configure.log
+  echo >> configure.log
+  exit $1
+}
+
+# process command line options
+while test $# -ge 1
+do
+case "$1" in
+    -h* | --help)
+      echo 'usage:' | tee -a configure.log
+      echo '  configure [--const] [--zprefix] [--prefix=PREFIX]  [--eprefix=EXPREFIX]' | tee -a configure.log
+      echo '    [--static] [--64] [--libdir=LIBDIR] [--sharedlibdir=LIBDIR]' | tee -a configure.log
+      echo '    [--includedir=INCLUDEDIR] [--archs="-arch i386 -arch x86_64"]' | tee -a configure.log
+        exit 0 ;;
+    -p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;;
+    -e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;;
+    -l*=* | --libdir=*) libdir=`echo $1 | sed 's/.*=//'`; shift ;;
+    --sharedlibdir=*) sharedlibdir=`echo $1 | sed 's/.*=//'`; shift ;;
+    -i*=* | --includedir=*) includedir=`echo $1 | sed 's/.*=//'`;shift ;;
+    -u*=* | --uname=*) uname=`echo $1 | sed 's/.*=//'`;shift ;;
+    -p* | --prefix) prefix="$2"; shift; shift ;;
+    -e* | --eprefix) exec_prefix="$2"; shift; shift ;;
+    -l* | --libdir) libdir="$2"; shift; shift ;;
+    -i* | --includedir) includedir="$2"; shift; shift ;;
+    -s* | --shared | --enable-shared) shared=1; shift ;;
+    -t | --static) shared=0; shift ;;
+    --solo) solo=1; shift ;;
+    --cover) cover=1; shift ;;
+    -z* | --zprefix) zprefix=1; shift ;;
+    -6* | --64) build64=1; shift ;;
+    -a*=* | --archs=*) ARCHS=`echo $1 | sed 's/.*=//'`; shift ;;
+    --sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;;
+    --localstatedir=*) echo "ignored option: --localstatedir" | tee -a configure.log; shift ;;
+    -c* | --const) zconst=1; shift ;;
+    -w* | --warn) warn=1; shift ;;
+    -d* | --debug) debug=1; shift ;;
+    *)
+      echo "unknown option: $1" | tee -a configure.log
+      echo "$0 --help for help" | tee -a configure.log
+      leave 1;;
+    esac
+done
+
+# temporary file name
+test=ztest$$
+
+# put arguments in log, also put test file in log if used in arguments
+show()
+{
+  case "$*" in
+    *$test.c*)
+      echo === $test.c === >> configure.log
+      cat $test.c >> configure.log
+      echo === >> configure.log;;
+  esac
+  echo $* >> configure.log
+}
+
+# check for gcc vs. cc and set compile and link flags based on the system identified by uname
+cat > $test.c <<EOF
+extern int getchar();
+int hello() {return getchar();}
+EOF
+
+test -z "$CC" && echo Checking for ${CROSS_PREFIX}gcc... | tee -a configure.log
+cc=${CC-${CROSS_PREFIX}gcc}
+cflags=${CFLAGS-"-O3"}
+# to force the asm version use: CFLAGS="-O3 -DASMV" ./configure
+case "$cc" in
+  *gcc*) gcc=1 ;;
+  *clang*) gcc=1 ;;
+esac
+case `$cc -v 2>&1` in
+  *gcc*) gcc=1 ;;
+  *clang*) gcc=1 ;;
+esac
+
+show $cc -c $test.c
+if test "$gcc" -eq 1 && ($cc -c $test.c) >> configure.log 2>&1; then
+  echo ... using gcc >> configure.log
+  CC="$cc"
+  CFLAGS="${CFLAGS--O3}"
+  SFLAGS="${CFLAGS--O3} -fPIC"
+  if test "$ARCHS"; then
+    CFLAGS="${CFLAGS} ${ARCHS}"
+    LDFLAGS="${LDFLAGS} ${ARCHS}"
+  fi
+  if test $build64 -eq 1; then
+    CFLAGS="${CFLAGS} -m64"
+    SFLAGS="${SFLAGS} -m64"
+  fi
+  if test "$warn" -eq 1; then
+    if test "$zconst" -eq 1; then
+      CFLAGS="${CFLAGS} -Wall -Wextra -Wcast-qual -pedantic -DZLIB_CONST"
+    else
+      CFLAGS="${CFLAGS} -Wall -Wextra -pedantic"
+    fi
+  fi
+  if test $debug -eq 1; then
+    CFLAGS="${CFLAGS} -DZLIB_DEBUG"
+    SFLAGS="${SFLAGS} -DZLIB_DEBUG"
+  fi
+  if test -z "$uname"; then
+    uname=`(uname -s || echo unknown) 2>/dev/null`
+  fi
+  case "$uname" in
+  Linux* | linux* | GNU | GNU/* | solaris*)
+        LDSHARED=${LDSHARED-"$cc -shared -Wl,-soname,libz.so.1,--version-script,${SRCDIR}zlib.map"} ;;
+  *BSD | *bsd* | DragonFly)
+        LDSHARED=${LDSHARED-"$cc -shared -Wl,-soname,libz.so.1,--version-script,${SRCDIR}zlib.map"}
+        LDCONFIG="ldconfig -m" ;;
+  CYGWIN* | Cygwin* | cygwin* | OS/2*)
+        EXE='.exe' ;;
+  MINGW* | mingw*)
+# temporary bypass
+        rm -f $test.[co] $test $test$shared_ext
+        echo "Please use win32/Makefile.gcc instead." | tee -a configure.log
+        leave 1
+        LDSHARED=${LDSHARED-"$cc -shared"}
+        LDSHAREDLIBC=""
+        EXE='.exe' ;;
+  QNX*)  # This is for QNX6. I suppose that the QNX rule below is for QNX2,QNX4
+         # (alain.bonnefoy@icbt.com)
+                 LDSHARED=${LDSHARED-"$cc -shared -Wl,-hlibz.so.1"} ;;
+  HP-UX*)
+         LDSHARED=${LDSHARED-"$cc -shared $SFLAGS"}
+         case `(uname -m || echo unknown) 2>/dev/null` in
+         ia64)
+                 shared_ext='.so'
+                 SHAREDLIB='libz.so' ;;
+         *)
+                 shared_ext='.sl'
+                 SHAREDLIB='libz.sl' ;;
+         esac ;;
+  Darwin* | darwin*)
+             shared_ext='.dylib'
+             SHAREDLIB=libz$shared_ext
+             SHAREDLIBV=libz.$VER$shared_ext
+             SHAREDLIBM=libz.$VER1$shared_ext
+             LDSHARED=${LDSHARED-"$cc -dynamiclib -install_name $libdir/$SHAREDLIBM -compatibility_version $VER1 -current_version $VER3"}
+             if libtool -V 2>&1 | grep Apple > /dev/null; then
+                 AR="libtool"
+             else
+                 AR="/usr/bin/libtool"
+             fi
+             ARFLAGS="-o" ;;
+  *)             LDSHARED=${LDSHARED-"$cc -shared"} ;;
+  esac
+else
+  # find system name and corresponding cc options
+  CC=${CC-cc}
+  gcc=0
+  echo ... using $CC >> configure.log
+  if test -z "$uname"; then
+    uname=`(uname -sr || echo unknown) 2>/dev/null`
+  fi
+  case "$uname" in
+  HP-UX*)    SFLAGS=${CFLAGS-"-O +z"}
+             CFLAGS=${CFLAGS-"-O"}
+#            LDSHARED=${LDSHARED-"ld -b +vnocompatwarnings"}
+             LDSHARED=${LDSHARED-"ld -b"}
+         case `(uname -m || echo unknown) 2>/dev/null` in
+         ia64)
+             shared_ext='.so'
+             SHAREDLIB='libz.so' ;;
+         *)
+             shared_ext='.sl'
+             SHAREDLIB='libz.sl' ;;
+         esac ;;
+  IRIX*)     SFLAGS=${CFLAGS-"-ansi -O2 -rpath ."}
+             CFLAGS=${CFLAGS-"-ansi -O2"}
+             LDSHARED=${LDSHARED-"cc -shared -Wl,-soname,libz.so.1"} ;;
+  OSF1\ V4*) SFLAGS=${CFLAGS-"-O -std1"}
+             CFLAGS=${CFLAGS-"-O -std1"}
+             LDFLAGS="${LDFLAGS} -Wl,-rpath,."
+             LDSHARED=${LDSHARED-"cc -shared  -Wl,-soname,libz.so -Wl,-msym -Wl,-rpath,$(libdir) -Wl,-set_version,${VER}:1.0"} ;;
+  OSF1*)     SFLAGS=${CFLAGS-"-O -std1"}
+             CFLAGS=${CFLAGS-"-O -std1"}
+             LDSHARED=${LDSHARED-"cc -shared -Wl,-soname,libz.so.1"} ;;
+  QNX*)      SFLAGS=${CFLAGS-"-4 -O"}
+             CFLAGS=${CFLAGS-"-4 -O"}
+             LDSHARED=${LDSHARED-"cc"}
+             RANLIB=${RANLIB-"true"}
+             AR="cc"
+             ARFLAGS="-A" ;;
+  SCO_SV\ 3.2*) SFLAGS=${CFLAGS-"-O3 -dy -KPIC "}
+             CFLAGS=${CFLAGS-"-O3"}
+             LDSHARED=${LDSHARED-"cc -dy -KPIC -G"} ;;
+  SunOS\ 5* | solaris*)
+         LDSHARED=${LDSHARED-"cc -G -h libz$shared_ext.$VER1"}
+         SFLAGS=${CFLAGS-"-fast -KPIC"}
+         CFLAGS=${CFLAGS-"-fast"}
+         if test $build64 -eq 1; then
+             # old versions of SunPRO/Workshop/Studio don't support -m64,
+             # but newer ones do.  Check for it.
+             flag64=`$CC -flags | egrep -- '^-m64'`
+             if test x"$flag64" != x"" ; then
+                 CFLAGS="${CFLAGS} -m64"
+                 SFLAGS="${SFLAGS} -m64"
+             else
+                 case `(uname -m || echo unknown) 2>/dev/null` in
+                   i86*)
+                     SFLAGS="$SFLAGS -xarch=amd64"
+                     CFLAGS="$CFLAGS -xarch=amd64" ;;
+                   *)
+                     SFLAGS="$SFLAGS -xarch=v9"
+                     CFLAGS="$CFLAGS -xarch=v9" ;;
+                 esac
+             fi
+         fi
+         if test -n "$ZINC"; then
+             ZINC='-I- -I. -I$(SRCDIR)'
+         fi
+         ;;
+  SunOS\ 4*) SFLAGS=${CFLAGS-"-O2 -PIC"}
+             CFLAGS=${CFLAGS-"-O2"}
+             LDSHARED=${LDSHARED-"ld"} ;;
+  SunStudio\ 9*) SFLAGS=${CFLAGS-"-fast -xcode=pic32 -xtarget=ultra3 -xarch=v9b"}
+             CFLAGS=${CFLAGS-"-fast -xtarget=ultra3 -xarch=v9b"}
+             LDSHARED=${LDSHARED-"cc -xarch=v9b"} ;;
+  UNIX_System_V\ 4.2.0)
+             SFLAGS=${CFLAGS-"-KPIC -O"}
+             CFLAGS=${CFLAGS-"-O"}
+             LDSHARED=${LDSHARED-"cc -G"} ;;
+  UNIX_SV\ 4.2MP)
+             SFLAGS=${CFLAGS-"-Kconform_pic -O"}
+             CFLAGS=${CFLAGS-"-O"}
+             LDSHARED=${LDSHARED-"cc -G"} ;;
+  OpenUNIX\ 5)
+             SFLAGS=${CFLAGS-"-KPIC -O"}
+             CFLAGS=${CFLAGS-"-O"}
+             LDSHARED=${LDSHARED-"cc -G"} ;;
+  AIX*)  # Courtesy of dbakker@arrayasolutions.com
+             SFLAGS=${CFLAGS-"-O -qmaxmem=8192"}
+             CFLAGS=${CFLAGS-"-O -qmaxmem=8192"}
+             LDSHARED=${LDSHARED-"xlc -G"} ;;
+  # send working options for other systems to zlib@gzip.org
+  *)         SFLAGS=${CFLAGS-"-O"}
+             CFLAGS=${CFLAGS-"-O"}
+             LDSHARED=${LDSHARED-"cc -shared"} ;;
+  esac
+fi
+
+# destination names for shared library if not defined above
+SHAREDLIB=${SHAREDLIB-"libz$shared_ext"}
+SHAREDLIBV=${SHAREDLIBV-"libz$shared_ext.$VER"}
+SHAREDLIBM=${SHAREDLIBM-"libz$shared_ext.$VER1"}
+
+echo >> configure.log
+
+# define functions for testing compiler and library characteristics and logging the results
+
+cat > $test.c <<EOF
+#error error
+EOF
+if ($CC -c $CFLAGS $test.c) 2>/dev/null; then
+  try()
+  {
+    show $*
+    test "`( $* ) 2>&1 | tee -a configure.log`" = ""
+  }
+  echo - using any output from compiler to indicate an error >> configure.log
+else
+  try()
+  {
+    show $*
+    ( $* ) >> configure.log 2>&1
+    ret=$?
+    if test $ret -ne 0; then
+      echo "(exit code "$ret")" >> configure.log
+    fi
+    return $ret
+  }
+fi
+
+tryboth()
+{
+  show $*
+  got=`( $* ) 2>&1`
+  ret=$?
+  printf %s "$got" >> configure.log
+  if test $ret -ne 0; then
+    return $ret
+  fi
+  test "$got" = ""
+}
+
+cat > $test.c << EOF
+int foo() { return 0; }
+EOF
+echo "Checking for obsessive-compulsive compiler options..." >> configure.log
+if try $CC -c $CFLAGS $test.c; then
+  :
+else
+  echo "Compiler error reporting is too harsh for $0 (perhaps remove -Werror)." | tee -a configure.log
+  leave 1
+fi
+
+echo >> configure.log
+
+# see if shared library build supported
+cat > $test.c <<EOF
+extern int getchar();
+int hello() {return getchar();}
+EOF
+if test $shared -eq 1; then
+  echo Checking for shared library support... | tee -a configure.log
+  # we must test in two steps (cc then ld), required at least on SunOS 4.x
+  if try $CC -w -c $SFLAGS $test.c &&
+     try $LDSHARED $SFLAGS -o $test$shared_ext $test.o; then
+    echo Building shared library $SHAREDLIBV with $CC. | tee -a configure.log
+  elif test -z "$old_cc" -a -z "$old_cflags"; then
+    echo No shared library support. | tee -a configure.log
+    shared=0;
+  else
+    echo 'No shared library support; try without defining CC and CFLAGS' | tee -a configure.log
+    shared=0;
+  fi
+fi
+if test $shared -eq 0; then
+  LDSHARED="$CC"
+  ALL="static"
+  TEST="all teststatic"
+  SHAREDLIB=""
+  SHAREDLIBV=""
+  SHAREDLIBM=""
+  echo Building static library $STATICLIB version $VER with $CC. | tee -a configure.log
+else
+  ALL="static shared"
+  TEST="all teststatic testshared"
+fi
+
+# check for underscores in external names for use by assembler code
+CPP=${CPP-"$CC -E"}
+case $CFLAGS in
+  *ASMV*)
+    echo >> configure.log
+    show "$NM $test.o | grep _hello"
+    if test "`$NM $test.o | grep _hello | tee -a configure.log`" = ""; then
+      CPP="$CPP -DNO_UNDERLINE"
+      echo Checking for underline in external names... No. | tee -a configure.log
+    else
+      echo Checking for underline in external names... Yes. | tee -a configure.log
+    fi ;;
+esac
+
+echo >> configure.log
+
+# check for size_t
+cat > $test.c <<EOF
+#include <stdio.h>
+#include <stdlib.h>
+size_t dummy = 0;
+EOF
+if try $CC -c $CFLAGS $test.c; then
+  echo "Checking for size_t... Yes." | tee -a configure.log
+  need_sizet=0
+else
+  echo "Checking for size_t... No." | tee -a configure.log
+  need_sizet=1
+fi
+
+echo >> configure.log
+
+# find the size_t integer type, if needed
+if test $need_sizet -eq 1; then
+  cat > $test.c <<EOF
+long long dummy = 0;
+EOF
+  if try $CC -c $CFLAGS $test.c; then
+    echo "Checking for long long... Yes." | tee -a configure.log
+    cat > $test.c <<EOF
+#include <stdio.h>
+int main(void) {
+    if (sizeof(void *) <= sizeof(int)) puts("int");
+    else if (sizeof(void *) <= sizeof(long)) puts("long");
+    else puts("z_longlong");
+    return 0;
+}
+EOF
+  else
+    echo "Checking for long long... No." | tee -a configure.log
+    cat > $test.c <<EOF
+#include <stdio.h>
+int main(void) {
+    if (sizeof(void *) <= sizeof(int)) puts("int");
+    else puts("long");
+    return 0;
+}
+EOF
+  fi
+  if try $CC $CFLAGS -o $test $test.c; then
+    sizet=`./$test`
+    echo "Checking for a pointer-size integer type..." $sizet"." | tee -a configure.log
+  else
+    echo "Failed to find a pointer-size integer type." | tee -a configure.log
+    leave 1
+  fi
+fi
+
+if test $need_sizet -eq 1; then
+  CFLAGS="${CFLAGS} -DNO_SIZE_T=${sizet}"
+  SFLAGS="${SFLAGS} -DNO_SIZE_T=${sizet}"
+fi
+
+echo >> configure.log
+
+# check for large file support, and if none, check for fseeko()
+cat > $test.c <<EOF
+#include <sys/types.h>
+off64_t dummy = 0;
+EOF
+if try $CC -c $CFLAGS -D_LARGEFILE64_SOURCE=1 $test.c; then
+  CFLAGS="${CFLAGS} -D_LARGEFILE64_SOURCE=1"
+  SFLAGS="${SFLAGS} -D_LARGEFILE64_SOURCE=1"
+  ALL="${ALL} all64"
+  TEST="${TEST} test64"
+  echo "Checking for off64_t... Yes." | tee -a configure.log
+  echo "Checking for fseeko... Yes." | tee -a configure.log
+else
+  echo "Checking for off64_t... No." | tee -a configure.log
+  echo >> configure.log
+  cat > $test.c <<EOF
+#include <stdio.h>
+int main(void) {
+  fseeko(NULL, 0, 0);
+  return 0;
+}
+EOF
+  if try $CC $CFLAGS -o $test $test.c; then
+    echo "Checking for fseeko... Yes." | tee -a configure.log
+  else
+    CFLAGS="${CFLAGS} -DNO_FSEEKO"
+    SFLAGS="${SFLAGS} -DNO_FSEEKO"
+    echo "Checking for fseeko... No." | tee -a configure.log
+  fi
+fi
+
+echo >> configure.log
+
+# check for strerror() for use by gz* functions
+cat > $test.c <<EOF
+#include <string.h>
+#include <errno.h>
+int main() { return strlen(strerror(errno)); }
+EOF
+if try $CC $CFLAGS -o $test $test.c; then
+  echo "Checking for strerror... Yes." | tee -a configure.log
+else
+  CFLAGS="${CFLAGS} -DNO_STRERROR"
+  SFLAGS="${SFLAGS} -DNO_STRERROR"
+  echo "Checking for strerror... No." | tee -a configure.log
+fi
+
+# copy clean zconf.h for subsequent edits
+cp -p ${SRCDIR}zconf.h.in zconf.h
+
+echo >> configure.log
+
+# check for unistd.h and save result in zconf.h
+cat > $test.c <<EOF
+#include <unistd.h>
+int main() { return 0; }
+EOF
+if try $CC -c $CFLAGS $test.c; then
+  sed < zconf.h "/^#ifdef HAVE_UNISTD_H.* may be/s/def HAVE_UNISTD_H\(.*\) may be/ 1\1 was/" > zconf.temp.h
+  mv zconf.temp.h zconf.h
+  echo "Checking for unistd.h... Yes." | tee -a configure.log
+else
+  echo "Checking for unistd.h... No." | tee -a configure.log
+fi
+
+echo >> configure.log
+
+# check for stdarg.h and save result in zconf.h
+cat > $test.c <<EOF
+#include <stdarg.h>
+int main() { return 0; }
+EOF
+if try $CC -c $CFLAGS $test.c; then
+  sed < zconf.h "/^#ifdef HAVE_STDARG_H.* may be/s/def HAVE_STDARG_H\(.*\) may be/ 1\1 was/" > zconf.temp.h
+  mv zconf.temp.h zconf.h
+  echo "Checking for stdarg.h... Yes." | tee -a configure.log
+else
+  echo "Checking for stdarg.h... No." | tee -a configure.log
+fi
+
+# if the z_ prefix was requested, save that in zconf.h
+if test $zprefix -eq 1; then
+  sed < zconf.h "/#ifdef Z_PREFIX.* may be/s/def Z_PREFIX\(.*\) may be/ 1\1 was/" > zconf.temp.h
+  mv zconf.temp.h zconf.h
+  echo >> configure.log
+  echo "Using z_ prefix on all symbols." | tee -a configure.log
+fi
+
+# if --solo compilation was requested, save that in zconf.h and remove gz stuff from object lists
+if test $solo -eq 1; then
+  sed '/#define ZCONF_H/a\
+#define Z_SOLO
+
+' < zconf.h > zconf.temp.h
+  mv zconf.temp.h zconf.h
+OBJC='$(OBJZ)'
+PIC_OBJC='$(PIC_OBJZ)'
+fi
+
+# if code coverage testing was requested, use older gcc if defined, e.g. "gcc-4.2" on Mac OS X
+if test $cover -eq 1; then
+  CFLAGS="${CFLAGS} -fprofile-arcs -ftest-coverage"
+  if test -n "$GCC_CLASSIC"; then
+    CC=$GCC_CLASSIC
+  fi
+fi
+
+echo >> configure.log
+
+# conduct a series of tests to resolve eight possible cases of using "vs" or "s" printf functions
+# (using stdarg or not), with or without "n" (proving size of buffer), and with or without a
+# return value.  The most secure result is vsnprintf() with a return value.  snprintf() with a
+# return value is secure as well, but then gzprintf() will be limited to 20 arguments.
+cat > $test.c <<EOF
+#include <stdio.h>
+#include <stdarg.h>
+#include "zconf.h"
+int main()
+{
+#ifndef STDC
+  choke me
+#endif
+  return 0;
+}
+EOF
+if try $CC -c $CFLAGS $test.c; then
+  echo "Checking whether to use vs[n]printf() or s[n]printf()... using vs[n]printf()." | tee -a configure.log
+
+  echo >> configure.log
+  cat > $test.c <<EOF
+#include <stdio.h>
+#include <stdarg.h>
+int mytest(const char *fmt, ...)
+{
+  char buf[20];
+  va_list ap;
+  va_start(ap, fmt);
+  vsnprintf(buf, sizeof(buf), fmt, ap);
+  va_end(ap);
+  return 0;
+}
+int main()
+{
+  return (mytest("Hello%d\n", 1));
+}
+EOF
+  if try $CC $CFLAGS -o $test $test.c; then
+    echo "Checking for vsnprintf() in stdio.h... Yes." | tee -a configure.log
+
+    echo >> configure.log
+    cat >$test.c <<EOF
+#include <stdio.h>
+#include <stdarg.h>
+int mytest(const char *fmt, ...)
+{
+  int n;
+  char buf[20];
+  va_list ap;
+  va_start(ap, fmt);
+  n = vsnprintf(buf, sizeof(buf), fmt, ap);
+  va_end(ap);
+  return n;
+}
+int main()
+{
+  return (mytest("Hello%d\n", 1));
+}
+EOF
+
+    if try $CC -c $CFLAGS $test.c; then
+      echo "Checking for return value of vsnprintf()... Yes." | tee -a configure.log
+    else
+      CFLAGS="$CFLAGS -DHAS_vsnprintf_void"
+      SFLAGS="$SFLAGS -DHAS_vsnprintf_void"
+      echo "Checking for return value of vsnprintf()... No." | tee -a configure.log
+      echo "  WARNING: apparently vsnprintf() does not return a value. zlib" | tee -a configure.log
+      echo "  can build but will be open to possible string-format security" | tee -a configure.log
+      echo "  vulnerabilities." | tee -a configure.log
+    fi
+  else
+    CFLAGS="$CFLAGS -DNO_vsnprintf"
+    SFLAGS="$SFLAGS -DNO_vsnprintf"
+    echo "Checking for vsnprintf() in stdio.h... No." | tee -a configure.log
+    echo "  WARNING: vsnprintf() not found, falling back to vsprintf(). zlib" | tee -a configure.log
+    echo "  can build but will be open to possible buffer-overflow security" | tee -a configure.log
+    echo "  vulnerabilities." | tee -a configure.log
+
+    echo >> configure.log
+    cat >$test.c <<EOF
+#include <stdio.h>
+#include <stdarg.h>
+int mytest(const char *fmt, ...)
+{
+  int n;
+  char buf[20];
+  va_list ap;
+  va_start(ap, fmt);
+  n = vsprintf(buf, fmt, ap);
+  va_end(ap);
+  return n;
+}
+int main()
+{
+  return (mytest("Hello%d\n", 1));
+}
+EOF
+
+    if try $CC -c $CFLAGS $test.c; then
+      echo "Checking for return value of vsprintf()... Yes." | tee -a configure.log
+    else
+      CFLAGS="$CFLAGS -DHAS_vsprintf_void"
+      SFLAGS="$SFLAGS -DHAS_vsprintf_void"
+      echo "Checking for return value of vsprintf()... No." | tee -a configure.log
+      echo "  WARNING: apparently vsprintf() does not return a value. zlib" | tee -a configure.log
+      echo "  can build but will be open to possible string-format security" | tee -a configure.log
+      echo "  vulnerabilities." | tee -a configure.log
+    fi
+  fi
+else
+  echo "Checking whether to use vs[n]printf() or s[n]printf()... using s[n]printf()." | tee -a configure.log
+
+  echo >> configure.log
+  cat >$test.c <<EOF
+#include <stdio.h>
+int mytest()
+{
+  char buf[20];
+  snprintf(buf, sizeof(buf), "%s", "foo");
+  return 0;
+}
+int main()
+{
+  return (mytest());
+}
+EOF
+
+  if try $CC $CFLAGS -o $test $test.c; then
+    echo "Checking for snprintf() in stdio.h... Yes." | tee -a configure.log
+
+    echo >> configure.log
+    cat >$test.c <<EOF
+#include <stdio.h>
+int mytest()
+{
+  char buf[20];
+  return snprintf(buf, sizeof(buf), "%s", "foo");
+}
+int main()
+{
+  return (mytest());
+}
+EOF
+
+    if try $CC -c $CFLAGS $test.c; then
+      echo "Checking for return value of snprintf()... Yes." | tee -a configure.log
+    else
+      CFLAGS="$CFLAGS -DHAS_snprintf_void"
+      SFLAGS="$SFLAGS -DHAS_snprintf_void"
+      echo "Checking for return value of snprintf()... No." | tee -a configure.log
+      echo "  WARNING: apparently snprintf() does not return a value. zlib" | tee -a configure.log
+      echo "  can build but will be open to possible string-format security" | tee -a configure.log
+      echo "  vulnerabilities." | tee -a configure.log
+    fi
+  else
+    CFLAGS="$CFLAGS -DNO_snprintf"
+    SFLAGS="$SFLAGS -DNO_snprintf"
+    echo "Checking for snprintf() in stdio.h... No." | tee -a configure.log
+    echo "  WARNING: snprintf() not found, falling back to sprintf(). zlib" | tee -a configure.log
+    echo "  can build but will be open to possible buffer-overflow security" | tee -a configure.log
+    echo "  vulnerabilities." | tee -a configure.log
+
+    echo >> configure.log
+    cat >$test.c <<EOF
+#include <stdio.h>
+int mytest()
+{
+  char buf[20];
+  return sprintf(buf, "%s", "foo");
+}
+int main()
+{
+  return (mytest());
+}
+EOF
+
+    if try $CC -c $CFLAGS $test.c; then
+      echo "Checking for return value of sprintf()... Yes." | tee -a configure.log
+    else
+      CFLAGS="$CFLAGS -DHAS_sprintf_void"
+      SFLAGS="$SFLAGS -DHAS_sprintf_void"
+      echo "Checking for return value of sprintf()... No." | tee -a configure.log
+      echo "  WARNING: apparently sprintf() does not return a value. zlib" | tee -a configure.log
+      echo "  can build but will be open to possible string-format security" | tee -a configure.log
+      echo "  vulnerabilities." | tee -a configure.log
+    fi
+  fi
+fi
+
+# see if we can hide zlib internal symbols that are linked between separate source files
+if test "$gcc" -eq 1; then
+  echo >> configure.log
+  cat > $test.c <<EOF
+#define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+int ZLIB_INTERNAL foo;
+int main()
+{
+  return 0;
+}
+EOF
+  if tryboth $CC -c $CFLAGS $test.c; then
+    CFLAGS="$CFLAGS -DHAVE_HIDDEN"
+    SFLAGS="$SFLAGS -DHAVE_HIDDEN"
+    echo "Checking for attribute(visibility) support... Yes." | tee -a configure.log
+  else
+    echo "Checking for attribute(visibility) support... No." | tee -a configure.log
+  fi
+fi
+
+# show the results in the log
+echo >> configure.log
+echo ALL = $ALL >> configure.log
+echo AR = $AR >> configure.log
+echo ARFLAGS = $ARFLAGS >> configure.log
+echo CC = $CC >> configure.log
+echo CFLAGS = $CFLAGS >> configure.log
+echo CPP = $CPP >> configure.log
+echo EXE = $EXE >> configure.log
+echo LDCONFIG = $LDCONFIG >> configure.log
+echo LDFLAGS = $LDFLAGS >> configure.log
+echo LDSHARED = $LDSHARED >> configure.log
+echo LDSHAREDLIBC = $LDSHAREDLIBC >> configure.log
+echo OBJC = $OBJC >> configure.log
+echo PIC_OBJC = $PIC_OBJC >> configure.log
+echo RANLIB = $RANLIB >> configure.log
+echo SFLAGS = $SFLAGS >> configure.log
+echo SHAREDLIB = $SHAREDLIB >> configure.log
+echo SHAREDLIBM = $SHAREDLIBM >> configure.log
+echo SHAREDLIBV = $SHAREDLIBV >> configure.log
+echo STATICLIB = $STATICLIB >> configure.log
+echo TEST = $TEST >> configure.log
+echo VER = $VER >> configure.log
+echo Z_U4 = $Z_U4 >> configure.log
+echo SRCDIR = $SRCDIR >> configure.log
+echo exec_prefix = $exec_prefix >> configure.log
+echo includedir = $includedir >> configure.log
+echo libdir = $libdir >> configure.log
+echo mandir = $mandir >> configure.log
+echo prefix = $prefix >> configure.log
+echo sharedlibdir = $sharedlibdir >> configure.log
+echo uname = $uname >> configure.log
+
+# udpate Makefile with the configure results
+sed < ${SRCDIR}Makefile.in "
+/^CC *=/s#=.*#=$CC#
+/^CFLAGS *=/s#=.*#=$CFLAGS#
+/^SFLAGS *=/s#=.*#=$SFLAGS#
+/^LDFLAGS *=/s#=.*#=$LDFLAGS#
+/^LDSHARED *=/s#=.*#=$LDSHARED#
+/^CPP *=/s#=.*#=$CPP#
+/^STATICLIB *=/s#=.*#=$STATICLIB#
+/^SHAREDLIB *=/s#=.*#=$SHAREDLIB#
+/^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV#
+/^SHAREDLIBM *=/s#=.*#=$SHAREDLIBM#
+/^AR *=/s#=.*#=$AR#
+/^ARFLAGS *=/s#=.*#=$ARFLAGS#
+/^RANLIB *=/s#=.*#=$RANLIB#
+/^LDCONFIG *=/s#=.*#=$LDCONFIG#
+/^LDSHAREDLIBC *=/s#=.*#=$LDSHAREDLIBC#
+/^EXE *=/s#=.*#=$EXE#
+/^SRCDIR *=/s#=.*#=$SRCDIR#
+/^ZINC *=/s#=.*#=$ZINC#
+/^ZINCOUT *=/s#=.*#=$ZINCOUT#
+/^prefix *=/s#=.*#=$prefix#
+/^exec_prefix *=/s#=.*#=$exec_prefix#
+/^libdir *=/s#=.*#=$libdir#
+/^sharedlibdir *=/s#=.*#=$sharedlibdir#
+/^includedir *=/s#=.*#=$includedir#
+/^mandir *=/s#=.*#=$mandir#
+/^OBJC *=/s#=.*#= $OBJC#
+/^PIC_OBJC *=/s#=.*#= $PIC_OBJC#
+/^all: */s#:.*#: $ALL#
+/^test: */s#:.*#: $TEST#
+" > Makefile
+
+# create zlib.pc with the configure results
+sed < ${SRCDIR}zlib.pc.in "
+/^CC *=/s#=.*#=$CC#
+/^CFLAGS *=/s#=.*#=$CFLAGS#
+/^CPP *=/s#=.*#=$CPP#
+/^LDSHARED *=/s#=.*#=$LDSHARED#
+/^STATICLIB *=/s#=.*#=$STATICLIB#
+/^SHAREDLIB *=/s#=.*#=$SHAREDLIB#
+/^SHAREDLIBV *=/s#=.*#=$SHAREDLIBV#
+/^SHAREDLIBM *=/s#=.*#=$SHAREDLIBM#
+/^AR *=/s#=.*#=$AR#
+/^ARFLAGS *=/s#=.*#=$ARFLAGS#
+/^RANLIB *=/s#=.*#=$RANLIB#
+/^EXE *=/s#=.*#=$EXE#
+/^prefix *=/s#=.*#=$prefix#
+/^exec_prefix *=/s#=.*#=$exec_prefix#
+/^libdir *=/s#=.*#=$libdir#
+/^sharedlibdir *=/s#=.*#=$sharedlibdir#
+/^includedir *=/s#=.*#=$includedir#
+/^mandir *=/s#=.*#=$mandir#
+/^LDFLAGS *=/s#=.*#=$LDFLAGS#
+" | sed -e "
+s/\@VERSION\@/$VER/g;
+" > zlib.pc
+
+# done
+leave 0
diff --git a/deps/libchdr/deps/zlib-1.2.11/crc32.c b/deps/libchdr/deps/zlib-1.2.11/crc32.c
new file mode 100644
index 00000000..9580440c
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/crc32.c
@@ -0,0 +1,442 @@
+/* crc32.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
+ * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
+ * tables for updating the shift register in one step with three exclusive-ors
+ * instead of four steps with four exclusive-ors.  This results in about a
+ * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
+ */
+
+/* @(#) $Id$ */
+
+/*
+  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
+  protection on the static variables used to control the first-use generation
+  of the crc tables.  Therefore, if you #define DYNAMIC_CRC_TABLE, you should
+  first call get_crc_table() to initialize the tables before allowing more than
+  one thread to use crc32().
+
+  DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h.
+ */
+
+#ifdef MAKECRCH
+#  include <stdio.h>
+#  ifndef DYNAMIC_CRC_TABLE
+#    define DYNAMIC_CRC_TABLE
+#  endif /* !DYNAMIC_CRC_TABLE */
+#endif /* MAKECRCH */
+
+#include "zutil.h"      /* for STDC and FAR definitions */
+
+/* Definitions for doing the crc four data bytes at a time. */
+#if !defined(NOBYFOUR) && defined(Z_U4)
+#  define BYFOUR
+#endif
+#ifdef BYFOUR
+   local unsigned long crc32_little OF((unsigned long,
+                        const unsigned char FAR *, z_size_t));
+   local unsigned long crc32_big OF((unsigned long,
+                        const unsigned char FAR *, z_size_t));
+#  define TBLS 8
+#else
+#  define TBLS 1
+#endif /* BYFOUR */
+
+/* Local functions for crc concatenation */
+local unsigned long gf2_matrix_times OF((unsigned long *mat,
+                                         unsigned long vec));
+local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
+local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2));
+
+
+#ifdef DYNAMIC_CRC_TABLE
+
+local volatile int crc_table_empty = 1;
+local z_crc_t FAR crc_table[TBLS][256];
+local void make_crc_table OF((void));
+#ifdef MAKECRCH
+   local void write_table OF((FILE *, const z_crc_t FAR *));
+#endif /* MAKECRCH */
+/*
+  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
+  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
+
+  Polynomials over GF(2) are represented in binary, one bit per coefficient,
+  with the lowest powers in the most significant bit.  Then adding polynomials
+  is just exclusive-or, and multiplying a polynomial by x is a right shift by
+  one.  If we call the above polynomial p, and represent a byte as the
+  polynomial q, also with the lowest power in the most significant bit (so the
+  byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
+  where a mod b means the remainder after dividing a by b.
+
+  This calculation is done using the shift-register method of multiplying and
+  taking the remainder.  The register is initialized to zero, and for each
+  incoming bit, x^32 is added mod p to the register if the bit is a one (where
+  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
+  x (which is shifting right by one and adding x^32 mod p if the bit shifted
+  out is a one).  We start with the highest power (least significant bit) of
+  q and repeat for all eight bits of q.
+
+  The first table is simply the CRC of all possible eight bit values.  This is
+  all the information needed to generate CRCs on data a byte at a time for all
+  combinations of CRC register values and incoming bytes.  The remaining tables
+  allow for word-at-a-time CRC calculation for both big-endian and little-
+  endian machines, where a word is four bytes.
+*/
+local void make_crc_table()
+{
+    z_crc_t c;
+    int n, k;
+    z_crc_t poly;                       /* polynomial exclusive-or pattern */
+    /* terms of polynomial defining this crc (except x^32): */
+    static volatile int first = 1;      /* flag to limit concurrent making */
+    static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
+
+    /* See if another task is already doing this (not thread-safe, but better
+       than nothing -- significantly reduces duration of vulnerability in
+       case the advice about DYNAMIC_CRC_TABLE is ignored) */
+    if (first) {
+        first = 0;
+
+        /* make exclusive-or pattern from polynomial (0xedb88320UL) */
+        poly = 0;
+        for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
+            poly |= (z_crc_t)1 << (31 - p[n]);
+
+        /* generate a crc for every 8-bit value */
+        for (n = 0; n < 256; n++) {
+            c = (z_crc_t)n;
+            for (k = 0; k < 8; k++)
+                c = c & 1 ? poly ^ (c >> 1) : c >> 1;
+            crc_table[0][n] = c;
+        }
+
+#ifdef BYFOUR
+        /* generate crc for each value followed by one, two, and three zeros,
+           and then the byte reversal of those as well as the first table */
+        for (n = 0; n < 256; n++) {
+            c = crc_table[0][n];
+            crc_table[4][n] = ZSWAP32(c);
+            for (k = 1; k < 4; k++) {
+                c = crc_table[0][c & 0xff] ^ (c >> 8);
+                crc_table[k][n] = c;
+                crc_table[k + 4][n] = ZSWAP32(c);
+            }
+        }
+#endif /* BYFOUR */
+
+        crc_table_empty = 0;
+    }
+    else {      /* not first */
+        /* wait for the other guy to finish (not efficient, but rare) */
+        while (crc_table_empty)
+            ;
+    }
+
+#ifdef MAKECRCH
+    /* write out CRC tables to crc32.h */
+    {
+        FILE *out;
+
+        out = fopen("crc32.h", "w");
+        if (out == NULL) return;
+        fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
+        fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
+        fprintf(out, "local const z_crc_t FAR ");
+        fprintf(out, "crc_table[TBLS][256] =\n{\n  {\n");
+        write_table(out, crc_table[0]);
+#  ifdef BYFOUR
+        fprintf(out, "#ifdef BYFOUR\n");
+        for (k = 1; k < 8; k++) {
+            fprintf(out, "  },\n  {\n");
+            write_table(out, crc_table[k]);
+        }
+        fprintf(out, "#endif\n");
+#  endif /* BYFOUR */
+        fprintf(out, "  }\n};\n");
+        fclose(out);
+    }
+#endif /* MAKECRCH */
+}
+
+#ifdef MAKECRCH
+local void write_table(out, table)
+    FILE *out;
+    const z_crc_t FAR *table;
+{
+    int n;
+
+    for (n = 0; n < 256; n++)
+        fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : "    ",
+                (unsigned long)(table[n]),
+                n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
+}
+#endif /* MAKECRCH */
+
+#else /* !DYNAMIC_CRC_TABLE */
+/* ========================================================================
+ * Tables of CRC-32s of all single-byte values, made by make_crc_table().
+ */
+#include "crc32.h"
+#endif /* DYNAMIC_CRC_TABLE */
+
+/* =========================================================================
+ * This function can be used by asm versions of crc32()
+ */
+const z_crc_t FAR * ZEXPORT get_crc_table()
+{
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty)
+        make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+    return (const z_crc_t FAR *)crc_table;
+}
+
+/* ========================================================================= */
+#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
+#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
+
+/* ========================================================================= */
+unsigned long ZEXPORT crc32_z(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    z_size_t len;
+{
+    if (buf == Z_NULL) return 0UL;
+
+#ifdef DYNAMIC_CRC_TABLE
+    if (crc_table_empty)
+        make_crc_table();
+#endif /* DYNAMIC_CRC_TABLE */
+
+#ifdef BYFOUR
+    if (sizeof(void *) == sizeof(ptrdiff_t)) {
+        z_crc_t endian;
+
+        endian = 1;
+        if (*((unsigned char *)(&endian)))
+            return crc32_little(crc, buf, len);
+        else
+            return crc32_big(crc, buf, len);
+    }
+#endif /* BYFOUR */
+    crc = crc ^ 0xffffffffUL;
+    while (len >= 8) {
+        DO8;
+        len -= 8;
+    }
+    if (len) do {
+        DO1;
+    } while (--len);
+    return crc ^ 0xffffffffUL;
+}
+
+/* ========================================================================= */
+unsigned long ZEXPORT crc32(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    uInt len;
+{
+    return crc32_z(crc, buf, len);
+}
+
+#ifdef BYFOUR
+
+/*
+   This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit
+   integer pointer type. This violates the strict aliasing rule, where a
+   compiler can assume, for optimization purposes, that two pointers to
+   fundamentally different types won't ever point to the same memory. This can
+   manifest as a problem only if one of the pointers is written to. This code
+   only reads from those pointers. So long as this code remains isolated in
+   this compilation unit, there won't be a problem. For this reason, this code
+   should not be copied and pasted into a compilation unit in which other code
+   writes to the buffer that is passed to these routines.
+ */
+
+/* ========================================================================= */
+#define DOLIT4 c ^= *buf4++; \
+        c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
+            crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
+#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
+
+/* ========================================================================= */
+local unsigned long crc32_little(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    z_size_t len;
+{
+    register z_crc_t c;
+    register const z_crc_t FAR *buf4;
+
+    c = (z_crc_t)crc;
+    c = ~c;
+    while (len && ((ptrdiff_t)buf & 3)) {
+        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+        len--;
+    }
+
+    buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
+    while (len >= 32) {
+        DOLIT32;
+        len -= 32;
+    }
+    while (len >= 4) {
+        DOLIT4;
+        len -= 4;
+    }
+    buf = (const unsigned char FAR *)buf4;
+
+    if (len) do {
+        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
+    } while (--len);
+    c = ~c;
+    return (unsigned long)c;
+}
+
+/* ========================================================================= */
+#define DOBIG4 c ^= *buf4++; \
+        c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
+            crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
+#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
+
+/* ========================================================================= */
+local unsigned long crc32_big(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    z_size_t len;
+{
+    register z_crc_t c;
+    register const z_crc_t FAR *buf4;
+
+    c = ZSWAP32((z_crc_t)crc);
+    c = ~c;
+    while (len && ((ptrdiff_t)buf & 3)) {
+        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+        len--;
+    }
+
+    buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
+    while (len >= 32) {
+        DOBIG32;
+        len -= 32;
+    }
+    while (len >= 4) {
+        DOBIG4;
+        len -= 4;
+    }
+    buf = (const unsigned char FAR *)buf4;
+
+    if (len) do {
+        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
+    } while (--len);
+    c = ~c;
+    return (unsigned long)(ZSWAP32(c));
+}
+
+#endif /* BYFOUR */
+
+#define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */
+
+/* ========================================================================= */
+local unsigned long gf2_matrix_times(mat, vec)
+    unsigned long *mat;
+    unsigned long vec;
+{
+    unsigned long sum;
+
+    sum = 0;
+    while (vec) {
+        if (vec & 1)
+            sum ^= *mat;
+        vec >>= 1;
+        mat++;
+    }
+    return sum;
+}
+
+/* ========================================================================= */
+local void gf2_matrix_square(square, mat)
+    unsigned long *square;
+    unsigned long *mat;
+{
+    int n;
+
+    for (n = 0; n < GF2_DIM; n++)
+        square[n] = gf2_matrix_times(mat, mat[n]);
+}
+
+/* ========================================================================= */
+local uLong crc32_combine_(crc1, crc2, len2)
+    uLong crc1;
+    uLong crc2;
+    z_off64_t len2;
+{
+    int n;
+    unsigned long row;
+    unsigned long even[GF2_DIM];    /* even-power-of-two zeros operator */
+    unsigned long odd[GF2_DIM];     /* odd-power-of-two zeros operator */
+
+    /* degenerate case (also disallow negative lengths) */
+    if (len2 <= 0)
+        return crc1;
+
+    /* put operator for one zero bit in odd */
+    odd[0] = 0xedb88320UL;          /* CRC-32 polynomial */
+    row = 1;
+    for (n = 1; n < GF2_DIM; n++) {
+        odd[n] = row;
+        row <<= 1;
+    }
+
+    /* put operator for two zero bits in even */
+    gf2_matrix_square(even, odd);
+
+    /* put operator for four zero bits in odd */
+    gf2_matrix_square(odd, even);
+
+    /* apply len2 zeros to crc1 (first square will put the operator for one
+       zero byte, eight zero bits, in even) */
+    do {
+        /* apply zeros operator for this bit of len2 */
+        gf2_matrix_square(even, odd);
+        if (len2 & 1)
+            crc1 = gf2_matrix_times(even, crc1);
+        len2 >>= 1;
+
+        /* if no more bits set, then done */
+        if (len2 == 0)
+            break;
+
+        /* another iteration of the loop with odd and even swapped */
+        gf2_matrix_square(odd, even);
+        if (len2 & 1)
+            crc1 = gf2_matrix_times(odd, crc1);
+        len2 >>= 1;
+
+        /* if no more bits set, then done */
+    } while (len2 != 0);
+
+    /* return combined crc */
+    crc1 ^= crc2;
+    return crc1;
+}
+
+/* ========================================================================= */
+uLong ZEXPORT crc32_combine(crc1, crc2, len2)
+    uLong crc1;
+    uLong crc2;
+    z_off_t len2;
+{
+    return crc32_combine_(crc1, crc2, len2);
+}
+
+uLong ZEXPORT crc32_combine64(crc1, crc2, len2)
+    uLong crc1;
+    uLong crc2;
+    z_off64_t len2;
+{
+    return crc32_combine_(crc1, crc2, len2);
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/crc32.h b/deps/libchdr/deps/zlib-1.2.11/crc32.h
new file mode 100644
index 00000000..9e0c7781
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/crc32.h
@@ -0,0 +1,441 @@
+/* crc32.h -- tables for rapid CRC calculation
+ * Generated automatically by crc32.c
+ */
+
+local const z_crc_t FAR crc_table[TBLS][256] =
+{
+  {
+    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
+    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
+    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
+    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
+    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
+    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
+    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
+    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
+    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
+    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
+    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
+    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
+    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
+    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
+    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
+    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
+    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
+    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
+    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
+    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
+    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
+    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
+    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
+    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
+    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
+    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
+    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
+    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
+    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
+    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
+    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
+    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
+    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
+    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
+    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
+    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
+    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
+    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
+    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
+    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
+    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
+    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
+    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
+    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
+    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
+    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
+    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
+    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
+    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
+    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
+    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
+    0x2d02ef8dUL
+#ifdef BYFOUR
+  },
+  {
+    0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL,
+    0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL,
+    0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL,
+    0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL,
+    0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL,
+    0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL,
+    0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL,
+    0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL,
+    0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL,
+    0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL,
+    0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL,
+    0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL,
+    0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL,
+    0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL,
+    0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL,
+    0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL,
+    0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL,
+    0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL,
+    0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL,
+    0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL,
+    0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL,
+    0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL,
+    0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL,
+    0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL,
+    0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL,
+    0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL,
+    0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL,
+    0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL,
+    0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL,
+    0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL,
+    0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL,
+    0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL,
+    0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL,
+    0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL,
+    0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL,
+    0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL,
+    0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL,
+    0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL,
+    0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL,
+    0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL,
+    0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL,
+    0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL,
+    0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL,
+    0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL,
+    0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL,
+    0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL,
+    0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL,
+    0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL,
+    0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL,
+    0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL,
+    0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL,
+    0x9324fd72UL
+  },
+  {
+    0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL,
+    0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL,
+    0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL,
+    0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL,
+    0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL,
+    0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL,
+    0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL,
+    0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL,
+    0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL,
+    0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL,
+    0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL,
+    0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL,
+    0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL,
+    0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL,
+    0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL,
+    0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL,
+    0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL,
+    0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL,
+    0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL,
+    0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL,
+    0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL,
+    0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL,
+    0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL,
+    0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL,
+    0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL,
+    0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL,
+    0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL,
+    0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL,
+    0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL,
+    0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL,
+    0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL,
+    0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL,
+    0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL,
+    0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL,
+    0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL,
+    0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL,
+    0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL,
+    0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL,
+    0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL,
+    0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL,
+    0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL,
+    0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL,
+    0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL,
+    0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL,
+    0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL,
+    0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL,
+    0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL,
+    0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL,
+    0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL,
+    0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL,
+    0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL,
+    0xbe9834edUL
+  },
+  {
+    0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL,
+    0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL,
+    0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL,
+    0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL,
+    0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL,
+    0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL,
+    0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL,
+    0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL,
+    0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL,
+    0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL,
+    0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL,
+    0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL,
+    0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL,
+    0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL,
+    0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL,
+    0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL,
+    0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL,
+    0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL,
+    0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL,
+    0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL,
+    0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL,
+    0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL,
+    0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL,
+    0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL,
+    0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL,
+    0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL,
+    0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL,
+    0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL,
+    0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL,
+    0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL,
+    0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL,
+    0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL,
+    0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL,
+    0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL,
+    0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL,
+    0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL,
+    0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL,
+    0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL,
+    0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL,
+    0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL,
+    0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL,
+    0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL,
+    0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL,
+    0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL,
+    0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL,
+    0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL,
+    0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL,
+    0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL,
+    0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL,
+    0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL,
+    0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL,
+    0xde0506f1UL
+  },
+  {
+    0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL,
+    0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL,
+    0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL,
+    0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL,
+    0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL,
+    0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL,
+    0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL,
+    0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL,
+    0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL,
+    0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL,
+    0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL,
+    0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL,
+    0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL,
+    0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL,
+    0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL,
+    0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL,
+    0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL,
+    0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL,
+    0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL,
+    0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL,
+    0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL,
+    0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL,
+    0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL,
+    0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL,
+    0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL,
+    0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL,
+    0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL,
+    0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL,
+    0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL,
+    0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL,
+    0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL,
+    0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL,
+    0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL,
+    0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL,
+    0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL,
+    0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL,
+    0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL,
+    0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL,
+    0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL,
+    0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL,
+    0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL,
+    0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL,
+    0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL,
+    0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL,
+    0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL,
+    0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL,
+    0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL,
+    0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL,
+    0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL,
+    0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL,
+    0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL,
+    0x8def022dUL
+  },
+  {
+    0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL,
+    0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL,
+    0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL,
+    0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL,
+    0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL,
+    0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL,
+    0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL,
+    0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL,
+    0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL,
+    0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL,
+    0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL,
+    0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL,
+    0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL,
+    0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL,
+    0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL,
+    0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL,
+    0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL,
+    0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL,
+    0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL,
+    0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL,
+    0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL,
+    0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL,
+    0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL,
+    0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL,
+    0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL,
+    0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL,
+    0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL,
+    0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL,
+    0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL,
+    0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL,
+    0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL,
+    0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL,
+    0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL,
+    0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL,
+    0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL,
+    0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL,
+    0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL,
+    0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL,
+    0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL,
+    0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL,
+    0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL,
+    0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL,
+    0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL,
+    0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL,
+    0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL,
+    0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL,
+    0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL,
+    0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL,
+    0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL,
+    0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL,
+    0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL,
+    0x72fd2493UL
+  },
+  {
+    0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL,
+    0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL,
+    0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL,
+    0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL,
+    0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL,
+    0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL,
+    0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL,
+    0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL,
+    0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL,
+    0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL,
+    0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL,
+    0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL,
+    0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL,
+    0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL,
+    0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL,
+    0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL,
+    0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL,
+    0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL,
+    0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL,
+    0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL,
+    0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL,
+    0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL,
+    0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL,
+    0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL,
+    0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL,
+    0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL,
+    0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL,
+    0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL,
+    0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL,
+    0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL,
+    0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL,
+    0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL,
+    0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL,
+    0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL,
+    0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL,
+    0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL,
+    0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL,
+    0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL,
+    0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL,
+    0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL,
+    0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL,
+    0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL,
+    0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL,
+    0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL,
+    0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL,
+    0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL,
+    0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL,
+    0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL,
+    0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL,
+    0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL,
+    0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL,
+    0xed3498beUL
+  },
+  {
+    0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL,
+    0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL,
+    0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL,
+    0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 0x71f8ac42UL,
+    0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL,
+    0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL,
+    0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL,
+    0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL,
+    0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL,
+    0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL,
+    0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL,
+    0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL,
+    0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL,
+    0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL,
+    0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL,
+    0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL,
+    0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL,
+    0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL,
+    0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL,
+    0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL,
+    0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL,
+    0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL,
+    0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL,
+    0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL,
+    0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL,
+    0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL,
+    0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL,
+    0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL,
+    0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL,
+    0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL,
+    0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL,
+    0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL,
+    0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL,
+    0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL,
+    0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL,
+    0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL,
+    0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL,
+    0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL,
+    0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL,
+    0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL,
+    0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL,
+    0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL,
+    0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL,
+    0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL,
+    0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL,
+    0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL,
+    0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL,
+    0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL,
+    0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL,
+    0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL,
+    0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL,
+    0xf10605deUL
+#endif
+  }
+};
diff --git a/deps/libchdr/deps/zlib-1.2.11/deflate.c b/deps/libchdr/deps/zlib-1.2.11/deflate.c
new file mode 100644
index 00000000..1ec76144
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/deflate.c
@@ -0,0 +1,2163 @@
+/* deflate.c -- compress data using the deflation algorithm
+ * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process depends on being able to identify portions
+ *      of the input text which are identical to earlier input (within a
+ *      sliding window trailing behind the input currently being processed).
+ *
+ *      The most straightforward technique turns out to be the fastest for
+ *      most input files: try all possible matches and select the longest.
+ *      The key feature of this algorithm is that insertions into the string
+ *      dictionary are very simple and thus fast, and deletions are avoided
+ *      completely. Insertions are performed at each input character, whereas
+ *      string matches are performed only when the previous match ends. So it
+ *      is preferable to spend more time in matches to allow very fast string
+ *      insertions and avoid deletions. The matching algorithm for small
+ *      strings is inspired from that of Rabin & Karp. A brute force approach
+ *      is used to find longer strings when a small match has been found.
+ *      A similar algorithm is used in comic (by Jan-Mark Wams) and freeze
+ *      (by Leonid Broukhis).
+ *         A previous version of this file used a more sophisticated algorithm
+ *      (by Fiala and Greene) which is guaranteed to run in linear amortized
+ *      time, but has a larger average cost, uses more memory and is patented.
+ *      However the F&G algorithm may be faster for some highly redundant
+ *      files if the parameter max_chain_length (described below) is too large.
+ *
+ *  ACKNOWLEDGEMENTS
+ *
+ *      The idea of lazy evaluation of matches is due to Jan-Mark Wams, and
+ *      I found it in 'freeze' written by Leonid Broukhis.
+ *      Thanks to many people for bug reports and testing.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"DEFLATE Compressed Data Format Specification".
+ *      Available in http://tools.ietf.org/html/rfc1951
+ *
+ *      A description of the Rabin and Karp algorithm is given in the book
+ *         "Algorithms" by R. Sedgewick, Addison-Wesley, p252.
+ *
+ *      Fiala,E.R., and Greene,D.H.
+ *         Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595
+ *
+ */
+
+/* @(#) $Id$ */
+
+#include "deflate.h"
+
+const char deflate_copyright[] =
+   " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
+/*
+  If you use the zlib library in a product, an acknowledgment is welcome
+  in the documentation of your product. If for some reason you cannot
+  include such an acknowledgment, I would appreciate that you keep this
+  copyright string in the executable of your product.
+ */
+
+/* ===========================================================================
+ *  Function prototypes.
+ */
+typedef enum {
+    need_more,      /* block not completed, need more input or more output */
+    block_done,     /* block flush performed */
+    finish_started, /* finish started, need only more output at next deflate */
+    finish_done     /* finish done, accept no more input or output */
+} block_state;
+
+typedef block_state (*compress_func) OF((deflate_state *s, int flush));
+/* Compression function. Returns the block state after the call. */
+
+local int deflateStateCheck      OF((z_streamp strm));
+local void slide_hash     OF((deflate_state *s));
+local void fill_window    OF((deflate_state *s));
+local block_state deflate_stored OF((deflate_state *s, int flush));
+local block_state deflate_fast   OF((deflate_state *s, int flush));
+#ifndef FASTEST
+local block_state deflate_slow   OF((deflate_state *s, int flush));
+#endif
+local block_state deflate_rle    OF((deflate_state *s, int flush));
+local block_state deflate_huff   OF((deflate_state *s, int flush));
+local void lm_init        OF((deflate_state *s));
+local void putShortMSB    OF((deflate_state *s, uInt b));
+local void flush_pending  OF((z_streamp strm));
+local unsigned read_buf   OF((z_streamp strm, Bytef *buf, unsigned size));
+#ifdef ASMV
+#  pragma message("Assembler code may have bugs -- use at your own risk")
+      void match_init OF((void)); /* asm code initialization */
+      uInt longest_match  OF((deflate_state *s, IPos cur_match));
+#else
+local uInt longest_match  OF((deflate_state *s, IPos cur_match));
+#endif
+
+#ifdef ZLIB_DEBUG
+local  void check_match OF((deflate_state *s, IPos start, IPos match,
+                            int length));
+#endif
+
+/* ===========================================================================
+ * Local data
+ */
+
+#define NIL 0
+/* Tail of hash chains */
+
+#ifndef TOO_FAR
+#  define TOO_FAR 4096
+#endif
+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
+/* Values for max_lazy_match, good_match and max_chain_length, depending on
+ * the desired pack level (0..9). The values given below have been tuned to
+ * exclude worst case performance for pathological files. Better values may be
+ * found for specific files.
+ */
+typedef struct config_s {
+   ush good_length; /* reduce lazy search above this match length */
+   ush max_lazy;    /* do not perform lazy search above this match length */
+   ush nice_length; /* quit search above this match length */
+   ush max_chain;
+   compress_func func;
+} config;
+
+#ifdef FASTEST
+local const config configuration_table[2] = {
+/*      good lazy nice chain */
+/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
+/* 1 */ {4,    4,  8,    4, deflate_fast}}; /* max speed, no lazy matches */
+#else
+local const config configuration_table[10] = {
+/*      good lazy nice chain */
+/* 0 */ {0,    0,  0,    0, deflate_stored},  /* store only */
+/* 1 */ {4,    4,  8,    4, deflate_fast}, /* max speed, no lazy matches */
+/* 2 */ {4,    5, 16,    8, deflate_fast},
+/* 3 */ {4,    6, 32,   32, deflate_fast},
+
+/* 4 */ {4,    4, 16,   16, deflate_slow},  /* lazy matches */
+/* 5 */ {8,   16, 32,   32, deflate_slow},
+/* 6 */ {8,   16, 128, 128, deflate_slow},
+/* 7 */ {8,   32, 128, 256, deflate_slow},
+/* 8 */ {32, 128, 258, 1024, deflate_slow},
+/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */
+#endif
+
+/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
+ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
+ * meaning.
+ */
+
+/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */
+#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0))
+
+/* ===========================================================================
+ * Update a hash value with the given input byte
+ * IN  assertion: all calls to UPDATE_HASH are made with consecutive input
+ *    characters, so that a running hash key can be computed from the previous
+ *    key instead of complete recalculation each time.
+ */
+#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
+
+
+/* ===========================================================================
+ * Insert string str in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). Return
+ * the previous length of the hash chain.
+ * If this file is compiled with -DFASTEST, the compression level is forced
+ * to 1, and no hash chains are maintained.
+ * IN  assertion: all calls to INSERT_STRING are made with consecutive input
+ *    characters and the first MIN_MATCH bytes of str are valid (except for
+ *    the last MIN_MATCH-1 bytes of the input file).
+ */
+#ifdef FASTEST
+#define INSERT_STRING(s, str, match_head) \
+   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+    match_head = s->head[s->ins_h], \
+    s->head[s->ins_h] = (Pos)(str))
+#else
+#define INSERT_STRING(s, str, match_head) \
+   (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \
+    match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \
+    s->head[s->ins_h] = (Pos)(str))
+#endif
+
+/* ===========================================================================
+ * Initialize the hash table (avoiding 64K overflow for 16 bit systems).
+ * prev[] will be initialized on the fly.
+ */
+#define CLEAR_HASH(s) \
+    s->head[s->hash_size-1] = NIL; \
+    zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
+
+/* ===========================================================================
+ * Slide the hash table when sliding the window down (could be avoided with 32
+ * bit values at the expense of memory usage). We slide even when level == 0 to
+ * keep the hash table consistent if we switch back to level > 0 later.
+ */
+local void slide_hash(s)
+    deflate_state *s;
+{
+    unsigned n, m;
+    Posf *p;
+    uInt wsize = s->w_size;
+
+    n = s->hash_size;
+    p = &s->head[n];
+    do {
+        m = *--p;
+        *p = (Pos)(m >= wsize ? m - wsize : NIL);
+    } while (--n);
+    n = wsize;
+#ifndef FASTEST
+    p = &s->prev[n];
+    do {
+        m = *--p;
+        *p = (Pos)(m >= wsize ? m - wsize : NIL);
+        /* If n is not on any hash chain, prev[n] is garbage but
+         * its value will never be used.
+         */
+    } while (--n);
+#endif
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateInit_(strm, level, version, stream_size)
+    z_streamp strm;
+    int level;
+    const char *version;
+    int stream_size;
+{
+    return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
+                         Z_DEFAULT_STRATEGY, version, stream_size);
+    /* To do: ignore strm->next_in if we use it as window */
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
+                  version, stream_size)
+    z_streamp strm;
+    int  level;
+    int  method;
+    int  windowBits;
+    int  memLevel;
+    int  strategy;
+    const char *version;
+    int stream_size;
+{
+    deflate_state *s;
+    int wrap = 1;
+    static const char my_version[] = ZLIB_VERSION;
+
+    ushf *overlay;
+    /* We overlay pending_buf and d_buf+l_buf. This works since the average
+     * output size for (length,distance) codes is <= 24 bits.
+     */
+
+    if (version == Z_NULL || version[0] != my_version[0] ||
+        stream_size != sizeof(z_stream)) {
+        return Z_VERSION_ERROR;
+    }
+    if (strm == Z_NULL) return Z_STREAM_ERROR;
+
+    strm->msg = Z_NULL;
+    if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+#endif
+    }
+    if (strm->zfree == (free_func)0)
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zfree = zcfree;
+#endif
+
+#ifdef FASTEST
+    if (level != 0) level = 1;
+#else
+    if (level == Z_DEFAULT_COMPRESSION) level = 6;
+#endif
+
+    if (windowBits < 0) { /* suppress zlib wrapper */
+        wrap = 0;
+        windowBits = -windowBits;
+    }
+#ifdef GZIP
+    else if (windowBits > 15) {
+        wrap = 2;       /* write gzip wrapper instead */
+        windowBits -= 16;
+    }
+#endif
+    if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
+        windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
+        strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) {
+        return Z_STREAM_ERROR;
+    }
+    if (windowBits == 8) windowBits = 9;  /* until 256-byte window bug fixed */
+    s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state));
+    if (s == Z_NULL) return Z_MEM_ERROR;
+    strm->state = (struct internal_state FAR *)s;
+    s->strm = strm;
+    s->status = INIT_STATE;     /* to pass state test in deflateReset() */
+
+    s->wrap = wrap;
+    s->gzhead = Z_NULL;
+    s->w_bits = (uInt)windowBits;
+    s->w_size = 1 << s->w_bits;
+    s->w_mask = s->w_size - 1;
+
+    s->hash_bits = (uInt)memLevel + 7;
+    s->hash_size = 1 << s->hash_bits;
+    s->hash_mask = s->hash_size - 1;
+    s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
+
+    s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte));
+    s->prev   = (Posf *)  ZALLOC(strm, s->w_size, sizeof(Pos));
+    s->head   = (Posf *)  ZALLOC(strm, s->hash_size, sizeof(Pos));
+
+    s->high_water = 0;      /* nothing written to s->window yet */
+
+    s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */
+
+    overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2);
+    s->pending_buf = (uchf *) overlay;
+    s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L);
+
+    if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL ||
+        s->pending_buf == Z_NULL) {
+        s->status = FINISH_STATE;
+        strm->msg = ERR_MSG(Z_MEM_ERROR);
+        deflateEnd (strm);
+        return Z_MEM_ERROR;
+    }
+    s->d_buf = overlay + s->lit_bufsize/sizeof(ush);
+    s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize;
+
+    s->level = level;
+    s->strategy = strategy;
+    s->method = (Byte)method;
+
+    return deflateReset(strm);
+}
+
+/* =========================================================================
+ * Check for a valid deflate stream state. Return 0 if ok, 1 if not.
+ */
+local int deflateStateCheck (strm)
+    z_streamp strm;
+{
+    deflate_state *s;
+    if (strm == Z_NULL ||
+        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
+        return 1;
+    s = strm->state;
+    if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE &&
+#ifdef GZIP
+                                           s->status != GZIP_STATE &&
+#endif
+                                           s->status != EXTRA_STATE &&
+                                           s->status != NAME_STATE &&
+                                           s->status != COMMENT_STATE &&
+                                           s->status != HCRC_STATE &&
+                                           s->status != BUSY_STATE &&
+                                           s->status != FINISH_STATE))
+        return 1;
+    return 0;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
+    z_streamp strm;
+    const Bytef *dictionary;
+    uInt  dictLength;
+{
+    deflate_state *s;
+    uInt str, n;
+    int wrap;
+    unsigned avail;
+    z_const unsigned char *next;
+
+    if (deflateStateCheck(strm) || dictionary == Z_NULL)
+        return Z_STREAM_ERROR;
+    s = strm->state;
+    wrap = s->wrap;
+    if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead)
+        return Z_STREAM_ERROR;
+
+    /* when using zlib wrappers, compute Adler-32 for provided dictionary */
+    if (wrap == 1)
+        strm->adler = adler32(strm->adler, dictionary, dictLength);
+    s->wrap = 0;                    /* avoid computing Adler-32 in read_buf */
+
+    /* if dictionary would fill window, just replace the history */
+    if (dictLength >= s->w_size) {
+        if (wrap == 0) {            /* already empty otherwise */
+            CLEAR_HASH(s);
+            s->strstart = 0;
+            s->block_start = 0L;
+            s->insert = 0;
+        }
+        dictionary += dictLength - s->w_size;  /* use the tail */
+        dictLength = s->w_size;
+    }
+
+    /* insert dictionary into window and hash */
+    avail = strm->avail_in;
+    next = strm->next_in;
+    strm->avail_in = dictLength;
+    strm->next_in = (z_const Bytef *)dictionary;
+    fill_window(s);
+    while (s->lookahead >= MIN_MATCH) {
+        str = s->strstart;
+        n = s->lookahead - (MIN_MATCH-1);
+        do {
+            UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+            s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+            s->head[s->ins_h] = (Pos)str;
+            str++;
+        } while (--n);
+        s->strstart = str;
+        s->lookahead = MIN_MATCH-1;
+        fill_window(s);
+    }
+    s->strstart += s->lookahead;
+    s->block_start = (long)s->strstart;
+    s->insert = s->lookahead;
+    s->lookahead = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1;
+    s->match_available = 0;
+    strm->next_in = next;
+    strm->avail_in = avail;
+    s->wrap = wrap;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength)
+    z_streamp strm;
+    Bytef *dictionary;
+    uInt  *dictLength;
+{
+    deflate_state *s;
+    uInt len;
+
+    if (deflateStateCheck(strm))
+        return Z_STREAM_ERROR;
+    s = strm->state;
+    len = s->strstart + s->lookahead;
+    if (len > s->w_size)
+        len = s->w_size;
+    if (dictionary != Z_NULL && len)
+        zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len);
+    if (dictLength != Z_NULL)
+        *dictLength = len;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateResetKeep (strm)
+    z_streamp strm;
+{
+    deflate_state *s;
+
+    if (deflateStateCheck(strm)) {
+        return Z_STREAM_ERROR;
+    }
+
+    strm->total_in = strm->total_out = 0;
+    strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */
+    strm->data_type = Z_UNKNOWN;
+
+    s = (deflate_state *)strm->state;
+    s->pending = 0;
+    s->pending_out = s->pending_buf;
+
+    if (s->wrap < 0) {
+        s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */
+    }
+    s->status =
+#ifdef GZIP
+        s->wrap == 2 ? GZIP_STATE :
+#endif
+        s->wrap ? INIT_STATE : BUSY_STATE;
+    strm->adler =
+#ifdef GZIP
+        s->wrap == 2 ? crc32(0L, Z_NULL, 0) :
+#endif
+        adler32(0L, Z_NULL, 0);
+    s->last_flush = Z_NO_FLUSH;
+
+    _tr_init(s);
+
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateReset (strm)
+    z_streamp strm;
+{
+    int ret;
+
+    ret = deflateResetKeep(strm);
+    if (ret == Z_OK)
+        lm_init(strm->state);
+    return ret;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateSetHeader (strm, head)
+    z_streamp strm;
+    gz_headerp head;
+{
+    if (deflateStateCheck(strm) || strm->state->wrap != 2)
+        return Z_STREAM_ERROR;
+    strm->state->gzhead = head;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflatePending (strm, pending, bits)
+    unsigned *pending;
+    int *bits;
+    z_streamp strm;
+{
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    if (pending != Z_NULL)
+        *pending = strm->state->pending;
+    if (bits != Z_NULL)
+        *bits = strm->state->bi_valid;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflatePrime (strm, bits, value)
+    z_streamp strm;
+    int bits;
+    int value;
+{
+    deflate_state *s;
+    int put;
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    s = strm->state;
+    if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
+        return Z_BUF_ERROR;
+    do {
+        put = Buf_size - s->bi_valid;
+        if (put > bits)
+            put = bits;
+        s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid);
+        s->bi_valid += put;
+        _tr_flush_bits(s);
+        value >>= put;
+        bits -= put;
+    } while (bits);
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateParams(strm, level, strategy)
+    z_streamp strm;
+    int level;
+    int strategy;
+{
+    deflate_state *s;
+    compress_func func;
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    s = strm->state;
+
+#ifdef FASTEST
+    if (level != 0) level = 1;
+#else
+    if (level == Z_DEFAULT_COMPRESSION) level = 6;
+#endif
+    if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) {
+        return Z_STREAM_ERROR;
+    }
+    func = configuration_table[s->level].func;
+
+    if ((strategy != s->strategy || func != configuration_table[level].func) &&
+        s->high_water) {
+        /* Flush the last buffer: */
+        int err = deflate(strm, Z_BLOCK);
+        if (err == Z_STREAM_ERROR)
+            return err;
+        if (strm->avail_out == 0)
+            return Z_BUF_ERROR;
+    }
+    if (s->level != level) {
+        if (s->level == 0 && s->matches != 0) {
+            if (s->matches == 1)
+                slide_hash(s);
+            else
+                CLEAR_HASH(s);
+            s->matches = 0;
+        }
+        s->level = level;
+        s->max_lazy_match   = configuration_table[level].max_lazy;
+        s->good_match       = configuration_table[level].good_length;
+        s->nice_match       = configuration_table[level].nice_length;
+        s->max_chain_length = configuration_table[level].max_chain;
+    }
+    s->strategy = strategy;
+    return Z_OK;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
+    z_streamp strm;
+    int good_length;
+    int max_lazy;
+    int nice_length;
+    int max_chain;
+{
+    deflate_state *s;
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+    s = strm->state;
+    s->good_match = (uInt)good_length;
+    s->max_lazy_match = (uInt)max_lazy;
+    s->nice_match = nice_length;
+    s->max_chain_length = (uInt)max_chain;
+    return Z_OK;
+}
+
+/* =========================================================================
+ * For the default windowBits of 15 and memLevel of 8, this function returns
+ * a close to exact, as well as small, upper bound on the compressed size.
+ * They are coded as constants here for a reason--if the #define's are
+ * changed, then this function needs to be changed as well.  The return
+ * value for 15 and 8 only works for those exact settings.
+ *
+ * For any setting other than those defaults for windowBits and memLevel,
+ * the value returned is a conservative worst case for the maximum expansion
+ * resulting from using fixed blocks instead of stored blocks, which deflate
+ * can emit on compressed data for some combinations of the parameters.
+ *
+ * This function could be more sophisticated to provide closer upper bounds for
+ * every combination of windowBits and memLevel.  But even the conservative
+ * upper bound of about 14% expansion does not seem onerous for output buffer
+ * allocation.
+ */
+uLong ZEXPORT deflateBound(strm, sourceLen)
+    z_streamp strm;
+    uLong sourceLen;
+{
+    deflate_state *s;
+    uLong complen, wraplen;
+
+    /* conservative upper bound for compressed data */
+    complen = sourceLen +
+              ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;
+
+    /* if can't get parameters, return conservative bound plus zlib wrapper */
+    if (deflateStateCheck(strm))
+        return complen + 6;
+
+    /* compute wrapper length */
+    s = strm->state;
+    switch (s->wrap) {
+    case 0:                                 /* raw deflate */
+        wraplen = 0;
+        break;
+    case 1:                                 /* zlib wrapper */
+        wraplen = 6 + (s->strstart ? 4 : 0);
+        break;
+#ifdef GZIP
+    case 2:                                 /* gzip wrapper */
+        wraplen = 18;
+        if (s->gzhead != Z_NULL) {          /* user-supplied gzip header */
+            Bytef *str;
+            if (s->gzhead->extra != Z_NULL)
+                wraplen += 2 + s->gzhead->extra_len;
+            str = s->gzhead->name;
+            if (str != Z_NULL)
+                do {
+                    wraplen++;
+                } while (*str++);
+            str = s->gzhead->comment;
+            if (str != Z_NULL)
+                do {
+                    wraplen++;
+                } while (*str++);
+            if (s->gzhead->hcrc)
+                wraplen += 2;
+        }
+        break;
+#endif
+    default:                                /* for compiler happiness */
+        wraplen = 6;
+    }
+
+    /* if not default parameters, return conservative bound */
+    if (s->w_bits != 15 || s->hash_bits != 8 + 7)
+        return complen + wraplen;
+
+    /* default settings: return tight bound for that case */
+    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) +
+           (sourceLen >> 25) + 13 - 6 + wraplen;
+}
+
+/* =========================================================================
+ * Put a short in the pending buffer. The 16-bit value is put in MSB order.
+ * IN assertion: the stream state is correct and there is enough room in
+ * pending_buf.
+ */
+local void putShortMSB (s, b)
+    deflate_state *s;
+    uInt b;
+{
+    put_byte(s, (Byte)(b >> 8));
+    put_byte(s, (Byte)(b & 0xff));
+}
+
+/* =========================================================================
+ * Flush as much pending output as possible. All deflate() output, except for
+ * some deflate_stored() output, goes through this function so some
+ * applications may wish to modify it to avoid allocating a large
+ * strm->next_out buffer and copying into it. (See also read_buf()).
+ */
+local void flush_pending(strm)
+    z_streamp strm;
+{
+    unsigned len;
+    deflate_state *s = strm->state;
+
+    _tr_flush_bits(s);
+    len = s->pending;
+    if (len > strm->avail_out) len = strm->avail_out;
+    if (len == 0) return;
+
+    zmemcpy(strm->next_out, s->pending_out, len);
+    strm->next_out  += len;
+    s->pending_out  += len;
+    strm->total_out += len;
+    strm->avail_out -= len;
+    s->pending      -= len;
+    if (s->pending == 0) {
+        s->pending_out = s->pending_buf;
+    }
+}
+
+/* ===========================================================================
+ * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1].
+ */
+#define HCRC_UPDATE(beg) \
+    do { \
+        if (s->gzhead->hcrc && s->pending > (beg)) \
+            strm->adler = crc32(strm->adler, s->pending_buf + (beg), \
+                                s->pending - (beg)); \
+    } while (0)
+
+/* ========================================================================= */
+int ZEXPORT deflate (strm, flush)
+    z_streamp strm;
+    int flush;
+{
+    int old_flush; /* value of flush param for previous deflate call */
+    deflate_state *s;
+
+    if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) {
+        return Z_STREAM_ERROR;
+    }
+    s = strm->state;
+
+    if (strm->next_out == Z_NULL ||
+        (strm->avail_in != 0 && strm->next_in == Z_NULL) ||
+        (s->status == FINISH_STATE && flush != Z_FINISH)) {
+        ERR_RETURN(strm, Z_STREAM_ERROR);
+    }
+    if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
+
+    old_flush = s->last_flush;
+    s->last_flush = flush;
+
+    /* Flush as much pending output as possible */
+    if (s->pending != 0) {
+        flush_pending(strm);
+        if (strm->avail_out == 0) {
+            /* Since avail_out is 0, deflate will be called again with
+             * more output space, but possibly with both pending and
+             * avail_in equal to zero. There won't be anything to do,
+             * but this is not an error situation so make sure we
+             * return OK instead of BUF_ERROR at next call of deflate:
+             */
+            s->last_flush = -1;
+            return Z_OK;
+        }
+
+    /* Make sure there is something to do and avoid duplicate consecutive
+     * flushes. For repeated and useless calls with Z_FINISH, we keep
+     * returning Z_STREAM_END instead of Z_BUF_ERROR.
+     */
+    } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) &&
+               flush != Z_FINISH) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* User must not provide more input after the first FINISH: */
+    if (s->status == FINISH_STATE && strm->avail_in != 0) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* Write the header */
+    if (s->status == INIT_STATE) {
+        /* zlib header */
+        uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+        uInt level_flags;
+
+        if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
+            level_flags = 0;
+        else if (s->level < 6)
+            level_flags = 1;
+        else if (s->level == 6)
+            level_flags = 2;
+        else
+            level_flags = 3;
+        header |= (level_flags << 6);
+        if (s->strstart != 0) header |= PRESET_DICT;
+        header += 31 - (header % 31);
+
+        putShortMSB(s, header);
+
+        /* Save the adler32 of the preset dictionary: */
+        if (s->strstart != 0) {
+            putShortMSB(s, (uInt)(strm->adler >> 16));
+            putShortMSB(s, (uInt)(strm->adler & 0xffff));
+        }
+        strm->adler = adler32(0L, Z_NULL, 0);
+        s->status = BUSY_STATE;
+
+        /* Compression must start with an empty pending buffer */
+        flush_pending(strm);
+        if (s->pending != 0) {
+            s->last_flush = -1;
+            return Z_OK;
+        }
+    }
+#ifdef GZIP
+    if (s->status == GZIP_STATE) {
+        /* gzip header */
+        strm->adler = crc32(0L, Z_NULL, 0);
+        put_byte(s, 31);
+        put_byte(s, 139);
+        put_byte(s, 8);
+        if (s->gzhead == Z_NULL) {
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, OS_CODE);
+            s->status = BUSY_STATE;
+
+            /* Compression must start with an empty pending buffer */
+            flush_pending(strm);
+            if (s->pending != 0) {
+                s->last_flush = -1;
+                return Z_OK;
+            }
+        }
+        else {
+            put_byte(s, (s->gzhead->text ? 1 : 0) +
+                     (s->gzhead->hcrc ? 2 : 0) +
+                     (s->gzhead->extra == Z_NULL ? 0 : 4) +
+                     (s->gzhead->name == Z_NULL ? 0 : 8) +
+                     (s->gzhead->comment == Z_NULL ? 0 : 16)
+                     );
+            put_byte(s, (Byte)(s->gzhead->time & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, s->gzhead->os & 0xff);
+            if (s->gzhead->extra != Z_NULL) {
+                put_byte(s, s->gzhead->extra_len & 0xff);
+                put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
+            }
+            if (s->gzhead->hcrc)
+                strm->adler = crc32(strm->adler, s->pending_buf,
+                                    s->pending);
+            s->gzindex = 0;
+            s->status = EXTRA_STATE;
+        }
+    }
+    if (s->status == EXTRA_STATE) {
+        if (s->gzhead->extra != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex;
+            while (s->pending + left > s->pending_buf_size) {
+                uInt copy = s->pending_buf_size - s->pending;
+                zmemcpy(s->pending_buf + s->pending,
+                        s->gzhead->extra + s->gzindex, copy);
+                s->pending = s->pending_buf_size;
+                HCRC_UPDATE(beg);
+                s->gzindex += copy;
+                flush_pending(strm);
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
+                }
+                beg = 0;
+                left -= copy;
+            }
+            zmemcpy(s->pending_buf + s->pending,
+                    s->gzhead->extra + s->gzindex, left);
+            s->pending += left;
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
+        }
+        s->status = NAME_STATE;
+    }
+    if (s->status == NAME_STATE) {
+        if (s->gzhead->name != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            int val;
+            do {
+                if (s->pending == s->pending_buf_size) {
+                    HCRC_UPDATE(beg);
+                    flush_pending(strm);
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
+                    }
+                    beg = 0;
+                }
+                val = s->gzhead->name[s->gzindex++];
+                put_byte(s, val);
+            } while (val != 0);
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
+        }
+        s->status = COMMENT_STATE;
+    }
+    if (s->status == COMMENT_STATE) {
+        if (s->gzhead->comment != Z_NULL) {
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            int val;
+            do {
+                if (s->pending == s->pending_buf_size) {
+                    HCRC_UPDATE(beg);
+                    flush_pending(strm);
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
+                    }
+                    beg = 0;
+                }
+                val = s->gzhead->comment[s->gzindex++];
+                put_byte(s, val);
+            } while (val != 0);
+            HCRC_UPDATE(beg);
+        }
+        s->status = HCRC_STATE;
+    }
+    if (s->status == HCRC_STATE) {
+        if (s->gzhead->hcrc) {
+            if (s->pending + 2 > s->pending_buf_size) {
+                flush_pending(strm);
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
+                }
+            }
+            put_byte(s, (Byte)(strm->adler & 0xff));
+            put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+            strm->adler = crc32(0L, Z_NULL, 0);
+        }
+        s->status = BUSY_STATE;
+
+        /* Compression must start with an empty pending buffer */
+        flush_pending(strm);
+        if (s->pending != 0) {
+            s->last_flush = -1;
+            return Z_OK;
+        }
+    }
+#endif
+
+    /* Start a new block or continue the current one.
+     */
+    if (strm->avail_in != 0 || s->lookahead != 0 ||
+        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
+        block_state bstate;
+
+        bstate = s->level == 0 ? deflate_stored(s, flush) :
+                 s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
+                 s->strategy == Z_RLE ? deflate_rle(s, flush) :
+                 (*(configuration_table[s->level].func))(s, flush);
+
+        if (bstate == finish_started || bstate == finish_done) {
+            s->status = FINISH_STATE;
+        }
+        if (bstate == need_more || bstate == finish_started) {
+            if (strm->avail_out == 0) {
+                s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
+            }
+            return Z_OK;
+            /* If flush != Z_NO_FLUSH && avail_out == 0, the next call
+             * of deflate should use the same flush parameter to make sure
+             * that the flush is complete. So we don't have to output an
+             * empty block here, this will be done at next call. This also
+             * ensures that for a very small output buffer, we emit at most
+             * one empty block.
+             */
+        }
+        if (bstate == block_done) {
+            if (flush == Z_PARTIAL_FLUSH) {
+                _tr_align(s);
+            } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */
+                _tr_stored_block(s, (char*)0, 0L, 0);
+                /* For a full flush, this empty block will be recognized
+                 * as a special marker by inflate_sync().
+                 */
+                if (flush == Z_FULL_FLUSH) {
+                    CLEAR_HASH(s);             /* forget history */
+                    if (s->lookahead == 0) {
+                        s->strstart = 0;
+                        s->block_start = 0L;
+                        s->insert = 0;
+                    }
+                }
+            }
+            flush_pending(strm);
+            if (strm->avail_out == 0) {
+              s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
+              return Z_OK;
+            }
+        }
+    }
+
+    if (flush != Z_FINISH) return Z_OK;
+    if (s->wrap <= 0) return Z_STREAM_END;
+
+    /* Write the trailer */
+#ifdef GZIP
+    if (s->wrap == 2) {
+        put_byte(s, (Byte)(strm->adler & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 16) & 0xff));
+        put_byte(s, (Byte)((strm->adler >> 24) & 0xff));
+        put_byte(s, (Byte)(strm->total_in & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 8) & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 16) & 0xff));
+        put_byte(s, (Byte)((strm->total_in >> 24) & 0xff));
+    }
+    else
+#endif
+    {
+        putShortMSB(s, (uInt)(strm->adler >> 16));
+        putShortMSB(s, (uInt)(strm->adler & 0xffff));
+    }
+    flush_pending(strm);
+    /* If avail_out is zero, the application will call deflate again
+     * to flush the rest.
+     */
+    if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
+    return s->pending != 0 ? Z_OK : Z_STREAM_END;
+}
+
+/* ========================================================================= */
+int ZEXPORT deflateEnd (strm)
+    z_streamp strm;
+{
+    int status;
+
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
+
+    status = strm->state->status;
+
+    /* Deallocate in reverse order of allocations: */
+    TRY_FREE(strm, strm->state->pending_buf);
+    TRY_FREE(strm, strm->state->head);
+    TRY_FREE(strm, strm->state->prev);
+    TRY_FREE(strm, strm->state->window);
+
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;
+
+    return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK;
+}
+
+/* =========================================================================
+ * Copy the source state to the destination state.
+ * To simplify the source, this is not supported for 16-bit MSDOS (which
+ * doesn't have enough memory anyway to duplicate compression states).
+ */
+int ZEXPORT deflateCopy (dest, source)
+    z_streamp dest;
+    z_streamp source;
+{
+#ifdef MAXSEG_64K
+    return Z_STREAM_ERROR;
+#else
+    deflate_state *ds;
+    deflate_state *ss;
+    ushf *overlay;
+
+
+    if (deflateStateCheck(source) || dest == Z_NULL) {
+        return Z_STREAM_ERROR;
+    }
+
+    ss = source->state;
+
+    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
+
+    ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state));
+    if (ds == Z_NULL) return Z_MEM_ERROR;
+    dest->state = (struct internal_state FAR *) ds;
+    zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state));
+    ds->strm = dest;
+
+    ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte));
+    ds->prev   = (Posf *)  ZALLOC(dest, ds->w_size, sizeof(Pos));
+    ds->head   = (Posf *)  ZALLOC(dest, ds->hash_size, sizeof(Pos));
+    overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2);
+    ds->pending_buf = (uchf *) overlay;
+
+    if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL ||
+        ds->pending_buf == Z_NULL) {
+        deflateEnd (dest);
+        return Z_MEM_ERROR;
+    }
+    /* following zmemcpy do not work for 16-bit MSDOS */
+    zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte));
+    zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos));
+    zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos));
+    zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);
+
+    ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
+    ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush);
+    ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize;
+
+    ds->l_desc.dyn_tree = ds->dyn_ltree;
+    ds->d_desc.dyn_tree = ds->dyn_dtree;
+    ds->bl_desc.dyn_tree = ds->bl_tree;
+
+    return Z_OK;
+#endif /* MAXSEG_64K */
+}
+
+/* ===========================================================================
+ * Read a new buffer from the current input stream, update the adler32
+ * and total number of bytes read.  All deflate() input goes through
+ * this function so some applications may wish to modify it to avoid
+ * allocating a large strm->next_in buffer and copying from it.
+ * (See also flush_pending()).
+ */
+local unsigned read_buf(strm, buf, size)
+    z_streamp strm;
+    Bytef *buf;
+    unsigned size;
+{
+    unsigned len = strm->avail_in;
+
+    if (len > size) len = size;
+    if (len == 0) return 0;
+
+    strm->avail_in  -= len;
+
+    zmemcpy(buf, strm->next_in, len);
+    if (strm->state->wrap == 1) {
+        strm->adler = adler32(strm->adler, buf, len);
+    }
+#ifdef GZIP
+    else if (strm->state->wrap == 2) {
+        strm->adler = crc32(strm->adler, buf, len);
+    }
+#endif
+    strm->next_in  += len;
+    strm->total_in += len;
+
+    return len;
+}
+
+/* ===========================================================================
+ * Initialize the "longest match" routines for a new zlib stream
+ */
+local void lm_init (s)
+    deflate_state *s;
+{
+    s->window_size = (ulg)2L*s->w_size;
+
+    CLEAR_HASH(s);
+
+    /* Set the default configuration parameters:
+     */
+    s->max_lazy_match   = configuration_table[s->level].max_lazy;
+    s->good_match       = configuration_table[s->level].good_length;
+    s->nice_match       = configuration_table[s->level].nice_length;
+    s->max_chain_length = configuration_table[s->level].max_chain;
+
+    s->strstart = 0;
+    s->block_start = 0L;
+    s->lookahead = 0;
+    s->insert = 0;
+    s->match_length = s->prev_length = MIN_MATCH-1;
+    s->match_available = 0;
+    s->ins_h = 0;
+#ifndef FASTEST
+#ifdef ASMV
+    match_init(); /* initialize the asm code */
+#endif
+#endif
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Set match_start to the longest match starting at the given string and
+ * return its length. Matches shorter or equal to prev_length are discarded,
+ * in which case the result is equal to prev_length and match_start is
+ * garbage.
+ * IN assertions: cur_match is the head of the hash chain for the current
+ *   string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
+ * OUT assertion: the match length is not greater than s->lookahead.
+ */
+#ifndef ASMV
+/* For 80x86 and 680x0, an optimized version will be provided in match.asm or
+ * match.S. The code will be functionally equivalent.
+ */
+local uInt longest_match(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    unsigned chain_length = s->max_chain_length;/* max hash chain length */
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                      /* matched string */
+    register int len;                           /* length of current match */
+    int best_len = (int)s->prev_length;         /* best match length so far */
+    int nice_match = s->nice_match;             /* stop if match long enough */
+    IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+        s->strstart - (IPos)MAX_DIST(s) : NIL;
+    /* Stop when cur_match becomes <= limit. To simplify the code,
+     * we prevent matches with the string of window index 0.
+     */
+    Posf *prev = s->prev;
+    uInt wmask = s->w_mask;
+
+#ifdef UNALIGNED_OK
+    /* Compare two bytes at a time. Note: this is not always beneficial.
+     * Try with and without -DUNALIGNED_OK to check.
+     */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1;
+    register ush scan_start = *(ushf*)scan;
+    register ush scan_end   = *(ushf*)(scan+best_len-1);
+#else
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+    register Byte scan_end1  = scan[best_len-1];
+    register Byte scan_end   = scan[best_len];
+#endif
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    /* Do not waste too much time if we already have a good match: */
+    if (s->prev_length >= s->good_match) {
+        chain_length >>= 2;
+    }
+    /* Do not look for matches beyond the end of the input. This is necessary
+     * to make deflate deterministic.
+     */
+    if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead;
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    do {
+        Assert(cur_match < s->strstart, "no future");
+        match = s->window + cur_match;
+
+        /* Skip to next match if the match length cannot increase
+         * or if the match length is less than 2.  Note that the checks below
+         * for insufficient lookahead only occur occasionally for performance
+         * reasons.  Therefore uninitialized memory will be accessed, and
+         * conditional jumps will be made that depend on those values.
+         * However the length of the match is limited to the lookahead, so
+         * the output of deflate is not affected by the uninitialized values.
+         */
+#if (defined(UNALIGNED_OK) && MAX_MATCH == 258)
+        /* This code assumes sizeof(unsigned short) == 2. Do not use
+         * UNALIGNED_OK if your compiler uses a different size.
+         */
+        if (*(ushf*)(match+best_len-1) != scan_end ||
+            *(ushf*)match != scan_start) continue;
+
+        /* It is not necessary to compare scan[2] and match[2] since they are
+         * always equal when the other bytes match, given that the hash keys
+         * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at
+         * strstart+3, +5, ... up to strstart+257. We check for insufficient
+         * lookahead only every 4th comparison; the 128th check will be made
+         * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is
+         * necessary to put more guard bytes at the end of the window, or
+         * to check more often for insufficient lookahead.
+         */
+        Assert(scan[2] == match[2], "scan[2]?");
+        scan++, match++;
+        do {
+        } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 *(ushf*)(scan+=2) == *(ushf*)(match+=2) &&
+                 scan < strend);
+        /* The funny "do {}" generates better code on most compilers */
+
+        /* Here, scan <= window+strstart+257 */
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+        if (*scan == *match) scan++;
+
+        len = (MAX_MATCH - 1) - (int)(strend-scan);
+        scan = strend - (MAX_MATCH-1);
+
+#else /* UNALIGNED_OK */
+
+        if (match[best_len]   != scan_end  ||
+            match[best_len-1] != scan_end1 ||
+            *match            != *scan     ||
+            *++match          != scan[1])      continue;
+
+        /* The check at best_len-1 can be removed because it will be made
+         * again later. (This heuristic is not always a win.)
+         * It is not necessary to compare scan[2] and match[2] since they
+         * are always equal when the other bytes match, given that
+         * the hash keys are equal and that HASH_BITS >= 8.
+         */
+        scan += 2, match++;
+        Assert(*scan == *match, "match[2]?");
+
+        /* We check for insufficient lookahead only every 8th comparison;
+         * the 256th check will be made at strstart+258.
+         */
+        do {
+        } while (*++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 *++scan == *++match && *++scan == *++match &&
+                 scan < strend);
+
+        Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+        len = MAX_MATCH - (int)(strend - scan);
+        scan = strend - MAX_MATCH;
+
+#endif /* UNALIGNED_OK */
+
+        if (len > best_len) {
+            s->match_start = cur_match;
+            best_len = len;
+            if (len >= nice_match) break;
+#ifdef UNALIGNED_OK
+            scan_end = *(ushf*)(scan+best_len-1);
+#else
+            scan_end1  = scan[best_len-1];
+            scan_end   = scan[best_len];
+#endif
+        }
+    } while ((cur_match = prev[cur_match & wmask]) > limit
+             && --chain_length != 0);
+
+    if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+    return s->lookahead;
+}
+#endif /* ASMV */
+
+#else /* FASTEST */
+
+/* ---------------------------------------------------------------------------
+ * Optimized version for FASTEST only
+ */
+local uInt longest_match(s, cur_match)
+    deflate_state *s;
+    IPos cur_match;                             /* current match */
+{
+    register Bytef *scan = s->window + s->strstart; /* current string */
+    register Bytef *match;                       /* matched string */
+    register int len;                           /* length of current match */
+    register Bytef *strend = s->window + s->strstart + MAX_MATCH;
+
+    /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
+     * It is easy to get rid of this optimization if necessary.
+     */
+    Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");
+
+    Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
+
+    Assert(cur_match < s->strstart, "no future");
+
+    match = s->window + cur_match;
+
+    /* Return failure if the match length is less than 2:
+     */
+    if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1;
+
+    /* The check at best_len-1 can be removed because it will be made
+     * again later. (This heuristic is not always a win.)
+     * It is not necessary to compare scan[2] and match[2] since they
+     * are always equal when the other bytes match, given that
+     * the hash keys are equal and that HASH_BITS >= 8.
+     */
+    scan += 2, match += 2;
+    Assert(*scan == *match, "match[2]?");
+
+    /* We check for insufficient lookahead only every 8th comparison;
+     * the 256th check will be made at strstart+258.
+     */
+    do {
+    } while (*++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             *++scan == *++match && *++scan == *++match &&
+             scan < strend);
+
+    Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
+
+    len = MAX_MATCH - (int)(strend - scan);
+
+    if (len < MIN_MATCH) return MIN_MATCH - 1;
+
+    s->match_start = cur_match;
+    return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead;
+}
+
+#endif /* FASTEST */
+
+#ifdef ZLIB_DEBUG
+
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
+/* ===========================================================================
+ * Check that the match at match_start is indeed a match.
+ */
+local void check_match(s, start, match, length)
+    deflate_state *s;
+    IPos start, match;
+    int length;
+{
+    /* check that the match is indeed a match */
+    if (zmemcmp(s->window + match,
+                s->window + start, length) != EQUAL) {
+        fprintf(stderr, " start %u, match %u, length %d\n",
+                start, match, length);
+        do {
+            fprintf(stderr, "%c%c", s->window[match++], s->window[start++]);
+        } while (--length != 0);
+        z_error("invalid match");
+    }
+    if (z_verbose > 1) {
+        fprintf(stderr,"\\[%d,%d]", start-match, length);
+        do { putc(s->window[start++], stderr); } while (--length != 0);
+    }
+}
+#else
+#  define check_match(s, start, match, length)
+#endif /* ZLIB_DEBUG */
+
+/* ===========================================================================
+ * Fill the window when the lookahead becomes insufficient.
+ * Updates strstart and lookahead.
+ *
+ * IN assertion: lookahead < MIN_LOOKAHEAD
+ * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
+ *    At least one byte has been read, or avail_in == 0; reads are
+ *    performed for at least two bytes (required for the zip translate_eol
+ *    option -- not supported here).
+ */
+local void fill_window(s)
+    deflate_state *s;
+{
+    unsigned n;
+    unsigned more;    /* Amount of free space at the end of the window. */
+    uInt wsize = s->w_size;
+
+    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
+
+    do {
+        more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart);
+
+        /* Deal with !@#$% 64K limit: */
+        if (sizeof(int) <= 2) {
+            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
+                more = wsize;
+
+            } else if (more == (unsigned)(-1)) {
+                /* Very unlikely, but possible on 16 bit machine if
+                 * strstart == 0 && lookahead == 1 (input done a byte at time)
+                 */
+                more--;
+            }
+        }
+
+        /* If the window is almost full and there is insufficient lookahead,
+         * move the upper half to the lower one to make room in the upper half.
+         */
+        if (s->strstart >= wsize+MAX_DIST(s)) {
+
+            zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more);
+            s->match_start -= wsize;
+            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
+            s->block_start -= (long) wsize;
+            slide_hash(s);
+            more += wsize;
+        }
+        if (s->strm->avail_in == 0) break;
+
+        /* If there was no sliding:
+         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
+         *    more == window_size - lookahead - strstart
+         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
+         * => more >= window_size - 2*WSIZE + 2
+         * In the BIG_MEM or MMAP case (not yet supported),
+         *   window_size == input_size + MIN_LOOKAHEAD  &&
+         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
+         * Otherwise, window_size == 2*WSIZE so more >= 2.
+         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
+         */
+        Assert(more >= 2, "more < 2");
+
+        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
+        s->lookahead += n;
+
+        /* Initialize the hash value now that we have some input: */
+        if (s->lookahead + s->insert >= MIN_MATCH) {
+            uInt str = s->strstart - s->insert;
+            s->ins_h = s->window[str];
+            UPDATE_HASH(s, s->ins_h, s->window[str + 1]);
+#if MIN_MATCH != 3
+            Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+            while (s->insert) {
+                UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]);
+#ifndef FASTEST
+                s->prev[str & s->w_mask] = s->head[s->ins_h];
+#endif
+                s->head[s->ins_h] = (Pos)str;
+                str++;
+                s->insert--;
+                if (s->lookahead + s->insert < MIN_MATCH)
+                    break;
+            }
+        }
+        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
+         * but this is not important since only literal bytes will be emitted.
+         */
+
+    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
+
+    /* If the WIN_INIT bytes after the end of the current data have never been
+     * written, then zero those bytes in order to avoid memory check reports of
+     * the use of uninitialized (or uninitialised as Julian writes) bytes by
+     * the longest match routines.  Update the high water mark for the next
+     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
+     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
+     */
+    if (s->high_water < s->window_size) {
+        ulg curr = s->strstart + (ulg)(s->lookahead);
+        ulg init;
+
+        if (s->high_water < curr) {
+            /* Previous high water mark below current data -- zero WIN_INIT
+             * bytes or up to end of window, whichever is less.
+             */
+            init = s->window_size - curr;
+            if (init > WIN_INIT)
+                init = WIN_INIT;
+            zmemzero(s->window + curr, (unsigned)init);
+            s->high_water = curr + init;
+        }
+        else if (s->high_water < (ulg)curr + WIN_INIT) {
+            /* High water mark at or above current data, but below current data
+             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
+             * to end of window, whichever is less.
+             */
+            init = (ulg)curr + WIN_INIT - s->high_water;
+            if (init > s->window_size - s->high_water)
+                init = s->window_size - s->high_water;
+            zmemzero(s->window + s->high_water, (unsigned)init);
+            s->high_water += init;
+        }
+    }
+
+    Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD,
+           "not enough room for search");
+}
+
+/* ===========================================================================
+ * Flush the current block, with given end-of-file flag.
+ * IN assertion: strstart is set to the end of the current match.
+ */
+#define FLUSH_BLOCK_ONLY(s, last) { \
+   _tr_flush_block(s, (s->block_start >= 0L ? \
+                   (charf *)&s->window[(unsigned)s->block_start] : \
+                   (charf *)Z_NULL), \
+                (ulg)((long)s->strstart - s->block_start), \
+                (last)); \
+   s->block_start = s->strstart; \
+   flush_pending(s->strm); \
+   Tracev((stderr,"[FLUSH]")); \
+}
+
+/* Same but force premature exit if necessary. */
+#define FLUSH_BLOCK(s, last) { \
+   FLUSH_BLOCK_ONLY(s, last); \
+   if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \
+}
+
+/* Maximum stored block length in deflate format (not including header). */
+#define MAX_STORED 65535
+
+/* Minimum of a and b. */
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+
+/* ===========================================================================
+ * Copy without compression as much as possible from the input stream, return
+ * the current block state.
+ *
+ * In case deflateParams() is used to later switch to a non-zero compression
+ * level, s->matches (otherwise unused when storing) keeps track of the number
+ * of hash table slides to perform. If s->matches is 1, then one hash table
+ * slide will be done when switching. If s->matches is 2, the maximum value
+ * allowed here, then the hash table will be cleared, since two or more slides
+ * is the same as a clear.
+ *
+ * deflate_stored() is written to minimize the number of times an input byte is
+ * copied. It is most efficient with large input and output buffers, which
+ * maximizes the opportunites to have a single copy from next_in to next_out.
+ */
+local block_state deflate_stored(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    /* Smallest worthy block size when not flushing or finishing. By default
+     * this is 32K. This can be as small as 507 bytes for memLevel == 1. For
+     * large input and output buffers, the stored block size will be larger.
+     */
+    unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size);
+
+    /* Copy as many min_block or larger stored blocks directly to next_out as
+     * possible. If flushing, copy the remaining available input to next_out as
+     * stored blocks, if there is enough space.
+     */
+    unsigned len, left, have, last = 0;
+    unsigned used = s->strm->avail_in;
+    do {
+        /* Set len to the maximum size block that we can copy directly with the
+         * available input data and output space. Set left to how much of that
+         * would be copied from what's left in the window.
+         */
+        len = MAX_STORED;       /* maximum deflate stored block length */
+        have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        if (s->strm->avail_out < have)          /* need room for header */
+            break;
+            /* maximum stored block length that will fit in avail_out: */
+        have = s->strm->avail_out - have;
+        left = s->strstart - s->block_start;    /* bytes left in window */
+        if (len > (ulg)left + s->strm->avail_in)
+            len = left + s->strm->avail_in;     /* limit len to the input */
+        if (len > have)
+            len = have;                         /* limit len to the output */
+
+        /* If the stored block would be less than min_block in length, or if
+         * unable to copy all of the available input when flushing, then try
+         * copying to the window and the pending buffer instead. Also don't
+         * write an empty block when flushing -- deflate() does that.
+         */
+        if (len < min_block && ((len == 0 && flush != Z_FINISH) ||
+                                flush == Z_NO_FLUSH ||
+                                len != left + s->strm->avail_in))
+            break;
+
+        /* Make a dummy stored block in pending to get the header bytes,
+         * including any pending bits. This also updates the debugging counts.
+         */
+        last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0;
+        _tr_stored_block(s, (char *)0, 0L, last);
+
+        /* Replace the lengths in the dummy stored block with len. */
+        s->pending_buf[s->pending - 4] = len;
+        s->pending_buf[s->pending - 3] = len >> 8;
+        s->pending_buf[s->pending - 2] = ~len;
+        s->pending_buf[s->pending - 1] = ~len >> 8;
+
+        /* Write the stored block header bytes. */
+        flush_pending(s->strm);
+
+#ifdef ZLIB_DEBUG
+        /* Update debugging counts for the data about to be copied. */
+        s->compressed_len += len << 3;
+        s->bits_sent += len << 3;
+#endif
+
+        /* Copy uncompressed bytes from the window to next_out. */
+        if (left) {
+            if (left > len)
+                left = len;
+            zmemcpy(s->strm->next_out, s->window + s->block_start, left);
+            s->strm->next_out += left;
+            s->strm->avail_out -= left;
+            s->strm->total_out += left;
+            s->block_start += left;
+            len -= left;
+        }
+
+        /* Copy uncompressed bytes directly from next_in to next_out, updating
+         * the check value.
+         */
+        if (len) {
+            read_buf(s->strm, s->strm->next_out, len);
+            s->strm->next_out += len;
+            s->strm->avail_out -= len;
+            s->strm->total_out += len;
+        }
+    } while (last == 0);
+
+    /* Update the sliding window with the last s->w_size bytes of the copied
+     * data, or append all of the copied data to the existing window if less
+     * than s->w_size bytes were copied. Also update the number of bytes to
+     * insert in the hash tables, in the event that deflateParams() switches to
+     * a non-zero compression level.
+     */
+    used -= s->strm->avail_in;      /* number of input bytes directly copied */
+    if (used) {
+        /* If any input was used, then no unused input remains in the window,
+         * therefore s->block_start == s->strstart.
+         */
+        if (used >= s->w_size) {    /* supplant the previous history */
+            s->matches = 2;         /* clear hash */
+            zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
+            s->strstart = s->w_size;
+        }
+        else {
+            if (s->window_size - s->strstart <= used) {
+                /* Slide the window down. */
+                s->strstart -= s->w_size;
+                zmemcpy(s->window, s->window + s->w_size, s->strstart);
+                if (s->matches < 2)
+                    s->matches++;   /* add a pending slide_hash() */
+            }
+            zmemcpy(s->window + s->strstart, s->strm->next_in - used, used);
+            s->strstart += used;
+        }
+        s->block_start = s->strstart;
+        s->insert += MIN(used, s->w_size - s->insert);
+    }
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* If the last block was written to next_out, then done. */
+    if (last)
+        return finish_done;
+
+    /* If flushing and all input has been consumed, then done. */
+    if (flush != Z_NO_FLUSH && flush != Z_FINISH &&
+        s->strm->avail_in == 0 && (long)s->strstart == s->block_start)
+        return block_done;
+
+    /* Fill the window with any remaining input. */
+    have = s->window_size - s->strstart - 1;
+    if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) {
+        /* Slide the window down. */
+        s->block_start -= s->w_size;
+        s->strstart -= s->w_size;
+        zmemcpy(s->window, s->window + s->w_size, s->strstart);
+        if (s->matches < 2)
+            s->matches++;           /* add a pending slide_hash() */
+        have += s->w_size;          /* more space now */
+    }
+    if (have > s->strm->avail_in)
+        have = s->strm->avail_in;
+    if (have) {
+        read_buf(s->strm, s->window + s->strstart, have);
+        s->strstart += have;
+    }
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* There was not enough avail_out to write a complete worthy or flushed
+     * stored block to next_out. Write a stored block to pending instead, if we
+     * have enough input for a worthy block, or if flushing and there is enough
+     * room for the remaining input as a stored block in the pending buffer.
+     */
+    have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        /* maximum stored block length that will fit in pending: */
+    have = MIN(s->pending_buf_size - have, MAX_STORED);
+    min_block = MIN(have, s->w_size);
+    left = s->strstart - s->block_start;
+    if (left >= min_block ||
+        ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH &&
+         s->strm->avail_in == 0 && left <= have)) {
+        len = MIN(left, have);
+        last = flush == Z_FINISH && s->strm->avail_in == 0 &&
+               len == left ? 1 : 0;
+        _tr_stored_block(s, (charf *)s->window + s->block_start, len, last);
+        s->block_start += len;
+        flush_pending(s->strm);
+    }
+
+    /* We've done all we can with the available input and output. */
+    return last ? finish_started : need_more;
+}
+
+/* ===========================================================================
+ * Compress as much as possible from the input stream, return the current
+ * block state.
+ * This function does not perform lazy evaluation of matches and inserts
+ * new strings in the dictionary only for unmatched strings or for short
+ * matches. It is used only for the fast compression options.
+ */
+local block_state deflate_fast(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head;       /* head of the hash chain */
+    int bflush;           /* set if current block must be flushed */
+
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the
+         * dictionary, and set hash_head to the head of the hash chain:
+         */
+        hash_head = NIL;
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Find the longest match, discarding those <= prev_length.
+         * At this point we have always match_length < MIN_MATCH
+         */
+        if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            s->match_length = longest_match (s, hash_head);
+            /* longest_match() sets match_start */
+        }
+        if (s->match_length >= MIN_MATCH) {
+            check_match(s, s->strstart, s->match_start, s->match_length);
+
+            _tr_tally_dist(s, s->strstart - s->match_start,
+                           s->match_length - MIN_MATCH, bflush);
+
+            s->lookahead -= s->match_length;
+
+            /* Insert new strings in the hash table only if the match length
+             * is not too large. This saves time but degrades compression.
+             */
+#ifndef FASTEST
+            if (s->match_length <= s->max_insert_length &&
+                s->lookahead >= MIN_MATCH) {
+                s->match_length--; /* string at strstart already in table */
+                do {
+                    s->strstart++;
+                    INSERT_STRING(s, s->strstart, hash_head);
+                    /* strstart never exceeds WSIZE-MAX_MATCH, so there are
+                     * always MIN_MATCH bytes ahead.
+                     */
+                } while (--s->match_length != 0);
+                s->strstart++;
+            } else
+#endif
+            {
+                s->strstart += s->match_length;
+                s->match_length = 0;
+                s->ins_h = s->window[s->strstart];
+                UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]);
+#if MIN_MATCH != 3
+                Call UPDATE_HASH() MIN_MATCH-3 more times
+#endif
+                /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
+                 * matter since it will be recomputed at next deflate call.
+                 */
+            }
+        } else {
+            /* No match, output a literal byte */
+            Tracevv((stderr,"%c", s->window[s->strstart]));
+            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            s->lookahead--;
+            s->strstart++;
+        }
+        if (bflush) FLUSH_BLOCK(s, 0);
+    }
+    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->last_lit)
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
+
+#ifndef FASTEST
+/* ===========================================================================
+ * Same as above, but achieves better compression. We use a lazy
+ * evaluation for matches: a match is finally adopted only if there is
+ * no better match at the next window position.
+ */
+local block_state deflate_slow(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    IPos hash_head;          /* head of hash chain */
+    int bflush;              /* set if current block must be flushed */
+
+    /* Process the input block. */
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the next match, plus MIN_MATCH bytes to insert the
+         * string following the next match.
+         */
+        if (s->lookahead < MIN_LOOKAHEAD) {
+            fill_window(s);
+            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* Insert the string window[strstart .. strstart+2] in the
+         * dictionary, and set hash_head to the head of the hash chain:
+         */
+        hash_head = NIL;
+        if (s->lookahead >= MIN_MATCH) {
+            INSERT_STRING(s, s->strstart, hash_head);
+        }
+
+        /* Find the longest match, discarding those <= prev_length.
+         */
+        s->prev_length = s->match_length, s->prev_match = s->match_start;
+        s->match_length = MIN_MATCH-1;
+
+        if (hash_head != NIL && s->prev_length < s->max_lazy_match &&
+            s->strstart - hash_head <= MAX_DIST(s)) {
+            /* To simplify the code, we prevent matches with the string
+             * of window index 0 (in particular we have to avoid a match
+             * of the string with itself at the start of the input file).
+             */
+            s->match_length = longest_match (s, hash_head);
+            /* longest_match() sets match_start */
+
+            if (s->match_length <= 5 && (s->strategy == Z_FILTERED
+#if TOO_FAR <= 32767
+                || (s->match_length == MIN_MATCH &&
+                    s->strstart - s->match_start > TOO_FAR)
+#endif
+                )) {
+
+                /* If prev_match is also MIN_MATCH, match_start is garbage
+                 * but we will ignore the current match anyway.
+                 */
+                s->match_length = MIN_MATCH-1;
+            }
+        }
+        /* If there was a match at the previous step and the current
+         * match is not better, output the previous match:
+         */
+        if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
+            uInt max_insert = s->strstart + s->lookahead - MIN_MATCH;
+            /* Do not insert strings in hash table beyond this. */
+
+            check_match(s, s->strstart-1, s->prev_match, s->prev_length);
+
+            _tr_tally_dist(s, s->strstart -1 - s->prev_match,
+                           s->prev_length - MIN_MATCH, bflush);
+
+            /* Insert in hash table all strings up to the end of the match.
+             * strstart-1 and strstart are already inserted. If there is not
+             * enough lookahead, the last two strings are not inserted in
+             * the hash table.
+             */
+            s->lookahead -= s->prev_length-1;
+            s->prev_length -= 2;
+            do {
+                if (++s->strstart <= max_insert) {
+                    INSERT_STRING(s, s->strstart, hash_head);
+                }
+            } while (--s->prev_length != 0);
+            s->match_available = 0;
+            s->match_length = MIN_MATCH-1;
+            s->strstart++;
+
+            if (bflush) FLUSH_BLOCK(s, 0);
+
+        } else if (s->match_available) {
+            /* If there was no match at the previous position, output a
+             * single literal. If there was a match but the current match
+             * is longer, truncate the previous match to a single literal.
+             */
+            Tracevv((stderr,"%c", s->window[s->strstart-1]));
+            _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+            if (bflush) {
+                FLUSH_BLOCK_ONLY(s, 0);
+            }
+            s->strstart++;
+            s->lookahead--;
+            if (s->strm->avail_out == 0) return need_more;
+        } else {
+            /* There is no previous match to compare with, wait for
+             * the next step to decide.
+             */
+            s->match_available = 1;
+            s->strstart++;
+            s->lookahead--;
+        }
+    }
+    Assert (flush != Z_NO_FLUSH, "no flush?");
+    if (s->match_available) {
+        Tracevv((stderr,"%c", s->window[s->strstart-1]));
+        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+        s->match_available = 0;
+    }
+    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->last_lit)
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
+#endif /* FASTEST */
+
+/* ===========================================================================
+ * For Z_RLE, simply look for runs of bytes, generate matches only of distance
+ * one.  Do not maintain a hash table.  (It will be regenerated if this run of
+ * deflate switches away from Z_RLE.)
+ */
+local block_state deflate_rle(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    int bflush;             /* set if current block must be flushed */
+    uInt prev;              /* byte at distance one to match */
+    Bytef *scan, *strend;   /* scan goes up to strend for length of run */
+
+    for (;;) {
+        /* Make sure that we always have enough lookahead, except
+         * at the end of the input file. We need MAX_MATCH bytes
+         * for the longest run, plus one for the unrolled loop.
+         */
+        if (s->lookahead <= MAX_MATCH) {
+            fill_window(s);
+            if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) {
+                return need_more;
+            }
+            if (s->lookahead == 0) break; /* flush the current block */
+        }
+
+        /* See how many times the previous byte repeats */
+        s->match_length = 0;
+        if (s->lookahead >= MIN_MATCH && s->strstart > 0) {
+            scan = s->window + s->strstart - 1;
+            prev = *scan;
+            if (prev == *++scan && prev == *++scan && prev == *++scan) {
+                strend = s->window + s->strstart + MAX_MATCH;
+                do {
+                } while (prev == *++scan && prev == *++scan &&
+                         prev == *++scan && prev == *++scan &&
+                         prev == *++scan && prev == *++scan &&
+                         prev == *++scan && prev == *++scan &&
+                         scan < strend);
+                s->match_length = MAX_MATCH - (uInt)(strend - scan);
+                if (s->match_length > s->lookahead)
+                    s->match_length = s->lookahead;
+            }
+            Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan");
+        }
+
+        /* Emit match if have run of MIN_MATCH or longer, else emit literal */
+        if (s->match_length >= MIN_MATCH) {
+            check_match(s, s->strstart, s->strstart - 1, s->match_length);
+
+            _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush);
+
+            s->lookahead -= s->match_length;
+            s->strstart += s->match_length;
+            s->match_length = 0;
+        } else {
+            /* No match, output a literal byte */
+            Tracevv((stderr,"%c", s->window[s->strstart]));
+            _tr_tally_lit (s, s->window[s->strstart], bflush);
+            s->lookahead--;
+            s->strstart++;
+        }
+        if (bflush) FLUSH_BLOCK(s, 0);
+    }
+    s->insert = 0;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->last_lit)
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
+
+/* ===========================================================================
+ * For Z_HUFFMAN_ONLY, do not look for matches.  Do not maintain a hash table.
+ * (It will be regenerated if this run of deflate switches away from Huffman.)
+ */
+local block_state deflate_huff(s, flush)
+    deflate_state *s;
+    int flush;
+{
+    int bflush;             /* set if current block must be flushed */
+
+    for (;;) {
+        /* Make sure that we have a literal to write. */
+        if (s->lookahead == 0) {
+            fill_window(s);
+            if (s->lookahead == 0) {
+                if (flush == Z_NO_FLUSH)
+                    return need_more;
+                break;      /* flush the current block */
+            }
+        }
+
+        /* Output a literal byte */
+        s->match_length = 0;
+        Tracevv((stderr,"%c", s->window[s->strstart]));
+        _tr_tally_lit (s, s->window[s->strstart], bflush);
+        s->lookahead--;
+        s->strstart++;
+        if (bflush) FLUSH_BLOCK(s, 0);
+    }
+    s->insert = 0;
+    if (flush == Z_FINISH) {
+        FLUSH_BLOCK(s, 1);
+        return finish_done;
+    }
+    if (s->last_lit)
+        FLUSH_BLOCK(s, 0);
+    return block_done;
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/deflate.h b/deps/libchdr/deps/zlib-1.2.11/deflate.h
new file mode 100644
index 00000000..23ecdd31
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/deflate.h
@@ -0,0 +1,349 @@
+/* deflate.h -- internal compression state
+ * Copyright (C) 1995-2016 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id$ */
+
+#ifndef DEFLATE_H
+#define DEFLATE_H
+
+#include "zutil.h"
+
+/* define NO_GZIP when compiling if you want to disable gzip header and
+   trailer creation by deflate().  NO_GZIP would be used to avoid linking in
+   the crc code when it is not needed.  For shared libraries, gzip encoding
+   should be left enabled. */
+#ifndef NO_GZIP
+#  define GZIP
+#endif
+
+/* ===========================================================================
+ * Internal compression state.
+ */
+
+#define LENGTH_CODES 29
+/* number of length codes, not counting the special END_BLOCK code */
+
+#define LITERALS  256
+/* number of literal bytes 0..255 */
+
+#define L_CODES (LITERALS+1+LENGTH_CODES)
+/* number of Literal or Length codes, including the END_BLOCK code */
+
+#define D_CODES   30
+/* number of distance codes */
+
+#define BL_CODES  19
+/* number of codes used to transfer the bit lengths */
+
+#define HEAP_SIZE (2*L_CODES+1)
+/* maximum heap size */
+
+#define MAX_BITS 15
+/* All codes must not exceed MAX_BITS bits */
+
+#define Buf_size 16
+/* size of bit buffer in bi_buf */
+
+#define INIT_STATE    42    /* zlib header -> BUSY_STATE */
+#ifdef GZIP
+#  define GZIP_STATE  57    /* gzip header -> BUSY_STATE | EXTRA_STATE */
+#endif
+#define EXTRA_STATE   69    /* gzip extra block -> NAME_STATE */
+#define NAME_STATE    73    /* gzip file name -> COMMENT_STATE */
+#define COMMENT_STATE 91    /* gzip comment -> HCRC_STATE */
+#define HCRC_STATE   103    /* gzip header CRC -> BUSY_STATE */
+#define BUSY_STATE   113    /* deflate -> FINISH_STATE */
+#define FINISH_STATE 666    /* stream complete */
+/* Stream status */
+
+
+/* Data structure describing a single value and its code string. */
+typedef struct ct_data_s {
+    union {
+        ush  freq;       /* frequency count */
+        ush  code;       /* bit string */
+    } fc;
+    union {
+        ush  dad;        /* father node in Huffman tree */
+        ush  len;        /* length of bit string */
+    } dl;
+} FAR ct_data;
+
+#define Freq fc.freq
+#define Code fc.code
+#define Dad  dl.dad
+#define Len  dl.len
+
+typedef struct static_tree_desc_s  static_tree_desc;
+
+typedef struct tree_desc_s {
+    ct_data *dyn_tree;           /* the dynamic tree */
+    int     max_code;            /* largest code with non zero frequency */
+    const static_tree_desc *stat_desc;  /* the corresponding static tree */
+} FAR tree_desc;
+
+typedef ush Pos;
+typedef Pos FAR Posf;
+typedef unsigned IPos;
+
+/* A Pos is an index in the character window. We use short instead of int to
+ * save space in the various tables. IPos is used only for parameter passing.
+ */
+
+typedef struct internal_state {
+    z_streamp strm;      /* pointer back to this zlib stream */
+    int   status;        /* as the name implies */
+    Bytef *pending_buf;  /* output still pending */
+    ulg   pending_buf_size; /* size of pending_buf */
+    Bytef *pending_out;  /* next pending byte to output to the stream */
+    ulg   pending;       /* nb of bytes in the pending buffer */
+    int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
+    gz_headerp  gzhead;  /* gzip header information to write */
+    ulg   gzindex;       /* where in extra, name, or comment */
+    Byte  method;        /* can only be DEFLATED */
+    int   last_flush;    /* value of flush param for previous deflate call */
+
+                /* used by deflate.c: */
+
+    uInt  w_size;        /* LZ77 window size (32K by default) */
+    uInt  w_bits;        /* log2(w_size)  (8..16) */
+    uInt  w_mask;        /* w_size - 1 */
+
+    Bytef *window;
+    /* Sliding window. Input bytes are read into the second half of the window,
+     * and move to the first half later to keep a dictionary of at least wSize
+     * bytes. With this organization, matches are limited to a distance of
+     * wSize-MAX_MATCH bytes, but this ensures that IO is always
+     * performed with a length multiple of the block size. Also, it limits
+     * the window size to 64K, which is quite useful on MSDOS.
+     * To do: use the user input buffer as sliding window.
+     */
+
+    ulg window_size;
+    /* Actual size of window: 2*wSize, except when the user input buffer
+     * is directly used as sliding window.
+     */
+
+    Posf *prev;
+    /* Link to older string with same hash index. To limit the size of this
+     * array to 64K, this link is maintained only for the last 32K strings.
+     * An index in this array is thus a window index modulo 32K.
+     */
+
+    Posf *head; /* Heads of the hash chains or NIL. */
+
+    uInt  ins_h;          /* hash index of string to be inserted */
+    uInt  hash_size;      /* number of elements in hash table */
+    uInt  hash_bits;      /* log2(hash_size) */
+    uInt  hash_mask;      /* hash_size-1 */
+
+    uInt  hash_shift;
+    /* Number of bits by which ins_h must be shifted at each input
+     * step. It must be such that after MIN_MATCH steps, the oldest
+     * byte no longer takes part in the hash key, that is:
+     *   hash_shift * MIN_MATCH >= hash_bits
+     */
+
+    long block_start;
+    /* Window position at the beginning of the current output block. Gets
+     * negative when the window is moved backwards.
+     */
+
+    uInt match_length;           /* length of best match */
+    IPos prev_match;             /* previous match */
+    int match_available;         /* set if previous match exists */
+    uInt strstart;               /* start of string to insert */
+    uInt match_start;            /* start of matching string */
+    uInt lookahead;              /* number of valid bytes ahead in window */
+
+    uInt prev_length;
+    /* Length of the best match at previous step. Matches not greater than this
+     * are discarded. This is used in the lazy match evaluation.
+     */
+
+    uInt max_chain_length;
+    /* To speed up deflation, hash chains are never searched beyond this
+     * length.  A higher limit improves compression ratio but degrades the
+     * speed.
+     */
+
+    uInt max_lazy_match;
+    /* Attempt to find a better match only when the current match is strictly
+     * smaller than this value. This mechanism is used only for compression
+     * levels >= 4.
+     */
+#   define max_insert_length  max_lazy_match
+    /* Insert new strings in the hash table only if the match length is not
+     * greater than this length. This saves time but degrades compression.
+     * max_insert_length is used only for compression levels <= 3.
+     */
+
+    int level;    /* compression level (1..9) */
+    int strategy; /* favor or force Huffman coding*/
+
+    uInt good_match;
+    /* Use a faster search when the previous match is longer than this */
+
+    int nice_match; /* Stop searching when current match exceeds this */
+
+                /* used by trees.c: */
+    /* Didn't use ct_data typedef below to suppress compiler warning */
+    struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
+    struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
+    struct ct_data_s bl_tree[2*BL_CODES+1];  /* Huffman tree for bit lengths */
+
+    struct tree_desc_s l_desc;               /* desc. for literal tree */
+    struct tree_desc_s d_desc;               /* desc. for distance tree */
+    struct tree_desc_s bl_desc;              /* desc. for bit length tree */
+
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    int heap[2*L_CODES+1];      /* heap used to build the Huffman trees */
+    int heap_len;               /* number of elements in the heap */
+    int heap_max;               /* element of largest frequency */
+    /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
+     * The same heap array is used to build all trees.
+     */
+
+    uch depth[2*L_CODES+1];
+    /* Depth of each subtree used as tie breaker for trees of equal frequency
+     */
+
+    uchf *l_buf;          /* buffer for literals or lengths */
+
+    uInt  lit_bufsize;
+    /* Size of match buffer for literals/lengths.  There are 4 reasons for
+     * limiting lit_bufsize to 64K:
+     *   - frequencies can be kept in 16 bit counters
+     *   - if compression is not successful for the first block, all input
+     *     data is still in the window so we can still emit a stored block even
+     *     when input comes from standard input.  (This can also be done for
+     *     all blocks if lit_bufsize is not greater than 32K.)
+     *   - if compression is not successful for a file smaller than 64K, we can
+     *     even emit a stored file instead of a stored block (saving 5 bytes).
+     *     This is applicable only for zip (not gzip or zlib).
+     *   - creating new Huffman trees less frequently may not provide fast
+     *     adaptation to changes in the input data statistics. (Take for
+     *     example a binary file with poorly compressible code followed by
+     *     a highly compressible string table.) Smaller buffer sizes give
+     *     fast adaptation but have of course the overhead of transmitting
+     *     trees more frequently.
+     *   - I can't count above 4
+     */
+
+    uInt last_lit;      /* running index in l_buf */
+
+    ushf *d_buf;
+    /* Buffer for distances. To simplify the code, d_buf and l_buf have
+     * the same number of elements. To use different lengths, an extra flag
+     * array would be necessary.
+     */
+
+    ulg opt_len;        /* bit length of current block with optimal trees */
+    ulg static_len;     /* bit length of current block with static trees */
+    uInt matches;       /* number of string matches in current block */
+    uInt insert;        /* bytes at end of window left to insert */
+
+#ifdef ZLIB_DEBUG
+    ulg compressed_len; /* total bit length of compressed file mod 2^32 */
+    ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
+#endif
+
+    ush bi_buf;
+    /* Output buffer. bits are inserted starting at the bottom (least
+     * significant bits).
+     */
+    int bi_valid;
+    /* Number of valid bits in bi_buf.  All bits above the last valid bit
+     * are always zero.
+     */
+
+    ulg high_water;
+    /* High water mark offset in window for initialized bytes -- bytes above
+     * this are set to zero in order to avoid memory check warnings when
+     * longest match routines access bytes past the input.  This is then
+     * updated to the new high water mark.
+     */
+
+} FAR deflate_state;
+
+/* Output a byte on the stream.
+ * IN assertion: there is enough room in pending_buf.
+ */
+#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);}
+
+
+#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
+/* Minimum amount of lookahead, except at the end of the input file.
+ * See deflate.c for comments about the MIN_MATCH+1.
+ */
+
+#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
+/* In order to simplify the code, particularly on 16 bit machines, match
+ * distances are limited to MAX_DIST instead of WSIZE.
+ */
+
+#define WIN_INIT MAX_MATCH
+/* Number of bytes after end of data in window to initialize in order to avoid
+   memory checker errors from longest match routines */
+
+        /* in trees.c */
+void ZLIB_INTERNAL _tr_init OF((deflate_state *s));
+int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
+void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf,
+                        ulg stored_len, int last));
+void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s));
+void ZLIB_INTERNAL _tr_align OF((deflate_state *s));
+void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
+                        ulg stored_len, int last));
+
+#define d_code(dist) \
+   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
+/* Mapping from a distance to a distance code. dist is the distance - 1 and
+ * must not have side effects. _dist_code[256] and _dist_code[257] are never
+ * used.
+ */
+
+#ifndef ZLIB_DEBUG
+/* Inline versions of _tr_tally for speed: */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+  extern uch ZLIB_INTERNAL _length_code[];
+  extern uch ZLIB_INTERNAL _dist_code[];
+#else
+  extern const uch ZLIB_INTERNAL _length_code[];
+  extern const uch ZLIB_INTERNAL _dist_code[];
+#endif
+
+# define _tr_tally_lit(s, c, flush) \
+  { uch cc = (c); \
+    s->d_buf[s->last_lit] = 0; \
+    s->l_buf[s->last_lit++] = cc; \
+    s->dyn_ltree[cc].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+   }
+# define _tr_tally_dist(s, distance, length, flush) \
+  { uch len = (uch)(length); \
+    ush dist = (ush)(distance); \
+    s->d_buf[s->last_lit] = dist; \
+    s->l_buf[s->last_lit++] = len; \
+    dist--; \
+    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
+    s->dyn_dtree[d_code(dist)].Freq++; \
+    flush = (s->last_lit == s->lit_bufsize-1); \
+  }
+#else
+# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
+# define _tr_tally_dist(s, distance, length, flush) \
+              flush = _tr_tally(s, distance, length)
+#endif
+
+#endif /* DEFLATE_H */
diff --git a/deps/libchdr/deps/zlib-1.2.11/doc/algorithm.txt b/deps/libchdr/deps/zlib-1.2.11/doc/algorithm.txt
new file mode 100644
index 00000000..c97f4950
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/doc/algorithm.txt
@@ -0,0 +1,209 @@
+1. Compression algorithm (deflate)
+
+The deflation algorithm used by gzip (also zip and zlib) is a variation of
+LZ77 (Lempel-Ziv 1977, see reference below). It finds duplicated strings in
+the input data.  The second occurrence of a string is replaced by a
+pointer to the previous string, in the form of a pair (distance,
+length).  Distances are limited to 32K bytes, and lengths are limited
+to 258 bytes. When a string does not occur anywhere in the previous
+32K bytes, it is emitted as a sequence of literal bytes.  (In this
+description, `string' must be taken as an arbitrary sequence of bytes,
+and is not restricted to printable characters.)
+
+Literals or match lengths are compressed with one Huffman tree, and
+match distances are compressed with another tree. The trees are stored
+in a compact form at the start of each block. The blocks can have any
+size (except that the compressed data for one block must fit in
+available memory). A block is terminated when deflate() determines that
+it would be useful to start another block with fresh trees. (This is
+somewhat similar to the behavior of LZW-based _compress_.)
+
+Duplicated strings are found using a hash table. All input strings of
+length 3 are inserted in the hash table. A hash index is computed for
+the next 3 bytes. If the hash chain for this index is not empty, all
+strings in the chain are compared with the current input string, and
+the longest match is selected.
+
+The hash chains are searched starting with the most recent strings, to
+favor small distances and thus take advantage of the Huffman encoding.
+The hash chains are singly linked. There are no deletions from the
+hash chains, the algorithm simply discards matches that are too old.
+
+To avoid a worst-case situation, very long hash chains are arbitrarily
+truncated at a certain length, determined by a runtime option (level
+parameter of deflateInit). So deflate() does not always find the longest
+possible match but generally finds a match which is long enough.
+
+deflate() also defers the selection of matches with a lazy evaluation
+mechanism. After a match of length N has been found, deflate() searches for
+a longer match at the next input byte. If a longer match is found, the
+previous match is truncated to a length of one (thus producing a single
+literal byte) and the process of lazy evaluation begins again. Otherwise,
+the original match is kept, and the next match search is attempted only N
+steps later.
+
+The lazy match evaluation is also subject to a runtime parameter. If
+the current match is long enough, deflate() reduces the search for a longer
+match, thus speeding up the whole process. If compression ratio is more
+important than speed, deflate() attempts a complete second search even if
+the first match is already long enough.
+
+The lazy match evaluation is not performed for the fastest compression
+modes (level parameter 1 to 3). For these fast modes, new strings
+are inserted in the hash table only when no match was found, or
+when the match is not too long. This degrades the compression ratio
+but saves time since there are both fewer insertions and fewer searches.
+
+
+2. Decompression algorithm (inflate)
+
+2.1 Introduction
+
+The key question is how to represent a Huffman code (or any prefix code) so
+that you can decode fast.  The most important characteristic is that shorter
+codes are much more common than longer codes, so pay attention to decoding the
+short codes fast, and let the long codes take longer to decode.
+
+inflate() sets up a first level table that covers some number of bits of
+input less than the length of longest code.  It gets that many bits from the
+stream, and looks it up in the table.  The table will tell if the next
+code is that many bits or less and how many, and if it is, it will tell
+the value, else it will point to the next level table for which inflate()
+grabs more bits and tries to decode a longer code.
+
+How many bits to make the first lookup is a tradeoff between the time it
+takes to decode and the time it takes to build the table.  If building the
+table took no time (and if you had infinite memory), then there would only
+be a first level table to cover all the way to the longest code.  However,
+building the table ends up taking a lot longer for more bits since short
+codes are replicated many times in such a table.  What inflate() does is
+simply to make the number of bits in the first table a variable, and  then
+to set that variable for the maximum speed.
+
+For inflate, which has 286 possible codes for the literal/length tree, the size
+of the first table is nine bits.  Also the distance trees have 30 possible
+values, and the size of the first table is six bits.  Note that for each of
+those cases, the table ended up one bit longer than the ``average'' code
+length, i.e. the code length of an approximately flat code which would be a
+little more than eight bits for 286 symbols and a little less than five bits
+for 30 symbols.
+
+
+2.2 More details on the inflate table lookup
+
+Ok, you want to know what this cleverly obfuscated inflate tree actually
+looks like.  You are correct that it's not a Huffman tree.  It is simply a
+lookup table for the first, let's say, nine bits of a Huffman symbol.  The
+symbol could be as short as one bit or as long as 15 bits.  If a particular
+symbol is shorter than nine bits, then that symbol's translation is duplicated
+in all those entries that start with that symbol's bits.  For example, if the
+symbol is four bits, then it's duplicated 32 times in a nine-bit table.  If a
+symbol is nine bits long, it appears in the table once.
+
+If the symbol is longer than nine bits, then that entry in the table points
+to another similar table for the remaining bits.  Again, there are duplicated
+entries as needed.  The idea is that most of the time the symbol will be short
+and there will only be one table look up.  (That's whole idea behind data
+compression in the first place.)  For the less frequent long symbols, there
+will be two lookups.  If you had a compression method with really long
+symbols, you could have as many levels of lookups as is efficient.  For
+inflate, two is enough.
+
+So a table entry either points to another table (in which case nine bits in
+the above example are gobbled), or it contains the translation for the symbol
+and the number of bits to gobble.  Then you start again with the next
+ungobbled bit.
+
+You may wonder: why not just have one lookup table for how ever many bits the
+longest symbol is?  The reason is that if you do that, you end up spending
+more time filling in duplicate symbol entries than you do actually decoding.
+At least for deflate's output that generates new trees every several 10's of
+kbytes.  You can imagine that filling in a 2^15 entry table for a 15-bit code
+would take too long if you're only decoding several thousand symbols.  At the
+other extreme, you could make a new table for every bit in the code.  In fact,
+that's essentially a Huffman tree.  But then you spend too much time
+traversing the tree while decoding, even for short symbols.
+
+So the number of bits for the first lookup table is a trade of the time to
+fill out the table vs. the time spent looking at the second level and above of
+the table.
+
+Here is an example, scaled down:
+
+The code being decoded, with 10 symbols, from 1 to 6 bits long:
+
+A: 0
+B: 10
+C: 1100
+D: 11010
+E: 11011
+F: 11100
+G: 11101
+H: 11110
+I: 111110
+J: 111111
+
+Let's make the first table three bits long (eight entries):
+
+000: A,1
+001: A,1
+010: A,1
+011: A,1
+100: B,2
+101: B,2
+110: -> table X (gobble 3 bits)
+111: -> table Y (gobble 3 bits)
+
+Each entry is what the bits decode as and how many bits that is, i.e. how
+many bits to gobble.  Or the entry points to another table, with the number of
+bits to gobble implicit in the size of the table.
+
+Table X is two bits long since the longest code starting with 110 is five bits
+long:
+
+00: C,1
+01: C,1
+10: D,2
+11: E,2
+
+Table Y is three bits long since the longest code starting with 111 is six
+bits long:
+
+000: F,2
+001: F,2
+010: G,2
+011: G,2
+100: H,2
+101: H,2
+110: I,3
+111: J,3
+
+So what we have here are three tables with a total of 20 entries that had to
+be constructed.  That's compared to 64 entries for a single table.  Or
+compared to 16 entries for a Huffman tree (six two entry tables and one four
+entry table).  Assuming that the code ideally represents the probability of
+the symbols, it takes on the average 1.25 lookups per symbol.  That's compared
+to one lookup for the single table, or 1.66 lookups per symbol for the
+Huffman tree.
+
+There, I think that gives you a picture of what's going on.  For inflate, the
+meaning of a particular symbol is often more than just a letter.  It can be a
+byte (a "literal"), or it can be either a length or a distance which
+indicates a base value and a number of bits to fetch after the code that is
+added to the base value.  Or it might be the special end-of-block code.  The
+data structures created in inftrees.c try to encode all that information
+compactly in the tables.
+
+
+Jean-loup Gailly        Mark Adler
+jloup@gzip.org          madler@alumni.caltech.edu
+
+
+References:
+
+[LZ77] Ziv J., Lempel A., ``A Universal Algorithm for Sequential Data
+Compression,'' IEEE Transactions on Information Theory, Vol. 23, No. 3,
+pp. 337-343.
+
+``DEFLATE Compressed Data Format Specification'' available in
+http://tools.ietf.org/html/rfc1951
diff --git a/deps/libchdr/deps/zlib-1.2.11/doc/rfc1950.txt b/deps/libchdr/deps/zlib-1.2.11/doc/rfc1950.txt
new file mode 100644
index 00000000..ce6428a0
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/doc/rfc1950.txt
@@ -0,0 +1,619 @@
+
+
+
+
+
+
+Network Working Group                                         P. Deutsch
+Request for Comments: 1950                           Aladdin Enterprises
+Category: Informational                                      J-L. Gailly
+                                                                Info-ZIP
+                                                                May 1996
+
+
+         ZLIB Compressed Data Format Specification version 3.3
+
+Status of This Memo
+
+   This memo provides information for the Internet community.  This memo
+   does not specify an Internet standard of any kind.  Distribution of
+   this memo is unlimited.
+
+IESG Note:
+
+   The IESG takes no position on the validity of any Intellectual
+   Property Rights statements contained in this document.
+
+Notices
+
+   Copyright (c) 1996 L. Peter Deutsch and Jean-Loup Gailly
+
+   Permission is granted to copy and distribute this document for any
+   purpose and without charge, including translations into other
+   languages and incorporation into compilations, provided that the
+   copyright notice and this notice are preserved, and that any
+   substantive changes or deletions from the original are clearly
+   marked.
+
+   A pointer to the latest version of this and related documentation in
+   HTML format can be found at the URL
+   <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
+
+Abstract
+
+   This specification defines a lossless compressed data format.  The
+   data can be produced or consumed, even for an arbitrarily long
+   sequentially presented input data stream, using only an a priori
+   bounded amount of intermediate storage.  The format presently uses
+   the DEFLATE compression method but can be easily extended to use
+   other compression methods.  It can be implemented readily in a manner
+   not covered by patents.  This specification also defines the ADLER-32
+   checksum (an extension and improvement of the Fletcher checksum),
+   used for detection of data corruption, and provides an algorithm for
+   computing it.
+
+
+
+
+Deutsch & Gailly             Informational                      [Page 1]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+Table of Contents
+
+   1. Introduction ................................................... 2
+      1.1. Purpose ................................................... 2
+      1.2. Intended audience ......................................... 3
+      1.3. Scope ..................................................... 3
+      1.4. Compliance ................................................ 3
+      1.5.  Definitions of terms and conventions used ................ 3
+      1.6. Changes from previous versions ............................ 3
+   2. Detailed specification ......................................... 3
+      2.1. Overall conventions ....................................... 3
+      2.2. Data format ............................................... 4
+      2.3. Compliance ................................................ 7
+   3. References ..................................................... 7
+   4. Source code .................................................... 8
+   5. Security Considerations ........................................ 8
+   6. Acknowledgements ............................................... 8
+   7. Authors' Addresses ............................................. 8
+   8. Appendix: Rationale ............................................ 9
+   9. Appendix: Sample code ..........................................10
+
+1. Introduction
+
+   1.1. Purpose
+
+      The purpose of this specification is to define a lossless
+      compressed data format that:
+
+          * Is independent of CPU type, operating system, file system,
+            and character set, and hence can be used for interchange;
+
+          * Can be produced or consumed, even for an arbitrarily long
+            sequentially presented input data stream, using only an a
+            priori bounded amount of intermediate storage, and hence can
+            be used in data communications or similar structures such as
+            Unix filters;
+
+          * Can use a number of different compression methods;
+
+          * Can be implemented readily in a manner not covered by
+            patents, and hence can be practiced freely.
+
+      The data format defined by this specification does not attempt to
+      allow random access to compressed data.
+
+
+
+
+
+
+
+Deutsch & Gailly             Informational                      [Page 2]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+   1.2. Intended audience
+
+      This specification is intended for use by implementors of software
+      to compress data into zlib format and/or decompress data from zlib
+      format.
+
+      The text of the specification assumes a basic background in
+      programming at the level of bits and other primitive data
+      representations.
+
+   1.3. Scope
+
+      The specification specifies a compressed data format that can be
+      used for in-memory compression of a sequence of arbitrary bytes.
+
+   1.4. Compliance
+
+      Unless otherwise indicated below, a compliant decompressor must be
+      able to accept and decompress any data set that conforms to all
+      the specifications presented here; a compliant compressor must
+      produce data sets that conform to all the specifications presented
+      here.
+
+   1.5.  Definitions of terms and conventions used
+
+      byte: 8 bits stored or transmitted as a unit (same as an octet).
+      (For this specification, a byte is exactly 8 bits, even on
+      machines which store a character on a number of bits different
+      from 8.) See below, for the numbering of bits within a byte.
+
+   1.6. Changes from previous versions
+
+      Version 3.1 was the first public release of this specification.
+      In version 3.2, some terminology was changed and the Adler-32
+      sample code was rewritten for clarity.  In version 3.3, the
+      support for a preset dictionary was introduced, and the
+      specification was converted to RFC style.
+
+2. Detailed specification
+
+   2.1. Overall conventions
+
+      In the diagrams below, a box like this:
+
+         +---+
+         |   | <-- the vertical bars might be missing
+         +---+
+
+
+
+
+Deutsch & Gailly             Informational                      [Page 3]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+      represents one byte; a box like this:
+
+         +==============+
+         |              |
+         +==============+
+
+      represents a variable number of bytes.
+
+      Bytes stored within a computer do not have a "bit order", since
+      they are always treated as a unit.  However, a byte considered as
+      an integer between 0 and 255 does have a most- and least-
+      significant bit, and since we write numbers with the most-
+      significant digit on the left, we also write bytes with the most-
+      significant bit on the left.  In the diagrams below, we number the
+      bits of a byte so that bit 0 is the least-significant bit, i.e.,
+      the bits are numbered:
+
+         +--------+
+         |76543210|
+         +--------+
+
+      Within a computer, a number may occupy multiple bytes.  All
+      multi-byte numbers in the format described here are stored with
+      the MOST-significant byte first (at the lower memory address).
+      For example, the decimal number 520 is stored as:
+
+             0     1
+         +--------+--------+
+         |00000010|00001000|
+         +--------+--------+
+          ^        ^
+          |        |
+          |        + less significant byte = 8
+          + more significant byte = 2 x 256
+
+   2.2. Data format
+
+      A zlib stream has the following structure:
+
+           0   1
+         +---+---+
+         |CMF|FLG|   (more-->)
+         +---+---+
+
+
+
+
+
+
+
+
+Deutsch & Gailly             Informational                      [Page 4]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+      (if FLG.FDICT set)
+
+           0   1   2   3
+         +---+---+---+---+
+         |     DICTID    |   (more-->)
+         +---+---+---+---+
+
+         +=====================+---+---+---+---+
+         |...compressed data...|    ADLER32    |
+         +=====================+---+---+---+---+
+
+      Any data which may appear after ADLER32 are not part of the zlib
+      stream.
+
+      CMF (Compression Method and flags)
+         This byte is divided into a 4-bit compression method and a 4-
+         bit information field depending on the compression method.
+
+            bits 0 to 3  CM     Compression method
+            bits 4 to 7  CINFO  Compression info
+
+      CM (Compression method)
+         This identifies the compression method used in the file. CM = 8
+         denotes the "deflate" compression method with a window size up
+         to 32K.  This is the method used by gzip and PNG (see
+         references [1] and [2] in Chapter 3, below, for the reference
+         documents).  CM = 15 is reserved.  It might be used in a future
+         version of this specification to indicate the presence of an
+         extra field before the compressed data.
+
+      CINFO (Compression info)
+         For CM = 8, CINFO is the base-2 logarithm of the LZ77 window
+         size, minus eight (CINFO=7 indicates a 32K window size). Values
+         of CINFO above 7 are not allowed in this version of the
+         specification.  CINFO is not defined in this specification for
+         CM not equal to 8.
+
+      FLG (FLaGs)
+         This flag byte is divided as follows:
+
+            bits 0 to 4  FCHECK  (check bits for CMF and FLG)
+            bit  5       FDICT   (preset dictionary)
+            bits 6 to 7  FLEVEL  (compression level)
+
+         The FCHECK value must be such that CMF and FLG, when viewed as
+         a 16-bit unsigned integer stored in MSB order (CMF*256 + FLG),
+         is a multiple of 31.
+
+
+
+
+Deutsch & Gailly             Informational                      [Page 5]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+      FDICT (Preset dictionary)
+         If FDICT is set, a DICT dictionary identifier is present
+         immediately after the FLG byte. The dictionary is a sequence of
+         bytes which are initially fed to the compressor without
+         producing any compressed output. DICT is the Adler-32 checksum
+         of this sequence of bytes (see the definition of ADLER32
+         below).  The decompressor can use this identifier to determine
+         which dictionary has been used by the compressor.
+
+      FLEVEL (Compression level)
+         These flags are available for use by specific compression
+         methods.  The "deflate" method (CM = 8) sets these flags as
+         follows:
+
+            0 - compressor used fastest algorithm
+            1 - compressor used fast algorithm
+            2 - compressor used default algorithm
+            3 - compressor used maximum compression, slowest algorithm
+
+         The information in FLEVEL is not needed for decompression; it
+         is there to indicate if recompression might be worthwhile.
+
+      compressed data
+         For compression method 8, the compressed data is stored in the
+         deflate compressed data format as described in the document
+         "DEFLATE Compressed Data Format Specification" by L. Peter
+         Deutsch. (See reference [3] in Chapter 3, below)
+
+         Other compressed data formats are not specified in this version
+         of the zlib specification.
+
+      ADLER32 (Adler-32 checksum)
+         This contains a checksum value of the uncompressed data
+         (excluding any dictionary data) computed according to Adler-32
+         algorithm. This algorithm is a 32-bit extension and improvement
+         of the Fletcher algorithm, used in the ITU-T X.224 / ISO 8073
+         standard. See references [4] and [5] in Chapter 3, below)
+
+         Adler-32 is composed of two sums accumulated per byte: s1 is
+         the sum of all bytes, s2 is the sum of all s1 values. Both sums
+         are done modulo 65521. s1 is initialized to 1, s2 to zero.  The
+         Adler-32 checksum is stored as s2*65536 + s1 in most-
+         significant-byte first (network) order.
+
+
+
+
+
+
+
+
+Deutsch & Gailly             Informational                      [Page 6]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+   2.3. Compliance
+
+      A compliant compressor must produce streams with correct CMF, FLG
+      and ADLER32, but need not support preset dictionaries.  When the
+      zlib data format is used as part of another standard data format,
+      the compressor may use only preset dictionaries that are specified
+      by this other data format.  If this other format does not use the
+      preset dictionary feature, the compressor must not set the FDICT
+      flag.
+
+      A compliant decompressor must check CMF, FLG, and ADLER32, and
+      provide an error indication if any of these have incorrect values.
+      A compliant decompressor must give an error indication if CM is
+      not one of the values defined in this specification (only the
+      value 8 is permitted in this version), since another value could
+      indicate the presence of new features that would cause subsequent
+      data to be interpreted incorrectly.  A compliant decompressor must
+      give an error indication if FDICT is set and DICTID is not the
+      identifier of a known preset dictionary.  A decompressor may
+      ignore FLEVEL and still be compliant.  When the zlib data format
+      is being used as a part of another standard format, a compliant
+      decompressor must support all the preset dictionaries specified by
+      the other format. When the other format does not use the preset
+      dictionary feature, a compliant decompressor must reject any
+      stream in which the FDICT flag is set.
+
+3. References
+
+   [1] Deutsch, L.P.,"GZIP Compressed Data Format Specification",
+       available in ftp://ftp.uu.net/pub/archiving/zip/doc/
+
+   [2] Thomas Boutell, "PNG (Portable Network Graphics) specification",
+       available in ftp://ftp.uu.net/graphics/png/documents/
+
+   [3] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification",
+       available in ftp://ftp.uu.net/pub/archiving/zip/doc/
+
+   [4] Fletcher, J. G., "An Arithmetic Checksum for Serial
+       Transmissions," IEEE Transactions on Communications, Vol. COM-30,
+       No. 1, January 1982, pp. 247-252.
+
+   [5] ITU-T Recommendation X.224, Annex D, "Checksum Algorithms,"
+       November, 1993, pp. 144, 145. (Available from
+       gopher://info.itu.ch). ITU-T X.244 is also the same as ISO 8073.
+
+
+
+
+
+
+
+Deutsch & Gailly             Informational                      [Page 7]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+4. Source code
+
+   Source code for a C language implementation of a "zlib" compliant
+   library is available at ftp://ftp.uu.net/pub/archiving/zip/zlib/.
+
+5. Security Considerations
+
+   A decoder that fails to check the ADLER32 checksum value may be
+   subject to undetected data corruption.
+
+6. Acknowledgements
+
+   Trademarks cited in this document are the property of their
+   respective owners.
+
+   Jean-Loup Gailly and Mark Adler designed the zlib format and wrote
+   the related software described in this specification.  Glenn
+   Randers-Pehrson converted this document to RFC and HTML format.
+
+7. Authors' Addresses
+
+   L. Peter Deutsch
+   Aladdin Enterprises
+   203 Santa Margarita Ave.
+   Menlo Park, CA 94025
+
+   Phone: (415) 322-0103 (AM only)
+   FAX:   (415) 322-1734
+   EMail: <ghost@aladdin.com>
+
+
+   Jean-Loup Gailly
+
+   EMail: <gzip@prep.ai.mit.edu>
+
+   Questions about the technical content of this specification can be
+   sent by email to
+
+   Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
+   Mark Adler <madler@alumni.caltech.edu>
+
+   Editorial comments on this specification can be sent by email to
+
+   L. Peter Deutsch <ghost@aladdin.com> and
+   Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
+
+
+
+
+
+
+Deutsch & Gailly             Informational                      [Page 8]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+8. Appendix: Rationale
+
+   8.1. Preset dictionaries
+
+      A preset dictionary is specially useful to compress short input
+      sequences. The compressor can take advantage of the dictionary
+      context to encode the input in a more compact manner. The
+      decompressor can be initialized with the appropriate context by
+      virtually decompressing a compressed version of the dictionary
+      without producing any output. However for certain compression
+      algorithms such as the deflate algorithm this operation can be
+      achieved without actually performing any decompression.
+
+      The compressor and the decompressor must use exactly the same
+      dictionary. The dictionary may be fixed or may be chosen among a
+      certain number of predefined dictionaries, according to the kind
+      of input data. The decompressor can determine which dictionary has
+      been chosen by the compressor by checking the dictionary
+      identifier. This document does not specify the contents of
+      predefined dictionaries, since the optimal dictionaries are
+      application specific. Standard data formats using this feature of
+      the zlib specification must precisely define the allowed
+      dictionaries.
+
+   8.2. The Adler-32 algorithm
+
+      The Adler-32 algorithm is much faster than the CRC32 algorithm yet
+      still provides an extremely low probability of undetected errors.
+
+      The modulo on unsigned long accumulators can be delayed for 5552
+      bytes, so the modulo operation time is negligible.  If the bytes
+      are a, b, c, the second sum is 3a + 2b + c + 3, and so is position
+      and order sensitive, unlike the first sum, which is just a
+      checksum.  That 65521 is prime is important to avoid a possible
+      large class of two-byte errors that leave the check unchanged.
+      (The Fletcher checksum uses 255, which is not prime and which also
+      makes the Fletcher check insensitive to single byte changes 0 <->
+      255.)
+
+      The sum s1 is initialized to 1 instead of zero to make the length
+      of the sequence part of s2, so that the length does not have to be
+      checked separately. (Any sequence of zeroes has a Fletcher
+      checksum of zero.)
+
+
+
+
+
+
+
+
+Deutsch & Gailly             Informational                      [Page 9]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+9. Appendix: Sample code
+
+   The following C code computes the Adler-32 checksum of a data buffer.
+   It is written for clarity, not for speed.  The sample code is in the
+   ANSI C programming language. Non C users may find it easier to read
+   with these hints:
+
+      &      Bitwise AND operator.
+      >>     Bitwise right shift operator. When applied to an
+             unsigned quantity, as here, right shift inserts zero bit(s)
+             at the left.
+      <<     Bitwise left shift operator. Left shift inserts zero
+             bit(s) at the right.
+      ++     "n++" increments the variable n.
+      %      modulo operator: a % b is the remainder of a divided by b.
+
+      #define BASE 65521 /* largest prime smaller than 65536 */
+
+      /*
+         Update a running Adler-32 checksum with the bytes buf[0..len-1]
+       and return the updated checksum. The Adler-32 checksum should be
+       initialized to 1.
+
+       Usage example:
+
+         unsigned long adler = 1L;
+
+         while (read_buffer(buffer, length) != EOF) {
+           adler = update_adler32(adler, buffer, length);
+         }
+         if (adler != original_adler) error();
+      */
+      unsigned long update_adler32(unsigned long adler,
+         unsigned char *buf, int len)
+      {
+        unsigned long s1 = adler & 0xffff;
+        unsigned long s2 = (adler >> 16) & 0xffff;
+        int n;
+
+        for (n = 0; n < len; n++) {
+          s1 = (s1 + buf[n]) % BASE;
+          s2 = (s2 + s1)     % BASE;
+        }
+        return (s2 << 16) + s1;
+      }
+
+      /* Return the adler32 of the bytes buf[0..len-1] */
+
+
+
+
+Deutsch & Gailly             Informational                     [Page 10]
+
+RFC 1950       ZLIB Compressed Data Format Specification        May 1996
+
+
+      unsigned long adler32(unsigned char *buf, int len)
+      {
+        return update_adler32(1L, buf, len);
+      }
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deutsch & Gailly             Informational                     [Page 11]
+
diff --git a/deps/libchdr/deps/zlib-1.2.11/doc/rfc1951.txt b/deps/libchdr/deps/zlib-1.2.11/doc/rfc1951.txt
new file mode 100644
index 00000000..403c8c72
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/doc/rfc1951.txt
@@ -0,0 +1,955 @@
+
+
+
+
+
+
+Network Working Group                                         P. Deutsch
+Request for Comments: 1951                           Aladdin Enterprises
+Category: Informational                                         May 1996
+
+
+        DEFLATE Compressed Data Format Specification version 1.3
+
+Status of This Memo
+
+   This memo provides information for the Internet community.  This memo
+   does not specify an Internet standard of any kind.  Distribution of
+   this memo is unlimited.
+
+IESG Note:
+
+   The IESG takes no position on the validity of any Intellectual
+   Property Rights statements contained in this document.
+
+Notices
+
+   Copyright (c) 1996 L. Peter Deutsch
+
+   Permission is granted to copy and distribute this document for any
+   purpose and without charge, including translations into other
+   languages and incorporation into compilations, provided that the
+   copyright notice and this notice are preserved, and that any
+   substantive changes or deletions from the original are clearly
+   marked.
+
+   A pointer to the latest version of this and related documentation in
+   HTML format can be found at the URL
+   <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
+
+Abstract
+
+   This specification defines a lossless compressed data format that
+   compresses data using a combination of the LZ77 algorithm and Huffman
+   coding, with efficiency comparable to the best currently available
+   general-purpose compression methods.  The data can be produced or
+   consumed, even for an arbitrarily long sequentially presented input
+   data stream, using only an a priori bounded amount of intermediate
+   storage.  The format can be implemented readily in a manner not
+   covered by patents.
+
+
+
+
+
+
+
+
+Deutsch                      Informational                      [Page 1]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+Table of Contents
+
+   1. Introduction ................................................... 2
+      1.1. Purpose ................................................... 2
+      1.2. Intended audience ......................................... 3
+      1.3. Scope ..................................................... 3
+      1.4. Compliance ................................................ 3
+      1.5.  Definitions of terms and conventions used ................ 3
+      1.6. Changes from previous versions ............................ 4
+   2. Compressed representation overview ............................. 4
+   3. Detailed specification ......................................... 5
+      3.1. Overall conventions ....................................... 5
+          3.1.1. Packing into bytes .................................. 5
+      3.2. Compressed block format ................................... 6
+          3.2.1. Synopsis of prefix and Huffman coding ............... 6
+          3.2.2. Use of Huffman coding in the "deflate" format ....... 7
+          3.2.3. Details of block format ............................. 9
+          3.2.4. Non-compressed blocks (BTYPE=00) ................... 11
+          3.2.5. Compressed blocks (length and distance codes) ...... 11
+          3.2.6. Compression with fixed Huffman codes (BTYPE=01) .... 12
+          3.2.7. Compression with dynamic Huffman codes (BTYPE=10) .. 13
+      3.3. Compliance ............................................... 14
+   4. Compression algorithm details ................................. 14
+   5. References .................................................... 16
+   6. Security Considerations ....................................... 16
+   7. Source code ................................................... 16
+   8. Acknowledgements .............................................. 16
+   9. Author's Address .............................................. 17
+
+1. Introduction
+
+   1.1. Purpose
+
+      The purpose of this specification is to define a lossless
+      compressed data format that:
+          * Is independent of CPU type, operating system, file system,
+            and character set, and hence can be used for interchange;
+          * Can be produced or consumed, even for an arbitrarily long
+            sequentially presented input data stream, using only an a
+            priori bounded amount of intermediate storage, and hence
+            can be used in data communications or similar structures
+            such as Unix filters;
+          * Compresses data with efficiency comparable to the best
+            currently available general-purpose compression methods,
+            and in particular considerably better than the "compress"
+            program;
+          * Can be implemented readily in a manner not covered by
+            patents, and hence can be practiced freely;
+
+
+
+Deutsch                      Informational                      [Page 2]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+          * Is compatible with the file format produced by the current
+            widely used gzip utility, in that conforming decompressors
+            will be able to read data produced by the existing gzip
+            compressor.
+
+      The data format defined by this specification does not attempt to:
+
+          * Allow random access to compressed data;
+          * Compress specialized data (e.g., raster graphics) as well
+            as the best currently available specialized algorithms.
+
+      A simple counting argument shows that no lossless compression
+      algorithm can compress every possible input data set.  For the
+      format defined here, the worst case expansion is 5 bytes per 32K-
+      byte block, i.e., a size increase of 0.015% for large data sets.
+      English text usually compresses by a factor of 2.5 to 3;
+      executable files usually compress somewhat less; graphical data
+      such as raster images may compress much more.
+
+   1.2. Intended audience
+
+      This specification is intended for use by implementors of software
+      to compress data into "deflate" format and/or decompress data from
+      "deflate" format.
+
+      The text of the specification assumes a basic background in
+      programming at the level of bits and other primitive data
+      representations.  Familiarity with the technique of Huffman coding
+      is helpful but not required.
+
+   1.3. Scope
+
+      The specification specifies a method for representing a sequence
+      of bytes as a (usually shorter) sequence of bits, and a method for
+      packing the latter bit sequence into bytes.
+
+   1.4. Compliance
+
+      Unless otherwise indicated below, a compliant decompressor must be
+      able to accept and decompress any data set that conforms to all
+      the specifications presented here; a compliant compressor must
+      produce data sets that conform to all the specifications presented
+      here.
+
+   1.5.  Definitions of terms and conventions used
+
+      Byte: 8 bits stored or transmitted as a unit (same as an octet).
+      For this specification, a byte is exactly 8 bits, even on machines
+
+
+
+Deutsch                      Informational                      [Page 3]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+      which store a character on a number of bits different from eight.
+      See below, for the numbering of bits within a byte.
+
+      String: a sequence of arbitrary bytes.
+
+   1.6. Changes from previous versions
+
+      There have been no technical changes to the deflate format since
+      version 1.1 of this specification.  In version 1.2, some
+      terminology was changed.  Version 1.3 is a conversion of the
+      specification to RFC style.
+
+2. Compressed representation overview
+
+   A compressed data set consists of a series of blocks, corresponding
+   to successive blocks of input data.  The block sizes are arbitrary,
+   except that non-compressible blocks are limited to 65,535 bytes.
+
+   Each block is compressed using a combination of the LZ77 algorithm
+   and Huffman coding. The Huffman trees for each block are independent
+   of those for previous or subsequent blocks; the LZ77 algorithm may
+   use a reference to a duplicated string occurring in a previous block,
+   up to 32K input bytes before.
+
+   Each block consists of two parts: a pair of Huffman code trees that
+   describe the representation of the compressed data part, and a
+   compressed data part.  (The Huffman trees themselves are compressed
+   using Huffman encoding.)  The compressed data consists of a series of
+   elements of two types: literal bytes (of strings that have not been
+   detected as duplicated within the previous 32K input bytes), and
+   pointers to duplicated strings, where a pointer is represented as a
+   pair <length, backward distance>.  The representation used in the
+   "deflate" format limits distances to 32K bytes and lengths to 258
+   bytes, but does not limit the size of a block, except for
+   uncompressible blocks, which are limited as noted above.
+
+   Each type of value (literals, distances, and lengths) in the
+   compressed data is represented using a Huffman code, using one code
+   tree for literals and lengths and a separate code tree for distances.
+   The code trees for each block appear in a compact form just before
+   the compressed data for that block.
+
+
+
+
+
+
+
+
+
+
+Deutsch                      Informational                      [Page 4]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+3. Detailed specification
+
+   3.1. Overall conventions In the diagrams below, a box like this:
+
+         +---+
+         |   | <-- the vertical bars might be missing
+         +---+
+
+      represents one byte; a box like this:
+
+         +==============+
+         |              |
+         +==============+
+
+      represents a variable number of bytes.
+
+      Bytes stored within a computer do not have a "bit order", since
+      they are always treated as a unit.  However, a byte considered as
+      an integer between 0 and 255 does have a most- and least-
+      significant bit, and since we write numbers with the most-
+      significant digit on the left, we also write bytes with the most-
+      significant bit on the left.  In the diagrams below, we number the
+      bits of a byte so that bit 0 is the least-significant bit, i.e.,
+      the bits are numbered:
+
+         +--------+
+         |76543210|
+         +--------+
+
+      Within a computer, a number may occupy multiple bytes.  All
+      multi-byte numbers in the format described here are stored with
+      the least-significant byte first (at the lower memory address).
+      For example, the decimal number 520 is stored as:
+
+             0        1
+         +--------+--------+
+         |00001000|00000010|
+         +--------+--------+
+          ^        ^
+          |        |
+          |        + more significant byte = 2 x 256
+          + less significant byte = 8
+
+      3.1.1. Packing into bytes
+
+         This document does not address the issue of the order in which
+         bits of a byte are transmitted on a bit-sequential medium,
+         since the final data format described here is byte- rather than
+
+
+
+Deutsch                      Informational                      [Page 5]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+         bit-oriented.  However, we describe the compressed block format
+         in below, as a sequence of data elements of various bit
+         lengths, not a sequence of bytes.  We must therefore specify
+         how to pack these data elements into bytes to form the final
+         compressed byte sequence:
+
+             * Data elements are packed into bytes in order of
+               increasing bit number within the byte, i.e., starting
+               with the least-significant bit of the byte.
+             * Data elements other than Huffman codes are packed
+               starting with the least-significant bit of the data
+               element.
+             * Huffman codes are packed starting with the most-
+               significant bit of the code.
+
+         In other words, if one were to print out the compressed data as
+         a sequence of bytes, starting with the first byte at the
+         *right* margin and proceeding to the *left*, with the most-
+         significant bit of each byte on the left as usual, one would be
+         able to parse the result from right to left, with fixed-width
+         elements in the correct MSB-to-LSB order and Huffman codes in
+         bit-reversed order (i.e., with the first bit of the code in the
+         relative LSB position).
+
+   3.2. Compressed block format
+
+      3.2.1. Synopsis of prefix and Huffman coding
+
+         Prefix coding represents symbols from an a priori known
+         alphabet by bit sequences (codes), one code for each symbol, in
+         a manner such that different symbols may be represented by bit
+         sequences of different lengths, but a parser can always parse
+         an encoded string unambiguously symbol-by-symbol.
+
+         We define a prefix code in terms of a binary tree in which the
+         two edges descending from each non-leaf node are labeled 0 and
+         1 and in which the leaf nodes correspond one-for-one with (are
+         labeled with) the symbols of the alphabet; then the code for a
+         symbol is the sequence of 0's and 1's on the edges leading from
+         the root to the leaf labeled with that symbol.  For example:
+
+
+
+
+
+
+
+
+
+
+
+Deutsch                      Informational                      [Page 6]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+                          /\              Symbol    Code
+                         0  1             ------    ----
+                        /    \                A      00
+                       /\     B               B       1
+                      0  1                    C     011
+                     /    \                   D     010
+                    A     /\
+                         0  1
+                        /    \
+                       D      C
+
+         A parser can decode the next symbol from an encoded input
+         stream by walking down the tree from the root, at each step
+         choosing the edge corresponding to the next input bit.
+
+         Given an alphabet with known symbol frequencies, the Huffman
+         algorithm allows the construction of an optimal prefix code
+         (one which represents strings with those symbol frequencies
+         using the fewest bits of any possible prefix codes for that
+         alphabet).  Such a code is called a Huffman code.  (See
+         reference [1] in Chapter 5, references for additional
+         information on Huffman codes.)
+
+         Note that in the "deflate" format, the Huffman codes for the
+         various alphabets must not exceed certain maximum code lengths.
+         This constraint complicates the algorithm for computing code
+         lengths from symbol frequencies.  Again, see Chapter 5,
+         references for details.
+
+      3.2.2. Use of Huffman coding in the "deflate" format
+
+         The Huffman codes used for each alphabet in the "deflate"
+         format have two additional rules:
+
+             * All codes of a given bit length have lexicographically
+               consecutive values, in the same order as the symbols
+               they represent;
+
+             * Shorter codes lexicographically precede longer codes.
+
+
+
+
+
+
+
+
+
+
+
+
+Deutsch                      Informational                      [Page 7]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+         We could recode the example above to follow this rule as
+         follows, assuming that the order of the alphabet is ABCD:
+
+            Symbol  Code
+            ------  ----
+            A       10
+            B       0
+            C       110
+            D       111
+
+         I.e., 0 precedes 10 which precedes 11x, and 110 and 111 are
+         lexicographically consecutive.
+
+         Given this rule, we can define the Huffman code for an alphabet
+         just by giving the bit lengths of the codes for each symbol of
+         the alphabet in order; this is sufficient to determine the
+         actual codes.  In our example, the code is completely defined
+         by the sequence of bit lengths (2, 1, 3, 3).  The following
+         algorithm generates the codes as integers, intended to be read
+         from most- to least-significant bit.  The code lengths are
+         initially in tree[I].Len; the codes are produced in
+         tree[I].Code.
+
+         1)  Count the number of codes for each code length.  Let
+             bl_count[N] be the number of codes of length N, N >= 1.
+
+         2)  Find the numerical value of the smallest code for each
+             code length:
+
+                code = 0;
+                bl_count[0] = 0;
+                for (bits = 1; bits <= MAX_BITS; bits++) {
+                    code = (code + bl_count[bits-1]) << 1;
+                    next_code[bits] = code;
+                }
+
+         3)  Assign numerical values to all codes, using consecutive
+             values for all codes of the same length with the base
+             values determined at step 2. Codes that are never used
+             (which have a bit length of zero) must not be assigned a
+             value.
+
+                for (n = 0;  n <= max_code; n++) {
+                    len = tree[n].Len;
+                    if (len != 0) {
+                        tree[n].Code = next_code[len];
+                        next_code[len]++;
+                    }
+
+
+
+Deutsch                      Informational                      [Page 8]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+                }
+
+         Example:
+
+         Consider the alphabet ABCDEFGH, with bit lengths (3, 3, 3, 3,
+         3, 2, 4, 4).  After step 1, we have:
+
+            N      bl_count[N]
+            -      -----------
+            2      1
+            3      5
+            4      2
+
+         Step 2 computes the following next_code values:
+
+            N      next_code[N]
+            -      ------------
+            1      0
+            2      0
+            3      2
+            4      14
+
+         Step 3 produces the following code values:
+
+            Symbol Length   Code
+            ------ ------   ----
+            A       3        010
+            B       3        011
+            C       3        100
+            D       3        101
+            E       3        110
+            F       2         00
+            G       4       1110
+            H       4       1111
+
+      3.2.3. Details of block format
+
+         Each block of compressed data begins with 3 header bits
+         containing the following data:
+
+            first bit       BFINAL
+            next 2 bits     BTYPE
+
+         Note that the header bits do not necessarily begin on a byte
+         boundary, since a block does not necessarily occupy an integral
+         number of bytes.
+
+
+
+
+
+Deutsch                      Informational                      [Page 9]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+         BFINAL is set if and only if this is the last block of the data
+         set.
+
+         BTYPE specifies how the data are compressed, as follows:
+
+            00 - no compression
+            01 - compressed with fixed Huffman codes
+            10 - compressed with dynamic Huffman codes
+            11 - reserved (error)
+
+         The only difference between the two compressed cases is how the
+         Huffman codes for the literal/length and distance alphabets are
+         defined.
+
+         In all cases, the decoding algorithm for the actual data is as
+         follows:
+
+            do
+               read block header from input stream.
+               if stored with no compression
+                  skip any remaining bits in current partially
+                     processed byte
+                  read LEN and NLEN (see next section)
+                  copy LEN bytes of data to output
+               otherwise
+                  if compressed with dynamic Huffman codes
+                     read representation of code trees (see
+                        subsection below)
+                  loop (until end of block code recognized)
+                     decode literal/length value from input stream
+                     if value < 256
+                        copy value (literal byte) to output stream
+                     otherwise
+                        if value = end of block (256)
+                           break from loop
+                        otherwise (value = 257..285)
+                           decode distance from input stream
+
+                           move backwards distance bytes in the output
+                           stream, and copy length bytes from this
+                           position to the output stream.
+                  end loop
+            while not last block
+
+         Note that a duplicated string reference may refer to a string
+         in a previous block; i.e., the backward distance may cross one
+         or more block boundaries.  However a distance cannot refer past
+         the beginning of the output stream.  (An application using a
+
+
+
+Deutsch                      Informational                     [Page 10]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+         preset dictionary might discard part of the output stream; a
+         distance can refer to that part of the output stream anyway)
+         Note also that the referenced string may overlap the current
+         position; for example, if the last 2 bytes decoded have values
+         X and Y, a string reference with <length = 5, distance = 2>
+         adds X,Y,X,Y,X to the output stream.
+
+         We now specify each compression method in turn.
+
+      3.2.4. Non-compressed blocks (BTYPE=00)
+
+         Any bits of input up to the next byte boundary are ignored.
+         The rest of the block consists of the following information:
+
+              0   1   2   3   4...
+            +---+---+---+---+================================+
+            |  LEN  | NLEN  |... LEN bytes of literal data...|
+            +---+---+---+---+================================+
+
+         LEN is the number of data bytes in the block.  NLEN is the
+         one's complement of LEN.
+
+      3.2.5. Compressed blocks (length and distance codes)
+
+         As noted above, encoded data blocks in the "deflate" format
+         consist of sequences of symbols drawn from three conceptually
+         distinct alphabets: either literal bytes, from the alphabet of
+         byte values (0..255), or <length, backward distance> pairs,
+         where the length is drawn from (3..258) and the distance is
+         drawn from (1..32,768).  In fact, the literal and length
+         alphabets are merged into a single alphabet (0..285), where
+         values 0..255 represent literal bytes, the value 256 indicates
+         end-of-block, and values 257..285 represent length codes
+         (possibly in conjunction with extra bits following the symbol
+         code) as follows:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deutsch                      Informational                     [Page 11]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+                 Extra               Extra               Extra
+            Code Bits Length(s) Code Bits Lengths   Code Bits Length(s)
+            ---- ---- ------     ---- ---- -------   ---- ---- -------
+             257   0     3       267   1   15,16     277   4   67-82
+             258   0     4       268   1   17,18     278   4   83-98
+             259   0     5       269   2   19-22     279   4   99-114
+             260   0     6       270   2   23-26     280   4  115-130
+             261   0     7       271   2   27-30     281   5  131-162
+             262   0     8       272   2   31-34     282   5  163-194
+             263   0     9       273   3   35-42     283   5  195-226
+             264   0    10       274   3   43-50     284   5  227-257
+             265   1  11,12      275   3   51-58     285   0    258
+             266   1  13,14      276   3   59-66
+
+         The extra bits should be interpreted as a machine integer
+         stored with the most-significant bit first, e.g., bits 1110
+         represent the value 14.
+
+                  Extra           Extra               Extra
+             Code Bits Dist  Code Bits   Dist     Code Bits Distance
+             ---- ---- ----  ---- ----  ------    ---- ---- --------
+               0   0    1     10   4     33-48    20    9   1025-1536
+               1   0    2     11   4     49-64    21    9   1537-2048
+               2   0    3     12   5     65-96    22   10   2049-3072
+               3   0    4     13   5     97-128   23   10   3073-4096
+               4   1   5,6    14   6    129-192   24   11   4097-6144
+               5   1   7,8    15   6    193-256   25   11   6145-8192
+               6   2   9-12   16   7    257-384   26   12  8193-12288
+               7   2  13-16   17   7    385-512   27   12 12289-16384
+               8   3  17-24   18   8    513-768   28   13 16385-24576
+               9   3  25-32   19   8   769-1024   29   13 24577-32768
+
+      3.2.6. Compression with fixed Huffman codes (BTYPE=01)
+
+         The Huffman codes for the two alphabets are fixed, and are not
+         represented explicitly in the data.  The Huffman code lengths
+         for the literal/length alphabet are:
+
+                   Lit Value    Bits        Codes
+                   ---------    ----        -----
+                     0 - 143     8          00110000 through
+                                            10111111
+                   144 - 255     9          110010000 through
+                                            111111111
+                   256 - 279     7          0000000 through
+                                            0010111
+                   280 - 287     8          11000000 through
+                                            11000111
+
+
+
+Deutsch                      Informational                     [Page 12]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+         The code lengths are sufficient to generate the actual codes,
+         as described above; we show the codes in the table for added
+         clarity.  Literal/length values 286-287 will never actually
+         occur in the compressed data, but participate in the code
+         construction.
+
+         Distance codes 0-31 are represented by (fixed-length) 5-bit
+         codes, with possible additional bits as shown in the table
+         shown in Paragraph 3.2.5, above.  Note that distance codes 30-
+         31 will never actually occur in the compressed data.
+
+      3.2.7. Compression with dynamic Huffman codes (BTYPE=10)
+
+         The Huffman codes for the two alphabets appear in the block
+         immediately after the header bits and before the actual
+         compressed data, first the literal/length code and then the
+         distance code.  Each code is defined by a sequence of code
+         lengths, as discussed in Paragraph 3.2.2, above.  For even
+         greater compactness, the code length sequences themselves are
+         compressed using a Huffman code.  The alphabet for code lengths
+         is as follows:
+
+               0 - 15: Represent code lengths of 0 - 15
+                   16: Copy the previous code length 3 - 6 times.
+                       The next 2 bits indicate repeat length
+                             (0 = 3, ... , 3 = 6)
+                          Example:  Codes 8, 16 (+2 bits 11),
+                                    16 (+2 bits 10) will expand to
+                                    12 code lengths of 8 (1 + 6 + 5)
+                   17: Repeat a code length of 0 for 3 - 10 times.
+                       (3 bits of length)
+                   18: Repeat a code length of 0 for 11 - 138 times
+                       (7 bits of length)
+
+         A code length of 0 indicates that the corresponding symbol in
+         the literal/length or distance alphabet will not occur in the
+         block, and should not participate in the Huffman code
+         construction algorithm given earlier.  If only one distance
+         code is used, it is encoded using one bit, not zero bits; in
+         this case there is a single code length of one, with one unused
+         code.  One distance code of zero bits means that there are no
+         distance codes used at all (the data is all literals).
+
+         We can now define the format of the block:
+
+               5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286)
+               5 Bits: HDIST, # of Distance codes - 1        (1 - 32)
+               4 Bits: HCLEN, # of Code Length codes - 4     (4 - 19)
+
+
+
+Deutsch                      Informational                     [Page 13]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+               (HCLEN + 4) x 3 bits: code lengths for the code length
+                  alphabet given just above, in the order: 16, 17, 18,
+                  0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+
+                  These code lengths are interpreted as 3-bit integers
+                  (0-7); as above, a code length of 0 means the
+                  corresponding symbol (literal/length or distance code
+                  length) is not used.
+
+               HLIT + 257 code lengths for the literal/length alphabet,
+                  encoded using the code length Huffman code
+
+               HDIST + 1 code lengths for the distance alphabet,
+                  encoded using the code length Huffman code
+
+               The actual compressed data of the block,
+                  encoded using the literal/length and distance Huffman
+                  codes
+
+               The literal/length symbol 256 (end of data),
+                  encoded using the literal/length Huffman code
+
+         The code length repeat codes can cross from HLIT + 257 to the
+         HDIST + 1 code lengths.  In other words, all code lengths form
+         a single sequence of HLIT + HDIST + 258 values.
+
+   3.3. Compliance
+
+      A compressor may limit further the ranges of values specified in
+      the previous section and still be compliant; for example, it may
+      limit the range of backward pointers to some value smaller than
+      32K.  Similarly, a compressor may limit the size of blocks so that
+      a compressible block fits in memory.
+
+      A compliant decompressor must accept the full range of possible
+      values defined in the previous section, and must accept blocks of
+      arbitrary size.
+
+4. Compression algorithm details
+
+   While it is the intent of this document to define the "deflate"
+   compressed data format without reference to any particular
+   compression algorithm, the format is related to the compressed
+   formats produced by LZ77 (Lempel-Ziv 1977, see reference [2] below);
+   since many variations of LZ77 are patented, it is strongly
+   recommended that the implementor of a compressor follow the general
+   algorithm presented here, which is known not to be patented per se.
+   The material in this section is not part of the definition of the
+
+
+
+Deutsch                      Informational                     [Page 14]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+   specification per se, and a compressor need not follow it in order to
+   be compliant.
+
+   The compressor terminates a block when it determines that starting a
+   new block with fresh trees would be useful, or when the block size
+   fills up the compressor's block buffer.
+
+   The compressor uses a chained hash table to find duplicated strings,
+   using a hash function that operates on 3-byte sequences.  At any
+   given point during compression, let XYZ be the next 3 input bytes to
+   be examined (not necessarily all different, of course).  First, the
+   compressor examines the hash chain for XYZ.  If the chain is empty,
+   the compressor simply writes out X as a literal byte and advances one
+   byte in the input.  If the hash chain is not empty, indicating that
+   the sequence XYZ (or, if we are unlucky, some other 3 bytes with the
+   same hash function value) has occurred recently, the compressor
+   compares all strings on the XYZ hash chain with the actual input data
+   sequence starting at the current point, and selects the longest
+   match.
+
+   The compressor searches the hash chains starting with the most recent
+   strings, to favor small distances and thus take advantage of the
+   Huffman encoding.  The hash chains are singly linked. There are no
+   deletions from the hash chains; the algorithm simply discards matches
+   that are too old.  To avoid a worst-case situation, very long hash
+   chains are arbitrarily truncated at a certain length, determined by a
+   run-time parameter.
+
+   To improve overall compression, the compressor optionally defers the
+   selection of matches ("lazy matching"): after a match of length N has
+   been found, the compressor searches for a longer match starting at
+   the next input byte.  If it finds a longer match, it truncates the
+   previous match to a length of one (thus producing a single literal
+   byte) and then emits the longer match.  Otherwise, it emits the
+   original match, and, as described above, advances N bytes before
+   continuing.
+
+   Run-time parameters also control this "lazy match" procedure.  If
+   compression ratio is most important, the compressor attempts a
+   complete second search regardless of the length of the first match.
+   In the normal case, if the current match is "long enough", the
+   compressor reduces the search for a longer match, thus speeding up
+   the process.  If speed is most important, the compressor inserts new
+   strings in the hash table only when no match was found, or when the
+   match is not "too long".  This degrades the compression ratio but
+   saves time since there are both fewer insertions and fewer searches.
+
+
+
+
+
+Deutsch                      Informational                     [Page 15]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+5. References
+
+   [1] Huffman, D. A., "A Method for the Construction of Minimum
+       Redundancy Codes", Proceedings of the Institute of Radio
+       Engineers, September 1952, Volume 40, Number 9, pp. 1098-1101.
+
+   [2] Ziv J., Lempel A., "A Universal Algorithm for Sequential Data
+       Compression", IEEE Transactions on Information Theory, Vol. 23,
+       No. 3, pp. 337-343.
+
+   [3] Gailly, J.-L., and Adler, M., ZLIB documentation and sources,
+       available in ftp://ftp.uu.net/pub/archiving/zip/doc/
+
+   [4] Gailly, J.-L., and Adler, M., GZIP documentation and sources,
+       available as gzip-*.tar in ftp://prep.ai.mit.edu/pub/gnu/
+
+   [5] Schwartz, E. S., and Kallick, B. "Generating a canonical prefix
+       encoding." Comm. ACM, 7,3 (Mar. 1964), pp. 166-169.
+
+   [6] Hirschberg and Lelewer, "Efficient decoding of prefix codes,"
+       Comm. ACM, 33,4, April 1990, pp. 449-459.
+
+6. Security Considerations
+
+   Any data compression method involves the reduction of redundancy in
+   the data.  Consequently, any corruption of the data is likely to have
+   severe effects and be difficult to correct.  Uncompressed text, on
+   the other hand, will probably still be readable despite the presence
+   of some corrupted bytes.
+
+   It is recommended that systems using this data format provide some
+   means of validating the integrity of the compressed data.  See
+   reference [3], for example.
+
+7. Source code
+
+   Source code for a C language implementation of a "deflate" compliant
+   compressor and decompressor is available within the zlib package at
+   ftp://ftp.uu.net/pub/archiving/zip/zlib/.
+
+8. Acknowledgements
+
+   Trademarks cited in this document are the property of their
+   respective owners.
+
+   Phil Katz designed the deflate format.  Jean-Loup Gailly and Mark
+   Adler wrote the related software described in this specification.
+   Glenn Randers-Pehrson converted this document to RFC and HTML format.
+
+
+
+Deutsch                      Informational                     [Page 16]
+
+RFC 1951      DEFLATE Compressed Data Format Specification      May 1996
+
+
+9. Author's Address
+
+   L. Peter Deutsch
+   Aladdin Enterprises
+   203 Santa Margarita Ave.
+   Menlo Park, CA 94025
+
+   Phone: (415) 322-0103 (AM only)
+   FAX:   (415) 322-1734
+   EMail: <ghost@aladdin.com>
+
+   Questions about the technical content of this specification can be
+   sent by email to:
+
+   Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
+   Mark Adler <madler@alumni.caltech.edu>
+
+   Editorial comments on this specification can be sent by email to:
+
+   L. Peter Deutsch <ghost@aladdin.com> and
+   Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Deutsch                      Informational                     [Page 17]
+
diff --git a/deps/libchdr/deps/zlib-1.2.11/doc/rfc1952.txt b/deps/libchdr/deps/zlib-1.2.11/doc/rfc1952.txt
new file mode 100644
index 00000000..a8e51b45
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/doc/rfc1952.txt
@@ -0,0 +1,675 @@
+
+
+
+
+
+
+Network Working Group                                         P. Deutsch
+Request for Comments: 1952                           Aladdin Enterprises
+Category: Informational                                         May 1996
+
+
+               GZIP file format specification version 4.3
+
+Status of This Memo
+
+   This memo provides information for the Internet community.  This memo
+   does not specify an Internet standard of any kind.  Distribution of
+   this memo is unlimited.
+
+IESG Note:
+
+   The IESG takes no position on the validity of any Intellectual
+   Property Rights statements contained in this document.
+
+Notices
+
+   Copyright (c) 1996 L. Peter Deutsch
+
+   Permission is granted to copy and distribute this document for any
+   purpose and without charge, including translations into other
+   languages and incorporation into compilations, provided that the
+   copyright notice and this notice are preserved, and that any
+   substantive changes or deletions from the original are clearly
+   marked.
+
+   A pointer to the latest version of this and related documentation in
+   HTML format can be found at the URL
+   <ftp://ftp.uu.net/graphics/png/documents/zlib/zdoc-index.html>.
+
+Abstract
+
+   This specification defines a lossless compressed data format that is
+   compatible with the widely used GZIP utility.  The format includes a
+   cyclic redundancy check value for detecting data corruption.  The
+   format presently uses the DEFLATE method of compression but can be
+   easily extended to use other compression methods.  The format can be
+   implemented readily in a manner not covered by patents.
+
+
+
+
+
+
+
+
+
+
+Deutsch                      Informational                      [Page 1]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+Table of Contents
+
+   1. Introduction ................................................... 2
+      1.1. Purpose ................................................... 2
+      1.2. Intended audience ......................................... 3
+      1.3. Scope ..................................................... 3
+      1.4. Compliance ................................................ 3
+      1.5. Definitions of terms and conventions used ................. 3
+      1.6. Changes from previous versions ............................ 3
+   2. Detailed specification ......................................... 4
+      2.1. Overall conventions ....................................... 4
+      2.2. File format ............................................... 5
+      2.3. Member format ............................................. 5
+          2.3.1. Member header and trailer ........................... 6
+              2.3.1.1. Extra field ................................... 8
+              2.3.1.2. Compliance .................................... 9
+      3. References .................................................. 9
+      4. Security Considerations .................................... 10
+      5. Acknowledgements ........................................... 10
+      6. Author's Address ........................................... 10
+      7. Appendix: Jean-Loup Gailly's gzip utility .................. 11
+      8. Appendix: Sample CRC Code .................................. 11
+
+1. Introduction
+
+   1.1. Purpose
+
+      The purpose of this specification is to define a lossless
+      compressed data format that:
+
+          * Is independent of CPU type, operating system, file system,
+            and character set, and hence can be used for interchange;
+          * Can compress or decompress a data stream (as opposed to a
+            randomly accessible file) to produce another data stream,
+            using only an a priori bounded amount of intermediate
+            storage, and hence can be used in data communications or
+            similar structures such as Unix filters;
+          * Compresses data with efficiency comparable to the best
+            currently available general-purpose compression methods,
+            and in particular considerably better than the "compress"
+            program;
+          * Can be implemented readily in a manner not covered by
+            patents, and hence can be practiced freely;
+          * Is compatible with the file format produced by the current
+            widely used gzip utility, in that conforming decompressors
+            will be able to read data produced by the existing gzip
+            compressor.
+
+
+
+
+Deutsch                      Informational                      [Page 2]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+      The data format defined by this specification does not attempt to:
+
+          * Provide random access to compressed data;
+          * Compress specialized data (e.g., raster graphics) as well as
+            the best currently available specialized algorithms.
+
+   1.2. Intended audience
+
+      This specification is intended for use by implementors of software
+      to compress data into gzip format and/or decompress data from gzip
+      format.
+
+      The text of the specification assumes a basic background in
+      programming at the level of bits and other primitive data
+      representations.
+
+   1.3. Scope
+
+      The specification specifies a compression method and a file format
+      (the latter assuming only that a file can store a sequence of
+      arbitrary bytes).  It does not specify any particular interface to
+      a file system or anything about character sets or encodings
+      (except for file names and comments, which are optional).
+
+   1.4. Compliance
+
+      Unless otherwise indicated below, a compliant decompressor must be
+      able to accept and decompress any file that conforms to all the
+      specifications presented here; a compliant compressor must produce
+      files that conform to all the specifications presented here.  The
+      material in the appendices is not part of the specification per se
+      and is not relevant to compliance.
+
+   1.5. Definitions of terms and conventions used
+
+      byte: 8 bits stored or transmitted as a unit (same as an octet).
+      (For this specification, a byte is exactly 8 bits, even on
+      machines which store a character on a number of bits different
+      from 8.)  See below for the numbering of bits within a byte.
+
+   1.6. Changes from previous versions
+
+      There have been no technical changes to the gzip format since
+      version 4.1 of this specification.  In version 4.2, some
+      terminology was changed, and the sample CRC code was rewritten for
+      clarity and to eliminate the requirement for the caller to do pre-
+      and post-conditioning.  Version 4.3 is a conversion of the
+      specification to RFC style.
+
+
+
+Deutsch                      Informational                      [Page 3]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+2. Detailed specification
+
+   2.1. Overall conventions
+
+      In the diagrams below, a box like this:
+
+         +---+
+         |   | <-- the vertical bars might be missing
+         +---+
+
+      represents one byte; a box like this:
+
+         +==============+
+         |              |
+         +==============+
+
+      represents a variable number of bytes.
+
+      Bytes stored within a computer do not have a "bit order", since
+      they are always treated as a unit.  However, a byte considered as
+      an integer between 0 and 255 does have a most- and least-
+      significant bit, and since we write numbers with the most-
+      significant digit on the left, we also write bytes with the most-
+      significant bit on the left.  In the diagrams below, we number the
+      bits of a byte so that bit 0 is the least-significant bit, i.e.,
+      the bits are numbered:
+
+         +--------+
+         |76543210|
+         +--------+
+
+      This document does not address the issue of the order in which
+      bits of a byte are transmitted on a bit-sequential medium, since
+      the data format described here is byte- rather than bit-oriented.
+
+      Within a computer, a number may occupy multiple bytes.  All
+      multi-byte numbers in the format described here are stored with
+      the least-significant byte first (at the lower memory address).
+      For example, the decimal number 520 is stored as:
+
+             0        1
+         +--------+--------+
+         |00001000|00000010|
+         +--------+--------+
+          ^        ^
+          |        |
+          |        + more significant byte = 2 x 256
+          + less significant byte = 8
+
+
+
+Deutsch                      Informational                      [Page 4]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+   2.2. File format
+
+      A gzip file consists of a series of "members" (compressed data
+      sets).  The format of each member is specified in the following
+      section.  The members simply appear one after another in the file,
+      with no additional information before, between, or after them.
+
+   2.3. Member format
+
+      Each member has the following structure:
+
+         +---+---+---+---+---+---+---+---+---+---+
+         |ID1|ID2|CM |FLG|     MTIME     |XFL|OS | (more-->)
+         +---+---+---+---+---+---+---+---+---+---+
+
+      (if FLG.FEXTRA set)
+
+         +---+---+=================================+
+         | XLEN  |...XLEN bytes of "extra field"...| (more-->)
+         +---+---+=================================+
+
+      (if FLG.FNAME set)
+
+         +=========================================+
+         |...original file name, zero-terminated...| (more-->)
+         +=========================================+
+
+      (if FLG.FCOMMENT set)
+
+         +===================================+
+         |...file comment, zero-terminated...| (more-->)
+         +===================================+
+
+      (if FLG.FHCRC set)
+
+         +---+---+
+         | CRC16 |
+         +---+---+
+
+         +=======================+
+         |...compressed blocks...| (more-->)
+         +=======================+
+
+           0   1   2   3   4   5   6   7
+         +---+---+---+---+---+---+---+---+
+         |     CRC32     |     ISIZE     |
+         +---+---+---+---+---+---+---+---+
+
+
+
+
+Deutsch                      Informational                      [Page 5]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+      2.3.1. Member header and trailer
+
+         ID1 (IDentification 1)
+         ID2 (IDentification 2)
+            These have the fixed values ID1 = 31 (0x1f, \037), ID2 = 139
+            (0x8b, \213), to identify the file as being in gzip format.
+
+         CM (Compression Method)
+            This identifies the compression method used in the file.  CM
+            = 0-7 are reserved.  CM = 8 denotes the "deflate"
+            compression method, which is the one customarily used by
+            gzip and which is documented elsewhere.
+
+         FLG (FLaGs)
+            This flag byte is divided into individual bits as follows:
+
+               bit 0   FTEXT
+               bit 1   FHCRC
+               bit 2   FEXTRA
+               bit 3   FNAME
+               bit 4   FCOMMENT
+               bit 5   reserved
+               bit 6   reserved
+               bit 7   reserved
+
+            If FTEXT is set, the file is probably ASCII text.  This is
+            an optional indication, which the compressor may set by
+            checking a small amount of the input data to see whether any
+            non-ASCII characters are present.  In case of doubt, FTEXT
+            is cleared, indicating binary data. For systems which have
+            different file formats for ascii text and binary data, the
+            decompressor can use FTEXT to choose the appropriate format.
+            We deliberately do not specify the algorithm used to set
+            this bit, since a compressor always has the option of
+            leaving it cleared and a decompressor always has the option
+            of ignoring it and letting some other program handle issues
+            of data conversion.
+
+            If FHCRC is set, a CRC16 for the gzip header is present,
+            immediately before the compressed data. The CRC16 consists
+            of the two least significant bytes of the CRC32 for all
+            bytes of the gzip header up to and not including the CRC16.
+            [The FHCRC bit was never set by versions of gzip up to
+            1.2.4, even though it was documented with a different
+            meaning in gzip 1.2.4.]
+
+            If FEXTRA is set, optional extra fields are present, as
+            described in a following section.
+
+
+
+Deutsch                      Informational                      [Page 6]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+            If FNAME is set, an original file name is present,
+            terminated by a zero byte.  The name must consist of ISO
+            8859-1 (LATIN-1) characters; on operating systems using
+            EBCDIC or any other character set for file names, the name
+            must be translated to the ISO LATIN-1 character set.  This
+            is the original name of the file being compressed, with any
+            directory components removed, and, if the file being
+            compressed is on a file system with case insensitive names,
+            forced to lower case. There is no original file name if the
+            data was compressed from a source other than a named file;
+            for example, if the source was stdin on a Unix system, there
+            is no file name.
+
+            If FCOMMENT is set, a zero-terminated file comment is
+            present.  This comment is not interpreted; it is only
+            intended for human consumption.  The comment must consist of
+            ISO 8859-1 (LATIN-1) characters.  Line breaks should be
+            denoted by a single line feed character (10 decimal).
+
+            Reserved FLG bits must be zero.
+
+         MTIME (Modification TIME)
+            This gives the most recent modification time of the original
+            file being compressed.  The time is in Unix format, i.e.,
+            seconds since 00:00:00 GMT, Jan.  1, 1970.  (Note that this
+            may cause problems for MS-DOS and other systems that use
+            local rather than Universal time.)  If the compressed data
+            did not come from a file, MTIME is set to the time at which
+            compression started.  MTIME = 0 means no time stamp is
+            available.
+
+         XFL (eXtra FLags)
+            These flags are available for use by specific compression
+            methods.  The "deflate" method (CM = 8) sets these flags as
+            follows:
+
+               XFL = 2 - compressor used maximum compression,
+                         slowest algorithm
+               XFL = 4 - compressor used fastest algorithm
+
+         OS (Operating System)
+            This identifies the type of file system on which compression
+            took place.  This may be useful in determining end-of-line
+            convention for text files.  The currently defined values are
+            as follows:
+
+
+
+
+
+
+Deutsch                      Informational                      [Page 7]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+                 0 - FAT filesystem (MS-DOS, OS/2, NT/Win32)
+                 1 - Amiga
+                 2 - VMS (or OpenVMS)
+                 3 - Unix
+                 4 - VM/CMS
+                 5 - Atari TOS
+                 6 - HPFS filesystem (OS/2, NT)
+                 7 - Macintosh
+                 8 - Z-System
+                 9 - CP/M
+                10 - TOPS-20
+                11 - NTFS filesystem (NT)
+                12 - QDOS
+                13 - Acorn RISCOS
+               255 - unknown
+
+         XLEN (eXtra LENgth)
+            If FLG.FEXTRA is set, this gives the length of the optional
+            extra field.  See below for details.
+
+         CRC32 (CRC-32)
+            This contains a Cyclic Redundancy Check value of the
+            uncompressed data computed according to CRC-32 algorithm
+            used in the ISO 3309 standard and in section 8.1.1.6.2 of
+            ITU-T recommendation V.42.  (See http://www.iso.ch for
+            ordering ISO documents. See gopher://info.itu.ch for an
+            online version of ITU-T V.42.)
+
+         ISIZE (Input SIZE)
+            This contains the size of the original (uncompressed) input
+            data modulo 2^32.
+
+      2.3.1.1. Extra field
+
+         If the FLG.FEXTRA bit is set, an "extra field" is present in
+         the header, with total length XLEN bytes.  It consists of a
+         series of subfields, each of the form:
+
+            +---+---+---+---+==================================+
+            |SI1|SI2|  LEN  |... LEN bytes of subfield data ...|
+            +---+---+---+---+==================================+
+
+         SI1 and SI2 provide a subfield ID, typically two ASCII letters
+         with some mnemonic value.  Jean-Loup Gailly
+         <gzip@prep.ai.mit.edu> is maintaining a registry of subfield
+         IDs; please send him any subfield ID you wish to use.  Subfield
+         IDs with SI2 = 0 are reserved for future use.  The following
+         IDs are currently defined:
+
+
+
+Deutsch                      Informational                      [Page 8]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+            SI1         SI2         Data
+            ----------  ----------  ----
+            0x41 ('A')  0x70 ('P')  Apollo file type information
+
+         LEN gives the length of the subfield data, excluding the 4
+         initial bytes.
+
+      2.3.1.2. Compliance
+
+         A compliant compressor must produce files with correct ID1,
+         ID2, CM, CRC32, and ISIZE, but may set all the other fields in
+         the fixed-length part of the header to default values (255 for
+         OS, 0 for all others).  The compressor must set all reserved
+         bits to zero.
+
+         A compliant decompressor must check ID1, ID2, and CM, and
+         provide an error indication if any of these have incorrect
+         values.  It must examine FEXTRA/XLEN, FNAME, FCOMMENT and FHCRC
+         at least so it can skip over the optional fields if they are
+         present.  It need not examine any other part of the header or
+         trailer; in particular, a decompressor may ignore FTEXT and OS
+         and always produce binary output, and still be compliant.  A
+         compliant decompressor must give an error indication if any
+         reserved bit is non-zero, since such a bit could indicate the
+         presence of a new field that would cause subsequent data to be
+         interpreted incorrectly.
+
+3. References
+
+   [1] "Information Processing - 8-bit single-byte coded graphic
+       character sets - Part 1: Latin alphabet No.1" (ISO 8859-1:1987).
+       The ISO 8859-1 (Latin-1) character set is a superset of 7-bit
+       ASCII. Files defining this character set are available as
+       iso_8859-1.* in ftp://ftp.uu.net/graphics/png/documents/
+
+   [2] ISO 3309
+
+   [3] ITU-T recommendation V.42
+
+   [4] Deutsch, L.P.,"DEFLATE Compressed Data Format Specification",
+       available in ftp://ftp.uu.net/pub/archiving/zip/doc/
+
+   [5] Gailly, J.-L., GZIP documentation, available as gzip-*.tar in
+       ftp://prep.ai.mit.edu/pub/gnu/
+
+   [6] Sarwate, D.V., "Computation of Cyclic Redundancy Checks via Table
+       Look-Up", Communications of the ACM, 31(8), pp.1008-1013.
+
+
+
+
+Deutsch                      Informational                      [Page 9]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+   [7] Schwaderer, W.D., "CRC Calculation", April 85 PC Tech Journal,
+       pp.118-133.
+
+   [8] ftp://ftp.adelaide.edu.au/pub/rocksoft/papers/crc_v3.txt,
+       describing the CRC concept.
+
+4. Security Considerations
+
+   Any data compression method involves the reduction of redundancy in
+   the data.  Consequently, any corruption of the data is likely to have
+   severe effects and be difficult to correct.  Uncompressed text, on
+   the other hand, will probably still be readable despite the presence
+   of some corrupted bytes.
+
+   It is recommended that systems using this data format provide some
+   means of validating the integrity of the compressed data, such as by
+   setting and checking the CRC-32 check value.
+
+5. Acknowledgements
+
+   Trademarks cited in this document are the property of their
+   respective owners.
+
+   Jean-Loup Gailly designed the gzip format and wrote, with Mark Adler,
+   the related software described in this specification.  Glenn
+   Randers-Pehrson converted this document to RFC and HTML format.
+
+6. Author's Address
+
+   L. Peter Deutsch
+   Aladdin Enterprises
+   203 Santa Margarita Ave.
+   Menlo Park, CA 94025
+
+   Phone: (415) 322-0103 (AM only)
+   FAX:   (415) 322-1734
+   EMail: <ghost@aladdin.com>
+
+   Questions about the technical content of this specification can be
+   sent by email to:
+
+   Jean-Loup Gailly <gzip@prep.ai.mit.edu> and
+   Mark Adler <madler@alumni.caltech.edu>
+
+   Editorial comments on this specification can be sent by email to:
+
+   L. Peter Deutsch <ghost@aladdin.com> and
+   Glenn Randers-Pehrson <randeg@alumni.rpi.edu>
+
+
+
+Deutsch                      Informational                     [Page 10]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+7. Appendix: Jean-Loup Gailly's gzip utility
+
+   The most widely used implementation of gzip compression, and the
+   original documentation on which this specification is based, were
+   created by Jean-Loup Gailly <gzip@prep.ai.mit.edu>.  Since this
+   implementation is a de facto standard, we mention some more of its
+   features here.  Again, the material in this section is not part of
+   the specification per se, and implementations need not follow it to
+   be compliant.
+
+   When compressing or decompressing a file, gzip preserves the
+   protection, ownership, and modification time attributes on the local
+   file system, since there is no provision for representing protection
+   attributes in the gzip file format itself.  Since the file format
+   includes a modification time, the gzip decompressor provides a
+   command line switch that assigns the modification time from the file,
+   rather than the local modification time of the compressed input, to
+   the decompressed output.
+
+8. Appendix: Sample CRC Code
+
+   The following sample code represents a practical implementation of
+   the CRC (Cyclic Redundancy Check). (See also ISO 3309 and ITU-T V.42
+   for a formal specification.)
+
+   The sample code is in the ANSI C programming language. Non C users
+   may find it easier to read with these hints:
+
+      &      Bitwise AND operator.
+      ^      Bitwise exclusive-OR operator.
+      >>     Bitwise right shift operator. When applied to an
+             unsigned quantity, as here, right shift inserts zero
+             bit(s) at the left.
+      !      Logical NOT operator.
+      ++     "n++" increments the variable n.
+      0xNNN  0x introduces a hexadecimal (base 16) constant.
+             Suffix L indicates a long value (at least 32 bits).
+
+      /* Table of CRCs of all 8-bit messages. */
+      unsigned long crc_table[256];
+
+      /* Flag: has the table been computed? Initially false. */
+      int crc_table_computed = 0;
+
+      /* Make the table for a fast CRC. */
+      void make_crc_table(void)
+      {
+        unsigned long c;
+
+
+
+Deutsch                      Informational                     [Page 11]
+
+RFC 1952             GZIP File Format Specification             May 1996
+
+
+        int n, k;
+        for (n = 0; n < 256; n++) {
+          c = (unsigned long) n;
+          for (k = 0; k < 8; k++) {
+            if (c & 1) {
+              c = 0xedb88320L ^ (c >> 1);
+            } else {
+              c = c >> 1;
+            }
+          }
+          crc_table[n] = c;
+        }
+        crc_table_computed = 1;
+      }
+
+      /*
+         Update a running crc with the bytes buf[0..len-1] and return
+       the updated crc. The crc should be initialized to zero. Pre- and
+       post-conditioning (one's complement) is performed within this
+       function so it shouldn't be done by the caller. Usage example:
+
+         unsigned long crc = 0L;
+
+         while (read_buffer(buffer, length) != EOF) {
+           crc = update_crc(crc, buffer, length);
+         }
+         if (crc != original_crc) error();
+      */
+      unsigned long update_crc(unsigned long crc,
+                      unsigned char *buf, int len)
+      {
+        unsigned long c = crc ^ 0xffffffffL;
+        int n;
+
+        if (!crc_table_computed)
+          make_crc_table();
+        for (n = 0; n < len; n++) {
+          c = crc_table[(c ^ buf[n]) & 0xff] ^ (c >> 8);
+        }
+        return c ^ 0xffffffffL;
+      }
+
+      /* Return the CRC of the bytes buf[0..len-1]. */
+      unsigned long crc(unsigned char *buf, int len)
+      {
+        return update_crc(0L, buf, len);
+      }
+
+
+
+
+Deutsch                      Informational                     [Page 12]
+
diff --git a/deps/libchdr/deps/zlib-1.2.11/doc/txtvsbin.txt b/deps/libchdr/deps/zlib-1.2.11/doc/txtvsbin.txt
new file mode 100644
index 00000000..3d0f0634
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/doc/txtvsbin.txt
@@ -0,0 +1,107 @@
+A Fast Method for Identifying Plain Text Files
+==============================================
+
+
+Introduction
+------------
+
+Given a file coming from an unknown source, it is sometimes desirable
+to find out whether the format of that file is plain text.  Although
+this may appear like a simple task, a fully accurate detection of the
+file type requires heavy-duty semantic analysis on the file contents.
+It is, however, possible to obtain satisfactory results by employing
+various heuristics.
+
+Previous versions of PKZip and other zip-compatible compression tools
+were using a crude detection scheme: if more than 80% (4/5) of the bytes
+found in a certain buffer are within the range [7..127], the file is
+labeled as plain text, otherwise it is labeled as binary.  A prominent
+limitation of this scheme is the restriction to Latin-based alphabets.
+Other alphabets, like Greek, Cyrillic or Asian, make extensive use of
+the bytes within the range [128..255], and texts using these alphabets
+are most often misidentified by this scheme; in other words, the rate
+of false negatives is sometimes too high, which means that the recall
+is low.  Another weakness of this scheme is a reduced precision, due to
+the false positives that may occur when binary files containing large
+amounts of textual characters are misidentified as plain text.
+
+In this article we propose a new, simple detection scheme that features
+a much increased precision and a near-100% recall.  This scheme is
+designed to work on ASCII, Unicode and other ASCII-derived alphabets,
+and it handles single-byte encodings (ISO-8859, MacRoman, KOI8, etc.)
+and variable-sized encodings (ISO-2022, UTF-8, etc.).  Wider encodings
+(UCS-2/UTF-16 and UCS-4/UTF-32) are not handled, however.
+
+
+The Algorithm
+-------------
+
+The algorithm works by dividing the set of bytecodes [0..255] into three
+categories:
+- The white list of textual bytecodes:
+  9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255.
+- The gray list of tolerated bytecodes:
+  7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC).
+- The black list of undesired, non-textual bytecodes:
+  0 (NUL) to 6, 14 to 31.
+
+If a file contains at least one byte that belongs to the white list and
+no byte that belongs to the black list, then the file is categorized as
+plain text; otherwise, it is categorized as binary.  (The boundary case,
+when the file is empty, automatically falls into the latter category.)
+
+
+Rationale
+---------
+
+The idea behind this algorithm relies on two observations.
+
+The first observation is that, although the full range of 7-bit codes
+[0..127] is properly specified by the ASCII standard, most control
+characters in the range [0..31] are not used in practice.  The only
+widely-used, almost universally-portable control codes are 9 (TAB),
+10 (LF) and 13 (CR).  There are a few more control codes that are
+recognized on a reduced range of platforms and text viewers/editors:
+7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB) and 27 (ESC); but these
+codes are rarely (if ever) used alone, without being accompanied by
+some printable text.  Even the newer, portable text formats such as
+XML avoid using control characters outside the list mentioned here.
+
+The second observation is that most of the binary files tend to contain
+control characters, especially 0 (NUL).  Even though the older text
+detection schemes observe the presence of non-ASCII codes from the range
+[128..255], the precision rarely has to suffer if this upper range is
+labeled as textual, because the files that are genuinely binary tend to
+contain both control characters and codes from the upper range.  On the
+other hand, the upper range needs to be labeled as textual, because it
+is used by virtually all ASCII extensions.  In particular, this range is
+used for encoding non-Latin scripts.
+
+Since there is no counting involved, other than simply observing the
+presence or the absence of some byte values, the algorithm produces
+consistent results, regardless what alphabet encoding is being used.
+(If counting were involved, it could be possible to obtain different
+results on a text encoded, say, using ISO-8859-16 versus UTF-8.)
+
+There is an extra category of plain text files that are "polluted" with
+one or more black-listed codes, either by mistake or by peculiar design
+considerations.  In such cases, a scheme that tolerates a small fraction
+of black-listed codes would provide an increased recall (i.e. more true
+positives).  This, however, incurs a reduced precision overall, since
+false positives are more likely to appear in binary files that contain
+large chunks of textual data.  Furthermore, "polluted" plain text should
+be regarded as binary by general-purpose text detection schemes, because
+general-purpose text processing algorithms might not be applicable.
+Under this premise, it is safe to say that our detection method provides
+a near-100% recall.
+
+Experiments have been run on many files coming from various platforms
+and applications.  We tried plain text files, system logs, source code,
+formatted office documents, compiled object code, etc.  The results
+confirm the optimistic assumptions about the capabilities of this
+algorithm.
+
+
+--
+Cosmin Truta
+Last updated: 2006-May-28
diff --git a/deps/libchdr/deps/zlib-1.2.11/gzclose.c b/deps/libchdr/deps/zlib-1.2.11/gzclose.c
new file mode 100644
index 00000000..caeb99a3
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/gzclose.c
@@ -0,0 +1,25 @@
+/* gzclose.c -- zlib gzclose() function
+ * Copyright (C) 2004, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+/* gzclose() is in a separate file so that it is linked in only if it is used.
+   That way the other gzclose functions can be used instead to avoid linking in
+   unneeded compression or decompression routines. */
+int ZEXPORT gzclose(file)
+    gzFile file;
+{
+#ifndef NO_GZCOMPRESS
+    gz_statep state;
+
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file);
+#else
+    return gzclose_r(file);
+#endif
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/gzguts.h b/deps/libchdr/deps/zlib-1.2.11/gzguts.h
new file mode 100644
index 00000000..990a4d25
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/gzguts.h
@@ -0,0 +1,218 @@
+/* gzguts.h -- zlib internal header definitions for gz* operations
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef _LARGEFILE64_SOURCE
+#  ifndef _LARGEFILE_SOURCE
+#    define _LARGEFILE_SOURCE 1
+#  endif
+#  ifdef _FILE_OFFSET_BITS
+#    undef _FILE_OFFSET_BITS
+#  endif
+#endif
+
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
+#include <stdio.h>
+#include "zlib.h"
+#ifdef STDC
+#  include <string.h>
+#  include <stdlib.h>
+#  include <limits.h>
+#endif
+
+#ifndef _POSIX_SOURCE
+#  define _POSIX_SOURCE
+#endif
+#include <fcntl.h>
+
+#ifdef _WIN32
+#  include <stddef.h>
+#endif
+
+#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32)
+#  include <io.h>
+#endif
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#  define WIDECHAR
+#endif
+
+#ifdef WINAPI_FAMILY
+#  define open _open
+#  define read _read
+#  define write _write
+#  define close _close
+#endif
+
+#ifdef NO_DEFLATE       /* for compatibility with old definition */
+#  define NO_GZCOMPRESS
+#endif
+
+#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(__CYGWIN__)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#ifndef HAVE_VSNPRINTF
+#  ifdef MSDOS
+/* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
+   but for now we just assume it doesn't. */
+#    define NO_vsnprintf
+#  endif
+#  ifdef __TURBOC__
+#    define NO_vsnprintf
+#  endif
+#  ifdef WIN32
+/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
+#    if !defined(vsnprintf) && !defined(NO_vsnprintf)
+#      if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
+#         define vsnprintf _vsnprintf
+#      endif
+#    endif
+#  endif
+#  ifdef __SASC
+#    define NO_vsnprintf
+#  endif
+#  ifdef VMS
+#    define NO_vsnprintf
+#  endif
+#  ifdef __OS400__
+#    define NO_vsnprintf
+#  endif
+#  ifdef __MVS__
+#    define NO_vsnprintf
+#  endif
+#endif
+
+/* unlike snprintf (which is required in C99), _snprintf does not guarantee
+   null termination of the result -- however this is only used in gzlib.c where
+   the result is assured to fit in the space provided */
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#  define snprintf _snprintf
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
+
+/* gz* functions always use library allocation functions */
+#ifndef STDC
+  extern voidp  malloc OF((uInt size));
+  extern void   free   OF((voidpf ptr));
+#endif
+
+/* get errno and strerror definition */
+#if defined UNDER_CE
+#  include <windows.h>
+#  define zstrerror() gz_strwinerror((DWORD)GetLastError())
+#else
+#  ifndef NO_STRERROR
+#    include <errno.h>
+#    define zstrerror() strerror(errno)
+#  else
+#    define zstrerror() "stdio error (consult errno)"
+#  endif
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
+    ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+    ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
+    ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
+    ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+#endif
+
+/* default memLevel */
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+
+/* default i/o buffer size -- double this for output when reading (this and
+   twice this must be able to fit in an unsigned type) */
+#define GZBUFSIZE 8192
+
+/* gzip modes, also provide a little integrity check on the passed structure */
+#define GZ_NONE 0
+#define GZ_READ 7247
+#define GZ_WRITE 31153
+#define GZ_APPEND 1     /* mode set to GZ_WRITE after the file is opened */
+
+/* values for gz_state how */
+#define LOOK 0      /* look for a gzip header */
+#define COPY 1      /* copy input directly */
+#define GZIP 2      /* decompress a gzip stream */
+
+/* internal gzip file state data structure */
+typedef struct {
+        /* exposed contents for gzgetc() macro */
+    struct gzFile_s x;      /* "x" for exposed */
+                            /* x.have: number of bytes available at x.next */
+                            /* x.next: next output data to deliver or write */
+                            /* x.pos: current position in uncompressed data */
+        /* used for both reading and writing */
+    int mode;               /* see gzip modes above */
+    int fd;                 /* file descriptor */
+    char *path;             /* path or fd for error messages */
+    unsigned size;          /* buffer size, zero if not allocated yet */
+    unsigned want;          /* requested buffer size, default is GZBUFSIZE */
+    unsigned char *in;      /* input buffer (double-sized when writing) */
+    unsigned char *out;     /* output buffer (double-sized when reading) */
+    int direct;             /* 0 if processing gzip, 1 if transparent */
+        /* just for reading */
+    int how;                /* 0: get header, 1: copy, 2: decompress */
+    z_off64_t start;        /* where the gzip data started, for rewinding */
+    int eof;                /* true if end of input file reached */
+    int past;               /* true if read requested past end */
+        /* just for writing */
+    int level;              /* compression level */
+    int strategy;           /* compression strategy */
+        /* seek request */
+    z_off64_t skip;         /* amount to skip (already rewound if backwards) */
+    int seek;               /* true if seek request pending */
+        /* error information */
+    int err;                /* error code */
+    char *msg;              /* error message */
+        /* zlib inflate or deflate stream */
+    z_stream strm;          /* stream structure in-place (not a pointer) */
+} gz_state;
+typedef gz_state FAR *gz_statep;
+
+/* shared functions */
+void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *));
+#if defined UNDER_CE
+char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error));
+#endif
+
+/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t
+   value -- needed when comparing unsigned to z_off64_t, which is signed
+   (possible z_off64_t types off_t, off64_t, and long are all signed) */
+#ifdef INT_MAX
+#  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX)
+#else
+unsigned ZLIB_INTERNAL gz_intmax OF((void));
+#  define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
+#endif
diff --git a/deps/libchdr/deps/zlib-1.2.11/gzlib.c b/deps/libchdr/deps/zlib-1.2.11/gzlib.c
new file mode 100644
index 00000000..4105e6af
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/gzlib.c
@@ -0,0 +1,637 @@
+/* gzlib.c -- zlib functions common to reading and writing gzip files
+ * Copyright (C) 2004-2017 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+#if defined(_WIN32) && !defined(__BORLANDC__) && !defined(__MINGW32__)
+#  define LSEEK _lseeki64
+#else
+#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0
+#  define LSEEK lseek64
+#else
+#  define LSEEK lseek
+#endif
+#endif
+
+/* Local functions */
+local void gz_reset OF((gz_statep));
+local gzFile gz_open OF((const void *, int, const char *));
+
+#if defined UNDER_CE
+
+/* Map the Windows error number in ERROR to a locale-dependent error message
+   string and return a pointer to it.  Typically, the values for ERROR come
+   from GetLastError.
+
+   The string pointed to shall not be modified by the application, but may be
+   overwritten by a subsequent call to gz_strwinerror
+
+   The gz_strwinerror function does not change the current setting of
+   GetLastError. */
+char ZLIB_INTERNAL *gz_strwinerror (error)
+     DWORD error;
+{
+    static char buf[1024];
+
+    wchar_t *msgbuf;
+    DWORD lasterr = GetLastError();
+    DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM
+        | FORMAT_MESSAGE_ALLOCATE_BUFFER,
+        NULL,
+        error,
+        0, /* Default language */
+        (LPVOID)&msgbuf,
+        0,
+        NULL);
+    if (chars != 0) {
+        /* If there is an \r\n appended, zap it.  */
+        if (chars >= 2
+            && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') {
+            chars -= 2;
+            msgbuf[chars] = 0;
+        }
+
+        if (chars > sizeof (buf) - 1) {
+            chars = sizeof (buf) - 1;
+            msgbuf[chars] = 0;
+        }
+
+        wcstombs(buf, msgbuf, chars + 1);
+        LocalFree(msgbuf);
+    }
+    else {
+        sprintf(buf, "unknown win32 error (%ld)", error);
+    }
+
+    SetLastError(lasterr);
+    return buf;
+}
+
+#endif /* UNDER_CE */
+
+/* Reset gzip file state */
+local void gz_reset(state)
+    gz_statep state;
+{
+    state->x.have = 0;              /* no output data available */
+    if (state->mode == GZ_READ) {   /* for reading ... */
+        state->eof = 0;             /* not at end of file */
+        state->past = 0;            /* have not read past end yet */
+        state->how = LOOK;          /* look for gzip header */
+    }
+    state->seek = 0;                /* no seek request pending */
+    gz_error(state, Z_OK, NULL);    /* clear error */
+    state->x.pos = 0;               /* no uncompressed data yet */
+    state->strm.avail_in = 0;       /* no input data yet */
+}
+
+/* Open a gzip file either by name or file descriptor. */
+local gzFile gz_open(path, fd, mode)
+    const void *path;
+    int fd;
+    const char *mode;
+{
+    gz_statep state;
+    z_size_t len;
+    int oflag;
+#ifdef O_CLOEXEC
+    int cloexec = 0;
+#endif
+#ifdef O_EXCL
+    int exclusive = 0;
+#endif
+
+    /* check input */
+    if (path == NULL)
+        return NULL;
+
+    /* allocate gzFile structure to return */
+    state = (gz_statep)malloc(sizeof(gz_state));
+    if (state == NULL)
+        return NULL;
+    state->size = 0;            /* no buffers allocated yet */
+    state->want = GZBUFSIZE;    /* requested buffer size */
+    state->msg = NULL;          /* no error message yet */
+
+    /* interpret mode */
+    state->mode = GZ_NONE;
+    state->level = Z_DEFAULT_COMPRESSION;
+    state->strategy = Z_DEFAULT_STRATEGY;
+    state->direct = 0;
+    while (*mode) {
+        if (*mode >= '0' && *mode <= '9')
+            state->level = *mode - '0';
+        else
+            switch (*mode) {
+            case 'r':
+                state->mode = GZ_READ;
+                break;
+#ifndef NO_GZCOMPRESS
+            case 'w':
+                state->mode = GZ_WRITE;
+                break;
+            case 'a':
+                state->mode = GZ_APPEND;
+                break;
+#endif
+            case '+':       /* can't read and write at the same time */
+                free(state);
+                return NULL;
+            case 'b':       /* ignore -- will request binary anyway */
+                break;
+#ifdef O_CLOEXEC
+            case 'e':
+                cloexec = 1;
+                break;
+#endif
+#ifdef O_EXCL
+            case 'x':
+                exclusive = 1;
+                break;
+#endif
+            case 'f':
+                state->strategy = Z_FILTERED;
+                break;
+            case 'h':
+                state->strategy = Z_HUFFMAN_ONLY;
+                break;
+            case 'R':
+                state->strategy = Z_RLE;
+                break;
+            case 'F':
+                state->strategy = Z_FIXED;
+                break;
+            case 'T':
+                state->direct = 1;
+                break;
+            default:        /* could consider as an error, but just ignore */
+                ;
+            }
+        mode++;
+    }
+
+    /* must provide an "r", "w", or "a" */
+    if (state->mode == GZ_NONE) {
+        free(state);
+        return NULL;
+    }
+
+    /* can't force transparent read */
+    if (state->mode == GZ_READ) {
+        if (state->direct) {
+            free(state);
+            return NULL;
+        }
+        state->direct = 1;      /* for empty file */
+    }
+
+    /* save the path name for error messages */
+#ifdef WIDECHAR
+    if (fd == -2) {
+        len = wcstombs(NULL, path, 0);
+        if (len == (z_size_t)-1)
+            len = 0;
+    }
+    else
+#endif
+        len = strlen((const char *)path);
+    state->path = (char *)malloc(len + 1);
+    if (state->path == NULL) {
+        free(state);
+        return NULL;
+    }
+#ifdef WIDECHAR
+    if (fd == -2)
+        if (len)
+            wcstombs(state->path, path, len + 1);
+        else
+            *(state->path) = 0;
+    else
+#endif
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+        (void)snprintf(state->path, len + 1, "%s", (const char *)path);
+#else
+        strcpy(state->path, path);
+#endif
+
+    /* compute the flags for open() */
+    oflag =
+#ifdef O_LARGEFILE
+        O_LARGEFILE |
+#endif
+#ifdef O_BINARY
+        O_BINARY |
+#endif
+#ifdef O_CLOEXEC
+        (cloexec ? O_CLOEXEC : 0) |
+#endif
+        (state->mode == GZ_READ ?
+         O_RDONLY :
+         (O_WRONLY | O_CREAT |
+#ifdef O_EXCL
+          (exclusive ? O_EXCL : 0) |
+#endif
+          (state->mode == GZ_WRITE ?
+           O_TRUNC :
+           O_APPEND)));
+
+    /* open the file with the appropriate flags (or just use fd) */
+    state->fd = fd > -1 ? fd : (
+#ifdef WIDECHAR
+        fd == -2 ? _wopen(path, oflag, 0666) :
+#endif
+        open((const char *)path, oflag, 0666));
+    if (state->fd == -1) {
+        free(state->path);
+        free(state);
+        return NULL;
+    }
+    if (state->mode == GZ_APPEND) {
+        LSEEK(state->fd, 0, SEEK_END);  /* so gzoffset() is correct */
+        state->mode = GZ_WRITE;         /* simplify later checks */
+    }
+
+    /* save the current position for rewinding (only if reading) */
+    if (state->mode == GZ_READ) {
+        state->start = LSEEK(state->fd, 0, SEEK_CUR);
+        if (state->start == -1) state->start = 0;
+    }
+
+    /* initialize stream */
+    gz_reset(state);
+
+    /* return stream */
+    return (gzFile)state;
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzopen(path, mode)
+    const char *path;
+    const char *mode;
+{
+    return gz_open(path, -1, mode);
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzopen64(path, mode)
+    const char *path;
+    const char *mode;
+{
+    return gz_open(path, -1, mode);
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzdopen(fd, mode)
+    int fd;
+    const char *mode;
+{
+    char *path;         /* identifier for error messages */
+    gzFile gz;
+
+    if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL)
+        return NULL;
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+    (void)snprintf(path, 7 + 3 * sizeof(int), "<fd:%d>", fd);
+#else
+    sprintf(path, "<fd:%d>", fd);   /* for debugging */
+#endif
+    gz = gz_open(path, fd, mode);
+    free(path);
+    return gz;
+}
+
+/* -- see zlib.h -- */
+#ifdef WIDECHAR
+gzFile ZEXPORT gzopen_w(path, mode)
+    const wchar_t *path;
+    const char *mode;
+{
+    return gz_open(path, -2, mode);
+}
+#endif
+
+/* -- see zlib.h -- */
+int ZEXPORT gzbuffer(file, size)
+    gzFile file;
+    unsigned size;
+{
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* make sure we haven't already allocated memory */
+    if (state->size != 0)
+        return -1;
+
+    /* check and set requested size */
+    if ((size << 1) < size)
+        return -1;              /* need to be able to double it */
+    if (size < 2)
+        size = 2;               /* need two bytes to check magic header */
+    state->want = size;
+    return 0;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzrewind(file)
+    gzFile file;
+{
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no error */
+    if (state->mode != GZ_READ ||
+            (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* back up and start over */
+    if (LSEEK(state->fd, state->start, SEEK_SET) == -1)
+        return -1;
+    gz_reset(state);
+    return 0;
+}
+
+/* -- see zlib.h -- */
+z_off64_t ZEXPORT gzseek64(file, offset, whence)
+    gzFile file;
+    z_off64_t offset;
+    int whence;
+{
+    unsigned n;
+    z_off64_t ret;
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* check that there's no error */
+    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
+        return -1;
+
+    /* can only seek from start or relative to current position */
+    if (whence != SEEK_SET && whence != SEEK_CUR)
+        return -1;
+
+    /* normalize offset to a SEEK_CUR specification */
+    if (whence == SEEK_SET)
+        offset -= state->x.pos;
+    else if (state->seek)
+        offset += state->skip;
+    state->seek = 0;
+
+    /* if within raw area while reading, just go there */
+    if (state->mode == GZ_READ && state->how == COPY &&
+            state->x.pos + offset >= 0) {
+        ret = LSEEK(state->fd, offset - state->x.have, SEEK_CUR);
+        if (ret == -1)
+            return -1;
+        state->x.have = 0;
+        state->eof = 0;
+        state->past = 0;
+        state->seek = 0;
+        gz_error(state, Z_OK, NULL);
+        state->strm.avail_in = 0;
+        state->x.pos += offset;
+        return state->x.pos;
+    }
+
+    /* calculate skip amount, rewinding if needed for back seek when reading */
+    if (offset < 0) {
+        if (state->mode != GZ_READ)         /* writing -- can't go backwards */
+            return -1;
+        offset += state->x.pos;
+        if (offset < 0)                     /* before start of file! */
+            return -1;
+        if (gzrewind(file) == -1)           /* rewind, then skip to offset */
+            return -1;
+    }
+
+    /* if reading, skip what's in output buffer (one less gzgetc() check) */
+    if (state->mode == GZ_READ) {
+        n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > offset ?
+            (unsigned)offset : state->x.have;
+        state->x.have -= n;
+        state->x.next += n;
+        state->x.pos += n;
+        offset -= n;
+    }
+
+    /* request skip (if not zero) */
+    if (offset) {
+        state->seek = 1;
+        state->skip = offset;
+    }
+    return state->x.pos + offset;
+}
+
+/* -- see zlib.h -- */
+z_off_t ZEXPORT gzseek(file, offset, whence)
+    gzFile file;
+    z_off_t offset;
+    int whence;
+{
+    z_off64_t ret;
+
+    ret = gzseek64(file, (z_off64_t)offset, whence);
+    return ret == (z_off_t)ret ? (z_off_t)ret : -1;
+}
+
+/* -- see zlib.h -- */
+z_off64_t ZEXPORT gztell64(file)
+    gzFile file;
+{
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* return position */
+    return state->x.pos + (state->seek ? state->skip : 0);
+}
+
+/* -- see zlib.h -- */
+z_off_t ZEXPORT gztell(file)
+    gzFile file;
+{
+    z_off64_t ret;
+
+    ret = gztell64(file);
+    return ret == (z_off_t)ret ? (z_off_t)ret : -1;
+}
+
+/* -- see zlib.h -- */
+z_off64_t ZEXPORT gzoffset64(file)
+    gzFile file;
+{
+    z_off64_t offset;
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* compute and return effective offset in file */
+    offset = LSEEK(state->fd, 0, SEEK_CUR);
+    if (offset == -1)
+        return -1;
+    if (state->mode == GZ_READ)             /* reading */
+        offset -= state->strm.avail_in;     /* don't count buffered input */
+    return offset;
+}
+
+/* -- see zlib.h -- */
+z_off_t ZEXPORT gzoffset(file)
+    gzFile file;
+{
+    z_off64_t ret;
+
+    ret = gzoffset64(file);
+    return ret == (z_off_t)ret ? (z_off_t)ret : -1;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzeof(file)
+    gzFile file;
+{
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return 0;
+
+    /* return end-of-file state */
+    return state->mode == GZ_READ ? state->past : 0;
+}
+
+/* -- see zlib.h -- */
+const char * ZEXPORT gzerror(file, errnum)
+    gzFile file;
+    int *errnum;
+{
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return NULL;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return NULL;
+
+    /* return error information */
+    if (errnum != NULL)
+        *errnum = state->err;
+    return state->err == Z_MEM_ERROR ? "out of memory" :
+                                       (state->msg == NULL ? "" : state->msg);
+}
+
+/* -- see zlib.h -- */
+void ZEXPORT gzclearerr(file)
+    gzFile file;
+{
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return;
+
+    /* clear error and end-of-file */
+    if (state->mode == GZ_READ) {
+        state->eof = 0;
+        state->past = 0;
+    }
+    gz_error(state, Z_OK, NULL);
+}
+
+/* Create an error message in allocated memory and set state->err and
+   state->msg accordingly.  Free any previous error message already there.  Do
+   not try to free or allocate space if the error is Z_MEM_ERROR (out of
+   memory).  Simply save the error message as a static string.  If there is an
+   allocation failure constructing the error message, then convert the error to
+   out of memory. */
+void ZLIB_INTERNAL gz_error(state, err, msg)
+    gz_statep state;
+    int err;
+    const char *msg;
+{
+    /* free previously allocated message and clear */
+    if (state->msg != NULL) {
+        if (state->err != Z_MEM_ERROR)
+            free(state->msg);
+        state->msg = NULL;
+    }
+
+    /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */
+    if (err != Z_OK && err != Z_BUF_ERROR)
+        state->x.have = 0;
+
+    /* set error code, and if no message, then done */
+    state->err = err;
+    if (msg == NULL)
+        return;
+
+    /* for an out of memory error, return literal string when requested */
+    if (err == Z_MEM_ERROR)
+        return;
+
+    /* construct error message with path */
+    if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) ==
+            NULL) {
+        state->err = Z_MEM_ERROR;
+        return;
+    }
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+    (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3,
+                   "%s%s%s", state->path, ": ", msg);
+#else
+    strcpy(state->msg, state->path);
+    strcat(state->msg, ": ");
+    strcat(state->msg, msg);
+#endif
+}
+
+#ifndef INT_MAX
+/* portably return maximum value for an int (when limits.h presumed not
+   available) -- we need to do this to cover cases where 2's complement not
+   used, since C standard permits 1's complement and sign-bit representations,
+   otherwise we could just use ((unsigned)-1) >> 1 */
+unsigned ZLIB_INTERNAL gz_intmax()
+{
+    unsigned p, q;
+
+    p = 1;
+    do {
+        q = p;
+        p <<= 1;
+        p++;
+    } while (p > q);
+    return q >> 1;
+}
+#endif
diff --git a/deps/libchdr/deps/zlib-1.2.11/gzread.c b/deps/libchdr/deps/zlib-1.2.11/gzread.c
new file mode 100644
index 00000000..956b91ea
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/gzread.c
@@ -0,0 +1,654 @@
+/* gzread.c -- zlib functions for reading gzip files
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+/* Local functions */
+local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
+local int gz_avail OF((gz_statep));
+local int gz_look OF((gz_statep));
+local int gz_decomp OF((gz_statep));
+local int gz_fetch OF((gz_statep));
+local int gz_skip OF((gz_statep, z_off64_t));
+local z_size_t gz_read OF((gz_statep, voidp, z_size_t));
+
+/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
+   state->fd, and update state->eof, state->err, and state->msg as appropriate.
+   This function needs to loop on read(), since read() is not guaranteed to
+   read the number of bytes requested, depending on the type of descriptor. */
+local int gz_load(state, buf, len, have)
+    gz_statep state;
+    unsigned char *buf;
+    unsigned len;
+    unsigned *have;
+{
+    int ret;
+    unsigned get, max = ((unsigned)-1 >> 2) + 1;
+
+    *have = 0;
+    do {
+        get = len - *have;
+        if (get > max)
+            get = max;
+        ret = read(state->fd, buf + *have, get);
+        if (ret <= 0)
+            break;
+        *have += (unsigned)ret;
+    } while (*have < len);
+    if (ret < 0) {
+        gz_error(state, Z_ERRNO, zstrerror());
+        return -1;
+    }
+    if (ret == 0)
+        state->eof = 1;
+    return 0;
+}
+
+/* Load up input buffer and set eof flag if last data loaded -- return -1 on
+   error, 0 otherwise.  Note that the eof flag is set when the end of the input
+   file is reached, even though there may be unused data in the buffer.  Once
+   that data has been used, no more attempts will be made to read the file.
+   If strm->avail_in != 0, then the current data is moved to the beginning of
+   the input buffer, and then the remainder of the buffer is loaded with the
+   available data from the input file. */
+local int gz_avail(state)
+    gz_statep state;
+{
+    unsigned got;
+    z_streamp strm = &(state->strm);
+
+    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
+        return -1;
+    if (state->eof == 0) {
+        if (strm->avail_in) {       /* copy what's there to the start */
+            unsigned char *p = state->in;
+            unsigned const char *q = strm->next_in;
+            unsigned n = strm->avail_in;
+            do {
+                *p++ = *q++;
+            } while (--n);
+        }
+        if (gz_load(state, state->in + strm->avail_in,
+                    state->size - strm->avail_in, &got) == -1)
+            return -1;
+        strm->avail_in += got;
+        strm->next_in = state->in;
+    }
+    return 0;
+}
+
+/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
+   If this is the first time in, allocate required memory.  state->how will be
+   left unchanged if there is no more input data available, will be set to COPY
+   if there is no gzip header and direct copying will be performed, or it will
+   be set to GZIP for decompression.  If direct copying, then leftover input
+   data from the input buffer will be copied to the output buffer.  In that
+   case, all further file reads will be directly to either the output buffer or
+   a user buffer.  If decompressing, the inflate state will be initialized.
+   gz_look() will return 0 on success or -1 on failure. */
+local int gz_look(state)
+    gz_statep state;
+{
+    z_streamp strm = &(state->strm);
+
+    /* allocate read buffers and inflate memory */
+    if (state->size == 0) {
+        /* allocate buffers */
+        state->in = (unsigned char *)malloc(state->want);
+        state->out = (unsigned char *)malloc(state->want << 1);
+        if (state->in == NULL || state->out == NULL) {
+            free(state->out);
+            free(state->in);
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+        state->size = state->want;
+
+        /* allocate inflate memory */
+        state->strm.zalloc = Z_NULL;
+        state->strm.zfree = Z_NULL;
+        state->strm.opaque = Z_NULL;
+        state->strm.avail_in = 0;
+        state->strm.next_in = Z_NULL;
+        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
+            free(state->out);
+            free(state->in);
+            state->size = 0;
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+    }
+
+    /* get at least the magic bytes in the input buffer */
+    if (strm->avail_in < 2) {
+        if (gz_avail(state) == -1)
+            return -1;
+        if (strm->avail_in == 0)
+            return 0;
+    }
+
+    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
+       a logical dilemma here when considering the case of a partially written
+       gzip file, to wit, if a single 31 byte is written, then we cannot tell
+       whether this is a single-byte file, or just a partially written gzip
+       file -- for here we assume that if a gzip file is being written, then
+       the header will be written in a single operation, so that reading a
+       single byte is sufficient indication that it is not a gzip file) */
+    if (strm->avail_in > 1 &&
+            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
+        inflateReset(strm);
+        state->how = GZIP;
+        state->direct = 0;
+        return 0;
+    }
+
+    /* no gzip header -- if we were decoding gzip before, then this is trailing
+       garbage.  Ignore the trailing garbage and finish. */
+    if (state->direct == 0) {
+        strm->avail_in = 0;
+        state->eof = 1;
+        state->x.have = 0;
+        return 0;
+    }
+
+    /* doing raw i/o, copy any leftover input to output -- this assumes that
+       the output buffer is larger than the input buffer, which also assures
+       space for gzungetc() */
+    state->x.next = state->out;
+    if (strm->avail_in) {
+        memcpy(state->x.next, strm->next_in, strm->avail_in);
+        state->x.have = strm->avail_in;
+        strm->avail_in = 0;
+    }
+    state->how = COPY;
+    state->direct = 1;
+    return 0;
+}
+
+/* Decompress from input to the provided next_out and avail_out in the state.
+   On return, state->x.have and state->x.next point to the just decompressed
+   data.  If the gzip stream completes, state->how is reset to LOOK to look for
+   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
+   on success, -1 on failure. */
+local int gz_decomp(state)
+    gz_statep state;
+{
+    int ret = Z_OK;
+    unsigned had;
+    z_streamp strm = &(state->strm);
+
+    /* fill output buffer up to end of deflate stream */
+    had = strm->avail_out;
+    do {
+        /* get more input for inflate() */
+        if (strm->avail_in == 0 && gz_avail(state) == -1)
+            return -1;
+        if (strm->avail_in == 0) {
+            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
+            break;
+        }
+
+        /* decompress and handle errors */
+        ret = inflate(strm, Z_NO_FLUSH);
+        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
+            gz_error(state, Z_STREAM_ERROR,
+                     "internal error: inflate stream corrupt");
+            return -1;
+        }
+        if (ret == Z_MEM_ERROR) {
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
+            gz_error(state, Z_DATA_ERROR,
+                     strm->msg == NULL ? "compressed data error" : strm->msg);
+            return -1;
+        }
+    } while (strm->avail_out && ret != Z_STREAM_END);
+
+    /* update available output */
+    state->x.have = had - strm->avail_out;
+    state->x.next = strm->next_out - state->x.have;
+
+    /* if the gzip stream completed successfully, look for another */
+    if (ret == Z_STREAM_END)
+        state->how = LOOK;
+
+    /* good decompression */
+    return 0;
+}
+
+/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
+   Data is either copied from the input file or decompressed from the input
+   file depending on state->how.  If state->how is LOOK, then a gzip header is
+   looked for to determine whether to copy or decompress.  Returns -1 on error,
+   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
+   end of the input file has been reached and all data has been processed.  */
+local int gz_fetch(state)
+    gz_statep state;
+{
+    z_streamp strm = &(state->strm);
+
+    do {
+        switch(state->how) {
+        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
+            if (gz_look(state) == -1)
+                return -1;
+            if (state->how == LOOK)
+                return 0;
+            break;
+        case COPY:      /* -> COPY */
+            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
+                    == -1)
+                return -1;
+            state->x.next = state->out;
+            return 0;
+        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
+            strm->avail_out = state->size << 1;
+            strm->next_out = state->out;
+            if (gz_decomp(state) == -1)
+                return -1;
+        }
+    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
+    return 0;
+}
+
+/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
+local int gz_skip(state, len)
+    gz_statep state;
+    z_off64_t len;
+{
+    unsigned n;
+
+    /* skip over len bytes or reach end-of-file, whichever comes first */
+    while (len)
+        /* skip over whatever is in output buffer */
+        if (state->x.have) {
+            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
+                (unsigned)len : state->x.have;
+            state->x.have -= n;
+            state->x.next += n;
+            state->x.pos += n;
+            len -= n;
+        }
+
+        /* output buffer empty -- return if we're at the end of the input */
+        else if (state->eof && state->strm.avail_in == 0)
+            break;
+
+        /* need more data to skip -- load up output buffer */
+        else {
+            /* get more output, looking for header if required */
+            if (gz_fetch(state) == -1)
+                return -1;
+        }
+    return 0;
+}
+
+/* Read len bytes into buf from file, or less than len up to the end of the
+   input.  Return the number of bytes read.  If zero is returned, either the
+   end of file was reached, or there was an error.  state->err must be
+   consulted in that case to determine which. */
+local z_size_t gz_read(state, buf, len)
+    gz_statep state;
+    voidp buf;
+    z_size_t len;
+{
+    z_size_t got;
+    unsigned n;
+
+    /* if len is zero, avoid unnecessary operations */
+    if (len == 0)
+        return 0;
+
+    /* process a skip request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_skip(state, state->skip) == -1)
+            return 0;
+    }
+
+    /* get len bytes to buf, or less than len if at the end */
+    got = 0;
+    do {
+        /* set n to the maximum amount of len that fits in an unsigned int */
+        n = -1;
+        if (n > len)
+            n = len;
+
+        /* first just try copying data from the output buffer */
+        if (state->x.have) {
+            if (state->x.have < n)
+                n = state->x.have;
+            memcpy(buf, state->x.next, n);
+            state->x.next += n;
+            state->x.have -= n;
+        }
+
+        /* output buffer empty -- return if we're at the end of the input */
+        else if (state->eof && state->strm.avail_in == 0) {
+            state->past = 1;        /* tried to read past end */
+            break;
+        }
+
+        /* need output data -- for small len or new stream load up our output
+           buffer */
+        else if (state->how == LOOK || n < (state->size << 1)) {
+            /* get more output, looking for header if required */
+            if (gz_fetch(state) == -1)
+                return 0;
+            continue;       /* no progress yet -- go back to copy above */
+            /* the copy above assures that we will leave with space in the
+               output buffer, allowing at least one gzungetc() to succeed */
+        }
+
+        /* large len -- read directly into user buffer */
+        else if (state->how == COPY) {      /* read directly */
+            if (gz_load(state, (unsigned char *)buf, n, &n) == -1)
+                return 0;
+        }
+
+        /* large len -- decompress directly into user buffer */
+        else {  /* state->how == GZIP */
+            state->strm.avail_out = n;
+            state->strm.next_out = (unsigned char *)buf;
+            if (gz_decomp(state) == -1)
+                return 0;
+            n = state->x.have;
+            state->x.have = 0;
+        }
+
+        /* update progress */
+        len -= n;
+        buf = (char *)buf + n;
+        got += n;
+        state->x.pos += n;
+    } while (len);
+
+    /* return number of bytes read into user buffer */
+    return got;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzread(file, buf, len)
+    gzFile file;
+    voidp buf;
+    unsigned len;
+{
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+            (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* since an int is returned, make sure len fits in one, otherwise return
+       with an error (this avoids a flaw in the interface) */
+    if ((int)len < 0) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in an int");
+        return -1;
+    }
+
+    /* read len or fewer bytes to buf */
+    len = gz_read(state, buf, len);
+
+    /* check for an error */
+    if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR)
+        return -1;
+
+    /* return the number of bytes read (this is assured to fit in an int) */
+    return (int)len;
+}
+
+/* -- see zlib.h -- */
+z_size_t ZEXPORT gzfread(buf, size, nitems, file)
+    voidp buf;
+    z_size_t size;
+    z_size_t nitems;
+    gzFile file;
+{
+    z_size_t len;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+            (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return 0;
+
+    /* compute bytes to read -- error on overflow */
+    len = nitems * size;
+    if (size && len / size != nitems) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
+        return 0;
+    }
+
+    /* read len or fewer bytes to buf, return the number of full items read */
+    return len ? gz_read(state, buf, len) / size : 0;
+}
+
+/* -- see zlib.h -- */
+#ifdef Z_PREFIX_SET
+#  undef z_gzgetc
+#else
+#  undef gzgetc
+#endif
+int ZEXPORT gzgetc(file)
+    gzFile file;
+{
+    int ret;
+    unsigned char buf[1];
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+        (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* try output buffer (no need to check for skip request) */
+    if (state->x.have) {
+        state->x.have--;
+        state->x.pos++;
+        return *(state->x.next)++;
+    }
+
+    /* nothing there -- try gz_read() */
+    ret = gz_read(state, buf, 1);
+    return ret < 1 ? -1 : buf[0];
+}
+
+int ZEXPORT gzgetc_(file)
+gzFile file;
+{
+    return gzgetc(file);
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzungetc(c, file)
+    int c;
+    gzFile file;
+{
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+        (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* process a skip request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_skip(state, state->skip) == -1)
+            return -1;
+    }
+
+    /* can't push EOF */
+    if (c < 0)
+        return -1;
+
+    /* if output buffer empty, put byte at end (allows more pushing) */
+    if (state->x.have == 0) {
+        state->x.have = 1;
+        state->x.next = state->out + (state->size << 1) - 1;
+        state->x.next[0] = (unsigned char)c;
+        state->x.pos--;
+        state->past = 0;
+        return c;
+    }
+
+    /* if no room, give up (must have already done a gzungetc()) */
+    if (state->x.have == (state->size << 1)) {
+        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
+        return -1;
+    }
+
+    /* slide output data if needed and insert byte before existing data */
+    if (state->x.next == state->out) {
+        unsigned char *src = state->out + state->x.have;
+        unsigned char *dest = state->out + (state->size << 1);
+        while (src > state->out)
+            *--dest = *--src;
+        state->x.next = dest;
+    }
+    state->x.have++;
+    state->x.next--;
+    state->x.next[0] = (unsigned char)c;
+    state->x.pos--;
+    state->past = 0;
+    return c;
+}
+
+/* -- see zlib.h -- */
+char * ZEXPORT gzgets(file, buf, len)
+    gzFile file;
+    char *buf;
+    int len;
+{
+    unsigned left, n;
+    char *str;
+    unsigned char *eol;
+    gz_statep state;
+
+    /* check parameters and get internal structure */
+    if (file == NULL || buf == NULL || len < 1)
+        return NULL;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no (serious) error */
+    if (state->mode != GZ_READ ||
+        (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return NULL;
+
+    /* process a skip request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_skip(state, state->skip) == -1)
+            return NULL;
+    }
+
+    /* copy output bytes up to new line or len - 1, whichever comes first --
+       append a terminating zero to the string (we don't check for a zero in
+       the contents, let the user worry about that) */
+    str = buf;
+    left = (unsigned)len - 1;
+    if (left) do {
+        /* assure that something is in the output buffer */
+        if (state->x.have == 0 && gz_fetch(state) == -1)
+            return NULL;                /* error */
+        if (state->x.have == 0) {       /* end of file */
+            state->past = 1;            /* read past end */
+            break;                      /* return what we have */
+        }
+
+        /* look for end-of-line in current output buffer */
+        n = state->x.have > left ? left : state->x.have;
+        eol = (unsigned char *)memchr(state->x.next, '\n', n);
+        if (eol != NULL)
+            n = (unsigned)(eol - state->x.next) + 1;
+
+        /* copy through end-of-line, or remainder if not found */
+        memcpy(buf, state->x.next, n);
+        state->x.have -= n;
+        state->x.next += n;
+        state->x.pos += n;
+        left -= n;
+        buf += n;
+    } while (left && eol == NULL);
+
+    /* return terminated string, or if nothing, end of file */
+    if (buf == str)
+        return NULL;
+    buf[0] = 0;
+    return str;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzdirect(file)
+    gzFile file;
+{
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* if the state is not known, but we can find out, then do so (this is
+       mainly for right after a gzopen() or gzdopen()) */
+    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
+        (void)gz_look(state);
+
+    /* return 1 if transparent, 0 if processing a gzip stream */
+    return state->direct;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzclose_r(file)
+    gzFile file;
+{
+    int ret, err;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    /* check that we're reading */
+    if (state->mode != GZ_READ)
+        return Z_STREAM_ERROR;
+
+    /* free memory and close file */
+    if (state->size) {
+        inflateEnd(&(state->strm));
+        free(state->out);
+        free(state->in);
+    }
+    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
+    gz_error(state, Z_OK, NULL);
+    free(state->path);
+    ret = close(state->fd);
+    free(state);
+    return ret ? Z_ERRNO : err;
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/gzwrite.c b/deps/libchdr/deps/zlib-1.2.11/gzwrite.c
new file mode 100644
index 00000000..c7b5651d
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/gzwrite.c
@@ -0,0 +1,665 @@
+/* gzwrite.c -- zlib functions for writing gzip files
+ * Copyright (C) 2004-2017 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+/* Local functions */
+local int gz_init OF((gz_statep));
+local int gz_comp OF((gz_statep, int));
+local int gz_zero OF((gz_statep, z_off64_t));
+local z_size_t gz_write OF((gz_statep, voidpc, z_size_t));
+
+/* Initialize state for writing a gzip file.  Mark initialization by setting
+   state->size to non-zero.  Return -1 on a memory allocation failure, or 0 on
+   success. */
+local int gz_init(state)
+    gz_statep state;
+{
+    int ret;
+    z_streamp strm = &(state->strm);
+
+    /* allocate input buffer (double size for gzprintf) */
+    state->in = (unsigned char *)malloc(state->want << 1);
+    if (state->in == NULL) {
+        gz_error(state, Z_MEM_ERROR, "out of memory");
+        return -1;
+    }
+
+    /* only need output buffer and deflate state if compressing */
+    if (!state->direct) {
+        /* allocate output buffer */
+        state->out = (unsigned char *)malloc(state->want);
+        if (state->out == NULL) {
+            free(state->in);
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+
+        /* allocate deflate memory, set up for gzip compression */
+        strm->zalloc = Z_NULL;
+        strm->zfree = Z_NULL;
+        strm->opaque = Z_NULL;
+        ret = deflateInit2(strm, state->level, Z_DEFLATED,
+                           MAX_WBITS + 16, DEF_MEM_LEVEL, state->strategy);
+        if (ret != Z_OK) {
+            free(state->out);
+            free(state->in);
+            gz_error(state, Z_MEM_ERROR, "out of memory");
+            return -1;
+        }
+        strm->next_in = NULL;
+    }
+
+    /* mark state as initialized */
+    state->size = state->want;
+
+    /* initialize write buffer if compressing */
+    if (!state->direct) {
+        strm->avail_out = state->size;
+        strm->next_out = state->out;
+        state->x.next = strm->next_out;
+    }
+    return 0;
+}
+
+/* Compress whatever is at avail_in and next_in and write to the output file.
+   Return -1 if there is an error writing to the output file or if gz_init()
+   fails to allocate memory, otherwise 0.  flush is assumed to be a valid
+   deflate() flush value.  If flush is Z_FINISH, then the deflate() state is
+   reset to start a new gzip stream.  If gz->direct is true, then simply write
+   to the output file without compressing, and ignore flush. */
+local int gz_comp(state, flush)
+    gz_statep state;
+    int flush;
+{
+    int ret, writ;
+    unsigned have, put, max = ((unsigned)-1 >> 2) + 1;
+    z_streamp strm = &(state->strm);
+
+    /* allocate memory if this is the first time through */
+    if (state->size == 0 && gz_init(state) == -1)
+        return -1;
+
+    /* write directly if requested */
+    if (state->direct) {
+        while (strm->avail_in) {
+            put = strm->avail_in > max ? max : strm->avail_in;
+            writ = write(state->fd, strm->next_in, put);
+            if (writ < 0) {
+                gz_error(state, Z_ERRNO, zstrerror());
+                return -1;
+            }
+            strm->avail_in -= (unsigned)writ;
+            strm->next_in += writ;
+        }
+        return 0;
+    }
+
+    /* run deflate() on provided input until it produces no more output */
+    ret = Z_OK;
+    do {
+        /* write out current buffer contents if full, or if flushing, but if
+           doing Z_FINISH then don't write until we get to Z_STREAM_END */
+        if (strm->avail_out == 0 || (flush != Z_NO_FLUSH &&
+            (flush != Z_FINISH || ret == Z_STREAM_END))) {
+            while (strm->next_out > state->x.next) {
+                put = strm->next_out - state->x.next > (int)max ? max :
+                      (unsigned)(strm->next_out - state->x.next);
+                writ = write(state->fd, state->x.next, put);
+                if (writ < 0) {
+                    gz_error(state, Z_ERRNO, zstrerror());
+                    return -1;
+                }
+                state->x.next += writ;
+            }
+            if (strm->avail_out == 0) {
+                strm->avail_out = state->size;
+                strm->next_out = state->out;
+                state->x.next = state->out;
+            }
+        }
+
+        /* compress */
+        have = strm->avail_out;
+        ret = deflate(strm, flush);
+        if (ret == Z_STREAM_ERROR) {
+            gz_error(state, Z_STREAM_ERROR,
+                      "internal error: deflate stream corrupt");
+            return -1;
+        }
+        have -= strm->avail_out;
+    } while (have);
+
+    /* if that completed a deflate stream, allow another to start */
+    if (flush == Z_FINISH)
+        deflateReset(strm);
+
+    /* all done, no errors */
+    return 0;
+}
+
+/* Compress len zeros to output.  Return -1 on a write error or memory
+   allocation failure by gz_comp(), or 0 on success. */
+local int gz_zero(state, len)
+    gz_statep state;
+    z_off64_t len;
+{
+    int first;
+    unsigned n;
+    z_streamp strm = &(state->strm);
+
+    /* consume whatever's left in the input buffer */
+    if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
+        return -1;
+
+    /* compress len zeros (len guaranteed > 0) */
+    first = 1;
+    while (len) {
+        n = GT_OFF(state->size) || (z_off64_t)state->size > len ?
+            (unsigned)len : state->size;
+        if (first) {
+            memset(state->in, 0, n);
+            first = 0;
+        }
+        strm->avail_in = n;
+        strm->next_in = state->in;
+        state->x.pos += n;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return -1;
+        len -= n;
+    }
+    return 0;
+}
+
+/* Write len bytes from buf to file.  Return the number of bytes written.  If
+   the returned value is less than len, then there was an error. */
+local z_size_t gz_write(state, buf, len)
+    gz_statep state;
+    voidpc buf;
+    z_size_t len;
+{
+    z_size_t put = len;
+
+    /* if len is zero, avoid unnecessary operations */
+    if (len == 0)
+        return 0;
+
+    /* allocate memory if this is the first time through */
+    if (state->size == 0 && gz_init(state) == -1)
+        return 0;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return 0;
+    }
+
+    /* for small len, copy to input buffer, otherwise compress directly */
+    if (len < state->size) {
+        /* copy to input buffer, compress when full */
+        do {
+            unsigned have, copy;
+
+            if (state->strm.avail_in == 0)
+                state->strm.next_in = state->in;
+            have = (unsigned)((state->strm.next_in + state->strm.avail_in) -
+                              state->in);
+            copy = state->size - have;
+            if (copy > len)
+                copy = len;
+            memcpy(state->in + have, buf, copy);
+            state->strm.avail_in += copy;
+            state->x.pos += copy;
+            buf = (const char *)buf + copy;
+            len -= copy;
+            if (len && gz_comp(state, Z_NO_FLUSH) == -1)
+                return 0;
+        } while (len);
+    }
+    else {
+        /* consume whatever's left in the input buffer */
+        if (state->strm.avail_in && gz_comp(state, Z_NO_FLUSH) == -1)
+            return 0;
+
+        /* directly compress user buffer to file */
+        state->strm.next_in = (z_const Bytef *)buf;
+        do {
+            unsigned n = (unsigned)-1;
+            if (n > len)
+                n = len;
+            state->strm.avail_in = n;
+            state->x.pos += n;
+            if (gz_comp(state, Z_NO_FLUSH) == -1)
+                return 0;
+            len -= n;
+        } while (len);
+    }
+
+    /* input was all buffered or compressed */
+    return put;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzwrite(file, buf, len)
+    gzFile file;
+    voidpc buf;
+    unsigned len;
+{
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return 0;
+
+    /* since an int is returned, make sure len fits in one, otherwise return
+       with an error (this avoids a flaw in the interface) */
+    if ((int)len < 0) {
+        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
+        return 0;
+    }
+
+    /* write len bytes from buf (the return value will fit in an int) */
+    return (int)gz_write(state, buf, len);
+}
+
+/* -- see zlib.h -- */
+z_size_t ZEXPORT gzfwrite(buf, size, nitems, file)
+    voidpc buf;
+    z_size_t size;
+    z_size_t nitems;
+    gzFile file;
+{
+    z_size_t len;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return 0;
+
+    /* compute bytes to read -- error on overflow */
+    len = nitems * size;
+    if (size && len / size != nitems) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
+        return 0;
+    }
+
+    /* write len bytes to buf, return the number of full items written */
+    return len ? gz_write(state, buf, len) / size : 0;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzputc(file, c)
+    gzFile file;
+    int c;
+{
+    unsigned have;
+    unsigned char buf[1];
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return -1;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return -1;
+    }
+
+    /* try writing to input buffer for speed (state->size == 0 if buffer not
+       initialized) */
+    if (state->size) {
+        if (strm->avail_in == 0)
+            strm->next_in = state->in;
+        have = (unsigned)((strm->next_in + strm->avail_in) - state->in);
+        if (have < state->size) {
+            state->in[have] = (unsigned char)c;
+            strm->avail_in++;
+            state->x.pos++;
+            return c & 0xff;
+        }
+    }
+
+    /* no room in buffer or not initialized, use gz_write() */
+    buf[0] = (unsigned char)c;
+    if (gz_write(state, buf, 1) != 1)
+        return -1;
+    return c & 0xff;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzputs(file, str)
+    gzFile file;
+    const char *str;
+{
+    int ret;
+    z_size_t len;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return -1;
+
+    /* write string */
+    len = strlen(str);
+    ret = gz_write(state, str, len);
+    return ret == 0 && len != 0 ? -1 : ret;
+}
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#include <stdarg.h>
+
+/* -- see zlib.h -- */
+int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va)
+{
+    int len;
+    unsigned left;
+    char *next;
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* make sure we have some buffer space */
+    if (state->size == 0 && gz_init(state) == -1)
+        return state->err;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->err;
+    }
+
+    /* do the printf() into the input buffer, put length in len -- the input
+       buffer is double-sized just for this function, so there is guaranteed to
+       be state->size bytes available after the current contents */
+    if (strm->avail_in == 0)
+        strm->next_in = state->in;
+    next = (char *)(state->in + (strm->next_in - state->in) + strm->avail_in);
+    next[state->size - 1] = 0;
+#ifdef NO_vsnprintf
+#  ifdef HAS_vsprintf_void
+    (void)vsprintf(next, format, va);
+    for (len = 0; len < state->size; len++)
+        if (next[len] == 0) break;
+#  else
+    len = vsprintf(next, format, va);
+#  endif
+#else
+#  ifdef HAS_vsnprintf_void
+    (void)vsnprintf(next, state->size, format, va);
+    len = strlen(next);
+#  else
+    len = vsnprintf(next, state->size, format, va);
+#  endif
+#endif
+
+    /* check that printf() results fit in buffer */
+    if (len == 0 || (unsigned)len >= state->size || next[state->size - 1] != 0)
+        return 0;
+
+    /* update buffer and position, compress first half if past that */
+    strm->avail_in += (unsigned)len;
+    state->x.pos += len;
+    if (strm->avail_in >= state->size) {
+        left = strm->avail_in - state->size;
+        strm->avail_in = state->size;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return state->err;
+        memcpy(state->in, state->in + state->size, left);
+        strm->next_in = state->in;
+        strm->avail_in = left;
+    }
+    return len;
+}
+
+int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
+{
+    va_list va;
+    int ret;
+
+    va_start(va, format);
+    ret = gzvprintf(file, format, va);
+    va_end(va);
+    return ret;
+}
+
+#else /* !STDC && !Z_HAVE_STDARG_H */
+
+/* -- see zlib.h -- */
+int ZEXPORTVA gzprintf (file, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
+                       a11, a12, a13, a14, a15, a16, a17, a18, a19, a20)
+    gzFile file;
+    const char *format;
+    int a1, a2, a3, a4, a5, a6, a7, a8, a9, a10,
+        a11, a12, a13, a14, a15, a16, a17, a18, a19, a20;
+{
+    unsigned len, left;
+    char *next;
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that can really pass pointer in ints */
+    if (sizeof(int) != sizeof(void *))
+        return Z_STREAM_ERROR;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* make sure we have some buffer space */
+    if (state->size == 0 && gz_init(state) == -1)
+        return state->error;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->error;
+    }
+
+    /* do the printf() into the input buffer, put length in len -- the input
+       buffer is double-sized just for this function, so there is guaranteed to
+       be state->size bytes available after the current contents */
+    if (strm->avail_in == 0)
+        strm->next_in = state->in;
+    next = (char *)(strm->next_in + strm->avail_in);
+    next[state->size - 1] = 0;
+#ifdef NO_snprintf
+#  ifdef HAS_sprintf_void
+    sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
+            a13, a14, a15, a16, a17, a18, a19, a20);
+    for (len = 0; len < size; len++)
+        if (next[len] == 0)
+            break;
+#  else
+    len = sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11,
+                  a12, a13, a14, a15, a16, a17, a18, a19, a20);
+#  endif
+#else
+#  ifdef HAS_snprintf_void
+    snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, a9,
+             a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+    len = strlen(next);
+#  else
+    len = snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8,
+                   a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+#  endif
+#endif
+
+    /* check that printf() results fit in buffer */
+    if (len == 0 || len >= state->size || next[state->size - 1] != 0)
+        return 0;
+
+    /* update buffer and position, compress first half if past that */
+    strm->avail_in += len;
+    state->x.pos += len;
+    if (strm->avail_in >= state->size) {
+        left = strm->avail_in - state->size;
+        strm->avail_in = state->size;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return state->err;
+        memcpy(state->in, state->in + state->size, left);
+        strm->next_in = state->in;
+        strm->avail_in = left;
+    }
+    return (int)len;
+}
+
+#endif
+
+/* -- see zlib.h -- */
+int ZEXPORT gzflush(file, flush)
+    gzFile file;
+    int flush;
+{
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* check flush parameter */
+    if (flush < 0 || flush > Z_FINISH)
+        return Z_STREAM_ERROR;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->err;
+    }
+
+    /* compress remaining data with requested flush */
+    (void)gz_comp(state, flush);
+    return state->err;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzsetparams(file, level, strategy)
+    gzFile file;
+    int level;
+    int strategy;
+{
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* if no change is requested, then do nothing */
+    if (level == state->level && strategy == state->strategy)
+        return Z_OK;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->err;
+    }
+
+    /* change compression parameters for subsequent input */
+    if (state->size) {
+        /* flush previous input with previous parameters before changing */
+        if (strm->avail_in && gz_comp(state, Z_BLOCK) == -1)
+            return state->err;
+        deflateParams(strm, level, strategy);
+    }
+    state->level = level;
+    state->strategy = strategy;
+    return Z_OK;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzclose_w(file)
+    gzFile file;
+{
+    int ret = Z_OK;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    /* check that we're writing */
+    if (state->mode != GZ_WRITE)
+        return Z_STREAM_ERROR;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            ret = state->err;
+    }
+
+    /* flush, free memory, and close file */
+    if (gz_comp(state, Z_FINISH) == -1)
+        ret = state->err;
+    if (state->size) {
+        if (!state->direct) {
+            (void)deflateEnd(&(state->strm));
+            free(state->out);
+        }
+        free(state->in);
+    }
+    gz_error(state, Z_OK, NULL);
+    free(state->path);
+    if (close(state->fd) == -1)
+        ret = Z_ERRNO;
+    free(state);
+    return ret;
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/infback.c b/deps/libchdr/deps/zlib-1.2.11/infback.c
new file mode 100644
index 00000000..59679ecb
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/infback.c
@@ -0,0 +1,640 @@
+/* infback.c -- inflate using a call-back interface
+ * Copyright (C) 1995-2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+   This code is largely copied from inflate.c.  Normally either infback.o or
+   inflate.o would be linked into an application--not both.  The interface
+   with inffast.c is retained so that optimized assembler-coded versions of
+   inflate_fast() can be used with either inflate.c or infback.c.
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+/* function prototypes */
+local void fixedtables OF((struct inflate_state FAR *state));
+
+/*
+   strm provides memory allocation functions in zalloc and zfree, or
+   Z_NULL to use the library memory allocation functions.
+
+   windowBits is in the range 8..15, and window is a user-supplied
+   window and output buffer that is 2**windowBits bytes.
+ */
+int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size)
+z_streamp strm;
+int windowBits;
+unsigned char FAR *window;
+const char *version;
+int stream_size;
+{
+    struct inflate_state FAR *state;
+
+    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+        stream_size != (int)(sizeof(z_stream)))
+        return Z_VERSION_ERROR;
+    if (strm == Z_NULL || window == Z_NULL ||
+        windowBits < 8 || windowBits > 15)
+        return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+    if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+#endif
+    }
+    if (strm->zfree == (free_func)0)
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+    strm->zfree = zcfree;
+#endif
+    state = (struct inflate_state FAR *)ZALLOC(strm, 1,
+                                               sizeof(struct inflate_state));
+    if (state == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (struct internal_state FAR *)state;
+    state->dmax = 32768U;
+    state->wbits = (uInt)windowBits;
+    state->wsize = 1U << windowBits;
+    state->window = window;
+    state->wnext = 0;
+    state->whave = 0;
+    return Z_OK;
+}
+
+/*
+   Return state with length and distance decoding tables and index sizes set to
+   fixed code decoding.  Normally this returns fixed tables from inffixed.h.
+   If BUILDFIXED is defined, then instead this routine builds the tables the
+   first time it's called, and returns those tables the first time and
+   thereafter.  This reduces the size of the code by about 2K bytes, in
+   exchange for a little execution time.  However, BUILDFIXED should not be
+   used for threaded applications, since the rewriting of the tables and virgin
+   may not be thread-safe.
+ */
+local void fixedtables(state)
+struct inflate_state FAR *state;
+{
+#ifdef BUILDFIXED
+    static int virgin = 1;
+    static code *lenfix, *distfix;
+    static code fixed[544];
+
+    /* build fixed huffman tables if first call (may not be thread safe) */
+    if (virgin) {
+        unsigned sym, bits;
+        static code *next;
+
+        /* literal/length table */
+        sym = 0;
+        while (sym < 144) state->lens[sym++] = 8;
+        while (sym < 256) state->lens[sym++] = 9;
+        while (sym < 280) state->lens[sym++] = 7;
+        while (sym < 288) state->lens[sym++] = 8;
+        next = fixed;
+        lenfix = next;
+        bits = 9;
+        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
+
+        /* distance table */
+        sym = 0;
+        while (sym < 32) state->lens[sym++] = 5;
+        distfix = next;
+        bits = 5;
+        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
+
+        /* do this just once */
+        virgin = 0;
+    }
+#else /* !BUILDFIXED */
+#   include "inffixed.h"
+#endif /* BUILDFIXED */
+    state->lencode = lenfix;
+    state->lenbits = 9;
+    state->distcode = distfix;
+    state->distbits = 5;
+}
+
+/* Macros for inflateBack(): */
+
+/* Load returned state from inflate_fast() */
+#define LOAD() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
+/* Set state from registers for inflate_fast() */
+#define RESTORE() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Assure that some input is available.  If input is requested, but denied,
+   then return a Z_BUF_ERROR from inflateBack(). */
+#define PULL() \
+    do { \
+        if (have == 0) { \
+            have = in(in_desc, &next); \
+            if (have == 0) { \
+                next = Z_NULL; \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflateBack()
+   with an error if there is no input available. */
+#define PULLBYTE() \
+    do { \
+        PULL(); \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator.  If there is
+   not enough available input to do that, then return from inflateBack() with
+   an error. */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/* Assure that some output space is available, by writing out the window
+   if it's full.  If the write fails, return from inflateBack() with a
+   Z_BUF_ERROR. */
+#define ROOM() \
+    do { \
+        if (left == 0) { \
+            put = state->window; \
+            left = state->wsize; \
+            state->whave = left; \
+            if (out(out_desc, put, left)) { \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/*
+   strm provides the memory allocation functions and window buffer on input,
+   and provides information on the unused input on return.  For Z_DATA_ERROR
+   returns, strm will also provide an error message.
+
+   in() and out() are the call-back input and output functions.  When
+   inflateBack() needs more input, it calls in().  When inflateBack() has
+   filled the window with output, or when it completes with data in the
+   window, it calls out() to write out the data.  The application must not
+   change the provided input until in() is called again or inflateBack()
+   returns.  The application must not change the window/output buffer until
+   inflateBack() returns.
+
+   in() and out() are called with a descriptor parameter provided in the
+   inflateBack() call.  This parameter can be a structure that provides the
+   information required to do the read or write, as well as accumulated
+   information on the input and output such as totals and check values.
+
+   in() should return zero on failure.  out() should return non-zero on
+   failure.  If either in() or out() fails, than inflateBack() returns a
+   Z_BUF_ERROR.  strm->next_in can be checked for Z_NULL to see whether it
+   was in() or out() that caused in the error.  Otherwise,  inflateBack()
+   returns Z_STREAM_END on success, Z_DATA_ERROR for an deflate format
+   error, or Z_MEM_ERROR if it could not allocate memory for the state.
+   inflateBack() can also return Z_STREAM_ERROR if the input parameters
+   are not correct, i.e. strm is Z_NULL or the state was not initialized.
+ */
+int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc)
+z_streamp strm;
+in_func in;
+void FAR *in_desc;
+out_func out;
+void FAR *out_desc;
+{
+    struct inflate_state FAR *state;
+    z_const unsigned char FAR *next;    /* next input */
+    unsigned char FAR *put;     /* next output */
+    unsigned have, left;        /* available input and output */
+    unsigned long hold;         /* bit buffer */
+    unsigned bits;              /* bits in bit buffer */
+    unsigned copy;              /* number of stored or match bytes to copy */
+    unsigned char FAR *from;    /* where to copy match bytes from */
+    code here;                  /* current decoding table entry */
+    code last;                  /* parent table entry */
+    unsigned len;               /* length to copy for repeats, bits to drop */
+    int ret;                    /* return code */
+    static const unsigned short order[19] = /* permutation of code lengths */
+        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+    /* Check that the strm exists and that the state was initialized */
+    if (strm == Z_NULL || strm->state == Z_NULL)
+        return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* Reset the state */
+    strm->msg = Z_NULL;
+    state->mode = TYPE;
+    state->last = 0;
+    state->whave = 0;
+    next = strm->next_in;
+    have = next != Z_NULL ? strm->avail_in : 0;
+    hold = 0;
+    bits = 0;
+    put = state->window;
+    left = state->wsize;
+
+    /* Inflate until end of block marked as last */
+    for (;;)
+        switch (state->mode) {
+        case TYPE:
+            /* determine and dispatch block type */
+            if (state->last) {
+                BYTEBITS();
+                state->mode = DONE;
+                break;
+            }
+            NEEDBITS(3);
+            state->last = BITS(1);
+            DROPBITS(1);
+            switch (BITS(2)) {
+            case 0:                             /* stored block */
+                Tracev((stderr, "inflate:     stored block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = STORED;
+                break;
+            case 1:                             /* fixed block */
+                fixedtables(state);
+                Tracev((stderr, "inflate:     fixed codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = LEN;              /* decode codes */
+                break;
+            case 2:                             /* dynamic block */
+                Tracev((stderr, "inflate:     dynamic codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = TABLE;
+                break;
+            case 3:
+                strm->msg = (char *)"invalid block type";
+                state->mode = BAD;
+            }
+            DROPBITS(2);
+            break;
+
+        case STORED:
+            /* get and verify stored block length */
+            BYTEBITS();                         /* go to byte boundary */
+            NEEDBITS(32);
+            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+                strm->msg = (char *)"invalid stored block lengths";
+                state->mode = BAD;
+                break;
+            }
+            state->length = (unsigned)hold & 0xffff;
+            Tracev((stderr, "inflate:       stored length %u\n",
+                    state->length));
+            INITBITS();
+
+            /* copy stored block from input to output */
+            while (state->length != 0) {
+                copy = state->length;
+                PULL();
+                ROOM();
+                if (copy > have) copy = have;
+                if (copy > left) copy = left;
+                zmemcpy(put, next, copy);
+                have -= copy;
+                next += copy;
+                left -= copy;
+                put += copy;
+                state->length -= copy;
+            }
+            Tracev((stderr, "inflate:       stored end\n"));
+            state->mode = TYPE;
+            break;
+
+        case TABLE:
+            /* get dynamic table entries descriptor */
+            NEEDBITS(14);
+            state->nlen = BITS(5) + 257;
+            DROPBITS(5);
+            state->ndist = BITS(5) + 1;
+            DROPBITS(5);
+            state->ncode = BITS(4) + 4;
+            DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+            if (state->nlen > 286 || state->ndist > 30) {
+                strm->msg = (char *)"too many length or distance symbols";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracev((stderr, "inflate:       table sizes ok\n"));
+
+            /* get code length code lengths (not a typo) */
+            state->have = 0;
+            while (state->have < state->ncode) {
+                NEEDBITS(3);
+                state->lens[order[state->have++]] = (unsigned short)BITS(3);
+                DROPBITS(3);
+            }
+            while (state->have < 19)
+                state->lens[order[state->have++]] = 0;
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 7;
+            ret = inflate_table(CODES, state->lens, 19, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid code lengths set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       code lengths ok\n"));
+
+            /* get length and distance code code lengths */
+            state->have = 0;
+            while (state->have < state->nlen + state->ndist) {
+                for (;;) {
+                    here = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                if (here.val < 16) {
+                    DROPBITS(here.bits);
+                    state->lens[state->have++] = here.val;
+                }
+                else {
+                    if (here.val == 16) {
+                        NEEDBITS(here.bits + 2);
+                        DROPBITS(here.bits);
+                        if (state->have == 0) {
+                            strm->msg = (char *)"invalid bit length repeat";
+                            state->mode = BAD;
+                            break;
+                        }
+                        len = (unsigned)(state->lens[state->have - 1]);
+                        copy = 3 + BITS(2);
+                        DROPBITS(2);
+                    }
+                    else if (here.val == 17) {
+                        NEEDBITS(here.bits + 3);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 3 + BITS(3);
+                        DROPBITS(3);
+                    }
+                    else {
+                        NEEDBITS(here.bits + 7);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 11 + BITS(7);
+                        DROPBITS(7);
+                    }
+                    if (state->have + copy > state->nlen + state->ndist) {
+                        strm->msg = (char *)"invalid bit length repeat";
+                        state->mode = BAD;
+                        break;
+                    }
+                    while (copy--)
+                        state->lens[state->have++] = (unsigned short)len;
+                }
+            }
+
+            /* handle error breaks in while */
+            if (state->mode == BAD) break;
+
+            /* check for end-of-block code (better have one) */
+            if (state->lens[256] == 0) {
+                strm->msg = (char *)"invalid code -- missing end-of-block";
+                state->mode = BAD;
+                break;
+            }
+
+            /* build code tables -- note: do not change the lenbits or distbits
+               values here (9 and 6) without reading the comments in inftrees.h
+               concerning the ENOUGH constants, which depend on those values */
+            state->next = state->codes;
+            state->lencode = (code const FAR *)(state->next);
+            state->lenbits = 9;
+            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid literal/lengths set";
+                state->mode = BAD;
+                break;
+            }
+            state->distcode = (code const FAR *)(state->next);
+            state->distbits = 6;
+            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+                            &(state->next), &(state->distbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid distances set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       codes ok\n"));
+            state->mode = LEN;
+
+        case LEN:
+            /* use inflate_fast() if we have enough input and output */
+            if (have >= 6 && left >= 258) {
+                RESTORE();
+                if (state->whave < state->wsize)
+                    state->whave = state->wsize - left;
+                inflate_fast(strm, state->wsize);
+                LOAD();
+                break;
+            }
+
+            /* get a literal, length, or end-of-block code */
+            for (;;) {
+                here = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if (here.op && (here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->lencode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(here.bits);
+            state->length = (unsigned)here.val;
+
+            /* process literal */
+            if (here.op == 0) {
+                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+                        "inflate:         literal '%c'\n" :
+                        "inflate:         literal 0x%02x\n", here.val));
+                ROOM();
+                *put++ = (unsigned char)(state->length);
+                left--;
+                state->mode = LEN;
+                break;
+            }
+
+            /* process end of block */
+            if (here.op & 32) {
+                Tracevv((stderr, "inflate:         end of block\n"));
+                state->mode = TYPE;
+                break;
+            }
+
+            /* invalid code */
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid literal/length code";
+                state->mode = BAD;
+                break;
+            }
+
+            /* length code -- get extra bits, if any */
+            state->extra = (unsigned)(here.op) & 15;
+            if (state->extra != 0) {
+                NEEDBITS(state->extra);
+                state->length += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+            Tracevv((stderr, "inflate:         length %u\n", state->length));
+
+            /* get distance code */
+            for (;;) {
+                here = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if ((here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->distcode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+            }
+            DROPBITS(here.bits);
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+            state->offset = (unsigned)here.val;
+
+            /* get distance extra bits, if any */
+            state->extra = (unsigned)(here.op) & 15;
+            if (state->extra != 0) {
+                NEEDBITS(state->extra);
+                state->offset += BITS(state->extra);
+                DROPBITS(state->extra);
+            }
+            if (state->offset > state->wsize - (state->whave < state->wsize ?
+                                                left : 0)) {
+                strm->msg = (char *)"invalid distance too far back";
+                state->mode = BAD;
+                break;
+            }
+            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
+
+            /* copy match from window to output */
+            do {
+                ROOM();
+                copy = state->wsize - state->offset;
+                if (copy < left) {
+                    from = put + copy;
+                    copy = left - copy;
+                }
+                else {
+                    from = put - state->offset;
+                    copy = left;
+                }
+                if (copy > state->length) copy = state->length;
+                state->length -= copy;
+                left -= copy;
+                do {
+                    *put++ = *from++;
+                } while (--copy);
+            } while (state->length != 0);
+            break;
+
+        case DONE:
+            /* inflate stream terminated properly -- write leftover output */
+            ret = Z_STREAM_END;
+            if (left < state->wsize) {
+                if (out(out_desc, state->window, state->wsize - left))
+                    ret = Z_BUF_ERROR;
+            }
+            goto inf_leave;
+
+        case BAD:
+            ret = Z_DATA_ERROR;
+            goto inf_leave;
+
+        default:                /* can't happen, but makes compilers happy */
+            ret = Z_STREAM_ERROR;
+            goto inf_leave;
+        }
+
+    /* Return unused input */
+  inf_leave:
+    strm->next_in = next;
+    strm->avail_in = have;
+    return ret;
+}
+
+int ZEXPORT inflateBackEnd(strm)
+z_streamp strm;
+{
+    if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
+        return Z_STREAM_ERROR;
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;
+    Tracev((stderr, "inflate: end\n"));
+    return Z_OK;
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/inffast.c b/deps/libchdr/deps/zlib-1.2.11/inffast.c
new file mode 100644
index 00000000..0dbd1dbc
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/inffast.c
@@ -0,0 +1,323 @@
+/* inffast.c -- fast decoding
+ * Copyright (C) 1995-2017 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifdef ASMINF
+#  pragma message("Assembler code may have bugs -- use at your own risk")
+#else
+
+/*
+   Decode literal, length, and distance codes and write out the resulting
+   literal and match bytes until either not enough input or output is
+   available, an end-of-block is encountered, or a data error is encountered.
+   When large enough input and output buffers are supplied to inflate(), for
+   example, a 16K input buffer and a 64K output buffer, more than 95% of the
+   inflate execution time is spent in this routine.
+
+   Entry assumptions:
+
+        state->mode == LEN
+        strm->avail_in >= 6
+        strm->avail_out >= 258
+        start >= strm->avail_out
+        state->bits < 8
+
+   On return, state->mode is one of:
+
+        LEN -- ran out of enough output space or enough available input
+        TYPE -- reached end of block code, inflate() to interpret next block
+        BAD -- error in block data
+
+   Notes:
+
+    - The maximum input bits used by a length/distance pair is 15 bits for the
+      length code, 5 bits for the length extra, 15 bits for the distance code,
+      and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
+      Therefore if strm->avail_in >= 6, then there is enough input to avoid
+      checking for available input while decoding.
+
+    - The maximum bytes that a single length/distance pair can output is 258
+      bytes, which is the maximum length that can be coded.  inflate_fast()
+      requires strm->avail_out >= 258 for each loop to avoid checking for
+      output space.
+ */
+void ZLIB_INTERNAL inflate_fast(strm, start)
+z_streamp strm;
+unsigned start;         /* inflate()'s starting value for strm->avail_out */
+{
+    struct inflate_state FAR *state;
+    z_const unsigned char FAR *in;      /* local strm->next_in */
+    z_const unsigned char FAR *last;    /* have enough input while in < last */
+    unsigned char FAR *out;     /* local strm->next_out */
+    unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
+    unsigned char FAR *end;     /* while out < end, enough space available */
+#ifdef INFLATE_STRICT
+    unsigned dmax;              /* maximum distance from zlib header */
+#endif
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned wnext;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
+    unsigned long hold;         /* local strm->hold */
+    unsigned bits;              /* local strm->bits */
+    code const FAR *lcode;      /* local strm->lencode */
+    code const FAR *dcode;      /* local strm->distcode */
+    unsigned lmask;             /* mask for first level of length codes */
+    unsigned dmask;             /* mask for first level of distance codes */
+    code here;                  /* retrieved table entry */
+    unsigned op;                /* code bits, operation, extra bits, or */
+                                /*  window position, window bytes to copy */
+    unsigned len;               /* match length, unused bytes */
+    unsigned dist;              /* match distance */
+    unsigned char FAR *from;    /* where to copy match from */
+
+    /* copy state to local variables */
+    state = (struct inflate_state FAR *)strm->state;
+    in = strm->next_in;
+    last = in + (strm->avail_in - 5);
+    out = strm->next_out;
+    beg = out - (start - strm->avail_out);
+    end = out + (strm->avail_out - 257);
+#ifdef INFLATE_STRICT
+    dmax = state->dmax;
+#endif
+    wsize = state->wsize;
+    whave = state->whave;
+    wnext = state->wnext;
+    window = state->window;
+    hold = state->hold;
+    bits = state->bits;
+    lcode = state->lencode;
+    dcode = state->distcode;
+    lmask = (1U << state->lenbits) - 1;
+    dmask = (1U << state->distbits) - 1;
+
+    /* decode literals and length/distances until end-of-block or not enough
+       input data or output space */
+    do {
+        if (bits < 15) {
+            hold += (unsigned long)(*in++) << bits;
+            bits += 8;
+            hold += (unsigned long)(*in++) << bits;
+            bits += 8;
+        }
+        here = lcode[hold & lmask];
+      dolen:
+        op = (unsigned)(here.bits);
+        hold >>= op;
+        bits -= op;
+        op = (unsigned)(here.op);
+        if (op == 0) {                          /* literal */
+            Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+                    "inflate:         literal '%c'\n" :
+                    "inflate:         literal 0x%02x\n", here.val));
+            *out++ = (unsigned char)(here.val);
+        }
+        else if (op & 16) {                     /* length base */
+            len = (unsigned)(here.val);
+            op &= 15;                           /* number of extra bits */
+            if (op) {
+                if (bits < op) {
+                    hold += (unsigned long)(*in++) << bits;
+                    bits += 8;
+                }
+                len += (unsigned)hold & ((1U << op) - 1);
+                hold >>= op;
+                bits -= op;
+            }
+            Tracevv((stderr, "inflate:         length %u\n", len));
+            if (bits < 15) {
+                hold += (unsigned long)(*in++) << bits;
+                bits += 8;
+                hold += (unsigned long)(*in++) << bits;
+                bits += 8;
+            }
+            here = dcode[hold & dmask];
+          dodist:
+            op = (unsigned)(here.bits);
+            hold >>= op;
+            bits -= op;
+            op = (unsigned)(here.op);
+            if (op & 16) {                      /* distance base */
+                dist = (unsigned)(here.val);
+                op &= 15;                       /* number of extra bits */
+                if (bits < op) {
+                    hold += (unsigned long)(*in++) << bits;
+                    bits += 8;
+                    if (bits < op) {
+                        hold += (unsigned long)(*in++) << bits;
+                        bits += 8;
+                    }
+                }
+                dist += (unsigned)hold & ((1U << op) - 1);
+#ifdef INFLATE_STRICT
+                if (dist > dmax) {
+                    strm->msg = (char *)"invalid distance too far back";
+                    state->mode = BAD;
+                    break;
+                }
+#endif
+                hold >>= op;
+                bits -= op;
+                Tracevv((stderr, "inflate:         distance %u\n", dist));
+                op = (unsigned)(out - beg);     /* max distance in output */
+                if (dist > op) {                /* see if copy from window */
+                    op = dist - op;             /* distance back in window */
+                    if (op > whave) {
+                        if (state->sane) {
+                            strm->msg =
+                                (char *)"invalid distance too far back";
+                            state->mode = BAD;
+                            break;
+                        }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+                        if (len <= op - whave) {
+                            do {
+                                *out++ = 0;
+                            } while (--len);
+                            continue;
+                        }
+                        len -= op - whave;
+                        do {
+                            *out++ = 0;
+                        } while (--op > whave);
+                        if (op == 0) {
+                            from = out - dist;
+                            do {
+                                *out++ = *from++;
+                            } while (--len);
+                            continue;
+                        }
+#endif
+                    }
+                    from = window;
+                    if (wnext == 0) {           /* very common case */
+                        from += wsize - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                *out++ = *from++;
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    else if (wnext < op) {      /* wrap around window */
+                        from += wsize + wnext - op;
+                        op -= wnext;
+                        if (op < len) {         /* some from end of window */
+                            len -= op;
+                            do {
+                                *out++ = *from++;
+                            } while (--op);
+                            from = window;
+                            if (wnext < len) {  /* some from start of window */
+                                op = wnext;
+                                len -= op;
+                                do {
+                                    *out++ = *from++;
+                                } while (--op);
+                                from = out - dist;      /* rest from output */
+                            }
+                        }
+                    }
+                    else {                      /* contiguous in window */
+                        from += wnext - op;
+                        if (op < len) {         /* some from window */
+                            len -= op;
+                            do {
+                                *out++ = *from++;
+                            } while (--op);
+                            from = out - dist;  /* rest from output */
+                        }
+                    }
+                    while (len > 2) {
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        len -= 3;
+                    }
+                    if (len) {
+                        *out++ = *from++;
+                        if (len > 1)
+                            *out++ = *from++;
+                    }
+                }
+                else {
+                    from = out - dist;          /* copy direct from output */
+                    do {                        /* minimum length is three */
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        len -= 3;
+                    } while (len > 2);
+                    if (len) {
+                        *out++ = *from++;
+                        if (len > 1)
+                            *out++ = *from++;
+                    }
+                }
+            }
+            else if ((op & 64) == 0) {          /* 2nd level distance code */
+                here = dcode[here.val + (hold & ((1U << op) - 1))];
+                goto dodist;
+            }
+            else {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+        }
+        else if ((op & 64) == 0) {              /* 2nd level length code */
+            here = lcode[here.val + (hold & ((1U << op) - 1))];
+            goto dolen;
+        }
+        else if (op & 32) {                     /* end-of-block */
+            Tracevv((stderr, "inflate:         end of block\n"));
+            state->mode = TYPE;
+            break;
+        }
+        else {
+            strm->msg = (char *)"invalid literal/length code";
+            state->mode = BAD;
+            break;
+        }
+    } while (in < last && out < end);
+
+    /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
+    len = bits >> 3;
+    in -= len;
+    bits -= len << 3;
+    hold &= (1U << bits) - 1;
+
+    /* update state and return */
+    strm->next_in = in;
+    strm->next_out = out;
+    strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
+    strm->avail_out = (unsigned)(out < end ?
+                                 257 + (end - out) : 257 - (out - end));
+    state->hold = hold;
+    state->bits = bits;
+    return;
+}
+
+/*
+   inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
+   - Using bit fields for code structure
+   - Different op definition to avoid & for extra bits (do & for table bits)
+   - Three separate decoding do-loops for direct, window, and wnext == 0
+   - Special case for distance > 1 copies to do overlapped load and store copy
+   - Explicit branch predictions (based on measured branch probabilities)
+   - Deferring match copy and interspersed it with decoding subsequent codes
+   - Swapping literal/length else
+   - Swapping window/direct else
+   - Larger unrolled copy loops (three is about right)
+   - Moving len -= 3 statement into middle of loop
+ */
+
+#endif /* !ASMINF */
diff --git a/deps/libchdr/deps/zlib-1.2.11/inffast.h b/deps/libchdr/deps/zlib-1.2.11/inffast.h
new file mode 100644
index 00000000..e5c1aa4c
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/inffast.h
@@ -0,0 +1,11 @@
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995-2003, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start));
diff --git a/deps/libchdr/deps/zlib-1.2.11/inffixed.h b/deps/libchdr/deps/zlib-1.2.11/inffixed.h
new file mode 100644
index 00000000..d6283277
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/inffixed.h
@@ -0,0 +1,94 @@
+    /* inffixed.h -- table for decoding fixed codes
+     * Generated automatically by makefixed().
+     */
+
+    /* WARNING: this file should *not* be used by applications.
+       It is part of the implementation of this library and is
+       subject to change. Applications should only use zlib.h.
+     */
+
+    static const code lenfix[512] = {
+        {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48},
+        {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128},
+        {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59},
+        {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176},
+        {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20},
+        {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100},
+        {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8},
+        {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216},
+        {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76},
+        {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114},
+        {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},
+        {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148},
+        {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42},
+        {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86},
+        {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15},
+        {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236},
+        {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62},
+        {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142},
+        {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31},
+        {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162},
+        {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25},
+        {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105},
+        {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4},
+        {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202},
+        {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69},
+        {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125},
+        {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13},
+        {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195},
+        {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35},
+        {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91},
+        {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19},
+        {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246},
+        {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55},
+        {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135},
+        {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99},
+        {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190},
+        {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16},
+        {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96},
+        {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6},
+        {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209},
+        {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},
+        {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116},
+        {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4},
+        {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153},
+        {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44},
+        {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82},
+        {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11},
+        {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229},
+        {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58},
+        {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138},
+        {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51},
+        {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173},
+        {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30},
+        {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110},
+        {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0},
+        {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195},
+        {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65},
+        {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121},
+        {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},
+        {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258},
+        {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37},
+        {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93},
+        {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23},
+        {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251},
+        {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51},
+        {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131},
+        {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67},
+        {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183},
+        {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23},
+        {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103},
+        {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9},
+        {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223},
+        {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79},
+        {0,9,255}
+    };
+
+    static const code distfix[32] = {
+        {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025},
+        {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193},
+        {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385},
+        {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577},
+        {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073},
+        {22,5,193},{64,5,0}
+    };
diff --git a/deps/libchdr/deps/zlib-1.2.11/inflate.c b/deps/libchdr/deps/zlib-1.2.11/inflate.c
new file mode 100644
index 00000000..ac333e8c
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/inflate.c
@@ -0,0 +1,1561 @@
+/* inflate.c -- zlib decompression
+ * Copyright (C) 1995-2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ * Change history:
+ *
+ * 1.2.beta0    24 Nov 2002
+ * - First version -- complete rewrite of inflate to simplify code, avoid
+ *   creation of window when not needed, minimize use of window when it is
+ *   needed, make inffast.c even faster, implement gzip decoding, and to
+ *   improve code readability and style over the previous zlib inflate code
+ *
+ * 1.2.beta1    25 Nov 2002
+ * - Use pointers for available input and output checking in inffast.c
+ * - Remove input and output counters in inffast.c
+ * - Change inffast.c entry and loop from avail_in >= 7 to >= 6
+ * - Remove unnecessary second byte pull from length extra in inffast.c
+ * - Unroll direct copy to three copies per loop in inffast.c
+ *
+ * 1.2.beta2    4 Dec 2002
+ * - Change external routine names to reduce potential conflicts
+ * - Correct filename to inffixed.h for fixed tables in inflate.c
+ * - Make hbuf[] unsigned char to match parameter type in inflate.c
+ * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset)
+ *   to avoid negation problem on Alphas (64 bit) in inflate.c
+ *
+ * 1.2.beta3    22 Dec 2002
+ * - Add comments on state->bits assertion in inffast.c
+ * - Add comments on op field in inftrees.h
+ * - Fix bug in reuse of allocated window after inflateReset()
+ * - Remove bit fields--back to byte structure for speed
+ * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths
+ * - Change post-increments to pre-increments in inflate_fast(), PPC biased?
+ * - Add compile time option, POSTINC, to use post-increments instead (Intel?)
+ * - Make MATCH copy in inflate() much faster for when inflate_fast() not used
+ * - Use local copies of stream next and avail values, as well as local bit
+ *   buffer and bit count in inflate()--for speed when inflate_fast() not used
+ *
+ * 1.2.beta4    1 Jan 2003
+ * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings
+ * - Move a comment on output buffer sizes from inffast.c to inflate.c
+ * - Add comments in inffast.c to introduce the inflate_fast() routine
+ * - Rearrange window copies in inflate_fast() for speed and simplification
+ * - Unroll last copy for window match in inflate_fast()
+ * - Use local copies of window variables in inflate_fast() for speed
+ * - Pull out common wnext == 0 case for speed in inflate_fast()
+ * - Make op and len in inflate_fast() unsigned for consistency
+ * - Add FAR to lcode and dcode declarations in inflate_fast()
+ * - Simplified bad distance check in inflate_fast()
+ * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new
+ *   source file infback.c to provide a call-back interface to inflate for
+ *   programs like gzip and unzip -- uses window as output buffer to avoid
+ *   window copying
+ *
+ * 1.2.beta5    1 Jan 2003
+ * - Improved inflateBack() interface to allow the caller to provide initial
+ *   input in strm.
+ * - Fixed stored blocks bug in inflateBack()
+ *
+ * 1.2.beta6    4 Jan 2003
+ * - Added comments in inffast.c on effectiveness of POSTINC
+ * - Typecasting all around to reduce compiler warnings
+ * - Changed loops from while (1) or do {} while (1) to for (;;), again to
+ *   make compilers happy
+ * - Changed type of window in inflateBackInit() to unsigned char *
+ *
+ * 1.2.beta7    27 Jan 2003
+ * - Changed many types to unsigned or unsigned short to avoid warnings
+ * - Added inflateCopy() function
+ *
+ * 1.2.0        9 Mar 2003
+ * - Changed inflateBack() interface to provide separate opaque descriptors
+ *   for the in() and out() functions
+ * - Changed inflateBack() argument and in_func typedef to swap the length
+ *   and buffer address return values for the input function
+ * - Check next_in and next_out for Z_NULL on entry to inflate()
+ *
+ * The history for versions after 1.2.0 are in ChangeLog in zlib distribution.
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+#include "inflate.h"
+#include "inffast.h"
+
+#ifdef MAKEFIXED
+#  ifndef BUILDFIXED
+#    define BUILDFIXED
+#  endif
+#endif
+
+/* function prototypes */
+local int inflateStateCheck OF((z_streamp strm));
+local void fixedtables OF((struct inflate_state FAR *state));
+local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
+                           unsigned copy));
+#ifdef BUILDFIXED
+   void makefixed OF((void));
+#endif
+local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
+                              unsigned len));
+
+local int inflateStateCheck(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+    if (strm == Z_NULL ||
+        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
+        return 1;
+    state = (struct inflate_state FAR *)strm->state;
+    if (state == Z_NULL || state->strm != strm ||
+        state->mode < HEAD || state->mode > SYNC)
+        return 1;
+    return 0;
+}
+
+int ZEXPORT inflateResetKeep(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    strm->total_in = strm->total_out = state->total = 0;
+    strm->msg = Z_NULL;
+    if (state->wrap)        /* to support ill-conceived Java test suite */
+        strm->adler = state->wrap & 1;
+    state->mode = HEAD;
+    state->last = 0;
+    state->havedict = 0;
+    state->dmax = 32768U;
+    state->head = Z_NULL;
+    state->hold = 0;
+    state->bits = 0;
+    state->lencode = state->distcode = state->next = state->codes;
+    state->sane = 1;
+    state->back = -1;
+    Tracev((stderr, "inflate: reset\n"));
+    return Z_OK;
+}
+
+int ZEXPORT inflateReset(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    state->wsize = 0;
+    state->whave = 0;
+    state->wnext = 0;
+    return inflateResetKeep(strm);
+}
+
+int ZEXPORT inflateReset2(strm, windowBits)
+z_streamp strm;
+int windowBits;
+{
+    int wrap;
+    struct inflate_state FAR *state;
+
+    /* get the state */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* extract wrap request from windowBits parameter */
+    if (windowBits < 0) {
+        wrap = 0;
+        windowBits = -windowBits;
+    }
+    else {
+        wrap = (windowBits >> 4) + 5;
+#ifdef GUNZIP
+        if (windowBits < 48)
+            windowBits &= 15;
+#endif
+    }
+
+    /* set number of window bits, free window if different */
+    if (windowBits && (windowBits < 8 || windowBits > 15))
+        return Z_STREAM_ERROR;
+    if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
+        ZFREE(strm, state->window);
+        state->window = Z_NULL;
+    }
+
+    /* update state and reset the rest of it */
+    state->wrap = wrap;
+    state->wbits = (unsigned)windowBits;
+    return inflateReset(strm);
+}
+
+int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
+z_streamp strm;
+int windowBits;
+const char *version;
+int stream_size;
+{
+    int ret;
+    struct inflate_state FAR *state;
+
+    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+        stream_size != (int)(sizeof(z_stream)))
+        return Z_VERSION_ERROR;
+    if (strm == Z_NULL) return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+    if (strm->zalloc == (alloc_func)0) {
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+#endif
+    }
+    if (strm->zfree == (free_func)0)
+#ifdef Z_SOLO
+        return Z_STREAM_ERROR;
+#else
+        strm->zfree = zcfree;
+#endif
+    state = (struct inflate_state FAR *)
+            ZALLOC(strm, 1, sizeof(struct inflate_state));
+    if (state == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (struct internal_state FAR *)state;
+    state->strm = strm;
+    state->window = Z_NULL;
+    state->mode = HEAD;     /* to pass state test in inflateReset2() */
+    ret = inflateReset2(strm, windowBits);
+    if (ret != Z_OK) {
+        ZFREE(strm, state);
+        strm->state = Z_NULL;
+    }
+    return ret;
+}
+
+int ZEXPORT inflateInit_(strm, version, stream_size)
+z_streamp strm;
+const char *version;
+int stream_size;
+{
+    return inflateInit2_(strm, DEF_WBITS, version, stream_size);
+}
+
+int ZEXPORT inflatePrime(strm, bits, value)
+z_streamp strm;
+int bits;
+int value;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (bits < 0) {
+        state->hold = 0;
+        state->bits = 0;
+        return Z_OK;
+    }
+    if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
+    value &= (1L << bits) - 1;
+    state->hold += (unsigned)value << state->bits;
+    state->bits += (uInt)bits;
+    return Z_OK;
+}
+
+/*
+   Return state with length and distance decoding tables and index sizes set to
+   fixed code decoding.  Normally this returns fixed tables from inffixed.h.
+   If BUILDFIXED is defined, then instead this routine builds the tables the
+   first time it's called, and returns those tables the first time and
+   thereafter.  This reduces the size of the code by about 2K bytes, in
+   exchange for a little execution time.  However, BUILDFIXED should not be
+   used for threaded applications, since the rewriting of the tables and virgin
+   may not be thread-safe.
+ */
+local void fixedtables(state)
+struct inflate_state FAR *state;
+{
+#ifdef BUILDFIXED
+    static int virgin = 1;
+    static code *lenfix, *distfix;
+    static code fixed[544];
+
+    /* build fixed huffman tables if first call (may not be thread safe) */
+    if (virgin) {
+        unsigned sym, bits;
+        static code *next;
+
+        /* literal/length table */
+        sym = 0;
+        while (sym < 144) state->lens[sym++] = 8;
+        while (sym < 256) state->lens[sym++] = 9;
+        while (sym < 280) state->lens[sym++] = 7;
+        while (sym < 288) state->lens[sym++] = 8;
+        next = fixed;
+        lenfix = next;
+        bits = 9;
+        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
+
+        /* distance table */
+        sym = 0;
+        while (sym < 32) state->lens[sym++] = 5;
+        distfix = next;
+        bits = 5;
+        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
+
+        /* do this just once */
+        virgin = 0;
+    }
+#else /* !BUILDFIXED */
+#   include "inffixed.h"
+#endif /* BUILDFIXED */
+    state->lencode = lenfix;
+    state->lenbits = 9;
+    state->distcode = distfix;
+    state->distbits = 5;
+}
+
+#ifdef MAKEFIXED
+#include <stdio.h>
+
+/*
+   Write out the inffixed.h that is #include'd above.  Defining MAKEFIXED also
+   defines BUILDFIXED, so the tables are built on the fly.  makefixed() writes
+   those tables to stdout, which would be piped to inffixed.h.  A small program
+   can simply call makefixed to do this:
+
+    void makefixed(void);
+
+    int main(void)
+    {
+        makefixed();
+        return 0;
+    }
+
+   Then that can be linked with zlib built with MAKEFIXED defined and run:
+
+    a.out > inffixed.h
+ */
+void makefixed()
+{
+    unsigned low, size;
+    struct inflate_state state;
+
+    fixedtables(&state);
+    puts("    /* inffixed.h -- table for decoding fixed codes");
+    puts("     * Generated automatically by makefixed().");
+    puts("     */");
+    puts("");
+    puts("    /* WARNING: this file should *not* be used by applications.");
+    puts("       It is part of the implementation of this library and is");
+    puts("       subject to change. Applications should only use zlib.h.");
+    puts("     */");
+    puts("");
+    size = 1U << 9;
+    printf("    static const code lenfix[%u] = {", size);
+    low = 0;
+    for (;;) {
+        if ((low % 7) == 0) printf("\n        ");
+        printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
+               state.lencode[low].bits, state.lencode[low].val);
+        if (++low == size) break;
+        putchar(',');
+    }
+    puts("\n    };");
+    size = 1U << 5;
+    printf("\n    static const code distfix[%u] = {", size);
+    low = 0;
+    for (;;) {
+        if ((low % 6) == 0) printf("\n        ");
+        printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
+               state.distcode[low].val);
+        if (++low == size) break;
+        putchar(',');
+    }
+    puts("\n    };");
+}
+#endif /* MAKEFIXED */
+
+/*
+   Update the window with the last wsize (normally 32K) bytes written before
+   returning.  If window does not exist yet, create it.  This is only called
+   when a window is already in use, or when output has been written during this
+   inflate call, but the end of the deflate stream has not been reached yet.
+   It is also called to create a window for dictionary data when a dictionary
+   is loaded.
+
+   Providing output buffers larger than 32K to inflate() should provide a speed
+   advantage, since only the last 32K of output is copied to the sliding window
+   upon return from inflate(), and since all distances after the first 32K of
+   output will fall in the output data, making match copies simpler and faster.
+   The advantage may be dependent on the size of the processor's data caches.
+ */
+local int updatewindow(strm, end, copy)
+z_streamp strm;
+const Bytef *end;
+unsigned copy;
+{
+    struct inflate_state FAR *state;
+    unsigned dist;
+
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* if it hasn't been done already, allocate space for the window */
+    if (state->window == Z_NULL) {
+        state->window = (unsigned char FAR *)
+                        ZALLOC(strm, 1U << state->wbits,
+                               sizeof(unsigned char));
+        if (state->window == Z_NULL) return 1;
+    }
+
+    /* if window not in use yet, initialize */
+    if (state->wsize == 0) {
+        state->wsize = 1U << state->wbits;
+        state->wnext = 0;
+        state->whave = 0;
+    }
+
+    /* copy state->wsize or less output bytes into the circular window */
+    if (copy >= state->wsize) {
+        zmemcpy(state->window, end - state->wsize, state->wsize);
+        state->wnext = 0;
+        state->whave = state->wsize;
+    }
+    else {
+        dist = state->wsize - state->wnext;
+        if (dist > copy) dist = copy;
+        zmemcpy(state->window + state->wnext, end - copy, dist);
+        copy -= dist;
+        if (copy) {
+            zmemcpy(state->window, end - copy, copy);
+            state->wnext = copy;
+            state->whave = state->wsize;
+        }
+        else {
+            state->wnext += dist;
+            if (state->wnext == state->wsize) state->wnext = 0;
+            if (state->whave < state->wsize) state->whave += dist;
+        }
+    }
+    return 0;
+}
+
+/* Macros for inflate(): */
+
+/* check function to use adler32() for zlib or crc32() for gzip */
+#ifdef GUNZIP
+#  define UPDATE(check, buf, len) \
+    (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
+#else
+#  define UPDATE(check, buf, len) adler32(check, buf, len)
+#endif
+
+/* check macros for header crc */
+#ifdef GUNZIP
+#  define CRC2(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        check = crc32(check, hbuf, 2); \
+    } while (0)
+
+#  define CRC4(check, word) \
+    do { \
+        hbuf[0] = (unsigned char)(word); \
+        hbuf[1] = (unsigned char)((word) >> 8); \
+        hbuf[2] = (unsigned char)((word) >> 16); \
+        hbuf[3] = (unsigned char)((word) >> 24); \
+        check = crc32(check, hbuf, 4); \
+    } while (0)
+#endif
+
+/* Load registers with state in inflate() for speed */
+#define LOAD() \
+    do { \
+        put = strm->next_out; \
+        left = strm->avail_out; \
+        next = strm->next_in; \
+        have = strm->avail_in; \
+        hold = state->hold; \
+        bits = state->bits; \
+    } while (0)
+
+/* Restore state from registers in inflate() */
+#define RESTORE() \
+    do { \
+        strm->next_out = put; \
+        strm->avail_out = left; \
+        strm->next_in = next; \
+        strm->avail_in = have; \
+        state->hold = hold; \
+        state->bits = bits; \
+    } while (0)
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflate()
+   if there is no input available. */
+#define PULLBYTE() \
+    do { \
+        if (have == 0) goto inf_leave; \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator.  If there is
+   not enough available input to do that, then return from inflate(). */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/*
+   inflate() uses a state machine to process as much input data and generate as
+   much output data as possible before returning.  The state machine is
+   structured roughly as follows:
+
+    for (;;) switch (state) {
+    ...
+    case STATEn:
+        if (not enough input data or output space to make progress)
+            return;
+        ... make progress ...
+        state = STATEm;
+        break;
+    ...
+    }
+
+   so when inflate() is called again, the same case is attempted again, and
+   if the appropriate resources are provided, the machine proceeds to the
+   next state.  The NEEDBITS() macro is usually the way the state evaluates
+   whether it can proceed or should return.  NEEDBITS() does the return if
+   the requested bits are not available.  The typical use of the BITS macros
+   is:
+
+        NEEDBITS(n);
+        ... do something with BITS(n) ...
+        DROPBITS(n);
+
+   where NEEDBITS(n) either returns from inflate() if there isn't enough
+   input left to load n bits into the accumulator, or it continues.  BITS(n)
+   gives the low n bits in the accumulator.  When done, DROPBITS(n) drops
+   the low n bits off the accumulator.  INITBITS() clears the accumulator
+   and sets the number of available bits to zero.  BYTEBITS() discards just
+   enough bits to put the accumulator on a byte boundary.  After BYTEBITS()
+   and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
+
+   NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
+   if there is no input available.  The decoding of variable length codes uses
+   PULLBYTE() directly in order to pull just enough bytes to decode the next
+   code, and no more.
+
+   Some states loop until they get enough input, making sure that enough
+   state information is maintained to continue the loop where it left off
+   if NEEDBITS() returns in the loop.  For example, want, need, and keep
+   would all have to actually be part of the saved state in case NEEDBITS()
+   returns:
+
+    case STATEw:
+        while (want < need) {
+            NEEDBITS(n);
+            keep[want++] = BITS(n);
+            DROPBITS(n);
+        }
+        state = STATEx;
+    case STATEx:
+
+   As shown above, if the next state is also the next case, then the break
+   is omitted.
+
+   A state may also return if there is not enough output space available to
+   complete that state.  Those states are copying stored data, writing a
+   literal byte, and copying a matching string.
+
+   When returning, a "goto inf_leave" is used to update the total counters,
+   update the check value, and determine whether any progress has been made
+   during that inflate() call in order to return the proper return code.
+   Progress is defined as a change in either strm->avail_in or strm->avail_out.
+   When there is a window, goto inf_leave will update the window with the last
+   output written.  If a goto inf_leave occurs in the middle of decompression
+   and there is no window currently, goto inf_leave will create one and copy
+   output to the window for the next call of inflate().
+
+   In this implementation, the flush parameter of inflate() only affects the
+   return code (per zlib.h).  inflate() always writes as much as possible to
+   strm->next_out, given the space available and the provided input--the effect
+   documented in zlib.h of Z_SYNC_FLUSH.  Furthermore, inflate() always defers
+   the allocation of and copying into a sliding window until necessary, which
+   provides the effect documented in zlib.h for Z_FINISH when the entire input
+   stream available.  So the only thing the flush parameter actually does is:
+   when flush is set to Z_FINISH, inflate() cannot return Z_OK.  Instead it
+   will return Z_BUF_ERROR if it has not reached the end of the stream.
+ */
+
+int ZEXPORT inflate(strm, flush)
+z_streamp strm;
+int flush;
+{
+    struct inflate_state FAR *state;
+    z_const unsigned char FAR *next;    /* next input */
+    unsigned char FAR *put;     /* next output */
+    unsigned have, left;        /* available input and output */
+    unsigned long hold;         /* bit buffer */
+    unsigned bits;              /* bits in bit buffer */
+    unsigned in, out;           /* save starting available input and output */
+    unsigned copy;              /* number of stored or match bytes to copy */
+    unsigned char FAR *from;    /* where to copy match bytes from */
+    code here;                  /* current decoding table entry */
+    code last;                  /* parent table entry */
+    unsigned len;               /* length to copy for repeats, bits to drop */
+    int ret;                    /* return code */
+#ifdef GUNZIP
+    unsigned char hbuf[4];      /* buffer for gzip header crc calculation */
+#endif
+    static const unsigned short order[19] = /* permutation of code lengths */
+        {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+    if (inflateStateCheck(strm) || strm->next_out == Z_NULL ||
+        (strm->next_in == Z_NULL && strm->avail_in != 0))
+        return Z_STREAM_ERROR;
+
+    state = (struct inflate_state FAR *)strm->state;
+    if (state->mode == TYPE) state->mode = TYPEDO;      /* skip check */
+    LOAD();
+    in = have;
+    out = left;
+    ret = Z_OK;
+    for (;;)
+        switch (state->mode) {
+        case HEAD:
+            if (state->wrap == 0) {
+                state->mode = TYPEDO;
+                break;
+            }
+            NEEDBITS(16);
+#ifdef GUNZIP
+            if ((state->wrap & 2) && hold == 0x8b1f) {  /* gzip header */
+                if (state->wbits == 0)
+                    state->wbits = 15;
+                state->check = crc32(0L, Z_NULL, 0);
+                CRC2(state->check, hold);
+                INITBITS();
+                state->mode = FLAGS;
+                break;
+            }
+            state->flags = 0;           /* expect zlib header */
+            if (state->head != Z_NULL)
+                state->head->done = -1;
+            if (!(state->wrap & 1) ||   /* check if zlib header allowed */
+#else
+            if (
+#endif
+                ((BITS(8) << 8) + (hold >> 8)) % 31) {
+                strm->msg = (char *)"incorrect header check";
+                state->mode = BAD;
+                break;
+            }
+            if (BITS(4) != Z_DEFLATED) {
+                strm->msg = (char *)"unknown compression method";
+                state->mode = BAD;
+                break;
+            }
+            DROPBITS(4);
+            len = BITS(4) + 8;
+            if (state->wbits == 0)
+                state->wbits = len;
+            if (len > 15 || len > state->wbits) {
+                strm->msg = (char *)"invalid window size";
+                state->mode = BAD;
+                break;
+            }
+            state->dmax = 1U << len;
+            Tracev((stderr, "inflate:   zlib header ok\n"));
+            strm->adler = state->check = adler32(0L, Z_NULL, 0);
+            state->mode = hold & 0x200 ? DICTID : TYPE;
+            INITBITS();
+            break;
+#ifdef GUNZIP
+        case FLAGS:
+            NEEDBITS(16);
+            state->flags = (int)(hold);
+            if ((state->flags & 0xff) != Z_DEFLATED) {
+                strm->msg = (char *)"unknown compression method";
+                state->mode = BAD;
+                break;
+            }
+            if (state->flags & 0xe000) {
+                strm->msg = (char *)"unknown header flags set";
+                state->mode = BAD;
+                break;
+            }
+            if (state->head != Z_NULL)
+                state->head->text = (int)((hold >> 8) & 1);
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
+            INITBITS();
+            state->mode = TIME;
+        case TIME:
+            NEEDBITS(32);
+            if (state->head != Z_NULL)
+                state->head->time = hold;
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC4(state->check, hold);
+            INITBITS();
+            state->mode = OS;
+        case OS:
+            NEEDBITS(16);
+            if (state->head != Z_NULL) {
+                state->head->xflags = (int)(hold & 0xff);
+                state->head->os = (int)(hold >> 8);
+            }
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
+            INITBITS();
+            state->mode = EXLEN;
+        case EXLEN:
+            if (state->flags & 0x0400) {
+                NEEDBITS(16);
+                state->length = (unsigned)(hold);
+                if (state->head != Z_NULL)
+                    state->head->extra_len = (unsigned)hold;
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    CRC2(state->check, hold);
+                INITBITS();
+            }
+            else if (state->head != Z_NULL)
+                state->head->extra = Z_NULL;
+            state->mode = EXTRA;
+        case EXTRA:
+            if (state->flags & 0x0400) {
+                copy = state->length;
+                if (copy > have) copy = have;
+                if (copy) {
+                    if (state->head != Z_NULL &&
+                        state->head->extra != Z_NULL) {
+                        len = state->head->extra_len - state->length;
+                        zmemcpy(state->head->extra + len, next,
+                                len + copy > state->head->extra_max ?
+                                state->head->extra_max - len : copy);
+                    }
+                    if ((state->flags & 0x0200) && (state->wrap & 4))
+                        state->check = crc32(state->check, next, copy);
+                    have -= copy;
+                    next += copy;
+                    state->length -= copy;
+                }
+                if (state->length) goto inf_leave;
+            }
+            state->length = 0;
+            state->mode = NAME;
+        case NAME:
+            if (state->flags & 0x0800) {
+                if (have == 0) goto inf_leave;
+                copy = 0;
+                do {
+                    len = (unsigned)(next[copy++]);
+                    if (state->head != Z_NULL &&
+                            state->head->name != Z_NULL &&
+                            state->length < state->head->name_max)
+                        state->head->name[state->length++] = (Bytef)len;
+                } while (len && copy < have);
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    state->check = crc32(state->check, next, copy);
+                have -= copy;
+                next += copy;
+                if (len) goto inf_leave;
+            }
+            else if (state->head != Z_NULL)
+                state->head->name = Z_NULL;
+            state->length = 0;
+            state->mode = COMMENT;
+        case COMMENT:
+            if (state->flags & 0x1000) {
+                if (have == 0) goto inf_leave;
+                copy = 0;
+                do {
+                    len = (unsigned)(next[copy++]);
+                    if (state->head != Z_NULL &&
+                            state->head->comment != Z_NULL &&
+                            state->length < state->head->comm_max)
+                        state->head->comment[state->length++] = (Bytef)len;
+                } while (len && copy < have);
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    state->check = crc32(state->check, next, copy);
+                have -= copy;
+                next += copy;
+                if (len) goto inf_leave;
+            }
+            else if (state->head != Z_NULL)
+                state->head->comment = Z_NULL;
+            state->mode = HCRC;
+        case HCRC:
+            if (state->flags & 0x0200) {
+                NEEDBITS(16);
+                if ((state->wrap & 4) && hold != (state->check & 0xffff)) {
+                    strm->msg = (char *)"header crc mismatch";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+            }
+            if (state->head != Z_NULL) {
+                state->head->hcrc = (int)((state->flags >> 9) & 1);
+                state->head->done = 1;
+            }
+            strm->adler = state->check = crc32(0L, Z_NULL, 0);
+            state->mode = TYPE;
+            break;
+#endif
+        case DICTID:
+            NEEDBITS(32);
+            strm->adler = state->check = ZSWAP32(hold);
+            INITBITS();
+            state->mode = DICT;
+        case DICT:
+            if (state->havedict == 0) {
+                RESTORE();
+                return Z_NEED_DICT;
+            }
+            strm->adler = state->check = adler32(0L, Z_NULL, 0);
+            state->mode = TYPE;
+        case TYPE:
+            if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
+        case TYPEDO:
+            if (state->last) {
+                BYTEBITS();
+                state->mode = CHECK;
+                break;
+            }
+            NEEDBITS(3);
+            state->last = BITS(1);
+            DROPBITS(1);
+            switch (BITS(2)) {
+            case 0:                             /* stored block */
+                Tracev((stderr, "inflate:     stored block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = STORED;
+                break;
+            case 1:                             /* fixed block */
+                fixedtables(state);
+                Tracev((stderr, "inflate:     fixed codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = LEN_;             /* decode codes */
+                if (flush == Z_TREES) {
+                    DROPBITS(2);
+                    goto inf_leave;
+                }
+                break;
+            case 2:                             /* dynamic block */
+                Tracev((stderr, "inflate:     dynamic codes block%s\n",
+                        state->last ? " (last)" : ""));
+                state->mode = TABLE;
+                break;
+            case 3:
+                strm->msg = (char *)"invalid block type";
+                state->mode = BAD;
+            }
+            DROPBITS(2);
+            break;
+        case STORED:
+            BYTEBITS();                         /* go to byte boundary */
+            NEEDBITS(32);
+            if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
+                strm->msg = (char *)"invalid stored block lengths";
+                state->mode = BAD;
+                break;
+            }
+            state->length = (unsigned)hold & 0xffff;
+            Tracev((stderr, "inflate:       stored length %u\n",
+                    state->length));
+            INITBITS();
+            state->mode = COPY_;
+            if (flush == Z_TREES) goto inf_leave;
+        case COPY_:
+            state->mode = COPY;
+        case COPY:
+            copy = state->length;
+            if (copy) {
+                if (copy > have) copy = have;
+                if (copy > left) copy = left;
+                if (copy == 0) goto inf_leave;
+                zmemcpy(put, next, copy);
+                have -= copy;
+                next += copy;
+                left -= copy;
+                put += copy;
+                state->length -= copy;
+                break;
+            }
+            Tracev((stderr, "inflate:       stored end\n"));
+            state->mode = TYPE;
+            break;
+        case TABLE:
+            NEEDBITS(14);
+            state->nlen = BITS(5) + 257;
+            DROPBITS(5);
+            state->ndist = BITS(5) + 1;
+            DROPBITS(5);
+            state->ncode = BITS(4) + 4;
+            DROPBITS(4);
+#ifndef PKZIP_BUG_WORKAROUND
+            if (state->nlen > 286 || state->ndist > 30) {
+                strm->msg = (char *)"too many length or distance symbols";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracev((stderr, "inflate:       table sizes ok\n"));
+            state->have = 0;
+            state->mode = LENLENS;
+        case LENLENS:
+            while (state->have < state->ncode) {
+                NEEDBITS(3);
+                state->lens[order[state->have++]] = (unsigned short)BITS(3);
+                DROPBITS(3);
+            }
+            while (state->have < 19)
+                state->lens[order[state->have++]] = 0;
+            state->next = state->codes;
+            state->lencode = (const code FAR *)(state->next);
+            state->lenbits = 7;
+            ret = inflate_table(CODES, state->lens, 19, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid code lengths set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       code lengths ok\n"));
+            state->have = 0;
+            state->mode = CODELENS;
+        case CODELENS:
+            while (state->have < state->nlen + state->ndist) {
+                for (;;) {
+                    here = state->lencode[BITS(state->lenbits)];
+                    if ((unsigned)(here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                if (here.val < 16) {
+                    DROPBITS(here.bits);
+                    state->lens[state->have++] = here.val;
+                }
+                else {
+                    if (here.val == 16) {
+                        NEEDBITS(here.bits + 2);
+                        DROPBITS(here.bits);
+                        if (state->have == 0) {
+                            strm->msg = (char *)"invalid bit length repeat";
+                            state->mode = BAD;
+                            break;
+                        }
+                        len = state->lens[state->have - 1];
+                        copy = 3 + BITS(2);
+                        DROPBITS(2);
+                    }
+                    else if (here.val == 17) {
+                        NEEDBITS(here.bits + 3);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 3 + BITS(3);
+                        DROPBITS(3);
+                    }
+                    else {
+                        NEEDBITS(here.bits + 7);
+                        DROPBITS(here.bits);
+                        len = 0;
+                        copy = 11 + BITS(7);
+                        DROPBITS(7);
+                    }
+                    if (state->have + copy > state->nlen + state->ndist) {
+                        strm->msg = (char *)"invalid bit length repeat";
+                        state->mode = BAD;
+                        break;
+                    }
+                    while (copy--)
+                        state->lens[state->have++] = (unsigned short)len;
+                }
+            }
+
+            /* handle error breaks in while */
+            if (state->mode == BAD) break;
+
+            /* check for end-of-block code (better have one) */
+            if (state->lens[256] == 0) {
+                strm->msg = (char *)"invalid code -- missing end-of-block";
+                state->mode = BAD;
+                break;
+            }
+
+            /* build code tables -- note: do not change the lenbits or distbits
+               values here (9 and 6) without reading the comments in inftrees.h
+               concerning the ENOUGH constants, which depend on those values */
+            state->next = state->codes;
+            state->lencode = (const code FAR *)(state->next);
+            state->lenbits = 9;
+            ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
+                                &(state->lenbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid literal/lengths set";
+                state->mode = BAD;
+                break;
+            }
+            state->distcode = (const code FAR *)(state->next);
+            state->distbits = 6;
+            ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
+                            &(state->next), &(state->distbits), state->work);
+            if (ret) {
+                strm->msg = (char *)"invalid distances set";
+                state->mode = BAD;
+                break;
+            }
+            Tracev((stderr, "inflate:       codes ok\n"));
+            state->mode = LEN_;
+            if (flush == Z_TREES) goto inf_leave;
+        case LEN_:
+            state->mode = LEN;
+        case LEN:
+            if (have >= 6 && left >= 258) {
+                RESTORE();
+                inflate_fast(strm, out);
+                LOAD();
+                if (state->mode == TYPE)
+                    state->back = -1;
+                break;
+            }
+            state->back = 0;
+            for (;;) {
+                here = state->lencode[BITS(state->lenbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if (here.op && (here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->lencode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+                state->back += last.bits;
+            }
+            DROPBITS(here.bits);
+            state->back += here.bits;
+            state->length = (unsigned)here.val;
+            if ((int)(here.op) == 0) {
+                Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
+                        "inflate:         literal '%c'\n" :
+                        "inflate:         literal 0x%02x\n", here.val));
+                state->mode = LIT;
+                break;
+            }
+            if (here.op & 32) {
+                Tracevv((stderr, "inflate:         end of block\n"));
+                state->back = -1;
+                state->mode = TYPE;
+                break;
+            }
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid literal/length code";
+                state->mode = BAD;
+                break;
+            }
+            state->extra = (unsigned)(here.op) & 15;
+            state->mode = LENEXT;
+        case LENEXT:
+            if (state->extra) {
+                NEEDBITS(state->extra);
+                state->length += BITS(state->extra);
+                DROPBITS(state->extra);
+                state->back += state->extra;
+            }
+            Tracevv((stderr, "inflate:         length %u\n", state->length));
+            state->was = state->length;
+            state->mode = DIST;
+        case DIST:
+            for (;;) {
+                here = state->distcode[BITS(state->distbits)];
+                if ((unsigned)(here.bits) <= bits) break;
+                PULLBYTE();
+            }
+            if ((here.op & 0xf0) == 0) {
+                last = here;
+                for (;;) {
+                    here = state->distcode[last.val +
+                            (BITS(last.bits + last.op) >> last.bits)];
+                    if ((unsigned)(last.bits + here.bits) <= bits) break;
+                    PULLBYTE();
+                }
+                DROPBITS(last.bits);
+                state->back += last.bits;
+            }
+            DROPBITS(here.bits);
+            state->back += here.bits;
+            if (here.op & 64) {
+                strm->msg = (char *)"invalid distance code";
+                state->mode = BAD;
+                break;
+            }
+            state->offset = (unsigned)here.val;
+            state->extra = (unsigned)(here.op) & 15;
+            state->mode = DISTEXT;
+        case DISTEXT:
+            if (state->extra) {
+                NEEDBITS(state->extra);
+                state->offset += BITS(state->extra);
+                DROPBITS(state->extra);
+                state->back += state->extra;
+            }
+#ifdef INFLATE_STRICT
+            if (state->offset > state->dmax) {
+                strm->msg = (char *)"invalid distance too far back";
+                state->mode = BAD;
+                break;
+            }
+#endif
+            Tracevv((stderr, "inflate:         distance %u\n", state->offset));
+            state->mode = MATCH;
+        case MATCH:
+            if (left == 0) goto inf_leave;
+            copy = out - left;
+            if (state->offset > copy) {         /* copy from window */
+                copy = state->offset - copy;
+                if (copy > state->whave) {
+                    if (state->sane) {
+                        strm->msg = (char *)"invalid distance too far back";
+                        state->mode = BAD;
+                        break;
+                    }
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+                    Trace((stderr, "inflate.c too far\n"));
+                    copy -= state->whave;
+                    if (copy > state->length) copy = state->length;
+                    if (copy > left) copy = left;
+                    left -= copy;
+                    state->length -= copy;
+                    do {
+                        *put++ = 0;
+                    } while (--copy);
+                    if (state->length == 0) state->mode = LEN;
+                    break;
+#endif
+                }
+                if (copy > state->wnext) {
+                    copy -= state->wnext;
+                    from = state->window + (state->wsize - copy);
+                }
+                else
+                    from = state->window + (state->wnext - copy);
+                if (copy > state->length) copy = state->length;
+            }
+            else {                              /* copy from output */
+                from = put - state->offset;
+                copy = state->length;
+            }
+            if (copy > left) copy = left;
+            left -= copy;
+            state->length -= copy;
+            do {
+                *put++ = *from++;
+            } while (--copy);
+            if (state->length == 0) state->mode = LEN;
+            break;
+        case LIT:
+            if (left == 0) goto inf_leave;
+            *put++ = (unsigned char)(state->length);
+            left--;
+            state->mode = LEN;
+            break;
+        case CHECK:
+            if (state->wrap) {
+                NEEDBITS(32);
+                out -= left;
+                strm->total_out += out;
+                state->total += out;
+                if ((state->wrap & 4) && out)
+                    strm->adler = state->check =
+                        UPDATE(state->check, put - out, out);
+                out = left;
+                if ((state->wrap & 4) && (
+#ifdef GUNZIP
+                     state->flags ? hold :
+#endif
+                     ZSWAP32(hold)) != state->check) {
+                    strm->msg = (char *)"incorrect data check";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+                Tracev((stderr, "inflate:   check matches trailer\n"));
+            }
+#ifdef GUNZIP
+            state->mode = LENGTH;
+        case LENGTH:
+            if (state->wrap && state->flags) {
+                NEEDBITS(32);
+                if (hold != (state->total & 0xffffffffUL)) {
+                    strm->msg = (char *)"incorrect length check";
+                    state->mode = BAD;
+                    break;
+                }
+                INITBITS();
+                Tracev((stderr, "inflate:   length matches trailer\n"));
+            }
+#endif
+            state->mode = DONE;
+        case DONE:
+            ret = Z_STREAM_END;
+            goto inf_leave;
+        case BAD:
+            ret = Z_DATA_ERROR;
+            goto inf_leave;
+        case MEM:
+            return Z_MEM_ERROR;
+        case SYNC:
+        default:
+            return Z_STREAM_ERROR;
+        }
+
+    /*
+       Return from inflate(), updating the total counts and the check value.
+       If there was no progress during the inflate() call, return a buffer
+       error.  Call updatewindow() to create and/or update the window state.
+       Note: a memory error from inflate() is non-recoverable.
+     */
+  inf_leave:
+    RESTORE();
+    if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
+            (state->mode < CHECK || flush != Z_FINISH)))
+        if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
+            state->mode = MEM;
+            return Z_MEM_ERROR;
+        }
+    in -= strm->avail_in;
+    out -= strm->avail_out;
+    strm->total_in += in;
+    strm->total_out += out;
+    state->total += out;
+    if ((state->wrap & 4) && out)
+        strm->adler = state->check =
+            UPDATE(state->check, strm->next_out - out, out);
+    strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
+                      (state->mode == TYPE ? 128 : 0) +
+                      (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
+    if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
+        ret = Z_BUF_ERROR;
+    return ret;
+}
+
+int ZEXPORT inflateEnd(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+    if (inflateStateCheck(strm))
+        return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (state->window != Z_NULL) ZFREE(strm, state->window);
+    ZFREE(strm, strm->state);
+    strm->state = Z_NULL;
+    Tracev((stderr, "inflate: end\n"));
+    return Z_OK;
+}
+
+int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength)
+z_streamp strm;
+Bytef *dictionary;
+uInt *dictLength;
+{
+    struct inflate_state FAR *state;
+
+    /* check state */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+
+    /* copy dictionary */
+    if (state->whave && dictionary != Z_NULL) {
+        zmemcpy(dictionary, state->window + state->wnext,
+                state->whave - state->wnext);
+        zmemcpy(dictionary + state->whave - state->wnext,
+                state->window, state->wnext);
+    }
+    if (dictLength != Z_NULL)
+        *dictLength = state->whave;
+    return Z_OK;
+}
+
+int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
+z_streamp strm;
+const Bytef *dictionary;
+uInt dictLength;
+{
+    struct inflate_state FAR *state;
+    unsigned long dictid;
+    int ret;
+
+    /* check state */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (state->wrap != 0 && state->mode != DICT)
+        return Z_STREAM_ERROR;
+
+    /* check for correct dictionary identifier */
+    if (state->mode == DICT) {
+        dictid = adler32(0L, Z_NULL, 0);
+        dictid = adler32(dictid, dictionary, dictLength);
+        if (dictid != state->check)
+            return Z_DATA_ERROR;
+    }
+
+    /* copy dictionary to window using updatewindow(), which will amend the
+       existing dictionary if appropriate */
+    ret = updatewindow(strm, dictionary + dictLength, dictLength);
+    if (ret) {
+        state->mode = MEM;
+        return Z_MEM_ERROR;
+    }
+    state->havedict = 1;
+    Tracev((stderr, "inflate:   dictionary set\n"));
+    return Z_OK;
+}
+
+int ZEXPORT inflateGetHeader(strm, head)
+z_streamp strm;
+gz_headerp head;
+{
+    struct inflate_state FAR *state;
+
+    /* check state */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if ((state->wrap & 2) == 0) return Z_STREAM_ERROR;
+
+    /* save header structure */
+    state->head = head;
+    head->done = 0;
+    return Z_OK;
+}
+
+/*
+   Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff.  Return when found
+   or when out of input.  When called, *have is the number of pattern bytes
+   found in order so far, in 0..3.  On return *have is updated to the new
+   state.  If on return *have equals four, then the pattern was found and the
+   return value is how many bytes were read including the last byte of the
+   pattern.  If *have is less than four, then the pattern has not been found
+   yet and the return value is len.  In the latter case, syncsearch() can be
+   called again with more data and the *have state.  *have is initialized to
+   zero for the first call.
+ */
+local unsigned syncsearch(have, buf, len)
+unsigned FAR *have;
+const unsigned char FAR *buf;
+unsigned len;
+{
+    unsigned got;
+    unsigned next;
+
+    got = *have;
+    next = 0;
+    while (next < len && got < 4) {
+        if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
+            got++;
+        else if (buf[next])
+            got = 0;
+        else
+            got = 4 - got;
+        next++;
+    }
+    *have = got;
+    return next;
+}
+
+int ZEXPORT inflateSync(strm)
+z_streamp strm;
+{
+    unsigned len;               /* number of bytes to look at or looked at */
+    unsigned long in, out;      /* temporary to save total_in and total_out */
+    unsigned char buf[4];       /* to restore bit buffer to byte string */
+    struct inflate_state FAR *state;
+
+    /* check parameters */
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
+
+    /* if first time, start search in bit buffer */
+    if (state->mode != SYNC) {
+        state->mode = SYNC;
+        state->hold <<= state->bits & 7;
+        state->bits -= state->bits & 7;
+        len = 0;
+        while (state->bits >= 8) {
+            buf[len++] = (unsigned char)(state->hold);
+            state->hold >>= 8;
+            state->bits -= 8;
+        }
+        state->have = 0;
+        syncsearch(&(state->have), buf, len);
+    }
+
+    /* search available input */
+    len = syncsearch(&(state->have), strm->next_in, strm->avail_in);
+    strm->avail_in -= len;
+    strm->next_in += len;
+    strm->total_in += len;
+
+    /* return no joy or set up to restart inflate() on a new block */
+    if (state->have != 4) return Z_DATA_ERROR;
+    in = strm->total_in;  out = strm->total_out;
+    inflateReset(strm);
+    strm->total_in = in;  strm->total_out = out;
+    state->mode = TYPE;
+    return Z_OK;
+}
+
+/*
+   Returns true if inflate is currently at the end of a block generated by
+   Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
+   implementation to provide an additional safety check. PPP uses
+   Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored
+   block. When decompressing, PPP checks that at the end of input packet,
+   inflate is waiting for these length bytes.
+ */
+int ZEXPORT inflateSyncPoint(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    return state->mode == STORED && state->bits == 0;
+}
+
+int ZEXPORT inflateCopy(dest, source)
+z_streamp dest;
+z_streamp source;
+{
+    struct inflate_state FAR *state;
+    struct inflate_state FAR *copy;
+    unsigned char FAR *window;
+    unsigned wsize;
+
+    /* check input */
+    if (inflateStateCheck(source) || dest == Z_NULL)
+        return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)source->state;
+
+    /* allocate space */
+    copy = (struct inflate_state FAR *)
+           ZALLOC(source, 1, sizeof(struct inflate_state));
+    if (copy == Z_NULL) return Z_MEM_ERROR;
+    window = Z_NULL;
+    if (state->window != Z_NULL) {
+        window = (unsigned char FAR *)
+                 ZALLOC(source, 1U << state->wbits, sizeof(unsigned char));
+        if (window == Z_NULL) {
+            ZFREE(source, copy);
+            return Z_MEM_ERROR;
+        }
+    }
+
+    /* copy state */
+    zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
+    zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
+    copy->strm = dest;
+    if (state->lencode >= state->codes &&
+        state->lencode <= state->codes + ENOUGH - 1) {
+        copy->lencode = copy->codes + (state->lencode - state->codes);
+        copy->distcode = copy->codes + (state->distcode - state->codes);
+    }
+    copy->next = copy->codes + (state->next - state->codes);
+    if (window != Z_NULL) {
+        wsize = 1U << state->wbits;
+        zmemcpy(window, state->window, wsize);
+    }
+    copy->window = window;
+    dest->state = (struct internal_state FAR *)copy;
+    return Z_OK;
+}
+
+int ZEXPORT inflateUndermine(strm, subvert)
+z_streamp strm;
+int subvert;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+    state->sane = !subvert;
+    return Z_OK;
+#else
+    (void)subvert;
+    state->sane = 1;
+    return Z_DATA_ERROR;
+#endif
+}
+
+int ZEXPORT inflateValidate(strm, check)
+z_streamp strm;
+int check;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
+    state = (struct inflate_state FAR *)strm->state;
+    if (check)
+        state->wrap |= 4;
+    else
+        state->wrap &= ~4;
+    return Z_OK;
+}
+
+long ZEXPORT inflateMark(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm))
+        return -(1L << 16);
+    state = (struct inflate_state FAR *)strm->state;
+    return (long)(((unsigned long)((long)state->back)) << 16) +
+        (state->mode == COPY ? state->length :
+            (state->mode == MATCH ? state->was - state->length : 0));
+}
+
+unsigned long ZEXPORT inflateCodesUsed(strm)
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+    if (inflateStateCheck(strm)) return (unsigned long)-1;
+    state = (struct inflate_state FAR *)strm->state;
+    return (unsigned long)(state->next - state->codes);
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/inflate.h b/deps/libchdr/deps/zlib-1.2.11/inflate.h
new file mode 100644
index 00000000..a46cce6b
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/inflate.h
@@ -0,0 +1,125 @@
+/* inflate.h -- internal inflate state definition
+ * Copyright (C) 1995-2016 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* define NO_GZIP when compiling if you want to disable gzip header and
+   trailer decoding by inflate().  NO_GZIP would be used to avoid linking in
+   the crc code when it is not needed.  For shared libraries, gzip decoding
+   should be left enabled. */
+#ifndef NO_GZIP
+#  define GUNZIP
+#endif
+
+/* Possible inflate modes between inflate() calls */
+typedef enum {
+    HEAD = 16180,   /* i: waiting for magic header */
+    FLAGS,      /* i: waiting for method and flags (gzip) */
+    TIME,       /* i: waiting for modification time (gzip) */
+    OS,         /* i: waiting for extra flags and operating system (gzip) */
+    EXLEN,      /* i: waiting for extra length (gzip) */
+    EXTRA,      /* i: waiting for extra bytes (gzip) */
+    NAME,       /* i: waiting for end of file name (gzip) */
+    COMMENT,    /* i: waiting for end of comment (gzip) */
+    HCRC,       /* i: waiting for header crc (gzip) */
+    DICTID,     /* i: waiting for dictionary check value */
+    DICT,       /* waiting for inflateSetDictionary() call */
+        TYPE,       /* i: waiting for type bits, including last-flag bit */
+        TYPEDO,     /* i: same, but skip check to exit inflate on new block */
+        STORED,     /* i: waiting for stored size (length and complement) */
+        COPY_,      /* i/o: same as COPY below, but only first time in */
+        COPY,       /* i/o: waiting for input or output to copy stored block */
+        TABLE,      /* i: waiting for dynamic block table lengths */
+        LENLENS,    /* i: waiting for code length code lengths */
+        CODELENS,   /* i: waiting for length/lit and distance code lengths */
+            LEN_,       /* i: same as LEN below, but only first time in */
+            LEN,        /* i: waiting for length/lit/eob code */
+            LENEXT,     /* i: waiting for length extra bits */
+            DIST,       /* i: waiting for distance code */
+            DISTEXT,    /* i: waiting for distance extra bits */
+            MATCH,      /* o: waiting for output space to copy string */
+            LIT,        /* o: waiting for output space to write literal */
+    CHECK,      /* i: waiting for 32-bit check value */
+    LENGTH,     /* i: waiting for 32-bit length (gzip) */
+    DONE,       /* finished check, done -- remain here until reset */
+    BAD,        /* got a data error -- remain here until reset */
+    MEM,        /* got an inflate() memory error -- remain here until reset */
+    SYNC        /* looking for synchronization bytes to restart inflate() */
+} inflate_mode;
+
+/*
+    State transitions between above modes -
+
+    (most modes can go to BAD or MEM on error -- not shown for clarity)
+
+    Process header:
+        HEAD -> (gzip) or (zlib) or (raw)
+        (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT ->
+                  HCRC -> TYPE
+        (zlib) -> DICTID or TYPE
+        DICTID -> DICT -> TYPE
+        (raw) -> TYPEDO
+    Read deflate blocks:
+            TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK
+            STORED -> COPY_ -> COPY -> TYPE
+            TABLE -> LENLENS -> CODELENS -> LEN_
+            LEN_ -> LEN
+    Read deflate codes in fixed or dynamic block:
+                LEN -> LENEXT or LIT or TYPE
+                LENEXT -> DIST -> DISTEXT -> MATCH -> LEN
+                LIT -> LEN
+    Process trailer:
+        CHECK -> LENGTH -> DONE
+ */
+
+/* State maintained between inflate() calls -- approximately 7K bytes, not
+   including the allocated sliding window, which is up to 32K bytes. */
+struct inflate_state {
+    z_streamp strm;             /* pointer back to this zlib stream */
+    inflate_mode mode;          /* current inflate mode */
+    int last;                   /* true if processing last block */
+    int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip,
+                                   bit 2 true to validate check value */
+    int havedict;               /* true if dictionary provided */
+    int flags;                  /* gzip header method and flags (0 if zlib) */
+    unsigned dmax;              /* zlib header max distance (INFLATE_STRICT) */
+    unsigned long check;        /* protected copy of check value */
+    unsigned long total;        /* protected copy of output count */
+    gz_headerp head;            /* where to save gzip header information */
+        /* sliding window */
+    unsigned wbits;             /* log base 2 of requested window size */
+    unsigned wsize;             /* window size or zero if not using window */
+    unsigned whave;             /* valid bytes in the window */
+    unsigned wnext;             /* window write index */
+    unsigned char FAR *window;  /* allocated sliding window, if needed */
+        /* bit accumulator */
+    unsigned long hold;         /* input bit accumulator */
+    unsigned bits;              /* number of bits in "in" */
+        /* for string and stored block copying */
+    unsigned length;            /* literal or length of data to copy */
+    unsigned offset;            /* distance back to copy string from */
+        /* for table and code decoding */
+    unsigned extra;             /* extra bits needed */
+        /* fixed and dynamic code tables */
+    code const FAR *lencode;    /* starting table for length/literal codes */
+    code const FAR *distcode;   /* starting table for distance codes */
+    unsigned lenbits;           /* index bits for lencode */
+    unsigned distbits;          /* index bits for distcode */
+        /* dynamic table building */
+    unsigned ncode;             /* number of code length code lengths */
+    unsigned nlen;              /* number of length code lengths */
+    unsigned ndist;             /* number of distance code lengths */
+    unsigned have;              /* number of code lengths in lens[] */
+    code FAR *next;             /* next available space in codes[] */
+    unsigned short lens[320];   /* temporary storage for code lengths */
+    unsigned short work[288];   /* work area for code table building */
+    code codes[ENOUGH];         /* space for code tables */
+    int sane;                   /* if false, allow invalid distance too far */
+    int back;                   /* bits back of last unprocessed length/lit */
+    unsigned was;               /* initial length of match */
+};
diff --git a/deps/libchdr/deps/zlib-1.2.11/inftrees.c b/deps/libchdr/deps/zlib-1.2.11/inftrees.c
new file mode 100644
index 00000000..2ea08fc1
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/inftrees.c
@@ -0,0 +1,304 @@
+/* inftrees.c -- generate Huffman trees for efficient decoding
+ * Copyright (C) 1995-2017 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zutil.h"
+#include "inftrees.h"
+
+#define MAXBITS 15
+
+const char inflate_copyright[] =
+   " inflate 1.2.11 Copyright 1995-2017 Mark Adler ";
+/*
+  If you use the zlib library in a product, an acknowledgment is welcome
+  in the documentation of your product. If for some reason you cannot
+  include such an acknowledgment, I would appreciate that you keep this
+  copyright string in the executable of your product.
+ */
+
+/*
+   Build a set of tables to decode the provided canonical Huffman code.
+   The code lengths are lens[0..codes-1].  The result starts at *table,
+   whose indices are 0..2^bits-1.  work is a writable array of at least
+   lens shorts, which is used as a work area.  type is the type of code
+   to be generated, CODES, LENS, or DISTS.  On return, zero is success,
+   -1 is an invalid code, and +1 means that ENOUGH isn't enough.  table
+   on return points to the next available entry's address.  bits is the
+   requested root table index bits, and on return it is the actual root
+   table index bits.  It will differ if the request is greater than the
+   longest code or if it is less than the shortest code.
+ */
+int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work)
+codetype type;
+unsigned short FAR *lens;
+unsigned codes;
+code FAR * FAR *table;
+unsigned FAR *bits;
+unsigned short FAR *work;
+{
+    unsigned len;               /* a code's length in bits */
+    unsigned sym;               /* index of code symbols */
+    unsigned min, max;          /* minimum and maximum code lengths */
+    unsigned root;              /* number of index bits for root table */
+    unsigned curr;              /* number of index bits for current table */
+    unsigned drop;              /* code bits to drop for sub-table */
+    int left;                   /* number of prefix codes available */
+    unsigned used;              /* code entries in table used */
+    unsigned huff;              /* Huffman code */
+    unsigned incr;              /* for incrementing code, index */
+    unsigned fill;              /* index for replicating entries */
+    unsigned low;               /* low bits for current root entry */
+    unsigned mask;              /* mask for low root bits */
+    code here;                  /* table entry for duplication */
+    code FAR *next;             /* next available space in table */
+    const unsigned short FAR *base;     /* base value table to use */
+    const unsigned short FAR *extra;    /* extra bits table to use */
+    unsigned match;             /* use base and extra for symbol >= match */
+    unsigned short count[MAXBITS+1];    /* number of codes of each length */
+    unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
+    static const unsigned short lbase[31] = { /* Length codes 257..285 base */
+        3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+        35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+    static const unsigned short lext[31] = { /* Length codes 257..285 extra */
+        16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
+        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202};
+    static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
+        1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+        257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+        8193, 12289, 16385, 24577, 0, 0};
+    static const unsigned short dext[32] = { /* Distance codes 0..29 extra */
+        16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22,
+        23, 23, 24, 24, 25, 25, 26, 26, 27, 27,
+        28, 28, 29, 29, 64, 64};
+
+    /*
+       Process a set of code lengths to create a canonical Huffman code.  The
+       code lengths are lens[0..codes-1].  Each length corresponds to the
+       symbols 0..codes-1.  The Huffman code is generated by first sorting the
+       symbols by length from short to long, and retaining the symbol order
+       for codes with equal lengths.  Then the code starts with all zero bits
+       for the first code of the shortest length, and the codes are integer
+       increments for the same length, and zeros are appended as the length
+       increases.  For the deflate format, these bits are stored backwards
+       from their more natural integer increment ordering, and so when the
+       decoding tables are built in the large loop below, the integer codes
+       are incremented backwards.
+
+       This routine assumes, but does not check, that all of the entries in
+       lens[] are in the range 0..MAXBITS.  The caller must assure this.
+       1..MAXBITS is interpreted as that code length.  zero means that that
+       symbol does not occur in this code.
+
+       The codes are sorted by computing a count of codes for each length,
+       creating from that a table of starting indices for each length in the
+       sorted table, and then entering the symbols in order in the sorted
+       table.  The sorted table is work[], with that space being provided by
+       the caller.
+
+       The length counts are used for other purposes as well, i.e. finding
+       the minimum and maximum length codes, determining if there are any
+       codes at all, checking for a valid set of lengths, and looking ahead
+       at length counts to determine sub-table sizes when building the
+       decoding tables.
+     */
+
+    /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */
+    for (len = 0; len <= MAXBITS; len++)
+        count[len] = 0;
+    for (sym = 0; sym < codes; sym++)
+        count[lens[sym]]++;
+
+    /* bound code lengths, force root to be within code lengths */
+    root = *bits;
+    for (max = MAXBITS; max >= 1; max--)
+        if (count[max] != 0) break;
+    if (root > max) root = max;
+    if (max == 0) {                     /* no symbols to code at all */
+        here.op = (unsigned char)64;    /* invalid code marker */
+        here.bits = (unsigned char)1;
+        here.val = (unsigned short)0;
+        *(*table)++ = here;             /* make a table to force an error */
+        *(*table)++ = here;
+        *bits = 1;
+        return 0;     /* no symbols, but wait for decoding to report error */
+    }
+    for (min = 1; min < max; min++)
+        if (count[min] != 0) break;
+    if (root < min) root = min;
+
+    /* check for an over-subscribed or incomplete set of lengths */
+    left = 1;
+    for (len = 1; len <= MAXBITS; len++) {
+        left <<= 1;
+        left -= count[len];
+        if (left < 0) return -1;        /* over-subscribed */
+    }
+    if (left > 0 && (type == CODES || max != 1))
+        return -1;                      /* incomplete set */
+
+    /* generate offsets into symbol table for each length for sorting */
+    offs[1] = 0;
+    for (len = 1; len < MAXBITS; len++)
+        offs[len + 1] = offs[len] + count[len];
+
+    /* sort symbols by length, by symbol order within each length */
+    for (sym = 0; sym < codes; sym++)
+        if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym;
+
+    /*
+       Create and fill in decoding tables.  In this loop, the table being
+       filled is at next and has curr index bits.  The code being used is huff
+       with length len.  That code is converted to an index by dropping drop
+       bits off of the bottom.  For codes where len is less than drop + curr,
+       those top drop + curr - len bits are incremented through all values to
+       fill the table with replicated entries.
+
+       root is the number of index bits for the root table.  When len exceeds
+       root, sub-tables are created pointed to by the root entry with an index
+       of the low root bits of huff.  This is saved in low to check for when a
+       new sub-table should be started.  drop is zero when the root table is
+       being filled, and drop is root when sub-tables are being filled.
+
+       When a new sub-table is needed, it is necessary to look ahead in the
+       code lengths to determine what size sub-table is needed.  The length
+       counts are used for this, and so count[] is decremented as codes are
+       entered in the tables.
+
+       used keeps track of how many table entries have been allocated from the
+       provided *table space.  It is checked for LENS and DIST tables against
+       the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in
+       the initial root table size constants.  See the comments in inftrees.h
+       for more information.
+
+       sym increments through all symbols, and the loop terminates when
+       all codes of length max, i.e. all codes, have been processed.  This
+       routine permits incomplete codes, so another loop after this one fills
+       in the rest of the decoding tables with invalid code markers.
+     */
+
+    /* set up for code type */
+    switch (type) {
+    case CODES:
+        base = extra = work;    /* dummy value--not used */
+        match = 20;
+        break;
+    case LENS:
+        base = lbase;
+        extra = lext;
+        match = 257;
+        break;
+    default:    /* DISTS */
+        base = dbase;
+        extra = dext;
+        match = 0;
+    }
+
+    /* initialize state for loop */
+    huff = 0;                   /* starting code */
+    sym = 0;                    /* starting code symbol */
+    len = min;                  /* starting code length */
+    next = *table;              /* current table to fill in */
+    curr = root;                /* current table index bits */
+    drop = 0;                   /* current bits to drop from code for index */
+    low = (unsigned)(-1);       /* trigger new sub-table when len > root */
+    used = 1U << root;          /* use root table entries */
+    mask = used - 1;            /* mask for comparing low */
+
+    /* check available table space */
+    if ((type == LENS && used > ENOUGH_LENS) ||
+        (type == DISTS && used > ENOUGH_DISTS))
+        return 1;
+
+    /* process all codes and make table entries */
+    for (;;) {
+        /* create table entry */
+        here.bits = (unsigned char)(len - drop);
+        if (work[sym] + 1U < match) {
+            here.op = (unsigned char)0;
+            here.val = work[sym];
+        }
+        else if (work[sym] >= match) {
+            here.op = (unsigned char)(extra[work[sym] - match]);
+            here.val = base[work[sym] - match];
+        }
+        else {
+            here.op = (unsigned char)(32 + 64);         /* end of block */
+            here.val = 0;
+        }
+
+        /* replicate for those indices with low len bits equal to huff */
+        incr = 1U << (len - drop);
+        fill = 1U << curr;
+        min = fill;                 /* save offset to next table */
+        do {
+            fill -= incr;
+            next[(huff >> drop) + fill] = here;
+        } while (fill != 0);
+
+        /* backwards increment the len-bit code huff */
+        incr = 1U << (len - 1);
+        while (huff & incr)
+            incr >>= 1;
+        if (incr != 0) {
+            huff &= incr - 1;
+            huff += incr;
+        }
+        else
+            huff = 0;
+
+        /* go to next symbol, update count, len */
+        sym++;
+        if (--(count[len]) == 0) {
+            if (len == max) break;
+            len = lens[work[sym]];
+        }
+
+        /* create new sub-table if needed */
+        if (len > root && (huff & mask) != low) {
+            /* if first time, transition to sub-tables */
+            if (drop == 0)
+                drop = root;
+
+            /* increment past last table */
+            next += min;            /* here min is 1 << curr */
+
+            /* determine length of next table */
+            curr = len - drop;
+            left = (int)(1 << curr);
+            while (curr + drop < max) {
+                left -= count[curr + drop];
+                if (left <= 0) break;
+                curr++;
+                left <<= 1;
+            }
+
+            /* check for enough space */
+            used += 1U << curr;
+            if ((type == LENS && used > ENOUGH_LENS) ||
+                (type == DISTS && used > ENOUGH_DISTS))
+                return 1;
+
+            /* point entry in root table to sub-table */
+            low = huff & mask;
+            (*table)[low].op = (unsigned char)curr;
+            (*table)[low].bits = (unsigned char)root;
+            (*table)[low].val = (unsigned short)(next - *table);
+        }
+    }
+
+    /* fill in remaining table entry if code is incomplete (guaranteed to have
+       at most one remaining entry, since if the code is incomplete, the
+       maximum code length that was allowed to get this far is one bit) */
+    if (huff != 0) {
+        here.op = (unsigned char)64;            /* invalid code marker */
+        here.bits = (unsigned char)(len - drop);
+        here.val = (unsigned short)0;
+        next[huff] = here;
+    }
+
+    /* set return parameters */
+    *table += used;
+    *bits = root;
+    return 0;
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/inftrees.h b/deps/libchdr/deps/zlib-1.2.11/inftrees.h
new file mode 100644
index 00000000..baa53a0b
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/inftrees.h
@@ -0,0 +1,62 @@
+/* inftrees.h -- header to use inftrees.c
+ * Copyright (C) 1995-2005, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* Structure for decoding tables.  Each entry provides either the
+   information needed to do the operation requested by the code that
+   indexed that table entry, or it provides a pointer to another
+   table that indexes more bits of the code.  op indicates whether
+   the entry is a pointer to another table, a literal, a length or
+   distance, an end-of-block, or an invalid code.  For a table
+   pointer, the low four bits of op is the number of index bits of
+   that table.  For a length or distance, the low four bits of op
+   is the number of extra bits to get after the code.  bits is
+   the number of bits in this code or part of the code to drop off
+   of the bit buffer.  val is the actual byte to output in the case
+   of a literal, the base length or distance, or the offset from
+   the current table to the next table.  Each entry is four bytes. */
+typedef struct {
+    unsigned char op;           /* operation, extra bits, table bits */
+    unsigned char bits;         /* bits in this part of the code */
+    unsigned short val;         /* offset in table or code value */
+} code;
+
+/* op values as set by inflate_table():
+    00000000 - literal
+    0000tttt - table link, tttt != 0 is the number of table index bits
+    0001eeee - length or distance, eeee is the number of extra bits
+    01100000 - end of block
+    01000000 - invalid code
+ */
+
+/* Maximum size of the dynamic table.  The maximum number of code structures is
+   1444, which is the sum of 852 for literal/length codes and 592 for distance
+   codes.  These values were found by exhaustive searches using the program
+   examples/enough.c found in the zlib distribtution.  The arguments to that
+   program are the number of symbols, the initial root table size, and the
+   maximum bit length of a code.  "enough 286 9 15" for literal/length codes
+   returns returns 852, and "enough 30 6 15" for distance codes returns 592.
+   The initial root table size (9 or 6) is found in the fifth argument of the
+   inflate_table() calls in inflate.c and infback.c.  If the root table size is
+   changed, then these maximum sizes would be need to be recalculated and
+   updated. */
+#define ENOUGH_LENS 852
+#define ENOUGH_DISTS 592
+#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS)
+
+/* Type of code to build for inflate_table() */
+typedef enum {
+    CODES,
+    LENS,
+    DISTS
+} codetype;
+
+int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens,
+                             unsigned codes, code FAR * FAR *table,
+                             unsigned FAR *bits, unsigned short FAR *work));
diff --git a/deps/libchdr/deps/zlib-1.2.11/make_vms.com b/deps/libchdr/deps/zlib-1.2.11/make_vms.com
new file mode 100644
index 00000000..65e9d0cb
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/make_vms.com
@@ -0,0 +1,867 @@
+$! make libz under VMS written by
+$! Martin P.J. Zinser
+$!
+$! In case of problems with the install you might contact me at
+$! zinser@zinser.no-ip.info(preferred) or
+$! martin.zinser@eurexchange.com (work)
+$!
+$! Make procedure history for Zlib
+$!
+$!------------------------------------------------------------------------------
+$! Version history
+$! 0.01 20060120 First version to receive a number
+$! 0.02 20061008 Adapt to new Makefile.in
+$! 0.03 20091224 Add support for large file check
+$! 0.04 20100110 Add new gzclose, gzlib, gzread, gzwrite
+$! 0.05 20100221 Exchange zlibdefs.h by zconf.h.in
+$! 0.06 20120111 Fix missing amiss_err, update zconf_h.in, fix new exmples
+$!               subdir path, update module search in makefile.in
+$! 0.07 20120115 Triggered by work done by Alexey Chupahin completly redesigned
+$!               shared image creation
+$! 0.08 20120219 Make it work on VAX again, pre-load missing symbols to shared
+$!               image
+$! 0.09 20120305 SMS.  P1 sets builder ("MMK", "MMS", " " (built-in)).
+$!               "" -> automatic, preference: MMK, MMS, built-in.
+$!
+$ on error then goto err_exit
+$!
+$ true  = 1
+$ false = 0
+$ tmpnam = "temp_" + f$getjpi("","pid")
+$ tt = tmpnam + ".txt"
+$ tc = tmpnam + ".c"
+$ th = tmpnam + ".h"
+$ define/nolog tconfig 'th'
+$ its_decc = false
+$ its_vaxc = false
+$ its_gnuc = false
+$ s_case   = False
+$!
+$! Setup variables holding "config" information
+$!
+$ Make    = "''p1'"
+$ name     = "Zlib"
+$ version  = "?.?.?"
+$ v_string = "ZLIB_VERSION"
+$ v_file   = "zlib.h"
+$ ccopt   = "/include = []"
+$ lopts   = ""
+$ dnsrl   = ""
+$ aconf_in_file = "zconf.h.in#zconf.h_in#zconf_h.in"
+$ conf_check_string = ""
+$ linkonly = false
+$ optfile  = name + ".opt"
+$ mapfile  = name + ".map"
+$ libdefs  = ""
+$ vax      = f$getsyi("HW_MODEL").lt.1024
+$ axp      = f$getsyi("HW_MODEL").ge.1024 .and. f$getsyi("HW_MODEL").lt.4096
+$ ia64     = f$getsyi("HW_MODEL").ge.4096
+$!
+$! 2012-03-05 SMS.
+$! Why is this needed?  And if it is needed, why not simply ".not. vax"?
+$!
+$!!! if axp .or. ia64 then  set proc/parse=extended
+$!
+$ whoami = f$parse(f$environment("Procedure"),,,,"NO_CONCEAL")
+$ mydef  = F$parse(whoami,,,"DEVICE")
+$ mydir  = f$parse(whoami,,,"DIRECTORY") - "]["
+$ myproc = f$parse(whoami,,,"Name") + f$parse(whoami,,,"type")
+$!
+$! Check for MMK/MMS
+$!
+$ if (Make .eqs. "")
+$ then
+$   If F$Search ("Sys$System:MMS.EXE") .nes. "" Then Make = "MMS"
+$   If F$Type (MMK) .eqs. "STRING" Then Make = "MMK"
+$ else
+$   Make = f$edit( Make, "trim")
+$ endif
+$!
+$ gosub find_version
+$!
+$  open/write topt tmp.opt
+$  open/write optf 'optfile'
+$!
+$ gosub check_opts
+$!
+$! Look for the compiler used
+$!
+$ gosub check_compiler
+$ close topt
+$ close optf
+$!
+$ if its_decc
+$ then
+$   ccopt = "/prefix=all" + ccopt
+$   if f$trnlnm("SYS") .eqs. ""
+$   then
+$     if axp
+$     then
+$       define sys sys$library:
+$     else
+$       ccopt = "/decc" + ccopt
+$       define sys decc$library_include:
+$     endif
+$   endif
+$!
+$! 2012-03-05 SMS.
+$! Why /NAMES = AS_IS?  Why not simply ".not. vax"?  And why not on VAX?
+$!
+$   if axp .or. ia64
+$   then
+$       ccopt = ccopt + "/name=as_is/opt=(inline=speed)"
+$       s_case = true
+$   endif
+$ endif
+$ if its_vaxc .or. its_gnuc
+$ then
+$    if f$trnlnm("SYS").eqs."" then define sys sys$library:
+$ endif
+$!
+$! Build a fake configure input header
+$!
+$ open/write conf_hin config.hin
+$ write conf_hin "#undef _LARGEFILE64_SOURCE"
+$ close conf_hin
+$!
+$!
+$ i = 0
+$FIND_ACONF:
+$ fname = f$element(i,"#",aconf_in_file)
+$ if fname .eqs. "#" then goto AMISS_ERR
+$ if f$search(fname) .eqs. ""
+$ then
+$   i = i + 1
+$   goto find_aconf
+$ endif
+$ open/read/err=aconf_err aconf_in 'fname'
+$ open/write aconf zconf.h
+$ACONF_LOOP:
+$ read/end_of_file=aconf_exit aconf_in line
+$ work = f$edit(line, "compress,trim")
+$ if f$extract(0,6,work) .nes. "#undef"
+$ then
+$   if f$extract(0,12,work) .nes. "#cmakedefine"
+$   then
+$       write aconf line
+$   endif
+$ else
+$   cdef = f$element(1," ",work)
+$   gosub check_config
+$ endif
+$ goto aconf_loop
+$ACONF_EXIT:
+$ write aconf ""
+$ write aconf "/* VMS specifics added by make_vms.com: */"
+$ write aconf "#define VMS 1"
+$ write aconf "#include <unistd.h>"
+$ write aconf "#include <unixio.h>"
+$ write aconf "#ifdef _LARGEFILE"
+$ write aconf "# define off64_t __off64_t"
+$ write aconf "# define fopen64 fopen"
+$ write aconf "# define fseeko64 fseeko"
+$ write aconf "# define lseek64 lseek"
+$ write aconf "# define ftello64 ftell"
+$ write aconf "#endif"
+$ write aconf "#if !defined( __VAX) && (__CRTL_VER >= 70312000)"
+$ write aconf "# define HAVE_VSNPRINTF"
+$ write aconf "#endif"
+$ close aconf_in
+$ close aconf
+$ if f$search("''th'") .nes. "" then delete 'th';*
+$! Build the thing plain or with mms
+$!
+$ write sys$output "Compiling Zlib sources ..."
+$ if make.eqs.""
+$ then
+$   if (f$search( "example.obj;*") .nes. "") then delete example.obj;*
+$   if (f$search( "minigzip.obj;*") .nes. "") then delete minigzip.obj;*
+$   CALL MAKE adler32.OBJ "CC ''CCOPT' adler32" -
+                adler32.c zlib.h zconf.h
+$   CALL MAKE compress.OBJ "CC ''CCOPT' compress" -
+                compress.c zlib.h zconf.h
+$   CALL MAKE crc32.OBJ "CC ''CCOPT' crc32" -
+                crc32.c zlib.h zconf.h
+$   CALL MAKE deflate.OBJ "CC ''CCOPT' deflate" -
+                deflate.c deflate.h zutil.h zlib.h zconf.h
+$   CALL MAKE gzclose.OBJ "CC ''CCOPT' gzclose" -
+                gzclose.c zutil.h zlib.h zconf.h
+$   CALL MAKE gzlib.OBJ "CC ''CCOPT' gzlib" -
+                gzlib.c zutil.h zlib.h zconf.h
+$   CALL MAKE gzread.OBJ "CC ''CCOPT' gzread" -
+                gzread.c zutil.h zlib.h zconf.h
+$   CALL MAKE gzwrite.OBJ "CC ''CCOPT' gzwrite" -
+                gzwrite.c zutil.h zlib.h zconf.h
+$   CALL MAKE infback.OBJ "CC ''CCOPT' infback" -
+                infback.c zutil.h inftrees.h inflate.h inffast.h inffixed.h
+$   CALL MAKE inffast.OBJ "CC ''CCOPT' inffast" -
+                inffast.c zutil.h zlib.h zconf.h inffast.h
+$   CALL MAKE inflate.OBJ "CC ''CCOPT' inflate" -
+                inflate.c zutil.h zlib.h zconf.h infblock.h
+$   CALL MAKE inftrees.OBJ "CC ''CCOPT' inftrees" -
+                inftrees.c zutil.h zlib.h zconf.h inftrees.h
+$   CALL MAKE trees.OBJ "CC ''CCOPT' trees" -
+                trees.c deflate.h zutil.h zlib.h zconf.h
+$   CALL MAKE uncompr.OBJ "CC ''CCOPT' uncompr" -
+                uncompr.c zlib.h zconf.h
+$   CALL MAKE zutil.OBJ "CC ''CCOPT' zutil" -
+                zutil.c zutil.h zlib.h zconf.h
+$   write sys$output "Building Zlib ..."
+$   CALL MAKE libz.OLB "lib/crea libz.olb *.obj" *.OBJ
+$   write sys$output "Building example..."
+$   CALL MAKE example.OBJ "CC ''CCOPT' [.test]example" -
+                [.test]example.c zlib.h zconf.h
+$   call make example.exe "LINK example,libz.olb/lib" example.obj libz.olb
+$   write sys$output "Building minigzip..."
+$   CALL MAKE minigzip.OBJ "CC ''CCOPT' [.test]minigzip" -
+              [.test]minigzip.c zlib.h zconf.h
+$   call make minigzip.exe -
+              "LINK minigzip,libz.olb/lib" -
+              minigzip.obj libz.olb
+$ else
+$   gosub crea_mms
+$   write sys$output "Make ''name' ''version' with ''Make' "
+$   'make'
+$ endif
+$!
+$! Create shareable image
+$!
+$ gosub crea_olist
+$ write sys$output "Creating libzshr.exe"
+$ call map_2_shopt 'mapfile' 'optfile'
+$ LINK_'lopts'/SHARE=libzshr.exe modules.opt/opt,'optfile'/opt
+$ write sys$output "Zlib build completed"
+$ delete/nolog tmp.opt;*
+$ exit
+$AMISS_ERR:
+$ write sys$output "No source for config.hin found."
+$ write sys$output "Tried any of ''aconf_in_file'"
+$ goto err_exit
+$CC_ERR:
+$ write sys$output "C compiler required to build ''name'"
+$ goto err_exit
+$ERR_EXIT:
+$ set message/facil/ident/sever/text
+$ close/nolog optf
+$ close/nolog topt
+$ close/nolog aconf_in
+$ close/nolog aconf
+$ close/nolog out
+$ close/nolog min
+$ close/nolog mod
+$ close/nolog h_in
+$ write sys$output "Exiting..."
+$ exit 2
+$!
+$!
+$MAKE: SUBROUTINE   !SUBROUTINE TO CHECK DEPENDENCIES
+$ V = 'F$Verify(0)
+$! P1 = What we are trying to make
+$! P2 = Command to make it
+$! P3 - P8  What it depends on
+$
+$ If F$Search(P1) .Eqs. "" Then Goto Makeit
+$ Time = F$CvTime(F$File(P1,"RDT"))
+$arg=3
+$Loop:
+$       Argument = P'arg
+$       If Argument .Eqs. "" Then Goto Exit
+$       El=0
+$Loop2:
+$       File = F$Element(El," ",Argument)
+$       If File .Eqs. " " Then Goto Endl
+$       AFile = ""
+$Loop3:
+$       OFile = AFile
+$       AFile = F$Search(File)
+$       If AFile .Eqs. "" .Or. AFile .Eqs. OFile Then Goto NextEl
+$       If F$CvTime(F$File(AFile,"RDT")) .Ges. Time Then Goto Makeit
+$       Goto Loop3
+$NextEL:
+$       El = El + 1
+$       Goto Loop2
+$EndL:
+$ arg=arg+1
+$ If arg .Le. 8 Then Goto Loop
+$ Goto Exit
+$
+$Makeit:
+$ VV=F$VERIFY(0)
+$ write sys$output P2
+$ 'P2
+$ VV='F$Verify(VV)
+$Exit:
+$ If V Then Set Verify
+$ENDSUBROUTINE
+$!------------------------------------------------------------------------------
+$!
+$! Check command line options and set symbols accordingly
+$!
+$!------------------------------------------------------------------------------
+$! Version history
+$! 0.01 20041206 First version to receive a number
+$! 0.02 20060126 Add new "HELP" target
+$ CHECK_OPTS:
+$ i = 1
+$ OPT_LOOP:
+$ if i .lt. 9
+$ then
+$   cparm = f$edit(p'i',"upcase")
+$!
+$! Check if parameter actually contains something
+$!
+$   if f$edit(cparm,"trim") .nes. ""
+$   then
+$     if cparm .eqs. "DEBUG"
+$     then
+$       ccopt = ccopt + "/noopt/deb"
+$       lopts = lopts + "/deb"
+$     endif
+$     if f$locate("CCOPT=",cparm) .lt. f$length(cparm)
+$     then
+$       start = f$locate("=",cparm) + 1
+$       len   = f$length(cparm) - start
+$       ccopt = ccopt + f$extract(start,len,cparm)
+$       if f$locate("AS_IS",f$edit(ccopt,"UPCASE")) .lt. f$length(ccopt) -
+          then s_case = true
+$     endif
+$     if cparm .eqs. "LINK" then linkonly = true
+$     if f$locate("LOPTS=",cparm) .lt. f$length(cparm)
+$     then
+$       start = f$locate("=",cparm) + 1
+$       len   = f$length(cparm) - start
+$       lopts = lopts + f$extract(start,len,cparm)
+$     endif
+$     if f$locate("CC=",cparm) .lt. f$length(cparm)
+$     then
+$       start  = f$locate("=",cparm) + 1
+$       len    = f$length(cparm) - start
+$       cc_com = f$extract(start,len,cparm)
+        if (cc_com .nes. "DECC") .and. -
+           (cc_com .nes. "VAXC") .and. -
+           (cc_com .nes. "GNUC")
+$       then
+$         write sys$output "Unsupported compiler choice ''cc_com' ignored"
+$         write sys$output "Use DECC, VAXC, or GNUC instead"
+$       else
+$         if cc_com .eqs. "DECC" then its_decc = true
+$         if cc_com .eqs. "VAXC" then its_vaxc = true
+$         if cc_com .eqs. "GNUC" then its_gnuc = true
+$       endif
+$     endif
+$     if f$locate("MAKE=",cparm) .lt. f$length(cparm)
+$     then
+$       start  = f$locate("=",cparm) + 1
+$       len    = f$length(cparm) - start
+$       mmks = f$extract(start,len,cparm)
+$       if (mmks .eqs. "MMK") .or. (mmks .eqs. "MMS")
+$       then
+$         make = mmks
+$       else
+$         write sys$output "Unsupported make choice ''mmks' ignored"
+$         write sys$output "Use MMK or MMS instead"
+$       endif
+$     endif
+$     if cparm .eqs. "HELP" then gosub bhelp
+$   endif
+$   i = i + 1
+$   goto opt_loop
+$ endif
+$ return
+$!------------------------------------------------------------------------------
+$!
+$! Look for the compiler used
+$!
+$! Version history
+$! 0.01 20040223 First version to receive a number
+$! 0.02 20040229 Save/set value of decc$no_rooted_search_lists
+$! 0.03 20060202 Extend handling of GNU C
+$! 0.04 20090402 Compaq -> hp
+$CHECK_COMPILER:
+$ if (.not. (its_decc .or. its_vaxc .or. its_gnuc))
+$ then
+$   its_decc = (f$search("SYS$SYSTEM:DECC$COMPILER.EXE") .nes. "")
+$   its_vaxc = .not. its_decc .and. (F$Search("SYS$System:VAXC.Exe") .nes. "")
+$   its_gnuc = .not. (its_decc .or. its_vaxc) .and. (f$trnlnm("gnu_cc") .nes. "")
+$ endif
+$!
+$! Exit if no compiler available
+$!
+$ if (.not. (its_decc .or. its_vaxc .or. its_gnuc))
+$ then goto CC_ERR
+$ else
+$   if its_decc
+$   then
+$     write sys$output "CC compiler check ... hp C"
+$     if f$trnlnm("decc$no_rooted_search_lists") .nes. ""
+$     then
+$       dnrsl = f$trnlnm("decc$no_rooted_search_lists")
+$     endif
+$     define/nolog decc$no_rooted_search_lists 1
+$   else
+$     if its_vaxc then write sys$output "CC compiler check ... VAX C"
+$     if its_gnuc
+$     then
+$         write sys$output "CC compiler check ... GNU C"
+$         if f$trnlnm(topt) then write topt "gnu_cc:[000000]gcclib.olb/lib"
+$         if f$trnlnm(optf) then write optf "gnu_cc:[000000]gcclib.olb/lib"
+$         cc = "gcc"
+$     endif
+$     if f$trnlnm(topt) then write topt "sys$share:vaxcrtl.exe/share"
+$     if f$trnlnm(optf) then write optf "sys$share:vaxcrtl.exe/share"
+$   endif
+$ endif
+$ return
+$!------------------------------------------------------------------------------
+$!
+$! If MMS/MMK are available dump out the descrip.mms if required
+$!
+$CREA_MMS:
+$ write sys$output "Creating descrip.mms..."
+$ create descrip.mms
+$ open/append out descrip.mms
+$ copy sys$input: out
+$ deck
+# descrip.mms: MMS description file for building zlib on VMS
+# written by Martin P.J. Zinser
+# <zinser@zinser.no-ip.info or martin.zinser@eurexchange.com>
+
+OBJS = adler32.obj, compress.obj, crc32.obj, gzclose.obj, gzlib.obj\
+       gzread.obj, gzwrite.obj, uncompr.obj, infback.obj\
+       deflate.obj, trees.obj, zutil.obj, inflate.obj, \
+       inftrees.obj, inffast.obj
+
+$ eod
+$ write out "CFLAGS=", ccopt
+$ write out "LOPTS=", lopts
+$ write out "all : example.exe minigzip.exe libz.olb"
+$ copy sys$input: out
+$ deck
+        @ write sys$output " Example applications available"
+
+libz.olb : libz.olb($(OBJS))
+	@ write sys$output " libz available"
+
+example.exe : example.obj libz.olb
+              link $(LOPTS) example,libz.olb/lib
+
+minigzip.exe : minigzip.obj libz.olb
+              link $(LOPTS) minigzip,libz.olb/lib
+
+clean :
+	delete *.obj;*,libz.olb;*,*.opt;*,*.exe;*
+
+
+# Other dependencies.
+adler32.obj  : adler32.c zutil.h zlib.h zconf.h
+compress.obj : compress.c zlib.h zconf.h
+crc32.obj    : crc32.c zutil.h zlib.h zconf.h
+deflate.obj  : deflate.c deflate.h zutil.h zlib.h zconf.h
+example.obj  : [.test]example.c zlib.h zconf.h
+gzclose.obj  : gzclose.c zutil.h zlib.h zconf.h
+gzlib.obj    : gzlib.c zutil.h zlib.h zconf.h
+gzread.obj   : gzread.c zutil.h zlib.h zconf.h
+gzwrite.obj  : gzwrite.c zutil.h zlib.h zconf.h
+inffast.obj  : inffast.c zutil.h zlib.h zconf.h inftrees.h inffast.h
+inflate.obj  : inflate.c zutil.h zlib.h zconf.h
+inftrees.obj : inftrees.c zutil.h zlib.h zconf.h inftrees.h
+minigzip.obj : [.test]minigzip.c zlib.h zconf.h
+trees.obj    : trees.c deflate.h zutil.h zlib.h zconf.h
+uncompr.obj  : uncompr.c zlib.h zconf.h
+zutil.obj    : zutil.c zutil.h zlib.h zconf.h
+infback.obj  : infback.c zutil.h inftrees.h inflate.h inffast.h inffixed.h
+$ eod
+$ close out
+$ return
+$!------------------------------------------------------------------------------
+$!
+$! Read list of core library sources from makefile.in and create options
+$! needed to build shareable image
+$!
+$CREA_OLIST:
+$ open/read min makefile.in
+$ open/write mod modules.opt
+$ src_check_list = "OBJZ =#OBJG ="
+$MRLOOP:
+$ read/end=mrdone min rec
+$ i = 0
+$SRC_CHECK_LOOP:
+$ src_check = f$element(i, "#", src_check_list)
+$ i = i+1
+$ if src_check .eqs. "#" then goto mrloop
+$ if (f$extract(0,6,rec) .nes. src_check) then goto src_check_loop
+$ rec = rec - src_check
+$ gosub extra_filnam
+$ if (f$element(1,"\",rec) .eqs. "\") then goto mrloop
+$MRSLOOP:
+$ read/end=mrdone min rec
+$ gosub extra_filnam
+$ if (f$element(1,"\",rec) .nes. "\") then goto mrsloop
+$MRDONE:
+$ close min
+$ close mod
+$ return
+$!------------------------------------------------------------------------------
+$!
+$! Take record extracted in crea_olist and split it into single filenames
+$!
+$EXTRA_FILNAM:
+$ myrec = f$edit(rec - "\", "trim,compress")
+$ i = 0
+$FELOOP:
+$ srcfil = f$element(i," ", myrec)
+$ if (srcfil .nes. " ")
+$ then
+$   write mod f$parse(srcfil,,,"NAME"), ".obj"
+$   i = i + 1
+$   goto feloop
+$ endif
+$ return
+$!------------------------------------------------------------------------------
+$!
+$! Find current Zlib version number
+$!
+$FIND_VERSION:
+$ open/read h_in 'v_file'
+$hloop:
+$ read/end=hdone h_in rec
+$ rec = f$edit(rec,"TRIM")
+$ if (f$extract(0,1,rec) .nes. "#") then goto hloop
+$ rec = f$edit(rec - "#", "TRIM")
+$ if f$element(0," ",rec) .nes. "define" then goto hloop
+$ if f$element(1," ",rec) .eqs. v_string
+$ then
+$   version = 'f$element(2," ",rec)'
+$   goto hdone
+$ endif
+$ goto hloop
+$hdone:
+$ close h_in
+$ return
+$!------------------------------------------------------------------------------
+$!
+$CHECK_CONFIG:
+$!
+$ in_ldef = f$locate(cdef,libdefs)
+$ if (in_ldef .lt. f$length(libdefs))
+$ then
+$   write aconf "#define ''cdef' 1"
+$   libdefs = f$extract(0,in_ldef,libdefs) + -
+              f$extract(in_ldef + f$length(cdef) + 1, -
+                        f$length(libdefs) - in_ldef - f$length(cdef) - 1, -
+                        libdefs)
+$ else
+$   if (f$type('cdef') .eqs. "INTEGER")
+$   then
+$     write aconf "#define ''cdef' ", 'cdef'
+$   else
+$     if (f$type('cdef') .eqs. "STRING")
+$     then
+$       write aconf "#define ''cdef' ", """", '''cdef'', """"
+$     else
+$       gosub check_cc_def
+$     endif
+$   endif
+$ endif
+$ return
+$!------------------------------------------------------------------------------
+$!
+$! Check if this is a define relating to the properties of the C/C++
+$! compiler
+$!
+$ CHECK_CC_DEF:
+$ if (cdef .eqs. "_LARGEFILE64_SOURCE")
+$ then
+$   copy sys$input: 'tc'
+$   deck
+#include "tconfig"
+#define _LARGEFILE
+#include <stdio.h>
+
+int main(){
+FILE *fp;
+  fp = fopen("temp.txt","r");
+  fseeko(fp,1,SEEK_SET);
+  fclose(fp);
+}
+
+$   eod
+$   test_inv = false
+$   comm_h = false
+$   gosub cc_prop_check
+$   return
+$ endif
+$ write aconf "/* ", line, " */"
+$ return
+$!------------------------------------------------------------------------------
+$!
+$! Check for properties of C/C++ compiler
+$!
+$! Version history
+$! 0.01 20031020 First version to receive a number
+$! 0.02 20031022 Added logic for defines with value
+$! 0.03 20040309 Make sure local config file gets not deleted
+$! 0.04 20041230 Also write include for configure run
+$! 0.05 20050103 Add processing of "comment defines"
+$CC_PROP_CHECK:
+$ cc_prop = true
+$ is_need = false
+$ is_need = (f$extract(0,4,cdef) .eqs. "NEED") .or. (test_inv .eq. true)
+$ if f$search(th) .eqs. "" then create 'th'
+$ set message/nofac/noident/nosever/notext
+$ on error then continue
+$ cc 'tmpnam'
+$ if .not. ($status)  then cc_prop = false
+$ on error then continue
+$! The headers might lie about the capabilities of the RTL
+$ link 'tmpnam',tmp.opt/opt
+$ if .not. ($status)  then cc_prop = false
+$ set message/fac/ident/sever/text
+$ on error then goto err_exit
+$ delete/nolog 'tmpnam'.*;*/exclude='th'
+$ if (cc_prop .and. .not. is_need) .or. -
+     (.not. cc_prop .and. is_need)
+$ then
+$   write sys$output "Checking for ''cdef'... yes"
+$   if f$type('cdef_val'_yes) .nes. ""
+$   then
+$     if f$type('cdef_val'_yes) .eqs. "INTEGER" -
+         then call write_config f$fao("#define !AS !UL",cdef,'cdef_val'_yes)
+$     if f$type('cdef_val'_yes) .eqs. "STRING" -
+         then call write_config f$fao("#define !AS !AS",cdef,'cdef_val'_yes)
+$   else
+$     call write_config f$fao("#define !AS 1",cdef)
+$   endif
+$   if (cdef .eqs. "HAVE_FSEEKO") .or. (cdef .eqs. "_LARGE_FILES") .or. -
+       (cdef .eqs. "_LARGEFILE64_SOURCE") then -
+      call write_config f$string("#define _LARGEFILE 1")
+$ else
+$   write sys$output "Checking for ''cdef'... no"
+$   if (comm_h)
+$   then
+      call write_config f$fao("/* !AS */",line)
+$   else
+$     if f$type('cdef_val'_no) .nes. ""
+$     then
+$       if f$type('cdef_val'_no) .eqs. "INTEGER" -
+           then call write_config f$fao("#define !AS !UL",cdef,'cdef_val'_no)
+$       if f$type('cdef_val'_no) .eqs. "STRING" -
+           then call write_config f$fao("#define !AS !AS",cdef,'cdef_val'_no)
+$     else
+$       call write_config f$fao("#undef !AS",cdef)
+$     endif
+$   endif
+$ endif
+$ return
+$!------------------------------------------------------------------------------
+$!
+$! Check for properties of C/C++ compiler with multiple result values
+$!
+$! Version history
+$! 0.01 20040127 First version
+$! 0.02 20050103 Reconcile changes from cc_prop up to version 0.05
+$CC_MPROP_CHECK:
+$ cc_prop = true
+$ i    = 1
+$ idel = 1
+$ MT_LOOP:
+$ if f$type(result_'i') .eqs. "STRING"
+$ then
+$   set message/nofac/noident/nosever/notext
+$   on error then continue
+$   cc 'tmpnam'_'i'
+$   if .not. ($status)  then cc_prop = false
+$   on error then continue
+$! The headers might lie about the capabilities of the RTL
+$   link 'tmpnam'_'i',tmp.opt/opt
+$   if .not. ($status)  then cc_prop = false
+$   set message/fac/ident/sever/text
+$   on error then goto err_exit
+$   delete/nolog 'tmpnam'_'i'.*;*
+$   if (cc_prop)
+$   then
+$     write sys$output "Checking for ''cdef'... ", mdef_'i'
+$     if f$type(mdef_'i') .eqs. "INTEGER" -
+         then call write_config f$fao("#define !AS !UL",cdef,mdef_'i')
+$     if f$type('cdef_val'_yes) .eqs. "STRING" -
+         then call write_config f$fao("#define !AS !AS",cdef,mdef_'i')
+$     goto msym_clean
+$   else
+$     i = i + 1
+$     goto mt_loop
+$   endif
+$ endif
+$ write sys$output "Checking for ''cdef'... no"
+$ call write_config f$fao("#undef !AS",cdef)
+$ MSYM_CLEAN:
+$ if (idel .le. msym_max)
+$ then
+$   delete/sym mdef_'idel'
+$   idel = idel + 1
+$   goto msym_clean
+$ endif
+$ return
+$!------------------------------------------------------------------------------
+$!
+$! Write configuration to both permanent and temporary config file
+$!
+$! Version history
+$! 0.01 20031029 First version to receive a number
+$!
+$WRITE_CONFIG: SUBROUTINE
+$  write aconf 'p1'
+$  open/append confh 'th'
+$  write confh 'p1'
+$  close confh
+$ENDSUBROUTINE
+$!------------------------------------------------------------------------------
+$!
+$! Analyze the project map file and create the symbol vector for a shareable
+$! image from it
+$!
+$! Version history
+$! 0.01 20120128 First version
+$! 0.02 20120226 Add pre-load logic
+$!
+$ MAP_2_SHOPT: Subroutine
+$!
+$ SAY := "WRITE_ SYS$OUTPUT"
+$!
+$ IF F$SEARCH("''P1'") .EQS. ""
+$ THEN
+$    SAY "MAP_2_SHOPT-E-NOSUCHFILE:  Error, inputfile ''p1' not available"
+$    goto exit_m2s
+$ ENDIF
+$ IF "''P2'" .EQS. ""
+$ THEN
+$    SAY "MAP_2_SHOPT:  Error, no output file provided"
+$    goto exit_m2s
+$ ENDIF
+$!
+$ module1 = "deflate#deflateEnd#deflateInit_#deflateParams#deflateSetDictionary"
+$ module2 = "gzclose#gzerror#gzgetc#gzgets#gzopen#gzprintf#gzputc#gzputs#gzread"
+$ module3 = "gzseek#gztell#inflate#inflateEnd#inflateInit_#inflateSetDictionary"
+$ module4 = "inflateSync#uncompress#zlibVersion#compress"
+$ open/read map 'p1
+$ if axp .or. ia64
+$ then
+$     open/write aopt a.opt
+$     open/write bopt b.opt
+$     write aopt " CASE_SENSITIVE=YES"
+$     write bopt "SYMBOL_VECTOR= (-"
+$     mod_sym_num = 1
+$ MOD_SYM_LOOP:
+$     if f$type(module'mod_sym_num') .nes. ""
+$     then
+$         mod_in = 0
+$ MOD_SYM_IN:
+$         shared_proc = f$element(mod_in, "#", module'mod_sym_num')
+$         if shared_proc .nes. "#"
+$         then
+$             write aopt f$fao(" symbol_vector=(!AS/!AS=PROCEDURE)",-
+        		       f$edit(shared_proc,"upcase"),shared_proc)
+$             write bopt f$fao("!AS=PROCEDURE,-",shared_proc)
+$             mod_in = mod_in + 1
+$             goto mod_sym_in
+$         endif
+$         mod_sym_num = mod_sym_num + 1
+$         goto mod_sym_loop
+$     endif
+$MAP_LOOP:
+$     read/end=map_end map line
+$     if (f$locate("{",line).lt. f$length(line)) .or. -
+         (f$locate("global:", line) .lt. f$length(line))
+$     then
+$         proc = true
+$         goto map_loop
+$     endif
+$     if f$locate("}",line).lt. f$length(line) then proc = false
+$     if f$locate("local:", line) .lt. f$length(line) then proc = false
+$     if proc
+$     then
+$         shared_proc = f$edit(line,"collapse")
+$         chop_semi = f$locate(";", shared_proc)
+$         if chop_semi .lt. f$length(shared_proc) then -
+              shared_proc = f$extract(0, chop_semi, shared_proc)
+$         write aopt f$fao(" symbol_vector=(!AS/!AS=PROCEDURE)",-
+        			 f$edit(shared_proc,"upcase"),shared_proc)
+$         write bopt f$fao("!AS=PROCEDURE,-",shared_proc)
+$     endif
+$     goto map_loop
+$MAP_END:
+$     close/nolog aopt
+$     close/nolog bopt
+$     open/append libopt 'p2'
+$     open/read aopt a.opt
+$     open/read bopt b.opt
+$ALOOP:
+$     read/end=aloop_end aopt line
+$     write libopt line
+$     goto aloop
+$ALOOP_END:
+$     close/nolog aopt
+$     sv = ""
+$BLOOP:
+$     read/end=bloop_end bopt svn
+$     if (svn.nes."")
+$     then
+$        if (sv.nes."") then write libopt sv
+$        sv = svn
+$     endif
+$     goto bloop
+$BLOOP_END:
+$     write libopt f$extract(0,f$length(sv)-2,sv), "-"
+$     write libopt ")"
+$     close/nolog bopt
+$     delete/nolog/noconf a.opt;*,b.opt;*
+$ else
+$     if vax
+$     then
+$     open/append libopt 'p2'
+$     mod_sym_num = 1
+$ VMOD_SYM_LOOP:
+$     if f$type(module'mod_sym_num') .nes. ""
+$     then
+$         mod_in = 0
+$ VMOD_SYM_IN:
+$         shared_proc = f$element(mod_in, "#", module'mod_sym_num')
+$         if shared_proc .nes. "#"
+$         then
+$     	      write libopt f$fao("UNIVERSAL=!AS",-
+      	  			     f$edit(shared_proc,"upcase"))
+$             mod_in = mod_in + 1
+$             goto vmod_sym_in
+$         endif
+$         mod_sym_num = mod_sym_num + 1
+$         goto vmod_sym_loop
+$     endif
+$VMAP_LOOP:
+$     	  read/end=vmap_end map line
+$     	  if (f$locate("{",line).lt. f$length(line)) .or. -
+   	      (f$locate("global:", line) .lt. f$length(line))
+$     	  then
+$     	      proc = true
+$     	      goto vmap_loop
+$     	  endif
+$     	  if f$locate("}",line).lt. f$length(line) then proc = false
+$     	  if f$locate("local:", line) .lt. f$length(line) then proc = false
+$     	  if proc
+$     	  then
+$     	      shared_proc = f$edit(line,"collapse")
+$     	      chop_semi = f$locate(";", shared_proc)
+$     	      if chop_semi .lt. f$length(shared_proc) then -
+      	  	  shared_proc = f$extract(0, chop_semi, shared_proc)
+$     	      write libopt f$fao("UNIVERSAL=!AS",-
+      	  			     f$edit(shared_proc,"upcase"))
+$     	  endif
+$     	  goto vmap_loop
+$VMAP_END:
+$     else
+$         write sys$output "Unknown Architecture (Not VAX, AXP, or IA64)"
+$         write sys$output "No options file created"
+$     endif
+$ endif
+$ EXIT_M2S:
+$ close/nolog map
+$ close/nolog libopt
+$ endsubroutine
diff --git a/deps/libchdr/deps/zlib-1.2.11/treebuild.xml b/deps/libchdr/deps/zlib-1.2.11/treebuild.xml
new file mode 100644
index 00000000..fd75525f
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/treebuild.xml
@@ -0,0 +1,116 @@
+<?xml version="1.0" ?>
+<package name="zlib" version="1.2.11">
+    <library name="zlib" dlversion="1.2.11" dlname="z">
+	<property name="description"> zip compression library </property>
+	<property name="include-target-dir" value="$(@PACKAGE/install-includedir)" />
+
+	<!-- fixme: not implemented yet -->
+	<property name="compiler/c/inline" value="yes" />
+
+	<include-file name="zlib.h" scope="public" mode="644" />
+	<include-file name="zconf.h" scope="public" mode="644" />
+
+	<source name="adler32.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	</source>
+	<source name="compress.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	</source>
+	<source name="crc32.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="crc32.h" />
+	</source>
+	<source name="gzclose.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="gzguts.h" />
+	</source>
+	<source name="gzlib.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="gzguts.h" />
+	</source>
+	<source name="gzread.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="gzguts.h" />
+	</source>
+	<source name="gzwrite.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="gzguts.h" />
+	</source>
+	<source name="uncompr.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	</source>
+	<source name="deflate.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="zutil.h" />
+	    <depend name="deflate.h" />
+	</source>
+	<source name="trees.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="zutil.h" />
+	    <depend name="deflate.h" />
+	    <depend name="trees.h" />
+	</source>
+	<source name="zutil.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="zutil.h" />
+	</source>
+	<source name="inflate.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="zutil.h" />
+	    <depend name="inftrees.h" />
+	    <depend name="inflate.h" />
+	    <depend name="inffast.h" />
+	</source>
+	<source name="infback.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="zutil.h" />
+	    <depend name="inftrees.h" />
+	    <depend name="inflate.h" />
+	    <depend name="inffast.h" />
+	</source>
+	<source name="inftrees.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="zutil.h" />
+	    <depend name="inftrees.h" />
+	</source>
+	<source name="inffast.c">
+	    <depend name="zlib.h" />
+	    <depend name="zconf.h" />
+	    <depend name="zutil.h" />
+	    <depend name="inftrees.h" />
+	    <depend name="inflate.h" />
+	    <depend name="inffast.h" />
+	</source>
+    </library>
+</package>
+
+<!--
+CFLAGS=-O
+#CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7
+#CFLAGS=-g -DZLIB_DEBUG
+#CFLAGS=-O3 -Wall -Wwrite-strings -Wpointer-arith -Wconversion \
+#           -Wstrict-prototypes -Wmissing-prototypes
+
+# OBJA =
+# to use the asm code: make OBJA=match.o
+#
+match.o: match.S
+	$(CPP) match.S > _match.s
+	$(CC) -c _match.s
+	mv _match.o match.o
+	rm -f _match.s
+-->
diff --git a/deps/libchdr/deps/zlib-1.2.11/trees.c b/deps/libchdr/deps/zlib-1.2.11/trees.c
new file mode 100644
index 00000000..50cf4b45
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/trees.c
@@ -0,0 +1,1203 @@
+/* trees.c -- output deflated data using Huffman coding
+ * Copyright (C) 1995-2017 Jean-loup Gailly
+ * detect_data_type() function provided freely by Cosmin Truta, 2006
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/*
+ *  ALGORITHM
+ *
+ *      The "deflation" process uses several Huffman trees. The more
+ *      common source values are represented by shorter bit sequences.
+ *
+ *      Each code tree is stored in a compressed form which is itself
+ * a Huffman encoding of the lengths of all the code strings (in
+ * ascending order by source values).  The actual code strings are
+ * reconstructed from the lengths in the inflate process, as described
+ * in the deflate specification.
+ *
+ *  REFERENCES
+ *
+ *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
+ *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
+ *
+ *      Storer, James A.
+ *          Data Compression:  Methods and Theory, pp. 49-50.
+ *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
+ *
+ *      Sedgewick, R.
+ *          Algorithms, p290.
+ *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
+ */
+
+/* @(#) $Id$ */
+
+/* #define GEN_TREES_H */
+
+#include "deflate.h"
+
+#ifdef ZLIB_DEBUG
+#  include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6      16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10    17
+/* repeat a zero length 3-10 times  (3 bits of repeat count) */
+
+#define REPZ_11_138  18
+/* repeat a zero length 11-138 times  (7 bits of repeat count) */
+
+local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+local const int extra_dbits[D_CODES] /* extra bits for each distance code */
+   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
+
+local const uch bl_order[BL_CODES]
+   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+#define DIST_CODE_LEN  512 /* see definition of array dist_code below */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+/* non ANSI compilers may not accept trees.h */
+
+local ct_data static_ltree[L_CODES+2];
+/* The static literal tree. Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However
+ * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
+ * below).
+ */
+
+local ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+uch _dist_code[DIST_CODE_LEN];
+/* Distance codes. The first 256 values correspond to the distances
+ * 3 .. 258, the last 256 values correspond to the top 8 bits of
+ * the 15 bit distances.
+ */
+
+uch _length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+local int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+local int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+#else
+#  include "trees.h"
+#endif /* GEN_TREES_H */
+
+struct static_tree_desc_s {
+    const ct_data *static_tree;  /* static tree or NULL */
+    const intf *extra_bits;      /* extra bits for each code or NULL */
+    int     extra_base;          /* base index for extra_bits */
+    int     elems;               /* max number of elements in the tree */
+    int     max_length;          /* max bit length for the codes */
+};
+
+local const static_tree_desc  static_l_desc =
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
+
+local const static_tree_desc  static_d_desc =
+{static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
+
+local const static_tree_desc  static_bl_desc =
+{(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
+
+/* ===========================================================================
+ * Local (static) routines in this file.
+ */
+
+local void tr_static_init OF((void));
+local void init_block     OF((deflate_state *s));
+local void pqdownheap     OF((deflate_state *s, ct_data *tree, int k));
+local void gen_bitlen     OF((deflate_state *s, tree_desc *desc));
+local void gen_codes      OF((ct_data *tree, int max_code, ushf *bl_count));
+local void build_tree     OF((deflate_state *s, tree_desc *desc));
+local void scan_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local void send_tree      OF((deflate_state *s, ct_data *tree, int max_code));
+local int  build_bl_tree  OF((deflate_state *s));
+local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
+                              int blcodes));
+local void compress_block OF((deflate_state *s, const ct_data *ltree,
+                              const ct_data *dtree));
+local int  detect_data_type OF((deflate_state *s));
+local unsigned bi_reverse OF((unsigned value, int length));
+local void bi_windup      OF((deflate_state *s));
+local void bi_flush       OF((deflate_state *s));
+
+#ifdef GEN_TREES_H
+local void gen_trees_header OF((void));
+#endif
+
+#ifndef ZLIB_DEBUG
+#  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
+   /* Send a code of the given tree. c and tree must not have side effects */
+
+#else /* !ZLIB_DEBUG */
+#  define send_code(s, c, tree) \
+     { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
+       send_bits(s, tree[c].Code, tree[c].Len); }
+#endif
+
+/* ===========================================================================
+ * Output a short LSB first on the stream.
+ * IN assertion: there is enough room in pendingBuf.
+ */
+#define put_short(s, w) { \
+    put_byte(s, (uch)((w) & 0xff)); \
+    put_byte(s, (uch)((ush)(w) >> 8)); \
+}
+
+/* ===========================================================================
+ * Send a value on a given number of bits.
+ * IN assertion: length <= 16 and value fits in length bits.
+ */
+#ifdef ZLIB_DEBUG
+local void send_bits      OF((deflate_state *s, int value, int length));
+
+local void send_bits(s, value, length)
+    deflate_state *s;
+    int value;  /* value to send */
+    int length; /* number of bits */
+{
+    Tracevv((stderr," l %2d v %4x ", length, value));
+    Assert(length > 0 && length <= 15, "invalid length");
+    s->bits_sent += (ulg)length;
+
+    /* If not enough room in bi_buf, use (valid) bits from bi_buf and
+     * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
+     * unused bits in value.
+     */
+    if (s->bi_valid > (int)Buf_size - length) {
+        s->bi_buf |= (ush)value << s->bi_valid;
+        put_short(s, s->bi_buf);
+        s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
+        s->bi_valid += length - Buf_size;
+    } else {
+        s->bi_buf |= (ush)value << s->bi_valid;
+        s->bi_valid += length;
+    }
+}
+#else /* !ZLIB_DEBUG */
+
+#define send_bits(s, value, length) \
+{ int len = length;\
+  if (s->bi_valid > (int)Buf_size - len) {\
+    int val = (int)value;\
+    s->bi_buf |= (ush)val << s->bi_valid;\
+    put_short(s, s->bi_buf);\
+    s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
+    s->bi_valid += len - Buf_size;\
+  } else {\
+    s->bi_buf |= (ush)(value) << s->bi_valid;\
+    s->bi_valid += len;\
+  }\
+}
+#endif /* ZLIB_DEBUG */
+
+
+/* the arguments must not have side effects */
+
+/* ===========================================================================
+ * Initialize the various 'constant' tables.
+ */
+local void tr_static_init()
+{
+#if defined(GEN_TREES_H) || !defined(STDC)
+    static int static_init_done = 0;
+    int n;        /* iterates over tree elements */
+    int bits;     /* bit counter */
+    int length;   /* length value */
+    int code;     /* code value */
+    int dist;     /* distance index */
+    ush bl_count[MAX_BITS+1];
+    /* number of codes at each bit length for an optimal tree */
+
+    if (static_init_done) return;
+
+    /* For some embedded targets, global variables are not initialized: */
+#ifdef NO_INIT_GLOBAL_POINTERS
+    static_l_desc.static_tree = static_ltree;
+    static_l_desc.extra_bits = extra_lbits;
+    static_d_desc.static_tree = static_dtree;
+    static_d_desc.extra_bits = extra_dbits;
+    static_bl_desc.extra_bits = extra_blbits;
+#endif
+
+    /* Initialize the mapping length (0..255) -> length code (0..28) */
+    length = 0;
+    for (code = 0; code < LENGTH_CODES-1; code++) {
+        base_length[code] = length;
+        for (n = 0; n < (1<<extra_lbits[code]); n++) {
+            _length_code[length++] = (uch)code;
+        }
+    }
+    Assert (length == 256, "tr_static_init: length != 256");
+    /* Note that the length 255 (match length 258) can be represented
+     * in two different ways: code 284 + 5 bits or code 285, so we
+     * overwrite length_code[255] to use the best encoding:
+     */
+    _length_code[length-1] = (uch)code;
+
+    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
+    dist = 0;
+    for (code = 0 ; code < 16; code++) {
+        base_dist[code] = dist;
+        for (n = 0; n < (1<<extra_dbits[code]); n++) {
+            _dist_code[dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: dist != 256");
+    dist >>= 7; /* from now on, all distances are divided by 128 */
+    for ( ; code < D_CODES; code++) {
+        base_dist[code] = dist << 7;
+        for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
+            _dist_code[256 + dist++] = (uch)code;
+        }
+    }
+    Assert (dist == 256, "tr_static_init: 256+dist != 512");
+
+    /* Construct the codes of the static literal tree */
+    for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
+    n = 0;
+    while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
+    while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
+    while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
+    while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
+    /* Codes 286 and 287 do not exist, but we must include them in the
+     * tree construction to get a canonical Huffman tree (longest code
+     * all ones)
+     */
+    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+    /* The static distance tree is trivial: */
+    for (n = 0; n < D_CODES; n++) {
+        static_dtree[n].Len = 5;
+        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+    }
+    static_init_done = 1;
+
+#  ifdef GEN_TREES_H
+    gen_trees_header();
+#  endif
+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
+}
+
+/* ===========================================================================
+ * Genererate the file trees.h describing the static trees.
+ */
+#ifdef GEN_TREES_H
+#  ifndef ZLIB_DEBUG
+#    include <stdio.h>
+#  endif
+
+#  define SEPARATOR(i, last, width) \
+      ((i) == (last)? "\n};\n\n" :    \
+       ((i) % (width) == (width)-1 ? ",\n" : ", "))
+
+void gen_trees_header()
+{
+    FILE *header = fopen("trees.h", "w");
+    int i;
+
+    Assert (header != NULL, "Can't open trees.h");
+    fprintf(header,
+            "/* header created automatically with -DGEN_TREES_H */\n\n");
+
+    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
+    for (i = 0; i < L_CODES+2; i++) {
+        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
+                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
+    }
+
+    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
+                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
+    }
+
+    fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
+    for (i = 0; i < DIST_CODE_LEN; i++) {
+        fprintf(header, "%2u%s", _dist_code[i],
+                SEPARATOR(i, DIST_CODE_LEN-1, 20));
+    }
+
+    fprintf(header,
+        "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
+    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
+        fprintf(header, "%2u%s", _length_code[i],
+                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
+    }
+
+    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
+    for (i = 0; i < LENGTH_CODES; i++) {
+        fprintf(header, "%1u%s", base_length[i],
+                SEPARATOR(i, LENGTH_CODES-1, 20));
+    }
+
+    fprintf(header, "local const int base_dist[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "%5u%s", base_dist[i],
+                SEPARATOR(i, D_CODES-1, 10));
+    }
+
+    fclose(header);
+}
+#endif /* GEN_TREES_H */
+
+/* ===========================================================================
+ * Initialize the tree data structures for a new zlib stream.
+ */
+void ZLIB_INTERNAL _tr_init(s)
+    deflate_state *s;
+{
+    tr_static_init();
+
+    s->l_desc.dyn_tree = s->dyn_ltree;
+    s->l_desc.stat_desc = &static_l_desc;
+
+    s->d_desc.dyn_tree = s->dyn_dtree;
+    s->d_desc.stat_desc = &static_d_desc;
+
+    s->bl_desc.dyn_tree = s->bl_tree;
+    s->bl_desc.stat_desc = &static_bl_desc;
+
+    s->bi_buf = 0;
+    s->bi_valid = 0;
+#ifdef ZLIB_DEBUG
+    s->compressed_len = 0L;
+    s->bits_sent = 0L;
+#endif
+
+    /* Initialize the first block of the first file: */
+    init_block(s);
+}
+
+/* ===========================================================================
+ * Initialize a new block.
+ */
+local void init_block(s)
+    deflate_state *s;
+{
+    int n; /* iterates over tree elements */
+
+    /* Initialize the trees. */
+    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
+    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
+    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
+
+    s->dyn_ltree[END_BLOCK].Freq = 1;
+    s->opt_len = s->static_len = 0L;
+    s->last_lit = s->matches = 0;
+}
+
+#define SMALLEST 1
+/* Index within the heap array of least frequent node in the Huffman tree */
+
+
+/* ===========================================================================
+ * Remove the smallest element from the heap and recreate the heap with
+ * one less element. Updates heap and heap_len.
+ */
+#define pqremove(s, tree, top) \
+{\
+    top = s->heap[SMALLEST]; \
+    s->heap[SMALLEST] = s->heap[s->heap_len--]; \
+    pqdownheap(s, tree, SMALLEST); \
+}
+
+/* ===========================================================================
+ * Compares to subtrees, using the tree depth as tie breaker when
+ * the subtrees have equal frequency. This minimizes the worst case length.
+ */
+#define smaller(tree, n, m, depth) \
+   (tree[n].Freq < tree[m].Freq || \
+   (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
+
+/* ===========================================================================
+ * Restore the heap property by moving down the tree starting at node k,
+ * exchanging a node with the smallest of its two sons if necessary, stopping
+ * when the heap property is re-established (each father smaller than its
+ * two sons).
+ */
+local void pqdownheap(s, tree, k)
+    deflate_state *s;
+    ct_data *tree;  /* the tree to restore */
+    int k;               /* node to move down */
+{
+    int v = s->heap[k];
+    int j = k << 1;  /* left son of k */
+    while (j <= s->heap_len) {
+        /* Set j to the smallest of the two sons: */
+        if (j < s->heap_len &&
+            smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
+            j++;
+        }
+        /* Exit if v is smaller than both sons */
+        if (smaller(tree, v, s->heap[j], s->depth)) break;
+
+        /* Exchange v with the smallest son */
+        s->heap[k] = s->heap[j];  k = j;
+
+        /* And continue down the tree, setting j to the left son of k */
+        j <<= 1;
+    }
+    s->heap[k] = v;
+}
+
+/* ===========================================================================
+ * Compute the optimal bit lengths for a tree and update the total bit length
+ * for the current block.
+ * IN assertion: the fields freq and dad are set, heap[heap_max] and
+ *    above are the tree nodes sorted by increasing frequency.
+ * OUT assertions: the field len is set to the optimal bit length, the
+ *     array bl_count contains the frequencies for each bit length.
+ *     The length opt_len is updated; static_len is also updated if stree is
+ *     not null.
+ */
+local void gen_bitlen(s, desc)
+    deflate_state *s;
+    tree_desc *desc;    /* the tree descriptor */
+{
+    ct_data *tree        = desc->dyn_tree;
+    int max_code         = desc->max_code;
+    const ct_data *stree = desc->stat_desc->static_tree;
+    const intf *extra    = desc->stat_desc->extra_bits;
+    int base             = desc->stat_desc->extra_base;
+    int max_length       = desc->stat_desc->max_length;
+    int h;              /* heap index */
+    int n, m;           /* iterate over the tree elements */
+    int bits;           /* bit length */
+    int xbits;          /* extra bits */
+    ush f;              /* frequency */
+    int overflow = 0;   /* number of elements with bit length too large */
+
+    for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
+
+    /* In a first pass, compute the optimal bit lengths (which may
+     * overflow in the case of the bit length tree).
+     */
+    tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
+
+    for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
+        n = s->heap[h];
+        bits = tree[tree[n].Dad].Len + 1;
+        if (bits > max_length) bits = max_length, overflow++;
+        tree[n].Len = (ush)bits;
+        /* We overwrite tree[n].Dad which is no longer needed */
+
+        if (n > max_code) continue; /* not a leaf node */
+
+        s->bl_count[bits]++;
+        xbits = 0;
+        if (n >= base) xbits = extra[n-base];
+        f = tree[n].Freq;
+        s->opt_len += (ulg)f * (unsigned)(bits + xbits);
+        if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);
+    }
+    if (overflow == 0) return;
+
+    Tracev((stderr,"\nbit length overflow\n"));
+    /* This happens for example on obj2 and pic of the Calgary corpus */
+
+    /* Find the first bit length which could increase: */
+    do {
+        bits = max_length-1;
+        while (s->bl_count[bits] == 0) bits--;
+        s->bl_count[bits]--;      /* move one leaf down the tree */
+        s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
+        s->bl_count[max_length]--;
+        /* The brother of the overflow item also moves one step up,
+         * but this does not affect bl_count[max_length]
+         */
+        overflow -= 2;
+    } while (overflow > 0);
+
+    /* Now recompute all bit lengths, scanning in increasing frequency.
+     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
+     * lengths instead of fixing only the wrong ones. This idea is taken
+     * from 'ar' written by Haruhiko Okumura.)
+     */
+    for (bits = max_length; bits != 0; bits--) {
+        n = s->bl_count[bits];
+        while (n != 0) {
+            m = s->heap[--h];
+            if (m > max_code) continue;
+            if ((unsigned) tree[m].Len != (unsigned) bits) {
+                Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+                s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;
+                tree[m].Len = (ush)bits;
+            }
+            n--;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Generate the codes for a given tree and bit counts (which need not be
+ * optimal).
+ * IN assertion: the array bl_count contains the bit length statistics for
+ * the given tree and the field len is set for all tree elements.
+ * OUT assertion: the field code is set for all tree elements of non
+ *     zero code length.
+ */
+local void gen_codes (tree, max_code, bl_count)
+    ct_data *tree;             /* the tree to decorate */
+    int max_code;              /* largest code with non zero frequency */
+    ushf *bl_count;            /* number of codes at each bit length */
+{
+    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
+    unsigned code = 0;         /* running code value */
+    int bits;                  /* bit index */
+    int n;                     /* code index */
+
+    /* The distribution counts are first used to generate the code values
+     * without bit reversal.
+     */
+    for (bits = 1; bits <= MAX_BITS; bits++) {
+        code = (code + bl_count[bits-1]) << 1;
+        next_code[bits] = (ush)code;
+    }
+    /* Check that the bit counts in bl_count are consistent. The last code
+     * must be all ones.
+     */
+    Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
+            "inconsistent bit counts");
+    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
+
+    for (n = 0;  n <= max_code; n++) {
+        int len = tree[n].Len;
+        if (len == 0) continue;
+        /* Now reverse the bits */
+        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
+
+        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
+             n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
+    }
+}
+
+/* ===========================================================================
+ * Construct one Huffman tree and assigns the code bit strings and lengths.
+ * Update the total bit length for the current block.
+ * IN assertion: the field freq is set for all tree elements.
+ * OUT assertions: the fields len and code are set to the optimal bit length
+ *     and corresponding code. The length opt_len is updated; static_len is
+ *     also updated if stree is not null. The field max_code is set.
+ */
+local void build_tree(s, desc)
+    deflate_state *s;
+    tree_desc *desc; /* the tree descriptor */
+{
+    ct_data *tree         = desc->dyn_tree;
+    const ct_data *stree  = desc->stat_desc->static_tree;
+    int elems             = desc->stat_desc->elems;
+    int n, m;          /* iterate over heap elements */
+    int max_code = -1; /* largest code with non zero frequency */
+    int node;          /* new node being created */
+
+    /* Construct the initial heap, with least frequent element in
+     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
+     * heap[0] is not used.
+     */
+    s->heap_len = 0, s->heap_max = HEAP_SIZE;
+
+    for (n = 0; n < elems; n++) {
+        if (tree[n].Freq != 0) {
+            s->heap[++(s->heap_len)] = max_code = n;
+            s->depth[n] = 0;
+        } else {
+            tree[n].Len = 0;
+        }
+    }
+
+    /* The pkzip format requires that at least one distance code exists,
+     * and that at least one bit should be sent even if there is only one
+     * possible code. So to avoid special checks later on we force at least
+     * two codes of non zero frequency.
+     */
+    while (s->heap_len < 2) {
+        node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
+        tree[node].Freq = 1;
+        s->depth[node] = 0;
+        s->opt_len--; if (stree) s->static_len -= stree[node].Len;
+        /* node is 0 or 1 so it does not have extra bits */
+    }
+    desc->max_code = max_code;
+
+    /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
+     * establish sub-heaps of increasing lengths:
+     */
+    for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
+
+    /* Construct the Huffman tree by repeatedly combining the least two
+     * frequent nodes.
+     */
+    node = elems;              /* next internal node of the tree */
+    do {
+        pqremove(s, tree, n);  /* n = node of least frequency */
+        m = s->heap[SMALLEST]; /* m = node of next least frequency */
+
+        s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
+        s->heap[--(s->heap_max)] = m;
+
+        /* Create a new node father of n and m */
+        tree[node].Freq = tree[n].Freq + tree[m].Freq;
+        s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
+                                s->depth[n] : s->depth[m]) + 1);
+        tree[n].Dad = tree[m].Dad = (ush)node;
+#ifdef DUMP_BL_TREE
+        if (tree == s->bl_tree) {
+            fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
+                    node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
+        }
+#endif
+        /* and insert the new node in the heap */
+        s->heap[SMALLEST] = node++;
+        pqdownheap(s, tree, SMALLEST);
+
+    } while (s->heap_len >= 2);
+
+    s->heap[--(s->heap_max)] = s->heap[SMALLEST];
+
+    /* At this point, the fields freq and dad are set. We can now
+     * generate the bit lengths.
+     */
+    gen_bitlen(s, (tree_desc *)desc);
+
+    /* The field len is now set, we can generate the bit codes */
+    gen_codes ((ct_data *)tree, max_code, s->bl_count);
+}
+
+/* ===========================================================================
+ * Scan a literal or distance tree to determine the frequencies of the codes
+ * in the bit length tree.
+ */
+local void scan_tree (s, tree, max_code)
+    deflate_state *s;
+    ct_data *tree;   /* the tree to be scanned */
+    int max_code;    /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    if (nextlen == 0) max_count = 138, min_count = 3;
+    tree[max_code+1].Len = (ush)0xffff; /* guard */
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue;
+        } else if (count < min_count) {
+            s->bl_tree[curlen].Freq += count;
+        } else if (curlen != 0) {
+            if (curlen != prevlen) s->bl_tree[curlen].Freq++;
+            s->bl_tree[REP_3_6].Freq++;
+        } else if (count <= 10) {
+            s->bl_tree[REPZ_3_10].Freq++;
+        } else {
+            s->bl_tree[REPZ_11_138].Freq++;
+        }
+        count = 0; prevlen = curlen;
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3;
+        } else {
+            max_count = 7, min_count = 4;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Send a literal or distance tree in compressed form, using the codes in
+ * bl_tree.
+ */
+local void send_tree (s, tree, max_code)
+    deflate_state *s;
+    ct_data *tree; /* the tree to be scanned */
+    int max_code;       /* and its largest code of non zero frequency */
+{
+    int n;                     /* iterates over all tree elements */
+    int prevlen = -1;          /* last emitted length */
+    int curlen;                /* length of current code */
+    int nextlen = tree[0].Len; /* length of next code */
+    int count = 0;             /* repeat count of the current code */
+    int max_count = 7;         /* max repeat count */
+    int min_count = 4;         /* min repeat count */
+
+    /* tree[max_code+1].Len = -1; */  /* guard already set */
+    if (nextlen == 0) max_count = 138, min_count = 3;
+
+    for (n = 0; n <= max_code; n++) {
+        curlen = nextlen; nextlen = tree[n+1].Len;
+        if (++count < max_count && curlen == nextlen) {
+            continue;
+        } else if (count < min_count) {
+            do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
+
+        } else if (curlen != 0) {
+            if (curlen != prevlen) {
+                send_code(s, curlen, s->bl_tree); count--;
+            }
+            Assert(count >= 3 && count <= 6, " 3_6?");
+            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
+
+        } else if (count <= 10) {
+            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
+
+        } else {
+            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
+        }
+        count = 0; prevlen = curlen;
+        if (nextlen == 0) {
+            max_count = 138, min_count = 3;
+        } else if (curlen == nextlen) {
+            max_count = 6, min_count = 3;
+        } else {
+            max_count = 7, min_count = 4;
+        }
+    }
+}
+
+/* ===========================================================================
+ * Construct the Huffman tree for the bit lengths and return the index in
+ * bl_order of the last bit length code to send.
+ */
+local int build_bl_tree(s)
+    deflate_state *s;
+{
+    int max_blindex;  /* index of last bit length code of non zero freq */
+
+    /* Determine the bit length frequencies for literal and distance trees */
+    scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
+    scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
+
+    /* Build the bit length tree: */
+    build_tree(s, (tree_desc *)(&(s->bl_desc)));
+    /* opt_len now includes the length of the tree representations, except
+     * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
+     */
+
+    /* Determine the number of bit length codes to send. The pkzip format
+     * requires that at least 4 bit length codes be sent. (appnote.txt says
+     * 3 but the actual value used is 4.)
+     */
+    for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
+        if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
+    }
+    /* Update opt_len to include the bit length tree and counts */
+    s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4;
+    Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
+            s->opt_len, s->static_len));
+
+    return max_blindex;
+}
+
+/* ===========================================================================
+ * Send the header for a block using dynamic Huffman trees: the counts, the
+ * lengths of the bit length codes, the literal tree and the distance tree.
+ * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
+ */
+local void send_all_trees(s, lcodes, dcodes, blcodes)
+    deflate_state *s;
+    int lcodes, dcodes, blcodes; /* number of codes for each tree */
+{
+    int rank;                    /* index in bl_order */
+
+    Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
+    Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
+            "too many codes");
+    Tracev((stderr, "\nbl counts: "));
+    send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
+    send_bits(s, dcodes-1,   5);
+    send_bits(s, blcodes-4,  4); /* not -3 as stated in appnote.txt */
+    for (rank = 0; rank < blcodes; rank++) {
+        Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
+        send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
+    }
+    Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
+
+    send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
+    Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
+
+    send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
+    Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
+}
+
+/* ===========================================================================
+ * Send a stored block
+ */
+void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
+    deflate_state *s;
+    charf *buf;       /* input block */
+    ulg stored_len;   /* length of input block */
+    int last;         /* one if this is the last block for a file */
+{
+    send_bits(s, (STORED_BLOCK<<1)+last, 3);    /* send block type */
+    bi_windup(s);        /* align on byte boundary */
+    put_short(s, (ush)stored_len);
+    put_short(s, (ush)~stored_len);
+    zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
+    s->pending += stored_len;
+#ifdef ZLIB_DEBUG
+    s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
+    s->compressed_len += (stored_len + 4) << 3;
+    s->bits_sent += 2*16;
+    s->bits_sent += stored_len<<3;
+#endif
+}
+
+/* ===========================================================================
+ * Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
+ */
+void ZLIB_INTERNAL _tr_flush_bits(s)
+    deflate_state *s;
+{
+    bi_flush(s);
+}
+
+/* ===========================================================================
+ * Send one empty static block to give enough lookahead for inflate.
+ * This takes 10 bits, of which 7 may remain in the bit buffer.
+ */
+void ZLIB_INTERNAL _tr_align(s)
+    deflate_state *s;
+{
+    send_bits(s, STATIC_TREES<<1, 3);
+    send_code(s, END_BLOCK, static_ltree);
+#ifdef ZLIB_DEBUG
+    s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
+#endif
+    bi_flush(s);
+}
+
+/* ===========================================================================
+ * Determine the best encoding for the current block: dynamic trees, static
+ * trees or store, and write out the encoded block.
+ */
+void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
+    deflate_state *s;
+    charf *buf;       /* input block, or NULL if too old */
+    ulg stored_len;   /* length of input block */
+    int last;         /* one if this is the last block for a file */
+{
+    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
+    int max_blindex = 0;  /* index of last bit length code of non zero freq */
+
+    /* Build the Huffman trees unless a stored block is forced */
+    if (s->level > 0) {
+
+        /* Check if the file is binary or text */
+        if (s->strm->data_type == Z_UNKNOWN)
+            s->strm->data_type = detect_data_type(s);
+
+        /* Construct the literal and distance trees */
+        build_tree(s, (tree_desc *)(&(s->l_desc)));
+        Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
+                s->static_len));
+
+        build_tree(s, (tree_desc *)(&(s->d_desc)));
+        Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
+                s->static_len));
+        /* At this point, opt_len and static_len are the total bit lengths of
+         * the compressed block data, excluding the tree representations.
+         */
+
+        /* Build the bit length tree for the above two trees, and get the index
+         * in bl_order of the last bit length code to send.
+         */
+        max_blindex = build_bl_tree(s);
+
+        /* Determine the best encoding. Compute the block lengths in bytes. */
+        opt_lenb = (s->opt_len+3+7)>>3;
+        static_lenb = (s->static_len+3+7)>>3;
+
+        Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
+                opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
+                s->last_lit));
+
+        if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
+
+    } else {
+        Assert(buf != (char*)0, "lost buf");
+        opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
+    }
+
+#ifdef FORCE_STORED
+    if (buf != (char*)0) { /* force stored block */
+#else
+    if (stored_len+4 <= opt_lenb && buf != (char*)0) {
+                       /* 4: two words for the lengths */
+#endif
+        /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
+         * Otherwise we can't have processed more than WSIZE input bytes since
+         * the last block flush, because compression would have been
+         * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
+         * transform a block into a stored block.
+         */
+        _tr_stored_block(s, buf, stored_len, last);
+
+#ifdef FORCE_STATIC
+    } else if (static_lenb >= 0) { /* force static trees */
+#else
+    } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
+#endif
+        send_bits(s, (STATIC_TREES<<1)+last, 3);
+        compress_block(s, (const ct_data *)static_ltree,
+                       (const ct_data *)static_dtree);
+#ifdef ZLIB_DEBUG
+        s->compressed_len += 3 + s->static_len;
+#endif
+    } else {
+        send_bits(s, (DYN_TREES<<1)+last, 3);
+        send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
+                       max_blindex+1);
+        compress_block(s, (const ct_data *)s->dyn_ltree,
+                       (const ct_data *)s->dyn_dtree);
+#ifdef ZLIB_DEBUG
+        s->compressed_len += 3 + s->opt_len;
+#endif
+    }
+    Assert (s->compressed_len == s->bits_sent, "bad compressed size");
+    /* The above check is made mod 2^32, for files larger than 512 MB
+     * and uLong implemented on 32 bits.
+     */
+    init_block(s);
+
+    if (last) {
+        bi_windup(s);
+#ifdef ZLIB_DEBUG
+        s->compressed_len += 7;  /* align on byte boundary */
+#endif
+    }
+    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
+           s->compressed_len-7*last));
+}
+
+/* ===========================================================================
+ * Save the match info and tally the frequency counts. Return true if
+ * the current block must be flushed.
+ */
+int ZLIB_INTERNAL _tr_tally (s, dist, lc)
+    deflate_state *s;
+    unsigned dist;  /* distance of matched string */
+    unsigned lc;    /* match length-MIN_MATCH or unmatched char (if dist==0) */
+{
+    s->d_buf[s->last_lit] = (ush)dist;
+    s->l_buf[s->last_lit++] = (uch)lc;
+    if (dist == 0) {
+        /* lc is the unmatched char */
+        s->dyn_ltree[lc].Freq++;
+    } else {
+        s->matches++;
+        /* Here, lc is the match length - MIN_MATCH */
+        dist--;             /* dist = match distance - 1 */
+        Assert((ush)dist < (ush)MAX_DIST(s) &&
+               (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
+               (ush)d_code(dist) < (ush)D_CODES,  "_tr_tally: bad match");
+
+        s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
+        s->dyn_dtree[d_code(dist)].Freq++;
+    }
+
+#ifdef TRUNCATE_BLOCK
+    /* Try to guess if it is profitable to stop the current block here */
+    if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
+        /* Compute an upper bound for the compressed length */
+        ulg out_length = (ulg)s->last_lit*8L;
+        ulg in_length = (ulg)((long)s->strstart - s->block_start);
+        int dcode;
+        for (dcode = 0; dcode < D_CODES; dcode++) {
+            out_length += (ulg)s->dyn_dtree[dcode].Freq *
+                (5L+extra_dbits[dcode]);
+        }
+        out_length >>= 3;
+        Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
+               s->last_lit, in_length, out_length,
+               100L - out_length*100L/in_length));
+        if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
+    }
+#endif
+    return (s->last_lit == s->lit_bufsize-1);
+    /* We avoid equality with lit_bufsize because of wraparound at 64K
+     * on 16 bit machines and because stored blocks are restricted to
+     * 64K-1 bytes.
+     */
+}
+
+/* ===========================================================================
+ * Send the block data compressed using the given Huffman trees
+ */
+local void compress_block(s, ltree, dtree)
+    deflate_state *s;
+    const ct_data *ltree; /* literal tree */
+    const ct_data *dtree; /* distance tree */
+{
+    unsigned dist;      /* distance of matched string */
+    int lc;             /* match length or unmatched char (if dist == 0) */
+    unsigned lx = 0;    /* running index in l_buf */
+    unsigned code;      /* the code to send */
+    int extra;          /* number of extra bits to send */
+
+    if (s->last_lit != 0) do {
+        dist = s->d_buf[lx];
+        lc = s->l_buf[lx++];
+        if (dist == 0) {
+            send_code(s, lc, ltree); /* send a literal byte */
+            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
+        } else {
+            /* Here, lc is the match length - MIN_MATCH */
+            code = _length_code[lc];
+            send_code(s, code+LITERALS+1, ltree); /* send the length code */
+            extra = extra_lbits[code];
+            if (extra != 0) {
+                lc -= base_length[code];
+                send_bits(s, lc, extra);       /* send the extra length bits */
+            }
+            dist--; /* dist is now the match distance - 1 */
+            code = d_code(dist);
+            Assert (code < D_CODES, "bad d_code");
+
+            send_code(s, code, dtree);       /* send the distance code */
+            extra = extra_dbits[code];
+            if (extra != 0) {
+                dist -= (unsigned)base_dist[code];
+                send_bits(s, dist, extra);   /* send the extra distance bits */
+            }
+        } /* literal or match pair ? */
+
+        /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
+        Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
+               "pendingBuf overflow");
+
+    } while (lx < s->last_lit);
+
+    send_code(s, END_BLOCK, ltree);
+}
+
+/* ===========================================================================
+ * Check if the data type is TEXT or BINARY, using the following algorithm:
+ * - TEXT if the two conditions below are satisfied:
+ *    a) There are no non-portable control characters belonging to the
+ *       "black list" (0..6, 14..25, 28..31).
+ *    b) There is at least one printable character belonging to the
+ *       "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
+ * - BINARY otherwise.
+ * - The following partially-portable control characters form a
+ *   "gray list" that is ignored in this detection algorithm:
+ *   (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
+ * IN assertion: the fields Freq of dyn_ltree are set.
+ */
+local int detect_data_type(s)
+    deflate_state *s;
+{
+    /* black_mask is the bit mask of black-listed bytes
+     * set bits 0..6, 14..25, and 28..31
+     * 0xf3ffc07f = binary 11110011111111111100000001111111
+     */
+    unsigned long black_mask = 0xf3ffc07fUL;
+    int n;
+
+    /* Check for non-textual ("black-listed") bytes. */
+    for (n = 0; n <= 31; n++, black_mask >>= 1)
+        if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
+            return Z_BINARY;
+
+    /* Check for textual ("white-listed") bytes. */
+    if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
+            || s->dyn_ltree[13].Freq != 0)
+        return Z_TEXT;
+    for (n = 32; n < LITERALS; n++)
+        if (s->dyn_ltree[n].Freq != 0)
+            return Z_TEXT;
+
+    /* There are no "black-listed" or "white-listed" bytes:
+     * this stream either is empty or has tolerated ("gray-listed") bytes only.
+     */
+    return Z_BINARY;
+}
+
+/* ===========================================================================
+ * Reverse the first len bits of a code, using straightforward code (a faster
+ * method would use a table)
+ * IN assertion: 1 <= len <= 15
+ */
+local unsigned bi_reverse(code, len)
+    unsigned code; /* the value to invert */
+    int len;       /* its bit length */
+{
+    register unsigned res = 0;
+    do {
+        res |= code & 1;
+        code >>= 1, res <<= 1;
+    } while (--len > 0);
+    return res >> 1;
+}
+
+/* ===========================================================================
+ * Flush the bit buffer, keeping at most 7 bits in it.
+ */
+local void bi_flush(s)
+    deflate_state *s;
+{
+    if (s->bi_valid == 16) {
+        put_short(s, s->bi_buf);
+        s->bi_buf = 0;
+        s->bi_valid = 0;
+    } else if (s->bi_valid >= 8) {
+        put_byte(s, (Byte)s->bi_buf);
+        s->bi_buf >>= 8;
+        s->bi_valid -= 8;
+    }
+}
+
+/* ===========================================================================
+ * Flush the bit buffer and align the output on a byte boundary
+ */
+local void bi_windup(s)
+    deflate_state *s;
+{
+    if (s->bi_valid > 8) {
+        put_short(s, s->bi_buf);
+    } else if (s->bi_valid > 0) {
+        put_byte(s, (Byte)s->bi_buf);
+    }
+    s->bi_buf = 0;
+    s->bi_valid = 0;
+#ifdef ZLIB_DEBUG
+    s->bits_sent = (s->bits_sent+7) & ~7;
+#endif
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/trees.h b/deps/libchdr/deps/zlib-1.2.11/trees.h
new file mode 100644
index 00000000..d35639d8
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/trees.h
@@ -0,0 +1,128 @@
+/* header created automatically with -DGEN_TREES_H */
+
+local const ct_data static_ltree[L_CODES+2] = {
+{{ 12},{  8}}, {{140},{  8}}, {{ 76},{  8}}, {{204},{  8}}, {{ 44},{  8}},
+{{172},{  8}}, {{108},{  8}}, {{236},{  8}}, {{ 28},{  8}}, {{156},{  8}},
+{{ 92},{  8}}, {{220},{  8}}, {{ 60},{  8}}, {{188},{  8}}, {{124},{  8}},
+{{252},{  8}}, {{  2},{  8}}, {{130},{  8}}, {{ 66},{  8}}, {{194},{  8}},
+{{ 34},{  8}}, {{162},{  8}}, {{ 98},{  8}}, {{226},{  8}}, {{ 18},{  8}},
+{{146},{  8}}, {{ 82},{  8}}, {{210},{  8}}, {{ 50},{  8}}, {{178},{  8}},
+{{114},{  8}}, {{242},{  8}}, {{ 10},{  8}}, {{138},{  8}}, {{ 74},{  8}},
+{{202},{  8}}, {{ 42},{  8}}, {{170},{  8}}, {{106},{  8}}, {{234},{  8}},
+{{ 26},{  8}}, {{154},{  8}}, {{ 90},{  8}}, {{218},{  8}}, {{ 58},{  8}},
+{{186},{  8}}, {{122},{  8}}, {{250},{  8}}, {{  6},{  8}}, {{134},{  8}},
+{{ 70},{  8}}, {{198},{  8}}, {{ 38},{  8}}, {{166},{  8}}, {{102},{  8}},
+{{230},{  8}}, {{ 22},{  8}}, {{150},{  8}}, {{ 86},{  8}}, {{214},{  8}},
+{{ 54},{  8}}, {{182},{  8}}, {{118},{  8}}, {{246},{  8}}, {{ 14},{  8}},
+{{142},{  8}}, {{ 78},{  8}}, {{206},{  8}}, {{ 46},{  8}}, {{174},{  8}},
+{{110},{  8}}, {{238},{  8}}, {{ 30},{  8}}, {{158},{  8}}, {{ 94},{  8}},
+{{222},{  8}}, {{ 62},{  8}}, {{190},{  8}}, {{126},{  8}}, {{254},{  8}},
+{{  1},{  8}}, {{129},{  8}}, {{ 65},{  8}}, {{193},{  8}}, {{ 33},{  8}},
+{{161},{  8}}, {{ 97},{  8}}, {{225},{  8}}, {{ 17},{  8}}, {{145},{  8}},
+{{ 81},{  8}}, {{209},{  8}}, {{ 49},{  8}}, {{177},{  8}}, {{113},{  8}},
+{{241},{  8}}, {{  9},{  8}}, {{137},{  8}}, {{ 73},{  8}}, {{201},{  8}},
+{{ 41},{  8}}, {{169},{  8}}, {{105},{  8}}, {{233},{  8}}, {{ 25},{  8}},
+{{153},{  8}}, {{ 89},{  8}}, {{217},{  8}}, {{ 57},{  8}}, {{185},{  8}},
+{{121},{  8}}, {{249},{  8}}, {{  5},{  8}}, {{133},{  8}}, {{ 69},{  8}},
+{{197},{  8}}, {{ 37},{  8}}, {{165},{  8}}, {{101},{  8}}, {{229},{  8}},
+{{ 21},{  8}}, {{149},{  8}}, {{ 85},{  8}}, {{213},{  8}}, {{ 53},{  8}},
+{{181},{  8}}, {{117},{  8}}, {{245},{  8}}, {{ 13},{  8}}, {{141},{  8}},
+{{ 77},{  8}}, {{205},{  8}}, {{ 45},{  8}}, {{173},{  8}}, {{109},{  8}},
+{{237},{  8}}, {{ 29},{  8}}, {{157},{  8}}, {{ 93},{  8}}, {{221},{  8}},
+{{ 61},{  8}}, {{189},{  8}}, {{125},{  8}}, {{253},{  8}}, {{ 19},{  9}},
+{{275},{  9}}, {{147},{  9}}, {{403},{  9}}, {{ 83},{  9}}, {{339},{  9}},
+{{211},{  9}}, {{467},{  9}}, {{ 51},{  9}}, {{307},{  9}}, {{179},{  9}},
+{{435},{  9}}, {{115},{  9}}, {{371},{  9}}, {{243},{  9}}, {{499},{  9}},
+{{ 11},{  9}}, {{267},{  9}}, {{139},{  9}}, {{395},{  9}}, {{ 75},{  9}},
+{{331},{  9}}, {{203},{  9}}, {{459},{  9}}, {{ 43},{  9}}, {{299},{  9}},
+{{171},{  9}}, {{427},{  9}}, {{107},{  9}}, {{363},{  9}}, {{235},{  9}},
+{{491},{  9}}, {{ 27},{  9}}, {{283},{  9}}, {{155},{  9}}, {{411},{  9}},
+{{ 91},{  9}}, {{347},{  9}}, {{219},{  9}}, {{475},{  9}}, {{ 59},{  9}},
+{{315},{  9}}, {{187},{  9}}, {{443},{  9}}, {{123},{  9}}, {{379},{  9}},
+{{251},{  9}}, {{507},{  9}}, {{  7},{  9}}, {{263},{  9}}, {{135},{  9}},
+{{391},{  9}}, {{ 71},{  9}}, {{327},{  9}}, {{199},{  9}}, {{455},{  9}},
+{{ 39},{  9}}, {{295},{  9}}, {{167},{  9}}, {{423},{  9}}, {{103},{  9}},
+{{359},{  9}}, {{231},{  9}}, {{487},{  9}}, {{ 23},{  9}}, {{279},{  9}},
+{{151},{  9}}, {{407},{  9}}, {{ 87},{  9}}, {{343},{  9}}, {{215},{  9}},
+{{471},{  9}}, {{ 55},{  9}}, {{311},{  9}}, {{183},{  9}}, {{439},{  9}},
+{{119},{  9}}, {{375},{  9}}, {{247},{  9}}, {{503},{  9}}, {{ 15},{  9}},
+{{271},{  9}}, {{143},{  9}}, {{399},{  9}}, {{ 79},{  9}}, {{335},{  9}},
+{{207},{  9}}, {{463},{  9}}, {{ 47},{  9}}, {{303},{  9}}, {{175},{  9}},
+{{431},{  9}}, {{111},{  9}}, {{367},{  9}}, {{239},{  9}}, {{495},{  9}},
+{{ 31},{  9}}, {{287},{  9}}, {{159},{  9}}, {{415},{  9}}, {{ 95},{  9}},
+{{351},{  9}}, {{223},{  9}}, {{479},{  9}}, {{ 63},{  9}}, {{319},{  9}},
+{{191},{  9}}, {{447},{  9}}, {{127},{  9}}, {{383},{  9}}, {{255},{  9}},
+{{511},{  9}}, {{  0},{  7}}, {{ 64},{  7}}, {{ 32},{  7}}, {{ 96},{  7}},
+{{ 16},{  7}}, {{ 80},{  7}}, {{ 48},{  7}}, {{112},{  7}}, {{  8},{  7}},
+{{ 72},{  7}}, {{ 40},{  7}}, {{104},{  7}}, {{ 24},{  7}}, {{ 88},{  7}},
+{{ 56},{  7}}, {{120},{  7}}, {{  4},{  7}}, {{ 68},{  7}}, {{ 36},{  7}},
+{{100},{  7}}, {{ 20},{  7}}, {{ 84},{  7}}, {{ 52},{  7}}, {{116},{  7}},
+{{  3},{  8}}, {{131},{  8}}, {{ 67},{  8}}, {{195},{  8}}, {{ 35},{  8}},
+{{163},{  8}}, {{ 99},{  8}}, {{227},{  8}}
+};
+
+local const ct_data static_dtree[D_CODES] = {
+{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}},
+{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}},
+{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}},
+{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}},
+{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}},
+{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}}
+};
+
+const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {
+ 0,  1,  2,  3,  4,  4,  5,  5,  6,  6,  6,  6,  7,  7,  7,  7,  8,  8,  8,  8,
+ 8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9, 10, 10, 10, 10, 10, 10, 10, 10,
+10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,  0,  0, 16, 17,
+18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
+28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
+29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
+};
+
+const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {
+ 0,  1,  2,  3,  4,  5,  6,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 12, 12,
+13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16,
+17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19,
+19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22,
+22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
+23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
+25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
+26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
+27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28
+};
+
+local const int base_length[LENGTH_CODES] = {
+0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56,
+64, 80, 96, 112, 128, 160, 192, 224, 0
+};
+
+local const int base_dist[D_CODES] = {
+    0,     1,     2,     3,     4,     6,     8,    12,    16,    24,
+   32,    48,    64,    96,   128,   192,   256,   384,   512,   768,
+ 1024,  1536,  2048,  3072,  4096,  6144,  8192, 12288, 16384, 24576
+};
+
diff --git a/deps/libchdr/deps/zlib-1.2.11/uncompr.c b/deps/libchdr/deps/zlib-1.2.11/uncompr.c
new file mode 100644
index 00000000..f03a1a86
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/uncompr.c
@@ -0,0 +1,93 @@
+/* uncompr.c -- decompress a memory buffer
+ * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#define ZLIB_INTERNAL
+#include "zlib.h"
+
+/* ===========================================================================
+     Decompresses the source buffer into the destination buffer.  *sourceLen is
+   the byte length of the source buffer. Upon entry, *destLen is the total size
+   of the destination buffer, which must be large enough to hold the entire
+   uncompressed data. (The size of the uncompressed data must have been saved
+   previously by the compressor and transmitted to the decompressor by some
+   mechanism outside the scope of this compression library.) Upon exit,
+   *destLen is the size of the decompressed data and *sourceLen is the number
+   of source bytes consumed. Upon return, source + *sourceLen points to the
+   first unused input byte.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer, or
+   Z_DATA_ERROR if the input data was corrupted, including if the input data is
+   an incomplete zlib stream.
+*/
+int ZEXPORT uncompress2 (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong *sourceLen;
+{
+    z_stream stream;
+    int err;
+    const uInt max = (uInt)-1;
+    uLong len, left;
+    Byte buf[1];    /* for detection of incomplete stream when *destLen == 0 */
+
+    len = *sourceLen;
+    if (*destLen) {
+        left = *destLen;
+        *destLen = 0;
+    }
+    else {
+        left = 1;
+        dest = buf;
+    }
+
+    stream.next_in = (z_const Bytef *)source;
+    stream.avail_in = 0;
+    stream.zalloc = (alloc_func)0;
+    stream.zfree = (free_func)0;
+    stream.opaque = (voidpf)0;
+
+    err = inflateInit(&stream);
+    if (err != Z_OK) return err;
+
+    stream.next_out = dest;
+    stream.avail_out = 0;
+
+    do {
+        if (stream.avail_out == 0) {
+            stream.avail_out = left > (uLong)max ? max : (uInt)left;
+            left -= stream.avail_out;
+        }
+        if (stream.avail_in == 0) {
+            stream.avail_in = len > (uLong)max ? max : (uInt)len;
+            len -= stream.avail_in;
+        }
+        err = inflate(&stream, Z_NO_FLUSH);
+    } while (err == Z_OK);
+
+    *sourceLen -= len + stream.avail_in;
+    if (dest != buf)
+        *destLen = stream.total_out;
+    else if (stream.total_out && err == Z_BUF_ERROR)
+        left = 1;
+
+    inflateEnd(&stream);
+    return err == Z_STREAM_END ? Z_OK :
+           err == Z_NEED_DICT ? Z_DATA_ERROR  :
+           err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR :
+           err;
+}
+
+int ZEXPORT uncompress (dest, destLen, source, sourceLen)
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    return uncompress2(dest, destLen, source, &sourceLen);
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/zconf.h b/deps/libchdr/deps/zlib-1.2.11/zconf.h
new file mode 100644
index 00000000..5e1d68a0
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zconf.h
@@ -0,0 +1,534 @@
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZCONF_H
+#define ZCONF_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ * Even better than compiling with -DZ_PREFIX would be to use configure to set
+ * this permanently in zconf.h using "./configure --zprefix".
+ */
+#ifdef Z_PREFIX     /* may be set to #if 1 by ./configure */
+#  define Z_PREFIX_SET
+
+/* all linked symbols and init macros */
+#  define _dist_code            z__dist_code
+#  define _length_code          z__length_code
+#  define _tr_align             z__tr_align
+#  define _tr_flush_bits        z__tr_flush_bits
+#  define _tr_flush_block       z__tr_flush_block
+#  define _tr_init              z__tr_init
+#  define _tr_stored_block      z__tr_stored_block
+#  define _tr_tally             z__tr_tally
+#  define adler32               z_adler32
+#  define adler32_combine       z_adler32_combine
+#  define adler32_combine64     z_adler32_combine64
+#  define adler32_z             z_adler32_z
+#  ifndef Z_SOLO
+#    define compress              z_compress
+#    define compress2             z_compress2
+#    define compressBound         z_compressBound
+#  endif
+#  define crc32                 z_crc32
+#  define crc32_combine         z_crc32_combine
+#  define crc32_combine64       z_crc32_combine64
+#  define crc32_z               z_crc32_z
+#  define deflate               z_deflate
+#  define deflateBound          z_deflateBound
+#  define deflateCopy           z_deflateCopy
+#  define deflateEnd            z_deflateEnd
+#  define deflateGetDictionary  z_deflateGetDictionary
+#  define deflateInit           z_deflateInit
+#  define deflateInit2          z_deflateInit2
+#  define deflateInit2_         z_deflateInit2_
+#  define deflateInit_          z_deflateInit_
+#  define deflateParams         z_deflateParams
+#  define deflatePending        z_deflatePending
+#  define deflatePrime          z_deflatePrime
+#  define deflateReset          z_deflateReset
+#  define deflateResetKeep      z_deflateResetKeep
+#  define deflateSetDictionary  z_deflateSetDictionary
+#  define deflateSetHeader      z_deflateSetHeader
+#  define deflateTune           z_deflateTune
+#  define deflate_copyright     z_deflate_copyright
+#  define get_crc_table         z_get_crc_table
+#  ifndef Z_SOLO
+#    define gz_error              z_gz_error
+#    define gz_intmax             z_gz_intmax
+#    define gz_strwinerror        z_gz_strwinerror
+#    define gzbuffer              z_gzbuffer
+#    define gzclearerr            z_gzclearerr
+#    define gzclose               z_gzclose
+#    define gzclose_r             z_gzclose_r
+#    define gzclose_w             z_gzclose_w
+#    define gzdirect              z_gzdirect
+#    define gzdopen               z_gzdopen
+#    define gzeof                 z_gzeof
+#    define gzerror               z_gzerror
+#    define gzflush               z_gzflush
+#    define gzfread               z_gzfread
+#    define gzfwrite              z_gzfwrite
+#    define gzgetc                z_gzgetc
+#    define gzgetc_               z_gzgetc_
+#    define gzgets                z_gzgets
+#    define gzoffset              z_gzoffset
+#    define gzoffset64            z_gzoffset64
+#    define gzopen                z_gzopen
+#    define gzopen64              z_gzopen64
+#    ifdef _WIN32
+#      define gzopen_w              z_gzopen_w
+#    endif
+#    define gzprintf              z_gzprintf
+#    define gzputc                z_gzputc
+#    define gzputs                z_gzputs
+#    define gzread                z_gzread
+#    define gzrewind              z_gzrewind
+#    define gzseek                z_gzseek
+#    define gzseek64              z_gzseek64
+#    define gzsetparams           z_gzsetparams
+#    define gztell                z_gztell
+#    define gztell64              z_gztell64
+#    define gzungetc              z_gzungetc
+#    define gzvprintf             z_gzvprintf
+#    define gzwrite               z_gzwrite
+#  endif
+#  define inflate               z_inflate
+#  define inflateBack           z_inflateBack
+#  define inflateBackEnd        z_inflateBackEnd
+#  define inflateBackInit       z_inflateBackInit
+#  define inflateBackInit_      z_inflateBackInit_
+#  define inflateCodesUsed      z_inflateCodesUsed
+#  define inflateCopy           z_inflateCopy
+#  define inflateEnd            z_inflateEnd
+#  define inflateGetDictionary  z_inflateGetDictionary
+#  define inflateGetHeader      z_inflateGetHeader
+#  define inflateInit           z_inflateInit
+#  define inflateInit2          z_inflateInit2
+#  define inflateInit2_         z_inflateInit2_
+#  define inflateInit_          z_inflateInit_
+#  define inflateMark           z_inflateMark
+#  define inflatePrime          z_inflatePrime
+#  define inflateReset          z_inflateReset
+#  define inflateReset2         z_inflateReset2
+#  define inflateResetKeep      z_inflateResetKeep
+#  define inflateSetDictionary  z_inflateSetDictionary
+#  define inflateSync           z_inflateSync
+#  define inflateSyncPoint      z_inflateSyncPoint
+#  define inflateUndermine      z_inflateUndermine
+#  define inflateValidate       z_inflateValidate
+#  define inflate_copyright     z_inflate_copyright
+#  define inflate_fast          z_inflate_fast
+#  define inflate_table         z_inflate_table
+#  ifndef Z_SOLO
+#    define uncompress            z_uncompress
+#    define uncompress2           z_uncompress2
+#  endif
+#  define zError                z_zError
+#  ifndef Z_SOLO
+#    define zcalloc               z_zcalloc
+#    define zcfree                z_zcfree
+#  endif
+#  define zlibCompileFlags      z_zlibCompileFlags
+#  define zlibVersion           z_zlibVersion
+
+/* all zlib typedefs in zlib.h and zconf.h */
+#  define Byte                  z_Byte
+#  define Bytef                 z_Bytef
+#  define alloc_func            z_alloc_func
+#  define charf                 z_charf
+#  define free_func             z_free_func
+#  ifndef Z_SOLO
+#    define gzFile                z_gzFile
+#  endif
+#  define gz_header             z_gz_header
+#  define gz_headerp            z_gz_headerp
+#  define in_func               z_in_func
+#  define intf                  z_intf
+#  define out_func              z_out_func
+#  define uInt                  z_uInt
+#  define uIntf                 z_uIntf
+#  define uLong                 z_uLong
+#  define uLongf                z_uLongf
+#  define voidp                 z_voidp
+#  define voidpc                z_voidpc
+#  define voidpf                z_voidpf
+
+/* all zlib structs in zlib.h and zconf.h */
+#  define gz_header_s           z_gz_header_s
+#  define internal_state        z_internal_state
+
+#endif
+
+#if defined(__MSDOS__) && !defined(MSDOS)
+#  define MSDOS
+#endif
+#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
+#  define OS2
+#endif
+#if defined(_WINDOWS) && !defined(WINDOWS)
+#  define WINDOWS
+#endif
+#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
+#  ifndef WIN32
+#    define WIN32
+#  endif
+#endif
+#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
+#  if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
+#    ifndef SYS16BIT
+#      define SYS16BIT
+#    endif
+#  endif
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#ifdef SYS16BIT
+#  define MAXSEG_64K
+#endif
+#ifdef MSDOS
+#  define UNALIGNED_OK
+#endif
+
+#ifdef __STDC_VERSION__
+#  ifndef STDC
+#    define STDC
+#  endif
+#  if __STDC_VERSION__ >= 199901L
+#    ifndef STDC99
+#      define STDC99
+#    endif
+#  endif
+#endif
+#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
+#  define STDC
+#endif
+
+#if defined(__OS400__) && !defined(STDC)    /* iSeries (formerly AS/400). */
+#  define STDC
+#endif
+
+#ifndef STDC
+#  ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+#    define const       /* note: need a more gentle solution here */
+#  endif
+#endif
+
+#if defined(ZLIB_CONST) && !defined(z_const)
+#  define z_const const
+#else
+#  define z_const
+#endif
+
+#ifdef Z_SOLO
+   typedef unsigned long z_size_t;
+#else
+#  define z_longlong long long
+#  if defined(NO_SIZE_T)
+     typedef unsigned NO_SIZE_T z_size_t;
+#  elif defined(STDC)
+#    include <stddef.h>
+     typedef size_t z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
+#  undef z_longlong
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+#  ifdef MAXSEG_64K
+#    define MAX_MEM_LEVEL 8
+#  else
+#    define MAX_MEM_LEVEL 9
+#  endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+#  define MAX_WBITS   15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+            (1 << (windowBits+2)) +  (1 << (memLevel+9))
+ that is: 128K for windowBits=15  +  128K for memLevel = 8  (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+     make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+   The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
+ for small objects.
+*/
+
+                        /* Type declarations */
+
+#ifndef OF /* function prototypes */
+#  ifdef STDC
+#    define OF(args)  args
+#  else
+#    define OF(args)  ()
+#  endif
+#endif
+
+#ifndef Z_ARG /* function prototypes for stdarg */
+#  if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#    define Z_ARG(args)  args
+#  else
+#    define Z_ARG(args)  ()
+#  endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h.  If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#ifdef SYS16BIT
+#  if defined(M_I86SM) || defined(M_I86MM)
+     /* MSC small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef _MSC_VER
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#  if (defined(__SMALL__) || defined(__MEDIUM__))
+     /* Turbo C small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef __BORLANDC__
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#endif
+
+#if defined(WINDOWS) || defined(WIN32)
+   /* If building or using zlib as a DLL, define ZLIB_DLL.
+    * This is not mandatory, but it offers a little performance increase.
+    */
+#  ifdef ZLIB_DLL
+#    if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
+#      ifdef ZLIB_INTERNAL
+#        define ZEXTERN extern __declspec(dllexport)
+#      else
+#        define ZEXTERN extern __declspec(dllimport)
+#      endif
+#    endif
+#  endif  /* ZLIB_DLL */
+   /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+    * define ZLIB_WINAPI.
+    * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+    */
+#  ifdef ZLIB_WINAPI
+#    ifdef FAR
+#      undef FAR
+#    endif
+#    include <windows.h>
+     /* No need for _export, use ZLIB.DEF instead. */
+     /* For complete Windows compatibility, use WINAPI, not __stdcall. */
+#    define ZEXPORT WINAPI
+#    ifdef WIN32
+#      define ZEXPORTVA WINAPIV
+#    else
+#      define ZEXPORTVA FAR CDECL
+#    endif
+#  endif
+#endif
+
+#if defined (__BEOS__)
+#  ifdef ZLIB_DLL
+#    ifdef ZLIB_INTERNAL
+#      define ZEXPORT   __declspec(dllexport)
+#      define ZEXPORTVA __declspec(dllexport)
+#    else
+#      define ZEXPORT   __declspec(dllimport)
+#      define ZEXPORTVA __declspec(dllimport)
+#    endif
+#  endif
+#endif
+
+#ifndef ZEXTERN
+#  define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+#  define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+#  define ZEXPORTVA
+#endif
+
+#ifndef FAR
+#  define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char  Byte;  /* 8 bits */
+#endif
+typedef unsigned int   uInt;  /* 16 bits or more */
+typedef unsigned long  uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+   /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+#  define Bytef Byte FAR
+#else
+   typedef Byte  FAR Bytef;
+#endif
+typedef char  FAR charf;
+typedef int   FAR intf;
+typedef uInt  FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+   typedef void const *voidpc;
+   typedef void FAR   *voidpf;
+   typedef void       *voidp;
+#else
+   typedef Byte const *voidpc;
+   typedef Byte FAR   *voidpf;
+   typedef Byte       *voidp;
+#endif
+
+#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
+#  include <limits.h>
+#  if (UINT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned
+#  elif (ULONG_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned long
+#  elif (USHRT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned short
+#  endif
+#endif
+
+#ifdef Z_U4
+   typedef Z_U4 z_crc_t;
+#else
+   typedef unsigned long z_crc_t;
+#endif
+
+#ifdef HAVE_UNISTD_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_UNISTD_H
+#endif
+
+#ifdef HAVE_STDARG_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_STDARG_H
+#endif
+
+#ifdef STDC
+#  ifndef Z_SOLO
+#    include <sys/types.h>      /* for off_t */
+#  endif
+#endif
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+#    include <stdarg.h>         /* for va_list */
+#  endif
+#endif
+
+#ifdef _WIN32
+#  ifndef Z_SOLO
+#    include <stddef.h>         /* for wchar_t */
+#  endif
+#endif
+
+/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
+ * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
+ * though the former does not conform to the LFS document), but considering
+ * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
+ * equivalently requesting no 64-bit operations
+ */
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+#  undef _LARGEFILE64_SOURCE
+#endif
+
+#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H)
+#  define Z_HAVE_UNISTD_H
+#endif
+#ifndef Z_SOLO
+#  if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE)
+#    include <unistd.h>         /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
+#    ifdef VMS
+#      include <unixio.h>       /* for off_t */
+#    endif
+#    ifndef z_off_t
+#      define z_off_t off_t
+#    endif
+#  endif
+#endif
+
+#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
+#  define Z_LFS64
+#endif
+
+#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
+#  define Z_LARGE64
+#endif
+
+#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
+#  define Z_WANT64
+#endif
+
+#if !defined(SEEK_SET) && !defined(Z_SOLO)
+#  define SEEK_SET        0       /* Seek from beginning of file.  */
+#  define SEEK_CUR        1       /* Seek from current position.  */
+#  define SEEK_END        2       /* Set file pointer to EOF plus "offset" */
+#endif
+
+#ifndef z_off_t
+#  define z_off_t long
+#endif
+
+#if !defined(_WIN32) && defined(Z_LARGE64)
+#  define z_off64_t off64_t
+#else
+#  if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+#    define z_off64_t __int64
+#  else
+#    define z_off64_t z_off_t
+#  endif
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+  #pragma map(deflateInit_,"DEIN")
+  #pragma map(deflateInit2_,"DEIN2")
+  #pragma map(deflateEnd,"DEEND")
+  #pragma map(deflateBound,"DEBND")
+  #pragma map(inflateInit_,"ININ")
+  #pragma map(inflateInit2_,"ININ2")
+  #pragma map(inflateEnd,"INEND")
+  #pragma map(inflateSync,"INSY")
+  #pragma map(inflateSetDictionary,"INSEDI")
+  #pragma map(compressBound,"CMBND")
+  #pragma map(inflate_table,"INTABL")
+  #pragma map(inflate_fast,"INFA")
+  #pragma map(inflate_copyright,"INCOPY")
+#endif
+
+#endif /* ZCONF_H */
diff --git a/deps/libchdr/deps/zlib-1.2.11/zconf.h.cmakein b/deps/libchdr/deps/zlib-1.2.11/zconf.h.cmakein
new file mode 100644
index 00000000..a7f24cce
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zconf.h.cmakein
@@ -0,0 +1,536 @@
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZCONF_H
+#define ZCONF_H
+#cmakedefine Z_PREFIX
+#cmakedefine Z_HAVE_UNISTD_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ * Even better than compiling with -DZ_PREFIX would be to use configure to set
+ * this permanently in zconf.h using "./configure --zprefix".
+ */
+#ifdef Z_PREFIX     /* may be set to #if 1 by ./configure */
+#  define Z_PREFIX_SET
+
+/* all linked symbols and init macros */
+#  define _dist_code            z__dist_code
+#  define _length_code          z__length_code
+#  define _tr_align             z__tr_align
+#  define _tr_flush_bits        z__tr_flush_bits
+#  define _tr_flush_block       z__tr_flush_block
+#  define _tr_init              z__tr_init
+#  define _tr_stored_block      z__tr_stored_block
+#  define _tr_tally             z__tr_tally
+#  define adler32               z_adler32
+#  define adler32_combine       z_adler32_combine
+#  define adler32_combine64     z_adler32_combine64
+#  define adler32_z             z_adler32_z
+#  ifndef Z_SOLO
+#    define compress              z_compress
+#    define compress2             z_compress2
+#    define compressBound         z_compressBound
+#  endif
+#  define crc32                 z_crc32
+#  define crc32_combine         z_crc32_combine
+#  define crc32_combine64       z_crc32_combine64
+#  define crc32_z               z_crc32_z
+#  define deflate               z_deflate
+#  define deflateBound          z_deflateBound
+#  define deflateCopy           z_deflateCopy
+#  define deflateEnd            z_deflateEnd
+#  define deflateGetDictionary  z_deflateGetDictionary
+#  define deflateInit           z_deflateInit
+#  define deflateInit2          z_deflateInit2
+#  define deflateInit2_         z_deflateInit2_
+#  define deflateInit_          z_deflateInit_
+#  define deflateParams         z_deflateParams
+#  define deflatePending        z_deflatePending
+#  define deflatePrime          z_deflatePrime
+#  define deflateReset          z_deflateReset
+#  define deflateResetKeep      z_deflateResetKeep
+#  define deflateSetDictionary  z_deflateSetDictionary
+#  define deflateSetHeader      z_deflateSetHeader
+#  define deflateTune           z_deflateTune
+#  define deflate_copyright     z_deflate_copyright
+#  define get_crc_table         z_get_crc_table
+#  ifndef Z_SOLO
+#    define gz_error              z_gz_error
+#    define gz_intmax             z_gz_intmax
+#    define gz_strwinerror        z_gz_strwinerror
+#    define gzbuffer              z_gzbuffer
+#    define gzclearerr            z_gzclearerr
+#    define gzclose               z_gzclose
+#    define gzclose_r             z_gzclose_r
+#    define gzclose_w             z_gzclose_w
+#    define gzdirect              z_gzdirect
+#    define gzdopen               z_gzdopen
+#    define gzeof                 z_gzeof
+#    define gzerror               z_gzerror
+#    define gzflush               z_gzflush
+#    define gzfread               z_gzfread
+#    define gzfwrite              z_gzfwrite
+#    define gzgetc                z_gzgetc
+#    define gzgetc_               z_gzgetc_
+#    define gzgets                z_gzgets
+#    define gzoffset              z_gzoffset
+#    define gzoffset64            z_gzoffset64
+#    define gzopen                z_gzopen
+#    define gzopen64              z_gzopen64
+#    ifdef _WIN32
+#      define gzopen_w              z_gzopen_w
+#    endif
+#    define gzprintf              z_gzprintf
+#    define gzputc                z_gzputc
+#    define gzputs                z_gzputs
+#    define gzread                z_gzread
+#    define gzrewind              z_gzrewind
+#    define gzseek                z_gzseek
+#    define gzseek64              z_gzseek64
+#    define gzsetparams           z_gzsetparams
+#    define gztell                z_gztell
+#    define gztell64              z_gztell64
+#    define gzungetc              z_gzungetc
+#    define gzvprintf             z_gzvprintf
+#    define gzwrite               z_gzwrite
+#  endif
+#  define inflate               z_inflate
+#  define inflateBack           z_inflateBack
+#  define inflateBackEnd        z_inflateBackEnd
+#  define inflateBackInit       z_inflateBackInit
+#  define inflateBackInit_      z_inflateBackInit_
+#  define inflateCodesUsed      z_inflateCodesUsed
+#  define inflateCopy           z_inflateCopy
+#  define inflateEnd            z_inflateEnd
+#  define inflateGetDictionary  z_inflateGetDictionary
+#  define inflateGetHeader      z_inflateGetHeader
+#  define inflateInit           z_inflateInit
+#  define inflateInit2          z_inflateInit2
+#  define inflateInit2_         z_inflateInit2_
+#  define inflateInit_          z_inflateInit_
+#  define inflateMark           z_inflateMark
+#  define inflatePrime          z_inflatePrime
+#  define inflateReset          z_inflateReset
+#  define inflateReset2         z_inflateReset2
+#  define inflateResetKeep      z_inflateResetKeep
+#  define inflateSetDictionary  z_inflateSetDictionary
+#  define inflateSync           z_inflateSync
+#  define inflateSyncPoint      z_inflateSyncPoint
+#  define inflateUndermine      z_inflateUndermine
+#  define inflateValidate       z_inflateValidate
+#  define inflate_copyright     z_inflate_copyright
+#  define inflate_fast          z_inflate_fast
+#  define inflate_table         z_inflate_table
+#  ifndef Z_SOLO
+#    define uncompress            z_uncompress
+#    define uncompress2           z_uncompress2
+#  endif
+#  define zError                z_zError
+#  ifndef Z_SOLO
+#    define zcalloc               z_zcalloc
+#    define zcfree                z_zcfree
+#  endif
+#  define zlibCompileFlags      z_zlibCompileFlags
+#  define zlibVersion           z_zlibVersion
+
+/* all zlib typedefs in zlib.h and zconf.h */
+#  define Byte                  z_Byte
+#  define Bytef                 z_Bytef
+#  define alloc_func            z_alloc_func
+#  define charf                 z_charf
+#  define free_func             z_free_func
+#  ifndef Z_SOLO
+#    define gzFile                z_gzFile
+#  endif
+#  define gz_header             z_gz_header
+#  define gz_headerp            z_gz_headerp
+#  define in_func               z_in_func
+#  define intf                  z_intf
+#  define out_func              z_out_func
+#  define uInt                  z_uInt
+#  define uIntf                 z_uIntf
+#  define uLong                 z_uLong
+#  define uLongf                z_uLongf
+#  define voidp                 z_voidp
+#  define voidpc                z_voidpc
+#  define voidpf                z_voidpf
+
+/* all zlib structs in zlib.h and zconf.h */
+#  define gz_header_s           z_gz_header_s
+#  define internal_state        z_internal_state
+
+#endif
+
+#if defined(__MSDOS__) && !defined(MSDOS)
+#  define MSDOS
+#endif
+#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
+#  define OS2
+#endif
+#if defined(_WINDOWS) && !defined(WINDOWS)
+#  define WINDOWS
+#endif
+#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
+#  ifndef WIN32
+#    define WIN32
+#  endif
+#endif
+#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
+#  if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
+#    ifndef SYS16BIT
+#      define SYS16BIT
+#    endif
+#  endif
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#ifdef SYS16BIT
+#  define MAXSEG_64K
+#endif
+#ifdef MSDOS
+#  define UNALIGNED_OK
+#endif
+
+#ifdef __STDC_VERSION__
+#  ifndef STDC
+#    define STDC
+#  endif
+#  if __STDC_VERSION__ >= 199901L
+#    ifndef STDC99
+#      define STDC99
+#    endif
+#  endif
+#endif
+#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
+#  define STDC
+#endif
+
+#if defined(__OS400__) && !defined(STDC)    /* iSeries (formerly AS/400). */
+#  define STDC
+#endif
+
+#ifndef STDC
+#  ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+#    define const       /* note: need a more gentle solution here */
+#  endif
+#endif
+
+#if defined(ZLIB_CONST) && !defined(z_const)
+#  define z_const const
+#else
+#  define z_const
+#endif
+
+#ifdef Z_SOLO
+   typedef unsigned long z_size_t;
+#else
+#  define z_longlong long long
+#  if defined(NO_SIZE_T)
+     typedef unsigned NO_SIZE_T z_size_t;
+#  elif defined(STDC)
+#    include <stddef.h>
+     typedef size_t z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
+#  undef z_longlong
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+#  ifdef MAXSEG_64K
+#    define MAX_MEM_LEVEL 8
+#  else
+#    define MAX_MEM_LEVEL 9
+#  endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+#  define MAX_WBITS   15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+            (1 << (windowBits+2)) +  (1 << (memLevel+9))
+ that is: 128K for windowBits=15  +  128K for memLevel = 8  (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+     make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+   The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
+ for small objects.
+*/
+
+                        /* Type declarations */
+
+#ifndef OF /* function prototypes */
+#  ifdef STDC
+#    define OF(args)  args
+#  else
+#    define OF(args)  ()
+#  endif
+#endif
+
+#ifndef Z_ARG /* function prototypes for stdarg */
+#  if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#    define Z_ARG(args)  args
+#  else
+#    define Z_ARG(args)  ()
+#  endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h.  If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#ifdef SYS16BIT
+#  if defined(M_I86SM) || defined(M_I86MM)
+     /* MSC small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef _MSC_VER
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#  if (defined(__SMALL__) || defined(__MEDIUM__))
+     /* Turbo C small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef __BORLANDC__
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#endif
+
+#if defined(WINDOWS) || defined(WIN32)
+   /* If building or using zlib as a DLL, define ZLIB_DLL.
+    * This is not mandatory, but it offers a little performance increase.
+    */
+#  ifdef ZLIB_DLL
+#    if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
+#      ifdef ZLIB_INTERNAL
+#        define ZEXTERN extern __declspec(dllexport)
+#      else
+#        define ZEXTERN extern __declspec(dllimport)
+#      endif
+#    endif
+#  endif  /* ZLIB_DLL */
+   /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+    * define ZLIB_WINAPI.
+    * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+    */
+#  ifdef ZLIB_WINAPI
+#    ifdef FAR
+#      undef FAR
+#    endif
+#    include <windows.h>
+     /* No need for _export, use ZLIB.DEF instead. */
+     /* For complete Windows compatibility, use WINAPI, not __stdcall. */
+#    define ZEXPORT WINAPI
+#    ifdef WIN32
+#      define ZEXPORTVA WINAPIV
+#    else
+#      define ZEXPORTVA FAR CDECL
+#    endif
+#  endif
+#endif
+
+#if defined (__BEOS__)
+#  ifdef ZLIB_DLL
+#    ifdef ZLIB_INTERNAL
+#      define ZEXPORT   __declspec(dllexport)
+#      define ZEXPORTVA __declspec(dllexport)
+#    else
+#      define ZEXPORT   __declspec(dllimport)
+#      define ZEXPORTVA __declspec(dllimport)
+#    endif
+#  endif
+#endif
+
+#ifndef ZEXTERN
+#  define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+#  define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+#  define ZEXPORTVA
+#endif
+
+#ifndef FAR
+#  define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char  Byte;  /* 8 bits */
+#endif
+typedef unsigned int   uInt;  /* 16 bits or more */
+typedef unsigned long  uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+   /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+#  define Bytef Byte FAR
+#else
+   typedef Byte  FAR Bytef;
+#endif
+typedef char  FAR charf;
+typedef int   FAR intf;
+typedef uInt  FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+   typedef void const *voidpc;
+   typedef void FAR   *voidpf;
+   typedef void       *voidp;
+#else
+   typedef Byte const *voidpc;
+   typedef Byte FAR   *voidpf;
+   typedef Byte       *voidp;
+#endif
+
+#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
+#  include <limits.h>
+#  if (UINT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned
+#  elif (ULONG_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned long
+#  elif (USHRT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned short
+#  endif
+#endif
+
+#ifdef Z_U4
+   typedef Z_U4 z_crc_t;
+#else
+   typedef unsigned long z_crc_t;
+#endif
+
+#ifdef HAVE_UNISTD_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_UNISTD_H
+#endif
+
+#ifdef HAVE_STDARG_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_STDARG_H
+#endif
+
+#ifdef STDC
+#  ifndef Z_SOLO
+#    include <sys/types.h>      /* for off_t */
+#  endif
+#endif
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+#    include <stdarg.h>         /* for va_list */
+#  endif
+#endif
+
+#ifdef _WIN32
+#  ifndef Z_SOLO
+#    include <stddef.h>         /* for wchar_t */
+#  endif
+#endif
+
+/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
+ * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
+ * though the former does not conform to the LFS document), but considering
+ * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
+ * equivalently requesting no 64-bit operations
+ */
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+#  undef _LARGEFILE64_SOURCE
+#endif
+
+#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H)
+#  define Z_HAVE_UNISTD_H
+#endif
+#ifndef Z_SOLO
+#  if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE)
+#    include <unistd.h>         /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
+#    ifdef VMS
+#      include <unixio.h>       /* for off_t */
+#    endif
+#    ifndef z_off_t
+#      define z_off_t off_t
+#    endif
+#  endif
+#endif
+
+#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
+#  define Z_LFS64
+#endif
+
+#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
+#  define Z_LARGE64
+#endif
+
+#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
+#  define Z_WANT64
+#endif
+
+#if !defined(SEEK_SET) && !defined(Z_SOLO)
+#  define SEEK_SET        0       /* Seek from beginning of file.  */
+#  define SEEK_CUR        1       /* Seek from current position.  */
+#  define SEEK_END        2       /* Set file pointer to EOF plus "offset" */
+#endif
+
+#ifndef z_off_t
+#  define z_off_t long
+#endif
+
+#if !defined(_WIN32) && defined(Z_LARGE64)
+#  define z_off64_t off64_t
+#else
+#  if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+#    define z_off64_t __int64
+#  else
+#    define z_off64_t z_off_t
+#  endif
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+  #pragma map(deflateInit_,"DEIN")
+  #pragma map(deflateInit2_,"DEIN2")
+  #pragma map(deflateEnd,"DEEND")
+  #pragma map(deflateBound,"DEBND")
+  #pragma map(inflateInit_,"ININ")
+  #pragma map(inflateInit2_,"ININ2")
+  #pragma map(inflateEnd,"INEND")
+  #pragma map(inflateSync,"INSY")
+  #pragma map(inflateSetDictionary,"INSEDI")
+  #pragma map(compressBound,"CMBND")
+  #pragma map(inflate_table,"INTABL")
+  #pragma map(inflate_fast,"INFA")
+  #pragma map(inflate_copyright,"INCOPY")
+#endif
+
+#endif /* ZCONF_H */
diff --git a/deps/libchdr/deps/zlib-1.2.11/zconf.h.in b/deps/libchdr/deps/zlib-1.2.11/zconf.h.in
new file mode 100644
index 00000000..5e1d68a0
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zconf.h.in
@@ -0,0 +1,534 @@
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZCONF_H
+#define ZCONF_H
+
+/*
+ * If you *really* need a unique prefix for all types and library functions,
+ * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it.
+ * Even better than compiling with -DZ_PREFIX would be to use configure to set
+ * this permanently in zconf.h using "./configure --zprefix".
+ */
+#ifdef Z_PREFIX     /* may be set to #if 1 by ./configure */
+#  define Z_PREFIX_SET
+
+/* all linked symbols and init macros */
+#  define _dist_code            z__dist_code
+#  define _length_code          z__length_code
+#  define _tr_align             z__tr_align
+#  define _tr_flush_bits        z__tr_flush_bits
+#  define _tr_flush_block       z__tr_flush_block
+#  define _tr_init              z__tr_init
+#  define _tr_stored_block      z__tr_stored_block
+#  define _tr_tally             z__tr_tally
+#  define adler32               z_adler32
+#  define adler32_combine       z_adler32_combine
+#  define adler32_combine64     z_adler32_combine64
+#  define adler32_z             z_adler32_z
+#  ifndef Z_SOLO
+#    define compress              z_compress
+#    define compress2             z_compress2
+#    define compressBound         z_compressBound
+#  endif
+#  define crc32                 z_crc32
+#  define crc32_combine         z_crc32_combine
+#  define crc32_combine64       z_crc32_combine64
+#  define crc32_z               z_crc32_z
+#  define deflate               z_deflate
+#  define deflateBound          z_deflateBound
+#  define deflateCopy           z_deflateCopy
+#  define deflateEnd            z_deflateEnd
+#  define deflateGetDictionary  z_deflateGetDictionary
+#  define deflateInit           z_deflateInit
+#  define deflateInit2          z_deflateInit2
+#  define deflateInit2_         z_deflateInit2_
+#  define deflateInit_          z_deflateInit_
+#  define deflateParams         z_deflateParams
+#  define deflatePending        z_deflatePending
+#  define deflatePrime          z_deflatePrime
+#  define deflateReset          z_deflateReset
+#  define deflateResetKeep      z_deflateResetKeep
+#  define deflateSetDictionary  z_deflateSetDictionary
+#  define deflateSetHeader      z_deflateSetHeader
+#  define deflateTune           z_deflateTune
+#  define deflate_copyright     z_deflate_copyright
+#  define get_crc_table         z_get_crc_table
+#  ifndef Z_SOLO
+#    define gz_error              z_gz_error
+#    define gz_intmax             z_gz_intmax
+#    define gz_strwinerror        z_gz_strwinerror
+#    define gzbuffer              z_gzbuffer
+#    define gzclearerr            z_gzclearerr
+#    define gzclose               z_gzclose
+#    define gzclose_r             z_gzclose_r
+#    define gzclose_w             z_gzclose_w
+#    define gzdirect              z_gzdirect
+#    define gzdopen               z_gzdopen
+#    define gzeof                 z_gzeof
+#    define gzerror               z_gzerror
+#    define gzflush               z_gzflush
+#    define gzfread               z_gzfread
+#    define gzfwrite              z_gzfwrite
+#    define gzgetc                z_gzgetc
+#    define gzgetc_               z_gzgetc_
+#    define gzgets                z_gzgets
+#    define gzoffset              z_gzoffset
+#    define gzoffset64            z_gzoffset64
+#    define gzopen                z_gzopen
+#    define gzopen64              z_gzopen64
+#    ifdef _WIN32
+#      define gzopen_w              z_gzopen_w
+#    endif
+#    define gzprintf              z_gzprintf
+#    define gzputc                z_gzputc
+#    define gzputs                z_gzputs
+#    define gzread                z_gzread
+#    define gzrewind              z_gzrewind
+#    define gzseek                z_gzseek
+#    define gzseek64              z_gzseek64
+#    define gzsetparams           z_gzsetparams
+#    define gztell                z_gztell
+#    define gztell64              z_gztell64
+#    define gzungetc              z_gzungetc
+#    define gzvprintf             z_gzvprintf
+#    define gzwrite               z_gzwrite
+#  endif
+#  define inflate               z_inflate
+#  define inflateBack           z_inflateBack
+#  define inflateBackEnd        z_inflateBackEnd
+#  define inflateBackInit       z_inflateBackInit
+#  define inflateBackInit_      z_inflateBackInit_
+#  define inflateCodesUsed      z_inflateCodesUsed
+#  define inflateCopy           z_inflateCopy
+#  define inflateEnd            z_inflateEnd
+#  define inflateGetDictionary  z_inflateGetDictionary
+#  define inflateGetHeader      z_inflateGetHeader
+#  define inflateInit           z_inflateInit
+#  define inflateInit2          z_inflateInit2
+#  define inflateInit2_         z_inflateInit2_
+#  define inflateInit_          z_inflateInit_
+#  define inflateMark           z_inflateMark
+#  define inflatePrime          z_inflatePrime
+#  define inflateReset          z_inflateReset
+#  define inflateReset2         z_inflateReset2
+#  define inflateResetKeep      z_inflateResetKeep
+#  define inflateSetDictionary  z_inflateSetDictionary
+#  define inflateSync           z_inflateSync
+#  define inflateSyncPoint      z_inflateSyncPoint
+#  define inflateUndermine      z_inflateUndermine
+#  define inflateValidate       z_inflateValidate
+#  define inflate_copyright     z_inflate_copyright
+#  define inflate_fast          z_inflate_fast
+#  define inflate_table         z_inflate_table
+#  ifndef Z_SOLO
+#    define uncompress            z_uncompress
+#    define uncompress2           z_uncompress2
+#  endif
+#  define zError                z_zError
+#  ifndef Z_SOLO
+#    define zcalloc               z_zcalloc
+#    define zcfree                z_zcfree
+#  endif
+#  define zlibCompileFlags      z_zlibCompileFlags
+#  define zlibVersion           z_zlibVersion
+
+/* all zlib typedefs in zlib.h and zconf.h */
+#  define Byte                  z_Byte
+#  define Bytef                 z_Bytef
+#  define alloc_func            z_alloc_func
+#  define charf                 z_charf
+#  define free_func             z_free_func
+#  ifndef Z_SOLO
+#    define gzFile                z_gzFile
+#  endif
+#  define gz_header             z_gz_header
+#  define gz_headerp            z_gz_headerp
+#  define in_func               z_in_func
+#  define intf                  z_intf
+#  define out_func              z_out_func
+#  define uInt                  z_uInt
+#  define uIntf                 z_uIntf
+#  define uLong                 z_uLong
+#  define uLongf                z_uLongf
+#  define voidp                 z_voidp
+#  define voidpc                z_voidpc
+#  define voidpf                z_voidpf
+
+/* all zlib structs in zlib.h and zconf.h */
+#  define gz_header_s           z_gz_header_s
+#  define internal_state        z_internal_state
+
+#endif
+
+#if defined(__MSDOS__) && !defined(MSDOS)
+#  define MSDOS
+#endif
+#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2)
+#  define OS2
+#endif
+#if defined(_WINDOWS) && !defined(WINDOWS)
+#  define WINDOWS
+#endif
+#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__)
+#  ifndef WIN32
+#    define WIN32
+#  endif
+#endif
+#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32)
+#  if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__)
+#    ifndef SYS16BIT
+#      define SYS16BIT
+#    endif
+#  endif
+#endif
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ */
+#ifdef SYS16BIT
+#  define MAXSEG_64K
+#endif
+#ifdef MSDOS
+#  define UNALIGNED_OK
+#endif
+
+#ifdef __STDC_VERSION__
+#  ifndef STDC
+#    define STDC
+#  endif
+#  if __STDC_VERSION__ >= 199901L
+#    ifndef STDC99
+#      define STDC99
+#    endif
+#  endif
+#endif
+#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32))
+#  define STDC
+#endif
+#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__))
+#  define STDC
+#endif
+
+#if defined(__OS400__) && !defined(STDC)    /* iSeries (formerly AS/400). */
+#  define STDC
+#endif
+
+#ifndef STDC
+#  ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+#    define const       /* note: need a more gentle solution here */
+#  endif
+#endif
+
+#if defined(ZLIB_CONST) && !defined(z_const)
+#  define z_const const
+#else
+#  define z_const
+#endif
+
+#ifdef Z_SOLO
+   typedef unsigned long z_size_t;
+#else
+#  define z_longlong long long
+#  if defined(NO_SIZE_T)
+     typedef unsigned NO_SIZE_T z_size_t;
+#  elif defined(STDC)
+#    include <stddef.h>
+     typedef size_t z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
+#  undef z_longlong
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+#  ifdef MAXSEG_64K
+#    define MAX_MEM_LEVEL 8
+#  else
+#    define MAX_MEM_LEVEL 9
+#  endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+#  define MAX_WBITS   15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+            (1 << (windowBits+2)) +  (1 << (memLevel+9))
+ that is: 128K for windowBits=15  +  128K for memLevel = 8  (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+     make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+   The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
+ for small objects.
+*/
+
+                        /* Type declarations */
+
+#ifndef OF /* function prototypes */
+#  ifdef STDC
+#    define OF(args)  args
+#  else
+#    define OF(args)  ()
+#  endif
+#endif
+
+#ifndef Z_ARG /* function prototypes for stdarg */
+#  if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#    define Z_ARG(args)  args
+#  else
+#    define Z_ARG(args)  ()
+#  endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h.  If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#ifdef SYS16BIT
+#  if defined(M_I86SM) || defined(M_I86MM)
+     /* MSC small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef _MSC_VER
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#  if (defined(__SMALL__) || defined(__MEDIUM__))
+     /* Turbo C small or medium model */
+#    define SMALL_MEDIUM
+#    ifdef __BORLANDC__
+#      define FAR _far
+#    else
+#      define FAR far
+#    endif
+#  endif
+#endif
+
+#if defined(WINDOWS) || defined(WIN32)
+   /* If building or using zlib as a DLL, define ZLIB_DLL.
+    * This is not mandatory, but it offers a little performance increase.
+    */
+#  ifdef ZLIB_DLL
+#    if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
+#      ifdef ZLIB_INTERNAL
+#        define ZEXTERN extern __declspec(dllexport)
+#      else
+#        define ZEXTERN extern __declspec(dllimport)
+#      endif
+#    endif
+#  endif  /* ZLIB_DLL */
+   /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+    * define ZLIB_WINAPI.
+    * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+    */
+#  ifdef ZLIB_WINAPI
+#    ifdef FAR
+#      undef FAR
+#    endif
+#    include <windows.h>
+     /* No need for _export, use ZLIB.DEF instead. */
+     /* For complete Windows compatibility, use WINAPI, not __stdcall. */
+#    define ZEXPORT WINAPI
+#    ifdef WIN32
+#      define ZEXPORTVA WINAPIV
+#    else
+#      define ZEXPORTVA FAR CDECL
+#    endif
+#  endif
+#endif
+
+#if defined (__BEOS__)
+#  ifdef ZLIB_DLL
+#    ifdef ZLIB_INTERNAL
+#      define ZEXPORT   __declspec(dllexport)
+#      define ZEXPORTVA __declspec(dllexport)
+#    else
+#      define ZEXPORT   __declspec(dllimport)
+#      define ZEXPORTVA __declspec(dllimport)
+#    endif
+#  endif
+#endif
+
+#ifndef ZEXTERN
+#  define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+#  define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+#  define ZEXPORTVA
+#endif
+
+#ifndef FAR
+#  define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char  Byte;  /* 8 bits */
+#endif
+typedef unsigned int   uInt;  /* 16 bits or more */
+typedef unsigned long  uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+   /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+#  define Bytef Byte FAR
+#else
+   typedef Byte  FAR Bytef;
+#endif
+typedef char  FAR charf;
+typedef int   FAR intf;
+typedef uInt  FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+   typedef void const *voidpc;
+   typedef void FAR   *voidpf;
+   typedef void       *voidp;
+#else
+   typedef Byte const *voidpc;
+   typedef Byte FAR   *voidpf;
+   typedef Byte       *voidp;
+#endif
+
+#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
+#  include <limits.h>
+#  if (UINT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned
+#  elif (ULONG_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned long
+#  elif (USHRT_MAX == 0xffffffffUL)
+#    define Z_U4 unsigned short
+#  endif
+#endif
+
+#ifdef Z_U4
+   typedef Z_U4 z_crc_t;
+#else
+   typedef unsigned long z_crc_t;
+#endif
+
+#ifdef HAVE_UNISTD_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_UNISTD_H
+#endif
+
+#ifdef HAVE_STDARG_H    /* may be set to #if 1 by ./configure */
+#  define Z_HAVE_STDARG_H
+#endif
+
+#ifdef STDC
+#  ifndef Z_SOLO
+#    include <sys/types.h>      /* for off_t */
+#  endif
+#endif
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+#    include <stdarg.h>         /* for va_list */
+#  endif
+#endif
+
+#ifdef _WIN32
+#  ifndef Z_SOLO
+#    include <stddef.h>         /* for wchar_t */
+#  endif
+#endif
+
+/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
+ * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
+ * though the former does not conform to the LFS document), but considering
+ * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
+ * equivalently requesting no 64-bit operations
+ */
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+#  undef _LARGEFILE64_SOURCE
+#endif
+
+#if defined(__WATCOMC__) && !defined(Z_HAVE_UNISTD_H)
+#  define Z_HAVE_UNISTD_H
+#endif
+#ifndef Z_SOLO
+#  if defined(Z_HAVE_UNISTD_H) || defined(_LARGEFILE64_SOURCE)
+#    include <unistd.h>         /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
+#    ifdef VMS
+#      include <unixio.h>       /* for off_t */
+#    endif
+#    ifndef z_off_t
+#      define z_off_t off_t
+#    endif
+#  endif
+#endif
+
+#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
+#  define Z_LFS64
+#endif
+
+#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
+#  define Z_LARGE64
+#endif
+
+#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
+#  define Z_WANT64
+#endif
+
+#if !defined(SEEK_SET) && !defined(Z_SOLO)
+#  define SEEK_SET        0       /* Seek from beginning of file.  */
+#  define SEEK_CUR        1       /* Seek from current position.  */
+#  define SEEK_END        2       /* Set file pointer to EOF plus "offset" */
+#endif
+
+#ifndef z_off_t
+#  define z_off_t long
+#endif
+
+#if !defined(_WIN32) && defined(Z_LARGE64)
+#  define z_off64_t off64_t
+#else
+#  if defined(_WIN32) && !defined(__GNUC__) && !defined(Z_SOLO)
+#    define z_off64_t __int64
+#  else
+#    define z_off64_t z_off_t
+#  endif
+#endif
+
+/* MVS linker does not support external names larger than 8 bytes */
+#if defined(__MVS__)
+  #pragma map(deflateInit_,"DEIN")
+  #pragma map(deflateInit2_,"DEIN2")
+  #pragma map(deflateEnd,"DEEND")
+  #pragma map(deflateBound,"DEBND")
+  #pragma map(inflateInit_,"ININ")
+  #pragma map(inflateInit2_,"ININ2")
+  #pragma map(inflateEnd,"INEND")
+  #pragma map(inflateSync,"INSY")
+  #pragma map(inflateSetDictionary,"INSEDI")
+  #pragma map(compressBound,"CMBND")
+  #pragma map(inflate_table,"INTABL")
+  #pragma map(inflate_fast,"INFA")
+  #pragma map(inflate_copyright,"INCOPY")
+#endif
+
+#endif /* ZCONF_H */
diff --git a/deps/libchdr/deps/zlib-1.2.11/zlib.3 b/deps/libchdr/deps/zlib-1.2.11/zlib.3
new file mode 100644
index 00000000..bda4eb07
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zlib.3
@@ -0,0 +1,149 @@
+.TH ZLIB 3 "15 Jan 2017"
+.SH NAME
+zlib \- compression/decompression library
+.SH SYNOPSIS
+[see
+.I zlib.h
+for full description]
+.SH DESCRIPTION
+The
+.I zlib
+library is a general purpose data compression library.
+The code is thread safe, assuming that the standard library functions
+used are thread safe, such as memory allocation routines.
+It provides in-memory compression and decompression functions,
+including integrity checks of the uncompressed data.
+This version of the library supports only one compression method (deflation)
+but other algorithms may be added later
+with the same stream interface.
+.LP
+Compression can be done in a single step if the buffers are large enough
+or can be done by repeated calls of the compression function.
+In the latter case,
+the application must provide more input and/or consume the output
+(providing more output space) before each call.
+.LP
+The library also supports reading and writing files in
+.IR gzip (1)
+(.gz) format
+with an interface similar to that of stdio.
+.LP
+The library does not install any signal handler.
+The decoder checks the consistency of the compressed data,
+so the library should never crash even in the case of corrupted input.
+.LP
+All functions of the compression library are documented in the file
+.IR zlib.h .
+The distribution source includes examples of use of the library
+in the files
+.I test/example.c
+and
+.IR test/minigzip.c,
+as well as other examples in the
+.IR examples/
+directory.
+.LP
+Changes to this version are documented in the file
+.I ChangeLog
+that accompanies the source.
+.LP
+.I zlib
+is built in to many languages and operating systems, including but not limited to
+Java, Python, .NET, PHP, Perl, Ruby, Swift, and Go.
+.LP
+An experimental package to read and write files in the .zip format,
+written on top of
+.I zlib
+by Gilles Vollant (info@winimage.com),
+is available at:
+.IP
+http://www.winimage.com/zLibDll/minizip.html
+and also in the
+.I contrib/minizip
+directory of the main
+.I zlib
+source distribution.
+.SH "SEE ALSO"
+The
+.I zlib
+web site can be found at:
+.IP
+http://zlib.net/
+.LP
+The data format used by the
+.I zlib
+library is described by RFC
+(Request for Comments) 1950 to 1952 in the files:
+.IP
+http://tools.ietf.org/html/rfc1950 (for the zlib header and trailer format)
+.br
+http://tools.ietf.org/html/rfc1951 (for the deflate compressed data format)
+.br
+http://tools.ietf.org/html/rfc1952 (for the gzip header and trailer format)
+.LP
+Mark Nelson wrote an article about
+.I zlib
+for the Jan. 1997 issue of  Dr. Dobb's Journal;
+a copy of the article is available at:
+.IP
+http://marknelson.us/1997/01/01/zlib-engine/
+.SH "REPORTING PROBLEMS"
+Before reporting a problem,
+please check the
+.I zlib
+web site to verify that you have the latest version of
+.IR zlib ;
+otherwise,
+obtain the latest version and see if the problem still exists.
+Please read the
+.I zlib
+FAQ at:
+.IP
+http://zlib.net/zlib_faq.html
+.LP
+before asking for help.
+Send questions and/or comments to zlib@gzip.org,
+or (for the Windows DLL version) to Gilles Vollant (info@winimage.com).
+.SH AUTHORS AND LICENSE
+Version 1.2.11
+.LP
+Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
+.LP
+This software is provided 'as-is', without any express or implied
+warranty.  In no event will the authors be held liable for any damages
+arising from the use of this software.
+.LP
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+.LP
+.nr step 1 1
+.IP \n[step]. 3
+The origin of this software must not be misrepresented; you must not
+claim that you wrote the original software. If you use this software
+in a product, an acknowledgment in the product documentation would be
+appreciated but is not required.
+.IP \n+[step].
+Altered source versions must be plainly marked as such, and must not be
+misrepresented as being the original software.
+.IP \n+[step].
+This notice may not be removed or altered from any source distribution.
+.LP
+Jean-loup Gailly        Mark Adler
+.br
+jloup@gzip.org          madler@alumni.caltech.edu
+.LP
+The deflate format used by
+.I zlib
+was defined by Phil Katz.
+The deflate and
+.I zlib
+specifications were written by L. Peter Deutsch.
+Thanks to all the people who reported problems and suggested various
+improvements in
+.IR zlib ;
+who are too numerous to cite here.
+.LP
+UNIX manual page by R. P. C. Rodgers,
+U.S. National Library of Medicine (rodgers@nlm.nih.gov).
+.\" end of man page
diff --git a/deps/libchdr/deps/zlib-1.2.11/zlib.3.pdf b/deps/libchdr/deps/zlib-1.2.11/zlib.3.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..6fa519c5bdf5df33b2f17549e1df142c619c916c
GIT binary patch
literal 19318
zcmch<1z40#*8og65|UC&NwX~79a56gDJ<PAE#0Mb2uOEJNp}iJw@3;|3KD|gzo0&k
zPrTpv{{Qt||L(QBbI;5<bLPa%xzC(?X_Y0VSwO6uXte!%FLKbh0BivJr&ef!f<OhZ
zojJq;zzwZY1xj1mK)_A_ptOxK1S|<Qu{Q+^3!^zhoWRDmXzuCxI*U$A+_>Jenky_R
zjF^%z2*5%4XDY8<8S0-Zf~{xeY#q($F=t7g1UIzEeY+0|_eAV?>=24$8RuCFhh9H|
zi$0hvRN^^Km5ho<`99aKxmXj4#n5;-O6BBx_BCoZwCAR7Aubu+owD1I`CxRICp|A9
z&ryM3kt5Yyp;HOV{L7)Hy}`1ohXD0W;rv?W5m*1IR)qs%YE17mvG@5}^4od0&Y`0#
zK0?!KBmswNdzU(z@06vk&5OzUnsp<+O&B*)D__Zzw`51-@X7*|wgP%nZ(8Q!*Vjk3
zs8lbG)9IMIv9?D(EVs6|c{rOad)r`K&n4#TKbk8Hs9_#-uGy5a-C=%Czc}n;*dmlB
zqY&52sHI4wVCG@AT<FHaN~iU5RhKxjBe2@LzP;N#)*}Xd<L^2s<5^_=nzeb_hkGkR
zN`=v~Znu-TeK&*7d=tl9@NHmsTU(3A>FUVo+Q^lg(kr)W56Ds&>dwe)A({6uN77uV
z@IzeT@C!AuRfb4+n!R}~ytMTvWDEn|^9fhP=~Qcu;*l@+?m6fYkLxccn(BW+^az3z
z6nw{?R8r^~RFtdM$to-vz|zv-fnoRtd6cS&!MM^zwxi0Y1{le;gGNRf2GYFHXVt=e
zMmOTN$;S|qf4@Lb$6}VO{)uxXVXVjS#8mgIP1y>OLmnwClBB%dNHhJtS@PY+pk&q0
zQHIW`&gRj&hX-S2*tBGPkj<BPSI5Qc4h$)A+%MBnn_<!m(cd{+#dljtL_D{7Yg?+A
zNQCJwq@n;W7PzTK(PxFn-Y7OGs91dB-xufYwOv{;MG}ba@;E;NJS25EQk7sdjy+>Q
z!9c)wU-10R^1jRRYK7ffb%xfhWVE;l^$|ff16dRAQGnd?<#biPPp^&=yr_83KuS@|
zl?>AfvvD{il&><NCzR*S+aj|k6f-bHY?VMR8@)2z&OFs6P=A%*)8gEMn@1XHPF(AJ
zZ#nQMmB=g)v2{X<O>wJ<(v7?kV0ql(0?V&q2o?AWI+0QP$YMV0+fBGu`+vJ&zYp4@
zbkaFEEglgT^O1&O={Kj#N&{tI_}N&JyM?~5cpq<Y#Y(`uK0XDrz<nB9tH`$&Y&&ni
zz}1+)d;FnSk6JvdKh7}~maubCxjKo%KOxu2b6XZY=}=bXL&HF=Lxo;m?I#}H4!3Nj
z_3bqHhOMr~E0oeaVpa+HlUMh`9>K$1Kbo~ZveN%j)eFyT#ojviv`IqM&LuA!`;m-)
zsZ+tc@H=`A$8|<CLE^9s4qAoF7Bsyo#s{wvh9qazVH74{pIa>zEX585$*48PfFhaA
zwpSmk!C^ZPAn1nouywXgep4a5x{6Cs^m_9F8%6oe>_m?m8+l`b;6eg|xj%*ADuZ3B
zui=55!@z@?V(U;9C&utOn^%$yG+?<8oJht}FDmr#GsL(OUI}vBg_<xVKicxv(s|Nh
z6v1SI$gHvF>H<c!*exB)-bFn|7KfcgK@Eg?GRKMQprK_KRa!C+8|R0~Iwllz9co*=
zkEqj4*DzHcwi1938gx`V7jFn0pk{yDOze>AgGrH$$i{Lx=+|kqg7z+!h+p?j?bU&T
zT$0qMMt;GV6vGv2_4gID4^Pcbc-iH04^d_?IPdcv7$-cqSoEEKDV2eTV?a*dCp7-v
zx779i*BFk2SZRBPP>v6(uHP|lW|X5m0GS`fli?JuglKZ_F}jCJh=!VIsqgH;86lzK
z_wqf&%j>F#!_a+zpy!mT6rN4ngbh#B?4w`Dv4ygJ9!9I;I`qNuEWLShczx8n-LBJK
zaWODM`ABRHKe`yt!3tBeQ<Bk}(Zv`uKr`S}fwtm+Bu0@0MOFVm@pL2HKvryxerreh
z8m5)L(tMRyt79fby~2GX!sIiX_B;gN;ZY4gQaFD@F$}Pjo=pC0Rh(a(MQB%6IMSz;
z$%uzy{;T5Fa$N;NQ`Qp+8jUFWm?nccJ;kV(`50W>d7ra5w|zEgUictA;B(hNpozVA
zNy1ILehoK%HEc#<sU{{1qn#wX@I_2^C5t3ObcR*tM%=ezp>>4W^kw%wZ58M6O&^>>
z<r?IJP&B*zNp!za!0BU$BDcnX=L{aIOz;XOK=#nh*t6MvG>bA2_s%Kh4&6Rns5oP=
zXO4gzjO9+AP#iO{I2*yHM>=l{6*H`xbfzr9+bK(m$rw>5aBL+nvf`?~*IcE-3XLRX
z9K3vuf>ZxgQ8fTrex#<zxQ$?u>@_j(XQM9m8TPES2^x<+e{9`D(PkVn!5Kw8f~_&+
zN5F-liuS5{7=$R6Hr1*`TCJ@%gqReRE%brQ`}?}AJr*t&VvHj_6R44i^8nm0&zgj{
zx3v%oaq1sIHjG2Kr5)wu^U6mjEJK`xGRBI>)$@Vi(cV6$_|bV1td|_e9^!8+;kj4T
zCEC*JSRX3dmZ-Tu$akp&YI<9nAOdM6oJ(B}(#<EnfLVKoQ4L`8mjip7*f8RFK0ZDc
z&Y+BsqhFC&J>z_9P<L*!i~mLO84>%os(Kk)Mq~jaCcdsawRf(9@uc@hs$hk!PJ|WH
z^NM4e?PvKcR2kEbXtf#_bFL^)-tWTKD+iHu`x4J06#2dL<|^pt$Izxx^QGcF%@*~|
zd1>LMT;hZ&DvQoS*F=q%YHPk(ub&Qo^|`0oX^cy3=riYe^b7m6sEQYoQBCBLHkgSN
zx$sHtE=BjHhfFwjVNPi|1&XkhJ1Sm`>2Pj^JYnAp2|i?>Z+<JKh3U67?f2niw+c-5
zkmd_;HW!!O%DnppuY7m49920z-DzWaQlLd4#XWg=bKS}}{f(nWo`Q5d#+o8?_xY|N
zvzfa`-<C%6@~LM7B(rjQi!ijme$4^p<1VFoI`R=+;ZrFxD#VIQz&nsPHew<UT8kxL
zIbS4-OepygVx+&)52C`u3@qYDEr}0=ZTI(^dWM-Hii;5{ji&1L*a=YI-zV%>Jf=@1
zgan9MFIE}z4v-R{$5DG6Dok?%50~tA&xZOdNuw3&o|UFP`QSTYZaBm$!;%tPSpPas
zy^o+bfKyxi>vBxk`%1Ywvr>WkGSew-aHwV1pJ5rh2$lq5P?bma_ejkRgWug3pc&HI
zOVq%5C|C4M)DU>R0=qDp^L<%dCaPI8M#Q`Q>Sk?ib=A1Q#=|I3jJwTZthb@m>T6Sl
z3vBl#I44c)UejZ)A;rQW?(6y)!1((p7LIkI%`D=UUWqBO)idmi{10Hs6lMF#`$E%i
z%6j0^bd$S~NJR;hZ62D~Nq(v?ddc7k*pPUbX|`{ARL6jzL4jFgyzUFx>ijZVSkQxV
z&72-)Xb?xJ$#eL9Bj^G&_7!0}d+<wkI@>ZJbjN9F4I%gbu%m!g)io(;jgV0cSrY=<
zd03`j@{^qUaO<I)F2&9+S-Aw-w$jN-v&lfX5oA%m>41lppFk<ZgJu2Y@{_~h@T68F
z$bmUazwq!B%~X?`3Z_u4?sq)W<D*>>{@{1D1Qd&9g-5Go@L}Cn#V?FQn9fL1zx9|9
zR<DOv#dN5))qLH-R~`vHK7lo*s}982rtNgDTwu}gzqj!0RVj+r6cU?%O`lcz!=n%s
zgeT{%uAdlicNXgpFd~c7UzhA5=T*7-`uevxRJZHyDB4^4<+d6a7z)~Eb*-Ra$4pxr
z$1xOJsEEnfT1PxGj{L?+HQYM8)W$y3`8_c8TS6-D>|W^2X#MQy8wJB5A?tLH&F|cg
z)7kHNMTTQ}a5_@p;e0I%#sz{mu-E*vd|^?1CoBuKA|!ltg`QaHHo4I{_`n}$Arj&j
z4Vyc&7*R=GSp`p9%#&Zr<T8fu7Wb<^q?op0)j!$~CdW=(qqF-Gg7;30L(+rv%Qt&W
z5sAqsoEWcRzF1GYAlvZn50kRbMdr?4jSJHkUh5C+;+R~D@z?6h3-$JJ8u*y2&9THZ
zmSQio3SE0+r*^;Ml)CO7KtK%^+-y4~egAyJM^$O@{nU|YKKUYDTa$oGw25N`ku#|G
z*zD=0Rtb&XhnySKa=54TQ;fK#pBWGqB<HEHNiexxw^`W&2yyzqV+)?VZmfJcQJ<e%
zk`^JezZv>=_nbnZA!mDSq&ai2?)%nNvSxDolEQ<NESh%a!@W(72YSV{&nFyhT7aRJ
z6_PJjNeK%V&OTE22HL+v<(%LRpNJQMPo85FR>D=Uok`A=&X*U|Y&wc_)@gb?5GbhE
zI}m7mNOvlR@oApr`6rkr`1eh)l)(*(5{2sTch=~FUa376%3s{e{frW}awf`s{}DqA
zd;HDQphea1i1qiZvs5R0iyKQj_%C|pA~=t|of>(P)j|E*%EEesQfH6n4N4T#(TVAE
zA)Y)h>OADMhX$xl*097TP)T1kBlG5Y6g4^3MksVs4u8G>A*Xo4-Ep2Uu{|AxOrX)*
ztb&RcT*3UFD<LF%UEGgy$J4?&j5r<TkvPw0fuY!16Vs}mE^;QOTi2{H29>5!Q7318
zv_@EeN2!mv6ong7O4=ax^*(<Eo#cMHK=|n-`tj~qTlmrUOi`=xQ$hragCiLT<IP73
zEmO<S+UHy^f|>Mn&|;X0*C<220h^Rkj_cJW`22L58Fn)=hD!9#^H)+hA5@^&^&LRe
zM0Lb3YV#&@&3p&J2xK1tI^S}ecX4p=j=$l^Da)p>Z)Ou>J02kP7?zs7UwD9$x3f4B
zYBvyPql#x>s82a&b7JC$w1!*uRr`E+qe0thah%&Gpw-o*1ht;fmO&LQm99FHP*b@R
z5h0%v4rR*k34`eKx}@Cd<j5Ms)px$IcIBS-U^~;Fw};Tu?Jec)jpXew2R9oR+Rr)=
z&p+-rLA?LX4X2CN;_Im<9PiA^F%nn&T&=iDKHy$=Htv9MP)Cd$;_=G<_$$x_`$d=O
zMM%8wsMX+WA^0gDA`|)YgXvPuhKnvZRxMHANqZIhn=jnPp+5Jkx<9^^PI+N-$zXN1
zdk<aJv)XHEPAGsjj@Ed<^;8<n@Zt!v?EbyY_v@~VlJ}SSZySfgw)L<Cc{Z9I&jD`*
zBL|Wq<_~<<-?yG;uGl;yz)7x_bLg;BvNv)%pKWjV?msa2-gjUq?ECrhedOSZ>1ZP@
z{Hr3Z>hhS{*{i|=mzkoKY_uS*PcGi`+zJkZ!E6OfTjgPc^htvDN?)Y%1VwFJKWI2<
z(S@>-F=6%lYYYUHPHER4_<VUq?HLOm*Fd5&u7oVxl)4I5w{L)-=xZ(S%^!N0*PrxA
zQsTl8Dc*Ru>5^i3Y#SUh*`C?t%X!cf+9TFKI1+JhHkq$!^Cle?t~W~^YH69o04nM4
z!Y_)V99lQddY#c6Hh{lxFbE`fSCb*X+5B#PZgt-&KY@=#c0`>w!)M&OFh^nj98!<C
z*<SegT*%sA(Yr)0-JyU?yCaZhWbYH!QNc;<H^6`@&^{!Rk6nyvI2O$iW(h*$D~m+n
z>*t%3o?Y-+BL9toN#%4O8WCmgpd4wB)9YiGP4|vOl%-HJ(d{R_A-USsPb|Cg!$^JM
zA}xgBGAhimUXr=gnLkMtxrn2Bl%Q#OwoOtf@n)P4Wvfh1u<?ujStJD-h)-uVj9+B?
zl|PQIoX&$_gZbnUe<4>}#e?eV*)s2*^|<$8xbh-Pi1trEpy8juxC8ZkOkh(wU}xi>
zFih<}7&LRJ(s@~y)XG4R?v+yVcx;Vr!|8d)?(w`_#8?eyIwcZjNRmT!+GZ_Z_!hI_
zLg}}jQN%p0NrMRQH`#Rx808e+sq?-pb4u!iZyY?@(q}qkPGF17RcR3TT=%BVkRHl(
zn`W<0I)xt~WfT<k34u#2c6HpOQZ<=H{I7N2(x=mz3o#zUDxdAX)}+s*C006dVei@N
z?CI$f*T#H6%T0v8^QCPsG@K_$;_FHzYMkR(?Oyd{Q<L5#yIhQwn?-wwh)$=7@Uw|t
zQFB~GO~J~N88=BGwJ-GFj1`x=5H8K-6=bVn5E<MRLhzMXJ_Xcdun6I2lT&#x)zHv}
zzD_meMx~;w(Zfkbji<mzEWM5*Rf**}gSTGrk7!`MdK@olNwo>wV<&VJSRhM{n)HxS
z*`%E|5Pp-0&0Hqvwx1PknzAC!8OE04{4L|qV{^^9h+oMcY}(@?r)VZ8LA$b%gP|;%
zit04tRm)c!9>4}v42_iiwz_OvttC1XvN;8FW&#pKbks5+GND2x^uAn55f3t-S54oN
zJH+AwP?Jao4x%kK1aH1kP?4UGws>CKJ4U7`I?O8}9E*zgs8EG54>K|$2e#8Tpl#By
z4|n)g&<Oo;9+<<8#@tOEWg*N8fy5%>-V6FyFguIYOq^Gr-VQuePMi5a{h<O`rEPT*
zb;oY(rFB@<E-lhdN(e$|3@wu;=a*-N8dpo2Xd{e~Ep3KQh#w2Y7zsa2A*eV$v$C3V
z!n*mMUe0`2s6+LT1(G)AGo~Jtkdaj4kdu`EL5{1Kv(l>%EBlFhQoageZ5(f#DkauU
zqF?RkEIR=s`LJ!pSI4vy8>5U0-C1-65w!c{GMeR{NyRP=NNwf7_yo_909yn>^9mi|
zxdKa)=QVio!=umjG$Le}^e9zearD#kq?1w}tHPr`4-mTViktU#hI)NJ#XN)}7<Fjg
zum`rA;>w0J>WxF^hmjMq1PYdC2PByT+cn({6JM&?x}WVSQyMHj?pZwWD~pLX_u!gQ
z97MOm?~UEPwis^8*lh$w?+0NX?45pBjvMn}q{m39*8BF{Or_MB<1B|TtS4G4l57BL
znv<pD5t*d(i$YDJZ=0MY<|z*>UtC|7Gg2%$;CS;)=57vKB`Cogr@!%c*m-qr)dsf+
ze$AUqwDpA|`oxYKHpJYKT?x&e;C@PuIy=X@md2j_*Q4mGMTD7k8%*MoIftkCS)vV*
zrjYw9@76vkjdhD?ocNQ})H}ci3#hG@%}zyY3IVDFFIL13-c=Ch>J1r>VFtKff4FBv
zj3vd3?ABN~v_^$b?(6(SUksPuhBKP1eaCTIjNzE7&zyF$7k0G*;BnH}kShoF3b9j4
zNqE;!u<1(`jRyb16mul#0ZiX^6OYc^3uJ|^3y9EIE&*-w#{e_O3oE+(aMoE9?}jLA
zoc1P}dJ`qL_ycLbsR?*rPt7c!kdYD07u`lIqO2p2T{U<<TI6#q1Gh3!v9WV}glbu=
zKemONEyV7M*3u(4h3G}T8M-JbQsa<Srdn?I`3jsW<&q>bmK5M!Edh=D+rjx)O+Hi@
zmgwIs2TBGA5s!<}f2t_%&%6>xI{UmY9>@8S5mleI;R;@-jF{SNJto3|OQAi}L?%4T
zldS98`_uZUcbnaYmFF}k<Y?0|qa2BW+Xruvsk4b390v#Oww?_ffaBgzVRl5~49B&b
zykZ?PYr<094|e|S-O<}t11oG@@{(sWmZCt6w<rHH_g!H&T3$y5+apb(2MXR&>itJj
z-x-W{K>-x^_+`!8=4@~L+rPsr%-LK0!zZ}g>~Ea{5FZcsFQ<U>PoF^D!vPFbHZ}(X
z6~U&K#^Uzw09`g{GdGAGz{AI`j|Nn=hd?9J05)hZC$Jp^z;Ww6K&!#d_AX8)U}tC`
zTiMCpL=6nl1wupJ0H8Y99Rh`ua)-#M-GPE`$Af}M+e5)Yw*hcfG-zxaC;`2OVsr-l
zQT5Ad`Q^z#asU59Am48UvfobsZv@_XQFnDeDZFLEk0?HLzEBnd?s(`70LojMIs<gk
ze#{tj7urYD2SBIl0-Zbi?X-W*@ed3?5l9$AjBV`Ae_{H`W<@Z>*c93VVE;Rlesk>Z
zc=QLq(SS<IiU5$X@UM}66Spc4+AW;wZ*=^2AfXCkM}x{8+746&v7v$PI001UMZ5DD
ze}lQ@*N^D~|B3OAfxlY+Ta34({)7=Xb_U<73Q$8ySx!ox=@Hn*6%4U7F=kPEYGdi>
z0=`ouNwBktlcfX1-U$G@<JAvMxdnu3nx&I7M8d+@3BbV)R51Qo2C=gPH7!jc7S6h8
z>^!$~`nUIwmfLUW2kqZ_{8sm~ornFF{=dGtxNfBneg79YZu{PS*lr<yK>n&@|B3td
zmzRU%M?WrZv>&}VZfl@Wf6nLj3W|e`?bZhEpyx=IcIMD&+lkpZTmEsa=JFJBYx%dD
z1J#s2l=2qz?_$tV6PMM}`b`Yt_BN*fkO2q^srtXmfS2#T5Wy`K(D#4AxLd~m{^91n
zRc{`s@NR2=F@+lhx?|m4PpD9T_Ja1fE#0=>w)_|sTJ!&bakt9!cfM(8t4PU8{mHk#
zv+Qos`QJGP;^Fv@EW6c-TMfAV+>(Dg;oI`<FZ6S#Bfs?MPp$f0!+!SsIr&?ghc3`}
zT!C`t7ia(A(r<jdRkPdrTeP=odB?#!&i>;5A5cH}%*V-j2XVW(fPTv6cKADTptAmh
zP$=eK<=gf@%1}JF6TVY-=<nZ+;kMuJ=5PlG#dS;BEzGaa4>|u&`t@7S|3U}<!1s%Z
zKYzJ+Ie*h60Q=pZ0Q?V@ch~w;gMKXMzfAXbIsC(J|8B>UmS$#P=(Y^qhjr07_<)Ws
z_Rv#gOLGed8i<n(Xl4mCvq8JnAX`g27iaz7Ed9?N>`xW^b4|U|)yKxR;Qx6M7Ki!^
zx-8HwgM}UHXz+4Djgb?ie|v^)?hIXe(STykCbvEVv=+ME8#_D#-;(y@@^-M>wp&m%
zpe%IrvosO2Gq-`d8fZW@2-sEw0AlAs11efNJ42VSyM_BssexKQ(Qt9_L9s$td8qF}
z50n5Z0F8l9fhIr`ds|y$AQ)&4v;bNHZGg5wJD@$#0q6kbjJ+w)3Fr)TwlQ|L06IGu
zn}C53pgYi$;cmvzLwGRMdhXV;KY4X`{ZlqSj_H3trH2yx`(g?u<HxZ+I~yB^u<#!T
z_)nf#E8n*y*nbF_d>+O+pCFk|Q)i;R8)mO*`V0_&f-*ot86}<q(}@j_w2=+SI(`F!
zgM)(&%mEM~km2KXt=w1Nz{ghBOGnf!cPx)BFRv`_E3s9!b}ju5@jfrT%5Xj?c6J<|
zLg;o}nvh~tiYYN&HD+>^-*`RezB9a|y6~WoFm3FE(*#lP)M7CW+%TV!ola|ts#Z>b
zNcPd!0bAI<PRCG+Trw>B_%+j56~=MHYj3z@7DKy}{=tQ5>zPOzSwki&x*aj?Jn`Cu
zYuT6~EP9hXw8TPYX<T3B4Uz(0dMCoKkEqu-jUA~ppQPv!-1u@~bQ3V%<7HgGSJH(`
zd$2k<F+iUx*k`uMh!e8k*HT5B!t*imtMSd8D>I67kA~6X6e8z7JvPl@%##kmYLB^R
zHn1Tk8q6NUc<4O~4kRtuhg`8~M7(mNh-yrem<4qOfkE(}BlM&$W3L9!%?QlIIRt7(
zibezMkjeI~qb9T`2G`LWqX!ZzHbCh+a!;f7BzaKmfb7Mec0FuuEut<zyN||}nP$VE
z#Im$yjd0O^;!y6QK(lynaae+Zg6E)_48gIHpJub=h-xR2fnO)Rd<TKIr=1Jlcbscx
zdI|E!#Oo%mmnmq+)F)}9dvmJw!r=nBmclBsBzfoDs?}fa{SZkD@zbruHCMnRdY7{o
z5TqeX!`<z`;|i<5s_ZSFb+s(!0xI>~FQF>w`#q_Q1^&s`Y}Zy#Tl4H?axvMQSFU1M
zn79`aI6lCcpb9_^qA43%D46J0M2<e{XnwCtOFo==q55`!H!~q6?+qL9Dr`j5drL6l
zV{7sC7&pz{Ga-i)%y0KXRwgl|y=l2}Cs8#Dm*J%QNT>yry)zb1=X_RPbku~@%eXb|
z(yhV=59_}QueU)veeya^Jqkx8rY!18$o<{Rv$Avf<$MfW={<}HSNI|0%2g>D56yjG
zSD&54HFXmHWnzX*&Oy*<+~RY+o0ML7$ST$A?L*0j+TS8H5i}v}Tx#A^3nXW~O#&)Q
zwwplh)f#h<j%!=ui(Eg)7vaH6uuUoTE1OSB(6fU+IooP~+(<o&m&C@&M+?F}YRO4a
zzxPmQhhykDA$k5Advx|=EOX|7o^H@!u_J1Bakl7wj0VZ^bnqvtRh{D?MjlMnBE{CH
zoX9L*B2gb*r40?-9vk^4t=zjvVuTP9`X3AjM;w=&)JZ9d3QhHgQiKjKDP$v-s1aMH
z*?qgNBwYx~uS+VEkVnx~9c89WAE0KH6lwH$IV}552yN%v%4JG(Vlrpp5W<50_Au!q
zM4xZ5Ps^n<3P%Z_kC8qNNGA@9HQ<PHi_VRmC2gvUPL7R7)E4b6!2iVQAXknfEarAx
z3Y!)7VSVWvp|t64uLCZkiK(%on5E@T;|Y(PJ>0aTJaKHIS5$9GMgpa*VAC~Grgj2B
z5;-$g)N@oAd#g#Z&ywr!5NV?Wnx5K@pCWxq(N5CV8tF{pcrZ|jn;!Hn{;O3)XAB2$
z+C;yDKkM0<=KHbC=~)ew(jnFiZoC#IzH&Q86QKkl0bNo>Dl#lGF{z$&nws#>x}o2~
z5@{|&7@o2dlYIu_+R9K?red$XXC5*3CEMqVs((H6)~vh%S+?-0`*4X8sE6bAv!E4j
z0fV+|qS}Lyn}v@_2R9ETyJ&(H_WP6M&hlw{QQ{`mCO5{hl%uDurTa-N(7vU)y>1p~
zPT0dp|GYA)+<hX0FT};tWv#SW3F(NMUENF|jy$2g8bfnBW8hCQW$9ChH`EWoiq5^-
zW78cw6m%iJ!Ve6=y3EToDcww_1q)Xp`NSachPn?Ck!tvBF;#?@VA;y;1tgDo?aX9E
z)-8DM3k@YjRi7+w4O}wUx@^d9#@rCDPKTD)Wrzy35o|h>KHoG$F%ROHK#$_}4s{v*
z^7Och+TSjIvdc1(205Uf?rjvpNqQdP<cV-0mJ=vvy8B{o3+9t(amxgbBEDY1vOTG3
z)hP)rs}}<00S*SN>x$u~8`oMy8N;S9z&VAqOo9lVae@ypfmk%+rE8d>M)->Hz6Oy8
z;Rg(1V#Jbm?G_el9uB(T-sw^Ls4(-q<aY?&00B~_OHy=@$6I4>Ps^61civvyuD;d7
zo=i5eNM0Asl5C_}vLN^N>;8VMlEp;??Qk?RN56KI3xV#^wBWC1g|;J3@lKPJX$uP^
zK~;p_Pk7Q0Ly|s?TJ_|=(XijqN8pJVJX&$lC$v9#v@rV6w6-uA!lTS;Tn^-~RhnbA
zboXpn9o*s@H5J$;O@I7yCF`QHEj9X#(R5<=L}{gA`t4aBEU6}114sL{QdQ=v2v$?8
zaT;%@b@GZZzmitg8|^Ef^d1!k`H7@&xQZ6|i?c~lhFv%B3|=mDdKQ|T9ldoXPzyAi
z9adl7d+B&Zy>K}u?>d5E=|%3M+wdvTSjJh^Sv6QURVmYYA$Iv5^^v`yY8V#0=|Ir)
zVS)PN^jEnIGuzoISksnVp`*Q;y#&F2Oc#BgU-Oo(iWl(HjSsW+n~UGcqfWy{l4NdF
zvr)v!?BOOn9Z1j-8sV-@YHtersN>y8nYQ80BeD5`Ik8EisTiQq{FOP;a#v}mM?bFq
zm0YH!SvJ3tW)fv4htJkvyDtamylY<Vfufk0fx)XrzL0CeacWHb+k2+Jp1(l7+CT0B
z?tHl0o6&#sss8gF)ty7d2K9-c4mA(=@7@$Q)SZR;N&l5Y#qpo~DG>L+`BQ(mP+~xF
zs2e2-lmf~C9|2{d4wWMCF;EGp3{(ZG0o8#|fZ9KNroVVgcL9Unyrti6<^Owc3B<w6
z`MbBI=B1#fNhFedw#;2Iga3xVcfLe9>k%SeZ0K8M9EcxW(@0AAiaSF-9DcEhTDbo<
zWfykvSH>z32P6V6$6t{<mM`S6IE#(l(Rbi!&urUddfcAgr>AE!lOF}z4?lY)NH`Jp
zI4<R-)b0gv%Fp!SeeeCar<5N@_<3ajLU>$(p}Qxur$Jt7szqy`#`>mNJR1r3p;*u2
zqdv!2S(2ob2Cb9V?CdQ3nOxx%UoXFoc&MSGp`=&J*WH}w=)O$KOpMK)tf4@(od3vD
zP)jQREQ!Izx{Z~Amd=o{G@kXs5Rr0&pwWFlE?;FF^wwm3O|wEyL8nzZbc~skB$!fZ
zBlybz@>eSOV;N~qLc}alPOK4`P7Bzz7=hfd4GSFp$dE=hJ1n>5-nX>#`D2?|Z&{K-
z+D&@`jSVf*%cIUMqz|UgAU5zkV=<fD&-Y;Mt1~=LjPNXi-}g+i`D?sLbw48H_e+cv
zrJj&9Mi@O(rDoHgqr>B@2sHBJKgx^maE@4f+d0Svz*v<JU3|Il^0{H>`wLf{*)G?7
zl1A@U_ub0LqZb~8YD)GNmlsD=WqH?dL`Bj;*sC8cPhL(Ff23!@#3_=#Hy*I~_QN}y
z=h5<uEn4!qh;`nq_4Bzlq>JE~SnMLgRgSpfNfdvp9sN(}ZXOR<MZ*K9KXqmqY3g(&
zss{+n_VMo>#==$SZAE_EOUe`QXGt(wR4W!ZEfCnb=aB39C<H&t6Ls~8P6?qa@qJ-V
z_AvHK&~V_2u5WixC>etocX_<vATs7`f0$eKxOe<&6b$>bn$^&Qirr`>HWZ?ZJ-Tq7
z8CT#{$k05|nVmyaMA?W0?6-WB2MSwsQpyHQ<DR*+Zkf^WOnUcMi7mD{P-yRqtzUQ!
zOX|!wBo#Xh!yQ!lbvhGQYlP47v+L(=6(e;8O-$)~=sI)6#XH@fS9LjJay4FQ#c^n#
z(@1bu)wLFS)A~>ml>?FKLgmz?DIt4e$A&-IF1K;G%Sh{L<5O{`{6enJH^Tr9Kzp^m
z;euXejS5TITbK|AyBCl6Fv1~^R;4?PcvM6dM0K<LAM`0;aMKiJk6UWI=%IUShnt}k
z@X{}D*)Q4)&}!p*EiOk}Z?j5fzW9U<I~QT#VJnq_<Co(yLdl7@0-pVZ1oTDCxi8uz
zF5RshA16Ie^vst4KEjz7@gTKw*dwrDnjcZiMW6v!O3~mebeL(3$*)+tmTm`2yk2{%
zA(`)GOn^1S<#EQk8&1CT1+Kj#kj|^(Tu<N%O~m}5K*Z2-J&<IT&g?0e*oDwowVeku
zCxp$uB<E9Dp%KFRbzv4c&n!}Ns-M}%eIeI5*l=~&sjw<lKTF@|epuUt!dcE*0sY$8
zDx+2Wy@3V81Lu<*-F1(#KFx(matDMXjjVTxm7*M?xAr@0S}8`f2G2#ft${ZYWjGSC
zzm*#mGYAJMC3Jn9fbaX*d7tI|1x{N_ImgMDf<qBL($K_vSX=b~K9XiV$9ohELK=|T
zB1SCp)#0Z#brx4!!L%e*aX3|e5mNyX0$4SxbVmuXgn4>P=suSs7R>VRaJ}g>q`p>6
zub9jTFP_y_4Mu4yMk~o-9mxsK8nOpoCze=rh@b<=nT+uV$OC}%xIWqBm^zG8J61K;
zj$2cii%O?2Aa+q@DU_St(nv&4<|yZ>;#6>6Ykfi1YI7#beVj{sE^WIWz%5tAPCzYF
z{dD;f&-eRjc{j%+kaMe+zr-#sf1JSV>E^w4<E7cL`Q>p9H)1SC+3EmVb=zV(#V6Qm
zt<?rFv#43#yK-Qu-q_=zp$MnMlESqy&CRRQ((`>aMP{{PoJ+#}1nk*=ZehO#cK^p6
z>|bMBKcWH9Xv<wRfR76rR{Jv=!1-V9UH>^4zzK~f{4*HvFPoS2&+SVNDF1T{QwFO1
zv4v>>HGx_{9cZk;7y=C!JasZQu?9nIz-G{^ALYCF$z2@5#@_B{G{F=aPcXZSr&zi|
zOE!1G1q%-c3$PtD#$W}szKb*5MOmQH20KeTFf`;~5B-LQ9d6G}?(pB0eqjF@i*Woc
zV)G*~0k*X?vA-SH8SDzh{ntPR#KH*-4OqC@16_bFcBWt_XA^rTFwhm~2905O06l@8
zU?=;(Y|eL~@!vM*-@-os{pQTc#r1nABU%exD_VQe$FTVvKxW}ZTkRG>lc7!x0VKwu
zWhgM@Q7<Is;iWpJIBLbJ5lzmG8k0`2VMyKU227v?>obhlgc2itIB)TqIrx5adN91x
zUh%!D&Du@ht2MNBrsiPF=U~fVJV6C3qQG)JW@`A`R7>Na>_lR#g<>SUS6HBFq{GZ%
zyh|_!do1Ps+!B9pES)g;s}3vYqXWf3iv<yGK7c?Wo12}d^s8tIg^e$()Y81yF8%Lq
z^9o1OyF^~iAr!Etr@aE3C?S0<PMzl~nJQ}uYb@c5Bv_E71+&zm)`Tl48YZMBT)jxc
zlM22x_ZFlS0MH3Lh$$cR1b2E+Mn2pOC7@9>5Tq+(<7=U(rQ))+x~z>FXjD1L+3Gj#
zD=ENo=H-6&Eef~rG<=k`M(@3Jlh-rTbuP0q9;ta%wIWag8CT*1amC!0;%s#qu#%KY
zcag=Ae?)eOT6F?7SH=S|Zw1Ay^deqHKc|AVV$q@Cmtu%26;qOoDGy|Wl@F$9h62&@
z1&ar#WYmecBG3w3icqXLd)0)B0~0`8DaYCB2@Hu+UscQ!__$tuQasIOrstbZ{v<Q5
z?rQ}DR>aJE7|*FJJ*8C=#xM0E?_`HgsP9Y4d~Qp>Jv)<jLYe4{9p}77;<#h3ArZ03
zI~tBfEoAQ4_g!r(QEvv9Tq~YmMPqFdo}3DKokeYb&8=?ujJtkjpjSCzFze;=QY*D5
z)ilk9+rxx^iE|i|Z<%VDF|TR1HKI6@F%miKmO{bPZn121^ZNW^S+=$E?DOu4ROYd;
zh?cvyq;^>VwHd!=UI_6>PTEU?8i205UrnkT^KywAXfCK%ph<nW(&oM?M^(OdhrBkM
zKX;!(nUodiHAupCd~9PDjYNleQ--AHeSnV}Q~JjZm+x&8M_<l%HrNnvlp-y<>p~5x
zH3!~5Q%PrZg!|A*tTBxkOIt2w0y_iCrq#Dr{02?QpAm_J`Pp+=`$-ZG(LPiMe6y8O
z42QBW@|X!$M#X(=u>|Cn)U`>+=Pxe(Ls}zQ_e%J^*YYl@nj-k)to0usIei|DHL1fw
z%92O?kWVhpNB$j*z2?YO+Utg(g+{p~1+kdE=|x6sQ_eO`&@0nXj%nt#o`xK&qWCb1
zGT=EJpTGQsK=I)s2qgD<zvbb0ak%jKy}Et&Ip8bos=!c|dp%-gu56^FuUbPtV_%Nz
z?&UFmzW>VbbN98wp`}j+Ypfyw`Rnw<@(G3Hq(U4+%k2}L)KL{laRT=yZWAR=(yyj{
z3aNN8cA-_Eu7`;dpWrJWHz%LKbxMA@+$0ZMm8oJ{PkN{SGXEv6@N6;y-xR((N-)EI
zhyQ-vTPf|(L}X{Tl6t8V)uNFc6OO?#S_TQi1(r>hWQU|NG~_uB&@}ok4i>zCACf)F
zHxy5!eANSjFsiM>ppzH8jqwyrm4P{It6RG!jucBwPs`H0HcZ%%PfA~8;*i2v_K-Z#
zv6(4GxPAr7K`+Ic{$SJ>M#y!k{1^kLlEXCfJy2*e0>19G`Sl7Bx^age92$Uv#2+q{
z=`%=U5zG?CV<d<yLC(L1LMl9pgQarAas$8b5PM`@`z9`^lIa+0Z9_3xt$_TMMKnG&
zD3H#mom*F#dxrFlPJWLJuTA9%!7KgKPA41ok;QJVHPjUUc`PL#4cB<{K%@vRNTCK&
ze6}Jsvh|n1Y%R#-_N-g6WSz8#KUZL3&~oH-W+}@7?h*5*$kKJk{<R*emR<|o(u20>
z_~vq;f%fYqex#99k?!PVCGSqzfuigK3Z@PcOdko2s`2f}W_6tz1-;3U9OwP29Q}tO
z5oTRY4V@oM)Z$b)GTHh;g=$$1a`P=0S8J_b=|>n@eU^h#iv`X$n3DFt*=%hkwjk?9
zDDD<^0^+)F%3bVVntNL}81A$(UA!$5Dw-&{T<kt|du%FIshP6lx@l<?jdagyn<L21
zF$b_xY658R>%ErdHDUT56S!r@WTrQ4C6ghD@Exgd1@3i!?5I^Qx+^TNQ7-ph^pc9G
zfoWG!7Q!&VW^i4QDCj-lVD{sX%sHMHS*kTcHMK`{j)~Cw8LGe$gt8Y#0~UfbY!@=E
zm?X|rrFQQ$cX>4EAG}eTdDc{c@_Fa&TgF7cOeb7<48IJ`Nfw>zWQ~Wf=u%d>%O7tB
z?4Gt}EeHD{*{1h|_3<*W5V)`?FR<VE=Rd1X`Jl8U^kCNExQ1Ir>{WbRTVpYH<G3gC
zXbtNpo?yW$)F%{WZEuxU9tFLRJGpGcc#3M9(X9S1Uyq1gv3t-bhW@}TU3)L<yq7x%
zNGzV>!$X~4s%>6c@Yb*cE^eX_ck?YT=YEZcRAcbGbL0dzMS@d>qQ|HuduVi8zXU~$
zh$jL8+GmCHv%Igp=+_;+Z0#A#mSNjH7~s&I44Z+DL`1sR!_H^X+n2yrCX>0s?HS2T
zyHGL~_RqUk#=Q&E+s-}aEb|_ss3HyvD-W12q?qMoHJOLCD|7@h?7=)A_&th<KVvC@
z`xyuwcPG%35i2|T<k2<X84^?jIB*b89yb(z+jZSvmY>B-dwOp}U)(U;c9)6!WR5%l
zF`@M}J&(QJ$8_4yVOWYns$O3urMB*E*6eTQ;|pAfOkd*6-?-0Fi2{!q;(HpABdvQy
zi~Ju*G+exRWh^NzM|4j3+T1u3#=lCoYX(l{DSDV`x)w?Yn3uFPm5^CWj7n=w@*}=A
z{gp)>(K3dQWTVas)p&W3Ji;;Sbf7?b3;$Z$LYcI+G6a`06~-rb*#z9t&!?uA;1CAy
z)uKD*imq|-9j8nuZ`{%95;X5Gk{TUE-)C86Km{{8v3(hbaY|R)D{=bR9cC~>0A1{|
z^+blS5TC#tQ2O-C;ANMpkDz8-&a+c;2sYV5)7D$~X4iwS$DDY(!<XN+CuJUqy$j{(
z`h=cI&cd5uK;~)zS8S!rV<QO}YM4Z_$$poMJl3GF(@Wqr@)%9B#rg~UHfB=tOJ6Hq
z4s&s0mEx=S4kFvwJZ=L$Q9dy=aJixPGSJXTN4MGgQ~8~d?kn(18c}Lv;mWJzTFg9(
zRHG?={R+lh`<bqNR}|s(14j<z21&J#ncF^N(^ihOu%?{L{bp0rPaa)6W(gEj^W5`{
zu&ip;bgnumQ*DE+dEX-pmldbO(Lm4tM$diSTz``XJ*bbkG!8A=j@dzqkrn!)qwzsz
ziv3JAkF3n1<j`oDByI_T@^ochG7?;+Ao@Z?puN`WK<zNHd+_*dR4e4V+P9E}*H2bn
zsHz!N%j5q22?=H9myN(Hh0fI{p|koMv$N=FhWgF(5f}B=@#%>nXoQ2cGtm&_*lb$_
z6eUcrXS145(s;65n(^Ekd-F4suunhBr*j$XkmD_7DOup$UufP(f#y|eU|z=)ylRb$
zm3EQv6F+0**EMkYHq6sJnk^uR@nA71l>UfTwpYrUdJe66sb+zAdAXWniT3$w8PYcO
zxF4i^ecz#FP*qqN0x*tR->9^ENUNdny3YyACdymTRY{nnv`nsSur<gorv~5L?u3uD
zUN+T^Fh#@BJO3$Ob8j^RoYqv3-M)zHW!k3<7nETSc71xfcRlFc!OtJ~+5~S>TzUE6
zox>n(>z*7HPh_3PHSiYWyWzGPRy}sGYq~m0A%L|<M4Tf?cYN@;Kd_pEwI|Y;&DH-)
zpS2-*dx87m;0g^rA#UzL=)2L(1}lQsT@?~xi|GC_4I6JU^*$KiC*4aR!M>mUts4fH
zd>oUd=lkFT46X(7rOb_M#(QOYB^rB7%x|SEpEV%wp)?Cl`I;?nxFeF@0Is%Z=wz(x
za&W{1;lk*2d+XI7zEH0~m*Qv`UdVn8X9i5Gwx9N|tDNy4T)Qva&sq3fBUgi*`Prdd
zvAfx$;Ak92tH`iaffgtG&YN#!Cu26t&KePcT^LT7R9`Mzd-rF?gZ6YBXm*IoKfLc6
z`mn7oZBSnxmrs+`YFR*1OQm@ELSu<F>m`>VGhfmY|0fJ?x`ZzfqQI->%t+7F(jZxm
zT6CrS>_<!qahd6j^h_d-U)49FpSpq5N55XiuhW{#da<I;gLajjMtWv2VzOPo<X$d?
ze=4DNp_$H5X&}3|HaLAPbT(2cs(EqcGTjbi$m1;;j>kM|HHAqvj;jDh61a{?Ys<5^
zG`ChBEQ}DXu-Wf(5y1Z5(scYv)qF@SZroy#U}l96k{%KQ3~Rd9HD~FNi@rG^(wYXB
zU)OYty?J@_O1;;pcrIUZ#4J-V1fVzA0{$%ME^D(v(-+d)$eRq(QqK@qY_OCv33yaD
zbzJY%4PUtO3RL5|_f9!a6YojR+m<)e!At%3zB?iM5+Mt{3Tzc$8X`7IuFYzDg7kQ!
z-$0)cew0GtdxuX|csh_&OHl1Y@K(6~FdKnuTzrVw<&A5_K<o|kX8(6y9Sh#C8KKF(
zMeUA)T$29Hc_b5?SahlnWs}zc2Oh({t75WW)z<M-HRH|@sSOSYf^Z3j(bf;0&DxP&
zk+XCcu)1VnT$#zUd_(#p-aUeG$0NQ+B_Tji5ToT|(kcH8RroR)WB$c+51Cm@u!|{&
z>sDi&WiKCb=1K9==|t|<n8^+1{IQyT`b)cKRDnu^0^L<$qI**AG+hTgj@#C2FIj8I
zjR>)XQ3P?^)dTI3=Wx?*5Y`@8e8ww6^p=g$>heiJfqOMj%PZlInZ!eh*ZBzHz!;k!
zgZZmPJ^bPm6AH;uj5%uZD^DZ#@k5kFZhviwZ5E4W#Ck=7{skk8!Z)d3lz<sEZ#1+x
z8P2crB*yc~kd2&|D6s0RkH%+wnhQS)<hPuzmnWjzX^V)?V(Gu}`8pHZzm0=c<A`yw
z@^lGZx5vwiKBQTx6CUFXNNBbeUbS<Rn}JM%Kkp{JB~=+lHCbQ+BtlY&d8mRqLHR(i
z8uP64o5gU{(4gAkqR!=XQtQxT-l3JX`7V5j)#%IZI!8W(M$48Z>&LDOU7Wky+JyFj
zDwFo*aWB`Syqe<Aw{?T+NKguR*L=TGt}SD~A8eq|B2)c->8+d_T!2^*x8MUu^4+>O
zwCWOzQFMD=Un<Y5C3o=ys_QH|AE%?XSeH)Rhehks(OhnFg*PG^H1BDZY{!@3#7qY@
zmCp=;3_v<Arw(PWJ;OBkNP>BTIP<WJ_Y^V+E&Kd@k~uJCO)|d7Os2}HE^r1oFibZX
z%1A0SkDA?7NqYUcYA;}Pv9rVO8_HJV6=Ci%Bpm51*QG;)fllv_u?k){rry-wXA}S{
zXGh+Dn9ZPV`8j;m<8eq?g{MM#>V00T0kJwFyhxwNZ}K0Nrt07XxEtmYZlycmJyw8_
zAH*G@_lWN`5(1mjU@#X2Osn+ufKb16d#%7RHHB3x0-4~maU57?f6a1YvmY|lgSj8j
zM%R3LRmrpnY-yYNJ}Xc7Ae`Gk*PXAJvG{nsGGP1>UR)|}Y}(~cSsm;%<mT0H1rm$r
z2iIrK@O5Fn&xD-xrdo)NcjRVw<~pri2juq5Xw6(nq(A$_2N&(7&sj3GqoZeO&-G~}
zu~k}`MLWHC?m`ypaI<eN=VTVaEYFnbOZA3w9iNY9Uc^2WHiMWe_a4KhZ37m0`Hdq?
zO0;dy%$!|1ki1%Yyff*UZj=2Yxyy8_cQVe0XLcXoyb*sgguB_bnOfRC>ylZO-Ab&t
zXgQv?2MjeTIqdBFbOlnHQ=9G|ss!Bh)+sM$t98p<(5efK(t&I;HIiDrI(b0n>=f}t
zdj{?BGF6Tq;mCcoK(!GS4N*b(t$aaOo;kz8sm+O6?N>SJy=@mY;d7J(geULj8R^}T
zCOhwc^a-PRO>D<GPA9OsA+OC6g*ycL?m(ryjbl|L=)F6v?;-#PD^}SRD$Ao*5ke7r
z<7>3fi^1gh{W0OTSE1*J&BKJ<D&Hv-FTCewn)tIPW18u77Ip~Do(kbdr1)O@Qd@QW
z(Vgb4fz5{&+hPN}m1$9HcGU26-=96Ce)lncE#_cht@*pX_U1mm+hNea{<zmXfr)M9
zWPVzM&2m*XWS!uCyjs34cWmd!Xc9#QhEKk*u=Sp0|BU|K<(U4Sg!C)>^v@g@Ek%W2
z2`(TGPVPUBYy?Hzp~*DRKpNmF*xb@ii0-htoep4WDnzHrrO2k}AOW_peB|i_R`YzU
zZsKWe!e>e+B8(;|Vhnw93C!Yd?7(7n_Xilx0BBl}5S4_gf;81_2X}sVTL)Wc5-h;o
z*2d16-(86A4ul_CzO4q*0q(j$tcB>b6qNxGdwUyeOK7SsD<3O}1;oJ&Fmr;YF}c}0
zSpz_<+;mWsPNru3s?bE`9|)mWLUa}ohyy<m=;r3e>c+up?_>@H@$vBi+1P>5Lmp@k
z7H1DTh_O41oijCbygNcHwf+FJb7sAxi4~f71hmuwf>_yrzXtks{AX!z2e=*B_^G`M
zM2ODC#nP0Y&6L}Omy^>NN+vfu3kYP!&cX(LvGB5S@_^aQ*i4N%O@0mM@Ee(b84ub3
z9gqXc2WW{Onu%@dVghy&qLWcp0LWO_J41e?$pWD4;G+Znoc{k6RZ!XdI~Sk={mBMF
zXdiw>XcnrOha@zQT$r5=#KXb{6%Cg<8y`Q2jh~&9g^ib=jZF~vx1LbpLqR2=e}Dr0
zLzMpl^tYbBf!aH%Lsdf9+{xa|4B!flnBGbP1Z4&*`;QUtz<%};1m4Q%H}3pj^UK&l
zQSpD~n62%tO5gV5m$Wx=v4y6{$x8meQJDX08Jn8?L9m01lg*uVn3@2=HsD*@oS|X{
zK`Ayh;Wx8)vNeY4wy}c)G?mX70%bH1s*`lWg3v~YB{ak?d}n)NHjp0$8_Qc;;<quj
zGZ&(BXE6nv8N1j(=!Box-Ssy8Aq^W#;U9%NIN+bS{zX=IRZ!c8iVgV3${~yf_&<96
z-y!;QxrAmB2+_ISE{6ZeC;06k{g++S|C?DeK(ioktpv*OTRWkb<i9mCHV}x5jSs|j
zXJ2$|Y;@3WP&@ozdi=Bo`hT+*hCd!DaCZWmK{I37xzTPHtRF7`G(&-d3t$HLS;q~!
z&BeZb0qlO(fp|IjpxLs2sbk}YrX2shj*Eu}dJ^^5er&gy$$zWk;QkkU>>N<W{S}r6
z#QFCzKp;NOe~iJ+%MQ&){tGOWfw#}l{H2bKo0H@3b7SM-<@o2^*m<A_E`NjNVEacM
z7azwzXyM?weg5LFuv|P`|D=V3>mPjK=HUHD9WUq~^nutodH+5Jh@F%DAGGlB{DTKj
z1^ZpT5GP}3?yM8^Z><18HA_!0R5t+7M~Cd85BxxrVpIV@Svxa(z+I{cG&w~QperaT
zF3!y-CM7N<#v#Sa&IK(<Ku4C~li*<!=aiHX7yjRS=-)n1;|wu&g4`)2h?koa`Zrs&
JQcBWj{|}C3j9UNz

literal 0
HcmV?d00001

diff --git a/deps/libchdr/deps/zlib-1.2.11/zlib.h b/deps/libchdr/deps/zlib-1.2.11/zlib.h
new file mode 100644
index 00000000..f09cdaf1
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zlib.h
@@ -0,0 +1,1912 @@
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+  version 1.2.11, January 15th, 2017
+
+  Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Jean-loup Gailly        Mark Adler
+  jloup@gzip.org          madler@alumni.caltech.edu
+
+
+  The data format used by the zlib library is described by RFCs (Request for
+  Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
+  (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format).
+*/
+
+#ifndef ZLIB_H
+#define ZLIB_H
+
+#include "zconf.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define ZLIB_VERSION "1.2.11"
+#define ZLIB_VERNUM 0x12b0
+#define ZLIB_VER_MAJOR 1
+#define ZLIB_VER_MINOR 2
+#define ZLIB_VER_REVISION 11
+#define ZLIB_VER_SUBREVISION 0
+
+/*
+    The 'zlib' compression library provides in-memory compression and
+  decompression functions, including integrity checks of the uncompressed data.
+  This version of the library supports only one compression method (deflation)
+  but other algorithms will be added later and will have the same stream
+  interface.
+
+    Compression can be done in a single step if the buffers are large enough,
+  or can be done by repeated calls of the compression function.  In the latter
+  case, the application must provide more input and/or consume the output
+  (providing more output space) before each call.
+
+    The compressed data format used by default by the in-memory functions is
+  the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped
+  around a deflate stream, which is itself documented in RFC 1951.
+
+    The library also supports reading and writing files in gzip (.gz) format
+  with an interface similar to that of stdio using the functions that start
+  with "gz".  The gzip format is different from the zlib format.  gzip is a
+  gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
+
+    This library can optionally read and write gzip and raw deflate streams in
+  memory as well.
+
+    The zlib format was designed to be compact and fast for use in memory
+  and on communications channels.  The gzip format was designed for single-
+  file compression on file systems, has a larger header than zlib to maintain
+  directory information, and uses a different, slower check method than zlib.
+
+    The library does not install any signal handler.  The decoder checks
+  the consistency of the compressed data, so the library should never crash
+  even in the case of corrupted input.
+*/
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void   (*free_func)  OF((voidpf opaque, voidpf address));
+
+struct internal_state;
+
+typedef struct z_stream_s {
+    z_const Bytef *next_in;     /* next input byte */
+    uInt     avail_in;  /* number of bytes available at next_in */
+    uLong    total_in;  /* total number of input bytes read so far */
+
+    Bytef    *next_out; /* next output byte will go here */
+    uInt     avail_out; /* remaining free space at next_out */
+    uLong    total_out; /* total number of bytes output so far */
+
+    z_const char *msg;  /* last error message, NULL if no error */
+    struct internal_state FAR *state; /* not visible by applications */
+
+    alloc_func zalloc;  /* used to allocate the internal state */
+    free_func  zfree;   /* used to free the internal state */
+    voidpf     opaque;  /* private data object passed to zalloc and zfree */
+
+    int     data_type;  /* best guess about the data type: binary or text
+                           for deflate, or the decoding state for inflate */
+    uLong   adler;      /* Adler-32 or CRC-32 value of the uncompressed data */
+    uLong   reserved;   /* reserved for future use */
+} z_stream;
+
+typedef z_stream FAR *z_streamp;
+
+/*
+     gzip header information passed to and from zlib routines.  See RFC 1952
+  for more details on the meanings of these fields.
+*/
+typedef struct gz_header_s {
+    int     text;       /* true if compressed data believed to be text */
+    uLong   time;       /* modification time */
+    int     xflags;     /* extra flags (not used when writing a gzip file) */
+    int     os;         /* operating system */
+    Bytef   *extra;     /* pointer to extra field or Z_NULL if none */
+    uInt    extra_len;  /* extra field length (valid if extra != Z_NULL) */
+    uInt    extra_max;  /* space at extra (only when reading header) */
+    Bytef   *name;      /* pointer to zero-terminated file name or Z_NULL */
+    uInt    name_max;   /* space at name (only when reading header) */
+    Bytef   *comment;   /* pointer to zero-terminated comment or Z_NULL */
+    uInt    comm_max;   /* space at comment (only when reading header) */
+    int     hcrc;       /* true if there was or will be a header crc */
+    int     done;       /* true when done reading gzip header (not used
+                           when writing a gzip file) */
+} gz_header;
+
+typedef gz_header FAR *gz_headerp;
+
+/*
+     The application must update next_in and avail_in when avail_in has dropped
+   to zero.  It must update next_out and avail_out when avail_out has dropped
+   to zero.  The application must initialize zalloc, zfree and opaque before
+   calling the init function.  All other fields are set by the compression
+   library and must not be updated by the application.
+
+     The opaque value provided by the application will be passed as the first
+   parameter for calls of zalloc and zfree.  This can be useful for custom
+   memory management.  The compression library attaches no meaning to the
+   opaque value.
+
+     zalloc must return Z_NULL if there is not enough memory for the object.
+   If zlib is used in a multi-threaded application, zalloc and zfree must be
+   thread safe.  In that case, zlib is thread-safe.  When zalloc and zfree are
+   Z_NULL on entry to the initialization function, they are set to internal
+   routines that use the standard library functions malloc() and free().
+
+     On 16-bit systems, the functions zalloc and zfree must be able to allocate
+   exactly 65536 bytes, but will not be required to allocate more than this if
+   the symbol MAXSEG_64K is defined (see zconf.h).  WARNING: On MSDOS, pointers
+   returned by zalloc for objects of exactly 65536 bytes *must* have their
+   offset normalized to zero.  The default allocation function provided by this
+   library ensures this (see zutil.c).  To reduce memory requirements and avoid
+   any allocation of 64K objects, at the expense of compression ratio, compile
+   the library with -DMAX_WBITS=14 (see zconf.h).
+
+     The fields total_in and total_out can be used for statistics or progress
+   reports.  After compression, total_in holds the total size of the
+   uncompressed data and may be saved for use by the decompressor (particularly
+   if the decompressor wants to decompress everything in a single step).
+*/
+
+                        /* constants */
+
+#define Z_NO_FLUSH      0
+#define Z_PARTIAL_FLUSH 1
+#define Z_SYNC_FLUSH    2
+#define Z_FULL_FLUSH    3
+#define Z_FINISH        4
+#define Z_BLOCK         5
+#define Z_TREES         6
+/* Allowed flush values; see deflate() and inflate() below for details */
+
+#define Z_OK            0
+#define Z_STREAM_END    1
+#define Z_NEED_DICT     2
+#define Z_ERRNO        (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR   (-3)
+#define Z_MEM_ERROR    (-4)
+#define Z_BUF_ERROR    (-5)
+#define Z_VERSION_ERROR (-6)
+/* Return codes for the compression/decompression functions. Negative values
+ * are errors, positive values are used for special but normal events.
+ */
+
+#define Z_NO_COMPRESSION         0
+#define Z_BEST_SPEED             1
+#define Z_BEST_COMPRESSION       9
+#define Z_DEFAULT_COMPRESSION  (-1)
+/* compression levels */
+
+#define Z_FILTERED            1
+#define Z_HUFFMAN_ONLY        2
+#define Z_RLE                 3
+#define Z_FIXED               4
+#define Z_DEFAULT_STRATEGY    0
+/* compression strategy; see deflateInit2() below for details */
+
+#define Z_BINARY   0
+#define Z_TEXT     1
+#define Z_ASCII    Z_TEXT   /* for compatibility with 1.2.2 and earlier */
+#define Z_UNKNOWN  2
+/* Possible values of the data_type field for deflate() */
+
+#define Z_DEFLATED   8
+/* The deflate compression method (the only one supported in this version) */
+
+#define Z_NULL  0  /* for initializing zalloc, zfree, opaque */
+
+#define zlib_version zlibVersion()
+/* for compatibility with versions < 1.0.2 */
+
+
+                        /* basic functions */
+
+ZEXTERN const char * ZEXPORT zlibVersion OF((void));
+/* The application can compare zlibVersion and ZLIB_VERSION for consistency.
+   If the first character differs, the library code actually used is not
+   compatible with the zlib.h header file used by the application.  This check
+   is automatically made by deflateInit and inflateInit.
+ */
+
+/*
+ZEXTERN int ZEXPORT deflateInit OF((z_streamp strm, int level));
+
+     Initializes the internal stream state for compression.  The fields
+   zalloc, zfree and opaque must be initialized before by the caller.  If
+   zalloc and zfree are set to Z_NULL, deflateInit updates them to use default
+   allocation functions.
+
+     The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9:
+   1 gives best speed, 9 gives best compression, 0 gives no compression at all
+   (the input data is simply copied a block at a time).  Z_DEFAULT_COMPRESSION
+   requests a default compromise between speed and compression (currently
+   equivalent to level 6).
+
+     deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if level is not a valid compression level, or
+   Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible
+   with the version assumed by the caller (ZLIB_VERSION).  msg is set to null
+   if there is no error message.  deflateInit does not perform any compression:
+   this will be done by deflate().
+*/
+
+
+ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
+/*
+    deflate compresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full.  It may introduce
+  some output latency (reading input without producing any output) except when
+  forced to flush.
+
+    The detailed semantics are as follows.  deflate performs one or both of the
+  following actions:
+
+  - Compress more input starting at next_in and update next_in and avail_in
+    accordingly.  If not all input can be processed (because there is not
+    enough room in the output buffer), next_in and avail_in are updated and
+    processing will resume at this point for the next call of deflate().
+
+  - Generate more output starting at next_out and update next_out and avail_out
+    accordingly.  This action is forced if the parameter flush is non zero.
+    Forcing flush frequently degrades the compression ratio, so this parameter
+    should be set only when necessary.  Some output may be provided even if
+    flush is zero.
+
+    Before the call of deflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming more
+  output, and updating avail_in or avail_out accordingly; avail_out should
+  never be zero before the call.  The application can consume the compressed
+  output when it wants, for example when the output buffer is full (avail_out
+  == 0), or after each call of deflate().  If deflate returns Z_OK and with
+  zero avail_out, it must be called again after making room in the output
+  buffer because there might be more output pending. See deflatePending(),
+  which can be used if desired to determine whether or not there is more ouput
+  in that case.
+
+    Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
+  decide how much data to accumulate before producing output, in order to
+  maximize compression.
+
+    If the parameter flush is set to Z_SYNC_FLUSH, all pending output is
+  flushed to the output buffer and the output is aligned on a byte boundary, so
+  that the decompressor can get all input data available so far.  (In
+  particular avail_in is zero after the call if enough output space has been
+  provided before the call.) Flushing may degrade compression for some
+  compression algorithms and so it should be used only when necessary.  This
+  completes the current deflate block and follows it with an empty stored block
+  that is three bits plus filler bits to the next byte, followed by four bytes
+  (00 00 ff ff).
+
+    If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the
+  output buffer, but the output is not aligned to a byte boundary.  All of the
+  input data so far will be available to the decompressor, as for Z_SYNC_FLUSH.
+  This completes the current deflate block and follows it with an empty fixed
+  codes block that is 10 bits long.  This assures that enough bytes are output
+  in order for the decompressor to finish the block before the empty fixed
+  codes block.
+
+    If flush is set to Z_BLOCK, a deflate block is completed and emitted, as
+  for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to
+  seven bits of the current block are held to be written as the next byte after
+  the next deflate block is completed.  In this case, the decompressor may not
+  be provided enough bits at this point in order to complete decompression of
+  the data provided so far to the compressor.  It may need to wait for the next
+  block to be emitted.  This is for advanced applications that need to control
+  the emission of deflate blocks.
+
+    If flush is set to Z_FULL_FLUSH, all output is flushed as with
+  Z_SYNC_FLUSH, and the compression state is reset so that decompression can
+  restart from this point if previous compressed data has been damaged or if
+  random access is desired.  Using Z_FULL_FLUSH too often can seriously degrade
+  compression.
+
+    If deflate returns with avail_out == 0, this function must be called again
+  with the same value of the flush parameter and more output space (updated
+  avail_out), until the flush is complete (deflate returns with non-zero
+  avail_out).  In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that
+  avail_out is greater than six to avoid repeated flush markers due to
+  avail_out == 0 on return.
+
+    If the parameter flush is set to Z_FINISH, pending input is processed,
+  pending output is flushed and deflate returns with Z_STREAM_END if there was
+  enough output space.  If deflate returns with Z_OK or Z_BUF_ERROR, this
+  function must be called again with Z_FINISH and more output space (updated
+  avail_out) but no more input data, until it returns with Z_STREAM_END or an
+  error.  After deflate has returned Z_STREAM_END, the only possible operations
+  on the stream are deflateReset or deflateEnd.
+
+    Z_FINISH can be used in the first deflate call after deflateInit if all the
+  compression is to be done in a single step.  In order to complete in one
+  call, avail_out must be at least the value returned by deflateBound (see
+  below).  Then deflate is guaranteed to return Z_STREAM_END.  If not enough
+  output space is provided, deflate will not return Z_STREAM_END, and it must
+  be called again as described above.
+
+    deflate() sets strm->adler to the Adler-32 checksum of all input read
+  so far (that is, total_in bytes).  If a gzip stream is being generated, then
+  strm->adler will be the CRC-32 checksum of the input read so far.  (See
+  deflateInit2 below.)
+
+    deflate() may update strm->data_type if it can make a good guess about
+  the input data type (Z_BINARY or Z_TEXT).  If in doubt, the data is
+  considered binary.  This field is only for information purposes and does not
+  affect the compression algorithm in any manner.
+
+    deflate() returns Z_OK if some progress has been made (more input
+  processed or more output produced), Z_STREAM_END if all input has been
+  consumed and all output has been produced (only when flush is set to
+  Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
+  if next_in or next_out was Z_NULL or the state was inadvertently written over
+  by the application), or Z_BUF_ERROR if no progress is possible (for example
+  avail_in or avail_out was zero).  Note that Z_BUF_ERROR is not fatal, and
+  deflate() can be called again with more input and more output space to
+  continue compressing.
+*/
+
+
+ZEXTERN int ZEXPORT deflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any pending
+   output.
+
+     deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the
+   stream state was inconsistent, Z_DATA_ERROR if the stream was freed
+   prematurely (some input or output was discarded).  In the error case, msg
+   may be set but then points to a static string (which must not be
+   deallocated).
+*/
+
+
+/*
+ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
+
+     Initializes the internal stream state for decompression.  The fields
+   next_in, avail_in, zalloc, zfree and opaque must be initialized before by
+   the caller.  In the current version of inflate, the provided input is not
+   read or consumed.  The allocation of a sliding window will be deferred to
+   the first call of inflate (if the decompression does not complete on the
+   first call).  If zalloc and zfree are set to Z_NULL, inflateInit updates
+   them to use default allocation functions.
+
+     inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+   version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+   invalid, such as a null pointer to the structure.  msg is set to null if
+   there is no error message.  inflateInit does not perform any decompression.
+   Actual decompression will be done by inflate().  So next_in, and avail_in,
+   next_out, and avail_out are unused and unchanged.  The current
+   implementation of inflateInit() does not process any header information --
+   that is deferred until inflate() is called.
+*/
+
+
+ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
+/*
+    inflate decompresses as much data as possible, and stops when the input
+  buffer becomes empty or the output buffer becomes full.  It may introduce
+  some output latency (reading input without producing any output) except when
+  forced to flush.
+
+  The detailed semantics are as follows.  inflate performs one or both of the
+  following actions:
+
+  - Decompress more input starting at next_in and update next_in and avail_in
+    accordingly.  If not all input can be processed (because there is not
+    enough room in the output buffer), then next_in and avail_in are updated
+    accordingly, and processing will resume at this point for the next call of
+    inflate().
+
+  - Generate more output starting at next_out and update next_out and avail_out
+    accordingly.  inflate() provides as much output as possible, until there is
+    no more input data or no more space in the output buffer (see below about
+    the flush parameter).
+
+    Before the call of inflate(), the application should ensure that at least
+  one of the actions is possible, by providing more input and/or consuming more
+  output, and updating the next_* and avail_* values accordingly.  If the
+  caller of inflate() does not provide both available input and available
+  output space, it is possible that there will be no progress made.  The
+  application can consume the uncompressed output when it wants, for example
+  when the output buffer is full (avail_out == 0), or after each call of
+  inflate().  If inflate returns Z_OK and with zero avail_out, it must be
+  called again after making room in the output buffer because there might be
+  more output pending.
+
+    The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH,
+  Z_BLOCK, or Z_TREES.  Z_SYNC_FLUSH requests that inflate() flush as much
+  output as possible to the output buffer.  Z_BLOCK requests that inflate()
+  stop if and when it gets to the next deflate block boundary.  When decoding
+  the zlib or gzip format, this will cause inflate() to return immediately
+  after the header and before the first block.  When doing a raw inflate,
+  inflate() will go ahead and process the first block, and will return when it
+  gets to the end of that block, or when it runs out of data.
+
+    The Z_BLOCK option assists in appending to or combining deflate streams.
+  To assist in this, on return inflate() always sets strm->data_type to the
+  number of unused bits in the last byte taken from strm->next_in, plus 64 if
+  inflate() is currently decoding the last block in the deflate stream, plus
+  128 if inflate() returned immediately after decoding an end-of-block code or
+  decoding the complete header up to just before the first byte of the deflate
+  stream.  The end-of-block will not be indicated until all of the uncompressed
+  data from that block has been written to strm->next_out.  The number of
+  unused bits may in general be greater than seven, except when bit 7 of
+  data_type is set, in which case the number of unused bits will be less than
+  eight.  data_type is set as noted here every time inflate() returns for all
+  flush options, and so can be used to determine the amount of currently
+  consumed input in bits.
+
+    The Z_TREES option behaves as Z_BLOCK does, but it also returns when the
+  end of each deflate block header is reached, before any actual data in that
+  block is decoded.  This allows the caller to determine the length of the
+  deflate block header for later use in random access within a deflate block.
+  256 is added to the value of strm->data_type when inflate() returns
+  immediately after reaching the end of the deflate block header.
+
+    inflate() should normally be called until it returns Z_STREAM_END or an
+  error.  However if all decompression is to be performed in a single step (a
+  single call of inflate), the parameter flush should be set to Z_FINISH.  In
+  this case all pending input is processed and all pending output is flushed;
+  avail_out must be large enough to hold all of the uncompressed data for the
+  operation to complete.  (The size of the uncompressed data may have been
+  saved by the compressor for this purpose.)  The use of Z_FINISH is not
+  required to perform an inflation in one step.  However it may be used to
+  inform inflate that a faster approach can be used for the single inflate()
+  call.  Z_FINISH also informs inflate to not maintain a sliding window if the
+  stream completes, which reduces inflate's memory footprint.  If the stream
+  does not complete, either because not all of the stream is provided or not
+  enough output space is provided, then a sliding window will be allocated and
+  inflate() can be called again to continue the operation as if Z_NO_FLUSH had
+  been used.
+
+     In this implementation, inflate() always flushes as much output as
+  possible to the output buffer, and always uses the faster approach on the
+  first call.  So the effects of the flush parameter in this implementation are
+  on the return value of inflate() as noted below, when inflate() returns early
+  when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of
+  memory for a sliding window when Z_FINISH is used.
+
+     If a preset dictionary is needed after this call (see inflateSetDictionary
+  below), inflate sets strm->adler to the Adler-32 checksum of the dictionary
+  chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
+  strm->adler to the Adler-32 checksum of all output produced so far (that is,
+  total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
+  below.  At the end of the stream, inflate() checks that its computed Adler-32
+  checksum is equal to that saved by the compressor and returns Z_STREAM_END
+  only if the checksum is correct.
+
+    inflate() can decompress and check either zlib-wrapped or gzip-wrapped
+  deflate data.  The header type is detected automatically, if requested when
+  initializing with inflateInit2().  Any information contained in the gzip
+  header is not retained unless inflateGetHeader() is used.  When processing
+  gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output
+  produced so far.  The CRC-32 is checked against the gzip trailer, as is the
+  uncompressed length, modulo 2^32.
+
+    inflate() returns Z_OK if some progress has been made (more input processed
+  or more output produced), Z_STREAM_END if the end of the compressed data has
+  been reached and all uncompressed output has been produced, Z_NEED_DICT if a
+  preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
+  corrupted (input stream not conforming to the zlib format or incorrect check
+  value, in which case strm->msg points to a string with a more specific
+  error), Z_STREAM_ERROR if the stream structure was inconsistent (for example
+  next_in or next_out was Z_NULL, or the state was inadvertently written over
+  by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR
+  if no progress was possible or if there was not enough room in the output
+  buffer when Z_FINISH is used.  Note that Z_BUF_ERROR is not fatal, and
+  inflate() can be called again with more input and more output space to
+  continue decompressing.  If Z_DATA_ERROR is returned, the application may
+  then call inflateSync() to look for a good compression block if a partial
+  recovery of the data is to be attempted.
+*/
+
+
+ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
+/*
+     All dynamically allocated data structures for this stream are freed.
+   This function discards any unprocessed input and does not flush any pending
+   output.
+
+     inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state
+   was inconsistent.
+*/
+
+
+                        /* Advanced functions */
+
+/*
+    The following functions are needed only in some special applications.
+*/
+
+/*
+ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
+                                     int  level,
+                                     int  method,
+                                     int  windowBits,
+                                     int  memLevel,
+                                     int  strategy));
+
+     This is another version of deflateInit with more compression options.  The
+   fields next_in, zalloc, zfree and opaque must be initialized before by the
+   caller.
+
+     The method parameter is the compression method.  It must be Z_DEFLATED in
+   this version of the library.
+
+     The windowBits parameter is the base two logarithm of the window size
+   (the size of the history buffer).  It should be in the range 8..15 for this
+   version of the library.  Larger values of this parameter result in better
+   compression at the expense of memory usage.  The default value is 15 if
+   deflateInit is used instead.
+
+     For the current implementation of deflate(), a windowBits value of 8 (a
+   window size of 256 bytes) is not supported.  As a result, a request for 8
+   will result in 9 (a 512-byte window).  In that case, providing 8 to
+   inflateInit2() will result in an error when the zlib header with 9 is
+   checked against the initialization of inflate().  The remedy is to not use 8
+   with deflateInit2() with this initialization, or at least in that case use 9
+   with inflateInit2().
+
+     windowBits can also be -8..-15 for raw deflate.  In this case, -windowBits
+   determines the window size.  deflate() will then generate raw deflate data
+   with no zlib header or trailer, and will not compute a check value.
+
+     windowBits can also be greater than 15 for optional gzip encoding.  Add
+   16 to windowBits to write a simple gzip header and trailer around the
+   compressed data instead of a zlib wrapper.  The gzip header will have no
+   file name, no extra data, no comment, no modification time (set to zero), no
+   header crc, and the operating system will be set to the appropriate value,
+   if the operating system was determined at compile time.  If a gzip stream is
+   being written, strm->adler is a CRC-32 instead of an Adler-32.
+
+     For raw deflate or gzip encoding, a request for a 256-byte window is
+   rejected as invalid, since only the zlib header provides a means of
+   transmitting the window size to the decompressor.
+
+     The memLevel parameter specifies how much memory should be allocated
+   for the internal compression state.  memLevel=1 uses minimum memory but is
+   slow and reduces compression ratio; memLevel=9 uses maximum memory for
+   optimal speed.  The default value is 8.  See zconf.h for total memory usage
+   as a function of windowBits and memLevel.
+
+     The strategy parameter is used to tune the compression algorithm.  Use the
+   value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a
+   filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no
+   string match), or Z_RLE to limit match distances to one (run-length
+   encoding).  Filtered data consists mostly of small values with a somewhat
+   random distribution.  In this case, the compression algorithm is tuned to
+   compress them better.  The effect of Z_FILTERED is to force more Huffman
+   coding and less string matching; it is somewhat intermediate between
+   Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY.  Z_RLE is designed to be almost as
+   fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data.  The
+   strategy parameter only affects the compression ratio but not the
+   correctness of the compressed output even if it is not set appropriately.
+   Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler
+   decoder for special applications.
+
+     deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid
+   method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is
+   incompatible with the version assumed by the caller (ZLIB_VERSION).  msg is
+   set to null if there is no error message.  deflateInit2 does not perform any
+   compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the compression dictionary from the given byte sequence
+   without producing any compressed output.  When using the zlib format, this
+   function must be called immediately after deflateInit, deflateInit2 or
+   deflateReset, and before any call of deflate.  When doing raw deflate, this
+   function must be called either before any call of deflate, or immediately
+   after the completion of a deflate block, i.e. after all input has been
+   consumed and all output has been delivered when using any of the flush
+   options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH.  The
+   compressor and decompressor must use exactly the same dictionary (see
+   inflateSetDictionary).
+
+     The dictionary should consist of strings (byte sequences) that are likely
+   to be encountered later in the data to be compressed, with the most commonly
+   used strings preferably put towards the end of the dictionary.  Using a
+   dictionary is most useful when the data to be compressed is short and can be
+   predicted with good accuracy; the data can then be compressed better than
+   with the default empty dictionary.
+
+     Depending on the size of the compression data structures selected by
+   deflateInit or deflateInit2, a part of the dictionary may in effect be
+   discarded, for example if the dictionary is larger than the window size
+   provided in deflateInit or deflateInit2.  Thus the strings most likely to be
+   useful should be put at the end of the dictionary, not at the front.  In
+   addition, the current implementation of deflate will use at most the window
+   size minus 262 bytes of the provided dictionary.
+
+     Upon return of this function, strm->adler is set to the Adler-32 value
+   of the dictionary; the decompressor may later use this value to determine
+   which dictionary has been used by the compressor.  (The Adler-32 value
+   applies to the whole dictionary even if only a subset of the dictionary is
+   actually used by the compressor.) If a raw deflate was requested, then the
+   Adler-32 value is not computed and strm->adler is not set.
+
+     deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
+   parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
+   inconsistent (for example if deflate has already been called for this stream
+   or if not at a block boundary for raw deflate).  deflateSetDictionary does
+   not perform any compression: this will be done by deflate().
+*/
+
+ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm,
+                                             Bytef *dictionary,
+                                             uInt  *dictLength));
+/*
+     Returns the sliding dictionary being maintained by deflate.  dictLength is
+   set to the number of bytes in the dictionary, and that many bytes are copied
+   to dictionary.  dictionary must have enough space, where 32768 bytes is
+   always enough.  If deflateGetDictionary() is called with dictionary equal to
+   Z_NULL, then only the dictionary length is returned, and nothing is copied.
+   Similary, if dictLength is Z_NULL, then it is not set.
+
+     deflateGetDictionary() may return a length less than the window size, even
+   when more than the window size in input has been provided. It may return up
+   to 258 bytes less in that case, due to how zlib's implementation of deflate
+   manages the sliding window and lookahead for matches, where matches can be
+   up to 258 bytes long. If the application needs the last window-size bytes of
+   input, then that would need to be saved by the application outside of zlib.
+
+     deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+   stream state is inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when several compression strategies will be
+   tried, for example when there are several ways of pre-processing the input
+   data with a filter.  The streams that will be discarded should then be freed
+   by calling deflateEnd.  Note that deflateCopy duplicates the internal
+   compression state which can be quite large, so this strategy is slow and can
+   consume lots of memory.
+
+     deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being Z_NULL).  msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to deflateEnd followed by deflateInit, but
+   does not free and reallocate the internal compression state.  The stream
+   will leave the compression level and any other attributes that may have been
+   set unchanged.
+
+     deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
+                                      int level,
+                                      int strategy));
+/*
+     Dynamically update the compression level and compression strategy.  The
+   interpretation of level and strategy is as in deflateInit2().  This can be
+   used to switch between compression and straight copy of the input data, or
+   to switch to a different kind of input data requiring a different strategy.
+   If the compression approach (which is a function of the level) or the
+   strategy is changed, and if any input has been consumed in a previous
+   deflate() call, then the input available so far is compressed with the old
+   level and strategy using deflate(strm, Z_BLOCK).  There are three approaches
+   for the compression levels 0, 1..3, and 4..9 respectively.  The new level
+   and strategy will take effect at the next call of deflate().
+
+     If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does
+   not have enough output space to complete, then the parameter change will not
+   take effect.  In this case, deflateParams() can be called again with the
+   same parameters and more output space to try again.
+
+     In order to assure a change in the parameters on the first try, the
+   deflate stream should be flushed using deflate() with Z_BLOCK or other flush
+   request until strm.avail_out is not zero, before calling deflateParams().
+   Then no more input data should be provided before the deflateParams() call.
+   If this is done, the old level and strategy will be applied to the data
+   compressed before deflateParams(), and the new level and strategy will be
+   applied to the the data compressed after deflateParams().
+
+     deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream
+   state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if
+   there was not enough output space to complete the compression of the
+   available input data before a change in the strategy or approach.  Note that
+   in the case of a Z_BUF_ERROR, the parameters are not changed.  A return
+   value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be
+   retried with more output space.
+*/
+
+ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
+                                    int good_length,
+                                    int max_lazy,
+                                    int nice_length,
+                                    int max_chain));
+/*
+     Fine tune deflate's internal compression parameters.  This should only be
+   used by someone who understands the algorithm used by zlib's deflate for
+   searching for the best matching string, and even then only by the most
+   fanatic optimizer trying to squeeze out the last compressed bit for their
+   specific input data.  Read the deflate.c source code for the meaning of the
+   max_lazy, good_length, nice_length, and max_chain parameters.
+
+     deflateTune() can be called after deflateInit() or deflateInit2(), and
+   returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream.
+ */
+
+ZEXTERN uLong ZEXPORT deflateBound OF((z_streamp strm,
+                                       uLong sourceLen));
+/*
+     deflateBound() returns an upper bound on the compressed size after
+   deflation of sourceLen bytes.  It must be called after deflateInit() or
+   deflateInit2(), and after deflateSetHeader(), if used.  This would be used
+   to allocate an output buffer for deflation in a single pass, and so would be
+   called before deflate().  If that first deflate() call is provided the
+   sourceLen input bytes, an output buffer allocated to the size returned by
+   deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed
+   to return Z_STREAM_END.  Note that it is possible for the compressed size to
+   be larger than the value returned by deflateBound() if flush options other
+   than Z_FINISH or Z_NO_FLUSH are used.
+*/
+
+ZEXTERN int ZEXPORT deflatePending OF((z_streamp strm,
+                                       unsigned *pending,
+                                       int *bits));
+/*
+     deflatePending() returns the number of bytes and bits of output that have
+   been generated, but not yet provided in the available output.  The bytes not
+   provided would be due to the available output space having being consumed.
+   The number of bits of output not provided are between 0 and 7, where they
+   await more bits to join them in order to fill out a full byte.  If pending
+   or bits are Z_NULL, then those values are not set.
+
+     deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+ */
+
+ZEXTERN int ZEXPORT deflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+/*
+     deflatePrime() inserts bits in the deflate output stream.  The intent
+   is that this function is used to start off the deflate output with the bits
+   leftover from a previous deflate stream when appending to it.  As such, this
+   function can only be used for raw deflate, and must be used before the first
+   deflate() call after a deflateInit2() or deflateReset().  bits must be less
+   than or equal to 16, and that many of the least significant bits of value
+   will be inserted in the output.
+
+     deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough
+   room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the
+   source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT deflateSetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+/*
+     deflateSetHeader() provides gzip header information for when a gzip
+   stream is requested by deflateInit2().  deflateSetHeader() may be called
+   after deflateInit2() or deflateReset() and before the first call of
+   deflate().  The text, time, os, extra field, name, and comment information
+   in the provided gz_header structure are written to the gzip header (xflag is
+   ignored -- the extra flags are set according to the compression level).  The
+   caller must assure that, if not Z_NULL, name and comment are terminated with
+   a zero byte, and that if extra is not Z_NULL, that extra_len bytes are
+   available there.  If hcrc is true, a gzip header crc is included.  Note that
+   the current versions of the command-line version of gzip (up through version
+   1.3.x) do not support header crc's, and will report that it is a "multi-part
+   gzip file" and give up.
+
+     If deflateSetHeader is not used, the default gzip header has text false,
+   the time set to zero, and os set to 255, with no extra, name, or comment
+   fields.  The gzip header is returned to the default state by deflateReset().
+
+     deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
+                                     int  windowBits));
+
+     This is another version of inflateInit with an extra parameter.  The
+   fields next_in, avail_in, zalloc, zfree and opaque must be initialized
+   before by the caller.
+
+     The windowBits parameter is the base two logarithm of the maximum window
+   size (the size of the history buffer).  It should be in the range 8..15 for
+   this version of the library.  The default value is 15 if inflateInit is used
+   instead.  windowBits must be greater than or equal to the windowBits value
+   provided to deflateInit2() while compressing, or it must be equal to 15 if
+   deflateInit2() was not used.  If a compressed stream with a larger window
+   size is given as input, inflate() will return with the error code
+   Z_DATA_ERROR instead of trying to allocate a larger window.
+
+     windowBits can also be zero to request that inflate use the window size in
+   the zlib header of the compressed stream.
+
+     windowBits can also be -8..-15 for raw inflate.  In this case, -windowBits
+   determines the window size.  inflate() will then process raw deflate data,
+   not looking for a zlib or gzip header, not generating a check value, and not
+   looking for any check values for comparison at the end of the stream.  This
+   is for use with other formats that use the deflate compressed data format
+   such as zip.  Those formats provide their own check values.  If a custom
+   format is developed using the raw deflate format for compressed data, it is
+   recommended that a check value such as an Adler-32 or a CRC-32 be applied to
+   the uncompressed data as is done in the zlib, gzip, and zip formats.  For
+   most applications, the zlib format should be used as is.  Note that comments
+   above on the use in deflateInit2() applies to the magnitude of windowBits.
+
+     windowBits can also be greater than 15 for optional gzip decoding.  Add
+   32 to windowBits to enable zlib and gzip decoding with automatic header
+   detection, or add 16 to decode only the gzip format (the zlib format will
+   return a Z_DATA_ERROR).  If a gzip stream is being decoded, strm->adler is a
+   CRC-32 instead of an Adler-32.  Unlike the gunzip utility and gzread() (see
+   below), inflate() will not automatically decode concatenated gzip streams.
+   inflate() will return Z_STREAM_END at the end of the gzip stream.  The state
+   would need to be reset to continue decoding a subsequent gzip stream.
+
+     inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
+   version assumed by the caller, or Z_STREAM_ERROR if the parameters are
+   invalid, such as a null pointer to the structure.  msg is set to null if
+   there is no error message.  inflateInit2 does not perform any decompression
+   apart from possibly reading the zlib header if present: actual decompression
+   will be done by inflate().  (So next_in and avail_in may be modified, but
+   next_out and avail_out are unused and unchanged.) The current implementation
+   of inflateInit2() does not process any header information -- that is
+   deferred until inflate() is called.
+*/
+
+ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
+                                             const Bytef *dictionary,
+                                             uInt  dictLength));
+/*
+     Initializes the decompression dictionary from the given uncompressed byte
+   sequence.  This function must be called immediately after a call of inflate,
+   if that call returned Z_NEED_DICT.  The dictionary chosen by the compressor
+   can be determined from the Adler-32 value returned by that call of inflate.
+   The compressor and decompressor must use exactly the same dictionary (see
+   deflateSetDictionary).  For raw inflate, this function can be called at any
+   time to set the dictionary.  If the provided dictionary is smaller than the
+   window and there is already data in the window, then the provided dictionary
+   will amend what's there.  The application must insure that the dictionary
+   that was used for compression is provided.
+
+     inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
+   parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
+   inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
+   expected one (incorrect Adler-32 value).  inflateSetDictionary does not
+   perform any decompression: this will be done by subsequent calls of
+   inflate().
+*/
+
+ZEXTERN int ZEXPORT inflateGetDictionary OF((z_streamp strm,
+                                             Bytef *dictionary,
+                                             uInt  *dictLength));
+/*
+     Returns the sliding dictionary being maintained by inflate.  dictLength is
+   set to the number of bytes in the dictionary, and that many bytes are copied
+   to dictionary.  dictionary must have enough space, where 32768 bytes is
+   always enough.  If inflateGetDictionary() is called with dictionary equal to
+   Z_NULL, then only the dictionary length is returned, and nothing is copied.
+   Similary, if dictLength is Z_NULL, then it is not set.
+
+     inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+   stream state is inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateSync OF((z_streamp strm));
+/*
+     Skips invalid compressed data until a possible full flush point (see above
+   for the description of deflate with Z_FULL_FLUSH) can be found, or until all
+   available input is skipped.  No output is provided.
+
+     inflateSync searches for a 00 00 FF FF pattern in the compressed data.
+   All full flush points have this pattern, but not all occurrences of this
+   pattern are full flush points.
+
+     inflateSync returns Z_OK if a possible full flush point has been found,
+   Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point
+   has been found, or Z_STREAM_ERROR if the stream structure was inconsistent.
+   In the success case, the application may save the current current value of
+   total_in which indicates where valid compressed data was found.  In the
+   error case, the application may repeatedly call inflateSync, providing more
+   input each time, until success or end of the input data.
+*/
+
+ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
+                                    z_streamp source));
+/*
+     Sets the destination stream as a complete copy of the source stream.
+
+     This function can be useful when randomly accessing a large stream.  The
+   first pass through the stream can periodically record the inflate state,
+   allowing restarting inflate at those points when randomly accessing the
+   stream.
+
+     inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_STREAM_ERROR if the source stream state was inconsistent
+   (such as zalloc being Z_NULL).  msg is left unchanged in both source and
+   destination.
+*/
+
+ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
+/*
+     This function is equivalent to inflateEnd followed by inflateInit,
+   but does not free and reallocate the internal decompression state.  The
+   stream will keep attributes that may have been set by inflateInit2.
+
+     inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being Z_NULL).
+*/
+
+ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
+                                      int windowBits));
+/*
+     This function is the same as inflateReset, but it also permits changing
+   the wrap and window size requests.  The windowBits parameter is interpreted
+   the same as it is for inflateInit2.  If the window size is changed, then the
+   memory allocated for the window is freed, and the window will be reallocated
+   by inflate() if needed.
+
+     inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent (such as zalloc or state being Z_NULL), or if
+   the windowBits parameter is invalid.
+*/
+
+ZEXTERN int ZEXPORT inflatePrime OF((z_streamp strm,
+                                     int bits,
+                                     int value));
+/*
+     This function inserts bits in the inflate input stream.  The intent is
+   that this function is used to start inflating at a bit position in the
+   middle of a byte.  The provided bits will be used before any bytes are used
+   from next_in.  This function should only be used with raw inflate, and
+   should be used before the first inflate() call after inflateInit2() or
+   inflateReset().  bits must be less than or equal to 16, and that many of the
+   least significant bits of value will be inserted in the input.
+
+     If bits is negative, then the input stream bit buffer is emptied.  Then
+   inflatePrime() can be called again to put bits in the buffer.  This is used
+   to clear out bits leftover after feeding inflate a block description prior
+   to feeding inflate codes.
+
+     inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
+/*
+     This function returns two values, one in the lower 16 bits of the return
+   value, and the other in the remaining upper bits, obtained by shifting the
+   return value down 16 bits.  If the upper value is -1 and the lower value is
+   zero, then inflate() is currently decoding information outside of a block.
+   If the upper value is -1 and the lower value is non-zero, then inflate is in
+   the middle of a stored block, with the lower value equaling the number of
+   bytes from the input remaining to copy.  If the upper value is not -1, then
+   it is the number of bits back from the current bit position in the input of
+   the code (literal or length/distance pair) currently being processed.  In
+   that case the lower value is the number of bytes already emitted for that
+   code.
+
+     A code is being processed if inflate is waiting for more input to complete
+   decoding of the code, or if it has completed decoding but is waiting for
+   more output space to write the literal or match data.
+
+     inflateMark() is used to mark locations in the input data for random
+   access, which may be at bit positions, and to note those cases where the
+   output of a code may span boundaries of random access blocks.  The current
+   location in the input stream can be determined from avail_in and data_type
+   as noted in the description for the Z_BLOCK flush parameter for inflate.
+
+     inflateMark returns the value noted above, or -65536 if the provided
+   source stream state was inconsistent.
+*/
+
+ZEXTERN int ZEXPORT inflateGetHeader OF((z_streamp strm,
+                                         gz_headerp head));
+/*
+     inflateGetHeader() requests that gzip header information be stored in the
+   provided gz_header structure.  inflateGetHeader() may be called after
+   inflateInit2() or inflateReset(), and before the first call of inflate().
+   As inflate() processes the gzip stream, head->done is zero until the header
+   is completed, at which time head->done is set to one.  If a zlib stream is
+   being decoded, then head->done is set to -1 to indicate that there will be
+   no gzip header information forthcoming.  Note that Z_BLOCK or Z_TREES can be
+   used to force inflate() to return immediately after header processing is
+   complete and before any actual data is decompressed.
+
+     The text, time, xflags, and os fields are filled in with the gzip header
+   contents.  hcrc is set to true if there is a header CRC.  (The header CRC
+   was valid if done is set to one.) If extra is not Z_NULL, then extra_max
+   contains the maximum number of bytes to write to extra.  Once done is true,
+   extra_len contains the actual extra field length, and extra contains the
+   extra field, or that field truncated if extra_max is less than extra_len.
+   If name is not Z_NULL, then up to name_max characters are written there,
+   terminated with a zero unless the length is greater than name_max.  If
+   comment is not Z_NULL, then up to comm_max characters are written there,
+   terminated with a zero unless the length is greater than comm_max.  When any
+   of extra, name, or comment are not Z_NULL and the respective field is not
+   present in the header, then that field is set to Z_NULL to signal its
+   absence.  This allows the use of deflateSetHeader() with the returned
+   structure to duplicate the header.  However if those fields are set to
+   allocated memory, then the application will need to save those pointers
+   elsewhere so that they can be eventually freed.
+
+     If inflateGetHeader is not used, then the header information is simply
+   discarded.  The header is always checked for validity, including the header
+   CRC if present.  inflateReset() will reset the process to discard the header
+   information.  The application would need to call inflateGetHeader() again to
+   retrieve the header from the next gzip stream.
+
+     inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source
+   stream state was inconsistent.
+*/
+
+/*
+ZEXTERN int ZEXPORT inflateBackInit OF((z_streamp strm, int windowBits,
+                                        unsigned char FAR *window));
+
+     Initialize the internal stream state for decompression using inflateBack()
+   calls.  The fields zalloc, zfree and opaque in strm must be initialized
+   before the call.  If zalloc and zfree are Z_NULL, then the default library-
+   derived memory allocation routines are used.  windowBits is the base two
+   logarithm of the window size, in the range 8..15.  window is a caller
+   supplied buffer of that size.  Except for special applications where it is
+   assured that deflate was used with small window sizes, windowBits must be 15
+   and a 32K byte window must be supplied to be able to decompress general
+   deflate streams.
+
+     See inflateBack() for the usage of these routines.
+
+     inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of
+   the parameters are invalid, Z_MEM_ERROR if the internal state could not be
+   allocated, or Z_VERSION_ERROR if the version of the library does not match
+   the version of the header file.
+*/
+
+typedef unsigned (*in_func) OF((void FAR *,
+                                z_const unsigned char FAR * FAR *));
+typedef int (*out_func) OF((void FAR *, unsigned char FAR *, unsigned));
+
+ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
+                                    in_func in, void FAR *in_desc,
+                                    out_func out, void FAR *out_desc));
+/*
+     inflateBack() does a raw inflate with a single call using a call-back
+   interface for input and output.  This is potentially more efficient than
+   inflate() for file i/o applications, in that it avoids copying between the
+   output and the sliding window by simply making the window itself the output
+   buffer.  inflate() can be faster on modern CPUs when used with large
+   buffers.  inflateBack() trusts the application to not change the output
+   buffer passed by the output function, at least until inflateBack() returns.
+
+     inflateBackInit() must be called first to allocate the internal state
+   and to initialize the state with the user-provided window buffer.
+   inflateBack() may then be used multiple times to inflate a complete, raw
+   deflate stream with each call.  inflateBackEnd() is then called to free the
+   allocated state.
+
+     A raw deflate stream is one with no zlib or gzip header or trailer.
+   This routine would normally be used in a utility that reads zip or gzip
+   files and writes out uncompressed files.  The utility would decode the
+   header and process the trailer on its own, hence this routine expects only
+   the raw deflate stream to decompress.  This is different from the default
+   behavior of inflate(), which expects a zlib header and trailer around the
+   deflate stream.
+
+     inflateBack() uses two subroutines supplied by the caller that are then
+   called by inflateBack() for input and output.  inflateBack() calls those
+   routines until it reads a complete deflate stream and writes out all of the
+   uncompressed data, or until it encounters an error.  The function's
+   parameters and return types are defined above in the in_func and out_func
+   typedefs.  inflateBack() will call in(in_desc, &buf) which should return the
+   number of bytes of provided input, and a pointer to that input in buf.  If
+   there is no input available, in() must return zero -- buf is ignored in that
+   case -- and inflateBack() will return a buffer error.  inflateBack() will
+   call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1].
+   out() should return zero on success, or non-zero on failure.  If out()
+   returns non-zero, inflateBack() will return with an error.  Neither in() nor
+   out() are permitted to change the contents of the window provided to
+   inflateBackInit(), which is also the buffer that out() uses to write from.
+   The length written by out() will be at most the window size.  Any non-zero
+   amount of input may be provided by in().
+
+     For convenience, inflateBack() can be provided input on the first call by
+   setting strm->next_in and strm->avail_in.  If that input is exhausted, then
+   in() will be called.  Therefore strm->next_in must be initialized before
+   calling inflateBack().  If strm->next_in is Z_NULL, then in() will be called
+   immediately for input.  If strm->next_in is not Z_NULL, then strm->avail_in
+   must also be initialized, and then if strm->avail_in is not zero, input will
+   initially be taken from strm->next_in[0 ..  strm->avail_in - 1].
+
+     The in_desc and out_desc parameters of inflateBack() is passed as the
+   first parameter of in() and out() respectively when they are called.  These
+   descriptors can be optionally used to pass any information that the caller-
+   supplied in() and out() functions need to do their job.
+
+     On return, inflateBack() will set strm->next_in and strm->avail_in to
+   pass back any unused input that was provided by the last in() call.  The
+   return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
+   if in() or out() returned an error, Z_DATA_ERROR if there was a format error
+   in the deflate stream (in which case strm->msg is set to indicate the nature
+   of the error), or Z_STREAM_ERROR if the stream was not properly initialized.
+   In the case of Z_BUF_ERROR, an input or output error can be distinguished
+   using strm->next_in which will be Z_NULL only if in() returned an error.  If
+   strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning
+   non-zero.  (in() will always be called before out(), so strm->next_in is
+   assured to be defined if out() returns non-zero.)  Note that inflateBack()
+   cannot return Z_OK.
+*/
+
+ZEXTERN int ZEXPORT inflateBackEnd OF((z_streamp strm));
+/*
+     All memory allocated by inflateBackInit() is freed.
+
+     inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
+   state was inconsistent.
+*/
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
+/* Return flags indicating compile-time options.
+
+    Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other:
+     1.0: size of uInt
+     3.2: size of uLong
+     5.4: size of voidpf (pointer)
+     7.6: size of z_off_t
+
+    Compiler, assembler, and debug options:
+     8: ZLIB_DEBUG
+     9: ASMV or ASMINF -- use ASM code
+     10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
+     11: 0 (reserved)
+
+    One-time table building (smaller code, but not thread-safe if true):
+     12: BUILDFIXED -- build static block decoding tables when needed
+     13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed
+     14,15: 0 (reserved)
+
+    Library content (indicates missing functionality):
+     16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking
+                          deflate code when not needed)
+     17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect
+                    and decode gzip streams (to avoid linking crc code)
+     18-19: 0 (reserved)
+
+    Operation variations (changes in library functionality):
+     20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate
+     21: FASTEST -- deflate algorithm with only one, lowest compression level
+     22,23: 0 (reserved)
+
+    The sprintf variant used by gzprintf (zero is best):
+     24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format
+     25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure!
+     26: 0 = returns value, 1 = void -- 1 means inferred string length returned
+
+    Remainder:
+     27-31: 0 (reserved)
+ */
+
+#ifndef Z_SOLO
+
+                        /* utility functions */
+
+/*
+     The following utility functions are implemented on top of the basic
+   stream-oriented functions.  To simplify the interface, some default options
+   are assumed (compression level and memory usage, standard memory allocation
+   functions).  The source code of these utility functions can be modified if
+   you need special options.
+*/
+
+ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
+                                 const Bytef *source, uLong sourceLen));
+/*
+     Compresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer.  Upon entry, destLen is the total size
+   of the destination buffer, which must be at least the value returned by
+   compressBound(sourceLen).  Upon exit, destLen is the actual size of the
+   compressed data.  compress() is equivalent to compress2() with a level
+   parameter of Z_DEFAULT_COMPRESSION.
+
+     compress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer.
+*/
+
+ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
+                                  const Bytef *source, uLong sourceLen,
+                                  int level));
+/*
+     Compresses the source buffer into the destination buffer.  The level
+   parameter has the same meaning as in deflateInit.  sourceLen is the byte
+   length of the source buffer.  Upon entry, destLen is the total size of the
+   destination buffer, which must be at least the value returned by
+   compressBound(sourceLen).  Upon exit, destLen is the actual size of the
+   compressed data.
+
+     compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
+   Z_STREAM_ERROR if the level parameter is invalid.
+*/
+
+ZEXTERN uLong ZEXPORT compressBound OF((uLong sourceLen));
+/*
+     compressBound() returns an upper bound on the compressed size after
+   compress() or compress2() on sourceLen bytes.  It would be used before a
+   compress() or compress2() call to allocate the destination buffer.
+*/
+
+ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
+                                   const Bytef *source, uLong sourceLen));
+/*
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer.  Upon entry, destLen is the total size
+   of the destination buffer, which must be large enough to hold the entire
+   uncompressed data.  (The size of the uncompressed data must have been saved
+   previously by the compressor and transmitted to the decompressor by some
+   mechanism outside the scope of this compression library.) Upon exit, destLen
+   is the actual size of the uncompressed data.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.  In
+   the case where there is not enough room, uncompress() will fill the output
+   buffer with the uncompressed data up to that point.
+*/
+
+ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest,   uLongf *destLen,
+                                    const Bytef *source, uLong *sourceLen));
+/*
+     Same as uncompress, except that sourceLen is a pointer, where the
+   length of the source is *sourceLen.  On return, *sourceLen is the number of
+   source bytes consumed.
+*/
+
+                        /* gzip file access functions */
+
+/*
+     This library supports reading and writing files in gzip (.gz) format with
+   an interface similar to that of stdio, using the functions that start with
+   "gz".  The gzip format is different from the zlib format.  gzip is a gzip
+   wrapper, documented in RFC 1952, wrapped around a deflate stream.
+*/
+
+typedef struct gzFile_s *gzFile;    /* semi-opaque gzip file descriptor */
+
+/*
+ZEXTERN gzFile ZEXPORT gzopen OF((const char *path, const char *mode));
+
+     Opens a gzip (.gz) file for reading or writing.  The mode parameter is as
+   in fopen ("rb" or "wb") but can also include a compression level ("wb9") or
+   a strategy: 'f' for filtered data as in "wb6f", 'h' for Huffman-only
+   compression as in "wb1h", 'R' for run-length encoding as in "wb1R", or 'F'
+   for fixed code compression as in "wb9F".  (See the description of
+   deflateInit2 for more information about the strategy parameter.)  'T' will
+   request transparent writing or appending with no compression and not using
+   the gzip format.
+
+     "a" can be used instead of "w" to request that the gzip stream that will
+   be written be appended to the file.  "+" will result in an error, since
+   reading and writing to the same gzip file is not supported.  The addition of
+   "x" when writing will create the file exclusively, which fails if the file
+   already exists.  On systems that support it, the addition of "e" when
+   reading or writing will set the flag to close the file on an execve() call.
+
+     These functions, as well as gzip, will read and decode a sequence of gzip
+   streams in a file.  The append function of gzopen() can be used to create
+   such a file.  (Also see gzflush() for another way to do this.)  When
+   appending, gzopen does not test whether the file begins with a gzip stream,
+   nor does it look for the end of the gzip streams to begin appending.  gzopen
+   will simply append a gzip stream to the existing file.
+
+     gzopen can be used to read a file which is not in gzip format; in this
+   case gzread will directly read from the file without decompression.  When
+   reading, this will be detected automatically by looking for the magic two-
+   byte gzip header.
+
+     gzopen returns NULL if the file could not be opened, if there was
+   insufficient memory to allocate the gzFile state, or if an invalid mode was
+   specified (an 'r', 'w', or 'a' was not provided, or '+' was provided).
+   errno can be checked to determine if the reason gzopen failed was that the
+   file could not be opened.
+*/
+
+ZEXTERN gzFile ZEXPORT gzdopen OF((int fd, const char *mode));
+/*
+     gzdopen associates a gzFile with the file descriptor fd.  File descriptors
+   are obtained from calls like open, dup, creat, pipe or fileno (if the file
+   has been previously opened with fopen).  The mode parameter is as in gzopen.
+
+     The next call of gzclose on the returned gzFile will also close the file
+   descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor
+   fd.  If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd,
+   mode);.  The duplicated descriptor should be saved to avoid a leak, since
+   gzdopen does not close fd if it fails.  If you are using fileno() to get the
+   file descriptor from a FILE *, then you will have to use dup() to avoid
+   double-close()ing the file descriptor.  Both gzclose() and fclose() will
+   close the associated file descriptor, so they need to have different file
+   descriptors.
+
+     gzdopen returns NULL if there was insufficient memory to allocate the
+   gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not
+   provided, or '+' was provided), or if fd is -1.  The file descriptor is not
+   used until the next gz* read, write, seek, or close operation, so gzdopen
+   will not detect if fd is invalid (unless fd is -1).
+*/
+
+ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
+/*
+     Set the internal buffer size used by this library's functions.  The
+   default buffer size is 8192 bytes.  This function must be called after
+   gzopen() or gzdopen(), and before any other calls that read or write the
+   file.  The buffer memory allocation is always deferred to the first read or
+   write.  Three times that size in buffer space is allocated.  A larger buffer
+   size of, for example, 64K or 128K bytes will noticeably increase the speed
+   of decompression (reading).
+
+     The new buffer size also affects the maximum length for gzprintf().
+
+     gzbuffer() returns 0 on success, or -1 on failure, such as being called
+   too late.
+*/
+
+ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
+/*
+     Dynamically update the compression level or strategy.  See the description
+   of deflateInit2 for the meaning of these parameters.  Previously provided
+   data is flushed before the parameter change.
+
+     gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not
+   opened for writing, Z_ERRNO if there is an error writing the flushed data,
+   or Z_MEM_ERROR if there is a memory allocation error.
+*/
+
+ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
+/*
+     Reads the given number of uncompressed bytes from the compressed file.  If
+   the input file is not in gzip format, gzread copies the given number of
+   bytes into the buffer directly from the file.
+
+     After reaching the end of a gzip stream in the input, gzread will continue
+   to read, looking for another gzip stream.  Any number of gzip streams may be
+   concatenated in the input file, and will all be decompressed by gzread().
+   If something other than a gzip stream is encountered after a gzip stream,
+   that remaining trailing garbage is ignored (and no error is returned).
+
+     gzread can be used to read a gzip file that is being concurrently written.
+   Upon reaching the end of the input, gzread will return with the available
+   data.  If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then
+   gzclearerr can be used to clear the end of file indicator in order to permit
+   gzread to be tried again.  Z_OK indicates that a gzip stream was completed
+   on the last gzread.  Z_BUF_ERROR indicates that the input file ended in the
+   middle of a gzip stream.  Note that gzread does not return -1 in the event
+   of an incomplete gzip stream.  This error is deferred until gzclose(), which
+   will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip
+   stream.  Alternatively, gzerror can be used before gzclose to detect this
+   case.
+
+     gzread returns the number of uncompressed bytes actually read, less than
+   len for end of file, or -1 for error.  If len is too large to fit in an int,
+   then nothing is read, -1 is returned, and the error state is set to
+   Z_STREAM_ERROR.
+*/
+
+ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
+                                     gzFile file));
+/*
+     Read up to nitems items of size size from file to buf, otherwise operating
+   as gzread() does.  This duplicates the interface of stdio's fread(), with
+   size_t request and return types.  If the library defines size_t, then
+   z_size_t is identical to size_t.  If not, then z_size_t is an unsigned
+   integer type that can contain a pointer.
+
+     gzfread() returns the number of full items read of size size, or zero if
+   the end of the file was reached and a full item could not be read, or if
+   there was an error.  gzerror() must be consulted if zero is returned in
+   order to determine if there was an error.  If the multiplication of size and
+   nitems overflows, i.e. the product does not fit in a z_size_t, then nothing
+   is read, zero is returned, and the error state is set to Z_STREAM_ERROR.
+
+     In the event that the end of file is reached and only a partial item is
+   available at the end, i.e. the remaining uncompressed data length is not a
+   multiple of size, then the final partial item is nevetheless read into buf
+   and the end-of-file flag is set.  The length of the partial item read is not
+   provided, but could be inferred from the result of gztell().  This behavior
+   is the same as the behavior of fread() implementations in common libraries,
+   but it prevents the direct use of gzfread() to read a concurrently written
+   file, reseting and retrying on end-of-file, when size is not 1.
+*/
+
+ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
+                                voidpc buf, unsigned len));
+/*
+     Writes the given number of uncompressed bytes into the compressed file.
+   gzwrite returns the number of uncompressed bytes written or 0 in case of
+   error.
+*/
+
+ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
+                                      z_size_t nitems, gzFile file));
+/*
+     gzfwrite() writes nitems items of size size from buf to file, duplicating
+   the interface of stdio's fwrite(), with size_t request and return types.  If
+   the library defines size_t, then z_size_t is identical to size_t.  If not,
+   then z_size_t is an unsigned integer type that can contain a pointer.
+
+     gzfwrite() returns the number of full items written of size size, or zero
+   if there was an error.  If the multiplication of size and nitems overflows,
+   i.e. the product does not fit in a z_size_t, then nothing is written, zero
+   is returned, and the error state is set to Z_STREAM_ERROR.
+*/
+
+ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
+/*
+     Converts, formats, and writes the arguments to the compressed file under
+   control of the format string, as in fprintf.  gzprintf returns the number of
+   uncompressed bytes actually written, or a negative zlib error code in case
+   of error.  The number of uncompressed bytes written is limited to 8191, or
+   one less than the buffer size given to gzbuffer().  The caller should assure
+   that this limit is not exceeded.  If it is exceeded, then gzprintf() will
+   return an error (0) with nothing written.  In this case, there may also be a
+   buffer overflow with unpredictable consequences, which is possible only if
+   zlib was compiled with the insecure functions sprintf() or vsprintf()
+   because the secure snprintf() or vsnprintf() functions were not available.
+   This can be determined using zlibCompileFlags().
+*/
+
+ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
+/*
+     Writes the given null-terminated string to the compressed file, excluding
+   the terminating null character.
+
+     gzputs returns the number of characters written, or -1 in case of error.
+*/
+
+ZEXTERN char * ZEXPORT gzgets OF((gzFile file, char *buf, int len));
+/*
+     Reads bytes from the compressed file until len-1 characters are read, or a
+   newline character is read and transferred to buf, or an end-of-file
+   condition is encountered.  If any characters are read or if len == 1, the
+   string is terminated with a null character.  If no characters are read due
+   to an end-of-file or len < 1, then the buffer is left untouched.
+
+     gzgets returns buf which is a null-terminated string, or it returns NULL
+   for end-of-file or in case of error.  If there was an error, the contents at
+   buf are indeterminate.
+*/
+
+ZEXTERN int ZEXPORT gzputc OF((gzFile file, int c));
+/*
+     Writes c, converted to an unsigned char, into the compressed file.  gzputc
+   returns the value that was written, or -1 in case of error.
+*/
+
+ZEXTERN int ZEXPORT gzgetc OF((gzFile file));
+/*
+     Reads one byte from the compressed file.  gzgetc returns this byte or -1
+   in case of end of file or error.  This is implemented as a macro for speed.
+   As such, it does not do all of the checking the other functions do.  I.e.
+   it does not check to see if file is NULL, nor whether the structure file
+   points to has been clobbered or not.
+*/
+
+ZEXTERN int ZEXPORT gzungetc OF((int c, gzFile file));
+/*
+     Push one character back onto the stream to be read as the first character
+   on the next read.  At least one character of push-back is allowed.
+   gzungetc() returns the character pushed, or -1 on failure.  gzungetc() will
+   fail if c is -1, and may fail if a character has been pushed but not read
+   yet.  If gzungetc is used immediately after gzopen or gzdopen, at least the
+   output buffer size of pushed characters is allowed.  (See gzbuffer above.)
+   The pushed character will be discarded if the stream is repositioned with
+   gzseek() or gzrewind().
+*/
+
+ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
+/*
+     Flushes all pending output into the compressed file.  The parameter flush
+   is as in the deflate() function.  The return value is the zlib error number
+   (see function gzerror below).  gzflush is only permitted when writing.
+
+     If the flush parameter is Z_FINISH, the remaining data is written and the
+   gzip stream is completed in the output.  If gzwrite() is called again, a new
+   gzip stream will be started in the output.  gzread() is able to read such
+   concatenated gzip streams.
+
+     gzflush should be called only when strictly necessary because it will
+   degrade compression if called too often.
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile file,
+                                   z_off_t offset, int whence));
+
+     Sets the starting position for the next gzread or gzwrite on the given
+   compressed file.  The offset represents a number of bytes in the
+   uncompressed data stream.  The whence parameter is defined as in lseek(2);
+   the value SEEK_END is not supported.
+
+     If the file is opened for reading, this function is emulated but can be
+   extremely slow.  If the file is opened for writing, only forward seeks are
+   supported; gzseek then compresses a sequence of zeroes up to the new
+   starting position.
+
+     gzseek returns the resulting offset location as measured in bytes from
+   the beginning of the uncompressed stream, or -1 in case of error, in
+   particular if the file is opened for writing and the new starting position
+   would be before the current position.
+*/
+
+ZEXTERN int ZEXPORT    gzrewind OF((gzFile file));
+/*
+     Rewinds the given file. This function is supported only for reading.
+
+     gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT    gztell OF((gzFile file));
+
+     Returns the starting position for the next gzread or gzwrite on the given
+   compressed file.  This position represents a number of bytes in the
+   uncompressed data stream, and is zero when starting, even if appending or
+   reading a gzip stream from the middle of a file using gzdopen().
+
+     gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR)
+*/
+
+/*
+ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile file));
+
+     Returns the current offset in the file being read or written.  This offset
+   includes the count of bytes that precede the gzip stream, for example when
+   appending or when using gzdopen() for reading.  When reading, the offset
+   does not include as yet unused buffered input.  This information can be used
+   for a progress indicator.  On error, gzoffset() returns -1.
+*/
+
+ZEXTERN int ZEXPORT gzeof OF((gzFile file));
+/*
+     Returns true (1) if the end-of-file indicator has been set while reading,
+   false (0) otherwise.  Note that the end-of-file indicator is set only if the
+   read tried to go past the end of the input, but came up short.  Therefore,
+   just like feof(), gzeof() may return false even if there is no more data to
+   read, in the event that the last read request was for the exact number of
+   bytes remaining in the input file.  This will happen if the input file size
+   is an exact multiple of the buffer size.
+
+     If gzeof() returns true, then the read functions will return no more data,
+   unless the end-of-file indicator is reset by gzclearerr() and the input file
+   has grown since the previous end of file was detected.
+*/
+
+ZEXTERN int ZEXPORT gzdirect OF((gzFile file));
+/*
+     Returns true (1) if file is being copied directly while reading, or false
+   (0) if file is a gzip stream being decompressed.
+
+     If the input file is empty, gzdirect() will return true, since the input
+   does not contain a gzip stream.
+
+     If gzdirect() is used immediately after gzopen() or gzdopen() it will
+   cause buffers to be allocated to allow reading the file to determine if it
+   is a gzip file.  Therefore if gzbuffer() is used, it should be called before
+   gzdirect().
+
+     When writing, gzdirect() returns true (1) if transparent writing was
+   requested ("wT" for the gzopen() mode), or false (0) otherwise.  (Note:
+   gzdirect() is not needed when writing.  Transparent writing must be
+   explicitly requested, so the application already knows the answer.  When
+   linking statically, using gzdirect() will include all of the zlib code for
+   gzip file reading and decompression, which may not be desired.)
+*/
+
+ZEXTERN int ZEXPORT    gzclose OF((gzFile file));
+/*
+     Flushes all pending output if necessary, closes the compressed file and
+   deallocates the (de)compression state.  Note that once file is closed, you
+   cannot call gzerror with file, since its structures have been deallocated.
+   gzclose must not be called more than once on the same file, just as free
+   must not be called more than once on the same allocation.
+
+     gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a
+   file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the
+   last read ended in the middle of a gzip stream, or Z_OK on success.
+*/
+
+ZEXTERN int ZEXPORT gzclose_r OF((gzFile file));
+ZEXTERN int ZEXPORT gzclose_w OF((gzFile file));
+/*
+     Same as gzclose(), but gzclose_r() is only for use when reading, and
+   gzclose_w() is only for use when writing or appending.  The advantage to
+   using these instead of gzclose() is that they avoid linking in zlib
+   compression or decompression code that is not used when only reading or only
+   writing respectively.  If gzclose() is used, then both compression and
+   decompression code will be included the application when linking to a static
+   zlib library.
+*/
+
+ZEXTERN const char * ZEXPORT gzerror OF((gzFile file, int *errnum));
+/*
+     Returns the error message for the last error which occurred on the given
+   compressed file.  errnum is set to zlib error number.  If an error occurred
+   in the file system and not in the compression library, errnum is set to
+   Z_ERRNO and the application may consult errno to get the exact error code.
+
+     The application must not modify the returned string.  Future calls to
+   this function may invalidate the previously returned string.  If file is
+   closed, then the string previously returned by gzerror will no longer be
+   available.
+
+     gzerror() should be used to distinguish errors from end-of-file for those
+   functions above that do not distinguish those cases in their return values.
+*/
+
+ZEXTERN void ZEXPORT gzclearerr OF((gzFile file));
+/*
+     Clears the error and end-of-file flags for file.  This is analogous to the
+   clearerr() function in stdio.  This is useful for continuing to read a gzip
+   file that is being written concurrently.
+*/
+
+#endif /* !Z_SOLO */
+
+                        /* checksum functions */
+
+/*
+     These functions are not related to compression but are exported
+   anyway because they might be useful in applications using the compression
+   library.
+*/
+
+ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
+/*
+     Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+   return the updated checksum.  If buf is Z_NULL, this function returns the
+   required initial value for the checksum.
+
+     An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed
+   much faster.
+
+   Usage example:
+
+     uLong adler = adler32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       adler = adler32(adler, buffer, length);
+     }
+     if (adler != original_adler) error();
+*/
+
+ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf,
+                                    z_size_t len));
+/*
+     Same as adler32(), but with a size_t length.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
+                                          z_off_t len2));
+
+     Combine two Adler-32 checksums into one.  For two sequences of bytes, seq1
+   and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
+   each, adler1 and adler2.  adler32_combine() returns the Adler-32 checksum of
+   seq1 and seq2 concatenated, requiring only adler1, adler2, and len2.  Note
+   that the z_off_t type (like off_t) is a signed integer.  If len2 is
+   negative, the result has no meaning or utility.
+*/
+
+ZEXTERN uLong ZEXPORT crc32   OF((uLong crc, const Bytef *buf, uInt len));
+/*
+     Update a running CRC-32 with the bytes buf[0..len-1] and return the
+   updated CRC-32.  If buf is Z_NULL, this function returns the required
+   initial value for the crc.  Pre- and post-conditioning (one's complement) is
+   performed within this function so it shouldn't be done by the application.
+
+   Usage example:
+
+     uLong crc = crc32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       crc = crc32(crc, buffer, length);
+     }
+     if (crc != original_crc) error();
+*/
+
+ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf,
+                                  z_size_t len));
+/*
+     Same as crc32(), but with a size_t length.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
+
+     Combine two CRC-32 check values into one.  For two sequences of bytes,
+   seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
+   calculated for each, crc1 and crc2.  crc32_combine() returns the CRC-32
+   check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
+   len2.
+*/
+
+
+                        /* various hacks, don't look :) */
+
+/* deflateInit and inflateInit are macros to allow checking the zlib version
+ * and the compiler's view of z_stream:
+ */
+ZEXTERN int ZEXPORT deflateInit_ OF((z_streamp strm, int level,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateInit_ OF((z_streamp strm,
+                                     const char *version, int stream_size));
+ZEXTERN int ZEXPORT deflateInit2_ OF((z_streamp strm, int  level, int  method,
+                                      int windowBits, int memLevel,
+                                      int strategy, const char *version,
+                                      int stream_size));
+ZEXTERN int ZEXPORT inflateInit2_ OF((z_streamp strm, int  windowBits,
+                                      const char *version, int stream_size));
+ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
+                                         unsigned char FAR *window,
+                                         const char *version,
+                                         int stream_size));
+#ifdef Z_PREFIX_SET
+#  define z_deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define z_inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#else
+#  define deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#endif
+
+#ifndef Z_SOLO
+
+/* gzgetc() macro and its supporting function and exposed data structure.  Note
+ * that the real internal state is much larger than the exposed structure.
+ * This abbreviated structure exposes just enough for the gzgetc() macro.  The
+ * user should not mess with these exposed elements, since their names or
+ * behavior could change in the future, perhaps even capriciously.  They can
+ * only be used by the gzgetc() macro.  You have been warned.
+ */
+struct gzFile_s {
+    unsigned have;
+    unsigned char *next;
+    z_off64_t pos;
+};
+ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
+#ifdef Z_PREFIX_SET
+#  undef z_gzgetc
+#  define z_gzgetc(g) \
+          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
+#else
+#  define gzgetc(g) \
+          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
+#endif
+
+/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or
+ * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if
+ * both are true, the application gets the *64 functions, and the regular
+ * functions are changed to 64 bits) -- in case these are set on systems
+ * without large file support, _LFS64_LARGEFILE must also be true
+ */
+#ifdef Z_LARGE64
+   ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+   ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
+   ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
+   ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
+   ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off64_t));
+   ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off64_t));
+#endif
+
+#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64)
+#  ifdef Z_PREFIX_SET
+#    define z_gzopen z_gzopen64
+#    define z_gzseek z_gzseek64
+#    define z_gztell z_gztell64
+#    define z_gzoffset z_gzoffset64
+#    define z_adler32_combine z_adler32_combine64
+#    define z_crc32_combine z_crc32_combine64
+#  else
+#    define gzopen gzopen64
+#    define gzseek gzseek64
+#    define gztell gztell64
+#    define gzoffset gzoffset64
+#    define adler32_combine adler32_combine64
+#    define crc32_combine crc32_combine64
+#  endif
+#  ifndef Z_LARGE64
+     ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
+     ZEXTERN z_off_t ZEXPORT gzseek64 OF((gzFile, z_off_t, int));
+     ZEXTERN z_off_t ZEXPORT gztell64 OF((gzFile));
+     ZEXTERN z_off_t ZEXPORT gzoffset64 OF((gzFile));
+     ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
+     ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
+#  endif
+#else
+   ZEXTERN gzFile ZEXPORT gzopen OF((const char *, const char *));
+   ZEXTERN z_off_t ZEXPORT gzseek OF((gzFile, z_off_t, int));
+   ZEXTERN z_off_t ZEXPORT gztell OF((gzFile));
+   ZEXTERN z_off_t ZEXPORT gzoffset OF((gzFile));
+   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
+   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+#endif
+
+#else /* Z_SOLO */
+
+   ZEXTERN uLong ZEXPORT adler32_combine OF((uLong, uLong, z_off_t));
+   ZEXTERN uLong ZEXPORT crc32_combine OF((uLong, uLong, z_off_t));
+
+#endif /* !Z_SOLO */
+
+/* undocumented functions */
+ZEXTERN const char   * ZEXPORT zError           OF((int));
+ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp));
+ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table    OF((void));
+ZEXTERN int            ZEXPORT inflateUndermine OF((z_streamp, int));
+ZEXTERN int            ZEXPORT inflateValidate OF((z_streamp, int));
+ZEXTERN unsigned long  ZEXPORT inflateCodesUsed OF ((z_streamp));
+ZEXTERN int            ZEXPORT inflateResetKeep OF((z_streamp));
+ZEXTERN int            ZEXPORT deflateResetKeep OF((z_streamp));
+#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO)
+ZEXTERN gzFile         ZEXPORT gzopen_w OF((const wchar_t *path,
+                                            const char *mode));
+#endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifndef Z_SOLO
+ZEXTERN int            ZEXPORTVA gzvprintf Z_ARG((gzFile file,
+                                                  const char *format,
+                                                  va_list va));
+#  endif
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ZLIB_H */
diff --git a/deps/libchdr/deps/zlib-1.2.11/zlib.map b/deps/libchdr/deps/zlib-1.2.11/zlib.map
new file mode 100644
index 00000000..40fa9db2
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zlib.map
@@ -0,0 +1,94 @@
+ZLIB_1.2.0 {
+  global:
+    compressBound;
+    deflateBound;
+    inflateBack;
+    inflateBackEnd;
+    inflateBackInit_;
+    inflateCopy;
+  local:
+    deflate_copyright;
+    inflate_copyright;
+    inflate_fast;
+    inflate_table;
+    zcalloc;
+    zcfree;
+    z_errmsg;
+    gz_error;
+    gz_intmax;
+    _*;
+};
+
+ZLIB_1.2.0.2 {
+    gzclearerr;
+    gzungetc;
+    zlibCompileFlags;
+} ZLIB_1.2.0;
+
+ZLIB_1.2.0.8 {
+    deflatePrime;
+} ZLIB_1.2.0.2;
+
+ZLIB_1.2.2 {
+    adler32_combine;
+    crc32_combine;
+    deflateSetHeader;
+    inflateGetHeader;
+} ZLIB_1.2.0.8;
+
+ZLIB_1.2.2.3 {
+    deflateTune;
+    gzdirect;
+} ZLIB_1.2.2;
+
+ZLIB_1.2.2.4 {
+    inflatePrime;
+} ZLIB_1.2.2.3;
+
+ZLIB_1.2.3.3 {
+    adler32_combine64;
+    crc32_combine64;
+    gzopen64;
+    gzseek64;
+    gztell64;
+    inflateUndermine;
+} ZLIB_1.2.2.4;
+
+ZLIB_1.2.3.4 {
+    inflateReset2;
+    inflateMark;
+} ZLIB_1.2.3.3;
+
+ZLIB_1.2.3.5 {
+    gzbuffer;
+    gzoffset;
+    gzoffset64;
+    gzclose_r;
+    gzclose_w;
+} ZLIB_1.2.3.4;
+
+ZLIB_1.2.5.1 {
+    deflatePending;
+} ZLIB_1.2.3.5;
+
+ZLIB_1.2.5.2 {
+    deflateResetKeep;
+    gzgetc_;
+    inflateResetKeep;
+} ZLIB_1.2.5.1;
+
+ZLIB_1.2.7.1 {
+    inflateGetDictionary;
+    gzvprintf;
+} ZLIB_1.2.5.2;
+
+ZLIB_1.2.9 {
+    inflateCodesUsed;
+    inflateValidate;
+    uncompress2;
+    gzfread;
+    gzfwrite;
+    deflateGetDictionary;
+    adler32_z;
+    crc32_z;
+} ZLIB_1.2.7.1;
diff --git a/deps/libchdr/deps/zlib-1.2.11/zlib.pc.cmakein b/deps/libchdr/deps/zlib-1.2.11/zlib.pc.cmakein
new file mode 100644
index 00000000..a5e64293
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zlib.pc.cmakein
@@ -0,0 +1,13 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=@CMAKE_INSTALL_PREFIX@
+libdir=@INSTALL_LIB_DIR@
+sharedlibdir=@INSTALL_LIB_DIR@
+includedir=@INSTALL_INC_DIR@
+
+Name: zlib
+Description: zlib compression library
+Version: @VERSION@
+
+Requires:
+Libs: -L${libdir} -L${sharedlibdir} -lz
+Cflags: -I${includedir}
diff --git a/deps/libchdr/deps/zlib-1.2.11/zlib.pc.in b/deps/libchdr/deps/zlib-1.2.11/zlib.pc.in
new file mode 100644
index 00000000..7e5acf9c
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zlib.pc.in
@@ -0,0 +1,13 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+sharedlibdir=@sharedlibdir@
+includedir=@includedir@
+
+Name: zlib
+Description: zlib compression library
+Version: @VERSION@
+
+Requires:
+Libs: -L${libdir} -L${sharedlibdir} -lz
+Cflags: -I${includedir}
diff --git a/deps/libchdr/deps/zlib-1.2.11/zlib2ansi b/deps/libchdr/deps/zlib-1.2.11/zlib2ansi
new file mode 100755
index 00000000..15e3e165
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zlib2ansi
@@ -0,0 +1,152 @@
+#!/usr/bin/perl
+
+# Transform K&R C function definitions into ANSI equivalent.
+#
+# Author: Paul Marquess
+# Version: 1.0
+# Date: 3 October 2006
+
+# TODO
+#
+# Asumes no function pointer parameters. unless they are typedefed.
+# Assumes no literal strings that look like function definitions
+# Assumes functions start at the beginning of a line
+
+use strict;
+use warnings;
+
+local $/;
+$_ = <>;
+
+my $sp = qr{ \s* (?: /\* .*? \*/ )? \s* }x; # assume no nested comments
+
+my $d1    = qr{ $sp (?: [\w\*\s]+ $sp)* $sp \w+ $sp [\[\]\s]* $sp }x ;
+my $decl  = qr{ $sp (?: \w+ $sp )+ $d1 }xo ;
+my $dList = qr{ $sp $decl (?: $sp , $d1 )* $sp ; $sp }xo ;
+
+
+while (s/^
+            (                  # Start $1
+                (              #   Start $2
+                    .*?        #     Minimal eat content
+                    ( ^ \w [\w\s\*]+ )    #     $3 -- function name
+                    \s*        #     optional whitespace
+                )              # $2 - Matched up to before parameter list
+
+                \( \s*         # Literal "(" + optional whitespace
+                ( [^\)]+ )     # $4 - one or more anythings except ")"
+                \s* \)         # optional whitespace surrounding a Literal ")"
+
+                ( (?: $dList )+ ) # $5
+
+                $sp ^ {        # literal "{" at start of line
+            )                  # Remember to $1
+        //xsom
+      )
+{
+    my $all = $1 ;
+    my $prefix = $2;
+    my $param_list = $4 ;
+    my $params = $5;
+
+    StripComments($params);
+    StripComments($param_list);
+    $param_list =~ s/^\s+//;
+    $param_list =~ s/\s+$//;
+
+    my $i = 0 ;
+    my %pList = map { $_ => $i++ }
+                split /\s*,\s*/, $param_list;
+    my $pMatch = '(\b' . join('|', keys %pList) . '\b)\W*$' ;
+
+    my @params = split /\s*;\s*/, $params;
+    my @outParams = ();
+    foreach my $p (@params)
+    {
+        if ($p =~ /,/)
+        {
+            my @bits = split /\s*,\s*/, $p;
+            my $first = shift @bits;
+            $first =~ s/^\s*//;
+            push @outParams, $first;
+            $first =~ /^(\w+\s*)/;
+            my $type = $1 ;
+            push @outParams, map { $type . $_ } @bits;
+        }
+        else
+        {
+            $p =~ s/^\s+//;
+            push @outParams, $p;
+        }
+    }
+
+
+    my %tmp = map { /$pMatch/;  $_ => $pList{$1}  }
+              @outParams ;
+
+    @outParams = map  { "    $_" }
+                 sort { $tmp{$a} <=> $tmp{$b} }
+                 @outParams ;
+
+    print $prefix ;
+    print "(\n" . join(",\n", @outParams) . ")\n";
+    print "{" ;
+
+}
+
+# Output any trailing code.
+print ;
+exit 0;
+
+
+sub StripComments
+{
+
+  no warnings;
+
+  # Strip C & C++ coments
+  # From the perlfaq
+  $_[0] =~
+
+    s{
+       /\*         ##  Start of /* ... */ comment
+       [^*]*\*+    ##  Non-* followed by 1-or-more *'s
+       (
+         [^/*][^*]*\*+
+       )*          ##  0-or-more things which don't start with /
+                   ##    but do end with '*'
+       /           ##  End of /* ... */ comment
+
+     |         ##     OR  C++ Comment
+       //          ## Start of C++ comment //
+       [^\n]*      ## followed by 0-or-more non end of line characters
+
+     |         ##     OR  various things which aren't comments:
+
+       (
+         "           ##  Start of " ... " string
+         (
+           \\.           ##  Escaped char
+         |               ##    OR
+           [^"\\]        ##  Non "\
+         )*
+         "           ##  End of " ... " string
+
+       |         ##     OR
+
+         '           ##  Start of ' ... ' string
+         (
+           \\.           ##  Escaped char
+         |               ##    OR
+           [^'\\]        ##  Non '\
+         )*
+         '           ##  End of ' ... ' string
+
+       |         ##     OR
+
+         .           ##  Anything other char
+         [^/"'\\]*   ##  Chars which doesn't start a comment, string or escape
+       )
+     }{$2}gxs;
+
+}
diff --git a/deps/libchdr/deps/zlib-1.2.11/zutil.c b/deps/libchdr/deps/zlib-1.2.11/zutil.c
new file mode 100644
index 00000000..a76c6b0c
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zutil.c
@@ -0,0 +1,325 @@
+/* zutil.c -- target dependent utility functions for the compression library
+ * Copyright (C) 1995-2017 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#include "zutil.h"
+#ifndef Z_SOLO
+#  include "gzguts.h"
+#endif
+
+z_const char * const z_errmsg[10] = {
+    (z_const char *)"need dictionary",     /* Z_NEED_DICT       2  */
+    (z_const char *)"stream end",          /* Z_STREAM_END      1  */
+    (z_const char *)"",                    /* Z_OK              0  */
+    (z_const char *)"file error",          /* Z_ERRNO         (-1) */
+    (z_const char *)"stream error",        /* Z_STREAM_ERROR  (-2) */
+    (z_const char *)"data error",          /* Z_DATA_ERROR    (-3) */
+    (z_const char *)"insufficient memory", /* Z_MEM_ERROR     (-4) */
+    (z_const char *)"buffer error",        /* Z_BUF_ERROR     (-5) */
+    (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */
+    (z_const char *)""
+};
+
+
+const char * ZEXPORT zlibVersion()
+{
+    return ZLIB_VERSION;
+}
+
+uLong ZEXPORT zlibCompileFlags()
+{
+    uLong flags;
+
+    flags = 0;
+    switch ((int)(sizeof(uInt))) {
+    case 2:     break;
+    case 4:     flags += 1;     break;
+    case 8:     flags += 2;     break;
+    default:    flags += 3;
+    }
+    switch ((int)(sizeof(uLong))) {
+    case 2:     break;
+    case 4:     flags += 1 << 2;        break;
+    case 8:     flags += 2 << 2;        break;
+    default:    flags += 3 << 2;
+    }
+    switch ((int)(sizeof(voidpf))) {
+    case 2:     break;
+    case 4:     flags += 1 << 4;        break;
+    case 8:     flags += 2 << 4;        break;
+    default:    flags += 3 << 4;
+    }
+    switch ((int)(sizeof(z_off_t))) {
+    case 2:     break;
+    case 4:     flags += 1 << 6;        break;
+    case 8:     flags += 2 << 6;        break;
+    default:    flags += 3 << 6;
+    }
+#ifdef ZLIB_DEBUG
+    flags += 1 << 8;
+#endif
+#if defined(ASMV) || defined(ASMINF)
+    flags += 1 << 9;
+#endif
+#ifdef ZLIB_WINAPI
+    flags += 1 << 10;
+#endif
+#ifdef BUILDFIXED
+    flags += 1 << 12;
+#endif
+#ifdef DYNAMIC_CRC_TABLE
+    flags += 1 << 13;
+#endif
+#ifdef NO_GZCOMPRESS
+    flags += 1L << 16;
+#endif
+#ifdef NO_GZIP
+    flags += 1L << 17;
+#endif
+#ifdef PKZIP_BUG_WORKAROUND
+    flags += 1L << 20;
+#endif
+#ifdef FASTEST
+    flags += 1L << 21;
+#endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifdef NO_vsnprintf
+    flags += 1L << 25;
+#    ifdef HAS_vsprintf_void
+    flags += 1L << 26;
+#    endif
+#  else
+#    ifdef HAS_vsnprintf_void
+    flags += 1L << 26;
+#    endif
+#  endif
+#else
+    flags += 1L << 24;
+#  ifdef NO_snprintf
+    flags += 1L << 25;
+#    ifdef HAS_sprintf_void
+    flags += 1L << 26;
+#    endif
+#  else
+#    ifdef HAS_snprintf_void
+    flags += 1L << 26;
+#    endif
+#  endif
+#endif
+    return flags;
+}
+
+#ifdef ZLIB_DEBUG
+#include <stdlib.h>
+#  ifndef verbose
+#    define verbose 0
+#  endif
+int ZLIB_INTERNAL z_verbose = verbose;
+
+void ZLIB_INTERNAL z_error (m)
+    char *m;
+{
+    fprintf(stderr, "%s\n", m);
+    exit(1);
+}
+#endif
+
+/* exported to allow conversion of error code to string for compress() and
+ * uncompress()
+ */
+const char * ZEXPORT zError(err)
+    int err;
+{
+    return ERR_MSG(err);
+}
+
+#if defined(_WIN32_WCE)
+    /* The Microsoft C Run-Time Library for Windows CE doesn't have
+     * errno.  We define it as a global variable to simplify porting.
+     * Its value is always 0 and should not be used.
+     */
+    int errno = 0;
+#endif
+
+#ifndef HAVE_MEMCPY
+
+void ZLIB_INTERNAL zmemcpy(dest, source, len)
+    Bytef* dest;
+    const Bytef* source;
+    uInt  len;
+{
+    if (len == 0) return;
+    do {
+        *dest++ = *source++; /* ??? to be unrolled */
+    } while (--len != 0);
+}
+
+int ZLIB_INTERNAL zmemcmp(s1, s2, len)
+    const Bytef* s1;
+    const Bytef* s2;
+    uInt  len;
+{
+    uInt j;
+
+    for (j = 0; j < len; j++) {
+        if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1;
+    }
+    return 0;
+}
+
+void ZLIB_INTERNAL zmemzero(dest, len)
+    Bytef* dest;
+    uInt  len;
+{
+    if (len == 0) return;
+    do {
+        *dest++ = 0;  /* ??? to be unrolled */
+    } while (--len != 0);
+}
+#endif
+
+#ifndef Z_SOLO
+
+#ifdef SYS16BIT
+
+#ifdef __TURBOC__
+/* Turbo C in 16-bit mode */
+
+#  define MY_ZCALLOC
+
+/* Turbo C malloc() does not allow dynamic allocation of 64K bytes
+ * and farmalloc(64K) returns a pointer with an offset of 8, so we
+ * must fix the pointer. Warning: the pointer must be put back to its
+ * original form in order to free it, use zcfree().
+ */
+
+#define MAX_PTR 10
+/* 10*64K = 640K */
+
+local int next_ptr = 0;
+
+typedef struct ptr_table_s {
+    voidpf org_ptr;
+    voidpf new_ptr;
+} ptr_table;
+
+local ptr_table table[MAX_PTR];
+/* This table is used to remember the original form of pointers
+ * to large buffers (64K). Such pointers are normalized with a zero offset.
+ * Since MSDOS is not a preemptive multitasking OS, this table is not
+ * protected from concurrent access. This hack doesn't work anyway on
+ * a protected system like OS/2. Use Microsoft C instead.
+ */
+
+voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size)
+{
+    voidpf buf;
+    ulg bsize = (ulg)items*size;
+
+    (void)opaque;
+
+    /* If we allocate less than 65520 bytes, we assume that farmalloc
+     * will return a usable pointer which doesn't have to be normalized.
+     */
+    if (bsize < 65520L) {
+        buf = farmalloc(bsize);
+        if (*(ush*)&buf != 0) return buf;
+    } else {
+        buf = farmalloc(bsize + 16L);
+    }
+    if (buf == NULL || next_ptr >= MAX_PTR) return NULL;
+    table[next_ptr].org_ptr = buf;
+
+    /* Normalize the pointer to seg:0 */
+    *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4;
+    *(ush*)&buf = 0;
+    table[next_ptr++].new_ptr = buf;
+    return buf;
+}
+
+void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
+{
+    int n;
+
+    (void)opaque;
+
+    if (*(ush*)&ptr != 0) { /* object < 64K */
+        farfree(ptr);
+        return;
+    }
+    /* Find the original pointer */
+    for (n = 0; n < next_ptr; n++) {
+        if (ptr != table[n].new_ptr) continue;
+
+        farfree(table[n].org_ptr);
+        while (++n < next_ptr) {
+            table[n-1] = table[n];
+        }
+        next_ptr--;
+        return;
+    }
+    Assert(0, "zcfree: ptr not found");
+}
+
+#endif /* __TURBOC__ */
+
+
+#ifdef M_I86
+/* Microsoft C in 16-bit mode */
+
+#  define MY_ZCALLOC
+
+#if (!defined(_MSC_VER) || (_MSC_VER <= 600))
+#  define _halloc  halloc
+#  define _hfree   hfree
+#endif
+
+voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size)
+{
+    (void)opaque;
+    return _halloc((long)items, size);
+}
+
+void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
+{
+    (void)opaque;
+    _hfree(ptr);
+}
+
+#endif /* M_I86 */
+
+#endif /* SYS16BIT */
+
+
+#ifndef MY_ZCALLOC /* Any system without a special alloc function */
+
+#ifndef STDC
+extern voidp  malloc OF((uInt size));
+extern voidp  calloc OF((uInt items, uInt size));
+extern void   free   OF((voidpf ptr));
+#endif
+
+voidpf ZLIB_INTERNAL zcalloc (opaque, items, size)
+    voidpf opaque;
+    unsigned items;
+    unsigned size;
+{
+    (void)opaque;
+    return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
+                              (voidpf)calloc(items, size);
+}
+
+void ZLIB_INTERNAL zcfree (opaque, ptr)
+    voidpf opaque;
+    voidpf ptr;
+{
+    (void)opaque;
+    free(ptr);
+}
+
+#endif /* MY_ZCALLOC */
+
+#endif /* !Z_SOLO */
diff --git a/deps/libchdr/deps/zlib-1.2.11/zutil.h b/deps/libchdr/deps/zlib-1.2.11/zutil.h
new file mode 100644
index 00000000..b079ea6a
--- /dev/null
+++ b/deps/libchdr/deps/zlib-1.2.11/zutil.h
@@ -0,0 +1,271 @@
+/* zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZUTIL_H
+#define ZUTIL_H
+
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
+#include "zlib.h"
+
+#if defined(STDC) && !defined(Z_SOLO)
+#  if !(defined(_WIN32_WCE) && defined(_MSC_VER))
+#    include <stddef.h>
+#  endif
+#  include <string.h>
+#  include <stdlib.h>
+#endif
+
+#ifdef Z_SOLO
+   typedef long ptrdiff_t;  /* guess -- will be caught if guess is wrong */
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
+
+typedef unsigned char  uch;
+typedef uch FAR uchf;
+typedef unsigned short ush;
+typedef ush FAR ushf;
+typedef unsigned long  ulg;
+
+extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
+/* (size given to avoid silly warnings with Visual C++) */
+
+#define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)]
+
+#define ERR_RETURN(strm,err) \
+  return (strm->msg = ERR_MSG(err), (err))
+/* To be used only when the state is known to be valid */
+
+        /* common constants */
+
+#ifndef DEF_WBITS
+#  define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression. MAX_WBITS is for compression only */
+
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+/* default memLevel */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES    2
+/* The three kinds of block type */
+
+#define MIN_MATCH  3
+#define MAX_MATCH  258
+/* The minimum and maximum match lengths */
+
+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
+
+        /* target dependencies */
+
+#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
+#  define OS_CODE  0x00
+#  ifndef Z_SOLO
+#    if defined(__TURBOC__) || defined(__BORLANDC__)
+#      if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
+         /* Allow compilation with ANSI keywords only enabled */
+         void _Cdecl farfree( void *block );
+         void *_Cdecl farmalloc( unsigned long nbytes );
+#      else
+#        include <alloc.h>
+#      endif
+#    else /* MSC or DJGPP */
+#      include <malloc.h>
+#    endif
+#  endif
+#endif
+
+#ifdef AMIGA
+#  define OS_CODE  1
+#endif
+
+#if defined(VAXC) || defined(VMS)
+#  define OS_CODE  2
+#  define F_OPEN(name, mode) \
+     fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
+#endif
+
+#ifdef __370__
+#  if __TARGET_LIB__ < 0x20000000
+#    define OS_CODE 4
+#  elif __TARGET_LIB__ < 0x40000000
+#    define OS_CODE 11
+#  else
+#    define OS_CODE 8
+#  endif
+#endif
+
+#if defined(ATARI) || defined(atarist)
+#  define OS_CODE  5
+#endif
+
+#ifdef OS2
+#  define OS_CODE  6
+#  if defined(M_I86) && !defined(Z_SOLO)
+#    include <malloc.h>
+#  endif
+#endif
+
+#if defined(MACOS) || defined(TARGET_OS_MAC)
+#  define OS_CODE  7
+#  ifndef Z_SOLO
+#    if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
+#      include <unix.h> /* for fdopen */
+#    else
+#      ifndef fdopen
+#        define fdopen(fd,mode) NULL /* No fdopen() */
+#      endif
+#    endif
+#  endif
+#endif
+
+#ifdef __acorn
+#  define OS_CODE 13
+#endif
+
+#if defined(WIN32) && !defined(__CYGWIN__)
+#  define OS_CODE  10
+#endif
+
+#ifdef _BEOS_
+#  define OS_CODE  16
+#endif
+
+#ifdef __TOS_OS400__
+#  define OS_CODE 18
+#endif
+
+#ifdef __APPLE__
+#  define OS_CODE 19
+#endif
+
+#if defined(_BEOS_) || defined(RISCOS)
+#  define fdopen(fd,mode) NULL /* No fdopen() */
+#endif
+
+#if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX
+#  if defined(_WIN32_WCE)
+#    define fdopen(fd,mode) NULL /* No fdopen() */
+#    ifndef _PTRDIFF_T_DEFINED
+       typedef int ptrdiff_t;
+#      define _PTRDIFF_T_DEFINED
+#    endif
+#  else
+#    define fdopen(fd,type)  _fdopen(fd,type)
+#  endif
+#endif
+
+#if defined(__BORLANDC__) && !defined(MSDOS)
+  #pragma warn -8004
+  #pragma warn -8008
+  #pragma warn -8066
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_WIN32) && \
+    (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
+    ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t));
+    ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t));
+#endif
+
+        /* common defaults */
+
+#ifndef OS_CODE
+#  define OS_CODE  3     /* assume Unix */
+#endif
+
+#ifndef F_OPEN
+#  define F_OPEN(name, mode) fopen((name), (mode))
+#endif
+
+         /* functions */
+
+#if defined(pyr) || defined(Z_SOLO)
+#  define NO_MEMCPY
+#endif
+#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
+ /* Use our own functions for small and medium model with MSC <= 5.0.
+  * You may have to use the same strategy for Borland C (untested).
+  * The __SC__ check is for Symantec.
+  */
+#  define NO_MEMCPY
+#endif
+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
+#  define HAVE_MEMCPY
+#endif
+#ifdef HAVE_MEMCPY
+#  ifdef SMALL_MEDIUM /* MSDOS small or medium model */
+#    define zmemcpy _fmemcpy
+#    define zmemcmp _fmemcmp
+#    define zmemzero(dest, len) _fmemset(dest, 0, len)
+#  else
+#    define zmemcpy memcpy
+#    define zmemcmp memcmp
+#    define zmemzero(dest, len) memset(dest, 0, len)
+#  endif
+#else
+   void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len));
+   int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len));
+   void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len));
+#endif
+
+/* Diagnostic functions */
+#ifdef ZLIB_DEBUG
+#  include <stdio.h>
+   extern int ZLIB_INTERNAL z_verbose;
+   extern void ZLIB_INTERNAL z_error OF((char *m));
+#  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
+#  define Trace(x) {if (z_verbose>=0) fprintf x ;}
+#  define Tracev(x) {if (z_verbose>0) fprintf x ;}
+#  define Tracevv(x) {if (z_verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+#ifndef Z_SOLO
+   voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items,
+                                    unsigned size));
+   void ZLIB_INTERNAL zcfree  OF((voidpf opaque, voidpf ptr));
+#endif
+
+#define ZALLOC(strm, items, size) \
+           (*((strm)->zalloc))((strm)->opaque, (items), (size))
+#define ZFREE(strm, addr)  (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
+#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
+
+/* Reverse the bytes in a 32-bit value */
+#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
+                    (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
+
+#endif /* ZUTIL_H */
diff --git a/deps/libchdr/include/dr_libs/dr_flac.h b/deps/libchdr/include/dr_libs/dr_flac.h
new file mode 100644
index 00000000..2dcddb58
--- /dev/null
+++ b/deps/libchdr/include/dr_libs/dr_flac.h
@@ -0,0 +1,12214 @@
+/*
+FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_flac - v0.12.28 - 2021-02-21
+
+David Reid - mackron@gmail.com
+
+GitHub: https://github.com/mackron/dr_libs
+*/
+
+/*
+RELEASE NOTES - v0.12.0
+=======================
+Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs.
+
+
+Improved Client-Defined Memory Allocation
+-----------------------------------------
+The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
+existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom
+allocation callbacks are specified.
+
+To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this:
+
+    void* my_malloc(size_t sz, void* pUserData)
+    {
+        return malloc(sz);
+    }
+    void* my_realloc(void* p, size_t sz, void* pUserData)
+    {
+        return realloc(p, sz);
+    }
+    void my_free(void* p, void* pUserData)
+    {
+        free(p);
+    }
+
+    ...
+
+    drflac_allocation_callbacks allocationCallbacks;
+    allocationCallbacks.pUserData = &myData;
+    allocationCallbacks.onMalloc  = my_malloc;
+    allocationCallbacks.onRealloc = my_realloc;
+    allocationCallbacks.onFree    = my_free;
+    drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks);
+
+The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
+
+Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC,
+DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions.
+
+Every API that opens a drflac object now takes this extra parameter. These include the following:
+
+    drflac_open()
+    drflac_open_relaxed()
+    drflac_open_with_metadata()
+    drflac_open_with_metadata_relaxed()
+    drflac_open_file()
+    drflac_open_file_with_metadata()
+    drflac_open_memory()
+    drflac_open_memory_with_metadata()
+    drflac_open_and_read_pcm_frames_s32()
+    drflac_open_and_read_pcm_frames_s16()
+    drflac_open_and_read_pcm_frames_f32()
+    drflac_open_file_and_read_pcm_frames_s32()
+    drflac_open_file_and_read_pcm_frames_s16()
+    drflac_open_file_and_read_pcm_frames_f32()
+    drflac_open_memory_and_read_pcm_frames_s32()
+    drflac_open_memory_and_read_pcm_frames_s16()
+    drflac_open_memory_and_read_pcm_frames_f32()
+
+
+
+Optimizations
+-------------
+Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly
+improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes
+advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which
+means it will be disabled when DR_FLAC_NO_CRC is used.
+
+The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in
+particular. 16-bit streams should also see some improvement.
+
+drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed it's s32
+to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths.
+
+A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo
+channel reconstruction which is the last part of the decoding process.
+
+The ARM build has seen a few improvements. The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when
+compiling with GCC and Clang which is achieved using inline assembly. The CLZ instruction requires ARM architecture version 5 at
+compile time and the REV instruction requires ARM architecture version 6.
+
+An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling.
+
+
+Removed APIs
+------------
+The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0:
+
+    drflac_read_s32()                   -> drflac_read_pcm_frames_s32()
+    drflac_read_s16()                   -> drflac_read_pcm_frames_s16()
+    drflac_read_f32()                   -> drflac_read_pcm_frames_f32()
+    drflac_seek_to_sample()             -> drflac_seek_to_pcm_frame()
+    drflac_open_and_decode_s32()        -> drflac_open_and_read_pcm_frames_s32()
+    drflac_open_and_decode_s16()        -> drflac_open_and_read_pcm_frames_s16()
+    drflac_open_and_decode_f32()        -> drflac_open_and_read_pcm_frames_f32()
+    drflac_open_and_decode_file_s32()   -> drflac_open_file_and_read_pcm_frames_s32()
+    drflac_open_and_decode_file_s16()   -> drflac_open_file_and_read_pcm_frames_s16()
+    drflac_open_and_decode_file_f32()   -> drflac_open_file_and_read_pcm_frames_f32()
+    drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32()
+    drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16()
+    drflac_open_and_decode_memory_f32() -> drflac_open_memroy_and_read_pcm_frames_f32()
+
+Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate
+to the old per-sample APIs. You now need to use the "pcm_frame" versions.
+*/
+
+
+/*
+Introduction
+============
+dr_flac is a single file library. To use it, do something like the following in one .c file.
+
+    ```c
+    #define DR_FLAC_IMPLEMENTATION
+    #include "dr_flac.h"
+    ```
+
+You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following:
+
+    ```c
+    drflac* pFlac = drflac_open_file("MySong.flac", NULL);
+    if (pFlac == NULL) {
+        // Failed to open FLAC file
+    }
+
+    drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32));
+    drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples);
+    ```
+
+The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of channels and the bits per sample,
+should be directly accessible - just make sure you don't change their values. Samples are always output as interleaved signed 32-bit PCM. In the example above
+a native FLAC stream was opened, however dr_flac has seamless support for Ogg encapsulated FLAC streams as well.
+
+You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and the decoder will give you as many
+samples as it can, up to the amount requested. Later on when you need the next batch of samples, just call it again. Example:
+
+    ```c
+    while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) {
+        do_something();
+    }
+    ```
+
+You can seek to a specific PCM frame with `drflac_seek_to_pcm_frame()`.
+
+If you just want to quickly decode an entire FLAC file in one go you can do something like this:
+
+    ```c
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL);
+    if (pSampleData == NULL) {
+        // Failed to open and decode FLAC file.
+    }
+
+    ...
+
+    drflac_free(pSampleData, NULL);
+    ```
+
+You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs respectively, but note that these
+should be considered lossy.
+
+
+If you need access to metadata (album art, etc.), use `drflac_open_with_metadata()`, `drflac_open_file_with_metdata()` or `drflac_open_memory_with_metadata()`.
+The rationale for keeping these APIs separate is that they're slightly slower than the normal versions and also just a little bit harder to use. dr_flac
+reports metadata to the application through the use of a callback, and every metadata block is reported before `drflac_open_with_metdata()` returns.
+
+The main opening APIs (`drflac_open()`, etc.) will fail if the header is not present. The presents a problem in certain scenarios such as broadcast style
+streams or internet radio where the header may not be present because the user has started playback mid-stream. To handle this, use the relaxed APIs:
+    
+    `drflac_open_relaxed()`
+    `drflac_open_with_metadata_relaxed()`
+
+It is not recommended to use these APIs for file based streams because a missing header would usually indicate a corrupt or perverse file. In addition, these
+APIs can take a long time to initialize because they may need to spend a lot of time finding the first frame.
+
+
+
+Build Options
+=============
+#define these options before including this file.
+
+#define DR_FLAC_NO_STDIO
+  Disable `drflac_open_file()` and family.
+
+#define DR_FLAC_NO_OGG
+  Disables support for Ogg/FLAC streams.
+
+#define DR_FLAC_BUFFER_SIZE <number>
+  Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls back to the client for more data.
+  Larger values means more memory, but better performance. My tests show diminishing returns after about 4KB (which is the default). Consider reducing this if
+  you have a very efficient implementation of onRead(), or increase it if it's very inefficient. Must be a multiple of 8.
+
+#define DR_FLAC_NO_CRC
+  Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. When seeking, the seek table will
+  be used if available. Otherwise the seek will be performed using brute force.
+
+#define DR_FLAC_NO_SIMD
+  Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler.
+
+
+
+Notes
+=====
+- dr_flac does not support changing the sample rate nor channel count mid stream.
+- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization.
+- When using Ogg encapsulation, a corrupted metadata block will result in `drflac_open_with_metadata()` and `drflac_open()` returning inconsistent samples due
+  to differences in corrupted stream recorvery logic between the two APIs.
+*/
+
+#ifndef dr_flac_h
+#define dr_flac_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DRFLAC_STRINGIFY(x)      #x
+#define DRFLAC_XSTRINGIFY(x)     DRFLAC_STRINGIFY(x)
+
+#define DRFLAC_VERSION_MAJOR     0
+#define DRFLAC_VERSION_MINOR     12
+#define DRFLAC_VERSION_REVISION  28
+#define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
+
+#include <stddef.h> /* For size_t. */
+
+/* Sized types. */
+typedef   signed char           drflac_int8;
+typedef unsigned char           drflac_uint8;
+typedef   signed short          drflac_int16;
+typedef unsigned short          drflac_uint16;
+typedef   signed int            drflac_int32;
+typedef unsigned int            drflac_uint32;
+#if defined(_MSC_VER)
+    typedef   signed __int64    drflac_int64;
+    typedef unsigned __int64    drflac_uint64;
+#else
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+        #pragma GCC diagnostic push
+        #pragma GCC diagnostic ignored "-Wlong-long"
+        #if defined(__clang__)
+            #pragma GCC diagnostic ignored "-Wc++11-long-long"
+        #endif
+    #endif
+    typedef   signed long long  drflac_int64;
+    typedef unsigned long long  drflac_uint64;
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+        #pragma GCC diagnostic pop
+    #endif
+#endif
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+    typedef drflac_uint64       drflac_uintptr;
+#else
+    typedef drflac_uint32       drflac_uintptr;
+#endif
+typedef drflac_uint8            drflac_bool8;
+typedef drflac_uint32           drflac_bool32;
+#define DRFLAC_TRUE             1
+#define DRFLAC_FALSE            0
+
+#if !defined(DRFLAC_API)
+    #if defined(DRFLAC_DLL)
+        #if defined(_WIN32)
+            #define DRFLAC_DLL_IMPORT  __declspec(dllimport)
+            #define DRFLAC_DLL_EXPORT  __declspec(dllexport)
+            #define DRFLAC_DLL_PRIVATE static
+        #else
+            #if defined(__GNUC__) && __GNUC__ >= 4
+                #define DRFLAC_DLL_IMPORT  __attribute__((visibility("default")))
+                #define DRFLAC_DLL_EXPORT  __attribute__((visibility("default")))
+                #define DRFLAC_DLL_PRIVATE __attribute__((visibility("hidden")))
+            #else
+                #define DRFLAC_DLL_IMPORT
+                #define DRFLAC_DLL_EXPORT
+                #define DRFLAC_DLL_PRIVATE static
+            #endif
+        #endif
+
+        #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
+            #define DRFLAC_API  DRFLAC_DLL_EXPORT
+        #else
+            #define DRFLAC_API  DRFLAC_DLL_IMPORT
+        #endif
+        #define DRFLAC_PRIVATE DRFLAC_DLL_PRIVATE
+    #else
+        #define DRFLAC_API extern
+        #define DRFLAC_PRIVATE static
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1700   /* Visual Studio 2012 */
+    #define DRFLAC_DEPRECATED       __declspec(deprecated)
+#elif (defined(__GNUC__) && __GNUC__ >= 4)  /* GCC 4 */
+    #define DRFLAC_DEPRECATED       __attribute__((deprecated))
+#elif defined(__has_feature)                /* Clang */
+    #if __has_feature(attribute_deprecated)
+        #define DRFLAC_DEPRECATED   __attribute__((deprecated))
+    #else
+        #define DRFLAC_DEPRECATED
+    #endif
+#else
+    #define DRFLAC_DEPRECATED
+#endif
+
+DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision);
+DRFLAC_API const char* drflac_version_string(void);
+
+/*
+As data is read from the client it is placed into an internal buffer for fast access. This controls the size of that buffer. Larger values means more speed,
+but also more memory. In my testing there is diminishing returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8.
+*/
+#ifndef DR_FLAC_BUFFER_SIZE
+#define DR_FLAC_BUFFER_SIZE   4096
+#endif
+
+/* Check if we can enable 64-bit optimizations. */
+#if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+#define DRFLAC_64BIT
+#endif
+
+#ifdef DRFLAC_64BIT
+typedef drflac_uint64 drflac_cache_t;
+#else
+typedef drflac_uint32 drflac_cache_t;
+#endif
+
+/* The various metadata block types. */
+#define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO       0
+#define DRFLAC_METADATA_BLOCK_TYPE_PADDING          1
+#define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION      2
+#define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE        3
+#define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT   4
+#define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET         5
+#define DRFLAC_METADATA_BLOCK_TYPE_PICTURE          6
+#define DRFLAC_METADATA_BLOCK_TYPE_INVALID          127
+
+/* The various picture types specified in the PICTURE block. */
+#define DRFLAC_PICTURE_TYPE_OTHER                   0
+#define DRFLAC_PICTURE_TYPE_FILE_ICON               1
+#define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON         2
+#define DRFLAC_PICTURE_TYPE_COVER_FRONT             3
+#define DRFLAC_PICTURE_TYPE_COVER_BACK              4
+#define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE            5
+#define DRFLAC_PICTURE_TYPE_MEDIA                   6
+#define DRFLAC_PICTURE_TYPE_LEAD_ARTIST             7
+#define DRFLAC_PICTURE_TYPE_ARTIST                  8
+#define DRFLAC_PICTURE_TYPE_CONDUCTOR               9
+#define DRFLAC_PICTURE_TYPE_BAND                    10
+#define DRFLAC_PICTURE_TYPE_COMPOSER                11
+#define DRFLAC_PICTURE_TYPE_LYRICIST                12
+#define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION      13
+#define DRFLAC_PICTURE_TYPE_DURING_RECORDING        14
+#define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE      15
+#define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE          16
+#define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH     17
+#define DRFLAC_PICTURE_TYPE_ILLUSTRATION            18
+#define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE           19
+#define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE      20
+
+typedef enum
+{
+    drflac_container_native,
+    drflac_container_ogg,
+    drflac_container_unknown
+} drflac_container;
+
+typedef enum
+{
+    drflac_seek_origin_start,
+    drflac_seek_origin_current
+} drflac_seek_origin;
+
+/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */
+#pragma pack(2)
+typedef struct
+{
+    drflac_uint64 firstPCMFrame;
+    drflac_uint64 flacFrameOffset;   /* The offset from the first byte of the header of the first frame. */
+    drflac_uint16 pcmFrameCount;
+} drflac_seekpoint;
+#pragma pack()
+
+typedef struct
+{
+    drflac_uint16 minBlockSizeInPCMFrames;
+    drflac_uint16 maxBlockSizeInPCMFrames;
+    drflac_uint32 minFrameSizeInPCMFrames;
+    drflac_uint32 maxFrameSizeInPCMFrames;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_uint8  md5[16];
+} drflac_streaminfo;
+
+typedef struct
+{
+    /*
+    The metadata type. Use this to know how to interpret the data below. Will be set to one of the
+    DRFLAC_METADATA_BLOCK_TYPE_* tokens.
+    */
+    drflac_uint32 type;
+
+    /*
+    A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to
+    not modify the contents of this buffer. Use the structures below for more meaningful and structured
+    information about the metadata. It's possible for this to be null.
+    */
+    const void* pRawData;
+
+    /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */
+    drflac_uint32 rawDataSize;
+
+    union
+    {
+        drflac_streaminfo streaminfo;
+
+        struct
+        {
+            int unused;
+        } padding;
+
+        struct
+        {
+            drflac_uint32 id;
+            const void* pData;
+            drflac_uint32 dataSize;
+        } application;
+
+        struct
+        {
+            drflac_uint32 seekpointCount;
+            const drflac_seekpoint* pSeekpoints;
+        } seektable;
+
+        struct
+        {
+            drflac_uint32 vendorLength;
+            const char* vendor;
+            drflac_uint32 commentCount;
+            const void* pComments;
+        } vorbis_comment;
+
+        struct
+        {
+            char catalog[128];
+            drflac_uint64 leadInSampleCount;
+            drflac_bool32 isCD;
+            drflac_uint8 trackCount;
+            const void* pTrackData;
+        } cuesheet;
+
+        struct
+        {
+            drflac_uint32 type;
+            drflac_uint32 mimeLength;
+            const char* mime;
+            drflac_uint32 descriptionLength;
+            const char* description;
+            drflac_uint32 width;
+            drflac_uint32 height;
+            drflac_uint32 colorDepth;
+            drflac_uint32 indexColorCount;
+            drflac_uint32 pictureDataSize;
+            const drflac_uint8* pPictureData;
+        } picture;
+    } data;
+} drflac_metadata;
+
+
+/*
+Callback for when data needs to be read from the client.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+pBufferOut (out)
+    The output buffer.
+
+bytesToRead (in)
+    The number of bytes to read.
+
+
+Return Value
+------------
+The number of bytes actually read.
+
+
+Remarks
+-------
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until either the entire bytesToRead is filled or
+you have reached the end of the stream.
+*/
+typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
+
+/*
+Callback for when data needs to be seeked.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+offset (in)
+    The number of bytes to move, relative to the origin. Will never be negative.
+
+origin (in)
+    The origin of the seek - the current position or the start of the stream.
+
+
+Return Value
+------------
+Whether or not the seek was successful.
+
+
+Remarks
+-------
+The offset will never be negative. Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be
+either drflac_seek_origin_start or drflac_seek_origin_current.
+
+When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected
+and handled by returning DRFLAC_FALSE.
+*/
+typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
+
+/*
+Callback for when a metadata block is read.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+pMetadata (in)
+    A pointer to a structure containing the data of the metadata block.
+
+
+Remarks
+-------
+Use pMetadata->type to determine which metadata block is being handled and how to read the data. This
+will be set to one of the DRFLAC_METADATA_BLOCK_TYPE_* tokens.
+*/
+typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
+
+
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drflac_allocation_callbacks;
+
+/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */
+typedef struct
+{
+    const drflac_uint8* data;
+    size_t dataSize;
+    size_t currentReadPos;
+} drflac__memory_stream;
+
+/* Structure for internal use. Used for bit streaming. */
+typedef struct
+{
+    /* The function to call when more data needs to be read. */
+    drflac_read_proc onRead;
+
+    /* The function to call when the current read position needs to be moved. */
+    drflac_seek_proc onSeek;
+
+    /* The user data to pass around to onRead and onSeek. */
+    void* pUserData;
+
+
+    /*
+    The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the
+    stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether
+    or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t).
+    */
+    size_t unalignedByteCount;
+
+    /* The content of the unaligned bytes. */
+    drflac_cache_t unalignedCache;
+
+    /* The index of the next valid cache line in the "L2" cache. */
+    drflac_uint32 nextL2Line;
+
+    /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */
+    drflac_uint32 consumedBits;
+
+    /*
+    The cached data which was most recently read from the client. There are two levels of cache. Data flows as such:
+    Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions.
+    */
+    drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)];
+    drflac_cache_t cache;
+
+    /*
+    CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this
+    is reset to 0 at the beginning of each frame.
+    */
+    drflac_uint16 crc16;
+    drflac_cache_t crc16Cache;              /* A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. */
+    drflac_uint32 crc16CacheIgnoredBytes;   /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */
+} drflac_bs;
+
+typedef struct
+{
+    /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */
+    drflac_uint8 subframeType;
+
+    /* The number of wasted bits per sample as specified by the sub-frame header. */
+    drflac_uint8 wastedBitsPerSample;
+
+    /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */
+    drflac_uint8 lpcOrder;
+
+    /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */
+    drflac_int32* pSamplesS32;
+} drflac_subframe;
+
+typedef struct
+{
+    /*
+    If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will
+    always be set to 0. This is 64-bit because the decoded PCM frame number will be 36 bits.
+    */
+    drflac_uint64 pcmFrameNumber;
+
+    /*
+    If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. This
+    is 32-bit because in fixed block sizes, the maximum frame number will be 31 bits.
+    */
+    drflac_uint32 flacFrameNumber;
+
+    /* The sample rate of this frame. */
+    drflac_uint32 sampleRate;
+
+    /* The number of PCM frames in each sub-frame within this frame. */
+    drflac_uint16 blockSizeInPCMFrames;
+
+    /*
+    The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this
+    will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE.
+    */
+    drflac_uint8 channelAssignment;
+
+    /* The number of bits per sample within this frame. */
+    drflac_uint8 bitsPerSample;
+
+    /* The frame's CRC. */
+    drflac_uint8 crc8;
+} drflac_frame_header;
+
+typedef struct
+{
+    /* The header. */
+    drflac_frame_header header;
+
+    /*
+    The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read,
+    this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame.
+    */
+    drflac_uint32 pcmFramesRemaining;
+
+    /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */
+    drflac_subframe subframes[8];
+} drflac_frame;
+
+typedef struct
+{
+    /* The function to call when a metadata block is read. */
+    drflac_meta_proc onMeta;
+
+    /* The user data posted to the metadata callback function. */
+    void* pUserDataMD;
+
+    /* Memory allocation callbacks. */
+    drflac_allocation_callbacks allocationCallbacks;
+
+
+    /* The sample rate. Will be set to something like 44100. */
+    drflac_uint32 sampleRate;
+
+    /*
+    The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the
+    value specified in the STREAMINFO block.
+    */
+    drflac_uint8 channels;
+
+    /* The bits per sample. Will be set to something like 16, 24, etc. */
+    drflac_uint8 bitsPerSample;
+
+    /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */
+    drflac_uint16 maxBlockSizeInPCMFrames;
+
+    /*
+    The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means
+    the total PCM frame count is unknown. Likely the case with streams like internet radio.
+    */
+    drflac_uint64 totalPCMFrameCount;
+
+
+    /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */
+    drflac_container container;
+
+    /* The number of seekpoints in the seektable. */
+    drflac_uint32 seekpointCount;
+
+
+    /* Information about the frame the decoder is currently sitting on. */
+    drflac_frame currentFLACFrame;
+
+
+    /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */
+    drflac_uint64 currentPCMFrame;
+
+    /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */
+    drflac_uint64 firstFLACFramePosInBytes;
+
+
+    /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */
+    drflac__memory_stream memoryStream;
+
+
+    /* A pointer to the decoded sample data. This is an offset of pExtraData. */
+    drflac_int32* pDecodedSamples;
+
+    /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */
+    drflac_seekpoint* pSeekpoints;
+
+    /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */
+    void* _oggbs;
+
+    /* Internal use only. Used for profiling and testing different seeking modes. */
+    drflac_bool32 _noSeekTableSeek    : 1;
+    drflac_bool32 _noBinarySearchSeek : 1;
+    drflac_bool32 _noBruteForceSeek   : 1;
+
+    /* The bit streamer. The raw FLAC data is fed through this object. */
+    drflac_bs bs;
+
+    /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */
+    drflac_uint8 pExtraData[1];
+} drflac;
+
+
+/*
+Opens a FLAC decoder.
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead and onSeek.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+Returns a pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with `drflac_close()`.
+
+`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
+
+This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated FLAC, both of which should work seamlessly
+without any manual intervention. Ogg encapsulation also works with multiplexed streams which basically means it can play FLAC encoded audio tracks in videos.
+
+This is the lowest level function for opening a FLAC stream. You can also use `drflac_open_file()` and `drflac_open_memory()` to open the stream from a file or
+from a block of memory respectively.
+
+The STREAMINFO block must be present for this to succeed. Use `drflac_open_relaxed()` to open a FLAC stream where the header may not be present.
+
+Use `drflac_open_with_metadata()` if you need access to metadata.
+
+
+Seek Also
+---------
+drflac_open_file()
+drflac_open_memory()
+drflac_open_with_metadata()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC stream with relaxed validation of the header block.
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+container (in)
+    Whether or not the FLAC stream is encapsulated using standard FLAC encapsulation or Ogg encapsulation.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead and onSeek.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+The same as drflac_open(), except attempts to open the stream even when a header block is not present.
+
+Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do not set this to `drflac_container_unknown`
+as that is for internal use only.
+
+Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never found it will continue forever. To abort,
+force your `onRead` callback to return 0, which dr_flac will use as an indicator that the end of the stream was found.
+
+Use `drflac_open_with_metadata_relaxed()` if you need access to metadata.
+*/
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+onMeta (in)
+    The function to call for every metadata block.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead, onSeek and onMeta.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with `drflac_close()`.
+
+`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
+
+This is slower than `drflac_open()`, so avoid this one if you don't need metadata. Internally, this will allocate and free memory on the heap for every
+metadata block except for STREAMINFO and PADDING blocks.
+
+The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns. This callback takes a
+pointer to a `drflac_metadata` object which is a union containing the data of all relevant metadata blocks. Use the `type` member to discriminate against
+the different metadata types.
+
+The STREAMINFO block must be present for this to succeed. Use `drflac_open_with_metadata_relaxed()` to open a FLAC stream where the header may not be present.
+
+Note that this will behave inconsistently with `drflac_open()` if the stream is an Ogg encapsulated stream and a metadata block is corrupted. This is due to
+the way the Ogg stream recovers from corrupted pages. When `drflac_open_with_metadata()` is being used, the open routine will try to read the contents of the
+metadata block, whereas `drflac_open()` will simply seek past it (for the sake of efficiency). This inconsistency can result in different samples being
+returned depending on whether or not the stream is being opened with metadata.
+
+
+Seek Also
+---------
+drflac_open_file_with_metadata()
+drflac_open_memory_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
+
+See Also
+--------
+drflac_open_with_metadata()
+drflac_open_relaxed()
+*/
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Closes the given FLAC decoder.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder to close.
+
+
+Remarks
+-------
+This will destroy the decoder object.
+
+
+See Also
+--------
+drflac_open()
+drflac_open_with_metadata()
+drflac_open_file()
+drflac_open_file_w()
+drflac_open_file_with_metadata()
+drflac_open_file_with_metadata_w()
+drflac_open_memory()
+drflac_open_memory_with_metadata()
+*/
+DRFLAC_API void drflac_close(drflac* pFlac);
+
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut);
+
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved signed 16-bit PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+
+Note that this is lossy for streams where the bits per sample is larger than 16.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut);
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved 32-bit floating point PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+
+Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly represent every possible number.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut);
+
+/*
+Seeks to the PCM frame at the given index.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+pcmFrameIndex (in)
+    The index of the PCM frame to seek to. See notes below.
+
+
+Return Value
+-------------
+`DRFLAC_TRUE` if successful; `DRFLAC_FALSE` otherwise.
+*/
+DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex);
+
+
+
+#ifndef DR_FLAC_NO_STDIO
+/*
+Opens a FLAC decoder from the file at the given path.
+
+
+Parameters
+----------
+pFileName (in)
+    The path of the file to open, either absolute or relative to the current directory.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with drflac_close().
+
+
+Remarks
+-------
+This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the number of files a process can have open
+at any given time, so keep this mind if you have many decoders open at the same time.
+
+
+See Also
+--------
+drflac_open_file_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.)
+
+
+Parameters
+----------
+pFileName (in)
+    The path of the file to open, either absolute or relative to the current directory.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+onMeta (in)
+    The callback to fire for each metadata block.
+
+pUserData (in)
+    A pointer to the user data to pass to the metadata callback.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Remarks
+-------
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+
+
+See Also
+--------
+drflac_open_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/*
+Opens a FLAC decoder from a pre-allocated block of memory
+
+
+Parameters
+----------
+pData (in)
+    A pointer to the raw encoded FLAC data.
+
+dataSize (in)
+    The size in bytes of `data`.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for the lifetime of the decoder.
+
+
+See Also
+--------
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.)
+
+
+Parameters
+----------
+pData (in)
+    A pointer to the raw encoded FLAC data.
+
+dataSize (in)
+    The size in bytes of `data`.
+
+onMeta (in)
+    The callback to fire for each metadata block.
+
+pUserData (in)
+    A pointer to the user data to pass to the metadata callback.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Remarks
+-------
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+
+
+See Also
+-------
+drflac_open_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+
+
+/* High Level APIs */
+
+/*
+Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a
+pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free().
+
+You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which
+case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
+
+Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously
+read samples into a dynamically sized buffer on the heap until no samples are left.
+
+Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
+*/
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+#ifndef DR_FLAC_NO_STDIO
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */
+DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */
+DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Frees memory that was allocated internally by dr_flac.
+
+Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this.
+*/
+DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+
+/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */
+typedef struct
+{
+    drflac_uint32 countRemaining;
+    const char* pRunningData;
+} drflac_vorbis_comment_iterator;
+
+/*
+Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT
+metadata block.
+*/
+DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments);
+
+/*
+Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The
+returned string is NOT null terminated.
+*/
+DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut);
+
+
+/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */
+typedef struct
+{
+    drflac_uint32 countRemaining;
+    const char* pRunningData;
+} drflac_cuesheet_track_iterator;
+
+/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */
+#pragma pack(4)
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 index;
+    drflac_uint8 reserved[3];
+} drflac_cuesheet_track_index;
+#pragma pack()
+
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 trackNumber;
+    char ISRC[12];
+    drflac_bool8 isAudio;
+    drflac_bool8 preEmphasis;
+    drflac_uint8 indexCount;
+    const drflac_cuesheet_track_index* pIndexPoints;
+} drflac_cuesheet_track;
+
+/*
+Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata
+block.
+*/
+DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData);
+
+/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. */
+DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* dr_flac_h */
+
+
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
+#if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
+#ifndef dr_flac_c
+#define dr_flac_c
+
+/* Disable some annoying warnings. */
+#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+    #pragma GCC diagnostic push
+    #if __GNUC__ >= 7
+    #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+    #endif
+#endif
+
+#ifdef __linux__
+    #ifndef _BSD_SOURCE
+        #define _BSD_SOURCE
+    #endif
+    #ifndef _DEFAULT_SOURCE
+        #define _DEFAULT_SOURCE
+    #endif
+    #ifndef __USE_BSD
+        #define __USE_BSD
+    #endif
+    #include <endian.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+    #define DRFLAC_INLINE __forceinline
+#elif defined(__GNUC__)
+    /*
+    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
+    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
+    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
+    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
+    I am using "__inline__" only when we're compiling in strict ANSI mode.
+    */
+    #if defined(__STRICT_ANSI__)
+        #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
+    #else
+        #define DRFLAC_INLINE inline __attribute__((always_inline))
+    #endif
+#elif defined(__WATCOMC__)
+    #define DRFLAC_INLINE __inline
+#else
+    #define DRFLAC_INLINE
+#endif
+
+/* CPU architecture. */
+#if defined(__x86_64__) || defined(_M_X64)
+    #define DRFLAC_X64
+#elif defined(__i386) || defined(_M_IX86)
+    #define DRFLAC_X86
+#elif defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64)
+    #define DRFLAC_ARM
+#endif
+
+/*
+Intrinsics Support
+
+There's a bug in GCC 4.2.x which results in an incorrect compilation error when using _mm_slli_epi32() where it complains with
+
+    "error: shift must be an immediate"
+
+Unfortuantely dr_flac depends on this for a few things so we're just going to disable SSE on GCC 4.2 and below.
+*/
+#if !defined(DR_FLAC_NO_SIMD)
+    #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
+        #if defined(_MSC_VER) && !defined(__clang__)
+            /* MSVC. */
+            #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2)    /* 2005 */
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)   /* 2010 */
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
+            /* Assume GNUC-style. */
+            #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. */
+        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
+            #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        #if defined(DRFLAC_SUPPORT_SSE41)
+            #include <smmintrin.h>
+        #elif defined(DRFLAC_SUPPORT_SSE2)
+            #include <emmintrin.h>
+        #endif
+    #endif
+
+    #if defined(DRFLAC_ARM)
+        #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
+            #define DRFLAC_SUPPORT_NEON
+        #endif
+
+        /* Fall back to looking for the #include file. */
+        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
+            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
+                #define DRFLAC_SUPPORT_NEON
+            #endif
+        #endif
+
+        #if defined(DRFLAC_SUPPORT_NEON)
+            #include <arm_neon.h>
+        #endif
+    #endif
+#endif
+
+/* Compile-time CPU feature support. */
+#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
+    #if defined(_MSC_VER) && !defined(__clang__)
+        #if _MSC_VER >= 1400
+            #include <intrin.h>
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                __cpuid(info, fid);
+            }
+        #else
+            #define DRFLAC_NO_CPUID
+        #endif
+    #else
+        #if defined(__GNUC__) || defined(__clang__)
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                /*
+                It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the
+                specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for
+                supporting different assembly dialects.
+
+                What's basically happening is that we're saving and restoring the ebx register manually.
+                */
+                #if defined(DRFLAC_X86) && defined(__PIC__)
+                    __asm__ __volatile__ (
+                        "xchg{l} {%%}ebx, %k1;"
+                        "cpuid;"
+                        "xchg{l} {%%}ebx, %k1;"
+                        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #else
+                    __asm__ __volatile__ (
+                        "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #endif
+            }
+        #else
+            #define DRFLAC_NO_CPUID
+        #endif
+    #endif
+#else
+    #define DRFLAC_NO_CPUID
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse2(void)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2)
+        #if defined(DRFLAC_X64)
+            return DRFLAC_TRUE;    /* 64-bit targets always support SSE2. */
+        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE2 code we can assume support. */
+        #else
+            #if defined(DRFLAC_NO_CPUID)
+                return DRFLAC_FALSE;
+            #else
+                int info[4];
+                drflac__cpuid(info, 1);
+                return (info[3] & (1 << 26)) != 0;
+            #endif
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE2 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
+{
+#if defined(DRFLAC_SUPPORT_SSE41)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
+        #if defined(DRFLAC_X64)
+            return DRFLAC_TRUE;    /* 64-bit targets always support SSE4.1. */
+        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE41 code we can assume support. */
+        #else
+            #if defined(DRFLAC_NO_CPUID)
+                return DRFLAC_FALSE;
+            #else
+                int info[4];
+                drflac__cpuid(info, 1);
+                return (info[2] & (1 << 19)) != 0;
+            #endif
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE41 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) && !defined(__clang__)
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
+            #define DRFLAC_HAS_LZCNT_INTRINSIC
+        #endif
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(__clang__)
+    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_bswap16)
+            #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap32)
+            #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap64)
+            #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+        #endif
+    #endif
+#elif defined(__GNUC__)
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+        #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+        #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #endif
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+        #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #endif
+#endif
+
+
+/* Standard library stuff. */
+#ifndef DRFLAC_ASSERT
+#include <assert.h>
+#define DRFLAC_ASSERT(expression)           assert(expression)
+#endif
+#ifndef DRFLAC_MALLOC
+#define DRFLAC_MALLOC(sz)                   malloc((sz))
+#endif
+#ifndef DRFLAC_REALLOC
+#define DRFLAC_REALLOC(p, sz)               realloc((p), (sz))
+#endif
+#ifndef DRFLAC_FREE
+#define DRFLAC_FREE(p)                      free((p))
+#endif
+#ifndef DRFLAC_COPY_MEMORY
+#define DRFLAC_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
+#endif
+#ifndef DRFLAC_ZERO_MEMORY
+#define DRFLAC_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
+#endif
+#ifndef DRFLAC_ZERO_OBJECT
+#define DRFLAC_ZERO_OBJECT(p)               DRFLAC_ZERO_MEMORY((p), sizeof(*(p)))
+#endif
+
+#define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  /* 64 for AVX-512 in the future. */
+
+typedef drflac_int32 drflac_result;
+#define DRFLAC_SUCCESS                                   0
+#define DRFLAC_ERROR                                    -1   /* A generic error. */
+#define DRFLAC_INVALID_ARGS                             -2
+#define DRFLAC_INVALID_OPERATION                        -3
+#define DRFLAC_OUT_OF_MEMORY                            -4
+#define DRFLAC_OUT_OF_RANGE                             -5
+#define DRFLAC_ACCESS_DENIED                            -6
+#define DRFLAC_DOES_NOT_EXIST                           -7
+#define DRFLAC_ALREADY_EXISTS                           -8
+#define DRFLAC_TOO_MANY_OPEN_FILES                      -9
+#define DRFLAC_INVALID_FILE                             -10
+#define DRFLAC_TOO_BIG                                  -11
+#define DRFLAC_PATH_TOO_LONG                            -12
+#define DRFLAC_NAME_TOO_LONG                            -13
+#define DRFLAC_NOT_DIRECTORY                            -14
+#define DRFLAC_IS_DIRECTORY                             -15
+#define DRFLAC_DIRECTORY_NOT_EMPTY                      -16
+#define DRFLAC_END_OF_FILE                              -17
+#define DRFLAC_NO_SPACE                                 -18
+#define DRFLAC_BUSY                                     -19
+#define DRFLAC_IO_ERROR                                 -20
+#define DRFLAC_INTERRUPT                                -21
+#define DRFLAC_UNAVAILABLE                              -22
+#define DRFLAC_ALREADY_IN_USE                           -23
+#define DRFLAC_BAD_ADDRESS                              -24
+#define DRFLAC_BAD_SEEK                                 -25
+#define DRFLAC_BAD_PIPE                                 -26
+#define DRFLAC_DEADLOCK                                 -27
+#define DRFLAC_TOO_MANY_LINKS                           -28
+#define DRFLAC_NOT_IMPLEMENTED                          -29
+#define DRFLAC_NO_MESSAGE                               -30
+#define DRFLAC_BAD_MESSAGE                              -31
+#define DRFLAC_NO_DATA_AVAILABLE                        -32
+#define DRFLAC_INVALID_DATA                             -33
+#define DRFLAC_TIMEOUT                                  -34
+#define DRFLAC_NO_NETWORK                               -35
+#define DRFLAC_NOT_UNIQUE                               -36
+#define DRFLAC_NOT_SOCKET                               -37
+#define DRFLAC_NO_ADDRESS                               -38
+#define DRFLAC_BAD_PROTOCOL                             -39
+#define DRFLAC_PROTOCOL_UNAVAILABLE                     -40
+#define DRFLAC_PROTOCOL_NOT_SUPPORTED                   -41
+#define DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED            -42
+#define DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED             -43
+#define DRFLAC_SOCKET_NOT_SUPPORTED                     -44
+#define DRFLAC_CONNECTION_RESET                         -45
+#define DRFLAC_ALREADY_CONNECTED                        -46
+#define DRFLAC_NOT_CONNECTED                            -47
+#define DRFLAC_CONNECTION_REFUSED                       -48
+#define DRFLAC_NO_HOST                                  -49
+#define DRFLAC_IN_PROGRESS                              -50
+#define DRFLAC_CANCELLED                                -51
+#define DRFLAC_MEMORY_ALREADY_MAPPED                    -52
+#define DRFLAC_AT_END                                   -53
+#define DRFLAC_CRC_MISMATCH                             -128
+
+#define DRFLAC_SUBFRAME_CONSTANT                        0
+#define DRFLAC_SUBFRAME_VERBATIM                        1
+#define DRFLAC_SUBFRAME_FIXED                           8
+#define DRFLAC_SUBFRAME_LPC                             32
+#define DRFLAC_SUBFRAME_RESERVED                        255
+
+#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE  0
+#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1
+
+#define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT           0
+#define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE             8
+#define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE            9
+#define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
+
+#define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
+
+
+DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision)
+{
+    if (pMajor) {
+        *pMajor = DRFLAC_VERSION_MAJOR;
+    }
+
+    if (pMinor) {
+        *pMinor = DRFLAC_VERSION_MINOR;
+    }
+
+    if (pRevision) {
+        *pRevision = DRFLAC_VERSION_REVISION;
+    }
+}
+
+DRFLAC_API const char* drflac_version_string(void)
+{
+    return DRFLAC_VERSION_STRING;
+}
+
+
+/* CPU caps. */
+#if defined(__has_feature)
+    #if __has_feature(thread_sanitizer)
+        #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread")))
+    #else
+        #define DRFLAC_NO_THREAD_SANITIZE
+    #endif
+#else
+    #define DRFLAC_NO_THREAD_SANITIZE
+#endif
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE;
+#endif
+
+#ifndef DRFLAC_NO_CPUID
+static drflac_bool32 drflac__gIsSSE2Supported  = DRFLAC_FALSE;
+static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE;
+
+/*
+I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does
+actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of
+complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore
+just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute.
+*/
+DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
+{
+    static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE;
+
+    if (!isCPUCapsInitialized) {
+        /* LZCNT */
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+        int info[4] = {0};
+        drflac__cpuid(info, 0x80000001);
+        drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0;
+#endif
+
+        /* SSE2 */
+        drflac__gIsSSE2Supported = drflac_has_sse2();
+
+        /* SSE4.1 */
+        drflac__gIsSSE41Supported = drflac_has_sse41();
+
+        /* Initialized. */
+        isCPUCapsInitialized = DRFLAC_TRUE;
+    }
+}
+#else
+static drflac_bool32 drflac__gIsNEONSupported  = DRFLAC_FALSE;
+
+static DRFLAC_INLINE drflac_bool32 drflac__has_neon(void)
+{
+#if defined(DRFLAC_SUPPORT_NEON)
+    #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON)
+        #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate NEON code we can assume support. */
+        #else
+            /* TODO: Runtime check. */
+            return DRFLAC_FALSE;
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* NEON is only supported on ARM architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
+{
+    drflac__gIsNEONSupported = drflac__has_neon();
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
+    drflac__gIsLZCNTSupported = DRFLAC_TRUE;
+#endif
+}
+#endif
+
+
+/* Endian Management */
+static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void)
+{
+#if defined(DRFLAC_X86) || defined(DRFLAC_X64)
+    return DRFLAC_TRUE;
+#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
+    return DRFLAC_TRUE;
+#else
+    int n = 1;
+    return (*(char*)&n) == 1;
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #if defined(_MSC_VER) && !defined(__clang__)
+        return _byteswap_ushort(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap16(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF00) >> 8) |
+           ((n & 0x00FF) << 8);
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #if defined(_MSC_VER) && !defined(__clang__)
+        return _byteswap_ulong(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
+            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
+            drflac_uint32 r;
+            __asm__ __volatile__ (
+            #if defined(DRFLAC_64BIT)
+                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+            #else
+                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
+            #endif
+            );
+            return r;
+        #else
+            return __builtin_bswap32(n);
+        #endif
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF000000) >> 24) |
+           ((n & 0x00FF0000) >>  8) |
+           ((n & 0x0000FF00) <<  8) |
+           ((n & 0x000000FF) << 24);
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #if defined(_MSC_VER) && !defined(__clang__)
+        return _byteswap_uint64(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap64(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+    return ((n & ((drflac_uint64)0xFF000000 << 32)) >> 56) |
+           ((n & ((drflac_uint64)0x00FF0000 << 32)) >> 40) |
+           ((n & ((drflac_uint64)0x0000FF00 << 32)) >> 24) |
+           ((n & ((drflac_uint64)0x000000FF << 32)) >>  8) |
+           ((n & ((drflac_uint64)0xFF000000      )) <<  8) |
+           ((n & ((drflac_uint64)0x00FF0000      )) << 24) |
+           ((n & ((drflac_uint64)0x0000FF00      )) << 40) |
+           ((n & ((drflac_uint64)0x000000FF      )) << 56);
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint16(n);
+    }
+
+    return n;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint32(n);
+    }
+
+    return n;
+}
+
+static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint64(n);
+    }
+
+    return n;
+}
+
+
+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
+{
+    if (!drflac__is_little_endian()) {
+        return drflac__swap_endian_uint32(n);
+    }
+
+    return n;
+}
+
+
+static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
+{
+    drflac_uint32 result = 0;
+    result |= (n & 0x7F000000) >> 3;
+    result |= (n & 0x007F0000) >> 2;
+    result |= (n & 0x00007F00) >> 1;
+    result |= (n & 0x0000007F) >> 0;
+
+    return result;
+}
+
+
+
+/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */
+static drflac_uint8 drflac__crc8_table[] = {
+    0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
+    0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
+    0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
+    0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
+    0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
+    0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
+    0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
+    0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
+    0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
+    0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
+    0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
+    0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
+    0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
+    0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
+    0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
+    0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
+};
+
+static drflac_uint16 drflac__crc16_table[] = {
+    0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011,
+    0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022,
+    0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072,
+    0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041,
+    0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2,
+    0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1,
+    0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1,
+    0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082,
+    0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192,
+    0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1,
+    0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1,
+    0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2,
+    0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151,
+    0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162,
+    0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132,
+    0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101,
+    0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312,
+    0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321,
+    0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371,
+    0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342,
+    0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1,
+    0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2,
+    0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2,
+    0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381,
+    0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291,
+    0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2,
+    0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2,
+    0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1,
+    0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252,
+    0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261,
+    0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231,
+    0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202
+};
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data)
+{
+    return drflac__crc8_table[crc ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */
+    drflac_uint8 p = 0x07;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint8 bit = (data & (1 << i)) >> i;
+        if (crc & 0x80) {
+            crc = ((crc << 1) | bit) ^ p;
+        } else {
+            crc = ((crc << 1) | bit);
+        }
+    }
+    return crc;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 32);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count - (wholeBytes*8);
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (drflac_uint8)((crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]);
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data)
+{
+    return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data)
+{
+#ifdef DRFLAC_64BIT
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
+#endif
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
+
+    return crc;
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount)
+{
+    switch (byteCount)
+    {
+#ifdef DRFLAC_64BIT
+    case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
+    case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
+    case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
+    case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
+#endif
+    case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
+    case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
+    case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
+    case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
+    }
+
+    return crc;
+}
+
+#if 0
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */
+    drflac_uint16 p = 0x8005;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint16 bit = (data & (1ULL << i)) >> i;
+        if (r & 0x8000) {
+            r = ((r << 1) | bit) ^ p;
+        } else {
+            r = ((r << 1) | bit);
+        }
+    }
+
+    return crc;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 64);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count & 7;
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        default:
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 64);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count & 7;
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        default:
+        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits)));    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits)));
+        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits)));
+        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits)));
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000      ) << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000      ) << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00      ) << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF      ) << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 count)
+{
+#ifdef DRFLAC_64BIT
+    return drflac_crc16__64bit(crc, data, count);
+#else
+    return drflac_crc16__32bit(crc, data, count);
+#endif
+}
+#endif
+
+
+#ifdef DRFLAC_64BIT
+#define drflac__be2host__cache_line drflac__be2host_64
+#else
+#define drflac__be2host__cache_line drflac__be2host_32
+#endif
+
+/*
+BIT READING ATTEMPT #2
+
+This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting
+on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache
+is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an
+array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data
+from onRead() is read into.
+*/
+#define DRFLAC_CACHE_L1_SIZE_BYTES(bs)                      (sizeof((bs)->cache))
+#define DRFLAC_CACHE_L1_SIZE_BITS(bs)                       (sizeof((bs)->cache)*8)
+#define DRFLAC_CACHE_L1_BITS_REMAINING(bs)                  (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits)
+#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)           (~((~(drflac_cache_t)0) >> (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount)      (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
+#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount)               (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount)     (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >>  DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)))
+#define DRFLAC_CACHE_L2_SIZE_BYTES(bs)                      (sizeof((bs)->cacheL2))
+#define DRFLAC_CACHE_L2_LINE_COUNT(bs)                      (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
+#define DRFLAC_CACHE_L2_LINES_REMAINING(bs)                 (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
+
+
+#ifndef DR_FLAC_NO_CRC
+static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs)
+{
+    bs->crc16 = 0;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+}
+
+static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs)
+{
+    if (bs->crc16CacheIgnoredBytes == 0) {
+        bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache);
+    } else {
+        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes);
+        bs->crc16CacheIgnoredBytes = 0;
+    }
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs)
+{
+    /* We should never be flushing in a situation where we are not aligned on a byte boundary. */
+    DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0);
+
+    /*
+    The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined
+    by the number of bits that have been consumed.
+    */
+    if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) {
+        drflac__update_crc16(bs);
+    } else {
+        /* We only accumulate the consumed bits. */
+        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes);
+
+        /*
+        The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated
+        so we can handle that later.
+        */
+        bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+    }
+
+    return bs->crc16;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
+{
+    size_t bytesRead;
+    size_t alignedL1LineCount;
+
+    /* Fast path. Try loading straight from L2. */
+    if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    }
+
+    /*
+    If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's
+    any left.
+    */
+    if (bs->unalignedByteCount > 0) {
+        return DRFLAC_FALSE;   /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. */
+    }
+
+    bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs));
+
+    bs->nextL2Line = 0;
+    if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) {
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    }
+
+
+    /*
+    If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably
+    means we've just reached the end of the file. We need to move the valid data down to the end of the buffer
+    and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to
+    the size of the L1 so we'll need to seek backwards by any misaligned bytes.
+    */
+    alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs);
+
+    /* We need to keep track of any unaligned bytes for later use. */
+    bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs));
+    if (bs->unalignedByteCount > 0) {
+        bs->unalignedCache = bs->cacheL2[alignedL1LineCount];
+    }
+
+    if (alignedL1LineCount > 0) {
+        size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount;
+        size_t i;
+        for (i = alignedL1LineCount; i > 0; --i) {
+            bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1];
+        }
+
+        bs->nextL2Line = (drflac_uint32)offset;
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    } else {
+        /* If we get into this branch it means we weren't able to load any L1-aligned data. */
+        bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs);
+        return DRFLAC_FALSE;
+    }
+}
+
+static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
+{
+    size_t bytesRead;
+
+#ifndef DR_FLAC_NO_CRC
+    drflac__update_crc16(bs);
+#endif
+
+    /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */
+    if (drflac__reload_l1_cache_from_l2(bs)) {
+        bs->cache = drflac__be2host__cache_line(bs->cache);
+        bs->consumedBits = 0;
+#ifndef DR_FLAC_NO_CRC
+        bs->crc16Cache = bs->cache;
+#endif
+        return DRFLAC_TRUE;
+    }
+
+    /* Slow path. */
+
+    /*
+    If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last
+    few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the
+    data from the unaligned cache.
+    */
+    bytesRead = bs->unalignedByteCount;
+    if (bytesRead == 0) {
+        bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- The stream has been exhausted, so marked the bits as consumed. */
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs));
+    bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8;
+
+    bs->cache = drflac__be2host__cache_line(bs->unalignedCache);
+    bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs));    /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */
+    bs->unalignedByteCount = 0;     /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = bs->cache >> bs->consumedBits;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+#endif
+    return DRFLAC_TRUE;
+}
+
+static void drflac__reset_cache(drflac_bs* bs)
+{
+    bs->nextL2Line   = DRFLAC_CACHE_L2_LINE_COUNT(bs);  /* <-- This clears the L2 cache. */
+    bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- This clears the L1 cache. */
+    bs->cache = 0;
+    bs->unalignedByteCount = 0;                         /* <-- This clears the trailing unaligned bytes. */
+    bs->unalignedCache = 0;
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = 0;
+    bs->crc16CacheIgnoredBytes = 0;
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResultOut != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 32);
+
+    if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        /*
+        If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do
+        a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly
+        more optimal solution for this.
+        */
+#ifdef DRFLAC_64BIT
+        *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
+        bs->consumedBits += bitCount;
+        bs->cache <<= bitCount;
+#else
+        if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
+            bs->consumedBits += bitCount;
+            bs->cache <<= bitCount;
+        } else {
+            /* Cannot shift by 32-bits, so need to do it differently. */
+            *pResultOut = (drflac_uint32)bs->cache;
+            bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);
+            bs->cache = 0;
+        }
+#endif
+
+        return DRFLAC_TRUE;
+    } else {
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
+        drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        drflac_uint32 bitCountLo = bitCount - bitCountHi;
+        drflac_uint32 resultHi;
+
+        DRFLAC_ASSERT(bitCountHi > 0);
+        DRFLAC_ASSERT(bitCountHi < 32);
+        resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi);
+
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
+        bs->consumedBits += bitCountLo;
+        bs->cache <<= bitCountLo;
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 32);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Do not attempt to shift by 32 as it's undefined. */
+    if (bitCount < 32) {
+        drflac_uint32 signbit;
+        signbit = ((result >> (bitCount-1)) & 0x01);
+        result |= (~signbit + 1) << bitCount;
+    }
+
+    *pResult = (drflac_int32)result;
+    return DRFLAC_TRUE;
+}
+
+#ifdef DRFLAC_64BIT
+static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut)
+{
+    drflac_uint32 resultHi;
+    drflac_uint32 resultLo;
+
+    DRFLAC_ASSERT(bitCount <= 64);
+    DRFLAC_ASSERT(bitCount >  32);
+
+    if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (!drflac__read_uint32(bs, 32, &resultLo)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo);
+    return DRFLAC_TRUE;
+}
+#endif
+
+/* Function below is unused, but leaving it here in case I need to quickly add it again. */
+#if 0
+static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut)
+{
+    drflac_uint64 result;
+    drflac_uint64 signbit;
+
+    DRFLAC_ASSERT(bitCount <= 64);
+
+    if (!drflac__read_uint64(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    signbit = ((result >> (bitCount-1)) & 0x01);
+    result |= (~signbit + 1) << bitCount;
+
+    *pResultOut = (drflac_int64)result;
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 16);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_uint16)result;
+    return DRFLAC_TRUE;
+}
+
+#if 0
+static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult)
+{
+    drflac_int32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 16);
+
+    if (!drflac__read_int32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_int16)result;
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 8);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_uint8)result;
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult)
+{
+    drflac_int32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 8);
+
+    if (!drflac__read_int32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_int8)result;
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
+{
+    if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        bs->consumedBits += (drflac_uint32)bitsToSeek;
+        bs->cache <<= bitsToSeek;
+        return DRFLAC_TRUE;
+    } else {
+        /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */
+        bitsToSeek       -= DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->cache         = 0;
+
+        /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */
+#ifdef DRFLAC_64BIT
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint64 bin;
+            if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#else
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint32 bin;
+            if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#endif
+
+        /* Whole leftover bytes. */
+        while (bitsToSeek >= 8) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, 8, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= 8;
+        }
+
+        /* Leftover bits. */
+        if (bitsToSeek > 0) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek = 0; /* <-- Necessary for the assert below. */
+        }
+
+        DRFLAC_ASSERT(bitsToSeek == 0);
+        return DRFLAC_TRUE;
+    }
+}
+
+
+/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */
+static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
+{
+    DRFLAC_ASSERT(bs != NULL);
+
+    /*
+    The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first
+    thing to do is align to the next byte.
+    */
+    if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
+        return DRFLAC_FALSE;
+    }
+
+    for (;;) {
+        drflac_uint8 hi;
+
+#ifndef DR_FLAC_NO_CRC
+        drflac__reset_crc16(bs);
+#endif
+
+        if (!drflac__read_uint8(bs, 8, &hi)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (hi == 0xFF) {
+            drflac_uint8 lo;
+            if (!drflac__read_uint8(bs, 6, &lo)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (lo == 0x3E) {
+                return DRFLAC_TRUE;
+            } else {
+                if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
+                    return DRFLAC_FALSE;
+                }
+            }
+        }
+    }
+
+    /* Should never get here. */
+    /*return DRFLAC_FALSE;*/
+}
+
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+#define DRFLAC_IMPLEMENT_CLZ_LZCNT
+#endif
+#if  defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(__clang__)
+#define DRFLAC_IMPLEMENT_CLZ_MSVC
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
+{
+    drflac_uint32 n;
+    static drflac_uint32 clz_table_4[] = {
+        0,
+        4,
+        3, 3,
+        2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1
+    };
+
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
+    n = clz_table_4[x >> (sizeof(x)*8 - 4)];
+    if (n == 0) {
+#ifdef DRFLAC_64BIT
+        if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n  = 32; x <<= 32; }
+        if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
+        if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8;  x <<= 8;  }
+        if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4;  x <<= 4;  }
+#else
+        if ((x & 0xFFFF0000) == 0) { n  = 16; x <<= 16; }
+        if ((x & 0xFF000000) == 0) { n += 8;  x <<= 8;  }
+        if ((x & 0xF0000000) == 0) { n += 4;  x <<= 4;  }
+#endif
+        n += clz_table_4[x >> (sizeof(x)*8 - 4)];
+    }
+
+    return n - 1;
+}
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void)
+{
+    /* Fast compile time check for ARM. */
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
+    return DRFLAC_TRUE;
+#else
+    /* If the compiler itself does not support the intrinsic then we'll need to return false. */
+    #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
+        return drflac__gIsLZCNTSupported;
+    #else
+        return DRFLAC_FALSE;
+    #endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
+{
+    /*
+    It's critical for competitive decoding performance that this function be highly optimal. With MSVC we can use the __lzcnt64() and __lzcnt() intrinsics
+    to achieve good performance, however on GCC and Clang it's a little bit more annoying. The __builtin_clzl() and __builtin_clzll() intrinsics leave
+    it undefined as to the return value when `x` is 0. We need this to be well defined as returning 32 or 64, depending on whether or not it's a 32- or
+    64-bit build. To work around this we would need to add a conditional to check for the x = 0 case, but this creates unnecessary inefficiency. To work
+    around this problem I have written some inline assembly to emit the LZCNT (x86) or CLZ (ARM) instruction directly which removes the need to include
+    the conditional. This has worked well in the past, but for some reason Clang's MSVC compatible driver, clang-cl, does not seem to be handling this
+    in the same way as the normal Clang driver. It seems that `clang-cl` is just outputting the wrong results sometimes, maybe due to some register
+    getting clobbered?
+
+    I'm not sure if this is a bug with dr_flac's inlined assembly (most likely), a bug in `clang-cl` or just a misunderstanding on my part with inline
+    assembly rules for `clang-cl`. If somebody can identify an error in dr_flac's inlined assembly I'm happy to get that fixed.
+
+    Fortunately there is an easy workaround for this. Clang implements MSVC-specific intrinsics for compatibility. It also defines _MSC_VER for extra
+    compatibility. We can therefore just check for _MSC_VER and use the MSVC intrinsic which, fortunately for us, Clang supports. It would still be nice
+    to know how to fix the inlined assembly for correctness sake, however.
+    */
+
+#if defined(_MSC_VER) /*&& !defined(__clang__)*/    /* <-- Intentionally wanting Clang to use the MSVC __lzcnt64/__lzcnt intrinsics due to above ^. */
+    #ifdef DRFLAC_64BIT
+        return (drflac_uint32)__lzcnt64(x);
+    #else
+        return (drflac_uint32)__lzcnt(x);
+    #endif
+#else
+    #if defined(__GNUC__) || defined(__clang__)
+        #if defined(DRFLAC_X64)
+            {
+                drflac_uint64 r;
+                __asm__ __volatile__ (
+                    "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc"
+                );
+
+                return (drflac_uint32)r;
+            }
+        #elif defined(DRFLAC_X86)
+            {
+                drflac_uint32 r;
+                __asm__ __volatile__ (
+                    "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc"
+                );
+
+                return r;
+            }
+        #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT)   /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */
+            {
+                unsigned int r;
+                __asm__ __volatile__ (
+                #if defined(DRFLAC_64BIT)
+                    "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+                #else
+                    "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x)
+                #endif
+                );
+
+                return r;
+            }
+        #else
+            if (x == 0) {
+                return sizeof(x)*8;
+            }
+            #ifdef DRFLAC_64BIT
+                return (drflac_uint32)__builtin_clzll((drflac_uint64)x);
+            #else
+                return (drflac_uint32)__builtin_clzl((drflac_uint32)x);
+            #endif
+        #endif
+    #else
+        /* Unsupported compiler. */
+        #error "This compiler does not support the lzcnt intrinsic."
+    #endif
+#endif
+}
+#endif
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+#include <intrin.h> /* For BitScanReverse(). */
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
+{
+    drflac_uint32 n;
+
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
+#ifdef DRFLAC_64BIT
+    _BitScanReverse64((unsigned long*)&n, x);
+#else
+    _BitScanReverse((unsigned long*)&n, x);
+#endif
+    return sizeof(x)*8 - n - 1;
+}
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
+{
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+    if (drflac__is_lzcnt_supported()) {
+        return drflac__clz_lzcnt(x);
+    } else
+#endif
+    {
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+        return drflac__clz_msvc(x);
+#else
+        return drflac__clz_software(x);
+#endif
+    }
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
+{
+    drflac_uint32 zeroCounter = 0;
+    drflac_uint32 setBitOffsetPlus1;
+
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
+    setBitOffsetPlus1 += 1;
+
+    bs->consumedBits += setBitOffsetPlus1;
+    bs->cache <<= setBitOffsetPlus1;
+
+    *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
+    return DRFLAC_TRUE;
+}
+
+
+
+static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(offsetFromStart > 0);
+
+    /*
+    Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which
+    is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit.
+    To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder.
+    */
+    if (offsetFromStart > 0x7FFFFFFF) {
+        drflac_uint64 bytesRemaining = offsetFromStart;
+        if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+        bytesRemaining -= 0x7FFFFFFF;
+
+        while (bytesRemaining > 0x7FFFFFFF) {
+            if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            bytesRemaining -= 0x7FFFFFFF;
+        }
+
+        if (bytesRemaining > 0) {
+            if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+        }
+    } else {
+        if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /* The cache should be reset to force a reload of fresh data from the client. */
+    drflac__reset_cache(bs);
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut)
+{
+    drflac_uint8 crc;
+    drflac_uint64 result;
+    drflac_uint8 utf8[7] = {0};
+    int byteCount;
+    int i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pNumberOut != NULL);
+    DRFLAC_ASSERT(pCRCOut != NULL);
+
+    crc = *pCRCOut;
+
+    if (!drflac__read_uint8(bs, 8, utf8)) {
+        *pNumberOut = 0;
+        return DRFLAC_AT_END;
+    }
+    crc = drflac_crc8(crc, utf8[0], 8);
+
+    if ((utf8[0] & 0x80) == 0) {
+        *pNumberOut = utf8[0];
+        *pCRCOut = crc;
+        return DRFLAC_SUCCESS;
+    }
+
+    /*byteCount = 1;*/
+    if ((utf8[0] & 0xE0) == 0xC0) {
+        byteCount = 2;
+    } else if ((utf8[0] & 0xF0) == 0xE0) {
+        byteCount = 3;
+    } else if ((utf8[0] & 0xF8) == 0xF0) {
+        byteCount = 4;
+    } else if ((utf8[0] & 0xFC) == 0xF8) {
+        byteCount = 5;
+    } else if ((utf8[0] & 0xFE) == 0xFC) {
+        byteCount = 6;
+    } else if ((utf8[0] & 0xFF) == 0xFE) {
+        byteCount = 7;
+    } else {
+        *pNumberOut = 0;
+        return DRFLAC_CRC_MISMATCH;     /* Bad UTF-8 encoding. */
+    }
+
+    /* Read extra bytes. */
+    DRFLAC_ASSERT(byteCount > 1);
+
+    result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
+    for (i = 1; i < byteCount; ++i) {
+        if (!drflac__read_uint8(bs, 8, utf8 + i)) {
+            *pNumberOut = 0;
+            return DRFLAC_AT_END;
+        }
+        crc = drflac_crc8(crc, utf8[i], 8);
+
+        result = (result << 6) | (utf8[i] & 0x3F);
+    }
+
+    *pNumberOut = result;
+    *pCRCOut = crc;
+    return DRFLAC_SUCCESS;
+}
+
+
+
+/*
+The next two functions are responsible for calculating the prediction.
+
+When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
+safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
+*/
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_int32 prediction = 0;
+
+    DRFLAC_ASSERT(order <= 32);
+
+    /* 32-bit version. */
+
+    /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. */
+    switch (order)
+    {
+    case 32: prediction += coefficients[31] * pDecodedSamples[-32];
+    case 31: prediction += coefficients[30] * pDecodedSamples[-31];
+    case 30: prediction += coefficients[29] * pDecodedSamples[-30];
+    case 29: prediction += coefficients[28] * pDecodedSamples[-29];
+    case 28: prediction += coefficients[27] * pDecodedSamples[-28];
+    case 27: prediction += coefficients[26] * pDecodedSamples[-27];
+    case 26: prediction += coefficients[25] * pDecodedSamples[-26];
+    case 25: prediction += coefficients[24] * pDecodedSamples[-25];
+    case 24: prediction += coefficients[23] * pDecodedSamples[-24];
+    case 23: prediction += coefficients[22] * pDecodedSamples[-23];
+    case 22: prediction += coefficients[21] * pDecodedSamples[-22];
+    case 21: prediction += coefficients[20] * pDecodedSamples[-21];
+    case 20: prediction += coefficients[19] * pDecodedSamples[-20];
+    case 19: prediction += coefficients[18] * pDecodedSamples[-19];
+    case 18: prediction += coefficients[17] * pDecodedSamples[-18];
+    case 17: prediction += coefficients[16] * pDecodedSamples[-17];
+    case 16: prediction += coefficients[15] * pDecodedSamples[-16];
+    case 15: prediction += coefficients[14] * pDecodedSamples[-15];
+    case 14: prediction += coefficients[13] * pDecodedSamples[-14];
+    case 13: prediction += coefficients[12] * pDecodedSamples[-13];
+    case 12: prediction += coefficients[11] * pDecodedSamples[-12];
+    case 11: prediction += coefficients[10] * pDecodedSamples[-11];
+    case 10: prediction += coefficients[ 9] * pDecodedSamples[-10];
+    case  9: prediction += coefficients[ 8] * pDecodedSamples[- 9];
+    case  8: prediction += coefficients[ 7] * pDecodedSamples[- 8];
+    case  7: prediction += coefficients[ 6] * pDecodedSamples[- 7];
+    case  6: prediction += coefficients[ 5] * pDecodedSamples[- 6];
+    case  5: prediction += coefficients[ 4] * pDecodedSamples[- 5];
+    case  4: prediction += coefficients[ 3] * pDecodedSamples[- 4];
+    case  3: prediction += coefficients[ 2] * pDecodedSamples[- 3];
+    case  2: prediction += coefficients[ 1] * pDecodedSamples[- 2];
+    case  1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
+    }
+
+    return (drflac_int32)(prediction >> shift);
+}
+
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_int64 prediction;
+
+    DRFLAC_ASSERT(order <= 32);
+
+    /* 64-bit version. */
+
+    /* This method is faster on the 32-bit build when compiling with VC++. See note below. */
+#ifndef DRFLAC_64BIT
+    if (order == 8)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8];
+    }
+    else if (order == 7)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
+    }
+    else if (order == 3)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+    }
+    else if (order == 6)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+    }
+    else if (order == 5)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+    }
+    else if (order == 4)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+    }
+    else if (order == 12)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+        prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+    }
+    else if (order == 2)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+    }
+    else if (order == 1)
+    {
+        prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+    }
+    else if (order == 10)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+    }
+    else if (order == 9)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+    }
+    else if (order == 11)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+    }
+    else
+    {
+        int j;
+
+        prediction = 0;
+        for (j = 0; j < (int)order; ++j) {
+            prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1];
+        }
+    }
+#endif
+
+    /*
+    VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
+    reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
+    */
+#ifdef DRFLAC_64BIT
+    prediction = 0;
+    switch (order)
+    {
+    case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
+    case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31];
+    case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30];
+    case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29];
+    case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28];
+    case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27];
+    case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26];
+    case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25];
+    case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24];
+    case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23];
+    case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22];
+    case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21];
+    case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20];
+    case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19];
+    case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18];
+    case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17];
+    case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16];
+    case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15];
+    case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14];
+    case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13];
+    case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+    case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+    case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10];
+    case  9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9];
+    case  8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8];
+    case  7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7];
+    case  6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6];
+    case  5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5];
+    case  4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4];
+    case  3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3];
+    case  2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2];
+    case  1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
+    }
+#endif
+
+    return (drflac_int32)(prediction >> shift);
+}
+
+
+#if 0
+/*
+Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
+sake of readability and should only be used as a reference.
+*/
+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    for (i = 0; i < count; ++i) {
+        drflac_uint32 zeroCounter = 0;
+        for (;;) {
+            drflac_uint8 bit;
+            if (!drflac__read_uint8(bs, 1, &bit)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (bit == 0) {
+                zeroCounter += 1;
+            } else {
+                break;
+            }
+        }
+
+        drflac_uint32 decodedRice;
+        if (riceParam > 0) {
+            if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            decodedRice = 0;
+        }
+
+        decodedRice |= (zeroCounter << riceParam);
+        if ((decodedRice & 0x01)) {
+            decodedRice = ~(decodedRice >> 1);
+        } else {
+            decodedRice =  (decodedRice >> 1);
+        }
+
+
+        if (bitsPerSample+shift >= 32) {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        } else {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+#endif
+
+#if 0
+static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32 zeroCounter = 0;
+    drflac_uint32 decodedRice;
+
+    for (;;) {
+        drflac_uint8 bit;
+        if (!drflac__read_uint8(bs, 1, &bit)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (bit == 0) {
+            zeroCounter += 1;
+        } else {
+            break;
+        }
+    }
+
+    if (riceParam > 0) {
+        if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
+            return DRFLAC_FALSE;
+        }
+    } else {
+        decodedRice = 0;
+    }
+
+    *pZeroCounterOut = zeroCounter;
+    *pRiceParamPartOut = decodedRice;
+    return DRFLAC_TRUE;
+}
+#endif
+
+#if 0
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_cache_t riceParamMask;
+    drflac_uint32 zeroCounter;
+    drflac_uint32 setBitOffsetPlus1;
+    drflac_uint32 riceParamPart;
+    drflac_uint32 riceLength;
+
+    DRFLAC_ASSERT(riceParam > 0);   /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. */
+
+    riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
+
+    zeroCounter = 0;
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
+    zeroCounter += setBitOffsetPlus1;
+    setBitOffsetPlus1 += 1;
+
+    riceLength = setBitOffsetPlus1 + riceParam;
+    if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength));
+
+        bs->consumedBits += riceLength;
+        bs->cache <<= riceLength;
+    } else {
+        drflac_uint32 bitCountLo;
+        drflac_cache_t resultHi;
+
+        bs->consumedBits += riceLength;
+        bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1);    /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */
+
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
+        bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam);  /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */
+
+        if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+#ifndef DR_FLAC_NO_CRC
+            drflac__update_crc16(bs);
+#endif
+            bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+            bs->consumedBits = 0;
+#ifndef DR_FLAC_NO_CRC
+            bs->crc16Cache = bs->cache;
+#endif
+        } else {
+            /* Slow path. We need to fetch more data from the client. */
+            if (!drflac__reload_cache(bs)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
+
+        bs->consumedBits += bitCountLo;
+        bs->cache <<= bitCountLo;
+    }
+
+    pZeroCounterOut[0] = zeroCounter;
+    pRiceParamPartOut[0] = riceParamPart;
+
+    return DRFLAC_TRUE;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    /*drflac_cache_t riceParamPlus1Mask  = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/
+    drflac_uint32  riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1);
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+
+    /*
+    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
+    no idea how this will work in practice...
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        pZeroCounterOut[0] = lzcount;
+
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+        outside of this function at a higher level.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+            pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            drflac_uint32 riceParamPartHi;
+            drflac_uint32 riceParamPartLo;
+            drflac_uint32 riceParamPartLoBitCount;
+
+            /*
+            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+            line, reload the cache, and then combine it with the head of the next cache line.
+            */
+
+            /* Grab the high part of the rice parameter part. */
+            riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+
+            /* Before reloading the cache we need to grab the size in bits of the low part. */
+            riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            /* We should now have enough information to construct the rice parameter part. */
+            riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount)));
+            pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache.
+        */
+        drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits);
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            zeroCounter += lzcount;
+
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        pZeroCounterOut[0] = zeroCounter;
+        goto extract_rice_param_part;
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+
+    /*
+    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
+    no idea how this will work in practice...
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+        outside of this function at a higher level.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            /*
+            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+            line, reload the cache, and then combine it with the head of the next cache line.
+            */
+
+            /* Before reloading the cache we need to grab the size in bits of the low part. */
+            drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache.
+        */
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        goto extract_rice_param_part;
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+    drflac_uint32 zeroCountPart0;
+    drflac_uint32 riceParamPart0;
+    drflac_uint32 riceParamMask;
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    (void)bitsPerSample;
+    (void)order;
+    (void)shift;
+    (void)coefficients;
+
+    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
+
+    i = 0;
+    while (i < count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamPart0 &= riceParamMask;
+        riceParamPart0 |= (zeroCountPart0 << riceParam);
+        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+
+        pSamplesOut[i] = riceParamPart0;
+
+        i += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+    drflac_uint32 zeroCountPart0 = 0;
+    drflac_uint32 zeroCountPart1 = 0;
+    drflac_uint32 zeroCountPart2 = 0;
+    drflac_uint32 zeroCountPart3 = 0;
+    drflac_uint32 riceParamPart0 = 0;
+    drflac_uint32 riceParamPart1 = 0;
+    drflac_uint32 riceParamPart2 = 0;
+    drflac_uint32 riceParamPart3 = 0;
+    drflac_uint32 riceParamMask;
+    const drflac_int32* pSamplesOutEnd;
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    if (order == 0) {
+        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    }
+
+    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
+    pSamplesOutEnd = pSamplesOut + (count & ~3);
+
+    if (bitsPerSample+shift > 32) {
+        while (pSamplesOut < pSamplesOutEnd) {
+            /*
+            Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
+            against an array. Not sure why, but perhaps it's making more efficient use of registers?
+            */
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
+
+            pSamplesOut += 4;
+        }
+    } else {
+        while (pSamplesOut < pSamplesOutEnd) {
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
+
+            pSamplesOut += 4;
+        }
+    }
+
+    i = (count & ~3);
+    while (i < count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamPart0 &= riceParamMask;
+        riceParamPart0 |= (zeroCountPart0 << riceParam);
+        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+        /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
+
+        /* Sample reconstruction. */
+        if (bitsPerSample+shift > 32) {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
+        } else {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+        }
+
+        i += 1;
+        pSamplesOut += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b)
+{
+    __m128i r;
+
+    /* Pack. */
+    r = _mm_packs_epi32(a, b);
+
+    /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */
+    r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0));
+
+    /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */
+    r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
+    r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
+
+    return r;
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_SSE41)
+static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
+{
+    return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x)
+{
+    __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
+    __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2));
+    return _mm_add_epi32(x64, x32);
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x)
+{
+    return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count)
+{
+    /*
+    To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. The low side
+    is shifted with zero bits, whereas the right side is shifted with sign bits.
+    */
+    __m128i lo = _mm_srli_epi64(x, count);
+    __m128i hi = _mm_srai_epi32(x, count);
+
+    hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0));    /* The high part needs to have the low part cleared. */
+
+    return _mm_or_si128(lo, hi);
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts0 = 0;
+    drflac_uint32 zeroCountParts1 = 0;
+    drflac_uint32 zeroCountParts2 = 0;
+    drflac_uint32 zeroCountParts3 = 0;
+    drflac_uint32 riceParamParts0 = 0;
+    drflac_uint32 riceParamParts1 = 0;
+    drflac_uint32 riceParamParts2 = 0;
+    drflac_uint32 riceParamParts3 = 0;
+    __m128i coefficients128_0;
+    __m128i coefficients128_4;
+    __m128i coefficients128_8;
+    __m128i samples128_0;
+    __m128i samples128_4;
+    __m128i samples128_8;
+    __m128i riceParamMask128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = _mm_set1_epi32(riceParamMask);
+
+    /* Pre-load. */
+    coefficients128_0 = _mm_setzero_si128();
+    coefficients128_4 = _mm_setzero_si128();
+    coefficients128_8 = _mm_setzero_si128();
+
+    samples128_0 = _mm_setzero_si128();
+    samples128_4 = _mm_setzero_si128();
+    samples128_8 = _mm_setzero_si128();
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+#if 1
+    {
+        int runningOrder = order;
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
+            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
+                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
+                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
+            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
+                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
+                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
+            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
+                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
+                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
+    }
+#else
+    /* This causes strict-aliasing warnings with GCC. */
+    switch (order)
+    {
+    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
+    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
+    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
+    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
+    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
+    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
+    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
+    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
+    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
+    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
+    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
+    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
+    }
+#endif
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        __m128i prediction128;
+        __m128i zeroCountPart128;
+        __m128i riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+
+        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
+        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
+        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01)));  /* <-- SSE2 compatible */
+        /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/   /* <-- Only supported from SSE4.1 and is slower in my testing... */
+
+        if (order <= 4) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        } else if (order <= 8) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                              _mm_mullo_epi32(coefficients128_4, samples128_4);
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        } else {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                              _mm_mullo_epi32(coefficients128_8, samples128_8);
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4));
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
+                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        }
+
+        /* We store samples in groups of 4. */
+        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts0 &= riceParamMask;
+        riceParamParts0 |= (zeroCountParts0 << riceParam);
+        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts0 = 0;
+    drflac_uint32 zeroCountParts1 = 0;
+    drflac_uint32 zeroCountParts2 = 0;
+    drflac_uint32 zeroCountParts3 = 0;
+    drflac_uint32 riceParamParts0 = 0;
+    drflac_uint32 riceParamParts1 = 0;
+    drflac_uint32 riceParamParts2 = 0;
+    drflac_uint32 riceParamParts3 = 0;
+    __m128i coefficients128_0;
+    __m128i coefficients128_4;
+    __m128i coefficients128_8;
+    __m128i samples128_0;
+    __m128i samples128_4;
+    __m128i samples128_8;
+    __m128i prediction128;
+    __m128i riceParamMask128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    DRFLAC_ASSERT(order <= 12);
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = _mm_set1_epi32(riceParamMask);
+
+    prediction128 = _mm_setzero_si128();
+
+    /* Pre-load. */
+    coefficients128_0  = _mm_setzero_si128();
+    coefficients128_4  = _mm_setzero_si128();
+    coefficients128_8  = _mm_setzero_si128();
+
+    samples128_0  = _mm_setzero_si128();
+    samples128_4  = _mm_setzero_si128();
+    samples128_8  = _mm_setzero_si128();
+
+#if 1
+    {
+        int runningOrder = order;
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
+            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
+                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
+                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
+            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
+                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
+                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
+            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
+                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
+                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
+    }
+#else
+    switch (order)
+    {
+    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
+    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
+    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
+    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
+    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
+    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
+    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
+    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
+    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
+    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
+    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
+    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
+    }
+#endif
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        __m128i zeroCountPart128;
+        __m128i riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+
+        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
+        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
+        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1)));
+
+        for (i = 0; i < 4; i += 1) {
+            prediction128 = _mm_xor_si128(prediction128, prediction128);    /* Reset to 0. */
+
+            switch (order)
+            {
+            case 12:
+            case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0))));
+            case 10:
+            case  9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2))));
+            case  8:
+            case  7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0))));
+            case  6:
+            case  5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2))));
+            case  4:
+            case  3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0))));
+            case  2:
+            case  1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2))));
+            }
+
+            /* Horizontal add and shift. */
+            prediction128 = drflac__mm_hadd_epi64(prediction128);
+            prediction128 = drflac__mm_srai_epi64(prediction128, shift);
+            prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+            /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. */
+            samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
+            samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+            samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+
+            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
+            riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+        }
+
+        /* We store samples in groups of 4. */
+        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts0 &= riceParamMask;
+        riceParamParts0 |= (zeroCountParts0 << riceParam);
+        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
+    if (order > 0 && order <= 12) {
+        if (bitsPerSample+shift > 32) {
+            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        } else {
+            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        }
+    } else {
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x)
+{
+    vst1q_s32(p+0, x.val[0]);
+    vst1q_s32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_u32(drflac_uint32* p, uint32x4x2_t x)
+{
+    vst1q_u32(p+0, x.val[0]);
+    vst1q_u32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_f32(float* p, float32x4x2_t x)
+{
+    vst1q_f32(p+0, x.val[0]);
+    vst1q_f32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x)
+{
+    vst1q_s16(p, vcombine_s16(x.val[0], x.val[1]));
+}
+
+static DRFLAC_INLINE void drflac__vst2q_u16(drflac_uint16* p, uint16x4x2_t x)
+{
+    vst1q_u16(p, vcombine_u16(x.val[0], x.val[1]));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0)
+{
+    drflac_int32 x[4];
+    x[3] = x3;
+    x[2] = x2;
+    x[1] = x1;
+    x[0] = x0;
+    return vld1q_s32(x);
+}
+
+static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b)
+{
+    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
+
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(a, 0),
+        vgetq_lane_s32(b, 3),
+        vgetq_lane_s32(b, 2),
+        vgetq_lane_s32(b, 1)
+    );*/
+
+    return vextq_s32(b, a, 1);
+}
+
+static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b)
+{
+    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
+
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(a, 0),
+        vgetq_lane_s32(b, 3),
+        vgetq_lane_s32(b, 2),
+        vgetq_lane_s32(b, 1)
+    );*/
+
+    return vextq_u32(b, a, 1);
+}
+
+static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x)
+{
+    /* The sum must end up in position 0. */
+
+    /* Reference */
+    /*return vdupq_n_s32(
+        vgetq_lane_s32(x, 3) +
+        vgetq_lane_s32(x, 2) +
+        vgetq_lane_s32(x, 1) +
+        vgetq_lane_s32(x, 0)
+    );*/
+
+    int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x));
+    return vpadd_s32(r, r);
+}
+
+static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x)
+{
+    return vadd_s64(vget_high_s64(x), vget_low_s64(x));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x)
+{
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(x, 0),
+        vgetq_lane_s32(x, 1),
+        vgetq_lane_s32(x, 2),
+        vgetq_lane_s32(x, 3)
+    );*/
+
+    return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x)));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x)
+{
+    return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF));
+}
+
+static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x)
+{
+    return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF));
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts[4];
+    drflac_uint32 riceParamParts[4];
+    int32x4_t coefficients128_0;
+    int32x4_t coefficients128_4;
+    int32x4_t coefficients128_8;
+    int32x4_t samples128_0;
+    int32x4_t samples128_4;
+    int32x4_t samples128_8;
+    uint32x4_t riceParamMask128;
+    int32x4_t riceParam128;
+    int32x2_t shift64;
+    uint32x4_t one128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask128 = vdupq_n_u32(riceParamMask);
+
+    riceParam128 = vdupq_n_s32(riceParam);
+    shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
+    one128 = vdupq_n_u32(1);
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+    {
+        int runningOrder = order;
+        drflac_int32 tempC[4] = {0, 0, 0, 0};
+        drflac_int32 tempS[4] = {0, 0, 0, 0};
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = vld1q_s32(coefficients + 0);
+            samples128_0      = vld1q_s32(pSamplesOut  - 4);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
+                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
+                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
+            }
+
+            coefficients128_0 = vld1q_s32(tempC);
+            samples128_0      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = vld1q_s32(coefficients + 4);
+            samples128_4      = vld1q_s32(pSamplesOut  - 8);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
+                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
+                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
+            }
+
+            coefficients128_4 = vld1q_s32(tempC);
+            samples128_4      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = vld1q_s32(coefficients + 8);
+            samples128_8      = vld1q_s32(pSamplesOut  - 12);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
+                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
+                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
+            }
+
+            coefficients128_8 = vld1q_s32(tempC);
+            samples128_8      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
+        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
+        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
+    }
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        int32x4_t prediction128;
+        int32x2_t prediction64;
+        uint32x4_t zeroCountPart128;
+        uint32x4_t riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = vld1q_u32(zeroCountParts);
+        riceParamPart128 = vld1q_u32(riceParamParts);
+
+        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
+        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
+        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
+
+        if (order <= 4) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 = vmulq_s32(coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        } else if (order <= 8) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                vmulq_s32(coefficients128_4, samples128_4);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        } else {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                vmulq_s32(coefficients128_8, samples128_8);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
+                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        }
+
+        /* We store samples in groups of 4. */
+        vst1q_s32(pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts[0] &= riceParamMask;
+        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
+        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts[4];
+    drflac_uint32 riceParamParts[4];
+    int32x4_t coefficients128_0;
+    int32x4_t coefficients128_4;
+    int32x4_t coefficients128_8;
+    int32x4_t samples128_0;
+    int32x4_t samples128_4;
+    int32x4_t samples128_8;
+    uint32x4_t riceParamMask128;
+    int32x4_t riceParam128;
+    int64x1_t shift64;
+    uint32x4_t one128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask128 = vdupq_n_u32(riceParamMask);
+
+    riceParam128 = vdupq_n_s32(riceParam);
+    shift64 = vdup_n_s64(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
+    one128 = vdupq_n_u32(1);
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+    {
+        int runningOrder = order;
+        drflac_int32 tempC[4] = {0, 0, 0, 0};
+        drflac_int32 tempS[4] = {0, 0, 0, 0};
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = vld1q_s32(coefficients + 0);
+            samples128_0      = vld1q_s32(pSamplesOut  - 4);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
+                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
+                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
+            }
+
+            coefficients128_0 = vld1q_s32(tempC);
+            samples128_0      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = vld1q_s32(coefficients + 4);
+            samples128_4      = vld1q_s32(pSamplesOut  - 8);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
+                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
+                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
+            }
+
+            coefficients128_4 = vld1q_s32(tempC);
+            samples128_4      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = vld1q_s32(coefficients + 8);
+            samples128_8      = vld1q_s32(pSamplesOut  - 12);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
+                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
+                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
+            }
+
+            coefficients128_8 = vld1q_s32(tempC);
+            samples128_8      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
+        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
+        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
+    }
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        int64x2_t prediction128;
+        uint32x4_t zeroCountPart128;
+        uint32x4_t riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = vld1q_u32(zeroCountParts);
+        riceParamPart128 = vld1q_u32(riceParamParts);
+
+        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
+        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
+        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
+
+        for (i = 0; i < 4; i += 1) {
+            int64x1_t prediction64;
+
+            prediction128 = veorq_s64(prediction128, prediction128);    /* Reset to 0. */
+            switch (order)
+            {
+            case 12:
+            case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8)));
+            case 10:
+            case  9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8)));
+            case  8:
+            case  7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4)));
+            case  6:
+            case  5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4)));
+            case  4:
+            case  3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0)));
+            case  2:
+            case  1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0)));
+            }
+
+            /* Horizontal add and shift. */
+            prediction64 = drflac__vhaddq_s64(prediction128);
+            prediction64 = vshl_s64(prediction64, shift64);
+            prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0)));
+
+            /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */
+            samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
+            samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+            samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0);
+
+            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
+            riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+        }
+
+        /* We store samples in groups of 4. */
+        vst1q_s32(pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts[0] &= riceParamMask;
+        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
+        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
+    if (order > 0 && order <= 12) {
+        if (bitsPerSample+shift > 32) {
+            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        } else {
+            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        }
+    } else {
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    }
+}
+#endif
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+#if defined(DRFLAC_SUPPORT_SSE41)
+    if (drflac__gIsSSE41Supported) {
+        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported) {
+        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+    #if 0
+        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    #else
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    #endif
+    }
+}
+
+/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. */
+static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+
+    for (i = 0; i < count; ++i) {
+        if (!drflac__seek_rice_parts(bs, riceParam)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(unencodedBitsPerSample <= 31);    /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    for (i = 0; i < count; ++i) {
+        if (unencodedBitsPerSample > 0) {
+            if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            pSamplesOut[i] = 0;
+        }
+
+        if (bitsPerSample >= 24) {
+            pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        } else {
+            pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+/*
+Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(blockSize != 0);
+    DRFLAC_ASSERT(pDecodedSamples != NULL);       /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */
+
+    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
+    }
+
+    /* Ignore the first <order> values. */
+    pDecodedSamples += order;
+
+    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
+    if (partitionOrder > 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Validation check. */
+    if ((blockSize / (1 << partitionOrder)) <= order) {
+        return DRFLAC_FALSE;
+    }
+
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    partitionsRemaining = (1 << partitionOrder);
+    for (;;) {
+        drflac_uint8 riceParam = 0;
+        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
+            if (!drflac__read_uint8(bs, 4, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 15) {
+                riceParam = 0xFF;
+            }
+        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+            if (!drflac__read_uint8(bs, 5, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 31) {
+                riceParam = 0xFF;
+            }
+        }
+
+        if (riceParam != 0xFF) {
+            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            drflac_uint8 unencodedBitsPerSample = 0;
+            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        pDecodedSamples += samplesInPartition;
+
+        if (partitionsRemaining == 1) {
+            break;
+        }
+
+        partitionsRemaining -= 1;
+
+        if (partitionOrder != 0) {
+            samplesInPartition = blockSize / (1 << partitionOrder);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+/*
+Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
+static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order)
+{
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(blockSize != 0);
+
+    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
+    }
+
+    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
+    if (partitionOrder > 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Validation check. */
+    if ((blockSize / (1 << partitionOrder)) <= order) {
+        return DRFLAC_FALSE;
+    }
+
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    partitionsRemaining = (1 << partitionOrder);
+    for (;;)
+    {
+        drflac_uint8 riceParam = 0;
+        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
+            if (!drflac__read_uint8(bs, 4, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 15) {
+                riceParam = 0xFF;
+            }
+        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+            if (!drflac__read_uint8(bs, 5, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 31) {
+                riceParam = 0xFF;
+            }
+        }
+
+        if (riceParam != 0xFF) {
+            if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            drflac_uint8 unencodedBitsPerSample = 0;
+            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+
+        if (partitionsRemaining == 1) {
+            break;
+        }
+
+        partitionsRemaining -= 1;
+        samplesInPartition = blockSize / (1 << partitionOrder);
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    /* Only a single sample needs to be decoded here. */
+    drflac_int32 sample;
+    if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
+    we'll want to look at a more efficient way.
+    */
+    for (i = 0; i < blockSize; ++i) {
+        pDecodedSamples[i] = sample;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    for (i = 0; i < blockSize; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    static drflac_int32 lpcCoefficientsTable[5][4] = {
+        {0,  0, 0,  0},
+        {1,  0, 0,  0},
+        {2, -1, 0,  0},
+        {3, -3, 1,  0},
+        {4, -6, 4, -1}
+    };
+
+    /* Warm up samples and coefficients. */
+    for (i = 0; i < lpcOrder; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
+        return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
+{
+    drflac_uint8 i;
+    drflac_uint8 lpcPrecision;
+    drflac_int8 lpcShift;
+    drflac_int32 coefficients[32];
+
+    /* Warm up samples. */
+    for (i = 0; i < lpcOrder; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
+        return DRFLAC_FALSE;
+    }
+    if (lpcPrecision == 15) {
+        return DRFLAC_FALSE;    /* Invalid. */
+    }
+    lpcPrecision += 1;
+
+    if (!drflac__read_int8(bs, 5, &lpcShift)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC specification:
+
+        Quantized linear predictor coefficient shift needed in bits (NOTE: this number is signed two's-complement)
+
+    Emphasis on the "signed two's-complement". In practice there does not seem to be any encoders nor decoders supporting negative shifts. For now dr_flac is
+    not going to support negative shifts as I don't have any reference files. However, when a reference file comes through I will consider adding support.
+    */
+    if (lpcShift < 0) {
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients));
+    for (i = 0; i < lpcOrder; ++i) {
+        if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
+        return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header)
+{
+    const drflac_uint32 sampleRateTable[12]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
+    const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1};   /* -1 = reserved. */
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(header != NULL);
+
+    /* Keep looping until we find a valid sync code. */
+    for (;;) {
+        drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */
+        drflac_uint8 reserved = 0;
+        drflac_uint8 blockingStrategy = 0;
+        drflac_uint8 blockSize = 0;
+        drflac_uint8 sampleRate = 0;
+        drflac_uint8 channelAssignment = 0;
+        drflac_uint8 bitsPerSample = 0;
+        drflac_bool32 isVariableBlockSize;
+
+        if (!drflac__find_and_seek_to_next_sync_code(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
+        }
+        if (reserved == 1) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+        if (!drflac__read_uint8(bs, 1, &blockingStrategy)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, blockingStrategy, 1);
+
+        if (!drflac__read_uint8(bs, 4, &blockSize)) {
+            return DRFLAC_FALSE;
+        }
+        if (blockSize == 0) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, blockSize, 4);
+
+        if (!drflac__read_uint8(bs, 4, &sampleRate)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, sampleRate, 4);
+
+        if (!drflac__read_uint8(bs, 4, &channelAssignment)) {
+            return DRFLAC_FALSE;
+        }
+        if (channelAssignment > 10) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, channelAssignment, 4);
+
+        if (!drflac__read_uint8(bs, 3, &bitsPerSample)) {
+            return DRFLAC_FALSE;
+        }
+        if (bitsPerSample == 3 || bitsPerSample == 7) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, bitsPerSample, 3);
+
+
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
+        }
+        if (reserved == 1) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+
+        isVariableBlockSize = blockingStrategy == 1;
+        if (isVariableBlockSize) {
+            drflac_uint64 pcmFrameNumber;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_AT_END) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;
+                }
+            }
+            header->flacFrameNumber  = 0;
+            header->pcmFrameNumber = pcmFrameNumber;
+        } else {
+            drflac_uint64 flacFrameNumber = 0;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_AT_END) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;
+                }
+            }
+            header->flacFrameNumber  = (drflac_uint32)flacFrameNumber;   /* <-- Safe cast. */
+            header->pcmFrameNumber = 0;
+        }
+
+
+        DRFLAC_ASSERT(blockSize > 0);
+        if (blockSize == 1) {
+            header->blockSizeInPCMFrames = 192;
+        } else if (blockSize <= 5) {
+            DRFLAC_ASSERT(blockSize >= 2);
+            header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2));
+        } else if (blockSize == 6) {
+            if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8);
+            header->blockSizeInPCMFrames += 1;
+        } else if (blockSize == 7) {
+            if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
+            header->blockSizeInPCMFrames += 1;
+        } else {
+            DRFLAC_ASSERT(blockSize >= 8);
+            header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8));
+        }
+
+
+        if (sampleRate <= 11) {
+            header->sampleRate = sampleRateTable[sampleRate];
+        } else if (sampleRate == 12) {
+            if (!drflac__read_uint32(bs, 8, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 8);
+            header->sampleRate *= 1000;
+        } else if (sampleRate == 13) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+        } else if (sampleRate == 14) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+            header->sampleRate *= 10;
+        } else {
+            continue;  /* Invalid. Assume an invalid block. */
+        }
+
+
+        header->channelAssignment = channelAssignment;
+
+        header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
+        if (header->bitsPerSample == 0) {
+            header->bitsPerSample = streaminfoBitsPerSample;
+        }
+
+        if (!drflac__read_uint8(bs, 8, &header->crc8)) {
+            return DRFLAC_FALSE;
+        }
+
+#ifndef DR_FLAC_NO_CRC
+        if (header->crc8 != crc8) {
+            continue;    /* CRC mismatch. Loop back to the top and find the next sync code. */
+        }
+#endif
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe)
+{
+    drflac_uint8 header;
+    int type;
+
+    if (!drflac__read_uint8(bs, 8, &header)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* First bit should always be 0. */
+    if ((header & 0x80) != 0) {
+        return DRFLAC_FALSE;
+    }
+
+    type = (header & 0x7E) >> 1;
+    if (type == 0) {
+        pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT;
+    } else if (type == 1) {
+        pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM;
+    } else {
+        if ((type & 0x20) != 0) {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_LPC;
+            pSubframe->lpcOrder = (drflac_uint8)(type & 0x1F) + 1;
+        } else if ((type & 0x08) != 0) {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED;
+            pSubframe->lpcOrder = (drflac_uint8)(type & 0x07);
+            if (pSubframe->lpcOrder > 4) {
+                pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
+                pSubframe->lpcOrder = 0;
+            }
+        } else {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
+        }
+    }
+
+    if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Wasted bits per sample. */
+    pSubframe->wastedBitsPerSample = 0;
+    if ((header & 0x01) == 1) {
+        unsigned int wastedBitsPerSample;
+        if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) {
+            return DRFLAC_FALSE;
+        }
+        pSubframe->wastedBitsPerSample = (drflac_uint8)wastedBitsPerSample + 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut)
+{
+    drflac_subframe* pSubframe;
+    drflac_uint32 subframeBitsPerSample;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(frame != NULL);
+
+    pSubframe = frame->subframes + subframeIndex;
+    if (!drflac__read_subframe_header(bs, pSubframe)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
+    subframeBitsPerSample = frame->header.bitsPerSample;
+    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
+        subframeBitsPerSample += 1;
+    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
+        subframeBitsPerSample += 1;
+    }
+
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
+        return DRFLAC_FALSE;
+    }
+    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
+
+    pSubframe->pSamplesS32 = pDecodedSamplesOut;
+
+    switch (pSubframe->subframeType)
+    {
+        case DRFLAC_SUBFRAME_CONSTANT:
+        {
+            drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_VERBATIM:
+        {
+            drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_FIXED:
+        {
+            drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_LPC:
+        {
+            drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
+        } break;
+
+        default: return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex)
+{
+    drflac_subframe* pSubframe;
+    drflac_uint32 subframeBitsPerSample;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(frame != NULL);
+
+    pSubframe = frame->subframes + subframeIndex;
+    if (!drflac__read_subframe_header(bs, pSubframe)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
+    subframeBitsPerSample = frame->header.bitsPerSample;
+    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
+        subframeBitsPerSample += 1;
+    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
+        subframeBitsPerSample += 1;
+    }
+
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
+        return DRFLAC_FALSE;
+    }
+    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
+
+    pSubframe->pSamplesS32 = NULL;
+
+    switch (pSubframe->subframeType)
+    {
+        case DRFLAC_SUBFRAME_CONSTANT:
+        {
+            if (!drflac__seek_bits(bs, subframeBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_VERBATIM:
+        {
+            unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_FIXED:
+        {
+            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_LPC:
+        {
+            drflac_uint8 lpcPrecision;
+
+            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
+                return DRFLAC_FALSE;
+            }
+            if (lpcPrecision == 15) {
+                return DRFLAC_FALSE;    /* Invalid. */
+            }
+            lpcPrecision += 1;
+
+
+            bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5;    /* +5 for shift. */
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        default: return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment)
+{
+    drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
+
+    DRFLAC_ASSERT(channelAssignment <= 10);
+    return lookup[channelAssignment];
+}
+
+static drflac_result drflac__decode_flac_frame(drflac* pFlac)
+{
+    int channelCount;
+    int i;
+    drflac_uint8 paddingSizeInBits;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+
+    /* This function should be called while the stream is sitting on the first byte after the frame header. */
+    DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes));
+
+    /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */
+    if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) {
+        return DRFLAC_ERROR;
+    }
+
+    /* The number of channels in the frame must match the channel count from the STREAMINFO block. */
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+    if (channelCount != (int)pFlac->channels) {
+        return DRFLAC_ERROR;
+    }
+
+    for (i = 0; i < channelCount; ++i) {
+        if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) {
+            return DRFLAC_ERROR;
+        }
+    }
+
+    paddingSizeInBits = (drflac_uint8)(DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7);
+    if (paddingSizeInBits > 0) {
+        drflac_uint8 padding = 0;
+        if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) {
+            return DRFLAC_AT_END;
+        }
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+#endif
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_AT_END;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
+    }
+#endif
+
+    pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_result drflac__seek_flac_frame(drflac* pFlac)
+{
+    int channelCount;
+    int i;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+    for (i = 0; i < channelCount; ++i) {
+        if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) {
+            return DRFLAC_ERROR;
+        }
+    }
+
+    /* Padding. */
+    if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) {
+        return DRFLAC_ERROR;
+    }
+
+    /* CRC. */
+#ifndef DR_FLAC_NO_CRC
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+#endif
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_AT_END;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    for (;;) {
+        drflac_result result;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+
+        result = drflac__decode_flac_frame(pFlac);
+        if (result != DRFLAC_SUCCESS) {
+            if (result == DRFLAC_CRC_MISMATCH) {
+                continue;   /* CRC mismatch. Skip to the next frame. */
+            } else {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        return DRFLAC_TRUE;
+    }
+}
+
+static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame)
+{
+    drflac_uint64 firstPCMFrame;
+    drflac_uint64 lastPCMFrame;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber;
+    if (firstPCMFrame == 0) {
+        firstPCMFrame = ((drflac_uint64)pFlac->currentFLACFrame.header.flacFrameNumber) * pFlac->maxBlockSizeInPCMFrames;
+    }
+
+    lastPCMFrame = firstPCMFrame + pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+    if (lastPCMFrame > 0) {
+        lastPCMFrame -= 1; /* Needs to be zero based. */
+    }
+
+    if (pFirstPCMFrame) {
+        *pFirstPCMFrame = firstPCMFrame;
+    }
+    if (pLastPCMFrame) {
+        *pLastPCMFrame = lastPCMFrame;
+    }
+}
+
+static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac)
+{
+    drflac_bool32 result;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes);
+
+    DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
+    pFlac->currentPCMFrame = 0;
+
+    return result;
+}
+
+static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac)
+{
+    /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */
+    DRFLAC_ASSERT(pFlac != NULL);
+    return drflac__seek_flac_frame(pFlac);
+}
+
+
+static drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek)
+{
+    drflac_uint64 pcmFramesRead = 0;
+    while (pcmFramesToSeek > 0) {
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) {
+                pcmFramesRead   += pcmFramesToSeek;
+                pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek;   /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. */
+                pcmFramesToSeek  = 0;
+            } else {
+                pcmFramesRead   += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+            }
+        }
+    }
+
+    pFlac->currentPCMFrame += pcmFramesRead;
+    return pcmFramesRead;
+}
+
+
+static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningPCMFrameCount;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */
+    if (pcmFrameIndex >= pFlac->currentPCMFrame) {
+        /* Seeking forward. Need to seek from the current position. */
+        runningPCMFrameCount = pFlac->currentPCMFrame;
+
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
+        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            isMidFrame = DRFLAC_TRUE;
+        }
+    } else {
+        /* Seeking backwards. Need to seek from the start of the file. */
+        runningPCMFrameCount = 0;
+
+        /* Move back to the start. */
+        if (!drflac__seek_to_first_frame(pFlac)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Decode the first frame in preparation for sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /*
+    We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its
+    header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame.
+    */
+    for (;;) {
+        drflac_uint64 pcmFrameCountInThisFLACFrame;
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
+            /*
+            The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
+
+            if (!isMidFrame) {
+                drflac_result result = drflac__decode_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            if (!isMidFrame) {
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
+                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                isMidFrame = DRFLAC_FALSE;
+            }
+
+            /* If we are seeking to the end of the file and we've just hit it, we're done. */
+            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
+                return DRFLAC_TRUE;
+            }
+        }
+
+    next_iteration:
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+}
+
+
+#if !defined(DR_FLAC_NO_CRC)
+/*
+We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their
+uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting
+location.
+*/
+#define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f
+
+static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+    DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL);
+    DRFLAC_ASSERT(targetByte >= rangeLo);
+    DRFLAC_ASSERT(targetByte <= rangeHi);
+
+    *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes;
+
+    for (;;) {
+        /* After rangeLo == rangeHi == targetByte fails, we need to break out. */
+        drflac_uint64 lastTargetByte = targetByte;
+
+        /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */
+        if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) {
+            /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */
+            if (targetByte == 0) {
+                drflac__seek_to_first_frame(pFlac); /* Try to recover. */
+                return DRFLAC_FALSE;
+            }
+
+            /* Halve the byte location and continue. */
+            targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+            rangeHi = targetByte;
+        } else {
+            /* Getting here should mean that we have seeked to an appropriate byte. */
+
+            /* Clear the details of the FLAC frame so we don't misreport data. */
+            DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
+
+            /*
+            Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the
+            CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing
+            so it needs to stay this way for now.
+            */
+#if 1
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                /* Halve the byte location and continue. */
+                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+                rangeHi = targetByte;
+            } else {
+                break;
+            }
+#else
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                /* Halve the byte location and continue. */
+                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+                rangeHi = targetByte;
+            } else {
+                break;
+            }
+#endif
+        }
+
+        /* We already tried this byte and there are no more to try, break out. */
+        if(targetByte == lastTargetByte) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /* The current PCM frame needs to be updated based on the frame we just seeked to. */
+    drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
+
+    DRFLAC_ASSERT(targetByte <= rangeHi);
+
+    *pLastSuccessfulSeekOffset = targetByte;
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset)
+{
+    /* This section of code would be used if we were only decoding the FLAC frame header when calling drflac__seek_to_approximate_flac_frame_to_byte(). */
+#if 0
+    if (drflac__decode_flac_frame(pFlac) != DRFLAC_SUCCESS) {
+        /* We failed to decode this frame which may be due to it being corrupt. We'll just use the next valid FLAC frame. */
+        if (drflac__read_and_decode_next_flac_frame(pFlac) == DRFLAC_FALSE) {
+            return DRFLAC_FALSE;
+        }
+    }
+#endif
+
+    return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset;
+}
+
+
+static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi)
+{
+    /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. */
+
+    drflac_uint64 targetByte;
+    drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount;
+    drflac_uint64 pcmRangeHi = 0;
+    drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1;
+    drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo;
+    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
+
+    targetByte = byteRangeLo + (drflac_uint64)(((drflac_int64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO);
+    if (targetByte > byteRangeHi) {
+        targetByte = byteRangeHi;
+    }
+
+    for (;;) {
+        if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) {
+            /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */
+            drflac_uint64 newPCMRangeLo;
+            drflac_uint64 newPCMRangeHi;
+            drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi);
+
+            /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */
+            if (pcmRangeLo == newPCMRangeLo) {
+                if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) {
+                    break;  /* Failed to seek to closest frame. */
+                }
+
+                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
+                    return DRFLAC_TRUE;
+                } else {
+                    break;  /* Failed to seek forward. */
+                }
+            }
+
+            pcmRangeLo = newPCMRangeLo;
+            pcmRangeHi = newPCMRangeHi;
+
+            if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) {
+                /* The target PCM frame is in this FLAC frame. */
+                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) {
+                    return DRFLAC_TRUE;
+                } else {
+                    break;  /* Failed to seek to FLAC frame. */
+                }
+            } else {
+                const float approxCompressionRatio = (drflac_int64)(lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / ((drflac_int64)(pcmRangeLo * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+
+                if (pcmRangeLo > pcmFrameIndex) {
+                    /* We seeked too far forward. We need to move our target byte backward and try again. */
+                    byteRangeHi = lastSuccessfulSeekOffset;
+                    if (byteRangeLo > byteRangeHi) {
+                        byteRangeLo = byteRangeHi;
+                    }
+
+                    targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);
+                    if (targetByte < byteRangeLo) {
+                        targetByte = byteRangeLo;
+                    }
+                } else /*if (pcmRangeHi < pcmFrameIndex)*/ {
+                    /* We didn't seek far enough. We need to move our target byte forward and try again. */
+
+                    /* If we're close enough we can just seek forward. */
+                    if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) {
+                        if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
+                            return DRFLAC_TRUE;
+                        } else {
+                            break;  /* Failed to seek to FLAC frame. */
+                        }
+                    } else {
+                        byteRangeLo = lastSuccessfulSeekOffset;
+                        if (byteRangeHi < byteRangeLo) {
+                            byteRangeHi = byteRangeLo;
+                        }
+
+                        targetByte = lastSuccessfulSeekOffset + (drflac_uint64)(((drflac_int64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * approxCompressionRatio);
+                        if (targetByte > byteRangeHi) {
+                            targetByte = byteRangeHi;
+                        }
+
+                        if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) {
+                            closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset;
+                        }
+                    }
+                }
+            }
+        } else {
+            /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */
+            break;
+        }
+    }
+
+    drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. */
+    return DRFLAC_FALSE;
+}
+
+static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_uint64 byteRangeLo;
+    drflac_uint64 byteRangeHi;
+    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
+
+    /* Our algorithm currently assumes the FLAC stream is currently sitting at the start. */
+    if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) {
+        return DRFLAC_FALSE;
+    }
+
+    /* If we're close enough to the start, just move to the start and seek forward. */
+    if (pcmFrameIndex < seekForwardThreshold) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex;
+    }
+
+    /*
+    Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. This ensures
+    the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it.
+    */
+    byteRangeLo = pFlac->firstFLACFramePosInBytes;
+    byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+
+    return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi);
+}
+#endif  /* !DR_FLAC_NO_CRC */
+
+static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_uint32 iClosestSeekpoint = 0;
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningPCMFrameCount;
+    drflac_uint32 iSeekpoint;
+
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) {
+        return DRFLAC_FALSE;
+    }
+
+    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+        if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) {
+            break;
+        }
+
+        iClosestSeekpoint = iSeekpoint;
+    }
+
+    /* There's been cases where the seek table contains only zeros. We need to do some basic validation on the closest seekpoint. */
+    if (pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount == 0 || pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount > pFlac->maxBlockSizeInPCMFrames) {
+        return DRFLAC_FALSE;
+    }
+    if (pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame > pFlac->totalPCMFrameCount && pFlac->totalPCMFrameCount > 0) {
+        return DRFLAC_FALSE;
+    }
+
+#if !defined(DR_FLAC_NO_CRC)
+    /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */
+    if (pFlac->totalPCMFrameCount > 0) {
+        drflac_uint64 byteRangeLo;
+        drflac_uint64 byteRangeHi;
+
+        byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+        byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset;
+
+        /*
+        If our closest seek point is not the last one, we only need to search between it and the next one. The section below calculates an appropriate starting
+        value for byteRangeHi which will clamp it appropriately.
+
+        Note that the next seekpoint must have an offset greater than the closest seekpoint because otherwise our binary search algorithm will break down. There
+        have been cases where a seektable consists of seek points where every byte offset is set to 0 which causes problems. If this happens we need to abort.
+        */
+        if (iClosestSeekpoint < pFlac->seekpointCount-1) {
+            drflac_uint32 iNextSeekpoint = iClosestSeekpoint + 1;
+
+            /* Basic validation on the seekpoints to ensure they're usable. */
+            if (pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset >= pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset || pFlac->pSeekpoints[iNextSeekpoint].pcmFrameCount == 0) {
+                return DRFLAC_FALSE;    /* The next seekpoint doesn't look right. The seek table cannot be trusted from here. Abort. */
+            }
+
+            if (pFlac->pSeekpoints[iNextSeekpoint].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Make sure it's not a placeholder seekpoint. */
+                byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset - 1; /* byteRangeHi must be zero based. */
+            }
+        }
+
+        if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
+            if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
+
+                if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) {
+                    return DRFLAC_TRUE;
+                }
+            }
+        }
+    }
+#endif  /* !DR_FLAC_NO_CRC */
+
+    /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */
+
+    /*
+    If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking
+    from the seekpoint's first sample.
+    */
+    if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) {
+        /* Optimized case. Just seek forward from where we are. */
+        runningPCMFrameCount = pFlac->currentPCMFrame;
+
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
+        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            isMidFrame = DRFLAC_TRUE;
+        }
+    } else {
+        /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */
+        runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame;
+
+        if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    for (;;) {
+        drflac_uint64 pcmFrameCountInThisFLACFrame;
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
+            /*
+            The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
+
+            if (!isMidFrame) {
+                drflac_result result = drflac__decode_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            if (!isMidFrame) {
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
+                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                isMidFrame = DRFLAC_FALSE;
+            }
+
+            /* If we are seeking to the end of the file and we've just hit it, we're done. */
+            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
+                return DRFLAC_TRUE;
+            }
+        }
+
+    next_iteration:
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+}
+
+
+#ifndef DR_FLAC_NO_OGG
+typedef struct
+{
+    drflac_uint8 capturePattern[4];  /* Should be "OggS" */
+    drflac_uint8 structureVersion;   /* Always 0. */
+    drflac_uint8 headerType;
+    drflac_uint64 granulePosition;
+    drflac_uint32 serialNumber;
+    drflac_uint32 sequenceNumber;
+    drflac_uint32 checksum;
+    drflac_uint8 segmentCount;
+    drflac_uint8 segmentTable[255];
+} drflac_ogg_page_header;
+#endif
+
+typedef struct
+{
+    drflac_read_proc onRead;
+    drflac_seek_proc onSeek;
+    drflac_meta_proc onMeta;
+    drflac_container container;
+    void* pUserData;
+    void* pUserDataMD;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_uint16 maxBlockSizeInPCMFrames;
+    drflac_uint64 runningFilePos;
+    drflac_bool32 hasStreamInfoBlock;
+    drflac_bool32 hasMetadataBlocks;
+    drflac_bs bs;                           /* <-- A bit streamer is required for loading data during initialization. */
+    drflac_frame_header firstFrameHeader;   /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. */
+
+#ifndef DR_FLAC_NO_OGG
+    drflac_uint32 oggSerial;
+    drflac_uint64 oggFirstBytePos;
+    drflac_ogg_page_header oggBosHeader;
+#endif
+} drflac_init_info;
+
+static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
+{
+    blockHeader = drflac__be2host_32(blockHeader);
+    *isLastBlock = (drflac_uint8)((blockHeader & 0x80000000UL) >> 31);
+    *blockType   = (drflac_uint8)((blockHeader & 0x7F000000UL) >> 24);
+    *blockSize   =                (blockHeader & 0x00FFFFFFUL);
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
+{
+    drflac_uint32 blockHeader;
+
+    *blockSize = 0;
+    if (onRead(pUserData, &blockHeader, 4) != 4) {
+        return DRFLAC_FALSE;
+    }
+
+    drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize);
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo)
+{
+    drflac_uint32 blockSizes;
+    drflac_uint64 frameSizes = 0;
+    drflac_uint64 importantProps;
+    drflac_uint8 md5[16];
+
+    /* min/max block size. */
+    if (onRead(pUserData, &blockSizes, 4) != 4) {
+        return DRFLAC_FALSE;
+    }
+
+    /* min/max frame size. */
+    if (onRead(pUserData, &frameSizes, 6) != 6) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Sample rate, channels, bits per sample and total sample count. */
+    if (onRead(pUserData, &importantProps, 8) != 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* MD5 */
+    if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) {
+        return DRFLAC_FALSE;
+    }
+
+    blockSizes     = drflac__be2host_32(blockSizes);
+    frameSizes     = drflac__be2host_64(frameSizes);
+    importantProps = drflac__be2host_64(importantProps);
+
+    pStreamInfo->minBlockSizeInPCMFrames = (drflac_uint16)((blockSizes & 0xFFFF0000) >> 16);
+    pStreamInfo->maxBlockSizeInPCMFrames = (drflac_uint16) (blockSizes & 0x0000FFFF);
+    pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40);
+    pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) <<  0)) >> 16);
+    pStreamInfo->sampleRate              = (drflac_uint32)((importantProps &  (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44);
+    pStreamInfo->channels                = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1;
+    pStreamInfo->bitsPerSample           = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1;
+    pStreamInfo->totalPCMFrameCount      =                ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF)));
+    DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5));
+
+    return DRFLAC_TRUE;
+}
+
+
+static void* drflac__malloc_default(size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRFLAC_MALLOC(sz);
+}
+
+static void* drflac__realloc_default(void* p, size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRFLAC_REALLOC(p, sz);
+}
+
+static void drflac__free_default(void* p, void* pUserData)
+{
+    (void)pUserData;
+    DRFLAC_FREE(p);
+}
+
+
+static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onMalloc != NULL) {
+        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try using realloc(). */
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
+    }
+
+    return NULL;
+}
+
+static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try emulating realloc() in terms of malloc()/free(). */
+    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
+        void* p2;
+
+        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
+        if (p2 == NULL) {
+            return NULL;
+        }
+
+        if (p != NULL) {
+            DRFLAC_COPY_MEMORY(p2, p, szOld);
+            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+        }
+
+        return p2;
+    }
+
+    return NULL;
+}
+
+static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (p == NULL || pAllocationCallbacks == NULL) {
+        return;
+    }
+
+    if (pAllocationCallbacks->onFree != NULL) {
+        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+    }
+}
+
+
+static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    /*
+    We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
+    we'll be sitting on byte 42.
+    */
+    drflac_uint64 runningFilePos = 42;
+    drflac_uint64 seektablePos   = 0;
+    drflac_uint32 seektableSize  = 0;
+
+    for (;;) {
+        drflac_metadata metadata;
+        drflac_uint8 isLastBlock = 0;
+        drflac_uint8 blockType;
+        drflac_uint32 blockSize;
+        if (drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize) == DRFLAC_FALSE) {
+            return DRFLAC_FALSE;
+        }
+        runningFilePos += 4;
+
+        metadata.type = blockType;
+        metadata.pRawData = NULL;
+        metadata.rawDataSize = 0;
+
+        switch (blockType)
+        {
+            case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION:
+            {
+                if (blockSize < 4) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.application.id       = drflac__be2host_32(*(drflac_uint32*)pRawData);
+                    metadata.data.application.pData    = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32));
+                    metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32);
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE:
+            {
+                seektablePos  = runningFilePos;
+                seektableSize = blockSize;
+
+                if (onMeta) {
+                    drflac_uint32 iSeekpoint;
+                    void* pRawData;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint);
+                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
+
+                    /* Endian swap. */
+                    for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
+                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
+                        pSeekpoint->firstPCMFrame   = drflac__be2host_64(pSeekpoint->firstPCMFrame);
+                        pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset);
+                        pSeekpoint->pcmFrameCount   = drflac__be2host_16(pSeekpoint->pcmFrameCount);
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT:
+            {
+                if (blockSize < 8) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    drflac_uint32 i;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
+                    if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.pComments    = pRunningData;
+
+                    /* Check that the comments section is valid before passing it to the callback */
+                    for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) {
+                        drflac_uint32 commentLength;
+
+                        if (pRunningDataEnd - pRunningData < 4) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                        if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+                        pRunningData += commentLength;
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET:
+            {
+                if (blockSize < 396) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    drflac_uint8 iTrack;
+                    drflac_uint8 iIndex;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128);                              pRunningData += 128;
+                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8;
+                    metadata.data.cuesheet.isCD              = (pRunningData[0] & 0x80) != 0;                           pRunningData += 259;
+                    metadata.data.cuesheet.trackCount        = pRunningData[0];                                         pRunningData += 1;
+                    metadata.data.cuesheet.pTrackData        = pRunningData;
+
+                    /* Check that the cuesheet tracks are valid before passing it to the callback */
+                    for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                        drflac_uint8 indexCount;
+                        drflac_uint32 indexPointSize;
+
+                        if (pRunningDataEnd - pRunningData < 36) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        /* Skip to the index point count */
+                        pRunningData += 35;
+                        indexCount = pRunningData[0]; pRunningData += 1;
+                        indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index);
+                        if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        /* Endian swap. */
+                        for (iIndex = 0; iIndex < indexCount; ++iIndex) {
+                            drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData;
+                            pRunningData += sizeof(drflac_cuesheet_track_index);
+                            pTrack->offset = drflac__be2host_64(pTrack->offset);
+                        }
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_PICTURE:
+            {
+                if (blockSize < 32) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
+                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
+                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
+
+                    /* Need space for the picture after the block */
+                    if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_PADDING:
+            {
+                if (onMeta) {
+                    metadata.data.padding.unused = 0;
+
+                    /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */
+                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
+                    } else {
+                        onMeta(pUserDataMD, &metadata);
+                    }
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_INVALID:
+            {
+                /* Invalid chunk. Just skip over this one. */
+                if (onMeta) {
+                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
+                    }
+                }
+            } break;
+
+            default:
+            {
+                /*
+                It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we
+                can at the very least report the chunk to the application and let it look at the raw data.
+                */
+                if (onMeta) {
+                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+        }
+
+        /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */
+        if (onMeta == NULL && blockSize > 0) {
+            if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                isLastBlock = DRFLAC_TRUE;
+            }
+        }
+
+        runningFilePos += blockSize;
+        if (isLastBlock) {
+            break;
+        }
+    }
+
+    *pSeektablePos = seektablePos;
+    *pSeektableSize = seektableSize;
+    *pFirstFramePos = runningFilePos;
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
+{
+    /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. */
+
+    drflac_uint8 isLastBlock;
+    drflac_uint8 blockType;
+    drflac_uint32 blockSize;
+
+    (void)onSeek;
+
+    pInit->container = drflac_container_native;
+
+    /* The first metadata block should be the STREAMINFO block. */
+    if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
+        if (!relaxed) {
+            /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */
+            return DRFLAC_FALSE;
+        } else {
+            /*
+            Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined
+            for that frame.
+            */
+            pInit->hasStreamInfoBlock = DRFLAC_FALSE;
+            pInit->hasMetadataBlocks  = DRFLAC_FALSE;
+
+            if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) {
+                return DRFLAC_FALSE;    /* Couldn't find a frame. */
+            }
+
+            if (pInit->firstFrameHeader.bitsPerSample == 0) {
+                return DRFLAC_FALSE;    /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. */
+            }
+
+            pInit->sampleRate              = pInit->firstFrameHeader.sampleRate;
+            pInit->channels                = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment);
+            pInit->bitsPerSample           = pInit->firstFrameHeader.bitsPerSample;
+            pInit->maxBlockSizeInPCMFrames = 65535;   /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */
+            return DRFLAC_TRUE;
+        }
+    } else {
+        drflac_streaminfo streaminfo;
+        if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
+            return DRFLAC_FALSE;
+        }
+
+        pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
+        pInit->sampleRate              = streaminfo.sampleRate;
+        pInit->channels                = streaminfo.channels;
+        pInit->bitsPerSample           = streaminfo.bitsPerSample;
+        pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
+        pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;    /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */
+        pInit->hasMetadataBlocks       = !isLastBlock;
+
+        if (onMeta) {
+            drflac_metadata metadata;
+            metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
+            metadata.pRawData = NULL;
+            metadata.rawDataSize = 0;
+            metadata.data.streaminfo = streaminfo;
+            onMeta(pUserDataMD, &metadata);
+        }
+
+        return DRFLAC_TRUE;
+    }
+}
+
+#ifndef DR_FLAC_NO_OGG
+#define DRFLAC_OGG_MAX_PAGE_SIZE            65307
+#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32    1605413199  /* CRC-32 of "OggS". */
+
+typedef enum
+{
+    drflac_ogg_recover_on_crc_mismatch,
+    drflac_ogg_fail_on_crc_mismatch
+} drflac_ogg_crc_mismatch_recovery;
+
+#ifndef DR_FLAC_NO_CRC
+static drflac_uint32 drflac__crc32_table[] = {
+    0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
+    0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
+    0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L,
+    0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL,
+    0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L,
+    0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L,
+    0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L,
+    0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL,
+    0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L,
+    0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L,
+    0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L,
+    0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL,
+    0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L,
+    0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L,
+    0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L,
+    0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL,
+    0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL,
+    0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L,
+    0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L,
+    0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL,
+    0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL,
+    0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L,
+    0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L,
+    0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL,
+    0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL,
+    0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L,
+    0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L,
+    0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL,
+    0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL,
+    0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L,
+    0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L,
+    0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL,
+    0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L,
+    0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL,
+    0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL,
+    0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L,
+    0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L,
+    0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL,
+    0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL,
+    0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L,
+    0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L,
+    0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL,
+    0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL,
+    0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L,
+    0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L,
+    0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL,
+    0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL,
+    0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L,
+    0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L,
+    0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL,
+    0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L,
+    0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L,
+    0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L,
+    0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL,
+    0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L,
+    0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L,
+    0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L,
+    0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL,
+    0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L,
+    0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L,
+    0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L,
+    0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL,
+    0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
+    0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L
+};
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data)
+{
+#ifndef DR_FLAC_NO_CRC
+    return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data];
+#else
+    (void)data;
+    return crc32;
+#endif
+}
+
+#if 0
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data)
+{
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  8) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  0) & 0xFF));
+    return crc32;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data)
+{
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF));
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >>  0) & 0xFFFFFFFF));
+    return crc32;
+}
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize)
+{
+    /* This can be optimized. */
+    drflac_uint32 i;
+    for (i = 0; i < dataSize; ++i) {
+        crc32 = drflac_crc32_byte(crc32, pData[i]);
+    }
+    return crc32;
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4])
+{
+    return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S';
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader)
+{
+    return 27 + pHeader->segmentCount;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader)
+{
+    drflac_uint32 pageBodySize = 0;
+    int i;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
+        pageBodySize += pHeader->segmentTable[i];
+    }
+
+    return pageBodySize;
+}
+
+static drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
+{
+    drflac_uint8 data[23];
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32);
+
+    if (onRead(pUserData, data, 23) != 23) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += 23;
+
+    /*
+    It's not actually used, but set the capture pattern to 'OggS' for completeness. Not doing this will cause static analysers to complain about
+    us trying to access uninitialized data. We could alternatively just comment out this member of the drflac_ogg_page_header structure, but I
+    like to have it map to the structure of the underlying data.
+    */
+    pHeader->capturePattern[0] = 'O';
+    pHeader->capturePattern[1] = 'g';
+    pHeader->capturePattern[2] = 'g';
+    pHeader->capturePattern[3] = 'S';
+
+    pHeader->structureVersion = data[0];
+    pHeader->headerType       = data[1];
+    DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8);
+    DRFLAC_COPY_MEMORY(&pHeader->serialNumber,    &data[10], 4);
+    DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber,  &data[14], 4);
+    DRFLAC_COPY_MEMORY(&pHeader->checksum,        &data[18], 4);
+    pHeader->segmentCount     = data[22];
+
+    /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */
+    data[18] = 0;
+    data[19] = 0;
+    data[20] = 0;
+    data[21] = 0;
+
+    for (i = 0; i < 23; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]);
+    }
+
+
+    if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += pHeader->segmentCount;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]);
+    }
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
+{
+    drflac_uint8 id[4];
+
+    *pBytesRead = 0;
+
+    if (onRead(pUserData, id, 4) != 4) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += 4;
+
+    /* We need to read byte-by-byte until we find the OggS capture pattern. */
+    for (;;) {
+        if (drflac_ogg__is_capture_pattern(id)) {
+            drflac_result result;
+
+            *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+
+            result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32);
+            if (result == DRFLAC_SUCCESS) {
+                return DRFLAC_SUCCESS;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;
+                } else {
+                    return result;
+                }
+            }
+        } else {
+            /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */
+            id[0] = id[1];
+            id[1] = id[2];
+            id[2] = id[3];
+            if (onRead(pUserData, &id[3], 1) != 1) {
+                return DRFLAC_AT_END;
+            }
+            *pBytesRead += 1;
+        }
+    }
+}
+
+
+/*
+The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works
+in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed
+in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type
+dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from
+the physical Ogg bitstream are converted and delivered in native FLAC format.
+*/
+typedef struct
+{
+    drflac_read_proc onRead;                /* The original onRead callback from drflac_open() and family. */
+    drflac_seek_proc onSeek;                /* The original onSeek callback from drflac_open() and family. */
+    void* pUserData;                        /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */
+    drflac_uint64 currentBytePos;           /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */
+    drflac_uint64 firstBytePos;             /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */
+    drflac_uint32 serialNumber;             /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */
+    drflac_ogg_page_header bosPageHeader;   /* Used for seeking. */
+    drflac_ogg_page_header currentPageHeader;
+    drflac_uint32 bytesRemainingInPage;
+    drflac_uint32 pageDataSize;
+    drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE];
+} drflac_oggbs; /* oggbs = Ogg Bitstream */
+
+static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead)
+{
+    size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead);
+    oggbs->currentBytePos += bytesActuallyRead;
+
+    return bytesActuallyRead;
+}
+
+static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin)
+{
+    if (origin == drflac_seek_origin_start) {
+        if (offset <= 0x7FFFFFFF) {
+            if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos = offset;
+
+            return DRFLAC_TRUE;
+        } else {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos = offset;
+
+            return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current);
+        }
+    } else {
+        while (offset > 0x7FFFFFFF) {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos += 0x7FFFFFFF;
+            offset -= 0x7FFFFFFF;
+        }
+
+        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    /* <-- Safe cast thanks to the loop above. */
+            return DRFLAC_FALSE;
+        }
+        oggbs->currentBytePos += offset;
+
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod)
+{
+    drflac_ogg_page_header header;
+    for (;;) {
+        drflac_uint32 crc32 = 0;
+        drflac_uint32 bytesRead;
+        drflac_uint32 pageBodySize;
+#ifndef DR_FLAC_NO_CRC
+        drflac_uint32 actualCRC32;
+#endif
+
+        if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
+        }
+        oggbs->currentBytePos += bytesRead;
+
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) {
+            continue;   /* Invalid page size. Assume it's corrupted and just move to the next page. */
+        }
+
+        if (header.serialNumber != oggbs->serialNumber) {
+            /* It's not a FLAC page. Skip it. */
+            if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            continue;
+        }
+
+
+        /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */
+        if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) {
+            return DRFLAC_FALSE;
+        }
+        oggbs->pageDataSize = pageBodySize;
+
+#ifndef DR_FLAC_NO_CRC
+        actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize);
+        if (actualCRC32 != header.checksum) {
+            if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) {
+                continue;   /* CRC mismatch. Skip this page. */
+            } else {
+                /*
+                Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we
+                go to the next valid page to ensure we're in a good state, but return false to let the caller know that the
+                seek did not fully complete.
+                */
+                drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch);
+                return DRFLAC_FALSE;
+            }
+        }
+#else
+        (void)recoveryMethod;   /* <-- Silence a warning. */
+#endif
+
+        oggbs->currentPageHeader = header;
+        oggbs->bytesRemainingInPage = pageBodySize;
+        return DRFLAC_TRUE;
+    }
+}
+
+/* Function below is unused at the moment, but I might be re-adding it later. */
+#if 0
+static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg)
+{
+    drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage;
+    drflac_uint8 iSeg = 0;
+    drflac_uint32 iByte = 0;
+    while (iByte < bytesConsumedInPage) {
+        drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+        if (iByte + segmentSize > bytesConsumedInPage) {
+            break;
+        } else {
+            iSeg += 1;
+            iByte += segmentSize;
+        }
+    }
+
+    *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte);
+    return iSeg;
+}
+
+static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
+{
+    /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. */
+    for (;;) {
+        drflac_bool32 atEndOfPage = DRFLAC_FALSE;
+
+        drflac_uint8 bytesRemainingInSeg;
+        drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
+
+        drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg;
+        for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) {
+            drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+            if (segmentSize < 255) {
+                if (iSeg == oggbs->currentPageHeader.segmentCount-1) {
+                    atEndOfPage = DRFLAC_TRUE;
+                }
+
+                break;
+            }
+
+            bytesToEndOfPacketOrPage += segmentSize;
+        }
+
+        /*
+        At this point we will have found either the packet or the end of the page. If were at the end of the page we'll
+        want to load the next page and keep searching for the end of the packet.
+        */
+        drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current);
+        oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage;
+
+        if (atEndOfPage) {
+            /*
+            We're potentially at the next packet, but we need to check the next page first to be sure because the packet may
+            straddle pages.
+            */
+            if (!drflac_oggbs__goto_next_page(oggbs)) {
+                return DRFLAC_FALSE;
+            }
+
+            /* If it's a fresh packet it most likely means we're at the next packet. */
+            if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {
+                return DRFLAC_TRUE;
+            }
+        } else {
+            /* We're at the next packet. */
+            return DRFLAC_TRUE;
+        }
+    }
+}
+
+static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
+{
+    /* The bitstream should be sitting on the first byte just after the header of the frame. */
+
+    /* What we're actually doing here is seeking to the start of the next packet. */
+    return drflac_oggbs__seek_to_next_packet(oggbs);
+}
+#endif
+
+static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
+    drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut;
+    size_t bytesRead = 0;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+    DRFLAC_ASSERT(pRunningBufferOut != NULL);
+
+    /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */
+    while (bytesRead < bytesToRead) {
+        size_t bytesRemainingToRead = bytesToRead - bytesRead;
+
+        if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) {
+            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead);
+            bytesRead += bytesRemainingToRead;
+            oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead;
+            break;
+        }
+
+        /* If we get here it means some of the requested data is contained in the next pages. */
+        if (oggbs->bytesRemainingInPage > 0) {
+            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage);
+            bytesRead += oggbs->bytesRemainingInPage;
+            pRunningBufferOut += oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
+        }
+
+        DRFLAC_ASSERT(bytesRemainingToRead > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+            break;  /* Failed to go to the next page. Might have simply hit the end of the stream. */
+        }
+    }
+
+    return bytesRead;
+}
+
+static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
+    int bytesSeeked = 0;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
+
+    /* Seeking is always forward which makes things a lot simpler. */
+    if (origin == drflac_seek_origin_start) {
+        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            return DRFLAC_FALSE;
+        }
+
+        return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current);
+    }
+
+    DRFLAC_ASSERT(origin == drflac_seek_origin_current);
+
+    while (bytesSeeked < offset) {
+        int bytesRemainingToSeek = offset - bytesSeeked;
+        DRFLAC_ASSERT(bytesRemainingToSeek >= 0);
+
+        if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) {
+            bytesSeeked += bytesRemainingToSeek;
+            (void)bytesSeeked;  /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */
+            oggbs->bytesRemainingInPage -= bytesRemainingToSeek;
+            break;
+        }
+
+        /* If we get here it means some of the requested data is contained in the next pages. */
+        if (oggbs->bytesRemainingInPage > 0) {
+            bytesSeeked += (int)oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
+        }
+
+        DRFLAC_ASSERT(bytesRemainingToSeek > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */
+            return DRFLAC_FALSE;
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+    drflac_uint64 originalBytePos;
+    drflac_uint64 runningGranulePosition;
+    drflac_uint64 runningFrameBytePos;
+    drflac_uint64 runningPCMFrameCount;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+
+    originalBytePos = oggbs->currentBytePos;   /* For recovery. Points to the OggS identifier. */
+
+    /* First seek to the first frame. */
+    if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) {
+        return DRFLAC_FALSE;
+    }
+    oggbs->bytesRemainingInPage = 0;
+
+    runningGranulePosition = 0;
+    for (;;) {
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+            drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
+            return DRFLAC_FALSE;   /* Never did find that sample... */
+        }
+
+        runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize;
+        if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) {
+            break; /* The sample is somewhere in the previous page. */
+        }
+
+        /*
+        At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we
+        disregard any pages that do not begin a fresh packet.
+        */
+        if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {    /* <-- Is it a fresh page? */
+            if (oggbs->currentPageHeader.segmentTable[0] >= 2) {
+                drflac_uint8 firstBytesInPage[2];
+                firstBytesInPage[0] = oggbs->pageData[0];
+                firstBytesInPage[1] = oggbs->pageData[1];
+
+                if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {    /* <-- Does the page begin with a frame's sync code? */
+                    runningGranulePosition = oggbs->currentPageHeader.granulePosition;
+                }
+
+                continue;
+            }
+        }
+    }
+
+    /*
+    We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the
+    start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of
+    a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
+    we find the one containing the target sample.
+    */
+    if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) {
+        return DRFLAC_FALSE;
+    }
+    if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep
+    looping over these frames until we find the one containing the sample we're after.
+    */
+    runningPCMFrameCount = runningGranulePosition;
+    for (;;) {
+        /*
+        There are two ways to find the sample and seek past irrelevant frames:
+          1) Use the native FLAC decoder.
+          2) Use Ogg's framing system.
+
+        Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to
+        do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code
+        duplication for the decoding of frame headers.
+
+        Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg
+        bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the
+        standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks
+        the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read
+        using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to
+        avoid the use of the drflac_bs object.
+
+        Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons:
+          1) Seeking is already partially accelerated using Ogg's paging system in the code block above.
+          2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon.
+          3) Simplicity.
+        */
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+        drflac_uint64 pcmFrameCountInThisFrame;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+
+        /* If we are seeking to the end of the file and we've just hit it, we're done. */
+        if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) {
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                return DRFLAC_TRUE;
+            } else {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) {
+            /*
+            The sample should be in this FLAC frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount);    /* <-- Safe cast because the maximum number of samples in a frame is 65535. */
+                if (pcmFramesToDecode == 0) {
+                    return DRFLAC_TRUE;
+                }
+
+                pFlac->currentPCMFrame = runningPCMFrameCount;
+
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                runningPCMFrameCount += pcmFrameCountInThisFrame;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        }
+    }
+}
+
+
+
+static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
+{
+    drflac_ogg_page_header header;
+    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+    drflac_uint32 bytesRead = 0;
+
+    /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */
+    (void)relaxed;
+
+    pInit->container = drflac_container_ogg;
+    pInit->oggFirstBytePos = 0;
+
+    /*
+    We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the
+    stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if
+    any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
+    */
+    if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+        return DRFLAC_FALSE;
+    }
+    pInit->runningFilePos += bytesRead;
+
+    for (;;) {
+        int pageBodySize;
+
+        /* Break if we're past the beginning of stream page. */
+        if ((header.headerType & 0x02) == 0) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Check if it's a FLAC header. */
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize == 51) {   /* 51 = the lacing value of the FLAC header packet. */
+            /* It could be a FLAC page... */
+            drflac_uint32 bytesRemainingInPage = pageBodySize;
+            drflac_uint8 packetType;
+
+            if (onRead(pUserData, &packetType, 1) != 1) {
+                return DRFLAC_FALSE;
+            }
+
+            bytesRemainingInPage -= 1;
+            if (packetType == 0x7F) {
+                /* Increasingly more likely to be a FLAC page... */
+                drflac_uint8 sig[4];
+                if (onRead(pUserData, sig, 4) != 4) {
+                    return DRFLAC_FALSE;
+                }
+
+                bytesRemainingInPage -= 4;
+                if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') {
+                    /* Almost certainly a FLAC page... */
+                    drflac_uint8 mappingVersion[2];
+                    if (onRead(pUserData, mappingVersion, 2) != 2) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (mappingVersion[0] != 1) {
+                        return DRFLAC_FALSE;   /* Only supporting version 1.x of the Ogg mapping. */
+                    }
+
+                    /*
+                    The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
+                    be handling it in a generic way based on the serial number and packet types.
+                    */
+                    if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    /* Expecting the native FLAC signature "fLaC". */
+                    if (onRead(pUserData, sig, 4) != 4) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') {
+                        /* The remaining data in the page should be the STREAMINFO block. */
+                        drflac_streaminfo streaminfo;
+                        drflac_uint8 isLastBlock;
+                        drflac_uint8 blockType;
+                        drflac_uint32 blockSize;
+                        if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
+                            return DRFLAC_FALSE;
+                        }
+
+                        if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
+                            return DRFLAC_FALSE;    /* Invalid block type. First block must be the STREAMINFO block. */
+                        }
+
+                        if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
+                            /* Success! */
+                            pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
+                            pInit->sampleRate              = streaminfo.sampleRate;
+                            pInit->channels                = streaminfo.channels;
+                            pInit->bitsPerSample           = streaminfo.bitsPerSample;
+                            pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
+                            pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;
+                            pInit->hasMetadataBlocks       = !isLastBlock;
+
+                            if (onMeta) {
+                                drflac_metadata metadata;
+                                metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
+                                metadata.pRawData = NULL;
+                                metadata.rawDataSize = 0;
+                                metadata.data.streaminfo = streaminfo;
+                                onMeta(pUserDataMD, &metadata);
+                            }
+
+                            pInit->runningFilePos  += pageBodySize;
+                            pInit->oggFirstBytePos  = pInit->runningFilePos - 79;   /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */
+                            pInit->oggSerial        = header.serialNumber;
+                            pInit->oggBosHeader     = header;
+                            break;
+                        } else {
+                            /* Failed to read STREAMINFO block. Aww, so close... */
+                            return DRFLAC_FALSE;
+                        }
+                    } else {
+                        /* Invalid file. */
+                        return DRFLAC_FALSE;
+                    }
+                } else {
+                    /* Not a FLAC header. Skip it. */
+                    if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* Not a FLAC header. Seek past the entire page and move on to the next. */
+                if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        pInit->runningFilePos += pageBodySize;
+
+
+        /* Read the header of the next page. */
+        if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
+        }
+        pInit->runningFilePos += bytesRead;
+    }
+
+    /*
+    If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
+    packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the
+    Ogg bistream object.
+    */
+    pInit->hasMetadataBlocks = DRFLAC_TRUE;    /* <-- Always have at least VORBIS_COMMENT metadata block. */
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
+{
+    drflac_bool32 relaxed;
+    drflac_uint8 id[4];
+
+    if (pInit == NULL || onRead == NULL || onSeek == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit));
+    pInit->onRead       = onRead;
+    pInit->onSeek       = onSeek;
+    pInit->onMeta       = onMeta;
+    pInit->container    = container;
+    pInit->pUserData    = pUserData;
+    pInit->pUserDataMD  = pUserDataMD;
+
+    pInit->bs.onRead    = onRead;
+    pInit->bs.onSeek    = onSeek;
+    pInit->bs.pUserData = pUserData;
+    drflac__reset_cache(&pInit->bs);
+
+
+    /* If the container is explicitly defined then we can try opening in relaxed mode. */
+    relaxed = container != drflac_container_unknown;
+
+    /* Skip over any ID3 tags. */
+    for (;;) {
+        if (onRead(pUserData, id, 4) != 4) {
+            return DRFLAC_FALSE;    /* Ran out of data. */
+        }
+        pInit->runningFilePos += 4;
+
+        if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') {
+            drflac_uint8 header[6];
+            drflac_uint8 flags;
+            drflac_uint32 headerSize;
+
+            if (onRead(pUserData, header, 6) != 6) {
+                return DRFLAC_FALSE;    /* Ran out of data. */
+            }
+            pInit->runningFilePos += 6;
+
+            flags = header[1];
+
+            DRFLAC_COPY_MEMORY(&headerSize, header+2, 4);
+            headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize));
+            if (flags & 0x10) {
+                headerSize += 10;
+            }
+
+            if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;    /* Failed to seek past the tag. */
+            }
+            pInit->runningFilePos += headerSize;
+        } else {
+            break;
+        }
+    }
+
+    if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') {
+        return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+    }
+#ifndef DR_FLAC_NO_OGG
+    if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') {
+        return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+    }
+#endif
+
+    /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */
+    if (relaxed) {
+        if (container == drflac_container_native) {
+            return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#ifndef DR_FLAC_NO_OGG
+        if (container == drflac_container_ogg) {
+            return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#endif
+    }
+
+    /* Unsupported container. */
+    return DRFLAC_FALSE;
+}
+
+static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+    DRFLAC_ASSERT(pInit != NULL);
+
+    DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac));
+    pFlac->bs                      = pInit->bs;
+    pFlac->onMeta                  = pInit->onMeta;
+    pFlac->pUserDataMD             = pInit->pUserDataMD;
+    pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames;
+    pFlac->sampleRate              = pInit->sampleRate;
+    pFlac->channels                = (drflac_uint8)pInit->channels;
+    pFlac->bitsPerSample           = (drflac_uint8)pInit->bitsPerSample;
+    pFlac->totalPCMFrameCount      = pInit->totalPCMFrameCount;
+    pFlac->container               = pInit->container;
+}
+
+
+static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac_init_info init;
+    drflac_uint32 allocationSize;
+    drflac_uint32 wholeSIMDVectorCountPerChannel;
+    drflac_uint32 decodedSamplesAllocationSize;
+#ifndef DR_FLAC_NO_OGG
+    drflac_oggbs oggbs;
+#endif
+    drflac_uint64 firstFramePos;
+    drflac_uint64 seektablePos;
+    drflac_uint32 seektableSize;
+    drflac_allocation_callbacks allocationCallbacks;
+    drflac* pFlac;
+
+    /* CPU support first. */
+    drflac__init_cpu_caps();
+
+    if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks != NULL) {
+        allocationCallbacks = *pAllocationCallbacks;
+        if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) {
+            return NULL;    /* Invalid allocation callbacks. */
+        }
+    } else {
+        allocationCallbacks.pUserData = NULL;
+        allocationCallbacks.onMalloc  = drflac__malloc_default;
+        allocationCallbacks.onRealloc = drflac__realloc_default;
+        allocationCallbacks.onFree    = drflac__free_default;
+    }
+
+
+    /*
+    The size of the allocation for the drflac object needs to be large enough to fit the following:
+      1) The main members of the drflac structure
+      2) A block of memory large enough to store the decoded samples of the largest frame in the stream
+      3) If the container is Ogg, a drflac_oggbs object
+
+    The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration
+    the different SIMD instruction sets.
+    */
+    allocationSize = sizeof(drflac);
+
+    /*
+    The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
+    we are supporting.
+    */
+    if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
+    } else {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
+    }
+
+    decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
+
+    allocationSize += decodedSamplesAllocationSize;
+    allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  /* Allocate extra bytes to ensure we have enough for alignment. */
+
+#ifndef DR_FLAC_NO_OGG
+    /* There's additional data required for Ogg streams. */
+    if (init.container == drflac_container_ogg) {
+        allocationSize += sizeof(drflac_oggbs);
+    }
+
+    DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs));
+    if (init.container == drflac_container_ogg) {
+        oggbs.onRead = onRead;
+        oggbs.onSeek = onSeek;
+        oggbs.pUserData = pUserData;
+        oggbs.currentBytePos = init.oggFirstBytePos;
+        oggbs.firstBytePos = init.oggFirstBytePos;
+        oggbs.serialNumber = init.oggSerial;
+        oggbs.bosPageHeader = init.oggBosHeader;
+        oggbs.bytesRemainingInPage = 0;
+    }
+#endif
+
+    /*
+    This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to
+    consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading
+    and decoding the metadata.
+    */
+    firstFramePos = 42;   /* <-- We know we are at byte 42 at this point. */
+    seektablePos  = 0;
+    seektableSize = 0;
+    if (init.hasMetadataBlocks) {
+        drflac_read_proc onReadOverride = onRead;
+        drflac_seek_proc onSeekOverride = onSeek;
+        void* pUserDataOverride = pUserData;
+
+#ifndef DR_FLAC_NO_OGG
+        if (init.container == drflac_container_ogg) {
+            onReadOverride = drflac__on_read_ogg;
+            onSeekOverride = drflac__on_seek_ogg;
+            pUserDataOverride = (void*)&oggbs;
+        }
+#endif
+
+        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) {
+            return NULL;
+        }
+
+        allocationSize += seektableSize;
+    }
+
+
+    pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    drflac__init_from_info(pFlac, &init);
+    pFlac->allocationCallbacks = allocationCallbacks;
+    pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE);
+
+#ifndef DR_FLAC_NO_OGG
+    if (init.container == drflac_container_ogg) {
+        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
+        *pInternalOggbs = oggbs;
+
+        /* The Ogg bistream needs to be layered on top of the original bitstream. */
+        pFlac->bs.onRead = drflac__on_read_ogg;
+        pFlac->bs.onSeek = drflac__on_seek_ogg;
+        pFlac->bs.pUserData = (void*)pInternalOggbs;
+        pFlac->_oggbs = (void*)pInternalOggbs;
+    }
+#endif
+
+    pFlac->firstFLACFramePosInBytes = firstFramePos;
+
+    /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */
+#ifndef DR_FLAC_NO_OGG
+    if (init.container == drflac_container_ogg)
+    {
+        pFlac->pSeekpoints = NULL;
+        pFlac->seekpointCount = 0;
+    }
+    else
+#endif
+    {
+        /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */
+        if (seektablePos != 0) {
+            pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints);
+            pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
+
+            DRFLAC_ASSERT(pFlac->bs.onSeek != NULL);
+            DRFLAC_ASSERT(pFlac->bs.onRead != NULL);
+
+            /* Seek to the seektable, then just read directly into our seektable buffer. */
+            if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
+                if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) {
+                    /* Endian swap. */
+                    drflac_uint32 iSeekpoint;
+                    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+                        pFlac->pSeekpoints[iSeekpoint].firstPCMFrame   = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame);
+                        pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset);
+                        pFlac->pSeekpoints[iSeekpoint].pcmFrameCount   = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount);
+                    }
+                } else {
+                    /* Failed to read the seektable. Pretend we don't have one. */
+                    pFlac->pSeekpoints = NULL;
+                    pFlac->seekpointCount = 0;
+                }
+
+                /* We need to seek back to where we were. If this fails it's a critical error. */
+                if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) {
+                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                    return NULL;
+                }
+            } else {
+                /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */
+                pFlac->pSeekpoints = NULL;
+                pFlac->seekpointCount = 0;
+            }
+        }
+    }
+
+
+    /*
+    If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
+    the first frame.
+    */
+    if (!init.hasStreamInfoBlock) {
+        pFlac->currentFLACFrame.header = init.firstFrameHeader;
+        for (;;) {
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                break;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                        drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                        return NULL;
+                    }
+                    continue;
+                } else {
+                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                    return NULL;
+                }
+            }
+        }
+    }
+
+    return pFlac;
+}
+
+
+
+#ifndef DR_FLAC_NO_STDIO
+#include <stdio.h>
+#include <wchar.h>      /* For wcslen(), wcsrtombs() */
+
+/* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
+#include <errno.h>
+static drflac_result drflac_result_from_errno(int e)
+{
+    switch (e)
+    {
+        case 0: return DRFLAC_SUCCESS;
+    #ifdef EPERM
+        case EPERM: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef ENOENT
+        case ENOENT: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef ESRCH
+        case ESRCH: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef EINTR
+        case EINTR: return DRFLAC_INTERRUPT;
+    #endif
+    #ifdef EIO
+        case EIO: return DRFLAC_IO_ERROR;
+    #endif
+    #ifdef ENXIO
+        case ENXIO: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef E2BIG
+        case E2BIG: return DRFLAC_INVALID_ARGS;
+    #endif
+    #ifdef ENOEXEC
+        case ENOEXEC: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef EBADF
+        case EBADF: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ECHILD
+        case ECHILD: return DRFLAC_ERROR;
+    #endif
+    #ifdef EAGAIN
+        case EAGAIN: return DRFLAC_UNAVAILABLE;
+    #endif
+    #ifdef ENOMEM
+        case ENOMEM: return DRFLAC_OUT_OF_MEMORY;
+    #endif
+    #ifdef EACCES
+        case EACCES: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef EFAULT
+        case EFAULT: return DRFLAC_BAD_ADDRESS;
+    #endif
+    #ifdef ENOTBLK
+        case ENOTBLK: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBUSY
+        case EBUSY: return DRFLAC_BUSY;
+    #endif
+    #ifdef EEXIST
+        case EEXIST: return DRFLAC_ALREADY_EXISTS;
+    #endif
+    #ifdef EXDEV
+        case EXDEV: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENODEV
+        case ENODEV: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef ENOTDIR
+        case ENOTDIR: return DRFLAC_NOT_DIRECTORY;
+    #endif
+    #ifdef EISDIR
+        case EISDIR: return DRFLAC_IS_DIRECTORY;
+    #endif
+    #ifdef EINVAL
+        case EINVAL: return DRFLAC_INVALID_ARGS;
+    #endif
+    #ifdef ENFILE
+        case ENFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef EMFILE
+        case EMFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef ENOTTY
+        case ENOTTY: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef ETXTBSY
+        case ETXTBSY: return DRFLAC_BUSY;
+    #endif
+    #ifdef EFBIG
+        case EFBIG: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef ENOSPC
+        case ENOSPC: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef ESPIPE
+        case ESPIPE: return DRFLAC_BAD_SEEK;
+    #endif
+    #ifdef EROFS
+        case EROFS: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef EMLINK
+        case EMLINK: return DRFLAC_TOO_MANY_LINKS;
+    #endif
+    #ifdef EPIPE
+        case EPIPE: return DRFLAC_BAD_PIPE;
+    #endif
+    #ifdef EDOM
+        case EDOM: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef ERANGE
+        case ERANGE: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef EDEADLK
+        case EDEADLK: return DRFLAC_DEADLOCK;
+    #endif
+    #ifdef ENAMETOOLONG
+        case ENAMETOOLONG: return DRFLAC_PATH_TOO_LONG;
+    #endif
+    #ifdef ENOLCK
+        case ENOLCK: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOSYS
+        case ENOSYS: return DRFLAC_NOT_IMPLEMENTED;
+    #endif
+    #ifdef ENOTEMPTY
+        case ENOTEMPTY: return DRFLAC_DIRECTORY_NOT_EMPTY;
+    #endif
+    #ifdef ELOOP
+        case ELOOP: return DRFLAC_TOO_MANY_LINKS;
+    #endif
+    #ifdef ENOMSG
+        case ENOMSG: return DRFLAC_NO_MESSAGE;
+    #endif
+    #ifdef EIDRM
+        case EIDRM: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECHRNG
+        case ECHRNG: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL2NSYNC
+        case EL2NSYNC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL3HLT
+        case EL3HLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL3RST
+        case EL3RST: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELNRNG
+        case ELNRNG: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef EUNATCH
+        case EUNATCH: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOCSI
+        case ENOCSI: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL2HLT
+        case EL2HLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADE
+        case EBADE: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADR
+        case EBADR: return DRFLAC_ERROR;
+    #endif
+    #ifdef EXFULL
+        case EXFULL: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOANO
+        case ENOANO: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADRQC
+        case EBADRQC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADSLT
+        case EBADSLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBFONT
+        case EBFONT: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ENOSTR
+        case ENOSTR: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENODATA
+        case ENODATA: return DRFLAC_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ETIME
+        case ETIME: return DRFLAC_TIMEOUT;
+    #endif
+    #ifdef ENOSR
+        case ENOSR: return DRFLAC_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ENONET
+        case ENONET: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENOPKG
+        case ENOPKG: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMOTE
+        case EREMOTE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOLINK
+        case ENOLINK: return DRFLAC_ERROR;
+    #endif
+    #ifdef EADV
+        case EADV: return DRFLAC_ERROR;
+    #endif
+    #ifdef ESRMNT
+        case ESRMNT: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECOMM
+        case ECOMM: return DRFLAC_ERROR;
+    #endif
+    #ifdef EPROTO
+        case EPROTO: return DRFLAC_ERROR;
+    #endif
+    #ifdef EMULTIHOP
+        case EMULTIHOP: return DRFLAC_ERROR;
+    #endif
+    #ifdef EDOTDOT
+        case EDOTDOT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADMSG
+        case EBADMSG: return DRFLAC_BAD_MESSAGE;
+    #endif
+    #ifdef EOVERFLOW
+        case EOVERFLOW: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef ENOTUNIQ
+        case ENOTUNIQ: return DRFLAC_NOT_UNIQUE;
+    #endif
+    #ifdef EBADFD
+        case EBADFD: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMCHG
+        case EREMCHG: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELIBACC
+        case ELIBACC: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef ELIBBAD
+        case ELIBBAD: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ELIBSCN
+        case ELIBSCN: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ELIBMAX
+        case ELIBMAX: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELIBEXEC
+        case ELIBEXEC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EILSEQ
+        case EILSEQ: return DRFLAC_INVALID_DATA;
+    #endif
+    #ifdef ERESTART
+        case ERESTART: return DRFLAC_ERROR;
+    #endif
+    #ifdef ESTRPIPE
+        case ESTRPIPE: return DRFLAC_ERROR;
+    #endif
+    #ifdef EUSERS
+        case EUSERS: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTSOCK
+        case ENOTSOCK: return DRFLAC_NOT_SOCKET;
+    #endif
+    #ifdef EDESTADDRREQ
+        case EDESTADDRREQ: return DRFLAC_NO_ADDRESS;
+    #endif
+    #ifdef EMSGSIZE
+        case EMSGSIZE: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef EPROTOTYPE
+        case EPROTOTYPE: return DRFLAC_BAD_PROTOCOL;
+    #endif
+    #ifdef ENOPROTOOPT
+        case ENOPROTOOPT: return DRFLAC_PROTOCOL_UNAVAILABLE;
+    #endif
+    #ifdef EPROTONOSUPPORT
+        case EPROTONOSUPPORT: return DRFLAC_PROTOCOL_NOT_SUPPORTED;
+    #endif
+    #ifdef ESOCKTNOSUPPORT
+        case ESOCKTNOSUPPORT: return DRFLAC_SOCKET_NOT_SUPPORTED;
+    #endif
+    #ifdef EOPNOTSUPP
+        case EOPNOTSUPP: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef EPFNOSUPPORT
+        case EPFNOSUPPORT: return DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EAFNOSUPPORT
+        case EAFNOSUPPORT: return DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EADDRINUSE
+        case EADDRINUSE: return DRFLAC_ALREADY_IN_USE;
+    #endif
+    #ifdef EADDRNOTAVAIL
+        case EADDRNOTAVAIL: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENETDOWN
+        case ENETDOWN: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENETUNREACH
+        case ENETUNREACH: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENETRESET
+        case ENETRESET: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ECONNABORTED
+        case ECONNABORTED: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ECONNRESET
+        case ECONNRESET: return DRFLAC_CONNECTION_RESET;
+    #endif
+    #ifdef ENOBUFS
+        case ENOBUFS: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef EISCONN
+        case EISCONN: return DRFLAC_ALREADY_CONNECTED;
+    #endif
+    #ifdef ENOTCONN
+        case ENOTCONN: return DRFLAC_NOT_CONNECTED;
+    #endif
+    #ifdef ESHUTDOWN
+        case ESHUTDOWN: return DRFLAC_ERROR;
+    #endif
+    #ifdef ETOOMANYREFS
+        case ETOOMANYREFS: return DRFLAC_ERROR;
+    #endif
+    #ifdef ETIMEDOUT
+        case ETIMEDOUT: return DRFLAC_TIMEOUT;
+    #endif
+    #ifdef ECONNREFUSED
+        case ECONNREFUSED: return DRFLAC_CONNECTION_REFUSED;
+    #endif
+    #ifdef EHOSTDOWN
+        case EHOSTDOWN: return DRFLAC_NO_HOST;
+    #endif
+    #ifdef EHOSTUNREACH
+        case EHOSTUNREACH: return DRFLAC_NO_HOST;
+    #endif
+    #ifdef EALREADY
+        case EALREADY: return DRFLAC_IN_PROGRESS;
+    #endif
+    #ifdef EINPROGRESS
+        case EINPROGRESS: return DRFLAC_IN_PROGRESS;
+    #endif
+    #ifdef ESTALE
+        case ESTALE: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef EUCLEAN
+        case EUCLEAN: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTNAM
+        case ENOTNAM: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENAVAIL
+        case ENAVAIL: return DRFLAC_ERROR;
+    #endif
+    #ifdef EISNAM
+        case EISNAM: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMOTEIO
+        case EREMOTEIO: return DRFLAC_IO_ERROR;
+    #endif
+    #ifdef EDQUOT
+        case EDQUOT: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef ENOMEDIUM
+        case ENOMEDIUM: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef EMEDIUMTYPE
+        case EMEDIUMTYPE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECANCELED
+        case ECANCELED: return DRFLAC_CANCELLED;
+    #endif
+    #ifdef ENOKEY
+        case ENOKEY: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYEXPIRED
+        case EKEYEXPIRED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYREVOKED
+        case EKEYREVOKED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYREJECTED
+        case EKEYREJECTED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EOWNERDEAD
+        case EOWNERDEAD: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTRECOVERABLE
+        case ENOTRECOVERABLE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ERFKILL
+        case ERFKILL: return DRFLAC_ERROR;
+    #endif
+    #ifdef EHWPOISON
+        case EHWPOISON: return DRFLAC_ERROR;
+    #endif
+        default: return DRFLAC_ERROR;
+    }
+}
+
+static drflac_result drflac_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
+{
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    errno_t err;
+#endif
+
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRFLAC_INVALID_ARGS;
+    }
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    err = fopen_s(ppFile, pFilePath, pOpenMode);
+    if (err != 0) {
+        return drflac_result_from_errno(err);
+    }
+#else
+#if defined(_WIN32) || defined(__APPLE__)
+    *ppFile = fopen(pFilePath, pOpenMode);
+#else
+    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
+        *ppFile = fopen64(pFilePath, pOpenMode);
+    #else
+        *ppFile = fopen(pFilePath, pOpenMode);
+    #endif
+#endif
+    if (*ppFile == NULL) {
+        drflac_result result = drflac_result_from_errno(errno);
+        if (result == DRFLAC_SUCCESS) {
+            result = DRFLAC_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
+        }
+
+        return result;
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+/*
+_wfopen() isn't always available in all compilation environments.
+
+    * Windows only.
+    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
+    * MinGW-64 (both 32- and 64-bit) seems to support it.
+    * MinGW wraps it in !defined(__STRICT_ANSI__).
+    * OpenWatcom wraps it in !defined(_NO_EXT_KEYS).
+
+This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
+fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
+*/
+#if defined(_WIN32)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS))
+        #define DRFLAC_HAS_WFOPEN
+    #endif
+#endif
+
+static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRFLAC_INVALID_ARGS;
+    }
+
+#if defined(DRFLAC_HAS_WFOPEN)
+    {
+        /* Use _wfopen() on Windows. */
+    #if defined(_MSC_VER) && _MSC_VER >= 1400
+        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
+        if (err != 0) {
+            return drflac_result_from_errno(err);
+        }
+    #else
+        *ppFile = _wfopen(pFilePath, pOpenMode);
+        if (*ppFile == NULL) {
+            return drflac_result_from_errno(errno);
+        }
+    #endif
+        (void)pAllocationCallbacks;
+    }
+#else
+    /*
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
+    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
+    */
+    {
+        mbstate_t mbs;
+        size_t lenMB;
+        const wchar_t* pFilePathTemp = pFilePath;
+        char* pFilePathMB = NULL;
+        char pOpenModeMB[32] = {0};
+
+        /* Get the length first. */
+        DRFLAC_ZERO_OBJECT(&mbs);
+        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
+        if (lenMB == (size_t)-1) {
+            return drflac_result_from_errno(errno);
+        }
+
+        pFilePathMB = (char*)drflac__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
+        if (pFilePathMB == NULL) {
+            return DRFLAC_OUT_OF_MEMORY;
+        }
+
+        pFilePathTemp = pFilePath;
+        DRFLAC_ZERO_OBJECT(&mbs);
+        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
+
+        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
+        {
+            size_t i = 0;
+            for (;;) {
+                if (pOpenMode[i] == 0) {
+                    pOpenModeMB[i] = '\0';
+                    break;
+                }
+
+                pOpenModeMB[i] = (char)pOpenMode[i];
+                i += 1;
+            }
+        }
+
+        *ppFile = fopen(pFilePathMB, pOpenModeMB);
+
+        drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
+    }
+
+    if (*ppFile == NULL) {
+        return DRFLAC_ERROR;
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData);
+}
+
+static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
+
+    return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+}
+
+
+DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return NULL;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return NULL;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return pFlac;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return pFlac;
+    }
+
+    return pFlac;
+}
+#endif  /* DR_FLAC_NO_STDIO */
+
+static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+    size_t bytesRemaining;
+
+    DRFLAC_ASSERT(memoryStream != NULL);
+    DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos);
+
+    bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos;
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (bytesToRead > 0) {
+        DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead);
+        memoryStream->currentReadPos += bytesToRead;
+    }
+
+    return bytesToRead;
+}
+
+static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+
+    DRFLAC_ASSERT(memoryStream != NULL);
+    DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */
+
+    if (offset > (drflac_int64)memoryStream->dataSize) {
+        return DRFLAC_FALSE;
+    }
+
+    if (origin == drflac_seek_origin_current) {
+        if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) {
+            memoryStream->currentReadPos += offset;
+        } else {
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
+        }
+    } else {
+        if ((drflac_uint32)offset <= memoryStream->dataSize) {
+            memoryStream->currentReadPos = offset;
+        } else {
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
+    memoryStream.data = (const drflac_uint8*)pData;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+    pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    pFlac->memoryStream = memoryStream;
+
+    /* This is an awful hack... */
+#ifndef DR_FLAC_NO_OGG
+    if (pFlac->container == drflac_container_ogg)
+    {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        oggbs->pUserData = &pFlac->memoryStream;
+    }
+    else
+#endif
+    {
+        pFlac->bs.pUserData = &pFlac->memoryStream;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
+    memoryStream.data = (const drflac_uint8*)pData;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    pFlac->memoryStream = memoryStream;
+
+    /* This is an awful hack... */
+#ifndef DR_FLAC_NO_OGG
+    if (pFlac->container == drflac_container_ogg)
+    {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        oggbs->pUserData = &pFlac->memoryStream;
+    }
+    else
+#endif
+    {
+        pFlac->bs.pUserData = &pFlac->memoryStream;
+    }
+
+    return pFlac;
+}
+
+
+
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+}
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks);
+}
+
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+}
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks);
+}
+
+DRFLAC_API void drflac_close(drflac* pFlac)
+{
+    if (pFlac == NULL) {
+        return;
+    }
+
+#ifndef DR_FLAC_NO_STDIO
+    /*
+    If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file()
+    was used by looking at the callbacks.
+    */
+    if (pFlac->bs.onRead == drflac__on_read_stdio) {
+        fclose((FILE*)pFlac->bs.pUserData);
+    }
+
+#ifndef DR_FLAC_NO_OGG
+    /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */
+    if (pFlac->container == drflac_container_ogg) {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        DRFLAC_ASSERT(pFlac->bs.onRead == drflac__on_read_ogg);
+
+        if (oggbs->onRead == drflac__on_read_stdio) {
+            fclose((FILE*)oggbs->pUserData);
+        }
+    }
+#endif
+#endif
+
+    drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks);
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0;
+        pOutputSamples[i*8+1] = (drflac_int32)right0;
+        pOutputSamples[i*8+2] = (drflac_int32)left1;
+        pOutputSamples[i*8+3] = (drflac_int32)right1;
+        pOutputSamples[i*8+4] = (drflac_int32)left2;
+        pOutputSamples[i*8+5] = (drflac_int32)right2;
+        pOutputSamples[i*8+6] = (drflac_int32)left3;
+        pOutputSamples[i*8+7] = (drflac_int32)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+
+        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right = vsubq_u32(left, side);
+
+        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0;
+        pOutputSamples[i*8+1] = (drflac_int32)right0;
+        pOutputSamples[i*8+2] = (drflac_int32)left1;
+        pOutputSamples[i*8+3] = (drflac_int32)right1;
+        pOutputSamples[i*8+4] = (drflac_int32)left2;
+        pOutputSamples[i*8+5] = (drflac_int32)right2;
+        pOutputSamples[i*8+6] = (drflac_int32)left3;
+        pOutputSamples[i*8+7] = (drflac_int32)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+
+        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left  = vaddq_u32(right, side);
+
+        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
+            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+    int32x4_t  wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
+    int32x4_t  wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
+    uint32x4_t one4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+    one4         = vdupq_n_u32(1);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
+
+            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
+            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
+        }
+    } else {
+        int32x4_t shift4;
+
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
+
+            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample));
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample));
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        pOutputSamples[i*8+0] = (drflac_int32)tempL0;
+        pOutputSamples[i*8+1] = (drflac_int32)tempR0;
+        pOutputSamples[i*8+2] = (drflac_int32)tempL1;
+        pOutputSamples[i*8+3] = (drflac_int32)tempR1;
+        pOutputSamples[i*8+4] = (drflac_int32)tempL2;
+        pOutputSamples[i*8+5] = (drflac_int32)tempR2;
+        pOutputSamples[i*8+6] = (drflac_int32)tempL3;
+        pOutputSamples[i*8+7] = (drflac_int32)tempR3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    int32x4_t shift4_0 = vdupq_n_s32(shift0);
+    int32x4_t shift4_1 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t left;
+        int32x4_t right;
+
+        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0));
+        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1));
+
+        drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        left0  >>= 16;
+        left1  >>= 16;
+        left2  >>= 16;
+        left3  >>= 16;
+
+        right0 >>= 16;
+        right1 >>= 16;
+        right2 >>= 16;
+        right3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)left0;
+        pOutputSamples[i*8+1] = (drflac_int16)right0;
+        pOutputSamples[i*8+2] = (drflac_int16)left1;
+        pOutputSamples[i*8+3] = (drflac_int16)right1;
+        pOutputSamples[i*8+4] = (drflac_int16)left2;
+        pOutputSamples[i*8+5] = (drflac_int16)right2;
+        pOutputSamples[i*8+6] = (drflac_int16)left3;
+        pOutputSamples[i*8+7] = (drflac_int16)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+
+        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right = vsubq_u32(left, side);
+
+        left  = vshrq_n_u32(left,  16);
+        right = vshrq_n_u32(right, 16);
+
+        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        left0  >>= 16;
+        left1  >>= 16;
+        left2  >>= 16;
+        left3  >>= 16;
+
+        right0 >>= 16;
+        right1 >>= 16;
+        right2 >>= 16;
+        right3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)left0;
+        pOutputSamples[i*8+1] = (drflac_int16)right0;
+        pOutputSamples[i*8+2] = (drflac_int16)left1;
+        pOutputSamples[i*8+3] = (drflac_int16)right1;
+        pOutputSamples[i*8+4] = (drflac_int16)left2;
+        pOutputSamples[i*8+5] = (drflac_int16)right2;
+        pOutputSamples[i*8+6] = (drflac_int16)left3;
+        pOutputSamples[i*8+7] = (drflac_int16)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+
+        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left  = vaddq_u32(right, side);
+
+        left  = vshrq_n_u32(left,  16);
+        right = vshrq_n_u32(right, 16);
+
+        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            temp0L >>= 16;
+            temp1L >>= 16;
+            temp2L >>= 16;
+            temp3L >>= 16;
+
+            temp0R >>= 16;
+            temp1R >>= 16;
+            temp2R >>= 16;
+            temp3R >>= 16;
+
+            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = ((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = ((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = ((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = ((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = ((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = ((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = ((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = ((drflac_int32)(mid3 - side3) >> 1);
+
+            temp0L >>= 16;
+            temp1L >>= 16;
+            temp2L >>= 16;
+            temp3L >>= 16;
+
+            temp0R >>= 16;
+            temp1R >>= 16;
+            temp2R >>= 16;
+            temp3R >>= 16;
+
+            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            left  = _mm_srai_epi32(left,  16);
+            right = _mm_srai_epi32(right, 16);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            left  = _mm_srai_epi32(left,  16);
+            right = _mm_srai_epi32(right, 16);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+    int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
+    int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            left  = vshrq_n_s32(left,  16);
+            right = vshrq_n_s32(right, 16);
+
+            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
+        }
+    } else {
+        int32x4_t shift4;
+
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            left  = vshrq_n_s32(left,  16);
+            right = vshrq_n_s32(right, 16);
+
+            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        tempL0 >>= 16;
+        tempL1 >>= 16;
+        tempL2 >>= 16;
+        tempL3 >>= 16;
+
+        tempR0 >>= 16;
+        tempR1 >>= 16;
+        tempR2 >>= 16;
+        tempR3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)tempL0;
+        pOutputSamples[i*8+1] = (drflac_int16)tempR0;
+        pOutputSamples[i*8+2] = (drflac_int16)tempL1;
+        pOutputSamples[i*8+3] = (drflac_int16)tempR1;
+        pOutputSamples[i*8+4] = (drflac_int16)tempL2;
+        pOutputSamples[i*8+5] = (drflac_int16)tempR2;
+        pOutputSamples[i*8+6] = (drflac_int16)tempL3;
+        pOutputSamples[i*8+7] = (drflac_int16)tempR3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        /* At this point we have results. We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    int32x4_t shift0_4 = vdupq_n_s32(shift0);
+    int32x4_t shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t left;
+        int32x4_t right;
+
+        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
+        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
+
+        left  = vshrq_n_s32(left,  16);
+        right = vshrq_n_s32(right, 16);
+
+        drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                        pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16);
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    float factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    __m128 factor;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    float32x4_t factor4;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        left   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right  = vsubq_u32(left, side);
+        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    float factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    __m128 factor;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    float32x4_t factor4;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        side   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left   = vaddq_u32(right, side);
+        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (float)((((drflac_int32)(mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((((drflac_int32)(mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+    float factor = 1 / 2147483648.0;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample - 8;
+    float factor;
+    __m128 factor128;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = 1.0f / 8388608.0f;
+    factor128 = _mm_set1_ps(factor);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i tempL;
+            __m128i tempR;
+            __m128  leftf;
+            __m128  rightf;
+
+            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            tempR  = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
+            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i tempL;
+            __m128i tempR;
+            __m128 leftf;
+            __m128 rightf;
+
+            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            tempR  = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample - 8;
+    float factor;
+    float32x4_t factor4;
+    int32x4_t shift4;
+    int32x4_t wbps0_4;  /* Wasted Bits Per Sample */
+    int32x4_t wbps1_4;  /* Wasted Bits Per Sample */
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor  = 1.0f / 8388608.0f;
+    factor4 = vdupq_n_f32(factor);
+    wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            int32x4_t lefti;
+            int32x4_t righti;
+            float32x4_t leftf;
+            float32x4_t rightf;
+
+            uint32x4_t mid  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
+            uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
+
+            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            lefti  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
+            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
+        }
+    } else {
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t lefti;
+            int32x4_t righti;
+            float32x4_t leftf;
+            float32x4_t rightf;
+
+            mid    = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
+            side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
+
+            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            lefti  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (float)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    float factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        pOutputSamples[i*8+0] = (drflac_int32)tempL0 * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)tempR0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)tempL1 * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)tempR1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)tempL2 * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)tempR2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)tempL3 * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)tempR3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    float factor = 1.0f / 8388608.0f;
+    __m128 factor128 = _mm_set1_ps(factor);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i lefti;
+        __m128i righti;
+        __m128 leftf;
+        __m128 rightf;
+
+        lefti  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        leftf  = _mm_mul_ps(_mm_cvtepi32_ps(lefti),  factor128);
+        rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    float factor = 1.0f / 8388608.0f;
+    float32x4_t factor4 = vdupq_n_f32(factor);
+    int32x4_t shift0_4  = vdupq_n_s32(shift0);
+    int32x4_t shift1_4  = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t lefti;
+        int32x4_t righti;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        lefti  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
+        righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
+
+        leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                        pBufferOut[(i*channelCount)+j] = (float)(sampleS32 / 2147483648.0);
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    if (pFlac == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Don't do anything if we're already on the seek point. */
+    if (pFlac->currentPCMFrame == pcmFrameIndex) {
+        return DRFLAC_TRUE;
+    }
+
+    /*
+    If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
+    when the decoder was opened.
+    */
+    if (pFlac->firstFLACFramePosInBytes == 0) {
+        return DRFLAC_FALSE;
+    }
+
+    if (pcmFrameIndex == 0) {
+        pFlac->currentPCMFrame = 0;
+        return drflac__seek_to_first_frame(pFlac);
+    } else {
+        drflac_bool32 wasSuccessful = DRFLAC_FALSE;
+
+        /* Clamp the sample to the end. */
+        if (pcmFrameIndex > pFlac->totalPCMFrameCount) {
+            pcmFrameIndex = pFlac->totalPCMFrameCount;
+        }
+
+        /* If the target sample and the current sample are in the same frame we just move the position forward. */
+        if (pcmFrameIndex > pFlac->currentPCMFrame) {
+            /* Forward. */
+            drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame);
+            if (pFlac->currentFLACFrame.pcmFramesRemaining >  offset) {
+                pFlac->currentFLACFrame.pcmFramesRemaining -= offset;
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                return DRFLAC_TRUE;
+            }
+        } else {
+            /* Backward. */
+            drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex);
+            drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+            drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining;
+            if (currentFLACFramePCMFramesConsumed > offsetAbs) {
+                pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs;
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                return DRFLAC_TRUE;
+            }
+        }
+
+        /*
+        Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so
+        we'll instead use Ogg's natural seeking facility.
+        */
+#ifndef DR_FLAC_NO_OGG
+        if (pFlac->container == drflac_container_ogg)
+        {
+            wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex);
+        }
+        else
+#endif
+        {
+            /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */
+            if (/*!wasSuccessful && */!pFlac->_noSeekTableSeek) {
+                wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex);
+            }
+
+#if !defined(DR_FLAC_NO_CRC)
+            /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. */
+            if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) {
+                wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex);
+            }
+#endif
+
+            /* Fall back to brute force if all else fails. */
+            if (!wasSuccessful && !pFlac->_noBruteForceSeek) {
+                wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex);
+            }
+        }
+
+        pFlac->currentPCMFrame = pcmFrameIndex;
+        return wasSuccessful;
+    }
+}
+
+
+
+/* High Level APIs */
+
+#if defined(SIZE_MAX)
+    #define DRFLAC_SIZE_MAX  SIZE_MAX
+#else
+    #if defined(DRFLAC_64BIT)
+        #define DRFLAC_SIZE_MAX  ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define DRFLAC_SIZE_MAX  0xFFFFFFFF
+    #endif
+#endif
+
+
+/* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */
+#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \
+static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\
+{                                                                                                                                                                   \
+    type* pSampleData = NULL;                                                                                                                                       \
+    drflac_uint64 totalPCMFrameCount;                                                                                                                               \
+                                                                                                                                                                    \
+    DRFLAC_ASSERT(pFlac != NULL);                                                                                                                                   \
+                                                                                                                                                                    \
+    totalPCMFrameCount = pFlac->totalPCMFrameCount;                                                                                                                 \
+                                                                                                                                                                    \
+    if (totalPCMFrameCount == 0) {                                                                                                                                  \
+        type buffer[4096];                                                                                                                                          \
+        drflac_uint64 pcmFramesRead;                                                                                                                                \
+        size_t sampleDataBufferSize = sizeof(buffer);                                                                                                               \
+                                                                                                                                                                    \
+        pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks);                                                      \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) {          \
+            if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) {                                                   \
+                type* pNewSampleData;                                                                                                                               \
+                size_t newSampleDataBufferSize;                                                                                                                     \
+                                                                                                                                                                    \
+                newSampleDataBufferSize = sampleDataBufferSize * 2;                                                                                                 \
+                pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks);    \
+                if (pNewSampleData == NULL) {                                                                                                                       \
+                    drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks);                                                                          \
+                    goto on_error;                                                                                                                                  \
+                }                                                                                                                                                   \
+                                                                                                                                                                    \
+                sampleDataBufferSize = newSampleDataBufferSize;                                                                                                     \
+                pSampleData = pNewSampleData;                                                                                                                       \
+            }                                                                                                                                                       \
+                                                                                                                                                                    \
+            DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type)));                   \
+            totalPCMFrameCount += pcmFramesRead;                                                                                                                    \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to                                       \
+           protect those ears from random noise! */                                                                                                                 \
+        DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
+    } else {                                                                                                                                                        \
+        drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
+        if (dataSize > DRFLAC_SIZE_MAX) {                                                                                                                           \
+            goto on_error;  /* The decoded data is too big. */                                                                                                      \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks);    /* <-- Safe cast as per the check above. */           \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData);                                                     \
+    }                                                                                                                                                               \
+                                                                                                                                                                    \
+    if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;                                                                                                          \
+    if (channelsOut) *channelsOut = pFlac->channels;                                                                                                                \
+    if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount;                                                                                         \
+                                                                                                                                                                    \
+    drflac_close(pFlac);                                                                                                                                            \
+    return pSampleData;                                                                                                                                             \
+                                                                                                                                                                    \
+on_error:                                                                                                                                                           \
+    drflac_close(pFlac);                                                                                                                                            \
+    return NULL;                                                                                                                                                    \
+}
+
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32)
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16)
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float)
+
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+#ifndef DR_FLAC_NO_STDIO
+DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+#endif
+
+DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+
+DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        drflac__free_from_callbacks(p, pAllocationCallbacks);
+    } else {
+        drflac__free_default(p, NULL);
+    }
+}
+
+
+
+
+DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments)
+{
+    if (pIter == NULL) {
+        return;
+    }
+
+    pIter->countRemaining = commentCount;
+    pIter->pRunningData   = (const char*)pComments;
+}
+
+DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut)
+{
+    drflac_int32 length;
+    const char* pComment;
+
+    /* Safety. */
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = 0;
+    }
+
+    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
+        return NULL;
+    }
+
+    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
+    pIter->pRunningData += 4;
+
+    pComment = pIter->pRunningData;
+    pIter->pRunningData += length;
+    pIter->countRemaining -= 1;
+
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = length;
+    }
+
+    return pComment;
+}
+
+
+
+
+DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData)
+{
+    if (pIter == NULL) {
+        return;
+    }
+
+    pIter->countRemaining = trackCount;
+    pIter->pRunningData   = (const char*)pTrackData;
+}
+
+DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack)
+{
+    drflac_cuesheet_track cuesheetTrack;
+    const char* pRunningData;
+    drflac_uint64 offsetHi;
+    drflac_uint64 offsetLo;
+
+    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    pRunningData = pIter->pRunningData;
+
+    offsetHi                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    offsetLo                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    cuesheetTrack.offset       = offsetLo | (offsetHi << 32);
+    cuesheetTrack.trackNumber  = pRunningData[0];                                         pRunningData += 1;
+    DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC));     pRunningData += 12;
+    cuesheetTrack.isAudio      = (pRunningData[0] & 0x80) != 0;
+    cuesheetTrack.preEmphasis  = (pRunningData[0] & 0x40) != 0;                           pRunningData += 14;
+    cuesheetTrack.indexCount   = pRunningData[0];                                         pRunningData += 1;
+    cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData;        pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index);
+
+    pIter->pRunningData = pRunningData;
+    pIter->countRemaining -= 1;
+
+    if (pCuesheetTrack) {
+        *pCuesheetTrack = cuesheetTrack;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
+    #pragma GCC diagnostic pop
+#endif
+#endif  /* dr_flac_c */
+#endif  /* DR_FLAC_IMPLEMENTATION */
+
+
+/*
+REVISION HISTORY
+================
+v0.12.28 - 2021-02-21
+  - Fix a warning due to referencing _MSC_VER when it is undefined.
+
+v0.12.27 - 2021-01-31
+  - Fix a static analysis warning.
+
+v0.12.26 - 2021-01-17
+  - Fix a compilation warning due to _BSD_SOURCE being deprecated.
+
+v0.12.25 - 2020-12-26
+  - Update documentation.
+
+v0.12.24 - 2020-11-29
+  - Fix ARM64/NEON detection when compiling with MSVC.
+
+v0.12.23 - 2020-11-21
+  - Fix compilation with OpenWatcom.
+
+v0.12.22 - 2020-11-01
+  - Fix an error with the previous release.
+
+v0.12.21 - 2020-11-01
+  - Fix a possible deadlock when seeking.
+  - Improve compiler support for older versions of GCC.
+
+v0.12.20 - 2020-09-08
+  - Fix a compilation error on older compilers.
+
+v0.12.19 - 2020-08-30
+  - Fix a bug due to an undefined 32-bit shift.
+
+v0.12.18 - 2020-08-14
+  - Fix a crash when compiling with clang-cl.
+
+v0.12.17 - 2020-08-02
+  - Simplify sized types.
+
+v0.12.16 - 2020-07-25
+  - Fix a compilation warning.
+
+v0.12.15 - 2020-07-06
+  - Check for negative LPC shifts and return an error.
+
+v0.12.14 - 2020-06-23
+  - Add include guard for the implementation section.
+
+v0.12.13 - 2020-05-16
+  - Add compile-time and run-time version querying.
+    - DRFLAC_VERSION_MINOR
+    - DRFLAC_VERSION_MAJOR
+    - DRFLAC_VERSION_REVISION
+    - DRFLAC_VERSION_STRING
+    - drflac_version()
+    - drflac_version_string()
+
+v0.12.12 - 2020-04-30
+  - Fix compilation errors with VC6.
+
+v0.12.11 - 2020-04-19
+  - Fix some pedantic warnings.
+  - Fix some undefined behaviour warnings.
+
+v0.12.10 - 2020-04-10
+  - Fix some bugs when trying to seek with an invalid seek table.
+
+v0.12.9 - 2020-04-05
+  - Fix warnings.
+
+v0.12.8 - 2020-04-04
+  - Add drflac_open_file_w() and drflac_open_file_with_metadata_w().
+  - Fix some static analysis warnings.
+  - Minor documentation updates.
+
+v0.12.7 - 2020-03-14
+  - Fix compilation errors with VC6.
+
+v0.12.6 - 2020-03-07
+  - Fix compilation error with Visual Studio .NET 2003.
+
+v0.12.5 - 2020-01-30
+  - Silence some static analysis warnings.
+
+v0.12.4 - 2020-01-29
+  - Silence some static analysis warnings.
+
+v0.12.3 - 2019-12-02
+  - Fix some warnings when compiling with GCC and the -Og flag.
+  - Fix a crash in out-of-memory situations.
+  - Fix potential integer overflow bug.
+  - Fix some static analysis warnings.
+  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
+  - Fix a bug with binary search seeking where the bits per sample is not a multiple of 8.
+
+v0.12.2 - 2019-10-07
+  - Internal code clean up.
+
+v0.12.1 - 2019-09-29
+  - Fix some Clang Static Analyzer warnings.
+  - Fix an unused variable warning.
+
+v0.12.0 - 2019-09-23
+  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
+    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
+    - drflac_open()
+    - drflac_open_relaxed()
+    - drflac_open_with_metadata()
+    - drflac_open_with_metadata_relaxed()
+    - drflac_open_file()
+    - drflac_open_file_with_metadata()
+    - drflac_open_memory()
+    - drflac_open_memory_with_metadata()
+    - drflac_open_and_read_pcm_frames_s32()
+    - drflac_open_and_read_pcm_frames_s16()
+    - drflac_open_and_read_pcm_frames_f32()
+    - drflac_open_file_and_read_pcm_frames_s32()
+    - drflac_open_file_and_read_pcm_frames_s16()
+    - drflac_open_file_and_read_pcm_frames_f32()
+    - drflac_open_memory_and_read_pcm_frames_s32()
+    - drflac_open_memory_and_read_pcm_frames_s16()
+    - drflac_open_memory_and_read_pcm_frames_f32()
+    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
+    DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
+  - Remove deprecated APIs:
+    - drflac_read_s32()
+    - drflac_read_s16()
+    - drflac_read_f32()
+    - drflac_seek_to_sample()
+    - drflac_open_and_decode_s32()
+    - drflac_open_and_decode_s16()
+    - drflac_open_and_decode_f32()
+    - drflac_open_and_decode_file_s32()
+    - drflac_open_and_decode_file_s16()
+    - drflac_open_and_decode_file_f32()
+    - drflac_open_and_decode_memory_s32()
+    - drflac_open_and_decode_memory_s16()
+    - drflac_open_and_decode_memory_f32()
+  - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount
+    by doing pFlac->totalPCMFrameCount*pFlac->channels.
+  - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames.
+  - Fix errors when seeking to the end of a stream.
+  - Optimizations to seeking.
+  - SSE improvements and optimizations.
+  - ARM NEON optimizations.
+  - Optimizations to drflac_read_pcm_frames_s16().
+  - Optimizations to drflac_read_pcm_frames_s32().
+
+v0.11.10 - 2019-06-26
+  - Fix a compiler error.
+
+v0.11.9 - 2019-06-16
+  - Silence some ThreadSanitizer warnings.
+
+v0.11.8 - 2019-05-21
+  - Fix warnings.
+
+v0.11.7 - 2019-05-06
+  - C89 fixes.
+
+v0.11.6 - 2019-05-05
+  - Add support for C89.
+  - Fix a compiler warning when CRC is disabled.
+  - Change license to choice of public domain or MIT-0.
+
+v0.11.5 - 2019-04-19
+  - Fix a compiler error with GCC.
+
+v0.11.4 - 2019-04-17
+  - Fix some warnings with GCC when compiling with -std=c99.
+
+v0.11.3 - 2019-04-07
+  - Silence warnings with GCC.
+
+v0.11.2 - 2019-03-10
+  - Fix a warning.
+
+v0.11.1 - 2019-02-17
+  - Fix a potential bug with seeking.
+
+v0.11.0 - 2018-12-16
+  - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with
+    drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take
+    and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by
+    dividing it by the channel count, and then do the same with the return value.
+  - API_CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as
+    the changes to drflac_read_*() apply.
+  - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as
+    the changes to drflac_read_*() apply.
+  - Optimizations.
+
+v0.10.0 - 2018-09-11
+  - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you
+    need to do it yourself via the callback API.
+  - Fix the clang build.
+  - Fix undefined behavior.
+  - Fix errors with CUESHEET metdata blocks.
+  - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the
+    Vorbis comment API.
+  - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams.
+  - Minor optimizations.
+
+v0.9.11 - 2018-08-29
+  - Fix a bug with sample reconstruction.
+
+v0.9.10 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.9.9 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+
+v0.9.8 - 2018-07-24
+  - Fix compilation errors.
+
+v0.9.7 - 2018-07-05
+  - Fix a warning.
+
+v0.9.6 - 2018-06-29
+  - Fix some typos.
+
+v0.9.5 - 2018-06-23
+  - Fix some warnings.
+
+v0.9.4 - 2018-06-14
+  - Optimizations to seeking.
+  - Clean up.
+
+v0.9.3 - 2018-05-22
+  - Bug fix.
+
+v0.9.2 - 2018-05-12
+  - Fix a compilation error due to a missing break statement.
+
+v0.9.1 - 2018-04-29
+  - Fix compilation error with Clang.
+
+v0.9 - 2018-04-24
+  - Fix Clang build.
+  - Start using major.minor.revision versioning.
+
+v0.8g - 2018-04-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8f - 2018-02-02
+  - Stop pretending to support changing rate/channels mid stream.
+
+v0.8e - 2018-02-01
+  - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream.
+  - Fix a crash the the Rice partition order is invalid.
+
+v0.8d - 2017-09-22
+  - Add support for decoding streams with ID3 tags. ID3 tags are just skipped.
+
+v0.8c - 2017-09-07
+  - Fix warning on non-x86/x64 architectures.
+
+v0.8b - 2017-08-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8a - 2017-08-13
+  - A small optimization for the Clang build.
+
+v0.8 - 2017-08-12
+  - API CHANGE: Rename dr_* types to drflac_*.
+  - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation.
+  - Add support for custom implementations of malloc(), realloc(), etc.
+  - Add CRC checking to Ogg encapsulated streams.
+  - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported.
+  - Bug fixes.
+
+v0.7 - 2017-07-23
+  - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed().
+
+v0.6 - 2017-07-22
+  - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they
+    never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame.
+
+v0.5 - 2017-07-16
+  - Fix typos.
+  - Change drflac_bool* types to unsigned.
+  - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC.
+
+v0.4f - 2017-03-10
+  - Fix a couple of bugs with the bitstreaming code.
+
+v0.4e - 2017-02-17
+  - Fix some warnings.
+
+v0.4d - 2016-12-26
+  - Add support for 32-bit floating-point PCM decoding.
+  - Use drflac_int* and drflac_uint* sized types to improve compiler support.
+  - Minor improvements to documentation.
+
+v0.4c - 2016-12-26
+  - Add support for signed 16-bit integer PCM decoding.
+
+v0.4b - 2016-10-23
+  - A minor change to drflac_bool8 and drflac_bool32 types.
+
+v0.4a - 2016-10-11
+  - Rename drBool32 to drflac_bool32 for styling consistency.
+
+v0.4 - 2016-09-29
+  - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type.
+  - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32().
+  - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to
+    keep it consistent with drflac_audio.
+
+v0.3f - 2016-09-21
+  - Fix a warning with GCC.
+
+v0.3e - 2016-09-18
+  - Fixed a bug where GCC 4.3+ was not getting properly identified.
+  - Fixed a few typos.
+  - Changed date formats to ISO 8601 (YYYY-MM-DD).
+
+v0.3d - 2016-06-11
+  - Minor clean up.
+
+v0.3c - 2016-05-28
+  - Fixed compilation error.
+
+v0.3b - 2016-05-16
+  - Fixed Linux/GCC build.
+  - Updated documentation.
+
+v0.3a - 2016-05-15
+  - Minor fixes to documentation.
+
+v0.3 - 2016-05-11
+  - Optimizations. Now at about parity with the reference implementation on 32-bit builds.
+  - Lots of clean up.
+
+v0.2b - 2016-05-10
+  - Bug fixes.
+
+v0.2a - 2016-05-10
+  - Made drflac_open_and_decode() more robust.
+  - Removed an unused debugging variable
+
+v0.2 - 2016-05-09
+  - Added support for Ogg encapsulation.
+  - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek
+    should be relative to the start or the current position. Also changes the seeking rules such that
+    seeking offsets will never be negative.
+  - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count.
+
+v0.1b - 2016-05-07
+  - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize.
+  - Removed a stale comment.
+
+v0.1a - 2016-05-05
+  - Minor formatting changes.
+  - Fixed a warning on the GCC build.
+
+v0.1 - 2016-05-03
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2020 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
diff --git a/deps/libchdr/include/libchdr/bitstream.h b/deps/libchdr/include/libchdr/bitstream.h
new file mode 100644
index 00000000..d376373b
--- /dev/null
+++ b/deps/libchdr/include/libchdr/bitstream.h
@@ -0,0 +1,43 @@
+/* license:BSD-3-Clause
+ * copyright-holders:Aaron Giles
+***************************************************************************
+
+    bitstream.h
+
+    Helper classes for reading/writing at the bit level.
+
+***************************************************************************/
+
+#pragma once
+
+#ifndef __BITSTREAM_H__
+#define __BITSTREAM_H__
+
+#include <stdint.h>
+
+/***************************************************************************
+ *  TYPE DEFINITIONS
+ ***************************************************************************
+ */
+
+/* helper class for reading from a bit buffer */
+struct bitstream
+{
+	uint32_t          buffer;       /* current bit accumulator */
+	int               bits;         /* number of bits in the accumulator */
+	const uint8_t *   read;         /* read pointer */
+	uint32_t          doffset;      /* byte offset within the data */
+	uint32_t          dlength;      /* length of the data */
+};
+
+struct bitstream* 	create_bitstream(const void *src, uint32_t srclength);
+int 				bitstream_overflow(struct bitstream* bitstream);
+uint32_t 			bitstream_read_offset(struct bitstream* bitstream);
+
+uint32_t 			bitstream_read(struct bitstream* bitstream, int numbits);
+uint32_t 			bitstream_peek(struct bitstream* bitstream, int numbits);
+void 				bitstream_remove(struct bitstream* bitstream, int numbits);
+uint32_t 			bitstream_flush(struct bitstream* bitstream);
+
+
+#endif
diff --git a/deps/libchdr/include/libchdr/cdrom.h b/deps/libchdr/include/libchdr/cdrom.h
new file mode 100644
index 00000000..816e6a5c
--- /dev/null
+++ b/deps/libchdr/include/libchdr/cdrom.h
@@ -0,0 +1,110 @@
+/* license:BSD-3-Clause
+ * copyright-holders:Aaron Giles
+***************************************************************************
+
+    cdrom.h
+
+    Generic MAME cd-rom implementation
+
+***************************************************************************/
+
+#pragma once
+
+#ifndef __CDROM_H__
+#define __CDROM_H__
+
+#include <stdint.h>
+#include <libchdr/chdconfig.h>
+
+/***************************************************************************
+    CONSTANTS
+***************************************************************************/
+
+/* tracks are padded to a multiple of this many frames */
+#define CD_TRACK_PADDING   	(4)
+#define CD_MAX_TRACKS           (99)    /* AFAIK the theoretical limit */
+#define CD_MAX_SECTOR_DATA      (2352)
+#define CD_MAX_SUBCODE_DATA     (96)
+
+#define CD_FRAME_SIZE           (CD_MAX_SECTOR_DATA + CD_MAX_SUBCODE_DATA)
+#define CD_FRAMES_PER_HUNK      (8)
+
+#define CD_METADATA_WORDS       (1+(CD_MAX_TRACKS * 6))
+
+enum
+{
+	CD_TRACK_MODE1 = 0,         /* mode 1 2048 bytes/sector */
+	CD_TRACK_MODE1_RAW,         /* mode 1 2352 bytes/sector */
+	CD_TRACK_MODE2,             /* mode 2 2336 bytes/sector */
+	CD_TRACK_MODE2_FORM1,       /* mode 2 2048 bytes/sector */
+	CD_TRACK_MODE2_FORM2,       /* mode 2 2324 bytes/sector */
+	CD_TRACK_MODE2_FORM_MIX,    /* mode 2 2336 bytes/sector */
+	CD_TRACK_MODE2_RAW,         /* mode 2 2352 bytes / sector */
+	CD_TRACK_AUDIO,         /* redbook audio track 2352 bytes/sector (588 samples) */
+
+	CD_TRACK_RAW_DONTCARE       /* special flag for cdrom_read_data: just return me whatever is there */
+};
+
+enum
+{
+	CD_SUB_NORMAL = 0,          /* "cooked" 96 bytes per sector */
+	CD_SUB_RAW,                 /* raw uninterleaved 96 bytes per sector */
+	CD_SUB_NONE                 /* no subcode data stored */
+};
+
+#define CD_FLAG_GDROM   0x00000001  /* disc is a GD-ROM, all tracks should be stored with GD-ROM metadata */
+#define CD_FLAG_GDROMLE 0x00000002  /* legacy GD-ROM, with little-endian CDDA data */
+
+/***************************************************************************
+    FUNCTION PROTOTYPES
+***************************************************************************/
+
+#ifdef WANT_RAW_DATA_SECTOR
+/* ECC utilities */
+int ecc_verify(const uint8_t *sector);
+void ecc_generate(uint8_t *sector);
+void ecc_clear(uint8_t *sector);
+#endif
+
+
+
+/***************************************************************************
+    INLINE FUNCTIONS
+***************************************************************************/
+
+static inline uint32_t msf_to_lba(uint32_t msf)
+{
+	return ( ((msf&0x00ff0000)>>16) * 60 * 75) + (((msf&0x0000ff00)>>8) * 75) + ((msf&0x000000ff)>>0);
+}
+
+static inline uint32_t lba_to_msf(uint32_t lba)
+{
+	uint8_t m, s, f;
+
+	m = lba / (60 * 75);
+	lba -= m * (60 * 75);
+	s = lba / 75;
+	f = lba % 75;
+
+	return ((m / 10) << 20) | ((m % 10) << 16) |
+			((s / 10) << 12) | ((s % 10) <<  8) |
+			((f / 10) <<  4) | ((f % 10) <<  0);
+}
+
+/**
+ * segacd needs it like this.. investigate
+ * Angelo also says PCE tracks often start playing at the
+ * wrong address.. related?
+ **/
+static inline uint32_t lba_to_msf_alt(int lba)
+{
+	uint32_t ret = 0;
+
+	ret |= ((lba / (60 * 75))&0xff)<<16;
+	ret |= (((lba / 75) % 60)&0xff)<<8;
+	ret |= ((lba % 75)&0xff)<<0;
+
+	return ret;
+}
+
+#endif  /* __CDROM_H__ */
diff --git a/deps/libchdr/include/libchdr/chd.h b/deps/libchdr/include/libchdr/chd.h
new file mode 100644
index 00000000..a37edc3f
--- /dev/null
+++ b/deps/libchdr/include/libchdr/chd.h
@@ -0,0 +1,427 @@
+/***************************************************************************
+
+    chd.h
+
+    MAME Compressed Hunks of Data file format
+
+****************************************************************************
+
+    Copyright Aaron Giles
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above copyright
+          notice, this list of conditions and the following disclaimer in
+          the documentation and/or other materials provided with the
+          distribution.
+        * Neither the name 'MAME' nor the names of its contributors may be
+          used to endorse or promote products derived from this software
+          without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY AARON GILES ''AS IS'' AND ANY EXPRESS OR
+    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL AARON GILES BE LIABLE FOR ANY DIRECT,
+    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#pragma once
+
+#ifndef __CHD_H__
+#define __CHD_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <libchdr/coretypes.h>
+#include <libchdr/chdconfig.h>
+
+/***************************************************************************
+
+    Compressed Hunks of Data header format. All numbers are stored in
+    Motorola (big-endian) byte ordering. The header is 76 (V1) or 80 (V2)
+    bytes long.
+
+    V1 header:
+
+    [  0] char   tag[8];        // 'MComprHD'
+    [  8] UINT32 length;        // length of header (including tag and length fields)
+    [ 12] UINT32 version;       // drive format version
+    [ 16] UINT32 flags;         // flags (see below)
+    [ 20] UINT32 compression;   // compression type
+    [ 24] UINT32 hunksize;      // 512-byte sectors per hunk
+    [ 28] UINT32 totalhunks;    // total # of hunks represented
+    [ 32] UINT32 cylinders;     // number of cylinders on hard disk
+    [ 36] UINT32 heads;         // number of heads on hard disk
+    [ 40] UINT32 sectors;       // number of sectors on hard disk
+    [ 44] UINT8  md5[16];       // MD5 checksum of raw data
+    [ 60] UINT8  parentmd5[16]; // MD5 checksum of parent file
+    [ 76] (V1 header length)
+
+    V2 header:
+
+    [  0] char   tag[8];        // 'MComprHD'
+    [  8] UINT32 length;        // length of header (including tag and length fields)
+    [ 12] UINT32 version;       // drive format version
+    [ 16] UINT32 flags;         // flags (see below)
+    [ 20] UINT32 compression;   // compression type
+    [ 24] UINT32 hunksize;      // seclen-byte sectors per hunk
+    [ 28] UINT32 totalhunks;    // total # of hunks represented
+    [ 32] UINT32 cylinders;     // number of cylinders on hard disk
+    [ 36] UINT32 heads;         // number of heads on hard disk
+    [ 40] UINT32 sectors;       // number of sectors on hard disk
+    [ 44] UINT8  md5[16];       // MD5 checksum of raw data
+    [ 60] UINT8  parentmd5[16]; // MD5 checksum of parent file
+    [ 76] UINT32 seclen;        // number of bytes per sector
+    [ 80] (V2 header length)
+
+    V3 header:
+
+    [  0] char   tag[8];        // 'MComprHD'
+    [  8] UINT32 length;        // length of header (including tag and length fields)
+    [ 12] UINT32 version;       // drive format version
+    [ 16] UINT32 flags;         // flags (see below)
+    [ 20] UINT32 compression;   // compression type
+    [ 24] UINT32 totalhunks;    // total # of hunks represented
+    [ 28] UINT64 logicalbytes;  // logical size of the data (in bytes)
+    [ 36] UINT64 metaoffset;    // offset to the first blob of metadata
+    [ 44] UINT8  md5[16];       // MD5 checksum of raw data
+    [ 60] UINT8  parentmd5[16]; // MD5 checksum of parent file
+    [ 76] UINT32 hunkbytes;     // number of bytes per hunk
+    [ 80] UINT8  sha1[20];      // SHA1 checksum of raw data
+    [100] UINT8  parentsha1[20];// SHA1 checksum of parent file
+    [120] (V3 header length)
+
+    V4 header:
+
+    [  0] char   tag[8];        // 'MComprHD'
+    [  8] UINT32 length;        // length of header (including tag and length fields)
+    [ 12] UINT32 version;       // drive format version
+    [ 16] UINT32 flags;         // flags (see below)
+    [ 20] UINT32 compression;   // compression type
+    [ 24] UINT32 totalhunks;    // total # of hunks represented
+    [ 28] UINT64 logicalbytes;  // logical size of the data (in bytes)
+    [ 36] UINT64 metaoffset;    // offset to the first blob of metadata
+    [ 44] UINT32 hunkbytes;     // number of bytes per hunk
+    [ 48] UINT8  sha1[20];      // combined raw+meta SHA1
+    [ 68] UINT8  parentsha1[20];// combined raw+meta SHA1 of parent
+    [ 88] UINT8  rawsha1[20];   // raw data SHA1
+    [108] (V4 header length)
+
+    Flags:
+        0x00000001 - set if this drive has a parent
+        0x00000002 - set if this drive allows writes
+
+   =========================================================================
+
+    V5 header:
+
+    [  0] char   tag[8];        // 'MComprHD'
+    [  8] uint32_t length;        // length of header (including tag and length fields)
+    [ 12] uint32_t version;       // drive format version
+    [ 16] uint32_t compressors[4];// which custom compressors are used?
+    [ 32] uint64_t logicalbytes;  // logical size of the data (in bytes)
+    [ 40] uint64_t mapoffset;     // offset to the map
+    [ 48] uint64_t metaoffset;    // offset to the first blob of metadata
+    [ 56] uint32_t hunkbytes;     // number of bytes per hunk (512k maximum)
+    [ 60] uint32_t unitbytes;     // number of bytes per unit within each hunk
+    [ 64] uint8_t  rawsha1[20];   // raw data SHA1
+    [ 84] uint8_t  sha1[20];      // combined raw+meta SHA1
+    [104] uint8_t  parentsha1[20];// combined raw+meta SHA1 of parent
+    [124] (V5 header length)
+
+    If parentsha1 != 0, we have a parent (no need for flags)
+    If compressors[0] == 0, we are uncompressed (including maps)
+
+    V5 uncompressed map format:
+
+    [  0] uint32_t offset;        // starting offset / hunk size
+
+    V5 compressed map format header:
+
+    [  0] uint32_t length;        // length of compressed map
+    [  4] UINT48 datastart;     // offset of first block
+    [ 10] uint16_t crc;           // crc-16 of the map
+    [ 12] uint8_t lengthbits;     // bits used to encode complength
+    [ 13] uint8_t hunkbits;       // bits used to encode self-refs
+    [ 14] uint8_t parentunitbits; // bits used to encode parent unit refs
+    [ 15] uint8_t reserved;       // future use
+    [ 16] (compressed header length)
+
+    Each compressed map entry, once expanded, looks like:
+
+    [  0] uint8_t compression;    // compression type
+    [  1] UINT24 complength;    // compressed length
+    [  4] UINT48 offset;        // offset
+    [ 10] uint16_t crc;           // crc-16 of the data
+
+***************************************************************************/
+
+
+/***************************************************************************
+    CONSTANTS
+***************************************************************************/
+
+/* header information */
+#define CHD_HEADER_VERSION			5
+#define CHD_V1_HEADER_SIZE			76
+#define CHD_V2_HEADER_SIZE			80
+#define CHD_V3_HEADER_SIZE			120
+#define CHD_V4_HEADER_SIZE			108
+#define CHD_V5_HEADER_SIZE          124
+
+#define CHD_MAX_HEADER_SIZE			CHD_V5_HEADER_SIZE
+
+/* checksumming information */
+#define CHD_MD5_BYTES				16
+#define CHD_SHA1_BYTES				20
+
+/* CHD global flags */
+#define CHDFLAGS_HAS_PARENT			0x00000001
+#define CHDFLAGS_IS_WRITEABLE		0x00000002
+#define CHDFLAGS_UNDEFINED			0xfffffffc
+
+#define CHD_MAKE_TAG(a,b,c,d)       (((a) << 24) | ((b) << 16) | ((c) << 8) | (d))
+
+/* compression types */
+#define CHDCOMPRESSION_NONE			0
+#define CHDCOMPRESSION_ZLIB			1
+#define CHDCOMPRESSION_ZLIB_PLUS	2
+#define CHDCOMPRESSION_AV			3
+
+#define CHD_CODEC_NONE 0
+#define CHD_CODEC_ZLIB				CHD_MAKE_TAG('z','l','i','b')
+/* general codecs with CD frontend */
+#define CHD_CODEC_CD_ZLIB			CHD_MAKE_TAG('c','d','z','l')
+#define CHD_CODEC_CD_LZMA			CHD_MAKE_TAG('c','d','l','z')
+#define CHD_CODEC_CD_FLAC			CHD_MAKE_TAG('c','d','f','l')
+
+/* A/V codec configuration parameters */
+#define AV_CODEC_COMPRESS_CONFIG	1
+#define AV_CODEC_DECOMPRESS_CONFIG	2
+
+/* metadata parameters */
+#define CHDMETATAG_WILDCARD			0
+#define CHD_METAINDEX_APPEND		((UINT32)-1)
+
+/* metadata flags */
+#define CHD_MDFLAGS_CHECKSUM		0x01		/* indicates data is checksummed */
+
+/* standard hard disk metadata */
+#define HARD_DISK_METADATA_TAG		CHD_MAKE_TAG('G','D','D','D')
+#define HARD_DISK_METADATA_FORMAT	"CYLS:%d,HEADS:%d,SECS:%d,BPS:%d"
+
+/* hard disk identify information */
+#define HARD_DISK_IDENT_METADATA_TAG CHD_MAKE_TAG('I','D','N','T')
+
+/* hard disk key information */
+#define HARD_DISK_KEY_METADATA_TAG	CHD_MAKE_TAG('K','E','Y',' ')
+
+/* pcmcia CIS information */
+#define PCMCIA_CIS_METADATA_TAG		CHD_MAKE_TAG('C','I','S',' ')
+
+/* standard CD-ROM metadata */
+#define CDROM_OLD_METADATA_TAG		CHD_MAKE_TAG('C','H','C','D')
+#define CDROM_TRACK_METADATA_TAG	CHD_MAKE_TAG('C','H','T','R')
+#define CDROM_TRACK_METADATA_FORMAT	"TRACK:%d TYPE:%s SUBTYPE:%s FRAMES:%d"
+#define CDROM_TRACK_METADATA2_TAG	CHD_MAKE_TAG('C','H','T','2')
+#define CDROM_TRACK_METADATA2_FORMAT	"TRACK:%d TYPE:%s SUBTYPE:%s FRAMES:%d PREGAP:%d PGTYPE:%s PGSUB:%s POSTGAP:%d"
+#define GDROM_OLD_METADATA_TAG		CHD_MAKE_TAG('C','H','G','T')
+#define GDROM_TRACK_METADATA_TAG	CHD_MAKE_TAG('C', 'H', 'G', 'D')
+#define GDROM_TRACK_METADATA_FORMAT	"TRACK:%d TYPE:%s SUBTYPE:%s FRAMES:%d PAD:%d PREGAP:%d PGTYPE:%s PGSUB:%s POSTGAP:%d"
+
+/* standard A/V metadata */
+#define AV_METADATA_TAG				CHD_MAKE_TAG('A','V','A','V')
+#define AV_METADATA_FORMAT			"FPS:%d.%06d WIDTH:%d HEIGHT:%d INTERLACED:%d CHANNELS:%d SAMPLERATE:%d"
+
+/* A/V laserdisc frame metadata */
+#define AV_LD_METADATA_TAG			CHD_MAKE_TAG('A','V','L','D')
+
+/* CHD open values */
+#define CHD_OPEN_READ				1
+#define CHD_OPEN_READWRITE			2
+
+/* error types */
+enum _chd_error
+{
+	CHDERR_NONE,
+	CHDERR_NO_INTERFACE,
+	CHDERR_OUT_OF_MEMORY,
+	CHDERR_INVALID_FILE,
+	CHDERR_INVALID_PARAMETER,
+	CHDERR_INVALID_DATA,
+	CHDERR_FILE_NOT_FOUND,
+	CHDERR_REQUIRES_PARENT,
+	CHDERR_FILE_NOT_WRITEABLE,
+	CHDERR_READ_ERROR,
+	CHDERR_WRITE_ERROR,
+	CHDERR_CODEC_ERROR,
+	CHDERR_INVALID_PARENT,
+	CHDERR_HUNK_OUT_OF_RANGE,
+	CHDERR_DECOMPRESSION_ERROR,
+	CHDERR_COMPRESSION_ERROR,
+	CHDERR_CANT_CREATE_FILE,
+	CHDERR_CANT_VERIFY,
+	CHDERR_NOT_SUPPORTED,
+	CHDERR_METADATA_NOT_FOUND,
+	CHDERR_INVALID_METADATA_SIZE,
+	CHDERR_UNSUPPORTED_VERSION,
+	CHDERR_VERIFY_INCOMPLETE,
+	CHDERR_INVALID_METADATA,
+	CHDERR_INVALID_STATE,
+	CHDERR_OPERATION_PENDING,
+	CHDERR_NO_ASYNC_OPERATION,
+	CHDERR_UNSUPPORTED_FORMAT
+};
+typedef enum _chd_error chd_error;
+
+
+
+/***************************************************************************
+    TYPE DEFINITIONS
+***************************************************************************/
+
+/* opaque types */
+typedef struct _chd_file chd_file;
+
+
+/* extract header structure (NOT the on-disk header structure) */
+typedef struct _chd_header chd_header;
+struct _chd_header
+{
+	UINT32		length;						/* length of header data */
+	UINT32		version;					/* drive format version */
+	UINT32		flags;						/* flags field */
+	UINT32		compression[4];				/* compression type */
+	UINT32		hunkbytes;					/* number of bytes per hunk */
+	UINT32		totalhunks;					/* total # of hunks represented */
+	UINT64		logicalbytes;				/* logical size of the data */
+	UINT64		metaoffset;					/* offset in file of first metadata */
+	UINT64		mapoffset;					/* TOOD V5 */
+	UINT8		md5[CHD_MD5_BYTES];			/* overall MD5 checksum */
+	UINT8		parentmd5[CHD_MD5_BYTES];	/* overall MD5 checksum of parent */
+	UINT8		sha1[CHD_SHA1_BYTES];		/* overall SHA1 checksum */
+	UINT8		rawsha1[CHD_SHA1_BYTES];	/* SHA1 checksum of raw data */
+	UINT8		parentsha1[CHD_SHA1_BYTES];	/* overall SHA1 checksum of parent */
+	UINT32		unitbytes;					/* TODO V5 */
+	UINT64		unitcount;					/* TODO V5 */
+    UINT32      hunkcount;                  /* TODO V5 */
+
+    /* map information */
+    UINT32      mapentrybytes;              /* length of each entry in a map (V5) */
+    UINT8*      rawmap;                     /* raw map data */
+
+	UINT32		obsolete_cylinders;			/* obsolete field -- do not use! */
+	UINT32		obsolete_sectors;			/* obsolete field -- do not use! */
+	UINT32		obsolete_heads;				/* obsolete field -- do not use! */
+	UINT32		obsolete_hunksize;			/* obsolete field -- do not use! */
+};
+
+
+/* structure for returning information about a verification pass */
+typedef struct _chd_verify_result chd_verify_result;
+struct _chd_verify_result
+{
+	UINT8		md5[CHD_MD5_BYTES];			/* overall MD5 checksum */
+	UINT8		sha1[CHD_SHA1_BYTES];		/* overall SHA1 checksum */
+	UINT8		rawsha1[CHD_SHA1_BYTES];	/* SHA1 checksum of raw data */
+	UINT8		metasha1[CHD_SHA1_BYTES];	/* SHA1 checksum of metadata */
+};
+
+
+
+/***************************************************************************
+    FUNCTION PROTOTYPES
+***************************************************************************/
+
+#ifdef _MSC_VER
+#ifdef CHD_DLL
+#ifdef CHD_DLL_EXPORTS
+#define CHD_EXPORT __declspec(dllexport)
+#else
+#define CHD_EXPORT __declspec(dllimport)
+#endif
+#else
+#define CHD_EXPORT
+#endif
+#else
+#define CHD_EXPORT __attribute__ ((visibility("default")))
+#endif
+
+/* ----- CHD file management ----- */
+
+/* create a new CHD file fitting the given description */
+/* chd_error chd_create(const char *filename, UINT64 logicalbytes, UINT32 hunkbytes, UINT32 compression, chd_file *parent); */
+
+/* same as chd_create(), but accepts an already-opened core_file object */
+/* chd_error chd_create_file(core_file *file, UINT64 logicalbytes, UINT32 hunkbytes, UINT32 compression, chd_file *parent); */
+
+/* open an existing CHD file */
+CHD_EXPORT chd_error chd_open_file(core_file *file, int mode, chd_file *parent, chd_file **chd);
+CHD_EXPORT chd_error chd_open(const char *filename, int mode, chd_file *parent, chd_file **chd);
+
+/* precache underlying file */
+CHD_EXPORT chd_error chd_precache(chd_file *chd);
+
+/* close a CHD file */
+CHD_EXPORT void chd_close(chd_file *chd);
+
+/* return the associated core_file */
+CHD_EXPORT core_file *chd_core_file(chd_file *chd);
+
+/* return an error string for the given CHD error */
+CHD_EXPORT const char *chd_error_string(chd_error err);
+
+
+
+/* ----- CHD header management ----- */
+
+/* return a pointer to the extracted CHD header data */
+CHD_EXPORT const chd_header *chd_get_header(chd_file *chd);
+
+/* read CHD header data from file into the pointed struct */
+CHD_EXPORT chd_error chd_read_header(const char *filename, chd_header *header);
+
+
+
+/* ----- core data read/write ----- */
+
+/* read one hunk from the CHD file */
+CHD_EXPORT chd_error chd_read(chd_file *chd, UINT32 hunknum, void *buffer);
+
+
+
+/* ----- metadata management ----- */
+
+/* get indexed metadata of a particular sort */
+CHD_EXPORT chd_error chd_get_metadata(chd_file *chd, UINT32 searchtag, UINT32 searchindex, void *output, UINT32 outputlen, UINT32 *resultlen, UINT32 *resulttag, UINT8 *resultflags);
+
+
+
+
+/* ----- codec interfaces ----- */
+
+/* set internal codec parameters */
+CHD_EXPORT chd_error chd_codec_config(chd_file *chd, int param, void *config);
+
+/* return a string description of a codec */
+CHD_EXPORT const char *chd_get_codec_name(UINT32 codec);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CHD_H__ */
diff --git a/deps/libchdr/include/libchdr/chdconfig.h b/deps/libchdr/include/libchdr/chdconfig.h
new file mode 100644
index 00000000..752038b4
--- /dev/null
+++ b/deps/libchdr/include/libchdr/chdconfig.h
@@ -0,0 +1,10 @@
+#ifndef __CHDCONFIG_H__
+#define __CHDCONFIG_H__
+
+/* Configure CHDR features here */
+#define WANT_RAW_DATA_SECTOR    1
+#define WANT_SUBCODE            1
+#define NEED_CACHE_HUNK         1
+#define VERIFY_BLOCK_CRC        1
+
+#endif
diff --git a/deps/libchdr/include/libchdr/coretypes.h b/deps/libchdr/include/libchdr/coretypes.h
new file mode 100644
index 00000000..30f892f6
--- /dev/null
+++ b/deps/libchdr/include/libchdr/coretypes.h
@@ -0,0 +1,48 @@
+#ifndef __CORETYPES_H__
+#define __CORETYPES_H__
+
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef USE_LIBRETRO_VFS
+#include <streams/file_stream_transforms.h>
+#endif
+
+#define ARRAY_LENGTH(x) (sizeof(x)/sizeof(x[0]))
+
+typedef uint64_t UINT64;
+typedef uint32_t UINT32;
+typedef uint16_t UINT16;
+typedef uint8_t UINT8;
+
+typedef int64_t INT64;
+typedef int32_t INT32;
+typedef int16_t INT16;
+typedef int8_t INT8;
+
+#define core_file FILE
+#define core_fopen(file) fopen(file, "rb")
+#if defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(__WIN64__)
+	#define core_fseek _fseeki64
+	#define core_ftell _ftelli64
+#elif defined(_LARGEFILE_SOURCE) && defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64
+	#define core_fseek fseeko64
+	#define core_ftell ftello64
+#else
+	#define core_fseek fseeko
+	#define core_ftell ftello
+#endif
+#define core_fread(fc, buff, len) fread(buff, 1, len, fc)
+#define core_fclose fclose
+
+static UINT64 core_fsize(core_file *f)
+{
+    UINT64 rv;
+    UINT64 p = core_ftell(f);
+    core_fseek(f, 0, SEEK_END);
+    rv = core_ftell(f);
+    core_fseek(f, p, SEEK_SET);
+    return rv;
+}
+
+#endif
diff --git a/deps/libchdr/include/libchdr/flac.h b/deps/libchdr/include/libchdr/flac.h
new file mode 100644
index 00000000..bff255b2
--- /dev/null
+++ b/deps/libchdr/include/libchdr/flac.h
@@ -0,0 +1,50 @@
+/* license:BSD-3-Clause
+ * copyright-holders:Aaron Giles
+ ***************************************************************************
+
+    flac.h
+
+    FLAC compression wrappers
+
+***************************************************************************/
+
+#pragma once
+
+#ifndef __FLAC_H__
+#define __FLAC_H__
+
+#include <stdint.h>
+
+/***************************************************************************
+ *  TYPE DEFINITIONS
+ ***************************************************************************
+ */
+
+typedef struct _flac_decoder flac_decoder;
+struct _flac_decoder {
+		/* output state */
+	void *                  decoder;				/* actual encoder */
+	uint32_t                sample_rate;			/* decoded sample rate */
+	uint8_t                 channels;				/* decoded number of channels */
+	uint8_t                 bits_per_sample;		/* decoded bits per sample */
+	uint32_t                compressed_offset;		/* current offset in compressed data */
+	const uint8_t *         compressed_start;		/* start of compressed data */
+	uint32_t                compressed_length;		/* length of compressed data */
+	const uint8_t *         compressed2_start;		/* start of compressed data */
+	uint32_t                compressed2_length;		/* length of compressed data */
+	int16_t *               uncompressed_start[8];	/* pointer to start of uncompressed data (up to 8 streams) */
+	uint32_t                uncompressed_offset;	/* current position in uncompressed data */
+	uint32_t                uncompressed_length;	/* length of uncompressed data */
+	int                    	uncompressed_swap;		/* swap uncompressed sample data */
+	uint8_t                 custom_header[0x2a];	/* custom header */
+};
+
+/* ======================> flac_decoder */
+
+int 		flac_decoder_init(flac_decoder* decoder);
+void 		flac_decoder_free(flac_decoder* decoder);
+int 		flac_decoder_reset(flac_decoder* decoder, uint32_t sample_rate, uint8_t num_channels, uint32_t block_size, const void *buffer, uint32_t length);
+int 		flac_decoder_decode_interleaved(flac_decoder* decoder, int16_t *samples, uint32_t num_samples, int swap_endian);
+uint32_t 	flac_decoder_finish(flac_decoder* decoder);
+
+#endif /* __FLAC_H__ */
diff --git a/deps/libchdr/include/libchdr/huffman.h b/deps/libchdr/include/libchdr/huffman.h
new file mode 100644
index 00000000..6c9f5113
--- /dev/null
+++ b/deps/libchdr/include/libchdr/huffman.h
@@ -0,0 +1,90 @@
+/* license:BSD-3-Clause
+ * copyright-holders:Aaron Giles
+ ***************************************************************************
+
+    huffman.h
+
+    Static Huffman compression and decompression helpers.
+
+***************************************************************************/
+
+#pragma once
+
+#ifndef __HUFFMAN_H__
+#define __HUFFMAN_H__
+
+#include <libchdr/bitstream.h>
+
+
+/***************************************************************************
+ *  CONSTANTS
+ ***************************************************************************
+ */
+
+enum huffman_error
+{
+	HUFFERR_NONE = 0,
+	HUFFERR_TOO_MANY_BITS,
+	HUFFERR_INVALID_DATA,
+	HUFFERR_INPUT_BUFFER_TOO_SMALL,
+	HUFFERR_OUTPUT_BUFFER_TOO_SMALL,
+	HUFFERR_INTERNAL_INCONSISTENCY,
+	HUFFERR_TOO_MANY_CONTEXTS
+};
+
+/***************************************************************************
+ *  TYPE DEFINITIONS
+ ***************************************************************************
+ */
+
+typedef uint16_t lookup_value;
+
+/* a node in the huffman tree */
+struct node_t
+{
+	struct node_t*		parent;		/* pointer to parent node */
+	uint32_t			count;		/* number of hits on this node */
+	uint32_t			weight;		/* assigned weight of this node */
+	uint32_t			bits;		/* bits used to encode the node */
+	uint8_t				numbits;	/* number of bits needed for this node */
+};
+
+/* ======================> huffman_context_base */
+
+/* context class for decoding */
+struct huffman_decoder
+{
+	/* internal state */
+	uint32_t			numcodes;             /* number of total codes being processed */
+	uint8_t				maxbits;           /* maximum bits per code */
+	uint8_t 			prevdata;             /* value of the previous data (for delta-RLE encoding) */
+	int             	rleremaining;         /* number of RLE bytes remaining (for delta-RLE encoding) */
+	lookup_value *  	lookup;               /* pointer to the lookup table */
+	struct node_t *     huffnode;             /* array of nodes */
+	uint32_t *      	datahisto;            /* histogram of data values */
+
+	/* array versions of the info we need */
+#if 0
+	node_t*			huffnode_array; /* [_NumCodes]; */
+	lookup_value*	lookup_array; /* [1 << _MaxBits]; */
+#endif
+};
+
+/* ======================> huffman_decoder */
+
+struct huffman_decoder* create_huffman_decoder(int numcodes, int maxbits);
+void delete_huffman_decoder(struct huffman_decoder* decoder);
+
+/* single item operations */
+uint32_t huffman_decode_one(struct huffman_decoder* decoder, struct bitstream* bitbuf);
+
+enum huffman_error huffman_import_tree_rle(struct huffman_decoder* decoder, struct bitstream* bitbuf);
+enum huffman_error huffman_import_tree_huffman(struct huffman_decoder* decoder, struct bitstream* bitbuf);
+
+int huffman_build_tree(struct huffman_decoder* decoder, uint32_t totaldata, uint32_t totalweight);
+enum huffman_error huffman_assign_canonical_codes(struct huffman_decoder* decoder);
+enum huffman_error huffman_compute_tree_from_histo(struct huffman_decoder* decoder);
+
+void huffman_build_lookup_table(struct huffman_decoder* decoder);
+
+#endif
diff --git a/deps/libchdr/pkg-config.pc.in b/deps/libchdr/pkg-config.pc.in
new file mode 100644
index 00000000..0289632e
--- /dev/null
+++ b/deps/libchdr/pkg-config.pc.in
@@ -0,0 +1,10 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@/libchdr
+
+Name: libchdr
+Description: Standalone library for reading MAME's CHDv1-v5 formats
+Version: @CHDR_VERSION_MAJOR@.@CHDR_VERSION_MINOR@
+Libs: -L${libdir} -lchdr @LIBS@
+Cflags: -I${includedir}
+
diff --git a/deps/libchdr/src/libchdr_bitstream.c b/deps/libchdr/src/libchdr_bitstream.c
new file mode 100644
index 00000000..c82a67dd
--- /dev/null
+++ b/deps/libchdr/src/libchdr_bitstream.c
@@ -0,0 +1,125 @@
+/* license:BSD-3-Clause
+ * copyright-holders:Aaron Giles
+***************************************************************************
+
+    bitstream.c
+
+    Helper classes for reading/writing at the bit level.
+
+***************************************************************************/
+
+#include <stdlib.h>
+#include <libchdr/bitstream.h>
+
+/***************************************************************************
+ *  INLINE FUNCTIONS
+ ***************************************************************************
+ */
+
+int bitstream_overflow(struct bitstream* bitstream) { return ((bitstream->doffset - bitstream->bits / 8) > bitstream->dlength); }
+
+/*-------------------------------------------------
+ *  create_bitstream - constructor
+ *-------------------------------------------------
+ */
+
+struct bitstream* create_bitstream(const void *src, uint32_t srclength)
+{
+	struct bitstream* bitstream = (struct bitstream*)malloc(sizeof(struct bitstream));
+	bitstream->buffer = 0;
+	bitstream->bits = 0;
+	bitstream->read = (const uint8_t*)src;
+	bitstream->doffset = 0;
+	bitstream->dlength = srclength;
+	return bitstream;
+}
+
+
+/*-----------------------------------------------------
+ *  bitstream_peek - fetch the requested number of bits
+ *  but don't advance the input pointer
+ *-----------------------------------------------------
+ */
+
+uint32_t bitstream_peek(struct bitstream* bitstream, int numbits)
+{
+	if (numbits == 0)
+		return 0;
+
+	/* fetch data if we need more */
+	if (numbits > bitstream->bits)
+	{
+		while (bitstream->bits <= 24)
+		{
+			if (bitstream->doffset < bitstream->dlength)
+				bitstream->buffer |= bitstream->read[bitstream->doffset] << (24 - bitstream->bits);
+			bitstream->doffset++;
+			bitstream->bits += 8;
+		}
+	}
+
+	/* return the data */
+	return bitstream->buffer >> (32 - numbits);
+}
+
+
+/*-----------------------------------------------------
+ *  bitstream_remove - advance the input pointer by the
+ *  specified number of bits
+ *-----------------------------------------------------
+ */
+
+void bitstream_remove(struct bitstream* bitstream, int numbits)
+{
+	bitstream->buffer <<= numbits;
+	bitstream->bits -= numbits;
+}
+
+
+/*-----------------------------------------------------
+ *  bitstream_read - fetch the requested number of bits
+ *-----------------------------------------------------
+ */
+
+uint32_t bitstream_read(struct bitstream* bitstream, int numbits)
+{
+	uint32_t result = bitstream_peek(bitstream, numbits);
+	bitstream_remove(bitstream, numbits);
+	return result;
+}
+
+
+/*-------------------------------------------------
+ *  read_offset - return the current read offset
+ *-------------------------------------------------
+ */
+
+uint32_t bitstream_read_offset(struct bitstream* bitstream)
+{
+	uint32_t result = bitstream->doffset;
+	int bits = bitstream->bits;
+	while (bits >= 8)
+	{
+		result--;
+		bits -= 8;
+	}
+	return result;
+}
+
+
+/*-------------------------------------------------
+ *  flush - flush to the nearest byte
+ *-------------------------------------------------
+ */
+
+uint32_t bitstream_flush(struct bitstream* bitstream)
+{
+	while (bitstream->bits >= 8)
+	{
+		bitstream->doffset--;
+		bitstream->bits -= 8;
+	}
+	bitstream->bits = bitstream->buffer = 0;
+	return bitstream->doffset;
+}
+
diff --git a/deps/libchdr/src/libchdr_cdrom.c b/deps/libchdr/src/libchdr_cdrom.c
new file mode 100644
index 00000000..58be0155
--- /dev/null
+++ b/deps/libchdr/src/libchdr_cdrom.c
@@ -0,0 +1,415 @@
+/* license:BSD-3-Clause
+ * copyright-holders:Aaron Giles
+***************************************************************************
+
+    cdrom.c
+
+    Generic MAME CD-ROM utilties - build IDE and SCSI CD-ROMs on top of this
+
+****************************************************************************
+
+    IMPORTANT:
+    "physical" block addresses are the actual addresses on the emulated CD.
+    "chd" block addresses are the block addresses in the CHD file.
+    Because we pad each track to a 4-frame boundary, these addressing
+    schemes will differ after track 1!
+
+***************************************************************************/
+#include <assert.h>
+#include <string.h>
+
+#include <libchdr/cdrom.h>
+
+#ifdef WANT_RAW_DATA_SECTOR
+
+/***************************************************************************
+    DEBUGGING
+***************************************************************************/
+
+/** @brief  The verbose. */
+#define VERBOSE (0)
+#if VERBOSE
+
+/**
+ * @def LOG(x) do
+ *
+ * @brief   A macro that defines log.
+ *
+ * @param   x   The void to process.
+ */
+
+#define LOG(x) do { if (VERBOSE) logerror x; } while (0)
+
+/**
+ * @fn  void CLIB_DECL logerror(const char *text, ...) ATTR_PRINTF(1,2);
+ *
+ * @brief   Logerrors the given text.
+ *
+ * @param   text    The text.
+ *
+ * @return  A CLIB_DECL.
+ */
+
+void CLIB_DECL logerror(const char *text, ...) ATTR_PRINTF(1,2);
+#else
+
+/**
+ * @def LOG(x);
+ *
+ * @brief   A macro that defines log.
+ *
+ * @param   x   The void to process.
+ */
+
+#define LOG(x)
+#endif
+
+/***************************************************************************
+    CONSTANTS
+***************************************************************************/
+
+/** @brief  offset within sector. */
+#define SYNC_OFFSET 0x000
+/** @brief  12 bytes. */
+#define SYNC_NUM_BYTES 12
+
+/** @brief  offset within sector. */
+#define MODE_OFFSET 0x00f
+
+/** @brief  offset within sector. */
+#define ECC_P_OFFSET 0x81c
+/** @brief  2 lots of 86. */
+#define ECC_P_NUM_BYTES 86
+/** @brief  24 bytes each. */
+#define ECC_P_COMP 24
+
+/** @brief  The ECC q offset. */
+#define ECC_Q_OFFSET (ECC_P_OFFSET + 2 * ECC_P_NUM_BYTES)
+/** @brief  2 lots of 52. */
+#define ECC_Q_NUM_BYTES 52
+/** @brief  43 bytes each. */
+#define ECC_Q_COMP 43
+
+/**
+ * @brief   -------------------------------------------------
+ *            ECC lookup tables pre-calculated tables for ECC data calcs
+ *          -------------------------------------------------.
+ */
+
+static const uint8_t ecclow[256] =
+{
+	0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
+	0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e,
+	0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e,
+	0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e,
+	0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e,
+	0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe,
+	0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde,
+	0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe,
+	0x1d, 0x1f, 0x19, 0x1b, 0x15, 0x17, 0x11, 0x13, 0x0d, 0x0f, 0x09, 0x0b, 0x05, 0x07, 0x01, 0x03,
+	0x3d, 0x3f, 0x39, 0x3b, 0x35, 0x37, 0x31, 0x33, 0x2d, 0x2f, 0x29, 0x2b, 0x25, 0x27, 0x21, 0x23,
+	0x5d, 0x5f, 0x59, 0x5b, 0x55, 0x57, 0x51, 0x53, 0x4d, 0x4f, 0x49, 0x4b, 0x45, 0x47, 0x41, 0x43,
+	0x7d, 0x7f, 0x79, 0x7b, 0x75, 0x77, 0x71, 0x73, 0x6d, 0x6f, 0x69, 0x6b, 0x65, 0x67, 0x61, 0x63,
+	0x9d, 0x9f, 0x99, 0x9b, 0x95, 0x97, 0x91, 0x93, 0x8d, 0x8f, 0x89, 0x8b, 0x85, 0x87, 0x81, 0x83,
+	0xbd, 0xbf, 0xb9, 0xbb, 0xb5, 0xb7, 0xb1, 0xb3, 0xad, 0xaf, 0xa9, 0xab, 0xa5, 0xa7, 0xa1, 0xa3,
+	0xdd, 0xdf, 0xd9, 0xdb, 0xd5, 0xd7, 0xd1, 0xd3, 0xcd, 0xcf, 0xc9, 0xcb, 0xc5, 0xc7, 0xc1, 0xc3,
+	0xfd, 0xff, 0xf9, 0xfb, 0xf5, 0xf7, 0xf1, 0xf3, 0xed, 0xef, 0xe9, 0xeb, 0xe5, 0xe7, 0xe1, 0xe3
+};
+
+/** @brief  The ecchigh[ 256]. */
+static const uint8_t ecchigh[256] =
+{
+	0x00, 0xf4, 0xf5, 0x01, 0xf7, 0x03, 0x02, 0xf6, 0xf3, 0x07, 0x06, 0xf2, 0x04, 0xf0, 0xf1, 0x05,
+	0xfb, 0x0f, 0x0e, 0xfa, 0x0c, 0xf8, 0xf9, 0x0d, 0x08, 0xfc, 0xfd, 0x09, 0xff, 0x0b, 0x0a, 0xfe,
+	0xeb, 0x1f, 0x1e, 0xea, 0x1c, 0xe8, 0xe9, 0x1d, 0x18, 0xec, 0xed, 0x19, 0xef, 0x1b, 0x1a, 0xee,
+	0x10, 0xe4, 0xe5, 0x11, 0xe7, 0x13, 0x12, 0xe6, 0xe3, 0x17, 0x16, 0xe2, 0x14, 0xe0, 0xe1, 0x15,
+	0xcb, 0x3f, 0x3e, 0xca, 0x3c, 0xc8, 0xc9, 0x3d, 0x38, 0xcc, 0xcd, 0x39, 0xcf, 0x3b, 0x3a, 0xce,
+	0x30, 0xc4, 0xc5, 0x31, 0xc7, 0x33, 0x32, 0xc6, 0xc3, 0x37, 0x36, 0xc2, 0x34, 0xc0, 0xc1, 0x35,
+	0x20, 0xd4, 0xd5, 0x21, 0xd7, 0x23, 0x22, 0xd6, 0xd3, 0x27, 0x26, 0xd2, 0x24, 0xd0, 0xd1, 0x25,
+	0xdb, 0x2f, 0x2e, 0xda, 0x2c, 0xd8, 0xd9, 0x2d, 0x28, 0xdc, 0xdd, 0x29, 0xdf, 0x2b, 0x2a, 0xde,
+	0x8b, 0x7f, 0x7e, 0x8a, 0x7c, 0x88, 0x89, 0x7d, 0x78, 0x8c, 0x8d, 0x79, 0x8f, 0x7b, 0x7a, 0x8e,
+	0x70, 0x84, 0x85, 0x71, 0x87, 0x73, 0x72, 0x86, 0x83, 0x77, 0x76, 0x82, 0x74, 0x80, 0x81, 0x75,
+	0x60, 0x94, 0x95, 0x61, 0x97, 0x63, 0x62, 0x96, 0x93, 0x67, 0x66, 0x92, 0x64, 0x90, 0x91, 0x65,
+	0x9b, 0x6f, 0x6e, 0x9a, 0x6c, 0x98, 0x99, 0x6d, 0x68, 0x9c, 0x9d, 0x69, 0x9f, 0x6b, 0x6a, 0x9e,
+	0x40, 0xb4, 0xb5, 0x41, 0xb7, 0x43, 0x42, 0xb6, 0xb3, 0x47, 0x46, 0xb2, 0x44, 0xb0, 0xb1, 0x45,
+	0xbb, 0x4f, 0x4e, 0xba, 0x4c, 0xb8, 0xb9, 0x4d, 0x48, 0xbc, 0xbd, 0x49, 0xbf, 0x4b, 0x4a, 0xbe,
+	0xab, 0x5f, 0x5e, 0xaa, 0x5c, 0xa8, 0xa9, 0x5d, 0x58, 0xac, 0xad, 0x59, 0xaf, 0x5b, 0x5a, 0xae,
+	0x50, 0xa4, 0xa5, 0x51, 0xa7, 0x53, 0x52, 0xa6, 0xa3, 0x57, 0x56, 0xa2, 0x54, 0xa0, 0xa1, 0x55
+};
+
+/**
+ * @brief   -------------------------------------------------
+ *            poffsets - each row represents the addresses used to calculate a byte of the ECC P
+ *            data 86 (*2) ECC P bytes, 24 values represented by each
+ *          -------------------------------------------------.
+ */
+
+static const uint16_t poffsets[ECC_P_NUM_BYTES][ECC_P_COMP] =
+{
+	{ 0x000,0x056,0x0ac,0x102,0x158,0x1ae,0x204,0x25a,0x2b0,0x306,0x35c,0x3b2,0x408,0x45e,0x4b4,0x50a,0x560,0x5b6,0x60c,0x662,0x6b8,0x70e,0x764,0x7ba },
+	{ 0x001,0x057,0x0ad,0x103,0x159,0x1af,0x205,0x25b,0x2b1,0x307,0x35d,0x3b3,0x409,0x45f,0x4b5,0x50b,0x561,0x5b7,0x60d,0x663,0x6b9,0x70f,0x765,0x7bb },
+	{ 0x002,0x058,0x0ae,0x104,0x15a,0x1b0,0x206,0x25c,0x2b2,0x308,0x35e,0x3b4,0x40a,0x460,0x4b6,0x50c,0x562,0x5b8,0x60e,0x664,0x6ba,0x710,0x766,0x7bc },
+	{ 0x003,0x059,0x0af,0x105,0x15b,0x1b1,0x207,0x25d,0x2b3,0x309,0x35f,0x3b5,0x40b,0x461,0x4b7,0x50d,0x563,0x5b9,0x60f,0x665,0x6bb,0x711,0x767,0x7bd },
+	{ 0x004,0x05a,0x0b0,0x106,0x15c,0x1b2,0x208,0x25e,0x2b4,0x30a,0x360,0x3b6,0x40c,0x462,0x4b8,0x50e,0x564,0x5ba,0x610,0x666,0x6bc,0x712,0x768,0x7be },
+	{ 0x005,0x05b,0x0b1,0x107,0x15d,0x1b3,0x209,0x25f,0x2b5,0x30b,0x361,0x3b7,0x40d,0x463,0x4b9,0x50f,0x565,0x5bb,0x611,0x667,0x6bd,0x713,0x769,0x7bf },
+	{ 0x006,0x05c,0x0b2,0x108,0x15e,0x1b4,0x20a,0x260,0x2b6,0x30c,0x362,0x3b8,0x40e,0x464,0x4ba,0x510,0x566,0x5bc,0x612,0x668,0x6be,0x714,0x76a,0x7c0 },
+	{ 0x007,0x05d,0x0b3,0x109,0x15f,0x1b5,0x20b,0x261,0x2b7,0x30d,0x363,0x3b9,0x40f,0x465,0x4bb,0x511,0x567,0x5bd,0x613,0x669,0x6bf,0x715,0x76b,0x7c1 },
+	{ 0x008,0x05e,0x0b4,0x10a,0x160,0x1b6,0x20c,0x262,0x2b8,0x30e,0x364,0x3ba,0x410,0x466,0x4bc,0x512,0x568,0x5be,0x614,0x66a,0x6c0,0x716,0x76c,0x7c2 },
+	{ 0x009,0x05f,0x0b5,0x10b,0x161,0x1b7,0x20d,0x263,0x2b9,0x30f,0x365,0x3bb,0x411,0x467,0x4bd,0x513,0x569,0x5bf,0x615,0x66b,0x6c1,0x717,0x76d,0x7c3 },
+	{ 0x00a,0x060,0x0b6,0x10c,0x162,0x1b8,0x20e,0x264,0x2ba,0x310,0x366,0x3bc,0x412,0x468,0x4be,0x514,0x56a,0x5c0,0x616,0x66c,0x6c2,0x718,0x76e,0x7c4 },
+	{ 0x00b,0x061,0x0b7,0x10d,0x163,0x1b9,0x20f,0x265,0x2bb,0x311,0x367,0x3bd,0x413,0x469,0x4bf,0x515,0x56b,0x5c1,0x617,0x66d,0x6c3,0x719,0x76f,0x7c5 },
+	{ 0x00c,0x062,0x0b8,0x10e,0x164,0x1ba,0x210,0x266,0x2bc,0x312,0x368,0x3be,0x414,0x46a,0x4c0,0x516,0x56c,0x5c2,0x618,0x66e,0x6c4,0x71a,0x770,0x7c6 },
+	{ 0x00d,0x063,0x0b9,0x10f,0x165,0x1bb,0x211,0x267,0x2bd,0x313,0x369,0x3bf,0x415,0x46b,0x4c1,0x517,0x56d,0x5c3,0x619,0x66f,0x6c5,0x71b,0x771,0x7c7 },
+	{ 0x00e,0x064,0x0ba,0x110,0x166,0x1bc,0x212,0x268,0x2be,0x314,0x36a,0x3c0,0x416,0x46c,0x4c2,0x518,0x56e,0x5c4,0x61a,0x670,0x6c6,0x71c,0x772,0x7c8 },
+	{ 0x00f,0x065,0x0bb,0x111,0x167,0x1bd,0x213,0x269,0x2bf,0x315,0x36b,0x3c1,0x417,0x46d,0x4c3,0x519,0x56f,0x5c5,0x61b,0x671,0x6c7,0x71d,0x773,0x7c9 },
+	{ 0x010,0x066,0x0bc,0x112,0x168,0x1be,0x214,0x26a,0x2c0,0x316,0x36c,0x3c2,0x418,0x46e,0x4c4,0x51a,0x570,0x5c6,0x61c,0x672,0x6c8,0x71e,0x774,0x7ca },
+	{ 0x011,0x067,0x0bd,0x113,0x169,0x1bf,0x215,0x26b,0x2c1,0x317,0x36d,0x3c3,0x419,0x46f,0x4c5,0x51b,0x571,0x5c7,0x61d,0x673,0x6c9,0x71f,0x775,0x7cb },
+	{ 0x012,0x068,0x0be,0x114,0x16a,0x1c0,0x216,0x26c,0x2c2,0x318,0x36e,0x3c4,0x41a,0x470,0x4c6,0x51c,0x572,0x5c8,0x61e,0x674,0x6ca,0x720,0x776,0x7cc },
+	{ 0x013,0x069,0x0bf,0x115,0x16b,0x1c1,0x217,0x26d,0x2c3,0x319,0x36f,0x3c5,0x41b,0x471,0x4c7,0x51d,0x573,0x5c9,0x61f,0x675,0x6cb,0x721,0x777,0x7cd },
+	{ 0x014,0x06a,0x0c0,0x116,0x16c,0x1c2,0x218,0x26e,0x2c4,0x31a,0x370,0x3c6,0x41c,0x472,0x4c8,0x51e,0x574,0x5ca,0x620,0x676,0x6cc,0x722,0x778,0x7ce },
+	{ 0x015,0x06b,0x0c1,0x117,0x16d,0x1c3,0x219,0x26f,0x2c5,0x31b,0x371,0x3c7,0x41d,0x473,0x4c9,0x51f,0x575,0x5cb,0x621,0x677,0x6cd,0x723,0x779,0x7cf },
+	{ 0x016,0x06c,0x0c2,0x118,0x16e,0x1c4,0x21a,0x270,0x2c6,0x31c,0x372,0x3c8,0x41e,0x474,0x4ca,0x520,0x576,0x5cc,0x622,0x678,0x6ce,0x724,0x77a,0x7d0 },
+	{ 0x017,0x06d,0x0c3,0x119,0x16f,0x1c5,0x21b,0x271,0x2c7,0x31d,0x373,0x3c9,0x41f,0x475,0x4cb,0x521,0x577,0x5cd,0x623,0x679,0x6cf,0x725,0x77b,0x7d1 },
+	{ 0x018,0x06e,0x0c4,0x11a,0x170,0x1c6,0x21c,0x272,0x2c8,0x31e,0x374,0x3ca,0x420,0x476,0x4cc,0x522,0x578,0x5ce,0x624,0x67a,0x6d0,0x726,0x77c,0x7d2 },
+	{ 0x019,0x06f,0x0c5,0x11b,0x171,0x1c7,0x21d,0x273,0x2c9,0x31f,0x375,0x3cb,0x421,0x477,0x4cd,0x523,0x579,0x5cf,0x625,0x67b,0x6d1,0x727,0x77d,0x7d3 },
+	{ 0x01a,0x070,0x0c6,0x11c,0x172,0x1c8,0x21e,0x274,0x2ca,0x320,0x376,0x3cc,0x422,0x478,0x4ce,0x524,0x57a,0x5d0,0x626,0x67c,0x6d2,0x728,0x77e,0x7d4 },
+	{ 0x01b,0x071,0x0c7,0x11d,0x173,0x1c9,0x21f,0x275,0x2cb,0x321,0x377,0x3cd,0x423,0x479,0x4cf,0x525,0x57b,0x5d1,0x627,0x67d,0x6d3,0x729,0x77f,0x7d5 },
+	{ 0x01c,0x072,0x0c8,0x11e,0x174,0x1ca,0x220,0x276,0x2cc,0x322,0x378,0x3ce,0x424,0x47a,0x4d0,0x526,0x57c,0x5d2,0x628,0x67e,0x6d4,0x72a,0x780,0x7d6 },
+	{ 0x01d,0x073,0x0c9,0x11f,0x175,0x1cb,0x221,0x277,0x2cd,0x323,0x379,0x3cf,0x425,0x47b,0x4d1,0x527,0x57d,0x5d3,0x629,0x67f,0x6d5,0x72b,0x781,0x7d7 },
+	{ 0x01e,0x074,0x0ca,0x120,0x176,0x1cc,0x222,0x278,0x2ce,0x324,0x37a,0x3d0,0x426,0x47c,0x4d2,0x528,0x57e,0x5d4,0x62a,0x680,0x6d6,0x72c,0x782,0x7d8 },
+	{ 0x01f,0x075,0x0cb,0x121,0x177,0x1cd,0x223,0x279,0x2cf,0x325,0x37b,0x3d1,0x427,0x47d,0x4d3,0x529,0x57f,0x5d5,0x62b,0x681,0x6d7,0x72d,0x783,0x7d9 },
+	{ 0x020,0x076,0x0cc,0x122,0x178,0x1ce,0x224,0x27a,0x2d0,0x326,0x37c,0x3d2,0x428,0x47e,0x4d4,0x52a,0x580,0x5d6,0x62c,0x682,0x6d8,0x72e,0x784,0x7da },
+	{ 0x021,0x077,0x0cd,0x123,0x179,0x1cf,0x225,0x27b,0x2d1,0x327,0x37d,0x3d3,0x429,0x47f,0x4d5,0x52b,0x581,0x5d7,0x62d,0x683,0x6d9,0x72f,0x785,0x7db },
+	{ 0x022,0x078,0x0ce,0x124,0x17a,0x1d0,0x226,0x27c,0x2d2,0x328,0x37e,0x3d4,0x42a,0x480,0x4d6,0x52c,0x582,0x5d8,0x62e,0x684,0x6da,0x730,0x786,0x7dc },
+	{ 0x023,0x079,0x0cf,0x125,0x17b,0x1d1,0x227,0x27d,0x2d3,0x329,0x37f,0x3d5,0x42b,0x481,0x4d7,0x52d,0x583,0x5d9,0x62f,0x685,0x6db,0x731,0x787,0x7dd },
+	{ 0x024,0x07a,0x0d0,0x126,0x17c,0x1d2,0x228,0x27e,0x2d4,0x32a,0x380,0x3d6,0x42c,0x482,0x4d8,0x52e,0x584,0x5da,0x630,0x686,0x6dc,0x732,0x788,0x7de },
+	{ 0x025,0x07b,0x0d1,0x127,0x17d,0x1d3,0x229,0x27f,0x2d5,0x32b,0x381,0x3d7,0x42d,0x483,0x4d9,0x52f,0x585,0x5db,0x631,0x687,0x6dd,0x733,0x789,0x7df },
+	{ 0x026,0x07c,0x0d2,0x128,0x17e,0x1d4,0x22a,0x280,0x2d6,0x32c,0x382,0x3d8,0x42e,0x484,0x4da,0x530,0x586,0x5dc,0x632,0x688,0x6de,0x734,0x78a,0x7e0 },
+	{ 0x027,0x07d,0x0d3,0x129,0x17f,0x1d5,0x22b,0x281,0x2d7,0x32d,0x383,0x3d9,0x42f,0x485,0x4db,0x531,0x587,0x5dd,0x633,0x689,0x6df,0x735,0x78b,0x7e1 },
+	{ 0x028,0x07e,0x0d4,0x12a,0x180,0x1d6,0x22c,0x282,0x2d8,0x32e,0x384,0x3da,0x430,0x486,0x4dc,0x532,0x588,0x5de,0x634,0x68a,0x6e0,0x736,0x78c,0x7e2 },
+	{ 0x029,0x07f,0x0d5,0x12b,0x181,0x1d7,0x22d,0x283,0x2d9,0x32f,0x385,0x3db,0x431,0x487,0x4dd,0x533,0x589,0x5df,0x635,0x68b,0x6e1,0x737,0x78d,0x7e3 },
+	{ 0x02a,0x080,0x0d6,0x12c,0x182,0x1d8,0x22e,0x284,0x2da,0x330,0x386,0x3dc,0x432,0x488,0x4de,0x534,0x58a,0x5e0,0x636,0x68c,0x6e2,0x738,0x78e,0x7e4 },
+	{ 0x02b,0x081,0x0d7,0x12d,0x183,0x1d9,0x22f,0x285,0x2db,0x331,0x387,0x3dd,0x433,0x489,0x4df,0x535,0x58b,0x5e1,0x637,0x68d,0x6e3,0x739,0x78f,0x7e5 },
+	{ 0x02c,0x082,0x0d8,0x12e,0x184,0x1da,0x230,0x286,0x2dc,0x332,0x388,0x3de,0x434,0x48a,0x4e0,0x536,0x58c,0x5e2,0x638,0x68e,0x6e4,0x73a,0x790,0x7e6 },
+	{ 0x02d,0x083,0x0d9,0x12f,0x185,0x1db,0x231,0x287,0x2dd,0x333,0x389,0x3df,0x435,0x48b,0x4e1,0x537,0x58d,0x5e3,0x639,0x68f,0x6e5,0x73b,0x791,0x7e7 },
+	{ 0x02e,0x084,0x0da,0x130,0x186,0x1dc,0x232,0x288,0x2de,0x334,0x38a,0x3e0,0x436,0x48c,0x4e2,0x538,0x58e,0x5e4,0x63a,0x690,0x6e6,0x73c,0x792,0x7e8 },
+	{ 0x02f,0x085,0x0db,0x131,0x187,0x1dd,0x233,0x289,0x2df,0x335,0x38b,0x3e1,0x437,0x48d,0x4e3,0x539,0x58f,0x5e5,0x63b,0x691,0x6e7,0x73d,0x793,0x7e9 },
+	{ 0x030,0x086,0x0dc,0x132,0x188,0x1de,0x234,0x28a,0x2e0,0x336,0x38c,0x3e2,0x438,0x48e,0x4e4,0x53a,0x590,0x5e6,0x63c,0x692,0x6e8,0x73e,0x794,0x7ea },
+	{ 0x031,0x087,0x0dd,0x133,0x189,0x1df,0x235,0x28b,0x2e1,0x337,0x38d,0x3e3,0x439,0x48f,0x4e5,0x53b,0x591,0x5e7,0x63d,0x693,0x6e9,0x73f,0x795,0x7eb },
+	{ 0x032,0x088,0x0de,0x134,0x18a,0x1e0,0x236,0x28c,0x2e2,0x338,0x38e,0x3e4,0x43a,0x490,0x4e6,0x53c,0x592,0x5e8,0x63e,0x694,0x6ea,0x740,0x796,0x7ec },
+	{ 0x033,0x089,0x0df,0x135,0x18b,0x1e1,0x237,0x28d,0x2e3,0x339,0x38f,0x3e5,0x43b,0x491,0x4e7,0x53d,0x593,0x5e9,0x63f,0x695,0x6eb,0x741,0x797,0x7ed },
+	{ 0x034,0x08a,0x0e0,0x136,0x18c,0x1e2,0x238,0x28e,0x2e4,0x33a,0x390,0x3e6,0x43c,0x492,0x4e8,0x53e,0x594,0x5ea,0x640,0x696,0x6ec,0x742,0x798,0x7ee },
+	{ 0x035,0x08b,0x0e1,0x137,0x18d,0x1e3,0x239,0x28f,0x2e5,0x33b,0x391,0x3e7,0x43d,0x493,0x4e9,0x53f,0x595,0x5eb,0x641,0x697,0x6ed,0x743,0x799,0x7ef },
+	{ 0x036,0x08c,0x0e2,0x138,0x18e,0x1e4,0x23a,0x290,0x2e6,0x33c,0x392,0x3e8,0x43e,0x494,0x4ea,0x540,0x596,0x5ec,0x642,0x698,0x6ee,0x744,0x79a,0x7f0 },
+	{ 0x037,0x08d,0x0e3,0x139,0x18f,0x1e5,0x23b,0x291,0x2e7,0x33d,0x393,0x3e9,0x43f,0x495,0x4eb,0x541,0x597,0x5ed,0x643,0x699,0x6ef,0x745,0x79b,0x7f1 },
+	{ 0x038,0x08e,0x0e4,0x13a,0x190,0x1e6,0x23c,0x292,0x2e8,0x33e,0x394,0x3ea,0x440,0x496,0x4ec,0x542,0x598,0x5ee,0x644,0x69a,0x6f0,0x746,0x79c,0x7f2 },
+	{ 0x039,0x08f,0x0e5,0x13b,0x191,0x1e7,0x23d,0x293,0x2e9,0x33f,0x395,0x3eb,0x441,0x497,0x4ed,0x543,0x599,0x5ef,0x645,0x69b,0x6f1,0x747,0x79d,0x7f3 },
+	{ 0x03a,0x090,0x0e6,0x13c,0x192,0x1e8,0x23e,0x294,0x2ea,0x340,0x396,0x3ec,0x442,0x498,0x4ee,0x544,0x59a,0x5f0,0x646,0x69c,0x6f2,0x748,0x79e,0x7f4 },
+	{ 0x03b,0x091,0x0e7,0x13d,0x193,0x1e9,0x23f,0x295,0x2eb,0x341,0x397,0x3ed,0x443,0x499,0x4ef,0x545,0x59b,0x5f1,0x647,0x69d,0x6f3,0x749,0x79f,0x7f5 },
+	{ 0x03c,0x092,0x0e8,0x13e,0x194,0x1ea,0x240,0x296,0x2ec,0x342,0x398,0x3ee,0x444,0x49a,0x4f0,0x546,0x59c,0x5f2,0x648,0x69e,0x6f4,0x74a,0x7a0,0x7f6 },
+	{ 0x03d,0x093,0x0e9,0x13f,0x195,0x1eb,0x241,0x297,0x2ed,0x343,0x399,0x3ef,0x445,0x49b,0x4f1,0x547,0x59d,0x5f3,0x649,0x69f,0x6f5,0x74b,0x7a1,0x7f7 },
+	{ 0x03e,0x094,0x0ea,0x140,0x196,0x1ec,0x242,0x298,0x2ee,0x344,0x39a,0x3f0,0x446,0x49c,0x4f2,0x548,0x59e,0x5f4,0x64a,0x6a0,0x6f6,0x74c,0x7a2,0x7f8 },
+	{ 0x03f,0x095,0x0eb,0x141,0x197,0x1ed,0x243,0x299,0x2ef,0x345,0x39b,0x3f1,0x447,0x49d,0x4f3,0x549,0x59f,0x5f5,0x64b,0x6a1,0x6f7,0x74d,0x7a3,0x7f9 },
+	{ 0x040,0x096,0x0ec,0x142,0x198,0x1ee,0x244,0x29a,0x2f0,0x346,0x39c,0x3f2,0x448,0x49e,0x4f4,0x54a,0x5a0,0x5f6,0x64c,0x6a2,0x6f8,0x74e,0x7a4,0x7fa },
+	{ 0x041,0x097,0x0ed,0x143,0x199,0x1ef,0x245,0x29b,0x2f1,0x347,0x39d,0x3f3,0x449,0x49f,0x4f5,0x54b,0x5a1,0x5f7,0x64d,0x6a3,0x6f9,0x74f,0x7a5,0x7fb },
+	{ 0x042,0x098,0x0ee,0x144,0x19a,0x1f0,0x246,0x29c,0x2f2,0x348,0x39e,0x3f4,0x44a,0x4a0,0x4f6,0x54c,0x5a2,0x5f8,0x64e,0x6a4,0x6fa,0x750,0x7a6,0x7fc },
+	{ 0x043,0x099,0x0ef,0x145,0x19b,0x1f1,0x247,0x29d,0x2f3,0x349,0x39f,0x3f5,0x44b,0x4a1,0x4f7,0x54d,0x5a3,0x5f9,0x64f,0x6a5,0x6fb,0x751,0x7a7,0x7fd },
+	{ 0x044,0x09a,0x0f0,0x146,0x19c,0x1f2,0x248,0x29e,0x2f4,0x34a,0x3a0,0x3f6,0x44c,0x4a2,0x4f8,0x54e,0x5a4,0x5fa,0x650,0x6a6,0x6fc,0x752,0x7a8,0x7fe },
+	{ 0x045,0x09b,0x0f1,0x147,0x19d,0x1f3,0x249,0x29f,0x2f5,0x34b,0x3a1,0x3f7,0x44d,0x4a3,0x4f9,0x54f,0x5a5,0x5fb,0x651,0x6a7,0x6fd,0x753,0x7a9,0x7ff },
+	{ 0x046,0x09c,0x0f2,0x148,0x19e,0x1f4,0x24a,0x2a0,0x2f6,0x34c,0x3a2,0x3f8,0x44e,0x4a4,0x4fa,0x550,0x5a6,0x5fc,0x652,0x6a8,0x6fe,0x754,0x7aa,0x800 },
+	{ 0x047,0x09d,0x0f3,0x149,0x19f,0x1f5,0x24b,0x2a1,0x2f7,0x34d,0x3a3,0x3f9,0x44f,0x4a5,0x4fb,0x551,0x5a7,0x5fd,0x653,0x6a9,0x6ff,0x755,0x7ab,0x801 },
+	{ 0x048,0x09e,0x0f4,0x14a,0x1a0,0x1f6,0x24c,0x2a2,0x2f8,0x34e,0x3a4,0x3fa,0x450,0x4a6,0x4fc,0x552,0x5a8,0x5fe,0x654,0x6aa,0x700,0x756,0x7ac,0x802 },
+	{ 0x049,0x09f,0x0f5,0x14b,0x1a1,0x1f7,0x24d,0x2a3,0x2f9,0x34f,0x3a5,0x3fb,0x451,0x4a7,0x4fd,0x553,0x5a9,0x5ff,0x655,0x6ab,0x701,0x757,0x7ad,0x803 },
+	{ 0x04a,0x0a0,0x0f6,0x14c,0x1a2,0x1f8,0x24e,0x2a4,0x2fa,0x350,0x3a6,0x3fc,0x452,0x4a8,0x4fe,0x554,0x5aa,0x600,0x656,0x6ac,0x702,0x758,0x7ae,0x804 },
+	{ 0x04b,0x0a1,0x0f7,0x14d,0x1a3,0x1f9,0x24f,0x2a5,0x2fb,0x351,0x3a7,0x3fd,0x453,0x4a9,0x4ff,0x555,0x5ab,0x601,0x657,0x6ad,0x703,0x759,0x7af,0x805 },
+	{ 0x04c,0x0a2,0x0f8,0x14e,0x1a4,0x1fa,0x250,0x2a6,0x2fc,0x352,0x3a8,0x3fe,0x454,0x4aa,0x500,0x556,0x5ac,0x602,0x658,0x6ae,0x704,0x75a,0x7b0,0x806 },
+	{ 0x04d,0x0a3,0x0f9,0x14f,0x1a5,0x1fb,0x251,0x2a7,0x2fd,0x353,0x3a9,0x3ff,0x455,0x4ab,0x501,0x557,0x5ad,0x603,0x659,0x6af,0x705,0x75b,0x7b1,0x807 },
+	{ 0x04e,0x0a4,0x0fa,0x150,0x1a6,0x1fc,0x252,0x2a8,0x2fe,0x354,0x3aa,0x400,0x456,0x4ac,0x502,0x558,0x5ae,0x604,0x65a,0x6b0,0x706,0x75c,0x7b2,0x808 },
+	{ 0x04f,0x0a5,0x0fb,0x151,0x1a7,0x1fd,0x253,0x2a9,0x2ff,0x355,0x3ab,0x401,0x457,0x4ad,0x503,0x559,0x5af,0x605,0x65b,0x6b1,0x707,0x75d,0x7b3,0x809 },
+	{ 0x050,0x0a6,0x0fc,0x152,0x1a8,0x1fe,0x254,0x2aa,0x300,0x356,0x3ac,0x402,0x458,0x4ae,0x504,0x55a,0x5b0,0x606,0x65c,0x6b2,0x708,0x75e,0x7b4,0x80a },
+	{ 0x051,0x0a7,0x0fd,0x153,0x1a9,0x1ff,0x255,0x2ab,0x301,0x357,0x3ad,0x403,0x459,0x4af,0x505,0x55b,0x5b1,0x607,0x65d,0x6b3,0x709,0x75f,0x7b5,0x80b },
+	{ 0x052,0x0a8,0x0fe,0x154,0x1aa,0x200,0x256,0x2ac,0x302,0x358,0x3ae,0x404,0x45a,0x4b0,0x506,0x55c,0x5b2,0x608,0x65e,0x6b4,0x70a,0x760,0x7b6,0x80c },
+	{ 0x053,0x0a9,0x0ff,0x155,0x1ab,0x201,0x257,0x2ad,0x303,0x359,0x3af,0x405,0x45b,0x4b1,0x507,0x55d,0x5b3,0x609,0x65f,0x6b5,0x70b,0x761,0x7b7,0x80d },
+	{ 0x054,0x0aa,0x100,0x156,0x1ac,0x202,0x258,0x2ae,0x304,0x35a,0x3b0,0x406,0x45c,0x4b2,0x508,0x55e,0x5b4,0x60a,0x660,0x6b6,0x70c,0x762,0x7b8,0x80e },
+	{ 0x055,0x0ab,0x101,0x157,0x1ad,0x203,0x259,0x2af,0x305,0x35b,0x3b1,0x407,0x45d,0x4b3,0x509,0x55f,0x5b5,0x60b,0x661,0x6b7,0x70d,0x763,0x7b9,0x80f }
+};
+
+/**
+ * @brief   -------------------------------------------------
+ *            qoffsets - each row represents the addresses used to calculate a byte of the ECC Q
+ *            data 52 (*2) ECC Q bytes, 43 values represented by each
+ *          -------------------------------------------------.
+ */
+
+static const uint16_t qoffsets[ECC_Q_NUM_BYTES][ECC_Q_COMP] =
+{
+	{ 0x000,0x058,0x0b0,0x108,0x160,0x1b8,0x210,0x268,0x2c0,0x318,0x370,0x3c8,0x420,0x478,0x4d0,0x528,0x580,0x5d8,0x630,0x688,0x6e0,0x738,0x790,0x7e8,0x840,0x898,0x034,0x08c,0x0e4,0x13c,0x194,0x1ec,0x244,0x29c,0x2f4,0x34c,0x3a4,0x3fc,0x454,0x4ac,0x504,0x55c,0x5b4 },
+	{ 0x001,0x059,0x0b1,0x109,0x161,0x1b9,0x211,0x269,0x2c1,0x319,0x371,0x3c9,0x421,0x479,0x4d1,0x529,0x581,0x5d9,0x631,0x689,0x6e1,0x739,0x791,0x7e9,0x841,0x899,0x035,0x08d,0x0e5,0x13d,0x195,0x1ed,0x245,0x29d,0x2f5,0x34d,0x3a5,0x3fd,0x455,0x4ad,0x505,0x55d,0x5b5 },
+	{ 0x056,0x0ae,0x106,0x15e,0x1b6,0x20e,0x266,0x2be,0x316,0x36e,0x3c6,0x41e,0x476,0x4ce,0x526,0x57e,0x5d6,0x62e,0x686,0x6de,0x736,0x78e,0x7e6,0x83e,0x896,0x032,0x08a,0x0e2,0x13a,0x192,0x1ea,0x242,0x29a,0x2f2,0x34a,0x3a2,0x3fa,0x452,0x4aa,0x502,0x55a,0x5b2,0x60a },
+	{ 0x057,0x0af,0x107,0x15f,0x1b7,0x20f,0x267,0x2bf,0x317,0x36f,0x3c7,0x41f,0x477,0x4cf,0x527,0x57f,0x5d7,0x62f,0x687,0x6df,0x737,0x78f,0x7e7,0x83f,0x897,0x033,0x08b,0x0e3,0x13b,0x193,0x1eb,0x243,0x29b,0x2f3,0x34b,0x3a3,0x3fb,0x453,0x4ab,0x503,0x55b,0x5b3,0x60b },
+	{ 0x0ac,0x104,0x15c,0x1b4,0x20c,0x264,0x2bc,0x314,0x36c,0x3c4,0x41c,0x474,0x4cc,0x524,0x57c,0x5d4,0x62c,0x684,0x6dc,0x734,0x78c,0x7e4,0x83c,0x894,0x030,0x088,0x0e0,0x138,0x190,0x1e8,0x240,0x298,0x2f0,0x348,0x3a0,0x3f8,0x450,0x4a8,0x500,0x558,0x5b0,0x608,0x660 },
+	{ 0x0ad,0x105,0x15d,0x1b5,0x20d,0x265,0x2bd,0x315,0x36d,0x3c5,0x41d,0x475,0x4cd,0x525,0x57d,0x5d5,0x62d,0x685,0x6dd,0x735,0x78d,0x7e5,0x83d,0x895,0x031,0x089,0x0e1,0x139,0x191,0x1e9,0x241,0x299,0x2f1,0x349,0x3a1,0x3f9,0x451,0x4a9,0x501,0x559,0x5b1,0x609,0x661 },
+	{ 0x102,0x15a,0x1b2,0x20a,0x262,0x2ba,0x312,0x36a,0x3c2,0x41a,0x472,0x4ca,0x522,0x57a,0x5d2,0x62a,0x682,0x6da,0x732,0x78a,0x7e2,0x83a,0x892,0x02e,0x086,0x0de,0x136,0x18e,0x1e6,0x23e,0x296,0x2ee,0x346,0x39e,0x3f6,0x44e,0x4a6,0x4fe,0x556,0x5ae,0x606,0x65e,0x6b6 },
+	{ 0x103,0x15b,0x1b3,0x20b,0x263,0x2bb,0x313,0x36b,0x3c3,0x41b,0x473,0x4cb,0x523,0x57b,0x5d3,0x62b,0x683,0x6db,0x733,0x78b,0x7e3,0x83b,0x893,0x02f,0x087,0x0df,0x137,0x18f,0x1e7,0x23f,0x297,0x2ef,0x347,0x39f,0x3f7,0x44f,0x4a7,0x4ff,0x557,0x5af,0x607,0x65f,0x6b7 },
+	{ 0x158,0x1b0,0x208,0x260,0x2b8,0x310,0x368,0x3c0,0x418,0x470,0x4c8,0x520,0x578,0x5d0,0x628,0x680,0x6d8,0x730,0x788,0x7e0,0x838,0x890,0x02c,0x084,0x0dc,0x134,0x18c,0x1e4,0x23c,0x294,0x2ec,0x344,0x39c,0x3f4,0x44c,0x4a4,0x4fc,0x554,0x5ac,0x604,0x65c,0x6b4,0x70c },
+	{ 0x159,0x1b1,0x209,0x261,0x2b9,0x311,0x369,0x3c1,0x419,0x471,0x4c9,0x521,0x579,0x5d1,0x629,0x681,0x6d9,0x731,0x789,0x7e1,0x839,0x891,0x02d,0x085,0x0dd,0x135,0x18d,0x1e5,0x23d,0x295,0x2ed,0x345,0x39d,0x3f5,0x44d,0x4a5,0x4fd,0x555,0x5ad,0x605,0x65d,0x6b5,0x70d },
+	{ 0x1ae,0x206,0x25e,0x2b6,0x30e,0x366,0x3be,0x416,0x46e,0x4c6,0x51e,0x576,0x5ce,0x626,0x67e,0x6d6,0x72e,0x786,0x7de,0x836,0x88e,0x02a,0x082,0x0da,0x132,0x18a,0x1e2,0x23a,0x292,0x2ea,0x342,0x39a,0x3f2,0x44a,0x4a2,0x4fa,0x552,0x5aa,0x602,0x65a,0x6b2,0x70a,0x762 },
+	{ 0x1af,0x207,0x25f,0x2b7,0x30f,0x367,0x3bf,0x417,0x46f,0x4c7,0x51f,0x577,0x5cf,0x627,0x67f,0x6d7,0x72f,0x787,0x7df,0x837,0x88f,0x02b,0x083,0x0db,0x133,0x18b,0x1e3,0x23b,0x293,0x2eb,0x343,0x39b,0x3f3,0x44b,0x4a3,0x4fb,0x553,0x5ab,0x603,0x65b,0x6b3,0x70b,0x763 },
+	{ 0x204,0x25c,0x2b4,0x30c,0x364,0x3bc,0x414,0x46c,0x4c4,0x51c,0x574,0x5cc,0x624,0x67c,0x6d4,0x72c,0x784,0x7dc,0x834,0x88c,0x028,0x080,0x0d8,0x130,0x188,0x1e0,0x238,0x290,0x2e8,0x340,0x398,0x3f0,0x448,0x4a0,0x4f8,0x550,0x5a8,0x600,0x658,0x6b0,0x708,0x760,0x7b8 },
+	{ 0x205,0x25d,0x2b5,0x30d,0x365,0x3bd,0x415,0x46d,0x4c5,0x51d,0x575,0x5cd,0x625,0x67d,0x6d5,0x72d,0x785,0x7dd,0x835,0x88d,0x029,0x081,0x0d9,0x131,0x189,0x1e1,0x239,0x291,0x2e9,0x341,0x399,0x3f1,0x449,0x4a1,0x4f9,0x551,0x5a9,0x601,0x659,0x6b1,0x709,0x761,0x7b9 },
+	{ 0x25a,0x2b2,0x30a,0x362,0x3ba,0x412,0x46a,0x4c2,0x51a,0x572,0x5ca,0x622,0x67a,0x6d2,0x72a,0x782,0x7da,0x832,0x88a,0x026,0x07e,0x0d6,0x12e,0x186,0x1de,0x236,0x28e,0x2e6,0x33e,0x396,0x3ee,0x446,0x49e,0x4f6,0x54e,0x5a6,0x5fe,0x656,0x6ae,0x706,0x75e,0x7b6,0x80e },
+	{ 0x25b,0x2b3,0x30b,0x363,0x3bb,0x413,0x46b,0x4c3,0x51b,0x573,0x5cb,0x623,0x67b,0x6d3,0x72b,0x783,0x7db,0x833,0x88b,0x027,0x07f,0x0d7,0x12f,0x187,0x1df,0x237,0x28f,0x2e7,0x33f,0x397,0x3ef,0x447,0x49f,0x4f7,0x54f,0x5a7,0x5ff,0x657,0x6af,0x707,0x75f,0x7b7,0x80f },
+	{ 0x2b0,0x308,0x360,0x3b8,0x410,0x468,0x4c0,0x518,0x570,0x5c8,0x620,0x678,0x6d0,0x728,0x780,0x7d8,0x830,0x888,0x024,0x07c,0x0d4,0x12c,0x184,0x1dc,0x234,0x28c,0x2e4,0x33c,0x394,0x3ec,0x444,0x49c,0x4f4,0x54c,0x5a4,0x5fc,0x654,0x6ac,0x704,0x75c,0x7b4,0x80c,0x864 },
+	{ 0x2b1,0x309,0x361,0x3b9,0x411,0x469,0x4c1,0x519,0x571,0x5c9,0x621,0x679,0x6d1,0x729,0x781,0x7d9,0x831,0x889,0x025,0x07d,0x0d5,0x12d,0x185,0x1dd,0x235,0x28d,0x2e5,0x33d,0x395,0x3ed,0x445,0x49d,0x4f5,0x54d,0x5a5,0x5fd,0x655,0x6ad,0x705,0x75d,0x7b5,0x80d,0x865 },
+	{ 0x306,0x35e,0x3b6,0x40e,0x466,0x4be,0x516,0x56e,0x5c6,0x61e,0x676,0x6ce,0x726,0x77e,0x7d6,0x82e,0x886,0x022,0x07a,0x0d2,0x12a,0x182,0x1da,0x232,0x28a,0x2e2,0x33a,0x392,0x3ea,0x442,0x49a,0x4f2,0x54a,0x5a2,0x5fa,0x652,0x6aa,0x702,0x75a,0x7b2,0x80a,0x862,0x8ba },
+	{ 0x307,0x35f,0x3b7,0x40f,0x467,0x4bf,0x517,0x56f,0x5c7,0x61f,0x677,0x6cf,0x727,0x77f,0x7d7,0x82f,0x887,0x023,0x07b,0x0d3,0x12b,0x183,0x1db,0x233,0x28b,0x2e3,0x33b,0x393,0x3eb,0x443,0x49b,0x4f3,0x54b,0x5a3,0x5fb,0x653,0x6ab,0x703,0x75b,0x7b3,0x80b,0x863,0x8bb },
+	{ 0x35c,0x3b4,0x40c,0x464,0x4bc,0x514,0x56c,0x5c4,0x61c,0x674,0x6cc,0x724,0x77c,0x7d4,0x82c,0x884,0x020,0x078,0x0d0,0x128,0x180,0x1d8,0x230,0x288,0x2e0,0x338,0x390,0x3e8,0x440,0x498,0x4f0,0x548,0x5a0,0x5f8,0x650,0x6a8,0x700,0x758,0x7b0,0x808,0x860,0x8b8,0x054 },
+	{ 0x35d,0x3b5,0x40d,0x465,0x4bd,0x515,0x56d,0x5c5,0x61d,0x675,0x6cd,0x725,0x77d,0x7d5,0x82d,0x885,0x021,0x079,0x0d1,0x129,0x181,0x1d9,0x231,0x289,0x2e1,0x339,0x391,0x3e9,0x441,0x499,0x4f1,0x549,0x5a1,0x5f9,0x651,0x6a9,0x701,0x759,0x7b1,0x809,0x861,0x8b9,0x055 },
+	{ 0x3b2,0x40a,0x462,0x4ba,0x512,0x56a,0x5c2,0x61a,0x672,0x6ca,0x722,0x77a,0x7d2,0x82a,0x882,0x01e,0x076,0x0ce,0x126,0x17e,0x1d6,0x22e,0x286,0x2de,0x336,0x38e,0x3e6,0x43e,0x496,0x4ee,0x546,0x59e,0x5f6,0x64e,0x6a6,0x6fe,0x756,0x7ae,0x806,0x85e,0x8b6,0x052,0x0aa },
+	{ 0x3b3,0x40b,0x463,0x4bb,0x513,0x56b,0x5c3,0x61b,0x673,0x6cb,0x723,0x77b,0x7d3,0x82b,0x883,0x01f,0x077,0x0cf,0x127,0x17f,0x1d7,0x22f,0x287,0x2df,0x337,0x38f,0x3e7,0x43f,0x497,0x4ef,0x547,0x59f,0x5f7,0x64f,0x6a7,0x6ff,0x757,0x7af,0x807,0x85f,0x8b7,0x053,0x0ab },
+	{ 0x408,0x460,0x4b8,0x510,0x568,0x5c0,0x618,0x670,0x6c8,0x720,0x778,0x7d0,0x828,0x880,0x01c,0x074,0x0cc,0x124,0x17c,0x1d4,0x22c,0x284,0x2dc,0x334,0x38c,0x3e4,0x43c,0x494,0x4ec,0x544,0x59c,0x5f4,0x64c,0x6a4,0x6fc,0x754,0x7ac,0x804,0x85c,0x8b4,0x050,0x0a8,0x100 },
+	{ 0x409,0x461,0x4b9,0x511,0x569,0x5c1,0x619,0x671,0x6c9,0x721,0x779,0x7d1,0x829,0x881,0x01d,0x075,0x0cd,0x125,0x17d,0x1d5,0x22d,0x285,0x2dd,0x335,0x38d,0x3e5,0x43d,0x495,0x4ed,0x545,0x59d,0x5f5,0x64d,0x6a5,0x6fd,0x755,0x7ad,0x805,0x85d,0x8b5,0x051,0x0a9,0x101 },
+	{ 0x45e,0x4b6,0x50e,0x566,0x5be,0x616,0x66e,0x6c6,0x71e,0x776,0x7ce,0x826,0x87e,0x01a,0x072,0x0ca,0x122,0x17a,0x1d2,0x22a,0x282,0x2da,0x332,0x38a,0x3e2,0x43a,0x492,0x4ea,0x542,0x59a,0x5f2,0x64a,0x6a2,0x6fa,0x752,0x7aa,0x802,0x85a,0x8b2,0x04e,0x0a6,0x0fe,0x156 },
+	{ 0x45f,0x4b7,0x50f,0x567,0x5bf,0x617,0x66f,0x6c7,0x71f,0x777,0x7cf,0x827,0x87f,0x01b,0x073,0x0cb,0x123,0x17b,0x1d3,0x22b,0x283,0x2db,0x333,0x38b,0x3e3,0x43b,0x493,0x4eb,0x543,0x59b,0x5f3,0x64b,0x6a3,0x6fb,0x753,0x7ab,0x803,0x85b,0x8b3,0x04f,0x0a7,0x0ff,0x157 },
+	{ 0x4b4,0x50c,0x564,0x5bc,0x614,0x66c,0x6c4,0x71c,0x774,0x7cc,0x824,0x87c,0x018,0x070,0x0c8,0x120,0x178,0x1d0,0x228,0x280,0x2d8,0x330,0x388,0x3e0,0x438,0x490,0x4e8,0x540,0x598,0x5f0,0x648,0x6a0,0x6f8,0x750,0x7a8,0x800,0x858,0x8b0,0x04c,0x0a4,0x0fc,0x154,0x1ac },
+	{ 0x4b5,0x50d,0x565,0x5bd,0x615,0x66d,0x6c5,0x71d,0x775,0x7cd,0x825,0x87d,0x019,0x071,0x0c9,0x121,0x179,0x1d1,0x229,0x281,0x2d9,0x331,0x389,0x3e1,0x439,0x491,0x4e9,0x541,0x599,0x5f1,0x649,0x6a1,0x6f9,0x751,0x7a9,0x801,0x859,0x8b1,0x04d,0x0a5,0x0fd,0x155,0x1ad },
+	{ 0x50a,0x562,0x5ba,0x612,0x66a,0x6c2,0x71a,0x772,0x7ca,0x822,0x87a,0x016,0x06e,0x0c6,0x11e,0x176,0x1ce,0x226,0x27e,0x2d6,0x32e,0x386,0x3de,0x436,0x48e,0x4e6,0x53e,0x596,0x5ee,0x646,0x69e,0x6f6,0x74e,0x7a6,0x7fe,0x856,0x8ae,0x04a,0x0a2,0x0fa,0x152,0x1aa,0x202 },
+	{ 0x50b,0x563,0x5bb,0x613,0x66b,0x6c3,0x71b,0x773,0x7cb,0x823,0x87b,0x017,0x06f,0x0c7,0x11f,0x177,0x1cf,0x227,0x27f,0x2d7,0x32f,0x387,0x3df,0x437,0x48f,0x4e7,0x53f,0x597,0x5ef,0x647,0x69f,0x6f7,0x74f,0x7a7,0x7ff,0x857,0x8af,0x04b,0x0a3,0x0fb,0x153,0x1ab,0x203 },
+	{ 0x560,0x5b8,0x610,0x668,0x6c0,0x718,0x770,0x7c8,0x820,0x878,0x014,0x06c,0x0c4,0x11c,0x174,0x1cc,0x224,0x27c,0x2d4,0x32c,0x384,0x3dc,0x434,0x48c,0x4e4,0x53c,0x594,0x5ec,0x644,0x69c,0x6f4,0x74c,0x7a4,0x7fc,0x854,0x8ac,0x048,0x0a0,0x0f8,0x150,0x1a8,0x200,0x258 },
+	{ 0x561,0x5b9,0x611,0x669,0x6c1,0x719,0x771,0x7c9,0x821,0x879,0x015,0x06d,0x0c5,0x11d,0x175,0x1cd,0x225,0x27d,0x2d5,0x32d,0x385,0x3dd,0x435,0x48d,0x4e5,0x53d,0x595,0x5ed,0x645,0x69d,0x6f5,0x74d,0x7a5,0x7fd,0x855,0x8ad,0x049,0x0a1,0x0f9,0x151,0x1a9,0x201,0x259 },
+	{ 0x5b6,0x60e,0x666,0x6be,0x716,0x76e,0x7c6,0x81e,0x876,0x012,0x06a,0x0c2,0x11a,0x172,0x1ca,0x222,0x27a,0x2d2,0x32a,0x382,0x3da,0x432,0x48a,0x4e2,0x53a,0x592,0x5ea,0x642,0x69a,0x6f2,0x74a,0x7a2,0x7fa,0x852,0x8aa,0x046,0x09e,0x0f6,0x14e,0x1a6,0x1fe,0x256,0x2ae },
+	{ 0x5b7,0x60f,0x667,0x6bf,0x717,0x76f,0x7c7,0x81f,0x877,0x013,0x06b,0x0c3,0x11b,0x173,0x1cb,0x223,0x27b,0x2d3,0x32b,0x383,0x3db,0x433,0x48b,0x4e3,0x53b,0x593,0x5eb,0x643,0x69b,0x6f3,0x74b,0x7a3,0x7fb,0x853,0x8ab,0x047,0x09f,0x0f7,0x14f,0x1a7,0x1ff,0x257,0x2af },
+	{ 0x60c,0x664,0x6bc,0x714,0x76c,0x7c4,0x81c,0x874,0x010,0x068,0x0c0,0x118,0x170,0x1c8,0x220,0x278,0x2d0,0x328,0x380,0x3d8,0x430,0x488,0x4e0,0x538,0x590,0x5e8,0x640,0x698,0x6f0,0x748,0x7a0,0x7f8,0x850,0x8a8,0x044,0x09c,0x0f4,0x14c,0x1a4,0x1fc,0x254,0x2ac,0x304 },
+	{ 0x60d,0x665,0x6bd,0x715,0x76d,0x7c5,0x81d,0x875,0x011,0x069,0x0c1,0x119,0x171,0x1c9,0x221,0x279,0x2d1,0x329,0x381,0x3d9,0x431,0x489,0x4e1,0x539,0x591,0x5e9,0x641,0x699,0x6f1,0x749,0x7a1,0x7f9,0x851,0x8a9,0x045,0x09d,0x0f5,0x14d,0x1a5,0x1fd,0x255,0x2ad,0x305 },
+	{ 0x662,0x6ba,0x712,0x76a,0x7c2,0x81a,0x872,0x00e,0x066,0x0be,0x116,0x16e,0x1c6,0x21e,0x276,0x2ce,0x326,0x37e,0x3d6,0x42e,0x486,0x4de,0x536,0x58e,0x5e6,0x63e,0x696,0x6ee,0x746,0x79e,0x7f6,0x84e,0x8a6,0x042,0x09a,0x0f2,0x14a,0x1a2,0x1fa,0x252,0x2aa,0x302,0x35a },
+	{ 0x663,0x6bb,0x713,0x76b,0x7c3,0x81b,0x873,0x00f,0x067,0x0bf,0x117,0x16f,0x1c7,0x21f,0x277,0x2cf,0x327,0x37f,0x3d7,0x42f,0x487,0x4df,0x537,0x58f,0x5e7,0x63f,0x697,0x6ef,0x747,0x79f,0x7f7,0x84f,0x8a7,0x043,0x09b,0x0f3,0x14b,0x1a3,0x1fb,0x253,0x2ab,0x303,0x35b },
+	{ 0x6b8,0x710,0x768,0x7c0,0x818,0x870,0x00c,0x064,0x0bc,0x114,0x16c,0x1c4,0x21c,0x274,0x2cc,0x324,0x37c,0x3d4,0x42c,0x484,0x4dc,0x534,0x58c,0x5e4,0x63c,0x694,0x6ec,0x744,0x79c,0x7f4,0x84c,0x8a4,0x040,0x098,0x0f0,0x148,0x1a0,0x1f8,0x250,0x2a8,0x300,0x358,0x3b0 },
+	{ 0x6b9,0x711,0x769,0x7c1,0x819,0x871,0x00d,0x065,0x0bd,0x115,0x16d,0x1c5,0x21d,0x275,0x2cd,0x325,0x37d,0x3d5,0x42d,0x485,0x4dd,0x535,0x58d,0x5e5,0x63d,0x695,0x6ed,0x745,0x79d,0x7f5,0x84d,0x8a5,0x041,0x099,0x0f1,0x149,0x1a1,0x1f9,0x251,0x2a9,0x301,0x359,0x3b1 },
+	{ 0x70e,0x766,0x7be,0x816,0x86e,0x00a,0x062,0x0ba,0x112,0x16a,0x1c2,0x21a,0x272,0x2ca,0x322,0x37a,0x3d2,0x42a,0x482,0x4da,0x532,0x58a,0x5e2,0x63a,0x692,0x6ea,0x742,0x79a,0x7f2,0x84a,0x8a2,0x03e,0x096,0x0ee,0x146,0x19e,0x1f6,0x24e,0x2a6,0x2fe,0x356,0x3ae,0x406 },
+	{ 0x70f,0x767,0x7bf,0x817,0x86f,0x00b,0x063,0x0bb,0x113,0x16b,0x1c3,0x21b,0x273,0x2cb,0x323,0x37b,0x3d3,0x42b,0x483,0x4db,0x533,0x58b,0x5e3,0x63b,0x693,0x6eb,0x743,0x79b,0x7f3,0x84b,0x8a3,0x03f,0x097,0x0ef,0x147,0x19f,0x1f7,0x24f,0x2a7,0x2ff,0x357,0x3af,0x407 },
+	{ 0x764,0x7bc,0x814,0x86c,0x008,0x060,0x0b8,0x110,0x168,0x1c0,0x218,0x270,0x2c8,0x320,0x378,0x3d0,0x428,0x480,0x4d8,0x530,0x588,0x5e0,0x638,0x690,0x6e8,0x740,0x798,0x7f0,0x848,0x8a0,0x03c,0x094,0x0ec,0x144,0x19c,0x1f4,0x24c,0x2a4,0x2fc,0x354,0x3ac,0x404,0x45c },
+	{ 0x765,0x7bd,0x815,0x86d,0x009,0x061,0x0b9,0x111,0x169,0x1c1,0x219,0x271,0x2c9,0x321,0x379,0x3d1,0x429,0x481,0x4d9,0x531,0x589,0x5e1,0x639,0x691,0x6e9,0x741,0x799,0x7f1,0x849,0x8a1,0x03d,0x095,0x0ed,0x145,0x19d,0x1f5,0x24d,0x2a5,0x2fd,0x355,0x3ad,0x405,0x45d },
+	{ 0x7ba,0x812,0x86a,0x006,0x05e,0x0b6,0x10e,0x166,0x1be,0x216,0x26e,0x2c6,0x31e,0x376,0x3ce,0x426,0x47e,0x4d6,0x52e,0x586,0x5de,0x636,0x68e,0x6e6,0x73e,0x796,0x7ee,0x846,0x89e,0x03a,0x092,0x0ea,0x142,0x19a,0x1f2,0x24a,0x2a2,0x2fa,0x352,0x3aa,0x402,0x45a,0x4b2 },
+	{ 0x7bb,0x813,0x86b,0x007,0x05f,0x0b7,0x10f,0x167,0x1bf,0x217,0x26f,0x2c7,0x31f,0x377,0x3cf,0x427,0x47f,0x4d7,0x52f,0x587,0x5df,0x637,0x68f,0x6e7,0x73f,0x797,0x7ef,0x847,0x89f,0x03b,0x093,0x0eb,0x143,0x19b,0x1f3,0x24b,0x2a3,0x2fb,0x353,0x3ab,0x403,0x45b,0x4b3 },
+	{ 0x810,0x868,0x004,0x05c,0x0b4,0x10c,0x164,0x1bc,0x214,0x26c,0x2c4,0x31c,0x374,0x3cc,0x424,0x47c,0x4d4,0x52c,0x584,0x5dc,0x634,0x68c,0x6e4,0x73c,0x794,0x7ec,0x844,0x89c,0x038,0x090,0x0e8,0x140,0x198,0x1f0,0x248,0x2a0,0x2f8,0x350,0x3a8,0x400,0x458,0x4b0,0x508 },
+	{ 0x811,0x869,0x005,0x05d,0x0b5,0x10d,0x165,0x1bd,0x215,0x26d,0x2c5,0x31d,0x375,0x3cd,0x425,0x47d,0x4d5,0x52d,0x585,0x5dd,0x635,0x68d,0x6e5,0x73d,0x795,0x7ed,0x845,0x89d,0x039,0x091,0x0e9,0x141,0x199,0x1f1,0x249,0x2a1,0x2f9,0x351,0x3a9,0x401,0x459,0x4b1,0x509 },
+	{ 0x866,0x002,0x05a,0x0b2,0x10a,0x162,0x1ba,0x212,0x26a,0x2c2,0x31a,0x372,0x3ca,0x422,0x47a,0x4d2,0x52a,0x582,0x5da,0x632,0x68a,0x6e2,0x73a,0x792,0x7ea,0x842,0x89a,0x036,0x08e,0x0e6,0x13e,0x196,0x1ee,0x246,0x29e,0x2f6,0x34e,0x3a6,0x3fe,0x456,0x4ae,0x506,0x55e },
+	{ 0x867,0x003,0x05b,0x0b3,0x10b,0x163,0x1bb,0x213,0x26b,0x2c3,0x31b,0x373,0x3cb,0x423,0x47b,0x4d3,0x52b,0x583,0x5db,0x633,0x68b,0x6e3,0x73b,0x793,0x7eb,0x843,0x89b,0x037,0x08f,0x0e7,0x13f,0x197,0x1ef,0x247,0x29f,0x2f7,0x34f,0x3a7,0x3ff,0x457,0x4af,0x507,0x55f }
+};
+
+/*-------------------------------------------------
+ *  ecc_source_byte - return data from the sector
+ *  at the given offset, masking anything
+ *  particular to a mode
+ *-------------------------------------------------
+ */
+
+static inline uint8_t ecc_source_byte(const uint8_t *sector, uint32_t offset)
+{
+	/* in mode 2 always treat these as 0 bytes */
+	return (sector[MODE_OFFSET] == 2 && offset < 4) ? 0x00 : sector[SYNC_OFFSET + SYNC_NUM_BYTES + offset];
+}
+
+/**
+ * @fn  void ecc_compute_bytes(const uint8_t *sector, const uint16_t *row, int rowlen, uint8_t &val1, uint8_t &val2)
+ *
+ * @brief   -------------------------------------------------
+ *            ecc_compute_bytes - calculate an ECC value (P or Q)
+ *          -------------------------------------------------.
+ *
+ * @param   sector          The sector.
+ * @param   row             The row.
+ * @param   rowlen          The rowlen.
+ * @param [in,out]  val1    The first value.
+ * @param [in,out]  val2    The second value.
+ */
+
+void ecc_compute_bytes(const uint8_t *sector, const uint16_t *row, int rowlen, uint8_t *val1, uint8_t *val2)
+{
+	int component;
+	*val1 = *val2 = 0;
+	for (component = 0; component < rowlen; component++)
+	{
+		*val1 ^= ecc_source_byte(sector, row[component]);
+		*val2 ^= ecc_source_byte(sector, row[component]);
+		*val1 = ecclow[*val1];
+	}
+	*val1 = ecchigh[ecclow[*val1] ^ *val2];
+	*val2 ^= *val1;
+}
+
+/**
+ * @fn  int ecc_verify(const uint8_t *sector)
+ *
+ * @brief   -------------------------------------------------
+ *            ecc_verify - verify the P and Q ECC codes in a sector
+ *          -------------------------------------------------.
+ *
+ * @param   sector  The sector.
+ *
+ * @return  true if it succeeds, false if it fails.
+ */
+
+int ecc_verify(const uint8_t *sector)
+{
+	int byte;
+	/* first verify P bytes */
+	for (byte = 0; byte < ECC_P_NUM_BYTES; byte++)
+	{
+		uint8_t val1, val2;
+		ecc_compute_bytes(sector, poffsets[byte], ECC_P_COMP, &val1, &val2);
+		if (sector[ECC_P_OFFSET + byte] != val1 || sector[ECC_P_OFFSET + ECC_P_NUM_BYTES + byte] != val2)
+			return 0;
+	}
+
+	/* then verify Q bytes */
+	for (byte = 0; byte < ECC_Q_NUM_BYTES; byte++)
+	{
+		uint8_t val1, val2;
+		ecc_compute_bytes(sector, qoffsets[byte], ECC_Q_COMP, &val1, &val2);
+		if (sector[ECC_Q_OFFSET + byte] != val1 || sector[ECC_Q_OFFSET + ECC_Q_NUM_BYTES + byte] != val2)
+			return 0;
+	}
+	return 1;
+}
+
+/**
+ * @fn  void ecc_generate(uint8_t *sector)
+ *
+ * @brief   -------------------------------------------------
+ *            ecc_generate - generate the P and Q ECC codes for a sector, overwriting any
+ *            existing codes
+ *          -------------------------------------------------.
+ *
+ * @param [in,out]  sector  If non-null, the sector.
+ */
+
+void ecc_generate(uint8_t *sector)
+{
+	int byte;
+	/* first verify P bytes */
+	for (byte = 0; byte < ECC_P_NUM_BYTES; byte++)
+		ecc_compute_bytes(sector, poffsets[byte], ECC_P_COMP, &sector[ECC_P_OFFSET + byte], &sector[ECC_P_OFFSET + ECC_P_NUM_BYTES + byte]);
+
+	/* then verify Q bytes */
+	for (byte = 0; byte < ECC_Q_NUM_BYTES; byte++)
+		ecc_compute_bytes(sector, qoffsets[byte], ECC_Q_COMP, &sector[ECC_Q_OFFSET + byte], &sector[ECC_Q_OFFSET + ECC_Q_NUM_BYTES + byte]);
+}
+
+/**
+ * @fn  void ecc_clear(uint8_t *sector)
+ *
+ * @brief   -------------------------------------------------
+ *            ecc_clear - erase the ECC P and Q cods to 0 within a sector
+ *          -------------------------------------------------.
+ *
+ * @param [in,out]  sector  If non-null, the sector.
+ */
+
+void ecc_clear(uint8_t *sector)
+{
+	memset(&sector[ECC_P_OFFSET], 0, 2 * ECC_P_NUM_BYTES);
+	memset(&sector[ECC_Q_OFFSET], 0, 2 * ECC_Q_NUM_BYTES);
+}
+
+#endif /* WANT_RAW_DATA_SECTOR */
diff --git a/deps/libchdr/src/libchdr_chd.c b/deps/libchdr/src/libchdr_chd.c
new file mode 100644
index 00000000..c5cc1794
--- /dev/null
+++ b/deps/libchdr/src/libchdr_chd.c
@@ -0,0 +1,2737 @@
+/***************************************************************************
+
+    chd.c
+
+    MAME Compressed Hunks of Data file format
+
+****************************************************************************
+
+    Copyright Aaron Giles
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+        * Redistributions of source code must retain the above copyright
+          notice, this list of conditions and the following disclaimer.
+        * Redistributions in binary form must reproduce the above copyright
+          notice, this list of conditions and the following disclaimer in
+          the documentation and/or other materials provided with the
+          distribution.
+        * Neither the name 'MAME' nor the names of its contributors may be
+          used to endorse or promote products derived from this software
+          without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY AARON GILES ''AS IS'' AND ANY EXPRESS OR
+    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL AARON GILES BE LIABLE FOR ANY DIRECT,
+    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+    IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+    POSSIBILITY OF SUCH DAMAGE.
+
+***************************************************************************/
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include <libchdr/chd.h>
+#include <libchdr/cdrom.h>
+#include <libchdr/flac.h>
+#include <libchdr/huffman.h>
+
+#include "LzmaEnc.h"
+#include "LzmaDec.h"
+#include "zlib.h"
+
+#undef TRUE
+#undef FALSE
+#define TRUE 1
+#define FALSE 0
+
+#undef MAX
+#undef MIN
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+
+#define SHA1_DIGEST_SIZE 20
+
+/***************************************************************************
+    DEBUGGING
+***************************************************************************/
+
+#define PRINTF_MAX_HUNK				(0)
+
+/***************************************************************************
+    CONSTANTS
+***************************************************************************/
+
+#define MAP_STACK_ENTRIES			512			/* max number of entries to use on the stack */
+#define MAP_ENTRY_SIZE				16			/* V3 and later */
+#define OLD_MAP_ENTRY_SIZE			8			/* V1-V2 */
+#define METADATA_HEADER_SIZE		16			/* metadata header size */
+
+#define MAP_ENTRY_FLAG_TYPE_MASK	0x0f		/* what type of hunk */
+#define MAP_ENTRY_FLAG_NO_CRC		0x10		/* no CRC is present */
+
+#define CHD_V1_SECTOR_SIZE			512			/* size of a "sector" in the V1 header */
+
+#define COOKIE_VALUE				0xbaadf00d
+#define MAX_ZLIB_ALLOCS				64
+
+#define END_OF_LIST_COOKIE			"EndOfListCookie"
+
+#define NO_MATCH					(~0)
+
+#ifdef WANT_RAW_DATA_SECTOR
+static const uint8_t s_cd_sync_header[12] = { 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 };
+#endif
+
+/* V3-V4 entry types */
+enum
+{
+	V34_MAP_ENTRY_TYPE_INVALID = 0,             /* invalid type */
+	V34_MAP_ENTRY_TYPE_COMPRESSED = 1,          /* standard compression */
+	V34_MAP_ENTRY_TYPE_UNCOMPRESSED = 2,        /* uncompressed data */
+	V34_MAP_ENTRY_TYPE_MINI = 3,                /* mini: use offset as raw data */
+	V34_MAP_ENTRY_TYPE_SELF_HUNK = 4,           /* same as another hunk in this file */
+	V34_MAP_ENTRY_TYPE_PARENT_HUNK = 5,         /* same as a hunk in the parent file */
+	V34_MAP_ENTRY_TYPE_2ND_COMPRESSED = 6       /* compressed with secondary algorithm (usually FLAC CDDA) */
+};
+
+/* V5 compression types */
+enum
+{
+	/* codec #0
+	 * these types are live when running */
+	COMPRESSION_TYPE_0 = 0,
+	/* codec #1 */
+	COMPRESSION_TYPE_1 = 1,
+	/* codec #2 */
+	COMPRESSION_TYPE_2 = 2,
+	/* codec #3 */
+	COMPRESSION_TYPE_3 = 3,
+	/* no compression; implicit length = hunkbytes */
+	COMPRESSION_NONE = 4,
+	/* same as another block in this chd */
+	COMPRESSION_SELF = 5,
+	/* same as a hunk's worth of units in the parent chd */
+	COMPRESSION_PARENT = 6,
+
+	/* start of small RLE run (4-bit length)
+	 * these additional pseudo-types are used for compressed encodings: */
+	COMPRESSION_RLE_SMALL,
+	/* start of large RLE run (8-bit length) */
+	COMPRESSION_RLE_LARGE,
+	/* same as the last COMPRESSION_SELF block */
+	COMPRESSION_SELF_0,
+	/* same as the last COMPRESSION_SELF block + 1 */
+	COMPRESSION_SELF_1,
+	/* same block in the parent */
+	COMPRESSION_PARENT_SELF,
+	/* same as the last COMPRESSION_PARENT block */
+	COMPRESSION_PARENT_0,
+	/* same as the last COMPRESSION_PARENT block + 1 */
+	COMPRESSION_PARENT_1
+};
+
+/***************************************************************************
+    MACROS
+***************************************************************************/
+
+#define EARLY_EXIT(x)				do { (void)(x); goto cleanup; } while (0)
+
+/***************************************************************************
+    TYPE DEFINITIONS
+***************************************************************************/
+
+/* interface to a codec */
+typedef struct _codec_interface codec_interface;
+struct _codec_interface
+{
+	UINT32		compression;								/* type of compression */
+	const char *compname;									/* name of the algorithm */
+	UINT8		lossy;										/* is this a lossy algorithm? */
+	chd_error	(*init)(void *codec, UINT32 hunkbytes);		/* codec initialize */
+	void		(*free)(void *codec);						/* codec free */
+	chd_error	(*decompress)(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen); /* decompress data */
+	chd_error	(*config)(void *codec, int param, void *config); /* configure */
+};
+
+/* a single map entry */
+typedef struct _map_entry map_entry;
+struct _map_entry
+{
+	UINT64					offset;			/* offset within the file of the data */
+	UINT32					crc;			/* 32-bit CRC of the data */
+	UINT32					length;			/* length of the data */
+	UINT8					flags;			/* misc flags */
+};
+
+/* a single metadata entry */
+typedef struct _metadata_entry metadata_entry;
+struct _metadata_entry
+{
+	UINT64					offset;			/* offset within the file of the header */
+	UINT64					next;			/* offset within the file of the next header */
+	UINT64					prev;			/* offset within the file of the previous header */
+	UINT32					length;			/* length of the metadata */
+	UINT32					metatag;		/* metadata tag */
+	UINT8					flags;			/* flag bits */
+};
+
+/* codec-private data for the ZLIB codec */
+
+typedef struct _zlib_allocator zlib_allocator;
+struct _zlib_allocator
+{
+	UINT32 *				allocptr[MAX_ZLIB_ALLOCS];
+	UINT32 *				allocptr2[MAX_ZLIB_ALLOCS];
+};
+
+typedef struct _zlib_codec_data zlib_codec_data;
+struct _zlib_codec_data
+{
+	z_stream				inflater;
+	zlib_allocator			allocator;
+};
+
+/* codec-private data for the LZMA codec */
+#define MAX_LZMA_ALLOCS 64
+
+typedef struct _lzma_allocator lzma_allocator;
+struct _lzma_allocator
+{
+	void *(*Alloc)(void *p, size_t size);
+ 	void (*Free)(void *p, void *address); /* address can be 0 */
+	void (*FreeSz)(void *p, void *address, size_t size); /* address can be 0 */
+	uint32_t*	allocptr[MAX_LZMA_ALLOCS];
+	uint32_t*	allocptr2[MAX_LZMA_ALLOCS];
+};
+
+typedef struct _lzma_codec_data lzma_codec_data;
+struct _lzma_codec_data
+{
+	CLzmaDec		decoder;
+	lzma_allocator	allocator;
+};
+
+/* codec-private data for the CDZL codec */
+typedef struct _cdzl_codec_data cdzl_codec_data;
+struct _cdzl_codec_data {
+	/* internal state */
+	zlib_codec_data		base_decompressor;
+#ifdef WANT_SUBCODE
+	zlib_codec_data		subcode_decompressor;
+#endif
+	uint8_t*			buffer;
+};
+
+/* codec-private data for the CDLZ codec */
+typedef struct _cdlz_codec_data cdlz_codec_data;
+struct _cdlz_codec_data {
+	/* internal state */
+	lzma_codec_data		base_decompressor;
+#ifdef WANT_SUBCODE
+	zlib_codec_data		subcode_decompressor;
+#endif
+	uint8_t*			buffer;
+};
+
+/* codec-private data for the CDFL codec */
+typedef struct _cdfl_codec_data cdfl_codec_data;
+struct _cdfl_codec_data {
+	/* internal state */
+	int		swap_endian;
+	flac_decoder	decoder;
+#ifdef WANT_SUBCODE
+	zlib_codec_data		subcode_decompressor;
+#endif
+	uint8_t*	buffer;
+};
+
+/* internal representation of an open CHD file */
+struct _chd_file
+{
+	UINT32					cookie;			/* cookie, should equal COOKIE_VALUE */
+
+	core_file *				file;			/* handle to the open core file */
+	UINT8					owns_file;		/* flag indicating if this file should be closed on chd_close() */
+	chd_header				header;			/* header, extracted from file */
+
+	chd_file *				parent;			/* pointer to parent file, or NULL */
+
+	map_entry *				map;			/* array of map entries */
+
+#ifdef NEED_CACHE_HUNK
+	UINT8 *					cache;			/* hunk cache pointer */
+	UINT32					cachehunk;		/* index of currently cached hunk */
+
+	UINT8 *					compare;		/* hunk compare pointer */
+	UINT32					comparehunk;	/* index of current compare data */
+#endif
+
+	UINT8 *					compressed;		/* pointer to buffer for compressed data */
+	const codec_interface *	codecintf[4];	/* interface to the codec */
+
+	zlib_codec_data			zlib_codec_data;		/* zlib codec data */
+	cdzl_codec_data			cdzl_codec_data;		/* cdzl codec data */
+	cdlz_codec_data			cdlz_codec_data;		/* cdlz codec data */
+	cdfl_codec_data			cdfl_codec_data;		/* cdfl codec data */
+
+#ifdef NEED_CACHE_HUNK
+	UINT32					maxhunk;		/* maximum hunk accessed */
+#endif
+
+	UINT8 *					file_cache;		/* cache of underlying file */
+};
+
+
+/***************************************************************************
+    GLOBAL VARIABLES
+***************************************************************************/
+
+static const UINT8 nullmd5[CHD_MD5_BYTES] = { 0 };
+static const UINT8 nullsha1[CHD_SHA1_BYTES] = { 0 };
+
+/***************************************************************************
+    PROTOTYPES
+***************************************************************************/
+
+/* internal header operations */
+static chd_error header_validate(const chd_header *header);
+static chd_error header_read(chd_file *chd, chd_header *header);
+
+/* internal hunk read/write */
+#ifdef NEED_CACHE_HUNK
+static chd_error hunk_read_into_cache(chd_file *chd, UINT32 hunknum);
+#endif
+static chd_error hunk_read_into_memory(chd_file *chd, UINT32 hunknum, UINT8 *dest);
+
+/* internal map access */
+static chd_error map_read(chd_file *chd);
+
+/* metadata management */
+static chd_error metadata_find_entry(chd_file *chd, UINT32 metatag, UINT32 metaindex, metadata_entry *metaentry);
+
+/* zlib compression codec */
+static chd_error zlib_codec_init(void *codec, uint32_t hunkbytes);
+static void zlib_codec_free(void *codec);
+static chd_error zlib_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen);
+static voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size);
+static void zlib_fast_free(voidpf opaque, voidpf address);
+static void zlib_allocator_free(voidpf opaque);
+
+/* lzma compression codec */
+static chd_error lzma_codec_init(void *codec, uint32_t hunkbytes);
+static void lzma_codec_free(void *codec);
+static chd_error lzma_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen);
+
+/* cdzl compression codec */
+static chd_error cdzl_codec_init(void* codec, uint32_t hunkbytes);
+static void cdzl_codec_free(void* codec);
+static chd_error cdzl_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen);
+
+/* cdlz compression codec */
+static chd_error cdlz_codec_init(void* codec, uint32_t hunkbytes);
+static void cdlz_codec_free(void* codec);
+static chd_error cdlz_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen);
+
+/* cdfl compression codec */
+static chd_error cdfl_codec_init(void* codec, uint32_t hunkbytes);
+static void cdfl_codec_free(void* codec);
+static chd_error cdfl_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen);
+
+/***************************************************************************
+ *  LZMA ALLOCATOR HELPER
+ ***************************************************************************
+ */
+
+static void *lzma_fast_alloc(void *p, size_t size);
+static void lzma_fast_free(void *p, void *address);
+
+/*-------------------------------------------------
+ *  lzma_allocator_init
+ *-------------------------------------------------
+ */
+
+static void lzma_allocator_init(void* p)
+{
+	lzma_allocator *codec = (lzma_allocator *)(p);
+
+	/* reset pointer list */
+	memset(codec->allocptr, 0, sizeof(codec->allocptr));
+	memset(codec->allocptr2, 0, sizeof(codec->allocptr2));
+	codec->Alloc = lzma_fast_alloc;
+	codec->Free = lzma_fast_free;
+}
+
+/*-------------------------------------------------
+ *  lzma_allocator_free
+ *-------------------------------------------------
+ */
+
+static void lzma_allocator_free(void* p )
+{
+	int i;
+	lzma_allocator *codec = (lzma_allocator *)(p);
+
+	/* free our memory */
+	for (i = 0 ; i < MAX_LZMA_ALLOCS ; i++)
+	{
+		if (codec->allocptr[i] != NULL)
+			free(codec->allocptr[i]);
+	}
+}
+
+/*-------------------------------------------------
+ *  lzma_fast_alloc - fast malloc for lzma, which
+ *  allocates and frees memory frequently
+ *-------------------------------------------------
+ */
+
+/* Huge alignment values for possible SIMD optimization by compiler (NEON, SSE, AVX) */
+#define LZMA_MIN_ALIGNMENT_BITS 512
+#define LZMA_MIN_ALIGNMENT_BYTES (LZMA_MIN_ALIGNMENT_BITS / 8)
+
+static void *lzma_fast_alloc(void *p, size_t size)
+{
+	int scan;
+	uint32_t *addr        = NULL;
+	lzma_allocator *codec = (lzma_allocator *)(p);
+	uintptr_t vaddr = 0;
+
+	/* compute the size, rounding to the nearest 1k */
+	size = (size + 0x3ff) & ~0x3ff;
+
+	/* reuse a hunk if we can */
+	for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++)
+	{
+		uint32_t *ptr = codec->allocptr[scan];
+		if (ptr != NULL && size == *ptr)
+		{
+			/* set the low bit of the size so we don't match next time */
+			*ptr |= 1;
+
+			/* return aligned address of the block */
+			return codec->allocptr2[scan];
+		}
+	}
+
+	/* alloc a new one and put it into the list */
+	addr = (uint32_t *)malloc(size + sizeof(uint32_t) + LZMA_MIN_ALIGNMENT_BYTES);
+	if (addr==NULL)
+		return NULL;
+	for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++)
+	{
+		if (codec->allocptr[scan] == NULL)
+		{
+			/* store block address */
+			codec->allocptr[scan] = addr;
+
+			/* compute aligned address, store it */
+			vaddr = (uintptr_t)addr;
+			vaddr = (vaddr + sizeof(uint32_t) + (LZMA_MIN_ALIGNMENT_BYTES-1)) & (~(LZMA_MIN_ALIGNMENT_BYTES-1));
+			codec->allocptr2[scan] = (uint32_t*)vaddr;
+			break;
+		}
+	}
+
+	/* set the low bit of the size so we don't match next time */
+	*addr = size | 1;
+
+	/* return aligned address */
+	return (void*)vaddr;
+}
+
+/*-------------------------------------------------
+ *  lzma_fast_free - fast free for lzma, which
+ *  allocates and frees memory frequently
+ *-------------------------------------------------
+ */
+
+static void lzma_fast_free(void *p, void *address)
+{
+	int scan;
+	uint32_t *ptr = NULL;
+	lzma_allocator *codec = NULL;
+
+	if (address == NULL)
+		return;
+
+	codec = (lzma_allocator *)(p);
+
+	/* find the hunk */
+	ptr = (uint32_t *)address;
+	for (scan = 0; scan < MAX_LZMA_ALLOCS; scan++)
+	{
+		if (ptr == codec->allocptr2[scan])
+		{
+			/* clear the low bit of the size to allow matches */
+			*codec->allocptr[scan] &= ~1;
+			return;
+		}
+	}
+}
+
+/***************************************************************************
+ *  LZMA DECOMPRESSOR
+ ***************************************************************************
+ */
+
+/*-------------------------------------------------
+ *  lzma_codec_init - constructor
+ *-------------------------------------------------
+ */
+
+static chd_error lzma_codec_init(void* codec, uint32_t hunkbytes)
+{
+	CLzmaEncHandle enc;
+	CLzmaEncProps encoder_props;
+	Byte decoder_props[LZMA_PROPS_SIZE];
+	SizeT props_size;
+	lzma_allocator* alloc;
+	lzma_codec_data* lzma_codec = (lzma_codec_data*) codec;
+
+	/* construct the decoder */
+	LzmaDec_Construct(&lzma_codec->decoder);
+
+	/* FIXME: this code is written in a way that makes it impossible to safely upgrade the LZMA SDK
+	 * This code assumes that the current version of the encoder imposes the same requirements on the
+	 * decoder as the encoder used to produce the file.  This is not necessarily true.  The format
+	 * needs to be changed so the encoder properties are written to the file.
+
+	 * configure the properties like the compressor did */
+	LzmaEncProps_Init(&encoder_props);
+	encoder_props.level = 9;
+	encoder_props.reduceSize = hunkbytes;
+	LzmaEncProps_Normalize(&encoder_props);
+
+	/* convert to decoder properties */
+	alloc = &lzma_codec->allocator;
+	lzma_allocator_init(alloc);
+	enc = LzmaEnc_Create((ISzAlloc*)alloc);
+	if (!enc)
+		return CHDERR_DECOMPRESSION_ERROR;
+	if (LzmaEnc_SetProps(enc, &encoder_props) != SZ_OK)
+	{
+		LzmaEnc_Destroy(enc, (ISzAlloc*)&alloc, (ISzAlloc*)&alloc);
+		return CHDERR_DECOMPRESSION_ERROR;
+	}
+	props_size = sizeof(decoder_props);
+	if (LzmaEnc_WriteProperties(enc, decoder_props, &props_size) != SZ_OK)
+	{
+		LzmaEnc_Destroy(enc, (ISzAlloc*)alloc, (ISzAlloc*)alloc);
+		return CHDERR_DECOMPRESSION_ERROR;
+	}
+	LzmaEnc_Destroy(enc, (ISzAlloc*)alloc, (ISzAlloc*)alloc);
+
+	/* do memory allocations */
+	if (LzmaDec_Allocate(&lzma_codec->decoder, decoder_props, LZMA_PROPS_SIZE, (ISzAlloc*)alloc) != SZ_OK)
+		return CHDERR_DECOMPRESSION_ERROR;
+
+	/* Okay */
+	return CHDERR_NONE;
+}
+
+/*-------------------------------------------------
+ *  lzma_codec_free
+ *-------------------------------------------------
+ */
+
+static void lzma_codec_free(void* codec)
+{
+	lzma_codec_data* lzma_codec = (lzma_codec_data*) codec;
+
+	/* free memory */
+	LzmaDec_Free(&lzma_codec->decoder, (ISzAlloc*)&lzma_codec->allocator);
+	lzma_allocator_free(&lzma_codec->allocator);
+}
+
+/*-------------------------------------------------
+ *  decompress - decompress data using the LZMA
+ *  codec
+ *-------------------------------------------------
+ */
+
+static chd_error lzma_codec_decompress(void* codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen)
+{
+	ELzmaStatus status;
+	SRes res;
+	SizeT consumedlen, decodedlen;
+	/* initialize */
+	lzma_codec_data* lzma_codec = (lzma_codec_data*) codec;
+	LzmaDec_Init(&lzma_codec->decoder);
+
+	/* decode */
+	consumedlen = complen;
+	decodedlen = destlen;
+	res = LzmaDec_DecodeToBuf(&lzma_codec->decoder, dest, &decodedlen, src, &consumedlen, LZMA_FINISH_END, &status);
+	if ((res != SZ_OK && res != LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK) || consumedlen != complen || decodedlen != destlen)
+		return CHDERR_DECOMPRESSION_ERROR;
+	return CHDERR_NONE;
+}
+
+/* cdlz */
+static chd_error cdlz_codec_init(void* codec, uint32_t hunkbytes)
+{
+	chd_error ret;
+	cdlz_codec_data* cdlz = (cdlz_codec_data*) codec;
+
+	/* allocate buffer */
+	cdlz->buffer = (uint8_t*)malloc(sizeof(uint8_t) * hunkbytes);
+	if (cdlz->buffer == NULL)
+		return CHDERR_OUT_OF_MEMORY;
+
+	/* make sure the CHD's hunk size is an even multiple of the frame size */
+	ret = lzma_codec_init(&cdlz->base_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SECTOR_DATA);
+	if (ret != CHDERR_NONE)
+		return ret;
+
+#ifdef WANT_SUBCODE
+	ret = zlib_codec_init(&cdlz->subcode_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SUBCODE_DATA);
+	if (ret != CHDERR_NONE)
+		return ret;
+#endif
+
+	if (hunkbytes % CD_FRAME_SIZE != 0)
+		return CHDERR_CODEC_ERROR;
+
+	return CHDERR_NONE;
+}
+
+static void cdlz_codec_free(void* codec)
+{
+	cdlz_codec_data* cdlz = (cdlz_codec_data*) codec;
+	free(cdlz->buffer);
+	lzma_codec_free(&cdlz->base_decompressor);
+#ifdef WANT_SUBCODE
+	zlib_codec_free(&cdlz->subcode_decompressor);
+#endif
+}
+
+static chd_error cdlz_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen)
+{
+	uint32_t framenum;
+	cdlz_codec_data* cdlz = (cdlz_codec_data*)codec;
+
+	/* determine header bytes */
+	uint32_t frames = destlen / CD_FRAME_SIZE;
+	uint32_t complen_bytes = (destlen < 65536) ? 2 : 3;
+	uint32_t ecc_bytes = (frames + 7) / 8;
+	uint32_t header_bytes = ecc_bytes + complen_bytes;
+
+	/* extract compressed length of base */
+	uint32_t complen_base = (src[ecc_bytes + 0] << 8) | src[ecc_bytes + 1];
+	if (complen_bytes > 2)
+		complen_base = (complen_base << 8) | src[ecc_bytes + 2];
+
+	/* reset and decode */
+	lzma_codec_decompress(&cdlz->base_decompressor, &src[header_bytes], complen_base, &cdlz->buffer[0], frames * CD_MAX_SECTOR_DATA);
+#ifdef WANT_SUBCODE
+	zlib_codec_decompress(&cdlz->subcode_decompressor, &src[header_bytes + complen_base], complen - complen_base - header_bytes, &cdlz->buffer[frames * CD_MAX_SECTOR_DATA], frames * CD_MAX_SUBCODE_DATA);
+#endif
+
+	/* reassemble the data */
+	for (framenum = 0; framenum < frames; framenum++)
+	{
+		uint8_t *sector;
+
+		memcpy(&dest[framenum * CD_FRAME_SIZE], &cdlz->buffer[framenum * CD_MAX_SECTOR_DATA], CD_MAX_SECTOR_DATA);
+#ifdef WANT_SUBCODE
+		memcpy(&dest[framenum * CD_FRAME_SIZE + CD_MAX_SECTOR_DATA], &cdlz->buffer[frames * CD_MAX_SECTOR_DATA + framenum * CD_MAX_SUBCODE_DATA], CD_MAX_SUBCODE_DATA);
+#endif
+
+#ifdef WANT_RAW_DATA_SECTOR
+		/* reconstitute the ECC data and sync header */
+		sector = (uint8_t *)&dest[framenum * CD_FRAME_SIZE];
+		if ((src[framenum / 8] & (1 << (framenum % 8))) != 0)
+		{
+			memcpy(sector, s_cd_sync_header, sizeof(s_cd_sync_header));
+			ecc_generate(sector);
+		}
+#endif
+	}
+	return CHDERR_NONE;
+}
+
+/* cdzl */
+
+static chd_error cdzl_codec_init(void *codec, uint32_t hunkbytes)
+{
+	chd_error ret;
+	cdzl_codec_data* cdzl = (cdzl_codec_data*)codec;
+
+	/* make sure the CHD's hunk size is an even multiple of the frame size */
+	if (hunkbytes % CD_FRAME_SIZE != 0)
+		return CHDERR_CODEC_ERROR;
+
+	cdzl->buffer = (uint8_t*)malloc(sizeof(uint8_t) * hunkbytes);
+	if (cdzl->buffer == NULL)
+		return CHDERR_OUT_OF_MEMORY;
+
+	ret = zlib_codec_init(&cdzl->base_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SECTOR_DATA);
+	if (ret != CHDERR_NONE)
+		return ret;
+
+#ifdef WANT_SUBCODE
+	ret = zlib_codec_init(&cdzl->subcode_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SUBCODE_DATA);
+	if (ret != CHDERR_NONE)
+		return ret;
+#endif
+
+	return CHDERR_NONE;
+}
+
+static void cdzl_codec_free(void *codec)
+{
+	cdzl_codec_data* cdzl = (cdzl_codec_data*)codec;
+	zlib_codec_free(&cdzl->base_decompressor);
+#ifdef WANT_SUBCODE
+	zlib_codec_free(&cdzl->subcode_decompressor);
+#endif
+	free(cdzl->buffer);
+}
+
+static chd_error cdzl_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen)
+{
+	uint32_t framenum;
+	cdzl_codec_data* cdzl = (cdzl_codec_data*)codec;
+
+	/* determine header bytes */
+	uint32_t frames = destlen / CD_FRAME_SIZE;
+	uint32_t complen_bytes = (destlen < 65536) ? 2 : 3;
+	uint32_t ecc_bytes = (frames + 7) / 8;
+	uint32_t header_bytes = ecc_bytes + complen_bytes;
+
+	/* extract compressed length of base */
+	uint32_t complen_base = (src[ecc_bytes + 0] << 8) | src[ecc_bytes + 1];
+	if (complen_bytes > 2)
+		complen_base = (complen_base << 8) | src[ecc_bytes + 2];
+
+	/* reset and decode */
+	zlib_codec_decompress(&cdzl->base_decompressor, &src[header_bytes], complen_base, &cdzl->buffer[0], frames * CD_MAX_SECTOR_DATA);
+#ifdef WANT_SUBCODE
+	zlib_codec_decompress(&cdzl->subcode_decompressor, &src[header_bytes + complen_base], complen - complen_base - header_bytes, &cdzl->buffer[frames * CD_MAX_SECTOR_DATA], frames * CD_MAX_SUBCODE_DATA);
+#endif
+
+	/* reassemble the data */
+	for (framenum = 0; framenum < frames; framenum++)
+	{
+		uint8_t *sector;
+
+		memcpy(&dest[framenum * CD_FRAME_SIZE], &cdzl->buffer[framenum * CD_MAX_SECTOR_DATA], CD_MAX_SECTOR_DATA);
+#ifdef WANT_SUBCODE
+		memcpy(&dest[framenum * CD_FRAME_SIZE + CD_MAX_SECTOR_DATA], &cdzl->buffer[frames * CD_MAX_SECTOR_DATA + framenum * CD_MAX_SUBCODE_DATA], CD_MAX_SUBCODE_DATA);
+#endif
+
+#ifdef WANT_RAW_DATA_SECTOR
+		/* reconstitute the ECC data and sync header */
+		sector = (uint8_t *)&dest[framenum * CD_FRAME_SIZE];
+		if ((src[framenum / 8] & (1 << (framenum % 8))) != 0)
+		{
+			memcpy(sector, s_cd_sync_header, sizeof(s_cd_sync_header));
+			ecc_generate(sector);
+		}
+#endif
+	}
+	return CHDERR_NONE;
+}
+
+/***************************************************************************
+ *  CD FLAC DECOMPRESSOR
+ ***************************************************************************
+ */
+
+/*------------------------------------------------------
+ *  cdfl_codec_blocksize - return the optimal block size
+ *------------------------------------------------------
+ */
+
+static uint32_t cdfl_codec_blocksize(uint32_t bytes)
+{
+	/* determine FLAC block size, which must be 16-65535
+	 * clamp to 2k since that's supposed to be the sweet spot */
+	uint32_t hunkbytes = bytes / 4;
+	while (hunkbytes > 2048)
+		hunkbytes /= 2;
+	return hunkbytes;
+}
+
+static chd_error cdfl_codec_init(void *codec, uint32_t hunkbytes)
+{
+#ifdef WANT_SUBCODE
+	chd_error ret;
+#endif
+	uint16_t native_endian = 0;
+	cdfl_codec_data *cdfl = (cdfl_codec_data*)codec;
+
+	/* make sure the CHD's hunk size is an even multiple of the frame size */
+	if (hunkbytes % CD_FRAME_SIZE != 0)
+		return CHDERR_CODEC_ERROR;
+
+	cdfl->buffer = (uint8_t*)malloc(sizeof(uint8_t) * hunkbytes);
+	if (cdfl->buffer == NULL)
+		return CHDERR_OUT_OF_MEMORY;
+
+	/* determine whether we want native or swapped samples */
+	*(uint8_t *)(&native_endian) = 1;
+	cdfl->swap_endian = (native_endian & 1);
+
+#ifdef WANT_SUBCODE
+	/* init zlib inflater */
+	ret = zlib_codec_init(&cdfl->subcode_decompressor, (hunkbytes / CD_FRAME_SIZE) * CD_MAX_SECTOR_DATA);
+	if (ret != CHDERR_NONE)
+		return ret;
+#endif
+
+	/* flac decoder init */
+	if (flac_decoder_init(&cdfl->decoder))
+		return CHDERR_OUT_OF_MEMORY;
+
+	return CHDERR_NONE;
+}
+
+static void cdfl_codec_free(void *codec)
+{
+	cdfl_codec_data *cdfl = (cdfl_codec_data*)codec;
+	flac_decoder_free(&cdfl->decoder);
+#ifdef WANT_SUBCODE
+	zlib_codec_free(&cdfl->subcode_decompressor);
+#endif
+	if (cdfl->buffer)
+		free(cdfl->buffer);
+}
+
+static chd_error cdfl_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen)
+{
+	uint32_t framenum;
+	uint8_t *buffer;
+#ifdef WANT_SUBCODE
+	uint32_t offset;
+	chd_error ret;
+#endif
+	cdfl_codec_data *cdfl = (cdfl_codec_data*)codec;
+
+	/* reset and decode */
+	uint32_t frames = destlen / CD_FRAME_SIZE;
+
+	if (!flac_decoder_reset(&cdfl->decoder, 44100, 2, cdfl_codec_blocksize(frames * CD_MAX_SECTOR_DATA), src, complen))
+		return CHDERR_DECOMPRESSION_ERROR;
+	buffer = &cdfl->buffer[0];
+	if (!flac_decoder_decode_interleaved(&cdfl->decoder, (int16_t *)(buffer), frames * CD_MAX_SECTOR_DATA/4, cdfl->swap_endian))
+		return CHDERR_DECOMPRESSION_ERROR;
+
+#ifdef WANT_SUBCODE
+	/* inflate the subcode data */
+	offset = flac_decoder_finish(&cdfl->decoder);
+	ret = zlib_codec_decompress(&cdfl->subcode_decompressor, src + offset, complen - offset, &cdfl->buffer[frames * CD_MAX_SECTOR_DATA], frames * CD_MAX_SUBCODE_DATA);
+	if (ret != CHDERR_NONE)
+		return ret;
+#else
+	flac_decoder_finish(&cdfl->decoder);
+#endif
+
+	/* reassemble the data */
+	for (framenum = 0; framenum < frames; framenum++)
+	{
+		memcpy(&dest[framenum * CD_FRAME_SIZE], &cdfl->buffer[framenum * CD_MAX_SECTOR_DATA], CD_MAX_SECTOR_DATA);
+#ifdef WANT_SUBCODE
+		memcpy(&dest[framenum * CD_FRAME_SIZE + CD_MAX_SECTOR_DATA], &cdfl->buffer[frames * CD_MAX_SECTOR_DATA + framenum * CD_MAX_SUBCODE_DATA], CD_MAX_SUBCODE_DATA);
+#endif
+	}
+
+	return CHDERR_NONE;
+}
+/***************************************************************************
+    CODEC INTERFACES
+***************************************************************************/
+
+static const codec_interface codec_interfaces[] =
+{
+	/* "none" or no compression */
+	{
+		CHDCOMPRESSION_NONE,
+		"none",
+		FALSE,
+		NULL,
+		NULL,
+		NULL,
+		NULL
+	},
+
+	/* standard zlib compression */
+	{
+		CHDCOMPRESSION_ZLIB,
+		"zlib",
+		FALSE,
+		zlib_codec_init,
+		zlib_codec_free,
+		zlib_codec_decompress,
+		NULL
+	},
+
+	/* zlib+ compression */
+	{
+		CHDCOMPRESSION_ZLIB_PLUS,
+		"zlib+",
+		FALSE,
+		zlib_codec_init,
+		zlib_codec_free,
+		zlib_codec_decompress,
+		NULL
+	},
+
+	/* V5 zlib compression */
+	{
+		CHD_CODEC_ZLIB,
+		"zlib (Deflate)",
+		FALSE,
+		zlib_codec_init,
+		zlib_codec_free,
+		zlib_codec_decompress,
+		NULL
+	},
+
+	/* V5 CD zlib compression */
+	{
+		CHD_CODEC_CD_ZLIB,
+		"cdzl (CD Deflate)",
+		FALSE,
+		cdzl_codec_init,
+		cdzl_codec_free,
+		cdzl_codec_decompress,
+		NULL
+	},
+
+	/* V5 CD lzma compression */
+	{
+		CHD_CODEC_CD_LZMA,
+		"cdlz (CD LZMA)",
+		FALSE,
+		cdlz_codec_init,
+		cdlz_codec_free,
+		cdlz_codec_decompress,
+		NULL
+	},
+
+	/* V5 CD flac compression */
+	{
+		CHD_CODEC_CD_FLAC,
+		"cdfl (CD FLAC)",
+		FALSE,
+		cdfl_codec_init,
+		cdfl_codec_free,
+		cdfl_codec_decompress,
+		NULL
+	},
+};
+
+/***************************************************************************
+    INLINE FUNCTIONS
+***************************************************************************/
+
+/*-------------------------------------------------
+    get_bigendian_uint64 - fetch a UINT64 from
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline UINT64 get_bigendian_uint64(const UINT8 *base)
+{
+	return ((UINT64)base[0] << 56) | ((UINT64)base[1] << 48) | ((UINT64)base[2] << 40) | ((UINT64)base[3] << 32) |
+			((UINT64)base[4] << 24) | ((UINT64)base[5] << 16) | ((UINT64)base[6] << 8) | (UINT64)base[7];
+}
+
+/*-------------------------------------------------
+    put_bigendian_uint64 - write a UINT64 to
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline void put_bigendian_uint64(UINT8 *base, UINT64 value)
+{
+	base[0] = value >> 56;
+	base[1] = value >> 48;
+	base[2] = value >> 40;
+	base[3] = value >> 32;
+	base[4] = value >> 24;
+	base[5] = value >> 16;
+	base[6] = value >> 8;
+	base[7] = value;
+}
+
+/*-------------------------------------------------
+    get_bigendian_uint48 - fetch a UINT48 from
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline UINT64 get_bigendian_uint48(const UINT8 *base)
+{
+	return  ((UINT64)base[0] << 40) | ((UINT64)base[1] << 32) |
+			((UINT64)base[2] << 24) | ((UINT64)base[3] << 16) | ((UINT64)base[4] << 8) | (UINT64)base[5];
+}
+
+/*-------------------------------------------------
+    put_bigendian_uint48 - write a UINT48 to
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline void put_bigendian_uint48(UINT8 *base, UINT64 value)
+{
+	value &= 0xffffffffffff;
+	base[0] = value >> 40;
+	base[1] = value >> 32;
+	base[2] = value >> 24;
+	base[3] = value >> 16;
+	base[4] = value >> 8;
+	base[5] = value;
+}
+/*-------------------------------------------------
+    get_bigendian_uint32 - fetch a UINT32 from
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline UINT32 get_bigendian_uint32(const UINT8 *base)
+{
+	return (base[0] << 24) | (base[1] << 16) | (base[2] << 8) | base[3];
+}
+
+/*-------------------------------------------------
+    put_bigendian_uint32 - write a UINT32 to
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline void put_bigendian_uint32(UINT8 *base, UINT32 value)
+{
+	base[0] = value >> 24;
+	base[1] = value >> 16;
+	base[2] = value >> 8;
+	base[3] = value;
+}
+
+/*-------------------------------------------------
+    put_bigendian_uint24 - write a UINT24 to
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline void put_bigendian_uint24(UINT8 *base, UINT32 value)
+{
+	value &= 0xffffff;
+	base[0] = value >> 16;
+	base[1] = value >> 8;
+	base[2] = value;
+}
+
+/*-------------------------------------------------
+    get_bigendian_uint24 - fetch a UINT24 from
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline UINT32 get_bigendian_uint24(const UINT8 *base)
+{
+	return (base[0] << 16) | (base[1] << 8) | base[2];
+}
+
+/*-------------------------------------------------
+    get_bigendian_uint16 - fetch a UINT16 from
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline UINT16 get_bigendian_uint16(const UINT8 *base)
+{
+	return (base[0] << 8) | base[1];
+}
+
+/*-------------------------------------------------
+    put_bigendian_uint16 - write a UINT16 to
+    the data stream in bigendian order
+-------------------------------------------------*/
+
+static inline void put_bigendian_uint16(UINT8 *base, UINT16 value)
+{
+	base[0] = value >> 8;
+	base[1] = value;
+}
+
+/*-------------------------------------------------
+    map_extract - extract a single map
+    entry from the datastream
+-------------------------------------------------*/
+
+static inline void map_extract(const UINT8 *base, map_entry *entry)
+{
+	entry->offset = get_bigendian_uint64(&base[0]);
+	entry->crc = get_bigendian_uint32(&base[8]);
+	entry->length = get_bigendian_uint16(&base[12]) | (base[14] << 16);
+	entry->flags = base[15];
+}
+
+/*-------------------------------------------------
+    map_assemble - write a single map
+    entry to the datastream
+-------------------------------------------------*/
+
+static inline void map_assemble(UINT8 *base, map_entry *entry)
+{
+	put_bigendian_uint64(&base[0], entry->offset);
+	put_bigendian_uint32(&base[8], entry->crc);
+	put_bigendian_uint16(&base[12], entry->length);
+	base[14] = entry->length >> 16;
+	base[15] = entry->flags;
+}
+
+/*-------------------------------------------------
+    map_size_v5 - calculate CHDv5 map size
+-------------------------------------------------*/
+static inline int map_size_v5(chd_header* header)
+{
+	return header->hunkcount * header->mapentrybytes;
+}
+
+/*-------------------------------------------------
+    crc16 - calculate CRC16 (from hashing.cpp)
+-------------------------------------------------*/
+uint16_t crc16(const void *data, uint32_t length)
+{
+	uint16_t crc = 0xffff;
+
+	static const uint16_t s_table[256] =
+	{
+		0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
+		0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
+		0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
+		0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
+		0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
+		0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
+		0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
+		0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
+		0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
+		0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
+		0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
+		0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
+		0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
+		0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
+		0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
+		0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
+		0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
+		0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
+		0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
+		0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
+		0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
+		0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
+		0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
+		0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
+		0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
+		0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
+		0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
+		0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
+		0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
+		0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
+		0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
+		0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0
+	};
+
+	const uint8_t *src = (uint8_t*)data;
+
+	/* fetch the current value into a local and rip through the source data */
+	while (length-- != 0)
+		crc = (crc << 8) ^ s_table[(crc >> 8) ^ *src++];
+	return crc;
+}
+
+/*-------------------------------------------------
+	compressed - test if CHD file is compressed
++-------------------------------------------------*/
+static inline int chd_compressed(chd_header* header) {
+	return header->compression[0] != CHD_CODEC_NONE;
+}
+
+/*-------------------------------------------------
+	decompress_v5_map - decompress the v5 map
+-------------------------------------------------*/
+
+static chd_error decompress_v5_map(chd_file* chd, chd_header* header)
+{
+	int result = 0;
+	int hunknum;
+	int repcount = 0;
+	uint8_t lastcomp = 0;
+	uint32_t last_self = 0;
+	uint64_t last_parent = 0;
+	struct bitstream* bitbuf;
+	uint32_t mapbytes;
+	uint64_t firstoffs;
+	uint16_t mapcrc;
+	uint8_t lengthbits;
+	uint8_t selfbits;
+	uint8_t parentbits;
+	uint8_t *compressed_ptr;
+	uint8_t rawbuf[16];
+	struct huffman_decoder* decoder;
+	enum huffman_error err;
+	uint64_t curoffset;	
+	int rawmapsize = map_size_v5(header);
+
+	if (!chd_compressed(header))
+	{
+		header->rawmap = (uint8_t*)malloc(rawmapsize);
+		core_fseek(chd->file, header->mapoffset, SEEK_SET);
+		result = core_fread(chd->file, header->rawmap, rawmapsize);
+		return CHDERR_NONE;
+	}
+
+	/* read the reader */
+	core_fseek(chd->file, header->mapoffset, SEEK_SET);
+	result = core_fread(chd->file, rawbuf, sizeof(rawbuf));
+	mapbytes = get_bigendian_uint32(&rawbuf[0]);
+	firstoffs = get_bigendian_uint48(&rawbuf[4]);
+	mapcrc = get_bigendian_uint16(&rawbuf[10]);
+	lengthbits = rawbuf[12];
+	selfbits = rawbuf[13];
+	parentbits = rawbuf[14];
+
+	/* now read the map */
+	compressed_ptr = (uint8_t*)malloc(sizeof(uint8_t) * mapbytes);
+	core_fseek(chd->file, header->mapoffset + 16, SEEK_SET);
+	result = core_fread(chd->file, compressed_ptr, mapbytes);
+	bitbuf = create_bitstream(compressed_ptr, sizeof(uint8_t) * mapbytes);
+	header->rawmap = (uint8_t*)malloc(rawmapsize);
+
+	/* first decode the compression types */
+	decoder = create_huffman_decoder(16, 8);
+	if (decoder == NULL)
+	{
+		free(compressed_ptr);
+		free(bitbuf);
+		return CHDERR_OUT_OF_MEMORY;
+	}
+
+	err = huffman_import_tree_rle(decoder, bitbuf);
+	if (err != HUFFERR_NONE)
+	{
+		free(compressed_ptr);
+		free(bitbuf);
+		delete_huffman_decoder(decoder);
+		return CHDERR_DECOMPRESSION_ERROR;
+	}
+
+	for (hunknum = 0; hunknum < header->hunkcount; hunknum++)
+	{
+		uint8_t *rawmap = header->rawmap + (hunknum * 12);
+		if (repcount > 0)
+			rawmap[0] = lastcomp, repcount--;
+		else
+		{
+			uint8_t val = huffman_decode_one(decoder, bitbuf);
+			if (val == COMPRESSION_RLE_SMALL)
+				rawmap[0] = lastcomp, repcount = 2 + huffman_decode_one(decoder, bitbuf);
+			else if (val == COMPRESSION_RLE_LARGE)
+				rawmap[0] = lastcomp, repcount = 2 + 16 + (huffman_decode_one(decoder, bitbuf) << 4), repcount += huffman_decode_one(decoder, bitbuf);
+			else
+				rawmap[0] = lastcomp = val;
+		}
+	}
+
+	/* then iterate through the hunks and extract the needed data */
+	curoffset = firstoffs;
+	for (hunknum = 0; hunknum < header->hunkcount; hunknum++)
+	{
+		uint8_t *rawmap = header->rawmap + (hunknum * 12);
+		uint64_t offset = curoffset;
+		uint32_t length = 0;
+		uint16_t crc = 0;
+		switch (rawmap[0])
+		{
+			/* base types */
+			case COMPRESSION_TYPE_0:
+			case COMPRESSION_TYPE_1:
+			case COMPRESSION_TYPE_2:
+			case COMPRESSION_TYPE_3:
+				curoffset += length = bitstream_read(bitbuf, lengthbits);
+				crc = bitstream_read(bitbuf, 16);
+				break;
+
+			case COMPRESSION_NONE:
+				curoffset += length = header->hunkbytes;
+				crc = bitstream_read(bitbuf, 16);
+				break;
+
+			case COMPRESSION_SELF:
+				last_self = offset = bitstream_read(bitbuf, selfbits);
+				break;
+
+			case COMPRESSION_PARENT:
+				offset = bitstream_read(bitbuf, parentbits);
+				last_parent = offset;
+				break;
+
+			/* pseudo-types; convert into base types */
+			case COMPRESSION_SELF_1:
+				last_self++;
+			case COMPRESSION_SELF_0:
+				rawmap[0] = COMPRESSION_SELF;
+				offset = last_self;
+				break;
+
+			case COMPRESSION_PARENT_SELF:
+				rawmap[0] = COMPRESSION_PARENT;
+				last_parent = offset = ( ((uint64_t)hunknum) * ((uint64_t)header->hunkbytes) ) / header->unitbytes;
+				break;
+
+			case COMPRESSION_PARENT_1:
+				last_parent += header->hunkbytes / header->unitbytes;
+			case COMPRESSION_PARENT_0:
+				rawmap[0] = COMPRESSION_PARENT;
+				offset = last_parent;
+				break;
+		}
+		/* UINT24 length */
+		put_bigendian_uint24(&rawmap[1], length);
+
+		/* UINT48 offset */
+		put_bigendian_uint48(&rawmap[4], offset);
+
+		/* crc16 */
+		put_bigendian_uint16(&rawmap[10], crc);
+	}
+
+	/* free memory */
+	free(compressed_ptr);
+	free(bitbuf);
+	delete_huffman_decoder(decoder);
+
+	/* verify the final CRC */
+	if (crc16(&header->rawmap[0], header->hunkcount * 12) != mapcrc)
+		return CHDERR_DECOMPRESSION_ERROR;
+
+	return CHDERR_NONE;
+}
+
+/*-------------------------------------------------
+    map_extract_old - extract a single map
+    entry in old format from the datastream
+-------------------------------------------------*/
+
+static inline void map_extract_old(const UINT8 *base, map_entry *entry, UINT32 hunkbytes)
+{
+	entry->offset = get_bigendian_uint64(&base[0]);
+	entry->crc = 0;
+	entry->length = entry->offset >> 44;
+	entry->flags = MAP_ENTRY_FLAG_NO_CRC | ((entry->length == hunkbytes) ? V34_MAP_ENTRY_TYPE_UNCOMPRESSED : V34_MAP_ENTRY_TYPE_COMPRESSED);
+#ifdef __MWERKS__
+	entry->offset = entry->offset & 0x00000FFFFFFFFFFFLL;
+#else
+	entry->offset = (entry->offset << 20) >> 20;
+#endif
+}
+
+/***************************************************************************
+    CHD FILE MANAGEMENT
+***************************************************************************/
+
+/*-------------------------------------------------
+    chd_open_file - open a CHD file for access
+-------------------------------------------------*/
+
+CHD_EXPORT chd_error chd_open_file(core_file *file, int mode, chd_file *parent, chd_file **chd)
+{
+	chd_file *newchd = NULL;
+	chd_error err;
+	int intfnum;
+
+	/* verify parameters */
+	if (file == NULL)
+		EARLY_EXIT(err = CHDERR_INVALID_PARAMETER);
+
+	/* punt if invalid parent */
+	if (parent != NULL && parent->cookie != COOKIE_VALUE)
+		EARLY_EXIT(err = CHDERR_INVALID_PARAMETER);
+
+	/* allocate memory for the final result */
+	newchd = (chd_file *)malloc(sizeof(**chd));
+	if (newchd == NULL)
+		EARLY_EXIT(err = CHDERR_OUT_OF_MEMORY);
+	memset(newchd, 0, sizeof(*newchd));
+	newchd->cookie = COOKIE_VALUE;
+	newchd->parent = parent;
+	newchd->file = file;
+
+	/* now attempt to read the header */
+	err = header_read(newchd, &newchd->header);
+	if (err != CHDERR_NONE)
+		EARLY_EXIT(err);
+
+	/* validate the header */
+	err = header_validate(&newchd->header);
+	if (err != CHDERR_NONE)
+		EARLY_EXIT(err);
+
+	/* make sure we don't open a read-only file writeable */
+	if (mode == CHD_OPEN_READWRITE && !(newchd->header.flags & CHDFLAGS_IS_WRITEABLE))
+		EARLY_EXIT(err = CHDERR_FILE_NOT_WRITEABLE);
+
+	/* also, never open an older version writeable */
+	if (mode == CHD_OPEN_READWRITE && newchd->header.version < CHD_HEADER_VERSION)
+		EARLY_EXIT(err = CHDERR_UNSUPPORTED_VERSION);
+
+	/* if we need a parent, make sure we have one */
+	if (parent == NULL)
+	{
+		/* Detect parent requirement for versions below 5 */
+		if (newchd->header.version < 5 && newchd->header.flags & CHDFLAGS_HAS_PARENT)
+			EARLY_EXIT(err = CHDERR_REQUIRES_PARENT);
+		/* Detection for version 5 and above - if parentsha1 != 0, we have a parent */
+		else if (newchd->header.version >= 5 && memcmp(nullsha1, newchd->header.parentsha1, sizeof(newchd->header.parentsha1)) != 0)
+			EARLY_EXIT(err = CHDERR_REQUIRES_PARENT);
+	}
+
+	/* make sure we have a valid parent */
+	if (parent != NULL)
+	{
+		/* check MD5 if it isn't empty */
+		if (memcmp(nullmd5, newchd->header.parentmd5, sizeof(newchd->header.parentmd5)) != 0 &&
+			memcmp(nullmd5, newchd->parent->header.md5, sizeof(newchd->parent->header.md5)) != 0 &&
+			memcmp(newchd->parent->header.md5, newchd->header.parentmd5, sizeof(newchd->header.parentmd5)) != 0)
+			EARLY_EXIT(err = CHDERR_INVALID_PARENT);
+
+		/* check SHA1 if it isn't empty */
+		if (memcmp(nullsha1, newchd->header.parentsha1, sizeof(newchd->header.parentsha1)) != 0 &&
+			memcmp(nullsha1, newchd->parent->header.sha1, sizeof(newchd->parent->header.sha1)) != 0 &&
+			memcmp(newchd->parent->header.sha1, newchd->header.parentsha1, sizeof(newchd->header.parentsha1)) != 0)
+			EARLY_EXIT(err = CHDERR_INVALID_PARENT);
+	}
+
+	/* now read the hunk map */
+	if (newchd->header.version < 5)
+	{
+		err = map_read(newchd);
+		if (err != CHDERR_NONE)
+			EARLY_EXIT(err);
+	}
+	else
+	{
+		err = decompress_v5_map(newchd, &(newchd->header));
+	}
+	if (err != CHDERR_NONE)
+		EARLY_EXIT(err);
+
+#ifdef NEED_CACHE_HUNK
+	/* allocate and init the hunk cache */
+	newchd->cache = (UINT8 *)malloc(newchd->header.hunkbytes);
+	newchd->compare = (UINT8 *)malloc(newchd->header.hunkbytes);
+	if (newchd->cache == NULL || newchd->compare == NULL)
+		EARLY_EXIT(err = CHDERR_OUT_OF_MEMORY);
+	newchd->cachehunk = ~0;
+	newchd->comparehunk = ~0;
+#endif
+
+	/* allocate the temporary compressed buffer */
+	newchd->compressed = (UINT8 *)malloc(newchd->header.hunkbytes);
+	if (newchd->compressed == NULL)
+		EARLY_EXIT(err = CHDERR_OUT_OF_MEMORY);
+
+	/* find the codec interface */
+	if (newchd->header.version < 5)
+	{
+		for (intfnum = 0; intfnum < ARRAY_LENGTH(codec_interfaces); intfnum++)
+		{
+			if (codec_interfaces[intfnum].compression == newchd->header.compression[0])
+			{
+				newchd->codecintf[0] = &codec_interfaces[intfnum];
+				break;
+			}
+		}
+
+		if (intfnum == ARRAY_LENGTH(codec_interfaces))
+			EARLY_EXIT(err = CHDERR_UNSUPPORTED_FORMAT);
+
+		/* initialize the codec */
+		if (newchd->codecintf[0]->init != NULL)
+		{
+			err = (*newchd->codecintf[0]->init)(&newchd->zlib_codec_data, newchd->header.hunkbytes);
+			if (err != CHDERR_NONE)
+				EARLY_EXIT(err);
+		}
+	}
+	else
+	{
+		int decompnum;
+		/* verify the compression types and initialize the codecs */
+		for (decompnum = 0; decompnum < ARRAY_LENGTH(newchd->header.compression); decompnum++)
+		{
+			int i;
+			for (i = 0 ; i < ARRAY_LENGTH(codec_interfaces) ; i++)
+			{
+				if (codec_interfaces[i].compression == newchd->header.compression[decompnum])
+				{
+					newchd->codecintf[decompnum] = &codec_interfaces[i];
+					break;
+				}
+			}
+
+			if (newchd->codecintf[decompnum] == NULL && newchd->header.compression[decompnum] != 0)
+				EARLY_EXIT(err = CHDERR_UNSUPPORTED_FORMAT);
+
+			/* initialize the codec */
+			if (newchd->codecintf[decompnum]->init != NULL)
+			{
+				void* codec = NULL;
+				switch (newchd->header.compression[decompnum])
+				{
+					case CHD_CODEC_ZLIB:
+						codec = &newchd->zlib_codec_data;
+						break;
+
+					case CHD_CODEC_CD_ZLIB:
+						codec = &newchd->cdzl_codec_data;
+						break;
+
+					case CHD_CODEC_CD_LZMA:
+						codec = &newchd->cdlz_codec_data;
+						break;
+
+					case CHD_CODEC_CD_FLAC:
+						codec = &newchd->cdfl_codec_data;
+						break;
+				}
+
+				if (codec == NULL)
+					EARLY_EXIT(err = CHDERR_UNSUPPORTED_FORMAT);
+
+				err = (*newchd->codecintf[decompnum]->init)(codec, newchd->header.hunkbytes);
+				if (err != CHDERR_NONE)
+					EARLY_EXIT(err);
+			}
+		}
+	}
+
+	/* all done */
+	*chd = newchd;
+	return CHDERR_NONE;
+
+cleanup:
+	if (newchd != NULL)
+		chd_close(newchd);
+	return err;
+}
+
+/*-------------------------------------------------
+    chd_precache - precache underlying file in
+    memory
+-------------------------------------------------*/
+
+CHD_EXPORT chd_error chd_precache(chd_file *chd)
+{
+#ifdef _MSC_VER
+	size_t size, count;
+#else
+	ssize_t size, count;
+#endif
+
+	if (chd->file_cache == NULL)
+	{
+		core_fseek(chd->file, 0, SEEK_END);
+		size = core_ftell(chd->file);
+		if (size <= 0)
+			return CHDERR_INVALID_DATA;
+		chd->file_cache = malloc(size);
+		if (chd->file_cache == NULL)
+			return CHDERR_OUT_OF_MEMORY;
+		core_fseek(chd->file, 0, SEEK_SET);
+		count = core_fread(chd->file, chd->file_cache, size);
+		if (count != size)
+		{
+			free(chd->file_cache);
+			chd->file_cache = NULL;
+			return CHDERR_READ_ERROR;
+		}
+	}
+
+	return CHDERR_NONE;
+}
+
+/*-------------------------------------------------
+    chd_open - open a CHD file by
+    filename
+-------------------------------------------------*/
+
+CHD_EXPORT chd_error chd_open(const char *filename, int mode, chd_file *parent, chd_file **chd)
+{
+	chd_error err;
+	core_file *file = NULL;
+
+	/* choose the proper mode */
+	switch(mode)
+	{
+		case CHD_OPEN_READ:
+			break;
+
+		default:
+			err = CHDERR_INVALID_PARAMETER;
+			goto cleanup;
+	}
+
+	/* open the file */
+	file = core_fopen(filename);
+	if (file == 0)
+	{
+		err = CHDERR_FILE_NOT_FOUND;
+		goto cleanup;
+	}
+
+	/* now open the CHD */
+	err = chd_open_file(file, mode, parent, chd);
+	if (err != CHDERR_NONE)
+		goto cleanup;
+
+	/* we now own this file */
+	(*chd)->owns_file = TRUE;
+
+cleanup:
+	if ((err != CHDERR_NONE) && (file != NULL))
+		core_fclose(file);
+	return err;
+}
+
+/*-------------------------------------------------
+    chd_close - close a CHD file for access
+-------------------------------------------------*/
+
+CHD_EXPORT void chd_close(chd_file *chd)
+{
+	/* punt if NULL or invalid */
+	if (chd == NULL || chd->cookie != COOKIE_VALUE)
+		return;
+
+	/* deinit the codec */
+	if (chd->header.version < 5)
+	{
+		if (chd->codecintf[0] != NULL && chd->codecintf[0]->free != NULL)
+			(*chd->codecintf[0]->free)(&chd->zlib_codec_data);
+	}
+	else
+	{
+		int i;
+		/* Free the codecs */
+		for (i = 0 ; i < ARRAY_LENGTH(chd->codecintf); i++)
+		{
+			void* codec = NULL;
+
+			if (chd->codecintf[i] == NULL)
+				continue;
+
+			switch (chd->codecintf[i]->compression)
+			{
+				case CHD_CODEC_CD_LZMA:
+					codec = &chd->cdlz_codec_data;
+					break;
+
+				case CHD_CODEC_ZLIB:
+					codec = &chd->zlib_codec_data;
+					break;
+
+				case CHD_CODEC_CD_ZLIB:
+					codec = &chd->cdzl_codec_data;
+					break;
+
+				case CHD_CODEC_CD_FLAC:
+					codec = &chd->cdfl_codec_data;
+					break;
+			}
+
+			if (codec)
+			{
+				(*chd->codecintf[i]->free)(codec);
+			}
+		}
+
+		/* Free the raw map */
+		if (chd->header.rawmap != NULL)
+			free(chd->header.rawmap);
+	}
+
+	/* free the compressed data buffer */
+	if (chd->compressed != NULL)
+		free(chd->compressed);
+
+#ifdef NEED_CACHE_HUNK
+	/* free the hunk cache and compare data */
+	if (chd->compare != NULL)
+		free(chd->compare);
+	if (chd->cache != NULL)
+		free(chd->cache);
+#endif
+
+	/* free the hunk map */
+	if (chd->map != NULL)
+		free(chd->map);
+
+	/* close the file */
+	if (chd->owns_file && chd->file != NULL)
+		core_fclose(chd->file);
+
+#ifdef NEED_CACHE_HUNK
+	if (PRINTF_MAX_HUNK) printf("Max hunk = %d/%d\n", chd->maxhunk, chd->header.totalhunks);
+#endif
+	if (chd->file_cache)
+		free(chd->file_cache);
+
+	if (chd->parent)
+		chd_close(chd->parent);
+
+	/* free our memory */
+	free(chd);
+}
+
+/*-------------------------------------------------
+    chd_core_file - return the associated
+    core_file
+-------------------------------------------------*/
+
+CHD_EXPORT core_file *chd_core_file(chd_file *chd)
+{
+	return chd->file;
+}
+
+/*-------------------------------------------------
+    chd_error_string - return an error string for
+    the given CHD error
+-------------------------------------------------*/
+
+CHD_EXPORT const char *chd_error_string(chd_error err)
+{
+	switch (err)
+	{
+		case CHDERR_NONE:						return "no error";
+		case CHDERR_NO_INTERFACE:				return "no drive interface";
+		case CHDERR_OUT_OF_MEMORY:				return "out of memory";
+		case CHDERR_INVALID_FILE:				return "invalid file";
+		case CHDERR_INVALID_PARAMETER:			return "invalid parameter";
+		case CHDERR_INVALID_DATA:				return "invalid data";
+		case CHDERR_FILE_NOT_FOUND:				return "file not found";
+		case CHDERR_REQUIRES_PARENT:			return "requires parent";
+		case CHDERR_FILE_NOT_WRITEABLE:			return "file not writeable";
+		case CHDERR_READ_ERROR:					return "read error";
+		case CHDERR_WRITE_ERROR:				return "write error";
+		case CHDERR_CODEC_ERROR:				return "codec error";
+		case CHDERR_INVALID_PARENT:				return "invalid parent";
+		case CHDERR_HUNK_OUT_OF_RANGE:			return "hunk out of range";
+		case CHDERR_DECOMPRESSION_ERROR:		return "decompression error";
+		case CHDERR_COMPRESSION_ERROR:			return "compression error";
+		case CHDERR_CANT_CREATE_FILE:			return "can't create file";
+		case CHDERR_CANT_VERIFY:				return "can't verify file";
+		case CHDERR_NOT_SUPPORTED:				return "operation not supported";
+		case CHDERR_METADATA_NOT_FOUND:			return "can't find metadata";
+		case CHDERR_INVALID_METADATA_SIZE:		return "invalid metadata size";
+		case CHDERR_UNSUPPORTED_VERSION:		return "unsupported CHD version";
+		case CHDERR_VERIFY_INCOMPLETE:			return "incomplete verify";
+		case CHDERR_INVALID_METADATA:			return "invalid metadata";
+		case CHDERR_INVALID_STATE:				return "invalid state";
+		case CHDERR_OPERATION_PENDING:			return "operation pending";
+		case CHDERR_NO_ASYNC_OPERATION:			return "no async operation in progress";
+		case CHDERR_UNSUPPORTED_FORMAT:			return "unsupported format";
+		default:								return "undocumented error";
+	}
+}
+
+/***************************************************************************
+    CHD HEADER MANAGEMENT
+***************************************************************************/
+
+/*-------------------------------------------------
+    chd_get_header - return a pointer to the
+    extracted header data
+-------------------------------------------------*/
+
+CHD_EXPORT const chd_header *chd_get_header(chd_file *chd)
+{
+	/* punt if NULL or invalid */
+	if (chd == NULL || chd->cookie != COOKIE_VALUE)
+		return NULL;
+
+	return &chd->header;
+}
+
+/*-------------------------------------------------
+    chd_read_header - read CHD header data
+	from file into the pointed struct
+-------------------------------------------------*/
+CHD_EXPORT chd_error chd_read_header(const char *filename, chd_header *header)
+{
+	chd_error err = CHDERR_NONE;
+	chd_file chd;
+
+	/* punt if NULL */
+	if (filename == NULL || header == NULL)
+		EARLY_EXIT(err = CHDERR_INVALID_PARAMETER);
+
+	/* open the file */
+	chd.file = core_fopen(filename);
+	if (chd.file == NULL)
+		EARLY_EXIT(err = CHDERR_FILE_NOT_FOUND);
+
+	/* attempt to read the header */
+	err = header_read(&chd, header);
+	if (err != CHDERR_NONE)
+		EARLY_EXIT(err);
+
+	/* validate the header */
+	err = header_validate(header);
+	if (err != CHDERR_NONE)
+		EARLY_EXIT(err);
+
+cleanup:
+	if (chd.file != NULL)
+		core_fclose(chd.file);
+
+	return err;
+}
+
+/***************************************************************************
+    CORE DATA READ/WRITE
+***************************************************************************/
+
+/*-------------------------------------------------
+    chd_read - read a single hunk from the CHD
+    file
+-------------------------------------------------*/
+
+CHD_EXPORT chd_error chd_read(chd_file *chd, UINT32 hunknum, void *buffer)
+{
+	/* punt if NULL or invalid */
+	if (chd == NULL || chd->cookie != COOKIE_VALUE)
+		return CHDERR_INVALID_PARAMETER;
+
+	/* if we're past the end, fail */
+	if (hunknum >= chd->header.totalhunks)
+		return CHDERR_HUNK_OUT_OF_RANGE;
+
+	/* perform the read */
+	return hunk_read_into_memory(chd, hunknum, (UINT8 *)buffer);
+}
+
+/***************************************************************************
+    METADATA MANAGEMENT
+***************************************************************************/
+
+/*-------------------------------------------------
+    chd_get_metadata - get the indexed metadata
+    of the given type
+-------------------------------------------------*/
+
+CHD_EXPORT chd_error chd_get_metadata(chd_file *chd, UINT32 searchtag, UINT32 searchindex, void *output, UINT32 outputlen, UINT32 *resultlen, UINT32 *resulttag, UINT8 *resultflags)
+{
+	metadata_entry metaentry;
+	chd_error err;
+	UINT32 count;
+
+	/* if we didn't find it, just return */
+	err = metadata_find_entry(chd, searchtag, searchindex, &metaentry);
+	if (err != CHDERR_NONE)
+	{
+		/* unless we're an old version and they are requesting hard disk metadata */
+		if (chd->header.version < 3 && (searchtag == HARD_DISK_METADATA_TAG || searchtag == CHDMETATAG_WILDCARD) && searchindex == 0)
+		{
+			char faux_metadata[256];
+			UINT32 faux_length;
+
+			/* fill in the faux metadata */
+			sprintf(faux_metadata, HARD_DISK_METADATA_FORMAT, chd->header.obsolete_cylinders, chd->header.obsolete_heads, chd->header.obsolete_sectors, chd->header.hunkbytes / chd->header.obsolete_hunksize);
+			faux_length = (UINT32)strlen(faux_metadata) + 1;
+
+			/* copy the metadata itself */
+			memcpy(output, faux_metadata, MIN(outputlen, faux_length));
+
+			/* return the length of the data and the tag */
+			if (resultlen != NULL)
+				*resultlen = faux_length;
+			if (resulttag != NULL)
+				*resulttag = HARD_DISK_METADATA_TAG;
+			return CHDERR_NONE;
+		}
+		return err;
+	}
+
+	/* read the metadata */
+	outputlen = MIN(outputlen, metaentry.length);
+	core_fseek(chd->file, metaentry.offset + METADATA_HEADER_SIZE, SEEK_SET);
+	count = core_fread(chd->file, output, outputlen);
+	if (count != outputlen)
+		return CHDERR_READ_ERROR;
+
+	/* return the length of the data and the tag */
+	if (resultlen != NULL)
+		*resultlen = metaentry.length;
+	if (resulttag != NULL)
+		*resulttag = metaentry.metatag;
+	if (resultflags != NULL)
+		*resultflags = metaentry.flags;
+	return CHDERR_NONE;
+}
+
+/***************************************************************************
+    CODEC INTERFACES
+***************************************************************************/
+
+/*-------------------------------------------------
+    chd_codec_config - set internal codec
+    parameters
+-------------------------------------------------*/
+
+CHD_EXPORT chd_error chd_codec_config(chd_file *chd, int param, void *config)
+{
+	return CHDERR_INVALID_PARAMETER;
+}
+
+/*-------------------------------------------------
+    chd_get_codec_name - get the name of a
+    particular codec
+-------------------------------------------------*/
+
+CHD_EXPORT const char *chd_get_codec_name(UINT32 codec)
+{
+	return "Unknown";
+}
+
+/***************************************************************************
+    INTERNAL HEADER OPERATIONS
+***************************************************************************/
+
+/*-------------------------------------------------
+    header_validate - check the validity of a
+    CHD header
+-------------------------------------------------*/
+
+static chd_error header_validate(const chd_header *header)
+{
+	int intfnum;
+
+	/* require a valid version */
+	if (header->version == 0 || header->version > CHD_HEADER_VERSION)
+		return CHDERR_UNSUPPORTED_VERSION;
+
+	/* require a valid length */
+	if ((header->version == 1 && header->length != CHD_V1_HEADER_SIZE) ||
+		(header->version == 2 && header->length != CHD_V2_HEADER_SIZE) ||
+		(header->version == 3 && header->length != CHD_V3_HEADER_SIZE) ||
+		(header->version == 4 && header->length != CHD_V4_HEADER_SIZE) ||
+		(header->version == 5 && header->length != CHD_V5_HEADER_SIZE))
+		return CHDERR_INVALID_PARAMETER;
+
+	/* Do not validate v5 header */
+	if (header->version <= 4)
+	{
+		/* require valid flags */
+		if (header->flags & CHDFLAGS_UNDEFINED)
+			return CHDERR_INVALID_PARAMETER;
+
+		/* require a supported compression mechanism */
+		for (intfnum = 0; intfnum < ARRAY_LENGTH(codec_interfaces); intfnum++)
+			if (codec_interfaces[intfnum].compression == header->compression[0])
+				break;
+
+		if (intfnum == ARRAY_LENGTH(codec_interfaces))
+			return CHDERR_INVALID_PARAMETER;
+
+		/* require a valid hunksize */
+		if (header->hunkbytes == 0 || header->hunkbytes >= 65536 * 256)
+			return CHDERR_INVALID_PARAMETER;
+
+		/* require a valid hunk count */
+		if (header->totalhunks == 0)
+			return CHDERR_INVALID_PARAMETER;
+
+		/* require a valid MD5 and/or SHA1 if we're using a parent */
+		if ((header->flags & CHDFLAGS_HAS_PARENT) && memcmp(header->parentmd5, nullmd5, sizeof(nullmd5)) == 0 && memcmp(header->parentsha1, nullsha1, sizeof(nullsha1)) == 0)
+			return CHDERR_INVALID_PARAMETER;
+
+		/* if we're V3 or later, the obsolete fields must be 0 */
+		if (header->version >= 3 &&
+			(header->obsolete_cylinders != 0 || header->obsolete_sectors != 0 ||
+			 header->obsolete_heads != 0 || header->obsolete_hunksize != 0))
+			return CHDERR_INVALID_PARAMETER;
+
+		/* if we're pre-V3, the obsolete fields must NOT be 0 */
+		if (header->version < 3 &&
+			(header->obsolete_cylinders == 0 || header->obsolete_sectors == 0 ||
+			 header->obsolete_heads == 0 || header->obsolete_hunksize == 0))
+			return CHDERR_INVALID_PARAMETER;
+	}
+
+	return CHDERR_NONE;
+}
+
+/*-------------------------------------------------
+    header_guess_unitbytes - for older CHD formats,
+    guess at the bytes/unit based on metadata
+-------------------------------------------------*/
+
+static UINT32 header_guess_unitbytes(chd_file *chd)
+{
+	/* look for hard disk metadata; if found, then the unit size == sector size */
+	char metadata[512];
+	int i0, i1, i2, i3;
+	if (chd_get_metadata(chd, HARD_DISK_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE &&
+		sscanf(metadata, HARD_DISK_METADATA_FORMAT, &i0, &i1, &i2, &i3) == 4)
+		return i3;
+
+	/* look for CD-ROM metadata; if found, then the unit size == CD frame size */
+	if (chd_get_metadata(chd, CDROM_OLD_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE ||
+		chd_get_metadata(chd, CDROM_TRACK_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE ||
+		chd_get_metadata(chd, CDROM_TRACK_METADATA2_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE ||
+		chd_get_metadata(chd, GDROM_OLD_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE ||
+		chd_get_metadata(chd, GDROM_TRACK_METADATA_TAG, 0, metadata, sizeof(metadata), NULL, NULL, NULL) == CHDERR_NONE)
+		return CD_FRAME_SIZE;
+
+	/* otherwise, just map 1:1 with the hunk size */
+	return chd->header.hunkbytes;
+}
+
+/*-------------------------------------------------
+    header_read - read a CHD header into the
+    internal data structure
+-------------------------------------------------*/
+
+static chd_error header_read(chd_file *chd, chd_header *header)
+{
+	UINT8 rawheader[CHD_MAX_HEADER_SIZE];
+	UINT32 count;
+
+	/* punt if NULL */
+	if (header == NULL)
+		return CHDERR_INVALID_PARAMETER;
+
+	/* punt if invalid file */
+	if (chd->file == NULL)
+		return CHDERR_INVALID_FILE;
+
+	/* seek and read */
+	core_fseek(chd->file, 0, SEEK_SET);
+	count = core_fread(chd->file, rawheader, sizeof(rawheader));
+	if (count != sizeof(rawheader))
+		return CHDERR_READ_ERROR;
+
+	/* verify the tag */
+	if (strncmp((char *)rawheader, "MComprHD", 8) != 0)
+		return CHDERR_INVALID_DATA;
+
+	/* extract the direct data */
+	memset(header, 0, sizeof(*header));
+	header->length        = get_bigendian_uint32(&rawheader[8]);
+	header->version       = get_bigendian_uint32(&rawheader[12]);
+
+	/* make sure it's a version we understand */
+	if (header->version == 0 || header->version > CHD_HEADER_VERSION)
+		return CHDERR_UNSUPPORTED_VERSION;
+
+	/* make sure the length is expected */
+	if ((header->version == 1 && header->length != CHD_V1_HEADER_SIZE) ||
+		(header->version == 2 && header->length != CHD_V2_HEADER_SIZE) ||
+		(header->version == 3 && header->length != CHD_V3_HEADER_SIZE) ||
+		(header->version == 4 && header->length != CHD_V4_HEADER_SIZE) ||
+		(header->version == 5 && header->length != CHD_V5_HEADER_SIZE))
+
+		return CHDERR_INVALID_DATA;
+
+	/* extract the common data */
+	header->flags         	= get_bigendian_uint32(&rawheader[16]);
+	header->compression[0]	= get_bigendian_uint32(&rawheader[20]);
+	header->compression[1]	= CHD_CODEC_NONE;
+	header->compression[2]	= CHD_CODEC_NONE;
+	header->compression[3]	= CHD_CODEC_NONE;
+
+	/* extract the V1/V2-specific data */
+	if (header->version < 3)
+	{
+		int seclen = (header->version == 1) ? CHD_V1_SECTOR_SIZE : get_bigendian_uint32(&rawheader[76]);
+		header->obsolete_hunksize  = get_bigendian_uint32(&rawheader[24]);
+		header->totalhunks         = get_bigendian_uint32(&rawheader[28]);
+		header->obsolete_cylinders = get_bigendian_uint32(&rawheader[32]);
+		header->obsolete_heads     = get_bigendian_uint32(&rawheader[36]);
+		header->obsolete_sectors   = get_bigendian_uint32(&rawheader[40]);
+		memcpy(header->md5, &rawheader[44], CHD_MD5_BYTES);
+		memcpy(header->parentmd5, &rawheader[60], CHD_MD5_BYTES);
+		header->logicalbytes = (UINT64)header->obsolete_cylinders * (UINT64)header->obsolete_heads * (UINT64)header->obsolete_sectors * (UINT64)seclen;
+		header->hunkbytes = seclen * header->obsolete_hunksize;
+		header->unitbytes          = header_guess_unitbytes(chd);
+		header->unitcount          = (header->logicalbytes + header->unitbytes - 1) / header->unitbytes;
+		header->metaoffset = 0;
+	}
+
+	/* extract the V3-specific data */
+	else if (header->version == 3)
+	{
+		header->totalhunks   = get_bigendian_uint32(&rawheader[24]);
+		header->logicalbytes = get_bigendian_uint64(&rawheader[28]);
+		header->metaoffset   = get_bigendian_uint64(&rawheader[36]);
+		memcpy(header->md5, &rawheader[44], CHD_MD5_BYTES);
+		memcpy(header->parentmd5, &rawheader[60], CHD_MD5_BYTES);
+		header->hunkbytes    = get_bigendian_uint32(&rawheader[76]);
+		header->unitbytes    = header_guess_unitbytes(chd);
+		header->unitcount    = (header->logicalbytes + header->unitbytes - 1) / header->unitbytes;
+		memcpy(header->sha1, &rawheader[80], CHD_SHA1_BYTES);
+		memcpy(header->parentsha1, &rawheader[100], CHD_SHA1_BYTES);
+	}
+
+	/* extract the V4-specific data */
+	else if (header->version == 4)
+	{
+		header->totalhunks   = get_bigendian_uint32(&rawheader[24]);
+		header->logicalbytes = get_bigendian_uint64(&rawheader[28]);
+		header->metaoffset   = get_bigendian_uint64(&rawheader[36]);
+		header->hunkbytes    = get_bigendian_uint32(&rawheader[44]);
+		header->unitbytes    = header_guess_unitbytes(chd);
+		header->unitcount    = (header->logicalbytes + header->unitbytes - 1) / header->unitbytes;
+		memcpy(header->sha1, &rawheader[48], CHD_SHA1_BYTES);
+		memcpy(header->parentsha1, &rawheader[68], CHD_SHA1_BYTES);
+		memcpy(header->rawsha1, &rawheader[88], CHD_SHA1_BYTES);
+	}
+
+	/* extract the V5-specific data */
+	else if (header->version == 5)
+	{
+		/* TODO */
+		header->compression[0]  = get_bigendian_uint32(&rawheader[16]);
+		header->compression[1]  = get_bigendian_uint32(&rawheader[20]);
+		header->compression[2]  = get_bigendian_uint32(&rawheader[24]);
+		header->compression[3]  = get_bigendian_uint32(&rawheader[28]);
+		header->logicalbytes    = get_bigendian_uint64(&rawheader[32]);
+		header->mapoffset       = get_bigendian_uint64(&rawheader[40]);
+		header->metaoffset      = get_bigendian_uint64(&rawheader[48]);
+		header->hunkbytes       = get_bigendian_uint32(&rawheader[56]);
+		header->hunkcount       = (header->logicalbytes + header->hunkbytes - 1) / header->hunkbytes;
+		header->unitbytes       = get_bigendian_uint32(&rawheader[60]);
+		header->unitcount       = (header->logicalbytes + header->unitbytes - 1) / header->unitbytes;
+		memcpy(header->sha1, &rawheader[84], CHD_SHA1_BYTES);
+		memcpy(header->parentsha1, &rawheader[104], CHD_SHA1_BYTES);
+		memcpy(header->rawsha1, &rawheader[64], CHD_SHA1_BYTES);
+
+		/* determine properties of map entries */
+		header->mapentrybytes = chd_compressed(header) ? 12 : 4;
+
+		/* hack */
+		header->totalhunks 		= header->hunkcount;
+	}
+
+	/* Unknown version */
+	else
+	{
+		/* TODO */
+	}
+
+	/* guess it worked */
+	return CHDERR_NONE;
+}
+
+/***************************************************************************
+    INTERNAL HUNK READ/WRITE
+***************************************************************************/
+
+/*-------------------------------------------------
+    hunk_read_compressed - read a compressed
+    hunk
+-------------------------------------------------*/
+
+static UINT8* hunk_read_compressed(chd_file *chd, UINT64 offset, size_t size)
+{
+#ifdef _MSC_VER
+	size_t bytes;
+#else
+	ssize_t bytes;
+#endif
+	if (chd->file_cache != NULL)
+	{
+		return chd->file_cache + offset;
+	}
+	else
+	{
+		core_fseek(chd->file, offset, SEEK_SET);
+		bytes = core_fread(chd->file, chd->compressed, size);
+		if (bytes != size)
+			return NULL;
+		return chd->compressed;
+	}
+}
+
+/*-------------------------------------------------
+    hunk_read_uncompressed - read an uncompressed
+    hunk
+-------------------------------------------------*/
+
+static chd_error hunk_read_uncompressed(chd_file *chd, UINT64 offset, size_t size, UINT8 *dest)
+{
+#ifdef _MSC_VER
+	size_t bytes;
+#else
+	ssize_t bytes;
+#endif
+	if (chd->file_cache != NULL)
+	{
+		memcpy(dest, chd->file_cache + offset, size);
+	}
+	else
+	{
+		core_fseek(chd->file, offset, SEEK_SET);
+		bytes = core_fread(chd->file, dest, size);
+		if (bytes != size)
+			return CHDERR_READ_ERROR;
+	}
+	return CHDERR_NONE;
+}
+
+#ifdef NEED_CACHE_HUNK
+/*-------------------------------------------------
+    hunk_read_into_cache - read a hunk into
+    the CHD's hunk cache
+-------------------------------------------------*/
+
+static chd_error hunk_read_into_cache(chd_file *chd, UINT32 hunknum)
+{
+	chd_error err;
+
+	/* track the max */
+	if (hunknum > chd->maxhunk)
+		chd->maxhunk = hunknum;
+
+	/* if we're already in the cache, we're done */
+	if (chd->cachehunk == hunknum)
+		return CHDERR_NONE;
+	chd->cachehunk = ~0;
+
+	/* otherwise, read the data */
+	err = hunk_read_into_memory(chd, hunknum, chd->cache);
+	if (err != CHDERR_NONE)
+		return err;
+
+	/* mark the hunk successfully cached in */
+	chd->cachehunk = hunknum;
+	return CHDERR_NONE;
+}
+#endif
+
+/*-------------------------------------------------
+    hunk_read_into_memory - read a hunk into
+    memory at the given location
+-------------------------------------------------*/
+
+static chd_error hunk_read_into_memory(chd_file *chd, UINT32 hunknum, UINT8 *dest)
+{
+	chd_error err;
+
+	/* punt if no file */
+	if (chd->file == NULL)
+		return CHDERR_INVALID_FILE;
+
+	/* return an error if out of range */
+	if (hunknum >= chd->header.totalhunks)
+		return CHDERR_HUNK_OUT_OF_RANGE;
+
+	if (dest == NULL)
+		return CHDERR_INVALID_PARAMETER;
+
+	if (chd->header.version < 5)
+	{
+		map_entry *entry = &chd->map[hunknum];
+		UINT32 bytes;
+		UINT8* compressed_bytes;
+
+		/* switch off the entry type */
+		switch (entry->flags & MAP_ENTRY_FLAG_TYPE_MASK)
+		{
+			/* compressed data */
+			case V34_MAP_ENTRY_TYPE_COMPRESSED:
+            {
+               void *codec = NULL;
+
+				/* read it into the decompression buffer */
+				compressed_bytes = hunk_read_compressed(chd, entry->offset, entry->length);
+				if (compressed_bytes == NULL)
+					return CHDERR_READ_ERROR;
+
+				/* now decompress using the codec */
+				err = CHDERR_NONE;
+				codec = &chd->zlib_codec_data;
+				if (chd->codecintf[0]->decompress != NULL)
+					err = (*chd->codecintf[0]->decompress)(codec, compressed_bytes, entry->length, dest, chd->header.hunkbytes);
+				if (err != CHDERR_NONE)
+					return err;
+				break;
+			}
+
+			/* uncompressed data */
+			case V34_MAP_ENTRY_TYPE_UNCOMPRESSED:
+				err = hunk_read_uncompressed(chd, entry->offset, chd->header.hunkbytes, dest);
+				if (err != CHDERR_NONE)
+					return err;
+				break;
+
+			/* mini-compressed data */
+			case V34_MAP_ENTRY_TYPE_MINI:
+				put_bigendian_uint64(&dest[0], entry->offset);
+				for (bytes = 8; bytes < chd->header.hunkbytes; bytes++)
+					dest[bytes] = dest[bytes - 8];
+				break;
+
+			/* self-referenced data */
+			case V34_MAP_ENTRY_TYPE_SELF_HUNK:
+#ifdef NEED_CACHE_HUNK
+				if (chd->cachehunk == entry->offset && dest == chd->cache)
+					break;
+#endif
+				return hunk_read_into_memory(chd, entry->offset, dest);
+
+			/* parent-referenced data */
+			case V34_MAP_ENTRY_TYPE_PARENT_HUNK:
+				err = hunk_read_into_memory(chd->parent, entry->offset, dest);
+				if (err != CHDERR_NONE)
+					return err;
+				break;
+		}
+		return CHDERR_NONE;
+	}
+	else
+	{
+		void* codec = NULL;
+		/* get a pointer to the map entry */
+		uint64_t blockoffs;
+		uint32_t blocklen;
+#ifdef VERIFY_BLOCK_CRC
+		uint16_t blockcrc;
+#endif
+		uint8_t *rawmap = &chd->header.rawmap[chd->header.mapentrybytes * hunknum];
+		UINT8* compressed_bytes;
+
+		/* uncompressed case */
+		if (!chd_compressed(&chd->header))
+		{
+			blockoffs = (uint64_t)get_bigendian_uint32(rawmap) * (uint64_t)chd->header.hunkbytes;
+			if (blockoffs != 0) {
+				core_fseek(chd->file, blockoffs, SEEK_SET);
+				int result = core_fread(chd->file, dest, chd->header.hunkbytes);
+			/* TODO
+			else if (m_parent_missing)
+				throw CHDERR_REQUIRES_PARENT; */
+			} else if (chd->parent) {
+				err = hunk_read_into_memory(chd->parent, hunknum, dest);
+				if (err != CHDERR_NONE)
+					return err;
+			} else {
+				memset(dest, 0, chd->header.hunkbytes);
+			}
+
+			return CHDERR_NONE;
+		}
+
+		/* compressed case */
+		blocklen = get_bigendian_uint24(&rawmap[1]);
+		blockoffs = get_bigendian_uint48(&rawmap[4]);
+#ifdef VERIFY_BLOCK_CRC
+		blockcrc = get_bigendian_uint16(&rawmap[10]);
+#endif
+		codec = NULL;
+		switch (rawmap[0])
+		{
+			case COMPRESSION_TYPE_0:
+			case COMPRESSION_TYPE_1:
+			case COMPRESSION_TYPE_2:
+			case COMPRESSION_TYPE_3:
+				compressed_bytes = hunk_read_compressed(chd, blockoffs, blocklen);
+				if (compressed_bytes == NULL)
+					return CHDERR_READ_ERROR;
+				switch (chd->codecintf[rawmap[0]]->compression)
+				{
+					case CHD_CODEC_CD_LZMA:
+						codec = &chd->cdlz_codec_data;
+						break;
+
+					case CHD_CODEC_ZLIB:
+						codec = &chd->zlib_codec_data;
+						break;
+
+					case CHD_CODEC_CD_ZLIB:
+						codec = &chd->cdzl_codec_data;
+						break;
+
+					case CHD_CODEC_CD_FLAC:
+						codec = &chd->cdfl_codec_data;
+						break;
+				}
+				if (codec==NULL)
+					return CHDERR_CODEC_ERROR;
+				err = chd->codecintf[rawmap[0]]->decompress(codec, compressed_bytes, blocklen, dest, chd->header.hunkbytes);
+				if (err != CHDERR_NONE)
+					return err;
+#ifdef VERIFY_BLOCK_CRC
+				if (crc16(dest, chd->header.hunkbytes) != blockcrc)
+					return CHDERR_DECOMPRESSION_ERROR;
+#endif
+				return CHDERR_NONE;
+
+			case COMPRESSION_NONE:
+				err = hunk_read_uncompressed(chd, blockoffs, blocklen, dest);
+				if (err != CHDERR_NONE)
+					return err;
+#ifdef VERIFY_BLOCK_CRC
+				if (crc16(dest, chd->header.hunkbytes) != blockcrc)
+					return CHDERR_DECOMPRESSION_ERROR;
+#endif
+				return CHDERR_NONE;
+
+			case COMPRESSION_SELF:
+				return hunk_read_into_memory(chd, blockoffs, dest);
+
+			case COMPRESSION_PARENT:
+				if (chd->parent == NULL)
+					return CHDERR_REQUIRES_PARENT;
+				UINT8 units_in_hunk = chd->header.hunkbytes / chd->header.unitbytes;
+
+				/* blockoffs is aligned to units_in_hunk */
+				if (blockoffs % units_in_hunk == 0) {
+					return hunk_read_into_memory(chd->parent, blockoffs / units_in_hunk, dest);
+				/* blockoffs is not aligned to units_in_hunk */
+				} else {
+					UINT32 unit_in_hunk = blockoffs % units_in_hunk;
+					UINT8 *buf = malloc(chd->header.hunkbytes);
+					/* Read first half of hunk which contains blockoffs */
+					err = hunk_read_into_memory(chd->parent, blockoffs / units_in_hunk, buf);
+					if (err != CHDERR_NONE) {
+						free(buf);
+						return err;
+					}
+					memcpy(dest, buf + unit_in_hunk * chd->header.unitbytes, (units_in_hunk - unit_in_hunk) * chd->header.unitbytes);
+					/* Read second half of hunk which contains blockoffs */
+					err = hunk_read_into_memory(chd->parent, (blockoffs / units_in_hunk) + 1, buf);
+					if (err != CHDERR_NONE) {
+						free(buf);
+						return err;
+					}
+					memcpy(dest + (units_in_hunk - unit_in_hunk) * chd->header.unitbytes, buf, unit_in_hunk * chd->header.unitbytes);
+					free(buf);
+				}
+		}
+		return CHDERR_NONE;
+	}
+
+	/* We should not reach this code */
+	return CHDERR_DECOMPRESSION_ERROR;
+}
+
+/***************************************************************************
+    INTERNAL MAP ACCESS
+***************************************************************************/
+
+/*-------------------------------------------------
+    map_read - read the initial sector map
+-------------------------------------------------*/
+
+static chd_error map_read(chd_file *chd)
+{
+	UINT32 entrysize = (chd->header.version < 3) ? OLD_MAP_ENTRY_SIZE : MAP_ENTRY_SIZE;
+	UINT8 raw_map_entries[MAP_STACK_ENTRIES * MAP_ENTRY_SIZE];
+	UINT64 fileoffset, maxoffset = 0;
+	UINT8 cookie[MAP_ENTRY_SIZE];
+	UINT32 count;
+	chd_error err;
+	int i;
+
+	/* first allocate memory */
+	chd->map = (map_entry *)malloc(sizeof(chd->map[0]) * chd->header.totalhunks);
+	if (!chd->map)
+		return CHDERR_OUT_OF_MEMORY;
+
+	/* read the map entries in in chunks and extract to the map list */
+	fileoffset = chd->header.length;
+	for (i = 0; i < chd->header.totalhunks; i += MAP_STACK_ENTRIES)
+	{
+		/* compute how many entries this time */
+		int entries = chd->header.totalhunks - i, j;
+		if (entries > MAP_STACK_ENTRIES)
+			entries = MAP_STACK_ENTRIES;
+
+		/* read that many */
+		core_fseek(chd->file, fileoffset, SEEK_SET);
+		count = core_fread(chd->file, raw_map_entries, entries * entrysize);
+		if (count != entries * entrysize)
+		{
+			err = CHDERR_READ_ERROR;
+			goto cleanup;
+		}
+		fileoffset += entries * entrysize;
+
+		/* process that many */
+		if (entrysize == MAP_ENTRY_SIZE)
+		{
+			for (j = 0; j < entries; j++)
+				map_extract(&raw_map_entries[j * MAP_ENTRY_SIZE], &chd->map[i + j]);
+		}
+		else
+		{
+			for (j = 0; j < entries; j++)
+				map_extract_old(&raw_map_entries[j * OLD_MAP_ENTRY_SIZE], &chd->map[i + j], chd->header.hunkbytes);
+		}
+
+		/* track the maximum offset */
+		for (j = 0; j < entries; j++)
+			if ((chd->map[i + j].flags & MAP_ENTRY_FLAG_TYPE_MASK) == V34_MAP_ENTRY_TYPE_COMPRESSED ||
+				(chd->map[i + j].flags & MAP_ENTRY_FLAG_TYPE_MASK) == V34_MAP_ENTRY_TYPE_UNCOMPRESSED)
+				maxoffset = MAX(maxoffset, chd->map[i + j].offset + chd->map[i + j].length);
+	}
+
+	/* verify the cookie */
+	core_fseek(chd->file, fileoffset, SEEK_SET);
+	count = core_fread(chd->file, &cookie, entrysize);
+	if (count != entrysize || memcmp(&cookie, END_OF_LIST_COOKIE, entrysize))
+	{
+		err = CHDERR_INVALID_FILE;
+		goto cleanup;
+	}
+
+	/* verify the length */
+	if (maxoffset > core_fsize(chd->file))
+	{
+		err = CHDERR_INVALID_FILE;
+		goto cleanup;
+	}
+	return CHDERR_NONE;
+
+cleanup:
+	if (chd->map)
+		free(chd->map);
+	chd->map = NULL;
+	return err;
+}
+
+/***************************************************************************
+    INTERNAL METADATA ACCESS
+***************************************************************************/
+
+/*-------------------------------------------------
+    metadata_find_entry - find a metadata entry
+-------------------------------------------------*/
+
+static chd_error metadata_find_entry(chd_file *chd, UINT32 metatag, UINT32 metaindex, metadata_entry *metaentry)
+{
+	/* start at the beginning */
+	metaentry->offset = chd->header.metaoffset;
+	metaentry->prev = 0;
+
+	/* loop until we run out of options */
+	while (metaentry->offset != 0)
+	{
+		UINT8	raw_meta_header[METADATA_HEADER_SIZE];
+		UINT32	count;
+
+		/* read the raw header */
+		core_fseek(chd->file, metaentry->offset, SEEK_SET);
+		count = core_fread(chd->file, raw_meta_header, sizeof(raw_meta_header));
+		if (count != sizeof(raw_meta_header))
+			break;
+
+		/* extract the data */
+		metaentry->metatag = get_bigendian_uint32(&raw_meta_header[0]);
+		metaentry->length = get_bigendian_uint32(&raw_meta_header[4]);
+		metaentry->next = get_bigendian_uint64(&raw_meta_header[8]);
+
+		/* flags are encoded in the high byte of length */
+		metaentry->flags = metaentry->length >> 24;
+		metaentry->length &= 0x00ffffff;
+
+		/* if we got a match, proceed */
+		if (metatag == CHDMETATAG_WILDCARD || metaentry->metatag == metatag)
+			if (metaindex-- == 0)
+				return CHDERR_NONE;
+
+		/* no match, fetch the next link */
+		metaentry->prev = metaentry->offset;
+		metaentry->offset = metaentry->next;
+	}
+
+	/* if we get here, we didn't find it */
+	return CHDERR_METADATA_NOT_FOUND;
+}
+
+/***************************************************************************
+    ZLIB COMPRESSION CODEC
+***************************************************************************/
+
+/*-------------------------------------------------
+    zlib_codec_init - initialize the ZLIB codec
+-------------------------------------------------*/
+
+static chd_error zlib_codec_init(void *codec, uint32_t hunkbytes)
+{
+	int zerr;
+	chd_error err;
+	zlib_codec_data *data = (zlib_codec_data*)codec;
+
+	/* clear the buffers */
+	memset(data, 0, sizeof(zlib_codec_data));
+
+	/* init the inflater first */
+	data->inflater.next_in = (Bytef *)data;	/* bogus, but that's ok */
+	data->inflater.avail_in = 0;
+	data->inflater.zalloc = zlib_fast_alloc;
+	data->inflater.zfree = zlib_fast_free;
+	data->inflater.opaque = &data->allocator;
+	zerr = inflateInit2(&data->inflater, -MAX_WBITS);
+
+	/* convert errors */
+	if (zerr == Z_MEM_ERROR)
+		err = CHDERR_OUT_OF_MEMORY;
+	else if (zerr != Z_OK)
+		err = CHDERR_CODEC_ERROR;
+	else
+		err = CHDERR_NONE;
+
+	/* handle an error */
+	if (err != CHDERR_NONE)
+		free(data);
+
+	return err;
+}
+
+/*-------------------------------------------------
+    zlib_codec_free - free data for the ZLIB
+    codec
+-------------------------------------------------*/
+
+static void zlib_codec_free(void *codec)
+{
+	zlib_codec_data *data = (zlib_codec_data *)codec;
+
+	/* deinit the streams */
+	if (data != NULL)
+	{
+		int i;
+
+		inflateEnd(&data->inflater);
+
+		/* free our fast memory */
+		zlib_allocator_free(&data->allocator);
+	}
+}
+
+/*-------------------------------------------------
+    zlib_codec_decompress - decompress data using
+    the ZLIB codec
+-------------------------------------------------*/
+
+static chd_error zlib_codec_decompress(void *codec, const uint8_t *src, uint32_t complen, uint8_t *dest, uint32_t destlen)
+{
+	zlib_codec_data *data = (zlib_codec_data *)codec;
+	int zerr;
+
+	/* reset the decompressor */
+	data->inflater.next_in = (Bytef *)src;
+	data->inflater.avail_in = complen;
+	data->inflater.total_in = 0;
+	data->inflater.next_out = (Bytef *)dest;
+	data->inflater.avail_out = destlen;
+	data->inflater.total_out = 0;
+	zerr = inflateReset(&data->inflater);
+	if (zerr != Z_OK)
+		return CHDERR_DECOMPRESSION_ERROR;
+
+	/* do it */
+	zerr = inflate(&data->inflater, Z_FINISH);
+	if (data->inflater.total_out != destlen)
+		return CHDERR_DECOMPRESSION_ERROR;
+
+	return CHDERR_NONE;
+}
+
+/*-------------------------------------------------
+    zlib_fast_alloc - fast malloc for ZLIB, which
+    allocates and frees memory frequently
+-------------------------------------------------*/
+
+/* Huge alignment values for possible SIMD optimization by compiler (NEON, SSE, AVX) */
+#define ZLIB_MIN_ALIGNMENT_BITS 512
+#define ZLIB_MIN_ALIGNMENT_BYTES (ZLIB_MIN_ALIGNMENT_BITS / 8)
+
+static voidpf zlib_fast_alloc(voidpf opaque, uInt items, uInt size)
+{
+	zlib_allocator *alloc = (zlib_allocator *)opaque;
+	uintptr_t paddr = 0;
+	UINT32 *ptr;
+	int i;
+
+	/* compute the size, rounding to the nearest 1k */
+	size = (size * items + 0x3ff) & ~0x3ff;
+
+	/* reuse a hunk if we can */
+	for (i = 0; i < MAX_ZLIB_ALLOCS; i++)
+	{
+		ptr = alloc->allocptr[i];
+		if (ptr && size == *ptr)
+		{
+			/* set the low bit of the size so we don't match next time */
+			*ptr |= 1;
+
+			/* return aligned block address */
+			return (voidpf)(alloc->allocptr2[i]);
+		}
+	}
+
+	/* alloc a new one */
+    ptr = (UINT32 *)malloc(size + sizeof(UINT32) + ZLIB_MIN_ALIGNMENT_BYTES);
+	if (!ptr)
+		return NULL;
+
+	/* put it into the list */
+	for (i = 0; i < MAX_ZLIB_ALLOCS; i++)
+		if (!alloc->allocptr[i])
+		{
+			alloc->allocptr[i] = ptr;
+			paddr = (((uintptr_t)ptr) + sizeof(UINT32) + (ZLIB_MIN_ALIGNMENT_BYTES-1)) & (~(ZLIB_MIN_ALIGNMENT_BYTES-1));
+			alloc->allocptr2[i] = (uint32_t*)paddr;
+			break;
+		}
+
+	/* set the low bit of the size so we don't match next time */
+	*ptr = size | 1;
+
+	/* return aligned block address */
+	return (voidpf)paddr;
+}
+
+/*-------------------------------------------------
+    zlib_fast_free - fast free for ZLIB, which
+    allocates and frees memory frequently
+-------------------------------------------------*/
+
+static void zlib_fast_free(voidpf opaque, voidpf address)
+{
+	zlib_allocator *alloc = (zlib_allocator *)opaque;
+	UINT32 *ptr = (UINT32 *)address;
+	int i;
+
+	/* find the hunk */
+	for (i = 0; i < MAX_ZLIB_ALLOCS; i++)
+		if (ptr == alloc->allocptr2[i])
+		{
+			/* clear the low bit of the size to allow matches */
+			*(alloc->allocptr[i]) &= ~1;
+			return;
+		}
+}
+
+/*-------------------------------------------------
+    zlib_allocator_free
+-------------------------------------------------*/
+static void zlib_allocator_free(voidpf opaque)
+{
+	zlib_allocator *alloc = (zlib_allocator *)opaque;
+	int i;
+
+	for (i = 0; i < MAX_ZLIB_ALLOCS; i++)
+		if (alloc->allocptr[i])
+			free(alloc->allocptr[i]);
+}
diff --git a/deps/libchdr/src/libchdr_flac.c b/deps/libchdr/src/libchdr_flac.c
new file mode 100644
index 00000000..54d374f7
--- /dev/null
+++ b/deps/libchdr/src/libchdr_flac.c
@@ -0,0 +1,302 @@
+/* license:BSD-3-Clause
+ * copyright-holders:Aaron Giles
+***************************************************************************
+
+    flac.c
+
+    FLAC compression wrappers
+
+***************************************************************************/
+
+#include <assert.h>
+#include <string.h>
+
+#include <libchdr/flac.h>
+#define DR_FLAC_IMPLEMENTATION
+#include <dr_libs/dr_flac.h>
+
+/***************************************************************************
+ *  FLAC DECODER
+ ***************************************************************************
+ */
+
+static size_t flac_decoder_read_callback(void *userdata, void *buffer, size_t bytes);
+static drflac_bool32 flac_decoder_seek_callback(void *userdata, int offset, drflac_seek_origin origin);
+static void flac_decoder_metadata_callback(void *userdata, drflac_metadata *metadata);
+static void flac_decoder_write_callback(void *userdata, void *buffer, size_t len);
+
+
+/* getters (valid after reset) */
+static uint32_t sample_rate(flac_decoder *decoder)  { return decoder->sample_rate; }
+static uint8_t channels(flac_decoder *decoder)  { return decoder->channels; }
+static uint8_t bits_per_sample(flac_decoder *decoder) { return decoder->bits_per_sample; }
+
+/*-------------------------------------------------
+ *  flac_decoder - constructor
+ *-------------------------------------------------
+ */
+
+int flac_decoder_init(flac_decoder *decoder)
+{
+	decoder->decoder = NULL;
+	decoder->sample_rate = 0;
+	decoder->channels = 0;
+	decoder->bits_per_sample = 0;
+	decoder->compressed_offset = 0;
+	decoder->compressed_start = NULL;
+	decoder->compressed_length = 0;
+	decoder->compressed2_start = NULL;
+	decoder->compressed2_length = 0;
+	decoder->uncompressed_offset = 0;
+	decoder->uncompressed_length = 0;
+	decoder->uncompressed_swap = 0;
+	return 0;
+}
+
+/*-------------------------------------------------
+ *  flac_decoder - destructor
+ *-------------------------------------------------
+ */
+
+void flac_decoder_free(flac_decoder* decoder)
+{
+	if ((decoder != NULL) && (decoder->decoder != NULL))
+		drflac_close(decoder->decoder);
+	decoder->decoder = NULL;
+}
+
+/*-------------------------------------------------
+ *  reset - reset state with the original
+ *  parameters
+ *-------------------------------------------------
+ */
+
+static int flac_decoder_internal_reset(flac_decoder* decoder)
+{
+	decoder->compressed_offset = 0;
+	flac_decoder_free(decoder);
+	decoder->decoder = drflac_open_with_metadata(
+		flac_decoder_read_callback, flac_decoder_seek_callback,
+		flac_decoder_metadata_callback, decoder, NULL);
+	return (decoder->decoder != NULL);
+}
+
+/*-------------------------------------------------
+ *  reset - reset state with new memory parameters
+ *  and a custom-generated header
+ *-------------------------------------------------
+ */
+
+int flac_decoder_reset(flac_decoder* decoder, uint32_t sample_rate, uint8_t num_channels, uint32_t block_size, const void *buffer, uint32_t length)
+{
+	/* modify the template header with our parameters */
+	static const uint8_t s_header_template[0x2a] =
+	{
+		0x66, 0x4C, 0x61, 0x43,                         /* +00: 'fLaC' stream header */
+		0x80,                                           /* +04: metadata block type 0 (STREAMINFO), */
+								/*      flagged as last block */
+		0x00, 0x00, 0x22,                               /* +05: metadata block length = 0x22 */
+		0x00, 0x00,                                     /* +08: minimum block size */
+		0x00, 0x00,                                     /* +0A: maximum block size */
+		0x00, 0x00, 0x00,                               /* +0C: minimum frame size (0 == unknown) */
+		0x00, 0x00, 0x00,                               /* +0F: maximum frame size (0 == unknown) */
+		0x0A, 0xC4, 0x42, 0xF0, 0x00, 0x00, 0x00, 0x00, /* +12: sample rate (0x0ac44 == 44100), */
+								/*      numchannels (2), sample bits (16), */
+								/*      samples in stream (0 == unknown) */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* +1A: MD5 signature (0 == none) */
+		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* +2A: start of stream data */
+	};
+	memcpy(decoder->custom_header, s_header_template, sizeof(s_header_template));
+	decoder->custom_header[0x08] = decoder->custom_header[0x0a] = (block_size*num_channels) >> 8;
+	decoder->custom_header[0x09] = decoder->custom_header[0x0b] = (block_size*num_channels) & 0xff;
+	decoder->custom_header[0x12] = sample_rate >> 12;
+	decoder->custom_header[0x13] = sample_rate >> 4;
+	decoder->custom_header[0x14] = (sample_rate << 4) | ((num_channels - 1) << 1);
+
+	/* configure the header ahead of the provided buffer */
+	decoder->compressed_start = (const uint8_t *)(decoder->custom_header);
+	decoder->compressed_length = sizeof(decoder->custom_header);
+	decoder->compressed2_start = (const uint8_t *)(buffer);
+	decoder->compressed2_length = length;
+	return flac_decoder_internal_reset(decoder);
+}
+
+/*-------------------------------------------------
+ *  decode_interleaved - decode to an interleaved
+ *  sound stream
+ *-------------------------------------------------
+ */
+
+int flac_decoder_decode_interleaved(flac_decoder* decoder, int16_t *samples, uint32_t num_samples, int swap_endian)
+{
+	/* configure the uncompressed buffer */
+	memset(decoder->uncompressed_start, 0, sizeof(decoder->uncompressed_start));
+	decoder->uncompressed_start[0] = samples;
+	decoder->uncompressed_offset = 0;
+	decoder->uncompressed_length = num_samples;
+	decoder->uncompressed_swap = swap_endian;
+
+#define	BUFFER	2352	/* bytes per CD audio sector */
+	int16_t buffer[BUFFER];
+	uint32_t buf_samples = BUFFER / channels(decoder);
+	/* loop until we get everything we want */
+	while (decoder->uncompressed_offset < decoder->uncompressed_length) {
+		uint32_t frames = (num_samples < buf_samples ? num_samples : buf_samples);
+		if (!drflac_read_pcm_frames_s16(decoder->decoder, frames, buffer))
+			return 0;
+		flac_decoder_write_callback(decoder, buffer, frames*sizeof(*buffer)*channels(decoder));
+		num_samples -= frames;
+	}
+	return 1;
+}
+
+/*-------------------------------------------------
+ *  finish - finish up the decode
+ *-------------------------------------------------
+ */
+
+uint32_t flac_decoder_finish(flac_decoder* decoder)
+{
+	/* get the final decoding position and move forward */
+	drflac *flac = decoder->decoder;
+	uint64_t position = decoder->compressed_offset;
+
+	/* ugh... there's no function to obtain bytes used in drflac :-/ */
+	position -= DRFLAC_CACHE_L2_LINES_REMAINING(&flac->bs) * sizeof(drflac_cache_t);
+	position -= DRFLAC_CACHE_L1_BITS_REMAINING(&flac->bs) / 8;
+	position -= flac->bs.unalignedByteCount;
+
+	/* adjust position if we provided the header */
+	if (position == 0)
+		return 0;
+	if (decoder->compressed_start == (const uint8_t *)(decoder->custom_header))
+		position -= decoder->compressed_length;
+
+	flac_decoder_free(decoder);
+	return position;
+}
+
+/*-------------------------------------------------
+ *  read_callback - handle reads from the input
+ *  stream
+ *-------------------------------------------------
+ */
+
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+static size_t flac_decoder_read_callback(void *userdata, void *buffer, size_t bytes)
+{
+	flac_decoder* decoder = (flac_decoder*)userdata;
+	uint8_t *dst = buffer;
+
+	/* copy from primary buffer first */
+	uint32_t outputpos = 0;
+	if (outputpos < bytes && decoder->compressed_offset < decoder->compressed_length)
+	{
+		uint32_t bytes_to_copy = MIN(bytes - outputpos, decoder->compressed_length - decoder->compressed_offset);
+		memcpy(&dst[outputpos], decoder->compressed_start + decoder->compressed_offset, bytes_to_copy);
+		outputpos += bytes_to_copy;
+		decoder->compressed_offset += bytes_to_copy;
+	}
+
+	/* once we're out of that, copy from the secondary buffer */
+	if (outputpos < bytes && decoder->compressed_offset < decoder->compressed_length + decoder->compressed2_length)
+	{
+		uint32_t bytes_to_copy = MIN(bytes - outputpos, decoder->compressed2_length - (decoder->compressed_offset - decoder->compressed_length));
+		memcpy(&dst[outputpos], decoder->compressed2_start + decoder->compressed_offset - decoder->compressed_length, bytes_to_copy);
+		outputpos += bytes_to_copy;
+		decoder->compressed_offset += bytes_to_copy;
+	}
+
+	return outputpos;
+}
+
+/*-------------------------------------------------
+ *  metadata_callback - handle STREAMINFO metadata
+ *-------------------------------------------------
+ */
+
+static void flac_decoder_metadata_callback(void *userdata, drflac_metadata *metadata)
+{
+	flac_decoder *decoder = userdata;
+
+	/* ignore all but STREAMINFO metadata */
+	if (metadata->type != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO)
+		return;
+
+	/* parse out the data we care about */
+	decoder->sample_rate = metadata->data.streaminfo.sampleRate;
+	decoder->bits_per_sample = metadata->data.streaminfo.bitsPerSample;
+	decoder->channels = metadata->data.streaminfo.channels;
+}
+
+/*-------------------------------------------------
+ *  write_callback - handle writes to the output
+ *  stream
+ *-------------------------------------------------
+ */
+
+static void flac_decoder_write_callback(void *userdata, void *buffer, size_t bytes)
+{
+	int sampnum, chan;
+	int shift, blocksize;
+	flac_decoder * decoder = (flac_decoder *)userdata;
+	int16_t *sampbuf = (int16_t *)buffer;
+	int sampch = channels(decoder);
+	uint32_t offset = decoder->uncompressed_offset;
+	uint16_t usample;
+
+	/* interleaved case */
+	shift = decoder->uncompressed_swap ? 8 : 0;
+	blocksize = bytes / (sampch * sizeof(sampbuf[0]));
+	if (decoder->uncompressed_start[1] == NULL)
+	{
+		int16_t *dest = decoder->uncompressed_start[0] + offset * sampch;
+		for (sampnum = 0; sampnum < blocksize && offset < decoder->uncompressed_length; sampnum++, offset++)
+			for (chan = 0; chan < sampch; chan++) {
+				usample = (uint16_t)*sampbuf++;
+				*dest++ = (int16_t)((usample << shift) | (usample >> shift));
+			}
+	}
+
+	/* non-interleaved case */
+	else
+	{
+		for (sampnum = 0; sampnum < blocksize && offset < decoder->uncompressed_length; sampnum++, offset++)
+			for (chan = 0; chan < sampch; chan++) {
+				usample = (uint16_t)*sampbuf++;
+				if (decoder->uncompressed_start[chan] != NULL)
+					decoder->uncompressed_start[chan][offset] = (int16_t) ((usample << shift) | (usample >> shift));
+			}
+	}
+	decoder->uncompressed_offset = offset;
+}
+
+
+/*-------------------------------------------------
+ *  seek_callback - handle seeks on the output
+ *  stream
+ *-------------------------------------------------
+ */
+
+static drflac_bool32 flac_decoder_seek_callback(void *userdata, int offset, drflac_seek_origin origin)
+{
+	flac_decoder * decoder = (flac_decoder *)userdata;
+	uint32_t length = decoder->compressed_length + decoder->compressed2_length;
+
+	if (origin == drflac_seek_origin_start) {
+		uint32_t pos = offset;
+		if (pos <= length) {
+			decoder->compressed_offset = pos;
+			return 1;
+		}
+	} else if (origin == drflac_seek_origin_current) {
+		uint32_t pos = decoder->compressed_offset + offset;
+		if (pos <= length) {
+			decoder->compressed_offset = pos;
+			return 1;
+		}
+	}
+	return 0;
+}
+
diff --git a/deps/libchdr/src/libchdr_huffman.c b/deps/libchdr/src/libchdr_huffman.c
new file mode 100644
index 00000000..6a50f134
--- /dev/null
+++ b/deps/libchdr/src/libchdr_huffman.c
@@ -0,0 +1,544 @@
+/* license:BSD-3-Clause
+ * copyright-holders:Aaron Giles
+****************************************************************************
+
+    huffman.c
+
+    Static Huffman compression and decompression helpers.
+
+****************************************************************************
+
+    Maximum codelength is officially (alphabetsize - 1). This would be 255 bits
+    (since we use 1 byte values). However, it is also dependent upon the number
+    of samples used, as follows:
+
+         2 bits -> 3..4 samples
+         3 bits -> 5..7 samples
+         4 bits -> 8..12 samples
+         5 bits -> 13..20 samples
+         6 bits -> 21..33 samples
+         7 bits -> 34..54 samples
+         8 bits -> 55..88 samples
+         9 bits -> 89..143 samples
+        10 bits -> 144..232 samples
+        11 bits -> 233..376 samples
+        12 bits -> 377..609 samples
+        13 bits -> 610..986 samples
+        14 bits -> 987..1596 samples
+        15 bits -> 1597..2583 samples
+        16 bits -> 2584..4180 samples   -> note that a 4k data size guarantees codelength <= 16 bits
+        17 bits -> 4181..6764 samples
+        18 bits -> 6765..10945 samples
+        19 bits -> 10946..17710 samples
+        20 bits -> 17711..28656 samples
+        21 bits -> 28657..46367 samples
+        22 bits -> 46368..75024 samples
+        23 bits -> 75025..121392 samples
+        24 bits -> 121393..196417 samples
+        25 bits -> 196418..317810 samples
+        26 bits -> 317811..514228 samples
+        27 bits -> 514229..832039 samples
+        28 bits -> 832040..1346268 samples
+        29 bits -> 1346269..2178308 samples
+        30 bits -> 2178309..3524577 samples
+        31 bits -> 3524578..5702886 samples
+        32 bits -> 5702887..9227464 samples
+
+    Looking at it differently, here is where powers of 2 fall into these buckets:
+
+          256 samples -> 11 bits max
+          512 samples -> 12 bits max
+           1k samples -> 14 bits max
+           2k samples -> 15 bits max
+           4k samples -> 16 bits max
+           8k samples -> 18 bits max
+          16k samples -> 19 bits max
+          32k samples -> 21 bits max
+          64k samples -> 22 bits max
+         128k samples -> 24 bits max
+         256k samples -> 25 bits max
+         512k samples -> 27 bits max
+           1M samples -> 28 bits max
+           2M samples -> 29 bits max
+           4M samples -> 31 bits max
+           8M samples -> 32 bits max
+
+****************************************************************************
+
+    Delta-RLE encoding works as follows:
+
+    Starting value is assumed to be 0. All data is encoded as a delta
+    from the previous value, such that final[i] = final[i - 1] + delta.
+    Long runs of 0s are RLE-encoded as follows:
+
+        0x100 = repeat count of 8
+        0x101 = repeat count of 9
+        0x102 = repeat count of 10
+        0x103 = repeat count of 11
+        0x104 = repeat count of 12
+        0x105 = repeat count of 13
+        0x106 = repeat count of 14
+        0x107 = repeat count of 15
+        0x108 = repeat count of 16
+        0x109 = repeat count of 32
+        0x10a = repeat count of 64
+        0x10b = repeat count of 128
+        0x10c = repeat count of 256
+        0x10d = repeat count of 512
+        0x10e = repeat count of 1024
+        0x10f = repeat count of 2048
+
+    Note that repeat counts are reset at the end of a row, so if a 0 run
+    extends to the end of a row, a large repeat count may be used.
+
+    The reason for starting the run counts at 8 is that 0 is expected to
+    be the most common symbol, and is typically encoded in 1 or 2 bits.
+
+***************************************************************************/
+
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <libchdr/huffman.h>
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+
+/***************************************************************************
+ *  MACROS
+ ***************************************************************************
+ */
+
+#define MAKE_LOOKUP(code,bits)  (((code) << 5) | ((bits) & 0x1f))
+
+/***************************************************************************
+ *  IMPLEMENTATION
+ ***************************************************************************
+ */
+
+/*-------------------------------------------------
+ *  huffman_context_base - create an encoding/
+ *  decoding context
+ *-------------------------------------------------
+ */
+
+struct huffman_decoder* create_huffman_decoder(int numcodes, int maxbits)
+{
+	struct huffman_decoder* decoder = NULL;
+
+	/* limit to 24 bits */
+	if (maxbits > 24)
+		return NULL;
+
+	decoder = (struct huffman_decoder*)malloc(sizeof(struct huffman_decoder));
+	decoder->numcodes = numcodes;
+	decoder->maxbits = maxbits;
+	decoder->lookup = (lookup_value*)malloc(sizeof(lookup_value) * (1 << maxbits));
+	decoder->huffnode = (struct node_t*)malloc(sizeof(struct node_t) * numcodes);
+	decoder->datahisto = NULL;
+	decoder->prevdata = 0;
+	decoder->rleremaining = 0;
+	return decoder;
+}
+
+void delete_huffman_decoder(struct huffman_decoder* decoder)
+{
+	if (decoder != NULL)
+	{
+		if (decoder->lookup != NULL)
+			free(decoder->lookup);
+		if (decoder->huffnode != NULL)
+			free(decoder->huffnode);
+		free(decoder);
+	}
+}
+
+/*-------------------------------------------------
+ *  decode_one - decode a single code from the
+ *  huffman stream
+ *-------------------------------------------------
+ */
+
+uint32_t huffman_decode_one(struct huffman_decoder* decoder, struct bitstream* bitbuf)
+{
+	/* peek ahead to get maxbits worth of data */
+	uint32_t bits = bitstream_peek(bitbuf, decoder->maxbits);
+
+	/* look it up, then remove the actual number of bits for this code */
+	lookup_value lookup = decoder->lookup[bits];
+	bitstream_remove(bitbuf, lookup & 0x1f);
+
+	/* return the value */
+	return lookup >> 5;
+}
+
+/*-------------------------------------------------
+ *  import_tree_rle - import an RLE-encoded
+ *  huffman tree from a source data stream
+ *-------------------------------------------------
+ */
+
+enum huffman_error huffman_import_tree_rle(struct huffman_decoder* decoder, struct bitstream* bitbuf)
+{
+	int numbits, curnode;
+	enum huffman_error error;
+
+	/* bits per entry depends on the maxbits */
+	if (decoder->maxbits >= 16)
+		numbits = 5;
+	else if (decoder->maxbits >= 8)
+		numbits = 4;
+	else
+		numbits = 3;
+
+	/* loop until we read all the nodes */
+	for (curnode = 0; curnode < decoder->numcodes; )
+	{
+		/* a non-one value is just raw */
+		int nodebits = bitstream_read(bitbuf, numbits);
+		if (nodebits != 1)
+			decoder->huffnode[curnode++].numbits = nodebits;
+
+		/* a one value is an escape code */
+		else
+		{
+			/* a double 1 is just a single 1 */
+			nodebits = bitstream_read(bitbuf, numbits);
+			if (nodebits == 1)
+				decoder->huffnode[curnode++].numbits = nodebits;
+
+			/* otherwise, we need one for value for the repeat count */
+			else
+			{
+				int repcount = bitstream_read(bitbuf, numbits) + 3;
+				while (repcount--)
+					decoder->huffnode[curnode++].numbits = nodebits;
+			}
+		}
+	}
+
+	/* make sure we ended up with the right number */
+	if (curnode != decoder->numcodes)
+		return HUFFERR_INVALID_DATA;
+
+	/* assign canonical codes for all nodes based on their code lengths */
+	error = huffman_assign_canonical_codes(decoder);
+	if (error != HUFFERR_NONE)
+		return error;
+
+	/* build the lookup table */
+	huffman_build_lookup_table(decoder);
+
+	/* determine final input length and report errors */
+	return bitstream_overflow(bitbuf) ? HUFFERR_INPUT_BUFFER_TOO_SMALL : HUFFERR_NONE;
+}
+
+
+/*-------------------------------------------------
+ *  import_tree_huffman - import a huffman-encoded
+ *  huffman tree from a source data stream
+ *-------------------------------------------------
+ */
+
+enum huffman_error huffman_import_tree_huffman(struct huffman_decoder* decoder, struct bitstream* bitbuf)
+{
+	int start;
+	int last = 0;
+	int count = 0;
+	int index;
+	int curcode;
+	uint8_t rlefullbits = 0;
+	uint32_t temp;
+	enum huffman_error error;
+	/* start by parsing the lengths for the small tree */
+	struct huffman_decoder* smallhuff = create_huffman_decoder(24, 6);
+	smallhuff->huffnode[0].numbits = bitstream_read(bitbuf, 3);
+	start = bitstream_read(bitbuf, 3) + 1;
+	for (index = 1; index < 24; index++)
+	{
+		if (index < start || count == 7)
+			smallhuff->huffnode[index].numbits = 0;
+		else
+		{
+			count = bitstream_read(bitbuf, 3);
+			smallhuff->huffnode[index].numbits = (count == 7) ? 0 : count;
+		}
+	}
+
+	/* then regenerate the tree */
+	error = huffman_assign_canonical_codes(smallhuff);
+	if (error != HUFFERR_NONE)
+		return error;
+	huffman_build_lookup_table(smallhuff);
+
+	/* determine the maximum length of an RLE count */
+	temp = decoder->numcodes - 9;
+	while (temp != 0)
+		temp >>= 1, rlefullbits++;
+
+	/* now process the rest of the data */
+	for (curcode = 0; curcode < decoder->numcodes; )
+	{
+		int value = huffman_decode_one(smallhuff, bitbuf);
+		if (value != 0)
+			decoder->huffnode[curcode++].numbits = last = value - 1;
+		else
+		{
+			int count = bitstream_read(bitbuf, 3) + 2;
+			if (count == 7+2)
+				count += bitstream_read(bitbuf, rlefullbits);
+			for ( ; count != 0 && curcode < decoder->numcodes; count--)
+				decoder->huffnode[curcode++].numbits = last;
+		}
+	}
+
+	/* make sure we ended up with the right number */
+	if (curcode != decoder->numcodes)
+		return HUFFERR_INVALID_DATA;
+
+	/* assign canonical codes for all nodes based on their code lengths */
+	error = huffman_assign_canonical_codes(decoder);
+	if (error != HUFFERR_NONE)
+		return error;
+
+	/* build the lookup table */
+	huffman_build_lookup_table(decoder);
+
+	/* determine final input length and report errors */
+	return bitstream_overflow(bitbuf) ? HUFFERR_INPUT_BUFFER_TOO_SMALL : HUFFERR_NONE;
+}
+
+/*-------------------------------------------------
+ *  compute_tree_from_histo - common backend for
+ *  computing a tree based on the data histogram
+ *-------------------------------------------------
+ */
+
+enum huffman_error huffman_compute_tree_from_histo(struct huffman_decoder* decoder)
+{
+	int i;
+	uint32_t lowerweight;
+	uint32_t upperweight;
+	/* compute the number of data items in the histogram */
+	uint32_t sdatacount = 0;
+	for (i = 0; i < decoder->numcodes; i++)
+		sdatacount += decoder->datahisto[i];
+
+	/* binary search to achieve the optimum encoding */
+	lowerweight = 0;
+	upperweight = sdatacount * 2;
+	while (1)
+	{
+		/* build a tree using the current weight */
+		uint32_t curweight = (upperweight + lowerweight) / 2;
+		int curmaxbits = huffman_build_tree(decoder, sdatacount, curweight);
+
+		/* apply binary search here */
+		if (curmaxbits <= decoder->maxbits)
+		{
+			lowerweight = curweight;
+
+			/* early out if it worked with the raw weights, or if we're done searching */
+			if (curweight == sdatacount || (upperweight - lowerweight) <= 1)
+				break;
+		}
+		else
+			upperweight = curweight;
+	}
+
+	/* assign canonical codes for all nodes based on their code lengths */
+	return huffman_assign_canonical_codes(decoder);
+}
+
+/***************************************************************************
+ *  INTERNAL FUNCTIONS
+ ***************************************************************************
+ */
+
+/*-------------------------------------------------
+ *  tree_node_compare - compare two tree nodes
+ *  by weight
+ *-------------------------------------------------
+ */
+
+static int huffman_tree_node_compare(const void *item1, const void *item2)
+{
+	const struct node_t *node1 = *(const struct node_t **)item1;
+	const struct node_t *node2 = *(const struct node_t **)item2;
+	if (node2->weight != node1->weight)
+		return node2->weight - node1->weight;
+	if (node2->bits - node1->bits == 0)
+		fprintf(stderr, "identical node sort keys, should not happen!\n");
+	return (int)node1->bits - (int)node2->bits;
+}
+
+/*-------------------------------------------------
+ *  build_tree - build a huffman tree based on the
+ *  data distribution
+ *-------------------------------------------------
+ */
+
+int huffman_build_tree(struct huffman_decoder* decoder, uint32_t totaldata, uint32_t totalweight)
+{
+	int curcode;
+	int nextalloc;
+	int listitems = 0;
+	int maxbits = 0;
+	/* make a list of all non-zero nodes */
+	struct node_t** list = (struct node_t**)malloc(sizeof(struct node_t*) * decoder->numcodes * 2);
+	memset(decoder->huffnode, 0, decoder->numcodes * sizeof(decoder->huffnode[0]));
+	for (curcode = 0; curcode < decoder->numcodes; curcode++)
+		if (decoder->datahisto[curcode] != 0)
+		{
+			list[listitems++] = &decoder->huffnode[curcode];
+			decoder->huffnode[curcode].count = decoder->datahisto[curcode];
+			decoder->huffnode[curcode].bits = curcode;
+
+			/* scale the weight by the current effective length, ensuring we don't go to 0 */
+			decoder->huffnode[curcode].weight = ((uint64_t)decoder->datahisto[curcode]) * ((uint64_t)totalweight) / ((uint64_t)totaldata);
+			if (decoder->huffnode[curcode].weight == 0)
+				decoder->huffnode[curcode].weight = 1;
+		}
+
+#if 0
+        fprintf(stderr, "Pre-sort:\n");
+        for (int i = 0; i < listitems; i++) {
+            fprintf(stderr, "weight: %d code: %d\n", list[i]->m_weight, list[i]->m_bits);
+        }
+#endif
+
+	/* sort the list by weight, largest weight first */
+	qsort(&list[0], listitems, sizeof(list[0]), huffman_tree_node_compare);
+
+#if 0
+        fprintf(stderr, "Post-sort:\n");
+        for (int i = 0; i < listitems; i++) {
+            fprintf(stderr, "weight: %d code: %d\n", list[i]->m_weight, list[i]->m_bits);
+        }
+        fprintf(stderr, "===================\n");
+#endif
+
+	/* now build the tree */
+	nextalloc = decoder->numcodes;
+	while (listitems > 1)
+	{
+		int curitem;
+		/* remove lowest two items */
+		struct node_t* node1 = &(*list[--listitems]);
+		struct node_t* node0 = &(*list[--listitems]);
+
+		/* create new node */
+		struct node_t* newnode = &decoder->huffnode[nextalloc++];
+		newnode->parent = NULL;
+		node0->parent = node1->parent = newnode;
+		newnode->weight = node0->weight + node1->weight;
+
+		/* insert into list at appropriate location */
+		for (curitem = 0; curitem < listitems; curitem++)
+			if (newnode->weight > list[curitem]->weight)
+			{
+				memmove(&list[curitem+1], &list[curitem], (listitems - curitem) * sizeof(list[0]));
+				break;
+			}
+		list[curitem] = newnode;
+		listitems++;
+	}
+
+	/* compute the number of bits in each code, and fill in another histogram */
+	for (curcode = 0; curcode < decoder->numcodes; curcode++)
+	{
+		struct node_t *curnode;
+		struct node_t* node = &decoder->huffnode[curcode];
+		node->numbits = 0;
+		node->bits = 0;
+
+		/* if we have a non-zero weight, compute the number of bits */
+		if (node->weight > 0)
+		{
+			/* determine the number of bits for this node */
+			for (curnode = node; curnode->parent != NULL; curnode = curnode->parent)
+				node->numbits++;
+			if (node->numbits == 0)
+				node->numbits = 1;
+
+			/* keep track of the max */
+			maxbits = MAX(maxbits, ((int)node->numbits));
+		}
+	}
+	return maxbits;
+}
+
+/*-------------------------------------------------
+ *  assign_canonical_codes - assign canonical codes
+ *  to all the nodes based on the number of bits
+ *  in each
+ *-------------------------------------------------
+ */
+
+enum huffman_error huffman_assign_canonical_codes(struct huffman_decoder* decoder)
+{
+	int curcode, codelen;
+	uint32_t curstart = 0;
+	/* build up a histogram of bit lengths */
+	uint32_t bithisto[33] = { 0 };
+	for (curcode = 0; curcode < decoder->numcodes; curcode++)
+	{
+		struct node_t* node = &decoder->huffnode[curcode];
+		if (node->numbits > decoder->maxbits)
+			return HUFFERR_INTERNAL_INCONSISTENCY;
+		if (node->numbits <= 32)
+			bithisto[node->numbits]++;
+	}
+
+	/* for each code length, determine the starting code number */
+	for (codelen = 32; codelen > 0; codelen--)
+	{
+		uint32_t nextstart = (curstart + bithisto[codelen]) >> 1;
+		if (codelen != 1 && nextstart * 2 != (curstart + bithisto[codelen]))
+			return HUFFERR_INTERNAL_INCONSISTENCY;
+		bithisto[codelen] = curstart;
+		curstart = nextstart;
+	}
+
+	/* now assign canonical codes */
+	for (curcode = 0; curcode < decoder->numcodes; curcode++)
+	{
+		struct node_t* node = &decoder->huffnode[curcode];
+		if (node->numbits > 0)
+			node->bits = bithisto[node->numbits]++;
+	}
+	return HUFFERR_NONE;
+}
+
+/*-------------------------------------------------
+ *  build_lookup_table - build a lookup table for
+ *  fast decoding
+ *-------------------------------------------------
+ */
+
+void huffman_build_lookup_table(struct huffman_decoder* decoder)
+{
+	int curcode;
+	/* iterate over all codes */
+	for (curcode = 0; curcode < decoder->numcodes; curcode++)
+	{
+		/* process all nodes which have non-zero bits */
+		struct node_t* node = &decoder->huffnode[curcode];
+		if (node->numbits > 0)
+		{
+         int shift;
+         lookup_value *dest;
+         lookup_value *destend;
+			/* set up the entry */
+			lookup_value value = MAKE_LOOKUP(curcode, node->numbits);
+
+			/* fill all matching entries */
+			shift = decoder->maxbits - node->numbits;
+			dest = &decoder->lookup[node->bits << shift];
+			destend = &decoder->lookup[((node->bits + 1) << shift) - 1];
+			while (dest <= destend)
+				*dest++ = value;
+		}
+	}
+}
diff --git a/deps/libchdr/src/link.T b/deps/libchdr/src/link.T
new file mode 100644
index 00000000..ea37716b
--- /dev/null
+++ b/deps/libchdr/src/link.T
@@ -0,0 +1,5 @@
+{
+   global: chd_*;
+   local: *;
+};
+
diff --git a/deps/libchdr/tests/benchmark.c b/deps/libchdr/tests/benchmark.c
new file mode 100644
index 00000000..5c10bae8
--- /dev/null
+++ b/deps/libchdr/tests/benchmark.c
@@ -0,0 +1,49 @@
+#include <libchdr/chd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+int main(int argc, char** argv)
+{
+  chd_error err;
+  chd_file* file;
+  const chd_header* header;
+  void* buffer;
+  int i;
+  unsigned int totalbytes;
+  clock_t start, end;
+  double time_taken;
+
+  printf("\nlibchdr benchmark tool....");
+
+  /* Recording the starting clock tick.*/
+  start = clock(); 
+  
+  /* Sequential read all hunks */
+  err = chd_open(argv[1], CHD_OPEN_READ, NULL, &file);
+  if (err)
+	printf("\nchd_open() error: %s", chd_error_string(err));
+  header = chd_get_header(file);
+  totalbytes = header->hunkbytes * header->totalhunks;
+  buffer = malloc(header->hunkbytes);
+  for (i = 0 ; i < header->totalhunks ; i++)
+  {
+    err = chd_read(file, i, buffer);
+    if (err)
+      printf("\nchd_read() error: %s", chd_error_string(err));
+  }
+  free(buffer);
+  chd_close(file);
+
+  /* Recording the end clock tick. */
+  end = clock();
+
+  /* Calculating total time taken by the program. */
+  time_taken = ((double)(end - start)) / ((double)CLOCKS_PER_SEC);
+
+  /* Print results */
+  printf("\nRead %d bytes in %lf seconds", totalbytes, time_taken);
+  printf("\nRate is %lf MB/s", (((double)totalbytes)/(1024*1024)) / time_taken);
+  printf("\n\n");
+  return 0;
+}
diff --git a/deps/libchdr/tests/build_tests.sh b/deps/libchdr/tests/build_tests.sh
new file mode 100755
index 00000000..8d9af6f7
--- /dev/null
+++ b/deps/libchdr/tests/build_tests.sh
@@ -0,0 +1,2 @@
+#!/bin/bash
+gcc benchmark.c -lchdr -o benchmark
-- 
2.39.5