From a7f8b731e5c31734636cef681a29f8ba3cb74f05 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Tue, 17 Dec 2024 17:58:43 +0000
Subject: [PATCH 01/19] replace audrec with audout, use sdl2 resampler in place
 of audren, reduce memory by ~140KiB - audout doesn't suffer from session
 exhaustion as audren did. this removes the hacky workaround of closing
 sys-tune   when a blacklisted game was launched.   This however created a new
 problem, we need a new way to resample the audio.   I am very familiar with
 sdl2 code and know its resampler is pretty good, so I went with that. I will
 change this later   on if a better resampler is found. - playlists are now a
 fixed size of 256 entries, which uses 64k. (entry_count / 4 = kib).

---
 common/config/config.cpp                      |   15 -
 common/config/config.hpp                      |    4 -
 overlay/source/main.cpp                       |   12 -
 sys-tune/Makefile                             |    4 +-
 sys-tune/source/impl/music_player.cpp         |  499 +--
 sys-tune/source/impl/music_player.hpp         |   13 +-
 sys-tune/source/impl/resamplers/SDL_audioEX.c | 3374 +++++++++++++++++
 sys-tune/source/impl/resamplers/SDL_audioEX.h |  344 ++
 sys-tune/source/impl/source.cpp               |   42 +-
 sys-tune/source/impl/source.hpp               |   25 +-
 sys-tune/source/main.cpp                      |   23 +-
 11 files changed, 4001 insertions(+), 354 deletions(-)
 create mode 100644 sys-tune/source/impl/resamplers/SDL_audioEX.c
 create mode 100644 sys-tune/source/impl/resamplers/SDL_audioEX.h

diff --git a/common/config/config.cpp b/common/config/config.cpp
index d2e0505..f8d7805 100644
--- a/common/config/config.cpp
+++ b/common/config/config.cpp
@@ -8,12 +8,6 @@ namespace config {
 namespace {
 
 const char CONFIG_PATH[]{"/config/sys-tune/config.ini"};
-// blacklist uses it's own config file because eventually a database
-// may be setup and users can easily update their blacklist by downloading
-// an updated blacklist.ini.
-// Also, the blacklist lookup needs to be as fast as possible
-// (literally a race until the title opens audren), so a seperate, smaller file is ideal.
-const char BLACKLIST_PATH[]{"/config/sys-tune/blacklist.ini"};
 
 void create_config_dir() {
     /* Creating directory on every set call looks sus, but the user may delete the dir */
@@ -102,13 +96,4 @@ void set_default_title_volume(float value) {
     ini_putf("config", "global_volume", value, CONFIG_PATH);
 }
 
-auto get_title_blacklist(u64 tid) -> bool {
-    return ini_getbool("blacklist", get_tid_str(tid), false, BLACKLIST_PATH);
-}
-
-void set_title_blacklist(u64 tid, bool value) {
-    create_config_dir();
-    ini_putl("blacklist", get_tid_str(tid), value, BLACKLIST_PATH);
-}
-
 }
diff --git a/common/config/config.hpp b/common/config/config.hpp
index c0b8f4e..0af059a 100644
--- a/common/config/config.hpp
+++ b/common/config/config.hpp
@@ -34,8 +34,4 @@ void set_title_volume(u64 tid, float value);
 auto get_default_title_volume() -> float;
 void set_default_title_volume(float value);
 
-// returns true if title causes a fatal on launch
-auto get_title_blacklist(u64 tid) -> bool;
-void set_title_blacklist(u64 tid, bool value);
-
 }
diff --git a/overlay/source/main.cpp b/overlay/source/main.cpp
index d1465cb..17e3cd9 100644
--- a/overlay/source/main.cpp
+++ b/overlay/source/main.cpp
@@ -19,18 +19,6 @@ class SysTuneOverlay final : public tsl::Overlay {
             this->msg  = "Failed pm::Initialize()";
             return;
         }
-
-        // don't open sys-tune if blacklisted title is active!
-        u64 pid{}, tid{};
-        pm::getCurrentPidTid(&pid, &tid);
-
-        if (config::get_title_blacklist(tid)) {
-            this->msg =
-                "Title is blacklisted!\n"
-                "Exit to use sys-tune";
-            return;
-        }
-
         Result rc = tuneInitialize();
 
         // not found can happen if the service isn't started
diff --git a/sys-tune/Makefile b/sys-tune/Makefile
index 0d461e5..ab00418 100644
--- a/sys-tune/Makefile
+++ b/sys-tune/Makefile
@@ -39,7 +39,7 @@ include $(DEVKITPRO)/libnx/switch_rules
 #---------------------------------------------------------------------------------
 TARGET		:=	$(notdir $(CURDIR))
 BUILD		:=	build
-SOURCES		:=	source source/impl ../common/minIni ../common/sdmc ../common/config ../common/pm ../common/aud
+SOURCES		:=	source source/impl ../common/minIni ../common/sdmc ../common/config ../common/pm ../common/aud source/impl/resamplers
 DATA		:=	data
 INCLUDES	:=	../ipc ../common
 
@@ -62,7 +62,7 @@ endif
 #---------------------------------------------------------------------------------
 ARCH	:=	-march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE
 
-CFLAGS	:=	-g -Wall -O2 -ffunction-sections \
+CFLAGS	:=	-flto -g -Wall -O2 -ffunction-sections \
 			$(ARCH) $(DEFINES)
 
 CFLAGS	+=	$(INCLUDE) -DTUNE_API_VERSION=$(API_VERSION) \
diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index f7f05d6..415a1be 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -7,6 +7,7 @@
 #include "aud_wrapper.h"
 #include "config/config.hpp"
 #include "source.hpp"
+#include "resamplers/SDL_audioEX.h"
 
 #include <cstring>
 #include <nxExt.h>
@@ -14,18 +15,80 @@
 namespace tune::impl {
 
     namespace {
-        enum class AudrenCloseState {
-            None, // no change
-            Open, // just opened audren
-            Close, // just closed audren
+        constexpr float VOLUME_MAX = 1.f;
+        constexpr auto PLAYLIST_ENTRY_MAX = 256; // 64k
+
+        struct PlayListEntry2 {
+        public:
+            // Stores `path` in this slot. Fails if the slot is already occupied or if the
+            // path plus its NUL terminator does not fit m_path (256 bytes rather than 0x301).
+            bool Add(const char* path) {
+                if (!IsEmpty()) {
+                    return false;
+                }
+
+                if (std::strlen(path) >= sizeof(m_path)) { // strcpy also writes the terminator
+                    return false;
+                }
+
+                std::strcpy(m_path, path);
+                return true;
+            }
+
+            void Remove() {
+                m_path[0] = '\0';
+            }
+
+            bool IsEmpty() const {
+                return m_path[0] == '\0';
+            }
+
+        // private:
+            char m_path[256]{};
         };
 
-        constexpr float VOLUME_MAX = 1.f;
+        struct PlayList {
+            std::array<PlayListEntry2, PLAYLIST_ENTRY_MAX> m_entries{};
+            // Stores `path` in slot `index`; false if `index` is out of range or the entry rejects it.
+            bool Add(u32 index, const char* path) {
+                if (index >= m_entries.size()) { // size() itself is one past the last slot
+                    return false;
+                }
+
+                return m_entries[index].Add(path);
+            }
+            // Clears slot `index`; out-of-range indices are ignored.
+            void Remove(u32 index) {
+                if (index >= m_entries.size()) {
+                    return;
+                }
+
+                return m_entries[index].Remove();
+            }
+            // Returns the index of the first empty slot, or -1 when every slot is in use.
+            s32 FindNextFreeEntry() const {
+                for (u32 i = 0; i < m_entries.size(); i++) {
+                    if (m_entries[i].IsEmpty()) {
+                        return i;
+                    }
+                }
 
-        std::vector<PlaylistEntry>* g_playlist;
-        std::vector<PlaylistID>* g_shuffle_playlist;
-        PlaylistEntry* g_current;
+                return -1;
+            }
+            // Returns the path stored in slot `index`, or "" when `index` is out of range.
+            const char* GetPath(u32 index) {
+                return index < m_entries.size() ? m_entries[index].m_path : "";
+            }
+        };
+
+        PlayList g_playlist2;
+
+        // todo: move below into playlist struct
+        std::vector<PlaylistEntry> g_playlist;
+        std::vector<PlaylistID> g_shuffle_playlist;
+        PlaylistEntry g_current;
         u32 g_queue_position;
+
         LockableMutex g_mutex;
 
         RepeatMode g_repeat   = RepeatMode::All;
@@ -33,275 +96,140 @@ namespace tune::impl {
         PlayerStatus g_status = PlayerStatus::FetchNext;
         Source *g_source = nullptr;
 
-        float g_tune_volume = 1.f;
         float g_title_volume = 1.f;
         float g_default_title_volume = 1.f;
         bool g_use_title_volume = true;
 
-        AudioDriver g_drv;
-        constexpr const int MinSampleCount  = 256;
-        constexpr const int MaxChannelCount = 8;
-        constexpr const int BufferCount     = 2;
-        constexpr const int AudioSampleSize = MinSampleCount * MaxChannelCount * sizeof(s16);
-        constexpr const int AudioPoolSize   = AudioSampleSize * BufferCount;
-        alignas(AUDREN_MEMPOOL_ALIGNMENT) u8 AudioMemoryPool[AudioPoolSize];
+        constexpr auto AUDIO_BUFFER_COUNT = 2;
+        constexpr auto AUDIO_BUFFER_SIZE = 0x1000;
+
+        alignas(0x1000) s16 AudioMemoryPool[AUDIO_BUFFER_COUNT][AUDIO_BUFFER_SIZE];
+        static_assert((sizeof(AudioMemoryPool[0]) % 0x2000) == 0, "Audio Memory pool needs to be page aligned!");
 
-        static_assert((sizeof(AudioMemoryPool) % 0x2000) == 0, "Audio Memory pool needs to be page aligned!");
+        bool g_awoken_from_sleep = false;
+        bool g_should_pause      = false;
+        bool g_should_run        = true;
+        bool g_audout_init       = false;
 
-        bool g_should_pause = false;
-        bool g_should_run   = true;
-        bool g_close_audren = false;
+        // Stops and closes audout if it is currently open; does nothing otherwise.
+        void audioExit() {
+            if (!g_audout_init) return;
+            audoutStopAudioOut();
+            audoutExit();
+            g_audout_init = false;
+        }
 
         Result audioInit() {
-            /* Default audio config. */
-            const AudioRendererConfig audren_cfg = {
-                .output_rate = AudioRendererOutputRate_48kHz,
-                .num_voices = 2,
-                .num_effects = 0,
-                .num_sinks = 1,
-                .num_mix_objs = 1,
-                .num_mix_buffers = 2,
-            };
-
-            smInitialize();
-            Result rc = audrenInitialize(&audren_cfg);
-            smExit();
-
-            if (R_SUCCEEDED(rc)) {
-                /* Create audio driver. */
-                rc = audrvCreate(&g_drv, &audren_cfg, 2);
-                if (R_SUCCEEDED(rc)) {
-                    /* Register memory pool. */
-                    int mpid = audrvMemPoolAdd(&g_drv, AudioMemoryPool, AudioPoolSize);
-                    audrvMemPoolAttach(&g_drv, mpid);
-
-                    /* Attach default sink. */
-                    u8 sink_channels[] = {0, 1};
-                    audrvDeviceSinkAdd(&g_drv, AUDREN_DEFAULT_DEVICE_NAME, 2, sink_channels);
-
-                    rc = audrvUpdate(&g_drv);
-                    if (R_SUCCEEDED(rc)) {
-                        return audrenStartAudioRenderer();
-                    } else {
-                        /* Cleanup on failure */
-                        audrvClose(&g_drv);
-                    }
-                } else {
-                    /* Cleanup on failure */
-                    audrenExit();
-                }
+            if (g_audout_init) {
+                audioExit();
             }
 
-            return rc;
-        }
+            Result rc;
 
-        // Only call this from audrv thread, as closing audrv
-        // while accesing it will be very bad.
-        AudrenCloseState PollAudrenCloseState() {
-            static bool close_audren_previous = false;
-
-            if (close_audren_previous != g_close_audren) {
-                close_audren_previous = g_close_audren;
-                if (g_close_audren) {
-                    audrvClose(&g_drv);
-                    audrenExit();
-                    return AudrenCloseState::Close;
-                } else {
-                    audioInit();
-                    SetVolume(g_tune_volume);
-                    return AudrenCloseState::Open;
+            if (R_SUCCEEDED(rc = audoutInitialize())) {
+                if (R_SUCCEEDED(rc = audoutStartAudioOut())) {
+                    SetVolume(config::get_volume());
+                    g_audout_init = true;
+                    return 0;
                 }
+                audoutExit();
             }
 
-            return AudrenCloseState::None;
+            return rc;
         }
 
-        Result PlayTrack(const std::string &path) {
+        Result PlayTrack(const char* path) {
+            R_TRY(audioInit());
+
             /* Open file and allocate */
-            auto source = OpenFile(path.c_str());
+            auto source = OpenFile(path);
             R_UNLESS(source != nullptr, tune::FileOpenFailure);
             R_UNLESS(source->IsOpen(), tune::FileOpenFailure);
+            R_UNLESS(source->SetupResampler(audoutGetChannelCount(), audoutGetSampleRate()), tune::VoiceInitFailure);
 
-            const auto channel_count = source->GetChannelCount();
-            const auto sample_rate   = source->GetSampleRate();
-
-            const auto voice_init = [&]() -> Result {
-                R_UNLESS(audrvVoiceInit(&g_drv, 0, channel_count, PcmFormat_Int16, sample_rate), tune::VoiceInitFailure);
-
-                audrvVoiceSetDestinationMix(&g_drv, 0, AUDREN_FINAL_MIX_ID);
-
-                if (channel_count == 1) {
-                    audrvVoiceSetMixFactor(&g_drv, 0, 1.0f, 0, 0);
-                    audrvVoiceSetMixFactor(&g_drv, 0, 1.0f, 0, 1);
-                } else {
-                    audrvVoiceSetMixFactor(&g_drv, 0, 1.0f, 0, 0);
-                    audrvVoiceSetMixFactor(&g_drv, 0, 0.0f, 0, 1);
-                    audrvVoiceSetMixFactor(&g_drv, 0, 0.0f, 1, 0);
-                    audrvVoiceSetMixFactor(&g_drv, 0, 1.0f, 1, 1);
-                }
-
-                audrvVoiceStart(&g_drv, 0);
-
-                return 0;
-            };
-
-            if (auto rc = voice_init(); R_FAILED(rc)) {
-                return rc;
-            }
-
-            const s32 sample_count                  = AudioSampleSize / channel_count / sizeof(s16);
-            AudioDriverWaveBuf buffers[BufferCount] = {};
+            g_source = source.get();
 
-            for (int i = 0; i < BufferCount; i++) {
-                buffers[i].data_pcm16          = reinterpret_cast<s16 *>(&AudioMemoryPool);
-                buffers[i].size                = AudioSampleSize;
-                buffers[i].start_sample_offset = i * sample_count;
-                buffers[i].end_sample_offset   = buffers[i].start_sample_offset + sample_count;
+            AudioOutBuffer audout_buffer[AUDIO_BUFFER_COUNT]{};
+            for (int i = 0; i < AUDIO_BUFFER_COUNT; i++) {
+                audout_buffer[i].next = NULL;
+                audout_buffer[i].buffer = AudioMemoryPool[i];
+                audout_buffer[i].buffer_size = sizeof(AudioMemoryPool[i]);
             }
 
-            g_source = source.get();
+            bool pause_state_changed = g_should_pause;
 
             while (g_should_run && g_status == PlayerStatus::Playing) {
-                switch (PollAudrenCloseState()) {
-                    case AudrenCloseState::None:
-                        break;
-                    case AudrenCloseState::Open:
-                        if (auto rc = voice_init(); R_FAILED(rc)) {
-                            g_source = nullptr;
-                            return rc;
-                        }
-                        break;
-                    case AudrenCloseState::Close:
-                        for (auto &buffer : buffers) {
-                            buffer.state = AudioDriverWaveBufState_Free;
-                        }
-                        break;
-                }
-
-                if (g_close_audren) {
-                    svcSleepThread(100'000'000ul);
-                    continue;
+                if (g_awoken_from_sleep) {
+                    g_awoken_from_sleep = false;
+                    R_TRY(audioInit());
                 }
 
                 if (g_should_pause) {
+                    pause_state_changed = g_should_pause;
                     svcSleepThread(17'000'000);
                     continue;
                 }
 
-                AudioDriverWaveBuf *refillBuf = nullptr;
-                for (auto &buffer : buffers) {
-                    if (buffer.state == AudioDriverWaveBufState_Free || buffer.state == AudioDriverWaveBufState_Done) {
-                        refillBuf = &buffer;
+                // fixes bad sound.
+                if (pause_state_changed != g_should_pause) {
+                    pause_state_changed = g_should_pause;
+                    R_TRY(audioInit());
+                }
+
+                AudioOutBuffer* buffer = NULL;
+                for (int i = 0; i < AUDIO_BUFFER_COUNT; i++) {
+                    bool has_buffer = false;
+                    R_TRY(audoutContainsAudioOutBuffer(&audout_buffer[i], &has_buffer));
+                    if (!has_buffer) {
+                        buffer = &audout_buffer[i];
                         break;
                     }
                 }
 
-                if (refillBuf) {
-                    s16 *data = reinterpret_cast<s16 *>(&AudioMemoryPool) + refillBuf->start_sample_offset * 2;
-
-                    int nSamples = source->Decode(sample_count, data);
+                if (!buffer) {
+                    u32 released_count;
+                    R_TRY(audoutWaitPlayFinish(&buffer, &released_count, UINT64_MAX));
+                }
 
-                    if (nSamples == 0 && source->Done()) {
-                        if (g_repeat != RepeatMode::One)
+                if (buffer) {
+                    const auto nSamples = source->Resample((u8*)buffer->buffer, buffer->buffer_size);
+                    if (nSamples <= 0) {
+                        if (g_repeat != RepeatMode::One) {
                             Next();
+                        }
                         break;
+                    } else {
+                        buffer->data_size = nSamples;
+                        R_TRY(audoutAppendAudioOutBuffer(buffer));
                     }
-
-                    armDCacheFlush(data, nSamples * 2 * sizeof(u16));
-                    refillBuf->end_sample_offset = refillBuf->start_sample_offset + nSamples;
-
-                    audrvVoiceAddWaveBuf(&g_drv, 0, refillBuf);
-                    audrvVoiceStart(&g_drv, 0);
                 }
-
-                audrvUpdate(&g_drv);
-                audrenWaitFrame();
             }
 
-            audrvVoiceDrop(&g_drv, 0);
             g_source = nullptr;
 
+            // re-open and then pause, otherwise artifacts will continue to play...
+            audioInit();
+            audoutStopAudioOut();
+
             return 0;
         }
 
     }
 
-    Result Initialize(std::vector<PlaylistEntry>* playlist, std::vector<PlaylistID>* shuffle, PlaylistEntry* current) {
-        g_playlist = playlist;
-        g_shuffle_playlist = shuffle;
-        g_current = current;
-
-        // tldr, most fancy things made by N will fatal
-        const u64 blacklist[] = {
-            // https://github.com/HookedBehemoth/sys-tune/issues/10
-            0x010077B00E046000, // spyro reignited trilogy
-            0x0100AD9012510000, // pac man 99
-            0x01006C100EC08000, // minecraft dugeons
-            0x01000A10041EA000, // skyrim
-            0x0100F9F00C696000, // crash team racing nitro fueled
-            0x01001E9003502000, // labo 03
-            0x0100165003504000, // labo 04
-            0x0100F2300D4BA000, // darksiders genesis
-            0x0100E1400BA96000, // darksiders warmastered edition
-            0x010071800BA98000, // darksiders 2
-            0x0100F8F014190000, // darksiders 3
-            0x0100D870045B6000, // NES NSO
-            0x01008D300C50C000, // SNES NSO
-            0x0100C62011050000, // GB NSO
-            0x010012F017576000, // GBA NSO
-            0x0100C9A00ECE6000, // N64 NSO
-
-            // https://github.com/tallbl0nde/TriPlayer/issues/31
-            0x0100E5600D446000, // Ni No Kuni: Wrath of the White Witch
-            0x0100A3900C3E2000, // Paper Mario™: The Origami King
-            0x0100626011656000, // The Outer Worlds
-            0x010090F012916000, // Ghostrunner
-            0x0100F15012D36000, // IMMERSE LAND
-            0x01005950022EC000, // Blade Strangers
-            0x0100423009358000, // Death Road to Canada
-            0x010044500C182000, // Sid Meier's Civilization VI
-
-            // anything made by PROTOTYPE
-            0x0100A3A00CC7E000, // CLANNAD
-            0x01007B501372C000, // CLANNAD Side Stories
-            0x01003B300E4AA000, // THE GRISAIA TRILOGY
-            0x0100F06013710000, // ISLAND
-            0x0100BD100C752000, // planetarian
-            0x01002330123BC000, // GRISAIA PHANTOM TRIGGER 05
-            0x0100240013AE8000, // GRISAIA PHANTOM TRIGGER 06
-            0x01002EF014DA2000, // GRISAIA PHANTOM TRIGGER 07
-            0x0100398010314000, // Tomoyo After -It's a Wonderful Life- CS Edition
-            0x01004AB0133E8000, // GRISAIA PHANTOM TRIGGER 01 to 05
-            0x01005250123B8000, // GRISAIA PHANTOM TRIGGER 03
-            0x010054101370E000, // FATAL TWELVE
-            0x010062A0178A8000, // LOOPERS
-            0x0100806017562000, // OshiRabu: Waifus Over Husbandos + Love･or･die
-            0x0100943010310000, // Little Busters! Converted Edition
-            0x010096000CA38000, // TAISHO x ALICE ALL IN ONE
-            0x0100A1200CA3C000, // Butterfly's Poison; Blood Chains
-            0x0100C38019CE4000, // GRISAIA PHANTOM TRIGGER 08
-            0x0100C9C0178A6000, // Harmonia
-            0x0100CAF013AE6000, // GRISAIA PHANTOM TRIGGER 5.5
-            0x0100D970123BA000, // GRISAIA PHANTOM TRIGGER 04
-        };
-
-        // do this on startup because the user may not copy a config
-        // file or delete it at somepoint
-        for (auto tid : blacklist) {
-            config::set_title_blacklist(tid, true);
-        }
-
+    Result Initialize() {
         if (auto rc = audioInit(); R_FAILED(rc)) {
             return rc;
         }
 
+        g_playlist.reserve(PLAYLIST_ENTRY_MAX);
+        g_shuffle_playlist.reserve(PLAYLIST_ENTRY_MAX);
+
         /* Fetch values from config, sanitize the return value */
         if (auto c = config::get_repeat(); c <= 2 && c >= 0) {
             SetRepeatMode(static_cast<RepeatMode>(c));
         }
 
         SetShuffleMode(static_cast<ShuffleMode>(config::get_shuffle()));
-        SetVolume(config::get_volume());
         SetDefaultTitleVolume(config::get_default_title_volume());
 
         return 0;
@@ -315,49 +243,41 @@ namespace tune::impl {
     void TuneThreadFunc(void *) {
         /* Run as long as we aren't stopped and no error has been encountered. */
         while (g_should_run) {
-            // update g_close_audren, returned state isn't needed
-            PollAudrenCloseState();
-
-            if (g_close_audren) {
-                svcSleepThread(100'000'000ul);
-                continue;
-            }
-
-            g_current->path = "";
+            g_current.Reset();
             {
                 std::scoped_lock lk(g_mutex);
 
-                const auto &queue = *g_playlist;
+                const auto &queue = g_playlist;
                 const auto queue_size = queue.size();
                 if (queue_size == 0) {
-                    g_current->path = "";
+                    g_current.Reset();
                 } else if (g_queue_position >= queue_size) {
                     g_queue_position = queue_size - 1;
                     continue;
                 } else {
                     if (g_shuffle == ShuffleMode::On) {
-                        const auto shuffle_id = (*g_shuffle_playlist)[g_queue_position];
-                        for (u32 i = 0; i < g_playlist->size(); i++) {
-                            if ((*g_playlist)[i].id == shuffle_id) {
-                                *g_current = (*g_playlist)[i];
+                        const auto shuffle_id = g_shuffle_playlist[g_queue_position];
+                        for (u32 i = 0; i < g_playlist.size(); i++) {
+                            if (g_playlist[i].id == shuffle_id) {
+                                g_current = g_playlist[i];
                                 break;
                             }
                         }
                     } else {
-                        *g_current = queue[g_queue_position];
+                        g_current = queue[g_queue_position];
                     }
                 }
             }
 
             /* Sleep if queue is empty. */
-            if (g_current->path.empty()) {
+            if (!g_current.IsValid()) {
                 svcSleepThread(100'000'000ul);
                 continue;
             }
 
             g_status = PlayerStatus::Playing;
             /* Only play if playing and we have a track queued. */
-            Result rc = PlayTrack(g_current->path);
+            Result rc = PlayTrack(g_playlist2.GetPath(g_current.id));
 
             /* Log error. */
             if (R_FAILED(rc)) {
@@ -371,12 +291,7 @@ namespace tune::impl {
             }
         }
 
-        if (!g_close_audren) {
-            audrvClose(&g_drv);
-            // this needs to be closed asap if a blacklisted title is launched.
-            // this is why we close this here rather than in __appExit
-            audrenExit();
-        }
+        audioExit();
     }
 
     void PscmThreadFunc(void *ptr) {
@@ -398,6 +313,7 @@ namespace tune::impl {
                 // PscPmState_ReadySleep is sent multiple times.
                 // todo: fade in and delay playback on wakeup slightly
                 case PscPmState_ReadyAwaken:
+                    g_awoken_from_sleep = true;
                     g_should_pause = previous_state;
                     break;
                 // pause on sleep
@@ -420,6 +336,7 @@ namespace tune::impl {
         /* [0] Low == plugged in; [1] High == not plugged in. */
         GpioValue old_value = GpioValue_High;
 
+        // TODO(TJ): pausing on headphone change should be a config option.
         while (g_should_run) {
             /* Fetch current gpio value. */
             GpioValue value;
@@ -441,9 +358,6 @@ namespace tune::impl {
         while (g_should_run) {
             u64 pid{}, new_tid{};
             if (pm::PollCurrentPidTid(&pid, &new_tid)) {
-                // check if title is blacklisted
-                g_close_audren = config::get_title_blacklist(new_tid);
-
                 g_title_volume = 1.f;
 
                 if (config::has_title_volume(new_tid)) {
@@ -451,7 +365,7 @@ namespace tune::impl {
                     SetTitleVolume(std::clamp(config::get_title_volume(new_tid), 0.f, VOLUME_MAX));
                 }
 
-                // TODO: fade song in rather than abruptly playing to avoid jump scares
+                // TODO(TJ): fade song in rather than abruptly playing to avoid jump scares
                 if (config::has_title_enabled(new_tid)) {
                     g_should_pause = !config::get_title_enabled(new_tid);
                 } else {
@@ -490,7 +404,7 @@ namespace tune::impl {
         {
             std::scoped_lock lk(g_mutex);
 
-            if (g_queue_position < g_playlist->size() - 1) {
+            if (g_queue_position < g_playlist.size() - 1) {
                 g_queue_position++;
             } else {
                 g_queue_position = 0;
@@ -509,7 +423,7 @@ namespace tune::impl {
             if (g_queue_position > 0) {
                 g_queue_position--;
             } else {
-                g_queue_position = g_playlist->size() - 1;
+                g_queue_position = g_playlist.size() - 1;
             }
         }
         g_status     = PlayerStatus::FetchNext;
@@ -517,12 +431,14 @@ namespace tune::impl {
     }
 
     float GetVolume() {
-        return g_drv.in_mixes[0].volume;
+        float volume = 1.F;
+        audoutGetAudioOutVolume(&volume);
+        return volume;
     }
 
     void SetVolume(float volume) {
         volume = std::clamp(volume, 0.f, VOLUME_MAX);
-        g_tune_volume = g_drv.in_mixes[0].volume = volume;
+        audoutSetAudioOutVolume(volume);
         config::set_volume(volume);
     }
 
@@ -561,30 +477,22 @@ namespace tune::impl {
     void SetShuffleMode(ShuffleMode mode) {
         std::scoped_lock lk(g_mutex);
 
-        // if (g_playlist->size() > 0 && g_shuffle != mode) {
-        //     auto &dst = (mode == ShuffleMode::On) ? *g_shuffle_playlist : *g_playlist;
-
-        //     auto it = std::find(dst.cbegin(), dst.cend(), *g_current);
-        //     if (it != dst.cend())
-        //         g_queue_position = std::distance(dst.cbegin(), it);
-        // }
-
         g_shuffle = mode;
     }
 
     u32 GetPlaylistSize() {
         std::scoped_lock lk(g_mutex);
 
-        return g_playlist->size();
+        return g_playlist.size();
     }
 
     Result GetPlaylistItem(u32 index, char *buffer, size_t buffer_size) {
         std::scoped_lock lk(g_mutex);
 
-        if (index >= g_playlist->size())
+        if (index >= g_playlist.size())
             return tune::OutOfRange;
 
-        std::strncpy(buffer, (*g_playlist)[index].path.c_str(), buffer_size);
+        std::strncpy(buffer, g_playlist2.GetPath(g_playlist[index].id), buffer_size); // paths are keyed by slot id, not queue position
 
         return 0;
     }
@@ -595,9 +503,9 @@ namespace tune::impl {
 
         {
             std::scoped_lock lk(g_mutex);
-            R_UNLESS(!g_current->path.empty(), tune::NotPlaying);
-            R_UNLESS(buffer_size >= g_current->path.size(), tune::InvalidArgument);
-            std::strcpy(buffer, g_current->path.c_str());
+            R_UNLESS(g_current.IsValid(), tune::NotPlaying);
+            // R_UNLESS(buffer_size >= g_current.path.size(), tune::InvalidArgument);
+            std::strcpy(buffer, g_playlist2.GetPath(g_current.id));
         }
 
         auto [current, total] = g_source->Tell();
@@ -614,8 +522,8 @@ namespace tune::impl {
         {
             std::scoped_lock lk(g_mutex);
 
-            g_playlist->clear();
-            g_shuffle_playlist->clear();
+            g_playlist.clear();
+            g_shuffle_playlist.clear();
         }
         g_status = PlayerStatus::FetchNext;
     }
@@ -623,7 +531,7 @@ namespace tune::impl {
     void MoveQueueItem(u32 src, u32 dst) {
         std::scoped_lock lk(g_mutex);
 
-        const auto queue_size = g_playlist->size();
+        const auto queue_size = g_playlist.size();
 
         if (src >= queue_size) {
             src = queue_size - 1;
@@ -632,11 +540,11 @@ namespace tune::impl {
             dst = queue_size - 1;
         }
 
-        auto source = g_playlist->cbegin() + src;
-        auto dest   = g_playlist->cbegin() + dst;
+        auto source = g_playlist.cbegin() + src;
+        auto dest   = g_playlist.cbegin() + dst;
 
-        g_playlist->insert(dest, *source);
-        g_playlist->erase(source);
+        g_playlist.insert(dest, *source);
+        g_playlist.erase(source);
 
         if (src < dst) {
             if (g_queue_position == src) {
@@ -658,7 +566,7 @@ namespace tune::impl {
             std::scoped_lock lk(g_mutex);
 
             /* Check if we are out of bounds. */
-            size_t queue_size = g_playlist->size();
+            size_t queue_size = g_playlist.size();
             if (index >= queue_size) {
                 index = queue_size - 1;
             }
@@ -667,9 +575,9 @@ namespace tune::impl {
             u32 pos = index;
 
             if (g_shuffle == ShuffleMode::On) {
-                const auto track = g_playlist->cbegin() + index;
-                for (u32 i = 0; i < g_shuffle_playlist->size(); i++) {
-                    if ((*g_shuffle_playlist)[i] == track->id) {
+                const auto track = g_playlist.cbegin() + index;
+                for (u32 i = 0; i < g_shuffle_playlist.size(); i++) {
+                    if (g_shuffle_playlist[i] == track->id) {
                         pos = i;
                         break;
                     }
@@ -692,37 +600,39 @@ namespace tune::impl {
     }
 
     Result Enqueue(const char *buffer, size_t buffer_length, EnqueueType type) {
-        // NOTE: do not decrement this
-        static PlaylistID playlist_id{};
-
         /* Ensure file exists. */
         if (!sdmc::FileExists(buffer))
             return tune::InvalidPath;
 
         std::scoped_lock lk(g_mutex);
 
+        const auto new_id = g_playlist2.FindNextFreeEntry();  // slot for the new track; a negative value is treated as "no free slot"
+        if (new_id < 0) {
+            return tune::OutOfMemory;
+        }
+
+        if (!g_playlist2.Add(new_id, buffer)) {  // presumably stores the path under new_id -- confirm; NOTE(review): buffer_length is no longer read in this function
+            return tune::OutOfMemory;
+        }
+
         const PlaylistEntry new_entry{
-            .path = {buffer, buffer_length},
-            .id = playlist_id
+            .id = static_cast<PlaylistID>(new_id)  // the queue entry now carries only the id, not the path
         };
 
         // add new entry to playlist
         if (type == EnqueueType::Front) {
-            g_playlist->emplace(g_playlist->cbegin(), new_entry);
+            g_playlist.emplace(g_playlist.cbegin(), new_entry);  // inserting at the front shifts every existing index by one
             if (g_shuffle == ShuffleMode::Off) {
                 g_queue_position++;
             }
         } else {
-            g_playlist->emplace_back(new_entry);
+            g_playlist.emplace_back(new_entry);
         }
 
         // add new entry id to shuffle_playlist_list
-        const auto shuffle_playlist_size = g_shuffle_playlist->size();
+        const auto shuffle_playlist_size = g_shuffle_playlist.size();  // size before insertion, so the random index below is always a valid insert position
         const auto shuffle_index = (shuffle_playlist_size > 1) ? (randomGet64() % shuffle_playlist_size) : 0;
-        g_shuffle_playlist->emplace(g_shuffle_playlist->cbegin() + shuffle_index, playlist_id);
-
-        // increase playlist counter
-        playlist_id++;
+        g_shuffle_playlist.emplace(g_shuffle_playlist.cbegin() + shuffle_index, new_id);  // the shuffle list stores ids only
 
         return 0;
     }
@@ -731,27 +641,28 @@ namespace tune::impl {
         std::scoped_lock lk(g_mutex);
 
         /* Ensure we don't operate out of bounds. */
-        R_UNLESS(!g_playlist->empty(), tune::QueueEmpty);
-        R_UNLESS(index < g_playlist->size(), tune::OutOfRange);
+        R_UNLESS(!g_playlist.empty(), tune::QueueEmpty);
+        R_UNLESS(index < g_playlist.size(), tune::OutOfRange);
 
         /* Get iterator for index position. */
-        const auto track = g_playlist->cbegin() + index;
+        const auto track = g_playlist.cbegin() + index;
+        g_playlist2.Remove(track->id);
 
-        for (u32 i = 0; i < g_shuffle_playlist->size(); i++) {
-            if ((*g_shuffle_playlist)[i] == track->id) {
-                const auto shuffle_it = g_shuffle_playlist->cbegin() + i;
+        for (u32 i = 0; i < g_shuffle_playlist.size(); i++) {
+            if (g_shuffle_playlist[i] == track->id) {
+                const auto shuffle_it = g_shuffle_playlist.cbegin() + i;
                 // we are playing from shuffle list so use that index instead
                 if (g_shuffle == ShuffleMode::On) {
                     index = i;
                 }
                 // finally remove
-                g_shuffle_playlist->erase(shuffle_it);
+                g_shuffle_playlist.erase(shuffle_it);
                 break;
             }
         }
 
         /* Remove entry. */
-        g_playlist->erase(track);
+        g_playlist.erase(track);
 
         /* Fetch a new track if we deleted the current song. */
         bool fetch_new = g_queue_position == index;
diff --git a/sys-tune/source/impl/music_player.hpp b/sys-tune/source/impl/music_player.hpp
index df1db9b..0c5028b 100644
--- a/sys-tune/source/impl/music_player.hpp
+++ b/sys-tune/source/impl/music_player.hpp
@@ -8,11 +8,18 @@ namespace tune::impl {
     using PlaylistID = u32;
 
     struct PlaylistEntry {
-        std::string path;
-        PlaylistID id;
+        PlaylistID id{UINT32_MAX};  // UINT32_MAX is the "no entry" sentinel tested by IsValid()
+
+        bool IsValid() const {  // true when id holds anything other than the sentinel
+            return id != UINT32_MAX;
+        }
+
+        void Reset() {  // puts the entry back into the "no entry" state
+            id = UINT32_MAX;
+        }
     };
 
-    Result Initialize(std::vector<PlaylistEntry>* playlist, std::vector<PlaylistID>* shuffle, PlaylistEntry* current);
+    Result Initialize();
     void Exit();
 
     void TuneThreadFunc(void *);
diff --git a/sys-tune/source/impl/resamplers/SDL_audioEX.c b/sys-tune/source/impl/resamplers/SDL_audioEX.c
new file mode 100644
index 0000000..8b7bcbc
--- /dev/null
+++ b/sys-tune/source/impl/resamplers/SDL_audioEX.c
@@ -0,0 +1,3374 @@
+#include "SDL_audioEX.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846264338327950288   /**< pi */
+#endif
+
+#define SDL_zeropEX(x) memset((x), 0, sizeof(*(x)))  /* zero the object that pointer x points to */
+#define SDL_zeroaEX(x) memset((x), 0, sizeof((x)))  /* zero x itself; sizeof is taken of x, so x must be the array/object, not a pointer to it */
+#define SDL_minEX(a,b) ((a) < (b) ? (a) : (b))  /* the smaller argument is evaluated twice -- do not pass expressions with side effects */
+
+#define DEBUG_AUDIOSTREAM 0
+
+static int SDL_OutOfMemoryEX(void) { return -1; }  /* out-of-memory stub: only returns the -1 failure code, records no message */
+#ifndef NDEBUG
+#include <stdio.h>
+static int SDL_PrintError(const char* e) { printf("%s", e);  return -1; }  /* debug builds: print e verbatim (never as a format string) and return -1 */
+#else
+static int SDL_PrintError(const char* e) { (void)e;  return -1; }  /* NDEBUG builds: drop the message, still return -1 */
+#endif
+
+// BEGIN AUDIO_c.h
+#ifndef DEBUG_CONVERT
+#define DEBUG_CONVERT 0
+#endif
+
+#if DEBUG_CONVERT
+#define LOG_DEBUG_CONVERT(from, to) fprintf(stderr, "Converting %s to %s.\n", from, to);
+#else
+#define LOG_DEBUG_CONVERT(from, to)
+#endif
+
+/* Functions and variables exported from SDL_audio.c for SDL_sysaudio.c */
+
+/* Choose the audio filter functions below */
+void SDL_ChooseAudioConverters(void);
+
+/* You need to call SDL_PrepareResampleFilter() before using the internal resampler. */
+static int SDL_PrepareResampleFilter(void);
+
+// BEGIN AUDIOTYPECVT
+#ifdef __ARM_NEON
+#include <arm_neon.h>
+#define HAVE_NEON_INTRINSICS 1
+#endif
+
+#ifdef __SSE2__
+#include <immintrin.h>
+#define HAVE_SSE2_INTRINSICS 1
+#endif
+
+#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
+#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* x86_64 guarantees SSE2. */
+#elif __MACOSX__ && HAVE_SSE2_INTRINSICS
+#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* Mac OS X/Intel guarantees SSE2. */
+#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8) && HAVE_NEON_INTRINSICS
+#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* ARMv8+ promise NEON. */
+#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7) && HAVE_NEON_INTRINSICS
+#define NEED_SCALAR_CONVERTER_FALLBACKS 0  /* All Apple ARMv7 chips promise NEON support. */
+#endif
+
+/* Set to zero if platform is guaranteed to use a SIMD codepath here. */
+#ifndef NEED_SCALAR_CONVERTER_FALLBACKS
+#define NEED_SCALAR_CONVERTER_FALLBACKS 1
+#endif
+
+/* Function pointers set to a CPU-specific implementation. */
+SDL_AudioFilter_EX SDL_Convert_S8_to_F32 = NULL;
+SDL_AudioFilter_EX SDL_Convert_U8_to_F32 = NULL;
+SDL_AudioFilter_EX SDL_Convert_S16_to_F32 = NULL;
+SDL_AudioFilter_EX SDL_Convert_U16_to_F32 = NULL;
+SDL_AudioFilter_EX SDL_Convert_S32_to_F32 = NULL;
+SDL_AudioFilter_EX SDL_Convert_F32_to_S8 = NULL;
+SDL_AudioFilter_EX SDL_Convert_F32_to_U8 = NULL;
+SDL_AudioFilter_EX SDL_Convert_F32_to_S16 = NULL;
+SDL_AudioFilter_EX SDL_Convert_F32_to_U16 = NULL;
+SDL_AudioFilter_EX SDL_Convert_F32_to_S32 = NULL;
+
+
+#define DIVBY128 0.0078125f
+#define DIVBY32768 0.000030517578125f
+#define DIVBY8388607 0.00000011920930376163766f
+
+
+#if NEED_SCALAR_CONVERTER_FALLBACKS
+static void  /* Widens the signed 8-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_S8_to_F32_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+    const int8_t *src = ((const int8_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;  /* last output slot; assumes buf has room for len_cvt * 4 bytes -- TODO confirm against the caller */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32");
+
+    for (i = cvt->len_cvt; i; --i, --src, --dst) {  /* back to front: each 4-byte write lands at or after the byte being read, so unread input survives */
+        *dst = ((float) *src) * DIVBY128;  /* -128..127 scaled by 1/128 */
+    }
+
+    cvt->len_cvt *= 4;  /* every 1-byte sample is now a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* Widens the unsigned 8-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_U8_to_F32_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+    const uint8_t *src = ((const uint8_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;  /* last output slot; assumes buf has room for len_cvt * 4 bytes -- TODO confirm against the caller */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32");
+
+    for (i = cvt->len_cvt; i; --i, --src, --dst) {  /* back to front so the growing output never overwrites unread input */
+        *dst = (((float) *src) * DIVBY128) - 1.0f;  /* 0..255 scaled by 1/128, then re-centred around zero */
+    }
+
+    cvt->len_cvt *= 4;  /* every 1-byte sample is now a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* Widens the signed 16-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_S16_to_F32_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+    const int16_t *src = ((const int16_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample (len_cvt is a byte count) */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;  /* last output slot; assumes buf has room for len_cvt * 2 bytes -- TODO confirm against the caller */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32");
+
+    for (i = cvt->len_cvt / sizeof (int16_t); i; --i, --src, --dst) {  /* i counts samples; back to front so unread input is not overwritten */
+        *dst = ((float) *src) * DIVBY32768;  /* -32768..32767 scaled by 1/32768 */
+    }
+
+    cvt->len_cvt *= 2;  /* every 2-byte sample is now a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* Widens the unsigned 16-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_U16_to_F32_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+
+    const uint16_t *src = ((const uint16_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample (len_cvt is a byte count) */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;  /* last output slot; assumes buf has room for len_cvt * 2 bytes -- TODO confirm against the caller */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32");
+
+    for (i = cvt->len_cvt / sizeof (uint16_t); i; --i, --src, --dst) {  /* i counts samples; back to front so unread input is not overwritten */
+        *dst = (((float) *src) * DIVBY32768) - 1.0f;  /* 0..65535 scaled by 1/32768, then re-centred around zero */
+    }
+
+    cvt->len_cvt *= 2;  /* every 2-byte sample is now a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* Converts the signed 32-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_S32_to_F32_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+    const int32_t *src = (const int32_t *) cvt->buf;  /* input and output samples are both 4 bytes, so both cursors start at the front */
+    float *dst = (float *) cvt->buf;
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32");
+
+    for (i = cvt->len_cvt / sizeof (int32_t); i; --i, ++src, ++dst) {  /* i counts samples */
+        *dst = ((float) (*src>>8)) * DIVBY8388607;  /* drop the low 8 bits, then scale the remaining 24-bit value by 1/8388607 */
+    }
+
+    if (cvt->filters[++cvt->filter_index]) {  /* len_cvt is unchanged; advance the chain, a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* Narrows the float samples in cvt->buf to signed 8-bit, in place, then runs the next filter. */
+SDL_Convert_F32_to_S8_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+    const float *src = (const float *) cvt->buf;
+    int8_t *dst = (int8_t *) cvt->buf;  /* output shrinks, so writing front to back always stays behind the read cursor */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8");
+
+    for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {  /* i counts samples */
+        const float sample = *src;
+        if (sample >= 1.0f) {  /* clamp anything at or above full scale */
+            *dst = 127;
+        } else if (sample <= -1.0f) {  /* clamp anything at or below negative full scale */
+            *dst = -128;
+        } else {
+            *dst = (int8_t)(sample * 127.0f);  /* the cast truncates toward zero */
+        }
+    }
+
+    cvt->len_cvt /= 4;  /* every 4-byte float is now a 1-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
+    }
+}
+
+static void  /* Narrows the float samples in cvt->buf to unsigned 8-bit, in place, then runs the next filter. */
+SDL_Convert_F32_to_U8_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+    const float *src = (const float *) cvt->buf;
+    uint8_t *dst = (uint8_t *) cvt->buf;  /* output shrinks, so writing front to back always stays behind the read cursor */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8");
+
+    for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {  /* i counts samples */
+        const float sample = *src;
+        if (sample >= 1.0f) {  /* clamp anything at or above full scale */
+            *dst = 255;
+        } else if (sample <= -1.0f) {  /* clamp anything at or below negative full scale */
+            *dst = 0;
+        } else {
+            *dst = (uint8_t)((sample + 1.0f) * 127.0f);  /* shift to 0..2, scale by 127, truncate */
+        }
+    }
+
+    cvt->len_cvt /= 4;  /* every 4-byte float is now a 1-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
+    }
+}
+
+static void  /* Narrows the float samples in cvt->buf to signed 16-bit, in place, then runs the next filter. */
+SDL_Convert_F32_to_S16_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+    const float *src = (const float *) cvt->buf;
+    int16_t *dst = (int16_t *) cvt->buf;  /* output shrinks, so writing front to back always stays behind the read cursor */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16");
+
+    for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {  /* i counts samples */
+        const float sample = *src;
+        if (sample >= 1.0f) {  /* clamp anything at or above full scale */
+            *dst = 32767;
+        } else if (sample <= -1.0f) {  /* clamp anything at or below negative full scale */
+            *dst = -32768;
+        } else {
+            *dst = (int16_t)(sample * 32767.0f);  /* the cast truncates toward zero */
+        }
+    }
+
+    cvt->len_cvt /= 2;  /* every 4-byte float is now a 2-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
+    }
+}
+
+static void  /* Narrows the float samples in cvt->buf to unsigned 16-bit, in place, then runs the next filter. */
+SDL_Convert_F32_to_U16_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+    const float *src = (const float *) cvt->buf;
+    uint16_t *dst = (uint16_t *) cvt->buf;  /* output shrinks, so writing front to back always stays behind the read cursor */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16");
+
+    for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {  /* i counts samples */
+        const float sample = *src;
+        if (sample >= 1.0f) {  /* clamp anything at or above full scale */
+            *dst = 65535;
+        } else if (sample <= -1.0f) {  /* clamp anything at or below negative full scale */
+            *dst = 0;
+        } else {
+            *dst = (uint16_t)((sample + 1.0f) * 32767.0f);  /* shift to 0..2, scale by 32767, truncate */
+        }
+    }
+
+    cvt->len_cvt /= 2;  /* every 4-byte float is now a 2-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
+    }
+}
+
+static void  /* Converts the float samples in cvt->buf to signed 32-bit, in place, then runs the next filter. */
+SDL_Convert_F32_to_S32_Scalar(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    (void)format;  /* the input format is fixed by the function itself; the argument is ignored */
+    const float *src = (const float *) cvt->buf;  /* input and output samples are both 4 bytes, so both cursors start at the front */
+    int32_t *dst = (int32_t *) cvt->buf;
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32");
+
+    for (i = cvt->len_cvt / sizeof (float); i; --i, ++src, ++dst) {  /* i counts samples */
+        const float sample = *src;
+        if (sample >= 1.0f) {  /* clamp anything at or above full scale */
+            *dst = 2147483647;
+        } else if (sample <= -1.0f) {  /* clamp anything at or below negative full scale */
+            *dst = (int32_t) -2147483648LL;
+        } else {
+            *dst = (int32_t)(((uint32_t)(int32_t)(sample * 8388607.0f)) << 8);  /* scale to 24 bits, then shift as unsigned: left-shifting a negative signed value is undefined in C */
+        }
+    }
+
+    if (cvt->filters[++cvt->filter_index]) {  /* len_cvt is unchanged; advance the chain, a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
+    }
+}
+#endif
+
+
+#if HAVE_SSE2_INTRINSICS
+static void  /* SSE2 variant: widens the signed 8-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_S8_to_F32_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{  /* NOTE(review): format is never referenced here and, unlike the _Scalar variant, is not (void)-cast -- unused-parameter warnings are possible */
+    const int8_t *src = ((const int8_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;  /* last output slot; assumes buf has room for len_cvt * 4 bytes -- TODO confirm against the caller */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using SSE2)");
+
+    /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
+    for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {  /* scalar steps until the start of the next 16-float block is 16-byte aligned */
+        *dst = ((float) *src) * DIVBY128;
+    }
+
+    src -= 15; dst -= 15;  /* adjust to read SSE blocks from the start. */
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {  /* otherwise the whole remainder is handled by the scalar loop at the bottom */
+        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
+        const __m128i *mmsrc = (const __m128i *) src;
+        const __m128i zero = _mm_setzero_si128();
+        const __m128 divby128 = _mm_set1_ps(DIVBY128);
+        while (i >= 16) {   /* 16 * 8-bit */
+            const __m128i bytes = _mm_load_si128(mmsrc);  /* get 16 sint8 into an XMM register. */
+            /* treat as int16, shift left to clear every other sint16, then back right with sign-extend. Now sint16. */
+            const __m128i shorts1 = _mm_srai_epi16(_mm_slli_epi16(bytes, 8), 8);
+            /* right-shift-sign-extend gets us sint16 with the other set of values. */
+            const __m128i shorts2 = _mm_srai_epi16(bytes, 8);
+            /* unpack against zero to make these int32, shift to make them sign-extend, convert to float, multiply. Whew! */
+            const __m128 floats1 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts1, zero), 16), 16)), divby128);
+            const __m128 floats2 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpacklo_epi16(shorts2, zero), 16), 16)), divby128);
+            const __m128 floats3 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts1, zero), 16), 16)), divby128);
+            const __m128 floats4 = _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_slli_epi32(_mm_unpackhi_epi16(shorts2, zero), 16), 16)), divby128);
+            /* Interleave back into correct order, store. */
+            _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
+            _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
+            _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
+            _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
+            i -= 16; mmsrc--; dst -= 16;  /* step one 16-sample block toward the front of the buffer */
+        }
+
+        src = (const int8_t *) mmsrc;
+    }
+
+    src += 15; dst += 15;  /* adjust for any scalar finishing. */
+
+    /* Finish off any leftovers with scalar operations. */
+    while (i) {
+        *dst = ((float) *src) * DIVBY128;
+        i--; src--; dst--;
+    }
+
+    cvt->len_cvt *= 4;  /* every 1-byte sample is now a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* SSE2 variant: widens the unsigned 8-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_U8_to_F32_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{  /* NOTE(review): format is never referenced in this function */
+    const uint8_t *src = ((const uint8_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;  /* last output slot; assumes buf has room for len_cvt * 4 bytes -- TODO confirm against the caller */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using SSE2)");
+
+    /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
+    for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {  /* scalar steps until the start of the next 16-float block is 16-byte aligned */
+        *dst = (((float) *src) * DIVBY128) - 1.0f;
+    }
+
+    src -= 15; dst -= 15;  /* adjust to read SSE blocks from the start. */
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {  /* otherwise the whole remainder is handled by the scalar loop at the bottom */
+        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
+        const __m128i *mmsrc = (const __m128i *) src;
+        const __m128i zero = _mm_setzero_si128();
+        const __m128 divby128 = _mm_set1_ps(DIVBY128);
+        const __m128 minus1 = _mm_set1_ps(-1.0f);
+        while (i >= 16) {   /* 16 * 8-bit */
+            const __m128i bytes = _mm_load_si128(mmsrc);  /* get 16 uint8 into an XMM register. */
+            /* treat as int16, shift left to drop the high byte of each lane, then back right with zero-extend. Now uint16. */
+            const __m128i shorts1 = _mm_srli_epi16(_mm_slli_epi16(bytes, 8), 8);
+            /* right-shift-zero-extend gets us uint16 with the other set of values. */
+            const __m128i shorts2 = _mm_srli_epi16(bytes, 8);
+            /* unpack against zero to make these int32, convert to float, multiply, add. Whew! */
+            /* Note that AVX2 can do floating point multiply+add in one instruction, fwiw. SSE2 cannot. */
+            const __m128 floats1 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts1, zero)), divby128), minus1);
+            const __m128 floats2 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi16(shorts2, zero)), divby128), minus1);
+            const __m128 floats3 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts1, zero)), divby128), minus1);
+            const __m128 floats4 = _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi16(shorts2, zero)), divby128), minus1);
+            /* Interleave back into correct order, store. */
+            _mm_store_ps(dst, _mm_unpacklo_ps(floats1, floats2));
+            _mm_store_ps(dst+4, _mm_unpackhi_ps(floats1, floats2));
+            _mm_store_ps(dst+8, _mm_unpacklo_ps(floats3, floats4));
+            _mm_store_ps(dst+12, _mm_unpackhi_ps(floats3, floats4));
+            i -= 16; mmsrc--; dst -= 16;  /* step one 16-sample block toward the front of the buffer */
+        }
+
+        src = (const uint8_t *) mmsrc;
+    }
+
+    src += 15; dst += 15;  /* adjust for any scalar finishing. */
+
+    /* Finish off any leftovers with scalar operations. */
+    while (i) {
+        *dst = (((float) *src) * DIVBY128) - 1.0f;
+        i--; src--; dst--;
+    }
+
+    cvt->len_cvt *= 4;  /* every 1-byte sample is now a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* SSE2 variant: widens the signed 16-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_S16_to_F32_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{  /* NOTE(review): format is never referenced in this function */
+    const int16_t *src = ((const int16_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample (len_cvt is a byte count) */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;  /* last output slot; assumes buf has room for len_cvt * 2 bytes -- TODO confirm against the caller */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using SSE2)");
+
+    /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
+    for (i = cvt->len_cvt / sizeof (int16_t); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {  /* scalar steps until the start of the next 8-float block is 16-byte aligned */
+        *dst = ((float) *src) * DIVBY32768;
+    }
+
+    src -= 7; dst -= 7;  /* adjust to read SSE blocks from the start. */
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {  /* otherwise the whole remainder is handled by the scalar loop at the bottom */
+        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
+        const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
+        while (i >= 8) {   /* 8 * 16-bit */
+            const __m128i ints = _mm_load_si128((__m128i const *) src);  /* get 8 sint16 into an XMM register. */
+            /* treat as int32, shift left to clear every other sint16, then back right with sign-extend. Now sint32. */
+            const __m128i a = _mm_srai_epi32(_mm_slli_epi32(ints, 16), 16);
+            /* right-shift-sign-extend gets us sint32 with the other set of values. */
+            const __m128i b = _mm_srai_epi32(ints, 16);
+            /* Interleave these back into the right order, convert to float, multiply, store. */
+            _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768));
+            _mm_store_ps(dst+4, _mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768));
+            i -= 8; src -= 8; dst -= 8;  /* step one 8-sample block toward the front of the buffer */
+        }
+    }
+
+    src += 7; dst += 7;  /* adjust for any scalar finishing. */
+
+    /* Finish off any leftovers with scalar operations. */
+    while (i) {
+        *dst = ((float) *src) * DIVBY32768;
+        i--; src--; dst--;
+    }
+
+    cvt->len_cvt *= 2;  /* every 2-byte sample is now a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* SSE2 variant: widens the unsigned 16-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_U16_to_F32_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{  /* NOTE(review): format is never referenced in this function */
+    const uint16_t *src = ((const uint16_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample (len_cvt is a byte count) */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;  /* last output slot; assumes buf has room for len_cvt * 2 bytes -- TODO confirm against the caller */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using SSE2)");
+
+    /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
+    for (i = cvt->len_cvt / sizeof (int16_t); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {  /* sizeof (int16_t) equals the uint16_t sample size, so i counts samples */
+        *dst = (((float) *src) * DIVBY32768) - 1.0f;
+    }
+
+    src -= 7; dst -= 7;  /* adjust to read SSE blocks from the start. */
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {  /* otherwise the whole remainder is handled by the scalar loop at the bottom */
+        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
+        const __m128 divby32768 = _mm_set1_ps(DIVBY32768);
+        const __m128 minus1 = _mm_set1_ps(-1.0f);
+        while (i >= 8) {   /* 8 * 16-bit */
+            const __m128i ints = _mm_load_si128((__m128i const *) src);  /* get 8 uint16 into an XMM register. */
+            /* treat as int32, shift left to drop the upper uint16 of each lane, then back right with zero-extend. Now non-negative int32. */
+            const __m128i a = _mm_srli_epi32(_mm_slli_epi32(ints, 16), 16);
+            /* right-shift-zero-extend gets us int32 with the other set of values. */
+            const __m128i b = _mm_srli_epi32(ints, 16);
+            /* Interleave these back into the right order, convert to float, multiply, re-centre, store. */
+            _mm_store_ps(dst, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpacklo_epi32(a, b)), divby32768), minus1));
+            _mm_store_ps(dst+4, _mm_add_ps(_mm_mul_ps(_mm_cvtepi32_ps(_mm_unpackhi_epi32(a, b)), divby32768), minus1));
+            i -= 8; src -= 8; dst -= 8;  /* step one 8-sample block toward the front of the buffer */
+        }
+    }
+
+    src += 7; dst += 7;  /* adjust for any scalar finishing. */
+
+    /* Finish off any leftovers with scalar operations. */
+    while (i) {
+        *dst = (((float) *src) * DIVBY32768) - 1.0f;
+        i--; src--; dst--;
+    }
+
+    cvt->len_cvt *= 2;  /* every 2-byte sample is now a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* SSE2 variant: converts the signed 32-bit samples in cvt->buf to float, in place, then runs the next filter. */
+SDL_Convert_S32_to_F32_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{  /* NOTE(review): format is never referenced in this function */
+    const int32_t *src = (const int32_t *) cvt->buf;  /* input and output samples are both 4 bytes, so both cursors walk forward together */
+    float *dst = (float *) cvt->buf;
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using SSE2)");
+
+    /* Get dst aligned to 16 bytes */
+    for (i = cvt->len_cvt / sizeof (int32_t); i && (((size_t) dst) & 15); --i, ++src, ++dst) {  /* i counts samples */
+        *dst = ((float) (*src>>8)) * DIVBY8388607;
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {  /* otherwise the whole remainder is handled by the scalar loop below */
+        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
+        const __m128 divby8388607 = _mm_set1_ps(DIVBY8388607);
+        const __m128i *mmsrc = (const __m128i *) src;
+        while (i >= 4) {   /* 4 * sint32 */
+            /* shift out lowest bits so int fits in a float32. Small precision loss, but much faster. */
+            _mm_store_ps(dst, _mm_mul_ps(_mm_cvtepi32_ps(_mm_srai_epi32(_mm_load_si128(mmsrc), 8)), divby8388607));
+            i -= 4; mmsrc++; dst += 4;
+        }
+        src = (const int32_t *) mmsrc;
+    }
+
+    /* Finish off any leftovers with scalar operations. */
+    while (i) {
+        *dst = ((float) (*src>>8)) * DIVBY8388607;
+        i--; src++; dst++;
+    }
+
+    if (cvt->filters[++cvt->filter_index]) {  /* len_cvt is unchanged; advance the chain, a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void  /* SSE2 variant: narrows the float samples in cvt->buf to signed 8-bit, in place, then runs the next filter. */
+SDL_Convert_F32_to_S8_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{  /* NOTE(review): format is never referenced in this function */
+    const float *src = (const float *) cvt->buf;
+    int8_t *dst = (int8_t *) cvt->buf;  /* output shrinks, so writing front to back always stays behind the read cursor */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using SSE2)");
+
+    /* Get dst aligned to 16 bytes */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {  /* i counts samples */
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 127;
+        } else if (sample <= -1.0f) {
+            *dst = -128;
+        } else {
+            *dst = (int8_t)(sample * 127.0f);
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {  /* otherwise the whole remainder is handled by the scalar loop below */
+        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
+        const __m128 one = _mm_set1_ps(1.0f);
+        const __m128 negone = _mm_set1_ps(-1.0f);
+        const __m128 mulby127 = _mm_set1_ps(127.0f);  /* NOTE(review): this path clamps to [-1, 1] and scales by 127, so it bottoms out at -127 while the scalar code writes -128; _mm_cvtps_epi32 also rounds where the scalar cast truncates -- confirm the mismatch is acceptable */
+        __m128i *mmdst = (__m128i *) dst;
+        while (i >= 16) {   /* 16 * float32 */
+            const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby127));  /* load 4 floats, clamp, convert to sint32 */
+            const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby127));  /* load 4 floats, clamp, convert to sint32 */
+            const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), mulby127));  /* load 4 floats, clamp, convert to sint32 */
+            const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), mulby127));  /* load 4 floats, clamp, convert to sint32 */
+            _mm_store_si128(mmdst, _mm_packs_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));  /* pack down, store out. */
+            i -= 16; src += 16; mmdst++;
+        }
+        dst = (int8_t *) mmdst;
+    }
+
+    /* Finish off any leftovers with scalar operations. */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 127;
+        } else if (sample <= -1.0f) {
+            *dst = -128;
+        } else {
+            *dst = (int8_t)(sample * 127.0f);
+        }
+        i--; src++; dst++;
+    }
+
+    cvt->len_cvt /= 4;  /* every 4-byte float is now a 1-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
+    }
+}
+
+static void  /* SSE2 variant: narrows the float samples in cvt->buf to unsigned 8-bit, in place, then runs the next filter. */
+SDL_Convert_F32_to_U8_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{  /* NOTE(review): format is never referenced in this function */
+    const float *src = (const float *) cvt->buf;
+    uint8_t *dst = cvt->buf;  /* output shrinks, so writing front to back always stays behind the read cursor */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using SSE2)");
+
+    /* Get dst aligned to 16 bytes */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {  /* i counts samples */
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 255;
+        } else if (sample <= -1.0f) {
+            *dst = 0;
+        } else {
+            *dst = (uint8_t)((sample + 1.0f) * 127.0f);
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {  /* otherwise the whole remainder is handled by the scalar loop below */
+        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
+        const __m128 one = _mm_set1_ps(1.0f);
+        const __m128 negone = _mm_set1_ps(-1.0f);
+        const __m128 mulby127 = _mm_set1_ps(127.0f);  /* NOTE(review): this path clamps to [-1, 1], adds 1 and scales by 127, so it tops out at 254 while the scalar code writes 255 for samples >= 1.0 -- confirm the mismatch is acceptable */
+        __m128i *mmdst = (__m128i *) dst;
+        while (i >= 16) {   /* 16 * float32 */
+            const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), one), mulby127));  /* load 4 floats, clamp, convert to sint32 */
+            const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), one), mulby127));  /* load 4 floats, clamp, convert to sint32 */
+            const __m128i ints3 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+8)), one), one), mulby127));  /* load 4 floats, clamp, convert to sint32 */
+            const __m128i ints4 = _mm_cvtps_epi32(_mm_mul_ps(_mm_add_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+12)), one), one), mulby127));  /* load 4 floats, clamp, convert to sint32 */
+            _mm_store_si128(mmdst, _mm_packus_epi16(_mm_packs_epi32(ints1, ints2), _mm_packs_epi32(ints3, ints4)));  /* pack down, store out. */
+            i -= 16; src += 16; mmdst++;
+        }
+        dst = (uint8_t *) mmdst;
+    }
+
+    /* Finish off any leftovers with scalar operations. */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 255;
+        } else if (sample <= -1.0f) {
+            *dst = 0;
+        } else {
+            *dst = (uint8_t)((sample + 1.0f) * 127.0f);
+        }
+        i--; src++; dst++;
+    }
+
+    cvt->len_cvt /= 4;  /* every 4-byte float is now a 1-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
+    }
+}
+
+static void  /* SSE2 variant: narrows the float samples in cvt->buf to signed 16-bit, in place, then runs the next filter. */
+SDL_Convert_F32_to_S16_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{  /* NOTE(review): format is never referenced in this function */
+    const float *src = (const float *) cvt->buf;
+    int16_t *dst = (int16_t *) cvt->buf;  /* output shrinks, so writing front to back always stays behind the read cursor */
+    int i;
+
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using SSE2)");
+
+    /* Get dst aligned to 16 bytes */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {  /* i counts samples */
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 32767;
+        } else if (sample <= -1.0f) {
+            *dst = -32768;
+        } else {
+            *dst = (int16_t)(sample * 32767.0f);
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {  /* otherwise the whole remainder is handled by the scalar loop below */
+        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
+        const __m128 one = _mm_set1_ps(1.0f);
+        const __m128 negone = _mm_set1_ps(-1.0f);
+        const __m128 mulby32767 = _mm_set1_ps(32767.0f);  /* NOTE(review): this path clamps to [-1, 1] and scales by 32767, so it bottoms out at -32767 while the scalar code writes -32768 -- confirm the mismatch is acceptable */
+        __m128i *mmdst = (__m128i *) dst;
+        while (i >= 8) {   /* 8 * float32 */
+            const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767));  /* load 4 floats, clamp, convert to sint32 */
+            const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767));  /* load 4 floats, clamp, convert to sint32 */
+            _mm_store_si128(mmdst, _mm_packs_epi32(ints1, ints2));  /* pack to sint16, store out. */
+            i -= 8; src += 8; mmdst++;
+        }
+        dst = (int16_t *) mmdst;
+    }
+
+    /* Finish off any leftovers with scalar operations. */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 32767;
+        } else if (sample <= -1.0f) {
+            *dst = -32768;
+        } else {
+            *dst = (int16_t)(sample * 32767.0f);
+        }
+        i--; src++; dst++;
+    }
+
+    cvt->len_cvt /= 2;  /* every 4-byte float is now a 2-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance the chain; a NULL slot ends it */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
+    }
+}
+
+static void
+SDL_Convert_F32_to_U16_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const float *src = (const float *) cvt->buf;
+    uint16_t *dst = (uint16_t *) cvt->buf;
+    int i;
+    /* Converts cvt->buf from float32 to unsigned 16-bit in place; output is half the size, so the forward writes never overtake the reads. */
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using SSE2)");
+
+    /* Convert one sample at a time until dst reaches a 16-byte boundary (or the input runs out). */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 65535;
+        } else if (sample <= -1.0f) {
+            *dst = 0;
+        } else {
+            *dst = (uint16_t)((sample + 1.0f) * 32767.0f);
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do SSE blocks as long as we have 16 bytes available. */
+        /* This calculates differently than the scalar path because SSE2 can't
+           pack int32 data down to unsigned int16. _mm_packs_epi32 does signed
+           saturation, so that would corrupt our data. _mm_packus_epi32 exists,
+           but not before SSE 4.1. So we convert from float to sint16, packing
+           that down with legit signed saturation, and then xor the top bit
+           against 1. This results in the correct unsigned 16-bit value, even
+           though it looks like dark magic. */
+        const __m128 mulby32767 = _mm_set1_ps(32767.0f);
+        const __m128i topbit = _mm_set1_epi16(-32768);  /* 0x8000 in every 16-bit lane */
+        const __m128 one = _mm_set1_ps(1.0f);
+        const __m128 negone = _mm_set1_ps(-1.0f);
+        __m128i *mmdst = (__m128i *) dst;
+        while (i >= 8) {   /* 8 * float32 in, one 16-byte store out */
+            const __m128i ints1 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby32767));  /* load 4 floats, clamp, scale, convert to sint32 */
+            const __m128i ints2 = _mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src+4)), one), mulby32767));  /* load 4 floats, clamp, scale, convert to sint32 */
+            _mm_store_si128(mmdst, _mm_xor_si128(_mm_packs_epi32(ints1, ints2), topbit));  /* pack to sint16, xor top bit, store out. */
+            i -= 8; src += 8; mmdst++;
+        }
+        dst = (uint16_t *) mmdst;
+    }
+
+    /* Finish off any leftovers with scalar operations (this is the whole remainder when src was not aligned). */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 65535;
+        } else if (sample <= -1.0f) {
+            *dst = 0;
+        } else {
+            *dst = (uint16_t)((sample + 1.0f) * 32767.0f);
+        }
+        i--; src++; dst++;
+    }
+
+    cvt->len_cvt /= 2;  /* each 4-byte float became a 2-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
+    }
+}
+
+static void
+SDL_Convert_F32_to_S32_SSE2(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const float *src = (const float *) cvt->buf;
+    int32_t *dst = (int32_t *) cvt->buf;
+    int i;
+    /* Converts cvt->buf from float32 to signed 32-bit in place; both formats are 4 bytes wide, so len_cvt is unchanged. */
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using SSE2)");
+
+    /* Convert one sample at a time until dst reaches a 16-byte boundary (or the input runs out). */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 2147483647;
+        } else if (sample <= -1.0f) {
+            *dst = (int32_t) -2147483648LL;
+        } else {
+            *dst = ((int32_t)(sample * 8388607.0f)) * 256;  /* same value as << 8, but well-defined for negative samples; |value| < 2^23 so it cannot overflow */
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+    assert(!i || ((((size_t) src) & 15) == 0));  /* src and dst started equal and advanced in lockstep, so they share alignment */
+
+    {
+        /* Aligned! Do SSE blocks while at least 4 samples (16 bytes) remain. */
+        const __m128 one = _mm_set1_ps(1.0f);
+        const __m128 negone = _mm_set1_ps(-1.0f);
+        const __m128 mulby8388607 = _mm_set1_ps(8388607.0f);
+        __m128i *mmdst = (__m128i *) dst;
+        while (i >= 4) {   /* 4 * float32 */
+            _mm_store_si128(mmdst, _mm_slli_epi32(_mm_cvtps_epi32(_mm_mul_ps(_mm_min_ps(_mm_max_ps(negone, _mm_load_ps(src)), one), mulby8388607)), 8));  /* load 4 floats, clamp, scale to 24 bits, convert to sint32, shift up by 8 */
+            i -= 4; src += 4; mmdst++;
+        }
+        dst = (int32_t *) mmdst;
+    }
+
+    /* Finish off any leftovers (fewer than 4 samples) with scalar operations. */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 2147483647;
+        } else if (sample <= -1.0f) {
+            *dst = (int32_t) -2147483648LL;
+        } else {
+            *dst = ((int32_t)(sample * 8388607.0f)) * 256;  /* same value as << 8, but well-defined for negative samples; |value| < 2^23 so it cannot overflow */
+        }
+        i--; src++; dst++;
+    }
+
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
+    }
+}
+#endif
+
+
+#if HAVE_NEON_INTRINSICS
+static void
+SDL_Convert_S8_to_F32_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const int8_t *src = ((const int8_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;  /* last output slot; assumes cvt->buf can hold len_cvt * 4 bytes */
+    int i;
+    /* Expands cvt->buf from signed 8-bit to float32 in place, walking backwards so unread input is never overwritten. */
+    LOG_DEBUG_CONVERT("AUDIO_S8", "AUDIO_F32 (using NEON)");
+
+    /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
+    for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
+        *dst = ((float) *src) * DIVBY128;
+    }
+
+    src -= 15; dst -= 15;  /* adjust to read NEON blocks from the start. */
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do NEON blocks while at least 16 samples (16 input bytes) remain. */
+        const int8_t *mmsrc = (const int8_t *) src;
+        const float32x4_t divby128 = vdupq_n_f32(DIVBY128);
+        while (i >= 16) {   /* 16 * 8-bit */
+            const int8x16_t bytes = vld1q_s8(mmsrc);  /* get 16 sint8 into a NEON register. */
+            const int16x8_t int16hi = vmovl_s8(vget_high_s8(bytes));  /* convert top 8 bytes to 8 int16 */
+            const int16x8_t int16lo = vmovl_s8(vget_low_s8(bytes));   /* convert bottom 8 bytes to 8 int16 */
+            /* split int16 to two int32, then convert to float, then multiply to normalize, store. */
+            vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16lo))), divby128));
+            vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16lo))), divby128));
+            vst1q_f32(dst+8, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(int16hi))), divby128));
+            vst1q_f32(dst+12, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(int16hi))), divby128));
+            i -= 16; mmsrc -= 16; dst -= 16;
+        }
+
+        src = (const int8_t *) mmsrc;
+    }
+
+    src += 15; dst += 15;  /* adjust for any scalar finishing. */
+
+    /* Finish off any leftovers with scalar operations (this is the whole remainder when src was not aligned). */
+    while (i) {
+        *dst = ((float) *src) * DIVBY128;
+        i--; src--; dst--;
+    }
+
+    cvt->len_cvt *= 4;  /* each 1-byte sample became a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void
+SDL_Convert_U8_to_F32_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const uint8_t *src = ((const uint8_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 4)) - 1;  /* last output slot; assumes cvt->buf can hold len_cvt * 4 bytes */
+    int i;
+    /* Expands cvt->buf from unsigned 8-bit to float32 in place, walking backwards so unread input is never overwritten. */
+    LOG_DEBUG_CONVERT("AUDIO_U8", "AUDIO_F32 (using NEON)");
+
+    /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
+    for (i = cvt->len_cvt; i && (((size_t) (dst-15)) & 15); --i, --src, --dst) {
+        *dst = (((float) *src) * DIVBY128) - 1.0f;
+    }
+
+    src -= 15; dst -= 15;  /* adjust to read NEON blocks from the start. */
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do NEON blocks while at least 16 samples (16 input bytes) remain. */
+        const uint8_t *mmsrc = (const uint8_t *) src;
+        const float32x4_t divby128 = vdupq_n_f32(DIVBY128);
+        const float32x4_t negone = vdupq_n_f32(-1.0f);
+        while (i >= 16) {   /* 16 * 8-bit */
+            const uint8x16_t bytes = vld1q_u8(mmsrc);  /* get 16 uint8 into a NEON register. */
+            const uint16x8_t uint16hi = vmovl_u8(vget_high_u8(bytes));  /* convert top 8 bytes to 8 uint16 */
+            const uint16x8_t uint16lo = vmovl_u8(vget_low_u8(bytes));   /* convert bottom 8 bytes to 8 uint16 */
+            /* split uint16 to two uint32, then convert to float, then multiply to normalize, subtract to adjust for sign, store. */
+            vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16lo))), divby128));
+            vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16lo))), divby128));
+            vst1q_f32(dst+8, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uint16hi))), divby128));
+            vst1q_f32(dst+12, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uint16hi))), divby128));
+            i -= 16; mmsrc -= 16; dst -= 16;
+        }
+
+        src = (const uint8_t *) mmsrc;
+    }
+
+    src += 15; dst += 15;  /* adjust for any scalar finishing. */
+
+    /* Finish off any leftovers with scalar operations (this is the whole remainder when src was not aligned). */
+    while (i) {
+        *dst = (((float) *src) * DIVBY128) - 1.0f;
+        i--; src--; dst--;
+    }
+
+    cvt->len_cvt *= 4;  /* each 1-byte sample became a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void
+SDL_Convert_S16_to_F32_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const int16_t *src = ((const int16_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;  /* last output slot; assumes cvt->buf can hold len_cvt * 2 bytes */
+    int i;
+    /* Expands cvt->buf from signed 16-bit to float32 in place, walking backwards so unread input is never overwritten. */
+    LOG_DEBUG_CONVERT("AUDIO_S16", "AUDIO_F32 (using NEON)");
+
+    /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
+    for (i = cvt->len_cvt / sizeof (int16_t); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
+        *dst = ((float) *src) * DIVBY32768;
+    }
+
+    src -= 7; dst -= 7;  /* adjust to read NEON blocks from the start. */
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do NEON blocks while at least 8 samples (16 input bytes) remain. */
+        const float32x4_t divby32768 = vdupq_n_f32(DIVBY32768);
+        while (i >= 8) {   /* 8 * 16-bit */
+            const int16x8_t ints = vld1q_s16((int16_t const *) src);  /* get 8 sint16 into a NEON register. */
+            /* split int16 to two int32, then convert to float, then multiply to normalize, store. */
+            vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_low_s16(ints))), divby32768));
+            vst1q_f32(dst+4, vmulq_f32(vcvtq_f32_s32(vmovl_s16(vget_high_s16(ints))), divby32768));
+            i -= 8; src -= 8; dst -= 8;
+        }
+    }
+
+    src += 7; dst += 7;  /* adjust for any scalar finishing. */
+
+    /* Finish off any leftovers with scalar operations (this is the whole remainder when src was not aligned). */
+    while (i) {
+        *dst = ((float) *src) * DIVBY32768;
+        i--; src--; dst--;
+    }
+
+    cvt->len_cvt *= 2;  /* each 2-byte sample became a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void
+SDL_Convert_U16_to_F32_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const uint16_t *src = ((const uint16_t *) (cvt->buf + cvt->len_cvt)) - 1;  /* last input sample */
+    float *dst = ((float *) (cvt->buf + cvt->len_cvt * 2)) - 1;  /* last output slot; assumes cvt->buf can hold len_cvt * 2 bytes */
+    int i;
+    /* Expands cvt->buf from unsigned 16-bit to float32 in place, walking backwards so unread input is never overwritten. */
+    LOG_DEBUG_CONVERT("AUDIO_U16", "AUDIO_F32 (using NEON)");
+
+    /* Get dst aligned to 16 bytes (since buffer is growing, we don't have to worry about overreading from src) */
+    for (i = cvt->len_cvt / sizeof (int16_t); i && (((size_t) (dst-7)) & 15); --i, --src, --dst) {
+        *dst = (((float) *src) * DIVBY32768) - 1.0f;
+    }
+
+    src -= 7; dst -= 7;  /* adjust to read NEON blocks from the start. */
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do NEON blocks while at least 8 samples (16 input bytes) remain. */
+        const float32x4_t divby32768 = vdupq_n_f32(DIVBY32768);
+        const float32x4_t negone = vdupq_n_f32(-1.0f);
+        while (i >= 8) {   /* 8 * 16-bit */
+            const uint16x8_t uints = vld1q_u16((uint16_t const *) src);  /* get 8 uint16 into a NEON register. */
+            /* split uint16 to two uint32, then convert to float, then multiply to normalize, subtract for sign, store. */
+            vst1q_f32(dst, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_low_u16(uints))), divby32768));
+            vst1q_f32(dst+4, vmlaq_f32(negone, vcvtq_f32_u32(vmovl_u16(vget_high_u16(uints))), divby32768));
+            i -= 8; src -= 8; dst -= 8;
+        }
+    }
+
+    src += 7; dst += 7;  /* adjust for any scalar finishing. */
+
+    /* Finish off any leftovers with scalar operations (this is the whole remainder when src was not aligned). */
+    while (i) {
+        *dst = (((float) *src) * DIVBY32768) - 1.0f;
+        i--; src--; dst--;
+    }
+
+    cvt->len_cvt *= 2;  /* each 2-byte sample became a 4-byte float */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void
+SDL_Convert_S32_to_F32_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const int32_t *src = (const int32_t *) cvt->buf;
+    float *dst = (float *) cvt->buf;
+    int i;
+    /* Converts cvt->buf from signed 32-bit to float32 in place; both formats are 4 bytes wide, so len_cvt is unchanged. */
+    LOG_DEBUG_CONVERT("AUDIO_S32", "AUDIO_F32 (using NEON)");
+
+    /* Convert one sample at a time until dst reaches a 16-byte boundary (or the input runs out). */
+    for (i = cvt->len_cvt / sizeof (int32_t); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
+        *dst = ((float) (*src>>8)) * DIVBY8388607;  /* drop the low 8 bits, then normalize the remaining 24 */
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do NEON blocks while at least 4 samples (16 bytes) remain. */
+        const float32x4_t divby8388607 = vdupq_n_f32(DIVBY8388607);
+        const int32_t *mmsrc = (const int32_t *) src;
+        while (i >= 4) {   /* 4 * sint32 */
+            /* shift out lowest bits so int fits in a float32. Small precision loss, but much faster. */
+            vst1q_f32(dst, vmulq_f32(vcvtq_f32_s32(vshrq_n_s32(vld1q_s32(mmsrc), 8)), divby8388607));
+            i -= 4; mmsrc += 4; dst += 4;
+        }
+        src = (const int32_t *) mmsrc;
+    }
+
+    /* Finish off any leftovers with scalar operations. */
+    while (i) {
+        *dst = ((float) (*src>>8)) * DIVBY8388607;
+        i--; src++; dst++;
+    }
+
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_F32SYS);
+    }
+}
+
+static void
+SDL_Convert_F32_to_S8_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const float *src = (const float *) cvt->buf;
+    int8_t *dst = (int8_t *) cvt->buf;
+    int i;
+    /* Converts cvt->buf from float32 to signed 8-bit in place; output is a quarter of the size, so the forward writes never overtake the reads. */
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S8 (using NEON)");
+
+    /* Convert one sample at a time until dst reaches a 16-byte boundary (or the input runs out). */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 127;
+        } else if (sample <= -1.0f) {
+            *dst = -128;
+        } else {
+            *dst = (int8_t)(sample * 127.0f);
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+    /* NOTE(review): the vector path clamps to [-1, 1] before scaling, so -1.0f yields -127 there but -128 in the scalar loops. */
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do NEON blocks while at least 16 samples remain (each pass emits one 16-byte store). */
+        const float32x4_t one = vdupq_n_f32(1.0f);
+        const float32x4_t negone = vdupq_n_f32(-1.0f);
+        const float32x4_t mulby127 = vdupq_n_f32(127.0f);
+        int8_t *mmdst = (int8_t *) dst;
+        while (i >= 16) {   /* 16 * float32 */
+            const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby127));  /* load 4 floats, clamp, scale, convert to sint32 */
+            const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby127));  /* load 4 floats, clamp, scale, convert to sint32 */
+            const int32x4_t ints3 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), mulby127));  /* load 4 floats, clamp, scale, convert to sint32 */
+            const int32x4_t ints4 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), mulby127));  /* load 4 floats, clamp, scale, convert to sint32 */
+            const int8x8_t i8lo = vmovn_s16(vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2))); /* narrow to sint16, combine, narrow to sint8 */
+            const int8x8_t i8hi = vmovn_s16(vcombine_s16(vmovn_s32(ints3), vmovn_s32(ints4))); /* narrow to sint16, combine, narrow to sint8 */
+            vst1q_s8(mmdst, vcombine_s8(i8lo, i8hi));  /* combine to int8x16_t, store out */
+            i -= 16; src += 16; mmdst += 16;
+        }
+        dst = (int8_t *) mmdst;
+    }
+
+    /* Finish off any leftovers with scalar operations (this is the whole remainder when src was not aligned). */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 127;
+        } else if (sample <= -1.0f) {
+            *dst = -128;
+        } else {
+            *dst = (int8_t)(sample * 127.0f);
+        }
+        i--; src++; dst++;
+    }
+
+    cvt->len_cvt /= 4;  /* each 4-byte float became a 1-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_S8);
+    }
+}
+
+static void
+SDL_Convert_F32_to_U8_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const float *src = (const float *) cvt->buf;
+    uint8_t *dst = (uint8_t *) cvt->buf;
+    int i;
+    /* Converts cvt->buf from float32 to unsigned 8-bit in place; output is a quarter of the size, so the forward writes never overtake the reads. */
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U8 (using NEON)");
+
+    /* Convert one sample at a time until dst reaches a 16-byte boundary (or the input runs out). */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 255;
+        } else if (sample <= -1.0f) {
+            *dst = 0;
+        } else {
+            *dst = (uint8_t)((sample + 1.0f) * 127.0f);
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+    /* NOTE(review): the vector path computes (clamp(x) + 1) * 127, so +1.0f yields 254 there but 255 in the scalar loops. */
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do NEON blocks while at least 16 samples remain (each pass emits one 16-byte store). */
+        const float32x4_t one = vdupq_n_f32(1.0f);
+        const float32x4_t negone = vdupq_n_f32(-1.0f);
+        const float32x4_t mulby127 = vdupq_n_f32(127.0f);
+        uint8_t *mmdst = (uint8_t *) dst;
+        while (i >= 16) {   /* 16 * float32 */
+            const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby127));  /* load 4 floats, clamp, bias by +1, scale, convert to uint32 */
+            const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby127));  /* load 4 floats, clamp, bias by +1, scale, convert to uint32 */
+            const uint32x4_t uints3 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+8)), one), one), mulby127));  /* load 4 floats, clamp, bias by +1, scale, convert to uint32 */
+            const uint32x4_t uints4 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+12)), one), one), mulby127));  /* load 4 floats, clamp, bias by +1, scale, convert to uint32 */
+            const uint8x8_t ui8lo = vmovn_u16(vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2))); /* narrow to uint16, combine, narrow to uint8 */
+            const uint8x8_t ui8hi = vmovn_u16(vcombine_u16(vmovn_u32(uints3), vmovn_u32(uints4))); /* narrow to uint16, combine, narrow to uint8 */
+            vst1q_u8(mmdst, vcombine_u8(ui8lo, ui8hi));  /* combine to uint8x16_t, store out */
+            i -= 16; src += 16; mmdst += 16;
+        }
+
+        dst = (uint8_t *) mmdst;
+    }
+
+    /* Finish off any leftovers with scalar operations (this is the whole remainder when src was not aligned). */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 255;
+        } else if (sample <= -1.0f) {
+            *dst = 0;
+        } else {
+            *dst = (uint8_t)((sample + 1.0f) * 127.0f);
+        }
+        i--; src++; dst++;
+    }
+
+    cvt->len_cvt /= 4;  /* each 4-byte float became a 1-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_U8);
+    }
+}
+
+static void
+SDL_Convert_F32_to_S16_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const float *src = (const float *) cvt->buf;
+    int16_t *dst = (int16_t *) cvt->buf;
+    int i;
+    /* Converts cvt->buf from float32 to signed 16-bit in place; output is half the size, so the forward writes never overtake the reads. */
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S16 (using NEON)");
+
+    /* Convert one sample at a time until dst reaches a 16-byte boundary (or the input runs out). */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 32767;
+        } else if (sample <= -1.0f) {
+            *dst = -32768;
+        } else {
+            *dst = (int16_t)(sample * 32767.0f);
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+    /* NOTE(review): the vector path clamps to [-1, 1] before scaling, so -1.0f yields -32767 there but -32768 in the scalar loops. */
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do NEON blocks while at least 8 samples remain (each pass emits one 16-byte store). */
+        const float32x4_t one = vdupq_n_f32(1.0f);
+        const float32x4_t negone = vdupq_n_f32(-1.0f);
+        const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
+        int16_t *mmdst = (int16_t *) dst;
+        while (i >= 8) {   /* 8 * float32 */
+            const int32x4_t ints1 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby32767));  /* load 4 floats, clamp, scale, convert to sint32 */
+            const int32x4_t ints2 = vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), mulby32767));  /* load 4 floats, clamp, scale, convert to sint32 */
+            vst1q_s16(mmdst, vcombine_s16(vmovn_s32(ints1), vmovn_s32(ints2)));  /* narrow to sint16, combine, store out. */
+            i -= 8; src += 8; mmdst += 8;
+        }
+        dst = (int16_t *) mmdst;
+    }
+
+    /* Finish off any leftovers with scalar operations (this is the whole remainder when src was not aligned). */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 32767;
+        } else if (sample <= -1.0f) {
+            *dst = -32768;
+        } else {
+            *dst = (int16_t)(sample * 32767.0f);
+        }
+        i--; src++; dst++;
+    }
+
+    cvt->len_cvt /= 2;  /* each 4-byte float became a 2-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_S16SYS);
+    }
+}
+
+static void
+SDL_Convert_F32_to_U16_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const float *src = (const float *) cvt->buf;
+    uint16_t *dst = (uint16_t *) cvt->buf;
+    int i;
+    /* Converts cvt->buf from float32 to unsigned 16-bit in place; output is half the size, so the forward writes never overtake the reads. */
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_U16 (using NEON)");
+
+    /* Convert one sample at a time until dst reaches a 16-byte boundary (or the input runs out). */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 65535;
+        } else if (sample <= -1.0f) {
+            *dst = 0;
+        } else {
+            *dst = (uint16_t)((sample + 1.0f) * 32767.0f);
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+    /* NOTE(review): the vector path computes (clamp(x) + 1) * 32767, so +1.0f yields 65534 there but 65535 in the scalar loops. */
+    /* Make sure src is aligned too. */
+    if ((((size_t) src) & 15) == 0) {
+        /* Aligned! Do NEON blocks while at least 8 samples remain (each pass emits one 16-byte store). */
+        const float32x4_t one = vdupq_n_f32(1.0f);
+        const float32x4_t negone = vdupq_n_f32(-1.0f);
+        const float32x4_t mulby32767 = vdupq_n_f32(32767.0f);
+        uint16_t *mmdst = (uint16_t *) dst;
+        while (i >= 8) {   /* 8 * float32 */
+            const uint32x4_t uints1 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), one), mulby32767));  /* load 4 floats, clamp, bias by +1, scale, convert to uint32 */
+            const uint32x4_t uints2 = vcvtq_u32_f32(vmulq_f32(vaddq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src+4)), one), one), mulby32767));  /* load 4 floats, clamp, bias by +1, scale, convert to uint32 */
+            vst1q_u16(mmdst, vcombine_u16(vmovn_u32(uints1), vmovn_u32(uints2)));  /* narrow to uint16, combine, store out. */
+            i -= 8; src += 8; mmdst += 8;
+        }
+        dst = (uint16_t *) mmdst;
+    }
+
+    /* Finish off any leftovers with scalar operations (this is the whole remainder when src was not aligned). */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 65535;
+        } else if (sample <= -1.0f) {
+            *dst = 0;
+        } else {
+            *dst = (uint16_t)((sample + 1.0f) * 32767.0f);
+        }
+        i--; src++; dst++;
+    }
+
+    cvt->len_cvt /= 2;  /* each 4-byte float became a 2-byte sample */
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_U16SYS);
+    }
+}
+
+static void
+SDL_Convert_F32_to_S32_NEON(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+    const float *src = (const float *) cvt->buf;
+    int32_t *dst = (int32_t *) cvt->buf;
+    int i;
+    /* Converts cvt->buf from float32 to signed 32-bit in place; both formats are 4 bytes wide, so len_cvt is unchanged. */
+    LOG_DEBUG_CONVERT("AUDIO_F32", "AUDIO_S32 (using NEON)");
+
+    /* Convert one sample at a time until dst reaches a 16-byte boundary (or the input runs out). */
+    for (i = cvt->len_cvt / sizeof (float); i && (((size_t) dst) & 15); --i, ++src, ++dst) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 2147483647;
+        } else if (sample <= -1.0f) {
+            *dst = (-2147483647) - 1;
+        } else {
+            *dst = ((int32_t)(sample * 8388607.0f)) * 256;  /* same value as << 8, but well-defined for negative samples; |value| < 2^23 so it cannot overflow */
+        }
+    }
+
+    assert(!i || ((((size_t) dst) & 15) == 0));
+    assert(!i || ((((size_t) src) & 15) == 0));  /* src and dst started equal and advanced in lockstep, so they share alignment */
+
+    {
+        /* Aligned! Do NEON blocks while at least 4 samples (16 bytes) remain. */
+        const float32x4_t one = vdupq_n_f32(1.0f);
+        const float32x4_t negone = vdupq_n_f32(-1.0f);
+        const float32x4_t mulby8388607 = vdupq_n_f32(8388607.0f);
+        int32_t *mmdst = (int32_t *) dst;
+        while (i >= 4) {   /* 4 * float32 */
+            vst1q_s32(mmdst, vshlq_n_s32(vcvtq_s32_f32(vmulq_f32(vminq_f32(vmaxq_f32(negone, vld1q_f32(src)), one), mulby8388607)), 8));  /* load 4 floats, clamp, scale to 24 bits, convert to sint32, shift up by 8 */
+            i -= 4; src += 4; mmdst += 4;
+        }
+        dst = (int32_t *) mmdst;
+    }
+
+    /* Finish off any leftovers (fewer than 4 samples) with scalar operations. */
+    while (i) {
+        const float sample = *src;
+        if (sample >= 1.0f) {
+            *dst = 2147483647;
+        } else if (sample <= -1.0f) {
+            *dst = (-2147483647) - 1;
+        } else {
+            *dst = ((int32_t)(sample * 8388607.0f)) * 256;  /* same value as << 8, but well-defined for negative samples; |value| < 2^23 so it cannot overflow */
+        }
+        i--; src++; dst++;
+    }
+
+    if (cvt->filters[++cvt->filter_index]) {  /* advance to the next filter slot and run it if one is set */
+        cvt->filters[cvt->filter_index](cvt, AUDIO_S32SYS);
+    }
+}
+#endif
+
+
+
+void SDL_ChooseAudioConverters(void)
+{
+    static bool converters_chosen = false;  /* set once a converter family has been installed */
+    /* Points every SDL_Convert_* entry at one implementation family; later calls return immediately. */
+    if (converters_chosen) {
+        return;
+    }
+    /* Helper: assign all ten converters to their _<fntype> variants and record that the choice was made. */
+#define SET_CONVERTER_FUNCS(fntype) \
+        SDL_Convert_S8_to_F32 = SDL_Convert_S8_to_F32_##fntype; \
+        SDL_Convert_U8_to_F32 = SDL_Convert_U8_to_F32_##fntype; \
+        SDL_Convert_S16_to_F32 = SDL_Convert_S16_to_F32_##fntype; \
+        SDL_Convert_U16_to_F32 = SDL_Convert_U16_to_F32_##fntype; \
+        SDL_Convert_S32_to_F32 = SDL_Convert_S32_to_F32_##fntype; \
+        SDL_Convert_F32_to_S8 = SDL_Convert_F32_to_S8_##fntype; \
+        SDL_Convert_F32_to_U8 = SDL_Convert_F32_to_U8_##fntype; \
+        SDL_Convert_F32_to_S16 = SDL_Convert_F32_to_S16_##fntype; \
+        SDL_Convert_F32_to_U16 = SDL_Convert_F32_to_U16_##fntype; \
+        SDL_Convert_F32_to_S32 = SDL_Convert_F32_to_S32_##fntype; \
+        converters_chosen = true
+    /* The runtime CPU checks below are commented out: whichever SIMD family was compiled in is used unconditionally. */
+#if HAVE_SSE2_INTRINSICS
+    //if (SDL_HasSSE2()) {
+        SET_CONVERTER_FUNCS(SSE2);
+        return;  /* early exit: the assert at the bottom is not reached on this path */
+    //}
+#endif
+
+#if HAVE_NEON_INTRINSICS
+    // if (SDL_HasNEON()) {
+        SET_CONVERTER_FUNCS(NEON);
+        return;  /* early exit: the assert at the bottom is not reached on this path */
+    // }
+#endif
+
+#if NEED_SCALAR_CONVERTER_FALLBACKS
+    SET_CONVERTER_FUNCS(Scalar);
+#endif
+
+#undef SET_CONVERTER_FUNCS
+    /* Reached only when no SIMD family returned above; fires if no scalar fallback was compiled in either. */
+    assert(converters_chosen == true);
+}
+
+// BEGIN DATAQUEUE
+struct SDL_DataQueue;
+typedef struct SDL_DataQueue SDL_DataQueue;
+
+SDL_DataQueue *SDL_NewDataQueue(const size_t packetlen, const size_t initialslack);  /* packetlen 0 selects 1024; returns NULL if the queue itself cannot be allocated */
+void SDL_FreeDataQueue(SDL_DataQueue *queue);  /* frees the queue and all of its packets; NULL is ignored */
+void SDL_ClearDataQueue(SDL_DataQueue *queue, const size_t slack);  /* discards queued data, keeping at most ceil(slack / packet_size) packets pooled */
+int SDL_WriteToDataQueue(SDL_DataQueue *queue, const void *data, const size_t len);
+size_t SDL_ReadFromDataQueue(SDL_DataQueue *queue, void *buf, const size_t len);
+size_t SDL_PeekIntoDataQueue(SDL_DataQueue *queue, void *buf, const size_t len);
+size_t SDL_CountDataQueue(SDL_DataQueue *queue);
+void *SDL_ReserveSpaceInDataQueue(SDL_DataQueue *queue, const size_t len);
+/* One node of the queue's singly linked packet list; allocated with packet_size payload bytes following the header. */
+typedef struct SDL_DataQueuePacket
+{
+    size_t datalen;  /* bytes currently in use in this packet. */
+    size_t startpos;  /* bytes currently consumed in this packet. */
+    struct SDL_DataQueuePacket *next;  /* next item in linked list (NULL at the end). */
+    uint8_t data[1];  /* packet data; declared as 1 byte but the allocation extends it */ // SDL_VARIABLE_LENGTH_ARRAY
+} SDL_DataQueuePacket;
+
+struct SDL_DataQueue
+{
+    SDL_DataQueuePacket *head; /* device fed from here. */
+    SDL_DataQueuePacket *tail; /* queue fills to here. */
+    SDL_DataQueuePacket *pool; /* these are unused packets. */
+    size_t packet_size;   /* size of new packets */
+    size_t queued_bytes;  /* number of bytes of data in the queue. */
+};
+
+static void
+SDL_FreeDataQueueList(SDL_DataQueuePacket *packet)
+{
+    /* Release every node of a singly linked packet list; a NULL list is a no-op. */
+    SDL_DataQueuePacket *following;
+    for (; packet != NULL; packet = following) {
+        following = packet->next;  /* grab the link before the node is freed */
+        free(packet);
+    }
+}
+
+
+/* this all expects that you managed thread safety elsewhere. */
+
+SDL_DataQueue *
+SDL_NewDataQueue(const size_t _packetlen, const size_t initialslack)
+{
+    /* Build an empty queue and pre-fill its pool with enough packets to hold `initialslack` bytes. */
+    const size_t chunk = (_packetlen != 0) ? _packetlen : 1024;  /* zero selects the 1024-byte default */
+    const size_t prealloc = (initialslack + (chunk - 1)) / chunk;  /* round up to whole packets */
+    SDL_DataQueue *queue = (SDL_DataQueue *) malloc(sizeof (SDL_DataQueue));
+    size_t remaining;
+
+    if (queue == NULL) {
+        SDL_OutOfMemoryEX();
+        return NULL;
+    }
+    SDL_zeropEX(queue);
+    queue->packet_size = chunk;
+
+    for (remaining = prealloc; remaining > 0; remaining--) {
+        SDL_DataQueuePacket *spare = (SDL_DataQueuePacket *) malloc(sizeof (SDL_DataQueuePacket) + chunk);
+        if (spare == NULL) {
+            continue;  /* best effort: a missing spare is simply allocated on demand later */
+        }
+        spare->datalen = 0;
+        spare->startpos = 0;
+        spare->next = queue->pool;  /* push onto the front of the pool list */
+        queue->pool = spare;
+    }
+
+    return queue;
+}
+
+void
+SDL_FreeDataQueue(SDL_DataQueue *queue)
+{
+    /* Destroy the queue and every packet it owns; a NULL queue is a no-op. */
+    if (queue == NULL) { return; }
+    SDL_FreeDataQueueList(queue->head);  /* packets still holding queued data */
+    SDL_FreeDataQueueList(queue->pool);  /* idle packets kept for reuse */
+    free(queue);
+}
+
+void
+SDL_ClearDataQueue(SDL_DataQueue *queue, const size_t slack)
+{
+    /* Drop all queued data, keep up to `slack` bytes' worth of packets pooled, free the rest. */
+    SDL_DataQueuePacket *cursor;
+    SDL_DataQueuePacket *last_kept = NULL;
+    size_t keep;
+
+    if (queue == NULL) {
+        return;
+    }
+
+    keep = (slack + (queue->packet_size - 1)) / queue->packet_size;  /* whole packets needed to cover `slack`, rounded up */
+
+    /* Chain the idle pool onto the end of the in-use list so both are walked as one. */
+    if (queue->head != NULL) {
+        cursor = queue->head;
+        queue->tail->next = queue->pool;
+    } else {
+        cursor = queue->pool;
+    }
+
+    /* The queue now holds no data; the combined list becomes the (provisional) pool. */
+    queue->head = NULL;
+    queue->tail = NULL;
+    queue->queued_bytes = 0;
+    queue->pool = cursor;
+
+    /* Step past the packets that stay pooled. */
+    for (; (cursor != NULL) && (keep > 0); keep--) {
+        last_kept = cursor;
+        cursor = cursor->next;
+    }
+
+    /* Cut the list after the last kept packet, or empty the pool if none are kept. */
+    if (last_kept != NULL) {
+        last_kept->next = NULL;
+    } else {
+        queue->pool = NULL;
+    }
+    SDL_FreeDataQueueList(cursor);  /* everything beyond the cut is released */
+}
+
+static SDL_DataQueuePacket *
+AllocateDataQueuePacket(SDL_DataQueue *queue)
+{
+    SDL_DataQueuePacket *fresh;
+    /* Hands back an empty packet that is already appended to the queue, or NULL if malloc fails. */
+    assert(queue != NULL);
+
+    fresh = queue->pool;
+    if (fresh == NULL) {
+        /* nothing to recycle, so this one comes from the heap. */
+        fresh = (SDL_DataQueuePacket *) malloc(sizeof (SDL_DataQueuePacket) + queue->packet_size);
+        if (fresh == NULL) {
+            return NULL;
+        }
+    } else {
+        /* reuse the packet at the front of the pool. */
+        queue->pool = fresh->next;
+    }
+
+    fresh->datalen = 0;
+    fresh->startpos = 0;
+    fresh->next = NULL;
+    /* link it in as the new tail (and as the head too when the queue was empty). */
+    assert((queue->head != NULL) == (queue->queued_bytes != 0));
+    if (queue->tail != NULL) {
+        queue->tail->next = fresh;
+    } else {
+        queue->head = fresh;
+    }
+    queue->tail = fresh;
+    return fresh;
+}
+
+
+int
+SDL_WriteToDataQueue(SDL_DataQueue *queue, const void *_data, const size_t _len)
+{
+    size_t len = _len;
+    const uint8_t *data = (const uint8_t *) _data;
+    const size_t packet_size = queue ? queue->packet_size : 0;
+    SDL_DataQueuePacket *orighead;
+    SDL_DataQueuePacket *origtail;
+    size_t origlen;
+    size_t datalen;
+    size_t origbytes;  /* queued_bytes on entry, restored if the write has to be rolled back. */
+    if (!queue) {
+        return SDL_PrintError("queue");
+    }
+    /* Appends _len bytes to the queue and returns 0; on allocation failure nothing new stays queued. */
+    orighead = queue->head;
+    origtail = queue->tail;
+    origlen = origtail ? origtail->datalen : 0;
+    origbytes = queue->queued_bytes;
+    while (len > 0) {
+        SDL_DataQueuePacket *packet = queue->tail;
+        assert(!packet || (packet->datalen <= packet_size));
+        if (!packet || (packet->datalen >= packet_size)) {
+            /* tail packet missing or completely full; we need a new packet. */
+            packet = AllocateDataQueuePacket(queue);
+            if (!packet) {
+                /* uhoh, reset so we've queued nothing new, free what we can. */
+                if (!origtail) {
+                    packet = queue->head;  /* whole queue. */
+                } else {
+                    packet = origtail->next;  /* what we added to existing queue. */
+                    origtail->next = NULL;
+                    origtail->datalen = origlen;
+                }
+                queue->head = orighead;
+                queue->tail = origtail;
+                queue->pool = NULL;
+                queue->queued_bytes = origbytes;  /* bytes copied before the failure no longer count as queued. */
+                SDL_FreeDataQueueList(packet);  /* give back what we can. */
+                return SDL_OutOfMemoryEX();
+            }
+        }
+
+        datalen = SDL_minEX(len, packet_size - packet->datalen);  /* as much as fits in the tail packet. */
+        memcpy(packet->data + packet->datalen, data, datalen);
+        data += datalen;
+        len -= datalen;
+        packet->datalen += datalen;
+        queue->queued_bytes += datalen;
+    }
+
+    return 0;
+}
+
+size_t
+SDL_PeekIntoDataQueue(SDL_DataQueue *queue, void *_buf, const size_t _len)
+{
+    uint8_t *out = (uint8_t *) _buf;
+    size_t copied = 0;  /* bytes placed in _buf so far. */
+    SDL_DataQueuePacket *cursor;
+
+    if (queue == NULL) {
+        return 0;
+    }
+
+    /* Read-only walk: startpos and queued_bytes are left alone, so nothing is consumed. */
+    cursor = queue->head;
+    while ((copied < _len) && (cursor != NULL)) {
+        const size_t avail = cursor->datalen - cursor->startpos;
+        const size_t chunk = SDL_minEX(_len - copied, avail);
+        assert(queue->queued_bytes >= avail);
+        memcpy(out + copied, cursor->data + cursor->startpos, chunk);
+        copied += chunk;
+        cursor = cursor->next;
+    }
+
+    return copied;
+}
+
+size_t
+SDL_ReadFromDataQueue(SDL_DataQueue *queue, void *_buf, const size_t _len)
+{
+    size_t len = _len;  /* bytes the caller still wants. */
+    uint8_t *buf = (uint8_t *) _buf;
+    uint8_t *ptr = buf;  /* write cursor into the caller's buffer. */
+    SDL_DataQueuePacket *packet;
+    /* Consumes up to _len bytes from the front of the queue into _buf and returns how many were copied. */
+    if (!queue) {
+        return 0;
+    }
+
+    while ((len > 0) && ((packet = queue->head) != NULL)) {
+        const size_t avail = packet->datalen - packet->startpos;  /* unread bytes left in this packet. */
+        const size_t cpy = SDL_minEX(len, avail);
+        assert(queue->queued_bytes >= avail);
+
+        memcpy(ptr, packet->data + packet->startpos, cpy);
+        packet->startpos += cpy;
+        ptr += cpy;
+        queue->queued_bytes -= cpy;
+        len -= cpy;
+
+        if (packet->startpos == packet->datalen) {  /* packet is done, put it in the pool. */
+            queue->head = packet->next;
+            assert((packet->next != NULL) || (packet == queue->tail));
+            packet->next = queue->pool;  /* pushed onto the front of the pool list. */
+            queue->pool = packet;
+        }
+    }
+
+    assert((queue->head != NULL) == (queue->queued_bytes != 0));
+
+    if (queue->head == NULL) {
+        queue->tail = NULL;  /* in case we drained the queue entirely. */
+    }
+
+    return (size_t) (ptr - buf);
+}
+
+size_t
+SDL_CountDataQueue(SDL_DataQueue *queue)
+{
+    return (queue == NULL) ? 0 : queue->queued_bytes;  /* a NULL queue reports zero bytes. */
+}
+
+void *
+SDL_ReserveSpaceInDataQueue(SDL_DataQueue *queue, const size_t len)
+{
+    SDL_DataQueuePacket *packet;
+    /* Reserves `len` contiguous bytes at the end of the queue; returns where to write them, or NULL on error. */
+    if (!queue) {
+        SDL_PrintError("queue");
+        return NULL;
+    } else if (len == 0) {
+        SDL_PrintError("len");
+        return NULL;
+    } else if (len > queue->packet_size) {
+        SDL_PrintError("len is larger than packet size");
+        return NULL;
+    }
+    /* New data belongs after everything already queued, so only the tail packet may be extended. */
+    packet = queue->tail;
+    if (packet) {
+        const size_t avail = queue->packet_size - packet->datalen;
+        if (len <= avail) {  /* we can use the space at end of this packet. */
+            void *retval = packet->data + packet->datalen;
+            packet->datalen += len;
+            queue->queued_bytes += len;  /* counted as queued right away, before the caller fills it. */
+            return retval;
+        }
+    }
+
+    /* Need a fresh packet. */
+    packet = AllocateDataQueuePacket(queue);
+    if (!packet) {
+        SDL_OutOfMemoryEX();
+        return NULL;
+    }
+
+    packet->datalen = len;
+    queue->queued_bytes += len;
+    return packet->data;
+}
+
+// AUDIOCVT
+/* Downmix stereo to mono in place; each output sample is the mean of one pair of input samples. */
+static void
+SDL_ConvertStereoToMono(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    float *mono = (float *) cvt->buf;
+    const float *pair = mono;  /* the read cursor advances twice as fast as the write cursor. */
+    int pending;
+
+    LOG_DEBUG_CONVERT("stereo", "mono");
+    assert(format == AUDIO_F32SYS);
+
+    for (pending = cvt->len_cvt / 8; pending != 0; pending--, pair += 2, mono++) {
+        *mono = (pair[0] + pair[1]) * 0.5f;
+    }
+    cvt->len_cvt /= 2;
+    cvt->filter_index++;  /* move on to the next filter in the chain, if there is one. */
+    if (cvt->filters[cvt->filter_index] != NULL) {
+        cvt->filters[cvt->filter_index](cvt, format);
+    }
+}
+
+
+/* Convert from 5.1 to stereo in place. Each side sums its front and back channel plus half the center, scaled by 1/2.5; LFE is discarded. */
+static void
+SDL_Convert51ToStereo(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    float *dst = (float *) cvt->buf;
+    const float *src = dst;  /* output overwrites the front of the same buffer the input is read from. */
+    int i;
+
+    LOG_DEBUG_CONVERT("5.1", "stereo");
+    assert(format == AUDIO_F32SYS);
+
+    /* SDL's 5.1 layout: FL+FR+FC+LFE+BL+BR */
+    for (i = cvt->len_cvt / (sizeof (float) * 6); i; --i, src += 6, dst += 2) {
+        const float front_center_distributed = src[2] * 0.5f;  /* half of FC goes to each side. */
+        dst[0] = (src[0] + front_center_distributed + src[4]) / 2.5f;  /* left */
+        dst[1] = (src[1] + front_center_distributed + src[5]) / 2.5f;  /* right */
+    }
+
+    cvt->len_cvt /= 3;  /* six channels became two. */
+    if (cvt->filters[++cvt->filter_index]) {  /* hand off to the next filter in the chain, if any. */
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+
+/* Downmix quad to stereo in place; channels 0 and 2 are averaged into left, 1 and 3 into right. */
+static void
+SDL_ConvertQuadToStereo(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    float *out = (float *) cvt->buf;
+    const float *in = out;
+    int frames_left;
+
+    LOG_DEBUG_CONVERT("quad", "stereo");
+    assert(format == AUDIO_F32SYS);
+
+    for (frames_left = cvt->len_cvt / (sizeof (float) * 4); frames_left != 0; frames_left--, in += 4, out += 2) {
+        out[0] = (in[0] + in[2]) * 0.5f; /* left */
+        out[1] = (in[1] + in[3]) * 0.5f; /* right */
+    }
+    cvt->len_cvt /= 2;
+    cvt->filter_index++;  /* move on to the next filter in the chain, if there is one. */
+    if (cvt->filters[cvt->filter_index] != NULL) {
+        cvt->filters[cvt->filter_index](cvt, format);
+    }
+}
+
+
+/* Convert from 7.1 to 5.1 in place. Half of each side channel is added to the front and back of its side; all outputs are scaled by 1/1.5. */
+static void
+SDL_Convert71To51(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    float *dst = (float *) cvt->buf;
+    const float *src = dst;  /* output overwrites the front of the same buffer the input is read from. */
+    int i;
+
+    LOG_DEBUG_CONVERT("7.1", "5.1");
+    assert(format == AUDIO_F32SYS);
+
+    for (i = cvt->len_cvt / (sizeof (float) * 8); i; --i, src += 8, dst += 6) {
+        const float surround_left_distributed = src[6] * 0.5f;  /* read before any dst store of this frame. */
+        const float surround_right_distributed = src[7] * 0.5f;
+        dst[0] = (src[0] + surround_left_distributed) / 1.5f;  /* FL */
+        dst[1] = (src[1] + surround_right_distributed) / 1.5f;  /* FR */
+        dst[2] = src[2] / 1.5f; /* CC */
+        dst[3] = src[3] / 1.5f; /* LFE */
+        dst[4] = (src[4] + surround_left_distributed) / 1.5f;  /* BL */
+        dst[5] = (src[5] + surround_right_distributed) / 1.5f;  /* BR */
+    }
+
+    cvt->len_cvt /= 8;  /* divide first, then multiply: eight channels became six. */
+    cvt->len_cvt *= 6;
+    if (cvt->filters[++cvt->filter_index]) {  /* hand off to the next filter in the chain, if any. */
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+
+/* Convert from 5.1 to quad in place. Distribute center across front, discard LFE; every output is scaled by 1/1.5. */
+static void
+SDL_Convert51ToQuad(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    float *dst = (float *) cvt->buf;
+    const float *src = dst;  /* output overwrites the front of the same buffer the input is read from. */
+    int i;
+
+    LOG_DEBUG_CONVERT("5.1", "quad");
+    assert(format == AUDIO_F32SYS);
+
+    /* SDL's 4.0 layout: FL+FR+BL+BR */
+    /* SDL's 5.1 layout: FL+FR+FC+LFE+BL+BR */
+    for (i = cvt->len_cvt / (sizeof (float) * 6); i; --i, src += 6, dst += 4) {
+        const float front_center_distributed = src[2] * 0.5f;  /* half of FC goes to each front channel. */
+        dst[0] = (src[0] + front_center_distributed) / 1.5f;  /* FL */
+        dst[1] = (src[1] + front_center_distributed) / 1.5f;  /* FR */
+        dst[2] = src[4] / 1.5f;  /* BL */
+        dst[3] = src[5] / 1.5f;  /* BR */
+    }
+
+    cvt->len_cvt /= 6;  /* divide first, then multiply: six channels became four. */
+    cvt->len_cvt *= 4;
+    if (cvt->filters[++cvt->filter_index]) {  /* hand off to the next filter in the chain, if any. */
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+
+/* Upmix mono to stereo in place by writing every input sample to both output channels. */
+static void
+SDL_ConvertMonoToStereo(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    const float *in_end = (const float *) (cvt->buf + cvt->len_cvt);
+    float *out_end = (float *) (cvt->buf + cvt->len_cvt * 2);  /* both cursors walk backwards so output never clobbers unread input. */
+    int todo;
+
+    LOG_DEBUG_CONVERT("mono", "stereo");
+    assert(format == AUDIO_F32SYS);
+
+    for (todo = cvt->len_cvt / sizeof (float); todo != 0; todo--) {
+        const float sample = *(--in_end);
+        *(--out_end) = sample;
+        *(--out_end) = sample;
+    }
+
+    cvt->len_cvt *= 2;
+    if (cvt->filters[++cvt->filter_index] != NULL) {
+        cvt->filters[cvt->filter_index](cvt, format);
+    }
+}
+
+
+/* Upmix stereo to a pseudo-5.1 stream in place: center is the L/R average, backs copy the inputs, LFE is silent. */
+static void
+SDL_ConvertStereoTo51(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    int i;
+    float lf, rf, ce;
+    const float *src = (const float *) (cvt->buf + cvt->len_cvt);  /* one past the last stereo frame. */
+    float *dst = (float *) (cvt->buf + cvt->len_cvt * 3);  /* one past where the last 5.1 frame will end. */
+
+    LOG_DEBUG_CONVERT("stereo", "5.1");
+    assert(format == AUDIO_F32SYS);
+    /* Frames are expanded last-to-first so the wider output never overwrites input that is still unread. */
+    for (i = cvt->len_cvt / (sizeof(float) * 2); i; --i) {
+        dst -= 6;
+        src -= 2;
+        lf = src[0];  /* both inputs are cached before any store of this frame. */
+        rf = src[1];
+        ce = (lf + rf) * 0.5f;
+        /* !!! FIXME: FL and FR may clip */
+        dst[0] = lf + (lf - ce);  /* FL */
+        dst[1] = rf + (rf - ce);  /* FR */
+        dst[2] = ce;  /* FC */
+        dst[3] = 0;   /* LFE (only meant for special LFE effects) */
+        dst[4] = lf;  /* BL */
+        dst[5] = rf;  /* BR */
+    }
+
+    cvt->len_cvt *= 3;  /* two channels became six. */
+    if (cvt->filters[++cvt->filter_index]) {  /* hand off to the next filter in the chain, if any. */
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+
+/* Upmix quad to a pseudo-5.1 stream in place: center is the front L/R average, backs pass through, LFE is silent. */
+static void
+SDL_ConvertQuadTo51(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    int i;
+    float lf, rf, lb, rb, ce;
+    const float *src = (const float *) (cvt->buf + cvt->len_cvt);  /* one past the last quad frame. */
+    float *dst = (float *) (cvt->buf + cvt->len_cvt * 3 / 2);  /* one past where the last 5.1 frame will end. */
+
+    LOG_DEBUG_CONVERT("quad", "5.1");
+    assert(format == AUDIO_F32SYS);
+    assert(cvt->len_cvt % (sizeof(float) * 4) == 0);
+    /* Frames are expanded last-to-first so the wider output never overwrites input that is still unread. */
+    for (i = cvt->len_cvt / (sizeof(float) * 4); i; --i) {
+        dst -= 6;
+        src -= 4;
+        lf = src[0];  /* all four inputs are cached before any store of this frame. */
+        rf = src[1];
+        lb = src[2];
+        rb = src[3];
+        ce = (lf + rf) * 0.5f;
+        /* !!! FIXME: FL and FR may clip */
+        dst[0] = lf + (lf - ce);  /* FL */
+        dst[1] = rf + (rf - ce);  /* FR */
+        dst[2] = ce;  /* FC */
+        dst[3] = 0;   /* LFE (only meant for special LFE effects) */
+        dst[4] = lb;  /* BL */
+        dst[5] = rb;  /* BR */
+    }
+
+    cvt->len_cvt = cvt->len_cvt * 3 / 2;  /* four channels became six. */
+    if (cvt->filters[++cvt->filter_index]) {  /* hand off to the next filter in the chain, if any. */
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+
+/* Upmix stereo to a pseudo-4.0 stream in place; the back pair repeats the front pair. */
+static void
+SDL_ConvertStereoToQuad(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    const float *in_tail = (const float *) (cvt->buf + cvt->len_cvt);
+    float *out_tail = (float *) (cvt->buf + cvt->len_cvt * 2);
+    int frames_left;
+
+    LOG_DEBUG_CONVERT("stereo", "quad");
+    assert(format == AUDIO_F32SYS);
+
+    /* Work from the last frame towards the first so expanded output never overwrites unread input. */
+    for (frames_left = cvt->len_cvt / (sizeof(float) * 2); frames_left != 0; frames_left--) {
+        const float left = in_tail[-2];
+        const float right = in_tail[-1];
+        in_tail -= 2;
+        out_tail -= 4;
+        out_tail[0] = left;   /* FL */
+        out_tail[1] = right;  /* FR */
+        out_tail[2] = left;   /* BL */
+        out_tail[3] = right;  /* BR */
+    }
+
+    cvt->len_cvt *= 2;
+    if (cvt->filters[++cvt->filter_index] != NULL) {
+        cvt->filters[cvt->filter_index](cvt, format);
+    }
+}
+
+
+/* Upmix 5.1 to 7.1 in place: each side channel is the front/back average of its own side, which is then subtracted back out of that side's front and back. */
+static void
+SDL_Convert51To71(SDL_AudioCVT_EX * cvt, SDL_AudioFormat format)
+{
+    float lf, rf, lb, rb, ls, rs;
+    int i;
+    const float *src = (const float *) (cvt->buf + cvt->len_cvt);  /* one past the last 5.1 frame. */
+    float *dst = (float *) (cvt->buf + cvt->len_cvt * 4 / 3);  /* one past where the last 7.1 frame will end. */
+
+    LOG_DEBUG_CONVERT("5.1", "7.1");
+    assert(format == AUDIO_F32SYS);
+    assert(cvt->len_cvt % (sizeof(float) * 6) == 0);
+    /* Frames are expanded last-to-first so the wider output never overwrites 5.1 input that is still unread. */
+    for (i = cvt->len_cvt / (sizeof(float) * 6); i; --i) {
+        dst -= 8;
+        src -= 6;
+        lf = src[0];
+        rf = src[1];
+        lb = src[4];
+        rb = src[5];
+        ls = (lf + lb) * 0.5f;
+        rs = (rf + rb) * 0.5f;
+        /* !!! FIXME: these four may clip */
+        lf += lf - ls;
+        rf += rf - rs;  /* right-hand channels are offset by the right side signal, not the left one. */
+        lb += lb - ls;
+        rb += rb - rs;
+        dst[3] = src[3];  /* LFE: copied before dst[1] is stored, which aliases src[3] on the second frame. */
+        dst[2] = src[2];  /* FC: copied before dst[0] is stored, which aliases src[2] on the second frame. */
+        dst[7] = rs; /* SR */
+        dst[6] = ls; /* SL */
+        dst[5] = rb;  /* BR */
+        dst[4] = lb;  /* BL */
+        dst[1] = rf;  /* FR */
+        dst[0] = lf;  /* FL */
+    }
+
+    cvt->len_cvt = cvt->len_cvt * 4 / 3;  /* six channels became eight. */
+
+    if (cvt->filters[++cvt->filter_index]) {  /* hand off to the next filter in the chain, if any. */
+        cvt->filters[cvt->filter_index] (cvt, format);
+    }
+}
+
+/* SDL's resampler uses a "bandlimited interpolation" algorithm:
+     https://ccrma.stanford.edu/~jos/resample/ */
+/* The constants below fix the size of the windowed-sinc lookup table shared by the resampler functions. */
+#define RESAMPLER_ZERO_CROSSINGS 5
+#define RESAMPLER_BITS_PER_SAMPLE 16
+#define RESAMPLER_SAMPLES_PER_ZERO_CROSSING  (1 << ((RESAMPLER_BITS_PER_SAMPLE / 2) + 1))  /* evaluates to 512 */
+#define RESAMPLER_FILTER_SIZE ((RESAMPLER_SAMPLES_PER_ZERO_CROSSING * RESAMPLER_ZERO_CROSSINGS) + 1)  /* evaluates to 2561 */
+
+/* This is a "modified" bessel function, so you can't use POSIX j0() */
+static double
+bessel(const double x)
+{
+    const double half = x / 2.0;
+    double sum = 1.0f;        /* the k = 0 term of the series. */
+    double factorial = 1.0f;  /* k!, refreshed once per accepted term. */
+    int k;
+
+    /* Accumulate (x/2)^(2k) / (k!)^2 until a term falls below the cutoff. */
+    for (k = 1; ; ) {
+        const double term = pow(half, k * 2) / pow(factorial, 2);
+        if (term < 1.0e-21f) {
+            break;
+        }
+        sum += term;
+        factorial *= (double) ++k;
+    }
+
+    return sum;
+}
+
+/* build kaiser table with cardinal sine applied to it, and array of differences between elements. table and diffs are both written up to index tablelen - 1. */
+static void
+kaiser_and_sinc(float *table, float *diffs, const int tablelen, const double beta)
+{
+    const int lenm1 = tablelen - 1;
+    const int lenm1div2 = lenm1 / 2;
+    int i;
+
+    table[0] = 1.0f;  /* never touched by the loops below, so the first entry stays exactly 1. */
+    for (i = 1; i < tablelen; i++) {  /* fills table[tablelen - 1] down to table[1] with the Kaiser window. */
+        const double kaiser = bessel(beta * sqrt(1.0 - pow(((i - lenm1) / 2.0) / lenm1div2, 2.0))) / bessel(beta);
+        table[tablelen - i] = (float) kaiser;
+    }
+
+    for (i = 1; i < tablelen; i++) {  /* starts at 1: x would be 0 at i == 0, making sinf(x) / x undefined. */
+        const float x = (((float) i) / ((float) RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) * ((float) M_PI);
+        table[i] *= sinf(x) / x;
+        diffs[i - 1] = table[i] - table[i - 1];  /* table[i - 1] already has its sinc factor applied. */
+    }
+    diffs[lenm1] = 0.0f;  /* no successor for the last entry. */
+}
+
+
+// static SDL_SpinLock ResampleFilterSpinlock = 0;
+static float *ResamplerFilter = NULL;
+static float *ResamplerFilterDifference = NULL;
+
+static int
+SDL_PrepareResampleFilter(void)
+{
+    /* Fixed storage for both tables, so building them never allocates. */
+    static float resampler_filter_buffer[RESAMPLER_FILTER_SIZE] = { 0 };
+    static float resampler_filter_difference_buffer[RESAMPLER_FILTER_SIZE] = { 0 };
+    /* if dB > 50, beta=(0.1102 * (dB - 8.7)), according to Matlab. */
+    const double dB = 80.0;
+    const double beta = 0.1102 * (dB - 8.7);
+    /* NOTE: the spinlock around this setup is commented out in this file, so the first call is unsynchronised. */
+    if (ResamplerFilter != NULL) {
+        return 0;  /* tables were already built by an earlier call. */
+    }
+    ResamplerFilter = resampler_filter_buffer;
+    ResamplerFilterDifference = resampler_filter_difference_buffer;
+    kaiser_and_sinc(ResamplerFilter, ResamplerFilterDifference, RESAMPLER_FILTER_SIZE, beta);
+    return 0;
+}
+
+static int
+ResamplerPadding(const int inrate, const int outrate)
+{
+    int frames = RESAMPLER_SAMPLES_PER_ZERO_CROSSING;  /* used as-is when upsampling. */
+
+    if (inrate > outrate) {  /* downsampling: scale by inrate/outrate, rounded up. */
+        frames = (int) ceil(((float) (RESAMPLER_SAMPLES_PER_ZERO_CROSSING * inrate) / ((float) outrate)));
+    }
+    return (inrate == outrate) ? 0 : frames;  /* equal rates need no padding at all. */
+}
+
+/* Resamples interleaved float frames from inbuf into outbuf and returns the number of bytes written. lpadding and rpadding are expected to be buffers of (ResamplerPadding(inrate, outrate) * chans * sizeof (float)) bytes. */
+static int
+SDL_ResampleAudio(const int chans, const int inrate, const int outrate,
+                        const float *lpadding, const float *rpadding,
+                        const float *inbuf, const int inbuflen,
+                        float *outbuf, const int outbuflen)
+{
+    const double finrate = (double) inrate;
+    const double outtimeincr = 1.0 / ((float) outrate);  /* seconds per output frame. */
+    const double  ratio = ((float) outrate) / ((float) inrate);
+    const int paddinglen = ResamplerPadding(inrate, outrate);
+    const int framelen = chans * (int)sizeof (float);  /* bytes per frame. */
+    const int inframes = inbuflen / framelen;
+    const int wantedoutframes = (int) ((inbuflen / framelen) * ratio);  /* outbuflen isn't total to write, it's total available. */
+    const int maxoutframes = outbuflen / framelen;
+    const int outframes = SDL_minEX(wantedoutframes, maxoutframes);
+    float *dst = outbuf;
+    double outtime = 0.0;  /* timestamp of the output frame being produced; advanced by accumulation. */
+    int i, j, chan;
+    /* Reads the ResamplerFilter / ResamplerFilterDifference tables directly, so they must already be set up. */
+    for (i = 0; i < outframes; i++) {
+        const int srcindex = (int) (outtime * inrate);  /* input frame at or just before outtime. */
+        const double intime = ((double) srcindex) / finrate;
+        const double innexttime = ((double) (srcindex + 1)) / finrate;
+        const double interpolation1 = 1.0 - ((innexttime - outtime) / (innexttime - intime));  /* fractional position between srcindex and srcindex + 1. */
+        const int filterindex1 = (int) (interpolation1 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
+        const double interpolation2 = 1.0 - interpolation1;
+        const int filterindex2 = (int) (interpolation2 * RESAMPLER_SAMPLES_PER_ZERO_CROSSING);
+
+        for (chan = 0; chan < chans; chan++) {
+            float outsample = 0.0f;
+
+            /* do this twice to calculate the sample, once for the "left wing" and then same for the right. */
+            /* !!! FIXME: do both wings in one loop */
+            for (j = 0; (filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
+                const int srcframe = srcindex - j;  /* walks backwards from srcindex; negative indices come from lpadding. */
+                /* !!! FIXME: we can bubble this conditional out of here by doing a pre loop. */
+                const float insample = (srcframe < 0) ? lpadding[((paddinglen + srcframe) * chans) + chan] : inbuf[(srcframe * chans) + chan];
+                outsample += (float)(insample * (ResamplerFilter[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation1 * ResamplerFilterDifference[filterindex1 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
+            }
+
+            for (j = 0; (filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)) < RESAMPLER_FILTER_SIZE; j++) {
+                const int srcframe = srcindex + 1 + j;  /* walks forwards from srcindex + 1; indices past the input come from rpadding. */
+                /* !!! FIXME: we can bubble this conditional out of here by doing a post loop. */
+                const float insample = (srcframe >= inframes) ? rpadding[((srcframe - inframes) * chans) + chan] : inbuf[(srcframe * chans) + chan];
+                outsample += (float)(insample * (ResamplerFilter[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)] + (interpolation2 * ResamplerFilterDifference[filterindex2 + (j * RESAMPLER_SAMPLES_PER_ZERO_CROSSING)])));
+            }
+            *(dst++) = outsample;
+        }
+
+        outtime += outtimeincr;
+    }
+
+    return outframes * chans * sizeof (float);
+}
+
+int
+SDL_ConvertAudio_EX(SDL_AudioCVT_EX * cvt)
+{
+    /* !!! FIXME: (cvt) should be const; stack-copy it here. */
+    /* !!! FIXME: (actually, we can't...len_cvt needs to be updated. Grr.) */
+
+    /* Without a buffer there is nothing to run the filters over. */
+    if (!cvt->buf) {
+        return SDL_PrintError("No buffer allocated for conversion");
+    }
+
+    /* Until a filter changes it, the converted length equals the input length. */
+    cvt->len_cvt = cvt->len;
+
+    /* Start the chain at slot 0; each filter invokes its own successor. */
+    if (cvt->filters[0] != NULL) {
+        cvt->filter_index = 0;
+        cvt->filters[0](cvt, cvt->src_format);
+    }
+    /* An empty chain means no conversion was needed, which is reported as success too. */
+    return 0;
+}
+
+static void
+SDL_Convert_Byteswap(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format)
+{
+#if DEBUG_CONVERT
+    printf("Converting byte order\n");
+#endif
+    assert(0 && "we are doing byte swap!!!");  /* trips whenever this filter runs in a build with asserts enabled. */
+    /*
+    switch (SDL_AUDIO_BITSIZE(format)) {
+        #define CASESWAP(b) \
+            case b: { \
+                uint##_t *ptr = (uint##_t *) cvt->buf; \
+                int i; \
+                for (i = cvt->len_cvt / sizeof (*ptr); i; --i, ++ptr) { \
+                    *ptr = SDL_Swap##b(*ptr); \
+                } \
+                break; \
+            }
+
+        CASESWAP(16);
+        CASESWAP(32);
+        CASESWAP(64);
+
+        #undef CASESWAP
+
+        default: assert(!"unhandled byteswap datatype!"); break;
+    }
+    */
+    /* The swap itself is commented out above, so the sample data is passed on unchanged; only the endian flag is flipped. */
+    if (cvt->filters[++cvt->filter_index]) {
+        /* flip endian flag for data. */
+        if (format & SDL_AUDIO_MASK_ENDIAN) {
+            format &= ~SDL_AUDIO_MASK_ENDIAN;
+        } else {
+            format |= SDL_AUDIO_MASK_ENDIAN;
+        }
+        cvt->filters[cvt->filter_index](cvt, format);
+    }
+}
+
+static int
+SDL_AddAudioCVTFilter(SDL_AudioCVT_EX *cvt, const SDL_AudioFilter_EX filter)
+{
+    if (cvt->filter_index >= SDL_AUDIOCVT_MAX_FILTERS) {
+        return SDL_PrintError("Too many filters needed for conversion, exceeded maximum of ");
+    } else if (!filter) {
+        return SDL_PrintError("Audio filter pointer is NULL");
+    }
+    cvt->filters[cvt->filter_index] = filter;
+    cvt->filter_index += 1;
+    cvt->filters[cvt->filter_index] = NULL;  /* the slot after the newest filter always terminates the list. */
+    return 0;
+}
+
+static int
+SDL_BuildAudioTypeCVTToFloat(SDL_AudioCVT_EX *cvt, const SDL_AudioFormat src_fmt)
+{
+    /* Appends the integer-to-float filter for src_fmt: 1 if one was added, 0 if src_fmt is already float. */
+    const uint16_t float_bits = 32;
+    uint16_t sample_bits;
+    SDL_AudioFilter_EX to_float = NULL;
+
+    // assert(0 && "might check endian");
+    // if ((SDL_AUDIO_ISBIGENDIAN(src_fmt) != 0) == (SDL_BYTEORDER == SDL_LIL_ENDIAN)) {
+    //     if (SDL_AddAudioCVTFilter(cvt, SDL_Convert_Byteswap) < 0) {
+    //         return -1;
+    //     }
+    //     retval = 1;  /* added a converter. */
+    // }
+
+    if (SDL_AUDIO_ISFLOAT(src_fmt)) {
+        return 0;  /* no conversion necessary. */
+    }
+
+    sample_bits = SDL_AUDIO_BITSIZE(src_fmt);
+    // assert(0 && "trying to do float convert");
+    switch (src_fmt & ~SDL_AUDIO_MASK_ENDIAN) {
+        case AUDIO_S8: to_float = SDL_Convert_S8_to_F32; break;
+        case AUDIO_U8: to_float = SDL_Convert_U8_to_F32; break;
+        case AUDIO_S16: to_float = SDL_Convert_S16_to_F32; break;
+        case AUDIO_U16: to_float = SDL_Convert_U16_to_F32; break;
+        case AUDIO_S32: to_float = SDL_Convert_S32_to_F32; break;
+        default: assert(!"Unexpected audio format!"); break;
+    }
+
+    if (to_float == NULL) {
+        return SDL_PrintError("No conversion from source format to float available");
+    }
+    if (SDL_AddAudioCVTFilter(cvt, to_float) < 0) {
+        return -1;
+    }
+
+    /* Widening samples to 32 bits grows the data, so the buffer multiplier and length ratio follow. */
+    if (sample_bits < float_bits) {
+        const int mult = (float_bits / sample_bits);
+        cvt->len_mult *= mult;
+        cvt->len_ratio *= mult;
+    } else if (sample_bits > float_bits) {
+        cvt->len_ratio /= (sample_bits / float_bits);
+    }
+    return 1;  /* added a converter. */
+}
+
+static int
+SDL_BuildAudioTypeCVTFromFloat(SDL_AudioCVT_EX *cvt, const SDL_AudioFormat dst_fmt)
+{
+    const uint16_t float_bits = 32;
+    uint16_t target_bits;
+    SDL_AudioFilter_EX from_float = NULL;
+
+    /* Appends the float-to-integer filter for dst_fmt: 1 if one was added, 0 if dst_fmt is float. */
+    if (SDL_AUDIO_ISFLOAT(dst_fmt)) {
+        return 0;  /* no conversion necessary. */
+    }
+    target_bits = SDL_AUDIO_BITSIZE(dst_fmt);
+    // assert(0 && "trying to do from float convert!");
+    switch (dst_fmt & ~SDL_AUDIO_MASK_ENDIAN) {
+        case AUDIO_S8: from_float = SDL_Convert_F32_to_S8; break;
+        case AUDIO_U8: from_float = SDL_Convert_F32_to_U8; break;
+        case AUDIO_S16: from_float = SDL_Convert_F32_to_S16; break;
+        case AUDIO_U16: from_float = SDL_Convert_F32_to_U16; break;
+        case AUDIO_S32: from_float = SDL_Convert_F32_to_S32; break;
+        default: assert(!"Unexpected audio format!"); break;
+    }
+    /* NOTE(review): the message below carries a printf-style 0x%.4x but no argument is passed; confirm how SDL_PrintError treats it. */
+    if (from_float == NULL) {
+        return SDL_PrintError("No conversion from float to format 0x%.4x available");
+    }
+    if (SDL_AddAudioCVTFilter(cvt, from_float) < 0) {
+        return -1;
+    }
+    /* Only a target wider than 32 bits would grow the buffer; narrower targets just shrink the length ratio. */
+    if (float_bits < target_bits) {
+        const int mult = (target_bits / float_bits);
+        cvt->len_mult *= mult;
+        cvt->len_ratio *= mult;
+    } else if (float_bits > target_bits) {
+        cvt->len_ratio /= (float_bits / target_bits);
+    }
+
+    // assert(0 && "might check byte order");
+    // if ((SDL_AUDIO_ISBIGENDIAN(dst_fmt) != 0) == (SDL_BYTEORDER == SDL_LIL_ENDIAN)) {
+    //     if (SDL_AddAudioCVTFilter(cvt, SDL_Convert_Byteswap) < 0) {
+    //         return -1;
+    //     }
+    //     retval = 1;  /* added a converter. */
+    // }
+    return 1;  /* added a converter. */
+}
+
+static void
+SDL_ResampleCVT(SDL_AudioCVT_EX *cvt, const int chans, const SDL_AudioFormat format)
+{
+    /* !!! FIXME in 2.1: there are ten slots in the filter list, and the theoretical maximum we use is six (seven with NULL terminator).
+       !!! FIXME in 2.1:   We need to store data for this resampler, because the cvt structure doesn't store the original sample rates,
+       !!! FIXME in 2.1:   so we steal the ninth and tenth slot.  :( */
+    const int inrate = (int) (size_t) cvt->filters[SDL_AUDIOCVT_MAX_FILTERS-1];  /* source rate, stored as a fake function pointer. */
+    const int outrate = (int) (size_t) cvt->filters[SDL_AUDIOCVT_MAX_FILTERS];  /* destination rate, stored the same way. */
+    const float *src = (const float *) cvt->buf;
+    const int srclen = cvt->len_cvt;
+    /*float *dst = (float *) cvt->buf;
+    const int dstlen = (cvt->len * cvt->len_mult);*/
+    /* !!! FIXME: remove this if we can get the resampler to work in-place again. */
+    float *dst = (float *) (cvt->buf + srclen);  /* scratch output starts right after the input data. */
+    const int dstlen = (cvt->len * cvt->len_mult) - srclen;  /* whatever is left of the buffer behind the input. */
+    const int requestedpadding = ResamplerPadding(inrate, outrate);
+    int paddingsamples;
+    float *padding;
+
+    if (requestedpadding < INT32_MAX / chans) {  /* guards the multiplication below against int overflow. */
+        paddingsamples = requestedpadding * chans;
+    } else {
+        paddingsamples = 0;
+    }
+    assert(format == AUDIO_F32SYS);
+
+    /* we keep no streaming state here, so pad with silence on both ends. */
+    padding = (float *) calloc(paddingsamples ? paddingsamples : 1, sizeof (float));  /* never asks calloc for zero elements. */
+    if (!padding) {
+        SDL_OutOfMemoryEX();
+        return;  /* note: on this path len_cvt is left untouched and the rest of the chain is not run. */
+    }
+
+    cvt->len_cvt = SDL_ResampleAudio(chans, inrate, outrate, padding, padding, src, srclen, dst, dstlen);  /* the same zeroed block serves as left and right padding. */
+
+    free(padding);
+
+    memmove(cvt->buf, dst, cvt->len_cvt);  /* !!! FIXME: remove this if we can get the resampler to work in-place again. */
+
+    if (cvt->filters[++cvt->filter_index]) {  /* hand off to the next filter in the chain, if any. */
+        cvt->filters[cvt->filter_index](cvt, format);
+    }
+}
+
+/* !!! FIXME: We only have this macro salsa because SDL_AudioCVT_EX doesn't
+   !!! FIXME:  store channel info, so a separate filter entry point is
+   !!! FIXME:  generated for each supported channel count, each forwarding
+   !!! FIXME:  to SDL_ResampleCVT. When we rev the ABI, clean this up. */
+#define RESAMPLER_FUNCS(chans) \
+    static void \
+    SDL_ResampleCVT_c##chans(SDL_AudioCVT_EX *cvt, SDL_AudioFormat format) { \
+        SDL_ResampleCVT(cvt, chans, format); \
+    }
+RESAMPLER_FUNCS(1)  /* defines SDL_ResampleCVT_c1 */
+RESAMPLER_FUNCS(2)  /* defines SDL_ResampleCVT_c2 */
+RESAMPLER_FUNCS(4)  /* defines SDL_ResampleCVT_c4 */
+RESAMPLER_FUNCS(6)  /* defines SDL_ResampleCVT_c6 */
+RESAMPLER_FUNCS(8)  /* defines SDL_ResampleCVT_c8 */
+#undef RESAMPLER_FUNCS
+
+static SDL_AudioFilter_EX
+ChooseCVTResampler(const int dst_channels)
+{
+    SDL_AudioFilter_EX chosen = NULL;  /* stays NULL when no entry point exists for this channel count. */
+    switch (dst_channels) {
+        case 1: chosen = SDL_ResampleCVT_c1; break;
+        case 2: chosen = SDL_ResampleCVT_c2; break;
+        case 4: chosen = SDL_ResampleCVT_c4; break;
+        case 6: chosen = SDL_ResampleCVT_c6; break;
+        case 8: chosen = SDL_ResampleCVT_c8; break;
+        default: break;
+    }
+    return chosen;
+}
+
+static int
+SDL_BuildAudioResampleCVT(SDL_AudioCVT_EX * cvt, const int dst_channels,
+                          const int src_rate, const int dst_rate)
+{
+    SDL_AudioFilter_EX filter;
+    /* Adds the resampling filter to cvt: returns 1 if added, 0 when the rates already match, or an error result otherwise. */
+    if (src_rate == dst_rate) {
+        return 0;  /* no conversion necessary. */
+    }
+
+    filter = ChooseCVTResampler(dst_channels);
+    if (filter == NULL) {
+        return SDL_PrintError("No conversion available for these rates");
+    }
+
+    if (SDL_PrepareResampleFilter() < 0) {
+        return -1;
+    }
+
+    /* Update (cvt) with filter details... */
+    if (SDL_AddAudioCVTFilter(cvt, filter) < 0) {
+        return -1;
+    }
+
+    /* !!! FIXME in 2.1: there are ten slots in the filter list, and the theoretical maximum we use is six (seven with NULL terminator).
+       !!! FIXME in 2.1:   We need to store data for this resampler, because the cvt structure doesn't store the original sample rates,
+       !!! FIXME in 2.1:   so we steal the ninth and tenth slot.  :( */
+    if (cvt->filter_index >= (SDL_AUDIOCVT_MAX_FILTERS-2)) {  /* checked after the filter was already appended above. */
+        return SDL_PrintError("Too many filters needed for conversion, exceeded maximum of");
+    }
+    cvt->filters[SDL_AUDIOCVT_MAX_FILTERS-1] = (SDL_AudioFilter_EX) (size_t) src_rate;  /* read back by SDL_ResampleCVT as inrate. */
+    cvt->filters[SDL_AUDIOCVT_MAX_FILTERS] = (SDL_AudioFilter_EX) (size_t) dst_rate;  /* read back by SDL_ResampleCVT as outrate. */
+
+    if (src_rate < dst_rate) {
+        const double mult = ((double) dst_rate) / ((double) src_rate);
+        cvt->len_mult *= (int) ceil(mult);  /* integer multiplier, so round the growth up. */
+        cvt->len_ratio *= mult;
+    } else {
+        cvt->len_ratio /= ((double) src_rate) / ((double) dst_rate);
+    }
+
+    /* !!! FIXME: remove this if we can get the resampler to work in-place again. */
+    /* the buffer is big enough to hold the destination now, but
+       we need it large enough to hold a separate scratch buffer. */
+    cvt->len_mult *= 2;
+
+    return 1;               /* added a converter. */
+}
+
+static bool
+SDL_SupportedAudioFormat(const SDL_AudioFormat fmt)
+{
+    /* Every sample format the converters in this file accept. */
+    static const SDL_AudioFormat accepted[] = {
+        AUDIO_U8,     AUDIO_S8,
+        AUDIO_U16LSB, AUDIO_S16LSB,
+        AUDIO_U16MSB, AUDIO_S16MSB,
+        AUDIO_S32LSB, AUDIO_S32MSB,
+        AUDIO_F32LSB, AUDIO_F32MSB
+    };
+    size_t i;
+
+    for (i = 0; i < (sizeof (accepted) / sizeof (accepted[0])); i++) {
+        if (accepted[i] == fmt) {
+            return true;  /* supported. */
+        }
+    }
+
+    return false;  /* unsupported. */
+}
+
+static bool
+SDL_SupportedChannelCount(const int channels)
+{
+    /* Accepted layouts: mono, stereo, quad, 5.1 and 7.1. */
+    return (channels == 1) ||
+           (channels == 2) ||
+           (channels == 4) ||
+           (channels == 6) ||
+           (channels == 8);
+}
+
+
+/* Creates a set of audio filters to convert from one format to another.
+   Returns 0 if no conversion is needed, 1 if the audio filter is set up,
+   or -1 if an error like invalid parameter, unsupported format, etc. occurred.
+
+   cvt           conversion structure to fill in; it is zeroed before any
+                 other validation happens, so it never keeps stale state.
+   src_fmt/dst_fmt            sample formats, checked by SDL_SupportedAudioFormat().
+   src_channels/dst_channels  channel counts, checked by SDL_SupportedChannelCount().
+   src_rate/dst_rate          sample rates; must be > 0 and below
+                              INT32_MAX / RESAMPLER_SAMPLES_PER_ZERO_CROSSING.
+
+   Filters are appended in this order: to-float conversion, channel
+   up/down-mixing, resampling, conversion from float to the final format.
+*/
+
+int
+SDL_BuildAudioCVT_EX(SDL_AudioCVT_EX * cvt,
+                  SDL_AudioFormat src_fmt, uint8_t src_channels, int src_rate,
+                  SDL_AudioFormat dst_fmt, uint8_t dst_channels, int dst_rate)
+{
+    /* Sanity check target pointer */
+    if (cvt == NULL) {
+        return SDL_PrintError("cvt");
+    }
+
+    /* Make sure we zero out the audio conversion before error checking */
+    SDL_zeropEX(cvt);
+
+    if (!SDL_SupportedAudioFormat(src_fmt)) {
+        return SDL_PrintError("Invalid source format");
+    } else if (!SDL_SupportedAudioFormat(dst_fmt)) {
+        return SDL_PrintError("Invalid destination format");
+    } else if (!SDL_SupportedChannelCount(src_channels)) {
+        return SDL_PrintError("Invalid source channels");
+    } else if (!SDL_SupportedChannelCount(dst_channels)) {
+        return SDL_PrintError("Invalid destination channels");
+    } else if (src_rate <= 0) {
+        return SDL_PrintError("Source rate is equal to or less than zero");
+    } else if (dst_rate <= 0) {
+        return SDL_PrintError("Destination rate is equal to or less than zero");
+    } else if (src_rate >= INT32_MAX / RESAMPLER_SAMPLES_PER_ZERO_CROSSING) {
+        return SDL_PrintError("Source rate is too high");
+    } else if (dst_rate >= INT32_MAX / RESAMPLER_SAMPLES_PER_ZERO_CROSSING) {
+        return SDL_PrintError("Destination rate is too high");
+    }
+
+#if DEBUG_CONVERT
+    printf("Build format %04x->%04x, channels %u->%u, rate %d->%d\n",
+           src_fmt, dst_fmt, src_channels, dst_channels, src_rate, dst_rate);
+#endif
+
+    /* Start off with no conversion necessary */
+    cvt->src_format = src_fmt;
+    cvt->dst_format = dst_fmt;
+    cvt->needed = 0;
+    cvt->filter_index = 0;
+    SDL_zeroaEX(cvt->filters);
+    cvt->len_mult = 1;
+    cvt->len_ratio = 1.0;
+    cvt->rate_incr = ((double) dst_rate) / ((double) src_rate);
+
+    /* Make sure we've chosen audio conversion functions (MMX, scalar, etc.) */
+    // assert(0 && "choosing audio converters");
+    SDL_ChooseAudioConverters();
+
+    /* Type conversion goes like this now:
+        - byteswap to CPU native format first if necessary.
+        - convert to native Float32 if necessary.
+        - resample and change channel count if necessary.
+        - convert back to native format.
+        - byteswap back to foreign format if necessary.
+
+       The expectation is we can process data faster in float32
+       (possibly with SIMD), and making several passes over the same
+       buffer is likely to be CPU cache-friendly, avoiding the
+       biggest performance hit in modern times. Previously we had
+       (script-generated) custom converters for every data type and
+       it was a bloat on SDL compile times and final library size. */
+
+    /* see if we can skip float conversion entirely. */
+    if (src_rate == dst_rate && src_channels == dst_channels) {
+        if (src_fmt == dst_fmt) {
+            return 0;
+        }
+
+        /* just a byteswap needed? */
+        if ((src_fmt & ~SDL_AUDIO_MASK_ENDIAN) == (dst_fmt & ~SDL_AUDIO_MASK_ENDIAN)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_Convert_Byteswap) < 0) {
+                return -1;
+            }
+            cvt->needed = 1;
+            return 1;
+        }
+    }
+
+    /* Convert data types, if necessary. Updates (cvt). */
+    if (SDL_BuildAudioTypeCVTToFloat(cvt, src_fmt) < 0) {
+        return -1;              /* shouldn't happen, but just in case... */
+    }
+
+    /* Channel conversion */
+    /* src_channels is a by-value parameter: it is advanced after every mixer
+       that gets appended, so the following steps can chain onto the
+       intermediate layout until it equals dst_channels. */
+    if (src_channels < dst_channels) {
+        /* Upmixing */
+        /* Each upmix step scales both len_mult (buffer growth) and
+           len_ratio (resulting length) by the channel-count ratio. */
+        /* Mono -> Stereo [-> ...] */
+        if ((src_channels == 1) && (dst_channels > 1)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_ConvertMonoToStereo) < 0) {
+                return -1;
+            }
+            cvt->len_mult *= 2;
+            src_channels = 2;
+            cvt->len_ratio *= 2;
+        }
+        /* [Mono ->] Stereo -> 5.1 [-> 7.1] */
+        if ((src_channels == 2) && (dst_channels >= 6)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_ConvertStereoTo51) < 0) {
+                return -1;
+            }
+            src_channels = 6;
+            cvt->len_mult *= 3;
+            cvt->len_ratio *= 3;
+        }
+        /* Quad -> 5.1 [-> 7.1] */
+        if ((src_channels == 4) && (dst_channels >= 6)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_ConvertQuadTo51) < 0) {
+                return -1;
+            }
+            src_channels = 6;
+            /* integer form of "multiply by 1.5, rounding up". */
+            cvt->len_mult = (cvt->len_mult * 3 + 1) / 2;
+            cvt->len_ratio *= 1.5;
+        }
+        /* [[Mono ->] Stereo ->] 5.1 -> 7.1 */
+        if ((src_channels == 6) && (dst_channels == 8)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_Convert51To71) < 0) {
+                return -1;
+            }
+            src_channels = 8;
+            /* integer form of "multiply by 4/3, rounding up". */
+            cvt->len_mult = (cvt->len_mult * 4 + 2) / 3;
+            /* Should be numerically exact with every valid input to this
+               function */
+            cvt->len_ratio = cvt->len_ratio * 4 / 3;
+        }
+        /* [Mono ->] Stereo -> Quad */
+        if ((src_channels == 2) && (dst_channels == 4)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_ConvertStereoToQuad) < 0) {
+                return -1;
+            }
+            src_channels = 4;
+            cvt->len_mult *= 2;
+            cvt->len_ratio *= 2;
+        }
+    } else if (src_channels > dst_channels) {
+        /* Downmixing */
+        /* Downmix steps only shrink the data, so only len_ratio is updated
+           here; len_mult is left untouched. */
+        /* 7.1 -> 5.1 [-> Stereo [-> Mono]] */
+        /* 7.1 -> 5.1 [-> Quad] */
+        if ((src_channels == 8) && (dst_channels <= 6)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_Convert71To51) < 0) {
+                return -1;
+            }
+            src_channels = 6;
+            cvt->len_ratio *= 0.75;
+        }
+        /* [7.1 ->] 5.1 -> Stereo [-> Mono] */
+        if ((src_channels == 6) && (dst_channels <= 2)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_Convert51ToStereo) < 0) {
+                return -1;
+            }
+            src_channels = 2;
+            cvt->len_ratio /= 3;
+        }
+        /* 5.1 -> Quad */
+        if ((src_channels == 6) && (dst_channels == 4)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_Convert51ToQuad) < 0) {
+                return -1;
+            }
+            src_channels = 4;
+            cvt->len_ratio = cvt->len_ratio * 2 / 3;
+        }
+        /* Quad -> Stereo [-> Mono] */
+        if ((src_channels == 4) && (dst_channels <= 2)) {
+            if (SDL_AddAudioCVTFilter(cvt, SDL_ConvertQuadToStereo) < 0) {
+                return -1;
+            }
+            src_channels = 2;
+            cvt->len_ratio /= 2;
+        }
+        /* [... ->] Stereo -> Mono */
+        if ((src_channels == 2) && (dst_channels == 1)) {
+            SDL_AudioFilter_EX filter = NULL;
+
+            /* Prefer the SSE3 mixer when it is compiled in and the CPU
+               reports support; otherwise fall back to the scalar one. */
+            #if HAVE_SSE3_INTRINSICS
+            if (SDL_HasSSE3()) {
+                filter = SDL_ConvertStereoToMono_SSE3;
+            }
+            #endif
+
+            if (!filter) {
+                filter = SDL_ConvertStereoToMono;
+            }
+
+            if (SDL_AddAudioCVTFilter(cvt, filter) < 0) {
+                return -1;
+            }
+
+            src_channels = 1;
+            cvt->len_ratio /= 2;
+        }
+    }
+
+    if (src_channels != dst_channels) {
+        /* All combinations of supported channel counts should have been
+           handled by now, but let's be defensive */
+      return SDL_PrintError("Invalid channel combination");
+    }
+    
+    /* Do rate conversion, if necessary. Updates (cvt). */
+    if (SDL_BuildAudioResampleCVT(cvt, dst_channels, src_rate, dst_rate) < 0) {
+        return -1;              /* shouldn't happen, but just in case... */
+    }
+
+    /* Move to final data type. */
+    if (SDL_BuildAudioTypeCVTFromFloat(cvt, dst_fmt) < 0) {
+        return -1;              /* shouldn't happen, but just in case... */
+    }
+
+    /* A conversion is needed exactly when at least one filter was added. */
+    cvt->needed = (cvt->filter_index != 0);
+    return (cvt->needed);
+}
+
+/* Resampler backend hooks stored in each SDL_AudioStream. */
+/* Resamples inbuflen bytes from inbuf into outbuf (capacity outbuflen bytes);
+   SDL_AudioStreamPutInternal() uses the return value as the number of bytes produced. */
+typedef int (*SDL_ResampleAudioStreamFunc)(SDL_AudioStream *stream, const void *inbuf, const int inbuflen, void *outbuf, const int outbuflen);
+/* Resets the resampler's history; invoked from SDL_AudioStreamClearEX(). */
+typedef void (*SDL_ResetAudioStreamResamplerFunc)(SDL_AudioStream *stream);
+/* Releases the resampler's resources; invoked from SDL_FreeAudioStreamEX(). */
+typedef void (*SDL_CleanupAudioStreamResamplerFunc)(SDL_AudioStream *stream);
+
+/* State of one conversion stream: input is pushed with SDL_AudioStreamPutEX(),
+   converted/resampled, and stored in (queue) until SDL_AudioStreamGetEX() reads it. */
+struct _SDL_AudioStream
+{
+    SDL_AudioCVT_EX cvt_before_resampling;  /* source format -> float32 at pre_resample_channels (built only when the rates differ). */
+    SDL_AudioCVT_EX cvt_after_resampling;   /* conversion to the destination format and channel count. */
+    SDL_DataQueue *queue;                   /* converted output waiting to be read. */
+    bool first_run;                         /* true until the first internal put; no padding is prepended on that run. */
+    uint8_t *staging_buffer;                /* holds input until at least staging_buffer_size bytes are available. */
+    int staging_buffer_size;                /* in bytes: padding frames * src_sample_frame_size. */
+    int staging_buffer_filled;              /* bytes currently stored in staging_buffer. */
+    uint8_t *work_buffer_base;  /* maybe unaligned pointer from realloc(). */
+    int work_buffer_len;                    /* usable length requested so far; the allocation carries 32 extra bytes for alignment. */
+    int src_sample_frame_size;              /* bytes per source sample frame. */
+    SDL_AudioFormat src_format;
+    uint8_t src_channels;
+    int src_rate;
+    int dst_sample_frame_size;              /* bytes per destination sample frame. */
+    SDL_AudioFormat dst_format;
+    uint8_t dst_channels;
+    int dst_rate;
+    double rate_incr;                       /* dst_rate / src_rate. */
+    uint8_t pre_resample_channels;          /* min(src_channels, dst_channels); channel count seen by the resampler. */
+    int packetlen;                          /* packet size handed to the data queue. */
+    int resampler_padding_samples;          /* padding length in float samples (all channels included). */
+    float *resampler_padding;               /* tail of the previous put, prepended to the next one. */
+    void *resampler_state;                  /* built-in resampler: left-padding floats; libsamplerate: SRC_STATE pointer. */
+    SDL_ResampleAudioStreamFunc resampler_func;
+    SDL_ResetAudioStreamResamplerFunc reset_resampler_func;
+    SDL_CleanupAudioStreamResamplerFunc cleanup_resampler_func;
+};
+
+static uint8_t *
+EnsureStreamBufferSize(SDL_AudioStream *stream, const int newlen)
+{
+    /* Returns a 16-byte aligned pointer into the stream's work buffer that
+       has room for at least newlen bytes, growing the buffer on demand.
+       Returns NULL if the buffer could not be grown. */
+    uint8_t *base = stream->work_buffer_base;
+    size_t misalign;
+
+    if (stream->work_buffer_len < newlen) {
+        /* 32 spare bytes leave room for the alignment fix-up below. */
+        base = (uint8_t *) realloc(stream->work_buffer_base, newlen + 32);
+        if (base == NULL) {
+            SDL_OutOfMemoryEX();
+            return NULL;
+        }
+        stream->work_buffer_base = base;
+        stream->work_buffer_len = newlen;
+    }
+
+    /* Round up to the next 16-byte boundary for SIMD code. */
+    misalign = ((size_t) base) & 15;
+    if (misalign != 0) {
+        base += (16 - misalign);
+    }
+    return base;
+}
+
+#ifdef HAVE_LIBSAMPLERATE_H
+
+#include <samplerate.h>
+/* libsamplerate converter type handed to src_new() in SetupLibSampleRateResampling();
+   SRC_SINC_FASTEST is kept in the trailing comment as the alternative. */
+static int SRC_converter = SRC_SINC_MEDIUM_QUALITY;//SRC_SINC_FASTEST;
+
+static int
+SDL_ResampleAudioStream_SRC(SDL_AudioStream *stream, const void *_inbuf, const int inbuflen, void *_outbuf, const int outbuflen)
+{
+    /* libsamplerate backend: resamples float frames from _inbuf into _outbuf
+       and returns the number of bytes generated, or 0 if src_process() fails. */
+    const int bytes_per_frame = sizeof(float) * stream->pre_resample_channels;
+    SRC_DATA job;
+
+    /* SDL_AudioStreamPut() shouldn't allow in-place resamples. */
+    assert(((const float *) _inbuf) != ((const float *) _outbuf));
+
+    /* Older versions of libsamplerate had a non-const input pointer, but didn't write to it */
+    job.data_in = (float *) _inbuf;
+    job.input_frames = inbuflen / bytes_per_frame;
+    job.input_frames_used = 0;
+    job.data_out = (float *) _outbuf;
+    job.output_frames = outbuflen / bytes_per_frame;
+    job.end_of_input = 0;
+    job.src_ratio = stream->rate_incr;
+
+    if (src_process((SRC_STATE *) stream->resampler_state, &job) != 0) {
+        return 0;
+    }
+
+    /* Unconsumed input would have to be stored somewhere; that is not supported. */
+    assert(job.input_frames_used == job.input_frames);
+
+    return job.output_frames_gen * (sizeof(float) * stream->pre_resample_channels);
+}
+
+static void
+SDL_ResetAudioStreamResampler_SRC(SDL_AudioStream *stream)
+{
+    /* Hand the stream's libsamplerate state to src_reset(). */
+    SRC_STATE *converter = (SRC_STATE *) stream->resampler_state;
+    src_reset(converter);
+}
+
+static void
+SDL_CleanupAudioStreamResampler_SRC(SDL_AudioStream *stream)
+{
+    /* Delete the libsamplerate state (if any) and detach every resampler
+       hook from the stream. */
+    if (stream->resampler_state != NULL) {
+        src_delete((SRC_STATE *) stream->resampler_state);
+    }
+
+    stream->cleanup_resampler_func = NULL;
+    stream->reset_resampler_func = NULL;
+    stream->resampler_func = NULL;
+    stream->resampler_state = NULL;
+}
+
+static bool
+SetupLibSampleRateResampling(SDL_AudioStream *stream)
+{
+    /* Creates a libsamplerate converter for the stream and installs the
+       _SRC hooks. Returns false (with the hooks cleared) if src_new() fails. */
+    int err = 0;
+    SRC_STATE *converter = src_new(SRC_converter, stream->pre_resample_channels, &err);
+
+    if (converter == NULL) {
+        printf("fail to init libsamplerate\n");
+        SDL_CleanupAudioStreamResampler_SRC(stream);
+        return false;
+    }
+
+    stream->cleanup_resampler_func = SDL_CleanupAudioStreamResampler_SRC;
+    stream->reset_resampler_func = SDL_ResetAudioStreamResampler_SRC;
+    stream->resampler_func = SDL_ResampleAudioStream_SRC;
+    stream->resampler_state = converter;
+
+    return true;
+}
+#endif /* HAVE_LIBSAMPLERATE_H */
+
+static int
+SDL_ResampleAudioStream(SDL_AudioStream *stream, const void *_inbuf, const int inbuflen, void *_outbuf, const int outbuflen)
+{
+    /* Built-in resampler backend: runs SDL_ResampleAudio() on the input and
+       then remembers the tail of that input as left padding for the next call. */
+    const int padsamples = stream->resampler_padding_samples;
+    const int padbytes = padsamples * sizeof (float);
+    const uint8_t *in_end = ((const uint8_t *) _inbuf) + inbuflen;
+    float *left = (float *) stream->resampler_state;
+    const int keep = SDL_minEX(inbuflen, padbytes);
+    int produced;
+
+    /* SDL_AudioStreamPutEX() shouldn't allow in-place resamples. */
+    assert(((const float *) _inbuf) != ((const float *) _outbuf));
+
+    /* The caller arranges for valid right-side padding samples to sit
+       directly after the input, so the end of the input doubles as the
+       right padding pointer. */
+    produced = SDL_ResampleAudio((int) stream->pre_resample_channels,
+                                 stream->src_rate, stream->dst_rate,
+                                 left, (const float *) in_end,
+                                 (const float *) _inbuf, inbuflen,
+                                 (float *) _outbuf, outbuflen);
+
+    /* Copy the last (keep) bytes of this input to the end of the left padding. */
+    memcpy((left + padsamples) - (keep / sizeof (float)), in_end - keep, keep);
+
+    return produced;
+}
+
+static void
+SDL_ResetAudioStreamResampler(SDL_AudioStream *stream)
+{
+    /* Silence the whole left-padding history kept in resampler_state. */
+    memset(stream->resampler_state, '\0', stream->resampler_padding_samples * sizeof (float));
+}
+
+static void
+SDL_CleanupAudioStreamResampler(SDL_AudioStream *stream)
+{
+    /* Release the built-in resampler's padding history. The pointer is
+       cleared afterwards, like the libsamplerate cleanup does, so a later
+       cleanup or reset can never touch freed memory. */
+    free(stream->resampler_state);
+    stream->resampler_state = NULL;
+}
+
+/* Creates a stream that converts audio from the source format, channel
+   count and rate to the destination ones.
+
+   src_format/dst_format      sample formats (AUDIO_* values).
+   src_channels/dst_channels  channel counts; must not be zero.
+   src_rate/dst_rate          sample rates; must be greater than zero.
+
+   Returns the new stream, or NULL on invalid parameters, allocation
+   failure, or when no conversion path can be built. Release the stream
+   with SDL_FreeAudioStreamEX(). */
+SDL_AudioStream *
+SDL_NewAudioStreamEX(const SDL_AudioFormat src_format,
+                   const uint8_t src_channels,
+                   const int src_rate,
+                   const SDL_AudioFormat dst_format,
+                   const uint8_t dst_channels,
+                   const int dst_rate)
+{
+    const int packetlen = 4096;  /* !!! FIXME: good enough for now. */
+    uint8_t pre_resample_channels;
+    SDL_AudioStream *retval;
+
+    /* Validate up front: the channel counts end up as an integer divisor
+       when the staging buffer is sized below, and the rates feed the
+       padding and ratio calculations. SDL_BuildAudioCVT_EX() rejects these
+       values too, but only after that arithmetic has already run. */
+    if (src_channels == 0) {
+        SDL_PrintError("src_channels");
+        return NULL;
+    } else if (dst_channels == 0) {
+        SDL_PrintError("dst_channels");
+        return NULL;
+    } else if (src_rate <= 0) {
+        SDL_PrintError("src_rate");
+        return NULL;
+    } else if (dst_rate <= 0) {
+        SDL_PrintError("dst_rate");
+        return NULL;
+    }
+
+    retval = (SDL_AudioStream *) calloc(1, sizeof (SDL_AudioStream));
+    if (!retval) {
+        SDL_OutOfMemoryEX();
+        return NULL;
+    }
+
+    /* If increasing channels, do it after resampling, since we'd just
+       do more work to resample duplicate channels. If we're decreasing, do
+       it first so we resample the interpolated data instead of interpolating
+       the resampled data (!!! FIXME: decide if that works in practice, though!). */
+    pre_resample_channels = SDL_minEX(src_channels, dst_channels);
+
+    retval->first_run = true;
+    retval->src_sample_frame_size = (SDL_AUDIO_BITSIZE(src_format) / 8) * src_channels;
+    retval->src_format = src_format;
+    retval->src_channels = src_channels;
+    retval->src_rate = src_rate;
+    retval->dst_sample_frame_size = (SDL_AUDIO_BITSIZE(dst_format) / 8) * dst_channels;
+    retval->dst_format = dst_format;
+    retval->dst_channels = dst_channels;
+    retval->dst_rate = dst_rate;
+    retval->pre_resample_channels = pre_resample_channels;
+    retval->packetlen = packetlen;
+    retval->rate_incr = ((double) dst_rate) / ((double) src_rate);
+    retval->resampler_padding_samples = ResamplerPadding(retval->src_rate, retval->dst_rate) * pre_resample_channels;
+    /* Always allocate at least one float so a zero padding size is not
+       mistaken for an allocation failure. */
+    retval->resampler_padding = (float *) calloc(retval->resampler_padding_samples ? retval->resampler_padding_samples : 1, sizeof (float));
+
+    if (retval->resampler_padding == NULL) {
+        SDL_FreeAudioStreamEX(retval);
+        SDL_OutOfMemoryEX();
+        return NULL;
+    }
+
+    /* The staging buffer holds one padding's worth of frames, measured in
+       source-format bytes. pre_resample_channels is non-zero here thanks to
+       the validation above. */
+    retval->staging_buffer_size = ((retval->resampler_padding_samples / retval->pre_resample_channels) * retval->src_sample_frame_size);
+    if (retval->staging_buffer_size > 0) {
+        retval->staging_buffer = (uint8_t *) malloc(retval->staging_buffer_size);
+        if (retval->staging_buffer == NULL) {
+            SDL_FreeAudioStreamEX(retval);
+            SDL_OutOfMemoryEX();
+            return NULL;
+        }
+    }
+
+    /* Not resampling? It's an easy conversion (and maybe not even that!) */
+    if (src_rate == dst_rate) {
+        retval->cvt_before_resampling.needed = false;
+        if (SDL_BuildAudioCVT_EX(&retval->cvt_after_resampling, src_format, src_channels, dst_rate, dst_format, dst_channels, dst_rate) < 0) {
+            SDL_FreeAudioStreamEX(retval);
+            return NULL;  /* SDL_BuildAudioCVT_EX should have called fprintf.stderr,  */
+        }
+    } else {
+        /* Don't resample at first. Just get us to Float32 format. */
+        /* !!! FIXME: convert to int32 on devices without hardware float. */
+        if (SDL_BuildAudioCVT_EX(&retval->cvt_before_resampling, src_format, src_channels, src_rate, AUDIO_F32SYS, pre_resample_channels, src_rate) < 0) {
+            SDL_FreeAudioStreamEX(retval);
+            return NULL;  /* SDL_BuildAudioCVT_EX should have called fprintf.stderr,  */
+        }
+
+    #ifdef HAVE_LIBSAMPLERATE_H
+        /* On failure the hooks stay NULL and the built-in resampler below is used. */
+        SetupLibSampleRateResampling(retval);
+    #endif
+
+        if (!retval->resampler_func) {
+            /* Built-in resampler: resampler_state stores the left padding. */
+            retval->resampler_state = calloc(retval->resampler_padding_samples, sizeof (float));
+            if (!retval->resampler_state) {
+                SDL_FreeAudioStreamEX(retval);
+                SDL_OutOfMemoryEX();
+                return NULL;
+            }
+
+            if (SDL_PrepareResampleFilter() < 0) {
+                /* The cleanup hook is not installed yet, so release the
+                   state by hand before freeing the stream. */
+                free(retval->resampler_state);
+                retval->resampler_state = NULL;
+                SDL_FreeAudioStreamEX(retval);
+                return NULL;
+            }
+
+            retval->resampler_func = SDL_ResampleAudioStream;
+            retval->reset_resampler_func = SDL_ResetAudioStreamResampler;
+            retval->cleanup_resampler_func = SDL_CleanupAudioStreamResampler;
+        }
+
+        /* Convert us to the final format after resampling. */
+        if (SDL_BuildAudioCVT_EX(&retval->cvt_after_resampling, AUDIO_F32SYS, pre_resample_channels, dst_rate, dst_format, dst_channels, dst_rate) < 0) {
+            SDL_FreeAudioStreamEX(retval);
+            return NULL;  /* SDL_BuildAudioCVT_EX should have called fprintf.stderr,  */
+        }
+    }
+
+    retval->queue = SDL_NewDataQueue(packetlen, packetlen * 2);
+    if (!retval->queue) {
+        SDL_FreeAudioStreamEX(retval);
+        return NULL;  /* SDL_NewDataQueue should have called fprintf.stderr,  */
+    }
+
+    return retval;
+}
+
+/* Converts (and, if the rates differ, resamples) len bytes of source-format
+   audio from buf and appends the result to the stream's data queue.
+
+   maxputbytes  optional (may be NULL). When given, at most *maxputbytes
+                bytes of output are queued and the amount queued is
+                subtracted from it; SDL_AudioStreamFlushEX() uses this to
+                cap the output produced while flushing with silence.
+
+   Returns -1 if the work buffer cannot be obtained or a conversion fails,
+   0 if there is nothing to queue, otherwise the result of
+   SDL_WriteToDataQueue(). */
+static int
+SDL_AudioStreamPutInternal(SDL_AudioStream *stream, const void *buf, int len, int *maxputbytes)
+{
+    int buflen = len;
+    int workbuflen;
+    uint8_t *workbuf;
+    uint8_t *resamplebuf = NULL;
+    int resamplebuflen = 0;
+    int neededpaddingbytes;
+    int paddingbytes;
+
+    /* !!! FIXME: several converters can take advantage of SIMD, but only
+       !!! FIXME:  if the data is aligned to 16 bytes. EnsureStreamBufferSize()
+       !!! FIXME:  guarantees the buffer will align, but the
+       !!! FIXME:  converters will iterate over the data backwards if
+       !!! FIXME:  the output grows, and this means we won't align if buflen
+       !!! FIXME:  isn't a multiple of 16. In these cases, we should chop off
+       !!! FIXME:  a few samples at the end and convert them separately. */
+
+    /* no padding prepended on first run. */
+    neededpaddingbytes = stream->resampler_padding_samples * sizeof (float);
+    paddingbytes = stream->first_run ? 0 : neededpaddingbytes;
+    stream->first_run = false;
+
+    /* Make sure the work buffer can hold all the data we need at once... */
+    workbuflen = buflen;
+    if (stream->cvt_before_resampling.needed) {
+        workbuflen *= stream->cvt_before_resampling.len_mult;
+    }
+
+    if (stream->dst_rate != stream->src_rate) {
+        /* resamples can't happen in place, so make space for second buf. */
+        const int framesize = stream->pre_resample_channels * sizeof (float);
+        const int frames = workbuflen / framesize;
+        resamplebuflen = ((int) ceil(frames * stream->rate_incr)) * framesize;
+        #if DEBUG_AUDIOSTREAM
+        printf("AUDIOSTREAM: will resample %d bytes to %d (ratio=%.6f)\n", workbuflen, resamplebuflen, stream->rate_incr);
+        #endif
+        workbuflen += resamplebuflen;
+    }
+
+    if (stream->cvt_after_resampling.needed) {
+        /* !!! FIXME: buffer might be big enough already? */
+        workbuflen *= stream->cvt_after_resampling.len_mult;
+    }
+
+    /* room for the previous put's padding that gets prepended below. */
+    workbuflen += neededpaddingbytes;
+
+    #if DEBUG_AUDIOSTREAM
+    printf("AUDIOSTREAM: Putting %d bytes of preconverted audio, need %d byte work buffer\n", buflen, workbuflen);
+    #endif
+
+    workbuf = EnsureStreamBufferSize(stream, workbuflen);
+    if (!workbuf) {
+        return -1;  /* probably out of memory. */
+    }
+
+    resamplebuf = workbuf;  /* default if not resampling. */
+
+    /* the new input goes after the gap reserved for the prepended padding. */
+    memcpy(workbuf + paddingbytes, buf, buflen);
+
+    if (stream->cvt_before_resampling.needed) {
+        stream->cvt_before_resampling.buf = workbuf + paddingbytes;
+        stream->cvt_before_resampling.len = buflen;
+        if (SDL_ConvertAudio_EX(&stream->cvt_before_resampling) == -1) {
+            return -1;   /* uhoh! */
+        }
+        buflen = stream->cvt_before_resampling.len_cvt;
+
+        #if DEBUG_AUDIOSTREAM
+        printf("AUDIOSTREAM: After initial conversion we have %d bytes\n", buflen);
+        #endif
+    }
+
+    if (stream->dst_rate != stream->src_rate) {
+        /* save off some samples at the end; they are used for padding now so
+           the resampler is coherent and then used at the start of the next
+           put operation. Prepend last put operation's padding, too. */
+
+        /* prepend prior put's padding. :P */
+        if (paddingbytes) {
+            memcpy(workbuf, stream->resampler_padding, paddingbytes);
+            buflen += paddingbytes;
+        }
+
+        /* save off the data at the end for the next run. */
+        /* NOTE(review): this copy already relies on buflen >= neededpaddingbytes,
+           which is only asserted a few lines further down. */
+        memcpy(stream->resampler_padding, workbuf + (buflen - neededpaddingbytes), neededpaddingbytes);
+
+        resamplebuf = workbuf + buflen;  /* skip to second piece of workbuf. */
+        assert(buflen >= neededpaddingbytes);
+        if (buflen > neededpaddingbytes) {
+            /* the trailing neededpaddingbytes are withheld from the input
+               length; they stay in place right after it as right-side padding. */
+            buflen = stream->resampler_func(stream, workbuf, buflen - neededpaddingbytes, resamplebuf, resamplebuflen);
+        } else {
+            buflen = 0;
+        }
+
+        #if DEBUG_AUDIOSTREAM
+        printf("AUDIOSTREAM: After resampling we have %d bytes\n", buflen);
+        #endif
+    }
+
+    if (stream->cvt_after_resampling.needed && (buflen > 0)) {
+        stream->cvt_after_resampling.buf = resamplebuf;
+        stream->cvt_after_resampling.len = buflen;
+        if (SDL_ConvertAudio_EX(&stream->cvt_after_resampling) == -1) {
+            return -1;   /* uhoh! */
+        }
+        buflen = stream->cvt_after_resampling.len_cvt;
+
+        #if DEBUG_AUDIOSTREAM
+        printf("AUDIOSTREAM: After final conversion we have %d bytes\n", buflen);
+        #endif
+    }
+
+    #if DEBUG_AUDIOSTREAM
+    printf("AUDIOSTREAM: Final output is %d bytes\n", buflen);
+    #endif
+
+    /* clamp the output to the caller's remaining budget, if one was given. */
+    if (maxputbytes) {
+        const int maxbytes = *maxputbytes;
+        if (buflen > maxbytes)
+            buflen = maxbytes;
+        *maxputbytes -= buflen;
+    }
+
+    /* resamplebuf holds the final output, even if we didn't resample. */
+    return buflen ? SDL_WriteToDataQueue(stream->queue, resamplebuf, buflen) : 0;
+}
+
+/* Adds len bytes of source-format audio from buf to the stream.
+
+   len must be a non-negative multiple of the source sample frame size.
+   Input that is too small to fill the staging buffer is held back until
+   more data arrives or the stream is flushed.
+
+   Returns 0 when the data was only staged or len is 0, the result of the
+   queue write / internal put otherwise, and an error result for invalid
+   parameters or failed conversions. */
+int
+SDL_AudioStreamPutEX(SDL_AudioStream *stream, const void *buf, int len)
+{
+    #if DEBUG_AUDIOSTREAM
+    printf("AUDIOSTREAM: wants to put %d preconverted bytes\n", len);
+    #endif
+
+    if (!stream) {
+        return SDL_PrintError("stream");
+    } else if (!buf) {
+        return SDL_PrintError("buf");
+    } else if (len < 0) {
+        /* A negative length would otherwise reach the data queue as a
+           huge unsigned size on the no-conversion path below. */
+        return SDL_PrintError("len");
+    } else if (len == 0) {
+        return 0;  /* nothing to do. */
+    } else if ((len % stream->src_sample_frame_size) != 0) {
+        return SDL_PrintError("Can't add partial sample frames");
+    }
+
+    if (!stream->cvt_before_resampling.needed &&
+        (stream->dst_rate == stream->src_rate) &&
+        !stream->cvt_after_resampling.needed) {
+        #if DEBUG_AUDIOSTREAM
+        printf("AUDIOSTREAM: no conversion needed at all, queueing %d bytes.\n", len);
+        #endif
+        return SDL_WriteToDataQueue(stream->queue, buf, len);
+    }
+
+    while (len > 0) {
+        int amount;
+
+        /* If we don't have a staging buffer or we're given enough data that
+           we don't need to store it for later, skip the staging process.
+         */
+        if (!stream->staging_buffer_filled && len >= stream->staging_buffer_size) {
+            return SDL_AudioStreamPutInternal(stream, buf, len, NULL);
+        }
+
+        /* If there's not enough data to fill the staging buffer, just save it */
+        if ((stream->staging_buffer_filled + len) < stream->staging_buffer_size) {
+            memcpy(stream->staging_buffer + stream->staging_buffer_filled, buf, len);
+            stream->staging_buffer_filled += len;
+            return 0;
+        }
+
+        /* Fill the staging buffer, process it, and continue */
+        amount = (stream->staging_buffer_size - stream->staging_buffer_filled);
+        assert(amount > 0);
+        memcpy(stream->staging_buffer + stream->staging_buffer_filled, buf, amount);
+        stream->staging_buffer_filled = 0;
+        if (SDL_AudioStreamPutInternal(stream, stream->staging_buffer, stream->staging_buffer_size, NULL) < 0) {
+            return -1;
+        }
+        buf = (void *)((uint8_t *)buf + amount);
+        len -= amount;
+    }
+    return 0;
+}
+
+/* Pushes any staged input through the converter, padded with silence, and
+   returns the stream to its "first run" state. Returns 0 on success, -1 if
+   an internal put fails, or the SDL_PrintError() result for a NULL stream. */
+int SDL_AudioStreamFlushEX(SDL_AudioStream *stream)
+{
+    if (stream == NULL) {
+        return SDL_PrintError("stream");
+    }
+
+    #if DEBUG_AUDIOSTREAM
+    printf("AUDIOSTREAM: flushing! staging_buffer_filled=%d bytes\n", stream->staging_buffer_filled);
+    #endif
+
+    /* the staging buffer must be empty unless we are resampling. */
+    assert((stream->dst_rate != stream->src_rate) || (stream->staging_buffer_filled == 0));
+
+    if (stream->staging_buffer_filled > 0) {
+        const int pending = stream->staging_buffer_filled;
+        int frames = pending / stream->src_sample_frame_size;
+
+        /* after the first put, the stream is also holding back the frames
+           it saved as right-side resampler padding. */
+        if (!stream->first_run) {
+            frames += stream->resampler_padding_samples / stream->pre_resample_channels;
+        }
+
+        if (frames > 0) {  /* nothing to flush otherwise. */
+            /* upper bound on the output we want, not counting appended silence. */
+            int budget = ((int) ceil(frames * stream->rate_incr)) * stream->dst_sample_frame_size;
+
+            #if DEBUG_AUDIOSTREAM
+            printf("AUDIOSTREAM: flushing with padding to get max %d bytes!\n", budget);
+            #endif
+
+            /* first pass: staged data followed by silence up to a full
+               staging buffer. */
+            memset(stream->staging_buffer + pending, '\0', stream->staging_buffer_size - pending);
+            if (SDL_AudioStreamPutInternal(stream, stream->staging_buffer, stream->staging_buffer_size, &budget) < 0) {
+                return -1;
+            }
+
+            /* second pass: a full buffer of silence, which pushes out what
+               the first pass left behind as right-side padding. */
+            memset(stream->staging_buffer, '\0', pending);
+            if (SDL_AudioStreamPutInternal(stream, stream->staging_buffer, stream->staging_buffer_size, &budget) < 0) {
+                return -1;
+            }
+        }
+    }
+
+    stream->first_run = true;
+    stream->staging_buffer_filled = 0;
+
+    return 0;
+}
+
+/* Reads up to len bytes of converted/resampled audio from the stream into
+   buf. len must be a multiple of the destination sample frame size. */
+int SDL_AudioStreamGetEX(SDL_AudioStream *stream, void *buf, int len)
+{
+    #if DEBUG_AUDIOSTREAM
+    printf("AUDIOSTREAM: want to get %d converted bytes\n", len);
+    #endif
+
+    if (stream == NULL) {
+        return SDL_PrintError("stream");
+    }
+    if (buf == NULL) {
+        return SDL_PrintError("buf");
+    }
+    if (len <= 0) {
+        return 0;  /* nothing was requested. */
+    }
+    if ((len % stream->dst_sample_frame_size) != 0) {
+        return SDL_PrintError("Can't request partial sample frames");
+    }
+
+    return (int) SDL_ReadFromDataQueue(stream->queue, buf, len);
+}
+
+/* Number of converted/resampled bytes that can be read right now;
+   0 for a NULL stream. */
+int
+SDL_AudioStreamAvailableEX(SDL_AudioStream *stream)
+{
+    if (stream == NULL) {
+        return 0;
+    }
+    return (int) SDL_CountDataQueue(stream->queue);
+}
+
+/* Drops all queued and staged audio and resets the resampler, so the next
+   put is treated like the first one. */
+void
+SDL_AudioStreamClearEX(SDL_AudioStream *stream)
+{
+    if (stream == NULL) {
+        SDL_PrintError("stream");
+        return;
+    }
+
+    SDL_ClearDataQueue(stream->queue, stream->packetlen * 2);
+    if (stream->reset_resampler_func != NULL) {
+        stream->reset_resampler_func(stream);
+    }
+    stream->staging_buffer_filled = 0;
+    stream->first_run = true;
+}
+
+/* Disposes of a stream and everything it owns; a NULL stream is ignored. */
+void
+SDL_FreeAudioStreamEX(SDL_AudioStream *stream)
+{
+    if (stream == NULL) {
+        return;
+    }
+
+    /* let the resampler backend release its own state first. */
+    if (stream->cleanup_resampler_func != NULL) {
+        stream->cleanup_resampler_func(stream);
+    }
+
+    SDL_FreeDataQueue(stream->queue);
+    free(stream->staging_buffer);
+    free(stream->work_buffer_base);
+    free(stream->resampler_padding);
+    free(stream);
+}
diff --git a/sys-tune/source/impl/resamplers/SDL_audioEX.h b/sys-tune/source/impl/resamplers/SDL_audioEX.h
new file mode 100644
index 0000000..1209477
--- /dev/null
+++ b/sys-tune/source/impl/resamplers/SDL_audioEX.h
@@ -0,0 +1,344 @@
+/*
+  Simple DirectMedia Layer
+  Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+*/
+
+/**
+ *  \file SDL_audioEX.h
+ *
+ *  Access to the raw audio mixing buffer for the SDL library.
+ */
+
+#ifndef SDL_audio_EX_h_
+#define SDL_audio_EX_h_
+
+#include <stdint.h>
+/* Set up for C function definitions, even when using C++ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ *  \brief Audio format flags.
+ *
+ *  These are what the 16 bits in SDL_AudioFormat currently mean...
+ *  (Unspecified bits are always zero).
+ *
+ *  \verbatim
+    ++-----------------------sample is signed if set
+    ||
+    ||       ++-----------sample is bigendian if set
+    ||       ||
+    ||       ||          ++---sample is float if set
+    ||       ||          ||
+    ||       ||          || +---sample bit size---+
+    ||       ||          || |                     |
+    15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+    \endverbatim
+ *
+ *  There are macros in SDL 2.0 and later to query these bits.
+ */
+typedef uint16_t SDL_AudioFormat;
+
+/**
+ *  \name Audio flags
+ */
+/* @{ */
+
+#define SDL_AUDIO_MASK_BITSIZE       (0xFF)    /* low byte: sample size in bits */
+#define SDL_AUDIO_MASK_DATATYPE      (1<<8)    /* set: float samples */
+#define SDL_AUDIO_MASK_ENDIAN        (1<<12)   /* set: big-endian samples */
+#define SDL_AUDIO_MASK_SIGNED        (1<<15)   /* set: signed samples */
+#define SDL_AUDIO_BITSIZE(x)         ((x) & SDL_AUDIO_MASK_BITSIZE)   /* (x) is parenthesised so expression arguments are masked as a whole */
+#define SDL_AUDIO_ISFLOAT(x)         ((x) & SDL_AUDIO_MASK_DATATYPE)
+#define SDL_AUDIO_ISBIGENDIAN(x)     ((x) & SDL_AUDIO_MASK_ENDIAN)
+#define SDL_AUDIO_ISSIGNED(x)        ((x) & SDL_AUDIO_MASK_SIGNED)
+#define SDL_AUDIO_ISINT(x)           (!SDL_AUDIO_ISFLOAT(x))
+#define SDL_AUDIO_ISLITTLEENDIAN(x)  (!SDL_AUDIO_ISBIGENDIAN(x))
+#define SDL_AUDIO_ISUNSIGNED(x)      (!SDL_AUDIO_ISSIGNED(x))
+
+/**
+ *  \name Audio format flags
+ *
+ *  Defaults to LSB byte order.
+ */
+/* @{ */
+#define AUDIO_U8        0x0008  /**< Unsigned 8-bit samples */
+#define AUDIO_S8        0x8008  /**< Signed 8-bit samples */
+#define AUDIO_U16LSB    0x0010  /**< Unsigned 16-bit samples */
+#define AUDIO_S16LSB    0x8010  /**< Signed 16-bit samples */
+#define AUDIO_U16MSB    0x1010  /**< As above, but big-endian byte order */
+#define AUDIO_S16MSB    0x9010  /**< As above, but big-endian byte order */
+#define AUDIO_U16       AUDIO_U16LSB
+#define AUDIO_S16       AUDIO_S16LSB
+/* @} */
+
+/**
+ *  \name int32 support
+ */
+/* @{ */
+#define AUDIO_S32LSB    0x8020  /**< 32-bit integer samples */
+#define AUDIO_S32MSB    0x9020  /**< As above, but big-endian byte order */
+#define AUDIO_S32       AUDIO_S32LSB
+/* @} */
+
+/**
+ *  \name float32 support
+ */
+/* @{ */
+#define AUDIO_F32LSB    0x8120  /**< 32-bit floating point samples */
+#define AUDIO_F32MSB    0x9120  /**< As above, but big-endian byte order */
+#define AUDIO_F32       AUDIO_F32LSB
+/* @} */
+
+/**
+ *  \name Native audio byte ordering
+ */
+/* @{ */
+#if 1
+#define AUDIO_U16SYS    AUDIO_U16LSB
+#define AUDIO_S16SYS    AUDIO_S16LSB
+#define AUDIO_S32SYS    AUDIO_S32LSB
+#define AUDIO_F32SYS    AUDIO_F32LSB
+#else
+#define AUDIO_U16SYS    AUDIO_U16MSB
+#define AUDIO_S16SYS    AUDIO_S16MSB
+#define AUDIO_S32SYS    AUDIO_S32MSB
+#define AUDIO_F32SYS    AUDIO_F32MSB
+#endif
+/* @} */
+
+
+struct SDL_AudioCVT_EX;
+typedef void  (*SDL_AudioFilter_EX) (struct SDL_AudioCVT_EX * cvt,
+                                          SDL_AudioFormat format);
+
+/**
+ *  \brief Upper limit of filters in SDL_AudioCVT_EX
+ *
+ *  The maximum number of SDL_AudioFilter_EX functions in SDL_AudioCVT_EX is
+ *  currently limited to 9. The SDL_AudioCVT_EX.filters array has 10 pointers,
+ *  one of which is the terminating NULL pointer.
+ */
+#define SDL_AUDIOCVT_MAX_FILTERS 9
+
+/**
+ *  \struct SDL_AudioCVT_EX
+ *  \brief A structure to hold a set of audio conversion filters and buffers.
+ *
+ *  Note that various parts of the conversion pipeline can take advantage
+ *  of SIMD operations (like SSE2, for example). SDL_AudioCVT_EX doesn't require
+ *  you to pass it aligned data, but can possibly run much faster if you
+ *  set both its (buf) field to a pointer that is aligned to 16 bytes, and its
+ *  (len) field to something that's a multiple of 16, if possible.
+ */
+#ifdef __GNUC__
+/* This structure is 84 bytes on 32-bit architectures, make sure GCC doesn't
+   pad it out to 88 bytes to guarantee ABI compatibility between compilers.
+   vvv
+   The next time we rev the ABI, make sure to size the ints and add padding.
+*/
+#define SDL_AUDIOCVT_PACKED __attribute__((packed))
+#else
+#define SDL_AUDIOCVT_PACKED
+#endif
+/* */
+typedef struct SDL_AudioCVT_EX
+{
+    int needed;                 /**< Set to 1 if conversion possible */
+    SDL_AudioFormat src_format; /**< Source audio format */
+    SDL_AudioFormat dst_format; /**< Target audio format */
+    double rate_incr;           /**< Rate conversion increment */
+    uint8_t *buf;                 /**< Buffer to hold entire audio data */
+    int len;                    /**< Length of original audio buffer */
+    int len_cvt;                /**< Length of converted audio buffer */
+    int len_mult;               /**< buffer must be len*len_mult big */
+    double len_ratio;           /**< Given len, final size is len*len_ratio */
+    SDL_AudioFilter_EX filters[SDL_AUDIOCVT_MAX_FILTERS + 1]; /**< NULL-terminated list of filter functions */
+    int filter_index;           /**< Current audio conversion function */
+} SDL_AUDIOCVT_PACKED SDL_AudioCVT_EX;
+
+
+/* Function prototypes */
+
+/**
+ *  This function takes a source format and rate and a destination format
+ *  and rate, and initializes the \c cvt structure with information needed
+ *  by SDL_ConvertAudio_EX() to convert a buffer of audio data from one format
+ *  to the other. An unsupported format causes an error and -1 will be returned.
+ *
+ *  \return 0 if no conversion is needed, 1 if the audio filter is set up,
+ *  or -1 on error.
+ */
+int SDL_BuildAudioCVT_EX(SDL_AudioCVT_EX * cvt,
+                                              SDL_AudioFormat src_format,
+                                              uint8_t src_channels,
+                                              int src_rate,
+                                              SDL_AudioFormat dst_format,
+                                              uint8_t dst_channels,
+                                              int dst_rate);
+
+/**
+ *  Once you have initialized the \c cvt structure using SDL_BuildAudioCVT_EX(),
+ *  created an audio buffer \c cvt->buf, and filled it with \c cvt->len bytes of
+ *  audio data in the source format, this function will convert it in-place
+ *  to the desired format.
+ *
+ *  The data conversion may expand the size of the audio data, so the buffer
+ *  \c cvt->buf should be allocated after the \c cvt structure is initialized by
+ *  SDL_BuildAudioCVT_EX(), and should be \c cvt->len*cvt->len_mult bytes long.
+ *
+ *  \return 0 on success or -1 if \c cvt->buf is NULL.
+ */
+int SDL_ConvertAudio_EX(SDL_AudioCVT_EX * cvt);
+
+/* SDL_AudioStream is a new audio conversion interface.
+   The benefits vs SDL_AudioCVT_EX:
+    - it can handle resampling data in chunks without generating
+      artifacts, when it doesn't have the complete buffer available.
+    - it can handle incoming data in any variable size.
+    - You push data as you have it, and pull it when you need it
+ */
+/* this is opaque to the outside world. */
+struct _SDL_AudioStream;
+typedef struct _SDL_AudioStream SDL_AudioStream;
+
+/**
+ *  Create a new audio stream
+ *
+ *  \param src_format The format of the source audio
+ *  \param src_channels The number of channels of the source audio
+ *  \param src_rate The sampling rate of the source audio
+ *  \param dst_format The format of the desired audio output
+ *  \param dst_channels The number of channels of the desired audio output
+ *  \param dst_rate The sampling rate of the desired audio output
+ *  \return A new audio stream on success, or NULL on error.
+ *
+ *  \sa SDL_AudioStreamPutEX
+ *  \sa SDL_AudioStreamGetEX
+ *  \sa SDL_AudioStreamAvailableEX
+ *  \sa SDL_AudioStreamFlushEX
+ *  \sa SDL_AudioStreamClearEX
+ *  \sa SDL_FreeAudioStreamEX
+ */
+SDL_AudioStream * SDL_NewAudioStreamEX(const SDL_AudioFormat src_format,
+                                           const uint8_t src_channels,
+                                           const int src_rate,
+                                           const SDL_AudioFormat dst_format,
+                                           const uint8_t dst_channels,
+                                           const int dst_rate);
+
+/**
+ *  Add data to be converted/resampled to the stream
+ *
+ *  \param stream The stream the audio data is being added to
+ *  \param buf A pointer to the audio data to add
+ *  \param len The number of bytes to write to the stream
+ *  \return 0 on success, or -1 on error.
+ *
+ *  \sa SDL_NewAudioStreamEX
+ *  \sa SDL_AudioStreamGetEX
+ *  \sa SDL_AudioStreamAvailableEX
+ *  \sa SDL_AudioStreamFlushEX
+ *  \sa SDL_AudioStreamClearEX
+ *  \sa SDL_FreeAudioStreamEX
+ */
+int SDL_AudioStreamPutEX(SDL_AudioStream *stream, const void *buf, int len);
+
+/**
+ *  Get converted/resampled data from the stream
+ *
+ *  \param stream The stream the audio is being requested from
+ *  \param buf A buffer to fill with audio data
+ *  \param len The maximum number of bytes to fill
+ *  \return The number of bytes read from the stream, or -1 on error
+ *
+ *  \sa SDL_NewAudioStreamEX
+ *  \sa SDL_AudioStreamPutEX
+ *  \sa SDL_AudioStreamAvailableEX
+ *  \sa SDL_AudioStreamFlushEX
+ *  \sa SDL_AudioStreamClearEX
+ *  \sa SDL_FreeAudioStreamEX
+ */
+int SDL_AudioStreamGetEX(SDL_AudioStream *stream, void *buf, int len);
+
+/**
+ * Get the number of converted/resampled bytes available. The stream may be
+ *  buffering data behind the scenes until it has enough to resample
+ *  correctly, so this number might be lower than what you expect, or even
+ *  be zero. Add more data or flush the stream if you need the data now.
+ *
+ *  \sa SDL_NewAudioStreamEX
+ *  \sa SDL_AudioStreamPutEX
+ *  \sa SDL_AudioStreamGetEX
+ *  \sa SDL_AudioStreamFlushEX
+ *  \sa SDL_AudioStreamClearEX
+ *  \sa SDL_FreeAudioStreamEX
+ */
+int SDL_AudioStreamAvailableEX(SDL_AudioStream *stream);
+
+/**
+ * Tell the stream that you're done sending data, and anything being buffered
+ *  should be converted/resampled and made available immediately.
+ *
+ * It is legal to add more data to a stream after flushing, but there will
+ *  be audio gaps in the output. Generally this is intended to signal the
+ *  end of input, so the complete output becomes available.
+ *
+ *  \sa SDL_NewAudioStreamEX
+ *  \sa SDL_AudioStreamPutEX
+ *  \sa SDL_AudioStreamGetEX
+ *  \sa SDL_AudioStreamAvailableEX
+ *  \sa SDL_AudioStreamClearEX
+ *  \sa SDL_FreeAudioStreamEX
+ */
+int SDL_AudioStreamFlushEX(SDL_AudioStream *stream);
+
+/**
+ *  Clear any pending data in the stream without converting it
+ *
+ *  \sa SDL_NewAudioStreamEX
+ *  \sa SDL_AudioStreamPutEX
+ *  \sa SDL_AudioStreamGetEX
+ *  \sa SDL_AudioStreamAvailableEX
+ *  \sa SDL_AudioStreamFlushEX
+ *  \sa SDL_FreeAudioStreamEX
+ */
+void SDL_AudioStreamClearEX(SDL_AudioStream *stream);
+
+/**
+ * Free an audio stream
+ *
+ *  \sa SDL_NewAudioStreamEX
+ *  \sa SDL_AudioStreamPutEX
+ *  \sa SDL_AudioStreamGetEX
+ *  \sa SDL_AudioStreamAvailableEX
+ *  \sa SDL_AudioStreamFlushEX
+ *  \sa SDL_AudioStreamClearEX
+ */
+void SDL_FreeAudioStreamEX(SDL_AudioStream *stream);
+
+#define SDL_MIX_MAXVOLUME 128
+
+/* Ends C function definitions when using C++ */
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SDL_audio_EX_h_ */
diff --git a/sys-tune/source/impl/source.cpp b/sys-tune/source/impl/source.cpp
index e8764de..e163025 100644
--- a/sys-tune/source/impl/source.cpp
+++ b/sys-tune/source/impl/source.cpp
@@ -12,7 +12,6 @@
 
 #define DR_MP3_IMPLEMENTATION
 #define DR_MP3_NO_STDIO
-#define DRMP3_DATA_CHUNK_SIZE (1024 * MP3_CHUNK_SIZE_KB)
 #include "dr_mp3.h"
 
 #define DR_WAV_IMPLEMENTATION
@@ -96,6 +95,41 @@ Source::~Source() {
     this->m_size   = 0;
 }
 
+// Builds the S16 -> S16 conversion stream that maps this source's channel count and
+// sample rate onto the requested output layout, replacing any stream already held.
+// Returns false when SDL_NewAudioStreamEX did not hand back a stream.
+bool Source::SetupResampler(u32 output_channels, u32 output_sample_rate) {
+    auto* const stream = SDL_NewAudioStreamEX(AUDIO_S16, GetChannelCount(), GetSampleRate(),
+                                              AUDIO_S16, output_channels, output_sample_rate);
+    m_sdl_stream.reset(stream);
+    return static_cast<bool>(m_sdl_stream);
+}
+
+// Fills `out` with up to `size` bytes of resampled PCM; returns bytes written or -1 on error.
+s64 Source::Resample(u8* out, std::size_t size) {
+    if (!out || !size) {
+        return -1;
+    }
+
+    s64 data_read = 0;
+    while (size > 0) {
+        const auto sz = SDL_AudioStreamGetEX(m_sdl_stream.get(), out, size);
+        if (sz < 0) {
+            return -1; // stream error: never move `out`/`size` by a negative count.
+        } else if (sz > 0) {
+            size -= sz;
+            out += sz;
+            data_read += sz;
+        } else if (const auto dec_got = Decode(m_resample_buffer.size(), m_resample_buffer.data()); dec_got == 0) {
+            break; // decoder has nothing more: report what was produced so far.
+        } else if (0 != SDL_AudioStreamPutEX(m_sdl_stream.get(), m_resample_buffer.data(), dec_got)) {
+            return -1;
+        }
+    }
+
+    return data_read;
+}
+
 size_t Source::Read(void *buffer, size_t read_size) {
     size_t bytes_read = 0;
     if (R_SUCCEEDED(fsFileRead(&this->m_file, this->m_offset, buffer, read_size, 0, &bytes_read))) {
@@ -145,7 +179,7 @@ class FlacFile final : public Source {
     size_t Decode(size_t sample_count, s16 *data) override {
         std::scoped_lock lk(this->m_mutex);
 
-        return drflac_read_pcm_frames_s16(this->m_flac, sample_count, data);
+        return GetChannelCount() * sizeof(s16) * drflac_read_pcm_frames_s16(this->m_flac, sample_count / GetChannelCount(), data); // sample_count (s16 samples) -> frames requested; frames read -> bytes returned
     }
 
     std::pair<u32, u32> Tell() override {
@@ -194,7 +228,7 @@ class Mp3File final : public Source {
     size_t Decode(size_t sample_count, s16 *data) override {
         std::scoped_lock lk(this->m_mutex);
 
-        return drmp3_read_pcm_frames_s16(&this->m_mp3, sample_count, data);
+        return GetChannelCount() * sizeof(s16) * drmp3_read_pcm_frames_s16(&this->m_mp3, sample_count / GetChannelCount(), data); // sample_count (s16 samples) -> frames requested; frames read -> bytes returned
     }
 
     std::pair<u32, u32> Tell() override {
@@ -243,7 +277,7 @@ class WavFile final : public Source {
     size_t Decode(size_t sample_count, s16 *data) override {
         std::scoped_lock lk(this->m_mutex);
 
-        return drwav_read_pcm_frames_s16(&this->m_wav, sample_count, data);
+        return GetChannelCount() * sizeof(s16) * drwav_read_pcm_frames_s16(&this->m_wav, sample_count / GetChannelCount(), data); // sample_count (s16 samples) -> frames requested; frames read -> bytes returned
     }
 
     std::pair<u32, u32> Tell() override {
diff --git a/sys-tune/source/impl/source.hpp b/sys-tune/source/impl/source.hpp
index f3e8f49..a878586 100644
--- a/sys-tune/source/impl/source.hpp
+++ b/sys-tune/source/impl/source.hpp
@@ -2,9 +2,7 @@
 
 #include <nxExt.h>
 #include <memory>
-
-// number of kb to allocate for mp3 chunk
-#define MP3_CHUNK_SIZE_KB 96
+#include "resamplers/SDL_audioEX.h"
 
 class Source {
   private:
@@ -13,12 +11,33 @@ class Source {
     s64 m_size = 0;
 
   protected:
+    // SOURCE: https://dev.krzaq.cc/post/you-dont-need-a-stateful-deleter-in-your-unique_ptr-usually/
+    // Stateless functor that calls `func` with its arguments; used as the deleter type for smart pointers.
+    template<auto func>
+    struct FunctionCaller {
+        template<typename... Us>
+        auto operator()(Us&&... us) const {
+            return func(std::forward<Us>(us)...); // expand the type and value packs together, one forward per argument.
+        }
+    };
+    template<auto func>
+    using Deleter = FunctionCaller<func>;
+
+  protected:
+    std::array<s16, 512> m_resample_buffer;
     LockableMutex m_mutex;
 
+  private:
+    using UniqueAudioStream = std::unique_ptr<SDL_AudioStream, Deleter<&SDL_FreeAudioStreamEX>>;
+    UniqueAudioStream m_sdl_stream{nullptr};
+
   public:
     Source(FsFile &&file);
     virtual ~Source();
 
+    bool SetupResampler(u32 output_channels, u32 output_sample_rate);
+    s64 Resample(u8* out, std::size_t size);
+
     size_t Read(void *buffer, size_t read_size);
     bool Seek(int offset, bool set);
 
diff --git a/sys-tune/source/main.cpp b/sys-tune/source/main.cpp
index 8e40869..35d0fe4 100644
--- a/sys-tune/source/main.cpp
+++ b/sys-tune/source/main.cpp
@@ -10,18 +10,10 @@ extern "C" {
 u32 __nx_applet_type     = AppletType_None;
 u32 __nx_fs_num_sessions = 1;
 
-// do not decrease this, will either cause fatal or will fail to start
-// - 1024 * 216: needed for sys-tune to boot
-// - 1024 * 236: base
-// - 1024 * 268: needed for mp3 playback (at 32kb)
-// - 1024 * 300: needed for mp3 playback (at 64kb)
-// - 1024 * 332: needed for mp3 playback (at 96kb)
-// - 1024 * 364: needed for closing / reopening audrv and audren (mp3 at 0kb)
-// - 1024 * 460: needed for closing / reopening audrv and audren (mp3 at 96kb)
-#define INNER_HEAP_SIZE 1024 * (364 + MP3_CHUNK_SIZE_KB)
-
+// TODO(TJ): calculate minimum heap
+// TODO(TJ): calculate reasonable amount of heap for playlist entries.
 void __libnx_initheap(void) {
-    static char inner_heap[INNER_HEAP_SIZE];
+    static char inner_heap[1024 * 200]; // 256 works but will run out with big playlists.
     extern char *fake_heap_start;
     extern char *fake_heap_end;
 
@@ -46,7 +38,6 @@ void __appInit() {
     R_ABORT_UNLESS(audWrapperInitialize());
     R_ABORT_UNLESS(pm::Initialize());
     R_ABORT_UNLESS(sdmc::Open());
-    smExit();
 }
 
 void __appExit(void) {
@@ -56,6 +47,7 @@ void __appExit(void) {
     fsExit();
     pscmExit();
     gpioExit();
+    smExit();
 }
 
 } // extern "C"
@@ -70,13 +62,10 @@ namespace {
 }
 
 int main(int argc, char *argv[]) {
-    std::vector<tune::impl::PlaylistEntry> playlist;
-    std::vector<tune::impl::PlaylistID> shuffle;
-    tune::impl::PlaylistEntry current;
-    R_ABORT_UNLESS(tune::impl::Initialize(&playlist, &shuffle, &current));
+    R_ABORT_UNLESS(tune::impl::Initialize());
 
     /* Register audio as our dependency so we can pause before it prepares for sleep. */
-    constexpr const u32 dependencies[] = { PscPmModuleId_Audio };
+    constexpr const u32 dependencies[] = { PscPmModuleId_Fs, PscPmModuleId_Audio };
 
     /* Get pm module to listen for state change. */
     PscPmModule pm_module;

From f134fe24e233142a04a40b76494d072aaad8dc1a Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Tue, 17 Dec 2024 19:22:47 +0000
Subject: [PATCH 02/19] add option to clear playlist queue, increase max
 playlist size to 512

---
 overlay/source/gui_playlist.cpp       | 20 ++++++++++++++------
 sys-tune/source/impl/music_player.cpp |  9 ++++++++-
 sys-tune/source/main.cpp              |  2 +-
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/overlay/source/gui_playlist.cpp b/overlay/source/gui_playlist.cpp
index f701ff8..563ae19 100644
--- a/overlay/source/gui_playlist.cpp
+++ b/overlay/source/gui_playlist.cpp
@@ -65,6 +65,8 @@ PlaylistGui::PlaylistGui() {
         return;
     }
 
+    m_list->addItem(new tsl::elm::CategoryHeader("\uE0E2  To remove all", true));
+
     char path[FS_MAX_PATH];
     for (u32 i = 0; i < count; i++) {
         rc = tuneGetPlaylistItem(i, path, FS_MAX_PATH);
@@ -83,12 +85,11 @@ PlaylistGui::PlaylistGui() {
         auto item = new ButtonListItem(str, "\uE098");
         item->setClickListener([this, item](u64 keys) -> bool {
             u32 index  = this->m_list->getIndexInList(item);
-            u8 counter = 0;
             if (keys & HidNpadButton_A) {
                 tuneSelect(index);
-                counter++;
+                return true;
             }
-            if (keys & HidNpadButton_Y) {
+            else if (keys & HidNpadButton_Y) {
                 if (R_SUCCEEDED(tuneRemove(index))) {
                     this->removeFocus();
                     this->m_list->removeIndex(index);
@@ -102,10 +103,17 @@ PlaylistGui::PlaylistGui() {
                         this->m_list->setFocusedIndex(index - 1);
                     }
                 }
-
-                counter++;
+                return true;
+            }
+            else if (keys & HidNpadButton_X) {
+                if (R_SUCCEEDED(tuneClearQueue())) {
+                    this->removeFocus();
+                    this->m_list->clear();
+                    m_list->addItem(new tsl::elm::ListItem("Playlist empty."));
+                }
+                return true;
             }
-            return counter;
+            return false;
         });
         m_list->addItem(item);
     }
diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index 415a1be..4c8a8f6 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -16,7 +16,7 @@ namespace tune::impl {
 
     namespace {
         constexpr float VOLUME_MAX = 1.f;
-        constexpr auto PLAYLIST_ENTRY_MAX = 256; // 64k
+        constexpr auto PLAYLIST_ENTRY_MAX = 512; // 128k
 
         struct PlayListEntry2 {
         public:
@@ -79,6 +79,12 @@ namespace tune::impl {
             const char* GetPath(u32 index) {
                 return m_entries[index].m_path;
             }
+
+            void Clear() { // calls Remove() on every slot held in m_entries.
+                for (auto& entry : m_entries) {
+                    entry.Remove();
+                }
+            }
         };
 
         PlayList g_playlist2;
@@ -524,6 +530,7 @@ namespace tune::impl {
 
             g_playlist.clear();
             g_shuffle_playlist.clear();
+            g_playlist2.Clear();
         }
         g_status = PlayerStatus::FetchNext;
     }
diff --git a/sys-tune/source/main.cpp b/sys-tune/source/main.cpp
index 35d0fe4..743d08b 100644
--- a/sys-tune/source/main.cpp
+++ b/sys-tune/source/main.cpp
@@ -13,7 +13,7 @@ u32 __nx_fs_num_sessions = 1;
 // TODO(TJ): calculate minimum heap
 // TODO(TJ): calculate reasonable amount of heap for playlist entries.
 void __libnx_initheap(void) {
-    static char inner_heap[1024 * 200]; // 256 works but will run out with big playlists.
+    static char inner_heap[1024 * 200];
     extern char *fake_heap_start;
     extern char *fake_heap_end;
 

From 5e5c1cf2fde9b6dc645107f57874eb0811e79a33 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Tue, 17 Dec 2024 19:42:05 +0000
Subject: [PATCH 03/19] bump all dr_libs to latest commit.

---
 sys-tune/source/impl/dr_flac.h |  973 ++++--
 sys-tune/source/impl/dr_mp3.h  |  488 +++-
 sys-tune/source/impl/dr_wav.h  | 5031 ++++++++++++++++++++++++--------
 3 files changed, 4908 insertions(+), 1584 deletions(-)

diff --git a/sys-tune/source/impl/dr_flac.h b/sys-tune/source/impl/dr_flac.h
index 53a48c9..3a47251 100644
--- a/sys-tune/source/impl/dr_flac.h
+++ b/sys-tune/source/impl/dr_flac.h
@@ -1,6 +1,6 @@
 /*
 FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_flac - v0.12.13 - 2020-05-16
+dr_flac - v0.12.43 - 2024-12-17
 
 David Reid - mackron@gmail.com
 
@@ -166,7 +166,7 @@ If you just want to quickly decode an entire FLAC file in one go you can do some
 
     ...
 
-    drflac_free(pSampleData);
+    drflac_free(pSampleData, NULL);
     ```
 
 You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs respectively, but note that these
@@ -179,7 +179,7 @@ reports metadata to the application through the use of a callback, and every met
 
 The main opening APIs (`drflac_open()`, etc.) will fail if the header is not present. The presents a problem in certain scenarios such as broadcast style
 streams or internet radio where the header may not be present because the user has started playback mid-stream. To handle this, use the relaxed APIs:
-    
+
     `drflac_open_relaxed()`
     `drflac_open_with_metadata_relaxed()`
 
@@ -210,6 +210,9 @@ Build Options
 #define DR_FLAC_NO_SIMD
   Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler.
 
+#define DR_FLAC_NO_WCHAR
+  Disables all functions ending with `_w`. Use this if your compiler does not provide wchar.h. Not required if DR_FLAC_NO_STDIO is also defined.
+
 
 
 Notes
@@ -232,46 +235,47 @@ extern "C" {
 
 #define DRFLAC_VERSION_MAJOR     0
 #define DRFLAC_VERSION_MINOR     12
-#define DRFLAC_VERSION_REVISION  13
+#define DRFLAC_VERSION_REVISION  43
 #define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
 
 #include <stddef.h> /* For size_t. */
 
-/* Sized types. Prefer built-in types. Fall back to stdint. */
-#ifdef _MSC_VER
-    #if defined(__clang__)
+/* Sized Types */
+typedef   signed char           drflac_int8;
+typedef unsigned char           drflac_uint8;
+typedef   signed short          drflac_int16;
+typedef unsigned short          drflac_uint16;
+typedef   signed int            drflac_int32;
+typedef unsigned int            drflac_uint32;
+#if defined(_MSC_VER) && !defined(__clang__)
+    typedef   signed __int64    drflac_int64;
+    typedef unsigned __int64    drflac_uint64;
+#else
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
         #pragma GCC diagnostic push
-        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
-        #pragma GCC diagnostic ignored "-Wlong-long"        
-        #pragma GCC diagnostic ignored "-Wc++11-long-long"
-    #endif
-    typedef   signed __int8  drflac_int8;
-    typedef unsigned __int8  drflac_uint8;
-    typedef   signed __int16 drflac_int16;
-    typedef unsigned __int16 drflac_uint16;
-    typedef   signed __int32 drflac_int32;
-    typedef unsigned __int32 drflac_uint32;
-    typedef   signed __int64 drflac_int64;
-    typedef unsigned __int64 drflac_uint64;
-    #if defined(__clang__)
+        #pragma GCC diagnostic ignored "-Wlong-long"
+        #if defined(__clang__)
+            #pragma GCC diagnostic ignored "-Wc++11-long-long"
+        #endif
+    #endif
+    typedef   signed long long  drflac_int64;
+    typedef unsigned long long  drflac_uint64;
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
         #pragma GCC diagnostic pop
     #endif
+#endif
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined(_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(__powerpc64__)
+    typedef drflac_uint64       drflac_uintptr;
 #else
-    #include <stdint.h>
-    typedef int8_t           drflac_int8;
-    typedef uint8_t          drflac_uint8;
-    typedef int16_t          drflac_int16;
-    typedef uint16_t         drflac_uint16;
-    typedef int32_t          drflac_int32;
-    typedef uint32_t         drflac_uint32;
-    typedef int64_t          drflac_int64;
-    typedef uint64_t         drflac_uint64;
-#endif
-typedef drflac_uint8         drflac_bool8;
-typedef drflac_uint32        drflac_bool32;
-#define DRFLAC_TRUE          1
-#define DRFLAC_FALSE         0
+    typedef drflac_uint32       drflac_uintptr;
+#endif
+typedef drflac_uint8            drflac_bool8;
+typedef drflac_uint32           drflac_bool32;
+#define DRFLAC_TRUE             1
+#define DRFLAC_FALSE            0
+/* End Sized Types */
 
+/* Decorations */
 #if !defined(DRFLAC_API)
     #if defined(DRFLAC_DLL)
         #if defined(_WIN32)
@@ -301,6 +305,7 @@ typedef drflac_uint32        drflac_bool32;
         #define DRFLAC_PRIVATE static
     #endif
 #endif
+/* End Decorations */
 
 #if defined(_MSC_VER) && _MSC_VER >= 1700   /* Visual Studio 2012 */
     #define DRFLAC_DEPRECATED       __declspec(deprecated)
@@ -317,7 +322,17 @@ typedef drflac_uint32        drflac_bool32;
 #endif
 
 DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision);
-DRFLAC_API const char* drflac_version_string();
+DRFLAC_API const char* drflac_version_string(void);
+
+/* Allocation Callbacks */
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drflac_allocation_callbacks;
+/* End Allocation Callbacks */
 
 /*
 As data is read from the client it is placed into an internal buffer for fast access. This controls the size of that buffer. Larger values means more speed,
@@ -327,11 +342,22 @@ but also more memory. In my testing there is diminishing returns after about 4KB
 #define DR_FLAC_BUFFER_SIZE   4096
 #endif
 
-/* Check if we can enable 64-bit optimizations. */
+
+/* Architecture Detection */
 #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
 #define DRFLAC_64BIT
 #endif
 
+#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))
+    #define DRFLAC_X64
+#elif defined(__i386) || defined(_M_IX86)
+    #define DRFLAC_X86
+#elif defined(__arm__) || defined(_M_ARM) || defined(__arm64) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
+    #define DRFLAC_ARM
+#endif
+/* End Architecture Detection */
+
+
 #ifdef DRFLAC_64BIT
 typedef drflac_uint64 drflac_cache_t;
 #else
@@ -384,15 +410,13 @@ typedef enum
     drflac_seek_origin_current
 } drflac_seek_origin;
 
-/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */
-#pragma pack(2)
+/* The order of members in this structure is important because we map this directly to the raw data within the SEEKTABLE metadata block. */
 typedef struct
 {
     drflac_uint64 firstPCMFrame;
     drflac_uint64 flacFrameOffset;   /* The offset from the first byte of the header of the first frame. */
     drflac_uint16 pcmFrameCount;
 } drflac_seekpoint;
-#pragma pack()
 
 typedef struct
 {
@@ -409,7 +433,10 @@ typedef struct
 
 typedef struct
 {
-    /* The metadata type. Use this to know how to interpret the data below. */
+    /*
+    The metadata type. Use this to know how to interpret the data below. Will be set to one of the
+    DRFLAC_METADATA_BLOCK_TYPE_* tokens.
+    */
     drflac_uint32 type;
 
     /*
@@ -553,19 +580,12 @@ pMetadata (in)
 
 Remarks
 -------
-Use pMetadata->type to determine which metadata block is being handled and how to read the data.
+Use pMetadata->type to determine which metadata block is being handled and how to read the data. This
+will be set to one of the DRFLAC_METADATA_BLOCK_TYPE_* tokens.
 */
 typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
 
 
-typedef struct
-{
-    void* pUserData;
-    void* (* onMalloc)(size_t sz, void* pUserData);
-    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
-    void  (* onFree)(void* p, void* pUserData);
-} drflac_allocation_callbacks;
-
 /* Structure for internal use. Only used for decoders opened with drflac_open_memory. */
 typedef struct
 {
@@ -798,6 +818,8 @@ from a block of memory respectively.
 
 The STREAMINFO block must be present for this to succeed. Use `drflac_open_relaxed()` to open a FLAC stream where the header may not be present.
 
+Use `drflac_open_with_metadata()` if you need access to metadata.
+
 
 Seek Also
 ---------
@@ -844,6 +866,8 @@ as that is for internal use only.
 
 Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never found it will continue forever. To abort,
 force your `onRead` callback to return 0, which dr_flac will use as an indicator that the end of the stream was found.
+
+Use `drflac_open_with_metadata_relaxed()` if you need access to metadata.
 */
 DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
 
@@ -883,7 +907,9 @@ Close the decoder with `drflac_close()`.
 This is slower than `drflac_open()`, so avoid this one if you don't need metadata. Internally, this will allocate and free memory on the heap for every
 metadata block except for STREAMINFO and PADDING blocks.
 
-The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns.
+The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns. This callback takes a
+pointer to a `drflac_metadata` object which is a union containing the data of all relevant metadata blocks. Use the `type` member to discriminate between
+the different metadata types.
 
 The STREAMINFO block must be present for this to succeed. Use `drflac_open_with_metadata_relaxed()` to open a FLAC stream where the header may not be present.
 
@@ -1271,15 +1297,13 @@ typedef struct
     const char* pRunningData;
 } drflac_cuesheet_track_iterator;
 
-/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */
-#pragma pack(4)
+/* The order of members here is important because we map this directly to the raw data within the CUESHEET metadata block. */
 typedef struct
 {
     drflac_uint64 offset;
     drflac_uint8 index;
     drflac_uint8 reserved[3];
 } drflac_cuesheet_track_index;
-#pragma pack()
 
 typedef struct
 {
@@ -1316,9 +1340,11 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
  ************************************************************************************************************************************************************
  ************************************************************************************************************************************************************/
 #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
+#ifndef dr_flac_c
+#define dr_flac_c
 
 /* Disable some annoying warnings. */
-#if defined(__GNUC__)
+#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
     #pragma GCC diagnostic push
     #if __GNUC__ >= 7
     #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
@@ -1329,6 +1355,9 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
     #ifndef _BSD_SOURCE
         #define _BSD_SOURCE
     #endif
+    #ifndef _DEFAULT_SOURCE
+        #define _DEFAULT_SOURCE
+    #endif
     #ifndef __USE_BSD
         #define __USE_BSD
     #endif
@@ -1338,6 +1367,7 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
 #include <stdlib.h>
 #include <string.h>
 
+/* Inline */
 #ifdef _MSC_VER
     #define DRFLAC_INLINE __forceinline
 #elif defined(__GNUC__)
@@ -1349,24 +1379,32 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
     I am using "__inline__" only when we're compiling in strict ANSI mode.
     */
     #if defined(__STRICT_ANSI__)
-        #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
+        #define DRFLAC_GNUC_INLINE_HINT __inline__
     #else
-        #define DRFLAC_INLINE inline __attribute__((always_inline))
+        #define DRFLAC_GNUC_INLINE_HINT inline
     #endif
+
+    #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT __attribute__((always_inline))
+    #else
+        #define DRFLAC_INLINE DRFLAC_GNUC_INLINE_HINT
+    #endif
+#elif defined(__WATCOMC__)
+    #define DRFLAC_INLINE __inline
 #else
     #define DRFLAC_INLINE
 #endif
+/* End Inline */
 
-/* CPU architecture. */
-#if defined(__x86_64__) || defined(_M_X64)
-    #define DRFLAC_X64
-#elif defined(__i386) || defined(_M_IX86)
-    #define DRFLAC_X86
-#elif defined(__arm__) || defined(_M_ARM)
-    #define DRFLAC_ARM
-#endif
+/*
+Intrinsics Support
+
+There's a bug in GCC 4.2.x which results in an incorrect compilation error when using _mm_slli_epi32() where it complains with
 
-/* Intrinsics Support */
+    "error: shift must be an immediate"
+
+Unfortunately dr_flac depends on this for a few things so we're just going to disable SSE on GCC 4.2 and below.
+*/
 #if !defined(DR_FLAC_NO_SIMD)
     #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
         #if defined(_MSC_VER) && !defined(__clang__)
@@ -1377,7 +1415,7 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
             #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)   /* 2010 */
                 #define DRFLAC_SUPPORT_SSE41
             #endif
-        #else
+        #elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)))
             /* Assume GNUC-style. */
             #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
                 #define DRFLAC_SUPPORT_SSE2
@@ -1407,16 +1445,6 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
     #if defined(DRFLAC_ARM)
         #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
             #define DRFLAC_SUPPORT_NEON
-        #endif
-
-        /* Fall back to looking for the #include file. */
-        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
-            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
-                #define DRFLAC_SUPPORT_NEON
-            #endif
-        #endif
-
-        #if defined(DRFLAC_SUPPORT_NEON)
             #include <arm_neon.h>
         #endif
     #endif
@@ -1495,9 +1523,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
 {
 #if defined(DRFLAC_SUPPORT_SSE41)
     #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
-        #if defined(DRFLAC_X64)
-            return DRFLAC_TRUE;    /* 64-bit targets always support SSE4.1. */
-        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__)
+        #if defined(__SSE4_1__) || defined(__AVX__)
             return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE41 code we can assume support. */
         #else
             #if defined(DRFLAC_NO_CPUID)
@@ -1517,7 +1543,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
 }
 
 
-#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64)) && !defined(__clang__)
     #define DRFLAC_HAS_LZCNT_INTRINSIC
 #elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
     #define DRFLAC_HAS_LZCNT_INTRINSIC
@@ -1529,7 +1555,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
     #endif
 #endif
 
-#if defined(_MSC_VER) && _MSC_VER >= 1400
+#if defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(__clang__)
     #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
     #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
     #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
@@ -1553,6 +1579,30 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
     #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
         #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
     #endif
+#elif defined(__WATCOMC__) && defined(__386__)
+    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    extern __inline drflac_uint16 _watcom_bswap16(drflac_uint16);
+    extern __inline drflac_uint32 _watcom_bswap32(drflac_uint32);
+    extern __inline drflac_uint64 _watcom_bswap64(drflac_uint64);
+#pragma aux _watcom_bswap16 = \
+    "xchg al, ah" \
+    parm  [ax]    \
+    value [ax]    \
+    modify nomemory;
+#pragma aux _watcom_bswap32 = \
+    "bswap eax" \
+    parm  [eax] \
+    value [eax] \
+    modify nomemory;
+#pragma aux _watcom_bswap64 = \
+    "bswap eax"     \
+    "bswap edx"     \
+    "xchg eax,edx"  \
+    parm [eax edx]  \
+    value [eax edx] \
+    modify nomemory;
 #endif
 
 
@@ -1582,6 +1632,7 @@ static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
 
 #define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  /* 64 for AVX-512 in the future. */
 
+/* Result Codes */
 typedef drflac_int32 drflac_result;
 #define DRFLAC_SUCCESS                                   0
 #define DRFLAC_ERROR                                    -1   /* A generic error. */
@@ -1637,7 +1688,10 @@ typedef drflac_int32 drflac_result;
 #define DRFLAC_CANCELLED                                -51
 #define DRFLAC_MEMORY_ALREADY_MAPPED                    -52
 #define DRFLAC_AT_END                                   -53
-#define DRFLAC_CRC_MISMATCH                             -128
+
+#define DRFLAC_CRC_MISMATCH                             -100
+/* End Result Codes */
+
 
 #define DRFLAC_SUBFRAME_CONSTANT                        0
 #define DRFLAC_SUBFRAME_VERBATIM                        1
@@ -1653,6 +1707,10 @@ typedef drflac_int32 drflac_result;
 #define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE            9
 #define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
 
+#define DRFLAC_SEEKPOINT_SIZE_IN_BYTES                  18
+#define DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES             36
+#define DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES       12
+
 #define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
 
 
@@ -1671,7 +1729,7 @@ DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drf
     }
 }
 
-DRFLAC_API const char* drflac_version_string()
+DRFLAC_API const char* drflac_version_string(void)
 {
     return DRFLAC_VERSION_STRING;
 }
@@ -1772,10 +1830,12 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void)
 static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
 {
 #ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
-    #if defined(_MSC_VER)
+    #if defined(_MSC_VER) && !defined(__clang__)
         return _byteswap_ushort(n);
     #elif defined(__GNUC__) || defined(__clang__)
         return __builtin_bswap16(n);
+    #elif defined(__WATCOMC__) && defined(__386__)
+        return _watcom_bswap16(n);
     #else
         #error "This compiler does not support the byte swap intrinsic."
     #endif
@@ -1788,10 +1848,10 @@ static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
 static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
 {
 #ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
-    #if defined(_MSC_VER)
+    #if defined(_MSC_VER) && !defined(__clang__)
         return _byteswap_ulong(n);
     #elif defined(__GNUC__) || defined(__clang__)
-        #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
+        #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(__ARM_ARCH_6M__) && !defined(DRFLAC_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
             /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
             drflac_uint32 r;
             __asm__ __volatile__ (
@@ -1805,6 +1865,8 @@ static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
         #else
             return __builtin_bswap32(n);
         #endif
+    #elif defined(__WATCOMC__) && defined(__386__)
+        return _watcom_bswap32(n);
     #else
         #error "This compiler does not support the byte swap intrinsic."
     #endif
@@ -1819,22 +1881,25 @@ static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
 static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
 {
 #ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
-    #if defined(_MSC_VER)
+    #if defined(_MSC_VER) && !defined(__clang__)
         return _byteswap_uint64(n);
     #elif defined(__GNUC__) || defined(__clang__)
         return __builtin_bswap64(n);
+    #elif defined(__WATCOMC__) && defined(__386__)
+        return _watcom_bswap64(n);
     #else
         #error "This compiler does not support the byte swap intrinsic."
     #endif
 #else
-    return ((n & (drflac_uint64)0xFF00000000000000) >> 56) |
-           ((n & (drflac_uint64)0x00FF000000000000) >> 40) |
-           ((n & (drflac_uint64)0x0000FF0000000000) >> 24) |
-           ((n & (drflac_uint64)0x000000FF00000000) >>  8) |
-           ((n & (drflac_uint64)0x00000000FF000000) <<  8) |
-           ((n & (drflac_uint64)0x0000000000FF0000) << 24) |
-           ((n & (drflac_uint64)0x000000000000FF00) << 40) |
-           ((n & (drflac_uint64)0x00000000000000FF) << 56);
+    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+    return ((n & ((drflac_uint64)0xFF000000 << 32)) >> 56) |
+           ((n & ((drflac_uint64)0x00FF0000 << 32)) >> 40) |
+           ((n & ((drflac_uint64)0x0000FF00 << 32)) >> 24) |
+           ((n & ((drflac_uint64)0x000000FF << 32)) >>  8) |
+           ((n & ((drflac_uint64)0xFF000000      )) <<  8) |
+           ((n & ((drflac_uint64)0x00FF0000      )) << 24) |
+           ((n & ((drflac_uint64)0x0000FF00      )) << 40) |
+           ((n & ((drflac_uint64)0x000000FF      )) << 56);
 #endif
 }
 
@@ -1857,6 +1922,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
     return n;
 }
 
+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32_ptr_unaligned(const void* pData)
+{
+    const drflac_uint8* pNum = (const drflac_uint8*)pData;  /* Assemble a big-endian 32-bit value one byte at a time so pData needs no particular alignment. */
+    return ((drflac_uint32)pNum[0] << 24) | ((drflac_uint32)pNum[1] << 16) | ((drflac_uint32)pNum[2] << 8) | (drflac_uint32)pNum[3];  /* Widen before shifting: a byte promoted to int would overflow on << 24 when it is >= 0x80. */
+}
+
 static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
 {
     if (drflac__is_little_endian()) {
@@ -1876,6 +1947,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
     return n;
 }
 
+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32_ptr_unaligned(const void* pData)
+{
+    const drflac_uint8* pNum = (const drflac_uint8*)pData;  /* Assemble a little-endian 32-bit value one byte at a time so pData needs no particular alignment. */
+    return (drflac_uint32)pNum[0] | ((drflac_uint32)pNum[1] << 8) | ((drflac_uint32)pNum[2] << 16) | ((drflac_uint32)pNum[3] << 24);  /* Widen before shifting: a byte promoted to int would overflow on << 24 when it is >= 0x80. */
+}
+
 
 static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
 {
@@ -2377,6 +2454,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i
         if (!drflac__reload_cache(bs)) {
             return DRFLAC_FALSE;
         }
+        if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+            /* This happens when we get to end of stream */
+            return DRFLAC_FALSE;
+        }
 
         *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
         bs->consumedBits += bitCountLo;
@@ -2388,7 +2469,6 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned i
 static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult)
 {
     drflac_uint32 result;
-    drflac_uint32 signbit;
 
     DRFLAC_ASSERT(bs != NULL);
     DRFLAC_ASSERT(pResult != NULL);
@@ -2399,8 +2479,12 @@ static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, dr
         return DRFLAC_FALSE;
     }
 
-    signbit = ((result >> (bitCount-1)) & 0x01);
-    result |= (~signbit + 1) << bitCount;
+    /* Do not attempt to shift by 32 as it's undefined. */
+    if (bitCount < 32) {
+        drflac_uint32 signbit;
+        signbit = ((result >> (bitCount-1)) & 0x01);
+        result |= (~signbit + 1) << bitCount;
+    }
 
     *pResult = (drflac_int32)result;
     return DRFLAC_TRUE;
@@ -2623,9 +2707,16 @@ static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
 #define DRFLAC_IMPLEMENT_CLZ_LZCNT
 #endif
-#if  defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86))
+#if  defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(__clang__)
 #define DRFLAC_IMPLEMENT_CLZ_MSVC
 #endif
+#if  defined(__WATCOMC__) && defined(__386__)
+#define DRFLAC_IMPLEMENT_CLZ_WATCOM
+#endif
+#ifdef __MRC__
+#include <intrinsics.h>
+#define DRFLAC_IMPLEMENT_CLZ_MRC
+#endif
 
 static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
 {
@@ -2666,6 +2757,8 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void)
     /* Fast compile time check for ARM. */
 #if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
     return DRFLAC_TRUE;
+#elif defined(__MRC__)
+    return DRFLAC_TRUE;
 #else
     /* If the compiler itself does not support the intrinsic then we'll need to return false. */
     #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
@@ -2678,7 +2771,25 @@ static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void)
 
 static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
 {
-#if defined(_MSC_VER) && !defined(__clang__)
+    /*
+    It's critical for competitive decoding performance that this function be highly optimal. With MSVC we can use the __lzcnt64() and __lzcnt() intrinsics
+    to achieve good performance, however on GCC and Clang it's a little bit more annoying. The __builtin_clzl() and __builtin_clzll() intrinsics leave
+    it undefined as to the return value when `x` is 0. We need this to be well defined as returning 32 or 64, depending on whether or not it's a 32- or
+    64-bit build. To work around this we would need to add a conditional to check for the x = 0 case, but this creates unnecessary inefficiency. To work
+    around this problem I have written some inline assembly to emit the LZCNT (x86) or CLZ (ARM) instruction directly which removes the need to include
+    the conditional. This has worked well in the past, but for some reason Clang's MSVC compatible driver, clang-cl, does not seem to be handling this
+    in the same way as the normal Clang driver. It seems that `clang-cl` is just outputting the wrong results sometimes, maybe due to some register
+    getting clobbered?
+
+    I'm not sure if this is a bug with dr_flac's inlined assembly (most likely), a bug in `clang-cl` or just a misunderstanding on my part with inline
+    assembly rules for `clang-cl`. If somebody can identify an error in dr_flac's inlined assembly I'm happy to get that fixed.
+
+    Fortunately there is an easy workaround for this. Clang implements MSVC-specific intrinsics for compatibility. It also defines _MSC_VER for extra
+    compatibility. We can therefore just check for _MSC_VER and use the MSVC intrinsic which, fortunately for us, Clang supports. It would still be nice
+    to know how to fix the inlined assembly for correctness sake, however.
+    */
+
+#if defined(_MSC_VER) /*&& !defined(__clang__)*/    /* <-- Intentionally wanting Clang to use the MSVC __lzcnt64/__lzcnt intrinsics due to above ^. */
     #ifdef DRFLAC_64BIT
         return (drflac_uint32)__lzcnt64(x);
     #else
@@ -2690,7 +2801,7 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
             {
                 drflac_uint64 r;
                 __asm__ __volatile__ (
-                    "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x)
+                    "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc"
                 );
 
                 return (drflac_uint32)r;
@@ -2699,12 +2810,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
             {
                 drflac_uint32 r;
                 __asm__ __volatile__ (
-                    "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x)
+                    "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x) : "cc"
                 );
 
                 return r;
             }
-        #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT)   /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */
+        #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(__ARM_ARCH_6M__) && !defined(DRFLAC_64BIT)   /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */
             {
                 unsigned int r;
                 __asm__ __volatile__ (
@@ -2755,6 +2866,26 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
 }
 #endif
 
+#ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM
+static __inline drflac_uint32 drflac__clz_watcom (drflac_uint32);
+#ifdef DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT
+/* Use the LZCNT instruction (only available on some processors since the 2010s). */
+#pragma aux drflac__clz_watcom_lzcnt = \
+    "db 0F3h, 0Fh, 0BDh, 0C0h" /* lzcnt eax, eax */ \
+    parm [eax] \
+    value [eax] \
+    modify nomemory;
+#else
+/* Use the 386+-compatible implementation. */
+#pragma aux drflac__clz_watcom = \
+    "bsr eax, eax" \
+    "xor eax, 31" \
+    parm [eax] nomemory \
+    value [eax] \
+    modify exact [eax] nomemory;
+#endif
+#endif
+
 static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
 {
 #ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
@@ -2765,6 +2896,12 @@ static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
     {
 #ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
         return drflac__clz_msvc(x);
+#elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM_LZCNT)
+        return drflac__clz_watcom_lzcnt(x);
+#elif defined(DRFLAC_IMPLEMENT_CLZ_WATCOM)
+        return (x == 0) ? sizeof(x)*8 : drflac__clz_watcom(x);
+#elif defined(__MRC__)
+        return __cntlzw(x);
 #else
         return drflac__clz_software(x);
 #endif
@@ -2784,9 +2921,24 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs,
         }
     }
 
+    if (bs->cache == 1) {
+        /* Not catching this would lead to undefined behaviour: a shift of a 32-bit number by 32 or more is undefined */
+        *pOffsetOut = zeroCounter + (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs) - 1;
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        return DRFLAC_TRUE;
+    }
+
     setBitOffsetPlus1 = drflac__clz(bs->cache);
     setBitOffsetPlus1 += 1;
 
+    if (setBitOffsetPlus1 > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        /* This happens when we get to end of stream */
+        return DRFLAC_FALSE;
+    }
+
     bs->consumedBits += setBitOffsetPlus1;
     bs->cache <<= setBitOffsetPlus1;
 
@@ -2901,6 +3053,25 @@ static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64
 }
 
 
+static DRFLAC_INLINE drflac_uint32 drflac__ilog2_u32(drflac_uint32 x)
+{
+#if 1   /* Needs optimizing. */
+    drflac_uint32 result = 0;   /* Counts how many right-shifts it takes for x to reach 0. */
+    while (x > 0) {
+        result += 1;
+        x >>= 1;
+    }
+    /* The count is the bit width of x: 0 for 0, 1 for 1, 2 for 2..3, and so on (floor(log2(x)) + 1 for x > 0). */
+    return result;
+#endif
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__use_64_bit_prediction(drflac_uint32 bitsPerSample, drflac_uint32 order, drflac_uint32 precision)
+{
+    /* https://web.archive.org/web/20220205005724/https://github.com/ietf-wg-cellar/flac-specification/blob/37a49aa48ba4ba12e8757badfc59c0df35435fec/rfc_backmatter.md */
+    return bitsPerSample + precision + drflac__ilog2_u32(order) > 32;  /* Non-zero when sample width + coefficient precision + ilog2(order) exceeds 32; the residual decoders then call drflac__calculate_prediction_64 instead of the 32-bit variant. */
+}
+
 
 /*
 The next two functions are responsible for calculating the prediction.
@@ -2908,6 +3079,9 @@ The next two functions are responsible for calculating the prediction.
 When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
 safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
 */
+#if defined(__clang__)
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
 static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
 {
     drflac_int32 prediction = 0;
@@ -3143,12 +3317,11 @@ static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32
 Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
 sake of readability and should only be used as a reference.
 */
-static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
     drflac_uint32 i;
 
     DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
     DRFLAC_ASSERT(pSamplesOut != NULL);
 
     for (i = 0; i < count; ++i) {
@@ -3183,10 +3356,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drfla
         }
 
 
-        if (bitsPerSample+shift >= 32) {
-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
         } else {
-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
         }
     }
 
@@ -3283,6 +3456,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac
             if (!drflac__reload_cache(bs)) {
                 return DRFLAC_FALSE;
             }
+            if (bitCountLo > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+                /* This happens when we get to end of stream */
+                return DRFLAC_FALSE;
+            }
         }
 
         riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
@@ -3363,6 +3540,10 @@ static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drf
                 if (!drflac__reload_cache(bs)) {
                     return DRFLAC_FALSE;
                 }
+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+                    /* This happens when we get to end of stream */
+                    return DRFLAC_FALSE;
+                }
 
                 bs_cache = bs->cache;
                 bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
@@ -3473,6 +3654,11 @@ static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac
                     return DRFLAC_FALSE;
                 }
 
+                if (riceParamPartLoBitCount > DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+                    /* This happens when we get to end of stream */
+                    return DRFLAC_FALSE;
+                }
+
                 bs_cache = bs->cache;
                 bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
             }
@@ -3530,7 +3716,6 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorde
     drflac_uint32 i;
 
     DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
     DRFLAC_ASSERT(pSamplesOut != NULL);
 
     (void)bitsPerSample;
@@ -3560,7 +3745,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorde
     return DRFLAC_TRUE;
 }
 
-static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
     drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
     drflac_uint32 zeroCountPart0 = 0;
@@ -3576,17 +3761,16 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
     drflac_uint32 i;
 
     DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
     DRFLAC_ASSERT(pSamplesOut != NULL);
 
-    if (order == 0) {
-        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    if (lpcOrder == 0) {
+        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
     }
 
     riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
     pSamplesOutEnd = pSamplesOut + (count & ~3);
 
-    if (bitsPerSample+shift > 32) {
+    if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
         while (pSamplesOut < pSamplesOutEnd) {
             /*
             Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
@@ -3614,10 +3798,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
             riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
             riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
 
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
 
             pSamplesOut += 4;
         }
@@ -3645,10 +3829,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
             riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
             riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
 
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
 
             pSamplesOut += 4;
         }
@@ -3668,10 +3852,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_b
         /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
 
         /* Sample reconstruction. */
-        if (bitsPerSample+shift > 32) {
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
         } else {
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
         }
 
         i += 1;
@@ -4127,21 +4311,20 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac
     return DRFLAC_TRUE;
 }
 
-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
     DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
     DRFLAC_ASSERT(pSamplesOut != NULL);
 
     /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
-    if (order > 0 && order <= 12) {
-        if (bitsPerSample+shift > 32) {
-            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+    if (lpcOrder > 0 && lpcOrder <= 12) {
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
         } else {
-            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
         }
     } else {
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
     }
 }
 #endif
@@ -4280,7 +4463,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_
 
     const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
 
-    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
     riceParamMask128 = vdupq_n_u32(riceParamMask);
 
     riceParam128 = vdupq_n_s32(riceParam);
@@ -4466,10 +4649,13 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
     int32x4_t riceParam128;
     int64x1_t shift64;
     uint32x4_t one128;
+    int64x2_t prediction128 = { 0 };
+    uint32x4_t zeroCountPart128;
+    uint32x4_t riceParamPart128;
 
     const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
 
-    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
     riceParamMask128 = vdupq_n_u32(riceParamMask);
 
     riceParam128 = vdupq_n_s32(riceParam);
@@ -4478,7 +4664,7 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
 
     /*
     Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
-    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
+    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
     in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
     so I think there's opportunity for this to be simplified.
     */
@@ -4546,10 +4732,6 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
 
     /* For this version we are doing one sample at a time. */
     while (pDecodedSamples < pDecodedSamplesEnd) {
-        int64x2_t prediction128;
-        uint32x4_t zeroCountPart128;
-        uint32x4_t riceParamPart128;
-
         if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
             !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
             !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
@@ -4626,42 +4808,41 @@ static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_
     return DRFLAC_TRUE;
 }
 
-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
     DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
     DRFLAC_ASSERT(pSamplesOut != NULL);
 
     /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
-    if (order > 0 && order <= 12) {
-        if (bitsPerSample+shift > 32) {
-            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+    if (lpcOrder > 0 && lpcOrder <= 12) {
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
         } else {
-            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, lpcOrder, lpcShift, coefficients, pSamplesOut);
         }
     } else {
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
     }
 }
 #endif
 
-static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
 #if defined(DRFLAC_SUPPORT_SSE41)
     if (drflac__gIsSSE41Supported) {
-        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
     } else
 #elif defined(DRFLAC_SUPPORT_NEON)
     if (drflac__gIsNEONSupported) {
-        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
     } else
 #endif
     {
         /* Scalar fallback. */
     #if 0
-        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
     #else
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pSamplesOut);
     #endif
     }
 }
@@ -4672,7 +4853,6 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_
     drflac_uint32 i;
 
     DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
 
     for (i = 0; i < count; ++i) {
         if (!drflac__seek_rice_parts(bs, riceParam)) {
@@ -4683,12 +4863,14 @@ static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_
     return DRFLAC_TRUE;
 }
 
-static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+#if defined(__clang__)
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
 {
     drflac_uint32 i;
 
     DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
     DRFLAC_ASSERT(unencodedBitsPerSample <= 31);    /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */
     DRFLAC_ASSERT(pSamplesOut != NULL);
 
@@ -4701,10 +4883,10 @@ static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs*
             pSamplesOut[i] = 0;
         }
 
-        if (bitsPerSample >= 24) {
-            pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
+            pSamplesOut[i] += drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
         } else {
-            pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+            pSamplesOut[i] += drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + i);
         }
     }
 
@@ -4717,7 +4899,7 @@ Reads and decodes the residual for the sub-frame the decoder is currently sittin
 when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
 <blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
 */
-static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 lpcOrder, drflac_int32 lpcShift, drflac_uint32 lpcPrecision, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
 {
     drflac_uint8 residualMethod;
     drflac_uint8 partitionOrder;
@@ -4737,7 +4919,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
     }
 
     /* Ignore the first <order> values. */
-    pDecodedSamples += order;
+    pDecodedSamples += lpcOrder;
 
     if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
         return DRFLAC_FALSE;
@@ -4752,11 +4934,11 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
     }
 
     /* Validation check. */
-    if ((blockSize / (1 << partitionOrder)) <= order) {
+    if ((blockSize / (1 << partitionOrder)) < lpcOrder) {
         return DRFLAC_FALSE;
     }
 
-    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - lpcOrder;
     partitionsRemaining = (1 << partitionOrder);
     for (;;) {
         drflac_uint8 riceParam = 0;
@@ -4777,7 +4959,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
         }
 
         if (riceParam != 0xFF) {
-            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
+            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
                 return DRFLAC_FALSE;
             }
         } else {
@@ -4786,7 +4968,7 @@ static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_
                 return DRFLAC_FALSE;
             }
 
-            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
+            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
                 return DRFLAC_FALSE;
             }
         }
@@ -4955,7 +5137,7 @@ static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32
         pDecodedSamples[i] = sample;
     }
 
-    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
+    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, 4, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
         return DRFLAC_FALSE;
     }
 
@@ -4991,6 +5173,18 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl
         return DRFLAC_FALSE;
     }
 
+    /*
+    From the FLAC specification:
+
+        Quantized linear predictor coefficient shift needed in bits (NOTE: this number is signed two's-complement)
+
+    Emphasis on the "signed two's-complement". In practice there do not seem to be any encoders or decoders supporting negative shifts. For now dr_flac is
+    not going to support negative shifts as I don't have any reference files. However, when a reference file comes through I will consider adding support.
+    */
+    if (lpcShift < 0) {
+        return DRFLAC_FALSE;
+    }
+
     DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients));
     for (i = 0; i < lpcOrder; ++i) {
         if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) {
@@ -4998,7 +5192,7 @@ static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 bl
         }
     }
 
-    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
+    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, lpcPrecision, coefficients, pDecodedSamples)) {
         return DRFLAC_FALSE;
     }
 
@@ -5112,7 +5306,8 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
         DRFLAC_ASSERT(blockSize > 0);
         if (blockSize == 1) {
             header->blockSizeInPCMFrames = 192;
-        } else if (blockSize >= 2 && blockSize <= 5) {
+        } else if (blockSize <= 5) {
+            DRFLAC_ASSERT(blockSize >= 2);
             header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2));
         } else if (blockSize == 6) {
             if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) {
@@ -5125,6 +5320,9 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
                 return DRFLAC_FALSE;
             }
             crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
+            if (header->blockSizeInPCMFrames == 0xFFFF) {
+                return DRFLAC_FALSE;    /* Frame is too big. This is the size of the frame minus 1. The STREAMINFO block defines the max block size which is 16-bits. Adding one will make it 17 bits and therefore too big. */
+            }
             header->blockSizeInPCMFrames += 1;
         } else {
             DRFLAC_ASSERT(blockSize >= 8);
@@ -5163,6 +5361,11 @@ static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_u
             header->bitsPerSample = streaminfoBitsPerSample;
         }
 
+        if (header->bitsPerSample != streaminfoBitsPerSample) {
+            /* If this subframe has a different bitsPerSample than streaminfo or the first frame, reject it */
+            return DRFLAC_FALSE;
+        }
+
         if (!drflac__read_uint8(bs, 8, &header->crc8)) {
             return DRFLAC_FALSE;
         }
@@ -5190,6 +5393,12 @@ static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe
         return DRFLAC_FALSE;
     }
 
+    /*
+    Default to 0 for the LPC order. It's important that we always set this to 0 for non LPC
+    and FIXED subframes because we'll be using it in a generic validation check later.
+    */
+    pSubframe->lpcOrder = 0;
+
     type = (header & 0x7E) >> 1;
     if (type == 0) {
         pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT;
@@ -5249,6 +5458,11 @@ static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame,
         subframeBitsPerSample += 1;
     }
 
+    if (subframeBitsPerSample > 32) {
+        /* libFLAC and ffmpeg reject 33-bit subframes as well */
+        return DRFLAC_FALSE;
+    }
+
     /* Need to handle wasted bits per sample. */
     if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
         return DRFLAC_FALSE;
@@ -5257,6 +5471,18 @@ static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame,
 
     pSubframe->pSamplesS32 = pDecodedSamplesOut;
 
+    /*
+    pDecodedSamplesOut will be pointing to a buffer that was allocated with enough memory to store
+    maxBlockSizeInPCMFrames samples (as specified in the FLAC header). We need to guard against an
+    overflow here. At a higher level we are checking maxBlockSizeInPCMFrames from the header, but
+    here we need to do an additional check to ensure this frame's block size fully encompasses any
+    warmup samples which is determined by the LPC order. For non LPC and FIXED subframes, the LPC
+    order will have been set to 0 in drflac__read_subframe_header().
+    */
+    if (frame->header.blockSizeInPCMFrames < pSubframe->lpcOrder) {
+        return DRFLAC_FALSE;
+    }
+
     switch (pSubframe->subframeType)
     {
         case DRFLAC_SUBFRAME_CONSTANT:
@@ -5706,6 +5932,9 @@ static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFla
     *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes;
 
     for (;;) {
+        /* After rangeLo == rangeHi == targetByte fails, we need to break out. */
+        drflac_uint64 lastTargetByte = targetByte;
+
         /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */
         if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) {
             /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */
@@ -5746,6 +5975,11 @@ static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFla
             }
 #endif
         }
+
+        /* We already tried this byte and there are no more to try, break out. */
+        if(targetByte == lastTargetByte) {
+            return DRFLAC_FALSE;
+        }
     }
 
     /* The current PCM frame needs to be updated based on the frame we just seeked to. */
@@ -5911,6 +6145,11 @@ static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac
         return DRFLAC_FALSE;
     }
 
+    /* Do not use the seektable if pcmFrameIndex is not covered by it. */
+    if (pFlac->pSeekpoints[0].firstPCMFrame > pcmFrameIndex) {
+        return DRFLAC_FALSE;
+    }
+
     for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
         if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) {
             break;
@@ -6258,7 +6497,7 @@ static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbac
 }
 
 
-static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks)
+static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeekpointCount, drflac_allocation_callbacks* pAllocationCallbacks)
 {
     /*
     We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
@@ -6271,7 +6510,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
     for (;;) {
         drflac_metadata metadata;
         drflac_uint8 isLastBlock = 0;
-        drflac_uint8 blockType;
+        drflac_uint8 blockType = 0;
         drflac_uint32 blockSize;
         if (drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize) == DRFLAC_FALSE) {
             return DRFLAC_FALSE;
@@ -6318,32 +6557,37 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                 seektableSize = blockSize;
 
                 if (onMeta) {
+                    drflac_uint32 seekpointCount;
                     drflac_uint32 iSeekpoint;
                     void* pRawData;
 
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    seekpointCount = blockSize/DRFLAC_SEEKPOINT_SIZE_IN_BYTES;
+
+                    pRawData = drflac__malloc_from_callbacks(seekpointCount * sizeof(drflac_seekpoint), pAllocationCallbacks);
                     if (pRawData == NULL) {
                         return DRFLAC_FALSE;
                     }
 
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
+                    /* We need to read seekpoint by seekpoint and do some processing. */
+                    for (iSeekpoint = 0; iSeekpoint < seekpointCount; ++iSeekpoint) {
+                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
 
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-                    metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint);
-                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
+                        if (onRead(pUserData, pSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) != DRFLAC_SEEKPOINT_SIZE_IN_BYTES) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
 
-                    /* Endian swap. */
-                    for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
-                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
+                        /* Endian swap. */
                         pSeekpoint->firstPCMFrame   = drflac__be2host_64(pSeekpoint->firstPCMFrame);
                         pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset);
                         pSeekpoint->pcmFrameCount   = drflac__be2host_16(pSeekpoint->pcmFrameCount);
                     }
 
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.seektable.seekpointCount = seekpointCount;
+                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
+
                     onMeta(pUserDataMD, &metadata);
 
                     drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
@@ -6378,7 +6622,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                     pRunningData    = (const char*)pRawData;
                     pRunningDataEnd = (const char*)pRawData + blockSize;
 
-                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
 
                     /* Need space for the rest of the block */
                     if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
@@ -6386,7 +6630,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                         return DRFLAC_FALSE;
                     }
                     metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
-                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
 
                     /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
                     if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
@@ -6404,7 +6648,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                             return DRFLAC_FALSE;
                         }
 
-                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                        commentLength = drflac__le2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
                         if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
                             drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
                             return DRFLAC_FALSE;
@@ -6428,9 +6672,15 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                     void* pRawData;
                     const char* pRunningData;
                     const char* pRunningDataEnd;
+                    size_t bufferSize;
                     drflac_uint8 iTrack;
                     drflac_uint8 iIndex;
+                    void* pTrackData;
 
+                    /*
+                    This needs to be loaded in two passes. The first pass is used to calculate the size of the memory allocation
+                    we need for storing the necessary data. The second pass will fill that buffer with usable data.
+                    */
                     pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
                     if (pRawData == NULL) {
                         return DRFLAC_FALSE;
@@ -6451,38 +6701,91 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                     metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8;
                     metadata.data.cuesheet.isCD              = (pRunningData[0] & 0x80) != 0;                           pRunningData += 259;
                     metadata.data.cuesheet.trackCount        = pRunningData[0];                                         pRunningData += 1;
-                    metadata.data.cuesheet.pTrackData        = pRunningData;
+                    metadata.data.cuesheet.pTrackData        = NULL;    /* Will be filled later. */
+
+                    /* Pass 1: Calculate the size of the buffer for the track data. */
+                    {
+                        const char* pRunningDataSaved = pRunningData;   /* Will be restored at the end in preparation for the second pass. */
 
-                    /* Check that the cuesheet tracks are valid before passing it to the callback */
-                    for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
-                        drflac_uint8 indexCount;
-                        drflac_uint32 indexPointSize;
+                        bufferSize = metadata.data.cuesheet.trackCount * DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES;
 
-                        if (pRunningDataEnd - pRunningData < 36) {
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
+                        for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                            drflac_uint8 indexCount;
+                            drflac_uint32 indexPointSize;
+
+                            if (pRunningDataEnd - pRunningData < DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES) {
+                                drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                                return DRFLAC_FALSE;
+                            }
+
+                            /* Skip to the index point count */
+                            pRunningData += 35;
+
+                            indexCount = pRunningData[0];
+                            pRunningData += 1;
+
+                            bufferSize += indexCount * sizeof(drflac_cuesheet_track_index);
+
+                            /* Quick validation check. */
+                            indexPointSize = indexCount * DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES;
+                            if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
+                                drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                                return DRFLAC_FALSE;
+                            }
+
+                            pRunningData += indexPointSize;
                         }
 
-                        /* Skip to the index point count */
-                        pRunningData += 35;
-                        indexCount = pRunningData[0]; pRunningData += 1;
-                        indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index);
-                        if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
+                        pRunningData = pRunningDataSaved;
+                    }
+
+                    /* Pass 2: Allocate a buffer and fill the data. Validation was done in the step above so can be skipped. */
+                    {
+                        char* pRunningTrackData;
+
+                        pTrackData = drflac__malloc_from_callbacks(bufferSize, pAllocationCallbacks);
+                        if (pTrackData == NULL) {
                             drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
                             return DRFLAC_FALSE;
                         }
 
-                        /* Endian swap. */
-                        for (iIndex = 0; iIndex < indexCount; ++iIndex) {
-                            drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData;
-                            pRunningData += sizeof(drflac_cuesheet_track_index);
-                            pTrack->offset = drflac__be2host_64(pTrack->offset);
+                        pRunningTrackData = (char*)pTrackData;
+
+                        for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                            drflac_uint8 indexCount;
+
+                            DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES);
+                            pRunningData      += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1; /* Skip forward, but not beyond the last byte in the CUESHEET_TRACK block which is the index count. */
+                            pRunningTrackData += DRFLAC_CUESHEET_TRACK_SIZE_IN_BYTES-1;
+
+                            /* Grab the index count for the next part. */
+                            indexCount = pRunningData[0];
+                            pRunningData      += 1;
+                            pRunningTrackData += 1;
+
+                            /* Extract each track index. */
+                            for (iIndex = 0; iIndex < indexCount; ++iIndex) {
+                                drflac_cuesheet_track_index* pTrackIndex = (drflac_cuesheet_track_index*)pRunningTrackData;
+
+                                DRFLAC_COPY_MEMORY(pRunningTrackData, pRunningData, DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES);
+                                pRunningData      += DRFLAC_CUESHEET_TRACK_INDEX_SIZE_IN_BYTES;
+                                pRunningTrackData += sizeof(drflac_cuesheet_track_index);
+
+                                pTrackIndex->offset = drflac__be2host_64(pTrackIndex->offset);
+                            }
                         }
+
+                        metadata.data.cuesheet.pTrackData = pTrackData;
                     }
 
+                    /* The original data is no longer needed. */
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                    pRawData = NULL;
+
                     onMeta(pUserDataMD, &metadata);
 
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                    drflac__free_from_callbacks(pTrackData, pAllocationCallbacks);
+                    pTrackData = NULL;
                 }
             } break;
 
@@ -6513,28 +6816,28 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                     pRunningData    = (const char*)pRawData;
                     pRunningDataEnd = (const char*)pRawData + blockSize;
 
-                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.type       = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.mimeLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
 
                     /* Need space for the rest of the block */
                     if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
                         drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
                         return DRFLAC_FALSE;
                     }
-                    metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
-                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.mime              = pRunningData;                                   pRunningData += metadata.data.picture.mimeLength;
+                    metadata.data.picture.descriptionLength = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
 
                     /* Need space for the rest of the block */
                     if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
                         drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
                         return DRFLAC_FALSE;
                     }
-                    metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
-                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.description     = pRunningData;                                   pRunningData += metadata.data.picture.descriptionLength;
+                    metadata.data.picture.width           = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.height          = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.colorDepth      = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.indexColorCount = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
+                    metadata.data.picture.pictureDataSize = drflac__be2host_32_ptr_unaligned(pRunningData); pRunningData += 4;
                     metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
 
                     /* Need space for the picture after the block */
@@ -6612,9 +6915,9 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
         }
     }
 
-    *pSeektablePos = seektablePos;
-    *pSeektableSize = seektableSize;
-    *pFirstFramePos = runningFilePos;
+    *pSeektablePos   = seektablePos;
+    *pSeekpointCount = seektableSize / DRFLAC_SEEKPOINT_SIZE_IN_BYTES;
+    *pFirstFramePos  = runningFilePos;
 
     return DRFLAC_TRUE;
 }
@@ -7639,16 +7942,16 @@ static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit)
 
 static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
-    static drflac_init_info init;
+    drflac_init_info init;
     drflac_uint32 allocationSize;
     drflac_uint32 wholeSIMDVectorCountPerChannel;
     drflac_uint32 decodedSamplesAllocationSize;
 #ifndef DR_FLAC_NO_OGG
-    static drflac_oggbs oggbs;
+    drflac_oggbs* pOggbs = NULL;
 #endif
     drflac_uint64 firstFramePos;
     drflac_uint64 seektablePos;
-    drflac_uint32 seektableSize;
+    drflac_uint32 seekpointCount;
     drflac_allocation_callbacks allocationCallbacks;
     drflac* pFlac;
 
@@ -7702,18 +8005,21 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
     /* There's additional data required for Ogg streams. */
     if (init.container == drflac_container_ogg) {
         allocationSize += sizeof(drflac_oggbs);
-    }
 
-    DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs));
-    if (init.container == drflac_container_ogg) {
-        oggbs.onRead = onRead;
-        oggbs.onSeek = onSeek;
-        oggbs.pUserData = pUserData;
-        oggbs.currentBytePos = init.oggFirstBytePos;
-        oggbs.firstBytePos = init.oggFirstBytePos;
-        oggbs.serialNumber = init.oggSerial;
-        oggbs.bosPageHeader = init.oggBosHeader;
-        oggbs.bytesRemainingInPage = 0;
+        pOggbs = (drflac_oggbs*)drflac__malloc_from_callbacks(sizeof(*pOggbs), &allocationCallbacks);
+        if (pOggbs == NULL) {
+            return NULL; /*DRFLAC_OUT_OF_MEMORY;*/
+        }
+
+        DRFLAC_ZERO_MEMORY(pOggbs, sizeof(*pOggbs));
+        pOggbs->onRead = onRead;
+        pOggbs->onSeek = onSeek;
+        pOggbs->pUserData = pUserData;
+        pOggbs->currentBytePos = init.oggFirstBytePos;
+        pOggbs->firstBytePos = init.oggFirstBytePos;
+        pOggbs->serialNumber = init.oggSerial;
+        pOggbs->bosPageHeader = init.oggBosHeader;
+        pOggbs->bytesRemainingInPage = 0;
     }
 #endif
 
@@ -7722,9 +8028,9 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
     consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading
     and decoding the metadata.
     */
-    firstFramePos = 42;   /* <-- We know we are at byte 42 at this point. */
-    seektablePos  = 0;
-    seektableSize = 0;
+    firstFramePos  = 42;   /* <-- We know we are at byte 42 at this point. */
+    seektablePos   = 0;
+    seekpointCount = 0;
     if (init.hasMetadataBlocks) {
         drflac_read_proc onReadOverride = onRead;
         drflac_seek_proc onSeekOverride = onSeek;
@@ -7734,20 +8040,26 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
         if (init.container == drflac_container_ogg) {
             onReadOverride = drflac__on_read_ogg;
             onSeekOverride = drflac__on_seek_ogg;
-            pUserDataOverride = (void*)&oggbs;
+            pUserDataOverride = (void*)pOggbs;
         }
 #endif
 
-        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) {
+        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seekpointCount, &allocationCallbacks)) {
+        #ifndef DR_FLAC_NO_OGG
+            drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
+        #endif
             return NULL;
         }
 
-        allocationSize += seektableSize;
+        allocationSize += seekpointCount * sizeof(drflac_seekpoint);
     }
 
 
     pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks);
     if (pFlac == NULL) {
+    #ifndef DR_FLAC_NO_OGG
+        drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
+    #endif
         return NULL;
     }
 
@@ -7757,8 +8069,12 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
 
 #ifndef DR_FLAC_NO_OGG
     if (init.container == drflac_container_ogg) {
-        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
-        *pInternalOggbs = oggbs;
+        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + (seekpointCount * sizeof(drflac_seekpoint)));
+        DRFLAC_COPY_MEMORY(pInternalOggbs, pOggbs, sizeof(*pOggbs));
+
+        /* At this point the pOggbs object has been handed over to pInternalOggbs and can be freed. */
+        drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
+        pOggbs = NULL;
 
         /* The Ogg bistream needs to be layered on top of the original bitstream. */
         pFlac->bs.onRead = drflac__on_read_ogg;
@@ -7782,7 +8098,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
     {
         /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */
         if (seektablePos != 0) {
-            pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints);
+            pFlac->seekpointCount = seekpointCount;
             pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
 
             DRFLAC_ASSERT(pFlac->bs.onSeek != NULL);
@@ -7790,18 +8106,20 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
 
             /* Seek to the seektable, then just read directly into our seektable buffer. */
             if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
-                if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) {
-                    /* Endian swap. */
-                    drflac_uint32 iSeekpoint;
-                    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+                drflac_uint32 iSeekpoint;
+
+                for (iSeekpoint = 0; iSeekpoint < seekpointCount; iSeekpoint += 1) {
+                    if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints + iSeekpoint, DRFLAC_SEEKPOINT_SIZE_IN_BYTES) == DRFLAC_SEEKPOINT_SIZE_IN_BYTES) {
+                        /* Endian swap. */
                         pFlac->pSeekpoints[iSeekpoint].firstPCMFrame   = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame);
                         pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset);
                         pFlac->pSeekpoints[iSeekpoint].pcmFrameCount   = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount);
+                    } else {
+                        /* Failed to read the seektable. Pretend we don't have one. */
+                        pFlac->pSeekpoints = NULL;
+                        pFlac->seekpointCount = 0;
+                        break;
                     }
-                } else {
-                    /* Failed to read the seektable. Pretend we don't have one. */
-                    pFlac->pSeekpoints = NULL;
-                    pFlac->seekpointCount = 0;
                 }
 
                 /* We need to seek back to where we were. If this fails it's a critical error. */
@@ -7850,8 +8168,11 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
 
 #ifndef DR_FLAC_NO_STDIO
 #include <stdio.h>
+#ifndef DR_FLAC_NO_WCHAR
 #include <wchar.h>      /* For wcslen(), wcsrtombs() */
+#endif
 
+/* Errno */
 /* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
 #include <errno.h>
 static drflac_result drflac_result_from_errno(int e)
@@ -8255,10 +8576,12 @@ static drflac_result drflac_result_from_errno(int e)
         default: return DRFLAC_ERROR;
     }
 }
+/* End Errno */
 
+/* fopen */
 static drflac_result drflac_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
 {
-#if _MSC_VER && _MSC_VER >= 1400
+#if defined(_MSC_VER) && _MSC_VER >= 1400
     errno_t err;
 #endif
 
@@ -8270,7 +8593,7 @@ static drflac_result drflac_fopen(FILE** ppFile, const char* pFilePath, const ch
         return DRFLAC_INVALID_ARGS;
     }
 
-#if _MSC_VER && _MSC_VER >= 1400
+#if defined(_MSC_VER) && _MSC_VER >= 1400
     err = fopen_s(ppFile, pFilePath, pOpenMode);
     if (err != 0) {
         return drflac_result_from_errno(err);
@@ -8305,16 +8628,18 @@ _wfopen() isn't always available in all compilation environments.
     * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
     * MinGW-64 (both 32- and 64-bit) seems to support it.
     * MinGW wraps it in !defined(__STRICT_ANSI__).
+    * OpenWatcom wraps it in !defined(_NO_EXT_KEYS).
 
 This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
 fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
 */
 #if defined(_WIN32)
-    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS))
         #define DRFLAC_HAS_WFOPEN
     #endif
 #endif
 
+#ifndef DR_FLAC_NO_WCHAR
 static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
     if (ppFile != NULL) {
@@ -8343,10 +8668,23 @@ static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, cons
     }
 #else
     /*
-    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
-    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
-    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because
+	fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note
+	that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody gets a compiler
+	error I'll look into improving compatibility.
     */
+
+	/*
+	Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just
+	need to abort with an error. If you encounter a compiler lacking such support, add it to this list
+	and submit a bug report and it'll be added to the library upstream.
+	*/
+	#if defined(__DJGPP__)
+	{
+		/* Nothing to do here. This will fall through to the error check below. */
+	}
+	#else
     {
         mbstate_t mbs;
         size_t lenMB;
@@ -8388,6 +8726,7 @@ static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, cons
 
         drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
     }
+	#endif
 
     if (*ppFile == NULL) {
         return DRFLAC_ERROR;
@@ -8396,6 +8735,8 @@ static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, cons
 
     return DRFLAC_SUCCESS;
 }
+#endif
+/* End fopen */
 
 static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
 {
@@ -8428,6 +8769,7 @@ DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocati
     return pFlac;
 }
 
+#ifndef DR_FLAC_NO_WCHAR
 DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
     drflac* pFlac;
@@ -8445,6 +8787,7 @@ DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_all
 
     return pFlac;
 }
+#endif
 
 DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
@@ -8464,6 +8807,7 @@ DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_
     return pFlac;
 }
 
+#ifndef DR_FLAC_NO_WCHAR
 DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
     drflac* pFlac;
@@ -8481,6 +8825,7 @@ DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, dr
 
     return pFlac;
 }
+#endif
 #endif  /* DR_FLAC_NO_STDIO */
 
 static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
@@ -11280,6 +11625,7 @@ DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 p
         return drflac__seek_to_first_frame(pFlac);
     } else {
         drflac_bool32 wasSuccessful = DRFLAC_FALSE;
+        drflac_uint64 originalPCMFrame = pFlac->currentPCMFrame;
 
         /* Clamp the sample to the end. */
         if (pcmFrameIndex > pFlac->totalPCMFrameCount) {
@@ -11337,7 +11683,16 @@ DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 p
             }
         }
 
-        pFlac->currentPCMFrame = pcmFrameIndex;
+        if (wasSuccessful) {
+            pFlac->currentPCMFrame = pcmFrameIndex;
+        } else {
+            /* Seek failed. Try putting the decoder back to its original state. */
+            if (drflac_seek_to_pcm_frame(pFlac, originalPCMFrame) == DRFLAC_FALSE) {
+                /* Failed to seek back to the original PCM frame. Fall back to 0. */
+                drflac_seek_to_pcm_frame(pFlac, 0);
+            }
+        }
+
         return wasSuccessful;
     }
 }
@@ -11346,6 +11701,7 @@ DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 p
 
 /* High Level APIs */
 
+/* SIZE_MAX */
 #if defined(SIZE_MAX)
     #define DRFLAC_SIZE_MAX  SIZE_MAX
 #else
@@ -11355,6 +11711,7 @@ DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 p
         #define DRFLAC_SIZE_MAX  0xFFFFFFFF
     #endif
 #endif
+/* End SIZE_MAX */
 
 
 /* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */
@@ -11403,7 +11760,7 @@ static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned
         DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
     } else {                                                                                                                                                        \
         drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
-        if (dataSize > DRFLAC_SIZE_MAX) {                                                                                                                           \
+        if (dataSize > (drflac_uint64)DRFLAC_SIZE_MAX) {                                                                                                            \
             goto on_error;  /* The decoded data is too big. */                                                                                                      \
         }                                                                                                                                                           \
                                                                                                                                                                     \
@@ -11668,7 +12025,7 @@ DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator
         return NULL;
     }
 
-    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
+    length = drflac__le2host_32_ptr_unaligned(pIter->pRunningData);
     pIter->pRunningData += 4;
 
     pComment = pIter->pRunningData;
@@ -11728,15 +12085,117 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
     return DRFLAC_TRUE;
 }
 
-#if defined(__GNUC__)
+#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
     #pragma GCC diagnostic pop
 #endif
+#endif  /* dr_flac_c */
 #endif  /* DR_FLAC_IMPLEMENTATION */
 
 
 /*
 REVISION HISTORY
 ================
+v0.12.43 - 2024-12-17
+  - Fix a possible buffer overflow during decoding.
+  - Improve detection of ARM64EC
+
+v0.12.42 - 2023-11-02
+  - Fix build for ARMv6-M.
+  - Fix a compilation warning with GCC.
+
+v0.12.41 - 2023-06-17
+  - Fix an incorrect date in revision history. No functional change.
+
+v0.12.40 - 2023-05-22
+  - Minor code restructure. No functional change.
+
+v0.12.39 - 2022-09-17
+  - Fix compilation with DJGPP.
+  - Fix compilation error with Visual Studio 2019 and the ARM build.
+  - Fix an error with SSE 4.1 detection.
+  - Add support for disabling wchar_t with DR_FLAC_NO_WCHAR.
+  - Improve compatibility with compilers which lack support for explicit struct packing.
+  - Improve compatibility with low-end and embedded hardware by reducing the amount of stack
+    allocation when loading an Ogg encapsulated file.
+
+v0.12.38 - 2022-04-10
+  - Fix compilation error on older versions of GCC.
+
+v0.12.37 - 2022-02-12
+  - Improve ARM detection.
+
+v0.12.36 - 2022-02-07
+  - Fix a compilation error with the ARM build.
+
+v0.12.35 - 2022-02-06
+  - Fix a bug due to underestimating the amount of precision required for the prediction stage.
+  - Fix some bugs found from fuzz testing.
+
+v0.12.34 - 2022-01-07
+  - Fix some misalignment bugs when reading metadata.
+
+v0.12.33 - 2021-12-22
+  - Fix a bug with seeking when the seek table does not start at PCM frame 0.
+
+v0.12.32 - 2021-12-11
+  - Fix a warning with Clang.
+
+v0.12.31 - 2021-08-16
+  - Silence some warnings.
+
+v0.12.30 - 2021-07-31
+  - Fix platform detection for ARM64.
+
+v0.12.29 - 2021-04-02
+  - Fix a bug where the running PCM frame index is set to an invalid value when over-seeking.
+  - Fix a decoding error due to an incorrect validation check.
+
+v0.12.28 - 2021-02-21
+  - Fix a warning due to referencing _MSC_VER when it is undefined.
+
+v0.12.27 - 2021-01-31
+  - Fix a static analysis warning.
+
+v0.12.26 - 2021-01-17
+  - Fix a compilation warning due to _BSD_SOURCE being deprecated.
+
+v0.12.25 - 2020-12-26
+  - Update documentation.
+
+v0.12.24 - 2020-11-29
+  - Fix ARM64/NEON detection when compiling with MSVC.
+
+v0.12.23 - 2020-11-21
+  - Fix compilation with OpenWatcom.
+
+v0.12.22 - 2020-11-01
+  - Fix an error with the previous release.
+
+v0.12.21 - 2020-11-01
+  - Fix a possible deadlock when seeking.
+  - Improve compiler support for older versions of GCC.
+
+v0.12.20 - 2020-09-08
+  - Fix a compilation error on older compilers.
+
+v0.12.19 - 2020-08-30
+  - Fix a bug due to an undefined 32-bit shift.
+
+v0.12.18 - 2020-08-14
+  - Fix a crash when compiling with clang-cl.
+
+v0.12.17 - 2020-08-02
+  - Simplify sized types.
+
+v0.12.16 - 2020-07-25
+  - Fix a compilation warning.
+
+v0.12.15 - 2020-07-06
+  - Check for negative LPC shifts and return an error.
+
+v0.12.14 - 2020-06-23
+  - Add include guard for the implementation section.
+
 v0.12.13 - 2020-05-16
   - Add compile-time and run-time version querying.
     - DRFLAC_VERSION_MINOR
@@ -12080,7 +12539,7 @@ For more information, please refer to <http://unlicense.org/>
 ===============================================================================
 ALTERNATIVE 2 - MIT No Attribution
 ===============================================================================
-Copyright 2020 David Reid
+Copyright 2023 David Reid
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
diff --git a/sys-tune/source/impl/dr_mp3.h b/sys-tune/source/impl/dr_mp3.h
index 81d4d2c..aee3cef 100644
--- a/sys-tune/source/impl/dr_mp3.h
+++ b/sys-tune/source/impl/dr_mp3.h
@@ -1,6 +1,6 @@
 /*
 MP3 audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_mp3 - v0.6.11 - 2020-05-26
+dr_mp3 - v0.6.40 - 2024-12-17
 
 David Reid - mackron@gmail.com
 
@@ -33,7 +33,7 @@ Support for loading a file from a `wchar_t` string has been added via the `drmp3
 */
 
 /*
-Introducation
+Introduction
 =============
 dr_mp3 is a single file library. To use it, do something like the following in one .c file.
 
@@ -95,46 +95,47 @@ extern "C" {
 
 #define DRMP3_VERSION_MAJOR     0
 #define DRMP3_VERSION_MINOR     6
-#define DRMP3_VERSION_REVISION  11
+#define DRMP3_VERSION_REVISION  40
 #define DRMP3_VERSION_STRING    DRMP3_XSTRINGIFY(DRMP3_VERSION_MAJOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_MINOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_REVISION)
 
 #include <stddef.h> /* For size_t. */
 
-/* Sized types. Prefer built-in types. Fall back to stdint. */
-#ifdef _MSC_VER
-    #if defined(__clang__)
+/* Sized Types */
+typedef   signed char           drmp3_int8;
+typedef unsigned char           drmp3_uint8;
+typedef   signed short          drmp3_int16;
+typedef unsigned short          drmp3_uint16;
+typedef   signed int            drmp3_int32;
+typedef unsigned int            drmp3_uint32;
+#if defined(_MSC_VER) && !defined(__clang__)
+    typedef   signed __int64    drmp3_int64;
+    typedef unsigned __int64    drmp3_uint64;
+#else
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
         #pragma GCC diagnostic push
-        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
-        #pragma GCC diagnostic ignored "-Wlong-long"        
-        #pragma GCC diagnostic ignored "-Wc++11-long-long"
-    #endif
-    typedef   signed __int8  drmp3_int8;
-    typedef unsigned __int8  drmp3_uint8;
-    typedef   signed __int16 drmp3_int16;
-    typedef unsigned __int16 drmp3_uint16;
-    typedef   signed __int32 drmp3_int32;
-    typedef unsigned __int32 drmp3_uint32;
-    typedef   signed __int64 drmp3_int64;
-    typedef unsigned __int64 drmp3_uint64;
-    #if defined(__clang__)
+        #pragma GCC diagnostic ignored "-Wlong-long"
+        #if defined(__clang__)
+            #pragma GCC diagnostic ignored "-Wc++11-long-long"
+        #endif
+    #endif
+    typedef   signed long long  drmp3_int64;
+    typedef unsigned long long  drmp3_uint64;
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
         #pragma GCC diagnostic pop
     #endif
+#endif
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(__powerpc64__)
+    typedef drmp3_uint64        drmp3_uintptr;
 #else
-    #include <stdint.h>
-    typedef int8_t           drmp3_int8;
-    typedef uint8_t          drmp3_uint8;
-    typedef int16_t          drmp3_int16;
-    typedef uint16_t         drmp3_uint16;
-    typedef int32_t          drmp3_int32;
-    typedef uint32_t         drmp3_uint32;
-    typedef int64_t          drmp3_int64;
-    typedef uint64_t         drmp3_uint64;
+    typedef drmp3_uint32        drmp3_uintptr;
 #endif
-typedef drmp3_uint8          drmp3_bool8;
-typedef drmp3_uint32         drmp3_bool32;
-#define DRMP3_TRUE           1
-#define DRMP3_FALSE          0
+typedef drmp3_uint8             drmp3_bool8;
+typedef drmp3_uint32            drmp3_bool32;
+#define DRMP3_TRUE              1
+#define DRMP3_FALSE             0
+/* End Sized Types */
 
+/* Decorations */
 #if !defined(DRMP3_API)
     #if defined(DRMP3_DLL)
         #if defined(_WIN32)
@@ -164,7 +165,9 @@ typedef drmp3_uint32         drmp3_bool32;
         #define DRMP3_PRIVATE static
     #endif
 #endif
+/* End Decorations */
 
+/* Result Codes */
 typedef drmp3_int32 drmp3_result;
 #define DRMP3_SUCCESS                        0
 #define DRMP3_ERROR                         -1   /* A generic error. */
@@ -220,11 +223,12 @@ typedef drmp3_int32 drmp3_result;
 #define DRMP3_CANCELLED                     -51
 #define DRMP3_MEMORY_ALREADY_MAPPED         -52
 #define DRMP3_AT_END                        -53
-
+/* End Result Codes */
 
 #define DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME  1152
 #define DRMP3_MAX_SAMPLES_PER_FRAME         (DRMP3_MAX_PCM_FRAMES_PER_MP3_FRAME*2)
 
+/* Inline */
 #ifdef _MSC_VER
     #define DRMP3_INLINE __forceinline
 #elif defined(__GNUC__)
@@ -236,17 +240,37 @@ typedef drmp3_int32 drmp3_result;
     I am using "__inline__" only when we're compiling in strict ANSI mode.
     */
     #if defined(__STRICT_ANSI__)
-        #define DRMP3_INLINE __inline__ __attribute__((always_inline))
+        #define DRMP3_GNUC_INLINE_HINT __inline__
     #else
-        #define DRMP3_INLINE inline __attribute__((always_inline))
+        #define DRMP3_GNUC_INLINE_HINT inline
     #endif
+
+    #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
+        #define DRMP3_INLINE DRMP3_GNUC_INLINE_HINT __attribute__((always_inline))
+    #else
+        #define DRMP3_INLINE DRMP3_GNUC_INLINE_HINT
+    #endif
+#elif defined(__WATCOMC__)
+    #define DRMP3_INLINE __inline
 #else
     #define DRMP3_INLINE
 #endif
+/* End Inline */
 
 
 DRMP3_API void drmp3_version(drmp3_uint32* pMajor, drmp3_uint32* pMinor, drmp3_uint32* pRevision);
-DRMP3_API const char* drmp3_version_string();
+DRMP3_API const char* drmp3_version_string(void);
+
+
+/* Allocation Callbacks */
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drmp3_allocation_callbacks;
+/* End Allocation Callbacks */
 
 
 /*
@@ -280,14 +304,6 @@ DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num
 Main API (Pull API)
 ===================
 */
-#ifndef DRMP3_DEFAULT_CHANNELS
-#define DRMP3_DEFAULT_CHANNELS      2
-#endif
-#ifndef DRMP3_DEFAULT_SAMPLE_RATE
-#define DRMP3_DEFAULT_SAMPLE_RATE   44100
-#endif
-
-
 typedef enum
 {
     drmp3_seek_origin_start,
@@ -330,14 +346,6 @@ will be either drmp3_seek_origin_start or drmp3_seek_origin_current.
 */
 typedef drmp3_bool32 (* drmp3_seek_proc)(void* pUserData, int offset, drmp3_seek_origin origin);
 
-typedef struct
-{
-    void* pUserData;
-    void* (* onMalloc)(size_t sz, void* pUserData);
-    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
-    void  (* onFree)(void* p, void* pUserData);
-} drmp3_allocation_callbacks;
-
 typedef struct
 {
     drmp3_uint32 channels;
@@ -347,7 +355,6 @@ typedef struct
 typedef struct
 {
     drmp3dec decoder;
-    drmp3dec_frame_info frameInfo;
     drmp3_uint32 channels;
     drmp3_uint32 sampleRate;
     drmp3_read_proc onRead;
@@ -523,6 +530,9 @@ DRMP3_API void drmp3_free(void* p, const drmp3_allocation_callbacks* pAllocation
  ************************************************************************************************************************************************************
  ************************************************************************************************************************************************************/
 #if defined(DR_MP3_IMPLEMENTATION) || defined(DRMP3_IMPLEMENTATION)
+#ifndef dr_mp3_c
+#define dr_mp3_c
+
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h> /* For INT_MAX */
@@ -542,7 +552,7 @@ DRMP3_API void drmp3_version(drmp3_uint32* pMajor, drmp3_uint32* pMinor, drmp3_u
     }
 }
 
-DRMP3_API const char* drmp3_version_string()
+DRMP3_API const char* drmp3_version_string(void)
 {
     return DRMP3_VERSION_STRING;
 }
@@ -594,12 +604,12 @@ DRMP3_API const char* drmp3_version_string()
 
 #if !defined(DR_MP3_NO_SIMD)
 
-#if !defined(DR_MP3_ONLY_SIMD) && (defined(_M_X64) || defined(_M_ARM64) || defined(__x86_64__) || defined(__aarch64__))
+#if !defined(DR_MP3_ONLY_SIMD) && (defined(_M_X64) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC))
 /* x64 always have SSE2, arm64 always have neon, no need for generic code */
 #define DR_MP3_ONLY_SIMD
 #endif
 
-#if ((defined(_MSC_VER) && _MSC_VER >= 1400) && (defined(_M_IX86) || defined(_M_X64))) || ((defined(__i386__) || defined(__x86_64__)) && defined(__SSE2__))
+#if ((defined(_MSC_VER) && _MSC_VER >= 1400) && defined(_M_X64)) || ((defined(__i386) || defined(_M_IX86) || defined(__i386__) || defined(__x86_64__)) && ((defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)))
 #if defined(_MSC_VER)
 #include <intrin.h>
 #endif
@@ -670,7 +680,7 @@ static int drmp3_have_simd(void)
     return g_have_simd - 1;
 #endif
 }
-#elif defined(__ARM_NEON) || defined(__aarch64__)
+#elif defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
 #include <arm_neon.h>
 #define DRMP3_HAVE_SSE 0
 #define DRMP3_HAVE_SIMD 1
@@ -703,17 +713,44 @@ static int drmp3_have_simd(void)
 
 #endif
 
-#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__)
+#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6) && !defined(__aarch64__) && !defined(_M_ARM64) && !defined(_M_ARM64EC) && !defined(__ARM_ARCH_6M__)
 #define DRMP3_HAVE_ARMV6 1
-static __inline__ __attribute__((always_inline)) drmp3_int32 drmp3_clip_int16_arm(int32_t a)
+static __inline__ __attribute__((always_inline)) drmp3_int32 drmp3_clip_int16_arm(drmp3_int32 a)
 {
     drmp3_int32 x = 0;
     __asm__ ("ssat %0, #16, %1" : "=r"(x) : "r"(a));
     return x;
 }
+#else
+#define DRMP3_HAVE_ARMV6 0
 #endif
 
 
+/* Standard library stuff. */
+#ifndef DRMP3_ASSERT
+#include <assert.h>
+#define DRMP3_ASSERT(expression) assert(expression)
+#endif
+#ifndef DRMP3_COPY_MEMORY
+#define DRMP3_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz))
+#endif
+#ifndef DRMP3_MOVE_MEMORY
+#define DRMP3_MOVE_MEMORY(dst, src, sz) memmove((dst), (src), (sz))
+#endif
+#ifndef DRMP3_ZERO_MEMORY
+#define DRMP3_ZERO_MEMORY(p, sz) memset((p), 0, (sz))
+#endif
+#define DRMP3_ZERO_OBJECT(p) DRMP3_ZERO_MEMORY((p), sizeof(*(p)))
+#ifndef DRMP3_MALLOC
+#define DRMP3_MALLOC(sz) malloc((sz))
+#endif
+#ifndef DRMP3_REALLOC
+#define DRMP3_REALLOC(p, sz) realloc((p), (sz))
+#endif
+#ifndef DRMP3_FREE
+#define DRMP3_FREE(p) free((p))
+#endif
+
 typedef struct
 {
     const drmp3_uint8 *buf;
@@ -980,7 +1017,7 @@ static int drmp3_L12_dequantize_granule(float *grbuf, drmp3_bs *bs, drmp3_L12_sc
 static void drmp3_L12_apply_scf_384(drmp3_L12_scale_info *sci, const float *scf, float *dst)
 {
     int i, k;
-    memcpy(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));
+    DRMP3_COPY_MEMORY(dst + 576 + sci->stereo_bands*18, dst + sci->stereo_bands*18, (sci->total_bands - sci->stereo_bands)*18*sizeof(float));
     for (i = 0; i < sci->total_bands; i++, dst += 18, scf += 6)
     {
         for (k = 0; k < 12; k++)
@@ -1125,14 +1162,14 @@ static void drmp3_L3_read_scalefactors(drmp3_uint8 *scf, drmp3_uint8 *ist_pos, c
         int cnt = scf_count[i];
         if (scfsi & 8)
         {
-            memcpy(scf, ist_pos, cnt);
+            DRMP3_COPY_MEMORY(scf, ist_pos, cnt);
         } else
         {
             int bits = scf_size[i];
             if (!bits)
             {
-                memset(scf, 0, cnt);
-                memset(ist_pos, 0, cnt);
+                DRMP3_ZERO_MEMORY(scf, cnt);
+                DRMP3_ZERO_MEMORY(ist_pos, cnt);
             } else
             {
                 int max_scf = (scfsi < 0) ? (1 << bits) - 1 : -1;
@@ -1273,7 +1310,7 @@ static void drmp3_L3_huffman(float *dst, drmp3_bs *bs, const drmp3_L3_gr_info *g
     static const drmp3_int16 tabindex[2*16] = { 0,32,64,98,0,132,180,218,292,364,426,538,648,746,0,1126,1460,1460,1460,1460,1460,1460,1460,1460,1842,1842,1842,1842,1842,1842,1842,1842 };
     static const drmp3_uint8 g_linbits[] =  { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,6,8,10,13,4,5,6,7,8,9,11,13 };
 
-#define DRMP3_PEEK_BITS(n)    (bs_cache >> (32 - n))
+#define DRMP3_PEEK_BITS(n)    (bs_cache >> (32 - (n)))
 #define DRMP3_FLUSH_BITS(n)   { bs_cache <<= (n); bs_sh += (n); }
 #define DRMP3_CHECK_BITS      while (bs_sh >= 0) { bs_cache |= (drmp3_uint32)*bs_next_ptr++ << bs_sh; bs_sh -= 8; }
 #define DRMP3_BSPOS           ((bs_next_ptr - bs->buf)*8 - 24 + bs_sh)
@@ -1392,12 +1429,22 @@ static void drmp3_L3_midside_stereo(float *left, int n)
     int i = 0;
     float *right = left + 576;
 #if DRMP3_HAVE_SIMD
-    if (drmp3_have_simd()) for (; i < n - 3; i += 4)
+    if (drmp3_have_simd())
     {
-        drmp3_f4 vl = DRMP3_VLD(left + i);
-        drmp3_f4 vr = DRMP3_VLD(right + i);
-        DRMP3_VSTORE(left + i, DRMP3_VADD(vl, vr));
-        DRMP3_VSTORE(right + i, DRMP3_VSUB(vl, vr));
+        for (; i < n - 3; i += 4)
+        {
+            drmp3_f4 vl = DRMP3_VLD(left + i);
+            drmp3_f4 vr = DRMP3_VLD(right + i);
+            DRMP3_VSTORE(left + i, DRMP3_VADD(vl, vr));
+            DRMP3_VSTORE(right + i, DRMP3_VSUB(vl, vr));
+        }
+#ifdef __GNUC__
+        /* Workaround for spurious -Waggressive-loop-optimizations warning from gcc.
+         * For more info see: https://github.com/lieff/minimp3/issues/88
+         */
+        if (__builtin_constant_p(n % 4 == 0) && n % 4 == 0)
+            return;
+#endif
     }
 #endif
     for (; i < n; i++)
@@ -1507,7 +1554,7 @@ static void drmp3_L3_reorder(float *grbuf, float *scratch, const drmp3_uint8 *sf
             *dst++ = src[2*len];
         }
     }
-    memcpy(grbuf, scratch, (dst - scratch)*sizeof(float));
+    DRMP3_COPY_MEMORY(grbuf, scratch, (dst - scratch)*sizeof(float));
 }
 
 static void drmp3_L3_antialias(float *grbuf, int nbands)
@@ -1676,8 +1723,8 @@ static void drmp3_L3_imdct_short(float *grbuf, float *overlap, int nbands)
     for (;nbands > 0; nbands--, overlap += 9, grbuf += 18)
     {
         float tmp[18];
-        memcpy(tmp, grbuf, sizeof(tmp));
-        memcpy(grbuf, overlap, 6*sizeof(float));
+        DRMP3_COPY_MEMORY(tmp, grbuf, sizeof(tmp));
+        DRMP3_COPY_MEMORY(grbuf, overlap, 6*sizeof(float));
         drmp3_L3_imdct12(tmp, grbuf + 6, overlap + 6);
         drmp3_L3_imdct12(tmp + 1, grbuf + 12, overlap + 6);
         drmp3_L3_imdct12(tmp + 2, overlap, overlap + 6);
@@ -1721,7 +1768,7 @@ static void drmp3_L3_save_reservoir(drmp3dec *h, drmp3dec_scratch *s)
     }
     if (remains > 0)
     {
-        memmove(h->reserv_buf, s->maindata + pos, remains);
+        DRMP3_MOVE_MEMORY(h->reserv_buf, s->maindata + pos, remains);
     }
     h->reserv = remains;
 }
@@ -1730,8 +1777,8 @@ static int drmp3_L3_restore_reservoir(drmp3dec *h, drmp3_bs *bs, drmp3dec_scratc
 {
     int frame_bytes = (bs->limit - bs->pos)/8;
     int bytes_have = DRMP3_MIN(h->reserv, main_data_begin);
-    memcpy(s->maindata, h->reserv_buf + DRMP3_MAX(0, h->reserv - main_data_begin), DRMP3_MIN(h->reserv, main_data_begin));
-    memcpy(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
+    DRMP3_COPY_MEMORY(s->maindata, h->reserv_buf + DRMP3_MAX(0, h->reserv - main_data_begin), DRMP3_MIN(h->reserv, main_data_begin));
+    DRMP3_COPY_MEMORY(s->maindata + bytes_have, bs->buf + bs->pos/8, frame_bytes);
     drmp3_bs_init(&s->bs, s->maindata, bytes_have + frame_bytes);
     return h->reserv >= main_data_begin;
 }
@@ -1831,7 +1878,7 @@ static void drmp3d_DCT_II(float *grbuf, int n)
 #if DRMP3_HAVE_SSE
 #define DRMP3_VSAVE2(i, v) _mm_storel_pi((__m64 *)(void*)&y[i*18], v)
 #else
-#define DRMP3_VSAVE2(i, v) vst1_f32((float32_t *)&y[i*18],  vget_low_f32(v))
+#define DRMP3_VSAVE2(i, v) vst1_f32((float32_t *)&y[(i)*18],  vget_low_f32(v))
 #endif
             for (i = 0; i < 7; i++, y += 4*18)
             {
@@ -1847,7 +1894,7 @@ static void drmp3d_DCT_II(float *grbuf, int n)
             DRMP3_VSAVE2(3, t[3][7]);
         } else
         {
-#define DRMP3_VSAVE4(i, v) DRMP3_VSTORE(&y[i*18], v)
+#define DRMP3_VSAVE4(i, v) DRMP3_VSTORE(&y[(i)*18], v)
             for (i = 0; i < 7; i++, y += 4*18)
             {
                 drmp3_f4 s = DRMP3_VADD(t[3][i], t[3][i + 1]);
@@ -1864,7 +1911,7 @@ static void drmp3d_DCT_II(float *grbuf, int n)
     } else
 #endif
 #ifdef DR_MP3_ONLY_SIMD
-    {}
+    {} /* for DRMP3_HAVE_SIMD=1, DR_MP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
 #else
     for (; k < n; k++)
     {
@@ -1938,8 +1985,8 @@ static drmp3_int16 drmp3d_scale_pcm(float sample)
     s32 -= (s32 < 0);
     s = (drmp3_int16)drmp3_clip_int16_arm(s32);
 #else
-    if (sample >=  32766.5) return (drmp3_int16) 32767;
-    if (sample <= -32767.5) return (drmp3_int16)-32768;
+    if (sample >=  32766.5f) return (drmp3_int16) 32767;
+    if (sample <= -32767.5f) return (drmp3_int16)-32768;
     s = (drmp3_int16)(sample + .5f);
     s -= (s < 0);   /* away from zero, to be compliant */
 #endif
@@ -2070,7 +2117,11 @@ static void drmp3d_synth(float *xl, drmp3d_sample_t *dstl, int nch, float *lins)
             vst1_lane_s16(dstl + (49 + i)*nch, pcmb, 2);
 #endif
 #else
+        #if DRMP3_HAVE_SSE
             static const drmp3_f4 g_scale = { 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f, 1.0f/32768.0f };
+        #else
+            const drmp3_f4 g_scale = vdupq_n_f32(1.0f/32768.0f);
+        #endif
             a = DRMP3_VMUL(a, g_scale);
             b = DRMP3_VMUL(b, g_scale);
 #if DRMP3_HAVE_SSE
@@ -2097,7 +2148,7 @@ static void drmp3d_synth(float *xl, drmp3d_sample_t *dstl, int nch, float *lins)
     } else
 #endif
 #ifdef DR_MP3_ONLY_SIMD
-    {}
+    {} /* for DRMP3_HAVE_SIMD=1, DR_MP3_ONLY_SIMD=1 case we do not need non-intrinsic "else" branch */
 #else
     for (i = 14; i >= 0; i--)
     {
@@ -2138,7 +2189,7 @@ static void drmp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int
         drmp3d_DCT_II(grbuf + 576*i, nbands);
     }
 
-    memcpy(lins, qmf_state, sizeof(float)*15*64);
+    DRMP3_COPY_MEMORY(lins, qmf_state, sizeof(float)*15*64);
 
     for (i = 0; i < nbands; i += 2)
     {
@@ -2154,7 +2205,7 @@ static void drmp3d_synth_granule(float *qmf_state, float *grbuf, int nbands, int
     } else
 #endif
     {
-        memcpy(qmf_state, lins + nbands*64, sizeof(float)*15*64);
+        DRMP3_COPY_MEMORY(qmf_state, lins + nbands*64, sizeof(float)*15*64);
     }
 }
 
@@ -2220,7 +2271,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
     int i = 0, igr, frame_size = 0, success = 1;
     const drmp3_uint8 *hdr;
     drmp3_bs bs_frame[1];
-    static drmp3dec_scratch scratch;
+    drmp3dec_scratch scratch;
 
     if (mp3_bytes > 4 && dec->header[0] == 0xff && drmp3_hdr_compare(dec->header, mp3))
     {
@@ -2232,7 +2283,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
     }
     if (!frame_size)
     {
-        memset(dec, 0, sizeof(drmp3dec));
+        DRMP3_ZERO_MEMORY(dec, sizeof(drmp3dec));
         i = drmp3d_find_frame(mp3, mp3_bytes, &dec->free_format_bytes, &frame_size);
         if (!frame_size || i + frame_size > mp3_bytes)
         {
@@ -2242,7 +2293,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
     }
 
     hdr = mp3 + i;
-    memcpy(dec->header, hdr, DRMP3_HDR_SIZE);
+    DRMP3_COPY_MEMORY(dec->header, hdr, DRMP3_HDR_SIZE);
     info->frame_bytes = i + frame_size;
     info->channels = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2;
     info->hz = drmp3_hdr_sample_rate_hz(hdr);
@@ -2268,7 +2319,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
         {
             for (igr = 0; igr < (DRMP3_HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*576*info->channels))
             {
-                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
+                DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
                 drmp3_L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
                 drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
             }
@@ -2287,7 +2338,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
 
         drmp3_L12_read_scale_info(hdr, bs_frame, sci);
 
-        memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
+        DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
         for (i = 0, igr = 0; igr < 3; igr++)
         {
             if (12 == (i += drmp3_L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
@@ -2295,7 +2346,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
                 i = 0;
                 drmp3_L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
                 drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
-                memset(scratch.grbuf[0], 0, 576*2*sizeof(float));
+                DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
                 pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*384*info->channels);
             }
             if (bs_frame->pos > bs_frame->limit)
@@ -2353,9 +2404,9 @@ DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num
     for(; i < num_samples; i++)
     {
         float sample = in[i] * 32768.0f;
-        if (sample >=  32766.5)
+        if (sample >=  32766.5f)
             out[i] = (drmp3_int16) 32767;
-        else if (sample <= -32767.5)
+        else if (sample <= -32767.5f)
             out[i] = (drmp3_int16)-32768;
         else
         {
@@ -2373,8 +2424,7 @@ DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num
  Main Public API
 
  ************************************************************************************************************************************************************/
-#include <math.h>   /* For sin() and exp(). */
-
+/* SIZE_MAX */
 #if defined(SIZE_MAX)
     #define DRMP3_SIZE_MAX  SIZE_MAX
 #else
@@ -2384,6 +2434,7 @@ DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num
         #define DRMP3_SIZE_MAX  0xFFFFFFFF
     #endif
 #endif
+/* End SIZE_MAX */
 
 /* Options. */
 #ifndef DRMP3_SEEK_LEADING_MP3_FRAMES
@@ -2394,32 +2445,10 @@ DRMP3_API void drmp3dec_f32_to_s16(const float *in, drmp3_int16 *out, size_t num
 
 /* The size in bytes of each chunk of data to read from the MP3 stream. minimp3 recommends at least 16K, but in an attempt to reduce data movement I'm making this slightly larger. */
 #ifndef DRMP3_DATA_CHUNK_SIZE
-#define DRMP3_DATA_CHUNK_SIZE  DRMP3_MIN_DATA_CHUNK_SIZE*4
+#define DRMP3_DATA_CHUNK_SIZE  (DRMP3_MIN_DATA_CHUNK_SIZE*4)
 #endif
 
 
-/* Standard library stuff. */
-#ifndef DRMP3_ASSERT
-#include <assert.h>
-#define DRMP3_ASSERT(expression) assert(expression)
-#endif
-#ifndef DRMP3_COPY_MEMORY
-#define DRMP3_COPY_MEMORY(dst, src, sz) memcpy((dst), (src), (sz))
-#endif
-#ifndef DRMP3_ZERO_MEMORY
-#define DRMP3_ZERO_MEMORY(p, sz) memset((p), 0, (sz))
-#endif
-#define DRMP3_ZERO_OBJECT(p) DRMP3_ZERO_MEMORY((p), sizeof(*(p)))
-#ifndef DRMP3_MALLOC
-#define DRMP3_MALLOC(sz) malloc((sz))
-#endif
-#ifndef DRMP3_REALLOC
-#define DRMP3_REALLOC(p, sz) realloc((p), (sz))
-#endif
-#ifndef DRMP3_FREE
-#define DRMP3_FREE(p) free((p))
-#endif
-
 #define DRMP3_COUNTOF(x)        (sizeof(x) / sizeof(x[0]))
 #define DRMP3_CLAMP(x, lo, hi)  (DRMP3_MAX(lo, DRMP3_MIN(x, hi)))
 
@@ -2461,24 +2490,6 @@ static DRMP3_INLINE drmp3_uint32 drmp3_gcf_u32(drmp3_uint32 a, drmp3_uint32 b)
 }
 
 
-static DRMP3_INLINE double drmp3_sin(double x)
-{
-    /* TODO: Implement custom sin(x). */
-    return sin(x);
-}
-
-static DRMP3_INLINE double drmp3_exp(double x)
-{
-    /* TODO: Implement custom exp(x). */
-    return exp(x);
-}
-
-static DRMP3_INLINE double drmp3_cos(double x)
-{
-    return drmp3_sin((DRMP3_PI_D*0.5) - x);
-}
-
-
 static void* drmp3__malloc_default(size_t sz, void* pUserData)
 {
     (void)pUserData;
@@ -2650,7 +2661,10 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
             size_t bytesRead;
 
             /* First we need to move the data down. */
-            memmove(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
+            if (pMP3->pData != NULL) {
+                DRMP3_MOVE_MEMORY(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
+            }
+
             pMP3->dataConsumed = 0;
 
             if (pMP3->dataCapacity < DRMP3_DATA_CHUNK_SIZE) {
@@ -2684,6 +2698,14 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
             return 0; /* File too big. */
         }
 
+        DRMP3_ASSERT(pMP3->pData != NULL);
+        DRMP3_ASSERT(pMP3->dataCapacity > 0);
+
+        /* Do a runtime check here to try silencing a false-positive from clang-analyzer. */
+        if (pMP3->pData == NULL) {
+            return 0;
+        }
+
         pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->pData + pMP3->dataConsumed, (int)pMP3->dataSize, pPCMFrames, &info);    /* <-- Safe size_t -> int conversion thanks to the check above. */
 
         /* Consume the data. */
@@ -2705,7 +2727,7 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
             size_t bytesRead;
 
             /* First we need to move the data down. */
-            memmove(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
+            DRMP3_MOVE_MEMORY(pMP3->pData, pMP3->pData + pMP3->dataConsumed, pMP3->dataSize);
             pMP3->dataConsumed = 0;
 
             if (pMP3->dataCapacity == pMP3->dataSize) {
@@ -2750,12 +2772,22 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__memory(drmp3* pMP3, drmp3d_sampl
         return 0;
     }
 
-    pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->memory.pData + pMP3->memory.currentReadPos, (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos), pPCMFrames, &info);
-    if (pcmFramesRead > 0) {
-        pMP3->pcmFramesConsumedInMP3Frame  = 0;
-        pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
-        pMP3->mp3FrameChannels             = info.channels;
-        pMP3->mp3FrameSampleRate           = info.hz;
+    for (;;) {
+        pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->memory.pData + pMP3->memory.currentReadPos, (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos), pPCMFrames, &info);
+        if (pcmFramesRead > 0) {
+            pcmFramesRead = drmp3_hdr_frame_samples(pMP3->decoder.header);
+            pMP3->pcmFramesConsumedInMP3Frame  = 0;
+            pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
+            pMP3->mp3FrameChannels             = info.channels;
+            pMP3->mp3FrameSampleRate           = info.hz;
+            break;
+        } else if (info.frame_bytes > 0) {
+            /* No frames were read, but it looks like we skipped past one. Read the next MP3 frame. */
+            pMP3->memory.currentReadPos += (size_t)info.frame_bytes;
+        } else {
+            /* Nothing at all was read. Abort. */
+            break;
+        }
     }
 
     /* Consume the data. */
@@ -2818,8 +2850,8 @@ static drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drm
     }
 
     /* Decode the first frame to confirm that it is indeed a valid MP3 stream. */
-    if (!drmp3_decode_next_frame(pMP3)) {
-        drmp3_uninit(pMP3);
+    if (drmp3_decode_next_frame(pMP3) == 0) {
+        drmp3__free_from_callbacks(pMP3->pData, &pMP3->allocationCallbacks);    /* The call above may have allocated memory. Need to make sure it's freed before aborting. */
         return DRMP3_FALSE; /* Not a valid MP3 stream. */
     }
 
@@ -2915,6 +2947,7 @@ DRMP3_API drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t
 #include <stdio.h>
 #include <wchar.h>      /* For wcslen(), wcsrtombs() */
 
+/* Errno */
 /* drmp3_result_from_errno() is only used inside DR_MP3_NO_STDIO for now. Move this out if it's ever used elsewhere. */
 #include <errno.h>
 static drmp3_result drmp3_result_from_errno(int e)
@@ -3318,10 +3351,12 @@ static drmp3_result drmp3_result_from_errno(int e)
         default: return DRMP3_ERROR;
     }
 }
+/* End Errno */
 
+/* fopen */
 static drmp3_result drmp3_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
 {
-#if _MSC_VER && _MSC_VER >= 1400
+#if defined(_MSC_VER) && _MSC_VER >= 1400
     errno_t err;
 #endif
 
@@ -3333,7 +3368,7 @@ static drmp3_result drmp3_fopen(FILE** ppFile, const char* pFilePath, const char
         return DRMP3_INVALID_ARGS;
     }
 
-#if _MSC_VER && _MSC_VER >= 1400
+#if defined(_MSC_VER) && _MSC_VER >= 1400
     err = fopen_s(ppFile, pFilePath, pOpenMode);
     if (err != 0) {
         return drmp3_result_from_errno(err);
@@ -3368,12 +3403,13 @@ _wfopen() isn't always available in all compilation environments.
     * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
     * MinGW-64 (both 32- and 64-bit) seems to support it.
     * MinGW wraps it in !defined(__STRICT_ANSI__).
+    * OpenWatcom wraps it in !defined(_NO_EXT_KEYS).
 
 This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
 fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
 */
 #if defined(_WIN32)
-    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS))
         #define DRMP3_HAS_WFOPEN
     #endif
 #endif
@@ -3406,10 +3442,23 @@ static drmp3_result drmp3_wfopen(FILE** ppFile, const wchar_t* pFilePath, const
     }
 #else
     /*
-    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
-    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
-    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because
+	fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note
+	that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody gets a compiler
+	error I'll look into improving compatibility.
     */
+
+	/*
+	Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just
+	need to abort with an error. If you encounter a compiler lacking such support, add it to this list
+	and submit a bug report and it'll be added to the library upstream.
+	*/
+	#if defined(__DJGPP__)
+	{
+		/* Nothing to do here. This will fall through to the error check below. */
+	}
+	#else
     {
         mbstate_t mbs;
         size_t lenMB;
@@ -3451,6 +3500,7 @@ static drmp3_result drmp3_wfopen(FILE** ppFile, const wchar_t* pFilePath, const
 
         drmp3__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
     }
+	#endif
 
     if (*ppFile == NULL) {
         return DRMP3_ERROR;
@@ -3459,7 +3509,7 @@ static drmp3_result drmp3_wfopen(FILE** ppFile, const wchar_t* pFilePath, const
 
     return DRMP3_SUCCESS;
 }
-
+/* End fopen */
 
 
 static size_t drmp3__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
@@ -3474,22 +3524,38 @@ static drmp3_bool32 drmp3__on_seek_stdio(void* pUserData, int offset, drmp3_seek
 
 DRMP3_API drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks)
 {
+    drmp3_bool32 result;
     FILE* pFile;
+
     if (drmp3_fopen(&pFile, pFilePath, "rb") != DRMP3_SUCCESS) {
         return DRMP3_FALSE;
     }
 
-    return drmp3_init(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    result = drmp3_init(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRMP3_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRMP3_TRUE;
 }
 
 DRMP3_API drmp3_bool32 drmp3_init_file_w(drmp3* pMP3, const wchar_t* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks)
 {
+    drmp3_bool32 result;
     FILE* pFile;
+
     if (drmp3_wfopen(&pFile, pFilePath, L"rb", pAllocationCallbacks) != DRMP3_SUCCESS) {
         return DRMP3_FALSE;
     }
 
-    return drmp3_init(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    result = drmp3_init(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRMP3_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRMP3_TRUE;
 }
 #endif
 
@@ -3498,10 +3564,14 @@ DRMP3_API void drmp3_uninit(drmp3* pMP3)
     if (pMP3 == NULL) {
         return;
     }
-    
+
 #ifndef DR_MP3_NO_STDIO
     if (pMP3->onRead == drmp3__on_read_stdio) {
-        fclose((FILE*)pMP3->pUserData);
+        FILE* pFile = (FILE*)pMP3->pUserData;
+        if (pFile != NULL) {
+            fclose(pFile);
+            pMP3->pUserData = NULL; /* Make sure the file handle is cleared to NULL so we don't attempt to close it a second time. */
+        }
     }
 #endif
 
@@ -3642,7 +3712,7 @@ DRMP3_API drmp3_uint64 drmp3_read_pcm_frames_f32(drmp3* pMP3, drmp3_uint64 frame
                 break;
             }
 
-            drmp3_s16_to_f32((float*)DRMP3_OFFSET_PTR(pBufferOut, sizeof(drmp3_int16) * totalPCMFramesRead * pMP3->channels), pTempS16, framesJustRead * pMP3->channels);
+            drmp3_s16_to_f32((float*)DRMP3_OFFSET_PTR(pBufferOut, sizeof(float) * totalPCMFramesRead * pMP3->channels), pTempS16, framesJustRead * pMP3->channels);
             totalPCMFramesRead += framesJustRead;
         }
 
@@ -3882,7 +3952,7 @@ DRMP3_API drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint
 
     /* We'll need to seek back to where we were, so grab the PCM frame we're currently sitting on so we can restore later. */
     currentPCMFrame = pMP3->currentPCMFrame;
-    
+
     if (!drmp3_seek_to_start_of_stream(pMP3)) {
         return DRMP3_FALSE;
     }
@@ -3980,7 +4050,7 @@ DRMP3_API drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pS
 
     /* We'll need to seek back to the current sample after calculating the seekpoints so we need to go ahead and grab the current location at the top. */
     currentPCMFrame = pMP3->currentPCMFrame;
-    
+
     /* We never do more than the total number of MP3 frames and we limit it to 32-bits. */
     if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, &totalMP3FrameCount, &totalPCMFrameCount)) {
         return DRMP3_FALSE;
@@ -4152,7 +4222,7 @@ static float* drmp3__full_read_and_close_f32(drmp3* pMP3, drmp3_config* pConfig,
 
             oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(float);
             newFramesBufferSize = newFramesCap   * pMP3->channels * sizeof(float);
-            if (newFramesBufferSize > DRMP3_SIZE_MAX) {
+            if (newFramesBufferSize > (drmp3_uint64)DRMP3_SIZE_MAX) {
                 break;
             }
 
@@ -4219,7 +4289,7 @@ static drmp3_int16* drmp3__full_read_and_close_s16(drmp3* pMP3, drmp3_config* pC
 
             oldFramesBufferSize = framesCapacity * pMP3->channels * sizeof(drmp3_int16);
             newFramesBufferSize = newFramesCap   * pMP3->channels * sizeof(drmp3_int16);
-            if (newFramesBufferSize > DRMP3_SIZE_MAX) {
+            if (newFramesBufferSize > (drmp3_uint64)DRMP3_SIZE_MAX) {
                 break;
             }
 
@@ -4339,7 +4409,8 @@ DRMP3_API void drmp3_free(void* p, const drmp3_allocation_callbacks* pAllocation
     }
 }
 
-#endif /*DR_MP3_IMPLEMENTATION*/
+#endif  /* dr_mp3_c */
+#endif  /*DR_MP3_IMPLEMENTATION*/
 
 /*
 DIFFERENCES BETWEEN minimp3 AND dr_mp3
@@ -4424,6 +4495,99 @@ counts rather than sample counts.
 /*
 REVISION HISTORY
 ================
+v0.6.40 - 2024-12-17
+  - Improve detection of ARM64EC
+
+v0.6.39 - 2024-02-27
+  - Fix a Wdouble-promotion warning.
+
+v0.6.38 - 2023-11-02
+  - Fix build for ARMv6-M.
+
+v0.6.37 - 2023-07-07
+  - Silence a static analysis warning.
+
+v0.6.36 - 2023-06-17
+  - Fix an incorrect date in revision history. No functional change.
+
+v0.6.35 - 2023-05-22
+  - Minor code restructure. No functional change.
+
+v0.6.34 - 2022-09-17
+  - Fix compilation with DJGPP.
+  - Fix compilation when compiling with x86 with no SSE2.
+  - Remove an unnecessary variable from the drmp3 structure.
+
+v0.6.33 - 2022-04-10
+  - Fix compilation error with the MSVC ARM64 build.
+  - Fix compilation error on older versions of GCC.
+  - Remove some unused functions.
+
+v0.6.32 - 2021-12-11
+  - Fix a warning with Clang.
+
+v0.6.31 - 2021-08-22
+  - Fix a bug when loading from memory.
+
+v0.6.30 - 2021-08-16
+  - Silence some warnings.
+  - Replace memory operations with DRMP3_* macros.
+
+v0.6.29 - 2021-08-08
+  - Bring up to date with minimp3.
+
+v0.6.28 - 2021-07-31
+  - Fix platform detection for ARM64.
+  - Fix a compilation error with C89.
+
+v0.6.27 - 2021-02-21
+  - Fix a warning due to referencing _MSC_VER when it is undefined.
+
+v0.6.26 - 2021-01-31
+  - Bring up to date with minimp3.
+
+v0.6.25 - 2020-12-26
+  - Remove DRMP3_DEFAULT_CHANNELS and DRMP3_DEFAULT_SAMPLE_RATE which are leftovers from some removed APIs.
+
+v0.6.24 - 2020-12-07
+  - Fix a typo in version date for 0.6.23.
+
+v0.6.23 - 2020-12-03
+  - Fix an error where a file can be closed twice when initialization of the decoder fails.
+
+v0.6.22 - 2020-12-02
+  - Fix an error where it's possible for a file handle to be left open when initialization of the decoder fails.
+
+v0.6.21 - 2020-11-28
+  - Bring up to date with minimp3.
+
+v0.6.20 - 2020-11-21
+  - Fix compilation with OpenWatcom.
+
+v0.6.19 - 2020-11-13
+  - Minor code clean up.
+
+v0.6.18 - 2020-11-01
+  - Improve compiler support for older versions of GCC.
+
+v0.6.17 - 2020-09-28
+  - Bring up to date with minimp3.
+
+v0.6.16 - 2020-08-02
+  - Simplify sized types.
+
+v0.6.15 - 2020-07-25
+  - Fix a compilation warning.
+
+v0.6.14 - 2020-07-23
+  - Fix undefined behaviour with memmove().
+
+v0.6.13 - 2020-07-06
+  - Fix a bug when converting from s16 to f32 in drmp3_read_pcm_frames_f32().
+
+v0.6.12 - 2020-06-23
+  - Add include guard for the implementation section.
+
 v0.6.11 - 2020-05-26
   - Fix use of uninitialized variable error.
 
@@ -4650,7 +4814,7 @@ For more information, please refer to <http://unlicense.org/>
 ===============================================================================
 ALTERNATIVE 2 - MIT No Attribution
 ===============================================================================
-Copyright 2020 David Reid
+Copyright 2023 David Reid
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in
diff --git a/sys-tune/source/impl/dr_wav.h b/sys-tune/source/impl/dr_wav.h
index a1e5f58..f9f69a7 100644
--- a/sys-tune/source/impl/dr_wav.h
+++ b/sys-tune/source/impl/dr_wav.h
@@ -1,36 +1,17 @@
 /*
 WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_wav - v0.12.5 - 2020-05-27
+dr_wav - v0.13.17 - 2024-12-17
 
 David Reid - mackron@gmail.com
 
 GitHub: https://github.com/mackron/dr_libs
 */
 
-/*
-RELEASE NOTES - VERSION 0.12
-============================
-Version 0.12 includes breaking changes to custom chunk handling.
-
-
-Changes to Chunk Callback
--------------------------
-dr_wav supports the ability to fire a callback when a chunk is encounted (except for WAVE and FMT chunks). The callback has been updated to include both the
-container (RIFF or Wave64) and the FMT chunk which contains information about the format of the data in the wave file.
-
-Previously, there was no direct way to determine the container, and therefore no way discriminate against the different IDs in the chunk header (RIFF and
-Wave64 containers encode chunk ID's differently). The `container` parameter can be used to know which ID to use.
-
-Sometimes it can be useful to know the data format at the time the chunk callback is fired. A pointer to a `drwav_fmt` object is now passed into the chunk
-callback which will give you information about the data format. To determine the sample format, use `drwav_fmt_get_format()`. This will return one of the
-`DR_WAVE_FORMAT_*` tokens.
-*/
-
 /*
 Introduction
 ============
 This is a single file library. To use it, do something like the following in one .c file.
-    
+
     ```c
     #define DR_WAV_IMPLEMENTATION
     #include "dr_wav.h"
@@ -65,7 +46,7 @@ If you just want to quickly open and read the audio data in a single operation y
 
     ...
 
-    drwav_free(pSampleData);
+    drwav_free(pSampleData, NULL);
     ```
 
 The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in this case), but you can still output the
@@ -98,7 +79,9 @@ dr_wav can also be used to output WAV files. This does not currently support com
     drwav_uint64 framesWritten = drwav_write_pcm_frames(pWav, frameCount, pSamples);
     ```
 
-dr_wav has seamless support the Sony Wave64 format. The decoder will automatically detect it and it should Just Work without any manual intervention.
+Note that writing to AIFF or RIFX is not supported.
+
+dr_wav has support for decoding from a number of different encapsulation formats. See below for details.
 
 
 Build Options
@@ -111,24 +94,44 @@ Build Options
 #define DR_WAV_NO_STDIO
   Disables APIs that initialize a decoder from a file such as `drwav_init_file()`, `drwav_init_file_write()`, etc.
 
+#define DR_WAV_NO_WCHAR
+  Disables all functions ending with `_w`. Use this if your compiler does not provide wchar.h. Not required if DR_WAV_NO_STDIO is also defined.
+
+
+Supported Encapsulations
+========================
+- RIFF (Regular WAV)
+- RIFX (Big-Endian)
+- AIFF (Does not currently support ADPCM)
+- RF64
+- W64
+
+Note that AIFF and RIFX do not support write mode, nor do they support reading of metadata.
+
+
+Supported Encodings
+===================
+- Unsigned 8-bit PCM
+- Signed 12-bit PCM
+- Signed 16-bit PCM
+- Signed 24-bit PCM
+- Signed 32-bit PCM
+- IEEE 32-bit floating point
+- IEEE 64-bit floating point
+- A-law and u-law
+- Microsoft ADPCM
+- IMA ADPCM (DVI, format code 0x11)
+
+8-bit PCM encodings are always assumed to be unsigned. Signed 8-bit encoding can only be read with `drwav_read_raw()`.
+
+Note that ADPCM is not currently supported with AIFF. Contributions welcome.
 
 
 Notes
 =====
 - Samples are always interleaved.
 - The default read function does not do any data conversion. Use `drwav_read_pcm_frames_f32()`, `drwav_read_pcm_frames_s32()` and `drwav_read_pcm_frames_s16()`
-  to read and convert audio data to 32-bit floating point, signed 32-bit integer and signed 16-bit integer samples respectively. Tested and supported internal
-  formats include the following:
-  - Unsigned 8-bit PCM
-  - Signed 12-bit PCM
-  - Signed 16-bit PCM
-  - Signed 24-bit PCM
-  - Signed 32-bit PCM
-  - IEEE 32-bit floating point
-  - IEEE 64-bit floating point
-  - A-law and u-law
-  - Microsoft ADPCM
-  - IMA ADPCM (DVI, format code 0x11)
+  to read and convert audio data to 32-bit floating point, signed 32-bit integer and signed 16-bit integer samples respectively.
 - dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
 */
 
@@ -143,47 +146,48 @@ extern "C" {
 #define DRWAV_XSTRINGIFY(x)     DRWAV_STRINGIFY(x)
 
 #define DRWAV_VERSION_MAJOR     0
-#define DRWAV_VERSION_MINOR     12
-#define DRWAV_VERSION_REVISION  5
+#define DRWAV_VERSION_MINOR     13
+#define DRWAV_VERSION_REVISION  17
 #define DRWAV_VERSION_STRING    DRWAV_XSTRINGIFY(DRWAV_VERSION_MAJOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_MINOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_REVISION)
 
 #include <stddef.h> /* For size_t. */
 
-/* Sized types. Prefer built-in types. Fall back to stdint. */
-#ifdef _MSC_VER
-    #if defined(__clang__)
+/* Sized Types */
+typedef   signed char           drwav_int8;
+typedef unsigned char           drwav_uint8;
+typedef   signed short          drwav_int16;
+typedef unsigned short          drwav_uint16;
+typedef   signed int            drwav_int32;
+typedef unsigned int            drwav_uint32;
+#if defined(_MSC_VER) && !defined(__clang__)
+    typedef   signed __int64    drwav_int64;
+    typedef unsigned __int64    drwav_uint64;
+#else
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
         #pragma GCC diagnostic push
-        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
-        #pragma GCC diagnostic ignored "-Wlong-long"        
-        #pragma GCC diagnostic ignored "-Wc++11-long-long"
-    #endif
-    typedef   signed __int8  drwav_int8;
-    typedef unsigned __int8  drwav_uint8;
-    typedef   signed __int16 drwav_int16;
-    typedef unsigned __int16 drwav_uint16;
-    typedef   signed __int32 drwav_int32;
-    typedef unsigned __int32 drwav_uint32;
-    typedef   signed __int64 drwav_int64;
-    typedef unsigned __int64 drwav_uint64;
-    #if defined(__clang__)
+        #pragma GCC diagnostic ignored "-Wlong-long"
+        #if defined(__clang__)
+            #pragma GCC diagnostic ignored "-Wc++11-long-long"
+        #endif
+    #endif
+    typedef   signed long long  drwav_int64;
+    typedef unsigned long long  drwav_uint64;
+    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
         #pragma GCC diagnostic pop
     #endif
+#endif
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) || defined(__powerpc64__)
+    typedef drwav_uint64        drwav_uintptr;
 #else
-    #include <stdint.h>
-    typedef int8_t           drwav_int8;
-    typedef uint8_t          drwav_uint8;
-    typedef int16_t          drwav_int16;
-    typedef uint16_t         drwav_uint16;
-    typedef int32_t          drwav_int32;
-    typedef uint32_t         drwav_uint32;
-    typedef int64_t          drwav_int64;
-    typedef uint64_t         drwav_uint64;
+    typedef drwav_uint32        drwav_uintptr;
 #endif
-typedef drwav_uint8          drwav_bool8;
-typedef drwav_uint32         drwav_bool32;
-#define DRWAV_TRUE           1
-#define DRWAV_FALSE          0
+typedef drwav_uint8             drwav_bool8;
+typedef drwav_uint32            drwav_bool32;
+#define DRWAV_TRUE              1
+#define DRWAV_FALSE             0
+/* End Sized Types */
 
+/* Decorations */
 #if !defined(DRWAV_API)
     #if defined(DRWAV_DLL)
         #if defined(_WIN32)
@@ -213,7 +217,9 @@ typedef drwav_uint32         drwav_bool32;
         #define DRWAV_PRIVATE static
     #endif
 #endif
+/* End Decorations */
 
+/* Result Codes */
 typedef drwav_int32 drwav_result;
 #define DRWAV_SUCCESS                        0
 #define DRWAV_ERROR                         -1   /* A generic error. */
@@ -269,6 +275,7 @@ typedef drwav_int32 drwav_result;
 #define DRWAV_CANCELLED                     -51
 #define DRWAV_MEMORY_ALREADY_MAPPED         -52
 #define DRWAV_AT_END                        -53
+/* End Result Codes */
 
 /* Common data formats. */
 #define DR_WAVE_FORMAT_PCM          0x1
@@ -279,16 +286,22 @@ typedef drwav_int32 drwav_result;
 #define DR_WAVE_FORMAT_DVI_ADPCM    0x11
 #define DR_WAVE_FORMAT_EXTENSIBLE   0xFFFE
 
-/* Constants. */
-#ifndef DRWAV_MAX_SMPL_LOOPS
-#define DRWAV_MAX_SMPL_LOOPS        1
-#endif
-
 /* Flags to pass into drwav_init_ex(), etc. */
 #define DRWAV_SEQUENTIAL            0x00000001
+#define DRWAV_WITH_METADATA         0x00000002
 
 DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision);
-DRWAV_API const char* drwav_version_string();
+DRWAV_API const char* drwav_version_string(void);
+
+/* Allocation Callbacks */
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drwav_allocation_callbacks;
+/* End Allocation Callbacks */
 
 typedef enum
 {
@@ -299,7 +312,10 @@ typedef enum
 typedef enum
 {
     drwav_container_riff,
-    drwav_container_w64
+    drwav_container_rifx,
+    drwav_container_w64,
+    drwav_container_rf64,
+    drwav_container_aiff
 } drwav_container;
 
 typedef struct
@@ -420,23 +436,16 @@ Returns the number of bytes read + seeked.
 To read data from the chunk, call onRead(), passing in pReadSeekUserData as the first parameter. Do the same for seeking with onSeek(). The return value must
 be the total number of bytes you have read _plus_ seeked.
 
-Use the `container` argument to discriminate the fields in `pChunkHeader->id`. If the container is `drwav_container_riff` you should use `id.fourcc`,
-otherwise you should use `id.guid`.
+Use the `container` argument to discriminate the fields in `pChunkHeader->id`. If the container is `drwav_container_riff` or `drwav_container_rf64` you should
+use `id.fourcc`, otherwise you should use `id.guid`.
 
 The `pFMT` parameter can be used to determine the data format of the wave file. Use `drwav_fmt_get_format()` to get the sample format, which will be one of the
-`DR_WAVE_FORMAT_*` identifiers. 
+`DR_WAVE_FORMAT_*` identifiers.
 
 The read pointer will be sitting on the first byte after the chunk's header. You must not attempt to read beyond the boundary of the chunk.
 */
 typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_chunk_header* pChunkHeader, drwav_container container, const drwav_fmt* pFMT);
 
-typedef struct
-{
-    void* pUserData;
-    void* (* onMalloc)(size_t sz, void* pUserData);
-    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
-    void  (* onFree)(void* p, void* pUserData);
-} drwav_allocation_callbacks;
 
 /* Structure for internal use. Only used for loaders opened with drwav_init_memory(). */
 typedef struct
@@ -465,32 +474,367 @@ typedef struct
     drwav_uint32 bitsPerSample;
 } drwav_data_format;
 
+typedef enum
+{
+    drwav_metadata_type_none                        = 0,
+
+    /*
+    Unknown simply means a chunk that drwav does not handle specifically. You can still ask to
+    receive these chunks as metadata objects. It is then up to you to interpret the chunk's data.
+    You can also write unknown metadata to a wav file. Be careful writing unknown chunks if you
+    have also edited the audio data. The unknown chunks could represent offsets/sizes that no
+    longer correctly correspond to the audio data.
+    */
+    drwav_metadata_type_unknown                     = 1 << 0,
+
+    /* Only 1 of each of these metadata items is allowed in a wav file. */
+    drwav_metadata_type_smpl                        = 1 << 1,
+    drwav_metadata_type_inst                        = 1 << 2,
+    drwav_metadata_type_cue                         = 1 << 3,
+    drwav_metadata_type_acid                        = 1 << 4,
+    drwav_metadata_type_bext                        = 1 << 5,
+
+    /*
+    Wav files often have a LIST chunk. This is a chunk that contains a set of subchunks. For this
+    higher-level metadata API, we don't make a distinction between a regular chunk and a LIST
+    subchunk. Instead, they are all just 'metadata' items.
+
+    There can be multiple of these metadata items in a wav file.
+    */
+    drwav_metadata_type_list_label                  = 1 << 6,
+    drwav_metadata_type_list_note                   = 1 << 7,
+    drwav_metadata_type_list_labelled_cue_region    = 1 << 8,
+
+    drwav_metadata_type_list_info_software          = 1 << 9,
+    drwav_metadata_type_list_info_copyright         = 1 << 10,
+    drwav_metadata_type_list_info_title             = 1 << 11,
+    drwav_metadata_type_list_info_artist            = 1 << 12,
+    drwav_metadata_type_list_info_comment           = 1 << 13,
+    drwav_metadata_type_list_info_date              = 1 << 14,
+    drwav_metadata_type_list_info_genre             = 1 << 15,
+    drwav_metadata_type_list_info_album             = 1 << 16,
+    drwav_metadata_type_list_info_tracknumber       = 1 << 17,
+
+    /* Other type constants for convenience. */
+    drwav_metadata_type_list_all_info_strings       = drwav_metadata_type_list_info_software
+                                                    | drwav_metadata_type_list_info_copyright
+                                                    | drwav_metadata_type_list_info_title
+                                                    | drwav_metadata_type_list_info_artist
+                                                    | drwav_metadata_type_list_info_comment
+                                                    | drwav_metadata_type_list_info_date
+                                                    | drwav_metadata_type_list_info_genre
+                                                    | drwav_metadata_type_list_info_album
+                                                    | drwav_metadata_type_list_info_tracknumber,
+
+    drwav_metadata_type_list_all_adtl               = drwav_metadata_type_list_label
+                                                    | drwav_metadata_type_list_note
+                                                    | drwav_metadata_type_list_labelled_cue_region,
+
+    drwav_metadata_type_all                         = -2,   /*0xFFFFFFFF & ~drwav_metadata_type_unknown,*/
+    drwav_metadata_type_all_including_unknown       = -1    /*0xFFFFFFFF,*/
+} drwav_metadata_type;
+
+/*
+Sampler Metadata
+
+The sampler chunk contains information about how a sound should be played in the context of a whole
+audio production, and when used in a sampler. See https://en.wikipedia.org/wiki/Sample-based_synthesis.
+*/
+typedef enum
+{
+    drwav_smpl_loop_type_forward  = 0,
+    drwav_smpl_loop_type_pingpong = 1,
+    drwav_smpl_loop_type_backward = 2
+} drwav_smpl_loop_type;
 
-/* See the following for details on the 'smpl' chunk: https://sites.google.com/site/musicgapi/technical-documents/wav-file-format#smpl */
 typedef struct
 {
+    /* The ID of the associated cue point, see drwav_cue and drwav_cue_point. As with all cue point IDs, this can correspond to a label chunk to give this loop a name, see drwav_list_label_or_note. */
     drwav_uint32 cuePointId;
+
+    /* See drwav_smpl_loop_type. */
     drwav_uint32 type;
-    drwav_uint32 start;
-    drwav_uint32 end;
-    drwav_uint32 fraction;
+
+    /* The byte offset of the first sample to be played in the loop. */
+    drwav_uint32 firstSampleByteOffset;
+
+    /* The byte offset into the audio data of the last sample to be played in the loop. */
+    drwav_uint32 lastSampleByteOffset;
+
+    /* A value to represent that playback should occur at a point between samples. This value ranges from 0 to UINT32_MAX. Where a value of 0 means no fraction, and a value of (UINT32_MAX / 2) would mean half a sample. */
+    drwav_uint32 sampleFraction;
+
+    /* Number of times to play the loop. 0 means loop infinitely. */
     drwav_uint32 playCount;
 } drwav_smpl_loop;
 
- typedef struct
+typedef struct
 {
-    drwav_uint32 manufacturer;
-    drwav_uint32 product;
-    drwav_uint32 samplePeriod;
-    drwav_uint32 midiUnityNotes;
+    /* IDs for a particular MIDI manufacturer. 0 if not used. */
+    drwav_uint32 manufacturerId;
+    drwav_uint32 productId;
+
+    /* The period of 1 sample in nanoseconds. */
+    drwav_uint32 samplePeriodNanoseconds;
+
+    /* The MIDI root note of this file. 0 to 127. */
+    drwav_uint32 midiUnityNote;
+
+    /* The fraction of a semitone up from the given MIDI note. This is a value from 0 to UINT32_MAX, where 0 means no change and (UINT32_MAX / 2) is half a semitone (AKA 50 cents). */
     drwav_uint32 midiPitchFraction;
+
+    /* Data relating to SMPTE standards which are used for syncing audio and video. 0 if not used. */
     drwav_uint32 smpteFormat;
     drwav_uint32 smpteOffset;
-    drwav_uint32 numSampleLoops;
-    drwav_uint32 samplerData;
-    drwav_smpl_loop loops[DRWAV_MAX_SMPL_LOOPS];
+
+    /* drwav_smpl_loop loops. */
+    drwav_uint32 sampleLoopCount;
+
+    /* Optional sampler-specific data. */
+    drwav_uint32 samplerSpecificDataSizeInBytes;
+
+    drwav_smpl_loop* pLoops;
+    drwav_uint8* pSamplerSpecificData;
 } drwav_smpl;
 
+/*
+Instrument Metadata
+
+The inst metadata contains data about how a sound should be played as part of an instrument. This
+is commonly read by samplers. See https://en.wikipedia.org/wiki/Sample-based_synthesis.
+*/
+typedef struct
+{
+    drwav_int8 midiUnityNote;   /* The root note of the audio as a MIDI note number. 0 to 127. */
+    drwav_int8 fineTuneCents;   /* -50 to +50 */
+    drwav_int8 gainDecibels;    /* -64 to +64 */
+    drwav_int8 lowNote;         /* 0 to 127 */
+    drwav_int8 highNote;        /* 0 to 127 */
+    drwav_int8 lowVelocity;     /* 1 to 127 */
+    drwav_int8 highVelocity;    /* 1 to 127 */
+} drwav_inst;
+
+/*
+Cue Metadata
+
+Cue points are markers at specific points in the audio. They often come with an associated piece of
+drwav_list_label_or_note metadata which contains the text for the marker.
+*/
+typedef struct
+{
+    /* Unique identification value. */
+    drwav_uint32 id;
+
+    /* Set to 0. This is only relevant if there is a 'playlist' chunk - which is not supported by dr_wav. */
+    drwav_uint32 playOrderPosition;
+
+    /* Should always be "data". This represents the fourcc value of the chunk that this cue point corresponds to. dr_wav only supports a single data chunk so this should always be "data". */
+    drwav_uint8 dataChunkId[4];
+
+    /* Set to 0. This is only relevant if there is a wave list chunk. dr_wav, like lots of readers/writers, does not support this. */
+    drwav_uint32 chunkStart;
+
+    /* Set to 0 for uncompressed formats. Else the last byte in compressed wave data where decompression can begin to find the value of the corresponding sample value. */
+    drwav_uint32 blockStart;
+
+    /* For uncompressed formats this is the byte offset of the cue point into the audio data. For compressed formats this is relative to the block specified with blockStart. */
+    drwav_uint32 sampleByteOffset;
+} drwav_cue_point;
+
+typedef struct
+{
+    drwav_uint32 cuePointCount;
+    drwav_cue_point *pCuePoints;
+} drwav_cue;
+
+/*
+Acid Metadata
+
+This chunk contains some information about the time signature and the tempo of the audio.
+*/
+typedef enum
+{
+    drwav_acid_flag_one_shot      = 1,  /* If this is not set, then it is a loop instead of a one-shot. */
+    drwav_acid_flag_root_note_set = 2,
+    drwav_acid_flag_stretch       = 4,
+    drwav_acid_flag_disk_based    = 8,
+    drwav_acid_flag_acidizer      = 16  /* Not sure what this means. */
+} drwav_acid_flag;
+
+typedef struct
+{
+    /* A bit-field, see drwav_acid_flag. */
+    drwav_uint32 flags;
+
+    /* Valid if flags contains drwav_acid_flag_root_note_set. It represents the MIDI root note of the file - a value from 0 to 127. */
+    drwav_uint16 midiUnityNote;
+
+    /* Reserved values that should probably be ignored. reserved1 seems to often be 128 and reserved2 is 0. */
+    drwav_uint16 reserved1;
+    float reserved2;
+
+    /* Number of beats. */
+    drwav_uint32 numBeats;
+
+    /* The time signature of the audio. */
+    drwav_uint16 meterDenominator;
+    drwav_uint16 meterNumerator;
+
+    /* Beats per minute of the track. Setting a value of 0 suggests that there is no tempo. */
+    float tempo;
+} drwav_acid;
+
+/*
+Cue Label or Note metadata
+
+These are 2 different types of metadata, but they have the exact same format. Labels tend to be the
+more common and represent a short name for a cue point. Notes might be used to represent a longer
+comment.
+*/
+typedef struct
+{
+    /* The ID of a cue point that this label or note corresponds to. */
+    drwav_uint32 cuePointId;
+
+    /* Size of the string not including any null terminator. */
+    drwav_uint32 stringLength;
+
+    /* The string. The *init_with_metadata functions null terminate this for convenience. */
+    char* pString;
+} drwav_list_label_or_note;
+
+/*
+BEXT metadata, also known as Broadcast Wave Format (BWF)
+
+This metadata adds some extra description to an audio file. You must check the version field to
+determine if the UMID or the loudness fields are valid.
+*/
+typedef struct
+{
+    /*
+    These top 3 fields, and the umid field, are actually defined in the standard as statically
+    sized buffers. In order to reduce the size of this struct (and therefore the union in the
+    metadata struct), we instead store these as pointers.
+    */
+    char* pDescription;                 /* Can be NULL or a null-terminated string, must be <= 256 characters. */
+    char* pOriginatorName;              /* Can be NULL or a null-terminated string, must be <= 32 characters. */
+    char* pOriginatorReference;         /* Can be NULL or a null-terminated string, must be <= 32 characters. */
+    char  pOriginationDate[10];         /* ASCII "yyyy:mm:dd". */
+    char  pOriginationTime[8];          /* ASCII "hh:mm:ss". */
+    drwav_uint64 timeReference;         /* First sample count since midnight. */
+    drwav_uint16 version;               /* Version of the BWF, check this to see if the fields below are valid. */
+
+    /*
+    Unrestricted ASCII characters containing a collection of strings terminated by CR/LF. Each
+    string shall contain a description of a coding process applied to the audio data.
+    */
+    char* pCodingHistory;
+    drwav_uint32 codingHistorySize;
+
+    /* Fields below this point are only valid if the version is 1 or above. */
+    drwav_uint8* pUMID;                  /* Exactly 64 bytes of SMPTE UMID */
+
+    /* Fields below this point are only valid if the version is 2 or above. */
+    drwav_uint16 loudnessValue;         /* Integrated Loudness Value of the file in LUFS (multiplied by 100). */
+    drwav_uint16 loudnessRange;         /* Loudness Range of the file in LU (multiplied by 100). */
+    drwav_uint16 maxTruePeakLevel;      /* Maximum True Peak Level of the file expressed as dBTP (multiplied by 100). */
+    drwav_uint16 maxMomentaryLoudness;  /* Highest value of the Momentary Loudness Level of the file in LUFS (multiplied by 100). */
+    drwav_uint16 maxShortTermLoudness;  /* Highest value of the Short-Term Loudness Level of the file in LUFS (multiplied by 100). */
+} drwav_bext;
+
+/*
+Info Text Metadata
+
+There are many different types of information text that can be saved in this format. This is where
+things like the album name, the artists, the year it was produced, etc are saved. See
+drwav_metadata_type for the full list of types that dr_wav supports.
+*/
+typedef struct
+{
+    /* Size of the string not including any null terminator. */
+    drwav_uint32 stringLength;
+
+    /* The string. The *init_with_metadata functions null terminate this for convenience. */
+    char* pString;
+} drwav_list_info_text;
+
+/*
+Labelled Cue Region Metadata
+
+The labelled cue region metadata is used to associate some region of audio with text. The region
+starts at a cue point, and extends for the given number of samples.
+*/
+typedef struct
+{
+    /* The ID of a cue point that this object corresponds to. */
+    drwav_uint32 cuePointId;
+
+    /* The number of samples from the cue point forwards that should be considered this region */
+    drwav_uint32 sampleLength;
+
+    /* Four characters used to say what the purpose of this region is. */
+    drwav_uint8 purposeId[4];
+
+    /* Unsure of the exact meanings of these. It appears to be acceptable to set them all to 0. */
+    drwav_uint16 country;
+    drwav_uint16 language;
+    drwav_uint16 dialect;
+    drwav_uint16 codePage;
+
+    /* Size of the string not including any null terminator. */
+    drwav_uint32 stringLength;
+
+    /* The string. The *init_with_metadata functions null terminate this for convenience. */
+    char* pString;
+} drwav_list_labelled_cue_region;
+
+/*
+Unknown Metadata
+
+This chunk just represents a type of chunk that dr_wav does not understand.
+
+Unknown metadata has a location attached to it. This is because wav files can have a LIST chunk
+that contains subchunks. These LIST chunks can be one of two types. An adtl list, or an INFO
+list. This enum is used to specify the location of a chunk that dr_wav currently doesn't support.
+*/
+typedef enum
+{
+    drwav_metadata_location_invalid,
+    drwav_metadata_location_top_level,
+    drwav_metadata_location_inside_info_list,
+    drwav_metadata_location_inside_adtl_list
+} drwav_metadata_location;
+
+typedef struct
+{
+    drwav_uint8 id[4];
+    drwav_metadata_location chunkLocation;
+    drwav_uint32 dataSizeInBytes;
+    drwav_uint8* pData;
+} drwav_unknown_metadata;
+
+/*
+Metadata is saved as a union of all the supported types.
+*/
+typedef struct
+{
+    /* Determines which item in the union is valid. */
+    drwav_metadata_type type;
+
+    union
+    {
+        drwav_cue cue;
+        drwav_smpl smpl;
+        drwav_acid acid;
+        drwav_inst inst;
+        drwav_bext bext;
+        drwav_list_label_or_note labelOrNote;   /* List label or list note. */
+        drwav_list_labelled_cue_region labelledCueRegion;
+        drwav_list_info_text infoText;          /* Any of the list info types. */
+        drwav_unknown_metadata unknown;
+    } data;
+} drwav_metadata;
+
 typedef struct
 {
     /* A pointer to the function to call when more data is needed. */
@@ -534,13 +878,16 @@ typedef struct
 
     /* The size in bytes of the data chunk. */
     drwav_uint64 dataChunkDataSize;
-    
-    /* The position in the stream of the first byte of the data chunk. This is used for seeking. */
+
+    /* The position in the stream of the first data byte of the data chunk. This is used for seeking. */
     drwav_uint64 dataChunkDataPos;
 
     /* The number of bytes remaining in the data chunk. */
     drwav_uint64 bytesRemaining;
 
+    /* The current read position in PCM frames. */
+    drwav_uint64 readCursorInPCMFrames;
+
 
     /*
     Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always
@@ -552,20 +899,16 @@ typedef struct
     drwav_bool32 isSequentialWrite;
 
 
-    /* smpl chunk. */
-    drwav_smpl smpl;
+    /* An array of metadata. This is valid after the *init_with_metadata call returns. It will be valid until drwav_uninit() is called. You can take ownership of this data with drwav_take_ownership_of_metadata(). */
+    drwav_metadata* pMetadata;
+    drwav_uint32 metadataCount;
 
 
     /* A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_init_memory(). */
     drwav__memory_stream memoryStream;
     drwav__memory_stream_write memoryStreamWrite;
 
-    /* Generic data for compressed formats. This data is shared across all block-compressed formats. */
-    struct
-    {
-        drwav_uint64 iCurrentPCMFrame;  /* The index of the next PCM frame that will be read by drwav_read_*(). This is used with "totalPCMFrameCount" to ensure we don't read excess samples at the end of the last block. */
-    } compressed;
-    
+
     /* Microsoft ADPCM specific data. */
     struct
     {
@@ -586,6 +929,13 @@ typedef struct
         drwav_int32  cachedFrames[16]; /* Samples are stored in this cache during decoding. */
         drwav_uint32 cachedFrameCount;
     } ima;
+
+    /* AIFF specific data. */
+    struct
+    {
+        drwav_bool8 isLE;   /* Will be set to true if the audio data is little-endian encoded. */
+        drwav_bool8 isUnsigned; /* Only used for 8-bit samples. When set to true, will be treated as unsigned. */
+    } aiff;
 } drwav;
 
 
@@ -620,13 +970,15 @@ See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
 */
 DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
 DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_with_metadata(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
 
 /*
 Initializes a pre-allocated drwav object for writing.
 
-onWrite   [in]           The function to call when data needs to be written.
-onSeek    [in]           The function to call when the write position needs to move.
-pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
+onWrite                  [in]           The function to call when data needs to be written.
+onSeek                   [in]           The function to call when the write position needs to move.
+pUserData                [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
+pMetadata, metadataCount [in, optional] An array of metadata objects that should be written to the file. The array is not edited. You are responsible for this metadata memory and it must remain valid until drwav_uninit() is called.
 
 Returns true if successful; false otherwise.
 
@@ -643,19 +995,31 @@ See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit()
 DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
 DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
 DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_write_with_metadata(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks, drwav_metadata* pMetadata, drwav_uint32 metadataCount);
 
 /*
 Utility function to determine the target size of the entire data to be written (including all headers and chunks).
 
 Returns the target size in bytes.
 
+The metadata argument can be NULL meaning no metadata exists.
+
 Useful if the application needs to know the size to allocate.
 
 Only writing to the RIFF chunk and one data chunk is currently supported.
 
 See also: drwav_init_write(), drwav_init_file_write(), drwav_init_memory_write()
 */
-DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
+DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalFrameCount, drwav_metadata* pMetadata, drwav_uint32 metadataCount);
+
+/*
+Take ownership of the metadata objects that were allocated via one of the init_with_metadata() function calls. The init_with_metadata functions perform a single heap allocation for this metadata.
+
+Useful if you want the data to persist beyond the lifetime of the drwav object.
+
+You must free the data returned from this function using drwav_free().
+*/
+DRWAV_API drwav_metadata* drwav_take_ownership_of_metadata(drwav* pWav);
 
 /*
 Uninitializes the given drwav object.
@@ -674,6 +1038,8 @@ bytes of the raw internal sample data.
 Consider using drwav_read_pcm_frames_s16(), drwav_read_pcm_frames_s32() or drwav_read_pcm_frames_f32() for
 reading sample data in a consistent format.
 
+pBufferOut can be NULL in which case a seek will be performed.
+
 Returns the number of bytes actually read.
 */
 DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut);
@@ -689,6 +1055,8 @@ you have requested more PCM frames than can possibly fit in the output buffer.
 
 This function will only work when sample data is of a fixed size and uncompressed. If you are
 using a compressed format consider using drwav_read_raw() or drwav_read_pcm_frames_s16/s32/f32().
+
+pBufferOut can be NULL in which case a seek will be performed.
 */
 DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
@@ -701,6 +1069,16 @@ Returns true if successful; false otherwise.
 */
 DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex);
 
+/*
+Retrieves the current read position in PCM frames.
+*/
+DRWAV_API drwav_result drwav_get_cursor_in_pcm_frames(drwav* pWav, drwav_uint64* pCursor);
+
+/*
+Retrieves the length of the file in PCM frames.
+*/
+DRWAV_API drwav_result drwav_get_length_in_pcm_frames(drwav* pWav, drwav_uint64* pLength);
+
 
 /*
 Writes raw audio data.
@@ -721,13 +1099,14 @@ DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesTo
 DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
 DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
 
-
 /* Conversion Utilities */
 #ifndef DR_WAV_NO_CONVERSION_API
 
 /*
 Reads a chunk of audio data and converts it to signed 16-bit PCM samples.
 
+pBufferOut can be NULL in which case a seek will be performed.
+
 Returns the number of PCM frames actually read.
 
 If the return value is less than <framesToRead> it means the end of the file has been reached.
@@ -761,6 +1140,8 @@ DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, siz
 /*
 Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples.
 
+pBufferOut can be NULL in which case a seek will be performed.
+
 Returns the number of PCM frames actually read.
 
 If the return value is less than <framesToRead> it means the end of the file has been reached.
@@ -794,6 +1175,8 @@ DRWAV_API void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sa
 /*
 Reads a chunk of audio data and converts it to signed 32-bit PCM samples.
 
+pBufferOut can be NULL in which case a seek will be performed.
+
 Returns the number of PCM frames actually read.
 
 If the return value is less than <framesToRead> it means the end of the file has been reached.
@@ -840,6 +1223,9 @@ DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const
 DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
 DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
 DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_with_metadata(drwav* pWav, const char* filename, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_with_metadata_w(drwav* pWav, const wchar_t* filename, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
 
 /*
 Helper for initializing a wave file for writing using stdio.
@@ -866,14 +1252,15 @@ The buffer should contain the contents of the entire wave file, not just the sam
 */
 DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks);
 DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_with_metadata(drwav* pWav, const void* data, size_t dataSize, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
 
 /*
 Helper for initializing a writer which outputs data to a memory buffer.
 
 dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
 
-The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be
-considered valid until after drwav_uninit() has been called anyway.
+The buffer will remain allocated even after drwav_uninit() is called. The buffer should not be considered valid
+until after drwav_uninit() has been called.
 */
 DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
 DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
@@ -922,6 +1309,7 @@ DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data);
 DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data);
 DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data);
 DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data);
+DRWAV_API float drwav_bytes_to_f32(const drwav_uint8* data);
 
 /* Compares a GUID for the purpose of checking the type of a Wave64 chunk. */
 DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16]);
@@ -943,14 +1331,24 @@ DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b);
  ************************************************************************************************************************************************************
  ************************************************************************************************************************************************************/
 #if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
+#ifndef dr_wav_c
+#define dr_wav_c
+
+#ifdef __MRC__
+/* MrC currently doesn't compile dr_wav correctly with any optimizations enabled. */
+#pragma options opt off
+#endif
+
 #include <stdlib.h>
-#include <string.h> /* For memcpy(), memset() */
+#include <string.h>
 #include <limits.h> /* For INT_MAX */
 
 #ifndef DR_WAV_NO_STDIO
 #include <stdio.h>
+#ifndef DR_WAV_NO_WCHAR
 #include <wchar.h>
 #endif
+#endif
 
 /* Standard library stuff. */
 #ifndef DRWAV_ASSERT
@@ -981,18 +1379,21 @@ DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b);
 #define drwav_min(a, b)                    (((a) < (b)) ? (a) : (b))
 #define drwav_max(a, b)                    (((a) > (b)) ? (a) : (b))
 #define drwav_clamp(x, lo, hi)             (drwav_max((lo), drwav_min((hi), (x))))
+#define drwav_offset_ptr(p, offset)        (((drwav_uint8*)(p)) + (offset))
 
-#define DRWAV_MAX_SIMD_VECTOR_SIZE         64  /* 64 for AVX-512 in the future. */
+#define DRWAV_MAX_SIMD_VECTOR_SIZE         32
 
-/* CPU architecture. */
-#if defined(__x86_64__) || defined(_M_X64)
+/* Architecture Detection */
+#if defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))
     #define DRWAV_X64
 #elif defined(__i386) || defined(_M_IX86)
     #define DRWAV_X86
 #elif defined(__arm__) || defined(_M_ARM)
     #define DRWAV_ARM
 #endif
+/* End Architecture Detection */
 
+/* Inline */
 #ifdef _MSC_VER
     #define DRWAV_INLINE __forceinline
 #elif defined(__GNUC__)
@@ -1004,14 +1405,24 @@ DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b);
     I am using "__inline__" only when we're compiling in strict ANSI mode.
     */
     #if defined(__STRICT_ANSI__)
-        #define DRWAV_INLINE __inline__ __attribute__((always_inline))
+        #define DRWAV_GNUC_INLINE_HINT __inline__
+    #else
+        #define DRWAV_GNUC_INLINE_HINT inline
+    #endif
+
+    #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)) || defined(__clang__)
+        #define DRWAV_INLINE DRWAV_GNUC_INLINE_HINT __attribute__((always_inline))
     #else
-        #define DRWAV_INLINE inline __attribute__((always_inline))
+        #define DRWAV_INLINE DRWAV_GNUC_INLINE_HINT
     #endif
+#elif defined(__WATCOMC__)
+    #define DRWAV_INLINE __inline
 #else
     #define DRWAV_INLINE
 #endif
+/* End Inline */
 
+/* SIZE_MAX */
 #if defined(SIZE_MAX)
     #define DRWAV_SIZE_MAX  SIZE_MAX
 #else
@@ -1021,6 +1432,11 @@ DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b);
         #define DRWAV_SIZE_MAX  0xFFFFFFFF
     #endif
 #endif
+/* End SIZE_MAX */
+
+/* Weird bit manipulation is for C89 compatibility (no direct support for 64-bit integers). */
+#define DRWAV_INT64_MIN ((drwav_int64) ((drwav_uint64)0x80000000 << 32))
+#define DRWAV_INT64_MAX ((drwav_int64)(((drwav_uint64)0x7FFFFFFF << 32) | 0xFFFFFFFF))
 
 #if defined(_MSC_VER) && _MSC_VER >= 1400
     #define DRWAV_HAS_BYTESWAP16_INTRINSIC
@@ -1063,7 +1479,7 @@ DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_u
     }
 }
 
-DRWAV_API const char* drwav_version_string()
+DRWAV_API const char* drwav_version_string(void)
 {
     return DRWAV_VERSION_STRING;
 }
@@ -1084,33 +1500,11 @@ you doing?! (Let me know, I'd be curious!) Second, you can adjust these by #defi
 
 static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00};    /* 66666972-912E-11CF-A5D6-28DB04C10000 */
 static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 65766177-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A */
+/*static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};*/    /* 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A */
 static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A */
 static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 74636166-ACF3-11D3-8CD1-00C04F8EDB8A */
 static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 61746164-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A */
-
-static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
-{
-    int i;
-    for (i = 0; i < 16; i += 1) {
-        if (a[i] != b[i]) {
-            return DRWAV_FALSE;
-        }
-    }
-
-    return DRWAV_TRUE;
-}
-
-static DRWAV_INLINE drwav_bool32 drwav__fourcc_equal(const drwav_uint8* a, const char* b)
-{
-    return
-        a[0] == b[0] &&
-        a[1] == b[1] &&
-        a[2] == b[2] &&
-        a[3] == b[3];
-}
-
+/*static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};*/    /* 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A */
 
 
 static DRWAV_INLINE int drwav__is_little_endian(void)
@@ -1125,39 +1519,8 @@ static DRWAV_INLINE int drwav__is_little_endian(void)
 #endif
 }
 
-static DRWAV_INLINE drwav_uint16 drwav__bytes_to_u16(const drwav_uint8* data)
-{
-    return (data[0] << 0) | (data[1] << 8);
-}
-
-static DRWAV_INLINE drwav_int16 drwav__bytes_to_s16(const drwav_uint8* data)
-{
-    return (short)drwav__bytes_to_u16(data);
-}
-
-static DRWAV_INLINE drwav_uint32 drwav__bytes_to_u32(const drwav_uint8* data)
-{
-    return (data[0] << 0) | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
-}
-
-static DRWAV_INLINE drwav_int32 drwav__bytes_to_s32(const drwav_uint8* data)
-{
-    return (drwav_int32)drwav__bytes_to_u32(data);
-}
-
-static DRWAV_INLINE drwav_uint64 drwav__bytes_to_u64(const drwav_uint8* data)
-{
-    return
-        ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24) |
-        ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
-}
 
-static DRWAV_INLINE drwav_int64 drwav__bytes_to_s64(const drwav_uint8* data)
-{
-    return (drwav_int64)drwav__bytes_to_u64(data);
-}
-
-static DRWAV_INLINE void drwav__bytes_to_guid(const drwav_uint8* data, drwav_uint8* guid)
+static DRWAV_INLINE void drwav_bytes_to_guid(const drwav_uint8* data, drwav_uint8* guid)
 {
     int i;
     for (i = 0; i < 16; ++i) {
@@ -1224,14 +1587,15 @@ static DRWAV_INLINE drwav_uint64 drwav__bswap64(drwav_uint64 n)
         #error "This compiler does not support the byte swap intrinsic."
     #endif
 #else
-    return ((n & (drwav_uint64)0xFF00000000000000) >> 56) |
-           ((n & (drwav_uint64)0x00FF000000000000) >> 40) |
-           ((n & (drwav_uint64)0x0000FF0000000000) >> 24) |
-           ((n & (drwav_uint64)0x000000FF00000000) >>  8) |
-           ((n & (drwav_uint64)0x00000000FF000000) <<  8) |
-           ((n & (drwav_uint64)0x0000000000FF0000) << 24) |
-           ((n & (drwav_uint64)0x000000000000FF00) << 40) |
-           ((n & (drwav_uint64)0x00000000000000FF) << 56);
+    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+    return ((n & ((drwav_uint64)0xFF000000 << 32)) >> 56) |
+           ((n & ((drwav_uint64)0x00FF0000 << 32)) >> 40) |
+           ((n & ((drwav_uint64)0x0000FF00 << 32)) >> 24) |
+           ((n & ((drwav_uint64)0x000000FF << 32)) >>  8) |
+           ((n & ((drwav_uint64)0xFF000000      )) <<  8) |
+           ((n & ((drwav_uint64)0x00FF0000      )) << 24) |
+           ((n & ((drwav_uint64)0x0000FF00      )) << 40) |
+           ((n & ((drwav_uint64)0x000000FF      )) << 56);
 #endif
 }
 
@@ -1282,65 +1646,65 @@ static DRWAV_INLINE void drwav__bswap_samples_s32(drwav_int32* pSamples, drwav_u
 }
 
 
-static DRWAV_INLINE float drwav__bswap_f32(float n)
+static DRWAV_INLINE drwav_int64 drwav__bswap_s64(drwav_int64 n)
 {
-    union {
-        drwav_uint32 i;
-        float f;
-    } x;
-    x.f = n;
-    x.i = drwav__bswap32(x.i);
-
-    return x.f;
+    return (drwav_int64)drwav__bswap64((drwav_uint64)n);
 }
 
-static DRWAV_INLINE void drwav__bswap_samples_f32(float* pSamples, drwav_uint64 sampleCount)
+static DRWAV_INLINE void drwav__bswap_samples_s64(drwav_int64* pSamples, drwav_uint64 sampleCount)
 {
     drwav_uint64 iSample;
     for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_f32(pSamples[iSample]);
+        pSamples[iSample] = drwav__bswap_s64(pSamples[iSample]);
     }
 }
 
 
-static DRWAV_INLINE double drwav__bswap_f64(double n)
+static DRWAV_INLINE float drwav__bswap_f32(float n)
 {
     union {
-        drwav_uint64 i;
-        double f;
+        drwav_uint32 i;
+        float f;
     } x;
     x.f = n;
-    x.i = drwav__bswap64(x.i);
+    x.i = drwav__bswap32(x.i);
 
     return x.f;
 }
 
-static DRWAV_INLINE void drwav__bswap_samples_f64(double* pSamples, drwav_uint64 sampleCount)
+static DRWAV_INLINE void drwav__bswap_samples_f32(float* pSamples, drwav_uint64 sampleCount)
 {
     drwav_uint64 iSample;
     for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_f64(pSamples[iSample]);
+        pSamples[iSample] = drwav__bswap_f32(pSamples[iSample]);
     }
 }
 
 
-static DRWAV_INLINE void drwav__bswap_samples_pcm(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
+static DRWAV_INLINE void drwav__bswap_samples(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
 {
-    /* Assumes integer PCM. Floating point PCM is done in drwav__bswap_samples_ieee(). */
     switch (bytesPerSample)
     {
-        case 2: /* s16, s12 (loosely packed) */
+        case 1:
+        {
+            /* No-op. */
+        } break;
+        case 2:
         {
             drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
         } break;
-        case 3: /* s24 */
+        case 3:
         {
             drwav__bswap_samples_s24((drwav_uint8*)pSamples, sampleCount);
         } break;
-        case 4: /* s32 */
+        case 4:
         {
             drwav__bswap_samples_s32((drwav_int32*)pSamples, sampleCount);
         } break;
+        case 8:
+        {
+            drwav__bswap_samples_s64((drwav_int64*)pSamples, sampleCount);
+        } break;
         default:
         {
             /* Unsupported format. */
@@ -1349,83 +1713,115 @@ static DRWAV_INLINE void drwav__bswap_samples_pcm(void* pSamples, drwav_uint64 s
     }
 }
 
-static DRWAV_INLINE void drwav__bswap_samples_ieee(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
+
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_bool32 drwav_is_container_be(drwav_container container)
 {
-    switch (bytesPerSample)
-    {
-    #if 0   /* Contributions welcome for f16 support. */
-        case 2: /* f16 */
-        {
-            drwav__bswap_samples_f16((drwav_float16*)pSamples, sampleCount);
-        } break;
-    #endif
-        case 4: /* f32 */
-        {
-            drwav__bswap_samples_f32((float*)pSamples, sampleCount);
-        } break;
-        case 8: /* f64 */
-        {
-            drwav__bswap_samples_f64((double*)pSamples, sampleCount);
-        } break;
-        default:
-        {
-            /* Unsupported format. */
-            DRWAV_ASSERT(DRWAV_FALSE);
-        } break;
+    if (container == drwav_container_rifx || container == drwav_container_aiff) {
+        return DRWAV_TRUE;
+    } else {
+        return DRWAV_FALSE;
     }
 }
 
-static DRWAV_INLINE void drwav__bswap_samples(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample, drwav_uint16 format)
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint16 drwav_bytes_to_u16_le(const drwav_uint8* data)
 {
-    switch (format)
-    {
-        case DR_WAVE_FORMAT_PCM:
-        {
-            drwav__bswap_samples_pcm(pSamples, sampleCount, bytesPerSample);
-        } break;
+    return ((drwav_uint16)data[0] << 0) | ((drwav_uint16)data[1] << 8);
+}
 
-        case DR_WAVE_FORMAT_IEEE_FLOAT:
-        {
-            drwav__bswap_samples_ieee(pSamples, sampleCount, bytesPerSample);
-        } break;
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint16 drwav_bytes_to_u16_be(const drwav_uint8* data)
+{
+    return ((drwav_uint16)data[1] << 0) | ((drwav_uint16)data[0] << 8);    /* Big-endian: data[0] is the most significant byte. Reads 2 bytes. */
+}
 
-        case DR_WAVE_FORMAT_ALAW:
-        case DR_WAVE_FORMAT_MULAW:
-        {
-            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
-        } break;
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint16 drwav_bytes_to_u16_ex(const drwav_uint8* data, drwav_container container)
+{
+    if (drwav_is_container_be(container)) {    /* Decode 2 bytes using the byte order implied by the container type. */
+        return drwav_bytes_to_u16_be(data);
+    } else {
+        return drwav_bytes_to_u16_le(data);
+    }
+}
 
-        case DR_WAVE_FORMAT_ADPCM:
-        case DR_WAVE_FORMAT_DVI_ADPCM:
-        default:
-        {
-            /* Unsupported format. */
-            DRWAV_ASSERT(DRWAV_FALSE);
-        } break;
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint32 drwav_bytes_to_u32_le(const drwav_uint8* data)
+{
+    return ((drwav_uint32)data[0] << 0) | ((drwav_uint32)data[1] << 8) | ((drwav_uint32)data[2] << 16) | ((drwav_uint32)data[3] << 24);    /* Little-endian: data[0] is the least significant byte. Reads 4 bytes. */
+}
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint32 drwav_bytes_to_u32_be(const drwav_uint8* data)
+{
+    return ((drwav_uint32)data[3] << 0) | ((drwav_uint32)data[2] << 8) | ((drwav_uint32)data[1] << 16) | ((drwav_uint32)data[0] << 24);    /* Big-endian: data[0] is the most significant byte. Reads 4 bytes. */
+}
+
+DRWAV_PRIVATE DRWAV_INLINE drwav_uint32 drwav_bytes_to_u32_ex(const drwav_uint8* data, drwav_container container)
+{
+    if (drwav_is_container_be(container)) {    /* Decode 4 bytes using the byte order implied by the container type. */
+        return drwav_bytes_to_u32_be(data);
+    } else {
+        return drwav_bytes_to_u32_le(data);
+    }
+}
+
+
+
+DRWAV_PRIVATE drwav_int64 drwav_aiff_extented_to_s64(const drwav_uint8* data)
+{
+    /* Converts the 10-byte, big-endian 80-bit extended float at `data` to a signed 64-bit integer (truncated toward zero, saturated when too large). */
+    drwav_int32 exponent = (drwav_int32)(((drwav_uint32)data[0] << 8) | data[1]);   /* Signed: removing the bias below must be able to go negative. */
+    drwav_uint64 hi = ((drwav_uint64)data[2] << 24) | ((drwav_uint64)data[3] << 16) | ((drwav_uint64)data[4] <<  8) | ((drwav_uint64)data[5] <<  0);
+    drwav_uint64 lo = ((drwav_uint64)data[6] << 24) | ((drwav_uint64)data[7] << 16) | ((drwav_uint64)data[8] <<  8) | ((drwav_uint64)data[9] <<  0);
+    drwav_uint64 significand = (hi << 32) | lo;
+    int sign = exponent >> 15;
+
+    /* Remove sign bit. */
+    exponent &= 0x7FFF;
+
+    /* Special cases. */
+    if (exponent == 0 && significand == 0) {
+        return 0;
+    } else if (exponent == 0x7FFF) {
+        return sign ? DRWAV_INT64_MIN : DRWAV_INT64_MAX;    /* Infinite. */
+    }
+
+    exponent -= 16383;  /* Remove the exponent bias. Negative for magnitudes below 1. */
+
+    if (exponent > 63) {
+        return sign ? DRWAV_INT64_MIN : DRWAV_INT64_MAX;    /* Too big for a 64-bit integer. */
+    } else if (exponent < 0) {
+        return 0;  /* Magnitude is less than 1, so truncates to 0. An unbiased exponent of exactly 0 still has an integer part and is handled below. */
+    }
+
+    significand >>= (63 - exponent);    /* Discard the fractional bits, leaving the integer part. */
+    if (sign) {
+        return -(drwav_int64)significand;
+    } else {
+        return  (drwav_int64)significand;
+    }
+}
 
 
-static void* drwav__malloc_default(size_t sz, void* pUserData)
+DRWAV_PRIVATE void* drwav__malloc_default(size_t sz, void* pUserData)
 {
     (void)pUserData;
     return DRWAV_MALLOC(sz);
 }
 
-static void* drwav__realloc_default(void* p, size_t sz, void* pUserData)
+DRWAV_PRIVATE void* drwav__realloc_default(void* p, size_t sz, void* pUserData)
 {
     (void)pUserData;
     return DRWAV_REALLOC(p, sz);
 }
 
-static void drwav__free_default(void* p, void* pUserData)
+DRWAV_PRIVATE void drwav__free_default(void* p, void* pUserData)
 {
     (void)pUserData;
     DRWAV_FREE(p);
 }
 
 
-static void* drwav__malloc_from_callbacks(size_t sz, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE void* drwav__malloc_from_callbacks(size_t sz, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     if (pAllocationCallbacks == NULL) {
         return NULL;
@@ -1443,7 +1839,7 @@ static void* drwav__malloc_from_callbacks(size_t sz, const drwav_allocation_call
     return NULL;
 }
 
-static void* drwav__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE void* drwav__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     if (pAllocationCallbacks == NULL) {
         return NULL;
@@ -1473,7 +1869,7 @@ static void* drwav__realloc_from_callbacks(void* p, size_t szNew, size_t szOld,
     return NULL;
 }
 
-static void drwav__free_from_callbacks(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE void drwav__free_from_callbacks(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     if (p == NULL || pAllocationCallbacks == NULL) {
         return;
@@ -1485,7 +1881,7 @@ static void drwav__free_from_callbacks(void* p, const drwav_allocation_callbacks
 }
 
 
-static drwav_allocation_callbacks drwav_copy_allocation_callbacks_or_defaults(const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE drwav_allocation_callbacks drwav_copy_allocation_callbacks_or_defaults(const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     if (pAllocationCallbacks != NULL) {
         /* Copy. */
@@ -1509,23 +1905,23 @@ static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 fo
         formatTag == DR_WAVE_FORMAT_DVI_ADPCM;
 }
 
-static unsigned int drwav__chunk_padding_size_riff(drwav_uint64 chunkSize)
+DRWAV_PRIVATE unsigned int drwav__chunk_padding_size_riff(drwav_uint64 chunkSize)
 {
     return (unsigned int)(chunkSize % 2);
 }
 
-static unsigned int drwav__chunk_padding_size_w64(drwav_uint64 chunkSize)
+DRWAV_PRIVATE unsigned int drwav__chunk_padding_size_w64(drwav_uint64 chunkSize)
 {
     return (unsigned int)(chunkSize % 8);
 }
 
-static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
-static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
-static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+DRWAV_PRIVATE drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
 
-static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut)
+DRWAV_PRIVATE drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut)
 {
-    if (container == drwav_container_riff) {
+    if (container == drwav_container_riff || container == drwav_container_rifx || container == drwav_container_rf64 || container == drwav_container_aiff) {
         drwav_uint8 sizeInBytes[4];
 
         if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) {
@@ -1536,10 +1932,11 @@ static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUser
             return DRWAV_INVALID_FILE;
         }
 
-        pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes);
+        pHeaderOut->sizeInBytes = drwav_bytes_to_u32_ex(sizeInBytes, container);
         pHeaderOut->paddingSize = drwav__chunk_padding_size_riff(pHeaderOut->sizeInBytes);
+
         *pRunningBytesReadOut += 8;
-    } else {
+    } else if (container == drwav_container_w64) {
         drwav_uint8 sizeInBytes[8];
 
         if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) {
@@ -1550,15 +1947,17 @@ static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUser
             return DRWAV_INVALID_FILE;
         }
 
-        pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24;    /* <-- Subtract 24 because w64 includes the size of the header. */
+        pHeaderOut->sizeInBytes = drwav_bytes_to_u64(sizeInBytes) - 24;    /* <-- Subtract 24 because w64 includes the size of the header. */
         pHeaderOut->paddingSize = drwav__chunk_padding_size_w64(pHeaderOut->sizeInBytes);
         *pRunningBytesReadOut += 24;
+    } else {
+        return DRWAV_INVALID_FILE;
     }
 
     return DRWAV_SUCCESS;
 }
 
-static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+DRWAV_PRIVATE drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
 {
     drwav_uint64 bytesRemainingToSeek = offset;
     while (bytesRemainingToSeek > 0) {
@@ -1578,7 +1977,7 @@ static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 off
     return DRWAV_TRUE;
 }
 
-static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+DRWAV_PRIVATE drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
 {
     if (offset <= 0x7FFFFFFF) {
         return onSeek(pUserData, (int)offset, drwav_seek_origin_start);
@@ -1606,117 +2005,8 @@ static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64
 }
 
 
-static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_fmt* fmtOut)
-{
-    drwav_chunk_header header;
-    drwav_uint8 fmt[16];
-
-    if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-
-    /* Skip non-fmt chunks. */
-    while ((container == drwav_container_riff && !drwav__fourcc_equal(header.id.fourcc, "fmt ")) || (container == drwav_container_w64 && !drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT))) {
-        if (!drwav__seek_forward(onSeek, header.sizeInBytes + header.paddingSize, pUserData)) {
-            return DRWAV_FALSE;
-        }
-        *pRunningBytesReadOut += header.sizeInBytes + header.paddingSize;
-
-        /* Try the next header. */
-        if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
-            return DRWAV_FALSE;
-        }
-    }
-
-
-    /* Validation. */
-    if (container == drwav_container_riff) {
-        if (!drwav__fourcc_equal(header.id.fourcc, "fmt ")) {
-            return DRWAV_FALSE;
-        }
-    } else {
-        if (!drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT)) {
-            return DRWAV_FALSE;
-        }
-    }
-
-
-    if (onRead(pUserData, fmt, sizeof(fmt)) != sizeof(fmt)) {
-        return DRWAV_FALSE;
-    }
-    *pRunningBytesReadOut += sizeof(fmt);
-
-    fmtOut->formatTag      = drwav__bytes_to_u16(fmt + 0);
-    fmtOut->channels       = drwav__bytes_to_u16(fmt + 2);
-    fmtOut->sampleRate     = drwav__bytes_to_u32(fmt + 4);
-    fmtOut->avgBytesPerSec = drwav__bytes_to_u32(fmt + 8);
-    fmtOut->blockAlign     = drwav__bytes_to_u16(fmt + 12);
-    fmtOut->bitsPerSample  = drwav__bytes_to_u16(fmt + 14);
-
-    fmtOut->extendedSize       = 0;
-    fmtOut->validBitsPerSample = 0;
-    fmtOut->channelMask        = 0;
-    memset(fmtOut->subFormat, 0, sizeof(fmtOut->subFormat));
-
-    if (header.sizeInBytes > 16) {
-        drwav_uint8 fmt_cbSize[2];
-        int bytesReadSoFar = 0;
-
-        if (onRead(pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) {
-            return DRWAV_FALSE;    /* Expecting more data. */
-        }
-        *pRunningBytesReadOut += sizeof(fmt_cbSize);
-
-        bytesReadSoFar = 18;
-
-        fmtOut->extendedSize = drwav__bytes_to_u16(fmt_cbSize);
-        if (fmtOut->extendedSize > 0) {
-            /* Simple validation. */
-            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
-                if (fmtOut->extendedSize != 22) {
-                    return DRWAV_FALSE;
-                }
-            }
-
-            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
-                drwav_uint8 fmtext[22];
-                if (onRead(pUserData, fmtext, fmtOut->extendedSize) != fmtOut->extendedSize) {
-                    return DRWAV_FALSE;    /* Expecting more data. */
-                }
-
-                fmtOut->validBitsPerSample = drwav__bytes_to_u16(fmtext + 0);
-                fmtOut->channelMask        = drwav__bytes_to_u32(fmtext + 2);
-                drwav__bytes_to_guid(fmtext + 6, fmtOut->subFormat);
-            } else {
-                if (!onSeek(pUserData, fmtOut->extendedSize, drwav_seek_origin_current)) {
-                    return DRWAV_FALSE;
-                }
-            }
-            *pRunningBytesReadOut += fmtOut->extendedSize;
-
-            bytesReadSoFar += fmtOut->extendedSize;
-        }
 
-        /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. */
-        if (!onSeek(pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current)) {
-            return DRWAV_FALSE;
-        }
-        *pRunningBytesReadOut += (header.sizeInBytes - bytesReadSoFar);
-    }
-
-    if (header.paddingSize > 0) {
-        if (!onSeek(pUserData, header.paddingSize, drwav_seek_origin_current)) {
-            return DRWAV_FALSE;
-        }
-        *pRunningBytesReadOut += header.paddingSize;
-    }
-
-    return DRWAV_TRUE;
-}
-
-
-static size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
+DRWAV_PRIVATE size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
 {
     size_t bytesRead;
 
@@ -1729,7 +2019,7 @@ static size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBuf
 }
 
 #if 0
-static drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor)
+DRWAV_PRIVATE drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor)
 {
     DRWAV_ASSERT(onSeek != NULL);
     DRWAV_ASSERT(pCursor != NULL);
@@ -1749,193 +2039,1206 @@ static drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int
 #endif
 
 
+#define DRWAV_SMPL_BYTES                    36
+#define DRWAV_SMPL_LOOP_BYTES               24
+#define DRWAV_INST_BYTES                    7
+#define DRWAV_ACID_BYTES                    24
+#define DRWAV_CUE_BYTES                     4
+#define DRWAV_BEXT_BYTES                    602
+#define DRWAV_BEXT_DESCRIPTION_BYTES        256
+#define DRWAV_BEXT_ORIGINATOR_NAME_BYTES    32
+#define DRWAV_BEXT_ORIGINATOR_REF_BYTES     32
+#define DRWAV_BEXT_RESERVED_BYTES           180
+#define DRWAV_BEXT_UMID_BYTES               64
+#define DRWAV_CUE_POINT_BYTES               24
+#define DRWAV_LIST_LABEL_OR_NOTE_BYTES      4
+#define DRWAV_LIST_LABELLED_TEXT_BYTES      20
 
-static drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav)
+#define DRWAV_METADATA_ALIGNMENT            8
+
+typedef enum
 {
-    /*
-    The bytes per frame is a bit ambiguous. It can be either be based on the bits per sample, or the block align. The way I'm doing it here
-    is that if the bits per sample is a multiple of 8, use floor(bitsPerSample*channels/8), otherwise fall back to the block align.
-    */
-    if ((pWav->bitsPerSample & 0x7) == 0) {
-        /* Bits per sample is a multiple of 8. */
-        return (pWav->bitsPerSample * pWav->fmt.channels) >> 3;
-    } else {
-        return pWav->fmt.blockAlign;
+    drwav__metadata_parser_stage_count,
+    drwav__metadata_parser_stage_read
+} drwav__metadata_parser_stage;
+
+typedef struct
+{
+    drwav_read_proc onRead;
+    drwav_seek_proc onSeek;
+    void *pReadSeekUserData;
+    drwav__metadata_parser_stage stage;
+    drwav_metadata *pMetadata;
+    drwav_uint32 metadataCount;
+    drwav_uint8 *pData;
+    drwav_uint8 *pDataCursor;
+    drwav_uint64 metadataCursor;
+    drwav_uint64 extraCapacity;
+} drwav__metadata_parser;
+
+DRWAV_PRIVATE size_t drwav__metadata_memory_capacity(drwav__metadata_parser* pParser)
+{
+    drwav_uint64 cap = sizeof(drwav_metadata) * (drwav_uint64)pParser->metadataCount + pParser->extraCapacity;
+    if (cap > DRWAV_SIZE_MAX) {
+        return 0;   /* Too big. */
     }
+
+    return (size_t)cap; /* Safe cast thanks to the check above. */
 }
 
-DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT)
+DRWAV_PRIVATE drwav_uint8* drwav__metadata_get_memory(drwav__metadata_parser* pParser, size_t size, size_t align)
 {
-    if (pFMT == NULL) {
-        return 0;
-    }
+    drwav_uint8* pResult;
 
-    if (pFMT->formatTag != DR_WAVE_FORMAT_EXTENSIBLE) {
-        return pFMT->formatTag;
-    } else {
-        return drwav__bytes_to_u16(pFMT->subFormat);    /* Only the first two bytes are required. */
+    if (align) {
+        drwav_uintptr modulo = (drwav_uintptr)pParser->pDataCursor % align;
+        if (modulo != 0) {
+            pParser->pDataCursor += align - modulo;
+        }
     }
+
+    pResult = pParser->pDataCursor;
+
+    /*
+    Getting to the point where this function is called means there should always be memory
+    available. Out of memory checks should have been done at an earlier stage.
+    */
+    DRWAV_ASSERT((pResult + size) <= (pParser->pData + drwav__metadata_memory_capacity(pParser)));
+
+    pParser->pDataCursor += size;
+    return pResult;
 }
 
-static drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE void drwav__metadata_request_extra_memory_for_stage_2(drwav__metadata_parser* pParser, size_t bytes, size_t align)
 {
-    if (pWav == NULL || onRead == NULL || onSeek == NULL) {
-        return DRWAV_FALSE;
-    }
+    size_t extra = bytes + (align ? (align - 1) : 0);
+    pParser->extraCapacity += extra;
+}
 
-    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
-    pWav->onRead    = onRead;
-    pWav->onSeek    = onSeek;
-    pWav->pUserData = pReadSeekUserData;
-    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
+DRWAV_PRIVATE drwav_result drwav__metadata_alloc(drwav__metadata_parser* pParser, drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pParser->extraCapacity != 0 || pParser->metadataCount != 0) {
+        pAllocationCallbacks->onFree(pParser->pData, pAllocationCallbacks->pUserData);
 
-    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
-        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
+        pParser->pData = (drwav_uint8*)pAllocationCallbacks->onMalloc(drwav__metadata_memory_capacity(pParser), pAllocationCallbacks->pUserData);
+        pParser->pDataCursor = pParser->pData;
+
+        if (pParser->pData == NULL) {
+            return DRWAV_OUT_OF_MEMORY;
+        }
+
+        /*
+        We don't need to worry about specifying an alignment here because malloc always returns something
+        of suitable alignment. This also means pParser->pMetadata is all that we need to store in order
+        for us to free when we are done.
+        */
+        pParser->pMetadata = (drwav_metadata*)drwav__metadata_get_memory(pParser, sizeof(drwav_metadata) * pParser->metadataCount, 1);
+        pParser->metadataCursor = 0;
     }
 
-    return DRWAV_TRUE;
+    return DRWAV_SUCCESS;
 }
 
-static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
+DRWAV_PRIVATE size_t drwav__metadata_parser_read(drwav__metadata_parser* pParser, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
 {
-    /* This function assumes drwav_preinit() has been called beforehand. */
-
-    drwav_uint64 cursor;    /* <-- Keeps track of the byte position so we can seek to specific locations. */
-    drwav_bool32 sequential;
-    drwav_uint8 riff[4];
-    drwav_fmt fmt;
-    unsigned short translatedFormatTag;
-    drwav_uint64 sampleCountFromFactChunk;
-    drwav_bool32 foundDataChunk;
-    drwav_uint64 dataChunkSize;
-    drwav_uint64 chunkSize;
+    if (pCursor != NULL) {
+        return drwav__on_read(pParser->onRead, pParser->pReadSeekUserData, pBufferOut, bytesToRead, pCursor);
+    } else {
+        return pParser->onRead(pParser->pReadSeekUserData, pBufferOut, bytesToRead);
+    }
+}
 
-    cursor = 0;
-    sequential = (flags & DRWAV_SEQUENTIAL) != 0;
+DRWAV_PRIVATE drwav_uint64 drwav__read_smpl_to_metadata_obj(drwav__metadata_parser* pParser, const drwav_chunk_header* pChunkHeader, drwav_metadata* pMetadata)
+{
+    drwav_uint8 smplHeaderData[DRWAV_SMPL_BYTES];
+    drwav_uint64 totalBytesRead = 0;
+    size_t bytesJustRead;
 
-    /* The first 4 bytes should be the RIFF identifier. */
-    if (drwav__on_read(pWav->onRead, pWav->pUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) {
-        return DRWAV_FALSE;
+    if (pMetadata == NULL) {
+        return 0;
     }
 
-    /*
-    The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
-    w64 it will start with "riff".
-    */
-    if (drwav__fourcc_equal(riff, "RIFF")) {
-        pWav->container = drwav_container_riff;
-    } else if (drwav__fourcc_equal(riff, "riff")) {
-        int i;
-        drwav_uint8 riff2[12];
+    bytesJustRead = drwav__metadata_parser_read(pParser, smplHeaderData, sizeof(smplHeaderData), &totalBytesRead);
 
-        pWav->container = drwav_container_w64;
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+    DRWAV_ASSERT(pChunkHeader != NULL);
 
-        /* Check the rest of the GUID for validity. */
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) {
-            return DRWAV_FALSE;
-        }
+    if (pMetadata != NULL && bytesJustRead == sizeof(smplHeaderData)) {
+        drwav_uint32 iSampleLoop;
 
-        for (i = 0; i < 12; ++i) {
-            if (riff2[i] != drwavGUID_W64_RIFF[i+4]) {
-                return DRWAV_FALSE;
+        pMetadata->type                                     = drwav_metadata_type_smpl;
+        pMetadata->data.smpl.manufacturerId                 = drwav_bytes_to_u32(smplHeaderData + 0);
+        pMetadata->data.smpl.productId                      = drwav_bytes_to_u32(smplHeaderData + 4);
+        pMetadata->data.smpl.samplePeriodNanoseconds        = drwav_bytes_to_u32(smplHeaderData + 8);
+        pMetadata->data.smpl.midiUnityNote                  = drwav_bytes_to_u32(smplHeaderData + 12);
+        pMetadata->data.smpl.midiPitchFraction              = drwav_bytes_to_u32(smplHeaderData + 16);
+        pMetadata->data.smpl.smpteFormat                    = drwav_bytes_to_u32(smplHeaderData + 20);
+        pMetadata->data.smpl.smpteOffset                    = drwav_bytes_to_u32(smplHeaderData + 24);
+        pMetadata->data.smpl.sampleLoopCount                = drwav_bytes_to_u32(smplHeaderData + 28);
+        pMetadata->data.smpl.samplerSpecificDataSizeInBytes = drwav_bytes_to_u32(smplHeaderData + 32);
+
+        /*
+        The loop count needs to be validated against the size of the chunk for safety so we don't
+        attempt to read over the boundary of the chunk.
+        */
+        if (pMetadata->data.smpl.sampleLoopCount == (pChunkHeader->sizeInBytes - DRWAV_SMPL_BYTES) / DRWAV_SMPL_LOOP_BYTES) {
+            pMetadata->data.smpl.pLoops = (drwav_smpl_loop*)drwav__metadata_get_memory(pParser, sizeof(drwav_smpl_loop) * pMetadata->data.smpl.sampleLoopCount, DRWAV_METADATA_ALIGNMENT);
+
+            for (iSampleLoop = 0; iSampleLoop < pMetadata->data.smpl.sampleLoopCount; ++iSampleLoop) {
+                drwav_uint8 smplLoopData[DRWAV_SMPL_LOOP_BYTES];
+                bytesJustRead = drwav__metadata_parser_read(pParser, smplLoopData, sizeof(smplLoopData), &totalBytesRead);
+
+                if (bytesJustRead == sizeof(smplLoopData)) {
+                    pMetadata->data.smpl.pLoops[iSampleLoop].cuePointId            = drwav_bytes_to_u32(smplLoopData + 0);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].type                  = drwav_bytes_to_u32(smplLoopData + 4);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].firstSampleByteOffset = drwav_bytes_to_u32(smplLoopData + 8);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].lastSampleByteOffset  = drwav_bytes_to_u32(smplLoopData + 12);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].sampleFraction        = drwav_bytes_to_u32(smplLoopData + 16);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].playCount             = drwav_bytes_to_u32(smplLoopData + 20);
+                } else {
+                    break;
+                }
+            }
+
+            if (pMetadata->data.smpl.samplerSpecificDataSizeInBytes > 0) {
+                pMetadata->data.smpl.pSamplerSpecificData = drwav__metadata_get_memory(pParser, pMetadata->data.smpl.samplerSpecificDataSizeInBytes, 1);
+                DRWAV_ASSERT(pMetadata->data.smpl.pSamplerSpecificData != NULL);
+
+                drwav__metadata_parser_read(pParser, pMetadata->data.smpl.pSamplerSpecificData, pMetadata->data.smpl.samplerSpecificDataSizeInBytes, &totalBytesRead);
             }
         }
-    } else {
-        return DRWAV_FALSE;   /* Unknown or unsupported container. */
     }
 
+    return totalBytesRead;
+}
 
-    if (pWav->container == drwav_container_riff) {
-        drwav_uint8 chunkSizeBytes[4];
-        drwav_uint8 wave[4];
+DRWAV_PRIVATE drwav_uint64 drwav__read_cue_to_metadata_obj(drwav__metadata_parser* pParser, const drwav_chunk_header* pChunkHeader, drwav_metadata* pMetadata)
+{
+    drwav_uint8 cueHeaderSectionData[DRWAV_CUE_BYTES];
+    drwav_uint64 totalBytesRead = 0;
+    size_t bytesJustRead;
 
-        /* RIFF/WAVE */
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
-            return DRWAV_FALSE;
-        }
+    if (pMetadata == NULL) {
+        return 0;
+    }
 
-        if (drwav__bytes_to_u32(chunkSizeBytes) < 36) {
-            return DRWAV_FALSE;    /* Chunk size should always be at least 36 bytes. */
-        }
+    bytesJustRead = drwav__metadata_parser_read(pParser, cueHeaderSectionData, sizeof(cueHeaderSectionData), &totalBytesRead);
 
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
-            return DRWAV_FALSE;
-        }
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
 
-        if (!drwav__fourcc_equal(wave, "WAVE")) {
-            return DRWAV_FALSE;    /* Expecting "WAVE". */
-        }
-    } else {
-        drwav_uint8 chunkSizeBytes[8];
-        drwav_uint8 wave[16];
+    if (bytesJustRead == sizeof(cueHeaderSectionData)) {
+        pMetadata->type                   = drwav_metadata_type_cue;
+        pMetadata->data.cue.cuePointCount = drwav_bytes_to_u32(cueHeaderSectionData);
 
-        /* W64 */
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
-            return DRWAV_FALSE;
+        /*
+        We need to validate the cue point count against the size of the chunk so we don't read
+        beyond the chunk.
+        */
+        if (pMetadata->data.cue.cuePointCount == (pChunkHeader->sizeInBytes - DRWAV_CUE_BYTES) / DRWAV_CUE_POINT_BYTES) {
+            pMetadata->data.cue.pCuePoints    = (drwav_cue_point*)drwav__metadata_get_memory(pParser, sizeof(drwav_cue_point) * pMetadata->data.cue.cuePointCount, DRWAV_METADATA_ALIGNMENT);
+            DRWAV_ASSERT(pMetadata->data.cue.pCuePoints != NULL);
+
+            if (pMetadata->data.cue.cuePointCount > 0) {
+                drwav_uint32 iCuePoint;
+
+                for (iCuePoint = 0; iCuePoint < pMetadata->data.cue.cuePointCount; ++iCuePoint) {
+                    drwav_uint8 cuePointData[DRWAV_CUE_POINT_BYTES];
+                    bytesJustRead = drwav__metadata_parser_read(pParser, cuePointData, sizeof(cuePointData), &totalBytesRead);
+
+                    if (bytesJustRead == sizeof(cuePointData)) {
+                        pMetadata->data.cue.pCuePoints[iCuePoint].id                = drwav_bytes_to_u32(cuePointData + 0);
+                        pMetadata->data.cue.pCuePoints[iCuePoint].playOrderPosition = drwav_bytes_to_u32(cuePointData + 4);
+                        pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId[0]    = cuePointData[8];
+                        pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId[1]    = cuePointData[9];
+                        pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId[2]    = cuePointData[10];
+                        pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId[3]    = cuePointData[11];
+                        pMetadata->data.cue.pCuePoints[iCuePoint].chunkStart        = drwav_bytes_to_u32(cuePointData + 12);
+                        pMetadata->data.cue.pCuePoints[iCuePoint].blockStart        = drwav_bytes_to_u32(cuePointData + 16);
+                        pMetadata->data.cue.pCuePoints[iCuePoint].sampleByteOffset  = drwav_bytes_to_u32(cuePointData + 20);
+                    } else {
+                        break;
+                    }
+                }
+            }
         }
+    }
 
-        if (drwav__bytes_to_u64(chunkSizeBytes) < 80) {
-            return DRWAV_FALSE;
-        }
+    return totalBytesRead;
+}
 
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
-            return DRWAV_FALSE;
-        }
+DRWAV_PRIVATE drwav_uint64 drwav__read_inst_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata)
+{
+    drwav_uint8 instData[DRWAV_INST_BYTES];
+    drwav_uint64 bytesRead;
 
-        if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) {
-            return DRWAV_FALSE;
-        }
+    if (pMetadata == NULL) {
+        return 0;
     }
 
+    bytesRead = drwav__metadata_parser_read(pParser, instData, sizeof(instData), NULL);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
 
-    /* The next bytes should be the "fmt " chunk. */
-    if (!drwav__read_fmt(pWav->onRead, pWav->onSeek, pWav->pUserData, pWav->container, &cursor, &fmt)) {
-        return DRWAV_FALSE;    /* Failed to read the "fmt " chunk. */
+    if (bytesRead == sizeof(instData)) {
+        pMetadata->type                    = drwav_metadata_type_inst;
+        pMetadata->data.inst.midiUnityNote = (drwav_int8)instData[0];
+        pMetadata->data.inst.fineTuneCents = (drwav_int8)instData[1];
+        pMetadata->data.inst.gainDecibels  = (drwav_int8)instData[2];
+        pMetadata->data.inst.lowNote       = (drwav_int8)instData[3];
+        pMetadata->data.inst.highNote      = (drwav_int8)instData[4];
+        pMetadata->data.inst.lowVelocity   = (drwav_int8)instData[5];
+        pMetadata->data.inst.highVelocity  = (drwav_int8)instData[6];
     }
 
-    /* Basic validation. */
-    if ((fmt.sampleRate    == 0 || fmt.sampleRate    > DRWAV_MAX_SAMPLE_RATE)     ||
-        (fmt.channels      == 0 || fmt.channels      > DRWAV_MAX_CHANNELS)        ||
-        (fmt.bitsPerSample == 0 || fmt.bitsPerSample > DRWAV_MAX_BITS_PER_SAMPLE) ||
-        fmt.blockAlign == 0) {
-        return DRWAV_FALSE; /* Probably an invalid WAV file. */
+    return bytesRead;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__read_acid_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata)
+{
+    drwav_uint8 acidData[DRWAV_ACID_BYTES];
+    drwav_uint64 bytesRead;
+
+    if (pMetadata == NULL) {
+        return 0;
+    }
+
+    bytesRead = drwav__metadata_parser_read(pParser, acidData, sizeof(acidData), NULL);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+
+    if (bytesRead == sizeof(acidData)) {
+        pMetadata->type                       = drwav_metadata_type_acid;
+        pMetadata->data.acid.flags            = drwav_bytes_to_u32(acidData + 0);
+        pMetadata->data.acid.midiUnityNote    = drwav_bytes_to_u16(acidData + 4);
+        pMetadata->data.acid.reserved1        = drwav_bytes_to_u16(acidData + 6);
+        pMetadata->data.acid.reserved2        = drwav_bytes_to_f32(acidData + 8);
+        pMetadata->data.acid.numBeats         = drwav_bytes_to_u32(acidData + 12);
+        pMetadata->data.acid.meterDenominator = drwav_bytes_to_u16(acidData + 16);
+        pMetadata->data.acid.meterNumerator   = drwav_bytes_to_u16(acidData + 18);
+        pMetadata->data.acid.tempo            = drwav_bytes_to_f32(acidData + 20);
+    }
+
+    return bytesRead;
+}
+
+DRWAV_PRIVATE size_t drwav__strlen(const char* str)
+{
+    size_t result = 0;
+
+    while (*str++) {
+        result += 1;
+    }
+
+    return result;
+}
+
+DRWAV_PRIVATE size_t drwav__strlen_clamped(const char* str, size_t maxToRead)
+{
+    size_t result = 0;
+
+    while (*str++ && result < maxToRead) {
+        result += 1;
+    }
+
+    return result;
+}
+
+DRWAV_PRIVATE char* drwav__metadata_copy_string(drwav__metadata_parser* pParser, const char* str, size_t maxToRead)
+{
+    size_t len = drwav__strlen_clamped(str, maxToRead);
+
+    if (len) {
+        char* result = (char*)drwav__metadata_get_memory(pParser, len + 1, 1);
+        DRWAV_ASSERT(result != NULL);
+
+        DRWAV_COPY_MEMORY(result, str, len);
+        result[len] = '\0';
+
+        return result;
+    } else {
+        return NULL;
+    }
+}
+
+typedef struct
+{
+    const void* pBuffer;
+    size_t sizeInBytes;
+    size_t cursor;
+} drwav_buffer_reader;
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_init(const void* pBuffer, size_t sizeInBytes, drwav_buffer_reader* pReader)
+{
+    DRWAV_ASSERT(pBuffer != NULL);
+    DRWAV_ASSERT(pReader != NULL);
+
+    DRWAV_ZERO_OBJECT(pReader);
+
+    pReader->pBuffer     = pBuffer;
+    pReader->sizeInBytes = sizeInBytes;
+    pReader->cursor      = 0;
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE const void* drwav_buffer_reader_ptr(const drwav_buffer_reader* pReader)
+{
+    DRWAV_ASSERT(pReader != NULL);
+
+    return drwav_offset_ptr(pReader->pBuffer, pReader->cursor);
+}
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_seek(drwav_buffer_reader* pReader, size_t bytesToSeek)
+{
+    DRWAV_ASSERT(pReader != NULL);
+
+    if (pReader->cursor + bytesToSeek > pReader->sizeInBytes) {
+        return DRWAV_BAD_SEEK;  /* Seeking too far forward. */
+    }
+
+    pReader->cursor += bytesToSeek;
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_read(drwav_buffer_reader* pReader, void* pDst, size_t bytesToRead, size_t* pBytesRead)
+{
+    drwav_result result = DRWAV_SUCCESS;
+    size_t bytesRemaining;
+
+    DRWAV_ASSERT(pReader != NULL);
+
+    if (pBytesRead != NULL) {
+        *pBytesRead = 0;
+    }
+
+    bytesRemaining = (pReader->sizeInBytes - pReader->cursor);
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (pDst == NULL) {
+        /* Seek. */
+        result = drwav_buffer_reader_seek(pReader, bytesToRead);
+    } else {
+        /* Read. */
+        DRWAV_COPY_MEMORY(pDst, drwav_buffer_reader_ptr(pReader), bytesToRead);
+        pReader->cursor += bytesToRead;
+    }
+
+    DRWAV_ASSERT(pReader->cursor <= pReader->sizeInBytes);
+
+    if (result == DRWAV_SUCCESS) {
+        if (pBytesRead != NULL) {
+            *pBytesRead = bytesToRead;
+        }
+    }
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_read_u16(drwav_buffer_reader* pReader, drwav_uint16* pDst)
+{
+    drwav_result result;
+    size_t bytesRead;
+    drwav_uint8 data[2];
+
+    DRWAV_ASSERT(pReader != NULL);
+    DRWAV_ASSERT(pDst != NULL);
+
+    *pDst = 0;  /* Safety. */
+
+    result = drwav_buffer_reader_read(pReader, data, sizeof(*pDst), &bytesRead);
+    if (result != DRWAV_SUCCESS || bytesRead != sizeof(*pDst)) {
+        return result;
+    }
+
+    *pDst = drwav_bytes_to_u16(data);
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_PRIVATE drwav_result drwav_buffer_reader_read_u32(drwav_buffer_reader* pReader, drwav_uint32* pDst)
+{
+    drwav_result result;
+    size_t bytesRead;
+    drwav_uint8 data[4];
+
+    DRWAV_ASSERT(pReader != NULL);
+    DRWAV_ASSERT(pDst != NULL);
+
+    *pDst = 0;  /* Safety. */
+
+    result = drwav_buffer_reader_read(pReader, data, sizeof(*pDst), &bytesRead);
+    if (result != DRWAV_SUCCESS || bytesRead != sizeof(*pDst)) {
+        return result;
+    }
+
+    *pDst = drwav_bytes_to_u32(data);
+
+    return DRWAV_SUCCESS;
+}
+
+
+
+DRWAV_PRIVATE drwav_uint64 drwav__read_bext_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata, drwav_uint64 chunkSize)
+{
+    drwav_uint8 bextData[DRWAV_BEXT_BYTES];
+    size_t bytesRead = drwav__metadata_parser_read(pParser, bextData, sizeof(bextData), NULL);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+
+    if (bytesRead == sizeof(bextData)) {
+        drwav_buffer_reader reader;
+        drwav_uint32 timeReferenceLow;
+        drwav_uint32 timeReferenceHigh;
+        size_t extraBytes;
+
+        pMetadata->type = drwav_metadata_type_bext;
+
+        if (drwav_buffer_reader_init(bextData, bytesRead, &reader) == DRWAV_SUCCESS) {
+            pMetadata->data.bext.pDescription = drwav__metadata_copy_string(pParser, (const char*)drwav_buffer_reader_ptr(&reader), DRWAV_BEXT_DESCRIPTION_BYTES);
+            drwav_buffer_reader_seek(&reader, DRWAV_BEXT_DESCRIPTION_BYTES);
+
+            pMetadata->data.bext.pOriginatorName = drwav__metadata_copy_string(pParser, (const char*)drwav_buffer_reader_ptr(&reader), DRWAV_BEXT_ORIGINATOR_NAME_BYTES);
+            drwav_buffer_reader_seek(&reader, DRWAV_BEXT_ORIGINATOR_NAME_BYTES);
+
+            pMetadata->data.bext.pOriginatorReference = drwav__metadata_copy_string(pParser, (const char*)drwav_buffer_reader_ptr(&reader), DRWAV_BEXT_ORIGINATOR_REF_BYTES);
+            drwav_buffer_reader_seek(&reader, DRWAV_BEXT_ORIGINATOR_REF_BYTES);
+
+            drwav_buffer_reader_read(&reader, pMetadata->data.bext.pOriginationDate, sizeof(pMetadata->data.bext.pOriginationDate), NULL);
+            drwav_buffer_reader_read(&reader, pMetadata->data.bext.pOriginationTime, sizeof(pMetadata->data.bext.pOriginationTime), NULL);
+
+            drwav_buffer_reader_read_u32(&reader, &timeReferenceLow);
+            drwav_buffer_reader_read_u32(&reader, &timeReferenceHigh);
+            pMetadata->data.bext.timeReference = ((drwav_uint64)timeReferenceHigh << 32) + timeReferenceLow;
+
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.version);
+
+            pMetadata->data.bext.pUMID = drwav__metadata_get_memory(pParser, DRWAV_BEXT_UMID_BYTES, 1);
+            drwav_buffer_reader_read(&reader, pMetadata->data.bext.pUMID, DRWAV_BEXT_UMID_BYTES, NULL);
+
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.loudnessValue);
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.loudnessRange);
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.maxTruePeakLevel);
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.maxMomentaryLoudness);
+            drwav_buffer_reader_read_u16(&reader, &pMetadata->data.bext.maxShortTermLoudness);
+
+            DRWAV_ASSERT((drwav_offset_ptr(drwav_buffer_reader_ptr(&reader), DRWAV_BEXT_RESERVED_BYTES)) == (bextData + DRWAV_BEXT_BYTES));
+
+            extraBytes = (size_t)(chunkSize - DRWAV_BEXT_BYTES);
+            if (extraBytes > 0) {
+                pMetadata->data.bext.pCodingHistory = (char*)drwav__metadata_get_memory(pParser, extraBytes + 1, 1);
+                DRWAV_ASSERT(pMetadata->data.bext.pCodingHistory != NULL);
+
+                bytesRead += drwav__metadata_parser_read(pParser, pMetadata->data.bext.pCodingHistory, extraBytes, NULL);
+                pMetadata->data.bext.codingHistorySize = (drwav_uint32)drwav__strlen(pMetadata->data.bext.pCodingHistory);
+            } else {
+                pMetadata->data.bext.pCodingHistory    = NULL;
+                pMetadata->data.bext.codingHistorySize = 0;
+            }
+        }
+    }
+
+    return bytesRead;
+}
+
+/*
+Reads the data of a "labl" or "note" sub-chunk into pMetadata. Only valid during the read stage of the
+metadata parser.
+
+pParser   - The metadata parser. Asserted to be in the read stage.
+pMetadata - The metadata object to fill out.
+chunkSize - The size in bytes of the sub-chunk's data. The arithmetic below assumes this is at least
+            DRWAV_LIST_LABEL_OR_NOTE_BYTES.
+type      - The metadata type to store in pMetadata->type.
+
+Returns the running byte total maintained through drwav__metadata_parser_read(). The caller compares this
+against the sub-chunk size to decide whether parsing succeeded.
+*/
+DRWAV_PRIVATE drwav_uint64 drwav__read_list_label_or_note_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata, drwav_uint64 chunkSize, drwav_metadata_type type)
+{
+    drwav_uint8 cueIDBuffer[DRWAV_LIST_LABEL_OR_NOTE_BYTES];
+    drwav_uint64 totalBytesRead = 0;
+    size_t bytesJustRead = drwav__metadata_parser_read(pParser, cueIDBuffer, sizeof(cueIDBuffer), &totalBytesRead);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+
+    if (bytesJustRead == sizeof(cueIDBuffer)) {
+        drwav_uint32 sizeIncludingNullTerminator;
+
+        pMetadata->type = type;
+        pMetadata->data.labelOrNote.cuePointId = drwav_bytes_to_u32(cueIDBuffer);
+
+        /* Whatever follows the fixed-size header is the string, including its terminator. */
+        sizeIncludingNullTerminator = (drwav_uint32)chunkSize - DRWAV_LIST_LABEL_OR_NOTE_BYTES;
+        if (sizeIncludingNullTerminator > 0) {
+            pMetadata->data.labelOrNote.stringLength = sizeIncludingNullTerminator - 1;
+            pMetadata->data.labelOrNote.pString      = (char*)drwav__metadata_get_memory(pParser, sizeIncludingNullTerminator, 1);
+            DRWAV_ASSERT(pMetadata->data.labelOrNote.pString != NULL);
+
+            drwav__metadata_parser_read(pParser, pMetadata->data.labelOrNote.pString, sizeIncludingNullTerminator, &totalBytesRead);
+
+            /*
+            The bytes come straight from the file, which may be malformed or truncated. Force the final byte to be
+            a terminator so that pString is always a valid C string of at most stringLength characters.
+            */
+            pMetadata->data.labelOrNote.pString[sizeIncludingNullTerminator - 1] = '\0';
+        } else {
+            pMetadata->data.labelOrNote.stringLength = 0;
+            pMetadata->data.labelOrNote.pString      = NULL;
+        }
+    }
+
+    return totalBytesRead;
+}
+
+/*
+Reads the data of an "ltxt" (labelled cue region) sub-chunk into pMetadata. Only valid during the read stage
+of the metadata parser.
+
+pParser   - The metadata parser. Asserted to be in the read stage.
+pMetadata - The metadata object to fill out.
+chunkSize - The size in bytes of the sub-chunk's data. The arithmetic below assumes this is at least
+            DRWAV_LIST_LABELLED_TEXT_BYTES.
+
+Returns the running byte total maintained through drwav__metadata_parser_read(). The caller compares this
+against the sub-chunk size to decide whether parsing succeeded.
+*/
+DRWAV_PRIVATE drwav_uint64 drwav__read_list_labelled_cue_region_to_metadata_obj(drwav__metadata_parser* pParser, drwav_metadata* pMetadata, drwav_uint64 chunkSize)
+{
+    drwav_uint8 buffer[DRWAV_LIST_LABELLED_TEXT_BYTES];
+    drwav_uint64 totalBytesRead = 0;
+    size_t bytesJustRead = drwav__metadata_parser_read(pParser, buffer, sizeof(buffer), &totalBytesRead);
+
+    DRWAV_ASSERT(pParser->stage == drwav__metadata_parser_stage_read);
+
+    if (bytesJustRead == sizeof(buffer)) {
+        drwav_uint32 sizeIncludingNullTerminator;
+
+        /* Fixed-size header fields, decoded at their byte offsets within the header. */
+        pMetadata->type                                = drwav_metadata_type_list_labelled_cue_region;
+        pMetadata->data.labelledCueRegion.cuePointId   = drwav_bytes_to_u32(buffer + 0);
+        pMetadata->data.labelledCueRegion.sampleLength = drwav_bytes_to_u32(buffer + 4);
+        pMetadata->data.labelledCueRegion.purposeId[0] = buffer[8];
+        pMetadata->data.labelledCueRegion.purposeId[1] = buffer[9];
+        pMetadata->data.labelledCueRegion.purposeId[2] = buffer[10];
+        pMetadata->data.labelledCueRegion.purposeId[3] = buffer[11];
+        pMetadata->data.labelledCueRegion.country      = drwav_bytes_to_u16(buffer + 12);
+        pMetadata->data.labelledCueRegion.language     = drwav_bytes_to_u16(buffer + 14);
+        pMetadata->data.labelledCueRegion.dialect      = drwav_bytes_to_u16(buffer + 16);
+        pMetadata->data.labelledCueRegion.codePage     = drwav_bytes_to_u16(buffer + 18);
+
+        /* Whatever follows the fixed-size header is the string, including its terminator. */
+        sizeIncludingNullTerminator = (drwav_uint32)chunkSize - DRWAV_LIST_LABELLED_TEXT_BYTES;
+        if (sizeIncludingNullTerminator > 0) {
+            pMetadata->data.labelledCueRegion.stringLength = sizeIncludingNullTerminator - 1;
+            pMetadata->data.labelledCueRegion.pString      = (char*)drwav__metadata_get_memory(pParser, sizeIncludingNullTerminator, 1);
+            DRWAV_ASSERT(pMetadata->data.labelledCueRegion.pString != NULL);
+
+            drwav__metadata_parser_read(pParser, pMetadata->data.labelledCueRegion.pString, sizeIncludingNullTerminator, &totalBytesRead);
+
+            /*
+            The bytes come straight from the file, which may be malformed or truncated. Force the final byte to be
+            a terminator so that pString is always a valid C string of at most stringLength characters.
+            */
+            pMetadata->data.labelledCueRegion.pString[sizeIncludingNullTerminator - 1] = '\0';
+        } else {
+            pMetadata->data.labelledCueRegion.stringLength = 0;
+            pMetadata->data.labelledCueRegion.pString      = NULL;
+        }
+    }
+
+    return totalBytesRead;
+}
+
+/*
+Processes an info text sub-chunk whose entire payload is a string.
+
+pParser   - The metadata parser.
+chunkSize - The size in bytes of the sub-chunk's data, treated as the string size including its terminator.
+type      - The metadata type to store for this entry.
+
+In the count stage this only bumps the metadata count and requests memory for the string, and returns 0. In any
+other stage it fills the metadata object at the parser's cursor and returns the number of bytes read; the cursor
+is advanced only when the whole string was read (or when the chunk is empty).
+*/
+DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_info_text_chunk(drwav__metadata_parser* pParser, drwav_uint64 chunkSize, drwav_metadata_type type)
+{
+    drwav_uint64 bytesRead = 0;
+    drwav_uint32 stringSizeWithNullTerminator = (drwav_uint32)chunkSize;
+
+    if (pParser->stage == drwav__metadata_parser_stage_count) {
+        pParser->metadataCount += 1;
+        drwav__metadata_request_extra_memory_for_stage_2(pParser, stringSizeWithNullTerminator, 1);
+    } else {
+        drwav_metadata* pMetadata = &pParser->pMetadata[pParser->metadataCursor];
+        pMetadata->type = type;
+        if (stringSizeWithNullTerminator > 0) {
+            pMetadata->data.infoText.stringLength = stringSizeWithNullTerminator - 1;
+            pMetadata->data.infoText.pString = (char*)drwav__metadata_get_memory(pParser, stringSizeWithNullTerminator, 1);
+            DRWAV_ASSERT(pMetadata->data.infoText.pString != NULL);
+
+            bytesRead = drwav__metadata_parser_read(pParser, pMetadata->data.infoText.pString, (size_t)stringSizeWithNullTerminator, NULL);
+
+            /*
+            The bytes come straight from the file, which may be malformed or truncated. Force the final byte to be
+            a terminator so that pString is always a valid C string of at most stringLength characters.
+            */
+            pMetadata->data.infoText.pString[stringSizeWithNullTerminator - 1] = '\0';
+
+            if (bytesRead == chunkSize) {
+                pParser->metadataCursor += 1;
+            } else {
+                /* Failed to parse. */
+            }
+        } else {
+            pMetadata->data.infoText.stringLength = 0;
+            pMetadata->data.infoText.pString      = NULL;
+            pParser->metadataCursor += 1;
+        }
+    }
+
+    return bytesRead;
+}
+
+/*
+Stores a chunk that has no dedicated parser as a raw "unknown" metadata entry.
+
+pParser   - The metadata parser.
+pChunkId  - The four byte ID of the chunk.
+chunkSize - The size in bytes of the chunk's data.
+location  - Where the chunk was found. Chunks with an invalid location are ignored.
+
+Returns the number of bytes read from the stream. This is always 0 for ignored chunks and in the count stage.
+*/
+DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_unknown_chunk(drwav__metadata_parser* pParser, const drwav_uint8* pChunkId, drwav_uint64 chunkSize, drwav_metadata_location location)
+{
+    drwav_metadata* pEntry;
+    drwav_uint64 consumed;
+    int iByte;
+
+    /* Chunks with no valid location, and the "data", "fmt " and "fact" chunks, are never stored as metadata. */
+    if (location == drwav_metadata_location_invalid || drwav_fourcc_equal(pChunkId, "data") || drwav_fourcc_equal(pChunkId, "fmt ") || drwav_fourcc_equal(pChunkId, "fact")) {
+        return 0;
+    }
+
+    /* Count stage: just tally the entry and the memory its payload will need. Nothing is read. */
+    if (pParser->stage == drwav__metadata_parser_stage_count) {
+        pParser->metadataCount += 1;
+        drwav__metadata_request_extra_memory_for_stage_2(pParser, (size_t)chunkSize, 1);
+        return 0;
+    }
+
+    pEntry = &pParser->pMetadata[pParser->metadataCursor];
+    pEntry->type                       = drwav_metadata_type_unknown;
+    pEntry->data.unknown.chunkLocation = location;
+    for (iByte = 0; iByte < 4; iByte += 1) {
+        pEntry->data.unknown.id[iByte] = pChunkId[iByte];
+    }
+    pEntry->data.unknown.dataSizeInBytes = (drwav_uint32)chunkSize;
+    pEntry->data.unknown.pData           = (drwav_uint8 *)drwav__metadata_get_memory(pParser, (size_t)chunkSize, 1);
+    DRWAV_ASSERT(pEntry->data.unknown.pData != NULL);
+
+    /* The entry is only committed (cursor advanced) when the whole payload could be read. */
+    consumed = drwav__metadata_parser_read(pParser, pEntry->data.unknown.pData, pEntry->data.unknown.dataSizeInBytes, NULL);
+    if (consumed == pEntry->data.unknown.dataSizeInBytes) {
+        pParser->metadataCursor += 1;
+    }
+
+    return consumed;
+}
+
+/*
+Returns DRWAV_TRUE when `type` is enabled in `allowedMetadataTypes` and the chunk ID equals `pID`, and
+DRWAV_FALSE otherwise. The ID comparison is skipped when the type is not enabled.
+*/
+DRWAV_PRIVATE drwav_bool32 drwav__chunk_matches(drwav_metadata_type allowedMetadataTypes, const drwav_uint8* pChunkID, drwav_metadata_type type, const char* pID)
+{
+    if ((allowedMetadataTypes & type) == 0) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_fourcc_equal(pChunkID, pID) ? DRWAV_TRUE : DRWAV_FALSE;
+}
+
+/*
+Processes a single chunk for the metadata parser and returns the number of bytes of the chunk's data that were
+consumed from the stream.
+
+pParser              - The metadata parser.
+pChunkHeader         - The header of the chunk to process. The stream is expected to be positioned at the start
+                       of the chunk's data.
+allowedMetadataTypes - Bit mask of the metadata types to extract. Chunks of other types are not consumed.
+
+The behaviour depends on pParser->stage:
+
+  - In the count stage, the number of metadata entries and the extra memory needed for them is tallied.
+  - Otherwise, the chunk is decoded into pParser->pMetadata[pParser->metadataCursor] and the cursor is advanced
+    only when the whole chunk was read.
+
+"LIST"/"list" chunks are walked sub-chunk by sub-chunk, with unread sub-chunk data and padding seeked over.
+*/
+DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_chunk(drwav__metadata_parser* pParser, const drwav_chunk_header* pChunkHeader, drwav_metadata_type allowedMetadataTypes)
+{
+    const drwav_uint8 *pChunkID = pChunkHeader->id.fourcc;
+    drwav_uint64 bytesRead = 0;
+
+    if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_smpl, "smpl")) {
+        if (pChunkHeader->sizeInBytes >= DRWAV_SMPL_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                drwav_uint8 buffer[4];
+                size_t bytesJustRead;
+
+                /* Skip straight to the loop count field, 28 bytes into the chunk. */
+                if (!pParser->onSeek(pParser->pReadSeekUserData, 28, drwav_seek_origin_current)) {
+                    return bytesRead;
+                }
+                bytesRead += 28;
+
+                bytesJustRead = drwav__metadata_parser_read(pParser, buffer, sizeof(buffer), &bytesRead);
+                if (bytesJustRead == sizeof(buffer)) {
+                    drwav_uint32 loopCount = drwav_bytes_to_u32(buffer);
+                    drwav_uint64 calculatedLoopCount;
+
+                    /* The loop count must be validated against the size of the chunk. */
+                    calculatedLoopCount = (pChunkHeader->sizeInBytes - DRWAV_SMPL_BYTES) / DRWAV_SMPL_LOOP_BYTES;
+                    if (calculatedLoopCount == loopCount) {
+                        bytesJustRead = drwav__metadata_parser_read(pParser, buffer, sizeof(buffer), &bytesRead);
+                        if (bytesJustRead == sizeof(buffer)) {
+                            drwav_uint32 samplerSpecificDataSizeInBytes = drwav_bytes_to_u32(buffer);
+
+                            pParser->metadataCount += 1;
+                            drwav__metadata_request_extra_memory_for_stage_2(pParser, sizeof(drwav_smpl_loop) * loopCount, DRWAV_METADATA_ALIGNMENT);
+                            drwav__metadata_request_extra_memory_for_stage_2(pParser, samplerSpecificDataSizeInBytes, 1);
+                        }
+                    } else {
+                        /* Loop count in header does not match the size of the chunk. */
+                    }
+                }
+            } else {
+                bytesRead = drwav__read_smpl_to_metadata_obj(pParser, pChunkHeader, &pParser->pMetadata[pParser->metadataCursor]);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_inst, "inst")) {
+        if (pChunkHeader->sizeInBytes == DRWAV_INST_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                pParser->metadataCount += 1;
+            } else {
+                bytesRead = drwav__read_inst_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor]);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_acid, "acid")) {
+        if (pChunkHeader->sizeInBytes == DRWAV_ACID_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                pParser->metadataCount += 1;
+            } else {
+                bytesRead = drwav__read_acid_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor]);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_cue, "cue ")) {
+        if (pChunkHeader->sizeInBytes >= DRWAV_CUE_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                size_t cueCount;
+
+                pParser->metadataCount += 1;
+                cueCount = (size_t)(pChunkHeader->sizeInBytes - DRWAV_CUE_BYTES) / DRWAV_CUE_POINT_BYTES;
+                drwav__metadata_request_extra_memory_for_stage_2(pParser, sizeof(drwav_cue_point) * cueCount, DRWAV_METADATA_ALIGNMENT);
+            } else {
+                bytesRead = drwav__read_cue_to_metadata_obj(pParser, pChunkHeader, &pParser->pMetadata[pParser->metadataCursor]);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav__chunk_matches(allowedMetadataTypes, pChunkID, drwav_metadata_type_bext, "bext")) {
+        if (pChunkHeader->sizeInBytes >= DRWAV_BEXT_BYTES) {
+            if (pParser->stage == drwav__metadata_parser_stage_count) {
+                /* The description field is the largest one in a bext chunk, so that is the max size of this temporary buffer. */
+                char buffer[DRWAV_BEXT_DESCRIPTION_BYTES + 1];
+                size_t allocSizeNeeded = DRWAV_BEXT_UMID_BYTES; /* We know we will need SMPTE umid size. */
+                size_t bytesJustRead;
+
+                /* Each field is terminated just past its maximum length before being measured with drwav__strlen(). */
+                buffer[DRWAV_BEXT_DESCRIPTION_BYTES] = '\0';
+                bytesJustRead = drwav__metadata_parser_read(pParser, buffer, DRWAV_BEXT_DESCRIPTION_BYTES, &bytesRead);
+                if (bytesJustRead != DRWAV_BEXT_DESCRIPTION_BYTES) {
+                    return bytesRead;
+                }
+                allocSizeNeeded += drwav__strlen(buffer) + 1;
+
+                buffer[DRWAV_BEXT_ORIGINATOR_NAME_BYTES] = '\0';
+                bytesJustRead = drwav__metadata_parser_read(pParser, buffer, DRWAV_BEXT_ORIGINATOR_NAME_BYTES, &bytesRead);
+                if (bytesJustRead != DRWAV_BEXT_ORIGINATOR_NAME_BYTES) {
+                    return bytesRead;
+                }
+                allocSizeNeeded += drwav__strlen(buffer) + 1;
+
+                buffer[DRWAV_BEXT_ORIGINATOR_REF_BYTES] = '\0';
+                bytesJustRead = drwav__metadata_parser_read(pParser, buffer, DRWAV_BEXT_ORIGINATOR_REF_BYTES, &bytesRead);
+                if (bytesJustRead != DRWAV_BEXT_ORIGINATOR_REF_BYTES) {
+                    return bytesRead;
+                }
+                allocSizeNeeded += drwav__strlen(buffer) + 1;
+                allocSizeNeeded += (size_t)pChunkHeader->sizeInBytes - DRWAV_BEXT_BYTES; /* Coding history. */
+
+                drwav__metadata_request_extra_memory_for_stage_2(pParser, allocSizeNeeded, 1);
+
+                pParser->metadataCount += 1;
+            } else {
+                bytesRead = drwav__read_bext_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor], pChunkHeader->sizeInBytes);
+                if (bytesRead == pChunkHeader->sizeInBytes) {
+                    pParser->metadataCursor += 1;
+                } else {
+                    /* Failed to parse. */
+                }
+            }
+        } else {
+            /* Incorrectly formed chunk. */
+        }
+    } else if (drwav_fourcc_equal(pChunkID, "LIST") || drwav_fourcc_equal(pChunkID, "list")) {
+        drwav_metadata_location listType = drwav_metadata_location_invalid;
+        while (bytesRead < pChunkHeader->sizeInBytes) {
+            drwav_uint8 subchunkId[4];
+            drwav_uint8 subchunkSizeBuffer[4];
+            drwav_uint64 subchunkDataSize;
+            drwav_uint64 subchunkBytesRead = 0;
+            drwav_uint64 bytesJustRead = drwav__metadata_parser_read(pParser, subchunkId, sizeof(subchunkId), &bytesRead);
+            if (bytesJustRead != sizeof(subchunkId)) {
+                break;
+            }
+
+            /*
+            The first thing in a list chunk should be "adtl" or "INFO".
+
+              - adtl means this list is a Associated Data List Chunk and will contain labels, notes
+                or labelled cue regions.
+              - INFO means this list is an Info List Chunk containing info text chunks such as IPRD
+                which would specifies the album of this wav file.
+
+            No data follows the adtl or INFO id so we just make note of what type this list is and
+            continue.
+            */
+            if (drwav_fourcc_equal(subchunkId, "adtl")) {
+                listType = drwav_metadata_location_inside_adtl_list;
+                continue;
+            } else if (drwav_fourcc_equal(subchunkId, "INFO")) {
+                listType = drwav_metadata_location_inside_info_list;
+                continue;
+            }
+
+            bytesJustRead = drwav__metadata_parser_read(pParser, subchunkSizeBuffer, sizeof(subchunkSizeBuffer), &bytesRead);
+            if (bytesJustRead != sizeof(subchunkSizeBuffer)) {
+                break;
+            }
+            subchunkDataSize = drwav_bytes_to_u32(subchunkSizeBuffer);
+
+            if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_label, "labl") || drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_note, "note")) {
+                if (subchunkDataSize >= DRWAV_LIST_LABEL_OR_NOTE_BYTES) {
+                    drwav_uint64 stringSizeWithNullTerm = subchunkDataSize - DRWAV_LIST_LABEL_OR_NOTE_BYTES;
+                    if (pParser->stage == drwav__metadata_parser_stage_count) {
+                        pParser->metadataCount += 1;
+                        drwav__metadata_request_extra_memory_for_stage_2(pParser, (size_t)stringSizeWithNullTerm, 1);
+                    } else {
+                        subchunkBytesRead = drwav__read_list_label_or_note_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor], subchunkDataSize, drwav_fourcc_equal(subchunkId, "labl") ? drwav_metadata_type_list_label : drwav_metadata_type_list_note);
+                        if (subchunkBytesRead == subchunkDataSize) {
+                            pParser->metadataCursor += 1;
+                        } else {
+                            /* Failed to parse. */
+                        }
+                    }
+                } else {
+                    /* Incorrectly formed chunk. */
+                }
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_labelled_cue_region, "ltxt")) {
+                if (subchunkDataSize >= DRWAV_LIST_LABELLED_TEXT_BYTES) {
+                    drwav_uint64 stringSizeWithNullTerminator = subchunkDataSize - DRWAV_LIST_LABELLED_TEXT_BYTES;
+                    if (pParser->stage == drwav__metadata_parser_stage_count) {
+                        pParser->metadataCount += 1;
+                        drwav__metadata_request_extra_memory_for_stage_2(pParser, (size_t)stringSizeWithNullTerminator, 1);
+                    } else {
+                        subchunkBytesRead = drwav__read_list_labelled_cue_region_to_metadata_obj(pParser, &pParser->pMetadata[pParser->metadataCursor], subchunkDataSize);
+                        if (subchunkBytesRead == subchunkDataSize) {
+                            pParser->metadataCursor += 1;
+                        } else {
+                            /* Failed to parse. */
+                        }
+                    }
+                } else {
+                    /* Incorrectly formed chunk. */
+                }
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_software, "ISFT")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_software);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_copyright, "ICOP")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_copyright);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_title, "INAM")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_title);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_artist, "IART")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_artist);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_comment, "ICMT")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_comment);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_date, "ICRD")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_date);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_genre, "IGNR")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_genre);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_album, "IPRD")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_album);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_tracknumber, "ITRK")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_tracknumber);
+            } else if ((allowedMetadataTypes & drwav_metadata_type_unknown) != 0) {
+                subchunkBytesRead = drwav__metadata_process_unknown_chunk(pParser, subchunkId, subchunkDataSize, listType);
+            }
+
+            bytesRead += subchunkBytesRead;
+            DRWAV_ASSERT(subchunkBytesRead <= subchunkDataSize);
+
+            if (subchunkBytesRead < subchunkDataSize) {
+                drwav_uint64 bytesToSeek = subchunkDataSize - subchunkBytesRead;
+
+                /*
+                The sub-chunk size is an unsigned 32-bit value taken from the file, so the remaining byte count can
+                be larger than INT_MAX. Casting it to int for onSeek() would turn such a value into a negative
+                offset and seek backwards, so the untruncated count is handed to drwav__seek_forward() instead.
+                */
+                if (!drwav__seek_forward(pParser->onSeek, bytesToSeek, pParser->pReadSeekUserData)) {
+                    break;
+                }
+                bytesRead += bytesToSeek;
+            }
+
+            /* Sub-chunks with an odd data size are followed by one padding byte. */
+            if ((subchunkDataSize % 2) == 1) {
+                if (!pParser->onSeek(pParser->pReadSeekUserData, 1, drwav_seek_origin_current)) {
+                    break;
+                }
+                bytesRead += 1;
+            }
+        }
+    } else if ((allowedMetadataTypes & drwav_metadata_type_unknown) != 0) {
+        bytesRead = drwav__metadata_process_unknown_chunk(pParser, pChunkID, pChunkHeader->sizeInBytes, drwav_metadata_location_top_level);
+    }
+
+    return bytesRead;
+}
+
+
+/*
+Returns the size in bytes of a single PCM frame (one sample for every channel) of pWav, or 0 when the format is
+a-law or mu-law and the computed size is not exactly one byte per channel.
+*/
+DRWAV_PRIVATE drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav)
+{
+    drwav_uint32 bytesPerFrame;
+
+    /*
+    The bytes per frame is a bit ambiguous. It can be either be based on the bits per sample, or the block align. The way I'm doing it here
+    is that if the bits per sample is a multiple of 8, use floor(bitsPerSample*channels/8), otherwise fall back to the block align.
+    */
+    if ((pWav->bitsPerSample & 0x7) == 0) {
+        /*
+        Bits per sample is a multiple of 8. The multiplication is done explicitly in 32-bit unsigned arithmetic. If
+        the operands are narrower than int (presumably both are 16-bit fields - confirm against the struct
+        declarations) they would otherwise be promoted to signed int, where a large product overflows.
+        */
+        bytesPerFrame = ((drwav_uint32)pWav->bitsPerSample * (drwav_uint32)pWav->fmt.channels) >> 3;
+    } else {
+        bytesPerFrame = pWav->fmt.blockAlign;
+    }
+
+    /* Validation for known formats. a-law and mu-law should be 1 byte per channel. If it's not, it's not decodable. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW || pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        if (bytesPerFrame != pWav->fmt.channels) {
+            return 0;   /* Invalid file. */
+        }
+    }
+
+    return bytesPerFrame;
+}
+
+/*
+Returns the effective format tag described by pFMT, or 0 when pFMT is NULL.
+
+For DR_WAVE_FORMAT_EXTENSIBLE the tag is decoded from the start of the subFormat field. For every other
+format the formatTag field is returned as is.
+*/
+DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT)
+{
+    if (pFMT == NULL) {
+        return 0;
+    }
+
+    if (pFMT->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+        /* Only the first two bytes of the sub-format are needed to identify the format. */
+        return drwav_bytes_to_u16(pFMT->subFormat);
+    }
+
+    return pFMT->formatTag;
+}
+
+/*
+Puts pWav into a known initial state: the object is zeroed, then the read/seek callbacks, their user data and
+the allocation callbacks are stored in it.
+
+pWav                 - The object to initialize. Must not be NULL.
+onRead, onSeek       - The read and seek callbacks. Neither may be NULL.
+pReadSeekUserData    - Stored in pWav->pUserData.
+pAllocationCallbacks - Passed to drwav_copy_allocation_callbacks_or_defaults() and the result stored in pWav.
+
+Returns DRWAV_FALSE when a required argument is NULL or the resulting allocation callbacks are unusable, and
+DRWAV_TRUE otherwise.
+*/
+DRWAV_PRIVATE drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pWav == NULL || onRead == NULL || onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    /* Everything not explicitly assigned below starts out as zero. */
+    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
+    pWav->onRead    = onRead;
+    pWav->onSeek    = onSeek;
+    pWav->pUserData = pReadSeekUserData;
+    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
+
+    /* A free callback is mandatory, along with at least one of malloc and realloc. */
+    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
+        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
     }
 
+    return DRWAV_TRUE;
+}
 
-    /* Translate the internal format. */
-    translatedFormatTag = fmt.formatTag;
-    if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
-        translatedFormatTag = drwav__bytes_to_u16(fmt.subFormat + 0);
+DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
+{
+    /* This function assumes drwav_preinit() has been called beforehand. */
+    drwav_result result;
+    drwav_uint64 cursor;    /* <-- Keeps track of the byte position so we can seek to specific locations. */
+    drwav_bool32 sequential;
+    drwav_uint8 riff[4];
+    drwav_fmt fmt;
+    unsigned short translatedFormatTag;
+    drwav_uint64 dataChunkSize = 0;             /* <-- Important! Don't explicitly set this to 0 anywhere else. Calculation of the size of the data chunk is performed in different paths depending on the container. */
+    drwav_uint64 sampleCountFromFactChunk = 0;  /* Same as dataChunkSize - make sure this is the only place this is initialized to 0. */
+    drwav_uint64 metadataStartPos;
+    drwav__metadata_parser metadataParser;
+    drwav_bool8 isProcessingMetadata = DRWAV_FALSE;
+    drwav_bool8 foundChunk_fmt  = DRWAV_FALSE;
+    drwav_bool8 foundChunk_data = DRWAV_FALSE;
+    drwav_bool8 isAIFCFormType = DRWAV_FALSE;   /* Only used with AIFF. */
+    drwav_uint64 aiffFrameCount = 0;
+
+    cursor = 0;
+    sequential = (flags & DRWAV_SEQUENTIAL) != 0;
+    DRWAV_ZERO_OBJECT(&fmt);
+
+    /* The first 4 bytes should be the RIFF identifier. */
+    if (drwav__on_read(pWav->onRead, pWav->pUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) {
+        return DRWAV_FALSE;
+    }
+
+    /*
+    The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
+    w64 it will start with "riff".
+    */
+    if (drwav_fourcc_equal(riff, "RIFF")) {
+        pWav->container = drwav_container_riff;
+    } else if (drwav_fourcc_equal(riff, "RIFX")) {
+        pWav->container = drwav_container_rifx;
+    } else if (drwav_fourcc_equal(riff, "riff")) {
+        int i;
+        drwav_uint8 riff2[12];
+
+        pWav->container = drwav_container_w64;
+
+        /* Check the rest of the GUID for validity. */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) {
+            return DRWAV_FALSE;
+        }
+
+        for (i = 0; i < 12; ++i) {
+            if (riff2[i] != drwavGUID_W64_RIFF[i+4]) {
+                return DRWAV_FALSE;
+            }
+        }
+    } else if (drwav_fourcc_equal(riff, "RF64")) {
+        pWav->container = drwav_container_rf64;
+    } else if (drwav_fourcc_equal(riff, "FORM")) {
+        pWav->container = drwav_container_aiff;
+    } else {
+        return DRWAV_FALSE;   /* Unknown or unsupported container. */
+    }
+
+
+    if (pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx || pWav->container == drwav_container_rf64) {
+        drwav_uint8 chunkSizeBytes[4];
+        drwav_uint8 wave[4];
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx) {
+            if (drwav_bytes_to_u32_ex(chunkSizeBytes, pWav->container) < 36) {
+                /*
+                I've had a report of a WAV file failing to load when the size of the WAVE chunk is not encoded
+                and is instead just set to 0. I'm going to relax the validation here to allow these files to
+                load. Considering the chunk size isn't actually used this should be safe. With this change my
+                test suite still passes.
+                */
+                /*return DRWAV_FALSE;*/    /* Chunk size should always be at least 36 bytes. */
+            }
+        } else if (pWav->container == drwav_container_rf64) {
+            if (drwav_bytes_to_u32_le(chunkSizeBytes) != 0xFFFFFFFF) {
+                return DRWAV_FALSE;    /* Chunk size should always be set to -1/0xFFFFFFFF for RF64. The actual size is retrieved later. */
+            }
+        } else {
+            return DRWAV_FALSE; /* Should never hit this. */
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav_fourcc_equal(wave, "WAVE")) {
+            return DRWAV_FALSE;    /* Expecting "WAVE". */
+        }
+    } else if (pWav->container == drwav_container_w64) {
+        drwav_uint8 chunkSizeBytes[8];
+        drwav_uint8 wave[16];
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav_bytes_to_u64(chunkSizeBytes) < 80) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav_guid_equal(wave, drwavGUID_W64_WAVE)) {
+            return DRWAV_FALSE;
+        }
+    } else if (pWav->container == drwav_container_aiff) {
+        drwav_uint8 chunkSizeBytes[4];
+        drwav_uint8 aiff[4];
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav_bytes_to_u32_be(chunkSizeBytes) < 18) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, aiff, sizeof(aiff), &cursor) != sizeof(aiff)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav_fourcc_equal(aiff, "AIFF")) {
+            isAIFCFormType = DRWAV_FALSE;
+        } else if (drwav_fourcc_equal(aiff, "AIFC")) {
+            isAIFCFormType = DRWAV_TRUE;
+        } else {
+            return DRWAV_FALSE; /* Expecting "AIFF" or "AIFC". */
+        }
+    } else {
+        return DRWAV_FALSE;
     }
 
 
+    /* For RF64, the "ds64" chunk must come next, before the "fmt " chunk. */
+    if (pWav->container == drwav_container_rf64) {
+        drwav_uint8 sizeBytes[8];
+        drwav_uint64 bytesRemainingInChunk;
+        drwav_chunk_header header;
+        result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
+        if (result != DRWAV_SUCCESS) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav_fourcc_equal(header.id.fourcc, "ds64")) {
+            return DRWAV_FALSE; /* Expecting "ds64". */
+        }
+
+        bytesRemainingInChunk = header.sizeInBytes + header.paddingSize;
+
+        /* We don't care about the size of the RIFF chunk - skip it. */
+        if (!drwav__seek_forward(pWav->onSeek, 8, pWav->pUserData)) {
+            return DRWAV_FALSE;
+        }
+        bytesRemainingInChunk -= 8;
+        cursor += 8;
+
+
+        /* Next 8 bytes is the size of the "data" chunk. */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, sizeBytes, sizeof(sizeBytes), &cursor) != sizeof(sizeBytes)) {
+            return DRWAV_FALSE;
+        }
+        bytesRemainingInChunk -= 8;
+        dataChunkSize = drwav_bytes_to_u64(sizeBytes);
+
+
+        /* The next 8 bytes are the sample count, which we would usually derive from the FACT chunk if it was available. */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, sizeBytes, sizeof(sizeBytes), &cursor) != sizeof(sizeBytes)) {
+            return DRWAV_FALSE;
+        }
+        bytesRemainingInChunk -= 8;
+        sampleCountFromFactChunk = drwav_bytes_to_u64(sizeBytes);
+
+
+        /* Skip over everything else. */
+        if (!drwav__seek_forward(pWav->onSeek, bytesRemainingInChunk, pWav->pUserData)) {
+            return DRWAV_FALSE;
+        }
+        cursor += bytesRemainingInChunk;
+    }
+
 
-    sampleCountFromFactChunk = 0;
+    metadataStartPos = cursor;
 
     /*
-    We need to enumerate over each chunk for two reasons:
-      1) The "data" chunk may not be the next one
-      2) We may want to report each chunk back to the client
-    
-    In order to correctly report each chunk back to the client we will need to keep looping until the end of the file.
+    Whether or not we are processing metadata controls how we load. We can load more efficiently when
+    metadata is not being processed, but we also cannot process metadata for Wave64 because I have not
+    been able to test it. If someone is able to test this and provide a patch I'm happy to enable it.
+
+    Sequential mode cannot support metadata because it involves seeking backwards.
     */
-    foundDataChunk = DRWAV_FALSE;
-    dataChunkSize = 0;
+    isProcessingMetadata = !sequential && ((flags & DRWAV_WITH_METADATA) != 0);
 
-    /* The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop. */
-    for (;;)
-    {
+    /* Don't allow processing of metadata with untested containers. */
+    if (pWav->container != drwav_container_riff && pWav->container != drwav_container_rf64) {
+        isProcessingMetadata = DRWAV_FALSE;
+    }
+
+    DRWAV_ZERO_MEMORY(&metadataParser, sizeof(metadataParser));
+    if (isProcessingMetadata) {
+        metadataParser.onRead = pWav->onRead;
+        metadataParser.onSeek = pWav->onSeek;
+        metadataParser.pReadSeekUserData = pWav->pUserData;
+        metadataParser.stage  = drwav__metadata_parser_stage_count;
+    }
+
+
+    /*
+    From here on out, chunks might be in any order. In order to robustly handle metadata we'll need
+    to loop through every chunk and handle them as we find them. In sequential mode we need to get
+    out of the loop as soon as we find the data chunk because we won't be able to seek back.
+    */
+    for (;;) {  /* For each chunk... */
         drwav_chunk_header header;
-        drwav_result result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
+        drwav_uint64 chunkSize;
+
+        result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
         if (result != DRWAV_SUCCESS) {
-            if (!foundDataChunk) {
-                return DRWAV_FALSE;
-            } else {
-                break;  /* Probably at the end of the file. Get out of the loop. */
-            }
+            break;
         }
 
-        /* Tell the client about this chunk. */
+        chunkSize = header.sizeInBytes;
+
+
+        /*
+        Always tell the caller about this chunk. We cannot do this in sequential mode because the
+        callback is allowed to read from the file, in which case we'll need to rewind.
+        */
         if (!sequential && onChunk != NULL) {
             drwav_uint64 callbackBytesRead = onChunk(pChunkUserData, pWav->onRead, pWav->onSeek, pWav->pUserData, &header, pWav->container, &fmt);
 
@@ -1944,142 +3247,355 @@ static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk,
             we called the callback.
             */
             if (callbackBytesRead > 0) {
-                if (!drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData)) {
+                if (drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData) == DRWAV_FALSE) {
                     return DRWAV_FALSE;
                 }
             }
         }
-        
 
-        if (!foundDataChunk) {
-            pWav->dataChunkDataPos = cursor;
-        }
 
-        chunkSize = header.sizeInBytes;
-        if (pWav->container == drwav_container_riff) {
-            if (drwav__fourcc_equal(header.id.fourcc, "data")) {
-                foundDataChunk = DRWAV_TRUE;
-                dataChunkSize = chunkSize;
+        /* Explicitly handle known chunks first. */
+
+        /* "fmt " */
+        if (((pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx || pWav->container == drwav_container_rf64) && drwav_fourcc_equal(header.id.fourcc, "fmt ")) ||
+            ((pWav->container == drwav_container_w64) && drwav_guid_equal(header.id.guid, drwavGUID_W64_FMT))) {
+            drwav_uint8 fmtData[16];
+
+            foundChunk_fmt = DRWAV_TRUE;
+
+            if (pWav->onRead(pWav->pUserData, fmtData, sizeof(fmtData)) != sizeof(fmtData)) {
+                return DRWAV_FALSE;
             }
-        } else {
-            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) {
-                foundDataChunk = DRWAV_TRUE;
-                dataChunkSize = chunkSize;
+            cursor += sizeof(fmtData);
+
+            fmt.formatTag      = drwav_bytes_to_u16_ex(fmtData + 0,  pWav->container);
+            fmt.channels       = drwav_bytes_to_u16_ex(fmtData + 2,  pWav->container);
+            fmt.sampleRate     = drwav_bytes_to_u32_ex(fmtData + 4,  pWav->container);
+            fmt.avgBytesPerSec = drwav_bytes_to_u32_ex(fmtData + 8,  pWav->container);
+            fmt.blockAlign     = drwav_bytes_to_u16_ex(fmtData + 12, pWav->container);
+            fmt.bitsPerSample  = drwav_bytes_to_u16_ex(fmtData + 14, pWav->container);
+
+            fmt.extendedSize       = 0;
+            fmt.validBitsPerSample = 0;
+            fmt.channelMask        = 0;
+            DRWAV_ZERO_MEMORY(fmt.subFormat, sizeof(fmt.subFormat));
+
+            if (header.sizeInBytes > 16) {
+                drwav_uint8 fmt_cbSize[2];
+                int bytesReadSoFar = 0;
+
+                if (pWav->onRead(pWav->pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) {
+                    return DRWAV_FALSE;    /* Expecting more data. */
+                }
+                cursor += sizeof(fmt_cbSize);
+
+                bytesReadSoFar = 18;
+
+                fmt.extendedSize = drwav_bytes_to_u16_ex(fmt_cbSize, pWav->container);
+                if (fmt.extendedSize > 0) {
+                    /* Simple validation. */
+                    if (fmt.formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                        if (fmt.extendedSize != 22) {
+                            return DRWAV_FALSE;
+                        }
+                    }
+
+                    if (fmt.formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                        drwav_uint8 fmtext[22];
+
+                        if (pWav->onRead(pWav->pUserData, fmtext, fmt.extendedSize) != fmt.extendedSize) {
+                            return DRWAV_FALSE;    /* Expecting more data. */
+                        }
+
+                        fmt.validBitsPerSample = drwav_bytes_to_u16_ex(fmtext + 0, pWav->container);
+                        fmt.channelMask        = drwav_bytes_to_u32_ex(fmtext + 2, pWav->container);
+                        drwav_bytes_to_guid(fmtext + 6, fmt.subFormat);
+                    } else {
+                        if (pWav->onSeek(pWav->pUserData, fmt.extendedSize, drwav_seek_origin_current) == DRWAV_FALSE) {
+                            return DRWAV_FALSE;
+                        }
+                    }
+                    cursor += fmt.extendedSize;
+
+                    bytesReadSoFar += fmt.extendedSize;
+                }
+
+                /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. */
+                if (pWav->onSeek(pWav->pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current) == DRWAV_FALSE) {
+                    return DRWAV_FALSE;
+                }
+                cursor += (header.sizeInBytes - bytesReadSoFar);
+            }
+
+            if (header.paddingSize > 0) {
+                if (drwav__seek_forward(pWav->onSeek, header.paddingSize, pWav->pUserData) == DRWAV_FALSE) {
+                    break;
+                }
+                cursor += header.paddingSize;
             }
+
+            /* Go to the next chunk. Don't include this chunk in metadata. */
+            continue;
         }
 
-        /*
-        If at this point we have found the data chunk and we're running in sequential mode, we need to break out of this loop. The reason for
-        this is that we would otherwise require a backwards seek which sequential mode forbids.
-        */
-        if (foundDataChunk && sequential) {
-            break;
+        /* "data" */
+        if (((pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx || pWav->container == drwav_container_rf64) && drwav_fourcc_equal(header.id.fourcc, "data")) ||
+            ((pWav->container == drwav_container_w64) && drwav_guid_equal(header.id.guid, drwavGUID_W64_DATA))) {
+            foundChunk_data = DRWAV_TRUE;
+
+            pWav->dataChunkDataPos  = cursor;
+
+            if (pWav->container != drwav_container_rf64) {  /* The data chunk size for RF64 will always be set to 0xFFFFFFFF here. It was set to it's true value earlier. */
+                dataChunkSize = chunkSize;
+            }
+
+            /* If we're running in sequential mode, or we're not reading metadata, we have enough now that we can get out of the loop. */
+            if (sequential || !isProcessingMetadata) {
+                break;      /* No need to keep reading beyond the data chunk. */
+            } else {
+                chunkSize += header.paddingSize;    /* <-- Make sure we seek past the padding. */
+                if (drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData) == DRWAV_FALSE) {
+                    break;
+                }
+                cursor += chunkSize;
+
+                continue;   /* There may be some more metadata to read. */
+            }
         }
 
-        /* Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats. */
-        if (pWav->container == drwav_container_riff) {
-            if (drwav__fourcc_equal(header.id.fourcc, "fact")) {
-                drwav_uint32 sampleCount;
+        /* "fact". This is optional. Can use this to get the sample count which is useful for compressed formats. For RF64 we retrieved the sample count from the ds64 chunk earlier. */
+        if (((pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx || pWav->container == drwav_container_rf64) && drwav_fourcc_equal(header.id.fourcc, "fact")) ||
+            ((pWav->container == drwav_container_w64) && drwav_guid_equal(header.id.guid, drwavGUID_W64_FACT))) {
+            if (pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx) {
+                drwav_uint8 sampleCount[4];
                 if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCount, 4, &cursor) != 4) {
                     return DRWAV_FALSE;
                 }
-                chunkSize -= 4;
 
-                if (!foundDataChunk) {
-                    pWav->dataChunkDataPos = cursor;
-                }
+                chunkSize -= 4;
 
                 /*
                 The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this
                 for Microsoft ADPCM formats.
                 */
                 if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-                    sampleCountFromFactChunk = sampleCount;
+                    sampleCountFromFactChunk = drwav_bytes_to_u32_ex(sampleCount, pWav->container);
                 } else {
                     sampleCountFromFactChunk = 0;
                 }
-            }
-        } else {
-            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) {
+            } else if (pWav->container == drwav_container_w64) {
                 if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCountFromFactChunk, 8, &cursor) != 8) {
                     return DRWAV_FALSE;
                 }
+
                 chunkSize -= 8;
+            } else if (pWav->container == drwav_container_rf64) {
+                /* We retrieved the sample count from the ds64 chunk earlier so no need to do that here. */
+            }
+
+            /* Seek to the next chunk in preparation for the next iteration. */
+            chunkSize += header.paddingSize;    /* <-- Make sure we seek past the padding. */
+            if (drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData) == DRWAV_FALSE) {
+                break;
+            }
+            cursor += chunkSize;
+
+            continue;
+        }
+
+
+        /* "COMM". AIFF/AIFC only. */
+        if (pWav->container == drwav_container_aiff && drwav_fourcc_equal(header.id.fourcc, "COMM")) {
+            drwav_uint8 commData[24];
+            drwav_uint32 commDataBytesToRead;
+            drwav_uint16 channels;
+            drwav_uint32 frameCount;
+            drwav_uint16 sampleSizeInBits;
+            drwav_int64  sampleRate;
+            drwav_uint16 compressionFormat;
+
+            foundChunk_fmt = DRWAV_TRUE;
+
+            if (isAIFCFormType) {
+                commDataBytesToRead = 24;
+                if (header.sizeInBytes < commDataBytesToRead) {
+                    return DRWAV_FALSE; /* Invalid COMM chunk. */
+                }
+            } else {
+                commDataBytesToRead = 18;
+                if (header.sizeInBytes != commDataBytesToRead) {
+                    return DRWAV_FALSE; /* INVALID COMM chunk. */
+                }
+            }
+
+            if (drwav__on_read(pWav->onRead, pWav->pUserData, commData, commDataBytesToRead, &cursor) != commDataBytesToRead) {
+                return DRWAV_FALSE;
+            }
+
+
+            channels         = drwav_bytes_to_u16_ex     (commData + 0, pWav->container);
+            frameCount       = drwav_bytes_to_u32_ex     (commData + 2, pWav->container);
+            sampleSizeInBits = drwav_bytes_to_u16_ex     (commData + 6, pWav->container);
+            sampleRate       = drwav_aiff_extented_to_s64(commData + 8);
+
+            if (sampleRate < 0 || sampleRate > 0xFFFFFFFF) {
+                return DRWAV_FALSE; /* Invalid sample rate. */
+            }
+
+            if (isAIFCFormType) {
+                const drwav_uint8* type = commData + 18;
+
+                if (drwav_fourcc_equal(type, "NONE")) {
+                    compressionFormat = DR_WAVE_FORMAT_PCM; /* PCM, big-endian. */
+                } else if (drwav_fourcc_equal(type, "raw ")) {
+                    compressionFormat = DR_WAVE_FORMAT_PCM;
+
+                    /* In my testing, it looks like when the "raw " compression type is used, 8-bit samples should be considered unsigned. */
+                    if (sampleSizeInBits == 8) {
+                        pWav->aiff.isUnsigned = DRWAV_TRUE;
+                    }
+                } else if (drwav_fourcc_equal(type, "sowt")) {
+                    compressionFormat = DR_WAVE_FORMAT_PCM; /* PCM, little-endian. */
+                    pWav->aiff.isLE = DRWAV_TRUE;
+                } else if (drwav_fourcc_equal(type, "fl32") || drwav_fourcc_equal(type, "fl64") || drwav_fourcc_equal(type, "FL32") || drwav_fourcc_equal(type, "FL64")) {
+                    compressionFormat = DR_WAVE_FORMAT_IEEE_FLOAT;
+                } else if (drwav_fourcc_equal(type, "alaw") || drwav_fourcc_equal(type, "ALAW")) {
+                    compressionFormat = DR_WAVE_FORMAT_ALAW;
+                } else if (drwav_fourcc_equal(type, "ulaw") || drwav_fourcc_equal(type, "ULAW")) {
+                    compressionFormat = DR_WAVE_FORMAT_MULAW;
+                } else if (drwav_fourcc_equal(type, "ima4")) {
+                    compressionFormat = DR_WAVE_FORMAT_DVI_ADPCM;
+                    sampleSizeInBits = 4;
+
+                    /*
+                    I haven't been able to figure out how to get correct decoding for IMA ADPCM. Until this is figured out
+                    we'll need to abort when we encounter such an encoding. Advice welcome!
+                    */
+                    return DRWAV_FALSE;
+                } else {
+                    return DRWAV_FALSE; /* Unknown or unsupported compression format. Need to abort. */
+                }
+            } else {
+                compressionFormat = DR_WAVE_FORMAT_PCM; /* It's a standard AIFF form which is always compressed. */
+            }
+
+            /* With AIFF we want to use the explicitly defined frame count rather than deriving it from the size of the chunk. */
+            aiffFrameCount = frameCount;
+
+            /* We should now have enough information to fill out our fmt structure. */
+            fmt.formatTag      = compressionFormat;
+            fmt.channels       = channels;
+            fmt.sampleRate     = (drwav_uint32)sampleRate;
+            fmt.bitsPerSample  = sampleSizeInBits;
+            fmt.blockAlign     = (drwav_uint16)(fmt.channels * fmt.bitsPerSample / 8);
+            fmt.avgBytesPerSec = fmt.blockAlign * fmt.sampleRate;
+
+            if (fmt.blockAlign == 0 && compressionFormat == DR_WAVE_FORMAT_DVI_ADPCM) {
+                fmt.blockAlign = 34 * fmt.channels;
+            }
+
+            /*
+            Weird one. I've seen some alaw and ulaw encoded files that for some reason set the bits per sample to 16 when
+            it should be 8. To get this working I need to explicitly check for this and change it.
+            */
+            if (compressionFormat == DR_WAVE_FORMAT_ALAW || compressionFormat == DR_WAVE_FORMAT_MULAW) {
+                if (fmt.bitsPerSample > 8) {
+                    fmt.bitsPerSample = 8;
+                    fmt.blockAlign = fmt.channels;
+                }
+            }
+
+            /* In AIFF, samples are padded to 8-bit (whole byte) boundaries. We need to round up our bits per sample here. */
+            fmt.bitsPerSample += (fmt.bitsPerSample & 7);
+
+
+            /* If the form type is AIFC there will be some additional data in the chunk. We need to seek past it. */
+            if (isAIFCFormType) {
+                if (drwav__seek_forward(pWav->onSeek, (chunkSize - commDataBytesToRead), pWav->pUserData) == DRWAV_FALSE) {
+                    return DRWAV_FALSE;
+                }
+                cursor += (chunkSize - commDataBytesToRead);
+            }
+
+            /* Don't fall through or else we'll end up treating this chunk as metadata which is incorrect. */
+            continue;
+        }
+
+
+        /* "SSND". AIFF/AIFC only. This is the AIFF equivalent of the "data" chunk. */
+        if (pWav->container == drwav_container_aiff && drwav_fourcc_equal(header.id.fourcc, "SSND")) {
+            drwav_uint8 offsetAndBlockSizeData[8];
+            drwav_uint32 offset;
+
+            foundChunk_data = DRWAV_TRUE;
+
+            if (drwav__on_read(pWav->onRead, pWav->pUserData, offsetAndBlockSizeData, sizeof(offsetAndBlockSizeData), &cursor) != sizeof(offsetAndBlockSizeData)) {
+                return DRWAV_FALSE;
+            }
+
+            /* We need to seek forward by the offset. */
+            offset = drwav_bytes_to_u32_ex(offsetAndBlockSizeData + 0, pWav->container);
+            if (drwav__seek_forward(pWav->onSeek, offset, pWav->pUserData) == DRWAV_FALSE) {
+                return DRWAV_FALSE;
+            }
+            cursor += offset;
+
+            pWav->dataChunkDataPos = cursor;
+            dataChunkSize = chunkSize;
 
-                if (!foundDataChunk) {
-                    pWav->dataChunkDataPos = cursor;
+            /* If we're running in sequential mode, or we're not reading metadata, we have enough now that we can get out of the loop. */
+            if (sequential || !isProcessingMetadata) {
+                break;      /* No need to keep reading beyond the data chunk. */
+            } else {
+                if (drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData) == DRWAV_FALSE) {
+                    break;
                 }
+                cursor += chunkSize;
+
+                continue;   /* There may be some more metadata to read. */
             }
         }
 
-        /* "smpl" chunk. */
-        if (pWav->container == drwav_container_riff) {
-            if (drwav__fourcc_equal(header.id.fourcc, "smpl")) {
-                drwav_uint8 smplHeaderData[36];    /* 36 = size of the smpl header section, not including the loop data. */
-                if (chunkSize >= sizeof(smplHeaderData)) {
-                    drwav_uint64 bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplHeaderData, sizeof(smplHeaderData), &cursor);
-                    chunkSize -= bytesJustRead;
-
-                    if (bytesJustRead == sizeof(smplHeaderData)) {
-                        drwav_uint32 iLoop;
-
-                        pWav->smpl.manufacturer      = drwav__bytes_to_u32(smplHeaderData+0);
-                        pWav->smpl.product           = drwav__bytes_to_u32(smplHeaderData+4);
-                        pWav->smpl.samplePeriod      = drwav__bytes_to_u32(smplHeaderData+8);
-                        pWav->smpl.midiUnityNotes    = drwav__bytes_to_u32(smplHeaderData+12);
-                        pWav->smpl.midiPitchFraction = drwav__bytes_to_u32(smplHeaderData+16);
-                        pWav->smpl.smpteFormat       = drwav__bytes_to_u32(smplHeaderData+20);
-                        pWav->smpl.smpteOffset       = drwav__bytes_to_u32(smplHeaderData+24);
-                        pWav->smpl.numSampleLoops    = drwav__bytes_to_u32(smplHeaderData+28);
-                        pWav->smpl.samplerData       = drwav__bytes_to_u32(smplHeaderData+32);
-
-                        for (iLoop = 0; iLoop < pWav->smpl.numSampleLoops && iLoop < drwav_countof(pWav->smpl.loops); ++iLoop) {
-                            drwav_uint8 smplLoopData[24];  /* 24 = size of a loop section in the smpl chunk. */
-                            bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplLoopData, sizeof(smplLoopData), &cursor);
-                            chunkSize -= bytesJustRead;
-
-                            if (bytesJustRead == sizeof(smplLoopData)) {
-                                pWav->smpl.loops[iLoop].cuePointId = drwav__bytes_to_u32(smplLoopData+0);
-                                pWav->smpl.loops[iLoop].type       = drwav__bytes_to_u32(smplLoopData+4);
-                                pWav->smpl.loops[iLoop].start      = drwav__bytes_to_u32(smplLoopData+8);
-                                pWav->smpl.loops[iLoop].end        = drwav__bytes_to_u32(smplLoopData+12);
-                                pWav->smpl.loops[iLoop].fraction   = drwav__bytes_to_u32(smplLoopData+16);
-                                pWav->smpl.loops[iLoop].playCount  = drwav__bytes_to_u32(smplLoopData+20);
-                            } else {
-                                break;  /* Break from the smpl loop for loop. */
-                            }
-                        }
-                    }
-                } else {
-                    /* Looks like invalid data. Ignore the chunk. */
-                }
-            }
-        } else {
-            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_SMPL)) {
-                /*
-                This path will be hit when a W64 WAV file contains a smpl chunk. I don't have a sample file to test this path, so a contribution
-                is welcome to add support for this.
-                */
+
+
+        /* Getting here means it's not a chunk that we care about internally, but might need to be handled as metadata by the caller. */
+        if (isProcessingMetadata) {
+            drwav__metadata_process_chunk(&metadataParser, &header, drwav_metadata_type_all_including_unknown);
+
+            /* Go back to the start of the chunk so we can normalize the position of the cursor. */
+            if (drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData) == DRWAV_FALSE) {
+                break;  /* Failed to seek. Can't reliable read the remaining chunks. Get out. */
             }
         }
 
-        /* Make sure we seek past the padding. */
-        chunkSize += header.paddingSize;
-        if (!drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData)) {
+
+        /* Make sure we skip past the content of this chunk before we go to the next one. */
+        chunkSize += header.paddingSize;    /* <-- Make sure we seek past the padding. */
+        if (drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData) == DRWAV_FALSE) {
             break;
         }
         cursor += chunkSize;
-
-        if (!foundDataChunk) {
-            pWav->dataChunkDataPos = cursor;
-        }
     }
 
-    /* If we haven't found a data chunk, return an error. */
-    if (!foundDataChunk) {
+    /* There's some mandatory chunks that must exist. If they were not found in the iteration above we must abort. */
+    if (!foundChunk_fmt || !foundChunk_data) {
         return DRWAV_FALSE;
     }
 
+    /* Basic validation. */
+    if ((fmt.sampleRate    == 0 || fmt.sampleRate    > DRWAV_MAX_SAMPLE_RATE    ) ||
+        (fmt.channels      == 0 || fmt.channels      > DRWAV_MAX_CHANNELS       ) ||
+        (fmt.bitsPerSample == 0 || fmt.bitsPerSample > DRWAV_MAX_BITS_PER_SAMPLE) ||
+        fmt.blockAlign == 0) {
+        return DRWAV_FALSE; /* Probably an invalid WAV file. */
+    }
+
+    /* Translate the internal format. */
+    translatedFormatTag = fmt.formatTag;
+    if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+        translatedFormatTag = drwav_bytes_to_u16_ex(fmt.subFormat + 0, pWav->container);
+    }
+
     /* We may have moved passed the data chunk. If so we need to move back. If running in sequential mode we can assume we are already sitting on the data chunk. */
     if (!sequential) {
         if (!drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData)) {
@@ -2087,10 +3603,79 @@ static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk,
         }
         cursor = pWav->dataChunkDataPos;
     }
-    
+
+
+    /*
+    At this point we should have done the initial parsing of each of our chunks, but we now need to
+    do a second pass to extract the actual contents of the metadata (the first pass just calculated
+    the length of the memory allocation).
+
+    We only do this if we've actually got metadata to parse.
+    */
+    if (isProcessingMetadata && metadataParser.metadataCount > 0) {
+        if (drwav__seek_from_start(pWav->onSeek, metadataStartPos, pWav->pUserData) == DRWAV_FALSE) {
+            return DRWAV_FALSE;
+        }
+
+        result = drwav__metadata_alloc(&metadataParser, &pWav->allocationCallbacks);
+        if (result != DRWAV_SUCCESS) {
+            return DRWAV_FALSE;
+        }
+
+        metadataParser.stage = drwav__metadata_parser_stage_read;
+
+        for (;;) {
+            drwav_chunk_header header;
+            drwav_uint64 metadataBytesRead;
+
+            result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
+            if (result != DRWAV_SUCCESS) {
+                break;
+            }
+
+            metadataBytesRead = drwav__metadata_process_chunk(&metadataParser, &header, drwav_metadata_type_all_including_unknown);
+
+            /* Move to the end of the chunk so we can keep iterating. */
+            if (drwav__seek_forward(pWav->onSeek, (header.sizeInBytes + header.paddingSize) - metadataBytesRead, pWav->pUserData) == DRWAV_FALSE) {
+                drwav_free(metadataParser.pMetadata, &pWav->allocationCallbacks);
+                return DRWAV_FALSE;
+            }
+        }
+
+        /* Getting here means we're finished parsing the metadata. */
+        pWav->pMetadata     = metadataParser.pMetadata;
+        pWav->metadataCount = metadataParser.metadataCount;
+    }
+
 
     /* At this point we should be sitting on the first byte of the raw audio data. */
 
+    /*
+    I've seen a WAV file in the wild where a RIFF-encapsulated file has the size of its "RIFF" and
+    "data" chunks set to 0xFFFFFFFF when the file is definitely not that big. In this case we're
+    going to have to calculate the size by reading and discarding bytes, and then seeking back. We
+    cannot do this in sequential mode. We just assume that the rest of the file is audio data.
+    */
+    if (dataChunkSize == 0xFFFFFFFF && (pWav->container == drwav_container_riff || pWav->container == drwav_container_rifx) && pWav->isSequentialWrite == DRWAV_FALSE) {
+        dataChunkSize = 0;
+
+        for (;;) {
+            drwav_uint8 temp[4096];
+            size_t bytesRead = pWav->onRead(pWav->pUserData, temp, sizeof(temp));
+            dataChunkSize += bytesRead;
+
+            if (bytesRead < sizeof(temp)) {
+                break;
+            }
+        }
+    }
+
+    if (drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData) == DRWAV_FALSE) {
+        drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
+        return DRWAV_FALSE;
+    }
+
+
     pWav->fmt                 = fmt;
     pWav->sampleRate          = fmt.sampleRate;
     pWav->channels            = fmt.channels;
@@ -2101,8 +3686,16 @@ static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk,
 
     if (sampleCountFromFactChunk != 0) {
         pWav->totalPCMFrameCount = sampleCountFromFactChunk;
+    } else if (aiffFrameCount != 0) {
+        pWav->totalPCMFrameCount = aiffFrameCount;
     } else {
-        pWav->totalPCMFrameCount = dataChunkSize / drwav_get_bytes_per_pcm_frame(pWav);
+        drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+        if (bytesPerFrame == 0) {
+            drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
+            return DRWAV_FALSE; /* Invalid file. */
+        }
+
+        pWav->totalPCMFrameCount = dataChunkSize / bytesPerFrame;
 
         if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
             drwav_uint64 totalBlockHeaderSizeInBytes;
@@ -2138,10 +3731,17 @@ static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk,
     /* Some formats only support a certain number of channels. */
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
         if (pWav->channels > 2) {
+            drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
             return DRWAV_FALSE;
         }
     }
 
+    /* The number of bytes per frame must be known. If not, it's an invalid file and not decodable. */
+    if (drwav_get_bytes_per_pcm_frame(pWav) == 0) {
+        drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
+        return DRWAV_FALSE;
+    }
+
 #ifdef DR_WAV_LIBSNDFILE_COMPAT
     /*
     I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website),
@@ -2175,22 +3775,555 @@ DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_
         return DRWAV_FALSE;
     }
 
-    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
-}
+    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+}
+
+DRWAV_API drwav_bool32 drwav_init_with_metadata(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit(pWav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init__internal(pWav, NULL, NULL, flags | DRWAV_WITH_METADATA);
+}
+
+DRWAV_API drwav_metadata* drwav_take_ownership_of_metadata(drwav* pWav)
+{
+    drwav_metadata *result = pWav->pMetadata;
+
+    pWav->pMetadata     = NULL;
+    pWav->metadataCount = 0;
+
+    return result;
+}
+
+
+DRWAV_PRIVATE size_t drwav__write(drwav* pWav, const void* pData, size_t dataSize)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    /* Generic write. Assumes no byte reordering required. */
+    return pWav->onWrite(pWav->pUserData, pData, dataSize);
+}
+
+DRWAV_PRIVATE size_t drwav__write_byte(drwav* pWav, drwav_uint8 byte)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    return pWav->onWrite(pWav->pUserData, &byte, 1);
+}
+
+DRWAV_PRIVATE size_t drwav__write_u16ne_to_le(drwav* pWav, drwav_uint16 value)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    if (!drwav__is_little_endian()) {
+        value = drwav__bswap16(value);
+    }
+
+    return drwav__write(pWav, &value, 2);
+}
+
+DRWAV_PRIVATE size_t drwav__write_u32ne_to_le(drwav* pWav, drwav_uint32 value)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    if (!drwav__is_little_endian()) {
+        value = drwav__bswap32(value);
+    }
+
+    return drwav__write(pWav, &value, 4);
+}
+
+DRWAV_PRIVATE size_t drwav__write_u64ne_to_le(drwav* pWav, drwav_uint64 value)
+{
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    if (!drwav__is_little_endian()) {
+        value = drwav__bswap64(value);
+    }
+
+    return drwav__write(pWav, &value, 8);
+}
+
+DRWAV_PRIVATE size_t drwav__write_f32ne_to_le(drwav* pWav, float value)
+{
+    union {
+       drwav_uint32 u32;
+       float f32;
+    } u;
+
+    DRWAV_ASSERT(pWav          != NULL);
+    DRWAV_ASSERT(pWav->onWrite != NULL);
+
+    u.f32 = value;
+
+    if (!drwav__is_little_endian()) {
+        u.u32 = drwav__bswap32(u.u32);
+    }
+
+    return drwav__write(pWav, &u.u32, 4);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count(drwav* pWav, const void* pData, size_t dataSize)
+{
+    if (pWav == NULL) {
+        return dataSize;
+    }
+
+    return drwav__write(pWav, pData, dataSize);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count_byte(drwav* pWav, drwav_uint8 byte)
+{
+    if (pWav == NULL) {
+        return 1;
+    }
+
+    return drwav__write_byte(pWav, byte);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count_u16ne_to_le(drwav* pWav, drwav_uint16 value)
+{
+    if (pWav == NULL) {
+        return 2;
+    }
+
+    return drwav__write_u16ne_to_le(pWav, value);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count_u32ne_to_le(drwav* pWav, drwav_uint32 value)
+{
+    if (pWav == NULL) {
+        return 4;
+    }
+
+    return drwav__write_u32ne_to_le(pWav, value);
+}
+
+#if 0   /* Unused for now. */
+DRWAV_PRIVATE size_t drwav__write_or_count_u64ne_to_le(drwav* pWav, drwav_uint64 value)
+{
+    if (pWav == NULL) {
+        return 8;
+    }
+
+    return drwav__write_u64ne_to_le(pWav, value);
+}
+#endif
+
+DRWAV_PRIVATE size_t drwav__write_or_count_f32ne_to_le(drwav* pWav, float value)
+{
+    if (pWav == NULL) {
+        return 4;
+    }
+
+    return drwav__write_f32ne_to_le(pWav, value);
+}
+
+DRWAV_PRIVATE size_t drwav__write_or_count_string_to_fixed_size_buf(drwav* pWav, char* str, size_t bufFixedSize)
+{
+    size_t len;
+
+    if (pWav == NULL) {
+        return bufFixedSize;
+    }
+
+    len = drwav__strlen_clamped(str, bufFixedSize);
+    drwav__write_or_count(pWav, str, len);
+
+    if (len < bufFixedSize) {
+        size_t i;
+        for (i = 0; i < bufFixedSize - len; ++i) {
+            drwav__write_byte(pWav, 0);
+        }
+    }
+
+    return bufFixedSize;
+}
+
+
+/* pWav can be NULL meaning just count the bytes that would be written. */
+DRWAV_PRIVATE size_t drwav__write_or_count_metadata(drwav* pWav, drwav_metadata* pMetadatas, drwav_uint32 metadataCount)
+{
+    size_t bytesWritten = 0;
+    drwav_bool32 hasListAdtl = DRWAV_FALSE;
+    drwav_bool32 hasListInfo = DRWAV_FALSE;
+    drwav_uint32 iMetadata;
+
+    if (pMetadatas == NULL || metadataCount == 0) {
+        return 0;
+    }
+
+    for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+        drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+        drwav_uint32 chunkSize = 0;
+
+        if ((pMetadata->type & drwav_metadata_type_list_all_info_strings) || (pMetadata->type == drwav_metadata_type_unknown && pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_info_list)) {
+            hasListInfo = DRWAV_TRUE;
+        }
+
+        if ((pMetadata->type & drwav_metadata_type_list_all_adtl) || (pMetadata->type == drwav_metadata_type_unknown && pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_adtl_list)) {
+            hasListAdtl = DRWAV_TRUE;
+        }
+
+        switch (pMetadata->type) {
+            case drwav_metadata_type_smpl:
+            {
+                drwav_uint32 iLoop;
+
+                chunkSize = DRWAV_SMPL_BYTES + DRWAV_SMPL_LOOP_BYTES * pMetadata->data.smpl.sampleLoopCount + pMetadata->data.smpl.samplerSpecificDataSizeInBytes;
+
+                bytesWritten += drwav__write_or_count(pWav, "smpl", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.manufacturerId);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.productId);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.samplePeriodNanoseconds);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.midiUnityNote);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.midiPitchFraction);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.smpteFormat);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.smpteOffset);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.sampleLoopCount);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.samplerSpecificDataSizeInBytes);
+
+                for (iLoop = 0; iLoop < pMetadata->data.smpl.sampleLoopCount; ++iLoop) {
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].cuePointId);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].type);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].firstSampleByteOffset);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].lastSampleByteOffset);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].sampleFraction);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].playCount);
+                }
+
+                if (pMetadata->data.smpl.samplerSpecificDataSizeInBytes > 0) {
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.smpl.pSamplerSpecificData, pMetadata->data.smpl.samplerSpecificDataSizeInBytes);
+                }
+            } break;
+
+            case drwav_metadata_type_inst:
+            {
+                chunkSize = DRWAV_INST_BYTES;
+
+                bytesWritten += drwav__write_or_count(pWav, "inst", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.midiUnityNote, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.fineTuneCents, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.gainDecibels, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.lowNote, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.highNote, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.lowVelocity, 1);
+                bytesWritten += drwav__write_or_count(pWav, &pMetadata->data.inst.highVelocity, 1);
+            } break;
+
+            case drwav_metadata_type_cue:
+            {
+                drwav_uint32 iCuePoint;
+
+                chunkSize = DRWAV_CUE_BYTES + DRWAV_CUE_POINT_BYTES * pMetadata->data.cue.cuePointCount;
+
+                bytesWritten += drwav__write_or_count(pWav, "cue ", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.cuePointCount);
+                for (iCuePoint = 0; iCuePoint < pMetadata->data.cue.cuePointCount; ++iCuePoint) {
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].id);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].playOrderPosition);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId, 4);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].chunkStart);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].blockStart);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].sampleByteOffset);
+                }
+            } break;
+
+            case drwav_metadata_type_acid:
+            {
+                chunkSize = DRWAV_ACID_BYTES;
+
+                bytesWritten += drwav__write_or_count(pWav, "acid", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.acid.flags);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.acid.midiUnityNote);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.acid.reserved1);
+                bytesWritten += drwav__write_or_count_f32ne_to_le(pWav, pMetadata->data.acid.reserved2);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.acid.numBeats);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.acid.meterDenominator);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.acid.meterNumerator);
+                bytesWritten += drwav__write_or_count_f32ne_to_le(pWav, pMetadata->data.acid.tempo);
+            } break;
+
+            case drwav_metadata_type_bext:
+            {
+                char reservedBuf[DRWAV_BEXT_RESERVED_BYTES];
+                drwav_uint32 timeReferenceLow;
+                drwav_uint32 timeReferenceHigh;
+
+                chunkSize = DRWAV_BEXT_BYTES + pMetadata->data.bext.codingHistorySize;
+
+                bytesWritten += drwav__write_or_count(pWav, "bext", 4);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+
+                bytesWritten += drwav__write_or_count_string_to_fixed_size_buf(pWav, pMetadata->data.bext.pDescription, DRWAV_BEXT_DESCRIPTION_BYTES);
+                bytesWritten += drwav__write_or_count_string_to_fixed_size_buf(pWav, pMetadata->data.bext.pOriginatorName, DRWAV_BEXT_ORIGINATOR_NAME_BYTES);
+                bytesWritten += drwav__write_or_count_string_to_fixed_size_buf(pWav, pMetadata->data.bext.pOriginatorReference, DRWAV_BEXT_ORIGINATOR_REF_BYTES);
+                bytesWritten += drwav__write_or_count(pWav, pMetadata->data.bext.pOriginationDate, sizeof(pMetadata->data.bext.pOriginationDate));
+                bytesWritten += drwav__write_or_count(pWav, pMetadata->data.bext.pOriginationTime, sizeof(pMetadata->data.bext.pOriginationTime));
+
+                timeReferenceLow  = (drwav_uint32)(pMetadata->data.bext.timeReference & 0xFFFFFFFF);
+                timeReferenceHigh = (drwav_uint32)(pMetadata->data.bext.timeReference >> 32);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, timeReferenceLow);
+                bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, timeReferenceHigh);
+
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.version);
+                bytesWritten += drwav__write_or_count(pWav, pMetadata->data.bext.pUMID, DRWAV_BEXT_UMID_BYTES);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.loudnessValue);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.loudnessRange);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.maxTruePeakLevel);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.maxMomentaryLoudness);
+                bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.bext.maxShortTermLoudness);
+
+                DRWAV_ZERO_MEMORY(reservedBuf, sizeof(reservedBuf));
+                bytesWritten += drwav__write_or_count(pWav, reservedBuf, sizeof(reservedBuf));
+
+                if (pMetadata->data.bext.codingHistorySize > 0) {
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.bext.pCodingHistory, pMetadata->data.bext.codingHistorySize);
+                }
+            } break;
+
+            case drwav_metadata_type_unknown:
+            {
+                if (pMetadata->data.unknown.chunkLocation == drwav_metadata_location_top_level) {
+                    chunkSize = pMetadata->data.unknown.dataSizeInBytes;
+
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.id, 4);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.pData, pMetadata->data.unknown.dataSizeInBytes);
+                }
+            } break;
+
+            default: break;
+        }
+        if ((chunkSize % 2) != 0) {
+            bytesWritten += drwav__write_or_count_byte(pWav, 0);
+        }
+    }
+
+    if (hasListInfo) {
+        drwav_uint32 chunkSize = 4; /* Start with 4 bytes for "INFO". */
+        for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+            drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+
+            if ((pMetadata->type & drwav_metadata_type_list_all_info_strings)) {
+                chunkSize += 8; /* For id and string size. */
+                chunkSize += pMetadata->data.infoText.stringLength + 1; /* Include null terminator. */
+            } else if (pMetadata->type == drwav_metadata_type_unknown && pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_info_list) {
+                chunkSize += 8; /* For id and data size. */
+                chunkSize += pMetadata->data.unknown.dataSizeInBytes;
+            }
+
+            if ((chunkSize % 2) != 0) {
+                chunkSize += 1;
+            }
+        }
+
+        bytesWritten += drwav__write_or_count(pWav, "LIST", 4);
+        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+        bytesWritten += drwav__write_or_count(pWav, "INFO", 4);
+
+        for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+            drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+            drwav_uint32 subchunkSize = 0;
+
+            if (pMetadata->type & drwav_metadata_type_list_all_info_strings) {
+                const char* pID = NULL;
+
+                switch (pMetadata->type) {
+                    case drwav_metadata_type_list_info_software:    pID = "ISFT"; break;
+                    case drwav_metadata_type_list_info_copyright:   pID = "ICOP"; break;
+                    case drwav_metadata_type_list_info_title:       pID = "INAM"; break;
+                    case drwav_metadata_type_list_info_artist:      pID = "IART"; break;
+                    case drwav_metadata_type_list_info_comment:     pID = "ICMT"; break;
+                    case drwav_metadata_type_list_info_date:        pID = "ICRD"; break;
+                    case drwav_metadata_type_list_info_genre:       pID = "IGNR"; break;
+                    case drwav_metadata_type_list_info_album:       pID = "IPRD"; break;
+                    case drwav_metadata_type_list_info_tracknumber: pID = "ITRK"; break;
+                    default: break;
+                }
+
+                DRWAV_ASSERT(pID != NULL);
+
+                if (pMetadata->data.infoText.stringLength) {
+                    subchunkSize = pMetadata->data.infoText.stringLength + 1;
+                    bytesWritten += drwav__write_or_count(pWav, pID, 4);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, subchunkSize);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.infoText.pString, pMetadata->data.infoText.stringLength);
+                    bytesWritten += drwav__write_or_count_byte(pWav, '\0');
+                }
+            } else if (pMetadata->type == drwav_metadata_type_unknown && pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_info_list) {
+                if (pMetadata->data.unknown.dataSizeInBytes) {
+                    subchunkSize = pMetadata->data.unknown.dataSizeInBytes;
+
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.id, 4);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.unknown.dataSizeInBytes);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.pData, subchunkSize);
+                }
+            }
+
+            if ((subchunkSize % 2) != 0) {
+                bytesWritten += drwav__write_or_count_byte(pWav, 0);
+            }
+        }
+    }
+
+    if (hasListAdtl) {
+        drwav_uint32 chunkSize = 4; /* start with 4 bytes for "adtl" */
+
+        for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+            drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+
+            switch (pMetadata->type)
+            {
+                case drwav_metadata_type_list_label:
+                case drwav_metadata_type_list_note:
+                {
+                    chunkSize += 8; /* for id and chunk size */
+                    chunkSize += DRWAV_LIST_LABEL_OR_NOTE_BYTES;
+
+                    if (pMetadata->data.labelOrNote.stringLength > 0) {
+                        chunkSize += pMetadata->data.labelOrNote.stringLength + 1;
+                    }
+                } break;
+
+                case drwav_metadata_type_list_labelled_cue_region:
+                {
+                    chunkSize += 8; /* for id and chunk size */
+                    chunkSize += DRWAV_LIST_LABELLED_TEXT_BYTES;
+
+                    if (pMetadata->data.labelledCueRegion.stringLength > 0) {
+                        chunkSize += pMetadata->data.labelledCueRegion.stringLength + 1;
+                    }
+                } break;
+
+                case drwav_metadata_type_unknown:
+                {
+                    if (pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_adtl_list) {
+                        chunkSize += 8; /* for id and chunk size */
+                        chunkSize += pMetadata->data.unknown.dataSizeInBytes;
+                    }
+                } break;
+
+                default: break;
+            }
+
+            if ((chunkSize % 2) != 0) {
+                chunkSize += 1;
+            }
+        }
+
+        bytesWritten += drwav__write_or_count(pWav, "LIST", 4);
+        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, chunkSize);
+        bytesWritten += drwav__write_or_count(pWav, "adtl", 4);
+
+        for (iMetadata = 0; iMetadata < metadataCount; ++iMetadata) {
+            drwav_metadata* pMetadata = &pMetadatas[iMetadata];
+            drwav_uint32 subchunkSize = 0;
+
+            switch (pMetadata->type)
+            {
+                case drwav_metadata_type_list_label:
+                case drwav_metadata_type_list_note:
+                {
+                    if (pMetadata->data.labelOrNote.stringLength > 0) {
+                        const char *pID = NULL;
+
+                        if (pMetadata->type == drwav_metadata_type_list_label) {
+                            pID = "labl";
+                        }
+                        else if (pMetadata->type == drwav_metadata_type_list_note) {
+                            pID = "note";
+                        }
+
+                        DRWAV_ASSERT(pID != NULL);
+                        DRWAV_ASSERT(pMetadata->data.labelOrNote.pString != NULL);
+
+                        subchunkSize = DRWAV_LIST_LABEL_OR_NOTE_BYTES;
+
+                        bytesWritten += drwav__write_or_count(pWav, pID, 4);
+                        subchunkSize += pMetadata->data.labelOrNote.stringLength + 1;
+                        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, subchunkSize);
+
+                        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.labelOrNote.cuePointId);
+                        bytesWritten += drwav__write_or_count(pWav, pMetadata->data.labelOrNote.pString, pMetadata->data.labelOrNote.stringLength);
+                        bytesWritten += drwav__write_or_count_byte(pWav, '\0');
+                    }
+                } break;
+
+                case drwav_metadata_type_list_labelled_cue_region:
+                {
+                    subchunkSize = DRWAV_LIST_LABELLED_TEXT_BYTES;
+
+                    bytesWritten += drwav__write_or_count(pWav, "ltxt", 4);
+                    if (pMetadata->data.labelledCueRegion.stringLength > 0) {
+                        subchunkSize += pMetadata->data.labelledCueRegion.stringLength + 1;
+                    }
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, subchunkSize);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.labelledCueRegion.cuePointId);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.labelledCueRegion.sampleLength);
+                    bytesWritten += drwav__write_or_count(pWav, pMetadata->data.labelledCueRegion.purposeId, 4);
+                    bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.labelledCueRegion.country);
+                    bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.labelledCueRegion.language);
+                    bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.labelledCueRegion.dialect);
+                    bytesWritten += drwav__write_or_count_u16ne_to_le(pWav, pMetadata->data.labelledCueRegion.codePage);
+
+                    if (pMetadata->data.labelledCueRegion.stringLength > 0) {
+                        DRWAV_ASSERT(pMetadata->data.labelledCueRegion.pString != NULL);
+
+                        bytesWritten += drwav__write_or_count(pWav, pMetadata->data.labelledCueRegion.pString, pMetadata->data.labelledCueRegion.stringLength);
+                        bytesWritten += drwav__write_or_count_byte(pWav, '\0');
+                    }
+                } break;
+
+                case drwav_metadata_type_unknown:
+                {
+                    if (pMetadata->data.unknown.chunkLocation == drwav_metadata_location_inside_adtl_list) {
+                        subchunkSize = pMetadata->data.unknown.dataSizeInBytes;
+
+                        DRWAV_ASSERT(pMetadata->data.unknown.pData != NULL);
+                        bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.id, 4);
+                        bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, subchunkSize);
+                        bytesWritten += drwav__write_or_count(pWav, pMetadata->data.unknown.pData, subchunkSize);
+                    }
+                } break;
+
+                default: break;
+            }
+
+            if ((subchunkSize % 2) != 0) {
+                bytesWritten += drwav__write_or_count_byte(pWav, 0);
+            }
+        }
+    }
+
+    DRWAV_ASSERT((bytesWritten % 2) == 0);
 
+    return bytesWritten;
+}
 
-static drwav_uint32 drwav__riff_chunk_size_riff(drwav_uint64 dataChunkSize)
+DRWAV_PRIVATE drwav_uint32 drwav__riff_chunk_size_riff(drwav_uint64 dataChunkSize, drwav_metadata* pMetadata, drwav_uint32 metadataCount)
 {
-    drwav_uint32 dataSubchunkPaddingSize = drwav__chunk_padding_size_riff(dataChunkSize);
-
-    if (dataChunkSize <= (0xFFFFFFFFUL - 36 - dataSubchunkPaddingSize)) {
-        return 36 + (drwav_uint32)(dataChunkSize + dataSubchunkPaddingSize);
-    } else {
-        return 0xFFFFFFFF;
+    drwav_uint64 chunkSize = 4 + 24 + (drwav_uint64)drwav__write_or_count_metadata(NULL, pMetadata, metadataCount) + 8 + dataChunkSize + drwav__chunk_padding_size_riff(dataChunkSize); /* 4 = "WAVE". 24 = "fmt " chunk. 8 = "data" + u32 data size. */
+    if (chunkSize > 0xFFFFFFFFUL) {
+        chunkSize = 0xFFFFFFFFUL;
     }
+
+    return (drwav_uint32)chunkSize; /* Safe cast due to the clamp above. */
 }
 
-static drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize)
+DRWAV_PRIVATE drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize)
 {
     if (dataChunkSize <= 0xFFFFFFFFUL) {
         return (drwav_uint32)dataChunkSize;
@@ -2199,20 +4332,36 @@ static drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize)
     }
 }
 
-static drwav_uint64 drwav__riff_chunk_size_w64(drwav_uint64 dataChunkSize)
+DRWAV_PRIVATE drwav_uint64 drwav__riff_chunk_size_w64(drwav_uint64 dataChunkSize)
 {
     drwav_uint64 dataSubchunkPaddingSize = drwav__chunk_padding_size_w64(dataChunkSize);
 
     return 80 + 24 + dataChunkSize + dataSubchunkPaddingSize;   /* +24 because W64 includes the size of the GUID and size fields. */
 }
 
-static drwav_uint64 drwav__data_chunk_size_w64(drwav_uint64 dataChunkSize)
+DRWAV_PRIVATE drwav_uint64 drwav__data_chunk_size_w64(drwav_uint64 dataChunkSize)
 {
     return 24 + dataChunkSize;        /* +24 because W64 includes the size of the GUID and size fields. */
 }
 
+DRWAV_PRIVATE drwav_uint64 drwav__riff_chunk_size_rf64(drwav_uint64 dataChunkSize, drwav_metadata *metadata, drwav_uint32 numMetadata)
+{
+    drwav_uint64 chunkSize = 4 + 36 + 24 + (drwav_uint64)drwav__write_or_count_metadata(NULL, metadata, numMetadata) + 8 + dataChunkSize + drwav__chunk_padding_size_riff(dataChunkSize); /* 4 = "WAVE". 36 = "ds64" chunk. 24 = "fmt " chunk. 8 = "data" + u32 data size. */
+    if (chunkSize > 0xFFFFFFFFUL) {
+        chunkSize = 0xFFFFFFFFUL;
+    }
+
+    return chunkSize;
+}
+
+DRWAV_PRIVATE drwav_uint64 drwav__data_chunk_size_rf64(drwav_uint64 dataChunkSize)
+{
+    return dataChunkSize;
+}
+
+
 
-static drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_format* pFormat, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_format* pFormat, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     if (pWav == NULL || onWrite == NULL) {
         return DRWAV_FALSE;
@@ -2252,7 +4401,8 @@ static drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_format* pF
     return DRWAV_TRUE;
 }
 
-static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
+
+DRWAV_PRIVATE drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
 {
     /* The function assumes drwav_preinit_write() was called beforehand. */
 
@@ -2284,60 +4434,77 @@ static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_for
 
     /* "RIFF" chunk. */
     if (pFormat->container == drwav_container_riff) {
-        drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize;   /* +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk] */
-        runningPos += pWav->onWrite(pWav->pUserData, "RIFF", 4);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 4);
-        runningPos += pWav->onWrite(pWav->pUserData, "WAVE", 4);
+        drwav_uint32 chunkSizeRIFF = 28 + (drwav_uint32)initialDataChunkSize;   /* +28 = "WAVE" + [sizeof "fmt " chunk] */
+        runningPos += drwav__write(pWav, "RIFF", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, chunkSizeRIFF);
+        runningPos += drwav__write(pWav, "WAVE", 4);
+    } else if (pFormat->container == drwav_container_w64) {
+        drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize;            /* +24 because W64 includes the size of the GUID and size fields. */
+        runningPos += drwav__write(pWav, drwavGUID_W64_RIFF, 16);
+        runningPos += drwav__write_u64ne_to_le(pWav, chunkSizeRIFF);
+        runningPos += drwav__write(pWav, drwavGUID_W64_WAVE, 16);
+    } else if (pFormat->container == drwav_container_rf64) {
+        runningPos += drwav__write(pWav, "RF64", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, 0xFFFFFFFF);               /* Always 0xFFFFFFFF for RF64. Set to a proper value in the "ds64" chunk. */
+        runningPos += drwav__write(pWav, "WAVE", 4);
     } else {
-        drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize;   /* +24 because W64 includes the size of the GUID and size fields. */
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_RIFF, 16);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 8);
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_WAVE, 16);
+        return DRWAV_FALSE; /* Container not supported for writing. */
+    }
+
+
+    /* "ds64" chunk (RF64 only). */
+    if (pFormat->container == drwav_container_rf64) {
+        drwav_uint32 initialds64ChunkSize = 28;                                 /* 28 = [Size of RIFF (8 bytes)] + [Size of DATA (8 bytes)] + [Sample Count (8 bytes)] + [Table Length (4 bytes)]. Table length always set to 0. */
+        drwav_uint64 initialRiffChunkSize = 8 + initialds64ChunkSize + initialDataChunkSize;    /* +8 for the ds64 header. */
+
+        runningPos += drwav__write(pWav, "ds64", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, initialds64ChunkSize);     /* Size of ds64. */
+        runningPos += drwav__write_u64ne_to_le(pWav, initialRiffChunkSize);     /* Size of RIFF. Set to true value at the end. */
+        runningPos += drwav__write_u64ne_to_le(pWav, initialDataChunkSize);     /* Size of DATA. Set to true value at the end. */
+        runningPos += drwav__write_u64ne_to_le(pWav, totalSampleCount);         /* Sample count. */
+        runningPos += drwav__write_u32ne_to_le(pWav, 0);                        /* Table length. Always set to zero in our case since we're not doing any other chunks than "DATA". */
     }
 
+
     /* "fmt " chunk. */
-    if (pFormat->container == drwav_container_riff) {
+    if (pFormat->container == drwav_container_riff || pFormat->container == drwav_container_rf64) {
         chunkSizeFMT = 16;
-        runningPos += pWav->onWrite(pWav->pUserData, "fmt ", 4);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 4);
-    } else {
+        runningPos += drwav__write(pWav, "fmt ", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, (drwav_uint32)chunkSizeFMT);
+    } else if (pFormat->container == drwav_container_w64) {
         chunkSizeFMT = 40;
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_FMT, 16);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 8);
+        runningPos += drwav__write(pWav, drwavGUID_W64_FMT, 16);
+        runningPos += drwav__write_u64ne_to_le(pWav, chunkSizeFMT);
     }
 
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.formatTag,      2);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.channels,       2);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.sampleRate,     4);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.avgBytesPerSec, 4);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.blockAlign,     2);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.bitsPerSample,  2);
+    runningPos += drwav__write_u16ne_to_le(pWav, pWav->fmt.formatTag);
+    runningPos += drwav__write_u16ne_to_le(pWav, pWav->fmt.channels);
+    runningPos += drwav__write_u32ne_to_le(pWav, pWav->fmt.sampleRate);
+    runningPos += drwav__write_u32ne_to_le(pWav, pWav->fmt.avgBytesPerSec);
+    runningPos += drwav__write_u16ne_to_le(pWav, pWav->fmt.blockAlign);
+    runningPos += drwav__write_u16ne_to_le(pWav, pWav->fmt.bitsPerSample);
 
-    pWav->dataChunkDataPos = runningPos;
+    /* TODO: is a 'fact' chunk required for DR_WAVE_FORMAT_IEEE_FLOAT? */
 
-    /* "data" chunk. */
-    if (pFormat->container == drwav_container_riff) {
-        drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize;
-        runningPos += pWav->onWrite(pWav->pUserData, "data", 4);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 4);
-    } else {
-        drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; /* +24 because W64 includes the size of the GUID and size fields. */
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_DATA, 16);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 8);
+    if (!pWav->isSequentialWrite && pWav->pMetadata != NULL && pWav->metadataCount > 0 && (pFormat->container == drwav_container_riff || pFormat->container == drwav_container_rf64)) {
+        runningPos += drwav__write_or_count_metadata(pWav, pWav->pMetadata, pWav->metadataCount);
     }
 
+    pWav->dataChunkDataPos = runningPos;
 
-    /* Simple validation. */
+    /* "data" chunk. */
     if (pFormat->container == drwav_container_riff) {
-        if (runningPos != 20 + chunkSizeFMT + 8) {
-            return DRWAV_FALSE;
-        }
-    } else {
-        if (runningPos != 40 + chunkSizeFMT + 24) {
-            return DRWAV_FALSE;
-        }
+        drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize;
+        runningPos += drwav__write(pWav, "data", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, chunkSizeDATA);
+    } else if (pFormat->container == drwav_container_w64) {
+        drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize;     /* +24 because W64 includes the size of the GUID and size fields. */
+        runningPos += drwav__write(pWav, drwavGUID_W64_DATA, 16);
+        runningPos += drwav__write_u64ne_to_le(pWav, chunkSizeDATA);
+    } else if (pFormat->container == drwav_container_rf64) {
+        runningPos += drwav__write(pWav, "data", 4);
+        runningPos += drwav__write_u32ne_to_le(pWav, 0xFFFFFFFF);   /* Always set to 0xFFFFFFFF for RF64. The true size of the data chunk is specified in the ds64 chunk. */
     }
-    
 
     /* Set some properties for the client's convenience. */
     pWav->container = pFormat->container;
@@ -2345,6 +4512,7 @@ static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_for
     pWav->sampleRate = pFormat->sampleRate;
     pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
     pWav->translatedFormatTag = (drwav_uint16)pFormat->format;
+    pWav->dataChunkDataPos = runningPos;
 
     return DRWAV_TRUE;
 }
@@ -2377,19 +4545,35 @@ DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const
     return drwav_init_write_sequential(pWav, pFormat, totalPCMFrameCount*pFormat->channels, onWrite, pUserData, pAllocationCallbacks);
 }
 
-DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
+DRWAV_API drwav_bool32 drwav_init_write_with_metadata(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks, drwav_metadata* pMetadata, drwav_uint32 metadataCount)
+{
+    if (!drwav_preinit_write(pWav, pFormat, DRWAV_FALSE, onWrite, onSeek, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->pMetadata     = pMetadata;
+    pWav->metadataCount = metadataCount;
+
+    return drwav_init_write__internal(pWav, pFormat, 0);
+}
+
+
+DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalFrameCount, drwav_metadata* pMetadata, drwav_uint32 metadataCount)
 {
-    /* Casting totalSampleCount to drwav_int64 for VC6 compatibility. No issues in practice because nobody is going to exhaust the whole 63 bits. */
-    drwav_uint64 targetDataSizeBytes = (drwav_uint64)((drwav_int64)totalSampleCount * pFormat->channels * pFormat->bitsPerSample/8.0);
+    /* Casting totalFrameCount to drwav_int64 for VC6 compatibility. No issues in practice because nobody is going to exhaust the whole 63 bits. */
+    drwav_uint64 targetDataSizeBytes = (drwav_uint64)((drwav_int64)totalFrameCount * pFormat->channels * pFormat->bitsPerSample/8.0);
     drwav_uint64 riffChunkSizeBytes;
-    drwav_uint64 fileSizeBytes;
+    drwav_uint64 fileSizeBytes = 0;
 
     if (pFormat->container == drwav_container_riff) {
-        riffChunkSizeBytes = drwav__riff_chunk_size_riff(targetDataSizeBytes);
-        fileSizeBytes = (8 + riffChunkSizeBytes); /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */
-    } else {
+        riffChunkSizeBytes = drwav__riff_chunk_size_riff(targetDataSizeBytes, pMetadata, metadataCount);
+        fileSizeBytes = (8 + riffChunkSizeBytes);   /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */
+    } else if (pFormat->container == drwav_container_w64) {
         riffChunkSizeBytes = drwav__riff_chunk_size_w64(targetDataSizeBytes);
         fileSizeBytes = riffChunkSizeBytes;
+    } else if (pFormat->container == drwav_container_rf64) {
+        riffChunkSizeBytes = drwav__riff_chunk_size_rf64(targetDataSizeBytes, pMetadata, metadataCount);
+        fileSizeBytes = (8 + riffChunkSizeBytes);   /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */
     }
 
     return fileSizeBytes;
@@ -2398,9 +4582,10 @@ DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pF
 
 #ifndef DR_WAV_NO_STDIO
 
+/* Errno */
 /* drwav_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
 #include <errno.h>
-static drwav_result drwav_result_from_errno(int e)
+DRWAV_PRIVATE drwav_result drwav_result_from_errno(int e)
 {
     switch (e)
     {
@@ -2801,10 +4986,12 @@ static drwav_result drwav_result_from_errno(int e)
         default: return DRWAV_ERROR;
     }
 }
+/* End Errno */
 
-static drwav_result drwav_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
+/* fopen */
+DRWAV_PRIVATE drwav_result drwav_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
 {
-#if _MSC_VER && _MSC_VER >= 1400
+#if defined(_MSC_VER) && _MSC_VER >= 1400
     errno_t err;
 #endif
 
@@ -2816,7 +5003,7 @@ static drwav_result drwav_fopen(FILE** ppFile, const char* pFilePath, const char
         return DRWAV_INVALID_ARGS;
     }
 
-#if _MSC_VER && _MSC_VER >= 1400
+#if defined(_MSC_VER) && _MSC_VER >= 1400
     err = fopen_s(ppFile, pFilePath, pOpenMode);
     if (err != 0) {
         return drwav_result_from_errno(err);
@@ -2851,17 +5038,19 @@ _wfopen() isn't always available in all compilation environments.
     * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
     * MinGW-64 (both 32- and 64-bit) seems to support it.
     * MinGW wraps it in !defined(__STRICT_ANSI__).
+    * OpenWatcom wraps it in !defined(_NO_EXT_KEYS).
 
 This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
 fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
 */
 #if defined(_WIN32)
-    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || (!defined(__STRICT_ANSI__) && !defined(_NO_EXT_KEYS))
         #define DRWAV_HAS_WFOPEN
     #endif
 #endif
 
-static drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drwav_allocation_callbacks* pAllocationCallbacks)
+#ifndef DR_WAV_NO_WCHAR
+DRWAV_PRIVATE drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     if (ppFile != NULL) {
         *ppFile = NULL;  /* Safety. */
@@ -2888,11 +5077,24 @@ static drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const
         (void)pAllocationCallbacks;
     }
 #else
-    /*
-    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
-    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
-    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
+	/*
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because
+	fopen() is locale specific. The only real way I can think of to do this is with wcsrtombs(). Note
+	that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody gets a compiler
+	error I'll look into improving compatibility.
     */
+
+	/*
+	Some compilers don't support wchar_t or wcsrtombs() which we're using below. In this case we just
+	need to abort with an error. If you encounter a compiler lacking such support, add it to this list
+	and submit a bug report and it'll be added to the library upstream.
+	*/
+	#if defined(__DJGPP__)
+	{
+		/* Nothing to do here. This will fall through to the error check below. */
+	}
+	#else
     {
         mbstate_t mbs;
         size_t lenMB;
@@ -2934,6 +5136,7 @@ static drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const
 
         drwav__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
     }
+	#endif
 
     if (*ppFile == NULL) {
         return DRWAV_ERROR;
@@ -2942,19 +5145,21 @@ static drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const
 
     return DRWAV_SUCCESS;
 }
+#endif
+/* End fopen */
 
 
-static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
+DRWAV_PRIVATE size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
 {
     return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData);
 }
 
-static size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite)
+DRWAV_PRIVATE size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite)
 {
     return fwrite(pData, 1, bytesToWrite, (FILE*)pUserData);
 }
 
-static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin)
+DRWAV_PRIVATE drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin)
 {
     return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
 }
@@ -2965,7 +5170,7 @@ DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const
 }
 
 
-static drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pFile, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pFile, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     drwav_bool32 result;
 
@@ -2995,6 +5200,7 @@ DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drw
     return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
 }
 
+#ifndef DR_WAV_NO_WCHAR
 DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     return drwav_init_file_ex_w(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
@@ -3010,9 +5216,34 @@ DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename
     /* This takes ownership of the FILE* object. */
     return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
 }
+#endif
+
+DRWAV_API drwav_bool32 drwav_init_file_with_metadata(drwav* pWav, const char* filename, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_fopen(&pFile, filename, "rb") != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, NULL, NULL, flags | DRWAV_WITH_METADATA, pAllocationCallbacks);
+}
+
+#ifndef DR_WAV_NO_WCHAR
+DRWAV_API drwav_bool32 drwav_init_file_with_metadata_w(drwav* pWav, const wchar_t* filename, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_wfopen(&pFile, filename, L"rb", pAllocationCallbacks) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, NULL, NULL, flags | DRWAV_WITH_METADATA, pAllocationCallbacks);
+}
+#endif
 
 
-static drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FILE* pFile, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FILE* pFile, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     drwav_bool32 result;
 
@@ -3031,7 +5262,7 @@ static drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FILE* pFil
     return DRWAV_TRUE;
 }
 
-static drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     FILE* pFile;
     if (drwav_fopen(&pFile, filename, "wb") != DRWAV_SUCCESS) {
@@ -3042,7 +5273,8 @@ static drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* fil
     return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
 }
 
-static drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+#ifndef DR_WAV_NO_WCHAR
+DRWAV_PRIVATE drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     FILE* pFile;
     if (drwav_wfopen(&pFile, filename, L"wb", pAllocationCallbacks) != DRWAV_SUCCESS) {
@@ -3052,6 +5284,7 @@ static drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const wchar_t
     /* This takes ownership of the FILE* object. */
     return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
 }
+#endif
 
 DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
@@ -3072,6 +5305,7 @@ DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav,
     return drwav_init_file_write_sequential(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
 }
 
+#ifndef DR_WAV_NO_WCHAR
 DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     return drwav_init_file_write_w__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
@@ -3090,10 +5324,11 @@ DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav
 
     return drwav_init_file_write_sequential_w(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
 }
+#endif
 #endif  /* DR_WAV_NO_STDIO */
 
 
-static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
+DRWAV_PRIVATE size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
 {
     drwav* pWav = (drwav*)pUserData;
     size_t bytesRemaining;
@@ -3114,7 +5349,7 @@ static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t by
     return bytesToRead;
 }
 
-static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin)
+DRWAV_PRIVATE drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin)
 {
     drwav* pWav = (drwav*)pUserData;
     DRWAV_ASSERT(pWav != NULL);
@@ -3139,11 +5374,11 @@ static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_see
             return DRWAV_FALSE; /* Trying to seek too far forward. */
         }
     }
-    
+
     return DRWAV_TRUE;
 }
 
-static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite)
+DRWAV_PRIVATE size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite)
 {
     drwav* pWav = (drwav*)pUserData;
     size_t bytesRemaining;
@@ -3183,7 +5418,7 @@ static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_
     return bytesToWrite;
 }
 
-static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin)
+DRWAV_PRIVATE drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin)
 {
     drwav* pWav = (drwav*)pUserData;
     DRWAV_ASSERT(pWav != NULL);
@@ -3208,7 +5443,7 @@ static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drw
             pWav->memoryStreamWrite.currentWritePos = pWav->memoryStreamWrite.dataSize;  /* Trying to seek too far forward. */
         }
     }
-    
+
     return DRWAV_TRUE;
 }
 
@@ -3234,8 +5469,25 @@ DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_
     return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
 }
 
+DRWAV_API drwav_bool32 drwav_init_memory_with_metadata(drwav* pWav, const void* data, size_t dataSize, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (data == NULL || dataSize == 0) {
+        return DRWAV_FALSE;
+    }
+
+    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->memoryStream.data = (const drwav_uint8*)data;
+    pWav->memoryStream.dataSize = dataSize;
+    pWav->memoryStream.currentReadPos = 0;
+
+    return drwav_init__internal(pWav, NULL, NULL, flags | DRWAV_WITH_METADATA);
+}
+
 
-static drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     if (ppData == NULL || pDataSize == NULL) {
         return DRWAV_FALSE;
@@ -3295,15 +5547,15 @@ DRWAV_API drwav_result drwav_uninit(drwav* pWav)
         drwav_uint32 paddingSize = 0;
 
         /* Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. */
-        if (pWav->container == drwav_container_riff) {
+        if (pWav->container == drwav_container_riff || pWav->container == drwav_container_rf64) {
             paddingSize = drwav__chunk_padding_size_riff(pWav->dataChunkDataSize);
         } else {
             paddingSize = drwav__chunk_padding_size_w64(pWav->dataChunkDataSize);
         }
-        
+
         if (paddingSize > 0) {
             drwav_uint64 paddingData = 0;
-            pWav->onWrite(pWav->pUserData, &paddingData, paddingSize);
+            drwav__write(pWav, &paddingData, paddingSize);  /* Byte order does not matter for this. */
         }
 
         /*
@@ -3314,26 +5566,41 @@ DRWAV_API drwav_result drwav_uninit(drwav* pWav)
             if (pWav->container == drwav_container_riff) {
                 /* The "RIFF" chunk size. */
                 if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) {
-                    drwav_uint32 riffChunkSize = drwav__riff_chunk_size_riff(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 4);
+                    drwav_uint32 riffChunkSize = drwav__riff_chunk_size_riff(pWav->dataChunkDataSize, pWav->pMetadata, pWav->metadataCount);
+                    drwav__write_u32ne_to_le(pWav, riffChunkSize);
                 }
 
-                /* the "data" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 4, drwav_seek_origin_start)) {
+                /* The "data" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos - 4, drwav_seek_origin_start)) {
                     drwav_uint32 dataChunkSize = drwav__data_chunk_size_riff(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 4);
+                    drwav__write_u32ne_to_le(pWav, dataChunkSize);
                 }
-            } else {
+            } else if (pWav->container == drwav_container_w64) {
                 /* The "RIFF" chunk size. */
                 if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) {
                     drwav_uint64 riffChunkSize = drwav__riff_chunk_size_w64(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 8);
+                    drwav__write_u64ne_to_le(pWav, riffChunkSize);
                 }
 
                 /* The "data" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 16, drwav_seek_origin_start)) {
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos - 8, drwav_seek_origin_start)) {
                     drwav_uint64 dataChunkSize = drwav__data_chunk_size_w64(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 8);
+                    drwav__write_u64ne_to_le(pWav, dataChunkSize);
+                }
+            } else if (pWav->container == drwav_container_rf64) {
+                /* We only need to update the ds64 chunk. The "RIFF" and "data" chunks always have their sizes set to 0xFFFFFFFF for RF64. */
+                int ds64BodyPos = 12 + 8;
+
+                /* The "RIFF" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, ds64BodyPos + 0, drwav_seek_origin_start)) {
+                    drwav_uint64 riffChunkSize = drwav__riff_chunk_size_rf64(pWav->dataChunkDataSize, pWav->pMetadata, pWav->metadataCount);
+                    drwav__write_u64ne_to_le(pWav, riffChunkSize);
+                }
+
+                /* The "data" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, ds64BodyPos + 8, drwav_seek_origin_start)) {
+                    drwav_uint64 dataChunkSize = drwav__data_chunk_size_rf64(pWav->dataChunkDataSize);
+                    drwav__write_u64ne_to_le(pWav, dataChunkSize);
                 }
             }
         }
@@ -3344,6 +5611,8 @@ DRWAV_API drwav_result drwav_uninit(drwav* pWav)
                 result = DRWAV_INVALID_FILE;
             }
         }
+    } else {
+        drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
     }
 
 #ifndef DR_WAV_NO_STDIO
@@ -3364,16 +5633,62 @@ DRWAV_API drwav_result drwav_uninit(drwav* pWav)
 DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
 {
     size_t bytesRead;
+    drwav_uint32 bytesPerFrame;
 
-    if (pWav == NULL || bytesToRead == 0 || pBufferOut == NULL) {
-        return 0;
+    if (pWav == NULL || bytesToRead == 0) {
+        return 0;   /* Invalid args. */
     }
 
     if (bytesToRead > pWav->bytesRemaining) {
         bytesToRead = (size_t)pWav->bytesRemaining;
     }
 
-    bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
+    if (bytesToRead == 0) {
+        return 0;   /* At end. */
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;   /* Could not determine the bytes per frame. */
+    }
+
+    if (pBufferOut != NULL) {
+        bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
+    } else {
+        /* We need to seek. If we fail, we need to read-and-discard to make sure we get a good byte count. */
+        bytesRead = 0;
+        while (bytesRead < bytesToRead) {
+            size_t bytesToSeek = (bytesToRead - bytesRead);
+            if (bytesToSeek > 0x7FFFFFFF) {
+                bytesToSeek = 0x7FFFFFFF;
+            }
+
+            if (pWav->onSeek(pWav->pUserData, (int)bytesToSeek, drwav_seek_origin_current) == DRWAV_FALSE) {
+                break;
+            }
+
+            bytesRead += bytesToSeek;
+        }
+
+        /* When we get here we may need to read-and-discard some data. */
+        while (bytesRead < bytesToRead) {
+            drwav_uint8 buffer[4096];
+            size_t bytesSeeked;
+            size_t bytesToSeek = (bytesToRead - bytesRead);
+            if (bytesToSeek > sizeof(buffer)) {
+                bytesToSeek = sizeof(buffer);
+            }
+
+            bytesSeeked = pWav->onRead(pWav->pUserData, buffer, bytesToSeek);
+            bytesRead += bytesSeeked;
+
+            if (bytesSeeked < bytesToSeek) {
+                break;  /* Reached the end. */
+            }
+        }
+    }
+
+    pWav->readCursorInPCMFrames += bytesRead / bytesPerFrame;
 
     pWav->bytesRemaining -= bytesRead;
     return bytesRead;
@@ -3384,8 +5699,10 @@ DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOu
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
 {
     drwav_uint32 bytesPerFrame;
+    drwav_uint64 bytesToRead;   /* Intentionally uint64 instead of size_t so we can do a check that we're not reading too much on 32-bit builds. */
+    drwav_uint64 framesRemainingInFile;
 
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+    if (pWav == NULL || framesToRead == 0) {
         return 0;
     }
 
@@ -3394,39 +5711,100 @@ DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 frames
         return 0;
     }
 
+    framesRemainingInFile = pWav->totalPCMFrameCount - pWav->readCursorInPCMFrames;
+    if (framesToRead > framesRemainingInFile) {
+        framesToRead = framesRemainingInFile;
+    }
+
     bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
     /* Don't try to read more samples than can potentially fit in the output buffer. */
-    if (framesToRead * bytesPerFrame > DRWAV_SIZE_MAX) {
-        framesToRead = DRWAV_SIZE_MAX / bytesPerFrame;
+    bytesToRead = framesToRead * bytesPerFrame;
+    if (bytesToRead > DRWAV_SIZE_MAX) {
+        bytesToRead = (DRWAV_SIZE_MAX / bytesPerFrame) * bytesPerFrame; /* Round the number of bytes to read to a clean frame boundary. */
+    }
+
+    /*
+    Doing an explicit check here just to make it clear that we don't want to attempt to read anything if there are no bytes to read. There
+    *could* be a time where it evaluates to 0 due to overflowing.
+    */
+    if (bytesToRead == 0) {
+        return 0;
     }
 
-    return drwav_read_raw(pWav, (size_t)(framesToRead * bytesPerFrame), pBufferOut) / bytesPerFrame;
+    return drwav_read_raw(pWav, (size_t)bytesToRead, pBufferOut) / bytesPerFrame;
 }
 
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
 {
     drwav_uint64 framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
-    drwav__bswap_samples(pBufferOut, framesRead*pWav->channels, drwav_get_bytes_per_pcm_frame(pWav)/pWav->channels, pWav->translatedFormatTag);
+
+    if (pBufferOut != NULL) {
+        drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+        if (bytesPerFrame == 0) {
+            return 0;   /* Could not get the bytes per frame which means bytes per sample cannot be determined and we don't know how to byte swap. */
+        }
+
+        drwav__bswap_samples(pBufferOut, framesRead*pWav->channels, bytesPerFrame/pWav->channels);
+    }
 
     return framesRead;
 }
 
 DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
 {
+    drwav_uint64 framesRead = 0;
+
+    if (drwav_is_container_be(pWav->container)) {
+        /*
+        Special case for AIFF. AIFF is a big-endian encoded format, but it supports a format that is
+        PCM in little-endian encoding. In this case, we fall through this branch and treat it as
+        little-endian.
+        */
+        if (pWav->container != drwav_container_aiff || pWav->aiff.isLE == DRWAV_FALSE) {
+            if (drwav__is_little_endian()) {
+                framesRead = drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut);
+            } else {
+                framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
+            }
+
+            goto post_process;
+        }
+    }
+
+    /* Getting here means the data should be considered little-endian. */
     if (drwav__is_little_endian()) {
-        return drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
+        framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
     } else {
-        return drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut);
+        framesRead = drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut);
     }
+
+    /*
+    Here is where we check if we need to do a signed/unsigned conversion for AIFF. The reason we need to do this
+    is because dr_wav always assumes an 8-bit sample is unsigned, whereas AIFF can have signed 8-bit formats.
+    */
+    post_process:
+    {
+        if (pWav->container == drwav_container_aiff && pWav->bitsPerSample == 8 && pWav->aiff.isUnsigned == DRWAV_FALSE) {
+            if (pBufferOut != NULL) {
+                drwav_uint64 iSample;
+
+                for (iSample = 0; iSample < framesRead * pWav->channels; iSample += 1) {
+                    ((drwav_uint8*)pBufferOut)[iSample] += 128;
+                }
+            }
+        }
+    }
+
+    return framesRead;
 }
 
 
 
-DRWAV_API drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
+DRWAV_PRIVATE drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
 {
     if (pWav->onWrite != NULL) {
         return DRWAV_FALSE; /* No seeking in write mode. */
@@ -3437,10 +5815,19 @@ DRWAV_API drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
     }
 
     if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
-        pWav->compressed.iCurrentPCMFrame = 0;
+        /* Cached data needs to be cleared for compressed formats. */
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+            DRWAV_ZERO_OBJECT(&pWav->msadpcm);
+        } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+            DRWAV_ZERO_OBJECT(&pWav->ima);
+        } else {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here. */
+        }
     }
-    
+
+    pWav->readCursorInPCMFrames = 0;
     pWav->bytesRemaining = pWav->dataChunkDataSize;
+
     return DRWAV_TRUE;
 }
 
@@ -3463,8 +5850,8 @@ DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetF
     }
 
     /* Make sure the sample is clamped. */
-    if (targetFrameIndex >= pWav->totalPCMFrameCount) {
-        targetFrameIndex  = pWav->totalPCMFrameCount - 1;
+    if (targetFrameIndex > pWav->totalPCMFrameCount) {
+        targetFrameIndex = pWav->totalPCMFrameCount;
     }
 
     /*
@@ -3473,19 +5860,19 @@ DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetF
     */
     if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
         /* TODO: This can be optimized. */
-        
+
         /*
         If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards,
         we first need to seek back to the start and then just do the same thing as a forward seek.
         */
-        if (targetFrameIndex < pWav->compressed.iCurrentPCMFrame) {
+        if (targetFrameIndex < pWav->readCursorInPCMFrames) {
             if (!drwav_seek_to_first_pcm_frame(pWav)) {
                 return DRWAV_FALSE;
             }
         }
 
-        if (targetFrameIndex > pWav->compressed.iCurrentPCMFrame) {
-            drwav_uint64 offsetInFrames = targetFrameIndex - pWav->compressed.iCurrentPCMFrame;
+        if (targetFrameIndex > pWav->readCursorInPCMFrames) {
+            drwav_uint64 offsetInFrames = targetFrameIndex - pWav->readCursorInPCMFrames;
 
             drwav_int16 devnull[2048];
             while (offsetInFrames > 0) {
@@ -3515,12 +5902,18 @@ DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetF
         drwav_uint64 currentBytePos;
         drwav_uint64 targetBytePos;
         drwav_uint64 offset;
+        drwav_uint32 bytesPerFrame;
+
+        bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+        if (bytesPerFrame == 0) {
+            return DRWAV_FALSE; /* Not able to calculate offset. */
+        }
 
-        totalSizeInBytes = pWav->totalPCMFrameCount * drwav_get_bytes_per_pcm_frame(pWav);
-        DRWAV_ASSERT(totalSizeInBytes >= pWav->bytesRemaining);
+        totalSizeInBytes = pWav->totalPCMFrameCount * bytesPerFrame;
+        /*DRWAV_ASSERT(totalSizeInBytes >= pWav->bytesRemaining);*/
 
         currentBytePos = totalSizeInBytes - pWav->bytesRemaining;
-        targetBytePos  = targetFrameIndex * drwav_get_bytes_per_pcm_frame(pWav);
+        targetBytePos  = targetFrameIndex * bytesPerFrame;
 
         if (currentBytePos < targetBytePos) {
             /* Offset forwards. */
@@ -3539,14 +5932,49 @@ DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetF
                 return DRWAV_FALSE;
             }
 
-            pWav->bytesRemaining -= offset32;
-            offset -= offset32;
+            pWav->readCursorInPCMFrames += offset32 / bytesPerFrame;
+            pWav->bytesRemaining        -= offset32;
+            offset                      -= offset32;
         }
     }
 
     return DRWAV_TRUE;
 }
 
+DRWAV_API drwav_result drwav_get_cursor_in_pcm_frames(drwav* pWav, drwav_uint64* pCursor)
+{
+    if (pCursor == NULL) {
+        return DRWAV_INVALID_ARGS;  /* No output location to write the cursor to. */
+    }
+
+    *pCursor = 0;   /* Safety: the output holds a defined value even when the pWav check below fails. */
+
+    if (pWav == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+    *pCursor = pWav->readCursorInPCMFrames; /* Report the current read cursor, counted in PCM frames. */
+
+    return DRWAV_SUCCESS;
+}
+
+DRWAV_API drwav_result drwav_get_length_in_pcm_frames(drwav* pWav, drwav_uint64* pLength)
+{
+    if (pLength == NULL) {
+        return DRWAV_INVALID_ARGS;  /* No output location to write the length to. */
+    }
+
+    *pLength = 0;   /* Safety: the output holds a defined value even when the pWav check below fails. */
+
+    if (pWav == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+    *pLength = pWav->totalPCMFrameCount; /* Report the total frame count stored on the drwav object. */
+
+    return DRWAV_SUCCESS;
+}
+
 
 DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData)
 {
@@ -3562,7 +5990,6 @@ DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* p
     return bytesWritten;
 }
 
-
 DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
 {
     drwav_uint64 bytesToWrite;
@@ -3621,7 +6048,10 @@ DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 frame
     pRunningData = (const drwav_uint8*)pData;
 
     bytesPerSample = drwav_get_bytes_per_pcm_frame(pWav) / pWav->channels;
-    
+    if (bytesPerSample == 0) {
+        return 0;   /* Cannot determine bytes per sample, or bytes per sample is less than one byte. */
+    }
+
     while (bytesToWrite > 0) {
         drwav_uint8 temp[4096];
         drwav_uint32 sampleCount;
@@ -3642,7 +6072,7 @@ DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 frame
         }
 
         DRWAV_COPY_MEMORY(temp, pRunningData, (size_t)bytesToWriteThisIteration);
-        drwav__bswap_samples(temp, sampleCount, bytesPerSample, pWav->translatedFormatTag);
+        drwav__bswap_samples(temp, sampleCount, bytesPerSample);
 
         bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, temp);
         if (bytesJustWritten == 0) {
@@ -3667,17 +6097,25 @@ DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesTo
 }
 
 
-static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
 {
     drwav_uint64 totalFramesRead = 0;
 
+    static drwav_int32 adaptationTable[] = {
+        230, 230, 230, 230, 307, 409, 512, 614,
+        768, 614, 512, 409, 307, 230, 230, 230
+    };
+    static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
+    static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
+
     DRWAV_ASSERT(pWav != NULL);
     DRWAV_ASSERT(framesToRead > 0);
-    DRWAV_ASSERT(pBufferOut != NULL);
 
     /* TODO: Lots of room for optimization here. */
 
-    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+    while (pWav->readCursorInPCMFrames < pWav->totalPCMFrameCount) {
+        DRWAV_ASSERT(framesToRead > 0); /* This loop iteration will never get hit with framesToRead == 0 because it's asserted at the top, and we check for 0 inside the loop just below. */
+
         /* If there are no cached frames we need to load a new block. */
         if (pWav->msadpcm.cachedFrameCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) {
             if (pWav->channels == 1) {
@@ -3689,12 +6127,17 @@ static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64
                 pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
 
                 pWav->msadpcm.predictor[0]     = header[0];
-                pWav->msadpcm.delta[0]         = drwav__bytes_to_s16(header + 1);
-                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 3);
-                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 5);
+                pWav->msadpcm.delta[0]         = drwav_bytes_to_s16(header + 1);
+                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav_bytes_to_s16(header + 3);
+                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav_bytes_to_s16(header + 5);
                 pWav->msadpcm.cachedFrames[2]  = pWav->msadpcm.prevFrames[0][0];
                 pWav->msadpcm.cachedFrames[3]  = pWav->msadpcm.prevFrames[0][1];
                 pWav->msadpcm.cachedFrameCount = 2;
+
+                /* The predictor is used as an index into coeff1Table so we'll need to validate to ensure it never overflows. */
+                if (pWav->msadpcm.predictor[0] >= drwav_countof(coeff1Table)) {
+                    return totalFramesRead; /* Invalid file. */
+                }
             } else {
                 /* Stereo. */
                 drwav_uint8 header[14];
@@ -3705,37 +6148,45 @@ static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64
 
                 pWav->msadpcm.predictor[0] = header[0];
                 pWav->msadpcm.predictor[1] = header[1];
-                pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 2);
-                pWav->msadpcm.delta[1] = drwav__bytes_to_s16(header + 4);
-                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 6);
-                pWav->msadpcm.prevFrames[1][1] = (drwav_int32)drwav__bytes_to_s16(header + 8);
-                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 10);
-                pWav->msadpcm.prevFrames[1][0] = (drwav_int32)drwav__bytes_to_s16(header + 12);
+                pWav->msadpcm.delta[0] = drwav_bytes_to_s16(header + 2);
+                pWav->msadpcm.delta[1] = drwav_bytes_to_s16(header + 4);
+                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav_bytes_to_s16(header + 6);
+                pWav->msadpcm.prevFrames[1][1] = (drwav_int32)drwav_bytes_to_s16(header + 8);
+                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav_bytes_to_s16(header + 10);
+                pWav->msadpcm.prevFrames[1][0] = (drwav_int32)drwav_bytes_to_s16(header + 12);
 
                 pWav->msadpcm.cachedFrames[0] = pWav->msadpcm.prevFrames[0][0];
                 pWav->msadpcm.cachedFrames[1] = pWav->msadpcm.prevFrames[1][0];
                 pWav->msadpcm.cachedFrames[2] = pWav->msadpcm.prevFrames[0][1];
                 pWav->msadpcm.cachedFrames[3] = pWav->msadpcm.prevFrames[1][1];
                 pWav->msadpcm.cachedFrameCount = 2;
+
+                /* The predictor is used as an index into coeff1Table so we'll need to validate to ensure it never overflows. */
+                if (pWav->msadpcm.predictor[0] >= drwav_countof(coeff1Table) || pWav->msadpcm.predictor[1] >= drwav_countof(coeff2Table)) {
+                    return totalFramesRead; /* Invalid file. */
+                }
             }
         }
 
         /* Output anything that's cached. */
-        while (framesToRead > 0 && pWav->msadpcm.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-            drwav_uint32 iSample = 0;
-            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
-                pBufferOut[iSample] = (drwav_int16)pWav->msadpcm.cachedFrames[(drwav_countof(pWav->msadpcm.cachedFrames) - (pWav->msadpcm.cachedFrameCount*pWav->channels)) + iSample];
+        while (framesToRead > 0 && pWav->msadpcm.cachedFrameCount > 0 && pWav->readCursorInPCMFrames < pWav->totalPCMFrameCount) {
+            if (pBufferOut != NULL) {
+                drwav_uint32 iSample = 0;
+                for (iSample = 0; iSample < pWav->channels; iSample += 1) {
+                    pBufferOut[iSample] = (drwav_int16)pWav->msadpcm.cachedFrames[(drwav_countof(pWav->msadpcm.cachedFrames) - (pWav->msadpcm.cachedFrameCount*pWav->channels)) + iSample];
+                }
+
+                pBufferOut += pWav->channels;
             }
 
-            pBufferOut      += pWav->channels;
             framesToRead    -= 1;
             totalFramesRead += 1;
-            pWav->compressed.iCurrentPCMFrame += 1;
+            pWav->readCursorInPCMFrames += 1;
             pWav->msadpcm.cachedFrameCount -= 1;
         }
 
         if (framesToRead == 0) {
-            return totalFramesRead;
+            break;
         }
 
 
@@ -3747,13 +6198,6 @@ static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64
             if (pWav->msadpcm.bytesRemainingInBlock == 0) {
                 continue;
             } else {
-                static drwav_int32 adaptationTable[] = { 
-                    230, 230, 230, 230, 307, 409, 512, 614, 
-                    768, 614, 512, 409, 307, 230, 230, 230 
-                };
-                static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
-                static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
-
                 drwav_uint8 nibbles;
                 drwav_int32 nibble0;
                 drwav_int32 nibble1;
@@ -3845,17 +6289,36 @@ static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64
 }
 
 
-static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
 {
     drwav_uint64 totalFramesRead = 0;
+    drwav_uint32 iChannel;
+
+    static drwav_int32 indexTable[16] = {
+        -1, -1, -1, -1, 2, 4, 6, 8,
+        -1, -1, -1, -1, 2, 4, 6, 8
+    };
+
+    static drwav_int32 stepTable[89] = {
+        7,     8,     9,     10,    11,    12,    13,    14,    16,    17,
+        19,    21,    23,    25,    28,    31,    34,    37,    41,    45,
+        50,    55,    60,    66,    73,    80,    88,    97,    107,   118,
+        130,   143,   157,   173,   190,   209,   230,   253,   279,   307,
+        337,   371,   408,   449,   494,   544,   598,   658,   724,   796,
+        876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066,
+        2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,
+        5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899,
+        15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
+    };
 
     DRWAV_ASSERT(pWav != NULL);
     DRWAV_ASSERT(framesToRead > 0);
-    DRWAV_ASSERT(pBufferOut != NULL);
 
     /* TODO: Lots of room for optimization here. */
 
-    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+    while (pWav->readCursorInPCMFrames < pWav->totalPCMFrameCount) {
+        DRWAV_ASSERT(framesToRead > 0); /* This loop iteration will never get hit with framesToRead == 0 because it's asserted at the top, and we check for 0 inside the loop just below. */
+
         /* If there are no cached samples we need to load a new block. */
         if (pWav->ima.cachedFrameCount == 0 && pWav->ima.bytesRemainingInBlock == 0) {
             if (pWav->channels == 1) {
@@ -3866,8 +6329,14 @@ static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 fra
                 }
                 pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
 
-                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
-                pWav->ima.stepIndex[0] = header[2];
+                if (header[2] >= drwav_countof(stepTable)) {
+                    pWav->onSeek(pWav->pUserData, pWav->ima.bytesRemainingInBlock, drwav_seek_origin_current);
+                    pWav->ima.bytesRemainingInBlock = 0;
+                    return totalFramesRead; /* Invalid data. */
+                }
+
+                pWav->ima.predictor[0] = (drwav_int16)drwav_bytes_to_u16(header + 0);
+                pWav->ima.stepIndex[0] = drwav_clamp(header[2], 0, (drwav_int32)drwav_countof(stepTable)-1);    /* Clamp not necessary because we checked above, but adding here to silence a static analysis warning. */
                 pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[0];
                 pWav->ima.cachedFrameCount = 1;
             } else {
@@ -3878,10 +6347,16 @@ static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 fra
                 }
                 pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
 
-                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
-                pWav->ima.stepIndex[0] = header[2];
-                pWav->ima.predictor[1] = drwav__bytes_to_s16(header + 4);
-                pWav->ima.stepIndex[1] = header[6];
+                if (header[2] >= drwav_countof(stepTable) || header[6] >= drwav_countof(stepTable)) {
+                    pWav->onSeek(pWav->pUserData, pWav->ima.bytesRemainingInBlock, drwav_seek_origin_current);
+                    pWav->ima.bytesRemainingInBlock = 0;
+                    return totalFramesRead; /* Invalid data. */
+                }
+
+                pWav->ima.predictor[0] = drwav_bytes_to_s16(header + 0);
+                pWav->ima.stepIndex[0] = drwav_clamp(header[2], 0, (drwav_int32)drwav_countof(stepTable)-1);    /* Clamp not necessary because we checked above, but adding here to silence a static analysis warning. */
+                pWav->ima.predictor[1] = drwav_bytes_to_s16(header + 4);
+                pWav->ima.stepIndex[1] = drwav_clamp(header[6], 0, (drwav_int32)drwav_countof(stepTable)-1);    /* Clamp not necessary because we checked above, but adding here to silence a static analysis warning. */
 
                 pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 2] = pWav->ima.predictor[0];
                 pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[1];
@@ -3890,21 +6365,23 @@ static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 fra
         }
 
         /* Output anything that's cached. */
-        while (framesToRead > 0 && pWav->ima.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-            drwav_uint32 iSample;
-            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
-                pBufferOut[iSample] = (drwav_int16)pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + iSample];
+        while (framesToRead > 0 && pWav->ima.cachedFrameCount > 0 && pWav->readCursorInPCMFrames < pWav->totalPCMFrameCount) {
+            if (pBufferOut != NULL) {
+                drwav_uint32 iSample;
+                for (iSample = 0; iSample < pWav->channels; iSample += 1) {
+                    pBufferOut[iSample] = (drwav_int16)pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + iSample];
+                }
+                pBufferOut += pWav->channels;
             }
 
-            pBufferOut      += pWav->channels;
             framesToRead    -= 1;
             totalFramesRead += 1;
-            pWav->compressed.iCurrentPCMFrame += 1;
+            pWav->readCursorInPCMFrames += 1;
             pWav->ima.cachedFrameCount -= 1;
         }
 
         if (framesToRead == 0) {
-            return totalFramesRead;
+            break;
         }
 
         /*
@@ -3915,25 +6392,6 @@ static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 fra
             if (pWav->ima.bytesRemainingInBlock == 0) {
                 continue;
             } else {
-                static drwav_int32 indexTable[16] = {
-                    -1, -1, -1, -1, 2, 4, 6, 8,
-                    -1, -1, -1, -1, 2, 4, 6, 8
-                };
-
-                static drwav_int32 stepTable[89] = {
-                    7,     8,     9,     10,    11,    12,    13,    14,    16,    17, 
-                    19,    21,    23,    25,    28,    31,    34,    37,    41,    45, 
-                    50,    55,    60,    66,    73,    80,    88,    97,    107,   118, 
-                    130,   143,   157,   173,   190,   209,   230,   253,   279,   307,
-                    337,   371,   408,   449,   494,   544,   598,   658,   724,   796,
-                    876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066, 
-                    2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,
-                    5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899, 
-                    15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 
-                };
-
-                drwav_uint32 iChannel;
-
                 /*
                 From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the
                 left channel, 4 bytes for the right channel.
@@ -3992,40 +6450,40 @@ static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 fra
 
 #ifndef DR_WAV_NO_CONVERSION_API
 static unsigned short g_drwavAlawTable[256] = {
-    0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580, 
-    0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0, 
-    0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600, 
-    0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00, 
-    0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58, 
-    0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58, 
-    0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960, 
-    0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0, 
-    0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80, 
-    0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40, 
-    0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00, 
-    0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500, 
-    0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8, 
-    0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8, 
-    0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0, 
+    0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580,
+    0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0,
+    0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600,
+    0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00,
+    0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58,
+    0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58,
+    0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960,
+    0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0,
+    0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80,
+    0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40,
+    0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00,
+    0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500,
+    0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8,
+    0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8,
+    0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0,
     0x02B0, 0x0290, 0x02F0, 0x02D0, 0x0230, 0x0210, 0x0270, 0x0250, 0x03B0, 0x0390, 0x03F0, 0x03D0, 0x0330, 0x0310, 0x0370, 0x0350
 };
 
 static unsigned short g_drwavMulawTable[256] = {
-    0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84, 
-    0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84, 
-    0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004, 
-    0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844, 
-    0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64, 
-    0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74, 
-    0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C, 
-    0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000, 
-    0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C, 
-    0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C, 
-    0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC, 
-    0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC, 
-    0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C, 
-    0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C, 
-    0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 0x00A4, 0x0094, 0x0084, 
+    0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84,
+    0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84,
+    0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004,
+    0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844,
+    0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64,
+    0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74,
+    0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C,
+    0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000,
+    0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C,
+    0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C,
+    0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC,
+    0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC,
+    0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C,
+    0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C,
+    0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 0x00A4, 0x0094, 0x0084,
     0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008, 0x0000
 };
 
@@ -4041,9 +6499,9 @@ static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn)
 
 
 
-static void drwav__pcm_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+DRWAV_PRIVATE void drwav__pcm_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
 {
-    unsigned int i;
+    size_t i;
 
     /* Special case for 8-bit sample data because it's treated as unsigned. */
     if (bytesPerSample == 1) {
@@ -4093,7 +6551,7 @@ static void drwav__pcm_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t
     }
 }
 
-static void drwav__ieee_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+DRWAV_PRIVATE void drwav__ieee_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
 {
     if (bytesPerSample == 4) {
         drwav_f32_to_s16(pOut, (const float*)pIn, totalSampleCount);
@@ -4108,33 +6566,50 @@ static void drwav__ieee_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t
     }
 }
 
-static drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
 {
-    drwav_uint32 bytesPerFrame;
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
 
     /* Fast path. */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) {
+    if ((pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) || pBufferOut == NULL) {
         return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
     }
-    
+
     bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
-    
+
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect against invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
+
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4142,27 +6617,49 @@ static drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_uint64 fra
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
 
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
-    
+
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect against invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);    /* Safe cast. */
+
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4170,27 +6667,66 @@ static drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_uint64 fr
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
 
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
-    
+
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav_alaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
+
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect against invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_alaw_to_s16(pBufferOut, sampleData, (size_t)samplesRead);
+
+        /*
+        For some reason libsndfile seems to be returning samples of the opposite sign for a-law, but only
+        with AIFF files. For WAV files it seems to be the same as dr_wav. This is resulting in dr_wav's
+        automated tests failing. I'm not sure which is correct, but will assume dr_wav. If we're enforcing
+        libsndfile compatibility we'll swap the signs here.
+        */
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
 
-        pBufferOut      += framesRead*pWav->channels;
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4198,27 +6734,64 @@ static drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_uint64 fr
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
 
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
 
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav_mulaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect against invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_mulaw_to_s16(pBufferOut, sampleData, (size_t)samplesRead);
+
+        /*
+        Just like with alaw, for some reason the signs between libsndfile and dr_wav are opposite. We just need to
+        swap the sign if we're compiling with libsndfile compatibility so our automated tests don't fail.
+        */
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4228,10 +6801,14 @@ static drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_uint64 f
 
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
 {
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+    if (pWav == NULL || framesToRead == 0) {
         return 0;
     }
 
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
     /* Don't try to read more samples than can potentially fit in the output buffer. */
     if (framesToRead * pWav->channels * sizeof(drwav_int16) > DRWAV_SIZE_MAX) {
         framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int16) / pWav->channels;
@@ -4267,7 +6844,7 @@ DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 frame
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
 {
     drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
-    if (!drwav__is_little_endian()) {
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_FALSE) {
         drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
     }
 
@@ -4277,7 +6854,7 @@ DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 fra
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
 {
     drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
-    if (drwav__is_little_endian()) {
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_TRUE) {
         drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
     }
 
@@ -4366,8 +6943,7 @@ DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, siz
 }
 
 
-
-static void drwav__pcm_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
+DRWAV_PRIVATE void drwav__pcm_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
 {
     unsigned int i;
 
@@ -4416,7 +6992,7 @@ static void drwav__pcm_to_f32(float* pOut, const drwav_uint8* pIn, size_t sample
     }
 }
 
-static void drwav__ieee_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
+DRWAV_PRIVATE void drwav__ieee_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
 {
     if (bytesPerSample == 4) {
         unsigned int i;
@@ -4435,51 +7011,45 @@ static void drwav__ieee_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampl
 }
 
 
-static drwav_uint64 drwav_read_pcm_frames_f32__pcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__pcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
 
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
 
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)framesRead*pWav->channels, bytesPerFrame/pWav->channels);
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect against invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
             break;
         }
 
-        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
 
-        pBufferOut      += framesRead*pWav->channels;
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4487,20 +7057,26 @@ static drwav_uint64 drwav_read_pcm_frames_f32__msadpcm(drwav* pWav, drwav_uint64
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_f32__ima(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__msadpcm_ima(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
 {
     /*
-    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
+    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
     want to duplicate that code.
     */
-    drwav_uint64 totalFramesRead = 0;
+    drwav_uint64 totalFramesRead;
     drwav_int16 samples16[2048];
+
+    totalFramesRead = 0;
+
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels);
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToReadThisIteration, samples16);
         if (framesRead == 0) {
             break;
         }
 
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames_s16(). */
+
         drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
 
         pBufferOut      += framesRead*pWav->channels;
@@ -4511,33 +7087,50 @@ static drwav_uint64 drwav_read_pcm_frames_f32__ima(drwav* pWav, drwav_uint64 fra
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
     drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
 
     /* Fast path. */
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bitsPerSample == 32) {
         return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
     }
-    
+
     bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
 
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect against invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
+
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4545,26 +7138,56 @@ static drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_uint64 fr
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
 
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect against invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4572,27 +7195,56 @@ static drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_uint64 fr
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
 
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
 
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect against invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4602,10 +7254,14 @@ static drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_uint64 f
 
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
 {
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+    if (pWav == NULL || framesToRead == 0) {
         return 0;
     }
 
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
     /* Don't try to read more samples than can potentially fit in the output buffer. */
     if (framesToRead * pWav->channels * sizeof(float) > DRWAV_SIZE_MAX) {
         framesToRead = DRWAV_SIZE_MAX / sizeof(float) / pWav->channels;
@@ -4615,8 +7271,8 @@ DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 frame
         return drwav_read_pcm_frames_f32__pcm(pWav, framesToRead, pBufferOut);
     }
 
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        return drwav_read_pcm_frames_f32__msadpcm(pWav, framesToRead, pBufferOut);
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_f32__msadpcm_ima(pWav, framesToRead, pBufferOut);
     }
 
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
@@ -4631,17 +7287,13 @@ DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 frame
         return drwav_read_pcm_frames_f32__mulaw(pWav, framesToRead, pBufferOut);
     }
 
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return drwav_read_pcm_frames_f32__ima(pWav, framesToRead, pBufferOut);
-    }
-
     return 0;
 }
 
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
 {
     drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
-    if (!drwav__is_little_endian()) {
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_FALSE) {
         drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
     }
 
@@ -4651,7 +7303,7 @@ DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 fra
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
 {
     drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
-    if (drwav__is_little_endian()) {
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_TRUE) {
         drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
     }
 
@@ -4710,7 +7362,12 @@ DRWAV_API void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t samp
     }
 
     for (i = 0; i < sampleCount; ++i) {
-        double x = (double)(((drwav_int32)(((drwav_uint32)(pIn[i*3+0]) << 8) | ((drwav_uint32)(pIn[i*3+1]) << 16) | ((drwav_uint32)(pIn[i*3+2])) << 24)) >> 8);
+        double x;
+        drwav_uint32 a = ((drwav_uint32)(pIn[i*3+0]) <<  8);
+        drwav_uint32 b = ((drwav_uint32)(pIn[i*3+1]) << 16);
+        drwav_uint32 c = ((drwav_uint32)(pIn[i*3+2]) << 24);
+
+        x = (double)((drwav_int32)(a | b | c) >> 8);
         *pOut++ = (float)(x * 0.00000011920928955078125);
     }
 }
@@ -4768,7 +7425,7 @@ DRWAV_API void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sa
 
 
 
-static void drwav__pcm_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+DRWAV_PRIVATE void drwav__pcm_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
 {
     unsigned int i;
 
@@ -4819,7 +7476,7 @@ static void drwav__pcm_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t
     }
 }
 
-static void drwav__ieee_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+DRWAV_PRIVATE void drwav__ieee_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
 {
     if (bytesPerSample == 4) {
         drwav_f32_to_s32(pOut, (const float*)pIn, totalSampleCount);
@@ -4835,57 +7492,50 @@ static void drwav__ieee_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t
 }
 
 
-static drwav_uint64 drwav_read_pcm_frames_s32__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
     drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
 
     /* Fast path. */
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 32) {
         return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
     }
-    
+
     bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
 
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-static drwav_uint64 drwav_read_pcm_frames_s32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
             break;
         }
 
-        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
 
-        pBufferOut      += framesRead*pWav->channels;
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4893,20 +7543,24 @@ static drwav_uint64 drwav_read_pcm_frames_s32__msadpcm(drwav* pWav, drwav_uint64
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_s32__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__msadpcm_ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
 {
     /*
-    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
+    We're just going to borrow the implementation from drwav_read_pcm_frames_s16() since ADPCM is a little bit more complicated than other formats and I don't
     want to duplicate that code.
     */
     drwav_uint64 totalFramesRead = 0;
     drwav_int16 samples16[2048];
+
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels);
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToReadThisIteration, samples16);
         if (framesRead == 0) {
             break;
         }
 
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames_s16(). */
+
         drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
 
         pBufferOut      += framesRead*pWav->channels;
@@ -4917,27 +7571,45 @@ static drwav_uint64 drwav_read_pcm_frames_s32__ima(drwav* pWav, drwav_uint64 fra
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
 
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
 
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)samplesRead, bytesPerSample);
+
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4945,27 +7617,56 @@ static drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_uint64 fr
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
 
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
 
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -4973,27 +7674,56 @@ static drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_uint64 fr
     return totalFramesRead;
 }
 
-static drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
 {
     drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
+    drwav_uint8 sampleData[4096] = {0};
+    drwav_uint32 bytesPerFrame;
+    drwav_uint32 bytesPerSample;
+    drwav_uint64 samplesRead;
 
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
     if (bytesPerFrame == 0) {
         return 0;
     }
 
+    bytesPerSample = bytesPerFrame / pWav->channels;
+    if (bytesPerSample == 0 || (bytesPerFrame % pWav->channels) != 0) {
+        return 0;   /* Only byte-aligned formats are supported. */
+    }
+
     totalFramesRead = 0;
 
     while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        drwav_uint64 framesToReadThisIteration = drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame);
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, framesToReadThisIteration, sampleData);
         if (framesRead == 0) {
             break;
         }
 
-        drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+        DRWAV_ASSERT(framesRead <= framesToReadThisIteration);   /* If this fails it means there's a bug in drwav_read_pcm_frames(). */
 
-        pBufferOut      += framesRead*pWav->channels;
+        /* Validation to ensure we don't read too much from our intermediary buffer. This is to protect from invalid files. */
+        samplesRead = framesRead * pWav->channels;
+        if ((samplesRead * bytesPerSample) > sizeof(sampleData)) {
+            DRWAV_ASSERT(DRWAV_FALSE);  /* This should never happen with a valid file. */
+            break;
+        }
+
+        drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)samplesRead);
+
+        #ifdef DR_WAV_LIBSNDFILE_COMPAT
+        {
+            if (pWav->container == drwav_container_aiff) {
+                drwav_uint64 iSample;
+                for (iSample = 0; iSample < samplesRead; iSample += 1) {
+                    pBufferOut[iSample] = -pBufferOut[iSample];
+                }
+            }
+        }
+        #endif
+
+        pBufferOut      += samplesRead;
         framesToRead    -= framesRead;
         totalFramesRead += framesRead;
     }
@@ -5003,22 +7733,25 @@ static drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_uint64 f
 
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
 {
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+    if (pWav == NULL || framesToRead == 0) {
         return 0;
     }
 
+    if (pBufferOut == NULL) {
+        return drwav_read_pcm_frames(pWav, framesToRead, NULL);
+    }
+
     /* Don't try to read more samples than can potentially fit in the output buffer. */
     if (framesToRead * pWav->channels * sizeof(drwav_int32) > DRWAV_SIZE_MAX) {
         framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int32) / pWav->channels;
     }
 
-
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
         return drwav_read_pcm_frames_s32__pcm(pWav, framesToRead, pBufferOut);
     }
 
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        return drwav_read_pcm_frames_s32__msadpcm(pWav, framesToRead, pBufferOut);
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_s32__msadpcm_ima(pWav, framesToRead, pBufferOut);
     }
 
     if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
@@ -5033,17 +7766,13 @@ DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 frame
         return drwav_read_pcm_frames_s32__mulaw(pWav, framesToRead, pBufferOut);
     }
 
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return drwav_read_pcm_frames_s32__ima(pWav, framesToRead, pBufferOut);
-    }
-
     return 0;
 }
 
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
 {
     drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
-    if (!drwav__is_little_endian()) {
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_FALSE) {
         drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
     }
 
@@ -5053,7 +7782,7 @@ DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 fra
 DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
 {
     drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
-    if (drwav__is_little_endian()) {
+    if (pBufferOut != NULL && drwav__is_little_endian() == DRWAV_TRUE) {
         drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
     }
 
@@ -5114,7 +7843,7 @@ DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t samp
     }
 
     for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
+        *pOut++ = (drwav_int32)(2147483648.0f * pIn[i]);
     }
 }
 
@@ -5159,7 +7888,7 @@ DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, siz
 
 
 
-static drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+DRWAV_PRIVATE drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
 {
     drwav_uint64 sampleDataSize;
     drwav_int16* pSampleData;
@@ -5201,7 +7930,7 @@ static drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, unsigned i
     return pSampleData;
 }
 
-static float* drwav__read_pcm_frames_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+DRWAV_PRIVATE float* drwav__read_pcm_frames_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
 {
     drwav_uint64 sampleDataSize;
     float* pSampleData;
@@ -5243,7 +7972,7 @@ static float* drwav__read_pcm_frames_and_close_f32(drwav* pWav, unsigned int* ch
     return pSampleData;
 }
 
-static drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+DRWAV_PRIVATE drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
 {
     drwav_uint64 sampleDataSize;
     drwav_int32* pSampleData;
@@ -5415,6 +8144,7 @@ DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filen
 }
 
 
+#ifndef DR_WAV_NO_WCHAR
 DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     drwav wav;
@@ -5477,7 +8207,8 @@ DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t*
 
     return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
 }
-#endif
+#endif /* DR_WAV_NO_WCHAR */
+#endif /* DR_WAV_NO_STDIO */
 
 DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
@@ -5555,235 +8286,205 @@ DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocation
 
 DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data)
 {
-    return drwav__bytes_to_u16(data);
+    return ((drwav_uint16)data[0] << 0) | ((drwav_uint16)data[1] << 8);
 }
 
 DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data)
 {
-    return drwav__bytes_to_s16(data);
+    return (drwav_int16)drwav_bytes_to_u16(data);
 }
 
 DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data)
 {
-    return drwav__bytes_to_u32(data);
+    return drwav_bytes_to_u32_le(data);
+}
+
+DRWAV_API float drwav_bytes_to_f32(const drwav_uint8* data)
+{
+    union {
+        drwav_uint32 u32;
+        float f32;
+    } value;
+
+    value.u32 = drwav_bytes_to_u32(data);
+    return value.f32;
 }
 
 DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data)
 {
-    return drwav__bytes_to_s32(data);
+    return (drwav_int32)drwav_bytes_to_u32(data);
 }
 
 DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data)
 {
-    return drwav__bytes_to_u64(data);
+    return
+        ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24) |
+        ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
 }
 
 DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data)
 {
-    return drwav__bytes_to_s64(data);
+    return (drwav_int64)drwav_bytes_to_u64(data);
 }
 
 
 DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
 {
-    return drwav__guid_equal(a, b);
+    int i;
+    for (i = 0; i < 16; i += 1) {
+        if (a[i] != b[i]) {
+            return DRWAV_FALSE;
+        }
+    }
+
+    return DRWAV_TRUE;
 }
 
 DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b)
 {
-    return drwav__fourcc_equal(a, b);
+    return
+        a[0] == b[0] &&
+        a[1] == b[1] &&
+        a[2] == b[2] &&
+        a[3] == b[3];
 }
 
+#ifdef __MRC__
+/* Undo the pragma at the beginning of this file. */
+#pragma options opt reset
+#endif
+
+#endif  /* dr_wav_c */
 #endif  /* DR_WAV_IMPLEMENTATION */
 
 /*
-RELEASE NOTES - v0.11.0
-=======================
-Version 0.11.0 has breaking API changes.
+REVISION HISTORY
+================
+v0.13.17 - 2024-12-17
+  - Fix a possible crash when reading from MS-ADPCM encoded files.
+  - Improve detection of ARM64EC
 
-Improved Client-Defined Memory Allocation
------------------------------------------
-The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
-existing system of DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE are still in place and will be used by default when no custom
-allocation callbacks are specified.
+v0.13.16 - 2024-02-27
+  - Fix a Wdouble-promotion warning.
 
-To use the new system, you pass in a pointer to a drwav_allocation_callbacks object to drwav_init() and family, like this:
+v0.13.15 - 2024-01-23
+  - Relax some unnecessary validation that prevented some files from loading.
 
-    void* my_malloc(size_t sz, void* pUserData)
-    {
-        return malloc(sz);
-    }
-    void* my_realloc(void* p, size_t sz, void* pUserData)
-    {
-        return realloc(p, sz);
-    }
-    void my_free(void* p, void* pUserData)
-    {
-        free(p);
-    }
+v0.13.14 - 2023-12-02
+  - Fix a warning about an unused variable.
 
-    ...
+v0.13.13 - 2023-11-02
+  - Fix a warning when compiling with Clang.
 
-    drwav_allocation_callbacks allocationCallbacks;
-    allocationCallbacks.pUserData = &myData;
-    allocationCallbacks.onMalloc  = my_malloc;
-    allocationCallbacks.onRealloc = my_realloc;
-    allocationCallbacks.onFree    = my_free;
-    drwav_init_file(&wav, "my_file.wav", &allocationCallbacks);
-
-The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
-
-Passing in null for the allocation callbacks object will cause dr_wav to use defaults which is the same as DRWAV_MALLOC,
-DRWAV_REALLOC and DRWAV_FREE and the equivalent of how it worked in previous versions.
-
-Every API that opens a drwav object now takes this extra parameter. These include the following:
-
-    drwav_init()
-    drwav_init_ex()
-    drwav_init_file()
-    drwav_init_file_ex()
-    drwav_init_file_w()
-    drwav_init_file_w_ex()
-    drwav_init_memory()
-    drwav_init_memory_ex()
-    drwav_init_write()
-    drwav_init_write_sequential()
-    drwav_init_write_sequential_pcm_frames()
-    drwav_init_file_write()
-    drwav_init_file_write_sequential()
-    drwav_init_file_write_sequential_pcm_frames()
-    drwav_init_file_write_w()
-    drwav_init_file_write_sequential_w()
-    drwav_init_file_write_sequential_pcm_frames_w()
-    drwav_init_memory_write()
-    drwav_init_memory_write_sequential()
-    drwav_init_memory_write_sequential_pcm_frames()
-    drwav_open_and_read_pcm_frames_s16()
-    drwav_open_and_read_pcm_frames_f32()
-    drwav_open_and_read_pcm_frames_s32()
-    drwav_open_file_and_read_pcm_frames_s16()
-    drwav_open_file_and_read_pcm_frames_f32()
-    drwav_open_file_and_read_pcm_frames_s32()
-    drwav_open_file_and_read_pcm_frames_s16_w()
-    drwav_open_file_and_read_pcm_frames_f32_w()
-    drwav_open_file_and_read_pcm_frames_s32_w()
-    drwav_open_memory_and_read_pcm_frames_s16()
-    drwav_open_memory_and_read_pcm_frames_f32()
-    drwav_open_memory_and_read_pcm_frames_s32()
-
-Endian Improvements
--------------------
-Previously, the following APIs returned little-endian audio data. These now return native-endian data. This improves compatibility
-on big-endian architectures.
-
-    drwav_read_pcm_frames()
-    drwav_read_pcm_frames_s16()
-    drwav_read_pcm_frames_s32()
-    drwav_read_pcm_frames_f32()
-    drwav_open_and_read_pcm_frames_s16()
-    drwav_open_and_read_pcm_frames_s32()
-    drwav_open_and_read_pcm_frames_f32()
-    drwav_open_file_and_read_pcm_frames_s16()
-    drwav_open_file_and_read_pcm_frames_s32()
-    drwav_open_file_and_read_pcm_frames_f32()
-    drwav_open_file_and_read_pcm_frames_s16_w()
-    drwav_open_file_and_read_pcm_frames_s32_w()
-    drwav_open_file_and_read_pcm_frames_f32_w()
-    drwav_open_memory_and_read_pcm_frames_s16()
-    drwav_open_memory_and_read_pcm_frames_s32()
-    drwav_open_memory_and_read_pcm_frames_f32()
-
-APIs have been added to give you explicit control over whether or not audio data is read or written in big- or little-endian byte
-order:
-
-    drwav_read_pcm_frames_le()
-    drwav_read_pcm_frames_be()
-    drwav_read_pcm_frames_s16le()
-    drwav_read_pcm_frames_s16be()
-    drwav_read_pcm_frames_f32le()
-    drwav_read_pcm_frames_f32be()
-    drwav_read_pcm_frames_s32le()
-    drwav_read_pcm_frames_s32be()
-    drwav_write_pcm_frames_le()
-    drwav_write_pcm_frames_be()
-
-Removed APIs
-------------
-The following APIs were deprecated in version 0.10.0 and have now been removed:
-
-    drwav_open()
-    drwav_open_ex()
-    drwav_open_write()
-    drwav_open_write_sequential()
-    drwav_open_file()
-    drwav_open_file_ex()
-    drwav_open_file_write()
-    drwav_open_file_write_sequential()
-    drwav_open_memory()
-    drwav_open_memory_ex()
-    drwav_open_memory_write()
-    drwav_open_memory_write_sequential()
-    drwav_close()
-
-
-
-RELEASE NOTES - v0.10.0
-=======================
-Version 0.10.0 has breaking API changes. There are no significant bug fixes in this release, so if you are affected you do
-not need to upgrade.
-
-Removed APIs
-------------
-The following APIs were deprecated in version 0.9.0 and have been completely removed in version 0.10.0:
-
-    drwav_read()
-    drwav_read_s16()
-    drwav_read_f32()
-    drwav_read_s32()
-    drwav_seek_to_sample()
-    drwav_write()
-    drwav_open_and_read_s16()
-    drwav_open_and_read_f32()
-    drwav_open_and_read_s32()
-    drwav_open_file_and_read_s16()
-    drwav_open_file_and_read_f32()
-    drwav_open_file_and_read_s32()
-    drwav_open_memory_and_read_s16()
-    drwav_open_memory_and_read_f32()
-    drwav_open_memory_and_read_s32()
-    drwav::totalSampleCount
-
-See release notes for version 0.9.0 at the bottom of this file for replacement APIs.
-
-Deprecated APIs
----------------
-The following APIs have been deprecated. There is a confusing and completely arbitrary difference between drwav_init*() and
-drwav_open*(), where drwav_init*() initializes a pre-allocated drwav object, whereas drwav_open*() will first allocated a
-drwav object on the heap and then initialize it. drwav_open*() has been deprecated which means you must now use a pre-
-allocated drwav object with drwav_init*(). If you need the previous functionality, you can just do a malloc() followed by
-a called to one of the drwav_init*() APIs.
-
-    drwav_open()
-    drwav_open_ex()
-    drwav_open_write()
-    drwav_open_write_sequential()
-    drwav_open_file()
-    drwav_open_file_ex()
-    drwav_open_file_write()
-    drwav_open_file_write_sequential()
-    drwav_open_memory()
-    drwav_open_memory_ex()
-    drwav_open_memory_write()
-    drwav_open_memory_write_sequential()
-    drwav_close()
-
-These APIs will be removed completely in a future version. The rationale for this change is to remove confusion between the
-two different ways to initialize a drwav object.
-*/
+v0.13.12 - 2023-08-07
+  - Fix a possible crash in drwav_read_pcm_frames().
+
+v0.13.11 - 2023-07-07
+  - AIFF compatibility improvements.
+
+v0.13.10 - 2023-05-29
+  - Fix a bug where drwav_init_with_metadata() does not decode any frames after initialization.
+
+v0.13.9 - 2023-05-22
+  - Add support for AIFF decoding (writing and metadata not supported).
+  - Add support for RIFX decoding (writing and metadata not supported).
+  - Fix a bug where metadata is not processed if it's located before the "fmt " chunk.
+  - Add a workaround for a type of malformed WAV file where the size of the "RIFF" and "data" chunks
+    are incorrectly set to 0xFFFFFFFF.
+
+v0.13.8 - 2023-03-25
+  - Fix a possible null pointer dereference.
+  - Fix a crash when loading files with badly formed metadata.
+
+v0.13.7 - 2022-09-17
+  - Fix compilation with DJGPP.
+  - Add support for disabling wchar_t with DR_WAV_NO_WCHAR.
+
+v0.13.6 - 2022-04-10
+  - Fix compilation error on older versions of GCC.
+  - Remove some dependencies on the standard library.
+
+v0.13.5 - 2022-01-26
+  - Fix an error when seeking to the end of the file.
+
+v0.13.4 - 2021-12-08
+  - Fix some static analysis warnings.
+
+v0.13.3 - 2021-11-24
+  - Fix an incorrect assertion when trying to endian swap 1-byte sample formats. This is now a no-op
+    rather than a failed assertion.
+  - Fix a bug with parsing of the bext chunk.
+  - Fix some static analysis warnings.
+
+v0.13.2 - 2021-10-02
+  - Fix a possible buffer overflow when reading from compressed formats.
+
+v0.13.1 - 2021-07-31
+  - Fix platform detection for ARM64.
+
+v0.13.0 - 2021-07-01
+  - Improve support for reading and writing metadata. Use the `_with_metadata()` APIs to initialize
+    a WAV decoder and store the metadata within the `drwav` object. Use the `pMetadata` and
+    `metadataCount` members of the `drwav` object to read the data. The old way of handling metadata
+    via a callback is still usable and valid.
+  - API CHANGE: drwav_target_write_size_bytes() now takes extra parameters for calculating the
+    required write size when writing metadata.
+  - Add drwav_get_cursor_in_pcm_frames()
+  - Add drwav_get_length_in_pcm_frames()
+  - Fix a bug where drwav_read_raw() can call the read callback with a byte count of zero.
+
+v0.12.20 - 2021-06-11
+  - Fix some undefined behavior.
+
+v0.12.19 - 2021-02-21
+  - Fix a warning due to referencing _MSC_VER when it is undefined.
+  - Minor improvements to the management of some internal state concerning the data chunk cursor.
+
+v0.12.18 - 2021-01-31
+  - Clean up some static analysis warnings.
+
+v0.12.17 - 2021-01-17
+  - Minor fix to sample code in documentation.
+  - Correctly qualify a private API as private rather than public.
+  - Code cleanup.
+
+v0.12.16 - 2020-12-02
+  - Fix a bug when trying to read more bytes than can fit in a size_t.
+
+v0.12.15 - 2020-11-21
+  - Fix compilation with OpenWatcom.
+
+v0.12.14 - 2020-11-13
+  - Minor code clean up.
+
+v0.12.13 - 2020-11-01
+  - Improve compiler support for older versions of GCC.
+
+v0.12.12 - 2020-09-28
+  - Add support for RF64.
+  - Fix a bug in writing mode where the size of the RIFF chunk incorrectly includes the header section.
+
+v0.12.11 - 2020-09-08
+  - Fix a compilation error on older compilers.
+
+v0.12.10 - 2020-08-24
+  - Fix a bug when seeking with ADPCM formats.
+
+v0.12.9 - 2020-08-02
+  - Simplify sized types.
+
+v0.12.8 - 2020-07-25
+  - Fix a compilation warning.
+
+v0.12.7 - 2020-07-15
+  - Fix some bugs on big-endian architectures.
+  - Fix an error in s24 to f32 conversion.
+
+v0.12.6 - 2020-06-23
+  - Change drwav_read_*() to allow NULL to be passed in as the output buffer which is equivalent to a forward seek.
+  - Fix a buffer overflow when trying to decode invalid IMA-ADPCM files.
+  - Add include guard for the implementation section.
 
-/*
-REVISION HISTORY
-================
 v0.12.5 - 2020-05-27
   - Minor documentation fix.
 
@@ -6109,7 +8810,7 @@ For more information, please refer to <http://unlicense.org/>
 ===============================================================================
 ALTERNATIVE 2 - MIT No Attribution
 ===============================================================================
-Copyright 2020 David Reid
+Copyright 2023 David Reid
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of
 this software and associated documentation files (the "Software"), to deal in

From c996336ea538a7e4b6c568ecfe4cda6b37a68550 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sat, 12 Jul 2025 21:09:25 +0100
Subject: [PATCH 04/19] remove audout hack due to being fixed in libnx, reduce
 latency between songs, remove pscm.

---
 sys-tune/source/impl/music_player.cpp | 136 +++++++-------------------
 sys-tune/source/impl/music_player.hpp |   1 -
 sys-tune/source/main.cpp              |  20 ----
 3 files changed, 33 insertions(+), 124 deletions(-)

diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index 4c8a8f6..b1fda67 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -106,88 +106,46 @@ namespace tune::impl {
         float g_default_title_volume = 1.f;
         bool g_use_title_volume = true;
 
-        constexpr auto AUDIO_BUFFER_COUNT = 2;
-        constexpr auto AUDIO_BUFFER_SIZE = 0x1000;
-
-        alignas(0x1000) s16 AudioMemoryPool[AUDIO_BUFFER_COUNT][AUDIO_BUFFER_SIZE];
+        constexpr auto AUDIO_FREQ          = 48000;
+        constexpr auto AUDIO_CHANNEL_COUNT = 2;
+        constexpr auto AUDIO_BUFFER_COUNT  = 2;
+        constexpr auto AUDIO_LATENCY_MS    = 50;
+        constexpr auto AUDIO_BUFFER_SIZE   = AUDIO_FREQ / 1000 * AUDIO_LATENCY_MS * AUDIO_CHANNEL_COUNT;
+
+        AudioOutBuffer g_audout_buffer[AUDIO_BUFFER_COUNT];
+        alignas(0x1000) s16 AudioMemoryPool[AUDIO_BUFFER_COUNT][(AUDIO_BUFFER_SIZE + 0xFFF) & ~0xFFF];
         static_assert((sizeof(AudioMemoryPool[0]) % 0x2000) == 0, "Audio Memory pool needs to be page aligned!");
 
-        bool g_awoken_from_sleep = false;
         bool g_should_pause      = false;
         bool g_should_run        = true;
-        bool g_audout_init       = false;
-
-        void audioExit() {
-            if (g_audout_init) {
-                audoutStopAudioOut();
-                audoutExit();
-                g_audout_init = false;
-            }
-        }
-
-        Result audioInit() {
-            if (g_audout_init) {
-                audioExit();
-            }
-
-            Result rc;
-
-            if (R_SUCCEEDED(rc = audoutInitialize())) {
-                if (R_SUCCEEDED(rc = audoutStartAudioOut())) {
-                    SetVolume(config::get_volume());
-                    g_audout_init = true;
-                    return 0;
-                }
-                audoutExit();
-            }
-
-            return rc;
-        }
 
         Result PlayTrack(const char* path) {
-            R_TRY(audioInit());
-
             /* Open file and allocate */
             auto source = OpenFile(path);
             R_UNLESS(source != nullptr, tune::FileOpenFailure);
             R_UNLESS(source->IsOpen(), tune::FileOpenFailure);
             R_UNLESS(source->SetupResampler(audoutGetChannelCount(), audoutGetSampleRate()), tune::VoiceInitFailure);
 
-            g_source = source.get();
-
-            AudioOutBuffer audout_buffer[AUDIO_BUFFER_COUNT]{};
-            for (int i = 0; i < AUDIO_BUFFER_COUNT; i++) {
-                audout_buffer[i].next = NULL;
-                audout_buffer[i].buffer = AudioMemoryPool[i];
-                audout_buffer[i].buffer_size = sizeof(AudioMemoryPool[i]);
+            AudioOutState state;
+            R_TRY(audoutGetAudioOutState(&state));
+            if (state == AudioOutState_Stopped) {
+                R_TRY(audoutStartAudioOut());
             }
 
-            bool pause_state_changed = g_should_pause;
+            g_source = source.get();
 
             while (g_should_run && g_status == PlayerStatus::Playing) {
-                if (g_awoken_from_sleep) {
-                    g_awoken_from_sleep = false;
-                    R_TRY(audioInit());
-                }
-
                 if (g_should_pause) {
-                    pause_state_changed = g_should_pause;
                     svcSleepThread(17'000'000);
                     continue;
                 }
 
-                // fixes bad sound.
-                if (pause_state_changed != g_should_pause) {
-                    pause_state_changed = g_should_pause;
-                    R_TRY(audioInit());
-                }
-
                 AudioOutBuffer* buffer = NULL;
                 for (int i = 0; i < AUDIO_BUFFER_COUNT; i++) {
                     bool has_buffer = false;
-                    R_TRY(audoutContainsAudioOutBuffer(&audout_buffer[i], &has_buffer));
+                    R_TRY(audoutContainsAudioOutBuffer(&g_audout_buffer[i], &has_buffer));
                     if (!has_buffer) {
-                        buffer = &audout_buffer[i];
+                        buffer = &g_audout_buffer[i];
                         break;
                     }
                 }
@@ -197,36 +155,41 @@ namespace tune::impl {
                     R_TRY(audoutWaitPlayFinish(&buffer, &released_count, UINT64_MAX));
                 }
 
+                bool error = false;
                 if (buffer) {
-                    const auto nSamples = source->Resample((u8*)buffer->buffer, buffer->buffer_size);
+                    const auto nSamples = source->Resample((u8*)buffer->buffer, AUDIO_BUFFER_SIZE * sizeof(s16));
                     if (nSamples <= 0) {
-                        if (g_repeat != RepeatMode::One) {
-                            Next();
-                        }
-                        break;
+                        error = true;
                     } else {
                         buffer->data_size = nSamples;
                         R_TRY(audoutAppendAudioOutBuffer(buffer));
                     }
                 }
+
+                if (error || source->Done()) {
+                    if (g_repeat != RepeatMode::One) {
+                        Next();
+                    }
+                    break;
+                }
             }
 
             g_source = nullptr;
 
-            // re-open and then pause, otherwise artifacts will continue to play...
-            audioInit();
-            audoutStopAudioOut();
-
             return 0;
         }
 
     }
 
     Result Initialize() {
-        if (auto rc = audioInit(); R_FAILED(rc)) {
-            return rc;
+        for (int i = 0; i < AUDIO_BUFFER_COUNT; i++) {
+            g_audout_buffer[i].buffer = AudioMemoryPool[i];
+            g_audout_buffer[i].buffer_size = sizeof(AudioMemoryPool[i]);
         }
 
+        R_TRY(audoutInitialize());
+        SetVolume(config::get_volume());
+
         g_playlist.reserve(PLAYLIST_ENTRY_MAX);
         g_shuffle_playlist.reserve(PLAYLIST_ENTRY_MAX);
 
@@ -297,41 +260,8 @@ namespace tune::impl {
             }
         }
 
-        audioExit();
-    }
-
-    void PscmThreadFunc(void *ptr) {
-        PscPmModule *module = static_cast<PscPmModule *>(ptr);
-        bool previous_state{};
-
-        while (g_should_run) {
-            Result rc = eventWait(&module->event, 10'000'000);
-            if (R_VALUE(rc) == KERNELRESULT(TimedOut))
-                continue;
-            if (R_VALUE(rc) == KERNELRESULT(Cancelled))
-                break;
-
-            PscPmState state;
-            u32 flags;
-            R_ABORT_UNLESS(pscPmModuleGetRequest(module, &state, &flags));
-            switch (state) {
-                // NOTE: PscPmState_Awake event seems to get missed (rare) or
-                // PscPmState_ReadySleep is sent multiple times.
-                // todo: fade in and delay playback on wakeup slightly
-                case PscPmState_ReadyAwaken:
-                    g_awoken_from_sleep = true;
-                    g_should_pause = previous_state;
-                    break;
-                // pause on sleep
-                case PscPmState_ReadySleep:
-                    previous_state = g_should_pause;
-                    g_should_pause = true;
-                    break;
-                default:
-                    break;
-            }
-            pscPmModuleAcknowledge(module, state);
-        }
+        audoutStopAudioOut();
+        audoutExit();
     }
 
     void GpioThreadFunc(void *ptr) {
diff --git a/sys-tune/source/impl/music_player.hpp b/sys-tune/source/impl/music_player.hpp
index 0c5028b..354bda4 100644
--- a/sys-tune/source/impl/music_player.hpp
+++ b/sys-tune/source/impl/music_player.hpp
@@ -23,7 +23,6 @@ namespace tune::impl {
     void Exit();
 
     void TuneThreadFunc(void *);
-    void PscmThreadFunc(void *ptr);
     void GpioThreadFunc(void *ptr);
     void PmdmntThreadFunc(void *ptr);
 
diff --git a/sys-tune/source/main.cpp b/sys-tune/source/main.cpp
index 743d08b..88968be 100644
--- a/sys-tune/source/main.cpp
+++ b/sys-tune/source/main.cpp
@@ -33,7 +33,6 @@ void __appInit() {
     }
 
     R_ABORT_UNLESS(gpioInitialize());
-    R_ABORT_UNLESS(pscmInitialize());
     R_ABORT_UNLESS(fsInitialize());
     R_ABORT_UNLESS(audWrapperInitialize());
     R_ABORT_UNLESS(pm::Initialize());
@@ -45,7 +44,6 @@ void __appExit(void) {
     pm::Exit();
     audWrapperExit();
     fsExit();
-    pscmExit();
     gpioExit();
     smExit();
 }
@@ -55,7 +53,6 @@ void __appExit(void) {
 namespace {
 
     alignas(0x1000) u8 gpioThreadBuffer[0x1000];
-    alignas(0x1000) u8 pscmThreadBuffer[0x1000];
     alignas(0x1000) u8 pmdmntThreadBuffer[0x1000];
     alignas(0x1000) u8 tuneThreadBuffer[0x6000];
 
@@ -64,28 +61,18 @@ namespace {
 int main(int argc, char *argv[]) {
     R_ABORT_UNLESS(tune::impl::Initialize());
 
-    /* Register audio as our dependency so we can pause before it prepares for sleep. */
-    constexpr const u32 dependencies[] = { PscPmModuleId_Fs, PscPmModuleId_Audio };
-
-    /* Get pm module to listen for state change. */
-    PscPmModule pm_module;
-    R_ABORT_UNLESS(pscmGetPmModule(&pm_module, PscPmModuleId(420), dependencies, sizeof(dependencies) / sizeof(u32), true));
-
     /* Get GPIO session for the headphone jack pad. */
     GpioPadSession headphone_detect_session;
     R_ABORT_UNLESS(gpioOpenSession(&headphone_detect_session, GpioPadName(0x15)));
 
     ::Thread gpioThread;
-    ::Thread pscmThread;
     ::Thread pmdmtThread;
     ::Thread tuneThread;
     R_ABORT_UNLESS(threadCreate(&gpioThread, tune::impl::GpioThreadFunc, &headphone_detect_session, gpioThreadBuffer, sizeof(gpioThreadBuffer), 0x20, -2));
-    R_ABORT_UNLESS(threadCreate(&pscmThread, tune::impl::PscmThreadFunc, &pm_module, pscmThreadBuffer, sizeof(pscmThreadBuffer), 0x20, -2));
     R_ABORT_UNLESS(threadCreate(&pmdmtThread, tune::impl::PmdmntThreadFunc, nullptr, pmdmntThreadBuffer, sizeof(pmdmntThreadBuffer), 0x20, -2));
     R_ABORT_UNLESS(threadCreate(&tuneThread, tune::impl::TuneThreadFunc, nullptr, tuneThreadBuffer, sizeof(tuneThreadBuffer), 0x20, -2));
 
     R_ABORT_UNLESS(threadStart(&gpioThread));
-    R_ABORT_UNLESS(threadStart(&pscmThread));
     R_ABORT_UNLESS(threadStart(&pmdmtThread));
     R_ABORT_UNLESS(threadStart(&tuneThread));
 
@@ -96,24 +83,17 @@ int main(int argc, char *argv[]) {
 
     tune::impl::Exit();
     svcCancelSynchronization(gpioThread.handle);
-    svcCancelSynchronization(pscmThread.handle);
 
     R_ABORT_UNLESS(threadWaitForExit(&gpioThread));
-    R_ABORT_UNLESS(threadWaitForExit(&pscmThread));
     R_ABORT_UNLESS(threadWaitForExit(&pmdmtThread));
     R_ABORT_UNLESS(threadWaitForExit(&tuneThread));
 
     R_ABORT_UNLESS(threadClose(&gpioThread));
-    R_ABORT_UNLESS(threadClose(&pscmThread));
     R_ABORT_UNLESS(threadClose(&pmdmtThread));
     R_ABORT_UNLESS(threadClose(&tuneThread));
 
     /* Close gpio session. */
     gpioPadClose(&headphone_detect_session);
 
-    /* Unregister Psc module. */
-    pscPmModuleFinalize(&pm_module);
-    pscPmModuleClose(&pm_module);
-
     return 0;
 }

From 8a4830425f88efdf3703747253b82a9ab694e06e Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sat, 12 Jul 2025 22:42:21 +0100
Subject: [PATCH 05/19] update dr libs to latest commit (85019a9), fix dr_mp3
 seek crash due to running out of stack space.

---
 sys-tune/source/impl/dr_flac.h  | 432 ++++++++--------
 sys-tune/source/impl/dr_mp3.h   | 882 +++++++++++++++++++++++++-------
 sys-tune/source/impl/dr_wav.h   | 452 +++++++++++-----
 sys-tune/source/impl/source.cpp |  71 ++-
 sys-tune/source/impl/source.hpp |   5 +-
 5 files changed, 1278 insertions(+), 564 deletions(-)

diff --git a/sys-tune/source/impl/dr_flac.h b/sys-tune/source/impl/dr_flac.h
index 3a47251..bed2f72 100644
--- a/sys-tune/source/impl/dr_flac.h
+++ b/sys-tune/source/impl/dr_flac.h
@@ -1,121 +1,12 @@
 /*
 FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_flac - v0.12.43 - 2024-12-17
+dr_flac - v0.13.0 - TBD
 
 David Reid - mackron@gmail.com
 
 GitHub: https://github.com/mackron/dr_libs
 */
 
-/*
-RELEASE NOTES - v0.12.0
-=======================
-Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs.
-
-
-Improved Client-Defined Memory Allocation
------------------------------------------
-The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
-existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom
-allocation callbacks are specified.
-
-To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this:
-
-    void* my_malloc(size_t sz, void* pUserData)
-    {
-        return malloc(sz);
-    }
-    void* my_realloc(void* p, size_t sz, void* pUserData)
-    {
-        return realloc(p, sz);
-    }
-    void my_free(void* p, void* pUserData)
-    {
-        free(p);
-    }
-
-    ...
-
-    drflac_allocation_callbacks allocationCallbacks;
-    allocationCallbacks.pUserData = &myData;
-    allocationCallbacks.onMalloc  = my_malloc;
-    allocationCallbacks.onRealloc = my_realloc;
-    allocationCallbacks.onFree    = my_free;
-    drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks);
-
-The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
-
-Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC,
-DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions.
-
-Every API that opens a drflac object now takes this extra parameter. These include the following:
-
-    drflac_open()
-    drflac_open_relaxed()
-    drflac_open_with_metadata()
-    drflac_open_with_metadata_relaxed()
-    drflac_open_file()
-    drflac_open_file_with_metadata()
-    drflac_open_memory()
-    drflac_open_memory_with_metadata()
-    drflac_open_and_read_pcm_frames_s32()
-    drflac_open_and_read_pcm_frames_s16()
-    drflac_open_and_read_pcm_frames_f32()
-    drflac_open_file_and_read_pcm_frames_s32()
-    drflac_open_file_and_read_pcm_frames_s16()
-    drflac_open_file_and_read_pcm_frames_f32()
-    drflac_open_memory_and_read_pcm_frames_s32()
-    drflac_open_memory_and_read_pcm_frames_s16()
-    drflac_open_memory_and_read_pcm_frames_f32()
-
-
-
-Optimizations
--------------
-Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly
-improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes
-advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which
-means it will be disabled when DR_FLAC_NO_CRC is used.
-
-The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in
-particular. 16-bit streams should also see some improvement.
-
-drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed it's s32
-to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths.
-
-A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo
-channel reconstruction which is the last part of the decoding process.
-
-The ARM build has seen a few improvements. The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when
-compiling with GCC and Clang which is achieved using inline assembly. The CLZ instruction requires ARM architecture version 5 at
-compile time and the REV instruction requires ARM architecture version 6.
-
-An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling.
-
-
-Removed APIs
-------------
-The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0:
-
-    drflac_read_s32()                   -> drflac_read_pcm_frames_s32()
-    drflac_read_s16()                   -> drflac_read_pcm_frames_s16()
-    drflac_read_f32()                   -> drflac_read_pcm_frames_f32()
-    drflac_seek_to_sample()             -> drflac_seek_to_pcm_frame()
-    drflac_open_and_decode_s32()        -> drflac_open_and_read_pcm_frames_s32()
-    drflac_open_and_decode_s16()        -> drflac_open_and_read_pcm_frames_s16()
-    drflac_open_and_decode_f32()        -> drflac_open_and_read_pcm_frames_f32()
-    drflac_open_and_decode_file_s32()   -> drflac_open_file_and_read_pcm_frames_s32()
-    drflac_open_and_decode_file_s16()   -> drflac_open_file_and_read_pcm_frames_s16()
-    drflac_open_and_decode_file_f32()   -> drflac_open_file_and_read_pcm_frames_f32()
-    drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32()
-    drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16()
-    drflac_open_and_decode_memory_f32() -> drflac_open_memroy_and_read_pcm_frames_f32()
-
-Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate
-to the old per-sample APIs. You now need to use the "pcm_frame" versions.
-*/
-
-
 /*
 Introduction
 ============
@@ -234,8 +125,8 @@ extern "C" {
 #define DRFLAC_XSTRINGIFY(x)     DRFLAC_STRINGIFY(x)
 
 #define DRFLAC_VERSION_MAJOR     0
-#define DRFLAC_VERSION_MINOR     12
-#define DRFLAC_VERSION_REVISION  43
+#define DRFLAC_VERSION_MINOR     13
+#define DRFLAC_VERSION_REVISION  0
 #define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
 
 #include <stddef.h> /* For size_t. */
@@ -406,8 +297,9 @@ typedef enum
 
 typedef enum
 {
-    drflac_seek_origin_start,
-    drflac_seek_origin_current
+    DRFLAC_SEEK_SET,
+    DRFLAC_SEEK_CUR,
+    DRFLAC_SEEK_END
 } drflac_seek_origin;
 
 /* The order of members in this structure is important because we map this directly to the raw data within the SEEKTABLE metadata block. */
@@ -547,7 +439,7 @@ offset (in)
     The number of bytes to move, relative to the origin. Will never be negative.
 
 origin (in)
-    The origin of the seek - the current position or the start of the stream.
+    The origin of the seek - the current position, the start of the stream, or the end of the stream.
 
 
 Return Value
@@ -557,14 +449,32 @@ Whether or not the seek was successful.
 
 Remarks
 -------
-The offset will never be negative. Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be
-either drflac_seek_origin_start or drflac_seek_origin_current.
+Seeking relative to the start and the current position must always be supported. If seeking from the end of the stream is not supported, return DRFLAC_FALSE.
 
 When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected
 and handled by returning DRFLAC_FALSE.
 */
 typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
 
+/*
+Callback for when the current position in the stream needs to be retrieved.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+pCursor (out)
+    A pointer to a variable to receive the current position in the stream.
+
+
+Return Value
+------------
+Whether or not the operation was successful.
+*/
+typedef drflac_bool32 (* drflac_tell_proc)(void* pUserData, drflac_int64* pCursor);
+
 /*
 Callback for when a metadata block is read.
 
@@ -603,6 +513,9 @@ typedef struct
     /* The function to call when the current read position needs to be moved. */
     drflac_seek_proc onSeek;
 
+    /* The function to call when the current read position needs to be retrieved. */
+    drflac_tell_proc onTell;
+
     /* The user data to pass around to onRead and onSeek. */
     void* pUserData;
 
@@ -828,7 +741,7 @@ drflac_open_memory()
 drflac_open_with_metadata()
 drflac_close()
 */
-DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
 
 /*
 Opens a FLAC stream with relaxed validation of the header block.
@@ -869,7 +782,7 @@ force your `onRead` callback to return 0, which dr_flac will use as an indicator
 
 Use `drflac_open_with_metadata_relaxed()` if you need access to metadata.
 */
-DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
 
 /*
 Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
@@ -926,7 +839,7 @@ drflac_open_memory_with_metadata()
 drflac_open()
 drflac_close()
 */
-DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
 
 /*
 The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
@@ -936,7 +849,7 @@ See Also
 drflac_open_with_metadata()
 drflac_open_relaxed()
 */
-DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
 
 /*
 Closes the given FLAC decoder.
@@ -1234,13 +1147,13 @@ read samples into a dynamically sized buffer on the heap until no samples are le
 
 Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
 */
-DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
 
 /* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
-DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
 
 /* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
-DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
 
 #ifndef DR_FLAC_NO_STDIO
 /* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */
@@ -2960,25 +2873,25 @@ static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFro
     */
     if (offsetFromStart > 0x7FFFFFFF) {
         drflac_uint64 bytesRemaining = offsetFromStart;
-        if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+        if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, DRFLAC_SEEK_SET)) {
             return DRFLAC_FALSE;
         }
         bytesRemaining -= 0x7FFFFFFF;
 
         while (bytesRemaining > 0x7FFFFFFF) {
-            if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+            if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, DRFLAC_SEEK_CUR)) {
                 return DRFLAC_FALSE;
             }
             bytesRemaining -= 0x7FFFFFFF;
         }
 
         if (bytesRemaining > 0) {
-            if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) {
+            if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, DRFLAC_SEEK_CUR)) {
                 return DRFLAC_FALSE;
             }
         }
     } else {
-        if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) {
+        if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, DRFLAC_SEEK_SET)) {
             return DRFLAC_FALSE;
         }
     }
@@ -6330,6 +6243,7 @@ typedef struct
 {
     drflac_read_proc onRead;
     drflac_seek_proc onSeek;
+    drflac_tell_proc onTell;
     drflac_meta_proc onMeta;
     drflac_container container;
     void* pUserData;
@@ -6497,7 +6411,7 @@ static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbac
 }
 
 
-static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeekpointCount, drflac_allocation_callbacks* pAllocationCallbacks)
+static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeekpointCount, drflac_allocation_callbacks* pAllocationCallbacks)
 {
     /*
     We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
@@ -6507,6 +6421,8 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
     drflac_uint64 seektablePos   = 0;
     drflac_uint32 seektableSize  = 0;
 
+    (void)onTell;
+
     for (;;) {
         drflac_metadata metadata;
         drflac_uint8 isLastBlock = 0;
@@ -6858,7 +6774,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
                     metadata.data.padding.unused = 0;
 
                     /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */
-                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                    if (!onSeek(pUserData, blockSize, DRFLAC_SEEK_CUR)) {
                         isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
                     } else {
                         onMeta(pUserDataMD, &metadata);
@@ -6870,7 +6786,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
             {
                 /* Invalid chunk. Just skip over this one. */
                 if (onMeta) {
-                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                    if (!onSeek(pUserData, blockSize, DRFLAC_SEEK_CUR)) {
                         isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
                     }
                 }
@@ -6904,7 +6820,7 @@ static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, d
 
         /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */
         if (onMeta == NULL && blockSize > 0) {
-            if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+            if (!onSeek(pUserData, blockSize, DRFLAC_SEEK_CUR)) {
                 isLastBlock = DRFLAC_TRUE;
             }
         }
@@ -7238,6 +7154,7 @@ typedef struct
 {
     drflac_read_proc onRead;                /* The original onRead callback from drflac_open() and family. */
     drflac_seek_proc onSeek;                /* The original onSeek callback from drflac_open() and family. */
+    drflac_tell_proc onTell;                /* The original onTell callback from drflac_open() and family. */
     void* pUserData;                        /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */
     drflac_uint64 currentBytePos;           /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */
     drflac_uint64 firstBytePos;             /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */
@@ -7259,32 +7176,32 @@ static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut,
 
 static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin)
 {
-    if (origin == drflac_seek_origin_start) {
+    if (origin == DRFLAC_SEEK_SET) {
         if (offset <= 0x7FFFFFFF) {
-            if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) {
+            if (!oggbs->onSeek(oggbs->pUserData, (int)offset, DRFLAC_SEEK_SET)) {
                 return DRFLAC_FALSE;
             }
             oggbs->currentBytePos = offset;
 
             return DRFLAC_TRUE;
         } else {
-            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, DRFLAC_SEEK_SET)) {
                 return DRFLAC_FALSE;
             }
             oggbs->currentBytePos = offset;
 
-            return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current);
+            return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, DRFLAC_SEEK_CUR);
         }
     } else {
         while (offset > 0x7FFFFFFF) {
-            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, DRFLAC_SEEK_CUR)) {
                 return DRFLAC_FALSE;
             }
             oggbs->currentBytePos += 0x7FFFFFFF;
             offset -= 0x7FFFFFFF;
         }
 
-        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    /* <-- Safe cast thanks to the loop above. */
+        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, DRFLAC_SEEK_CUR)) {    /* <-- Safe cast thanks to the loop above. */
             return DRFLAC_FALSE;
         }
         oggbs->currentBytePos += offset;
@@ -7316,7 +7233,7 @@ static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_og
 
         if (header.serialNumber != oggbs->serialNumber) {
             /* It's not a FLAC page. Skip it. */
-            if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {
+            if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, DRFLAC_SEEK_CUR)) {
                 return DRFLAC_FALSE;
             }
             continue;
@@ -7402,7 +7319,7 @@ static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
         At this point we will have found either the packet or the end of the page. If were at the end of the page we'll
         want to load the next page and keep searching for the end of the packet.
         */
-        drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current);
+        drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, DRFLAC_SEEK_CUR);
         oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage;
 
         if (atEndOfPage) {
@@ -7480,8 +7397,8 @@ static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_see
     DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
 
     /* Seeking is always forward which makes things a lot simpler. */
-    if (origin == drflac_seek_origin_start) {
-        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) {
+    if (origin == DRFLAC_SEEK_SET) {
+        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, DRFLAC_SEEK_SET)) {
             return DRFLAC_FALSE;
         }
 
@@ -7489,38 +7406,50 @@ static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_see
             return DRFLAC_FALSE;
         }
 
-        return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current);
-    }
-
-    DRFLAC_ASSERT(origin == drflac_seek_origin_current);
-
-    while (bytesSeeked < offset) {
-        int bytesRemainingToSeek = offset - bytesSeeked;
-        DRFLAC_ASSERT(bytesRemainingToSeek >= 0);
+        return drflac__on_seek_ogg(pUserData, offset, DRFLAC_SEEK_CUR);
+    } else if (origin == DRFLAC_SEEK_CUR) {
+        while (bytesSeeked < offset) {
+            int bytesRemainingToSeek = offset - bytesSeeked;
+            DRFLAC_ASSERT(bytesRemainingToSeek >= 0);
 
-        if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) {
-            bytesSeeked += bytesRemainingToSeek;
-            (void)bytesSeeked;  /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */
-            oggbs->bytesRemainingInPage -= bytesRemainingToSeek;
-            break;
-        }
+            if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) {
+                bytesSeeked += bytesRemainingToSeek;
+                (void)bytesSeeked;  /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */
+                oggbs->bytesRemainingInPage -= bytesRemainingToSeek;
+                break;
+            }
 
-        /* If we get here it means some of the requested data is contained in the next pages. */
-        if (oggbs->bytesRemainingInPage > 0) {
-            bytesSeeked += (int)oggbs->bytesRemainingInPage;
-            oggbs->bytesRemainingInPage = 0;
-        }
+            /* If we get here it means some of the requested data is contained in the next pages. */
+            if (oggbs->bytesRemainingInPage > 0) {
+                bytesSeeked += (int)oggbs->bytesRemainingInPage;
+                oggbs->bytesRemainingInPage = 0;
+            }
 
-        DRFLAC_ASSERT(bytesRemainingToSeek > 0);
-        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
-            /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */
-            return DRFLAC_FALSE;
+            DRFLAC_ASSERT(bytesRemainingToSeek > 0);
+            if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+                /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */
+                return DRFLAC_FALSE;
+            }
         }
+    } else if (origin == DRFLAC_SEEK_END) {
+        /* Seeking to the end is not supported. */
+        return DRFLAC_FALSE;
     }
 
     return DRFLAC_TRUE;
 }
 
+static drflac_bool32 drflac__on_tell_ogg(void* pUserData, drflac_int64* pCursor)
+{
+    /*
+    onTell callback for the Ogg layer. Always fails: the byte position within the logical bitstream is not tracked, so there is no cursor to
+    report and *pCursor is left untouched. Supporting this would require tracking the position in drflac__on_read_ogg and drflac__on_seek_ogg.
+    */
+    (void)pCursor;      /* Unused. */
+    (void)pUserData;    /* Unused. */
+    return DRFLAC_FALSE;
+}
+
 
 static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
 {
@@ -7543,7 +7472,7 @@ static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64
     runningGranulePosition = 0;
     for (;;) {
         if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
-            drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
+            drflac_oggbs__seek_physical(oggbs, originalBytePos, DRFLAC_SEEK_SET);
             return DRFLAC_FALSE;   /* Never did find that sample... */
         }
 
@@ -7577,7 +7506,7 @@ static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64
     a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
     we find the one containing the target sample.
     */
-    if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) {
+    if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, DRFLAC_SEEK_SET)) {
         return DRFLAC_FALSE;
     }
     if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
@@ -7744,7 +7673,7 @@ static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_r
                     The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
                     be handling it in a generic way based on the serial number and packet types.
                     */
-                    if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
+                    if (!onSeek(pUserData, 2, DRFLAC_SEEK_CUR)) {
                         return DRFLAC_FALSE;
                     }
 
@@ -7801,18 +7730,18 @@ static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_r
                     }
                 } else {
                     /* Not a FLAC header. Skip it. */
-                    if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                    if (!onSeek(pUserData, bytesRemainingInPage, DRFLAC_SEEK_CUR)) {
                         return DRFLAC_FALSE;
                     }
                 }
             } else {
                 /* Not a FLAC header. Seek past the entire page and move on to the next. */
-                if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                if (!onSeek(pUserData, bytesRemainingInPage, DRFLAC_SEEK_CUR)) {
                     return DRFLAC_FALSE;
                 }
             }
         } else {
-            if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) {
+            if (!onSeek(pUserData, pageBodySize, DRFLAC_SEEK_CUR)) {
                 return DRFLAC_FALSE;
             }
         }
@@ -7837,18 +7766,19 @@ static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_r
 }
 #endif
 
-static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
+static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
 {
     drflac_bool32 relaxed;
     drflac_uint8 id[4];
 
-    if (pInit == NULL || onRead == NULL || onSeek == NULL) {
+    if (pInit == NULL || onRead == NULL || onSeek == NULL) {    /* <-- onTell is optional. */
         return DRFLAC_FALSE;
     }
 
     DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit));
     pInit->onRead       = onRead;
     pInit->onSeek       = onSeek;
+    pInit->onTell       = onTell;
     pInit->onMeta       = onMeta;
     pInit->container    = container;
     pInit->pUserData    = pUserData;
@@ -7856,6 +7786,7 @@ static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_p
 
     pInit->bs.onRead    = onRead;
     pInit->bs.onSeek    = onSeek;
+    pInit->bs.onTell    = onTell;
     pInit->bs.pUserData = pUserData;
     drflac__reset_cache(&pInit->bs);
 
@@ -7888,7 +7819,7 @@ static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_p
                 headerSize += 10;
             }
 
-            if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) {
+            if (!onSeek(pUserData, headerSize, DRFLAC_SEEK_CUR)) {
                 return DRFLAC_FALSE;    /* Failed to seek past the tag. */
             }
             pInit->runningFilePos += headerSize;
@@ -7940,9 +7871,9 @@ static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit)
 }
 
 
-static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
+static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
-    drflac_init_info init;
+    static drflac_init_info init; // TUNE-FIX
     drflac_uint32 allocationSize;
     drflac_uint32 wholeSIMDVectorCountPerChannel;
     drflac_uint32 decodedSamplesAllocationSize;
@@ -7958,7 +7889,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
     /* CPU support first. */
     drflac__init_cpu_caps();
 
-    if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) {
+    if (!drflac__init_private(&init, onRead, onSeek, onTell, onMeta, container, pUserData, pUserDataMD)) {
         return NULL;
     }
 
@@ -8014,6 +7945,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
         DRFLAC_ZERO_MEMORY(pOggbs, sizeof(*pOggbs));
         pOggbs->onRead = onRead;
         pOggbs->onSeek = onSeek;
+        pOggbs->onTell = onTell;
         pOggbs->pUserData = pUserData;
         pOggbs->currentBytePos = init.oggFirstBytePos;
         pOggbs->firstBytePos = init.oggFirstBytePos;
@@ -8034,17 +7966,19 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
     if (init.hasMetadataBlocks) {
         drflac_read_proc onReadOverride = onRead;
         drflac_seek_proc onSeekOverride = onSeek;
+        drflac_tell_proc onTellOverride = onTell;
         void* pUserDataOverride = pUserData;
 
 #ifndef DR_FLAC_NO_OGG
         if (init.container == drflac_container_ogg) {
             onReadOverride = drflac__on_read_ogg;
             onSeekOverride = drflac__on_seek_ogg;
+            onTellOverride = drflac__on_tell_ogg;
             pUserDataOverride = (void*)pOggbs;
         }
 #endif
 
-        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seekpointCount, &allocationCallbacks)) {
+        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onTellOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seekpointCount, &allocationCallbacks)) {
         #ifndef DR_FLAC_NO_OGG
             drflac__free_from_callbacks(pOggbs, &allocationCallbacks);
         #endif
@@ -8079,6 +8013,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
         /* The Ogg bistream needs to be layered on top of the original bitstream. */
         pFlac->bs.onRead = drflac__on_read_ogg;
         pFlac->bs.onSeek = drflac__on_seek_ogg;
+        pFlac->bs.onTell = drflac__on_tell_ogg;
         pFlac->bs.pUserData = (void*)pInternalOggbs;
         pFlac->_oggbs = (void*)pInternalOggbs;
     }
@@ -8105,7 +8040,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
             DRFLAC_ASSERT(pFlac->bs.onRead != NULL);
 
             /* Seek to the seektable, then just read directly into our seektable buffer. */
-            if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
+            if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, DRFLAC_SEEK_SET)) {
                 drflac_uint32 iSeekpoint;
 
                 for (iSeekpoint = 0; iSeekpoint < seekpointCount; iSeekpoint += 1) {
@@ -8123,7 +8058,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
                 }
 
                 /* We need to seek back to where we were. If this fails it's a critical error. */
-                if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) {
+                if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, DRFLAC_SEEK_SET)) {
                     drflac__free_from_callbacks(pFlac, &allocationCallbacks);
                     return NULL;
                 }
@@ -8294,7 +8229,7 @@ static drflac_result drflac_result_from_errno(int e)
     #ifdef ENOSYS
         case ENOSYS: return DRFLAC_NOT_IMPLEMENTED;
     #endif
-    #ifdef ENOTEMPTY
+    #if defined(ENOTEMPTY) && ENOTEMPTY != EEXIST   /* In AIX, ENOTEMPTY and EEXIST use the same value. */
         case ENOTEMPTY: return DRFLAC_DIRECTORY_NOT_EMPTY;
     #endif
     #ifdef ELOOP
@@ -8745,12 +8680,42 @@ static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t byt
 
 static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
 {
-    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
+    int whence = SEEK_SET;
+    if (origin == DRFLAC_SEEK_CUR) {
+        whence = SEEK_CUR;
+    } else if (origin == DRFLAC_SEEK_END) {
+        whence = SEEK_END;
+    }
 
-    return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+    return fseek((FILE*)pUserData, offset, whence) == 0;
+}
+
+/* onTell callback for FILE* streams. Writes the current file position to *pCursor; returns DRFLAC_FALSE if the position cannot be queried. */
+static drflac_bool32 drflac__on_tell_stdio(void* pUserData, drflac_int64* pCursor)
+{
+    FILE* pFileStdio = (FILE*)pUserData;
+    drflac_int64 result;
+
+    /* These were all validated at a higher level. */
+    DRFLAC_ASSERT(pFileStdio != NULL);
+    DRFLAC_ASSERT(pCursor    != NULL);
+
+#if defined(_WIN32)
+    #if defined(_MSC_VER) && _MSC_VER > 1200
+        result = _ftelli64(pFileStdio);
+    #else
+        result = ftell(pFileStdio);
+    #endif
+#else
+    result = ftell(pFileStdio);
+#endif
+    if (result < 0) { return DRFLAC_FALSE; }    /* ftell()/_ftelli64() return -1 on failure. Do not report a bogus cursor as success. */
+    *pCursor = result;
+    return DRFLAC_TRUE;
 }
 
 
+
 DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
     drflac* pFlac;
@@ -8760,7 +8725,7 @@ DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocati
         return NULL;
     }
 
-    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, drflac__on_tell_stdio, (void*)pFile, pAllocationCallbacks);
     if (pFlac == NULL) {
         fclose(pFile);
         return NULL;
@@ -8779,7 +8744,7 @@ DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_all
         return NULL;
     }
 
-    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, drflac__on_tell_stdio, (void*)pFile, pAllocationCallbacks);
     if (pFlac == NULL) {
         fclose(pFile);
         return NULL;
@@ -8798,7 +8763,7 @@ DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_
         return NULL;
     }
 
-    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, drflac__on_tell_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
     if (pFlac == NULL) {
         fclose(pFile);
         return pFlac;
@@ -8817,7 +8782,7 @@ DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, dr
         return NULL;
     }
 
-    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, drflac__on_tell_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
     if (pFlac == NULL) {
         fclose(pFile);
         return pFlac;
@@ -8852,31 +8817,48 @@ static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t by
 static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
 {
     drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+    drflac_int64 newCursor;
 
     DRFLAC_ASSERT(memoryStream != NULL);
-    DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */
 
-    if (offset > (drflac_int64)memoryStream->dataSize) {
+    newCursor = memoryStream->currentReadPos;
+
+    if (origin == DRFLAC_SEEK_SET) {
+        newCursor = 0;
+    } else if (origin == DRFLAC_SEEK_CUR) {
+        newCursor = (drflac_int64)memoryStream->currentReadPos;
+    } else if (origin == DRFLAC_SEEK_END) {
+        newCursor = (drflac_int64)memoryStream->dataSize;
+    } else {
+        DRFLAC_ASSERT(!"Invalid seek origin");
         return DRFLAC_FALSE;
     }
 
-    if (origin == drflac_seek_origin_current) {
-        if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) {
-            memoryStream->currentReadPos += offset;
-        } else {
-            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
-        }
-    } else {
-        if ((drflac_uint32)offset <= memoryStream->dataSize) {
-            memoryStream->currentReadPos = offset;
-        } else {
-            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
-        }
+    newCursor += offset;
+
+    if (newCursor < 0) {
+        return DRFLAC_FALSE;  /* Trying to seek prior to the start of the buffer. */
     }
+    if ((size_t)newCursor > memoryStream->dataSize) {
+        return DRFLAC_FALSE;  /* Trying to seek beyond the end of the buffer. */
+    }
+
+    memoryStream->currentReadPos = (size_t)newCursor;
 
     return DRFLAC_TRUE;
 }
 
+/* onTell callback for in-memory streams: reports the stream's current read offset through pCursor. */
+static drflac_bool32 drflac__on_tell_memory(void* pUserData, drflac_int64* pCursor)
+{
+    const drflac__memory_stream* pStream = (const drflac__memory_stream*)pUserData;
+
+    DRFLAC_ASSERT(pStream != NULL);
+    DRFLAC_ASSERT(pCursor != NULL);
+    *pCursor = (drflac_int64)pStream->currentReadPos;   /* Always succeeds. */
+    return DRFLAC_TRUE;
+}
+
 DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
     drflac__memory_stream memoryStream;
@@ -8885,7 +8867,7 @@ DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const
     memoryStream.data = (const drflac_uint8*)pData;
     memoryStream.dataSize = dataSize;
     memoryStream.currentReadPos = 0;
-    pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks);
+    pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, drflac__on_tell_memory, &memoryStream, pAllocationCallbacks);
     if (pFlac == NULL) {
         return NULL;
     }
@@ -8916,7 +8898,7 @@ DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t da
     memoryStream.data = (const drflac_uint8*)pData;
     memoryStream.dataSize = dataSize;
     memoryStream.currentReadPos = 0;
-    pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks);
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, drflac__on_tell_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks);
     if (pFlac == NULL) {
         return NULL;
     }
@@ -8941,22 +8923,22 @@ DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t da
 
 
 
-DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
-    return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+    return drflac_open_with_metadata_private(onRead, onSeek, onTell, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
 }
-DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
-    return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks);
+    return drflac_open_with_metadata_private(onRead, onSeek, onTell, NULL, container, pUserData, pUserData, pAllocationCallbacks);
 }
 
-DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
-    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+    return drflac_open_with_metadata_private(onRead, onSeek, onTell, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
 }
-DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
-    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks);
+    return drflac_open_with_metadata_private(onRead, onSeek, onTell, onMeta, container, pUserData, pUserData, pAllocationCallbacks);
 }
 
 DRFLAC_API void drflac_close(drflac* pFlac)
@@ -11788,7 +11770,7 @@ DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32)
 DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16)
 DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float)
 
-DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
     drflac* pFlac;
 
@@ -11802,7 +11784,7 @@ DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc on
         *totalPCMFrameCountOut = 0;
     }
 
-    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    pFlac = drflac_open(onRead, onSeek, onTell, pUserData, pAllocationCallbacks);
     if (pFlac == NULL) {
         return NULL;
     }
@@ -11810,7 +11792,7 @@ DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc on
     return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
 }
 
-DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
     drflac* pFlac;
 
@@ -11824,7 +11806,7 @@ DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc on
         *totalPCMFrameCountOut = 0;
     }
 
-    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    pFlac = drflac_open(onRead, onSeek, onTell, pUserData, pAllocationCallbacks);
     if (pFlac == NULL) {
         return NULL;
     }
@@ -11832,7 +11814,7 @@ DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc on
     return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
 }
 
-DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
     drflac* pFlac;
 
@@ -11846,7 +11828,7 @@ DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, d
         *totalPCMFrameCountOut = 0;
     }
 
-    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    pFlac = drflac_open(onRead, onSeek, onTell, pUserData, pAllocationCallbacks);
     if (pFlac == NULL) {
         return NULL;
     }
@@ -12095,6 +12077,22 @@ DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterat
 /*
 REVISION HISTORY
 ================
+v0.13.0 - TBD
+  - API CHANGE: Seek origin enums have been renamed to match the naming convention used by other dr_libs libraries:
+    - drflac_seek_origin_start   -> DRFLAC_SEEK_SET
+    - drflac_seek_origin_current -> DRFLAC_SEEK_CUR
+    - DRFLAC_SEEK_END (new)
+  - API CHANGE: A new seek origin has been added to allow seeking from the end of the file. If you implement your own `onSeek` callback, you should now detect and handle `DRFLAC_SEEK_END`. If seeking to the end is not supported, return `DRFLAC_FALSE`. If you only use `*_open_file()` or `*_open_memory()`, you need not change anything.
+  - API CHANGE: An `onTell` callback has been added to the following functions:
+    - drflac_open()
+    - drflac_open_relaxed()
+    - drflac_open_with_metadata()
+    - drflac_open_with_metadata_relaxed()
+    - drflac_open_and_read_pcm_frames_s32()
+    - drflac_open_and_read_pcm_frames_s16()
+    - drflac_open_and_read_pcm_frames_f32()
+  - Fix compilation for AIX OS.
+
 v0.12.43 - 2024-12-17
   - Fix a possible buffer overflow during decoding.
   - Improve detection of ARM64EC
diff --git a/sys-tune/source/impl/dr_mp3.h b/sys-tune/source/impl/dr_mp3.h
index aee3cef..170efcc 100644
--- a/sys-tune/source/impl/dr_mp3.h
+++ b/sys-tune/source/impl/dr_mp3.h
@@ -1,6 +1,6 @@
 /*
 MP3 audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_mp3 - v0.6.40 - 2024-12-17
+dr_mp3 - v0.7.0 - TBD
 
 David Reid - mackron@gmail.com
 
@@ -9,29 +9,6 @@ GitHub: https://github.com/mackron/dr_libs
 Based on minimp3 (https://github.com/lieff/minimp3) which is where the real work was done. See the bottom of this file for differences between minimp3 and dr_mp3.
 */
 
-/*
-RELEASE NOTES - VERSION 0.6
-===========================
-Version 0.6 includes breaking changes with the configuration of decoders. The ability to customize the number of output channels and the sample rate has been
-removed. You must now use the channel count and sample rate reported by the MP3 stream itself, and all channel and sample rate conversion must be done
-yourself.
-
-
-Changes to Initialization
--------------------------
-Previously, `drmp3_init()`, etc. took a pointer to a `drmp3_config` object that allowed you to customize the output channels and sample rate. This has been
-removed. If you need the old behaviour you will need to convert the data yourself or just not upgrade. The following APIs have changed.
-
-    `drmp3_init()`
-    `drmp3_init_memory()`
-    `drmp3_init_file()`
-
-
-Miscellaneous Changes
----------------------
-Support for loading a file from a `wchar_t` string has been added via the `drmp3_init_file_w()` API.
-*/
-
 /*
 Introduction
 =============
@@ -94,8 +71,8 @@ extern "C" {
 #define DRMP3_XSTRINGIFY(x)     DRMP3_STRINGIFY(x)
 
 #define DRMP3_VERSION_MAJOR     0
-#define DRMP3_VERSION_MINOR     6
-#define DRMP3_VERSION_REVISION  40
+#define DRMP3_VERSION_MINOR     7
+#define DRMP3_VERSION_REVISION  0
 #define DRMP3_VERSION_STRING    DRMP3_XSTRINGIFY(DRMP3_VERSION_MAJOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_MINOR) "." DRMP3_XSTRINGIFY(DRMP3_VERSION_REVISION)
 
 #include <stddef.h> /* For size_t. */
@@ -133,6 +110,9 @@ typedef drmp3_uint8             drmp3_bool8;
 typedef drmp3_uint32            drmp3_bool32;
 #define DRMP3_TRUE              1
 #define DRMP3_FALSE             0
+
+/* Weird shifting syntax is for VC6 compatibility. */
+#define DRMP3_UINT64_MAX        (((drmp3_uint64)0xFFFFFFFF << 32) | (drmp3_uint64)0xFFFFFFFF)
 /* End Sized Types */
 
 /* Decorations */
@@ -154,7 +134,7 @@ typedef drmp3_uint32            drmp3_bool32;
             #endif
         #endif
 
-        #if defined(DR_MP3_IMPLEMENTATION) || defined(DRMP3_IMPLEMENTATION)
+        #if defined(DR_MP3_IMPLEMENTATION)
             #define DRMP3_API  DRMP3_DLL_EXPORT
         #else
             #define DRMP3_API  DRMP3_DLL_IMPORT
@@ -279,7 +259,7 @@ Low Level Push API
 */
 typedef struct
 {
-    int frame_bytes, channels, hz, layer, bitrate_kbps;
+    int frame_bytes, channels, sample_rate, layer, bitrate_kbps;   /* Filled in by drmp3dec_decode_frame(). sample_rate replaces the old "hz" field. */
 } drmp3dec_frame_info;
 
 typedef struct
@@ -306,8 +286,9 @@ Main API (Pull API)
 */
 typedef enum
 {
-    drmp3_seek_origin_start,
-    drmp3_seek_origin_current
+    DRMP3_SEEK_SET,     /* Offset is measured from the start of the stream. */
+    DRMP3_SEEK_CUR,     /* Offset is measured from the current position. */
+    DRMP3_SEEK_END      /* Offset is measured from the end of the stream. Initialization uses this with zero or negative offsets to probe for trailing tags. */
 } drmp3_seek_origin;
 
 typedef struct
@@ -318,10 +299,27 @@ typedef struct
     drmp3_uint16 pcmFramesToDiscard;    /* The number of leading samples to read and discard. These are discarded after mp3FramesToDiscard. */
 } drmp3_seek_point;
 
+typedef enum
+{
+    DRMP3_METADATA_TYPE_ID3V1,  /* Raw data is the 128-byte tag, starting with the "TAG" identifier. */
+    DRMP3_METADATA_TYPE_ID3V2,  /* Raw data is the 10-byte header followed by the tag content. */
+    DRMP3_METADATA_TYPE_APE,    /* Raw data is meant to hold the tag content followed by the 32-byte footer. */
+    DRMP3_METADATA_TYPE_XING,
+    DRMP3_METADATA_TYPE_VBRI
+} drmp3_metadata_type;
+
+typedef struct
+{
+    drmp3_metadata_type type;           /* Which kind of tag pRawData holds. */
+    const void* pRawData;               /* A pointer to the raw data. For ID3v1, ID3v2 and APE tags the buffer only lives for the duration of the callback, so copy anything you need to keep. */
+    size_t rawDataSize;                 /* Size of the data at pRawData, in bytes. */
+} drmp3_metadata;
+
+
 /*
 Callback for when data is read. Return value is the number of bytes actually read.
 
-pUserData   [in]  The user data that was passed to drmp3_init(), drmp3_open() and family.
+pUserData   [in]  The user data that was passed to drmp3_init(), and family.
 pBufferOut  [out] The output buffer.
 bytesToRead [in]  The number of bytes to read.
 
@@ -335,17 +333,33 @@ typedef size_t (* drmp3_read_proc)(void* pUserData, void* pBufferOut, size_t byt
 /*
 Callback for when data needs to be seeked.
 
-pUserData [in] The user data that was passed to drmp3_init(), drmp3_open() and family.
-offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
-origin    [in] The origin of the seek - the current position or the start of the stream.
+pUserData [in] The user data that was passed to drmp3_init(), and family.
+offset    [in] The number of bytes to move, relative to the origin. Can be negative.
+origin    [in] The origin of the seek.
 
 Returns whether or not the seek was successful.
-
-Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which
-will be either drmp3_seek_origin_start or drmp3_seek_origin_current.
 */
 typedef drmp3_bool32 (* drmp3_seek_proc)(void* pUserData, int offset, drmp3_seek_origin origin);
 
+/*
+Callback for retrieving the current cursor position.
+
+pUserData [in]  The user data that was passed to drmp3_init(), and family.
+pCursor   [out] The cursor position in bytes from the start of the stream.
+
+Returns whether or not the cursor position was successfully retrieved.
+*/
+typedef drmp3_bool32 (* drmp3_tell_proc)(void* pUserData, drmp3_int64* pCursor);
+
+
+/*
+Callback for when metadata is read.
+
+Only the raw data is provided. The client is responsible for parsing the contents of the data themselves.
+*/
+typedef void (* drmp3_meta_proc)(void* pUserData, const drmp3_metadata* pMetadata);
+
+
 typedef struct
 {
     drmp3_uint32 channels;
@@ -359,22 +373,31 @@ typedef struct
     drmp3_uint32 sampleRate;
     drmp3_read_proc onRead;
     drmp3_seek_proc onSeek;
+    drmp3_meta_proc onMeta;
     void* pUserData;
+    void* pUserDataMeta;
     drmp3_allocation_callbacks allocationCallbacks;
     drmp3_uint32 mp3FrameChannels;      /* The number of channels in the currently loaded MP3 frame. Internal use only. */
     drmp3_uint32 mp3FrameSampleRate;    /* The sample rate of the currently loaded MP3 frame. Internal use only. */
     drmp3_uint32 pcmFramesConsumedInMP3Frame;
     drmp3_uint32 pcmFramesRemainingInMP3Frame;
     drmp3_uint8 pcmFrames[sizeof(float)*DRMP3_MAX_SAMPLES_PER_FRAME];  /* <-- Multipled by sizeof(float) to ensure there's enough room for DR_MP3_FLOAT_OUTPUT. */
-    drmp3_uint64 currentPCMFrame;       /* The current PCM frame, globally, based on the output sample rate. Mainly used for seeking. */
+    drmp3_uint64 currentPCMFrame;       /* The current PCM frame, globally. */
     drmp3_uint64 streamCursor;          /* The current byte the decoder is sitting on in the raw stream. */
+    drmp3_uint64 streamLength;          /* The length of the stream in bytes. dr_mp3 will not read beyond this. If a ID3v1 or APE tag is present, this will be set to the first byte of the tag. */
+    drmp3_uint64 streamStartOffset;     /* The offset of the start of the MP3 data. This is used for skipping ID3v2 and VBR tags. */
     drmp3_seek_point* pSeekPoints;      /* NULL by default. Set with drmp3_bind_seek_table(). Memory is owned by the client. dr_mp3 will never attempt to free this pointer. */
     drmp3_uint32 seekPointCount;        /* The number of items in pSeekPoints. When set to 0 assumes to no seek table. Defaults to zero. */
+    drmp3_uint32 delayInPCMFrames;
+    drmp3_uint32 paddingInPCMFrames;
+    drmp3_uint64 totalPCMFrameCount;    /* Set to DRMP3_UINT64_MAX if the length is unknown. Includes delay and padding. */
+    drmp3_bool32 isVBR;
+    drmp3_bool32 isCBR;
     size_t dataSize;
     size_t dataCapacity;
     size_t dataConsumed;
     drmp3_uint8* pData;
-    drmp3_bool32 atEnd : 1;
+    drmp3_bool32 atEnd;
     struct
     {
         const drmp3_uint8* pData;
@@ -388,6 +411,7 @@ Initializes an MP3 decoder.
 
 onRead    [in]           The function to call when data needs to be read from the client.
 onSeek    [in]           The function to call when the read position of the client data needs to move.
+onTell    [in]           The function to call when the read position of the client data needs to be retrieved.
 pUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
 
 Returns true if successful; false otherwise.
@@ -396,7 +420,7 @@ Close the loader with drmp3_uninit().
 
 See also: drmp3_init_file(), drmp3_init_memory(), drmp3_uninit()
 */
-DRMP3_API drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks);
+DRMP3_API drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, drmp3_meta_proc onMeta, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks);
 
 /*
 Initializes an MP3 decoder from a block of memory.
@@ -406,6 +430,7 @@ the lifetime of the drmp3 object.
 
 The buffer should contain the contents of the entire MP3 file.
 */
+DRMP3_API drmp3_bool32 drmp3_init_memory_with_metadata(drmp3* pMP3, const void* pData, size_t dataSize, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks);
 DRMP3_API drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_allocation_callbacks* pAllocationCallbacks);
 
 #ifndef DR_MP3_NO_STDIO
@@ -416,6 +441,9 @@ This holds the internal FILE object until drmp3_uninit() is called. Keep this in
 objects because the operating system may restrict the number of file handles an application can have open at
 any given time.
 */
+DRMP3_API drmp3_bool32 drmp3_init_file_with_metadata(drmp3* pMP3, const char* pFilePath, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks);
+DRMP3_API drmp3_bool32 drmp3_init_file_with_metadata_w(drmp3* pMP3, const wchar_t* pFilePath, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks);
+
 DRMP3_API drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks);
 DRMP3_API drmp3_bool32 drmp3_init_file_w(drmp3* pMP3, const wchar_t* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks);
 #endif
@@ -495,8 +523,8 @@ On output pConfig will receive the channel count and sample rate of the stream.
 
 Free the returned pointer with drmp3_free().
 */
-DRMP3_API float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
-DRMP3_API drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
+DRMP3_API float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
+DRMP3_API drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
 
 DRMP3_API float* drmp3_open_memory_and_read_pcm_frames_f32(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
 DRMP3_API drmp3_int16* drmp3_open_memory_and_read_pcm_frames_s16(const void* pData, size_t dataSize, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks);
@@ -529,7 +557,7 @@ DRMP3_API void drmp3_free(void* p, const drmp3_allocation_callbacks* pAllocation
 
  ************************************************************************************************************************************************************
  ************************************************************************************************************************************************************/
-#if defined(DR_MP3_IMPLEMENTATION) || defined(DRMP3_IMPLEMENTATION)
+#if defined(DR_MP3_IMPLEMENTATION)
 #ifndef dr_mp3_c
 #define dr_mp3_c
 
@@ -2271,7 +2299,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
     int i = 0, igr, frame_size = 0, success = 1;
     const drmp3_uint8 *hdr;
     drmp3_bs bs_frame[1];
-    drmp3dec_scratch scratch;
+    static drmp3dec_scratch scratch; // TUNE-FIX
 
     if (mp3_bytes > 4 && dec->header[0] == 0xff && drmp3_hdr_compare(dec->header, mp3))
     {
@@ -2296,7 +2324,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
     DRMP3_COPY_MEMORY(dec->header, hdr, DRMP3_HDR_SIZE);
     info->frame_bytes = i + frame_size;
     info->channels = DRMP3_HDR_IS_MONO(hdr) ? 1 : 2;
-    info->hz = drmp3_hdr_sample_rate_hz(hdr);
+    info->sample_rate = drmp3_hdr_sample_rate_hz(hdr);
     info->layer = 4 - DRMP3_HDR_GET_LAYER(hdr);
     info->bitrate_kbps = drmp3_hdr_bitrate_kbps(hdr);
 
@@ -2589,22 +2617,56 @@ static drmp3_allocation_callbacks drmp3_copy_allocation_callbacks_or_defaults(co
 
 static size_t drmp3__on_read(drmp3* pMP3, void* pBufferOut, size_t bytesToRead)
 {
-    size_t bytesRead = pMP3->onRead(pMP3->pUserData, pBufferOut, bytesToRead);
+    size_t bytesRead;   /* Bytes actually delivered by the client's onRead callback. */
+
+    DRMP3_ASSERT(pMP3         != NULL);
+    DRMP3_ASSERT(pMP3->onRead != NULL);
+
+    /*
+    Don't try reading 0 bytes from the callback. This can happen when the stream is clamped against
+    ID3v1 or APE tags at the end of the stream.
+    */
+    if (bytesToRead == 0) {
+        return 0;
+    }
+
+    bytesRead = pMP3->onRead(pMP3->pUserData, pBufferOut, bytesToRead);   /* streamCursor is advanced by the count the callback reports, not by the count requested. */
     pMP3->streamCursor += bytesRead;
+
     return bytesRead;
 }
 
+static size_t drmp3__on_read_clamped(drmp3* pMP3, void* pBufferOut, size_t bytesToRead)
+{
+    DRMP3_ASSERT(pMP3         != NULL);
+    DRMP3_ASSERT(pMP3->onRead != NULL);
+    /* Same as drmp3__on_read(), except the request is trimmed so it never crosses streamLength. DRMP3_UINT64_MAX means the length is unknown and no clamping is done. */
+    if (pMP3->streamLength == DRMP3_UINT64_MAX) {
+        return drmp3__on_read(pMP3, pBufferOut, bytesToRead);
+    } else {
+        drmp3_uint64 bytesRemaining;
+        /* Saturate at 0. The cursor can already sit past streamLength (e.g. an ID3v2 size larger than the clamped stream) and the unsigned subtraction would otherwise wrap and defeat the clamp. */
+        bytesRemaining = (pMP3->streamCursor < pMP3->streamLength) ? (pMP3->streamLength - pMP3->streamCursor) : 0;
+        if (bytesToRead >         bytesRemaining) {
+            bytesToRead = (size_t)bytesRemaining;
+        }
+
+        return drmp3__on_read(pMP3, pBufferOut, bytesToRead);
+    }
+}
+
 static drmp3_bool32 drmp3__on_seek(drmp3* pMP3, int offset, drmp3_seek_origin origin)
 {
     DRMP3_ASSERT(offset >= 0);
+    DRMP3_ASSERT(origin == DRMP3_SEEK_SET || origin == DRMP3_SEEK_CUR);
 
     if (!pMP3->onSeek(pMP3->pUserData, offset, origin)) {
         return DRMP3_FALSE;
     }
 
-    if (origin == drmp3_seek_origin_start) {
+    if (origin == DRMP3_SEEK_SET) {
         pMP3->streamCursor = (drmp3_uint64)offset;
-    } else {
+    } else{
         pMP3->streamCursor += offset;
     }
 
@@ -2617,21 +2679,20 @@ static drmp3_bool32 drmp3__on_seek_64(drmp3* pMP3, drmp3_uint64 offset, drmp3_se
         return drmp3__on_seek(pMP3, (int)offset, origin);
     }
 
-
     /* Getting here "offset" is too large for a 32-bit integer. We just keep seeking forward until we hit the offset. */
-    if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_start)) {
+    if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, DRMP3_SEEK_SET)) {
         return DRMP3_FALSE;
     }
 
     offset -= 0x7FFFFFFF;
     while (offset > 0) {
         if (offset <= 0x7FFFFFFF) {
-            if (!drmp3__on_seek(pMP3, (int)offset, drmp3_seek_origin_current)) {
+            if (!drmp3__on_seek(pMP3, (int)offset, DRMP3_SEEK_CUR)) {
                 return DRMP3_FALSE;
             }
             offset = 0;
         } else {
-            if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, drmp3_seek_origin_current)) {
+            if (!drmp3__on_seek(pMP3, 0x7FFFFFFF, DRMP3_SEEK_CUR)) {
                 return DRMP3_FALSE;
             }
             offset -= 0x7FFFFFFF;
@@ -2641,8 +2702,22 @@ static drmp3_bool32 drmp3__on_seek_64(drmp3* pMP3, drmp3_uint64 offset, drmp3_se
     return DRMP3_TRUE;
 }
 
+static void drmp3__on_meta(drmp3* pMP3, drmp3_metadata_type type, const void* pRawData, size_t rawDataSize)
+{
+    if (pMP3->onMeta) {     /* The metadata callback is optional; without one this function does nothing. */
+        drmp3_metadata metadata;
+        /* Wrap the raw tag bytes in a drmp3_metadata. The data is passed through by pointer, not copied. */
+        DRMP3_ZERO_OBJECT(&metadata);
+        metadata.type        = type;
+        metadata.pRawData    = pRawData;
+        metadata.rawDataSize = rawDataSize;
+        /* The metadata callback receives pUserDataMeta, a separate field from the pUserData handed to onRead/onSeek. */
+        pMP3->onMeta(pMP3->pUserDataMeta, &metadata);
+    }
+}
+
 
-static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sample_t* pPCMFrames)
+static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3dec_frame_info* pMP3FrameInfo, const drmp3_uint8** ppMP3FrameData)
 {
     drmp3_uint32 pcmFramesRead = 0;
 
@@ -2682,7 +2757,7 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
                 pMP3->dataCapacity = newDataCap;
             }
 
-            bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
+            bytesRead = drmp3__on_read_clamped(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
             if (bytesRead == 0) {
                 if (pMP3->dataSize == 0) {
                     pMP3->atEnd = DRMP3_TRUE;
@@ -2709,10 +2784,8 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
         pcmFramesRead = drmp3dec_decode_frame(&pMP3->decoder, pMP3->pData + pMP3->dataConsumed, (int)pMP3->dataSize, pPCMFrames, &info);    /* <-- Safe size_t -> int conversion thanks to the check above. */
 
         /* Consume the data. */
-        if (info.frame_bytes > 0) {
-            pMP3->dataConsumed += (size_t)info.frame_bytes;
-            pMP3->dataSize     -= (size_t)info.frame_bytes;
-        }
+        pMP3->dataConsumed += (size_t)info.frame_bytes;
+        pMP3->dataSize     -= (size_t)info.frame_bytes;
 
         /* pcmFramesRead will be equal to 0 if decoding failed. If it is zero and info.frame_bytes > 0 then we have successfully decoded the frame. */
         if (pcmFramesRead > 0) {
@@ -2720,7 +2793,16 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
             pMP3->pcmFramesConsumedInMP3Frame = 0;
             pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
             pMP3->mp3FrameChannels = info.channels;
-            pMP3->mp3FrameSampleRate = info.hz;
+            pMP3->mp3FrameSampleRate = info.sample_rate;
+
+            if (pMP3FrameInfo != NULL) {
+                *pMP3FrameInfo = info;
+            }
+
+            if (ppMP3FrameData != NULL) {
+                *ppMP3FrameData = pMP3->pData + pMP3->dataConsumed - (size_t)info.frame_bytes;
+            }
+
             break;
         } else if (info.frame_bytes == 0) {
             /* Need more data. minimp3 recommends doing data submission in 16K chunks. */
@@ -2747,7 +2829,7 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
             }
 
             /* Fill in a chunk. */
-            bytesRead = drmp3__on_read(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
+            bytesRead = drmp3__on_read_clamped(pMP3, pMP3->pData + pMP3->dataSize, (pMP3->dataCapacity - pMP3->dataSize));
             if (bytesRead == 0) {
                 pMP3->atEnd = DRMP3_TRUE;
                 return 0; /* Error reading more data. */
@@ -2760,7 +2842,7 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__callbacks(drmp3* pMP3, drmp3d_sa
     return pcmFramesRead;
 }
 
-static drmp3_uint32 drmp3_decode_next_frame_ex__memory(drmp3* pMP3, drmp3d_sample_t* pPCMFrames)
+static drmp3_uint32 drmp3_decode_next_frame_ex__memory(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3dec_frame_info* pMP3FrameInfo, const drmp3_uint8** ppMP3FrameData)
 {
     drmp3_uint32 pcmFramesRead = 0;
     drmp3dec_frame_info info;
@@ -2779,11 +2861,21 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__memory(drmp3* pMP3, drmp3d_sampl
             pMP3->pcmFramesConsumedInMP3Frame  = 0;
             pMP3->pcmFramesRemainingInMP3Frame = pcmFramesRead;
             pMP3->mp3FrameChannels             = info.channels;
-            pMP3->mp3FrameSampleRate           = info.hz;
+            pMP3->mp3FrameSampleRate           = info.sample_rate;
+
+            if (pMP3FrameInfo != NULL) {
+                *pMP3FrameInfo = info;
+            }
+
+            if (ppMP3FrameData != NULL) {
+                *ppMP3FrameData = pMP3->memory.pData + pMP3->memory.currentReadPos;
+            }
+
             break;
         } else if (info.frame_bytes > 0) {
             /* No frames were read, but it looks like we skipped past one. Read the next MP3 frame. */
             pMP3->memory.currentReadPos += (size_t)info.frame_bytes;
+            pMP3->streamCursor          += (size_t)info.frame_bytes;
         } else {
             /* Nothing at all was read. Abort. */
             break;
@@ -2792,23 +2884,24 @@ static drmp3_uint32 drmp3_decode_next_frame_ex__memory(drmp3* pMP3, drmp3d_sampl
 
     /* Consume the data. */
     pMP3->memory.currentReadPos += (size_t)info.frame_bytes;
+    pMP3->streamCursor          += (size_t)info.frame_bytes;
 
     return pcmFramesRead;
 }
 
-static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames)
+static drmp3_uint32 drmp3_decode_next_frame_ex(drmp3* pMP3, drmp3d_sample_t* pPCMFrames, drmp3dec_frame_info* pMP3FrameInfo, const drmp3_uint8** ppMP3FrameData)   /* pMP3FrameInfo and ppMP3FrameData are optional outputs (may be NULL); they are only written when a frame was decoded. */
 {
     if (pMP3->memory.pData != NULL && pMP3->memory.dataSize > 0) {
-        return drmp3_decode_next_frame_ex__memory(pMP3, pPCMFrames);
+        return drmp3_decode_next_frame_ex__memory(pMP3, pPCMFrames, pMP3FrameInfo, ppMP3FrameData);   /* A memory buffer is attached: decode straight from it. */
     } else {
-        return drmp3_decode_next_frame_ex__callbacks(pMP3, pPCMFrames);
+        return drmp3_decode_next_frame_ex__callbacks(pMP3, pPCMFrames, pMP3FrameInfo, ppMP3FrameData);   /* Otherwise pull data through the client's onRead callback. */
     }
 }
 
 static drmp3_uint32 drmp3_decode_next_frame(drmp3* pMP3)
 {
     DRMP3_ASSERT(pMP3 != NULL);
-    return drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames);
+    return drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames, NULL, NULL);   /* Decodes into the decoder's own pcmFrames buffer; frame info and the raw frame pointer are not requested. */
 }
 
 #if 0
@@ -2818,7 +2911,7 @@ static drmp3_uint32 drmp3_seek_next_frame(drmp3* pMP3)
 
     DRMP3_ASSERT(pMP3 != NULL);
 
-    pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, NULL);
+    pcmFrameCount = drmp3_decode_next_frame_ex(pMP3, NULL, NULL, NULL);
     if (pcmFrameCount == 0) {
         return 0;
     }
@@ -2832,8 +2925,13 @@ static drmp3_uint32 drmp3_seek_next_frame(drmp3* pMP3)
 }
 #endif
 
-static drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks)
+static drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, drmp3_meta_proc onMeta, void* pUserData, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks)
 {
+    drmp3dec_frame_info firstFrameInfo;
+    const drmp3_uint8* pFirstFrameData;
+    drmp3_uint32 firstFramePCMFrameCount;
+    drmp3_uint32 detectedMP3FrameCount = 0xFFFFFFFF;
+
     DRMP3_ASSERT(pMP3 != NULL);
     DRMP3_ASSERT(onRead != NULL);
 
@@ -2842,17 +2940,326 @@ static drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drm
 
     pMP3->onRead = onRead;
     pMP3->onSeek = onSeek;
+    pMP3->onMeta = onMeta;
     pMP3->pUserData = pUserData;
+    pMP3->pUserDataMeta = pUserDataMeta;
     pMP3->allocationCallbacks = drmp3_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
 
     if (pMP3->allocationCallbacks.onFree == NULL || (pMP3->allocationCallbacks.onMalloc == NULL && pMP3->allocationCallbacks.onRealloc == NULL)) {
         return DRMP3_FALSE;    /* Invalid allocation callbacks. */
     }
 
-    /* Decode the first frame to confirm that it is indeed a valid MP3 stream. */
-    if (drmp3_decode_next_frame(pMP3) == 0) {
+    pMP3->streamCursor       = 0;
+    pMP3->streamLength       = DRMP3_UINT64_MAX;
+    pMP3->streamStartOffset  = 0;
+    pMP3->delayInPCMFrames   = 0;
+    pMP3->paddingInPCMFrames = 0;
+    pMP3->totalPCMFrameCount = DRMP3_UINT64_MAX;
+
+    /* We'll first check for any ID3v1 or APE tags. */
+    #if 1
+    if (onSeek != NULL && onTell != NULL) {
+        if (onSeek(pUserData, 0, DRMP3_SEEK_END)) {
+            drmp3_int64 streamLen;
+            int streamEndOffset = 0;
+
+            /* First get the length of the stream. We need this so we can ensure the stream is big enough to store the tags. */
+            if (onTell(pUserData, &streamLen)) {
+                /* ID3v1 */
+                if (streamLen > 128) {
+                    char id3[3];
+                    if (onSeek(pUserData, streamEndOffset - 128, DRMP3_SEEK_END)) {
+                        if (onRead(pUserData, id3, 3) == 3 && id3[0] == 'T' && id3[1] == 'A' && id3[2] == 'G') {
+                            /* We have an ID3v1 tag. */
+                            streamEndOffset -= 128;
+                            streamLen       -= 128;
+
+                            /* Fire a metadata callback for the TAG data. */
+                            if (onMeta != NULL) {
+                                drmp3_uint8 tag[128];
+                                tag[0] = 'T'; tag[1] = 'A'; tag[2] = 'G';
+
+                                if (onRead(pUserData, tag + 3, 125) == 125) {
+                                    drmp3__on_meta(pMP3, DRMP3_METADATA_TYPE_ID3V1, tag, 128);
+                                }
+                            }
+                        } else {
+                            /* No ID3v1 tag. */
+                        }
+                    } else {
+                        /* Failed to seek to the ID3v1 tag. */
+                    }
+                } else {
+                    /* Stream too short. No ID3v1 tag. */
+                }
+
+                /* APE */
+                if (streamLen > 32) {
+                    char ape[32];   /* The footer. */
+                    if (onSeek(pUserData, streamEndOffset - 32, DRMP3_SEEK_END)) {
+                        if (onRead(pUserData, ape, 32) == 32 && ape[0] == 'A' && ape[1] == 'P' && ape[2] == 'E' && ape[3] == 'T' && ape[4] == 'A' && ape[5] == 'G' && ape[6] == 'E' && ape[7] == 'X') {
+                            /* We have an APE tag. */
+                            drmp3_uint32 tagSize =
+                                ((drmp3_uint32)ape[24] << 0)  |
+                                ((drmp3_uint32)ape[25] << 8)  |
+                                ((drmp3_uint32)ape[26] << 16) |
+                                ((drmp3_uint32)ape[27] << 24);
+
+                            streamEndOffset -= 32 + tagSize;
+                            streamLen       -= 32 + tagSize;
+
+                            /* Fire a metadata callback for the APE data. Must include both the main content and footer. */
+                            if (onMeta != NULL) {
+                                /* We first need to seek to the start of the APE tag. */
+                                if (onSeek(pUserData, streamEndOffset, DRMP3_SEEK_END)) {
+                                    size_t apeTagSize = (size_t)tagSize + 32;
+                                    drmp3_uint8* pTagData = (drmp3_uint8*)drmp3_malloc(apeTagSize, pAllocationCallbacks);
+                                    if (pTagData != NULL) {
+                                        if (onRead(pUserData, pTagData, apeTagSize) == apeTagSize) {
+                                            drmp3__on_meta(pMP3, DRMP3_METADATA_TYPE_APE, pTagData, apeTagSize);
+                                        }
+
+                                        drmp3_free(pTagData, pAllocationCallbacks);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                } else {
+                    /* Stream too short. No APE tag. */
+                }
+
+                /* Seek back to the start. */
+                if (!onSeek(pUserData, 0, DRMP3_SEEK_SET)) {
+                    return DRMP3_FALSE; /* Failed to seek back to the start. */
+                }
+
+                pMP3->streamLength = (drmp3_uint64)streamLen;
+
+                if (pMP3->memory.pData != NULL) {
+                    pMP3->memory.dataSize = (size_t)pMP3->streamLength;
+                }
+            } else {
+                /* Failed to get the length of the stream. ID3v1 and APE tags cannot be skipped. */
+                if (!onSeek(pUserData, 0, DRMP3_SEEK_SET)) {
+                    return DRMP3_FALSE; /* Failed to seek back to the start. */
+                }
+            }
+        } else {
+            /* Failed to seek to the end. Cannot skip ID3v1 or APE tags. */
+        }
+    } else {
+        /* No onSeek or onTell callback. Cannot skip ID3v1 or APE tags. */
+    }
+    #endif
+
+
+    /* ID3v2 tags */
+    #if 1
+    {
+        char header[10];
+        if (onRead(pUserData, header, 10) == 10) {
+            if (header[0] == 'I' && header[1] == 'D' && header[2] == '3') {
+                drmp3_uint32 tagSize =
+                    (((drmp3_uint32)header[6] & 0x7F) << 21) |
+                    (((drmp3_uint32)header[7] & 0x7F) << 14) |
+                    (((drmp3_uint32)header[8] & 0x7F) << 7)  |
+                    (((drmp3_uint32)header[9] & 0x7F) << 0);
+
+                /* Account for the footer. */
+                if (header[5] & 0x10) {
+                    tagSize += 10;
+                }
+
+                /* Read the tag content and fire a metadata callback. */
+                if (onMeta != NULL) {
+                    size_t tagSizeWithHeader = 10 + tagSize;
+                    drmp3_uint8* pTagData = (drmp3_uint8*)drmp3_malloc(tagSizeWithHeader, pAllocationCallbacks);
+                    if (pTagData != NULL) {
+                        DRMP3_COPY_MEMORY(pTagData, header, 10);
+
+                        if (onRead(pUserData, pTagData + 10, tagSize) == tagSize) {
+                            drmp3__on_meta(pMP3, DRMP3_METADATA_TYPE_ID3V2, pTagData, tagSizeWithHeader);
+                        }
+
+                        drmp3_free(pTagData, pAllocationCallbacks);
+                    }
+                } else {
+                    /* Don't have a metadata callback, so just skip the tag. */
+                    if (onSeek != NULL) {
+                        if (!onSeek(pUserData, tagSize, DRMP3_SEEK_CUR)) {
+                            return DRMP3_FALSE; /* Failed to seek past the ID3v2 tag. */
+                        }
+                    } else {
+                        /* Don't have a seek callback. Read and discard. */
+                        char discard[1024];
+
+                        while (tagSize > 0) {
+                            size_t bytesToRead = tagSize;
+                            if (bytesToRead > sizeof(discard)) {
+                                bytesToRead = sizeof(discard);
+                            }
+
+                            if (onRead(pUserData, discard, bytesToRead) != bytesToRead) {
+                                return DRMP3_FALSE; /* Failed to read data. */
+                            }
+
+                            tagSize -= (drmp3_uint32)bytesToRead;
+                        }
+                    }
+                }
+
+                pMP3->streamStartOffset += 10 + tagSize;    /* +10 for the header. */
+                pMP3->streamCursor = pMP3->streamStartOffset;
+            } else {
+                /* Not an ID3v2 tag. Seek back to the start. */
+                if (onSeek != NULL) {
+                    if (!onSeek(pUserData, 0, DRMP3_SEEK_SET)) {
+                        return DRMP3_FALSE; /* Failed to seek back to the start. */
+                    }
+                } else {
+                    /* Don't have a seek callback to move backwards. We'll just fall through and let the decoding process re-sync. The ideal solution here would be to read into the cache. */
+
+                    /*
+                    TODO: Copy the header into the cache. Will need to allocate space. See drmp3_decode_next_frame_ex__callbacks. There is no need
+                    to handle the memory case because that will always have a seek implementation and will never hit this code path.
+                    */
+                }
+            }
+        } else {
+            /* Failed to read the header. We can return false here. If we couldn't read 10 bytes there's no way we'll have a valid MP3 stream. */
+            return DRMP3_FALSE;
+        }
+    }
+    #endif
+
+    /*
+    Decode the first frame to confirm that it is indeed a valid MP3 stream. Note that it's possible the first frame
+    is actually a Xing/LAME/VBRI header. If this is the case we need to skip over it.
+    */
+    firstFramePCMFrameCount = drmp3_decode_next_frame_ex(pMP3, (drmp3d_sample_t*)pMP3->pcmFrames, &firstFrameInfo, &pFirstFrameData);
+    if (firstFramePCMFrameCount > 0) {
+        DRMP3_ASSERT(pFirstFrameData != NULL);
+
+        /*
+        It might be a header. If so, we need to clear out the cached PCM frames in order to trigger a reload of fresh
+        data when decoding starts. We can assume all validation has already been performed to check if this is a valid
+        MP3 frame and that there is more than 0 bytes making up the frame.
+
+        We're going to be basing this parsing code off the minimp3_ex implementation.
+        */
+        #if 1
+        DRMP3_ASSERT(firstFrameInfo.frame_bytes > 0);
+        {
+            drmp3_bs bs;
+            drmp3_L3_gr_info grInfo[4];
+            const drmp3_uint8* pTagData = pFirstFrameData;
+
+            drmp3_bs_init(&bs, pFirstFrameData + DRMP3_HDR_SIZE, firstFrameInfo.frame_bytes - DRMP3_HDR_SIZE);
+
+            if (DRMP3_HDR_IS_CRC(pFirstFrameData)) {
+                drmp3_bs_get_bits(&bs, 16); /* CRC. */
+            }
+
+            if (drmp3_L3_read_side_info(&bs, grInfo, pFirstFrameData) >= 0) {
+                drmp3_bool32 isXing = DRMP3_FALSE;
+                drmp3_bool32 isInfo = DRMP3_FALSE;
+                const drmp3_uint8* pTagDataBeg;
+
+                pTagDataBeg = pFirstFrameData + DRMP3_HDR_SIZE + (bs.pos/8);
+                pTagData    = pTagDataBeg;
+
+                /* Check for both "Xing" and "Info" identifiers. */
+                isXing = (pTagData[0] == 'X' && pTagData[1] == 'i' && pTagData[2] == 'n' && pTagData[3] == 'g');
+                isInfo = (pTagData[0] == 'I' && pTagData[1] == 'n' && pTagData[2] == 'f' && pTagData[3] == 'o');
+
+                if (isXing || isInfo) {
+                    drmp3_uint32 bytes = 0;
+                    drmp3_uint32 flags = pTagData[7];
+
+                    pTagData += 8;  /* Skip past the ID and flags. */
+
+                    if (flags & 0x01) { /* FRAMES flag. */
+                        detectedMP3FrameCount = (drmp3_uint32)pTagData[0] << 24 | (drmp3_uint32)pTagData[1] << 16 | (drmp3_uint32)pTagData[2] << 8 | (drmp3_uint32)pTagData[3];
+                        pTagData += 4;
+                    }
+
+                    if (flags & 0x02) { /* BYTES flag. */
+                        bytes  = (drmp3_uint32)pTagData[0] << 24 | (drmp3_uint32)pTagData[1] << 16 | (drmp3_uint32)pTagData[2] << 8 | (drmp3_uint32)pTagData[3];
+                        (void)bytes;    /* <-- Just to silence a warning about `bytes` being assigned but unused. Want to leave this here in case I want to make use of it later. */
+                        pTagData += 4;
+                    }
+
+                    if (flags & 0x04) { /* TOC flag. */
+                        /* TODO: Extract and bind seek points. */
+                        pTagData += 100;
+                    }
+
+                    if (flags & 0x08) { /* SCALE flag. */
+                        pTagData += 4;
+                    }
+
+                    /* At this point we're done with the Xing/Info header. Now we can look at the LAME data. */
+                    if (pTagData[0]) {
+                        pTagData += 21;
+
+                        if (pTagData - pFirstFrameData + 14 < firstFrameInfo.frame_bytes) {
+                            int delayInPCMFrames;
+                            int paddingInPCMFrames;
+
+                            delayInPCMFrames   = (( (drmp3_uint32)pTagData[0]        << 4) | ((drmp3_uint32)pTagData[1] >> 4)) + (528 + 1);
+                            paddingInPCMFrames = ((((drmp3_uint32)pTagData[1] & 0xF) << 8) | ((drmp3_uint32)pTagData[2]     )) - (528 + 1);
+                            if (paddingInPCMFrames < 0) {
+                                paddingInPCMFrames = 0; /* Padding cannot be negative. Probably a malformed file. Ignore. */
+                            }
+
+                            pMP3->delayInPCMFrames   = (drmp3_uint32)delayInPCMFrames;
+                            pMP3->paddingInPCMFrames = (drmp3_uint32)paddingInPCMFrames;
+                        }
+                    }
+
+                    /*
+                    My understanding is that if the "Xing" header is present we can consider this to be a VBR stream and if the "Info" header is
+                    present it's a CBR stream. If this is not the case let me know! I'm just tracking this for the time being in case I want to
+                    look at doing some CBR optimizations later on, such as faster seeking.
+                    */
+                    if (isXing) {
+                        pMP3->isVBR = DRMP3_TRUE;
+                    } else if (isInfo) {
+                        pMP3->isCBR = DRMP3_TRUE;
+                    }
+
+                    /* Post the raw data of the tag to the metadata callback. */
+                    if (onMeta != NULL) {
+                        drmp3_metadata_type metadataType = isXing ? DRMP3_METADATA_TYPE_XING : DRMP3_METADATA_TYPE_VBRI;
+                        size_t tagDataSize;
+
+                        tagDataSize  = (size_t)firstFrameInfo.frame_bytes;
+                        tagDataSize -= (size_t)(pTagDataBeg - pFirstFrameData);
+
+                        drmp3__on_meta(pMP3, metadataType, pTagDataBeg, tagDataSize);
+                    }
+
+                    /* Since this was identified as a tag, we don't want to treat it as audio. We need to clear out the PCM cache. */
+                    pMP3->pcmFramesRemainingInMP3Frame = 0;
+
+                    /* The start offset needs to be moved to the end of this frame so it's not included in any audio processing after seeking. */
+                    pMP3->streamStartOffset += (drmp3_uint32)(firstFrameInfo.frame_bytes);
+                    pMP3->streamCursor = pMP3->streamStartOffset;
+                }
+            } else {
+                /* Failed to read the side info. */
+            }
+        }
+        #endif
+    } else {
+        /* Not a valid MP3 stream. */
         drmp3__free_from_callbacks(pMP3->pData, &pMP3->allocationCallbacks);    /* The call above may have allocated memory. Need to make sure it's freed before aborting. */
-        return DRMP3_FALSE; /* Not a valid MP3 stream. */
+        return DRMP3_FALSE;
+    }
+
+    if (detectedMP3FrameCount != 0xFFFFFFFF) {
+        pMP3->totalPCMFrameCount = (drmp3_uint64)detectedMP3FrameCount * firstFramePCMFrameCount;   /* Widen to 64-bit before multiplying so a large frame count taken from the Xing/Info tag cannot wrap. */
     }
 
     pMP3->channels   = pMP3->mp3FrameChannels;
@@ -2861,14 +3268,14 @@ static drmp3_bool32 drmp3_init_internal(drmp3* pMP3, drmp3_read_proc onRead, drm
     return DRMP3_TRUE;
 }
 
-DRMP3_API drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks)
+DRMP3_API drmp3_bool32 drmp3_init(drmp3* pMP3, drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, drmp3_meta_proc onMeta, void* pUserData, const drmp3_allocation_callbacks* pAllocationCallbacks)
 {
     if (pMP3 == NULL || onRead == NULL) {
         return DRMP3_FALSE;
     }
 
     DRMP3_ZERO_OBJECT(pMP3);
-    return drmp3_init_internal(pMP3, onRead, onSeek, pUserData, pAllocationCallbacks);
+    return drmp3_init_internal(pMP3, onRead, onSeek, onTell, onMeta, pUserData, pUserData, pAllocationCallbacks);
 }
 
 
@@ -2896,35 +3303,52 @@ static size_t drmp3__on_read_memory(void* pUserData, void* pBufferOut, size_t by
 static drmp3_bool32 drmp3__on_seek_memory(void* pUserData, int byteOffset, drmp3_seek_origin origin)
 {
     drmp3* pMP3 = (drmp3*)pUserData;
+    drmp3_int64 newCursor;
 
     DRMP3_ASSERT(pMP3 != NULL);
 
-    if (origin == drmp3_seek_origin_current) {
-        if (byteOffset > 0) {
-            if (pMP3->memory.currentReadPos + byteOffset > pMP3->memory.dataSize) {
-                byteOffset = (int)(pMP3->memory.dataSize - pMP3->memory.currentReadPos);  /* Trying to seek too far forward. */
-            }
-        } else {
-            if (pMP3->memory.currentReadPos < (size_t)-byteOffset) {
-                byteOffset = -(int)pMP3->memory.currentReadPos;  /* Trying to seek too far backwards. */
-            }
-        }
+    newCursor = pMP3->memory.currentReadPos;
 
-        /* This will never underflow thanks to the clamps above. */
-        pMP3->memory.currentReadPos += byteOffset;
+    if (origin == DRMP3_SEEK_SET) {
+        newCursor = 0;
+    } else if (origin == DRMP3_SEEK_CUR) {
+        newCursor = (drmp3_int64)pMP3->memory.currentReadPos;
+    } else if (origin == DRMP3_SEEK_END) {
+        newCursor = (drmp3_int64)pMP3->memory.dataSize;
     } else {
-        if ((drmp3_uint32)byteOffset <= pMP3->memory.dataSize) {
-            pMP3->memory.currentReadPos = byteOffset;
-        } else {
-            pMP3->memory.currentReadPos = pMP3->memory.dataSize;  /* Trying to seek too far forward. */
-        }
+        DRMP3_ASSERT(!"Invalid seek origin");
+        return DRMP3_FALSE;
+    }
+
+    newCursor += byteOffset;
+
+    if (newCursor < 0) {
+        return DRMP3_FALSE;  /* Trying to seek prior to the start of the buffer. */
+    }
+    if ((size_t)newCursor > pMP3->memory.dataSize) {
+        return DRMP3_FALSE;  /* Trying to seek beyond the end of the buffer. */
     }
 
+    pMP3->memory.currentReadPos = (size_t)newCursor;
+
     return DRMP3_TRUE;
 }
 
-DRMP3_API drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_allocation_callbacks* pAllocationCallbacks)
+static drmp3_bool32 drmp3__on_tell_memory(void* pUserData, drmp3_int64* pCursor)
 {
+    drmp3* pMP3 = (drmp3*)pUserData;
+
+    DRMP3_ASSERT(pMP3 != NULL);
+    DRMP3_ASSERT(pCursor != NULL);
+
+    *pCursor = (drmp3_int64)pMP3->memory.currentReadPos;
+    return DRMP3_TRUE;
+}
+
+DRMP3_API drmp3_bool32 drmp3_init_memory_with_metadata(drmp3* pMP3, const void* pData, size_t dataSize, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    drmp3_bool32 result;
+
     if (pMP3 == NULL) {
         return DRMP3_FALSE;
     }
@@ -2939,7 +3363,26 @@ DRMP3_API drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t
     pMP3->memory.dataSize = dataSize;
     pMP3->memory.currentReadPos = 0;
 
-    return drmp3_init_internal(pMP3, drmp3__on_read_memory, drmp3__on_seek_memory, pMP3, pAllocationCallbacks);
+    result = drmp3_init_internal(pMP3, drmp3__on_read_memory, drmp3__on_seek_memory, drmp3__on_tell_memory, onMeta, pMP3, pUserDataMeta, pAllocationCallbacks);
+    if (result == DRMP3_FALSE) {
+        return DRMP3_FALSE;
+    }
+
+    /* Adjust the length of the memory stream to account for ID3v1 and APE tags. */
+    if (pMP3->streamLength <= (drmp3_uint64)DRMP3_SIZE_MAX) {
+        pMP3->memory.dataSize = (size_t)pMP3->streamLength; /* Safe cast. */
+    }
+
+    if (pMP3->streamStartOffset > (drmp3_uint64)DRMP3_SIZE_MAX) {
+        return DRMP3_FALSE; /* Tags too big. */
+    }
+
+    return DRMP3_TRUE;
+}
+
+DRMP3_API drmp3_bool32 drmp3_init_memory(drmp3* pMP3, const void* pData, size_t dataSize, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    return drmp3_init_memory_with_metadata(pMP3, pData, dataSize, NULL, NULL, pAllocationCallbacks);
 }
 
 
@@ -3069,7 +3512,7 @@ static drmp3_result drmp3_result_from_errno(int e)
     #ifdef ENOSYS
         case ENOSYS: return DRMP3_NOT_IMPLEMENTED;
     #endif
-    #ifdef ENOTEMPTY
+    #if defined(ENOTEMPTY) && ENOTEMPTY != EEXIST   /* In AIX, ENOTEMPTY and EEXIST use the same value. */
         case ENOTEMPTY: return DRMP3_DIRECTORY_NOT_EMPTY;
     #endif
     #ifdef ELOOP
@@ -3519,19 +3962,56 @@ static size_t drmp3__on_read_stdio(void* pUserData, void* pBufferOut, size_t byt
 
 static drmp3_bool32 drmp3__on_seek_stdio(void* pUserData, int offset, drmp3_seek_origin origin)
 {
-    return fseek((FILE*)pUserData, offset, (origin == drmp3_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+    int whence = SEEK_SET;
+    if (origin == DRMP3_SEEK_CUR) {
+        whence = SEEK_CUR;
+    } else if (origin == DRMP3_SEEK_END) {
+        whence = SEEK_END;
+    }
+
+    return fseek((FILE*)pUserData, offset, whence) == 0;
 }
 
-DRMP3_API drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks)
+static drmp3_bool32 drmp3__on_tell_stdio(void* pUserData, drmp3_int64* pCursor)
+{
+    /* Tell callback for FILE* streams. Writes the current file position to *pCursor and returns DRMP3_FALSE when the position could not be queried. */
+    FILE* pFileStdio = (FILE*)pUserData;
+    drmp3_int64 result;
+
+    /* These were all validated at a higher level. */
+    DRMP3_ASSERT(pFileStdio != NULL);
+    DRMP3_ASSERT(pCursor    != NULL);
+
+#if defined(_WIN32)
+    #if defined(_MSC_VER) && _MSC_VER > 1200
+        result = _ftelli64(pFileStdio);
+    #else
+        result = ftell(pFileStdio);
+    #endif
+#else
+    result = ftell(pFileStdio);
+#endif
+
+    *pCursor = result;
+    return (result >= 0) ? DRMP3_TRUE : DRMP3_FALSE;    /* ftell()/_ftelli64() return -1 on failure; do not report that as a valid cursor. */
+}
+
+DRMP3_API drmp3_bool32 drmp3_init_file_with_metadata(drmp3* pMP3, const char* pFilePath, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks)
 {
     drmp3_bool32 result;
     FILE* pFile;
 
+    if (pMP3 == NULL) {
+        return DRMP3_FALSE;
+    }
+
+    DRMP3_ZERO_OBJECT(pMP3);
+
     if (drmp3_fopen(&pFile, pFilePath, "rb") != DRMP3_SUCCESS) {
         return DRMP3_FALSE;
     }
 
-    result = drmp3_init(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    result = drmp3_init_internal(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, drmp3__on_tell_stdio, onMeta, (void*)pFile, pUserDataMeta, pAllocationCallbacks);
     if (result != DRMP3_TRUE) {
         fclose(pFile);
         return result;
@@ -3540,16 +4020,22 @@ DRMP3_API drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* pFilePath, const
     return DRMP3_TRUE;
 }
 
-DRMP3_API drmp3_bool32 drmp3_init_file_w(drmp3* pMP3, const wchar_t* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks)
+DRMP3_API drmp3_bool32 drmp3_init_file_with_metadata_w(drmp3* pMP3, const wchar_t* pFilePath, drmp3_meta_proc onMeta, void* pUserDataMeta, const drmp3_allocation_callbacks* pAllocationCallbacks)
 {
     drmp3_bool32 result;
     FILE* pFile;
 
+    if (pMP3 == NULL) {
+        return DRMP3_FALSE;
+    }
+
+    DRMP3_ZERO_OBJECT(pMP3);
+
     if (drmp3_wfopen(&pFile, pFilePath, L"rb", pAllocationCallbacks) != DRMP3_SUCCESS) {
         return DRMP3_FALSE;
     }
 
-    result = drmp3_init(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    result = drmp3_init_internal(pMP3, drmp3__on_read_stdio, drmp3__on_seek_stdio, drmp3__on_tell_stdio, onMeta, (void*)pFile, pUserDataMeta, pAllocationCallbacks);
     if (result != DRMP3_TRUE) {
         fclose(pFile);
         return result;
@@ -3557,6 +4043,16 @@ DRMP3_API drmp3_bool32 drmp3_init_file_w(drmp3* pMP3, const wchar_t* pFilePath,
 
     return DRMP3_TRUE;
 }
+
+DRMP3_API drmp3_bool32 drmp3_init_file(drmp3* pMP3, const char* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    return drmp3_init_file_with_metadata(pMP3, pFilePath, NULL, NULL, pAllocationCallbacks);
+}
+
+DRMP3_API drmp3_bool32 drmp3_init_file_w(drmp3* pMP3, const wchar_t* pFilePath, const drmp3_allocation_callbacks* pAllocationCallbacks)
+{
+    return drmp3_init_file_with_metadata_w(pMP3, pFilePath, NULL, NULL, pAllocationCallbacks);
+}
 #endif
 
 DRMP3_API void drmp3_uninit(drmp3* pMP3)
@@ -3644,19 +4140,48 @@ static drmp3_uint64 drmp3_read_pcm_frames_raw(drmp3* pMP3, drmp3_uint64 framesTo
     DRMP3_ASSERT(pMP3->onRead != NULL);
 
     while (framesToRead > 0) {
-        drmp3_uint32 framesToConsume = (drmp3_uint32)DRMP3_MIN(pMP3->pcmFramesRemainingInMP3Frame, framesToRead);
+        drmp3_uint32 framesToConsume;
+
+        /* Skip frames if necessary. */
+        if (pMP3->currentPCMFrame < pMP3->delayInPCMFrames) {
+            drmp3_uint32 framesToSkip = (drmp3_uint32)DRMP3_MIN(pMP3->pcmFramesRemainingInMP3Frame, pMP3->delayInPCMFrames - pMP3->currentPCMFrame);
+
+            pMP3->currentPCMFrame              += framesToSkip;
+            pMP3->pcmFramesConsumedInMP3Frame  += framesToSkip;
+            pMP3->pcmFramesRemainingInMP3Frame -= framesToSkip;
+        }
+
+        framesToConsume = (drmp3_uint32)DRMP3_MIN(pMP3->pcmFramesRemainingInMP3Frame, framesToRead);
+
+        /* Clamp the number of frames to read to the padding. */
+        if (pMP3->totalPCMFrameCount != DRMP3_UINT64_MAX && pMP3->totalPCMFrameCount > pMP3->paddingInPCMFrames) {
+            if (pMP3->currentPCMFrame < (pMP3->totalPCMFrameCount - pMP3->paddingInPCMFrames)) {
+                drmp3_uint64 framesRemainigToPadding = (pMP3->totalPCMFrameCount - pMP3->paddingInPCMFrames) - pMP3->currentPCMFrame;
+                if (framesToConsume >               framesRemainigToPadding) {
+                    framesToConsume = (drmp3_uint32)framesRemainigToPadding;
+                }
+            } else {
+                /* We're into the padding. Abort. */
+                break;
+            }
+        }
+
         if (pBufferOut != NULL) {
-        #if defined(DR_MP3_FLOAT_OUTPUT)
-            /* f32 */
-            float* pFramesOutF32 = (float*)DRMP3_OFFSET_PTR(pBufferOut,          sizeof(float) * totalFramesRead                   * pMP3->channels);
-            float* pFramesInF32  = (float*)DRMP3_OFFSET_PTR(&pMP3->pcmFrames[0], sizeof(float) * pMP3->pcmFramesConsumedInMP3Frame * pMP3->mp3FrameChannels);
-            DRMP3_COPY_MEMORY(pFramesOutF32, pFramesInF32, sizeof(float) * framesToConsume * pMP3->channels);
-        #else
-            /* s16 */
-            drmp3_int16* pFramesOutS16 = (drmp3_int16*)DRMP3_OFFSET_PTR(pBufferOut,          sizeof(drmp3_int16) * totalFramesRead                   * pMP3->channels);
-            drmp3_int16* pFramesInS16  = (drmp3_int16*)DRMP3_OFFSET_PTR(&pMP3->pcmFrames[0], sizeof(drmp3_int16) * pMP3->pcmFramesConsumedInMP3Frame * pMP3->mp3FrameChannels);
-            DRMP3_COPY_MEMORY(pFramesOutS16, pFramesInS16, sizeof(drmp3_int16) * framesToConsume * pMP3->channels);
-        #endif
+            #if defined(DR_MP3_FLOAT_OUTPUT)
+            {
+                /* f32 */
+                float* pFramesOutF32 = (float*)DRMP3_OFFSET_PTR(pBufferOut,          sizeof(float) * totalFramesRead                   * pMP3->channels);
+                float* pFramesInF32  = (float*)DRMP3_OFFSET_PTR(&pMP3->pcmFrames[0], sizeof(float) * pMP3->pcmFramesConsumedInMP3Frame * pMP3->mp3FrameChannels);
+                DRMP3_COPY_MEMORY(pFramesOutF32, pFramesInF32, sizeof(float) * framesToConsume * pMP3->channels);
+            }
+            #else
+            {
+                /* s16 */
+                drmp3_int16* pFramesOutS16 = (drmp3_int16*)DRMP3_OFFSET_PTR(pBufferOut,          sizeof(drmp3_int16) * totalFramesRead                   * pMP3->channels);
+                drmp3_int16* pFramesInS16  = (drmp3_int16*)DRMP3_OFFSET_PTR(&pMP3->pcmFrames[0], sizeof(drmp3_int16) * pMP3->pcmFramesConsumedInMP3Frame * pMP3->mp3FrameChannels);
+                DRMP3_COPY_MEMORY(pFramesOutS16, pFramesInS16, sizeof(drmp3_int16) * framesToConsume * pMP3->channels);
+            }
+            #endif
         }
 
         pMP3->currentPCMFrame              += framesToConsume;
@@ -3669,12 +4194,14 @@ static drmp3_uint64 drmp3_read_pcm_frames_raw(drmp3* pMP3, drmp3_uint64 framesTo
             break;
         }
 
+        /* If the cursor is already at the padding we need to abort. */
+        if (pMP3->totalPCMFrameCount != DRMP3_UINT64_MAX && pMP3->totalPCMFrameCount > pMP3->paddingInPCMFrames && pMP3->currentPCMFrame >= (pMP3->totalPCMFrameCount - pMP3->paddingInPCMFrames)) {
+            break;
+        }
+
         DRMP3_ASSERT(pMP3->pcmFramesRemainingInMP3Frame == 0);
 
-        /*
-        At this point we have exhausted our in-memory buffer so we need to re-fill. Note that the sample rate may have changed
-        at this point which means we'll also need to update our sample rate conversion pipeline.
-        */
+        /* At this point we have exhausted our in-memory buffer so we need to re-fill. */
         if (drmp3_decode_next_frame(pMP3) == 0) {
             break;
         }
@@ -3776,7 +4303,7 @@ static drmp3_bool32 drmp3_seek_to_start_of_stream(drmp3* pMP3)
     DRMP3_ASSERT(pMP3->onSeek != NULL);
 
     /* Seek to the start of the stream to begin with. */
-    if (!drmp3__on_seek(pMP3, 0, drmp3_seek_origin_start)) {
+    if (!drmp3__on_seek_64(pMP3, pMP3->streamStartOffset, DRMP3_SEEK_SET)) {
         return DRMP3_FALSE;
     }
 
@@ -3876,7 +4403,7 @@ static drmp3_bool32 drmp3_seek_to_pcm_frame__seek_table(drmp3* pMP3, drmp3_uint6
     }
 
     /* First thing to do is seek to the first byte of the relevant MP3 frame. */
-    if (!drmp3__on_seek_64(pMP3, seekPoint.seekPosInBytes, drmp3_seek_origin_start)) {
+    if (!drmp3__on_seek_64(pMP3, seekPoint.seekPosInBytes, DRMP3_SEEK_SET)) {
         return DRMP3_FALSE; /* Failed to seek. */
     }
 
@@ -3895,7 +4422,7 @@ static drmp3_bool32 drmp3_seek_to_pcm_frame__seek_table(drmp3* pMP3, drmp3_uint6
         }
 
         /* We first need to decode the next frame. */
-        pcmFramesRead = drmp3_decode_next_frame_ex(pMP3, pPCMFrames);
+        pcmFramesRead = drmp3_decode_next_frame_ex(pMP3, pPCMFrames, NULL, NULL);
         if (pcmFramesRead == 0) {
             return DRMP3_FALSE;
         }
@@ -3963,7 +4490,7 @@ DRMP3_API drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint
     for (;;) {
         drmp3_uint32 pcmFramesInCurrentMP3Frame;
 
-        pcmFramesInCurrentMP3Frame = drmp3_decode_next_frame_ex(pMP3, NULL);
+        pcmFramesInCurrentMP3Frame = drmp3_decode_next_frame_ex(pMP3, NULL, NULL, NULL);
         if (pcmFramesInCurrentMP3Frame == 0) {
             break;
         }
@@ -3994,11 +4521,35 @@ DRMP3_API drmp3_bool32 drmp3_get_mp3_and_pcm_frame_count(drmp3* pMP3, drmp3_uint
 DRMP3_API drmp3_uint64 drmp3_get_pcm_frame_count(drmp3* pMP3)
 {
     drmp3_uint64 totalPCMFrameCount;
-    if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, NULL, &totalPCMFrameCount)) {
+
+    if (pMP3 == NULL) {
         return 0;
     }
 
-    return totalPCMFrameCount;
+    if (pMP3->totalPCMFrameCount != DRMP3_UINT64_MAX) {
+        totalPCMFrameCount = pMP3->totalPCMFrameCount;
+
+        if (totalPCMFrameCount >= pMP3->delayInPCMFrames) {
+            totalPCMFrameCount -= pMP3->delayInPCMFrames;
+        } else {
+            /* The delay is greater than the frame count reported by the Xing/Info tag. Assume it's invalid and ignore. */
+        }
+
+        if (totalPCMFrameCount >= pMP3->paddingInPCMFrames) {
+            totalPCMFrameCount -= pMP3->paddingInPCMFrames;
+        } else {
+            /* The padding is greater than the frame count reported by the Xing/Info tag. Assume it's invalid and ignore. */
+        }
+
+        return totalPCMFrameCount;
+    } else {
+        /* Unknown frame count. Need to calculate it. */
+        if (!drmp3_get_mp3_and_pcm_frame_count(pMP3, NULL, &totalPCMFrameCount)) {
+            return 0;
+        }
+
+        return totalPCMFrameCount;
+    }
 }
 
 DRMP3_API drmp3_uint64 drmp3_get_mp3_frame_count(drmp3* pMP3)
@@ -4101,7 +4652,7 @@ DRMP3_API drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pS
             mp3FrameInfo[iMP3Frame].pcmFrameIndex = runningPCMFrameCount;
 
             /* We need to get information about this frame so we can know how many samples it contained. */
-            pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL);
+            pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, NULL, NULL);
             if (pcmFramesInCurrentMP3FrameIn == 0) {
                 return DRMP3_FALSE; /* This should never happen. */
             }
@@ -4145,7 +4696,7 @@ DRMP3_API drmp3_bool32 drmp3_calculate_seek_points(drmp3* pMP3, drmp3_uint32* pS
                     Go to the next MP3 frame. This shouldn't ever fail, but just in case it does we just set the seek point and break. If it happens, it
                     should only ever do it for the last seek point.
                     */
-                    pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL);
+                    pcmFramesInCurrentMP3FrameIn = drmp3_decode_next_frame_ex(pMP3, NULL, NULL, NULL);
                     if (pcmFramesInCurrentMP3FrameIn == 0) {
                         pSeekPoints[iSeekPoint].seekPosInBytes     = mp3FrameInfo[0].bytePos;
                         pSeekPoints[iSeekPoint].pcmFrameIndex      = nextTargetPCMFrame;
@@ -4327,20 +4878,20 @@ static drmp3_int16* drmp3__full_read_and_close_s16(drmp3* pMP3, drmp3_config* pC
 }
 
 
-DRMP3_API float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
+DRMP3_API float* drmp3_open_and_read_pcm_frames_f32(drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
 {
     drmp3 mp3;
-    if (!drmp3_init(&mp3, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+    if (!drmp3_init(&mp3, onRead, onSeek, onTell, NULL, pUserData, pAllocationCallbacks)) {
         return NULL;
     }
 
     return drmp3__full_read_and_close_f32(&mp3, pConfig, pTotalFrameCount);
 }
 
-DRMP3_API drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
+DRMP3_API drmp3_int16* drmp3_open_and_read_pcm_frames_s16(drmp3_read_proc onRead, drmp3_seek_proc onSeek, drmp3_tell_proc onTell, void* pUserData, drmp3_config* pConfig, drmp3_uint64* pTotalFrameCount, const drmp3_allocation_callbacks* pAllocationCallbacks)
 {
     drmp3 mp3;
-    if (!drmp3_init(&mp3, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+    if (!drmp3_init(&mp3, onRead, onSeek, onTell, NULL, pUserData, pAllocationCallbacks)) {
         return NULL;
     }
 
@@ -4427,74 +4978,23 @@ DIFFERENCES BETWEEN minimp3 AND dr_mp3
   using minimp3 in conjunction with stb_vorbis. dr_mp3 addresses this.
 */
 
-/*
-RELEASE NOTES - v0.5.0
-=======================
-Version 0.5.0 has breaking API changes.
-
-Improved Client-Defined Memory Allocation
------------------------------------------
-The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
-existing system of DRMP3_MALLOC, DRMP3_REALLOC and DRMP3_FREE are still in place and will be used by default when no custom
-allocation callbacks are specified.
-
-To use the new system, you pass in a pointer to a drmp3_allocation_callbacks object to drmp3_init() and family, like this:
-
-    void* my_malloc(size_t sz, void* pUserData)
-    {
-        return malloc(sz);
-    }
-    void* my_realloc(void* p, size_t sz, void* pUserData)
-    {
-        return realloc(p, sz);
-    }
-    void my_free(void* p, void* pUserData)
-    {
-        free(p);
-    }
-
-    ...
-
-    drmp3_allocation_callbacks allocationCallbacks;
-    allocationCallbacks.pUserData = &myData;
-    allocationCallbacks.onMalloc  = my_malloc;
-    allocationCallbacks.onRealloc = my_realloc;
-    allocationCallbacks.onFree    = my_free;
-    drmp3_init_file(&mp3, "my_file.mp3", NULL, &allocationCallbacks);
-
-The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
-
-Passing in null for the allocation callbacks object will cause dr_mp3 to use defaults which is the same as DRMP3_MALLOC,
-DRMP3_REALLOC and DRMP3_FREE and the equivalent of how it worked in previous versions.
-
-Every API that opens a drmp3 object now takes this extra parameter. These include the following:
-
-    drmp3_init()
-    drmp3_init_file()
-    drmp3_init_memory()
-    drmp3_open_and_read_pcm_frames_f32()
-    drmp3_open_and_read_pcm_frames_s16()
-    drmp3_open_memory_and_read_pcm_frames_f32()
-    drmp3_open_memory_and_read_pcm_frames_s16()
-    drmp3_open_file_and_read_pcm_frames_f32()
-    drmp3_open_file_and_read_pcm_frames_s16()
-
-Renamed APIs
-------------
-The following APIs have been renamed for consistency with other dr_* libraries and to make it clear that they return PCM frame
-counts rather than sample counts.
-
-    drmp3_open_and_read_f32()        -> drmp3_open_and_read_pcm_frames_f32()
-    drmp3_open_and_read_s16()        -> drmp3_open_and_read_pcm_frames_s16()
-    drmp3_open_memory_and_read_f32() -> drmp3_open_memory_and_read_pcm_frames_f32()
-    drmp3_open_memory_and_read_s16() -> drmp3_open_memory_and_read_pcm_frames_s16()
-    drmp3_open_file_and_read_f32()   -> drmp3_open_file_and_read_pcm_frames_f32()
-    drmp3_open_file_and_read_s16()   -> drmp3_open_file_and_read_pcm_frames_s16()
-*/
-
 /*
 REVISION HISTORY
 ================
+v0.7.0 - TBD
+  - The old `DRMP3_IMPLEMENTATION` has been removed. Use `DR_MP3_IMPLEMENTATION` instead. The reason for this change is that everything will eventually use the underscored naming convention, so `drmp3` will become `dr_mp3`.
+  - API CHANGE: Seek origins have been renamed to match the naming convention used by dr_wav and my other libraries.
+    - drmp3_seek_origin_start   -> DRMP3_SEEK_SET
+    - drmp3_seek_origin_current -> DRMP3_SEEK_CUR
+    - DRMP3_SEEK_END (new)
+  - API CHANGE: Add DRMP3_SEEK_END as a seek origin for the seek callback. This is required for detection of ID3v1 and APE tags.
+  - API CHANGE: Add onTell callback to `drmp3_init()`. This is needed in order to track the location of ID3v1 and APE tags.
+  - API CHANGE: Add onMeta callback to `drmp3_init()`. This is used for reporting tag data back to the caller. Currently this only reports the raw tag data which means applications need to parse the data themselves.
+  - API CHANGE: Rename `drmp3dec_frame_info.hz` to `drmp3dec_frame_info.sample_rate`.
+  - Add detection of ID3v2, ID3v1, APE and Xing/VBRI tags. This should fix errors with some files where the decoder was reading tags as audio data.
+  - Delay and padding samples from LAME tags are now handled.
+  - Fix compilation for AIX OS.
+
 v0.6.40 - 2024-12-17
   - Improve detection of ARM64EC
 
diff --git a/sys-tune/source/impl/dr_wav.h b/sys-tune/source/impl/dr_wav.h
index f9f69a7..a4a82ae 100644
--- a/sys-tune/source/impl/dr_wav.h
+++ b/sys-tune/source/impl/dr_wav.h
@@ -1,6 +1,6 @@
 /*
 WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_wav - v0.13.17 - 2024-12-17
+dr_wav - v0.14.0 - TBD
 
 David Reid - mackron@gmail.com
 
@@ -146,8 +146,8 @@ extern "C" {
 #define DRWAV_XSTRINGIFY(x)     DRWAV_STRINGIFY(x)
 
 #define DRWAV_VERSION_MAJOR     0
-#define DRWAV_VERSION_MINOR     13
-#define DRWAV_VERSION_REVISION  17
+#define DRWAV_VERSION_MINOR     14
+#define DRWAV_VERSION_REVISION  0
 #define DRWAV_VERSION_STRING    DRWAV_XSTRINGIFY(DRWAV_VERSION_MAJOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_MINOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_REVISION)
 
 #include <stddef.h> /* For size_t. */
@@ -305,8 +305,9 @@ typedef struct
 
 typedef enum
 {
-    drwav_seek_origin_start,
-    drwav_seek_origin_current
+    DRWAV_SEEK_SET,
+    DRWAV_SEEK_CUR,
+    DRWAV_SEEK_END
 } drwav_seek_origin;
 
 typedef enum
@@ -415,11 +416,21 @@ origin    [in] The origin of the seek - the current position or the start of the
 
 Returns whether or not the seek was successful.
 
-Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be either drwav_seek_origin_start or
-drwav_seek_origin_current.
+Whether it is relative to the beginning, the current position or the end of the stream is determined by the "origin" parameter which will be one of
+DRWAV_SEEK_SET, DRWAV_SEEK_CUR or DRWAV_SEEK_END.
 */
 typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin);
 
+/*
+Callback for when the current position in the stream needs to be retrieved.
+
+pUserData [in]  The user data that was passed to drwav_init() and family.
+pCursor   [out] A pointer to a variable to receive the current position in the stream.
+
+Returns whether or not the operation was successful.
+*/
+typedef drwav_bool32 (* drwav_tell_proc)(void* pUserData, drwav_int64* pCursor);
+
 /*
 Callback for when drwav_init_ex() finds a chunk.
 
@@ -514,6 +525,11 @@ typedef enum
     drwav_metadata_type_list_info_genre             = 1 << 15,
     drwav_metadata_type_list_info_album             = 1 << 16,
     drwav_metadata_type_list_info_tracknumber       = 1 << 17,
+    drwav_metadata_type_list_info_location          = 1 << 18,
+    drwav_metadata_type_list_info_organization      = 1 << 19,
+    drwav_metadata_type_list_info_keywords          = 1 << 20,
+    drwav_metadata_type_list_info_medium            = 1 << 21,
+    drwav_metadata_type_list_info_description       = 1 << 22,
 
     /* Other type constants for convenience. */
     drwav_metadata_type_list_all_info_strings       = drwav_metadata_type_list_info_software
@@ -524,7 +540,12 @@ typedef enum
                                                     | drwav_metadata_type_list_info_date
                                                     | drwav_metadata_type_list_info_genre
                                                     | drwav_metadata_type_list_info_album
-                                                    | drwav_metadata_type_list_info_tracknumber,
+                                                    | drwav_metadata_type_list_info_tracknumber
+                                                    | drwav_metadata_type_list_info_location
+                                                    | drwav_metadata_type_list_info_organization
+                                                    | drwav_metadata_type_list_info_keywords
+                                                    | drwav_metadata_type_list_info_medium
+                                                    | drwav_metadata_type_list_info_description,
 
     drwav_metadata_type_list_all_adtl               = drwav_metadata_type_list_label
                                                     | drwav_metadata_type_list_note
@@ -555,11 +576,11 @@ typedef struct
     /* See drwav_smpl_loop_type. */
     drwav_uint32 type;
 
-    /* The byte offset of the first sample to be played in the loop. */
-    drwav_uint32 firstSampleByteOffset;
+    /* The offset of the first sample to be played in the loop. */
+    drwav_uint32 firstSampleOffset;
 
-    /* The byte offset into the audio data of the last sample to be played in the loop. */
-    drwav_uint32 lastSampleByteOffset;
+    /* The offset into the audio data of the last sample to be played in the loop. */
+    drwav_uint32 lastSampleOffset;
 
     /* A value to represent that playback should occur at a point between samples. This value ranges from 0 to UINT32_MAX. Where a value of 0 means no fraction, and a value of (UINT32_MAX / 2) would mean half a sample. */
     drwav_uint32 sampleFraction;
@@ -637,8 +658,8 @@ typedef struct
     /* Set to 0 for uncompressed formats. Else the last byte in compressed wave data where decompression can begin to find the value of the corresponding sample value. */
     drwav_uint32 blockStart;
 
-    /* For uncompressed formats this is the byte offset of the cue point into the audio data. For compressed formats this is relative to the block specified with blockStart. */
-    drwav_uint32 sampleByteOffset;
+    /* For uncompressed formats this is the offset of the cue point into the audio data. For compressed formats this is relative to the block specified with blockStart. */
+    drwav_uint32 sampleOffset;
 } drwav_cue_point;
 
 typedef struct
@@ -846,6 +867,9 @@ typedef struct
     /* A pointer to the function to call when the wav file needs to be seeked. */
     drwav_seek_proc onSeek;
 
+    /* A pointer to the function to call when the position of the stream needs to be retrieved. */
+    drwav_tell_proc onTell;
+
     /* The user data to pass to callbacks. */
     void* pUserData;
 
@@ -968,9 +992,9 @@ after the function returns.
 
 See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
 */
-DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_with_metadata(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, drwav_chunk_proc onChunk, void* pReadSeekTellUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_with_metadata(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
 
 /*
 Initializes a pre-allocated drwav object for writing.
@@ -1273,9 +1297,9 @@ Opens and reads an entire wav file in a single operation.
 
 The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
 */
-DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
 #ifndef DR_WAV_NO_STDIO
 /*
 Opens and decodes an entire wav file in a single operation.
@@ -1962,12 +1986,12 @@ DRWAV_PRIVATE drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uin
     drwav_uint64 bytesRemainingToSeek = offset;
     while (bytesRemainingToSeek > 0) {
         if (bytesRemainingToSeek > 0x7FFFFFFF) {
-            if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
+            if (!onSeek(pUserData, 0x7FFFFFFF, DRWAV_SEEK_CUR)) {
                 return DRWAV_FALSE;
             }
             bytesRemainingToSeek -= 0x7FFFFFFF;
         } else {
-            if (!onSeek(pUserData, (int)bytesRemainingToSeek, drwav_seek_origin_current)) {
+            if (!onSeek(pUserData, (int)bytesRemainingToSeek, DRWAV_SEEK_CUR)) {
                 return DRWAV_FALSE;
             }
             bytesRemainingToSeek = 0;
@@ -1980,21 +2004,21 @@ DRWAV_PRIVATE drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uin
 DRWAV_PRIVATE drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
 {
     if (offset <= 0x7FFFFFFF) {
-        return onSeek(pUserData, (int)offset, drwav_seek_origin_start);
+        return onSeek(pUserData, (int)offset, DRWAV_SEEK_SET);
     }
 
     /* Larger than 32-bit seek. */
-    if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_start)) {
+    if (!onSeek(pUserData, 0x7FFFFFFF, DRWAV_SEEK_SET)) {
         return DRWAV_FALSE;
     }
     offset -= 0x7FFFFFFF;
 
     for (;;) {
         if (offset <= 0x7FFFFFFF) {
-            return onSeek(pUserData, (int)offset, drwav_seek_origin_current);
+            return onSeek(pUserData, (int)offset, DRWAV_SEEK_CUR);
         }
 
-        if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
+        if (!onSeek(pUserData, 0x7FFFFFFF, DRWAV_SEEK_CUR)) {
             return DRWAV_FALSE;
         }
         offset -= 0x7FFFFFFF;
@@ -2028,7 +2052,7 @@ DRWAV_PRIVATE drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserDat
         return DRWAV_FALSE;
     }
 
-    if (origin == drwav_seek_origin_start) {
+    if (origin == DRWAV_SEEK_SET) {
         *pCursor = offset;
     } else {
         *pCursor += offset;
@@ -2189,12 +2213,12 @@ DRWAV_PRIVATE drwav_uint64 drwav__read_smpl_to_metadata_obj(drwav__metadata_pars
                 bytesJustRead = drwav__metadata_parser_read(pParser, smplLoopData, sizeof(smplLoopData), &totalBytesRead);
 
                 if (bytesJustRead == sizeof(smplLoopData)) {
-                    pMetadata->data.smpl.pLoops[iSampleLoop].cuePointId            = drwav_bytes_to_u32(smplLoopData + 0);
-                    pMetadata->data.smpl.pLoops[iSampleLoop].type                  = drwav_bytes_to_u32(smplLoopData + 4);
-                    pMetadata->data.smpl.pLoops[iSampleLoop].firstSampleByteOffset = drwav_bytes_to_u32(smplLoopData + 8);
-                    pMetadata->data.smpl.pLoops[iSampleLoop].lastSampleByteOffset  = drwav_bytes_to_u32(smplLoopData + 12);
-                    pMetadata->data.smpl.pLoops[iSampleLoop].sampleFraction        = drwav_bytes_to_u32(smplLoopData + 16);
-                    pMetadata->data.smpl.pLoops[iSampleLoop].playCount             = drwav_bytes_to_u32(smplLoopData + 20);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].cuePointId        = drwav_bytes_to_u32(smplLoopData + 0);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].type              = drwav_bytes_to_u32(smplLoopData + 4);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].firstSampleOffset = drwav_bytes_to_u32(smplLoopData + 8);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].lastSampleOffset  = drwav_bytes_to_u32(smplLoopData + 12);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].sampleFraction    = drwav_bytes_to_u32(smplLoopData + 16);
+                    pMetadata->data.smpl.pLoops[iSampleLoop].playCount         = drwav_bytes_to_u32(smplLoopData + 20);
                 } else {
                     break;
                 }
@@ -2254,7 +2278,7 @@ DRWAV_PRIVATE drwav_uint64 drwav__read_cue_to_metadata_obj(drwav__metadata_parse
                         pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId[3]    = cuePointData[11];
                         pMetadata->data.cue.pCuePoints[iCuePoint].chunkStart        = drwav_bytes_to_u32(cuePointData + 12);
                         pMetadata->data.cue.pCuePoints[iCuePoint].blockStart        = drwav_bytes_to_u32(cuePointData + 16);
-                        pMetadata->data.cue.pCuePoints[iCuePoint].sampleByteOffset  = drwav_bytes_to_u32(cuePointData + 20);
+                        pMetadata->data.cue.pCuePoints[iCuePoint].sampleOffset      = drwav_bytes_to_u32(cuePointData + 20);
                     } else {
                         break;
                     }
@@ -2698,7 +2722,7 @@ DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_chunk(drwav__metadata_parser*
                 drwav_uint8 buffer[4];
                 size_t bytesJustRead;
 
-                if (!pParser->onSeek(pParser->pReadSeekUserData, 28, drwav_seek_origin_current)) {
+                if (!pParser->onSeek(pParser->pReadSeekUserData, 28, DRWAV_SEEK_CUR)) {
                     return bytesRead;
                 }
                 bytesRead += 28;
@@ -2811,7 +2835,7 @@ DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_chunk(drwav__metadata_parser*
                     return bytesRead;
                 }
                 allocSizeNeeded += drwav__strlen(buffer) + 1;
-                allocSizeNeeded += (size_t)pChunkHeader->sizeInBytes - DRWAV_BEXT_BYTES; /* Coding history. */
+                allocSizeNeeded += (size_t)pChunkHeader->sizeInBytes - DRWAV_BEXT_BYTES + 1; /* Coding history. */
 
                 drwav__metadata_request_extra_memory_for_stage_2(pParser, allocSizeNeeded, 1);
 
@@ -2916,6 +2940,16 @@ DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_chunk(drwav__metadata_parser*
                 subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_album);
             } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_tracknumber, "ITRK")) {
                 subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_tracknumber);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_location, "IARL")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_location);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_organization, "ICMS")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_organization);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_keywords, "IKEY")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_keywords);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_medium, "IMED")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_medium);
+            } else if (drwav__chunk_matches(allowedMetadataTypes, subchunkId, drwav_metadata_type_list_info_description, "ISBJ")) {
+                subchunkBytesRead = drwav__metadata_process_info_text_chunk(pParser, subchunkDataSize,  drwav_metadata_type_list_info_description);
             } else if ((allowedMetadataTypes & drwav_metadata_type_unknown) != 0) {
                 subchunkBytesRead = drwav__metadata_process_unknown_chunk(pParser, subchunkId, subchunkDataSize, listType);
             }
@@ -2926,14 +2960,14 @@ DRWAV_PRIVATE drwav_uint64 drwav__metadata_process_chunk(drwav__metadata_parser*
             if (subchunkBytesRead < subchunkDataSize) {
                 drwav_uint64 bytesToSeek = subchunkDataSize - subchunkBytesRead;
 
-                if (!pParser->onSeek(pParser->pReadSeekUserData, (int)bytesToSeek, drwav_seek_origin_current)) {
+                if (!pParser->onSeek(pParser->pReadSeekUserData, (int)bytesToSeek, DRWAV_SEEK_CUR)) {
                     break;
                 }
                 bytesRead += bytesToSeek;
             }
 
             if ((subchunkDataSize % 2) == 1) {
-                if (!pParser->onSeek(pParser->pReadSeekUserData, 1, drwav_seek_origin_current)) {
+                if (!pParser->onSeek(pParser->pReadSeekUserData, 1, DRWAV_SEEK_CUR)) {
                     break;
                 }
                 bytesRead += 1;
@@ -2985,16 +3019,17 @@ DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT)
     }
 }
 
-DRWAV_PRIVATE drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_PRIVATE drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pReadSeekTellUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
-    if (pWav == NULL || onRead == NULL || onSeek == NULL) {
+    if (pWav == NULL || onRead == NULL || onSeek == NULL) { /* <-- onTell is optional. */
         return DRWAV_FALSE;
     }
 
     DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
     pWav->onRead    = onRead;
     pWav->onSeek    = onSeek;
-    pWav->pUserData = pReadSeekUserData;
+    pWav->onTell    = onTell;
+    pWav->pUserData = pReadSeekTellUserData;
     pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
 
     if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
@@ -3311,7 +3346,7 @@ DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc on
                         fmt.channelMask        = drwav_bytes_to_u32_ex(fmtext + 2, pWav->container);
                         drwav_bytes_to_guid(fmtext + 6, fmt.subFormat);
                     } else {
-                        if (pWav->onSeek(pWav->pUserData, fmt.extendedSize, drwav_seek_origin_current) == DRWAV_FALSE) {
+                        if (pWav->onSeek(pWav->pUserData, fmt.extendedSize, DRWAV_SEEK_CUR) == DRWAV_FALSE) {
                             return DRWAV_FALSE;
                         }
                     }
@@ -3321,7 +3356,7 @@ DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc on
                 }
 
                 /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. */
-                if (pWav->onSeek(pWav->pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current) == DRWAV_FALSE) {
+                if (pWav->onSeek(pWav->pUserData, (int)(header.sizeInBytes - bytesReadSoFar), DRWAV_SEEK_CUR) == DRWAV_FALSE) {
                     return DRWAV_FALSE;
                 }
                 cursor += (header.sizeInBytes - bytesReadSoFar);
@@ -3465,12 +3500,15 @@ DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc on
                     compressionFormat = DR_WAVE_FORMAT_MULAW;
                 } else if (drwav_fourcc_equal(type, "ima4")) {
                     compressionFormat = DR_WAVE_FORMAT_DVI_ADPCM;
-                    sampleSizeInBits = 4;
+                    sampleSizeInBits  = 4;
 
                     /*
                     I haven't been able to figure out how to get correct decoding for IMA ADPCM. Until this is figured out
                     we'll need to abort when we encounter such an encoding. Advice welcome!
                     */
+                    (void)compressionFormat;
+                    (void)sampleSizeInBits;
+
                     return DRWAV_FALSE;
                 } else {
                     return DRWAV_FALSE; /* Unknown or unsupported compression format. Need to abort. */
@@ -3533,20 +3571,46 @@ DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc on
                 return DRWAV_FALSE;
             }
 
-            /* We need to seek forward by the offset. */
+            /* The position of the audio data starts at an offset. */
             offset = drwav_bytes_to_u32_ex(offsetAndBlockSizeData + 0, pWav->container);
-            if (drwav__seek_forward(pWav->onSeek, offset, pWav->pUserData) == DRWAV_FALSE) {
-                return DRWAV_FALSE;
-            }
-            cursor += offset;
+            pWav->dataChunkDataPos = cursor + offset;
 
-            pWav->dataChunkDataPos = cursor;
+            /* The data chunk size needs to be reduced by the offset or else seeking will break. */
             dataChunkSize = chunkSize;
+            if (dataChunkSize  > offset) {
+                dataChunkSize -= offset;
+            } else {
+                dataChunkSize = 0;
+            }
 
-            /* If we're running in sequential mode, or we're not reading metadata, we have enough now that we can get out of the loop. */
-            if (sequential || !isProcessingMetadata) {
-                break;      /* No need to keep reading beyond the data chunk. */
+            if (sequential) {
+                if (foundChunk_fmt) {   /* <-- Name is misleading, but will be set to true if the COMM chunk has been parsed. */
+                    /*
+                    Getting here means we're opening in sequential mode and we've found the SSND (data) and COMM (fmt) chunks. We need
+                    to get out of the loop here or else we'll end up going past the data chunk and will have no way of getting back to
+                    it since we're not allowed to seek backwards.
+
+                    One subtle detail here is that there is an offset with the SSND chunk. We need to make sure we seek past this offset
+                    so we're left sitting on the first byte of actual audio data.
+                    */
+                    if (drwav__seek_forward(pWav->onSeek, offset, pWav->pUserData) == DRWAV_FALSE) {
+                        return DRWAV_FALSE;
+                    }
+                    cursor += offset;
+
+                    break;
+                } else {
+                    /*
+                    Getting here means the COMM chunk was not found. In sequential mode, if we haven't yet found the COMM chunk
+                    we'll need to abort because we can't be doing a backwards seek back to the SSND chunk in order to read the
+                    data. For this reason, this configuration of AIFF file is not supported in sequential mode.
+                    */
+                    return DRWAV_FALSE;
+                }
             } else {
+                chunkSize += header.paddingSize;                /* <-- Make sure we seek past the padding. */
+                chunkSize -= sizeof(offsetAndBlockSizeData);    /* <-- This was read earlier. */
+
                 if (drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData) == DRWAV_FALSE) {
                     break;
                 }
@@ -3557,7 +3621,6 @@ DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc on
         }
 
 
-
         /* Getting here means it's not a chunk that we care about internally, but might need to be handled as metadata by the caller. */
         if (isProcessingMetadata) {
             drwav__metadata_process_chunk(&metadataParser, &header, drwav_metadata_type_all_including_unknown);
@@ -3647,8 +3710,26 @@ DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc on
         pWav->metadataCount = metadataParser.metadataCount;
     }
 
-
-    /* At this point we should be sitting on the first byte of the raw audio data. */
+    /*
+    It's possible for the size reported in the data chunk to be greater than that of the file. We
+    need to do a validation check here to make sure we don't exceed the file size. To skip this
+    check, set the onTell callback to NULL.
+    */
+    if (pWav->onTell != NULL && pWav->onSeek != NULL) {
+        if (pWav->onSeek(pWav->pUserData, 0, DRWAV_SEEK_END) == DRWAV_TRUE) {
+            drwav_int64 fileSize;
+            if (pWav->onTell(pWav->pUserData, &fileSize)) {
+                if (dataChunkSize + pWav->dataChunkDataPos > (drwav_uint64)fileSize) {
+                    dataChunkSize = (drwav_uint64)fileSize - pWav->dataChunkDataPos;
+                }
+            }
+        } else {
+            /*
+            Failed to seek to the end of the file. It might not be supported by the backend so in
+            this case we cannot perform the validation check.
+            */
+        }
+    }
 
     /*
     I've seen a WAV file in the wild where a RIFF-ecapsulated file has the size of it's "RIFF" and
@@ -3670,6 +3751,7 @@ DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc on
         }
     }
 
+    /* At this point we want to be sitting on the first byte of the raw audio data. */
     if (drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData) == DRWAV_FALSE) {
         drwav_free(pWav->pMetadata, &pWav->allocationCallbacks);
         return DRWAV_FALSE;
@@ -3680,8 +3762,26 @@ DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc on
     pWav->sampleRate          = fmt.sampleRate;
     pWav->channels            = fmt.channels;
     pWav->bitsPerSample       = fmt.bitsPerSample;
-    pWav->bytesRemaining      = dataChunkSize;
     pWav->translatedFormatTag = translatedFormatTag;
+
+    /*
+    I've had a report where files would start glitching after seeking. The reason for this is the data
+    chunk is not a clean multiple of the PCM frame size in bytes. Where this becomes a problem is when
+    seeking, because the number of bytes remaining in the data chunk is used to calculate the current
+    byte position. If this byte position is not aligned to the number of bytes in a PCM frame, it will
+    result in the seek not being cleanly positioned at the start of the PCM frame thereby resulting in
+    all decoded frames after that being corrupted.
+
+    To address this, we need to round the data chunk size down to the nearest multiple of the frame size.
+    */
+    if (!drwav__is_compressed_format_tag(translatedFormatTag)) {
+        drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+        if (bytesPerFrame > 0) {
+            dataChunkSize -= (dataChunkSize % bytesPerFrame);
+        }
+    }
+
+    pWav->bytesRemaining      = dataChunkSize;
     pWav->dataChunkDataSize   = dataChunkSize;
 
     if (sampleCountFromFactChunk != 0) {
@@ -3764,23 +3864,23 @@ DRWAV_PRIVATE drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc on
     return DRWAV_TRUE;
 }
 
-DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
-    return drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0, pAllocationCallbacks);
+    return drwav_init_ex(pWav, onRead, onSeek, onTell, NULL, pUserData, NULL, 0, pAllocationCallbacks);
 }
 
-DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, drwav_chunk_proc onChunk, void* pReadSeekTellUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
-    if (!drwav_preinit(pWav, onRead, onSeek, pReadSeekUserData, pAllocationCallbacks)) {
+    if (!drwav_preinit(pWav, onRead, onSeek, onTell, pReadSeekTellUserData, pAllocationCallbacks)) {
         return DRWAV_FALSE;
     }
 
     return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
 }
 
-DRWAV_API drwav_bool32 drwav_init_with_metadata(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_API drwav_bool32 drwav_init_with_metadata(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
-    if (!drwav_preinit(pWav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+    if (!drwav_preinit(pWav, onRead, onSeek, onTell, pUserData, pAllocationCallbacks)) {
         return DRWAV_FALSE;
     }
 
@@ -3995,8 +4095,8 @@ DRWAV_PRIVATE size_t drwav__write_or_count_metadata(drwav* pWav, drwav_metadata*
                 for (iLoop = 0; iLoop < pMetadata->data.smpl.sampleLoopCount; ++iLoop) {
                     bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].cuePointId);
                     bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].type);
-                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].firstSampleByteOffset);
-                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].lastSampleByteOffset);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].firstSampleOffset);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].lastSampleOffset);
                     bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].sampleFraction);
                     bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.smpl.pLoops[iLoop].playCount);
                 }
@@ -4036,7 +4136,7 @@ DRWAV_PRIVATE size_t drwav__write_or_count_metadata(drwav* pWav, drwav_metadata*
                     bytesWritten += drwav__write_or_count(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].dataChunkId, 4);
                     bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].chunkStart);
                     bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].blockStart);
-                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].sampleByteOffset);
+                    bytesWritten += drwav__write_or_count_u32ne_to_le(pWav, pMetadata->data.cue.pCuePoints[iCuePoint].sampleOffset);
                 }
             } break;
 
@@ -4142,15 +4242,20 @@ DRWAV_PRIVATE size_t drwav__write_or_count_metadata(drwav* pWav, drwav_metadata*
                 const char* pID = NULL;
 
                 switch (pMetadata->type) {
-                    case drwav_metadata_type_list_info_software:    pID = "ISFT"; break;
-                    case drwav_metadata_type_list_info_copyright:   pID = "ICOP"; break;
-                    case drwav_metadata_type_list_info_title:       pID = "INAM"; break;
-                    case drwav_metadata_type_list_info_artist:      pID = "IART"; break;
-                    case drwav_metadata_type_list_info_comment:     pID = "ICMT"; break;
-                    case drwav_metadata_type_list_info_date:        pID = "ICRD"; break;
-                    case drwav_metadata_type_list_info_genre:       pID = "IGNR"; break;
-                    case drwav_metadata_type_list_info_album:       pID = "IPRD"; break;
-                    case drwav_metadata_type_list_info_tracknumber: pID = "ITRK"; break;
+                    case drwav_metadata_type_list_info_software:     pID = "ISFT"; break;
+                    case drwav_metadata_type_list_info_copyright:    pID = "ICOP"; break;
+                    case drwav_metadata_type_list_info_title:        pID = "INAM"; break;
+                    case drwav_metadata_type_list_info_artist:       pID = "IART"; break;
+                    case drwav_metadata_type_list_info_comment:      pID = "ICMT"; break;
+                    case drwav_metadata_type_list_info_date:         pID = "ICRD"; break;
+                    case drwav_metadata_type_list_info_genre:        pID = "IGNR"; break;
+                    case drwav_metadata_type_list_info_album:        pID = "IPRD"; break;
+                    case drwav_metadata_type_list_info_tracknumber:  pID = "ITRK"; break;
+                    case drwav_metadata_type_list_info_location:     pID = "IARL"; break;
+                    case drwav_metadata_type_list_info_organization: pID = "ICMS"; break;
+                    case drwav_metadata_type_list_info_keywords:     pID = "IKEY"; break;
+                    case drwav_metadata_type_list_info_medium:       pID = "IMED"; break;
+                    case drwav_metadata_type_list_info_description:  pID = "ISBJ"; break;
                     default: break;
                 }
 
@@ -4434,7 +4539,7 @@ DRWAV_PRIVATE drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_d
 
     /* "RIFF" chunk. */
     if (pFormat->container == drwav_container_riff) {
-        drwav_uint32 chunkSizeRIFF = 28 + (drwav_uint32)initialDataChunkSize;   /* +28 = "WAVE" + [sizeof "fmt " chunk] */
+        drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize;   /* +36 = "WAVE" + [sizeof "fmt " chunk] + [data chunk header] */
         runningPos += drwav__write(pWav, "RIFF", 4);
         runningPos += drwav__write_u32ne_to_le(pWav, chunkSizeRIFF);
         runningPos += drwav__write(pWav, "WAVE", 4);
@@ -4704,7 +4809,7 @@ DRWAV_PRIVATE drwav_result drwav_result_from_errno(int e)
     #ifdef ENOSYS
         case ENOSYS: return DRWAV_NOT_IMPLEMENTED;
     #endif
-    #ifdef ENOTEMPTY
+    #if defined(ENOTEMPTY) && ENOTEMPTY != EEXIST   /* In AIX, ENOTEMPTY and EEXIST use the same value. */
         case ENOTEMPTY: return DRWAV_DIRECTORY_NOT_EMPTY;
     #endif
     #ifdef ELOOP
@@ -5161,7 +5266,38 @@ DRWAV_PRIVATE size_t drwav__on_write_stdio(void* pUserData, const void* pData, s
 
 DRWAV_PRIVATE drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin)
 {
-    return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+    int whence = SEEK_SET;
+    if (origin == DRWAV_SEEK_CUR) {
+        whence = SEEK_CUR;
+    } else if (origin == DRWAV_SEEK_END) {
+        whence = SEEK_END;
+    }
+
+    return fseek((FILE*)pUserData, offset, whence) == 0;
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav__on_tell_stdio(void* pUserData, drwav_int64* pCursor)
+{
+    FILE* pFileStdio = (FILE*)pUserData;
+    drwav_int64 result;
+
+    /* These were all validated at a higher level. */
+    DRWAV_ASSERT(pFileStdio != NULL);
+    DRWAV_ASSERT(pCursor    != NULL);
+
+#if defined(_WIN32)
+    #if defined(_MSC_VER) && _MSC_VER > 1200
+        result = _ftelli64(pFileStdio);
+    #else
+        result = ftell(pFileStdio);
+    #endif
+#else
+    result = ftell(pFileStdio);
+#endif
+
+    *pCursor = result;
+
+    return (result >= 0) ? DRWAV_TRUE : DRWAV_FALSE;   /* ftell()/_ftelli64() signal failure with -1; do not report that as a valid cursor. */
 }
 
 DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
@@ -5174,7 +5310,7 @@ DRWAV_PRIVATE drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pFi
 {
     drwav_bool32 result;
 
-    result = drwav_preinit(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    result = drwav_preinit(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, drwav__on_tell_stdio, (void*)pFile, pAllocationCallbacks);
     if (result != DRWAV_TRUE) {
         fclose(pFile);
         return result;
@@ -5352,29 +5488,34 @@ DRWAV_PRIVATE size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, si
 DRWAV_PRIVATE drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin)
 {
     drwav* pWav = (drwav*)pUserData;
+    drwav_int64 newCursor;
+
     DRWAV_ASSERT(pWav != NULL);
 
-    if (origin == drwav_seek_origin_current) {
-        if (offset > 0) {
-            if (pWav->memoryStream.currentReadPos + offset > pWav->memoryStream.dataSize) {
-                return DRWAV_FALSE; /* Trying to seek too far forward. */
-            }
-        } else {
-            if (pWav->memoryStream.currentReadPos < (size_t)-offset) {
-                return DRWAV_FALSE; /* Trying to seek too far backwards. */
-            }
-        }
+    newCursor = pWav->memoryStream.currentReadPos;
 
-        /* This will never underflow thanks to the clamps above. */
-        pWav->memoryStream.currentReadPos += offset;
+    if (origin == DRWAV_SEEK_SET) {
+        newCursor = 0;
+    } else if (origin == DRWAV_SEEK_CUR) {
+        newCursor = (drwav_int64)pWav->memoryStream.currentReadPos;
+    } else if (origin == DRWAV_SEEK_END) {
+        newCursor = (drwav_int64)pWav->memoryStream.dataSize;
     } else {
-        if ((drwav_uint32)offset <= pWav->memoryStream.dataSize) {
-            pWav->memoryStream.currentReadPos = offset;
-        } else {
-            return DRWAV_FALSE; /* Trying to seek too far forward. */
-        }
+        DRWAV_ASSERT(!"Invalid seek origin");
+        return DRWAV_FALSE;
     }
 
+    newCursor += offset;
+
+    if (newCursor < 0) {
+        return DRWAV_FALSE;  /* Trying to seek prior to the start of the buffer. */
+    }
+    if ((size_t)newCursor > pWav->memoryStream.dataSize) {
+        return DRWAV_FALSE;  /* Trying to seek beyond the end of the buffer. */
+    }
+
+    pWav->memoryStream.currentReadPos = (size_t)newCursor;
+
     return DRWAV_TRUE;
 }
 
@@ -5421,29 +5562,45 @@ DRWAV_PRIVATE size_t drwav__on_write_memory(void* pUserData, const void* pDataIn
 DRWAV_PRIVATE drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin)
 {
     drwav* pWav = (drwav*)pUserData;
+    drwav_int64 newCursor;
+
     DRWAV_ASSERT(pWav != NULL);
 
-    if (origin == drwav_seek_origin_current) {
-        if (offset > 0) {
-            if (pWav->memoryStreamWrite.currentWritePos + offset > pWav->memoryStreamWrite.dataSize) {
-                offset = (int)(pWav->memoryStreamWrite.dataSize - pWav->memoryStreamWrite.currentWritePos);  /* Trying to seek too far forward. */
-            }
-        } else {
-            if (pWav->memoryStreamWrite.currentWritePos < (size_t)-offset) {
-                offset = -(int)pWav->memoryStreamWrite.currentWritePos;  /* Trying to seek too far backwards. */
-            }
-        }
+    newCursor = pWav->memoryStreamWrite.currentWritePos;
 
-        /* This will never underflow thanks to the clamps above. */
-        pWav->memoryStreamWrite.currentWritePos += offset;
+    if (origin == DRWAV_SEEK_SET) {
+        newCursor = 0;
+    } else if (origin == DRWAV_SEEK_CUR) {
+        newCursor = (drwav_int64)pWav->memoryStreamWrite.currentWritePos;
+    } else if (origin == DRWAV_SEEK_END) {
+        newCursor = (drwav_int64)pWav->memoryStreamWrite.dataSize;
     } else {
-        if ((drwav_uint32)offset <= pWav->memoryStreamWrite.dataSize) {
-            pWav->memoryStreamWrite.currentWritePos = offset;
-        } else {
-            pWav->memoryStreamWrite.currentWritePos = pWav->memoryStreamWrite.dataSize;  /* Trying to seek too far forward. */
-        }
+        DRWAV_ASSERT(!"Invalid seek origin");
+        return DRWAV_FALSE;  /* This callback returns a boolean; a non-zero result code here would read as success. */
     }
 
+    newCursor += offset;
+
+    if (newCursor < 0) {
+        return DRWAV_FALSE;  /* Trying to seek prior to the start of the buffer. */
+    }
+    if ((size_t)newCursor > pWav->memoryStreamWrite.dataSize) {
+        return DRWAV_FALSE;  /* Trying to seek beyond the end of the buffer. */
+    }
+
+    pWav->memoryStreamWrite.currentWritePos = (size_t)newCursor;
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_PRIVATE drwav_bool32 drwav__on_tell_memory(void* pUserData, drwav_int64* pCursor)
+{
+    drwav* pWav = (drwav*)pUserData;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(pCursor != NULL);
+
+    *pCursor = (drwav_int64)pWav->memoryStream.currentReadPos;
     return DRWAV_TRUE;
 }
 
@@ -5458,7 +5615,7 @@ DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_
         return DRWAV_FALSE;
     }
 
-    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) {
+    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, drwav__on_tell_memory, pWav, pAllocationCallbacks)) {
         return DRWAV_FALSE;
     }
 
@@ -5475,7 +5632,7 @@ DRWAV_API drwav_bool32 drwav_init_memory_with_metadata(drwav* pWav, const void*
         return DRWAV_FALSE;
     }
 
-    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) {
+    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, drwav__on_tell_memory, pWav, pAllocationCallbacks)) {
         return DRWAV_FALSE;
     }
 
@@ -5565,25 +5722,25 @@ DRWAV_API drwav_result drwav_uninit(drwav* pWav)
         if (pWav->onSeek && !pWav->isSequentialWrite) {
             if (pWav->container == drwav_container_riff) {
                 /* The "RIFF" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) {
+                if (pWav->onSeek(pWav->pUserData, 4, DRWAV_SEEK_SET)) {
                     drwav_uint32 riffChunkSize = drwav__riff_chunk_size_riff(pWav->dataChunkDataSize, pWav->pMetadata, pWav->metadataCount);
                     drwav__write_u32ne_to_le(pWav, riffChunkSize);
                 }
 
                 /* The "data" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos - 4, drwav_seek_origin_start)) {
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos - 4, DRWAV_SEEK_SET)) {
                     drwav_uint32 dataChunkSize = drwav__data_chunk_size_riff(pWav->dataChunkDataSize);
                     drwav__write_u32ne_to_le(pWav, dataChunkSize);
                 }
             } else if (pWav->container == drwav_container_w64) {
                 /* The "RIFF" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) {
+                if (pWav->onSeek(pWav->pUserData, 16, DRWAV_SEEK_SET)) {
                     drwav_uint64 riffChunkSize = drwav__riff_chunk_size_w64(pWav->dataChunkDataSize);
                     drwav__write_u64ne_to_le(pWav, riffChunkSize);
                 }
 
                 /* The "data" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos - 8, drwav_seek_origin_start)) {
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos - 8, DRWAV_SEEK_SET)) {
                     drwav_uint64 dataChunkSize = drwav__data_chunk_size_w64(pWav->dataChunkDataSize);
                     drwav__write_u64ne_to_le(pWav, dataChunkSize);
                 }
@@ -5592,13 +5749,13 @@ DRWAV_API drwav_result drwav_uninit(drwav* pWav)
                 int ds64BodyPos = 12 + 8;
 
                 /* The "RIFF" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, ds64BodyPos + 0, drwav_seek_origin_start)) {
+                if (pWav->onSeek(pWav->pUserData, ds64BodyPos + 0, DRWAV_SEEK_SET)) {
                     drwav_uint64 riffChunkSize = drwav__riff_chunk_size_rf64(pWav->dataChunkDataSize, pWav->pMetadata, pWav->metadataCount);
                     drwav__write_u64ne_to_le(pWav, riffChunkSize);
                 }
 
                 /* The "data" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, ds64BodyPos + 8, drwav_seek_origin_start)) {
+                if (pWav->onSeek(pWav->pUserData, ds64BodyPos + 8, DRWAV_SEEK_SET)) {
                     drwav_uint64 dataChunkSize = drwav__data_chunk_size_rf64(pWav->dataChunkDataSize);
                     drwav__write_u64ne_to_le(pWav, dataChunkSize);
                 }
@@ -5663,7 +5820,7 @@ DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOu
                 bytesToSeek = 0x7FFFFFFF;
             }
 
-            if (pWav->onSeek(pWav->pUserData, (int)bytesToSeek, drwav_seek_origin_current) == DRWAV_FALSE) {
+            if (pWav->onSeek(pWav->pUserData, (int)bytesToSeek, DRWAV_SEEK_CUR) == DRWAV_FALSE) {
                 break;
             }
 
@@ -5810,7 +5967,7 @@ DRWAV_PRIVATE drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
         return DRWAV_FALSE; /* No seeking in write mode. */
     }
 
-    if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) {
+    if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, DRWAV_SEEK_SET)) {
         return DRWAV_FALSE;
     }
 
@@ -5928,7 +6085,7 @@ DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetF
 
         while (offset > 0) {
             int offset32 = ((offset > INT_MAX) ? INT_MAX : (int)offset);
-            if (!pWav->onSeek(pWav->pUserData, offset32, drwav_seek_origin_current)) {
+            if (!pWav->onSeek(pWav->pUserData, offset32, DRWAV_SEEK_CUR)) {
                 return DRWAV_FALSE;
             }
 
@@ -6101,12 +6258,12 @@ DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav
 {
     drwav_uint64 totalFramesRead = 0;
 
-    static drwav_int32 adaptationTable[] = {
+    static const drwav_int32 adaptationTable[] = {
         230, 230, 230, 230, 307, 409, 512, 614,
         768, 614, 512, 409, 307, 230, 230, 230
     };
-    static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
-    static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
+    static const drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
+    static const drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
 
     DRWAV_ASSERT(pWav != NULL);
     DRWAV_ASSERT(framesToRead > 0);
@@ -6294,12 +6451,12 @@ DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uin
     drwav_uint64 totalFramesRead = 0;
     drwav_uint32 iChannel;
 
-    static drwav_int32 indexTable[16] = {
+    static const drwav_int32 indexTable[16] = {
         -1, -1, -1, -1, 2, 4, 6, 8,
         -1, -1, -1, -1, 2, 4, 6, 8
     };
 
-    static drwav_int32 stepTable[89] = {
+    static const drwav_int32 stepTable[89] = {
         7,     8,     9,     10,    11,    12,    13,    14,    16,    17,
         19,    21,    23,    25,    28,    31,    34,    37,    41,    45,
         50,    55,    60,    66,    73,    80,    88,    97,    107,   118,
@@ -6330,7 +6487,7 @@ DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uin
                 pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
 
                 if (header[2] >= drwav_countof(stepTable)) {
-                    pWav->onSeek(pWav->pUserData, pWav->ima.bytesRemainingInBlock, drwav_seek_origin_current);
+                    pWav->onSeek(pWav->pUserData, pWav->ima.bytesRemainingInBlock, DRWAV_SEEK_CUR);
                     pWav->ima.bytesRemainingInBlock = 0;
                     return totalFramesRead; /* Invalid data. */
                 }
@@ -6348,7 +6505,7 @@ DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uin
                 pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
 
                 if (header[2] >= drwav_countof(stepTable) || header[6] >= drwav_countof(stepTable)) {
-                    pWav->onSeek(pWav->pUserData, pWav->ima.bytesRemainingInBlock, drwav_seek_origin_current);
+                    pWav->onSeek(pWav->pUserData, pWav->ima.bytesRemainingInBlock, DRWAV_SEEK_CUR);
                     pWav->ima.bytesRemainingInBlock = 0;
                     return totalFramesRead; /* Invalid data. */
                 }
@@ -6449,7 +6606,7 @@ DRWAV_PRIVATE drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uin
 
 
 #ifndef DR_WAV_NO_CONVERSION_API
-static unsigned short g_drwavAlawTable[256] = {
+static const unsigned short g_drwavAlawTable[256] = {
     0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580,
     0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0,
     0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600,
@@ -6468,7 +6625,7 @@ static unsigned short g_drwavAlawTable[256] = {
     0x02B0, 0x0290, 0x02F0, 0x02D0, 0x0230, 0x0210, 0x0270, 0x0250, 0x03B0, 0x0390, 0x03F0, 0x03D0, 0x0330, 0x0310, 0x0370, 0x0350
 };
 
-static unsigned short g_drwavMulawTable[256] = {
+static const unsigned short g_drwavMulawTable[256] = {
     0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84,
     0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84,
     0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004,
@@ -8016,7 +8173,7 @@ DRWAV_PRIVATE drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, uns
 
 
 
-DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     drwav wav;
 
@@ -8030,14 +8187,14 @@ DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead
         *totalFrameCountOut = 0;
     }
 
-    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+    if (!drwav_init(&wav, onRead, onSeek, onTell, pUserData, pAllocationCallbacks)) {
         return NULL;
     }
 
     return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
 }
 
-DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     drwav wav;
 
@@ -8051,14 +8208,14 @@ DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwa
         *totalFrameCountOut = 0;
     }
 
-    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+    if (!drwav_init(&wav, onRead, onSeek, onTell, pUserData, pAllocationCallbacks)) {
         return NULL;
     }
 
     return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
 }
 
-DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_tell_proc onTell, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
 {
     drwav wav;
 
@@ -8072,7 +8229,7 @@ DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead
         *totalFrameCountOut = 0;
     }
 
-    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+    if (!drwav_init(&wav, onRead, onSeek, onTell, pUserData, pAllocationCallbacks)) {
         return NULL;
     }
 
@@ -8360,6 +8517,23 @@ DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b)
 /*
 REVISION HISTORY
 ================
+v0.14.0 - TBD
+  - API CHANGE: Seek origin enums have been renamed to the following:
+    - drwav_seek_origin_start   -> DRWAV_SEEK_SET
+    - drwav_seek_origin_current -> DRWAV_SEEK_CUR
+    - DRWAV_SEEK_END (new)
+  - API CHANGE: A new seek origin has been added to allow seeking from the end of the file. If you implement your own `onSeek` callback, you must now handle `DRWAV_SEEK_END`. If you only use `*_init_file()` or `*_init_memory()`, you need not change anything.
+  - API CHANGE: An `onTell` callback has been added to the following functions:
+    - drwav_init()
+    - drwav_init_ex()
+    - drwav_init_with_metadata()
+    - drwav_open_and_read_pcm_frames_s16()
+    - drwav_open_and_read_pcm_frames_f32()
+    - drwav_open_and_read_pcm_frames_s32()
+  - API CHANGE: The `firstSampleByteOffset`, `lastSampleByteOffset` and `sampleByteOffset` members of `drwav_cue_point` have been renamed to `firstSampleOffset`, `lastSampleOffset` and `sampleOffset`, respectively.
+  - Fix a static analysis warning.
+  - Fix compilation for AIX OS.
+
 v0.13.17 - 2024-12-17
   - Fix a possible crash when reading from MS-ADPCM encoded files.
   - Improve detection of ARM64EC
diff --git a/sys-tune/source/impl/source.cpp b/sys-tune/source/impl/source.cpp
index e163025..01c0ded 100644
--- a/sys-tune/source/impl/source.cpp
+++ b/sys-tune/source/impl/source.cpp
@@ -5,6 +5,7 @@
 #include <cstring>
 #include <strings.h>
 
+// NOTE: when updating dr_libs, check for TUNE-FIX comment for patches.
 #define DR_FLAC_IMPLEMENTATION
 #define DR_FLAC_NO_OGG
 #define DR_FLAC_NO_STDIO
@@ -23,25 +24,50 @@ namespace {
     size_t ReadCallback(void *pUserData, void *pBufferOut, size_t bytesToRead) {
         auto data = static_cast<Source *>(pUserData);
 
-        return data->Read(pBufferOut, bytesToRead);
+        return data->ReadFile(pBufferOut, bytesToRead);
     }
 
     drflac_bool32 FlacSeekCallback(void *pUserData, int offset, drflac_seek_origin origin) {
         auto data = static_cast<Source *>(pUserData);
 
-        return data->Seek(offset, origin == drflac_seek_origin_start);
+        return data->SeekFile(offset, origin);
+    }
+
+    drflac_bool32 FlacTellCallback(void *pUserData, drflac_int64* pCursor) {
+        auto data = static_cast<Source *>(pUserData);
+
+        *pCursor = data->TellFile();
+        return true;
     }
 
     drmp3_bool32 Mp3SeekCallback(void *pUserData, int offset, drmp3_seek_origin origin) {
         auto data = static_cast<Source *>(pUserData);
 
-        return data->Seek(offset, origin == drmp3_seek_origin_start);
+        return data->SeekFile(offset, origin);
+    }
+
+    drmp3_bool32 Mp3TellCallback(void *pUserData, drmp3_int64* pCursor) {
+        auto data = static_cast<Source *>(pUserData);
+
+        *pCursor = data->TellFile();
+        return true;
+    }
+
+    void Mp3MetaCallback(void *pUserData, const drmp3_metadata* pMetadata) {
+        // stubbed for now, will handle later to load album artwork.
     }
 
     drwav_bool32 WavSeekCallback(void *pUserData, int offset, drwav_seek_origin origin) {
         auto data = static_cast<Source *>(pUserData);
 
-        return data->Seek(offset, origin == drwav_seek_origin_start);
+        return data->SeekFile(offset, origin);
+    }
+
+    drwav_bool32 WavTellCallback(void *pUserData, drwav_int64* pCursor) {
+        auto data = static_cast<Source *>(pUserData);
+
+        *pCursor = data->TellFile();
+        return true;
     }
 
 #ifdef DEBUG
@@ -130,7 +156,7 @@ s64 Source::Resample(u8* out, std::size_t size) {
     return data_read;
 }
 
-size_t Source::Read(void *buffer, size_t read_size) {
+size_t Source::ReadFile(void *buffer, size_t read_size) {
     size_t bytes_read = 0;
     if (R_SUCCEEDED(fsFileRead(&this->m_file, this->m_offset, buffer, read_size, 0, &bytes_read))) {
         this->m_offset += bytes_read;
@@ -140,19 +166,34 @@ size_t Source::Read(void *buffer, size_t read_size) {
     }
 }
 
-bool Source::Seek(int offset, bool set) {
-    s64 absolute = offset;
-    if (!set)
-        absolute += this->m_offset;
-
-    if (absolute < this->m_size) {
-        this->m_offset = absolute;
+bool Source::SeekFile(s64 offset, int origin) { // moves m_offset relative to start/current/end; returns false if the origin or target is invalid
+    s64 new_offset;
+    switch (origin) { // NOTE(review): the FLAC/MP3 callbacks pass their own seek-origin enums here; assumes they share the DRWAV_SEEK_* values - confirm
+        case DRWAV_SEEK_SET:
+            new_offset = offset;
+            break;
+        case DRWAV_SEEK_CUR:
+            new_offset = this->m_offset + offset;
+            break;
+        case DRWAV_SEEK_END:
+            new_offset = this->m_size + offset;
+            break;
+        default:
+            return false;
+    }
+
+    if (new_offset >= 0 && new_offset <= this->m_size) { // reject seeks before the start as well as past the end
+        this->m_offset = new_offset;
         return true;
     } else {
         return false;
     }
 }
 
+s64 Source::TellFile() {
+    return this->m_offset;
+}
+
 bool Source::Done() {
     auto [current, total] = this->Tell();
 
@@ -165,7 +206,7 @@ class FlacFile final : public Source {
 
   public:
     FlacFile(FsFile &&file) : Source(std::move(file)) {
-        this->m_flac = drflac_open(ReadCallback, FlacSeekCallback, this, flac_alloc_ptr);
+        this->m_flac = drflac_open(ReadCallback, FlacSeekCallback, FlacTellCallback, this, flac_alloc_ptr);
     }
     ~FlacFile() {
         if (this->m_flac != nullptr)
@@ -211,7 +252,7 @@ class Mp3File final : public Source {
 
   public:
     Mp3File(FsFile &&file) : Source(std::move(file)) {
-        if (drmp3_init(&this->m_mp3, ReadCallback, Mp3SeekCallback, this, mp3_alloc_ptr)) {
+        if (drmp3_init(&this->m_mp3, ReadCallback, Mp3SeekCallback, Mp3TellCallback, Mp3MetaCallback, this, mp3_alloc_ptr)) {
             this->m_total_frame_count = drmp3_get_pcm_frame_count(&this->m_mp3);
             this->initialized         = true;
         }
@@ -260,7 +301,7 @@ class WavFile final : public Source {
 
   public:
     WavFile(FsFile &&file) : Source(std::move(file)) {
-        if (drwav_init(&this->m_wav, ReadCallback, WavSeekCallback, this, wav_alloc_ptr)) {
+        if (drwav_init(&this->m_wav, ReadCallback, WavSeekCallback, WavTellCallback, this, wav_alloc_ptr)) {
             this->m_bytes_per_pcm = drwav_get_bytes_per_pcm_frame(&this->m_wav);
             this->initialized     = true;
         }
diff --git a/sys-tune/source/impl/source.hpp b/sys-tune/source/impl/source.hpp
index a878586..b41f176 100644
--- a/sys-tune/source/impl/source.hpp
+++ b/sys-tune/source/impl/source.hpp
@@ -38,8 +38,9 @@ class Source {
     bool SetupResampler(u32 output_channels, u32 output_sample_rate);
     s64 Resample(u8* out, std::size_t size);
 
-    size_t Read(void *buffer, size_t read_size);
-    bool Seek(int offset, bool set);
+    size_t ReadFile(void *buffer, size_t read_size);
+    s64 TellFile();
+    bool SeekFile(s64 offset, int origin);
 
     virtual bool IsOpen() = 0;
     virtual size_t Decode(size_t sample_count, s16 *data) = 0;

From 3a244dfad534801b65031ab9916cdb6aa0cabb9c Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sat, 12 Jul 2025 22:50:52 +0100
Subject: [PATCH 06/19] slightly more optimised version of sdmc::FileExists()

---
 common/sdmc/sdmc.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/common/sdmc/sdmc.cpp b/common/sdmc/sdmc.cpp
index 9b91a46..5963f8b 100644
--- a/common/sdmc/sdmc.cpp
+++ b/common/sdmc/sdmc.cpp
@@ -26,8 +26,8 @@ namespace sdmc {
 
     bool FileExists(const char* path) {
         std::strcpy(path_buffer, path);
-        FsTimeStampRaw ts;
-        return R_SUCCEEDED(fsFsGetFileTimeStampRaw(&sdmc, path_buffer, &ts));
+        FsDirEntryType type;
+        return R_SUCCEEDED(fsFsGetEntryType(&sdmc, path_buffer, &type)) && type == FsDirEntryType_File;
     }
 
     Result CreateFolder(const char* path) {

From b2ed9053a7078413a8b23bd408cd6e4dbdefb3a4 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 03:16:16 +0100
Subject: [PATCH 07/19] re-write the playlist code.

---
 sys-tune/source/impl/music_player.cpp | 343 +++++++++++++-------------
 sys-tune/source/impl/music_player.hpp |  13 -
 2 files changed, 177 insertions(+), 179 deletions(-)

diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index b1fda67..43e2f24 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -18,81 +18,183 @@ namespace tune::impl {
         constexpr float VOLUME_MAX = 1.f;
         constexpr auto PLAYLIST_ENTRY_MAX = 512; // 128k
 
-        struct PlayListEntry2 {
+        struct PlaylistID {
+            u32 id{UINT32_MAX};
+
+            bool IsValid() const {
+                return id != UINT32_MAX;
+            }
+
+            void Reset() {
+                id = UINT32_MAX;
+            }
+        };
+
+        class PlayList {
         public:
-            // in most cases, the path will not exceed 256 bytes,
-            // so this is a reasonable max rather than 0x301.
-            bool Add(const char* path) {
-                if (!IsEmpty()) {
+            bool Add(const char* path, EnqueueType type) {
+                u32 index;
+                if (!FindNextFreeEntry(index)) {
                     return false;
                 }
 
-                if (std::strlen(path) > sizeof(m_path)) {
+                if (!m_entries[index].Add(path)) {
                     return false;
                 }
 
-                std::strcpy(m_path, path);
+                if (type == EnqueueType::Front) {
+                    m_playlist.emplace(m_playlist.cbegin(), index);
+                } else {
+                    m_playlist.emplace_back(index);
+                }
+
+                // add new entry id to shuffle_playlist_list
+                const auto shuffle_playlist_size = m_shuffle_playlist.size() + 1;
+                const auto shuffle_index = randomGet64() % shuffle_playlist_size;
+                m_shuffle_playlist.emplace(m_shuffle_playlist.cbegin() + shuffle_index, index);
+
                 return true;
             }
 
-            void Remove() {
-                m_path[0] = '\0';
+            bool Remove(u32 index, ShuffleMode shuffle) {
+                const auto entry = Get(index, shuffle);
+                R_UNLESS(entry.IsValid(), false);
+
+                // remove entry.
+                m_entries[entry.id].Remove();
+
+                // remove from both playlists.
+                if (shuffle == ShuffleMode::On) {
+                    m_playlist.erase(m_playlist.begin() + GetIndexFromID(entry, ShuffleMode::Off));
+                    m_shuffle_playlist.erase(m_shuffle_playlist.begin() + index);
+                } else {
+                    m_playlist.erase(m_playlist.begin() + index);
+                    m_shuffle_playlist.erase(m_shuffle_playlist.begin() + GetIndexFromID(entry, ShuffleMode::On));
+                }
+
+                return true;
             }
 
-            bool IsEmpty() const {
-                return m_path[0] == '\0';
+            bool Swap(u32 src, u32 dst, ShuffleMode shuffle) { // exchanges two slots of the active (shuffled or linear) order
+                if (src >= Size() || dst >= Size()) { // both indices must address an existing slot
+                    return false;
+                }
+
+                if (shuffle == ShuffleMode::On) {
+                    std::swap(m_shuffle_playlist[src], m_shuffle_playlist[dst]);
+                } else {
+                    std::swap(m_playlist[src], m_playlist[dst]);
+                }
+
+                return true;
             }
 
-        // private:
-            char m_path[256]{};
-        };
+            const char* GetPath(u32 index, ShuffleMode shuffle) const {
+                return GetPath(Get(index, shuffle));
+            }
 
-        struct PlayList {
-            std::array<PlayListEntry2, PLAYLIST_ENTRY_MAX> m_entries{};
+            const char* GetPath(const PlaylistID& entry) const {
+                R_UNLESS(entry.IsValid(), nullptr);
 
-            bool Add(u32 index, const char* path) {
-                if (index > m_entries.size()) {
-                    return false;
+                return m_entries[entry.id].GetPath();
+            }
+
+            void Clear() {
+                for (u32 i = 0; i < m_entries.size(); i++) {
+                    m_entries[i].Remove();
                 }
 
-                return m_entries[index].Add(path);
+                m_playlist.clear();
+                m_shuffle_playlist.clear();
+            }
+
+            u32 Size() const {
+                return m_playlist.size();
             }
 
-            void Remove(u32 index) {
-                if (index > m_entries.size()) {
-                    return;
+            PlaylistID Get(u32 index, ShuffleMode shuffle) const {
+                if (index >= Size()) {
+                    return {};
                 }
 
-                return m_entries[index].Remove();
+                if (shuffle == ShuffleMode::On) {
+                    return m_shuffle_playlist[index];
+                } else {
+                    return m_playlist[index];
+                }
             }
 
-            s32 FindNextFreeEntry() const {
-                for (u32 i = 0; i < m_entries.size(); i++) {
-                    if (m_entries[i].IsEmpty()) {
+            u32 GetIndexFromID(const PlaylistID& entry, ShuffleMode shuffle) const {
+                std::span list{m_playlist};
+                if (shuffle == ShuffleMode::On) {
+                    list = m_shuffle_playlist;
+                }
+
+                for (u32 i = 0; i < list.size(); i++) {
+                    if (list[i].id == entry.id) {
                         return i;
                     }
                 }
 
-                return -1;
+                return 0;
             }
 
-            const char* GetPath(u32 index) {
-                return m_entries[index].m_path;
-            }
-
-            void Clear() {
+        private:
+            bool FindNextFreeEntry(u32& index) const {
                 for (u32 i = 0; i < m_entries.size(); i++) {
-                    m_entries[i].Remove();
+                    if (m_entries[i].IsEmpty()) {
+                        index = i;
+                        return true;
+                    }
                 }
+
+                return false;
             }
+
+        private:
+            struct PlayListNameEntry {
+            public:
+                // in most cases, the path will not exceed 256 bytes,
+                // so this is a reasonable max rather than 0x301.
+                bool Add(const char* path) {
+                    if (!IsEmpty()) {
+                        return false;
+                    }
+
+                    if (std::strlen(path) >= sizeof(m_path)) {
+                        return false;
+                    }
+
+                    std::strcpy(m_path, path);
+                    return true;
+                }
+
+                bool Remove() {
+                    m_path[0] = '\0';
+                    return true;
+                }
+
+                bool IsEmpty() const {
+                    return m_path[0] == '\0';
+                }
+
+                const char* GetPath() const {
+                    return m_path;
+                }
+
+            private:
+                char m_path[256]{};
+            };
+
+        private:
+            std::vector<PlaylistID> m_playlist{};
+            std::vector<PlaylistID> m_shuffle_playlist{};
+            std::array<PlayListNameEntry, PLAYLIST_ENTRY_MAX> m_entries{};
         };
 
-        PlayList g_playlist2;
+        PlayList g_playlist;
 
-        // todo: move below into playlist struct
-        std::vector<PlaylistEntry> g_playlist;
-        std::vector<PlaylistID> g_shuffle_playlist;
-        PlaylistEntry g_current;
+        PlaylistID g_current;
         u32 g_queue_position;
 
         LockableMutex g_mutex;
@@ -109,7 +211,7 @@ namespace tune::impl {
         constexpr auto AUDIO_FREQ          = 48000;
         constexpr auto AUDIO_CHANNEL_COUNT = 2;
         constexpr auto AUDIO_BUFFER_COUNT  = 2;
-        constexpr auto AUDIO_LATENCY_MS    = 50;
+        constexpr auto AUDIO_LATENCY_MS    = 42;
         constexpr auto AUDIO_BUFFER_SIZE   = AUDIO_FREQ / 1000 * AUDIO_LATENCY_MS * AUDIO_CHANNEL_COUNT;
 
         AudioOutBuffer g_audout_buffer[AUDIO_BUFFER_COUNT];
@@ -190,9 +292,6 @@ namespace tune::impl {
         R_TRY(audoutInitialize());
         SetVolume(config::get_volume());
 
-        g_playlist.reserve(PLAYLIST_ENTRY_MAX);
-        g_shuffle_playlist.reserve(PLAYLIST_ENTRY_MAX);
-
         /* Fetch values from config, sanitize the return value */
         if (auto c = config::get_repeat(); c <= 2 && c >= 0) {
             SetRepeatMode(static_cast<RepeatMode>(c));
@@ -216,25 +315,14 @@ namespace tune::impl {
             {
                 std::scoped_lock lk(g_mutex);
 
-                const auto &queue = g_playlist;
-                const auto queue_size = queue.size();
+                const auto queue_size = g_playlist.Size();
                 if (queue_size == 0) {
                     g_current.Reset();
                 } else if (g_queue_position >= queue_size) {
                     g_queue_position = queue_size - 1;
                     continue;
                 } else {
-                    if (g_shuffle == ShuffleMode::On) {
-                        const auto shuffle_id = g_shuffle_playlist[g_queue_position];
-                        for (u32 i = 0; i < g_playlist.size(); i++) {
-                            if (g_playlist[i].id == shuffle_id) {
-                                g_current = g_playlist[i];
-                                break;
-                            }
-                        }
-                    } else {
-                        g_current = queue[g_queue_position];
-                    }
+                    g_current = g_playlist.Get(g_queue_position, g_shuffle);
                 }
             }
 
@@ -246,17 +334,12 @@ namespace tune::impl {
 
             g_status = PlayerStatus::Playing;
             /* Only play if playing and we have a track queued. */
-            Result rc = PlayTrack(g_playlist2.GetPath(g_current.id));
+            Result rc = PlayTrack(g_playlist.GetPath(g_current));
 
             /* Log error. */
             if (R_FAILED(rc)) {
                 /* Remove track if something went wrong. */
-                bool shuffle = g_shuffle == ShuffleMode::On;
-                if (shuffle)
-                    SetShuffleMode(ShuffleMode::Off);
                 Remove(g_queue_position);
-                if (shuffle)
-                    SetShuffleMode(ShuffleMode::On);
             }
         }
 
@@ -340,7 +423,7 @@ namespace tune::impl {
         {
             std::scoped_lock lk(g_mutex);
 
-            if (g_queue_position < g_playlist.size() - 1) {
+            if (g_queue_position < g_playlist.Size() - 1) {
                 g_queue_position++;
             } else {
                 g_queue_position = 0;
@@ -359,7 +442,7 @@ namespace tune::impl {
             if (g_queue_position > 0) {
                 g_queue_position--;
             } else {
-                g_queue_position = g_playlist.size() - 1;
+                g_queue_position = g_playlist.Size() - 1;
             }
         }
         g_status     = PlayerStatus::FetchNext;
@@ -419,16 +502,16 @@ namespace tune::impl {
     u32 GetPlaylistSize() {
         std::scoped_lock lk(g_mutex);
 
-        return g_playlist.size();
+        return g_playlist.Size();
     }
 
     Result GetPlaylistItem(u32 index, char *buffer, size_t buffer_size) {
         std::scoped_lock lk(g_mutex);
 
-        if (index >= g_playlist.size())
-            return tune::OutOfRange;
+        const auto path = g_playlist.GetPath(index, g_shuffle);
+        R_UNLESS(path, tune::OutOfRange);
 
-        std::strncpy(buffer, g_playlist2.GetPath(index), buffer_size);
+        std::snprintf(buffer, buffer_size, "%s", path);
 
         return 0;
     }
@@ -439,9 +522,11 @@ namespace tune::impl {
 
         {
             std::scoped_lock lk(g_mutex);
-            R_UNLESS(g_current.IsValid(), tune::NotPlaying);
-            // R_UNLESS(buffer_size >= g_current.path.size(), tune::InvalidArgument);
-            std::strcpy(buffer, g_playlist2.GetPath(g_current.id));
+
+            const auto path = g_playlist.GetPath(g_current);
+            R_UNLESS(path, tune::NotPlaying);
+
+            std::snprintf(buffer, buffer_size, "%s", path);
         }
 
         auto [current, total] = g_source->Tell();
@@ -458,43 +543,21 @@ namespace tune::impl {
         {
             std::scoped_lock lk(g_mutex);
 
-            g_playlist.clear();
-            g_shuffle_playlist.clear();
-            g_playlist2.Clear();
+            g_playlist.Clear();
         }
         g_status = PlayerStatus::FetchNext;
     }
 
+    // currently unused (and untested).
     void MoveQueueItem(u32 src, u32 dst) {
         std::scoped_lock lk(g_mutex);
 
-        const auto queue_size = g_playlist.size();
-
-        if (src >= queue_size) {
-            src = queue_size - 1;
-        }
-        if (dst >= queue_size) {
-            dst = queue_size - 1;
+        if (!g_playlist.Swap(src, dst, g_shuffle)) {
+            return;
         }
 
-        auto source = g_playlist.cbegin() + src;
-        auto dest   = g_playlist.cbegin() + dst;
-
-        g_playlist.insert(dest, *source);
-        g_playlist.erase(source);
-
-        if (src < dst) {
-            if (g_queue_position == src) {
-                g_queue_position = dst;
-            } else if (g_queue_position >= src && g_queue_position <= dst) {
-                g_queue_position--;
-            }
-        } else if (dst < src) {
-            if (g_queue_position == src) {
-                g_queue_position = dst;
-            } else if (g_queue_position >= dst && g_queue_position <= src) {
-                g_queue_position++;
-            }
+        if (g_queue_position == src || g_queue_position == dst) {
+            g_queue_position = (g_queue_position == src) ? dst : src; // Swap() exchanged both slots; follow the current entry to its new slot
         }
     }
 
@@ -502,30 +565,13 @@ namespace tune::impl {
         {
             std::scoped_lock lk(g_mutex);
 
-            /* Check if we are out of bounds. */
-            size_t queue_size = g_playlist.size();
-            if (index >= queue_size) {
-                index = queue_size - 1;
-            }
-
-            /* Get absolute position in current playlist. Independent of shufflemode. */
-            u32 pos = index;
-
-            if (g_shuffle == ShuffleMode::On) {
-                const auto track = g_playlist.cbegin() + index;
-                for (u32 i = 0; i < g_shuffle_playlist.size(); i++) {
-                    if (g_shuffle_playlist[i] == track->id) {
-                        pos = i;
-                        break;
-                    }
-                }
-            }
-
-            /* Return if that track is already selected. */
-            if (g_queue_position == pos)
+            const auto size = g_playlist.Size();
+            if (!size) {
                 return;
+            }
 
-            g_queue_position = pos;
+            // adjust to index-1 so that FetchNext will jump to it.
+            g_queue_position = std::clamp<s32>(index - 1, 0, size - 1);
         }
         g_status     = PlayerStatus::FetchNext;
         g_should_pause = false;
@@ -543,34 +589,16 @@ namespace tune::impl {
 
         std::scoped_lock lk(g_mutex);
 
-        const auto new_id = g_playlist2.FindNextFreeEntry();
-        if (new_id < 0) {
-            return tune::OutOfMemory;
-        }
-
-        if (!g_playlist2.Add(new_id, buffer)) {
+        if (!g_playlist.Add(buffer, type)) {
             return tune::OutOfMemory;
         }
 
-        const PlaylistEntry new_entry{
-            .id = static_cast<PlaylistID>(new_id)
-        };
-
-        // add new entry to playlist
-        if (type == EnqueueType::Front) {
-            g_playlist.emplace(g_playlist.cbegin(), new_entry);
-            if (g_shuffle == ShuffleMode::Off) {
-                g_queue_position++;
-            }
-        } else {
-            g_playlist.emplace_back(new_entry);
+        // check if the current position still points to the same entry, update if not.
+        if (g_current.IsValid() && g_current.id != g_playlist.Get(g_queue_position, g_shuffle).id) {
+            g_queue_position = g_playlist.GetIndexFromID(g_current, g_shuffle);
+            g_current = g_playlist.Get(g_queue_position, g_shuffle);
         }
 
-        // add new entry id to shuffle_playlist_list
-        const auto shuffle_playlist_size = g_shuffle_playlist.size();
-        const auto shuffle_index = (shuffle_playlist_size > 1) ? (randomGet64() % shuffle_playlist_size) : 0;
-        g_shuffle_playlist.emplace(g_shuffle_playlist.cbegin() + shuffle_index, new_id);
-
         return 0;
     }
 
@@ -578,31 +606,14 @@ namespace tune::impl {
         std::scoped_lock lk(g_mutex);
 
         /* Ensure we don't operate out of bounds. */
-        R_UNLESS(!g_playlist.empty(), tune::QueueEmpty);
-        R_UNLESS(index < g_playlist.size(), tune::OutOfRange);
-
-        /* Get iterator for index position. */
-        const auto track = g_playlist.cbegin() + index;
-        g_playlist2.Remove(track->id);
-
-        for (u32 i = 0; i < g_shuffle_playlist.size(); i++) {
-            if (g_shuffle_playlist[i] == track->id) {
-                const auto shuffle_it = g_shuffle_playlist.cbegin() + i;
-                // we are playing from shuffle list so use that index instead
-                if (g_shuffle == ShuffleMode::On) {
-                    index = i;
-                }
-                // finally remove
-                g_shuffle_playlist.erase(shuffle_it);
-                break;
-            }
-        }
+        R_UNLESS(g_playlist.Size(), tune::QueueEmpty);
 
-        /* Remove entry. */
-        g_playlist.erase(track);
+        if (!g_playlist.Remove(index, g_shuffle)) {
+            return tune::OutOfRange;
+        }
 
         /* Fetch a new track if we deleted the current song. */
-        bool fetch_new = g_queue_position == index;
+        const bool fetch_new = g_queue_position == index;
 
         /* Lower current position if needed. */
         if (g_queue_position > index) {
diff --git a/sys-tune/source/impl/music_player.hpp b/sys-tune/source/impl/music_player.hpp
index 354bda4..b70aff5 100644
--- a/sys-tune/source/impl/music_player.hpp
+++ b/sys-tune/source/impl/music_player.hpp
@@ -5,19 +5,6 @@
 #include <vector>
 
 namespace tune::impl {
-    using PlaylistID = u32;
-
-    struct PlaylistEntry {
-        PlaylistID id{UINT32_MAX};
-
-        bool IsValid() const {
-            return id != UINT32_MAX;
-        }
-
-        void Reset() {
-            id = UINT32_MAX;
-        }
-    };
 
     Result Initialize();
     void Exit();

From 061ba9714a287a848ecbdf9426f1f35af4161627 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 03:41:34 +0100
Subject: [PATCH 08/19] silence gcc lto warning

---
 sys-tune/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sys-tune/Makefile b/sys-tune/Makefile
index ab00418..b367f93 100644
--- a/sys-tune/Makefile
+++ b/sys-tune/Makefile
@@ -62,7 +62,7 @@ endif
 #---------------------------------------------------------------------------------
 ARCH	:=	-march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE
 
-CFLAGS	:=	-flto -g -Wall -O2 -ffunction-sections \
+CFLAGS	:=	-flto=auto -g -Wall -O2 -ffunction-sections \
 			$(ARCH) $(DEFINES)
 
 CFLAGS	+=	$(INCLUDE) -DTUNE_API_VERSION=$(API_VERSION) \

From aa50d81a4b3cefa80c4e41aef7dbf2974b907882 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 05:07:27 +0100
Subject: [PATCH 09/19] add support for loading a song / folder on startup.

---
 Makefile                              |  2 ++
 common/config/config.cpp              |  9 +++++++
 common/config/config.hpp              |  4 +++
 common/sdmc/sdmc.cpp                  | 13 +++++++--
 common/sdmc/sdmc.hpp                  |  3 +++
 sys-tune/Makefile                     |  2 +-
 sys-tune/source/impl/music_player.cpp | 39 +++++++++++++++++++++++++++
 sys-tune/source/impl/source.cpp       | 28 ++++++++++++++-----
 sys-tune/source/impl/source.hpp       |  8 ++++++
 9 files changed, 99 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile
index 4a315ae..d456354 100644
--- a/Makefile
+++ b/Makefile
@@ -26,9 +26,11 @@ module:
 dist: all
 	mkdir -p dist/switch/.overlays
 	mkdir -p dist/atmosphere/contents/4200000000000000
+	# mkdir -p dist/atmosphere/contents/4200000000000000/flags
 	cp sys-tune/sys-tune.nsp dist/atmosphere/contents/4200000000000000/exefs.nsp
 	cp overlay/sys-tune-overlay.ovl dist/switch/.overlays/
 	cp sys-tune/toolbox.json dist/atmosphere/contents/4200000000000000/
+	# touch dist/atmosphere/contents/4200000000000000/flags/boot2.flag
 	cd dist; zip -r sys-tune-$(VERSION)-$(GITHASH).zip ./**/; cd ../;
 	-hactool -t nso sys-tune/sys-tune.nso
 
diff --git a/common/config/config.cpp b/common/config/config.cpp
index f8d7805..553d7bc 100644
--- a/common/config/config.cpp
+++ b/common/config/config.cpp
@@ -96,4 +96,13 @@ void set_default_title_volume(float value) {
     ini_putf("config", "global_volume", value, CONFIG_PATH);
 }
 
+auto get_load_path(char* out, int max_len) -> int {
+    return ini_gets("config", "load_path", "", out, max_len, CONFIG_PATH);
+}
+
+void set_load_path(const char* path) {
+    create_config_dir();
+    ini_puts("config", "load_path", path, CONFIG_PATH);
+}
+
 }
diff --git a/common/config/config.hpp b/common/config/config.hpp
index 0af059a..22edebc 100644
--- a/common/config/config.hpp
+++ b/common/config/config.hpp
@@ -34,4 +34,8 @@ void set_title_volume(u64 tid, float value);
 auto get_default_title_volume() -> float;
 void set_default_title_volume(float value);
 
+// returns the length of the string
+auto get_load_path(char* out, int max_len) -> int;
+void set_load_path(const char* path);
+
 }
diff --git a/common/sdmc/sdmc.cpp b/common/sdmc/sdmc.cpp
index 5963f8b..dfd51cd 100644
--- a/common/sdmc/sdmc.cpp
+++ b/common/sdmc/sdmc.cpp
@@ -24,10 +24,19 @@ namespace sdmc {
         return fsFsOpenFile(&sdmc, path_buffer, open_mode, file);
     }
 
-    bool FileExists(const char* path) {
+    Result OpenDir(FsDir *dir, const char *path, int open_mode) {
+        std::strcpy(path_buffer, path);
+        return fsFsOpenDirectory(&sdmc, path_buffer, open_mode, dir);
+    }
+
+    Result GetType(const char* path, FsDirEntryType* type) { // copies path into path_buffer, then queries the entry type
         std::strcpy(path_buffer, path);
+        return fsFsGetEntryType(&sdmc, path_buffer, type);
+    }
+
+    bool FileExists(const char* path) {
         FsDirEntryType type;
-        return R_SUCCEEDED(fsFsGetEntryType(&sdmc, path_buffer, &type)) && type == FsDirEntryType_File;
+        return R_SUCCEEDED(GetType(path, &type)) && type == FsDirEntryType_File;
     }
 
     Result CreateFolder(const char* path) {
diff --git a/common/sdmc/sdmc.hpp b/common/sdmc/sdmc.hpp
index 11a6575..27e060a 100644
--- a/common/sdmc/sdmc.hpp
+++ b/common/sdmc/sdmc.hpp
@@ -8,6 +8,9 @@ namespace sdmc {
     void Close();
 
     Result OpenFile(FsFile *file, const char* path, int open_mode = FsOpenMode_Read);
+    Result OpenDir(FsDir *dir, const char *path, int open_mode);
+
+    Result GetType(const char* path, FsDirEntryType* type);
     bool FileExists(const char* path);
 
     Result CreateFolder(const char* path);
diff --git a/sys-tune/Makefile b/sys-tune/Makefile
index b367f93..4b5e834 100644
--- a/sys-tune/Makefile
+++ b/sys-tune/Makefile
@@ -68,7 +68,7 @@ CFLAGS	:=	-flto=auto -g -Wall -O2 -ffunction-sections \
 CFLAGS	+=	$(INCLUDE) -DTUNE_API_VERSION=$(API_VERSION) \
 			$(WANT_FLAGS)
 
-CXXFLAGS	:= $(CFLAGS) -fno-rtti -fno-exceptions -std=c++23
+CXXFLAGS	:= $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++23
 
 ASFLAGS	:=	-g $(ARCH)
 LDFLAGS	=	-specs=$(DEVKITPRO)/libnx/switch.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index 43e2f24..5273975 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -309,6 +309,42 @@ namespace tune::impl {
     }
 
     void TuneThreadFunc(void *) {
+        {
+            char load_path[512];
+            if (config::get_load_path(load_path, sizeof(load_path))) {
+                // check if the path is a file or folder.
+                FsDirEntryType type;
+                if (R_SUCCEEDED(sdmc::GetType(load_path, &type))) {
+                    if (type == FsDirEntryType_File) {
+                        // path is a file, load single entry.
+                        if (GetSourceType(load_path) != SourceType::NONE) {
+                            Enqueue(load_path, std::strlen(load_path), EnqueueType::Back);
+                        }
+                    } else {
+                        // path is a folder, load all entries.
+                        FsDir dir;
+                        if (R_SUCCEEDED(sdmc::OpenDir(&dir, load_path, FsDirOpenMode_ReadFiles|FsDirOpenMode_NoFileSize))) {
+                            // during init, we have a lot of memory to work with.
+                            std::vector<FsDirectoryEntry> entries(std::min(64, PLAYLIST_ENTRY_MAX));
+
+                            s64 total;
+                            char full_path[512];
+                            while (R_SUCCEEDED(fsDirRead(&dir, &total, entries.size(), entries.data())) && total) {
+                                for (s64 i = 0; i < total; i++) {
+                                    if (GetSourceType(entries[i].name) != SourceType::NONE) {
+                                        std::snprintf(full_path, sizeof(full_path), "%s/%s", load_path, entries[i].name);
+                                        Enqueue(full_path, std::strlen(full_path), EnqueueType::Back);
+                                    }
+                                }
+                            }
+
+                            fsDirClose(&dir);
+                        }
+                    }
+                }
+            }
+        }
+
         /* Run as long as we aren't stopped and no error has been encountered. */
         while (g_should_run) {
             g_current.Reset();
@@ -583,6 +619,9 @@ namespace tune::impl {
     }
 
     Result Enqueue(const char *buffer, size_t buffer_length, EnqueueType type) {
+        if (GetSourceType(buffer) == SourceType::NONE)
+            return tune::InvalidPath;
+
         /* Ensure file exists. */
         if (!sdmc::FileExists(buffer))
             return tune::InvalidPath;
diff --git a/sys-tune/source/impl/source.cpp b/sys-tune/source/impl/source.cpp
index 01c0ded..b4826a4 100644
--- a/sys-tune/source/impl/source.cpp
+++ b/sys-tune/source/impl/source.cpp
@@ -3,7 +3,6 @@
 #include "sdmc/sdmc.hpp"
 
 #include <cstring>
-#include <strings.h>
 
 // NOTE: when updating dr_libs, check for TUNE-FIX comment for patches.
 #define DR_FLAC_IMPLEMENTATION
@@ -344,8 +343,8 @@ class WavFile final : public Source {
 };
 
 std::unique_ptr<Source> OpenFile(const char *path) {
-    const auto length = std::strlen(path);
-    if (length < 5)
+    const auto type = GetSourceType(path);
+    if (type == SourceType::NONE)
         return nullptr;
 
     FsFile file;
@@ -355,17 +354,17 @@ std::unique_ptr<Source> OpenFile(const char *path) {
 
     if (false) {}
 #ifdef WANT_MP3
-    else if (strcasecmp(path + length - 4, ".mp3") == 0) {
+    else if (type == SourceType::MP3) {
         return std::make_unique<Mp3File>(std::move(file));
     }
 #endif
 #ifdef WANT_FLAC
-    else if (strcasecmp(path + length - 5, ".flac") == 0) {
+    else if (type == SourceType::FLAC) {
         return std::make_unique<FlacFile>(std::move(file));
     }
 #endif
 #ifdef WANT_WAV
-    else if (strcasecmp(path + length - 4, ".wav") == 0 || strcasecmp(path + length - 5, ".wave") == 0) {
+    else if (type == SourceType::WAV) {
         return std::make_unique<WavFile>(std::move(file));
     }
 #endif
@@ -374,3 +373,20 @@ std::unique_ptr<Source> OpenFile(const char *path) {
         return nullptr;
     }
 }
+
+SourceType GetSourceType(const char* path) {
+    const auto ext = std::strrchr(path, '.');
+    if (!ext) {
+        return SourceType::NONE;
+    }
+
+    if (!strcasecmp(ext, ".mp3")) {
+        return SourceType::MP3;
+    } else if (!strcasecmp(ext, ".flac")) {
+        return SourceType::FLAC;
+    } else if (!strcasecmp(ext, ".wav") || !strcasecmp(ext, ".wave")) {
+        return SourceType::WAV;
+    }
+
+    return SourceType::NONE;
+}
diff --git a/sys-tune/source/impl/source.hpp b/sys-tune/source/impl/source.hpp
index b41f176..43e5fb2 100644
--- a/sys-tune/source/impl/source.hpp
+++ b/sys-tune/source/impl/source.hpp
@@ -4,6 +4,13 @@
 #include <memory>
 #include "resamplers/SDL_audioEX.h"
 
+enum class SourceType {
+    NONE,
+    MP3,
+    FLAC,
+    WAV,
+};
+
 class Source {
   private:
     FsFile m_file = {};
@@ -54,3 +61,4 @@ class Source {
 };
 
 std::unique_ptr<Source> OpenFile(const char *path);
+SourceType GetSourceType(const char* path);

From d4ceec61943c172b8c24763f1ae5dac8ab179ecd Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 05:14:31 +0100
Subject: [PATCH 10/19] improve overlay dir scan speed by reducing calls to
 fsDirRead

---
 overlay/source/gui_browser.cpp | 84 +++++++++++++++++++---------------
 1 file changed, 46 insertions(+), 38 deletions(-)

diff --git a/overlay/source/gui_browser.cpp b/overlay/source/gui_browser.cpp
index 5e99a95..cdc69c8 100644
--- a/overlay/source/gui_browser.cpp
+++ b/overlay/source/gui_browser.cpp
@@ -102,7 +102,7 @@ void BrowserGui::scanCwd() {
 
     /* Open directory. */
     FsDir dir;
-    Result rc = fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadDirs | FsDirOpenMode_ReadFiles, &dir);
+    Result rc = fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadDirs | FsDirOpenMode_ReadFiles | FsDirOpenMode_NoFileSize, &dir);
     if (R_FAILED(rc)) {
         char result_buffer[0x10];
         std::snprintf(result_buffer, 0x10, "2%03X-%04X", R_MODULE(rc), R_DESCRIPTION(rc));
@@ -116,38 +116,42 @@ void BrowserGui::scanCwd() {
 
     /* Iternate over directory. */
     s64 count = 0;
-    FsDirectoryEntry entry;
-    while (R_SUCCEEDED(fsDirRead(&dir, &count, 1, &entry)) && count) {
-        if (entry.type == FsDirEntryType_Dir) {
-            /* Add directory entries. */
-            auto item = new tsl::elm::ListItem(entry.name);
-            item->setClickListener([this, item](u64 down) -> bool {
-                if (down & HidNpadButton_A) {
-                    std::strncat(this->cwd, item->getText().c_str(), sizeof(this->cwd) - 1);
-                    std::strncat(this->cwd, "/", sizeof(this->cwd) - 1);
-                    this->scanCwd();
-                    return true;
-                }
-                return false;
-            });
-            folders.push_back(item);
-        } else if (SupportsType(entry.name)) {
-            /* Add file entry. */
-            auto item = new tsl::elm::ListItem(entry.name);
-            item->setClickListener([this, item](u64 down) -> bool {
-                if (down & HidNpadButton_A) {
-                    std::snprintf(path_buffer, sizeof(path_buffer), "%s%s", this->cwd, item->getText().c_str());
-                    Result rc = tuneEnqueue(path_buffer, TuneEnqueueType_Back);
-                    if (R_SUCCEEDED(rc)) {
-                        m_frame->setToast("Playlist updated", "Added 1 song to Playlist.");
-                    } else {
-                        m_frame->setToast("Failed to add Track.", "Does the name contain umlauts?");
+    std::vector<FsDirectoryEntry> entries(64);
+
+    while (R_SUCCEEDED(fsDirRead(&dir, &count, entries.size(), entries.data())) && count) {
+        for (s64 i = 0; i < count; i++) {
+            const auto& entry = entries[i];
+            if (entry.type == FsDirEntryType_Dir) {
+                /* Add directory entries. */
+                auto item = new tsl::elm::ListItem(entry.name);
+                item->setClickListener([this, item](u64 down) -> bool {
+                    if (down & HidNpadButton_A) {
+                        std::strncat(this->cwd, item->getText().c_str(), sizeof(this->cwd) - std::strlen(this->cwd) - 1); // strncat's count is the space left, not the buffer size
+                        std::strncat(this->cwd, "/", sizeof(this->cwd) - std::strlen(this->cwd) - 1);
+                        this->scanCwd();
+                        return true;
                     }
-                    return true;
-                }
-                return false;
-            });
-            files.push_back(item);
+                    return false;
+                });
+                folders.push_back(item);
+            } else if (SupportsType(entry.name)) {
+                /* Add file entry. */
+                auto item = new tsl::elm::ListItem(entry.name);
+                item->setClickListener([this, item](u64 down) -> bool {
+                    if (down & HidNpadButton_A) {
+                        std::snprintf(path_buffer, sizeof(path_buffer), "%s%s", this->cwd, item->getText().c_str());
+                        Result rc = tuneEnqueue(path_buffer, TuneEnqueueType_Back);
+                        if (R_SUCCEEDED(rc)) {
+                            m_frame->setToast("Playlist updated", "Added 1 song to Playlist.");
+                        } else {
+                            m_frame->setToast("Failed to add Track.", "Does the name contain umlauts?");
+                        }
+                        return true;
+                    }
+                    return false;
+                });
+                files.push_back(item);
+            }
         }
     }
     if (folders.size() == 0 && files.size() == 0) {
@@ -184,7 +188,7 @@ void BrowserGui::upCwd() {
 
 void BrowserGui::addAllToPlaylist() {
     FsDir dir;
-    Result rc = fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadFiles, &dir);
+    Result rc = fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadFiles|FsDirOpenMode_NoFileSize, &dir);
     if (R_FAILED(rc)) {
         char result_buffer[0x10];
         std::snprintf(result_buffer, 0x10, "2%03X-%04X", R_MODULE(rc), R_DESCRIPTION(rc));
@@ -197,11 +201,15 @@ void BrowserGui::addAllToPlaylist() {
     std::vector<std::string> file_list;
     s64 songs_added = 0;
     s64 count = 0;
-    FsDirectoryEntry entry;
-    while (R_SUCCEEDED(fsDirRead(&dir, &count, 1, &entry)) && count){
-        if (entry.type == FsDirEntryType_File && SupportsType(entry.name)){
-            file_list.push_back(std::string(entry.name));
-            count++;
+    std::vector<FsDirectoryEntry> entries(64);
+
+    while (R_SUCCEEDED(fsDirRead(&dir, &count, entries.size(), entries.data())) && count){
+        for (s64 i = 0; i < count; i++) {
+            const auto& entry = entries[i];
+            if (entry.type == FsDirEntryType_File && SupportsType(entry.name)){
+                file_list.push_back(std::string(entry.name));
+                count++;
+            }
         }
     }
 

From d8f803815f1b46f3c3fc00539e69fd18ea61c816 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 05:38:18 +0100
Subject: [PATCH 11/19] fix overlay browser not focusing first element

---
 overlay/source/gui_browser.cpp | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/overlay/source/gui_browser.cpp b/overlay/source/gui_browser.cpp
index cdc69c8..86ce26d 100644
--- a/overlay/source/gui_browser.cpp
+++ b/overlay/source/gui_browser.cpp
@@ -159,17 +159,29 @@ void BrowserGui::scanCwd() {
         return;
     }
 
+    tsl::elm::ListItem* focus_elm = nullptr;
+
     if (folders.size() > 0) {
         std::sort(folders.begin(), folders.end(), ListItemTextCompare);
+
+        focus_elm = folders[0];
+
         for (auto element : folders)
             this->m_list->addItem(element);
     }
     if (files.size() > 0) {
         this->m_list->addItem(new tsl::elm::CategoryHeader("Files"));
         std::sort(files.begin(), files.end(), ListItemTextCompare);
+
+        if (!focus_elm)
+            focus_elm = files[0];
+
         for (auto element : files)
             this->m_list->addItem(element);
     }
+
+    if (focus_elm)
+        tsl::Gui::requestFocus(focus_elm, tsl::FocusDirection::None);
 }
 
 void BrowserGui::upCwd() {

From a26ded17d3da043a6186163d2a35c72e47b24118 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 08:57:06 +0100
Subject: [PATCH 12/19] ovl fix addAll, ovl add setting/removing startup file,
 ovl fix playlist using the wrong index for ipc, ovl focus current song in
 playlist, sys fix clearAll not resetting the queueIndex.

---
 overlay/source/gui_browser.cpp        | 20 +++++++--
 overlay/source/gui_main.cpp           | 21 ++++++++++
 overlay/source/gui_playlist.cpp       | 59 ++++++++++++++++++++++++---
 overlay/source/gui_playlist.hpp       |  1 +
 sys-tune/source/impl/music_player.cpp |  8 +++-
 5 files changed, 98 insertions(+), 11 deletions(-)

diff --git a/overlay/source/gui_browser.cpp b/overlay/source/gui_browser.cpp
index 86ce26d..3098710 100644
--- a/overlay/source/gui_browser.cpp
+++ b/overlay/source/gui_browser.cpp
@@ -1,5 +1,6 @@
 #include "gui_browser.hpp"
 
+#include "config/config.hpp"
 #include "tune.h"
 
 namespace {
@@ -8,7 +9,7 @@ namespace {
         return strcasecmp(_lhs->getText().c_str(), _rhs->getText().c_str()) < 0;
     };
 
-    bool StringTextCompare(std::string _lhs, std::string _rhs) {
+    bool StringTextCompare(const std::string& _lhs, const std::string& _rhs) {
         return strcasecmp(_lhs.c_str(), _rhs.c_str()) < 0;
     };
 
@@ -97,6 +98,8 @@ void BrowserGui::scanCwd() {
     tsl::Gui::removeFocus();
     this->m_list->clear();
 
+    this->m_list->addItem(new tsl::elm::CategoryHeader("\uE0E7  Play selected path on start up", true));
+
     /* Show absolute folder path. */
     this->m_list->addItem(new tsl::elm::CategoryHeader(this->cwd, true));
 
@@ -130,6 +133,11 @@ void BrowserGui::scanCwd() {
                         std::strncat(this->cwd, "/", sizeof(this->cwd) - 1);
                         this->scanCwd();
                         return true;
+                    } else if (down & HidNpadButton_ZR) {
+                        std::snprintf(path_buffer, sizeof(path_buffer), "%s%s", this->cwd, item->getText().c_str());
+                        config::set_load_path(path_buffer);
+                        m_frame->setToast("Set start up file", item->getText().c_str());
+                        return true;
                     }
                     return false;
                 });
@@ -147,6 +155,11 @@ void BrowserGui::scanCwd() {
                             m_frame->setToast("Failed to add Track.", "Does the name contain umlauts?");
                         }
                         return true;
+                    } else if (down & HidNpadButton_ZR) {
+                        std::snprintf(path_buffer, sizeof(path_buffer), "%s%s", this->cwd, item->getText().c_str());
+                        config::set_load_path(path_buffer);
+                        m_frame->setToast("Set start up file", path_buffer);
+                        return true;
                     }
                     return false;
                 });
@@ -215,12 +228,11 @@ void BrowserGui::addAllToPlaylist() {
     s64 count = 0;
     std::vector<FsDirectoryEntry> entries(64);
 
-    while (R_SUCCEEDED(fsDirRead(&dir, &count, entries.size(), entries.data())) && count){
+    while (R_SUCCEEDED(fsDirRead(&dir, &count, entries.size(), entries.data())) && count) {
         for (s64 i = 0; i < count; i++) {
             const auto& entry = entries[i];
             if (entry.type == FsDirEntryType_File && SupportsType(entry.name)){
-                file_list.push_back(std::string(entry.name));
-                count++;
+                file_list.emplace_back(entry.name);
             }
         }
     }
diff --git a/overlay/source/gui_main.cpp b/overlay/source/gui_main.cpp
index 8409ea9..25c5b1b 100644
--- a/overlay/source/gui_main.cpp
+++ b/overlay/source/gui_main.cpp
@@ -115,6 +115,27 @@ tsl::elm::Element *MainGui::createUI() {
 
     list->addItem(new tsl::elm::CategoryHeader("Misc"));
 
+    auto startup_button = new tsl::elm::ListItem("Remove start up file");
+    startup_button->setClickListener([frame](u64 keys) {
+        if (keys & HidNpadButton_A) {
+            char path[512];
+            if (config::get_load_path(path, sizeof(path))) {
+                config::set_load_path("");
+                const auto* p = path;
+                if (auto ext = std::strrchr(path, '/')) {
+                    p = ext + 1;
+                }
+
+                frame->setToast("Removed start up file", p);
+            } else {
+                frame->setToast("Failed to remove start up file", "No start up file set in config");
+            }
+            return true;
+        }
+        return false;
+    });
+    list->addItem(startup_button);
+
     auto exit_button = new tsl::elm::ListItem("Close sys-tune");
     exit_button->setClickListener([](u64 keys) {
         if (keys & HidNpadButton_A) {
diff --git a/overlay/source/gui_playlist.cpp b/overlay/source/gui_playlist.cpp
index 563ae19..8f2cb6c 100644
--- a/overlay/source/gui_playlist.cpp
+++ b/overlay/source/gui_playlist.cpp
@@ -1,6 +1,7 @@
 #include "gui_playlist.hpp"
 
 #include "elm_overlayframe.hpp"
+#include "config/config.hpp"
 #include "tune.h"
 
 namespace {
@@ -45,9 +46,12 @@ namespace {
         }
     };
 
+    ButtonListItem* g_focus_item;
+
 }
 
 PlaylistGui::PlaylistGui() {
+    g_focus_item = nullptr;
     m_list = new tsl::elm::List();
 
     u32 count;
@@ -65,14 +69,30 @@ PlaylistGui::PlaylistGui() {
         return;
     }
 
-    m_list->addItem(new tsl::elm::CategoryHeader("\uE0E2  To remove all", true));
+    char current_path[FS_MAX_PATH];
+    TuneCurrentStats current_stats;
+    rc = tuneGetCurrentQueueItem(current_path, sizeof(current_path), &current_stats);
+    if (R_FAILED(rc)) {
+        char result_buffer[0x10];
+        std::snprintf(result_buffer, 0x10, "2%03X-%04X", R_MODULE(rc), R_DESCRIPTION(rc));
+        this->m_list->addItem(new tsl::elm::ListItem("failed to get current item"));
+        this->m_list->addItem(new tsl::elm::ListItem(result_buffer));
+        return;
+    }
+
+    m_list->addItem(new tsl::elm::CategoryHeader("\uE0E2  To remove all      \uE0E7  Play on start up", true));
 
     char path[FS_MAX_PATH];
     for (u32 i = 0; i < count; i++) {
-        rc = tuneGetPlaylistItem(i, path, FS_MAX_PATH);
+        rc = tuneGetPlaylistItem(i, path, sizeof(path));
         if (R_FAILED(rc))
             break;
 
+        bool found = false;
+        if (!g_focus_item && !strcasecmp(current_path, path)) {
+            found = true;
+        }
+
         char *str = path;
         size_t length   = std::strlen(str);
         NullLastDot(str);
@@ -84,13 +104,16 @@ PlaylistGui::PlaylistGui() {
         }
         auto item = new ButtonListItem(str, "\uE098");
         item->setClickListener([this, item](u64 keys) -> bool {
-            u32 index  = this->m_list->getIndexInList(item);
+            // adjust index for above CategoryHeader.
+            const auto index = this->m_list->getIndexInList(item);
+            const auto tune_index = index - 1;
+
             if (keys & HidNpadButton_A) {
-                tuneSelect(index);
+                tuneSelect(tune_index);
                 return true;
             }
             else if (keys & HidNpadButton_Y) {
-                if (R_SUCCEEDED(tuneRemove(index))) {
+                if (R_SUCCEEDED(tuneRemove(tune_index))) {
                     this->removeFocus();
                     this->m_list->removeIndex(index);
                     auto element = this->m_list->getItemAtIndex(index + 1);
@@ -112,9 +135,22 @@ PlaylistGui::PlaylistGui() {
                     m_list->addItem(new tsl::elm::ListItem("Playlist empty."));
                 }
                 return true;
+            } else if (keys & HidNpadButton_ZR) {
+                char path[FS_MAX_PATH];
+                if (R_SUCCEEDED(tuneGetPlaylistItem(tune_index, path, sizeof(path)))) {
+                    config::set_load_path(path);
+                    // todo: toast
+                    // m_frame->setToast("Set start up file", item->getText().c_str());
+                }
+                return true;
             }
             return false;
         });
+
+        if (found) {
+            g_focus_item = item;
+        }
+
         m_list->addItem(item);
     }
 }
@@ -127,3 +163,16 @@ tsl::elm::Element *PlaylistGui::createUI() {
 
     return rootFrame;
 }
+
+void PlaylistGui::update()  {
+    if (g_focus_item) {
+        // wait until its added to the list.
+        const auto index = m_list->getIndexInList(g_focus_item);
+        if (index >= 0) {
+            this->removeFocus();
+            this->requestFocus(g_focus_item, tsl::FocusDirection::Down);
+            m_list->setFocusedIndex(index);
+            g_focus_item = nullptr;
+        }
+    }
+}
diff --git a/overlay/source/gui_playlist.hpp b/overlay/source/gui_playlist.hpp
index 781c819..46074bd 100644
--- a/overlay/source/gui_playlist.hpp
+++ b/overlay/source/gui_playlist.hpp
@@ -10,4 +10,5 @@ class PlaylistGui final : public tsl::Gui {
     PlaylistGui();
 
     tsl::elm::Element *createUI() override;
+    void update() override;
 };
diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index 5273975..634a368 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -125,6 +125,10 @@ namespace tune::impl {
             }
 
             u32 GetIndexFromID(const PlaylistID& entry, ShuffleMode shuffle) const {
+                if (!entry.IsValid()) {
+                    return 0;
+                }
+
                 std::span list{m_playlist};
                 if (shuffle == ShuffleMode::On) {
                     list = m_shuffle_playlist;
@@ -580,6 +584,7 @@ namespace tune::impl {
             std::scoped_lock lk(g_mutex);
 
             g_playlist.Clear();
+            g_queue_position = 0;
         }
         g_status = PlayerStatus::FetchNext;
     }
@@ -606,8 +611,7 @@ namespace tune::impl {
                 return;
             }
 
-            // adjust to index-1 so that FetchNext will jump to it.
-            g_queue_position = std::clamp<s32>(index - 1, 0, size - 1);
+            g_queue_position = std::min(index, size - 1);
         }
         g_status     = PlayerStatus::FetchNext;
         g_should_pause = false;

From e71b8a546273bbce526c966ca2e99a9d2bdc3c83 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 09:12:21 +0100
Subject: [PATCH 13/19] update makefile to set boot2.flag, update workflow to
 build on pr but only deploy on push.

---
 .github/workflows/build.yml | 6 ++++--
 Makefile                    | 5 ++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 92b2c83..8a75492 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,6 +1,6 @@
 name: Build sys-tune and overlay
 
-on: [push]
+on: [push, pull_request]
 jobs:
   build:
 
@@ -23,7 +23,9 @@ jobs:
         cp sys-tune/toolbox.json dist/atmosphere/contents/4200000000000000/
         cp overlay/sys-tune-overlay.ovl dist/switch/.overlays/
 
-    - uses: actions/upload-artifact@master
+    - name: Deploy
+      if: ${{ github.event_name != 'pull_request' && github.event.action != 'unassigned' }}
+      uses: actions/upload-artifact@master
       with:
         name: sys-tune
         path: dist
diff --git a/Makefile b/Makefile
index d456354..b640d18 100644
--- a/Makefile
+++ b/Makefile
@@ -25,12 +25,11 @@ module:
 
 dist: all
 	mkdir -p dist/switch/.overlays
-	mkdir -p dist/atmosphere/contents/4200000000000000
-	# mkdir -p dist/atmosphere/contents/4200000000000000/flags
+	mkdir -p dist/atmosphere/contents/4200000000000000/flags
+	touch dist/atmosphere/contents/4200000000000000/flags/boot2.flag
 	cp sys-tune/sys-tune.nsp dist/atmosphere/contents/4200000000000000/exefs.nsp
 	cp overlay/sys-tune-overlay.ovl dist/switch/.overlays/
 	cp sys-tune/toolbox.json dist/atmosphere/contents/4200000000000000/
-	# touch dist/atmosphere/contents/4200000000000000/flags/boot2.flag
 	cd dist; zip -r sys-tune-$(VERSION)-$(GITHASH).zip ./**/; cd ../;
 	-hactool -t nso sys-tune/sys-tune.nso
 

From 63a25cd13385e0645b8e9b5e1082beb3261bad9f Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 09:55:29 +0100
Subject: [PATCH 14/19] allocate large drmp3/drflac structs on the heap rather
 than making them static (saves 20k bss).

---
 sys-tune/source/impl/dr_flac.h | 39 ++++++++++++++++++----------------
 sys-tune/source/impl/dr_mp3.h  | 28 +++++++++++++-----------
 2 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/sys-tune/source/impl/dr_flac.h b/sys-tune/source/impl/dr_flac.h
index bed2f72..c1d10af 100644
--- a/sys-tune/source/impl/dr_flac.h
+++ b/sys-tune/source/impl/dr_flac.h
@@ -7870,10 +7870,13 @@ static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit)
     pFlac->container               = pInit->container;
 }
 
+static void _drflac_free(void* ptr) {
+	free(*(void**)ptr);
+}
 
 static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_tell_proc onTell, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
 {
-    static drflac_init_info init; // TUNE-FIX
+    __attribute__((cleanup(_drflac_free))) drflac_init_info* init = (drflac_init_info*)malloc(sizeof(*init)); // TUNE-FIX
     drflac_uint32 allocationSize;
     drflac_uint32 wholeSIMDVectorCountPerChannel;
     drflac_uint32 decodedSamplesAllocationSize;
@@ -7889,7 +7892,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
     /* CPU support first. */
     drflac__init_cpu_caps();
 
-    if (!drflac__init_private(&init, onRead, onSeek, onTell, onMeta, container, pUserData, pUserDataMD)) {
+    if (init == NULL || !drflac__init_private(init, onRead, onSeek, onTell, onMeta, container, pUserData, pUserDataMD)) { /* TUNE-FIX: init is heap-allocated and malloc() may fail; bail out instead of dereferencing NULL (the cleanup handler's free(NULL) is a no-op) */
         return NULL;
     }
 
@@ -7921,20 +7924,20 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
     The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
     we are supporting.
     */
-    if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
-        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
+    if ((init->maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
+        wholeSIMDVectorCountPerChannel = (init->maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
     } else {
-        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
+        wholeSIMDVectorCountPerChannel = (init->maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
     }
 
-    decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
+    decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init->channels;
 
     allocationSize += decodedSamplesAllocationSize;
     allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  /* Allocate extra bytes to ensure we have enough for alignment. */
 
 #ifndef DR_FLAC_NO_OGG
     /* There's additional data required for Ogg streams. */
-    if (init.container == drflac_container_ogg) {
+    if (init->container == drflac_container_ogg) {
         allocationSize += sizeof(drflac_oggbs);
 
         pOggbs = (drflac_oggbs*)drflac__malloc_from_callbacks(sizeof(*pOggbs), &allocationCallbacks);
@@ -7947,10 +7950,10 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
         pOggbs->onSeek = onSeek;
         pOggbs->onTell = onTell;
         pOggbs->pUserData = pUserData;
-        pOggbs->currentBytePos = init.oggFirstBytePos;
-        pOggbs->firstBytePos = init.oggFirstBytePos;
-        pOggbs->serialNumber = init.oggSerial;
-        pOggbs->bosPageHeader = init.oggBosHeader;
+        pOggbs->currentBytePos = init->oggFirstBytePos;
+        pOggbs->firstBytePos = init->oggFirstBytePos;
+        pOggbs->serialNumber = init->oggSerial;
+        pOggbs->bosPageHeader = init->oggBosHeader;
         pOggbs->bytesRemainingInPage = 0;
     }
 #endif
@@ -7963,14 +7966,14 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
     firstFramePos  = 42;   /* <-- We know we are at byte 42 at this point. */
     seektablePos   = 0;
     seekpointCount = 0;
-    if (init.hasMetadataBlocks) {
+    if (init->hasMetadataBlocks) {
         drflac_read_proc onReadOverride = onRead;
         drflac_seek_proc onSeekOverride = onSeek;
         drflac_tell_proc onTellOverride = onTell;
         void* pUserDataOverride = pUserData;
 
 #ifndef DR_FLAC_NO_OGG
-        if (init.container == drflac_container_ogg) {
+        if (init->container == drflac_container_ogg) {
             onReadOverride = drflac__on_read_ogg;
             onSeekOverride = drflac__on_seek_ogg;
             onTellOverride = drflac__on_tell_ogg;
@@ -7997,12 +8000,12 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
         return NULL;
     }
 
-    drflac__init_from_info(pFlac, &init);
+    drflac__init_from_info(pFlac, init);
     pFlac->allocationCallbacks = allocationCallbacks;
     pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE);
 
 #ifndef DR_FLAC_NO_OGG
-    if (init.container == drflac_container_ogg) {
+    if (init->container == drflac_container_ogg) {
         drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + (seekpointCount * sizeof(drflac_seekpoint)));
         DRFLAC_COPY_MEMORY(pInternalOggbs, pOggbs, sizeof(*pOggbs));
 
@@ -8023,7 +8026,7 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
 
     /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */
 #ifndef DR_FLAC_NO_OGG
-    if (init.container == drflac_container_ogg)
+    if (init->container == drflac_container_ogg)
     {
         pFlac->pSeekpoints = NULL;
         pFlac->seekpointCount = 0;
@@ -8075,8 +8078,8 @@ static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac
     If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
     the first frame.
     */
-    if (!init.hasStreamInfoBlock) {
-        pFlac->currentFLACFrame.header = init.firstFrameHeader;
+    if (!init->hasStreamInfoBlock) {
+        pFlac->currentFLACFrame.header = init->firstFrameHeader;
         for (;;) {
             drflac_result result = drflac__decode_flac_frame(pFlac);
             if (result == DRFLAC_SUCCESS) {
diff --git a/sys-tune/source/impl/dr_mp3.h b/sys-tune/source/impl/dr_mp3.h
index 170efcc..167c3e0 100644
--- a/sys-tune/source/impl/dr_mp3.h
+++ b/sys-tune/source/impl/dr_mp3.h
@@ -2294,12 +2294,16 @@ DRMP3_API void drmp3dec_init(drmp3dec *dec)
     dec->header[0] = 0;
 }
 
+static void _drmp3dec_free(void* ptr) {
+	free(*(void**)ptr);
+}
+
 DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int mp3_bytes, void *pcm, drmp3dec_frame_info *info)
 {
     int i = 0, igr, frame_size = 0, success = 1;
     const drmp3_uint8 *hdr;
     drmp3_bs bs_frame[1];
-    static drmp3dec_scratch scratch; // TUNE-FIX
+    __attribute__((cleanup(_drmp3dec_free))) drmp3dec_scratch* scratch = (drmp3dec_scratch*)malloc(sizeof(*scratch)); // TUNE-FIX
 
     if (mp3_bytes > 4 && dec->header[0] == 0xff && drmp3_hdr_compare(dec->header, mp3))
     {
@@ -2336,23 +2340,23 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
 
     if (info->layer == 3)
     {
-        int main_data_begin = drmp3_L3_read_side_info(bs_frame, scratch.gr_info, hdr);
+        int main_data_begin = drmp3_L3_read_side_info(bs_frame, scratch->gr_info, hdr);
         if (main_data_begin < 0 || bs_frame->pos > bs_frame->limit)
         {
             drmp3dec_init(dec);
             return 0;
         }
-        success = drmp3_L3_restore_reservoir(dec, bs_frame, &scratch, main_data_begin);
+        success = drmp3_L3_restore_reservoir(dec, bs_frame, scratch, main_data_begin);
         if (success && pcm != NULL)
         {
             for (igr = 0; igr < (DRMP3_HDR_TEST_MPEG1(hdr) ? 2 : 1); igr++, pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*576*info->channels))
             {
-                DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
-                drmp3_L3_decode(dec, &scratch, scratch.gr_info + igr*info->channels, info->channels);
-                drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 18, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
+                DRMP3_ZERO_MEMORY(scratch->grbuf[0], 576*2*sizeof(float));
+                drmp3_L3_decode(dec, scratch, scratch->gr_info + igr*info->channels, info->channels);
+                drmp3d_synth_granule(dec->qmf_state, scratch->grbuf[0], 18, info->channels, (drmp3d_sample_t*)pcm, scratch->syn[0]);
             }
         }
-        drmp3_L3_save_reservoir(dec, &scratch);
+        drmp3_L3_save_reservoir(dec, scratch);
     } else
     {
 #ifdef DR_MP3_ONLY_MP3
@@ -2366,15 +2370,15 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
 
         drmp3_L12_read_scale_info(hdr, bs_frame, sci);
 
-        DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
+        DRMP3_ZERO_MEMORY(scratch->grbuf[0], 576*2*sizeof(float));
         for (i = 0, igr = 0; igr < 3; igr++)
         {
-            if (12 == (i += drmp3_L12_dequantize_granule(scratch.grbuf[0] + i, bs_frame, sci, info->layer | 1)))
+            if (12 == (i += drmp3_L12_dequantize_granule(scratch->grbuf[0] + i, bs_frame, sci, info->layer | 1)))
             {
                 i = 0;
-                drmp3_L12_apply_scf_384(sci, sci->scf + igr, scratch.grbuf[0]);
-                drmp3d_synth_granule(dec->qmf_state, scratch.grbuf[0], 12, info->channels, (drmp3d_sample_t*)pcm, scratch.syn[0]);
-                DRMP3_ZERO_MEMORY(scratch.grbuf[0], 576*2*sizeof(float));
+                drmp3_L12_apply_scf_384(sci, sci->scf + igr, scratch->grbuf[0]);
+                drmp3d_synth_granule(dec->qmf_state, scratch->grbuf[0], 12, info->channels, (drmp3d_sample_t*)pcm, scratch->syn[0]);
+                DRMP3_ZERO_MEMORY(scratch->grbuf[0], 576*2*sizeof(float));
                 pcm = DRMP3_OFFSET_PTR(pcm, sizeof(drmp3d_sample_t)*384*info->channels);
             }
             if (bs_frame->pos > bs_frame->limit)

From bf1441248e34529cd1677d1bab5dc61949af4bbe Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 19:53:20 +0100
Subject: [PATCH 15/19] increase heap to 300k, set NDEBUG, add guards against
 including dr_x if x is disabled.

---
 common/minIni/minIni.c                        |  1 +
 overlay/Makefile                              |  2 +-
 sys-tune/Makefile                             |  2 +-
 sys-tune/source/impl/music_player.cpp         |  7 ++--
 sys-tune/source/impl/resamplers/SDL_audioEX.c | 27 +++++++-------
 sys-tune/source/impl/source.cpp               | 36 +++++++++++++++++--
 sys-tune/source/main.cpp                      |  2 +-
 7 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/common/minIni/minIni.c b/common/minIni/minIni.c
index 90ee57b..00dbcd7 100644
--- a/common/minIni/minIni.c
+++ b/common/minIni/minIni.c
@@ -32,6 +32,7 @@
 #define MININI_IMPLEMENTATION
 #include "minIni.h"
 #if defined NDEBUG
+  #undef assert
   #define assert(e)
 #else
   #include <assert.h>
diff --git a/overlay/Makefile b/overlay/Makefile
index b797f9f..cc81668 100644
--- a/overlay/Makefile
+++ b/overlay/Makefile
@@ -73,7 +73,7 @@ ARCH	:=	-march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE
 CFLAGS	:=	-g -Wall -Wno-format-truncation -O2 -ffunction-sections \
 			$(ARCH) $(DEFINES)
 
-CFLAGS	+=	$(INCLUDE) -DVERSION=\"v$(APP_VERSION)\" -DTUNE_API_VERSION=$(API_VERSION) \
+CFLAGS	+=	$(INCLUDE) -DVERSION=\"v$(APP_VERSION)\" -DTUNE_API_VERSION=$(API_VERSION) -DNDEBUG=1 \
 			$(WANT_FLAGS)
 
 CXXFLAGS	:= $(CFLAGS) -fno-exceptions -std=c++23
diff --git a/sys-tune/Makefile b/sys-tune/Makefile
index 4b5e834..681a7fe 100644
--- a/sys-tune/Makefile
+++ b/sys-tune/Makefile
@@ -65,7 +65,7 @@ ARCH	:=	-march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE
 CFLAGS	:=	-flto=auto -g -Wall -O2 -ffunction-sections \
 			$(ARCH) $(DEFINES)
 
-CFLAGS	+=	$(INCLUDE) -DTUNE_API_VERSION=$(API_VERSION) \
+CFLAGS	+=	$(INCLUDE) -DTUNE_API_VERSION=$(API_VERSION) -DNDEBUG=1 \
 			$(WANT_FLAGS)
 
 CXXFLAGS	:= $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++23
diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index 634a368..ea11d9b 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -17,6 +17,7 @@ namespace tune::impl {
     namespace {
         constexpr float VOLUME_MAX = 1.f;
         constexpr auto PLAYLIST_ENTRY_MAX = 512; // 128k
+        constexpr auto PATH_SIZE_MAX = 256;
 
         struct PlaylistID {
             u32 id{UINT32_MAX};
@@ -187,7 +188,7 @@ namespace tune::impl {
                 }
 
             private:
-                char m_path[256]{};
+                char m_path[PATH_SIZE_MAX]{};
             };
 
         private:
@@ -314,7 +315,7 @@ namespace tune::impl {
 
     void TuneThreadFunc(void *) {
         {
-            char load_path[512];
+            char load_path[PATH_SIZE_MAX];
             if (config::get_load_path(load_path, sizeof(load_path))) {
                 // check if the path is a file or folder.
                 FsDirEntryType type;
@@ -332,7 +333,7 @@ namespace tune::impl {
                             std::vector<FsDirectoryEntry> entries(std::min(64, PLAYLIST_ENTRY_MAX));
 
                             s64 total;
-                            char full_path[512];
+                            char full_path[PATH_SIZE_MAX];
                             while (R_SUCCEEDED(fsDirRead(&dir, &total, entries.size(), entries.data())) && total) {
                                 for (s64 i = 0; i < total; i++) {
                                     if (GetSourceType(entries[i].name) != SourceType::NONE) {
diff --git a/sys-tune/source/impl/resamplers/SDL_audioEX.c b/sys-tune/source/impl/resamplers/SDL_audioEX.c
index 8b7bcbc..578ccc8 100644
--- a/sys-tune/source/impl/resamplers/SDL_audioEX.c
+++ b/sys-tune/source/impl/resamplers/SDL_audioEX.c
@@ -19,6 +19,7 @@
 static int SDL_OutOfMemoryEX() { return -1; }
 #ifndef NDEBUG
 #include <stdio.h>
+#error [TUNE] NDEBUG not set
 static int SDL_PrintError(const char* e) { printf(e);  return -1; }
 #else
 static int SDL_PrintError(const char* e) { (void)e;  return -1; }
@@ -70,16 +71,16 @@ static int SDL_PrepareResampleFilter(void);
 #endif
 
 /* Function pointers set to a CPU-specific implementation. */
-SDL_AudioFilter_EX SDL_Convert_S8_to_F32 = NULL;
-SDL_AudioFilter_EX SDL_Convert_U8_to_F32 = NULL;
-SDL_AudioFilter_EX SDL_Convert_S16_to_F32 = NULL;
-SDL_AudioFilter_EX SDL_Convert_U16_to_F32 = NULL;
-SDL_AudioFilter_EX SDL_Convert_S32_to_F32 = NULL;
-SDL_AudioFilter_EX SDL_Convert_F32_to_S8 = NULL;
-SDL_AudioFilter_EX SDL_Convert_F32_to_U8 = NULL;
-SDL_AudioFilter_EX SDL_Convert_F32_to_S16 = NULL;
-SDL_AudioFilter_EX SDL_Convert_F32_to_U16 = NULL;
-SDL_AudioFilter_EX SDL_Convert_F32_to_S32 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_S8_to_F32 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_U8_to_F32 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_S16_to_F32 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_U16_to_F32 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_S32_to_F32 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_F32_to_S8 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_F32_to_U8 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_F32_to_S16 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_F32_to_U16 = NULL;
+static SDL_AudioFilter_EX SDL_Convert_F32_to_S32 = NULL;
 
 
 #define DIVBY128 0.0078125f
@@ -1607,7 +1608,7 @@ AllocateDataQueuePacket(SDL_DataQueue *queue)
     packet->datalen = 0;
     packet->startpos = 0;
     packet->next = NULL;
-                
+
     assert((queue->head != NULL) == (queue->queued_bytes != 0));
     if (queue->tail == NULL) {
         queue->head = packet;
@@ -2767,7 +2768,7 @@ SDL_BuildAudioCVT_EX(SDL_AudioCVT_EX * cvt,
            handled by now, but let's be defensive */
       return SDL_PrintError("Invalid channel combination");
     }
-    
+
     /* Do rate conversion, if necessary. Updates (cvt). */
     if (SDL_BuildAudioResampleCVT(cvt, dst_channels, src_rate, dst_rate) < 0) {
         return -1;              /* shouldn't happen, but just in case... */
@@ -3248,7 +3249,7 @@ SDL_AudioStreamPutEX(SDL_AudioStream *stream, const void *buf, int len)
             stream->staging_buffer_filled += len;
             return 0;
         }
- 
+
         /* Fill the staging buffer, process it, and continue */
         amount = (stream->staging_buffer_size - stream->staging_buffer_filled);
         assert(amount > 0);
diff --git a/sys-tune/source/impl/source.cpp b/sys-tune/source/impl/source.cpp
index b4826a4..6c126c8 100644
--- a/sys-tune/source/impl/source.cpp
+++ b/sys-tune/source/impl/source.cpp
@@ -5,27 +5,40 @@
 #include <cstring>
 
 // NOTE: when updating dr_libs, check for TUNE-FIX comment for patches.
+#ifdef WANT_FLAC
 #define DR_FLAC_IMPLEMENTATION
 #define DR_FLAC_NO_OGG
 #define DR_FLAC_NO_STDIO
 #include "dr_flac.h"
+#endif
 
+#ifdef WANT_MP3
 #define DR_MP3_IMPLEMENTATION
 #define DR_MP3_NO_STDIO
 #include "dr_mp3.h"
+#endif
 
+#ifdef WANT_WAV
 #define DR_WAV_IMPLEMENTATION
 #define DR_WAV_NO_STDIO
 #include "dr_wav.h"
+#endif
 
 namespace {
 
+    enum SeekOrigin {
+        SeekOrigin_SET,
+        SeekOrigin_CUR,
+        SeekOrigin_END
+    };
+
     size_t ReadCallback(void *pUserData, void *pBufferOut, size_t bytesToRead) {
         auto data = static_cast<Source *>(pUserData);
 
         return data->ReadFile(pBufferOut, bytesToRead);
     }
 
+#ifdef WANT_FLAC
     drflac_bool32 FlacSeekCallback(void *pUserData, int offset, drflac_seek_origin origin) {
         auto data = static_cast<Source *>(pUserData);
 
@@ -38,7 +51,9 @@ namespace {
         *pCursor = data->TellFile();
         return true;
     }
+#endif
 
+#ifdef WANT_MP3
     drmp3_bool32 Mp3SeekCallback(void *pUserData, int offset, drmp3_seek_origin origin) {
         auto data = static_cast<Source *>(pUserData);
 
@@ -55,7 +70,9 @@ namespace {
     void Mp3MetaCallback(void *pUserData, const drmp3_metadata* pMetadata) {
         // stubbed for now, will handle later to load album artwork.
     }
+#endif
 
+#ifdef WANT_WAV
     drwav_bool32 WavSeekCallback(void *pUserData, int offset, drwav_seek_origin origin) {
         auto data = static_cast<Source *>(pUserData);
 
@@ -68,6 +85,7 @@ namespace {
         *pCursor = data->TellFile();
         return true;
     }
+#endif
 
 #ifdef DEBUG
     void *log_malloc(size_t sz, void *) {
@@ -101,10 +119,16 @@ namespace {
     constexpr const drmp3_allocation_callbacks *mp3_alloc_ptr   = &mp3_alloc;
     constexpr const drwav_allocation_callbacks *wav_alloc_ptr   = &wav_alloc;
 #else
+#ifdef WANT_FLAC
     constexpr const drflac_allocation_callbacks *flac_alloc_ptr = nullptr;
+#endif
+#ifdef WANT_MP3
     constexpr const drmp3_allocation_callbacks *mp3_alloc_ptr   = nullptr;
+#endif
+#ifdef WANT_WAV
     constexpr const drwav_allocation_callbacks *wav_alloc_ptr   = nullptr;
 #endif
+#endif
 
 }
 
@@ -168,13 +192,13 @@ size_t Source::ReadFile(void *buffer, size_t read_size) {
 bool Source::SeekFile(s64 offset, int origin) {
     s64 new_offset;
     switch (origin) {
-        case DRWAV_SEEK_SET:
+        case SeekOrigin_SET:
             new_offset = offset;
             break;
-        case DRWAV_SEEK_CUR:
+        case SeekOrigin_CUR:
             new_offset = this->m_offset + offset;
             break;
-        case DRWAV_SEEK_END:
+        case SeekOrigin_END:
             new_offset = this->m_size + offset;
             break;
         default:
@@ -199,6 +223,7 @@ bool Source::Done() {
     return current == total;
 }
 
+#ifdef WANT_FLAC
 class FlacFile final : public Source {
   private:
     drflac *m_flac;
@@ -242,7 +267,9 @@ class FlacFile final : public Source {
         return this->m_flac->channels;
     }
 };
+#endif
 
+#ifdef WANT_MP3
 class Mp3File final : public Source {
   private:
     drmp3 m_mp3;
@@ -291,7 +318,9 @@ class Mp3File final : public Source {
         return this->m_mp3.channels;
     }
 };
+#endif
 
+#ifdef WANT_WAV
 class WavFile final : public Source {
   private:
     drwav m_wav;
@@ -341,6 +370,7 @@ class WavFile final : public Source {
         return this->m_wav.channels;
     }
 };
+#endif
 
 std::unique_ptr<Source> OpenFile(const char *path) {
     const auto type = GetSourceType(path);
diff --git a/sys-tune/source/main.cpp b/sys-tune/source/main.cpp
index 88968be..ec91d1b 100644
--- a/sys-tune/source/main.cpp
+++ b/sys-tune/source/main.cpp
@@ -13,7 +13,7 @@ u32 __nx_fs_num_sessions = 1;
 // TODO(TJ): calculate minimum heap
 // TODO(TJ): calculate reasonable amount of heap for playlist entries.
 void __libnx_initheap(void) {
-    static char inner_heap[1024 * 200];
+    static char inner_heap[1024 * 300];
     extern char *fake_heap_start;
     extern char *fake_heap_end;
 

From a2cc0c6c364a484798d93b00f04bf082018a37c9 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 20:06:51 +0100
Subject: [PATCH 16/19] ovl set file scan and playlist add limit.

---
 overlay/source/gui_browser.cpp | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/overlay/source/gui_browser.cpp b/overlay/source/gui_browser.cpp
index 3098710..48de340 100644
--- a/overlay/source/gui_browser.cpp
+++ b/overlay/source/gui_browser.cpp
@@ -119,10 +119,15 @@ void BrowserGui::scanCwd() {
 
     /* Iternate over directory. */
     s64 count = 0;
+    const u64 max = 2048; // max items to be added to the array.
     std::vector<FsDirectoryEntry> entries(64);
 
     while (R_SUCCEEDED(fsDirRead(&dir, &count, entries.size(), entries.data())) && count) {
         for (s64 i = 0; i < count; i++) {
+            if (folders.size() + files.size() >= max) {
+                break;
+            }
+
             const auto& entry = entries[i];
             if (entry.type == FsDirEntryType_Dir) {
                 /* Add directory entries. */
@@ -166,6 +171,11 @@ void BrowserGui::scanCwd() {
                 files.push_back(item);
             }
         }
+
+        if (folders.size() + files.size() >= max) {
+            m_frame->setToast("Stopped scanning folder", "maximum of " + std::to_string(max) + " hit");
+            break;
+        }
     }
     if (folders.size() == 0 && files.size() == 0) {
         this->m_list->addItem(new tsl::elm::CategoryHeader("Empty..."));
@@ -226,6 +236,7 @@ void BrowserGui::addAllToPlaylist() {
     std::vector<std::string> file_list;
     s64 songs_added = 0;
     s64 count = 0;
+    const u64 max = 512; // max set by PLAYLIST_ENTRY_MAX in music_player.cpp
     std::vector<FsDirectoryEntry> entries(64);
 
     while (R_SUCCEEDED(fsDirRead(&dir, &count, entries.size(), entries.data())) && count) {
@@ -233,8 +244,16 @@ void BrowserGui::addAllToPlaylist() {
             const auto& entry = entries[i];
             if (entry.type == FsDirEntryType_File && SupportsType(entry.name)){
                 file_list.emplace_back(entry.name);
+
+                if (file_list.size() >= max) {
+                    break;
+                }
             }
         }
+
+        if (file_list.size() >= max) {
+            break;
+        }
     }
 
     std::sort(file_list.begin(), file_list.end(), StringTextCompare);

From 9e6f4ce1946e28a1138ee31aff33be453c75d7a7 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Sun, 13 Jul 2025 20:21:22 +0100
Subject: [PATCH 17/19] reserve max amount of entries in a vector to avoid
 allocs/moves in the loop.

---
 overlay/source/gui_browser.cpp        | 7 +++++++
 sys-tune/source/impl/music_player.cpp | 9 +++++++++
 2 files changed, 16 insertions(+)

diff --git a/overlay/source/gui_browser.cpp b/overlay/source/gui_browser.cpp
index 48de340..f7a7ed2 100644
--- a/overlay/source/gui_browser.cpp
+++ b/overlay/source/gui_browser.cpp
@@ -122,6 +122,10 @@ void BrowserGui::scanCwd() {
     const u64 max = 2048; // max items to be added to the array.
     std::vector<FsDirectoryEntry> entries(64);
 
+    // avoid vector allocs / resize in the loop.
+    folders.reserve(max);
+    files.reserve(max);
+
     while (R_SUCCEEDED(fsDirRead(&dir, &count, entries.size(), entries.data())) && count) {
         for (s64 i = 0; i < count; i++) {
             if (folders.size() + files.size() >= max) {
@@ -239,6 +243,9 @@ void BrowserGui::addAllToPlaylist() {
     const u64 max = 512; // max set by PLAYLIST_ENTRY_MAX in music_player.cpp
     std::vector<FsDirectoryEntry> entries(64);
 
+    // avoid vector allocs / resize in the loop.
+    file_list.reserve(max);
+
     while (R_SUCCEEDED(fsDirRead(&dir, &count, entries.size(), entries.data())) && count) {
         for (s64 i = 0; i < count; i++) {
             const auto& entry = entries[i];
diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index ea11d9b..69c1fe5 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -33,6 +33,12 @@ namespace tune::impl {
 
         class PlayList {
         public:
+            void Init() {
+                Clear();
+                m_playlist.reserve(PLAYLIST_ENTRY_MAX);
+                m_shuffle_playlist.reserve(PLAYLIST_ENTRY_MAX);
+            }
+
             bool Add(const char* path, EnqueueType type) {
                 u32 index;
                 if (!FindNextFreeEntry(index)) {
@@ -305,6 +311,9 @@ namespace tune::impl {
         SetShuffleMode(static_cast<ShuffleMode>(config::get_shuffle()));
         SetDefaultTitleVolume(config::get_default_title_volume());
 
+        // reserves memory so that we don't allocate later on.
+        g_playlist.Init();
+
         return 0;
 
     }

From 69096dd3db628f87ceedf29a758f573229cd1c02 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Mon, 14 Jul 2025 11:55:17 +0100
Subject: [PATCH 18/19] fix wav seek crash, impl buffered io, reduce latency
 between songs, increase resample buf size, reduce heap (because we have
 buffered io), label sdl funcs static, reduce mp3 heap size

---
 overlay/source/gui_browser.cpp                |   2 +-
 sys-tune/source/impl/music_player.cpp         |  24 +++-
 sys-tune/source/impl/resamplers/SDL_audioEX.c |  80 ++-----------
 sys-tune/source/impl/source.cpp               | 111 +++++++++++++-----
 sys-tune/source/impl/source.hpp               |  15 ++-
 sys-tune/source/main.cpp                      |   2 +-
 sys-tune/sys-tune.json                        |   4 +-
 7 files changed, 128 insertions(+), 110 deletions(-)

diff --git a/overlay/source/gui_browser.cpp b/overlay/source/gui_browser.cpp
index f7a7ed2..91e78e9 100644
--- a/overlay/source/gui_browser.cpp
+++ b/overlay/source/gui_browser.cpp
@@ -240,7 +240,7 @@ void BrowserGui::addAllToPlaylist() {
     std::vector<std::string> file_list;
     s64 songs_added = 0;
     s64 count = 0;
-    const u64 max = 512; // max set by PLAYLIST_ENTRY_MAX in music_player.cpp
+    const u64 max = 300; // max set by PLAYLIST_ENTRY_MAX in music_player.cpp
     std::vector<FsDirectoryEntry> entries(64);
 
     // avoid vector allocs / resize in the loop.
diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index 69c1fe5..50696dd 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -16,7 +16,7 @@ namespace tune::impl {
 
     namespace {
         constexpr float VOLUME_MAX = 1.f;
-        constexpr auto PLAYLIST_ENTRY_MAX = 512; // 128k
+        constexpr auto PLAYLIST_ENTRY_MAX = 300; // 75k
         constexpr auto PATH_SIZE_MAX = 256;
 
         struct PlaylistID {
@@ -247,6 +247,9 @@ namespace tune::impl {
 
             g_source = source.get();
 
+            // for the first buffer, use very small buffer sizes to reduce latency between songs.
+            int first = 1;
+
             while (g_should_run && g_status == PlayerStatus::Playing) {
                 if (g_should_pause) {
                     svcSleepThread(17'000'000);
@@ -270,7 +273,13 @@ namespace tune::impl {
 
                 bool error = false;
                 if (buffer) {
-                    const auto nSamples = source->Resample((u8*)buffer->buffer, AUDIO_BUFFER_SIZE * sizeof(s16));
+                    auto buffer_size = AUDIO_BUFFER_SIZE * sizeof(s16);
+                    if (first) {
+                        first--;
+                        buffer_size = std::min(512 * sizeof(s16), buffer_size);
+                    }
+
+                    const auto nSamples = source->Resample((u8*)buffer->buffer, buffer_size);
                     if (nSamples <= 0) {
                         error = true;
                     } else {
@@ -343,13 +352,22 @@ namespace tune::impl {
 
                             s64 total;
                             char full_path[PATH_SIZE_MAX];
+                            Result rc = 0;
+
                             while (R_SUCCEEDED(fsDirRead(&dir, &total, entries.size(), entries.data())) && total) {
                                 for (s64 i = 0; i < total; i++) {
                                     if (GetSourceType(entries[i].name) != SourceType::NONE) {
                                         std::snprintf(full_path, sizeof(full_path), "%s/%s", load_path, entries[i].name);
-                                        Enqueue(full_path, std::strlen(full_path), EnqueueType::Back);
+                                        rc = Enqueue(full_path, std::strlen(full_path), EnqueueType::Back);
+                                        if (rc == tune::OutOfMemory) {
+                                            break;
+                                        }
                                     }
                                 }
+
+                                if (rc == tune::OutOfMemory) {
+                                    break;
+                                }
                             }
 
                             fsDirClose(&dir);
diff --git a/sys-tune/source/impl/resamplers/SDL_audioEX.c b/sys-tune/source/impl/resamplers/SDL_audioEX.c
index 578ccc8..7a07efa 100644
--- a/sys-tune/source/impl/resamplers/SDL_audioEX.c
+++ b/sys-tune/source/impl/resamplers/SDL_audioEX.c
@@ -39,7 +39,7 @@ static int SDL_PrintError(const char* e) { (void)e;  return -1; }
 /* Functions and variables exported from SDL_audio.c for SDL_sysaudio.c */
 
 /* Choose the audio filter functions below */
-void SDL_ChooseAudioConverters(void);
+static void SDL_ChooseAudioConverters(void);
 
 /* You need to call SDL_PrepareResampleFilter() before using the internal resampler. */
 static int SDL_PrepareResampleFilter(void);
@@ -1464,14 +1464,12 @@ void SDL_ChooseAudioConverters(void)
 struct SDL_DataQueue;
 typedef struct SDL_DataQueue SDL_DataQueue;
 
-SDL_DataQueue *SDL_NewDataQueue(const size_t packetlen, const size_t initialslack);
-void SDL_FreeDataQueue(SDL_DataQueue *queue);
-void SDL_ClearDataQueue(SDL_DataQueue *queue, const size_t slack);
-int SDL_WriteToDataQueue(SDL_DataQueue *queue, const void *data, const size_t len);
-size_t SDL_ReadFromDataQueue(SDL_DataQueue *queue, void *buf, const size_t len);
-size_t SDL_PeekIntoDataQueue(SDL_DataQueue *queue, void *buf, const size_t len);
-size_t SDL_CountDataQueue(SDL_DataQueue *queue);
-void *SDL_ReserveSpaceInDataQueue(SDL_DataQueue *queue, const size_t len);
+static SDL_DataQueue *SDL_NewDataQueue(const size_t packetlen, const size_t initialslack);
+static void SDL_FreeDataQueue(SDL_DataQueue *queue);
+static void SDL_ClearDataQueue(SDL_DataQueue *queue, const size_t slack);
+static int SDL_WriteToDataQueue(SDL_DataQueue *queue, const void *data, const size_t len);
+static size_t SDL_ReadFromDataQueue(SDL_DataQueue *queue, void *buf, const size_t len);
+static size_t SDL_CountDataQueue(SDL_DataQueue *queue);
 
 typedef struct SDL_DataQueuePacket
 {
@@ -1674,31 +1672,6 @@ SDL_WriteToDataQueue(SDL_DataQueue *queue, const void *_data, const size_t _len)
     return 0;
 }
 
-size_t
-SDL_PeekIntoDataQueue(SDL_DataQueue *queue, void *_buf, const size_t _len)
-{
-    size_t len = _len;
-    uint8_t *buf = (uint8_t *) _buf;
-    uint8_t *ptr = buf;
-    SDL_DataQueuePacket *packet;
-
-    if (!queue) {
-        return 0;
-    }
-
-    for (packet = queue->head; len && packet; packet = packet->next) {
-        const size_t avail = packet->datalen - packet->startpos;
-        const size_t cpy = SDL_minEX(len, avail);
-        assert(queue->queued_bytes >= avail);
-
-        memcpy(ptr, packet->data + packet->startpos, cpy);
-        ptr += cpy;
-        len -= cpy;
-    }
-
-    return (size_t) (ptr - buf);
-}
-
 size_t
 SDL_ReadFromDataQueue(SDL_DataQueue *queue, void *_buf, const size_t _len)
 {
@@ -1745,45 +1718,6 @@ SDL_CountDataQueue(SDL_DataQueue *queue)
     return queue ? queue->queued_bytes : 0;
 }
 
-void *
-SDL_ReserveSpaceInDataQueue(SDL_DataQueue *queue, const size_t len)
-{
-    SDL_DataQueuePacket *packet;
-
-    if (!queue) {
-        SDL_PrintError("queue");
-        return NULL;
-    } else if (len == 0) {
-        SDL_PrintError("len");
-        return NULL;
-    } else if (len > queue->packet_size) {
-        SDL_PrintError("len is larger than packet size");
-        return NULL;
-    }
-
-    packet = queue->head;
-    if (packet) {
-        const size_t avail = queue->packet_size - packet->datalen;
-        if (len <= avail) {  /* we can use the space at end of this packet. */
-            void *retval = packet->data + packet->datalen;
-            packet->datalen += len;
-            queue->queued_bytes += len;
-            return retval;
-        }
-    }
-
-    /* Need a fresh packet. */
-    packet = AllocateDataQueuePacket(queue);
-    if (!packet) {
-        SDL_OutOfMemoryEX();
-        return NULL;
-    }
-
-    packet->datalen = len;
-    queue->queued_bytes += len;
-    return packet->data;
-}
-
 // AUDIOCVT
 /* Convert from stereo to mono. Average left and right. */
 static void
diff --git a/sys-tune/source/impl/source.cpp b/sys-tune/source/impl/source.cpp
index 6c126c8..2972fdb 100644
--- a/sys-tune/source/impl/source.cpp
+++ b/sys-tune/source/impl/source.cpp
@@ -15,6 +15,7 @@
 #ifdef WANT_MP3
 #define DR_MP3_IMPLEMENTATION
 #define DR_MP3_NO_STDIO
+#define DRMP3_DATA_CHUNK_SIZE DRMP3_MIN_DATA_CHUNK_SIZE
 #include "dr_mp3.h"
 #endif
 
@@ -66,10 +67,6 @@ namespace {
         *pCursor = data->TellFile();
         return true;
     }
-
-    void Mp3MetaCallback(void *pUserData, const drmp3_metadata* pMetadata) {
-        // stubbed for now, will handle later to load album artwork.
-    }
 #endif
 
 #ifdef WANT_WAV
@@ -134,6 +131,7 @@ namespace {
 
 Source::Source(FsFile &&file) : m_file(file), m_offset(0), m_size(0) {
     file = {};
+    m_buffered.off = m_buffered.size = 0;
     if (R_FAILED(fsFileGetSize(&this->m_file, &this->m_size)))
         this->m_size = 0;
 }
@@ -144,7 +142,13 @@ Source::~Source() {
     this->m_size   = 0;
 }
 
-bool Source::SetupResampler(u32 output_channels, u32 output_sample_rate) {
+bool Source::SetupResampler(int output_channels, int output_sample_rate) {
+    // check if we even need the resampler.
+    m_native_stream = GetChannelCount() == output_channels && GetSampleRate() == output_sample_rate;
+    if (m_native_stream) {
+        return true;
+    }
+
     m_sdl_stream = UniqueAudioStream{
         SDL_NewAudioStreamEX(
         AUDIO_S16, GetChannelCount(), GetSampleRate(),
@@ -159,34 +163,85 @@ s64 Source::Resample(u8* out, std::size_t size) {
         return -1;
     }
 
-    s64 data_read = 0;
-    while (size > 0) {
-        if (auto sz = SDL_AudioStreamGetEX(m_sdl_stream.get(), out, size); sz != 0) {
-            size -= sz;
-            out += sz;
-            data_read += sz;
-        } else {
-            const auto dec_got = Decode(m_resample_buffer.size(), m_resample_buffer.data());
-            if (dec_got == 0) {
-                return data_read;
-            }
-            if (0 != SDL_AudioStreamPutEX(m_sdl_stream.get(), m_resample_buffer.data(), dec_got)) {
+    if (m_native_stream) {
+        return Decode(size / sizeof(s16), (s16*)out);
+    } else {
+        s64 data_read = 0;
+        while (size > 0) {
+            const auto sz = SDL_AudioStreamGetEX(m_sdl_stream.get(), out, size);
+
+            if (sz < 0) {
                 return -1;
+            } else if (sz > 0) {
+                size -= sz;
+                out += sz;
+                data_read += sz;
+            } else {
+                const auto dec_got = Decode(m_resample_buffer.size(), m_resample_buffer.data());
+                if (dec_got == 0) {
+                    return data_read;
+                }
+                if (0 != SDL_AudioStreamPutEX(m_sdl_stream.get(), m_resample_buffer.data(), dec_got)) {
+                    return -1;
+                }
             }
         }
-    }
 
-    return data_read;
+        return data_read;
+    }
 }
 
-size_t Source::ReadFile(void *buffer, size_t read_size) {
-    size_t bytes_read = 0;
-    if (R_SUCCEEDED(fsFileRead(&this->m_file, this->m_offset, buffer, read_size, 0, &bytes_read))) {
-        this->m_offset += bytes_read;
-        return bytes_read;
-    } else {
-        return 0;
+size_t Source::ReadFile(void *_buffer, size_t read_size) {
+    auto dst = static_cast<u8*>(_buffer);
+    size_t amount = 0;
+
+    // check if we already have this data buffered.
+    if (m_buffered.size) {
+        // check if we can read this data into the beginning of dst.
+        if (this->m_offset < m_buffered.off + m_buffered.size && this->m_offset >= m_buffered.off) {
+            const auto off = this->m_offset - m_buffered.off;
+            const auto size = std::min<s64>(read_size, m_buffered.size - off);
+            std::memcpy(dst, m_buffered.data + off, size);
+
+            read_size -= size;
+            m_offset += size;
+            amount += size;
+            dst += size;
+        }
+    }
+
+    if (read_size) {
+        u64 bytes_read = 0;
+
+        // if the dst buffer is big enough, read data in place.
+        if (read_size >= sizeof(m_buffered.data)) {
+            if (R_SUCCEEDED(fsFileRead(&this->m_file, this->m_offset, dst, read_size, 0, &bytes_read)) && bytes_read) {
+                read_size -= bytes_read;
+                m_offset += bytes_read;
+                amount += bytes_read;
+                dst += bytes_read;
+
+                // save the last chunk of data to the m_buffered io.
+                const auto max_advance = std::min(amount, sizeof(m_buffered.data));
+                m_buffered.off = m_offset - max_advance;
+                m_buffered.size = max_advance;
+                std::memcpy(m_buffered.data, dst - max_advance, max_advance);
+            }
+        } else if (R_SUCCEEDED(fsFileRead(&this->m_file, this->m_offset, m_buffered.data, sizeof(m_buffered.data), 0, &bytes_read)) && bytes_read) {
+            const auto max_advance = std::min(read_size, bytes_read);
+            std::memcpy(dst, m_buffered.data, max_advance);
+
+            m_buffered.off = m_offset;
+            m_buffered.size = bytes_read;
+
+            read_size -= max_advance;
+            m_offset += max_advance;
+            amount += max_advance;
+            dst += max_advance;
+        }
     }
+
+    return amount;
 }
 
 bool Source::SeekFile(s64 offset, int origin) {
@@ -205,7 +260,7 @@ bool Source::SeekFile(s64 offset, int origin) {
             return false;
     }
 
-    if (new_offset <= this->m_size) {
+    if (new_offset >= 0 && new_offset <= this->m_size) {
         this->m_offset = new_offset;
         return true;
     } else {
@@ -278,7 +333,7 @@ class Mp3File final : public Source {
 
   public:
     Mp3File(FsFile &&file) : Source(std::move(file)) {
-        if (drmp3_init(&this->m_mp3, ReadCallback, Mp3SeekCallback, Mp3TellCallback, Mp3MetaCallback, this, mp3_alloc_ptr)) {
+        if (drmp3_init(&this->m_mp3, ReadCallback, Mp3SeekCallback, Mp3TellCallback, nullptr, this, mp3_alloc_ptr)) {
             this->m_total_frame_count = drmp3_get_pcm_frame_count(&this->m_mp3);
             this->initialized         = true;
         }
diff --git a/sys-tune/source/impl/source.hpp b/sys-tune/source/impl/source.hpp
index 43e5fb2..6ce2469 100644
--- a/sys-tune/source/impl/source.hpp
+++ b/sys-tune/source/impl/source.hpp
@@ -30,19 +30,30 @@ class Source {
     template<auto func>
     using Deleter = FunctionCaller<func>;
 
+    template<size_t Size>
+    struct BufferedFileData {
+        u8 data[Size];
+        s64 off;
+        s64 size;
+  };
+
   protected:
-    std::array<s16, 512> m_resample_buffer;
+    // increasing the size of this buffer also increases the memory used by the resampler.
+    static inline std::array<s16, 1024 * 4> m_resample_buffer;
+    // increasing this reduces io calls.
+    static inline BufferedFileData<1024 * 64> m_buffered;
     LockableMutex m_mutex;
 
   private:
     using UniqueAudioStream = std::unique_ptr<SDL_AudioStream, Deleter<&SDL_FreeAudioStreamEX>>;
     UniqueAudioStream m_sdl_stream{nullptr};
+    bool m_native_stream{};
 
   public:
     Source(FsFile &&file);
     virtual ~Source();
 
-    bool SetupResampler(u32 output_channels, u32 output_sample_rate);
+    bool SetupResampler(int output_channels, int output_sample_rate);
     s64 Resample(u8* out, std::size_t size);
 
     size_t ReadFile(void *buffer, size_t read_size);
diff --git a/sys-tune/source/main.cpp b/sys-tune/source/main.cpp
index ec91d1b..bcd958c 100644
--- a/sys-tune/source/main.cpp
+++ b/sys-tune/source/main.cpp
@@ -13,7 +13,7 @@ u32 __nx_fs_num_sessions = 1;
 // TODO(TJ): calculate minimum heap
 // TODO(TJ): calculate reasonable amount of heap for playlist entries.
 void __libnx_initheap(void) {
-    static char inner_heap[1024 * 300];
+    static char inner_heap[1024 * 250];
     extern char *fake_heap_start;
     extern char *fake_heap_end;
 
diff --git a/sys-tune/sys-tune.json b/sys-tune/sys-tune.json
index ab7acb9..df9f06d 100644
--- a/sys-tune/sys-tune.json
+++ b/sys-tune/sys-tune.json
@@ -3,7 +3,7 @@
 	"title_id": "0x4200000000000000",
 	"title_id_range_min": "0x4200000000000000",
 	"title_id_range_max": "0x4200000000000000",
-	"main_thread_stack_size": "0x00001000",
+	"main_thread_stack_size": "0x00004000",
 	"main_thread_priority": 48,
 	"default_cpu_id": 3,
 	"process_category": 0,
@@ -96,4 +96,4 @@
 			"value": 128
 		}
 	]
-}
\ No newline at end of file
+}

From c01ea1172e705a52c27c5281566eb153ea06b456 Mon Sep 17 00:00:00 2001
From: ITotalJustice <47043333+ITotalJustice@users.noreply.github.com>
Date: Mon, 14 Jul 2025 12:12:09 +0100
Subject: [PATCH 19/19] added support for re-shuffling the playlist when
 shuffle mode is toggled on.

this allows for a new list to be created, similar to how it works in youtube music.
---
 sys-tune/source/impl/music_player.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index 50696dd..85e77c0 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -96,6 +96,18 @@ namespace tune::impl {
                 return true;
             }
 
+            // Re-randomises the shuffled play order in place.
+            void Shuffle() {
+                // Fisher-Yates: slot i - 1 trades places with a slot drawn from
+                // [0, i); with a uniform randomGet64() every ordering is equally likely.
+                // Drawing from the whole list on each step, as a naive shuffle does, is biased.
+                // Lists with fewer than two entries never enter the loop.
+                for (auto i = m_shuffle_playlist.size(); i > 1; i--) {
+                    const auto index = randomGet64() % i;
+                    std::swap(m_shuffle_playlist[i - 1], m_shuffle_playlist[index]);
+                }
+            }
+
             const char* GetPath(u32 index, ShuffleMode shuffle) const {
                 return GetPath(Get(index, shuffle));
             }
@@ -564,6 +576,11 @@ namespace tune::impl {
     void SetShuffleMode(ShuffleMode mode) {
         std::scoped_lock lk(g_mutex);
 
+        // if we just enabled shuffle mode, re-shuffle the playlist.
+        if (g_shuffle == ShuffleMode::Off && mode == ShuffleMode::On) {
+            g_playlist.Shuffle();
+        }
+
         g_shuffle = mode;
     }