From 103513b186b3d978a5e82d1e5f171ebdce1062bf Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Mon, 11 Aug 2025 17:30:09 +0100 Subject: [PATCH 01/14] Add asymmetry parameter --- CMakeLists.txt | 2 +- cmd/Makefile | 26 +++++++++++++------------- cmd/main.cpp | 5 +++-- signalsmith-stretch.h | 5 +++-- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dfb23d5..ffe805b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ include(FetchContent) FetchContent_Declare( signalsmith-linear GIT_REPOSITORY https://github.com/Signalsmith-Audio/linear.git - GIT_TAG 0.2.3 + GIT_TAG 0.2.4 GIT_SHALLOW ON ) FetchContent_MakeAvailable(signalsmith-linear) diff --git a/cmd/Makefile b/cmd/Makefile index 6887a65..abd3678 100644 --- a/cmd/Makefile +++ b/cmd/Makefile @@ -11,19 +11,19 @@ out/stretch: main.cpp ../signalsmith-stretch.h util/*.h util/*.hxx # Uses input files from: https://signalsmith-audio.co.uk/code/stretch/inputs.zip examples: out/stretch mkdir -p out/examples - inputs/run-all.sh out/examples/u2- out/stretch --semitones=2 - inputs/run-all.sh out/examples/d2- out/stretch --semitones=-2 - inputs/run-all.sh out/examples/u4- out/stretch --semitones=4 - inputs/run-all.sh out/examples/d4- out/stretch --semitones=-4 - inputs/run-all.sh out/examples/u8- out/stretch --semitones=8 - inputs/run-all.sh out/examples/d8- out/stretch --semitones=-8 - inputs/run-all.sh out/examples/u16- out/stretch --semitones=16 - inputs/run-all.sh out/examples/d16- out/stretch --semitones=-16 - inputs/run-all.sh out/examples/t_8- out/stretch --time=0.8 - inputs/run-all.sh out/examples/t1_2- out/stretch --time=1.2 - inputs/run-all.sh out/examples/t1_5- out/stretch --time=1.5 - inputs/run-all.sh out/examples/t2- out/stretch --time=2 - inputs/run-all.sh out/examples/t4- out/stretch --time=4 + inputs/run-all.sh out/examples/u2- out/stretch --semitones=2 --asymmetry=0.5 + inputs/run-all.sh out/examples/d2- out/stretch --semitones=-2 --asymmetry=0.5 + inputs/run-all.sh out/examples/u4- out/stretch --semitones=4 --asymmetry=0.5 + inputs/run-all.sh out/examples/d4- out/stretch --semitones=-4 --asymmetry=0.5 + inputs/run-all.sh out/examples/u8- out/stretch --semitones=8 --asymmetry=0.5 + inputs/run-all.sh out/examples/d8- out/stretch --semitones=-8 --asymmetry=0.5 + inputs/run-all.sh out/examples/u16- out/stretch --semitones=16 --asymmetry=0.5 + inputs/run-all.sh out/examples/d16- out/stretch --semitones=-16 --asymmetry=0.5 + inputs/run-all.sh out/examples/t_8- out/stretch --time=0.8 --asymmetry=0.5 + inputs/run-all.sh out/examples/t1_2- out/stretch --time=1.2 --asymmetry=0.5 + inputs/run-all.sh out/examples/t1_5- out/stretch --time=1.5 --asymmetry=0.5 + inputs/run-all.sh out/examples/t2- out/stretch --time=2 --asymmetry=0.5 + inputs/run-all.sh out/examples/t4- out/stretch --time=4 --asymmetry=0.5 TEST_WAV ?= "inputs/voice.wav" diff --git a/cmd/main.cpp b/cmd/main.cpp index 0ff29ea..87c1eb2 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -19,12 +19,13 @@ int main(int argc, char* argv[]) { std::string inputWav = args.arg("input.wav", "16-bit WAV file"); std::string outputWav = args.arg("output.wav", "output WAV file"); + double time = args.flag("time", "time-stretch factor", 1); double semitones = args.flag("semitones", "pitch-shift amount", 0); double formants = args.flag("formant", "formant-shift amount (semitones)", 0); bool formantComp = args.hasFlag("formant-comp", "formant compensation"); double formantBase = args.flag("formant-base", "formant base frequency (Hz, 0=auto)", 100); double tonality = args.flag("tonality", "tonality limit (Hz)", 8000); - double time = args.flag("time", "time-stretch factor", 1); + double asymmetry = args.flag("asymmetry", "asymmetrical STFT analysis (0-1)", 0); bool splitComputation = args.hasFlag("split-computation", "distributes the computation more evenly (but higher latency)"); args.errorExit(); // exits on error, or with `--help` @@ -42,7 +43,7 @@ int main(int argc, char* argv[]) { outWav.resize(outputLength); SignalsmithStretch stretch; - stretch.presetDefault(int(inWav.channels), inWav.sampleRate, splitComputation); + stretch.configure(int(inWav.channels), inWav.sampleRate*0.12, inWav.sampleRate*0.03, splitComputation, asymmetry); stretch.setTransposeSemitones(semitones, tonality/inWav.sampleRate); stretch.setFormantSemitones(formants, formantComp); stretch.setFormantBase(formantBase/inWav.sampleRate); diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index e05406c..effc0fb 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -68,11 +68,12 @@ struct SignalsmithStretch { } // Manual setup - void configure(int nChannels, int blockSamples, int intervalSamples, bool splitComputation=false) { + void configure(int nChannels, int blockSamples, int intervalSamples, bool splitComputation=false, Sample asymmetry=0) { _splitComputation = splitComputation; channels = nChannels; + asymmetry *= 1 - 2.0*intervalSamples/blockSamples; // maximum asymmetry gives latency of two intervals stft.configure(channels, channels, blockSamples, intervalSamples + 1); - stft.setInterval(intervalSamples, stft.kaiser); + stft.setInterval(intervalSamples, stft.kaiser, asymmetry); stft.reset(0.1); stashedInput = stft.input; stashedOutput = stft.output; From e8f4c8a20387effdda59bcb2c042fb041e45cead Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Wed, 13 Aug 2025 15:31:05 +0100 Subject: [PATCH 02/14] Bump Linear to 0.2.8 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ffe805b..6501964 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ include(FetchContent) FetchContent_Declare( signalsmith-linear GIT_REPOSITORY https://github.com/Signalsmith-Audio/linear.git - GIT_TAG 0.2.4 + GIT_TAG 0.2.8 GIT_SHALLOW ON ) FetchContent_MakeAvailable(signalsmith-linear) From 8ea9c1d4da3fbaa2706ba75dbf4573028cd3f724 Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Wed, 15 Oct 2025 17:42:49 +0100 Subject: [PATCH 03/14] Plot processing time for chunks, including output-seek --- cmd/Makefile | 15 ++++++++------- cmd/main.cpp | 43 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 8 deletions(-) diff --git a/cmd/Makefile b/cmd/Makefile index abd3678..c184be5 100644 --- a/cmd/Makefile +++ b/cmd/Makefile @@ -1,5 +1,13 @@ all: out/stretch +dev: out/stretch + ./out/stretch inputs/dev.wav out/dev-2048.wav --process-chunk=2048 --semitones=4 + ./out/stretch inputs/dev.wav out/dev-512.wav --process-chunk=512 --semitones=4 + ./out/stretch inputs/dev.wav out/dev-100.wav --process-chunk=100 --semitones=4 + ./out/stretch inputs/dev.wav out/dev-2048-sc.wav --process-chunk=2048 --split-computation --semitones=4 + ./out/stretch inputs/dev.wav out/dev-512-sc.wav --process-chunk=512 --split-computation --semitones=4 + ./out/stretch inputs/dev.wav out/dev-100-sc.wav --process-chunk=100 --split-computation --semitones=4 + out/stretch: main.cpp ../signalsmith-stretch.h util/*.h util/*.hxx mkdir -p out g++ -std=c++11 -O3 -g \ @@ -27,13 +35,6 @@ examples: out/stretch TEST_WAV ?= "inputs/voice.wav" -dev: out/stretch - out/stretch --time=0.8 --semitones=10 $(TEST_WAV) out/shift.wav - out/stretch --time=0.8 --semitones=10 --formant-comp $(TEST_WAV) out/shift-fc.wav - out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 $(TEST_WAV) out/shift-fc-f3.wav - out/stretch --time=0.8 --semitones=10 --formant-comp --formant=3 --formant-base=500 $(TEST_WAV) out/shift-fc-f3-fb500.wav - out/stretch --time=0.8 --semitones=10 --formant-comp --formant=2 --formant-base=100 $(TEST_WAV) out/shift-fc-f2-fb100.wav - clean: rm -rf out diff --git a/cmd/main.cpp b/cmd/main.cpp index 87c1eb2..03eef31 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -7,6 +7,9 @@ using SignalsmithStretch = signalsmith::stretch::SignalsmithStretch; #include "./util/simple-args.h" #include "./util/wav.h" +#include "./util/stopwatch.h" + +#include "plot/plot.h" int main(int argc, char* argv[]) { SimpleArgs args(argc, argv); @@ -27,6 +30,7 @@ int main(int argc, char* argv[]) { double tonality = args.flag("tonality", "tonality limit (Hz)", 8000); double asymmetry = args.flag("asymmetry", "asymmetrical STFT analysis (0-1)", 0); bool splitComputation = args.hasFlag("split-computation", "distributes the computation more evenly (but higher latency)"); + int processChunkSize = args.flag("process-chunk", "process chunk size in samples", -1); args.errorExit(); // exits on error, or with `--help` std::cout << inputWav << " -> " << outputWav << "\n"; @@ -57,7 +61,9 @@ int main(int argc, char* argv[]) { // First, an "output seek", where we provide a chunk of input. // This is suitable for starting playback of a sample at a given playback rate. auto seekLength = stretch.outputSeekLength(1/time); + signalsmith::Stopwatch stopwatch; stretch.outputSeek(inWav, seekLength); + double seekTime = stopwatch.seconds(stopwatch.lap()); // At this point, the next output samples we get will correspond to the beginning of the audio file. // We're going to process until *just* before the end of the audio file (so we can get a tidier end using `.flush()`. @@ -76,7 +82,42 @@ int main(int argc, char* argv[]) { // OK, go for it inWav.offset = seekLength; - stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex); + if (processChunkSize <= 0) { + stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex); + } else { + signalsmith::plot::Plot2D timePlot(500, 200); + timePlot.x.major(0); + timePlot.y.major(0); + timePlot.y.minor(0.01*processChunkSize/inWav.sampleRate, "1%"); + timePlot.y.minor(0.02*processChunkSize/inWav.sampleRate, "2%"); + auto &timeLine = timePlot.line(); + auto &timeLineSeek = timePlot.line().fillToY(0); + timeLine.add(inWav.offset, 0); // output seek + timeLineSeek.add(0, 0); + timeLineSeek.add(0, seekTime); + timeLineSeek.add(inWav.offset, seekTime); + timeLineSeek.add(inWav.offset, 0); + + float residue = 0.f; + while (inWav.offset < size_t(inputIndex)) { + int toProcess = std::min(processChunkSize, inputIndex - inWav.offset); + float outputPrecise = toProcess * time + residue; + int outputSamples = std::round(outputPrecise); + residue = outputPrecise - outputSamples; + + stopwatch.startLap(); + stretch.process(inWav, toProcess, outWav, outputSamples); + double time = stopwatch.seconds(stopwatch.lap()); + timeLine.add(inWav.offset, time); + timeLine.add(inWav.offset + toProcess, time); + + inWav.offset += toProcess; + outWav.offset += outputSamples; + } + + timeLine.add(inWav.offset, 0); + timePlot.write(outputWav + ".svg"); + } // And as promised, get the last bits using `.flush()`, which does some extra stuff to avoid introducing clicks. outWav.offset = outputIndex; From 395b8a53e4b3b63cad0adec84ba251fd56a56525 Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Fri, 17 Oct 2025 12:18:02 +0100 Subject: [PATCH 04/14] Plot input/output windows --- cmd/Makefile | 40 +++++++++++---------- cmd/main.cpp | 41 +++++++++++++++++++--- signalsmith-stretch.h | 81 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 126 insertions(+), 36 deletions(-) diff --git a/cmd/Makefile b/cmd/Makefile index c184be5..4dfcb86 100644 --- a/cmd/Makefile +++ b/cmd/Makefile @@ -1,12 +1,14 @@ all: out/stretch +DEV_FLAGS := --semitones=4 --time=1 + dev: out/stretch - ./out/stretch inputs/dev.wav out/dev-2048.wav --process-chunk=2048 --semitones=4 - ./out/stretch inputs/dev.wav out/dev-512.wav --process-chunk=512 --semitones=4 - ./out/stretch inputs/dev.wav out/dev-100.wav --process-chunk=100 --semitones=4 - ./out/stretch inputs/dev.wav out/dev-2048-sc.wav --process-chunk=2048 --split-computation --semitones=4 - ./out/stretch inputs/dev.wav out/dev-512-sc.wav --process-chunk=512 --split-computation --semitones=4 - ./out/stretch inputs/dev.wav out/dev-100-sc.wav --process-chunk=100 --split-computation --semitones=4 + ./out/stretch inputs/dev.wav out/dev-2048.wav --process-chunk=2048 $(DEV_FLAGS) + ./out/stretch inputs/dev.wav out/dev-512.wav --process-chunk=512 $(DEV_FLAGS) + ./out/stretch inputs/dev.wav out/dev-100.wav --process-chunk=100 $(DEV_FLAGS) + ./out/stretch inputs/dev.wav out/dev-2048-sc.wav --process-chunk=2048 --split-computation $(DEV_FLAGS) + ./out/stretch inputs/dev.wav out/dev-512-sc.wav --process-chunk=512 --split-computation $(DEV_FLAGS) + ./out/stretch inputs/dev.wav out/dev-100-sc.wav --process-chunk=100 --split-computation $(DEV_FLAGS) out/stretch: main.cpp ../signalsmith-stretch.h util/*.h util/*.hxx mkdir -p out @@ -19,19 +21,19 @@ out/stretch: main.cpp ../signalsmith-stretch.h util/*.h util/*.hxx # Uses input files from: https://signalsmith-audio.co.uk/code/stretch/inputs.zip examples: out/stretch mkdir -p out/examples - inputs/run-all.sh out/examples/u2- out/stretch --semitones=2 --asymmetry=0.5 - inputs/run-all.sh out/examples/d2- out/stretch --semitones=-2 --asymmetry=0.5 - inputs/run-all.sh out/examples/u4- out/stretch --semitones=4 --asymmetry=0.5 - inputs/run-all.sh out/examples/d4- out/stretch --semitones=-4 --asymmetry=0.5 - inputs/run-all.sh out/examples/u8- out/stretch --semitones=8 --asymmetry=0.5 - inputs/run-all.sh out/examples/d8- out/stretch --semitones=-8 --asymmetry=0.5 - inputs/run-all.sh out/examples/u16- out/stretch --semitones=16 --asymmetry=0.5 - inputs/run-all.sh out/examples/d16- out/stretch --semitones=-16 --asymmetry=0.5 - inputs/run-all.sh out/examples/t_8- out/stretch --time=0.8 --asymmetry=0.5 - inputs/run-all.sh out/examples/t1_2- out/stretch --time=1.2 --asymmetry=0.5 - inputs/run-all.sh out/examples/t1_5- out/stretch --time=1.5 --asymmetry=0.5 - inputs/run-all.sh out/examples/t2- out/stretch --time=2 --asymmetry=0.5 - inputs/run-all.sh out/examples/t4- out/stretch --time=4 --asymmetry=0.5 + inputs/run-all.sh out/examples/u2- out/stretch --semitones=2 + inputs/run-all.sh out/examples/d2- out/stretch --semitones=-2 + inputs/run-all.sh out/examples/u4- out/stretch --semitones=4 + inputs/run-all.sh out/examples/d4- out/stretch --semitones=-4 + inputs/run-all.sh out/examples/u8- out/stretch --semitones=8 + inputs/run-all.sh out/examples/d8- out/stretch --semitones=-8 + inputs/run-all.sh out/examples/u16- out/stretch --semitones=16 + inputs/run-all.sh out/examples/d16- out/stretch --semitones=-16 + inputs/run-all.sh out/examples/t_8- out/stretch --time=0.8 + inputs/run-all.sh out/examples/t1_2- out/stretch --time=1.2 + inputs/run-all.sh out/examples/t1_5- out/stretch --time=1.5 + inputs/run-all.sh out/examples/t2- out/stretch --time=2 + inputs/run-all.sh out/examples/t4- out/stretch --time=4 TEST_WAV ?= "inputs/voice.wav" diff --git a/cmd/main.cpp b/cmd/main.cpp index 03eef31..30bc522 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -52,6 +52,37 @@ int main(int argc, char* argv[]) { stretch.setFormantSemitones(formants, formantComp); stretch.setFormantBase(formantBase/inWav.sampleRate); + signalsmith::plot::Figure figure; + auto writeLater = figure.writeLater(outputWav + "-blocks.svg"); + size_t plotBlockCounter = 0; + { + auto &inputPlot = figure(0, 0).plot(800, 150); + inputPlot.x.major(0); + inputPlot.y.major(0); + auto &outputPlot = figure(0, 1).plot(800, 150); + outputPlot.x.major(0); + outputPlot.y.major(0); + + stretch.debugAnalysis = [&](int inputOffset, const float *window, bool isPrevious){ + if (plotBlockCounter > 10) return; + int blockSamples = stretch.blockSamples(); + auto &line = inputPlot.line(plotBlockCounter); + for (int i = 0; i < blockSamples; ++i) { + line.add(inputOffset - blockSamples + i + int(inWav.offset), window[i]); + } + }; + stretch.debugSynthesis = [&](int outputOffset, const float *window){ + if (plotBlockCounter > 10) return; + int blockSamples = stretch.blockSamples(); + auto &line = outputPlot.line(plotBlockCounter); + for (int i = 0; i < blockSamples; ++i) { + line.add(outputOffset + i + int(outWav.offset), window[i]); + } + + ++plotBlockCounter; + }; + } + /* Since the WAV helper allows sample access like `wav[c][index]`, we could just call: stretch.exact(inWav, int(inputLength), outWav, int(outputLength)); @@ -82,7 +113,7 @@ int main(int argc, char* argv[]) { // OK, go for it inWav.offset = seekLength; - if (processChunkSize <= 0) { + if (true || processChunkSize <= 0) { stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex); } else { signalsmith::plot::Plot2D timePlot(500, 200); @@ -92,7 +123,7 @@ int main(int argc, char* argv[]) { timePlot.y.minor(0.02*processChunkSize/inWav.sampleRate, "2%"); auto &timeLine = timePlot.line(); auto &timeLineSeek = timePlot.line().fillToY(0); - timeLine.add(inWav.offset, 0); // output seek + timeLine.add(outWav.offset, 0); // output seek timeLineSeek.add(0, 0); timeLineSeek.add(0, seekTime); timeLineSeek.add(inWav.offset, seekTime); @@ -108,14 +139,14 @@ int main(int argc, char* argv[]) { stopwatch.startLap(); stretch.process(inWav, toProcess, outWav, outputSamples); double time = stopwatch.seconds(stopwatch.lap()); - timeLine.add(inWav.offset, time); - timeLine.add(inWav.offset + toProcess, time); + timeLine.add(outWav.offset, time); + timeLine.add(outWav.offset + toProcess, time); inWav.offset += toProcess; outWav.offset += outputSamples; } - timeLine.add(inWav.offset, 0); + timeLine.add(outWav.offset, 0); timePlot.write(outputWav + ".svg"); } diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index effc0fb..1c10581 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -50,6 +50,12 @@ struct SignalsmithStretch { stft.reset(0.1); stashedInput = stft.input; stashedOutput = stft.output; + +skipPreviousBlock = true; +if (restoreInterval) { + stft.setInterval(stft.defaultInterval(), stft.kaiser, configuredAsymmetry); + restoreInterval = false; +} prevInputOffset = -1; channelBands.assign(channelBands.size(), Band()); @@ -67,12 +73,17 @@ struct SignalsmithStretch { configure(nChannels, sampleRate*0.1, sampleRate*0.04, splitComputation); } +Sample configuredAsymmetry = 0; +bool restoreInterval = false; + // Manual setup void configure(int nChannels, int blockSamples, int intervalSamples, bool splitComputation=false, Sample asymmetry=0) { _splitComputation = splitComputation; channels = nChannels; asymmetry *= 1 - 2.0*intervalSamples/blockSamples; // maximum asymmetry gives latency of two intervals stft.configure(channels, channels, blockSamples, intervalSamples + 1); +configuredAsymmetry = asymmetry; +restoreInterval = false; stft.setInterval(intervalSamples, stft.kaiser, asymmetry); stft.reset(0.1); stashedInput = stft.input; @@ -168,21 +179,41 @@ struct SignalsmithStretch { return int(stft.blockSamples() + stft.defaultInterval()); } +bool skipPreviousBlock = false; + +//int outputSeekInputLatency() const { +// return stft.blockSamples()*0.95; +//} + // Moves the input position *and* pre-calculates some output, so that the next samples returned from `.process()` are aligned to the beginning of the sample. // The time-stretch rate is inferred from `inputLength`, so use `.outputSeekLength()` to get a correct value for that. template void outputSeek(Inputs &&inputs, int inputLength) { +//LOG_EXPR(outputLatency()); +//restoreInterval = true; +//int newOffset = stft.blockSamples() - outputSeekInputLatency(); +//LOG_EXPR(newOffset); +//stft.analysisOffset(newOffset); +//stft.synthesisOffset(newOffset); + +skipPreviousBlock = false; +for (auto &b : channelBands) { + b.output = b.prevInput = 0; +} + // TODO: add fade-out parameter to avoid clicks, instead of doing a full reset - reset(); + stft.reset(0.01); // Assume we've been handed enough surplus input to produce `outputLatency()` samples of pre-roll int surplusInput = std::max(inputLength - inputLatency(), 0); +LOG_EXPR(surplusInput); Sample playbackRate = surplusInput/Sample(outputLatency()); // Move the input position to the start of the sound int seekSamples = inputLength - surplusInput; seek(inputs, seekSamples, playbackRate); - tmpPreRollBuffer.resize(outputLatency()*channels); + auto preRollLength = int(outputLatency()); + tmpPreRollBuffer.resize(preRollLength*channels); struct BufferOutput { Sample *samples; int length; @@ -190,11 +221,14 @@ struct SignalsmithStretch { Sample * operator[](int c) { return samples + c*length; } - } preRollOutput{tmpPreRollBuffer.data(), outputLatency()}; - + } preRollOutput{tmpPreRollBuffer.data(), preRollLength}; + // Use the surplus input to produce pre-roll output OffsetIO offsetInput{inputs, seekSamples}; +debugAnalysisOffset = seekSamples; +debugSynthesisOffset = -preRollLength; process(offsetInput, surplusInput, preRollOutput, preRollOutput.length); +debugAnalysisOffset = debugSynthesisOffset = 0; // put the thing down, flip it and reverse it for (auto &v : tmpPreRollBuffer) v = -v; @@ -207,6 +241,10 @@ struct SignalsmithStretch { return inputLatency() + playbackRate*outputLatency(); } +int debugAnalysisOffset = 0, debugSynthesisOffset = 0; +std::function debugAnalysis; +std::function debugSynthesis; + template void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) { #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_START @@ -214,7 +252,6 @@ struct SignalsmithStretch { #endif int prevCopiedInput = 0; auto copyInput = [&](int toIndex){ - int length = std::min(int(stft.blockSamples() + stft.defaultInterval()), toIndex - prevCopiedInput); tmpProcessBuffer.resize(length); int offset = toIndex - length; @@ -302,6 +339,7 @@ struct SignalsmithStretch { if (blockProcess.newSpectrum) { // make sure the previous input is the correct distance in the past (give or take 1 sample) blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1; + if (skipPreviousBlock) blockProcess.reanalysePrev = false; if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1; // analyse a new input @@ -332,6 +370,7 @@ struct SignalsmithStretch { #endif if (blockProcess.newSpectrum) { if (blockProcess.reanalysePrev) { +if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset - stft.defaultInterval() + debugAnalysisOffset, stft.analysisWindow(), true); // analyse past input if (step < stft.analyseSteps()) { stashedInput.swap(stft.input); @@ -354,6 +393,8 @@ struct SignalsmithStretch { step -= 1; } +if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset + debugAnalysisOffset, stft.analysisWindow(), false); + // Analyse latest (stashed) input if (step < stft.analyseSteps()) { stashedInput.swap(stft.input); @@ -396,10 +437,15 @@ struct SignalsmithStretch { step -= 1; if (step < stft.synthesiseSteps()) { +if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffset, stft.synthesisWindow()); stft.synthesiseStep(step); continue; } } +if (processToStep == blockProcess.steps && restoreInterval) { + stft.setInterval(stft.defaultInterval(), stft.kaiser, configuredAsymmetry); + restoreInterval = false; +} #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP(); #endif @@ -455,13 +501,17 @@ struct SignalsmithStretch { } } stft.reset(0.1f); - // Reset the phase-vocoder stuff, so the next block gets a fresh start - for (int c = 0; c < channels; ++c) { - auto channelBands = bandsForChannel(c); - for (int b = 0; b < bands; ++b) { - channelBands[b].prevInput = channelBands[b].output = 0; - } - } +skipPreviousBlock = true; +for (auto &b : channelBands) { + b.prevInput = b.output = 0; +} +// // Reset the phase-vocoder stuff, so the next block gets a fresh start +// for (int c = 0; c < channels; ++c) { +// auto channelBands = bandsForChannel(c); +// for (int b = 0; b < bands; ++b) { +// channelBands[b].prevInput = channelBands[b].output = 0; +// } +// } } // Process a complete audio buffer all in one go @@ -642,6 +692,7 @@ struct SignalsmithStretch { if (blockProcess.newSpectrum) { if (step < size_t(channels)) { +if (skipPreviousBlock) return; int channel = int(step); auto bins = bandsForChannel(channel); @@ -696,6 +747,7 @@ struct SignalsmithStretch { } // Preliminary output prediction from phase-vocoder if (step < size_t(channels)) { +if (skipPreviousBlock) return; int c = int(step); Band *bins = bandsForChannel(c); auto *predictions = predictionsForChannel(c); @@ -754,6 +806,7 @@ struct SignalsmithStretch { auto &downBin = bins[b - 1]; phase += _impl::mul(downBin.output, shortVerticalTwist); +if (!skipPreviousBlock) { if (b >= longVerticalStep) { Complex longDownInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - longVerticalStep*binTimeFactor); Complex longVerticalTwist = _impl::mul(prediction.input, longDownInput); @@ -761,8 +814,10 @@ struct SignalsmithStretch { auto &longDownBin = bins[b - longVerticalStep]; phase += _impl::mul(longDownBin.output, longVerticalTwist); } +} } // Downwards vertical steps +if (!skipPreviousBlock) { if (b < bands - 1) { auto &upPrediction = predictions[b + 1]; auto &upMapPoint = outputMap[b + 1]; @@ -785,6 +840,7 @@ struct SignalsmithStretch { phase += _impl::mul(longUpBin.output, longVerticalTwist); } } +} outputBin.output = prediction.makeOutput(phase); @@ -802,6 +858,7 @@ struct SignalsmithStretch { } return; } +skipPreviousBlock = false; step -= splitMainPrediction; if (blockProcess.newSpectrum) { From c384930ded13f0c3c34df5dc50797465ce849dde Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Fri, 17 Oct 2025 12:31:05 +0100 Subject: [PATCH 05/14] Mark centre time for each window --- cmd/Makefile | 2 +- cmd/main.cpp | 8 ++++--- signalsmith-stretch.h | 54 ++++++++++++++++++------------------------- 3 files changed, 28 insertions(+), 36 deletions(-) diff --git a/cmd/Makefile b/cmd/Makefile index 4dfcb86..84eb765 100644 --- a/cmd/Makefile +++ b/cmd/Makefile @@ -1,6 +1,6 @@ all: out/stretch -DEV_FLAGS := --semitones=4 --time=1 +DEV_FLAGS := --semitones=4 --time=0.667 dev: out/stretch ./out/stretch inputs/dev.wav out/dev-2048.wav --process-chunk=2048 $(DEV_FLAGS) diff --git a/cmd/main.cpp b/cmd/main.cpp index 30bc522..e455e71 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -63,21 +63,23 @@ int main(int argc, char* argv[]) { outputPlot.x.major(0); outputPlot.y.major(0); - stretch.debugAnalysis = [&](int inputOffset, const float *window, bool isPrevious){ + stretch.debugAnalysis = [&](int inputOffset, const float *window, size_t windowOffset, bool isPrevious){ if (plotBlockCounter > 10) return; int blockSamples = stretch.blockSamples(); auto &line = inputPlot.line(plotBlockCounter); for (int i = 0; i < blockSamples; ++i) { line.add(inputOffset - blockSamples + i + int(inWav.offset), window[i]); } + line.marker(inputOffset - blockSamples + int(windowOffset) + int(inWav.offset), window[windowOffset]); }; - stretch.debugSynthesis = [&](int outputOffset, const float *window){ + stretch.debugSynthesis = [&](int outputOffset, const float *window, size_t windowOffset){ if (plotBlockCounter > 10) return; int blockSamples = stretch.blockSamples(); auto &line = outputPlot.line(plotBlockCounter); for (int i = 0; i < blockSamples; ++i) { line.add(outputOffset + i + int(outWav.offset), window[i]); } + line.marker(outputOffset + windowOffset + int(outWav.offset), window[windowOffset]); ++plotBlockCounter; }; @@ -113,7 +115,7 @@ int main(int argc, char* argv[]) { // OK, go for it inWav.offset = seekLength; - if (true || processChunkSize <= 0) { + if (processChunkSize <= 0) { stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex); } else { signalsmith::plot::Plot2D timePlot(500, 200); diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 1c10581..57d1aa0 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -51,13 +51,13 @@ struct SignalsmithStretch { stashedInput = stft.input; stashedOutput = stft.output; -skipPreviousBlock = true; if (restoreInterval) { stft.setInterval(stft.defaultInterval(), stft.kaiser, configuredAsymmetry); restoreInterval = false; } prevInputOffset = -1; + assumePreviousBlockZero = true; channelBands.assign(channelBands.size(), Band()); silenceCounter = 0; didSeek = false; @@ -179,8 +179,6 @@ restoreInterval = false; return int(stft.blockSamples() + stft.defaultInterval()); } -bool skipPreviousBlock = false; - //int outputSeekInputLatency() const { // return stft.blockSamples()*0.95; //} @@ -196,16 +194,12 @@ bool skipPreviousBlock = false; //stft.analysisOffset(newOffset); //stft.synthesisOffset(newOffset); -skipPreviousBlock = false; -for (auto &b : channelBands) { - b.output = b.prevInput = 0; -} + clearPreviousBlock(); // TODO: add fade-out parameter to avoid clicks, instead of doing a full reset stft.reset(0.01); // Assume we've been handed enough surplus input to produce `outputLatency()` samples of pre-roll int surplusInput = std::max(inputLength - inputLatency(), 0); -LOG_EXPR(surplusInput); Sample playbackRate = surplusInput/Sample(outputLatency()); // Move the input position to the start of the sound @@ -242,8 +236,8 @@ debugAnalysisOffset = debugSynthesisOffset = 0; } int debugAnalysisOffset = 0, debugSynthesisOffset = 0; -std::function debugAnalysis; -std::function debugSynthesis; +std::function debugAnalysis; +std::function debugSynthesis; template void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) { @@ -339,7 +333,7 @@ std::function debugSynthesis; if (blockProcess.newSpectrum) { // make sure the previous input is the correct distance in the past (give or take 1 sample) blockProcess.reanalysePrev = didSeek || std::abs(inputInterval - int(stft.defaultInterval())) > 1; - if (skipPreviousBlock) blockProcess.reanalysePrev = false; + if (assumePreviousBlockZero) blockProcess.reanalysePrev = false; if (blockProcess.reanalysePrev) blockProcess.steps += stft.analyseSteps() + 1; // analyse a new input @@ -370,7 +364,7 @@ std::function debugSynthesis; #endif if (blockProcess.newSpectrum) { if (blockProcess.reanalysePrev) { -if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset - stft.defaultInterval() + debugAnalysisOffset, stft.analysisWindow(), true); +if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset - stft.defaultInterval() + debugAnalysisOffset, stft.analysisWindow(), stft.analysisOffset(), true); // analyse past input if (step < stft.analyseSteps()) { stashedInput.swap(stft.input); @@ -393,7 +387,7 @@ if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset - stft.defaultInte step -= 1; } -if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset + debugAnalysisOffset, stft.analysisWindow(), false); +if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset + debugAnalysisOffset, stft.analysisWindow(), stft.analysisOffset(), false); // Analyse latest (stashed) input if (step < stft.analyseSteps()) { @@ -437,7 +431,7 @@ if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset + debugAnalysisOff step -= 1; if (step < stft.synthesiseSteps()) { -if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffset, stft.synthesisWindow()); +if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffset, stft.synthesisWindow(), stft.synthesisOffset()); stft.synthesiseStep(step); continue; } @@ -501,17 +495,8 @@ if (processToStep == blockProcess.steps && restoreInterval) { } } stft.reset(0.1f); -skipPreviousBlock = true; -for (auto &b : channelBands) { - b.prevInput = b.output = 0; -} -// // Reset the phase-vocoder stuff, so the next block gets a fresh start -// for (int c = 0; c < channels; ++c) { -// auto channelBands = bandsForChannel(c); -// for (int b = 0; b < bands; ++b) { -// channelBands[b].prevInput = channelBands[b].output = 0; -// } -// } + + clearPreviousBlock(); } // Process a complete audio buffer all in one go @@ -595,6 +580,15 @@ for (auto &b : channelBands) { Band * bandsForChannel(int channel) { return channelBands.data() + channel*bands; } + + bool assumePreviousBlockZero = false; + void clearPreviousBlock() { + assumePreviousBlockZero = true; + for (auto &b : channelBands) { + b.output = b.prevInput = 0; + } + } + template Complex getBand(int channel, int index) { if (index < 0 || index >= bands) return 0; @@ -692,7 +686,7 @@ for (auto &b : channelBands) { if (blockProcess.newSpectrum) { if (step < size_t(channels)) { -if (skipPreviousBlock) return; +//if (assumePreviousBlockZero) return; int channel = int(step); auto bins = bandsForChannel(channel); @@ -747,7 +741,7 @@ if (skipPreviousBlock) return; } // Preliminary output prediction from phase-vocoder if (step < size_t(channels)) { -if (skipPreviousBlock) return; +//if (assumePreviousBlockZero) return; int c = int(step); Band *bins = bandsForChannel(c); auto *predictions = predictionsForChannel(c); @@ -806,7 +800,6 @@ if (skipPreviousBlock) return; auto &downBin = bins[b - 1]; phase += _impl::mul(downBin.output, shortVerticalTwist); -if (!skipPreviousBlock) { if (b >= longVerticalStep) { Complex longDownInput = getFractional<&Band::input>(maxChannel, mapPoint.inputBin - longVerticalStep*binTimeFactor); Complex longVerticalTwist = _impl::mul(prediction.input, longDownInput); @@ -814,10 +807,8 @@ if (!skipPreviousBlock) { auto &longDownBin = bins[b - longVerticalStep]; phase += _impl::mul(longDownBin.output, longVerticalTwist); } -} } // Downwards vertical steps -if (!skipPreviousBlock) { if (b < bands - 1) { auto &upPrediction = predictions[b + 1]; auto &upMapPoint = outputMap[b + 1]; @@ -840,7 +831,6 @@ if (!skipPreviousBlock) { phase += _impl::mul(longUpBin.output, longVerticalTwist); } } -} outputBin.output = prediction.makeOutput(phase); @@ -858,7 +848,6 @@ if (!skipPreviousBlock) { } return; } -skipPreviousBlock = false; step -= splitMainPrediction; if (blockProcess.newSpectrum) { @@ -867,6 +856,7 @@ skipPreviousBlock = false; bin.prevInput = bin.input; } } + assumePreviousBlockZero = false; } } From 14dbada17e12b2b614f1912a2cdae9875df7500b Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Fri, 17 Oct 2025 12:46:21 +0100 Subject: [PATCH 06/14] Skip some processing when `assumePreviousBlockZero` --- cmd/main.cpp | 4 ++-- signalsmith-stretch.h | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cmd/main.cpp b/cmd/main.cpp index e455e71..710a284 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -79,7 +79,7 @@ int main(int argc, char* argv[]) { for (int i = 0; i < blockSamples; ++i) { line.add(outputOffset + i + int(outWav.offset), window[i]); } - line.marker(outputOffset + windowOffset + int(outWav.offset), window[windowOffset]); + line.marker(outputOffset + int(windowOffset) + int(outWav.offset), window[windowOffset]); ++plotBlockCounter; }; @@ -142,7 +142,7 @@ int main(int argc, char* argv[]) { stretch.process(inWav, toProcess, outWav, outputSamples); double time = stopwatch.seconds(stopwatch.lap()); timeLine.add(outWav.offset, time); - timeLine.add(outWav.offset + toProcess, time); + timeLine.add(outWav.offset + outputSamples, time); inWav.offset += toProcess; outWav.offset += outputSamples; diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 57d1aa0..39230cd 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -564,6 +564,14 @@ if (processToStep == blockProcess.steps && restoreInterval) { bool didSeek = false; Sample seekTimeFactor = 1; + bool assumePreviousBlockZero = false; + void clearPreviousBlock() { + assumePreviousBlockZero = true; + for (auto &b : channelBands) { + b.output = b.prevInput = 0; + } + } + Sample bandToFreq(Sample b) const { return stft.binToFreq(b); } @@ -581,14 +589,6 @@ if (processToStep == blockProcess.steps && restoreInterval) { return channelBands.data() + channel*bands; } - bool assumePreviousBlockZero = false; - void clearPreviousBlock() { - assumePreviousBlockZero = true; - for (auto &b : channelBands) { - b.output = b.prevInput = 0; - } - } - template Complex getBand(int channel, int index) { if (index < 0 || index >= bands) return 0; @@ -686,7 +686,7 @@ if (processToStep == blockProcess.steps && restoreInterval) { if (blockProcess.newSpectrum) { if (step < size_t(channels)) { -//if (assumePreviousBlockZero) return; + if (assumePreviousBlockZero) return; // TODO: remove this from the processing schedule int channel = int(step); auto bins = bandsForChannel(channel); @@ -741,7 +741,7 @@ if (processToStep == blockProcess.steps && restoreInterval) { } // Preliminary output prediction from phase-vocoder if (step < size_t(channels)) { -//if (assumePreviousBlockZero) return; + if (assumePreviousBlockZero) return; // TODO: remove this from the processing schedule int c = int(step); Band *bins = bandsForChannel(c); auto *predictions = predictionsForChannel(c); From 6bb1cf1268c237cc2609ac9099d371b3ee732e1b Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Fri, 17 Oct 2025 13:51:21 +0100 Subject: [PATCH 07/14] Adjust `prevInput` and `output` to account for STFT offset change when restoring --- CMakeLists.txt | 2 +- signalsmith-stretch.h | 55 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6501964..c73eada 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ include(FetchContent) FetchContent_Declare( signalsmith-linear GIT_REPOSITORY https://github.com/Signalsmith-Audio/linear.git - GIT_TAG 0.2.8 + GIT_TAG 0.3.0 GIT_SHALLOW ON ) FetchContent_MakeAvailable(signalsmith-linear) diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 39230cd..42dfa21 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -52,8 +52,8 @@ struct SignalsmithStretch { stashedOutput = stft.output; if (restoreInterval) { - stft.setInterval(stft.defaultInterval(), stft.kaiser, configuredAsymmetry); - restoreInterval = false; + stft.setInterval(restoreInterval, stft.kaiser, configuredAsymmetry); + restoreInterval = 0; } prevInputOffset = -1; @@ -74,7 +74,7 @@ if (restoreInterval) { } Sample configuredAsymmetry = 0; -bool restoreInterval = false; +int restoreInterval = 0; // Manual setup void configure(int nChannels, int blockSamples, int intervalSamples, bool splitComputation=false, Sample asymmetry=0) { @@ -188,13 +188,12 @@ restoreInterval = false; template void outputSeek(Inputs &&inputs, int inputLength) { //LOG_EXPR(outputLatency()); -//restoreInterval = true; -//int newOffset = stft.blockSamples() - outputSeekInputLatency(); +restoreInterval = stft.defaultInterval(); +//stft.setInterval(stft.blockSamples()/2); +int newOffset = stft.defaultInterval() - 1; //LOG_EXPR(newOffset); -//stft.analysisOffset(newOffset); -//stft.synthesisOffset(newOffset); - - clearPreviousBlock(); +stft.analysisOffset(newOffset); +stft.synthesisOffset(newOffset); // TODO: add fade-out parameter to avoid clicks, instead of doing a full reset stft.reset(0.01); @@ -202,6 +201,8 @@ restoreInterval = false; int surplusInput = std::max(inputLength - inputLatency(), 0); Sample playbackRate = surplusInput/Sample(outputLatency()); + if (playbackRate > 1) clearPreviousBlock(); + // Move the input position to the start of the sound int seekSamples = inputLength - surplusInput; seek(inputs, seekSamples, playbackRate); @@ -437,8 +438,40 @@ if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffs } } if (processToStep == blockProcess.steps && restoreInterval) { - stft.setInterval(stft.defaultInterval(), stft.kaiser, configuredAsymmetry); - restoreInterval = false; + int prevOffsetA = stft.analysisOffset(), prevOffsetS = stft.synthesisOffset(); +LOG_EXPR(prevOffsetA); +LOG_EXPR(prevOffsetS); + stft.setInterval(restoreInterval, stft.kaiser, configuredAsymmetry); + restoreInterval = 0; + + int diffOffsetA = int(stft.analysisOffset()) - prevOffsetA; + for (int channel = 0; channel < channels; ++channel) { + auto bins = bandsForChannel(channel); + + Complex rot = std::polar(Sample(1), bandToFreq(0)*diffOffsetA*Sample(2*M_PI)); + Sample freqStep = bandToFreq(1) - bandToFreq(0); + Complex rotStep = std::polar(Sample(1), freqStep*diffOffsetA*Sample(2*M_PI)); + + for (int b = 0; b < bands; ++b) { + auto &bin = bins[b]; + bin.prevInput = _impl::mul(bin.prevInput, rot); + rot = _impl::mul(rot, rotStep); + } + } + int diffOffsetS = int(stft.synthesisOffset()) - prevOffsetS; + for (int channel = 0; channel < channels; ++channel) { + auto bins = bandsForChannel(channel); + + Complex rot = std::polar(Sample(1), bandToFreq(0)*diffOffsetS*Sample(2*M_PI)); + Sample freqStep = bandToFreq(1) - bandToFreq(0); + Complex rotStep = std::polar(Sample(1), freqStep*diffOffsetS*Sample(2*M_PI)); + + for (int b = 0; b < bands; ++b) { + auto &bin = bins[b]; + bin.output = _impl::mul(bin.output, rot); + rot = _impl::mul(rot, rotStep); + } + } } #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP(); From 7b5545fff19466106f72d36203ffff262dae1a36 Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Fri, 17 Oct 2025 14:01:59 +0100 Subject: [PATCH 08/14] Reset STFT interval as one of the processing stages --- signalsmith-stretch.h | 95 ++++++++++++++++++++++++------------------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 42dfa21..31b7768 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -51,10 +51,10 @@ struct SignalsmithStretch { stashedInput = stft.input; stashedOutput = stft.output; -if (restoreInterval) { - stft.setInterval(restoreInterval, stft.kaiser, configuredAsymmetry); - restoreInterval = 0; -} + if (restoreInterval) { + stft.setInterval(restoreInterval, stft.kaiser, configuredAsymmetry); + restoreInterval = 0; + } prevInputOffset = -1; assumePreviousBlockZero = true; @@ -75,6 +75,7 @@ if (restoreInterval) { Sample configuredAsymmetry = 0; int restoreInterval = 0; +int diffOffsetA = 0, diffOffsetS = 0; // Manual setup void configure(int nChannels, int blockSamples, int intervalSamples, bool splitComputation=false, Sample asymmetry=0) { @@ -348,8 +349,13 @@ std::function debugSynthesis; updateProcessSpectrumSteps(); blockProcess.steps += processSpectrumSteps; - + blockProcess.steps += stft.synthesiseSteps() + 1; + + if (restoreInterval > 0) { + blockProcess.resetInterval = true; + blockProcess.steps += 1 + channels; // STFT window reset then adjusting prevInput/output + } } size_t processToStep = newBlock ? blockProcess.steps : 0; @@ -436,43 +442,47 @@ if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffs stft.synthesiseStep(step); continue; } + step -= stft.synthesiseSteps(); + + if (blockProcess.resetInterval) { + if (step-- == 0) { + int prevOffsetA = stft.analysisOffset(), prevOffsetS = stft.synthesisOffset(); + stft.setInterval(restoreInterval, stft.kaiser, configuredAsymmetry); + restoreInterval = 0; + + diffOffsetA = int(stft.analysisOffset()) - prevOffsetA; + diffOffsetS = int(stft.synthesisOffset()) - prevOffsetS; + continue; + } else if (step < size_t(channels)) { + int channel = int(step); + auto bins = bandsForChannel(channel); + if (diffOffsetA) { // adjust prevInput + Complex rot = std::polar(Sample(1), bandToFreq(0)*diffOffsetA*Sample(2*M_PI)); + Sample freqStep = bandToFreq(1) - bandToFreq(0); + Complex rotStep = std::polar(Sample(1), freqStep*diffOffsetA*Sample(2*M_PI)); + + for (int b = 0; b < bands; ++b) { + auto &bin = bins[b]; + bin.prevInput = _impl::mul(bin.prevInput, rot); + rot = _impl::mul(rot, rotStep); + } + } + if (diffOffsetS) { + Complex rot = std::polar(Sample(1), bandToFreq(0)*diffOffsetS*Sample(2*M_PI)); + Sample freqStep = bandToFreq(1) - bandToFreq(0); + Complex rotStep = std::polar(Sample(1), freqStep*diffOffsetS*Sample(2*M_PI)); + + for (int b = 0; b < bands; ++b) { + auto &bin = bins[b]; + bin.output = _impl::mul(bin.output, rot); + rot = _impl::mul(rot, rotStep); + } + } + continue; + } + step -= channels; + } } -if (processToStep == blockProcess.steps && restoreInterval) { - int prevOffsetA = stft.analysisOffset(), prevOffsetS = stft.synthesisOffset(); -LOG_EXPR(prevOffsetA); -LOG_EXPR(prevOffsetS); - stft.setInterval(restoreInterval, stft.kaiser, configuredAsymmetry); - restoreInterval = 0; - - int diffOffsetA = int(stft.analysisOffset()) - prevOffsetA; - for (int channel = 0; channel < channels; ++channel) { - auto bins = bandsForChannel(channel); - - Complex rot = std::polar(Sample(1), bandToFreq(0)*diffOffsetA*Sample(2*M_PI)); - Sample freqStep = bandToFreq(1) - bandToFreq(0); - Complex rotStep = std::polar(Sample(1), freqStep*diffOffsetA*Sample(2*M_PI)); - - for (int b = 0; b < bands; ++b) { - auto &bin = bins[b]; - bin.prevInput = _impl::mul(bin.prevInput, rot); - rot = _impl::mul(rot, rotStep); - } - } - int diffOffsetS = int(stft.synthesisOffset()) - prevOffsetS; - for (int channel = 0; channel < channels; ++channel) { - auto bins = bandsForChannel(channel); - - Complex rot = std::polar(Sample(1), bandToFreq(0)*diffOffsetS*Sample(2*M_PI)); - Sample freqStep = bandToFreq(1) - bandToFreq(0); - Complex rotStep = std::polar(Sample(1), freqStep*diffOffsetS*Sample(2*M_PI)); - - for (int b = 0; b < bands; ++b) { - auto &bin = bins[b]; - bin.output = _impl::mul(bin.output, rot); - rot = _impl::mul(rot, rotStep); - } - } -} #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP SIGNALSMITH_STRETCH_PROFILE_PROCESS_ENDSTEP(); #endif @@ -571,6 +581,9 @@ LOG_EXPR(prevOffsetS); bool mappedFrequencies = false; bool processFormants = false; Sample timeFactor; + + // If our previous block had an unusual offset/shape, reset and adjust + bool resetInterval = false; } blockProcess; using Complex = std::complex; From 47cd56b305ff4a0af92505dd4ef5145c750329a7 Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Fri, 17 Oct 2025 15:02:46 +0100 Subject: [PATCH 09/14] Swap outputs before reversing when computing `.outputSeek()` --- cmd/main.cpp | 12 +++++++----- signalsmith-stretch.h | 43 +++++++++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/cmd/main.cpp b/cmd/main.cpp index 710a284..3cd1331 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -52,14 +52,14 @@ int main(int argc, char* argv[]) { stretch.setFormantSemitones(formants, formantComp); stretch.setFormantBase(formantBase/inWav.sampleRate); - signalsmith::plot::Figure figure; - auto writeLater = figure.writeLater(outputWav + "-blocks.svg"); + bool plotBlocks = true; + signalsmith::plot::Figure blocksFigure; size_t plotBlockCounter = 0; - { - auto &inputPlot = figure(0, 0).plot(800, 150); + if (plotBlocks) { + auto &inputPlot = blocksFigure(0, 0).plot(800, 150); inputPlot.x.major(0); inputPlot.y.major(0); - auto &outputPlot = figure(0, 1).plot(800, 150); + auto &outputPlot = blocksFigure(0, 1).plot(800, 150); outputPlot.x.major(0); outputPlot.y.major(0); @@ -157,5 +157,7 @@ int main(int argc, char* argv[]) { stretch.flush(outWav, outputLength - outputIndex); outWav.offset = 0; + if (plotBlocks) blocksFigure.write(outputWav + "-blocks.svg"); + if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV"); } diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 31b7768..fc9af43 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -180,24 +180,34 @@ restoreInterval = false; return int(stft.blockSamples() + stft.defaultInterval()); } -//int outputSeekInputLatency() const { -// return stft.blockSamples()*0.95; -//} - // Moves the input position *and* pre-calculates some output, so that the next samples returned from `.process()` are aligned to the beginning of the sample. // The time-stretch rate is inferred from `inputLength`, so use `.outputSeekLength()` to get a correct value for that. template - void outputSeek(Inputs &&inputs, int inputLength) { -//LOG_EXPR(outputLatency()); -restoreInterval = stft.defaultInterval(); -//stft.setInterval(stft.blockSamples()/2); -int newOffset = stft.defaultInterval() - 1; -//LOG_EXPR(newOffset); -stft.analysisOffset(newOffset); -stft.synthesisOffset(newOffset); + void outputSeek(Inputs &&inputs, int inputLength, Sample firstBlockAsymmetry=0.75) { + if (firstBlockAsymmetry >= 0) { + restoreInterval = stft.defaultInterval(); + // Warped sine window + + size_t windowOffset = stft.blockSamples()*(1 - firstBlockAsymmetry)/2; + stft.analysisOffset(windowOffset); + stft.synthesisOffset(windowOffset); + + for (size_t i = 0; i < stft.blockSamples(); ++i) { + Sample r = (i + Sample(0.5))/stft.blockSamples(); + // Warp as two linear segments + if (r < (1 - firstBlockAsymmetry)/2) { + r /= (1 - firstBlockAsymmetry); + } else { + r = 1 + (r - 1)/(firstBlockAsymmetry + 1); + } + auto w = (1 - std::cos(r*Sample(2*M_PI)))/2; + stft.analysisWindow()[i] = w; + stft.synthesisWindow()[i] = w; + } + } // TODO: add fade-out parameter to avoid clicks, instead of doing a full reset - stft.reset(0.01); + stft.reset(0.1); // Assume we've been handed enough surplus input to produce `outputLatency()` samples of pre-roll int surplusInput = std::max(inputLength - inputLatency(), 0); Sample playbackRate = surplusInput/Sample(outputLatency()); @@ -229,8 +239,10 @@ debugAnalysisOffset = debugSynthesisOffset = 0; // put the thing down, flip it and reverse it for (auto &v : tmpPreRollBuffer) v = -v; for (int c = 0; c < channels; ++c) { - std::reverse(preRollOutput[c], preRollOutput[c] + preRollOutput.length); - stft.addOutput(c, preRollOutput.length, preRollOutput[c]); + std::reverse(preRollOutput[c], preRollOutput[c] + preRollLength); + if (_splitComputation) stashedOutput.swap(stft.output); + stft.addOutput(c, preRollLength, preRollOutput[c]); + if (_splitComputation) stashedOutput.swap(stft.output); } } int outputSeekLength(Sample playbackRate) const { @@ -787,7 +799,6 @@ if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffs } // Preliminary output prediction from phase-vocoder if (step < size_t(channels)) { - if (assumePreviousBlockZero) return; // TODO: remove this from the processing schedule int c = int(step); Band *bins = bandsForChannel(c); auto *predictions = predictionsForChannel(c); From e711ff24303e9d668baef2c11bee7e03bd7dbe59 Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Fri, 17 Oct 2025 17:39:16 +0100 Subject: [PATCH 10/14] Refactor stored config into its own object --- cmd/main.cpp | 36 +------------------- signalsmith-stretch.h | 78 ++++++++++++++++++++----------------------- 2 files changed, 37 insertions(+), 77 deletions(-) diff --git a/cmd/main.cpp b/cmd/main.cpp index 3cd1331..c4079b9 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -52,39 +52,6 @@ int main(int argc, char* argv[]) { stretch.setFormantSemitones(formants, formantComp); stretch.setFormantBase(formantBase/inWav.sampleRate); - bool plotBlocks = true; - signalsmith::plot::Figure blocksFigure; - size_t plotBlockCounter = 0; - if (plotBlocks) { - auto &inputPlot = blocksFigure(0, 0).plot(800, 150); - inputPlot.x.major(0); - inputPlot.y.major(0); - auto &outputPlot = blocksFigure(0, 1).plot(800, 150); - outputPlot.x.major(0); - outputPlot.y.major(0); - - stretch.debugAnalysis = [&](int inputOffset, const float *window, size_t windowOffset, bool isPrevious){ - if (plotBlockCounter > 10) return; - int blockSamples = stretch.blockSamples(); - auto &line = inputPlot.line(plotBlockCounter); - for (int i = 0; i < blockSamples; ++i) { - line.add(inputOffset - blockSamples + i + int(inWav.offset), window[i]); - } - line.marker(inputOffset - blockSamples + int(windowOffset) + int(inWav.offset), window[windowOffset]); - }; - stretch.debugSynthesis = [&](int outputOffset, const float *window, size_t windowOffset){ - if (plotBlockCounter > 10) return; - int blockSamples = stretch.blockSamples(); - auto &line = outputPlot.line(plotBlockCounter); - for (int i = 0; i < blockSamples; ++i) { - line.add(outputOffset + i + int(outWav.offset), window[i]); - } - line.marker(outputOffset + int(windowOffset) + int(outWav.offset), window[windowOffset]); - - ++plotBlockCounter; - }; - } - /* Since the WAV helper allows sample access like `wav[c][index]`, we could just call: stretch.exact(inWav, int(inputLength), outWav, int(outputLength)); @@ -118,6 +85,7 @@ int main(int argc, char* argv[]) { if (processChunkSize <= 0) { stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex); } else { + // Plot computation time for each chunk signalsmith::plot::Plot2D timePlot(500, 200); timePlot.x.major(0); timePlot.y.major(0); @@ -157,7 +125,5 @@ int main(int argc, char* argv[]) { stretch.flush(outWav, outputLength - outputIndex); outWav.offset = 0; - if (plotBlocks) blocksFigure.write(outputWav + "-blocks.svg"); - if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV"); } diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index fc9af43..3540637 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -51,9 +51,9 @@ struct SignalsmithStretch { stashedInput = stft.input; stashedOutput = stft.output; - if (restoreInterval) { - stft.setInterval(restoreInterval, stft.kaiser, configuredAsymmetry); - restoreInterval = 0; + if (restoreConfig.pending()) { + stft.setInterval(restoreConfig.interval, stft.kaiser, restoreConfig.asymmetry); + restoreConfig = {}; } prevInputOffset = -1; @@ -73,18 +73,17 @@ struct SignalsmithStretch { configure(nChannels, sampleRate*0.1, sampleRate*0.04, splitComputation); } -Sample configuredAsymmetry = 0; -int restoreInterval = 0; -int diffOffsetA = 0, diffOffsetS = 0; - // Manual setup void configure(int nChannels, int blockSamples, int intervalSamples, bool splitComputation=false, Sample asymmetry=0) { _splitComputation = splitComputation; channels = nChannels; asymmetry *= 1 - 2.0*intervalSamples/blockSamples; // maximum asymmetry gives latency of two intervals stft.configure(channels, channels, blockSamples, intervalSamples + 1); -configuredAsymmetry = asymmetry; -restoreInterval = false; + + restoreConfig = {}; + restoreConfig.interval = intervalSamples; + restoreConfig.asymmetry = asymmetry; + stft.setInterval(intervalSamples, stft.kaiser, asymmetry); stft.reset(0.1); stashedInput = stft.input; @@ -185,24 +184,23 @@ restoreInterval = false; template void outputSeek(Inputs &&inputs, int inputLength, Sample firstBlockAsymmetry=0.75) { if (firstBlockAsymmetry >= 0) { - restoreInterval = stft.defaultInterval(); - // Warped sine window + restoreConfig.pending = true; + restoreConfig.interval = stft.defaultInterval(); size_t windowOffset = stft.blockSamples()*(1 - firstBlockAsymmetry)/2; + size_t windowEnd = stft.synthesisOffset() + stft.defaultInterval(); stft.analysisOffset(windowOffset); stft.synthesisOffset(windowOffset); + // Sine window, warped as two linear segments for (size_t i = 0; i < stft.blockSamples(); ++i) { - Sample r = (i + Sample(0.5))/stft.blockSamples(); - // Warp as two linear segments - if (r < (1 - firstBlockAsymmetry)/2) { - r /= (1 - firstBlockAsymmetry); + Sample r = i + Sample(0.5); + if (i < windowOffset) { + r = r/stft.blockSamples(); } else { - r = 1 + (r - 1)/(firstBlockAsymmetry + 1); + r = (r - windowOffset)/(windowEnd - windowOffset); } - auto w = (1 - std::cos(r*Sample(2*M_PI)))/2; - stft.analysisWindow()[i] = w; - stft.synthesisWindow()[i] = w; + stft.analysisWindow()[i] = stft.synthesisWindow()[i] = (1 - std::cos(r*Sample(2*M_PI)))/2; } } @@ -231,10 +229,7 @@ restoreInterval = false; // Use the surplus input to produce pre-roll output OffsetIO offsetInput{inputs, seekSamples}; -debugAnalysisOffset = seekSamples; -debugSynthesisOffset = -preRollLength; process(offsetInput, surplusInput, preRollOutput, preRollOutput.length); -debugAnalysisOffset = debugSynthesisOffset = 0; // put the thing down, flip it and reverse it for (auto &v : tmpPreRollBuffer) v = -v; @@ -249,10 +244,6 @@ debugAnalysisOffset = debugSynthesisOffset = 0; return inputLatency() + playbackRate*outputLatency(); } -int debugAnalysisOffset = 0, debugSynthesisOffset = 0; -std::function debugAnalysis; -std::function debugSynthesis; - template void process(Inputs &&inputs, int inputSamples, Outputs &&outputs, int outputSamples) { #ifdef SIGNALSMITH_STRETCH_PROFILE_PROCESS_START @@ -364,7 +355,7 @@ std::function debugSynthesis; blockProcess.steps += stft.synthesiseSteps() + 1; - if (restoreInterval > 0) { + if (restoreConfig.pending > 0) { blockProcess.resetInterval = true; blockProcess.steps += 1 + channels; // STFT window reset then adjusting prevInput/output } @@ -383,7 +374,6 @@ std::function debugSynthesis; #endif if (blockProcess.newSpectrum) { if (blockProcess.reanalysePrev) { -if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset - stft.defaultInterval() + debugAnalysisOffset, stft.analysisWindow(), stft.analysisOffset(), true); // analyse past input if (step < stft.analyseSteps()) { stashedInput.swap(stft.input); @@ -406,8 +396,6 @@ if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset - stft.defaultInte step -= 1; } -if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset + debugAnalysisOffset, stft.analysisWindow(), stft.analysisOffset(), false); - // Analyse latest (stashed) input if (step < stft.analyseSteps()) { stashedInput.swap(stft.input); @@ -450,7 +438,6 @@ if (step == 0 && debugAnalysis) debugAnalysis(prevInputOffset + debugAnalysisOff step -= 1; if (step < stft.synthesiseSteps()) { -if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffset, stft.synthesisWindow(), stft.synthesisOffset()); stft.synthesiseStep(step); continue; } @@ -459,19 +446,19 @@ if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffs if (blockProcess.resetInterval) { if (step-- == 0) { int prevOffsetA = stft.analysisOffset(), prevOffsetS = stft.synthesisOffset(); - stft.setInterval(restoreInterval, stft.kaiser, configuredAsymmetry); - restoreInterval = 0; - - diffOffsetA = int(stft.analysisOffset()) - prevOffsetA; - diffOffsetS = int(stft.synthesisOffset()) - prevOffsetS; + stft.setInterval(restoreConfig.interval, stft.kaiser, restoreConfig.asymmetry); + restoreConfig.pending = false; + + restoreConfig.diffOffsetA = int(stft.analysisOffset()) - prevOffsetA; + restoreConfig.diffOffsetS = int(stft.synthesisOffset()) - prevOffsetS; continue; } else if (step < size_t(channels)) { int channel = int(step); auto bins = bandsForChannel(channel); - if (diffOffsetA) { // adjust prevInput - Complex rot = std::polar(Sample(1), bandToFreq(0)*diffOffsetA*Sample(2*M_PI)); + if (restoreConfig.diffOffsetA) { // adjust prevInput + Complex rot = std::polar(Sample(1), bandToFreq(0)*restoreConfig.diffOffsetA*Sample(2*M_PI)); Sample freqStep = bandToFreq(1) - bandToFreq(0); - Complex rotStep = std::polar(Sample(1), freqStep*diffOffsetA*Sample(2*M_PI)); + Complex rotStep = std::polar(Sample(1), freqStep*restoreConfig.diffOffsetA*Sample(2*M_PI)); for (int b = 0; b < bands; ++b) { auto &bin = bins[b]; @@ -479,10 +466,10 @@ if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffs rot = _impl::mul(rot, rotStep); } } - if (diffOffsetS) { - Complex rot = std::polar(Sample(1), bandToFreq(0)*diffOffsetS*Sample(2*M_PI)); + if (restoreConfig.diffOffsetS) { // adjust output + Complex rot = std::polar(Sample(1), bandToFreq(0)*restoreConfig.diffOffsetS*Sample(2*M_PI)); Sample freqStep = bandToFreq(1) - bandToFreq(0); - Complex rotStep = std::polar(Sample(1), freqStep*diffOffsetS*Sample(2*M_PI)); + Complex rotStep = std::polar(Sample(1), freqStep*restoreConfig.diffOffsetS*Sample(2*M_PI)); for (int b = 0; b < bands; ++b) { auto &bin = bins[b]; @@ -616,6 +603,13 @@ if (step == 0 && debugSynthesis) debugSynthesis(outputIndex + debugSynthesisOffs typename STFT::Output stashedOutput; std::vector tmpProcessBuffer, tmpPreRollBuffer; + + struct { + bool pending = false; + Sample asymmetry = 0; + int interval = 0; + int diffOffsetA = 0, diffOffsetS = 0; + } restoreConfig; int channels = 0, bands = 0; int prevInputOffset = -1; From f5bf622ff8191d9530585ddef22ce715c7398c4b Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Tue, 21 Oct 2025 14:53:34 +0100 Subject: [PATCH 11/14] Fix alignment issue when reducing length (playback rate > 1) --- cmd/Makefile | 2 +- cmd/main.cpp | 38 +++++++++++-------------- signalsmith-stretch.h | 65 ++++++++++++++++++++++++------------------- 3 files changed, 53 insertions(+), 52 deletions(-) diff --git a/cmd/Makefile b/cmd/Makefile index 84eb765..9f0803f 100644 --- a/cmd/Makefile +++ b/cmd/Makefile @@ -1,6 +1,6 @@ all: out/stretch -DEV_FLAGS := --semitones=4 --time=0.667 +DEV_FLAGS := --semitones=4 --time=0.667 --asymmetry=0.5 dev: out/stretch ./out/stretch inputs/dev.wav out/dev-2048.wav --process-chunk=2048 $(DEV_FLAGS) diff --git a/cmd/main.cpp b/cmd/main.cpp index c4079b9..a875212 100644 --- a/cmd/main.cpp +++ b/cmd/main.cpp @@ -67,23 +67,17 @@ int main(int argc, char* argv[]) { // At this point, the next output samples we get will correspond to the beginning of the audio file. // We're going to process until *just* before the end of the audio file (so we can get a tidier end using `.flush()`. - int outputIndex = outputLength - stretch.intervalSamples(); + int outputMainBlockLength = outputLength - stretch.intervalSamples(); + // And this is how much input we'll need for that + int inputMainBlockLength = outputMainBlockLength/time; - // Stretch's internal output position is slightly ahead of the output samples we get - int outputPos = outputIndex + stretch.outputLatency(); - // Time-map: where do we want the input position to be at that moment? - int inputPos = std::round(outputPos/time); - // And therefore which input samples do we need to be supplying? - int inputIndex = inputPos + stretch.inputLatency(); - - // In this particular case, our `inputPos` will be at the end of the file - // and `inputIndex` will be beyond the end, so we pad with 0s to have enough input - inWav.resize(inputIndex); + // This zero-pads the input, since we'll go past the end of it + inWav.resize(inputMainBlockLength + seekLength); - // OK, go for it + // Main block of processing inWav.offset = seekLength; if (processChunkSize <= 0) { - stretch.process(inWav, inputIndex - seekLength, outWav, outputIndex); + stretch.process(inWav, inputMainBlockLength, outWav, outputMainBlockLength); } else { // Plot computation time for each chunk signalsmith::plot::Plot2D timePlot(500, 200); @@ -100,19 +94,19 @@ int main(int argc, char* argv[]) { timeLineSeek.add(inWav.offset, 0); float residue = 0.f; - while (inWav.offset < size_t(inputIndex)) { - int toProcess = std::min(processChunkSize, inputIndex - inWav.offset); - float outputPrecise = toProcess * time + residue; - int outputSamples = std::round(outputPrecise); - residue = outputPrecise - outputSamples; + while (outWav.offset < size_t(outputMainBlockLength)) { + int outputSamples = std::min(processChunkSize, outputMainBlockLength - outWav.offset); + float inputPrecise = outputSamples/time + residue; + int inputSamples = std::round(inputPrecise); + residue = inputPrecise - inputSamples; stopwatch.startLap(); - stretch.process(inWav, toProcess, outWav, outputSamples); + stretch.process(inWav, inputSamples, outWav, outputSamples); double time = stopwatch.seconds(stopwatch.lap()); timeLine.add(outWav.offset, time); timeLine.add(outWav.offset + outputSamples, time); - inWav.offset += toProcess; + inWav.offset += inputSamples; outWav.offset += outputSamples; } @@ -121,8 +115,8 @@ int main(int argc, char* argv[]) { } // And as promised, get the last bits using `.flush()`, which does some extra stuff to avoid introducing clicks. - outWav.offset = outputIndex; - stretch.flush(outWav, outputLength - outputIndex); + outWav.offset = outputMainBlockLength; + stretch.flush(outWav, outputLength - outputMainBlockLength); outWav.offset = 0; if (!outWav.write(outputWav).warn()) args.errorExit("failed to write WAV"); diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 3540637..34c6948 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -182,40 +182,46 @@ struct SignalsmithStretch { // Moves the input position *and* pre-calculates some output, so that the next samples returned from `.process()` are aligned to the beginning of the sample. // The time-stretch rate is inferred from `inputLength`, so use `.outputSeekLength()` to get a correct value for that. template - void outputSeek(Inputs &&inputs, int inputLength, Sample firstBlockAsymmetry=0.75) { - if (firstBlockAsymmetry >= 0) { - restoreConfig.pending = true; - restoreConfig.interval = stft.defaultInterval(); - - size_t windowOffset = stft.blockSamples()*(1 - firstBlockAsymmetry)/2; - size_t windowEnd = stft.synthesisOffset() + stft.defaultInterval(); - stft.analysisOffset(windowOffset); - stft.synthesisOffset(windowOffset); - - // Sine window, warped as two linear segments - for (size_t i = 0; i < stft.blockSamples(); ++i) { - Sample r = i + Sample(0.5); - if (i < windowOffset) { - r = r/stft.blockSamples(); - } else { - r = (r - windowOffset)/(windowEnd - windowOffset); - } - stft.analysisWindow()[i] = stft.synthesisWindow()[i] = (1 - std::cos(r*Sample(2*M_PI)))/2; + void outputSeek(Inputs &&inputs, int inputLength, Sample firstBlockAsymmetry=0.5) { + restoreConfig.pending = true; + restoreConfig.interval = stft.defaultInterval(); + + Sample playbackRate = std::max(inputLength - inputLatency(), 0)/Sample(outputLatency()); + + // Place the next (restored-window) block some time in the future + Sample nextBlockOutputStart = stft.defaultInterval()*firstBlockAsymmetry; + Sample nextBlockOutputPos = nextBlockOutputStart + stft.synthesisOffset(); + // The initial block starts some time before time 0 + Sample firstBlockOutputStart = nextBlockOutputStart - stft.defaultInterval(); + // Set the initial block's window so it's centred on 0 + // Use that as the input latency + size_t windowOffset = -firstBlockOutputStart; + size_t windowEnd = int(nextBlockOutputPos); // first block ends at centre of next block + stft.analysisOffset(windowOffset); + stft.synthesisOffset(windowOffset); + + // Sine window, warped as two linear segments + for (size_t i = 0; i < stft.blockSamples(); ++i) { + Sample r = i + Sample(0.5); + if (i < windowOffset) { + r = r/windowOffset/2; + } else if (r < windowEnd) { + r = (1 + (r - windowOffset)/(windowEnd - windowOffset))/2; + } else { + r = 1; } + stft.analysisWindow()[i] = stft.synthesisWindow()[i] = (1 - std::cos(r*Sample(2*M_PI)))/2; } // TODO: add fade-out parameter to avoid clicks, instead of doing a full reset - stft.reset(0.1); - // Assume we've been handed enough surplus input to produce `outputLatency()` samples of pre-roll - int surplusInput = std::max(inputLength - inputLatency(), 0); - Sample playbackRate = surplusInput/Sample(outputLatency()); - - if (playbackRate > 1) clearPreviousBlock(); + stft.reset(0.01); + clearPreviousBlock(); + auto seekSamples = inputLatency(); // Move the input position to the start of the sound - int seekSamples = inputLength - surplusInput; seek(inputs, seekSamples, playbackRate); - + + // Enough output to reach the start of the sound auto preRollLength = int(outputLatency()); tmpPreRollBuffer.resize(preRollLength*channels); struct BufferOutput { @@ -229,8 +235,9 @@ struct SignalsmithStretch { // Use the surplus input to produce pre-roll output OffsetIO offsetInput{inputs, seekSamples}; - process(offsetInput, surplusInput, preRollOutput, preRollOutput.length); - + int preRollInputSamples = std::max(inputLength - seekSamples, 0); + process(offsetInput, preRollInputSamples, preRollOutput, preRollLength); + // put the thing down, flip it and reverse it for (auto &v : tmpPreRollBuffer) v = -v; for (int c = 0; c < channels; ++c) { From 015026392a7f8057d4886bf47b8370ad89c606f2 Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Tue, 21 Oct 2025 16:03:40 +0100 Subject: [PATCH 12/14] Make `.flush()` clearer --- signalsmith-stretch.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 34c6948..809fd15 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -566,12 +566,15 @@ struct SignalsmithStretch { outputSeek(inputs, seekLength); - int outputIndex = outputSamples - seekLength/playbackRate; OffsetIO offsetInput{inputs, seekLength}; - process(offsetInput, inputSamples - seekLength, outputs, outputIndex); + int inputMainBlock = inputSamples - seekLength; + int outputMainBlock = inputMainBlock/playbackRate; + // Ordinary process calls, as far as the input goes + process(offsetInput, inputMainBlock, outputs, outputMainBlock); OffsetIO offsetOutput{outputs, outputIndex}; - flush(offsetOutput, outputSamples - outputIndex, playbackRate); + // We've run out of input - this gets the last chunk of output (cheaply) + flush(offsetOutput, outputSamples - outputMainBlock, playbackRate); return true; } From b0ece89d74f9a9775a3781f9095d1b869b1f726b Mon Sep 17 00:00:00 2001 From: Geraint Luff Date: Mon, 17 Nov 2025 14:11:06 +0000 Subject: [PATCH 13/14] Fix `.reset()` and `.exact()` --- signalsmith-stretch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 809fd15..98eba45 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -51,7 +51,7 @@ struct SignalsmithStretch { stashedInput = stft.input; stashedOutput = stft.output; - if (restoreConfig.pending()) { + if (restoreConfig.pending) { stft.setInterval(restoreConfig.interval, stft.kaiser, restoreConfig.asymmetry); restoreConfig = {}; } @@ -572,7 +572,7 @@ struct SignalsmithStretch { // Ordinary process calls, as far as the input goes process(offsetInput, inputMainBlock, outputs, outputMainBlock); - OffsetIO offsetOutput{outputs, outputIndex}; + OffsetIO offsetOutput{outputs, outputMainBlock}; // We've run out of input - this gets the last chunk of output (cheaply) flush(offsetOutput, outputSamples - outputMainBlock, playbackRate); return true; From 14f83ada78c772683c31c6e3f0c0497a4fbdb0df Mon Sep 17 00:00:00 2001 From: Geraint Date: Sun, 23 Nov 2025 17:03:18 +0000 Subject: [PATCH 14/14] Store input/output latency, for `.outputSeek()` consistency --- signalsmith-stretch.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/signalsmith-stretch.h b/signalsmith-stretch.h index 98eba45..8bdbe89 100644 --- a/signalsmith-stretch.h +++ b/signalsmith-stretch.h @@ -40,10 +40,10 @@ struct SignalsmithStretch { // The difference between the internal position (centre of a block) and the input samples you're supplying int inputLatency() const { - return int(stft.analysisLatency()); + return configuredInputLatency; } int outputLatency() const { - return int(stft.synthesisLatency() + _splitComputation*stft.defaultInterval()); + return configuredOutputLatency; } void reset() { @@ -88,6 +88,8 @@ struct SignalsmithStretch { stft.reset(0.1); stashedInput = stft.input; stashedOutput = stft.output; + configuredInputLatency = int(stft.analysisLatency()); + configuredOutputLatency = int(stft.synthesisLatency() + _splitComputation*stft.defaultInterval()); bands = int(stft.bands()); channelBands.assign(bands*channels, Band()); @@ -186,7 +188,7 @@ struct SignalsmithStretch { restoreConfig.pending = true; restoreConfig.interval = stft.defaultInterval(); - Sample playbackRate = std::max(inputLength - inputLatency(), 0)/Sample(outputLatency()); + Sample playbackRate = std::max(inputLength - configuredInputLatency, 0)/Sample(configuredOutputLatency); // Place the next (restored-window) block some time in the future Sample nextBlockOutputStart = stft.defaultInterval()*firstBlockAsymmetry; @@ -217,7 +219,7 @@ struct SignalsmithStretch { stft.reset(0.01); clearPreviousBlock(); - auto seekSamples = inputLatency(); + auto seekSamples = int(stft.analysisLatency()); // Move the input position to the start of the sound seek(inputs, seekSamples, playbackRate); @@ -248,7 +250,7 @@ struct SignalsmithStretch { } } int outputSeekLength(Sample playbackRate) const { - return inputLatency() + playbackRate*outputLatency(); + return configuredInputLatency + playbackRate*configuredOutputLatency; } template @@ -611,6 +613,7 @@ struct SignalsmithStretch { STFT stft; typename STFT::Input stashedInput; typename STFT::Output stashedOutput; + int configuredInputLatency = 0, configuredOutputLatency = 0; std::vector tmpProcessBuffer, tmpPreRollBuffer;