From fcaaf19b74405d32e6b3777a2d3ec16e25f3ea55 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 21 Jan 2026 08:20:13 +0000 Subject: [PATCH] Parallelize CustomWaveform and stbi_write_png using OpenMP Implemented OpenMP parallelization for: 1. CustomWaveform::Draw: Added a parallel path for waveforms without per-point code, replicating the default vertex generation logic to avoid shared state contention. 2. CustomWaveform::SmoothWave: Parallelized the smoothing loop by calculating dependency indices locally. 3. stbi_write_png (SOIL2): Parallelized the PNG line filtering loop, ensuring thread-local scratch buffers. Also updated vendor/SOIL2/CMakeLists.txt to link OpenMP::OpenMP_C when enabled. --- .../MilkdropPreset/CustomWaveform.cpp | 71 ++++++++++++++----- vendor/SOIL2/CMakeLists.txt | 5 ++ vendor/SOIL2/src/SOIL2/stb_image_write.h | 53 ++++++++++++++ 3 files changed, 112 insertions(+), 17 deletions(-) diff --git a/src/libprojectM/MilkdropPreset/CustomWaveform.cpp b/src/libprojectM/MilkdropPreset/CustomWaveform.cpp index 3ca7bbe75..323c1405f 100644 --- a/src/libprojectM/MilkdropPreset/CustomWaveform.cpp +++ b/src/libprojectM/MilkdropPreset/CustomWaveform.cpp @@ -8,6 +8,10 @@ #include #include +#ifdef PRJM_ENABLE_OPENMP +#include +#endif + namespace libprojectM { namespace MilkdropPreset { @@ -141,20 +145,49 @@ void CustomWaveform::Draw(const PerFrameContext& presetPerFrameContext) std::vector colors(sampleCount); float const sampleMultiplicator = sampleCount > 1 ? 
1.0f / static_cast(sampleCount - 1) : 0.0f; - for (int sample = 0; sample < sampleCount; sample++) + + if (!m_perPointContext.perPointCodeHandle) + { + const float invAspectX = m_presetState.renderContext.invAspectX; + const float invAspectY = m_presetState.renderContext.invAspectY; + const float r = static_cast(*m_perFrameContext.r); + const float g = static_cast(*m_perFrameContext.g); + const float b = static_cast(*m_perFrameContext.b); + const float a = static_cast(*m_perFrameContext.a); + +#ifdef PRJM_ENABLE_OPENMP +#pragma omp parallel for schedule(static) +#endif + for (int sample = 0; sample < sampleCount; sample++) + { + float const value1 = sampleDataL[sample]; + float const value2 = sampleDataR[sample]; + + // x = 0.5 + value1, y = 0.5 + value2 + // x' = (x * 2.0 - 1.0) * invAspectX = ((0.5 + value1) * 2.0 - 1.0) * invAspectX = 2.0 * value1 * invAspectX + // y' = (y * -2.0 + 1.0) * invAspectY = ((0.5 + value2) * -2.0 + 1.0) * invAspectY = -2.0 * value2 * invAspectY + + points[sample] = Renderer::Point(value1 * 2.0f * invAspectX, value2 * -2.0f * invAspectY); + colors[sample] = Renderer::Color::Modulo(Renderer::Color(r, g, b, a)); + } + } + else { - float const sampleIndex = static_cast(sample) * sampleMultiplicator; - LoadPerPointEvaluationVariables(sampleIndex, sampleDataL[sample], sampleDataR[sample]); + for (int sample = 0; sample < sampleCount; sample++) + { + float const sampleIndex = static_cast(sample) * sampleMultiplicator; + LoadPerPointEvaluationVariables(sampleIndex, sampleDataL[sample], sampleDataR[sample]); - m_perPointContext.ExecutePerPointCode(); + m_perPointContext.ExecutePerPointCode(); - points[sample] = Renderer::Point(static_cast((*m_perPointContext.x * 2.0 - 1.0) * m_presetState.renderContext.invAspectX), - static_cast((*m_perPointContext.y * -2.0 + 1.0) * m_presetState.renderContext.invAspectY)); + points[sample] = Renderer::Point(static_cast((*m_perPointContext.x * 2.0 - 1.0) * m_presetState.renderContext.invAspectX), + 
static_cast((*m_perPointContext.y * -2.0 + 1.0) * m_presetState.renderContext.invAspectY)); - colors[sample] = Renderer::Color::Modulo(Renderer::Color(static_cast(*m_perPointContext.r), - static_cast(*m_perPointContext.g), - static_cast(*m_perPointContext.b), - static_cast(*m_perPointContext.a))); + colors[sample] = Renderer::Color::Modulo(Renderer::Color(static_cast(*m_perPointContext.r), + static_cast(*m_perPointContext.g), + static_cast(*m_perPointContext.b), + static_cast(*m_perPointContext.a))); + } } SmoothWave(points, colors); @@ -270,18 +303,21 @@ void CustomWaveform::SmoothWave(const std::vector& points, cons constexpr float inverseSum{1.0f / (c1 + c2 + c3 + c4)}; size_t outputIndex = 0; - size_t iBelow = 0; - size_t iAbove2 = 1; - size_t vertexCount = points.size(); auto& outVertices = m_mesh.Vertices(); auto& outColors = m_mesh.Colors(); +#ifdef PRJM_ENABLE_OPENMP +#pragma omp parallel for schedule(static) +#endif for (size_t inputIndex = 0; inputIndex < vertexCount - 1; inputIndex++) { - size_t const iAbove = iAbove2; - iAbove2 = std::min(vertexCount - 1, inputIndex + 2); + size_t const outputIndex = inputIndex * 2; + size_t const iBelow = (inputIndex == 0) ? 
0 : inputIndex - 1; + size_t const iAbove = std::min(vertexCount - 1, inputIndex + 1); + size_t const iAbove2 = std::min(vertexCount - 1, inputIndex + 2); + outVertices[outputIndex] = points[inputIndex]; outColors[outputIndex] = colors[inputIndex]; outColors[outputIndex + 1] = colors[inputIndex]; @@ -289,10 +325,11 @@ void CustomWaveform::SmoothWave(const std::vector& points, cons smoothedPoint = points[inputIndex]; smoothedPoint.SetX((c1 * points[iBelow].X() + c2 * points[inputIndex].X() + c3 * points[iAbove].X() + c4 * points[iAbove2].X()) * inverseSum); smoothedPoint.SetY((c1 * points[iBelow].Y() + c2 * points[inputIndex].Y() + c3 * points[iAbove].Y() + c4 * points[iAbove2].Y()) * inverseSum); - iBelow = inputIndex; - outputIndex += 2; } + // Set outputIndex to the end for the final point assignment + outputIndex = (vertexCount - 1) * 2; + outVertices[outputIndex] = points[vertexCount - 1]; outColors[outputIndex] = colors[vertexCount - 1]; diff --git a/vendor/SOIL2/CMakeLists.txt b/vendor/SOIL2/CMakeLists.txt index f26642eab..ef01c2f71 100644 --- a/vendor/SOIL2/CMakeLists.txt +++ b/vendor/SOIL2/CMakeLists.txt @@ -36,6 +36,11 @@ target_link_libraries(SOIL2 ${PROJECTM_OPENGL_LIBRARIES} ) +if(ENABLE_OPENMP AND OpenMP_C_FOUND) + target_link_libraries(SOIL2 PRIVATE OpenMP::OpenMP_C) + target_compile_definitions(SOIL2 PRIVATE PRJM_ENABLE_OPENMP) +endif() + if(USE_GLES) target_compile_definitions(SOIL2 PRIVATE diff --git a/vendor/SOIL2/src/SOIL2/stb_image_write.h b/vendor/SOIL2/src/SOIL2/stb_image_write.h index ab2de5c20..8a8ea82c6 100644 --- a/vendor/SOIL2/src/SOIL2/stb_image_write.h +++ b/vendor/SOIL2/src/SOIL2/stb_image_write.h @@ -203,6 +203,10 @@ STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); #ifdef STB_IMAGE_WRITE_IMPLEMENTATION +#ifdef PRJM_ENABLE_OPENMP +#include +#endif + #ifdef _WIN32 #ifndef _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS @@ -1147,6 +1151,54 @@ STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, 
int s } filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; +#ifdef PRJM_ENABLE_OPENMP + { + int success = 1; +#pragma omp parallel shared(success) + { + signed char *line_buffer = (signed char *) STBIW_MALLOC(x * n); + if (!line_buffer) { +#pragma omp atomic write + success = 0; + } + // OpenMP requires every thread of the team to reach the worksharing loop, so it must not sit in an else branch. +#pragma omp for + for (j=0; j < y; ++j) { + if (line_buffer) { // a thread whose scratch allocation failed skips its rows; the output is discarded below because success is 0 + int filter_type; + if (force_filter > -1) { + filter_type = force_filter; + stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer); + } else { // Estimate the best filter by running through all of them: + int best_filter = 0, best_filter_val = 0x7fffffff, est, i; + for (filter_type = 0; filter_type < 5; filter_type++) { + stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer); + + // Estimate the entropy of the line using this filter; the less, the better. + est = 0; + for (i = 0; i < x*n; ++i) { + est += abs((signed char) line_buffer[i]); + } + if (est < best_filter_val) { + best_filter_val = est; + best_filter = filter_type; + } + } + if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it + stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer); + filter_type = best_filter; + } + } + // when we get here, filter_type contains the filter type, and line_buffer contains the data + filt[j*(x*n+1)] = (unsigned char) filter_type; + STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); + } + } + if (line_buffer) STBIW_FREE(line_buffer); + } + if (!success) { STBIW_FREE(filt); return 0; } + } +#else line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } for (j=0; j < y; ++j) { int filter_type; @@ -1178,6 +1230,7 @@ STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int s STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); } STBIW_FREE(line_buffer); +#endif zlib = 
stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); STBIW_FREE(filt); if (!zlib) return 0;