From 7ef9be0b3dea2b5518b50922f69e77790ce65940 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Guitaut?= Date: Tue, 25 Nov 2025 17:54:56 +0100 Subject: [PATCH 1/5] WIP: early prototyping --- lib/docker_manager/pitchfork_adapter.rb | 82 +++++++++++++++++++++ lib/docker_manager/unicorn_adapter.rb | 81 +++++++++++++++++++++ lib/docker_manager/upgrader.rb | 93 ++++++++---------------- lib/docker_manager/web_server_adapter.rb | 86 ++++++++++++++++++++++ 4 files changed, 281 insertions(+), 61 deletions(-) create mode 100644 lib/docker_manager/pitchfork_adapter.rb create mode 100644 lib/docker_manager/unicorn_adapter.rb create mode 100644 lib/docker_manager/web_server_adapter.rb diff --git a/lib/docker_manager/pitchfork_adapter.rb b/lib/docker_manager/pitchfork_adapter.rb new file mode 100644 index 0000000..0c35c22 --- /dev/null +++ b/lib/docker_manager/pitchfork_adapter.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +module DockerManager + # Adapter for managing Pitchfork web server processes + class PitchforkAdapter < WebServerAdapter + # @return [String] + def server_name + "Pitchfork" + end + + # Pitchfork doesn't use a separate launcher process in the same way as Unicorn + # The monitor process is the main process we interact with + # @return [Integer] + def launcher_pid + master_pid + end + + # @return [Integer] + def master_pid + `ps aux | grep "pitchfork monitor" | grep -v "grep" | awk '{print $2}'`.strip.to_i + end + + # @param master_pid [Integer] + # @return [Array] + def workers(master_pid) + # Workers are children of the monitor process in Pitchfork + `ps -f --ppid #{master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i) + end + + # Reload Pitchfork by sending USR2 to monitor, which triggers reforking + # In Pitchfork, USR2 promotes a worker to a new mold and gradually replaces workers + # @param launcher_pid [Integer] + # @param original_master_pid [Integer] + # @param logger [#call] + def reload(launcher_pid, original_master_pid, logger) + logger.call("Triggering #{server_name} refork via monitor pid: #{launcher_pid}") + Process.kill("USR2", launcher_pid) + + # Pitchfork's reforking is gradual - workers are replaced one by one + # Wait for the process to complete by checking the generation of workers + logger.call("Waiting for #{server_name} to complete reforking") + + # Give it some time to start the refork process + sleep 5 + + # Wait for workers to be ready by checking the health endpoint + iterations = 0 + while `curl -s #{local_web_url}` != "ok" + iterations += 1 + break if iterations >= 60 + logger.call("Waiting for #{server_name} workers to be ready#{"." * iterations}") + sleep 2 + end + + # Additional wait to ensure all workers have been reforked + # This is a simplified approach - in production you might want to check worker generations + logger.call("Allowing time for #{server_name} to complete worker rollout") + sleep 10 + end + + # Pitchfork may not support pausing sidekiq via TSTP signal + # Sidekiq is managed through service workers in the config + # @param master_pid [Integer] + # @return [Boolean] + def pause_sidekiq(master_pid) + # Pitchfork doesn't support TSTP/CONT for sidekiq control + # Sidekiq management is handled differently through service workers + false + end + + # @param master_pid [Integer] + # @return [Boolean] + def resume_sidekiq(master_pid) + false + end + + # @return [Integer] + def min_workers + 1 + end + end +end diff --git a/lib/docker_manager/unicorn_adapter.rb b/lib/docker_manager/unicorn_adapter.rb new file mode 100644 index 0000000..7b17fc0 --- /dev/null +++ b/lib/docker_manager/unicorn_adapter.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module DockerManager + # Adapter for managing Unicorn web server processes + class UnicornAdapter < WebServerAdapter + # @return [String] + def server_name + "Unicorn" + end + + # @return [Integer] + def launcher_pid + `ps aux | grep unicorn_launcher | grep -v sudo | grep -v grep | awk '{ print $2 }'` + .strip + .to_i + end + + # @return [Integer] + def master_pid + `ps aux | grep "unicorn master -E" | grep -v "grep" | awk '{print $2}'`.strip.to_i + end + + # @param master_pid [Integer] + # @return [Array] + def workers(master_pid) + `ps -f --ppid #{master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i) + end + + # Reload Unicorn by sending USR2 to launcher, which spawns a new master + # @param launcher_pid [Integer] + # @param original_master_pid [Integer] + # @param logger [#call] + def reload(launcher_pid, original_master_pid, logger) + logger.call("Restarting #{server_name} pid: #{launcher_pid}") + Process.kill("USR2", launcher_pid) + + # Wait for the original master to exit (it will spawn a new one) + iterations = 0 + while pid_exists?(original_master_pid) + iterations += 1 + break if iterations >= 60 + logger.call("Waiting for #{server_name} to reload#{"." * iterations}") + sleep 2 + end + + # Wait for new workers to be ready + iterations = 0 + while `curl -s #{local_web_url}` != "ok" + iterations += 1 + break if iterations >= 60 + logger.call("Waiting for #{server_name} workers to start up#{"." * iterations}") + sleep 2 + end + end + + # Pause Sidekiq by sending TSTP signal to Unicorn master + # @param master_pid [Integer] + # @return [Boolean] + def pause_sidekiq(master_pid) + return false if ENV["UNICORN_SIDEKIQS"].to_i <= 0 + + Process.kill("TSTP", master_pid) + sleep 1 + # Older versions do not have support, so quickly send a CONT so master process is not hung + Process.kill("CONT", master_pid) + true + end + + # Unicorn handles resume automatically with CONT signal sent after TSTP + # @param master_pid [Integer] + # @return [Boolean] + def resume_sidekiq(master_pid) + false + end + + # @return [Integer] + def min_workers + 2 + end + end +end diff --git a/lib/docker_manager/upgrader.rb b/lib/docker_manager/upgrader.rb index f57040b..60faace 100644 --- a/lib/docker_manager/upgrader.rb +++ b/lib/docker_manager/upgrader.rb @@ -1,11 +1,16 @@ # frozen_string_literal: true +require_relative "web_server_adapter" +require_relative "unicorn_adapter" +require_relative "pitchfork_adapter" + class DockerManager::Upgrader def initialize(user_id, repos, from_version) @user_id = user_id @user = User.find(user_id) @repos = repos.is_a?(Array) ? repos : [repos] @from_version = from_version + @web_server = select_web_server_adapter end def reset! @@ -16,7 +21,7 @@ def reset! end def min_workers - 1 + @web_server.min_workers end def upgrade @@ -31,40 +36,40 @@ def upgrade log("*** Please be patient, next steps might take a while ***") log("********************************************************") - launcher_pid = unicorn_launcher_pid - master_pid = unicorn_master_pid - workers = unicorn_workers(master_pid).size + launcher_pid = @web_server.launcher_pid + master_pid = @web_server.master_pid + workers = @web_server.workers(master_pid).size - if workers < 2 - log("ABORTING, you do not have enough unicorn workers running") + if workers < @web_server.min_workers + log("ABORTING, you do not have enough #{@web_server.server_name} workers running") raise "Not enough workers" end if launcher_pid <= 0 || master_pid <= 0 - log("ABORTING, missing unicorn launcher or unicorn master") - raise "No unicorn master or launcher" + log("ABORTING, missing #{@web_server.server_name} launcher or master/monitor") + raise "No #{@web_server.server_name} master or launcher" end percent(5) - log("Cycling Unicorn, to free up memory") - reload_unicorn(launcher_pid) + log("Cycling #{@web_server.server_name}, to free up memory") + @web_server.reload(launcher_pid, master_pid, method(:log)) percent(10) reloaded = false num_workers_spun_down = workers - min_workers if num_workers_spun_down.positive? - log "Stopping #{workers - min_workers} Unicorn worker(s), to free up memory" - num_workers_spun_down.times { Process.kill("TTOU", unicorn_master_pid) } + log "Stopping #{num_workers_spun_down} #{@web_server.server_name} worker(s), to free up memory" + @web_server.scale_down_workers(master_pid, num_workers_spun_down) end if ENV["UNICORN_SIDEKIQS"].to_i > 0 - log "Stopping job queue to reclaim memory, master pid is #{master_pid}" - Process.kill("TSTP", unicorn_master_pid) - sleep 1 - # older versions do not have support, so quickly send a cont so master process is not hung - Process.kill("CONT", unicorn_master_pid) + if @web_server.pause_sidekiq(master_pid) + log "Stopping job queue to reclaim memory, master pid is #{master_pid}" + else + log "Note: #{@web_server.server_name} does not support pausing sidekiq via signals" + end end # HEAD@{upstream} is just a fancy way how to say origin/main (in normal case) @@ -117,7 +122,7 @@ def upgrade run("bundle exec rake s3:upload_assets") if using_s3_assets percent(80) - reload_unicorn(launcher_pid) + @web_server.reload(launcher_pid, master_pid, method(:log)) reloaded = true # Flush nginx cache here - this is not critical, and the rake task may not exist yet - ignore failures here. @@ -147,8 +152,8 @@ def upgrade end if num_workers_spun_down.to_i.positive? && !reloaded - log "Spinning up #{num_workers_spun_down} Unicorn worker(s) that were stopped initially" - num_workers_spun_down.times { Process.kill("TTIN", unicorn_master_pid) } + log "Spinning up #{num_workers_spun_down} #{@web_server.server_name} worker(s) that were stopped initially" + @web_server.scale_up_workers(master_pid, num_workers_spun_down) end raise ex @@ -269,47 +274,13 @@ def log_version_upgrade private - def pid_exists?(pid) - Process.getpgid(pid) - rescue Errno::ESRCH - false - end - - def unicorn_launcher_pid - `ps aux | grep unicorn_launcher | grep -v sudo | grep -v grep | awk '{ print $2 }'`.strip.to_i - end - - def unicorn_master_pid - `ps aux | grep "unicorn master -E" | grep -v "grep" | awk '{print $2}'`.strip.to_i - end - - def unicorn_workers(master_pid) - `ps -f --ppid #{master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i) - end - - def local_web_url - "http://127.0.0.1:#{ENV["UNICORN_PORT"] || 3000}/srv/status" - end - - def reload_unicorn(launcher_pid) - log("Restarting unicorn pid: #{launcher_pid}") - original_master_pid = unicorn_master_pid - Process.kill("USR2", launcher_pid) - - iterations = 0 - while pid_exists?(original_master_pid) - iterations += 1 - break if iterations >= 60 - log("Waiting for Unicorn to reload#{"." * iterations}") - sleep 2 - end - - iterations = 0 - while `curl -s #{local_web_url}` != "ok" - iterations += 1 - break if iterations >= 60 - log("Waiting for Unicorn workers to start up#{"." * iterations}") - sleep 2 + # Select the appropriate web server adapter based on what's defined + # @return [WebServerAdapter] + def select_web_server_adapter + if defined?(Unicorn) + DockerManager::UnicornAdapter.new + else + DockerManager::PitchforkAdapter.new end end end diff --git a/lib/docker_manager/web_server_adapter.rb b/lib/docker_manager/web_server_adapter.rb new file mode 100644 index 0000000..39ea99c --- /dev/null +++ b/lib/docker_manager/web_server_adapter.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +module DockerManager + # Base adapter class for web server process management + # Provides an interface for interacting with Unicorn or Pitchfork servers + class WebServerAdapter + # @return [String] the name of the web server + def server_name + raise NotImplementedError + end + + # @return [Integer] the PID of the launcher/supervisor process (0 if not found) + def launcher_pid + raise NotImplementedError + end + + # @return [Integer] the PID of the master/monitor process (0 if not found) + def master_pid + raise NotImplementedError + end + + # @param master_pid [Integer] the master/monitor process PID + # @return [Array] array of worker PIDs + def workers(master_pid) + raise NotImplementedError + end + + # @return [String] the local web server URL for health checks + def local_web_url + "http://127.0.0.1:#{ENV["UNICORN_PORT"] || 3000}/srv/status" + end + + # Reload the web server + # @param launcher_pid [Integer] the launcher/supervisor PID to signal + # @param master_pid [Integer] the current master/monitor PID + # @param logger [#call] a proc/lambda for logging messages + def reload(launcher_pid, master_pid, logger) + raise NotImplementedError + end + + # Scale down workers by sending TTOU signals + # @param master_pid [Integer] the master/monitor PID + # @param count [Integer] number of workers to scale down + def scale_down_workers(master_pid, count) + count.times { Process.kill("TTOU", master_pid) } + end + + # Scale up workers by sending TTIN signals + # @param master_pid [Integer] the master/monitor PID + # @param count [Integer] number of workers to scale up + def scale_up_workers(master_pid, count) + count.times { Process.kill("TTIN", master_pid) } + end + + # Pause sidekiq workers if supported + # @param master_pid [Integer] the master/monitor PID + # @return [Boolean] true if sidekiq was paused, false if not supported + def pause_sidekiq(master_pid) + false + end + + # Resume sidekiq workers if supported + # @param master_pid [Integer] the master/monitor PID + # @return [Boolean] true if sidekiq was resumed, false if not supported + def resume_sidekiq(master_pid) + false + end + + # Minimum number of workers required for safe upgrade + # @return [Integer] + def min_workers + 1 + end + + protected + + # Check if a PID exists + # @param pid [Integer] + # @return [Boolean] + def pid_exists?(pid) + Process.getpgid(pid) + rescue Errno::ESRCH + false + end + end +end From e0f12d72d2bc38332f4cb2bb2a884dc5c7eb182e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Guitaut?= Date: Wed, 26 Nov 2025 18:12:47 +0100 Subject: [PATCH 2/5] WIP: heavy WIP still. Doing tests locally --- lib/docker_manager/unicorn_adapter.rb | 18 ++--- lib/docker_manager/upgrader.rb | 94 ++++++++++++------------ lib/docker_manager/web_server_adapter.rb | 44 +---------- 3 files changed, 57 insertions(+), 99 deletions(-) diff --git a/lib/docker_manager/unicorn_adapter.rb b/lib/docker_manager/unicorn_adapter.rb index 7b17fc0..a65874d 100644 --- a/lib/docker_manager/unicorn_adapter.rb +++ b/lib/docker_manager/unicorn_adapter.rb @@ -10,9 +10,7 @@ def server_name # @return [Integer] def launcher_pid - `ps aux | grep unicorn_launcher | grep -v sudo | grep -v grep | awk '{ print $2 }'` - .strip - .to_i + `ps aux | grep unicorn_launcher | grep -v sudo | grep -v grep | awk '{ print $2 }'`.strip.to_i end # @return [Integer] @@ -22,7 +20,7 @@ def master_pid # @param master_pid [Integer] # @return [Array] - def workers(master_pid) + def workers `ps -f --ppid #{master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i) end @@ -30,8 +28,9 @@ def workers(master_pid) # @param launcher_pid [Integer] # @param original_master_pid [Integer] # @param logger [#call] - def reload(launcher_pid, original_master_pid, logger) + def reload(launcher_pid, logger) logger.call("Restarting #{server_name} pid: #{launcher_pid}") + original_master_pid = master_pid Process.kill("USR2", launcher_pid) # Wait for the original master to exit (it will spawn a new one) @@ -56,7 +55,7 @@ def reload(launcher_pid, original_master_pid, logger) # Pause Sidekiq by sending TSTP signal to Unicorn master # @param master_pid [Integer] # @return [Boolean] - def pause_sidekiq(master_pid) + def pause_sidekiq return false if ENV["UNICORN_SIDEKIQS"].to_i <= 0 Process.kill("TSTP", master_pid) @@ -66,13 +65,6 @@ def pause_sidekiq(master_pid) true end - # Unicorn handles resume automatically with CONT signal sent after TSTP - # @param master_pid [Integer] - # @return [Boolean] - def resume_sidekiq(master_pid) - false - end - # @return [Integer] def min_workers 2 diff --git a/lib/docker_manager/upgrader.rb b/lib/docker_manager/upgrader.rb index 60faace..2510720 100644 --- a/lib/docker_manager/upgrader.rb +++ b/lib/docker_manager/upgrader.rb @@ -38,7 +38,7 @@ def upgrade launcher_pid = @web_server.launcher_pid master_pid = @web_server.master_pid - workers = @web_server.workers(master_pid).size + workers = @web_server.workers.size if workers < @web_server.min_workers log("ABORTING, you do not have enough #{@web_server.server_name} workers running") @@ -53,7 +53,7 @@ def upgrade percent(5) log("Cycling #{@web_server.server_name}, to free up memory") - @web_server.reload(launcher_pid, master_pid, method(:log)) + @web_server.reload(launcher_pid, method(:log)) percent(10) reloaded = false @@ -61,11 +61,11 @@ def upgrade if num_workers_spun_down.positive? log "Stopping #{num_workers_spun_down} #{@web_server.server_name} worker(s), to free up memory" - @web_server.scale_down_workers(master_pid, num_workers_spun_down) + @web_server.scale_down_workers(num_workers_spun_down) end if ENV["UNICORN_SIDEKIQS"].to_i > 0 - if @web_server.pause_sidekiq(master_pid) + if @web_server.pause_sidekiq log "Stopping job queue to reclaim memory, master pid is #{master_pid}" else log "Note: #{@web_server.server_name} does not support pausing sidekiq via signals" @@ -76,45 +76,49 @@ def upgrade # see http://stackoverflow.com/a/12699604/84283 @repos.each_with_index do |repo, index| # We automatically handle renames from `master` -> `main` - if repo.upstream_branch == "origin/master" && repo.tracking_ref == "origin/main" - log "Branch has changed to #{repo.tracking_ref}" - - # Just in case `main` exists locally but is not used. Perhaps it was fetched? - if repo.has_local_main? - run "cd #{repo.path} && git checkout main" - else - run "cd #{repo.path} && git branch -m master main" - end - - run "cd #{repo.path} && git fetch origin --tags --force" - run "cd #{repo.path} && git branch -u origin/main main" - run("cd #{repo.path} && git reset --hard HEAD@{upstream}") - else - run("cd #{repo.path} && git fetch --tags --prune-tags --prune --force") - - if repo.detached_head? - run("cd #{repo.path} && git reset --hard") - run("cd #{repo.path} && git -c advice.detachedHead=false checkout #{repo.tracking_ref}") - else - run("cd #{repo.path} && git reset --hard HEAD@{upstream}") - end - end + log("Doing git stuff…") + sleep 5 + # if repo.upstream_branch == "origin/master" && repo.tracking_ref == "origin/main" + # log "Branch has changed to #{repo.tracking_ref}" + # + # # Just in case `main` exists locally but is not used. Perhaps it was fetched? + # if repo.has_local_main? + # run "cd #{repo.path} && git checkout main" + # else + # run "cd #{repo.path} && git branch -m master main" + # end + # + # run "cd #{repo.path} && git fetch origin --tags --force" + # run "cd #{repo.path} && git branch -u origin/main main" + # run("cd #{repo.path} && git reset --hard HEAD@{upstream}") + # else + # run("cd #{repo.path} && git fetch --tags --prune-tags --prune --force") + # + # if repo.detached_head? + # run("cd #{repo.path} && git reset --hard") + # run("cd #{repo.path} && git -c advice.detachedHead=false checkout #{repo.tracking_ref}") + # else + # run("cd #{repo.path} && git reset --hard HEAD@{upstream}") + # end + # end percent(20 * (index + 1) / @repos.size) end - run("bundle install --retry 3 --jobs 4") - run("if [ -f yarn.lock ]; then yarn install; else CI=1 pnpm install; fi") - begin - run("LOAD_PLUGINS=0 bundle exec rake plugin:pull_compatible_all") - rescue RuntimeError - log "Unable checkout compatible plugin versions" - end + log("Installing stuff…") + sleep 5 + # run("bundle install --retry 3 --jobs 4") + # run("if [ -f yarn.lock ]; then yarn install; else CI=1 pnpm install; fi") + # begin + # run("LOAD_PLUGINS=0 bundle exec rake plugin:pull_compatible_all") + # rescue RuntimeError + # log "Unable checkout compatible plugin versions" + # end percent(30) - run("SKIP_POST_DEPLOYMENT_MIGRATIONS=1 bundle exec rake multisite:migrate") + # run("SKIP_POST_DEPLOYMENT_MIGRATIONS=1 bundle exec rake multisite:migrate") percent(40) log("*** Bundling assets. This will take a while *** ") - run("bundle exec rake themes:update assets:precompile") + # run("bundle exec rake themes:update assets:precompile") using_s3_assets = ENV["DISCOURSE_USE_S3"] && ENV["DISCOURSE_S3_BUCKET"] && ENV["DISCOURSE_S3_CDN_URL"] @@ -122,16 +126,16 @@ def upgrade run("bundle exec rake s3:upload_assets") if using_s3_assets percent(80) - @web_server.reload(launcher_pid, master_pid, method(:log)) + @web_server.reload(launcher_pid, method(:log)) reloaded = true # Flush nginx cache here - this is not critical, and the rake task may not exist yet - ignore failures here. percent(85) - begin - run("bundle exec rake assets:flush_sw") - rescue RuntimeError - log "WARNING: Unable to flush service worker file" - end + # begin + # run("bundle exec rake assets:flush_sw") + # rescue RuntimeError + # log "WARNING: Unable to flush service worker file" + # end percent(90) log("Running post deploy migrations") @@ -153,7 +157,7 @@ def upgrade if num_workers_spun_down.to_i.positive? && !reloaded log "Spinning up #{num_workers_spun_down} #{@web_server.server_name} worker(s) that were stopped initially" - @web_server.scale_up_workers(master_pid, num_workers_spun_down) + @web_server.scale_up_workers(num_workers_spun_down) end raise ex @@ -274,10 +278,8 @@ def log_version_upgrade private - # Select the appropriate web server adapter based on what's defined - # @return [WebServerAdapter] def select_web_server_adapter - if defined?(Unicorn) + if `pgrep unicorn`.present? DockerManager::UnicornAdapter.new else DockerManager::PitchforkAdapter.new diff --git a/lib/docker_manager/web_server_adapter.rb b/lib/docker_manager/web_server_adapter.rb index 39ea99c..eb53a6d 100644 --- a/lib/docker_manager/web_server_adapter.rb +++ b/lib/docker_manager/web_server_adapter.rb @@ -4,65 +4,29 @@ module DockerManager # Base adapter class for web server process management # Provides an interface for interacting with Unicorn or Pitchfork servers class WebServerAdapter - # @return [String] the name of the web server - def server_name - raise NotImplementedError - end - - # @return [Integer] the PID of the launcher/supervisor process (0 if not found) - def launcher_pid - raise NotImplementedError - end - - # @return [Integer] the PID of the master/monitor process (0 if not found) - def master_pid - raise NotImplementedError - end - - # @param master_pid [Integer] the master/monitor process PID - # @return [Array] array of worker PIDs - def workers(master_pid) - raise NotImplementedError - end - # @return [String] the local web server URL for health checks def local_web_url "http://127.0.0.1:#{ENV["UNICORN_PORT"] || 3000}/srv/status" end - # Reload the web server - # @param launcher_pid [Integer] the launcher/supervisor PID to signal - # @param master_pid [Integer] the current master/monitor PID - # @param logger [#call] a proc/lambda for logging messages - def reload(launcher_pid, master_pid, logger) - raise NotImplementedError - end - # Scale down workers by sending TTOU signals # @param master_pid [Integer] the master/monitor PID # @param count [Integer] number of workers to scale down - def scale_down_workers(master_pid, count) + def scale_down_workers(count) count.times { Process.kill("TTOU", master_pid) } end # Scale up workers by sending TTIN signals # @param master_pid [Integer] the master/monitor PID # @param count [Integer] number of workers to scale up - def scale_up_workers(master_pid, count) + def scale_up_workers(count) count.times { Process.kill("TTIN", master_pid) } end # Pause sidekiq workers if supported # @param master_pid [Integer] the master/monitor PID # @return [Boolean] true if sidekiq was paused, false if not supported - def pause_sidekiq(master_pid) - false - end - - # Resume sidekiq workers if supported - # @param master_pid [Integer] the master/monitor PID - # @return [Boolean] true if sidekiq was resumed, false if not supported - def resume_sidekiq(master_pid) + def pause_sidekiq false end @@ -72,7 +36,7 @@ def min_workers 1 end - protected + private # Check if a PID exists # @param pid [Integer] From 926faca352033b69a6fbfaf373e183775663a86a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Guitaut?= Date: Thu, 4 Dec 2025 17:50:06 +0100 Subject: [PATCH 3/5] WIP: refactor --- lib/docker_manager/pitchfork_adapter.rb | 59 +++++++------------------ lib/docker_manager/unicorn_adapter.rb | 16 +------ lib/docker_manager/upgrader.rb | 6 ++- 3 files changed, 21 insertions(+), 60 deletions(-) diff --git a/lib/docker_manager/pitchfork_adapter.rb b/lib/docker_manager/pitchfork_adapter.rb index 0c35c22..82d55ac 100644 --- a/lib/docker_manager/pitchfork_adapter.rb +++ b/lib/docker_manager/pitchfork_adapter.rb @@ -1,49 +1,38 @@ # frozen_string_literal: true module DockerManager - # Adapter for managing Pitchfork web server processes class PitchforkAdapter < WebServerAdapter - # @return [String] def server_name "Pitchfork" end - # Pitchfork doesn't use a separate launcher process in the same way as Unicorn - # The monitor process is the main process we interact with - # @return [Integer] def launcher_pid - master_pid + `pgrep -f unicorn_launcher`.strip.to_i end - # @return [Integer] def master_pid `ps aux | grep "pitchfork monitor" | grep -v "grep" | awk '{print $2}'`.strip.to_i end - # @param master_pid [Integer] - # @return [Array] - def workers(master_pid) - # Workers are children of the monitor process in Pitchfork + def workers `ps -f --ppid #{master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i) end - # Reload Pitchfork by sending USR2 to monitor, which triggers reforking - # In Pitchfork, USR2 promotes a worker to a new mold and gradually replaces workers - # @param launcher_pid [Integer] - # @param original_master_pid [Integer] - # @param logger [#call] - def reload(launcher_pid, original_master_pid, logger) - logger.call("Triggering #{server_name} refork via monitor pid: #{launcher_pid}") + def reload(launcher_pid, logger) + logger.call("Restarting #{server_name} pid: #{launcher_pid}") + original_master_pid = master_pid Process.kill("USR2", launcher_pid) - # Pitchfork's reforking is gradual - workers are replaced one by one - # Wait for the process to complete by checking the generation of workers - logger.call("Waiting for #{server_name} to complete reforking") - - # Give it some time to start the refork process - sleep 5 + # Wait for the original master to exit (it will spawn a new one) + iterations = 0 + while pid_exists?(original_master_pid) + iterations += 1 + break if iterations >= 60 + logger.call("Waiting for #{server_name} to reload#{"." * iterations}") + sleep 2 + end - # Wait for workers to be ready by checking the health endpoint + # Wait for workers to be ready iterations = 0 while `curl -s #{local_web_url}` != "ok" iterations += 1 @@ -51,30 +40,12 @@ def reload(launcher_pid, original_master_pid, logger) logger.call("Waiting for #{server_name} workers to be ready#{"." * iterations}") sleep 2 end - - # Additional wait to ensure all workers have been reforked - # This is a simplified approach - in production you might want to check worker generations - logger.call("Allowing time for #{server_name} to complete worker rollout") - sleep 10 - end - - # Pitchfork may not support pausing sidekiq via TSTP signal - # Sidekiq is managed through service workers in the config - # @param master_pid [Integer] - # @return [Boolean] - def pause_sidekiq(master_pid) - # Pitchfork doesn't support TSTP/CONT for sidekiq control - # Sidekiq management is handled differently through service workers - false end - # @param master_pid [Integer] - # @return [Boolean] - def resume_sidekiq(master_pid) + def pause_sidekiq false end - # @return [Integer] def min_workers 1 end diff --git a/lib/docker_manager/unicorn_adapter.rb b/lib/docker_manager/unicorn_adapter.rb index a65874d..1104cb4 100644 --- a/lib/docker_manager/unicorn_adapter.rb +++ b/lib/docker_manager/unicorn_adapter.rb @@ -1,33 +1,23 @@ # frozen_string_literal: true module DockerManager - # Adapter for managing Unicorn web server processes class UnicornAdapter < WebServerAdapter - # @return [String] def server_name "Unicorn" end - # @return [Integer] def launcher_pid - `ps aux | grep unicorn_launcher | grep -v sudo | grep -v grep | awk '{ print $2 }'`.strip.to_i + `pgrep -f unicorn_launcher`.strip.to_i end - # @return [Integer] def master_pid `ps aux | grep "unicorn master -E" | grep -v "grep" | awk '{print $2}'`.strip.to_i end - # @param master_pid [Integer] - # @return [Array] def workers `ps -f --ppid #{master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i) end - # Reload Unicorn by sending USR2 to launcher, which spawns a new master - # @param launcher_pid [Integer] - # @param original_master_pid [Integer] - # @param logger [#call] def reload(launcher_pid, logger) logger.call("Restarting #{server_name} pid: #{launcher_pid}") original_master_pid = master_pid @@ -52,9 +42,6 @@ def reload(launcher_pid, logger) end end - # Pause Sidekiq by sending TSTP signal to Unicorn master - # @param master_pid [Integer] - # @return [Boolean] def pause_sidekiq return false if ENV["UNICORN_SIDEKIQS"].to_i <= 0 @@ -65,7 +52,6 @@ def pause_sidekiq true end - # @return [Integer] def min_workers 2 end diff --git a/lib/docker_manager/upgrader.rb b/lib/docker_manager/upgrader.rb index 2510720..95419fc 100644 --- a/lib/docker_manager/upgrader.rb +++ b/lib/docker_manager/upgrader.rb @@ -119,6 +119,10 @@ def upgrade percent(40) log("*** Bundling assets. This will take a while *** ") # run("bundle exec rake themes:update assets:precompile") + 30.times do + log(".") + sleep 1 + end using_s3_assets = ENV["DISCOURSE_USE_S3"] && ENV["DISCOURSE_S3_BUCKET"] && ENV["DISCOURSE_S3_CDN_URL"] @@ -279,7 +283,7 @@ def log_version_upgrade private def select_web_server_adapter - if `pgrep unicorn`.present? + if `pgrep -f '^unicorn[^_]'`.present? DockerManager::UnicornAdapter.new else DockerManager::PitchforkAdapter.new From 58246f6d9b879c7f4d9f351633288a46c5d50064 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Guitaut?= Date: Thu, 4 Dec 2025 17:55:37 +0100 Subject: [PATCH 4/5] WIP: use pgrep --- lib/docker_manager/pitchfork_adapter.rb | 4 ++-- lib/docker_manager/unicorn_adapter.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/docker_manager/pitchfork_adapter.rb b/lib/docker_manager/pitchfork_adapter.rb index 82d55ac..3a01b82 100644 --- a/lib/docker_manager/pitchfork_adapter.rb +++ b/lib/docker_manager/pitchfork_adapter.rb @@ -11,11 +11,11 @@ def launcher_pid end def master_pid - `ps aux | grep "pitchfork monitor" | grep -v "grep" | awk '{print $2}'`.strip.to_i + `pgrep -f "pitchfork monitor"`.strip.to_i end def workers - `ps -f --ppid #{master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i) + `pgrep -f -P #{master_pid} worker`.split("\n").map(&:to_i) end def reload(launcher_pid, logger) diff --git a/lib/docker_manager/unicorn_adapter.rb b/lib/docker_manager/unicorn_adapter.rb index 1104cb4..0b3ad48 100644 --- a/lib/docker_manager/unicorn_adapter.rb +++ b/lib/docker_manager/unicorn_adapter.rb @@ -11,11 +11,11 @@ def launcher_pid end def master_pid - `ps aux | grep "unicorn master -E" | grep -v "grep" | awk '{print $2}'`.strip.to_i + `pgrep -f "unicorn master -E"`.strip.to_i end def workers - `ps -f --ppid #{master_pid} | grep worker | awk '{ print $2 }'`.split("\n").map(&:to_i) + `pgrep -f -P #{master_pid} worker`.split("\n").map(&:to_i) end def reload(launcher_pid, logger) From 38dde2962e63c551b1ef4c071b478c6eb5ad8398 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Guitaut?= Date: Thu, 11 Dec 2025 17:09:43 +0100 Subject: [PATCH 5/5] WIP: put info into redis that server is restarting --- lib/docker_manager/upgrader.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/docker_manager/upgrader.rb b/lib/docker_manager/upgrader.rb index 95419fc..64df665 100644 --- a/lib/docker_manager/upgrader.rb +++ b/lib/docker_manager/upgrader.rb @@ -53,6 +53,7 @@ def upgrade percent(5) log("Cycling #{@web_server.server_name}, to free up memory") + Discourse.redis.setex("docker_manager:upgrade:server_restarting", 2.minutes.to_i, 1) @web_server.reload(launcher_pid, method(:log)) percent(10) @@ -167,6 +168,7 @@ def upgrade raise ex ensure @repos.each(&:stop_upgrading) + Discourse.redis.del("docker_manager:upgrade:server_restarting") end def publish(type, value)