From 372b9a04e35e2f97cd2c1f1b99fc83e1a53d5d01 Mon Sep 17 00:00:00 2001 From: Terry Brady Date: Mon, 21 Jul 2025 15:43:25 -0700 Subject: [PATCH 1/2] simplify nodes --- src-colladmin/actions/zookeeper_action.rb | 197 +--------------------- 1 file changed, 3 insertions(+), 194 deletions(-) diff --git a/src-colladmin/actions/zookeeper_action.rb b/src-colladmin/actions/zookeeper_action.rb index 86bea37..25dc4ba 100644 --- a/src-colladmin/actions/zookeeper_action.rb +++ b/src-colladmin/actions/zookeeper_action.rb @@ -84,6 +84,7 @@ def perform_action # Collection Admin Task class - see config/actions.yml for description class ZookeeperDumpAction < ZookeeperAction def initialize(config, action, path, myparams) + @node_dump = MerrittZK::NodeDump.new(@zk, myparams) @zkpath = myparams.fetch('zkpath', '/') @mode = myparams.fetch('mode', 'data') @full = false @@ -93,193 +94,7 @@ def initialize(config, action, path, myparams) end def data - @buf = StringIO.new - @buf << "Node State as of #{Time.now}:\n" - dump_node(@zkpath) - @buf.rewind - @buf.read - end - - def standard_node(n) - n =~ %r{^/(access|batch-uuids|batches|jobs|locks|migration)(/|$)} - end - - def system_node(n) - n =~ %r{^/zookeeper(/|$)} - end - - def show_data(n) - d = get_data(n) - df = d.is_a?(Hash) ? "\n#{JSON.pretty_generate(d)}" : " #{d}" - df = df.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?') - @buf << df unless @buf.nil? - rescue StandardError => e - @buf << e - end - - def get_data(n) - d = @zk.get(n)[0] - return '' if d.nil? - - begin - JSON.parse(d.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?'), symbolize_names: true) - rescue JSON::ParserError - d - rescue StandardError => e - "\n #{e.class}:#{e}:\n #{d}" - end - end - - def node_datetime(n) - return 'na' unless @zk.exists?(n) - - ctime = @zk.stat(n).ctime - ctime.nil? ? 'na' : Time.at(ctime / 1000).strftime('%Y-%m-%d %H:%M:%S') - end - - def node_stat(n) - return 'FAIL' unless @zk.exists?(n) - - ctime = @zk.stat(n).ctime - return 'FAIL' if ctime.nil? - - Time.now - Time.at(ctime / 1000) > 3600 ? 'FAIL' : 'WARN' - end - - def test_node(path, deleteable, n) - return if @zk.exists?(n) - - result = { path: path, test: "Test: #{n} should exist", status: node_stat(path) } - @test_results.append([ - result[:path], node_datetime(path), deleteable ? result[:path] : '', result[:test], - result[:status] -]) - @buf << "\n #{result[:test]}: #{result[:status]}" unless @buf.nil? - end - - def test_has_children(path, deleteable, n) - return if @zk.exists?(n) && !@zk.children(n).empty? - - result = { path: path, test: "Test: #{n} should have children", status: node_stat(path) } - @test_results.append([ - result[:path], node_datetime(path), deleteable ? result[:path] : '', result[:test], - result[:status] -]) - @buf << "\n #{result[:test]}: #{result[:status]}" unless @buf.nil? - end - - def test_not_node(path, deleteable, n) - return unless @zk.exists?(n) - - result = { path: path, test: "Test: #{n} should NOT exist", status: node_stat(path) } - @test_results.append([ - result[:path], node_datetime(path), deleteable ? result[:path] : '', result[:test], - result[:status] -]) - @buf << "\n #{result[:test]}: #{result[:status]}" unless @buf.nil? - end - - def show_test(n) - rx1 = %r{^/batches/bid[0-9]+/states/batch-.*/(jid[0-9]+)$} - rx2 = %r{^/jobs/(jid[0-9]+)/bid$} - rx3 = %r{^/jobs/(jid[0-9]+)$} - rx4 = %r{^/jobs/states/[^/]*/[0-9][0-9]-(jid[0-9]+)$} - rx5 = %r{^/batches/bid[0-9]+/states$} - - case n - when %r{^/batch-uuids/(.*)} - d = get_data(n) - test_node(n, true, "/batches/#{d}") - when %r{^/batches/bid[0-9]+/submission} - d = get_data(n).fetch(:batchID, 'na') - test_node(n, false, "/batch-uuids/#{d}") - when rx1 - jid = rx1.match(n)[1] - test_node(n, true, "/jobs/#{jid}") - when rx2 - jid = rx2.match(n)[1] - bid = get_data(n) - test_node(n, false, "/batches/#{bid}") - snode = "/jobs/#{jid}/status" - test_node(n, true, snode) - if @zk.exists?(snode) - d = get_data(snode) - return if d.nil? - - status = d.fetch(:status, 'na').downcase - case status - when 'deleted' - bstatus = 'batch-deleted' - when 'completed' - bstatus = 'batch-completed' - when 'failed' - bstatus = 'batch-failed' - else - bstatus = 'batch-processing' - end - test_node(n, false, "/batches/#{bid}/states/#{bstatus}/#{jid}") - %w[batch-deleted batch-completed batch-failed batch-processing].each do |ts| - next if ts == bstatus - - test_not_node(n, false, "/batches/#{bid}/states/#{ts}/#{jid}") - end - end - when rx3 - jid = rx3.match(n)[1] - snode = "/jobs/#{jid}/status" - test_node(n, true, snode) - if @zk.exists?(snode) - d = get_data(snode) - status = d.fetch(:status, 'na').downcase - priority = get_data("#{n}/priority") - test_node(n, false, "/jobs/states/#{status}/#{format('%02d', priority)}-#{jid}") - end - when rx4 - jid = rx4.match(n)[1] - test_node(n, true, "/jobs/#{jid}") - @job_states_count[jid] = [] unless @job_states_count.key?(jid) - @job_states_count[jid].append(n) - when rx5 - test_has_children(n, false, n) - end - end - - def report_node(n) - @buf << "#{n}:" unless @buf.nil? - if standard_node(n) - show_data(n) if @mode == 'data' - show_test(n) if @mode == 'test' - else - @buf << " Unsupported\n" unless @buf.nil? - end - @buf << "\n" unless @buf.nil? - end - - def check_full - return false if @buf.nil? - return true if @full - - # Lambda payload limit. May need to save output to S3. - if @buf.size > 250_000 - @buf << '... (truncated at 256K)' - @full = true - end - @full - end - - def dump_node(n = '/') - return if check_full - return unless @zk.exists?(n) - return if system_node(n) - - report_node(n) - arr = @zk.children(n) - return if arr.empty? - - arr.sort.each do |cp| - p = "#{n}/#{cp}".gsub(%r{/+}, '/') - dump_node(p) - end + @node_dump.listing end end @@ -299,13 +114,7 @@ def table_types end def table_rows(_body) - dump_node(@zkpath) - @job_states_count.each_value do |states| - next unless states.length > 1 - - @test_results.append([states.to_s, '', '', 'Duplicate JID', 'FAIL']) - end - @test_results + [] end def perform_action From 9c83a32b97688820f03fcefd347e091d032ded8f Mon Sep 17 00:00:00 2001 From: Terry Brady Date: Tue, 22 Jul 2025 12:40:17 -0700 Subject: [PATCH 2/2] orphan and batch improvements --- mysql-ruby-lambda/Gemfile.lock | 8 ++--- src-admintool/Gemfile.lock | 20 +++++------ src-colladmin/Gemfile | 2 +- src-colladmin/Gemfile.lock | 42 +++++++++++------------ src-colladmin/actions/zookeeper_action.rb | 25 ++++++++++---- src-colladmin/lib/queue.rb | 7 ++++ src-common/Gemfile.lock | 8 ++--- src-testdriver/Gemfile.lock | 22 ++++++------ 8 files changed, 77 insertions(+), 57 deletions(-) diff --git a/mysql-ruby-lambda/Gemfile.lock b/mysql-ruby-lambda/Gemfile.lock index b6d9ebf..49cf720 100644 --- a/mysql-ruby-lambda/Gemfile.lock +++ b/mysql-ruby-lambda/Gemfile.lock @@ -9,7 +9,7 @@ GEM remote: https://rubygems.org/ specs: ast (2.4.3) - json (2.12.2) + json (2.13.0) language_server-protocol (3.17.0.5) lint_roller (1.1.0) mysql2 (0.5.6) @@ -21,7 +21,7 @@ GEM racc (1.8.1) rainbow (3.1.1) regexp_parser (2.10.0) - rubocop (1.77.0) + rubocop (1.78.0) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -32,7 +32,7 @@ GEM rubocop-ast (>= 1.45.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.45.1) + rubocop-ast (1.46.0) parser (>= 3.3.7.2) prism (~> 1.4) ruby-progressbar (1.13.0) @@ -49,4 +49,4 @@ DEPENDENCIES uc3-ssm! BUNDLED WITH - 2.6.9 + 2.7.1 diff --git a/src-admintool/Gemfile.lock b/src-admintool/Gemfile.lock index 2fb26f7..e9b0038 100644 --- a/src-admintool/Gemfile.lock +++ b/src-admintool/Gemfile.lock @@ -10,26 +10,26 @@ GEM specs: ast (2.4.3) aws-eventstream (1.4.0) - aws-partitions (1.1124.0) - aws-sdk-core (3.226.2) + aws-partitions (1.1132.0) + aws-sdk-core (3.227.0) aws-eventstream (~> 1, >= 1.3.0) aws-partitions (~> 1, >= 1.992.0) aws-sigv4 (~> 1.9) base64 jmespath (~> 1, >= 1.6.1) logger - aws-sdk-kms (1.106.0) - aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-kms (1.107.0) + aws-sdk-core (~> 3, >= 3.227.0) aws-sigv4 (~> 1.5) - aws-sdk-s3 (1.191.0) - aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-s3 (1.194.0) + aws-sdk-core (~> 3, >= 3.227.0) aws-sdk-kms (~> 1) aws-sigv4 (~> 1.5) aws-sigv4 (1.12.1) aws-eventstream (~> 1, >= 1.0.2) base64 (0.3.0) jmespath (1.6.2) - json (2.12.2) + json (2.13.0) language_server-protocol (3.17.0.5) lint_roller (1.1.0) logger (1.7.0) @@ -42,7 +42,7 @@ GEM racc (1.8.1) rainbow (3.1.1) regexp_parser (2.10.0) - rubocop (1.77.0) + rubocop (1.78.0) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -53,7 +53,7 @@ GEM rubocop-ast (>= 1.45.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.45.1) + rubocop-ast (1.46.0) parser (>= 3.3.7.2) prism (~> 1.4) ruby-progressbar (1.13.0) @@ -73,4 +73,4 @@ DEPENDENCIES uc3-ssm! BUNDLED WITH - 2.6.9 + 2.7.1 diff --git a/src-colladmin/Gemfile b/src-colladmin/Gemfile index 8fb97b7..7eedce8 100644 --- a/src-colladmin/Gemfile +++ b/src-colladmin/Gemfile @@ -12,7 +12,7 @@ gem 'aws-sdk-lambda' gem 'aws-sdk-s3' gem 'aws-sdk-ssm' gem 'csv' -gem 'mrt-zk', git: 'https://github.com/CDLUC3/mrt-zk.git', tag: '2.4.6' +gem 'mrt-zk', git: 'https://github.com/CDLUC3/mrt-zk.git', tag: '2.4.18' gem 'nokogiri' gem 'rest-client' gem 'rubocop' diff --git a/src-colladmin/Gemfile.lock b/src-colladmin/Gemfile.lock index 5619160..3c87150 100644 --- a/src-colladmin/Gemfile.lock +++ b/src-colladmin/Gemfile.lock @@ -1,7 +1,7 @@ GIT remote: https://github.com/CDLUC3/mrt-zk.git - revision: 52c79f6a0fceac0d7e257b3a16766a5080f808af - tag: 2.4.6 + revision: e7db85e7623bcb24f9d7dba1f7f14c1e26bc8940 + tag: 2.4.18 specs: mrt-zk (1.0.1) zk (~> 1.10.0) @@ -19,29 +19,29 @@ GEM specs: ast (2.4.3) aws-eventstream (1.4.0) - aws-partitions (1.1124.0) - aws-sdk-core (3.226.2) + aws-partitions (1.1132.0) + aws-sdk-core (3.227.0) aws-eventstream (~> 1, >= 1.3.0) aws-partitions (~> 1, >= 1.992.0) aws-sigv4 (~> 1.9) base64 jmespath (~> 1, >= 1.6.1) logger - aws-sdk-ec2 (1.534.0) - aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-ec2 (1.540.0) + aws-sdk-core (~> 3, >= 3.227.0) aws-sigv4 (~> 1.5) - aws-sdk-kms (1.106.0) - aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-kms (1.107.0) + aws-sdk-core (~> 3, >= 3.227.0) aws-sigv4 (~> 1.5) - aws-sdk-lambda (1.153.0) - aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-lambda (1.154.0) + aws-sdk-core (~> 3, >= 3.227.0) aws-sigv4 (~> 1.5) - aws-sdk-s3 (1.191.0) - aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-s3 (1.194.0) + aws-sdk-core (~> 3, >= 3.227.0) aws-sdk-kms (~> 1) aws-sigv4 (~> 1.5) - aws-sdk-ssm (1.197.0) - aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-ssm (1.199.0) + aws-sdk-core (~> 3, >= 3.227.0) aws-sigv4 (~> 1.5) aws-sigv4 (1.12.1) aws-eventstream (~> 1, >= 1.0.2) @@ -52,21 +52,21 @@ GEM http-cookie (1.0.8) domain_name (~> 0.5) jmespath (1.6.2) - json (2.12.2) + json (2.13.0) language_server-protocol (3.17.0.5) lint_roller (1.1.0) logger (1.7.0) mime-types (3.7.0) logger mime-types-data (~> 3.2025, >= 3.2025.0507) - mime-types-data (3.2025.0701) + mime-types-data (3.2025.0715) mini_portile2 (2.8.9) mysql2 (0.5.6) netrc (0.11.0) - nokogiri (1.18.8) + nokogiri (1.18.9) mini_portile2 (~> 2.8.2) racc (~> 1.4) - nokogiri (1.18.8-x86_64-linux-gnu) + nokogiri (1.18.9-x86_64-linux-gnu) racc (~> 1.4) parallel (1.27.0) parser (3.3.8.0) @@ -81,7 +81,7 @@ GEM http-cookie (>= 1.0.2, < 2.0) mime-types (>= 1.16, < 4.0) netrc (~> 0.8) - rubocop (1.77.0) + rubocop (1.78.0) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -92,7 +92,7 @@ GEM rubocop-ast (>= 1.45.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.45.1) + rubocop-ast (1.46.0) parser (>= 3.3.7.2) prism (~> 1.4) ruby-progressbar (1.13.0) @@ -124,4 +124,4 @@ DEPENDENCIES zookeeper BUNDLED WITH - 2.6.9 + 2.7.1 diff --git a/src-colladmin/actions/zookeeper_action.rb b/src-colladmin/actions/zookeeper_action.rb index 25dc4ba..a5bfc27 100644 --- a/src-colladmin/actions/zookeeper_action.rb +++ b/src-colladmin/actions/zookeeper_action.rb @@ -84,25 +84,38 @@ def perform_action # Collection Admin Task class - see config/actions.yml for description class ZookeeperDumpAction < ZookeeperAction def initialize(config, action, path, myparams) + super @node_dump = MerrittZK::NodeDump.new(@zk, myparams) @zkpath = myparams.fetch('zkpath', '/') @mode = myparams.fetch('mode', 'data') - @full = false - @test_results = [] - @job_states_count = {} super end def data - @node_dump.listing + buf = '' + @node_dump.listing.each do |rec| + if rec.is_a?(Hash) + rec.each do |k, v| + buf += "#{k}\n" + unless v.to_s.empty? + buf += JSON.pretty_generate(v) + buf += "\n\n" + end + end + else + buf += "#{rec}\n" + end + break if buf.length > 500_000 + end + buf end end ## table version of the dump/test action class ZookeeperDumpTableAction < ZookeeperDumpAction def initialize(config, action, path, myparams) + myparams['mode'] = 'test' super - @mode = 'test' end def table_headers @@ -114,7 +127,7 @@ def table_types end def table_rows(_body) - [] + @node_dump.test_results end def perform_action diff --git a/src-colladmin/lib/queue.rb b/src-colladmin/lib/queue.rb index 45e9de6..6184e8e 100644 --- a/src-colladmin/lib/queue.rb +++ b/src-colladmin/lib/queue.rb @@ -206,6 +206,8 @@ def get_queue_node ## Queue representation of Batch objects class BatchQueueEntry < QueueJson + AGE_BATCHWARN = 3600 * 24 + @@placeholder = nil def self.placeholder @@placeholder = BatchQueueEntry.new({}) if @@placeholder.nil? @@ -262,6 +264,11 @@ def initialize(json) qs = get_value(:qstatus, '') st = 'INFO' + begin + st = 'WARN' if Time.now - get_value(:date, Time.now) > AGE_BATCHWARN + rescue StandardError => e + puts "Error processing submission date #{e}" + end st = 'FAIL' if qs == 'Failed' st = 'PASS' if qs == 'Completed' add_property( diff --git a/src-common/Gemfile.lock b/src-common/Gemfile.lock index 626e506..acea21d 100644 --- a/src-common/Gemfile.lock +++ b/src-common/Gemfile.lock @@ -12,7 +12,7 @@ GEM base64 (0.3.0) httpclient (2.9.0) mutex_m - json (2.12.2) + json (2.13.0) jwt (3.1.2) base64 language_server-protocol (3.17.0.5) @@ -29,7 +29,7 @@ GEM racc (1.8.1) rainbow (3.1.1) regexp_parser (2.10.0) - rubocop (1.77.0) + rubocop (1.78.0) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -40,7 +40,7 @@ GEM rubocop-ast (>= 1.45.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.45.1) + rubocop-ast (1.46.0) parser (>= 3.3.7.2) prism (~> 1.4) ruby-progressbar (1.13.0) @@ -64,4 +64,4 @@ DEPENDENCIES uc3-ssm! BUNDLED WITH - 2.6.9 + 2.7.1 diff --git a/src-testdriver/Gemfile.lock b/src-testdriver/Gemfile.lock index b73dfac..5116b03 100644 --- a/src-testdriver/Gemfile.lock +++ b/src-testdriver/Gemfile.lock @@ -10,19 +10,19 @@ GEM specs: ast (2.4.3) aws-eventstream (1.4.0) - aws-partitions (1.1124.0) - aws-sdk-core (3.226.2) + aws-partitions (1.1132.0) + aws-sdk-core (3.227.0) aws-eventstream (~> 1, >= 1.3.0) aws-partitions (~> 1, >= 1.992.0) aws-sigv4 (~> 1.9) base64 jmespath (~> 1, >= 1.6.1) logger - aws-sdk-lambda (1.153.0) - aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-lambda (1.154.0) + aws-sdk-core (~> 3, >= 3.227.0) aws-sigv4 (~> 1.5) - aws-sdk-ssm (1.197.0) - aws-sdk-core (~> 3, >= 3.225.0) + aws-sdk-ssm (1.199.0) + aws-sdk-core (~> 3, >= 3.227.0) aws-sigv4 (~> 1.5) aws-sigv4 (1.12.1) aws-eventstream (~> 1, >= 1.0.2) @@ -31,12 +31,12 @@ GEM colorize (1.1.0) diff-lcs (1.6.2) jmespath (1.6.2) - json (2.12.2) + json (2.13.0) language_server-protocol (3.17.0.5) lint_roller (1.1.0) logger (1.7.0) mini_portile2 (2.8.9) - nokogiri (1.18.8) + nokogiri (1.18.9) mini_portile2 (~> 2.8.2) racc (~> 1.4) parallel (1.27.0) @@ -60,7 +60,7 @@ GEM diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.13.0) rspec-support (3.13.4) - rubocop (1.77.0) + rubocop (1.78.0) json (~> 2.3) language_server-protocol (~> 3.17.0.2) lint_roller (~> 1.1.0) @@ -71,7 +71,7 @@ GEM rubocop-ast (>= 1.45.1, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 2.4.0, < 4.0) - rubocop-ast (1.45.1) + rubocop-ast (1.46.0) parser (>= 3.3.7.2) prism (~> 1.4) ruby-progressbar (1.13.0) @@ -95,4 +95,4 @@ DEPENDENCIES uc3-ssm! BUNDLED WITH - 2.6.9 + 2.7.1