From 4a2997f81a938a80361287a0bd419d4705e0cc42 Mon Sep 17 00:00:00 2001 From: Xiwei Pan Date: Sat, 28 Mar 2026 20:35:54 +0800 Subject: [PATCH 1/6] feat: add KthLargestMTuple model (issue #405) Add the Kth Largest m-Tuple counting problem (Garey & Johnson MP10). This is the first aggregate-only model using Value = Sum, which required a fix to the example_db model_specs_are_optimal test to gracefully handle models without witness support. Closes #405 Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/paper/reductions.typ | 27 +++ problemreductions-cli/src/cli.rs | 1 + problemreductions-cli/src/commands/create.rs | 41 +++- src/models/misc/kth_largest_m_tuple.rs | 208 ++++++++++++++++++ src/models/misc/mod.rs | 3 + src/models/mod.rs | 16 +- src/unit_tests/example_db.rs | 32 +-- .../models/misc/kth_largest_m_tuple.rs | 165 ++++++++++++++ 8 files changed, 469 insertions(+), 24 deletions(-) create mode 100644 src/models/misc/kth_largest_m_tuple.rs create mode 100644 src/unit_tests/models/misc/kth_largest_m_tuple.rs diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index de79d7ed..2f567b18 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -194,6 +194,7 @@ "SumOfSquaresPartition": [Sum of Squares Partition], "TimetableDesign": [Timetable Design], "TwoDimensionalConsecutiveSets": [2-Dimensional Consecutive Sets], + "KthLargestMTuple": [$K$th Largest $m$-Tuple], ) // Definition label: "def:" — each definition block must have a matching label @@ -4626,6 +4627,32 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], ] } +#{ + let x = load-model-example("KthLargestMTuple") + let sets = x.instance.sets + let k = x.instance.k + let bound = x.instance.bound + let config = x.optimal_config + let m = sets.len() + // Count qualifying tuples by enumerating the Cartesian product + let total = sets.fold(1, (acc, s) => acc * s.len()) + [ + #problem-def("KthLargestMTuple")[ + Given $m$ finite sets $X_1, dots, X_m$ of 
positive integers, a bound $B in ZZ^+$, and a threshold $K in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. The answer is _yes_ iff this count is at least $K$. + ][ + The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. It is NP-complete because the special case $m = 2$, $K = 1$ reduces from Subset Sum. In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. + + *Example.* Let $m = #m$, $B = #bound$, and $K = #k$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. In total, #k of the #total tuples satisfy the bound, so the answer is _yes_ (count $= K$). 
+ + #pred-commands( + "pred create --example KthLargestMTuple -o kth-largest-m-tuple.json", + "pred solve kth-largest-m-tuple.json", + "pred evaluate kth-largest-m-tuple.json --config " + config.map(str).join(","), + ) + ] + ] +} + #{ let x = load-model-example("SequencingWithReleaseTimesAndDeadlines") let n = x.instance.lengths.len() diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index fe0b7bd3..bdecc538 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -248,6 +248,7 @@ Flags by problem type: CapacityAssignment --capacities, --cost-matrix, --delay-matrix, --delay-budget SubsetSum --sizes, --target ThreePartition --sizes, --bound + KthLargestMTuple --sets, --k, --bound SumOfSquaresPartition --sizes, --num-groups ExpectedRetrievalCost --probabilities, --num-sectors PaintShop --sequence diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index d295670c..d845053f 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -23,9 +23,9 @@ use problemreductions::models::misc::{ AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation, ConjunctiveBooleanQuery, ConsistencyOfDatabaseFrequencyTables, EnsembleComputation, ExpectedRetrievalCost, FlowShopScheduling, FrequencyTable, GroupingBySwapping, - JobShopScheduling, KnownValue, LongestCommonSubsequence, MinimumTardinessSequencing, - MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, QueryArg, - RectilinearPictureCompression, ResourceConstrainedScheduling, + JobShopScheduling, KnownValue, KthLargestMTuple, LongestCommonSubsequence, + MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, + QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, 
SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence, @@ -718,6 +718,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str { } "SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11", "ThreePartition" => "--sizes 4,5,6,4,6,5 --bound 15", + "KthLargestMTuple" => "--sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12", "BoyceCoddNormalFormViolation" => { "--n 6 --sets \"0,1:2;2:3;3,4:5\" --target 0,1,2,3,4,5" } @@ -2367,6 +2368,40 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { ) } + // KthLargestMTuple + "KthLargestMTuple" => { + let sets_str = args.sets.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "KthLargestMTuple requires --sets, --k, and --bound\n\n\ + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" + ) + })?; + let k_val = args.k.ok_or_else(|| { + anyhow::anyhow!( + "KthLargestMTuple requires --k\n\n\ + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" + ) + })?; + let bound = args.bound.ok_or_else(|| { + anyhow::anyhow!( + "KthLargestMTuple requires --bound\n\n\ + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" + ) + })?; + let bound = u64::try_from(bound).map_err(|_| { + anyhow::anyhow!("KthLargestMTuple requires a positive integer --bound") + })?; + let sets: Vec> = sets_str + .split(';') + .map(|group| util::parse_comma_list(group)) + .collect::>()?; + ( + ser(KthLargestMTuple::try_new(sets, k_val as u64, bound) + .map_err(anyhow::Error::msg)?)?, + resolved_variant.clone(), + ) + } + // SumOfSquaresPartition "SumOfSquaresPartition" => { let sizes_str = args.sizes.as_deref().ok_or_else(|| { diff --git a/src/models/misc/kth_largest_m_tuple.rs b/src/models/misc/kth_largest_m_tuple.rs new file mode 100644 index 00000000..6b600a98 --- /dev/null +++ b/src/models/misc/kth_largest_m_tuple.rs @@ -0,0 +1,208 @@ 
+//! Kth Largest m-Tuple problem implementation. +//! +//! Given m sets of positive integers and thresholds K and B, count how many +//! distinct m-tuples (one element per set) have total size at least B. +//! The answer is YES iff the count is at least K. Garey & Johnson MP10. + +use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry}; +use crate::traits::Problem; +use crate::types::Sum; +use serde::de::Error as _; +use serde::{Deserialize, Deserializer, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "KthLargestMTuple", + display_name: "Kth Largest m-Tuple", + aliases: &[], + dimensions: &[], + module_path: module_path!(), + description: "Count m-tuples whose total size meets a bound and compare against a threshold K", + fields: &[ + FieldInfo { name: "sets", type_name: "Vec<Vec<u64>>", description: "m sets, each containing positive integer sizes" }, + FieldInfo { name: "k", type_name: "u64", description: "Threshold K (answer YES iff count >= K)" }, + FieldInfo { name: "bound", type_name: "u64", description: "Lower bound B on tuple sum" }, + ], + } +} + +inventory::submit! { + ProblemSizeFieldEntry { + name: "KthLargestMTuple", + fields: &["num_sets", "total_tuples"], + } +} + +/// The Kth Largest m-Tuple problem. +/// +/// Given sets `X_1, ..., X_m` of positive integers, a threshold `K`, and a +/// bound `B`, count how many distinct m-tuples `(x_1, ..., x_m)` in +/// `X_1 x ... x X_m` satisfy `sum(x_i) >= B`. The answer is YES iff the +/// count is at least `K`. +/// +/// # Representation +/// +/// Variable `i` selects an element from set `X_i`, ranging over `{0, ..., |X_i|-1}`. +/// `evaluate` returns `Sum(1)` if the tuple sum >= B, else `Sum(0)`. +/// The aggregate over all configurations gives the total count of qualifying tuples. 
+/// +/// # Example +/// +/// ``` +/// use problemreductions::models::misc::KthLargestMTuple; +/// use problemreductions::{Problem, Solver, BruteForce}; +/// +/// let problem = KthLargestMTuple::new( +/// vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], +/// 14, +/// 12, +/// ); +/// let solver = BruteForce::new(); +/// let value = solver.solve(&problem); +/// // 14 of the 18 tuples have sum >= 12 +/// assert_eq!(value, problemreductions::types::Sum(14)); +/// ``` +#[derive(Debug, Clone, Serialize)] +pub struct KthLargestMTuple { + sets: Vec<Vec<u64>>, + k: u64, + bound: u64, +} + +impl KthLargestMTuple { + fn validate(sets: &[Vec<u64>], k: u64, bound: u64) -> Result<(), String> { + if sets.is_empty() { + return Err("KthLargestMTuple requires at least one set".to_string()); + } + if sets.iter().any(|s| s.is_empty()) { + return Err("Every set must be non-empty".to_string()); + } + if sets.iter().any(|s| s.contains(&0)) { + return Err("All sizes must be positive (> 0)".to_string()); + } + if k == 0 { + return Err("Threshold K must be positive".to_string()); + } + if bound == 0 { + return Err("Bound B must be positive".to_string()); + } + Ok(()) + } + + /// Try to create a new KthLargestMTuple instance. + pub fn try_new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Result<Self, String> { + Self::validate(&sets, k, bound)?; + Ok(Self { sets, k, bound }) + } + + /// Create a new KthLargestMTuple instance. + /// + /// # Panics + /// + /// Panics if the inputs are invalid. + pub fn new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Self { + Self::try_new(sets, k, bound).unwrap_or_else(|msg| panic!("{msg}")) + } + + /// Returns the sets. + pub fn sets(&self) -> &[Vec<u64>] { + &self.sets + } + + /// Returns the threshold K. + pub fn k(&self) -> u64 { + self.k + } + + /// Returns the bound B. + pub fn bound(&self) -> u64 { + self.bound + } + + /// Returns the number of sets (m). + pub fn num_sets(&self) -> usize { + self.sets.len() + } + + /// Returns the total number of m-tuples (product of set sizes). 
+ pub fn total_tuples(&self) -> usize { + self.sets.iter().map(|s| s.len()).product() + } +} + +#[derive(Deserialize)] +struct KthLargestMTupleDef { + sets: Vec<Vec<u64>>, + k: u64, + bound: u64, +} + +impl<'de> Deserialize<'de> for KthLargestMTuple { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let data = KthLargestMTupleDef::deserialize(deserializer)?; + Self::try_new(data.sets, data.k, data.bound).map_err(D::Error::custom) + } +} + +impl Problem for KthLargestMTuple { + const NAME: &'static str = "KthLargestMTuple"; + type Value = Sum<u64>; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec<usize> { + self.sets.iter().map(|s| s.len()).collect() + } + + fn evaluate(&self, config: &[usize]) -> Sum<u64> { + if config.len() != self.num_sets() { + return Sum(0); + } + for (i, &choice) in config.iter().enumerate() { + if choice >= self.sets[i].len() { + return Sum(0); + } + } + let total: u64 = config + .iter() + .enumerate() + .map(|(i, &choice)| self.sets[i][choice]) + .sum(); + if total >= self.bound { + Sum(1) + } else { + Sum(0) + } + } +} + +// Best known: brute-force enumeration of all tuples, O(total_tuples * num_sets). +// No sub-exponential exact algorithm is known for the general case. +crate::declare_variants! { + default KthLargestMTuple => "total_tuples * num_sets", +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> { + // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14. + // 14 of 18 tuples have sum >= 12. The config [2,1,2] picks (8,6,7) with sum=21 >= 12. 
+ vec![crate::example_db::specs::ModelExampleSpec { + id: "kth_largest_m_tuple", + instance: Box::new(KthLargestMTuple::new( + vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], + 14, + 12, + )), + optimal_config: vec![2, 1, 2], + optimal_value: serde_json::json!(1), + }] +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/kth_largest_m_tuple.rs"] +mod tests; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index 491950d3..5e79c4d3 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -74,6 +74,7 @@ mod flow_shop_scheduling; mod grouping_by_swapping; mod job_shop_scheduling; mod knapsack; +mod kth_largest_m_tuple; mod longest_common_subsequence; mod minimum_tardiness_sequencing; mod multiprocessor_scheduling; @@ -114,6 +115,7 @@ pub use flow_shop_scheduling::FlowShopScheduling; pub use grouping_by_swapping::GroupingBySwapping; pub use job_shop_scheduling::JobShopScheduling; pub use knapsack::Knapsack; +pub use kth_largest_m_tuple::KthLargestMTuple; pub use longest_common_subsequence::LongestCommonSubsequence; pub use minimum_tardiness_sequencing::MinimumTardinessSequencing; pub use multiprocessor_scheduling::MultiprocessorScheduling; @@ -178,5 +180,6 @@ pub(crate) fn canonical_model_example_specs() -> Vec KthLargestMTuple { + // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14 + KthLargestMTuple::new(vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], 14, 12) +} + +#[test] +fn test_kth_largest_m_tuple_creation() { + let p = example_problem(); + assert_eq!(p.sets().len(), 3); + assert_eq!(p.sets()[0], vec![2, 5, 8]); + assert_eq!(p.sets()[1], vec![3, 6]); + assert_eq!(p.sets()[2], vec![1, 4, 7]); + assert_eq!(p.k(), 14); + assert_eq!(p.bound(), 12); + assert_eq!(p.num_sets(), 3); + assert_eq!(p.total_tuples(), 18); + assert_eq!(p.dims(), vec![3, 2, 3]); + assert_eq!(p.num_variables(), 3); + assert_eq!(::NAME, "KthLargestMTuple"); + assert_eq!(::variant(), vec![]); +} + +#[test] +fn test_kth_largest_m_tuple_evaluate_qualifying_tuple() { 
+ let p = example_problem(); + // (8,6,7) = sum 21 >= 12 -> Sum(1) + assert_eq!(p.evaluate(&[2, 1, 2]), Sum(1)); + // (5,6,4) = sum 15 >= 12 -> Sum(1) + assert_eq!(p.evaluate(&[1, 1, 1]), Sum(1)); +} + +#[test] +fn test_kth_largest_m_tuple_evaluate_non_qualifying_tuple() { + let p = example_problem(); + // (2,3,1) = sum 6 < 12 -> Sum(0) + assert_eq!(p.evaluate(&[0, 0, 0]), Sum(0)); + // (2,3,4) = sum 9 < 12 -> Sum(0) + assert_eq!(p.evaluate(&[0, 0, 1]), Sum(0)); +} + +#[test] +fn test_kth_largest_m_tuple_evaluate_invalid_configs() { + let p = example_problem(); + // Wrong length + assert_eq!(p.evaluate(&[0, 0]), Sum(0)); + assert_eq!(p.evaluate(&[0, 0, 0, 0]), Sum(0)); + // Out of range + assert_eq!(p.evaluate(&[3, 0, 0]), Sum(0)); + assert_eq!(p.evaluate(&[0, 2, 0]), Sum(0)); + assert_eq!(p.evaluate(&[0, 0, 3]), Sum(0)); +} + +#[test] +fn test_kth_largest_m_tuple_solver() { + let p = example_problem(); + let solver = BruteForce::new(); + let value = solver.solve(&p); + // 14 of 18 tuples qualify (sum >= 12) + assert_eq!(value, Sum(14)); +} + +#[test] +fn test_kth_largest_m_tuple_boundary_example() { + // K=14 and count=14, so the answer is YES (count >= K) + let p = example_problem(); + let solver = BruteForce::new(); + let count = solver.solve(&p); + assert_eq!(count, Sum(14)); + assert!(count.0 >= p.k()); +} + +#[test] +fn test_kth_largest_m_tuple_serialization_round_trip() { + let p = example_problem(); + let json = serde_json::to_value(&p).unwrap(); + assert_eq!( + json, + serde_json::json!({ + "sets": [[2, 5, 8], [3, 6], [1, 4, 7]], + "k": 14, + "bound": 12, + }) + ); + + let restored: KthLargestMTuple = serde_json::from_value(json).unwrap(); + assert_eq!(restored.sets(), p.sets()); + assert_eq!(restored.k(), p.k()); + assert_eq!(restored.bound(), p.bound()); +} + +#[test] +fn test_kth_largest_m_tuple_deserialization_rejects_invalid() { + let invalid_cases = [ + // Empty sets + serde_json::json!({ "sets": [], "k": 1, "bound": 5 }), + // A set is empty + 
serde_json::json!({ "sets": [[1, 2], []], "k": 1, "bound": 3 }), + // Zero size + serde_json::json!({ "sets": [[0, 2]], "k": 1, "bound": 1 }), + // K=0 + serde_json::json!({ "sets": [[1, 2]], "k": 0, "bound": 1 }), + // Bound=0 + serde_json::json!({ "sets": [[1, 2]], "k": 1, "bound": 0 }), + ]; + + for invalid in invalid_cases { + assert!(serde_json::from_value::<KthLargestMTuple>(invalid).is_err()); + } +} + +#[test] +#[should_panic(expected = "at least one set")] +fn test_kth_largest_m_tuple_empty_sets_panics() { + KthLargestMTuple::new(vec![], 1, 5); +} + +#[test] +#[should_panic(expected = "non-empty")] +fn test_kth_largest_m_tuple_empty_inner_set_panics() { + KthLargestMTuple::new(vec![vec![1, 2], vec![]], 1, 3); +} + +#[test] +#[should_panic(expected = "positive")] +fn test_kth_largest_m_tuple_zero_size_panics() { + KthLargestMTuple::new(vec![vec![0, 2]], 1, 1); +} + +#[test] +fn test_kth_largest_m_tuple_paper_example() { + // Issue example: m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14 + // 14 of 18 tuples have sum >= 12 -> YES (boundary case: count == K) + let p = example_problem(); + let solver = BruteForce::new(); + let count = solver.solve(&p); + assert_eq!(count, Sum(14)); + + // Verify a specific qualifying tuple: (8,6,7), sum=21 + assert_eq!(p.evaluate(&[2, 1, 2]), Sum(1)); + + // Verify a specific non-qualifying tuple: (2,3,1), sum=6 + assert_eq!(p.evaluate(&[0, 0, 0]), Sum(0)); +} + +#[test] +fn test_kth_largest_m_tuple_all_qualify() { + // Two sets each with one large element, B=1 -> all tuples qualify + let p = KthLargestMTuple::new(vec![vec![5], vec![10]], 1, 1); + let solver = BruteForce::new(); + assert_eq!(solver.solve(&p), Sum(1)); + assert_eq!(p.total_tuples(), 1); +} + +#[test] +fn test_kth_largest_m_tuple_none_qualify() { + // B is larger than any possible sum + let p = KthLargestMTuple::new(vec![vec![1, 2], vec![1, 2]], 1, 100); + let solver = BruteForce::new(); + assert_eq!(solver.solve(&p), Sum(0)); +} From c6ebfe1fd00ba2f960f22b0a8693007eed559df3 
Mon Sep 17 00:00:00 2001 From: Xiwei Pan Date: Sun, 29 Mar 2026 22:10:32 +0800 Subject: [PATCH 2/6] Fix formatting after merge conflict resolution Co-Authored-By: Claude Opus 4.6 (1M context) --- src/models/mod.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/models/mod.rs b/src/models/mod.rs index bbbdecdd..6b7eb13e 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -42,12 +42,12 @@ pub use misc::{ JobShopScheduling, Knapsack, KthLargestMTuple, LongestCommonSubsequence, MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, Partition, PrecedenceConstrainedScheduling, ProductionPlanning, QueryArg, RectilinearPictureCompression, - ResourceConstrainedScheduling, - SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, - SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, - SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence, - StackerCrane, StaffScheduling, StringToStringCorrection, SubsetSum, SumOfSquaresPartition, - Term, ThreePartition, TimetableDesign, + ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, + SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, + SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, + SequencingWithinIntervals, ShortestCommonSupersequence, StackerCrane, StaffScheduling, + StringToStringCorrection, SubsetSum, SumOfSquaresPartition, Term, ThreePartition, + TimetableDesign, }; pub use set::{ ComparativeContainment, ConsecutiveSets, ExactCoverBy3Sets, MaximumSetPacking, From 8fcef5f515b3935139f860cef2a847d5e5852075 Mon Sep 17 00:00:00 2001 From: Xiwei Pan Date: Sun, 29 Mar 2026 22:24:26 +0800 Subject: [PATCH 3/6] Fix paper: correct PP-completeness claim and broken solve command - Replace false NP-completeness claim with accurate PP-completeness description citing Haase & Kiefer (2016) - Fix `pred solve` command to 
use `--solver brute-force` (no ILP path) - Add haase2016 BibTeX entry Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/paper/reductions.typ | 4 ++-- docs/paper/references.bib | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index c6f02a5f..06762a8c 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -4642,13 +4642,13 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], #problem-def("KthLargestMTuple")[ Given $m$ finite sets $X_1, dots, X_m$ of positive integers, a bound $B in ZZ^+$, and a threshold $K in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. The answer is _yes_ iff this count is at least $K$. ][ - The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. It is NP-complete because the special case $m = 2$, $K = 1$ reduces from Subset Sum. In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. + The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. It is _not known to be in NP_, because a "yes" certificate may need to exhibit $K$ qualifying tuples and $K$ can be exponentially large. The problem is PP-complete under polynomial-time Turing reductions @haase2016, though the special case $m = 2$, $K = 1$ is NP-complete via reduction from Subset Sum. In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. 
*Example.* Let $m = #m$, $B = #bound$, and $K = #k$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. In total, #k of the #total tuples satisfy the bound, so the answer is _yes_ (count $= K$). #pred-commands( "pred create --example KthLargestMTuple -o kth-largest-m-tuple.json", - "pred solve kth-largest-m-tuple.json", + "pred solve kth-largest-m-tuple.json --solver brute-force", "pred evaluate kth-largest-m-tuple.json --config " + config.map(str).join(","), ) ] diff --git a/docs/paper/references.bib b/docs/paper/references.bib index 3b63d53a..3826185e 100644 --- a/docs/paper/references.bib +++ b/docs/paper/references.bib @@ -1454,3 +1454,13 @@ @techreport{plaisted1976 number = {STAN-CS-76-583}, year = {1976} } + +@article{haase2016, + author = {Haase, Christoph and Kiefer, Stefan}, + title = {The Complexity of the {K}th Largest Subset Problem and Related Problems}, + journal = {Information Processing Letters}, + volume = {116}, + number = {2}, + pages = {111--115}, + year = {2016} +} From 5439cd86abd382af848751f324b33c7995c7f18b Mon Sep 17 00:00:00 2001 From: Xiwei Pan Date: Sun, 29 Mar 2026 22:35:05 +0800 Subject: [PATCH 4/6] Remove unused K field from KthLargestMTuple MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit K was stored but never used in evaluate() — the model is a pure counting problem. The G&J decision version (count >= K?) is noted in the paper but not part of the computational model. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/paper/reductions.typ | 7 ++-- problemreductions-cli/src/cli.rs | 2 +- problemreductions-cli/src/commands/create.rs | 17 +++----- src/models/misc/kth_largest_m_tuple.rs | 42 +++++++------------ .../models/misc/kth_largest_m_tuple.rs | 33 ++++++--------- 5 files changed, 36 insertions(+), 65 deletions(-) diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 06762a8c..e6476ee1 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -4632,7 +4632,6 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], #{ let x = load-model-example("KthLargestMTuple") let sets = x.instance.sets - let k = x.instance.k let bound = x.instance.bound let config = x.optimal_config let m = sets.len() @@ -4640,11 +4639,11 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], let total = sets.fold(1, (acc, s) => acc * s.len()) [ #problem-def("KthLargestMTuple")[ - Given $m$ finite sets $X_1, dots, X_m$ of positive integers, a bound $B in ZZ^+$, and a threshold $K in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. The answer is _yes_ iff this count is at least $K$. + Given $m$ finite sets $X_1, dots, X_m$ of positive integers and a bound $B in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. ][ - The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. It is _not known to be in NP_, because a "yes" certificate may need to exhibit $K$ qualifying tuples and $K$ can be exponentially large. The problem is PP-complete under polynomial-time Turing reductions @haase2016, though the special case $m = 2$, $K = 1$ is NP-complete via reduction from Subset Sum. 
In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. + The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. The original G&J decision formulation asks whether this count is at least a given threshold $K$; here we model the pure counting version. The problem is PP-complete under polynomial-time Turing reductions @haase2016, though the special case $m = 2$, $K = 1$ is NP-complete via reduction from Subset Sum. In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. - *Example.* Let $m = #m$, $B = #bound$, and $K = #k$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. In total, #k of the #total tuples satisfy the bound, so the answer is _yes_ (count $= K$). + *Example.* Let $m = #m$ and $B = #bound$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. 
#pred-commands( "pred create --example KthLargestMTuple -o kth-largest-m-tuple.json", diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index 91bb0ed9..49770677 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -249,7 +249,7 @@ Flags by problem type: ProductionPlanning --num-periods, --demands, --capacities, --setup-costs, --production-costs, --inventory-costs, --cost-bound SubsetSum --sizes, --target ThreePartition --sizes, --bound - KthLargestMTuple --sets, --k, --bound + KthLargestMTuple --sets, --bound SumOfSquaresPartition --sizes, --num-groups ExpectedRetrievalCost --probabilities, --num-sectors PaintShop --sequence diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index d30f73d8..eb2f026f 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -725,7 +725,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str { } "SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11", "ThreePartition" => "--sizes 4,5,6,4,6,5 --bound 15", - "KthLargestMTuple" => "--sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12", + "KthLargestMTuple" => "--sets \"2,5,8;3,6;1,4,7\" --bound 12", "BoyceCoddNormalFormViolation" => { "--n 6 --sets \"0,1:2;2:3;3,4:5\" --target 0,1,2,3,4,5" } @@ -2379,20 +2379,14 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { "KthLargestMTuple" => { let sets_str = args.sets.as_deref().ok_or_else(|| { anyhow::anyhow!( - "KthLargestMTuple requires --sets, --k, and --bound\n\n\ - Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" - ) - })?; - let k_val = args.k.ok_or_else(|| { - anyhow::anyhow!( - "KthLargestMTuple requires --k\n\n\ - Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" + "KthLargestMTuple requires --sets and --bound\n\n\ + Usage: pred create KthLargestMTuple --sets 
\"2,5,8;3,6;1,4,7\" --bound 12" ) })?; let bound = args.bound.ok_or_else(|| { anyhow::anyhow!( "KthLargestMTuple requires --bound\n\n\ - Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --bound 12" ) })?; let bound = u64::try_from(bound).map_err(|_| { @@ -2403,8 +2397,7 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { .map(|group| util::parse_comma_list(group)) .collect::>()?; ( - ser(KthLargestMTuple::try_new(sets, k_val as u64, bound) - .map_err(anyhow::Error::msg)?)?, + ser(KthLargestMTuple::try_new(sets, bound).map_err(anyhow::Error::msg)?)?, resolved_variant.clone(), ) } diff --git a/src/models/misc/kth_largest_m_tuple.rs b/src/models/misc/kth_largest_m_tuple.rs index 6b600a98..1717948c 100644 --- a/src/models/misc/kth_largest_m_tuple.rs +++ b/src/models/misc/kth_largest_m_tuple.rs @@ -1,8 +1,8 @@ //! Kth Largest m-Tuple problem implementation. //! -//! Given m sets of positive integers and thresholds K and B, count how many +//! Given m sets of positive integers and a bound B, count how many //! distinct m-tuples (one element per set) have total size at least B. -//! The answer is YES iff the count is at least K. Garey & Johnson MP10. +//! Garey & Johnson MP10. use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry}; use crate::traits::Problem; @@ -17,10 +17,9 @@ inventory::submit! 
{ aliases: &[], dimensions: &[], module_path: module_path!(), - description: "Count m-tuples whose total size meets a bound and compare against a threshold K", + description: "Count m-tuples whose total size meets a bound", fields: &[ FieldInfo { name: "sets", type_name: "Vec<Vec<u64>>", description: "m sets, each containing positive integer sizes" }, - FieldInfo { name: "k", type_name: "u64", description: "Threshold K (answer YES iff count >= K)" }, FieldInfo { name: "bound", type_name: "u64", description: "Lower bound B on tuple sum" }, ], } @@ -35,10 +34,9 @@ inventory::submit! { /// The Kth Largest m-Tuple problem. /// -/// Given sets `X_1, ..., X_m` of positive integers, a threshold `K`, and a -/// bound `B`, count how many distinct m-tuples `(x_1, ..., x_m)` in -/// `X_1 x ... x X_m` satisfy `sum(x_i) >= B`. The answer is YES iff the -/// count is at least `K`. +/// Given sets `X_1, ..., X_m` of positive integers and a bound `B`, count how +/// many distinct m-tuples `(x_1, ..., x_m)` in `X_1 x ... x X_m` satisfy +/// `sum(x_i) >= B`. /// /// # Representation /// @@ -54,7 +52,6 @@ inventory::submit! { /// /// let problem = KthLargestMTuple::new( /// vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], -/// 14, /// 12, /// ); /// let solver = BruteForce::new(); @@ -65,12 +62,11 @@ inventory::submit! 
{ #[derive(Debug, Clone, Serialize)] pub struct KthLargestMTuple { sets: Vec<Vec<u64>>, - k: u64, bound: u64, } impl KthLargestMTuple { - fn validate(sets: &[Vec<u64>], k: u64, bound: u64) -> Result<(), String> { + fn validate(sets: &[Vec<u64>], bound: u64) -> Result<(), String> { if sets.is_empty() { return Err("KthLargestMTuple requires at least one set".to_string()); } @@ -80,9 +76,6 @@ impl KthLargestMTuple { if sets.iter().any(|s| s.contains(&0)) { return Err("All sizes must be positive (> 0)".to_string()); } - if k == 0 { - return Err("Threshold K must be positive".to_string()); - } if bound == 0 { return Err("Bound B must be positive".to_string()); } @@ -90,9 +83,9 @@ impl KthLargestMTuple { } /// Try to create a new KthLargestMTuple instance. - pub fn try_new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Result<Self, String> { - Self::validate(&sets, k, bound)?; - Ok(Self { sets, k, bound }) + pub fn try_new(sets: Vec<Vec<u64>>, bound: u64) -> Result<Self, String> { + Self::validate(&sets, bound)?; + Ok(Self { sets, bound }) } /// Create a new KthLargestMTuple instance. @@ -100,8 +93,8 @@ impl KthLargestMTuple { /// # Panics /// /// Panics if the inputs are invalid. - pub fn new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Self { - Self::try_new(sets, k, bound).unwrap_or_else(|msg| panic!("{msg}")) + pub fn new(sets: Vec<Vec<u64>>, bound: u64) -> Self { + Self::try_new(sets, bound).unwrap_or_else(|msg| panic!("{msg}")) } /// Returns the sets. @@ -109,11 +102,6 @@ impl KthLargestMTuple { &self.sets } - /// Returns the threshold K. - pub fn k(&self) -> u64 { - self.k - } - /// Returns the bound B. 
pub fn bound(&self) -> u64 { self.bound @@ -133,7 +121,6 @@ impl KthLargestMTuple { #[derive(Deserialize)] struct KthLargestMTupleDef { sets: Vec<Vec<u64>>, - k: u64, bound: u64, } @@ -143,7 +130,7 @@ impl<'de> Deserialize<'de> for KthLargestMTuple { D: Deserializer<'de>, { let data = KthLargestMTupleDef::deserialize(deserializer)?; - Self::try_new(data.sets, data.k, data.bound).map_err(D::Error::custom) + Self::try_new(data.sets, data.bound).map_err(D::Error::custom) } } @@ -189,13 +176,12 @@ crate::declare_variants! { #[cfg(feature = "example-db")] pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> { - // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14. + // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12. // 14 of 18 tuples have sum >= 12. The config [2,1,2] picks (8,6,7) with sum=21 >= 12. vec![crate::example_db::specs::ModelExampleSpec { id: "kth_largest_m_tuple", instance: Box::new(KthLargestMTuple::new( vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], - 14, 12, )), optimal_config: vec![2, 1, 2], diff --git a/src/unit_tests/models/misc/kth_largest_m_tuple.rs b/src/unit_tests/models/misc/kth_largest_m_tuple.rs index 1b26eb83..3a8b5214 100644 --- a/src/unit_tests/models/misc/kth_largest_m_tuple.rs +++ b/src/unit_tests/models/misc/kth_largest_m_tuple.rs @@ -4,8 +4,8 @@ use crate::traits::Problem; use crate::types::Sum; fn example_problem() -> KthLargestMTuple { - // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14 - KthLargestMTuple::new(vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], 14, 12) + // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12 + KthLargestMTuple::new(vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], 12) } #[test] @@ -15,7 +15,6 @@ fn test_kth_largest_m_tuple_creation() { assert_eq!(p.sets()[0], vec![2, 5, 8]); assert_eq!(p.sets()[1], vec![3, 6]); assert_eq!(p.sets()[2], vec![1, 4, 7]); - assert_eq!(p.k(), 14); assert_eq!(p.bound(), 12); assert_eq!(p.num_sets(), 3); assert_eq!(p.total_tuples(), 18); @@ -66,12 +65,10 @@ fn 
test_kth_largest_m_tuple_solver() { #[test] fn test_kth_largest_m_tuple_boundary_example() { - // K=14 and count=14, so the answer is YES (count >= K) let p = example_problem(); let solver = BruteForce::new(); let count = solver.solve(&p); assert_eq!(count, Sum(14)); - assert!(count.0 >= p.k()); } #[test] @@ -82,14 +79,12 @@ fn test_kth_largest_m_tuple_serialization_round_trip() { json, serde_json::json!({ "sets": [[2, 5, 8], [3, 6], [1, 4, 7]], - "k": 14, "bound": 12, }) ); let restored: KthLargestMTuple = serde_json::from_value(json).unwrap(); assert_eq!(restored.sets(), p.sets()); - assert_eq!(restored.k(), p.k()); assert_eq!(restored.bound(), p.bound()); } @@ -97,15 +92,13 @@ fn test_kth_largest_m_tuple_serialization_round_trip() { fn test_kth_largest_m_tuple_deserialization_rejects_invalid() { let invalid_cases = [ // Empty sets - serde_json::json!({ "sets": [], "k": 1, "bound": 5 }), + serde_json::json!({ "sets": [], "bound": 5 }), // A set is empty - serde_json::json!({ "sets": [[1, 2], []], "k": 1, "bound": 3 }), + serde_json::json!({ "sets": [[1, 2], []], "bound": 3 }), // Zero size - serde_json::json!({ "sets": [[0, 2]], "k": 1, "bound": 1 }), - // K=0 - serde_json::json!({ "sets": [[1, 2]], "k": 0, "bound": 1 }), + serde_json::json!({ "sets": [[0, 2]], "bound": 1 }), // Bound=0 - serde_json::json!({ "sets": [[1, 2]], "k": 1, "bound": 0 }), + serde_json::json!({ "sets": [[1, 2]], "bound": 0 }), ]; for invalid in invalid_cases { @@ -116,25 +109,25 @@ fn test_kth_largest_m_tuple_deserialization_rejects_invalid() { #[test] #[should_panic(expected = "at least one set")] fn test_kth_largest_m_tuple_empty_sets_panics() { - KthLargestMTuple::new(vec![], 1, 5); + KthLargestMTuple::new(vec![], 5); } #[test] #[should_panic(expected = "non-empty")] fn test_kth_largest_m_tuple_empty_inner_set_panics() { - KthLargestMTuple::new(vec![vec![1, 2], vec![]], 1, 3); + KthLargestMTuple::new(vec![vec![1, 2], vec![]], 3); } #[test] #[should_panic(expected = "positive")] fn 
test_kth_largest_m_tuple_zero_size_panics() { - KthLargestMTuple::new(vec![vec![0, 2]], 1, 1); + KthLargestMTuple::new(vec![vec![0, 2]], 1); } #[test] fn test_kth_largest_m_tuple_paper_example() { - // Issue example: m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14 - // 14 of 18 tuples have sum >= 12 -> YES (boundary case: count == K) + // Issue example: m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12 + // 14 of 18 tuples have sum >= 12 let p = example_problem(); let solver = BruteForce::new(); let count = solver.solve(&p); @@ -150,7 +143,7 @@ fn test_kth_largest_m_tuple_paper_example() { #[test] fn test_kth_largest_m_tuple_all_qualify() { // Two sets each with one large element, B=1 -> all tuples qualify - let p = KthLargestMTuple::new(vec![vec![5], vec![10]], 1, 1); + let p = KthLargestMTuple::new(vec![vec![5], vec![10]], 1); let solver = BruteForce::new(); assert_eq!(solver.solve(&p), Sum(1)); assert_eq!(p.total_tuples(), 1); @@ -159,7 +152,7 @@ fn test_kth_largest_m_tuple_all_qualify() { #[test] fn test_kth_largest_m_tuple_none_qualify() { // B is larger than any possible sum - let p = KthLargestMTuple::new(vec![vec![1, 2], vec![1, 2]], 1, 100); + let p = KthLargestMTuple::new(vec![vec![1, 2], vec![1, 2]], 100); let solver = BruteForce::new(); assert_eq!(solver.solve(&p), Sum(0)); } From 512de7d27a1140211cabc37f021435d279cfea08 Mon Sep 17 00:00:00 2001 From: Xiwei Pan Date: Sun, 29 Mar 2026 22:53:45 +0800 Subject: [PATCH 5/6] Simplify paper paragraph: remove K references from counting model Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/paper/reductions.typ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index e6476ee1..0e1f0855 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -4641,7 +4641,7 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], #problem-def("KthLargestMTuple")[ Given $m$ finite sets $X_1, dots, X_m$ of 
positive integers and a bound $B in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. ][ - The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. The original G&J decision formulation asks whether this count is at least a given threshold $K$; here we model the pure counting version. The problem is PP-complete under polynomial-time Turing reductions @haase2016, though the special case $m = 2$, $K = 1$ is NP-complete via reduction from Subset Sum. In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. + This is the counting version of MP10 in Garey and Johnson's appendix @garey1979. The corresponding decision problem (is the count at least some threshold?) is PP-complete under polynomial-time Turing reductions @haase2016 and not known to be in NP. The only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. *Example.* Let $m = #m$ and $B = #bound$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. 
From 743414be417f9c7e6ce84568b6e6235369afe2fb Mon Sep 17 00:00:00 2001 From: Xiwei Pan Date: Sun, 29 Mar 2026 23:09:29 +0800 Subject: [PATCH 6/6] Revert K removal: keep K field for G&J decision formulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The K threshold is needed for the standard PARTITION → KthLargestMTuple reduction (G&J R86). Without K, the counting version has no known many-one reductions — only Turing reductions exist. Retains the paper fixes: PP-completeness claim, --solver brute-force. Co-Authored-By: Claude Opus 4.6 (1M context) --- docs/paper/reductions.typ | 7 ++-- problemreductions-cli/src/cli.rs | 2 +- problemreductions-cli/src/commands/create.rs | 17 +++++--- src/models/misc/kth_largest_m_tuple.rs | 42 ++++++++++++------- .../models/misc/kth_largest_m_tuple.rs | 33 +++++++++------ 5 files changed, 65 insertions(+), 36 deletions(-) diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 0e1f0855..06762a8c 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -4632,6 +4632,7 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], #{ let x = load-model-example("KthLargestMTuple") let sets = x.instance.sets + let k = x.instance.k let bound = x.instance.bound let config = x.optimal_config let m = sets.len() @@ -4639,11 +4640,11 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], let total = sets.fold(1, (acc, s) => acc * s.len()) [ #problem-def("KthLargestMTuple")[ - Given $m$ finite sets $X_1, dots, X_m$ of positive integers and a bound $B in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. + Given $m$ finite sets $X_1, dots, X_m$ of positive integers, a bound $B in ZZ^+$, and a threshold $K in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. 
The answer is _yes_ iff this count is at least $K$. ][ - This is the counting version of MP10 in Garey and Johnson's appendix @garey1979. The corresponding decision problem (is the count at least some threshold?) is PP-complete under polynomial-time Turing reductions @haase2016 and not known to be in NP. The only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. + The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. It is _not known to be in NP_, because a "yes" certificate may need to exhibit $K$ qualifying tuples and $K$ can be exponentially large. The problem is PP-complete under polynomial-time Turing reductions @haase2016, though the special case $m = 2$, $K = 1$ is NP-complete via reduction from Subset Sum. In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. - *Example.* Let $m = #m$ and $B = #bound$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. + *Example.* Let $m = #m$, $B = #bound$, and $K = #k$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. 
For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. In total, #k of the #total tuples satisfy the bound, so the answer is _yes_ (count $= K$). #pred-commands( "pred create --example KthLargestMTuple -o kth-largest-m-tuple.json", diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index 49770677..91bb0ed9 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -249,7 +249,7 @@ Flags by problem type: ProductionPlanning --num-periods, --demands, --capacities, --setup-costs, --production-costs, --inventory-costs, --cost-bound SubsetSum --sizes, --target ThreePartition --sizes, --bound - KthLargestMTuple --sets, --bound + KthLargestMTuple --sets, --k, --bound SumOfSquaresPartition --sizes, --num-groups ExpectedRetrievalCost --probabilities, --num-sectors PaintShop --sequence diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index eb2f026f..d30f73d8 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -725,7 +725,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str { } "SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11", "ThreePartition" => "--sizes 4,5,6,4,6,5 --bound 15", - "KthLargestMTuple" => "--sets \"2,5,8;3,6;1,4,7\" --bound 12", + "KthLargestMTuple" => "--sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12", "BoyceCoddNormalFormViolation" => { "--n 6 --sets \"0,1:2;2:3;3,4:5\" --target 0,1,2,3,4,5" } @@ -2379,14 +2379,20 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { "KthLargestMTuple" => { let sets_str = args.sets.as_deref().ok_or_else(|| { anyhow::anyhow!( - "KthLargestMTuple requires --sets and --bound\n\n\ - Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --bound 12" + 
"KthLargestMTuple requires --sets, --k, and --bound\n\n\ + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" + ) + })?; + let k_val = args.k.ok_or_else(|| { + anyhow::anyhow!( + "KthLargestMTuple requires --k\n\n\ + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" ) })?; let bound = args.bound.ok_or_else(|| { anyhow::anyhow!( "KthLargestMTuple requires --bound\n\n\ - Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --bound 12" + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" ) })?; let bound = u64::try_from(bound).map_err(|_| { @@ -2397,7 +2403,8 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { .map(|group| util::parse_comma_list(group)) .collect::>()?; ( - ser(KthLargestMTuple::try_new(sets, bound).map_err(anyhow::Error::msg)?)?, + ser(KthLargestMTuple::try_new(sets, k_val as u64, bound) + .map_err(anyhow::Error::msg)?)?, resolved_variant.clone(), ) } diff --git a/src/models/misc/kth_largest_m_tuple.rs b/src/models/misc/kth_largest_m_tuple.rs index 1717948c..6b600a98 100644 --- a/src/models/misc/kth_largest_m_tuple.rs +++ b/src/models/misc/kth_largest_m_tuple.rs @@ -1,8 +1,8 @@ //! Kth Largest m-Tuple problem implementation. //! -//! Given m sets of positive integers and a bound B, count how many +//! Given m sets of positive integers and thresholds K and B, count how many //! distinct m-tuples (one element per set) have total size at least B. -//! Garey & Johnson MP10. +//! The answer is YES iff the count is at least K. Garey & Johnson MP10. use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry}; use crate::traits::Problem; @@ -17,9 +17,10 @@ inventory::submit! 
{ aliases: &[], dimensions: &[], module_path: module_path!(), - description: "Count m-tuples whose total size meets a bound", + description: "Count m-tuples whose total size meets a bound and compare against a threshold K", fields: &[ FieldInfo { name: "sets", type_name: "Vec<Vec<u64>>", description: "m sets, each containing positive integer sizes" }, + FieldInfo { name: "k", type_name: "u64", description: "Threshold K (answer YES iff count >= K)" }, FieldInfo { name: "bound", type_name: "u64", description: "Lower bound B on tuple sum" }, ], } @@ -34,9 +35,10 @@ inventory::submit! { /// The Kth Largest m-Tuple problem. /// -/// Given sets `X_1, ..., X_m` of positive integers and a bound `B`, count how -/// many distinct m-tuples `(x_1, ..., x_m)` in `X_1 x ... x X_m` satisfy -/// `sum(x_i) >= B`. +/// Given sets `X_1, ..., X_m` of positive integers, a threshold `K`, and a +/// bound `B`, count how many distinct m-tuples `(x_1, ..., x_m)` in +/// `X_1 x ... x X_m` satisfy `sum(x_i) >= B`. The answer is YES iff the +/// count is at least `K`. /// /// # Representation /// @@ -52,6 +54,7 @@ inventory::submit! { /// /// let problem = KthLargestMTuple::new( /// vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], +/// 14, /// 12, /// ); /// let solver = BruteForce::new(); @@ -62,11 +65,12 @@ inventory::submit! 
{ #[derive(Debug, Clone, Serialize)] pub struct KthLargestMTuple { sets: Vec<Vec<u64>>, + k: u64, bound: u64, } impl KthLargestMTuple { - fn validate(sets: &[Vec<u64>], bound: u64) -> Result<(), String> { + fn validate(sets: &[Vec<u64>], k: u64, bound: u64) -> Result<(), String> { if sets.is_empty() { return Err("KthLargestMTuple requires at least one set".to_string()); } @@ -76,6 +80,9 @@ impl KthLargestMTuple { if sets.iter().any(|s| s.contains(&0)) { return Err("All sizes must be positive (> 0)".to_string()); } + if k == 0 { + return Err("Threshold K must be positive".to_string()); + } if bound == 0 { return Err("Bound B must be positive".to_string()); } @@ -83,9 +90,9 @@ impl KthLargestMTuple { } /// Try to create a new KthLargestMTuple instance. - pub fn try_new(sets: Vec<Vec<u64>>, bound: u64) -> Result<Self, String> { - Self::validate(&sets, bound)?; - Ok(Self { sets, bound }) + pub fn try_new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Result<Self, String> { + Self::validate(&sets, k, bound)?; + Ok(Self { sets, k, bound }) } /// Create a new KthLargestMTuple instance. @@ -93,8 +100,8 @@ impl KthLargestMTuple { /// # Panics /// /// Panics if the inputs are invalid. - pub fn new(sets: Vec<Vec<u64>>, bound: u64) -> Self { - Self::try_new(sets, bound).unwrap_or_else(|msg| panic!("{msg}")) + pub fn new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Self { + Self::try_new(sets, k, bound).unwrap_or_else(|msg| panic!("{msg}")) } /// Returns the sets. @@ -102,6 +109,11 @@ impl KthLargestMTuple { &self.sets } + /// Returns the threshold K. + pub fn k(&self) -> u64 { + self.k + } + /// Returns the bound B. 
pub fn bound(&self) -> u64 { self.bound @@ -121,6 +133,7 @@ impl KthLargestMTuple { #[derive(Deserialize)] struct KthLargestMTupleDef { sets: Vec<Vec<u64>>, + k: u64, bound: u64, } @@ -130,7 +143,7 @@ impl<'de> Deserialize<'de> for KthLargestMTuple { D: Deserializer<'de>, { let data = KthLargestMTupleDef::deserialize(deserializer)?; - Self::try_new(data.sets, data.bound).map_err(D::Error::custom) + Self::try_new(data.sets, data.k, data.bound).map_err(D::Error::custom) } } @@ -176,12 +189,13 @@ crate::declare_variants! { #[cfg(feature = "example-db")] pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> { - // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12. + // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14. // 14 of 18 tuples have sum >= 12. The config [2,1,2] picks (8,6,7) with sum=21 >= 12. vec![crate::example_db::specs::ModelExampleSpec { id: "kth_largest_m_tuple", instance: Box::new(KthLargestMTuple::new( vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], + 14, 12, )), optimal_config: vec![2, 1, 2], diff --git a/src/unit_tests/models/misc/kth_largest_m_tuple.rs b/src/unit_tests/models/misc/kth_largest_m_tuple.rs index 3a8b5214..1b26eb83 100644 --- a/src/unit_tests/models/misc/kth_largest_m_tuple.rs +++ b/src/unit_tests/models/misc/kth_largest_m_tuple.rs @@ -4,8 +4,8 @@ use crate::traits::Problem; use crate::types::Sum; fn example_problem() -> KthLargestMTuple { - // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12 - KthLargestMTuple::new(vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], 12) + // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14 + KthLargestMTuple::new(vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], 14, 12) } #[test] @@ -15,6 +15,7 @@ fn test_kth_largest_m_tuple_creation() { assert_eq!(p.sets()[0], vec![2, 5, 8]); assert_eq!(p.sets()[1], vec![3, 6]); assert_eq!(p.sets()[2], vec![1, 4, 7]); + assert_eq!(p.k(), 14); assert_eq!(p.bound(), 12); assert_eq!(p.num_sets(), 3); assert_eq!(p.total_tuples(), 18); @@ -65,10 +66,12 @@ fn 
test_kth_largest_m_tuple_solver() { #[test] fn test_kth_largest_m_tuple_boundary_example() { + // K=14 and count=14, so the answer is YES (count >= K) let p = example_problem(); let solver = BruteForce::new(); let count = solver.solve(&p); assert_eq!(count, Sum(14)); + assert!(count.0 >= p.k()); } #[test] @@ -79,12 +82,14 @@ fn test_kth_largest_m_tuple_serialization_round_trip() { json, serde_json::json!({ "sets": [[2, 5, 8], [3, 6], [1, 4, 7]], + "k": 14, "bound": 12, }) ); let restored: KthLargestMTuple = serde_json::from_value(json).unwrap(); assert_eq!(restored.sets(), p.sets()); + assert_eq!(restored.k(), p.k()); assert_eq!(restored.bound(), p.bound()); } @@ -92,13 +97,15 @@ fn test_kth_largest_m_tuple_serialization_round_trip() { fn test_kth_largest_m_tuple_deserialization_rejects_invalid() { let invalid_cases = [ // Empty sets - serde_json::json!({ "sets": [], "bound": 5 }), + serde_json::json!({ "sets": [], "k": 1, "bound": 5 }), // A set is empty - serde_json::json!({ "sets": [[1, 2], []], "bound": 3 }), + serde_json::json!({ "sets": [[1, 2], []], "k": 1, "bound": 3 }), // Zero size - serde_json::json!({ "sets": [[0, 2]], "bound": 1 }), + serde_json::json!({ "sets": [[0, 2]], "k": 1, "bound": 1 }), + // K=0 + serde_json::json!({ "sets": [[1, 2]], "k": 0, "bound": 1 }), // Bound=0 - serde_json::json!({ "sets": [[1, 2]], "bound": 0 }), + serde_json::json!({ "sets": [[1, 2]], "k": 1, "bound": 0 }), ]; for invalid in invalid_cases { @@ -109,25 +116,25 @@ fn test_kth_largest_m_tuple_deserialization_rejects_invalid() { #[test] #[should_panic(expected = "at least one set")] fn test_kth_largest_m_tuple_empty_sets_panics() { - KthLargestMTuple::new(vec![], 5); + KthLargestMTuple::new(vec![], 1, 5); } #[test] #[should_panic(expected = "non-empty")] fn test_kth_largest_m_tuple_empty_inner_set_panics() { - KthLargestMTuple::new(vec![vec![1, 2], vec![]], 3); + KthLargestMTuple::new(vec![vec![1, 2], vec![]], 1, 3); } #[test] #[should_panic(expected = "positive")] fn 
test_kth_largest_m_tuple_zero_size_panics() { - KthLargestMTuple::new(vec![vec![0, 2]], 1); + KthLargestMTuple::new(vec![vec![0, 2]], 1, 1); } #[test] fn test_kth_largest_m_tuple_paper_example() { - // Issue example: m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12 - // 14 of 18 tuples have sum >= 12 + // Issue example: m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14 + // 14 of 18 tuples have sum >= 12 -> YES (boundary case: count == K) let p = example_problem(); let solver = BruteForce::new(); let count = solver.solve(&p); @@ -143,7 +150,7 @@ fn test_kth_largest_m_tuple_paper_example() { #[test] fn test_kth_largest_m_tuple_all_qualify() { // Two sets each with one large element, B=1 -> all tuples qualify - let p = KthLargestMTuple::new(vec![vec![5], vec![10]], 1); + let p = KthLargestMTuple::new(vec![vec![5], vec![10]], 1, 1); let solver = BruteForce::new(); assert_eq!(solver.solve(&p), Sum(1)); assert_eq!(p.total_tuples(), 1); @@ -152,7 +159,7 @@ fn test_kth_largest_m_tuple_all_qualify() { #[test] fn test_kth_largest_m_tuple_none_qualify() { // B is larger than any possible sum - let p = KthLargestMTuple::new(vec![vec![1, 2], vec![1, 2]], 100); + let p = KthLargestMTuple::new(vec![vec![1, 2], vec![1, 2]], 1, 100); let solver = BruteForce::new(); assert_eq!(solver.solve(&p), Sum(0)); }