diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 9fb6e543..08bf49aa 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -199,6 +199,7 @@ "SumOfSquaresPartition": [Sum of Squares Partition], "TimetableDesign": [Timetable Design], "TwoDimensionalConsecutiveSets": [2-Dimensional Consecutive Sets], + "KthLargestMTuple": [$K$th Largest $m$-Tuple], ) // Definition label: "def:" — each definition block must have a matching label @@ -4705,6 +4706,32 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76], ] } +#{ + let x = load-model-example("KthLargestMTuple") + let sets = x.instance.sets + let k = x.instance.k + let bound = x.instance.bound + let config = x.optimal_config + let m = sets.len() + // Total number of tuples in the Cartesian product (product of the set sizes) + let total = sets.fold(1, (acc, s) => acc * s.len()) + [ + #problem-def("KthLargestMTuple")[ + Given $m$ finite sets $X_1, dots, X_m$ of positive integers, a bound $B in ZZ^+$, and a threshold $K in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. The answer is _yes_ iff this count is at least $K$. + ][ + The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. It is _not known to be in NP_, because a "yes" certificate may need to exhibit $K$ qualifying tuples and $K$ can be exponentially large. The problem is PP-complete under polynomial-time Turing reductions @haase2016, and Garey and Johnson credit its NP-hardness to a transformation from Partition @garey1979. The special case $K = 1$ is trivial: the answer is _yes_ iff the per-set maxima sum to at least $B$. In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.]. 
+ + *Example.* Let $m = #m$, $B = #bound$, and $K = #k$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. In total, #k of the #total tuples satisfy the bound, so the answer is _yes_ (count $= K$). + + #pred-commands( + "pred create --example KthLargestMTuple -o kth-largest-m-tuple.json", + "pred solve kth-largest-m-tuple.json --solver brute-force", + "pred evaluate kth-largest-m-tuple.json --config " + config.map(str).join(","), + ) + ] + ] +} + #{ let x = load-model-example("SequencingWithReleaseTimesAndDeadlines") let n = x.instance.lengths.len() diff --git a/docs/paper/references.bib b/docs/paper/references.bib index f9f3064f..ac226799 100644 --- a/docs/paper/references.bib +++ b/docs/paper/references.bib @@ -1455,6 +1455,16 @@ @techreport{plaisted1976 year = {1976} } +@article{haase2016, + author = {Haase, Christoph and Kiefer, Stefan}, + title = {The Complexity of the {K}th Largest Subset Problem and Related Problems}, + journal = {Information Processing Letters}, + volume = {116}, + number = {2}, + pages = {111--115}, + year = {2016} +} + @article{Murty1972, author = {Murty, Katta G.}, title = {A fundamental problem in linear inequalities with applications to the travelling salesman problem}, diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index 35484abd..5d95281d 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -249,6 +249,7 @@ Flags by problem type: ProductionPlanning --num-periods, --demands, --capacities, --setup-costs, --production-costs, --inventory-costs, --cost-bound SubsetSum --sizes, --target ThreePartition --sizes, --bound + KthLargestMTuple --sets, --k, --bound 
QuadraticDiophantineEquations --coeff-a, --coeff-b, --coeff-c SumOfSquaresPartition --sizes, --num-groups ExpectedRetrievalCost --probabilities, --num-sectors diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index fb22dba6..28d8a138 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -24,9 +24,9 @@ use problemreductions::models::misc::{ AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation, ConjunctiveBooleanQuery, ConsistencyOfDatabaseFrequencyTables, EnsembleComputation, ExpectedRetrievalCost, FlowShopScheduling, FrequencyTable, GroupingBySwapping, - JobShopScheduling, KnownValue, LongestCommonSubsequence, MinimumTardinessSequencing, - MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, ProductionPlanning, QueryArg, - RectilinearPictureCompression, ResourceConstrainedScheduling, + JobShopScheduling, KnownValue, KthLargestMTuple, LongestCommonSubsequence, + MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, + ProductionPlanning, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence, @@ -732,6 +732,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str { "IntegerKnapsack" => "--sizes 3,4,5,2,7 --values 4,5,7,3,9 --capacity 15", "SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11", "ThreePartition" => "--sizes 4,5,6,4,6,5 --bound 15", + "KthLargestMTuple" => "--sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12", "QuadraticDiophantineEquations" => "--coeff-a 3 --coeff-b 5 --coeff-c 53", "BoyceCoddNormalFormViolation" => { "--n 6 --sets \"0,1:2;2:3;3,4:5\" --target 0,1,2,3,4,5" @@ -2423,6 
+2424,40 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { ) } + // KthLargestMTuple + "KthLargestMTuple" => { + let sets_str = args.sets.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "KthLargestMTuple requires --sets, --k, and --bound\n\n\ + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" + ) + })?; + let k_val = args.k.ok_or_else(|| { + anyhow::anyhow!( + "KthLargestMTuple requires --k\n\n\ + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" + ) + })?; + let bound = args.bound.ok_or_else(|| { + anyhow::anyhow!( + "KthLargestMTuple requires --bound\n\n\ + Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12" + ) + })?; + let bound = u64::try_from(bound).map_err(|_| { + anyhow::anyhow!("KthLargestMTuple requires a positive integer --bound") + })?; + let sets: Vec> = sets_str + .split(';') + .map(|group| util::parse_comma_list(group)) + .collect::>()?; + ( + ser(KthLargestMTuple::try_new(sets, k_val as u64, bound) + .map_err(anyhow::Error::msg)?)?, + resolved_variant.clone(), + ) + } + // QuadraticDiophantineEquations "QuadraticDiophantineEquations" => { let a = args.coeff_a.ok_or_else(|| { diff --git a/src/models/misc/kth_largest_m_tuple.rs b/src/models/misc/kth_largest_m_tuple.rs new file mode 100644 index 00000000..6b600a98 --- /dev/null +++ b/src/models/misc/kth_largest_m_tuple.rs @@ -0,0 +1,208 @@ +//! Kth Largest m-Tuple problem implementation. +//! +//! Given m sets of positive integers and thresholds K and B, count how many +//! distinct m-tuples (one element per set) have total size at least B. +//! The answer is YES iff the count is at least K. Garey & Johnson MP10. + +use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry}; +use crate::traits::Problem; +use crate::types::Sum; +use serde::de::Error as _; +use serde::{Deserialize, Deserializer, Serialize}; + +inventory::submit! 
{ + ProblemSchemaEntry { + name: "KthLargestMTuple", + display_name: "Kth Largest m-Tuple", + aliases: &[], + dimensions: &[], + module_path: module_path!(), + description: "Count m-tuples whose total size meets a bound and compare against a threshold K", + fields: &[ + FieldInfo { name: "sets", type_name: "Vec>", description: "m sets, each containing positive integer sizes" }, + FieldInfo { name: "k", type_name: "u64", description: "Threshold K (answer YES iff count >= K)" }, + FieldInfo { name: "bound", type_name: "u64", description: "Lower bound B on tuple sum" }, + ], + } +} + +inventory::submit! { + ProblemSizeFieldEntry { + name: "KthLargestMTuple", + fields: &["num_sets", "total_tuples"], + } +} + +/// The Kth Largest m-Tuple problem. +/// +/// Given sets `X_1, ..., X_m` of positive integers, a threshold `K`, and a +/// bound `B`, count how many distinct m-tuples `(x_1, ..., x_m)` in +/// `X_1 x ... x X_m` satisfy `sum(x_i) >= B`. The answer is YES iff the +/// count is at least `K`. +/// +/// # Representation +/// +/// Variable `i` selects an element from set `X_i`, ranging over `{0, ..., |X_i|-1}`. +/// `evaluate` returns `Sum(1)` if the tuple sum >= B, else `Sum(0)`. +/// The aggregate over all configurations gives the total count of qualifying tuples. 
+/// +/// # Example +/// +/// ``` +/// use problemreductions::models::misc::KthLargestMTuple; +/// use problemreductions::{Problem, Solver, BruteForce}; +/// +/// let problem = KthLargestMTuple::new( +/// vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], +/// 14, +/// 12, +/// ); +/// let solver = BruteForce::new(); +/// let value = solver.solve(&problem); +/// // 14 of the 18 tuples have sum >= 12 +/// assert_eq!(value, problemreductions::types::Sum(14)); +/// ``` +#[derive(Debug, Clone, Serialize)] +pub struct KthLargestMTuple { + sets: Vec>, + k: u64, + bound: u64, +} + +impl KthLargestMTuple { + fn validate(sets: &[Vec], k: u64, bound: u64) -> Result<(), String> { + if sets.is_empty() { + return Err("KthLargestMTuple requires at least one set".to_string()); + } + if sets.iter().any(|s| s.is_empty()) { + return Err("Every set must be non-empty".to_string()); + } + if sets.iter().any(|s| s.contains(&0)) { + return Err("All sizes must be positive (> 0)".to_string()); + } + if k == 0 { + return Err("Threshold K must be positive".to_string()); + } + if bound == 0 { + return Err("Bound B must be positive".to_string()); + } + Ok(()) + } + + /// Try to create a new KthLargestMTuple instance. + pub fn try_new(sets: Vec>, k: u64, bound: u64) -> Result { + Self::validate(&sets, k, bound)?; + Ok(Self { sets, k, bound }) + } + + /// Create a new KthLargestMTuple instance. + /// + /// # Panics + /// + /// Panics if the inputs are invalid. + pub fn new(sets: Vec>, k: u64, bound: u64) -> Self { + Self::try_new(sets, k, bound).unwrap_or_else(|msg| panic!("{msg}")) + } + + /// Returns the sets. + pub fn sets(&self) -> &[Vec] { + &self.sets + } + + /// Returns the threshold K. + pub fn k(&self) -> u64 { + self.k + } + + /// Returns the bound B. + pub fn bound(&self) -> u64 { + self.bound + } + + /// Returns the number of sets (m). + pub fn num_sets(&self) -> usize { + self.sets.len() + } + + /// Returns the total number of m-tuples (product of set sizes). 
+ pub fn total_tuples(&self) -> usize { + self.sets.iter().map(|s| s.len()).product() + } +} + +#[derive(Deserialize)] +struct KthLargestMTupleDef { + sets: Vec>, + k: u64, + bound: u64, +} + +impl<'de> Deserialize<'de> for KthLargestMTuple { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let data = KthLargestMTupleDef::deserialize(deserializer)?; + Self::try_new(data.sets, data.k, data.bound).map_err(D::Error::custom) + } +} + +impl Problem for KthLargestMTuple { + const NAME: &'static str = "KthLargestMTuple"; + type Value = Sum; + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } + + fn dims(&self) -> Vec { + self.sets.iter().map(|s| s.len()).collect() + } + + fn evaluate(&self, config: &[usize]) -> Sum { + if config.len() != self.num_sets() { + return Sum(0); + } + for (i, &choice) in config.iter().enumerate() { + if choice >= self.sets[i].len() { + return Sum(0); + } + } + let total: u64 = config + .iter() + .enumerate() + .map(|(i, &choice)| self.sets[i][choice]) + .sum(); + if total >= self.bound { + Sum(1) + } else { + Sum(0) + } + } +} + +// Best known: brute-force enumeration of all tuples, O(total_tuples * num_sets). +// No sub-exponential exact algorithm is known for the general case. +crate::declare_variants! { + default KthLargestMTuple => "total_tuples * num_sets", +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_model_example_specs() -> Vec { + // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14. + // 14 of 18 tuples have sum >= 12. The config [2,1,2] picks (8,6,7) with sum=21 >= 12. 
+ vec![crate::example_db::specs::ModelExampleSpec { + id: "kth_largest_m_tuple", + instance: Box::new(KthLargestMTuple::new( + vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], + 14, + 12, + )), + optimal_config: vec![2, 1, 2], + optimal_value: serde_json::json!(1), + }] +} + +#[cfg(test)] +#[path = "../../unit_tests/models/misc/kth_largest_m_tuple.rs"] +mod tests; diff --git a/src/models/misc/mod.rs b/src/models/misc/mod.rs index 0e5572ca..e635c7db 100644 --- a/src/models/misc/mod.rs +++ b/src/models/misc/mod.rs @@ -77,6 +77,7 @@ mod flow_shop_scheduling; mod grouping_by_swapping; mod job_shop_scheduling; mod knapsack; +mod kth_largest_m_tuple; mod longest_common_subsequence; mod minimum_tardiness_sequencing; mod multiprocessor_scheduling; @@ -119,6 +120,7 @@ pub use flow_shop_scheduling::FlowShopScheduling; pub use grouping_by_swapping::GroupingBySwapping; pub use job_shop_scheduling::JobShopScheduling; pub use knapsack::Knapsack; +pub use kth_largest_m_tuple::KthLargestMTuple; pub use longest_common_subsequence::LongestCommonSubsequence; pub use minimum_tardiness_sequencing::MinimumTardinessSequencing; pub use multiprocessor_scheduling::MultiprocessorScheduling; @@ -186,5 +188,6 @@ pub(crate) fn canonical_model_example_specs() -> Vec KthLargestMTuple { + // m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14 + KthLargestMTuple::new(vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]], 14, 12) +} + +#[test] +fn test_kth_largest_m_tuple_creation() { + let p = example_problem(); + assert_eq!(p.sets().len(), 3); + assert_eq!(p.sets()[0], vec![2, 5, 8]); + assert_eq!(p.sets()[1], vec![3, 6]); + assert_eq!(p.sets()[2], vec![1, 4, 7]); + assert_eq!(p.k(), 14); + assert_eq!(p.bound(), 12); + assert_eq!(p.num_sets(), 3); + assert_eq!(p.total_tuples(), 18); + assert_eq!(p.dims(), vec![3, 2, 3]); + assert_eq!(p.num_variables(), 3); + assert_eq!(::NAME, "KthLargestMTuple"); + assert_eq!(::variant(), vec![]); +} + +#[test] +fn test_kth_largest_m_tuple_evaluate_qualifying_tuple() { 
+ let p = example_problem(); + // (8,6,7) = sum 21 >= 12 -> Sum(1) + assert_eq!(p.evaluate(&[2, 1, 2]), Sum(1)); + // (5,6,4) = sum 15 >= 12 -> Sum(1) + assert_eq!(p.evaluate(&[1, 1, 1]), Sum(1)); +} + +#[test] +fn test_kth_largest_m_tuple_evaluate_non_qualifying_tuple() { + let p = example_problem(); + // (2,3,1) = sum 6 < 12 -> Sum(0) + assert_eq!(p.evaluate(&[0, 0, 0]), Sum(0)); + // (2,3,4) = sum 9 < 12 -> Sum(0) + assert_eq!(p.evaluate(&[0, 0, 1]), Sum(0)); +} + +#[test] +fn test_kth_largest_m_tuple_evaluate_invalid_configs() { + let p = example_problem(); + // Wrong length + assert_eq!(p.evaluate(&[0, 0]), Sum(0)); + assert_eq!(p.evaluate(&[0, 0, 0, 0]), Sum(0)); + // Out of range + assert_eq!(p.evaluate(&[3, 0, 0]), Sum(0)); + assert_eq!(p.evaluate(&[0, 2, 0]), Sum(0)); + assert_eq!(p.evaluate(&[0, 0, 3]), Sum(0)); +} + +#[test] +fn test_kth_largest_m_tuple_solver() { + let p = example_problem(); + let solver = BruteForce::new(); + let value = solver.solve(&p); + // 14 of 18 tuples qualify (sum >= 12) + assert_eq!(value, Sum(14)); +} + +#[test] +fn test_kth_largest_m_tuple_boundary_example() { + // K=14 and count=14, so the answer is YES (count >= K) + let p = example_problem(); + let solver = BruteForce::new(); + let count = solver.solve(&p); + assert_eq!(count, Sum(14)); + assert!(count.0 >= p.k()); +} + +#[test] +fn test_kth_largest_m_tuple_serialization_round_trip() { + let p = example_problem(); + let json = serde_json::to_value(&p).unwrap(); + assert_eq!( + json, + serde_json::json!({ + "sets": [[2, 5, 8], [3, 6], [1, 4, 7]], + "k": 14, + "bound": 12, + }) + ); + + let restored: KthLargestMTuple = serde_json::from_value(json).unwrap(); + assert_eq!(restored.sets(), p.sets()); + assert_eq!(restored.k(), p.k()); + assert_eq!(restored.bound(), p.bound()); +} + +#[test] +fn test_kth_largest_m_tuple_deserialization_rejects_invalid() { + let invalid_cases = [ + // Empty sets + serde_json::json!({ "sets": [], "k": 1, "bound": 5 }), + // A set is empty + 
serde_json::json!({ "sets": [[1, 2], []], "k": 1, "bound": 3 }), + // Zero size + serde_json::json!({ "sets": [[0, 2]], "k": 1, "bound": 1 }), + // K=0 + serde_json::json!({ "sets": [[1, 2]], "k": 0, "bound": 1 }), + // Bound=0 + serde_json::json!({ "sets": [[1, 2]], "k": 1, "bound": 0 }), + ]; + + for invalid in invalid_cases { + assert!(serde_json::from_value::(invalid).is_err()); + } +} + +#[test] +#[should_panic(expected = "at least one set")] +fn test_kth_largest_m_tuple_empty_sets_panics() { + KthLargestMTuple::new(vec![], 1, 5); +} + +#[test] +#[should_panic(expected = "non-empty")] +fn test_kth_largest_m_tuple_empty_inner_set_panics() { + KthLargestMTuple::new(vec![vec![1, 2], vec![]], 1, 3); +} + +#[test] +#[should_panic(expected = "positive")] +fn test_kth_largest_m_tuple_zero_size_panics() { + KthLargestMTuple::new(vec![vec![0, 2]], 1, 1); +} + +#[test] +fn test_kth_largest_m_tuple_paper_example() { + // Issue example: m=3, X_1={2,5,8}, X_2={3,6}, X_3={1,4,7}, B=12, K=14 + // 14 of 18 tuples have sum >= 12 -> YES (boundary case: count == K) + let p = example_problem(); + let solver = BruteForce::new(); + let count = solver.solve(&p); + assert_eq!(count, Sum(14)); + + // Verify a specific qualifying tuple: (8,6,7), sum=21 + assert_eq!(p.evaluate(&[2, 1, 2]), Sum(1)); + + // Verify a specific non-qualifying tuple: (2,3,1), sum=6 + assert_eq!(p.evaluate(&[0, 0, 0]), Sum(0)); +} + +#[test] +fn test_kth_largest_m_tuple_all_qualify() { + // Two sets each with one large element, B=1 -> all tuples qualify + let p = KthLargestMTuple::new(vec![vec![5], vec![10]], 1, 1); + let solver = BruteForce::new(); + assert_eq!(solver.solve(&p), Sum(1)); + assert_eq!(p.total_tuples(), 1); +} + +#[test] +fn test_kth_largest_m_tuple_none_qualify() { + // B is larger than any possible sum + let p = KthLargestMTuple::new(vec![vec![1, 2], vec![1, 2]], 1, 100); + let solver = BruteForce::new(); + assert_eq!(solver.solve(&p), Sum(0)); +}