Skip to content
27 changes: 27 additions & 0 deletions docs/paper/reductions.typ
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@
"SumOfSquaresPartition": [Sum of Squares Partition],
"TimetableDesign": [Timetable Design],
"TwoDimensionalConsecutiveSets": [2-Dimensional Consecutive Sets],
"KthLargestMTuple": [$K$th Largest $m$-Tuple],
)

// Definition label: "def:<ProblemName>" — each definition block must have a matching label
Expand Down Expand Up @@ -4705,6 +4706,32 @@ A classical NP-complete problem from Garey and Johnson @garey1979[Ch.~3, p.~76],
]
}

#{
// Load the canonical example; `x.instance` carries the sets, the threshold K
// and the bound B, and `x.optimal_config` holds one index per set.
let x = load-model-example("KthLargestMTuple")
let sets = x.instance.sets
let k = x.instance.k
let bound = x.instance.bound
// `config.at(i)` is a position inside `sets.at(i)` (see the `sets.at(i).at(c)` lookups below).
let config = x.optimal_config
let m = sets.len()
// Size of the full Cartesian product X_1 x ... x X_m, i.e. the product of the
// set sizes. This counts ALL tuples, not only those meeting the bound.
// NOTE(review): the closing sentence of the example prints `k` as the number of
// qualifying tuples; that count is not recomputed here, so the text is only
// right while the loaded example has exactly K qualifying tuples.
let total = sets.fold(1, (acc, s) => acc * s.len())
[
#problem-def("KthLargestMTuple")[
Given $m$ finite sets $X_1, dots, X_m$ of positive integers, a bound $B in ZZ^+$, and a threshold $K in ZZ^+$, count the number of distinct $m$-tuples $(x_1, dots, x_m) in X_1 times dots.c times X_m$ satisfying $sum_(i=1)^m x_i >= B$. The answer is _yes_ iff this count is at least $K$.
][
The $K$th Largest $m$-Tuple problem is MP10 in Garey and Johnson's appendix @garey1979. It is _not known to be in NP_, because a "yes" certificate may need to exhibit $K$ qualifying tuples and $K$ can be exponentially large. The problem is PP-complete under polynomial-time Turing reductions @haase2016, though the special case $m = 2$, $K = 1$ is NP-complete via reduction from Subset Sum. In the general case, the only known exact approach is brute-force enumeration of all $product_(i=1)^m |X_i|$ tuples, so the registered catalog complexity is `total_tuples * num_sets`#footnote[No algorithm improving on brute-force is known for the general $K$th Largest $m$-Tuple problem.].

*Example.* Let $m = #m$, $B = #bound$, and $K = #k$ with sets #sets.enumerate().map(((i, s)) => [$X_#(i+1) = {#s.map(str).join(", ")}$]).join([, ]). The Cartesian product has $#total$ tuples. For instance, the tuple $(#config.enumerate().map(((i, c)) => str(sets.at(i).at(c))).join(", "))$ has sum $#config.enumerate().map(((i, c)) => sets.at(i).at(c)).sum() >= #bound$, contributing 1 to the count. In total, #k of the #total tuples satisfy the bound, so the answer is _yes_ (count $= K$).

#pred-commands(
"pred create --example KthLargestMTuple -o kth-largest-m-tuple.json",
"pred solve kth-largest-m-tuple.json --solver brute-force",
"pred evaluate kth-largest-m-tuple.json --config " + config.map(str).join(","),
)
]
]
}

#{
let x = load-model-example("SequencingWithReleaseTimesAndDeadlines")
let n = x.instance.lengths.len()
Expand Down
10 changes: 10 additions & 0 deletions docs/paper/references.bib
Original file line number Diff line number Diff line change
Expand Up @@ -1455,6 +1455,16 @@ @techreport{plaisted1976
year = {1976}
}

@article{haase2016,
author = {Haase, Christoph and Kiefer, Stefan},
title = {The Complexity of the {K}th Largest Subset Problem and Related Problems},
journal = {Information Processing Letters},
volume = {116},
number = {2},
pages = {111--115},
year = {2016}
}

@article{Murty1972,
author = {Murty, Katta G.},
title = {A fundamental problem in linear inequalities with applications to the travelling salesman problem},
Expand Down
1 change: 1 addition & 0 deletions problemreductions-cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ Flags by problem type:
ProductionPlanning --num-periods, --demands, --capacities, --setup-costs, --production-costs, --inventory-costs, --cost-bound
SubsetSum --sizes, --target
ThreePartition --sizes, --bound
KthLargestMTuple --sets, --k, --bound
QuadraticDiophantineEquations --coeff-a, --coeff-b, --coeff-c
SumOfSquaresPartition --sizes, --num-groups
ExpectedRetrievalCost --probabilities, --num-sectors
Expand Down
41 changes: 38 additions & 3 deletions problemreductions-cli/src/commands/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ use problemreductions::models::misc::{
AdditionalKey, BinPacking, BoyceCoddNormalFormViolation, CapacityAssignment, CbqRelation,
ConjunctiveBooleanQuery, ConsistencyOfDatabaseFrequencyTables, EnsembleComputation,
ExpectedRetrievalCost, FlowShopScheduling, FrequencyTable, GroupingBySwapping,
JobShopScheduling, KnownValue, LongestCommonSubsequence, MinimumTardinessSequencing,
MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack, ProductionPlanning, QueryArg,
RectilinearPictureCompression, ResourceConstrainedScheduling,
JobShopScheduling, KnownValue, KthLargestMTuple, LongestCommonSubsequence,
MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, PartiallyOrderedKnapsack,
ProductionPlanning, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence,
Expand Down Expand Up @@ -732,6 +732,7 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str {
"IntegerKnapsack" => "--sizes 3,4,5,2,7 --values 4,5,7,3,9 --capacity 15",
"SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11",
"ThreePartition" => "--sizes 4,5,6,4,6,5 --bound 15",
"KthLargestMTuple" => "--sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12",
"QuadraticDiophantineEquations" => "--coeff-a 3 --coeff-b 5 --coeff-c 53",
"BoyceCoddNormalFormViolation" => {
"--n 6 --sets \"0,1:2;2:3;3,4:5\" --target 0,1,2,3,4,5"
Expand Down Expand Up @@ -2423,6 +2424,40 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> {
)
}

// KthLargestMTuple
"KthLargestMTuple" => {
    // All "missing flag" errors share one usage hint; only the description
    // of what is missing varies, so build them from a single template.
    let missing = |what: &str| {
        anyhow::anyhow!(
            "KthLargestMTuple requires {}\n\n\
             Usage: pred create KthLargestMTuple --sets \"2,5,8;3,6;1,4,7\" --k 14 --bound 12",
            what
        )
    };

    // Flags are checked in the order --sets, --k, --bound so the first
    // missing one determines the reported error.
    let raw_sets = args
        .sets
        .as_deref()
        .ok_or_else(|| missing("--sets, --k, and --bound"))?;
    let threshold = args.k.ok_or_else(|| missing("--k"))?;
    let raw_bound = args.bound.ok_or_else(|| missing("--bound"))?;

    // Reject bounds that do not fit in u64 before touching the sets.
    let bound = u64::try_from(raw_bound).map_err(|_| {
        anyhow::anyhow!("KthLargestMTuple requires a positive integer --bound")
    })?;

    // Sets are separated by ';', elements inside a set by ','.
    let sets: Vec<Vec<u64>> = raw_sets
        .split(';')
        .map(|chunk| util::parse_comma_list(chunk))
        .collect::<Result<_, _>>()?;

    // `try_new` performs the semantic validation (non-empty sets, positive values).
    let problem =
        KthLargestMTuple::try_new(sets, threshold as u64, bound).map_err(anyhow::Error::msg)?;
    (ser(problem)?, resolved_variant.clone())
}

// QuadraticDiophantineEquations
"QuadraticDiophantineEquations" => {
let a = args.coeff_a.ok_or_else(|| {
Expand Down
208 changes: 208 additions & 0 deletions src/models/misc/kth_largest_m_tuple.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
//! Kth Largest m-Tuple problem implementation.
//!
//! Given m sets of positive integers and thresholds K and B, count how many
//! distinct m-tuples (one element per set) have total size at least B.
//! The answer is YES iff the count is at least K. Garey & Johnson MP10.

use crate::registry::{FieldInfo, ProblemSchemaEntry, ProblemSizeFieldEntry};
use crate::traits::Problem;
use crate::types::Sum;
use serde::de::Error as _;
use serde::{Deserialize, Deserializer, Serialize};

// Schema metadata for `KthLargestMTuple`, submitted through `inventory`.
// The `fields` entries mirror the three fields of the struct defined below
// (`sets`, `k`, `bound`) together with their Rust type names.
inventory::submit! {
ProblemSchemaEntry {
name: "KthLargestMTuple",
display_name: "Kth Largest m-Tuple",
aliases: &[],
dimensions: &[],
module_path: module_path!(),
description: "Count m-tuples whose total size meets a bound and compare against a threshold K",
fields: &[
FieldInfo { name: "sets", type_name: "Vec<Vec<u64>>", description: "m sets, each containing positive integer sizes" },
FieldInfo { name: "k", type_name: "u64", description: "Threshold K (answer YES iff count >= K)" },
FieldInfo { name: "bound", type_name: "u64", description: "Lower bound B on tuple sum" },
],
}
}

// Size-field names for `KthLargestMTuple`. Both names match getter methods on
// the type (`num_sets()` and `total_tuples()`) and are the identifiers used in
// the complexity expression passed to `declare_variants!` in this file.
inventory::submit! {
ProblemSizeFieldEntry {
name: "KthLargestMTuple",
fields: &["num_sets", "total_tuples"],
}
}

/// The Kth Largest m-Tuple problem.
///
/// Given sets `X_1, ..., X_m` of positive integers, a threshold `K`, and a
/// bound `B`, count how many distinct m-tuples `(x_1, ..., x_m)` in
/// `X_1 x ... x X_m` satisfy `sum(x_i) >= B`. The answer is YES iff the
/// count is at least `K`.
///
/// # Representation
///
/// Variable `i` selects an element from set `X_i`, ranging over `{0, ..., |X_i|-1}`.
/// `evaluate` returns `Sum(1)` if the tuple sum >= B, else `Sum(0)`.
/// The aggregate over all configurations gives the total count of qualifying tuples.
///
/// Note that `K` is stored and exposed through `k()`, but `evaluate` itself
/// never reads it: nothing in this module compares the aggregated count
/// against `K`.
///
/// # Example
///
/// ```
/// use problemreductions::models::misc::KthLargestMTuple;
/// use problemreductions::{Problem, Solver, BruteForce};
///
/// let problem = KthLargestMTuple::new(
/// vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]],
/// 14,
/// 12,
/// );
/// let solver = BruteForce::new();
/// let value = solver.solve(&problem);
/// // 14 of the 18 tuples have sum >= 12
/// assert_eq!(value, problemreductions::types::Sum(14));
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct KthLargestMTuple {
/// The sets `X_1, ..., X_m`; each inner vector is non-empty and contains
/// no zero (enforced by the constructors).
sets: Vec<Vec<u64>>,
/// Threshold `K` on the number of qualifying tuples; always > 0.
k: u64,
/// Lower bound `B` on a tuple's sum; always > 0.
bound: u64,
}

impl KthLargestMTuple {
    /// Checks the structural invariants of an instance.
    ///
    /// The checks run in a fixed order and the first violated one decides
    /// the returned message:
    /// 1. at least one set is present,
    /// 2. every set is non-empty,
    /// 3. no set contains `0`,
    /// 4. `k > 0`,
    /// 5. `bound > 0`.
    fn validate(sets: &[Vec<u64>], k: u64, bound: u64) -> Result<(), String> {
        if sets.is_empty() {
            return Err("KthLargestMTuple requires at least one set".to_string());
        }
        if sets.iter().any(|s| s.is_empty()) {
            return Err("Every set must be non-empty".to_string());
        }
        if sets.iter().any(|s| s.contains(&0)) {
            return Err("All sizes must be positive (> 0)".to_string());
        }
        if k == 0 {
            return Err("Threshold K must be positive".to_string());
        }
        if bound == 0 {
            return Err("Bound B must be positive".to_string());
        }
        Ok(())
    }

    /// Try to create a new KthLargestMTuple instance.
    ///
    /// # Errors
    ///
    /// Returns a human-readable message if there are no sets, if any set is
    /// empty or contains `0`, or if `k` or `bound` is `0`.
    pub fn try_new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Result<Self, String> {
        Self::validate(&sets, k, bound)?;
        Ok(Self { sets, k, bound })
    }

    /// Create a new KthLargestMTuple instance.
    ///
    /// # Panics
    ///
    /// Panics if the inputs are invalid (see [`Self::try_new`] for the
    /// conditions); the panic message is the validation error.
    pub fn new(sets: Vec<Vec<u64>>, k: u64, bound: u64) -> Self {
        Self::try_new(sets, k, bound).unwrap_or_else(|msg| panic!("{msg}"))
    }

    /// Returns the sets.
    pub fn sets(&self) -> &[Vec<u64>] {
        &self.sets
    }

    /// Returns the threshold K.
    pub fn k(&self) -> u64 {
        self.k
    }

    /// Returns the bound B.
    pub fn bound(&self) -> u64 {
        self.bound
    }

    /// Returns the number of sets (m).
    pub fn num_sets(&self) -> usize {
        self.sets.len()
    }

    /// Returns the total number of m-tuples (product of set sizes).
    ///
    /// The product grows exponentially in the number of sets (64 sets of
    /// size 2 already exceed a 64-bit `usize`), so the multiplication
    /// saturates at `usize::MAX` instead of panicking in debug builds or
    /// silently wrapping in release builds.
    pub fn total_tuples(&self) -> usize {
        self.sets
            .iter()
            .fold(1usize, |acc, set| acc.saturating_mul(set.len()))
    }
}

/// Plain mirror of [`KthLargestMTuple`] with a derived `Deserialize`.
///
/// It carries the raw, unvalidated field values; the hand-written
/// `Deserialize` impl for `KthLargestMTuple` reads into this type first and
/// then passes the fields through `KthLargestMTuple::try_new`.
#[derive(Deserialize)]
struct KthLargestMTupleDef {
sets: Vec<Vec<u64>>,
k: u64,
bound: u64,
}

impl<'de> Deserialize<'de> for KthLargestMTuple {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let data = KthLargestMTupleDef::deserialize(deserializer)?;
Self::try_new(data.sets, data.k, data.bound).map_err(D::Error::custom)
}
}

impl Problem for KthLargestMTuple {
    const NAME: &'static str = "KthLargestMTuple";
    type Value = Sum<u64>;

    /// This problem has no variant parameters.
    fn variant() -> Vec<(&'static str, &'static str)> {
        crate::variant_params![]
    }

    /// One variable per set; variable `i` ranges over the indices of set `i`.
    fn dims(&self) -> Vec<usize> {
        self.sets.iter().map(|s| s.len()).collect()
    }

    /// Returns `Sum(1)` if the tuple selected by `config` has total size at
    /// least the bound `B`, and `Sum(0)` otherwise.
    ///
    /// `config[i]` is an index into set `i`. A configuration of the wrong
    /// length, or with any index out of range, evaluates to `Sum(0)`.
    ///
    /// The total is accumulated in `u128`: individual sizes are arbitrary
    /// `u64` values, so a `u64` sum could overflow (panicking in debug
    /// builds, or wrapping in release builds and wrongly reporting a large
    /// tuple as falling below the bound).
    fn evaluate(&self, config: &[usize]) -> Sum<u64> {
        if config.len() != self.num_sets() {
            return Sum(0);
        }
        let mut total: u128 = 0;
        for (set, &choice) in self.sets.iter().zip(config) {
            match set.get(choice) {
                Some(&size) => total += u128::from(size),
                // Out-of-range selection: not a valid tuple.
                None => return Sum(0),
            }
        }
        if total >= u128::from(self.bound) {
            Sum(1)
        } else {
            Sum(0)
        }
    }
}

// Best known: brute-force enumeration of all tuples, O(total_tuples * num_sets).
// No sub-exponential exact algorithm is known for the general case.
// The identifiers in the expression below are the size fields declared in the
// `ProblemSizeFieldEntry` above and match the `total_tuples()` / `num_sets()`
// getters on `KthLargestMTuple`.
crate::declare_variants! {
default KthLargestMTuple => "total_tuples * num_sets",
}

/// Canonical example instance for the example database.
///
/// Uses `m = 3` sets `X_1 = {2,5,8}`, `X_2 = {3,6}`, `X_3 = {1,4,7}` with
/// `B = 12` and `K = 14`. Of the 18 tuples, 14 reach a sum of at least 12.
/// The recorded configuration `[2, 1, 2]` selects `(8, 6, 7)`, whose sum 21
/// meets the bound, so it evaluates to 1.
#[cfg(feature = "example-db")]
pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::ModelExampleSpec> {
    use crate::example_db::specs::ModelExampleSpec;

    let sets = vec![vec![2, 5, 8], vec![3, 6], vec![1, 4, 7]];
    let (k, bound) = (14, 12);
    let instance = KthLargestMTuple::new(sets, k, bound);

    vec![ModelExampleSpec {
        id: "kth_largest_m_tuple",
        instance: Box::new(instance),
        optimal_config: vec![2, 1, 2],
        optimal_value: serde_json::json!(1),
    }]
}

#[cfg(test)]
#[path = "../../unit_tests/models/misc/kth_largest_m_tuple.rs"]
mod tests;
3 changes: 3 additions & 0 deletions src/models/misc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ mod flow_shop_scheduling;
mod grouping_by_swapping;
mod job_shop_scheduling;
mod knapsack;
mod kth_largest_m_tuple;
mod longest_common_subsequence;
mod minimum_tardiness_sequencing;
mod multiprocessor_scheduling;
Expand Down Expand Up @@ -119,6 +120,7 @@ pub use flow_shop_scheduling::FlowShopScheduling;
pub use grouping_by_swapping::GroupingBySwapping;
pub use job_shop_scheduling::JobShopScheduling;
pub use knapsack::Knapsack;
pub use kth_largest_m_tuple::KthLargestMTuple;
pub use longest_common_subsequence::LongestCommonSubsequence;
pub use minimum_tardiness_sequencing::MinimumTardinessSequencing;
pub use multiprocessor_scheduling::MultiprocessorScheduling;
Expand Down Expand Up @@ -186,5 +188,6 @@ pub(crate) fn canonical_model_example_specs() -> Vec<crate::example_db::specs::M
specs.extend(subset_sum::canonical_model_example_specs());
specs.extend(three_partition::canonical_model_example_specs());
specs.extend(cosine_product_integration::canonical_model_example_specs());
specs.extend(kth_largest_m_tuple::canonical_model_example_specs());
specs
}
17 changes: 9 additions & 8 deletions src/models/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,15 @@ pub use misc::{
AdditionalKey, BinPacking, CapacityAssignment, CbqRelation, ConjunctiveBooleanQuery,
ConjunctiveQueryFoldability, ConsistencyOfDatabaseFrequencyTables, CosineProductIntegration,
EnsembleComputation, ExpectedRetrievalCost, Factoring, FlowShopScheduling, GroupingBySwapping,
JobShopScheduling, Knapsack, LongestCommonSubsequence, MinimumTardinessSequencing,
MultiprocessorScheduling, PaintShop, Partition, PrecedenceConstrainedScheduling,
ProductionPlanning, QueryArg, RectilinearPictureCompression, ResourceConstrainedScheduling,
SchedulingWithIndividualDeadlines, SequencingToMinimizeMaximumCumulativeCost,
SequencingToMinimizeWeightedCompletionTime, SequencingToMinimizeWeightedTardiness,
SequencingWithReleaseTimesAndDeadlines, SequencingWithinIntervals, ShortestCommonSupersequence,
StackerCrane, StaffScheduling, StringToStringCorrection, SubsetSum, SumOfSquaresPartition,
Term, ThreePartition, TimetableDesign,
JobShopScheduling, Knapsack, KthLargestMTuple, LongestCommonSubsequence,
MinimumTardinessSequencing, MultiprocessorScheduling, PaintShop, Partition,
PrecedenceConstrainedScheduling, ProductionPlanning, QueryArg, RectilinearPictureCompression,
ResourceConstrainedScheduling, SchedulingWithIndividualDeadlines,
SequencingToMinimizeMaximumCumulativeCost, SequencingToMinimizeWeightedCompletionTime,
SequencingToMinimizeWeightedTardiness, SequencingWithReleaseTimesAndDeadlines,
SequencingWithinIntervals, ShortestCommonSupersequence, StackerCrane, StaffScheduling,
StringToStringCorrection, SubsetSum, SumOfSquaresPartition, Term, ThreePartition,
TimetableDesign,
};
pub use set::{
ComparativeContainment, ConsecutiveSets, ExactCoverBy3Sets, IntegerKnapsack, MaximumSetPacking,
Expand Down
Loading
Loading