diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index c4f0cd36..e027882d 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -80,6 +80,7 @@ "MinimumSetCovering": [Minimum Set Covering], "ComparativeContainment": [Comparative Containment], "SetBasis": [Set Basis], + "MinimumCardinalityKey": [Minimum Cardinality Key], "SpinGlass": [Spin Glass], "QUBO": [QUBO], "ILP": [Integer Linear Programming], @@ -1518,6 +1519,29 @@ NP-completeness was established by Garey, Johnson, and Stockmeyer @gareyJohnsonS ] } +#{ + let x = load-model-example("MinimumCardinalityKey") + let n = x.instance.num_attributes + let deps = x.instance.dependencies + let m = deps.len() + let bound = x.instance.bound_k + let sample = x.samples.at(0) + let key-attrs = range(n).filter(i => sample.config.at(i) == 1) + let sat-count = x.optimal.len() + let fmt-set(s) = "${" + s.map(e => str(e)).join(", ") + "}$" + let fmt-fd(d) = fmt-set(d.at(0)) + " $arrow.r$ " + fmt-set(d.at(1)) + [ + #problem-def("MinimumCardinalityKey")[ + Given a set $A$ of attribute names, a collection $F$ of functional dependencies (ordered pairs of subsets of $A$), and a positive integer $M$, does there exist a candidate key $K subset.eq A$ with $|K| <= M$, i.e., a minimal subset $K$ such that the closure of $K$ under $F^*$ equals $A$? + ][ + The Minimum Cardinality Key problem arises in relational database theory, where identifying the smallest candidate key determines the most efficient way to uniquely identify rows in a relation. It was shown NP-complete by Lucchesi and Osborn (1978) @lucchesi1978keys via transformation from Vertex Cover. The problem appears as SR26 in Garey & Johnson (A4) @garey1979. The closure $F^*$ is the set of all dependencies derivable from $F$ by reflexivity ($B subset.eq C$ implies $C arrow.r B$), transitivity, and union; these rules are equivalent to Armstrong's axioms (reflexivity, augmentation, transitivity).
The best known exact algorithm is brute-force enumeration of all subsets of $A$, giving $O^*(2^(|A|))$ time#footnote[Lucchesi and Osborn give an output-polynomial algorithm for enumerating all candidate keys, but the number of keys can be exponential.]. + + *Example.* Let $A = {0, 1, ..., #(n - 1)}$ ($|A| = #n$) with $M = #bound$ and functional dependencies $F = {#deps.enumerate().map(((i, d)) => fmt-fd(d)).join(", ")}$. + The candidate key $K = #fmt-set(key-attrs)$ has $|K| = #key-attrs.len() <= #bound$. Its closure: start with ${0, 1}$; apply ${0, 1} arrow.r {2}$ to get ${0, 1, 2}$; apply ${0, 2} arrow.r {3}$ to get ${0, 1, 2, 3}$; apply ${1, 3} arrow.r {4}$ to get ${0, 1, 2, 3, 4}$; apply ${2, 4} arrow.r {5}$ to get $A$. Neither ${0}$ nor ${1}$ alone determines $A$, so $K$ is minimal. There are #sat-count satisfying encodings in total. + ] + ] +} + == Optimization Problems #{ diff --git a/docs/paper/references.bib b/docs/paper/references.bib index 7418ccd8..526068c6 100644 --- a/docs/paper/references.bib +++ b/docs/paper/references.bib @@ -710,6 +710,17 @@ @article{lucchesi1978 doi = {10.1112/jlms/s2-17.3.369} } +@article{lucchesi1978keys, + author = {Cl\'audio L. Lucchesi and Sylvia L. Osborn}, + title = {Candidate Keys for Relations}, + journal = {Journal of Computer and System Sciences}, + volume = {17}, + number = {2}, + pages = {270--279}, + year = {1978}, + doi = {10.1016/0022-0000(78)90009-0} +} + @article{lenstra1976, author = {Jan Karel Lenstra and Alexander H. G. 
Rinnooy Kan}, title = {On General Routing Problems}, diff --git a/docs/src/cli.md b/docs/src/cli.md index 0bd19f1c..11086f4a 100644 --- a/docs/src/cli.md +++ b/docs/src/cli.md @@ -352,6 +352,7 @@ pred create LengthBoundedDisjointPaths --graph 0-1,1-6,0-2,2-3,3-6,0-4,4-5,5-6 - pred create Factoring --target 15 --bits-m 4 --bits-n 4 -o factoring.json pred create Factoring --target 21 --bits-m 3 --bits-n 3 -o factoring2.json pred create X3C --universe 9 --sets "0,1,2;0,2,4;3,4,5;3,5,7;6,7,8;1,4,6;2,5,8" -o x3c.json +pred create MinimumCardinalityKey --num-attributes 6 --dependencies "0,1>2;0,2>3;1,3>4;2,4>5" --k 2 -o mck.json pred create MinimumTardinessSequencing --n 5 --deadlines 5,5,5,3,3 --precedence-pairs "0>3,1>3,1>4,2>4" -o mts.json pred create StringToStringCorrection --source-string "0,1,2,3,1,0" --target-string "0,1,3,2,1" --bound 2 | pred solve - --solver brute-force pred create StrongConnectivityAugmentation --arcs "0>1,1>2,2>0,3>4,4>3,2>3,4>5,5>3" --candidate-arcs "3>0:5,3>1:3,3>2:4,4>0:6,4>1:2,4>2:7,5>0:4,5>1:3,5>2:1,0>3:8,0>4:3,0>5:2,1>3:6,1>4:4,1>5:5,2>4:3,2>5:7,1>0:2" --bound 1 -o sca.json @@ -519,10 +520,17 @@ Source evaluation: Valid(2) > **Note:** The ILP solver requires a reduction path from the target problem to ILP. > Some problems do not currently have one. Examples include BoundedComponentSpanningForest, -> LengthBoundedDisjointPaths, QUBO, SpinGlass, MaxCut, CircuitSAT, and MultiprocessorScheduling. +> LengthBoundedDisjointPaths, MinimumCardinalityKey, QUBO, SpinGlass, MaxCut, CircuitSAT, and MultiprocessorScheduling. > Use `pred solve --solver brute-force` for these, or reduce to a problem that supports ILP first. > For other problems, use `pred path ILP` to check whether an ILP reduction path exists. 
+For example, the canonical Minimum Cardinality Key instance can be created and solved with: + +```bash +pred create MinimumCardinalityKey --num-attributes 6 --dependencies "0,1>2;0,2>3;1,3>4;2,4>5" --k 2 -o mck.json +pred solve mck.json --solver brute-force +``` + ## Shell Completions Enable tab completion by adding one line to your shell config: diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index 05196595..48f36700 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -238,6 +238,7 @@ Flags by problem type: ComparativeContainment --universe, --r-sets, --s-sets [--r-weights] [--s-weights] X3C (ExactCoverBy3Sets) --universe, --sets (3 elements each) SetBasis --universe, --sets, --k + MinimumCardinalityKey --num-attributes, --dependencies, --k BicliqueCover --left, --right, --biedges, --k BalancedCompleteBipartiteSubgraph --left, --right, --biedges, --k BiconnectivityAugmentation --graph, --potential-edges, --budget [--num-vertices] @@ -286,7 +287,8 @@ Examples: pred create FVS --arcs \"0>1,1>2,2>0\" --weights 1,1,1 pred create UndirectedTwoCommodityIntegralFlow --graph 0-2,1-2,2-3 --capacities 1,1,2 --source-1 0 --sink-1 3 --source-2 1 --sink-2 3 --requirement-1 1 --requirement-2 1 pred create X3C --universe 9 --sets \"0,1,2;0,2,4;3,4,5;3,5,7;6,7,8;1,4,6;2,5,8\" - pred create SetBasis --universe 4 --sets \"0,1;1,2;0,2;0,1,2\" --k 3")] + pred create SetBasis --universe 4 --sets \"0,1;1,2;0,2;0,1,2\" --k 3 + pred create MinimumCardinalityKey --num-attributes 6 --dependencies \"0,1>2;0,2>3;1,3>4;2,4>5\" --k 2")] pub struct CreateArgs { /// Problem type (e.g., MIS, QUBO, SAT). Omit when using --example. 
#[arg(value_parser = crate::problem_name::ProblemNameParser)] @@ -498,6 +500,12 @@ pub struct CreateArgs { /// Alphabet size for SCS or StringToStringCorrection (optional; inferred from max symbol + 1 if omitted) #[arg(long)] pub alphabet_size: Option, + /// Functional dependencies for MinimumCardinalityKey (semicolon-separated "lhs>rhs" pairs, e.g., "0,1>2;0,2>3") + #[arg(long)] + pub dependencies: Option, + /// Number of attributes for MinimumCardinalityKey + #[arg(long)] + pub num_attributes: Option, /// Source string for StringToStringCorrection (comma-separated symbol indices, e.g., "0,1,2,3") #[arg(long)] pub source_string: Option, diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index 25e4454c..e5e865f8 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -94,6 +94,8 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool { && args.requirements.is_none() && args.num_workers.is_none() && args.alphabet_size.is_none() + && args.dependencies.is_none() + && args.num_attributes.is_none() && args.source_string.is_none() && args.target_string.is_none() && args.capacities.is_none() @@ -239,6 +241,7 @@ fn type_format_hint(type_name: &str, graph_type: Option<&str>) -> &'static str { Some("UnitDiskGraph") => "float positions: \"0.0,0.0;1.0,0.0\"", _ => "edge list: 0-1,1-2,2-3", }, + "Vec<(Vec, Vec)>" => "semicolon-separated dependencies: \"0,1>2;0,2>3\"", "Vec" => "comma-separated integers: 4,5,3,2,6", "Vec" => "comma-separated: 1,2,3", "Vec" => "comma-separated indices: 0,2,4", @@ -329,6 +332,9 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str { "--universe 4 --r-sets \"0,1,2,3;0,1\" --s-sets \"0,1,2,3;2,3\" --r-weights 2,5 --s-weights 3,6" } "SetBasis" => "--universe 4 --sets \"0,1;1,2;0,2;0,1,2\" --k 3", + "MinimumCardinalityKey" => { + "--num-attributes 6 --dependencies \"0,1>2;0,2>3;1,3>4;2,4>5\" --k 2" + } 
"ShortestCommonSupersequence" => "--strings \"0,1,2;1,2,0\" --bound 4", "StringToStringCorrection" => { "--source-string \"0,1,2,3,1,0\" --target-string \"0,1,3,2,1\" --bound 2" @@ -342,6 +348,7 @@ fn help_flag_name(canonical: &str, field_name: &str) -> String { match (canonical, field_name) { ("BoundedComponentSpanningForest", "max_components") => return "k".to_string(), ("BoundedComponentSpanningForest", "max_weight") => return "bound".to_string(), + ("MinimumCardinalityKey", "bound_k") => return "k".to_string(), ("StaffScheduling", "shifts_per_schedule") => return "k".to_string(), _ => {} } @@ -1223,6 +1230,33 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { ) } + // MinimumCardinalityKey + "MinimumCardinalityKey" => { + let num_attributes = args.num_attributes.ok_or_else(|| { + anyhow::anyhow!( + "MinimumCardinalityKey requires --num-attributes, --dependencies, and --k\n\n\ + Usage: pred create MinimumCardinalityKey --num-attributes 6 --dependencies \"0,1>2;0,2>3;1,3>4;2,4>5\" --k 2" + ) + })?; + let k = args.k.ok_or_else(|| { + anyhow::anyhow!("MinimumCardinalityKey requires --k (bound on key cardinality)") + })?; + let deps_str = args.dependencies.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "MinimumCardinalityKey requires --dependencies (e.g., \"0,1>2;0,2>3\")" + ) + })?; + let dependencies = parse_dependencies(deps_str)?; + ( + ser(problemreductions::models::set::MinimumCardinalityKey::new( + num_attributes, + dependencies, + k, + ))?, + resolved_variant.clone(), + ) + } + // BicliqueCover "BicliqueCover" => { let usage = "pred create BicliqueCover --left 2 --right 2 --biedges 0-0,0-1,1-1 --k 2"; @@ -2328,6 +2362,39 @@ fn parse_named_sets(sets_str: Option<&str>, flag: &str) -> Result .collect() } +/// Parse `--dependencies` as semicolon-separated "lhs>rhs" pairs. +/// E.g., "0,1>2;0,2>3;1,3>4;2,4>5" means {0,1}->{2}, {0,2}->{3}, etc. 
+fn parse_dependencies(input: &str) -> Result<Vec<(Vec<usize>, Vec<usize>)>> { + fn parse_dependency_side(side: &str) -> Result<Vec<usize>> { + if side.trim().is_empty() { + return Ok(vec![]); + } + side.split(',') + .map(|s| { + s.trim() + .parse::<usize>() + .map_err(|e| anyhow::anyhow!("Invalid attribute index: {}", e)) + }) + .collect() + } + + input + .split(';') + .map(|dep| { + let parts: Vec<&str> = dep.trim().split('>').collect(); + if parts.len() != 2 { + bail!( + "Invalid dependency format: expected 'lhs>rhs', got '{}'", + dep.trim() + ); + } + let lhs = parse_dependency_side(parts[0])?; + let rhs = parse_dependency_side(parts[1])?; + Ok((lhs, rhs)) + }) + .collect() +} + fn validate_comparative_containment_sets( family_name: &str, flag: &str, @@ -3240,6 +3307,8 @@ mod tests { deadline: None, num_processors: None, alphabet_size: None, + dependencies: None, + num_attributes: None, source_string: None, target_string: None, schedules: None, diff --git a/problemreductions-cli/tests/cli_tests.rs b/problemreductions-cli/tests/cli_tests.rs index 112c102c..bffb1a1b 100644 --- a/problemreductions-cli/tests/cli_tests.rs +++ b/problemreductions-cli/tests/cli_tests.rs @@ -1361,6 +1361,73 @@ fn test_create_set_basis_rejects_out_of_range_elements() { assert!(!stderr.contains("panicked at"), "stderr: {stderr}"); } +#[test] +fn test_create_minimum_cardinality_key_problem_help_uses_supported_flags() { + let output = pred() + .args(["create", "MinimumCardinalityKey"]) + .output() + .unwrap(); + assert!(!output.status.success()); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!(stderr.contains("--num-attributes"), "stderr: {stderr}"); + assert!(stderr.contains("--dependencies"), "stderr: {stderr}"); + assert!(stderr.contains("--k"), "stderr: {stderr}"); + assert!( + stderr.contains("semicolon-separated dependencies"), + "stderr: {stderr}" + ); + assert!(!stderr.contains("--bound-k"), "stderr: {stderr}"); +} + +#[test] +fn test_create_minimum_cardinality_key_allows_empty_lhs_dependency() { + let 
output = pred() + .args([ + "create", + "MinimumCardinalityKey", + "--num-attributes", + "1", + "--dependencies", + ">0", + "--k", + "1", + ]) + .output() + .unwrap(); + assert!( + output.status.success(), + "stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8(output.stdout).unwrap(); + let json: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + assert_eq!(json["type"], "MinimumCardinalityKey"); + assert_eq!(json["data"]["num_attributes"], 1); + assert_eq!(json["data"]["bound_k"], 1); + assert_eq!(json["data"]["dependencies"][0][0], serde_json::json!([])); + assert_eq!(json["data"]["dependencies"][0][1], serde_json::json!([0])); +} + +#[test] +fn test_create_minimum_cardinality_key_missing_num_attributes_message() { + let output = pred() + .args([ + "create", + "MinimumCardinalityKey", + "--dependencies", + "0>0", + "--k", + "1", + ]) + .output() + .unwrap(); + assert!(!output.status.success()); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!(stderr.contains("MinimumCardinalityKey requires --num-attributes")); + assert!(!stderr.contains("--num-vertices"), "stderr: {stderr}"); +} + #[test] fn test_create_then_evaluate() { // Create a problem diff --git a/src/example_db/fixtures/examples.json b/src/example_db/fixtures/examples.json index 10b81634..9248c829 100644 --- a/src/example_db/fixtures/examples.json +++ b/src/example_db/fixtures/examples.json @@ -24,6 +24,7 @@ 
{"problem":"MaximumIndependentSet","variant":{"graph":"SimpleGraph","weight":"i32"},"instance":{"graph":{"inner":{"edge_property":"undirected","edges":[[0,1,null],[1,2,null],[2,3,null],[3,4,null],[4,0,null],[5,7,null],[7,9,null],[9,6,null],[6,8,null],[8,5,null],[0,5,null],[1,6,null],[2,7,null],[3,8,null],[4,9,null]],"node_holes":[],"nodes":[null,null,null,null,null,null,null,null,null,null]}},"weights":[5,1,1,1,1,3,1,1,1,3]},"samples":[{"config":[1,0,1,0,0,0,0,0,1,1],"metric":{"Valid":10}}],"optimal":[{"config":[1,0,1,0,0,0,0,0,1,1],"metric":{"Valid":10}}]}, {"problem":"MaximumMatching","variant":{"graph":"SimpleGraph","weight":"i32"},"instance":{"edge_weights":[1,1,1,1,1,1],"graph":{"inner":{"edge_property":"undirected","edges":[[0,1,null],[0,2,null],[1,3,null],[2,3,null],[2,4,null],[3,4,null]],"node_holes":[],"nodes":[null,null,null,null,null]}}},"samples":[{"config":[1,0,0,0,1,0],"metric":{"Valid":2}}],"optimal":[{"config":[0,0,1,0,1,0],"metric":{"Valid":2}},{"config":[0,1,0,0,0,1],"metric":{"Valid":2}},{"config":[0,1,1,0,0,0],"metric":{"Valid":2}},{"config":[1,0,0,0,0,1],"metric":{"Valid":2}},{"config":[1,0,0,0,1,0],"metric":{"Valid":2}},{"config":[1,0,0,1,0,0],"metric":{"Valid":2}}]}, {"problem":"MaximumSetPacking","variant":{"weight":"i32"},"instance":{"sets":[[0,1],[1,2],[2,3],[3,4]],"weights":[1,1,1,1]},"samples":[{"config":[1,0,1,0],"metric":{"Valid":2}}],"optimal":[{"config":[0,1,0,1],"metric":{"Valid":2}},{"config":[1,0,0,1],"metric":{"Valid":2}},{"config":[1,0,1,0],"metric":{"Valid":2}}]}, + {"problem":"MinimumCardinalityKey","variant":{},"instance":{"bound_k":2,"dependencies":[[[0,1],[2]],[[0,2],[3]],[[1,3],[4]],[[2,4],[5]]],"num_attributes":6},"samples":[{"config":[1,1,0,0,0,0],"metric":true}],"optimal":[{"config":[1,1,0,0,0,0],"metric":true}]}, 
{"problem":"MinimumDominatingSet","variant":{"graph":"SimpleGraph","weight":"i32"},"instance":{"graph":{"inner":{"edge_property":"undirected","edges":[[0,1,null],[0,2,null],[1,3,null],[2,3,null],[2,4,null],[3,4,null]],"node_holes":[],"nodes":[null,null,null,null,null]}},"weights":[1,1,1,1,1]},"samples":[{"config":[0,0,1,1,0],"metric":{"Valid":2}}],"optimal":[{"config":[0,0,1,1,0],"metric":{"Valid":2}},{"config":[0,1,0,0,1],"metric":{"Valid":2}},{"config":[0,1,0,1,0],"metric":{"Valid":2}},{"config":[0,1,1,0,0],"metric":{"Valid":2}},{"config":[1,0,0,0,1],"metric":{"Valid":2}},{"config":[1,0,0,1,0],"metric":{"Valid":2}},{"config":[1,0,1,0,0],"metric":{"Valid":2}}]}, {"problem":"MinimumFeedbackVertexSet","variant":{"weight":"i32"},"instance":{"graph":{"inner":{"edge_property":"directed","edges":[[0,1,null],[1,2,null],[2,0,null],[0,3,null],[3,4,null],[4,1,null],[4,2,null]],"node_holes":[],"nodes":[null,null,null,null,null]}},"weights":[1,1,1,1,1]},"samples":[{"config":[1,0,0,0,0],"metric":{"Valid":1}}],"optimal":[{"config":[0,0,1,0,0],"metric":{"Valid":1}},{"config":[1,0,0,0,0],"metric":{"Valid":1}}]}, {"problem":"MinimumMultiwayCut","variant":{"graph":"SimpleGraph","weight":"i32"},"instance":{"edge_weights":[2,3,1,2,4,5],"graph":{"inner":{"edge_property":"undirected","edges":[[0,1,null],[1,2,null],[2,3,null],[3,4,null],[0,4,null],[1,3,null]],"node_holes":[],"nodes":[null,null,null,null,null]}},"terminals":[0,2,4]},"samples":[{"config":[1,0,0,1,1,0],"metric":{"Valid":8}}],"optimal":[{"config":[1,0,0,1,1,0],"metric":{"Valid":8}}]}, diff --git a/src/models/mod.rs b/src/models/mod.rs index d2501541..2d58e6a2 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -27,5 +27,6 @@ pub use misc::{ ShortestCommonSupersequence, StaffScheduling, StringToStringCorrection, SubsetSum, }; pub use set::{ - ComparativeContainment, ExactCoverBy3Sets, MaximumSetPacking, MinimumSetCovering, SetBasis, + ComparativeContainment, ExactCoverBy3Sets, MaximumSetPacking, MinimumCardinalityKey, 
+ MinimumSetCovering, SetBasis, }; diff --git a/src/models/set/minimum_cardinality_key.rs b/src/models/set/minimum_cardinality_key.rs new file mode 100644 index 00000000..c9d6fc5b --- /dev/null +++ b/src/models/set/minimum_cardinality_key.rs @@ -0,0 +1,205 @@ +//! Minimum Cardinality Key problem implementation. +//! +//! Given a set of attribute names, functional dependencies, and a bound M, +//! determine whether there exists a candidate key of cardinality at most M. + +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{Problem, SatisfactionProblem}; +use serde::{Deserialize, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "MinimumCardinalityKey", + display_name: "Minimum Cardinality Key", + aliases: &[], + dimensions: &[], + module_path: module_path!(), + description: "Determine whether a relational system has a candidate key of bounded cardinality", + fields: &[ + FieldInfo { name: "num_attributes", type_name: "usize", description: "Number of attributes in the relation" }, + FieldInfo { name: "dependencies", type_name: "Vec<(Vec, Vec)>", description: "Functional dependencies as (lhs, rhs) pairs" }, + FieldInfo { name: "bound_k", type_name: "usize", description: "Upper bound on key cardinality" }, + ], + } +} + +/// The Minimum Cardinality Key decision problem. +/// +/// Given a set of attributes `A = {0, ..., n-1}`, a set of functional +/// dependencies `F` (each a pair `(X, Y)` where `X, Y` are subsets of `A`), +/// and a positive integer `k`, determine whether there exists a candidate key +/// (a minimal set of attributes that functionally determines all of `A`) of +/// cardinality at most `k`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MinimumCardinalityKey { + /// Number of attributes (elements are `0..num_attributes`). + num_attributes: usize, + /// Functional dependencies as `(lhs, rhs)` pairs. + dependencies: Vec<(Vec<usize>, Vec<usize>)>, + /// Upper bound on key cardinality. 
+ bound_k: usize, +} + +impl MinimumCardinalityKey { + /// Create a new Minimum Cardinality Key instance; each dependency side is sorted and deduplicated before being stored. + /// + /// # Panics + /// + /// Panics if any attribute index in a dependency lies outside the attribute set. + pub fn new( + num_attributes: usize, + dependencies: Vec<(Vec<usize>, Vec<usize>)>, + bound_k: usize, + ) -> Self { + let mut dependencies = dependencies; + for (dep_index, (lhs, rhs)) in dependencies.iter_mut().enumerate() { + lhs.sort_unstable(); + lhs.dedup(); + rhs.sort_unstable(); + rhs.dedup(); + for &attr in lhs.iter().chain(rhs.iter()) { + assert!( + attr < num_attributes, + "Dependency {} contains attribute {} which is outside attribute set of size {}", + dep_index, + attr, + num_attributes + ); + } + } + + Self { + num_attributes, + dependencies, + bound_k, + } + } + + /// Return the number of attributes. + pub fn num_attributes(&self) -> usize { + self.num_attributes + } + + /// Return the number of functional dependencies. + pub fn num_dependencies(&self) -> usize { + self.dependencies.len() + } + + /// Return the upper bound on key cardinality. + pub fn bound_k(&self) -> usize { + self.bound_k + } + + /// Return the functional dependencies. + pub fn dependencies(&self) -> &[(Vec<usize>, Vec<usize>)] { + &self.dependencies + } + + /// Compute the attribute closure of the selected attributes under the + /// functional dependencies. Starts with the selected set and repeatedly + /// applies each FD: if all lhs attributes are in the closure, add all rhs + /// attributes. Repeats until no change. + fn compute_closure(&self, selected: &[bool]) -> Vec<bool> { + let mut closure = selected.to_vec(); + loop { + let mut changed = false; + for (lhs, rhs) in &self.dependencies { + if lhs.iter().all(|&a| closure[a]) { + for &a in rhs { + if !closure[a] { + closure[a] = true; + changed = true; + } + } + } + } + if !changed { + break; + } + } + closure + } + + /// Check whether the selected attributes form a key (their closure equals + /// the full attribute set). 
+ fn is_key(&self, selected: &[bool]) -> bool { + let closure = self.compute_closure(selected); + closure.iter().all(|&v| v) + } + + /// Check whether the selected attributes form a minimal key: they are a + /// key, and removing any single selected attribute breaks the key property (single removals suffice because the closure only grows when attributes are added). + fn is_minimal_key(&self, selected: &[bool]) -> bool { + if !self.is_key(selected) { + return false; + } + for i in 0..self.num_attributes { + if selected[i] { + let mut reduced = selected.to_vec(); + reduced[i] = false; + if self.is_key(&reduced) { + return false; + } + } + } + true + } +} + +impl Problem for MinimumCardinalityKey { + const NAME: &'static str = "MinimumCardinalityKey"; + type Metric = bool; + + fn dims(&self) -> Vec<usize> { + vec![2; self.num_attributes] + } + + fn evaluate(&self, config: &[usize]) -> bool { + if config.len() != self.num_attributes || config.iter().any(|&v| v > 1) { + return false; + } + + let selected: Vec<bool> = config.iter().map(|&v| v == 1).collect(); + let count = selected.iter().filter(|&&v| v).count(); + + if count > self.bound_k { + return false; + } + + self.is_minimal_key(&selected) + } + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } +} + +impl SatisfactionProblem for MinimumCardinalityKey {} + +crate::declare_variants! 
{ + default sat MinimumCardinalityKey => "2^num_attributes", +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_model_example_specs() -> Vec { + vec![crate::example_db::specs::ModelExampleSpec { + id: "minimum_cardinality_key", + build: || { + let problem = MinimumCardinalityKey::new( + 6, + vec![ + (vec![0, 1], vec![2]), + (vec![0, 2], vec![3]), + (vec![1, 3], vec![4]), + (vec![2, 4], vec![5]), + ], + 2, + ); + crate::example_db::specs::satisfaction_example(problem, vec![vec![1, 1, 0, 0, 0, 0]]) + }, + }] +} + +#[cfg(test)] +#[path = "../../unit_tests/models/set/minimum_cardinality_key.rs"] +mod tests; diff --git a/src/models/set/mod.rs b/src/models/set/mod.rs index 9b9117b0..f60870e6 100644 --- a/src/models/set/mod.rs +++ b/src/models/set/mod.rs @@ -9,12 +9,14 @@ pub(crate) mod comparative_containment; pub(crate) mod exact_cover_by_3_sets; pub(crate) mod maximum_set_packing; +pub(crate) mod minimum_cardinality_key; pub(crate) mod minimum_set_covering; pub(crate) mod set_basis; pub use comparative_containment::ComparativeContainment; pub use exact_cover_by_3_sets::ExactCoverBy3Sets; pub use maximum_set_packing::MaximumSetPacking; +pub use minimum_cardinality_key::MinimumCardinalityKey; pub use minimum_set_covering::MinimumSetCovering; pub use set_basis::SetBasis; @@ -25,6 +27,7 @@ pub(crate) fn canonical_model_example_specs() -> Vec{2}, {0,2}->{3}, +/// {1,3}->{4}, {2,4}->{5}. K={0,1} is a candidate key of size 2. +fn instance1(bound_k: usize) -> MinimumCardinalityKey { + MinimumCardinalityKey::new( + 6, + vec![ + (vec![0, 1], vec![2]), + (vec![0, 2], vec![3]), + (vec![1, 3], vec![4]), + (vec![2, 4], vec![5]), + ], + bound_k, + ) +} + +/// Instance 2 from the issue: 6 attributes, FDs {0,1,2}->{3}, {3,4}->{5}. +/// No 2-element subset determines all attributes. 
+fn instance2() -> MinimumCardinalityKey { + MinimumCardinalityKey::new(6, vec![(vec![0, 1, 2], vec![3]), (vec![3, 4], vec![5])], 2) +} + +#[test] +fn test_minimum_cardinality_key_creation() { + let problem = instance1(2); + assert_eq!(problem.num_attributes(), 6); + assert_eq!(problem.num_dependencies(), 4); + assert_eq!(problem.bound_k(), 2); + assert_eq!(problem.num_variables(), 6); + assert_eq!(problem.dims(), vec![2; 6]); +} + +#[test] +fn test_minimum_cardinality_key_evaluation_yes() { + let problem = instance1(2); + // K={0,1}: closure under FDs reaches all 6 attributes, and it is minimal. + assert!(problem.evaluate(&[1, 1, 0, 0, 0, 0])); +} + +#[test] +fn test_minimum_cardinality_key_evaluation_no_instance() { + let problem = instance2(); + // No 2-element subset is a key for instance 2. + assert!(!problem.evaluate(&[1, 1, 0, 0, 0, 0])); + assert!(!problem.evaluate(&[1, 0, 1, 0, 0, 0])); + assert!(!problem.evaluate(&[0, 0, 0, 1, 1, 0])); +} + +#[test] +fn test_minimum_cardinality_key_non_minimal_rejected() { + let problem = instance1(3); + // K={0,1,2}: closure reaches all attributes, but {0,1} is a proper subset + // that is also a key, so {0,1,2} is NOT minimal. + assert!(!problem.evaluate(&[1, 1, 1, 0, 0, 0])); +} + +#[test] +fn test_minimum_cardinality_key_exceeds_bound() { + let problem = instance1(1); + // K={0,1} has |K|=2 > bound_k=1, so it must be rejected. 
+ assert!(!problem.evaluate(&[1, 1, 0, 0, 0, 0])); +} + +#[test] +fn test_minimum_cardinality_key_solver() { + let problem = instance1(2); + let solver = BruteForce::new(); + let solutions = solver.find_all_satisfying(&problem); + let solution_set: HashSet<Vec<usize>> = solutions.iter().cloned().collect(); + + assert!(!solutions.is_empty()); + assert!(solution_set.contains(&vec![1, 1, 0, 0, 0, 0])); + assert!(solutions.iter().all(|sol| problem.evaluate(sol))); +} + +#[test] +fn test_minimum_cardinality_key_serialization() { + let problem = instance1(2); + let json = serde_json::to_string(&problem).unwrap(); + let deserialized: MinimumCardinalityKey = serde_json::from_str(&json).unwrap(); + + assert_eq!(deserialized.num_attributes(), problem.num_attributes()); + assert_eq!(deserialized.num_dependencies(), problem.num_dependencies()); + assert_eq!(deserialized.bound_k(), problem.bound_k()); + assert_eq!(deserialized.dependencies(), problem.dependencies()); +} + +#[test] +fn test_minimum_cardinality_key_invalid_config() { + let problem = instance1(2); + // Wrong length. + assert!(!problem.evaluate(&[1, 1, 0, 0, 0])); + // Value > 1. + assert!(!problem.evaluate(&[2, 1, 0, 0, 0, 0])); +} + +#[test] +fn test_minimum_cardinality_key_empty_deps() { + // No FDs: closure(K) = K. Only K = {0,1,2} determines all attributes. + // It is minimal because removing any element gives a set that does not + // cover all 3 attributes. + let problem = MinimumCardinalityKey::new(3, vec![], 3); + assert!(problem.evaluate(&[1, 1, 1])); + // Any proper subset fails (not a key). 
+ assert!(!problem.evaluate(&[1, 1, 0])); + assert!(!problem.evaluate(&[1, 0, 0])); + assert!(!problem.evaluate(&[0, 0, 0])); +} + +#[test] +fn test_minimum_cardinality_key_empty_key_candidate() { + let problem = MinimumCardinalityKey::new(1, vec![(vec![], vec![0])], 1); + assert!(problem.evaluate(&[0])); + assert!(!problem.evaluate(&[1])); + + let solver = BruteForce::new(); + assert_eq!(solver.find_all_satisfying(&problem), vec![vec![0]]); +} + +#[test] +#[should_panic(expected = "outside attribute set")] +fn test_minimum_cardinality_key_panics_on_invalid_index() { + MinimumCardinalityKey::new(3, vec![(vec![0, 3], vec![1])], 2); +} + +#[test] +fn test_minimum_cardinality_key_paper_example() { + let problem = instance1(2); + let solution = vec![1, 1, 0, 0, 0, 0]; + assert!(problem.evaluate(&solution)); + + let solver = BruteForce::new(); + let solutions = solver.find_all_satisfying(&problem); + let solution_set: HashSet<Vec<usize>> = solutions.iter().cloned().collect(); + assert!(solution_set.contains(&solution)); + // All returned solutions must be valid. + assert!(solutions.iter().all(|sol| problem.evaluate(sol))); +}