diff --git a/docs/paper/reductions.typ b/docs/paper/reductions.typ index 01372f7cb..ea05e37b0 100644 --- a/docs/paper/reductions.typ +++ b/docs/paper/reductions.typ @@ -108,6 +108,7 @@ "SteinerTree": [Steiner Tree], "SubgraphIsomorphism": [Subgraph Isomorphism], "PartitionIntoTriangles": [Partition Into Triangles], + "PrimeAttributeName": [Prime Attribute Name], "FlowShopScheduling": [Flow Shop Scheduling], "MinimumTardinessSequencing": [Minimum Tardiness Sequencing], "SequencingWithinIntervals": [Sequencing Within Intervals], @@ -1313,6 +1314,72 @@ NP-completeness was established by Garey, Johnson, and Stockmeyer @gareyJohnsonS ] } +#{ + let x = load-model-example("PrimeAttributeName") + let n = x.instance.num_attributes + let deps = x.instance.dependencies + let q = x.instance.query_attribute + let sample = x.samples.at(0) + let key = sample.config.enumerate().filter(((i, v)) => v == 1).map(((i, _)) => i) + let num-sat = x.optimal.len() + // Format a set as {e0, e1, ...} (0-indexed) — for use in text mode + let fmt-set(s) = "${" + s.map(e => str(e)).join(", ") + "}$" + // Format a set for use inside math mode (no $ delimiters) + let fmt-set-math(s) = "{" + s.map(e => str(e)).join(", ") + "}" + [ + #problem-def("PrimeAttributeName")[ + Given a set $A = {0, 1, ..., #(n - 1)}$ of attribute names, a collection $F$ of functional dependencies on $A$, and a specified attribute $x in A$, determine whether $x$ is a _prime attribute_ for $chevron.l A, F chevron.r$ --- i.e., whether there exists a candidate key $K$ for $chevron.l A, F chevron.r$ such that $x in K$. + + A _candidate key_ is a minimal subset $K subset.eq A$ whose closure $K^+_F = A$, where the closure $K^+_F$ is the set of all attributes functionally determined by $K$ under $F$. + ][ + Classical NP-complete problem from relational database theory (Lucchesi and Osborn, 1978; Garey & Johnson SR28). 
+ Prime attributes are central to database normalization: Second Normal Form (2NF) requires that no non-prime attribute is partially dependent on any candidate key, and Third Normal Form (3NF) requires that for every non-trivial functional dependency $X arrow Y$, either $X$ is a superkey or $Y$ consists only of prime attributes. The brute-force approach enumerates the $2^(n-1)$ subsets of $A$ that contain $x$, checking each for the key property; no algorithm significantly improving on this is known for the general problem. + + *Example.* Let $A = {0, 1, ..., #(n - 1)}$ ($n = #n$), query attribute $x = #q$, and $F = {#deps.enumerate().map(((i, d)) => $#fmt-set-math(d.at(0)) arrow #fmt-set-math(d.at(1))$).join(", ")}$. The subset $K = #fmt-set-math(key)$ is a candidate key containing $x = #q$: its closure is $K^+_F = A$ (since $#fmt-set-math(key.sorted()) arrow #fmt-set-math(deps.at(1).at(1))$ by the second FD, yielding all of $A$), and removing either element breaks the superkey property (${#(key.at(0))} arrow.r.not A$ and ${#(key.at(1))} arrow.r.not A$), so $K$ is minimal. Thus attribute #q is prime. There are #num-sat candidate keys containing attribute #q in total. 
+ + #figure( + canvas(length: 1cm, { + import draw: * + // Attribute nodes in two rows + let positions = ( + (0, 1.2), // 0: top-left + (1.5, 1.2), // 1: top-center + (3.0, 1.2), // 2: top-right + (0, 0), // 3: bottom-left (query) + (1.5, 0), // 4: bottom-center + (3.0, 0), // 5: bottom-right + ) + // Draw attribute nodes + for (k, pos) in positions.enumerate() { + let is-key = key.contains(k) + let is-query = k == q + g-node(pos, name: "a" + str(k), radius: 0.25, + fill: if is-key { graph-colors.at(0) } else if is-query { graph-colors.at(1) } else { white }, + label: if is-key or is-query { text(fill: white)[$#k$] } else { [$#k$] }) + } + // Draw functional dependencies as grouped arrows + // FD 1: {0,1} -> {2,3,4,5} + let fd-y-offsets = (0.55, -0.55, -1.15) + for (fi, (lhs, rhs)) in deps.enumerate() { + let ly = if fi == 0 { 2.0 } else if fi == 1 { -0.8 } else { 2.5 } + // Compute LHS and RHS centers + let lx = lhs.map(a => positions.at(a).at(0)).sum() / lhs.len() + let rx = rhs.map(a => positions.at(a).at(0)).sum() / rhs.len() + let mid-x = (lx + rx) / 2 + // Draw arrow from LHS region to RHS region + let arrow-y = ly + on-layer(1, { + content((mid-x, arrow-y), + text(7pt)[FD#(fi + 1): $#fmt-set-math(lhs) arrow #fmt-set-math(rhs)$], + fill: white, frame: "rect", padding: 0.06, stroke: none) + }) + } + }), + caption: [Prime Attribute Name instance with $n = #n$ attributes. Candidate key $K = #fmt-set-math(key)$ is highlighted in blue; query attribute $x = #q$ is a member of $K$. 
The three functional dependencies determine the closure of every subset.], + ) + ] + ] +} + == Optimization Problems #{ diff --git a/problemreductions-cli/src/cli.rs b/problemreductions-cli/src/cli.rs index e71a9e426..113e8f597 100644 --- a/problemreductions-cli/src/cli.rs +++ b/problemreductions-cli/src/cli.rs @@ -235,6 +235,7 @@ Flags by problem type: MinimumSetCovering --universe, --sets [--weights] X3C (ExactCoverBy3Sets) --universe, --sets (3 elements each) SetBasis --universe, --sets, --k + PrimeAttributeName --universe, --deps, --query BicliqueCover --left, --right, --biedges, --k BMF --matrix (0/1), --rank SteinerTree --graph, --edge-weights, --terminals @@ -274,7 +275,8 @@ Examples: pred create FVS --arcs \"0>1,1>2,2>0\" --weights 1,1,1 pred create UndirectedTwoCommodityIntegralFlow --graph 0-2,1-2,2-3 --capacities 1,1,2 --source-1 0 --sink-1 3 --source-2 1 --sink-2 3 --requirement-1 1 --requirement-2 1 pred create X3C --universe 9 --sets \"0,1,2;0,2,4;3,4,5;3,5,7;6,7,8;1,4,6;2,5,8\" - pred create SetBasis --universe 4 --sets \"0,1;1,2;0,2;0,1,2\" --k 3")] + pred create SetBasis --universe 4 --sets \"0,1;1,2;0,2;0,1,2\" --k 3 + pred create PrimeAttributeName --universe 6 --deps \"0,1>2,3,4,5;2,3>0,1,4,5\" --query 3")] pub struct CreateArgs { /// Problem type (e.g., MIS, QUBO, SAT). Omit when using --example. 
#[arg(value_parser = crate::problem_name::ProblemNameParser)] @@ -456,6 +458,12 @@ pub struct CreateArgs { /// Alphabet size for SCS (optional; inferred from max symbol + 1 if omitted) #[arg(long)] pub alphabet_size: Option, + /// Functional dependencies (semicolon-separated, each dep is lhs>rhs with comma-separated indices, e.g., "0,1>2,3;2,3>0,1") + #[arg(long)] + pub deps: Option, + /// Query attribute index for PrimeAttributeName + #[arg(long)] + pub query: Option, } #[derive(clap::Args)] diff --git a/problemreductions-cli/src/commands/create.rs b/problemreductions-cli/src/commands/create.rs index 03220f4f6..1ab5934a6 100644 --- a/problemreductions-cli/src/commands/create.rs +++ b/problemreductions-cli/src/commands/create.rs @@ -90,6 +90,8 @@ fn all_data_flags_empty(args: &CreateArgs) -> bool { && args.sink_2.is_none() && args.requirement_1.is_none() && args.requirement_2.is_none() + && args.deps.is_none() + && args.query.is_none() } fn emit_problem_output(output: &ProblemJsonOutput, out: &OutputConfig) -> Result<()> { @@ -295,6 +297,9 @@ fn example_for(canonical: &str, graph_type: Option<&str>) -> &'static str { "SubgraphIsomorphism" => "--graph 0-1,1-2,2-0 --pattern 0-1", "SubsetSum" => "--sizes 3,7,1,8,2,4 --target 11", "SetBasis" => "--universe 4 --sets \"0,1;1,2;0,2;0,1,2\" --k 3", + "PrimeAttributeName" => { + "--universe 6 --deps \"0,1>2,3,4,5;2,3>0,1,4,5\" --query 3" + } "ShortestCommonSupersequence" => "--strings \"0,1,2;1,2,0\" --bound 4", _ => "", } @@ -305,6 +310,9 @@ fn help_flag_name(canonical: &str, field_name: &str) -> String { match (canonical, field_name) { ("BoundedComponentSpanningForest", "max_components") => return "k".to_string(), ("BoundedComponentSpanningForest", "max_weight") => return "bound".to_string(), + ("PrimeAttributeName", "num_attributes") => return "universe".to_string(), + ("PrimeAttributeName", "dependencies") => return "deps".to_string(), + ("PrimeAttributeName", "query_attribute") => return "query".to_string(), _ => {} } 
// General field-name overrides (previously in cli_flag_name) @@ -331,6 +339,9 @@ fn help_flag_hint( ) -> &'static str { match (canonical, field_name) { ("BoundedComponentSpanningForest", "max_weight") => "integer", + ("PrimeAttributeName", "dependencies") => { + "semicolon-separated dependencies: \"0,1>2,3;2,3>0,1\"" + } _ => type_format_hint(type_name, graph_type), } } @@ -367,12 +378,7 @@ fn print_problem_help(canonical: &str, graph_type: Option<&str>) -> Result<()> { eprintln!(" --{:<16} {} ({})", flag_name, field.description, hint); } else { let hint = help_flag_hint(canonical, &field.name, &field.type_name, graph_type); - eprintln!( - " --{:<16} {} ({})", - help_flag_name(canonical, &field.name), - field.description, - hint - ); + eprintln!(" --{:<16} {} ({})", flag_name, field.description, hint); } } } else { @@ -409,7 +415,7 @@ fn problem_help_flag_name( if canonical == "LengthBoundedDisjointPaths" && field_name == "max_length" { return "bound".to_string(); } - field_name.replace('_', "-") + help_flag_name(canonical, field_name) } fn lbdp_validation_error(message: &str, usage: Option<&str>) -> anyhow::Error { @@ -1542,6 +1548,52 @@ pub fn create(args: &CreateArgs, out: &OutputConfig) -> Result<()> { ) } + // PrimeAttributeName + "PrimeAttributeName" => { + let universe = args.universe.ok_or_else(|| { + anyhow::anyhow!( + "PrimeAttributeName requires --universe, --deps, and --query\n\n\ + Usage: pred create PrimeAttributeName --universe 6 --deps \"0,1>2,3,4,5;2,3>0,1,4,5\" --query 3" + ) + })?; + let deps_str = args.deps.as_deref().ok_or_else(|| { + anyhow::anyhow!( + "PrimeAttributeName requires --deps\n\n\ + Usage: pred create PrimeAttributeName --universe 6 --deps \"0,1>2,3,4,5;2,3>0,1,4,5\" --query 3" + ) + })?; + let query = args.query.ok_or_else(|| { + anyhow::anyhow!( + "PrimeAttributeName requires --query\n\n\ + Usage: pred create PrimeAttributeName --universe 6 --deps \"0,1>2,3,4,5;2,3>0,1,4,5\" --query 3" + ) + })?; + let dependencies = 
parse_deps(deps_str)?; + for (i, (lhs, rhs)) in dependencies.iter().enumerate() { + for &attr in lhs.iter().chain(rhs.iter()) { + if attr >= universe { + bail!( + "Dependency {} references attribute {} outside universe of size {}", + i, + attr, + universe + ); + } + } + } + if query >= universe { + bail!( + "Query attribute {} is outside universe of size {}", + query, + universe + ); + } + ( + ser(PrimeAttributeName::new(universe, dependencies, query))?, + resolved_variant.clone(), + ) + } + _ => bail!("{}", crate::problem_name::unknown_problem_error(canonical)), }; @@ -1969,6 +2021,33 @@ fn parse_sets(args: &CreateArgs) -> Result>> { .collect() } +/// Parse a dependency string as semicolon-separated `lhs>rhs` pairs. +/// E.g., "0,1>2,3;2,3>0,1" +fn parse_deps(s: &str) -> Result, Vec)>> { + s.split(';') + .map(|dep| { + let parts: Vec<&str> = dep.split('>').collect(); + if parts.len() != 2 { + bail!("Invalid dependency format '{}': expected 'lhs>rhs'", dep); + } + let lhs = parse_index_list(parts[0])?; + let rhs = parse_index_list(parts[1])?; + Ok((lhs, rhs)) + }) + .collect() +} + +/// Parse a comma-separated list of usize indices. +fn parse_index_list(s: &str) -> Result> { + s.split(',') + .map(|x| { + x.trim() + .parse::() + .map_err(|e| anyhow::anyhow!("Invalid index '{}': {}", x.trim(), e)) + }) + .collect() +} + /// Parse `--partition` as semicolon-separated groups of comma-separated arc indices. 
/// E.g., "0,1;2,3;4,7;5,6" fn parse_partition_groups(args: &CreateArgs, num_arcs: usize) -> Result>> { @@ -2434,4 +2513,25 @@ mod tests { "num-paths-required" ); } + + #[test] + fn test_problem_help_uses_prime_attribute_name_cli_overrides() { + assert_eq!( + problem_help_flag_name("PrimeAttributeName", "num_attributes", "usize", false), + "universe" + ); + assert_eq!( + problem_help_flag_name( + "PrimeAttributeName", + "dependencies", + "Vec<(Vec, Vec)>", + false, + ), + "deps" + ); + assert_eq!( + problem_help_flag_name("PrimeAttributeName", "query_attribute", "usize", false), + "query" + ); + } } diff --git a/problemreductions-cli/tests/cli_tests.rs b/problemreductions-cli/tests/cli_tests.rs index 9c35fd001..dff925be3 100644 --- a/problemreductions-cli/tests/cli_tests.rs +++ b/problemreductions-cli/tests/cli_tests.rs @@ -2355,6 +2355,40 @@ fn test_create_set_basis_no_flags_uses_actual_cli_flag_names() { ); } +#[test] +fn test_create_prime_attribute_name_no_flags_uses_actual_cli_flag_names() { + let output = pred() + .args(["create", "PrimeAttributeName"]) + .output() + .unwrap(); + assert!(!output.status.success()); + let stderr = String::from_utf8_lossy(&output.stderr); + assert!( + stderr.contains("--universe"), + "expected '--universe' in help output, got: {stderr}" + ); + assert!( + stderr.contains("--deps"), + "expected '--deps' in help output, got: {stderr}" + ); + assert!( + stderr.contains("--query"), + "expected '--query' in help output, got: {stderr}" + ); + assert!( + !stderr.contains("--num-attributes"), + "help should not advertise schema field names: {stderr}" + ); + assert!( + !stderr.contains("--dependencies"), + "help should not advertise schema field names: {stderr}" + ); + assert!( + !stderr.contains("--query-attribute"), + "help should not advertise schema field names: {stderr}" + ); +} + #[test] fn test_create_kcoloring_missing_k() { let output = pred() diff --git a/src/example_db/fixtures/examples.json 
b/src/example_db/fixtures/examples.json index 6e52883b2..08317bca8 100644 --- a/src/example_db/fixtures/examples.json +++ b/src/example_db/fixtures/examples.json @@ -30,6 +30,7 @@ {"problem":"MultipleChoiceBranching","variant":{"weight":"i32"},"instance":{"graph":{"inner":{"edge_property":"directed","edges":[[0,1,null],[0,2,null],[1,3,null],[2,3,null],[1,4,null],[3,5,null],[4,5,null],[2,4,null]],"node_holes":[],"nodes":[null,null,null,null,null,null]}},"partition":[[0,1],[2,3],[4,7],[5,6]],"threshold":10,"weights":[3,2,4,1,2,3,1,3]},"samples":[{"config":[1,0,1,0,0,1,0,1],"metric":true}],"optimal":[{"config":[0,0,1,0,0,1,0,1],"metric":true},{"config":[0,1,1,0,0,0,1,1],"metric":true},{"config":[0,1,1,0,0,1,0,1],"metric":true},{"config":[0,1,1,0,1,1,0,0],"metric":true},{"config":[1,0,0,1,0,1,0,1],"metric":true},{"config":[1,0,1,0,0,0,0,1],"metric":true},{"config":[1,0,1,0,0,0,1,1],"metric":true},{"config":[1,0,1,0,0,1,0,0],"metric":true},{"config":[1,0,1,0,0,1,0,1],"metric":true},{"config":[1,0,1,0,1,0,1,0],"metric":true},{"config":[1,0,1,0,1,1,0,0],"metric":true}]}, {"problem":"PaintShop","variant":{},"instance":{"car_labels":["A","B","C"],"is_first":[true,true,false,true,false,false],"num_cars":3,"sequence_indices":[0,1,0,2,1,2]},"samples":[{"config":[0,0,1],"metric":{"Valid":2}}],"optimal":[{"config":[0,0,1],"metric":{"Valid":2}},{"config":[0,1,1],"metric":{"Valid":2}},{"config":[1,0,0],"metric":{"Valid":2}},{"config":[1,1,0],"metric":{"Valid":2}}]}, {"problem":"PartitionIntoTriangles","variant":{"graph":"SimpleGraph"},"instance":{"graph":{"inner":{"edge_property":"undirected","edges":[[0,1,null],[0,2,null],[1,2,null],[3,4,null],[3,5,null],[4,5,null],[0,3,null]],"node_holes":[],"nodes":[null,null,null,null,null,null]}}},"samples":[{"config":[0,0,0,1,1,1],"metric":true}],"optimal":[{"config":[0,0,0,1,1,1],"metric":true},{"config":[1,1,1,0,0,0],"metric":true}]}, + 
{"problem":"PrimeAttributeName","variant":{},"instance":{"dependencies":[[[0,1],[2,3,4,5]],[[2,3],[0,1,4,5]],[[0,3],[1,2,4,5]]],"num_attributes":6,"query_attribute":3},"samples":[{"config":[0,0,1,1,0,0],"metric":true}],"optimal":[{"config":[0,0,1,1,0,0],"metric":true},{"config":[1,0,0,1,0,0],"metric":true}]}, {"problem":"QUBO","variant":{"weight":"f64"},"instance":{"matrix":[[-1.0,2.0,0.0],[0.0,-1.0,2.0],[0.0,0.0,-1.0]],"num_vars":3},"samples":[{"config":[1,0,1],"metric":{"Valid":-2.0}}],"optimal":[{"config":[1,0,1],"metric":{"Valid":-2.0}}]}, {"problem":"Satisfiability","variant":{},"instance":{"clauses":[{"literals":[1,2]},{"literals":[-1,3]},{"literals":[-2,-3]}],"num_vars":3},"samples":[{"config":[1,0,1],"metric":true}],"optimal":[{"config":[0,1,0],"metric":true},{"config":[1,0,1],"metric":true}]}, {"problem":"SequencingWithinIntervals","variant":{},"instance":{"deadlines":[11,11,11,11,6],"lengths":[3,1,2,4,1],"release_times":[0,0,0,0,5]},"samples":[{"config":[0,6,3,7,0],"metric":true}],"optimal":[{"config":[0,6,3,7,0],"metric":true},{"config":[0,10,3,6,0],"metric":true},{"config":[2,6,0,7,0],"metric":true},{"config":[2,10,0,6,0],"metric":true},{"config":[6,0,9,1,0],"metric":true},{"config":[6,4,9,0,0],"metric":true},{"config":[8,0,6,1,0],"metric":true},{"config":[8,4,6,0,0],"metric":true}]}, diff --git a/src/lib.rs b/src/lib.rs index 42bc6c927..3ef340fd3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -62,7 +62,7 @@ pub mod prelude { ShortestCommonSupersequence, SubsetSum, }; pub use crate::models::set::{ - ExactCoverBy3Sets, MaximumSetPacking, MinimumSetCovering, SetBasis, + ExactCoverBy3Sets, MaximumSetPacking, MinimumSetCovering, PrimeAttributeName, SetBasis, }; // Core traits diff --git a/src/models/mod.rs b/src/models/mod.rs index 072034f02..c0c06bc57 100644 --- a/src/models/mod.rs +++ b/src/models/mod.rs @@ -25,4 +25,6 @@ pub use misc::{ MinimumTardinessSequencing, PaintShop, SequencingWithinIntervals, ShortestCommonSupersequence, SubsetSum, }; -pub use 
set::{ExactCoverBy3Sets, MaximumSetPacking, MinimumSetCovering, SetBasis}; +pub use set::{ + ExactCoverBy3Sets, MaximumSetPacking, MinimumSetCovering, PrimeAttributeName, SetBasis, +}; diff --git a/src/models/set/mod.rs b/src/models/set/mod.rs index fb8ee7cd8..bcae4c8a3 100644 --- a/src/models/set/mod.rs +++ b/src/models/set/mod.rs @@ -4,15 +4,18 @@ //! - [`MinimumSetCovering`]: Minimum weight set cover //! - [`MaximumSetPacking`]: Maximum weight set packing //! - [`ExactCoverBy3Sets`]: Exact cover by 3-element subsets (X3C) +//! - [`PrimeAttributeName`]: Determine if an attribute belongs to any candidate key pub(crate) mod exact_cover_by_3_sets; pub(crate) mod maximum_set_packing; pub(crate) mod minimum_set_covering; +pub(crate) mod prime_attribute_name; pub(crate) mod set_basis; pub use exact_cover_by_3_sets::ExactCoverBy3Sets; pub use maximum_set_packing::MaximumSetPacking; pub use minimum_set_covering::MinimumSetCovering; +pub use prime_attribute_name::PrimeAttributeName; pub use set_basis::SetBasis; #[cfg(feature = "example-db")] @@ -21,6 +24,7 @@ pub(crate) fn canonical_model_example_specs() -> Vec. + +use crate::registry::{FieldInfo, ProblemSchemaEntry}; +use crate::traits::{Problem, SatisfactionProblem}; +use serde::{Deserialize, Serialize}; + +inventory::submit! { + ProblemSchemaEntry { + name: "PrimeAttributeName", + display_name: "Prime Attribute Name", + aliases: &[], + dimensions: &[], + module_path: module_path!(), + description: "Determine if an attribute belongs to any candidate key under functional dependencies", + fields: &[ + FieldInfo { name: "num_attributes", type_name: "usize", description: "Number of attributes" }, + FieldInfo { name: "dependencies", type_name: "Vec<(Vec, Vec)>", description: "Functional dependencies (lhs, rhs) pairs" }, + FieldInfo { name: "query_attribute", type_name: "usize", description: "The query attribute index" }, + ], + } +} + +/// Prime Attribute Name decision problem. 
+/// +/// Given a set A = {0, 1, ..., n-1} of attribute names, a collection F of +/// functional dependencies on A, and a specified attribute x in A, determine +/// whether x is a *prime attribute* -- i.e., whether there exists a candidate +/// key K for (A, F) such that x is in K. +/// +/// A *candidate key* is a minimal set K of attributes whose closure under F +/// equals A. An attribute is *prime* if it belongs to at least one candidate key. +/// +/// This is a classical NP-complete problem from relational database theory +/// (Garey & Johnson SR28, Lucchesi & Osborn 1978). +/// +/// # Example +/// +/// ``` +/// use problemreductions::models::set::PrimeAttributeName; +/// use problemreductions::{Problem, Solver, BruteForce}; +/// +/// // 6 attributes, FDs: {0,1}->rest, {2,3}->rest, {0,3}->rest +/// let problem = PrimeAttributeName::new( +/// 6, +/// vec![ +/// (vec![0, 1], vec![2, 3, 4, 5]), +/// (vec![2, 3], vec![0, 1, 4, 5]), +/// (vec![0, 3], vec![1, 2, 4, 5]), +/// ], +/// 3, +/// ); +/// +/// // {2, 3} is a candidate key containing attribute 3 +/// assert!(problem.evaluate(&[0, 0, 1, 1, 0, 0])); +/// +/// let solver = BruteForce::new(); +/// let solution = solver.find_satisfying(&problem); +/// assert!(solution.is_some()); +/// ``` +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PrimeAttributeName { + /// Number of attributes (elements are 0..num_attributes). + num_attributes: usize, + /// Functional dependencies as (lhs, rhs) pairs. + dependencies: Vec<(Vec, Vec)>, + /// The query attribute index. + query_attribute: usize, +} + +impl PrimeAttributeName { + /// Create a new Prime Attribute Name problem. + /// + /// # Panics + /// + /// Panics if `query_attribute >= num_attributes`, if any attribute index + /// in a dependency is out of range, or if any LHS is empty. 
+ pub fn new( + num_attributes: usize, + dependencies: Vec<(Vec, Vec)>, + query_attribute: usize, + ) -> Self { + assert!( + query_attribute < num_attributes, + "Query attribute {} is outside attribute set of size {}", + query_attribute, + num_attributes + ); + for (i, (lhs, rhs)) in dependencies.iter().enumerate() { + assert!(!lhs.is_empty(), "Dependency {} has empty LHS", i); + for &attr in lhs.iter().chain(rhs.iter()) { + assert!( + attr < num_attributes, + "Dependency {} references attribute {} which is outside attribute set of size {}", + i, + attr, + num_attributes + ); + } + } + Self { + num_attributes, + dependencies, + query_attribute, + } + } + + /// Get the number of attributes. + pub fn num_attributes(&self) -> usize { + self.num_attributes + } + + /// Get the number of functional dependencies. + pub fn num_dependencies(&self) -> usize { + self.dependencies.len() + } + + /// Get the query attribute index. + pub fn query_attribute(&self) -> usize { + self.query_attribute + } + + /// Get the functional dependencies. + pub fn dependencies(&self) -> &[(Vec, Vec)] { + &self.dependencies + } + + /// Compute the attribute closure of a set under the functional dependencies. + /// + /// Starting from the given boolean mask of attributes, repeatedly applies + /// all functional dependencies until a fixpoint is reached. 
+ pub fn compute_closure(&self, attrs: &[bool]) -> Vec { + let mut closure = attrs.to_vec(); + loop { + let mut changed = false; + for (lhs, rhs) in &self.dependencies { + if lhs.iter().all(|&a| closure[a]) { + for &a in rhs { + if !closure[a] { + closure[a] = true; + changed = true; + } + } + } + } + if !changed { + break; + } + } + closure + } +} + +impl Problem for PrimeAttributeName { + const NAME: &'static str = "PrimeAttributeName"; + type Metric = bool; + + fn dims(&self) -> Vec { + vec![2; self.num_attributes] + } + + fn evaluate(&self, config: &[usize]) -> bool { + // Check config length and binary values + if config.len() != self.num_attributes || config.iter().any(|&v| v > 1) { + return false; + } + + // K = {i : config[i] = 1} + let k: Vec = config.iter().map(|&v| v == 1).collect(); + + // query_attribute must be in K + if !k[self.query_attribute] { + return false; + } + + // Compute closure(K) -- must equal all attributes (K is a superkey) + let closure = self.compute_closure(&k); + if closure.iter().any(|&v| !v) { + return false; + } + + // Check minimality: removing any attribute from K must break the superkey property + for i in 0..self.num_attributes { + if k[i] { + let mut reduced = k.clone(); + reduced[i] = false; + let reduced_closure = self.compute_closure(&reduced); + if reduced_closure.iter().all(|&v| v) { + // K \ {i} is still a superkey, so K is not minimal + return false; + } + } + } + + true + } + + fn variant() -> Vec<(&'static str, &'static str)> { + crate::variant_params![] + } +} + +impl SatisfactionProblem for PrimeAttributeName {} + +crate::declare_variants! 
{ + default sat PrimeAttributeName => "2^num_attributes * num_dependencies * num_attributes", +} + +#[cfg(feature = "example-db")] +pub(crate) fn canonical_model_example_specs() -> Vec { + vec![crate::example_db::specs::ModelExampleSpec { + id: "prime_attribute_name", + build: || { + // Issue Example 1: 6 attributes, 3 FDs, query=3 -> YES + let problem = PrimeAttributeName::new( + 6, + vec![ + (vec![0, 1], vec![2, 3, 4, 5]), + (vec![2, 3], vec![0, 1, 4, 5]), + (vec![0, 3], vec![1, 2, 4, 5]), + ], + 3, + ); + // {2, 3} is a candidate key containing attribute 3 + crate::example_db::specs::satisfaction_example(problem, vec![vec![0, 0, 1, 1, 0, 0]]) + }, + }] +} + +#[cfg(test)] +#[path = "../../unit_tests/models/set/prime_attribute_name.rs"] +mod tests; diff --git a/src/unit_tests/models/set/prime_attribute_name.rs b/src/unit_tests/models/set/prime_attribute_name.rs new file mode 100644 index 000000000..8359d4bd4 --- /dev/null +++ b/src/unit_tests/models/set/prime_attribute_name.rs @@ -0,0 +1,172 @@ +use super::*; +use crate::solvers::BruteForce; +use crate::traits::Problem; + +/// Helper: Issue Example 1 — 6 attributes, 3 FDs, query=3 +/// Candidate keys: {0,1}, {2,3}, {0,3} — attribute 3 is prime +fn example1() -> PrimeAttributeName { + PrimeAttributeName::new( + 6, + vec![ + (vec![0, 1], vec![2, 3, 4, 5]), + (vec![2, 3], vec![0, 1, 4, 5]), + (vec![0, 3], vec![1, 2, 4, 5]), + ], + 3, + ) +} + +/// Helper: Issue Example 2 — 6 attributes, 1 FD, query=3 +/// Only candidate key: {0,1} — attribute 3 is NOT prime +fn example2() -> PrimeAttributeName { + PrimeAttributeName::new(6, vec![(vec![0, 1], vec![2, 3, 4, 5])], 3) +} + +#[test] +fn test_prime_attribute_name_creation() { + let problem = example1(); + assert_eq!(problem.num_attributes(), 6); + assert_eq!(problem.num_dependencies(), 3); + assert_eq!(problem.query_attribute(), 3); + assert_eq!(problem.num_variables(), 6); + assert_eq!(problem.dims(), vec![2, 2, 2, 2, 2, 2]); + assert_eq!(problem.dependencies().len(), 3); 
+} + +#[test] +fn test_prime_attribute_name_evaluate_yes() { + let problem = example1(); + // {2, 3} is a candidate key containing attribute 3 + assert!(problem.evaluate(&[0, 0, 1, 1, 0, 0])); +} + +#[test] +fn test_prime_attribute_name_evaluate_no() { + let problem = example2(); + // Only key is {0,1} which doesn't contain attribute 3 + // Config selecting {0,1}: this is a candidate key but doesn't contain query=3 + assert!(!problem.evaluate(&[1, 1, 0, 0, 0, 0])); + // Config selecting {2,3}: not a superkey since closure({2,3}) != A + assert!(!problem.evaluate(&[0, 0, 1, 1, 0, 0])); +} + +#[test] +fn test_prime_attribute_name_evaluate_superkey_not_minimal() { + let problem = example1(); + // {1,2,3} has closure = A (since {2,3}->rest), but it's not minimal + // because {2,3} alone is also a superkey + assert!(!problem.evaluate(&[0, 1, 1, 1, 0, 0])); +} + +#[test] +fn test_prime_attribute_name_evaluate_not_superkey() { + let problem = example1(); + // {0} alone: closure({0}) = {0}, not all of A + assert!(!problem.evaluate(&[1, 0, 0, 0, 0, 0])); +} + +#[test] +fn test_prime_attribute_name_evaluate_query_not_in_k() { + let problem = example1(); + // {0,1} is a candidate key but doesn't contain attribute 3 + assert!(!problem.evaluate(&[1, 1, 0, 0, 0, 0])); +} + +#[test] +fn test_prime_attribute_name_evaluate_all_selected() { + let problem = example1(); + // All attributes selected: superkey but not minimal + assert!(!problem.evaluate(&[1, 1, 1, 1, 1, 1])); +} + +#[test] +fn test_prime_attribute_name_evaluate_invalid_config() { + let problem = example1(); + // Wrong length + assert!(!problem.evaluate(&[0, 0, 1])); + // Non-binary value + assert!(!problem.evaluate(&[0, 0, 1, 2, 0, 0])); +} + +#[test] +fn test_prime_attribute_name_solver() { + let problem = example1(); + let solver = BruteForce::new(); + let mut solutions = solver.find_all_satisfying(&problem); + solutions.sort(); + assert!(!solutions.is_empty()); + for sol in &solutions { + 
assert!(problem.evaluate(sol)); + } + assert_eq!( + solutions, + vec![vec![0, 0, 1, 1, 0, 0], vec![1, 0, 0, 1, 0, 0]] + ); +} + +#[test] +fn test_prime_attribute_name_no_solution() { + let problem = example2(); + let solver = BruteForce::new(); + let solutions = solver.find_all_satisfying(&problem); + assert!(solutions.is_empty()); +} + +#[test] +fn test_prime_attribute_name_serialization() { + let problem = example1(); + let json = serde_json::to_string(&problem).unwrap(); + let deserialized: PrimeAttributeName = serde_json::from_str(&json).unwrap(); + assert_eq!(deserialized.num_attributes(), problem.num_attributes()); + assert_eq!(deserialized.num_dependencies(), problem.num_dependencies()); + assert_eq!(deserialized.query_attribute(), problem.query_attribute()); + assert_eq!(deserialized.dependencies(), problem.dependencies()); +} + +#[test] +fn test_prime_attribute_name_compute_closure() { + let problem = example1(); + // Closure of {0,1} should be all attributes + let mut attrs = vec![false; 6]; + attrs[0] = true; + attrs[1] = true; + let closure = problem.compute_closure(&attrs); + assert!(closure.iter().all(|&v| v)); + + // Closure of {0} should be just {0} + let mut attrs2 = vec![false; 6]; + attrs2[0] = true; + let closure2 = problem.compute_closure(&attrs2); + assert_eq!(closure2, vec![true, false, false, false, false, false]); +} + +#[test] +fn test_prime_attribute_name_compute_closure_transitive() { + let problem = PrimeAttributeName::new( + 4, + vec![(vec![0], vec![1]), (vec![1], vec![2]), (vec![2], vec![3])], + 0, + ); + let mut attrs = vec![false; 4]; + attrs[0] = true; + let closure = problem.compute_closure(&attrs); + assert_eq!(closure, vec![true, true, true, true]); +} + +#[test] +#[should_panic(expected = "Query attribute")] +fn test_prime_attribute_name_invalid_query() { + PrimeAttributeName::new(3, vec![(vec![0], vec![1, 2])], 5); +} + +#[test] +#[should_panic(expected = "empty LHS")] +fn test_prime_attribute_name_empty_lhs() { + 
PrimeAttributeName::new(3, vec![(vec![], vec![1, 2])], 0); +} + +#[test] +#[should_panic(expected = "outside attribute set")] +fn test_prime_attribute_name_dep_out_of_range() { + PrimeAttributeName::new(3, vec![(vec![0], vec![5])], 0); +}