diff --git a/pyproject.toml b/pyproject.toml
index 2eeacf87..2fb2a259 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "anthropic>=0.74.0",
     "jsonschema>=4.17.0",
     "requests>=2.31.0",
+    "pydantic>=2.0.0",
 ]
 
 [project.optional-dependencies]
diff --git a/src/agentready/cli/assess_batch.py b/src/agentready/cli/assess_batch.py
index 84caaf0f..1a7fd97d 100644
--- a/src/agentready/cli/assess_batch.py
+++ b/src/agentready/cli/assess_batch.py
@@ -12,7 +12,7 @@
 from ..reporters.html import HTMLReporter
 from ..reporters.markdown import MarkdownReporter
 from ..services.batch_scanner import BatchScanner
-from ..utils.security import validate_config_dict, validate_path
+from pydantic import ValidationError
 
 
 def _get_agentready_version() -> str:
@@ -29,67 +29,42 @@ def _get_agentready_version() -> str:
 
 
 def _load_config(config_path: Path) -> Config:
-    """Load configuration from YAML file with validation.
+    """Load configuration from YAML file with Pydantic validation.
 
-    Uses centralized security utilities from utils.security module.
-    """
-    import yaml
+    Uses Pydantic for automatic validation, replacing duplicated manual
+    validation code with the Config.from_yaml_dict() classmethod.
 
-    with open(config_path, "r", encoding="utf-8") as f:
-        data = yaml.safe_load(f)
-
-    # Define config schema for validation
-    schema = {
-        "weights": {str: (int, float)},
-        "excluded_attributes": [str],
-        "language_overrides": {str: list},
-        "output_dir": str,
-        "report_theme": str,
-        "custom_theme": dict,
-    }
-
-    # Validate config structure using centralized utility
-    validated = validate_config_dict(data, schema)
-
-    # Additional nested validations for complex types
-    if "language_overrides" in validated:
-        for lang, patterns in validated["language_overrides"].items():
-            if not isinstance(patterns, list):
-                raise ValueError(
-                    f"'language_overrides' values must be lists, got {type(patterns).__name__}"
-                )
-            for pattern in patterns:
-                if not isinstance(pattern, str):
-                    raise ValueError(
-                        f"'language_overrides' patterns must be strings, got {type(pattern).__name__}"
-                    )
-
-    if "custom_theme" in validated:
-        for key, value in validated["custom_theme"].items():
-            if not isinstance(key, str):
-                raise ValueError(
-                    f"'custom_theme' keys must be strings, got {type(key).__name__}"
-                )
-            if not isinstance(value, str):
-                raise ValueError(
-                    f"'custom_theme' values must be strings, got {type(value).__name__}"
-                )
+    Args:
+        config_path: Path to YAML configuration file
 
-    # Validate and sanitize output_dir path
-    output_dir = None
-    if "output_dir" in validated:
-        output_dir = validate_path(
-            validated["output_dir"], allow_system_dirs=False, must_exist=False
-        )
+    Returns:
+        Validated Config instance
 
-    return Config(
-        weights=validated.get("weights", {}),
-        excluded_attributes=validated.get("excluded_attributes", []),
-        language_overrides=validated.get("language_overrides", {}),
-        output_dir=output_dir,
-        report_theme=validated.get("report_theme", "default"),
-        custom_theme=validated.get("custom_theme"),
-    )
+    Raises:
+        SystemExit: If the YAML data fails schema validation (the errors
+            are printed to stderr before exiting with status 1)
+        FileNotFoundError: If config file doesn't exist
+        yaml.YAMLError: If YAML parsing fails
+    """
+    import sys
+
+    import yaml
+
+    with open(config_path, "r", encoding="utf-8") as f:
+        data = yaml.safe_load(f)
+
+    try:
+        # Pydantic handles all validation automatically
+        return Config.from_yaml_dict(data)
+    except ValidationError as e:
+        click.echo("Configuration validation failed:", err=True)
+        for error in e.errors():
+            # Model-level errors (e.g. the weights-sum check) carry an empty
+            # "loc"; fall back to a generic label instead of a blank field.
+            field = " → ".join(str(x) for x in error["loc"]) or "config"
+            msg = error["msg"]
+            click.echo(f" - {field}: {msg}", err=True)
+        sys.exit(1)
 
 
 def _generate_multi_reports(batch_assessment, output_path: Path, verbose: bool) -> None:
diff --git a/src/agentready/cli/main.py b/src/agentready/cli/main.py
index d3c06f60..6fc0e709 100644
--- a/src/agentready/cli/main.py
+++ b/src/agentready/cli/main.py
@@ -18,7 +18,7 @@
 from ..reporters.markdown import MarkdownReporter
 from ..services.research_loader import ResearchLoader
 from ..services.scanner import Scanner
-from ..utils.security import validate_config_dict, validate_path
+from pydantic import ValidationError
 from ..utils.subprocess_utils import safe_subprocess_run
 from .align import align
 from .assess_batch import assess_batch
@@ -242,73 +242,45 @@ def run_assessment(repository_path, verbose, output_dir, config_path):
 
 
 def load_config(config_path: Path) -> Config:
-    """Load configuration from YAML file with validation.
+    """Load configuration from YAML file with Pydantic validation.
 
-    Security: Validates YAML structure to prevent injection attacks
-    and malformed data from causing crashes or unexpected behavior.
-    Uses centralized security utilities from utils.security module.
+    Uses Pydantic for automatic validation, replacing 67 lines of manual
+    validation code with declarative field validators.
+
+    Security: Uses yaml.safe_load() for safe YAML parsing and Pydantic
+    validators for type checking and path sanitization.
+
+    Args:
+        config_path: Path to YAML configuration file
+
+    Returns:
+        Validated Config instance
+
+    Raises:
+        SystemExit: If the YAML data fails schema validation (the errors
+            are printed to stderr before exiting with status 1)
+        FileNotFoundError: If config file doesn't exist
+        yaml.YAMLError: If YAML parsing fails
     """
+    import sys
+
     import yaml
 
     with open(config_path, "r", encoding="utf-8") as f:
         data = yaml.safe_load(f)
 
-    # Define config schema for validation
-    schema = {
-        "weights": {str: (int, float)},  # dict[str, int|float]
-        "excluded_attributes": [str],  # list[str]
-        "language_overrides": {
-            str: list
-        },  # dict[str, list] (nested list validated separately)
-        "output_dir": str,
-        "report_theme": str,
-        "custom_theme": dict,  # dict (nested types validated separately)
-    }
-
-    # Validate config structure using centralized utility
-    validated = validate_config_dict(data, schema)
-
-    # Additional nested validations for complex types
-    if "language_overrides" in validated:
-        lang_overrides = validated["language_overrides"]
-        for lang, patterns in lang_overrides.items():
-            if not isinstance(patterns, list):
-                raise ValueError(
-                    f"'language_overrides' values must be lists, got {type(patterns).__name__}"
-                )
-            for pattern in patterns:
-                if not isinstance(pattern, str):
-                    raise ValueError(
-                        f"'language_overrides' patterns must be strings, got {type(pattern).__name__}"
-                    )
-
-    if "custom_theme" in validated:
-        custom_theme = validated["custom_theme"]
-        for key, value in custom_theme.items():
-            if not isinstance(key, str):
-                raise ValueError(
-                    f"'custom_theme' keys must be strings, got {type(key).__name__}"
-                )
-            if not isinstance(value, str):
-                raise ValueError(
-                    f"'custom_theme' values must be strings, got {type(value).__name__}"
-                )
-
-    # Validate and sanitize output_dir path
-    output_dir = None
-    if "output_dir" in validated:
-        output_dir = validate_path(
-            validated["output_dir"], allow_system_dirs=False, must_exist=False
-        )
-
-    return Config(
-        weights=validated.get("weights", {}),
-        excluded_attributes=validated.get("excluded_attributes", []),
-        language_overrides=validated.get("language_overrides", {}),
-        output_dir=output_dir,
-        report_theme=validated.get("report_theme", "default"),
-        custom_theme=validated.get("custom_theme"),
-    )
+    try:
+        # Pydantic handles all validation automatically
+        return Config.from_yaml_dict(data)
+    except ValidationError as e:
+        click.echo("Configuration validation failed:", err=True)
+        for error in e.errors():
+            # Model-level errors (e.g. the weights-sum check) carry an empty
+            # "loc"; fall back to a generic label instead of a blank field.
+            field = " → ".join(str(x) for x in error["loc"]) or "config"
+            msg = error["msg"]
+            click.echo(f" - {field}: {msg}", err=True)
+        sys.exit(1)
 
 
 @cli.command()
diff --git a/src/agentready/models/config.py b/src/agentready/models/config.py
index 24eda7e5..a719f440 100644
--- a/src/agentready/models/config.py
+++ b/src/agentready/models/config.py
@@ -1,39 +1,77 @@
 """Config model for user customization of assessment behavior."""
 
-from dataclasses import dataclass
 from pathlib import Path
+from typing import Annotated
 
+from pydantic import BaseModel, Field, field_validator, model_validator
 
-@dataclass
-class Config:
+from ..utils.security import validate_path
+
+
+class Config(BaseModel):
     """User configuration for customizing assessment behavior.
 
+    Uses Pydantic for automatic validation, type checking, and JSON schema generation.
+    Replaces 85 lines of manual validation code with declarative field validators.
+
     Attributes:
-        weights: Custom attribute weights (attribute_id → weight)
-        excluded_attributes: Attributes to skip
-        language_overrides: Force language detection (lang → [patterns])
+        weights: Custom attribute weights (attribute_id → weight 0.0-1.0, must sum to 1.0)
+        excluded_attributes: Attributes to skip during assessment
+        language_overrides: Force language detection (lang → glob patterns)
         output_dir: Custom output directory (None uses default .agentready/)
         report_theme: Theme name for HTML reports (default, dark, light, etc.)
         custom_theme: Custom theme colors (overrides report_theme if provided)
     """
 
-    weights: dict[str, float]
-    excluded_attributes: list[str]
-    language_overrides: dict[str, list[str]]
-    output_dir: Path | None
-    report_theme: str = "default"
-    custom_theme: dict[str, str] | None = None
-
-    def __post_init__(self):
-        """Validate config data after initialization."""
-        # Validate weights are positive
-        for attr_id, weight in self.weights.items():
+    weights: Annotated[
+        dict[str, float],
+        Field(
+            default_factory=dict,
+            description="Custom attribute weights (must sum to 1.0 if non-empty)",
+        ),
+    ]
+    excluded_attributes: Annotated[
+        list[str], Field(default_factory=list, description="Attributes to skip")
+    ]
+    language_overrides: Annotated[
+        dict[str, list[str]],
+        Field(
+            default_factory=dict,
+            description="Force language detection (lang → glob patterns)",
+        ),
+    ]
+    output_dir: Annotated[
+        Path | None,
+        Field(
+            default=None,
+            description="Custom output directory (None uses .agentready/)",
+        ),
+    ]
+    report_theme: Annotated[
+        str, Field(default="default", description="Theme name for HTML reports")
+    ]
+    custom_theme: Annotated[
+        dict[str, str] | None,
+        Field(
+            default=None,
+            description="Custom theme colors (str → str color mappings)",
+        ),
+    ]
+
+    @field_validator("weights")
+    @classmethod
+    def validate_weights(cls, v: dict[str, float]) -> dict[str, float]:
+        """Validate weight values are positive and between 0 and 1."""
+        for attr_id, weight in v.items():
             if weight <= 0:
                 raise ValueError(f"Weight must be positive for {attr_id}: {weight}")
             if weight > 1.0:
                 raise ValueError(f"Weight must be <= 1.0 for {attr_id}: {weight}")
+        return v
 
-        # Validate weights sum (with tolerance for floating point)
+    @model_validator(mode="after")
+    def validate_weights_sum(self) -> "Config":
+        """Validate weights sum to 1.0 (if any weights provided)."""
         if self.weights:
             total = sum(self.weights.values())
             tolerance = 0.001
@@ -42,17 +80,61 @@ def __post_init__(self):
                     f"Weights must sum to 1.0 (got {total:.4f}, "
                     f"difference: {total - 1.0:+.4f})"
                 )
+        return self
+
+    @field_validator("language_overrides")
+    @classmethod
+    def validate_language_overrides(
+        cls, v: dict[str, list[str]]
+    ) -> dict[str, list[str]]:
+        """Validate language override patterns are strings."""
+        for lang, patterns in v.items():
+            if not all(isinstance(p, str) for p in patterns):
+                raise ValueError(
+                    f"All language_overrides patterns for '{lang}' must be strings"
+                )
+        return v
+
+    @field_validator("custom_theme")
+    @classmethod
+    def validate_custom_theme(cls, v: dict[str, str] | None) -> dict[str, str] | None:
+        """Validate custom theme values are strings."""
+        if v is not None:
+            if not all(
+                isinstance(k, str) and isinstance(val, str) for k, val in v.items()
+            ):
+                raise ValueError("All custom_theme keys and values must be strings")
+        return v
+
+    @field_validator("output_dir", mode="before")
+    @classmethod
+    def validate_output_dir_path(cls, v: str | Path | None) -> Path | None:
+        """Validate and sanitize output directory path."""
+        if v is None:
+            return None
+        if isinstance(v, str):
+            # Security: Use centralized path validation
+            return validate_path(v, allow_system_dirs=False, must_exist=False)
+        return v
+
+    def model_dump(self, **kwargs) -> dict:
+        """Convert to dictionary for JSON serialization.
+
+        Overrides Pydantic's model_dump to render output_dir as a string.
+        The conversion only applies when the key is present in the dump, so
+        include/exclude arguments are respected.
+        """
+        data = super().model_dump(**kwargs)
+        if isinstance(data.get("output_dir"), Path):
+            data["output_dir"] = str(data["output_dir"])
+        return data
 
     def to_dict(self) -> dict:
-        """Convert to dictionary for JSON serialization."""
-        return {
-            "weights": self.weights,
-            "excluded_attributes": self.excluded_attributes,
-            "language_overrides": self.language_overrides,
-            "output_dir": str(self.output_dir) if self.output_dir else None,
-            "report_theme": self.report_theme,
-            "custom_theme": self.custom_theme,
-        }
+        """Convert to dictionary for JSON serialization.
+
+        Provides backwards-compatible method name matching old dataclass API.
+        """
+        return self.model_dump()
 
     def get_weight(self, attribute_id: str, default: float) -> float:
         """Get weight for attribute, falling back to default if not specified."""
@@ -69,11 +151,25 @@ def load_default(cls) -> "Config":
         Returns:
             Config with empty weights, no exclusions, no overrides
         """
-        return cls(
-            weights={},
-            excluded_attributes=[],
-            language_overrides={},
-            output_dir=None,
-            report_theme="default",
-            custom_theme=None,
-        )
+        return cls()
+
+    @classmethod
+    def from_yaml_dict(cls, data: dict | None) -> "Config":
+        """Build a validated Config from parsed YAML data.
+
+        Args:
+            data: Result of yaml.safe_load() on the config file. None (an
+                empty YAML document) is treated as an empty mapping and
+                yields the default configuration.
+
+        Returns:
+            Validated Config instance
+
+        Raises:
+            pydantic.ValidationError: If data is not a mapping or doesn't
+                match the schema
+        """
+        # model_validate() reports non-mapping input (e.g. a top-level YAML
+        # list) as a ValidationError, whereas cls(**data) would raise a
+        # TypeError that the CLI loaders do not handle.
+        return cls.model_validate({} if data is None else data)