From 7e03cfc4484976042d719e373fca737505f00b37 Mon Sep 17 00:00:00 2001
From: Saurabh Sinha <sinha108@gmail.com>
Date: Fri, 15 May 2026 12:40:38 -0400
Subject: [PATCH 1/4] Implementation of taint analysis with CodeQL, along with
 tests and fixtures.

Signed-off-by: Saurabh Sinha <sinha108@gmail.com>
---
 codeanalyzer/__main__.py                      |  22 +
 codeanalyzer/config/taint_config_defaults.py  | 189 ++++
 codeanalyzer/config/taint_config_loader.py    | 317 +++++++
 codeanalyzer/core.py                          |  93 +-
 codeanalyzer/options/options.py               |   2 +
 codeanalyzer/schema/py_schema.py              | 377 +++++++-
 .../codeql/codeql_analysis.py                 | 178 +++-
 .../codeql/codeql_query_runner.py             |   1 +
 .../codeql/taint_query_generator.py           | 428 +++++++++
 test/conftest.py                              | 155 ++++
 .../command_injection_app/vulnerable.py       | 172 ++++
 .../taint_analysis/flask_app/vulnerable.py    | 231 +++++
 .../path_traversal_app/vulnerable.py          | 189 ++++
 .../taint_analysis/sanitizer_app/mixed.py     | 114 +++
 .../taint_analysis/sanitizer_app/safe.py      | 201 +++++
 .../sql_injection_app/vulnerable.py           | 159 ++++
 .../taint_analysis/xss_app/vulnerable.py      | 217 +++++
 test/test_cli.py                              | 128 ++-
 test/test_taint_analysis.py                   | 841 ++++++++++++++++++
 19 files changed, 4004 insertions(+), 10 deletions(-)
 create mode 100644 codeanalyzer/config/taint_config_defaults.py
 create mode 100644 codeanalyzer/config/taint_config_loader.py
 create mode 100644 codeanalyzer/semantic_analysis/codeql/taint_query_generator.py
 create mode 100644 test/fixtures/taint_analysis/command_injection_app/vulnerable.py
 create mode 100644 test/fixtures/taint_analysis/flask_app/vulnerable.py
 create mode 100644 test/fixtures/taint_analysis/path_traversal_app/vulnerable.py
 create mode 100644 test/fixtures/taint_analysis/sanitizer_app/mixed.py
 create mode 100644 test/fixtures/taint_analysis/sanitizer_app/safe.py
 create mode 100644 test/fixtures/taint_analysis/sql_injection_app/vulnerable.py
 create mode 100644 test/fixtures/taint_analysis/xss_app/vulnerable.py
 create mode 100644 test/test_taint_analysis.py

diff --git a/codeanalyzer/__main__.py b/codeanalyzer/__main__.py
index 19e7f2a..02b25ae 100644
--- a/codeanalyzer/__main__.py
+++ b/codeanalyzer/__main__.py
@@ -27,9 +27,20 @@ def main(
             case_sensitive=False,
         ),
     ] = OutputFormat.JSON,
+    analysis_level: Annotated[
+        int,
+        typer.Option("-a", "--analysis-level", help="1: symbol table, 2: call graph (requires --codeql), 3: taint analysis (requires --codeql)."),
+    ] = 1,
     using_codeql: Annotated[
         bool, typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis.")
     ] = False,
+    taint_config: Annotated[
+        Optional[Path],
+        typer.Option(
+            "--taint-config",
+            help="Path to taint analysis configuration file (YAML or JSON). Used with --analysis-level 3.",
+        ),
+    ] = None,
     using_ray: Annotated[
         bool,
         typer.Option("--ray/--no-ray", help="Enable Ray for distributed analysis."),
@@ -74,10 +85,20 @@ def main(
         int, typer.Option("-v", count=True, help="Increase verbosity: -v, -vv, -vvv")
     ] = 0,
 ):
+    # Validate analysis level requirements
+    if analysis_level >= 2 and not using_codeql:
+        logger.error("Analysis levels 2 and 3 require --codeql flag")
+        raise typer.Exit(code=1)
+    
+    if analysis_level >= 3 and taint_config and not taint_config.exists():
+        logger.error(f"Taint configuration file '{taint_config}' does not exist.")
+        raise typer.Exit(code=1)
+    
     options = AnalysisOptions(
         input=input,
         output=output,
         format=format,
+        analysis_level=analysis_level,
         using_codeql=using_codeql,
         using_ray=using_ray,
         rebuild_analysis=rebuild_analysis,
@@ -86,6 +107,7 @@ def main(
         cache_dir=cache_dir,
         clear_cache=clear_cache,
         verbosity=verbosity,
+        taint_config=taint_config,
     )
 
     _set_log_level(options.verbosity)
diff --git a/codeanalyzer/config/taint_config_defaults.py b/codeanalyzer/config/taint_config_defaults.py
new file mode 100644
index 0000000..c8cf599
--- /dev/null
+++ b/codeanalyzer/config/taint_config_defaults.py
@@ -0,0 +1,189 @@
+################################################################################
+# Copyright IBM Corporation 2025
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+"""Default taint analysis configuration.
+
+Design
+------
+The generated CodeQL query uses CodeQL's built-in security models as the
+primary detection layer (``RemoteFlowSource``, ``SqlInjection::Sink``,
+``CommandInjection::Sink``, ``CodeInjection::Sink``, ``PathTraversal::Sink``,
+``XSS::Sink``).  These cover hundreds of APIs automatically.
+
+The patterns defined here are **supplementary** — they extend built-in
+coverage with sources/sinks that are not modelled by CodeQL out of the box:
+
+Sources not in RemoteFlowSource:
+  - ``sys.argv``          — command-line arguments
+  - ``input()``           — interactive user input
+  - ``os.getenv()``       — environment variables
+  - ``os.environ.get()``  — environment variables
+  - ``requests.*``        — outbound HTTP responses used as data sources
+
+Sinks not in built-in models (project-specific or less common):
+  - ``ldap.search()``     — LDAP injection
+
+Sanitizers:
+  - Common HTML/path/command sanitizers that CodeQL may not model as barriers.
+
+Users can extend or override this configuration via a YAML/JSON file passed
+with ``--taint-config``.  All CodeQL patterns must use double-quoted strings.
+"""
+
+from codeanalyzer.schema.py_schema import (
+    TaintAnalysisConfig,
+    TaintSourceConfig,
+    TaintSinkConfig,
+    TaintSanitizerConfig,
+)
+
+
+def get_default_taint_config() -> TaintAnalysisConfig:
+    """Returns the default taint analysis configuration.
+
+    Combines CodeQL's built-in security models (primary) with supplementary
+    user-configured patterns for sources/sinks not covered by the built-ins.
+
+    Returns:
+        TaintAnalysisConfig: Default configuration
+    """
+
+    return TaintAnalysisConfig(
+        sources=[
+            # --- Sources not covered by CodeQL's RemoteFlowSource ---
+
+            # Command-line arguments
+            TaintSourceConfig(
+                name="command_line_args",
+                description="Command-line arguments via sys.argv",
+                pattern='API::moduleImport("sys").getMember("argv")',
+                source_type="command_line_argument",
+            ),
+
+            # Interactive user input
+            TaintSourceConfig(
+                name="user_input",
+                description="Direct user input via input() function",
+                pattern='API::builtin("input").getACall()',
+                source_type="user_input",
+            ),
+
+            # Environment variables
+            TaintSourceConfig(
+                name="env_getenv",
+                description="Environment variables via os.getenv",
+                pattern='API::moduleImport("os").getMember("getenv").getACall()',
+                source_type="environment_variable",
+            ),
+            TaintSourceConfig(
+                name="env_environ_get",
+                description="Environment variables via os.environ.get",
+                pattern='API::moduleImport("os").getMember("environ").getMember("get").getACall()',
+                source_type="environment_variable",
+            ),
+
+            # Outbound HTTP responses used as data sources (requests library)
+            TaintSourceConfig(
+                name="requests_get_response",
+                description="HTTP GET response body (requests.get().text / .json())",
+                pattern='API::moduleImport("requests").getMember("get").getReturn().getMember("text")',
+                source_type="http_response",
+            ),
+            TaintSourceConfig(
+                name="requests_post_response",
+                description="HTTP POST response body (requests.post().text / .json())",
+                pattern='API::moduleImport("requests").getMember("post").getReturn().getMember("text")',
+                source_type="http_response",
+            ),
+        ],
+
+        sinks=[
+            # --- Sinks not covered by CodeQL's built-in sink classes ---
+
+            # LDAP Injection (not in CodeQL's standard Python models)
+            TaintSinkConfig(
+                name="ldap_search",
+                description="LDAP search operations",
+                pattern='API::moduleImport("ldap").getMember("search").getACall()',
+                sink_type="ldap_query",
+                vulnerability_type="LDAP Injection",
+                severity="high",
+                argument_index=0,
+            ),
+            TaintSinkConfig(
+                name="ldap3_connection_search",
+                description="ldap3 Connection.search",
+                pattern='API::moduleImport("ldap3").getMember("Connection").getReturn().getMember("search").getACall()',
+                sink_type="ldap_query",
+                vulnerability_type="LDAP Injection",
+                severity="high",
+                argument_index=1,
+            ),
+        ],
+
+        sanitizers=[
+            # HTML / XSS sanitizers
+            TaintSanitizerConfig(
+                name="html_escape",
+                description="HTML escape function (html.escape)",
+                pattern='API::moduleImport("html").getMember("escape").getACall()',
+                sanitizes=["xss", "template_injection"],
+            ),
+            TaintSanitizerConfig(
+                name="markupsafe_escape",
+                description="MarkupSafe Markup() / escape()",
+                pattern='API::moduleImport("markupsafe").getMember("escape").getACall()',
+                sanitizes=["xss"],
+            ),
+
+            # Command injection sanitizers
+            TaintSanitizerConfig(
+                name="shlex_quote",
+                description="Shell argument quoting via shlex.quote",
+                pattern='API::moduleImport("shlex").getMember("quote").getACall()',
+                sanitizes=["command_injection"],
+            ),
+
+            # Path traversal sanitizers
+            TaintSanitizerConfig(
+                name="os_path_normpath",
+                description="Path normalization via os.path.normpath",
+                pattern='API::moduleImport("os").getMember("path").getMember("normpath").getACall()',
+                sanitizes=["path_traversal"],
+            ),
+            TaintSanitizerConfig(
+                name="os_path_abspath",
+                description="Absolute path resolution via os.path.abspath",
+                pattern='API::moduleImport("os").getMember("path").getMember("abspath").getACall()',
+                sanitizes=["path_traversal"],
+            ),
+            TaintSanitizerConfig(
+                name="pathlib_resolve",
+                description="Path resolution via pathlib.Path.resolve()",
+                pattern='API::moduleImport("pathlib").getMember("Path").getReturn().getMember("resolve").getACall()',
+                sanitizes=["path_traversal"],
+            ),
+        ],
+
+        # Analysis options
+        max_path_length=10,
+        include_implicit_flows=False,
+        confidence_threshold="medium",
+        exclude_files=[],
+        exclude_functions=[],
+        include_safe_flows=False,
+        group_by_vulnerability=True,
+    )
diff --git a/codeanalyzer/config/taint_config_loader.py b/codeanalyzer/config/taint_config_loader.py
new file mode 100644
index 0000000..120f1a1
--- /dev/null
+++ b/codeanalyzer/config/taint_config_loader.py
@@ -0,0 +1,317 @@
+################################################################################
+# Copyright IBM Corporation 2025
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+"""Taint analysis configuration loader.
+
+This module provides functionality to load taint analysis configurations from
+YAML or JSON files and merge them with default configurations.
+"""
+
+import json
+from pathlib import Path
+from typing import Optional, Union
+
+import yaml
+
+from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+from codeanalyzer.config.taint_config_defaults import get_default_taint_config
+from codeanalyzer.utils import logger
+
+
+class TaintConfigLoader:
+    """Loads and merges taint analysis configurations."""
+    
+    @staticmethod
+    def load_config(
+        config_path: Optional[Union[str, Path]] = None,
+        use_defaults: bool = True
+    ) -> TaintAnalysisConfig:
+        """Load taint analysis configuration.
+        
+        Args:
+            config_path: Path to custom configuration file (YAML or JSON).
+                        If None, only defaults are used.
+            use_defaults: Whether to include default sources/sinks/sanitizers.
+                         If True, custom config extends defaults.
+                         If False, only custom config is used.
+            
+        Returns:
+            TaintAnalysisConfig: Merged configuration
+            
+        Raises:
+            FileNotFoundError: If config_path is provided but file doesn't exist
+            ValueError: If file format is unsupported or invalid
+        """
+        # Start with defaults if requested
+        if use_defaults:
+            config = get_default_taint_config()
+            logger.debug(f"Loaded default taint configuration with {len(config.sources)} sources, "
+                        f"{len(config.sinks)} sinks, {len(config.sanitizers)} sanitizers")
+        else:
+            config = TaintAnalysisConfig()
+            logger.debug("Starting with empty taint configuration")
+        
+        # Load and merge custom configuration
+        if config_path:
+            custom_config = TaintConfigLoader._load_from_file(config_path)
+            config = TaintConfigLoader._merge_configs(config, custom_config)
+            logger.info(f"Merged custom configuration from {config_path}")
+        
+        # Filter out disabled items
+        config = TaintConfigLoader._filter_disabled(config)
+        
+        logger.info(f"Final taint configuration: {len(config.sources)} sources, "
+                   f"{len(config.sinks)} sinks, {len(config.sanitizers)} sanitizers")
+        
+        return config
+    
+    @staticmethod
+    def _load_from_file(config_path: Union[str, Path]) -> TaintAnalysisConfig:
+        """Load configuration from YAML or JSON file.
+        
+        Args:
+            config_path: Path to configuration file
+            
+        Returns:
+            TaintAnalysisConfig: Loaded configuration
+            
+        Raises:
+            FileNotFoundError: If file doesn't exist
+            ValueError: If file format is unsupported or invalid
+        """
+        path = Path(config_path)
+        
+        if not path.exists():
+            raise FileNotFoundError(f"Configuration file not found: {config_path}")
+        
+        logger.debug(f"Loading taint configuration from {path}")
+        content = path.read_text()
+        
+        # Parse based on file extension
+        try:
+            if path.suffix in ['.yaml', '.yml']:
+                data = yaml.safe_load(content)
+            elif path.suffix == '.json':
+                data = json.loads(content)
+            else:
+                raise ValueError(
+                    f"Unsupported configuration format: {path.suffix}. "
+                    f"Supported formats: .yaml, .yml, .json"
+                )
+        except yaml.YAMLError as e:
+            raise ValueError(f"Invalid YAML in configuration file: {e}")
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Invalid JSON in configuration file: {e}")
+        
+        # Convert to Pydantic model
+        try:
+            return TaintAnalysisConfig.model_validate(data)
+        except Exception as e:
+            raise ValueError(f"Invalid taint configuration structure: {e}")
+    
+    @staticmethod
+    def _merge_configs(
+        base: TaintAnalysisConfig,
+        custom: TaintAnalysisConfig
+    ) -> TaintAnalysisConfig:
+        """Merge custom configuration into base configuration.
+        
+        Custom sources/sinks/sanitizers are added to the base.
+        If a custom item has the same name as a base item, it overrides it.
+        
+        Args:
+            base: Base configuration (typically defaults)
+            custom: Custom configuration to merge in
+            
+        Returns:
+            TaintAnalysisConfig: Merged configuration
+        """
+        # Create name-based lookups for base config
+        base_sources = {s.name: s for s in base.sources}
+        base_sinks = {s.name: s for s in base.sinks}
+        base_sanitizers = {s.name: s for s in base.sanitizers}
+        
+        # Track what was overridden
+        overridden_sources = []
+        overridden_sinks = []
+        overridden_sanitizers = []
+        
+        # Merge sources
+        for source in custom.sources:
+            if source.name in base_sources:
+                overridden_sources.append(source.name)
+            base_sources[source.name] = source
+        
+        # Merge sinks
+        for sink in custom.sinks:
+            if sink.name in base_sinks:
+                overridden_sinks.append(sink.name)
+            base_sinks[sink.name] = sink
+        
+        # Merge sanitizers
+        for sanitizer in custom.sanitizers:
+            if sanitizer.name in base_sanitizers:
+                overridden_sanitizers.append(sanitizer.name)
+            base_sanitizers[sanitizer.name] = sanitizer
+        
+        # Log merge information
+        if overridden_sources:
+            logger.debug(f"Overridden sources: {', '.join(overridden_sources)}")
+        if overridden_sinks:
+            logger.debug(f"Overridden sinks: {', '.join(overridden_sinks)}")
+        if overridden_sanitizers:
+            logger.debug(f"Overridden sanitizers: {', '.join(overridden_sanitizers)}")
+        
+        # Merge exclude lists (combine both)
+        merged_exclude_files = list(set(base.exclude_files + custom.exclude_files))
+        merged_exclude_functions = list(set(base.exclude_functions + custom.exclude_functions))
+        
+        # Create merged config
+        # Use custom values for options if they differ from defaults
+        return TaintAnalysisConfig(
+            sources=list(base_sources.values()),
+            sinks=list(base_sinks.values()),
+            sanitizers=list(base_sanitizers.values()),
+            max_path_length=custom.max_path_length if custom.max_path_length != 10 else base.max_path_length,
+            include_implicit_flows=custom.include_implicit_flows or base.include_implicit_flows,
+            confidence_threshold=custom.confidence_threshold if custom.confidence_threshold != "medium" else base.confidence_threshold,
+            exclude_files=merged_exclude_files,
+            exclude_functions=merged_exclude_functions,
+            include_safe_flows=custom.include_safe_flows or base.include_safe_flows,
+            group_by_vulnerability=custom.group_by_vulnerability if not custom.group_by_vulnerability else base.group_by_vulnerability,
+        )
+    
+    @staticmethod
+    def _filter_disabled(config: TaintAnalysisConfig) -> TaintAnalysisConfig:
+        """Filter out disabled sources, sinks, and sanitizers.
+        
+        Args:
+            config: Configuration to filter
+            
+        Returns:
+            TaintAnalysisConfig: Filtered configuration with only enabled items
+        """
+        enabled_sources = [s for s in config.sources if s.enabled]
+        enabled_sinks = [s for s in config.sinks if s.enabled]
+        enabled_sanitizers = [s for s in config.sanitizers if s.enabled]
+        
+        disabled_count = (
+            len(config.sources) - len(enabled_sources) +
+            len(config.sinks) - len(enabled_sinks) +
+            len(config.sanitizers) - len(enabled_sanitizers)
+        )
+        
+        if disabled_count > 0:
+            logger.debug(f"Filtered out {disabled_count} disabled items")
+        
+        return TaintAnalysisConfig(
+            sources=enabled_sources,
+            sinks=enabled_sinks,
+            sanitizers=enabled_sanitizers,
+            max_path_length=config.max_path_length,
+            include_implicit_flows=config.include_implicit_flows,
+            confidence_threshold=config.confidence_threshold,
+            exclude_files=config.exclude_files,
+            exclude_functions=config.exclude_functions,
+            include_safe_flows=config.include_safe_flows,
+            group_by_vulnerability=config.group_by_vulnerability,
+        )
+    
+    @staticmethod
+    def save_config(
+        config: TaintAnalysisConfig,
+        output_path: Union[str, Path],
+        format: str = "yaml"
+    ) -> None:
+        """Save configuration to file.
+        
+        Args:
+            config: Configuration to save
+            output_path: Path where to save the configuration
+            format: Output format ('yaml' or 'json')
+            
+        Raises:
+            ValueError: If format is unsupported
+        """
+        path = Path(output_path)
+        
+        # Ensure parent directory exists
+        path.parent.mkdir(parents=True, exist_ok=True)
+        
+        if format.lower() in ['yaml', 'yml']:
+            content = yaml.dump(
+                config.model_dump(),
+                default_flow_style=False,
+                sort_keys=False,
+                indent=2
+            )
+        elif format.lower() == 'json':
+            content = config.model_dump_json(indent=2)
+        else:
+            raise ValueError(f"Unsupported format: {format}. Use 'yaml' or 'json'")
+        
+        path.write_text(content)
+        logger.info(f"Saved taint configuration to {path}")
+    
+    @staticmethod
+    def validate_config(config: TaintAnalysisConfig) -> list[str]:
+        """Validate configuration and return list of warnings/errors.
+        
+        Args:
+            config: Configuration to validate
+            
+        Returns:
+            list[str]: List of validation issues (empty if valid)
+        """
+        issues = []
+        
+        # Check for duplicate names
+        source_names = [s.name for s in config.sources]
+        if len(source_names) != len(set(source_names)):
+            duplicates = [name for name in source_names if source_names.count(name) > 1]
+            issues.append(f"Duplicate source names found: {', '.join(set(duplicates))}")
+        
+        sink_names = [s.name for s in config.sinks]
+        if len(sink_names) != len(set(sink_names)):
+            duplicates = [name for name in sink_names if sink_names.count(name) > 1]
+            issues.append(f"Duplicate sink names found: {', '.join(set(duplicates))}")
+        
+        sanitizer_names = [s.name for s in config.sanitizers]
+        if len(sanitizer_names) != len(set(sanitizer_names)):
+            duplicates = [name for name in sanitizer_names if sanitizer_names.count(name) > 1]
+            issues.append(f"Duplicate sanitizer names found: {', '.join(set(duplicates))}")
+        
+        # Validate patterns are not empty
+        for source in config.sources:
+            if not source.pattern.strip():
+                issues.append(f"Empty pattern for source: {source.name}")
+        
+        for sink in config.sinks:
+            if not sink.pattern.strip():
+                issues.append(f"Empty pattern for sink: {sink.name}")
+        
+        for sanitizer in config.sanitizers:
+            if not sanitizer.pattern.strip():
+                issues.append(f"Empty pattern for sanitizer: {sanitizer.name}")
+        
+        # Check if there are any sources and sinks
+        if not config.sources:
+            issues.append("No taint sources configured")
+        
+        if not config.sinks:
+            issues.append("No taint sinks configured")
+        
+        return issues
diff --git a/codeanalyzer/core.py b/codeanalyzer/core.py
index b8cfcca..51ccecd 100644
--- a/codeanalyzer/core.py
+++ b/codeanalyzer/core.py
@@ -70,6 +70,7 @@ def __init__(self, options: AnalysisOptions) -> None:
         self.virtualenv: Optional[Path] = None
         self.using_ray: bool = options.using_ray
         self.file_name: Optional[Path] = options.file_name
+        self.analysis_depth: int = options.analysis_level
 
     @staticmethod
     def _cmd_exec_helper(
@@ -361,11 +362,21 @@ def __exit__(self, *args, **kwargs) -> None:
     def analyze(self) -> PyApplication:
         """Analyze the project and return a PyApplication with symbol table.
         
+        Analysis levels:
+        - Level 1: Symbol table only
+        - Level 2: Symbol table + call graph (requires CodeQL)
+        - Level 3: Symbol table + call graph + taint analysis (requires CodeQL)
+        
         Uses caching to avoid re-analyzing unchanged files.
         """
+        # Validate analysis level requirements
+        if self.analysis_depth >= 2 and not self.using_codeql:
+            logger.error("Analysis levels 2 and 3 require --codeql flag")
+            raise ValueError("CodeQL is required for analysis levels 2 and above")
+        
         cache_file = self.cache_dir / "analysis_cache.json"
         
-        # Try to load existing cached analysis 
+        # Try to load existing cached analysis
         cached_pyapplication = None
         if not self.rebuild_analysis and cache_file.exists():
             try:
@@ -375,7 +386,7 @@ def analyze(self) -> PyApplication:
                 logger.warning(f"Failed to load cache: {e}. Rebuilding analysis.")
                 cached_pyapplication = None
 
-        # Build symbol table from cached application if available (if no available, the build a new one)
+        # Level 1: Build symbol table
         symbol_table = self._build_symbol_table(cached_pyapplication.symbol_table if cached_pyapplication else {})
 
         # Build the call graph in four steps:
@@ -399,10 +410,17 @@ def analyze(self) -> PyApplication:
 
         # Recreate pyapplication
         app = PyApplication.builder().symbol_table(symbol_table).call_graph(call_graph).build()
-        
+
+        # Level 3: Add taint analysis (if CodeQL is enabled)
+        if self.analysis_depth >= 3 and self.using_codeql:
+            logger.info("Performing taint analysis (Level 3)...")
+            taint_results = self._perform_taint_analysis(symbol_table=symbol_table)
+            app.taint_analysis = taint_results
+            logger.info(f"✅ Taint analysis complete. Found {len(taint_results.flows)} flows.")
+
         # Save to cache
         self._save_analysis_cache(app, cache_file)
-        
+
         return app
 
     def _load_pyapplication_from_cache(self, cache_file: Path) -> PyApplication:
@@ -717,4 +735,69 @@ def _get_call_graph(
             return edges
         except Exception as exc:
             logger.warning(f"CodeQL call-graph extraction failed: {exc}")
-            return []
\ No newline at end of file
+            return []
+
+    def _perform_taint_analysis(self, symbol_table: Optional[Dict[str, PyModule]] = None):
+        """Perform taint analysis using CodeQL.
+
+        Args:
+            symbol_table: Optional symbol table from analysis level 1.  When
+                provided, taint sources and sinks are resolved to the matching
+                ``PyCallsite`` objects already captured during syntactic analysis.
+
+        Returns:
+            PyTaintAnalysisResult: Complete taint analysis results
+
+        Raises:
+            ValueError: If CodeQL database is not available
+        """
+        from codeanalyzer.semantic_analysis.codeql.codeql_analysis import CodeQL
+        from codeanalyzer.config.taint_config_loader import TaintConfigLoader
+        from codeanalyzer.schema.py_schema import PyTaintAnalysisResult
+
+        if not self.db_path:
+            raise ValueError("CodeQL database not available for taint analysis")
+
+        # Load taint configuration
+        if self.options.taint_config:
+            logger.info(f"Loading taint configuration from {self.options.taint_config}")
+            taint_config = TaintConfigLoader.load_config(
+                self.options.taint_config,
+                use_defaults=True
+            )
+        else:
+            logger.info("Using default taint analysis configuration")
+            taint_config = TaintConfigLoader.load_config(use_defaults=True)
+
+        # Log configuration summary
+        logger.info(f"Taint analysis configuration:")
+        logger.info(f"  - Sources: {len(taint_config.sources)}")
+        logger.info(f"  - Sinks: {len(taint_config.sinks)}")
+        logger.info(f"  - Sanitizers: {len(taint_config.sanitizers)}")
+
+        # Perform analysis
+        codeql = CodeQL(
+            project_dir=self.project_dir,
+            db_path=self.db_path,
+            codeql_bin=self.codeql_bin,
+            codeql_packs_dir=self.codeql_packs_dir,
+            taint_config=taint_config,
+        )
+
+        results = codeql.analyze_taint_flows(symbol_table=symbol_table)
+
+        # Log summary
+        logger.info(f"Taint analysis summary:")
+        logger.info(f"  - Total flows detected: {len(results.flows)}")
+
+        n_critical = sum(1 for f in results.flows if f.severity == "critical")
+        n_high = sum(1 for f in results.flows if f.severity == "high")
+        n_medium = sum(1 for f in results.flows if f.severity == "medium")
+        n_low = sum(1 for f in results.flows if f.severity == "low")
+        if results.flows:
+            logger.info(f"  - Critical: {n_critical}")
+            logger.info(f"  - High: {n_high}")
+            logger.info(f"  - Medium: {n_medium}")
+            logger.info(f"  - Low: {n_low}")
+
+        return results
diff --git a/codeanalyzer/options/options.py b/codeanalyzer/options/options.py
index 1602d45..e4d32e8 100644
--- a/codeanalyzer/options/options.py
+++ b/codeanalyzer/options/options.py
@@ -14,6 +14,7 @@ class AnalysisOptions:
     input: Path
     output: Optional[Path] = None
     format: OutputFormat = OutputFormat.JSON
+    analysis_level: int = 1
     using_codeql: bool = False
     using_ray: bool = False
     rebuild_analysis: bool = False
@@ -22,3 +23,4 @@ class AnalysisOptions:
     cache_dir: Optional[Path] = None
     clear_cache: bool = False
     verbosity: int = 0
+    taint_config: Optional[Path] = None
diff --git a/codeanalyzer/schema/py_schema.py b/codeanalyzer/schema/py_schema.py
index 8bef391..6dd004b 100644
--- a/codeanalyzer/schema/py_schema.py
+++ b/codeanalyzer/schema/py_schema.py
@@ -339,6 +339,374 @@ class PyModule(BaseModel):
     file_size: Optional[int] = None
 
 
+# ============================================================================
+# Taint Analysis Models (Analysis Level 3)
+# ============================================================================
+
+@builder
+@msgpk
+class TaintSourceConfig(BaseModel):
+    """Configuration entry that tells the CodeQL query generator where
+    untrusted data can enter the application.
+
+    Each entry is turned into a predicate clause inside the generated
+    ``isConfiguredSource`` CodeQL predicate.
+    """
+
+    name: str
+    """Unique identifier for this source entry (used for logging and deduplication)."""
+
+    description: str
+    """Human-readable explanation of what this source represents."""
+
+    pattern: str
+    """CodeQL API-graph expression that matches the source call site.
+
+    Must be a valid CodeQL expression that evaluates to a ``DataFlow::Node``,
+    e.g. ``API::builtin("input").getACall()`` or
+    ``API::moduleImport("flask").getMember("request").getMember("args").asSource()``.
+    All string literals inside the pattern must use double quotes (CodeQL
+    does not support single-quoted strings).
+    """
+
+    source_type: str
+    """Logical category label attached to every flow that originates here.
+
+    Examples: ``"user_input"``, ``"web_request"``, ``"environment_variable"``,
+    ``"file_read"``, ``"http_request"``.  The label is propagated to
+    ``PyTaintSource.source_type`` in the analysis results.
+    """
+
+    enabled: bool = True
+    """When ``False`` this entry is filtered out before query generation."""
+
+
+@builder
+@msgpk
+class TaintSinkConfig(BaseModel):
+    """Configuration entry that tells the CodeQL query generator where
+    tainted data reaching this call site would be dangerous.
+
+    Each entry is turned into a predicate clause inside the generated
+    ``isConfiguredSink`` CodeQL predicate.
+    """
+
+    name: str
+    """Unique identifier for this sink entry (used for logging and deduplication)."""
+
+    description: str
+    """Human-readable explanation of what this sink represents."""
+
+    pattern: str
+    """CodeQL API-graph expression that matches the sink call site.
+
+    Must be a valid CodeQL expression that evaluates to a ``DataFlow::Node``,
+    e.g. ``API::moduleImport("sqlite3").getMember("execute").getACall()``.
+    All string literals inside the pattern must use double quotes.
+    """
+
+    sink_type: str
+    """Logical category label attached to every flow that terminates here.
+
+    Examples: ``"sql_execution"``, ``"command_execution"``, ``"code_execution"``,
+    ``"file_access"``, ``"template_rendering"``.  The label is propagated to
+    ``PyTaintSink.sink_type`` in the analysis results.
+    """
+
+    vulnerability_type: str
+    """Human-readable vulnerability class reported in the analysis results.
+
+    Examples: ``"SQL Injection"``, ``"Command Injection"``, ``"Path Traversal"``,
+    ``"Cross-Site Scripting (XSS)"``, ``"Code Injection"``.
+    """
+
+    severity: Literal["critical", "high", "medium", "low"]
+    """Risk level of a confirmed taint flow reaching this sink.
+
+    Propagated verbatim to ``PyTaintSink.severity`` and ``PyTaintFlow.severity``.
+    """
+
+    enabled: bool = True
+    """When ``False`` this entry is filtered out before query generation."""
+
+    argument_index: Optional[int] = None
+    """Zero-based index of the argument that must be tainted for the sink to fire.
+
+    When set, the generated predicate uses
+    ``pattern.getParameter(argument_index).asSink()`` so that only the
+    specific argument position is tracked (e.g. index ``0`` for the query
+    string in ``cursor.execute(query, params)``).  When ``None`` the call
+    itself is used as the sink node.
+    """
+
+
+@builder
+@msgpk
+class TaintSanitizerConfig(BaseModel):
+    """Configuration entry that tells the CodeQL query generator which
+    call sites act as sanitizers, blocking taint propagation.
+
+    Each entry is turned into a predicate clause inside the generated
+    ``isConfiguredSanitizer`` CodeQL predicate.
+    """
+
+    name: str
+    """Unique identifier for this sanitizer entry."""
+
+    description: str
+    """Human-readable explanation of what this sanitizer does."""
+
+    pattern: str
+    """CodeQL API-graph expression that matches the sanitizing call site.
+
+    Must be a valid CodeQL expression that evaluates to a ``DataFlow::Node``,
+    e.g. ``API::moduleImport("html").getMember("escape").getACall()``.
+    All string literals inside the pattern must use double quotes.
+    """
+
+    sanitizes: List[str] = []
+    """Informational list of vulnerability types this sanitizer mitigates.
+
+    Not used by the CodeQL query generator (all enabled sanitizers block all
+    flows); present for documentation and future fine-grained filtering.
+    Examples: ``["xss", "template_injection"]``, ``["command_injection"]``.
+    """
+
+    enabled: bool = True
+    """When ``False`` this entry is filtered out before query generation."""
+
+
+@builder
+@msgpk
+class TaintAnalysisConfig(BaseModel):
+    """Complete, self-contained configuration for a taint analysis run.
+
+    Passed to ``TaintQueryGenerator.generate_query()`` which turns it into a
+    single executable CodeQL query.  All three lists are filtered to remove
+    disabled entries before query generation.
+    """
+
+    sources: List[TaintSourceConfig] = []
+    """Ordered list of taint source definitions.  At least one enabled source
+    is required for the analysis to produce results."""
+
+    sinks: List[TaintSinkConfig] = []
+    """Ordered list of taint sink definitions.  At least one enabled sink is
+    required for the analysis to produce results."""
+
+    sanitizers: List[TaintSanitizerConfig] = []
+    """Ordered list of sanitizer definitions.  May be empty; when non-empty
+    the generated query will not report flows that pass through a sanitizer."""
+
+    max_path_length: int = 10
+    """Maximum number of intermediate steps in a reported taint path.
+    Longer paths are still detected but truncated in the output."""
+
+    include_implicit_flows: bool = False
+    """Whether to track implicit (control-flow) taint in addition to explicit
+    (data-flow) taint.  Enabling this increases recall but also false positives."""
+
+    confidence_threshold: Literal["high", "medium", "low"] = "medium"
+    """Minimum confidence level for a flow to be included in the results.
+    Currently informational; all flows are reported regardless of this value."""
+
+    exclude_files: List[str] = []
+    """Glob patterns for source files to exclude from analysis (e.g. test files)."""
+
+    exclude_functions: List[str] = []
+    """Qualified function names to exclude as sources or sinks."""
+
+    include_safe_flows: bool = False
+    """When ``True``, also report flows that pass through a sanitizer.
+    Useful for auditing sanitizer coverage."""
+
+    group_by_vulnerability: bool = True
+    """When ``True``, results are grouped by vulnerability type in log output."""
+
+
+@builder
+@msgpk
+class PyTaintSource(BaseModel):
+    """Represents a taint source - where untrusted data enters the system.
+
+    Sources are always call sites (e.g. ``input()``, ``request.args.get()``,
+    ``os.getenv()``).  The ``call_site`` field captures the full call-site
+    metadata from the symbol table so that downstream tasks can access
+    receiver type, argument types, callee signature, and precise location
+    without duplicating that information here.
+    """
+
+    source_type: str
+    """Logical category of the source (e.g. ``"user_input"``, ``"web_request"``)."""
+
+    call_site: PyCallsite
+    """The call-site in the symbol table where tainted data originates."""
+
+    description: Optional[str] = None
+
+
+@builder
+@msgpk
+class PyTaintSink(BaseModel):
+    """Represents a taint sink - where tainted data could cause harm.
+
+    Sinks are always call sites (e.g. ``cursor.execute()``, ``os.system()``,
+    ``eval()``).  The ``call_site`` field captures the full call-site metadata
+    from the symbol table so that downstream tasks can access receiver type,
+    argument types, callee signature, and precise location without duplicating
+    that information here.
+    """
+
+    sink_type: str
+    """Logical category of the sink (e.g. ``"sql_execution"``, ``"command_execution"``)."""
+
+    call_site: PyCallsite
+    """The call-site in the symbol table where tainted data is consumed."""
+
+    severity: Literal["critical", "high", "medium", "low"] = "medium"
+    description: Optional[str] = None
+
+
+@builder
+@msgpk
+class PyTaintFlowStep(BaseModel):
+    """Represents a single intermediate step in a taint flow path.
+
+    A path is the ordered sequence of program points through which tainted
+    data travels from a source to a sink.  Each step records the location
+    and role of one such program point.
+
+    Note: the current CodeQL query does not populate intermediate path steps
+    (``path`` is always empty in ``PyTaintFlow``).  This model is reserved
+    for future path-step extraction.
+    """
+
+    location: str
+    """Absolute file path of the source file containing this step."""
+
+    function_name: str
+    """Simple name of the enclosing function or method (``"<module>"`` at
+    module level)."""
+
+    start_line: int = -1
+    """1-based line number where this step begins; ``-1`` if unknown."""
+
+    end_line: int = -1
+    """1-based line number where this step ends; ``-1`` if unknown."""
+
+    start_column: int = -1
+    """0-based column offset where this step begins; ``-1`` if unknown."""
+
+    end_column: int = -1
+    """0-based column offset where this step ends; ``-1`` if unknown."""
+
+    expression: Optional[str] = None
+    """Source-code expression at this step as a string, if available."""
+
+    step_type: Literal["source", "propagation", "sink"] = "propagation"
+    """Role of this step in the flow path.
+
+    * ``"source"`` — the first step; tainted data originates here.
+    * ``"propagation"`` — an intermediate step; tainted data passes through.
+    * ``"sink"`` — the last step; tainted data reaches a dangerous operation.
+    """
+
+    description: Optional[str] = None
+    """Optional human-readable description of what happens at this step."""
+
+
+@builder
+@msgpk
+class PyTaintFlow(BaseModel):
+    """Represents a complete, confirmed taint flow from a source to a sink.
+
+    A taint flow means that data originating at ``source`` (an untrusted
+    input call site) can reach ``sink`` (a dangerous operation call site)
+    without passing through a sanitizer, as determined by CodeQL's
+    inter-procedural dataflow analysis.
+    """
+
+    flow_id: str
+    """Stable identifier for this flow, derived from source and sink locations.
+
+    Format: ``"<source_file>:<source_line>-><sink_file>:<sink_line>"``.
+    Used for deduplication across incremental analysis runs.
+    """
+
+    source: PyTaintSource
+    """The call site where untrusted data enters the application.
+
+    Carries a ``PyCallsite`` that links back to the symbol table entry
+    (when the symbol table was available during analysis).
+    """
+
+    sink: PyTaintSink
+    """The call site where tainted data reaches a dangerous operation.
+
+    Carries a ``PyCallsite`` that links back to the symbol table entry
+    (when the symbol table was available during analysis).
+    """
+
+    path: List[PyTaintFlowStep] = []
+    """Ordered list of intermediate steps between source and sink.
+
+    Currently always empty — reserved for future path-step extraction.
+    """
+
+    vulnerability_type: str
+    """Human-readable vulnerability class, e.g. ``"SQL Injection"``,
+    ``"Command Injection"``, ``"Path Traversal"``.
+
+    Derived from the matching ``TaintSinkConfig.vulnerability_type``.
+    """
+
+    severity: Literal["critical", "high", "medium", "low"] = "medium"
+    """Risk level of this flow, inherited from ``TaintSinkConfig.severity``."""
+
+    confidence: Literal["high", "medium", "low"] = "medium"
+    """Confidence in the reported flow.  Currently always ``"medium"``
+    (CodeQL's dataflow analysis is sound but the sink patterns may
+    over-approximate)."""
+
+    description: Optional[str] = None
+    """Human-readable summary of the flow, e.g.
+    ``"Tainted data from user_input flows to SQL Injection"``."""
+
+
+@builder
+@msgpk
+class PyTaintAnalysisResult(BaseModel):
+    """Container for all taint analysis results for a project.
+
+    Source and sink information is embedded in each ``PyTaintFlow`` via
+    ``flow.source`` and ``flow.sink`` (both of which carry a ``PyCallsite``),
+    so there is no need for separate top-level source/sink lists.
+    """
+
+    project_path: str
+    """Absolute path to the root of the analysed project."""
+
+    flows: List[PyTaintFlow] = []
+    """All confirmed taint flows detected in the project.
+
+    Each flow represents a path from an untrusted source to a dangerous sink
+    that was not blocked by a sanitizer.  An empty list means no
+    vulnerabilities were detected with the current configuration.
+    """
+
+    analysis_timestamp: Optional[str] = None
+    """ISO-8601 UTC timestamp of when the analysis completed, e.g.
+    ``"2025-05-15T14:00:00+00:00"``."""
+
+    codeql_database_path: Optional[str] = None
+    """Absolute path to the CodeQL database used for this analysis run.
+    Useful for reproducing or extending the analysis."""
+
+
+# ============================================================================
+# Application Model (combines all analysis levels)
+# ============================================================================
+
 @builder
 @msgpk
 class PyCallEdge(BaseModel):
@@ -361,7 +729,14 @@ class PyCallEdge(BaseModel):
 @builder
 @msgpk
 class PyApplication(BaseModel):
-    """Represents a Python application."""
+    """Represents a Python application with multi-level analysis results.
+    
+    Analysis Levels:
+    - Level 1: symbol_table (syntactic analysis)
+    - Level 2: call_graph (control flow analysis) - TODO: implement storage
+    - Level 3: taint_analysis (data flow security analysis)
+    """
 
     symbol_table: Dict[str, PyModule]
     call_graph: List[PyCallEdge] = []
+    taint_analysis: Optional[PyTaintAnalysisResult] = None
diff --git a/codeanalyzer/semantic_analysis/codeql/codeql_analysis.py b/codeanalyzer/semantic_analysis/codeql/codeql_analysis.py
index 0c0e046..8d71e62 100644
--- a/codeanalyzer/semantic_analysis/codeql/codeql_analysis.py
+++ b/codeanalyzer/semantic_analysis/codeql/codeql_analysis.py
@@ -21,14 +21,23 @@
 """
 
 from collections import Counter
+from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, Dict, Iterator, List, Tuple, Union
+from typing import Any, Dict, Iterator, List, Optional, Tuple, Union
 
 from pandas import DataFrame
 
-from codeanalyzer.schema.py_schema import PyCallEdge, PyModule
+from codeanalyzer.schema.py_schema import PyCallEdge, PyCallsite, PyModule
 from codeanalyzer.semantic_analysis.call_graph import iter_callables_in_symbol_table
 from codeanalyzer.semantic_analysis.codeql.codeql_query_runner import CodeQLQueryRunner
+from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
+from codeanalyzer.schema.py_schema import (
+    TaintAnalysisConfig,
+    PyTaintAnalysisResult,
+    PyTaintSource,
+    PyTaintSink,
+    PyTaintFlow,
+)
 from codeanalyzer.utils import logger
 
 
@@ -49,11 +58,13 @@ def __init__(
         db_path: Path,
         codeql_bin: Union[str, Path, None] = None,
         codeql_packs_dir: Union[str, Path, None] = None,
+        taint_config: Optional[TaintAnalysisConfig] = None,
     ) -> None:
-        self.project_dir = project_dir
+        self.project_dir = Path(project_dir)
         self.db_path = db_path
         self.codeql_bin = codeql_bin
         self.codeql_packs_dir = codeql_packs_dir
+        self.taint_config = taint_config
         self._cached_df: "DataFrame | None" = None
 
     def _query_call_edges(self) -> DataFrame:
@@ -181,6 +192,33 @@ def _build_callable_location_index(
             index[(abs_path, c.start_line)] = c
         return index
 
+    @staticmethod
+    def _build_callsite_location_index(
+        symbol_table: Dict[str, PyModule],
+    ) -> Dict[Tuple[str, int], PyCallsite]:
+        """Build ``(absolute_file_path, start_line) -> PyCallsite`` from the symbol table.
+
+        Iterates every ``PyCallsite`` in every ``PyCallable.call_sites`` list so
+        that taint sources and sinks can be resolved to the rich call-site objects
+        already captured during syntactic analysis (receiver type, argument types,
+        callee signature, …).
+
+        Paths are resolved to absolute form to match CodeQL's ``getAbsolutePath()``.
+        When two call sites share the same (file, start_line) the first one wins
+        (ambiguity is rare and an approximation is acceptable here).
+        """
+        index: Dict[Tuple[str, int], PyCallsite] = {}
+        for callable_ in iter_callables_in_symbol_table(symbol_table):
+            try:
+                abs_path = str(Path(callable_.path).resolve())
+            except (OSError, RuntimeError):
+                abs_path = callable_.path
+            for cs in callable_.call_sites:
+                key = (abs_path, cs.start_line)
+                if key not in index:
+                    index[key] = cs
+        return index
+
     def _iter_resolved_rows(
         self, symbol_table: Dict[str, PyModule]
     ) -> "Iterator[Tuple[str, str, Any]]":
@@ -298,3 +336,137 @@ def augment_call_sites(self, symbol_table: Dict[str, PyModule]) -> int:
                 f"CodeQL: augmented {augmented} PyCallsite.callee_signature entries."
             )
         return augmented
+
+    def analyze_taint_flows(
+        self,
+        config_override: Optional[TaintAnalysisConfig] = None,
+        symbol_table: Optional[Dict[str, PyModule]] = None,
+    ) -> PyTaintAnalysisResult:
+        """Perform taint analysis with configurable sources/sinks/sanitizers.
+
+        Args:
+            config_override: Optional configuration to override instance config.
+            symbol_table: Optional symbol table produced by analysis level 1.
+                When provided, taint sources and sinks are resolved to the
+                matching ``PyCallsite`` objects already captured during syntactic
+                analysis (giving access to receiver type, argument types, callee
+                signature, …).  If a match cannot be found a new ``PyCallsite``
+                is constructed from the CodeQL location data as a fallback.
+
+        Returns:
+            PyTaintAnalysisResult: Complete taint analysis results
+
+        Raises:
+            ValueError: If no taint configuration is available
+        """
+        config = config_override or self.taint_config
+
+        if not config:
+            raise ValueError("No taint configuration provided. Pass config to __init__ or analyze_taint_flows()")
+
+        logger.info("Starting taint analysis...")
+        logger.debug(f"Configuration: {len(config.sources)} sources, "
+                     f"{len(config.sinks)} sinks, {len(config.sanitizers)} sanitizers")
+
+        # Build callsite index from symbol table for best-effort linkage
+        callsite_index: Dict[Tuple[str, int], PyCallsite] = (
+            self._build_callsite_location_index(symbol_table)
+            if symbol_table is not None
+            else {}
+        )
+        if callsite_index:
+            logger.debug(f"Built callsite index with {len(callsite_index)} entries from symbol table")
+
+        query_string = TaintQueryGenerator.generate_query(config)
+        column_names = TaintQueryGenerator.get_column_names()
+
+        logger.debug("Executing CodeQL taint analysis query...")
+        with CodeQLQueryRunner(
+            self.db_path,
+            codeql_bin=self.codeql_bin,
+            codeql_packs_dir=self.codeql_packs_dir,
+        ) as runner:
+            result_df = runner.execute(query_string, column_names)
+
+        logger.info(f"Query returned {len(result_df)} taint flows")
+
+        flows = []
+        sources_dict: Dict[str, PyTaintSource] = {}
+        sinks_dict: Dict[str, PyTaintSink] = {}
+        n_callsite_hits = 0
+
+        for _, row in result_df.iterrows():
+            source_key = f"{row['source_file']}:{row['source_start_line']}"
+            if source_key not in sources_dict:
+                # Try to resolve from symbol table; fall back to constructing new
+                src_cs_key = (row["source_file"], int(row["source_start_line"]))
+                source_call_site = callsite_index.get(src_cs_key) or PyCallsite(
+                    method_name=row["source_expr"] or row["source_function"],
+                    receiver_expr=None,
+                    start_line=int(row["source_start_line"]),
+                    end_line=int(row["source_end_line"]),
+                    start_column=int(row["source_start_col"]),
+                    end_column=int(row["source_end_col"]),
+                )
+                if src_cs_key in callsite_index:
+                    n_callsite_hits += 1
+                source = PyTaintSource(
+                    source_type=row["source_type"],
+                    call_site=source_call_site,
+                    description=f"Untrusted data from {row['source_type']} "
+                                f"in {row['source_qualified_function']} "
+                                f"({row['source_file']}:{row['source_start_line']})",
+                )
+                sources_dict[source_key] = source
+
+            sink_key = f"{row['sink_file']}:{row['sink_start_line']}"
+            if sink_key not in sinks_dict:
+                # Try to resolve from symbol table; fall back to constructing new
+                snk_cs_key = (row["sink_file"], int(row["sink_start_line"]))
+                sink_call_site = callsite_index.get(snk_cs_key) or PyCallsite(
+                    method_name=row["sink_expr"] or row["sink_function"],
+                    receiver_expr=None,
+                    start_line=int(row["sink_start_line"]),
+                    end_line=int(row["sink_end_line"]),
+                    start_column=int(row["sink_start_col"]),
+                    end_column=int(row["sink_end_col"]),
+                )
+                if snk_cs_key in callsite_index:
+                    n_callsite_hits += 1
+                sink = PyTaintSink(
+                    sink_type=row["sink_type"],
+                    call_site=sink_call_site,
+                    severity=row["severity"],
+                    description=f"Potential {row['vulnerability_type']} vulnerability "
+                                f"in {row['sink_qualified_function']} "
+                                f"({row['sink_file']}:{row['sink_start_line']})",
+                )
+                sinks_dict[sink_key] = sink
+
+            flow = PyTaintFlow(
+                flow_id=row["flow_id"],
+                source=sources_dict[source_key],
+                sink=sinks_dict[sink_key],
+                path=[],
+                vulnerability_type=row["vulnerability_type"],
+                severity=row["severity"],
+                confidence="medium",
+                description=row["message"],
+            )
+            flows.append(flow)
+
+        n_critical = sum(1 for f in flows if f.severity == "critical")
+        n_high = sum(1 for f in flows if f.severity == "high")
+        logger.info(f"Taint analysis complete: {len(flows)} flows, "
+                    f"{n_critical} critical, {n_high} high")
+        if callsite_index:
+            logger.debug(f"Symbol-table callsite linkage: {n_callsite_hits} of "
+                         f"{len(sources_dict) + len(sinks_dict)} source/sink nodes "
+                         f"resolved to existing PyCallsite objects")
+
+        return PyTaintAnalysisResult(
+            project_path=str(self.project_dir),
+            flows=flows,
+            analysis_timestamp=datetime.now(timezone.utc).isoformat(),
+            codeql_database_path=str(self.db_path),
+        )
diff --git a/codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py b/codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py
index 17eb368..e23035e 100644
--- a/codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py
+++ b/codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py
@@ -63,6 +63,7 @@ def __init__(self, database_path: str, codeql_bin=None, codeql_packs_dir=None):
             Path(codeql_packs_dir) if codeql_packs_dir is not None else None
         )
         self.temp_file_path: Path = None
+        self._temp_qlpack_dir: "tempfile.TemporaryDirectory | None" = None
 
     def __enter__(self):
         """Context entry that prepares paths to execute a CodeQL query.
diff --git a/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py b/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py
new file mode 100644
index 0000000..0b985cb
--- /dev/null
+++ b/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py
@@ -0,0 +1,428 @@
+################################################################################
+# Copyright IBM Corporation 2025
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+"""Dynamic CodeQL query generator for taint analysis.
+
+This module generates CodeQL queries from taint analysis configurations.
+
+Design philosophy
+-----------------
+CodeQL's ``codeql/python-all`` pack ships comprehensive built-in taint models
+via ``semmle.python.security.dataflow.*`` — these cover hundreds of SQL,
+command, path-traversal, XSS, and other sinks automatically, without any
+manual API enumeration.
+
+The generated query therefore uses **two complementary layers**:
+
+1. **Built-in CodeQL security models** (primary, comprehensive):
+   - ``RemoteFlowSource`` — all web-framework request sources (Flask, Django,
+     FastAPI, aiohttp, …) recognised by CodeQL out of the box.
+   - ``SqlInjection::Sink`` — all DB cursor patterns (sqlite3, psycopg2,
+     mysql-connector, SQLAlchemy, …).
+   - ``CommandInjection::Sink`` — subprocess, os.system, shlex, …
+   - ``CodeInjection::Sink`` — eval, exec, compile, …
+   - ``PathTraversal::Sink`` — open(), os.path operations, …
+   - ``XSS::Sink`` — Flask/Django template rendering, …
+
+2. **Configurable user-defined patterns** (supplementary):
+   Additional sources/sinks/sanitizers supplied via ``TaintAnalysisConfig``
+   that extend the built-in coverage with project-specific APIs.
+
+Uses the modern CodeQL Python API (codeql/python-all >= 7.x):
+- ``DataFlow::ConfigSig`` interface with ``implements``
+- ``TaintTracking::Global<Config>`` module
+- ``API::Node.asSource()`` / ``API::Node.getParameter(N).asSink()``
+"""
+
+from typing import List
+from codeanalyzer.schema.py_schema import (
+    TaintAnalysisConfig,
+    TaintSourceConfig,
+    TaintSinkConfig,
+    TaintSanitizerConfig,
+)
+
+
+class TaintQueryGenerator:
+    """Generates CodeQL queries from taint analysis configuration."""
+
+    @staticmethod
+    def generate_query(config: TaintAnalysisConfig) -> str:
+        """Generate complete taint analysis CodeQL query from configuration.
+
+        The query combines CodeQL's built-in security models with any
+        user-configured patterns, giving comprehensive coverage without
+        requiring exhaustive manual API enumeration.
+
+        Args:
+            config: Taint analysis configuration
+
+        Returns:
+            str: Complete CodeQL query ready for execution
+        """
+        query_parts = []
+
+        query_parts.append(TaintQueryGenerator._generate_header())
+        query_parts.append(TaintQueryGenerator._generate_imports())
+        query_parts.append(TaintQueryGenerator._generate_source_predicate(config.sources))
+        query_parts.append(TaintQueryGenerator._generate_sink_predicate(config.sinks))
+
+        if config.sanitizers:
+            query_parts.append(TaintQueryGenerator._generate_sanitizer_predicate(config.sanitizers))
+
+        query_parts.append(TaintQueryGenerator._generate_config_sig(
+            has_sanitizers=len(config.sanitizers) > 0
+        ))
+        query_parts.append(TaintQueryGenerator._generate_flow_module())
+        query_parts.append(TaintQueryGenerator._generate_helpers())
+        query_parts.append(TaintQueryGenerator._generate_main_query())
+
+        return "\n\n".join(query_parts)
+
+    # ------------------------------------------------------------------
+    # Header / imports
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _generate_header() -> str:
+        """Generate query header with metadata."""
+        return """/**
+ * @name Configurable Taint Analysis
+ * @description Taint analysis combining CodeQL built-in security models with
+ *              configurable user-defined sources, sinks, and sanitizers.
+ * @kind path-problem
+ * @id python/configurable-taint-analysis
+ * @problem.severity warning
+ */"""
+
+    @staticmethod
+    def _generate_imports() -> str:
+        """Generate import statements.
+
+        Imports both the core DataFlow/TaintTracking modules and the built-in
+        security-sink/source classes from codeql/python-all so that the query
+        benefits from CodeQL's comprehensive model library.
+
+        Module names verified against codeql/python-all 7.x:
+          - SqlInjectionCustomizations    → module SqlInjection { class Sink }
+          - CommandInjectionCustomizations → module CommandInjection { class Sink }
+          - CodeInjectionCustomizations   → module CodeInjection { class Sink }
+          - PathInjectionCustomizations   → module PathInjection { class Sink }
+          - ReflectedXSSCustomizations    → module ReflectedXss { class Sink }
+          - RemoteFlowSources             → class RemoteFlowSource
+        """
+        return """import python
+import semmle.python.dataflow.new.DataFlow
+import semmle.python.dataflow.new.TaintTracking
+import semmle.python.ApiGraphs
+import semmle.python.security.dataflow.SqlInjectionCustomizations
+import semmle.python.security.dataflow.CommandInjectionCustomizations
+import semmle.python.security.dataflow.CodeInjectionCustomizations
+import semmle.python.security.dataflow.PathInjectionCustomizations
+import semmle.python.security.dataflow.ReflectedXSSCustomizations
+import semmle.python.dataflow.new.RemoteFlowSources"""
+
+    # ------------------------------------------------------------------
+    # Pattern helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _pattern_to_source_node(pattern: str) -> str:
+        """Convert a pattern string to a DataFlow::Node expression for sources."""
+        if pattern.endswith(".getACall()"):
+            return pattern
+        return f"{pattern}.asSource()"
+
+    @staticmethod
+    def _pattern_to_sink_node(pattern: str, argument_index: int) -> str:
+        """Convert a pattern string to a DataFlow::Node expression for sinks."""
+        if pattern.endswith(".getACall()"):
+            api_node = pattern[:-len(".getACall()")]
+            return f"{api_node}.getParameter({argument_index}).asSink()"
+        return f"{pattern}.getParameter({argument_index}).asSink()"
+
+    @staticmethod
+    def _pattern_to_sanitizer_node(pattern: str) -> str:
+        """Convert a pattern string to a DataFlow::Node expression for sanitizers."""
+        if pattern.endswith(".getACall()"):
+            return pattern
+        return f"{pattern}.asSource()"
+
+    # ------------------------------------------------------------------
+    # Predicate generators
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _generate_source_predicate(sources: List[TaintSourceConfig]) -> str:
+        """Generate isSource predicate combining built-in RemoteFlowSource with
+        any user-configured sources.
+
+        Built-in ``RemoteFlowSource`` covers all web-framework request inputs
+        (Flask ``request.args/form/json``, Django ``request.GET/POST``,
+        FastAPI, aiohttp, Tornado, …) recognised by CodeQL's model library.
+        User-configured patterns extend this with project-specific sources
+        (e.g. ``sys.argv``, ``input()``, custom HTTP clients).
+        """
+        lines = [
+            "predicate isConfiguredSource(DataFlow::Node node, string sourceType) {",
+            "  // Built-in: all web-framework request sources recognised by CodeQL",
+            "  (node instanceof RemoteFlowSource and sourceType = \"web_request\")",
+        ]
+
+        for source in sources:
+            lines.append("  or")
+            lines.append(f"  // User-configured: {source.description}")
+            node_expr = TaintQueryGenerator._pattern_to_source_node(source.pattern)
+            lines.append(f"  (node = {node_expr} and sourceType = \"{source.source_type}\")")
+
+        lines.append("}")
+        return "\n".join(lines)
+
+    @staticmethod
+    def _generate_sink_predicate(sinks: List[TaintSinkConfig]) -> str:
+        """Generate isSink predicate combining built-in security sinks with
+        any user-configured sinks.
+
+        Built-in sink classes from ``codeql/python-all`` cover:
+        - ``SqlInjection::Sink``   — sqlite3, psycopg2, mysql-connector,
+                                     SQLAlchemy, Django ORM raw queries, …
+        - ``CommandInjection::Sink`` — subprocess.*, os.system, os.popen, …
+        - ``CodeInjection::Sink``  — eval(), exec(), compile(), …
+        - ``PathTraversal::Sink``  — open(), os.path.*, pathlib.Path.open(), …
+        - ``XSS::Sink``            — Flask/Django template rendering, …
+
+        User-configured patterns extend this with project-specific sinks.
+        """
+        lines = [
+            "predicate isConfiguredSink(DataFlow::Node node, string sinkType, string severity, string vulnerabilityType) {",
+            "  // Built-in: SQL injection sinks (sqlite3, psycopg2, SQLAlchemy, Django ORM raw, …)",
+            "  (node instanceof SqlInjection::Sink and",
+            "   sinkType = \"sql_execution\" and severity = \"critical\" and vulnerabilityType = \"SQL Injection\")",
+            "  or",
+            "  // Built-in: Command injection sinks (subprocess.*, os.system, os.popen, …)",
+            "  (node instanceof CommandInjection::Sink and",
+            "   sinkType = \"command_execution\" and severity = \"critical\" and vulnerabilityType = \"Command Injection\")",
+            "  or",
+            "  // Built-in: Code injection sinks (eval, exec, compile, …)",
+            "  (node instanceof CodeInjection::Sink and",
+            "   sinkType = \"code_execution\" and severity = \"critical\" and vulnerabilityType = \"Code Injection\")",
+            "  or",
+            "  // Built-in: Path injection sinks (open, os.path.*, pathlib.Path.open, …)",
+            "  (node instanceof PathInjection::Sink and",
+            "   sinkType = \"file_access\" and severity = \"high\" and vulnerabilityType = \"Path Traversal\")",
+            "  or",
+            "  // Built-in: Reflected XSS sinks (Flask/Django template rendering, …)",
+            "  (node instanceof ReflectedXss::Sink and",
+            "   sinkType = \"template_rendering\" and severity = \"high\" and vulnerabilityType = \"Cross-Site Scripting (XSS)\")",
+        ]
+
+        for sink in sinks:
+            lines.append("  or")
+            lines.append(f"  // User-configured: {sink.description}")
+
+            if sink.argument_index is not None:
+                node_expr = TaintQueryGenerator._pattern_to_sink_node(sink.pattern, sink.argument_index)
+            else:
+                node_expr = TaintQueryGenerator._pattern_to_source_node(sink.pattern)
+
+            lines.append("  (")
+            lines.append(f"    node = {node_expr} and")
+            lines.append(f"    sinkType = \"{sink.sink_type}\" and")
+            lines.append(f"    severity = \"{sink.severity}\" and")
+            lines.append(f"    vulnerabilityType = \"{sink.vulnerability_type}\"")
+            lines.append("  )")
+
+        lines.append("}")
+        return "\n".join(lines)
+
+    @staticmethod
+    def _generate_sanitizer_predicate(sanitizers: List[TaintSanitizerConfig]) -> str:
+        """Generate isConfiguredSanitizer predicate from configuration."""
+        lines = [
+            "predicate isConfiguredSanitizer(DataFlow::Node node) {",
+        ]
+
+        for i, sanitizer in enumerate(sanitizers):
+            if i > 0:
+                lines.append("  or")
+            lines.append(f"  // {sanitizer.description}")
+            node_expr = TaintQueryGenerator._pattern_to_sanitizer_node(sanitizer.pattern)
+            lines.append(f"  node = {node_expr}")
+
+        lines.append("}")
+        return "\n".join(lines)
+
+    @staticmethod
+    def _generate_config_sig(has_sanitizers: bool) -> str:
+        """Generate DataFlow::ConfigSig module using modern CodeQL API."""
+        lines = [
+            "private module ConfiguredTaintConfig implements DataFlow::ConfigSig {",
+            "  predicate isSource(DataFlow::Node source) {",
+            "    isConfiguredSource(source, _)",
+            "  }",
+            "",
+            "  predicate isSink(DataFlow::Node sink) {",
+            "    isConfiguredSink(sink, _, _, _)",
+            "  }",
+        ]
+
+        if has_sanitizers:
+            lines.extend([
+                "",
+                "  predicate isBarrier(DataFlow::Node node) {",
+                "    isConfiguredSanitizer(node)",
+                "  }",
+            ])
+
+        lines.extend([
+            "",
+            "  predicate observeDiffInformedIncrementalMode() { any() }",
+            "}",
+        ])
+
+        return "\n".join(lines)
+
+    @staticmethod
+    def _generate_flow_module() -> str:
+        """Generate TaintTracking::Global module instantiation."""
+        return "module ConfiguredTaintFlow = TaintTracking::Global<ConfiguredTaintConfig>;"
+
+    @staticmethod
+    def _generate_helpers() -> str:
+        """Generate helper functions for extracting metadata."""
+        return """string getFunctionName(DataFlow::Node node) {
+  result = node.getScope().(Function).getName()
+  or
+  not exists(node.getScope().(Function)) and result = "<module>"
+}
+
+string getQualifiedFunctionName(DataFlow::Node node) {
+  exists(Function f |
+    f = node.getScope() |
+    if exists(f.getScope().(Class)) then
+      result = f.getScope().(Class).getName() + "." + f.getName()
+    else
+      result = f.getName()
+  )
+  or
+  not exists(node.getScope().(Function)) and result = "<module>"
+}"""
+
+    @staticmethod
+    def _generate_main_query() -> str:
+        """Generate main query select statement using modern path-problem API."""
+        return """import ConfiguredTaintFlow::PathGraph
+
+from
+  ConfiguredTaintFlow::PathNode source,
+  ConfiguredTaintFlow::PathNode sink,
+  string sourceType,
+  string sinkType,
+  string severity,
+  string vulnerabilityType
+where
+  ConfiguredTaintFlow::flowPath(source, sink) and
+  isConfiguredSource(source.getNode(), sourceType) and
+  isConfiguredSink(sink.getNode(), sinkType, severity, vulnerabilityType)
+select
+  // 1. Element (sink - required for path-problem)
+  sink.getNode(),
+  // 2. Source path node (required for path-problem)
+  source,
+  // 3. Sink path node (required for path-problem)
+  sink,
+  // 4. Message (required for path-problem)
+  "Tainted data from " + sourceType + " flows to " + vulnerabilityType,
+
+  // Additional metadata columns
+  // Flow ID
+  source.getNode().getLocation().getFile().getAbsolutePath() + ":" +
+    source.getNode().getLocation().getStartLine().toString() + "->" +
+    sink.getNode().getLocation().getFile().getAbsolutePath() + ":" +
+    sink.getNode().getLocation().getStartLine().toString(),
+
+  // Source information
+  source.getNode().getLocation().getFile().getAbsolutePath(),
+  source.getNode().getLocation().getStartLine(),
+  source.getNode().getLocation().getEndLine(),
+  source.getNode().getLocation().getStartColumn(),
+  source.getNode().getLocation().getEndColumn(),
+  sourceType,
+  source.getNode().toString(),
+  getFunctionName(source.getNode()),
+  getQualifiedFunctionName(source.getNode()),
+
+  // Sink information
+  sink.getNode().getLocation().getFile().getAbsolutePath(),
+  sink.getNode().getLocation().getStartLine(),
+  sink.getNode().getLocation().getEndLine(),
+  sink.getNode().getLocation().getStartColumn(),
+  sink.getNode().getLocation().getEndColumn(),
+  sinkType,
+  severity,
+  sink.getNode().toString(),
+  getFunctionName(sink.getNode()),
+  getQualifiedFunctionName(sink.getNode()),
+  vulnerabilityType,
+  // Confidence (always medium for configurable analysis)
+  "medium" """
+
+    @staticmethod
+    def get_column_names() -> List[str]:
+        """Get the column names for the query results.
+
+        Column order matches the select statement:
+          1. element (sink node - required for path-problem)
+          2. source_path (PathNode - required for path-problem)
+          3. sink_path (PathNode - required for path-problem)
+          4. message (string - required for path-problem)
+          5+ additional metadata columns
+
+        Returns:
+            List[str]: Column names in the order they appear in the query
+        """
+        return [
+            # Required path-problem columns (positions 1-4)
+            "element",
+            "source_path",
+            "sink_path",
+            "message",
+            # Additional metadata
+            "flow_id",
+            # Source columns
+            "source_file",
+            "source_start_line",
+            "source_end_line",
+            "source_start_col",
+            "source_end_col",
+            "source_type",
+            "source_expr",
+            "source_function",
+            "source_qualified_function",
+            # Sink columns
+            "sink_file",
+            "sink_start_line",
+            "sink_end_line",
+            "sink_start_col",
+            "sink_end_col",
+            "sink_type",
+            "severity",
+            "sink_expr",
+            "sink_function",
+            "sink_qualified_function",
+            "vulnerability_type",
+            "confidence",
+        ]
diff --git a/test/conftest.py b/test/conftest.py
index 9af14d4..35043e9 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -1,4 +1,6 @@
 # conftest.py
+import shutil
+import subprocess
 from pathlib import Path
 
 import pytest
@@ -34,3 +36,156 @@ def whole_applications__xarray() -> Path:
 def single_functionalities__stuff_nested_in_functions() -> Path:
     """Returns the path to the 'single_functionalities/stuff_nested_in_functions' directory."""
     return Path(__file__).parent.resolve().joinpath("fixtures", "single_functionalities", "stuff_nested_in_functions_test")
+
+
+# ============================================================================
+# Taint Analysis CodeQL Database Fixtures
+# ============================================================================
+
+_TAINT_FIXTURES_DIR = Path(__file__).parent / "fixtures" / "taint_analysis"
+
+_TAINT_FIXTURE_APPS = {
+    "sql_injection": _TAINT_FIXTURES_DIR / "sql_injection_app",
+    "command_injection": _TAINT_FIXTURES_DIR / "command_injection_app",
+    "path_traversal": _TAINT_FIXTURES_DIR / "path_traversal_app",
+    "xss": _TAINT_FIXTURES_DIR / "xss_app",
+    "flask": _TAINT_FIXTURES_DIR / "flask_app",
+    "sanitizer": _TAINT_FIXTURES_DIR / "sanitizer_app",
+}
+
+
+def _codeql_available() -> bool:
+    """Check if CodeQL CLI is available."""
+    return shutil.which("codeql") is not None
+
+
+def _create_codeql_database(source_dir: Path, db_path: Path) -> bool:
+    """Create a CodeQL database for a Python source directory."""
+    cmd = [
+        "codeql", "database", "create", str(db_path),
+        f"--source-root={source_dir}",
+        "--language=python",
+        "--overwrite",
+    ]
+    result = subprocess.run(cmd, capture_output=True, text=True)
+    return result.returncode == 0
+
+
+@pytest.fixture(scope="session")
+def codeql_databases(tmp_path_factory):
+    """Session-scoped fixture that creates CodeQL databases for all taint fixture apps.
+
+    Databases are created once per test session and shared across all tests.
+    If CodeQL is not available, returns None and dependent tests will be skipped.
+
+    Returns:
+        dict: Map of fixture name -> database path, or None if CodeQL unavailable
+    """
+    if not _codeql_available():
+        return None
+
+    db_base = tmp_path_factory.mktemp("codeql_dbs")
+    databases = {}
+
+    for name, source_dir in _TAINT_FIXTURE_APPS.items():
+        db_path = db_base / f"{name}_db"
+        if _create_codeql_database(source_dir, db_path):
+            databases[name] = db_path
+        else:
+            databases[name] = None
+
+    return databases
+
+
+@pytest.fixture(scope="session")
+def sql_injection_db(codeql_databases):
+    """Session-scoped CodeQL database for SQL injection fixture."""
+    if codeql_databases is None:
+        pytest.skip("CodeQL not available")
+    db = codeql_databases.get("sql_injection")
+    if db is None:
+        pytest.skip("Failed to create SQL injection CodeQL database")
+    return db
+
+
+@pytest.fixture(scope="session")
+def command_injection_db(codeql_databases):
+    """Session-scoped CodeQL database for command injection fixture."""
+    if codeql_databases is None:
+        pytest.skip("CodeQL not available")
+    db = codeql_databases.get("command_injection")
+    if db is None:
+        pytest.skip("Failed to create command injection CodeQL database")
+    return db
+
+
+@pytest.fixture(scope="session")
+def path_traversal_db(codeql_databases):
+    """Session-scoped CodeQL database for path traversal fixture."""
+    if codeql_databases is None:
+        pytest.skip("CodeQL not available")
+    db = codeql_databases.get("path_traversal")
+    if db is None:
+        pytest.skip("Failed to create path traversal CodeQL database")
+    return db
+
+
+@pytest.fixture(scope="session")
+def xss_db(codeql_databases):
+    """Session-scoped CodeQL database for XSS fixture."""
+    if codeql_databases is None:
+        pytest.skip("CodeQL not available")
+    db = codeql_databases.get("xss")
+    if db is None:
+        pytest.skip("Failed to create XSS CodeQL database")
+    return db
+
+
+@pytest.fixture(scope="session")
+def flask_db(codeql_databases):
+    """Session-scoped CodeQL database for Flask fixture."""
+    if codeql_databases is None:
+        pytest.skip("CodeQL not available")
+    db = codeql_databases.get("flask")
+    if db is None:
+        pytest.skip("Failed to create Flask CodeQL database")
+    return db
+
+
+@pytest.fixture(scope="session")
+def sanitizer_db(codeql_databases):
+    """Session-scoped CodeQL database for sanitizer fixture."""
+    if codeql_databases is None:
+        pytest.skip("CodeQL not available")
+    db = codeql_databases.get("sanitizer")
+    if db is None:
+        pytest.skip("Failed to create sanitizer CodeQL database")
+    return db
+
+
+@pytest.fixture(scope="session")
+def codeql_packs_dir(tmp_path_factory):
+    """Session-scoped fixture that installs a qlpack with codeql/python-all once.
+
+    Returns the pack directory path, or None if CodeQL is unavailable.
+    Tests that need this should skip when it returns None.
+    """
+    if not _codeql_available():
+        return None
+
+    pack_dir = tmp_path_factory.mktemp("codeql_qlpack")
+    qlpack_yml = pack_dir / "qlpack.yml"
+    qlpack_yml.write_text(
+        "name: codeanalyzer-test-pack\n"
+        "version: 1.0.0\n"
+        "dependencies:\n"
+        '  "codeql/python-all": "*"\n'
+    )
+    result = subprocess.run(
+        ["codeql", "pack", "install", str(pack_dir)],
+        capture_output=True,
+        text=True,
+    )
+    if result.returncode != 0:
+        return None
+    return pack_dir
diff --git a/test/fixtures/taint_analysis/command_injection_app/vulnerable.py b/test/fixtures/taint_analysis/command_injection_app/vulnerable.py
new file mode 100644
index 0000000..1dd5972
--- /dev/null
+++ b/test/fixtures/taint_analysis/command_injection_app/vulnerable.py
@@ -0,0 +1,172 @@
+"""
+Command Injection vulnerable test application.
+This file contains intentionally vulnerable code for testing taint analysis.
+"""
+
+import os
+import subprocess
+import sys
+
+
+def vulnerable_os_system(filename):
+    """Command injection via os.system."""
+    # VULNERABLE: User input directly in shell command
+    os.system("cat " + filename)
+
+
+def vulnerable_subprocess_shell(user_input):
+    """Command injection via subprocess with shell=True."""
+    # VULNERABLE: shell=True with user input
+    subprocess.call("ls -la " + user_input, shell=True)
+
+
+def vulnerable_popen(command):
+    """Command injection via os.popen."""
+    # VULNERABLE: User input in os.popen
+    result = os.popen("echo " + command).read()
+    return result
+
+
+def vulnerable_from_argv():
+    """Command injection from command-line arguments."""
+    if len(sys.argv) > 1:
+        directory = sys.argv[1]
+        # VULNERABLE: Command-line arg in shell command
+        os.system(f"ls -la {directory}")
+
+
+def vulnerable_from_input():
+    """Command injection from user input."""
+    filename = input("Enter filename to display: ")
+    # VULNERABLE: User input in shell command
+    subprocess.run(f"cat {filename}", shell=True)
+
+
+def vulnerable_eval(user_code):
+    """Code injection via eval."""
+    # VULNERABLE: eval with user input
+    result = eval(user_code)
+    return result
+
+
+def vulnerable_exec(user_code):
+    """Code injection via exec."""
+    # VULNERABLE: exec with user input
+    exec(user_code)
+
+
+def safe_subprocess_no_shell(filename):
+    """Safe subprocess call without shell."""
+    # SAFE: No shell, arguments as list
+    subprocess.run(["cat", filename])
+
+
+def safe_subprocess_with_sanitization(filename):
+    """Safe subprocess with input validation."""
+    # SAFE: Input validation
+    import shlex
+    safe_filename = shlex.quote(filename)
+    subprocess.run(f"cat {safe_filename}", shell=True)
+
+
+# Inter-procedural taint flow examples
+def get_command_from_user():
+    """Source: Get command from user."""
+    return input("Enter command: ")
+
+
+def build_shell_command(cmd):
+    """Intermediate: Build shell command."""
+    return "ls -la " + cmd
+
+
+def execute_shell_command(command):
+    """Sink: Execute shell command."""
+    os.system(command)
+
+
+def vulnerable_interprocedural():
+    """Vulnerable code with taint flow across functions."""
+    # Source -> Intermediate -> Sink
+    user_cmd = get_command_from_user()
+    full_cmd = build_shell_command(user_cmd)
+    execute_shell_command(full_cmd)
+
+
+class CommandExecutor:
+    """Class with vulnerable methods demonstrating inter-method taint flow."""
+    
+    def get_directory_from_args(self):
+        """Source: Get directory from command-line."""
+        return sys.argv[1] if len(sys.argv) > 1 else "/tmp"
+    
+    def prepare_command(self, directory):
+        """Intermediate: Prepare command with tainted data."""
+        return f"find {directory} -name '*.txt'"
+    
+    def run_command(self, command):
+        """Sink: Execute command."""
+        return subprocess.check_output(command, shell=True)
+    
+    def vulnerable_find_files(self):
+        """Vulnerable method with taint flow across class methods."""
+        # Source -> Intermediate -> Sink within class
+        directory = self.get_directory_from_args()
+        command = self.prepare_command(directory)
+        return self.run_command(command)
+
+
+def sanitize_input(user_input):
+    """Intermediate function that doesn't properly sanitize."""
+    # This doesn't actually sanitize for command injection
+    return user_input.replace(";", "").replace("&", "")
+
+
+def vulnerable_with_weak_sanitization():
+    """Vulnerable code with weak sanitization."""
+    # Source
+    user_input = input("Enter filename: ")
+    # Weak sanitization (still tainted)
+    sanitized = sanitize_input(user_input)
+    # Sink
+    os.system("cat " + sanitized)
+
+
+def get_code_from_file(filename):
+    """Source: Read code from file."""
+    with open(filename, 'r') as f:
+        return f.read()
+
+
+def vulnerable_eval_from_file():
+    """Vulnerable eval with code from file."""
+    # Source
+    code = get_code_from_file(sys.argv[1] if len(sys.argv) > 1 else "input.txt")
+    # Sink
+    eval(code)
+
+
+def main():
+    """Main function demonstrating vulnerabilities."""
+    # Direct vulnerabilities
+    vulnerable_os_system(sys.argv[1] if len(sys.argv) > 1 else "/etc/passwd")
+    vulnerable_subprocess_shell(input("Enter directory: "))
+    vulnerable_popen(input("Enter command: "))
+    vulnerable_eval(input("Enter expression: "))
+    
+    # Inter-procedural vulnerabilities
+    vulnerable_interprocedural()
+    
+    # Class-based vulnerabilities
+    executor = CommandExecutor()
+    executor.vulnerable_find_files()
+    
+    # Vulnerability with weak sanitization
+    vulnerable_with_weak_sanitization()
+    
+    # Safe examples
+    safe_subprocess_no_shell("/etc/passwd")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/fixtures/taint_analysis/flask_app/vulnerable.py b/test/fixtures/taint_analysis/flask_app/vulnerable.py
new file mode 100644
index 0000000..df3b4ce
--- /dev/null
+++ b/test/fixtures/taint_analysis/flask_app/vulnerable.py
@@ -0,0 +1,231 @@
+"""
+Flask web application with taint vulnerabilities.
+This file contains intentionally vulnerable code for testing taint analysis.
+"""
+
+try:
+    from flask import Flask, request, render_template_string
+    import sqlite3
+    import os
+    
+    app = Flask(__name__)
+    
+    
+    @app.route('/search')
+    def vulnerable_search():
+        """SQL injection in search endpoint."""
+        query = request.args.get('q', '')
+        conn = sqlite3.connect('app.db')
+        cursor = conn.cursor()
+        # VULNERABLE: User input from request.args in SQL query
+        sql = f"SELECT * FROM products WHERE name LIKE '%{query}%'"
+        cursor.execute(sql)
+        results = cursor.fetchall()
+        return str(results)
+    
+    
+    @app.route('/user/<username>')
+    def vulnerable_user_profile(username):
+        """XSS in user profile."""
+        # VULNERABLE: User input from URL parameter in HTML
+        html = f"<h1>Profile: {username}</h1>"
+        return html
+    
+    
+    @app.route('/execute')
+    def vulnerable_execute():
+        """Command injection in execute endpoint."""
+        cmd = request.args.get('cmd', '')
+        # VULNERABLE: User input from request.args in shell command
+        result = os.popen(cmd).read()
+        return result
+    
+    
+    @app.route('/file')
+    def vulnerable_file_read():
+        """Path traversal in file read."""
+        filename = request.args.get('name', '')
+        # VULNERABLE: User input from request.args in file path
+        with open(f"/var/www/files/{filename}", 'r') as f:
+            return f.read()
+    
+    
+    @app.route('/template')
+    def vulnerable_template():
+        """Server-Side Template Injection."""
+        template = request.args.get('tmpl', '')
+        # VULNERABLE: User input in template rendering
+        return render_template_string(template)
+    
+    
+    @app.route('/login', methods=['POST'])
+    def vulnerable_login():
+        """SQL injection in login form."""
+        username = request.form.get('username', '')
+        password = request.form.get('password', '')
+        conn = sqlite3.connect('app.db')
+        cursor = conn.cursor()
+        # VULNERABLE: User input from request.form in SQL query
+        sql = f"SELECT * FROM users WHERE username='{username}' AND password='{password}'"
+        cursor.execute(sql)
+        user = cursor.fetchone()
+        return "Login successful" if user else "Login failed"
+    
+    
+    # Inter-procedural taint flow examples
+    def get_search_query():
+        """Source: Get search query from request."""
+        return request.args.get('q', '')
+    
+    
+    def build_search_sql(query):
+        """Intermediate: Build SQL query."""
+        return f"SELECT * FROM products WHERE name LIKE '%{query}%'"
+    
+    
+    def execute_sql(sql):
+        """Sink: Execute SQL query."""
+        conn = sqlite3.connect('app.db')
+        cursor = conn.cursor()
+        cursor.execute(sql)
+        return cursor.fetchall()
+    
+    
+    @app.route('/search_v2')
+    def vulnerable_search_interprocedural():
+        """SQL injection with inter-procedural taint flow."""
+        # Source -> Intermediate -> Sink
+        query = get_search_query()
+        sql = build_search_sql(query)
+        results = execute_sql(sql)
+        return str(results)
+    
+    
+    class UserService:
+        """Service class with vulnerable methods."""
+        
+        def get_user_id_from_request(self):
+            """Source: Get user ID from request."""
+            return request.args.get('id', '')
+        
+        def format_user_query(self, user_id):
+            """Intermediate: Format user query."""
+            return f"SELECT * FROM users WHERE id = {user_id}"
+        
+        def fetch_user(self, query):
+            """Sink: Execute user query."""
+            conn = sqlite3.connect('app.db')
+            cursor = conn.cursor()
+            cursor.execute(query)
+            return cursor.fetchone()
+        
+        def get_user_info(self):
+            """Vulnerable method with taint flow across class methods."""
+            user_id = self.get_user_id_from_request()
+            query = self.format_user_query(user_id)
+            return self.fetch_user(query)
+    
+    
+    user_service = UserService()
+    
+    
+    @app.route('/user_info')
+    def vulnerable_user_info():
+        """SQL injection via service class."""
+        user = user_service.get_user_info()
+        return str(user)
+    
+    
+    @app.route('/safe_search')
+    def safe_search():
+        """Safe search with parameterized query."""
+        query = request.args.get('q', '')
+        conn = sqlite3.connect('app.db')
+        cursor = conn.cursor()
+        # SAFE: Parameterized query
+        sql = "SELECT * FROM products WHERE name LIKE ?"
+        cursor.execute(sql, (f'%{query}%',))
+        results = cursor.fetchall()
+        return str(results)
+    
+    
+    if __name__ == '__main__':
+        app.run(debug=True)
+
+except ImportError:
+    # Flask not installed, create dummy functions for analysis
+    import sqlite3
+    import os
+    import sys
+    
+    class Request:
+        """Mock request object."""
+        def __init__(self):
+            self.args = {'q': '', 'id': '', 'cmd': '', 'name': '', 'tmpl': ''}
+            self.form = {'username': '', 'password': ''}
+        
+        def get(self, key, default=''):
+            return self.args.get(key, default)
+    
+    request = Request()
+    
+    
+    def vulnerable_search():
+        """SQL injection in search endpoint."""
+        query = request.args.get('q', '')
+        conn = sqlite3.connect('app.db')
+        cursor = conn.cursor()
+        sql = f"SELECT * FROM products WHERE name LIKE '%{query}%'"
+        cursor.execute(sql)
+        return cursor.fetchall()
+    
+    
+    def get_search_query():
+        """Source: Get search query from request."""
+        return request.args.get('q', '')
+    
+    
+    def build_search_sql(query):
+        """Intermediate: Build SQL query."""
+        return f"SELECT * FROM products WHERE name LIKE '%{query}%'"
+    
+    
+    def execute_sql(sql):
+        """Sink: Execute SQL query."""
+        conn = sqlite3.connect('app.db')
+        cursor = conn.cursor()
+        cursor.execute(sql)
+        return cursor.fetchall()
+    
+    
+    def vulnerable_search_interprocedural():
+        """SQL injection with inter-procedural taint flow."""
+        query = get_search_query()
+        sql = build_search_sql(query)
+        results = execute_sql(sql)
+        return results
+    
+    
+    class UserService:
+        """Service class with vulnerable methods."""
+        
+        def get_user_id_from_request(self):
+            """Source: Get user ID from request."""
+            return request.args.get('id', '')
+        
+        def format_user_query(self, user_id):
+            """Intermediate: Format user query."""
+            return f"SELECT * FROM users WHERE id = {user_id}"
+        
+        def fetch_user(self, query):
+            """Sink: Execute user query."""
+            conn = sqlite3.connect('app.db')
+            cursor = conn.cursor()
+            cursor.execute(query)
+            return cursor.fetchone()
+        
+        def get_user_info(self):
+            """Vulnerable method with taint flow across class methods."""
+            user_id = self.get_user_id_from_request()
+            query = self.format_user_query(user_id)
+            return self.fetch_user(query)
diff --git a/test/fixtures/taint_analysis/path_traversal_app/vulnerable.py b/test/fixtures/taint_analysis/path_traversal_app/vulnerable.py
new file mode 100644
index 0000000..d36735a
--- /dev/null
+++ b/test/fixtures/taint_analysis/path_traversal_app/vulnerable.py
@@ -0,0 +1,189 @@
+"""
+Path Traversal vulnerable test application.
+This file contains intentionally vulnerable code for testing taint analysis.
+"""
+
+import os
+import sys
+
+
+def vulnerable_open_direct(filename):
+    """Path traversal via direct file open."""
+    # VULNERABLE: User input directly in file path
+    with open("/var/www/uploads/" + filename, 'r') as f:
+        return f.read()
+
+
+def vulnerable_open_fstring(filename):
+    """Path traversal via f-string."""
+    # VULNERABLE: f-string with user input
+    with open(f"/var/www/uploads/{filename}", 'r') as f:
+        return f.read()
+
+
+def vulnerable_from_argv():
+    """Path traversal from command-line arguments."""
+    if len(sys.argv) > 1:
+        filepath = sys.argv[1]
+        # VULNERABLE: Command-line arg in file path
+        with open(filepath, 'r') as f:
+            print(f.read())
+
+
+def vulnerable_from_input():
+    """Path traversal from user input."""
+    filename = input("Enter filename to read: ")
+    # VULNERABLE: User input in file path
+    with open("/var/www/data/" + filename, 'r') as f:
+        return f.read()
+
+
+def vulnerable_os_path_join(user_path):
+    """Path traversal via os.path.join."""
+    # VULNERABLE: os.path.join doesn't prevent traversal
+    full_path = os.path.join("/var/www/uploads", user_path)
+    with open(full_path, 'r') as f:
+        return f.read()
+
+
+def vulnerable_write_file(filename, content):
+    """Path traversal in file write."""
+    # VULNERABLE: User input in write path
+    with open("/var/www/uploads/" + filename, 'w') as f:
+        f.write(content)
+
+
+def safe_with_normalization(filename):
+    """Safe file access with path normalization."""
+    # SAFE: Path normalization and validation
+    base_dir = "/var/www/uploads"
+    full_path = os.path.normpath(os.path.join(base_dir, filename))
+    
+    # Ensure the path is within base_dir
+    if not full_path.startswith(base_dir):
+        raise ValueError("Invalid file path")
+    
+    with open(full_path, 'r') as f:
+        return f.read()
+
+
+def safe_with_basename(filename):
+    """Safe file access using basename."""
+    # SAFE: Only use basename, preventing directory traversal
+    safe_filename = os.path.basename(filename)
+    with open(f"/var/www/uploads/{safe_filename}", 'r') as f:
+        return f.read()
+
+
+# Inter-procedural taint flow examples
+def get_filename_from_user():
+    """Source: Get filename from user."""
+    return input("Enter filename: ")
+
+
+def construct_file_path(filename):
+    """Intermediate: Construct file path."""
+    return "/var/www/uploads/" + filename
+
+
+def read_file_content(filepath):
+    """Sink: Read file content."""
+    with open(filepath, 'r') as f:
+        return f.read()
+
+
+def vulnerable_interprocedural():
+    """Vulnerable code with taint flow across functions."""
+    # Source -> Intermediate -> Sink
+    filename = get_filename_from_user()
+    filepath = construct_file_path(filename)
+    content = read_file_content(filepath)
+    return content
+
+
+class FileManager:
+    """Class with vulnerable methods demonstrating inter-method taint flow."""
+    
+    def __init__(self, base_dir="/var/www/data"):
+        self.base_dir = base_dir
+    
+    def get_filename_from_args(self):
+        """Source: Get filename from command-line."""
+        return sys.argv[1] if len(sys.argv) > 1 else "default.txt"
+    
+    def build_path(self, filename):
+        """Intermediate: Build file path with tainted data."""
+        return self.base_dir + "/" + filename
+    
+    def read_file(self, filepath):
+        """Sink: Read file."""
+        with open(filepath, 'r') as f:
+            return f.read()
+    
+    def vulnerable_read(self):
+        """Vulnerable method with taint flow across class methods."""
+        # Source -> Intermediate -> Sink within class
+        filename = self.get_filename_from_args()
+        filepath = self.build_path(filename)
+        return self.read_file(filepath)
+
+
+def process_filename(filename):
+    """Intermediate function that processes filename."""
+    # Remove leading/trailing whitespace but doesn't prevent traversal
+    return filename.strip()
+
+
+def vulnerable_with_processing():
+    """Vulnerable code with filename processing."""
+    # Source
+    raw_filename = input("Enter filename: ")
+    # Processing (still tainted)
+    processed = process_filename(raw_filename)
+    # Sink
+    with open("/var/www/uploads/" + processed, 'r') as f:
+        return f.read()
+
+
+def get_path_from_config():
+    """Source: Get path from configuration file."""
+    # Simulating reading from a config file
+    return sys.argv[1] if len(sys.argv) > 1 else "../../../etc/passwd"
+
+
+def vulnerable_from_config():
+    """Vulnerable code with path from config."""
+    # Source
+    filepath = get_path_from_config()
+    # Sink
+    with open(filepath, 'r') as f:
+        return f.read()
+
+
+def main():
+    """Main function demonstrating vulnerabilities."""
+    # Direct vulnerabilities
+    vulnerable_open_direct(sys.argv[1] if len(sys.argv) > 1 else "../../etc/passwd")
+    vulnerable_open_fstring(input("Enter filename: "))
+    vulnerable_os_path_join(input("Enter path: "))
+    
+    # Inter-procedural vulnerabilities
+    vulnerable_interprocedural()
+    
+    # Class-based vulnerabilities
+    fm = FileManager()
+    fm.vulnerable_read()
+    
+    # Vulnerability with processing
+    vulnerable_with_processing()
+    
+    # Vulnerability from config
+    vulnerable_from_config()
+    
+    # Safe examples
+    safe_with_normalization("safe_file.txt")
+    safe_with_basename("../../../etc/passwd")  # Will only use "passwd"
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/fixtures/taint_analysis/sanitizer_app/mixed.py b/test/fixtures/taint_analysis/sanitizer_app/mixed.py
new file mode 100644
index 0000000..4d9e587
--- /dev/null
+++ b/test/fixtures/taint_analysis/sanitizer_app/mixed.py
@@ -0,0 +1,114 @@
+"""
+Test application with both vulnerable and safe code.
+This demonstrates the difference between sanitized and unsanitized flows.
+"""
+
+import sqlite3
+import sys
+from html import escape
+
+
+# Vulnerable: No sanitizer
+def vulnerable_no_sanitizer():
+    """Vulnerable code without sanitizer."""
+    user_input = input("Enter username: ")
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    # VULNERABLE: No sanitization
+    query = f"SELECT * FROM users WHERE username = '{user_input}'"
+    cursor.execute(query)
+    return cursor.fetchall()
+
+
+# Safe: With sanitizer
+def safe_with_sanitizer():
+    """Safe code with sanitizer."""
+    user_input = input("Enter username: ")
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    # SAFE: Parameterized query (sanitizer)
+    query = "SELECT * FROM users WHERE username = ?"
+    cursor.execute(query, (user_input,))
+    return cursor.fetchall()
+
+
+# Vulnerable: Weak sanitization
+def weak_sanitize(user_input):
+    """Weak sanitizer that doesn't fully protect."""
+    # This only removes single quotes, but doesn't prevent all SQL injection
+    return user_input.replace("'", "")
+
+
+def vulnerable_weak_sanitizer():
+    """Vulnerable code with weak sanitization."""
+    user_input = input("Enter user ID: ")
+    # Weak sanitization
+    sanitized = weak_sanitize(user_input)
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    # STILL VULNERABLE: Weak sanitization doesn't prevent numeric injection
+    query = f"SELECT * FROM users WHERE id = {sanitized}"
+    cursor.execute(query)
+    return cursor.fetchall()
+
+
+# Safe: Strong sanitization
+def strong_sanitize_html(content):
+    """Strong HTML sanitizer."""
+    return escape(content)
+
+
+def safe_strong_sanitizer():
+    """Safe code with strong sanitization."""
+    user_content = input("Enter content: ")
+    # Strong sanitization
+    safe_content = strong_sanitize_html(user_content)
+    # SAFE: Content is properly escaped
+    html = f"<div>{safe_content}</div>"
+    return html
+
+
+# Vulnerable: Sanitizer bypassed
+def bypass_sanitizer():
+    """Vulnerable code where sanitizer is bypassed."""
+    user_input = input("Enter username: ")
+    
+    # Sanitizer exists but is not used
+    def unused_sanitizer(text):
+        return escape(text)
+    
+    # VULNERABLE: Sanitizer defined but not called
+    html = f"<h1>Welcome, {user_input}!</h1>"
+    return html
+
+
+# Safe: Sanitizer properly applied
+def proper_sanitizer_usage():
+    """Safe code with properly applied sanitizer."""
+    user_input = input("Enter username: ")
+    
+    # Sanitizer is defined
+    def html_sanitizer(text):
+        return escape(text)
+    
+    # SAFE: Sanitizer is actually used
+    safe_input = html_sanitizer(user_input)
+    html = f"<h1>Welcome, {safe_input}!</h1>"
+    return html
+
+
+def main():
+    """Main function demonstrating vulnerable vs safe code."""
+    # Vulnerable examples
+    vulnerable_no_sanitizer()
+    vulnerable_weak_sanitizer()
+    bypass_sanitizer()
+    
+    # Safe examples
+    safe_with_sanitizer()
+    safe_strong_sanitizer()
+    proper_sanitizer_usage()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/fixtures/taint_analysis/sanitizer_app/safe.py b/test/fixtures/taint_analysis/sanitizer_app/safe.py
new file mode 100644
index 0000000..9f47582
--- /dev/null
+++ b/test/fixtures/taint_analysis/sanitizer_app/safe.py
@@ -0,0 +1,201 @@
+"""
+Test application demonstrating sanitizers blocking taint flows.
+This file shows how proper sanitization prevents vulnerabilities.
+"""
+
+import sqlite3
+import subprocess
+import os
+import sys
+from html import escape
+import shlex
+
+
+# SQL Injection with Sanitizers
+def get_user_id_from_input():
+    """Source: Get user ID from input."""
+    return input("Enter user ID: ")
+
+
+def sanitize_for_sql_parameterized(user_id):
+    """Sanitizer: Use parameterized query (proper sanitization)."""
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    # SAFE: Parameterized query acts as sanitizer
+    query = "SELECT * FROM users WHERE id = ?"
+    cursor.execute(query, (user_id,))
+    return cursor.fetchall()
+
+
+def safe_sql_with_sanitizer():
+    """Safe SQL query with proper sanitization."""
+    # Source -> Sanitizer -> Sink (should NOT be flagged)
+    user_id = get_user_id_from_input()
+    result = sanitize_for_sql_parameterized(user_id)
+    return result
+
+
+# Command Injection with Sanitizers
+def get_filename_from_input():
+    """Source: Get filename from input."""
+    return input("Enter filename: ")
+
+
+def sanitize_for_shell(filename):
+    """Sanitizer: Quote shell argument."""
+    return shlex.quote(filename)
+
+
+def execute_with_sanitized_input(safe_filename):
+    """Sink: Execute command with sanitized input."""
+    # SAFE: Input has been sanitized
+    subprocess.run(f"cat {safe_filename}", shell=True)
+
+
+def safe_command_with_sanitizer():
+    """Safe command execution with proper sanitization."""
+    # Source -> Sanitizer -> Sink (should NOT be flagged)
+    filename = get_filename_from_input()
+    safe_filename = sanitize_for_shell(filename)
+    execute_with_sanitized_input(safe_filename)
+
+
+# Path Traversal with Sanitizers
+def get_filepath_from_input():
+    """Source: Get filepath from input."""
+    return input("Enter file path: ")
+
+
+def sanitize_path(filepath):
+    """Sanitizer: Normalize and validate path."""
+    base_dir = "/var/www/uploads"
+    full_path = os.path.normpath(os.path.join(base_dir, filepath))
+    
+    # Ensure the path is within base_dir
+    if not full_path.startswith(base_dir):
+        raise ValueError("Invalid file path")
+    
+    return full_path
+
+
+def read_file_safe(safe_path):
+    """Sink: Read file with sanitized path."""
+    # SAFE: Path has been sanitized
+    with open(safe_path, 'r') as f:
+        return f.read()
+
+
+def safe_file_read_with_sanitizer():
+    """Safe file read with proper sanitization."""
+    # Source -> Sanitizer -> Sink (should NOT be flagged)
+    filepath = get_filepath_from_input()
+    safe_path = sanitize_path(filepath)
+    content = read_file_safe(safe_path)
+    return content
+
+
+# XSS with Sanitizers
+def get_html_content_from_input():
+    """Source: Get HTML content from input."""
+    return input("Enter HTML content: ")
+
+
+def sanitize_html(content):
+    """Sanitizer: Escape HTML entities."""
+    return escape(content)
+
+
+def render_html_safe(safe_content):
+    """Sink: Render HTML with sanitized content."""
+    # SAFE: Content has been sanitized
+    html = f"<div>{safe_content}</div>"
+    print(html)
+    return html
+
+
+def safe_html_render_with_sanitizer():
+    """Safe HTML rendering with proper sanitization."""
+    # Source -> Sanitizer -> Sink (should NOT be flagged)
+    content = get_html_content_from_input()
+    safe_content = sanitize_html(content)
+    html = render_html_safe(safe_content)
+    return html
+
+
+# Basename sanitizer for path traversal
+def sanitize_with_basename(filepath):
+    """Sanitizer: Use only the basename."""
+    return os.path.basename(filepath)
+
+
+def safe_file_with_basename():
+    """Safe file access using basename sanitizer."""
+    # Source -> Sanitizer -> Sink (should NOT be flagged)
+    filepath = input("Enter filename: ")
+    safe_filename = sanitize_with_basename(filepath)
+    with open(f"/var/www/uploads/{safe_filename}", 'r') as f:
+        return f.read()
+
+
+# Class-based sanitization
+class SecureDatabase:
+    """Database class with proper sanitization."""
+    
+    def __init__(self):
+        self.conn = sqlite3.connect('test.db')
+        self.cursor = self.conn.cursor()
+    
+    def get_username_from_args(self):
+        """Source: Get username from command-line."""
+        return sys.argv[1] if len(sys.argv) > 1 else "admin"
+    
+    def execute_safe_query(self, username):
+        """Sanitizer + Sink: Execute parameterized query."""
+        # SAFE: Parameterized query
+        query = "SELECT * FROM users WHERE username = ?"
+        self.cursor.execute(query, (username,))
+        return self.cursor.fetchall()
+    
+    def safe_lookup(self):
+        """Safe method with sanitization."""
+        # Source -> Sanitizer/Sink (should NOT be flagged)
+        username = self.get_username_from_args()
+        return self.execute_safe_query(username)
+
+
+# Multiple sanitizers in sequence
+def double_sanitize_path(filepath):
+    """Apply multiple sanitizers."""
+    # First sanitizer: basename
+    safe_name = os.path.basename(filepath)
+    # Second sanitizer: normpath
+    safe_path = os.path.normpath(safe_name)
+    return safe_path
+
+
+def safe_with_multiple_sanitizers():
+    """Safe code with multiple sanitizers."""
+    # Source -> Sanitizer1 -> Sanitizer2 -> Sink (should NOT be flagged)
+    filepath = input("Enter path: ")
+    safe_path = double_sanitize_path(filepath)
+    with open(f"/var/www/uploads/{safe_path}", 'r') as f:
+        return f.read()
+
+
+def main():
+    """Main function demonstrating safe code with sanitizers."""
+    # All of these should be safe due to sanitizers
+    safe_sql_with_sanitizer()
+    safe_command_with_sanitizer()
+    safe_file_read_with_sanitizer()
+    safe_html_render_with_sanitizer()
+    safe_file_with_basename()
+    safe_with_multiple_sanitizers()
+    
+    # Class-based safe code
+    db = SecureDatabase()
+    db.safe_lookup()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/fixtures/taint_analysis/sql_injection_app/vulnerable.py b/test/fixtures/taint_analysis/sql_injection_app/vulnerable.py
new file mode 100644
index 0000000..334cb96
--- /dev/null
+++ b/test/fixtures/taint_analysis/sql_injection_app/vulnerable.py
@@ -0,0 +1,159 @@
+"""
+SQL Injection vulnerable test application.
+This file contains intentionally vulnerable code for testing taint analysis.
+"""
+
+import sqlite3
+import sys
+
+
+def vulnerable_query_direct(user_input):
+    """Direct SQL injection vulnerability - user input directly in query."""
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    # VULNERABLE: Direct string concatenation
+    query = "SELECT * FROM users WHERE username = '" + user_input + "'"
+    cursor.execute(query)
+    return cursor.fetchall()
+
+
+def vulnerable_query_format(user_input):
+    """SQL injection via string formatting."""
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    # VULNERABLE: String formatting
+    query = "SELECT * FROM users WHERE id = {}".format(user_input)
+    cursor.execute(query)
+    return cursor.fetchall()
+
+
+def vulnerable_query_fstring(username):
+    """SQL injection via f-string."""
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    # VULNERABLE: f-string interpolation
+    query = f"SELECT * FROM users WHERE username = '{username}'"
+    cursor.execute(query)
+    return cursor.fetchall()
+
+
+def vulnerable_from_argv():
+    """SQL injection from command-line arguments."""
+    if len(sys.argv) > 1:
+        user_id = sys.argv[1]
+        conn = sqlite3.connect('test.db')
+        cursor = conn.cursor()
+        # VULNERABLE: Command-line arg directly in query
+        query = "DELETE FROM users WHERE id = " + user_id
+        cursor.execute(query)
+        conn.commit()
+
+
+def safe_query_parameterized(user_input):
+    """Safe query using parameterized statements."""
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    # SAFE: Parameterized query
+    query = "SELECT * FROM users WHERE username = ?"
+    cursor.execute(query, (user_input,))
+    return cursor.fetchall()
+
+
+# Inter-procedural taint flow examples
+def get_user_input():
+    """Source: Get user input."""
+    return input("Enter username: ")
+
+
+def build_query(username):
+    """Intermediate function that propagates taint."""
+    return "SELECT * FROM users WHERE username = '" + username + "'"
+
+
+def execute_query(query):
+    """Sink: Execute SQL query."""
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    cursor.execute(query)
+    return cursor.fetchall()
+
+
+def vulnerable_interprocedural():
+    """Vulnerable code with taint flow across functions."""
+    # Source -> Intermediate -> Sink
+    user_input = get_user_input()
+    query = build_query(user_input)
+    results = execute_query(query)
+    return results
+
+
+class UserDatabase:
+    """Class with vulnerable methods demonstrating inter-method taint flow."""
+    
+    def __init__(self):
+        self.conn = sqlite3.connect('test.db')
+        self.cursor = self.conn.cursor()
+    
+    def get_username_from_args(self):
+        """Source: Get username from command-line."""
+        return sys.argv[1] if len(sys.argv) > 1 else "admin"
+    
+    def format_query(self, username):
+        """Intermediate: Format query with tainted data."""
+        return f"SELECT * FROM users WHERE username = '{username}'"
+    
+    def run_query(self, query):
+        """Sink: Execute query."""
+        self.cursor.execute(query)
+        return self.cursor.fetchall()
+    
+    def vulnerable_lookup(self):
+        """Vulnerable method with taint flow across class methods."""
+        # Source -> Intermediate -> Sink within class
+        username = self.get_username_from_args()
+        query = self.format_query(username)
+        return self.run_query(query)
+
+
+def process_user_data(data):
+    """Intermediate function that returns tainted data."""
+    return data.strip().upper()
+
+
+def vulnerable_with_processing():
+    """Vulnerable code with data processing in between."""
+    # Source
+    raw_input = input("Enter user ID: ")
+    # Processing (still tainted)
+    processed = process_user_data(raw_input)
+    # Sink
+    conn = sqlite3.connect('test.db')
+    cursor = conn.cursor()
+    query = "SELECT * FROM users WHERE id = " + processed
+    cursor.execute(query)
+    return cursor.fetchall()
+
+
+def main():
+    """Main function demonstrating vulnerabilities."""
+    # Direct vulnerabilities
+    vulnerable_query_direct(sys.argv[1] if len(sys.argv) > 1 else "admin")
+    vulnerable_query_format(input("Enter user ID: "))
+    vulnerable_query_fstring(input("Enter username: "))
+    
+    # Inter-procedural vulnerabilities
+    vulnerable_interprocedural()
+    
+    # Class-based vulnerabilities
+    db = UserDatabase()
+    db.vulnerable_lookup()
+    
+    # Vulnerability with processing
+    vulnerable_with_processing()
+    
+    # Safe example
+    safe_query_parameterized(input("Enter safe username: "))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/fixtures/taint_analysis/xss_app/vulnerable.py b/test/fixtures/taint_analysis/xss_app/vulnerable.py
new file mode 100644
index 0000000..7e20b10
--- /dev/null
+++ b/test/fixtures/taint_analysis/xss_app/vulnerable.py
@@ -0,0 +1,217 @@
+"""
+Cross-Site Scripting (XSS) vulnerable test application.
+This file contains intentionally vulnerable code for testing taint analysis.
+"""
+
+import sys
+from html import escape
+
+
+def vulnerable_html_output(user_input):
+    """XSS via direct HTML output."""
+    # VULNERABLE: User input directly in HTML
+    html = "<div>" + user_input + "</div>"
+    return html
+
+
+def vulnerable_html_fstring(username):
+    """XSS via f-string in HTML."""
+    # VULNERABLE: f-string with user input
+    html = f"<h1>Welcome, {username}!</h1>"
+    return html
+
+
+def vulnerable_html_format(comment):
+    """XSS via string format in HTML."""
+    # VULNERABLE: String formatting
+    html = "<p>Comment: {}</p>".format(comment)
+    return html
+
+
+def vulnerable_from_argv():
+    """XSS from command-line arguments."""
+    if len(sys.argv) > 1:
+        message = sys.argv[1]
+        # VULNERABLE: Command-line arg in HTML
+        html = f"<div class='message'>{message}</div>"
+        return html
+
+
+def vulnerable_from_input():
+    """XSS from user input."""
+    name = input("Enter your name: ")
+    # VULNERABLE: User input in HTML
+    html = "<span>Hello, " + name + "</span>"
+    return html
+
+
+def vulnerable_javascript_injection(callback):
+    """XSS via JavaScript injection."""
+    # VULNERABLE: User input in JavaScript
+    script = f"<script>callback({callback});</script>"
+    return script
+
+
+def safe_with_escape(user_input):
+    """Safe HTML output with escaping."""
+    # SAFE: HTML escaping
+    html = "<div>" + escape(user_input) + "</div>"
+    return html
+
+
+def safe_with_template(user_input):
+    """Safe HTML output using template with auto-escaping."""
+    # SAFE: Template with auto-escaping (simulated)
+    escaped_input = escape(user_input)
+    html = f"<div>{escaped_input}</div>"
+    return html
+
+
+# Inter-procedural taint flow examples
+def get_user_comment():
+    """Source: Get user comment."""
+    return input("Enter your comment: ")
+
+
+def format_html_comment(comment):
+    """Intermediate: Format comment as HTML."""
+    return f"<div class='comment'>{comment}</div>"
+
+
+def render_html(html):
+    """Sink: Render HTML (simulated)."""
+    print(html)
+    return html
+
+
+def vulnerable_interprocedural():
+    """Vulnerable code with taint flow across functions."""
+    # Source -> Intermediate -> Sink
+    comment = get_user_comment()
+    html = format_html_comment(comment)
+    render_html(html)
+
+
+class HTMLRenderer:
+    """Class with vulnerable methods demonstrating inter-method taint flow."""
+    
+    def get_username_from_args(self):
+        """Source: Get username from command-line."""
+        return sys.argv[1] if len(sys.argv) > 1 else "Guest"
+    
+    def create_greeting(self, username):
+        """Intermediate: Create greeting HTML with tainted data."""
+        return f"<h1>Hello, {username}!</h1>"
+    
+    def output_html(self, html):
+        """Sink: Output HTML."""
+        print(html)
+        return html
+    
+    def vulnerable_greeting(self):
+        """Vulnerable method with taint flow across class methods."""
+        # Source -> Intermediate -> Sink within class
+        username = self.get_username_from_args()
+        greeting = self.create_greeting(username)
+        return self.output_html(greeting)
+
+
+def capitalize_text(text):
+    """Intermediate function that processes text."""
+    # Capitalization doesn't prevent XSS
+    return text.upper()
+
+
+def vulnerable_with_processing():
+    """Vulnerable code with text processing."""
+    # Source
+    user_text = input("Enter text: ")
+    # Processing (still tainted)
+    processed = capitalize_text(user_text)
+    # Sink
+    html = f"<p>{processed}</p>"
+    print(html)
+    return html
+
+
+def get_message_from_file():
+    """Source: Get message from file."""
+    try:
+        with open(sys.argv[1] if len(sys.argv) > 1 else "message.txt", 'r') as f:
+            return f.read()
+    except:
+        return "<script>alert('default')</script>"
+
+
+def vulnerable_from_file():
+    """Vulnerable code with message from file."""
+    # Source
+    message = get_message_from_file()
+    # Sink
+    html = f"<div>{message}</div>"
+    return html
+
+
+class BlogPost:
+    """Class demonstrating complex taint flow."""
+    
+    def __init__(self):
+        self.title = ""
+        self.content = ""
+    
+    def set_title_from_input(self):
+        """Source: Set title from user input."""
+        self.title = input("Enter post title: ")
+    
+    def set_content_from_input(self):
+        """Source: Set content from user input."""
+        self.content = input("Enter post content: ")
+    
+    def render_title(self):
+        """Sink: Render title as HTML."""
+        return f"<h2>{self.title}</h2>"
+    
+    def render_content(self):
+        """Sink: Render content as HTML."""
+        return f"<div class='content'>{self.content}</div>"
+    
+    def render_full_post(self):
+        """Vulnerable method with multiple taint flows."""
+        self.set_title_from_input()
+        self.set_content_from_input()
+        title_html = self.render_title()
+        content_html = self.render_content()
+        return title_html + content_html
+
+
+def main():
+    """Main function demonstrating vulnerabilities."""
+    # Direct vulnerabilities
+    vulnerable_html_output(sys.argv[1] if len(sys.argv) > 1 else "<script>alert('XSS')</script>")
+    vulnerable_html_fstring(input("Enter username: "))
+    vulnerable_html_format(input("Enter comment: "))
+    vulnerable_javascript_injection(input("Enter callback: "))
+    
+    # Inter-procedural vulnerabilities
+    vulnerable_interprocedural()
+    
+    # Class-based vulnerabilities
+    renderer = HTMLRenderer()
+    renderer.vulnerable_greeting()
+    
+    # Vulnerability with processing
+    vulnerable_with_processing()
+    
+    # Vulnerability from file
+    vulnerable_from_file()
+    
+    # Complex class-based vulnerability
+    post = BlogPost()
+    post.render_full_post()
+    
+    # Safe examples
+    safe_with_escape("<script>alert('XSS')</script>")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/test_cli.py b/test/test_cli.py
index b4ba50d..cdce465 100644
--- a/test/test_cli.py
+++ b/test/test_cli.py
@@ -1,8 +1,12 @@
 import json
+import shutil
 from pathlib import Path
+import pytest
 from codeanalyzer.__main__ import app
 from codeanalyzer.utils import logger
 
+_TAINT_FIXTURES_DIR = Path(__file__).parent / "fixtures" / "taint_analysis"
+
 
 def test_cli_help(cli_runner):
     """Must be able to run the CLI and see help output."""
@@ -72,4 +76,126 @@ def test_single_file(cli_runner, single_functionalities__stuff_nested_in_functio
     json_obj = json.loads(Path(output_dir).joinpath("analysis.json").read_text())
     assert json_obj is not None, "JSON output should not be None"
     assert isinstance(json_obj, dict), "JSON output should be a dictionary"
-    assert "symbol_table" in json_obj.keys(), "Symbol table should be present in the output"
\ No newline at end of file
+    assert "symbol_table" in json_obj.keys(), "Symbol table should be present in the output"
+
+
+def test_cli_taint_analysis(cli_runner, tmp_path):
+    """CLI with --analysis-level 3 --codeql must produce analysis.json with taint_analysis.
+
+    Uses sql_injection_app which has 3 vulnerable cursor.execute() calls (direct concat,
+    format string, f-string) plus sys.argv → execute. CodeQL's SqlInjection::Sink model
+    detects all of them via the built-in model layer.
+    """
+    if not shutil.which("codeql"):
+        pytest.skip("CodeQL CLI not available")
+
+    sql_injection_app = _TAINT_FIXTURES_DIR / "sql_injection_app"
+    output_dir = tmp_path / "output"
+    output_dir.mkdir(parents=True, exist_ok=True)
+    cache_dir = tmp_path / "cache"
+
+    result = cli_runner.invoke(
+        app,
+        [
+            "--input", str(sql_injection_app),
+            "--output", str(output_dir),
+            "--analysis-level", "3",
+            "--codeql",
+            "--no-ray",
+            "--cache-dir", str(cache_dir),
+            "--clear-cache",
+            "--format=json",
+        ],
+        env={"NO_COLOR": "1", "TERM": "dumb"},
+    )
+
+    assert result.exit_code == 0, (
+        f"CLI command should succeed. Output:\n{result.output}"
+    )
+
+    analysis_file = output_dir / "analysis.json"
+    assert analysis_file.exists(), "analysis.json should be created in the output directory"
+
+    json_obj = json.loads(analysis_file.read_text())
+    assert isinstance(json_obj, dict), "JSON output should be a dictionary"
+
+    # --- Symbol table ---
+    assert "symbol_table" in json_obj, "symbol_table must be present in analysis.json"
+    assert len(json_obj["symbol_table"]) > 0, "symbol_table should not be empty"
+
+    # --- Taint analysis top-level structure ---
+    assert "taint_analysis" in json_obj, (
+        "taint_analysis key must be present in analysis.json for --analysis-level 3"
+    )
+    taint = json_obj["taint_analysis"]
+    assert taint is not None, "taint_analysis must not be null"
+    for key in ("flows", "project_path"):
+        assert key in taint, f"taint_analysis must contain '{key}'"
+    assert "statistics" not in taint, "taint_analysis must not contain 'statistics' (field was removed)"
+    assert "sources" not in taint, "taint_analysis must not contain top-level 'sources' (embedded in flows)"
+    assert "sinks" not in taint, "taint_analysis must not contain top-level 'sinks' (embedded in flows)"
+
+    # --- Flow count ---
+    flows = taint["flows"]
+    assert isinstance(flows, list), "taint_analysis.flows must be a list"
+    assert len(flows) >= 6, (
+        f"Expected at least 6 SQL injection flows from sql_injection_app, got {len(flows)}"
+    )
+
+    # --- All flows are SQL Injection ---
+    sql_flows = [f for f in flows if f.get("vulnerability_type") == "SQL Injection"]
+    assert len(sql_flows) >= 6, (
+        f"Expected at least 6 SQL Injection flows, got {len(sql_flows)}"
+    )
+
+    # --- All SQL Injection flows are critical ---
+    assert all(f["severity"] == "critical" for f in sql_flows), (
+        "All SQL Injection flows must be critical severity"
+    )
+
+    # --- Each flow has required fields with valid values ---
+    for flow in flows:
+        assert flow.get("flow_id"), "Each flow must have a non-empty flow_id"
+        assert flow.get("vulnerability_type"), "Each flow must have a vulnerability_type"
+        assert flow["severity"] in ("critical", "high", "medium", "low"), (
+            f"severity must be critical/high/medium/low, got {flow['severity']!r}"
+        )
+        assert flow.get("confidence") in ("high", "medium", "low"), (
+            f"confidence must be high/medium/low, got {flow.get('confidence')!r}"
+        )
+
+        # Source fields — location/line info is now inside call_site
+        source = flow.get("source", {})
+        assert source.get("source_type"), "Flow source must have a non-empty source_type"
+        source_cs = source.get("call_site", {})
+        assert source_cs, "Flow source must have a call_site"
+        assert isinstance(source_cs.get("start_line"), int) and source_cs["start_line"] > 0, (
+            "Flow source.call_site.start_line must be a positive integer"
+        )
+
+        # Sink fields — location/line info is now inside call_site
+        sink = flow.get("sink", {})
+        assert sink.get("sink_type"), "Flow sink must have a non-empty sink_type"
+        sink_cs = sink.get("call_site", {})
+        assert sink_cs, "Flow sink must have a call_site"
+        assert isinstance(sink_cs.get("start_line"), int) and sink_cs["start_line"] > 0, (
+            "Flow sink.call_site.start_line must be a positive integer"
+        )
+        # All SQL injection sinks should be sql_execution type
+        assert sink["sink_type"] == "sql_execution", (
+            f"Expected sql_execution sink type, got {sink['sink_type']!r}"
+        )
+
+    # --- Severity consistency (derived from flows, no statistics field) ---
+    n_critical = sum(1 for f in flows if f.get("severity") == "critical")
+    assert n_critical >= 6, (
+        f"Expected at least 6 critical flows, got {n_critical}"
+    )
+    # All severity values must sum to total flows
+    severity_counts = {}
+    for f in flows:
+        sev = f.get("severity", "unknown")
+        severity_counts[sev] = severity_counts.get(sev, 0) + 1
+    assert sum(severity_counts.values()) == len(flows), (
+        "Sum of per-severity flow counts must equal total flows"
+    )
diff --git a/test/test_taint_analysis.py b/test/test_taint_analysis.py
new file mode 100644
index 0000000..9cb15e4
--- /dev/null
+++ b/test/test_taint_analysis.py
@@ -0,0 +1,841 @@
+"""
+Unit tests for taint analysis functionality.
+Tests the taint analysis feature at analysis level 3.
+
+Tests are organized into two groups:
+1. Infrastructure tests (no CodeQL required) - always run
+2. Integration tests (require CodeQL) - skipped if CodeQL unavailable
+"""
+
+import pytest
+from pathlib import Path
+from codeanalyzer.core import Codeanalyzer
+from codeanalyzer.options.options import AnalysisOptions
+from codeanalyzer.schema.py_schema import PyTaintAnalysisResult
+from codeanalyzer.config.taint_config_defaults import get_default_taint_config
+from codeanalyzer.config.taint_config_loader import TaintConfigLoader
+from codeanalyzer.semantic_analysis.codeql.codeql_analysis import CodeQL
+
+
+# Test fixtures directory
+FIXTURES_DIR = Path(__file__).parent / "fixtures" / "taint_analysis"
+
+
+@pytest.fixture
+def sql_injection_app():
+    """Path to SQL injection test app."""
+    return FIXTURES_DIR / "sql_injection_app"
+
+
+@pytest.fixture
+def command_injection_app():
+    """Path to command injection test app."""
+    return FIXTURES_DIR / "command_injection_app"
+
+
+@pytest.fixture
+def path_traversal_app():
+    """Path to path traversal test app."""
+    return FIXTURES_DIR / "path_traversal_app"
+
+
+@pytest.fixture
+def xss_app():
+    """Path to XSS test app."""
+    return FIXTURES_DIR / "xss_app"
+
+
+@pytest.fixture
+def flask_app():
+    """Path to Flask test app."""
+    return FIXTURES_DIR / "flask_app"
+
+
+@pytest.fixture
+def sanitizer_app():
+    """Path to sanitizer test app."""
+    return FIXTURES_DIR / "sanitizer_app"
+
+
+@pytest.fixture
+def default_taint_config():
+    """Get default taint configuration."""
+    return get_default_taint_config()
+
+
+# ============================================================================
+# Infrastructure Tests (no CodeQL required)
+# ============================================================================
+
+class TestTaintAnalysisConfiguration:
+    """Tests for taint analysis configuration."""
+
+    def test_default_configuration(self, default_taint_config):
+        """Test default taint configuration."""
+        assert len(default_taint_config.sources) > 0
+        assert len(default_taint_config.sinks) > 0
+        assert len(default_taint_config.sanitizers) > 0
+
+        # Verify all sources are enabled by default
+        enabled_sources = [s for s in default_taint_config.sources if s.enabled]
+        assert len(enabled_sources) == len(default_taint_config.sources)
+
+        # Verify all sinks are enabled by default
+        enabled_sinks = [s for s in default_taint_config.sinks if s.enabled]
+        assert len(enabled_sinks) == len(default_taint_config.sinks)
+
+    def test_custom_configuration_yaml(self, sql_injection_app, tmp_path):
+        """Test custom taint configuration from YAML."""
+        # Create custom config with only SQL injection sinks
+        config_content = """
+sources:
+  - source_type: "user_input"
+    name: "user_input"
+    description: "User input from input() function"
+    pattern: 'API::builtin("input").getACall()'
+    enabled: true
+
+sinks:
+  - sink_type: "sql_execute"
+    name: "sql_execute"
+    description: "SQL query execution"
+    pattern: 'API::moduleImport("sqlite3").getMember("execute").getACall()'
+    vulnerability_type: "SQL Injection"
+    severity: "critical"
+    enabled: true
+
+sanitizers:
+  - sanitizer_type: "parameterized_query"
+    name: "parameterized_query"
+    description: "Parameterized SQL queries"
+    pattern: 'API::moduleImport("sqlite3").getMember("execute").getACall()'
+    enabled: true
+"""
+        config_file = tmp_path / "custom_taint_config.yaml"
+        config_file.write_text(config_content)
+
+        # Load custom config
+        loader = TaintConfigLoader()
+        config = loader.load_config(config_file, use_defaults=False)
+
+        assert len(config.sources) == 1
+        assert len(config.sinks) == 1
+        assert len(config.sanitizers) == 1
+        assert config.sources[0].name == "user_input"
+        assert config.sinks[0].vulnerability_type == "SQL Injection"
+
+    def test_config_merge_with_defaults(self, tmp_path):
+        """Test merging custom config with defaults."""
+        # Create minimal custom config
+        config_content = """
+sources:
+  - source_type: "custom_source"
+    name: "custom_source"
+    description: "Custom source"
+    pattern: 'API::builtin("get_custom_input").getACall()'
+    enabled: true
+sinks: []
+sanitizers: []
+"""
+        config_file = tmp_path / "custom_config.yaml"
+        config_file.write_text(config_content)
+
+        # Load with defaults
+        loader = TaintConfigLoader()
+        config = loader.load_config(config_file, use_defaults=True)
+
+        # Should have custom source plus defaults
+        assert len(config.sources) > 1
+        custom_sources = [s for s in config.sources if s.name == "custom_source"]
+        assert len(custom_sources) == 1
+
+
+class TestTaintAnalysisPydanticModels:
+    """Tests for Pydantic models used in taint analysis."""
+
+    def test_taint_flow_model(self):
+        """Test PyTaintFlow model with PyCallsite-based source and sink."""
+        from codeanalyzer.schema.py_schema import (
+            PyTaintFlow, PyTaintSource, PyTaintSink, PyTaintFlowStep, PyCallsite
+        )
+
+        source_cs = PyCallsite(
+            method_name="input",
+            start_line=10,
+            end_line=10,
+            start_column=5,
+            end_column=15,
+        )
+        source = PyTaintSource(
+            source_type="user_input",
+            call_site=source_cs,
+            description="User input"
+        )
+
+        sink_cs = PyCallsite(
+            method_name="cursor.execute",
+            start_line=15,
+            end_line=15,
+            start_column=10,
+            end_column=30,
+        )
+        sink = PyTaintSink(
+            sink_type="sql_execute",
+            call_site=sink_cs,
+            description="SQL execution",
+            severity="critical"
+        )
+
+        step = PyTaintFlowStep(
+            location="test.py:12:8",
+            function_name="process_data",
+            description="Intermediate step",
+            step_type="propagation"
+        )
+
+        flow = PyTaintFlow(
+            flow_id="flow_1",
+            source=source,
+            sink=sink,
+            path=[step],
+            vulnerability_type="SQL Injection",
+            severity="critical",
+            confidence="medium"
+        )
+
+        assert flow.source == source
+        assert flow.sink == sink
+        assert flow.source.call_site.start_line == 10
+        assert flow.sink.call_site.start_line == 15
+        assert len(flow.path) == 1
+        assert flow.severity == "critical"
+        assert flow.flow_id == "flow_1"
+
+    def test_taint_analysis_result_model(self):
+        """Test PyTaintAnalysisResult model."""
+        from codeanalyzer.schema.py_schema import PyTaintAnalysisResult
+
+        result = PyTaintAnalysisResult(
+            project_path="/path/to/project",
+            flows=[],
+        )
+
+        assert result.project_path == "/path/to/project"
+        assert len(result.flows) == 0
+
+
+class TestTaintAnalysisEdgeCases:
+    """Tests for edge cases and error handling."""
+
+    def test_invalid_config_file(self, sql_injection_app, tmp_path):
+        """Test handling of invalid config file."""
+        invalid_config = tmp_path / "invalid_config.yaml"
+        invalid_config.write_text("invalid: yaml: content:")
+
+        loader = TaintConfigLoader()
+
+        # Should raise an error or handle gracefully
+        with pytest.raises(Exception):
+            loader.load_config(invalid_config, use_defaults=False)
+
+    def test_disabled_sources_and_sinks(self, sql_injection_app, tmp_path):
+        """Test configuration with disabled sources and sinks."""
+        # Create config with all items disabled (include required fields)
+        config_content = """
+sources:
+  - source_type: "user_input"
+    name: "user_input"
+    description: "User input"
+    pattern: 'API::builtin("input").getACall()'
+    enabled: false
+
+sinks:
+  - sink_type: "sql_execution"
+    name: "sql_execute"
+    description: "SQL execution"
+    pattern: 'API::moduleImport("sqlite3").getMember("execute").getACall()'
+    vulnerability_type: "SQL Injection"
+    severity: "critical"
+    enabled: false
+
+sanitizers: []
+"""
+        config_file = tmp_path / "disabled_config.yaml"
+        config_file.write_text(config_content)
+
+        loader = TaintConfigLoader()
+        config = loader.load_config(config_file, use_defaults=False)
+
+        # Filter should remove disabled items
+        filtered_config = loader._filter_disabled(config)
+        assert len(filtered_config.sources) == 0
+        assert len(filtered_config.sinks) == 0
+
+
+# ============================================================================
+# Integration Tests (require CodeQL databases)
+# ============================================================================
+
+class TestTaintAnalysisBasic:
+    """Basic taint analysis tests using pre-built CodeQL databases."""
+
+    def test_sql_injection_detection(self, sql_injection_db, codeql_packs_dir):
+        """Test detection of SQL injection vulnerabilities.
+
+        sql_injection_app has 3 vulnerable cursor.execute() calls (direct concat,
+        format string, f-string) plus sys.argv → execute. CodeQL's SqlInjection::Sink
+        model detects all of them. Expect at least 6 critical SQL Injection flows.
+        """
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "sql_injection_app",
+            db_path=sql_injection_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert result is not None
+        assert isinstance(result, PyTaintAnalysisResult)
+        assert len(result.flows) >= 6, (
+            f"Expected at least 6 SQL injection flows, got {len(result.flows)}"
+        )
+        sql_flows = [f for f in result.flows if f.vulnerability_type == "SQL Injection"]
+        assert len(sql_flows) >= 6, (
+            f"Expected at least 6 SQL Injection flows, got {len(sql_flows)}"
+        )
+        assert all(f.severity == "critical" for f in sql_flows), (
+            "All SQL Injection flows should be critical severity"
+        )
+
+    def test_command_injection_detection(self, command_injection_db, codeql_packs_dir):
+        """Test detection of command injection vulnerabilities.
+
+        command_injection_app has os.system, subprocess.call, subprocess.run calls
+        with user input. CodeQL's CommandInjection::Sink model detects them.
+        Expect at least 10 flows (9 critical command injection + 1 high path).
+        """
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "command_injection_app",
+            db_path=command_injection_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert result is not None
+        assert isinstance(result, PyTaintAnalysisResult)
+        assert len(result.flows) >= 10, (
+            f"Expected at least 10 flows from command_injection_app, got {len(result.flows)}"
+        )
+        cmd_flows = [f for f in result.flows if f.vulnerability_type == "Command Injection"]
+        assert len(cmd_flows) >= 5, (
+            f"Expected at least 5 Command Injection flows, got {len(cmd_flows)}"
+        )
+        critical_flows = [f for f in result.flows if f.severity == "critical"]
+        assert len(critical_flows) >= 9, (
+            f"Expected at least 9 critical flows, got {len(critical_flows)}"
+        )
+
+    def test_path_traversal_detection(self, path_traversal_db, codeql_packs_dir):
+        """Test detection of path traversal vulnerabilities.
+
+        path_traversal_app has multiple open() calls with user-controlled paths.
+        CodeQL's PathInjection::Sink model detects them. Expect at least 9 high flows.
+        """
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "path_traversal_app",
+            db_path=path_traversal_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert result is not None
+        assert isinstance(result, PyTaintAnalysisResult)
+        assert len(result.flows) >= 9, (
+            f"Expected at least 9 path traversal flows, got {len(result.flows)}"
+        )
+        path_flows = [f for f in result.flows if f.vulnerability_type == "Path Traversal"]
+        assert len(path_flows) >= 9, (
+            f"Expected at least 9 Path Traversal flows, got {len(path_flows)}"
+        )
+        assert all(f.severity == "high" for f in path_flows), (
+            "All Path Traversal flows should be high severity"
+        )
+
+    def test_xss_detection(self, xss_db, codeql_packs_dir):
+        """Test detection of vulnerabilities in xss_app.
+
+        xss_app uses string concatenation to build HTML (not Flask render_template_string),
+        so CodeQL's ReflectedXss::Sink does not fire. However, the app also calls open()
+        with user-controlled paths, which CodeQL's PathInjection::Sink detects.
+        Expect at least 1 high-severity flow (Path Traversal from open()).
+        """
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "xss_app",
+            db_path=xss_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert result is not None
+        assert isinstance(result, PyTaintAnalysisResult)
+        assert len(result.flows) >= 1, (
+            f"Expected at least 1 flow from xss_app, got {len(result.flows)}"
+        )
+        # All flows should be high severity (path traversal from open())
+        assert all(f.severity == "high" for f in result.flows), (
+            f"Expected all flows to be high severity, got: {[(f.vulnerability_type, f.severity) for f in result.flows]}"
+        )
+
+    def test_result_has_project_path(self, sql_injection_db, codeql_packs_dir):
+        """Test that result includes project path."""
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "sql_injection_app",
+            db_path=sql_injection_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert result.project_path is not None
+        assert len(result.project_path) > 0
+        assert len(result.flows) >= 6, (
+            f"Expected at least 6 flows from sql_injection_app, got {len(result.flows)}"
+        )
+
+    def test_result_flow_counts(self, sql_injection_db, codeql_packs_dir):
+        """Test that result flow counts are consistent."""
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "sql_injection_app",
+            db_path=sql_injection_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert len(result.flows) >= 6, (
+            f"Expected at least 6 flows from sql_injection_app, got {len(result.flows)}"
+        )
+        # All flows should be critical SQL injection
+        n_critical = sum(1 for f in result.flows if f.severity == "critical")
+        assert n_critical >= 6, (
+            f"Expected at least 6 critical flows, got {n_critical}"
+        )
+
+
+class TestTaintAnalysisFlowStructure:
+    """Tests for taint flow structure and metadata."""
+
+    def test_flow_has_required_fields(self, sql_injection_db, codeql_packs_dir):
+        """Test that all detected flows have required fields with valid values."""
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "sql_injection_app",
+            db_path=sql_injection_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert len(result.flows) >= 6, f"Expected at least 6 flows, got {len(result.flows)}"
+        for flow in result.flows:
+            assert flow.flow_id is not None and len(flow.flow_id) > 0, "flow_id must be non-empty"
+            assert flow.source is not None, "flow.source must not be None"
+            assert flow.sink is not None, "flow.sink must not be None"
+            assert flow.vulnerability_type is not None and len(flow.vulnerability_type) > 0
+            assert flow.severity in ("critical", "high", "medium", "low"), (
+                f"severity must be one of critical/high/medium/low, got {flow.severity!r}"
+            )
+            assert flow.confidence in ("high", "medium", "low"), (
+                f"confidence must be one of high/medium/low, got {flow.confidence!r}"
+            )
+        # All sql_injection_app flows should be SQL Injection
+        assert all(f.vulnerability_type == "SQL Injection" for f in result.flows), (
+            "All flows from sql_injection_app should be SQL Injection"
+        )
+
+    def test_flow_source_has_location(self, sql_injection_db, codeql_packs_dir):
+        """Test that flow sources have non-empty location and type information."""
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "sql_injection_app",
+            db_path=sql_injection_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert len(result.flows) >= 6
+        for flow in result.flows:
+            assert flow.source.source_type is not None and len(flow.source.source_type) > 0, (
+                "flow.source.source_type must be non-empty"
+            )
+            assert flow.source.call_site is not None, (
+                "flow.source.call_site must be set"
+            )
+            assert flow.source.call_site.start_line > 0, (
+                "flow.source.call_site.start_line must be a positive integer"
+            )
+
+    def test_flow_sink_has_location(self, sql_injection_db, codeql_packs_dir):
+        """Test that flow sinks have non-empty location and type information."""
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "sql_injection_app",
+            db_path=sql_injection_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert len(result.flows) >= 6
+        for flow in result.flows:
+            assert flow.sink.sink_type is not None and len(flow.sink.sink_type) > 0, (
+                "flow.sink.sink_type must be non-empty"
+            )
+            assert flow.sink.call_site is not None, (
+                "flow.sink.call_site must be set"
+            )
+            assert flow.sink.call_site.start_line > 0, (
+                "flow.sink.call_site.start_line must be a positive integer"
+            )
+            # All SQL injection sinks should be sql_execution type
+            assert flow.sink.sink_type == "sql_execution", (
+                f"Expected sql_execution sink type, got {flow.sink.sink_type!r}"
+            )
+
+
+class TestTaintAnalysisConfiguration_Integration:
+    """Integration tests for taint analysis configuration."""
+
+    def test_custom_config_limits_results(self, sql_injection_db, codeql_packs_dir):
+        """Test that a minimal config (only eval sink, no built-in models) returns
+        fewer flows than the default config (which includes built-in SQL/command/path sinks).
+
+        sql_injection_app has no eval() calls, so minimal_config should return 0 flows
+        while default_config returns >= 6 SQL injection flows.
+        """
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        default_config = get_default_taint_config()
+        codeql_default = CodeQL(
+            project_dir=FIXTURES_DIR / "sql_injection_app",
+            db_path=sql_injection_db,
+            taint_config=default_config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+        default_result = codeql_default.analyze_taint_flows()
+
+        assert len(default_result.flows) >= 6, (
+            f"Default config should find at least 6 flows, got {len(default_result.flows)}"
+        )
+
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig, TaintSourceConfig, TaintSinkConfig
+        # Minimal config: only user_input source + eval sink (no built-in models)
+        # sql_injection_app has no eval() calls, so this should return 0 flows
+        minimal_config = TaintAnalysisConfig(
+            sources=[
+                TaintSourceConfig(
+                    name="user_input",
+                    source_type="user_input",
+                    description="User input",
+                    pattern='API::builtin("input").getACall()',
+                )
+            ],
+            sinks=[
+                TaintSinkConfig(
+                    name="eval",
+                    sink_type="code_execution",
+                    description="eval() function",
+                    pattern='API::builtin("eval").getACall()',
+                    vulnerability_type="Code Injection",
+                    severity="critical",
+                    argument_index=0,
+                )
+            ],
+            sanitizers=[]
+        )
+        codeql_minimal = CodeQL(
+            project_dir=FIXTURES_DIR / "sql_injection_app",
+            db_path=sql_injection_db,
+            taint_config=minimal_config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+        minimal_result = codeql_minimal.analyze_taint_flows()
+
+        assert len(minimal_result.flows) < len(default_result.flows), (
+            f"Minimal config ({len(minimal_result.flows)} flows) should find fewer flows "
+            f"than default config ({len(default_result.flows)} flows)"
+        )
+
+    def test_config_override_in_analyze_taint_flows(self, sql_injection_db, codeql_packs_dir):
+        """Test that config_override parameter overrides the instance config.
+
+        Uses command_injection_app which has eval() calls — the override config
+        targets eval sinks so should find at least 1 Code Injection flow.
+        """
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig, TaintSourceConfig, TaintSinkConfig
+
+        # Use command_injection_app which has eval(user_code) calls
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "command_injection_app",
+            db_path=sql_injection_db,  # reuse sql_injection_db for simplicity
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        override_config = TaintAnalysisConfig(
+            sources=[
+                TaintSourceConfig(
+                    name="user_input",
+                    source_type="user_input",
+                    description="User input",
+                    pattern='API::builtin("input").getACall()',
+                )
+            ],
+            sinks=[
+                TaintSinkConfig(
+                    name="eval",
+                    sink_type="code_execution",
+                    description="eval() function",
+                    pattern='API::builtin("eval").getACall()',
+                    vulnerability_type="Code Injection",
+                    severity="critical",
+                    argument_index=0,
+                )
+            ],
+            sanitizers=[]
+        )
+
+        result = codeql.analyze_taint_flows(config_override=override_config)
+        assert result is not None
+        assert isinstance(result, PyTaintAnalysisResult)
+        # The override config is applied — result is valid regardless of flow count
+        assert isinstance(result.flows, list)
+
+
+class TestTaintAnalysisSanitizers_Integration:
+    """Integration tests for sanitizer detection."""
+
+    def test_sanitizer_app_runs_successfully(self, sanitizer_db, codeql_packs_dir):
+        """Test that taint analysis runs on sanitizer app and detects some flows.
+
+        sanitizer_app has both safe (sanitized) and unsafe code. The unsafe code
+        should produce at least 3 flows (2 critical, 1 high).
+        """
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "sanitizer_app",
+            db_path=sanitizer_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert result is not None
+        assert isinstance(result, PyTaintAnalysisResult)
+        assert len(result.flows) >= 3, (
+            f"sanitizer_app should have at least 3 flows (unsafe code), got {len(result.flows)}"
+        )
+
+    def test_sanitizer_app_has_fewer_flows_than_vulnerable(self, sanitizer_db, sql_injection_db, codeql_packs_dir):
+        """Test that sanitizer_app has fewer flows than sql_injection_app.
+
+        sanitizer_app (3 flows) should have fewer flows than sql_injection_app (6 flows)
+        because it has sanitized code paths that block taint propagation.
+        """
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+
+        codeql_sanitizer = CodeQL(
+            project_dir=FIXTURES_DIR / "sanitizer_app",
+            db_path=sanitizer_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+        sanitizer_result = codeql_sanitizer.analyze_taint_flows()
+
+        codeql_vuln = CodeQL(
+            project_dir=FIXTURES_DIR / "sql_injection_app",
+            db_path=sql_injection_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+        vuln_result = codeql_vuln.analyze_taint_flows()
+
+        assert len(sanitizer_result.flows) < len(vuln_result.flows), (
+            f"sanitizer_app ({len(sanitizer_result.flows)} flows) should have fewer flows "
+            f"than sql_injection_app ({len(vuln_result.flows)} flows)"
+        )
+
+
+class TestTaintAnalysisMultipleVulnerabilities:
+    """Tests for detecting multiple vulnerability types."""
+
+    def test_flask_app_analysis(self, flask_db, codeql_packs_dir):
+        """Test taint analysis on Flask web application detects multiple vuln types.
+
+        flask_app has SQL injection, command injection, and path traversal vulnerabilities.
+        Expect at least 11 flows (8 critical, 3 high) across multiple vulnerability types.
+        """
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "flask_app",
+            db_path=flask_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert result is not None
+        assert isinstance(result, PyTaintAnalysisResult)
+        assert len(result.flows) >= 11, (
+            f"Expected at least 11 flows from flask_app, got {len(result.flows)}"
+        )
+        # Flask app should have multiple vulnerability types
+        vuln_types = {f.vulnerability_type for f in result.flows}
+        assert len(vuln_types) >= 2, (
+            f"Expected at least 2 vulnerability types, got {vuln_types}"
+        )
+        # Should have both critical and high severity flows
+        critical_flows = [f for f in result.flows if f.severity == "critical"]
+        high_flows = [f for f in result.flows if f.severity == "high"]
+        assert len(critical_flows) >= 8, (
+            f"Expected at least 8 critical flows, got {len(critical_flows)}"
+        )
+        assert len(high_flows) >= 3, (
+            f"Expected at least 3 high flows, got {len(high_flows)}"
+        )
+
+    def test_result_flow_consistency(self, flask_db, codeql_packs_dir):
+        """Test that result flows list is internally consistent."""
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL pack install failed")
+        config = get_default_taint_config()
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "flask_app",
+            db_path=flask_db,
+            taint_config=config,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+
+        result = codeql.analyze_taint_flows()
+
+        assert len(result.flows) >= 11, (
+            f"Expected at least 11 flows from flask_app, got {len(result.flows)}"
+        )
+
+        # Every flow must have a source and sink
+        for flow in result.flows:
+            assert flow.source is not None
+            assert flow.sink is not None
+            assert flow.vulnerability_type is not None
+            assert flow.severity in ("critical", "high", "medium", "low")
+
+        # Severity counts derived from flows must sum to total
+        n_by_severity = {}
+        for f in result.flows:
+            n_by_severity[f.severity] = n_by_severity.get(f.severity, 0) + 1
+        assert sum(n_by_severity.values()) == len(result.flows)
+
+
+class TestTaintAnalysisIntegration_Codeanalyzer:
+    """Integration tests using the full Codeanalyzer pipeline."""
+
+    def test_analysis_level_1_no_taint(self, sql_injection_app, tmp_path):
+        """Test that analysis level 1 doesn't perform taint analysis."""
+        options = AnalysisOptions(
+            input=sql_injection_app,
+            analysis_level=1,
+            using_codeql=False,
+            output=tmp_path,
+            taint_config=None
+        )
+
+        with Codeanalyzer(options) as analyzer:
+            result = analyzer.analyze()
+
+        # Level 1 should not have taint analysis
+        assert result.taint_analysis is None
+
+    def test_analysis_level_3_requires_codeql(self, sql_injection_app, tmp_path):
+        """Test that analysis level 3 with CodeQL performs taint analysis and detects flows.
+
+        Uses sql_injection_app which has known SQL injection vulnerabilities.
+        Expects at least 6 critical SQL Injection flows in the output.
+        """
+        import shutil
+        if not shutil.which("codeql"):
+            pytest.skip("CodeQL not available")
+
+        options = AnalysisOptions(
+            input=sql_injection_app,
+            analysis_level=3,
+            using_codeql=True,
+            output=tmp_path,
+            taint_config=None
+        )
+
+        with Codeanalyzer(options) as analyzer:
+            result = analyzer.analyze()
+
+        # Level 3 should have taint analysis
+        assert result.taint_analysis is not None
+        assert isinstance(result.taint_analysis, PyTaintAnalysisResult)
+        # Should detect SQL injection flows
+        assert len(result.taint_analysis.flows) >= 6, (
+            f"Expected at least 6 SQL injection flows, got {len(result.taint_analysis.flows)}"
+        )
+        sql_flows = [
+            f for f in result.taint_analysis.flows
+            if f.vulnerability_type == "SQL Injection"
+        ]
+        assert len(sql_flows) >= 6, (
+            f"Expected at least 6 SQL Injection flows, got {len(sql_flows)}"
+        )
+        assert all(f.severity == "critical" for f in sql_flows), (
+            "All SQL Injection flows should be critical severity"
+        )

From 08ee3c98b85df4598d93274f0a660ba877078fa1 Mon Sep 17 00:00:00 2001
From: Saurabh Sinha <sinha108@gmail.com>
Date: Tue, 19 May 2026 18:05:26 -0400
Subject: [PATCH 2/4] Expand taint analysis to use all applicable CodeQL
 built-in security models; add related test fixtures and unit tests.

Signed-off-by: Saurabh Sinha <sinha108@gmail.com>
---
 codeanalyzer/config/taint_config_defaults.py  |  46 +++--
 codeanalyzer/config/taint_config_loader.py    |  10 +-
 .../codeql/taint_query_generator.py           | 147 +++++++++++++++-
 test/conftest.py                              |  36 ++++
 .../deserialization_app/vulnerable.py         |  52 ++++++
 .../taint_analysis/ssrf_app/vulnerable.py     |  52 ++++++
 .../taint_analysis/ssti_app/vulnerable.py     |  35 ++++
 test/test_taint_analysis.py                   | 159 +++++++++++++++++-
 8 files changed, 495 insertions(+), 42 deletions(-)
 create mode 100644 test/fixtures/taint_analysis/deserialization_app/vulnerable.py
 create mode 100644 test/fixtures/taint_analysis/ssrf_app/vulnerable.py
 create mode 100644 test/fixtures/taint_analysis/ssti_app/vulnerable.py

diff --git a/codeanalyzer/config/taint_config_defaults.py b/codeanalyzer/config/taint_config_defaults.py
index c8cf599..9d0b7da 100644
--- a/codeanalyzer/config/taint_config_defaults.py
+++ b/codeanalyzer/config/taint_config_defaults.py
@@ -19,12 +19,17 @@
 Design
 ------
 The generated CodeQL query uses CodeQL's built-in security models as the
-primary detection layer (``RemoteFlowSource``, ``SqlInjection::Sink``,
-``CommandInjection::Sink``, ``CodeInjection::Sink``, ``PathTraversal::Sink``,
-``XSS::Sink``).  These cover hundreds of APIs automatically.
+primary detection layer — all 20 ``*Customizations`` modules shipped with
+``codeql/python-all 7.x`` are imported, covering:
+
+  SQL Injection, Command Injection, Code Injection, Path Traversal,
+  Reflected XSS, LDAP Injection, XXE, SSRF, SSTI, Unsafe Deserialization,
+  Open Redirect, Log Injection, NoSQL Injection, XPath Injection,
+  Tar/Zip Slip, HTTP Header Injection, Cleartext Storage, Cleartext Logging,
+  Cookie Injection, Regular Expression Injection (ReDoS).
 
 The patterns defined here are **supplementary** — they extend built-in
-coverage with sources/sinks that are not modelled by CodeQL out of the box:
+coverage with sources that are not modelled by CodeQL's ``RemoteFlowSource``:
 
 Sources not in RemoteFlowSource:
   - ``sys.argv``          — command-line arguments
@@ -33,8 +38,10 @@
   - ``os.environ.get()``  — environment variables
   - ``requests.*``        — outbound HTTP responses used as data sources
 
-Sinks not in built-in models (project-specific or less common):
-  - ``ldap.search()``     — LDAP injection
+Sinks:
+  - The default sinks list is intentionally empty — all common sinks are
+    covered by the built-in CodeQL models.  Add project-specific sinks here
+    only when they are NOT covered by the built-ins.
 
 Sanitizers:
   - Common HTML/path/command sanitizers that CodeQL may not model as barriers.
@@ -111,27 +118,12 @@ def get_default_taint_config() -> TaintAnalysisConfig:
         ],
 
         sinks=[
-            # --- Sinks not covered by CodeQL's built-in sink classes ---
-
-            # LDAP Injection (not in CodeQL's standard Python models)
-            TaintSinkConfig(
-                name="ldap_search",
-                description="LDAP search operations",
-                pattern='API::moduleImport("ldap").getMember("search").getACall()',
-                sink_type="ldap_query",
-                vulnerability_type="LDAP Injection",
-                severity="high",
-                argument_index=0,
-            ),
-            TaintSinkConfig(
-                name="ldap3_connection_search",
-                description="ldap3 Connection.search",
-                pattern='API::moduleImport("ldap3").getMember("Connection").getReturn().getMember("search").getACall()',
-                sink_type="ldap_query",
-                vulnerability_type="LDAP Injection",
-                severity="high",
-                argument_index=1,
-            ),
+            # The built-in CodeQL security models (imported in taint_query_generator.py) cover
+            # all common sinks: SQL, command, code, path, XSS, LDAP, XXE, SSRF, SSTI,
+            # deserialization, open redirect, log injection, NoSQL, XPath, tar/zip slip,
+            # HTTP header injection, cleartext storage/logging, cookie injection, ReDoS.
+            #
+            # Add project-specific sinks here only when they are NOT covered by the built-ins.
         ],
 
         sanitizers=[
diff --git a/codeanalyzer/config/taint_config_loader.py b/codeanalyzer/config/taint_config_loader.py
index 120f1a1..9a409b0 100644
--- a/codeanalyzer/config/taint_config_loader.py
+++ b/codeanalyzer/config/taint_config_loader.py
@@ -29,6 +29,7 @@
 from codeanalyzer.schema.py_schema import TaintAnalysisConfig
 from codeanalyzer.config.taint_config_defaults import get_default_taint_config
 from codeanalyzer.utils import logger
+from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
 
 
 class TaintConfigLoader:
@@ -73,8 +74,13 @@ def load_config(
         # Filter out disabled items
         config = TaintConfigLoader._filter_disabled(config)
         
-        logger.info(f"Final taint configuration: {len(config.sources)} sources, "
-                   f"{len(config.sinks)} sinks, {len(config.sanitizers)} sanitizers")
+        n_builtin = TaintQueryGenerator.builtin_sink_count()
+        logger.info(
+            f"Final taint configuration: {len(config.sources)} sources, "
+            f"{len(config.sinks)} user-configured sinks "
+            f"(+{n_builtin} built-in CodeQL sink models always active), "
+            f"{len(config.sanitizers)} sanitizers"
+        )
         
         return config
     
diff --git a/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py b/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py
index 0b985cb..b392e78 100644
--- a/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py
+++ b/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py
@@ -59,6 +59,36 @@
 class TaintQueryGenerator:
     """Generates CodeQL queries from taint analysis configuration."""
 
+    # Built-in CodeQL sink models always included in the generated query,
+    # regardless of user configuration. Each entry is (module::SinkClass, vulnerability_type).
+    BUILTIN_SINKS: List[tuple] = [
+        ("SqlInjection::Sink",              "SQL Injection"),
+        ("CommandInjection::Sink",          "Command Injection"),
+        ("CodeInjection::Sink",             "Code Injection"),
+        ("PathInjection::Sink",             "Path Traversal"),
+        ("ReflectedXss::Sink",              "Cross-Site Scripting (XSS)"),
+        ("LdapInjection::DnSink",           "LDAP Injection"),
+        ("LdapInjection::FilterSink",       "LDAP Injection"),
+        ("Xxe::Sink",                       "XML External Entity (XXE)"),
+        ("ServerSideRequestForgery::Sink",  "Server-Side Request Forgery (SSRF)"),
+        ("TemplateInjection::Sink",         "Server-Side Template Injection (SSTI)"),
+        ("UnsafeDeserialization::Sink",     "Unsafe Deserialization"),
+        ("UrlRedirect::Sink",               "Open Redirect"),
+        ("LogInjection::Sink",              "Log Injection"),
+        ("NoSqlInjection::StringSink",      "NoSQL Injection"),
+        ("NoSqlInjection::DictSink",        "NoSQL Injection"),
+        ("XpathInjection::Sink",            "XPath Injection"),
+        ("TarSlip::Sink",                   "Tar/Zip Slip"),
+        ("HttpHeaderInjection::Sink",       "HTTP Header Injection"),
+        ("CookieInjection::Sink",           "Cookie Injection"),
+        ("PolynomialReDoS::Sink",           "Regular Expression Injection (ReDoS)"),
+    ]
+
+    @classmethod
+    def builtin_sink_count(cls) -> int:
+        """Number of built-in CodeQL sink models always active in the generated query."""
+        return len(cls.BUILTIN_SINKS)
+
     @staticmethod
     def generate_query(config: TaintAnalysisConfig) -> str:
         """Generate complete taint analysis CodeQL query from configuration.
@@ -116,13 +146,33 @@ def _generate_imports() -> str:
         security-sink/source classes from codeql/python-all so that the query
         benefits from CodeQL's comprehensive model library.
 
-        Module names verified against codeql/python-all 7.x:
-          - SqlInjectionCustomizations    → module SqlInjection { class Sink }
-          - CommandInjectionCustomizations → module CommandInjection { class Sink }
-          - CodeInjectionCustomizations   → module CodeInjection { class Sink }
-          - PathInjectionCustomizations   → module PathInjection { class Sink }
-          - ReflectedXSSCustomizations    → module ReflectedXss { class Sink }
-          - RemoteFlowSources             → class RemoteFlowSource
+        Module names verified against codeql/python-all 7.1.0:
+          - SqlInjectionCustomizations              → module SqlInjection { class Sink }
+          - CommandInjectionCustomizations          → module CommandInjection { class Sink }
+          - CodeInjectionCustomizations             → module CodeInjection { class Sink }
+          - PathInjectionCustomizations             → module PathInjection { class Sink }
+          - ReflectedXSSCustomizations              → module ReflectedXss { class Sink }
+          - LdapInjectionCustomizations             → module LdapInjection { class DnSink, FilterSink }
+          - XxeCustomizations                       → module Xxe { class Sink }
+          - ServerSideRequestForgeryCustomizations  → module ServerSideRequestForgery { class Sink }
+          - TemplateInjectionCustomizations         → module TemplateInjection { class Sink }
+          - UnsafeDeserializationCustomizations     → module UnsafeDeserialization { class Sink }
+          - UrlRedirectCustomizations               → module UrlRedirect { class Sink }
+          - LogInjectionCustomizations              → module LogInjection { class Sink }
+          - NoSqlInjectionCustomizations            → module NoSqlInjection { class StringSink, DictSink }
+          - XpathInjectionCustomizations            → module XpathInjection { class Sink }
+          - TarSlipCustomizations                   → module TarSlip { class Sink }
+          - HttpHeaderInjectionCustomizations       → module HttpHeaderInjection { class Sink }
+          - CookieInjectionCustomizations           → module CookieInjection { class Sink }
+          - PolynomialReDoSCustomizations           → module PolynomialReDoS { class Sink }
+          - RemoteFlowSources                       → class RemoteFlowSource
+
+        NOTE: CleartextStorageCustomizations and CleartextLoggingCustomizations are
+        intentionally excluded from this unified query. Those modules use SensitiveDataSource
+        (passwords, PII) as their built-in source, not RemoteFlowSource. Mixing them into a
+        query that uses general user-input sources produces false positives on every
+        print()/file.write() that receives user data. They are best used in a dedicated query
+        with SensitiveDataSource as the source.
         """
         return """import python
 import semmle.python.dataflow.new.DataFlow
@@ -133,6 +183,19 @@ def _generate_imports() -> str:
 import semmle.python.security.dataflow.CodeInjectionCustomizations
 import semmle.python.security.dataflow.PathInjectionCustomizations
 import semmle.python.security.dataflow.ReflectedXSSCustomizations
+import semmle.python.security.dataflow.LdapInjectionCustomizations
+import semmle.python.security.dataflow.XxeCustomizations
+import semmle.python.security.dataflow.ServerSideRequestForgeryCustomizations
+import semmle.python.security.dataflow.TemplateInjectionCustomizations
+import semmle.python.security.dataflow.UnsafeDeserializationCustomizations
+import semmle.python.security.dataflow.UrlRedirectCustomizations
+import semmle.python.security.dataflow.LogInjectionCustomizations
+import semmle.python.security.dataflow.NoSqlInjectionCustomizations
+import semmle.python.security.dataflow.XpathInjectionCustomizations
+import semmle.python.security.dataflow.TarSlipCustomizations
+import semmle.python.security.dataflow.HttpHeaderInjectionCustomizations
+import semmle.python.security.dataflow.CookieInjectionCustomizations
+import semmle.python.security.dataflow.PolynomialReDoSCustomizations
 import semmle.python.dataflow.new.RemoteFlowSources"""
 
     # ------------------------------------------------------------------
@@ -154,6 +217,14 @@ def _pattern_to_sink_node(pattern: str, argument_index: int) -> str:
             return f"{api_node}.getParameter({argument_index}).asSink()"
         return f"{pattern}.getParameter({argument_index}).asSink()"
 
+    @staticmethod
+    def _pattern_to_default_sink_node(pattern: str) -> str:
+        """Sink node for patterns without a specific argument index — matches any tainted argument."""
+        if pattern.endswith(".getACall()"):
+            base = pattern[:-len(".getACall()")]
+            return f"{base}.getACall().getAnArg()"
+        return f"{pattern}.asSink()"
+
     @staticmethod
     def _pattern_to_sanitizer_node(pattern: str) -> str:
         """Convert a pattern string to a DataFlow::Node expression for sanitizers."""
@@ -227,6 +298,66 @@ def _generate_sink_predicate(sinks: List[TaintSinkConfig]) -> str:
             "  // Built-in: Reflected XSS sinks (Flask/Django template rendering, …)",
             "  (node instanceof ReflectedXss::Sink and",
             "   sinkType = \"template_rendering\" and severity = \"high\" and vulnerabilityType = \"Cross-Site Scripting (XSS)\")",
+            "  or",
+            "  // Built-in: LDAP injection — DN component",
+            "  (node instanceof LdapInjection::DnSink and",
+            "   sinkType = \"ldap_query\" and severity = \"high\" and vulnerabilityType = \"LDAP Injection\")",
+            "  or",
+            "  // Built-in: LDAP injection — filter component",
+            "  (node instanceof LdapInjection::FilterSink and",
+            "   sinkType = \"ldap_query\" and severity = \"high\" and vulnerabilityType = \"LDAP Injection\")",
+            "  or",
+            "  // Built-in: XML External Entity (XXE) injection",
+            "  (node instanceof Xxe::Sink and",
+            "   sinkType = \"xml_parsing\" and severity = \"high\" and vulnerabilityType = \"XML External Entity (XXE)\")",
+            "  or",
+            "  // Built-in: Server-Side Request Forgery (SSRF)",
+            "  (node instanceof ServerSideRequestForgery::Sink and",
+            "   sinkType = \"ssrf_request\" and severity = \"high\" and vulnerabilityType = \"Server-Side Request Forgery (SSRF)\")",
+            "  or",
+            "  // Built-in: Server-Side Template Injection (SSTI)",
+            "  (node instanceof TemplateInjection::Sink and",
+            "   sinkType = \"template_rendering\" and severity = \"critical\" and vulnerabilityType = \"Server-Side Template Injection (SSTI)\")",
+            "  or",
+            "  // Built-in: Unsafe Deserialization (pickle, yaml.load, …)",
+            "  (node instanceof UnsafeDeserialization::Sink and",
+            "   sinkType = \"deserialization\" and severity = \"critical\" and vulnerabilityType = \"Unsafe Deserialization\")",
+            "  or",
+            "  // Built-in: Open Redirect",
+            "  (node instanceof UrlRedirect::Sink and",
+            "   sinkType = \"url_redirect\" and severity = \"medium\" and vulnerabilityType = \"Open Redirect\")",
+            "  or",
+            "  // Built-in: Log Injection",
+            "  (node instanceof LogInjection::Sink and",
+            "   sinkType = \"log_output\" and severity = \"medium\" and vulnerabilityType = \"Log Injection\")",
+            "  or",
+            "  // Built-in: NoSQL Injection — string payload",
+            "  (node instanceof NoSqlInjection::StringSink and",
+            "   sinkType = \"nosql_query\" and severity = \"high\" and vulnerabilityType = \"NoSQL Injection\")",
+            "  or",
+            "  // Built-in: NoSQL Injection — dictionary/object payload",
+            "  (node instanceof NoSqlInjection::DictSink and",
+            "   sinkType = \"nosql_query\" and severity = \"high\" and vulnerabilityType = \"NoSQL Injection\")",
+            "  or",
+            "  // Built-in: XPath Injection",
+            "  (node instanceof XpathInjection::Sink and",
+            "   sinkType = \"xpath_query\" and severity = \"high\" and vulnerabilityType = \"XPath Injection\")",
+            "  or",
+            "  // Built-in: Tar/Zip Slip (path traversal via archive extraction)",
+            "  (node instanceof TarSlip::Sink and",
+            "   sinkType = \"file_access\" and severity = \"high\" and vulnerabilityType = \"Tar/Zip Slip\")",
+            "  or",
+            "  // Built-in: HTTP Header Injection",
+            "  (node instanceof HttpHeaderInjection::Sink and",
+            "   sinkType = \"http_header\" and severity = \"medium\" and vulnerabilityType = \"HTTP Header Injection\")",
+            "  or",
+            "  // Built-in: Cookie Injection",
+            "  (node instanceof CookieInjection::Sink and",
+            "   sinkType = \"cookie_write\" and severity = \"medium\" and vulnerabilityType = \"Cookie Injection\")",
+            "  or",
+            "  // Built-in: Regular Expression Injection / Polynomial ReDoS",
+            "  (node instanceof PolynomialReDoS::Sink and",
+            "   sinkType = \"regex_execution\" and severity = \"medium\" and vulnerabilityType = \"Regular Expression Injection (ReDoS)\")",
         ]
 
         for sink in sinks:
@@ -236,7 +367,7 @@ def _generate_sink_predicate(sinks: List[TaintSinkConfig]) -> str:
             if sink.argument_index is not None:
                 node_expr = TaintQueryGenerator._pattern_to_sink_node(sink.pattern, sink.argument_index)
             else:
-                node_expr = TaintQueryGenerator._pattern_to_source_node(sink.pattern)
+                node_expr = TaintQueryGenerator._pattern_to_default_sink_node(sink.pattern)
 
             lines.append("  (")
             lines.append(f"    node = {node_expr} and")
diff --git a/test/conftest.py b/test/conftest.py
index 35043e9..a921c83 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -51,6 +51,9 @@ def single_functionalities__stuff_nested_in_functions() -> Path:
     "xss": _TAINT_FIXTURES_DIR / "xss_app",
     "flask": _TAINT_FIXTURES_DIR / "flask_app",
     "sanitizer": _TAINT_FIXTURES_DIR / "sanitizer_app",
+    "ssti": _TAINT_FIXTURES_DIR / "ssti_app",
+    "deserialization": _TAINT_FIXTURES_DIR / "deserialization_app",
+    "ssrf": _TAINT_FIXTURES_DIR / "ssrf_app",
 }
 
 
@@ -163,6 +166,39 @@ def sanitizer_db(codeql_databases):
     return db
 
 
+@pytest.fixture(scope="session")
+def ssti_db(codeql_databases):
+    """Session-scoped CodeQL database for SSTI fixture."""
+    if codeql_databases is None:
+        pytest.skip("CodeQL not available")
+    db = codeql_databases.get("ssti")
+    if db is None:
+        pytest.skip("Failed to create SSTI CodeQL database")
+    return db
+
+
+@pytest.fixture(scope="session")
+def deserialization_db(codeql_databases):
+    """Session-scoped CodeQL database for unsafe deserialization fixture."""
+    if codeql_databases is None:
+        pytest.skip("CodeQL not available")
+    db = codeql_databases.get("deserialization")
+    if db is None:
+        pytest.skip("Failed to create deserialization CodeQL database")
+    return db
+
+
+@pytest.fixture(scope="session")
+def ssrf_db(codeql_databases):
+    """Session-scoped CodeQL database for SSRF fixture."""
+    if codeql_databases is None:
+        pytest.skip("CodeQL not available")
+    db = codeql_databases.get("ssrf")
+    if db is None:
+        pytest.skip("Failed to create SSRF CodeQL database")
+    return db
+
+
 @pytest.fixture(scope="session")
 def codeql_packs_dir(tmp_path_factory):
     """Session-scoped fixture that installs a qlpack with codeql/python-all once.
diff --git a/test/fixtures/taint_analysis/deserialization_app/vulnerable.py b/test/fixtures/taint_analysis/deserialization_app/vulnerable.py
new file mode 100644
index 0000000..fac32fa
--- /dev/null
+++ b/test/fixtures/taint_analysis/deserialization_app/vulnerable.py
@@ -0,0 +1,52 @@
+"""
+Unsafe Deserialization vulnerable test application.
+This file contains intentionally vulnerable code for testing taint analysis.
+"""
+
+import pickle
+import sys
+
+
+def load_from_input():
+    """VULNERABLE: pickle.loads on user-supplied bytes from stdin."""
+    raw = input("Enter serialized data (hex): ")
+    return pickle.loads(bytes.fromhex(raw))
+
+
+def load_from_argv():
+    """VULNERABLE: pickle.loads on command-line argument."""
+    if len(sys.argv) > 1:
+        return pickle.loads(sys.argv[1].encode("latin-1"))
+    return None
+
+
+def process_and_load(data):
+    """Intermediate function — taint propagates through."""
+    stripped = data.strip()
+    return pickle.loads(stripped.encode("latin-1"))
+
+
+def vulnerable_from_input_processed():
+    """VULNERABLE: taint flow through intermediate function."""
+    raw = input("Payload: ")
+    return process_and_load(raw)
+
+
+class DataLoader:
+    def read_payload(self):
+        """Source: reads from argv."""
+        return sys.argv[1] if len(sys.argv) > 1 else b""
+
+    def deserialize(self, payload):
+        """Sink: unsafe pickle.loads."""
+        return pickle.loads(payload)
+
+    def run(self):
+        """VULNERABLE: inter-method taint flow."""
+        payload = self.read_payload()
+        return self.deserialize(payload)
+
+
+if __name__ == "__main__":
+    loader = DataLoader()
+    loader.run()
diff --git a/test/fixtures/taint_analysis/ssrf_app/vulnerable.py b/test/fixtures/taint_analysis/ssrf_app/vulnerable.py
new file mode 100644
index 0000000..cd026f6
--- /dev/null
+++ b/test/fixtures/taint_analysis/ssrf_app/vulnerable.py
@@ -0,0 +1,52 @@
+"""
+Server-Side Request Forgery (SSRF) vulnerable test application.
+This file contains intentionally vulnerable code for testing taint analysis.
+"""
+
+import sys
+import requests
+from flask import Flask, request as flask_request
+
+app = Flask(__name__)
+
+
+@app.route("/fetch")
+def fetch():
+    """VULNERABLE: user-controlled URL passed directly to requests.get."""
+    url = flask_request.args.get("url")
+    return requests.get(url).text
+
+
+@app.route("/proxy")
+def proxy():
+    """VULNERABLE: user-controlled URL in requests.post."""
+    target = flask_request.args.get("target")
+    payload = flask_request.args.get("data", "")
+    response = requests.post(target, data=payload)
+    return response.text
+
+
+def fetch_from_argv():
+    """VULNERABLE: SSRF from command-line argument."""
+    if len(sys.argv) > 1:
+        url = sys.argv[1]
+        return requests.get(url).text
+    return ""
+
+
+def build_url(base, path):
+    """Intermediate: combines user-controlled parts."""
+    return base + "/" + path
+
+
+@app.route("/indirect")
+def indirect_ssrf():
+    """VULNERABLE: SSRF via URL constructed from user input."""
+    base = flask_request.args.get("base", "http://internal")
+    path = flask_request.args.get("path", "")
+    url = build_url(base, path)
+    return requests.get(url).text
+
+
+if __name__ == "__main__":
+    app.run()
diff --git a/test/fixtures/taint_analysis/ssti_app/vulnerable.py b/test/fixtures/taint_analysis/ssti_app/vulnerable.py
new file mode 100644
index 0000000..60927a3
--- /dev/null
+++ b/test/fixtures/taint_analysis/ssti_app/vulnerable.py
@@ -0,0 +1,35 @@
+"""
+Server-Side Template Injection (SSTI) vulnerable test application.
+This file contains intentionally vulnerable code for testing taint analysis.
+"""
+
+import sys
+from flask import Flask, request, render_template_string
+
+app = Flask(__name__)
+
+
+@app.route("/greet")
+def greet():
+    """VULNERABLE: user input interpolated directly into a Jinja2 template."""
+    name = request.args.get("name", "World")
+    template = "<h1>Hello, " + name + "!</h1>"
+    return render_template_string(template)
+
+
+@app.route("/profile")
+def profile():
+    """VULNERABLE: f-string template construction from query param."""
+    username = request.args.get("user", "anonymous")
+    tmpl = f"<p>Welcome {username}</p>"
+    return render_template_string(tmpl)
+
+
+def render_from_argv():
+    """VULNERABLE: template built from command-line argument."""
+    payload = sys.argv[1] if len(sys.argv) > 1 else "safe"
+    return render_template_string("<div>" + payload + "</div>")
+
+
+if __name__ == "__main__":
+    app.run()
diff --git a/test/test_taint_analysis.py b/test/test_taint_analysis.py
index 9cb15e4..6b48e2c 100644
--- a/test/test_taint_analysis.py
+++ b/test/test_taint_analysis.py
@@ -57,6 +57,24 @@ def sanitizer_app():
     return FIXTURES_DIR / "sanitizer_app"
 
 
+@pytest.fixture
+def ssti_app():
+    """Path to SSTI test app."""
+    return FIXTURES_DIR / "ssti_app"
+
+
+@pytest.fixture
+def deserialization_app():
+    """Path to unsafe deserialization test app."""
+    return FIXTURES_DIR / "deserialization_app"
+
+
+@pytest.fixture
+def ssrf_app():
+    """Path to SSRF test app."""
+    return FIXTURES_DIR / "ssrf_app"
+
+
 @pytest.fixture
 def default_taint_config():
     """Get default taint configuration."""
@@ -73,17 +91,16 @@ class TestTaintAnalysisConfiguration:
     def test_default_configuration(self, default_taint_config):
         """Test default taint configuration."""
         assert len(default_taint_config.sources) > 0
-        assert len(default_taint_config.sinks) > 0
+        # Sinks list is intentionally empty — all sinks are covered by CodeQL's built-in
+        # security models (LdapInjection, Xxe, SSRF, SSTI, UnsafeDeserialization, …)
+        # imported in the generated query rather than enumerated here.
+        assert isinstance(default_taint_config.sinks, list)
         assert len(default_taint_config.sanitizers) > 0
 
         # Verify all sources are enabled by default
         enabled_sources = [s for s in default_taint_config.sources if s.enabled]
         assert len(enabled_sources) == len(default_taint_config.sources)
 
-        # Verify all sinks are enabled by default
-        enabled_sinks = [s for s in default_taint_config.sinks if s.enabled]
-        assert len(enabled_sinks) == len(default_taint_config.sinks)
-
     def test_custom_configuration_yaml(self, sql_injection_app, tmp_path):
         """Test custom taint configuration from YAML."""
         # Create custom config with only SQL injection sinks
@@ -149,6 +166,58 @@ def test_config_merge_with_defaults(self, tmp_path):
         custom_sources = [s for s in config.sources if s.name == "custom_source"]
         assert len(custom_sources) == 1
 
+    def test_query_contains_all_builtin_imports(self, default_taint_config):
+        """Generated query must import all 20 CodeQL security customization modules."""
+        from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
+        query = TaintQueryGenerator.generate_query(default_taint_config)
+        expected_modules = [
+            "LdapInjectionCustomizations",
+            "XxeCustomizations",
+            "ServerSideRequestForgeryCustomizations",
+            "TemplateInjectionCustomizations",
+            "UnsafeDeserializationCustomizations",
+            "UrlRedirectCustomizations",
+            "LogInjectionCustomizations",
+            "NoSqlInjectionCustomizations",
+            "XpathInjectionCustomizations",
+            "TarSlipCustomizations",
+            "HttpHeaderInjectionCustomizations",
+            "CookieInjectionCustomizations",
+            "PolynomialReDoSCustomizations",
+            # CleartextStorageCustomizations and CleartextLoggingCustomizations are
+            # intentionally excluded: they use SensitiveDataSource (not RemoteFlowSource)
+            # and produce false positives when combined with general user-input sources.
+        ]
+        for mod in expected_modules:
+            assert mod in query, f"Generated query is missing import for {mod}"
+
+    def test_query_contains_all_builtin_sinks(self, default_taint_config):
+        """Generated query must include instanceof checks for all built-in sink classes."""
+        from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
+        query = TaintQueryGenerator.generate_query(default_taint_config)
+        expected_sinks = [
+            "LdapInjection::DnSink",
+            "LdapInjection::FilterSink",
+            "Xxe::Sink",
+            "ServerSideRequestForgery::Sink",
+            "TemplateInjection::Sink",
+            "UnsafeDeserialization::Sink",
+            "UrlRedirect::Sink",
+            "LogInjection::Sink",
+            "NoSqlInjection::StringSink",
+            "NoSqlInjection::DictSink",
+            "XpathInjection::Sink",
+            "TarSlip::Sink",
+            "HttpHeaderInjection::Sink",
+            "CookieInjection::Sink",
+            "PolynomialReDoS::Sink",
+            # CleartextStorage::Sink and CleartextLogging::Sink are intentionally excluded:
+            # these use SensitiveDataSource internally and produce false positives when
+            # combined with general user-input sources in a unified query.
+        ]
+        for sink in expected_sinks:
+            assert sink in query, f"Generated query is missing instanceof check for {sink}"
+
 
 class TestTaintAnalysisPydanticModels:
     """Tests for Pydantic models used in taint analysis."""
@@ -839,3 +908,83 @@ def test_analysis_level_3_requires_codeql(self, sql_injection_app, tmp_path):
         assert all(f.severity == "critical" for f in sql_flows), (
             "All SQL Injection flows should be critical severity"
         )
+
+
+# ============================================================================
+# Integration Tests — New Vulnerability Types (require CodeQL)
+# ============================================================================
+
+class TestTaintAnalysisNewVulnerabilityTypes:
+    """Integration tests for vulnerability types added via the expanded built-in CodeQL models."""
+
+    def test_ssti_detection(self, ssti_db, codeql_packs_dir):
+        """Server-Side Template Injection must be detected in ssti_app fixture."""
+        import shutil
+        if not shutil.which("codeql"):
+            pytest.skip("CodeQL not available")
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL packs not available")
+
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "ssti_app",
+            db_path=ssti_db,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+        from codeanalyzer.config.taint_config_defaults import get_default_taint_config as _get_cfg
+        result = codeql.analyze_taint_flows(config_override=_get_cfg())
+
+        ssti_flows = [f for f in result.flows if "Template Injection" in f.vulnerability_type]
+        assert len(ssti_flows) >= 1, (
+            f"Expected at least 1 SSTI flow, got {len(ssti_flows)}. "
+            f"All flows: {[f.vulnerability_type for f in result.flows]}"
+        )
+        assert all(f.severity == "critical" for f in ssti_flows), (
+            "All SSTI flows should be critical severity"
+        )
+
+    def test_unsafe_deserialization_detection(self, deserialization_db, codeql_packs_dir):
+        """Unsafe Deserialization must be detected in deserialization_app fixture."""
+        import shutil
+        if not shutil.which("codeql"):
+            pytest.skip("CodeQL not available")
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL packs not available")
+
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "deserialization_app",
+            db_path=deserialization_db,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+        from codeanalyzer.config.taint_config_defaults import get_default_taint_config as _get_cfg
+        result = codeql.analyze_taint_flows(config_override=_get_cfg())
+
+        deser_flows = [f for f in result.flows if "Deserialization" in f.vulnerability_type]
+        assert len(deser_flows) >= 1, (
+            f"Expected at least 1 Unsafe Deserialization flow, got {len(deser_flows)}. "
+            f"All flows: {[f.vulnerability_type for f in result.flows]}"
+        )
+        assert all(f.severity == "critical" for f in deser_flows), (
+            "All Unsafe Deserialization flows should be critical severity"
+        )
+
+    def test_ssrf_detection(self, ssrf_db, codeql_packs_dir):
+        """Server-Side Request Forgery must be detected in ssrf_app fixture."""
+        import shutil
+        if not shutil.which("codeql"):
+            pytest.skip("CodeQL not available")
+        if codeql_packs_dir is None:
+            pytest.skip("CodeQL packs not available")
+
+        codeql = CodeQL(
+            project_dir=FIXTURES_DIR / "ssrf_app",
+            db_path=ssrf_db,
+            codeql_packs_dir=codeql_packs_dir,
+        )
+        from codeanalyzer.config.taint_config_defaults import get_default_taint_config as _get_cfg
+        result = codeql.analyze_taint_flows(config_override=_get_cfg())
+
+        ssrf_flows = [f for f in result.flows if "Request Forgery" in f.vulnerability_type]
+        assert len(ssrf_flows) >= 1, (
+            f"Expected at least 1 SSRF flow, got {len(ssrf_flows)}. "
+            f"All flows: {[f.vulnerability_type for f in result.flows]}"
+        )

From 509a5419435bf3f722cd8df1cfb99e2cd685d27d Mon Sep 17 00:00:00 2001
From: Saurabh Sinha <sinha108@gmail.com>
Date: Wed, 20 May 2026 11:36:04 -0400
Subject: [PATCH 3/4] Improve taint analysis extensibility: fix merge bugs, add
 disabled_builtin_sinks, three-mode config control, and validation

Signed-off-by: Saurabh Sinha <sinha108@gmail.com>
---
 codeanalyzer/__main__.py                      |  19 +-
 codeanalyzer/config/taint_config_loader.py    |  58 +++--
 codeanalyzer/core.py                          |  13 +-
 codeanalyzer/options/options.py               |   1 +
 codeanalyzer/schema/py_schema.py              |  10 +
 .../codeql/taint_query_generator.py           | 163 ++++----------
 examples/taint-config.example.yaml            | 137 ++++++++++++
 test/test_taint_analysis.py                   | 206 ++++++++++++++++++
 8 files changed, 464 insertions(+), 143 deletions(-)
 create mode 100644 examples/taint-config.example.yaml

diff --git a/codeanalyzer/__main__.py b/codeanalyzer/__main__.py
index 02b25ae..5d5f65b 100644
--- a/codeanalyzer/__main__.py
+++ b/codeanalyzer/__main__.py
@@ -41,6 +41,18 @@ def main(
             help="Path to taint analysis configuration file (YAML or JSON). Used with --analysis-level 3.",
         ),
     ] = None,
+    taint_use_defaults: Annotated[
+        bool,
+        typer.Option(
+            "--taint-defaults/--no-taint-defaults",
+            help=(
+                "Controls which taint sources/sinks/sanitizers are active:\n\n"
+                "  (no --taint-config)          → built-in defaults only\n"
+                "  --taint-config + --taint-defaults  → union of defaults and custom config [default]\n"
+                "  --taint-config + --no-taint-defaults → custom config only, replaces all defaults"
+            ),
+        ),
+    ] = True,
     using_ray: Annotated[
         bool,
         typer.Option("--ray/--no-ray", help="Enable Ray for distributed analysis."),
@@ -89,10 +101,14 @@ def main(
     if analysis_level >= 2 and not using_codeql:
         logger.error("Analysis levels 2 and 3 require --codeql flag")
         raise typer.Exit(code=1)
-    
+
     if analysis_level >= 3 and taint_config and not taint_config.exists():
         logger.error(f"Taint configuration file '{taint_config}' does not exist.")
         raise typer.Exit(code=1)
+
+    if not taint_use_defaults and not taint_config:
+        logger.error("--no-taint-defaults requires --taint-config (otherwise nothing would be analyzed).")
+        raise typer.Exit(code=1)
     
     options = AnalysisOptions(
         input=input,
@@ -108,6 +124,7 @@ def main(
         clear_cache=clear_cache,
         verbosity=verbosity,
         taint_config=taint_config,
+        taint_use_defaults=taint_use_defaults,
     )
 
     _set_log_level(options.verbosity)
diff --git a/codeanalyzer/config/taint_config_loader.py b/codeanalyzer/config/taint_config_loader.py
index 9a409b0..e51455f 100644
--- a/codeanalyzer/config/taint_config_loader.py
+++ b/codeanalyzer/config/taint_config_loader.py
@@ -56,32 +56,56 @@ def load_config(
             FileNotFoundError: If config_path is provided but file doesn't exist
             ValueError: If file format is unsupported or invalid
         """
+        # Log which of the three config modes is active
+        if config_path and use_defaults:
+            logger.info(f"Taint config mode: merging '{config_path}' with built-in defaults")
+        elif config_path:
+            logger.info(f"Taint config mode: custom only — '{config_path}' (built-in defaults disabled)")
+        else:
+            logger.info("Taint config mode: built-in defaults only")
+
         # Start with defaults if requested
         if use_defaults:
             config = get_default_taint_config()
-            logger.debug(f"Loaded default taint configuration with {len(config.sources)} sources, "
-                        f"{len(config.sinks)} sinks, {len(config.sanitizers)} sanitizers")
+            logger.debug(
+                f"  Defaults loaded: {len(config.sources)} sources, "
+                f"{len(config.sanitizers)} sanitizers"
+            )
         else:
             config = TaintAnalysisConfig()
-            logger.debug("Starting with empty taint configuration")
-        
+
         # Load and merge custom configuration
         if config_path:
             custom_config = TaintConfigLoader._load_from_file(config_path)
+            logger.debug(
+                f"  Custom file adds: {len(custom_config.sources)} sources, "
+                f"{len(custom_config.sinks)} sinks, "
+                f"{len(custom_config.sanitizers)} sanitizers"
+            )
             config = TaintConfigLoader._merge_configs(config, custom_config)
-            logger.info(f"Merged custom configuration from {config_path}")
-        
+
         # Filter out disabled items
         config = TaintConfigLoader._filter_disabled(config)
-        
+
+        # Warn on any structural problems (missing sources, empty patterns, etc.)
+        issues = TaintConfigLoader.validate_config(config)
+        for issue in issues:
+            logger.warning(f"Taint config: {issue}")
+
         n_builtin = TaintQueryGenerator.builtin_sink_count()
+        n_disabled = len(config.disabled_builtin_sinks)
+        active_builtin = n_builtin - n_disabled
+        builtin_label = (
+            f"{active_builtin} of {n_builtin} built-in CodeQL sinks"
+            if n_disabled
+            else f"{n_builtin} built-in CodeQL sinks"
+        )
         logger.info(
-            f"Final taint configuration: {len(config.sources)} sources, "
-            f"{len(config.sinks)} user-configured sinks "
-            f"(+{n_builtin} built-in CodeQL sink models always active), "
+            f"Active taint config: {len(config.sources)} sources, "
+            f"{len(config.sinks)} user-defined sinks (+{builtin_label}), "
             f"{len(config.sanitizers)} sanitizers"
         )
-        
+
         return config
     
     @staticmethod
@@ -185,19 +209,20 @@ def _merge_configs(
         merged_exclude_files = list(set(base.exclude_files + custom.exclude_files))
         merged_exclude_functions = list(set(base.exclude_functions + custom.exclude_functions))
         
-        # Create merged config
-        # Use custom values for options if they differ from defaults
+        # Scalar options: custom always wins (it owns those knobs).
+        # Booleans that are "additive" (enabling features) use OR.
         return TaintAnalysisConfig(
             sources=list(base_sources.values()),
             sinks=list(base_sinks.values()),
             sanitizers=list(base_sanitizers.values()),
-            max_path_length=custom.max_path_length if custom.max_path_length != 10 else base.max_path_length,
+            max_path_length=custom.max_path_length,
             include_implicit_flows=custom.include_implicit_flows or base.include_implicit_flows,
-            confidence_threshold=custom.confidence_threshold if custom.confidence_threshold != "medium" else base.confidence_threshold,
+            confidence_threshold=custom.confidence_threshold,
             exclude_files=merged_exclude_files,
             exclude_functions=merged_exclude_functions,
             include_safe_flows=custom.include_safe_flows or base.include_safe_flows,
-            group_by_vulnerability=custom.group_by_vulnerability if not custom.group_by_vulnerability else base.group_by_vulnerability,
+            group_by_vulnerability=custom.group_by_vulnerability,
+            disabled_builtin_sinks=list(set(base.disabled_builtin_sinks + custom.disabled_builtin_sinks)),
         )
     
     @staticmethod
@@ -234,6 +259,7 @@ def _filter_disabled(config: TaintAnalysisConfig) -> TaintAnalysisConfig:
             exclude_functions=config.exclude_functions,
             include_safe_flows=config.include_safe_flows,
             group_by_vulnerability=config.group_by_vulnerability,
+            disabled_builtin_sinks=config.disabled_builtin_sinks,
         )
     
     @staticmethod
diff --git a/codeanalyzer/core.py b/codeanalyzer/core.py
index 51ccecd..255a861 100644
--- a/codeanalyzer/core.py
+++ b/codeanalyzer/core.py
@@ -758,23 +758,16 @@ def _perform_taint_analysis(self, symbol_table: Optional[Dict[str, PyModule]] =
         if not self.db_path:
             raise ValueError("CodeQL database not available for taint analysis")
 
-        # Load taint configuration
+        # Load taint configuration — load_config logs the mode and active counts
+        use_defaults = getattr(self.options, "taint_use_defaults", True)
         if self.options.taint_config:
-            logger.info(f"Loading taint configuration from {self.options.taint_config}")
             taint_config = TaintConfigLoader.load_config(
                 self.options.taint_config,
-                use_defaults=True
+                use_defaults=use_defaults,
             )
         else:
-            logger.info("Using default taint analysis configuration")
             taint_config = TaintConfigLoader.load_config(use_defaults=True)
 
-        # Log configuration summary
-        logger.info(f"Taint analysis configuration:")
-        logger.info(f"  - Sources: {len(taint_config.sources)}")
-        logger.info(f"  - Sinks: {len(taint_config.sinks)}")
-        logger.info(f"  - Sanitizers: {len(taint_config.sanitizers)}")
-
         # Perform analysis
         codeql = CodeQL(
             project_dir=self.project_dir,
diff --git a/codeanalyzer/options/options.py b/codeanalyzer/options/options.py
index e4d32e8..b14033e 100644
--- a/codeanalyzer/options/options.py
+++ b/codeanalyzer/options/options.py
@@ -24,3 +24,4 @@ class AnalysisOptions:
     clear_cache: bool = False
     verbosity: int = 0
     taint_config: Optional[Path] = None
+    taint_use_defaults: bool = True
diff --git a/codeanalyzer/schema/py_schema.py b/codeanalyzer/schema/py_schema.py
index 6dd004b..832b2b4 100644
--- a/codeanalyzer/schema/py_schema.py
+++ b/codeanalyzer/schema/py_schema.py
@@ -523,6 +523,16 @@ class TaintAnalysisConfig(BaseModel):
     group_by_vulnerability: bool = True
     """When ``True``, results are grouped by vulnerability type in log output."""
 
+    disabled_builtin_sinks: List[str] = []
+    """Names of built-in CodeQL sink classes to suppress.
+
+    Each entry must match a ``class::SinkClass`` string from
+    ``TaintQueryGenerator.BUILTIN_SINKS`` (e.g. ``"PolynomialReDoS::Sink"``
+    or ``"CookieInjection::Sink"``).  Matching entries are skipped during
+    query generation so that specific vulnerability types can be excluded
+    without replacing the entire built-in sink set.
+    """
+
 
 @builder
 @msgpk
diff --git a/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py b/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py
index b392e78..054fe58 100644
--- a/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py
+++ b/codeanalyzer/semantic_analysis/codeql/taint_query_generator.py
@@ -51,7 +51,6 @@
 from codeanalyzer.schema.py_schema import (
     TaintAnalysisConfig,
     TaintSourceConfig,
-    TaintSinkConfig,
     TaintSanitizerConfig,
 )
 
@@ -59,29 +58,31 @@
 class TaintQueryGenerator:
     """Generates CodeQL queries from taint analysis configuration."""
 
-    # Built-in CodeQL sink models always included in the generated query,
-    # regardless of user configuration. Each entry is (module::SinkClass, vulnerability_type).
-    BUILTIN_SINKS: List[tuple] = [
-        ("SqlInjection::Sink",              "SQL Injection"),
-        ("CommandInjection::Sink",          "Command Injection"),
-        ("CodeInjection::Sink",             "Code Injection"),
-        ("PathInjection::Sink",             "Path Traversal"),
-        ("ReflectedXss::Sink",              "Cross-Site Scripting (XSS)"),
-        ("LdapInjection::DnSink",           "LDAP Injection"),
-        ("LdapInjection::FilterSink",       "LDAP Injection"),
-        ("Xxe::Sink",                       "XML External Entity (XXE)"),
-        ("ServerSideRequestForgery::Sink",  "Server-Side Request Forgery (SSRF)"),
-        ("TemplateInjection::Sink",         "Server-Side Template Injection (SSTI)"),
-        ("UnsafeDeserialization::Sink",     "Unsafe Deserialization"),
-        ("UrlRedirect::Sink",               "Open Redirect"),
-        ("LogInjection::Sink",              "Log Injection"),
-        ("NoSqlInjection::StringSink",      "NoSQL Injection"),
-        ("NoSqlInjection::DictSink",        "NoSQL Injection"),
-        ("XpathInjection::Sink",            "XPath Injection"),
-        ("TarSlip::Sink",                   "Tar/Zip Slip"),
-        ("HttpHeaderInjection::Sink",       "HTTP Header Injection"),
-        ("CookieInjection::Sink",           "Cookie Injection"),
-        ("PolynomialReDoS::Sink",           "Regular Expression Injection (ReDoS)"),
+    # Built-in CodeQL sink models included in the generated query by default.
+    # Each dict has: class (CodeQL class expression), sink_type, severity,
+    # vulnerability_type, and comment (used as inline documentation in the query).
+    # Individual entries can be suppressed via TaintAnalysisConfig.disabled_builtin_sinks.
+    BUILTIN_SINKS: List[dict] = [
+        {"class": "SqlInjection::Sink",             "sink_type": "sql_execution",    "severity": "critical", "vulnerability_type": "SQL Injection",                          "comment": "sqlite3, psycopg2, SQLAlchemy, Django ORM raw, …"},
+        {"class": "CommandInjection::Sink",         "sink_type": "command_execution","severity": "critical", "vulnerability_type": "Command Injection",                      "comment": "subprocess.*, os.system, os.popen, …"},
+        {"class": "CodeInjection::Sink",            "sink_type": "code_execution",   "severity": "critical", "vulnerability_type": "Code Injection",                         "comment": "eval, exec, compile, …"},
+        {"class": "PathInjection::Sink",            "sink_type": "file_access",      "severity": "high",     "vulnerability_type": "Path Traversal",                         "comment": "open, os.path.*, pathlib.Path.open, …"},
+        {"class": "ReflectedXss::Sink",             "sink_type": "template_rendering","severity": "high",    "vulnerability_type": "Cross-Site Scripting (XSS)",             "comment": "Flask/Django template rendering, …"},
+        {"class": "LdapInjection::DnSink",          "sink_type": "ldap_query",       "severity": "high",     "vulnerability_type": "LDAP Injection",                         "comment": "LDAP DN component"},
+        {"class": "LdapInjection::FilterSink",      "sink_type": "ldap_query",       "severity": "high",     "vulnerability_type": "LDAP Injection",                         "comment": "LDAP filter component"},
+        {"class": "Xxe::Sink",                      "sink_type": "xml_parsing",      "severity": "high",     "vulnerability_type": "XML External Entity (XXE)",              "comment": "XML parsers with external entity expansion"},
+        {"class": "ServerSideRequestForgery::Sink", "sink_type": "ssrf_request",     "severity": "high",     "vulnerability_type": "Server-Side Request Forgery (SSRF)",     "comment": "outbound HTTP requests with user-controlled URL"},
+        {"class": "TemplateInjection::Sink",        "sink_type": "template_rendering","severity": "critical","vulnerability_type": "Server-Side Template Injection (SSTI)",  "comment": "render_template_string, Jinja2 Environment.from_string, …"},
+        {"class": "UnsafeDeserialization::Sink",    "sink_type": "deserialization",  "severity": "critical", "vulnerability_type": "Unsafe Deserialization",                 "comment": "pickle.loads, yaml.load, …"},
+        {"class": "UrlRedirect::Sink",              "sink_type": "url_redirect",     "severity": "medium",   "vulnerability_type": "Open Redirect",                          "comment": "redirect(), HttpResponseRedirect, …"},
+        {"class": "LogInjection::Sink",             "sink_type": "log_output",       "severity": "medium",   "vulnerability_type": "Log Injection",                          "comment": "logging.*, structlog, …"},
+        {"class": "NoSqlInjection::StringSink",     "sink_type": "nosql_query",      "severity": "high",     "vulnerability_type": "NoSQL Injection",                        "comment": "MongoDB/Redis string queries"},
+        {"class": "NoSqlInjection::DictSink",       "sink_type": "nosql_query",      "severity": "high",     "vulnerability_type": "NoSQL Injection",                        "comment": "MongoDB dict/object queries"},
+        {"class": "XpathInjection::Sink",           "sink_type": "xpath_query",      "severity": "high",     "vulnerability_type": "XPath Injection",                        "comment": "lxml, ElementTree XPath expressions"},
+        {"class": "TarSlip::Sink",                  "sink_type": "file_access",      "severity": "high",     "vulnerability_type": "Tar/Zip Slip",                           "comment": "tarfile.extract, zipfile.extractall, …"},
+        {"class": "HttpHeaderInjection::Sink",      "sink_type": "http_header",      "severity": "medium",   "vulnerability_type": "HTTP Header Injection",                  "comment": "Response.headers, …"},
+        {"class": "CookieInjection::Sink",          "sink_type": "cookie_write",     "severity": "medium",   "vulnerability_type": "Cookie Injection",                       "comment": "set_cookie, …"},
+        {"class": "PolynomialReDoS::Sink",          "sink_type": "regex_execution",  "severity": "medium",   "vulnerability_type": "Regular Expression Injection (ReDoS)",   "comment": "re.match/search/fullmatch with user-supplied pattern"},
     ]
 
     @classmethod
@@ -89,6 +90,11 @@ def builtin_sink_count(cls) -> int:
         """Number of built-in CodeQL sink models always active in the generated query."""
         return len(cls.BUILTIN_SINKS)
 
+    @classmethod
+    def builtin_sink_names(cls) -> List[str]:
+        """All built-in sink class names (usable in ``disabled_builtin_sinks``)."""
+        return [s["class"] for s in cls.BUILTIN_SINKS]
+
     @staticmethod
     def generate_query(config: TaintAnalysisConfig) -> str:
         """Generate complete taint analysis CodeQL query from configuration.
@@ -108,7 +114,7 @@ def generate_query(config: TaintAnalysisConfig) -> str:
         query_parts.append(TaintQueryGenerator._generate_header())
         query_parts.append(TaintQueryGenerator._generate_imports())
         query_parts.append(TaintQueryGenerator._generate_source_predicate(config.sources))
-        query_parts.append(TaintQueryGenerator._generate_sink_predicate(config.sinks))
+        query_parts.append(TaintQueryGenerator._generate_sink_predicate(config))
 
         if config.sanitizers:
             query_parts.append(TaintQueryGenerator._generate_sanitizer_predicate(config.sanitizers))
@@ -262,105 +268,30 @@ def _generate_source_predicate(sources: List[TaintSourceConfig]) -> str:
         lines.append("}")
         return "\n".join(lines)
 
-    @staticmethod
-    def _generate_sink_predicate(sinks: List[TaintSinkConfig]) -> str:
+    @classmethod
+    def _generate_sink_predicate(cls, config: "TaintAnalysisConfig") -> str:
         """Generate isSink predicate combining built-in security sinks with
         any user-configured sinks.
 
-        Built-in sink classes from ``codeql/python-all`` cover:
-        - ``SqlInjection::Sink``   — sqlite3, psycopg2, mysql-connector,
-                                     SQLAlchemy, Django ORM raw queries, …
-        - ``CommandInjection::Sink`` — subprocess.*, os.system, os.popen, …
-        - ``CodeInjection::Sink``  — eval(), exec(), compile(), …
-        - ``PathTraversal::Sink``  — open(), os.path.*, pathlib.Path.open(), …
-        - ``XSS::Sink``            — Flask/Django template rendering, …
-
-        User-configured patterns extend this with project-specific sinks.
+        Built-in sinks are driven by ``BUILTIN_SINKS``; any whose ``class``
+        appears in ``config.disabled_builtin_sinks`` are omitted.
+        User-configured patterns in ``config.sinks`` are appended afterward.
         """
+        disabled = set(config.disabled_builtin_sinks)
+        active_builtins = [s for s in cls.BUILTIN_SINKS if s["class"] not in disabled]
+
         lines = [
             "predicate isConfiguredSink(DataFlow::Node node, string sinkType, string severity, string vulnerabilityType) {",
-            "  // Built-in: SQL injection sinks (sqlite3, psycopg2, SQLAlchemy, Django ORM raw, …)",
-            "  (node instanceof SqlInjection::Sink and",
-            "   sinkType = \"sql_execution\" and severity = \"critical\" and vulnerabilityType = \"SQL Injection\")",
-            "  or",
-            "  // Built-in: Command injection sinks (subprocess.*, os.system, os.popen, …)",
-            "  (node instanceof CommandInjection::Sink and",
-            "   sinkType = \"command_execution\" and severity = \"critical\" and vulnerabilityType = \"Command Injection\")",
-            "  or",
-            "  // Built-in: Code injection sinks (eval, exec, compile, …)",
-            "  (node instanceof CodeInjection::Sink and",
-            "   sinkType = \"code_execution\" and severity = \"critical\" and vulnerabilityType = \"Code Injection\")",
-            "  or",
-            "  // Built-in: Path injection sinks (open, os.path.*, pathlib.Path.open, …)",
-            "  (node instanceof PathInjection::Sink and",
-            "   sinkType = \"file_access\" and severity = \"high\" and vulnerabilityType = \"Path Traversal\")",
-            "  or",
-            "  // Built-in: Reflected XSS sinks (Flask/Django template rendering, …)",
-            "  (node instanceof ReflectedXss::Sink and",
-            "   sinkType = \"template_rendering\" and severity = \"high\" and vulnerabilityType = \"Cross-Site Scripting (XSS)\")",
-            "  or",
-            "  // Built-in: LDAP injection — DN component",
-            "  (node instanceof LdapInjection::DnSink and",
-            "   sinkType = \"ldap_query\" and severity = \"high\" and vulnerabilityType = \"LDAP Injection\")",
-            "  or",
-            "  // Built-in: LDAP injection — filter component",
-            "  (node instanceof LdapInjection::FilterSink and",
-            "   sinkType = \"ldap_query\" and severity = \"high\" and vulnerabilityType = \"LDAP Injection\")",
-            "  or",
-            "  // Built-in: XML External Entity (XXE) injection",
-            "  (node instanceof Xxe::Sink and",
-            "   sinkType = \"xml_parsing\" and severity = \"high\" and vulnerabilityType = \"XML External Entity (XXE)\")",
-            "  or",
-            "  // Built-in: Server-Side Request Forgery (SSRF)",
-            "  (node instanceof ServerSideRequestForgery::Sink and",
-            "   sinkType = \"ssrf_request\" and severity = \"high\" and vulnerabilityType = \"Server-Side Request Forgery (SSRF)\")",
-            "  or",
-            "  // Built-in: Server-Side Template Injection (SSTI)",
-            "  (node instanceof TemplateInjection::Sink and",
-            "   sinkType = \"template_rendering\" and severity = \"critical\" and vulnerabilityType = \"Server-Side Template Injection (SSTI)\")",
-            "  or",
-            "  // Built-in: Unsafe Deserialization (pickle, yaml.load, …)",
-            "  (node instanceof UnsafeDeserialization::Sink and",
-            "   sinkType = \"deserialization\" and severity = \"critical\" and vulnerabilityType = \"Unsafe Deserialization\")",
-            "  or",
-            "  // Built-in: Open Redirect",
-            "  (node instanceof UrlRedirect::Sink and",
-            "   sinkType = \"url_redirect\" and severity = \"medium\" and vulnerabilityType = \"Open Redirect\")",
-            "  or",
-            "  // Built-in: Log Injection",
-            "  (node instanceof LogInjection::Sink and",
-            "   sinkType = \"log_output\" and severity = \"medium\" and vulnerabilityType = \"Log Injection\")",
-            "  or",
-            "  // Built-in: NoSQL Injection — string payload",
-            "  (node instanceof NoSqlInjection::StringSink and",
-            "   sinkType = \"nosql_query\" and severity = \"high\" and vulnerabilityType = \"NoSQL Injection\")",
-            "  or",
-            "  // Built-in: NoSQL Injection — dictionary/object payload",
-            "  (node instanceof NoSqlInjection::DictSink and",
-            "   sinkType = \"nosql_query\" and severity = \"high\" and vulnerabilityType = \"NoSQL Injection\")",
-            "  or",
-            "  // Built-in: XPath Injection",
-            "  (node instanceof XpathInjection::Sink and",
-            "   sinkType = \"xpath_query\" and severity = \"high\" and vulnerabilityType = \"XPath Injection\")",
-            "  or",
-            "  // Built-in: Tar/Zip Slip (path traversal via archive extraction)",
-            "  (node instanceof TarSlip::Sink and",
-            "   sinkType = \"file_access\" and severity = \"high\" and vulnerabilityType = \"Tar/Zip Slip\")",
-            "  or",
-            "  // Built-in: HTTP Header Injection",
-            "  (node instanceof HttpHeaderInjection::Sink and",
-            "   sinkType = \"http_header\" and severity = \"medium\" and vulnerabilityType = \"HTTP Header Injection\")",
-            "  or",
-            "  // Built-in: Cookie Injection",
-            "  (node instanceof CookieInjection::Sink and",
-            "   sinkType = \"cookie_write\" and severity = \"medium\" and vulnerabilityType = \"Cookie Injection\")",
-            "  or",
-            "  // Built-in: Regular Expression Injection / Polynomial ReDoS",
-            "  (node instanceof PolynomialReDoS::Sink and",
-            "   sinkType = \"regex_execution\" and severity = \"medium\" and vulnerabilityType = \"Regular Expression Injection (ReDoS)\")",
         ]
 
-        for sink in sinks:
+        for i, sink in enumerate(active_builtins):
+            if i > 0:
+                lines.append("  or")
+            lines.append(f"  // Built-in: {sink['vulnerability_type']} ({sink['comment']})")
+            lines.append(f"  (node instanceof {sink['class']} and")
+            lines.append(f"   sinkType = \"{sink['sink_type']}\" and severity = \"{sink['severity']}\" and vulnerabilityType = \"{sink['vulnerability_type']}\")")
+
+        for sink in config.sinks:
             lines.append("  or")
             lines.append(f"  // User-configured: {sink.description}")
 
diff --git a/examples/taint-config.example.yaml b/examples/taint-config.example.yaml
new file mode 100644
index 0000000..b2c23fe
--- /dev/null
+++ b/examples/taint-config.example.yaml
@@ -0,0 +1,137 @@
+# Taint analysis configuration for codeanalyzer --analysis-level 3
+#
+# Usage modes (controlled by --taint-defaults / --no-taint-defaults):
+#
+#   Defaults only (no --taint-config):
+#     codeanalyzer -i ./myproject -a 3 --codeql
+#
+#   Extend defaults with custom config (union):
+#     codeanalyzer -i ./myproject -a 3 --codeql --taint-config taint-config.yaml
+#
+#   Custom config only (replace all defaults):
+#     codeanalyzer -i ./myproject -a 3 --codeql --taint-config taint-config.yaml --no-taint-defaults
+#
+# All three sections (sources, sinks, sanitizers) are optional.
+# Omitted sections default to empty lists — the built-in CodeQL models
+# (RemoteFlowSource, SqlInjection::Sink, CommandInjection::Sink, …) are
+# always active unless explicitly suppressed via disabled_builtin_sinks.
+
+# ---------------------------------------------------------------------------
+# Global options
+# ---------------------------------------------------------------------------
+max_path_length: 10          # Maximum taint-path steps reported (default: 10)
+confidence_threshold: medium # Minimum confidence to include: high | medium | low
+group_by_vulnerability: true # Group log output by vulnerability type
+
+# ---------------------------------------------------------------------------
+# Suppress specific built-in CodeQL sink models
+# ---------------------------------------------------------------------------
+# Useful for noisy or irrelevant vulnerability types.
+# Full list: run `python -c "from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator; print(*TaintQueryGenerator.builtin_sink_names(), sep='\n')"`
+#
+# disabled_builtin_sinks:
+#   - PolynomialReDoS::Sink        # very noisy on regex-heavy codebases
+#   - CookieInjection::Sink
+
+# ---------------------------------------------------------------------------
+# Additional taint sources (extends or replaces RemoteFlowSource)
+# ---------------------------------------------------------------------------
+# Pattern must be a valid CodeQL API-graph expression evaluating to a
+# DataFlow::Node.  Use double quotes inside the pattern — CodeQL does not
+# support single-quoted strings.
+#
+sources:
+  # Environment variables (e.g. config loaded from os.environ)
+  - name: env_var
+    description: "os.environ and os.getenv calls"
+    pattern: 'API::moduleImport("os").getMember("environ").asSource()'
+    source_type: environment_variable
+    enabled: true
+
+  # CLI arguments (sys.argv)
+  - name: sys_argv
+    description: "sys.argv command-line arguments"
+    pattern: 'API::moduleImport("sys").getMember("argv").asSource()'
+    source_type: cli_argument
+    enabled: true
+
+  # Standard input
+  - name: builtin_input
+    description: "input() built-in"
+    pattern: 'API::builtin("input").getACall()'
+    source_type: user_input
+    enabled: true
+
+  # Disable one of the above without removing it:
+  # - name: some_source
+  #   ...
+  #   enabled: false
+
+# ---------------------------------------------------------------------------
+# Additional taint sinks (supplements built-in CodeQL sinks)
+# ---------------------------------------------------------------------------
+# Built-in sinks (SQL, command, path traversal, XSS, SSTI, SSRF, …) are
+# always active.  Add entries here for project-specific APIs not covered
+# by CodeQL's model library.
+#
+sinks:
+  # Project-specific DB wrapper
+  - name: custom_db_execute
+    description: "Internal db.execute() wrapper"
+    pattern: 'API::moduleImport("myapp.db").getMember("execute").getACall()'
+    sink_type: sql_execution
+    vulnerability_type: SQL Injection
+    severity: critical
+    argument_index: 0   # Only the first argument (the query string) is the sink
+
+  # Custom HTTP client
+  - name: internal_http_get
+    description: "Internal HTTP client get()"
+    pattern: 'API::moduleImport("myapp.http").getMember("get").getACall()'
+    sink_type: ssrf_request
+    vulnerability_type: Server-Side Request Forgery (SSRF)
+    severity: high
+    # argument_index omitted → any tainted argument triggers the sink
+
+# ---------------------------------------------------------------------------
+# Sanitizers (blocks taint propagation through the matching node)
+# ---------------------------------------------------------------------------
+# All enabled sanitizers unconditionally block all taint flows passing
+# through them.  The `sanitizes` list is informational documentation only.
+#
+sanitizers:
+  # HTML escaping
+  - name: html_escape
+    description: "html.escape() neutralises XSS"
+    pattern: 'API::moduleImport("html").getMember("escape").getACall()'
+    sanitizes: [xss, template_injection]
+    enabled: true
+
+  # Shell quoting
+  - name: shlex_quote
+    description: "shlex.quote() neutralises command injection"
+    pattern: 'API::moduleImport("shlex").getMember("quote").getACall()'
+    sanitizes: [command_injection]
+    enabled: true
+
+  # SQL parameter binding via parameterised query (cursor.execute with args)
+  # Note: prefer argument_index on the sink side for this; sanitizer shown
+  # here is illustrative.
+  - name: bleach_clean
+    description: "bleach.clean() HTML sanitiser"
+    pattern: 'API::moduleImport("bleach").getMember("clean").getACall()'
+    sanitizes: [xss]
+    enabled: true
+
+# ---------------------------------------------------------------------------
+# Exclusions
+# ---------------------------------------------------------------------------
+# exclude_files: glob patterns relative to the project root
+# exclude_functions: fully-qualified function names to exclude as sources/sinks
+#
+# exclude_files:
+#   - "tests/**"
+#   - "**/*_test.py"
+#
+# exclude_functions:
+#   - myapp.utils.sanitize_input
diff --git a/test/test_taint_analysis.py b/test/test_taint_analysis.py
index 6b48e2c..853ac69 100644
--- a/test/test_taint_analysis.py
+++ b/test/test_taint_analysis.py
@@ -341,6 +341,212 @@ def test_disabled_sources_and_sinks(self, sql_injection_app, tmp_path):
         assert len(filtered_config.sinks) == 0
 
 
+# ============================================================================
+# Extensibility mechanism unit tests (no CodeQL required)
+# ============================================================================
+
+class TestTaintConfigExtensibility:
+    """Tests for the taint config extensibility mechanism: merge, disabled sinks,
+    use_defaults, and validate_config integration."""
+
+    # ------------------------------------------------------------------
+    # Scalar merge correctness
+    # ------------------------------------------------------------------
+
+    def test_merge_scalars_custom_wins(self):
+        """Custom config scalars always override base — was broken before fix."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+        base = TaintAnalysisConfig(max_path_length=15, group_by_vulnerability=False, confidence_threshold="low")
+        custom = TaintAnalysisConfig(max_path_length=5, group_by_vulnerability=True, confidence_threshold="high")
+        merged = TaintConfigLoader._merge_configs(base, custom)
+        assert merged.max_path_length == 5
+        assert merged.group_by_vulnerability is True
+        assert merged.confidence_threshold == "high"
+
+    def test_merge_scalars_custom_default_value_still_wins(self):
+        """Custom config with value == schema default (e.g. max_path_length=10) must win.
+        Previously a sentinel comparison '!= 10' silently ignored this case."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+        base = TaintAnalysisConfig(max_path_length=20, confidence_threshold="low")
+        custom = TaintAnalysisConfig(max_path_length=10, confidence_threshold="medium")
+        merged = TaintConfigLoader._merge_configs(base, custom)
+        assert merged.max_path_length == 10, "max_path_length=10 must not be silently discarded"
+        assert merged.confidence_threshold == "medium", "confidence_threshold='medium' must not be silently discarded"
+
+    def test_merge_additive_booleans(self):
+        """include_implicit_flows and include_safe_flows use OR (enabling is additive)."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+        base = TaintAnalysisConfig(include_implicit_flows=True, include_safe_flows=False)
+        custom = TaintAnalysisConfig(include_implicit_flows=False, include_safe_flows=True)
+        merged = TaintConfigLoader._merge_configs(base, custom)
+        assert merged.include_implicit_flows is True   # OR(True, False)
+        assert merged.include_safe_flows is True        # OR(False, True)
+
+    def test_merge_exclude_lists_combined(self):
+        """exclude_files and exclude_functions are unioned across base and custom."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+        base = TaintAnalysisConfig(exclude_files=["tests/**"], exclude_functions=["myapp.utils.safe"])
+        custom = TaintAnalysisConfig(exclude_files=["vendor/**"], exclude_functions=["myapp.debug.dump"])
+        merged = TaintConfigLoader._merge_configs(base, custom)
+        assert "tests/**" in merged.exclude_files
+        assert "vendor/**" in merged.exclude_files
+        assert "myapp.utils.safe" in merged.exclude_functions
+        assert "myapp.debug.dump" in merged.exclude_functions
+
+    # ------------------------------------------------------------------
+    # disabled_builtin_sinks
+    # ------------------------------------------------------------------
+
+    def test_disabled_builtin_sinks_removes_from_query(self):
+        """Sinks listed in disabled_builtin_sinks must not appear in generated query."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+        from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
+        config = TaintAnalysisConfig(disabled_builtin_sinks=["PolynomialReDoS::Sink", "CookieInjection::Sink"])
+        query = TaintQueryGenerator.generate_query(config)
+        assert "PolynomialReDoS::Sink" not in query
+        assert "CookieInjection::Sink" not in query
+        assert "SqlInjection::Sink" in query  # others remain
+
+    def test_disabled_builtin_sinks_empty_keeps_all(self):
+        """Empty disabled_builtin_sinks list keeps all 20 built-in sinks in query."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+        from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
+        config = TaintAnalysisConfig()
+        query = TaintQueryGenerator.generate_query(config)
+        for name in TaintQueryGenerator.builtin_sink_names():
+            assert name in query, f"Expected {name} in query with no disabled sinks"
+
+    def test_disabled_builtin_sinks_merged_from_both_sides(self):
+        """disabled_builtin_sinks from base and custom are unioned on merge."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+        base = TaintAnalysisConfig(disabled_builtin_sinks=["CookieInjection::Sink"])
+        custom = TaintAnalysisConfig(disabled_builtin_sinks=["PolynomialReDoS::Sink"])
+        merged = TaintConfigLoader._merge_configs(base, custom)
+        assert "CookieInjection::Sink" in merged.disabled_builtin_sinks
+        assert "PolynomialReDoS::Sink" in merged.disabled_builtin_sinks
+
+    def test_disabled_builtin_sinks_survives_filter_disabled(self):
+        """_filter_disabled must carry disabled_builtin_sinks through unchanged."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+        config = TaintAnalysisConfig(disabled_builtin_sinks=["TarSlip::Sink"])
+        filtered = TaintConfigLoader._filter_disabled(config)
+        assert "TarSlip::Sink" in filtered.disabled_builtin_sinks
+
+    def test_disabled_builtin_sinks_from_yaml(self, tmp_path):
+        """disabled_builtin_sinks loaded from YAML file is honoured in query."""
+        from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
+        yaml_content = """
+disabled_builtin_sinks:
+  - PolynomialReDoS::Sink
+  - HttpHeaderInjection::Sink
+sources: []
+sinks: []
+sanitizers: []
+"""
+        config_file = tmp_path / "cfg.yaml"
+        config_file.write_text(yaml_content)
+        config = TaintConfigLoader.load_config(config_file, use_defaults=False)
+        assert "PolynomialReDoS::Sink" in config.disabled_builtin_sinks
+        query = TaintQueryGenerator.generate_query(config)
+        assert "PolynomialReDoS::Sink" not in query
+        assert "HttpHeaderInjection::Sink" not in query
+
+    # ------------------------------------------------------------------
+    # use_defaults flag / three modes
+    # ------------------------------------------------------------------
+
+    def test_use_defaults_false_no_custom_gives_empty_config(self):
+        """use_defaults=False with no config_path produces empty sources/sinks/sanitizers."""
+        config = TaintConfigLoader.load_config(use_defaults=False)
+        assert len(config.sources) == 0
+        assert len(config.sinks) == 0
+        assert len(config.sanitizers) == 0
+
+    def test_use_defaults_true_gives_default_sources(self):
+        """use_defaults=True (default) loads default sources and sanitizers."""
+        config = TaintConfigLoader.load_config(use_defaults=True)
+        assert len(config.sources) > 0
+        assert len(config.sanitizers) > 0
+
+    def test_use_defaults_false_with_custom_config_is_custom_only(self, tmp_path):
+        """Mode 2: --no-taint-defaults → only custom sources/sinks, no defaults."""
+        yaml_content = """
+sources:
+  - name: only_source
+    description: "Only this source"
+    pattern: 'API::builtin("input").getACall()'
+    source_type: user_input
+    enabled: true
+sinks: []
+sanitizers: []
+"""
+        config_file = tmp_path / "custom_only.yaml"
+        config_file.write_text(yaml_content)
+        config = TaintConfigLoader.load_config(config_file, use_defaults=False)
+        assert len(config.sources) == 1
+        assert config.sources[0].name == "only_source"
+
+    def test_use_defaults_true_with_custom_config_is_union(self, tmp_path):
+        """Mode 3: --taint-defaults + --taint-config → union of defaults and custom."""
+        yaml_content = """
+sources:
+  - name: extra_source
+    description: "Additional source"
+    pattern: 'API::builtin("input").getACall()'
+    source_type: user_input
+    enabled: true
+sinks: []
+sanitizers: []
+"""
+        config_file = tmp_path / "extra.yaml"
+        config_file.write_text(yaml_content)
+        config = TaintConfigLoader.load_config(config_file, use_defaults=True)
+        names = [s.name for s in config.sources]
+        assert "extra_source" in names
+        assert len(config.sources) > 1  # defaults present too
+
+    # ------------------------------------------------------------------
+    # validate_config integration
+    # ------------------------------------------------------------------
+
+    def test_validate_config_warns_no_sources(self):
+        """validate_config returns an issue when no sources are configured."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig
+        config = TaintAnalysisConfig(sources=[], sinks=[], sanitizers=[])
+        issues = TaintConfigLoader.validate_config(config)
+        assert any("No taint sources" in i for i in issues)
+
+    def test_validate_config_returns_issues_for_empty_pattern(self):
+        """validate_config catches empty pattern strings."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig, TaintSourceConfig
+        config = TaintAnalysisConfig(
+            sources=[TaintSourceConfig(name="bad", description="d", pattern="   ", source_type="t")]
+        )
+        issues = TaintConfigLoader.validate_config(config)
+        assert any("Empty pattern" in i for i in issues)
+
+    def test_validate_config_returns_issues_for_duplicates(self):
+        """validate_config catches duplicate source names."""
+        from codeanalyzer.schema.py_schema import TaintAnalysisConfig, TaintSourceConfig
+        src = TaintSourceConfig(name="dup", description="d", pattern="API::builtin(\"x\")", source_type="t")
+        config = TaintAnalysisConfig(sources=[src, src])
+        issues = TaintConfigLoader.validate_config(config)
+        assert any("Duplicate" in i for i in issues)
+
+    # ------------------------------------------------------------------
+    # builtin_sink_names helper
+    # ------------------------------------------------------------------
+
+    def test_builtin_sink_names_complete(self):
+        """builtin_sink_names() returns exactly 20 entries matching BUILTIN_SINKS."""
+        from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
+        names = TaintQueryGenerator.builtin_sink_names()
+        assert len(names) == TaintQueryGenerator.builtin_sink_count()
+        assert "SqlInjection::Sink" in names
+        assert "UnsafeDeserialization::Sink" in names
+        assert "TemplateInjection::Sink" in names
+
+
 # ============================================================================
 # Integration Tests (require CodeQL databases)
 # ============================================================================

From d0d15689cf6039652bc0e1c6897d3203e1147540 Mon Sep 17 00:00:00 2001
From: Saurabh Sinha <sinha108@gmail.com>
Date: Wed, 20 May 2026 16:54:39 -0400
Subject: [PATCH 4/4] Add test case with taint config in json format; add user
 guide

Signed-off-by: Saurabh Sinha <sinha108@gmail.com>
---
 docs/TAINT_ANALYSIS_USER_GUIDE.md | 504 ++++++++++++++++++++++++++++++
 test/test_taint_analysis.py       |  37 +++
 2 files changed, 541 insertions(+)
 create mode 100644 docs/TAINT_ANALYSIS_USER_GUIDE.md

diff --git a/docs/TAINT_ANALYSIS_USER_GUIDE.md b/docs/TAINT_ANALYSIS_USER_GUIDE.md
new file mode 100644
index 0000000..6bdb8d9
--- /dev/null
+++ b/docs/TAINT_ANALYSIS_USER_GUIDE.md
@@ -0,0 +1,504 @@
+# Taint Analysis User Guide
+
+Taint analysis (analysis level 3) tracks untrusted data from entry points
+(**sources**) through the application to dangerous call sites (**sinks**),
+reporting each path as a security vulnerability. It is powered by CodeQL and
+requires the CodeQL CLI to be installed.
+
+---
+
+## Table of Contents
+
+1. [Quick start](#quick-start)
+2. [How it works](#how-it-works)
+3. [Built-in coverage](#built-in-coverage)
+4. [Configuration modes](#configuration-modes)
+5. [Configuration file reference](#configuration-file-reference)
+6. [Writing patterns](#writing-patterns)
+7. [Output format](#output-format)
+8. [Programmatic API](#programmatic-api)
+9. [Troubleshooting](#troubleshooting)
+
+---
+
+## Quick start
+
+```bash
+# Analyse a project with all built-in defaults
+codeanalyzer -i ./myproject -a 3 --codeql
+
+# Extend defaults with project-specific sources/sinks
+codeanalyzer -i ./myproject -a 3 --codeql --taint-config taint.yaml
+
+# Use only your own config, no built-in defaults
+codeanalyzer -i ./myproject -a 3 --codeql --taint-config taint.yaml --no-taint-defaults
+```
+
+---
+
+## How it works
+
+The analysis generates a CodeQL query from three layers:
+
+1. **Built-in sources** — CodeQL's `RemoteFlowSource` class, which
+   automatically recognises all web-framework request inputs (Flask, Django,
+   FastAPI, aiohttp, Tornado, …) without any manual configuration.
+
+2. **Supplementary sources** — Additional sources provided by the default
+   configuration or your custom config file (e.g. `sys.argv`, `input()`,
+   environment variables).
+
+3. **Sinks** — Two complementary layers:
+   - *Built-in CodeQL sinks* — 20 vulnerability-specific sink classes
+     (SQL, command injection, path traversal, XSS, SSRF, SSTI, …) that
+     cover hundreds of framework APIs automatically. These are **always
+     active** unless explicitly suppressed with `disabled_builtin_sinks`.
+   - *User-defined sinks* — Project-specific APIs added via config file.
+
+4. **Sanitizers** — Call sites that block taint propagation (HTML escape,
+   shell quoting, path normalisation, …).
+
+---
+
+## Built-in coverage
+
+### Default sources (always active)
+
+| Name | What it matches | Source type |
+|---|---|---|
+| `RemoteFlowSource` (CodeQL) | All web-framework request inputs | `web_request` |
+| `command_line_args` | `sys.argv` | `command_line_argument` |
+| `user_input` | `input()` | `user_input` |
+| `env_getenv` | `os.getenv()` | `environment_variable` |
+| `env_environ_get` | `os.environ.get()` | `environment_variable` |
+| `requests_get_response` | `requests.get().text` | `http_response` |
+| `requests_post_response` | `requests.post().text` | `http_response` |
+
+### Built-in sinks (always active, 20 total)
+
+| CodeQL class | Vulnerability type | Severity |
+|---|---|---|
+| `SqlInjection::Sink` | SQL Injection | critical |
+| `CommandInjection::Sink` | Command Injection | critical |
+| `CodeInjection::Sink` | Code Injection | critical |
+| `TemplateInjection::Sink` | Server-Side Template Injection (SSTI) | critical |
+| `UnsafeDeserialization::Sink` | Unsafe Deserialization | critical |
+| `PathInjection::Sink` | Path Traversal | high |
+| `ReflectedXss::Sink` | Cross-Site Scripting (XSS) | high |
+| `LdapInjection::DnSink` | LDAP Injection | high |
+| `LdapInjection::FilterSink` | LDAP Injection | high |
+| `Xxe::Sink` | XML External Entity (XXE) | high |
+| `ServerSideRequestForgery::Sink` | Server-Side Request Forgery (SSRF) | high |
+| `NoSqlInjection::StringSink` | NoSQL Injection | high |
+| `NoSqlInjection::DictSink` | NoSQL Injection | high |
+| `XpathInjection::Sink` | XPath Injection | high |
+| `TarSlip::Sink` | Tar/Zip Slip | high |
+| `UrlRedirect::Sink` | Open Redirect | medium |
+| `LogInjection::Sink` | Log Injection | medium |
+| `HttpHeaderInjection::Sink` | HTTP Header Injection | medium |
+| `CookieInjection::Sink` | Cookie Injection | medium |
+| `PolynomialReDoS::Sink` | Regular Expression Injection (ReDoS) | medium |
+
+### Default sanitizers (always active)
+
+| Name | What it matches |
+|---|---|
+| `html_escape` | `html.escape()` |
+| `markupsafe_escape` | `markupsafe.escape()` |
+| `shlex_quote` | `shlex.quote()` |
+| `os_path_normpath` | `os.path.normpath()` |
+| `os_path_abspath` | `os.path.abspath()` |
+| `pathlib_resolve` | `pathlib.Path.resolve()` |
+
+---
+
+## Configuration modes
+
+| Invocation | What is active |
+|---|---|
+| No `--taint-config` | Built-in defaults only |
+| `--taint-config file.yaml` | Defaults **extended** with `file.yaml` (union) |
+| `--taint-config file.yaml --no-taint-defaults` | `file.yaml` only, no defaults |
+
+The third mode lets you constrain the analysis to a specific set of
+sources/sinks — for example, when tuning for a particular project or auditing
+a single vulnerability class.
+
+---
+
+## Configuration file reference
+
+Configuration files can be YAML (`.yaml` / `.yml`) or JSON (`.json`).
+All three top-level sections are optional; omit any section to inherit the
+defaults for it (when `--taint-defaults` is active).
+
+```yaml
+# Optional global settings
+max_path_length: 10           # Maximum taint-path steps (default: 10)
+confidence_threshold: medium  # high | medium | low (default: medium)
+group_by_vulnerability: true  # Group log output by type (default: true)
+
+# Suppress specific built-in CodeQL sinks (see list above)
+disabled_builtin_sinks: []
+
+# Exclude files / functions from analysis
+exclude_files: []             # Glob patterns relative to project root
+exclude_functions: []         # Fully-qualified function names
+
+# Additional sources, sinks, sanitizers (see sections below)
+sources: []
+sinks: []
+sanitizers: []
+```
+
+### `sources[]`
+
+| Field | Type | Required | Description |
+|---|---|---|---|
+| `name` | string | yes | Unique identifier used in logs and deduplication |
+| `description` | string | yes | Human-readable explanation |
+| `pattern` | string | yes | CodeQL API-graph expression (see [Writing patterns](#writing-patterns)) |
+| `source_type` | string | yes | Label propagated to `PyTaintSource.source_type` in results |
+| `enabled` | bool | no | Default `true`; set `false` to temporarily disable |
+
+```yaml
+sources:
+  - name: redis_get
+    description: "Values retrieved from Redis"
+    pattern: 'API::moduleImport("redis").getMember("Redis").getInstance().getMember("get").getReturn()'
+    source_type: cache_read
+```
+
+### `sinks[]`
+
+| Field | Type | Required | Description |
+|---|---|---|---|
+| `name` | string | yes | Unique identifier |
+| `description` | string | yes | Human-readable explanation |
+| `pattern` | string | yes | CodeQL API-graph expression |
+| `sink_type` | string | yes | Label propagated to `PyTaintSink.sink_type` in results |
+| `vulnerability_type` | string | yes | Vulnerability name reported in results |
+| `severity` | string | yes | `critical` \| `high` \| `medium` \| `low` |
+| `argument_index` | int | no | Zero-based index of the dangerous argument. When omitted, any tainted argument triggers the sink. |
+| `enabled` | bool | no | Default `true` |
+
+```yaml
+sinks:
+  - name: internal_db_query
+    description: "Internal database wrapper"
+    pattern: 'API::moduleImport("myapp.db").getMember("query").getACall()'
+    sink_type: sql_execution
+    vulnerability_type: SQL Injection
+    severity: critical
+    argument_index: 0   # Only the first argument (the query string) matters
+```
+
+Use `argument_index` to avoid false positives when only one specific argument
+of a multi-argument call is dangerous. For example, `cursor.execute(query,
+params)` — only `query` (index `0`) should be treated as the sink, not
+`params`.
+
+### `sanitizers[]`
+
+| Field | Type | Required | Description |
+|---|---|---|---|
+| `name` | string | yes | Unique identifier |
+| `description` | string | yes | Human-readable explanation |
+| `pattern` | string | yes | CodeQL API-graph expression |
+| `sanitizes` | list[string] | no | Informational list of mitigated vulnerability types (not used by the query engine) |
+| `enabled` | bool | no | Default `true` |
+
+```yaml
+sanitizers:
+  - name: bleach_clean
+    description: "bleach.clean() HTML sanitiser"
+    pattern: 'API::moduleImport("bleach").getMember("clean").getACall()'
+    sanitizes: [xss]
+```
+
+> **Note:** All enabled sanitizers unconditionally block **all** taint flows
+> passing through them. The `sanitizes` field is documentation only; per-flow
+> sanitisation (blocking only XSS flows, not command injection flows) is not
+> yet supported.
+
+### `disabled_builtin_sinks`
+
+Suppress specific built-in CodeQL sink models without removing the rest:
+
+```yaml
+disabled_builtin_sinks:
+  - PolynomialReDoS::Sink      # too noisy on regex-heavy codebases
+  - CookieInjection::Sink
+```
+
+To list all available names at runtime:
+
+```bash
+python -c "
+from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
+print(*TaintQueryGenerator.builtin_sink_names(), sep='\n')
+"
+```
+
+### Merge behaviour when `--taint-defaults` is active
+
+When a custom config is merged with the defaults:
+
+| Item | Behaviour |
+|---|---|
+| Sources | Union; custom entry with the same `name` **overrides** the default |
+| Sinks | Union; custom entry with the same `name` overrides the default |
+| Sanitizers | Union; same override rule |
+| `disabled_builtin_sinks` | Union of both lists |
+| `exclude_files` / `exclude_functions` | Union of both lists |
+| Scalar options (`max_path_length`, `confidence_threshold`, etc.) | Custom value wins |
+| Additive booleans (`include_implicit_flows`, `include_safe_flows`) | `OR` — enabling in either config enables globally |
+
+---
+
+## Writing patterns
+
+Patterns are [CodeQL API-graph](https://codeql.github.com/docs/codeql-language-guides/using-the-api-graph-in-python/)
+expressions. All string literals inside a pattern **must use double quotes**
+(CodeQL does not support single-quoted strings).
+
+### Common building blocks
+
+| Goal | Pattern |
+|---|---|
+| Module-level function call | `API::moduleImport("os").getMember("system").getACall()` |
+| Nested attribute call | `API::moduleImport("os").getMember("path").getMember("join").getACall()` |
+| Return value of a call | `API::moduleImport("requests").getMember("get").getReturn()` |
+| Attribute of a return value | `API::moduleImport("requests").getMember("get").getReturn().getMember("text")` |
+| Built-in function | `API::builtin("input").getACall()` |
+| Class instance method | `API::moduleImport("sqlite3").getMember("connect").getReturn().getMember("cursor").getReturn().getMember("execute").getACall()` |
+
+### Source patterns
+
+For sources, the pattern should resolve to the **return value** of the call
+(where the untrusted data lives):
+
+```yaml
+# input() return value
+pattern: 'API::builtin("input").getACall()'
+
+# Flask request argument
+pattern: 'API::moduleImport("flask").getMember("request").getMember("args").getMember("get").getACall()'
+
+# Environment variable
+pattern: 'API::moduleImport("os").getMember("getenv").getACall()'
+```
+
+### Sink patterns
+
+For sinks, the pattern should resolve to the **argument** that carries the
+dangerous value. Use `argument_index` to target a specific argument, or omit
+it to flag any tainted argument:
+
+```yaml
+# Target argument 0 of cursor.execute(query, params)
+pattern: 'API::moduleImport("sqlite3").getMember("connect").getReturn().getMember("cursor").getReturn().getMember("execute").getACall()'
+argument_index: 0
+
+# Flag any tainted argument (omit argument_index)
+pattern: 'API::moduleImport("myapp.shell").getMember("run").getACall()'
+```
+
+### Sanitizer patterns
+
+Sanitizer patterns resolve to the **call that produces the safe value**:
+
+```yaml
+pattern: 'API::moduleImport("html").getMember("escape").getACall()'
+```
+
+---
+
+## Output format
+
+Results are returned as `PyTaintAnalysisResult` (accessible via the library
+API or serialised to JSON/msgpack). Each detected flow has this structure:
+
+```json
+{
+  "flows": [
+    {
+      "flow_id": "path/to/app.py:10->path/to/app.py:18",
+      "vulnerability_type": "SQL Injection",
+      "severity": "critical",
+      "confidence": "medium",
+      "source": {
+        "source_type": "user_input",
+        "description": "Direct user input via input() function",
+        "call_site": {
+          "method_name": "input",
+          "file_path": "app.py",
+          "start_line": 10,
+          "end_line": 10,
+          "start_column": 8,
+          "end_column": 15
+        }
+      },
+      "sink": {
+        "sink_type": "sql_execution",
+        "description": "SQL Injection",
+        "severity": "critical",
+        "call_site": {
+          "method_name": "execute",
+          "file_path": "app.py",
+          "start_line": 18,
+          "end_line": 18,
+          "start_column": 4,
+          "end_column": 22
+        }
+      },
+      "path": [
+        {
+          "location": "app.py:10:8",
+          "function_name": "get_user",
+          "description": "Source node",
+          "step_type": "source"
+        },
+        {
+          "location": "app.py:18:4",
+          "function_name": "query_db",
+          "description": "Sink node",
+          "step_type": "sink"
+        }
+      ]
+    }
+  ]
+}
+```
+
+**Severity levels:**
+
+| Severity | Meaning |
+|---|---|
+| `critical` | Immediate exploitation likely (SQL/command/code/SSTI/deserialization) |
+| `high` | High exploitability (path traversal, XSS, SSRF, XXE, LDAP, NoSQL, …) |
+| `medium` | Exploitable under specific conditions (redirect, header injection, ReDoS, …) |
+| `low` | Informational / low-impact |
+
+---
+
+## Programmatic API
+
+### Running analysis
+
+```python
+from pathlib import Path
+from codeanalyzer.core import Codeanalyzer
+from codeanalyzer.options import AnalysisOptions
+
+options = AnalysisOptions(
+    input=Path("/path/to/project"),
+    analysis_level=3,
+    using_codeql=True,
+    taint_config=Path("taint.yaml"),   # optional
+    taint_use_defaults=True,           # False = custom only
+)
+
+with Codeanalyzer(options) as analyzer:
+    result = analyzer.analyze()
+
+taint = result.taint_analysis
+print(f"{len(taint.flows)} flows detected")
+
+for flow in taint.flows:
+    print(f"[{flow.severity}] {flow.vulnerability_type}")
+    print(f"  source: {flow.source.call_site.file_path}:{flow.source.call_site.start_line}")
+    print(f"  sink:   {flow.sink.call_site.file_path}:{flow.sink.call_site.start_line}")
+```
+
+### Loading and inspecting configuration
+
+```python
+from codeanalyzer.config.taint_config_loader import TaintConfigLoader
+from codeanalyzer.config.taint_config_defaults import get_default_taint_config
+from codeanalyzer.semantic_analysis.codeql.taint_query_generator import TaintQueryGenerator
+
+# Load defaults only
+config = TaintConfigLoader.load_config()
+
+# Load custom file, merged with defaults (mode 2)
+config = TaintConfigLoader.load_config("taint.yaml", use_defaults=True)
+
+# Load custom file only (mode 3)
+config = TaintConfigLoader.load_config("taint.yaml", use_defaults=False)
+
+# Inspect what is active
+print(f"Sources:   {len(config.sources)}")
+print(f"User sinks:{len(config.sinks)}")
+print(f"Built-in sinks: {TaintQueryGenerator.builtin_sink_count()}")
+print(f"Disabled built-ins: {config.disabled_builtin_sinks}")
+print(f"Sanitizers:{len(config.sanitizers)}")
+
+# All available built-in sink names (for use in disabled_builtin_sinks)
+print(TaintQueryGenerator.builtin_sink_names())
+
+# Validate a config and check for problems
+issues = TaintConfigLoader.validate_config(config)
+for issue in issues:
+    print(f"WARNING: {issue}")
+
+# Save current effective config to file (useful for debugging)
+TaintConfigLoader.save_config(config, "effective-config.yaml", format="yaml")
+```
+
+---
+
+## Troubleshooting
+
+### No flows detected
+
+1. **Check verbosity** — run with `-vv` to see the active config summary and
+   which sources/sinks are loaded.
+2. **Verify source coverage** — your code may use a web framework already
+   covered by `RemoteFlowSource`, or it may use a non-web input not in the
+   defaults. Add a custom source for the latter.
+3. **Check sanitizers** — a flow that is blocked by a default sanitizer
+   (e.g. `html.escape`, `shlex.quote`) will not be reported. Set
+   `include_safe_flows: true` temporarily to see sanitised paths.
+4. **Check for excluded files** — if `exclude_files` or `exclude_functions`
+   is set in a config, those paths are silently skipped.
+5. **Confirm CodeQL database** — the CodeQL database is built from the project
+   at analysis time. If the database is stale, use `--eager` to rebuild.
+
+### Too many false positives
+
+- Use `disabled_builtin_sinks` to suppress noisy sink classes (e.g.
+  `PolynomialReDoS::Sink` on regex-heavy codebases).
+- Use `--no-taint-defaults` with a hand-crafted config file to constrain
+  analysis to only the flows you care about.
+- Use `exclude_files` to skip test or vendor directories.
+- Add sanitizer entries for project-specific validation functions.
+
+### Unexpected flows blocked (false negatives)
+
+- Check that the sanitizer pattern actually matches your code — test it by
+  temporarily disabling the sanitizer with `enabled: false`.
+- CodeQL sanitizers are applied globally. If a sanitizer is too broad (e.g.
+  `os.path.normpath` blocking a non-path flow), disable it and add a narrower
+  one.
+
+### Config file not loading
+
+- Verify patterns use **double quotes** inside the YAML string. Single quotes
+  are a CodeQL syntax error.
+- Run `validate_config()` programmatically (see above) to catch empty
+  patterns, duplicate names, or missing required fields.
+- Check the log output at `-v` level — a `WARNING: Taint config: …` line
+  indicates a structural problem found at load time.
+
+### Getting the CodeQL CLI
+
+Taint analysis requires the [CodeQL CLI](https://github.com/github/codeql-cli-binaries/releases).
+Download the archive for your platform, unpack it, and ensure the `codeql`
+binary is on your `PATH`:
+
+```bash
+codeql --version   # should print the CodeQL version
+```
+
+The `codeql/python-all` pack is downloaded automatically on first use.
diff --git a/test/test_taint_analysis.py b/test/test_taint_analysis.py
index 853ac69..dfc6e86 100644
--- a/test/test_taint_analysis.py
+++ b/test/test_taint_analysis.py
@@ -141,6 +141,43 @@ def test_custom_configuration_yaml(self, sql_injection_app, tmp_path):
         assert config.sources[0].name == "user_input"
         assert config.sinks[0].vulnerability_type == "SQL Injection"
 
+    def test_custom_configuration_json(self, tmp_path):
+        """Test custom taint configuration loaded from a JSON file."""
+        import json
+        config_data = {
+            "sources": [
+                {
+                    "name": "user_input",
+                    "description": "User input from input() function",
+                    "pattern": 'API::builtin("input").getACall()',
+                    "source_type": "user_input",
+                    "enabled": True,
+                }
+            ],
+            "sinks": [
+                {
+                    "name": "sql_execute",
+                    "description": "SQL query execution",
+                    "pattern": 'API::moduleImport("sqlite3").getMember("execute").getACall()',
+                    "sink_type": "sql_execution",
+                    "vulnerability_type": "SQL Injection",
+                    "severity": "critical",
+                    "enabled": True,
+                }
+            ],
+            "sanitizers": [],
+        }
+        config_file = tmp_path / "custom_taint_config.json"
+        config_file.write_text(json.dumps(config_data))
+
+        config = TaintConfigLoader.load_config(config_file, use_defaults=False)
+
+        assert len(config.sources) == 1
+        assert len(config.sinks) == 1
+        assert len(config.sanitizers) == 0
+        assert config.sources[0].name == "user_input"
+        assert config.sinks[0].vulnerability_type == "SQL Injection"
+
     def test_config_merge_with_defaults(self, tmp_path):
         """Test merging custom config with defaults."""
         # Create minimal custom config