Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .github/workflows/security-fast.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,16 @@ jobs:
# GHSA-5239-wwwm-4pmq: pygments ReDoS in AdlLexer (dev-only, no fix available)
# GHSA-58qw-9mgm-455v: pip tar/zip confusion (pip itself, no fix available)
# GHSA-jp4c-xjxw-mgf9: pip self-update import ordering (fix requires py3.10+)
# GHSA-qccp-gfcp-xxvc: urllib3 cross-origin header leak (fix 2.7.0 requires py3.10+)
# GHSA-mf9v-mfxr-j63j: urllib3 decompression bomb (fix 2.7.0 requires py3.10+)
# GHSA-qccp-gfcp-xxvc: urllib3 cross-origin header leak (dev dep via uv sync; runtime uses httpx, fix 2.7.0 requires py3.10+)
# GHSA-mf9v-mfxr-j63j: urllib3 decompression bomb (dev dep via uv sync; runtime uses httpx, fix 2.7.0 requires py3.10+)
# PYSEC-2026-113: pyarrow C++ IPC pre-buffer UAF (PreBufferMetadata not exposed to Python bindings per advisory)
uv run pip-audit --desc --format json --output pip-audit-report.json \
--ignore-vuln GHSA-5239-wwwm-4pmq \
--ignore-vuln GHSA-58qw-9mgm-455v \
--ignore-vuln GHSA-jp4c-xjxw-mgf9 \
--ignore-vuln GHSA-qccp-gfcp-xxvc \
--ignore-vuln GHSA-mf9v-mfxr-j63j
--ignore-vuln GHSA-mf9v-mfxr-j63j \
--ignore-vuln PYSEC-2026-113

- name: Upload report
if: always()
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ dump.rdb
# Development
.env.local
.env.*.local
.env.dev
*.local
coverage.json

Expand Down
8 changes: 6 additions & 2 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@
{
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"
},
{
"path": "detect_secrets.filters.common.is_baseline_file",
"filename": ".secrets.baseline"
},
{
"path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies",
"min_level": 2
Expand Down Expand Up @@ -218,7 +222,7 @@
"filename": "src/cachekit/config/decorator.py",
"hashed_secret": "1a9a9d37d8305b0cd8353468065cf844259e1b1f",
"is_verified": false,
"line_number": 553
"line_number": 557
}
],
"tests/critical/test_aad_v03_security.py": [
Expand Down Expand Up @@ -421,5 +425,5 @@
}
]
},
"generated_at": "2026-04-06T07:15:57Z"
"generated_at": "2026-05-28T00:50:38Z"
}
34 changes: 26 additions & 8 deletions src/cachekit/backends/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,32 +102,50 @@ async def get_async_client(self):


class DefaultBackendProvider(BackendProviderInterface):
"""Default backend provider using Redis backend.
"""Default backend provider with env-based auto-detection.

Creates RedisBackendProvider singleton with connection pooling.
Delegates to RedisBackendProvider.get_backend() for per-request wrappers.
Priority order (first matching env var wins):
1. CACHEKIT_API_KEY → CachekitIOBackend (SaaS)
2. CACHEKIT_REDIS_URL or REDIS_URL → RedisBackend

For single-tenant deployments (default), sets tenant_context to "default".
For multi-tenant deployments, tenant_context must be set externally.
"""

def __init__(self):
self._provider = None
self._cachekitio_backend = None
self._redis_provider = None

def get_backend(self):
"""Get per-request backend instance from singleton provider."""
if self._provider is None:
"""Get backend instance, auto-detected from environment on first call.

CachekitIO backends are stateless singletons (cached).
Redis backends are per-request tenant-scoped wrappers (not cached —
RedisBackendProvider.get_backend() reads tenant_context ContextVar).
"""
import os

# Priority 1: CachekitIO SaaS backend (stateless, safe to cache)
if os.environ.get("CACHEKIT_API_KEY"):
if self._cachekitio_backend is None:
from cachekit.backends.cachekitio import CachekitIOBackend

self._cachekitio_backend = CachekitIOBackend()
return self._cachekitio_backend

# Priority 2: Redis backend (tenant-scoped, call provider each time)
if self._redis_provider is None:
from cachekit.backends.redis.config import RedisBackendConfig
from cachekit.backends.redis.provider import RedisBackendProvider, tenant_context

redis_config = RedisBackendConfig.from_env()
self._provider = RedisBackendProvider(redis_url=redis_config.redis_url)
self._redis_provider = RedisBackendProvider(redis_url=redis_config.redis_url)

# Set default tenant for single-tenant mode (if not already set)
if tenant_context.get() is None:
tenant_context.set("default")

return self._provider.get_backend()
return self._redis_provider.get_backend()


__all__ = [
Expand Down
27 changes: 23 additions & 4 deletions src/cachekit/cache_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,13 +303,25 @@ def __init__(
but extraction fails, ValueError propagates to caller (no fallback to shared key).
"""
self.serializer_name = serializer_name
self.enable_integrity_checking = enable_integrity_checking
self._deployment_uuid_value: Optional[str] = None

# Auto-detect encryption from CACHEKIT_MASTER_KEY when not explicitly configured.
# This is the single convergence point for ALL backends and presets.
if not encryption and master_key is None and tenant_extractor is None:
from cachekit.config.singleton import get_settings

settings = get_settings()
if settings.master_key:
encryption = True
master_key = settings.master_key.get_secret_value()
single_tenant_mode = True

Comment on lines +309 to +319
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

Preserve an explicit encryption=False opt-out.

Because encryption is still a plain bool, this block can't tell “unset” from “explicitly disabled”. As written, CACHEKIT_MASTER_KEY will force encryption on even for callers that intentionally passed encryption=False, which can now trip the default-serializer guard later in __init__. If the contract is “auto-detect only when not explicitly configured”, this needs a tri-state/sentinel or a separate auto-detect flag from the decorator layer.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/cachekit/cache_handler.py` around lines 309 - 319, The code currently
treats the boolean parameter encryption as both "unset" and "explicitly False",
causing CACHEKIT_MASTER_KEY to override callers who passed encryption=False;
change the API so auto-detection only runs when encryption is unset (use a
tri-state/sentinel or change the decorator to pass encryption=None for "auto"
instead of False), then update the block in cache_handler.py that checks
(encryption, master_key, tenant_extractor) to only enable encryption when
encryption is None/UNSET and settings.master_key exists; ensure related places
including the cache handler __init__ and the default-serializer guard check the
new sentinel (or an explicit auto_detect_encryption flag) rather than a plain
bool so explicit encryption=False remains honored and auto-detect only applies
when encryption was not provided.

self.encryption = encryption
self.tenant_extractor = tenant_extractor
self.single_tenant_mode = single_tenant_mode
self.deployment_uuid = deployment_uuid
self.master_key = master_key
self.enable_integrity_checking = enable_integrity_checking
self._deployment_uuid_value: Optional[str] = None

# Extract string name for metadata storage (for protocol instances, use class name)
if isinstance(serializer_name, str):
Expand Down Expand Up @@ -680,8 +692,9 @@ def deserialize_data(self, data: str | bytes, cache_key: str = "") -> Any:
else:
# Data is not encrypted - use base serializer directly (no cache_key needed)
return base_serializer.deserialize(serialized_data, metadata)
except ValueError:
# cache_key missing for encrypted data - FAIL CLOSED (re-raise)
except (ValueError, SerializationError):
# ValueError: cache_key missing for encrypted data — FAIL CLOSED
# SerializationError/EncryptionError: let the outer handler log and handle
raise
except Exception as e:
get_logger().error(f"Deserialization failed with {self.serializer_name}: {e}")
Expand Down Expand Up @@ -806,6 +819,9 @@ def get_cached_value(self, cache_key: str, refresh_ttl: Optional[int] = None) ->
# Return a tuple (True, value) to distinguish from "no cache entry"
return (True, deserialized)
return None
except SerializationError as e:
get_logger().warning(f"L2 cache decrypt/integrity failure for {cache_key}: {e}")
return None
except Exception as e:
get_logger().warning(f"Backend operation failed for get on {cache_key}: {e}")
return None
Expand Down Expand Up @@ -836,6 +852,9 @@ async def get_cached_value_async(self, cache_key: str, refresh_ttl: Optional[int
# Return a tuple (True, value) to distinguish from "no cache entry"
return (True, deserialized)
return None
except SerializationError as e:
get_logger().warning(f"L2 cache decrypt/integrity failure for {cache_key}: {e}")
return None
except Exception as e:
get_logger().warning(f"Backend operation failed for get on {cache_key}: {e}")
return None
Expand Down
9 changes: 7 additions & 2 deletions src/cachekit/config/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ def to_dict(self) -> dict[str, object]:
"enable_prometheus_metrics": self.monitoring.enable_prometheus_metrics,
# Encryption (flattened)
"encryption": self.encryption.enabled,
"master_key": self.encryption.master_key,
"master_key": "[REDACTED]" if self.encryption.master_key else None,
"tenant_extractor": self.encryption.tenant_extractor,
}

Expand Down Expand Up @@ -382,7 +382,7 @@ def secure(cls, master_key: str, tenant_extractor: Callable[..., str] | None = N
Use cases: PII, medical data, financial records, GDPR compliance
Architecture: Both L1 and L2 store encrypted bytes (encrypt-at-rest everywhere)

Note: Backend resolved from REDIS_URL env var, set_default_backend(), or explicit backend= kwarg
Note: Backend resolved from CACHEKIT_API_KEY, REDIS_URL, set_default_backend(), or explicit backend= kwarg
Note: integrity_checking is forced to True (non-negotiable for security)

Args:
Expand Down Expand Up @@ -539,6 +539,10 @@ def io(cls, **kwargs: Any) -> DecoratorConfig:
CACHEKIT_API_KEY: API key for authentication (required)
CACHEKIT_API_URL: API endpoint (default: https://api.cachekit.io)

Encryption: Set CACHEKIT_MASTER_KEY env var to enable automatic client-side
AES-256-GCM encryption — no code changes needed. Auto-detection happens in
CacheSerializationHandler and applies to ALL presets, not just .io().

Args:
**kwargs: Overrides (ttl, namespace, etc.)

Expand Down Expand Up @@ -572,6 +576,7 @@ def io(cls, **kwargs: Any) -> DecoratorConfig:
backend = CachekitIOBackend()

# Use production-grade settings with SaaS backend
# Encryption auto-detected from CACHEKIT_MASTER_KEY in CacheSerializationHandler
return cls(
backend=backend,
integrity_checking=True,
Expand Down
9 changes: 7 additions & 2 deletions src/cachekit/config/nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from __future__ import annotations

from dataclasses import dataclass
from dataclasses import dataclass, field
from typing import Callable

from .validation import ConfigurationError
Expand Down Expand Up @@ -285,6 +285,11 @@ class EncryptionConfig:
NOTE: Per backend abstraction spec, encryption stores encrypted bytes in BOTH L1 and L2.
L1 can be enabled with encryption (stores encrypted bytes, not plaintext).

Tenant mode is required: set single_tenant_mode=True for single-tenant or provide
a tenant_extractor callable for multi-tenant key isolation. @cache.secure() sets
single_tenant_mode automatically; if using EncryptionConfig directly (e.g. with
@cache.io), you must set it explicitly.

Attributes:
enabled: Enable client-side encryption (default: False)
master_key: Hex-encoded master key for key derivation (required if enabled)
Expand Down Expand Up @@ -322,7 +327,7 @@ class EncryptionConfig:
"""

enabled: bool = False
master_key: str | None = None
master_key: str | None = field(default=None, repr=False)
tenant_extractor: Callable[..., str] | None = None
single_tenant_mode: bool = False
deployment_uuid: str | None = None
Expand Down
20 changes: 19 additions & 1 deletion src/cachekit/decorators/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from ..l1_cache import get_l1_cache
from ..object_cache import ObjectCache
from ..reliability import CircuitBreakerConfig
from ..serializers.base import SerializationError

# Config import removed - using direct DecoratorConfig integration
from .orchestrator import FeatureOrchestrator
Expand Down Expand Up @@ -1033,8 +1034,21 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:

return result

except SerializationError as e:
# Decrypt/integrity failure on L2 data — warn explicitly (fail-open: recompute)
logger().warning(f"L2 cache decrypt/integrity failure for {cache_key}: {e}")
get_duration_ms = (time.perf_counter() - start_time) * 1000
features.handle_cache_error(
error=e,
operation="cache_get_deserialize",
cache_key=cache_key or "unknown",
namespace=namespace or "default",
duration_ms=get_duration_ms,
correlation_id=correlation_id,
)

except Exception as e:
# Redis error - record but continue to function execution
# Backend/network error - record but continue to function execution
get_duration_ms = (time.perf_counter() - start_time) * 1000
features.handle_cache_error(
error=e,
Expand Down Expand Up @@ -1080,6 +1094,8 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
_cached_keys.add(cache_key)

return result
except SerializationError as e:
logger().warning(f"L2 cache decrypt/integrity failure for {cache_key}: {e}")
except Exception as e:
# If double-check fails, continue to execute function
_logger.debug("Double-check cache failed after lock acquisition: %s", e)
Expand All @@ -1106,6 +1122,8 @@ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
_cached_keys.add(cache_key)

return result
except SerializationError as e:
logger().warning(f"L2 cache decrypt/integrity failure for {cache_key}: {e}")
except Exception:
# Cache check failed - fall through to execute function
logger().warning(
Expand Down
29 changes: 27 additions & 2 deletions src/cachekit/key_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ class CacheKeyGenerator:
"local": "l", # Reference caching (no serialization)
}

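# Regex matching chars NOT allowed in the func component of a SaaS cache key
# (despite the attribute name, the pattern is a negated class: it finds the
# characters that must be replaced).
# SaaS validates: /^[a-zA-Z0-9_.]{1,200}$/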
_FUNC_ALLOWED_RE = __import__("re").compile(r"[^A-Za-z0-9_.]")
_DOUBLE_DOT_RE = __import__("re").compile(r"\.{2,}")
_FUNC_NAME_MAX = 200

def __init__(self):
"""Initialize the key generator.

Expand Down Expand Up @@ -83,8 +89,9 @@ def generate_key(
if namespace:
key_parts.extend(["ns:", namespace, ":"])

# Add function identifier (module + name) - single string operation
key_parts.extend(["func:", func.__module__, ".", func.__qualname__, ":"])
# Add function identifier (module + name) — sanitized for SaaS key format
func_name = self._sanitize_func_name(func.__module__, func.__qualname__)
key_parts.extend(["func:", func_name, ":"])

# Generate args hash using Blake2b-256
args_hash = self._blake2b_hash(args, kwargs)
Expand Down Expand Up @@ -347,3 +354,21 @@ def _normalize_key(self, key: str) -> str:
normalized = f"{prefix}:{key_hash[:32]}"

return normalized

@classmethod
def _sanitize_func_name(cls, module: str, qualname: str) -> str:
    """Return ``module.qualname`` sanitized for SaaS cache-key compliance.

    The SaaS ``func`` component must match ``[a-zA-Z0-9_.]{1,200}``
    and must not contain ``..``. Nested functions have qualnames like
    ``outer.<locals>.inner`` and lambdas are ``<lambda>`` — the angle
    brackets violate the regex.

    Steps:
        1. Replace every character outside ``[A-Za-z0-9_.]`` with ``_``.
        2. Collapse each run of two or more consecutive dots into one ``.``.
        3. If the result exceeds ``_FUNC_NAME_MAX`` characters, keep a
           prefix and replace the tail with ``_`` plus a 16-hex-char
           BLAKE2b digest of the full, unsanitized name.

    Step 3 matters because a plain truncation would give every function
    sharing the same first ``_FUNC_NAME_MAX`` characters an identical
    ``func`` component, and therefore colliding cache keys. Names that
    already fit are returned exactly as steps 1-2 produce them.

    Args:
        module: The function's ``__module__``.
        qualname: The function's ``__qualname__``.

    Returns:
        A deterministic string of at most ``_FUNC_NAME_MAX`` characters
        drawn from ``[A-Za-z0-9_.]`` (same function → same value).
    """
    import hashlib  # local import: only needed on the rare over-long path

    raw = f"{module}.{qualname}"
    sanitized = cls._FUNC_ALLOWED_RE.sub("_", raw)
    sanitized = cls._DOUBLE_DOT_RE.sub(".", sanitized)

    limit = cls._FUNC_NAME_MAX
    if len(sanitized) <= limit:
        return sanitized

    # Hash the raw (pre-sanitization) name so names that differ only past
    # the cut-off still yield distinct components. Hex digits and "_" are
    # all inside the allowed character set and cannot introduce "..".
    digest = hashlib.blake2b(raw.encode("utf-8"), digest_size=8).hexdigest()
    suffix = f"_{digest}"
    return sanitized[: limit - len(suffix)] + suffix
Loading
Loading