diff --git a/manifests/base/configmap.yaml b/manifests/base/configmap.yaml index 4b18c4d..4f1833a 100644 --- a/manifests/base/configmap.yaml +++ b/manifests/base/configmap.yaml @@ -13,6 +13,7 @@ data: SAFIR_PROFILE: "production" LTDPROXY_AUTH_CONFIG: "/opt/ltd-proxy/auth/authrules.yaml" LTDPROXY_PATH_PREFIX: "/" + LTDPROXY_REWRITES_CONFIG: "/opt/ltd-proxy/rewrites/rewriterules.yaml" LTDPROXY_S3_BUCKET: "" LTDPROXY_S3_PREFIX: "" LTDPROXY_AWS_REGION: "" diff --git a/manifests/base/deployment.yaml b/manifests/base/deployment.yaml index 8800fe2..fe22d63 100644 --- a/manifests/base/deployment.yaml +++ b/manifests/base/deployment.yaml @@ -36,10 +36,16 @@ spec: - name: "auth-config" mountPath: "/opt/ltd-proxy/auth/" readOnly: true + - name: "rewrites-config" + mountPath: "/opt/ltd-proxy/rewrites/" + readOnly: true volumes: - name: "auth-config" configMap: name: "ltd-proxy-auth" + - name: "rewrites-config" + configMap: + name: "ltd-proxy-rewrites" securityContext: runAsNonRoot: true runAsUser: 1000 diff --git a/manifests/base/kustomization.yaml b/manifests/base/kustomization.yaml index 953d07a..f2edf5e 100644 --- a/manifests/base/kustomization.yaml +++ b/manifests/base/kustomization.yaml @@ -3,10 +3,11 @@ kind: Kustomization images: - name: "ghcr.io/jsickcodes/ltd-proxy" - newTag: 0.1.0 + newTag: 0.2.0 resources: - configmap.yaml - auth-configmap.yaml + - rewrites-configmap.yaml - deployment.yaml - service.yaml diff --git a/manifests/base/rewrites-configmap.yaml b/manifests/base/rewrites-configmap.yaml new file mode 100644 index 0000000..b6c73b4 --- /dev/null +++ b/manifests/base/rewrites-configmap.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: "ltd-proxy-rewrites" + labels: + app.kubernetes.io/name: "ltd-proxy" +data: + rewriterules.yaml: | + # Example rule: + # - pattern: "\\/$" + # substitution: "http://localhost:8000/__healthz" + rewrites: [] diff --git a/src/ltdproxy/config.py b/src/ltdproxy/config.py index 5ca1c2c..b1fb36a 100644 --- 
a/src/ltdproxy/config.py +++ b/src/ltdproxy/config.py @@ -69,6 +69,8 @@ class Configuration(BaseSettings): path_prefix: str = Field("/", env="LTDPROXY_PATH_PREFIX") + rewrites_config_path: FilePath = Field(env="LTDPROXY_REWRITES_CONFIG") + config = Configuration(_env_file=os.getenv("LTD_PROXY_ENV")) """Configuration for ltd-proxy.""" diff --git a/src/ltdproxy/handlers/external.py b/src/ltdproxy/handlers/external.py index 943d989..cac51d1 100644 --- a/src/ltdproxy/handlers/external.py +++ b/src/ltdproxy/handlers/external.py @@ -13,7 +13,6 @@ from starlette.requests import Request from starlette.responses import ( HTMLResponse, - PlainTextResponse, RedirectResponse, StreamingResponse, ) @@ -28,6 +27,7 @@ github_oauth_dependency, set_serialized_github_memberships, ) +from ltdproxy.rewrites import RewriteEngine, rewrite_dependency from ltdproxy.s3 import Bucket, bucket_dependency from ltdproxy.urlmap import map_s3_path @@ -37,11 +37,6 @@ """FastAPI router for all external handlers.""" -@external_router.get("/", name="homepage") -async def get_homepage() -> PlainTextResponse: - return PlainTextResponse("OK", status_code=200) - - @external_router.get("/auth", name="get_oauth_callback") async def get_oauth_callback( ref: Optional[str], @@ -134,6 +129,7 @@ async def get_s3( bucket: Bucket = Depends(bucket_dependency), http_client: httpx.AsyncClient = Depends(http_client_dependency), github_auth: GitHubAuth = Depends(github_auth_dependency), + rewrite_engine: RewriteEngine = Depends(rewrite_dependency), ) -> Union[StreamingResponse, RedirectResponse]: """The S3 proxy endpoint.""" github_auth_result = github_auth.is_session_authorized( @@ -154,6 +150,11 @@ async def get_s3( raise HTTPException(status_code=403, detail="Not authorized") elif github_auth_result == AuthResult.authorized: + # User is authorized; first check rewrites + response = await rewrite_engine.build_response(f"/{path}") + if response: + return response + # User is authorized; stream from S3. 
class RewriteRule(BaseModel):
    """A single request URL rewrite rule."""

    # Regular expression tested against the request path. The engine applies
    # it with ``Pattern.match``, so it is anchored at the start of the path.
    pattern: Pattern

    # URL requested, verbatim, when ``pattern`` matches.
    substitution: str


class RewriteConfigModel(BaseModel):
    """Model parsing and validating the rewrite rules file."""

    # Rules in the order they are listed in the file.
    rewrites: List[RewriteRule]

    @classmethod
    def parse_yaml(cls, path: Path) -> RewriteConfigModel:
        """Parse the YAML representation of this configuration model.

        Parameters
        ----------
        path
            Location of the YAML rewrite rules file.

        Returns
        -------
        RewriteConfigModel
            The validated configuration.
        """
        data = yaml.safe_load(path.read_text())
        return cls.parse_obj(data)


class RewriteEngine:
    """Hold the URL rewrite rules and answer matching requests from another
    HTTP server instead of S3.

    Parameters
    ----------
    rewrite_rules
        Rules to evaluate, in priority order.
    http_client
        Client used to send the rewritten requests.
    """

    def __init__(
        self,
        *,
        rewrite_rules: List[RewriteRule],
        http_client: httpx.AsyncClient,
    ) -> None:
        self._rewrite_rules = rewrite_rules
        self._http_client = http_client

    @classmethod
    def init_from_file(
        cls, *, path: Path, http_client: httpx.AsyncClient
    ) -> RewriteEngine:
        """Create an engine from a YAML rewrite rules file."""
        config_data = RewriteConfigModel.parse_yaml(path)
        return cls(rewrite_rules=config_data.rewrites, http_client=http_client)

    def find_matching_rule(
        self, path: str
    ) -> Optional[Tuple[RewriteRule, re.Match]]:
        """Find the first rule whose pattern matches ``path``.

        Rules are tried in order and the first hit wins. Matching uses
        ``Pattern.match``, so a pattern only matches at the start of the path.

        Returns
        -------
        tuple or None
            The matching rule and its match object, or `None` if no rule
            applies.
        """
        for rule in self._rewrite_rules:
            m = rule.pattern.match(path)
            if m:
                return rule, m
        return None

    async def build_stream(self, path: str) -> Optional[httpx.Response]:
        """Open a streaming GET request to the rewrite target for ``path``.

        Returns
        -------
        httpx.Response or None
            The open, unread upstream response, or `None` if no rule matches.
            The caller is responsible for closing the response.
        """
        matched = self.find_matching_rule(path)
        if matched is None:
            return None  # no matching rule

        # NOTE(review): the match object is not used, so capture groups in
        # the pattern cannot be referenced from the substitution. Confirm
        # whether the substitution is meant to be expanded against the match.
        rule, _ = matched

        request = self._http_client.build_request("GET", rule.substitution)
        return await self._http_client.send(request, stream=True)

    async def build_response(self, path: str) -> Optional[StreamingResponse]:
        """Build a response that relays the rewrite target's body.

        Returns
        -------
        StreamingResponse or None
            A response streaming the upstream body with the upstream status
            code, or `None` if no rule matches ``path``.
        """
        stream = await self.build_stream(path)
        if stream is None:
            return None

        stream_headers = stream.headers
        # The body is relayed with ``aiter_raw``, i.e. without content
        # decoding, so Content-Encoding has to travel with it; otherwise a
        # compressed upstream body reaches the client unlabelled.
        copy_headers = ("Content-Type", "Content-length", "Content-Encoding")
        response_headers = {
            key: stream_headers[key]
            for key in copy_headers
            if key in stream_headers
        }

        return StreamingResponse(
            stream.aiter_raw(),
            # Relay the upstream status rather than the default 200 so that
            # upstream errors are not reported to the client as successes.
            status_code=stream.status_code,
            headers=response_headers,
            # Close the upstream response once the body has been sent.
            background=BackgroundTask(stream.aclose),
        )


class RewriteDependency:
    """FastAPI dependency for the rewrites engine."""

    def __init__(self) -> None:
        self._rewrite_engine: Optional[RewriteEngine] = None

    async def initialize(self, http_client: httpx.AsyncClient) -> None:
        """Load the rules file named by the configuration and create the
        engine; must run before the dependency is first called.
        """
        self._rewrite_engine = RewriteEngine.init_from_file(
            path=config.rewrites_config_path, http_client=http_client
        )

    async def __call__(self) -> RewriteEngine:
        """Return the engine.

        Raises
        ------
        RuntimeError
            If `initialize` has not been called yet.
        """
        if self._rewrite_engine is None:
            raise RuntimeError("RewriteDependency is not initialized")
        return self._rewrite_engine


rewrite_dependency = RewriteDependency()