diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index dc90047..ceeb98b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -20,6 +20,9 @@ jobs: with: python-version: ${{ matrix.python }} + - name: Install stable virtualenv # https://github.com/pre-commit/action/issues/135 + run: pip install -U virtualenv==20.10.0 + - name: Run pre-commit uses: pre-commit/action@v2.0.3 diff --git a/manifests/base/auth-configmap.yaml b/manifests/base/auth-configmap.yaml index 0b24789..aae8dcb 100644 --- a/manifests/base/auth-configmap.yaml +++ b/manifests/base/auth-configmap.yaml @@ -1,7 +1,7 @@ apiVersion: v1 kind: ConfigMap metadata: - name: "ltdproxy-auth" + name: "ltd-proxy-auth" labels: app.kubernetes.io/name: "ltd-proxy" data: diff --git a/manifests/base/configmap.yaml b/manifests/base/configmap.yaml index cf24aa2..4b18c4d 100644 --- a/manifests/base/configmap.yaml +++ b/manifests/base/configmap.yaml @@ -1,17 +1,17 @@ apiVersion: v1 kind: ConfigMap metadata: - name: "ltdproxy" + name: "ltd-proxy" labels: app.kubernetes.io/name: "ltd-proxy" data: # These configurations are injected as environment variables into the # app container. 
- SAFIR_NAME: "ltdproxy" - SAFIR_LOGGER: "ltdproxy" + SAFIR_NAME: "ltd-proxy" + SAFIR_LOGGER: "ltd-proxy" SAFIR_LOG_LEVEL: "INFO" SAFIR_PROFILE: "production" - LTDPROXY_AUTH_CONFIG: "/opt/ltdproxy/auth/authrules.yaml" + LTDPROXY_AUTH_CONFIG: "/opt/ltd-proxy/auth/authrules.yaml" LTDPROXY_PATH_PREFIX: "/" LTDPROXY_S3_BUCKET: "" LTDPROXY_S3_PREFIX: "" diff --git a/manifests/base/deployment.yaml b/manifests/base/deployment.yaml index ac08d46..8800fe2 100644 --- a/manifests/base/deployment.yaml +++ b/manifests/base/deployment.yaml @@ -1,7 +1,7 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: "ltdproxy" + name: "ltd-proxy" labels: app.kubernetes.io/name: "ltd-proxy" spec: @@ -25,7 +25,7 @@ spec: name: "app" envFrom: - configMapRef: - name: "ltdproxy" + name: "ltd-proxy" securityContext: allowPrivilegeEscalation: false capabilities: @@ -34,12 +34,12 @@ spec: readOnlyRootFilesystem: true volumeMounts: - name: "auth-config" - mountPath: "/opt/ltdproxy/auth/" + mountPath: "/opt/ltd-proxy/auth/" readOnly: true volumes: - name: "auth-config" configMap: - name: "ltdproxy-auth" + name: "ltd-proxy-auth" securityContext: runAsNonRoot: true runAsUser: 1000 diff --git a/manifests/base/kustomization.yaml b/manifests/base/kustomization.yaml index 1fda23c..953d07a 100644 --- a/manifests/base/kustomization.yaml +++ b/manifests/base/kustomization.yaml @@ -3,7 +3,7 @@ kind: Kustomization images: - name: "ghcr.io/jsickcodes/ltd-proxy" - newTag: 0.0.0 + newTag: 0.1.0 resources: - configmap.yaml diff --git a/manifests/base/service.yaml b/manifests/base/service.yaml index cf02bf1..6164be8 100644 --- a/manifests/base/service.yaml +++ b/manifests/base/service.yaml @@ -1,14 +1,14 @@ apiVersion: v1 kind: Service metadata: - name: "ltdproxy" + name: "ltd-proxy" labels: app.kubernetes.io/name: "ltd-proxy" spec: ports: - - name: "ltdproxy-http" + - name: "ltd-proxy-http" protocol: "TCP" port: 8080 targetPort: "app" selector: - app.kubernetes.io/name: "ltdproxy" + app.kubernetes.io/name: 
"ltd-proxy" diff --git a/src/ltdproxy/githubauth.py b/src/ltdproxy/githubauth.py index 23166f2..bdc64f8 100644 --- a/src/ltdproxy/githubauth.py +++ b/src/ltdproxy/githubauth.py @@ -21,6 +21,7 @@ import yaml from authlib.integrations.starlette_client import OAuth from pydantic import BaseModel +from structlog import get_logger from ltdproxy.config import config @@ -42,6 +43,8 @@ ) """Type alias from the authlib GitHub OAuth client.""" +logger = get_logger(config.logger_name) + class GitHubOAuth: """This class maintains an OAuth instance that is registered for GitHub @@ -90,7 +93,8 @@ async def set_serialized_github_memberships( # These orgs and teams are mentioned in the GitHub Auth configuration, # and therefore are ones to pay attention to in the cookie. relevant_orgs = github_auth.relevant_orgs - relevant_teams = github_auth.relevant_teams + + logger.debug("Relevant orgs", orgs=relevant_orgs) github_client = gidgethub.httpx.GitHubAPI( http_client, "ltd-proxy", oauth_token=github_token @@ -106,11 +110,13 @@ async def set_serialized_github_memberships( user_teams: List[Tuple[str, str]] = [] async for team in github_client.getiter("/user/teams"): team_id = (team["organization"]["login"], team["name"]) - if team_id in relevant_teams: + if team_id[0] in relevant_orgs: + logger.debug("Found relevant team", team=team) user_teams.append(team_id) # Serialize memberships to JSON to pack inside the session cookie memberships = json.dumps({"orgs": user_orgs, "teams": user_teams}) + logger.debug("GitHub user memberships", orgs=user_orgs, teams=user_teams) session["github_memberships"] = memberships @@ -147,6 +153,11 @@ class PathRule(BaseModel): def path_matches(self, url_path: str) -> bool: """Test if a URL path matches the rule's patten.""" if self.pattern.match(url_path): + logger.debug( + "Path matches PathRule", + pattern=self.pattern, + url_path=url_path, + ) return True else: return False @@ -172,6 +183,12 @@ def is_user_authorized( return True # no matches + 
logger.debug( + "No authorization match", + pattern=self.pattern, + user_orgs=user_orgs, + user_teams=user_teams, + ) return False @@ -257,13 +274,11 @@ def relevant_orgs(self) -> Set[str]: all_orgs: Set[str] = set() for github_group in self.default: - if not github_group.is_team: - all_orgs.add(github_group.org) + all_orgs.add(github_group.org) for path_rule in self.paths: for github_group in path_rule.authorized: - if not github_group.is_team: - all_orgs.add(github_group.org) + all_orgs.add(github_group.org) return all_orgs diff --git a/src/ltdproxy/handlers/external.py b/src/ltdproxy/handlers/external.py index c5a257e..943d989 100644 --- a/src/ltdproxy/handlers/external.py +++ b/src/ltdproxy/handlers/external.py @@ -1,5 +1,6 @@ """Handlers for the app's external root, ``/ltdproxy/``.""" +import posixpath from typing import Optional, Union from urllib.parse import urlencode, urlparse @@ -12,6 +13,7 @@ from starlette.requests import Request from starlette.responses import ( HTMLResponse, + PlainTextResponse, RedirectResponse, StreamingResponse, ) @@ -27,6 +29,7 @@ set_serialized_github_memberships, ) from ltdproxy.s3 import Bucket, bucket_dependency +from ltdproxy.urlmap import map_s3_path __all__ = ["get_s3", "external_router"] @@ -34,6 +37,11 @@ """FastAPI router for all external handlers.""" +@external_router.get("/", name="homepage") +async def get_homepage() -> PlainTextResponse: + return PlainTextResponse("OK", status_code=200) + + @external_router.get("/auth", name="get_oauth_callback") async def get_oauth_callback( ref: Optional[str], @@ -147,20 +155,48 @@ async def get_s3( elif github_auth_result == AuthResult.authorized: # User is authorized; stream from S3. 
- if path == "" or path.endswith("/"): - # redwrite "*/" as "*/index.html" for static sites in S3 - bucket_path = f"{config.s3_bucket_prefix}{path}index.html" - else: - bucket_path = f"{config.s3_bucket_prefix}{path}" + bucket_path = map_s3_path(config.s3_bucket_prefix, path) + logger.debug( + "computed bucket path", + bucket_path=bucket_path, + request_url=str(request.url), + ) stream = await bucket.stream_object(http_client, bucket_path) if stream.status_code == 404: - raise HTTPException(status_code=404, detail="Does not exist.") + if not path.endswith("/") and posixpath.splitext(path)[1] == "": + # try a redirect + parsed_url = urlparse(str(request.url)) + parsed_url = parsed_url._replace(path=f"{parsed_url.path}/") + return RedirectResponse(url=parsed_url.geturl()) + else: + raise HTTPException(status_code=404, detail="Does not exist.") logger.debug("stream headers", headers=stream.headers) response_headers = { "Content-type": stream.headers["Content-type"], "Content-length": stream.headers["Content-length"], "Etag": stream.headers["Etag"], } + # FIXME hack to override content-type headers + if bucket_path.endswith(".html"): + logger.debug("is html") + response_headers["Content-type"] = "text/html" + elif bucket_path.endswith(".css"): + logger.debug("is css") + response_headers["Content-type"] = "text/css" + elif bucket_path.endswith(".js"): + logger.debug("is js") + response_headers["Content-type"] = "application/javascript" + elif bucket_path.endswith(".pdf"): + logger.debug("is pdf") + response_headers["Content-type"] = "application/pdf" + elif bucket_path.endswith(".png"): + logger.debug("is png") + response_headers["Content-type"] = "image/png" + else: + logger.debug("did not change response content-type") + + logger.debug("response headers", headers=response_headers) + return StreamingResponse( stream.aiter_raw(), background=BackgroundTask(stream.aclose), diff --git a/src/ltdproxy/main.py b/src/ltdproxy/main.py index a3d12b9..74bf7d2 100644 --- 
a/src/ltdproxy/main.py
+++ b/src/ltdproxy/main.py
@@ -7,9 +7,11 @@
 called.
 """
 
+import structlog
 from fastapi import FastAPI
 from safir.dependencies.http_client import http_client_dependency
 from safir.logging import configure_logging
+from safir.metadata import get_metadata
 from safir.middleware.x_forwarded import XForwardedMiddleware
 from starlette.middleware.sessions import SessionMiddleware
 
@@ -36,6 +38,12 @@
 
 @app.on_event("startup")
 async def startup_event() -> None:
+    logger = structlog.get_logger(config.logger_name)
+    metadata = get_metadata(
+        package_name="ltd-proxy",
+        application_name=config.name,
+    )
+    logger.info("Starting up", version=metadata.version)
     app.add_middleware(XForwardedMiddleware)
 
 
diff --git a/src/ltdproxy/urlmap.py b/src/ltdproxy/urlmap.py
new file mode 100644
index 0000000..3cd3e3e
--- /dev/null
+++ b/src/ltdproxy/urlmap.py
@@ -0,0 +1,50 @@
+"""Domain model for mapping a request URL to a resource in the S3 bucket."""
+
+from __future__ import annotations
+
+__all__ = ["map_s3_path"]
+
+
+def map_s3_path(bucket_prefix: str, request_path: str) -> str:
+    """Map a request URL path to an S3 bucket key.
+
+    Keys have the form ``[<prefix>/]<project>/v/<edition>/<path>``.
+
+    Parameters
+    ----------
+    bucket_prefix
+        Key prefix inside the bucket, or an empty string for no prefix.
+        Trailing slashes are ignored.
+    request_path
+        Request path whose first component is the project name. A second
+        component of ``v`` (any case) selects the edition named by the third
+        component; otherwise the ``__main`` edition is used.
+
+    Returns
+    -------
+    str
+        The object key. A request path ending in ``/`` maps to the
+        ``index.html`` under that path.
+    """
+    parts = request_path.split("/")
+    project_name = parts[0].lower()
+
+    if len(parts) >= 3 and parts[1].lower() == "v":
+        edition_name = parts[2]
+        edition_path = "/".join(parts[3:])
+    else:
+        edition_name = "__main"  # default edition
+        edition_path = "/".join(parts[1:])
+
+    if request_path.endswith("/"):
+        edition_path = f"{edition_path}index.html"
+
+    path_parts = [project_name, "v", edition_name, edition_path]
+    # Tolerate a prefix configured with a trailing slash so that the key
+    # never contains an empty "//" segment.
+    prefix = bucket_prefix.rstrip("/")
+    if prefix:
+        path_parts.insert(0, prefix)
+
+    # rstrip removes the dangling "/" left when edition_path is empty.
+    return "/".join(path_parts).rstrip("/")
diff --git a/tests/urlmap_test.py
b/tests/urlmap_test.py
new file mode 100644
index 0000000..7a2dd0a
--- /dev/null
+++ b/tests/urlmap_test.py
@@ -0,0 +1,104 @@
+"""Tests for the urlmap module."""
+
+from __future__ import annotations
+
+import pytest
+
+from ltdproxy.urlmap import map_s3_path
+
+
+@pytest.mark.parametrize(
+    "bucket_prefix,request_path,expected_bucket_path",
+    [
+        (
+            "",
+            "myproject",
+            "myproject/v/__main",
+        ),
+        (
+            "",
+            "myproject/",
+            "myproject/v/__main/index.html",
+        ),
+        (
+            "",
+            "myproject/test.css",
+            "myproject/v/__main/test.css",
+        ),
+        (
+            "",
+            "myproject/index.html",
+            "myproject/v/__main/index.html",
+        ),
+        (
+            "",
+            "myproject/v/dev",
+            "myproject/v/dev",
+        ),
+        (
+            "",
+            "myproject/v/dev/index.html",
+            "myproject/v/dev/index.html",
+        ),
+        (
+            "",
+            "myproject/v/dev/a/b/index.html",
+            "myproject/v/dev/a/b/index.html",
+        ),
+        (
+            "",
+            "myproject/v/dev/a/b/",
+            "myproject/v/dev/a/b/index.html",
+        ),
+        (
+            "prefix",
+            "myproject/",
+            "prefix/myproject/v/__main/index.html",
+        ),
+        (
+            "prefix",
+            "myproject/index.html",
+            "prefix/myproject/v/__main/index.html",
+        ),
+        (
+            "prefix",
+            "myproject/v/dev",
+            "prefix/myproject/v/dev",
+        ),
+        (
+            "prefix",
+            "myproject/v/dev/index.html",
+            "prefix/myproject/v/dev/index.html",
+        ),
+        # Project names are lower-cased.
+        (
+            "",
+            "MyProject/index.html",
+            "myproject/v/__main/index.html",
+        ),
+        # The "v" edition marker is matched case-insensitively.
+        (
+            "",
+            "myproject/V/dev/index.html",
+            "myproject/v/dev/index.html",
+        ),
+        # Edition root with a trailing slash serves its index page.
+        (
+            "",
+            "myproject/v/dev/",
+            "myproject/v/dev/index.html",
+        ),
+        # Project root without a trailing slash, under a prefix.
+        (
+            "prefix",
+            "myproject",
+            "prefix/myproject/v/__main",
+        ),
+    ],
+)
+def test_map_s3_path(
+    bucket_prefix: str, request_path: str, expected_bucket_path: str
+) -> None:
+    """Check the request path to S3 key mapping for each case."""
+    result = map_s3_path(bucket_prefix, request_path)
+    assert result == expected_bucket_path