diff --git a/scripts/deploy_codex_audit_service.sh b/scripts/deploy_codex_audit_service.sh index 869b366..c17cfef 100644 --- a/scripts/deploy_codex_audit_service.sh +++ b/scripts/deploy_codex_audit_service.sh @@ -207,6 +207,7 @@ Environment=CODEX_AUDIT_SERVICE_ALLOWED_REFS=${ALLOWED_REFS} Environment=CODEX_AUDIT_SERVICE_ALLOWED_REPOSITORY_VISIBILITIES=${ALLOWED_REPOSITORY_VISIBILITIES} Environment=CODEX_AUDIT_SERVICE_ALLOWED_SOURCE_REPOSITORIES=${ALLOWED_SOURCE_REPOSITORIES} Environment=CODEX_AUDIT_SERVICE_JOB_DIR=${JOB_DIR} +Environment=CODEX_AUDIT_SERVICE_QUOTA_STORE=${JOB_DIR}/quota.json Environment=CODEX_AUDIT_SERVICE_SANDBOX=read-only ${audit_model_line} ${audit_reasoning_effort_line} diff --git a/service/ai_gateway_service.py b/service/ai_gateway_service.py index 0529ab3..086af94 100644 --- a/service/ai_gateway_service.py +++ b/service/ai_gateway_service.py @@ -756,6 +756,7 @@ def _handle_analyze(self, payload: dict[str, Any]) -> None: def _handle_execute_async(self, claims: dict[str, Any], payload: dict[str, Any]) -> None: """POST /v1/ai/execute/jobs — async Codex execution.""" + started = time.time() req = parse_execute_request(payload) # Security: validate source_repository against allowlist @@ -763,24 +764,27 @@ def _handle_execute_async(self, claims: dict[str, Any], payload: dict[str, Any]) if source_repo: _validate_source_repo(source_repo) _validate_source_repo_org(claims, source_repo) + quota_repo = source_repo or str(claims.get("repository") or "unknown") # Quota check quota = get_quota_manager() - qr = quota.check(source_repo, "codex-cli", req.prompt) + qr = quota.check(quota_repo, "codex-cli", req.prompt) if not qr["allowed"]: _json_response(self, HTTPStatus.TOO_MANY_REQUESTS, { "status": "error", "error": qr["reason"], "remaining_usd": qr.get("remaining_usd", 0), }) return - quota.record_execute(source_repo) + quota.record_execute(quota_repo) payload.setdefault("task", TASK_EXECUTE) job = _submit_job(claims, payload) + get_health_monitor().record("/v1/ai/execute/jobs", time.time() - started, True) _json_response(self, HTTPStatus.ACCEPTED, job) def _handle_execute_sync(self, claims: dict[str, Any], payload: dict[str, Any]) -> None: """POST /v1/ai/execute — sync Codex execution (backward compat).""" + started = time.time() req = parse_execute_request(payload) source_repo = str(payload.get("source_repository") or "") if source_repo: @@ -806,6 +810,7 @@ def _handle_execute_sync(self, claims: dict[str, Any], payload: dict[str, Any]) reasoning_effort=reasoning_effort, timeout=req.timeout_seconds, ) + get_health_monitor().record("/v1/ai/execute", time.time() - started, result.success, result.error if not result.success else "") if result.success: _json_response(self, HTTPStatus.OK, {"status": "ok", "output": result.output}) else: diff --git a/service/quota.py b/service/quota.py index b7fb90a..8089d72 100644 --- a/service/quota.py +++ b/service/quota.py @@ -141,6 +141,16 @@ def __init__(self): self._weekly_budget = DEFAULT_WEEKLY_BUDGET_USD self._repo_budgets: dict[str, dict[str, float]] = {} self._load_config() + self._load_records() + + def _store_path(self) -> Path | None: + path = os.environ.get("CODEX_AUDIT_SERVICE_QUOTA_STORE", "").strip() + if path: + return Path(path) + job_dir = os.environ.get("CODEX_AUDIT_SERVICE_JOB_DIR", "").strip() + if job_dir: + return Path(job_dir) / "quota.json" + return None def _load_config(self) -> None: config_path = os.environ.get("CODEX_AUDIT_SERVICE_QUOTA_CONFIG", "").strip() @@ -156,6 +166,41 @@ def _load_config(self) -> None: if isinstance(raw.get("repo_budgets"), dict): self._repo_budgets = raw["repo_budgets"] + def _load_records(self) -> None: + path = self._store_path() + if path is None: + return + if not path.exists(): + return + try: + raw = json.loads(path.read_text(encoding="utf-8")) + except (json.JSONDecodeError, OSError): + return + records = raw.get("records") if isinstance(raw, dict) else None + if not isinstance(records, dict): + return + self._records = { + repo: QuotaRecord.from_dict(item) + for repo, item in records.items() + if isinstance(repo, str) and isinstance(item, dict) + } + + def _save_records_locked(self) -> None: + path = self._store_path() + if path is None: + return + path.parent.mkdir(mode=0o700, parents=True, exist_ok=True) + payload = json.dumps( + {"records": {repo: record.to_dict() for repo, record in self._records.items()}}, + ensure_ascii=False, + sort_keys=True, + ).encode("utf-8") + tmp = path.with_suffix(path.suffix + ".tmp") + with open(tmp, "wb") as handle: + handle.write(payload) + os.chmod(tmp, 0o600) + os.replace(tmp, path) + def _reset_if_needed(self, record: QuotaRecord) -> QuotaRecord: now = time.time() if now - record.last_reset_daily > 86400: @@ -227,6 +272,7 @@ def record(self, repo: str, model: str, prompt: str, output: str = "") -> None: record.codex_calls += 1 record.total_cost_usd += cost self._records[repo] = record + self._save_records_locked() def record_execute(self, repo: str) -> None: """Record a codex exec call (flat cost).""" @@ -239,6 +285,7 @@ def record_execute(self, repo: str) -> None: record.codex_calls += 1 record.total_cost_usd += cost self._records[repo] = record + self._save_records_locked() def status(self, repo: str = "") -> dict[str, Any]: """Get quota status for a repo or all repos.""" diff --git a/tests/test_quota.py b/tests/test_quota.py index bcb64a2..aa18c65 100644 --- a/tests/test_quota.py +++ b/tests/test_quota.py @@ -149,6 +149,17 @@ def test_get_weekly_budget_respects_repo_overrides(self) -> None: self.manager._repo_budgets["premium/repo"] = {"weekly": 250.0} self.assertEqual(self.manager.get_weekly_budget("premium/repo"), 250.0) + def test_records_persist_to_store(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + store = str(Path(tmp) / "quota.json") + with patch.dict(os.environ, {"CODEX_AUDIT_SERVICE_QUOTA_STORE": store}): + first = QuotaManager() + first.record_execute("test/repo") + second = QuotaManager() + status = second.status("test/repo") + self.assertEqual(status["codex_calls"], 1) + self.assertGreater(status["total_cost_usd"], 0) + class TestQuotaConfigLoading(unittest.TestCase): """Quota configuration from JSON file."""