diff --git a/crates/challenge-orchestrator/src/backend.rs b/crates/challenge-orchestrator/src/backend.rs index 140df4777..25b0fa633 100644 --- a/crates/challenge-orchestrator/src/backend.rs +++ b/crates/challenge-orchestrator/src/backend.rs @@ -20,16 +20,41 @@ //! - Resource limits //! - No Docker socket access for challenges -use crate::{ChallengeContainerConfig, ChallengeInstance, ContainerStatus}; +use crate::{ChallengeContainerConfig, ChallengeDocker, ChallengeInstance, ContainerStatus}; use async_trait::async_trait; use secure_container_runtime::{ - ContainerConfigBuilder, ContainerState, NetworkMode, SecureContainerClient, + CleanupResult as BrokerCleanupResult, ContainerConfig, ContainerConfigBuilder, ContainerError, + ContainerInfo, ContainerStartResult, ContainerState, NetworkMode, SecureContainerClient, }; use std::path::Path; +use std::sync::Arc; use tracing::{error, info, warn}; /// Default broker socket path pub const DEFAULT_BROKER_SOCKET: &str = "/var/run/platform/broker.sock"; +const BROKER_SOCKET_OVERRIDE_ENV: &str = "BROKER_SOCKET_OVERRIDE"; + +/// Get the broker socket path, preferring an environment override. +/// +/// If the `BROKER_SOCKET_OVERRIDE` environment variable is set, its value is returned; +/// otherwise the compiled `DEFAULT_BROKER_SOCKET` is returned. 
+/// +/// # Examples +/// +/// ``` +/// use std::env; +/// // Ensure override is not set to observe default behavior +/// env::remove_var("BROKER_SOCKET_OVERRIDE"); +/// let p = crate::default_broker_socket_path(); +/// assert_eq!(p, crate::DEFAULT_BROKER_SOCKET.to_string()); +/// +/// // Set an override and observe it takes precedence +/// env::set_var("BROKER_SOCKET_OVERRIDE", "/tmp/override.sock"); +/// assert_eq!(crate::default_broker_socket_path(), "/tmp/override.sock"); +/// ``` +fn default_broker_socket_path() -> String { + std::env::var(BROKER_SOCKET_OVERRIDE_ENV).unwrap_or_else(|_| DEFAULT_BROKER_SOCKET.to_string()) +} /// Container backend trait for managing challenge containers #[async_trait] @@ -62,23 +87,396 @@ pub trait ContainerBackend: Send + Sync { async fn list_challenge_containers(&self, challenge_id: &str) -> anyhow::Result>; } +#[async_trait] +pub trait SecureContainerBridge: Send + Sync { + async fn create_container( + &self, + config: ContainerConfig, + ) -> Result<(String, String), ContainerError>; + async fn start_container( + &self, + container_id: &str, + ) -> Result; + async fn get_endpoint(&self, container_id: &str, port: u16) -> Result; + async fn stop_container( + &self, + container_id: &str, + timeout_secs: u32, + ) -> Result<(), ContainerError>; + async fn remove_container(&self, container_id: &str, force: bool) + -> Result<(), ContainerError>; + async fn inspect(&self, container_id: &str) -> Result; + async fn pull_image(&self, image: &str) -> Result<(), ContainerError>; + async fn logs(&self, container_id: &str, tail: usize) -> Result; + async fn cleanup_challenge( + &self, + challenge_id: &str, + ) -> Result; + async fn list_by_challenge( + &self, + challenge_id: &str, + ) -> Result, ContainerError>; +} + +struct SecureClientBridge { + client: SecureContainerClient, +} + +impl SecureClientBridge { + /// Creates a SecureClientBridge configured to communicate with the container broker at `socket_path`. 
+ /// + /// `socket_path` is the filesystem path to the broker's Unix domain socket. + /// + /// # Examples + /// + /// ``` + /// let bridge = SecureClientBridge::new("/var/run/platform/broker.sock"); + /// ``` + fn new(socket_path: &str) -> Self { + Self { + client: SecureContainerClient::new(socket_path), + } + } +} + +#[async_trait] +impl SecureContainerBridge for SecureClientBridge { + /// Creates a container from the provided `ContainerConfig`. + /// + /// Returns the created container's ID and its name. + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(bridge: &impl crate::SecureContainerBridge, config: crate::ContainerConfig) { + /// let (id, name) = bridge.create_container(config).await.unwrap(); + /// assert!(!id.is_empty()); + /// assert!(!name.is_empty()); + /// # } + /// ``` + async fn create_container( + &self, + config: ContainerConfig, + ) -> Result<(String, String), ContainerError> { + self.client.create_container(config).await + } + + /// Starts an existing container through the secure bridge. + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(client: &impl SecureContainerBridge) -> Result<(), ContainerError> { + /// let result = client.start_container("container-id").await?; + /// // `result` contains the outcome of the start operation. + /// # Ok(()) + /// # } + /// ``` + async fn start_container( + &self, + container_id: &str, + ) -> Result { + self.client.start_container(container_id).await + } + + /// Get the host:port endpoint exposed for a container port. + /// + /// # Returns + /// + /// `Ok(String)` containing the endpoint in `host:port` form for the requested container and port, or `Err(ContainerError)` if the endpoint cannot be retrieved. 
+ /// + /// # Examples + /// + /// ```no_run + /// # async fn example(backend: &impl crate::SecureContainerBridge) -> Result<(), crate::ContainerError> { + /// let endpoint = backend.get_endpoint("container-id-123", 8080).await?; + /// println!("Endpoint: {}", endpoint); + /// # Ok(()) } + /// ``` + async fn get_endpoint(&self, container_id: &str, port: u16) -> Result { + self.client.get_endpoint(container_id, port).await + } + + /// Stop a container managed by the broker, allowing a graceful shutdown period. + /// + /// # Parameters + /// + /// - `container_id`: Identifier of the container to stop. + /// - `timeout_secs`: Number of seconds to wait for graceful shutdown before forcefully stopping. + /// + /// # Returns + /// + /// `Ok(())` if the container was stopped successfully, `Err(ContainerError)` otherwise. + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(bridge: &impl crate::SecureContainerBridge) -> Result<(), crate::ContainerError> { + /// bridge.stop_container("container-123", 30).await?; + /// # Ok(()) } + /// ``` + async fn stop_container( + &self, + container_id: &str, + timeout_secs: u32, + ) -> Result<(), ContainerError> { + self.client.stop_container(container_id, timeout_secs).await + } + + /// Remove a container by its identifier. + /// + /// Attempts to remove the container identified by `container_id`. The `force` flag + /// indicates whether the removal should be forced. + /// + /// Returns `Ok(())` if the container was removed, `Err(ContainerError)` if the + /// underlying removal operation failed. + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(backend: &B) -> Result<(), Box> { + /// backend.remove_container("container-abc123", true).await?; + /// # Ok(()) + /// # } + /// ``` + async fn remove_container( + &self, + container_id: &str, + force: bool, + ) -> Result<(), ContainerError> { + self.client.remove_container(container_id, force).await + } + + /// Inspect a container and obtain its metadata. 
+ /// + /// Inspect the container identified by `container_id` and return its runtime and configuration + /// information as a `ContainerInfo`. + /// + /// # Parameters + /// + /// - `container_id`: The identifier of the container to inspect. + /// + /// # Returns + /// + /// On success, returns the container's metadata as a `ContainerInfo`; on failure, returns a + /// `ContainerError`. + /// + /// # Examples + /// + /// ```no_run + /// # use futures::executor::block_on; + /// # let backend = /* Arc */ todo!(); + /// let info = block_on(async { backend.inspect("container-id").await }); + /// ``` + async fn inspect(&self, container_id: &str) -> Result { + self.client.inspect(container_id).await + } + + /// Pulls the specified container image through the configured secure broker. + /// + /// `image` should be a valid image reference (for example "nginx:latest" or "repo/image:tag"). + /// + /// # Returns + /// + /// `Ok(())` if the image was pulled successfully, `Err(ContainerError)` if the broker reported an error. + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(backend: &B) { + /// backend.pull_image("nginx:latest").await.unwrap(); + /// # } + /// ``` + async fn pull_image(&self, image: &str) -> Result<(), ContainerError> { + self.client.pull_image(image).await + } + + /// Retrieve logs for a container, limiting the output to the most recent `tail` lines. + /// + /// # Examples + /// + /// ``` + /// # async fn example(backend: &B) { + /// let logs = backend.logs("container_id", 100).await.unwrap(); + /// println!("{}", logs); + /// # } + /// ``` + async fn logs(&self, container_id: &str, tail: usize) -> Result { + self.client.logs(container_id, tail).await + } + + /// Requests the broker to clean up all containers and resources for a challenge. + /// + /// Delegates cleanup to the configured secure container bridge and returns the broker's + /// cleanup result which includes counts and any error details reported by the broker. 
+ /// + /// # Returns + /// + /// `Ok(BrokerCleanupResult)` with cleanup details when the broker operation succeeds, + /// `Err(ContainerError)` if the cleanup request fails or the bridge reports an error. + async fn cleanup_challenge( + &self, + challenge_id: &str, + ) -> Result { + self.client.cleanup_challenge(challenge_id).await + } + + /// List containers associated with a challenge. + /// + /// Returns a vector of `ContainerInfo` entries for containers that belong to the given + /// `challenge_id`. + /// + /// # Examples + /// + /// ```rust + /// // Acquire an implementation of `SecureContainerBridge` as `bridge` (specifics vary). + /// // Then call: + /// // let infos = bridge.list_by_challenge("challenge-123").await.unwrap(); + /// ``` + async fn list_by_challenge( + &self, + challenge_id: &str, + ) -> Result, ContainerError> { + self.client.list_by_challenge(challenge_id).await + } +} + /// Secure container backend using the broker pub struct SecureBackend { - client: SecureContainerClient, + client: Arc, validator_id: String, } impl SecureBackend { - /// Create a new secure backend + /// Constructs a SecureBackend connected to the secure container broker at the given socket and using the provided validator identifier. + /// + /// The `socket_path` is the filesystem path to the broker's Unix domain socket. `validator_id` is used to tag containers started by this backend. + /// + /// # Examples + /// + /// ``` + /// let backend = SecureBackend::new("/var/run/platform/broker.sock", "validator-abc"); + /// // use backend... + /// ``` + /// + /// # Returns + /// + /// A `SecureBackend` instance configured to communicate with the broker at `socket_path` and to use `validator_id` when naming or tagging containers. pub fn new(socket_path: &str, validator_id: &str) -> Self { + Self::with_bridge(SecureClientBridge::new(socket_path), validator_id) + } + + /// Provides a global, lazily initialized test slot for injecting or retrieving a `SecureBackend`. 
+ /// + /// This helper returns a `'static` `Mutex>` used by tests to set a test backend + /// instance that other test helpers can take or inspect. + /// + /// # Examples + /// + /// ``` + /// // set the test backend + /// let slot = test_backend_slot(); + /// { + /// let mut guard = slot.lock().unwrap(); + /// *guard = Some(SecureBackend::with_bridge(...)); // example; construct a test backend + /// } + /// // read or take the backend later in another test helper + /// { + /// let mut guard = slot.lock().unwrap(); + /// let _backend = guard.take(); + /// } + /// ``` + #[cfg(test)] + fn test_backend_slot() -> &'static std::sync::Mutex> { + use std::sync::{Mutex, OnceLock}; + static SLOT: OnceLock>> = OnceLock::new(); + SLOT.get_or_init(|| Mutex::new(None)) + } + + /// Remove and return the current test `SecureBackend` stored in the internal test slot, if any. + /// + /// This takes the `Option` from the test slot, leaving `None` in its place. + /// + /// # Returns + /// + /// `Some(SecureBackend)` if a test backend was set, `None` otherwise. + /// + /// # Examples + /// + /// ``` + /// // In tests: set_test_backend(Some(secure_backend)); + /// // let backend = take_test_backend().expect("test backend should be present"); + /// ``` + #[cfg(test)] + fn take_test_backend() -> Option { + Self::test_backend_slot().lock().unwrap().take() + } + + /// Sets the global test SecureBackend used by test helpers. + /// + /// This replaces the current test backend stored in the internal test slot with `backend`. + /// + /// # Examples + /// + /// ``` + /// // In test code: + /// let test_backend = SecureBackend::with_bridge(RecordingSecureBridge::new(), "validator"); + /// set_test_backend(test_backend); + /// ``` + #[cfg(test)] + pub(crate) fn set_test_backend(backend: SecureBackend) { + Self::test_backend_slot().lock().unwrap().replace(backend); + } + + /// Creates a SecureBackend that uses the provided bridge for broker operations and the given validator identifier. 
+ /// + /// This constructs a backend which delegates all SecureContainerBridge calls to `client` and records `validator_id` + /// for container naming and metadata. Intended for injecting custom or test bridges. + /// + /// # Examples + /// + /// ```no_run + /// let bridge = RecordingSecureBridge::default(); + /// let backend = SecureBackend::with_bridge(bridge, "validator-1"); + /// ``` + pub fn with_bridge( + client: impl SecureContainerBridge + 'static, + validator_id: impl Into, + ) -> Self { Self { - client: SecureContainerClient::new(socket_path), - validator_id: validator_id.to_string(), + client: Arc::new(client), + validator_id: validator_id.into(), } } - /// Create from environment or default socket + /// Constructs a SecureBackend from environment or the default broker socket when available. + /// + /// Checks for a broker socket in the following order: + /// 1. The `CONTAINER_BROKER_SOCKET` environment variable (if set and the path exists). + /// 2. The default broker socket path returned by `default_broker_socket_path()` (if it exists). + /// The `VALIDATOR_HOTKEY` environment variable is read to populate the validator identifier; if unset, `"unknown"` is used. + /// + /// # Returns + /// + /// `Some(Self)` when a usable broker socket path is found and a backend can be constructed, `None` otherwise. + /// + /// # Examples + /// + /// ``` + /// // Attempt to build a backend from environment; handle the absence of a broker gracefully. 
+ /// if let Some(backend) = SecureBackend::from_env() { + /// // broker-backed backend is available + /// let _ = backend; + /// } else { + /// // fall back to another backend + /// } + /// ``` pub fn from_env() -> Option { + #[cfg(test)] + if let Some(backend) = Self::take_test_backend() { + return Some(backend); + } + let validator_id = std::env::var("VALIDATOR_HOTKEY").unwrap_or_else(|_| "unknown".to_string()); @@ -91,23 +489,77 @@ impl SecureBackend { warn!(socket = %socket, "Broker socket from env does not exist"); } - // Priority 2: Default socket path - if Path::new(DEFAULT_BROKER_SOCKET).exists() { - info!(socket = %DEFAULT_BROKER_SOCKET, "Using default broker socket"); - return Some(Self::new(DEFAULT_BROKER_SOCKET, &validator_id)); + // Priority 2: Default socket path (allow override for tests) + let default_socket = default_broker_socket_path(); + if Path::new(&default_socket).exists() { + info!(socket = %default_socket, "Using default broker socket"); + return Some(Self::new(&default_socket, &validator_id)); } None } - /// Check if broker is available + /// Determine whether a broker socket exists and is therefore available. + + /// + + /// This checks the `CONTAINER_BROKER_SOCKET` environment variable first: if it is set + + /// and points to an existing filesystem path, this function returns `true`. If the + + /// environment variable is not set or does not point to an existing path, the default + + /// broker socket path returned by `default_broker_socket_path()` is checked instead. 
+ + /// + + /// # Examples + + /// + + /// ``` + + /// use std::fs; + + /// use std::env; + + /// use std::path::PathBuf; + + /// + + /// // create a temporary socket file and point the env var at it + + /// let mut p = env::temp_dir(); + + /// p.push("test_broker_socket.sock"); + + /// let path = p.to_string_lossy().into_owned(); + + /// let _f = fs::File::create(&path).unwrap(); + + /// env::set_var("CONTAINER_BROKER_SOCKET", &path); + + /// + + /// assert!(crate::backend::is_available()); + + /// + + /// // cleanup + + /// let _ = fs::remove_file(&path); + + /// env::remove_var("CONTAINER_BROKER_SOCKET"); + + /// ``` pub fn is_available() -> bool { if let Ok(socket) = std::env::var("CONTAINER_BROKER_SOCKET") { if Path::new(&socket).exists() { return true; } } - Path::new(DEFAULT_BROKER_SOCKET).exists() + let default_socket = default_broker_socket_path(); + Path::new(&default_socket).exists() } } @@ -233,15 +685,100 @@ impl ContainerBackend for SecureBackend { } /// Direct Docker backend (for local development) +#[derive(Clone)] pub struct DirectDockerBackend { - docker: crate::docker::DockerClient, + docker: Arc, } impl DirectDockerBackend { - /// Create a new direct Docker backend + /// Creates a new DirectDockerBackend by connecting to the Docker daemon. + /// + /// Returns `Ok(DirectDockerBackend)` on success, or an error if a Docker client cannot be created (for example, if the Docker daemon is unavailable). + /// In tests this constructor may return an injected test result instead of attempting a real Docker connection. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// let backend = crate::backend::DirectDockerBackend::new().await.unwrap(); + /// // use backend... 
+ /// # }); + /// ``` pub async fn new() -> anyhow::Result { + #[cfg(test)] + if let Some(result) = Self::take_test_result() { + return result; + } + let docker = crate::docker::DockerClient::connect().await?; - Ok(Self { docker }) + Ok(Self::with_docker(docker)) + } + + /// Constructs a DirectDockerBackend that uses the provided `ChallengeDocker` implementation. + /// + /// The supplied `docker` is stored inside the backend and used for all subsequent Docker-backed + /// operations. This is primarily intended for testing with custom or mocked `ChallengeDocker` + /// implementations. + /// + /// # Examples + /// + /// ```no_run + /// // Provide any type that implements `ChallengeDocker` (e.g., a test double). + /// let docker_impl = /* your ChallengeDocker implementation */; + /// let backend = DirectDockerBackend::with_docker(docker_impl); + /// ``` + pub fn with_docker(docker: impl ChallengeDocker + 'static) -> Self { + Self { + docker: Arc::new(docker), + } + } + + /// Returns a global, lazily-initialized mutex slot used by tests to inject or take a + /// `DirectDockerBackend` construction result. + /// + /// The slot holds an `Option>` so tests can store either + /// a successful backend or an error for `DirectDockerBackend::new()` simulation. + /// + /// # Examples + /// + /// ``` + /// use std::sync::MutexGuard; + /// // Acquire the slot and set a simulated error result for tests: + /// let slot = crate::test_backend_slot(); + /// { + /// let mut guard = slot.lock().unwrap(); + /// *guard = Some(Err(anyhow::anyhow!("simulated failure"))); + /// } + /// // Later, a test can take or inspect the stored value. 
+ /// ``` + #[cfg(test)] + fn test_backend_slot() -> &'static std::sync::Mutex>> + { + use std::sync::OnceLock; + static SLOT: OnceLock>>> = + OnceLock::new(); + SLOT.get_or_init(|| std::sync::Mutex::new(None)) + } + + #[cfg(test)] + fn take_test_result() -> Option> { + Self::test_backend_slot().lock().unwrap().take() + } + + /// Sets the test result that DirectDockerBackend::new() will return during tests. + /// + /// This injects a precomputed `Result` into the global test slot so test-only + /// code that constructs a `DirectDockerBackend` can observe the supplied success or failure. + /// + /// # Examples + /// + /// ```no_run + /// // Simulate a failure when DirectDockerBackend::new() is called in tests. + /// set_test_result(Err(anyhow::anyhow!("simulated docker init failure"))); + /// ``` + #[cfg(test)] + pub(crate) fn set_test_result(result: anyhow::Result) { + Self::test_backend_slot().lock().unwrap().replace(result); } } @@ -295,31 +832,69 @@ impl ContainerBackend for DirectDockerBackend { } } -/// Create the appropriate backend based on environment +/// Selects and constructs the appropriate container backend based on environment and broker availability. +/// +/// The selection priority is: +/// 1. If DEVELOPMENT_MODE is enabled, use the direct Docker backend (for local development). +/// 2. If a secure broker socket is available, use the secure broker backend. +/// 3. Otherwise, attempt a Docker fallback and return an error if that fails. +/// +/// # Returns /// -/// Priority order: -/// 1. DEVELOPMENT_MODE=true -> Direct Docker (local dev only) -/// 2. Broker socket available -> Secure broker (production default) -/// 3. No broker + not dev mode -> Error (production requires broker) +/// `Ok` with a boxed `ContainerBackend` implementation when a backend is successfully created, `Err` otherwise. 
+/// +/// # Examples +/// +/// ``` +/// # tokio_test::block_on(async { +/// let backend = crate::backend::create_backend().await; +/// match backend { +/// Ok(b) => { +/// // use the backend, e.g. b.pull_image("alpine:latest").await.unwrap(); +/// drop(b); +/// } +/// Err(e) => { +/// eprintln!("failed to create backend: {}", e); +/// } +/// } +/// # }); +/// ``` pub async fn create_backend() -> anyhow::Result> { - // Check if explicitly in development mode - let dev_mode = std::env::var("DEVELOPMENT_MODE") - .map(|v| v == "true" || v == "1") - .unwrap_or(false); - - if dev_mode { - info!("DEVELOPMENT_MODE=true: Using direct Docker (local development)"); - let direct = DirectDockerBackend::new().await?; - return Ok(Box::new(direct)); - } - - // Try to use secure broker (default for production) - if let Some(secure) = SecureBackend::from_env() { - info!("Using secure container broker (production mode)"); - return Ok(Box::new(secure)); + match select_backend_mode() { + BackendMode::Development => { + info!("DEVELOPMENT_MODE=true: Using direct Docker (local development)"); + let direct = DirectDockerBackend::new().await?; + Ok(Box::new(direct)) + } + BackendMode::Secure => { + if let Some(secure) = SecureBackend::from_env() { + info!("Using secure container broker (production mode)"); + Ok(Box::new(secure)) + } else { + warn!( + "Secure backend reported as available but failed to initialize; falling back to Docker" + ); + create_docker_fallback_backend().await + } + } + BackendMode::Fallback => create_docker_fallback_backend().await, } +} - // No broker available - try Docker as last resort but warn +/// Attempts to instantiate a direct Docker-backed ContainerBackend as a fallback when the broker is unavailable. +/// +/// On success returns a boxed backend that talks directly to the local Docker daemon. On failure returns an error +/// describing that no container backend is available and suggesting starting the broker or enabling development mode. 
+/// +/// # Examples +/// +/// ```ignore +/// // Run inside an async context: +/// let backend = create_docker_fallback_backend().await?; +/// // `backend` is a `Box` ready to use with Docker. +/// # Ok::<(), anyhow::Error>(()) +/// ``` +async fn create_docker_fallback_backend() -> anyhow::Result> { warn!("Broker not available. Attempting Docker fallback..."); warn!("This should only happen in local development!"); warn!("Set DEVELOPMENT_MODE=true to suppress this warning, or start the broker."); @@ -333,23 +908,1472 @@ pub async fn create_backend() -> anyhow::Result> { error!("Cannot connect to Docker: {}", e); error!("For production: Start the container-broker service"); error!("For development: Set DEVELOPMENT_MODE=true and ensure Docker is running"); + let default_socket = default_broker_socket_path(); Err(anyhow::anyhow!( "No container backend available. \ Start broker at {} or set DEVELOPMENT_MODE=true for local Docker", - DEFAULT_BROKER_SOCKET + default_socket )) } } } -/// Check if running in secure mode (broker available) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BackendMode { + Development, + Secure, + Fallback, +} + +/// Selects which container backend mode the application should use. +/// +/// Returns Development when development mode is enabled via the environment; +/// otherwise returns Secure if a container broker socket is available; otherwise returns Fallback. +/// +/// # Examples +/// +/// ```rust +/// use crate::backend::BackendMode; +/// let mode = crate::backend::select_backend_mode(); +/// assert!(matches!( +/// mode, +/// BackendMode::Development | BackendMode::Secure | BackendMode::Fallback +/// )); +/// ``` +pub fn select_backend_mode() -> BackendMode { + if is_development_mode() { + BackendMode::Development + } else if SecureBackend::is_available() { + BackendMode::Secure + } else { + BackendMode::Fallback + } +} + +/// Indicates whether a secure broker socket is available. 
+/// +/// # Returns +/// `true` if a broker socket exists and secure mode is available, `false` otherwise. +/// +/// # Examples +/// +/// ``` +/// // Example: assert that secure mode detection returns a boolean +/// let _ = is_secure_mode(); +/// ``` pub fn is_secure_mode() -> bool { SecureBackend::is_available() } -/// Check if in development mode +/// Determines whether development mode is enabled. +/// +/// Treats the `DEVELOPMENT_MODE` environment variable as enabled when its value is `"true"` or `"1"`. +/// Returns `false` if the variable is unset or has any other value. +/// +/// # Examples +/// +/// ``` +/// std::env::set_var("DEVELOPMENT_MODE", "1"); +/// assert!(is_development_mode()); +/// +/// std::env::set_var("DEVELOPMENT_MODE", "true"); +/// assert!(is_development_mode()); +/// +/// std::env::set_var("DEVELOPMENT_MODE", "0"); +/// assert!(!is_development_mode()); +/// +/// std::env::remove_var("DEVELOPMENT_MODE"); +/// assert!(!is_development_mode()); +/// ``` pub fn is_development_mode() -> bool { std::env::var("DEVELOPMENT_MODE") .map(|v| v == "true" || v == "1") .unwrap_or(false) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::docker::CleanupResult as DockerCleanupResult; + use chrono::Utc; + use platform_core::ChallengeId; + use serial_test::serial; + use std::collections::HashMap; + use std::sync::{Arc, Mutex}; + use tempfile::{tempdir, NamedTempFile}; + + /// Clears environment variables used by backend selection and test helpers. + + /// + + /// Removes the following variables if present: `DEVELOPMENT_MODE`, `CONTAINER_BROKER_SOCKET`, + + /// `VALIDATOR_HOTKEY`, and the value of `BROKER_SOCKET_OVERRIDE_ENV`. 
+ + /// + + /// # Examples + + /// + + /// ``` + + /// use std::env; + + /// // ensure a variable is set + + /// env::set_var("DEVELOPMENT_MODE", "true"); + + /// assert!(env::var_os("DEVELOPMENT_MODE").is_some()); + + /// + + /// // clear the test-related vars + + /// reset_env(); + + /// + + /// // the variable should be removed + + /// assert!(env::var_os("DEVELOPMENT_MODE").is_none()); + + /// ``` + fn reset_env() { + for key in [ + "DEVELOPMENT_MODE", + "CONTAINER_BROKER_SOCKET", + "VALIDATOR_HOTKEY", + BROKER_SOCKET_OVERRIDE_ENV, + ] { + std::env::remove_var(key); + } + } + + #[test] + #[serial] + fn test_is_development_mode_reflects_env() { + reset_env(); + assert!(!is_development_mode()); + + std::env::set_var("DEVELOPMENT_MODE", "1"); + assert!(is_development_mode()); + + std::env::set_var("DEVELOPMENT_MODE", "false"); + assert!(!is_development_mode()); + reset_env(); + } + + #[test] + #[serial] + fn test_secure_backend_from_env_detects_socket() { + reset_env(); + let temp_socket = NamedTempFile::new().expect("temp socket path"); + let socket_path = temp_socket.path().to_path_buf(); + std::env::set_var("CONTAINER_BROKER_SOCKET", &socket_path); + std::env::set_var("VALIDATOR_HOTKEY", "validator123"); + + let backend = SecureBackend::from_env().expect("should create backend from env"); + assert_eq!(backend.validator_id, "validator123"); + + reset_env(); + drop(temp_socket); + } + + #[test] + #[serial] + fn test_is_secure_mode_uses_env_socket() { + reset_env(); + let temp_socket = NamedTempFile::new().expect("temp socket path"); + let socket_path = temp_socket.path().to_path_buf(); + std::env::set_var("CONTAINER_BROKER_SOCKET", &socket_path); + + assert!(is_secure_mode()); + + reset_env(); + drop(temp_socket); + } + + #[test] + #[serial] + fn test_secure_backend_is_available_with_override_socket() { + reset_env(); + let temp_socket = NamedTempFile::new().expect("temp socket path"); + let socket_path = temp_socket.path().to_path_buf(); + 
std::env::set_var(BROKER_SOCKET_OVERRIDE_ENV, &socket_path); + + assert!(SecureBackend::is_available()); + + reset_env(); + drop(temp_socket); + } + + #[test] + #[serial] + fn test_select_backend_mode_prefers_development_mode() { + reset_env(); + std::env::set_var("DEVELOPMENT_MODE", "true"); + + assert_eq!(select_backend_mode(), BackendMode::Development); + + reset_env(); + } + + /// Verifies that backend selection chooses `Secure` when a broker socket path is present. + /// + /// # Examples + /// + /// ``` + /// // create a temporary socket path and set override env var + /// let temp_socket = tempfile::NamedTempFile::new().expect("temp socket path"); + /// let socket_path = temp_socket.path().to_path_buf(); + /// std::env::set_var(BROKER_SOCKET_OVERRIDE_ENV, &socket_path); + /// + /// assert_eq!(select_backend_mode(), BackendMode::Secure); + /// + /// // cleanup + /// std::env::remove_var(BROKER_SOCKET_OVERRIDE_ENV); + /// drop(temp_socket); + /// ``` + #[test] + #[serial] + fn test_select_backend_mode_prefers_secure_when_broker_available() { + reset_env(); + let temp_socket = NamedTempFile::new().expect("temp socket path"); + let socket_path = temp_socket.path().to_path_buf(); + std::env::set_var(BROKER_SOCKET_OVERRIDE_ENV, &socket_path); + + assert_eq!(select_backend_mode(), BackendMode::Secure); + + reset_env(); + drop(temp_socket); + } + + #[test] + #[serial] + fn test_select_backend_mode_falls_back_without_broker() { + reset_env(); + let dir = tempdir().expect("temp dir"); + let missing_socket = dir.path().join("missing.sock"); + std::env::set_var(BROKER_SOCKET_OVERRIDE_ENV, &missing_socket); + + assert_eq!(select_backend_mode(), BackendMode::Fallback); + + reset_env(); + } + + #[test] + #[serial] + fn test_secure_backend_from_env_uses_default_socket() { + reset_env(); + let temp_socket = NamedTempFile::new().expect("temp socket path"); + let socket_path = temp_socket.path().to_path_buf(); + std::env::set_var(BROKER_SOCKET_OVERRIDE_ENV, &socket_path); + + 
let backend = SecureBackend::from_env().expect("backend from default socket"); + assert_eq!(backend.validator_id, "unknown"); + + reset_env(); + } + + #[tokio::test] + #[serial] + async fn test_secure_backend_start_challenge_via_bridge() { + reset_env(); + let bridge = RecordingSecureBridge::default(); + bridge.set_create_response("container-123", "challenge-container"); + bridge.set_endpoint("container-123", "http://sandbox:8080"); + + let backend = SecureBackend::with_bridge(bridge.clone(), "validator-abc"); + let config = sample_config("ghcr.io/platformnetwork/demo:v1"); + + let instance = backend + .start_challenge(&config) + .await + .expect("start succeeds"); + + assert_eq!(instance.container_id, "container-123"); + assert_eq!(instance.endpoint, "http://sandbox:8080"); + assert_eq!(instance.image, config.docker_image); + + let ops = bridge.operations(); + assert!(ops.iter().any(|op| op.starts_with("create:"))); + assert!(ops.iter().any(|op| op.starts_with("start:"))); + assert!(ops.iter().any(|op| op.starts_with("endpoint:"))); + + reset_env(); + } + + #[tokio::test] + #[serial] + async fn test_secure_backend_covers_remaining_methods() { + reset_env(); + let bridge = RecordingSecureBridge::default(); + bridge.set_inspect_state("running", ContainerState::Running); + bridge.set_inspect_state("stopped", ContainerState::Stopped); + bridge.set_logs("running", "log output"); + bridge.set_cleanup_result(BrokerCleanupResult { + total: 2, + stopped: 2, + removed: 2, + errors: Vec::new(), + }); + bridge.set_list( + "challenge-1", + vec![ + container_info("alpha", ContainerState::Running), + container_info("beta", ContainerState::Stopped), + ], + ); + let backend = SecureBackend::with_bridge(bridge.clone(), "validator-xyz"); + + backend + .stop_container("running") + .await + .expect("stop delegates"); + backend + .remove_container("running") + .await + .expect("remove delegates"); + backend + .pull_image("ghcr.io/platformnetwork/demo:v2") + .await + .expect("pull 
delegates"); + let logs = backend + .get_logs("running", 50) + .await + .expect("logs delegates"); + assert_eq!(logs, "log output"); + assert!(backend + .is_container_running("running") + .await + .expect("running state")); + assert!(!backend + .is_container_running("stopped") + .await + .expect("stopped state")); + + let removed = backend + .cleanup_challenge("challenge-1") + .await + .expect("cleanup delegates"); + assert_eq!(removed, 2); + + let ids = backend + .list_challenge_containers("challenge-1") + .await + .expect("list delegates"); + assert_eq!(ids, vec!["alpha".to_string(), "beta".to_string()]); + + let ops = bridge.operations(); + assert!(ops.iter().any(|op| op.starts_with("stop:"))); + assert!(ops.iter().any(|op| op.starts_with("remove:"))); + assert!(ops.iter().any(|op| op.starts_with("pull:"))); + assert!(ops.iter().any(|op| op.starts_with("logs:"))); + assert!(ops.iter().any(|op| op.starts_with("inspect:"))); + assert!(ops.iter().any(|op| op.starts_with("cleanup:"))); + assert!(ops.iter().any(|op| op.starts_with("list:"))); + + reset_env(); + } + + #[tokio::test] + #[serial] + async fn test_direct_backend_delegates_to_docker() { + let docker = RecordingChallengeDocker::default(); + docker.set_list(vec!["container-1".to_string(), "other".to_string()]); + + let backend = DirectDockerBackend::with_docker(docker.clone()); + let mut config = sample_config("ghcr.io/platformnetwork/demo:v3"); + config.challenge_id = ChallengeId::new(); + + backend.pull_image(&config.docker_image).await.unwrap(); + let instance = backend.start_challenge(&config).await.unwrap(); + docker.set_running(&instance.container_id, true); + docker.set_logs(&instance.container_id, "container logs"); + backend + .stop_container(&instance.container_id) + .await + .unwrap(); + backend + .remove_container(&instance.container_id) + .await + .unwrap(); + assert!(backend + .is_container_running(&instance.container_id) + .await + .unwrap()); + let logs = 
backend.get_logs(&instance.container_id, 10).await.unwrap(); + assert_eq!(logs, "container logs"); + + let listed = backend.list_challenge_containers("unused").await.unwrap(); + assert_eq!(listed.len(), 2); + + let ops = docker.operations(); + assert!(ops.iter().any(|op| op.starts_with("pull:"))); + assert!(ops.iter().any(|op| op.starts_with("start:"))); + assert!(ops.iter().any(|op| op.starts_with("stop:"))); + assert!(ops.iter().any(|op| op.starts_with("remove:"))); + assert!(ops.iter().any(|op| op.starts_with("logs:"))); + } + + #[tokio::test] + #[serial] + async fn test_direct_backend_cleanup_filters_by_challenge_id() { + let docker = RecordingChallengeDocker::default(); + let challenge_id = ChallengeId::new(); + let challenge_str = challenge_id.to_string(); + docker.set_list(vec![ + format!("{challenge_str}-a"), + "platform-helper".to_string(), + format!("other-{challenge_str}"), + ]); + + let backend = DirectDockerBackend::with_docker(docker.clone()); + let removed = backend + .cleanup_challenge(&challenge_str) + .await + .expect("cleanup succeeds"); + assert_eq!(removed, 2); + + let ops = docker.operations(); + assert!(ops.iter().filter(|op| op.starts_with("stop:")).count() >= 2); + assert!(ops.iter().filter(|op| op.starts_with("remove:")).count() >= 2); + } + + #[tokio::test] + #[serial] + async fn test_create_backend_uses_direct_in_dev_mode() { + reset_env(); + std::env::set_var("DEVELOPMENT_MODE", "true"); + let docker = RecordingChallengeDocker::default(); + DirectDockerBackend::set_test_result(Ok(DirectDockerBackend::with_docker(docker.clone()))); + + let backend = create_backend().await.expect("backend"); + backend + .pull_image("ghcr.io/platformnetwork/test:v1") + .await + .unwrap(); + + assert!(docker + .operations() + .iter() + .any(|op| op == "pull:ghcr.io/platformnetwork/test:v1")); + + reset_env(); + } + + /// Ensures that create_backend selects the secure (broker-backed) backend when a broker socket is available. 
+ /// + /// Sets the broker socket override and injects a test SecureBackend bridge, then verifies that + /// a subsequent `pull_image` call is forwarded to the broker. + /// + /// # Examples + /// + /// ``` + /// // Configure a broker socket override and inject a RecordingSecureBridge as the test backend, + /// // then call `create_backend().await` and assert that `pull_image` is handled by the broker. + /// ``` + #[tokio::test] + #[serial] + async fn test_create_backend_uses_secure_when_broker_available() { + reset_env(); + let temp_socket = NamedTempFile::new().expect("temp socket path"); + let socket_path = temp_socket.path().to_path_buf(); + std::env::set_var(BROKER_SOCKET_OVERRIDE_ENV, &socket_path); + + let bridge = RecordingSecureBridge::default(); + SecureBackend::set_test_backend(SecureBackend::with_bridge( + bridge.clone(), + "validator-secure", + )); + + let backend = create_backend().await.expect("secure backend"); + backend + .pull_image("ghcr.io/platformnetwork/secure:v1") + .await + .unwrap(); + + assert!(bridge + .operations() + .iter() + .any(|op| op == "pull:ghcr.io/platformnetwork/secure:v1")); + + reset_env(); + drop(temp_socket); + } + + #[tokio::test] + #[serial] + async fn test_create_backend_falls_back_when_secure_missing() { + reset_env(); + let dir = tempdir().expect("temp dir"); + let missing_socket = dir.path().join("missing.sock"); + std::env::set_var(BROKER_SOCKET_OVERRIDE_ENV, &missing_socket); + DirectDockerBackend::set_test_result(Ok(DirectDockerBackend::with_docker( + RecordingChallengeDocker::default(), + ))); + + let backend = create_backend().await.expect("fallback backend"); + backend + .pull_image("ghcr.io/platformnetwork/fallback:v1") + .await + .unwrap(); + + reset_env(); + } + + #[tokio::test] + #[serial] + async fn test_create_docker_fallback_backend_reports_error() { + reset_env(); + DirectDockerBackend::set_test_result(Err(anyhow::anyhow!("boom"))); + let err = match create_docker_fallback_backend().await { + Ok(_) => 
panic!("expected error"), + Err(err) => err, + }; + assert!(err.to_string().contains("No container backend available")); + reset_env(); + } + + /// Builds a test-oriented ChallengeContainerConfig populated with sensible default values. + /// + /// The returned config uses `image` as the container image and fills other fields + /// (IDs, resources, timeouts, and weights) with typical defaults suitable for tests. + /// + /// # Parameters + /// + /// - `image`: Docker image reference to set on the returned configuration. + /// + /// # Examples + /// + /// ``` + /// let cfg = sample_config("example/image:latest"); + /// assert_eq!(cfg.docker_image, "example/image:latest"); + /// assert_eq!(cfg.memory_mb, 512); + /// ``` + fn sample_config(image: &str) -> ChallengeContainerConfig { + ChallengeContainerConfig { + challenge_id: ChallengeId::new(), + name: "challenge".to_string(), + docker_image: image.to_string(), + mechanism_id: 0, + emission_weight: 1.0, + timeout_secs: 300, + cpu_cores: 1.0, + memory_mb: 512, + gpu_required: false, + } + } + + /// Creates a sample `ContainerInfo` populated with the given `id` and `state` and default test values for other fields. + /// + /// This is a test helper that returns a `ContainerInfo` whose `id` and `state` are set from the arguments; other fields + /// (name, challenge_id, owner_id, image, timestamps, and empty maps) use fixed sample values. 
+ /// + /// # Examples + /// + /// ``` + /// let info = container_info("abc123", ContainerState::Running); + /// assert_eq!(info.id, "abc123"); + /// assert_eq!(info.state, ContainerState::Running); + /// assert!(info.endpoint.is_none()); + /// ``` + fn container_info(id: &str, state: ContainerState) -> ContainerInfo { + ContainerInfo { + id: id.to_string(), + name: format!("{id}-container"), + challenge_id: "challenge-1".to_string(), + owner_id: "owner".to_string(), + image: "ghcr.io/platformnetwork/demo".to_string(), + state, + created_at: Utc::now(), + ports: HashMap::new(), + endpoint: None, + labels: HashMap::new(), + } + } + + #[derive(Clone, Default)] + struct RecordingSecureBridge { + inner: Arc, + } + + struct RecordingSecureBridgeInner { + operations: Mutex>, + inspect_map: Mutex>, + endpoint_map: Mutex>, + logs_map: Mutex>, + list_map: Mutex>>, + cleanup_result: Mutex, + create_response: Mutex<(String, String)>, + } + + impl Default for RecordingSecureBridgeInner { + /// Constructs a new RecordingSecureBridge with empty recorded operation lists and default simulated responses. + /// + /// The returned instance is initialized with: + /// - empty vectors/maps for recorded operations and simulated inspect/endpoint/logs/list responses, + /// - a `BrokerCleanupResult` with zeros and an empty error list, + /// - a default `create_response` of `("container-id", "container")`. 
+ /// + /// # Examples + /// + /// ``` + /// let bridge = RecordingSecureBridge::default(); + /// let create_resp = bridge.create_response.lock().unwrap(); + /// assert_eq!(create_resp.0, "container-id"); + /// assert_eq!(create_resp.1, "container"); + /// ``` + fn default() -> Self { + Self { + operations: Mutex::new(Vec::new()), + inspect_map: Mutex::new(HashMap::new()), + endpoint_map: Mutex::new(HashMap::new()), + logs_map: Mutex::new(HashMap::new()), + list_map: Mutex::new(HashMap::new()), + cleanup_result: Mutex::new(BrokerCleanupResult { + total: 0, + stopped: 0, + removed: 0, + errors: Vec::new(), + }), + create_response: Mutex::new(("container-id".to_string(), "container".to_string())), + } + } + } + + impl RecordingSecureBridge { + /// Retrieve a snapshot of recorded operation names. + /// + /// The returned vector contains the operations in the order they were recorded. + /// + /// # Returns + /// + /// A `Vec` with the recorded operation names in chronological order. + /// + /// # Examples + /// + /// ``` + /// // assuming `recorder` is an instance with prior recorded operations + /// let ops = recorder.operations(); + /// assert!(ops.iter().all(|s| !s.is_empty())); + /// ``` + fn operations(&self) -> Vec { + self.inner.operations.lock().unwrap().clone() + } + + /// Records inspect information for a container. + /// + /// Stores a `ContainerInfo` constructed from `state` under `id` in the bridge's internal + /// inspection map so that subsequent lookups will return the stored value. 
+ /// + /// # Examples + /// + /// ``` + /// let bridge = RecordingSecureBridge::default(); + /// bridge.set_inspect_state("container-123", ContainerState::Running); + /// assert!(bridge + /// .inner + /// .inspect_map + /// .lock() + /// .unwrap() + /// .contains_key("container-123")); + /// ``` + fn set_inspect_state(&self, id: &str, state: ContainerState) { + self.inner + .inspect_map + .lock() + .unwrap() + .insert(id.to_string(), container_info(id, state)); + } + + /// Associates a container identifier with its exposed endpoint. + /// + /// Stores the provided `endpoint` string under the given `id` in the bridge's internal endpoint map. + /// + /// # Parameters + /// + /// - `id`: The container identifier to associate the endpoint with. + /// - `endpoint`: The endpoint (for example, "http://127.0.0.1:8080") exposed by the container. + /// + /// # Examples + /// + /// ``` + /// // assuming `bridge` implements `set_endpoint` + /// bridge.set_endpoint("container-123", "http://127.0.0.1:8080"); + /// ``` + fn set_endpoint(&self, id: &str, endpoint: &str) { + self.inner + .endpoint_map + .lock() + .unwrap() + .insert(id.to_string(), endpoint.to_string()); + } + + /// Store or replace the stored logs for a container identifier. + /// + /// This updates the internal logs map so subsequent reads for the same `id` will + /// return `logs`. + /// + /// # Examples + /// + /// ``` + /// let rec = RecordingSecureBridge::default(); + /// rec.set_logs("container-1", "line1\nline2"); + /// assert_eq!( + /// rec.inner.logs_map.lock().unwrap().get("container-1").map(String::as_str), + /// Some("line1\nline2") + /// ); + /// ``` + fn set_logs(&self, id: &str, logs: &str) { + self.inner + .logs_map + .lock() + .unwrap() + .insert(id.to_string(), logs.to_string()); + } + + /// Store the given container list under the specified challenge identifier in the bridge's internal mapping. + /// + /// This replaces any existing entry for the challenge with `containers`. 
+ /// + /// # Examples + /// + /// ``` + /// // Assuming `bridge` implements the same API as the recording bridge used in tests: + /// // let bridge = RecordingSecureBridge::new(); + /// // bridge.set_list("challenge-123", vec![container_info("c1")]); + /// ``` + fn set_list(&self, challenge: &str, containers: Vec) { + self.inner + .list_map + .lock() + .unwrap() + .insert(challenge.to_string(), containers); + } + + /// Sets the broker cleanup result returned by this recording bridge. + /// + /// This overwrites the bridge's current `cleanup_result` so subsequent cleanup calls + /// will observe `result`. + /// + /// # Parameters + /// + /// - `result`: The `BrokerCleanupResult` to store and return for future cleanup requests. + /// + /// # Examples + /// + /// ``` + /// let bridge = RecordingSecureBridge::new(); + /// bridge.set_cleanup_result(BrokerCleanupResult::default()); + /// ``` + fn set_cleanup_result(&self, result: BrokerCleanupResult) { + *self.inner.cleanup_result.lock().unwrap() = result; + } + + /// Sets the simulated container creation response used by this recording bridge. + /// + /// `id` is the container identifier to return, and `name` is the created container's name. + /// This is intended for tests to control what `create_container` will report. + /// + /// # Examples + /// + /// ``` + /// let bridge = RecordingSecureBridge::default(); + /// bridge.set_create_response("container-123", "challenge-abc"); + /// ``` + fn set_create_response(&self, id: &str, name: &str) { + *self.inner.create_response.lock().unwrap() = (id.to_string(), name.to_string()); + } + } + + #[async_trait] + impl SecureContainerBridge for RecordingSecureBridge { + /// Creates a container for the given configuration, recording the creation request and returning + /// the preconfigured `(container_id, container_name)` or a `ContainerError`. 
+ /// + /// The implementation records a `"create:{challenge_id}"` entry in the bridge's operation log + /// and returns whatever value has been set on `create_response`. + /// + /// # Examples + /// + /// ``` + /// // Setup a RecordingSecureBridge with a preset response + /// let bridge = RecordingSecureBridge::default(); + /// *bridge.inner.create_response.lock().unwrap() = ("cid".to_string(), "name".to_string()); + /// + /// let cfg = ContainerConfig { challenge_id: "chal1".to_string(), ..Default::default() }; + /// let res = bridge.create_container(cfg).await.unwrap(); + /// assert_eq!(res, ("cid".to_string(), "name".to_string())); + /// assert_eq!(bridge.inner.operations.lock().unwrap().last().unwrap(), "create:chal1"); + /// ``` + async fn create_container( + &self, + config: ContainerConfig, + ) -> Result<(String, String), ContainerError> { + self.inner + .operations + .lock() + .unwrap() + .push(format!("create:{}", config.challenge_id)); + Ok(self.inner.create_response.lock().unwrap().clone()) + } + + /// Records a start operation for the given container ID and returns a `ContainerStartResult` with no exposed ports or endpoint. + /// + /// # Examples + /// + /// ``` + /// // Given a `bridge` that exposes `start_container`, calling it records the start and returns an empty result. + /// let res = futures::executor::block_on(bridge.start_container("container-1")).unwrap(); + /// assert_eq!(res.container_id, "container-1"); + /// assert!(res.ports.is_empty()); + /// assert!(res.endpoint.is_none()); + /// ``` + async fn start_container( + &self, + container_id: &str, + ) -> Result { + self.inner + .operations + .lock() + .unwrap() + .push(format!("start:{container_id}")); + Ok(ContainerStartResult { + container_id: container_id.to_string(), + ports: HashMap::new(), + endpoint: None, + }) + } + + /// Retrieves the network endpoint for a container's exposed port. 
+ /// + /// Returns the endpoint string associated with `container_id` and `port` when available, + /// otherwise returns `ContainerError::ContainerNotFound`. + /// + /// # Examples + /// + /// ``` + /// # use std::sync::Arc; + /// # async fn doc_example() -> Result<(), Box> { + /// // `bridge` would be an implementation providing `get_endpoint`. + /// // let endpoint = bridge.get_endpoint("container-123", 8080).await?; + /// // assert_eq!(endpoint, "127.0.0.1:32768"); + /// # Ok(()) } + /// ``` + async fn get_endpoint( + &self, + container_id: &str, + port: u16, + ) -> Result { + self.inner + .operations + .lock() + .unwrap() + .push(format!("endpoint:{container_id}:{port}")); + self.inner + .endpoint_map + .lock() + .unwrap() + .get(container_id) + .cloned() + .ok_or_else(|| ContainerError::ContainerNotFound(container_id.to_string())) + } + + /// Stops the container identified by `container_id`, using `timeout_secs` as the shutdown timeout. + /// + /// Attempts to stop the container and returns success or a `ContainerError` on failure. + /// + /// # Examples + /// + /// ```no_run + /// # async fn run_example(backend: &B) -> Result<(), crate::backend::ContainerError> { + /// backend.stop_container("container-123", 30).await?; + /// # Ok(()) + /// # } + /// ``` + async fn stop_container( + &self, + container_id: &str, + timeout_secs: u32, + ) -> Result<(), ContainerError> { + self.inner + .operations + .lock() + .unwrap() + .push(format!("stop:{container_id}:{timeout_secs}")); + Ok(()) + } + + /// Records a remove operation for the specified container ID with the given `force` flag and returns success. + /// + /// # Examples + /// + /// ``` + /// # use futures::executor::block_on; + /// # async fn run_example() { + /// // `bridge` is an instance providing `remove_container`. + /// // block_on is used here to run the async call in a synchronous example. + /// // Replace `bridge` with the actual instance in real code. 
+ /// // block_on(async { bridge.remove_container("container-id", true).await.unwrap(); }); + /// # } + /// ``` + async fn remove_container( + &self, + container_id: &str, + force: bool, + ) -> Result<(), ContainerError> { + self.inner + .operations + .lock() + .unwrap() + .push(format!("remove:{container_id}:{force}")); + Ok(()) + } + + /// Retrieves cached inspection information for a container by its ID. + /// + /// Looks up the container in the bridge's internal inspection map and returns the associated + /// `ContainerInfo`. + /// + /// # Errors + /// + /// Returns `ContainerError::ContainerNotFound(container_id)` if no inspection entry exists for + /// the provided `container_id`. + /// + /// # Examples + /// + /// ``` + /// # async fn example>(bridge: &B) { + /// let res = bridge.inspect("example-container-id").await; + /// match res { + /// Ok(info) => println!("found container with id: {}", info.id), + /// Err(crate::backend::ContainerError::ContainerNotFound(id)) => println!("not found: {}", id), + /// Err(_) => panic!("unexpected error"), + /// } + /// # } + /// ``` + async fn inspect(&self, container_id: &str) -> Result { + self.inner + .operations + .lock() + .unwrap() + .push(format!("inspect:{container_id}")); + self.inner + .inspect_map + .lock() + .unwrap() + .get(container_id) + .cloned() + .ok_or_else(|| ContainerError::ContainerNotFound(container_id.to_string())) + } + + /// Records a request to pull the specified container image. + /// + /// This implementation appends a `pull:` entry to the recorder's operations + /// and reports success. 
+ /// + /// # Examples + /// + /// ```no_run + /// # async fn example() {} + /// // Assuming `backend` implements `pull_image(&str)`: + /// // backend.pull_image("alpine:latest").await.unwrap(); + /// ``` + async fn pull_image(&self, image: &str) -> Result<(), ContainerError> { + self.inner + .operations + .lock() + .unwrap() + .push(format!("pull:{image}")); + Ok(()) + } + + /// Retrieve the recorded logs for a given container identifier. + /// + /// The `tail` parameter is the requested number of trailing log lines and is recorded for inspection by tests. + /// + /// # Returns + /// + /// `Ok(String)` with the container's logs, or `Err(ContainerError::ContainerNotFound(_))` if no logs are recorded for `container_id`. + /// + /// # Examples + /// + /// ``` + /// # // Example usage (bridge must implement an async `logs` method with this signature) + /// # use futures::executor::block_on; + /// # async fn _example(bridge: &impl std::ops::Deref) {} + /// // let output = block_on(bridge.logs("container-id", 100)); + /// ``` + async fn logs(&self, container_id: &str, tail: usize) -> Result { + self.inner + .operations + .lock() + .unwrap() + .push(format!("logs:{container_id}:{tail}")); + self.inner + .logs_map + .lock() + .unwrap() + .get(container_id) + .cloned() + .ok_or_else(|| ContainerError::ContainerNotFound(container_id.to_string())) + } + + /// Performs cleanup for the specified challenge and returns the preconfigured broker cleanup result. + /// + /// This implementation records the cleanup operation (appending `cleanup:{challenge_id}` to the + /// bridge's internal operation log) and returns a clone of the bridge's configured + /// `BrokerCleanupResult`. + /// + /// # Returns + /// + /// `Ok(BrokerCleanupResult)` containing the configured cleanup result. 
+ async fn cleanup_challenge( + &self, + challenge_id: &str, + ) -> Result { + self.inner + .operations + .lock() + .unwrap() + .push(format!("cleanup:{challenge_id}")); + Ok(self.inner.cleanup_result.lock().unwrap().clone()) + } + + /// Retrieves all containers associated with a challenge. + /// + /// Returns the list of container inspection records for the challenge identified by `challenge_id`. + /// + /// # Examples + /// + /// ``` + /// // assuming `bridge` implements the same trait and is available in scope: + /// # async fn run_example(bridge: &B) where B: std::marker::Send { + /// let containers = bridge.list_by_challenge("challenge-123").await.unwrap(); + /// assert!(containers.is_empty() || containers.iter().all(|c| !c.id.is_empty())); + /// # } + /// ``` + async fn list_by_challenge( + &self, + challenge_id: &str, + ) -> Result, ContainerError> { + self.inner + .operations + .lock() + .unwrap() + .push(format!("list:{challenge_id}")); + Ok(self + .inner + .list_map + .lock() + .unwrap() + .get(challenge_id) + .cloned() + .unwrap_or_default()) + } + } + + #[derive(Clone, Default)] + struct RecordingChallengeDocker { + inner: Arc, + } + + #[derive(Default)] + struct RecordingChallengeDockerInner { + operations: Mutex>, + running: Mutex>, + logs: Mutex>, + list: Mutex>, + next_id: Mutex, + } + + impl RecordingChallengeDocker { + /// Retrieve a snapshot of recorded operation names. + /// + /// The returned vector contains the operations in the order they were recorded. + /// + /// # Returns + /// + /// A `Vec` with the recorded operation names in chronological order. + /// + /// # Examples + /// + /// ``` + /// // assuming `recorder` is an instance with prior recorded operations + /// let ops = recorder.operations(); + /// assert!(ops.iter().all(|s| !s.is_empty())); + /// ``` + fn operations(&self) -> Vec { + self.inner.operations.lock().unwrap().clone() + } + + /// Set the running state for a container identifier in the backend's internal registry. 
+ /// + /// Updates the internal mapping so subsequent queries will reflect whether the given + /// container `id` is considered running (`true`) or not (`false`). + /// + /// # Parameters + /// + /// - `id`: The container identifier to update. + /// - `running`: `true` if the container is running, `false` otherwise. + /// + /// # Examples + /// + /// ```rust + /// // Assume `backend` is an instance providing this method. + /// // This example is illustrative and marked `ignore` to avoid doctest compilation. + /// # #[allow(unused)] + /// # fn example(backend: &impl std::ops::Deref) {} + /// // Mark container "c1" as running: + /// // backend.set_running("c1", true); + /// ``` + fn set_running(&self, id: &str, running: bool) { + self.inner + .running + .lock() + .unwrap() + .insert(id.to_string(), running); + } + + /// Store or replace the logs string associated with a given identifier. + /// + /// This acquires a lock on the internal `logs` map and inserts the provided + /// `logs` value under `id`, replacing any existing entry for that `id`. 
+ /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// use std::sync::Mutex; + /// + /// struct Inner { + /// logs: Mutex>, + /// } + /// + /// struct Recorder { + /// inner: Inner, + /// } + /// + /// impl Recorder { + /// fn set_logs(&self, id: &str, logs: &str) { + /// self.inner + /// .logs + /// .lock() + /// .unwrap() + /// .insert(id.to_string(), logs.to_string()); + /// } + /// } + /// + /// let recorder = Recorder { + /// inner: Inner { + /// logs: Mutex::new(HashMap::new()), + /// }, + /// }; + /// + /// recorder.set_logs("container-1", "started\nready"); + /// let map = recorder.inner.logs.lock().unwrap(); + /// assert_eq!(map.get("container-1").map(|s| s.as_str()), Some("started\nready")); + /// ``` + fn set_logs(&self, id: &str, logs: &str) { + self.inner + .logs + .lock() + .unwrap() + .insert(id.to_string(), logs.to_string()); + } + + /// Replaces the stored list with the provided `items`. + /// + /// Acquires the inner list mutex and sets its contents to `items`. + /// Panics if the mutex is poisoned. + /// + /// # Examples + /// + /// ```no_run + /// // Assuming `obj` has the `set_list` method shown: + /// // obj.set_list(vec!["one".into(), "two".into()]); + /// ``` + fn set_list(&self, items: Vec) { + *self.inner.list.lock().unwrap() = items; + } + + /// Generate the next challenge instance with a unique container name. + /// + /// The returned instance is initialized for the same challenge and image as `config`, + /// and has a container name of the form `container-` where `` is an incrementing + /// internal counter. The instance's status is set to `Running`. + /// + /// # Examples + /// + /// ``` + /// // Shows the container name format produced by this method. 
+ /// let name = format!("container-{}", 42); + /// assert_eq!(name, "container-42"); + /// ``` + fn next_instance(&self, config: &ChallengeContainerConfig) -> ChallengeInstance { + let mut guard = self.inner.next_id.lock().unwrap(); + let value = *guard; + *guard += 1; + let suffix = value.to_string(); + sample_instance( + config.challenge_id, + &format!("container-{}", suffix), + &config.docker_image, + ContainerStatus::Running, + ) + } + } + + /// Constructs a ChallengeInstance from the provided identifiers, image, and status. + /// + /// The returned instance has its `container_id`, `image`, and `status` set from the + /// corresponding arguments, `endpoint` set to `http://{container_id}`, and `started_at` + /// set to the current UTC time. + /// + /// # Examples + /// + /// ``` + /// # use chrono::Utc; + /// # use crate::types::{ChallengeId, ContainerStatus, ChallengeInstance}; + /// let cid = ChallengeId::new("challenge-1"); + /// let inst = sample_instance(cid, "container-123", "example/image:latest", ContainerStatus::Running); + /// assert_eq!(inst.container_id, "container-123"); + /// assert_eq!(inst.image, "example/image:latest"); + /// assert_eq!(inst.endpoint, "http://container-123"); + /// assert_eq!(inst.status, ContainerStatus::Running); + /// ``` + fn sample_instance( + challenge_id: ChallengeId, + container_id: &str, + image: &str, + status: ContainerStatus, + ) -> ChallengeInstance { + ChallengeInstance { + challenge_id, + container_id: container_id.to_string(), + image: image.to_string(), + endpoint: format!("http://{container_id}"), + started_at: Utc::now(), + status, + } + } + + #[async_trait] + impl ChallengeDocker for RecordingChallengeDocker { + /// Records a requested image pull into the recorder for testing purposes. + /// + /// This method notes that an image pull was requested by appending `pull:{image}` + /// to the internal operations log and returns success. 
+ /// + /// # Examples + /// + /// ``` + /// // In an async context: + /// // let client = /* a recording client instance */; + /// // client.pull_image("nginx:latest").await.unwrap(); + /// ``` + async fn pull_image(&self, image: &str) -> anyhow::Result<()> { + self.inner + .operations + .lock() + .unwrap() + .push(format!("pull:{image}")); + Ok(()) + } + + /// Record a start operation for the given challenge and produce the next challenge instance. + /// + /// This test helper appends a `start:` entry to the internal operations log + /// and returns a constructed `ChallengeInstance` based on `config`. + /// + /// # Parameters + /// + /// - `config`: Configuration used to derive the returned `ChallengeInstance`. + /// + /// # Returns + /// + /// `Ok(ChallengeInstance)` containing the next prepared instance for the provided `config`. + /// + /// # Examples + /// + /// ``` + /// # use futures::executor::block_on; + /// # // `bridge` and `cfg` would be prepared test values in real tests. + /// # let bridge = RecordingSecureBridge::default(); + /// # let cfg = ChallengeContainerConfig { challenge_id: "example".into(), ..Default::default() }; + /// let instance = block_on(bridge.start_challenge(&cfg)).unwrap(); + /// assert_eq!(instance.challenge_id, "example"); + /// ``` + async fn start_challenge( + &self, + config: &ChallengeContainerConfig, + ) -> anyhow::Result { + self.inner + .operations + .lock() + .unwrap() + .push(format!("start:{}", config.challenge_id)); + Ok(self.next_instance(config)) + } + + /// Stops a container identified by `container_id`. + /// + /// This implementation records the stop request (for example, into an internal + /// operations log) and returns success when the request has been recorded. + /// + /// # Examples + /// + /// ``` + /// // `backend` is any value with an async `stop_container(&self, &str) -> anyhow::Result<()>` method. 
+ /// let backend = /* construct backend */ ;
+ /// let rt = tokio::runtime::Runtime::new().unwrap();
+ /// rt.block_on(async {
+ /// backend.stop_container("container123").await.unwrap();
+ /// });
+ /// ```
+ async fn stop_container(&self, container_id: &str) -> anyhow::Result<()> {
+ self.inner
+ .operations
+ .lock()
+ .unwrap()
+ .push(format!("stop:{container_id}"));
+ Ok(())
+ }
+
+ /// Records the removal of the container identified by `container_id` and succeeds.
+ ///
+ /// This implementation appends the string `"remove:{container_id}"` to the bridge's recorded operations and returns `Ok(())`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #[tokio::test]
+ /// async fn example_record_remove() {
+ /// let bridge = RecordingSecureBridge::default();
+ /// bridge.remove_container("abc123").await.unwrap();
+ /// let ops = bridge.inner.operations.lock().unwrap();
+ /// assert_eq!(ops.last().map(|s| s.as_str()), Some("remove:abc123"));
+ /// }
+ /// ```
+ async fn remove_container(&self, container_id: &str) -> anyhow::Result<()> {
+ self.inner
+ .operations
+ .lock()
+ .unwrap()
+ .push(format!("remove:{container_id}"));
+ Ok(())
+ }
+
+ /// Checks whether the container with the specified ID is currently running.
+ ///
+ /// Records an `is_running:{container_id}` operation, then returns the simulated
+ /// running flag for the container (defaulting to `false` when the id is unknown).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// // Call from an async context on a backend implementing the method:
+ /// // let running = backend.is_container_running("container-id").await.unwrap();
+ /// // assert!(running == true || running == false);
+ /// ```
+ async fn is_container_running(&self, container_id: &str) -> anyhow::Result<bool> {
+ self.inner
+ .operations
+ .lock()
+ .unwrap()
+ .push(format!("is_running:{container_id}"));
+ Ok(*self
+ .inner
+ .running
+ .lock()
+ .unwrap()
+ .get(container_id)
+ .unwrap_or(&false))
+ }
+
+ /// Retrieve recorded logs for a container and record the logs request in the operation history. 
+ /// + /// The method appends a "logs:{container_id}:{tail}" entry to the bridge's operation log and returns + /// the stored log string for `container_id`. If no logs are recorded for the given container, an + /// empty string is returned. + /// + /// # Parameters + /// + /// - `container_id`: Identifier of the container whose logs are requested. + /// - `tail`: Number of trailing lines requested (recorded for auditing; does not alter returned value + /// in the test recording bridge). + /// + /// # Returns + /// + /// `String` containing the recorded logs for the container, or an empty string if none exist. + /// + /// # Examples + /// + /// ``` + /// # // Example placeholder: in real usage, `bridge` is an instance that provides `get_logs`. + /// # async fn example_usage() { + /// # // let logs = bridge.get_logs("container-1", 100).await.unwrap(); + /// # } + /// ``` + async fn get_logs(&self, container_id: &str, tail: usize) -> anyhow::Result { + self.inner + .operations + .lock() + .unwrap() + .push(format!("logs:{container_id}:{tail}")); + Ok(self + .inner + .logs + .lock() + .unwrap() + .get(container_id) + .cloned() + .unwrap_or_default()) + } + + /// Return the list of container IDs tracked by this backend. + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(backend: &impl crate::ChallengeDocker) { + /// let containers: Vec = backend.list_challenge_containers().await.unwrap(); + /// # } + /// ``` + async fn list_challenge_containers(&self) -> anyhow::Result> { + self.inner + .operations + .lock() + .unwrap() + .push("list_containers".to_string()); + Ok(self.inner.list.lock().unwrap().clone()) + } + + /// Record a cleanup operation request for stale containers identified by a name prefix. + /// + /// This method logs a cleanup intent for containers whose names start with `prefix`. + /// + /// # Parameters + /// + /// - `prefix`: Container name prefix used to select which containers to consider for cleanup. 
+ /// - `max_age_minutes`: Maximum age in minutes to consider a container "stale" (unused by this implementation). + /// - `exclude_patterns`: List of name patterns to exclude from cleanup (unused by this implementation). + /// + /// # Returns + /// + /// `DockerCleanupResult` summarizing the outcome; currently always the default result (no removals). + async fn cleanup_stale_containers( + &self, + prefix: &str, + _max_age_minutes: u64, + _exclude_patterns: &[&str], + ) -> anyhow::Result { + self.inner + .operations + .lock() + .unwrap() + .push(format!("cleanup:{prefix}")); + Ok(DockerCleanupResult::default()) + } + } +} \ No newline at end of file diff --git a/crates/challenge-orchestrator/src/docker.rs b/crates/challenge-orchestrator/src/docker.rs index 2c71ffb98..9d8010eea 100644 --- a/crates/challenge-orchestrator/src/docker.rs +++ b/crates/challenge-orchestrator/src/docker.rs @@ -4,59 +4,763 @@ //! are allowed to be pulled or run. This prevents malicious container attacks. use crate::{ChallengeContainerConfig, ChallengeInstance, ContainerStatus}; +use async_trait::async_trait; use bollard::container::{ - Config, CreateContainerOptions, ListContainersOptions, RemoveContainerOptions, - StartContainerOptions, StopContainerOptions, + Config, CreateContainerOptions, InspectContainerOptions, ListContainersOptions, LogsOptions, + RemoveContainerOptions, StartContainerOptions, StopContainerOptions, }; +use bollard::errors::Error as DockerError; use bollard::image::CreateImageOptions; -use bollard::models::{DeviceRequest, HostConfig, PortBinding}; +use bollard::models::{ + ContainerCreateResponse, ContainerInspectResponse, ContainerSummary, CreateImageInfo, + DeviceRequest, HostConfig, Network, PortBinding, +}; +use bollard::network::{ConnectNetworkOptions, CreateNetworkOptions, ListNetworksOptions}; +use bollard::volume::CreateVolumeOptions; use bollard::Docker; -use futures::StreamExt; +use futures::{Stream, StreamExt}; use platform_core::ALLOWED_DOCKER_PREFIXES; 
use std::collections::HashMap; +use std::pin::Pin; +use std::sync::Arc; use tracing::{debug, error, info, warn}; +type ImageStream = Pin> + Send>>; +type LogStream = + Pin> + Send>>; + +#[async_trait] +pub trait DockerBridge: Send + Sync { + async fn ping(&self) -> Result<(), DockerError>; + async fn list_networks( + &self, + options: Option>, + ) -> Result, DockerError>; + async fn create_network( + &self, + options: CreateNetworkOptions, + ) -> Result<(), DockerError>; + async fn inspect_container( + &self, + id: &str, + options: Option, + ) -> Result; + async fn connect_network( + &self, + network: &str, + options: ConnectNetworkOptions, + ) -> Result<(), DockerError>; + fn create_image_stream(&self, options: Option>) -> ImageStream; + async fn create_volume(&self, options: CreateVolumeOptions) -> Result<(), DockerError>; + async fn create_container( + &self, + options: Option>, + config: Config, + ) -> Result; + async fn start_container( + &self, + id: &str, + options: Option>, + ) -> Result<(), DockerError>; + async fn stop_container( + &self, + id: &str, + options: Option, + ) -> Result<(), DockerError>; + async fn remove_container( + &self, + id: &str, + options: Option, + ) -> Result<(), DockerError>; + async fn list_containers( + &self, + options: Option>, + ) -> Result, DockerError>; + fn logs_stream(&self, id: &str, options: LogsOptions) -> LogStream; +} + +#[derive(Clone)] +struct BollardBridge { + docker: Docker, +} + +impl BollardBridge { + /// Creates a new BollardBridge that wraps the provided Bollard Docker client. + /// + /// # Examples + /// + /// ```no_run + /// use bollard::Docker; + /// let docker = Docker::connect_with_unix_defaults().unwrap(); + /// let bridge = BollardBridge::new(docker); + /// ``` + fn new(docker: Docker) -> Self { + Self { docker } + } +} + +#[async_trait] +impl DockerBridge for BollardBridge { + /// Verifies that the Docker daemon is reachable. 
+ /// + /// Performs a lightweight ping to the Docker daemon to confirm connectivity. + /// + /// # Returns + /// + /// `Ok(())` if the daemon responds, `Err(DockerError)` if the ping fails. + /// + /// # Examples + /// + /// ```no_run + /// # async fn run(client: &impl crate::DockerBridge) -> Result<(), crate::DockerError> { + /// client.ping().await?; + /// # Ok(()) + /// # } + /// ``` + async fn ping(&self) -> Result<(), DockerError> { + self.docker.ping().await.map(|_| ()) + } + + /// Lists Docker networks visible to the daemon, optionally filtered by the provided options. + /// + /// The returned list contains networks that match the provided `ListNetworksOptions` + /// (if `None`, all networks are returned). + /// + /// # Parameters + /// + /// - `options`: Optional filters and query options to narrow the returned networks. + /// + /// # Returns + /// + /// A `Vec` containing matching network descriptions. + /// + /// # Examples + /// + /// ```no_run + /// use bollard::models::Network; + /// // Obtain a bridge implementing the same API (e.g., BollardBridge) before calling. + /// let rt = tokio::runtime::Runtime::new().unwrap(); + /// rt.block_on(async { + /// // let bridge = ...; // obtain Arc wrapper + /// // let networks: Vec = bridge.list_networks(None).await.unwrap(); + /// }); + /// ``` + async fn list_networks( + &self, + options: Option>, + ) -> Result, DockerError> { + self.docker.list_networks(options).await + } + + /// Create a Docker network using the provided creation options. + /// + /// The method requests the Docker daemon to create a network described by `options`. + /// + /// # Errors + /// + /// Returns a `DockerError` if the Docker API reports a failure creating the network. 
+ /// + /// # Examples + /// + /// ``` + /// # async fn example(bridge: &impl DockerBridge) { + /// bridge.create_network(bollard::network::CreateNetworkOptions{ + /// name: "platform-network".to_string(), + /// ..Default::default() + /// }).await.unwrap(); + /// # } + /// ``` + async fn create_network( + &self, + options: CreateNetworkOptions, + ) -> Result<(), DockerError> { + self.docker.create_network(options).await.map(|_| ()) + } + + /// Retrieves inspection information for the container identified by `id`. + /// + /// Returns a `ContainerInspectResponse` containing detailed metadata about the container. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// // `client` is an implementation that provides `inspect_container`. + /// // Here we show the call pattern; adjust `client` to your context. + /// let resp = client.inspect_container("container_id", None).await; + /// if let Ok(info) = resp { + /// // use `info` (ContainerInspectResponse) + /// println!("{:?}", info.id); + /// } + /// # }); + /// ``` + async fn inspect_container( + &self, + id: &str, + options: Option, + ) -> Result { + self.docker.inspect_container(id, options).await + } + + /// Connects a container to the specified Docker network. + /// + /// `network` is the name or ID of the target network; `options` specifies the container and endpoint configuration used when connecting. + /// + /// # Examples + /// + /// ```no_run + /// // `bridge` implements `DockerBridge`. + /// # async fn example(bridge: &impl DockerBridge) { + /// use bollard::network::ConnectNetworkOptions; + /// let opts = ConnectNetworkOptions:: { container: "container_id".to_string(), endpoint_config: None }; + /// bridge.connect_network("platform-network", opts).await.unwrap(); + /// # } + /// ``` + /// + /// Returns `Ok(())` on success, or an `Err(DockerError)` if the connect operation fails. 
+ async fn connect_network( + &self, + network: &str, + options: ConnectNetworkOptions, + ) -> Result<(), DockerError> { + self.docker.connect_network(network, options).await + } + + /// Creates a stream of Docker image-pull progress events for the given pull options. + /// + /// The returned stream yields `CreateImageInfo` items produced by the Docker daemon while + /// pulling an image. Pass `None` to use default pull behavior. + /// + /// # Examples + /// + /// ``` + /// // Obtain a bridge implementing `DockerBridge`, then: + /// let stream = bridge.create_image_stream(None); + /// // Consume the stream asynchronously to observe pull progress. + /// ``` + fn create_image_stream(&self, options: Option>) -> ImageStream { + Box::pin(self.docker.create_image(options, None, None)) + } + + /// Create a Docker volume using the provided options. + /// + /// The function requests Docker to create a volume described by `options` and returns when the + /// request completes. Use `CreateVolumeOptions` to specify the volume name, driver, labels, and + /// other creation parameters. + /// + /// # Examples + /// + /// ```no_run + /// use bollard::volume::CreateVolumeOptions; + /// # async fn example() -> Result<(), Box> { + /// // `bridge` must implement the same interface as the surrounding context. + /// // let bridge = ...; + /// let opts = CreateVolumeOptions { + /// name: "my-volume".to_string(), + /// ..Default::default() + /// }; + /// // bridge.create_volume(opts).await?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Returns + /// + /// `Ok(())` if the volume was created successfully, `Err(DockerError)` on failure. + async fn create_volume(&self, options: CreateVolumeOptions) -> Result<(), DockerError> { + self.docker.create_volume(options).await.map(|_| ()) + } + + /// Creates a container using the provided Docker create options and container configuration. 
+ /// + /// Uses the underlying Docker bridge to invoke the create container API and returns the + /// created container metadata on success. + /// + /// # Examples + /// + /// ``` + /// # use bollard::container::{CreateContainerOptions, Config, ContainerCreateResponse}; + /// # use std::collections::HashMap; + /// # // `client` implements a method `create_container` matching this signature. + /// # async fn _example(client: &impl std::ops::Deref) {} + /// let options = CreateContainerOptions:: { name: "example-container".to_string() }; + /// let config = Config { + /// image: Some("alpine:latest".to_string()), + /// env: Some(vec!["FOO=bar".to_string()]), + /// ..Default::default() + /// }; + /// // let resp: ContainerCreateResponse = client.create_container(Some(options), config).await?; + /// ``` + /// + /// # Returns + /// + /// `ContainerCreateResponse` containing the created container's id and warnings on success, or + /// a `DockerError` if the create operation fails. + async fn create_container( + &self, + options: Option>, + config: Config, + ) -> Result { + self.docker.create_container(options, config).await + } + + /// Starts the container identified by `id` with the given start options. + /// + /// # Examples + /// + /// ```no_run + /// # use bollard::container::StartContainerOptions; + /// # async fn example(client: &crate::DockerClient) -> anyhow::Result<()> { + /// client.start_container("my-container", None).await?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Returns + /// + /// `Ok(())` if the container was started successfully, `Err(DockerError)` if the Docker API call failed. + async fn start_container( + &self, + id: &str, + options: Option>, + ) -> Result<(), DockerError> { + self.docker.start_container(id, options).await + } + + /// Stops the container identified by `id`. + /// + /// Attempts to stop the container via the underlying Docker bridge. 
+ /// + /// # Examples + /// + /// ```no_run + /// # async fn example(client: &impl crate::docker::DockerBridge) { + /// // Stop a container, ignoring errors for brevity in this example. + /// let _ = client.stop_container("container_id", None).await; + /// # } + /// ``` + /// + /// # Returns + /// + /// `Ok(())` on success, `Err(DockerError)` if the stop operation fails. + async fn stop_container( + &self, + id: &str, + options: Option, + ) -> Result<(), DockerError> { + self.docker.stop_container(id, options).await + } + + /// Remove a container by ID from the Docker daemon. + /// + /// `id` is the container identifier or name. `options` may provide removal flags + /// (for example, force removal or removing volumes); pass `None` to use defaults. + /// + /// # Returns + /// + /// `Ok(())` if the container was removed or already absent according to the + /// underlying Docker behavior, `Err(DockerError)` on failure. + /// + /// # Examples + /// + /// ```rust + /// // Execute an async removal in a synchronous context: + /// let client = /* obtain a DockerClient or bridge-backed client */ ; + /// futures::executor::block_on(async { + /// client.remove_container("my-container", None).await.unwrap(); + /// }); + /// ``` + async fn remove_container( + &self, + id: &str, + options: Option, + ) -> Result<(), DockerError> { + self.docker.remove_container(id, options).await + } + + /// Lists containers from the Docker daemon according to the given listing options. + /// + /// # Parameters + /// + /// - `options`: Optional `ListContainersOptions` to filter or modify the listing behavior (e.g., show all containers, apply filters). If `None`, default listing behavior is used by the underlying bridge. + /// + /// # Returns + /// + /// A `Vec` containing summaries of the containers that match the provided options on success, or a `DockerError` on failure. 
+ /// + /// # Examples + /// + /// ``` + /// # use bollard::container::ListContainersOptions; + /// # async fn example(client: &crate::DockerClient) -> Result<(), anyhow::Error> { + /// let containers = client.list_containers(None).await?; + /// assert!(containers.iter().all(|c| c.id.is_some())); + /// # Ok(()) + /// # } + /// ``` + async fn list_containers( + &self, + options: Option>, + ) -> Result, DockerError> { + self.docker.list_containers(options).await + } + + /// Streams the log output for the specified container. + /// + /// The returned stream yields `LogOutput` items produced by the container's stdout and stderr until the log source completes. + /// + /// # Examples + /// + /// ```no_run + /// use bollard::container::LogsOptions; + /// + /// let opts = LogsOptions::::new().stdout(true).stderr(true); + /// let mut stream = docker_client.logs_stream("container_id", opts); + /// // consume the stream to receive `LogOutput` frames + /// ``` + fn logs_stream(&self, id: &str, options: LogsOptions) -> LogStream { + Box::pin(self.docker.logs(id, Some(options))) + } +} + /// Docker client for managing challenge containers pub struct DockerClient { - docker: Docker, + docker: Arc, network_name: String, } +#[async_trait] +pub trait ChallengeDocker: Send + Sync { + async fn pull_image(&self, image: &str) -> anyhow::Result<()>; + async fn start_challenge( + &self, + config: &ChallengeContainerConfig, + ) -> anyhow::Result; + async fn stop_container(&self, container_id: &str) -> anyhow::Result<()>; + async fn remove_container(&self, container_id: &str) -> anyhow::Result<()>; + async fn is_container_running(&self, container_id: &str) -> anyhow::Result; + async fn get_logs(&self, container_id: &str, tail: usize) -> anyhow::Result; + async fn list_challenge_containers(&self) -> anyhow::Result>; + async fn cleanup_stale_containers( + &self, + prefix: &str, + max_age_minutes: u64, + exclude_patterns: &[&str], + ) -> anyhow::Result; +} + +#[async_trait] +impl 
ChallengeDocker for DockerClient {
+    /// Ensures the specified Docker image is available locally by pulling it from its registry.
+    ///
+    /// # Arguments
+    ///
+    /// * `image` - The image reference to pull (for example `"registry/repo:tag"` or `"alpine:latest"`).
+    ///
+    /// # Returns
+    ///
+    /// `Ok(())` on success; `Err` if the image is disallowed by the whitelist or if the pull fails.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # async fn doc_example() -> anyhow::Result<()> {
+    /// // `client` implements ChallengeDocker (e.g., DockerClient)
+    /// // client.pull_image("alpine:latest").await?;
+    /// # Ok(())
+    /// # }
+    /// ```
+    async fn pull_image(&self, image: &str) -> anyhow::Result<()> {
+        DockerClient::pull_image(self, image).await
+    }
+
+    /// Starts a challenge container according to the provided configuration and returns its runtime instance.
+    ///
+    /// Validates the configuration and image policy, ensures the platform network and required volumes exist,
+    /// creates and starts the container, and returns an instance describing the created container and its endpoint.
+    ///
+    /// # Parameters
+    ///
+    /// - `config` — Configuration that describes the challenge container to create (image, resources, mounts, env, etc.).
+    ///
+    /// # Returns
+    ///
+    /// `ChallengeInstance` containing the created container's id, endpoint, start timestamp, and initial status.
+ /// + /// # Examples + /// + /// ```no_run + /// # use anyhow::Result; + /// # async fn example() -> Result<()> { + /// let client = /* obtain a DockerClient implementing ChallengeDocker */ unimplemented!(); + /// let config = /* build a ChallengeContainerConfig */ unimplemented!(); + /// let instance = client.start_challenge(&config).await?; + /// println!("started container: {}", instance.container_id); + /// # Ok(()) } + /// ``` + async fn start_challenge( + &self, + config: &ChallengeContainerConfig, + ) -> anyhow::Result { + DockerClient::start_challenge(self, config).await + } + + /// Stops the specified container, waiting up to 30 seconds for it to stop. + /// + /// 304 (container already stopped) and 404 (container not found) are treated as no-ops. + /// + /// # Examples + /// + /// ``` + /// # async fn example(client: &crate::DockerClient) -> anyhow::Result<()> { + /// client.stop_container("container-id").await?; + /// # Ok(()) + /// # } + /// ``` + /// + /// # Returns + /// + /// `Ok(())` if the container was stopped or was already absent/stopped, `Err(_)` on failure. + async fn stop_container(&self, container_id: &str) -> anyhow::Result<()> { + DockerClient::stop_container(self, container_id).await + } + + /// Remove the specified container, treating a non-existent container as a no-op. + /// + /// On success this function ensures the container is removed; if the container + /// does not exist it returns Ok without error. Other failures return an error. + /// + /// # Examples + /// + /// ```ignore + /// // Assuming `client` is a DockerClient or an implementation exposing this method. + /// client.remove_container("my-container-id").await.unwrap(); + /// ``` + async fn remove_container(&self, container_id: &str) -> anyhow::Result<()> { + DockerClient::remove_container(self, container_id).await + } + + /// Check whether a container is currently running. 
+    ///
+    /// Returns `true` if the container exists and its Docker state is running, `false` if the
+    /// container does not exist or is not running. Other errors from the Docker bridge are
+    /// propagated as an `Err`.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// # use anyhow::Result;
+    /// # async fn example(client: &crate::DockerClient) -> Result<()> {
+    /// let is_running = client.is_container_running("my-container-id").await?;
+    /// println!("running: {}", is_running);
+    /// # Ok(())
+    /// # }
+    /// ```
+    async fn is_container_running(&self, container_id: &str) -> anyhow::Result<bool> {
+        DockerClient::is_container_running(self, container_id).await
+    }
+
+    /// Fetches the combined stdout and stderr logs for a container and returns them as a single string.
+    ///
+    /// # Returns
+    ///
+    /// `String` containing the concatenated logs from stdout and stderr; an empty string if there are no logs.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// # use anyhow::Result;
+    /// # async fn doc_example() -> Result<()> {
+    /// // `client` can be a DockerClient or any type implementing `ChallengeDocker`.
+    /// let client = /* obtain client */ unimplemented!();
+    /// let logs = client.get_logs("container-id", 100).await?;
+    /// println!("{}", logs);
+    /// # Ok(()) }
+    /// ```
+    async fn get_logs(&self, container_id: &str, tail: usize) -> anyhow::Result<String> {
+        DockerClient::get_logs(self, container_id, tail).await
+    }
+
+    /// Lists challenge containers visible to the client's configured Docker network.
+    ///
+    /// The returned list contains container IDs for containers whose names start with the
+    /// "challenge-" prefix and that are attached to the client's configured platform network.
+ /// + /// # Examples + /// + /// ```no_run + /// # async fn example(client: &impl crate::ChallengeDocker) -> anyhow::Result<()> { + /// let ids = client.list_challenge_containers().await?; + /// assert!(ids.iter().all(|id| !id.is_empty())); + /// # Ok(()) } + /// ``` + async fn list_challenge_containers(&self) -> anyhow::Result> { + DockerClient::list_challenge_containers(self).await + } + + /// Remove challenge containers whose names start with `prefix`, subject to age and exclude filters, and return a summary of the cleanup. + /// + /// This lists all containers, filters those whose names begin with `prefix`, skips any whose names match any string in `exclude_patterns`, and — if `max_age_minutes` is greater than zero — skips containers younger than that age. Matching containers are removed and any errors encountered are collected in the result; removals of already-missing containers are treated as no-ops. + /// + /// # Parameters + /// + /// - `prefix`: Name prefix used to select candidate containers for removal. + /// - `max_age_minutes`: If greater than zero, only containers older than this many minutes are removed; a value of `0` disables age-based filtering. + /// - `exclude_patterns`: Slice of substrings; any container whose name contains any of these substrings will be excluded from removal. + /// + /// # Returns + /// + /// A `CleanupResult` summarizing `total_found`, `removed`, and any `errors` encountered during removal. + /// + /// # Examples + /// + /// ``` + /// #[tokio::test] + /// async fn cleanup_example() { + /// // `client` should be a connected DockerClient implementing the cleanup method. 
+ /// // let client = DockerClient::connect().await.unwrap(); + /// // Example call (placeholder client in real usage): + /// // let result = client.cleanup_stale_containers("challenge-", 60, &["keep-this"]).await.unwrap(); + /// // assert!(result.total_found >= result.removed); + /// } + /// ``` + async fn cleanup_stale_containers( + &self, + prefix: &str, + max_age_minutes: u64, + exclude_patterns: &[&str], + ) -> anyhow::Result { + DockerClient::cleanup_stale_containers(self, prefix, max_age_minutes, exclude_patterns) + .await + } +} + impl DockerClient { - /// Connect to Docker daemon + /// Constructs a DockerClient backed by the provided DockerBridge and configured to use the given network. + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// + /// // `RecordingBridge` is a test bridge implementation; replace with a real bridge in production. + /// let bridge = Arc::new(crate::RecordingBridge::default()) as Arc; + /// let client = crate::DockerClient::from_bridge(bridge, "platform-network"); + /// assert_eq!(client.network_name, "platform-network"); + /// ``` + fn from_bridge(docker: Arc, network_name: impl Into) -> Self { + Self { + docker, + network_name: network_name.into(), + } + } + + /// Constructs a DockerClient using a custom DockerBridge implementation. + /// + /// # Examples + /// + /// ```no_run + /// // Provide any type implementing `DockerBridge`. + /// let my_bridge = /* implementor of DockerBridge */ ; + /// let client = DockerClient::with_bridge(my_bridge, "platform-network"); + /// ``` + pub fn with_bridge( + docker: impl DockerBridge + 'static, + network_name: impl Into, + ) -> Self { + Self::from_bridge(Arc::new(docker), network_name) + } + + /// Establishes a connection to the local Docker daemon and returns a DockerClient configured + /// to use the "platform-network". + /// + /// Attempts to ping the daemon to verify the connection before constructing the client. 
+ /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// let client = crate::docker::DockerClient::connect().await.unwrap(); + /// // client is ready to use with the "platform-network" + /// # }); + /// ``` pub async fn connect() -> anyhow::Result { let docker = Docker::connect_with_local_defaults()?; // Verify connection - docker.ping().await?; + let bridge = Arc::new(BollardBridge::new(docker)); + bridge.ping().await?; info!("Connected to Docker daemon"); - Ok(Self { - docker, - network_name: "platform-network".to_string(), - }) + Ok(Self::from_bridge(bridge, "platform-network")) } - /// Connect with custom network name + /// Creates a DockerClient connected to the local Docker daemon and configured to use the given network. + /// + /// Attempts to connect to the local Docker daemon, verifies the daemon is responsive, and returns a client that will use `network_name`. + /// + /// # Errors + /// + /// Returns an error if connecting to the local Docker daemon or pinging it fails. + /// + /// # Examples + /// + /// ```no_run + /// # async fn run() -> anyhow::Result<()> { + /// let client = crate::DockerClient::connect_with_network("platform-network").await?; + /// # Ok(()) + /// # } + /// ``` pub async fn connect_with_network(network_name: &str) -> anyhow::Result { let docker = Docker::connect_with_local_defaults()?; - docker.ping().await?; + let bridge = Arc::new(BollardBridge::new(docker)); + bridge.ping().await?; - Ok(Self { - docker, - network_name: network_name.to_string(), - }) + Ok(Self::from_bridge(bridge, network_name)) } - /// Connect and auto-detect the network from the validator container - /// This ensures challenge containers are on the same network as the validator + /// Create a DockerClient connected to the local Docker daemon and configured to use + /// the validator network when available. 
+ /// + /// Attempts to detect the validator container's network and configures the client + /// to use that network for challenge containers. If detection fails, the client + /// falls back to the "platform-network" network and logs a warning. + /// + /// # Examples + /// + /// ``` + /// # async fn run() -> anyhow::Result<()> { + /// let client = crate::docker::DockerClient::connect_auto_detect().await?; + /// // use `client` to manage challenge containers... + /// # Ok(()) + /// # } + /// ``` pub async fn connect_auto_detect() -> anyhow::Result { let docker = Docker::connect_with_local_defaults()?; - docker.ping().await?; + let bridge = Arc::new(BollardBridge::new(docker)); + bridge.ping().await?; info!("Connected to Docker daemon"); // Try to detect the network from the current container - let network_name = Self::detect_validator_network_static(&docker) + let network_name = Self::detect_validator_network(&*bridge) .await .unwrap_or_else(|e| { warn!( @@ -68,14 +772,28 @@ impl DockerClient { info!(network = %network_name, "Using network for challenge containers"); - Ok(Self { - docker, - network_name, - }) + Ok(Self::from_bridge(bridge, network_name)) } - /// Detect the network the validator container is running on - async fn detect_validator_network_static(docker: &Docker) -> anyhow::Result { + /// Determine which Docker network the current validator container is connected to. + /// + /// Prefers a user-defined bridge network when available; falls back to the `bridge` network if present. + /// + /// # Errors + /// + /// Returns an error if the container has no network settings or if no suitable network can be found. 
+ /// + /// # Examples + /// + /// ```no_run + /// # use anyhow::Result; + /// # async fn example(docker: &dyn challenge_orchestrator::docker::DockerBridge) -> Result<()> { + /// let network = challenge_orchestrator::docker::DockerClient::detect_validator_network(docker).await?; + /// println!("Selected network: {}", network); + /// # Ok(()) + /// # } + /// ``` + async fn detect_validator_network(docker: &dyn DockerBridge) -> anyhow::Result { // Get our container ID let container_id = Self::get_container_id_static()?; @@ -196,9 +914,29 @@ impl DockerClient { format!("{:012x}", hasher.finish()) // 12 hex chars } - /// Ensure the Docker network exists + /// Ensures the client's configured Docker network exists, creating it if missing. + /// + /// Attempts to list networks and creates a bridge network with the client's `network_name` when none matches. + /// + /// # Returns + /// + /// `Ok(())` if the network exists or was created successfully, `Err` if the Docker operations fail. + /// + /// # Examples + /// + /// ``` + /// # async fn run_example() -> anyhow::Result<()> { + /// let client = /* obtain a DockerClient instance */; + /// client.ensure_network().await?; + /// # Ok(()) + /// # } + /// ``` pub async fn ensure_network(&self) -> anyhow::Result<()> { - let networks = self.docker.list_networks::(None).await?; + let networks = self + .docker + .list_networks(None::>) + .await + .map_err(anyhow::Error::from)?; let exists = networks.iter().any(|n| { n.name @@ -216,7 +954,10 @@ impl DockerClient { ..Default::default() }; - self.docker.create_network(config).await?; + self.docker + .create_network(config) + .await + .map_err(anyhow::Error::from)?; info!(network = %self.network_name, "Created Docker network"); } else { debug!(network = %self.network_name, "Docker network already exists"); @@ -225,14 +966,33 @@ impl DockerClient { Ok(()) } - /// Connect the current container to the platform network - /// This allows the validator to communicate with challenge 
containers via hostname + /// Connects the current process's container to the configured platform network. + /// + /// Determines the container ID for the current process and, if the container is not + /// already attached to the client's configured network, connects it. This operation + /// is idempotent: if the container is already connected the method returns successfully + /// without making changes. + /// + /// # Examples + /// + /// ``` + /// # use std::sync::Arc; + /// # tokio_test::block_on(async { + /// # // `client` would be a real DockerClient in production; shown here as a placeholder. + /// # let client: crate::DockerClient = unimplemented!(); + /// client.connect_self_to_network().await.unwrap(); + /// # }); + /// ``` pub async fn connect_self_to_network(&self) -> anyhow::Result<()> { // Get our container ID from the hostname or cgroup let container_id = self.get_self_container_id()?; // Check if already connected - let inspect = self.docker.inspect_container(&container_id, None).await?; + let inspect = self + .docker + .inspect_container(&container_id, None) + .await + .map_err(anyhow::Error::from)?; let networks = inspect .network_settings .as_ref() @@ -260,7 +1020,8 @@ impl DockerClient { self.docker .connect_network(&self.network_name, config) - .await?; + .await + .map_err(anyhow::Error::from)?; info!( container = %container_id, @@ -341,7 +1102,24 @@ impl DockerClient { .any(|prefix| image_lower.starts_with(&prefix.to_lowercase())) } - /// Pull a Docker image (only from whitelisted registries) + /// Pulls a Docker image after enforcing the configured image whitelist. + /// + /// Errors if the image is not allowed by the whitelist or if the pull operation fails. + /// By default the policy permits images from ghcr.io/platformnetwork/ unless development + /// overrides are enabled. 
+ /// + /// # Examples + /// + /// ```no_run + /// # use anyhow::Result; + /// # #[tokio::main] + /// # async fn main() -> Result<()> { + /// // `client` is a DockerClient or another implementor of the same API. + /// let client = /* obtain DockerClient */ unimplemented!(); + /// client.pull_image("ghcr.io/platformnetwork/example:latest").await?; + /// # Ok(()) + /// # } + /// ``` pub async fn pull_image(&self, image: &str) -> anyhow::Result<()> { // SECURITY: Verify image is from allowed registry before pulling if !Self::is_image_allowed(image) { @@ -359,11 +1137,11 @@ impl DockerClient { info!(image = %image, "Pulling Docker image (whitelisted)"); let options = CreateImageOptions { - from_image: image, + from_image: image.to_string(), ..Default::default() }; - let mut stream = self.docker.create_image(Some(options), None, None); + let mut stream = self.docker.create_image_stream(Some(options)); while let Some(result) = stream.next().await { match result { @@ -382,7 +1160,32 @@ impl DockerClient { Ok(()) } - /// Start a challenge container (only from whitelisted registries) + /// Starts a challenge container from the configured (whitelisted) image and returns a record of the started instance. + /// + /// Performs image allowlist validation, validates the provided challenge configuration, ensures the platform + /// network exists, prepares required volumes and host bindings, constructs container environment and host + /// configuration (including optional GPU support), creates and starts the container, and inspects it to + /// determine the container ID and mapped ports. + /// + /// On success returns a `ChallengeInstance` describing the started container (IDs, image, endpoint, start time, + /// and initial status). The call fails if the image is not allowed, the config is invalid, or any Docker + /// operation (volume creation, container creation, start, inspection) fails. 
+ /// + /// # Examples + /// + /// ```no_run + /// # use anyhow::Result; + /// # async fn doc_example() -> Result<()> { + /// // Setup: obtain a DockerClient and a ChallengeContainerConfig (omitted) + /// // let client: DockerClient = /* construct or connect */ ; + /// // let config: ChallengeContainerConfig = /* build config */ ; + /// + /// // Start the challenge container + /// // let instance = client.start_challenge(&config).await?; + /// // println!("Started challenge container: {}", instance.container_id); + /// # Ok(()) + /// # } + /// ``` pub async fn start_challenge( &self, config: &ChallengeContainerConfig, @@ -457,9 +1260,9 @@ impl DockerClient { let volume_name = format!("{}-data", container_name); // Create volumes if they don't exist (Docker will auto-create on mount, but explicit is clearer) - let volume_opts = bollard::volume::CreateVolumeOptions { - name: volume_name.as_str(), - driver: "local", + let volume_opts = CreateVolumeOptions { + name: volume_name.clone(), + driver: "local".to_string(), ..Default::default() }; if let Err(e) = self.docker.create_volume(volume_opts).await { @@ -473,9 +1276,9 @@ impl DockerClient { "challenge-{}-cache", config.name.to_lowercase().replace(' ', "-") ); - let cache_volume_opts = bollard::volume::CreateVolumeOptions { - name: cache_volume_name.as_str(), - driver: "local", + let cache_volume_opts = CreateVolumeOptions { + name: cache_volume_name.clone(), + driver: "local".to_string(), ..Default::default() }; if let Err(e) = self.docker.create_volume(cache_volume_opts).await { @@ -489,9 +1292,9 @@ impl DockerClient { let evals_volume = "term-challenge-evals"; for vol_name in [tasks_volume, dind_cache_volume, evals_volume] { - let vol_opts = bollard::volume::CreateVolumeOptions { - name: vol_name, - driver: "local", + let vol_opts = CreateVolumeOptions { + name: vol_name.to_string(), + driver: "local".to_string(), ..Default::default() }; if let Err(e) = self.docker.create_volume(vol_opts).await { @@ -687,23 
+1490,29 @@ impl DockerClient { // Create container let options = CreateContainerOptions { - name: &container_name, + name: container_name.clone(), platform: None, }; let response = self .docker .create_container(Some(options), container_config) - .await?; + .await + .map_err(anyhow::Error::from)?; let container_id = response.id; // Start container self.docker .start_container(&container_id, None::>) - .await?; + .await + .map_err(anyhow::Error::from)?; // Get assigned port - let inspect = self.docker.inspect_container(&container_id, None).await?; + let inspect = self + .docker + .inspect_container(&container_id, None) + .await + .map_err(anyhow::Error::from)?; let port = inspect .network_settings .and_then(|ns| ns.ports) @@ -803,26 +1612,64 @@ impl DockerClient { } } - /// List all challenge containers + /// Lists challenge containers visible to the configured network. + /// + /// Returns a vector of container IDs for containers whose names start with `challenge-` and that are attached to the client's configured network. 
+ /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// let client = /* DockerClient constructed elsewhere */; + /// let ids = client.list_challenge_containers().await.unwrap(); + /// for id in ids { assert!(!id.is_empty()); } + /// # }); + /// ``` pub async fn list_challenge_containers(&self) -> anyhow::Result> { - let mut filters = HashMap::new(); - filters.insert("name", vec!["challenge-"]); - filters.insert("network", vec![self.network_name.as_str()]); + let mut filters: HashMap> = HashMap::new(); + filters.insert("name".to_string(), vec!["challenge-".to_string()]); + filters.insert("network".to_string(), vec![self.network_name.clone()]); - let options = ListContainersOptions { + let options = ListContainersOptions:: { all: true, filters, ..Default::default() }; - let containers = self.docker.list_containers(Some(options)).await?; + let containers = self + .docker + .list_containers(Some(options)) + .await + .map_err(anyhow::Error::from)?; Ok(containers.into_iter().filter_map(|c| c.id).collect()) } - /// Get container logs + /// Fetches the container's stdout and stderr logs and returns them concatenated as a single string. + /// + /// Streams the requested number of tail lines from both stdout and stderr for the given container + /// and joins each log chunk in order into one `String`. + /// + /// # Parameters + /// + /// - `container_id`: the Docker container identifier to read logs from. + /// - `tail`: the number of most-recent log lines to include. + /// + /// # Returns + /// + /// A `String` containing the concatenated log output (stdout and stderr); an empty string if no logs. 
+ /// + /// # Examples + /// + /// ```no_run + /// # async fn run() -> anyhow::Result<()> { + /// let client = DockerClient::connect().await?; + /// let logs = client.get_logs("my-container-id", 100).await?; + /// println!("{}", logs); + /// # Ok(()) + /// # } + /// ``` pub async fn get_logs(&self, container_id: &str, tail: usize) -> anyhow::Result { - use bollard::container::LogsOptions; use futures::TryStreamExt; let options = LogsOptions:: { @@ -834,7 +1681,7 @@ impl DockerClient { let logs: Vec<_> = self .docker - .logs(container_id, Some(options)) + .logs_stream(container_id, options) .try_collect() .await?; @@ -847,17 +1694,39 @@ impl DockerClient { Ok(output) } - /// Clean up stale task containers created by challenge evaluations + /// Removes stale task containers whose names start with a given prefix. + /// + /// Containers matching `prefix` will be considered for removal unless their + /// names contain any of the provided `exclude_patterns` or they are younger + /// than `max_age_minutes` (when `max_age_minutes > 0`). Typical exclusions + /// include main challenge containers (e.g., `challenge-*`), platform validator + /// containers, and watchtower containers; pass appropriate patterns to + /// `exclude_patterns` to protect those. + /// + /// # Parameters /// - /// This removes containers that match the pattern but excludes: - /// - Main challenge containers (challenge-*) - /// - Platform validator containers - /// - Watchtower containers + /// - `prefix`: Container name prefix to match (e.g., `"term-challenge-"`). + /// - `max_age_minutes`: Only remove containers older than this many minutes + /// (use `0` to remove all matching containers regardless of age). + /// - `exclude_patterns`: Substrings; if any is present in a container name, + /// that container will be skipped. 
/// - /// Parameters: - /// - `prefix`: Container name prefix to match (e.g., "term-challenge-") - /// - `max_age_minutes`: Only remove containers older than this (0 = remove all matching) - /// - `exclude_patterns`: Container names containing these patterns will be kept + /// # Returns + /// + /// `Ok(CleanupResult)` containing counts of found and removed containers and any + /// errors encountered while attempting removals. + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(client: &crate::DockerClient) -> anyhow::Result<()> { + /// let result = client + /// .cleanup_stale_containers("term-challenge-", 60, &["challenge-", "validator", "watchtower"]) + /// .await?; + /// println!("Removed {}/{} stale containers", result.removed, result.total_found); + /// # Ok(()) + /// # } + /// ``` pub async fn cleanup_stale_containers( &self, prefix: &str, @@ -867,12 +1736,14 @@ impl DockerClient { let mut result = CleanupResult::default(); // List ALL containers (including stopped) - let options = ListContainersOptions:: { - all: true, - ..Default::default() - }; + let mut options: ListContainersOptions = Default::default(); + options.all = true; - let containers = self.docker.list_containers(Some(options)).await?; + let containers = self + .docker + .list_containers(Some(options)) + .await + .map_err(anyhow::Error::from)?; let now = chrono::Utc::now().timestamp(); let max_age_secs = (max_age_minutes * 60) as i64; @@ -941,8 +1812,653 @@ impl DockerClient { } } +#[cfg(test)] +mod tests { + use super::*; + use bollard::models::EndpointSettings; + use futures::StreamExt; + use serial_test::serial; + use std::collections::HashMap; + use std::sync::{Arc, Mutex}; + + /// Remove environment variables for the provided keys. + /// + /// Removes each environment variable named in `keys`. Missing variables are ignored. 
+ /// + /// # Examples + /// + /// ``` + /// std::env::set_var("FOO", "1"); + /// std::env::set_var("BAR", "2"); + /// reset_env(&["FOO", "BAR"]); + /// assert!(std::env::var("FOO").is_err()); + /// assert!(std::env::var("BAR").is_err()); + /// ``` + fn reset_env(keys: &[&str]) { + for key in keys { + std::env::remove_var(key); + } + } + + #[test] + #[serial] + fn test_is_image_allowed_enforces_whitelist() { + reset_env(&["DEVELOPMENT_MODE"]); + assert!(DockerClient::is_image_allowed( + "ghcr.io/platformnetwork/challenge:latest" + )); + assert!(!DockerClient::is_image_allowed( + "docker.io/library/alpine:latest" + )); + } + + #[test] + #[serial] + fn test_is_image_allowed_allows_dev_mode_override() { + std::env::set_var("DEVELOPMENT_MODE", "true"); + assert!(DockerClient::is_image_allowed( + "docker.io/library/alpine:latest" + )); + reset_env(&["DEVELOPMENT_MODE"]); + } + + #[test] + #[serial] + fn test_is_image_allowed_case_insensitive() { + reset_env(&["DEVELOPMENT_MODE"]); + assert!(DockerClient::is_image_allowed( + "GHCR.IO/PLATFORMNETWORK/IMAGE:TAG" + )); + } + + #[test] + #[serial] + fn test_get_validator_suffix_prefers_validator_name() { + reset_env(&["VALIDATOR_NAME", "HOSTNAME"]); + std::env::set_var("VALIDATOR_NAME", "Node 42-Test"); + std::env::set_var("HOSTNAME", "should_not_be_used"); + + let suffix = DockerClient::get_validator_suffix(); + assert_eq!(suffix, "node42test"); + + reset_env(&["VALIDATOR_NAME", "HOSTNAME"]); + } + + #[test] + #[serial] + fn test_get_validator_suffix_uses_container_id_from_hostname() { + reset_env(&["VALIDATOR_NAME"]); + std::env::set_var("HOSTNAME", "abcdef123456"); + + let suffix = DockerClient::get_validator_suffix(); + assert_eq!(suffix, "abcdef123456"); + + reset_env(&["HOSTNAME"]); + } + + #[tokio::test] + #[ignore = "requires Docker"] + async fn test_docker_connect() { + let client = DockerClient::connect().await; + assert!(client.is_ok()); + } + + #[derive(Clone, Default)] + struct RecordingBridge { + inner: Arc, + 
} + + #[derive(Default)] + struct RecordingBridgeInner { + networks: Mutex>, + created_networks: Mutex>, + containers: Mutex>, + removed: Mutex>, + inspect_map: Mutex>, + connect_calls: Mutex>, + } + + impl RecordingBridge { + /// Creates a RecordingBridge pre-populated with networks having the given names. + + /// + + /// `names` is a slice of network name strings to add to the bridge's internal network list. + + /// This helper is intended for tests that need a RecordingBridge already containing specific networks. + + /// + + /// # Examples + + /// + + /// ``` + + /// let bridge = RecordingBridge::with_networks(&["platform-network", "validator-net"]); + + /// let locked = bridge.inner.networks.lock().unwrap(); + + /// let names: Vec<_> = locked.iter().filter_map(|n| n.name.as_deref()).collect(); + + /// assert!(names.contains(&"platform-network")); + + /// assert!(names.contains(&"validator-net")); + + /// ``` + fn with_networks(names: &[&str]) -> Self { + let bridge = RecordingBridge::default(); + { + let mut lock = bridge.inner.networks.lock().unwrap(); + for name in names { + lock.push(Network { + name: Some(name.to_string()), + ..Default::default() + }); + } + } + bridge + } + + /// List network names created by the recording bridge. + /// + /// # Returns + /// + /// `Vec` containing the names of networks that have been recorded as created. + /// + /// # Examples + /// + /// ``` + /// let bridge = RecordingBridge::default(); + /// // simulate creation in tests by manipulating bridge.inner as needed... + /// let networks = bridge.created_networks(); + /// assert!(networks.is_empty() || networks.iter().all(|n| n.is_string())); + /// ``` + fn created_networks(&self) -> Vec { + self.inner.created_networks.lock().unwrap().clone() + } + + /// Insert a ContainerInspectResponse with the given networks into the bridge's inspect map. 
+ /// + /// The method creates a `ContainerInspectResponse` whose `network_settings.networks` map + /// contains an entry for each name in `networks` mapped to default `EndpointSettings`, + /// then stores it under `container_id` in the internal `inspect_map`. + /// + /// # Parameters + /// + /// - `container_id`: the container identifier used as the map key. + /// - `networks`: slice of network names to include in the constructed inspect response. + /// + /// # Examples + /// + /// ``` + /// // Assuming `bridge` is a RecordingBridge (or similar) with `set_inspect_networks` + /// let bridge = RecordingBridge::default(); + /// bridge.set_inspect_networks("container-123", &["platform-network", "bridge"]); + /// let map = bridge.inner.inspect_map.lock().unwrap(); + /// assert!(map.contains_key("container-123")); + /// ``` + fn set_inspect_networks(&self, container_id: &str, networks: &[&str]) { + let mut map: HashMap = HashMap::new(); + for name in networks { + map.insert(name.to_string(), Default::default()); + } + let response = ContainerInspectResponse { + network_settings: Some(bollard::models::NetworkSettings { + networks: Some(map), + ..Default::default() + }), + ..Default::default() + }; + self.inner + .inspect_map + .lock() + .unwrap() + .insert(container_id.to_string(), response); + } + + /// Replaces the stored list of container summaries with the provided vector. + /// + /// # Parameters + /// + /// - `containers`: The new list of `ContainerSummary` values to store. + /// + /// # Examples + /// + /// ``` + /// // Replace the client's cached containers with an empty list. + /// client.set_containers(vec![]); + /// ``` + fn set_containers(&self, containers: Vec) { + *self.inner.containers.lock().unwrap() = containers; + } + + /// Returns the list of container IDs that were recorded as removed. + /// + /// The returned vector is a clone of the internal removal log and can be modified + /// by the caller without affecting the bridge's internal state. 
+ /// + /// # Examples + /// + /// ```no_run + /// // `bridge` is an instance that provides `removed_containers() -> Vec` + /// let removed = bridge.removed_containers(); + /// for id in removed { + /// println!("Removed container: {}", id); + /// } + /// ``` + fn removed_containers(&self) -> Vec { + self.inner.removed.lock().unwrap().clone() + } + + /// List recorded Docker network connect calls. + /// + /// Each entry is a tuple of `(network_name, container_id)` in the order the calls were invoked. + /// + /// # Examples + /// + /// ``` + /// // `rec` is a test recording object exposing `connect_calls`. + /// let calls = rec.connect_calls(); + /// // e.g., assert that the first recorded call targeted the "platform-network" + /// assert!(calls.first().map(|(net, _)| net == "platform-network").unwrap_or(false)); + /// ``` + fn connect_calls(&self) -> Vec<(String, String)> { + self.inner.connect_calls.lock().unwrap().clone() + } + } + + #[async_trait] + impl DockerBridge for RecordingBridge { + /// Checks connectivity to the Docker daemon. + /// + /// # Returns + /// + /// `Ok(())` if the daemon responds, `Err(DockerError)` if the check fails. + /// + /// # Examples + /// + /// ``` + /// # async fn _example(client: &impl crate::DockerBridge) { + /// client.ping().await.expect("docker daemon reachable"); + /// # } + /// ``` + async fn ping(&self) -> Result<(), DockerError> { + Ok(()) + } + + /// Returns the list of networks currently recorded by this bridge. + /// + /// The `_options` parameter is ignored by this implementation and may be `None`. 
+ /// + /// # Examples + /// + /// ``` + /// # async fn example_usage() { + /// let bridge = RecordingBridge::new(); + /// let networks = bridge.list_networks(None).await.unwrap(); + /// assert!(networks.is_empty() || networks.len() >= 0); + /// # } + /// ``` + async fn list_networks( + &self, + _options: Option>, + ) -> Result, DockerError> { + Ok(self.inner.networks.lock().unwrap().clone()) + } + + /// Appends the provided network name to the bridge's in-memory list of created networks. + /// + /// This mock implementation simulates network creation by pushing `options.name` into + /// `self.inner.created_networks` and returns success. + /// + /// # Examples + /// + /// ``` + /// # use std::sync::Arc; + /// # use tokio::runtime::Runtime; + /// // Construct a RecordingBridge (test double) and verify network names are recorded. + /// let rt = Runtime::new().unwrap(); + /// rt.block_on(async { + /// let bridge = RecordingBridge::new(); + /// let opts = CreateNetworkOptions { name: "platform-network".to_string(), ..Default::default() }; + /// bridge.create_network(opts).await.unwrap(); + /// assert!(bridge.inner.created_networks.lock().unwrap().contains(&"platform-network".to_string())); + /// }); + /// ``` + async fn create_network( + &self, + options: CreateNetworkOptions, + ) -> Result<(), DockerError> { + self.inner + .created_networks + .lock() + .unwrap() + .push(options.name); + Ok(()) + } + + /// Returns the stored inspection metadata for a container by id. + /// + /// The `options` argument is ignored by this implementation. + /// + /// # Arguments + /// + /// * `id` - The container identifier to look up. + /// + /// # Returns + /// + /// `ContainerInspectResponse` for the container if present, or a `DockerError::IOError` with + /// `NotFound` if no inspection entry exists for the given `id`. 
+ /// + /// # Examples + /// + /// ```no_run + /// # async fn doc_example(bridge: &crate::RecordingBridge) { + /// let info = bridge.inspect_container("container-id", None).await.unwrap(); + /// // use `info`... + /// # } + /// ``` + async fn inspect_container( + &self, + id: &str, + _options: Option, + ) -> Result { + self.inner + .inspect_map + .lock() + .unwrap() + .get(id) + .cloned() + .ok_or_else(|| DockerError::IOError { + err: std::io::Error::new(std::io::ErrorKind::NotFound, "missing inspect"), + }) + } + + /// Records a request to connect a container to a network in the mock bridge. + /// + /// This implementation appends the tuple `(container_id, network_name)` to the bridge's + /// internal `connect_calls` log and returns `Ok(())`. + /// + /// # Examples + /// + /// ``` + /// // Construct a mock bridge (type details omitted) and call the async method. + /// // The bridge will record the requested connection in its `connect_calls`. + /// # async fn doc_example() { + /// let bridge = /* RecordingBridge::new() or equivalent setup */; + /// let opts = /* ConnectNetworkOptions { container: "container-id".to_string(), .. } */; + /// let res = bridge.connect_network("platform-network", opts).await; + /// assert!(res.is_ok()); + /// // assert that bridge.inner.connect_calls contains ("container-id", "platform-network") + /// # } + /// ``` + async fn connect_network( + &self, + network: &str, + options: ConnectNetworkOptions, + ) -> Result<(), DockerError> { + self.inner + .connect_calls + .lock() + .unwrap() + .push((options.container, network.to_string())); + Ok(()) + } + + /// Creates a stream of image-pull progress events for the given image-pull options. + /// + /// The stream yields `CreateImageInfo` items describing progress/status of the image + /// pull operation. + /// + /// # Returns + /// + /// An `ImageStream` that produces `CreateImageInfo` values as the image is pulled; the + /// default implementation produces an empty stream (no events). 
+ /// + /// # Examples + /// + /// ``` + /// use futures::stream::StreamExt; + /// + /// // `bridge` is an implementor of the trait providing `create_image_stream`. + /// // The returned stream can be polled for pull progress events. + /// let stream = bridge.create_image_stream(None); + /// let first = futures::executor::block_on(stream.next()); + /// assert!(first.is_none() || first.is_some()); + /// ``` + fn create_image_stream(&self, _options: Option>) -> ImageStream { + futures::stream::empty().boxed() + } + + /// Creates a Docker volume. + /// + /// This implementation is a no-op: the provided `CreateVolumeOptions` are ignored and the call + /// always succeeds. + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(bridge: &impl DockerBridge) { + /// use bollard::volume::CreateVolumeOptions; + /// + /// let opts = CreateVolumeOptions { name: "test-volume".to_string(), ..Default::default() }; + /// bridge.create_volume(opts).await.unwrap(); + /// # } + /// ``` + async fn create_volume( + &self, + _options: CreateVolumeOptions, + ) -> Result<(), DockerError> { + Ok(()) + } + + /// Test-only stub implementation that always panics when invoked. + /// + /// This function is provided for test scaffolding and should not be called in normal execution. + /// It unconditionally panics to surface unexpected usage in tests. + /// + /// # Examples + /// + /// ``` + /// // Assuming a `RecordingBridge` test double is in scope: + /// // #[tokio::test] + /// // #[should_panic] + /// // async fn create_container_stub_panics() { + /// // let bridge = RecordingBridge::default(); + /// // // This call will panic as the stub is not intended to be used. + /// // let _ = bridge.create_container(None, Default::default()).await; + /// // } + /// ``` + async fn create_container( + &self, + _options: Option>, + _config: Config, + ) -> Result { + panic!("not used in tests") + } + + /// Starts the container identified by `id` with the provided start options. 
+ /// + /// This concrete implementation is a test stub and will panic if invoked. + /// + /// # Returns + /// + /// `Ok(())` on success, `Err(DockerError)` on failure. + /// + /// # Examples + /// + /// ```no_run + /// // Typical usage (actual implementation should not panic): + /// // let _ = client.start_container("container_id", None).await; + /// ``` + async fn start_container( + &self, + _id: &str, + _options: Option>, + ) -> Result<(), DockerError> { + panic!("not used in tests") + } + + async fn stop_container( + &self, + _id: &str, + _options: Option, + ) -> Result<(), DockerError> { + panic!("not used in tests") + } + + /// Records a container removal request by appending the container `id` to the internal + /// removed list and returns success. + /// + /// # Examples + /// + /// ``` + /// # use std::sync::Arc; + /// # async fn run_example() { + /// let bridge = RecordingBridge::default(); + /// bridge.remove_container("container-123", None).await.unwrap(); + /// assert!(bridge.inner.removed.lock().unwrap().contains(&"container-123".to_string())); + /// # } + /// ``` + async fn remove_container( + &self, + id: &str, + _options: Option, + ) -> Result<(), DockerError> { + self.inner.removed.lock().unwrap().push(id.to_string()); + Ok(()) + } + + /// Returns the list of container summaries currently recorded by the bridge. + /// + /// The method ignores any provided `ListContainersOptions` and returns a cloned + /// snapshot of the internal container summaries. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// // `bridge` is a value with this method (e.g., a recording/mock bridge). + /// // Replace with the actual instance in real tests. 
+ /// let bridge = /* create or obtain bridge instance */ todo!(); + /// let summaries = bridge.list_containers(None).await.unwrap(); + /// // `summaries` is a Vec + /// let _ = summaries; + /// # }); + /// ``` + async fn list_containers( + &self, + _options: Option>, + ) -> Result, DockerError> { + Ok(self.inner.containers.lock().unwrap().clone()) + } + + /// Returns a stream of log output items for the container identified by `id`, using the provided `options`. + /// + /// The returned stream yields `LogOutput` entries produced by the container (stdout/stderr, timestamps, etc.). + /// + /// # Examples + /// + /// ``` + /// // Obtain a log stream from a Docker client and consume entries: + /// // let mut stream = docker.logs_stream("container_id", LogsOptions::default()); + /// // while let Some(entry) = stream.next().await { /* handle LogOutput */ } + /// ``` + fn logs_stream(&self, _id: &str, _options: LogsOptions) -> LogStream { + futures::stream::empty().boxed() + } + } + + #[tokio::test] + async fn test_ensure_network_creates_when_missing() { + let bridge = RecordingBridge::default(); + let client = DockerClient::with_bridge(bridge.clone(), "platform-network"); + client.ensure_network().await.unwrap(); + assert_eq!( + bridge.created_networks(), + vec!["platform-network".to_string()] + ); + } + + #[tokio::test] + async fn test_ensure_network_skips_existing() { + let bridge = RecordingBridge::with_networks(&["platform-network"]); + let client = DockerClient::with_bridge(bridge.clone(), "platform-network"); + client.ensure_network().await.unwrap(); + assert!(bridge.created_networks().is_empty()); + } + + #[tokio::test] + #[serial] + async fn test_connect_self_to_network_only_when_needed() { + let bridge = RecordingBridge::default(); + let container_id = "aaaaaaaaaaaa"; + std::env::set_var("HOSTNAME", container_id); + bridge.set_inspect_networks(container_id, &[]); + let client = DockerClient::with_bridge(bridge.clone(), "platform-network"); + 
client.connect_self_to_network().await.unwrap(); + assert_eq!( + bridge.connect_calls(), + vec![(container_id.to_string(), "platform-network".to_string())] + ); + + let bridge2 = RecordingBridge::default(); + let container_two = "bbbbbbbbbbbb"; + std::env::set_var("HOSTNAME", container_two); + bridge2.set_inspect_networks(container_two, &["platform-network"]); + let client2 = DockerClient::with_bridge(bridge2.clone(), "platform-network"); + client2.connect_self_to_network().await.unwrap(); + assert!(bridge2.connect_calls().is_empty()); + std::env::remove_var("HOSTNAME"); + } + + /// Constructs a minimal `ContainerSummary` populated with the provided id, name, and creation timestamp. + /// + /// The returned summary will have `id` set to the provided `id`, `names` set to a single entry prefixed with `/` + /// (e.g., passing `"foo"` produces `names = ["/foo"]`), and `created` set to the provided timestamp. All other + /// fields are left as their default values. + /// + /// # Examples + /// + /// ``` + /// let summary = make_container_summary("abcdef", "my-container", 1_700_000_000); + /// assert_eq!(summary.id.as_deref(), Some("abcdef")); + /// assert_eq!(summary.names.as_ref().map(|v| v.as_slice()), Some(&["/my-container"])); + /// assert_eq!(summary.created, Some(1_700_000_000)); + /// ``` + fn make_container_summary(id: &str, name: &str, created: i64) -> ContainerSummary { + ContainerSummary { + id: Some(id.to_string()), + names: Some(vec![format!("/{name}")]), + created: Some(created), + ..Default::default() + } + } + + #[tokio::test] + async fn test_cleanup_stale_containers_filters_entries() { + let bridge = RecordingBridge::default(); + let now = chrono::Utc::now().timestamp(); + bridge.set_containers(vec![ + make_container_summary("old", "term-challenge-old", now - 10_000), + make_container_summary("exclude", "platform-helper", now - 10_000), + make_container_summary("young", "term-challenge-young", now - 100), + ]); + let client = 
DockerClient::with_bridge(bridge.clone(), "platform-network"); + + let result = client + .cleanup_stale_containers("term-challenge-", 120, &["platform-"]) + .await + .unwrap(); + assert_eq!(result.total_found, 1); + assert_eq!(result.removed, 1); + assert_eq!(bridge.removed_containers(), vec!["old".to_string()]); + } +} + /// Result of container cleanup operation -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone)] pub struct CleanupResult { pub total_found: usize, pub removed: usize, @@ -956,13 +2472,15 @@ impl CleanupResult { } #[cfg(test)] -mod tests { - use super::*; +mod cleanup_tests { + use super::CleanupResult; - #[tokio::test] - #[ignore = "requires Docker"] - async fn test_docker_connect() { - let client = DockerClient::connect().await; - assert!(client.is_ok()); + #[test] + fn test_cleanup_result_success_flag() { + let mut result = CleanupResult::default(); + assert!(result.success()); + + result.errors.push("boom".into()); + assert!(!result.success()); } -} +} \ No newline at end of file diff --git a/crates/challenge-orchestrator/src/evaluator.rs b/crates/challenge-orchestrator/src/evaluator.rs index 24af7235a..d0de84faf 100644 --- a/crates/challenge-orchestrator/src/evaluator.rs +++ b/crates/challenge-orchestrator/src/evaluator.rs @@ -270,6 +270,37 @@ pub enum EvaluatorError { #[cfg(test)] mod tests { use super::*; + use parking_lot::RwLock; + use platform_core::ChallengeId; + use std::collections::HashMap; + use std::sync::Arc; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + use tokio::net::TcpListener; + use tokio::task::JoinHandle; + use tokio_test::block_on; + + /// Creates a pre-filled `ChallengeInstance` for tests using the given container `status`. + /// + /// The returned instance has deterministic placeholder values for `container_id`, `image`, + /// and `endpoint`, with `started_at` set to the current UTC time. 
+ /// + /// # Examples + /// + /// ``` + /// let inst = sample_instance(ContainerStatus::Running); + /// assert_eq!(inst.status, ContainerStatus::Running); + /// assert!(inst.image.contains("ghcr.io/platformnetwork/example")); + /// ``` + fn sample_instance(status: ContainerStatus) -> ChallengeInstance { + ChallengeInstance { + challenge_id: ChallengeId::new(), + container_id: "cid".into(), + image: "ghcr.io/platformnetwork/example:latest".into(), + endpoint: "http://127.0.0.1:9000".into(), + started_at: chrono::Utc::now(), + status, + } + } #[test] fn test_challenge_info_deserialize() { @@ -283,4 +314,398 @@ mod tests { assert_eq!(info.name, "term-challenge"); assert_eq!(info.mechanism_id, 0); // default } -} + + #[test] + fn test_evaluate_generic_requires_running_status() { + let challenges = Arc::new(RwLock::new(HashMap::new())); + let instance = sample_instance(ContainerStatus::Starting); + let challenge_id = instance.challenge_id; + challenges.write().insert(challenge_id, instance.clone()); + + let evaluator = ChallengeEvaluator::new(challenges); + let err = block_on(evaluator.evaluate_generic(challenge_id, serde_json::json!({}), None)) + .expect_err("should fail when not running"); + + match err { + EvaluatorError::ChallengeNotReady(id) => assert_eq!(id, challenge_id), + other => panic!("unexpected error: {:?}", other), + } + } + + #[test] + fn test_proxy_request_missing_challenge() { + let evaluator = ChallengeEvaluator::new(Arc::new(RwLock::new(HashMap::new()))); + let challenge_id = ChallengeId::new(); + let err = block_on(evaluator.proxy_request( + challenge_id, + "status", + reqwest::Method::GET, + None, + None, + )) + .expect_err("missing challenge should error"); + + match err { + EvaluatorError::ChallengeNotFound(id) => assert_eq!(id, challenge_id), + other => panic!("unexpected error: {:?}", other), + } + } + + #[tokio::test] + async fn test_proxy_request_requires_running_status() { + let challenges = Arc::new(RwLock::new(HashMap::new())); + let 
instance = sample_instance(ContainerStatus::Starting); + let challenge_id = instance.challenge_id; + challenges.write().insert(challenge_id, instance); + + let evaluator = ChallengeEvaluator::new(challenges); + let err = evaluator + .proxy_request(challenge_id, "health", reqwest::Method::GET, None, None) + .await + .expect_err("non-running challenge should be rejected"); + + match err { + EvaluatorError::ChallengeNotReady(id) => assert_eq!(id, challenge_id), + other => panic!("unexpected error: {:?}", other), + } + } + + #[test] + fn test_list_challenges_returns_current_instances() { + let challenges = Arc::new(RwLock::new(HashMap::new())); + let instance_a = sample_instance(ContainerStatus::Running); + let instance_b = sample_instance(ContainerStatus::Unhealthy); + let id_a = instance_a.challenge_id; + let id_b = instance_b.challenge_id; + challenges.write().insert(id_a, instance_a.clone()); + challenges.write().insert(id_b, instance_b.clone()); + + let evaluator = ChallengeEvaluator::new(challenges); + let list = evaluator.list_challenges(); + assert_eq!(list.len(), 2); + + let status_map: std::collections::HashMap = list + .into_iter() + .map(|entry| (entry.challenge_id, entry.status)) + .collect(); + + assert_eq!(status_map.get(&id_a), Some(&ContainerStatus::Running)); + assert_eq!(status_map.get(&id_b), Some(&ContainerStatus::Unhealthy)); + } + + #[tokio::test] + async fn test_evaluate_generic_succeeds_with_ok_response() { + let (addr, handle) = + spawn_static_http_server("200 OK", r#"{"value": 42}"#, "application/json").await; + let endpoint = format!("http://{}", addr); + let (evaluator, challenge_id) = evaluator_with_instance(endpoint, ContainerStatus::Running); + + let response = evaluator + .evaluate_generic(challenge_id, serde_json::json!({"input": 1}), Some(5)) + .await + .expect("evaluation succeeds"); + + assert_eq!(response["value"], 42); + handle.await.expect("server finished"); + } + + #[tokio::test] + async fn 
test_evaluate_generic_reports_challenge_error() { + let (addr, handle) = + spawn_static_http_server("500 Internal Server Error", "boom", "text/plain").await; + let endpoint = format!("http://{}", addr); + let (evaluator, challenge_id) = evaluator_with_instance(endpoint, ContainerStatus::Running); + + let err = evaluator + .evaluate_generic(challenge_id, serde_json::json!({}), Some(5)) + .await + .expect_err("should propagate challenge error"); + + match err { + EvaluatorError::ChallengeError { status, message } => { + assert_eq!(status, 500); + assert_eq!(message, "boom"); + } + other => panic!("unexpected error: {:?}", other), + } + + handle.await.expect("server finished"); + } + + #[tokio::test] + async fn test_evaluate_generic_reports_parse_error() { + let (addr, handle) = spawn_static_http_server("200 OK", "not json", "text/plain").await; + let endpoint = format!("http://{}", addr); + let (evaluator, challenge_id) = evaluator_with_instance(endpoint, ContainerStatus::Running); + + let err = evaluator + .evaluate_generic(challenge_id, serde_json::json!({}), Some(5)) + .await + .expect_err("invalid JSON should error"); + + assert!(matches!(err, EvaluatorError::ParseError(_))); + + handle.await.expect("server finished"); + } + + #[tokio::test] + async fn test_evaluate_generic_reports_network_error() { + let (addr, handle) = spawn_drop_http_server().await; + let endpoint = format!("http://{}", addr); + let (evaluator, challenge_id) = evaluator_with_instance(endpoint, ContainerStatus::Running); + + let err = evaluator + .evaluate_generic(challenge_id, serde_json::json!({}), Some(1)) + .await + .expect_err("network failure should bubble up"); + + assert!(matches!(err, EvaluatorError::NetworkError(_))); + handle.await.expect("server finished"); + } + + #[tokio::test] + async fn test_proxy_request_returns_payload() { + let (addr, handle) = + spawn_static_http_server("200 OK", r#"{"ok":true}"#, "application/json").await; + let endpoint = format!("http://{}", addr); + let 
(evaluator, challenge_id) = evaluator_with_instance(endpoint, ContainerStatus::Running); + + let response = evaluator + .proxy_request( + challenge_id, + "custom/path", + reqwest::Method::POST, + Some(serde_json::json!({"payload": true})), + Some(5), + ) + .await + .expect("proxy request succeeds"); + + assert_eq!(response["ok"], true); + handle.await.expect("server finished"); + } + + #[tokio::test] + async fn test_proxy_request_reports_challenge_error() { + let (addr, handle) = + spawn_static_http_server("503 Service Unavailable", "oops", "text/plain").await; + let endpoint = format!("http://{}", addr); + let (evaluator, challenge_id) = evaluator_with_instance(endpoint, ContainerStatus::Running); + + let err = evaluator + .proxy_request(challenge_id, "custom", reqwest::Method::GET, None, Some(5)) + .await + .expect_err("should surface challenge error"); + + match err { + EvaluatorError::ChallengeError { status, message } => { + assert_eq!(status, 503); + assert_eq!(message, "oops"); + } + other => panic!("unexpected error: {:?}", other), + } + + handle.await.expect("server finished"); + } + + #[tokio::test] + async fn test_get_info_fetches_metadata() { + let body = r#"{"name":"demo","version":"0.1.0","mechanism_id":7}"#; + let (addr, handle) = spawn_static_http_server("200 OK", body, "application/json").await; + let endpoint = format!("http://{}", addr); + let (evaluator, challenge_id) = evaluator_with_instance(endpoint, ContainerStatus::Running); + + let info = evaluator + .get_info(challenge_id) + .await + .expect("info should deserialize"); + + assert_eq!(info.name, "demo"); + assert_eq!(info.version, "0.1.0"); + assert_eq!(info.mechanism_id, 7); + handle.await.expect("server finished"); + } + + #[tokio::test] + async fn test_get_info_reports_error_status() { + let (addr, handle) = + spawn_static_http_server("404 Not Found", "missing", "text/plain").await; + let endpoint = format!("http://{}", addr); + let (evaluator, challenge_id) = 
evaluator_with_instance(endpoint, ContainerStatus::Running); + + let err = evaluator + .get_info(challenge_id) + .await + .expect_err("should return challenge error for non-200 info"); + + match err { + EvaluatorError::ChallengeError { status, message } => { + assert_eq!(status, 404); + assert_eq!(message, "Failed to get challenge info"); + } + other => panic!("unexpected error: {:?}", other), + } + + handle.await.expect("server finished"); + } + + #[tokio::test] + async fn test_check_health_reflects_status_code() { + let (addr_ok, handle_ok) = + spawn_static_http_server("200 OK", "{}", "application/json").await; + let (evaluator, ok_id) = + evaluator_with_instance(format!("http://{}", addr_ok), ContainerStatus::Running); + + assert!(evaluator + .check_health(ok_id) + .await + .expect("health request succeeds")); + handle_ok.await.expect("server finished"); + + let (addr_err, handle_err) = + spawn_static_http_server("503 Service Unavailable", "oops", "text/plain").await; + let (evaluator, fail_id) = + evaluator_with_instance(format!("http://{}", addr_err), ContainerStatus::Running); + + assert!(!evaluator + .check_health(fail_id) + .await + .expect("health request succeeds")); + handle_err.await.expect("server finished"); + } + + #[tokio::test] + async fn test_check_health_handles_request_failure() { + let (addr, handle) = spawn_drop_http_server().await; + let (evaluator, challenge_id) = + evaluator_with_instance(format!("http://{}", addr), ContainerStatus::Running); + + let result = evaluator + .check_health(challenge_id) + .await + .expect("network errors should be converted to false"); + + assert!(!result); + handle.await.expect("server finished"); + } + + /// Create a ChallengeEvaluator pre-populated with a single ChallengeInstance. + /// + /// The provided `endpoint` is set as the instance's base URL and `status` is used + /// as the instance's container status before registration. 
+ /// + /// # Examples + /// + /// ```rust,no_run + /// let (evaluator, challenge_id) = evaluator_with_instance( + /// "http://127.0.0.1:8000".to_string(), + /// ContainerStatus::Running, + /// ); + /// // `evaluator` now contains a single registered instance reachable at the given endpoint, + /// // and `challenge_id` is the registered instance's ID. + /// ``` + /// + /// # Returns + /// + /// A tuple `(ChallengeEvaluator, ChallengeId)` where the evaluator contains the registered instance + /// and the second element is the instance's `ChallengeId`. + fn evaluator_with_instance( + endpoint: String, + status: ContainerStatus, + ) -> (ChallengeEvaluator, ChallengeId) { + let challenges = Arc::new(RwLock::new(HashMap::new())); + let mut instance = sample_instance(status); + instance.endpoint = endpoint; + let challenge_id = instance.challenge_id; + challenges.write().insert(challenge_id, instance); + (ChallengeEvaluator::new(challenges), challenge_id) + } + + /// Starts a minimal one-shot HTTP server on localhost that accepts a single connection and replies with the provided status line, body, and Content-Type. + /// + /// The server binds to 127.0.0.1 on an ephemeral port and returns the bound socket address and a JoinHandle for the spawned task. The spawned task reads the incoming request, writes the formatted HTTP response, then shuts down after serving that one connection. 
+ /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// let (addr, handle) = spawn_static_http_server("200 OK", "hello", "text/plain").await; + /// let url = format!("http://{}/", addr); + /// let resp = reqwest::get(&url).await.unwrap(); + /// assert_eq!(resp.status().as_u16(), 200); + /// let body = resp.text().await.unwrap(); + /// assert_eq!(body, "hello"); + /// let _ = handle.await; + /// # }); + /// ``` + async fn spawn_static_http_server( + status_line: &str, + body: &str, + content_type: &str, + ) -> (std::net::SocketAddr, JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0") + .await + .expect("bind local server"); + let addr = listener.local_addr().expect("read addr"); + let body = body.to_string(); + let content_type = content_type.to_string(); + let status_line = status_line.to_string(); + + let handle = tokio::spawn(async move { + if let Ok((mut socket, _)) = listener.accept().await { + let mut buf = vec![0u8; 1024]; + let _ = socket.read(&mut buf).await; + let response = format!( + "HTTP/1.1 {status}\r\nContent-Type: {content_type}\r\nContent-Length: {}\r\n\r\n{}", + body.len(), body, + status = status_line, + ); + let _ = socket.write_all(response.as_bytes()).await; + let _ = socket.shutdown().await; + } + }); + + (addr, handle) + } + + /// Spawns a local TCP server that accepts a single connection and then drops it without responding. + /// + /// The server binds to an ephemeral localhost port and returns the socket address and a JoinHandle + /// for the background task. This is useful for tests that need to simulate a peer that accepts a + /// connection and then closes it to produce a client-side network error. 
+ /// + /// # Examples + /// + /// ``` + /// # use tokio::io::{AsyncReadExt, AsyncWriteExt}; + /// # use tokio::net::TcpStream; + /// # + /// # async fn run() { + /// let (addr, _handle) = crate::spawn_drop_http_server().await; + /// let mut stream = TcpStream::connect(addr).await.unwrap(); + /// // Send a request; the server will read and then drop the connection without replying. + /// let _ = stream.write_all(b"GET / HTTP/1.0\r\n\r\n").await; + /// let mut buf = [0u8; 16]; + /// // Reading yields 0 bytes when the peer closes the connection. + /// let n = stream.read(&mut buf).await.unwrap(); + /// assert_eq!(n, 0); + /// # } + /// # tokio::runtime::Runtime::new().unwrap().block_on(run()); + /// ``` + async fn spawn_drop_http_server() -> (std::net::SocketAddr, JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0") + .await + .expect("bind local server"); + let addr = listener.local_addr().expect("read addr"); + + let handle = tokio::spawn(async move { + if let Ok((mut socket, _)) = listener.accept().await { + let mut buf = vec![0u8; 1024]; + let _ = socket.read(&mut buf).await; + // Drop connection without responding to trigger client-side network error. + } + }); + + (addr, handle) + } +} \ No newline at end of file diff --git a/crates/challenge-orchestrator/src/health.rs b/crates/challenge-orchestrator/src/health.rs index a7c30e996..bd6a43b8d 100644 --- a/crates/challenge-orchestrator/src/health.rs +++ b/crates/challenge-orchestrator/src/health.rs @@ -200,6 +200,38 @@ impl HealthSummary { #[cfg(test)] mod tests { use super::*; + use parking_lot::RwLock; + use platform_core::ChallengeId; + use std::collections::HashMap; + use std::sync::Arc; + use std::time::{Duration, Instant}; + use tokio::io::{AsyncReadExt, AsyncWriteExt}; + use tokio::net::TcpListener; + + /// Create a sample `ChallengeInstance` populated with deterministic placeholder values for testing. 
+ /// + /// The returned instance uses fixed values for `container_id`, `image`, and `endpoint`; the `started_at` + /// is set to the current time and the `status` is taken from the `status` parameter. + /// + /// # Examples + /// + /// ``` + /// let inst = sample_instance(ContainerStatus::Starting); + /// assert_eq!(inst.container_id, "cid"); + /// assert_eq!(inst.image, "ghcr.io/platformnetwork/example:latest"); + /// assert_eq!(inst.endpoint, "http://127.0.0.1:9000"); + /// assert_eq!(inst.status, ContainerStatus::Starting); + /// ``` + fn sample_instance(status: ContainerStatus) -> ChallengeInstance { + ChallengeInstance { + challenge_id: ChallengeId::new(), + container_id: "cid".into(), + image: "ghcr.io/platformnetwork/example:latest".into(), + endpoint: "http://127.0.0.1:9000".into(), + started_at: chrono::Utc::now(), + status, + } + } #[test] fn test_health_summary() { @@ -228,4 +260,406 @@ mod tests { assert!(summary.all_healthy()); assert_eq!(summary.percentage_healthy(), 100.0); } -} + + #[test] + fn test_percentage_healthy_handles_zero_total() { + let summary = HealthSummary { + total: 0, + running: 0, + unhealthy: 0, + starting: 0, + stopped: 0, + }; + + assert_eq!(summary.percentage_healthy(), 100.0); + } + + #[test] + fn test_get_unhealthy_lists_ids() { + let challenges = Arc::new(RwLock::new(HashMap::new())); + let healthy_instance = sample_instance(ContainerStatus::Running); + let healthy_id = healthy_instance.challenge_id; + let unhealthy_instance = sample_instance(ContainerStatus::Unhealthy); + let unhealthy_id = unhealthy_instance.challenge_id; + + { + let mut guard = challenges.write(); + guard.insert(healthy_id, healthy_instance.clone()); + guard.insert(unhealthy_id, unhealthy_instance.clone()); + } + + let monitor = HealthMonitor::new(challenges, Duration::from_secs(5)); + let ids = monitor.get_unhealthy(); + + assert_eq!(ids.len(), 1); + assert_eq!(ids[0], unhealthy_id); + } + + #[test] + fn test_health_monitor_summary_counts_statuses() { + 
let challenges = Arc::new(RwLock::new(HashMap::new())); + { + let mut guard = challenges.write(); + guard.insert( + ChallengeId::new(), + sample_instance(ContainerStatus::Running), + ); + guard.insert( + ChallengeId::new(), + sample_instance(ContainerStatus::Unhealthy), + ); + guard.insert( + ChallengeId::new(), + sample_instance(ContainerStatus::Starting), + ); + } + + let monitor = HealthMonitor::new(challenges, Duration::from_secs(5)); + let summary = monitor.summary(); + + assert_eq!(summary.total, 3); + assert_eq!(summary.running, 1); + assert_eq!(summary.unhealthy, 1); + assert_eq!(summary.starting, 1); + assert_eq!(summary.stopped, 0); + } + + /// Starts a one-shot HTTP test server that accepts a single connection and replies with the given status line and body. + /// + /// The server binds to localhost on an ephemeral port and returns its socket address and a JoinHandle for the spawned task. + /// The spawned task accepts one connection, reads the request (up to 1024 bytes), writes a response composed from `status_line` and `body`, and then completes. 
+ /// + /// # Examples + /// + /// ``` + /// # tokio::test + /// # async fn _example() { + /// let (addr, handle) = spawn_health_server("200 OK", r#"{"status":"ok"}"#).await; + /// let url = format!("http://{}/health", addr); + /// let res = reqwest::get(&url).await.unwrap(); + /// assert!(res.status().is_success()); + /// // ensure server task completes + /// handle.await.unwrap(); + /// # } + /// ``` + async fn spawn_health_server( + status_line: &str, + body: &str, + ) -> (std::net::SocketAddr, tokio::task::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0") + .await + .expect("bind local server"); + let addr = listener.local_addr().expect("read addr"); + let body = body.to_string(); + let response = format!( + "HTTP/1.1 {status_line}\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}", + body.len(), body + ); + + let handle = tokio::spawn(async move { + if let Ok((mut socket, _)) = listener.accept().await { + let mut buf = [0u8; 1024]; + let _ = socket.read(&mut buf).await; + let _ = socket.write_all(response.as_bytes()).await; + } + }); + + (addr, handle) + } + + /// Starts a lightweight TCP server that repeatedly accepts connections and responds with a fixed HTTP response. + /// + /// The server listens on 127.0.0.1 at an OS-assigned port and responds to every incoming connection with an HTTP response + /// constructed from `status_line` and `body`. Each connection is handled in a spawned task so the server continues accepting + /// subsequent connections until the listener or returned task is dropped or aborted. + /// + /// # Parameters + /// + /// - `status_line`: the HTTP status line to use (for example, `"HTTP/1.1 200 OK"` or `"HTTP/1.1 500 Internal Server Error"`). + /// - `body`: the response body to include; `Content-Length` and `Content-Type: application/json` are added automatically. 
+ /// + /// # Returns + /// + /// A tuple of `(addr, handle)` where: + /// - `addr` is the socket address the server is bound to (useful for issuing HTTP requests to the server). + /// - `handle` is a `tokio::task::JoinHandle<()>` for the background accept loop; aborting or awaiting it will stop the server. + /// + /// # Examples + /// + /// ``` + /// #[tokio::test] + /// async fn spawn_repeating_health_server_example() { + /// let (addr, handle) = spawn_repeating_health_server("HTTP/1.1 200 OK", "{\"status\":\"ok\"}").await; + /// // addr can be used to make requests to the server (for example via reqwest). + /// // Stop the server when done. + /// handle.abort(); + /// let _ = handle.await; + /// } + /// ``` + async fn spawn_repeating_health_server( + status_line: &str, + body: &str, + ) -> (std::net::SocketAddr, tokio::task::JoinHandle<()>) { + let listener = TcpListener::bind("127.0.0.1:0") + .await + .expect("bind repeating server"); + let addr = listener.local_addr().expect("read addr"); + let body = body.to_string(); + let response = Arc::new(format!( + "HTTP/1.1 {status_line}\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}", + body.len(), body + )); + + let handle = tokio::spawn(async move { + loop { + let (mut socket, _) = match listener.accept().await { + Ok(conn) => conn, + Err(_) => break, + }; + let resp = response.clone(); + tokio::spawn(async move { + let mut buf = [0u8; 1024]; + let _ = socket.read(&mut buf).await; + let _ = socket.write_all(resp.as_bytes()).await; + }); + } + }); + + (addr, handle) + } + + /// Spawns a TCP server that accepts incoming connections and immediately closes them. + + /// + + /// The server listens on 127.0.0.1 on an OS-assigned port and runs until its listener is dropped + + /// or the returned task handle is aborted. This is useful for simulating a health endpoint that + + /// closes connections (causing request errors) in tests. 
+    ///
+    /// Each accepted socket is dropped without reading or writing, so a client
+    /// observes an immediate connection close instead of an HTTP response.
+    ///
+    /// # Returns
+    ///
+    /// A `(addr, handle)` tuple: `addr` is the bound socket address and `handle`
+    /// is the `tokio::task::JoinHandle` of the accept loop; abort the handle to
+    /// stop the server.
+    ///
+    /// NOTE(review): this is a private helper inside a `#[cfg(test)]` module, so
+    /// rustdoc never compiles or runs the example below; it is illustrative
+    /// only, hence the `ignore` fence.
+    ///
+    /// # Examples
+    ///
+    /// ```ignore
+    /// # async fn example() {
+    /// let (addr, handle) = spawn_closing_health_server().await;
+    /// // Connecting succeeds, but the server closes the socket immediately,
+    /// // so any request made over this connection fails client-side.
+    /// let _stream = tokio::net::TcpStream::connect(addr).await.unwrap();
+    /// // Stop the background accept loop once the test is done.
+    /// handle.abort();
+    /// # }
+    /// ```
+    async fn spawn_closing_health_server() -> (std::net::SocketAddr, tokio::task::JoinHandle<()>) {
+        let listener = TcpListener::bind("127.0.0.1:0")
+            .await
+            .expect("bind closing server");
+        let addr = listener.local_addr().expect("read addr");
+        let handle = tokio::spawn(async move {
+            loop {
+                let (socket, _) = match listener.accept().await {
+                    Ok(conn) => conn,
+                    Err(_) => break,
+                };
+                drop(socket);
+            }
+        });
+        (addr, handle)
+    }
+
+    #[tokio::test]
+    async fn test_health_monitor_check_sets_running_on_success() {
+        let (addr, handle) = spawn_health_server("200 OK", r#"{"status":"ok"}"#).await;
+        let challenges = Arc::new(RwLock::new(HashMap::new()));
+        let mut instance = sample_instance(ContainerStatus::Starting);
+        instance.endpoint = format!("http://{}", addr);
+        let challenge_id = instance.challenge_id;
+        challenges.write().insert(challenge_id, instance);
+
+        let monitor = HealthMonitor::new(challenges.clone(), Duration::from_secs(5));
+        let status = monitor
+            .check(&challenge_id)
+            .await
+            .expect("status should be returned");
+
+        assert_eq!(status, ContainerStatus::Running);
+        assert_eq!(
+            challenges
+                .read()
+                .get(&challenge_id)
+                .expect("challenge present")
+                .status,
+            ContainerStatus::Running
+        );
+
+        handle.await.expect("server finished");
+    }
+
+    #[tokio::test]
+    async fn test_health_monitor_check_marks_unhealthy_on_failure() {
+        let (addr, handle) =
+            spawn_health_server("500 Internal Server Error", r#"{"status":"error"}"#).await;
+        let challenges = Arc::new(RwLock::new(HashMap::new()));
+        let mut instance =
sample_instance(ContainerStatus::Running); + instance.endpoint = format!("http://{}", addr); + let challenge_id = instance.challenge_id; + challenges.write().insert(challenge_id, instance); + + let monitor = HealthMonitor::new(challenges.clone(), Duration::from_secs(5)); + let status = monitor + .check(&challenge_id) + .await + .expect("status should be returned"); + + assert_eq!(status, ContainerStatus::Unhealthy); + assert_eq!( + challenges + .read() + .get(&challenge_id) + .expect("challenge present") + .status, + ContainerStatus::Unhealthy + ); + + handle.await.expect("server finished"); + } + + #[tokio::test] + async fn test_health_monitor_start_updates_status() { + let (addr, handle) = spawn_repeating_health_server("200 OK", r#"{"status":"ok"}"#).await; + let challenges = Arc::new(RwLock::new(HashMap::new())); + let mut instance = sample_instance(ContainerStatus::Starting); + instance.endpoint = format!("http://{}", addr); + let challenge_id = instance.challenge_id; + challenges.write().insert(challenge_id, instance); + + let monitor = HealthMonitor::new(challenges.clone(), Duration::from_millis(10)); + monitor.start().await.expect("monitor starts"); + + let deadline = Instant::now() + Duration::from_millis(500); + loop { + if challenges + .read() + .get(&challenge_id) + .map(|inst| inst.status == ContainerStatus::Running) + .unwrap_or(false) + { + break; + } + + if Instant::now() > deadline { + panic!("status never updated to running"); + } + + tokio::time::sleep(Duration::from_millis(20)).await; + } + + handle.abort(); + } + + #[tokio::test] + async fn test_health_monitor_start_marks_unhealthy_on_failed_response() { + let (addr, handle) = + spawn_repeating_health_server("500 Internal Server Error", r#"{"status":"error"}"#) + .await; + let challenges = Arc::new(RwLock::new(HashMap::new())); + let mut instance = sample_instance(ContainerStatus::Running); + instance.endpoint = format!("http://{}", addr); + let challenge_id = instance.challenge_id; + 
challenges.write().insert(challenge_id, instance); + + let monitor = HealthMonitor::new(challenges.clone(), Duration::from_millis(10)); + monitor.start().await.expect("monitor starts"); + + let deadline = Instant::now() + Duration::from_millis(500); + loop { + if challenges + .read() + .get(&challenge_id) + .map(|inst| inst.status == ContainerStatus::Unhealthy) + .unwrap_or(false) + { + break; + } + + if Instant::now() > deadline { + panic!("status never updated to unhealthy"); + } + + tokio::time::sleep(Duration::from_millis(20)).await; + } + + handle.abort(); + } + + #[tokio::test] + async fn test_health_monitor_start_handles_request_error() { + let (addr, handle) = spawn_closing_health_server().await; + let challenges = Arc::new(RwLock::new(HashMap::new())); + let mut instance = sample_instance(ContainerStatus::Running); + instance.endpoint = format!("http://{}", addr); + let challenge_id = instance.challenge_id; + challenges.write().insert(challenge_id, instance); + + let monitor = HealthMonitor::new(challenges.clone(), Duration::from_millis(10)); + monitor.start().await.expect("monitor starts"); + + let deadline = Instant::now() + Duration::from_millis(500); + loop { + if challenges + .read() + .get(&challenge_id) + .map(|inst| inst.status == ContainerStatus::Unhealthy) + .unwrap_or(false) + { + break; + } + + if Instant::now() > deadline { + panic!("status never updated after request error"); + } + + tokio::time::sleep(Duration::from_millis(20)).await; + } + + handle.abort(); + } + + #[tokio::test] + async fn test_health_monitor_check_treats_parse_error_as_healthy() { + let (addr, handle) = spawn_health_server("200 OK", "not-json").await; + let challenges = Arc::new(RwLock::new(HashMap::new())); + let mut instance = sample_instance(ContainerStatus::Starting); + instance.endpoint = format!("http://{}", addr); + let challenge_id = instance.challenge_id; + challenges.write().insert(challenge_id, instance); + + let monitor = HealthMonitor::new(challenges.clone(), 
Duration::from_secs(5)); + let status = monitor.check(&challenge_id).await.expect("status returned"); + + assert_eq!(status, ContainerStatus::Running); + assert_eq!( + challenges + .read() + .get(&challenge_id) + .expect("challenge present") + .status, + ContainerStatus::Running + ); + + handle.await.expect("server finished"); + } +} \ No newline at end of file diff --git a/crates/challenge-orchestrator/src/lib.rs b/crates/challenge-orchestrator/src/lib.rs index 466ba5cae..c6842fc02 100644 --- a/crates/challenge-orchestrator/src/lib.rs +++ b/crates/challenge-orchestrator/src/lib.rs @@ -30,11 +30,10 @@ pub use backend::{ SecureBackend, DEFAULT_BROKER_SOCKET, }; pub use config::*; -pub use docker::{CleanupResult, DockerClient}; +pub use docker::{ChallengeDocker, CleanupResult, DockerClient}; pub use evaluator::*; pub use health::*; pub use lifecycle::*; - use parking_lot::RwLock; use platform_core::ChallengeId; use std::collections::HashMap; @@ -43,7 +42,7 @@ use std::sync::Arc; /// Main orchestrator managing all challenge containers #[allow(dead_code)] pub struct ChallengeOrchestrator { - docker: DockerClient, + docker: Arc, challenges: Arc>>, health_monitor: HealthMonitor, config: OrchestratorConfig, @@ -53,11 +52,56 @@ pub struct ChallengeOrchestrator { pub const PLATFORM_NETWORK: &str = "platform-network"; impl ChallengeOrchestrator { + /// Creates and initializes a new ChallengeOrchestrator from the provided configuration. + /// + /// In tests, if a test Docker client has been injected, that client will be used instead of + /// auto-detecting the host Docker environment. Otherwise the function auto-detects a Docker + /// client, ensures the platform network is available, and bootstraps the orchestrator. 
+    ///
+    /// # Examples
+    ///
+    /// ```ignore
+    /// # tokio_test::block_on(async {
+    /// let config = OrchestratorConfig::default();
+    /// let orch = ChallengeOrchestrator::new(config).await.unwrap();
+    /// assert!(orch.list_challenges().is_empty());
+    /// # });
+    /// ```
     pub async fn new(config: OrchestratorConfig) -> anyhow::Result<Self> {
+        #[cfg(test)]
+        if let Some(docker) = Self::take_test_docker_client() {
+            return Self::bootstrap_with_docker(docker, config).await;
+        }
+
         // Auto-detect the network from the validator container
         // This ensures challenge containers are on the same network as the validator
         let docker = DockerClient::connect_auto_detect().await?;
+        Self::bootstrap_with_docker(docker, config).await
+    }
+
+    /// Initializes a ChallengeOrchestrator using the provided Docker client and configuration.
+    ///
+    /// Ensures the platform network exists and attempts to connect the validator container to that
+    /// network (a failure to connect is logged as a warning), then constructs and returns an
+    /// orchestrator that uses the given Docker client.
+    ///
+    /// # Returns
+    ///
+    /// A configured `ChallengeOrchestrator` on success.
+    ///
+    /// # Examples
+    ///
+    /// ```ignore
+    /// # async fn example(docker: crate::docker::DockerClient, config: crate::config::OrchestratorConfig) -> anyhow::Result<()> {
+    /// let orchestrator = crate::orchestrator::bootstrap_with_docker(docker, config).await?;
+    /// // use `orchestrator`...
+ /// # Ok(()) } + /// ``` + async fn bootstrap_with_docker( + docker: DockerClient, + config: OrchestratorConfig, + ) -> anyhow::Result { // Ensure the detected network exists (creates it if running outside Docker) docker.ensure_network().await?; @@ -67,6 +111,65 @@ impl ChallengeOrchestrator { tracing::warn!("Could not connect validator to platform network: {}", e); } + Self::with_docker(docker, config).await + } + + #[cfg(test)] + fn test_docker_client_slot() -> &'static std::sync::Mutex> { + use std::sync::{Mutex, OnceLock}; + static SLOT: OnceLock>> = OnceLock::new(); + SLOT.get_or_init(|| Mutex::new(None)) + } + + /// Removes and returns the test Docker client stored in the global test slot. + /// + /// The stored client is taken (removed) from the slot so subsequent calls will return `None`. + /// + /// # Examples + /// + /// ``` + /// // set_test_docker_client is available in the test-only API + /// let client = DockerClient::new_for_testing(); + /// set_test_docker_client(client); + /// assert!(take_test_docker_client().is_some()); + /// assert!(take_test_docker_client().is_none()); + /// ``` + #[cfg(test)] + fn take_test_docker_client() -> Option { + Self::test_docker_client_slot().lock().unwrap().take() + } + + #[cfg(test)] + pub(crate) fn set_test_docker_client(docker: DockerClient) { + Self::test_docker_client_slot() + .lock() + .unwrap() + .replace(docker); + } + + /// Creates a ChallengeOrchestrator that uses the provided Docker implementation. + /// + /// The returned orchestrator is initialized with an empty challenge registry and a + /// HealthMonitor configured from `config`. 
+    ///
+    /// # Examples
+    ///
+    /// ```ignore
+    /// # // TestDocker is a test-local ChallengeDocker implementation.
+    /// # use challenge_orchestrator::{ChallengeOrchestrator, OrchestratorConfig};
+    /// # tokio_test::block_on(async {
+    /// let config = OrchestratorConfig::default();
+    /// let docker = TestDocker::default();
+    /// let orchestrator = ChallengeOrchestrator::with_docker(docker, config).await.unwrap();
+    /// # });
+    /// ```
+    /// # Returns
+    /// A `ChallengeOrchestrator` backed by the given Docker implementation.
+    pub async fn with_docker(
+        docker: impl ChallengeDocker + 'static,
+        config: OrchestratorConfig,
+    ) -> anyhow::Result<Self> {
+        let docker = Arc::new(docker);
         let challenges = Arc::new(RwLock::new(HashMap::new()));
         let health_monitor = HealthMonitor::new(challenges.clone(), config.health_check_interval);
@@ -275,9 +378,17 @@ impl ChallengeOrchestrator {
         Ok(result)
     }
 
-    /// Get the Docker client for direct operations
-    pub fn docker(&self) -> &DockerClient {
-        &self.docker
+    /// Access the orchestrator's Docker client for direct container operations.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// // Obtain an orchestrator instance, then get a reference to its Docker client:
+    /// // let orchestrator = /* ChallengeOrchestrator::with_docker(...)
or other constructor */; + /// // let client = orchestrator.docker(); + /// ``` + pub fn docker(&self) -> &dyn ChallengeDocker { + self.docker.as_ref() } } @@ -299,3 +410,1155 @@ pub enum ContainerStatus { Unhealthy, Stopped, } + +#[cfg(test)] +mod tests { + use super::*; + use crate::docker::DockerBridge; + use async_trait::async_trait; + use bollard::container::{ + Config, CreateContainerOptions, InspectContainerOptions, ListContainersOptions, LogOutput, + LogsOptions, RemoveContainerOptions, StartContainerOptions, StopContainerOptions, + }; + use bollard::errors::Error as DockerError; + use bollard::image::CreateImageOptions; + use bollard::models::{ + ContainerCreateResponse, ContainerInspectResponse, ContainerSummary, CreateImageInfo, + EndpointSettings, Network, NetworkSettings, + }; + use bollard::network::{ConnectNetworkOptions, CreateNetworkOptions, ListNetworksOptions}; + use bollard::volume::CreateVolumeOptions; + use chrono::Utc; + use futures::{stream, Stream}; + use platform_core::ChallengeId; + use std::collections::HashMap; + use std::pin::Pin; + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::{Arc, Mutex}; + + #[derive(Clone, Default)] + struct TestDocker { + inner: Arc, + } + + struct TestDockerInner { + operations: Mutex>, + cleanup_result: Mutex, + cleanup_calls: Mutex)>>, + next_container_id: AtomicUsize, + } + + impl Default for TestDockerInner { + /// Creates a default test Docker client with empty recorded operations, a default cleanup result, + /// no recorded cleanup calls, and the next container id initialized to 1. + /// + /// # Examples + /// + /// ``` + /// let _client = TestDocker::default(); + /// ``` + fn default() -> Self { + Self { + operations: Mutex::new(Vec::new()), + cleanup_result: Mutex::new(CleanupResult::default()), + cleanup_calls: Mutex::new(Vec::new()), + next_container_id: AtomicUsize::new(1), + } + } + } + + impl TestDocker { + /// Appends an operation entry to the internal operations log. 
+ /// + /// The provided `entry` is converted into a `String` and pushed onto the internal + /// operations vector protected by a mutex. + /// + /// # Examples + /// + /// ``` + /// recorder.record("pull:image"); + /// ``` + fn record(&self, entry: impl Into) { + self.inner.operations.lock().unwrap().push(entry.into()); + } + + /// Retrieve the recorded operations log. + /// + /// Returns a cloned list of operation entries (each entry is a `String`) in the order they were recorded. + /// + /// # Examples + /// + /// ``` + /// // `mock` is an instance with an internal operations log. + /// let ops: Vec = mock.operations(); + /// // inspect or assert on recorded operations + /// assert!(ops.iter().all(|s| !s.is_empty())); + /// ``` + fn operations(&self) -> Vec { + self.inner.operations.lock().unwrap().clone() + } + + /// Update the stored cleanup result used by the orchestrator. + /// + /// The provided `result` replaces the current cached `CleanupResult` + /// held inside the orchestrator's internal state. + /// + /// # Examples + /// + /// ``` + /// // Replace the cached cleanup result with a new value. + /// orchestrator.set_cleanup_result(result); + /// ``` + fn set_cleanup_result(&self, result: CleanupResult) { + *self.inner.cleanup_result.lock().unwrap() = result; + } + + /// Returns the recorded cleanup calls made to the component. + /// + /// The returned vector contains tuples in the form `(prefix, max_age_minutes, exclusions)`, + /// where `prefix` is the container name prefix used for cleanup, `max_age_minutes` is the + /// maximum age (in minutes) used to select stale containers, and `exclusions` lists + /// container name substrings to exclude from cleanup. + /// + /// # Examples + /// + /// ``` + /// // `orchestrator` is a test instance that records cleanup calls. 
+ /// let calls = orchestrator.cleanup_calls(); + /// // Each element is (prefix, max_age_minutes, exclusions) + /// for (prefix, age, excludes) in calls { + /// println!("prefix={} age={} excludes={:?}", prefix, age, excludes); + /// } + /// ``` + fn cleanup_calls(&self) -> Vec<(String, u64, Vec)> { + self.inner.cleanup_calls.lock().unwrap().clone() + } + + /// Creates a synthetic ChallengeInstance for testing based on the given container configuration. + /// + /// The returned instance uses the config's `challenge_id` and `docker_image`. The `container_id` + /// and `endpoint` include an incrementing index to ensure uniqueness; `started_at` is set to the + /// current UTC time and `status` is `Running`. + /// + /// # Examples + /// + /// ``` + /// // Construct a minimal config with the fields required by `next_instance`. + /// // Adjust field names as needed to match the real `ChallengeContainerConfig` in tests. + /// let config = ChallengeContainerConfig { + /// challenge_id: "test-chal".into(), + /// docker_image: "example/image:latest".to_string(), + /// ..Default::default() + /// }; + /// let td = TestDocker::default(); + /// let inst = td.next_instance(&config); + /// assert!(inst.container_id.starts_with("container-test-chal-")); + /// assert_eq!(inst.image, "example/image:latest"); + /// assert_eq!(inst.status, ContainerStatus::Running); + /// ``` + fn next_instance(&self, config: &ChallengeContainerConfig) -> ChallengeInstance { + let idx = self.inner.next_container_id.fetch_add(1, Ordering::SeqCst); + let id_str = config.challenge_id.to_string(); + ChallengeInstance { + challenge_id: config.challenge_id, + container_id: format!("container-{id_str}-{idx}"), + image: config.docker_image.clone(), + endpoint: format!("http://{id_str}:{idx}"), + started_at: Utc::now(), + status: ContainerStatus::Running, + } + } + } + + #[async_trait] + impl ChallengeDocker for TestDocker { + /// Pulls the specified Docker image and records the pull operation. 
+ /// + /// The method records a "pull:" operation and returns `Ok(())` on success. + /// + /// # Examples + /// + /// ``` + /// // Given a Docker client `client` that implements `pull_image`, + /// // the simplest usage is: + /// # async fn example(client: &impl std::ops::Deref) {} + /// // client.pull_image("alpine:latest").await.unwrap(); + /// ``` + async fn pull_image(&self, image: &str) -> anyhow::Result<()> { + self.record(format!("pull:{image}")); + Ok(()) + } + + /// Starts a challenge container from the given container configuration and returns the resulting instance. + /// + /// # Examples + /// + /// ``` + /// // Given a Docker-like client `docker` and a `config` for a challenge: + /// let instance = tokio_test::block_on(async { docker.start_challenge(&config).await.unwrap() }); + /// assert_eq!(instance.challenge_id, config.challenge_id); + /// ``` + async fn start_challenge( + &self, + config: &ChallengeContainerConfig, + ) -> anyhow::Result { + self.record(format!("start:{}", config.challenge_id)); + Ok(self.next_instance(config)) + } + + /// Stop the container identified by `container_id`. + /// + /// Records the stop operation for the target container and attempts to stop it via the Docker backend. + /// + /// # Parameters + /// + /// - `container_id`: Identifier of the container to stop. + /// + /// # Returns + /// + /// `Ok(())` if the stop operation was recorded and completed successfully, `Err` if stopping the container failed. + /// + /// # Examples + /// + /// ``` + /// # async fn example(client: &impl crate::docker::ChallengeDocker) { + /// client.stop_container("container-id").await.unwrap(); + /// # } + /// ``` + async fn stop_container(&self, container_id: &str) -> anyhow::Result<()> { + self.record(format!("stop:{container_id}")); + Ok(()) + } + + /// Record that a container removal was requested for the given container id. 
+ /// + /// This mock implementation appends the operation string `remove:` to the + /// test operation log so tests can assert that a removal was attempted. + /// + /// # Examples + /// + /// ``` + /// // assuming `td` is a test docker instance with an `operations: Mutex>` + /// td.remove_container("abc123").await.unwrap(); + /// assert_eq!(td.operations.lock().unwrap().last().unwrap(), "remove:abc123"); + /// ``` + async fn remove_container(&self, container_id: &str) -> anyhow::Result<()> { + self.record(format!("remove:{container_id}")); + Ok(()) + } + + /// Check whether a container with the given ID is running. + /// + /// # Arguments + /// + /// * `container_id` - The identifier of the container to query. + /// + /// # Returns + /// + /// `true` if the container is running, `false` otherwise. + /// + /// # Examples + /// + /// ```ignore + /// let running = orchestrator.is_container_running("container-123").await.unwrap(); + /// ``` + async fn is_container_running(&self, container_id: &str) -> anyhow::Result { + self.record(format!("is_running:{container_id}")); + Ok(true) + } + + /// Fetches the logs for the specified container and returns the last `tail` lines as a string. + /// + /// The `tail` parameter limits the returned content to the most-recent `tail` lines of the container log. + /// + /// # Examples + /// + /// ``` + /// # async fn example_usage() { + /// // `client` is an instance that exposes `get_logs(&self, container_id: &str, tail: usize)`. + /// let logs = client.get_logs("container-123", 50).await.unwrap(); + /// assert!(logs.contains("logs-container-123")); + /// # } + /// ``` + async fn get_logs(&self, container_id: &str, tail: usize) -> anyhow::Result { + self.record(format!("logs:{container_id}:{tail}")); + Ok(format!("logs-{container_id}")) + } + + /// List IDs of current challenge containers. 
+        ///
+        /// # Examples
+        ///
+        /// ```rust
+        /// # async fn example() {
+        /// let docker = TestDocker::default();
+        /// let ids = docker.list_challenge_containers().await.unwrap();
+        /// assert!(ids.is_empty());
+        /// # }
+        /// ```
+        async fn list_challenge_containers(&self) -> anyhow::Result<Vec<String>> {
+            self.record("list_containers".to_string());
+            Ok(Vec::new())
+        }
+
+        async fn cleanup_stale_containers(
+            &self,
+            prefix: &str,
+            max_age_minutes: u64,
+            exclude_patterns: &[&str],
+        ) -> anyhow::Result<CleanupResult> {
+            self.record(format!("cleanup:{prefix}:{max_age_minutes}"));
+            self.inner.cleanup_calls.lock().unwrap().push((
+                prefix.to_string(),
+                max_age_minutes,
+                exclude_patterns.iter().map(|s| s.to_string()).collect(),
+            ));
+            Ok(self.inner.cleanup_result.lock().unwrap().clone())
+        }
+    }
+
+    /// Creates a `ChallengeContainerConfig` for the given challenge ID and Docker image using sensible defaults.
+    ///
+    /// The returned config sets the challenge name to `challenge-{challenge_id}`, uses the provided image,
+    /// and applies default resource and runtime settings (mechanism_id 0, emission_weight 1.0, 300s timeout,
+    /// 1.0 CPU core, 512 MB memory, no GPU).
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// // Construct a config for a challenge (assumes `ChallengeId` implements `From<&str>`).
+    /// let id = ChallengeId::from("challenge-1");
+    /// let cfg = sample_config_with_id(id, "repo.example/challenge:latest");
+    /// assert_eq!(cfg.docker_image, "repo.example/challenge:latest");
+    /// ```
+    fn sample_config_with_id(challenge_id: ChallengeId, image: &str) -> ChallengeContainerConfig {
+        let id_str = challenge_id.to_string();
+        ChallengeContainerConfig {
+            challenge_id,
+            name: format!("challenge-{id_str}"),
+            docker_image: image.to_string(),
+            mechanism_id: 0,
+            emission_weight: 1.0,
+            timeout_secs: 300,
+            cpu_cores: 1.0,
+            memory_mb: 512,
+            gpu_required: false,
+        }
+    }
+
+    /// Creates a ChallengeContainerConfig for the given image using a newly generated ChallengeId.
+ /// + /// The returned config is populated for the provided Docker image and uses a freshly generated + /// ChallengeId produced by `ChallengeId::new()`. + /// + /// # Examples + /// + /// ``` + /// let cfg = sample_config("example/image:latest"); + /// assert_eq!(cfg.docker_image, "example/image:latest"); + /// ``` + fn sample_config(image: &str) -> ChallengeContainerConfig { + sample_config_with_id(ChallengeId::new(), image) + } + + /// Create a ChallengeOrchestrator backed by the provided test Docker implementation. + /// + /// # Examples + /// + /// ``` + /// # use crate::tests::TestDocker; + /// # use crate::orchestrator::orchestrator_with_mock; + /// #[tokio::test] + /// async fn create_orchestrator_with_test_docker() { + /// let docker = TestDocker::default(); + /// let orch = orchestrator_with_mock(docker).await; + /// assert!(orch.list_challenges().is_empty()); + /// } + /// ``` + async fn orchestrator_with_mock(docker: TestDocker) -> ChallengeOrchestrator { + ChallengeOrchestrator::with_docker(docker, OrchestratorConfig::default()) + .await + .expect("build orchestrator") + } + + #[tokio::test] + async fn test_add_challenge_registers_instance() { + let docker = TestDocker::default(); + let orchestrator = orchestrator_with_mock(docker.clone()).await; + let config = sample_config("ghcr.io/platformnetwork/challenge:v1"); + let challenge_id = config.challenge_id; + + orchestrator + .add_challenge(config.clone()) + .await + .expect("add challenge"); + + let stored = orchestrator + .get_challenge(&challenge_id) + .expect("challenge stored"); + assert_eq!(stored.image, config.docker_image); + assert_eq!(orchestrator.list_challenges(), vec![challenge_id]); + + let ops = docker.operations(); + assert!(ops.contains(&format!("pull:{}", config.docker_image))); + assert!(ops.contains(&format!("start:{}", challenge_id))); + } + + #[tokio::test] + async fn test_update_challenge_restarts_with_new_image() { + let docker = TestDocker::default(); + let orchestrator = 
orchestrator_with_mock(docker.clone()).await; + let mut config = sample_config("ghcr.io/platformnetwork/challenge:v1"); + let challenge_id = config.challenge_id; + + orchestrator + .add_challenge(config.clone()) + .await + .expect("initial add"); + let initial_instance = orchestrator + .get_challenge(&challenge_id) + .expect("initial instance"); + + config.docker_image = "ghcr.io/platformnetwork/challenge:v2".into(); + orchestrator + .update_challenge(config.clone()) + .await + .expect("update succeeds"); + + let updated = orchestrator + .get_challenge(&challenge_id) + .expect("updated instance"); + assert_eq!(updated.image, config.docker_image); + assert_ne!(updated.container_id, initial_instance.container_id); + + let ops = docker.operations(); + assert!(ops + .iter() + .any(|op| op == &format!("stop:{}", initial_instance.container_id))); + assert!(ops + .iter() + .any(|op| op == &format!("pull:{}", config.docker_image))); + } + + #[tokio::test] + async fn test_remove_challenge_stops_and_removes_container() { + let docker = TestDocker::default(); + let orchestrator = orchestrator_with_mock(docker.clone()).await; + let config = sample_config("ghcr.io/platformnetwork/challenge:remove"); + let challenge_id = config.challenge_id; + + orchestrator + .add_challenge(config) + .await + .expect("added challenge"); + let container_id = orchestrator + .get_challenge(&challenge_id) + .unwrap() + .container_id; + + orchestrator + .remove_challenge(challenge_id) + .await + .expect("removed challenge"); + assert!(orchestrator.get_challenge(&challenge_id).is_none()); + + let ops = docker.operations(); + assert!(ops.contains(&format!("stop:{container_id}"))); + assert!(ops.contains(&format!("remove:{container_id}"))); + } + + #[tokio::test] + async fn test_refresh_challenge_repulls_image() { + let docker = TestDocker::default(); + let orchestrator = orchestrator_with_mock(docker.clone()).await; + let config = sample_config("ghcr.io/platformnetwork/challenge:refresh"); + let 
challenge_id = config.challenge_id; + + orchestrator + .add_challenge(config.clone()) + .await + .expect("added challenge"); + let initial = orchestrator + .get_challenge(&challenge_id) + .expect("initial instance"); + + orchestrator + .refresh_challenge(challenge_id) + .await + .expect("refresh succeeds"); + let refreshed = orchestrator + .get_challenge(&challenge_id) + .expect("refreshed instance"); + + assert_eq!(refreshed.image, initial.image); + assert_ne!(refreshed.container_id, initial.container_id); + + let ops = docker.operations(); + let pull_count = ops + .iter() + .filter(|op| *op == &format!("pull:{}", initial.image)) + .count(); + assert_eq!(pull_count, 2, "pull once for add, once for refresh"); + } + + #[tokio::test] + async fn test_sync_challenges_handles_all_paths() { + let docker = TestDocker::default(); + let orchestrator = orchestrator_with_mock(docker.clone()).await; + let update_config = sample_config("ghcr.io/platformnetwork/challenge:update-v1"); + let remove_config = sample_config("ghcr.io/platformnetwork/challenge:remove-v1"); + let update_id = update_config.challenge_id; + let remove_id = remove_config.challenge_id; + + orchestrator + .add_challenge(update_config.clone()) + .await + .expect("added update target"); + orchestrator + .add_challenge(remove_config.clone()) + .await + .expect("added removal target"); + + let remove_container_id = orchestrator.get_challenge(&remove_id).unwrap().container_id; + + let new_id = ChallengeId::new(); + let desired = vec![ + sample_config_with_id(update_id, "ghcr.io/platformnetwork/challenge:update-v2"), + sample_config_with_id(new_id, "ghcr.io/platformnetwork/challenge:new"), + ]; + + orchestrator + .sync_challenges(&desired) + .await + .expect("sync succeeds"); + + let ids = orchestrator.list_challenges(); + assert!(ids.contains(&update_id)); + assert!(ids.contains(&new_id)); + assert!(!ids.contains(&remove_id)); + + let ops = docker.operations(); + 
assert!(ops.contains(&format!("stop:{remove_container_id}"))); + assert!(ops.contains(&format!("remove:{remove_container_id}"))); + assert!(ops + .iter() + .any(|op| op == &"pull:ghcr.io/platformnetwork/challenge:update-v2".to_string())); + assert!(ops + .iter() + .any(|op| op == &"pull:ghcr.io/platformnetwork/challenge:new".to_string())); + } + + #[tokio::test] + async fn test_cleanup_stale_task_containers_propagates_result() { + let docker = TestDocker::default(); + docker.set_cleanup_result(CleanupResult { + total_found: 3, + removed: 2, + errors: vec!["dang".into()], + }); + let orchestrator = orchestrator_with_mock(docker.clone()).await; + + let result = orchestrator + .cleanup_stale_task_containers() + .await + .expect("cleanup ok"); + assert_eq!(result.total_found, 3); + assert_eq!(result.removed, 2); + assert_eq!(result.errors, vec!["dang".to_string()]); + + let calls = docker.cleanup_calls(); + assert_eq!(calls.len(), 1); + let (prefix, max_age, excludes) = &calls[0]; + assert_eq!(prefix, "term-challenge-"); + assert_eq!(*max_age, 120); + let expected: Vec = vec![ + "challenge-term-challenge".to_string(), + "platform-".to_string(), + ]; + assert_eq!(excludes, &expected); + } + + #[tokio::test] + async fn test_refresh_all_challenges_refreshes_each_container() { + let docker = TestDocker::default(); + let orchestrator = orchestrator_with_mock(docker.clone()).await; + let config_a = sample_config("ghcr.io/platformnetwork/challenge:refresh-a"); + let config_b = sample_config("ghcr.io/platformnetwork/challenge:refresh-b"); + let id_a = config_a.challenge_id; + let id_b = config_b.challenge_id; + + orchestrator + .add_challenge(config_a.clone()) + .await + .expect("added first challenge"); + orchestrator + .add_challenge(config_b.clone()) + .await + .expect("added second challenge"); + + let first_initial = orchestrator + .get_challenge(&id_a) + .expect("first challenge present") + .container_id; + let second_initial = orchestrator + .get_challenge(&id_b) + 
.expect("second challenge present") + .container_id; + + orchestrator + .refresh_all_challenges() + .await + .expect("refresh all succeeds"); + + let first_refreshed = orchestrator + .get_challenge(&id_a) + .expect("first challenge refreshed") + .container_id; + let second_refreshed = orchestrator + .get_challenge(&id_b) + .expect("second challenge refreshed") + .container_id; + + assert_ne!(first_initial, first_refreshed); + assert_ne!(second_initial, second_refreshed); + + let ops = docker.operations(); + assert!(ops.contains(&format!("stop:{first_initial}"))); + assert!(ops.contains(&format!("stop:{second_initial}"))); + } + + #[tokio::test] + async fn test_start_launches_health_monitor() { + let orchestrator = orchestrator_with_mock(TestDocker::default()).await; + orchestrator + .start() + .await + .expect("health monitor start succeeds"); + } + + #[tokio::test] + async fn test_evaluator_method_returns_shared_state() { + let docker = TestDocker::default(); + let orchestrator = orchestrator_with_mock(docker).await; + let config = sample_config("ghcr.io/platformnetwork/challenge:evaluator"); + let challenge_id = config.challenge_id; + + orchestrator + .add_challenge(config) + .await + .expect("challenge added"); + + let evaluator = orchestrator.evaluator(); + let ids: Vec<_> = evaluator + .list_challenges() + .into_iter() + .map(|status| status.challenge_id) + .collect(); + + assert_eq!(ids, vec![challenge_id]); + } + + #[tokio::test] + async fn test_docker_method_exposes_underlying_client() { + let docker = TestDocker::default(); + let orchestrator = orchestrator_with_mock(docker.clone()).await; + + orchestrator + .docker() + .list_challenge_containers() + .await + .expect("list call succeeds"); + + let ops = docker.operations(); + assert!(ops.contains(&"list_containers".to_string())); + } + + /// Ensures `ChallengeOrchestrator::new` picks up a test Docker client injected via `set_test_docker_client`. 
+ /// + /// This test verifies that when a test Docker client is provided through the orchestrator's test slot, + /// constructing a new orchestrator uses that client to create and connect the platform network. + /// + /// # Examples + /// + /// ``` + /// # use std::env; + /// # use crate::{ChallengeOrchestrator, OrchestratorConfig, PLATFORM_NETWORK}; + /// # use crate::tests::{TestDockerBridge, DockerClient}; + /// let bridge = TestDockerBridge::default(); + /// let docker = DockerClient::with_bridge(bridge.clone(), PLATFORM_NETWORK); + /// ChallengeOrchestrator::set_test_docker_client(docker); + /// + /// let original_hostname = env::var("HOSTNAME").ok(); + /// env::set_var("HOSTNAME", "abcdef123456"); + /// + /// let orchestrator = ChallengeOrchestrator::new(OrchestratorConfig::default()) + /// .await + /// .expect("constructed orchestrator"); + /// assert_eq!(bridge.created_networks(), vec![PLATFORM_NETWORK.to_string()]); + /// assert!(bridge + /// .connected_networks() + /// .iter() + /// .any(|name| name == PLATFORM_NETWORK)); + /// + /// drop(orchestrator); + /// + /// if let Some(value) = original_hostname { + /// env::set_var("HOSTNAME", value); + /// } else { + /// env::remove_var("HOSTNAME"); + /// } + /// ``` + #[tokio::test] + async fn test_new_uses_injected_docker_client() { + let bridge = TestDockerBridge::default(); + let docker = DockerClient::with_bridge(bridge.clone(), PLATFORM_NETWORK); + ChallengeOrchestrator::set_test_docker_client(docker); + + let original_hostname = std::env::var("HOSTNAME").ok(); + std::env::set_var("HOSTNAME", "abcdef123456"); + + let orchestrator = ChallengeOrchestrator::new(OrchestratorConfig::default()) + .await + .expect("constructed orchestrator"); + assert_eq!( + bridge.created_networks(), + vec![PLATFORM_NETWORK.to_string()] + ); + assert!(bridge + .connected_networks() + .iter() + .any(|name| name == PLATFORM_NETWORK)); + + drop(orchestrator); + + if let Some(value) = original_hostname { + 
std::env::set_var("HOSTNAME", value); + } else { + std::env::remove_var("HOSTNAME"); + } + } + + #[derive(Clone, Default)] + struct TestDockerBridge { + inner: Arc, + } + + #[derive(Default)] + struct TestDockerBridgeInner { + available_networks: Mutex>, + created_networks: Mutex>, + connected_networks: Mutex>, + } + + impl TestDockerBridge { + /// Get a clone of the network names that this orchestrator has recorded as created. + /// + /// The returned vector contains the names of networks that were created and stored internally. + /// + /// # Examples + /// + /// ```no_run + /// // Assuming `orch` is an instance of the orchestrator: + /// // let names = orch.created_networks(); + /// // assert!(names.iter().all(|n| !n.is_empty())); + /// ``` + fn created_networks(&self) -> Vec { + self.inner.created_networks.lock().unwrap().clone() + } + + /// Get the current list of connected network names. + /// + /// The returned vector is a snapshot of the orchestrator's connected networks at the time of the call; each entry is an owned network name. + /// + /// # Examples + /// + /// ``` + /// let nets = orchestrator.connected_networks(); + /// // `nets` is a Vec containing network names + /// assert!(nets.iter().all(|n| !n.is_empty())); + /// ``` + fn connected_networks(&self) -> Vec { + self.inner.connected_networks.lock().unwrap().clone() + } + } + + #[async_trait] + impl DockerBridge for TestDockerBridge { + /// Performs a liveness check against the configured Docker backend. + /// + /// # Returns + /// + /// `Ok(())` if the Docker backend is reachable and responsive, `Err(DockerError)` otherwise. + /// + /// # Examples + /// + /// ``` + /// // Synchronously run the async ping in a simple executor for demonstration. + /// // Replace `orchestrator` with a value implementing the method in your code. 
+ /// let _ = futures::executor::block_on(orchestrator.ping()); + /// ``` + async fn ping(&self) -> Result<(), DockerError> { + Ok(()) + } + + /// Returns the client's available Docker networks as a vector of `Network` objects. + /// + /// The `_options` parameter is ignored; the function snapshots the client's internal + /// available_networks and returns one `Network` per name with the `name` field set. + /// + /// # Examples + /// + /// ``` + /// let nets = futures::executor::block_on(client.list_networks(None)).unwrap(); + /// assert!(nets.iter().all(|n| n.name.is_some())); + /// ``` + async fn list_networks( + &self, + _options: Option>, + ) -> Result, DockerError> { + let networks = self.inner.available_networks.lock().unwrap().clone(); + Ok(networks + .into_iter() + .map(|name| Network { + name: Some(name), + ..Default::default() + }) + .collect()) + } + + /// Registers a new network name with the test Docker implementation by adding the provided + /// network name to both the created and available network lists. + /// + /// # Parameters + /// + /// - `options.name`: the network name to create. + /// + /// # Examples + /// + /// ``` + /// use bollard::network::CreateNetworkOptions; + /// let docker = TestDocker::new(); + /// let opts = CreateNetworkOptions { name: "platform-network".to_string(), ..Default::default() }; + /// futures::executor::block_on(docker.create_network(opts)).unwrap(); + /// assert!(docker.inner.available_networks.lock().unwrap().contains(&"platform-network".to_string())); + /// ``` + async fn create_network( + &self, + options: CreateNetworkOptions, + ) -> Result<(), DockerError> { + self.inner + .created_networks + .lock() + .unwrap() + .push(options.name.clone()); + self.inner + .available_networks + .lock() + .unwrap() + .push(options.name); + Ok(()) + } + + /// Builds a ContainerInspectResponse whose NetworkSettings.networks map contains + /// the orchestrator's currently connected network names mapped to default EndpointSettings. 
+ /// + /// # Examples + /// + /// ``` + /// # use futures::executor::block_on; + /// // Assuming `bridge` implements the same method signature as shown. + /// // let resp = block_on(bridge.inspect_container("id", None)).unwrap(); + /// // assert!(resp.network_settings.unwrap().networks.unwrap().contains_key("platform-network")); + /// ``` + async fn inspect_container( + &self, + _id: &str, + _options: Option, + ) -> Result { + let mut map = HashMap::new(); + for name in self + .inner + .connected_networks + .lock() + .unwrap() + .iter() + .cloned() + { + map.insert(name, EndpointSettings::default()); + } + Ok(ContainerInspectResponse { + network_settings: Some(NetworkSettings { + networks: Some(map), + ..Default::default() + }), + ..Default::default() + }) + } + + /// Marks the bridge as connected to the given network and ensures the network is listed as available. + /// + /// The method records that this bridge is connected to `network` and makes sure the network + /// name appears in the bridge's available networks. + /// + /// # Parameters + /// + /// - `network`: Name of the network to connect. + /// + /// # Returns + /// + /// `Ok(())` if the network was recorded successfully, `Err(DockerError)` if an underlying error occurred. + /// + /// # Examples + /// + /// ```no_run + /// // Assuming `bridge` implements `connect_network`. 
+ /// # async fn doc_example>() {} + /// # + /// // Example usage: + /// // bridge.connect_network("platform-network", Default::default()).await.unwrap(); + /// ``` + async fn connect_network( + &self, + network: &str, + _options: ConnectNetworkOptions, + ) -> Result<(), DockerError> { + let mut connected = self.inner.connected_networks.lock().unwrap(); + if !connected.iter().any(|name| name == network) { + connected.push(network.to_string()); + } + let mut available = self.inner.available_networks.lock().unwrap(); + if !available.iter().any(|name| name == network) { + available.push(network.to_string()); + } + Ok(()) + } + + /// Produces a stream of image creation progress and informational messages for a requested image. + /// + /// The stream yields `Result` items describing progress or errors and completes when image creation finishes. + /// + /// # Parameters + /// - `options`: optional parameters that control how the image is created (e.g., image reference, auth), if provided. + /// + /// # Returns + /// A stream that produces progress/info messages (`CreateImageInfo`) wrapped in `Result`; the stream ends when image creation completes. + /// + /// # Examples + /// + /// ``` + /// use futures::StreamExt; + /// + /// // `client` is an instance providing `create_image_stream`. + /// let mut stream = client.create_image_stream(None); + /// // In this implementation the stream may complete without yielding items. + /// let next = futures::executor::block_on(async { stream.next().await }); + /// assert!(next.is_none() || next.unwrap().is_ok()); + /// ``` + fn create_image_stream( + &self, + _options: Option>, + ) -> Pin> + Send>> { + Box::pin(stream::empty::>()) + as Pin> + Send>> + } + + /// Creates a Docker volume from the provided options. + /// + /// In this implementation the call is a no-op and always succeeds. 
+ /// + /// # Examples + /// + /// ``` + /// # async fn example(client: &impl std::marker::Send) { + /// // `options` is typically constructed with the desired volume name and labels. + /// let options = Default::default(); + /// // `create_volume` is async; call from an async context. + /// // Here we ignore the concrete client type for the example. + /// // client.create_volume(options).await.unwrap(); + /// # } + /// ``` + async fn create_volume( + &self, + _options: CreateVolumeOptions, + ) -> Result<(), DockerError> { + Ok(()) + } + + /// Simulates creating a Docker container and returns a fixed test response. + /// + /// This test implementation always returns a `ContainerCreateResponse` with the + /// container id set to `"test-container"` and an empty warnings list. It is + /// intended for use in unit tests or test harnesses that require a predictable + /// create-container result. + /// + /// # Examples + /// + /// ``` + /// # use futures::executor::block_on; + /// # // assume `create_container` is available in scope on a test object `svc` + /// let resp = block_on(svc.create_container(None, Default::default())).unwrap(); + /// assert_eq!(resp.id, "test-container"); + /// assert!(resp.warnings.is_empty()); + /// ``` + async fn create_container( + &self, + _options: Option>, + _config: Config, + ) -> Result { + Ok(ContainerCreateResponse { + id: "test-container".to_string(), + warnings: Vec::new(), + }) + } + + /// Starts the container identified by `id`, applying the provided start options if any. + + /// + + /// `id` is the container identifier to start. `options` configures runtime start parameters + + /// such as entrypoint overrides, environment variables, and networking settings. + + /// + + /// # Returns + + /// + + /// `Ok(())` if the container was started successfully, `Err(DockerError)` if the start operation failed. 
+        ///
+        /// # Examples
+        ///
+        /// ```no_run
+        /// // NOTE(review): this test-bridge implementation is a no-op that always succeeds.
+        /// // Typical usage (options type follows the bollard API — confirm against DockerBridge):
+        /// // bridge.start_container("container-123", None).await.unwrap();
+        /// ```
+        async fn start_container(
+            &self,
+            _id: &str,
+            _options: Option<StartContainerOptions<String>>,
+        ) -> Result<(), DockerError> {
+            Ok(())
+        }
+
+        /// Stops the container with the given id.
+        ///
+        /// This implementation is a no-op and reports success without performing any action.
+        ///
+        /// # Examples
+        ///
+        /// ```no_run
+        /// // `bridge` is a test implementation providing `stop_container`; callers await the
+        /// // async operation, e.g.:
+        /// // futures::executor::block_on(bridge.stop_container("container-id", None)).unwrap();
+        /// ```
+        ///
+        /// # Returns
+        ///
+        /// `Ok(())` on success, or a `DockerError` on failure.
+        async fn stop_container(
+            &self,
+            _id: &str,
+            _options: Option<StopContainerOptions>,
+        ) -> Result<(), DockerError> {
+            Ok(())
+        }
+
+        /// Removes a container identified by `id` from the Docker runtime.
+        ///
+        /// Performs container removal using the provided optional `RemoveContainerOptions`. On success the container is removed and the function returns `Ok(())`.
+        ///
+        /// # Examples
+        ///
+        /// ```no_run
+        /// // Example uses local stand-ins for crate types to demonstrate calling the method.
+ /// type RemoveContainerOptions = (); + /// type DockerError = (); + /// + /// struct Dummy; + /// + /// impl Dummy { + /// async fn remove_container( + /// &self, + /// _id: &str, + /// _options: Option, + /// ) -> Result<(), DockerError> { + /// Ok(()) + /// } + /// } + /// + /// # futures::executor::block_on(async { + /// let client = Dummy; + /// client.remove_container("container-id", None).await.unwrap(); + /// # }); + /// ``` + async fn remove_container( + &self, + _id: &str, + _options: Option, + ) -> Result<(), DockerError> { + Ok(()) + } + + /// Lists Docker containers according to the provided options. + /// + /// The optional `options` argument specifies filters and list parameters (such as all/only-running, + /// name/image filters, or limit) to apply when enumerating containers. + /// + /// # Returns + /// + /// A `Vec` containing a summary entry for each container that matches `options`. + /// + /// # Examples + /// + /// ``` + /// # async fn _example(orchestrator: &impl std::ops::Deref) { + /// let summaries = orchestrator.list_containers(None).await.unwrap(); + /// for s in summaries { + /// println!("container id: {}", s.Id); + /// } + /// # } + /// ``` + async fn list_containers( + &self, + _options: Option>, + ) -> Result, DockerError> { + Ok(Vec::new()) + } + + /// Provides a stream of log frames for the specified container. + /// + /// The stream yields `Result` items representing log frames or errors; + /// this implementation returns an empty stream that yields no items. + /// + /// # Examples + /// + /// ```no_run + /// // `obj` is an instance that implements this method. 
+ /// let stream = obj.logs_stream("container-id", Default::default()); + /// // `stream` implements `futures::Stream>` + /// ``` + fn logs_stream( + &self, + _id: &str, + _options: LogsOptions, + ) -> Pin> + Send>> { + Box::pin(stream::empty::>()) + as Pin> + Send>> + } + } +} \ No newline at end of file diff --git a/crates/challenge-orchestrator/src/lifecycle.rs b/crates/challenge-orchestrator/src/lifecycle.rs index d54704f4e..aa52f40fc 100644 --- a/crates/challenge-orchestrator/src/lifecycle.rs +++ b/crates/challenge-orchestrator/src/lifecycle.rs @@ -1,6 +1,8 @@ //! Container lifecycle management -use crate::{ChallengeContainerConfig, ChallengeInstance, ContainerStatus, DockerClient}; +#[cfg(test)] +use crate::CleanupResult; +use crate::{ChallengeContainerConfig, ChallengeDocker, ChallengeInstance, ContainerStatus}; use parking_lot::RwLock; use platform_core::ChallengeId; use std::collections::HashMap; @@ -9,18 +11,34 @@ use tracing::{error, info}; /// Manages the lifecycle of challenge containers pub struct LifecycleManager { - docker: DockerClient, + docker: Box, challenges: Arc>>, configs: Arc>>, } impl LifecycleManager { + /// Creates a new LifecycleManager using the provided Docker implementation and shared challenges map. + /// + /// The returned manager stores the given docker implementation (boxed) and the provided `challenges` map, + /// and initializes an empty, shared `configs` map for tracking container configurations. + /// + /// # Examples + /// + /// ```no_run + /// use std::sync::{Arc, RwLock}; + /// use std::collections::HashMap; + /// + /// // Assume `MyDocker` implements `ChallengeDocker` and `ChallengeId`/`ChallengeInstance` are in scope. 
+ /// let challenges: Arc>> = Arc::new(RwLock::new(HashMap::new())); + /// let docker = MyDocker::new(); + /// let manager = LifecycleManager::new(docker, challenges); + /// ``` pub fn new( - docker: DockerClient, + docker: impl ChallengeDocker + 'static, challenges: Arc>>, ) -> Self { Self { - docker, + docker: Box::new(docker), challenges, configs: Arc::new(RwLock::new(HashMap::new())), } @@ -222,6 +240,10 @@ impl SyncResult { #[cfg(test)] mod tests { use super::*; + use async_trait::async_trait; + use chrono::Utc; + use std::collections::HashMap; + use std::sync::{Arc, Mutex}; #[test] fn test_sync_result_default() { @@ -249,4 +271,514 @@ mod tests { assert!(!result.is_success()); } -} + + #[tokio::test] + async fn test_restart_unhealthy_restarts_only_unhealthy() { + let mock = MockDocker::default(); + let mut manager = + LifecycleManager::new(mock.clone(), Arc::new(RwLock::new(HashMap::new()))); + + let unhealthy_id = ChallengeId::new(); + let healthy_id = ChallengeId::new(); + let unhealthy_container_id = "container-unhealthy"; + let healthy_container_id = "container-healthy"; + + manager.configs.write().insert( + unhealthy_id, + sample_config(unhealthy_id, "ghcr.io/org/unhealthy:1"), + ); + manager.configs.write().insert( + healthy_id, + sample_config(healthy_id, "ghcr.io/org/healthy:1"), + ); + + manager.challenges.write().insert( + unhealthy_id, + sample_instance( + unhealthy_id, + unhealthy_container_id, + "ghcr.io/org/unhealthy:1", + ContainerStatus::Unhealthy, + ), + ); + manager.challenges.write().insert( + healthy_id, + sample_instance( + healthy_id, + healthy_container_id, + "ghcr.io/org/healthy:1", + ContainerStatus::Running, + ), + ); + + let results = manager.restart_unhealthy().await; + + assert_eq!(results.len(), 1); + assert_eq!(results[0].0, unhealthy_id); + assert!(results[0].1.is_ok()); + + let ops = mock.operations(); + assert!(ops + .iter() + .any(|op| op == &format!("stop:{unhealthy_container_id}"))); + assert!(ops + .iter() + .any(|op| op 
== &format!("remove:{unhealthy_container_id}"))); + assert!(ops + .iter() + .any(|op| op == &format!("start:{}", unhealthy_id.to_string()))); + assert!(!ops + .iter() + .any(|op| op == &format!("stop:{healthy_container_id}"))); + } + + #[tokio::test] + async fn test_sync_handles_add_update_remove() { + let mock = MockDocker::default(); + let challenges = Arc::new(RwLock::new(HashMap::new())); + let mut manager = LifecycleManager::new(mock.clone(), challenges); + + let update_id = ChallengeId::new(); + let remove_id = ChallengeId::new(); + let new_id = ChallengeId::new(); + + manager + .configs + .write() + .insert(update_id, sample_config(update_id, "ghcr.io/org/update:v1")); + manager + .configs + .write() + .insert(remove_id, sample_config(remove_id, "ghcr.io/org/remove:v1")); + + manager.challenges.write().insert( + update_id, + sample_instance( + update_id, + "container-update-old", + "ghcr.io/org/update:v1", + ContainerStatus::Running, + ), + ); + manager.challenges.write().insert( + remove_id, + sample_instance( + remove_id, + "container-remove-old", + "ghcr.io/org/remove:v1", + ContainerStatus::Running, + ), + ); + + let result = manager + .sync(vec![ + sample_config(update_id, "ghcr.io/org/update:v2"), + sample_config(new_id, "ghcr.io/org/new:v1"), + ]) + .await + .expect("sync succeeds"); + + assert_eq!(result.added, vec![new_id]); + assert_eq!(result.updated, vec![update_id]); + assert_eq!(result.removed, vec![remove_id]); + assert!(result.errors.is_empty()); + assert!(result.unchanged.is_empty()); + + let challenges = manager.challenges.read(); + assert!(challenges.contains_key(&update_id)); + assert!(challenges.contains_key(&new_id)); + assert!(!challenges.contains_key(&remove_id)); + drop(challenges); + + let ops = mock.operations(); + assert!(ops.iter().any(|op| op == "pull:ghcr.io/org/update:v2")); + assert!(ops.iter().any(|op| op == "pull:ghcr.io/org/new:v1")); + assert!(ops + .iter() + .any(|op| op == &format!("start:{}", update_id.to_string()))); 
+ assert!(ops + .iter() + .any(|op| op == &format!("start:{}", new_id.to_string()))); + assert!(ops.iter().any(|op| op == "stop:container-update-old")); + assert!(ops.iter().any(|op| op == "remove:container-update-old")); + assert!(ops.iter().any(|op| op == "stop:container-remove-old")); + assert!(ops.iter().any(|op| op == "remove:container-remove-old")); + } + + #[tokio::test] + async fn test_add_records_config_and_instance_state() { + let mock = MockDocker::default(); + let challenges = Arc::new(RwLock::new(HashMap::new())); + let mut manager = LifecycleManager::new(mock.clone(), challenges); + let challenge_id = ChallengeId::new(); + let config = sample_config(challenge_id, "ghcr.io/org/add:v1"); + + manager.add(config.clone()).await.expect("add succeeds"); + + assert!(manager.challenges.read().contains_key(&challenge_id)); + assert!(manager.configs.read().contains_key(&challenge_id)); + + let ops = mock.operations(); + assert!(ops.contains(&format!("pull:{}", config.docker_image))); + assert!(ops.contains(&format!("start:{}", challenge_id))); + } + + #[tokio::test] + async fn test_stop_all_removes_every_challenge() { + let mock = MockDocker::default(); + let challenges = Arc::new(RwLock::new(HashMap::new())); + let mut manager = LifecycleManager::new(mock.clone(), challenges); + + let first_id = ChallengeId::new(); + let second_id = ChallengeId::new(); + + manager + .configs + .write() + .insert(first_id, sample_config(first_id, "ghcr.io/org/first:v1")); + manager + .configs + .write() + .insert(second_id, sample_config(second_id, "ghcr.io/org/second:v1")); + + manager.challenges.write().insert( + first_id, + sample_instance( + first_id, + "container-first", + "ghcr.io/org/first:v1", + ContainerStatus::Running, + ), + ); + manager.challenges.write().insert( + second_id, + sample_instance( + second_id, + "container-second", + "ghcr.io/org/second:v1", + ContainerStatus::Running, + ), + ); + + let results = manager.stop_all().await; + + assert_eq!(results.len(), 
2); + assert!(results.iter().all(|(_, res)| res.is_ok())); + assert!(manager.challenges.read().is_empty()); + assert!(manager.configs.read().is_empty()); + + let ops = mock.operations(); + assert!(ops.contains(&"stop:container-first".to_string())); + assert!(ops.contains(&"remove:container-first".to_string())); + assert!(ops.contains(&"stop:container-second".to_string())); + assert!(ops.contains(&"remove:container-second".to_string())); + } + + #[derive(Clone, Default)] + struct MockDocker { + inner: Arc, + } + + #[derive(Default)] + struct MockDockerInner { + operations: Mutex>, + } + + impl MockDocker { + /// Appends an operation description to the internal operations log. + /// + /// # Examples + /// + /// ``` + /// let inner = MockDockerInner::default(); + /// inner.record("start"); + /// assert_eq!(inner.operations.lock().unwrap().last().unwrap(), "start"); + /// ``` + fn record(&self, entry: impl Into) { + self.inner.operations.lock().unwrap().push(entry.into()); + } + + /// Retrieve the recorded docker operations in chronological order. + /// + /// # Returns + /// + /// A `Vec` containing each recorded operation, ordered from oldest to newest. + /// + /// # Examples + /// + /// ``` + /// let mock = MockDocker::new(); + /// // ... perform operations that the mock records ... + /// let ops = mock.operations(); + /// assert!(ops.len() >= 0); + /// ``` + fn operations(&self) -> Vec { + self.inner.operations.lock().unwrap().clone() + } + } + + #[async_trait] + impl ChallengeDocker for MockDocker { + /// Pulls the specified container image into the local Docker cache. + /// + /// The `image` argument is a Docker image reference (for example `"repo/image:tag"` or + /// `"repo/image@sha256:"`). The function ensures the image is available locally, + /// returning an error if the pull fails. 
+ /// + /// # Examples + /// + /// ```no_run + /// // Assuming `docker` implements the method: + /// // docker.pull_image("repo/image:latest").await?; + /// ``` + async fn pull_image(&self, image: &str) -> anyhow::Result<()> { + self.record(format!("pull:{image}")); + Ok(()) + } + + /// Starts a challenge container according to the provided configuration and returns its runtime instance. + /// + /// # Parameters + /// + /// - `config`: configuration describing the challenge container to start. + /// + /// # Returns + /// + /// `ChallengeInstance` representing the started container, with its `status` set to `ContainerStatus::Running`. + /// + /// # Examples + /// + /// ``` + /// # use crate::{sample_config, ChallengeId}; + /// # tokio_test::block_on(async { + /// let config = sample_config(ChallengeId::new(), "example-image:latest"); + /// let docker = crate::tests::MockDocker::default(); + /// let inst = docker.start_challenge(&config).await.unwrap(); + /// assert_eq!(inst.challenge_id, config.challenge_id); + /// assert_eq!(inst.status, crate::ContainerStatus::Running); + /// # }); + /// ``` + async fn start_challenge( + &self, + config: &ChallengeContainerConfig, + ) -> anyhow::Result { + self.record(format!("start:{}", config.challenge_id)); + Ok(sample_instance( + config.challenge_id, + &format!("container-{}", config.challenge_id), + &config.docker_image, + ContainerStatus::Running, + )) + } + + /// Records a stop operation for the container identified by `container_id`. + /// + /// In the mock implementation this does not stop a real container; it records the stop action and returns `Ok(())`. + /// + /// # Examples + /// + /// ``` + /// # use futures::executor::block_on; + /// # // Assume `mock` implements `stop_container` and `operations`. 
+ /// # let mock = MockDocker::default(); + /// block_on(mock.stop_container("container-123")).unwrap(); + /// assert_eq!(mock.operations(), vec!["stop:container-123"]); + /// ``` + async fn stop_container(&self, container_id: &str) -> anyhow::Result<()> { + self.record(format!("stop:{container_id}")); + Ok(()) + } + + /// Record the removal of a container identified by `container_id`. + /// + /// This method records a "remove:{container_id}" operation in the manager's internal operation log + /// and returns success if the recording completes. + /// + /// # Parameters + /// + /// - `container_id`: The identifier of the container to record removal for. + /// + /// # Returns + /// + /// `Ok(())` if the removal record was recorded successfully, an `Err` if recording failed. + /// + /// # Examples + /// + /// ``` + /// struct Dummy; + /// + /// impl Dummy { + /// async fn remove_container(&self, _container_id: &str) -> anyhow::Result<()> { + /// // simulate the real method's contract for the example + /// Ok(()) + /// } + /// } + /// + /// # futures::executor::block_on(async { + /// let d = Dummy; + /// let res = d.remove_container("container-123").await; + /// assert!(res.is_ok()); + /// # }); + /// ``` + async fn remove_container(&self, container_id: &str) -> anyhow::Result<()> { + self.record(format!("remove:{container_id}")); + Ok(()) + } + + /// Checks whether the container with the specified id is currently running. + /// + /// # Returns + /// + /// `true` if the container is running, `false` otherwise. 
+ /// + /// # Examples + /// + /// ```no_run + /// # async fn example(manager: &impl crate::ChallengeDocker) -> anyhow::Result<()> { + /// let is_running = manager.is_container_running("container123").await?; + /// // `is_running` will be `true` or `false` depending on the container state + /// assert!(is_running == true || is_running == false); + /// # Ok(()) + /// # } + /// ``` + async fn is_container_running(&self, container_id: &str) -> anyhow::Result { + self.record(format!("is_running:{container_id}")); + Ok(true) + } + + /// Fetches the logs for the specified container, returning up to the last `tail` lines. + /// + /// # Parameters + /// - `container_id`: Identifier of the container whose logs to retrieve. + /// - `tail`: Number of trailing log lines to return. + /// + /// # Returns + /// The container logs as a `String` on success. + /// + /// # Examples + /// + /// ```no_run + /// // `mgr` is an instance that exposes `get_logs`. + /// let logs = mgr.get_logs("container-123", 100).await.unwrap(); + /// println!("{}", logs); + /// ``` + async fn get_logs(&self, container_id: &str, tail: usize) -> anyhow::Result { + self.record(format!("logs:{container_id}:{tail}")); + Ok(String::new()) + } + + /// Lists identifiers of challenge-related containers currently tracked by the lifecycle manager. + /// + /// # Examples + /// + /// ```no_run + /// # async fn example(manager: &crate::LifecycleManager) -> anyhow::Result<()> { + /// let ids = manager.list_challenge_containers().await?; + /// println!("containers: {:?}", ids); + /// # Ok(()) + /// # } + /// ``` + /// + /// # Returns + /// + /// A vector of container identifier strings on success. + async fn list_challenge_containers(&self) -> anyhow::Result> { + self.record("list_containers".to_string()); + Ok(Vec::new()) + } + + /// Records a cleanup request for stale containers that match the given prefix. + /// + /// This implementation records the cleanup operation and returns a default + /// `CleanupResult`. 
The `max_age_minutes` and `exclude_patterns` parameters are + /// accepted for API compatibility but are currently ignored. + /// + /// # Parameters + /// + /// - `prefix` — container name prefix to target for cleanup. + /// + /// # Returns + /// + /// `CleanupResult` summarizing cleanup actions (currently the default/empty result). + /// + /// # Examples + /// + /// ``` + /// // Example usage; replace `mgr` with an instance that provides this method. + /// # async fn _example(mgr: &impl std::ops::Deref) {} + /// // let result = mgr.cleanup_stale_containers("ch-", 60, &[]).await.unwrap(); + /// // assert_eq!(result, CleanupResult::default()); + /// ``` + async fn cleanup_stale_containers( + &self, + prefix: &str, + _max_age_minutes: u64, + _exclude_patterns: &[&str], + ) -> anyhow::Result { + self.record(format!("cleanup:{prefix}")); + Ok(CleanupResult::default()) + } + } + + /// Create a sample `ChallengeContainerConfig` for the given challenge ID and Docker image. + /// + /// The returned config is populated with sensible defaults for resource limits and timeouts: + /// a 1.0 CPU core allocation, 512 MB memory, 3600 second timeout, no GPU required, and + /// emission weight of 1.0. The `name` field is set to `challenge-`. 
+ /// + /// # Examples + /// + /// ``` + /// let id = ChallengeId::new(); + /// let cfg = sample_config(id.clone(), "registry/example:latest"); + /// assert_eq!(cfg.challenge_id, id); + /// assert_eq!(cfg.docker_image, "registry/example:latest"); + /// assert_eq!(cfg.cpu_cores, 1.0); + /// assert_eq!(cfg.memory_mb, 512); + /// ``` + fn sample_config(challenge_id: ChallengeId, image: &str) -> ChallengeContainerConfig { + ChallengeContainerConfig { + challenge_id, + name: format!("challenge-{challenge_id}"), + docker_image: image.to_string(), + mechanism_id: 0, + emission_weight: 1.0, + timeout_secs: 3600, + cpu_cores: 1.0, + memory_mb: 512, + gpu_required: false, + } + } + + /// Creates a ChallengeInstance populated with the provided identifiers and metadata. + /// + /// The instance's `endpoint` is set to `http://{challenge_id}` and `started_at` is set to the current UTC time. + /// + /// # Examples + /// + /// ``` + /// let cid = ChallengeId::new(); + /// let inst = sample_instance(cid.clone(), "ctr-1", "img:latest", ContainerStatus::Running); + /// assert_eq!(inst.challenge_id, cid); + /// assert_eq!(inst.container_id, "ctr-1"); + /// assert_eq!(inst.image, "img:latest"); + /// assert_eq!(inst.endpoint, format!("http://{}", cid)); + /// assert!(matches!(inst.status, ContainerStatus::Running)); + /// ``` + fn sample_instance( + challenge_id: ChallengeId, + container_id: &str, + image: &str, + status: ContainerStatus, + ) -> ChallengeInstance { + let id_str = challenge_id.to_string(); + ChallengeInstance { + challenge_id, + container_id: container_id.to_string(), + image: image.to_string(), + endpoint: format!("http://{id_str}"), + started_at: Utc::now(), + status, + } + } +} \ No newline at end of file