Skip to content
3 changes: 2 additions & 1 deletion src/connectors/kafka/processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ impl ParseableSinkProcessor {
vec![log_source_entry],
TelemetryType::default(),
tenant_id,
None,
vec![],
vec![],
)
.await?;

Expand Down
204 changes: 204 additions & 0 deletions src/handlers/http/datasets.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
/*
* Parseable Server (C) 2022 - 2025 Parseable, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

use std::collections::HashSet;

use actix_web::http::StatusCode;
use actix_web::{HttpRequest, HttpResponse, web};
use serde::{Deserialize, Serialize};

use crate::utils::get_tenant_id_from_request;
use crate::{
handlers::DatasetTag,
parseable::PARSEABLE,
storage::{ObjectStorageError, StreamType},
};

/// Response item for `GET /datasets/{name}/correlated`: a dataset that shares
/// at least one tag or label with the queried dataset.
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct CorrelatedDataset {
    // Name of the correlated dataset (stream).
    name: String,
    // Tags present on both the target dataset and this one.
    shared_tags: Vec<DatasetTag>,
    // Labels present on both the target dataset and this one.
    shared_labels: Vec<String>,
}

/// GET /api/v1/datasets/{name}/correlated
/// Returns all datasets sharing at least one tag or label with the named dataset.
pub async fn get_correlated_datasets(
    req: HttpRequest,
    path: web::Path<String>,
) -> Result<HttpResponse, DatasetsError> {
    let dataset_name = path.into_inner();
    let tenant_id = get_tenant_id_from_request(&req);

    // Resolve the target dataset; an unknown name maps to a 404.
    let stream = PARSEABLE
        .get_stream(&dataset_name, &tenant_id)
        .map_err(|_| DatasetsError::DatasetNotFound(dataset_name.clone()))?;

    let target_tags: HashSet<DatasetTag> = stream.get_dataset_tags().into_iter().collect();
    let target_labels: HashSet<String> = stream.get_dataset_labels().into_iter().collect();

    // With neither tags nor labels there is nothing to correlate against.
    if target_tags.is_empty() && target_labels.is_empty() {
        return Ok(HttpResponse::Ok().json(Vec::<CorrelatedDataset>::new()));
    }

    let mut correlated = Vec::new();
    for name in PARSEABLE.streams.list(&tenant_id) {
        // A dataset never correlates with itself.
        if name == dataset_name {
            continue;
        }
        // Streams that vanished between list() and lookup are simply skipped.
        let Ok(s) = PARSEABLE.get_stream(&name, &tenant_id) else {
            continue;
        };
        // Internal streams are an implementation detail; hide them.
        if s.get_stream_type() == StreamType::Internal {
            continue;
        }

        let s_tags: HashSet<DatasetTag> = s.get_dataset_tags().into_iter().collect();
        let s_labels: HashSet<String> = s.get_dataset_labels().into_iter().collect();

        let shared_tags: Vec<DatasetTag> = target_tags.intersection(&s_tags).copied().collect();
        let shared_labels: Vec<String> =
            target_labels.intersection(&s_labels).cloned().collect();

        if shared_tags.is_empty() && shared_labels.is_empty() {
            continue;
        }
        correlated.push(CorrelatedDataset {
            name,
            shared_tags,
            shared_labels,
        });
    }

    Ok(HttpResponse::Ok().json(correlated))
}

/// GET /api/v1/datasets/tags/{tag}
/// Returns all datasets that have the specified tag.
pub async fn get_datasets_by_tag(
    req: HttpRequest,
    path: web::Path<String>,
) -> Result<HttpResponse, DatasetsError> {
    let tenant_id = get_tenant_id_from_request(&req);
    let tag_str = path.into_inner();
    // A path segment that does not parse as a known tag is a 400.
    let tag =
        DatasetTag::try_from(tag_str.as_str()).map_err(|_| DatasetsError::InvalidTag(tag_str))?;

    // Keep every non-internal stream that carries the requested tag;
    // streams that fail to resolve are treated as non-matching.
    let matching: Vec<String> = PARSEABLE
        .streams
        .list(&tenant_id)
        .into_iter()
        .filter(|name| {
            PARSEABLE
                .get_stream(name, &tenant_id)
                .map(|s| {
                    s.get_stream_type() != StreamType::Internal
                        && s.get_dataset_tags().contains(&tag)
                })
                .unwrap_or(false)
        })
        .collect();

    Ok(HttpResponse::Ok().json(matching))
}

/// Request body for `PUT /datasets/{name}`.
///
/// Each field is optional: an absent field means "leave the current value
/// unchanged", while an explicit empty list clears the tags/labels.
#[derive(Debug, Deserialize)]
pub struct PutDatasetMetadataBody {
    // Replacement tags; `None` keeps the dataset's existing tags.
    pub tags: Option<Vec<DatasetTag>>,
    // Replacement labels; `None` keeps the dataset's existing labels.
    pub labels: Option<Vec<String>>,
}

/// PUT /api/v1/datasets/{name}
/// Replaces the dataset's tags and/or labels.
/// Only fields present in the body are updated; absent fields are left unchanged.
pub async fn put_dataset_metadata(
req: HttpRequest,
path: web::Path<String>,
body: web::Json<PutDatasetMetadataBody>,
) -> Result<HttpResponse, DatasetsError> {
let dataset_name = path.into_inner();
let body = body.into_inner();
let tenant_id = get_tenant_id_from_request(&req);

let stream = PARSEABLE
.get_stream(&dataset_name, &tenant_id)
.map_err(|_| DatasetsError::DatasetNotFound(dataset_name.clone()))?;

let final_tags = match body.tags {
Some(tags) => tags
.into_iter()
.collect::<HashSet<_>>()
.into_iter()
.collect(),
None => stream.get_dataset_tags(),
};
let final_labels = match body.labels {
Some(labels) => labels
.into_iter()
.collect::<HashSet<_>>()
.into_iter()
.collect(),
None => stream.get_dataset_labels(),
};

// Update storage first, then in-memory
let storage = PARSEABLE.storage.get_object_store();
storage
.update_dataset_tags_and_labels_in_stream(
&dataset_name,
&final_tags,
&final_labels,
&tenant_id,
)
.await
.map_err(DatasetsError::Storage)?;

stream.set_dataset_tags(final_tags.clone());
stream.set_dataset_labels(final_labels.clone());

Ok(HttpResponse::Ok().json(serde_json::json!({
"tags": final_tags,
"labels": final_labels,
})))
}

/// Errors returned by the dataset metadata endpoints.
#[derive(Debug, thiserror::Error)]
pub enum DatasetsError {
    /// The named dataset does not exist for this tenant (HTTP 404).
    #[error("Dataset not found: {0}")]
    DatasetNotFound(String),
    /// The supplied path segment did not parse as a `DatasetTag` (HTTP 400).
    #[error("Invalid tag: {0}")]
    InvalidTag(String),
    /// The object store rejected the metadata update (HTTP 500).
    #[error("Storage error: {0}")]
    Storage(ObjectStorageError),
}

impl actix_web::ResponseError for DatasetsError {
fn status_code(&self) -> StatusCode {
match self {
DatasetsError::DatasetNotFound(_) => StatusCode::NOT_FOUND,
DatasetsError::InvalidTag(_) => StatusCode::BAD_REQUEST,
DatasetsError::Storage(_) => StatusCode::INTERNAL_SERVER_ERROR,
}
}

fn error_response(&self) -> HttpResponse {
HttpResponse::build(self.status_code()).json(serde_json::json!({
"error": self.to_string()
}))
}
}
6 changes: 4 additions & 2 deletions src/handlers/http/ingest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ pub async fn ingest(
vec![log_source_entry.clone()],
telemetry_type,
&tenant_id,
None,
vec![],
vec![],
)
.await
.map_err(|e| {
Expand Down Expand Up @@ -239,7 +240,8 @@ pub async fn setup_otel_stream(
vec![log_source_entry.clone()],
telemetry_type,
&tenant_id,
None,
vec![],
vec![],
)
.await?;
let mut time_partition = None;
Expand Down
1 change: 1 addition & 0 deletions src/handlers/http/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub mod about;
pub mod alerts;
pub mod cluster;
pub mod correlation;
pub mod datasets;
pub mod demo_data;
pub mod health_check;
pub mod ingest;
Expand Down
35 changes: 27 additions & 8 deletions src/handlers/http/modal/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,33 @@ impl Server {
}

pub fn get_prism_datasets() -> Scope {
web::scope("/datasets").route(
"",
web::post()
.to(http::prism_logstream::post_datasets)
.authorize_for_resource(Action::GetStreamInfo)
.authorize_for_resource(Action::GetStats)
.authorize_for_resource(Action::GetRetention),
)
web::scope("/datasets")
.route(
"",
web::post()
.to(http::prism_logstream::post_datasets)
.authorize_for_resource(Action::GetStreamInfo)
.authorize_for_resource(Action::GetStats)
.authorize_for_resource(Action::GetRetention),
)
.route(
"/tags/{tag}",
web::get()
.to(http::datasets::get_datasets_by_tag)
.authorize_for_resource(Action::GetStreamInfo),
)
.route(
"/{name}/correlated",
web::get()
.to(http::datasets::get_correlated_datasets)
.authorize_for_resource(Action::GetStreamInfo),
Comment on lines +211 to +221
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Route shadowing: GET /tags/correlated will never reach get_correlated_datasets for a dataset named "tags".

actix-web evaluates .route() registrations in order. /tags/{tag} (line 211) is matched before /{name}/correlated (line 217), so a request for GET /tags/correlated binds to get_datasets_by_tag with tag = "correlated" rather than get_correlated_datasets with name = "tags". In practice this only bites if a dataset is named "tags", but it is an invisible constraint on valid dataset names.

Reordering the routes so the more-specific two-segment pattern comes first does not help here because both are two-segment paths; the real fix is to use a path that avoids the structural collision, e.g. /correlated/{name} for the correlated endpoint:

🔀 Proposed fix: reorder / rename to eliminate shadowing
-            .route(
-                "/tags/{tag}",
-                web::get()
-                    .to(http::datasets::get_datasets_by_tag)
-                    .authorize_for_resource(Action::GetStreamInfo),
-            )
-            .route(
-                "/{name}/correlated",
-                web::get()
-                    .to(http::datasets::get_correlated_datasets)
-                    .authorize_for_resource(Action::GetStreamInfo),
-            )
+            .route(
+                "/correlated/{name}",
+                web::get()
+                    .to(http::datasets::get_correlated_datasets)
+                    .authorize_for_resource(Action::GetStreamInfo),
+            )
+            .route(
+                "/tags/{tag}",
+                web::get()
+                    .to(http::datasets::get_datasets_by_tag)
+                    .authorize_for_resource(Action::GetStreamInfo),
+            )

(Remember to update the corresponding path extractor in get_correlated_datasets and any client/docs that reference the old path.)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/handlers/http/modal/server.rs` around lines 210 - 220, The two route
registrations conflict because "/tags/{tag}" will match "/tags/correlated"
before "/{name}/correlated", causing get_correlated_datasets to be unreachable
for a dataset named "tags"; change the correlated endpoint path to avoid
structural collision (for example replace "/{name}/correlated" with
"/correlated/{name}" in the route registration where
web::get().to(http::datasets::get_correlated_datasets).authorize_for_resource(Action::GetStreamInfo)
is set) and update the path extractor in the get_correlated_datasets handler and
any clients/docs referencing the old path so they use the new
"/correlated/{name}" form.

)
.route(
"/{name}",
web::put()
.to(http::datasets::put_dataset_metadata)
.authorize_for_resource(Action::CreateStream),
)
Comment on lines 201 to +228
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Search for the four handler implementations and how they extract tenant_id

echo "=== Searching for handler implementations ==="

echo -e "\n--- 1. prism_logstream::post_datasets ---"
rg -n "fn post_datasets" --type rust -A 10 src/handlers/http/ | head -40

echo -e "\n--- 2. datasets::get_datasets_by_tag ---"
rg -n "fn get_datasets_by_tag" --type rust -A 10 src/handlers/http/ | head -40

echo -e "\n--- 3. datasets::get_correlated_datasets ---"
rg -n "fn get_correlated_datasets" --type rust -A 10 src/handlers/http/ | head -40

echo -e "\n--- 4. datasets::put_dataset_metadata ---"
rg -n "fn put_dataset_metadata" --type rust -A 10 src/handlers/http/ | head -40

echo -e "\n--- Check for get_tenant_id_from_request usage in datasets.rs ---"
rg -n "get_tenant_id_from_request" --type rust src/handlers/http/datasets.rs

Repository: parseablehq/parseable

Length of output: 1362


🏁 Script executed:

#!/bin/bash
# Read datasets.rs around the get_tenant_id_from_request usages
echo "=== datasets.rs: lines 20-60 (first usage) ==="
sed -n '20,60p' src/handlers/http/datasets.rs

echo -e "\n=== datasets.rs: lines 95-110 (second usage) ==="
sed -n '95,110p' src/handlers/http/datasets.rs

echo -e "\n=== datasets.rs: lines 133-150 (third usage) ==="
sed -n '133,150p' src/handlers/http/datasets.rs

echo -e "\n=== prism_logstream.rs: full post_datasets function ==="
sed -n '41,80p' src/handlers/http/prism_logstream.rs

echo -e "\n=== Check if handlers exist in datasets.rs ==="
rg -n "pub async fn (get_datasets_by_tag|get_correlated_datasets|put_dataset_metadata)" --type rust src/handlers/http/datasets.rs

Repository: parseablehq/parseable

Length of output: 3385


🏁 Script executed:

#!/bin/bash
# Check extract_session_key_from_req and understand if it provides tenant scoping

echo "=== Search for extract_session_key_from_req definition ==="
rg -n "fn extract_session_key_from_req" --type rust -A 5 src/

echo -e "\n=== Check PrismDatasetRequest and get_datasets ==="
rg -n "impl.*PrismDatasetRequest\|fn get_datasets" --type rust -A 10 src/handlers/http/prism_logstream.rs | head -50

echo -e "\n=== Check PrismDatasetRequest struct ==="
rg -n "struct PrismDatasetRequest" --type rust -A 15 src/handlers/http/prism_logstream.rs

echo -e "\n=== Verify tenant scoping context for prism endpoints ==="
rg -n "get_prism_datasets|prism_logstream" --type rust src/handlers/http/modal/server.rs | head -20

Repository: parseablehq/parseable

Length of output: 1138


🏁 Script executed:

#!/bin/bash
# Check the exact route registration and actions for post_datasets

echo "=== server.rs post_datasets route registration (lines 200-227) ==="
sed -n '200,227p' src/handlers/http/modal/server.rs

echo -e "\n=== Search for PrismDatasetRequest and get_datasets method ==="
rg -n "struct PrismDatasetRequest\|fn get_datasets" --type rust -A 20 src/handlers/http/prism_logstream.rs | head -80

echo -e "\n=== Check what get_datasets does (full method) ==="
rg -n "async fn get_datasets\|async fn get_info" --type rust -A 30 src/handlers/http/prism_logstream.rs

Repository: parseablehq/parseable

Length of output: 1332


🏁 Script executed:

#!/bin/bash
# Broader search for PrismDatasetRequest and related types

echo "=== Count lines in prism_logstream.rs ==="
wc -l src/handlers/http/prism_logstream.rs

echo -e "\n=== Search for 'Request' in prism_logstream.rs ==="
rg -n "Request\|request" --type rust src/handlers/http/prism_logstream.rs | head -20

echo -e "\n=== Full prism_logstream.rs file (if small) ==="
wc -l src/handlers/http/prism_logstream.rs && cat -n src/handlers/http/prism_logstream.rs

echo -e "\n=== Check if PrismDatasetRequest is defined elsewhere ==="
rg -n "PrismDatasetRequest" --type rust src/

Repository: parseablehq/parseable

Length of output: 2896


🏁 Script executed:

#!/bin/bash
# Check the PrismDatasetRequest.get_datasets implementation

echo "=== PrismDatasetRequest struct and impl ==="
sed -n '211,250p' src/prism/logstream/mod.rs

echo -e "\n=== Full get_datasets method ==="
rg -n "fn get_datasets" --type rust -A 50 src/prism/logstream/mod.rs | head -80

Repository: parseablehq/parseable

Length of output: 3457


Use get_tenant_id_from_request in post_datasets to enforce middleware-normalized tenant scoping.

The handler at src/handlers/http/prism_logstream.rs:41 derives tenant from BasicAuth credentials via extract_session_key_from_req, but non-ingest handlers must extract tenant from the middleware-normalized request header using get_tenant_id_from_request. The get_info handler in the same file (line 30) correctly uses this pattern; post_datasets should do the same to prevent tenant header spoofing and remain consistent with the three other dataset handlers (get_datasets_by_tag, get_correlated_datasets, put_dataset_metadata).

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/handlers/http/modal/server.rs` around lines 200 - 227, The post_datasets
handler currently derives tenant from BasicAuth via
extract_session_key_from_req; change it to read the tenant using
get_tenant_id_from_request (the same middleware-normalized header used by
get_info and the other dataset handlers) to enforce tenant scoping and prevent
header spoofing: locate the post_datasets function in http::prism_logstream,
remove or stop using extract_session_key_from_req for tenant resolution, call
get_tenant_id_from_request(req) (or equivalent helper used elsewhere) and pass
that tenant downstream to any dataset creation/validation logic so it matches
the behavior of get_datasets_by_tag, get_correlated_datasets, and
put_dataset_metadata.

}

pub fn get_demo_data_webscope() -> Scope {
Expand Down
33 changes: 18 additions & 15 deletions src/handlers/http/modal/utils/logstream_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,18 @@
*
*/

use actix_web::http::header::HeaderMap;

use crate::{
event::format::LogSource,
handlers::{
CUSTOM_PARTITION_KEY, DATASET_TAG_KEY, DatasetTag, LOG_SOURCE_KEY, STATIC_SCHEMA_FLAG,
STREAM_TYPE_KEY, TELEMETRY_TYPE_KEY, TIME_PARTITION_KEY, TIME_PARTITION_LIMIT_KEY,
TelemetryType, UPDATE_STREAM_KEY,
CUSTOM_PARTITION_KEY, DATASET_LABELS_KEY, DATASET_TAG_KEY, DATASET_TAGS_KEY, DatasetTag,
LOG_SOURCE_KEY, STATIC_SCHEMA_FLAG, STREAM_TYPE_KEY, TELEMETRY_TYPE_KEY,
TIME_PARTITION_KEY, TIME_PARTITION_LIMIT_KEY, TelemetryType, UPDATE_STREAM_KEY,
parse_dataset_labels, parse_dataset_tags,
},
storage::StreamType,
};
use actix_web::http::header::HeaderMap;
use tracing::warn;

#[derive(Debug, Default)]
pub struct PutStreamHeaders {
Expand All @@ -38,7 +39,8 @@ pub struct PutStreamHeaders {
pub stream_type: StreamType,
pub log_source: LogSource,
pub telemetry_type: TelemetryType,
pub dataset_tag: Option<DatasetTag>,
pub dataset_tags: Vec<DatasetTag>,
pub dataset_labels: Vec<String>,
}

impl From<&HeaderMap> for PutStreamHeaders {
Expand Down Expand Up @@ -72,16 +74,17 @@ impl From<&HeaderMap> for PutStreamHeaders {
.get(TELEMETRY_TYPE_KEY)
.and_then(|v| v.to_str().ok())
.map_or(TelemetryType::Logs, TelemetryType::from),
dataset_tag: headers
.get(DATASET_TAG_KEY)
dataset_tags: headers
.get(DATASET_TAGS_KEY)
.or_else(|| headers.get(DATASET_TAG_KEY))
.and_then(|v| v.to_str().ok())
.and_then(|v| match DatasetTag::try_from(v) {
Ok(tag) => Some(tag),
Err(err) => {
warn!("Invalid dataset tag '{v}': {err}");
None
}
}),
.map(parse_dataset_tags)
.unwrap_or_default(),
dataset_labels: headers
.get(DATASET_LABELS_KEY)
.and_then(|v| v.to_str().ok())
.map(parse_dataset_labels)
.unwrap_or_default(),
}
}
}
Loading
Loading