-
-
Notifications
You must be signed in to change notification settings - Fork 159
add system defined tags and free form labels to datasets #1553
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
400255b
3d69b6c
87ff8e0
4a3e23c
7448c27
4a2b764
34ab039
12250c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,204 @@ | ||
| /* | ||
| * Parseable Server (C) 2022 - 2025 Parseable, Inc. | ||
| * | ||
| * This program is free software: you can redistribute it and/or modify | ||
| * it under the terms of the GNU Affero General Public License as | ||
| * published by the Free Software Foundation, either version 3 of the | ||
| * License, or (at your option) any later version. | ||
| * | ||
| * This program is distributed in the hope that it will be useful, | ||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| * GNU Affero General Public License for more details. | ||
| * | ||
| * You should have received a copy of the GNU Affero General Public License | ||
| * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
| * | ||
| */ | ||
|
|
||
| use std::collections::HashSet; | ||
|
|
||
| use actix_web::http::StatusCode; | ||
| use actix_web::{HttpRequest, HttpResponse, web}; | ||
| use serde::{Deserialize, Serialize}; | ||
|
|
||
| use crate::utils::get_tenant_id_from_request; | ||
| use crate::{ | ||
| handlers::DatasetTag, | ||
| parseable::PARSEABLE, | ||
| storage::{ObjectStorageError, StreamType}, | ||
| }; | ||
|
|
||
| /// One entry in the correlated-datasets response: another dataset together with | ||
| /// the tags and labels it has in common with the dataset that was queried. | ||
| /// | ||
| /// Serialized with camelCase keys (`name`, `sharedTags`, `sharedLabels`). | ||
| #[derive(Debug, Serialize)] | ||
| #[serde(rename_all = "camelCase")] | ||
| struct CorrelatedDataset { | ||
| /// Name of the correlated dataset. | ||
| name: String, | ||
| /// Tags present on both this dataset and the queried dataset. | ||
| shared_tags: Vec<DatasetTag>, | ||
| /// Labels present on both this dataset and the queried dataset. | ||
| shared_labels: Vec<String>, | ||
| } | ||
|
|
||
| /// GET /api/v1/datasets/{name}/correlated | ||
| /// | ||
| /// Lists every other non-internal dataset of the tenant that has at least one tag | ||
| /// or one label in common with the dataset named in the path. | ||
| /// | ||
| /// Responds with a JSON array of `CorrelatedDataset` entries; the array is empty | ||
| /// when the named dataset carries no tags and no labels. Shared tags and labels are | ||
| /// computed through `HashSet` intersection, so their order within an entry is | ||
| /// unspecified. | ||
| /// | ||
| /// # Errors | ||
| /// | ||
| /// Returns `DatasetsError::DatasetNotFound` when the named dataset cannot be looked up. | ||
| pub async fn get_correlated_datasets( | ||
|     req: HttpRequest, | ||
|     path: web::Path<String>, | ||
| ) -> Result<HttpResponse, DatasetsError> { | ||
|     let dataset_name = path.into_inner(); | ||
|     let tenant_id = get_tenant_id_from_request(&req); | ||
|     let target = match PARSEABLE.get_stream(&dataset_name, &tenant_id) { | ||
|         Ok(found) => found, | ||
|         Err(_) => return Err(DatasetsError::DatasetNotFound(dataset_name.clone())), | ||
|     }; | ||
|
|
||
|     let target_tags: HashSet<DatasetTag> = target.get_dataset_tags().into_iter().collect(); | ||
|     let target_labels: HashSet<String> = target.get_dataset_labels().into_iter().collect(); | ||
|
|
||
|     // Nothing to correlate on: answer with an empty list without scanning other datasets. | ||
|     if target_tags.is_empty() && target_labels.is_empty() { | ||
|         return Ok(HttpResponse::Ok().json(Vec::<CorrelatedDataset>::new())); | ||
|     } | ||
|
|
||
|     let correlated: Vec<CorrelatedDataset> = PARSEABLE | ||
|         .streams | ||
|         .list(&tenant_id) | ||
|         .into_iter() | ||
|         // The queried dataset is never reported as correlated with itself. | ||
|         .filter(|candidate| *candidate != dataset_name) | ||
|         .filter_map(|candidate| { | ||
|             // Datasets that can no longer be looked up are silently skipped. | ||
|             let other = PARSEABLE.get_stream(&candidate, &tenant_id).ok()?; | ||
|             // Skip internal streams | ||
|             if other.get_stream_type() == StreamType::Internal { | ||
|                 return None; | ||
|             } | ||
|
|
||
|             let other_tags: HashSet<DatasetTag> = other.get_dataset_tags().into_iter().collect(); | ||
|             let other_labels: HashSet<String> = | ||
|                 other.get_dataset_labels().into_iter().collect(); | ||
|
|
||
|             let shared_tags: Vec<DatasetTag> = | ||
|                 target_tags.intersection(&other_tags).copied().collect(); | ||
|             let shared_labels: Vec<String> = | ||
|                 target_labels.intersection(&other_labels).cloned().collect(); | ||
|
|
||
|             if shared_tags.is_empty() && shared_labels.is_empty() { | ||
|                 return None; | ||
|             } | ||
|             Some(CorrelatedDataset { | ||
|                 name: candidate, | ||
|                 shared_tags, | ||
|                 shared_labels, | ||
|             }) | ||
|         }) | ||
|         .collect(); | ||
|
|
||
|     Ok(HttpResponse::Ok().json(correlated)) | ||
| } | ||
|
|
||
| /// GET /api/v1/datasets/tags/{tag} | ||
| /// | ||
| /// Responds with a JSON array holding the names of all non-internal datasets of the | ||
| /// tenant whose tags include the tag given in the path. | ||
| /// | ||
| /// # Errors | ||
| /// | ||
| /// Returns `DatasetsError::InvalidTag` when the path segment cannot be converted | ||
| /// into a `DatasetTag`. | ||
| pub async fn get_datasets_by_tag( | ||
|     req: HttpRequest, | ||
|     path: web::Path<String>, | ||
| ) -> Result<HttpResponse, DatasetsError> { | ||
|     let tenant_id = get_tenant_id_from_request(&req); | ||
|     let raw_tag = path.into_inner(); | ||
|     let tag = match DatasetTag::try_from(raw_tag.as_str()) { | ||
|         Ok(parsed) => parsed, | ||
|         Err(_) => return Err(DatasetsError::InvalidTag(raw_tag)), | ||
|     }; | ||
|
|
||
|     let matching: Vec<_> = PARSEABLE | ||
|         .streams | ||
|         .list(&tenant_id) | ||
|         .into_iter() | ||
|         .filter(|candidate| { | ||
|             // Datasets that fail the lookup, and internal streams, never match. | ||
|             PARSEABLE | ||
|                 .get_stream(candidate, &tenant_id) | ||
|                 .is_ok_and(|found| { | ||
|                     found.get_stream_type() != StreamType::Internal | ||
|                         && found.get_dataset_tags().contains(&tag) | ||
|                 }) | ||
|         }) | ||
|         .collect(); | ||
|
|
||
|     Ok(HttpResponse::Ok().json(matching)) | ||
| } | ||
|
|
||
| /// JSON request body accepted by `put_dataset_metadata`. | ||
| /// | ||
| /// Both fields are optional; a field that is absent leaves the dataset's current | ||
| /// value for that field unchanged. | ||
| #[derive(Debug, Deserialize)] | ||
| pub struct PutDatasetMetadataBody { | ||
| /// Replacement set of system-defined tags, if provided. | ||
| pub tags: Option<Vec<DatasetTag>>, | ||
| /// Replacement set of free-form labels, if provided. | ||
| pub labels: Option<Vec<String>>, | ||
| } | ||
|
|
||
| /// PUT /api/v1/datasets/{name} | ||
| /// | ||
| /// Replaces the dataset's tags and/or labels. | ||
| /// Only fields present in the body are updated; absent fields are left unchanged. | ||
| /// | ||
| /// Duplicate entries in a supplied list are dropped while the order of first | ||
| /// occurrence is kept, so the same request body always persists and returns the | ||
| /// same ordering. | ||
| /// | ||
| /// Responds with a JSON object `{"tags": [...], "labels": [...]}` describing the | ||
| /// resulting metadata. | ||
| /// | ||
| /// # Errors | ||
| /// | ||
| /// * `DatasetsError::DatasetNotFound` when the named dataset cannot be looked up. | ||
| /// * `DatasetsError::Storage` when persisting the metadata to object storage fails; | ||
| ///   in that case the in-memory metadata is not modified. | ||
| pub async fn put_dataset_metadata( | ||
|     req: HttpRequest, | ||
|     path: web::Path<String>, | ||
|     body: web::Json<PutDatasetMetadataBody>, | ||
| ) -> Result<HttpResponse, DatasetsError> { | ||
|     let dataset_name = path.into_inner(); | ||
|     let body = body.into_inner(); | ||
|     let tenant_id = get_tenant_id_from_request(&req); | ||
|
|
||
|     let stream = PARSEABLE | ||
|         .get_stream(&dataset_name, &tenant_id) | ||
|         .map_err(|_| DatasetsError::DatasetNotFound(dataset_name.clone()))?; | ||
|
|
||
|     // A supplied list replaces the stored one (after de-duplication); an absent | ||
|     // list falls back to whatever the dataset currently has. | ||
|     let final_tags = match body.tags { | ||
|         Some(tags) => dedup_preserving_order(tags), | ||
|         None => stream.get_dataset_tags(), | ||
|     }; | ||
|     let final_labels = match body.labels { | ||
|         Some(labels) => dedup_preserving_order(labels), | ||
|         None => stream.get_dataset_labels(), | ||
|     }; | ||
|
|
||
|     // Update storage first, then in-memory | ||
|     let storage = PARSEABLE.storage.get_object_store(); | ||
|     storage | ||
|         .update_dataset_tags_and_labels_in_stream( | ||
|             &dataset_name, | ||
|             &final_tags, | ||
|             &final_labels, | ||
|             &tenant_id, | ||
|         ) | ||
|         .await | ||
|         .map_err(DatasetsError::Storage)?; | ||
|
|
||
|     // Build the response while the vectors are still owned here, so they can be | ||
|     // moved into the in-memory metadata afterwards without cloning. | ||
|     let response = serde_json::json!({ | ||
|         "tags": final_tags, | ||
|         "labels": final_labels, | ||
|     }); | ||
|
|
||
|     stream.set_dataset_tags(final_tags); | ||
|     stream.set_dataset_labels(final_labels); | ||
|
|
||
|     Ok(HttpResponse::Ok().json(response)) | ||
| } | ||
|
|
||
| /// Removes duplicate values from `items`, keeping the first occurrence of each | ||
| /// value at its original relative position. | ||
| fn dedup_preserving_order<T>(items: Vec<T>) -> Vec<T> | ||
| where | ||
|     T: Clone + Eq + std::hash::Hash, | ||
| { | ||
|     let mut seen = HashSet::with_capacity(items.len()); | ||
|     items | ||
|         .into_iter() | ||
|         // `insert` returns false for values already seen, which filters them out. | ||
|         .filter(|item| seen.insert(item.clone())) | ||
|         .collect() | ||
| } | ||
|
|
||
| /// Errors returned by the dataset tag/label HTTP handlers. | ||
| /// | ||
| /// The `ResponseError` implementation for this type maps the variants to | ||
| /// 404, 400 and 500 responses respectively. | ||
| #[derive(Debug, thiserror::Error)] | ||
| pub enum DatasetsError { | ||
| /// The dataset named in the request could not be looked up; carries that name. | ||
| #[error("Dataset not found: {0}")] | ||
| DatasetNotFound(String), | ||
| /// The tag in the request path is not a valid `DatasetTag`; carries the raw text. | ||
| #[error("Invalid tag: {0}")] | ||
| InvalidTag(String), | ||
| /// Persisting metadata to object storage failed. | ||
| #[error("Storage error: {0}")] | ||
| Storage(ObjectStorageError), | ||
| } | ||
|
|
||
| impl actix_web::ResponseError for DatasetsError { | ||
|     /// Chooses the HTTP status for each error variant: 404 for an unknown dataset, | ||
|     /// 400 for an invalid tag and 500 for a storage failure. | ||
|     fn status_code(&self) -> StatusCode { | ||
|         match self { | ||
|             Self::DatasetNotFound(_) => StatusCode::NOT_FOUND, | ||
|             Self::InvalidTag(_) => StatusCode::BAD_REQUEST, | ||
|             Self::Storage(_) => StatusCode::INTERNAL_SERVER_ERROR, | ||
|         } | ||
|     } | ||
|
|
||
|     /// Renders the error as a JSON body of the form `{"error": "<message>"}` using | ||
|     /// the status from `status_code`. | ||
|     fn error_response(&self) -> HttpResponse { | ||
|         let payload = serde_json::json!({ | ||
|             "error": self.to_string() | ||
|         }); | ||
|         HttpResponse::build(self.status_code()).json(payload) | ||
|     } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -199,14 +199,33 @@ impl Server { | |
| } | ||
|
|
||
| pub fn get_prism_datasets() -> Scope { | ||
| web::scope("/datasets").route( | ||
| "", | ||
| web::post() | ||
| .to(http::prism_logstream::post_datasets) | ||
| .authorize_for_resource(Action::GetStreamInfo) | ||
| .authorize_for_resource(Action::GetStats) | ||
| .authorize_for_resource(Action::GetRetention), | ||
| ) | ||
| web::scope("/datasets") | ||
| .route( | ||
| "", | ||
| web::post() | ||
| .to(http::prism_logstream::post_datasets) | ||
| .authorize_for_resource(Action::GetStreamInfo) | ||
| .authorize_for_resource(Action::GetStats) | ||
| .authorize_for_resource(Action::GetRetention), | ||
| ) | ||
| .route( | ||
| "/tags/{tag}", | ||
| web::get() | ||
| .to(http::datasets::get_datasets_by_tag) | ||
| .authorize_for_resource(Action::GetStreamInfo), | ||
| ) | ||
| .route( | ||
| "/{name}/correlated", | ||
| web::get() | ||
| .to(http::datasets::get_correlated_datasets) | ||
| .authorize_for_resource(Action::GetStreamInfo), | ||
|
Comment on lines
+211
to
+221
Contributor
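There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Route shadowing: actix-web evaluates routes in registration order, so a request such as `GET /datasets/tags/correlated` is matched by `/tags/{tag}` before it can reach `/{name}/correlated`; a dataset named `tags` can therefore never be queried for correlations. Reordering the routes so the more-specific two-segment pattern comes first does not help here because both are two-segment paths; the real fix is to use a path that avoids the structural collision, e.g. `/correlated/{name}`. 🔀 Proposed fix: reorder / rename to eliminate shadowing
- .route(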
- "/tags/{tag}",
- web::get()
- .to(http::datasets::get_datasets_by_tag)
- .authorize_for_resource(Action::GetStreamInfo),
- )
- .route(
- "/{name}/correlated",
- web::get()
- .to(http::datasets::get_correlated_datasets)
- .authorize_for_resource(Action::GetStreamInfo),
- )
+ .route(
+ "/correlated/{name}",
+ web::get()
+ .to(http::datasets::get_correlated_datasets)
+ .authorize_for_resource(Action::GetStreamInfo),
+ )
+ .route(
+ "/tags/{tag}",
+ web::get()
+ .to(http::datasets::get_datasets_by_tag)
+ .authorize_for_resource(Action::GetStreamInfo),
+ )(Remember to update the corresponding path extractor in 🤖 Prompt for AI Agents |
||
| ) | ||
| .route( | ||
| "/{name}", | ||
| web::put() | ||
| .to(http::datasets::put_dataset_metadata) | ||
| .authorize_for_resource(Action::CreateStream), | ||
| ) | ||
coderabbitai[bot] marked this conversation as resolved.
Show resolved
Hide resolved
Comment on lines
201
to
+228
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
# Search for the four handler implementations and how they extract tenant_id
echo "=== Searching for handler implementations ==="
echo -e "\n--- 1. prism_logstream::post_datasets ---"
rg -n "fn post_datasets" --type rust -A 10 src/handlers/http/ | head -40
echo -e "\n--- 2. datasets::get_datasets_by_tag ---"
rg -n "fn get_datasets_by_tag" --type rust -A 10 src/handlers/http/ | head -40
echo -e "\n--- 3. datasets::get_correlated_datasets ---"
rg -n "fn get_correlated_datasets" --type rust -A 10 src/handlers/http/ | head -40
echo -e "\n--- 4. datasets::put_dataset_metadata ---"
rg -n "fn put_dataset_metadata" --type rust -A 10 src/handlers/http/ | head -40
echo -e "\n--- Check for get_tenant_id_from_request usage in datasets.rs ---"
rg -n "get_tenant_id_from_request" --type rust src/handlers/http/datasets.rsRepository: parseablehq/parseable Length of output: 1362 🏁 Script executed: #!/bin/bash
# Read datasets.rs around the get_tenant_id_from_request usages
echo "=== datasets.rs: lines 20-60 (first usage) ==="
sed -n '20,60p' src/handlers/http/datasets.rs
echo -e "\n=== datasets.rs: lines 95-110 (second usage) ==="
sed -n '95,110p' src/handlers/http/datasets.rs
echo -e "\n=== datasets.rs: lines 133-150 (third usage) ==="
sed -n '133,150p' src/handlers/http/datasets.rs
echo -e "\n=== prism_logstream.rs: full post_datasets function ==="
sed -n '41,80p' src/handlers/http/prism_logstream.rs
echo -e "\n=== Check if handlers exist in datasets.rs ==="
rg -n "pub async fn (get_datasets_by_tag|get_correlated_datasets|put_dataset_metadata)" --type rust src/handlers/http/datasets.rsRepository: parseablehq/parseable Length of output: 3385 🏁 Script executed: #!/bin/bash
# Check extract_session_key_from_req and understand if it provides tenant scoping
echo "=== Search for extract_session_key_from_req definition ==="
rg -n "fn extract_session_key_from_req" --type rust -A 5 src/
echo -e "\n=== Check PrismDatasetRequest and get_datasets ==="
rg -n "impl.*PrismDatasetRequest\|fn get_datasets" --type rust -A 10 src/handlers/http/prism_logstream.rs | head -50
echo -e "\n=== Check PrismDatasetRequest struct ==="
rg -n "struct PrismDatasetRequest" --type rust -A 15 src/handlers/http/prism_logstream.rs
echo -e "\n=== Verify tenant scoping context for prism endpoints ==="
rg -n "get_prism_datasets|prism_logstream" --type rust src/handlers/http/modal/server.rs | head -20Repository: parseablehq/parseable Length of output: 1138 🏁 Script executed: #!/bin/bash
# Check the exact route registration and actions for post_datasets
echo "=== server.rs post_datasets route registration (lines 200-227) ==="
sed -n '200,227p' src/handlers/http/modal/server.rs
echo -e "\n=== Search for PrismDatasetRequest and get_datasets method ==="
rg -n "struct PrismDatasetRequest\|fn get_datasets" --type rust -A 20 src/handlers/http/prism_logstream.rs | head -80
echo -e "\n=== Check what get_datasets does (full method) ==="
rg -n "async fn get_datasets\|async fn get_info" --type rust -A 30 src/handlers/http/prism_logstream.rsRepository: parseablehq/parseable Length of output: 1332 🏁 Script executed: #!/bin/bash
# Broader search for PrismDatasetRequest and related types
echo "=== Count lines in prism_logstream.rs ==="
wc -l src/handlers/http/prism_logstream.rs
echo -e "\n=== Search for 'Request' in prism_logstream.rs ==="
rg -n "Request\|request" --type rust src/handlers/http/prism_logstream.rs | head -20
echo -e "\n=== Full prism_logstream.rs file (if small) ==="
wc -l src/handlers/http/prism_logstream.rs && cat -n src/handlers/http/prism_logstream.rs
echo -e "\n=== Check if PrismDatasetRequest is defined elsewhere ==="
rg -n "PrismDatasetRequest" --type rust src/Repository: parseablehq/parseable Length of output: 2896 🏁 Script executed: #!/bin/bash
# Check the PrismDatasetRequest.get_datasets implementation
echo "=== PrismDatasetRequest struct and impl ==="
sed -n '211,250p' src/prism/logstream/mod.rs
echo -e "\n=== Full get_datasets method ==="
rg -n "fn get_datasets" --type rust -A 50 src/prism/logstream/mod.rs | head -80Repository: parseablehq/parseable Length of output: 3457 Use The handler at 🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| pub fn get_demo_data_webscope() -> Scope { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.