From d981af29c4959dab98f9f2123a5b6cca37b4b446 Mon Sep 17 00:00:00 2001 From: Christian Geier Date: Tue, 26 May 2026 16:48:03 +0200 Subject: [PATCH 1/3] Derive MinIO buckets from namespace --- .../mlflow-mobile-price-classification.ipynb | 10 +++++----- .../mobile-price-classifications.ipynb | 8 ++++---- .../mobile-price-classifications.ipynb | 10 +++++----- pipelines/lightweight-python-package/README.md | 3 +-- pipelines/lightweight-python-package/submit-cluster.py | 7 +++---- rstudio/iris-classification/README.md | 2 +- rstudio/iris-classification/iris-classification.r | 7 +++---- rstudio/mobile-price-classification/README.md | 5 ++--- .../mobile-price-classification.r | 7 +++---- serving/minimal-s3-model/minimal-s3-model.ipynb | 2 +- 10 files changed, 28 insertions(+), 33 deletions(-) diff --git a/mlflow/mobile-price-classification/mlflow-mobile-price-classification.ipynb b/mlflow/mobile-price-classification/mlflow-mobile-price-classification.ipynb index af9ae43..a55203b 100644 --- a/mlflow/mobile-price-classification/mlflow-mobile-price-classification.ipynb +++ b/mlflow/mobile-price-classification/mlflow-mobile-price-classification.ipynb @@ -96,12 +96,12 @@ "outputs": [], "source": [ "# Configuration for MinIO bucket\n", - "# Change to your MinIO bucket name, which is -data by default\n", + "# Uses the default prokube bucket for this namespace: -data\n", "# You can check your buckets by opening a terminal in the notebook and run 'mc ls minio'\n", - "s3_bucket = \"\"\n", - "s3_dataset_path = f\"{s3_bucket}/mobile-price-classification\"\n", - "if not s3_bucket:\n", - " raise ValueError(\"Please set the 's3_bucket' variable to your MinIO bucket name.\")" + "with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n", + " namespace = namespace_file.read().strip()\n", + "s3_bucket = f\"{namespace}-data\"\n", + "s3_dataset_path = f\"{s3_bucket}/mobile-price-classification\"" ] }, { diff --git a/notebooks/mobile-price-classification/mobile-price-classifications.ipynb b/notebooks/mobile-price-classification/mobile-price-classifications.ipynb index 0c976fa..7c25a98 100644 --- a/notebooks/mobile-price-classification/mobile-price-classifications.ipynb +++ b/notebooks/mobile-price-classification/mobile-price-classifications.ipynb @@ -46,12 +46,12 @@ "outputs": [], "source": [ "# Configuration for MinIO bucket\n", - "# Change to your MinIO bucket name, which is -data by default\n", + "# Uses the default prokube bucket for this namespace: -data\n", "# You can check your buckets by opening a terminal in the notebook and run 'mc ls minio'\n", - "s3_bucket = \"\"\n", + "with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n", + " namespace = namespace_file.read().strip()\n", + "s3_bucket = f\"{namespace}-data\"\n", "s3_dataset_path = f\"{s3_bucket}/mobile-price-classification\"\n", - "if not s3_bucket:\n", - " raise ValueError(\"Please set the 's3_bucket' variable to your MinIO bucket name.\")\n", "\n", "minio_train_data_path=f\"s3://{s3_dataset_path}/train.csv\"\n", "minio_test_data_path=f\"s3://{s3_dataset_path}/test.csv\"" diff --git a/pipelines/lightweight-components/mobile-price-classifications.ipynb b/pipelines/lightweight-components/mobile-price-classifications.ipynb index 37b960a..d21127f 100644 --- a/pipelines/lightweight-components/mobile-price-classifications.ipynb +++ b/pipelines/lightweight-components/mobile-price-classifications.ipynb @@ -48,12 +48,12 @@ "outputs": [], "source": [ "# Configuration for MinIO bucket\n", - "# Change to your MinIO bucket name, which is -data by default\n", + "# Uses the default prokube bucket for this namespace: -data\n", "# You can check your buckets by opening a terminal in the notebook and run 'mc ls minio'\n", - "s3_bucket = \"\"\n", - "s3_dataset_path = f\"{s3_bucket}/mobile-price-classification\"\n", - "if not s3_bucket:\n", - " raise ValueError(\"Please set the 's3_bucket' variable to your MinIO bucket name.\")" + "with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n", + " namespace = namespace_file.read().strip()\n", + "s3_bucket = f\"{namespace}-data\"\n", + "s3_dataset_path = f\"{s3_bucket}/mobile-price-classification\"" ] }, { diff --git a/pipelines/lightweight-python-package/README.md b/pipelines/lightweight-python-package/README.md index 1133e25..f907af6 100644 --- a/pipelines/lightweight-python-package/README.md +++ b/pipelines/lightweight-python-package/README.md @@ -76,14 +76,13 @@ COMPONENTS_IMAGE = "/mobile-price-classification:v1" Follow the same dataset preparation steps as in the lightweight components example: 1. Download the dataset from Kaggle -2. Upload to your MinIO bucket +2. Upload to your default MinIO bucket (`-data`) ### 4. Run the Pipeline **From within the cluster (e.g., from a Kubeflow Notebook):** ```sh -# Update s3_bucket in submit-cluster.py first python submit-cluster.py ``` diff --git a/pipelines/lightweight-python-package/submit-cluster.py b/pipelines/lightweight-python-package/submit-cluster.py index 72aaf59..426bc47 100644 --- a/pipelines/lightweight-python-package/submit-cluster.py +++ b/pipelines/lightweight-python-package/submit-cluster.py @@ -4,11 +4,10 @@ if __name__ == "__main__": - # Configuration - update these values - s3_bucket = "" # e.g., "my-namespace-data" - if not s3_bucket: - raise ValueError("Please set the 's3_bucket' variable to your MinIO bucket name.") + with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace", "r", encoding="utf-8") as namespace_file: + namespace = namespace_file.read().strip() + s3_bucket = f"{namespace}-data" s3_dataset_path = f"{s3_bucket}/mobile-price-classification" client = Client() diff --git a/rstudio/iris-classification/README.md b/rstudio/iris-classification/README.md index dc6fd71..3b27227 100644 --- a/rstudio/iris-classification/README.md +++ b/rstudio/iris-classification/README.md @@ -12,7 +12,7 @@ The included `iris-classification.r` script performs the following steps: - Loads the built-in Iris dataset and converts it to a tibble for tidy processing. 2. **Upload to S3/MinIO:** - - Exports the dataset as CSV and uploads it to an S3-compatible bucket for persistence and reuse. + - Exports the dataset as CSV and uploads it to the default prokube bucket for the current namespace (`-data`) for persistence and reuse. 3. **Exploratory Data Analysis (EDA):** - Summary statistics, class distribution, and feature distributions by species. diff --git a/rstudio/iris-classification/iris-classification.r b/rstudio/iris-classification/iris-classification.r index 63308b6..0838802 100644 --- a/rstudio/iris-classification/iris-classification.r +++ b/rstudio/iris-classification/iris-classification.r @@ -24,8 +24,9 @@ summary(iris_df) # 2. Upload to S3/MinIO # ============================================================================== -# Add your bucket here (e.g. -data) -bucket_name <- "" +# Uses the default prokube bucket for this namespace: -data +namespace <- trimws(readLines("/var/run/secrets/kubernetes.io/serviceaccount/namespace", warn = FALSE)) +bucket_name <- paste0(namespace, "-data") s3_key_csv <- "iris-classification/iris.csv" endpoint <- Sys.getenv("AWS_S3_ENDPOINT") @@ -154,5 +155,3 @@ ggplot(results_df, aes(x = Petal.Length, y = Petal.Width, subtitle = "X marks misclassifications", x = "Petal Length (cm)", y = "Petal Width (cm)") + theme_minimal() - - diff --git a/rstudio/mobile-price-classification/README.md b/rstudio/mobile-price-classification/README.md index 99e3e0f..1b2c436 100644 --- a/rstudio/mobile-price-classification/README.md +++ b/rstudio/mobile-price-classification/README.md @@ -13,8 +13,8 @@ The included `mobile-price-classification.r` script performs the following steps - Extracts the downloaded zip file into a local temporary directory. 3. **Upload to S3/MinIO:** - - Optionally uploads the extracted CSV files (`train.csv`, `test.csv`) to a specified S3 or MinIO bucket for reuse. - - Requires appropriate S3 endpoint and bucket configuration (already set up in the RStudio environment). + - Optionally uploads the extracted CSV files (`train.csv`, `test.csv`) to the default prokube bucket for the current namespace (`-data`) for reuse. + - Requires the S3 endpoint configuration already set up in the RStudio environment. 4. **Load Data:** - Loads the training and test data into R tibbles for further demonstration. @@ -24,4 +24,3 @@ The included `mobile-price-classification.r` script performs the following steps 6. **Exploratory Visualizations:** - Generates basic plots to illustrate class balance and feature relationships. - diff --git a/rstudio/mobile-price-classification/mobile-price-classification.r b/rstudio/mobile-price-classification/mobile-price-classification.r index 00def19..7cb174a 100644 --- a/rstudio/mobile-price-classification/mobile-price-classification.r +++ b/rstudio/mobile-price-classification/mobile-price-classification.r @@ -2,8 +2,9 @@ library(tidyverse) library(aws.s3) library(readr) -# Add your bucket here (e.g. -data) -bucket_name <- "" +# Uses the default prokube bucket for this namespace: -data +namespace <- trimws(readLines("/var/run/secrets/kubernetes.io/serviceaccount/namespace", warn = FALSE)) +bucket_name <- paste0(namespace, "-data") s3_key_train_csv <- "mobile-price-classification/train.csv" s3_key_test_csv <- "mobile-price-classification/test.csv" endpoint <- Sys.getenv("AWS_S3_ENDPOINT") @@ -68,5 +69,3 @@ ggplot(train_df, aes(x = battery_power, fill = factor(price_range))) + geom_histogram(position = "identity", alpha = 0.4, bins = 30) + labs(title = "Battery power distribution", x = "battery_power (mAh)", fill = "price_range") + theme_minimal() - - diff --git a/serving/minimal-s3-model/minimal-s3-model.ipynb b/serving/minimal-s3-model/minimal-s3-model.ipynb index d0f31d3..c47e6c5 100644 --- a/serving/minimal-s3-model/minimal-s3-model.ipynb +++ b/serving/minimal-s3-model/minimal-s3-model.ipynb @@ -96,7 +96,7 @@ "outputs": [], "source": [ "with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n", - " namespace = namespace_file.read()\n", + " namespace = namespace_file.read().strip()\n", "s3_bucket = f\"{namespace}-data\"\n", "s3_model_path = f\"{s3_bucket}/minimal-kserve-example\"\n", "print(f\"The created model will be uploaded to s3://{s3_model_path}\")" From 889afeb31540480b3ce9d8ae2c559e4c702dc7c9 Mon Sep 17 00:00:00 2001 From: Christian Geier Date: Tue, 26 May 2026 17:28:40 +0200 Subject: [PATCH 2/3] Use generic object storage terminology --- README.md | 2 +- mlflow/mlflow-kfp-example.ipynb | 2 +- .../mlflow-mobile-price-classification.ipynb | 32 ++++++++--------- .../mobile-price-classifications.ipynb | 22 ++++++------ .../mobile-price-classifications.ipynb | 36 +++++++++---------- .../lightweight-python-package/README.md | 2 +- .../lightweight-python-package/pipeline.py | 21 +++++------ .../mobile_price_classification/read_data.py | 12 +++---- .../submit-cluster.py | 4 +-- rstudio/iris-classification/README.md | 2 +- .../iris-classification/iris-classification.r | 4 +-- rstudio/mobile-price-classification/README.md | 2 +- .../mobile-price-classification.r | 2 +- serving/minimal-s3-model/README.md | 8 ++--- .../minimal-s3-model/minimal-s3-model.ipynb | 6 ++-- 15 files changed, 79 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 5edc0d2..05c3307 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ extensively to store intermediate and final task/pipeline artifacts. Furthermore to serve models directly from object storage. prokube.ai comes pre-configured with integrated object storage. Alternatively, admins can configure pipelines -to use other instances of object storage (e.g. self-hosted MinIO, AWS S3, GCS, etc.). +to use other instances of object storage (e.g. self-hosted S3-compatible storage, AWS S3, GCS, etc.). Many S3 libraries use environment variables for their configuration — those are usually: `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `S3_ENDPOINT`. They are likely already available in your environment. You can also ask your admin about them. diff --git a/mlflow/mlflow-kfp-example.ipynb b/mlflow/mlflow-kfp-example.ipynb index 10bb275..6a1c51e 100644 --- a/mlflow/mlflow-kfp-example.ipynb +++ b/mlflow/mlflow-kfp-example.ipynb @@ -81,7 +81,7 @@ "outputs": [], "source": [ "def add_env_vars_to_tasks(task_list: list[dsl.PipelineTask]) -> None:\n", - " \"\"\"Adds environment variables for MinIO to the MLflow tasks\"\"\"\n", + " \"\"\"Adds object storage environment variables to the MLflow tasks\"\"\"\n", " for task in task_list:\n", " use_secret_as_env(\n", " task,\n", diff --git a/mlflow/mobile-price-classification/mlflow-mobile-price-classification.ipynb b/mlflow/mobile-price-classification/mlflow-mobile-price-classification.ipynb index a55203b..54db4af 100644 --- a/mlflow/mobile-price-classification/mlflow-mobile-price-classification.ipynb +++ b/mlflow/mobile-price-classification/mlflow-mobile-price-classification.ipynb @@ -95,9 +95,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Configuration for MinIO bucket\n", + "# Configuration for object storage bucket\n", "# Uses the default prokube bucket for this namespace: -data\n", - "# You can check your buckets by opening a terminal in the notebook and run 'mc ls minio'\n", + "# You can check your buckets by opening your object storage browser or using the configured storage CLI alias.\n", "with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n", " namespace = namespace_file.read().strip()\n", "s3_bucket = f\"{namespace}-data\"\n", @@ -109,7 +109,7 @@ "id": "65946d3d", "metadata": {}, "source": [ - "## Download Dataset From Kagglehub and Upload to MinIO with Python" + "## Download Dataset From Kagglehub and Upload to Object Storage with Python" ] }, { @@ -132,14 +132,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Upload dataset to MinIO\n", + "# Upload dataset to object storage\n", "if not os.getenv(\"AWS_ACCESS_KEY_ID\") or not os.getenv(\"AWS_SECRET_ACCESS_KEY\"):\n", " raise ValueError(\"AWS credentials not found in environment variables.\")\n", "\n", "# Initialize S3 filesystem\n", "s3 = s3fs.S3FileSystem()\n", "\n", - "# Upload the dataset to MinIO\n", + "# Upload the dataset to object storage\n", "s3.put(f\"{dataset_path}/train.csv\", f\"{s3_dataset_path}/train.csv\")\n", "s3.put(f\"{dataset_path}/test.csv\", f\"{s3_dataset_path}/test.csv\")\n", "\n", @@ -174,16 +174,16 @@ " base_image=\"python:3.9\",\n", ")\n", "def read_data(\n", - " minio_train_data_path: str,\n", - " minio_test_data_path: str,\n", + " train_data_path: str,\n", + " test_data_path: str,\n", " train_df: Output[Dataset],\n", " test_df: Output[Dataset], \n", "):\n", " \"\"\"Reads training and test data writes it to pipeline artifacts as parquet.\"\"\"\n", " import pandas as pd\n", "\n", - " df_train = pd.read_csv(minio_train_data_path)\n", - " df_test = pd.read_csv(minio_test_data_path)\n", + " df_train = pd.read_csv(train_data_path)\n", + " df_test = pd.read_csv(test_data_path)\n", " \n", " df_train.to_parquet(train_df.path)\n", " df_test.to_parquet(test_df.path)" @@ -592,8 +592,8 @@ "source": [ "@dsl.pipeline\n", "def mobile_price_classification_pipeline(\n", - " minio_train_data_path: str,\n", - " minio_test_data_path: str,\n", + " train_data_path: str,\n", + " test_data_path: str,\n", " test_size: float = 0.5,\n", " C: List = [1, 0.1, 0.25, 0.5, 2, 0.75],\n", " kernel: List = [\"linear\", \"rbf\"],\n", @@ -618,11 +618,11 @@ " \n", " # Step 1: Read the data\n", " read_data_task = read_data(\n", - " minio_train_data_path=minio_train_data_path,\n", - " minio_test_data_path=minio_test_data_path,\n", + " train_data_path=train_data_path,\n", + " test_data_path=test_data_path,\n", " )\n", " # Use the cluster internal s3 endpoint\n", - " read_data_task.set_env_variable('AWS_ENDPOINT_URL',\"http://minio.minio\")\n", + " read_data_task.set_env_variable('AWS_ENDPOINT_URL', f'http://{os.environ[\"S3_ENDPOINT\"]}')\n", " # Use Kubernetes secrets to provide AWS credentials to the read_data component\n", " kubernetes.use_secret_as_env(\n", " read_data_task,\n", @@ -702,8 +702,8 @@ "\n", "# Define the arguments to be passed to the pipeline\n", "args = dict(\n", - " minio_train_data_path=f\"s3://{s3_dataset_path}/train.csv\",\n", - " minio_test_data_path=f\"s3://{s3_dataset_path}/test.csv\",\n", + " train_data_path=f\"s3://{s3_dataset_path}/train.csv\",\n", + " test_data_path=f\"s3://{s3_dataset_path}/test.csv\",\n", " test_size=0.2,\n", " C=[1, 0.1, 0.25, 0.5, 2, 0.75],\n", " kernel=[\"linear\", \"rbf\"],\n", diff --git a/notebooks/mobile-price-classification/mobile-price-classifications.ipynb b/notebooks/mobile-price-classification/mobile-price-classifications.ipynb index 7c25a98..3523043 100644 --- a/notebooks/mobile-price-classification/mobile-price-classifications.ipynb +++ b/notebooks/mobile-price-classification/mobile-price-classifications.ipynb @@ -45,16 +45,16 @@ "metadata": {}, "outputs": [], "source": [ - "# Configuration for MinIO bucket\n", + "# Configuration for object storage bucket\n", "# Uses the default prokube bucket for this namespace: -data\n", - "# You can check your buckets by opening a terminal in the notebook and run 'mc ls minio'\n", + "# You can check your buckets by opening your object storage browser or using the configured storage CLI alias.\n", "with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n", " namespace = namespace_file.read().strip()\n", "s3_bucket = f\"{namespace}-data\"\n", "s3_dataset_path = f\"{s3_bucket}/mobile-price-classification\"\n", "\n", - "minio_train_data_path=f\"s3://{s3_dataset_path}/train.csv\"\n", - "minio_test_data_path=f\"s3://{s3_dataset_path}/test.csv\"" + "train_data_path=f\"s3://{s3_dataset_path}/train.csv\"\n", + "test_data_path=f\"s3://{s3_dataset_path}/test.csv\"" ] }, { @@ -89,7 +89,7 @@ "id": "65946d3d", "metadata": {}, "source": [ - "## Download Dataset From Kagglehub and Upload to MinIO with Python" + "## Download Dataset From Kagglehub and Upload to Object Storage with Python" ] }, { @@ -112,14 +112,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Upload dataset to MinIO\n", + "# Upload dataset to object storage\n", "if not os.getenv(\"AWS_ACCESS_KEY_ID\") or not os.getenv(\"AWS_SECRET_ACCESS_KEY\"):\n", " raise ValueError(\"AWS credentials not found in environment variables.\")\n", "\n", "# Initialize S3 filesystem\n", "s3 = s3fs.S3FileSystem()\n", "\n", - "# Upload the dataset to MinIO\n", + "# Upload the dataset to object storage\n", "s3.put(f\"{dataset_path}/train.csv\", f\"{s3_dataset_path}/train.csv\")\n", "s3.put(f\"{dataset_path}/test.csv\", f\"{s3_dataset_path}/test.csv\")\n", "\n", @@ -149,9 +149,9 @@ "metadata": {}, "outputs": [], "source": [ - "# The storage options are preconfigured for the cluster internal MinIO, so we can read the data directly without additional configuration\n", - "df_train = pd.read_csv(minio_train_data_path)\n", - "df_test = pd.read_csv(minio_test_data_path)" + "# The storage options are preconfigured for the cluster internal object storage, so we can read the data directly without additional configuration\n", + "df_train = pd.read_csv(train_data_path)\n", + "df_test = pd.read_csv(test_data_path)" ] }, { @@ -346,7 +346,7 @@ "import plotly.express as px\n", "\n", "# Load the fitted scaler and preprocess test data\n", - "x_test = pd.read_csv(minio_test_data_path)\n", + "x_test = pd.read_csv(test_data_path)\n", "x_test = x_test.drop('id', axis=1)\n", "x_test = pd.DataFrame(scaler.transform(x_test), columns=x_test.columns)\n", "\n", diff --git a/pipelines/lightweight-components/mobile-price-classifications.ipynb b/pipelines/lightweight-components/mobile-price-classifications.ipynb index d21127f..49d9612 100644 --- a/pipelines/lightweight-components/mobile-price-classifications.ipynb +++ b/pipelines/lightweight-components/mobile-price-classifications.ipynb @@ -47,9 +47,9 @@ "metadata": {}, "outputs": [], "source": [ - "# Configuration for MinIO bucket\n", + "# Configuration for object storage bucket\n", "# Uses the default prokube bucket for this namespace: -data\n", - "# You can check your buckets by opening a terminal in the notebook and run 'mc ls minio'\n", + "# You can check your buckets by opening your object storage browser or using the configured storage CLI alias.\n", "with open(\"/var/run/secrets/kubernetes.io/serviceaccount/namespace\", \"r\") as namespace_file:\n", " namespace = namespace_file.read().strip()\n", "s3_bucket = f\"{namespace}-data\"\n", @@ -61,7 +61,7 @@ "id": "65946d3d", "metadata": {}, "source": [ - "## Download Dataset From Kagglehub and Upload to MinIO with Python" + "## Download Dataset From Kagglehub and Upload to Object Storage with Python" ] }, { @@ -84,14 +84,14 @@ "metadata": {}, "outputs": [], "source": [ - "# Upload dataset to MinIO\n", + "# Upload dataset to object storage\n", "if not os.getenv(\"AWS_ACCESS_KEY_ID\") or not os.getenv(\"AWS_SECRET_ACCESS_KEY\"):\n", " raise ValueError(\"AWS credentials not found in environment variables.\")\n", "\n", "# Initialize S3 filesystem\n", "s3 = s3fs.S3FileSystem()\n", "\n", - "# Upload the dataset to MinIO\n", + "# Upload the dataset to object storage\n", "s3.put(f\"{dataset_path}/train.csv\", f\"{s3_dataset_path}/train.csv\")\n", "s3.put(f\"{dataset_path}/test.csv\", f\"{s3_dataset_path}/test.csv\")\n", "\n", @@ -126,16 +126,16 @@ " base_image=\"python:3.9\",\n", ")\n", "def read_data(\n", - " minio_train_data_path: str,\n", - " minio_test_data_path: str,\n", + " train_data_path: str,\n", + " test_data_path: str,\n", " train_df: Output[Dataset],\n", " test_df: Output[Dataset], \n", "):\n", " \"\"\"Reads training and test data writes it to pipeline artifacts as parquet.\"\"\"\n", " import pandas as pd\n", "\n", - " df_train = pd.read_csv(minio_train_data_path)\n", - " df_test = pd.read_csv(minio_test_data_path)\n", + " df_train = pd.read_csv(train_data_path)\n", + " df_test = pd.read_csv(test_data_path)\n", " \n", " df_train.to_parquet(train_df.path)\n", " df_test.to_parquet(test_df.path)" @@ -490,8 +490,8 @@ "source": [ "@dsl.pipeline\n", "def mobile_price_classification_pipeline(\n", - " minio_train_data_path: str,\n", - " minio_test_data_path: str,\n", + " train_data_path: str,\n", + " test_data_path: str,\n", " test_size: float = 0.5,\n", " C: List = [1, 0.1, 0.25, 0.5, 2, 0.75],\n", " kernel: List = [\"linear\", \"rbf\"],\n", @@ -516,11 +516,11 @@ " \n", " # Step 1: Read the data\n", " read_data_task = read_data(\n", - " minio_train_data_path=minio_train_data_path,\n", - " minio_test_data_path=minio_test_data_path,\n", + " train_data_path=train_data_path,\n", + " test_data_path=test_data_path,\n", " )\n", " # Use the cluster internal s3 endpoint\n", - " read_data_task.set_env_variable('AWS_ENDPOINT_URL',\"http://minio.minio\")\n", + " read_data_task.set_env_variable('AWS_ENDPOINT_URL', f'http://{os.environ[\"S3_ENDPOINT\"]}')\n", " # Use Kubernetes secrets to provide AWS credentials to the read_data component\n", " kubernetes.use_secret_as_env(\n", " read_data_task,\n", @@ -596,8 +596,8 @@ "\n", "# Define the arguments to be passed to the pipeline\n", "args = dict(\n", - " minio_train_data_path=f\"s3://{s3_dataset_path}/train.csv\",\n", - " minio_test_data_path=f\"s3://{s3_dataset_path}/test.csv\",\n", + " train_data_path=f\"s3://{s3_dataset_path}/train.csv\",\n", + " test_data_path=f\"s3://{s3_dataset_path}/test.csv\",\n", " test_size=0.2,\n", " C=[1, 0.1, 0.25, 0.5, 2, 0.75],\n", " kernel=[\"linear\", \"rbf\"],\n", @@ -623,7 +623,7 @@ "metadata": {}, "source": [ "# Debugging with Lightweight Components\n", - "Debugging can be challenging when using lightweight components in Kubeflow Pipelines. A practical approach is to download the artifacts from the steps preceding the failing one, from MinIO, and then run the functions used in the components locally. You can easily copy the paths to these artifacts from the Kubeflow Pipelines UI. Once you have these paths, you can use them as shown in the example below to download and read in Pandas DataFrames or perform similar operations. Make sure to adjust the paths according to your specific setup requirements." + "Debugging can be challenging when using lightweight components in Kubeflow Pipelines. A practical approach is to download the artifacts from the steps preceding the failing one from object storage, and then run the functions used in the components locally. You can easily copy the paths to these artifacts from the Kubeflow Pipelines UI. Once you have these paths, you can use them as shown in the example below to download and read in Pandas DataFrames or perform similar operations. Make sure to adjust the paths according to your specific setup requirements." ] }, { @@ -666,7 +666,7 @@ "outputs": [], "source": [ "# use the mc tool to download model artifact\n", - "!mc cp minio/mlpipeline/v2/artifacts/mmobile-price-classification-pipeline/90e172f1-5143-475d-b02c-92fbc34338cb/train-model/trained_model ./trained_model" + "!mc cp /mlpipeline/v2/artifacts/mmobile-price-classification-pipeline/90e172f1-5143-475d-b02c-92fbc34338cb/train-model/trained_model ./trained_model" ] }, { diff --git a/pipelines/lightweight-python-package/README.md b/pipelines/lightweight-python-package/README.md index f907af6..12a3ba4 100644 --- a/pipelines/lightweight-python-package/README.md +++ b/pipelines/lightweight-python-package/README.md @@ -76,7 +76,7 @@ COMPONENTS_IMAGE = "/mobile-price-classification:v1" Follow the same dataset preparation steps as in the lightweight components example: 1. Download the dataset from Kaggle -2. Upload to your default MinIO bucket (`-data`) +2. Upload to your default object storage bucket (`-data`) ### 4. Run the Pipeline diff --git a/pipelines/lightweight-python-package/pipeline.py b/pipelines/lightweight-python-package/pipeline.py index b631606..f1d89b2 100644 --- a/pipelines/lightweight-python-package/pipeline.py +++ b/pipelines/lightweight-python-package/pipeline.py @@ -1,3 +1,4 @@ +import os from typing import Dict, List from kfp import dsl @@ -9,8 +10,8 @@ @dsl.component(base_image=COMPONENTS_IMAGE) def read_data( - minio_train_data_path: str, - minio_test_data_path: str, + train_data_path: str, + test_data_path: str, train_df: Output[Dataset], test_df: Output[Dataset], ): @@ -18,8 +19,8 @@ def read_data( from mobile_price_classification import read_data as _read_data _read_data( - minio_train_data_path=minio_train_data_path, - minio_test_data_path=minio_test_data_path, + train_data_path=train_data_path, + test_data_path=test_data_path, train_output_path=train_df.path, test_output_path=test_df.path, ) @@ -155,8 +156,8 @@ def test_model( @dsl.pipeline def mobile_price_classification_pipeline( - minio_train_data_path: str, - minio_test_data_path: str, + train_data_path: str, + test_data_path: str, test_size: float = 0.5, C: List = [1, 0.1, 0.25, 0.5, 2, 0.75], kernel: List = ["linear", "rbf"], @@ -182,11 +183,11 @@ def mobile_price_classification_pipeline( # Step 1: Read the data read_data_task = read_data( - minio_train_data_path=minio_train_data_path, - minio_test_data_path=minio_test_data_path, + train_data_path=train_data_path, + test_data_path=test_data_path, ) - # Use the cluster internal s3 endpoint - read_data_task.set_env_variable('AWS_ENDPOINT_URL',"http://minio.minio") + # Use the cluster internal object storage endpoint + read_data_task.set_env_variable('AWS_ENDPOINT_URL', f'http://{os.environ["S3_ENDPOINT"]}') # Use Kubernetes secrets to provide AWS credentials to the read_data component kubernetes.use_secret_as_env( read_data_task, diff --git a/pipelines/lightweight-python-package/src/mobile_price_classification/read_data.py b/pipelines/lightweight-python-package/src/mobile_price_classification/read_data.py index 6e3c7f4..af310d6 100644 --- a/pipelines/lightweight-python-package/src/mobile_price_classification/read_data.py +++ b/pipelines/lightweight-python-package/src/mobile_price_classification/read_data.py @@ -2,19 +2,19 @@ def read_data( - minio_train_data_path: str, - minio_test_data_path: str, + train_data_path: str, + test_data_path: str, train_output_path: str, test_output_path: str, ): """ - Read training and test CSV data from MinIO/S3 (or local paths) and save them as Parquet. + Read training and test CSV data from object storage (or local paths) and save them as Parquet. - When using S3/MinIO, access configuration is taken from environment variables such as + When using S3-compatible object storage, access configuration is taken from environment variables such as AWS_ENDPOINT_URL, AWS_ACCESS_KEY_ID, and AWS_SECRET_ACCESS_KEY. """ - df_train = pd.read_csv(minio_train_data_path) - df_test = pd.read_csv(minio_test_data_path) + df_train = pd.read_csv(train_data_path) + df_test = pd.read_csv(test_data_path) df_train.to_parquet(train_output_path) df_test.to_parquet(test_output_path) diff --git a/pipelines/lightweight-python-package/submit-cluster.py b/pipelines/lightweight-python-package/submit-cluster.py index 426bc47..d9499ae 100644 --- a/pipelines/lightweight-python-package/submit-cluster.py +++ b/pipelines/lightweight-python-package/submit-cluster.py @@ -16,8 +16,8 @@ mobile_price_classification_pipeline, enable_caching=True, arguments={ - "minio_train_data_path": f"s3://{s3_dataset_path}/train.csv", - "minio_test_data_path": f"s3://{s3_dataset_path}/test.csv", + "train_data_path": f"s3://{s3_dataset_path}/train.csv", + "test_data_path": f"s3://{s3_dataset_path}/test.csv", "test_size": 0.2, "C": [1, 0.1, 0.25, 0.5, 2, 0.75], "kernel": ["linear", "rbf"], diff --git a/rstudio/iris-classification/README.md b/rstudio/iris-classification/README.md index 3b27227..3ad7b0e 100644 --- a/rstudio/iris-classification/README.md +++ b/rstudio/iris-classification/README.md @@ -11,7 +11,7 @@ The included `iris-classification.r` script performs the following steps: 1. **Load and Prepare Data:** - Loads the built-in Iris dataset and converts it to a tibble for tidy processing. -2. **Upload to S3/MinIO:** +2. **Upload to Object Storage:** - Exports the dataset as CSV and uploads it to the default prokube bucket for the current namespace (`-data`) for persistence and reuse. 3. **Exploratory Data Analysis (EDA):** diff --git a/rstudio/iris-classification/iris-classification.r b/rstudio/iris-classification/iris-classification.r index 0838802..a0e66b4 100644 --- a/rstudio/iris-classification/iris-classification.r +++ b/rstudio/iris-classification/iris-classification.r @@ -21,7 +21,7 @@ glimpse(iris_df) summary(iris_df) # ============================================================================== -# 2. Upload to S3/MinIO +# 2. Upload to Object Storage # ============================================================================== # Uses the default prokube bucket for this namespace: -data @@ -34,7 +34,7 @@ endpoint <- Sys.getenv("AWS_S3_ENDPOINT") csv_path <- tempfile(fileext = ".csv") write_csv(iris_df, csv_path) -# S3 config for MinIO (HTTP, path-style, no region). +# S3-compatible object storage config (HTTP, path-style, no region). s3_cfg <- list(use_https = FALSE, region = "", use_path_style = TRUE) if (!bucket_exists(bucket_name, base_url = endpoint, diff --git a/rstudio/mobile-price-classification/README.md b/rstudio/mobile-price-classification/README.md index 1b2c436..7d1890d 100644 --- a/rstudio/mobile-price-classification/README.md +++ b/rstudio/mobile-price-classification/README.md @@ -12,7 +12,7 @@ The included `mobile-price-classification.r` script performs the following steps 2. **Unzip Dataset:** - Extracts the downloaded zip file into a local temporary directory. -3. **Upload to S3/MinIO:** +3. **Upload to Object Storage:** - Optionally uploads the extracted CSV files (`train.csv`, `test.csv`) to the default prokube bucket for the current namespace (`-data`) for reuse. - Requires the S3 endpoint configuration already set up in the RStudio environment. diff --git a/rstudio/mobile-price-classification/mobile-price-classification.r b/rstudio/mobile-price-classification/mobile-price-classification.r index 7cb174a..59699fa 100644 --- a/rstudio/mobile-price-classification/mobile-price-classification.r +++ b/rstudio/mobile-price-classification/mobile-price-classification.r @@ -26,7 +26,7 @@ unzip(zip_path, exdir = unzip_dir) train_path <- file.path(unzip_dir, "train.csv") test_path <- file.path(unzip_dir, "test.csv") -# Optional: push the CSVs to S3/MinIO so you can reuse them from there later. +# Optional: push the CSVs to object storage so you can reuse them from there later. # Safe the s3 config. s3_cfg <- list(use_https = FALSE, region = "", use_path_style = TRUE) diff --git a/serving/minimal-s3-model/README.md b/serving/minimal-s3-model/README.md index 6424eea..2ab1714 100644 --- a/serving/minimal-s3-model/README.md +++ b/serving/minimal-s3-model/README.md @@ -1,7 +1,7 @@ -# Minimal KServe + MinIO Model Serving Example +# Minimal KServe + Object Storage Model Serving Example -This example demonstrates end-to-end model serving on prokube from a MinIO bucket: training a simple sklearn model, -uploading it to MinIO, deploying it as a KServe InferenceService, and testing it with a prediction request. +This example demonstrates end-to-end model serving on prokube from an object storage bucket: training a simple sklearn model, +uploading it to object storage, deploying it as a KServe InferenceService, and testing it with a prediction request. **Requires prokube platform v1.7.0+.** To check your version, run the following in your notebook: @@ -12,6 +12,6 @@ uploading it to MinIO, deploying it as a KServe InferenceService, and testing it ## What the notebook does 1. Trains a small SVM classifier on the Iris dataset and serializes it with `joblib`. -2. Uploads the model to your MinIO bucket using `s3fs`. +2. Uploads the model to your bucket using `s3fs`. 3. Generates and deploys a KServe `InferenceService` manifest via `kubectl`. 4. Tests the deployed service using both the internal cluster URL and the external URL. diff --git a/serving/minimal-s3-model/minimal-s3-model.ipynb b/serving/minimal-s3-model/minimal-s3-model.ipynb index c47e6c5..6d1b063 100644 --- a/serving/minimal-s3-model/minimal-s3-model.ipynb +++ b/serving/minimal-s3-model/minimal-s3-model.ipynb @@ -58,7 +58,7 @@ "id": "e382f93b", "metadata": {}, "source": [ - "# Push the created model to s3 storage (MinIO)" + "# Push the created model to object storage" ] }, { @@ -129,7 +129,7 @@ "id": "c71cbc70-01cf-4ff5-a489-ca17dc2ca8b4", "metadata": {}, "source": [ - "Upload the model to MinIO:" + "Upload the model to object storage:" ] }, { @@ -169,7 +169,7 @@ "metadata": {}, "outputs": [], "source": [ - "inference_service_name = \"kserve-minio-test\"\n", + "inference_service_name = \"kserve-object-storage-test\"\n", "inference_service_manifest= \\\n", "f\"\"\"\n", "apiVersion: serving.kserve.io/v1beta1\n", From db3cadb6309f049d31dfe15fdf954a31d242924a Mon Sep 17 00:00:00 2001 From: Christian Geier Date: Tue, 26 May 2026 18:11:13 +0200 Subject: [PATCH 3/3] Keep default object storage CLI alias --- .../lightweight-components/mobile-price-classifications.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/lightweight-components/mobile-price-classifications.ipynb b/pipelines/lightweight-components/mobile-price-classifications.ipynb index 49d9612..f05881c 100644 --- a/pipelines/lightweight-components/mobile-price-classifications.ipynb +++ b/pipelines/lightweight-components/mobile-price-classifications.ipynb @@ -666,7 +666,7 @@ "outputs": [], "source": [ "# use the mc tool to download model artifact\n", - "!mc cp /mlpipeline/v2/artifacts/mmobile-price-classification-pipeline/90e172f1-5143-475d-b02c-92fbc34338cb/train-model/trained_model ./trained_model" + "!mc cp minio/mlpipeline/v2/artifacts/mmobile-price-classification-pipeline/90e172f1-5143-475d-b02c-92fbc34338cb/train-model/trained_model ./trained_model" ] }, {