diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index a56ebe474..f0ebc9771 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -48,6 +48,7 @@ jobs:
             | awk -F'/' '
                 /^functions\/src\// {print $1"/"$2"/"$3}
                 /^modules\/src\//  {print $1"/"$2"/"$3}
+                /^steps\/src\//  {print $1"/"$2"/"$3}
               ' \
             | sort -u
           )
diff --git a/.github/workflows/test-all.yaml b/.github/workflows/test-all.yaml
index d8eb6c6ed..14a914ca5 100644
--- a/.github/workflows/test-all.yaml
+++ b/.github/workflows/test-all.yaml
@@ -40,6 +40,7 @@ jobs:
             | awk -F'/' '
                 /^functions\/src\// {print $1"/"$2"/"$3}
                 /^modules\/src\//  {print $1"/"$2"/"$3}
+                /^steps\/src\//  {print $1"/"$2"/"$3}
               ' \
             | sort -u
           )
@@ -134,7 +135,7 @@ jobs:
       - name: Regenerate README tables
         env:
           CHANNEL: ${{ steps.branch.outputs.branch }}
-        run: python -m cli.cli update-readme -c $CHANNEL --asset functions --asset modules
+        run: python -m cli.cli update-readme -c $CHANNEL --asset functions --asset modules --asset steps
       - name: Commit & push (if changed)
         env:
           USERNAME: ${{ secrets.USERNAME }}
@@ -146,7 +147,7 @@ jobs:
           fi
           git config --local user.name $USERNAME
           git config --local user.email $USEREMAIL
-          git add functions/README.md modules/README.md || true
+          git add functions/README.md modules/README.md steps/README.md || true
           git commit -m "chore(readme): auto-update asset tables [skip ci]"
           git push
 
@@ -187,6 +188,7 @@ jobs:
           cd ..
           python -m cli.cli build-marketplace -s ./functions/src -sn functions -m marketplace -c $CHANNEL -v -f
           python -m cli.cli build-marketplace -s ./modules/src -sn modules -m marketplace -c $CHANNEL -v -f
+          python -m cli.cli build-marketplace -s ./steps/src -sn steps -m marketplace -c $CHANNEL -v -f
       ## Uncomment the following lines if you want to upload the built marketplace as an artifact
 #      - name: Upload built marketplace as artifact
 #        uses: actions/upload-artifact@v4
@@ -247,4 +249,4 @@ jobs:
           echo "Pushing [$BRANCH_NAME] to remote [$REMOTE]"
           git push -f $REMOTE $BRANCH_NAME
           echo "Submiting pull request..."
-          gh pr create --title "Marketplace update from $BRANCH_NAME" --body "github-workflow" --base $BASE_BRANCH --head $BRANCH_NAME --repo $BASE_REPO/$REPO_PATH
+          gh pr create --title "Marketplace update from $BRANCH_NAME" --body "github-workflow" --base $BASE_BRANCH --head $BRANCH_NAME --repo $BASE_REPO/$REPO_PATH
\ No newline at end of file
diff --git a/cli/common/generate_item_yaml.py b/cli/common/generate_item_yaml.py
index 542e98cd8..e97089ad3 100644
--- a/cli/common/generate_item_yaml.py
+++ b/cli/common/generate_item_yaml.py
@@ -7,6 +7,7 @@
 TEMPLATES = {
     "function": "cli/utils/function_item_template.yaml.j2",
     "module": "cli/utils/module_item_template.yaml.j2",
+    "step": "cli/utils/step_item_template.yaml.j2",
 }
 
 
diff --git a/cli/common/update_readme.py b/cli/common/update_readme.py
index 89b6aa094..f6e582bb6 100644
--- a/cli/common/update_readme.py
+++ b/cli/common/update_readme.py
@@ -22,7 +22,11 @@
 
 MARKER_START = "<!-- AUTOGEN:START"
 MARKER_END = "<!-- AUTOGEN:END -->"
-COLUMNS = ("Name", "Description", "Kind", "Categories")
+ASSET_COLUMNS = {
+    "functions": ("Name", "Description", "Kind", "Categories"),
+    "modules": ("Name", "Description", "Kind", "Categories"),
+    "steps": ("Name", "Description", "Class Name", "Categories"),
+}
 
 @click.command("update-readme")
 @click.option("-c", "--channel", default="master", help="Name of build channel")
@@ -31,7 +35,7 @@
     multiple=True,
     required=True,
     help="Asset types to process (e.g: functions). "
-         "Pass multiple: --assets functions --assets modules",
+         "Pass multiple: --asset functions --asset modules",
 )
 @click.option("--check", is_flag=True,
               help="Do not write; exit non‑zero if README(s) would change.")
@@ -45,21 +49,22 @@ def update_readme(channel: str, asset: Iterable[str],
     touched: list[str] = []
 
     for t in asset_list:
+        columns = ASSET_COLUMNS.get(t, ("Name", "Description", "Kind", "Categories"))
         if check:
             # simulate by reading/writing to a temp string, but easiest is: run update and revert if not checking
             # Instead: compute would-change by comparing strings without writing:
             root = Path(".").resolve()
             asset_dir = root / t
             readme = asset_dir / "README.md"
-            rows = _rows_for_asset_type(channel, asset_dir)
-            table_md = _build_table_md(rows)
+            rows = _rows_for_asset_type(channel, asset_dir, columns)
+            table_md = _build_table_md(rows, columns)
             old = readme.read_text() if readme.exists() else f"# {t.title()}\n\n"
             new = _replace_block(old, table_md)
             if new != old:
                 changed_any = True
                 touched.append(str(readme))
         else:
-            if _update_one(channel, t):
+            if _update_one(channel, t, columns):
                 changed_any = True
                 touched.append(str((Path(t) / "README.md").as_posix()))
 
@@ -79,13 +84,13 @@ def update_readme(channel: str, asset: Iterable[str],
             click.echo("No README changes.")
 
 
-def _rows_for_asset_type(channel: str, asset_dir: Path) -> List[Tuple[str, str, str, str]]:
+def _rows_for_asset_type(channel: str, asset_dir: Path, columns) -> list:
     """Scan <asset>/src/*/item.yaml and return table rows."""
     src = asset_dir / "src"
     if not src.exists():
         return []
 
-    rows: List[Tuple[str, str, str, str]] = []
+    rows = []
     for item_yaml in sorted(src.glob("*/item.yaml")):
         asset_name = item_yaml.parent.name
         try:
@@ -95,24 +100,39 @@ def _rows_for_asset_type(channel: str, asset_dir: Path) -> List[Tuple[str, str,
 
         desc = (data.get("description") or "").strip()
         kind = (data.get("spec", {}).get("kind", "")).strip()
+        class_name = (data.get("className", "")).strip()
         cats = data.get("categories") or []
         cats_str = ", ".join(c.strip() for c in cats) if isinstance(cats, list) else str(cats).strip()
         # Link the name to its source directory
         # Construct the relative path from the repo root for the asset
         rel_path = asset_dir.relative_to(Path(".").resolve())
         link = f"[{asset_name}](https://github.com/mlrun/functions/tree/{channel}/{rel_path}/src/{asset_name})"
-        rows.append((link, desc, kind, cats_str))
+        row = []
+        for col in columns:
+            if col == "Name":
+                row.append(link)
+            elif col == "Description":
+                row.append(desc)
+            elif col == "Kind":
+                row.append(kind)
+            elif col == "Class Name":
+                row.append(class_name)
+            elif col == "Categories":
+                row.append(cats_str)
+            else:
+                row.append("")
+        rows.append(tuple(row))
 
     rows.sort(key=lambda r: r[0].lower())
     return rows
 
 
-def _build_table_md(rows: List[Tuple[str, str, str, str]]) -> str:
+def _build_table_md(rows, columns) -> str:
     if not rows:
         return "_No items found_"
     lines = [
-        "| " + " | ".join(COLUMNS) + " |",
-        "| " + " | ".join("---" for _ in COLUMNS) + " |",
+        "| " + " | ".join(columns) + " |",
+        "| " + " | ".join("---" for _ in columns) + " |",
     ]
     for r in rows:
         lines.append("| " + " | ".join((cell or "").replace("\n", " ").strip() for cell in r) + " |")
@@ -143,14 +163,14 @@ def _replace_block(readme_text: str, new_block: str) -> str:
     return readme_text[:start_close] + "\n" + new_block + "\n" + readme_text[ei:]
 
 
-def _update_one(channel: str, asset_type: str) -> bool:
+def _update_one(channel: str, asset_type: str, columns) -> bool:
     """Generate/replace the table in <asset_type>/README.md. Return True if changed."""
     root = Path(".").resolve()
     asset_dir = root / asset_type
     readme = asset_dir / "README.md"
 
-    rows = _rows_for_asset_type(channel, asset_dir)
-    table_md = _build_table_md(rows)
+    rows = _rows_for_asset_type(channel, asset_dir, columns)
+    table_md = _build_table_md(rows, columns)
     old = readme.read_text() if readme.exists() else f"# {asset_type.title()}\n\n"
     new = _replace_block(old, table_md)
 
@@ -159,5 +179,3 @@ def _update_one(channel: str, asset_type: str) -> bool:
         readme.write_text(new)
         return True
     return False
-
-
diff --git a/cli/utils/step_item_template.yaml.j2 b/cli/utils/step_item_template.yaml.j2
new file mode 100644
index 000000000..937b4fbfe
--- /dev/null
+++ b/cli/utils/step_item_template.yaml.j2
@@ -0,0 +1,17 @@
+apiVersion: v1
+categories: []                                      {# List of category names #}
+description: ''                                     {# Short description #}
+example: {{ example|default('') }}                  {# Path to example notebook #}
+generationDate: {{ generationDate|default('') }}    {# Automatically generated ISO 8601 datetime #}
+hidden: false                                       {# Hide item from the UI #}
+labels:
+  author: Iguazio
+mlrunVersion: ''                                    {# Item’s MLRun version requirement; should follow Python’s versioning scheme #}
+name: {{ name|default('') }}                        {# Step name #}
+className: {{ className|default('') }}              {# Step class name #}
+defaultHandler: {{ defaultHandler|default('') }}    {# Default handler name #}
+spec:
+  filename: {{ filename|default('') }}              {# Implementation file #}
+  image: mlrun/mlrun                                {# Base image name #}
+  requirements: []                                  {# List of Pythonic library requirements #}
+version: 1.0.0                                      {# Step version, should follow standard semantic versioning schema #}
\ No newline at end of file
diff --git a/functions/README.md b/functions/README.md
index e6b45ddfb..3618833a5 100644
--- a/functions/README.md
+++ b/functions/README.md
@@ -9,40 +9,40 @@ it is expected that contributors follow certain guidelines/protocols (please chi
 <!-- AUTOGEN:START (do not edit below) -->
 | Name | Description | Kind | Categories |
 | --- | --- | --- | --- |
-| [aggregate](https://github.com/mlrun/functions/tree/master/functions/src/aggregate) | Rolling aggregation over Metrics and Lables according to specifications | job | data-preparation |
-| [arc_to_parquet](https://github.com/mlrun/functions/tree/master/functions/src/arc_to_parquet) | retrieve remote archive, open and save as parquet | job | utils |
-| [auto_trainer](https://github.com/mlrun/functions/tree/master/functions/src/auto_trainer) | Automatic train, evaluate and predict functions for the ML frameworks - Scikit-Learn, XGBoost and LightGBM. | job | machine-learning, model-training |
-| [azureml_serving](https://github.com/mlrun/functions/tree/master/functions/src/azureml_serving) | AzureML serving function | serving | machine-learning, model-serving |
-| [azureml_utils](https://github.com/mlrun/functions/tree/master/functions/src/azureml_utils) | Azure AutoML integration in MLRun, including utils functions for training models on Azure AutoML platfrom. | job | model-serving, utils |
-| [batch_inference](https://github.com/mlrun/functions/tree/master/functions/src/batch_inference) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving |
-| [batch_inference_v2](https://github.com/mlrun/functions/tree/master/functions/src/batch_inference_v2) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving |
-| [describe](https://github.com/mlrun/functions/tree/master/functions/src/describe) | describe and visualizes dataset stats | job | data-analysis |
-| [describe_dask](https://github.com/mlrun/functions/tree/master/functions/src/describe_dask) | describe and visualizes dataset stats | job | data-analysis |
-| [describe_spark](https://github.com/mlrun/functions/tree/master/functions/src/describe_spark) |  | job | data-analysis |
-| [feature_selection](https://github.com/mlrun/functions/tree/master/functions/src/feature_selection) | Select features through multiple Statistical and Model filters | job | data-preparation, machine-learning |
-| [gen_class_data](https://github.com/mlrun/functions/tree/master/functions/src/gen_class_data) | Create a binary classification sample dataset and save. | job | data-generation |
-| [github_utils](https://github.com/mlrun/functions/tree/master/functions/src/github_utils) | add comments to github pull request | job | utils |
-| [hugging_face_serving](https://github.com/mlrun/functions/tree/master/functions/src/hugging_face_serving) | Generic Hugging Face model server. | serving | genai, model-serving |
-| [load_dataset](https://github.com/mlrun/functions/tree/master/functions/src/load_dataset) | load a toy dataset from scikit-learn | job | data-preparation |
-| [mlflow_utils](https://github.com/mlrun/functions/tree/master/functions/src/mlflow_utils) | Mlflow model server, and additional utils. | serving | model-serving, utils |
-| [model_server](https://github.com/mlrun/functions/tree/master/functions/src/model_server) | generic sklearn model server | nuclio:serving | model-serving, machine-learning |
-| [model_server_tester](https://github.com/mlrun/functions/tree/master/functions/src/model_server_tester) | test model servers | job | monitoring, model-serving |
-| [noise_reduction](https://github.com/mlrun/functions/tree/master/functions/src/noise_reduction) | Reduce noise from audio files | job | data-preparation, audio |
-| [onnx_utils](https://github.com/mlrun/functions/tree/master/functions/src/onnx_utils) | ONNX intigration in MLRun, some utils functions for the ONNX framework, optimizing and converting models from different framework to ONNX using MLRun. | job | utils, deep-learning |
-| [open_archive](https://github.com/mlrun/functions/tree/master/functions/src/open_archive) | Open a file/object archive into a target directory | job | utils |
-| [pii_recognizer](https://github.com/mlrun/functions/tree/master/functions/src/pii_recognizer) | This function is used to recognize PII in a directory of text files | job | data-preparation, NLP |
-| [pyannote_audio](https://github.com/mlrun/functions/tree/master/functions/src/pyannote_audio) | pyannote's speech diarization of audio files | job | deep-learning, audio |
-| [question_answering](https://github.com/mlrun/functions/tree/master/functions/src/question_answering) | GenAI approach of question answering on a given data | job | genai |
-| [send_email](https://github.com/mlrun/functions/tree/master/functions/src/send_email) | Send Email messages through SMTP server | job | utils |
-| [silero_vad](https://github.com/mlrun/functions/tree/master/functions/src/silero_vad) | Silero VAD (Voice Activity Detection) functions. | job | deep-learning, audio |
-| [sklearn_classifier](https://github.com/mlrun/functions/tree/master/functions/src/sklearn_classifier) | train any classifier using scikit-learn's API | job | machine-learning, model-training |
-| [sklearn_classifier_dask](https://github.com/mlrun/functions/tree/master/functions/src/sklearn_classifier_dask) | train any classifier using scikit-learn's API over Dask | job | machine-learning, model-training |
-| [structured_data_generator](https://github.com/mlrun/functions/tree/master/functions/src/structured_data_generator) | GenAI approach of generating structured data according to a given schema | job | data-generation, genai |
-| [test_classifier](https://github.com/mlrun/functions/tree/master/functions/src/test_classifier) | test a classifier using held-out or new data | job | machine-learning, model-testing |
-| [text_to_audio_generator](https://github.com/mlrun/functions/tree/master/functions/src/text_to_audio_generator) | Generate audio file from text using different speakers | job | data-generation, audio |
-| [tf2_serving](https://github.com/mlrun/functions/tree/master/functions/src/tf2_serving) | tf2 image classification server | nuclio:serving | model-serving, machine-learning |
-| [transcribe](https://github.com/mlrun/functions/tree/master/functions/src/transcribe) | Transcribe audio files into text files | job | audio, genai |
-| [translate](https://github.com/mlrun/functions/tree/master/functions/src/translate) | Translate text files from one language to another | job | genai, NLP |
-| [v2_model_server](https://github.com/mlrun/functions/tree/master/functions/src/v2_model_server) | generic sklearn model server | serving | model-serving, machine-learning |
-| [v2_model_tester](https://github.com/mlrun/functions/tree/master/functions/src/v2_model_tester) | test v2 model servers | job | model-testing, machine-learning |
+| [aggregate](https://github.com/mlrun/functions/tree/development/functions/src/aggregate) | Rolling aggregation over Metrics and Lables according to specifications | job | data-preparation |
+| [arc_to_parquet](https://github.com/mlrun/functions/tree/development/functions/src/arc_to_parquet) | retrieve remote archive, open and save as parquet | job | utils |
+| [auto_trainer](https://github.com/mlrun/functions/tree/development/functions/src/auto_trainer) | Automatic train, evaluate and predict functions for the ML frameworks - Scikit-Learn, XGBoost and LightGBM. | job | machine-learning, model-training |
+| [azureml_serving](https://github.com/mlrun/functions/tree/development/functions/src/azureml_serving) | AzureML serving function | serving | machine-learning, model-serving |
+| [azureml_utils](https://github.com/mlrun/functions/tree/development/functions/src/azureml_utils) | Azure AutoML integration in MLRun, including utils functions for training models on Azure AutoML platfrom. | job | model-serving, utils |
+| [batch_inference](https://github.com/mlrun/functions/tree/development/functions/src/batch_inference) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving |
+| [batch_inference_v2](https://github.com/mlrun/functions/tree/development/functions/src/batch_inference_v2) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving |
+| [describe](https://github.com/mlrun/functions/tree/development/functions/src/describe) | describe and visualizes dataset stats | job | data-analysis |
+| [describe_dask](https://github.com/mlrun/functions/tree/development/functions/src/describe_dask) | describe and visualizes dataset stats | job | data-analysis |
+| [describe_spark](https://github.com/mlrun/functions/tree/development/functions/src/describe_spark) |  | job | data-analysis |
+| [feature_selection](https://github.com/mlrun/functions/tree/development/functions/src/feature_selection) | Select features through multiple Statistical and Model filters | job | data-preparation, machine-learning |
+| [gen_class_data](https://github.com/mlrun/functions/tree/development/functions/src/gen_class_data) | Create a binary classification sample dataset and save. | job | data-generation |
+| [github_utils](https://github.com/mlrun/functions/tree/development/functions/src/github_utils) | add comments to github pull request | job | utils |
+| [hugging_face_serving](https://github.com/mlrun/functions/tree/development/functions/src/hugging_face_serving) | Generic Hugging Face model server. | serving | genai, model-serving |
+| [load_dataset](https://github.com/mlrun/functions/tree/development/functions/src/load_dataset) | load a toy dataset from scikit-learn | job | data-preparation |
+| [mlflow_utils](https://github.com/mlrun/functions/tree/development/functions/src/mlflow_utils) | Mlflow model server, and additional utils. | serving | model-serving, utils |
+| [model_server](https://github.com/mlrun/functions/tree/development/functions/src/model_server) | generic sklearn model server | nuclio:serving | model-serving, machine-learning |
+| [model_server_tester](https://github.com/mlrun/functions/tree/development/functions/src/model_server_tester) | test model servers | job | monitoring, model-serving |
+| [noise_reduction](https://github.com/mlrun/functions/tree/development/functions/src/noise_reduction) | Reduce noise from audio files | job | data-preparation, audio |
+| [onnx_utils](https://github.com/mlrun/functions/tree/development/functions/src/onnx_utils) | ONNX intigration in MLRun, some utils functions for the ONNX framework, optimizing and converting models from different framework to ONNX using MLRun. | job | utils, deep-learning |
+| [open_archive](https://github.com/mlrun/functions/tree/development/functions/src/open_archive) | Open a file/object archive into a target directory | job | utils |
+| [pii_recognizer](https://github.com/mlrun/functions/tree/development/functions/src/pii_recognizer) | This function is used to recognize PII in a directory of text files | job | data-preparation, NLP |
+| [pyannote_audio](https://github.com/mlrun/functions/tree/development/functions/src/pyannote_audio) | pyannote's speech diarization of audio files | job | deep-learning, audio |
+| [question_answering](https://github.com/mlrun/functions/tree/development/functions/src/question_answering) | GenAI approach of question answering on a given data | job | genai |
+| [send_email](https://github.com/mlrun/functions/tree/development/functions/src/send_email) | Send Email messages through SMTP server | job | utils |
+| [silero_vad](https://github.com/mlrun/functions/tree/development/functions/src/silero_vad) | Silero VAD (Voice Activity Detection) functions. | job | deep-learning, audio |
+| [sklearn_classifier](https://github.com/mlrun/functions/tree/development/functions/src/sklearn_classifier) | train any classifier using scikit-learn's API | job | machine-learning, model-training |
+| [sklearn_classifier_dask](https://github.com/mlrun/functions/tree/development/functions/src/sklearn_classifier_dask) | train any classifier using scikit-learn's API over Dask | job | machine-learning, model-training |
+| [structured_data_generator](https://github.com/mlrun/functions/tree/development/functions/src/structured_data_generator) | GenAI approach of generating structured data according to a given schema | job | data-generation, genai |
+| [test_classifier](https://github.com/mlrun/functions/tree/development/functions/src/test_classifier) | test a classifier using held-out or new data | job | machine-learning, model-testing |
+| [text_to_audio_generator](https://github.com/mlrun/functions/tree/development/functions/src/text_to_audio_generator) | Generate audio file from text using different speakers | job | data-generation, audio |
+| [tf2_serving](https://github.com/mlrun/functions/tree/development/functions/src/tf2_serving) | tf2 image classification server | nuclio:serving | model-serving, machine-learning |
+| [transcribe](https://github.com/mlrun/functions/tree/development/functions/src/transcribe) | Transcribe audio files into text files | job | audio, genai |
+| [translate](https://github.com/mlrun/functions/tree/development/functions/src/translate) | Translate text files from one language to another | job | genai, NLP |
+| [v2_model_server](https://github.com/mlrun/functions/tree/development/functions/src/v2_model_server) | generic sklearn model server | serving | model-serving, machine-learning |
+| [v2_model_tester](https://github.com/mlrun/functions/tree/development/functions/src/v2_model_tester) | test v2 model servers | job | model-testing, machine-learning |
 <!-- AUTOGEN:END -->
diff --git a/modules/README.md b/modules/README.md
index 7c69401c1..aaad9863e 100644
--- a/modules/README.md
+++ b/modules/README.md
@@ -6,9 +6,10 @@
 <!-- AUTOGEN:START (do not edit below) -->
 | Name | Description | Kind | Categories |
 | --- | --- | --- | --- |
-| [agent_deployer](https://github.com/mlrun/functions/tree/master/modules/src/agent_deployer) | Helper for serving function deploy of an AI agents using MLRun | monitoring_application | model-serving |
-| [count_events](https://github.com/mlrun/functions/tree/master/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving |
-| [evidently_iris](https://github.com/mlrun/functions/tree/master/modules/src/evidently_iris) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML |
-| [histogram_data_drift](https://github.com/mlrun/functions/tree/master/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML |
-| [openai_proxy_app](https://github.com/mlrun/functions/tree/master/modules/src/openai_proxy_app) | OpenAI application runtime based on fastapi | generic | genai |
+| [agent_deployer](https://github.com/mlrun/functions/tree/development/modules/src/agent_deployer) | Helper for serving function deploy of an AI agents using MLRun | monitoring_application | model-serving |
+| [count_events](https://github.com/mlrun/functions/tree/development/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving |
+| [evidently_iris](https://github.com/mlrun/functions/tree/development/modules/src/evidently_iris) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML |
+| [histogram_data_drift](https://github.com/mlrun/functions/tree/development/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML |
+| [openai_proxy_app](https://github.com/mlrun/functions/tree/development/modules/src/openai_proxy_app) | OpenAI application runtime based on fastapi | generic | genai |
+| [vllm_module](https://github.com/mlrun/functions/tree/development/modules/src/vllm_module) | Deploys a vLLM OpenAI-compatible LLM server as an MLRun application runtime, with configurable GPU usage, node selection, tensor parallelism, and runtime flags. | generic | genai |
 <!-- AUTOGEN:END -->
diff --git a/modules/src/agent_deployer/agent_deployer.ipynb b/modules/src/agent_deployer/agent_deployer.ipynb
index 944dba116..98531ecd1 100644
--- a/modules/src/agent_deployer/agent_deployer.ipynb
+++ b/modules/src/agent_deployer/agent_deployer.ipynb
@@ -10,18 +10,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
    "id": "be42e7c5-b2af-476f-8041-c17be56edb52",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "> 2025-12-03 07:17:36,530 [info] Project loaded successfully: {\"project_name\":\"langchain-example-10\"}\n"
-     ]
-    }
-   ],
    "source": [
     "%config Completer.use_jedi = False\n",
     "\n",
@@ -31,7 +21,9 @@
     "image = \"mlrun/mlrun\"\n",
     "project_name = \"langchain-example\"\n",
     "project = get_or_create_project(project_name, context=\"./\", allow_cross_project=True)"
-   ]
+   ],
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -43,17 +35,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
    "id": "a47d7789-2ea2-493e-8905-f53b978e2abd",
    "metadata": {},
-   "outputs": [],
    "source": [
     "# Create project secrets for project\n",
     "secrets = {\"OPENAI_API_KEY\": \"\", # add your OpenAI API key here\n",
     "          \"OPENAI_BASE_URL\": \"\" # add your OpenAI base url here if needed\n",
     "          }\n",
     "project.set_secrets(secrets=secrets, provider=\"kubernetes\")"
-   ]
+   ],
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -65,10 +57,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "25cbd982-86de-43b5-91ef-24fc60b2d758",
    "metadata": {},
-   "outputs": [],
    "source": [
     "%%writefile langchain_model.py\n",
     "\n",
@@ -197,7 +187,9 @@
     "        result[\"total_cost_usd\"] = input_cost + output_cost\n",
     "        return result\n",
     "            "
-   ]
+   ],
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -209,20 +201,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
    "id": "691e9068-ec9c-40d6-9ac8-e6c3e605b44c",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "> 2025-12-03 10:55:46,194 [info] Project loaded successfully: {\"project_name\":\"langchain-example-10\"}\n",
-      "> 2025-12-03 10:55:46,463 [info] Model monitoring credentials were set successfully. Please keep in mind that if you already had model monitoring functions / model monitoring infra / tracked model server deployed on your project, you will need to redeploy them. For redeploying the model monitoring infra, first disable it using `project.disable_model_monitoring()` and then enable it using `project.enable_model_monitoring()`.\n",
-      "details: MLRunConflictError(\"The following model-montioring infrastructure functions are already deployed, aborting: ['model-monitoring-controller', 'model-monitoring-writer']\\nIf you want to redeploy the model-monitoring controller (maybe with different base-period), use update_model_monitoring_controller.If you want to redeploy all of model-monitoring infrastructure, call disable_model_monitoringbefore calling enable_model_monitoring again.\")\n"
-     ]
-    }
-   ],
    "source": [
     "module = mlrun.import_module(\"hub://agent_deployer\")\n",
     "\n",
@@ -237,11 +217,11 @@
     "            prompt_template= \"\"\"\n",
     "            Answer the following questions as best you can.\n",
     "            You have access to the following tools:\n",
-    "            {tools}\n",
+    "            {{tools}}\n",
     "            Use the following format:\n",
     "            Question: the input question you must answer\n",
     "            Thought: you should always think about what to do\n",
-    "            Action: the action to take, should be one of [{tool_names}]\n",
+    "            Action: the action to take, should be one of [{{tool_names}}]\n",
     "            Action Input: the input to the action\n",
     "            Observation: the result of the action\n",
     "            ... (this Thought/Action/Action Input/Observation can repeat N times)\n",
@@ -252,16 +232,18 @@
     "            Question: {input}\n",
     "            Thought:{agent_scratchpad}\n",
     "            \"\"\",\n",
-    ")"
-   ]
+    ")\n"
+   ],
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "code",
-   "execution_count": 82,
    "id": "0bb1c4d1-5d7c-4d1c-bf51-8f53b319e91f",
    "metadata": {},
+   "source": "func = agent.deploy_function(enable_tracking=True)",
    "outputs": [],
-   "source": "func = agent.deploy_function(enable_tracking=True)"
+   "execution_count": null
   },
   {
    "metadata": {},
@@ -272,10 +254,10 @@
   {
    "metadata": {},
    "cell_type": "code",
+   "source": "func.invoke(\"./\", {\"question\" : \"If a pizza costs $18.75 and I want to buy 3, what is the total cost?\"})",
+   "id": "ac5c3ba174d2cf8b",
    "outputs": [],
-   "execution_count": null,
-   "source": "func.invoke(\"./\", {\"question\" : \"If a pizza costs $18.75 and I want to buy 3, plus a 15% tip, what is the total cost?\"})",
-   "id": "ac5c3ba174d2cf8b"
+   "execution_count": null
   },
   {
    "metadata": {},
@@ -289,8 +271,6 @@
   {
    "metadata": {},
    "cell_type": "code",
-   "outputs": [],
-   "execution_count": null,
    "source": [
     "%%writefile monitoring_application.py\n",
     "\n",
@@ -405,7 +385,9 @@
     "            value=value,\n",
     "        )\n"
    ],
-   "id": "377487422f5ed289"
+   "id": "377487422f5ed289",
+   "outputs": [],
+   "execution_count": null
   },
   {
    "metadata": {},
@@ -416,8 +398,6 @@
   {
    "metadata": {},
    "cell_type": "code",
-   "outputs": [],
-   "execution_count": null,
    "source": [
     "llm_monitoring_app = project.set_model_monitoring_function(\n",
     "    func=\"monitoring_application.py\",\n",
@@ -428,7 +408,9 @@
     "\n",
     "project.deploy_function(llm_monitoring_app)"
    ],
-   "id": "9d6ad2a4a47a44bd"
+   "id": "9d6ad2a4a47a44bd",
+   "outputs": [],
+   "execution_count": null
   }
  ],
  "metadata": {
diff --git a/modules/src/vllm_module/item.yaml b/modules/src/vllm_module/item.yaml
new file mode 100644
index 000000000..ca684340f
--- /dev/null
+++ b/modules/src/vllm_module/item.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+categories:
+- genai
+description: Deploys a vLLM OpenAI-compatible LLM server as an MLRun application runtime, with configurable GPU usage, node selection, tensor parallelism, and runtime flags.
+example: vllm_module.ipynb
+generationDate: 2025-12-17:12-25
+hidden: false
+labels:
+  author: Iguazio
+mlrunVersion: 1.10.0
+name: vllm_module
+spec:
+    filename: vllm_module.py
+    image: mlrun/mlrun
+    kind: generic
+version: 1.0.0
diff --git a/modules/src/vllm_module/test_vllm_module.py b/modules/src/vllm_module/test_vllm_module.py
new file mode 100644
index 000000000..3a5f422ae
--- /dev/null
+++ b/modules/src/vllm_module/test_vllm_module.py
@@ -0,0 +1,35 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from vllm_module import VLLMModule
+import mlrun
+
+
+class TestVllmModule:
+    """Test suite for VLLMModule class."""
+
+    def setup_method(self):
+        project = mlrun.new_project("vllm", save=False)
+
+        # if your VLLMModule requires node_selector as keyword-only, keep it here
+        self.TestVllmModule = VLLMModule(
+            project,
+            node_selector={"alpha.eksctl.io/nodegroup-name": "added-gpu"},
+        )
+
+    def test_vllm_module(self):
+        assert (
+            type(self.TestVllmModule.vllm_app) == mlrun.runtimes.nuclio.application.application.ApplicationRuntime
+        )
diff --git a/modules/src/vllm_module/vllm_module.ipynb b/modules/src/vllm_module/vllm_module.ipynb
new file mode 100644
index 000000000..2452dbd22
--- /dev/null
+++ b/modules/src/vllm_module/vllm_module.ipynb
@@ -0,0 +1,234 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "7d551647-dfc2-47da-bc8a-3792af622073",
+   "metadata": {},
+   "source": [
+    "# vLLM Module with MLRun\n",
+    "\n",
+    "This notebook shows how to configure and deploy a vLLM OpenAI-compatible server as an MLRun application runtime, and then how to send a chat request to that server."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "7707b270-30cc-448a-a828-cb93aa28030d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import mlrun\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d5cff681-bfdf-4468-a1d1-2aeadb56065e",
+   "metadata": {},
+   "source": [
+    "## Prerequisite\n",
+    "* At least one GPU is required for running this notebook."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d5c84798-289f-4b4f-8c1b-f4dd12a3bda5",
+   "metadata": {},
+   "source": [
+    "## What this notebook does\n",
+    "\n",
+    "In this notebook we will:\n",
+    "\n",
+    "- Create or load an **MLRun project**\n",
+    "- Import a custom **vLLM module** from the MLRun Hub\n",
+    "- Deploy a **vLLM OpenAI-compatible server** as an MLRun application runtime\n",
+    "- Configure deployment parameters such as model, GPU count, memory, node selector, port, and log level\n",
+    "- Invoke the deployed service using the `/v1/chat/completions` endpoint\n",
+    "- Parse the response and extract only the assistant’s generated text\n",
+    "\n",
+    "By the end of this notebook, you will have a working vLLM deployment that can be queried directly from a Jupyter notebook using OpenAI-style APIs.\n",
+    "\n",
+    "For more information, see the [vLLM documentation](https://docs.vllm.ai/en/latest/serving/openai_compatible_server/)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "879ca641-ee35-4682-9995-4eb319d89090",
+   "metadata": {},
+   "source": [
+    "## 1. Create an MLRun project\n",
+    "\n",
+    "In this section we create or load an MLRun project that will own the deployed vLLM application runtime."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6eac263a-17d1-4454-9e19-459dfbe2f231",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "project = mlrun.get_or_create_project(name=\"vllm-module\", context=\"\", user_project=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "da49d335-b704-4fb6-801f-4d07b64f9be6",
+   "metadata": {},
+   "source": [
+    "## 2. Import the vLLM module from the MLRun Hub\n",
+    "\n",
+    "In this section we import the vLLM module from the MLRun Hub so we can instantiate `VLLMModule` and deploy it as an application runtime."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e6d89dee-db58-4c0c-8009-b37020c9599a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vllm = mlrun.import_module(\"hub://vllm-module\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1202ddd5-0ce7-4769-be29-8fc264c1f80e",
+   "metadata": {},
+   "source": [
+    "## 3. Deploy the vLLM application runtime\n",
+    "\n",
+    "Configure the vLLM deployment parameters and deploy the application.\n",
+    "\n",
+    "The returned address is the service URL for the application runtime."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e433123a-e64b-4a7a-8c7f-8165bcdcc6d1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Initialize the vLLM app\n",
+    "vllm_module = vllm.VLLMModule(\n",
+    "    project=project,\n",
+    "    node_selector={\"alpha.eksctl.io/nodegroup-name\": \"added-gpu\"},\n",
+    "    name=\"qwen-vllm\",\n",
+    "    image=\"vllm/vllm-openai:latest\",\n",
+    "    model=\"Qwen/Qwen2.5-Omni-3B\",\n",
+    "    gpus=1,\n",
+    "    mem=\"10G\",\n",
+    "    port=8000,\n",
+    "    dtype=\"auto\",\n",
+    "    uvicorn_log_level=\"info\",\n",
+    "    max_tokens = 501,\n",
+    ")\n",
+    "\n",
+    "# Deploy the vLLM app\n",
+    "addr = vllm_module.vllm_app.deploy(with_mlrun=True)\n",
+    "addr"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "06832de3-5c31-43bf-b07b-0e71fb2d072d",
+   "metadata": {},
+   "source": [
+    "## 4. Get the runtime handle\n",
+    "\n",
+    "Fetch the runtime object and invoke the service using `app.invoke(...)`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "102d3fd0-1ee6-49b8-8c86-df742ac1c559",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: get_runtime() returns the MLRun application runtime (the same object as vllm_module.vllm_app)\n",
+    "app = vllm_module.get_runtime()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "925730c1-0ac5-454b-8fb2-ab8cebb3f3ac",
+   "metadata": {},
+   "source": [
+    "## 5. Send a chat request for testing\n",
+    "\n",
+    "Call the OpenAI compatible endpoint `/v1/chat/completions`, parse the JSON response, and print only the assistant message text."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "31bc78d4-1c6f-439c-b894-1522e3a6d3e6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "body = {\n",
+    "    \"model\": vllm_module.model,\n",
+    "    \"messages\": [{\"role\": \"user\", \"content\": \"what are the 3 countries with the most gpu as far as you know\"}],\n",
+    "    \"max_tokens\": vllm_module.max_tokens,     # start smaller for testing\n",
+    "}\n",
+    "\n",
+    "resp = app.invoke(path=\"/v1/chat/completions\", body=body)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "a459d5f8-dad0-4735-94c2-3801d4f94bb5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "assistant:\n",
+      "\n",
+      "As of the most commonly cited estimates, the three countries with the largest GPU capacity for AI workloads are the United States, China, and India.\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = resp\n",
+    "assistant_text = data[\"choices\"][0][\"message\"][\"content\"]\n",
+    "\n",
+    "print(\"\\nassistant:\\n\")\n",
+    "print(assistant_text.strip())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "957b5d21-7ade-4131-9100-878652c477fc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "mlrun-base",
+   "language": "python",
+   "name": "conda-env-mlrun-base-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.22"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/modules/src/vllm_module/vllm_module.py b/modules/src/vllm_module/vllm_module.py
new file mode 100644
index 000000000..50bc9f038
--- /dev/null
+++ b/modules/src/vllm_module/vllm_module.py
@@ -0,0 +1,131 @@
+# Copyright 2025 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This module deploys a vLLM OpenAI-compatible LLM server as an MLRun application runtime.
+# It configures the container image, GPU and memory limits, node selection, and the `vllm serve` flags.
+# The deployed service exposes OpenAI-style endpoints (e.g. /v1/chat/completions) that can be called from notebooks.
+
+
+from typing import Dict, Optional, List
+
+class VLLMModule:
+    """
+    VLLMModule
+    
+    This module provides a lightweight wrapper for deploying a vLLM
+    (OpenAI-compatible) large language model server as an MLRun application runtime.
+    
+    The VLLMModule is responsible for:
+    - Creating an MLRun application runtime based on a vLLM container image
+    - Configuring GPU resources, memory limits, and Kubernetes node selection
+    - Launching the model using `vllm serve` with configurable runtime flags
+    - Supporting multi-GPU inference via tensor parallelism
+    - Automatically configuring shared memory (/dev/shm) when using multiple GPUs
+    - Exposing an OpenAI-compatible API (e.g. /v1/chat/completions) for inference
+    - Providing a simple Python interface for deployment and invocation from Jupyter notebooks
+    
+    The module is designed to be used in Jupyter notebooks and MLRun pipelines,
+    allowing users to deploy and test large language models on Kubernetes
+    with minimal configuration.
+    """
+
+    def __init__(
+            self,
+            project: str,
+            *,
+            node_selector: Optional[Dict[str, str]] = None,
+            name: str = "vllm",
+            image: str = "vllm/vllm-openai:latest",
+            model: str = "Qwen/Qwen2.5-Omni-3B",
+            gpus: int = 1,
+            mem: str = "10G",
+            port: int = 8000,
+            dtype: str = "auto",
+            uvicorn_log_level: str = "info",
+            max_tokens: int = 500,
+    ):
+        if gpus < 1:
+            raise ValueError("gpus must be >= 1")
+
+        
+        
+        if node_selector is None:
+            node_selector = {"alpha.eksctl.io/nodegroup-name": "added-gpu"}
+        
+        if not isinstance(max_tokens, int):
+            raise TypeError("max_tokens must be an integer")
+
+        if max_tokens < 1:
+            raise ValueError("max_tokens must be >= 1")
+
+        self.project = project
+        self.name = name
+        self.image = image
+        self.model = model
+        self.gpus = gpus
+        self.mem = mem
+        self.node_selector = node_selector
+        self.port = port
+        self.dtype = dtype
+        self.uvicorn_log_level = uvicorn_log_level
+        self.max_tokens = max_tokens
+
+        self.vllm_app = self.project.set_function(
+            name=self.name,
+            kind="application",
+            image=self.image,
+        )
+
+        self.vllm_app.with_limits(gpus=self.gpus, mem=self.mem)
+
+        if self.node_selector:
+            self.vllm_app.with_node_selection(node_selector=self.node_selector)
+
+        self.vllm_app.set_internal_application_port(self.port)
+
+        args: List[str] = [
+            "serve",
+            self.model,
+            "--dtype",
+            self.dtype,
+            "--port",
+            str(self.port),
+        ]
+
+        if self.uvicorn_log_level:
+            args += ["--uvicorn-log-level", self.uvicorn_log_level]
+
+        if self.gpus > 1:
+            args += ["--tensor-parallel-size", str(gpus)]
+
+            # For more than one GPU you should create a share volume for the multiple GPUs
+            self.vllm_app.spec.volumes = [{"name": "dshm", "emptyDir": {"medium": "Memory"}}]
+            self.vllm_app.spec.volume_mounts = [{"name": "dshm", "mountPath": "/dev/shm"}]
+
+    
+
+        self.vllm_app.spec.command = "vllm"
+        self.vllm_app.spec.args = args
+
+        self.vllm_app.spec.min_replicas = 1
+        self.vllm_app.spec.max_replicas = 1
+
+    def get_runtime(self):
+        return self.vllm_app
+
+    def add_args(self, extra_args: List[str]):
+        if not isinstance(extra_args, list) or not all(isinstance(x, str) for x in extra_args):
+            raise ValueError("extra_args must be a list of strings")
+        self.vllm_app.spec.args += extra_args
+
diff --git a/steps/README.md b/steps/README.md
new file mode 100644
index 000000000..caf726aa7
--- /dev/null
+++ b/steps/README.md
@@ -0,0 +1,8 @@
+
+
+
+## Catalog
+
+<!-- AUTOGEN:START (do not edit below) -->
+_No items found_
+<!-- AUTOGEN:END -->
diff --git a/steps/src/.gitkeep b/steps/src/.gitkeep
new file mode 100644
index 000000000..e69de29bb