diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a56ebe474..f0ebc9771 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -48,6 +48,7 @@ jobs: | awk -F'/' ' /^functions\/src\// {print $1"/"$2"/"$3} /^modules\/src\// {print $1"/"$2"/"$3} + /^steps\/src\// {print $1"/"$2"/"$3} ' \ | sort -u ) diff --git a/.github/workflows/test-all.yaml b/.github/workflows/test-all.yaml index d8eb6c6ed..14a914ca5 100644 --- a/.github/workflows/test-all.yaml +++ b/.github/workflows/test-all.yaml @@ -40,6 +40,7 @@ jobs: | awk -F'/' ' /^functions\/src\// {print $1"/"$2"/"$3} /^modules\/src\// {print $1"/"$2"/"$3} + /^steps\/src\// {print $1"/"$2"/"$3} ' \ | sort -u ) @@ -134,7 +135,7 @@ jobs: - name: Regenerate README tables env: CHANNEL: ${{ steps.branch.outputs.branch }} - run: python -m cli.cli update-readme -c $CHANNEL --asset functions --asset modules + run: python -m cli.cli update-readme -c $CHANNEL --asset functions --asset modules --asset steps - name: Commit & push (if changed) env: USERNAME: ${{ secrets.USERNAME }} @@ -146,7 +147,7 @@ jobs: fi git config --local user.name $USERNAME git config --local user.email $USEREMAIL - git add functions/README.md modules/README.md || true + git add functions/README.md modules/README.md steps/README.md || true git commit -m "chore(readme): auto-update asset tables [skip ci]" git push @@ -187,6 +188,7 @@ jobs: cd .. 
python -m cli.cli build-marketplace -s ./functions/src -sn functions -m marketplace -c $CHANNEL -v -f python -m cli.cli build-marketplace -s ./modules/src -sn modules -m marketplace -c $CHANNEL -v -f + python -m cli.cli build-marketplace -s ./steps/src -sn steps -m marketplace -c $CHANNEL -v -f ## Uncomment the following lines if you want to upload the built marketplace as an artifact # - name: Upload built marketplace as artifact # uses: actions/upload-artifact@v4 @@ -247,4 +249,4 @@ jobs: echo "Pushing [$BRANCH_NAME] to remote [$REMOTE]" git push -f $REMOTE $BRANCH_NAME echo "Submiting pull request..." - gh pr create --title "Marketplace update from $BRANCH_NAME" --body "github-workflow" --base $BASE_BRANCH --head $BRANCH_NAME --repo $BASE_REPO/$REPO_PATH + gh pr create --title "Marketplace update from $BRANCH_NAME" --body "github-workflow" --base $BASE_BRANCH --head $BRANCH_NAME --repo $BASE_REPO/$REPO_PATH \ No newline at end of file diff --git a/cli/common/generate_item_yaml.py b/cli/common/generate_item_yaml.py index 542e98cd8..e97089ad3 100644 --- a/cli/common/generate_item_yaml.py +++ b/cli/common/generate_item_yaml.py @@ -7,6 +7,7 @@ TEMPLATES = { "function": "cli/utils/function_item_template.yaml.j2", "module": "cli/utils/module_item_template.yaml.j2", + "step": "cli/utils/step_item_template.yaml.j2", } diff --git a/cli/common/update_readme.py b/cli/common/update_readme.py index 89b6aa094..f6e582bb6 100644 --- a/cli/common/update_readme.py +++ b/cli/common/update_readme.py @@ -22,7 +22,11 @@ MARKER_START = "" -COLUMNS = ("Name", "Description", "Kind", "Categories") +ASSET_COLUMNS = { + "functions": ("Name", "Description", "Kind", "Categories"), + "modules": ("Name", "Description", "Kind", "Categories"), + "steps": ("Name", "Description", "Class Name", "Categories"), +} @click.command("update-readme") @click.option("-c", "--channel", default="master", help="Name of build channel") @@ -31,7 +35,7 @@ multiple=True, required=True, help="Asset types to process 
(e.g: functions). " - "Pass multiple: --assets functions --assets modules", + "Pass multiple: --asset functions --asset modules", ) @click.option("--check", is_flag=True, help="Do not write; exit non‑zero if README(s) would change.") @@ -45,21 +49,22 @@ def update_readme(channel: str, asset: Iterable[str], touched: list[str] = [] for t in asset_list: + columns = ASSET_COLUMNS.get(t, ("Name", "Description", "Kind", "Categories")) if check: # simulate by reading/writing to a temp string, but easiest is: run update and revert if not checking # Instead: compute would-change by comparing strings without writing: root = Path(".").resolve() asset_dir = root / t readme = asset_dir / "README.md" - rows = _rows_for_asset_type(channel, asset_dir) - table_md = _build_table_md(rows) + rows = _rows_for_asset_type(channel, asset_dir, columns) + table_md = _build_table_md(rows, columns) old = readme.read_text() if readme.exists() else f"# {t.title()}\n\n" new = _replace_block(old, table_md) if new != old: changed_any = True touched.append(str(readme)) else: - if _update_one(channel, t): + if _update_one(channel, t, columns): changed_any = True touched.append(str((Path(t) / "README.md").as_posix())) @@ -79,13 +84,13 @@ def update_readme(channel: str, asset: Iterable[str], click.echo("No README changes.") -def _rows_for_asset_type(channel: str, asset_dir: Path) -> List[Tuple[str, str, str, str]]: +def _rows_for_asset_type(channel: str, asset_dir: Path, columns) -> list: """Scan /src/*/item.yaml and return table rows.""" src = asset_dir / "src" if not src.exists(): return [] - rows: List[Tuple[str, str, str, str]] = [] + rows = [] for item_yaml in sorted(src.glob("*/item.yaml")): asset_name = item_yaml.parent.name try: @@ -95,24 +100,39 @@ def _rows_for_asset_type(channel: str, asset_dir: Path) -> List[Tuple[str, str, desc = (data.get("description") or "").strip() kind = (data.get("spec", {}).get("kind", "")).strip() + class_name = (data.get("className", "")).strip() cats = 
data.get("categories") or [] cats_str = ", ".join(c.strip() for c in cats) if isinstance(cats, list) else str(cats).strip() # Link the name to its source directory # Construct the relative path from the repo root for the asset rel_path = asset_dir.relative_to(Path(".").resolve()) link = f"[{asset_name}](https://github.com/mlrun/functions/tree/{channel}/{rel_path}/src/{asset_name})" - rows.append((link, desc, kind, cats_str)) + row = [] + for col in columns: + if col == "Name": + row.append(link) + elif col == "Description": + row.append(desc) + elif col == "Kind": + row.append(kind) + elif col == "Class Name": + row.append(class_name) + elif col == "Categories": + row.append(cats_str) + else: + row.append("") + rows.append(tuple(row)) rows.sort(key=lambda r: r[0].lower()) return rows -def _build_table_md(rows: List[Tuple[str, str, str, str]]) -> str: +def _build_table_md(rows, columns) -> str: if not rows: return "_No items found_" lines = [ - "| " + " | ".join(COLUMNS) + " |", - "| " + " | ".join("---" for _ in COLUMNS) + " |", + "| " + " | ".join(columns) + " |", + "| " + " | ".join("---" for _ in columns) + " |", ] for r in rows: lines.append("| " + " | ".join((cell or "").replace("\n", " ").strip() for cell in r) + " |") @@ -143,14 +163,14 @@ def _replace_block(readme_text: str, new_block: str) -> str: return readme_text[:start_close] + "\n" + new_block + "\n" + readme_text[ei:] -def _update_one(channel: str, asset_type: str) -> bool: +def _update_one(channel: str, asset_type: str, columns) -> bool: """Generate/replace the table in /README.md. 
Return True if changed.""" root = Path(".").resolve() asset_dir = root / asset_type readme = asset_dir / "README.md" - rows = _rows_for_asset_type(channel, asset_dir) - table_md = _build_table_md(rows) + rows = _rows_for_asset_type(channel, asset_dir, columns) + table_md = _build_table_md(rows, columns) old = readme.read_text() if readme.exists() else f"# {asset_type.title()}\n\n" new = _replace_block(old, table_md) @@ -159,5 +179,3 @@ def _update_one(channel: str, asset_type: str) -> bool: readme.write_text(new) return True return False - - diff --git a/cli/utils/step_item_template.yaml.j2 b/cli/utils/step_item_template.yaml.j2 new file mode 100644 index 000000000..937b4fbfe --- /dev/null +++ b/cli/utils/step_item_template.yaml.j2 @@ -0,0 +1,17 @@ +apiVersion: v1 +categories: [] {# List of category names #} +description: '' {# Short description #} +example: {{ example|default('') }} {# Path to example notebook #} +generationDate: {{ generationDate|default('') }} {# Automatically generated ISO 8601 datetime #} +hidden: false {# Hide item from the UI #} +labels: + author: Iguazio +mlrunVersion: '' {# Item’s MLRun version requirement, should follow python’s versioning schema #} +name: {{ name|default('') }} {# Step name #} +className: {{ className|default('') }} {# Step class name #} +defaultHandler: {{ defaultHandler|default('') }} {# Default handler name #} +spec: + filename: {{ filename|default('') }} {# Implementation file #} + image: mlrun/mlrun {# Base image name #} + requirements: [] {# List of Pythonic library requirements #} +version: 1.0.0 {# Step version, should follow standard semantic versioning schema #} \ No newline at end of file diff --git a/functions/README.md b/functions/README.md index e6b45ddfb..3618833a5 100644 --- a/functions/README.md +++ b/functions/README.md @@ -9,40 +9,40 @@ it is expected that contributors follow certain guidelines/protocols (please chi | Name | Description | Kind | Categories | | --- | --- | --- | --- | -| 
[aggregate](https://github.com/mlrun/functions/tree/master/functions/src/aggregate) | Rolling aggregation over Metrics and Lables according to specifications | job | data-preparation | -| [arc_to_parquet](https://github.com/mlrun/functions/tree/master/functions/src/arc_to_parquet) | retrieve remote archive, open and save as parquet | job | utils | -| [auto_trainer](https://github.com/mlrun/functions/tree/master/functions/src/auto_trainer) | Automatic train, evaluate and predict functions for the ML frameworks - Scikit-Learn, XGBoost and LightGBM. | job | machine-learning, model-training | -| [azureml_serving](https://github.com/mlrun/functions/tree/master/functions/src/azureml_serving) | AzureML serving function | serving | machine-learning, model-serving | -| [azureml_utils](https://github.com/mlrun/functions/tree/master/functions/src/azureml_utils) | Azure AutoML integration in MLRun, including utils functions for training models on Azure AutoML platfrom. | job | model-serving, utils | -| [batch_inference](https://github.com/mlrun/functions/tree/master/functions/src/batch_inference) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving | -| [batch_inference_v2](https://github.com/mlrun/functions/tree/master/functions/src/batch_inference_v2) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. 
| job | model-serving | -| [describe](https://github.com/mlrun/functions/tree/master/functions/src/describe) | describe and visualizes dataset stats | job | data-analysis | -| [describe_dask](https://github.com/mlrun/functions/tree/master/functions/src/describe_dask) | describe and visualizes dataset stats | job | data-analysis | -| [describe_spark](https://github.com/mlrun/functions/tree/master/functions/src/describe_spark) | | job | data-analysis | -| [feature_selection](https://github.com/mlrun/functions/tree/master/functions/src/feature_selection) | Select features through multiple Statistical and Model filters | job | data-preparation, machine-learning | -| [gen_class_data](https://github.com/mlrun/functions/tree/master/functions/src/gen_class_data) | Create a binary classification sample dataset and save. | job | data-generation | -| [github_utils](https://github.com/mlrun/functions/tree/master/functions/src/github_utils) | add comments to github pull request | job | utils | -| [hugging_face_serving](https://github.com/mlrun/functions/tree/master/functions/src/hugging_face_serving) | Generic Hugging Face model server. | serving | genai, model-serving | -| [load_dataset](https://github.com/mlrun/functions/tree/master/functions/src/load_dataset) | load a toy dataset from scikit-learn | job | data-preparation | -| [mlflow_utils](https://github.com/mlrun/functions/tree/master/functions/src/mlflow_utils) | Mlflow model server, and additional utils. 
| serving | model-serving, utils | -| [model_server](https://github.com/mlrun/functions/tree/master/functions/src/model_server) | generic sklearn model server | nuclio:serving | model-serving, machine-learning | -| [model_server_tester](https://github.com/mlrun/functions/tree/master/functions/src/model_server_tester) | test model servers | job | monitoring, model-serving | -| [noise_reduction](https://github.com/mlrun/functions/tree/master/functions/src/noise_reduction) | Reduce noise from audio files | job | data-preparation, audio | -| [onnx_utils](https://github.com/mlrun/functions/tree/master/functions/src/onnx_utils) | ONNX intigration in MLRun, some utils functions for the ONNX framework, optimizing and converting models from different framework to ONNX using MLRun. | job | utils, deep-learning | -| [open_archive](https://github.com/mlrun/functions/tree/master/functions/src/open_archive) | Open a file/object archive into a target directory | job | utils | -| [pii_recognizer](https://github.com/mlrun/functions/tree/master/functions/src/pii_recognizer) | This function is used to recognize PII in a directory of text files | job | data-preparation, NLP | -| [pyannote_audio](https://github.com/mlrun/functions/tree/master/functions/src/pyannote_audio) | pyannote's speech diarization of audio files | job | deep-learning, audio | -| [question_answering](https://github.com/mlrun/functions/tree/master/functions/src/question_answering) | GenAI approach of question answering on a given data | job | genai | -| [send_email](https://github.com/mlrun/functions/tree/master/functions/src/send_email) | Send Email messages through SMTP server | job | utils | -| [silero_vad](https://github.com/mlrun/functions/tree/master/functions/src/silero_vad) | Silero VAD (Voice Activity Detection) functions. 
| job | deep-learning, audio | -| [sklearn_classifier](https://github.com/mlrun/functions/tree/master/functions/src/sklearn_classifier) | train any classifier using scikit-learn's API | job | machine-learning, model-training | -| [sklearn_classifier_dask](https://github.com/mlrun/functions/tree/master/functions/src/sklearn_classifier_dask) | train any classifier using scikit-learn's API over Dask | job | machine-learning, model-training | -| [structured_data_generator](https://github.com/mlrun/functions/tree/master/functions/src/structured_data_generator) | GenAI approach of generating structured data according to a given schema | job | data-generation, genai | -| [test_classifier](https://github.com/mlrun/functions/tree/master/functions/src/test_classifier) | test a classifier using held-out or new data | job | machine-learning, model-testing | -| [text_to_audio_generator](https://github.com/mlrun/functions/tree/master/functions/src/text_to_audio_generator) | Generate audio file from text using different speakers | job | data-generation, audio | -| [tf2_serving](https://github.com/mlrun/functions/tree/master/functions/src/tf2_serving) | tf2 image classification server | nuclio:serving | model-serving, machine-learning | -| [transcribe](https://github.com/mlrun/functions/tree/master/functions/src/transcribe) | Transcribe audio files into text files | job | audio, genai | -| [translate](https://github.com/mlrun/functions/tree/master/functions/src/translate) | Translate text files from one language to another | job | genai, NLP | -| [v2_model_server](https://github.com/mlrun/functions/tree/master/functions/src/v2_model_server) | generic sklearn model server | serving | model-serving, machine-learning | -| [v2_model_tester](https://github.com/mlrun/functions/tree/master/functions/src/v2_model_tester) | test v2 model servers | job | model-testing, machine-learning | +| [aggregate](https://github.com/mlrun/functions/tree/development/functions/src/aggregate) | Rolling 
aggregation over Metrics and Lables according to specifications | job | data-preparation | +| [arc_to_parquet](https://github.com/mlrun/functions/tree/development/functions/src/arc_to_parquet) | retrieve remote archive, open and save as parquet | job | utils | +| [auto_trainer](https://github.com/mlrun/functions/tree/development/functions/src/auto_trainer) | Automatic train, evaluate and predict functions for the ML frameworks - Scikit-Learn, XGBoost and LightGBM. | job | machine-learning, model-training | +| [azureml_serving](https://github.com/mlrun/functions/tree/development/functions/src/azureml_serving) | AzureML serving function | serving | machine-learning, model-serving | +| [azureml_utils](https://github.com/mlrun/functions/tree/development/functions/src/azureml_utils) | Azure AutoML integration in MLRun, including utils functions for training models on Azure AutoML platfrom. | job | model-serving, utils | +| [batch_inference](https://github.com/mlrun/functions/tree/development/functions/src/batch_inference) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. | job | model-serving | +| [batch_inference_v2](https://github.com/mlrun/functions/tree/development/functions/src/batch_inference_v2) | Batch inference (also knows as prediction) for the common ML frameworks (SciKit-Learn, XGBoost and LightGBM) while performing data drift analysis. 
| job | model-serving | +| [describe](https://github.com/mlrun/functions/tree/development/functions/src/describe) | describe and visualizes dataset stats | job | data-analysis | +| [describe_dask](https://github.com/mlrun/functions/tree/development/functions/src/describe_dask) | describe and visualizes dataset stats | job | data-analysis | +| [describe_spark](https://github.com/mlrun/functions/tree/development/functions/src/describe_spark) | | job | data-analysis | +| [feature_selection](https://github.com/mlrun/functions/tree/development/functions/src/feature_selection) | Select features through multiple Statistical and Model filters | job | data-preparation, machine-learning | +| [gen_class_data](https://github.com/mlrun/functions/tree/development/functions/src/gen_class_data) | Create a binary classification sample dataset and save. | job | data-generation | +| [github_utils](https://github.com/mlrun/functions/tree/development/functions/src/github_utils) | add comments to github pull request | job | utils | +| [hugging_face_serving](https://github.com/mlrun/functions/tree/development/functions/src/hugging_face_serving) | Generic Hugging Face model server. | serving | genai, model-serving | +| [load_dataset](https://github.com/mlrun/functions/tree/development/functions/src/load_dataset) | load a toy dataset from scikit-learn | job | data-preparation | +| [mlflow_utils](https://github.com/mlrun/functions/tree/development/functions/src/mlflow_utils) | Mlflow model server, and additional utils. 
| serving | model-serving, utils | +| [model_server](https://github.com/mlrun/functions/tree/development/functions/src/model_server) | generic sklearn model server | nuclio:serving | model-serving, machine-learning | +| [model_server_tester](https://github.com/mlrun/functions/tree/development/functions/src/model_server_tester) | test model servers | job | monitoring, model-serving | +| [noise_reduction](https://github.com/mlrun/functions/tree/development/functions/src/noise_reduction) | Reduce noise from audio files | job | data-preparation, audio | +| [onnx_utils](https://github.com/mlrun/functions/tree/development/functions/src/onnx_utils) | ONNX intigration in MLRun, some utils functions for the ONNX framework, optimizing and converting models from different framework to ONNX using MLRun. | job | utils, deep-learning | +| [open_archive](https://github.com/mlrun/functions/tree/development/functions/src/open_archive) | Open a file/object archive into a target directory | job | utils | +| [pii_recognizer](https://github.com/mlrun/functions/tree/development/functions/src/pii_recognizer) | This function is used to recognize PII in a directory of text files | job | data-preparation, NLP | +| [pyannote_audio](https://github.com/mlrun/functions/tree/development/functions/src/pyannote_audio) | pyannote's speech diarization of audio files | job | deep-learning, audio | +| [question_answering](https://github.com/mlrun/functions/tree/development/functions/src/question_answering) | GenAI approach of question answering on a given data | job | genai | +| [send_email](https://github.com/mlrun/functions/tree/development/functions/src/send_email) | Send Email messages through SMTP server | job | utils | +| [silero_vad](https://github.com/mlrun/functions/tree/development/functions/src/silero_vad) | Silero VAD (Voice Activity Detection) functions. 
| job | deep-learning, audio | +| [sklearn_classifier](https://github.com/mlrun/functions/tree/development/functions/src/sklearn_classifier) | train any classifier using scikit-learn's API | job | machine-learning, model-training | +| [sklearn_classifier_dask](https://github.com/mlrun/functions/tree/development/functions/src/sklearn_classifier_dask) | train any classifier using scikit-learn's API over Dask | job | machine-learning, model-training | +| [structured_data_generator](https://github.com/mlrun/functions/tree/development/functions/src/structured_data_generator) | GenAI approach of generating structured data according to a given schema | job | data-generation, genai | +| [test_classifier](https://github.com/mlrun/functions/tree/development/functions/src/test_classifier) | test a classifier using held-out or new data | job | machine-learning, model-testing | +| [text_to_audio_generator](https://github.com/mlrun/functions/tree/development/functions/src/text_to_audio_generator) | Generate audio file from text using different speakers | job | data-generation, audio | +| [tf2_serving](https://github.com/mlrun/functions/tree/development/functions/src/tf2_serving) | tf2 image classification server | nuclio:serving | model-serving, machine-learning | +| [transcribe](https://github.com/mlrun/functions/tree/development/functions/src/transcribe) | Transcribe audio files into text files | job | audio, genai | +| [translate](https://github.com/mlrun/functions/tree/development/functions/src/translate) | Translate text files from one language to another | job | genai, NLP | +| [v2_model_server](https://github.com/mlrun/functions/tree/development/functions/src/v2_model_server) | generic sklearn model server | serving | model-serving, machine-learning | +| [v2_model_tester](https://github.com/mlrun/functions/tree/development/functions/src/v2_model_tester) | test v2 model servers | job | model-testing, machine-learning | diff --git a/modules/README.md b/modules/README.md 
index 7c69401c1..aaad9863e 100644 --- a/modules/README.md +++ b/modules/README.md @@ -6,9 +6,10 @@ | Name | Description | Kind | Categories | | --- | --- | --- | --- | -| [agent_deployer](https://github.com/mlrun/functions/tree/master/modules/src/agent_deployer) | Helper for serving function deploy of an AI agents using MLRun | monitoring_application | model-serving | -| [count_events](https://github.com/mlrun/functions/tree/master/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving | -| [evidently_iris](https://github.com/mlrun/functions/tree/master/modules/src/evidently_iris) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | -| [histogram_data_drift](https://github.com/mlrun/functions/tree/master/modules/src/histogram_data_drift) | Model-monitoring application for detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | -| [openai_proxy_app](https://github.com/mlrun/functions/tree/master/modules/src/openai_proxy_app) | OpenAI application runtime based on fastapi | generic | genai | +| [agent_deployer](https://github.com/mlrun/functions/tree/development/modules/src/agent_deployer) | Helper for serving function deploy of an AI agents using MLRun | monitoring_application | model-serving | +| [count_events](https://github.com/mlrun/functions/tree/development/modules/src/count_events) | Count events in each time window | monitoring_application | model-serving | +| [evidently_iris](https://github.com/mlrun/functions/tree/development/modules/src/evidently_iris) | Demonstrates Evidently integration in MLRun for data quality and drift monitoring using the Iris dataset | monitoring_application | model-serving, structured-ML | +| [histogram_data_drift](https://github.com/mlrun/functions/tree/development/modules/src/histogram_data_drift) | Model-monitoring application for 
detecting and visualizing data drift | monitoring_application | model-serving, structured-ML | +| [openai_proxy_app](https://github.com/mlrun/functions/tree/development/modules/src/openai_proxy_app) | OpenAI application runtime based on fastapi | generic | genai | +| [vllm_module](https://github.com/mlrun/functions/tree/development/modules/src/vllm_module) | Deploys a vLLM OpenAI-compatible LLM server as an MLRun application runtime, with configurable GPU usage, node selection, tensor parallelism, and runtime flags. | generic | genai | diff --git a/modules/src/agent_deployer/agent_deployer.ipynb b/modules/src/agent_deployer/agent_deployer.ipynb index 944dba116..98531ecd1 100644 --- a/modules/src/agent_deployer/agent_deployer.ipynb +++ b/modules/src/agent_deployer/agent_deployer.ipynb @@ -10,18 +10,8 @@ }, { "cell_type": "code", - "execution_count": 21, "id": "be42e7c5-b2af-476f-8041-c17be56edb52", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2025-12-03 07:17:36,530 [info] Project loaded successfully: {\"project_name\":\"langchain-example-10\"}\n" - ] - } - ], "source": [ "%config Completer.use_jedi = False\n", "\n", @@ -31,7 +21,9 @@ "image = \"mlrun/mlrun\"\n", "project_name = \"langchain-example\"\n", "project = get_or_create_project(project_name, context=\"./\", allow_cross_project=True)" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -43,17 +35,17 @@ }, { "cell_type": "code", - "execution_count": 23, "id": "a47d7789-2ea2-493e-8905-f53b978e2abd", "metadata": {}, - "outputs": [], "source": [ "# Create project secrets for project\n", "secrets = {\"OPENAI_API_KEY\": \"\", # add your OpenAI API key here\n", " \"OPENAI_BASE_URL\": \"\" # add your OpenAI base url here if needed\n", " }\n", "project.set_secrets(secrets=secrets, provider=\"kubernetes\")" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -65,10 +57,8 @@ }, { "cell_type": "code", 
- "execution_count": null, "id": "25cbd982-86de-43b5-91ef-24fc60b2d758", "metadata": {}, - "outputs": [], "source": [ "%%writefile langchain_model.py\n", "\n", @@ -197,7 +187,9 @@ " result[\"total_cost_usd\"] = input_cost + output_cost\n", " return result\n", " " - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -209,20 +201,8 @@ }, { "cell_type": "code", - "execution_count": 81, "id": "691e9068-ec9c-40d6-9ac8-e6c3e605b44c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> 2025-12-03 10:55:46,194 [info] Project loaded successfully: {\"project_name\":\"langchain-example-10\"}\n", - "> 2025-12-03 10:55:46,463 [info] Model monitoring credentials were set successfully. Please keep in mind that if you already had model monitoring functions / model monitoring infra / tracked model server deployed on your project, you will need to redeploy them. For redeploying the model monitoring infra, first disable it using `project.disable_model_monitoring()` and then enable it using `project.enable_model_monitoring()`.\n", - "details: MLRunConflictError(\"The following model-montioring infrastructure functions are already deployed, aborting: ['model-monitoring-controller', 'model-monitoring-writer']\\nIf you want to redeploy the model-monitoring controller (maybe with different base-period), use update_model_monitoring_controller.If you want to redeploy all of model-monitoring infrastructure, call disable_model_monitoringbefore calling enable_model_monitoring again.\")\n" - ] - } - ], "source": [ "module = mlrun.import_module(\"hub://agent_deployer\")\n", "\n", @@ -237,11 +217,11 @@ " prompt_template= \"\"\"\n", " Answer the following questions as best you can.\n", " You have access to the following tools:\n", - " {tools}\n", + " {{tools}}\n", " Use the following format:\n", " Question: the input question you must answer\n", " Thought: you should always think about what to do\n", - " Action: the 
action to take, should be one of [{tool_names}]\n", + " Action: the action to take, should be one of [{{tool_names}}]\n", " Action Input: the input to the action\n", " Observation: the result of the action\n", " ... (this Thought/Action/Action Input/Observation can repeat N times)\n", @@ -252,16 +232,18 @@ " Question: {input}\n", " Thought:{agent_scratchpad}\n", " \"\"\",\n", - ")" - ] + ")\n" + ], + "outputs": [], + "execution_count": null }, { "cell_type": "code", - "execution_count": 82, "id": "0bb1c4d1-5d7c-4d1c-bf51-8f53b319e91f", "metadata": {}, + "source": "func = agent.deploy_function(enable_tracking=True)", "outputs": [], - "source": "func = agent.deploy_function(enable_tracking=True)" + "execution_count": null }, { "metadata": {}, @@ -272,10 +254,10 @@ { "metadata": {}, "cell_type": "code", + "source": "func.invoke(\"./\", {\"question\" : \"If a pizza costs $18.75 and I want to buy 3, what is the total cost?\"})", + "id": "ac5c3ba174d2cf8b", "outputs": [], - "execution_count": null, - "source": "func.invoke(\"./\", {\"question\" : \"If a pizza costs $18.75 and I want to buy 3, plus a 15% tip, what is the total cost?\"})", - "id": "ac5c3ba174d2cf8b" + "execution_count": null }, { "metadata": {}, @@ -289,8 +271,6 @@ { "metadata": {}, "cell_type": "code", - "outputs": [], - "execution_count": null, "source": [ "%%writefile monitoring_application.py\n", "\n", @@ -405,7 +385,9 @@ " value=value,\n", " )\n" ], - "id": "377487422f5ed289" + "id": "377487422f5ed289", + "outputs": [], + "execution_count": null }, { "metadata": {}, @@ -416,8 +398,6 @@ { "metadata": {}, "cell_type": "code", - "outputs": [], - "execution_count": null, "source": [ "llm_monitoring_app = project.set_model_monitoring_function(\n", " func=\"monitoring_application.py\",\n", @@ -428,7 +408,9 @@ "\n", "project.deploy_function(llm_monitoring_app)" ], - "id": "9d6ad2a4a47a44bd" + "id": "9d6ad2a4a47a44bd", + "outputs": [], + "execution_count": null } ], "metadata": { diff --git 
a/modules/src/vllm_module/item.yaml b/modules/src/vllm_module/item.yaml new file mode 100644 index 000000000..ca684340f --- /dev/null +++ b/modules/src/vllm_module/item.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +categories: +- genai +description: Deploys a vLLM OpenAI-compatible LLM server as an MLRun application runtime, with configurable GPU usage, node selection, tensor parallelism, and runtime flags. +example: vllm_module.ipynb +generationDate: 2025-12-17:12-25 +hidden: false +labels: + author: Iguazio +mlrunVersion: 1.10.0 +name: vllm_module +spec: + filename: vllm_module.py + image: mlrun/mlrun + kind: generic +version: 1.0.0 diff --git a/modules/src/vllm_module/test_vllm_module.py b/modules/src/vllm_module/test_vllm_module.py new file mode 100644 index 000000000..3a5f422ae --- /dev/null +++ b/modules/src/vllm_module/test_vllm_module.py @@ -0,0 +1,35 @@ +# Copyright 2025 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#

from vllm_module import VLLMModule
import mlrun


class TestVllmModule:
    """Test suite for the VLLMModule class."""

    def setup_method(self):
        """Build a VLLMModule on an unsaved project before each test."""
        project = mlrun.new_project("vllm", save=False)

        # node_selector is keyword-only in VLLMModule.__init__, so it must be
        # passed by name.
        self.vllm_module = VLLMModule(
            project,
            node_selector={"alpha.eksctl.io/nodegroup-name": "added-gpu"},
        )

    def test_vllm_module(self):
        """The module must expose an MLRun ApplicationRuntime as ``vllm_app``."""
        # isinstance (rather than an exact type() comparison) keeps the test
        # valid if MLRun ever returns a subclass of ApplicationRuntime.
        assert isinstance(
            self.vllm_module.vllm_app,
            mlrun.runtimes.nuclio.application.application.ApplicationRuntime,
        )
+ ] + }, + { + "cell_type": "markdown", + "id": "d5c84798-289f-4b4f-8c1b-f4dd12a3bda5", + "metadata": {}, + "source": [ + "## What this notebook does\n", + "\n", + "In this notebook we will:\n", + "\n", + "- Create or load an **MLRun project**\n", + "- Import a custom **vLLM module** from the MLRun Hub\n", + "- Deploy a **vLLM OpenAI-compatible server** as an MLRun application runtime\n", + "- Configure deployment parameters such as model, GPU count, memory, node selector, port, and log level\n", + "- Invoke the deployed service using the `/v1/chat/completions` endpoint\n", + "- Parse the response and extract only the assistant’s generated text\n", + "\n", + "By the end of this notebook, you will have a working vLLM deployment that can be queried directly from a Jupyter notebook using OpenAI-style APIs.\n", + "\n", + "For more information, see the [vLLM documentation](https://docs.vllm.ai/en/latest/serving/openai_compatible_server/)." + ] + }, + { + "cell_type": "markdown", + "id": "879ca641-ee35-4682-9995-4eb319d89090", + "metadata": {}, + "source": [ + "## 1. Create an MLRun project\n", + "\n", + "In this section we create or load an MLRun project that will own the deployed vLLM application runtime." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6eac263a-17d1-4454-9e19-459dfbe2f231", + "metadata": {}, + "outputs": [], + "source": [ + "project = mlrun.get_or_create_project(name=\"vllm-module\", context=\"\", user_project=True)" + ] + }, + { + "cell_type": "markdown", + "id": "da49d335-b704-4fb6-801f-4d07b64f9be6", + "metadata": {}, + "source": [ + "## 2. Import the vLLM module from the MLRun Hub\n", + "\n", + "In this section we import the vLLM module from the MLRun Hub so we can instantiate `VLLMModule` and deploy it as an application runtime."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e6d89dee-db58-4c0c-8009-b37020c9599a", + "metadata": {}, + "outputs": [], + "source": [ + "vllm = mlrun.import_module(\"hub://vllm-module\")" + ] + }, + { + "cell_type": "markdown", + "id": "1202ddd5-0ce7-4769-be29-8fc264c1f80e", + "metadata": {}, + "source": [ + "## 3. Deploy the vLLM application runtime\n", + "\n", + "Configure the vLLM deployment parameters and deploy the application.\n", + "\n", + "The returned address is the service URL for the application runtime." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e433123a-e64b-4a7a-8c7f-8165bcdcc6d1", + "metadata": {}, + "outputs": [], + "source": [ + "# Initialize the vLLM app\n", + "vllm_module = vllm.VLLMModule(\n", + " project=project,\n", + " node_selector={\"alpha.eksctl.io/nodegroup-name\": \"added-gpu\"},\n", + " name=\"qwen-vllm\",\n", + " image=\"vllm/vllm-openai:latest\",\n", + " model=\"Qwen/Qwen2.5-Omni-3B\",\n", + " gpus=1,\n", + " mem=\"10G\",\n", + " port=8000,\n", + " dtype=\"auto\",\n", + " uvicorn_log_level=\"info\",\n", + " max_tokens = 501,\n", + ")\n", + "\n", + "# Deploy the vLLM app\n", + "addr = vllm_module.vllm_app.deploy(with_mlrun=True)\n", + "addr" + ] + }, + { + "cell_type": "markdown", + "id": "06832de3-5c31-43bf-b07b-0e71fb2d072d", + "metadata": {}, + "source": [ + "## 4. Get the runtime handle\n", + "\n", + "Fetch the runtime object and invoke the service using `app.invoke(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "102d3fd0-1ee6-49b8-8c86-df742ac1c559", + "metadata": {}, + "outputs": [], + "source": [ + "# Optional: get_runtime() returns the MLRun application runtime\n", + "app = vllm_module.get_runtime()" + ] + }, + { + "cell_type": "markdown", + "id": "925730c1-0ac5-454b-8fb2-ab8cebb3f3ac", + "metadata": {}, + "source": [ + "## 5. 
Send a chat request for testing\n", + "\n", + "Call the OpenAI compatible endpoint `/v1/chat/completions`, parse the JSON response, and print only the assistant message text." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "31bc78d4-1c6f-439c-b894-1522e3a6d3e6", + "metadata": {}, + "outputs": [], + "source": [ + "body = {\n", + " \"model\": vllm_module.model,\n", + " \"messages\": [{\"role\": \"user\", \"content\": \"what are the 3 countries with the most gpu as far as you know\"}],\n", + " \"max_tokens\": vllm_module.max_tokens, # start smaller for testing\n", + "}\n", + "\n", + "resp = app.invoke(path=\"/v1/chat/completions\", body=body)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "a459d5f8-dad0-4735-94c2-3801d4f94bb5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "assistant:\n", + "\n", + "As of the most commonly cited estimates, the three countries with the largest GPU capacity for AI workloads are the United States, China, and India.\n" + ] + } + ], + "source": [ + "data = resp\n", + "assistant_text = data[\"choices\"][0][\"message\"][\"content\"]\n", + "\n", + "print(\"\\nassistant:\\n\")\n", + "print(assistant_text.strip())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "957b5d21-7ade-4131-9100-878652c477fc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mlrun-base", + "language": "python", + "name": "conda-env-mlrun-base-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.22" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/src/vllm_module/vllm_module.py b/modules/src/vllm_module/vllm_module.py new file mode 100644 index 
# Copyright 2025 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This module configures a vLLM (OpenAI-compatible) server as an MLRun
# application runtime. It only builds the runtime definition (image, resources,
# node selection, `vllm serve` arguments); deploying and invoking the service
# is done by the caller through the returned runtime object.


from typing import Any, Dict, List, Optional


class VLLMModule:
    """
    Lightweight wrapper that configures a vLLM (OpenAI-compatible) large
    language model server as an MLRun application runtime.

    On construction the module:
    - Registers an ``application`` function on the given project, based on a
      vLLM container image
    - Sets GPU/memory limits and (optionally) a Kubernetes node selector
    - Sets the container command to ``vllm`` with ``serve <model>`` arguments
    - Adds ``--tensor-parallel-size`` and a memory-backed ``/dev/shm`` volume
      when more than one GPU is requested
    - Pins the runtime to exactly one replica

    The configured runtime is available as ``vllm_app`` (or via
    ``get_runtime()``); the caller is responsible for deploying it.
    """

    # Node selector applied when the caller passes ``node_selector=None``.
    # NOTE(review): this label is specific to one EKS node group; pass an
    # explicit selector (or ``{}`` to skip node selection) on other clusters.
    DEFAULT_NODE_SELECTOR: Dict[str, str] = {
        "alpha.eksctl.io/nodegroup-name": "added-gpu",
    }

    def __init__(
        self,
        project: Any,
        *,
        node_selector: Optional[Dict[str, str]] = None,
        name: str = "vllm",
        image: str = "vllm/vllm-openai:latest",
        model: str = "Qwen/Qwen2.5-Omni-3B",
        gpus: int = 1,
        mem: str = "10G",
        port: int = 8000,
        dtype: str = "auto",
        uvicorn_log_level: str = "info",
        max_tokens: int = 500,
    ):
        """
        Configure the vLLM application runtime on ``project``.

        :param project:           Project object used to register the function;
                                  it must provide ``set_function(name=, kind=, image=)``.
        :param node_selector:     Kubernetes node selector labels. ``None`` falls
                                  back to ``DEFAULT_NODE_SELECTOR``; an empty
                                  dict disables node selection.
        :param name:              Name of the application function.
        :param image:             Container image that provides the ``vllm`` command.
        :param model:             Model identifier passed to ``vllm serve``.
        :param gpus:              Number of GPUs to request (>= 1). Values above 1
                                  also enable tensor parallelism.
        :param mem:               Memory limit passed to ``with_limits``.
        :param port:              Port used both for ``--port`` and as the
                                  runtime's internal application port.
        :param dtype:             Value for the ``--dtype`` flag.
        :param uvicorn_log_level: Value for ``--uvicorn-log-level``; a falsy value
                                  omits the flag.
        :param max_tokens:        Positive integer stored on the instance for
                                  callers to reuse in requests; it is not passed
                                  to ``vllm serve``.

        :raises TypeError:  If ``gpus`` or ``max_tokens`` is not an integer.
        :raises ValueError: If ``gpus`` or ``max_tokens`` is smaller than 1.
        """
        # Validate before touching the project so a bad argument never leaves
        # a half-configured function behind.
        self._require_positive_int("gpus", gpus)
        self._require_positive_int("max_tokens", max_tokens)

        if node_selector is None:
            node_selector = self.DEFAULT_NODE_SELECTOR

        self.project = project
        self.name = name
        self.image = image
        self.model = model
        self.gpus = gpus
        self.mem = mem
        # Copy so later mutation of the caller's dict (or of the class default)
        # cannot silently change this instance.
        self.node_selector = dict(node_selector)
        self.port = port
        self.dtype = dtype
        self.uvicorn_log_level = uvicorn_log_level
        self.max_tokens = max_tokens

        self.vllm_app = self.project.set_function(
            name=self.name,
            kind="application",
            image=self.image,
        )

        self.vllm_app.with_limits(gpus=self.gpus, mem=self.mem)

        if self.node_selector:
            self.vllm_app.with_node_selection(node_selector=self.node_selector)

        self.vllm_app.set_internal_application_port(self.port)

        args: List[str] = [
            "serve",
            self.model,
            "--dtype",
            self.dtype,
            "--port",
            str(self.port),
        ]

        if self.uvicorn_log_level:
            args += ["--uvicorn-log-level", self.uvicorn_log_level]

        if self.gpus > 1:
            args += ["--tensor-parallel-size", str(self.gpus)]

            # Multi-GPU workers exchange data through shared memory, so mount
            # a memory-backed volume at /dev/shm.
            self.vllm_app.spec.volumes = [
                {"name": "dshm", "emptyDir": {"medium": "Memory"}}
            ]
            self.vllm_app.spec.volume_mounts = [
                {"name": "dshm", "mountPath": "/dev/shm"}
            ]

        self.vllm_app.spec.command = "vllm"
        self.vllm_app.spec.args = args

        # A single replica: the server is not scaled horizontally.
        self.vllm_app.spec.min_replicas = 1
        self.vllm_app.spec.max_replicas = 1

    @staticmethod
    def _require_positive_int(label: str, value: object) -> None:
        """Raise unless ``value`` is a real integer (not bool) that is >= 1."""
        # bool is a subclass of int, so it has to be rejected explicitly.
        if isinstance(value, bool) or not isinstance(value, int):
            raise TypeError(f"{label} must be an integer")
        if value < 1:
            raise ValueError(f"{label} must be >= 1")

    def get_runtime(self):
        """Return the configured application runtime (same object as ``vllm_app``)."""
        return self.vllm_app

    def add_args(self, extra_args: List[str]):
        """
        Append extra command-line arguments to the ``vllm`` command.

        :param extra_args: List of strings appended, in order, to the runtime's
                           ``spec.args``.

        :raises ValueError: If ``extra_args`` is not a list of strings.
        """
        if not isinstance(extra_args, list) or not all(
            isinstance(x, str) for x in extra_args
        ):
            raise ValueError("extra_args must be a list of strings")
        self.vllm_app.spec.args += extra_args